def __init__(self, size, mean, std, max_intensity=255.0):
    super(VideoGroupValTransform, self).__init__()
    if isinstance(size, numbers.Number):
        self.size = (int(size), int(size))
    else:
        self.size = size
    self.mean = np.array(mean).reshape((len(mean), 1, 1))
    self.std = np.array(std).reshape((len(std), 1, 1))
    self.max_intensity = max_intensity
def __init__(self, size, scale_ratios, mean, std, fix_crop=True,
             more_fix_crop=True, max_distort=1, prob=0.5, max_intensity=255.0):
    super(VideoGroupTrainTransform, self).__init__()
    self.height = size[0]
    self.width = size[1]
    self.scale_ratios = scale_ratios
    self.fix_crop = fix_crop
    self.more_fix_crop = more_fix_crop
    self.max_distort = max_distort
    self.prob = prob
    self.max_intensity = max_intensity
    self.mean = np.array(mean).reshape((len(mean), 1, 1))
    self.std = np.array(std).reshape((len(std), 1, 1))
def transform(self, labels, categories=None):
    """
    Convert a list of labels into a one-hot encoding.

    Parameters
    ----------
    labels : list of length `N`
        A list of category labels.
    categories : list of length `C`
        List of the unique category labels for the items to encode. Default
        is None.

    Returns
    -------
    Y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
        The one-hot encoded labels. Each row corresponds to an example, with
        a single 1 in the column corresponding to the respective label.
    """
    if not self._is_fit:
        categories = set(labels) if categories is None else categories
        self.fit(categories)

    unknown = list(set(labels.asnumpy()) - set(self.cat2idx.keys()))
    assert len(unknown) == 0, "Unrecognized label(s): {}".format(unknown)

    N, C = len(labels), len(self.cat2idx)
    cols = np.array([self.cat2idx[c.item()] for c in labels])

    Y = np.zeros((N, C))
    Y[np.arange(N), cols] = 1
    return Y
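# Hedged standalone sketch of the one-hot construction used in `transform`
# above, with plain numpy and hypothetical string labels (the encoder object
# and its `cat2idx` mapping are assumed to live elsewhere):
import numpy as np

labels = ["cat", "dog", "cat", "bird"]
cat2idx = {c: i for i, c in enumerate(sorted(set(labels)))}
N, C = len(labels), len(cat2idx)
cols = np.array([cat2idx[c] for c in labels])
Y = np.zeros((N, C))
Y[np.arange(N), cols] = 1  # one 1 per row, in the column for that label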
def fit(self, X):
    """
    Store the feature-wise mean and standard deviation across the samples
    in `X` for future scaling.

    Parameters
    ----------
    X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
        An array of N samples, each with dimensionality `C`
    """
    if not isinstance(X, np.ndarray):
        X = np.array(X)

    if X.shape[0] < 2:
        raise ValueError("`X` must contain at least 2 samples")

    std = np.ones(X.shape[1])
    mean = np.zeros(X.shape[1])

    if self.with_mean:
        mean = np.mean(X, axis=0)

    if self.with_std:
        std = np.std(X, axis=0, ddof=0)

    self._mean = mean
    self._std = std
    self._is_fit = True
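# Hedged sketch of the scaling this `fit` enables: store feature-wise
# statistics once, then standardize new data to zero mean / unit variance.
# Plain numpy; the standardizer class itself is assumed to apply the same
# transform in its own `transform` method.
import numpy as np

X = np.random.randn(100, 3) * 5.0 + 2.0
mean, std = X.mean(axis=0), X.std(axis=0, ddof=0)
X_scaled = (X - mean) / std  # each column now has ~zero mean, ~unit variance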
def _steps_without_decrease(self, robust=False, check_all=False):
    """
    Returns the maximum number of timesteps for which `P(loss is
    decreasing) < 0.51`.

    Parameters
    ----------
    robust : bool
        If `robust=True`, first filter out the largest 10% of the loss
        values to remove transient spikes in the loss due to, e.g., a few
        bad minibatches. Default is False.
    check_all : bool
        If True, returns the maximum number of timesteps for which P(loss
        is decreasing) < 0.51. If False, only checks whether the number of
        timesteps for which P(loss is decreasing) < 0.51 is at least
        ``self.patience``. The former provides more information but is
        significantly more computationally expensive. Default is False.

    Returns
    -------
    steps_without_decrease : int
        The maximum number of steps back in loss_history for which P(loss
        is decreasing) < 0.51.
    """
    lh = np.array(self.loss_history)

    # drop the top 10% of loss values to filter out large loss spikes
    if robust:
        thresh = np.quantile(lh, 0.9)
        lh = np.array([i for i in lh if i <= thresh])

    N = len(lh)
    steps_without_decrease = 0
    if check_all:
        for i in reversed(range(N - 2)):
            if self._p_decreasing(lh, i) < 0.51:
                steps_without_decrease = N - i
    else:
        i = max(0, N - self.patience - 1)
        if self._p_decreasing(lh, i) < 0.51:
            steps_without_decrease = N - i
    return steps_without_decrease
def generate(self, n_steps, latent_state_types, obs_types):
    """
    Sample a sequence from the HMM.

    Parameters
    ----------
    n_steps : int
        The length of the generated sequence
    latent_state_types : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        A collection of labels for the latent states
    obs_types : :py:class:`ndarray <numpy.ndarray>` of shape `(V,)`
        A collection of labels for the observations

    Returns
    -------
    states : :py:class:`ndarray <numpy.ndarray>` of shape `(n_steps,)`
        The sampled latent states.
    emissions : :py:class:`ndarray <numpy.ndarray>` of shape `(n_steps,)`
        The sampled emissions.
    """
    # sample the initial latent state
    s = np.random.multinomial(1, self.pi).astype(np.float32).argmax()
    s = int(s)
    states = [latent_state_types[s]]

    # generate an emission given latent state
    v = np.random.multinomial(1, self.B[s, :]).astype(np.float32).argmax()
    v = int(v)
    emissions = [obs_types[v]]

    # sample a latent transition, rinse, and repeat
    for i in range(n_steps - 1):
        s = np.random.multinomial(1, self.A[s, :]).astype(np.float32).argmax()
        s = int(s)
        states.append(latent_state_types[s])

        v = np.random.multinomial(1, self.B[s, :]).astype(np.float32).argmax()
        v = int(v)
        emissions.append(obs_types[v])
    return np.array(states), np.array(emissions)
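# Hedged standalone sketch of the ancestral-sampling loop in `generate`,
# using plain numpy and a hypothetical 2-state / 3-symbol HMM:
import numpy as np

A = np.array([[0.7, 0.3], [0.4, 0.6]])            # transition matrix
B = np.array([[0.5, 0.4, 0.1], [0.1, 0.3, 0.6]])  # emission matrix
pi = np.array([0.6, 0.4])                         # prior over initial states

rng = np.random.default_rng(0)
s = rng.choice(2, p=pi)                  # initial latent state
states, emissions = [s], [rng.choice(3, p=B[s])]
for _ in range(9):
    s = rng.choice(2, p=A[s])            # latent transition
    states.append(s)
    emissions.append(rng.choice(3, p=B[s]))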
def fillCropSize(self, input_height, input_width):
    crop_sizes = []
    base_size = np.min(np.array((input_height, input_width)))
    scale_rates = self.scale_ratios
    for h, scale_rate_h in enumerate(scale_rates):
        crop_h = int(base_size * scale_rate_h)
        for w, scale_rate_w in enumerate(scale_rates):
            crop_w = int(base_size * scale_rate_w)
            # only keep (h, w) pairs whose scale-ratio indices differ by at
            # most `max_distort`, limiting aspect-ratio distortion
            if np.absolute(h - w) <= self.max_distort:
                crop_sizes.append((crop_h, crop_w))
    return crop_sizes
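# Hedged standalone sketch of the crop-size enumeration in `fillCropSize`:
# with hypothetical scale_ratios = [1.0, 0.875, 0.75] and max_distort = 1,
# only (h, w) pairs whose ratio indices differ by at most 1 survive.
scale_ratios = [1.0, 0.875, 0.75]
max_distort = 1
base_size = min(240, 320)
crop_sizes = [
    (int(base_size * rh), int(base_size * rw))
    for h, rh in enumerate(scale_ratios)
    for w, rw in enumerate(scale_ratios)
    if abs(h - w) <= max_distort
]
# e.g. [(240, 240), (240, 210), (210, 240), (210, 210), (210, 180), ...]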
def generate_training_data(params, n_steps=500, n_examples=15):
    hmm = MultinomialHMM(A=params["A"], B=params["B"], pi=params["pi"])

    # generate a new sequence for each training example
    observations = []
    for i in range(n_examples):
        latent, obs = hmm.generate(
            n_steps, params["latent_states"], params["obs_types"]
        )
        assert len(latent) == len(obs) == n_steps
        observations.append(obs)

    observations = np.array(observations)
    return observations
def _maximize_gamma(self):
    """
    Optimize the variational parameter gamma:

        γ_t = α_t + \sum_{n=1}^{N_d} ϕ_{t, n}
    """
    D = self.D
    phi = self.phi
    alpha = self.alpha

    gamma = np.tile(alpha, (D, 1)) + np.array(
        list(map(lambda x: np.sum(x, axis=0), phi))
    )
    return gamma
def DFT(frame, positive_only=True):
    """
    A naive :math:`O(N^2)` implementation of the 1D discrete Fourier
    transform (DFT).

    Notes
    -----
    The Fourier transform decomposes a signal into a linear combination of
    sinusoids (i.e., basis elements in the space of continuous periodic
    functions). For a sequence :math:`\mathbf{x} = [x_1, \ldots, x_N]` of N
    evenly spaced samples, the `k` th DFT coefficient is given by:

    .. math::
        c_k = \sum_{n=0}^{N-1} x_n \exp(-2 \pi i k n / N)

    where `i` is the imaginary unit, `k` is an index ranging from `0, ...,
    N-1`, and :math:`c_k` is the complex coefficient representing the phase
    (imaginary part) and amplitude (real part) of the `k` th sinusoid in the
    DFT spectrum. The frequency of the `k` th sinusoid is :math:`(k 2 \pi /
    N)` radians per sample.

    When applied to a real-valued input, the negative frequency terms are
    the complex conjugates of the positive-frequency terms and the overall
    spectrum is symmetric (excluding the first index, which contains the
    zero-frequency / intercept term).

    Parameters
    ----------
    frame : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        A signal frame consisting of N samples
    positive_only : bool
        Whether to only return the coefficients for the positive frequency
        terms. Default is True.

    Returns
    -------
    spectrum : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)` or `(N // 2 + 1,)` if `positive_only`
        The coefficients of the frequency spectrum for `frame`, including
        imaginary components.
    """
    N = len(frame)  # window length

    # F[i, j] = coefficient for basis vector i, timestep j (i.e., k * n)
    F = np.arange(N).reshape(1, -1) * np.arange(N).reshape(-1, 1)
    F = np.exp(F * (-1j * 2 * np.pi / N))

    # vdot only operates on vectors (rather than ndarrays), so we have to
    # loop over each basis vector in F explicitly
    spectrum = np.array([np.vdot(f, frame) for f in F])
    return spectrum[:(N // 2) + 1] if positive_only else spectrum
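# Hedged sanity check of `DFT` against numpy's FFT. Because `np.vdot`
# conjugates its first argument, the loop above effectively uses the opposite
# sign convention from `np.fft.fft`: magnitudes agree, phases are conjugated.
import numpy as np

x = np.random.randn(64)
naive = DFT(x, positive_only=False)
fast = np.fft.fft(x)
assert np.allclose(np.abs(naive), np.abs(fast))
assert np.allclose(naive, np.conj(fast))  # conjugate-symmetric convention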
def default_hmm():
    obs_types = [0, 1, 2, 3]
    latent_states = ["H", "C"]

    # derived variables
    V = len(obs_types)
    N = len(latent_states)

    # define a very simple HMM with T=3 observations
    O = np.array([1, 3, 1]).reshape(1, -1)
    A = np.array([[0.9, 0.1], [0.5, 0.5]])
    B = np.array([[0.2, 0.7, 0.09, 0.01], [0.1, 0.0, 0.8, 0.1]])
    pi = np.array([0.75, 0.25])

    return {
        "latent_states": latent_states,
        "obs_types": obs_types,
        "V": V,
        "N": N,
        "O": O,
        "A": A,
        "B": B,
        "pi": pi,
    }
def _initialize_params(self):
    """
    Randomly initialize the starting GMM parameters.
    """
    C, d = self.C, self.d
    rr = np.random.rand(C)

    self.pi = rr / rr.sum()  # cluster priors
    self.Q = np.zeros((self.N, C))  # variational distribution q(T)
    self.mu = np.random.uniform(-5, 10, C * d).reshape(C, d)  # cluster means
    self.sigma = np.array([np.identity(d) for _ in range(C)])  # cluster covariances

    self.best_pi = None
    self.best_mu = None
    self.best_sigma = None
    self.best_elbo = -np.inf
def train(self, corpus, verbose=False, max_iter=1000, tol=5):
    """
    Train the LDA model on a corpus of documents (bags of words).

    Parameters
    ----------
    corpus : list of length `D`
        A list of lists, with each sublist containing the tokenized text of
        a single document.
    verbose : bool
        Whether to print the VLB at each training iteration. Default is
        False.
    max_iter : int
        The maximum number of training iterations to perform before
        breaking. Default is 1000.
    tol : int
        Break the training loop if the difference between the VLB on the
        current iteration and the previous iteration is less than `tol`.
        Default is 5.
    """
    self.D = len(corpus)

    arr = []
    for doc in corpus:
        arr.extend(doc)
    self.V = len(set(arr))

    self.N = np.array([len(d) for d in corpus])
    self.corpus = corpus
    self.initialize_parameters()

    vlb = -np.inf
    for i in range(max_iter):
        old_vlb = vlb

        self._E_step()
        self._M_step()

        vlb = self.VLB()
        delta = vlb - old_vlb

        if verbose:
            print("Iteration {}: {:.3f} (delta: {:.2f})".format(i + 1, vlb, delta))

        if delta < tol:
            break
def initialize_parameters(self):
    """
    Provide reasonable initializations for model and variational parameters.
    """
    T = self.T
    V = self.V
    N = self.N
    D = self.D

    # initialize model parameters
    self.alpha = 100 * onp.random.dirichlet(10 * np.ones(T).asnumpy(), 1)[0]
    self.beta = onp.random.dirichlet(np.ones(V).asnumpy(), T).T

    # initialize variational parameters
    self.phi = np.array([1 / T * np.ones((int(N[d]), T)) for d in range(D)])
    self.gamma = np.tile(self.alpha, (D, 1)) + np.tile(N / T, (T, 1)).T
def predict(self, X):
    """
    Generate predictions for the targets associated with the rows in `X`.

    Parameters
    ----------
    X : numpy array of shape `(N', M')`
        An array of `N'` examples to generate predictions on.

    Returns
    -------
    y : numpy array of shape `(N',\*)`
        Predicted targets for the `N'` rows in `X`.
    """
    predictions = []
    H = self.hyperparameters
    for x in X:
        pred = None
        nearest = self._ball_tree.nearest_neighbors(H["k"], x)
        targets = [n.val.item() for n in nearest]

        if H["classifier"]:
            if H["weights"] == "uniform":
                pred = Counter(targets).most_common(1)[0][0]
            elif H["weights"] == "distance":
                # predict the label whose inverse-distance-weighted vote is
                # largest, tracking the best score seen so far
                best_score = -np.inf
                for label in set(targets):
                    scores = [1 / n.distance for n in nearest if n.val == label]
                    score = np.sum(scores)
                    if score > best_score:
                        best_score = score
                        pred = label
        else:
            if H["weights"] == "uniform":
                pred = np.mean(targets)
            elif H["weights"] == "distance":
                weights = [1 / n.distance for n in nearest]
                pred = np.average(targets, weights=weights)

        predictions.append(pred)
    return np.array(predictions)
def _maximize_beta(self):
    """
    Optimize the model parameter beta:

        β_{t, n} ∝ \sum_{d=1}^D \sum_{i=1}^{N_d} ϕ_{d, t, n} [i = n]
    """
    T = self.T
    V = self.V

    phi = self.phi
    beta = self.beta
    corpus = self.corpus

    for n in range(V):
        # Construct binary mask [i == n] to be the same shape as phi
        mask = [np.tile((doc == n), (T, 1)).T for doc in corpus]
        beta[n, :] = np.sum(
            np.array(list(map(lambda x: np.sum(x, axis=0), phi * mask))), axis=0
        )

    # Normalize over words
    for t in range(T):
        beta[:, t] = beta[:, t] / np.sum(beta[:, t])

    return beta
def sample(self, X, n_samples=1, dist="posterior_predictive"):
    """
    Sample functions from the GP prior or posterior predictive
    distribution.

    Parameters
    ----------
    X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
        The collection of datapoints to generate predictions on. Only used
        if `dist` = 'posterior_predictive'.
    n_samples : int
        The number of samples to generate. Default is 1.
    dist : {"posterior_predictive", "prior"}
        The distribution to draw samples from. Default is
        "posterior_predictive".

    Returns
    -------
    samples : :py:class:`ndarray <numpy.ndarray>` of shape `(n_samples, O, N)`
        The generated samples for the points in `X`.
    """
    mvnorm = np.random.multivariate_normal

    if dist == "prior":
        mu = np.zeros((X.shape[0], 1))
        cov = self.kernel(X, X)
    elif dist == "posterior_predictive":
        mu, _, cov = self.predict(X, return_cov=True)
    else:
        raise ValueError("Unrecognized dist: '{}'".format(dist))

    if mu.ndim == 1:
        mu = mu[:, np.newaxis]

    samples = np.array([mvnorm(_mu, cov, size=n_samples) for _mu in mu.T])
    return samples.swapaxes(0, 1)
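# Hedged standalone sketch of the "prior" branch above: draw functions from
# a zero-mean GP prior under an RBF kernel, using plain numpy.
import numpy as np

X = np.linspace(-3, 3, 50)[:, None]
cov = np.exp(-0.5 * (X - X.T) ** 2)  # RBF kernel Gram matrix, length-scale 1
cov += 1e-8 * np.eye(len(X))         # jitter for numerical stability
samples = np.random.multivariate_normal(np.zeros(len(X)), cov, size=3)
# samples.shape == (3, 50): three functions evaluated at the 50 test points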
def tensor(data, dtype=None, **kwargs):
    # preserve the source ndarray's dtype when no explicit dtype is given
    if dtype is None and isinstance(data, numpy.ndarray):
        dtype = data.dtype
    return np.array(data, dtype=dtype)
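# Hedged usage sketch for `tensor` (assumes the surrounding module imports
# both `numpy` and its `np` alias/backend): an ndarray's dtype is preserved
# when `dtype` is not given; other inputs defer to `np.array`'s defaults.
import numpy

a = numpy.arange(4, dtype=numpy.float16)
t = tensor(a)           # dtype float16, inherited from `a`
u = tensor([1, 2, 3])   # dtype chosen by np.array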
def calc_pad_dims_2D(X_shape, out_dim, kernel_shape, stride, dilation=0):
    """
    Compute the padding necessary to ensure that convolving `X` with a 2D
    kernel of shape `kernel_shape` and stride `stride` produces outputs with
    dimension `out_dim`.

    Parameters
    ----------
    X_shape : tuple of `(n_ex, in_rows, in_cols, in_ch)`
        Dimensions of the input volume. Padding is applied to `in_rows` and
        `in_cols`.
    out_dim : tuple of `(out_rows, out_cols)`
        The desired dimension of an output example after applying the
        convolution.
    kernel_shape : 2-tuple
        The dimension of the 2D convolution kernel.
    stride : int
        The stride for the convolution kernel.
    dilation : int
        Number of pixels inserted between kernel elements. Default is 0.

    Returns
    -------
    padding_dims : 4-tuple
        Padding dims for `X`. Organized as (left, right, up, down)
    """
    if not isinstance(X_shape, tuple):
        raise ValueError("`X_shape` must be of type tuple")

    if not isinstance(out_dim, tuple):
        raise ValueError("`out_dim` must be of type tuple")

    if not isinstance(kernel_shape, tuple):
        raise ValueError("`kernel_shape` must be of type tuple")

    if not isinstance(stride, int):
        raise ValueError("`stride` must be of type int")

    d = dilation
    fr, fc = kernel_shape
    out_rows, out_cols = out_dim
    n_ex, in_rows, in_cols, in_ch = X_shape

    # update effective filter shape based on dilation factor
    _fr, _fc = fr * (d + 1) - d, fc * (d + 1) - d

    pr = int((stride * (out_rows - 1) + _fr - in_rows) / 2)
    pc = int((stride * (out_cols - 1) + _fc - in_cols) / 2)

    out_rows1 = int(1 + (in_rows + 2 * pr - _fr) / stride)
    out_cols1 = int(1 + (in_cols + 2 * pc - _fc) / stride)

    # add asymmetric padding pixels to right / bottom
    pr1, pr2 = pr, pr
    if out_rows1 == out_rows - 1:
        pr1, pr2 = pr, pr + 1
    elif out_rows1 != out_rows:
        raise AssertionError

    pc1, pc2 = pc, pc
    if out_cols1 == out_cols - 1:
        pc1, pc2 = pc, pc + 1
    elif out_cols1 != out_cols:
        raise AssertionError

    if any(np.array([pr1, pr2, pc1, pc2]) < 0):
        raise ValueError(
            "Padding cannot be less than 0. Got: {}".format((pr1, pr2, pc1, pc2))
        )
    return (pr1, pr2, pc1, pc2)
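# Hedged usage sketch: padding for a 'same' convolution on a batch of 32x32
# RGB images with a 3x3 kernel and stride 1.
pr1, pr2, pc1, pc2 = calc_pad_dims_2D(
    X_shape=(8, 32, 32, 3),   # (n_ex, in_rows, in_cols, in_ch)
    out_dim=(32, 32),         # keep spatial dimensions unchanged
    kernel_shape=(3, 3),
    stride=1,
)
# (pr1, pr2, pc1, pc2) == (1, 1, 1, 1) for this configuration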
def calc_pad_dims_1D(X_shape, l_out, kernel_width, stride, dilation=0, causal=False):
    """
    Compute the padding necessary to ensure that convolving `X` with a 1D
    kernel of shape `kernel_shape` and stride `stride` produces outputs with
    length `l_out`.

    Parameters
    ----------
    X_shape : tuple of `(n_ex, l_in, in_ch)`
        Dimensions of the input volume. Padding is applied on either side of
        `l_in`.
    l_out : int
        The desired length an output example after applying the convolution.
    kernel_width : int
        The width of the 1D convolution kernel.
    stride : int
        The stride for the convolution kernel.
    dilation : int
        Number of pixels inserted between kernel elements. Default is 0.
    causal : bool
        Whether to compute the padding dims for a regular or causal
        convolution. If causal, padding is added only to the left side of
        the sequence. Default is False.

    Returns
    -------
    padding_dims : 2-tuple
        Padding dims for X. Organized as (left, right)
    """
    if not isinstance(X_shape, tuple):
        raise ValueError("`X_shape` must be of type tuple")

    if not isinstance(l_out, int):
        raise ValueError("`l_out` must be of type int")

    if not isinstance(kernel_width, int):
        raise ValueError("`kernel_width` must be of type int")

    if not isinstance(stride, int):
        raise ValueError("`stride` must be of type int")

    d = dilation
    fw = kernel_width
    n_ex, l_in, in_ch = X_shape

    # update effective filter shape based on dilation factor
    _fw = fw * (d + 1) - d
    total_pad = int((stride * (l_out - 1) + _fw - l_in))

    if not causal:
        pw = total_pad // 2
        l_out1 = int(1 + (l_in + 2 * pw - _fw) / stride)

        # add asymmetric padding pixels to right / bottom
        pw1, pw2 = pw, pw
        if l_out1 == l_out - 1:
            pw1, pw2 = pw, pw + 1
        elif l_out1 != l_out:
            raise AssertionError

    if causal:
        # if this is a causal convolution, only pad the left side of the
        # sequence
        pw1, pw2 = total_pad, 0
        l_out1 = int(1 + (l_in + total_pad - _fw) / stride)
        assert l_out1 == l_out

    if any(np.array([pw1, pw2]) < 0):
        raise ValueError("Padding cannot be less than 0. Got: {}".format((pw1, pw2)))
    return (pw1, pw2)
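# Hedged usage sketch: padding for a causal 1D convolution that preserves
# sequence length (kernel width 3, stride 1, no dilation).
pw1, pw2 = calc_pad_dims_1D(
    X_shape=(8, 100, 16),   # (n_ex, l_in, in_ch)
    l_out=100,
    kernel_width=3,
    stride=1,
    causal=True,
)
# (pw1, pw2) == (2, 0): all padding goes on the left, so no output position
# depends on future timesteps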
def mfcc(
    x,
    fs=44000,
    n_mfccs=13,
    alpha=0.95,
    center=True,
    n_filters=20,
    window="hann",
    normalize=True,
    lifter_coef=22,
    stride_duration=0.01,
    window_duration=0.025,
    replace_intercept=True,
):
    """
    Compute the Mel-frequency cepstral coefficients (MFCC) for a signal.

    Notes
    -----
    Computing MFCC features proceeds in the following stages:

        1. Convert the signal into overlapping frames and apply a window fn
        2. Compute the power spectrum at each frame
        3. Apply the mel filterbank to the power spectra to get mel
           filterbank powers
        4. Take the logarithm of the mel filterbank powers at each frame
        5. Take the discrete cosine transform (DCT) of the log filterbank
           energies and retain only the first k coefficients to further
           reduce the dimensionality

    MFCCs were developed in the context of HMM-GMM automatic speech
    recognition (ASR) systems and can be used to provide a somewhat
    speaker/pitch invariant representation of phonemes.

    Parameters
    ----------
    x : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        A 1D signal consisting of N samples
    fs : int
        The sample rate/frequency for the signal. Default is 44000.
    n_mfccs : int
        The number of cepstral coefficients to return (including the
        intercept coefficient). Default is 13.
    alpha : float in [0, 1)
        The preemphasis coefficient. A value of 0 corresponds to no
        filtering. Default is 0.95.
    center : bool
        Whether the `k` th frame of the signal should *begin* at index
        ``x[k * stride_len]`` (center = False) or be *centered* at
        ``x[k * stride_len]`` (center = True). Default is True.
    n_filters : int
        The number of filters to include in the Mel filterbank. Default is
        20.
    normalize : bool
        Whether to mean-normalize the MFCC values. Default is True.
    lifter_coef : int in :math:`[0, +\infty]`
        The cepstral filter coefficient. 0 corresponds to no filtering,
        larger values correspond to greater amounts of smoothing. Default
        is 22.
    window : {'hamming', 'hann', 'blackman_harris'}
        The windowing function to apply to the signal before taking the
        DFT. Default is 'hann'.
    stride_duration : float
        The duration of the hop between consecutive windows (in seconds).
        Default is 0.01.
    window_duration : float
        The duration of each frame / window (in seconds). Default is 0.025.
    replace_intercept : bool
        Replace the first MFCC coefficient (the intercept term) with the
        log of the total frame energy instead. Default is True.

    Returns
    -------
    mfccs : :py:class:`ndarray <numpy.ndarray>` of shape `(G, C)`
        Matrix of Mel-frequency cepstral coefficients. Rows correspond to
        frames, columns to cepstral coefficients
    """
    # map the power spectrum for the (framed + windowed representation of)
    # `x` onto the mel scale
    filter_energies, frame_energies = mel_spectrogram(
        x=x,
        fs=fs,
        alpha=alpha,
        center=center,
        window=window,
        n_filters=n_filters,
        mean_normalize=False,
        window_duration=window_duration,
        stride_duration=stride_duration,
    )

    log_energies = 10 * np.log10(filter_energies)

    # perform a DCT on the log-mel coefficients to further reduce the data
    # dimensionality -- the early DCT coefficients will capture the majority
    # of the data, allowing us to discard coefficients > n_mfccs
    mfccs = np.array([DCT(frame) for frame in log_energies])[:, :n_mfccs]

    mfccs = cepstral_lifter(mfccs, D=lifter_coef)
    mfccs -= np.mean(mfccs, axis=0) if normalize else 0

    if replace_intercept:
        # the 0th MFCC coefficient doesn't tell us anything about the
        # spectrum; replace it with the log of the frame energy for something
        # more informative
        mfccs[:, 0] = np.log(frame_energies)
    return mfccs
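# Hedged usage sketch: MFCCs for one second of white noise. Assumes `mfcc`
# and its helpers (`mel_spectrogram`, `DCT`, `cepstral_lifter`) are
# importable from this module.
import numpy as np

fs = 44000
signal = np.random.randn(fs)  # 1 second of noise at 44 kHz

feats = mfcc(signal, fs=fs, n_mfccs=13)
# feats.shape == (n_frames, 13); with a 25 ms window and 10 ms stride,
# n_frames is roughly 1 s / 10 ms ~= 100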
def plot_schedulers():
    fig, axes = plt.subplots(2, 2)
    schedulers = [
        (
            [ConstantScheduler(lr=0.01), "lr=1e-2"],
            [ConstantScheduler(lr=0.008), "lr=8e-3"],
            [ConstantScheduler(lr=0.006), "lr=6e-3"],
            [ConstantScheduler(lr=0.004), "lr=4e-3"],
            [ConstantScheduler(lr=0.002), "lr=2e-3"],
        ),
        (
            [
                ExponentialScheduler(lr=0.01, stage_length=250, staircase=False, decay=0.4),
                "lr=0.01, stage=250, stair=False, decay=0.4",
            ],
            [
                ExponentialScheduler(lr=0.01, stage_length=250, staircase=True, decay=0.4),
                "lr=0.01, stage=250, stair=True, decay=0.4",
            ],
            [
                ExponentialScheduler(lr=0.01, stage_length=125, staircase=True, decay=0.1),
                "lr=0.01, stage=125, stair=True, decay=0.1",
            ],
            [
                ExponentialScheduler(lr=0.001, stage_length=250, staircase=False, decay=0.1),
                "lr=0.001, stage=250, stair=False, decay=0.1",
            ],
            [
                ExponentialScheduler(lr=0.001, stage_length=125, staircase=False, decay=0.8),
                "lr=0.001, stage=125, stair=False, decay=0.8",
            ],
            [
                ExponentialScheduler(lr=0.01, stage_length=250, staircase=False, decay=0.01),
                "lr=0.01, stage=250, stair=False, decay=0.01",
            ],
        ),
        (
            [
                NoamScheduler(model_dim=512, scale_factor=1, warmup_steps=250),
                "dim=512, scale=1, warmup=250",
            ],
            [
                NoamScheduler(model_dim=256, scale_factor=1, warmup_steps=250),
                "dim=256, scale=1, warmup=250",
            ],
            [
                NoamScheduler(model_dim=512, scale_factor=1, warmup_steps=500),
                "dim=512, scale=1, warmup=500",
            ],
            [
                NoamScheduler(model_dim=256, scale_factor=1, warmup_steps=500),
                "dim=256, scale=1, warmup=500",
            ],
            [
                NoamScheduler(model_dim=512, scale_factor=2, warmup_steps=500),
                "dim=512, scale=2, warmup=500",
            ],
            [
                NoamScheduler(model_dim=512, scale_factor=0.5, warmup_steps=500),
                "dim=512, scale=0.5, warmup=500",
            ],
        ),
        (
            # [
            #     KingScheduler(initial_lr=0.01, patience=100, decay=0.1),
            #     "lr=0.01, patience=100, decay=0.1",
            # ],
            # [
            #     KingScheduler(initial_lr=0.01, patience=300, decay=0.999),
            #     "lr=0.01, patience=300, decay=0.999",
            # ],
            [
                KingScheduler(initial_lr=0.009, patience=150, decay=0.995),
                "lr=0.009, patience=150, decay=0.995",
            ],
            [
                KingScheduler(initial_lr=0.008, patience=100, decay=0.995),
                "lr=0.008, patience=100, decay=0.995",
            ],
            [
                KingScheduler(initial_lr=0.007, patience=50, decay=0.995),
                "lr=0.007, patience=50, decay=0.995",
            ],
            [
                KingScheduler(initial_lr=0.005, patience=25, decay=0.9),
                "lr=0.005, patience=25, decay=0.9",
            ],
        ),
    ]

    for ax, schs, title in zip(
        axes.flatten(), schedulers, ["Constant", "Exponential", "Noam", "King"]
    ):
        t0 = time.time()
        print("Running {} scheduler".format(title))
        X = np.arange(1, 1000)
        loss = np.array([king_loss_fn(x) for x in X])

        # scale the loss to fit on the same axis as the learning rate
        scale = 0.01 / loss[0]
        loss *= scale

        if title == "King":
            ax.plot(X, loss, ls=":", label="Loss")

        for sc, lg in schs:
            Y = np.array([sc(x, ll) for x, ll in zip(X, loss)])
            ax.plot(X, Y, label=lg, alpha=0.6)

        ax.legend(fontsize=5)
        ax.set_xlabel("Steps")
        ax.set_ylabel("Learning rate")
        ax.set_title("{} scheduler".format(title))
        print(
            "Finished plotting {} runs of {} in {:.2f}s".format(
                len(schs), title, time.time() - t0
            )
        )

    plt.tight_layout()
    plt.savefig("plot.png", dpi=300)
    plt.close("all")
def _add_dtype_workload_true_divide():
    DtypeOpArgMngr.add_workload('true_divide', np.array([1, 2], dtype=int), 4)
    DtypeOpArgMngr.add_workload('true_divide', np.array([1, 2], dtype=int), 2.0)
    DtypeOpArgMngr.add_workload('true_divide', 4.0, np.array([1, 2], dtype=int))
def fit(
    self, O, latent_state_types, observation_types, pi=None, tol=1e-5, verbose=False
):
    """
    Given an observation sequence `O` and the set of possible latent states,
    learn the MLE HMM parameters `A` and `B`.

    Notes
    -----
    Model fitting is done iteratively using the Baum-Welch/Forward-Backward
    algorithm, a special case of the EM algorithm. We begin with an initial
    estimate for the transition (`A`) and emission (`B`) matrices and then
    use these to derive better and better estimates by computing the forward
    probability for an observation and then dividing that probability mass
    among all the paths that contributed to it.

    Parameters
    ----------
    O : :py:class:`ndarray <numpy.ndarray>` of shape `(I, T)`
        The set of `I` training observations, each of length `T`.
    latent_state_types : list of length `N`
        The collection of valid latent states.
    observation_types : list of length `V`
        The collection of valid observation states.
    pi : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        The prior probability of each latent state. If None, assume each
        latent state is equally likely a priori. Default is None.
    tol : float
        The tolerance value. If the difference in log likelihood between
        two epochs is less than this value, terminate training. Default is
        1e-5.
    verbose : bool
        Print training stats after each epoch. Default is False.

    Returns
    -------
    A : :py:class:`ndarray <numpy.ndarray>` of shape `(N, N)`
        The estimated transition matrix.
    B : :py:class:`ndarray <numpy.ndarray>` of shape `(N, V)`
        The estimated emission matrix.
    pi : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        The estimated prior probabilities of each latent state.
    """
    if O.ndim == 1:
        O = O.reshape(1, -1)

    # observations
    self.O = O

    # number of training examples (I) and their lengths (T)
    self.I, self.T = self.O.shape

    # number of types of observation
    self.V = len(observation_types)

    # number of latent state types
    self.N = len(latent_state_types)

    # Uniform initialization of prior over latent states
    self.pi = pi
    if self.pi is None:
        self.pi = np.ones(self.N)
        self.pi = self.pi / self.pi.sum()

    # Uniform initialization of A
    self.A = np.ones((self.N, self.N))
    self.A = self.A / self.A.sum(axis=1)[:, None]

    # Random initialization of B
    self.B = np.random.rand(self.N, self.V)
    self.B = self.B / self.B.sum(axis=1)[:, None]

    # iterate E and M steps until the convergence criterion is met
    step, delta = 0, np.inf
    ll_prev = np.sum(np.array([self.log_likelihood(o) for o in self.O]))

    while delta > tol:
        gamma, xi, phi = self._Estep()
        self.A, self.B, self.pi = self._Mstep(gamma, xi, phi)
        ll = np.sum(np.array([self.log_likelihood(o) for o in self.O]))
        delta = ll - ll_prev
        ll_prev = ll
        step += 1

        if verbose:
            fstr = "[Epoch {}] LL: {:.3f} Delta: {:.5f}"
            print(fstr.format(step, ll_prev, delta))

    return self.A, self.B, self.pi
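# Hedged end-to-end sketch tying the helpers above together: build the toy
# HMM fixture, sample training sequences from it, then refit the parameters
# with Baum-Welch on a fresh model. Assumes `MultinomialHMM` can be
# constructed with no arguments (A, B, pi are then initialized inside `fit`).
params = default_hmm()
observations = generate_training_data(params, n_steps=200, n_examples=10)

hmm = MultinomialHMM()
A_est, B_est, pi_est = hmm.fit(
    observations, params["latent_states"], params["obs_types"], verbose=True
)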
def __init__(self, mean, std):
    super(VideoNormalize, self).__init__()
    self.mean = np.array(mean).reshape((len(mean), 1, 1))
    self.std = np.array(std).reshape((len(std), 1, 1))