def test_gmm_MAP_3():
    """Train a GMMMachine with the MAP trainer and compare to the old reference.

    A prior model is populated from the ``*AfterML`` fixtures, a second
    machine is MAP-fitted against it for a single step (means only), and the
    result is compared to the ``*AfterMAP`` reference arrays for both the
    numpy and the dask representation of the data.
    """

    def _fixture(relative_path):
        # Every array used here is shipped with the ``bob.learn.em`` package.
        return load_array(resource_filename("bob.learn.em", relative_path))

    samples = _fixture("data/dataforMAP.hdf5")

    # Prior model, used as ``ubm`` by the MAP-trained machine below.
    n_components = 5
    variance_floor = 0.001
    ubm = GMMMachine(n_components)
    ubm.means = _fixture("data/meansAfterML.hdf5")
    ubm.variances = _fixture("data/variancesAfterML.hdf5")
    ubm.weights = _fixture("data/weightsAfterML.hdf5")
    ubm.variance_thresholds = variance_floor

    # MAP machine: one fitting step, only the means are updated.
    gmm = GMMMachine(
        n_components,
        trainer="map",
        ubm=ubm,
        convergence_threshold=0.001,
        max_fitting_steps=1,
        update_means=True,
        update_variances=False,
        update_weights=False,
        mean_var_update_threshold=0.00001,
        map_relevance_factor=None,
    )
    gmm.variance_thresholds = variance_floor

    # torch3vision references, each paired with the absolute tolerance used.
    # The gap on the means is quite large. This might be explained by the fact
    # that there is no adaptation of a given Gaussian in torch3 when the
    # corresponding responsibilities are below the responsibilities threshold.
    expectations = (
        ("means", _fixture("data/meansAfterMAP.hdf5"), 2e-1),
        ("variances", _fixture("data/variancesAfterMAP.hdf5"), 1e-4),
        ("weights", _fixture("data/weightsAfterMAP.hdf5"), 1e-4),
    )

    for transform in (to_numpy, to_dask_array):
        samples = transform(samples)
        # NOTE: the machine is not re-created between the two passes, so the
        # second fit starts from the state left by the first one.
        gmm = gmm.fit(samples)
        for attribute, reference, tolerance in expectations:
            np.testing.assert_allclose(
                getattr(gmm, attribute), reference, atol=tolerance
            )
def test_gmm_test():
    """Score data against a fixed GMM and compare the mean score to a reference.

    The machine parameters are loaded from the ``*AfterML`` fixtures; the
    per-sample log-likelihoods are summed and divided by the number of
    samples, then compared (relative error) to the stored reference value.
    The check is run on both the numpy and the dask form of the data.
    """
    ar = load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5"))

    # Initialize GMMMachine
    n_gaussians = 5
    gmm = GMMMachine(n_gaussians)
    gmm.means = load_array(
        resource_filename("bob.learn.em", "data/meansAfterML.hdf5"))
    gmm.variances = load_array(
        resource_filename("bob.learn.em", "data/variancesAfterML.hdf5"))
    gmm.weights = load_array(
        resource_filename("bob.learn.em", "data/weightsAfterML.hdf5"))

    threshold = 0.001
    gmm.variance_thresholds = threshold

    # Test against the model
    score_mean_ref = -1.50379e06
    for transform in (to_numpy, to_dask_array):
        ar = transform(ar)
        score = gmm.log_likelihood(ar).sum()
        score /= len(ar)

        # Compare current results to torch3vision.
        # The reference is negative, so the relative error must be normalised
        # by its magnitude: dividing by the signed value yields a ratio that
        # is never positive, which made the comparison impossible to fail.
        assert abs(score - score_mean_ref) / abs(score_mean_ref) < 1e-4
def test_GMMMachine_stats():
    """Tests a GMMMachine (statistics).

    Runs ``gmm_module.e_step`` on the faithful dataset with a hand-set
    two-Gaussian machine and compares the resulting statistics with the ones
    stored in ``data/stats.hdf5``.
    """
    dtype = "float64"
    samples = load_array(
        resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5")
    )

    machine = GMMMachine(n_gaussians=2)
    machine.weights = np.array([0.5, 0.5], dtype)
    machine.means = np.array([[3, 70], [4, 72]], dtype)
    machine.variances = np.array([[1, 10], [2, 5]], dtype)
    machine.variance_thresholds = np.array([[0, 0], [0, 0]], dtype)

    computed = gmm_module.e_step(samples, machine)

    expected = GMMStats(n_gaussians=2, n_features=2)
    reference_path = resource_filename("bob.learn.em", "data/stats.hdf5")
    expected.load(HDF5File(reference_path, "r"))

    np.testing.assert_equal(computed.t, expected.t)
    # Note AA: exact equality fails on sum_px because of precision errors, so
    # the accumulators are compared to 10 decimals instead.
    for field in ("n", "sum_px", "sum_pxx"):
        np.testing.assert_almost_equal(
            getattr(computed, field), getattr(expected, field), decimal=10
        )
def _voice_activity_detection(self, energy_array: np.ndarray) -> np.ndarray:
    """Fits a 2 Gaussian GMM on the energy that splits between voice and silence.

    Parameters
    ----------
    energy_array
        One energy value per sample (only ``len()``, ``np.std`` and
        element-wise addition are applied to it here).

    Returns
    -------
    np.ndarray
        One integer label per sample. All zeros when the energy barely varies
        or when GMM training produced NaN means; otherwise the index-based
        labels computed below (presumably 1 marks the high-energy Gaussian --
        this relies on ``log_weighted_likelihood`` returning one row per
        Gaussian; confirm against ``GMMMachine``).
    """
    n_samples = len(energy_array)

    # if energy does not change a lot, it may not be audio?
    # Integer dtype keeps this early return consistent with the label arrays
    # produced by the other return paths.
    if np.std(energy_array) < 10e-5:
        return np.zeros(shape=n_samples, dtype=int)

    # Add an epsilon small Gaussian noise to avoid numerical issues
    # (mainly due to artificial silence).
    energy_array = (1e-6 * np.random.randn(n_samples)) + energy_array

    # Normalize the energy array, make it an array of 1D samples
    normalized_energy = utils.normalize_std_array(energy_array).reshape((-1, 1))

    # Note: self.max_iterations and self.convergence_threshold are used for both
    # k-means and GMM training.
    kmeans_trainer = KMeansMachine(
        n_clusters=2,
        convergence_threshold=self.convergence_threshold,
        max_iter=self.max_iterations,
        init_max_iter=self.max_iterations,
    )
    ubm_gmm = GMMMachine(
        n_gaussians=2,
        trainer="ml",
        update_means=True,
        update_variances=True,
        update_weights=True,
        convergence_threshold=self.convergence_threshold,
        max_fitting_steps=self.max_iterations,
        k_means_trainer=kmeans_trainer,
    )
    ubm_gmm.variance_thresholds = self.variance_threshold

    ubm_gmm.fit(normalized_energy)

    if np.isnan(ubm_gmm.means).any():
        # ``Logger.warn`` is a deprecated alias; ``warning`` is the supported call.
        logger.warning("Annotation aborted: File contains NaN's")
        return np.zeros(shape=n_samples, dtype=int)

    # Classify
    # Different behavior dep on which mean represents high energy (higher value)
    labels = ubm_gmm.log_weighted_likelihood(normalized_energy)
    if ubm_gmm.means.argmax() == 0:  # High energy in means[0]
        # argmin picks the *other* Gaussian's index, so a sample where
        # Gaussian 0 scores highest is labelled 1.
        labels = labels.argmin(axis=0)
    else:  # High energy in means[1]
        labels = labels.argmax(axis=0)

    return labels
def test_gmm_ML_2():
    """Train a GMMMachine with the ML trainer and compare to a reference.

    For both the numpy and the dask form of the data, a fresh five-Gaussian
    machine is initialised from the ``*AfterKMeans`` fixtures, fitted, and
    compared with the ``*AfterML`` reference arrays.
    """

    def _fixture(relative_path):
        # Every array used here is shipped with the ``bob.learn.em`` package.
        return load_array(resource_filename("bob.learn.em", relative_path))

    samples = _fixture("data/dataNormalized.hdf5")

    # torch3vision references
    means_ref = _fixture("data/meansAfterML.hdf5")
    variances_ref = _fixture("data/variancesAfterML.hdf5")
    weights_ref = _fixture("data/weightsAfterML.hdf5")

    for transform in (to_numpy, to_dask_array):
        samples = transform(samples)

        # A new machine per representation, seeded from the k-means fixtures.
        machine = GMMMachine(n_gaussians=5)
        machine.means = _fixture("data/meansAfterKMeans.hdf5").astype("float64")
        machine.variances = _fixture(
            "data/variancesAfterKMeans.hdf5"
        ).astype("float64")
        # The stored weights are exponentiated before being assigned.
        stored_weights = _fixture("data/weightsAfterKMeans.hdf5")
        machine.weights = np.exp(stored_weights.astype("float64"))

        machine.variance_thresholds = 0.001

        # ML training configuration
        machine.mean_var_update_threshold = 0.001
        machine.max_fitting_steps = 25
        machine.convergence_threshold = 0.000001
        machine.update_means = True
        machine.update_variances = True
        machine.update_weights = True

        machine = machine.fit(samples)

        # Compare to current results
        np.testing.assert_allclose(machine.means, means_ref, atol=3e-3)
        np.testing.assert_allclose(machine.variances, variances_ref, atol=3e-3)
        np.testing.assert_allclose(machine.weights, weights_ref, atol=1e-4)
def test_GMMMachine_2():
    """Test a GMMMachine (statistics).

    Accumulates statistics over the faithful dataset with a hand-set
    two-Gaussian machine and compares them with the reference stored in
    ``stats.hdf5``.
    """
    dtype = 'float64'
    samples = bob.io.base.load(
        datafile("faithful.torch3_f64.hdf5", __name__, path="../data/")
    )

    machine = GMMMachine(2, 2)
    machine.weights = numpy.array([0.5, 0.5], dtype)
    machine.means = numpy.array([[3, 70], [4, 72]], dtype)
    machine.variances = numpy.array([[1, 10], [2, 5]], dtype)
    machine.variance_thresholds = numpy.array([[0, 0], [0, 0]], dtype)

    accumulated = GMMStats(2, 2)
    machine.acc_statistics(samples, accumulated)

    reference_file = bob.io.base.HDF5File(
        datafile("stats.hdf5", __name__, path="../data/")
    )
    expected = GMMStats(reference_file)

    assert accumulated.t == expected.t
    # Note AA: exact equality fails on sum_px because of precision errors, so
    # every floating-point accumulator is compared with a tolerance.
    for field in ("n", "sum_px", "sum_pxx"):
        assert numpy.allclose(
            getattr(accumulated, field), getattr(expected, field), atol=1e-10
        )
def test_GMMMachine_2():
    """Test a GMMMachine (statistics).

    A two-Gaussian, two-feature machine is configured by hand, statistics are
    accumulated over the faithful dataset, and the result is checked against
    the statistics stored in ``stats.hdf5``.
    """

    def _data(filename):
        # Fixtures live in ``../data/`` relative to this module.
        return datafile(filename, __name__, path="../data/")

    observations = bob.io.base.load(_data("faithful.torch3_f64.hdf5"))

    gmm = GMMMachine(2, 2)
    settings = (
        ("weights", [0.5, 0.5]),
        ("means", [[3, 70], [4, 72]]),
        ("variances", [[1, 10], [2, 5]]),
        ("variance_thresholds", [[0, 0], [0, 0]]),
    )
    for attribute, values in settings:
        setattr(gmm, attribute, numpy.array(values, 'float64'))

    stats = GMMStats(2, 2)
    gmm.acc_statistics(observations, stats)

    stats_ref = GMMStats(bob.io.base.HDF5File(_data("stats.hdf5")))

    assert stats.t == stats_ref.t
    tolerance = 1e-10
    assert numpy.allclose(stats.n, stats_ref.n, atol=tolerance)
    # Note AA: sum_px is not compared with array_equal because of precision error
    assert numpy.allclose(stats.sum_px, stats_ref.sum_px, atol=tolerance)
    assert numpy.allclose(stats.sum_pxx, stats_ref.sum_pxx, atol=tolerance)
def test_LinearScoring():
    """Check ``linear_scoring`` against reference scores.

    Two models are scored against three sets of statistics, with and without
    channel offsets and frame-length normalisation, using (1) GMMMachine
    objects, (2) mean/variance supervectors and (3) single model/sample pairs.
    """

    def _machine(means, variances):
        # All machines share the same weights and zero variance thresholds.
        machine = GMMMachine(2, 2)
        machine.weights = numpy.array([0.5, 0.5], 'float64')
        machine.means = numpy.array(means, 'float64')
        machine.variances = numpy.array(variances, 'float64')
        machine.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')
        return machine

    def _stats(sum_px, n):
        # ``t`` is the sum of the per-Gaussian counts in ``n``.
        accumulated = GMMStats(2, 2)
        accumulated.sum_px = numpy.array(sum_px, 'float64')
        accumulated.n = numpy.array(n, 'float64')
        accumulated.t = sum(n)
        return accumulated

    ubm = _machine([[3, 70], [4, 72]], [[1, 10], [2, 5]])
    model1 = _machine([[1, 2], [3, 4]], [[9, 10], [11, 12]])
    model2 = _machine([[5, 6], [7, 8]], [[13, 14], [15, 16]])

    stats1 = _stats([[1, 2], [3, 4]], [1, 2])
    stats2 = _stats([[5, 6], [7, 8]], [3, 4])
    stats3 = _stats([[5, 6], [7, 3]], [3, 4])

    test_channeloffset = [
        numpy.array(values, 'float64')
        for values in ([9, 8, 7, 6], [5, 4, 3, 2], [1, 0, 1, 2])
    ]

    # Reference scores (from Idiap internal matlab implementation).
    # Suffix: <channel offset used><frame-length normalisation used>.
    ref_scores_00 = numpy.array(
        [[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], 'float64')
    ref_scores_01 = numpy.array(
        [[790.9666666666667, 743.9571428571428, 753.6714285714285],
         [738.5666666666667, 695.4428571428572, 704.5857142857144]],
        'float64')
    ref_scores_10 = numpy.array(
        [[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], 'float64')
    ref_scores_11 = numpy.array(
        [[871.8333333333332, 776.3000000000001, 770.3571428571427],
         [793.8333333333333, 714.1857142857143, 717.5000000000000]],
        'float64')

    models = [model1, model2]
    probes = [stats1, stats2, stats3]
    tolerance = 1e-7

    # 1/ Use GMMMachines
    # 1/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring(models, ubm, probes)
    assert (abs(scores - ref_scores_00) < tolerance).all()

    # 1/b/ Without test_channelOffset, with frame-length normalisation
    # (only the empty-list form of "no offset" is exercised here)
    scores = linear_scoring(models, ubm, probes, [], True)
    assert (abs(scores - ref_scores_01) < tolerance).all()

    # 1/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring(models, ubm, probes, test_channeloffset)
    assert (abs(scores - ref_scores_10) < tolerance).all()

    # 1/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring(models, ubm, probes, test_channeloffset, True)
    assert (abs(scores - ref_scores_11) < tolerance).all()

    # 2/ Use mean/variance supervectors
    model_svs = [model1.mean_supervector, model2.mean_supervector]
    ubm_mean = ubm.mean_supervector
    ubm_variance = ubm.variance_supervector

    # 2/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring(model_svs, ubm_mean, ubm_variance, probes)
    assert (abs(scores - ref_scores_00) < tolerance).all()

    # 2/b/ Without test_channelOffset, with frame-length normalisation
    scores = linear_scoring(model_svs, ubm_mean, ubm_variance, probes, [], True)
    assert (abs(scores - ref_scores_01) < tolerance).all()

    # 2/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring(
        model_svs, ubm_mean, ubm_variance, probes, test_channeloffset)
    assert (abs(scores - ref_scores_10) < tolerance).all()

    # 2/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring(
        model_svs, ubm_mean, ubm_variance, probes, test_channeloffset, True)
    assert (abs(scores - ref_scores_11) < tolerance).all()

    # 3/ Using single model/sample
    # 3/a/ without frame-length normalisation
    for row, model_sv in enumerate(model_svs):
        for col, probe in enumerate(probes):
            score = linear_scoring(
                model_sv, ubm_mean, ubm_variance, probe,
                test_channeloffset[col])
            assert abs(score - ref_scores_10[row, col]) < tolerance

    # 3/b/ with frame-length normalisation
    for row, model_sv in enumerate(model_svs):
        for col, probe in enumerate(probes):
            score = linear_scoring(
                model_sv, ubm_mean, ubm_variance, probe,
                test_channeloffset[col], True)
            assert abs(score - ref_scores_11[row, col]) < tolerance
def test_LinearScoring():
    """Check ``linear_scoring`` against reference scores.

    Two models are scored against three sets of statistics, with and without
    channel offsets and frame-length normalisation, passing (1) GMMMachine
    objects, (2) the models' means and (3) single model/sample pairs.
    """

    def _machine(means, variances):
        # All machines share the same weights and zero variance thresholds.
        machine = GMMMachine(n_gaussians=2)
        machine.weights = np.array([0.5, 0.5], "float64")
        machine.means = np.array(means, "float64")
        machine.variances = np.array(variances, "float64")
        machine.variance_thresholds = np.array([[0, 0], [0, 0]], "float64")
        return machine

    def _stats(sum_px, n):
        # ``t`` is the sum of the per-Gaussian counts in ``n``.
        accumulated = GMMStats(2, 2)
        accumulated.sum_px = np.array(sum_px, "float64")
        accumulated.n = np.array(n, "float64")
        accumulated.t = sum(n)
        return accumulated

    ubm = _machine([[3, 70], [4, 72]], [[1, 10], [2, 5]])
    model1 = _machine([[1, 2], [3, 4]], [[9, 10], [11, 12]])
    model2 = _machine([[5, 6], [7, 8]], [[13, 14], [15, 16]])

    stats1 = _stats([[1, 2], [3, 4]], [1, 2])
    stats2 = _stats([[5, 6], [7, 8]], [3, 4])
    stats3 = _stats([[5, 6], [7, 3]], [3, 4])

    test_channeloffset = [
        np.array(values, "float64")
        for values in (
            [[9, 8], [7, 6]],
            [[5, 4], [3, 2]],
            [[1, 0], [1, 2]],
        )
    ]

    # Reference scores (from Idiap internal matlab implementation).
    # Suffix: <channel offset used><frame-length normalisation used>.
    ref_scores_00 = np.array(
        [[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], "float64"
    )
    ref_scores_01 = np.array(
        [
            [790.9666666666667, 743.9571428571428, 753.6714285714285],
            [738.5666666666667, 695.4428571428572, 704.5857142857144],
        ],
        "float64",
    )
    ref_scores_10 = np.array(
        [[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], "float64"
    )
    ref_scores_11 = np.array(
        [
            [871.8333333333332, 776.3000000000001, 770.3571428571427],
            [793.8333333333333, 714.1857142857143, 717.5000000000000],
        ],
        "float64",
    )

    models = [model1, model2]
    probes = [stats1, stats2, stats3]

    # 1/ Use GMMMachines
    # 1/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring(models, ubm, probes)
    np.testing.assert_almost_equal(scores, ref_scores_00, decimal=7)

    # 1/b/ Without test_channelOffset, with frame-length normalisation
    scores = linear_scoring(
        models, ubm, probes, frame_length_normalization=True
    )
    np.testing.assert_almost_equal(scores, ref_scores_01, decimal=7)
    # Same check with an explicit zero offset passed positionally.
    scores = linear_scoring(models, ubm, probes, 0, True)
    np.testing.assert_almost_equal(scores, ref_scores_01, decimal=7)

    # 1/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring(models, ubm, probes, test_channeloffset)
    np.testing.assert_almost_equal(scores, ref_scores_10, decimal=7)

    # 1/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring(
        models,
        ubm,
        probes,
        test_channeloffset,
        frame_length_normalization=True,
    )
    np.testing.assert_almost_equal(scores, ref_scores_11, decimal=7)

    # 2/ Use means instead of models
    model_means = [model1.means, model2.means]
    tolerance = 1e-7

    # 2/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring(model_means, ubm, probes)
    assert (abs(scores - ref_scores_00) < tolerance).all()

    # 2/b/ Without test_channelOffset, with frame-length normalisation
    scores = linear_scoring(
        model_means, ubm, probes, frame_length_normalization=True
    )
    assert (abs(scores - ref_scores_01) < tolerance).all()

    # 2/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring(model_means, ubm, probes, test_channeloffset)
    assert (abs(scores - ref_scores_10) < tolerance).all()

    # 2/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring(
        model_means,
        ubm,
        probes,
        test_channeloffset,
        frame_length_normalization=True,
    )
    assert (abs(scores - ref_scores_11) < tolerance).all()

    # 3/ Using single model/sample
    # 3/a/ without frame-length normalisation
    for row, model in enumerate(models):
        for col, probe in enumerate(probes):
            score = linear_scoring(
                model.means, ubm, probe, test_channeloffset[col]
            )
            np.testing.assert_almost_equal(
                score, ref_scores_10[row, col], decimal=7
            )

    # 3/b/ with frame-length normalisation
    for row, model in enumerate(models):
        for col, probe in enumerate(probes):
            score = linear_scoring(
                model.means, ubm, probe, test_channeloffset[col], True
            )
            np.testing.assert_almost_equal(
                score, ref_scores_11[row, col], decimal=7
            )
def test_GMMMachine_1():
    """Test a GMMMachine basic features.

    Covers attribute round-trips, supervector views, the variance-threshold
    setters, per-Gaussian access, resizing, and the comparison operators.
    """
    f64 = 'float64'
    weights = numpy.array([0.5, 0.5], f64)
    weights2 = numpy.array([0.6, 0.4], f64)
    means = numpy.array([[3, 70, 0], [4, 72, 0]], f64)
    means2 = numpy.array([[3, 7, 0], [4, 72, 0]], f64)
    variances = numpy.array([[1, 10, 1], [2, 5, 2]], f64)
    variances2 = numpy.array([[10, 10, 1], [2, 5, 2]], f64)
    varianceThresholds = numpy.array([[0, 0, 0], [0, 0, 0]], f64)
    # Only referenced by the disabled threshold assignment for gmm6 below.
    varianceThresholds2 = numpy.array(
        [[0.0005, 0.0005, 0.0005], [0, 0, 0]], f64)

    # Initializes a GMMMachine
    gmm = GMMMachine(2, 3)

    # Sets the weights, means, variances and varianceThresholds and
    # checks that they are read back unchanged
    gmm.weights = weights
    gmm.means = means
    gmm.variances = variances
    gmm.variance_thresholds = varianceThresholds
    assert gmm.shape == (2, 3)
    assert (gmm.weights == weights).all()
    assert (gmm.means == means).all()
    assert (gmm.variances == variances).all()
    assert (gmm.variance_thresholds == varianceThresholds).all()

    # Checks supervector-like accesses
    flat_means = means.reshape(means.size)
    flat_variances = variances.reshape(variances.size)
    assert (gmm.mean_supervector == flat_means).all()
    assert (gmm.variance_supervector == flat_variances).all()

    newMeans = numpy.array([[3, 70, 2], [4, 72, 2]], f64)
    newVariances = numpy.array([[1, 1, 1], [2, 2, 2]], f64)

    # Checks particular varianceThresholds-related methods:
    # a 1D vector is applied to every Gaussian, a scalar to every entry
    varianceThresholds1D = numpy.array([0.3, 1, 0.5], f64)
    gmm.set_variance_thresholds(varianceThresholds1D)
    for gaussian_index in (0, 1):
        row = gmm.variance_thresholds[gaussian_index, :]
        assert (row == varianceThresholds1D).all()
    gmm.set_variance_thresholds(0.005)
    assert (gmm.variance_thresholds == 0.005).all()

    # Checks Gaussians access
    gmm.means = newMeans
    gmm.variances = newVariances
    assert (gmm.get_gaussian(0).mean == newMeans[0, :]).all()
    assert (gmm.get_gaussian(1).mean == newMeans[1, :]).all()
    assert (gmm.get_gaussian(0).variance == newVariances[0, :]).all()
    assert (gmm.get_gaussian(1).variance == newVariances[1, :]).all()

    # Checks resize
    gmm.resize(4, 5)
    assert gmm.shape == (4, 5)

    # Checks comparison
    def _variant(w, m, v):
        # Fresh (2, 3) machine; its variance thresholds are left untouched
        # (the original threshold assignments are disabled).
        other = GMMMachine(2, 3)
        other.weights = w
        other.means = m
        other.variances = v
        return other

    gmm2 = GMMMachine(gmm)
    gmm3 = _variant(weights2, means, variances)
    gmm4 = _variant(weights, means2, variances)
    gmm5 = _variant(weights, means, variances2)
    gmm6 = _variant(weights, means, variances)

    # The copy compares equal ...
    assert gmm == gmm2
    assert (gmm != gmm2) is False
    assert gmm.is_similar_to(gmm2)

    # ... and every separately built machine compares different.
    for other in (gmm3, gmm4, gmm5, gmm6):
        assert gmm != other
        assert (gmm == other) is False
        assert gmm.is_similar_to(other) is False
def test_LinearScoring():
    """Check ``linear_scoring`` against reference scores.

    The same reference matrices are reproduced three ways: from GMMMachine
    objects, from mean/variance supervectors, and one model/sample pair at a
    time, each with and without channel offsets and frame-length
    normalisation.
    """
    f64 = 'float64'
    zero_thresholds = [[0, 0], [0, 0]]

    def _build_gmm(means, variances):
        # Two Gaussians, two features, equal weights, zero variance thresholds.
        gmm = GMMMachine(2, 2)
        gmm.weights = numpy.array([0.5, 0.5], f64)
        gmm.means = numpy.array(means, f64)
        gmm.variances = numpy.array(variances, f64)
        gmm.variance_thresholds = numpy.array(zero_thresholds, f64)
        return gmm

    def _build_stats(sum_px, counts):
        # ``t`` is the total of the per-Gaussian counts.
        stats = GMMStats(2, 2)
        stats.sum_px = numpy.array(sum_px, f64)
        stats.n = numpy.array(counts, f64)
        stats.t = counts[0] + counts[1]
        return stats

    def _assert_close(actual, expected):
        assert (abs(actual - expected) < 1e-7).all()

    ubm = _build_gmm([[3, 70], [4, 72]], [[1, 10], [2, 5]])
    model1 = _build_gmm([[1, 2], [3, 4]], [[9, 10], [11, 12]])
    model2 = _build_gmm([[5, 6], [7, 8]], [[13, 14], [15, 16]])

    stats1 = _build_stats([[1, 2], [3, 4]], [1, 2])
    stats2 = _build_stats([[5, 6], [7, 8]], [3, 4])
    stats3 = _build_stats([[5, 6], [7, 3]], [3, 4])

    test_channeloffset = [
        numpy.array([9, 8, 7, 6], f64),
        numpy.array([5, 4, 3, 2], f64),
        numpy.array([1, 0, 1, 2], f64),
    ]

    # Reference scores (from Idiap internal matlab implementation).
    # Suffix: <channel offset used><frame-length normalisation used>.
    ref_scores_00 = numpy.array(
        [[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], f64)
    ref_scores_01 = numpy.array(
        [[790.9666666666667, 743.9571428571428, 753.6714285714285],
         [738.5666666666667, 695.4428571428572, 704.5857142857144]], f64)
    ref_scores_10 = numpy.array(
        [[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], f64)
    ref_scores_11 = numpy.array(
        [[871.8333333333332, 776.3000000000001, 770.3571428571427],
         [793.8333333333333, 714.1857142857143, 717.5000000000000]], f64)

    enrolled = [model1, model2]
    samples = [stats1, stats2, stats3]

    # 1/ Use GMMMachines
    # 1/a/ Without test_channelOffset, without frame-length normalisation
    _assert_close(linear_scoring(enrolled, ubm, samples), ref_scores_00)
    # 1/b/ Without test_channelOffset, with frame-length normalisation
    # (only the empty-list form of "no offset" is exercised here)
    _assert_close(
        linear_scoring(enrolled, ubm, samples, [], True), ref_scores_01)
    # 1/c/ With test_channelOffset, without frame-length normalisation
    _assert_close(
        linear_scoring(enrolled, ubm, samples, test_channeloffset),
        ref_scores_10)
    # 1/d/ With test_channelOffset, with frame-length normalisation
    _assert_close(
        linear_scoring(enrolled, ubm, samples, test_channeloffset, True),
        ref_scores_11)

    # 2/ Use mean/variance supervectors
    enrolled_sv = [model1.mean_supervector, model2.mean_supervector]
    mean_sv = ubm.mean_supervector
    variance_sv = ubm.variance_supervector

    # 2/a/ Without test_channelOffset, without frame-length normalisation
    _assert_close(
        linear_scoring(enrolled_sv, mean_sv, variance_sv, samples),
        ref_scores_00)
    # 2/b/ Without test_channelOffset, with frame-length normalisation
    _assert_close(
        linear_scoring(enrolled_sv, mean_sv, variance_sv, samples, [], True),
        ref_scores_01)
    # 2/c/ With test_channelOffset, without frame-length normalisation
    _assert_close(
        linear_scoring(
            enrolled_sv, mean_sv, variance_sv, samples, test_channeloffset),
        ref_scores_10)
    # 2/d/ With test_channelOffset, with frame-length normalisation
    _assert_close(
        linear_scoring(
            enrolled_sv, mean_sv, variance_sv, samples, test_channeloffset,
            True),
        ref_scores_11)

    # 3/ Using single model/sample
    pairs = [
        (row, col)
        for row in range(len(enrolled_sv))
        for col in range(len(samples))
    ]

    # 3/a/ without frame-length normalisation
    for row, col in pairs:
        score = linear_scoring(
            enrolled_sv[row], mean_sv, variance_sv, samples[col],
            test_channeloffset[col])
        assert abs(score - ref_scores_10[row, col]) < 1e-7

    # 3/b/ with frame-length normalisation
    for row, col in pairs:
        score = linear_scoring(
            enrolled_sv[row], mean_sv, variance_sv, samples[col],
            test_channeloffset[col], True)
        assert abs(score - ref_scores_11[row, col]) < 1e-7
def test_GMMMachine():
    """Exercise the basic features of a GMMMachine.

    Covers, in order: parameter setters/getters, the array and scalar forms
    of ``variance_thresholds``, equality/similarity comparison, HDF5 round
    trips (through a file object and through a file name), and the
    consistency between ``weights`` and ``log_weights``.
    """

    def _make_machine(w, m, v, thresholds):
        # Build a 2-Gaussian machine; parameters are assigned in the order
        # weights, means, variances, variance thresholds.
        built = GMMMachine(n_gaussians=2)
        built.weights = w
        built.means = m
        built.variances = v
        built.variance_thresholds = thresholds
        return built

    def _assert_loaded_attribute_types(loaded):
        # Attribute types expected on a machine restored from HDF5.
        assert type(loaded.n_gaussians) is np.int64
        assert type(loaded.update_means) is np.bool_
        assert type(loaded.update_variances) is np.bool_
        assert type(loaded.update_weights) is np.bool_
        assert type(loaded.trainer) is str
        assert loaded.ubm is None

    weights = np.array([0.5, 0.5], "float64")
    weights2 = np.array([0.6, 0.4], "float64")
    means = np.array([[3, 70, 0], [4, 72, 0]], "float64")
    means2 = np.array([[3, 7, 0], [4, 72, 0]], "float64")
    variances = np.array([[1, 10, 1], [2, 5, 2]], "float64")
    variances2 = np.array([[10, 10, 1], [2, 5, 2]], "float64")
    varianceThresholds = np.array([[0, 0, 0], [0, 0, 0]], "float64")
    varianceThresholds2 = np.array(
        [[0.0005, 0.0005, 0.0005], [0, 0, 0]], "float64"
    )

    # Initializes a GMMMachine, sets the weights, means, variances and
    # varianceThresholds, and checks that they are read back unchanged.
    gmm = _make_machine(weights, means, variances, varianceThresholds)
    assert gmm.shape == (2, 3)
    np.testing.assert_equal(gmm.weights, weights)
    np.testing.assert_equal(gmm.means, means)
    np.testing.assert_equal(gmm.variances, variances)
    np.testing.assert_equal(gmm.variance_thresholds, varianceThresholds)

    newMeans = np.array([[3, 70, 2], [4, 72, 2]], "float64")
    newVariances = np.array([[1, 1, 1], [2, 2, 2]], "float64")

    # The variance_thresholds setter is also exercised with a 1-D array
    # and with a scalar.
    varianceThresholds1D = np.array([0.3, 1, 0.5], "float64")
    gmm.variance_thresholds = varianceThresholds1D
    np.testing.assert_equal(gmm.variance_thresholds, varianceThresholds1D)

    gmm.variance_thresholds = 0.005
    np.testing.assert_equal(gmm.variance_thresholds, 0.005)

    gmm.means = newMeans
    gmm.variances = newVariances
    np.testing.assert_equal(gmm.means, newMeans)
    np.testing.assert_equal(gmm.variances, newVariances)

    # Checks comparison. Each machine below deviates from the initial
    # (weights, means, variances, thresholds) set in exactly one parameter;
    # note that `gmm` itself has since received new means, variances and
    # thresholds.
    gmm2 = deepcopy(gmm)
    gmm3 = _make_machine(weights2, means, variances, varianceThresholds)
    gmm4 = _make_machine(weights, means2, variances, varianceThresholds)
    gmm5 = _make_machine(weights, means, variances2, varianceThresholds)
    gmm6 = _make_machine(weights, means, variances, varianceThresholds2)

    assert_gmm_equal(gmm, gmm2)
    assert (gmm != gmm2) is False
    assert gmm.is_similar_to(gmm2)

    for other in (gmm3, gmm4, gmm5, gmm6):
        assert gmm != other
        assert gmm.is_similar_to(other) is False

    # Saving and loading through HDF5File objects. Everything happens inside
    # the `with` block because the temporary file only exists there.
    with tempfile.NamedTemporaryFile(suffix=".hdf5") as f:
        filename = f.name
        gmm.save(HDF5File(filename, "w"))

        # Using from_hdf5
        gmm1 = GMMMachine.from_hdf5(HDF5File(filename, "r"))
        _assert_loaded_attribute_types(gmm1)
        assert_gmm_equal(gmm, gmm1)

        # Using load
        gmm1 = GMMMachine(n_gaussians=gmm.n_gaussians)
        gmm1.load(HDF5File(filename, "r"))
        _assert_loaded_attribute_types(gmm1)
        assert_gmm_equal(gmm, gmm1)

    # Saving and loading through a plain file name.
    with tempfile.NamedTemporaryFile(suffix=".hdf5") as f:
        filename = f.name
        gmm.save(filename)
        gmm1 = GMMMachine.from_hdf5(filename)
        assert_gmm_equal(gmm, gmm1)

    # Weights
    n_gaussians = 5
    machine = GMMMachine(n_gaussians)

    default_weights = np.full(
        shape=(n_gaussians,), fill_value=1.0 / n_gaussians
    )
    default_log_weights = np.full(
        shape=(n_gaussians,), fill_value=np.log(1.0 / n_gaussians)
    )

    # Test weights getting and setting
    np.testing.assert_almost_equal(machine.weights, default_weights)
    np.testing.assert_almost_equal(machine.log_weights, default_log_weights)

    # Work on a copy: a plain assignment would alias default_weights, and the
    # in-place slice updates below would then rewrite the expected defaults.
    modified_weights = default_weights.copy()
    # Halve the first n // 2 weights and scale the last n // 2 by 1.5; for an
    # odd n_gaussians the middle weight is left untouched.
    modified_weights[: n_gaussians // 2] = (1 / n_gaussians) / 2
    modified_weights[n_gaussians // 2 + n_gaussians % 2 :] = (
        1 / n_gaussians
    ) * 1.5

    # Ensure setter works (log_weights is updated correctly)
    machine.weights = modified_weights
    np.testing.assert_almost_equal(machine.weights, modified_weights)
    np.testing.assert_almost_equal(
        machine.log_weights, np.log(modified_weights)
    )
def test_GMMMachine_1():
    """Check basic GMMMachine features through the ``GMMMachine(2, 3)`` API.

    Walks through parameter assignment, the supervector accessors, the
    variance-threshold setter, per-Gaussian access, resizing and comparison.
    """
    dtype = 'float64'

    def _comparison_machine(w, m, v):
        # Build a (2, 3) machine with only weights, means and variances
        # assigned; variance thresholds are not set on these machines.
        built = GMMMachine(2,3)
        built.weights = w
        built.means = m
        built.variances = v
        return built

    # Reference parameters and their "differs in one field" variants.
    base_weights = numpy.array([0.5, 0.5], dtype)
    other_weights = numpy.array([0.6, 0.4], dtype)
    base_means = numpy.array([[3, 70, 0], [4, 72, 0]], dtype)
    other_means = numpy.array([[3, 7, 0], [4, 72, 0]], dtype)
    base_variances = numpy.array([[1, 10, 1], [2, 5, 2]], dtype)
    other_variances = numpy.array([[10, 10, 1], [2, 5, 2]], dtype)
    base_thresholds = numpy.array([[0, 0, 0], [0, 0, 0]], dtype)
    # Currently not applied to any machine in this test.
    other_thresholds = numpy.array([[0.0005, 0.0005, 0.0005], [0, 0, 0]], dtype)

    # Create the machine, assign every parameter and read each one back.
    gmm = GMMMachine(2,3)
    gmm.weights = base_weights
    gmm.means = base_means
    gmm.variances = base_variances
    gmm.variance_thresholds = base_thresholds
    assert gmm.shape == (2,3)
    assert (gmm.weights == base_weights).all()
    assert (gmm.means == base_means).all()
    assert (gmm.variances == base_variances).all()
    assert (gmm.variance_thresholds == base_thresholds).all()

    # The supervectors are compared against the flattened 2-D parameters.
    flat_means = base_means.reshape(base_means.size)
    flat_variances = base_variances.reshape(base_variances.size)
    assert (gmm.mean_supervector == flat_means).all()
    assert (gmm.variance_supervector == flat_variances).all()

    updated_means = numpy.array([[3, 70, 2], [4, 72, 2]], dtype)
    updated_variances = numpy.array([[1, 1, 1], [2, 2, 2]], dtype)

    # A 1-D threshold vector is expected on the row of every Gaussian;
    # a scalar is expected in every entry.
    thresholds_1d = numpy.array([0.3, 1, 0.5], dtype)
    gmm.set_variance_thresholds(thresholds_1d)
    for row in (0, 1):
        assert (gmm.variance_thresholds[row,:] == thresholds_1d).all()
    gmm.set_variance_thresholds(0.005)
    assert (gmm.variance_thresholds == 0.005).all()

    # Per-Gaussian access must reflect the newly assigned means/variances.
    gmm.means = updated_means
    gmm.variances = updated_variances
    for idx in (0, 1):
        assert (gmm.get_gaussian(idx).mean == updated_means[idx,:]).all()
    for idx in (0, 1):
        assert (gmm.get_gaussian(idx).variance == updated_variances[idx,:]).all()

    # Resizing changes the reported shape.
    gmm.resize(4,5)
    assert gmm.shape == (4,5)

    # Comparison: a copy-constructed machine must be equal and similar.
    # NOTE(review): `gmm` is (4, 5) at this point while the other machines
    # are built as (2, 3), so the inequalities may hold on shape alone --
    # confirm this is intended.
    gmm2 = GMMMachine(gmm)
    different_machines = [
        _comparison_machine(other_weights, base_means, base_variances),
        _comparison_machine(base_weights, other_means, base_variances),
        _comparison_machine(base_weights, base_means, other_variances),
        _comparison_machine(base_weights, base_means, base_variances),
    ]

    assert gmm == gmm2
    assert (gmm != gmm2) is False
    assert gmm.is_similar_to(gmm2)

    for other in different_machines:
        assert gmm != other
        assert (gmm == other) is False
        assert gmm.is_similar_to(other) is False