def test_train(self, params='wm'):
    b = bernoullimm.BernoulliMM(n_components=self.n_components)
    b.weights_ = self.weights
    b.means_ = self.means
    b.log_odds_, b.log_inv_mean_sums_ = \
        bernoullimm._compute_log_odds_inv_means_sums(self.means)

    # Create a training set by sampling from the predefined distribution.
    X = b.sample(n_samples=100)
    b = self.model(n_components=self.n_components, random_state=rng,
                   n_iter=1, init_params=params)
    b.fit(X)

    # Do one training iteration at a time so we can keep track of the log
    # likelihood to make sure that it increases after each iteration.
    trainll = []
    for _ in range(5):
        b.params = params
        b.init_params = ''
        b.fit(X)
        trainll.append(self.score(b, X))

    # Finish fitting.
    b.n_iter = 10
    b.init_params = ''
    b.params = params
    b.fit(X)

    delta_min = np.diff(trainll).min()
    self.assertTrue(
        delta_min > self.threshold,
        "The minimal train log-likelihood increase is %f, which is below"
        " the admissible threshold of %f. The likelihoods are %s."
        % (delta_min, self.threshold, trainll))
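# The test above leans on EM's monotonicity guarantee: with init_params=''
# each call to fit() warm-starts from the current parameters, so every extra
# iteration can only increase the training log-likelihood. A minimal
# standalone sketch of the same check (illustrative only; it assumes
# BernoulliMM's fit/score behave as used in these tests, with score()
# returning per-sample log-likelihoods):
def _em_monotonicity_sketch():
    rng = np.random.RandomState(0)
    X = (rng.rand(200, 8) > .5).astype(np.uint8)
    b = bernoullimm.BernoulliMM(n_components=3, random_state=rng,
                                n_iter=1, init_params='wm')
    b.fit(X)
    b.init_params = ''  # keep the warm-started parameters between calls
    ll = []
    for _ in range(5):
        b.fit(X)  # one more EM iteration from the current parameters
        ll.append(b.score(X).sum())
    assert np.diff(ll).min() > -1e-8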
def test_n_parameters():
    """Test that the right number of parameters is estimated"""
    n_samples, n_dim, n_components = 7, 5, 2
    X = (rng.rand(n_samples, n_dim) > .5).astype(np.uint8)
    # n_dim * n_components free means plus n_components - 1 free weights
    # (the mixture weights are constrained to sum to one).
    n_params = n_dim * n_components + n_components - 1

    b = bernoullimm.BernoulliMM(n_components=n_components,
                                random_state=rng, n_iter=1)
    b.fit(X)
    assert_true(b._n_parameters() == n_params)
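# For reference, the expected count for K components on D-dimensional binary
# data is
#
#     n_params = K * D + (K - 1),
#
# i.e. a K x D matrix of Bernoulli means plus K mixture weights with one
# sum-to-one constraint. Here K = 2, D = 5 gives 2 * 5 + 1 = 11, matching
# the assertion above (this assumes _n_parameters counts exactly these free
# parameters).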
def test_multiple_init():
    """Test that multiple inits perform at least as well as a single one"""
    X = (rng.rand(30, 5) > .5).astype(np.uint8)
    b = bernoullimm.BernoulliMM(n_components=2, random_state=rng, n_iter=5)
    train1 = b.fit(X).score(X).sum()
    b.n_init = 5
    out_b = b.fit(X)
    print(out_b.means_)
    train2 = out_b.score(X).sum()
    print("train2 = {0}, train1 = {1}".format(train2, train1))
    # The best of five initializations should score at least as well as a
    # single one, up to a small numerical tolerance.
    assert_true(train2 >= train1 - 1.e-2)
def test_BernoulliMM_attributes():
    n_components, n_features = 10, 4
    b = bernoullimm.BernoulliMM(n_components, random_state=rng)
    weights = rng.rand(n_components)
    weights = weights / weights.sum()
    means = np.clip(rng.rand(n_components, n_features), .01, .99)
    # Derived quantities used for fast scoring: log(1 - mu), the log-odds
    # log(mu / (1 - mu)), and the per-component sums of log(1 - mu).
    log_inv_means = np.log(1 - means)
    log_odds = np.log(means) - log_inv_means
    log_inv_mean_sums = log_inv_means.sum(-1)

    assert_true(b.n_components == n_components)

    b.weights_ = weights
    assert_array_almost_equal(b.weights_, weights)
    b.means_ = means
    assert_array_almost_equal(b.means_, means)
    b.log_inv_means_ = log_inv_means
    assert_array_almost_equal(b.log_inv_means_, log_inv_means)
    b.log_odds_ = log_odds
    assert_array_almost_equal(b.log_odds_, log_odds)
    b.log_inv_mean_sums_ = log_inv_mean_sums
    assert_array_almost_equal(b.log_inv_mean_sums_, log_inv_mean_sums)
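# These attributes implement the standard factorization of the Bernoulli
# log-likelihood,
#
#     log p(x | mu) = sum_d [x_d log mu_d + (1 - x_d) log(1 - mu_d)]
#                   = x . log_odds + log_inv_mean_sums,
#
# which turns per-sample scoring into a single dot product. A quick numeric
# check of the identity (standalone numpy; names mirror the test above):
def _check_log_odds_identity():
    rng = np.random.RandomState(0)
    means = np.clip(rng.rand(3, 4), .01, .99)   # (K, D) Bernoulli means
    x = (rng.rand(4) > .5).astype(np.float64)   # one binary sample
    log_inv_means = np.log(1 - means)
    log_odds = np.log(means) - log_inv_means
    direct = (x * np.log(means) + (1 - x) * log_inv_means).sum(-1)
    fast = log_odds.dot(x) + log_inv_means.sum(-1)
    assert np.allclose(direct, fast)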
# Module-level imports this script relies on (get_maximal_patches is a
# project-local helper defined elsewhere):
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.axes_grid1 import ImageGrid

import bernoullimm
import configParserWrapper


def main(args):
    """For each phone label, extract maximal edge/spectrogram patch pairs,
    fit a Bernoulli mixture to the binary edge patches, and save the
    component means together with the matching spectrogram clusters.
    """
    config_d = configParserWrapper.load_settings(open(args.config, 'r'))

    all_X_patches = []
    all_S_patches = []
    for phn_id, (fl_edge, fl_spec) in enumerate(zip(args.data,
                                                    args.data_spec)):
        if len(all_X_patches) > 100000:
            break
        print(phn_id)
        X = np.load(fl_edge)
        S = np.load(fl_spec)
        if args.do_exp_weighted_divergence:
            S *= np.exp(S)

        # Collect at most ~1000 patch pairs per phone.
        X_patches = []
        S_patches = []
        for i in range(len(X)):
            if len(X_patches) > 1000:
                break
            p_edge, p_spec = get_maximal_patches(
                X[i], S[i], patch_radius=args.patch_radius)
            X_patches.extend(p_edge)
            S_patches.extend(p_spec)
        print(phn_id, len(X_patches))
        all_X_patches.extend(X_patches)
        all_S_patches.extend(S_patches)

    X = np.array(all_X_patches)
    S = np.array(all_S_patches)
    data_shape = X.shape[1:]
    X = X.reshape(X.shape[0], np.prod(data_shape))

    bmm = bernoullimm.BernoulliMM(n_components=args.n_components,
                                  n_init=20, n_iter=500, random_state=0,
                                  verbose=args.v, tol=1e-6)
    bmm.fit(X)

    # Keep only components responsible for more than 30 patches
    # (summed posterior responsibilities).
    use_means = bmm.predict_proba(X).sum(0) > 30
    print(use_means.sum())
    np.save(args.save_parts,
            bmm.means_.reshape(*((bmm.n_components,) + data_shape))[use_means])

    # Cluster the underlying spectrogram patches with the responsibilities
    # learned on the edge features.
    S_shape = S.shape[1:]
    S_clusters = bmm.cluster_underlying_data(
        S.reshape(len(S), np.prod(S_shape)),
        X).reshape(*((bmm.n_components,) + S_shape))[use_means]
    np.save(args.spec_save_parts, S_clusters)

    if args.viz_spec_parts is not None:
        ncols = int(np.sqrt(args.n_components))
        nrows = int(np.ceil(args.n_components / ncols))
        plt.close('all')
        fig = plt.figure(1, (6, 6))
        grid = ImageGrid(fig, 111,
                         nrows_ncols=(nrows, ncols),
                         axes_pad=0.001)  # pad between axes in inches
        for i in range(nrows * ncols):
            # Cells beyond the number of kept clusters stay empty but get
            # the same red frame and hidden axes.
            if i < S_clusters.shape[0]:
                grid[i].imshow(S_clusters[i], cmap=cm.binary,
                               interpolation='nearest')
            for side in ('bottom', 'top', 'left', 'right'):
                grid[i].spines[side].set_color('red')
            for a in grid[i].axis.values():
                a.toggle(all=False)
        plt.savefig(args.viz_spec_parts, bbox_inches='tight')
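# get_maximal_patches is a project-local helper whose implementation is not
# shown here. A hypothetical sketch of the kind of routine the code above
# assumes (illustrative only; the shapes and selection rule are guesses):
# take square patches of radius r around the time-frequency bins with the
# highest edge activity.
def _get_maximal_patches_sketch(X, S, patch_radius=2, n_patches=20):
    # X: (T, F, n_edge_types) binary edge map; S: (T, F) spectrogram.
    r = patch_radius
    activity = X.sum(-1).astype(float)
    activity[:r] = activity[-r:] = 0        # keep patches fully inside
    activity[:, :r] = activity[:, -r:] = 0
    idx = np.argsort(activity.ravel())[::-1][:n_patches]
    ts, fs = np.unravel_index(idx, activity.shape)
    X_patches = [X[t - r:t + r + 1, f - r:f + r + 1] for t, f in zip(ts, fs)]
    S_patches = [S[t - r:t + r + 1, f - r:f + r + 1] for t, f in zip(ts, fs)]
    return X_patches, S_patches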
# Imports for this script (fb, esp, get_edge_features_use_config and
# get_maximal_patches are project-local modules/helpers defined elsewhere):
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.axes_grid1 import ImageGrid
from scipy.io import wavfile

import bernoullimm
import configParserWrapper


def main(args):
    """Extract maximal edge/spectrogram patch pairs from a list of wave
    files, fit a Bernoulli mixture to the binary edge patches, and save the
    component means together with the matching spectrogram clusters.
    """
    config_d = configParserWrapper.load_settings(open(args.config, 'r'))
    fls = np.loadtxt(args.fls_txt, dtype=str)

    # Hermite tapers for multitaper spectrograms (computed but not used by
    # run_transform below).
    htemp, dhtemp, ddhtemp, tttemp = fb.hermite_window(
        args.winsize, args.num_tapers, args.win_half_time_support)

    def run_transform(x, winlength):
        return esp.get_spectrogram_features(
            x, 16000, winlength, 80,
            2 ** int(np.ceil(np.log2(winlength))),
            4000, 7)

    X_patches = []
    S_patches = []
    for fl_path in fls:
        if len(X_patches) > 100000:
            break
        S = run_transform(wavfile.read(fl_path)[1], args.winsize)
        if args.do_exp_weighted_divergence:
            S *= np.exp(S)
        X = get_edge_features_use_config(S.T, config_d['EDGES'])
        cur_X_patches, cur_S_patches = get_maximal_patches(X, S,
                                                           patch_radius=2)
        X_patches.extend(cur_X_patches)
        S_patches.extend(cur_S_patches)

    X = np.array(X_patches)
    S = np.array(S_patches)
    data_shape = X.shape[1:]
    X = X.reshape(X.shape[0], np.prod(data_shape))

    bmm = bernoullimm.BernoulliMM(n_components=args.n_components,
                                  n_init=50, n_iter=500, random_state=0,
                                  verbose=args.v, tol=1e-6)
    bmm.fit(X)

    # Keep only components responsible for more than 30 patches
    # (summed posterior responsibilities).
    use_means = bmm.predict_proba(X).sum(0) > 30
    print(use_means.sum())
    np.save(args.save_parts,
            bmm.means_.reshape(*((bmm.n_components,) + data_shape))[use_means])

    S_shape = S.shape[1:]
    S_clusters = bmm.cluster_underlying_data(
        S.reshape(len(S), np.prod(S_shape)),
        X).reshape(*((bmm.n_components,) + S_shape))[use_means]
    np.save(args.spec_save_parts, S_clusters)

    if args.viz_spec_parts is not None:
        ncols = int(np.sqrt(args.n_components))
        nrows = int(np.ceil(args.n_components / ncols))
        plt.close('all')
        fig = plt.figure(1, (6, 6))
        grid = ImageGrid(fig, 111,
                         nrows_ncols=(nrows, ncols),
                         axes_pad=0.001)  # pad between axes in inches
        for i in range(nrows * ncols):
            if i < S_clusters.shape[0]:
                grid[i].imshow(S_clusters[i], cmap=cm.binary,
                               interpolation='nearest')
            for side in ('bottom', 'top', 'left', 'right'):
                grid[i].spines[side].set_color('red')
            for a in grid[i].axis.values():
                a.toggle(all=False)
        plt.savefig(args.viz_spec_parts, bbox_inches='tight')
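# Downstream, the saved arrays can be reloaded for inspection. A minimal
# sketch, assuming the script was run with save_parts='parts.npy' and
# spec_save_parts='spec_parts.npy' (hypothetical file names):
def _inspect_saved_parts():
    parts = np.load('parts.npy')            # (n_used,) + edge patch shape
    spec_parts = np.load('spec_parts.npy')  # (n_used,) + spectrogram shape
    print(parts.shape, spec_parts.shape)
    # Show the first few spectrogram cluster centers.
    fig, axes = plt.subplots(1, min(5, len(spec_parts)))
    for ax, part in zip(np.atleast_1d(axes), spec_parts):
        ax.imshow(part, cmap=cm.binary, interpolation='nearest')
        ax.set_axis_off()
    plt.show()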