Python BernoulliMM Examples, amitgroup.stats.bernoullimm.BernoulliMM Python Examples

Example #1

0

Show file

    def test_train(self, params='wm'):
        """Check that the training score keeps improving across refits.

        A sample is drawn from a mixture configured with the fixture's
        weights and means. A fresh model is then refit on that sample several
        times, one ``n_iter=1`` fit per step, and the smallest difference
        between consecutive ``self.score`` values must exceed
        ``self.threshold``.

        ``params`` is used as ``init_params`` when the model is created and
        is assigned to the model's ``params`` attribute before every refit.
        """
        # Mixture that generates the training data.
        source = bernoullimm.BernoulliMM(n_components=self.n_components)
        source.weights_ = self.weights
        source.means_ = self.means
        derived = bernoullimm._compute_log_odds_inv_means_sums(self.means)
        source.log_odds_, source.log_inv_mean_sums_ = derived

        X = source.sample(n_samples=100)

        model = self.model(n_components=self.n_components,
                           random_state=rng,
                           n_iter=1,
                           init_params=params)
        model.fit(X)

        # Refit step by step, recording the score after each step.
        # init_params is blanked before each fit -- presumably so the fit
        # continues from the current parameters instead of re-initialising
        # (TODO confirm against BernoulliMM.fit).
        trainll = []
        for _ in range(5):
            model.params = params
            model.init_params = ''
            model.fit(X)
            trainll.append(self.score(model, X))

        # Finish fitting with a longer run.
        model.n_iter = 10
        model.init_params = ''
        model.params = params
        model.fit(X)

        delta_min = np.diff(trainll).min()
        improved = delta_min > self.threshold
        message = (
            "The min nll increase is %f which is lower than the admissible"
            " threshold of %f. The likelihoods are %s." %
            (delta_min, self.threshold, trainll))
        self.assertTrue(improved, message)

Example #2

0

Show file

def test_n_parameters():
    """Check that ``_n_parameters`` matches the expected parameter count."""
    n_samples, n_dim, n_components = 7, 5, 2

    # Random binary data: uniform draws thresholded at 0.5.
    X = (rng.rand(n_samples, n_dim) > .5).astype(np.uint8)

    # n_dim values per component plus (n_components - 1) more -- presumably
    # the means and the free mixing weights.
    expected = n_dim * n_components + n_components - 1

    model = bernoullimm.BernoulliMM(
        n_components=n_components,
        random_state=rng,
        n_iter=1,
    )
    model.fit(X)

    assert_true(model._n_parameters() == expected)

Example #3

0

Show file

def test_multiple_init():
    """Check that several initialisations score no worse than a single one.

    The summed score after fitting with ``n_init = 5`` may fall short of the
    summed score from the first fit by at most 0.01.
    """
    tolerance = 1.e-2
    X = (rng.rand(30, 5) > .5).astype(np.uint8)
    model = bernoullimm.BernoulliMM(n_components=2, random_state=rng, n_iter=5)

    # Baseline: one fit with the n_init the model was constructed with.
    single_fit = model.fit(X)
    train1 = single_fit.score(X).sum()

    # Same model object, refit with five initialisations.
    model.n_init = 5
    multi_fit = model.fit(X)
    print(multi_fit.means_)
    train2 = multi_fit.score(X).sum()

    print("train2 = {0}, train1 = {1}".format(train2, train1))
    assert_true(train2 >= train1 - tolerance)

Example #4

0

Show file

def test_BernoulliMM_attributes():
    """Check that a BernoulliMM keeps its component count and assigned arrays.

    Each array attribute is assigned and then read back, and the value read
    must match the value assigned.
    """
    n_components, n_features = 10, 4
    model = bernoullimm.BernoulliMM(n_components, random_state=rng)

    # Random mixing weights normalised to sum to one.
    raw_weights = rng.rand(n_components)
    weights = raw_weights / raw_weights.sum()

    # Means clipped away from 0 and 1 so the logarithms below stay finite.
    means = np.clip(rng.rand(n_components, n_features), .01, .99)
    log_inv_means = np.log(1 - means)
    log_odds = np.log(means) - log_inv_means
    log_inv_mean_sums = log_inv_means.sum(-1)

    assert_true(model.n_components == n_components)

    # Order matters only in that it mirrors the order attributes are set.
    round_trips = (
        ('weights_', weights),
        ('means_', means),
        ('log_inv_means_', log_inv_means),
        ('log_odds_', log_odds),
        ('log_inv_mean_sums_', log_inv_mean_sums),
    )
    for attr_name, value in round_trips:
        setattr(model, attr_name, value)
        assert_array_almost_equal(getattr(model, attr_name), value)

Example #5

0

Show file

File: CExtractPatches.py Project: markstoehr/phoneclassification

def main(args):
    """Fit a Bernoulli mixture to edge patches and save the resulting parts.

    For each pair of files in ``args.data`` (edge features) and
    ``args.data_spec`` (spectrograms), patches are extracted with
    ``get_maximal_patches`` using ``args.patch_radius``. A ``BernoulliMM``
    with ``args.n_components`` components is fit to the flattened edge
    patches. Components whose ``predict_proba`` column sums to more than 30
    over all patches are kept, and for those:

    * the component means, reshaped to the edge-patch shape, are saved to
      ``args.save_parts``;
    * the output of ``bmm.cluster_underlying_data`` on the flattened
      spectrogram patches, reshaped to the spectrogram-patch shape, is saved
      to ``args.spec_save_parts``.

    If ``args.viz_spec_parts`` is not None, the kept spectrogram clusters are
    drawn on an image grid and the figure is written to that path.

    Errors raised while saving or plotting propagate to the caller instead
    of dropping into the debugger.
    """
    # The settings are not used below. They are still loaded so that a
    # missing or unreadable config file fails up front, as it did before.
    with open(args.config, 'r') as config_file:
        configParserWrapper.load_settings(config_file)

    all_X_patches = []
    all_S_patches = []
    for phn_id, (fl_edge, fl_spec) in enumerate(
            zip(args.data, args.data_spec)):
        # Stop reading further files once enough patches are collected.
        if len(all_X_patches) > 100000:
            break
        print(phn_id)

        X = np.load(fl_edge)
        S = np.load(fl_spec)
        if args.do_exp_weighted_divergence:
            S *= np.exp(S)

        X_patches = []
        S_patches = []
        # Indexed on purpose: a spectrogram file shorter than its edge file
        # raises IndexError rather than being silently truncated.
        for i in range(len(X)):
            # Per-file cap on the number of patches.
            if len(X_patches) > 1000:
                break
            p_edge, p_spec = get_maximal_patches(
                X[i], S[i], patch_radius=args.patch_radius)
            X_patches.extend(p_edge)
            S_patches.extend(p_spec)

        # Same text as ``print phn_id, n`` on Python 2, and valid on 3.
        print("%s %s" % (phn_id, len(X_patches)))
        all_X_patches.extend(X_patches)
        all_S_patches.extend(S_patches)

    X = np.array(all_X_patches)
    S = np.array(all_S_patches)
    data_shape = X.shape[1:]
    X = X.reshape(X.shape[0], np.prod(data_shape))

    bmm = bernoullimm.BernoulliMM(n_components=args.n_components,
                                  n_init=20,
                                  n_iter=500,
                                  random_state=0,
                                  verbose=args.v,
                                  tol=1e-6)
    bmm.fit(X)

    # Keep only components whose predict_proba column sums above 30.
    use_means = bmm.predict_proba(X).sum(0) > 30
    print(use_means.sum())

    parts = bmm.means_.reshape((bmm.n_components,) + data_shape)
    np.save(args.save_parts, parts[use_means])

    S_shape = S.shape[1:]
    S_flat = S.reshape(len(S), np.prod(S_shape))
    S_clusters = bmm.cluster_underlying_data(S_flat, X).reshape(
        (bmm.n_components,) + S_shape)[use_means]
    np.save(args.spec_save_parts, S_clusters)

    if args.viz_spec_parts is not None:
        ncols = int(np.sqrt(args.n_components))
        # True division before ceil: with Python 2 integer division the
        # quotient was floored first, which could leave the grid with fewer
        # cells than there are clusters.
        nrows = int(np.ceil(args.n_components / float(ncols)))

        plt.close('all')
        fig = plt.figure(1, (6, 6))
        grid = ImageGrid(
            fig,
            111,  # similar to subplot(111)
            nrows_ncols=(nrows, ncols),
            axes_pad=0.001,  # pad between axes in inch.
        )

        n_shown = S_clusters.shape[0]
        for i in range(nrows * ncols):
            ax = grid[i]
            # Cells beyond the kept clusters stay empty but are styled alike.
            if i < n_shown:
                ax.imshow(S_clusters[i],
                          cmap=cm.binary,
                          interpolation='nearest')
            for side in ('bottom', 'top', 'left', 'right'):
                ax.spines[side].set_color('red')
            for a in ax.axis.values():
                a.toggle(all=False)

        plt.savefig('%s' % args.viz_spec_parts, bbox_inches='tight')

Example #6

0

Show file

def main(args):
    """Fit a Bernoulli mixture to edge patches computed from audio files.

    ``args.fls_txt`` lists the audio files to read. For each file a
    spectrogram is computed with ``esp.get_spectrogram_features``, edge
    features are derived from it with ``get_edge_features_use_config`` and
    the ``EDGES`` section of the config, and patches are extracted with
    ``get_maximal_patches`` (``patch_radius=2``). A ``BernoulliMM`` with
    ``args.n_components`` components is fit to the flattened edge patches.
    Components whose ``predict_proba`` column sums to more than 30 over all
    patches are kept, and for those:

    * the component means, reshaped to the edge-patch shape, are saved to
      ``args.save_parts``;
    * the output of ``bmm.cluster_underlying_data`` on the flattened
      spectrogram patches, reshaped to the spectrogram-patch shape, is saved
      to ``args.spec_save_parts``.

    If ``args.viz_spec_parts`` is not None, the kept spectrogram clusters are
    drawn on an image grid and the figure is written to that path.

    Errors raised while saving or plotting propagate to the caller instead
    of dropping into the debugger.
    """
    with open(args.config, 'r') as config_file:
        config_d = configParserWrapper.load_settings(config_file)

    # loadtxt returns a 0-d array for a one-line list; make it iterable.
    fls = np.atleast_1d(np.loadtxt(args.fls_txt, dtype=str))

    # NOTE(review): the returned windows are never used below. The call is
    # kept only so its argument handling still runs -- confirm and drop.
    fb.hermite_window(args.winsize,
                      args.num_tapers,
                      args.win_half_time_support)

    # Smallest power of two that is >= the window length.
    winlength = args.winsize
    nfft = 2 ** int(np.ceil(np.log2(winlength)))

    X_patches = []
    S_patches = []
    for fl_path in fls:
        # Stop reading further files once enough patches are collected.
        if len(X_patches) > 100000:
            break

        samples = wavfile.read(fl_path)[1]
        # Positional constants are passed exactly as before; see
        # esp.get_spectrogram_features for their meaning.
        S = esp.get_spectrogram_features(samples,
                                         16000,
                                         winlength,
                                         80,
                                         nfft,
                                         4000,
                                         7)
        if args.do_exp_weighted_divergence:
            S *= np.exp(S)

        X = get_edge_features_use_config(S.T, config_d['EDGES'])
        cur_X_patches, cur_S_patches = get_maximal_patches(
            X, S, patch_radius=2)
        X_patches.extend(cur_X_patches)
        S_patches.extend(cur_S_patches)

    X = np.array(X_patches)
    S = np.array(S_patches)
    data_shape = X.shape[1:]
    X = X.reshape(X.shape[0], np.prod(data_shape))

    bmm = bernoullimm.BernoulliMM(n_components=args.n_components,
                                  n_init=50,
                                  n_iter=500,
                                  random_state=0,
                                  verbose=args.v,
                                  tol=1e-6)
    bmm.fit(X)

    # Keep only components whose predict_proba column sums above 30.
    use_means = bmm.predict_proba(X).sum(0) > 30
    print(use_means.sum())

    parts = bmm.means_.reshape((bmm.n_components,) + data_shape)
    np.save(args.save_parts, parts[use_means])

    S_shape = S.shape[1:]
    S_flat = S.reshape(len(S), np.prod(S_shape))
    S_clusters = bmm.cluster_underlying_data(S_flat, X).reshape(
        (bmm.n_components,) + S_shape)[use_means]
    np.save(args.spec_save_parts, S_clusters)

    if args.viz_spec_parts is not None:
        ncols = int(np.sqrt(args.n_components))
        # True division before ceil: with Python 2 integer division the
        # quotient was floored first, which could leave the grid with fewer
        # cells than there are clusters.
        nrows = int(np.ceil(args.n_components / float(ncols)))

        plt.close('all')
        fig = plt.figure(1, (6, 6))
        grid = ImageGrid(
            fig,
            111,  # similar to subplot(111)
            nrows_ncols=(nrows, ncols),
            axes_pad=0.001,  # pad between axes in inch.
        )

        n_shown = S_clusters.shape[0]
        for i in range(nrows * ncols):
            ax = grid[i]
            # Cells beyond the kept clusters stay empty but are styled alike.
            if i < n_shown:
                ax.imshow(S_clusters[i],
                          cmap=cm.binary,
                          interpolation='nearest')
            for side in ('bottom', 'top', 'left', 'right'):
                ax.spines[side].set_color('red')
            for a in ax.axis.values():
                a.toggle(all=False)

        plt.savefig('%s' % args.viz_spec_parts, bbox_inches='tight')