예제 #1
0
 def _train(self, data, params, verbose):
     """Fit a linear CorEx model on `data` and return a hard cluster
     label for every observed variable (argmax of mutual information
     over the learned latent factors).
     """
     import linearcorex
     if verbose:
         print("Training {} ...".format(self.name))
     t0 = time.time()
     model = linearcorex.Corex(
         n_hidden=params['n_hidden'],
         max_iter=params['max_iter'],
         anneal=params['anneal'],
     )
     model.fit(data)
     # mis has one column per observed variable; the row index of the
     # largest MI is that variable's cluster assignment.
     labels = model.mis.argmax(axis=0)
     elapsed = time.time() - t0
     if verbose:
         print("\tElapsed time {:.1f}s".format(elapsed))
     return labels
예제 #2
0
 def _train(self, train_data, params, verbose):
     """Fit an independent linear CorEx model on each dataset in
     `train_data` and return (list of estimated covariances, None).
     """
     import linearcorex

     def _fit_one(x):
         # A fresh model per dataset keeps the estimates independent.
         model = linearcorex.Corex(n_hidden=params['n_hidden'],
                                   max_iter=params['max_iter'],
                                   anneal=params['anneal'])
         model.fit(x)
         return model.get_covariance()

     if verbose:
         print("Training {} ...".format(self.name))
     t0 = time.time()
     covs = [_fit_one(x) for x in train_data]
     elapsed = time.time() - t0
     if verbose:
         print("\tElapsed time {:.1f}s".format(elapsed))
     return covs, None
예제 #3
0
def test_corex():
    r""" Test pytorch linear CorEx implementation.
    Check if the performance of pytorch CorEx matches that of standard CorEx.

    Fix/improvements over the previous version:
    * the two near-identical training loops are folded into one helper;
    * ``nv`` for the pytorch model is taken from the data width instead of
      the hard-coded 128, so the test survives a change of fixture.
    """
    print("=" * 100)
    print("Testing PyTorch Linear CorEx ...")

    # load data
    resources = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'resources')
    data_file = os.path.join(resources, 'test_corex_data.npy')
    data = np.load(data_file)
    print("Data is loaded, shape = {}".format(data.shape))

    n_batches = 5
    batch_size = 32

    def _batch_scores(make_model):
        # Train one model per batch of rows and collect its NLL score.
        scores = []
        for i in tqdm(range(n_batches)):
            X = data[batch_size * i:batch_size * (i + 1)]
            model = make_model(X)
            model.fit(X)
            covs = model.get_covariance()
            scores.append(calculate_nll_score(data=[X], covs=[covs]))
        return scores

    # train linear corex (reference implementation)
    lc_scores = _batch_scores(
        lambda X: linearcorex.Corex(n_hidden=8, max_iter=500, verbose=0))

    # train pytorch corex; nv comes from the data rather than a constant
    pylc_scores = _batch_scores(
        lambda X: Corex(nv=X.shape[1], n_hidden=8, max_iter=1000, verbose=0))

    lc_mean = np.mean(lc_scores)
    pylc_mean = np.mean(pylc_scores)
    print("pylc score: {:.4f}, lc score: {:.4f}".format(pylc_mean, lc_mean))
    # pytorch version must be at most 1% worse (relative) than the baseline
    assert (pylc_mean - lc_mean) / (np.abs(lc_mean) + 1e-6) < 0.01
예제 #4
0
    # NOTE(review): Python 2 code (print statements, cPickle) — fragment of a
    # larger CLI routine; X, variable_names, options, layers, verbose and the
    # helpers lc/safe_open are defined above this view.
    if verbose:
        print '\nData summary: X has %d rows and %d columns' % X.shape
        print 'Variable names are: ' + ','.join(map(str, list(enumerate(variable_names))))

    # Run CorEx on data
    if verbose:
        print 'Getting CorEx results'
    if not options.regraph:
        # Train a stack of CorEx layers: each layer fits on the latent
        # factors produced by the previous one.
        for l, layer in enumerate(layers):
            if verbose:
                print "Layer ", l
            if l == 0:
                t0 = time()
                # First layer trains directly on the observed data X.
                corexes = [lc.Corex(n_hidden=layer, verbose=verbose, gaussianize=options.gaussianize,
                                    missing_values=options.missing, eliminate_synergy=options.additive,
                                    gpu=options.gpu,
                                    max_iter=options.max_iter).fit(X)]
                print 'Time for first layer: %0.2f' % (time() - t0)
                X_prev = X
            else:
                # Deeper layers train on the previous layer's transformed output.
                X_prev = corexes[-1].transform(X_prev)
                corexes.append(lc.Corex(n_hidden=layer, verbose=verbose, gaussianize=options.gaussianize,
                                        gpu=options.gpu,
                                        eliminate_synergy=options.additive, max_iter=options.max_iter).fit(X_prev))
        # Report total correlation per layer and pickle each trained model.
        for l, corex in enumerate(corexes):
            # The learned model can be loaded again using ce.Corex().load(filename)
            print 'TC at layer %d is: %0.3f' % (l, corex.tc)
            cPickle.dump(corex, safe_open(options.output + '/layer_' + str(l) + '.dat', 'w'))
    else:
        # --regraph: skip training and reload the previously pickled layers.
        corexes = [cPickle.load(open(options.output + '/layer_' + str(l) + '.dat')) for l in range(len(layers))]
예제 #5
0
import linearcorex as lc
import numpy as np


def _banner(title):
    # Section header printed between every step of the demo.
    print('\n%s\n==================' % title)


_banner('Input Matrix')
# Column roles: A = one correlated group, iA = inverted copy, C = second group.
#              A     A     A     A     A     iA    C     C     A     C
X = np.array([[0.01, 0.01, 0.01, 0.01, 0.01, 1.00, 1.00, 1.00, 0.01, 1.00],
              [0.01, 0.01, 0.01, 0.01, 0.01, 1.00, 0.00, 0.00, 0.01, 0.00],
              [1.00, 1.00, 1.00, 1.00, 1.00, 0.01, 0.00, 0.00, 1.00, 0.00],
              [1.00, 1.00, 1.00, 1.00, 1.00, 0.01, 1.00, 1.00, 1.00, 1.00],
              [1.00, 1.00, 1.00, 1.00, 1.00, 0.01, 1.00, 1.00, 1.00, 1.00]])
print(X)

_banner('Fitting...')
model = lc.Corex(n_hidden=2, max_iter=1000, verbose=True)
model.fit(X)

_banner('Clusters')
print(model.clusters())

_banner('Covariance')
print(model.get_covariance())

_banner('TCS')
print(model.tcs)

_banner('TC')
print(model.tc)

_banner('Prediction')
sample = np.array([[1., 1., 1., 1., 1., 0., 1., 1., 1., 1.]])
p, log_z = model.transform(sample, details=True)
예제 #6
0
                  ','.join(map(str, list(enumerate(variable_names)))))

    # Run CorEx on data
    # NOTE(review): fragment of a larger CLI routine — X, options, layers,
    # verbose, and lc are defined above this view; the Python 3 counterpart
    # of the legacy script (uses discourage_overlap instead of
    # eliminate_synergy).
    if verbose:
        print('Getting CorEx results')
    if not options.regraph:
        # Train a stack of CorEx layers: each layer fits on the latent
        # factors produced by the previous one.
        for l, layer in enumerate(layers):
            if verbose:
                print("Layer ", l)
            if l == 0:
                t0 = time()
                # First layer trains directly on the observed data X.
                corexes = [
                    lc.Corex(n_hidden=layer,
                             verbose=verbose,
                             gaussianize=options.gaussianize,
                             missing_values=options.missing,
                             discourage_overlap=options.additive,
                             gpu=options.gpu,
                             max_iter=options.max_iter).fit(X)
                ]
                print('Time for first layer: %0.2f' % (time() - t0))
                X_prev = X
            else:
                # Deeper layers train on the previous layer's transformed output.
                X_prev = corexes[-1].transform(X_prev)
                corexes.append(
                    lc.Corex(n_hidden=layer,
                             verbose=verbose,
                             gaussianize=options.gaussianize,
                             gpu=options.gpu,
                             discourage_overlap=options.additive,
                             max_iter=options.max_iter).fit(X_prev))
예제 #7
0
    # NOTE(review): fragment — this line is the tail of an if/else outside
    # the visible region, and the while-loop below is truncated at the end.
    # input_matrix, START/END_NUMBER_FACTORS, REPETITIONS and lc come from above.
    previous_latent_factors = input_matrix

# Sweep over candidate latent-factor counts; for each count, refit several
# times and keep the repetition with the highest total correlation (TC).
latent_factors = START_NUMBER_FACTORS
while latent_factors <= END_NUMBER_FACTORS:
    print('========= LATENT FACTOR %d =========' % latent_factors)

    # Best result seen so far for this factor count (tc=0 so any positive
    # TC from a repetition replaces the initial placeholder).
    best = {
        'clusters': [],
        'tcs': [],
        'tc': 0,
    }
    for repetition in range(0, REPETITIONS):
        print('Executing with %d latent factors, repetition %d...' %
              (latent_factors, repetition))
        print('Fitting...')
        out = lc.Corex(n_hidden=latent_factors, max_iter=10000, verbose=True)
        fit = out.fit(previous_latent_factors)
        transform = out.transform(previous_latent_factors, details=True)
        clusters = out.clusters()
        tcs = out.tcs
        tc = out.tc

        print('Clusters:\n%s' % str(clusters))
        print('TCS:\n%s' % str(tcs))
        print('TC:\n%.4f' % tc)

        # Keep this repetition only if it explains more total correlation.
        if tc > best['tc']:
            best['clusters'] = clusters
            best['tcs'] = tcs
            best['tc'] = tc
            best['fit'] = fit
예제 #8
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_hidden',
                        '-m',
                        default=64,
                        type=int,
                        help='number of hidden variables')
    parser.add_argument('--n_observed',
                        '-p',
                        default=128,
                        type=int,
                        help='number of observed variables')
    parser.add_argument('--snr',
                        '-s',
                        default=0.1,
                        type=float,
                        help='signal-to-noise ratio')
    parser.add_argument('--n_samples',
                        '-n',
                        default=300,
                        type=int,
                        help='number of samples')
    parser.add_argument('--num_extra_parents',
                        default=0.1,
                        type=float,
                        help='average number of extra parents for each x_i')
    parser.add_argument(
        '--num_correlated_zs',
        default=0,
        type=int,
        help='number of zs each z_i is correlated with (besides z_i itself)')
    parser.add_argument('--random_scale',
                        dest='random_scale',
                        action='store_true',
                        help='if true x_i will have random scales')
    parser.add_argument('--method',
                        '-M',
                        type=str,
                        choices=['linearcorex', 'pycorex'],
                        default='pycorex',
                        help='which implementation of corex to use')
    parser.add_argument('--device',
                        '-d',
                        type=str,
                        default='cpu',
                        help='which device to use for pytorch corex')
    parser.add_argument('--output_dir',
                        '-o',
                        type=str,
                        default='outputs/blessing/')
    parser.set_defaults(random_scale=False)
    args = parser.parse_args()
    print(args)

    p = args.n_observed
    m = args.n_hidden
    snr = args.snr
    n = args.n_samples
    assert p % m == 0

    # generate some data
    data, _ = data_tools.generate_approximately_modular(
        nv=p,
        m=m,
        ns=n,
        snr=snr,
        num_extra_parents=args.num_extra_parents,
        num_correlated_zs=args.num_correlated_zs,
        random_scale=args.random_scale)

    # select the method
    if args.method == 'linearcorex':
        method = linearcorex.Corex(n_hidden=m, verbose=1)
    else:
        method = PyCorex(nv=p,
                         n_hidden=m,
                         verbose=2,
                         max_iter=10000,
                         tol=1e-6,
                         device=args.device,
                         optimizer_class=torch.optim.Adam,
                         optimizer_params={'lr': 0.01})

    # train and compute the clustering score
    true_clusters = np.arange(m).repeat(p // m, axis=0)
    method.fit(data)

    if args.method == 'linearcorex':
        pred_clusters = method.mis.argmax(axis=0)
    else:
        pred_clusters = method.clusters()

    score = adjusted_rand_score(labels_true=true_clusters,
                                labels_pred=pred_clusters)
    print(pred_clusters, score)

    # save the results
    run_id = str(np.random.choice(10**18))

    save_dict = {
        'p': p,
        'm': m,
        'snr': snr,
        'n': n,
        'num_extra_parents': args.num_extra_parents,
        'num_correlated_zs': args.num_correlated_zs,
        'random_scale': args.random_scale,
        'method': args.method,
        'score': score,
        'run_id': run_id
    }

    output_file = os.path.join(args.output_dir, run_id + '.pkl')
    make_sure_path_exists(output_file)
    with open(output_file, 'wb') as fout:
        pickle.dump(save_dict, fout)
    def __perform_corex(self):
        """Reduce `self.embedding` to 2 latent factors with linear CorEx,
        min-max scale the factors, and return them transposed
        (one row per factor)."""
        corex_model = lc.Corex(n_hidden=2)
        latent = corex_model.fit_transform(self.embedding)
        scaled = MinMaxScaler().fit_transform(latent)
        return scaled.transpose()