예제 #1
0
def main():
    """Entry point for the GMM stage: trains or tests depending on opt.stage."""
    opt = get_opt()
    print(opt)
    print("GMM: Start to %s, named: %s!" % (opt.stage, "GMM"))

    # Build the dataset and its loader for the GMM stage.
    gmm_dataset = Dataset(opt, "GMM")
    loader = DataLoader(opt, gmm_dataset)

    model = GMM(opt)

    if opt.stage == 'train':
        # Resume from a checkpoint when one is supplied and exists on disk.
        if opt.checkpoint != '' and os.path.exists(opt.checkpoint):
            load_checkpoint(model, opt.checkpoint)
        train_gmm(opt, loader, model)
        ckpt_path = os.path.join(opt.checkpoint_dir, opt.name, 'gmm_trained.pth')
        save_checkpoint(model, ckpt_path)
    elif opt.stage == 'test':
        load_checkpoint(model, opt.checkpoint)
        # Inference only: no gradients needed.
        with torch.no_grad():
            test_gmm(opt, loader, model)
    else:
        raise NotImplementedError('Please input train or test stage')

    print('Finished %s stage, named: %s!' % (opt.datamode, opt.name))
def fit_retry(k, data, variant, init, attempt=1):
    """Fit a GMM on *data*, retrying on ValueError until attempt 100.

    If attempt reaches 100, gives up: prints a failure notice and returns a
    placeholder GMM whose likelihood is forced to 0.0.  With init == 'kmeans'
    the mixture means are seeded from KMeans centroids.
    """
    # Iterative form of the original retry recursion: each ValueError bumps
    # the attempt counter and tries again with a fresh model.
    while True:
        if attempt >= 100:
            print("Failed!")
            fallback = GMM(k)
            fallback.likelihood = 0.0
            return fallback
        try:
            if init == 'kmeans':
                centroids, _labels = KMeans(k).fit(data)
                model = GMM(k, mu=centroids, variant=variant,
                            progress=False, threshold=1e-3)
            else:
                model = GMM(k, variant=variant, progress=False, threshold=1e-3)
            model.fit(data)
            return model
        except ValueError:
            attempt += 1
예제 #3
0
def gmm_log_plot(d, type="full", title=""):
    """Fit GMMs with k = 1..4 mixtures on dataset *d* and plot log-likelihoods.

    type: "kmeans" seeds the means from KMeans centroids, "diag" uses the
    diagonal-covariance variant, anything else fits the default (full) GMM.
    NOTE(review): the parameter name `type` shadows the builtin; kept as-is
    because callers may pass it by keyword.
    """
    ks, ll = [], []
    # Fix: `xrange` is Python-2-only, while this function already uses
    # py3-style print(); `range` behaves identically on both versions here.
    for i in range(1, 5):
        data = dataset.read_data(d)
        if type == "kmeans":
            gmm = GMM(i, mu=KMeans(i).fit(data)[0])
        elif type == "diag":
            gmm = GMM(i, variant="diag")
        else:
            gmm = GMM(i)
        gmm.fit(data)
        ks.append(i)
        # Negated likelihood is what gets plotted.
        ll.append(-gmm.likelihood)
        print("Likelihood for k = {} => {}".format(i, gmm.likelihood))
    plot_loglikelihood(ks, ll, label=type, title=title)
    pl.ylabel("Log Likelihood")
    pl.xlabel("Number of mixtures")
    pl.draw()
예제 #4
0
def _parse_args():
    """Build and parse the command-line options for DAGMM training."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--height', '-ht', type=int, default=32)
    parser.add_argument('--width', '-wd', type=int, default=32)
    parser.add_argument('--channel', '-ch', type=int, default=1)
    parser.add_argument('--batch_size', '-bs', type=int, default=64)
    parser.add_argument('--nb_epoch', '-e', type=int, default=1000)
    parser.add_argument('--latent_dim', '-ld', type=int, default=2)
    parser.add_argument('--save_steps', '-ss', type=int, default=1)
    parser.add_argument('--logdir', '-log', type=str, default="../logs")
    parser.add_argument('--upsampling', '-up', type=str, default="deconv")
    parser.add_argument('--downsampling', '-down', type=str, default="stride")
    parser.add_argument('-lr', '--learning_rate', type=float, default=1e-4)
    parser.add_argument('--naef', '-naef', type=int, default=16)
    parser.add_argument('--distances', '-d', nargs='+', default=['mse'])
    parser.add_argument('--nb_components', '-c', type=int, default=4)
    return parser.parse_args()


def main():
    """Assemble and train a DAGMM (autoencoder + estimator + GMM) on digits."""
    args = _parse_args()

    # Persist the run configuration next to the logs.
    args_to_csv(os.path.join(args.logdir, 'config.csv'), args)

    train_x = get_digits([0, 1], [4000, 10])
    image_sampler = ImageSampler(
        target_size=(args.width, args.height),
        color_mode='gray' if args.channel == 1 else 'rgb',
        is_training=True).flow(train_x, batch_size=args.batch_size)

    # Feature vector = latent code plus one reconstruction-distance per metric.
    nb_features = args.latent_dim + len(args.distances)

    autoencoder = AutoEncoder((args.height, args.width, args.channel),
                              latent_dim=args.latent_dim,
                              first_filters=args.naef,
                              downsampling=args.downsampling,
                              upsampling=args.upsampling,
                              distances=args.distances)
    estimator = EstimationNetwork((nb_features, ),
                                  dense_units=[256, args.nb_components])
    gmm = GMM(args.nb_components, nb_features)

    dagmm = DAGMM(autoencoder, estimator, gmm)
    dagmm.fit(image_sampler,
              nb_epoch=args.nb_epoch,
              save_steps=args.save_steps,
              logdir=args.logdir)
예제 #5
0
def gmm_with_kmeans(k, data):
    """Fit and plot two k-component GMMs on a named dataset: one seeded with
    KMeans centroids, one with the default initialization."""
    data = dataset.read_data(data)
    centroids, labels = KMeans(k).fit(data)

    # KMeans-seeded mixture.
    seeded = GMM(k, mu=centroids)
    plotMOG(data, params(seeded.fit(data)),
            title="GMM with KMeans likelihood={}".format(seeded.likelihood))

    # Default-initialized mixture for comparison.
    unseeded = GMM(k)
    plotMOG(data, params(unseeded.fit(data)),
            title="GMM general likelihood={}".format(unseeded.likelihood))
    pl.show()
예제 #6
0
def gmm_test(k, variant, data_set, title):
    """Fit a k-component GMM with the given covariance variant on a named
    dataset, plot the resulting mixture, and print its log-likelihood."""
    gmm = GMM(k, variant=variant)
    data = dataset.read_data(name=data_set)
    res = gmm.fit(data)
    # Plot the fitted mixture; the title carries the final likelihood value.
    plotMOG(data, params(res, variant), title=title + " Likelihood={}".format(gmm.likelihood))
    # NOTE(review): Python 2 print statement — this snippet predates Python 3.
    print "Log Likelihood: ", str(gmm.likelihood)
예제 #7
0
    #     print(counter)
    #     if counter == 100:
    #         break

    kf = KFold(n_splits=3, shuffle=True)
    puritites = []
    pars = [10, 30, 40, 50, 60]
    for par in pars:
        print(par)
        r = []
        pca = PCA(n_components=par)
        w = pca.fit_transform(hogs)
        for train_index, test_index in kf.split(w):
            print("TRAIN:", train_index, "TEST:", test_index)
            hog_temp_train, hog_temp_test = w[train_index], w[test_index]
            y_temp_train, y_temp_test = train_labels[
                train_index], train_labels[test_index]
            gmm = GMM(n_components=10, max_iter=120)
            gmm.fit(hog_temp_train)
            predicted = gmm.predict(hog_temp_test)
            acc = purity_score(predicted, y_temp_test)
            r.append(acc)
            print("total purity: {}".format(acc))
        r = np.array(r)
        puritites.append(r.mean())
        print("after")
    acc_max_arg = np.argmax(puritites)
    print("best {}: {}".format(pars[acc_max_arg], puritites[acc_max_arg]))
    for a, c in zip(puritites, pars):
        print("{}: {}".format(c, a))
예제 #8
0
        (train_imgs.shape[0], train_imgs.shape[1] * train_imgs.shape[2]))
    # train_imgs = train_imgs +128
    # model = NMF(n_components=20, init='random', random_state=0, verbose=True)
    # W = model.fit_transform(X=train_imgs)
    # H = model.components_
    #
    # # test_imgs, test_labels = load([i for i in range(10)],'testing')
    # W = np.array(W)
    #
    # print(W.shape)

    train_labels = train_labels.reshape(60000)

    label_gmm = []
    # gmm = mixture.GaussianMixture(n_components=10, verbose=True, max_iter=120)
    gmm = GMM(n_components=10, max_iter=120)
    counter = 1
    while True:
        pca = PCA(n_components=counter)
        W = pca.fit_transform(train_imgs)
        gmm.fit(W)
        l = gmm.predict(W)

        # for i in range(train_dss.shape[0]):
        #     label_temp = []
        #     for j in range(len(train_dss[i])):
        #         label_temp.append(l[counter])
        #         counter += 1
        #     temp = np.argmax(np.bincount(label_temp))
        #     label_gmm.append(temp)
        # label_gmm = np.array(label_gmm)