import os

import torch

# get_opt, Dataset, DataLoader, GMM, and the train/test/checkpoint helpers
# come from the surrounding project.


def main():
    opt = get_opt()
    print(opt)
    print("GMM: Start to %s, named: %s!" % (opt.stage, "GMM"))

    # dataset setup
    dataset = Dataset(opt, "GMM")
    dataset_loader = DataLoader(opt, dataset)

    model = GMM(opt)
    if opt.stage == 'train':
        # Resume from a checkpoint if one was given and exists on disk.
        if opt.checkpoint != '' and os.path.exists(opt.checkpoint):
            load_checkpoint(model, opt.checkpoint)
        train_gmm(opt, dataset_loader, model)
        save_checkpoint(
            model,
            os.path.join(opt.checkpoint_dir, opt.name, 'gmm_trained.pth'))
    elif opt.stage == 'test':
        load_checkpoint(model, opt.checkpoint)
        with torch.no_grad():
            test_gmm(opt, dataset_loader, model)
    else:
        raise NotImplementedError('Please input train or test stage')

    print('Finished %s stage, named: %s!' % (opt.stage, opt.name))
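# Typical invocation of this train/test driver (a sketch: the script name
# and exact flag spellings are assumptions inferred from the opt fields
# used above, not confirmed by the source):
#   python gmm_main.py --stage train --name gmm_run
#   python gmm_main.py --stage test --name gmm_run \
#       --checkpoint checkpoints/gmm_run/gmm_trained.pth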
def fit_retry(k, data, variant, init, attempt=1):
    # Give up after 100 attempts and return a dummy model with zero
    # likelihood so callers can still compare by likelihood.
    if attempt >= 100:
        print("Failed!")
        g = GMM(k)
        g.likelihood = 0.0
        return g
    try:
        if init == 'kmeans':
            # Seed the mixture means with k-means centroids.
            kmeans = KMeans(k)
            centroids, labels = kmeans.fit(data)
            gmm = GMM(k, mu=centroids, variant=variant, progress=False,
                      threshold=1e-3)
        else:
            gmm = GMM(k, variant=variant, progress=False, threshold=1e-3)
        gmm.fit(data)
        return gmm
    except ValueError:
        # EM can fail on a degenerate initialization; retry from scratch.
        return fit_retry(k, data, variant, init, attempt=attempt + 1)
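# A minimal usage sketch for fit_retry, assuming the custom GMM/KMeans
# classes above; pick_best_k is a hypothetical helper, not part of the
# original code.
def pick_best_k(data, max_k=4, variant="full", init="kmeans"):
    # Fit 1..max_k components (with retries) and keep the best likelihood.
    models = [fit_retry(k, data, variant, init) for k in range(1, max_k + 1)]
    return max(models, key=lambda g: g.likelihood)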
def gmm_log_plot(d, type="full", title=""):
    # Fit GMMs with 1..4 mixture components and plot log-likelihood vs. k.
    ks, ll = [], []
    for i in range(1, 5):
        data = dataset.read_data(d)
        if type == "kmeans":
            # Initialize the mixture means from k-means centroids.
            gmm = GMM(i, mu=KMeans(i).fit(data)[0])
        elif type == "diag":
            gmm = GMM(i, variant="diag")
        else:
            gmm = GMM(i)
        gmm.fit(data)
        ks.append(i)
        ll.append(-gmm.likelihood)
        print("Likelihood for k = {} => {}".format(i, gmm.likelihood))
    plot_loglikelihood(ks, ll, label=type, title=title)
    pl.ylabel("Log Likelihood")
    pl.xlabel("Number of mixtures")
    pl.draw()
import argparse
import os

# args_to_csv, get_digits, ImageSampler, AutoEncoder, EstimationNetwork,
# GMM, and DAGMM come from the surrounding project.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--height', '-ht', type=int, default=32)
    parser.add_argument('--width', '-wd', type=int, default=32)
    parser.add_argument('--channel', '-ch', type=int, default=1)
    parser.add_argument('--batch_size', '-bs', type=int, default=64)
    parser.add_argument('--nb_epoch', '-e', type=int, default=1000)
    parser.add_argument('--latent_dim', '-ld', type=int, default=2)
    parser.add_argument('--save_steps', '-ss', type=int, default=1)
    parser.add_argument('--logdir', '-log', type=str, default="../logs")
    parser.add_argument('--upsampling', '-up', type=str, default="deconv")
    parser.add_argument('--downsampling', '-down', type=str, default="stride")
    parser.add_argument('-lr', '--learning_rate', type=float, default=1e-4)
    parser.add_argument('--naef', '-naef', type=int, default=16)
    parser.add_argument('--distances', '-d', nargs='+', default=['mse'])
    parser.add_argument('--nb_components', '-c', type=int, default=4)
    args = parser.parse_args()
    args_to_csv(os.path.join(args.logdir, 'config.csv'), args)

    train_x = get_digits([0, 1], [4000, 10])
    image_sampler = ImageSampler(
        target_size=(args.width, args.height),
        color_mode='gray' if args.channel == 1 else 'rgb',
        is_training=True).flow(train_x, batch_size=args.batch_size)

    # Estimation-network input: the latent code plus one reconstruction
    # distance feature per configured metric.
    nb_features = args.latent_dim + len(args.distances)

    autoencoder = AutoEncoder((args.height, args.width, args.channel),
                              latent_dim=args.latent_dim,
                              first_filters=args.naef,
                              downsampling=args.downsampling,
                              upsampling=args.upsampling,
                              distances=args.distances)
    estimator = EstimationNetwork((nb_features, ),
                                  dense_units=[256, args.nb_components])
    gmm = GMM(args.nb_components, nb_features)
    dagmm = DAGMM(autoencoder, estimator, gmm)
    dagmm.fit(image_sampler,
              nb_epoch=args.nb_epoch,
              save_steps=args.save_steps,
              logdir=args.logdir)
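# Example invocation (flags as defined above; the script name is a
# hypothetical placeholder):
#   python train_dagmm.py --latent_dim 2 --nb_components 4 \
#       --distances mse --logdir ../logs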
def gmm_with_kmeans(k, data):
    data = dataset.read_data(data)

    # Fit once with k-means-initialized means, then with the default
    # (random) initialization, plotting each mixture for comparison.
    kmeans = KMeans(k)
    centroids, labels = kmeans.fit(data)
    # plotKMeans(data, kmeans.mu, labels)
    # print(centroids.shape)
    gmm = GMM(k, mu=centroids)
    plotMOG(data, params(gmm.fit(data)),
            title="GMM with KMeans likelihood={}".format(gmm.likelihood))

    gmm = GMM(k)
    plotMOG(data, params(gmm.fit(data)),
            title="GMM general likelihood={}".format(gmm.likelihood))
    pl.show()
def gmm_test(k, variant, data_set, title):
    gmm = GMM(k, variant=variant)
    data = dataset.read_data(name=data_set)
    res = gmm.fit(data)
    plotMOG(data, params(res, variant),
            title=title + " Likelihood={}".format(gmm.likelihood))
    print("Log Likelihood:", gmm.likelihood)
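# Example calls (a sketch: the dataset names and titles are hypothetical;
# dataset.read_data, plotMOG, and params are the project helpers used above):
# gmm_test(2, "full", "data_1_small", title="GMM, full covariance")
# gmm_test(2, "diag", "data_1_small", title="GMM, diagonal covariance")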
# print(counter)
# if counter == 100:
#     break

# Cross-validate GMM clustering purity over several PCA dimensionalities
# (imports assumed: sklearn-style KFold/PCA with GaussianMixture as GMM,
# matching the commented-out alias in the snippet below).
import numpy as np
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture as GMM
from sklearn.model_selection import KFold

kf = KFold(n_splits=3, shuffle=True)
purities = []
pars = [10, 30, 40, 50, 60]
for par in pars:
    print(par)
    r = []
    pca = PCA(n_components=par)
    w = pca.fit_transform(hogs)
    for train_index, test_index in kf.split(w):
        print("TRAIN:", train_index, "TEST:", test_index)
        hog_temp_train, hog_temp_test = w[train_index], w[test_index]
        y_temp_train, y_temp_test = (train_labels[train_index],
                                     train_labels[test_index])
        gmm = GMM(n_components=10, max_iter=120)
        gmm.fit(hog_temp_train)
        predicted = gmm.predict(hog_temp_test)
        acc = purity_score(predicted, y_temp_test)
        r.append(acc)
        print("total purity: {}".format(acc))
    r = np.array(r)
    purities.append(r.mean())
print("after")
acc_max_arg = np.argmax(purities)
print("best {}: {}".format(pars[acc_max_arg], purities[acc_max_arg]))
for a, c in zip(purities, pars):
    print("{}: {}".format(c, a))
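# purity_score is called above but not defined in this snippet. A minimal
# sketch of the standard cluster-purity metric, matching the assumed
# signature purity_score(predicted_clusters, true_labels):
import numpy as np
from sklearn.metrics.cluster import contingency_matrix


def purity_score(y_pred, y_true):
    # Credit each predicted cluster with its majority true label, then
    # normalize by the total number of samples.
    m = contingency_matrix(y_true, y_pred)
    return np.sum(np.amax(m, axis=0)) / np.sum(m)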
# (continues from earlier code that loads the 60000-sample training images
# and labels; the reshape flattens each image into a feature vector)
train_imgs = train_imgs.reshape(
    (train_imgs.shape[0], train_imgs.shape[1] * train_imgs.shape[2]))
# train_imgs = train_imgs + 128
# model = NMF(n_components=20, init='random', random_state=0, verbose=True)
# W = model.fit_transform(X=train_imgs)
# H = model.components_
#
# test_imgs, test_labels = load([i for i in range(10)], 'testing')
# W = np.array(W)
# print(W.shape)

train_labels = train_labels.reshape(60000)
label_gmm = []
# gmm = mixture.GaussianMixture(n_components=10, verbose=True, max_iter=120)
gmm = GMM(n_components=10, max_iter=120)

counter = 1
while True:
    # Project onto `counter` principal components, then cluster with the GMM.
    # Note: nothing in the live code advances `counter` or breaks the loop;
    # the increment only appears in the commented-out block below.
    pca = PCA(n_components=counter)
    W = pca.fit_transform(train_imgs)
    gmm.fit(W)
    l = gmm.predict(W)
    # for i in range(train_dss.shape[0]):
    #     label_temp = []
    #     for j in range(len(train_dss[i])):
    #         label_temp.append(l[counter])
    #         counter += 1
    #     temp = np.argmax(np.bincount(label_temp))
    #     label_gmm.append(temp)
    # label_gmm = np.array(label_gmm)