def main():
    """Train VaDER on the full dataset, then print the cluster assignments."""
    output_path = Path('../output/try2_exactly_7_times')
    output_path.mkdir(exist_ok=True)
    save_path = output_path / 'vader.ckpt'
    # w_train, x_train, names = read_premade(DAYS_ORDERED)
    w_train, x_train, names = read_data()
    # Standardize with the global mean/std of the whole array (not per-feature).
    x_train = (x_train - np.mean(x_train)) / np.std(x_train)
    vader = VADER(x_train=x_train, w_train=w_train, save_path=save_path,
                  n_hidden=[128, 32], k=5, learning_rate=1e-3,
                  output_activation=None, recurrent=True, batch_size=8,
                  alpha=0.1)
    # Pre-train without the latent loss, then train with it.
    vader.pre_fit(n_epoch=20, verbose=True)
    vader.fit(n_epoch=100, verbose=True)
    # Cluster assignments and reconstructions for the training data.
    clusters = vader.cluster(x_train, w_train)
    reconstructions = vader.predict(x_train)
    print(vader.get_clusters_on_x())
def test1():
    """Train VaDER on prepared data, persisting the training arrays to disk.

    Fix: the pickle files were opened with bare ``open(...)`` calls whose
    handles were never closed; context managers now guarantee closure even
    if pickling raises. Dead commented-out ``json.dump`` lines were removed.
    """
    save_path = os.path.join('test_vader', 'vader.ckpt')
    x_train, y_train, w_train = prepare_data()
    with open("x_train.pickle", "wb") as f:
        pickle.dump(x_train, f)
    with open("y_train.pickle", "wb") as f:
        pickle.dump(y_train, f)
    with open("w_train.pickle", "wb") as f:
        pickle.dump(w_train, f)
    # Note: y_train is used purely for monitoring performance when a ground
    # truth clustering is available. It can be omitted if no ground truth
    # is available.
    vader = VADER(x_train=x_train, w_train=w_train, y_train=y_train,
                  save_path=save_path, n_hidden=[12, 2], k=4,
                  learning_rate=1e-3, output_activation=None, recurrent=True,
                  batch_size=16)
    # pre-train without latent loss
    vader.pre_fit(n_epoch=50, verbose=True)
    # train with latent loss
    vader.fit(n_epoch=50, verbose=True)
    # get the clusters
    c = vader.cluster(x_train)
    # get the re-constructions
    p = vader.predict(x_train)
def _fit_vader(self, X_train: ndarray, W_train: Optional[ndarray]) -> VADER:
    """Build a VaDER model from the stored hyper-parameters and train it.

    Runs a short pre-fit (no latent loss) followed by the full fit with
    early stopping, then returns the trained model.
    """
    params = self.params_dict
    # noinspection PyTypeChecker
    model = VADER(X_train=X_train, W_train=W_train, save_path=None,
                  n_hidden=params["n_hidden"], k=params["k"],
                  seed=self.seed, learning_rate=params["learning_rate"],
                  recurrent=True, batch_size=params["batch_size"],
                  alpha=params["alpha"])
    model.pre_fit(n_epoch=10, verbose=False)
    model.fit(n_epoch=self.n_epoch, verbose=False,
              early_stopping_ratio=self.early_stopping_ratio,
              early_stopping_batch_size=self.early_stopping_batch_size)
    return model
def test_vader_save_load(self):
    """A saved-then-reloaded model must yield identical cluster assignments."""
    save_path = "test_vader_save_load"
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    X_train, W_train, y_train = generate_x_w_y(7, 400)
    # noinspection PyTypeChecker
    vader = VADER(X_train=X_train, W_train=W_train, y_train=y_train,
                  save_path=save_path, n_hidden=[12, 2], k=4,
                  learning_rate=1e-3, output_activation=None,
                  recurrent=True, batch_size=16)
    vader.pre_fit(n_epoch=10, verbose=True)
    vader.fit(n_epoch=10, verbose=True)
    clusters_original = vader.cluster(X_train)
    restored_model = VADER.load_model(save_path, X_train, W_train, y_train)
    clusters_restored = restored_model.cluster(X_train)
    # Remove the checkpoint directory before asserting so no files linger.
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    assert list(clusters_original) == list(clusters_restored)
def test_vader_recur(self):
    """End-to-end smoke test of VaDER in recurrent mode."""
    X_train, W_train, y_train = generate_x_w_y(7, 400)
    # Note: y_train is used purely for monitoring performance when a ground
    # truth clustering is available. It can be omitted if no ground truth
    # is available.
    # noinspection PyTypeChecker
    vader = VADER(X_train=X_train, W_train=W_train, y_train=y_train,
                  save_path=None, n_hidden=[12, 2], k=4, learning_rate=1e-3,
                  output_activation=None, recurrent=True, batch_size=16)
    # Pre-train without the latent loss, then train with it.
    vader.pre_fit(n_epoch=10, verbose=True)
    vader.fit(n_epoch=10, verbose=True)
    # Cluster assignments: non-trivial and one per sample.
    clustering = vader.cluster(X_train)
    assert any(clustering)
    assert len(clustering) == len(X_train)
    # Reconstructions must match the input shape.
    prediction = vader.predict(X_train)
    assert prediction.shape == X_train.shape
    # Both loss components must be present and non-negative.
    loss = vader.get_loss(X_train)
    assert loss
    assert "reconstruction_loss" in loss
    assert "latent_loss" in loss
    assert loss["reconstruction_loss"] >= 0
    assert loss["latent_loss"] >= 0
def test_vader_save_load_transfer_learning(self):
    """Train, save weights, reload them into a new model, and fine-tune.

    Fix: the weights path was built with a hard-coded, doubled separator
    (``f"{save_folder}//weights"``); ``os.path.join`` is used instead for
    a portable, canonical path.
    """
    save_folder = "test_vader_save_load_transfer_learning"
    save_path = os.path.join(save_folder, "weights")
    if os.path.exists(save_folder):
        shutil.rmtree(save_folder)
    X_train, W_train, y_train = generate_x_w_y(7, 400)
    # noinspection PyTypeChecker
    vader = VADER(X_train=X_train, W_train=W_train, y_train=y_train,
                  save_path=save_path, n_hidden=[12, 2], k=4,
                  learning_rate=1e-3, output_activation=None,
                  recurrent=True, batch_size=16)
    vader.pre_fit(n_epoch=10, verbose=True)
    vader.fit(n_epoch=10, verbose=True)
    clustering_before_loading = vader.cluster(X_train)
    # Fresh data + fresh model; warm-start from the saved weights.
    X_train_ft, W_train_ft, y_train_ft = generate_x_w_y(7, 400)
    vader = VADER(X_train=X_train_ft, W_train=W_train_ft,
                  y_train=y_train_ft, save_path=None, n_hidden=[12, 2], k=4,
                  learning_rate=1e-3, output_activation=None,
                  recurrent=True, batch_size=16)
    vader.load_weights(save_path)
    vader.pre_fit(n_epoch=10, verbose=True)
    vader.fit(n_epoch=10, verbose=True)
    # get the clusters
    clustering = vader.cluster(X_train_ft)
    # Clean up saved weights before asserting so no files linger.
    if os.path.exists(save_folder):
        shutil.rmtree(save_folder)
    assert any(clustering)
    assert len(clustering) == len(X_train_ft)
    # get the re-constructions
    prediction = vader.predict(X_train_ft)
    assert prediction.shape == X_train_ft.shape
    # compute the loss given the network
    loss = vader.get_loss(X_train_ft)
    assert loss
    assert "reconstruction_loss" in loss
    assert "latent_loss" in loss
    assert loss["reconstruction_loss"] >= 0
    assert loss["latent_loss"] >= 0
def test_vader_nonrecur(self):
    """Smoke test of VaDER run non-recurrently (ordinary VAE with GM prior)."""
    NUM_OF_TIME_POINTS = 7
    X_train, y_train = generate_x_y_for_nonrecur(NUM_OF_TIME_POINTS, 400)
    # Run VaDER non-recurrently (ordinary VAE with GM prior)
    # noinspection PyTypeChecker
    vader = VADER(X_train=X_train, y_train=y_train, n_hidden=[12, 2], k=2,
                  learning_rate=1e-3, output_activation=None,
                  recurrent=False, batch_size=16)
    # Pre-train without the latent loss, then train with it.
    vader.pre_fit(n_epoch=10, verbose=True)
    vader.fit(n_epoch=10, verbose=True)
    # Cluster assignments: non-trivial and one per sample.
    clustering = vader.cluster(X_train)
    assert any(clustering)
    assert len(clustering) == len(X_train)
    # Reconstructions must match the input shape.
    prediction = vader.predict(X_train)
    assert prediction.shape == X_train.shape
    # Both loss components must be present and non-negative.
    loss = vader.get_loss(X_train)
    assert loss
    assert "reconstruction_loss" in loss
    assert "latent_loss" in loss
    assert loss["reconstruction_loss"] >= 0
    assert loss["latent_loss"] >= 0
    # Sampling from the generative model yields one cluster label and one
    # time series per requested sample.
    NUM_OF_GENERATED_SAMPLES = 10
    generated_samples = vader.generate(NUM_OF_GENERATED_SAMPLES)
    assert generated_samples
    assert "clusters" in generated_samples
    assert "samples" in generated_samples
    assert len(generated_samples["clusters"]) == NUM_OF_GENERATED_SAMPLES
    assert generated_samples["samples"].shape == (NUM_OF_GENERATED_SAMPLES,
                                                  NUM_OF_TIME_POINTS)
def test_vader_transfer_learning(self):
    """Train on one dataset, then swap inputs in place and fine-tune."""
    X_train, W_train, y_train = generate_x_w_y(7, 400)
    # noinspection PyTypeChecker
    vader = VADER(X_train=X_train, W_train=W_train, y_train=y_train,
                  save_path=None, n_hidden=[12, 2], k=4, learning_rate=1e-3,
                  output_activation=None, recurrent=True, batch_size=16)
    # Initial training: pre-fit without latent loss, then full fit.
    vader.pre_fit(n_epoch=10, verbose=True)
    vader.fit(n_epoch=10, verbose=True)
    # Replace the model inputs with a fresh dataset and fine-tune the
    # already-trained weights on it.
    X_train_ft, W_train_ft, y_train_ft = generate_x_w_y(7, 400)
    vader.set_inputs(X_train_ft, W_train_ft, y_train_ft)
    vader.pre_fit(n_epoch=10, verbose=True)
    vader.fit(n_epoch=10, verbose=True)
    # Cluster assignments: non-trivial and one per sample.
    clustering = vader.cluster(X_train_ft)
    assert any(clustering)
    assert len(clustering) == len(X_train_ft)
    # Reconstructions must match the input shape.
    prediction = vader.predict(X_train_ft)
    assert prediction.shape == X_train_ft.shape
    # Both loss components must be present and non-negative.
    loss = vader.get_loss(X_train_ft)
    assert loss
    assert "reconstruction_loss" in loss
    assert "latent_loss" in loss
    assert loss["reconstruction_loss"] >= 0
    assert loss["latent_loss"] >= 0
def test2():
    """Smoke-test VaDER in non-recurrent mode on the second test dataset.

    Fix: the original computed ``vader.get_loss(x_train)`` twice (an exact
    duplicate statement); the redundant call was removed. The ambiguous
    single-letter ``l`` was renamed to ``loss``.
    """
    x_train, y_train = get_dete_for_seconed_test()
    vader = VADER(x_train=x_train, y_train=y_train, n_hidden=[12, 2], k=2,
                  learning_rate=1e-3, output_activation=None,
                  recurrent=False, batch_size=16)
    # pre-train without latent loss
    vader.pre_fit(n_epoch=50, verbose=True)
    # train with latent loss
    vader.fit(n_epoch=50, verbose=True)
    # get the clusters
    c = vader.cluster(x_train)
    # get the re-constructions
    p = vader.predict(x_train)
    # compute the loss given the network
    loss = vader.get_loss(x_train)
    # generate some samples
    g = vader.generate(10)
# Build a distinct, reproducible seed per (split i, repeat j); fall back to
# unseeded randomness when no base seed was supplied.
seed = f"{args.seed}{i}{j}" if args.seed else None
# BUG FIX: the model was constructed with seed=args.seed, leaving the
# per-repeat `seed` computed above entirely unused, so every repeat trained
# with the same seed. Pass the derived per-repeat seed instead.
# noinspection PyTypeChecker
vader = VADER(X_train=input_data, W_train=input_weights, k=args.k,
              n_hidden=n_hidden, learning_rate=args.learning_rate,
              batch_size=args.batch_size, alpha=args.alpha, seed=seed,
              save_path=args.save_path, output_activation=None,
              recurrent=True)
vader.pre_fit(n_epoch=10, verbose=False)
vader.fit(n_epoch=args.n_epoch, verbose=False,
          early_stopping_ratio=args.early_stopping_ratio,
          early_stopping_batch_size=args.early_stopping_batch_size)
fig = plot_loss_history(vader, model_name=f"Model #{j}")
loss_history_pdf.savefig(fig)
# noinspection PyTypeChecker
clustering = vader.cluster(input_data, input_weights)
# Effective k = number of clusters actually populated by this repeat.
effective_k = len(Counter(clustering))
y_pred_repeats.append(clustering)
effective_k_repeats.append(effective_k)
train_reconstruction_loss_repeats.append(vader.reconstruction_loss[-1])
train_latent_loss_repeats.append(vader.latent_loss[-1])
# Aggregate over repeats: consensus-cluster with the mean effective k.
effective_k = np.mean(effective_k_repeats)
num_of_clusters = round(float(effective_k))
clustering = ClusteringUtils.consensus_clustering(y_pred_repeats,
                                                  num_of_clusters)
# It can be omitted if no ground truth is available.
vader = VADER(X_train=X_train, W_train=W_train, y_train=y_train,
              save_path=save_path, n_hidden=[12, 2], k=4, learning_rate=1e-3,
              output_activation=None, recurrent=True, batch_size=16)
# pre-train without latent loss
vader.pre_fit(n_epoch=50, verbose=True)
# train with latent loss
vader.fit(n_epoch=50, verbose=True)
# get the clusters
c = vader.cluster(X_train)
# get the re-constructions
p = vader.predict(X_train)
# compute the loss given the network
l = vader.get_loss(X_train)

# Run VaDER non-recurrently (ordinary VAE with GM prior)
# Fix: the redundant int(8) / int(2e2) wrappers were replaced with plain
# integer literals (identical values, no spurious conversions).
nt = 8    # number of time points per sample
ns = 200  # samples drawn per Gaussian component
sigma = np.diag(np.repeat(2, nt))
mu1 = np.repeat(-1, nt)
mu2 = np.repeat(1, nt)
# Two Gaussian clouds with opposite means, used as synthetic input below.
a1 = np.random.multivariate_normal(mu1, sigma, ns)
a2 = np.random.multivariate_normal(mu2, sigma, ns)