# Trains a PLDA back-end (n_phi=200, n_iter=12) on the concatenated TRAIN and
# VALID outputs that make_dnn_prediction returns for f_z3, scores Z3_test with
# predict_log_proba, passes the result to evaluate() with the output path
# EXP_DIR/test_latent.pdf, then begins the latent-space visualisation calls.
# NOTE(review): this excerpt begins and ends inside a call's argument list;
# the statements that open/close those calls are not visible from here.
y_pred_proba=y_pred_proba, labels=labels, title="Test set (Deep prediction)", path=os.path.join(EXP_DIR, 'test_deep.pdf')) # ====== make a streamline classifier ====== # # training PLDA Z3_train, y_train = make_dnn_prediction(f_z3, X=train, title="TRAIN") print("Z3_train:", Z3_train.shape, y_train.shape) Z3_valid, y_valid = make_dnn_prediction(f_z3, X=valid, title="VALID") print("Z3_valid:", Z3_valid.shape, y_valid.shape) plda = PLDA(n_phi=200, random_state=K.get_rng().randint(10e8), n_iter=12, labels=labels, verbose=0) plda.fit(np.concatenate([Z3_train, Z3_valid], axis=0), np.concatenate([y_train, y_valid], axis=0)) y_pred_log_proba = plda.predict_log_proba(Z3_test) evaluate(y_true=X_test_true, y_pred_log_proba=y_pred_log_proba, labels=labels, title="Test set (PLDA - Latent prediction)", path=os.path.join(EXP_DIR, 'test_latent.pdf')) # ====== visualize ====== # visualize_latent_space(X_org=X_test_data, X_latent=Z1_test, name=X_test_name, labels=X_test_true, title="latent1") visualize_latent_space(X_org=X_test_data, X_latent=Z2_test, name=X_test_name,
# Builds alternative representations of X_train / X_score, each configured
# with NUM_DIM components and seeded with SEED where the model accepts it:
# t-SNE, LDA, PLDA (log-probabilities), GMM (weighted log-probabilities) and
# a Bernoulli RBM.
# NOTE(review): tsne.fit_transform is called separately on X_train and
# X_score, so presumably the two embeddings come from independent fits rather
# than one shared mapping -- confirm this is intended.
# NOTE(review): gmm._estimate_weighted_log_prob is an underscore-prefixed
# (non-public) method; verify it exists in the library version in use.
# NOTE(review): the BernoulliRBM(...) call is cut off at the end of this
# excerpt; its remaining arguments are not visible here.
# ====== tsne ====== # tsne = TSNE(n_components=NUM_DIM, perplexity=30.0, learning_rate=200.0, n_iter=1000, random_state=SEED) X_train_tsne = tsne.fit_transform(X_train) X_score_tsne = tsne.fit_transform(X_score) # ====== lda ====== # lda = LinearDiscriminantAnalysis(n_components=NUM_DIM) lda.fit(X_train, y_train) X_train_lda = lda.transform(X_train) X_score_lda = lda.transform(X_score) # ====== plda ====== # plda = PLDA(n_phi=NUM_DIM, random_state=SEED) plda.fit(X_train, y_train) X_train_plda = plda.predict_log_proba(X_train) X_score_plda = plda.predict_log_proba(X_score) # ====== gmm ====== # gmm = GaussianMixture(n_components=NUM_DIM, max_iter=100, covariance_type='full', random_state=SEED) gmm.fit(X_train) X_train_gmm = gmm._estimate_weighted_log_prob(X_train) X_score_gmm = gmm._estimate_weighted_log_prob(X_score) # ====== rbm ====== # rbm = BernoulliRBM(n_components=NUM_DIM, batch_size=8, learning_rate=0.0008, n_iter=8,
# ===========================================================================
# ====== network outputs on the test set ====== #
# One pass over the test data yields the outputs of f_pred_proba and of the
# three latent functions f_z1, f_z2, f_z3, in that order.
y_pred_proba, Z1_test, Z2_test, Z3_test = make_dnn_prediction(
    functions=[f_pred_proba, f_z1, f_z2, f_z3],
    X=X_test_data,
    title='TEST',
)
print("Test Latent:", Z1_test.shape, Z2_test.shape, Z3_test.shape)
y_pred = np.argmax(y_pred_proba, axis=-1)
evaluate(
    y_true=X_test_true,
    y_pred_proba=y_pred_proba,
    labels=labels,
    title="Test set (Deep prediction)",
    path=os.path.join(EXP_DIR, 'test_deep.pdf'),
)

# ====== PLDA classifier on top of the Z3 outputs ====== #
Z3_train, y_train = make_dnn_prediction(f_z3, X=train, title="TRAIN")
print("Z3_train:", Z3_train.shape, y_train.shape)
Z3_valid, y_valid = make_dnn_prediction(f_z3, X=valid, title="VALID")
print("Z3_valid:", Z3_valid.shape, y_valid.shape)
plda = PLDA(
    n_phi=200,
    random_state=K.get_rng().randint(10e8),
    n_iter=12,
    labels=labels,
    verbose=0,
)
# TRAIN and VALID are stacked along the first axis and fitted together.
plda.fit(
    np.concatenate((Z3_train, Z3_valid), axis=0),
    np.concatenate((y_train, y_valid), axis=0),
)
y_pred_log_proba = plda.predict_log_proba(Z3_test)
evaluate(
    y_true=X_test_true,
    y_pred_log_proba=y_pred_log_proba,
    labels=labels,
    title="Test set (PLDA - Latent prediction)",
    path=os.path.join(EXP_DIR, 'test_latent.pdf'),
)

# ====== plot the Z1 and Z2 latent spaces, then save the figure ====== #
for _latent, _title in ((Z1_test, "latent1"), (Z2_test, "latent2")):
    visualize_latent_space(
        X_org=X_test_data,
        X_latent=_latent,
        name=X_test_name,
        labels=X_test_true,
        title=_title,
    )
V.plot_save(os.path.join(EXP_DIR, 'latent.pdf'))
# Builds alternative representations of the training and scoring data:
# t-SNE on the PCA outputs (X_train_pca / X_score_pca), then t-SNE, LDA, PLDA
# (log-probabilities), GMM (weighted log-probabilities) and a Bernoulli RBM on
# X_train / X_score, each configured with NUM_DIM components.
# NOTE(review): this excerpt begins with the tail of a call
# (`random_state=SEED)`); the start of that statement is not visible here.
# NOTE(review): both TSNE objects call fit_transform separately on the train
# and score data, so presumably each pair of embeddings comes from
# independent fits -- confirm this is intended.
# NOTE(review): gmm._estimate_weighted_log_prob is an underscore-prefixed
# (non-public) method; verify it exists in the library version in use.
random_state=SEED) X_train_tsne_pca = tsne_pca.fit_transform(X_train_pca) X_score_tsne_pca = tsne_pca.fit_transform(X_score_pca) # ====== tsne ====== # tsne = TSNE(n_components=NUM_DIM, perplexity=30.0, learning_rate=200.0, n_iter=1000, random_state=SEED) X_train_tsne = tsne.fit_transform(X_train) X_score_tsne = tsne.fit_transform(X_score) # ====== lda ====== # lda = LinearDiscriminantAnalysis(n_components=NUM_DIM) lda.fit(X_train, y_train) X_train_lda = lda.transform(X_train) X_score_lda = lda.transform(X_score) # ====== plda ====== # plda = PLDA(n_phi=NUM_DIM, random_state=SEED) plda.fit(X_train, y_train) X_train_plda = plda.predict_log_proba(X_train) X_score_plda = plda.predict_log_proba(X_score) # ====== gmm ====== # gmm = GaussianMixture(n_components=NUM_DIM, max_iter=100, covariance_type='full', random_state=SEED) gmm.fit(X_train) X_train_gmm = gmm._estimate_weighted_log_prob(X_train) X_score_gmm = gmm._estimate_weighted_log_prob(X_score) # ====== rbm ====== # rbm = BernoulliRBM(n_components=NUM_DIM, batch_size=8, learning_rate=0.0008, n_iter=8, verbose=2, random_state=SEED) rbm.fit(X_train) X_train_rbm = rbm.transform(X_train) X_score_rbm = rbm.transform(X_score) # ===========================================================================
# Back-end training and scoring excerpt: picks lda.transform (or an identity
# lambda in the else-branch) as lda_transform, trains a PLDA with N_PLDA
# factors (centering, wccn and unit_length enabled, 20 iterations, seeded from
# Config.SUPER_SEED), then iterates all_vectors in sorted key order, skipping
# any dataset name not in SCORING_DATASETS, and unpacks each entry's
# 'name', 'y', 'path' and 'X' fields.
# NOTE(review): in the LDA branch X_backend is overwritten with
# lda.fit_transform(...) and is then passed through lda_transform
# (= lda.transform) again before the PLDA fit -- looks like the projection is
# applied twice; confirm against the intended data flow.
# NOTE(review): whether plda.fit(...) also runs when PLDA_MAXIMUM_LIKELIHOOD
# is false depends on indentation that this excerpt does not preserve; confirm.
# NOTE(review): the `if` that owns the first statements and the end of the
# name_2_ext comprehension are outside this excerpt.
X_backend = lda.fit_transform(X=X_backend, y=y_backend) lda_transform = lda.transform else: lda_transform = lambda x: x # ====== training the PLDA ====== # plda = PLDA(n_phi=N_PLDA, centering=True, wccn=True, unit_length=True, n_iter=20, random_state=Config.SUPER_SEED, verbose=2 if PLDA_SHOW_LLK else 1) if PLDA_MAXIMUM_LIKELIHOOD: print(" Fitting PLDA maximum likelihood ...") plda.fit_maximum_likelihood(X=lda_transform(X_backend), y=y_backend) plda.fit(X=lda_transform(X_backend), y=y_backend) # =========================================================================== # Now scoring # =========================================================================== for dsname, scores in sorted(all_vectors.items(), key=lambda x: x[0]): # ====== skip non scoring dataset ====== # if dsname not in SCORING_DATASETS: continue # ====== proceed ====== # print("Scoring:", ctext(dsname, 'yellow')) # load the scores (seg_name, seg_meta, seg_path, seg_data) = (scores['name'], scores['y'], scores['path'], scores['X']) name_2_data = {i: j for i, j in zip(seg_name, seg_data)} name_2_ext = { i: '' if j is None else os.path.splitext(j)[-1]