Example #1
def fast_pca(*x,
             n_components=None,
             algo='rpca',
             y=None,
             batch_size=1024,
             return_model=False,
             random_state=1234):
    """ A shortcut for many different PCA algorithms

  Parameters
  ----------
  x : {list, tuple}
    list of matrices for transformation, the first matrix will
    be used for training
  n_components : {None, int}
    number of PCA components
  algo : {'pca', 'ipca', 'ppca', 'sppca', 'plda', 'rpca'}
    different PCA algorithm:
      'ipca' - IncrementalPCA,
      'ppca' - Probabilistic PCA,
      'sppca' - Supervised Probabilistic PCA,
      'plda' - Probabilistic LDA,
      'rpca' - randomized PCA using randomized SVD
  y : {numpy.ndarray, None}
    required for labels in case of `sppca`
  batch_size : int (default: 1024)
    batch size, only used for IncrementalPCA
  return_model : bool (default: False)
    if True, return the trained PCA model as the FIRST return
  """
    batch_size = int(batch_size)
    algo = str(algo).lower()
    if algo not in ('pca', 'ipca', 'ppca', 'sppca', 'plda', 'rpca'):
        raise ValueError(
            "`algo` must be one of the following: 'pca', 'ipca', "
            "'ppca', 'sppca', 'plda', or 'rpca'; but given: '%s'" % algo)
    if algo in ('sppca', 'plda') and y is None:
        raise RuntimeError("`y` must not be None if `algo` is 'sppca' or 'plda'")
    x = flatten_list(x, level=None)
    # materialize memory-mapped arrays as in-memory numpy arrays
    x = [i[:] if i.__class__.__name__ == 'MmapData' else i for i in x]
    # ====== check input ====== #
    x_train = x[0]
    x_test = x[1:]
    input_shape = None
    if x_train.ndim > 2:  # only 2D for PCA
        input_shape = (-1, ) + x_train.shape[1:]
        new_shape = (-1, np.prod(input_shape[1:]))
        x_train = np.reshape(x_train, new_shape)
        x_test = [np.reshape(x, new_shape) for x in x_test]
        if n_components is not None:  # dimensionality changed, cannot reshape back
            input_shape = None
    # ====== train PCA ====== #
    if algo == 'sppca':
        pca = SupervisedPPCA(n_components=n_components,
                             random_state=random_state)
        pca.fit(x_train, y)
    elif algo == 'plda':
        from odin.ml import PLDA
        pca = PLDA(n_phi=n_components, random_state=random_state)
        pca.fit(x_train, y)
    elif algo == 'pca':
        pca = PCA(n_components=n_components, random_state=random_state)
        pca.fit(x_train)
    elif algo == 'rpca':
        # we copy the implementation of RandomizedPCA because it is
        # significantly faster than PCA(svd_solver='randomized')
        pca = RandomizedPCA(n_components=n_components,
                            iterated_power=2,
                            random_state=random_state)
        pca.fit(x_train)
    elif algo == 'ipca':
        pca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
        prog = Progbar(target=x_train.shape[0],
                       print_report=False,
                       print_summary=False,
                       name="Fitting PCA")
        for start, end in batching(batch_size=batch_size,
                                   n=x_train.shape[0],
                                   seed=1234):
            pca.partial_fit(x_train[start:end], check_input=False)
            prog.add(end - start)
    elif algo == 'ppca':
        pca = PPCA(n_components=n_components, random_state=random_state)
        pca.fit(x_train)
    # ====== transform ====== #
    x_train = pca.transform(x_train)
    x_test = [pca.transform(x) for x in x_test]
    # reshape back to original shape if necessary
    if input_shape is not None:
        x_train = np.reshape(x_train, input_shape)
        x_test = [np.reshape(x, input_shape) for x in x_test]
    # return the results
    if len(x_test) == 0:
        return x_train if not return_model else (pca, x_train)
    return tuple([x_train] +
                 x_test) if not return_model else tuple([pca, x_train] +
                                                        x_test)
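
A minimal usage sketch for fast_pca above, with synthetic data and illustrative names (X_train/X_test are not from the original source):

import numpy as np

# the first matrix is used to fit the model, the rest are only transformed
X_train = np.random.randn(1000, 64)
X_test = np.random.randn(200, 64)
X_train_pca, X_test_pca = fast_pca(X_train, X_test, n_components=8, algo='pca')
print(X_train_pca.shape, X_test_pca.shape)  # (1000, 8) (200, 8)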
Example #2
tsne_pca = TSNE(n_components=NUM_DIM, perplexity=30.0, learning_rate=200.0, n_iter=1000,
                random_state=SEED)
# NOTE: TSNE has no transform method, so each set below is embedded by a
# separate fit_transform and the resulting coordinates are not comparable
X_train_tsne_pca = tsne_pca.fit_transform(X_train_pca)
X_score_tsne_pca = tsne_pca.fit_transform(X_score_pca)
# ====== tsne ====== #
tsne = TSNE(n_components=NUM_DIM, perplexity=30.0, learning_rate=200.0, n_iter=1000,
            random_state=SEED)
X_train_tsne = tsne.fit_transform(X_train)
X_score_tsne = tsne.fit_transform(X_score)
# ====== lda ====== #
lda = LinearDiscriminantAnalysis(n_components=NUM_DIM)
lda.fit(X_train, y_train)
X_train_lda = lda.transform(X_train)
X_score_lda = lda.transform(X_score)
# ====== plda ====== #
plda = PLDA(n_phi=NUM_DIM, random_state=SEED)
plda.fit(X_train, y_train)
X_train_plda = plda.predict_log_proba(X_train)
X_score_plda = plda.predict_log_proba(X_score)
# ====== gmm ====== #
gmm = GaussianMixture(n_components=NUM_DIM, max_iter=100, covariance_type='full',
                      random_state=SEED)
gmm.fit(X_train)
# _estimate_weighted_log_prob is a private sklearn method returning the
# per-component weighted log-likelihoods, shape (n_samples, n_components)
X_train_gmm = gmm._estimate_weighted_log_prob(X_train)
X_score_gmm = gmm._estimate_weighted_log_prob(X_score)
# ====== rbm ====== #
rbm = BernoulliRBM(n_components=NUM_DIM, batch_size=8, learning_rate=0.0008,
                   n_iter=8, verbose=2, random_state=SEED)
rbm.fit(X_train)
X_train_rbm = rbm.transform(X_train)
X_score_rbm = rbm.transform(X_score)
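
Because of the t-SNE caveat noted above, one way to place train and score points in a single embedding space is to fit once on the stacked data and split the result; a minimal sketch using the standard scikit-learn API (2 components, since the default Barnes-Hut solver supports at most 3):

import numpy as np
from sklearn.manifold import TSNE

# fit one t-SNE on the stacked data, then split the embedding back so
# train and score points share one coordinate system
X_all = np.concatenate([X_train, X_score], axis=0)
X_all_tsne = TSNE(n_components=2, perplexity=30.0,
                  random_state=SEED).fit_transform(X_all)
X_train_tsne, X_score_tsne = np.split(X_all_tsne, [len(X_train)])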
Example #3
# ===========================================================================
# Prediction
# ===========================================================================
y_pred_proba, Z1_test, Z2_test, Z3_test = make_dnn_prediction(
    functions=[f_pred_proba, f_z1, f_z2, f_z3], X=X_test_data, title='TEST')
print("Test Latent:", Z1_test.shape, Z2_test.shape, Z3_test.shape)
y_pred = np.argmax(y_pred_proba, axis=-1)
evaluate(y_true=X_test_true, y_pred_proba=y_pred_proba, labels=labels,
         title="Test set (Deep prediction)",
         path=os.path.join(EXP_DIR, 'test_deep.pdf'))
# ====== make a streamline classifier ====== #
# training PLDA
Z3_train, y_train = make_dnn_prediction(f_z3, X=train, title="TRAIN")
print("Z3_train:", Z3_train.shape, y_train.shape)
Z3_valid, y_valid = make_dnn_prediction(f_z3, X=valid, title="VALID")
print("Z3_valid:", Z3_valid.shape, y_valid.shape)
plda = PLDA(n_phi=200, random_state=K.get_rng().randint(10e8),
            n_iter=12, labels=labels, verbose=0)
plda.fit(np.concatenate([Z3_train, Z3_valid], axis=0),
         np.concatenate([y_train, y_valid], axis=0))
y_pred_log_proba = plda.predict_log_proba(Z3_test)
evaluate(y_true=X_test_true, y_pred_log_proba=y_pred_log_proba, labels=labels,
         title="Test set (PLDA - Latent prediction)",
         path=os.path.join(EXP_DIR, 'test_latent.pdf'))
# ====== visualize ====== #
visualize_latent_space(X_org=X_test_data, X_latent=Z1_test,
                       name=X_test_name, labels=X_test_true,
                       title="latent1")
visualize_latent_space(X_org=X_test_data, X_latent=Z2_test,
                       name=X_test_name, labels=X_test_true,
                       title="latent2")
V.plot_save(os.path.join(EXP_DIR, 'latent.pdf'))
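
For reference, a hedged sketch of how the two back-ends above could be compared without the odin evaluate helper, assuming X_test_true holds integer class indices (standard scikit-learn):

from sklearn.metrics import accuracy_score

# hard decisions from the DNN posteriors and the PLDA log-posteriors
y_pred_dnn = np.argmax(y_pred_proba, axis=-1)
y_pred_plda = np.argmax(y_pred_log_proba, axis=-1)
print("DNN accuracy :", accuracy_score(X_test_true, y_pred_dnn))
print("PLDA accuracy:", accuracy_score(X_test_true, y_pred_plda))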
Example #4
# ===========================================================================
# Training the PLDA
# ===========================================================================
# ====== training the LDA ====== #
if N_LDA > 0:
    print("  Fitting LDA ...")
    lda = LinearDiscriminantAnalysis(n_components=N_LDA)
    # keep X_backend untransformed; `lda_transform` is applied at fit/score time
    lda.fit(X=X_backend, y=y_backend)
    lda_transform = lda.transform
else:
    lda_transform = lambda x: x
# ====== training the PLDA ====== #
plda = PLDA(n_phi=N_PLDA,
            centering=True,
            wccn=True,
            unit_length=True,
            n_iter=20,
            random_state=Config.SUPER_SEED,
            verbose=2 if PLDA_SHOW_LLK else 1)
if PLDA_MAXIMUM_LIKELIHOOD:
    print("  Fitting PLDA maximum likelihood ...")
    plda.fit_maximum_likelihood(X=lda_transform(X_backend), y=y_backend)
plda.fit(X=lda_transform(X_backend), y=y_backend)
# ===========================================================================
# Now scoring
# ===========================================================================
for dsname, scores in sorted(all_vectors.items(), key=lambda x: x[0]):
    # ====== skip non scoring dataset ====== #
    if dsname not in SCORING_DATASETS:
        continue
    # ====== proceed ====== #
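
The snippet is cut off at this point. Purely for illustration, a hedged sketch of the scoring step the loop sets up, reusing plda and lda_transform from above; the name X_scores and its (n_samples, n_features) layout are assumptions, not from the original source:

# hypothetical: project the score vectors with the same LDA (identity
# when N_LDA <= 0) and let the trained PLDA back-end score them
X_scores_log_proba = plda.predict_log_proba(lda_transform(X_scores))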