    test_loader = torch.utils.data.DataLoader(Subset(test_set, test_limit),
                                               batch_size=args.batch_size,
                                               shuffle=False)

    # Main body
    model = DCN(args)
    rec_loss_list, nmi_list, ari_list = solver(args, model, train_loader,
                                               test_loader)

    # X_train = X_train.to(self.device)
    # print(y_train[0])
    out = model.autoencoder(torch.FloatTensor(np.array(X_train)).to(args.device),
                            latent=True)
    # Use a separate PaCMAP instance for each dataset; an instance keeps the
    # pairs it samples, so reusing it on different data is best avoided.
    reducer = pacmap.PaCMAP()
    X2 = reducer.fit_transform(out.cpu().detach().numpy())
    reducer_raw = pacmap.PaCMAP()
    X4 = reducer_raw.fit_transform(X_train)
    c_train = [color[int(y_train.iloc[i])] for i in range(len(y_train))]

    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.suptitle('Normal vs CAC Embeddings')
    ax1.scatter(X4[:, 0], X4[:, 1], color=c_train)
    ax2.scatter(X2[:, 0], X2[:, 1], color=c_train)
    fig.savefig("normal_vs_cac.png", dpi=fig.dpi)
    # plt.show()

    # Testing
    out = model.autoencoder(torch.FloatTensor(np.array(X_test)).to(args.device),
                            latent=True)
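    # The original example is truncated at this point. A hedged sketch (not from
    # the original) of how the test-set visualization could continue, mirroring
    # the training plot above; `color` and `y_test` are assumed to be defined as
    # before, and the file name is illustrative.
    reducer_test = pacmap.PaCMAP()
    X2_test = reducer_test.fit_transform(out.cpu().detach().numpy())
    c_test = [color[int(y_test.iloc[i])] for i in range(len(y_test))]

    fig_test, ax_test = plt.subplots(1, 1)
    ax_test.set_title('CAC Embeddings (Test Set)')
    ax_test.scatter(X2_test[:, 0], X2_test[:, 1], color=c_test)
    fig_test.savefig("cac_test_embeddings.png")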
Example #2
import numpy as np
import pacmap
from sklearn.model_selection import StratifiedKFold

print("Loading data")
mnist = np.load("../data/mnist_images.npy", allow_pickle=True)
mnist = mnist.reshape(mnist.shape[0], -1)
labels = np.load("../data/mnist_labels.npy", allow_pickle=True)
n_splits = [2, 5, 10]
for n in n_splits:
    skf = StratifiedKFold(n_splits=n)
    for train_index, test_index in skf.split(mnist, labels):
        X_train, X_test = mnist[train_index], mnist[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        break  # keep only the first train/test split for this n

    # Initialize the instance
    reducer = pacmap.PaCMAP(n_components=2,
                            n_neighbors=10,
                            MN_ratio=0.5,
                            FP_ratio=2.0,
                            random_state=20,
                            save_tree=False)

    # Fit the training set
    embedding = reducer.fit_transform(X_train)

    # Transform the test set into the same embedding space
    embedding_test = reducer.transform(X_test, basis=X_train)

    # Plot the results
    embedding_combined = np.concatenate((embedding, embedding_test))
    y = np.concatenate((y_train, y_test))
    embeddings = [embedding, embedding_test, embedding_combined]
    labelset = [y_train, y_test, y]
    titles = ['Training', 'Test', 'Combined']
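    # The snippet ends here without the plotting code the three lists above are
    # built for. A hedged sketch of that loop (figure size, colormap, and file
    # name are illustrative choices, not from the original; labels are assumed
    # to be numeric):
    import matplotlib.pyplot as plt
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    for ax, emb, lab, title in zip(axes, embeddings, labelset, titles):
        ax.scatter(emb[:, 0], emb[:, 1], c=lab, cmap="Spectral", s=0.6)
        ax.set_title(f"{title} (n_splits={n})")
    fig.savefig(f"mnist_pacmap_{n}_splits.png")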
Example #3
import pacmap
import numpy as np
import matplotlib.pyplot as plt

# loading preprocessed coil_20 dataset
# you can change it with any dataset that is in the ndarray format, with the shape (N, D)
# where N is the number of samples and D is the dimension of each sample
X = np.load("../data/coil_20.npy", allow_pickle=True)
X = X.reshape(X.shape[0], -1)
y = np.load("../data/coil_20_labels.npy", allow_pickle=True)

# Initialize the pacmap instance.
# Setting n_neighbors to None enables the automatic parameter selection
# described in the "Parameters" section of the README file.
# Note that from v0.6.0 onwards, the n_dims parameter is renamed to n_components.
embedding = pacmap.PaCMAP(n_components=2,
                          n_neighbors=None,
                          MN_ratio=0.5,
                          FP_ratio=2.0)

# fit the data (The index of transformed data corresponds to the index of the original data)
X_transformed = embedding.fit_transform(X, init="pca")

# visualize the embedding
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.scatter(X_transformed[:, 0],
           X_transformed[:, 1],
           cmap="Spectral",
           c=y,
           s=0.6)
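# The snippet stops after the scatter call; a minimal, hedged way to finish and
# save the figure (the title and file name are illustrative):
ax.set_title("PaCMAP embedding of COIL-20")
fig.savefig("coil_20_pacmap.png", dpi=300)
plt.show()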
Example #4
def PaCMAP(data=None,
           init=None,
           n_dims=2,
           n_neighbors=10,
           MN_ratio=0.5,
           FP_ratio=2.0,
           pair_neighbors=None,
           pair_MN=None,
           pair_FP=None,
           distance="angular",
           lr=1.0,
           num_iters=450,
           verbose=False,
           intermediate=False):
    """
    Dimensionality Reduction Using Pairwise-controlled Manifold Approximation and Projection (PaCMAP)

    Inputs
    ------
    data : np.array with the data to be reduced

    init : the initialization of the lower dimensional embedding. One of "pca" or "random", or a user-provided numpy ndarray with the shape (N, 2). Defaults to "random".

    n_dims : the number of dimensions of the output embedding. Defaults to 2.

    n_neighbors : the number of neighbors considered in the k-Nearest Neighbor graph. Defaults to 10 for datasets
        with fewer than 10000 samples. For larger datasets with sample size n > 10000, the default is
        10 + 15 * (log10(n) - 4); e.g., n = 100000 gives 10 + 15 * (5 - 4) = 25.

    MN_ratio : the ratio of the number of mid-near pairs to the number of neighbors,
        n_MN = floor(n_neighbors * MN_ratio). Defaults to 0.5.

    FP_ratio : the ratio of the number of further pairs to the number of neighbors,
        n_FP = floor(n_neighbors * FP_ratio). Defaults to 2.

    distance : distance metric ('euclidean', 'manhattan', 'angular', 'hamming').
        Defaults to 'angular' in this wrapper.

    lr : learning rate of the optimizer. Defaults to 1.0.

    num_iters : number of iterations. Defaults to 450, which is enough for most datasets to converge.

    pair_neighbors, pair_MN and pair_FP : pre-specified neighbor pairs, mid-near pairs, and further pairs. Allows the user to supply their own graphs. Defaults to None.

    verbose : controls verbosity (default False)

    intermediate : whether pacmap should also output the intermediate stages of the optimization process of the lower dimensional embedding. If True, the output will be a numpy array of size (n, n_dims, 13), where each slice is a "screenshot" of the embedding at a particular number of steps, from [0, 10, 30, 60, 100, 120, 140, 170, 200, 250, 300, 350, 450].

    """

    try:
        import pacmap
    except ImportError:
        print('pacmap is needed for this embedding. '
              'Install it with `pip install pacmap`.')
        return None

    # NOTE: pacmap renamed `n_dims` to `n_components` in v0.6.0 (see the note in
    # Example #3 above); this call assumes an older pacmap version.
    pacmap_emb = pacmap.PaCMAP(n_dims=n_dims,
                               n_neighbors=n_neighbors,
                               MN_ratio=MN_ratio,
                               FP_ratio=FP_ratio,
                               pair_neighbors=pair_neighbors,
                               pair_MN=pair_MN,
                               pair_FP=pair_FP,
                               distance=distance,
                               lr=lr,
                               num_iters=num_iters,
                               verbose=verbose,
                               intermediate=intermediate).fit_transform(
                                   X=data, init=init)

    return pacmap_emb
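
# A hedged usage sketch for the wrapper above (the random data is illustrative;
# any (N, D) ndarray works as `data`, and a pacmap version that still accepts
# `n_dims` is assumed, per the note inside the function):
if __name__ == "__main__":
    import numpy as np

    X = np.random.default_rng(0).normal(size=(1000, 50))
    emb = PaCMAP(data=X, init="pca", n_dims=2, n_neighbors=10)
    if emb is not None:
        print(emb.shape)  # expected: (1000, 2)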