Example 1
def display_latent_code(
    latent_code: np.ndarray, labels: np.ndarray, title: str, seed: int
) -> None:
    """
    Plots the computed latent code representation for the features.

    Parameters
    ----------
    latent_code: np.ndarray
        The latent code representation for features.
    labels: np.ndarray
        The labels for the dataset features.
    title: str
        The plot title to use.
    seed: int
        The pseudorandom seed to use for reproducible t-SNE visualization.
    """
    tsne_encoder = TSNE(random_seed=seed, perplexity=50, learning_rate=10, n_iter=5000)
    latent_code = tsne_encoder.fit_transform(latent_code)
    sns.set_style("darkgrid")
    plt.scatter(latent_code[:, 0], latent_code[:, 1], c=labels, marker="o")
    plt.title(title)
    plt.grid()
    plt.savefig(fname=f"data/{title}.png", dpi=150)
    plt.show()
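A minimal usage sketch for the helper above; the synthetic arrays, the title, and the call itself are illustrative assumptions, and numpy plus a TSNE class that accepts random_seed are presumed to be imported as in the snippet.

# Hypothetical call with synthetic data (shapes are illustrative only).
latent = np.random.rand(500, 32).astype(np.float32)   # e.g. encoder outputs
classes = np.random.randint(0, 10, size=500)          # integer class labels
display_latent_code(latent, classes, title="latent_space", seed=42)  # writes data/latent_space.png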
Example 2
def apply_tsne(act, perplexity, n_iter):
    time_start = time.time()
    tsne = TSNE(n_components=2, verbose=1, perplexity=perplexity, n_iter=n_iter)
    tsne_results = tsne.fit_transform(act)
    # del act
    print(f"Time elapsed: {time.time() - time_start} seconds")
    return tsne_results
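A hedged usage sketch for apply_tsne; the activation matrix below is synthetic, and numpy is assumed to be imported alongside time and TSNE as in the snippet.

# Hypothetical call: 1,000 activation vectors of dimension 128.
act = np.random.rand(1000, 128)
embedded = apply_tsne(act, perplexity=30, n_iter=1000)
print(embedded.shape)  # (1000, 2)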
Example 3
def compute_tsne(X, labels=None, plot=True):
    tsne_model = TSNE(n_components=2,
                      perplexity=40,
                      learning_rate=100,
                      verbose=10)
    tsne_Y = tsne_model.fit_transform(X)
    if plot:
        fig = plt.figure(figsize=(10, 10))
        ax = fig.gca()
        ax.scatter(tsne_Y[:, 1], tsne_Y[:, 0], c=labels, s=1, cmap='hsv')
    return tsne_Y
Example 4
def write_tsne(label_dict, embedding, extension="png", tsne_model=None):
    """
    base:https://medium.com/analytics-vidhya/super-fast-tsne-cuda-on-kaggle-b66dcdc4a5a4
    """
    if tsne_model is None:
        tsne_model = TSNE()

    x_embedding = tsne_model.fit_transform(embedding)

    for key in label_dict:
        label = label_dict[key]
        embedding_and_label = pd.concat([pd.DataFrame(x_embedding), pd.DataFrame(data=label,columns=["label"])], axis=1)
        sns.FacetGrid(embedding_and_label, hue="label", height=6).map(plt.scatter, 0, 1).add_legend()
        plt.savefig("{}.{}".format(key,extension))
        plt.clf()
    plt.close('all')
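A usage sketch for write_tsne; the embedding matrix and label arrays are made up for illustration, and numpy is assumed in addition to the pandas/seaborn/matplotlib/TSNE imports the snippet relies on.

# Hypothetical inputs: one 128-d embedding, two alternative labelings.
embedding = np.random.rand(300, 128)
label_dict = {"digit": np.random.randint(0, 10, 300),
              "parity": np.random.randint(0, 2, 300)}
write_tsne(label_dict, embedding)   # writes digit.png and parity.png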
Example 5
def reduce_dimensions(model, perplexity):
    num_dimensions = 2  # final num dimensions (2D, 3D, etc)

    vectors = []  # positions in vector space
    labels = []  # keep track of words to label our data again later
    for word in model.wv.vocab:
        vectors.append(model.wv[word])
        labels.append(word)

    # convert both lists into numpy vectors for reduction
    vectors = np.asarray(vectors)
    labels = np.asarray(labels)

    # reduce using t-SNE
    tsne = TSNE(n_components=num_dimensions, perplexity=perplexity)
    vectors = tsne.fit_transform(vectors)

    x_vals = [v[0] for v in vectors]
    y_vals = [v[1] for v in vectors]

    print(x_vals[:5])
    return x_vals, y_vals, labels
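reduce_dimensions iterates model.wv.vocab, which exists only in gensim versions before 4.0; below is a usage sketch under that assumption, with a toy corpus that is purely illustrative.

from gensim.models import Word2Vec

# Tiny illustrative corpus; any iterable of tokenized sentences works.
sentences = [["king", "queen", "royal", "palace"],
             ["cat", "dog", "pet", "animal"],
             ["car", "truck", "road", "vehicle"]]
model = Word2Vec(sentences, size=50, min_count=1)   # `size` is the gensim 3.x keyword
x_vals, y_vals, labels = reduce_dimensions(model, perplexity=5)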
Example 6
 def apply_tsne(j):
     idx, md5, x = j
     tsne = TSNE(**kwargs)
     return (idx, md5, tsne.fit_transform(x))
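The nested helper above expects an (index, md5, feature-matrix) tuple and closes over kwargs from its enclosing scope, which suggests it is meant to be mapped over many work items. A sketch of such a driver follows; the data, kwargs values, and numpy import are assumptions for illustration.

# Hypothetical work items, defined in the same enclosing scope as `kwargs`.
kwargs = {"n_components": 2, "perplexity": 30}
jobs = [(0, "d41d8cd9", np.random.rand(200, 64)),
        (1, "e99a18c4", np.random.rand(200, 64))]
results = [apply_tsne(j) for j in jobs]
# A multiprocessing pool's map() could replace the list comprehension once
# apply_tsne is defined at module level (nested functions cannot be pickled).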
Example 7
# the predictions would have a hazy circle, since the best guess would be the
# mean of all the rotated digits. Since we don't rotate our view2 images, we
# instead get something that's only a bit hazy around the edges -- corresponding
# to the mean of all the non-rotated digits.

# Next let's visualize our 20d test embeddings with T-SNE and see if they
# represent our original underlying representation -- the digits 0-9 -- from
# which we made two views. In the perfect scenario, each of the 10,000
# vectors of our test embedding would be one of ten vectors, representing the
# digits from 0-9. (Our network wouldn't do this, as it tries to reconstruct
# each unique view1 image exactly). In lieu of this we can hope for embedding
# vectors corresponding to the same digits to be closer together.


tsne = TSNE()
tsneEmbeddings = tsne.fit_transform(testEmbed)


def plot2DEmbeddings(embeddings, labels):
    pointColors = []
    origColors = [
        [55, 55, 55], [255, 34, 34], [38, 255, 38],
        [10, 10, 255], [255, 12, 255], [250, 200, 160],
        [120, 210, 180], [150, 180, 205], [210, 160, 210],
        [190, 190, 110]
        ]
    origColors = (np.array(origColors)) / 255
    for l in labels.cpu().numpy():
        pointColors.append(tuple(origColors[l].tolist()))

    fig, ax = plt.subplots()
    # assumed completion: scatter the 2-D embedding, one color per digit class
    ax.scatter(embeddings[:, 0], embeddings[:, 1], c=pointColors, s=2)
    plt.show()
Example 8
    data = []
    target = []
    subdirectories = {
        dir_: idx
        for (idx, dir_) in enumerate(os.listdir(bboxForTsne_path))
        if os.path.isdir(os.path.join(bboxForTsne_path, dir_))
    }
    for sub_ in subdirectories.keys():
        for img_ in imread_collection(
                os.path.join(bboxForTsne_path, sub_) + '/*.jpg'):
            target.append(sub_)
            data.append(resize(img_, (300, 300)).ravel())

    return np.array(data), target, subdirectories


data, target, target_num = load_images_from_subdirectories()

tsne = TSNE()
data_tsne = tsne.fit_transform(data)

#np.save('tsne.npy', tsne)

for x, y, tg in zip(data_tsne[:, 0], data_tsne[:, 1], target):
    plt.scatter(x, y, c=color[target_num[tg]])

plt.xlim(data_tsne[:, 0].min(), data_tsne[:, 0].max())  # min, max
plt.ylim(data_tsne[:, 1].min(), data_tsne[:, 1].max())  # min, max
plt.xlabel('t-SNE feature 0')  # x-axis label
plt.ylabel('t-SNE feature 1')  # y-axis label
plt.savefig('./result.png')
Example 9
    y_s = y_s.cuda()
    x_t = x_t.cuda()

    optimizer.zero_grad()
    # optimizer_ad.zero_grad()

    ########### Networks Forward Propagation
    f_s, p_s = model(x_s)
    f_t, p_t = model(x_t)
    features = torch.cat((f_s, f_t), dim=0)
    outputs = torch.cat((p_s, p_t), dim=0)
    loss = nn.CrossEntropyLoss()(outputs.narrow(0, 0, x_s.size(0)), y_s)

    ### TSNE
    tsne_model = TSNE(learning_rate=100)
    # move the source features off the GPU before handing them to t-SNE
    transformed = tsne_model.fit_transform(f_s.detach().cpu())

    X_embedded = TSNE(n_components=2, perplexity=15,
                      learning_rate=10).fit_transform(f_s.detach().cpu())

    xs = transformed[:, 0]
    ys = transformed[:, 1]

    fig = plt.figure()
    plt.scatter(xs, ys, c=y_s.cpu())
    fig.savefig('f_s_tsne.png')

    pdb.set_trace()

    writer.add_figure('f_s_tsne', fig, niter)
Example 10
    embed_set = "embedding"
    embed_dir = "stored_data/embeddings"
    embed_dir = os.path.join(embed_dir, embed_name, "embeddings")
    create_folder(embed_dir)
    fig_dir = os.path.join(embed_dir, "figures")
    create_folder(fig_dir)

    df_emb, embeddings = calculate_embedding(test_dataset, emb_model,
                                             savedir=os.path.join(embed_dir, embed_set), concatenate="append")
    print(embeddings.mean())
    print(embeddings.var())
    embeddings = sklearn.preprocessing.StandardScaler().fit_transform(embeddings.reshape(embeddings.shape[0], -1))
    print("normalized")
    print(embeddings.mean())
    print(embeddings.var())
    df_emb = df_emb.fillna("")
    tsne = TSNE()
    tsne_emb = tsne.fit_transform(X=embeddings.reshape(embeddings.shape[0], -1))
    tsne_plots(tsne_emb, df_emb, savefig=os.path.join(fig_dir, embed_set))
    scater_valid_rat = scatter_ratio(embeddings.reshape(embeddings.shape[0], -1), df_emb.reset_index())
    silhouette_valid_score = sklearn.metrics.silhouette_score(
        embeddings.reshape(embeddings.shape[0], -1), df_emb.event_labels, metric='euclidean')
    LOG.info("Valid silhouette for all classes in 2D (tsne) : {}".format(
        sklearn.metrics.silhouette_score(df_emb[["X", "Y"]], df_emb.event_labels, metric='euclidean')))

    embed_dir = "stored_data/embeddings"
    embed_dir = os.path.join(embed_dir, embed_name)
    create_folder(embed_dir)
    np.save(os.path.join(embed_dir, "embed" + str(epoch_model)), embeddings)
    test_fr.to_csv(os.path.join(embed_dir, "df" + str(epoch_model)), sep="\t", index=False)
Example 11
def tsne_grid(val_loader, model):
    # Generate t-sne-based matrix of images
    features = []
    images = []
    for i, (input, _, index, names) in enumerate(val_loader):
        index = index.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input)
        index_var = torch.autograd.Variable(index)

        # compute output
        feature = model(input_var)
        feature = feature.cpu()

        for i in range(feature.data.numpy().shape[0]):
            images.append(input.numpy()[i, ...])
            features.append(feature.data.numpy()[i, :])

        print(len(features))

    print(np.array(images).shape)
    print(np.array(features).shape)
    img_collection = np.moveaxis(np.array(images), 1, -1)
    print(img_collection.shape)
    size = 45
    perplexity = 20
    tsne_iter = 5000
    print("Running tsne...")
    tsne = TSNE(perplexity=perplexity,
                n_components=2,
                init='random',
                n_iter=tsne_iter)
    X_2d = tsne.fit_transform(np.array(features)[0:size * size, :])
    print("tsne complete.  Normalizing...")
    X_2d -= X_2d.min(axis=0)
    X_2d /= X_2d.max(axis=0)
    print("Normalization complete.  Creating plot...")
    grid = np.dstack(
        np.meshgrid(np.linspace(0, 1, size),
                    np.linspace(0, 1, size))).reshape(-1, 2)
    cost_matrix = cdist(grid, X_2d, "sqeuclidean").astype(np.float32)
    cost_matrix = cost_matrix * (100000 / cost_matrix.max())
    _, row_asses, col_asses = lapjv(cost_matrix)
    grid_jv = grid[col_asses]
    out = np.ones((size * 224, size * 224, 3))

    for pos, img in zip(grid_jv, img_collection[0:size * size]):
        h_range = int(np.floor(pos[0] * (size - 1) * 224))
        w_range = int(np.floor(pos[1] * (size - 1) * 224))
        out[h_range:h_range + 224,
            w_range:w_range + 224] = image.img_to_array(img)

    print("plot complete.  Saving gridded plot...")
    im = image.array_to_img(out)
    im.save('UFL_TSNE_GRID.jpeg', quality=100)
    print("Gridded plot saved!")
    out = np.zeros((size * 224, size * 224, 3))
    for pos, img in zip(X_2d, img_collection[0:size * size]):
        h_range = int(pos[0] * (size - 1) * 224)
        w_range = int(pos[1] * (size - 1) * 224)
        out[h_range:h_range + 224,
            w_range:w_range + 224] = image.img_to_array(img)

    print("plot complete.  Saving cloud plot...")
    im = image.array_to_img(out)
    im.save('UFL_TSNE_CLOUD.jpeg', quality=100)
    print("Cloud plot saved!")
Example 12
                    # verbose=2)
                    verbose=rsfk_verbose)
                # t = time.time() - init_t
                t = rsfk._last_search_time  # Ignore data initialization time

                nne_rate = quality_function(real_indices,
                                            indices,
                                            real_sqd_dist,
                                            dist,
                                            max_k=K)

                time_list.append(t)
                print("RSFK Time: {}".format(t), flush=True)
                print("RSFK NNP: {}".format(nne_rate), flush=True)

                p = tsne.fit_transform(dataX, pre_knn=(indices, dist))
                tsne_nne = get_nne_rate_tsne(dataX,
                                             p,
                                             max_k=K,
                                             pre_knn=(real_sqd_dist,
                                                      real_indices))
                quality_list.append([nne_rate, tsne_nne])
                print("TSNE NNE: {}".format(tsne_nne))

                # plot_emb(p,
                #          fig_name="knn_experiment2/{}_K{}_{}trees".format(dataset_name, K, n_trees),
                #          fig_title="t-SNE result with KNN error = {}\n".format(tsne_nne)+
                #                    r"$R_{\mathrm{NX}}(K)$ = "+str(nne_rate))

                plot_emb(p,
                         fig_name="knn_experiment2/{}_K{}_{}trees".format(
Example 13
from keras.datasets import mnist
from tsnecuda import TSNE
import matplotlib.pyplot as plt

(x_train, y_train), (x_test, y_test) = mnist.load_data()

print(y_train.shape)
print(x_train.shape)

tsne = TSNE(n_iter=1000, verbose=1, num_neighbors=64)
tsne_results = tsne.fit_transform(x_train.reshape(60000, -1))

print(tsne_results.shape)

# Create the figure
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(1, 1, 1, title='TSNE')

# Create the scatter
ax.scatter(x=tsne_results[:, 0],
           y=tsne_results[:, 1],
           c=y_train,
           cmap=plt.cm.get_cmap('Paired'),
           alpha=0.4,
           s=0.5)
plt.show()
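tsnecuda needs a CUDA-capable GPU; a rough CPU fallback with scikit-learn is sketched below, with the caveats that the parameter mapping is only approximate (num_neighbors has no direct scikit-learn counterpart) and that a subset is used to keep the runtime tolerable.

# Approximate CPU equivalent on a 5,000-sample subset of MNIST.
from sklearn.manifold import TSNE as SkTSNE

subset = x_train[:5000].reshape(5000, -1)
sk_results = SkTSNE(n_components=2, n_iter=1000, verbose=1).fit_transform(subset)
print(sk_results.shape)  # (5000, 2)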
Example 14
def figTwoManifold(fileList, bc_1, bc_2, output_file_name="default", labels=None, gen_random=True):

    X = []
    X_indices = []
    j = 0
    # labels = []

    for i, file in enumerate(fileList):
        unique_sequences = {}
        print(file)
        # labels.append(i)

        pat_start = r"(" + bc_1 + ")"
        pat_start = re.compile(pat_start)

        pat_end = r"(" + bc_2[:-3] + ")"
        pat_end = re.compile(pat_end)

        tmp_indices = [j,j]

        c = 0
        for h,seq in readFasta(file):

            if 'N' in seq:
                continue

            search_start = pat_start.search(seq)
            search_end = pat_end.search(seq)

            if search_start and search_end:
                seq_trimmed = seq[search_start.span()[1]:search_end.span()[0]]
                # seq_full = bc_1 + seq_trimmed + bc_2
                seq_full = seq_trimmed
                if len(seq_full) > 45:
                    continue
                    # enc = encode(seq_trimmed)
                    # x_gp.append(enc)
                    # y_gp.append(i)

                # print(seq_trimmed)
                if seq_full in unique_sequences:
                    unique_sequences[seq_full] += 1
                else:
                    unique_sequences[seq_full] = 1
                    c+=1

            # if c > 100000:
            #     break

        for seq in unique_sequences:
            enc = encode(seq)
            X.append(enc)
            j+=1

        tmp_indices[1] = j
        X_indices.append(tmp_indices)

        print(len(unique_sequences))


    if gen_random:
        print("Random")
        tmp_indices = [j,j]

        for k in range(2000000):
            seq = generate_random_rna(random.randint(32, 42))

            enc = encode(seq)
            X.append(enc)
            j+=1

        tmp_indices[1] = j
        X_indices.append(tmp_indices)
        print(k)


    ## Add specials.
    print("Specials")
    tmp_indices = [j,j]

    wt_seq = encode("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
    X.append(wt_seq)
    j+=1

    selected_seq = encode("GAAGGAAGAAAATGCAGAAAAAAAGAAAAAAATGTCTGG")
    X.append(selected_seq)
    j+=1


    for h, seq in readFasta("T5_R8_hits_strict.fasta"):
        search_start = pat_start.search(seq)
        search_end = pat_end.search(seq)

        if search_start and search_end:
            seq_trimmed = seq[search_start.span()[1]:search_end.span()[0]]
            X.append(encode(seq_trimmed))
            j+=1
            print(seq_trimmed)

    tmp_indices[1] = j
    X_indices.append(tmp_indices)



    # print(X_indices[-1][0]+1)

    # exit()




    X = np.array(X)
    print(X.shape)
    print(X_indices)

    # exit()

    plt.figure(figsize=(12, 6))
    ## Look at manifolds.

    # pca = PCA(n_components=2).fit(X)
    # pca_all = TSNE(n_components=2).fit_transform(X)
    # pca = TruncatedSVD(n_components=2, n_iter=7, random_state=42).fit(X)
    # pca_all = pca.transform(X)


    # tsne = TSNE(
    # perplexity=50,
    # metric="cosine",
    # initialization="pca",
    # n_jobs=16,
    # random_state=42,
    # verbose=True
    # )
    # pca_fit = tsne.fit(X)
    # pca = pca_fit.transform(X)

    tsne = TSNE(
    perplexity=50,
    metric="euclidean",
    random_seed=42,
    verbose=True
    )
    pca = tsne.fit_transform(X)

    # pca = umap.UMAP(n_neighbors=30,
    # n_components=2,
    # random_state=42)
    # pca.fit(X)

    n_col = 5
    n_row = 2

    cmap = sns.color_palette("tab10", 8)

    for i in range(len(X_indices)-1):
        idx_grp = X_indices[i]
        plt.subplot(n_row, n_col, i+1)
        # pca_all = tsne_fit.transform(x_gp)
        # pca_all = tsne.fit_transform(x_gp, pca)

        print("Plotting indices: %d:%d" %(idx_grp[0], idx_grp[1]))

        if labels is None:
            plt.title("R%d" % (i))
        else:
            plt.title(labels[i])

        # plt.scatter(pca_all[:, 0], pca_all[:, 1], s=3, c="slategrey")
        sns.kdeplot(x=pca[idx_grp[0]:idx_grp[1], 0], y=pca[idx_grp[0]:idx_grp[1], 1], cmap="viridis", fill=True)

        # print(encode("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"))
        # wt = pca.transform(encode(bc_1 + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + bc_2).reshape(1,-1))
        # selected = pca.transform(encode(bc_1 + "GAAGGAAGAAAATGCAGAAAAAAAGAAAAAAATGTCTGG" + bc_2).reshape(1,-1))
        # wt = tsne.fit_transform(encode("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA").reshape(1,-1))
        # selected = tsne.fit_transform(encode("GAAGGAAGAAAATGCAGAAAAAAAGAAAAAAATGTCTGG").reshape(1,-1))


        wt_index = X_indices[-1][0]
        selected_index = X_indices[-1][0]+1
        plt.scatter(pca[wt_index, 0], pca[wt_index, 1], s=10, c='red')
        # plt.scatter(pca[selected_index, 0], pca[selected_index, 1], s=10, c='red', marker='D')

        # motif_matches_indices = [X_indices[-1][0]+2, X_indices[-1][1]-1]
        # print(motif_matches_indices)
        # plt.scatter(pca[motif_matches_indices[0]:motif_matches_indices[1], 0], pca[motif_matches_indices[0]:motif_matches_indices[1], 1], s=10, c='pink', marker='P')


        plt.xlim(np.min(pca[:, 0])-5, np.max(pca[:, 0])+5)
        plt.ylim(np.min(pca[:, 1])-5, np.max(pca[:, 1])+5)

    plt.tight_layout()
    # plt.show(block=True)
    plt.savefig(output_file_name + '.pdf', dpi=300, bbox_inches='tight')
Example 15
    'vectors': vectors,
    'labels': labels,
    'image_paths': image_paths
}, './results/{}_ep_{}.pth'.format(task_name, load_result_ep))

result_dict = torch.load('./results/{}_ep_{}.pth'.format(
    task_name, load_result_ep))
vectors = result_dict['vectors']
labels = pd.DataFrame(result_dict['labels'])
image_paths = result_dict['image_paths']

tsne_vectors = vectors
tsne_labels = labels

tsne_model = TSNE()
tsne_Y = tsne_model.fit_transform(tsne_vectors)

image_paths_modified = [
    './' + os.path.join(*i.split('/')[-3:]) for i in image_paths
]
image_paths_modified[0]

vis_df = pd.DataFrame(tsne_labels)
vis_df['tsne_x'] = tsne_Y[:, 1]
vis_df['tsne_y'] = tsne_Y[:, 0]
vis_df['plot_id'] = vis_df['plot']
vis_df['scan_date'] = vis_df['scan_date'].astype(int)
vis_df['image_path'] = image_paths_modified

vis_df.to_csv(f'./{task_name}_ep{load_result_ep}.csv')