def get_2d_coordinates_tsne(multinet, output_format="json", verbose=True):

    embedding = multinet.embedding
    X = embedding[0]
    indices = embedding[1]
    if verbose:
        multinet.monitor("Doing the TSNE reduction to 2 dimensions!")
    # parallel_tsne is a module-level flag selecting the multicore backend when it is available
    if parallel_tsne:
        X_embedded = TSNE(n_components=2,
                          n_jobs=mp.cpu_count()).fit_transform(X)
    else:
        X_embedded = TSNE(n_components=2).fit_transform(X)

    dfr = pd.DataFrame(X_embedded, columns=['dim1', 'dim2'])
    dfr['node_names'] = [n for n in multinet.get_nodes()]
    dfr['node_codes'] = indices

    if output_format == "json":
        return dfr.to_json(orient='records')

    elif output_format == "dataframe":
        # pure pandas dataframe
        return dfr

    elif output_format == "pos_dict":
        output_dict = {}
        for index, row in dfr.iterrows():
            output_dict[row['node_names']] = (row['dim1'], row['dim2'])
        return output_dict

    else:
        return None
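
For orientation, here is a minimal standalone sketch of the same idea (embedding matrix to 2-D t-SNE to a per-node position dictionary). It uses scikit-learn's TSNE instead of the multicore variant, and the random embedding plus the node_0, node_1, ... names are stand-ins for whatever the multinet object would provide.

# Minimal sketch: reduce an embedding matrix to 2-D and build a {node: (x, y)} dict.
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 16))                     # toy embedding: 50 nodes, 16 dims
node_names = [f"node_{i}" for i in range(50)]     # hypothetical node identifiers

X_2d = TSNE(n_components=2, perplexity=10, random_state=0).fit_transform(X)
dfr = pd.DataFrame(X_2d, columns=['dim1', 'dim2'])
dfr['node_names'] = node_names

pos_dict = {row['node_names']: (row['dim1'], row['dim2']) for _, row in dfr.iterrows()}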
Example No. 2
def compare_embeddings_in_group(config, data, n_samples, point_size=10, log=True, images_path=None):
    sample_idx = random.sample(range(len(data)), n_samples)

    raw_obs, raw_action, raw_reward = data[0]
    raw_action = torch.repeat_interleave(raw_action[1:], 2, dim=0)
    action_dim = raw_action.size(1)
    initial_episode_size = raw_action.size(0)
    actual_episode_size = initial_episode_size - (initial_episode_size % config.traj_len)

    rewards_ak = []
    embeddings_ak = []
    for k in sample_idx:
        raw_obs, raw_action, raw_reward = data[k]
        raw_embeddings = torch.repeat_interleave(raw_action[1:], 2, dim=0)[:actual_episode_size] \
            .reshape([actual_episode_size // config.traj_len,
                      config.traj_len * action_dim])

        rewards = torch.repeat_interleave(raw_reward[1:]/2, 2, dim=0)[:actual_episode_size] \
            .reshape([actual_episode_size // config.traj_len,
                      config.traj_len]).sum(axis=1)
        for idx, vector in enumerate(raw_embeddings):
            embeddings_ak.append(vector.numpy())
            rewards_ak.append(rewards[idx])

    tsne_ak_2d = TSNE(perplexity=30, n_components=2, init='random', n_iter=3500, random_state=32, n_jobs=8)
    embeddings_ak_2d = tsne_ak_2d.fit_transform(np.array(embeddings_ak))

    dyne_emb_ak = []
    for k in sample_idx:
        mu, logvar, _ = data.transform_episode(k)
        for idx, vector in enumerate(mu):
            dyne_emb_ak.append(vector.numpy())

    tsne_dyne_ak_2d = TSNE(perplexity=30, n_components=2, init='random', n_iter=3500, random_state=32, n_jobs=8)
    embeddings_dyne_ak_2d = tsne_dyne_ak_2d.fit_transform(np.array(dyne_emb_ak))

    fig, axes = plt.subplots(2, 2, figsize=(20, 20), constrained_layout=True)
    tsne_plot_2d('raw actions by time', embeddings_ak_2d,
                 color_style='time', rewards=rewards_ak, size=point_size,
                 log=False, ax=axes[0][0], episodes_num=n_samples)

    tsne_plot_2d('dyne actions by time', embeddings_dyne_ak_2d,
                 color_style='time', rewards=rewards_ak, size=point_size,
                 log=False, ax=axes[0][1], episodes_num=n_samples)

    tsne_plot_2d('raw actions by rewards', embeddings_ak_2d,
                 color_style='rewards', rewards=rewards_ak, size=point_size,
                 log=False, ax=axes[1][0], episodes_num=n_samples)

    tsne_plot_2d('dyne actions by rewards', embeddings_dyne_ak_2d,
                 color_style='rewards', rewards=rewards_ak, size=point_size,
                 log=False, ax=axes[1][1], episodes_num=n_samples)

    fig.suptitle("{}_DynE-{}".format(config.env, config.traj_len), fontsize=16)
    fig.savefig(images_path / "{}_emb_comparison_{}_samples.png".format(config.env, n_samples),
                format='png', dpi=150, bbox_inches='tight')
    if log:
        wandb.log({'Embeddings Comparison': wandb.Image(fig)})
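
The trajectory-chunking step above (repeat-interleave the actions, truncate to a multiple of traj_len, reshape into flattened trajectories) in isolation, with toy tensors; the sizes are illustrative only.

# Sketch of the chunking pattern, assuming nothing beyond PyTorch itself.
import torch

traj_len = 4
raw_action = torch.randn(51, 3)                                  # toy episode: 51 steps, 3-dim actions
action = torch.repeat_interleave(raw_action[1:], 2, dim=0)       # 100 steps after repeating each twice
actual_size = action.size(0) - (action.size(0) % traj_len)       # truncate to a multiple of traj_len
chunks = action[:actual_size].reshape(actual_size // traj_len,   # one row per trajectory chunk
                                      traj_len * action.size(1))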
Example No. 3
def create_compute_tsne_components_function(input_dim, target_dim,
                                            save_folder):
    # Get t-SNE function
    tsne = TSNE(n_jobs=4)
    # The fully parameterized variant below is intentionally disabled; its settings
    # (number_of_jobs, perplexity, ...) are module-level values not shown in this snippet.
    if False:
        tsne = TSNE(n_jobs=number_of_jobs,
                    n_components=target_dim,
                    perplexity=perplexity,
                    early_exaggeration=early_exaggeration,
                    learning_rate=learning_rate,
                    n_iter=n_iter,
                    n_iter_without_progress=n_iter_without_progress,
                    min_grad_norm=min_grad_norm,
                    metric=metric,
                    init=init,
                    verbose=verbose,
                    random_state=random_state,
                    method=method,
                    angle=angle)

    # Create the function which we are interested in
    def compute_tsne_components(in_feature_file_path, out_feature_file_path):
        xprint("Processing '{}'...".format(in_feature_file_path))
        with open(in_feature_file_path, "rb") as in_feature_file:
            # Read features from file
            features = np.reshape(
                np.fromfile(in_feature_file, dtype=feature_dtype),
                (-1, input_dim))

            # Compute t-SNE components
            components = tsne.fit_transform(features)

            xprint("features.shape:", features.shape)
            xprint("components.shape:", components.shape)

            if produce_output_files:
                xprint("Creating '{}'...".format(out_feature_file_path))
                with open(out_feature_file_path, "wb") as out_feature_file:
                    # Write standardized features to file
                    components.tofile(out_feature_file)

            if produce_plots:
                create_plots(components,
                             title=os.path.basename(in_feature_file_path),
                             total_variance=np.sum(np.var(features, axis=0)),
                             save_folder=save_folder)

    return compute_tsne_components
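
A compact sketch of the closure-factory pattern used above, with the undefined module-level settings (number_of_jobs, feature_dtype, xprint, ...) replaced by explicit arguments; it uses scikit-learn's TSNE so it runs as-is.

import numpy as np
from sklearn.manifold import TSNE

def make_tsne_reducer(input_dim, target_dim=2):
    # The reducer is configured once; the returned function reshapes a flat
    # float32 buffer into (n_samples, input_dim) and reduces it.
    tsne = TSNE(n_components=target_dim)

    def reduce_features(flat_values):
        features = np.reshape(np.asarray(flat_values, dtype=np.float32), (-1, input_dim))
        return tsne.fit_transform(features)

    return reduce_features

reduce_fn = make_tsne_reducer(input_dim=64)
components = reduce_fn(np.random.default_rng(0).normal(size=100 * 64))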
Example No. 4
def main():
    parser = argparse.ArgumentParser(description='main function parser')
    parser.add_argument('--path', type=str, help='load file path', required=True)
    parser.add_argument('--dump_dir', type=str, help='dump directory', default=None)
    parser.add_argument('--size', type=int, default=1000, help='embedding vector size')
    args = parser.parse_args()

    embeddings, labels = load(args.path)
    embeddings = np.array(embeddings)

    output = args.path.split('/')[-1]
    # # UMAP
    n_neighbors = [15] #, 35, 55, 75]
    min_dists = [0.1] #0.001, 0.01, 0.1]
    for min_dist in min_dists:
        for n_neighbor in n_neighbors:
            start = time.time()
            weights = umap.UMAP(n_neighbors=n_neighbor, min_dist=min_dist).fit_transform(embeddings)
            finish = time.time()
            print(f'time: {finish-start} s', flush=True)
            os.makedirs(f'graph/umap/{output}', exist_ok=True)
            show(weights, labels, f'graph/umap/{output}/min_dist:{min_dist}_neighbor:{n_neighbor}.svg')

    # t-SNE
    perplexities = [30] #10, 20, 30, 40, 50]
    for perplexity in perplexities:
        start = time.time()
        tsne_model = TSNE(n_components=2, perplexity=perplexity, n_jobs=10)
        weights = tsne_model.fit_transform(embeddings)
        finish = time.time()
        print(f'time: {finish-start} s', flush=True)
        os.makedirs(f'graph/tsne/{output}', exist_ok=True)
        show(weights, labels, f'graph/tsne/{output}/perplexity:{perplexity}.svg')
Example No. 5
def main(path):
    embs = pool_sentence_embs(path)
    print("Dimension",
          embs.shape)  #number sentences X BERT hidden dimension (768)
    df = pd.read_csv('master_df_ALL.csv', encoding='utf-8', index_col=False)
    filter_name = 'Coreference'
    target = df[filter_name]
    #df['Sentences'].to_csv("master_ALL", header=None, index=False)
    #target = [1., 0., 0., 1., 1., 1., 0., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1.] + [0. for i in range(22)]
    print(len(target))
    embeddings = TSNE(n_jobs=4, random_state=1).fit_transform(
        embs)  #t-SNE reduces 768 dimension to 2D or 3D
    vis_x = embeddings[:, 0]
    vis_y = embeddings[:, 1]
    plt.scatter(vis_x,
                vis_y,
                c=target,
                cmap=ListedColormap(["blue", "red"]),
                marker='.',
                s=50)
    plt.title(filter_name +
              " filter (red=passed filter, blue=did not pass filter)")
    # plt.colorbar(ticks=range(10))
    # plt.clim(-0.5, 9.5)
    plt.ioff()
    #plt.show()
    plt.savefig(filter_name)
Example No. 6
def run_tSNE(natural_embed, n_jobs, perplexity):
    '''
    Apply t-SNE to the input data.

    The GPU version requires CUDA 9.0 and the tsnecuda package, installed with
        conda install tsnecuda -c cannylab
    The multicore CPU version can be installed with
        pip install MulticoreTSNE

    INPUT:
        natural_embed: 2d numpy array with size [number of points, embedding length]
        n_jobs: number of CPU cores to use
        perplexity: t-SNE perplexity
    OUTPUT:
        X_embedded: 2d numpy array with size [number of points, 2]
    '''
    X = natural_embed

    # CPU Sklearn
    # from sklearn.manifold import TSNE
    # tsne = TSNE(perplexity=perplexity, n_iter=5000, n_iter_without_progress=800, learning_rate=20, metric='cosine')
    # X_embedded = tsne.fit_transform(X)

    # CPU
    from MulticoreTSNE import MulticoreTSNE as TSNE
    tsne = TSNE(n_jobs=n_jobs, perplexity=perplexity, n_iter=5000, n_iter_without_progress=800, learning_rate=20, metric='cosine')
    X_embedded = tsne.fit_transform(X)

    # GPU
    # from tsnecuda import TSNE
    # X_embedded = TSNE(n_components=2, perplexity=30, learning_rate=10).fit_transform(X)

    return X_embedded
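
The docstring above describes three possible backends; a hedged sketch of the usual fallback pattern is to try the multicore package first and fall back to scikit-learn (the tsnecuda GPU path is left out here).

import numpy as np

# Pick a CPU t-SNE backend at import time, assuming at least one package is installed.
try:
    from MulticoreTSNE import MulticoreTSNE as TSNE   # multicore CPU implementation
except ImportError:
    from sklearn.manifold import TSNE                 # single-process fallback

def run_tsne_2d(embed, perplexity=30):
    return TSNE(n_components=2, perplexity=perplexity).fit_transform(embed)

natural_2d = run_tsne_2d(np.random.default_rng(0).normal(size=(200, 64)))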
Example No. 7
def classifier_choice(method='tsne', neighbors=30, dimensions=2):
    if method in "tsne":
        return TSNE(n_components=dimensions, perplexity=30, verbose=1)
    elif method in "pca":
        return decomposition.TruncatedSVD(n_components=dimensions)
    elif method in "isomap":
        return manifold.Isomap(n_neighbors=neighbors, n_components=dimensions)
    elif method in "lle":
        return manifold.LocallyLinearEmbedding(n_neighbors=neighbors,
                                               n_components=dimensions,
                                               method='standard')
    elif method in "mlle":
        return manifold.LocallyLinearEmbedding(n_neighbors=neighbors,
                                               n_components=dimensions,
                                               method='modified')
    elif method in "hlle":
        return manifold.LocallyLinearEmbedding(n_neighbors=neighbors,
                                               n_components=dimensions,
                                               method='hessian')
    elif method in "ltsa":
        return manifold.LocallyLinearEmbedding(n_neighbors=neighbors,
                                               n_components=dimensions,
                                               method='ltsa')
    elif method in "mds":
        return manifold.MDS(n_components=dimensions, n_init=1, max_iter=100)
    elif method in "trees":
        trees = ensemble.RandomTreesEmbedding(n_estimators=200, max_depth=5)
        pca = decomposition.TruncatedSVD(n_components=dimensions)
        return Pipeline([('Random Tree Embedder', trees), ('PCA', pca)])
    elif method in "spectral":
        return manifold.SpectralEmbedding(n_components=dimensions,
                                          eigen_solver="arpack")
    else:
        print('Please use valid method')
def get_data(n_cmd, n_spk, only_missed=False):
    if only_missed:
        # most popular MIS-CLASSIFIED command based on utterances count
        top_cmd = itemfreq(y_command[y_missed.astype('int32')])
        top_spk = itemfreq(y_speaker[y_missed.astype('int32')])
    else:
        top_spk = itemfreq(y_speaker)
        top_cmd = itemfreq(y_command)
    top_cmd = top_cmd[np.argsort(top_cmd[:, 1])][::-1]
    top_cmd = top_cmd[:, 0]

    # most speaker command based on utterances count
    top_spk = top_spk[np.argsort(top_spk[:, 1].astype('int32'))][::-1]
    top_spk = top_spk[:, 0]

    spk = top_spk[:n_spk]
    cmd = top_cmd[:n_cmd]
    ids = get_indices(speaker_set=spk, command_set=cmd)
    if only_missed:
        ids = np.array([i for i in ids if i in y_missed], dtype='int32')

    y_cmd = y_command[ids]
    y_spk = y_speaker[ids]

    z_org = Z_original[ids]
    z_max = Z_maximize[ids]

    tsne = TSNE(random_state=SEED)
    t = tsne.fit_transform(np.concatenate((z_org, z_max), axis=0))
    t_org = t[:z_org.shape[0]]
    t_max = t[z_org.shape[0]:]

    return t_org, t_max, y_cmd, y_spk
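
The concatenate-then-split pattern used above (embedding two related sets of latent vectors in a single shared t-SNE space so their coordinates are comparable), shown standalone with random stand-in data and scikit-learn's TSNE.

import numpy as np
from sklearn.manifold import TSNE

rng = np.random.default_rng(0)
z_org = rng.normal(size=(100, 32))              # stand-in for Z_original[ids]
z_max = rng.normal(loc=1.0, size=(100, 32))     # stand-in for Z_maximize[ids]

# One joint fit keeps both point sets in the same 2-D space.
t = TSNE(n_components=2, random_state=0).fit_transform(np.concatenate((z_org, z_max), axis=0))
t_org, t_max = t[:z_org.shape[0]], t[z_org.shape[0]:]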
Example No. 9
def tsne_reduction(samples,
                   perplexity,
                   data=None,
                   n_components=2,
                   l_r=200,
                   dim=2,
                   ex=12,
                   iterations=5000,
                   verbosity=0):
    if (samples is None) and (data is not None):
        samples = data[:, :-1]
        targets = data[:, -1]

    # tsne = manifold.TSNE(n_components = dim, init='pca', learning_rate = l_r,
    #                         perplexity=perplexity, early_exaggeration = ex,
    #                         n_iter = iterations, random_state=data_handling.RANDOM_SEED,
    #                         verbose = verbosity)

    tsne = TSNE(n_components=dim,
                n_jobs=-1,
                learning_rate=l_r,
                perplexity=perplexity,
                early_exaggeration=ex,
                n_iter=iterations,
                random_state=data_handling.RANDOM_SEED,
                verbose=verbosity)

    reduced_samples = tsne.fit_transform(samples)

    return reduced_samples, tsne
Example No. 10
def calcTSNEMulti(data, iterations, perplexity, learning_rate):
    tsne = TSNE(n_jobs=4,
                perplexity=perplexity,
                n_iter=iterations,
                learning_rate=learning_rate)
    Y = tsne.fit_transform(data)
    return data.assign(x=Y[:, 0], y=Y[:, 1])
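
A short usage sketch for the pattern above of attaching the 2-D coordinates back onto the input DataFrame with assign; scikit-learn's TSNE is used here so the sketch runs without MulticoreTSNE, and the column names x/y match the function's output.

import numpy as np
import pandas as pd
from sklearn.manifold import TSNE

data = pd.DataFrame(np.random.default_rng(0).normal(size=(60, 8)))
Y = TSNE(n_components=2, perplexity=15, random_state=0).fit_transform(data.values)
data_2d = data.assign(x=Y[:, 0], y=Y[:, 1])     # original columns plus t-SNE coordinates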
Example No. 11
    def train(self, parameters):

        tsne = TSNE(**parameters)
        tsne_outputs = tsne.fit_transform(self.x_train)

        utils.save_data_to_pkl(tsne_outputs,
                               tsne_outputs_path + 'tsne_outputs.p')
Example No. 12
def main(feats_path):
    with open(feats_path, 'rb') as handle:
        unpickler = pickle.Unpickler(handle)
        labels = unpickler.load()

    labels = {
        name: vector
        for name, vector in labels.items() if vector is not None
    }
    features = np.asarray(list(labels.values()))

    print('[INFO] Conducting t-SNE on ' + feats_path)
    tsne = TSNE(metric='braycurtis',
                verbose=1,
                n_iter=5000,
                random_state=42,
                n_jobs=-1)
    projection = tsne.fit_transform(features)

    # save reduced vectors
    base = path.basename(feats_path)
    name = path.splitext(base)[0]

    output = name + '_tsne.pickle'
    print('[INFO] Saving reduced vectors to ' + output)
    with open(output, 'wb') as handle:
        pickle.dump(projection, handle)
Example No. 13
def dim_red_plot(plt_type,
                 emb,
                 vocab,
                 output_dir,
                 n_components=2,
                 random_state=42):
    print(f"-- Start {plt_type} --")
    if plt_type == 'tsne':
        new_values = TSNE(n_components=n_components,
                          random_state=random_state,
                          n_jobs=10,
                          verbose=2).fit_transform(emb)
        x, y = new_values[:, 0], new_values[:, 1]

    elif plt_type == 'umap':
        new_values = umap.UMAP(n_components=n_components,
                               random_state=random_state).fit_transform(emb)
        x, y = new_values[:, 0], new_values[:, 1]

    print("-- Start ploting --")
    plt.figure(figsize=(16, 16))
    plt.scatter(x, y)
    # for i in range(len(x)):
    #     plt.annotate(vocab[i], xy=(x[i], y[i]), xytext=(
    #         5, 2), textcoords="offset points", ha="right", va="bottom")
    plt.savefig(os.path.join(output_dir, f'viz/emb_{plt_type}.png'))
Example No. 14
def draw(x, y):
    from matplotlib.colors import ListedColormap
    from MulticoreTSNE import MulticoreTSNE as TSNE

    print("TSNE: fitting start...")
    tsne = TSNE(2, n_jobs=4, perplexity=30)
    Y = tsne.fit_transform(x)

    # matplotlib_axes_logger.setLevel('ERROR')
    labels = [
        'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'open'
    ]
    id_to_label = {i: label for i, label in enumerate(labels)}
    y_true = pd.Series(y)
    plt.style.use('ggplot')
    n_class = y_true.unique().shape[0]
    colors = ('gray', 'lightgreen', 'plum', 'DarkMagenta', 'SkyBlue',
              'PaleTurquoise', 'DeepPink', 'Gold', 'Orange', 'Brown',
              'DarkKhaki')

    fig, ax = plt.subplots(figsize=(9, 6), )
    la = [i for i in range(n_class)]
    la = sorted(la, reverse=True)
    cmap = ListedColormap(colors)
    for idx, label in enumerate(la):
        ix = y_true[y_true == label].index
        # use separate names so the function arguments x and y are not shadowed
        xs = Y[:, 0][ix]
        ys = Y[:, 1][ix]
        ax.scatter(xs, ys, c=cmap(idx), label=id_to_label[label], alpha=0.5)

    # Shrink current axis by 20%
    ax.set_title('proto_loss')
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
def generate2dftEmb():
    global w2id, w, i, word, tsne, post_2d
    ####
    # Load the fine-tuned GloVe embeddings from the pickle file into an embedding map.
    ####
    with open(finetuned_path, 'rb') as f:
        embedding_map = pickle.load(f)
    ##########
    ##Converting glove embeddings to numpy matrix where each row contains embedding of a word.
    ##Adding words to "word to id" and "id to word" maps
    ##########
    w2id = {}
    id2w = {}
    w = np.zeros((len(embedding_map.keys()), 300))
    for i, word in enumerate(embedding_map.keys()):
        w2id[word] = i
        id2w[i] = word
        w[i] = embedding_map[word]
    ######
    ##Applying t-SNE to reduce the dimension of the embedding from 300D to 2D.
    ######
    tsne = TSNE(n_jobs=12)
    post_2d = tsne.fit_transform(w)
    return post_2d, w2id, w
def generate2dpre():
    global word, i, pre_w2id, tsne, pre_2d
    pre_vocab = []
    pre_vectors = []
    # Read the pretrained GloVe file once: the first token is the word, the rest is its vector.
    # (Reading the file object twice, as before, left the second loop with an exhausted iterator.)
    with open(pretrained_path, 'r') as pre:
        for line in pre:
            embeds = line.rstrip().split(" ")
            word = embeds[0]
            pre_vocab.append(word)
            pre_vectors.append(embeds[1:])

    ##########
    ## Converting pretrained GloVe embeddings to a numpy matrix where each row is a word's embedding.
    ## Adding words to the "word to id" map.
    ##########
    pre_w = np.zeros((len(pre_vocab), 300))
    pre_w2id = {}
    for i, vec in enumerate(pre_vectors):
        pre_w[i, :] = vec
        pre_w2id[pre_vocab[i]] = i
    ######
    ##Applying t-SNE to reduce the dimension of the embedding from 300D to 2D.
    ######
    tsne = TSNE(n_jobs=12)
    pre_2d = tsne.fit_transform(pre_w)
    return pre_2d, pre_w2id, pre_w
Example No. 17
def plot_distribution(
        epoch,
        train,
        #  acc,
        path,
        data_x,
        #  true_y,
        pred_y,
        learning_rate=100,
        n_jobs=-1):
    print("plotting image on " + path + "...")
    if not os.path.exists(path):
        os.makedirs(path)
    tsne_model = TSNE(n_components=2,
                      learning_rate=learning_rate,
                      n_jobs=n_jobs)
    #  pca_model = PCA(n_components=2)

    data_x = np.array(data_x)
    if len(data_x.shape) > 2:
        data_temp = []
        for data in data_x:
            data_temp.append(data.reshape(-1))
        data_x = np.array(data_temp)

    transformed = tsne_model.fit_transform(data_x)
    #  transformed = pca_model.fit_transform(data_x)
    xs = transformed[:, 0]
    ys = transformed[:, 1]

    #  draw_plot(xs, ys, train, epoch, true_y, os.path.join(path, "true_label"))
    draw_plot(xs, ys, train, epoch, pred_y, path)
Example No. 18
def tsne_executor(X, y, logger, path_logs):
    check_input_type(
        ['epi'],
        "t-SNE experiment work just with epigenetic data, {} found".format(
            config['general']['input_type']))

    cell_lines = config['general']['cell_lines']

    tasks_dict = config['general']['tasks']

    results = {}
    for t in tasks_dict:
        task_name, X_filtered, y_filtered = filter_labels(X, y, t)
        logger.debug("TASK: {}".format(task_name))

        # use just half of the available CPUs so we do not overload the machine
        cpus = multiprocessing.cpu_count() // 2
        logger.debug("Using {} cpus".format(cpus))

        for cl, data, labels in zip(cell_lines, X_filtered, y_filtered):
            logger.debug("Computing t-SNE for {}".format(cl))

            tsne = TSNE(perplexity=config['tsne']['perplexity'],
                        n_jobs=cpus)  # TODO: add parameters
            tsne_results = tsne.fit_transform(data)
            assert len(tsne_results) == len(labels)
            # append the labels as an extra column so they are saved with the t-SNE results
            tsne_results = np.c_[tsne_results, labels]
            results["{}_{}".format(task_name, cl)] = tsne_results

    save_tsne(path_logs, "tsne_results", results)
    if config['tsne']['save_plots']:
        plot_tsne(results, path_logs, "tsne_plot")
Example No. 19
def plot_tsne(experience=None, latent_states=None, rewards=None):
    if latent_states is None or rewards is None:
        latent_states = np.array([
            list(rssm_state.prev_state.stoch)
            for rssm_state in experience['agent_infos']
        ])
        rewards = np.array(experience['reward'])
    np.random.seed(0)
    perm = np.random.permutation(10000)
    latent_states = latent_states[perm]
    rewards = rewards[perm]
    feature_cols = ['axis_' + str(i) for i in range(latent_states.shape[1])]
    df = DataFrame(latent_states, columns=feature_cols)
    df['y'] = rewards
    time_start = time()
    tsne = TSNE(n_components=2,
                verbose=1,
                perplexity=1000,
                n_iter=1000,
                n_jobs=16)
    tsne_results = tsne.fit_transform(df[feature_cols].values)
    print('t-SNE done! Time elapsed: {} seconds'.format(time() - time_start))
    pickle.dump(tsne_results, open('tsne_results.pkl', 'wb'))
    df['tsne-2d-one'] = tsne_results[:, 0]
    df['tsne-2d-two'] = tsne_results[:, 1]
    sns.scatterplot(x="tsne-2d-one",
                    y="tsne-2d-two",
                    hue="y",
                    palette=sns.color_palette("flare", as_cmap=True),
                    data=df,
                    alpha=0.6,
                    s=5)
    plt.show()
Example No. 20
def dimensionality_reduction(X, algorithm="PCA"):
    """Reduce the dimensionality of the AISNPs
    :param X: One-hot encoded 1kG AISNPs.
    :type X: pandas DataFrame
    :param algorithm: The type of dimensionality reduction to perform.
        One of {PCA, UMAP, t-SNE}
    :type algorithm: str
    :returns: The transformed X DataFrame, reduced to 3 components by <algorithm>,
    and the dimensionality reduction Transformer object.
    """
    n_components = 3

    if algorithm == "PCA":
        reducer = PCA(n_components=n_components)
    elif algorithm == "t-SNE":
        reducer = TSNE(n_components=n_components, n_jobs=4)
    elif algorithm == "UMAP":
        reducer = umap.UMAP(n_components=n_components,
                            min_dist=0.2,
                            metric="dice",
                            random_state=42)
    else:
        return None, None

    X_reduced = reducer.fit_transform(X.values)

    return pd.DataFrame(X_reduced, columns=["x", "y", "z"],
                        index=X.index), reducer
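
A brief usage sketch for dimensionality_reduction above; it assumes the function and its PCA/TSNE/umap imports are in scope, and uses random one-hot-like data with the PCA branch so no optional dependency is required.

import numpy as np
import pandas as pd

# Hypothetical input: 120 samples, 30 one-hot encoded columns.
X = pd.DataFrame(np.random.default_rng(0).integers(0, 2, size=(120, 30)).astype(float))

X_reduced, reducer = dimensionality_reduction(X, algorithm="PCA")
print(X_reduced.columns.tolist())   # ['x', 'y', 'z']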
def decompose(dimred, dim, nneigh):
    if dimred == 'MDS':  # slowest!
        embedding = MDS(n_components=dim,
                        n_init=__inits,
                        max_iter=__iters,
                        n_jobs=-1,
                        dissimilarity=__dis)
    elif dimred == 'ISOMAP':  # slow
        embedding = Isomap(n_neighbors=nneigh, n_components=dim, n_jobs=-1)
    elif dimred == 'LLE':  # slow-acceptable
        embedding = LocallyLinearEmbedding(n_neighbors=nneigh,
                                           n_components=dim,
                                           n_jobs=-1)
    elif dimred == 'TSNE':  # acceptable
        embedding = TSNE(n_components=dim,
                         n_iter=__iters,
                         metric='precomputed',
                         learning_rate=__lrate,
                         perplexity=__perplexity)
    elif dimred == 'UMAP':  # fast
        # embedding = umap.UMAP(n_neighbors=nneigh, n_components=dim, metric=__dis, min_dist=0.1)
        embedding = umap.UMAP(n_neighbors=nneigh,
                              n_components=dim,
                              min_dist=0.1)
    elif dimred == 'PCA':  # fastest!
        embedding = PCA(n_components=dim)
    else:
        raise ValueError('dimension reduction method not recognized')

    positions = embedding.fit_transform(An)
    return positions
Example No. 22
    def reduce_dim(df, algorithm='pca'):
        """Reduce the dimensionality of the 55 AISNPs
        :param X: One-hot encoded 1kG 55 AISNPs.
        :type X: pandas DataFrame
        :param algorithm: The type of dimensionality reduction to perform.
            One of {pca, umap, tsne}
        :type algorithm: str

        :returns: The transformed X DataFrame, reduced to 3 components by <algorithm>.
        """
        ncols = len(df.columns)
        ohe = OneHotEncoder(categories=[range(4)] * ncols, sparse=False)

        n_components = 3

        X = ohe.fit_transform(df.values)
        if algorithm == 'pca':
            X_red = PCA(n_components=n_components).fit_transform(X)
        elif algorithm == 'tsne':
            # TSNE, Barnes-Hut have dim <= 3
            if n_components > 3:
                print(
                    'The Barnes-Hut method requires the dimensionality to be <= 3'
                )
                return None
            else:
                X_red = TSNE(n_components=n_components,
                             n_jobs=4).fit_transform(X)
        elif algorithm == 'umap':
            X_red = umap.UMAP(n_components=n_components).fit_transform(X)
        else:
            return None
        return pd.DataFrame(X_red, columns=['x', 'y', 'z'], index=df.index)
Example No. 23
def display_closestwords_tsnescatterplot(arg_path_to_model, word):
    model = word2vec.Word2Vec.load(arg_path_to_model)
    for i in range(len(word)):
        arr = np.empty((0, 300), dtype='f')
        word_labels = [word[i]]

        # get close words
        close_words = model.similar_by_word(word[i])

        # add the vector for each of the closest words to the array
        arr = np.append(arr, np.array([model[word[i]]]), axis=0)
        for wrd_score in close_words:
            wrd_vector = model[wrd_score[0]]
            word_labels.append(wrd_score[0])
            arr = np.append(arr, np.array([wrd_vector]), axis=0)

        # find tsne coords for 2 dimensions
        tsne = TSNE(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        Y = tsne.fit_transform(arr)

        x_coords = Y[:, 0]
        y_coords = Y[:, 1]
        # display scatter plot
        plt.scatter(x_coords, y_coords)

        for label, x, y in zip(word_labels, x_coords, y_coords):
            plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')

    # Changing the multiplier changes the 'zoom' of the plot (smaller multiplier = more zoomed in)
    plt.xlim(x_coords.min()*1, x_coords.max()*1)
    plt.ylim(y_coords.min()*1, y_coords.max()*1)
    plt.show()
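
The annotate-each-point pattern above in a minimal standalone form; the word list and the random 300-dimensional vectors are stand-ins for the word2vec model's nearest neighbours.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

word_labels = ['king', 'queen', 'prince', 'princess', 'throne']   # hypothetical words
arr = np.random.default_rng(0).normal(size=(len(word_labels), 300))

Y = TSNE(n_components=2, perplexity=2, random_state=0).fit_transform(arr)
plt.scatter(Y[:, 0], Y[:, 1])
for label, x, y in zip(word_labels, Y[:, 0], Y[:, 1]):
    plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')
plt.show()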
Example No. 24
def tsne_image(
    features, images, img_res=64, res=4000, background_color=255,  max_feature_size=-1, labels=None, point_radius=20, n_threads=0
):
    """
    Embeds images via tsne into a scatter plot.

    Parameters
    ---------
    features: numpy array
        Features to visualize

    images: list or numpy array
        Corresponding images to features.

    img_res: int
        Resolution to embed images at

    res: int
        Size of embedding image in pixels

    background_color: float or numpy array
        Background color value

    max_feature_size: int
        If input_feature_size > max_feature_size > 0, features are first
        reduced using PCA to the desired size.

    point_radius: int
        Size of the circle for the label image.

    n_threads: int
        Number of threads to use for t-SNE.

    labels: list or numpy array, optional
        Label for each image, used for drawing the circle image.
    """
    features = np.asarray(features, dtype=np.float32)
    assert len(features.shape) == 2

    print("Starting TSNE")
    s_time = time.time()
    if 0 < max_feature_size < features.shape[-1]:
        pca = PCA(n_components=max_feature_size)
        features = pca.fit_transform(features)

    if n_threads <= 0:
        n_threads = multiprocessing.cpu_count()
    model = TSNE(n_components=2, verbose=1, random_state=0, n_jobs=n_threads)

    f2d = model.fit_transform(features)
    print("TSNE done.", (time.time() - s_time))
    print("Starting drawing.")

    x_coords = f2d[:, 0]
    y_coords = f2d[:, 1]

    return image_util.draw_images_at_locations(images, x_coords, y_coords, img_res, res, background_color, labels, point_radius)
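
The PCA-then-t-SNE pre-reduction described in the docstring, as a standalone sketch with random features; the image-drawing helper is not reproduced.

import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

features = np.random.default_rng(0).normal(size=(200, 512)).astype(np.float32)
max_feature_size = 50

# Reduce very wide features with PCA first, then embed in 2-D with t-SNE.
if 0 < max_feature_size < features.shape[-1]:
    features = PCA(n_components=max_feature_size).fit_transform(features)

f2d = TSNE(n_components=2, random_state=0).fit_transform(features)
x_coords, y_coords = f2d[:, 0], f2d[:, 1]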
Example No. 25
def compute_tsne(X, y, n_class=2,
                 savepath=None,
                 xlim=(-50,50), ylim=(-50,50),
                 cls_lbl=['Benign','Tumor'],
                 title=' ',PCADIM=50):

    tsne = TSNE(n_jobs=4, random_state=1337)
    #X = PCA(n_components=PCADIM).fit_transform(X)
    embs = tsne.fit_transform(X)

    plt.figure(figsize=(10,10))
    for i in range(n_class):
        inds = np.where(y == i)[0]
        plt.scatter(embs[inds, 0], embs[inds, 1], color=colors[i], marker='*', s=30)
    if xlim:
        plt.xlim(xlim[0], xlim[1])
    if ylim:
        plt.ylim(ylim[0], ylim[1])
    plt.legend(cls_lbl)
    plt.grid(b=None)
    plt.title(title)
    if savepath:
        plt.savefig(savepath, dpi=300, bbox_inches='tight')
        plt.savefig(savepath.replace('.png','.pdf'), dpi=300, bbox_inches='tight')
    else:
        plt.show()
    plt.clf()
Example No. 26
def tsne_main(args):
    verbose_print(args, f'Loaded niche labels from {args.labels}')
    labels = np.load(args.labels)

    verbose_print(args, f'Running t-SNE based on {args.proximity}')
    proximities = np.load(args.proximity)

    x_tsne = TSNE(n_components=2, n_jobs=-1, perplexity=800,
                  learning_rate=100).fit_transform(proximities)

    if args.plot:
        # Show tSNE
        for i in range(4):
            idx = np.where(labels == i)[0]
            if len(idx) == 0:
                continue
            plt.plot(x_tsne[idx, 0], x_tsne[idx, 1], '.', label=f'Cluster {i}')
        plt.legend()
        plt.show()

    # Save the t-SNE coordinates
    np.save(args.tsne, x_tsne)
    verbose_print(args, f't-SNE coordinates saved to {args.tsne}')

    verbose_print(args, 'Niche clustering done!')
Example No. 27
def calc_tsne(
    X,
    n_jobs,
    n_components,
    perplexity,
    early_exaggeration,
    learning_rate,
    random_state,
    init="random",
    n_iter=1000,
    n_iter_early_exag=250,
):
    """
    TODO: Typing
    """
    tsne = TSNE(
        n_jobs=n_jobs,
        n_components=n_components,
        perplexity=perplexity,
        early_exaggeration=early_exaggeration,
        learning_rate=learning_rate,
        random_state=random_state,
        verbose=1,
        init=init,
        n_iter=n_iter,
        n_iter_early_exag=n_iter_early_exag,
    )
    X_tsne = tsne.fit_transform(X)
    logger.info("Final error = {}".format(tsne.kl_divergence_))
    return X_tsne
def plot_conti_code_tsne():
    data = pickle.load(
        open(
            "/home/patrick/repositories/hyperspectral_phenotyping_gan/experiments_{}/generated_code_noise{}_disc{}_conti{}_epoch{}.p"
            .format(opt.dataset, opt.n_noise, opt.n_dis, opt.n_conti,
                    opt.epoch), "rb"))
    labels = np.array(data["y"]).squeeze()
    labels_unique = np.unique(labels)
    code = np.array(data["z"]).copy()
    z = np.array(data["z"]).copy()
    # keep only the last two (continuous) latent code dimensions
    code = code[:, -2:]
    signatures = np.array(data["x"])
    tsne = TSNE(n_jobs=26, n_components=2, learning_rate=100)
    Y = tsne.fit_transform(code)

    colors = ["red", "green", "blue"]
    for idx, label in enumerate(labels_unique):
        data_tsne = Y[labels == label]
        plt.scatter(data_tsne[:, 0],
                    data_tsne[:, 1],
                    c=colors[idx],
                    alpha=0.3,
                    label=str(label))
    plt.legend()
    plt.show()
Example No. 29
def main():
    parser = argparse.ArgumentParser(description='main function parser')
    parser.add_argument('--path',
                        type=str,
                        help='load file path',
                        required=True)
    parser.add_argument('--dump_dir',
                        type=str,
                        help='dump directory',
                        default=None)
    parser.add_argument('--size',
                        type=int,
                        default=1000,
                        help='embedding vector size')
    args = parser.parse_args()

    embeddings, labels = load(args.path, args.size)

    output = args.path.split('/')[-1]
    # # UMAP
    # weights = umap.UMAP().fit_transform(embeddings)
    # show(weights, labels, 'umap.svg')

    # t-SNE
    tsne_model = TSNE(n_components=2)
    weights = tsne_model.fit_transform(embeddings)
    show(weights, labels, f'graph/{output}.svg')
Example No. 30
def tsne(codewords, label, num_of_class):
    """plot the T-SNE based on codewords and label
	Params:
	------------------
	codewords: (num_of_samples, dims_of_feature) numpy array
		codewords to be dimension reduction
	label: (num_of_samples,) numpy array
		data label
	num_of_class: int
		number of class

	Returns:
	------------------
	None
	"""
    starter_time = time.time()
    embeddings = TSNE(n_components=2, perplexity=50,
                      n_jobs=4).fit_transform(codewords)
    vis_x = embeddings[:, 0]
    vis_y = embeddings[:, 1]
    plt.scatter(vis_x,
                vis_y,
                c=label,
                cmap=plt.cm.get_cmap("jet", num_of_class),
                marker='.',
                s=100)
    plt.colorbar(ticks=range(num_of_class))
    plt.clim(-0.5, num_of_class - 0.5)
    print('TSNE TIME: {} seconds'.format(time.time() - starter_time))