예제 #1
0
    def tsne_plot(self,
                  labels,
                  colors,
                  filename="tsne.png",
                  show=False,
                  n_components=2,
                  perplexity=30.0,
                  early_exaggeration=12.0,
                  learning_rate=200.0,
                  n_iter=1000,
                  n_iter_without_progress=300,
                  min_grad_norm=1e-07,
                  metric='euclidean',
                  init='random',
                  verbose=0,
                  random_state=None,
                  method='barnes_hut',
                  angle=0.5,
                  n_jobs=None) -> None:
        """Embed ``self.embeddings`` with t-SNE and save a scatter plot as PNG.

        One point is drawn per entry of ``self.labels``; the ``.numpy()``
        value of each entry is used as an index into ``colors``. The legend
        shows one round marker per colour, captioned with ``labels``.

        Args:
            labels: Legend captions, one per entry of ``colors``.
            colors: Colours, indexed by each point's label value.
            filename: Output file name, joined onto ``self.embeddings_dir``.
            show: When true, ``plt.show()`` is called after saving.
            n_components, perplexity, early_exaggeration, learning_rate,
            n_iter, n_iter_without_progress, min_grad_norm, metric, init,
            verbose, random_state, method, angle, n_jobs: Forwarded
                unchanged to the ``tsne`` constructor.

        Raises:
            ValueError: If ``labels`` and ``colors`` differ in length.
        """
        if len(labels) != len(colors):
            raise ValueError(
                "The list of labels and colours should be the same!")

        out_path = os.path.join(self.embeddings_dir, filename)

        reducer = tsne(n_components=n_components,
                       perplexity=perplexity,
                       early_exaggeration=early_exaggeration,
                       learning_rate=learning_rate,
                       n_iter=n_iter,
                       n_iter_without_progress=n_iter_without_progress,
                       min_grad_norm=min_grad_norm,
                       metric=metric,
                       init=init,
                       verbose=verbose,
                       random_state=random_state,
                       method=method,
                       angle=angle,
                       n_jobs=n_jobs)
        points = reducer.fit_transform(self.embeddings)

        plt.figure(figsize=(6, 5))

        # Only the first two embedding columns are plotted.
        for row, raw_label in enumerate(self.labels):
            class_id = raw_label.numpy()
            plt.scatter(points[row, 0],
                        points[row, 1],
                        c=colors[class_id],
                        label=int(class_id))

        # Proxy artists: one white line with a filled round marker per colour.
        legend_handles = [
            Line2D(range(1),
                   range(1),
                   color="white",
                   marker='o',
                   markerfacecolor=shade)
            for shade in colors
        ]

        plt.legend(legend_handles, labels, numpoints=1, loc=1)
        plt.savefig(fname=out_path, format='png')
        if show:
            plt.show()
예제 #2
0
def project(embeddings, tokens, selectedTokens):
    """Embed ``embeddings`` with t-SNE and save a scatter plot of the result.

    Points whose token is contained in ``selectedTokens`` are coloured
    differently from the rest. The figure is written to
    ``../results/embeddings/projection``.

    Args:
        embeddings: Input handed to ``tsne().fit_transform``.
        tokens: Iterable of tokens; presumably one per row of ``embeddings``
            and in the same order -- TODO confirm against the caller.
        selectedTokens: Container tested with ``in`` for every token.
    """
    # The parenthesised single-argument form prints the same text under
    # Python 2 and is valid syntax under Python 3 (the bare print statement
    # is a SyntaxError there).
    print("Running tsne")

    projected = tsne().fit_transform(embeddings)
    # Boolean flag per token, used as the colour value of its point.
    colors = np.array([t in selectedTokens for t in tokens])

    plt.figure()
    plt.scatter(projected[:, 0], projected[:, 1], c=colors)
    plt.savefig("../results/embeddings/projection")
예제 #3
0
def project(embeddings, tokens):
    """Embed ``embeddings`` with t-SNE and save an annotated scatter plot.

    Every point is annotated with its token, offset by (-10, -10) points.
    The figure is written to ``../results/embeddings/projection``.

    Args:
        embeddings: Input handed to ``tsne().fit_transform``.
        tokens: Iterable of annotation labels, zipped with the embedded rows
            in order.
    """
    # The parenthesised single-argument form prints the same text under
    # Python 2 and is valid syntax under Python 3.
    print("Running tsne")

    projected = tsne().fit_transform(embeddings)

    plt.figure()
    plt.scatter(projected[:, 0], projected[:, 1])
    for label, x, y in zip(tokens, projected[:, 0], projected[:, 1]):
        plt.annotate(label, xy=(x, y), xytext=(-10, -10),
                     textcoords='offset points', size='x-small')
    plt.savefig("../results/embeddings/projection")
예제 #4
0
def visualize_clusters(X, all_labels):
    '''Plot a 2-D t-SNE embedding of ``X`` with one marker series per label.

    The embedding is loaded from ``Y_tsne_dog_cat.npy`` under
    ``c3d_feature_dir`` when that file exists; otherwise it is computed with
    ``tsne(n_components=2, random_state=0)`` and written there both as
    ``.npy`` and as ``.txt``. The label ``'both'`` is never plotted. The
    figure is saved as ``tsne_cat_dog_clusters.png`` and then shown.

    Args:
        X: Feature matrix passed to ``fit_transform`` when no cache exists.
        all_labels: One label per row of the embedding (list or array).
    '''
    # sort of like "unique"; the order of a set is arbitrary
    uniq_labels = list(set(all_labels))
    # Element-wise comparison below needs an array: comparing a plain list
    # to a label yields a single False and would select no rows at all.
    labels_arr = np.asarray(all_labels)

    tsne_output_file = os.path.join(c3d_feature_dir, 'Y_tsne_dog_cat.npy')
    tsne_output_txt_file = os.path.join(c3d_feature_dir, 'Y_tsne_dog_cat.txt')
    if os.path.isfile(tsne_output_file):
        Y = np.load(tsne_output_file)
    else:
        tsne_model = tsne(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        Y = tsne_model.fit_transform(X)
        np.save(tsne_output_file, Y)
        np.savetxt(tsne_output_txt_file, Y)

    # Positions 1 and 2 get a fixed colour. Every other position falls back
    # to matplotlib's colour cycle instead of hitting an unbound (or stale)
    # ``color`` variable.
    fixed_colors = {1: 'g', 2: 'r'}

    for count, label in enumerate(uniq_labels):
        # skip "both" label
        if label == 'both':
            continue
        this_label_ind = np.where(labels_arr == label)[0]
        print("label={}, len(this_label_ind)={}".format(
            label, len(this_label_ind)))

        plot_style = dict(marker='o' if count < 6 else 'v',
                          linestyle='',
                          ms=6,
                          label=label)
        if count in fixed_colors:
            plot_style['color'] = fixed_colors[count]

        pylab.plot(Y[this_label_ind, 0], Y[this_label_ind, 1], **plot_style)

    pylab.legend(numpoints=1, loc='upper left')
    pylab.xlim([-23, 17])
    pylab.ylim([-16, 16])
    pylab.grid()
    pylab.savefig('tsne_cat_dog_clusters.png')
    pylab.show()
예제 #5
0
def genrate_tsne(high_dim_repr, seed=4, perplexity=30):
    """Return a two-column DataFrame holding a 2-D t-SNE embedding.

    Args:
        high_dim_repr: Input handed to ``fit_transform``.
        seed: Passed to the ``tsne`` constructor as ``random_state``.
        perplexity: Passed to the ``tsne`` constructor.

    Returns:
        A ``pandas.DataFrame`` with columns ``c1`` and ``c2`` holding the
        first and second embedding coordinate.
    """
    message = 'compute tsne with perplexity {} and seed {}'
    print(message.format(perplexity, seed))

    reducer = tsne(n_components=2, perplexity=perplexity, random_state=seed)
    coords = reducer.fit_transform(high_dim_repr)

    frame = pd.DataFrame()
    for axis, column in enumerate(('c1', 'c2')):
        frame[column] = coords[:, axis]
    return frame
예제 #6
0
def sk_tsne():
    """Embed ``examples/123/true2.npy`` with exact t-SNE and show the result.

    Loads the array, runs scikit-learn's TSNE with ``method='exact'`` and
    ``verbose=2``, and shows the two embedding columns as round markers.
    """
    from sklearn.manifold import TSNE as tsne

    X_true = np.load('examples/123/true2.npy')  #[0:500]

    # A full pairwise distance matrix used to be computed here but was never
    # read; it only cost O(n^2) time and memory, so it has been removed.
    X_embedded = tsne(n_components=2, verbose=2,
                      method='exact').fit_transform(X_true)
    plt.figure()
    plt.plot(X_embedded[:, 0], X_embedded[:, 1], 'o')
    plt.show()
def visualize_clusters(X, all_labels):
    """Plot a 2-D t-SNE embedding of ``X`` with one marker series per label.

    The embedding is loaded from ``Y_tsne_dog_cat.npy`` under
    ``c3d_feature_dir`` when that file exists; otherwise it is computed with
    ``tsne(n_components=2, random_state=0)`` and written there both as
    ``.npy`` and as ``.txt``. The label ``"both"`` is never plotted. The
    figure is saved as ``tsne_cat_dog_clusters.png`` and then shown.

    Args:
        X: Feature matrix passed to ``fit_transform`` when no cache exists.
        all_labels: One label per row of the embedding (list or array).
    """
    # sort of like "unique"; the order of a set is arbitrary
    uniq_labels = list(set(all_labels))
    # Element-wise comparison below needs an array: comparing a plain list
    # to a label yields a single False and would select no rows at all.
    labels_arr = np.asarray(all_labels)

    tsne_output_file = os.path.join(c3d_feature_dir, "Y_tsne_dog_cat.npy")
    tsne_output_txt_file = os.path.join(c3d_feature_dir, "Y_tsne_dog_cat.txt")
    if os.path.isfile(tsne_output_file):
        Y = np.load(tsne_output_file)
    else:
        tsne_model = tsne(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        Y = tsne_model.fit_transform(X)
        np.save(tsne_output_file, Y)
        np.savetxt(tsne_output_txt_file, Y)

    # Positions 1 and 2 get a fixed colour. Every other position falls back
    # to matplotlib's colour cycle instead of hitting an unbound (or stale)
    # ``color`` variable.
    fixed_colors = {1: "g", 2: "r"}

    for count, label in enumerate(uniq_labels):
        # skip "both" label
        if label == "both":
            continue
        this_label_ind = np.where(labels_arr == label)[0]
        print("label={}, len(this_label_ind)={}".format(label, len(this_label_ind)))

        plot_style = dict(marker="o" if count < 6 else "v", linestyle="", ms=6, label=label)
        if count in fixed_colors:
            plot_style["color"] = fixed_colors[count]

        pylab.plot(Y[this_label_ind, 0], Y[this_label_ind, 1], **plot_style)

    pylab.legend(numpoints=1, loc="upper left")
    pylab.xlim([-23, 17])
    pylab.ylim([-16, 16])
    pylab.grid()
    pylab.savefig("tsne_cat_dog_clusters.png")
    pylab.show()
예제 #8
0
def highlight(embeddings, tokens, keyword):
    """Embed ``embeddings`` with t-SNE and highlight tokens containing ``keyword``.

    Points are coloured by whether ``keyword in token`` holds, and only the
    matching tokens are annotated. Axis ticks are hidden. The figure is
    saved as an SVG whose name embeds the keyword in double quotes.

    Args:
        embeddings: Input handed to ``tsne().fit_transform``.
        tokens: Iterable of tokens, zipped with the embedded rows in order.
        keyword: Value tested with ``in`` against every token.
    """
    # The parenthesised single-argument form prints the same text under
    # Python 2 and is valid syntax under Python 3.
    print("Running tsne")

    projected = tsne().fit_transform(embeddings)
    colors = np.array([keyword in t for t in tokens])

    plt.figure()
    plt.scatter(projected[:, 0], projected[:, 1], c=colors)
    for label, x, y in zip(tokens, projected[:, 0], projected[:, 1]):
        if keyword in label:
            plt.annotate(label, xy=(x, y), xytext=(-10, -10),
                         textcoords='offset points', size='x-small')
    # Hiding the ticks does not depend on the token, so do it once rather
    # than on every loop iteration.
    plt.xticks([])
    plt.yticks([])
    plt.savefig("../results/embeddings/highlight_\"%s\".svg" % keyword)
def visualize_clusters(X, all_labels):
    '''Plot a 2-D t-SNE embedding of ``X`` with one marker series per label.

    The embedding is read with ``np.loadtxt`` from ``Y_bhtsne.txt`` under
    ``c3d_feature_dir`` when that file exists; otherwise it is computed with
    ``tsne(n_components=2, random_state=0)`` and written to that same file
    as text. The figure is saved as ``tsne_video_clusters.png`` and shown.

    Args:
        X: Feature matrix passed to ``fit_transform`` when no cache exists.
        all_labels: One label per row of the embedding (list or array).
    '''
    # sort of like "unique"; the order of a set is arbitrary
    uniq_labels = list(set(all_labels))
    # Element-wise comparison below needs an array: comparing a plain list
    # to a label yields a single False and would select no rows at all.
    labels_arr = np.asarray(all_labels)

    tsne_output_file = os.path.join(c3d_feature_dir, 'Y_bhtsne.txt')
    if os.path.isfile(tsne_output_file):
        Y = np.loadtxt(tsne_output_file)
    else:
        tsne_model = tsne(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        Y = tsne_model.fit_transform(X)
        # Write text so the np.loadtxt branch above can read it back.
        # np.save would write binary data to 'Y_bhtsne.txt.npy', which the
        # cache check never finds.
        np.savetxt(tsne_output_file, Y)

    for count, label in enumerate(uniq_labels):
        this_label_ind = np.where(labels_arr == label)[0]
        print("count={}, label={}, len(this_label_ind)={}".format(
            count, label, len(this_label_ind)))
        marker = 'o' if count < 6 else 'v'
        pylab.plot(Y[this_label_ind, 0], Y[this_label_ind, 1],
                   marker=marker, linestyle='', ms=6, label=label)
    pylab.legend(numpoints=1, loc='upper left')
    pylab.xlim([-80, 60])
    pylab.savefig('tsne_video_clusters.png')
    pylab.show()
예제 #10
0
def add_embeddings(tags_lst,
                   tag_dict,
                   debug_dict,
                   token,
                   tag,
                   dim_reduce=False,
                   dims=300):
    """Record an embedding for ``tag`` the first time a wanted tag is seen.

    Nothing happens unless ``tag`` is in ``tags_lst`` and not yet a key of
    ``tag_dict``. Otherwise ``tag_dict[tag]`` is set to one of:

    * ``[token.vector_norm]`` when ``dims <= 1``;
    * the first row of a ``dims``-component t-SNE fit on two copies of
      ``token.vector`` when ``dim_reduce`` is true and ``dims < 300``;
    * ``list(token.vector)`` in every other case.

    ``debug_dict[tag]`` is set to ``token.text`` alongside it. Both
    dictionaries are modified in place; the function returns ``None``.
    """
    # Guard clause: only the first occurrence of a requested tag is stored.
    if tag not in tags_lst or tag in tag_dict:
        return

    if dims <= 1:
        vector = [token.vector_norm]  # l2 norm
    elif dim_reduce and dims < 300:
        # Shrink the vector to ``dims`` components; t-SNE is fitted on the
        # same vector twice and the first output row is kept.
        reduced = tsne(n_components=dims).fit_transform(
            [token.vector, token.vector])
        vector = list(reduced[0])
    else:
        vector = list(token.vector)

    tag_dict[tag] = vector
    debug_dict[tag] = token.text
예제 #11
0
import pandas as pd
from sklearn.manifold import TSNE as tsne
from matplotlib import pyplot as plt
import seaborn as sns

TRAIN_LOCATION = "/home/sanjeev/Documents/HeadMotionData/outputs/global_avg_output_train.csv"
VALID_LOCATION = "/home/sanjeev/Documents/HeadMotionData/outputs/global_avg_output_valid.csv"
OUT_TRAIN_LOCATION = "/home/sanjeev/Documents/HeadMotionData/outputs/global_avg_embed_train.csv"
OUT_VALID_LOCATION = "/home/sanjeev/Documents/HeadMotionData/outputs/global_avg_embed_valid.csv"

train_df = pd.read_csv(TRAIN_LOCATION)
valid_df = pd.read_csv(VALID_LOCATION)

# The last column is treated as the integer target, every other column as a
# feature. The targets are kept aside so they survive the replacement of the
# data frames below.
train_target = train_df.iloc[:, -1].astype(int)
valid_target = valid_df.iloc[:, -1].astype(int)
train_features = train_df.iloc[:, :-1]
valid_features = valid_df.iloc[:, :-1]
n_train = len(train_features)

embedder = tsne()

# scikit-learn's TSNE offers fit() and fit_transform() but no transform(),
# so it cannot project rows it was not fitted on. Both splits are therefore
# embedded in a single fit (which also puts them in one shared space) and
# split back afterwards.
# NOTE(review): assumes both CSV files have identical feature columns.
all_embedded = embedder.fit_transform(
    pd.concat([train_features, valid_features], ignore_index=True))
train_embedded = all_embedded[:n_train]
valid_embedded = all_embedded[n_train:]

train_df = pd.DataFrame(train_embedded, columns=["x", "y"])
valid_df = pd.DataFrame(valid_embedded, columns=["x", "y"])
# Attach the original targets, not the last embedding column.
train_df["target"] = train_target.values
valid_df["target"] = valid_target.values

train_df.to_csv(OUT_TRAIN_LOCATION, index=False)
valid_df.to_csv(OUT_VALID_LOCATION, index=False)

f, axes = plt.subplots(1, 2)
sns.scatterplot(y="y",
all_firing_array = np.asarray(data.normal_off_firing)

# Swap axes 1 and 2, then keep indices 80..159 of the (new) second axis.
X = data.all_normal_off_firing.swapaxes(1, 2)[:, 80:160, :]

#taste = 0
#X = data.normal_off_firing[taste][:,:,80:160].swapaxes(-1,-2)

# np.int was only an alias of the builtin int and was removed in NumPy 1.24;
# the builtin gives the same value on every NumPy version.
n_time = int(X.shape[1])
n_trials = X.shape[2]

# Reduce dimensions of every timepoint using TSNE
# Lay the slices X[:, :, trial] side by side along the last axis.
X_long = np.concatenate([X[:, :, trial] for trial in range(n_trials)],
                        axis=-1)

perm = np.random.permutation(np.arange(X_long.shape[1]))
X_long_perm = X_long[:, perm]

X_embedded = tsne(n_components=2, perplexity=35).fit_transform(X_long_perm.T)

time_idx = np.arange(n_time)
colors = time_idx / np.max(time_idx)
for trial_num in range(60):
    # NOTE(review): this picks rows 0, k, 2k, ... (k = trial_num + 1) of the
    # *permuted* embedding rather than one trial's contiguous block of time
    # points -- confirm this is the intended selection.
    rows = (trial_num + 1) * time_idx
    fig = plt.figure()
    plt.scatter(X_embedded[rows, 0], X_embedded[rows, 1], c=colors)
    plt.colorbar()
    plt.plot(X_embedded[rows, 0], X_embedded[rows, 1])
    plt.savefig(plot_dir + '/' + 'tsne_trial_%i.png' % trial_num)
    plt.close(fig)

# Colour every embedded point by its index along the second axis of X,
# reordered with the same permutation that was applied before embedding.
colors = np.matlib.repmat(range(n_time), 1, X.shape[2])[0, perm]
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=colors.flatten())
예제 #13
0
def t_sne(samples, use_scikit=False, files_dir=None, results_filename='result.dat', data_filename='data.dat',
          no_dims=DEFAULT_NO_DIMS, perplexity=DEFAULT_PERPLEXITY, theta=DEFAULT_THETA, eta=DEFAULT_ETA,
          iterations=DEFAULT_ITERATIONS, seed=DEFAULT_SEED, early_exaggeration=DEFAULT_EARLY_EXAGGERATION,
          gpu_mem=DEFAULT_GPU_MEM, randseed=DEFAULT_RANDOM_SEED, verbose=3):
    """
    Run t-sne on the supplied samples (N_samples x D_features array). It either:
    1) Calls the t_sne_bhcuda.exe (which should be in the Path of the OS somehow - maybe in the Scripts folder)
    which then runs t-sne either on the CPU or the GPU
    or 2) Calls the sklearn t-sne module (which runs only on CPU).

    Parameters
    ----------
    samples -- The N_examples X D_features array to t-sne
    use_scikit -- If yes use the sklearn t-sne implementation. Otherwise use the t_sne_bhcuda.exe
    files_dir -- The folder in which the t_sne_bhcuda.exe should look for the data_filename.dat and save the
    results_filename.dat
    results_filename -- The name of the file that the t_sne_bhcuda.exe saves the t-sne results in
    data_filename -- The name of the file that the t_sne_bhcuda.exe looks into for data to t-sne. This data file
    also has a header with all the parameters that the t_sne_bhcuda.exe needs to run.
    no_dims -- Number of dimensions of the t-sne embedding
    perplexity -- Defines the amount of samples whose distances are compared to every sample (check sklearn and
    van der Maaten's paper)
    theta -- If > 0 then the algorithm runs the Barnes-Hut approximation (with angle = theta). If = 0 then it runs
    the exact version. Values smaller than 0.5 do not add too much error. Must not be negative.
    eta -- The learning rate
    iterations -- The number of iterations (usually around 1000 should suffice)
    early_exaggeration -- The amount by which the samples are initially pushed apart. Used only in the scikit-learn
    version
    seed -- Set to a number > 0 if the amount of samples is too large to t-sne. Then the algorithm will t-sne the first
    seed number of samples. Then it will compare the euclidean distance between every other sample and these t-sned
    samples. For each non t-sned sample it will find the 5 closest t-sned samples and place the new sample on the point
    of the t-sne space that is given by the center of mass of those 5 closest samples. The mass of each closest
    sample is defined as the inverse of its euclidean distance to the new sample.
    gpu_mem -- If > 0 (and <= 1) then the t_sne_bhcuda.exe will run the euclidean distance calculations on the GPU
    (if possible) and will use (gpu_mem * 100) per cent of the available gpu memory to temporarily store results. If
    == 0 then the t_sne_bhcuda.exe will run only on the CPU. It has no effect if use_scikit = True
    randseed -- Set the random seed for the initialization of the samples on the no_dims plane. Not used by the
    scikit-learn path, which is created with random_state=None.
    verbose -- Define verbosity (0 = No output, 1 = Basic output, 2 = Full output)

    Returns
    -------
    A N_examples X no_dims array of the embedded examples, or None when use_scikit is set but sklearn cannot be
    imported.

    Raises
    ------
    ValueError -- If use_scikit is set and theta is negative.
    AssertionError -- If the t_sne_bhcuda executable exits with a non-zero return code.

    """
    if use_scikit:  # using python's scikit tsne implementation
        try:
            from sklearn.manifold import TSNE as tsne
        except ImportError:
            print('You do not have sklearn installed. Try calling t_sne with use_scikit=False '
                  'and gpu_mem=0 if you do not want to run the code in GPU.')
            return None
        # A negative theta used to leave ``method`` unassigned and crash
        # with an unrelated UnboundLocalError a few lines further down.
        if theta < 0:
            raise ValueError('theta must be >= 0, got {}'.format(theta))
        method = 'barnes_hut' if theta > 0 else 'exact'
        # Honour no_dims as documented instead of always embedding in 2-D.
        model = tsne(n_components=no_dims, perplexity=perplexity, early_exaggeration=early_exaggeration,
                     learning_rate=eta, n_iter=iterations, n_iter_without_progress=iterations,
                     min_grad_norm=0, metric="euclidean", init="random", verbose=verbose,
                     random_state=None, method=method, angle=theta)
        t_sne_results = model.fit_transform(samples)

        return t_sne_results

    else:  # using the C++/cuda implementation
        save_data_for_tsne(samples, files_dir, data_filename, theta, perplexity,
                           eta, no_dims, iterations, seed, gpu_mem, randseed)
        # Call t_sne_bhcuda and let it do its thing, echoing its output
        # line by line as it arrives.
        with Popen([_find_exe_dir(), ], cwd=files_dir, stdout=PIPE, bufsize=1, universal_newlines=True) \
                as t_sne_bhcuda_p:
            for line in iter(t_sne_bhcuda_p.stdout):
                print(line, end='')
                sys.stdout.flush()
            t_sne_bhcuda_p.wait()
        assert not t_sne_bhcuda_p.returncode, ('ERROR: Call to t_sne_bhcuda exited '
                                               'with a non-zero return code exit status, please ' +
                                               ('enable verbose mode and ' if not verbose else '') +
                                               'refer to the t_sne output for further details')

        return load_tsne_result(files_dir, results_filename)
예제 #14
0
def t_sne(samples, use_scikit=False, files_dir=None, results_filename='result.dat', data_filename='data.dat',
          no_dims=DEFAULT_NO_DIMS, perplexity=DEFAULT_PERPLEXITY, theta=DEFAULT_THETA, eta=DEFAULT_ETA,
          iterations=DEFAULT_ITERATIONS, seed=DEFAULT_SEED, early_exaggeration=DEFAULT_EARLY_EXAGGERATION,
          gpu_mem=DEFAULT_GPU_MEM, randseed=DEFAULT_RANDOM_SEED, verbose=2):
    """
    Run t-sne on the supplied samples (N_samples x D_features array). It either:
    1) Calls the t_sne_bhcuda.exe (which should be in the Path of the OS somehow - maybe in the Scripts folder for
    Windows or the python/bin folder for Linux) which then runs t-sne either on the CPU or the GPU
    or 2) Calls the sklearn t-sne module (which runs only on CPU).

    Parameters
    ----------
    samples -- The N_examples X D_features array to t-sne
    use_scikit -- If yes use the sklearn t-sne implementation. Otherwise use the t_sne_bhcuda.exe
    files_dir -- The folder in which the t_sne_bhcuda.exe should look for the data_filename.dat and save the
    results_filename.dat
    results_filename -- The name of the file that the t_sne_bhcuda.exe saves the t-sne results in
    data_filename -- The name of the file that the t_sne_bhcuda.exe looks into for data to t-sne. This data file
    also has a header with all the parameters that the t_sne_bhcuda.exe needs to run.
    no_dims -- Number of dimensions of the t-sne embedding
    perplexity -- Defines the amount of samples whose distances are compared to every sample (check sklearn and
    van der Maaten's paper)
    theta -- If > 0 then the algorithm runs the Barnes-Hut approximation (with angle = theta). If = 0 then it runs
    the exact version. Values smaller than 0.2 do not add too much error. Must not be negative.
    eta -- The learning rate
    iterations -- The number of iterations (usually around 1000 should suffice)
    early_exaggeration -- The amount by which the samples are initially pushed apart. Used only in the scikit-learn
    version
    seed -- Set to a number > 0 if the amount of samples is too large to t-sne. Then the algorithm will t-sne the first
    seed number of samples. Then it will compare the euclidean distance between every other sample and these t-sned
    samples. For each non t-sned sample it will find the 5 closest t-sned samples and place the new sample on the point
    of the t-sne space that is given by the center of mass of those 5 closest samples. The mass of each closest
    sample is defined as the inverse of its euclidean distance to the new sample.
    gpu_mem -- If > 0 (and <= 1) then the t_sne_bhcuda.exe will run the euclidean distance calculations on the GPU
    (if possible) and will use (gpu_mem * 100) per cent of the available gpu memory to temporarily store results. If
    == 0 then the t_sne_bhcuda.exe will run only on the CPU. It has no effect if use_scikit = True
    randseed -- Set the random seed for the initialization of the samples on the no_dims plane. Not used by the
    scikit-learn path, which is created with random_state=None.
    verbose -- Define verbosity. 0 = No output, 1 = Basic output, 2 = Full output, 3 = Also save t-sne results in
    interim files after every iteration. Option 3 is used to save all steps of t-sne to explore the way the algorithm
    separates the data (good for movies).

    Returns
    -------
    A N_examples X no_dims array of the embedded examples, or None when use_scikit is set but sklearn cannot be
    imported.

    Raises
    ------
    ValueError -- If use_scikit is set and theta is negative.
    AssertionError -- If the t_sne_bhcuda executable exits with a non-zero return code.

    """
    if use_scikit:  # using python's scikit tsne implementation
        try:
            from sklearn.manifold import TSNE as tsne
        except ImportError:
            print('You do not have sklearn installed. Try calling t_sne with use_scikit=False '
                  'and gpu_mem=0 if you do not want to run the code in GPU.')
            return None
        # A negative theta used to leave ``method`` unassigned and crash
        # with an unrelated UnboundLocalError a few lines further down.
        if theta < 0:
            raise ValueError('theta must be >= 0, got {}'.format(theta))
        method = 'barnes_hut' if theta > 0 else 'exact'
        # Honour no_dims as documented instead of always embedding in 2-D.
        model = tsne(n_components=no_dims, perplexity=perplexity, early_exaggeration=early_exaggeration,
                     learning_rate=eta, n_iter=iterations, n_iter_without_progress=iterations,
                     min_grad_norm=0, metric="euclidean", init="random", verbose=verbose,
                     random_state=None, method=method, angle=theta)
        t_sne_results = model.fit_transform(samples)

        return t_sne_results

    else:  # using the C++/cuda implementation
        save_data_for_tsne(samples, files_dir, data_filename, theta, perplexity,
                           eta, no_dims, iterations, seed, gpu_mem, verbose, randseed)
        # The data now lives in the file; drop this reference to it before
        # the external process runs.
        del samples
        # Call t_sne_bhcuda and let it do its thing. The pipe is opened in
        # text mode so each line is echoed as text; a bytes pipe would be
        # printed as b'...\n' reprs with doubled newlines.
        with Popen([_find_exe_dir(), ], cwd=files_dir, stdout=PIPE, bufsize=1, universal_newlines=True) \
                as t_sne_bhcuda_p:
            for line in iter(t_sne_bhcuda_p.stdout):
                print(line, end='')
                sys.stdout.flush()
            t_sne_bhcuda_p.wait()
        assert not t_sne_bhcuda_p.returncode, ('ERROR: Call to t_sne_bhcuda exited '
                                               'with a non-zero return code exit status, please ' +
                                               ('enable verbose mode and ' if not verbose else '') +
                                               'refer to the t_sne output for further details')

        return load_tsne_result(files_dir, results_filename)
# Previously saved embedding. NOTE(review): this value is overwritten by the
# fit_transform result below before being read in this section -- confirm
# whether the load is still needed.
t_tsne = np.load(r'D:\Data\George\Projects\SpikeSorting\Joana_Paired_128ch\2015-09-03\Analysis\klustakwik'+\
                 r'\threshold_6_5std\t_sne_results_final_allspikes.npy')




# T-SNE
# Python scikit-learn t-sne
t0 = time.time()
perplexity = 500.0
early_exaggeration = 100.0
learning_rate = 3000.0
theta = 0.0
# NOTE(review): method='barnes_hut' is combined with angle=0.0 here --
# confirm this pairing is intended.
model = tsne(n_components=2, perplexity=perplexity, early_exaggeration=early_exaggeration,
             learning_rate=learning_rate, n_iter=1000, n_iter_without_progress=500,
             min_grad_norm=1e-7, metric="euclidean", init="random", verbose=3,
             random_state=None, method='barnes_hut', angle=theta)
t_tsne = model.fit_transform(data_for_tsne)
# Transposed: one row per embedding dimension from here on.
t_tsne = t_tsne.T
t1 = time.time()
print("Scikit t-sne took {} seconds, ({} minutes), for {} spikes".format(t1-t0, (t1-t0)/60, up_to_extra_spike))

# save the python scikit generated t-sne results
threshold = 5.5
file_name = r'D:\Data\George\Projects\SpikeSorting\Joana_Paired_128ch\2015-09-03\Analysis\t_tsne_ivm_data_{}sp_{}per_{}ee_{}lr_{}tp_{}thres.pkl'\
    .format(len(indices_of_data_for_tsne), perplexity, early_exaggeration, learning_rate, number_of_time_points, threshold)
# The context manager closes the file even if pickling raises, and avoids
# binding a handle to the name ``file``.
with open(file_name, 'wb') as pickle_file:
    pickle.dump((ivm_data_filtered, t_tsne, juxta_cluster_indices_grouped, perplexity, early_exaggeration,
                 learning_rate), pickle_file)

예제 #16
0
# Paths of the images that actually contributed a HOG vector, in the same
# order as the rows appended to ``vector_list``.
# NOTE(review): assumes ``vector_list`` is empty before this loop -- confirm.
kept_paths = []
for i in range(1000):
    path = df.local_path.loc[i]
    img = imread(path)

    # Images with either of the first two dimensions below 200 are skipped.
    # (The earlier ``df.drop(i)`` discarded its result, so it removed
    # nothing and the paths later went out of step with the vectors.)
    if img.shape[0] < 200 or img.shape[1] < 200:
        continue

    img_gray = color.rgb2gray(img)
    fd = hog(img_gray, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(4, 4))
    vector_list.append(fd)
    kept_paths.append(path)
    # Single formatted string: prints the same text on Python 2 and 3.
    print("{} {} {}".format(i, len(fd), path))

X = np.vstack(vector_list)

from sklearn.manifold import TSNE as tsne

# The name ``tsne`` is rebound to the estimator instance, as before, in case
# later code refers to it.
tsne = tsne(n_components=2)
# fit_transform already fits the model; a separate fit(X) beforehand only
# repeated the whole optimisation.
subspace_tsne = pd.DataFrame(tsne.fit_transform(X), columns=["x", "y"])

num_bins = 64

subspace_tsne['grid_x'] = pd.cut(subspace_tsne['x'], num_bins, labels=False)
subspace_tsne['grid_y'] = pd.cut(subspace_tsne['y'], num_bins, labels=False)

# One path per embedded row, taken from the images that were really used.
subspace_tsne['local_path'] = kept_paths

# I should save the dataframe here, so later maybe I can use full images

thumb_side = 128

from PIL import Image
예제 #17
0
파일: sb_vae.py 프로젝트: nthakor/sb-vae
test_dataset = tf.data.Dataset.from_tensor_slices(
    (test_images, test_class)).shuffle(TEST_BUF).batch(BATCH_SIZE)

from sklearn.manifold import TSNE as tsne

# Encode every test batch. Latent codes are collected per batch and joined
# once at the end, rather than re-copying the growing array on every step.
# The empty (0, 10) seed keeps the result an (N, 10) float64 array even when
# the dataset yields nothing.
latent_batches = [np.empty([0, 10])]
cls = []
for x, y in test_dataset:
    mean, logvar = model.encode(x)
    z = model.reparameterize(mean, logvar)
    latent_batches.append(z.numpy())
    cls.append(y)
ds = np.concatenate(latent_batches)

# Flatten each label batch before joining: stacking the batches into one
# tensor fails when the final batch is smaller than BATCH_SIZE.
label = np.concatenate(
    [np.empty(0)] + [batch.numpy().flatten() for batch in cls]).astype(int)

ts = tsne(2)
mds = ts.fit_transform(ds)
plt.scatter(mds[:, 0], mds[:, 1], s=0.3, c=label)

# Keep the batch at position 5 (or the last one if there are fewer).
for i, x in enumerate(test_dataset):
    img = x
    if (i == 5):
        break
# First element of that batch tuple, limited to its first 16 entries.
predictions = img[0][:16]
fig = plt.figure(figsize=(4, 4))

# Show channel 0 of each kept entry on a 4x4 grid.
for i in range(predictions.shape[0]):
    plt.subplot(4, 4, i + 1)
    plt.imshow(predictions[i, :, :, 0], cmap='gray')
    plt.axis('off')
예제 #18
0
#print(u.shape)
#print(vt.shape)
#print(u.shape[1])
#print(vt.shape[0])
#true_s = np.zeros((u.shape[1], vt.shape[0]))
#true_s[:s.size, :s.size] = np.diag(s)
#print(true_s)
#print(true_s.shape)
#np.save('../../reorganized_data/cluster1/sparse_matrix', true_s)

# Stage 1: fit a 2000-component PCA on the matrix and project the matrix
# with the fitted model (fit() returns the estimator itself).
pca = PCA(n_components=2000)
X_pca = pca.fit(sparsematrix).transform(sparsematrix)
print("completed pca")

# Stage 2: 2-component t-SNE on the PCA output.
embedder = tsne(n_components=2, verbose=1)
tsne_results = embedder.fit_transform(X_pca)
print("completed tsne")

#arr = list(tsnedata.T)
#x = tsne_results[:,0]
#x = [number**3 for number in x]
#y = tsne_results[:,1]
#y = [number**3 for number in y]

#fig = plt.figure()
#ax = fig.gca(projection='3d')
#fig, ax = plt.subplots()
#ax.set(xlim = (0,20), ylim = (-500, 1000))
#ax.scatter(x, y)

#for i in range(0, 3000):
예제 #19
0
파일: fsort.py 프로젝트: manuhg/fsort
def run_tsne(pool_arr, dims=1):
    """Return the t-SNE embedding of ``pool_arr`` with ``dims`` components.

    The input is fed to t-SNE directly, with no PCA step in between. The
    model uses perplexity 30, 1500 iterations and a learning rate of 120.
    """
    reducer = tsne(n_components=dims,
                   perplexity=30,
                   n_iter=1500,
                   learning_rate=120)
    return reducer.fit_transform(pool_arr)
예제 #20
0
# 2-D scatter of columns best_sep[2] and best_sep[1], coloured by taste.
plt.figure()
x_vals = reduced_stim[:, best_sep[2]]
y_vals = reduced_stim[:, best_sep[1]]
plt.scatter(x_vals, y_vals, c=tastes)
plt.colorbar()

clf.score(reduced_stim, tastes)

# 3-D scatter of the three columns listed in best_sep.
fig = plt.figure()
ax = Axes3D(fig)
p = ax.scatter(reduced_stim[:, best_sep[0]],
               reduced_stim[:, best_sep[1]],
               reduced_stim[:, best_sep[2]],
               c=tastes,
               s=20)
fig.colorbar(p)

# One t-SNE figure per perplexity in 10, 15, 20, 25.
for perp in np.arange(10, 30, 5):
    reduced_stim_tsne = tsne(perplexity=perp).fit_transform(total_this_off)
    plt.figure()
    plt.scatter(reduced_stim_tsne[:, 0], reduced_stim_tsne[:, 1], c=tastes)
    plt.title(perp)

## Palatability
clf = lda()
clf.fit(reduced_stim, pals)
fit_coefs = clf.coef_[0]
# Indices of the three coefficients with the largest absolute value,
# in ascending order of magnitude.
best_sep = np.argsort(np.abs(fit_coefs))[-3:]
plt.figure()
x_vals = reduced_stim[:, best_sep[2]]
y_vals = reduced_stim[:, best_sep[1]]
plt.scatter(x_vals, y_vals, c=pals)
plt.colorbar()

clf.score(reduced_stim, pals)

fig = plt.figure()
예제 #21
0
def learn_tSNE(x, params=PARAMS_LEARNING, version=VERSION, path=REDUCTED_DATA_PATH,
               reduction_size_factor=REDUCTION_SIZE_FACTOR, pca_variance_needed=0.9):
    """
    Learn tSNE representation.

    One t-SNE embedding is computed and saved for every combination of the
    parameter lists in ``params``.

    :param x: input data to project
    :param params: t-SNE parameters to learn
                   it will learn every combination possible
                   .. seealso:: sklearn.manifold.TSNE()
    :type params: {
                    'perplexities':array(int),
                    'learning_rates':array(int),
                    'inits':array({'pca', 'random'})
                    'n_iters':array(int),
                   }

    :param version: version of data to load (e.g: _20170614)
    :param path: where to store 2D representation, absolute path
    :param reduction_size_factor: factor by which we divide the
    number of samples (tsne is greedy); only used in the output file name
    :param pca_variance_needed: when truthy, ``x`` is first passed through
    ``reduce_with_PCA`` with this value as ``variance_needed``

    :return: Embedded data in 2D space, and t-SNE model; the model dict is
    always empty because fitted models are not kept (to save RAM)
    :rtype:  dict{params:(float,float)}, dict({params:tsne.model})
    """

    perplexities = params['perplexities']
    learning_rates = params['learning_rates']
    inits = params['inits']
    n_iters = params['n_iters']

    models, x_transformed = {}, {}

    concatenated_iterator = itertools.product(
        perplexities,
        learning_rates,
        inits,
        n_iters
    )

    if pca_variance_needed:
        x = reduce_with_PCA(x, variance_needed=pca_variance_needed)

    for perplexity, learning_rate, init, n_iter in concatenated_iterator:

        param = (
            perplexity,
            learning_rate,
            init,
            n_iter
        )

        logging.info("learning model %s %s %s %s", str(perplexity), str(learning_rate), str(init), str(n_iter))
        # The estimator is used once and discarded (not stored in
        # ``models``) to keep memory usage down.
        x_transformed[param] = tsne(
            perplexity=perplexity,
            learning_rate=learning_rate,
            # ``init`` is part of the grid and of the output file name, so
            # it must reach the estimator. Without it every ``init`` value
            # was run with the library default and saved under a
            # misleading name.
            init=init,
            n_iter=n_iter,
            # only use with Multicore_tSNE:   n_jobs=12,
        ).fit_transform(x)
        logging.info("done!")

        # e.g. <path>embedded_x_1-<factor>_<perp>_<lr>_<init>_<n_iter><version>.npz
        name = ''.join('_' + str(p) for p in param)
        full_path = ''.join([
            path,
            'embedded_x_1-',
            str(reduction_size_factor),
            name,
            version,
            '.npz',
        ])

        np.savez(
            full_path,
            x_2D=x_transformed[param],
        )

    return x_transformed, models
예제 #22
0
# Encode each task's test set and collect the codes with their task index.
for i in range(no_tasks):
    input = torch.tensor(x_testsets[i]).float()
    encoded = model.encode(input,
                           task_id=i,
                           no_samples=1,
                           const_mask=True,
                           temp=model.min_temp)
    # First element of the encoder output, averaged over its first axis.
    code = encoded[0].mean(0)
    zs.append(code.detach().numpy())
    ys.append(torch.ones((code.shape[0])) * i)

Z = np.concatenate(zs, axis=0)
Y = np.concatenate(ys)
# One row per task: column-wise mean and standard deviation of its codes
# (``vars_emp`` holds standard deviations, despite its name).
means_emp = np.concatenate([np.mean(d, 0).reshape([1, -1]) for d in zs])
vars_emp = np.concatenate([np.std(d, 0).reshape([1, -1]) for d in zs])

# means_emp = np.concatenate([m.cpu().detach().numpy().reshape([1,-1]) for m in model.z_mus])
# vars_emp = np.concatenate([(m/2).exp().cpu().detach().numpy().reshape([1,-1]) for m in model.z_lvs])

# The per-task means are appended after all codes, so they occupy the last
# ``no_tasks`` rows of the embedding.
Z_with_means = np.concatenate([Z, means_emp], axis=0)
manif = tsne(n_components=2)
Z_embedded = manif.fit_transform(Z_with_means)

prev = 0
figure = plt.figure(figsize=[8, 8])
till = no_tasks
tasks = np.arange(no_tasks)[:till]
# Plot each task's consecutive block of embedded rows as its own series.
for t in tasks:
    now = zs[t].shape[0]
    block = slice(prev, now + prev)
    plt.plot(Z_embedded[block, 0], Z_embedded[block, 1], '.', label="class_" + str(t))
    prev = now + prev

# prev = Z.shape[0]
# for t in tasks:
#     plt.plot(Z_embedded[prev+t:prev+t+1,0],Z_embedded[prev+t:prev+t+1,1], 'o', markersize=12, 
#              color = 'C'+str(t), markeredgecolor=(0,0,0,1), markeredgewidth=2)
예제 #23
0
        bad = np.array(res_ts["indices"][:1000])
        if first_run:
            all_bad = bad
            first_run = False
        else:
            all_bad = np.intersect1d(bad, all_bad)
        print(all_bad)
        gc.collect()

    all_bad = all_bad[:10]
    print("Bad indexes in HuaWei hand data: ", all_bad)
    np.savetxt("huawei_hand_bad_indexes.csv", all_bad, delimiter=",", fmt="%d")

    e = tsne(n_components=2,
             n_jobs=8).fit_transform(np.genfromtxt(feature_file,
                                                   delimiter=','))
    np.savetxt("src/datasets/huawei1_tsne.csv", e, delimiter=",")

    plt.figure(1)
    for i in range(8):
        x = np.where(labels == i)
        plt.scatter(e[x, 0], e[x, 1], c=grays[i], s=4, label=names[i])
    plt.scatter(e[all_bad, 0],
                e[all_bad, 1],
                marker='x',
                s=200,
                c='red',
                label="Mislabeled")
    plt.title("Mislabeled Instances in Sussex-HuaWei Hand")
    plt.savefig('huawei_hand_bad_instances.pdf')
예제 #24
0
def applyTsne(images):
    """Return the 2-component t-SNE embedding of *images*.

    *images* is passed unchanged to ``fit_transform`` of a freshly built
    ``tsne(n_components=2)`` estimator; all other settings are the
    estimator's defaults.
    """
    reducer = tsne(n_components=2)
    embedding = reducer.fit_transform(images)
    return embedding
예제 #25
0


# Random subset of 1500 vocabulary indices to visualise.
sel_words = random.sample(list(np.arange(vocab_size)), 1500)
# for epoch in range(16):
# 	model_embeds = np.load(base_model_dir+'epoch_'+str(epoch)+'_embedding.npy')
# 	# print(model_embeds.shape)
# 	# model_embeds = model_embeds[sel_words,:]
# 	# print(model_embeds.shape,sel_words[:10])
# 	img_save_name2d = base_model_dir+'epoch_'+str(epoch)+'_tsne.png'
# 	img_save_name3d = base_model_dir+'epoch_'+str(epoch)+'_tsne3D.png'
# 	# model_embeds = model_embeds[:1000,:]

# 	tsne_ak_2d = TSNE(perplexity=30, n_components=2, init='random', n_iter=3500,verbose=True)
# 	embeddings_ak_2d = tsne_ak_2d.fit_transform(model_embeds)
# 	tsne_plot_2d('ABC Corpus',embeddings_ak_2d,img_save_name2d,a=0.1)


for epoch in range(16):
    # Only the epoch-5 checkpoint is visualised; every other epoch is skipped.
    if epoch != 5:
        continue

    epoch_prefix = base_model_dir + 'epoch_' + str(epoch)
    model_embeds = np.load(epoch_prefix + '_embedding.npy')
    print(model_embeds.shape)
    # Restrict the embedding matrix to the sampled vocabulary rows.
    model_embeds = model_embeds[sel_words, :]
    img_save_name3d = epoch_prefix + '_tsne3D.png'

    # 3-component t-SNE with PCA initialisation and a fixed seed.
    tsne_wp_3d = tsne(
        perplexity=30,
        n_components=3,
        init='pca',
        n_iter=3500,
        random_state=12,
        verbose=True,
        n_jobs=8,
    )
    embeddings_wp_3d = tsne_wp_3d.fit_transform(model_embeds)
    tsne_plot_3d('Visualizing Embeddings using t-SNE ', 'ABC_Corpus',
                 embeddings_wp_3d, a=0.5)
예제 #26
0
# NOTE(review): this precomputed embedding is loaded and then overwritten by
# the fit further down; the load is kept to preserve the original behaviour.
t_tsne = np.load(r'D:\Data\George\Projects\SpikeSorting\Joana_Paired_128ch\2015-09-03\Analysis\klustakwik'+\
                 r'\threshold_6_5std\t_sne_results_final_allspikes.npy')


# T-SNE
# Python scikit-learn t-sne
t0 = time.time()
perplexity = 500.0
early_exaggeration = 100.0
learning_rate = 3000.0
theta = 0.0  # forwarded to the estimator as `angle`
model = tsne(n_components=2, perplexity=perplexity, early_exaggeration=early_exaggeration,
             learning_rate=learning_rate, n_iter=1000, n_iter_without_progress=500,
             min_grad_norm=1e-7, metric="euclidean", init="random", verbose=3,
             random_state=None, method='barnes_hut', angle=theta)
t_tsne = model.fit_transform(data_for_tsne)
# Transpose so that row 0 / row 1 hold the two embedding coordinates.
t_tsne = t_tsne.T
t1 = time.time()
print("Scikit t-sne took {} seconds, ({} minutes), for {} spikes".format(t1-t0, (t1-t0)/60, up_to_extra_spike))

# save the python scikit generated t-sne results
threshold = 5.5
file_name = r'D:\Data\George\Projects\SpikeSorting\Joana_Paired_128ch\2015-09-03\Analysis\t_tsne_ivm_data_{}sp_{}per_{}ee_{}lr_{}tp_{}thres.pkl'\
    .format(len(indices_of_data_for_tsne), perplexity, early_exaggeration, learning_rate, number_of_time_points, threshold)
# Use a context manager so the file handle is closed even if pickling fails
# (the previous open()/close() pair leaked the handle on an exception).
with open(file_name, 'bw') as file:
    pickle.dump((ivm_data_filtered, t_tsne, juxta_cluster_indices_grouped, perplexity, early_exaggeration, learning_rate), file)


예제 #27
0
# Compress the tf-idf matrix: keep the 100 rows (words) with the largest
# row sums, ordered from largest to smallest.
row_sums = np.sum(output, axis=1)
word_indices = np.array(np.argsort(row_sums)[-1:-101:-1])
output = output[word_indices, :]
voc_list = [voc_list[i] for i in word_indices]
# Map each retained word to its row position in the compressed matrix.
dic = {word: i for i, word in enumerate(voc_list)}

tfidf_frame = pd.DataFrame(data=output, index=voc_list, columns=docs)
tfidf_frame.to_csv('tf_idf_matrix.csv')

# print(search_docs(output, dic, docs, 'veikk'))

# Embed the documents (columns of `output`) and show them as a scatter plot.
embedded_output = tsne().fit_transform(output.T)
plt.scatter(embedded_output[:, 0], embedded_output[:, 1])
plt.show()

# Print the embedded coordinates of a few selected blog posts.
for doc in (
        'button-mapping-journeys', 'veikk-linux-driver-v3-notes',
        'on-developing-a-linux-driver', 'code-opinions',
):
    doc_row = docs.index('corpora/blog-posts/' + doc + '.txt')
    print(embedded_output[doc_row, :])

# print([docs[doc_index] for doc_index in np.argwhere(embedded_output[:, 0] > 300).flatten()])

# voc_list = [voc_list[i] for i in np.argsort(output[:, 1])]
# output = output[np.argsort(output[:,1])]
# for i,x in enumerate(output):
#   print(voc_list[i], end = ": ")
# Build per-cell [start, end) ranges into the concatenated spike axis from
# the per-cell spike counts: end[i] is the running total after cell i, and
# start[i + 1] equals end[i].
start = [0]
end = []
total_spikes = 0
num_cells = len(cell_info['num_of_spikes'])
for i in np.arange(0, num_cells):
    total_spikes += cell_info['num_of_spikes'][i]
    end.append(total_spikes)
    if i < num_cells - 1:
        start.append(total_spikes)


# T-SNE
perplexity = 5
early_exaggeration = 1.0
learning_rate = 100
model = tsne(
    n_components=2,
    perplexity=perplexity,
    early_exaggeration=early_exaggeration,
    learning_rate=learning_rate,
    metric='euclidean',
    random_state=None,
    init='random',
    verbose=10,
)
# Transposed so that t_tsne[0] / t_tsne[1] are the two embedding coordinates.
t_tsne = model.fit_transform(data_for_tsne).T

#  2D plot
fig = plt.figure()
ax = fig.add_subplot(111)
#c = ['r', 'g', 'b', 'y', 'sienna', 'palegreen', 'm', 'pink', 'olivedrab', 'silver', 'forestgreen', 'palegoldenrod']
s = 20
# One scatter series per cell, coloured and labelled from `cell_info`.
for i in np.arange(0, len(cell_info['num_of_spikes'])):
    cell_span = slice(start[i], end[i])
    ax.scatter(
        t_tsne[0][cell_span],
        t_tsne[1][cell_span],
        color=cell_info['color'][i],
        s=s,
        label=cell_info['label'][i],
    )
# Now add the legend with some customizations.
legend = ax.legend(loc='upper center', shadow=True)
예제 #29
0
        DATASET_NUM) + "_indexes.csv"
    # Colour palettes; only `colors` is used by the plotting code below.
    grays = ['#111111', '#555555', '#818181', '#a1a1a1', '#c1c1c1']
    colors = [
        'blue', 'green', 'cyan', 'gray', 'olive', 'limegreen', 'gold',
        'darkgreen'
    ]

    # Legend text per label value.
    # NOTE(review): only 5 names are provided while the loop below runs up to
    # max(labels); a label value above 4 would raise IndexError -- confirm the
    # label range.
    names = ["0", "1", "2", "3", "4"]

    # Load features, integer labels and the integer indices to highlight
    # from comma-separated files.
    X = np.genfromtxt(data_file, delimiter=',')
    labels = np.genfromtxt(label_file, delimiter=',', dtype='int')
    bad_indexes = np.genfromtxt(index_file, delimiter=',', dtype='int')

    print("Creating visualization of synthetic dataset #", DATASET_NUM)

    # 2-component t-SNE embedding of the feature matrix.
    e = tsne(n_components=2, n_jobs=8).fit_transform(X)

    plt.figure(1)
    # One scatter series per label value 0..max(labels).
    for i in range(max(labels) + 1):
        x = np.where(labels == i)
        plt.scatter(e[x, 0], e[x, 1], c=colors[i], s=4, label=names[i])
    # Overlay the rows listed in `bad_indexes` as red crosses (no legend label).
    plt.scatter(e[bad_indexes, 0],
                e[bad_indexes, 1],
                marker='x',
                s=75,
                c='red')
    plt.title("Synthetic Set " + str(DATASET_NUM) + " t-SNE Visualization")
    plt.axis('off')
    plt.legend()
    plt.savefig('Synthetic_Set' + str(DATASET_NUM) + '.pdf')
예제 #30
0
    #		magerr = np.asarray(magerr)/1
        array = []
        # NOTE(review): list.append returns None, so this first `stat_array`
        # binding is None; it is overwritten by the transform two lines below.
        stat_array = array.append(
            extract_features.extract_all(mag, magerr, convert=True))
        # Convert the one-element list of extracted features to an ndarray.
        array = np.array([i for i in array])
        stat_array = pca_model.transform(array)
    # NOTE(review): a bare except also swallows KeyboardInterrupt/SystemExit;
    # the matching `try:` is above this view. Any failure skips this item.
    except:
        continue
    # Append the transformed row(s) for this item to the accumulated matrix.
    x_data = np.append(x_data, stat_array, axis=0)
#for CONS in [99309, 98402, 105836, 68914, 69516]:
#	pass
#for ML in [33065, 107503]:
#	pass
# Turn every accumulated row into an ndarray before embedding.
data = list(map(np.asarray, x_data))

# 2-component t-SNE embedding; split into its x and y coordinates.
vis_data = tsne(n_components=2).fit_transform(data)
vis_x, vis_y = vis_data[:, 0], vis_data[:, 1]

#plt.scatter(vis_x, vis_y, c=y_data, cmap=plt.cm.get_cmap("jet",4))
#plt.title('Feature Space Distribution', fontsize=40)
#plt.colorbar(ticks=range(4))
#plt.
#plt.show()

colors = ['red', 'green', 'blue', 'yellow', 'orange', 'purple', 'black']
fig, ax = plt.subplots()
# Each class is plotted from its own consecutive block of 500 rows, in the
# order listed here.
class_names = ['Variable', 'Constant', 'CV', 'ML']
for i, object_class in enumerate(class_names):
    rows = slice(i * 500, (i + 1) * 500)
    x = vis_x[rows]
    y = vis_y[rows]
    ax.scatter(x, y, c=colors[i], label=object_class)
예제 #31
0
# Build per-cell [start, end) ranges into the concatenated spike axis from
# the per-cell spike counts: end[i] is the running total after cell i, and
# start[i + 1] equals end[i].
start = [0]
end = []
total_spikes = 0
num_cells = len(cell_info['num_of_spikes'])
for i in np.arange(0, num_cells):
    total_spikes += cell_info['num_of_spikes'][i]
    end.append(total_spikes)
    if i < num_cells - 1:
        start.append(total_spikes)


# T-SNE
perplexity = 5
early_exaggeration = 1.0
learning_rate = 100
model = tsne(
    n_components=2,
    perplexity=perplexity,
    early_exaggeration=early_exaggeration,
    learning_rate=learning_rate,
    metric='euclidean',
    random_state=None,
    init='random',
    verbose=10,
)
# Transposed so that t_tsne[0] / t_tsne[1] are the two embedding coordinates.
t_tsne = model.fit_transform(data_for_tsne).T

#  2D plot
fig = plt.figure()
ax = fig.add_subplot(111)
#c = ['r', 'g', 'b', 'y', 'sienna', 'palegreen', 'm', 'pink', 'olivedrab', 'silver', 'forestgreen', 'palegoldenrod']
s = 20
# One scatter series per cell, coloured and labelled from `cell_info`.
for i in np.arange(0, len(cell_info['num_of_spikes'])):
    cell_span = slice(start[i], end[i])
    ax.scatter(
        t_tsne[0][cell_span],
        t_tsne[1][cell_span],
        color=cell_info['color'][i],
        s=s,
        label=cell_info['label'][i],
    )
# Now add the legend with some customizations.
legend = ax.legend(loc='upper center', shadow=True)
예제 #32
0
# Fit the CP tensor decomposition `repeats` times and keep the fit with the
# lowest objective value.
rank = 10
repeats = 10
all_models = [
    tt.cp_als(all_data_long, rank=rank, verbose=False)
    for _ in tqdm(range(repeats))
]
all_obj = [fit.obj for fit in all_models]

U = all_models[np.argmin(all_obj)]

## We should be able to see differences in tastes by using distance matrices on trial factors
trial_factors = U.factors.factors[-1]
trial_distances = dist_mat(trial_factors, trial_factors)
plt.figure()
equalized_distances = exposure.equalize_hist(trial_distances)
plt.imshow(equalized_distances)

# PCA on the trial factors, coloured by taste: labels 0..5, each repeated
# 32 times in ascending order.
trial_labels = np.repeat(np.arange(6), 32)
pca_model = pca(n_components=2)
reduced_trials_pca = pca_model.fit_transform(trial_factors)
plt.scatter(reduced_trials_pca[:, 0], reduced_trials_pca[:, 1], c=trial_labels)

# t-SNE on the trial factors, coloured by taste, in a separate figure.
tsne_model = tsne(n_components=2, perplexity=40)
X_embedded = tsne_model.fit_transform(trial_factors)
plt.figure()
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=trial_labels)

# Plot the low-dimensional factors of the selected fit.
fig, _, _ = tt.plot_factors(U.factors)
예제 #33
0
import scipy
import numpy as np
from sklearn.manifold import TSNE as tsne
import sys
import matplotlib.pyplot as plt
# Command-line arguments: argv[1] is the .npy feature array, argv[2] a
# whitespace-separated text table, argv[3] the suffix of the output name.
data = np.load(sys.argv[1])
with open(sys.argv[2]) as f:
    rows = [line.split() for line in f]

# Drop the first row and first column of the table, then flatten to one
# entry per remaining row.
classes = np.array(rows)[1:, 1:].reshape([len(rows) - 1])
print("Loaded data")

# Embed with t-SNE and save the coordinates before plotting.
embedder = tsne(learning_rate=10)
xsne = embedder.fit_transform(data)
np.save("save_%s" % (sys.argv[3]), xsne)

# Fixed marker size of 3 for every point.
sizes = [3] * len(xsne)
plt.scatter(xsne[:, 0], xsne[:, 1], c=classes, s=sizes)
plt.show()