Example #1
0
def plot_TSNE(labels, features, num_class):
    """Embed ``features`` in 2-D with t-SNE and show a class-coloured scatter.

    Args:
        labels: Per-sample values passed to ``scatter`` as the colour
            argument (presumably integer class ids in ``[0, num_class)`` --
            confirm against the caller).
        features: Feature matrix handed to ``tsne``.
        num_class: Number of classes; sizes the discrete colour map and the
            colour-bar ticks.

    The colour-bar tick labels are the first ``num_class`` keys of the
    module-level ``label_modelnet`` mapping, in its iteration order.
    """
    Y = tsne(features,
             no_dims=2,
             initial_dims=512,
             perplexity=20.0,
             max_iter=1000)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    sct = ax.scatter(Y[:, 0],
                     Y[:, 1],
                     s=20,
                     c=labels,
                     cmap=discrete_cmap(num_class))
    # Centre each integer class id inside its own colour band.  The limits
    # are set on the scatter mappable because ``Colorbar.set_clim`` is no
    # longer available in current Matplotlib releases.
    sct.set_clim(-0.5, num_class - 0.5)
    cbar = plt.colorbar(sct, ticks=range(num_class))
    class_names = list(label_modelnet)[:num_class]
    cbar.set_ticklabels(class_names)
    plt.tight_layout()
    plt.show()
def runTsne():
    """Run t-SNE on a small image sample and show the 2-D embedding.

    Every sub-directory of the module-level ``base_dir`` is treated as one
    class.  At most 10 images per class are loaded, converted to RGB, resized
    to 200x200 and flattened into one row of the input matrix.  Points are
    coloured by the index of the class directory.
    """
    size = 200
    max_per_class = 10  # images taken from each class directory

    class_ids = []  # numeric class index per image, used as scatter colour
    rows = []       # one flattened RGB image per entry

    class_dirs = [name for name in os.listdir(base_dir)
                  if os.path.isdir(os.path.join(base_dir, name))]
    for class_id, class_name in enumerate(class_dirs):
        class_path = os.path.join(base_dir, class_name)
        for img_name in os.listdir(class_path)[:max_per_class]:
            # The context manager releases the file handle promptly.
            with Image.open(os.path.join(class_path, img_name)) as raw:
                # Force 3 channels so every row has size*size*3 values.
                # LANCZOS is the same filter the removed ANTIALIAS alias
                # referred to.
                resized = raw.convert('RGB').resize((size, size), Image.LANCZOS)
                rows.append(np.asarray(resized, dtype=np.float64).reshape(-1))
            class_ids.append(class_id)

    # Stack once instead of re-allocating the matrix for every image.
    if rows:
        images = np.vstack(rows)
    else:
        images = np.empty((0, size * size * 3))

    print("Running Tsne on " + str(len(class_ids)) + " data points")
    print(images.shape)
    Y = tsne.tsne(images, 2, 50, 30)

    # Numeric class ids are valid colour values; directory-name strings are not.
    pylab.scatter(Y[:, 0], Y[:, 1], 20, class_ids)
    pylab.show()
Example #3
0
def eval_all_pointcloud(sess, ops, num_votes=1, topk=1):
    '''
    Extract the ``pc_maxpool`` feature of every test point cloud and plot
    a 2-D t-SNE embedding of those features.

    sess      -- session object whose ``run`` evaluates ``ops``.
    ops       -- dict with the keys 'pointclouds_pl', 'labels_pl',
                 'is_training_pl', 'loss' and 'net'.
    num_votes -- number of rotated copies evaluated per point cloud; each
                 copy contributes one feature vector.
    topk      -- unused; kept so existing callers keep working.

    Reads the module-level TEST_FILES and NUM_POINT.
    '''
    is_training = False
    global_features = []
    labels = np.array([])

    for fn, test_file in enumerate(TEST_FILES):
        log_string('----' + str(fn) + '----')
        current_data, current_label = provider.loadDataFile(test_file)
        current_data = current_data[:, 0:NUM_POINT, :]
        # atleast_1d keeps a single-sample file sliceable after squeeze.
        current_label = np.atleast_1d(np.squeeze(current_label))
        # One label per (cloud, vote) pair, in the same order in which the
        # features are appended below, so both arrays stay aligned.
        labels = np.append(labels, np.repeat(current_label, num_votes))
        print(labels)
        print(current_data.shape)

        file_size = current_data.shape[0]
        print(file_size)

        for pc_idx in range(file_size):
            for vote_idx in range(num_votes):
                rotated_data = provider.rotate_point_cloud_by_angle(
                    current_data[pc_idx:pc_idx + 1, :, :],
                    vote_idx / float(num_votes) * np.pi * 2)

                feed_dict = {
                    ops['pointclouds_pl']: rotated_data,
                    ops['labels_pl']: current_label[pc_idx:pc_idx + 1],
                    ops['is_training_pl']: is_training
                }

                _, net_val = sess.run([ops['loss'], ops['net']],
                                      feed_dict=feed_dict)

                global_features.append(np.squeeze(net_val['pc_maxpool']))

    global_features = np.array(global_features)
    # Single-argument print calls behave the same on Python 2 and 3.
    print("global_features ::  %s" % (global_features.shape,))
    print("labels ::  %s" % (labels.shape,))

    global_features = tsne.tsne(global_features, 2, global_features.shape[1])
    Plot.scatter(global_features[:, 0],
                 global_features[:, 1],
                 30,
                 c=4 * labels,
                 cmap='jet')

    # Annotate every tenth point with its label to keep the plot readable.
    for i, txt in enumerate(labels):
        if i % 10 == 0:
            Plot.annotate(txt, (global_features[i, 0], global_features[i, 1]))

    Plot.show()
Example #4
0
    def tsne(self):
        """Embed all stored vectors in 2-D and draw their labels as text.

        Concatenates the vector lists in ``self.vecs`` (a mapping from group
        key to a list of vectors), runs ``tsne.tsne`` on them, stores the
        result in ``self.t`` and writes each vector's label from
        ``self.labels`` at its embedded position, coloured by
        ``self.colors[key]``.

        Returns:
            The plotting module ``plt`` (after ``plt.show()``).
        """
        vecs = []
        for value in self.vecs.values():
            vecs += value

        # t-SNE expects floating point input.
        vecs = np.array(vecs, dtype='float64')
        # Arguments: vectors, output dimensions, initial dimensions, perplexity.
        # The original author notes that a high perplexity spreads points
        # evenly while a low one groups them.
        self.t = tsne.tsne(vecs, 2, 2, 4)

        # Rows of self.t follow the iteration order of self.vecs, so keep a
        # running offset to find the rows that belong to each group.
        vec_group_start = 0
        for key, value in self.vecs.items():
            color = self.colors[key]
            for offset in range(len(value)):
                point = self.t[vec_group_start + offset]
                label = self.labels[key][offset]
                # Plot the bare point as well, presumably so the axes
                # autoscale to include the text.
                plt.plot(point[0], point[1])
                plt.text(point[0], point[1], label, color=color,
                         horizontalalignment='center')
            vec_group_start += len(value)
        plt.show()
        return plt
Example #5
0
def main(args):
    """Compute (or reload) a 2-D t-SNE embedding and draw the figures.

    Args:
        args: Parsed options.  Reads ``output_prefix``, ``load_tsne_output``,
            ``input_file``, ``transpose``, ``dict_file`` and
            ``separate_plot``.

    The embedding is cached in ``<output_prefix>.txt`` as tab-separated
    ``label<TAB>x<TAB>y`` lines and is reloaded from there when
    ``args.load_tsne_output`` is set and the file exists.
    """
    rc('font', family='serif')

    basedir = args.output_prefix
    if not os.path.exists(basedir):
        os.makedirs(basedir)
    output_txt_file = basedir + '.txt'

    if args.load_tsne_output and os.path.exists(output_txt_file):
        labels = []
        coords = []
        with open(output_txt_file, 'r') as f:
            for line in f:
                if not line.strip():
                    continue  # tolerate blank lines in the cache file
                # Only drop the line terminator: stripping all whitespace
                # would eat the leading tab of an empty label and shift
                # every column.
                terms = line.rstrip('\r\n').split('\t')
                labels.append(terms[0])
                coords.append([float(terms[1]), float(terms[2])])
        Y = np.asarray(coords)
    else:
        X = np.loadtxt(args.input_file)
        if args.transpose:
            X = X.transpose()
        Y = tsne(X, 2, 50, 20.0)
        labels = load_dict(args.dict_file)
        with open(output_txt_file, 'w') as f:
            for a, y in zip(labels, Y):
                f.write(a + '\t' + str(y[0]) + '\t' + str(y[1]) + '\n')

    if args.separate_plot:
        draw_figures_separate(args, Y, labels, basedir)
    else:
        draw_figures_all(args, Y, labels, basedir)
Example #6
0
def get_samples(args):
    """Load samples from pickle files and optionally plot a t-SNE projection.

    Args:
        args: Parsed options.  Reads ``load_proj``, ``load_input``,
            ``file_in``, ``load_lengths``, ``max_in`` and ``plot``.

    Returns:
        Tuple ``(samples, proj_samples, lengths)``.  ``proj_samples`` is
        ``None`` unless a projection was loaded or computed; ``lengths`` is
        ``None`` when a pre-projected pickle was loaded.

    NOTE(security): unpickling runs arbitrary code embedded in the file; only
    use this on files from a trusted source.
    """
    proj_samples = None
    lengths = None
    if args.load_proj:
        with open(args.load_input, 'rb') as f:
            samples, proj_samples = cPickle.load(f)
        print('Loaded {} pickled samples from {}'.format(
            len(samples), args.load_input))
    else:
        samples = []
        lengths = []
        with open(args.file_in, 'rb') as f:
            unpickler = cPickle.Unpickler(f)
            # The file holds a stream of pickled records; read until EOF.
            while True:
                try:
                    saved = unpickler.load()
                except EOFError:
                    break
                if args.load_lengths:
                    sample = np.array(saved['states'][0][0], dtype=float)
                    samples.append(sample)
                    lengths.append(saved['length'])
                else:
                    samples.append(np.array(saved)[0])
                if args.max_in and len(samples) >= args.max_in:
                    break
        print('Unpickled {} samples from {}'.format(
            len(samples), args.file_in))
    if args.plot:
        # ``not proj_samples`` raises for multi-element NumPy arrays, so test
        # for "missing or empty" explicitly.
        if proj_samples is None or len(proj_samples) == 0:
            from tsne import tsne
            proj_samples = tsne(samples, max_iter=200)
        plot_samples(proj_samples)
    return samples, proj_samples, lengths
Example #7
0
def run():
	"""Plot a t-SNE embedding of the UiO FreeSurfer records, once per target.

	Reads the rows of table ``FreeSurfer`` in ``freesurfer.db`` whose Center
	is 'UiO' and whose Diagnosis is 'SZ', 'BD' or 'HC', then shows one
	scatter plot coloured by 'Gender' and one coloured by 'Age'.

	NOTE(review): the embedding is cached in ``Y.txt`` and reused for every
	target and every later run; delete the file when the query or the
	feature set changes.
	"""

	# Fetch everything first and always release the database connection.
	connection = sqlite3.connect('freesurfer.db')
	try:
		cursor = connection.cursor()
		cursor.execute("SELECT * FROM FreeSurfer WHERE Center = 'UiO' AND (Diagnosis = 'SZ' OR Diagnosis = 'BD' OR Diagnosis = 'HC')")
		data = cursor.fetchall()
		data = to_data_frame(data, get_columns(cursor))
	finally:
		connection.close()
	data = normalize(data, exclude=['Age'])

	for target in ['Gender', 'Age']:

		# Run t-SNE procedure and visualize clusters
		X, _ = get_xy(data, target, exclude=['Diagnosis', 'Age', 'Center', 'Gender'])
		labels = get_labels(data, target)
		X = X.T

		if not os.path.isfile('Y.txt'):
			Y = tsne.tsne(X, 2, 50, 20.0)
			save_csv('Y.txt', Y)
		else:
			Y = load_csv('Y.txt')

		label_colors = labels_to_colors(labels)
		pylab.scatter(x=Y[:,0], y=Y[:,1], s=20, c=label_colors)
		pylab.title(target)
		pylab.show()
Example #8
0
def similarity(songs, keys, filter_func= lambda x: True):
    """Embed per-key song averages with t-SNE and dump them as JSON.

    Args:
        songs: Song collection passed through to ``avg_by``.
        keys: Grouping key(s) passed to ``avg_by``; also used as the series
            name and as the prefix of the output file name.
        filter_func: Predicate forwarded to ``avg_by``.

    Writes ``"<keys>_similarities.json"`` holding one point
    (x, y, song_title, song_url) per entry of the ``avg_by`` result.
    """
    avgs = avg_by(songs, keys, filter_func=filter_func)
    print(len(avgs))
    # Materialise the values so np.array also gets a real sequence on
    # Python 3, where dict.values() is a view.
    vals = list(avgs.values())
    print(vals)
    Y = tsne.tsne(np.array(vals), 2, 50, 20.0)

    points = []
    for k, c in zip(avgs, Y):
        # NOTE(review): each key is consumed like an iterator that yields the
        # title first and the URL second -- confirm against avg_by.
        points.append({
            'x': c[0],
            'y': c[1],
            'song_title': k.next(),
            'song_url': k.next(),
        })

    series = {
        'name': keys,
        'data': points,
    }

    # NOTE(review): if ``keys`` is ever a tuple, "%" unpacks it as the
    # format arguments -- confirm callers only pass strings or lists.
    filename = "%s_similarities.json" % (keys)
    with open(filename, "w") as outfile:
        json.dump(series, outfile, indent=4)
Example #9
0
def plot_clustering_2d(encodings, myCluster, output, **kw):
    """Save a 2-D scatter plot of clustered encodings to ``<output>.png``.

    Args:
        encodings: Table whose first row and first column are dropped before
            the rest is converted to float (presumably header and names).
        myCluster: Table whose columns from index 1 on hold the cluster
            labels; ``0`` means "no clustering" and makes this a no-op.
        output: Output path without the ``.png`` extension.
        **kw: Must contain ``sof``; ``'sample'`` uses the table as is, any
            other value transposes it first.

    With more than five clusters the points are coloured through the colour
    map; otherwise each cluster gets its own legend entry.
    """
    if myCluster == 0:
        return

    table = np.array(encodings)
    if kw['sof'] != 'sample':
        table = table.T
    data = table[1:, 1:].astype(float)
    labels = np.array(myCluster)[0:, 1:].reshape(-1, )

    # Falls back to PCA only when warnings are configured to be raised as
    # exceptions; otherwise t-SNE's result is used as is.
    try:
        Y = tsne.tsne(data, 2, 50, 20.0)
    except RuntimeWarning:
        Y = pca.pca(data, n_components=2)

    plt.figure(0)
    clusters = set(labels)
    if len(clusters) > 5:
        # Labels may be strings (the table is built from mixed values), which
        # are not valid colour values; map them to integer codes instead.
        _, codes = np.unique(labels, return_inverse=True)
        plt.scatter(Y[:, 0], Y[:, 1], 20, codes)
    else:
        for cluster in clusters:
            members = labels == cluster
            plt.scatter(Y[members, 0], Y[members, 1], 20,
                        label="Cluster_%s" % cluster)
        # Only this branch creates labelled artists for the legend.
        plt.legend(loc='best')
    plt.savefig('%s.png' % output)
    plt.close(0)
Example #10
0
def display_data(word_vectors, words, target_words=None):
  """Plot a 2-D t-SNE embedding of word vectors into ``word_vectors.png``.

  Args:
    word_vectors: 2-D array with one row per entry of ``words``.
    words: Sequence of words aligned with the rows of ``word_vectors``.
    target_words: Optional path to a text file with one word per line; only
      the first 2000 lines are used and words missing from ``words`` are
      skipped.  Without it, up to 1000 random rows are plotted.
  """
  if target_words:
    with open(target_words) as handle:
      wanted = [line.strip().lower() for line in handle][:2000]
    # First occurrence of each word, matching list.index semantics without
    # the linear scan per lookup.
    first_index = {}
    for position, word in enumerate(words):
      first_index.setdefault(word, position)
    rows = [first_index[word] for word in wanted if word in first_index]
  else:
    # Sampling without replacement cannot draw more rows than exist.
    sample_size = min(1000, len(word_vectors))
    rows = np.random.choice(len(word_vectors), size=sample_size, replace=False)
  target_matrix = word_vectors[rows, :]
  reduced_matrix = tsne(target_matrix, 2)

  # NOTE: 200x200 inches at 100 dpi is a 20000x20000 pixel image.
  Plot.figure(figsize=(200, 200), dpi=100)
  # Symmetric limits from the largest absolute coordinate, so points on the
  # negative side are not clipped.
  extent = np.amax(np.abs(reduced_matrix), axis=0)
  max_x = extent[0]
  max_y = extent[1]
  Plot.xlim((-max_x, max_x))
  Plot.ylim((-max_y, max_y))

  Plot.scatter(reduced_matrix[:, 0], reduced_matrix[:, 1], 20)

  for row_id, word_index in enumerate(rows):
    point = (reduced_matrix[row_id, 0], reduced_matrix[row_id, 1])
    Plot.annotate(words[word_index], point)
  Plot.savefig("word_vectors.png")
Example #11
0
def visualizeWord(mod1, mod2, word, n):
    """Show how ``word`` moved between two embedding models.

    Embeds four groups of vectors together with t-SNE and passes them to
    ``showEvolution``: the word's vector in ``mod1``, its vector in ``mod2``,
    the ``mod2`` neighbours of the old position and the ``mod2`` neighbours
    of the word itself.

    Args:
        mod1: Older model; must support ``word in mod1.vocab`` and
            ``mod1[word]``.
        mod2: Newer model; additionally needs ``most_similar``.
        word: Word to inspect.
        n: Number of neighbours requested for each position.

    Prints a message and returns when the word is missing from either model.
    """
    if not (word in mod1.vocab and word in mod2.vocab):
        print(word + " is not in the vocabulary.")
        return

    # Old position of the word and the neighbours of that position.
    pt1 = mod1[word]
    label1 = [
        label for label, _ in mod2.most_similar(positive=[pt1], topn=n + 1)
    ]
    # One extra neighbour was requested because the word itself may show up
    # among them; drop either the word or the surplus last entry.
    if word in label1:
        label1.remove(word)
    else:
        label1 = label1[:-1]
    data1 = mod2[label1]

    # Current position of the word and its neighbours.
    pt2 = mod2[word]
    label2 = [label for label, _ in mod2.most_similar(word, topn=n)]
    data2 = mod2[label2]

    # Embed everything together, then split the result again.  The vector
    # size is taken from the data instead of being fixed at 300.
    dim = pt1.size
    res = tsne.tsne(np.concatenate(
        [pt1.reshape(1, -1),
         pt2.reshape(1, -1), data1, data2]),
                    no_dims=2,
                    initial_dims=dim)
    # Split by the number of neighbours actually returned, which can be
    # smaller than n for a small vocabulary.
    split = 2 + len(label1)
    pt1 = res[0].reshape(1, 2)
    pt2 = res[1].reshape(1, 2)
    data1 = res[2:split]
    data2 = res[split:]

    showEvolution(pt1, pt2, word, data1, data2, label1, label2)
Example #12
0
def embed(words, matrix, classes, usermodel, fname):
    """Save a class-coloured, annotated 2-D t-SNE plot of ``matrix``.

    Args:
        words: One identifier per row; the part before the first ``_`` (with
            ``::`` replaced by a space) is used as the annotation text.
        matrix: 2-D array with one row per word.
        classes: One class label per row; decides colour and legend entry.
        usermodel: Used in the output file name.
        fname: Used in the output file name.

    The image is written to
    ``root + 'data/images/tsneplots/' + usermodel + '_' + fname + '.png'``.
    """
    perplexity = 5.0  # Should be smaller than the number of points!
    dimensionality = matrix.shape[1]
    embedding = tsne(matrix, 2, dimensionality, perplexity)

    sys.stderr.write('2-d embedding finished\n')

    class_set = list(set(classes))
    colors = plot.cm.rainbow(np.linspace(0, 1, len(class_set)))
    color_of = dict(zip(class_set, colors))

    seen = set()
    for word, class_label, x, y in zip(words, classes,
                                       embedding[:, 0], embedding[:, 1]):
        color = color_of[class_label]
        # Only the first point of each class carries a legend label.
        plot.scatter(x, y, 20, marker='.', color=color,
                     label=class_label if class_label not in seen else "")
        seen.add(class_label)

        lemma = word.split('_')[0].replace('::', ' ')
        # Shift the text left by half its length; integer division keeps the
        # offset identical on Python 2 and 3.
        # TODO Should really think about how to adapt this variable to the real plot size
        mid = (len(lemma) // 2) * 10
        plot.annotate(lemma, xy=(x - mid, y), size='x-large', weight='bold',
                      fontproperties=font, color=color)

    # Booleans instead of 'off': on current Matplotlib any non-empty string
    # is treated as true and would leave the ticks visible.
    plot.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    plot.tick_params(axis='y', which='both', left=False, right=False, labelleft=False)
    plot.legend(loc=4)

    plot.savefig(root + 'data/images/tsneplots/' + usermodel + '_' + fname + '.png', dpi=150, bbox_inches='tight')
    plot.close()
Example #13
0
def t_sne(activation_size, csv_file, nc_file):
    """Show a t-SNE scatter of per-sequence averaged activations.

    Args:
        activation_size: Passed to ``parse_csv`` together with ``csv_file``.
        csv_file: File holding the per-timestep activations.
        nc_file: File from which the labels and sequence lengths are read.

    All intermediate results and the embedding ``Y`` are published as module
    globals.
    """
    global timestep_activations, seq_activations, timestep_tru_labels, sequence_lengths, seq_labels, Y

    # Notes kept from the original author.
    # Done:
    #   1. average the activations of a sequence for sequence representations
    #   2. run t-SNE on softmax activations
    #   3. create bogus nc files to extract last layer activations
    # Other ideas:
    #   1. take last activation label for improved accuracy + better sequence
    #      representation?
    #   3. use mode of 2nd half of sequence for improved accuracy
    #   4. use rnnlib to classify entire sequences instead of currennt.

    # Activations: one averaged vector per sequence.
    timestep_activations = parse_csv(csv_file, activation_size)
    averaged = average_seq_activations(timestep_activations)
    seq_activations = np.array(averaged, dtype=np.float64)

    # Labels: one label per sequence.
    timestep_tru_labels, sequence_lengths = get_labels_lengths_from_nc(nc_file)
    seq_labels = np.array(
        restore_seq_label_list(timestep_tru_labels, sequence_lengths))

    Y = tsne.tsne(seq_activations)
    xs, ys = Y[:, 0], Y[:, 1]
    Plot.scatter(xs, ys, 20, seq_labels)
    Plot.show()
Example #14
0
def visualise_context(network, dataset, device, title=None):
    """Plot a low-dimensional representation of the context space.

    Args:
        network: Model exposing ``statistic_network``; it is switched to eval
            mode for the pass and restored to its previous mode afterwards,
            also when an error occurs.
        dataset: Iterable of batches with the keys ``'dataset'`` and
            ``'label'``.
        device: Device the batch tensors are moved to.
        title: Optional plot title; a default describing the plot is used
            when omitted.
    """
    was_training = network.training
    network.eval()
    try:
        with to.no_grad():
            contexts = []
            labels = []
            for batch in dataset:
                # Extract means only, so the 0th element
                contexts.append(
                    network.statistic_network.forward(
                        batch['dataset'].to(device),
                        batch['label'].to(device))[0])
                labels.append(batch['label'])
            contexts = to.cat(contexts, dim=0)
            labels = to.cat(labels, dim=0)

        data = tsne.tsne(contexts.cpu().numpy(),
                         no_dims=2,
                         initial_dims=contexts.shape[1],
                         perplexity=30.0)
        if title is None:
            plt.title(
                "2D t-SNE Plot of Test Data Contexts\nafter 1000 Iterations (Numeric Labels, Fully Supervised, Zeros Removed)"
            )
        else:
            plt.title(title)
        # Colour each point by the index of its largest label entry.
        plt.scatter(data[:, 0],
                    data[:, 1],
                    c=labels.argmax(dim=1).cpu().numpy())
        plt.show()
    finally:
        # Put the network back into the mode the caller had it in instead of
        # unconditionally forcing train mode.
        network.train(was_training)
Example #15
0
def run_all(tags, filepref):
    """Train a Word2Vec model on ``texts`` and produce its t-SNE output files.

    Args:
        tags: Tags handed to ``build_dict_write_file``; also printed.
        filepref: Prefix of every file this function writes or reads.

    Returns:
        Tuple ``(Y, labels, datadict, model)``.
    """
    # Words must occur at least 3 times in the corpus, which reduces noise.
    # Tag-limited alternative:
    #   gensim.models.Word2Vec(MySentences('books', tags=tags), min_count=2, size=200, workers=2)
    # To reuse a saved model instead of training:
    #   gensim.models.Word2Vec.load('data/pride_NNPRP_model_austen_all')
    corpus = DirOfPlainTextCorpus('texts')
    model = gensim.models.Word2Vec(corpus, min_count=3, workers=4)
    model.save(filepref + modelName)

    # Text tagging and word replacement.
    print(tags)
    labeled_path = filepref + '_labeled.txt'
    json_path = filepref + '_data.json'
    datadict = build_dict_write_file(model, labeled_path, json_path, tags)

    # Write the t-SNE input files.
    make_score_files(model, datadict, filepref)

    # Load them back, embed and write the coordinates (see tsne.py in repo).
    scores_path = filepref + "_scores.csv"
    words_path = filepref + "_words.csv"
    X = np.loadtxt(scores_path)
    labels = np.genfromtxt(words_path, dtype=str)
    Y = tsne.tsne(X, 2, 50, 20.0)
    do_tsne_files(filepref + '_coords.tsv', Y, labels, datadict, axis_off=True)

    return Y, labels, datadict, model
def visualize(wordEmbeddings,
              filename="./embeddings.png",
              text_path="/home/bhanu/workspace/RNTN/scripts/embeddings2d_phrase_vis.txt"):
    """
    Visualize a set of examples using t-SNE.

    wordEmbeddings -- mapping from title (phrase) to its embedding vector.
    filename       -- image file the rendered embedding is written to.
    text_path      -- text file receiving one "title x y" row per entry;
                      whitespace inside titles is replaced by underscores.

    If t-SNE fails with an IOError, an error is printed and nothing is
    written.  If only rendering fails, the text file is still written.
    """
    PERPLEXITY = 30

    titles = list(wordEmbeddings.keys())
    titlesStr = ["_".join(y.strip().split()) for y in titles]
    # list() so vstack also gets a real sequence on Python 3.
    x = numpy.vstack(list(wordEmbeddings.values()))

    #from textSNE.calc_tsne import tsne
    from tsne import tsne
    try:
        out = tsne(x, no_dims=2, perplexity=PERPLEXITY)
    except IOError:
        # Without an embedding there is nothing to render or save.
        print("ERROR visualizing %s" % filename)
        return

    try:
        import render
        render.render([(title, point[0], point[1])
                       for title, point in zip(titles, out)], filename)
    except IOError:
        print("ERROR visualizing %s" % filename)

    data = numpy.column_stack((titlesStr, out))
    numpy.savetxt(text_path, data, "%s")
Example #17
0
def display_data(word_vectors, words, target_words=None):
    """Plot a 2-D t-SNE embedding of word vectors into ``word_vectors.png``.

    Args:
        word_vectors: 2-D array with one row per entry of ``words``.
        words: Sequence of words aligned with the rows of ``word_vectors``.
        target_words: Optional path to a text file with one word per line;
            only the first 2000 lines are used and words missing from
            ``words`` are skipped.  Without it, up to 1000 random rows are
            plotted.
    """
    if target_words:
        with open(target_words) as handle:
            wanted = [line.strip().lower() for line in handle][:2000]
        # First occurrence of each word, matching list.index semantics
        # without the linear scan per lookup.
        first_index = {}
        for position, word in enumerate(words):
            first_index.setdefault(word, position)
        rows = [first_index[word] for word in wanted if word in first_index]
    else:
        # Sampling without replacement cannot draw more rows than exist.
        sample_size = min(1000, len(word_vectors))
        rows = np.random.choice(len(word_vectors),
                                size=sample_size,
                                replace=False)
    target_matrix = word_vectors[rows, :]
    reduced_matrix = tsne(target_matrix, 2)

    # NOTE: 200x200 inches at 100 dpi is a 20000x20000 pixel image.
    Plot.figure(figsize=(200, 200), dpi=100)
    # Symmetric limits from the largest absolute coordinate, so points on
    # the negative side are not clipped.
    extent = np.amax(np.abs(reduced_matrix), axis=0)
    max_x = extent[0]
    max_y = extent[1]
    Plot.xlim((-max_x, max_x))
    Plot.ylim((-max_y, max_y))

    Plot.scatter(reduced_matrix[:, 0], reduced_matrix[:, 1], 20)

    for row_id, word_index in enumerate(rows):
        point = (reduced_matrix[row_id, 0], reduced_matrix[row_id, 1])
        Plot.annotate(words[word_index], point)
    Plot.savefig("word_vectors.png")
def t_sne(activation_size, csv_file, nc_file):
    """Show a t-SNE scatter of per-sequence averaged activations.

    Args:
        activation_size: Passed to ``parse_csv`` together with ``csv_file``.
        csv_file: File holding the per-timestep activations.
        nc_file: File from which the labels and sequence lengths are read.

    All intermediate results and the embedding ``Y`` are published as module
    globals.
    """
    global timestep_activations, seq_activations, timestep_tru_labels, sequence_lengths, seq_labels, Y

    # Notes kept from the original author.
    # Done:
    #   1. average the activations of a sequence for sequence representations
    #   2. run t-SNE on softmax activations
    #   3. create bogus nc files to extract last layer activations
    # Other ideas:
    #   1. take last activation label for improved accuracy + better sequence
    #      representation?
    #   3. use mode of 2nd half of sequence for improved accuracy
    #   4. use rnnlib to classify entire sequences instead of currennt.

    # Activations: one averaged vector per sequence.
    timestep_activations = parse_csv(csv_file, activation_size)
    averaged = average_seq_activations(timestep_activations)
    seq_activations = np.array(averaged, dtype=np.float64)

    # Labels: one label per sequence.
    timestep_tru_labels, sequence_lengths = get_labels_lengths_from_nc(nc_file)
    seq_labels = np.array(
        restore_seq_label_list(timestep_tru_labels, sequence_lengths))

    Y = tsne.tsne(seq_activations)
    xs, ys = Y[:, 0], Y[:, 1]
    Plot.scatter(xs, ys, 20, seq_labels)
    Plot.show()
Example #19
0
def plot_with_tsne(vectors,
                   words,
                   color_coding=None,
                   outfile_name="tsne_solution"):
    """Project word vectors to 2-D with t-SNE, annotate, save and show them.

    Args:
        vectors: Word embeddings; converted to a NumPy array when needed.
        words: One label per vector, written next to its point.
        color_coding: Optional colour argument for the scatter plot.
        outfile_name: Output path without the ``.png`` extension.
    """
    # t-SNE works on a NumPy array.
    if not isinstance(vectors, np.ndarray):
        vectors = np.array(vectors)

    embedding = tsne.tsne(X=vectors,
                          no_dims=2,
                          initial_dims=len(words) // 2,
                          perplexity=5.0,
                          max_iter=1000)
    xs = embedding[:, 0]
    ys = embedding[:, 1]

    # Only pass a colour argument when one was supplied.
    scatter_options = {} if color_coding is None else {'c': color_coding}
    plt.scatter(xs, ys, **scatter_options)

    # Write every word exactly at its point.
    for word, x_pos, y_pos in zip(words, xs, ys):
        plt.annotate(word,
                     xy=(x_pos, y_pos),
                     xytext=(0, 0),
                     textcoords='offset points',
                     size=9)

    image_path = outfile_name + ".png"
    plt.savefig(image_path, format='png')
    plt.show()
Example #20
0
def similarity(songs, keys, filter_func=lambda x: True):
    """Embed per-key song averages with t-SNE and dump them as JSON.

    Args:
        songs: Song collection passed through to ``avg_by``.
        keys: Grouping key(s) passed to ``avg_by``; also used as the series
            name and as the prefix of the output file name.
        filter_func: Predicate forwarded to ``avg_by``.

    Writes ``"<keys>_similarities.json"`` holding one point
    (x, y, song_title, song_url) per entry of the ``avg_by`` result.
    """
    avgs = avg_by(songs, keys, filter_func=filter_func)
    print(len(avgs))
    # Materialise the values so np.array also gets a real sequence on
    # Python 3, where dict.values() is a view.
    vals = list(avgs.values())
    print(vals)
    Y = tsne.tsne(np.array(vals), 2, 50, 20.0)

    points = []
    for k, c in zip(avgs, Y):
        # NOTE(review): each key is consumed like an iterator that yields the
        # title first and the URL second -- confirm against avg_by.
        points.append({
            'x': c[0],
            'y': c[1],
            'song_title': k.next(),
            'song_url': k.next(),
        })

    series = {'name': keys, 'data': points}

    # NOTE(review): if ``keys`` is ever a tuple, "%" unpacks it as the
    # format arguments -- confirm callers only pass strings or lists.
    filename = "%s_similarities.json" % (keys)
    with open(filename, "w") as outfile:
        json.dump(series, outfile, indent=4)
Example #21
0
def dimension_reduction(data, method='tsne', label=None, plot=False):
    """Standardise ``data`` and reduce it to two dimensions.

    Args:
        data: 2-D feature matrix, one row per sample.
        method: One of ``'tsne'``, ``'isomap'``, ``'MDS'`` or ``'tsne_v2'``
            (the module-level ``tsne`` function).
        label: Per-sample class labels; only needed when ``plot`` is true.
            Classes 0, 1 and 2 are drawn with distinct markers.
        plot: When true, show a scatter plot of the embedding.

    Returns:
        The embedding produced by the selected method.

    Raises:
        ValueError: For an unknown ``method`` or when ``plot`` is requested
            without ``label``.
    """
    n_components = 2
    # Every method here is distance based, so the features are standardised
    # (StandardScaler) before embedding.
    data = StandardScaler().fit_transform(data)

    if method == 'tsne':
        model = TSNE(n_components=n_components, perplexity=20, early_exaggeration=20, method='exact',
                     learning_rate=100, n_iter=1000, random_state=250, verbose=2)
        X = model.fit_transform(data)
    elif method == 'isomap':
        model = Isomap(n_components=n_components, n_neighbors=20)
        X = model.fit_transform(data)
    elif method == 'MDS':
        model = MDS(n_components=n_components, verbose=2, n_init=1, max_iter=500)
        X = model.fit_transform(data)
    elif method == 'tsne_v2':
        X = tsne(data, 2, 44, 50.0)
    else:
        raise ValueError("unknown dimension reduction method: %r" % (method,))

    print(len(X))  # number of embedded samples
    print(X)
    if plot:
        if label is None:
            raise ValueError("label is required when plot=True")
        # Element-wise comparison below needs an array, not a plain list.
        label = np.asarray(label)
        fig, ax = plt.subplots()
        ax.scatter(X[label == 0, 0], X[label == 0, 1], c='darkblue', alpha=0.25, marker='^')
        ax.scatter(X[label == 1, 0], X[label == 1, 1], c='darkred', alpha=0.75, marker='x')
        ax.scatter(X[label == 2, 0], X[label == 2, 1], c='green', alpha=0.25, marker='o')
        # The view is limited to the extent of class 0; skip that when the
        # class is empty so min/max do not fail.
        first_class = X[label == 0]
        if len(first_class):
            ax.set_xlim([np.min(first_class[:, 0]), np.max(first_class[:, 0])])
            ax.set_ylim([np.min(first_class[:, 1]), np.max(first_class[:, 1])])
        plt.show()
    return X
def data_visualize(data, triplet, all_info):
    """Show three t-SNE scatter plots, coloured by each triplet position.

    Args:
        data: Matrix passed to ``tsne.tsne``; row ``i`` is assumed to belong
            to ``triplet[i]`` -- confirm against the caller.
        triplet: Sequence of 3-element records.
        all_info: Unused; kept so existing callers keep working.

    For each position 0, 1 and 2, every distinct value found at that position
    gets its own colour and one figure is shown.
    """
    print("Run tsne")
    Y = tsne.tsne(data, 2, 50, 20.0)

    for pos in range(3):
        print('draw pos %s' % pos)
        column = [tri[pos] for tri in triplet]
        all_possible_label = set(column)

        colors = cm.rainbow(np.linspace(0, 1, len(all_possible_label)))
        for label_idx, c in zip(all_possible_label, colors):
            # One boolean mask selects the rows of this label in a single
            # pass instead of deleting the other rows one by one.
            mask = np.array([value == label_idx for value in column],
                            dtype=bool)
            selected = Y[mask]
            plt.scatter(selected[:, 0], selected[:, 1], marker='o', color=c)
        plt.show()
def _save_class_scatter(Y, colours, title):
    """Draw ``Y`` coloured by ``colours`` with a class colour bar; save as ``title``."""
    fig, _ = plt.subplots()
    plt.scatter(Y[:, 0],
                Y[:, 1],
                20,
                colours,
                edgecolors='face',
                alpha=1,
                # pyplot.get_cmap works on old and new Matplotlib alike,
                # unlike plt.cm.get_cmap.
                cmap=plt.get_cmap('jet', N))
    cbar = plt.colorbar(ticks=tick)
    # Centre every integer class id inside its own colour band.
    plt.clim(-0.5, N - 0.5)
    cbar.ax.set_yticklabels(target_names)
    plt.title(title)
    plt.xlabel('t-SNE dimension - 1')
    plt.ylabel('t-SNE dimension - 2')
    fig.tight_layout()
    plt.savefig(title)


def tsne_(y_train, title, pred, title_second):
    """Save two t-SNE scatter plots of the module-level ``x_train``.

    Args:
        y_train: Colour values for the first plot.
        title: Title and output file name of the first plot.
        pred: Colour values for the second plot.
        title_second: Title and output file name of the second plot.

    Both plots share one embedding.  The class count ``N``, the colour-bar
    ``tick`` positions and ``target_names`` are read from module globals.
    """
    Y = tsne.tsne(x_train, no_dims=2, initial_dims=784, perplexity=30.0)
    _save_class_scatter(Y, y_train, title)
    _save_class_scatter(Y, pred, title_second)
Example #24
0
    def do_tsne(self):
        """
        This function does t-SNE on the positive, negative, and unknown data provided

        The three data sets are stacked, embedded and the result is stored in
        ``self.tsne_data``.  Depending on the configuration the embedding
        comes from the ``tsne`` module (``self.use_tsne_python``) or from
        ``TSNE``, optionally after a PCA reduction (``self.pca_preprocess``).
        Afterwards the stacked input is split back into ``self.data_points``,
        ``self.positive_data`` and ``self.negative_data``.

        :return: None
        """
        all_data = np.vstack((self.data_points, self.positive_data, self.negative_data))
        # This is to save memory and potentially prevent memory error
        del self.data_points
        del self.positive_data
        del self.negative_data
        if self.use_tsne_python:
            try:
                import tsne
                # Pass perplexity by keyword: the third positional parameter
                # of tsne.tsne is initial_dims, not perplexity.
                self.tsne_data = tsne.tsne(all_data, no_dims=2,
                                           perplexity=self.tsne_perplexity)
            except Exception:
                # Still best effort, but no longer silent.
                logger.exception("tsne_python failed; tsne_data was not updated")
        else:
            tsne_input = all_data
            if self.pca_preprocess:
                # This is to reduce memory requirement
                logger.info("Pre-processing with PCA...")
                tsne_input = PCA(n_components=self.pca_preprocess_red).fit_transform(all_data)
            self.tsne_data = TSNE(perplexity=self.tsne_perplexity,
                                  early_exaggeration=self.early_exaggeration,
                                  random_state=self.tsne_seed,
                                  init=self.tsne_init,
                                  learning_rate=self.tsne_learning_rate,
                                  verbose=True).fit_transform(tsne_input)
        logger.info("t-SNE complete.")

        logger.debug("Rearranging data...")
        chops = (self.num_points, self.num_positive, self.num_negative)
        self.data_points, self.positive_data, self.negative_data = basic.chop(all_data, chops)
        del all_data
Example #25
0
def main():
    """Embed 1000 sampled points with t-SNE and save a per-class scatter plot.

    The plot is written to ``result.png`` and then passed to
    ``elice_utils.send_image``.  Class labels are expected to be
    non-negative integers (presumably 0-9 -- confirm against
    ``sample_data``).
    """
    # load data
    print("Loading data...")
    X, labels = sample_data(1000)

    # run tsne
    print(
        "Run Y = tsne.tsne(X, no_dims, initial_dims, perplexity) to perform t-SNE on your dataset."
    )
    Y = tsne.tsne(X, 2, 50, 30.0)

    # plot the results
    classes = sorted(set(labels))
    # Keep the original 10-colour palette and only grow it when a label id
    # would not fit.
    n_colors = max(10, int(max(classes)) + 1) if classes else 10
    colors = cm.rainbow(Math.linspace(0, 1, n_colors))
    handles = []
    for cls in classes:
        members = labels == cls
        handles.append(
            Plot.scatter(Y[members, 0],
                         Y[members, 1],
                         20,
                         color=colors[int(cls)]))
    # Label every legend entry with the class it really shows.
    Plot.legend(handles,
                [int(cls) for cls in classes],
                loc='center left',
                ncol=1,
                fontsize=8,
                bbox_to_anchor=(1, 0.5))
    Plot.savefig("result.png")
    elice_utils.send_image("result.png")
    return
 def plot_with_labels(self,plot_only = 100, title="Like2Vec meets TensorFlow", filename='tsne.png',
                      num_tsne_dims = 2, perplexity = 5.0,verbose=False):
     """
     randomly chooses some of the users or items and plots them using tsne

     INPUT:
         plot_only : the number of users or items you would like plotted,int;
                     capped at the number of available embeddings
         title : the title of the plot generated, str
         filename : the name you would like the file saved under, str
         num_tsne_dims : number of dimensions, int; the plotting code
                     unpacks two coordinates per point, so this must be 2
         perplexity : the perplexity used in generating tsne, recommended to be between 5.0-50.0, double
         verbose : whether or not to print the progress of tsne

     OUTPUT:
         Your plot will be saved under the name and in the location you passed in filename
     """
     n_rows = self.final_embeddings.shape[0]
     # Sampling without replacement cannot draw more rows than exist.
     plot_only = min(plot_only, n_rows)
     selected_rows = np.sort(np.random.choice(n_rows, plot_only, replace=False))
     labels = [self.labels[i] for i in selected_rows]
     low_dim_embs = tsne(self.final_embeddings[selected_rows], num_tsne_dims,
                         self.final_embeddings.shape[1], perplexity,verbose)
     assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
     plt.figure(figsize=(18, 18))  #in inches
     # One scatter call per point, so each point takes the next colour of
     # the colour cycle.
     for i, label in enumerate(labels):
         x, y = low_dim_embs[i,:]
         plt.scatter(x, y)
         plt.annotate(label,
                      xy=(x, y),
                      xytext=(5, 2),
                      textcoords='offset points',
                      ha='right',
                      va='bottom')
     plt.title(title)
     plt.savefig(filename)
Example #27
0
def plot_words (V,labels=None,color='b',mark='o',fa='bottom'):
	"""Scatter the 2-D t-SNE embedding of ``V`` and annotate it with ``labels``.

	V      -- matrix of vectors passed to ``tsne``.
	labels -- optional labels, one per row of ``V``; UTF-8 byte strings are
	          decoded, text strings are used as they are.  Nothing is
	          annotated when omitted.
	color  -- scatter colour.
	mark   -- scatter marker.
	fa     -- vertical alignment of the annotation text.
	"""
	W = tsne(V,2)
	plt.scatter(W[:,0], W[:,1],c=color,marker=mark,s=50.0)
	if labels is None:
		# The default used to crash in zip(); there is nothing to annotate.
		return
	for label,x,y in zip(labels, W[:,0], W[:,1]):
		text = label.decode('utf8') if isinstance(label, bytes) else label
		plt.annotate(text, xy=(x,y), xytext=(-1,1), textcoords='offset points', ha= 'center', va=fa, bbox=dict(boxstyle='round,pad=0.1', fc='white', alpha=0))
Example #28
0
def egfr_test():
    """Show a t-SNE plot of Morgan features for EGFR actives and decoys.

    Reads the DUD-E EGFR ``actives_final.ism`` and ``decoys_final.ism`` files
    from their hard-coded locations, keeps all actives and at most 10000
    decoys, featurises them with ``dataset_from_mols`` and colours the
    embedded points by class (1 = active, 0 = decoy).
    """
    active_smi = '/home/xtalpi/datasets/mol_data/dude/egfr/actives_final.ism'
    decoys_smi = '/home/xtalpi/datasets/mol_data/dude/egfr/decoys_final.ism'
    columns = ['smiles', 'id', 'chemblid']

    df_active = pd.read_csv(active_smi, sep=' ', names=columns)
    df_decoys = pd.read_csv(decoys_smi, sep=' ', names=columns)

    n2 = 10000  # upper bound on the number of decoys used
    active_smiles = df_active['smiles'].tolist()
    decoy_smiles = df_decoys['smiles'].tolist()[:n2]
    smiles_list = active_smiles + decoy_smiles
    # Count the decoys actually taken: the file may hold fewer than n2.
    labels = [1] * len(active_smiles) + [0] * len(decoy_smiles)
    print(len(smiles_list), len(labels))

    dataset = dataset_from_mols(smiles_list,
                                featurizer_type='morgan',
                                transformer_type='morgan',
                                batch_size=32)

    # NOTE(review): assumes dataset.data[0] keeps one row per input SMILES;
    # if molecules are dropped, labels no longer line up -- confirm.
    data = np.asarray(dataset.data[0], dtype=np.float32)

    Y = tsne.tsne(data, 2, 50, 20.)
    print(data.shape, Y.shape)
    pylab.scatter(Y[:, 0], Y[:, 1], 20, labels)
    pylab.show()
Example #29
0
def dimension_reduction(data, index, method='tsne', label=None, plot=False):
    """Standardize ``data`` and project it to two dimensions.

    Args:
        data: Feature matrix accepted by ``StandardScaler``.
        index: Iterable of row indices inspected for outliers when plotting.
        method: One of ``'tsne'``, ``'isomap'``, ``'MDS'`` or ``'tsne_v2'``.
        label: Class labels compared against 0, 1 and 2 to build the scatter
            masks. Only used when ``plot`` is true; presumably a numpy array
            aligned with the rows of ``data`` -- confirm with callers.
        plot: When true, draw the embedding, annotate every row from
            ``index`` that lies outside the (-20, 20) box on either axis,
            and write those rows to ``output_Archive.csv``.

    Returns:
        The two-column embedding ``X``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    n_components = 2

    # Every reduction method here is distance based, so the features are
    # standardized first.
    scaler = StandardScaler().fit(data)
    data = scaler.transform(data)

    # Each method maps the standardized features to a two-column X.
    if method == 'tsne':
        model = TSNE(n_components=n_components, perplexity=20, early_exaggeration=20, method='exact',
                     learning_rate=100, n_iter=1000, random_state=250, verbose=2)
        X = model.fit_transform(data)
    elif method == 'isomap':
        model = Isomap(n_components=n_components, n_neighbors=20)
        X = model.fit_transform(data)
    elif method == 'MDS':
        model = MDS(n_components=n_components, verbose=2, n_init=1, max_iter=500)
        X = model.fit_transform(data)
    elif method == 'tsne_v2':
        X = tsne(data, 2, 44, 50.0)
    else:
        # Previously an unknown method fell through and failed later with an
        # unbound-variable error on X.
        raise ValueError('Unknown dimension reduction method: %r' % (method,))

    data_len = len(X)
    print(data_len)
    print(X)
    if plot:
        fig, ax = plt.subplots()
        ax.scatter(X[label == 0, 0], X[label == 0, 1], c='darkblue', alpha=0.25, marker='^')
        ax.scatter(X[label == 1, 0], X[label == 1, 1], c='darkred', alpha=0.75, marker='x')
        ax.scatter(X[label == 2, 0], X[label == 2, 1], c='green', alpha=0.25, marker='o')
        # The axis limits follow the extent of class 0 only.
        ax.set_xlim([np.min(X[label == 0, 0]), np.max(X[label == 0, 0])])
        ax.set_ylim([np.min(X[label == 0, 1]), np.max(X[label == 0, 1])])
        idxList = []
        nameList = []
        for ind in index:
            if not ((-20 < X[ind, 0] < 20) and (-20 < X[ind, 1] < 20)):
                print(ind)
                idxList.append(ind)
                # NOTE(review): `name` is not defined in this function; it
                # must be supplied by the enclosing module -- confirm.
                nameList.append(name[ind])
                ax.annotate(str(ind), xy=(X[ind, 0], X[ind, 1]))
        print(idxList)
        print(nameList)
        plt.show()
        outPut = {'Index': idxList, 'Video_Name': nameList}
        print(outPut)
        output_Archive = pd.DataFrame(outPut)
        output_Archive.to_csv('output_Archive.csv')

    return X
    def __init__(self, wrd_embedding_corpora):
        """Store the corpora and derive the joint t-SNE embeddings.

        The embedding width is read from ``wrd_embedding_corpora[0][1]``.
        The concatenated embedding, its t-SNE projection and the per-corpus
        projections are then computed in that order.
        """
        corpora = wrd_embedding_corpora
        first_matrix = corpora[0][1]

        self.wrd_embedding_corpora = corpora
        self.embedding_dims = first_matrix.shape[1]
        self.num_corpus = len(corpora)

        # Each step below depends on the attribute set by the previous one.
        concatenated = self._concat_wrd_embeddings()
        self.master_wrd_embedding = concatenated
        self.master_tsne_embedding = tsne(concatenated)
        self.tsne_embedding_corpora = self._flatten_tsne_embeddings()
Example #31
0
def SBM_visual_tsne(labels, X):
    """Project ``X`` to 2-D with t-SNE, show a scatter coloured by ``labels``.

    Returns:
        The 2-D embedding produced by ``tsne.tsne``.
    """
    import tsne
    import pylab as Plot

    embedding = tsne.tsne(X, 2)
    xs, ys = embedding[:, 0], embedding[:, 1]

    Plot.figure()
    Plot.scatter(xs, ys, 20, labels)
    Plot.show()
    return embedding
Example #32
0
 def tsne_plot(self):
     """Show the first layer's weights as a 2-D t-SNE map of vocabulary words."""
     weights = self.model.layers[0].weight.numpy()
     embedding = tsne.tsne(weights)
     xs = embedding[:, 0]
     ys = embedding[:, 1]

     plt.figure()
     # One text label per vocabulary entry, placed at its embedded position.
     for idx, word in enumerate(self.vocab):
         plt.text(embedding[idx, 0], embedding[idx, 1], word)
     # Text artists do not autoscale the axes, so fit them to the data.
     plt.xlim(xs.min(), xs.max())
     plt.ylim(ys.min(), ys.max())
     plt.show()
    def plot_tsne(self):
        '''
        Plot the t-Distributed Stochastic Neighbor Embedding (t-SNE) of the data.

        The data is NaN-cleaned, mean-centered and standardized, embedded in
        2-D (fast ``calc_tsne`` first, pure-Python ``tsne`` as fallback) and
        drawn per class and per opacity level on ``self.subplot``. Returns
        early, without plotting, when both t-SNE implementations fail.
        '''
        self.subplot.clear()
        self.data = np.nan_to_num(self.data) # Eliminate NaNs
        centered = self.mean_center(self.data)
        standardized = self.standardization(centered)

        # Calculate t-SNE of the data and mask it (python t-SNE version if Intel IPP is not installed).
        # `except Exception` (not a bare except) so Ctrl-C / SystemExit still
        # propagate instead of being reported as a t-SNE failure.
        try:
            from calc_tsne import calc_tsne
            U = calc_tsne(standardized, 2, 50, 20.0)
        except Exception:
            logging.warning('''Could not use fast t-SNE. You may need to install the Intel Integrated Performance Libraries. Will use normal t-SNE instead.''')
            try:
                from tsne import tsne
                U = tsne(standardized, 2, 50, 20.0)
            except Exception:
                logging.error('''Both t-SNE versions failed. Your dataset may be too large for t-SNE to handle. Will not plot t-SNE results.''')
                return

        self.Scores = U[:, 0:2]
        if self.class_masks is None or self.class_names is None:
            self.class_masks, self.class_names = self.create_class_masks()
        self.masked_X, self.masked_Y = self.mask_data(len(self.class_names), self.class_masks, self.Scores)

        # Plot the masked t-SNE results in the Scores canvas
        self.color_set = self.set_colormap(self.class_names)
        handles = []
        labels = []

        # Determine the different opacities for the objects. This is set to 1 if no opacities have been specified.
        if self.object_opacity is None:
            self.object_opacity = np.ones([self.masked_X.shape[0], 1])
            self.object_accuracies = False
        elif self.object_accuracies is None:
            self.object_accuracies = True
        opacities = np.unique(self.object_opacity)
        nOpacity = len(opacities)

        # For each class and opacity combination plot the corresponding objects
        for i in range(len(self.class_names)):
            # np.nonzero returns a 1-tuple of index arrays, so this shape is
            # (1, count); cell_count[1] is the number of nonzero entries.
            cell_count = np.shape(np.nonzero(self.masked_X[:, i]))
            for j in range(nOpacity):
                showObjects = np.where(self.object_opacity == opacities[j])
                subHandle = self.subplot.scatter(self.masked_X[showObjects, i], self.masked_Y[showObjects, i], 8, c=self.color_set[i, :], linewidth="0.25", alpha=0.25+0.75*opacities[j])
                # The highest opacity objects are added to the legend
                if opacities[j] == np.max(opacities):
                    handles.append(subHandle)
                    labels.append(self.class_names[i] + ': ' + str(cell_count[1]))
        self.leg = self.subplot.legend(handles, labels, loc=4, fancybox=True, handlelength=1)
        self.leg.get_frame().set_alpha(0.25)
        self.subplot.axhline(0, -100000, 100000, c='k', lw=0.1)
        self.subplot.axvline(0, -100000, 100000, c='k', lw=0.1)
        self.figure.canvas.draw()
        self.motion_event_active = True
Example #34
0
def load_training_annotation(filepath, verbose=False):
    """Embed the pickled joint annotations with t-SNE and pickle the result.

    Args:
        filepath: Path of a pickle holding an ``(image_names, joints)`` pair.
            The last four characters are replaced by ``'_embed.pkl'`` to
            form the output path.
        verbose: Accepted for interface compatibility; not used here.

    At most the first 10000 samples are embedded, each flattened to a single
    row of ``joints.shape[1] * joints.shape[2]`` values.
    """
    output_filepath = filepath[:-4] + '_embed.pkl'
    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # on annotation files from a trusted source.
    # The `with` block closes the input handle, which the original leaked.
    with open(filepath, "rb") as annotation_file:
        image_names, joints = pickle.load(annotation_file)
    joints = np.array(joints)
    flat_joints = joints.reshape(
        (joints.shape[0], joints.shape[1] * joints.shape[2]))
    joints_embed = tsne(flat_joints[0:10000, :])
    with open(output_filepath, 'wb') as pf:
        pickle.dump(joints_embed, pf)
 def tsne_plot(self):
     """Show the word embedding weights as a 2-D t-SNE map of vocabulary words."""
     embedding = tsne.tsne(self.params.word_embedding_weights)
     xs, ys = embedding[:, 0], embedding[:, 1]

     pylab.figure()
     # Write each vocabulary word at its embedded coordinates.
     for idx, word in enumerate(self.vocab):
         pylab.text(embedding[idx, 0], embedding[idx, 1], word)
     # Text artists do not autoscale the axes, so fit them to the data.
     pylab.xlim(xs.min(), xs.max())
     pylab.ylim(ys.min(), ys.max())
     pylab.show()
 def tsne_plot(self):
     """Draw the word embedding weights as a 2-D t-SNE map (figure is not shown)."""
     coords = tsne.tsne(self.params.word_embedding_weights)
     col_x = coords[:, 0]
     col_y = coords[:, 1]

     pylab.figure()
     for position, token in enumerate(self.vocab):
         pylab.text(coords[position, 0], coords[position, 1], token)
     # Fit the axes to the embedded points.
     pylab.xlim(col_x.min(), col_x.max())
     pylab.ylim(col_y.min(), col_y.max())
Example #37
0
def closest_k_points_tsne(embeddings, word, k):
    """Return 2-D t-SNE coordinates for ``word`` and its ``k`` nearest neighbours.

    Args:
        embeddings: Object providing ``nearest_neighbors(word, top_k=...)``
            and ``get(word)``.
        word: Query word; it is appended after its neighbours.
        k: Number of neighbours requested.

    Returns:
        A list of dicts with keys ``'label'``, ``'x'`` and ``'y'``, one per
        neighbour followed by one for ``word`` itself.
    """
    neighbours = embeddings.nearest_neighbors(word, top_k=k) + [word]
    # Build a real list: on Python 3 `map` is lazy and np.array(map(...))
    # would produce a useless 0-d object array.
    vectors = [embeddings.get(name).tolist() for name in neighbours]
    tsne_reps = tsne(np.array(vectors))
    return [
        {'label': name, 'x': tsne_reps[i][0], 'y': tsne_reps[i][1]}
        for i, name in enumerate(neighbours)
    ]
Example #38
0
def closest_k_points_tsne(embeddings, word, k):
    """Return 2-D t-SNE coordinates for ``word`` and its ``k`` nearest neighbours.

    Args:
        embeddings: Object providing ``nearest_neighbors(word, top_k=...)``
            and ``get(word)``.
        word: Query word; it is appended after its neighbours.
        k: Number of neighbours requested.

    Returns:
        A list of dicts with keys ``'label'``, ``'x'`` and ``'y'``, one per
        neighbour followed by one for ``word`` itself.
    """
    neighbours = embeddings.nearest_neighbors(word, top_k=k) + [word]
    # Build a real list: on Python 3 `map` is lazy and np.array(map(...))
    # would produce a useless 0-d object array.
    vectors = [embeddings.get(name).tolist() for name in neighbours]
    tsne_reps = tsne(np.array(vectors))
    return [
        {'label': name, 'x': tsne_reps[i][0], 'y': tsne_reps[i][1]}
        for i, name in enumerate(neighbours)
    ]
Example #39
0
def tSNE_analysis(dp):
    """Embed one training batch of 50 samples with t-SNE and show it.

    Each sample is flattened to 4096 values. The batch labels are mapped
    through ``0.5 + 0.5 * label`` and used as colours on the "brg" colormap.
    """
    BATCHSIZE = 50
    batch = dp.get_train_batch(BATCHSIZE)
    flat_images = np.reshape(batch[0], [BATCHSIZE, 4096])
    colour_values = 0.5 + 0.5 * batch[1]

    embedding = tsne.tsne(flat_images, 2, 20, 20.0)

    plt.scatter(embedding[:, 0], embedding[:, 1],
                c=colour_values, cmap=plt.get_cmap("brg"))
    plt.colorbar()
    plt.show()
Example #40
0
def embed(words, matrix, usermodel):
    """Render a labelled 2-D t-SNE scatter of ``matrix`` to a PNG file.

    Args:
        words: Labels, paired positionally with the rows of ``matrix``; only
            the part before the first ``'_'`` is drawn.
        matrix: Vectors handed to ``tsne`` (300 initial dims, perplexity 5).
        usermodel: Prefix of the output file name.

    The file is written below ``root + 'static/tsneplots/'`` and named
    ``<usermodel>_<md5 of the joined words>.png``.
    """
    Y = tsne(matrix, 2, 300, 5.0)
    print('2-d embedding finished')
    Plot.scatter(Y[:, 0], Y[:, 1], 20, marker='.')
    # The loop body was tab-indented inside a space-indented function, which
    # Python 3 rejects; it is indented consistently here.
    for label, x, y in zip(words, Y[:, 0], Y[:, 1]):
        Plot.annotate(label.split('_')[0], xy=(x - 20, y), size='x-large', weight='bold', fontproperties=font)
    # MD5 is used only to derive a stable file name from the word list.
    m = hashlib.md5()
    name = '_'.join(words).encode('ascii', 'backslashreplace')
    m.update(name)
    fname = m.hexdigest()
    Plot.savefig(root + 'static/tsneplots/' + usermodel + '_' + fname + '.png', dpi=150, bbox_inches='tight')
    Plot.close()
 def tsne_plot(self):
     """
     Save a 2-D t-SNE map of the learned word representations as an image.
     """
     projection = tsne.tsne(self.params.word_embedding_weights)
     first_axis = projection[:, 0]
     second_axis = projection[:, 1]

     pylab.figure()
     for row, word in enumerate(self.vocab):
         pylab.text(projection[row, 0], projection[row, 1], word)
     # Fit the axes to the embedded points.
     pylab.xlim(first_axis.min(), first_axis.max())
     pylab.ylim(second_axis.min(), second_axis.max())
     # TODO: change back to show
     # pylab.show()
     pylab.savefig('../a1-writeup/Images/1.png')
def feature_tsne(f_S, f_T):
    """Embed two feature sets jointly with t-SNE and split the result again.

    ``f_S`` and ``f_T`` are stacked row-wise and embedded in one run
    (perplexity 30, initial dims equal to the feature width).

    Returns:
        ``[fr_S, fr_T]``: the 2-D embeddings of the rows that came from
        ``f_S`` and ``f_T`` respectively.
    """
    n_source = np.shape(f_S)[0]
    n_target = np.shape(f_T)[0]
    feature_width = np.shape(f_S)[1]

    stacked = np.append(f_S, f_T, axis=0)
    embedded = tsne.tsne(X=stacked, no_dims=2, initial_dims=feature_width,
                         perplexity=30.0)

    # The first n_source rows belong to f_S, the following n_target to f_T.
    split_at = n_source
    end = n_source + n_target
    return [embedded[0:split_at, :], embedded[split_at:end, :]]
Example #43
0
def embed(words, matrix, usermodel):
    """Render a labelled 2-D t-SNE scatter of ``matrix`` to a PNG file.

    Args:
        words: Labels, paired positionally with the rows of ``matrix``; only
            the part before the first ``'_'`` is drawn.
        matrix: Vectors handed to ``tsne``; its column count is used as the
            initial dimensionality, with perplexity 5.
        usermodel: Prefix of the output file name.

    The file is written below ``root + 'static/tsneplots/'`` and named
    ``<usermodel>_<md5 of the joined words>.png``.
    """
    perplexity = 5.0
    dimensionality = matrix.shape[1]
    embedding = tsne(matrix, 2, dimensionality, perplexity)
    # Written directly to stderr so the line works on Python 2 and 3 alike.
    sys.stderr.write('2-d embedding finished\n')
    plot.scatter(embedding[:, 0], embedding[:, 1], 20, marker='.')
    # The array keeps its own name; the original reused `y` as the loop
    # variable and overwrote the embedding after the first iteration.
    for label, x, y in zip(words, embedding[:, 0], embedding[:, 1]):
        plot.annotate(label.split('_')[0], xy=(x - 20, y), size='x-large', weight='bold', fontproperties=font)
    # MD5 is used only to derive a stable file name from the word list.
    m = hashlib.md5()
    name = '_'.join(words).encode('ascii', 'backslashreplace')
    m.update(name)
    fname = m.hexdigest()
    plot.savefig(root + 'static/tsneplots/' + usermodel + '_' + fname + '.png', dpi=150, bbox_inches='tight')
    plot.close()
def plot_2d_classes(value):
    """Show a 2-D t-SNE scatter of the first HoG vectors, coloured by class.

    If fewer than 10 labels are loaded, the labels and HoG data are reloaded
    through ``load_hog(1)``; the function gives up when that does not return
    1. The number of elements to plot is read interactively. ``value`` is
    not used.
    """
    global labels
    if len(labels) < 10:
        print ("Labels and HoG dont seem to have been loaded")
        print ("Trying to load them from disk")
        loaded = load_hog(1) == 1
        if not loaded:
            print ("Could not load HoG, quitting")
            return
    prompt = 'Plot this many elements (up to ' + str(len(labels)) + ') : '
    nm_elements = int(raw_input(prompt))
    classes = np.unique(labels).tolist()
    # Replace each label by the position of its class in the sorted list of
    # unique classes.
    new_labels = [
        classes.index(unique_label)
        for single_label in labels[:nm_elements]
        for unique_label in classes
        if unique_label == single_label
    ]
    y = tsne.tsne(np.array(hog_list[:nm_elements]))
    plot.scatter(y[:, 0], y[:, 1], 20, new_labels)
    plot.show()
def tsne_viz(
        mat=None,
        rownames=None,
        indices=None,
        colors=None,
        output_filename=None,
        figheight=40,
        figwidth=50,
        display_progress=False):
    """2d plot of mat using tsne, with the points labeled by rownames.

    Args:
        mat: Matrix handed to ``tsne``.
        rownames: Labels for the rows of ``mat``.
        indices: Optional indices into ``mat`` and ``rownames`` selecting
            the points to display; all rows when missing or empty.
        colors: Optional label colours, paired positionally with the
            displayed points; all black when missing or empty.
        output_filename: When given, save the image there instead of
            showing it.
        figheight: Figure height.
        figwidth: Figure width.
        display_progress: When true, let ``tsne`` print its iteration info.
    """
    # Explicit None/length tests: `not colors` raises for numpy arrays.
    if colors is None or len(colors) == 0:
        colors = ['black' for i in range(len(rownames))]
    temp = sys.stdout
    devnull = None
    try:
        if not display_progress:
            # Redirect stdout so that tsne doesn't fill the screen with its iteration info:
            devnull = open(os.devnull, 'w')
            sys.stdout = devnull
        tsnemat = tsne(mat)
    finally:
        # Always restore stdout and release the devnull handle, even when
        # tsne raises.
        sys.stdout = temp
        if devnull is not None:
            devnull.close()
    # Plot coordinates:
    if indices is None or len(indices) == 0:
        indices = list(range(len(rownames)))
    vocab = np.array(rownames)[indices]
    xvals = tsnemat[indices, 0]
    yvals = tsnemat[indices, 1]
    # Plotting:
    fig, ax = plt.subplots(nrows=1, ncols=1)
    # Honour the size parameters; the defaults equal the values that were
    # previously hard-coded here.
    fig.set_figheight(figheight)
    fig.set_figwidth(figwidth)
    ax.plot(xvals, yvals, marker='', linestyle='')
    # Text labels:
    for word, x, y, color in zip(vocab, xvals, yvals, colors):
        ax.annotate(word, (x, y), fontsize=8, color=color)
    # Output:
    if output_filename:
        plt.savefig(output_filename, bbox_inches='tight')
    else:
        plt.show()
def plotTsne():
    """Scatter a t-SNE embedding of words from five lists, coloured by list.

    Each input line is expected to look like ``<count> <word>``. Words whose
    count exceeds ``w2vThreshold`` and that exist in the word2vec model are
    embedded and coloured after the first file they appear in. The figure is
    saved as ``scatter.png``.
    """
    w2vThreshold = 2
    filenames = ['Haupt.txt', 'Super.txt', 'Kinder.txt', 'Bundes.txt', 'Finanz.txt']
    w2vPath = '../NLP2-Project2/models/mono_500_de.bin'
    dimensions = 500

    colours = ['#f02720', '#ff7f0f', '#32a251', '#1f77b4', '#ab6ad5']

    # Map each word straight to its colour. The original kept a set of words
    # and a separate colour list indexed by set-iteration position, so the
    # colours did not line up with the words.
    word_colours = {}
    for colour, fname in zip(colours, filenames):
        with codecs.open(fname, 'rb', encoding='utf-8') as f:
            for line in f:
                fields = line.strip().split(' ')
                if len(fields) < 2:
                    continue
                try:
                    # The count is text; comparing it unconverted to an int
                    # never applied the threshold.
                    count = int(fields[0])
                except ValueError:
                    continue
                if count > w2vThreshold:
                    word_colours.setdefault(fields[1], colour)

    model = loadW2VModel(w2vPath)

    vectors = []
    labels = []
    for word, colour in word_colours.items():
        try:
            rep = model[word]
        except KeyError:
            # Word is missing from the model.
            continue
        vectors.append(rep)
        labels.append(colour)

    # Build the matrix once instead of growing it row by row; the reshape
    # keeps the (0, dimensions) shape when no word survived.
    X = Math.asarray(vectors, dtype=float).reshape(-1, dimensions)

    Y = tsne(X, 2, dimensions, 20.0, max_iter=1000)
    pylab.scatter(Y[:,0], Y[:,1], 18, marker='o', c=labels, edgecolor='None')
    pylab.savefig('scatter.png')
Example #47
0
def plot(title, embeddings, labels, use_tsne):
    """Project ``embeddings`` to 2-D and show them as annotated crosses.

    Args:
        title: When truthy, an EPS copy with extra-small annotations is
            first saved as ``fig-<title>.eps``.
        embeddings: Vectors to project.
        labels: Annotation texts, paired positionally with the points.
        use_tsne: Use t-SNE when true, PCA otherwise.
    """
    if use_tsne:
        low_dim_embs = tsne.tsne(embeddings, 2, len(embeddings), 50.0)
    else:
        low_dim_embs = PCA(n_components=2).fit_transform(embeddings)

    def draw_points(**annotate_options):
        # Draw every point as a cross with its label next to it.
        for text, px, py in zip(labels, low_dim_embs[:, 0], low_dim_embs[:, 1]):
            plt.plot(px, py, 'x')
            plt.annotate(text, xy=(px, py), **annotate_options)

    if title:
        draw_points(fontsize='xx-small')
        eps_path = 'fig-%s.eps' % title
        plt.savefig(eps_path, format='eps', dpi=1200)

    # Start from a clean figure for the interactive view.
    plt.clf()
    draw_points()

    plt.show()
    plt.clf()
	plt.clf()
Example #48
0
def visualize(wordEmbeddings):
    """
    Embed a set of examples in 2-D with t-SNE and dump them to a text file.

    Args:
        wordEmbeddings: Mapping from title to embedding vector. Whitespace
            inside a title is replaced by underscores in the output.

    Each output row holds the title followed by its 2-D coordinates. If
    t-SNE raises ``IOError`` an error line is printed and nothing is written.
    """
    PERPLEXITY = 30

    # Materialize the dict views so numpy receives real sequences on
    # Python 3 as well.
    titles = list(wordEmbeddings.keys())
    titlesStr = ["_".join(y.strip().split()) for y in titles]
    x = numpy.vstack(list(wordEmbeddings.values()))

    filename = "embeddings.png"
    try:
        from tsne import tsne
        out = tsne(x, no_dims=2, perplexity=PERPLEXITY)
    except IOError:
        print("ERROR visualizing %s" % filename)
        # Without an embedding there is nothing to save; the original went
        # on and failed on the unbound `out`.
        return

    data = numpy.column_stack((titlesStr, out))
    numpy.savetxt("/home/bhanu/workspace/RNTN/data/results/embeddings2d_phrase_vis.txt", data, "%s")
Example #49
0
def tsne_viz(
        mat=None,
        rownames=None,
        indices=None,
        colors=None,
        output_filename=None,
        figheight=40,
        figwidth=50,
        display_progress=False):
    """2d plot of mat using tsne, with the points labeled by rownames.

    Args:
        mat: Matrix handed to ``tsne``.
        rownames: Labels for the rows of ``mat``.
        indices: Optional indices into ``mat`` and ``rownames`` selecting
            the points to display; all rows when missing or empty.
        colors: Optional label colours, paired positionally with the
            displayed points; all black when missing or empty.
        output_filename: When given, save the image there instead of
            showing it.
        figheight: Currently ignored (see the note in the body).
        figwidth: Currently ignored (see the note in the body).
        display_progress: When true, let ``tsne`` print its iteration info.
    """
    # Explicit None/length tests: `not colors` raises for numpy arrays.
    if colors is None or len(colors) == 0:
        colors = ['black' for i in range(len(mat))]
    temp = sys.stdout
    devnull = None
    try:
        if not display_progress:
            # Redirect stdout so that tsne doesn't fill the screen with its iteration info:
            devnull = open(os.devnull, 'w')
            sys.stdout = devnull
        tsnemat = tsne(mat)
    finally:
        # Always restore stdout and release the devnull handle, even when
        # tsne raises.
        sys.stdout = temp
        if devnull is not None:
            devnull.close()
    # Plot coordinates:
    if indices is None or len(indices) == 0:
        indices = list(range(len(mat)))
    vocab = np.array(rownames)[indices]
    xvals = tsnemat[indices, 0]
    yvals = tsnemat[indices, 1]
    # Plotting:
    fig, ax = plt.subplots(nrows=1, ncols=1)
    # NOTE(review): the figure size is fixed at 100 x 500 and ignores
    # figheight/figwidth. It is kept because the parameter defaults (40, 50)
    # differ, so honouring them would change existing output.
    fig.set_figheight(100)
    fig.set_figwidth(500)
    ax.plot(xvals, yvals, marker='', linestyle='')
    # Text labels:
    for word, x, y, color in zip(vocab, xvals, yvals, colors):
        ax.annotate(word, (x, y), fontsize=8, color=color)
    print("Output:")
    if output_filename:
        plt.savefig(output_filename, bbox_inches='tight')
    else:
        plt.show()
Example #50
0
def get_area_centroids_2D():
    """Compute tsne-determined 2D centroids for both hemispheres of each area.

    Returns:
        A pair ``(areas_LR, centroids_2D)``: the lateralized area names (all
        ``'_L'`` names followed by all ``'_R'`` names) and the symmetrically
        aligned 2-D coordinates in the same order.
    """
    # Structure id of every area.
    structure_ids = [ONTO.structure_by_acronym(area).structure_id
                     for area in AREAS]

    # Centroids of the left and right hemisphere masks, stacked left first.
    left_centroids = [
        ONTO.get_mask_from_id_left_hemisphere_nonzero(s_id).centroid
        for s_id in structure_ids]
    right_centroids = [
        ONTO.get_mask_from_id_right_hemisphere_nonzero(s_id).centroid
        for s_id in structure_ids]
    centroids = np.concatenate(
        [np.array(left_centroids), np.array(right_centroids)], 0)

    # Lateralized names, in the same order as the stacked centroids.
    areas_LR = ([area + '_L' for area in AREAS]
                + [area + '_R' for area in AREAS])

    # Reduce to 2-D, then align the result symmetrically.
    embedded = tsne.tsne(centroids, 2, max_iter=1000)
    centroids_2D = sym_align(embedded, areas_LR)
    return areas_LR, centroids_2D
Example #51
0
def test_tsne():
    """Run t-SNE on the digit data and scatter the result per digit class.

    Uses every row when ``options.all_data`` is set and the first 250 rows
    otherwise. The plot is only shown when ``options.show_graph`` is set.
    """
    global digits, labels

    total_rows = digits.shape[0]
    # Smaller number of rows for debugging unless all data is requested.
    nrows = total_rows if options.all_data else 250

    row_selection = range(nrows)
    X = digits[row_selection, ...]
    L = labels[row_selection, ...]

    Y = tsne.tsne(X, 2, 50, 20.0, use_pca=True, max_iter=1000)

    for digit in range(10):
        members = [pos for pos in range(len(L)) if L[pos] == digit]
        colour = Plot.get_cmap()(0.1 * digit)
        Plot.scatter(Y[members, 0], Y[members, 1], 20, colour,
                     label="%d" % digit)
    Plot.xticks([])
    Plot.yticks([])
    Plot.legend(loc='upper left', scatterpoints=1)
    if options.show_graph:
        Plot.show()
Example #52
0
        saver.save(sess, os.getcwd()+"/training/train",global_step=epoch)
else:
    saver.restore(sess, tf.train.latest_checkpoint(os.getcwd()+"/training/"))
    rand = 50
    x = train_data[rand:rand+64,:,:]
    y = train_labels[rand:rand+64,:]
    preds = sess.run([model.prediction], {data: x, target: y, dropout: 1})[0]

    labels = ["4Head","AMPEnergy","AMPEnergyCherry","ANELE","ArgieB8","ArsonNoSexy","AsianGlow","AthenaPMS","BabyRage","BatChest","BCouch","BCWarrior","BibleThump","BiersDerp","BigBrother","BionicBunion","BlargNaut","bleedPurple","BloodTrail","BORT","BrainSlug","BrokeBack","BudBlast","BuddhaBar","BudStar","ChefFrank","cmonBruh","CoolCat","CorgiDerp","CougarHunt","DAESuppy","DalLOVE","DansGame","DatSheffy","DBstyle","deExcite","deIlluminati","DendiFace","DogFace","DOOMGuy","DoritosChip","duDudu","EagleEye","EleGiggle","FailFish","FPSMarksman","FrankerZ","FreakinStinkin","FUNgineer","FunRun","FutureMan","FuzzyOtterOO","GingerPower","GrammarKing","HassaanChop","HassanChop","HeyGuys","HotPokket","HumbleLife","ItsBoshyTime","Jebaited","JKanStyle","JonCarnage","KAPOW","Kappa","KappaClaus","KappaPride","KappaRoss","KappaWealth","Keepo","KevinTurtle","Kippa","Kreygasm","Mau5","mcaT","MikeHogu","MingLee","MKXRaiden","MKXScorpion","MrDestructoid","MVGame","NinjaTroll","NomNom","NoNoSpot","NotATK","NotLikeThis","OhMyDog","OMGScoots","OneHand","OpieOP","OptimizePrime","OSfrog","OSkomodo","OSsloth","panicBasket","PanicVis","PartyTime","PazPazowitz","PeoplesChamp","PermaSmug","PeteZaroll","PeteZarollTie","PicoMause","PipeHype","PJSalt","PJSugar","PMSTwin","PogChamp","Poooound","PraiseIt","PRChase","PunchTrees","PuppeyFace","RaccAttack","RalpherZ","RedCoat","ResidentSleeper","riPepperonis","RitzMitz","RuleFive","SeemsGood","ShadyLulu","ShazBotstix","ShibeZ","SmoocherZ","SMOrc","SMSkull","SoBayed","SoonerLater","SriHead","SSSsss","StinkyCheese","StoneLightning","StrawBeary","SuperVinlin","SwiftRage","TBCheesePull","TBTacoLeft","TBTacoRight","TF2John","TheRinger","TheTarFu","TheThing","ThunBeast","TinyFace","TooSpicy","TriHard","TTours","twitchRaid","TwitchRPG","UleetBackup","UncleNox","UnSane","VaultBoy","VoHiYo","Volcania","WholeWheat","WinWaker","WTRuck","WutFace","YouWHY"]
    labels2 = [x for pair in zip(labels,labels) for x in pair]

    embeds = sess.run([model.embeddingvar])[0]
    print np.shape(embeds)
    embeds = np.repeat(embeds.T,2,axis=0)
    print np.shape(embeds)
    Y = tsne.tsne(embeds, 2, 200, 20.0);
    print np.shape(Y)
    # # Plot.scatter(Y[:,0], Y[:,1], 20);
    # # Plot.show();
    plt.scatter(
    Y[:, 0], Y[:, 1], marker = 'o')
    #
    for label, x, y in zip(labels2, Y[:, 0], Y[:, 1]):
        plt.annotate(
        label,
        xy = (x, y), xytext = (-20, 20),
        textcoords = 'offset points', ha = 'right', va = 'bottom',
        bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
        arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))

    plt.show()
Example #53
0
# coding: utf-8
import tsne
import cPickle as pkl
import numpy as np
import sys
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Document matrix pickle object.
# Pickles are binary data, so the file is opened in 'rb' mode; text mode
# only happened to work on Python 2 / POSIX.
# NOTE(security): unpickling can execute arbitrary code; only pass trusted
# files as sys.argv[1].
with open(sys.argv[1], 'rb') as doc_mat_pkl:
    vectors = pkl.load(doc_mat_pkl)

Y = tsne.tsne(vectors.astype(np.float64), no_dims=2, perplexity=5)

# Output name for the pdf, sans the filetype
pp = PdfPages(sys.argv[2] + '.pdf')
try:
    fig = plt.figure(figsize=(16,12))
    plt.scatter(Y[:,0], Y[:,1], c='k')
    pp.savefig()
finally:
    # Close the PDF even if plotting fails.
    pp.close()
plt.close()

Example #54
0
import numpy as np
from tsne import tsne


# Demo run: embed 1000 uniformly random 30-dimensional points with t-SNE,
# printing progress along the way.
X = np.random.rand(1000, 30)

Y = tsne(X, verbose=True)
Example #55
0
import numpy as Math
import pylab as Plot
import tsne as visualize
from PIL import Image

# Functions in tsne taken from http://lvdmaaten.github.io/tsne/

# Load in labels and vectors for corresponding labels
vector_matrix = Math.loadtxt("vectors.txt")
# The `with` block closes the label file, which the original left open.
with open("labels.txt") as label_file:
    labels = [line.strip() for line in label_file]

# Row of the first occurrence of every label. A dict lookup gives the same
# result as labels.index(word) without rescanning the list for each word.
first_row = {}
for row_number, word in enumerate(labels):
    first_row.setdefault(word, row_number)
rows = [first_row[word] for word in labels]
target_matrix = vector_matrix[rows, :]

# Run the t-SNE reducing to 2 dimensions
reduced_matrix = visualize.tsne(vector_matrix, 2)

# Plot the figure
Plot.figure(figsize=(20, 20), dpi=10)
max_x = Math.amax(reduced_matrix, axis=0)[0]
max_y = Math.amax(reduced_matrix, axis=0)[1]
# Symmetric limits around the origin, based on the largest coordinates.
Plot.xlim((-max_x, max_x))
Plot.ylim((-max_y, max_y))

Plot.scatter(reduced_matrix[:, 0], reduced_matrix[:, 1], 20)

# Add labels
for row_id in range(0, len(rows)):
    target_word = labels[rows[row_id]]
    x = reduced_matrix[row_id, 0]
    y = reduced_matrix[row_id, 1]
Example #56
0
 

# Take apart the bands and time intervals: for each of the four offsets
# (0, 14, 28, 42) copy a 6-wide slice of every subject's data into coh.
for i,val in enumerate(np.arange(0,56,14)):
    for j in np.arange(0,no_sbj,1):
        coh[i,:,:,j] = data[j,val:val+6,:]

# Differentiate the data in time (first difference along axis 0)
diff_coh = np.diff(coh,1,0)
diff_coh_res = np.reshape(diff_coh,[18,36*no_sbj],'F').T

coh_res = np.reshape(coh,[24,36*no_sbj]).T


# t-SNE on dataset
[mapped,C] = tsne.tsne(diff_coh_res, no_dims, init_dims, perplexity)

#labels = kmeans2(mapped,no_clust,10)
colors = cm.jet(np.linspace(0, 1, no_clstr))

plt.figure(0)
plt.scatter(mapped[:,0],mapped[:,1],c = colors[ident[:,0],:], s = 150,alpha = 0.7)
# `plt.show` without parentheses only referenced the function and never
# displayed the figure.
plt.show()


#for i in np.arange(no_sbj):
#    plt.figure(i)
#    plt.matshow(diff_coh_res[i*36:i*36 + 36,:])    
#    plt.colorbar()
#    plt.text(0.5,36.5,r'$\delta$',fontsize=25)
#    plt.text(3.5,36.5,r'$\theta$',fontsize=25)
Example #57
0
# Split the averaged data into four 6-wide slices at offsets 0, 14, 28, 42.
for i,val in enumerate(np.arange(0,56,14)):
    coh[i,:,:] = average[:,val:val+6].T

# Differentiate the data in time (first difference along axis 0)
diff_coh = np.diff(coh,1,0)
diff_coh_res = np.reshape(diff_coh,[18,no_pairs],'F').T
coh_res = np.reshape(coh,[24,no_pairs],'F').T

silhouette = np.zeros([no_perm,no_pairs])
for i in np.arange(no_perm):
    print('Permutation number:' + str(i))
    C_min = 1e10
    dif_perm = diff_coh_res[np.random.permutation(no_pairs)]
    #coh_perm = diff_coh_res
    # Keep the map with the lowest error C out of no_map t-SNE runs.
    for j in np.arange(no_map):
        [mapped,C] = tsne.tsne(dif_perm, no_dims, init_dims, perplexity)
        if C < C_min:
            win_map = mapped    #a map with the lowest error
            C_min = C
    # Score k-means clusterings of the winning map for k = 2 .. no_pairs-1.
    for k in np.arange(2,no_pairs):
        kmeans_obj = KMeans(k)
        kmeans_obj.fit(win_map)
        labels = kmeans_obj.labels_
        silhouette[i,k] = crit(win_map,labels)

#plt.figure()
#plt.plot(np.mean(silhouette.T,1),color = 'red')
#plt.show()

# `plt.figure` without parentheses was a no-op; call it so the curves get
# their own figure.
plt.figure()
plt.plot(silhouette.T,color = 'red')
Example #58
0
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>

# <codecell>

%pylab
import tsne as tsne

# <codecell>

X = np.loadtxt("mnist2500_X.txt");
labels = np.loadtxt("mnist2500_labels.txt");

# <codecell>

print X.shape
print labels.shape
print np.min(X), np.max(X)

# <codecell>

tsne.tsne(

Example #59
0
from __future__ import unicode_literals
import numpy as Math
import pylab as Plot
import argparse
import tsne

if __name__ == "__main__":
    # Command-line entry point: embed the vectors with t-SNE and show a
    # scatter plot annotated with the upper-cased labels.
    parser = argparse.ArgumentParser()
    parser.add_argument('-l', action='store', dest='labelfile', required=True,
                        help='Path of label file')
    parser.add_argument('-v', action='store', dest='vectorFile', required=True,
                        help='Embedding vector')
    parser.add_argument('-d', action='store', type=int, dest='demension',
                        required=True, help='Demension of vector')
    parser.add_argument('-p', action='store', type=int, dest='perplexity',
                        default=20, help='Perplexity, usually between 20 to 50')
    r = parser.parse_args()

    X = Math.loadtxt(r.vectorFile)

    # Read the labels up front so the file is not held open during the
    # t-SNE run and the blocking Plot.show().
    with open(r.labelfile, 'r') as f:
        labels = f.read().upper().splitlines()

    Y = tsne.tsne(X, 2, r.demension, r.perplexity)
    fig, ax = Plot.subplots()
    ax.scatter(Y[:, 0], Y[:, 1], 20)

    # Label i annotates row i of the embedding.
    for i, txt in enumerate(labels):
        ax.annotate(txt, (Y[i, 0], Y[i, 1]))

    Plot.show()
Example #60
0
def tsne(fdarray, new_label = 'tsne',  channels = None, transform = 'arcsinh', sample = 6000,
         verbose = False, backgate = True):
    """Perform t-SNE/viSNE on the FlowData object(s) and store the coordinates.

    Args:
        fdarray: One FlowData object or several; normalised through
            ``util.make_list`` (presumably wraps a single object in a list
            -- confirm against ``util``).
        new_label: Prefix of the two channels written back to each object:
            ``new_label + '1'`` and ``new_label + '2'``.
        channels: Channels to embed. When ``None``, the intersection of the
            ``isotopes`` of all objects is used.
        transform: When ``'arcsinh'``, ``arcsinh(5 * x)`` is applied to the
            copied data before embedding; any other value applies nothing.
        sample: Maximum number of points drawn at random from each object.
        verbose: Passed through to ``lib_tsne.tsne``.
        backgate: When true, points that were not sampled receive the t-SNE
            coordinates of their nearest sampled point; otherwise they keep
            NaN coordinates.
    """

    fdarray = util.make_list(fdarray)

    # If the user has not provided a list of channels to use, 
    # use the intersection of all isotope channels
    if channels is None:
        channel_set = []
        for fd in fdarray:
            channel_set.append(set(fd.isotopes))
        channels = list(set.intersection(*channel_set))
    
    # Make a copy of the data in files that we want    
    # (one matrix per object; after the transpose each column is a channel
    # -- assumes fd[ch] is 1-D, TODO confirm)
    points = []
    for fd in fdarray:
        points.append(np.vstack([ fd[ch] for ch in channels ]).T)

    # transform
    if transform == 'arcsinh':
        for pts in points:
            # Apply the transform inplace to the data
            # (the second argument is the output array, so pts is overwritten)
            np.arcsinh(5*pts, pts)
    
    # Randomly sample to reduce the number of points
    # (each mask is an array of row indices into the matching `points` entry)
    sample_masks = []
    for pts in points:
        if sample < pts.shape[0]:
            # If we have enough points to subsample
            sample_masks.append(np.random.choice(pts.shape[0], sample, replace = False))
        else:
            # Otherwise we add all the points
            sample_masks.append(np.array(range(pts.shape[0])))

    # Sample the points, and construct a large matrix
    sample_points = []
    for mask, pts in zip(sample_masks, points):
        sample_points.append(pts[mask,:])
    X = np.vstack(sample_points)

    # Perform t-SNE
    Y = lib_tsne.tsne(X, verbose = verbose)
    assert Y is not None, ('t-SNE failed to return') 

    # Split Y into a matrix for each dataset
    # (the last cumulative offset equals len(Y), so np.split also yields a
    # trailing empty piece; the zip below ignores it)
    splits = np.cumsum( np.array([ mask.shape[0] for mask in sample_masks], dtype = int))
    Y_split = np.split(Y, splits, axis = 0)

    # now expand data to reassign these points back into the dataset
    tsne_coords = []
    for (pts, mask, Yspt) in zip(points, sample_masks, Y_split):
        npoints = pts.shape[0]
        # Start from an all-NaN table; only the sampled rows get coordinates.
        Z = np.zeros((npoints, 2))*float('NaN')
        Z[mask,:] = Yspt
        tsne_coords.append(Z)

    # If a point didn't get sampled, place its t-SNE coordinates at its nearest 
    # neighbor.
    if backgate:
        # Tree over all sampled points, so `near` indexes rows of X and Y.
        kd = KDTree(X)
        # select points not assigned values with t-SNE
        for pts, mask, coords, j  in zip(points, sample_masks, tsne_coords, range(len(points))):
            nan_points = np.argwhere(np.isnan(coords[:,0]))            
            d,near = kd.query(pts[nan_points],1) 
            # convert back to coordinates on the whole dataset
            coords[nan_points, :] = Y[near,:]
            tsne_coords[j] = coords
    # add to data to FlowData structure
    for fd, coords in zip(fdarray, tsne_coords):
        fd[new_label+'1'] = coords[:,0]
        fd[new_label+'2'] = coords[:,1]