Python tsne Exemples - HotExamples

Exemple #1

0

Afficher le fichier

Fichier : tsne_word_visualisation.py Projet : harvineet/social-network-embedding

def save_embed_plot(X, labels, fname):
	"""Embed ``X`` in 2-D with t-SNE and save an annotated scatter plot.

	Args:
		X: data handed to ``tsne`` as its first argument
			(presumably one row per entry of ``labels`` -- confirm with caller).
		labels: annotation texts, paired in order with the embedded points.
		fname: output path passed to ``fig.savefig``.
	"""
	Y = tsne(X, 2, word_vector_dim, 20.0)
	fig = Plot.figure()
	try:
		Plot.scatter(Y[:, 0], Y[:, 1], 1)
		for label, x, y in zip(labels, Y[:, 0], Y[:, 1]):
			Plot.annotate(label, xy=(x, y), xytext=(0, 0),
			              textcoords='offset points', size=5)
		fig.savefig(fname, dpi=1200)
	finally:
		# The plotting state machine keeps every figure alive until it is
		# closed, so repeated calls would otherwise accumulate figures.
		Plot.close(fig)

Exemple #2

0

Afficher le fichier

Fichier : tsne_word_visualisation.py Projet : harvineet/SocialMirror

def save_embed_plot(X, labels, fname):
	"""Embed ``X`` in 2-D with t-SNE and save an annotated scatter plot.

	Args:
		X: data handed to ``tsne`` as its first argument
			(presumably one row per entry of ``labels`` -- confirm with caller).
		labels: annotation texts, paired in order with the embedded points.
		fname: output path passed to ``fig.savefig``.
	"""
	Y = tsne(X, 2, word_vector_dim, 20.0)
	fig = Plot.figure()
	try:
		Plot.scatter(Y[:, 0], Y[:, 1], 1)
		for label, x, y in zip(labels, Y[:, 0], Y[:, 1]):
			Plot.annotate(label, xy=(x, y), xytext=(0, 0),
			              textcoords='offset points', size=5)
		fig.savefig(fname, dpi=1200)
	finally:
		# The plotting state machine keeps every figure alive until it is
		# closed, so repeated calls would otherwise accumulate figures.
		Plot.close(fig)

Exemple #3

0

Afficher le fichier

Fichier : vae.py Projet : mzhuang1/opfmbrl

def visualizeLatentState(X, rs, gen_params, rec_params):
    """Plot a t-SNE embedding of ``X`` together with generated samples.

    Calls ``nn_predict_gaussian`` on ``X``, samples latents with
    ``sample_diag_gaussian``, pushes them through ``neural_net_predict`` and a
    sigmoid, keeps the first half of the output columns, and embeds ``X``
    stacked on top of the generated rows (scaled by 10). The scatter plot is
    written to ``hidden.jpg``.

    Args:
        X: input array passed to ``nn_predict_gaussian``.
        rs: random state forwarded to ``sample_diag_gaussian``.
        gen_params: parameters passed to ``neural_net_predict``.
        rec_params: parameters passed to ``nn_predict_gaussian``.
    """
    q_means, q_log_stds = nn_predict_gaussian(rec_params, X)
    latents = sample_diag_gaussian(q_means, q_log_stds, rs)
    gen = sigmoid(neural_net_predict(gen_params, latents))
    # Floor division: a slice bound must be an int, and ``/`` yields a float
    # on Python 3 (same result as before on Python 2).
    gen = gen[:, :gen.shape[1] // 2]
    print(gen.shape)
    print(X.shape)
    y = tsne(np.vstack((X, gen * 10)))
    # assumes X and gen have the same number of rows, so this index separates
    # the X rows from the generated rows in the stacked embedding -- TODO confirm
    split = gen.shape[0]
    plt.figure()
    plt.clf()
    plt.scatter(y[:split, 0], y[:split, 1], color='red')
    plt.scatter(y[split:, 0], y[split:, 1], color='blue')
    plt.legend(['X', 'Xdecoded'],)
    plt.savefig('hidden.jpg')

Exemple #4

0

Afficher le fichier

def visualize_codes(net, dataloader=test_loader, batches=4):
    """Encode a few batches, embed the codes with t-SNE and plot them by class.

    Args:
        net: model exposing ``encode``; its output must provide ``.data.numpy()``.
        dataloader: iterable yielding ``(inputs, labels)`` pairs.
        batches: maximum number of batches to draw from ``dataloader``.

    Returns:
        Tuple ``(X, Y, GT)``: the concatenated codes, their 2-D t-SNE embedding,
        and the concatenated labels.
    """
    from itertools import islice

    # Legend label for class id 0..9, in plotting order.
    class_names = ('tops', 'trousers', 'pullovers', 'dresses', 'coats',
                   'sandals', 'shirts', 'sneakers', 'bags', 'boots')

    codes = []
    truths = []

    # Walk a single iterator. Calling ``next(iter(dataloader))`` on every pass
    # restarts the loader, so an unshuffled loader would yield the same first
    # batch ``batches`` times.
    for to_encode, truth in islice(dataloader, batches):
        truths.append(truth.numpy())
        encoded = net.encode(Variable(to_encode))
        codes.append(encoded.data.numpy())

    X = np.concatenate(codes, axis=0)
    GT = np.concatenate(truths, axis=0)
    Y = tsne(X, no_dims=2, initial_dims=8)

    for class_id, class_name in enumerate(class_names):
        members = Y[np.where(GT == class_id)]
        plt.scatter(members[:, 0], members[:, 1], label=class_name)

    plt.title('visualization of codes')
    plt.legend()
    plt.show()

    return X, Y, GT

Exemple #5

0

Afficher le fichier

def plot_clusters(matrix, listy, no_dims=2, initial_dims=100, perplexity=10):
    """Scatter a t-SNE embedding of ``matrix`` with one callout per point.

    Args:
        matrix: data handed to ``tsne``.
        listy: annotation texts, paired in order with the embedded points.
        no_dims: forwarded to ``tsne`` as ``no_dims``.
        initial_dims: forwarded to ``tsne`` as ``initial_dims``.
        perplexity: forwarded to ``tsne`` as ``perplexity``.

    Returns:
        None. The plot is drawn on a new figure with axis ticks removed.
    """
    plt.figure()
    embedding = tsne(matrix,
                     no_dims=no_dims,
                     initial_dims=initial_dims,
                     perplexity=perplexity)
    xs = embedding[:, 0]
    ys = embedding[:, 1]
    plt.scatter(xs, ys)
    for text, px, py in zip(listy, xs, ys):
        # Fresh style dicts per annotation, as in a literal call per point.
        box_style = {'boxstyle': 'round,pad=0.5', 'fc': 'yellow', 'alpha': 0.5}
        arrow_style = {'arrowstyle': '->', 'connectionstyle': 'arc3,rad=0'}
        plt.annotate(text,
                     xy=(px, py),
                     xytext=(-20, 20),
                     textcoords='offset points',
                     ha='right',
                     va='bottom',
                     bbox=box_style,
                     arrowprops=arrow_style,
                     fontsize='x-large')
    # Hide tick marks on both axes.
    current = plt.gca()
    for axis in (current.axes.get_xaxis(), current.axes.get_yaxis()):
        axis.set_ticks([])

Exemple #6

0

Afficher le fichier

Fichier : hyperdim.py Projet : ssquinntran/HDlanguageDetection

# Disabled PCA variant of the plot: the code below sits inside a bare string
# literal, so it is never executed.
'''
plt.figure()
print final_lang.shape
X = pca(cosangles)
plt.scatter(X[:,0],X[:,1])#,len(languages),np.r_[1:len(languages)])
for label, x, y in zip(languages, X[:, 0], X[:, 1]):
    plt.annotate(
        label, 
        xy = (x, y), xytext = (-20, 20),
        textcoords = 'offset points', ha = 'right', va = 'bottom',
        bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
        arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))
'''

# Embed the language vectors with t-SNE and draw one labelled callout per point.
plt.figure()
Y = tsne(cosangles, no_dims=2, initial_dims=100, perplexity=8)
plt.scatter(Y[:, 0], Y[:, 1])
for label, x, y in zip(languages, Y[:, 0], Y[:, 1]):
    callout_box = {'boxstyle': 'round,pad=0.5', 'fc': 'yellow', 'alpha': 0.5}
    callout_arrow = {'arrowstyle': '->', 'connectionstyle': 'arc3,rad=0'}
    plt.annotate(label,
                 xy=(x, y),
                 xytext=(-20, 20),
                 textcoords='offset points',
                 ha='right',
                 va='bottom',
                 bbox=callout_box,
                 arrowprops=callout_arrow,
                 fontsize='x-large')
# Strip the tick marks from both axes before showing the figure.
frame = plt.gca()
for axis in (frame.axes.get_xaxis(), frame.axes.get_yaxis()):
    axis.set_ticks([])
plt.show()

Exemple #7

0

Afficher le fichier

Fichier : LDA.py Projet : the0demiurge/python-test

from sklearn.datasets import load_breast_cancer
from tsne import *
import pandas as pd
from pylab import *
import seaborn as sns
from functools import reduce
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Load the breast-cancer dataset and rescale the features with MinMaxScaler.
data = load_breast_cancer()
x = data['data']
y = data['target']
label_names = data['target_names']

scaler = MinMaxScaler()
x = scaler.fit(x).transform(x)

# Called for its side effects only; the return value is discarded.
tsne(x, y, label_names=label_names)

# Hold out 10% of the rows; the targets are passed as a single column.
y_column = reshape(y, [-1, 1])
trainx, testx, trainy, testy = train_test_split(x, y_column, test_size=0.1)


def variance(x, u):
    """Return the transposed product of ``x - u`` with itself.

    ``x`` is reshaped to a single column before ``u`` is subtracted, so the
    difference follows normal broadcasting rules against ``u``.

    NOTE(review): the difference is multiplied by itself, not by its
    transpose -- confirm this is the intended scatter computation.
    """
    centered = reshape(x, [-1, 1]) - u
    product = centered.dot(centered)
    return product.T


def lda2(x, y):
    x0, x1 = mat(x[y.T[0] == 0]), mat(x[y.T[0] == 1])
    u0, u1 = x0.mean(axis=0), x1.mean(axis=0)

    sigma0, sigma1 = reduce(lambda x, y: x + y, [variance(i, u0) for i in x0]), reduce(lambda x, y: x + y, [variance(i, u1) for i in x1])

Exemple #8

0

Afficher le fichier

Fichier : tsne_topic_adopters_visualisation_pred.py Projet : harvineet/SocialMirror

		vectors.append(vec[vocab_index[u]])
		if y==1 and yp==1:
			color.append(1)
		elif y==1 and yp!=1:
			color.append(2)
		elif y!=1 and yp==1:
			color.append(3)
		else:
			color.append(4)
		count+=1
		if count==vec_limit:
			break
	return numpy.array(vectors), color, tag, prec

def save_embed_plot((X,color,tag,prec),fname):
	Y = tsne(X, no_dims = 2, initial_dims = 50, perplexity = 30.0)
	with open("/mnt/filer01/word2vec/degree_distribution/adopter_pred_files/single_topic_vis/"+fname+".pickle","wb") as fd:
		pickle.dump(Y,fd)
	fig = Plot.figure()
	init = []
	tp = []
	fn = []
	fp = []
	tn = []
	for i,c in enumerate(color):
		if c==0:
			init.append(i)
		elif c==1:
			tp.append(i)
		elif c==2:
			fn.append(i)

Exemple #9

0

Afficher le fichier

#set visibility of most, least and mid frequency hashtags by setting text size
def get_tag_size_label(tlist):
    """Build per-tag text sizes and labels for the tags listed in ``tlist``.

    Walks the module-level ``tag_labels`` in order. A tag present in ``tlist``
    gets size 2 and its latin-1 decoded text; every other tag gets size 0 and
    an empty label.

    Returns:
        Tuple ``(sizes, labels)``: a list of ints and an array of label texts.
    """
    sizes, texts = [], []
    for tag in tag_labels:
        visible = tag in tlist
        sizes.append(2 if visible else 0)
        texts.append(tag.decode('latin-1') if visible else '')
    return sizes, array(texts)


# Convert hist_feature to an array and embed it once at import time; the
# module-level Y is what save_embed_plot draws.
# presumably the positional arguments are no_dims=2, initial_dims=50,
# perplexity=30.0 -- confirm against the tsne signature.
X = array(hist_feature)
Y = tsne(X, 2, 50, 30.0)


def save_embed_plot(sizes_and_labels, fname):
    """Save a scatter of the module-level embedding ``Y`` with tag labels.

    Args:
        sizes_and_labels: ``(tag_sizes, labels)`` pair, e.g. the return value
            of ``get_tag_size_label``; each label is drawn at its point with
            the matching text size.
        fname: output path passed to ``fig.savefig``.
    """
    # Unpack in the body: a tuple parameter in the signature is a SyntaxError
    # on Python 3 (PEP 3113). Callers still pass one positional pair.
    tag_sizes, labels = sizes_and_labels
    fig = Plot.figure()
    try:
        Plot.scatter(Y[:, 0], Y[:, 1], 0)
        for label, x, y, s in zip(labels, Y[:, 0], Y[:, 1], tag_sizes):
            Plot.annotate(label,
                          xy=(x, y),
                          xytext=(0, 0),
                          textcoords='offset points',
                          size=s)
        Plot.axis('off')
        fig.savefig(fname, dpi=800, bbox_inches='tight')
    finally:
        # Figures stay registered until closed; release this one so repeated
        # calls do not accumulate open figures.
        Plot.close(fig)

Exemple #10

0

Afficher le fichier

Fichier : tsne_topic_adopters_visualisation_pred.py Projet : harvineet/social-network-embedding

    Plot.scatter(Y_tn[:, 0],
                 Y_tn[:, 1],
                 s=10,
                 c='c',
                 alpha=0.4,
                 label='true negatives',
                 edgecolor='none')
    Plot.axis('off')
    Plot.legend(prop={'size': 8})
    Plot.title('#' + tag + ', P@100: ' + str(prec) + ', ' + clf)
    fig.savefig(fname + '.png', dpi=400, bbox_inches='tight')


if __name__ == "__main__":
    # Parenthesised single-string form prints the same text on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
    for i in range(train_ex_limit):
        X, color, color_log, tag, prec, prec_log = get_user_vectors(i)
        # Comparison kept as written: only a value equal to True selects t-SNE.
        if use_tsne == True:
            Y = tsne(X, no_dims=2, initial_dims=50, perplexity=30.0)
        else:
            Y = pca(X, no_dims=2)
        # with open("/mnt/filer01/word2vec/degree_distribution/adopter_pred_files/single_topic_vis/"+fname+".pickle","wb") as fd:
        # pickle.dump(Y,fd)
        # One plot per colouring, both drawn from the same embedding.
        save_embed_plot(Y, color, tag, prec, 'RF',
                        'embed_adopters_topic_rf' + str(i))
        save_embed_plot(Y, color_log, tag, prec_log, 'LR',
                        'embed_adopters_topic_lr' + str(i))

#cc 0.0589, candidate set recall 280 out of 4751 cand size 6312
#cc 0.219, candidate set recall 516 out of 2347 cand size 4702
#cc 0.56, candidate set recall 658 out of 1162 cand size 4075

Exemple #11

0

Afficher le fichier

    for t in stasks:
        dstask[ds][t] = ctr
        print '%d: ds%03d task%03d' % (ctr, ds, t)
        ctr = ctr + 1

# make colormap: each element of s gets the next integer, in iteration order
cmap = {}
ctr = 0

for i in s:
    cmap[i] = ctr
    ctr += 1

colors = [cmap[i] for i in copedata[:, 0]]

# keep only the rows flagged in usedata
X = X[usedata == 1, :]

t = tsne(X, no_dims=2, initial_dims=15, perplexity=10.0, max_iter=1000)
plt.clf()
plt.scatter(t[:, 0], t[:, 1], s=0)  # create axes
# ``with`` closes the label file even if a lookup or plt.text raises midway.
with open(basedir + 'tasklabels.txt', 'w') as f:
    for i, (x, y) in enumerate(t):
        task_label = dstask[copedata[i, 0]][copedata[i, 1]]
        plt.text(x, y, '%d' % task_label)  #,color=colors[i])
        f.write('%d\n' % task_label)

# print legend:
plt.savefig(basedir + 'tsne_fig.pdf', format='pdf')

Exemple #12

0

Afficher le fichier

Fichier : tsne_algo.py Projet : declan-ayres/MLAlgorithms

        The perplexity is 2 to the entropy of the probability distribution. It measures how many neighbors each data point will be connected to. When I raise the perplexity the images have more clusters.
        Theta measures the accuracy of the algorithm. It is the angle the data points are to each other. Large theta speeds up the algorithm but reduces the accuracy and small theta slows down the algorithm but increases the accuracy.
        """
        #read all the classfied files into a list
        #not only read the classified files into a list, but also keep it open for appending
	#randomarray=np.random.random(255, size=(1000, 784))
	randomarray=np.random.random((1000, 784))
	coordinates = bh_sne(randomarray, perplexity = 30, theta = .1) * 10
        print coordinates




if __name__ == '__main__':
    formatter = logging.Formatter("%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s")

    streamhandler = logging.StreamHandler(sys.stdout)

    # NOTE(review): 10 and 20 are the numeric values of logging.DEBUG and
    # logging.INFO, yet 10 selects INFO and 20 selects DEBUG here -- confirm
    # the mapping is intended before changing it.
    if args.logging_level == 10:
        streamhandler.setLevel(logging.INFO)
        log.setLevel(logging.INFO)
    if args.logging_level == 20:
        streamhandler.setLevel(logging.DEBUG)
        log.setLevel(logging.DEBUG)

    streamhandler.setFormatter(formatter)
    log.addHandler(streamhandler)

    # Attach the file handler as well: constructing it already opens the
    # "logging" file, but without addHandler no record is ever written to it.
    filehandler = logging.FileHandler("logging")
    filehandler.setFormatter(formatter)
    log.addHandler(filehandler)

    tsne()

Exemple #13

0

Afficher le fichier

Fichier : tfOnlineMNIST-TSNE.py Projet : hal2001/Crossprop

def draw(learning_rate, n_examples, repeats):
    """Embed stored hidden features with t-SNE and pickle the result.

    Builds six learners, restores the saved checkpoint of every candidate
    stage, runs the first ``n_examples`` training rows through each learner to
    collect its ``feature`` output, embeds a sample of those features with
    t-SNE for the candidate methods, and dumps the embeddings to
    ``tmp/tsne_dim_<target_dim>.bin``.

    Args:
        learning_rate: learning rate handed to every learner and optimizer.
        n_examples: number of leading rows of the training set to use.
        repeats: how many times the embedding is recomputed per stage.
    """
    gate = Tanh()
    runs = 1
    cp_alt = CrossPropAlt(dim_in,
                          dim_hidden,
                          dim_out,
                          learning_rate,
                          gate,
                          output_layer='CE',
                          lam=0,
                          name='cp')
    cp_alt_lam = CrossPropAlt(dim_in,
                              dim_hidden,
                              dim_out,
                              learning_rate,
                              gate,
                              output_layer='CE',
                              lam=0.5,
                              name='cp-lam')
    bp = BackPropClissification(dim_in,
                                dim_hidden,
                                dim_out,
                                learning_rate,
                                gate,
                                name='bp')
    bp_mom = BackPropClissification(dim_in,
                                    dim_hidden,
                                    dim_out,
                                    learning_rate,
                                    gate,
                                    name='bp-mom',
                                    optimizer=tf.train.MomentumOptimizer(
                                        learning_rate=learning_rate,
                                        momentum=0.9))
    bp_adam = BackPropClissification(
        dim_in,
        dim_hidden,
        dim_out,
        learning_rate,
        gate,
        name='bp-adam',
        optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate))
    bp_rms = BackPropClissification(
        dim_in,
        dim_hidden,
        dim_out,
        learning_rate,
        gate,
        name='bp-rms',
        optimizer=tf.train.RMSPropOptimizer(learning_rate=learning_rate))
    # Indices into this list are what ``candidate_methods`` refers to.
    methods = [cp_alt, cp_alt_lam, bp, bp_adam, bp_rms, bp_mom]
    for run in range(runs):

        train_x = train_x_total[:n_examples, :]
        train_y = train_y_total[:n_examples, :]

        # Each stage uses the targets with their columns rotated left by one
        # more position.
        y0 = train_y
        y1 = np.concatenate([train_y[:, 1:], train_y[:, :1]], 1)
        y2 = np.concatenate([y1[:, 1:], y1[:, :1]], 1)

        train_xs = [train_x] * 6
        train_ys = [y0, y1, y2, y0, y1, y2]

        # np.random.seed(0)
        # x0 = train_x
        # perm = np.arange(dim_in)
        # np.random.shuffle(perm)
        # x1 = train_x[:, perm]
        # np.random.shuffle(perm)
        # x2 = train_x[:, perm]
        #
        # train_xs = [x0, x1, x2, x0, x1, x2]
        # train_ys = [train_y] * 6

        # features = np.zeros((stages, len(methods), n_examples, dim_hidden))
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            figure_index = 0
            candidate_stages = [0, 1, 2, 3]
            candidate_methods = [0, 2]
            target_dim = 2
            tsne_data = dict()
            for stage in candidate_stages:
                saver.restore(sess,
                              'tmp/saved/ffn_model/%s_stage_%d' % (tag, stage))
                features = np.zeros((len(methods), n_examples, dim_hidden))
                train_x = train_xs[stage]
                train_y = train_ys[stage]
                batch_size = 1000
                cur_example = 0
                # Collect features batch by batch for every method.
                while cur_example < n_examples:
                    logger.info('store features... stage %d, example %d' %
                                (stage, cur_example))
                    end_example = min(n_examples, cur_example + batch_size)
                    for method_ind, method in enumerate(methods):
                        cur_features = sess.run(
                            method.feature,
                            feed_dict={
                                method.x: train_x[cur_example:end_example, :],
                                method.target:
                                train_y[cur_example:end_example, :]
                            })
                        features[method_ind,
                                 cur_example:end_example, :] = cur_features
                    cur_example = end_example
                # Clamp the sample to the rows that exist: a fixed 2500 raises
                # IndexError whenever n_examples is smaller.
                # NOTE(review): every index is used, so shuffling only reorders
                # the same leading rows -- confirm that is the intended sample.
                sample_indices = np.arange(min(2500, n_examples))
                for repeat in range(repeats):
                    np.random.shuffle(sample_indices)
                    for method_ind in candidate_methods:
                        x_to_plot = features[method_ind, sample_indices, :]
                        y_to_plot = np.argmax(train_y[sample_indices, :],
                                              axis=1)
                        # %-formatting prints the same text on Python 2 and 3;
                        # the bare print statement is a SyntaxError on Python 3.
                        print('%s %s' % (x_to_plot.shape, y_to_plot.shape))
                        x_prime = tsne(x_to_plot, target_dim, 50, 20.0)
                        # The key has no repeat index, so a later repeat
                        # overwrites the earlier embedding for this pair.
                        tsne_data[(stage, method_ind)] = (x_prime, y_to_plot)
                        # fig = plt.figure(figure_index)
                        # figure_index += 1
                        # ax = Axes3D(fig)
                        # ax.scatter(x_prime[:, 0], x_prime[:, 1], x_prime[:, 2], c=y_to_plot)
                        # plt.scatter(x_prime[:, 0], x_prime[:, 1], 20, y_to_plot)
                        # plt.title('%s_%s_stage_%d' % (tag, labels[method_ind], stage))
                        # plt.show()
                        # plt.savefig('figure/%s_repeat_%d_%s_stage_%d.png' % (tag, repeat, labels[method_ind], stage))
                        # plt.close()
                        # plt.show()
            with open('tmp/tsne_dim_%d.bin' % target_dim, 'wb') as f:
                pickle.dump(tsne_data, f)

Exemple #14

0

Afficher le fichier

Fichier : LDA.py Projet : sjl421/Python-Scripts-2

from sklearn.datasets import load_breast_cancer
from tsne import *
import pandas as pd
from pylab import *
import seaborn as sns
from functools import reduce
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Load the breast-cancer dataset and rescale the features with MinMaxScaler.
data = load_breast_cancer()
x = data['data']
y = data['target']
label_names = data['target_names']

scaler = MinMaxScaler()
x = scaler.fit(x).transform(x)

# Called for its side effects only; the return value is discarded.
tsne(x, y, label_names=label_names)

# Hold out 10% of the rows; the targets are passed as a single column.
y_column = reshape(y, [-1, 1])
trainx, testx, trainy, testy = train_test_split(x, y_column, test_size=0.1)


def variance(x, u):
    """Return the transposed product of ``x - u`` with itself.

    ``x`` is reshaped to a single column before ``u`` is subtracted, so the
    difference follows normal broadcasting rules against ``u``.

    NOTE(review): the difference is multiplied by itself, not by its
    transpose -- confirm this is the intended scatter computation.
    """
    centered = reshape(x, [-1, 1]) - u
    product = centered.dot(centered)
    return product.T


def lda2(x, y):
    x0, x1 = mat(x[y.T[0] == 0]), mat(x[y.T[0] == 1])
    u0, u1 = x0.mean(axis=0), x1.mean(axis=0)

Exemple #15

0

Afficher le fichier

Fichier : 8.1_tsne_wholebrain.py Projet : Jasonleng/openfmri

import matplotlib.pyplot as plt
import matplotlib.font_manager as mplfont
import os
from sklearn import manifold
from sklearn.metrics import euclidean_distances
from tsne import *

outdir = '/corral-repl/utexas/poldracklab/openfmri/analyses/paper_analysis_Dec2012/clustering'

# Alternative input kept for reference:
#X=N.loadtxt('/corral-repl/utexas/poldracklab/openfmri/analyses/paper_analysis_Dec2012/ICA/datarun1_icarun1_200comp.txt')
X = N.load('/corral-repl/utexas/poldracklab/openfmri/analyses/paper_analysis_Dec2012/data_prep/zstat_run1.npy')

# Alternative embedding (MDS) kept for reference:
#clf = manifold.MDS(n_components=2, n_init=1, max_iter=1000)
#t=clf.fit_transform(euclidean_distances(X))

t = tsne(X, no_dims=2, initial_dims=30, perplexity=10.0, max_iter=1000)

taskinfo = N.loadtxt('/corral-repl/utexas/poldracklab/openfmri/analyses/paper_analysis_Dec2012/data_prep/data_key_run1.txt')

# The first column of taskinfo holds the task number of each row.
task_column = taskinfo[:, 0]
tasknums = N.unique(task_column)


# For each task: centroid of its embedded points and their mean Euclidean
# distance to that centroid.
t_eucdist = {}
mean_t_obs = {}
for k in tasknums:
    obs = N.where(task_column == k)[0]
    t_obs = t[obs, :]
    centroid = N.mean(t_obs, 0)
    mean_t_obs[k] = centroid
    dx = t_obs[:, 0] - centroid[0]
    dy = t_obs[:, 1] - centroid[1]
    t_eucdist[k] = N.mean(N.sqrt(dx ** 2 + dy ** 2))

Exemple #16

0

Afficher le fichier

Fichier : plot.py Projet : rhyschris/Playlist-Recommendation

        next = np.array([ float(n) for n in l[:-2].split(" ")[half:]])
        prevsong = prev.reshape((96, 50))
        nextsong = next.reshape((96, 50))
        
        prevsong = np.sum(prevsong, axis=1)
        nextsong = np.sum(nextsong, axis=1)
        arr.append(np.array(list(prevsong) + list(nextsong)))
    

# Read one label per line, dropping trailing whitespace.
with open('labels.txt', 'r') as l:
    labels = [word.rstrip() for word in l]


# Stack the collected feature vectors into a single array.
matr = np.array(list(map(np.array, arr)))

# Embed with t-SNE and draw a labelled scatter plot.
Y = tsne(matr)
import matplotlib.pyplot as plt

plt.scatter(Y[:, 0], Y[:, 1], 20)
for label, x, y in zip(labels, Y[:, 0], Y[:, 1]):
    label_box = {'boxstyle': 'round', 'fc': "yellow"}
    plt.annotate(label,
                 xy=(x, y),
                 xytext=(-10, 10),
                 textcoords="offset points",
                 bbox=label_box)

plt.savefig("test.ps", format='eps', dpi=1000)
plt.show()

Exemple #17

0

Afficher le fichier

Fichier : run.py Projet : sharpfun/ss2016_dpnlp

from blocks.model import Model

# Iterate over the training set in order.
# presumably the second argument is a batch size of 1 -- confirm against
# SequentialScheme.
iteration_scheme = SequentialScheme(train_dataset.num_instances(), 1)
data_stream = DataStream.default_stream(dataset=train_dataset,
                                        iteration_scheme=iteration_scheme)

main_loop = MainLoop(algorithm=algorithm,
                     data_stream=data_stream,
                     model=Model(cost),
                     extensions=extensions)

main_loop.run()

from tsne import *
import matplotlib.pyplot as plt

# Load the saved array and embed its rows in two dimensions.
W1 = numpy.load("layer1_20.npy")

Y = tsne(W1, 2, 50, 20.0)

fig, ax = plt.subplots()
xs, ys = Y[:, 0], Y[:, 1]
ax.scatter(xs, ys)

# Label the i-th embedded point with the i-th entry of bag_words.
for i, word in enumerate(train_dataset.bag_words):
    x, y = Y[i]
    position = (x, y)
    ax.annotate(word, position)

plt.show()

Exemple #18

0

Afficher le fichier

Fichier : tsne_hashtag_visualisation.py Projet : harvineet/SocialMirror

#set visibility of most, least and mid frequency hashtags by setting text size
def get_tag_size_label(tlist):
	"""Build per-tag text sizes and labels for the tags listed in ``tlist``.

	Walks the module-level ``tag_labels`` in order. A tag present in ``tlist``
	gets size 2 and its latin-1 decoded text; every other tag gets size 0 and
	an empty label.

	Returns:
		Tuple ``(sizes, labels)``: a list of ints and an array of label texts.
	"""
	sizes, texts = [], []
	for tag in tag_labels:
		visible = tag in tlist
		sizes.append(2 if visible else 0)
		texts.append(tag.decode('latin-1') if visible else '')
	return sizes, array(texts)

# Convert hist_feature to an array and embed it once at import time; the
# module-level Y is what save_embed_plot draws.
X = array(hist_feature)
Y = tsne(X, 2, 50, 30.0)

def save_embed_plot(sizes_and_labels, fname):
	"""Save a scatter of the module-level embedding ``Y`` with tag labels.

	Args:
		sizes_and_labels: ``(tag_sizes, labels)`` pair, e.g. the return value
			of ``get_tag_size_label``; each label is drawn at its point with
			the matching text size.
		fname: output path passed to ``fig.savefig``.
	"""
	# Unpack in the body: a tuple parameter in the signature is a SyntaxError
	# on Python 3 (PEP 3113). Callers still pass one positional pair.
	tag_sizes, labels = sizes_and_labels
	fig = Plot.figure()
	try:
		Plot.scatter(Y[:, 0], Y[:, 1], 0)
		for label, x, y, s in zip(labels, Y[:, 0], Y[:, 1], tag_sizes):
			Plot.annotate(label, xy=(x, y), xytext=(0, 0),
			              textcoords='offset points', size=s)
		Plot.axis('off')
		fig.savefig(fname, dpi=800, bbox_inches='tight')
	finally:
		# Figures stay registered until closed; release this one so repeated
		# calls do not accumulate open figures.
		Plot.close(fig)
	
if __name__ == "__main__":
	# Parenthesised single-string form prints the same text on Python 2 and 3;
	# the bare print statement is a SyntaxError on Python 3.
	print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
	# print "Running example on 2,500 MNIST digits..."
	# X = Math.loadtxt("mnist2500_X.txt");
	# labels = Math.loadtxt("mnist2500_labels.txt");
	# Plot the embedding with labels shown only for the tags in most_freq.
	save_embed_plot(get_tag_size_label(most_freq), 'embed_tag_mostfreq.png')