Esempi in Python per tsne

Esempio n. 1

0

Mostra file

File: tsne_word_visualisation.py Progetto: harvineet/social-network-embedding

def save_embed_plot(X,labels,fname):
	"""Embed X with t-SNE, draw a labelled scatter plot and save it to fname.

	X is handed to ``tsne`` together with ``2``, the module-level
	``word_vector_dim`` and ``20.0`` (presumably no_dims, initial_dims and
	perplexity -- confirm against the tsne module in use).  Each entry of
	``labels`` is drawn as text on top of the matching embedded point.
	"""
	embedding = tsne(X, 2, word_vector_dim, 20.0)
	xs, ys = embedding[:,0], embedding[:,1]
	figure = Plot.figure()
	Plot.scatter(xs, ys, 1)
	for text, px, py in zip(labels, xs, ys):
		Plot.annotate(text, xy=(px, py), xytext=(0, 0), textcoords='offset points', size=5)
	figure.savefig(fname, dpi=1200)

Esempio n. 2

0

Mostra file

File: tsne_word_visualisation.py Progetto: harvineet/SocialMirror

def save_embed_plot(X,labels,fname):
	"""Project X to 2-D with t-SNE, annotate every point and write the figure.

	``tsne`` receives X, ``2``, the module-level ``word_vector_dim`` and
	``20.0`` (presumably no_dims, initial_dims and perplexity -- confirm
	against the tsne module in use).  ``labels`` is paired element-wise with
	the embedded rows; the figure is saved to ``fname`` at 1200 dpi.
	"""
	projected = tsne(X, 2, word_vector_dim, 20.0)
	col_x = projected[:,0]
	col_y = projected[:,1]
	canvas = Plot.figure()
	Plot.scatter(col_x, col_y, 1)
	for caption, point_x, point_y in zip(labels, col_x, col_y):
		Plot.annotate(caption, xy=(point_x, point_y), xytext=(0, 0), textcoords='offset points', size=5)
	canvas.savefig(fname, dpi=1200)

Esempio n. 3

0

Mostra file

File: vae.py Progetto: mzhuang1/opfmbrl

def visualizeLatentState(X, rs, gen_params, rec_params):
    """Plot a t-SNE embedding of X next to its reconstruction and save it.

    X is encoded with ``nn_predict_gaussian(rec_params, X)``, a latent sample
    is drawn with ``sample_diag_gaussian`` using ``rs``, and the sample is
    decoded with ``sigmoid(neural_net_predict(gen_params, latents))``.  Only
    the first half of the decoder's output columns is kept.  X and the
    (x10 scaled) decoded rows are stacked, embedded with ``tsne`` and drawn
    as two scatter series into 'hidden.jpg'.

    Returns None.
    """
    q_means, q_log_stds = nn_predict_gaussian(rec_params, X)
    latents = sample_diag_gaussian(q_means, q_log_stds, rs)
    gen = sigmoid(neural_net_predict(gen_params, latents))
    # Floor division keeps the slice bound an int; plain `/` yields a float
    # on Python 3 and makes the slice raise TypeError.
    gen = gen[:, :gen.shape[1] // 2]
    print(gen.shape)
    print(X.shape)
    y = tsne(np.vstack((X, gen * 10)))
    # The split point is the row count of `gen`, as in the original code.
    # NOTE(review): this matches the stacking order only if X and gen have
    # the same number of rows -- confirm.
    n_rows = gen.shape[0]
    plt.figure()
    plt.clf()
    plt.scatter(y[:n_rows, 0], y[:n_rows, 1], color='red')
    plt.scatter(y[n_rows:, 0], y[n_rows:, 1], color='blue')
    plt.legend(['X', 'Xdecoded'],)
    plt.savefig('hidden.jpg')

Esempio n. 4

0

Mostra file

def visualize_codes(net, dataloader=test_loader, batches=4):
    """Encode a few batches with ``net``, embed the codes with t-SNE and plot.

    Args:
        net: object exposing ``encode(batch)``; the result's ``.data.numpy()``
            is collected as the code for that batch.
        dataloader: iterable yielding ``(inputs, labels)`` pairs.
        batches: maximum number of batches to draw from ``dataloader``.

    Returns:
        Tuple ``(X, Y, GT)``: the concatenated codes, their 2-D t-SNE
        embedding and the concatenated ground-truth labels.
    """
    from itertools import islice

    # Label id -> legend name, in the order the original plotted them.
    class_names = ['tops', 'trousers', 'pullovers', 'dresses', 'coats',
                   'sandals', 'shirts', 'sneakers', 'bags', 'boots']

    codes = []
    truths = []

    # Walk ONE iterator.  Re-creating it on every pass (next(iter(...)))
    # returns the first batch each time for a non-shuffling loader, so the
    # same samples would be encoded `batches` times.
    for to_encode, truth in islice(dataloader, batches):
        truths.append(truth.numpy())
        encoded = net.encode(Variable(to_encode))
        codes.append(encoded.data.numpy())

    X = np.concatenate(codes, axis=0)
    GT = np.concatenate(truths, axis=0)
    Y = tsne(X, no_dims=2, initial_dims=8)

    # One scatter series per class so each gets its own colour and legend entry.
    for class_id, class_name in enumerate(class_names):
        points = Y[np.where(GT == class_id)]
        plt.scatter(points[:, 0], points[:, 1], label=class_name)

    plt.title('visualization of codes')
    plt.legend()
    plt.show()

    return X, Y, GT

Esempio n. 5

0

Mostra file

def plot_clusters(matrix, listy, no_dims=2, initial_dims=100, perplexity=10):
    """Draw a t-SNE scatter plot of ``matrix`` with a callout per point.

    ``matrix`` is embedded with ``tsne`` using the given ``no_dims``,
    ``initial_dims`` and ``perplexity``.  The first two embedding columns are
    scattered on a new figure and every entry of ``listy`` is attached to its
    point as a boxed, arrowed annotation.  Axis ticks are removed.

    Returns None; the figure is left open for the caller to show or save.
    """
    plt.figure()
    embedding = tsne(matrix,
                     no_dims=no_dims,
                     initial_dims=initial_dims,
                     perplexity=perplexity)
    xs, ys = embedding[:, 0], embedding[:, 1]
    plt.scatter(xs, ys)
    for text, px, py in zip(listy, xs, ys):
        # Fresh style dicts per call, exactly as the original built them.
        box_style = {'boxstyle': 'round,pad=0.5', 'fc': 'yellow', 'alpha': 0.5}
        arrow_style = {'arrowstyle': '->', 'connectionstyle': 'arc3,rad=0'}
        plt.annotate(text,
                     xy=(px, py),
                     xytext=(-20, 20),
                     textcoords='offset points',
                     ha='right',
                     va='bottom',
                     bbox=box_style,
                     arrowprops=arrow_style,
                     fontsize='x-large')
    current = plt.gca()
    current.axes.get_xaxis().set_ticks([])
    current.axes.get_yaxis().set_ticks([])

Esempio n. 6

0

Mostra file

File: hyperdim.py Progetto: ssquinntran/HDlanguageDetection

'''
plt.figure()
print final_lang.shape
X = pca(cosangles)
plt.scatter(X[:,0],X[:,1])#,len(languages),np.r_[1:len(languages)])
for label, x, y in zip(languages, X[:, 0], X[:, 1]):
    plt.annotate(
        label, 
        xy = (x, y), xytext = (-20, 20),
        textcoords = 'offset points', ha = 'right', va = 'bottom',
        bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
        arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))
'''

# Embed the `cosangles` matrix in 2-D with t-SNE and draw one annotated
# point per entry of `languages`, then hide the axis ticks and show the plot.
plt.figure()
Y = tsne(cosangles, no_dims=2, initial_dims=100, perplexity=8)
plt.scatter(Y[:, 0], Y[:, 1])
for label, x, y in zip(languages, Y[:, 0], Y[:, 1]):
    plt.annotate(label,
                 xy=(x, y),
                 xytext=(-20, 20),
                 textcoords='offset points',
                 ha='right',
                 va='bottom',
                 bbox={'boxstyle': 'round,pad=0.5', 'fc': 'yellow', 'alpha': 0.5},
                 arrowprops={'arrowstyle': '->', 'connectionstyle': 'arc3,rad=0'},
                 fontsize='x-large')
frame = plt.gca()
frame.axes.get_xaxis().set_ticks([])
frame.axes.get_yaxis().set_ticks([])
plt.show()

Esempio n. 7

0

Mostra file

File: LDA.py Progetto: the0demiurge/python-test

from sklearn.datasets import load_breast_cancer
from tsne import *
import pandas as pd
from pylab import *
import seaborn as sns
from functools import reduce
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Load the scikit-learn breast-cancer dataset.
data = load_breast_cancer()

# Features, integer targets and the human-readable class names.
x, y, label_names = data['data'], data['target'], data['target_names']
# Rescale the features with MinMaxScaler (fitted on the full dataset).
scaler = MinMaxScaler()
scaler.fit(x)
x = scaler.transform(x)

# NOTE(review): this project's `tsne` is called with (x, y, label_names=...)
# and its return value is discarded -- presumably it plots; confirm.
tsne(x, y, label_names=label_names)

# Hold out 10% of the rows; targets are reshaped to a column vector first.
trainx, testx, trainy, testy = train_test_split(x, reshape(y, [-1, 1]), test_size=0.1)


def variance(x, u):
    """Return the transposed matrix product of ``(x - u)`` with itself.

    ``x`` is first reshaped to a single column; ``u`` is then subtracted
    using normal broadcasting rules, the difference is multiplied by itself
    with ``.dot`` and the product is transposed.

    NOTE(review): with a column ``x`` and a row ``u`` the difference
    broadcasts to a square matrix, which is not the usual outer-product
    scatter term -- confirm this is what callers expect.
    """
    centered = reshape(x, [-1, 1]) - u
    return centered.dot(centered).T


def lda2(x, y):
    x0, x1 = mat(x[y.T[0] == 0]), mat(x[y.T[0] == 1])
    u0, u1 = x0.mean(axis=0), x1.mean(axis=0)

    sigma0, sigma1 = reduce(lambda x, y: x + y, [variance(i, u0) for i in x0]), reduce(lambda x, y: x + y, [variance(i, u1) for i in x1])

Esempio n. 8

0

Mostra file

File: tsne_topic_adopters_visualisation_pred.py Progetto: harvineet/SocialMirror

		vectors.append(vec[vocab_index[u]])
		if y==1 and yp==1:
			color.append(1)
		elif y==1 and yp!=1:
			color.append(2)
		elif y!=1 and yp==1:
			color.append(3)
		else:
			color.append(4)
		count+=1
		if count==vec_limit:
			break
	return numpy.array(vectors), color, tag, prec

def save_embed_plot((X,color,tag,prec),fname):
	Y = tsne(X, no_dims = 2, initial_dims = 50, perplexity = 30.0)
	with open("/mnt/filer01/word2vec/degree_distribution/adopter_pred_files/single_topic_vis/"+fname+".pickle","wb") as fd:
		pickle.dump(Y,fd)
	fig = Plot.figure()
	init = []
	tp = []
	fn = []
	fp = []
	tn = []
	for i,c in enumerate(color):
		if c==0:
			init.append(i)
		elif c==1:
			tp.append(i)
		elif c==2:
			fn.append(i)

Esempio n. 9

0

Mostra file

#set visibility of most, least and mid frequency hashtags by setting text size
def get_tag_size_label(tlist):
    """Build per-tag text sizes and labels for the module-level ``tag_labels``.

    A tag that appears in ``tlist`` gets size 2 and its latin-1 decoded text;
    every other tag gets size 0 and an empty label.

    Returns a tuple ``(sizes, labels)`` where ``sizes`` is a list and
    ``labels`` is wrapped with ``array``.
    """
    sizes, labels = [], []
    for tag in tag_labels:
        if t_is_visible(tag, tlist) if False else (tag in tlist):
            sizes.append(2)
            labels.append(tag.decode('latin-1'))
            continue
        sizes.append(0)
        labels.append('')
    return sizes, array(labels)


# Convert the collected `hist_feature` rows to an array and embed them once
# at import time; `Y` is read as a module-level global by save_embed_plot.
X = array(hist_feature)
# Positional args after X are presumably no_dims, initial_dims, perplexity
# -- confirm against the tsne module in use.
Y = tsne(X, 2, 50, 30.0)


def save_embed_plot(sizes_and_labels, fname):
    """Annotate the module-level embedding ``Y`` and save the figure.

    Args:
        sizes_and_labels: a ``(tag_sizes, labels)`` pair, e.g. the return
            value of ``get_tag_size_label``.  It is unpacked in the body
            because tuple parameters in a ``def`` signature are a
            SyntaxError on Python 3; positional callers are unaffected.
        fname: output path handed to ``fig.savefig``.
    """
    tag_sizes, labels = sizes_and_labels
    fig = Plot.figure()
    # Marker size 0: only the text annotations are meant to be visible.
    Plot.scatter(Y[:, 0], Y[:, 1], 0)
    for label, x, y, s in zip(labels, Y[:, 0], Y[:, 1], tag_sizes):
        Plot.annotate(label,
                      xy=(x, y),
                      xytext=(0, 0),
                      textcoords='offset points',
                      size=s)
    Plot.axis('off')
    fig.savefig(fname, dpi=800, bbox_inches='tight')

Esempio n. 10

0

Mostra file

File: tsne_topic_adopters_visualisation_pred.py Progetto: harvineet/social-network-embedding

    Plot.scatter(Y_tn[:, 0],
                 Y_tn[:, 1],
                 s=10,
                 c='c',
                 alpha=0.4,
                 label='true negatives',
                 edgecolor='none')
    Plot.axis('off')
    Plot.legend(prop={'size': 8})
    Plot.title('#' + tag + ', P@100: ' + str(prec) + ', ' + clf)
    fig.savefig(fname + '.png', dpi=400, bbox_inches='tight')


if __name__ == "__main__":
    # Call form of print: same output on Python 2, valid syntax on Python 3.
    print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
    # For every training example index: fetch the user vectors, reduce them
    # to 2-D (t-SNE or PCA depending on `use_tsne`) and save one plot per
    # classifier ('RF' and 'LR') from the same embedding.
    for i in range(0, train_ex_limit):
        X, color, color_log, tag, prec, prec_log = get_user_vectors(i)
        if use_tsne:
            Y = tsne(X, no_dims=2, initial_dims=50, perplexity=30.0)
        else:
            Y = pca(X, no_dims=2)
        save_embed_plot(Y, color, tag, prec, 'RF',
                        'embed_adopters_topic_rf' + str(i))
        save_embed_plot(Y, color_log, tag, prec_log, 'LR',
                        'embed_adopters_topic_lr' + str(i))

#cc 0.0589, candidate set recall 280 out of 4751 cand size 6312
#cc 0.219, candidate set recall 516 out of 2347 cand size 4702
#cc 0.56, candidate set recall 658 out of 1162 cand size 4075

Esempio n. 11

0

Mostra file

    for t in stasks:
        dstask[ds][t] = ctr
        print '%d: ds%03d task%03d' % (ctr, ds, t)
        ctr = ctr + 1

# Map every element of `s` to a running integer index.
cmap = {}
ctr = 0

for i in list(s):
    cmap[i] = ctr
    ctr += 1

colors = [cmap[i] for i in copedata[:, 0]]

# Keep only the rows flagged in `usedata`.
X = X[usedata == 1, :]

t = tsne(X, no_dims=2, initial_dims=15, perplexity=10.0, max_iter=1000)
plt.clf()
plt.scatter(t[:, 0], t[:, 1], s=0)  # create axes
# NOTE(review): `t` only has rows for usedata == 1, while `copedata` is
# indexed with the same `i` below -- confirm copedata was filtered the same way.
# The context manager closes the label file even if plotting raises.
with open(basedir + 'tasklabels.txt', 'w') as f:
    for i, (x, y) in enumerate(t):
        task_id = dstask[copedata[i, 0]][copedata[i, 1]]
        # Draw the task number as text at the embedded position and log it.
        plt.text(x, y, '%d' % task_id)  #,color=colors[i])
        f.write('%d\n' % task_id)

# print legend:
plt.savefig(basedir + 'tsne_fig.pdf', format='pdf')

Esempio n. 12

0

Mostra file

File: tsne_algo.py Progetto: declan-ayres/MLAlgorithms

        The perplexity is 2 to the entropy of the probability distribution. It measures how many neighbors each data point will be connected to. When I raise the perplexity the images have more clusters.
        Theta measures the accuracy of the algorithm. It is the angle the data points are to each other. Large theta speeds up the algorithm but reduces the accuracy and small theta slows down the algorithm but increases the accuracy.
        """
        #read all the classified files into a list
        #not only read the classified files into a list, but also keep them open for appending
	#randomarray=np.random.random(255, size=(1000, 784))
	randomarray=np.random.random((1000, 784))
	coordinates = bh_sne(randomarray, perplexity = 30, theta = .1) * 10
        print coordinates




if __name__ == '__main__':
    streamhandler = logging.StreamHandler(sys.stdout)

    # logging.DEBUG is 10 and logging.INFO is 20.  The previous code mapped
    # 10 -> INFO and 20 -> DEBUG, i.e. the two levels were swapped; the
    # numeric value is now applied as the level it actually denotes.  Any
    # other value leaves the levels untouched, as before.
    if args.logging_level in (logging.DEBUG, logging.INFO):
        streamhandler.setLevel(args.logging_level)
        log.setLevel(args.logging_level)

    filehandler = logging.FileHandler("logging")
    formatter = logging.Formatter("%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s")

    streamhandler.setFormatter(formatter)
    # The file handler was created but never formatted or attached, so the
    # "logging" file stayed empty; wire it up like the stream handler.
    filehandler.setFormatter(formatter)
    log.addHandler(streamhandler)
    log.addHandler(filehandler)
    tsne()

Esempio n. 13

0

Mostra file

File: tfOnlineMNIST-TSNE.py Progetto: hal2001/Crossprop

def draw(learning_rate, n_examples, repeats):
    """Restore saved models per stage, extract hidden features and t-SNE them.

    Six models are built (two CrossPropAlt variants and four back-prop
    variants with different optimizers).  For each stage in
    ``candidate_stages`` the checkpoint for that stage is restored, the
    ``feature`` tensor of every model is evaluated over the first
    ``n_examples`` training rows in batches, and the features of the models
    listed in ``candidate_methods`` are embedded with ``tsne``.  The
    embeddings are pickled to ``tmp/tsne_dim_<target_dim>.bin``.

    Args:
        learning_rate: passed to every model and optimizer constructor.
        n_examples: number of leading rows taken from ``train_x_total`` /
            ``train_y_total``.
        repeats: how many times the sample indices are reshuffled and the
            embedding recomputed per stage.

    Relies on module-level names not visible here: ``dim_in``,
    ``dim_hidden``, ``dim_out``, ``train_x_total``, ``train_y_total``,
    ``tag``, ``logger``.
    """
    gate = Tanh()
    runs = 1
    cp_alt = CrossPropAlt(dim_in,
                          dim_hidden,
                          dim_out,
                          learning_rate,
                          gate,
                          output_layer='CE',
                          lam=0,
                          name='cp')
    cp_alt_lam = CrossPropAlt(dim_in,
                              dim_hidden,
                              dim_out,
                              learning_rate,
                              gate,
                              output_layer='CE',
                              lam=0.5,
                              name='cp-lam')
    bp = BackPropClissification(dim_in,
                                dim_hidden,
                                dim_out,
                                learning_rate,
                                gate,
                                name='bp')
    bp_mom = BackPropClissification(dim_in,
                                    dim_hidden,
                                    dim_out,
                                    learning_rate,
                                    gate,
                                    name='bp-mom',
                                    optimizer=tf.train.MomentumOptimizer(
                                        learning_rate=learning_rate,
                                        momentum=0.9))
    bp_adam = BackPropClissification(
        dim_in,
        dim_hidden,
        dim_out,
        learning_rate,
        gate,
        name='bp-adam',
        optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate))
    bp_rms = BackPropClissification(
        dim_in,
        dim_hidden,
        dim_out,
        learning_rate,
        gate,
        name='bp-rms',
        optimizer=tf.train.RMSPropOptimizer(learning_rate=learning_rate))
    # Index into this list is the `method_ind` used below
    # (0 = cp_alt, 2 = bp are the ones that get embedded).
    methods = [cp_alt, cp_alt_lam, bp, bp_adam, bp_rms, bp_mom]
    for run in range(runs):

        train_x = train_x_total[:n_examples, :]
        train_y = train_y_total[:n_examples, :]

        # y1 / y2 are the target columns rotated left by one / two positions.
        y0 = train_y
        y1 = np.concatenate([train_y[:, 1:], train_y[:, :1]], 1)
        y2 = np.concatenate([y1[:, 1:], y1[:, :1]], 1)

        # One (inputs, targets) pair per stage index.
        train_xs = [train_x] * 6
        train_ys = [y0, y1, y2, y0, y1, y2]

        # np.random.seed(0)
        # x0 = train_x
        # perm = np.arange(dim_in)
        # np.random.shuffle(perm)
        # x1 = train_x[:, perm]
        # np.random.shuffle(perm)
        # x2 = train_x[:, perm]
        #
        # train_xs = [x0, x1, x2, x0, x1, x2]
        # train_ys = [train_y] * 6

        # features = np.zeros((stages, len(methods), n_examples, dim_hidden))
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            figure_index = 0
            candidate_stages = [0, 1, 2, 3]
            candidate_methods = [0, 2]
            target_dim = 2
            # (stage, method_ind) -> (embedding, class indices)
            tsne_data = dict()
            for stage in candidate_stages:
                # Load the weights saved for this stage before reading features.
                saver.restore(sess,
                              'tmp/saved/ffn_model/%s_stage_%d' % (tag, stage))
                features = np.zeros((len(methods), n_examples, dim_hidden))
                train_x = train_xs[stage]
                train_y = train_ys[stage]
                batch_size = 1000
                cur_example = 0
                # Evaluate every model's feature tensor batch by batch.
                while cur_example < n_examples:
                    logger.info('store features... stage %d, example %d' %
                                (stage, cur_example))
                    end_example = min(n_examples, cur_example + batch_size)
                    for method_ind, method in enumerate(methods):
                        cur_features = sess.run(
                            method.feature,
                            feed_dict={
                                method.x: train_x[cur_example:end_example, :],
                                method.target:
                                train_y[cur_example:end_example, :]
                            })
                        features[method_ind,
                                 cur_example:end_example, :] = cur_features
                    cur_example = end_example
                # NOTE(review): hard-coded 2500 indices -- indexing below
                # assumes n_examples >= 2500; confirm with callers.
                sample_indices = np.arange(2500)
                for repeat in range(repeats):
                    np.random.shuffle(sample_indices)
                    for method_ind in candidate_methods:
                        x_to_plot = features[method_ind, sample_indices, :]
                        y_to_plot = np.argmax(train_y[sample_indices, :],
                                              axis=1)
                        print x_to_plot.shape, y_to_plot.shape
                        x_prime = tsne(x_to_plot, target_dim, 50, 20.0)
                        # The key has no `repeat` component, so each repeat
                        # overwrites the previous entry for this pair.
                        tsne_data[(stage, method_ind)] = (x_prime, y_to_plot)
                        # fig = plt.figure(figure_index)
                        # figure_index += 1
                        # ax = Axes3D(fig)
                        # ax.scatter(x_prime[:, 0], x_prime[:, 1], x_prime[:, 2], c=y_to_plot)
                        # plt.scatter(x_prime[:, 0], x_prime[:, 1], 20, y_to_plot)
                        # plt.title('%s_%s_stage_%d' % (tag, labels[method_ind], stage))
                        # plt.show()
                        # plt.savefig('figure/%s_repeat_%d_%s_stage_%d.png' % (tag, repeat, labels[method_ind], stage))
                        # plt.close()
                        # plt.show()
            # Persist all collected embeddings in one pickle file.
            with open('tmp/tsne_dim_%d.bin' % target_dim, 'wb') as f:
                pickle.dump(tsne_data, f)

Esempio n. 14

0

Mostra file

File: LDA.py Progetto: sjl421/Python-Scripts-2

from sklearn.datasets import load_breast_cancer
from tsne import *
import pandas as pd
from pylab import *
import seaborn as sns
from functools import reduce
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Load the scikit-learn breast-cancer dataset.
data = load_breast_cancer()

# Features, integer targets and the human-readable class names.
x, y, label_names = data['data'], data['target'], data['target_names']
# Rescale the features with MinMaxScaler (fitted on the full dataset).
scaler = MinMaxScaler()
scaler.fit(x)
x = scaler.transform(x)

# NOTE(review): this project's `tsne` is called with (x, y, label_names=...)
# and its return value is discarded -- presumably it plots; confirm.
tsne(x, y, label_names=label_names)

# Hold out 10% of the rows; targets are reshaped to a column vector first.
trainx, testx, trainy, testy = train_test_split(x,
                                                reshape(y, [-1, 1]),
                                                test_size=0.1)


def variance(x, u):
    """Compute ``((x - u) . (x - u)).T`` after turning ``x`` into a column.

    ``x`` is reshaped to shape ``(-1, 1)``, ``u`` is subtracted with normal
    broadcasting, and the transposed ``.dot`` product of the difference with
    itself is returned.

    NOTE(review): a column ``x`` minus a row ``u`` broadcasts to a square
    matrix, which differs from the usual outer-product scatter term --
    confirm this is intended.
    """
    column = reshape(x, [-1, 1])
    delta = column - u
    product = delta.dot(delta)
    return product.T


def lda2(x, y):
    x0, x1 = mat(x[y.T[0] == 0]), mat(x[y.T[0] == 1])
    u0, u1 = x0.mean(axis=0), x1.mean(axis=0)

Esempio n. 15

0

Mostra file

File: 8.1_tsne_wholebrain.py Progetto: Jasonleng/openfmri

import matplotlib.pyplot as plt
import matplotlib.font_manager as mplfont
import os
from sklearn import manifold
from sklearn.metrics import euclidean_distances
from tsne import *

# Output directory for clustering results (not used in the visible lines).
outdir='/corral-repl/utexas/poldracklab/openfmri/analyses/paper_analysis_Dec2012/clustering'

# NOTE(review): `N` is not imported in the visible lines -- presumably numpy
# re-exported by `from tsne import *`; confirm.
#X=N.loadtxt('/corral-repl/utexas/poldracklab/openfmri/analyses/paper_analysis_Dec2012/ICA/datarun1_icarun1_200comp.txt')
X=N.load('/corral-repl/utexas/poldracklab/openfmri/analyses/paper_analysis_Dec2012/data_prep/zstat_run1.npy')

#clf = manifold.MDS(n_components=2, n_init=1, max_iter=1000)
#t=clf.fit_transform(euclidean_distances(X))

# 2-D t-SNE embedding of the loaded data.
t=tsne(X,no_dims=2, initial_dims=30,perplexity=10.0, max_iter=1000)

# Per-row key table; column 0 is used below as the task number.
taskinfo=N.loadtxt('/corral-repl/utexas/poldracklab/openfmri/analyses/paper_analysis_Dec2012/data_prep/data_key_run1.txt')

tasknums=N.unique(taskinfo[:,0])


# compute scatter for each task
# mean_t_obs[k]: centroid of task k's embedded points.
# t_eucdist[k]: mean Euclidean distance of those points to that centroid.
t_eucdist={}
mean_t_obs={}
for k in tasknums:
    obs=N.where(taskinfo[:,0]==k)[0]
    t_obs=t[obs,:]
    mean_t_obs[k]=N.mean(t_obs,0)
    t_eucdist[k]=N.mean(N.sqrt((t_obs[:,0]-mean_t_obs[k][0])**2 + (t_obs[:,1]-mean_t_obs[k][1])**2 ))

Esempio n. 16

0

Mostra file

File: plot.py Progetto: rhyschris/Playlist-Recommendation

        next = np.array([ float(n) for n in l[:-2].split(" ")[half:]])
        prevsong = prev.reshape((96, 50))
        nextsong = next.reshape((96, 50))
        
        prevsong = np.sum(prevsong, axis=1)
        nextsong = np.sum(nextsong, axis=1)
        arr.append(np.array(list(prevsong) + list(nextsong)))
    

# Read one label per line, stripping trailing whitespace.
with open('labels.txt', 'r') as l:
    labels = [word.rstrip() for word in l]


# Stack the collected feature vectors into a single array.
matr = np.array([np.array(entry) for entry in arr])

# Embed with t-SNE and draw an annotated scatter plot.
Y = tsne(matr)
import matplotlib.pyplot as plt

plt.scatter(Y[:, 0], Y[:, 1], 20)
for label, x, y in zip(labels, Y[:, 0], Y[:, 1]):
    plt.annotate(label,
                 xy=(x, y),
                 xytext=(-10, 10),
                 textcoords="offset points",
                 bbox={'boxstyle': 'round', 'fc': "yellow"})

plt.savefig("test.ps", format='eps', dpi=1000)
plt.show()

Esempio n. 17

0

Mostra file

File: run.py Progetto: sharpfun/ss2016_dpnlp

from blocks.model import Model

# Build the training loop: `algorithm` drives updates on `cost`, fed by a
# sequential data stream over `train_dataset` with a batch size of 1.
main_loop = MainLoop(
    algorithm=algorithm,
    data_stream=DataStream.default_stream(
        dataset=train_dataset,
        iteration_scheme=SequentialScheme(train_dataset.num_instances(), 1)
    ),
    model=Model(cost),
    extensions=extensions
)

# Run training before the visualisation code below executes.
main_loop.run()

from tsne import *
import matplotlib.pyplot as plt

# Load the array stored in "layer1_20.npy".
W1 = numpy.load("layer1_20.npy")

# 2-D t-SNE embedding; the trailing positional args are presumably
# no_dims, initial_dims and perplexity -- confirm against the tsne module.
Y = tsne(W1, 2, 50, 20.0)

fig, ax = plt.subplots()
ax.scatter(Y[:,0], Y[:,1])

# Label point i with the i-th entry of the dataset's `bag_words`.
# NOTE(review): assumes Y has at least as many rows as bag_words -- confirm.
for i, word in enumerate(train_dataset.bag_words):
    x,y = Y[i]
    ax.annotate(word, (x,y))

plt.show()

Esempio n. 18

0

Mostra file

File: tsne_hashtag_visualisation.py Progetto: harvineet/SocialMirror

#set visibility of most, least and mid frequency hashtags by setting text size
def get_tag_size_label(tlist):
	"""Return text sizes and labels for every tag in the global ``tag_labels``.

	Tags contained in ``tlist`` are given size 2 and their latin-1 decoded
	text; all other tags get size 0 and an empty string.  The result is
	``(sizes, labels)`` with ``labels`` wrapped by ``array``.
	"""
	sizes, labels = [], []
	for tag in tag_labels:
		shown = tag in tlist
		sizes.append(2 if shown else 0)
		labels.append(tag.decode('latin-1') if shown else '')
	return sizes, array(labels)

# Convert the collected `hist_feature` rows to an array and embed them once
# at import time; `Y` is read as a module-level global by save_embed_plot.
X = array(hist_feature)
# Positional args after X are presumably no_dims, initial_dims, perplexity
# -- confirm against the tsne module in use.
Y = tsne(X, 2, 50, 30.0);

def save_embed_plot(sizes_and_labels,fname):
	"""Annotate the module-level embedding ``Y`` and save the figure.

	``sizes_and_labels`` is a ``(tag_sizes, labels)`` pair, e.g. the return
	value of ``get_tag_size_label``.  It is unpacked in the body because
	tuple parameters in a ``def`` signature are a SyntaxError on Python 3;
	positional callers are unaffected.  ``fname`` is the output path.
	"""
	tag_sizes, labels = sizes_and_labels
	fig = Plot.figure()
	# Marker size 0: only the text annotations are meant to be visible.
	Plot.scatter(Y[:,0], Y[:,1], 0)
	for label, x, y, s in zip(labels, Y[:,0], Y[:,1], tag_sizes):
		Plot.annotate(label, xy = (x, y), xytext = (0, 0), textcoords = 'offset points', size=s)
	Plot.axis('off')
	fig.savefig(fname, dpi=800, bbox_inches='tight')
	
if __name__ == "__main__":
	# Call form of print: same output on Python 2, valid syntax on Python 3.
	print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
	# Plot the embedding with only the tags in `most_freq` made visible.
	save_embed_plot(get_tag_size_label(most_freq),'embed_tag_mostfreq.png')