def save_embed_plot(X,labels,fname):
	"""Embed X with ``tsne``, draw a labelled scatter plot and save it.

	X      -- data handed to ``tsne`` as its first argument.
	labels -- annotation texts, paired in order with the embedded points.
	fname  -- output path passed to ``fig.savefig``.

	Reads the module-level ``word_vector_dim`` and forwards it as the third
	argument of ``tsne``.
	"""
	embedding = tsne(X, 2, word_vector_dim, 20.0)
	xs = embedding[:,0]
	ys = embedding[:,1]
	fig = Plot.figure()
	Plot.scatter(xs, ys, 1)
	# Each label is drawn exactly at its point (zero offset), in small type.
	for text, px, py in zip(labels, xs, ys):
		Plot.annotate(text, xy=(px, py), xytext=(0, 0), textcoords='offset points', size=5)
	fig.savefig(fname, dpi=1200)
def save_embed_plot(X,labels,fname):
	"""Embed X with ``tsne``, draw a labelled scatter plot and save it.

	X      -- data handed to ``tsne`` as its first argument.
	labels -- annotation texts, paired in order with the embedded points.
	fname  -- output path passed to ``fig.savefig``.

	Reads the module-level ``word_vector_dim`` and forwards it as the third
	argument of ``tsne``.
	"""
	embedding = tsne(X, 2, word_vector_dim, 20.0)
	xs = embedding[:,0]
	ys = embedding[:,1]
	fig = Plot.figure()
	Plot.scatter(xs, ys, 1)
	# Each label is drawn exactly at its point (zero offset), in small type.
	for text, px, py in zip(labels, xs, ys):
		Plot.annotate(text, xy=(px, py), xytext=(0, 0), textcoords='offset points', size=5)
	fig.savefig(fname, dpi=1200)
Esempio n. 3
0
def visualizeLatentState(X, rs, gen_params, rec_params):
    """Plot a joint t-SNE embedding of the inputs and their decoded samples.

    The recognition network (``rec_params``) maps ``X`` to the means and
    log-standard-deviations of a diagonal Gaussian, one latent sample is
    drawn using ``rs``, and the generator (``gen_params``) decodes it.  The
    inputs and the scaled decodings are stacked, embedded together with
    ``tsne`` and written to ``hidden.jpg``.

    Args:
        X: input rows; stacked on top of the decoded rows before embedding.
        rs: random state forwarded to ``sample_diag_gaussian``.
        gen_params: parameters passed to ``neural_net_predict``.
        rec_params: parameters passed to ``nn_predict_gaussian``.

    Returns:
        None.  Prints the two array shapes and saves the figure as a side
        effect.
    """
    q_means, q_log_stds = nn_predict_gaussian(rec_params, X)
    latents = sample_diag_gaussian(q_means, q_log_stds, rs)
    gen = sigmoid(neural_net_predict(gen_params, latents))
    # Keep only the first half of the decoder's output columns.  Floor
    # division keeps the slice bound an int; plain "/" yields a float on
    # Python 3 and makes the slice raise TypeError.
    gen = gen[:, :gen.shape[1] // 2]
    print(gen.shape)
    print(X.shape)
    # The decoded rows are scaled by 10 before being embedded with X.
    y = tsne(np.vstack((X, gen * 10)))
    # The embedding is split at gen's row count.
    # NOTE(review): this assumes X and gen have the same number of rows, so
    # the first slice is exactly the X part of the stack - confirm.
    n_rows = gen.shape[0]
    plt.figure()
    plt.clf()
    plt.scatter(y[:n_rows, 0], y[:n_rows, 1], color='red')
    plt.scatter(y[n_rows:, 0], y[n_rows:, 1], color='blue')
    plt.legend(['X', 'Xdecoded'],)
    plt.savefig('hidden.jpg')
Esempio n. 4
0
def visualize_codes(net, dataloader=test_loader, batches=4):
    """Encode a few batches with ``net`` and show a t-SNE plot of the codes.

    Args:
        net: model exposing ``encode``; its output's ``.data.numpy()`` is
            collected for every batch.
        dataloader: iterable yielding ``(inputs, labels)`` pairs.
        batches: maximum number of batches to take from ``dataloader``.

    Returns:
        ``(X, Y, GT)``: the concatenated codes, their 2-D t-SNE embedding and
        the concatenated ground-truth labels.
    """
    from itertools import islice

    # Display name of each integer class label, by index (0..9).
    class_names = ('tops', 'trousers', 'pullovers', 'dresses', 'coats',
                   'sandals', 'shirts', 'sneakers', 'bags', 'boots')

    codes = []
    truths = []

    # Walk the loader once.  Building a fresh iterator per batch with
    # next(iter(dataloader)) restarts iteration each time, so a loader that
    # does not shuffle would hand back the same first batch repeatedly.
    for to_encode, truth in islice(dataloader, batches):
        truths.append(truth.numpy())
        encoded = net.encode(Variable(to_encode))
        codes.append(encoded.data.numpy())

    X = np.concatenate(codes, axis=0)
    GT = np.concatenate(truths, axis=0)
    Y = tsne(X, no_dims=2, initial_dims=8)

    # One scatter series per class so the legend shows every class name.
    for class_index, class_name in enumerate(class_names):
        points = Y[np.where(GT == class_index)]
        plt.scatter(points[:, 0], points[:, 1], label=class_name)

    plt.title('visualization of codes')
    plt.legend()
    plt.show()

    return X, Y, GT
Esempio n. 5
0
def plot_clusters(matrix, listy, no_dims=2, initial_dims=100, perplexity=10):
    """Draw a t-SNE scatter plot of ``matrix`` with a call-out label per point.

    ``matrix`` is embedded with ``tsne`` using the given ``no_dims``,
    ``initial_dims`` and ``perplexity``; the first two embedding columns are
    plotted on a new figure and each point is annotated with the matching
    entry of ``listy``.  Axis ticks are removed.  Returns None.
    """
    plt.figure()
    embedding = tsne(matrix, no_dims=no_dims, initial_dims=initial_dims,
                     perplexity=perplexity)
    xs = embedding[:, 0]
    ys = embedding[:, 1]
    plt.scatter(xs, ys)
    for text, px, py in zip(listy, xs, ys):
        # Fresh style dicts per annotation, offset up-left with an arrow
        # pointing back at the data point.
        box_style = {'boxstyle': 'round,pad=0.5', 'fc': 'yellow', 'alpha': 0.5}
        arrow_style = {'arrowstyle': '->', 'connectionstyle': 'arc3,rad=0'}
        plt.annotate(text,
                     xy=(px, py),
                     xytext=(-20, 20),
                     textcoords='offset points',
                     ha='right',
                     va='bottom',
                     bbox=box_style,
                     arrowprops=arrow_style,
                     fontsize='x-large')
    # Hide tick marks on both axes.
    current_axes = plt.gca()
    current_axes.axes.get_xaxis().set_ticks([])
    current_axes.axes.get_yaxis().set_ticks([])
'''
plt.figure()
print final_lang.shape
X = pca(cosangles)
plt.scatter(X[:,0],X[:,1])#,len(languages),np.r_[1:len(languages)])
for label, x, y in zip(languages, X[:, 0], X[:, 1]):
    plt.annotate(
        label, 
        xy = (x, y), xytext = (-20, 20),
        textcoords = 'offset points', ha = 'right', va = 'bottom',
        bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
        arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))
'''

# plot language points
# Embed ``cosangles`` in 2-D with t-SNE and draw one labelled point per entry
# of ``languages`` on a new figure.
plt.figure()
Y = tsne(cosangles,no_dims=2,initial_dims=100,perplexity=8)
plt.scatter(Y[:,0],Y[:,1])#,len(languages),np.r_[1:len(languages)])
# Labels are paired with embedding rows in order; zip stops at the shorter one.
for label, x, y in zip(languages, Y[:, 0], Y[:, 1]):
    # Yellow rounded call-out, offset up-left, with an arrow to the point.
    plt.annotate(
        label, 
        xy = (x, y), xytext = (-20, 20),
        textcoords = 'offset points', ha = 'right', va = 'bottom',
        bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
        arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'),
				fontsize='x-large')
# Remove the tick marks on both axes before showing the figure.
frame = plt.gca()
frame.axes.get_xaxis().set_ticks([])
frame.axes.get_yaxis().set_ticks([])
plt.show()
Esempio n. 7
0
from sklearn.datasets import load_breast_cancer
from tsne import *
import pandas as pd
from pylab import *
import seaborn as sns
from functools import reduce
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Load the breast-cancer dataset and pull out features, targets and class names.
data = load_breast_cancer()
x, y, label_names = (data[key] for key in ('data', 'target', 'target_names'))

# Rescale the features with a MinMaxScaler fitted on the full feature matrix.
scaler = MinMaxScaler()
x = scaler.fit(x).transform(x)

# Called for its side effects only; the return value is discarded.
tsne(x, y, label_names=label_names)

# Hold out 10% of the rows; the targets are passed as a single column.
y_column = reshape(y, [-1, 1])
trainx, testx, trainy, testy = train_test_split(x, y_column, test_size=0.1)


def variance(x, u):
    """Return ``((x - u) . (x - u)).T`` after reshaping ``x`` to one column.

    ``x`` is reshaped to shape (-1, 1), ``u`` is subtracted with ordinary
    broadcasting, and the difference is multiplied by itself with ``dot``
    before being transposed.
    """
    centered = reshape(x, [-1, 1]) - u
    return centered.dot(centered).T


def lda2(x, y):
    x0, x1 = mat(x[y.T[0] == 0]), mat(x[y.T[0] == 1])
    u0, u1 = x0.mean(axis=0), x1.mean(axis=0)

    sigma0, sigma1 = reduce(lambda x, y: x + y, [variance(i, u0) for i in x0]), reduce(lambda x, y: x + y, [variance(i, u1) for i in x1])
		vectors.append(vec[vocab_index[u]])
		if y==1 and yp==1:
			color.append(1)
		elif y==1 and yp!=1:
			color.append(2)
		elif y!=1 and yp==1:
			color.append(3)
		else:
			color.append(4)
		count+=1
		if count==vec_limit:
			break
	return numpy.array(vectors), color, tag, prec

def save_embed_plot((X,color,tag,prec),fname):
	Y = tsne(X, no_dims = 2, initial_dims = 50, perplexity = 30.0)
	with open("/mnt/filer01/word2vec/degree_distribution/adopter_pred_files/single_topic_vis/"+fname+".pickle","wb") as fd:
		pickle.dump(Y,fd)
	fig = Plot.figure()
	init = []
	tp = []
	fn = []
	fp = []
	tn = []
	for i,c in enumerate(color):
		if c==0:
			init.append(i)
		elif c==1:
			tp.append(i)
		elif c==2:
			fn.append(i)
Esempio n. 9
0
#set visibility of most, least and mid frequency hashtags by setting text size
def get_tag_size_label(tlist):
    """Return text sizes and labels that make only the tags in ``tlist`` visible.

    Walks the module-level ``tag_labels`` in order.  A tag found in ``tlist``
    gets size 2 and its latin-1 decoded text; every other tag gets size 0 and
    an empty label.

    Returns:
        ``(size, label)``: a list of ints and an ``array`` of label strings,
        both aligned with ``tag_labels``.
    """
    size = []
    label = []
    for t in tag_labels:
        shown = t in tlist
        size.append(2 if shown else 0)
        label.append(t.decode('latin-1') if shown else '')
    return size, array(label)


# Module-level embedding: computed once and read as the global ``Y`` by
# save_embed_plot below.
X = array(hist_feature)
Y = tsne(X, 2, 50, 30.0)


def save_embed_plot(sizes_and_labels, fname):
    """Draw the module-level embedding ``Y`` with per-point labels and save it.

    Args:
        sizes_and_labels: a ``(tag_sizes, labels)`` pair, e.g. the return
            value of ``get_tag_size_label``.  ``tag_sizes`` gives the text
            size of each label; ``labels`` gives the text.
        fname: output path passed to ``fig.savefig``.
    """
    # Unpack inside the body: tuple parameters in a signature were removed in
    # Python 3 (PEP 3113).  Positional callers are unaffected.
    tag_sizes, labels = sizes_and_labels
    fig = Plot.figure()
    # Marker size 0: only the text annotations are meant to be visible.
    Plot.scatter(Y[:, 0], Y[:, 1], 0)
    for label, x, y, s in zip(labels, Y[:, 0], Y[:, 1], tag_sizes):
        Plot.annotate(label,
                      xy=(x, y),
                      xytext=(0, 0),
                      textcoords='offset points',
                      size=s)
    Plot.axis('off')
    fig.savefig(fname, dpi=800, bbox_inches='tight')

    Plot.scatter(Y_tn[:, 0],
                 Y_tn[:, 1],
                 s=10,
                 c='c',
                 alpha=0.4,
                 label='true negatives',
                 edgecolor='none')
    Plot.axis('off')
    Plot.legend(prop={'size': 8})
    Plot.title('#' + tag + ', P@100: ' + str(prec) + ', ' + clf)
    fig.savefig(fname + '.png', dpi=400, bbox_inches='tight')


if __name__ == "__main__":
    # A parenthesised single-argument print is valid both as the Python 2
    # statement and as the Python 3 function, and prints the same text.
    print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
    # For every training example: fetch the vectors and both classifiers'
    # colourings, reduce to 2-D once, then save one plot per classifier.
    for i in range(0, train_ex_limit):
        X, color, color_log, tag, prec, prec_log = get_user_vectors(i)
        if use_tsne == True:
            Y = tsne(X, no_dims=2, initial_dims=50, perplexity=30.0)
        else:
            Y = pca(X, no_dims=2)
        # with open("/mnt/filer01/word2vec/degree_distribution/adopter_pred_files/single_topic_vis/"+fname+".pickle","wb") as fd:
        # pickle.dump(Y,fd)
        save_embed_plot(Y, color, tag, prec, 'RF',
                        'embed_adopters_topic_rf' + str(i))
        save_embed_plot(Y, color_log, tag, prec_log, 'LR',
                        'embed_adopters_topic_lr' + str(i))

#cc 0.0589, candidate set recall 280 out of 4751 cand size 6312
#cc 0.219, candidate set recall 516 out of 2347 cand size 4702
#cc 0.56, candidate set recall 658 out of 1162 cand size 4075
Esempio n. 11
0
    for t in stasks:
        dstask[ds][t] = ctr
        print '%d: ds%03d task%03d' % (ctr, ds, t)
        ctr = ctr + 1

# make colormap: give every element of ``s`` a consecutive integer index
cmap = {}
ctr = 0

for i in list(s):
    cmap[i] = ctr
    ctr += 1

colors = [cmap[i] for i in copedata[:, 0]]

# keep only the rows flagged for use
X = X[usedata == 1, :]

t = tsne(X, no_dims=2, initial_dims=15, perplexity=10.0, max_iter=1000)
plt.clf()
plt.scatter(t[:, 0], t[:, 1], s=0)  # create axes
# Draw each point as its task number and log the same number to a text file.
# The context manager closes the file even if a dstask lookup raises midway.
with open(basedir + 'tasklabels.txt', 'w') as f:
    for i, (x, y) in enumerate(t):
        # Format the label once and reuse it for the plot and the file.
        tasklabel = '%d' % dstask[copedata[i, 0]][copedata[i, 1]]
        plt.text(x, y, tasklabel)  #,color=colors[i])
        f.write(tasklabel + '\n')

# save the figure
plt.savefig(basedir + 'tsne_fig.pdf', format='pdf')
Esempio n. 12
0
        The perplexity is 2 raised to the power of the entropy of the probability distribution. It roughly measures how many neighbors each data point is effectively connected to. When I raise the perplexity, the images have more clusters.
        Theta controls the trade-off between speed and accuracy of the algorithm. It is the angular threshold used to decide when a group of distant points may be treated as a single point. A large theta speeds up the algorithm but reduces the accuracy; a small theta slows down the algorithm but increases the accuracy.
        """
        #read all the classfied files into a list
        #not only read the classified files into a list, but also keep it open for appending
	#randomarray=np.random.random(255, size=(1000, 784))
	randomarray=np.random.random((1000, 784))
	coordinates = bh_sne(randomarray, perplexity = 30, theta = .1) * 10
        print coordinates




if __name__ == '__main__':
    # Console handler writing to stdout.
    streamhandler = logging.StreamHandler(sys.stdout)

    # Map the command-line value onto a logging level for both the handler
    # and the logger; any other value leaves the levels untouched.
    # NOTE(review): 10 and 20 are the numeric values of logging.DEBUG and
    # logging.INFO respectively, i.e. the reverse of this mapping - confirm
    # that this is intentional.
    for requested, level in ((10, logging.INFO), (20, logging.DEBUG)):
        if args.logging_level == requested:
            streamhandler.setLevel(level)
            log.setLevel(level)

    # NOTE(review): this handler is created (for a file named "logging") but
    # never attached to ``log`` in the code visible here.
    filehandler = logging.FileHandler("logging")
    formatter = logging.Formatter("%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s")

    streamhandler.setFormatter(formatter)
    log.addHandler(streamhandler)
    tsne()
Esempio n. 13
0
def draw(learning_rate, n_examples, repeats):
    """Restore saved per-stage models, collect hidden features and pickle their t-SNE embeddings.

    Six models are built (two CrossPropAlt variants and four
    BackPropClissification variants with different optimizers).  For every
    stage in ``candidate_stages`` the checkpoint
    ``tmp/saved/ffn_model/<tag>_stage_<stage>`` is restored, ``method.feature``
    is evaluated for all models on the first ``n_examples`` training rows, and
    a sample of those features is embedded with ``tsne`` for the models listed
    in ``candidate_methods``.  The embeddings are pickled to
    ``tmp/tsne_dim_<target_dim>.bin``.

    Reads the module-level names ``dim_in``, ``dim_hidden``, ``dim_out``,
    ``train_x_total``, ``train_y_total``, ``tag`` and ``logger``.

    Args:
        learning_rate: passed to every model constructor and optimizer.
        n_examples: number of leading rows of the training arrays to use.
        repeats: how many times the sample is reshuffled and re-embedded per
            stage.  Results are stored under the key ``(stage, method_ind)``,
            so each repeat overwrites the previous one.

    Returns:
        None.
    """
    gate = Tanh()
    # Single run: the outer loop below executes exactly once.
    runs = 1
    cp_alt = CrossPropAlt(dim_in,
                          dim_hidden,
                          dim_out,
                          learning_rate,
                          gate,
                          output_layer='CE',
                          lam=0,
                          name='cp')
    cp_alt_lam = CrossPropAlt(dim_in,
                              dim_hidden,
                              dim_out,
                              learning_rate,
                              gate,
                              output_layer='CE',
                              lam=0.5,
                              name='cp-lam')
    bp = BackPropClissification(dim_in,
                                dim_hidden,
                                dim_out,
                                learning_rate,
                                gate,
                                name='bp')
    bp_mom = BackPropClissification(dim_in,
                                    dim_hidden,
                                    dim_out,
                                    learning_rate,
                                    gate,
                                    name='bp-mom',
                                    optimizer=tf.train.MomentumOptimizer(
                                        learning_rate=learning_rate,
                                        momentum=0.9))
    bp_adam = BackPropClissification(
        dim_in,
        dim_hidden,
        dim_out,
        learning_rate,
        gate,
        name='bp-adam',
        optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate))
    bp_rms = BackPropClissification(
        dim_in,
        dim_hidden,
        dim_out,
        learning_rate,
        gate,
        name='bp-rms',
        optimizer=tf.train.RMSPropOptimizer(learning_rate=learning_rate))
    # Index in this list is the ``method_ind`` used below (0 = cp_alt, 2 = bp).
    methods = [cp_alt, cp_alt_lam, bp, bp_adam, bp_rms, bp_mom]
    for run in range(runs):

        train_x = train_x_total[:n_examples, :]
        train_y = train_y_total[:n_examples, :]

        # y1 and y2 are the target columns rotated left by one and by two
        # positions; each stage index selects one of these target variants.
        y0 = train_y
        y1 = np.concatenate([train_y[:, 1:], train_y[:, :1]], 1)
        y2 = np.concatenate([y1[:, 1:], y1[:, :1]], 1)

        train_xs = [train_x] * 6
        train_ys = [y0, y1, y2, y0, y1, y2]

        # np.random.seed(0)
        # x0 = train_x
        # perm = np.arange(dim_in)
        # np.random.shuffle(perm)
        # x1 = train_x[:, perm]
        # np.random.shuffle(perm)
        # x2 = train_x[:, perm]
        #
        # train_xs = [x0, x1, x2, x0, x1, x2]
        # train_ys = [train_y] * 6

        # features = np.zeros((stages, len(methods), n_examples, dim_hidden))
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Only referenced by the commented-out plotting code below.
            figure_index = 0
            # Only these stages and methods are restored / embedded.
            candidate_stages = [0, 1, 2, 3]
            candidate_methods = [0, 2]
            target_dim = 2
            tsne_data = dict()
            for stage in candidate_stages:
                saver.restore(sess,
                              'tmp/saved/ffn_model/%s_stage_%d' % (tag, stage))
                # One (n_examples, dim_hidden) feature matrix per method.
                features = np.zeros((len(methods), n_examples, dim_hidden))
                train_x = train_xs[stage]
                train_y = train_ys[stage]
                # Evaluate the features in slices of at most batch_size rows.
                batch_size = 1000
                cur_example = 0
                while cur_example < n_examples:
                    logger.info('store features... stage %d, example %d' %
                                (stage, cur_example))
                    end_example = min(n_examples, cur_example + batch_size)
                    for method_ind, method in enumerate(methods):
                        cur_features = sess.run(
                            method.feature,
                            feed_dict={
                                method.x: train_x[cur_example:end_example, :],
                                method.target:
                                train_y[cur_example:end_example, :]
                            })
                        features[method_ind,
                                 cur_example:end_example, :] = cur_features
                    cur_example = end_example
                # NOTE(review): the sample size is hard-coded; indexing the
                # n_examples-row arrays with these indices requires
                # n_examples >= 2500.
                sample_indices = np.arange(2500)
                for repeat in range(repeats):
                    np.random.shuffle(sample_indices)
                    for method_ind in candidate_methods:
                        x_to_plot = features[method_ind, sample_indices, :]
                        # Class index per sampled row (argmax over target columns).
                        y_to_plot = np.argmax(train_y[sample_indices, :],
                                              axis=1)
                        print x_to_plot.shape, y_to_plot.shape
                        x_prime = tsne(x_to_plot, target_dim, 50, 20.0)
                        # The key has no ``repeat`` component, so a later
                        # repeat replaces the entry of an earlier one.
                        tsne_data[(stage, method_ind)] = (x_prime, y_to_plot)
                        # fig = plt.figure(figure_index)
                        # figure_index += 1
                        # ax = Axes3D(fig)
                        # ax.scatter(x_prime[:, 0], x_prime[:, 1], x_prime[:, 2], c=y_to_plot)
                        # plt.scatter(x_prime[:, 0], x_prime[:, 1], 20, y_to_plot)
                        # plt.title('%s_%s_stage_%d' % (tag, labels[method_ind], stage))
                        # plt.show()
                        # plt.savefig('figure/%s_repeat_%d_%s_stage_%d.png' % (tag, repeat, labels[method_ind], stage))
                        # plt.close()
                        # plt.show()
            # Written once, after all stages have been processed.
            with open('tmp/tsne_dim_%d.bin' % target_dim, 'wb') as f:
                pickle.dump(tsne_data, f)
Esempio n. 14
0
from sklearn.datasets import load_breast_cancer
from tsne import *
import pandas as pd
from pylab import *
import seaborn as sns
from functools import reduce
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Load the breast-cancer dataset and pull out features, targets and class names.
data = load_breast_cancer()
x, y, label_names = (data[key] for key in ('data', 'target', 'target_names'))

# Rescale the features with a MinMaxScaler fitted on the full feature matrix.
scaler = MinMaxScaler()
x = scaler.fit(x).transform(x)

# Called for its side effects only; the return value is discarded.
tsne(x, y, label_names=label_names)

# Hold out 10% of the rows; the targets are passed as a single column.
y_column = reshape(y, [-1, 1])
trainx, testx, trainy, testy = train_test_split(x, y_column, test_size=0.1)


def variance(x, u):
    """Return ``((x - u) . (x - u)).T`` after reshaping ``x`` to one column.

    ``x`` is reshaped to shape (-1, 1), ``u`` is subtracted with ordinary
    broadcasting, and the difference is multiplied by itself with ``dot``
    before being transposed.
    """
    centered = reshape(x, [-1, 1]) - u
    return centered.dot(centered).T


def lda2(x, y):
    x0, x1 = mat(x[y.T[0] == 0]), mat(x[y.T[0] == 1])
    u0, u1 = x0.mean(axis=0), x1.mean(axis=0)
Esempio n. 15
0
import matplotlib.pyplot as plt
import matplotlib.font_manager as mplfont
import os
from sklearn import manifold
from sklearn.metrics import euclidean_distances
from tsne import *

# Output directory for the clustering results (not used in the lines visible here).
outdir='/corral-repl/utexas/poldracklab/openfmri/analyses/paper_analysis_Dec2012/clustering'

# NOTE(review): ``N`` is not imported in the visible lines; presumably numpy
# imported as N elsewhere - confirm.
#X=N.loadtxt('/corral-repl/utexas/poldracklab/openfmri/analyses/paper_analysis_Dec2012/ICA/datarun1_icarun1_200comp.txt')
X=N.load('/corral-repl/utexas/poldracklab/openfmri/analyses/paper_analysis_Dec2012/data_prep/zstat_run1.npy')

#clf = manifold.MDS(n_components=2, n_init=1, max_iter=1000)
#t=clf.fit_transform(euclidean_distances(X))

# 2-D t-SNE embedding of the rows of X.
t=tsne(X,no_dims=2, initial_dims=30,perplexity=10.0, max_iter=1000)

# Key file loaded as a numeric table; its first column is used as the task
# number of each row.
taskinfo=N.loadtxt('/corral-repl/utexas/poldracklab/openfmri/analyses/paper_analysis_Dec2012/data_prep/data_key_run1.txt')

tasknums=N.unique(taskinfo[:,0])


# compute scatter for each task
# For every task: mean_t_obs[k] is the centroid of its embedded points and
# t_eucdist[k] is the mean Euclidean distance of those points to the centroid.
t_eucdist={}
mean_t_obs={}
for k in tasknums:
    # Row indices whose task number equals k.
    obs=N.where(taskinfo[:,0]==k)[0]
    t_obs=t[obs,:]
    mean_t_obs[k]=N.mean(t_obs,0)
    t_eucdist[k]=N.mean(N.sqrt((t_obs[:,0]-mean_t_obs[k][0])**2 + (t_obs[:,1]-mean_t_obs[k][1])**2 ))
Esempio n. 16
0
        next = np.array([ float(n) for n in l[:-2].split(" ")[half:]])
        prevsong = prev.reshape((96, 50))
        nextsong = next.reshape((96, 50))
        
        prevsong = np.sum(prevsong, axis=1)
        nextsong = np.sum(nextsong, axis=1)
        arr.append(np.array(list(prevsong) + list(nextsong)))
    

# Read the labels: one per line, trailing whitespace removed.
with open('labels.txt', 'r') as l:
    labels = [word.rstrip() for word in l]

# Stack the collected feature vectors into a single 2-D array.
matr = np.array([np.array(entry) for entry in arr])

# Embed with t-SNE and draw the annotated scatter plot.
Y = tsne(matr)
import matplotlib.pyplot as plt

plt.scatter(Y[:, 0], Y[:, 1], 20)
for label, x, y in zip(labels, Y[:, 0], Y[:, 1]):
    # Yellow rounded box placed slightly up-left of the point.
    plt.annotate(
        label,
        xy=(x, y),
        xytext=(-10, 10),
        textcoords="offset points",
        bbox={'boxstyle': 'round', 'fc': "yellow"},
    )

# NOTE(review): the file is named .ps but is written in EPS format.
plt.savefig("test.ps", format='eps', dpi=1000)
plt.show()


Esempio n. 17
0
from blocks.model import Model

# Build the training loop over ``train_dataset`` and run it.
# NOTE(review): SequentialScheme(num_instances, 1) presumably walks the
# dataset in order with batch size 1 - confirm against the framework docs.
main_loop = MainLoop(
    algorithm=algorithm,
    data_stream=DataStream.default_stream(
        dataset=train_dataset,
        iteration_scheme=SequentialScheme(train_dataset.num_instances(), 1)
    ),
    model=Model(cost),
    extensions=extensions
)

main_loop.run()

from tsne import *
import matplotlib.pyplot as plt

# Load a saved weight matrix and embed its rows in 2-D.
# NOTE(review): presumably written out during training above - confirm.
W1 = numpy.load("layer1_20.npy")

Y = tsne(W1, 2, 50, 20.0)

fig, ax = plt.subplots()
ax.scatter(Y[:,0], Y[:,1])

# Label each point with a word; row i of Y is paired with the i-th entry of
# train_dataset.bag_words.
for i, word in enumerate(train_dataset.bag_words):
    x,y = Y[i]
    ax.annotate(word, (x,y))

plt.show()
#set visibility of most, least and mid frequency hashtags by setting text size
def get_tag_size_label(tlist):
	"""Return text sizes and labels that make only the tags in ``tlist`` visible.

	Walks the module-level ``tag_labels`` in order.  A tag found in ``tlist``
	gets size 2 and its latin-1 decoded text; every other tag gets size 0 and
	an empty label.

	Returns ``(size, label)``: a list of ints and an ``array`` of label
	strings, both aligned with ``tag_labels``.
	"""
	size = []
	label = []
	for t in tag_labels:
		shown = t in tlist
		size.append(2 if shown else 0)
		label.append(t.decode('latin-1') if shown else '')
	return size, array(label)

# Module-level embedding: computed once and read as the global ``Y`` by
# save_embed_plot below.
X = array(hist_feature)
Y = tsne(X, 2, 50, 30.0);

def save_embed_plot(sizes_and_labels,fname):
	"""Draw the module-level embedding ``Y`` with per-point labels and save it.

	sizes_and_labels -- a ``(tag_sizes, labels)`` pair, e.g. the return value
	                    of ``get_tag_size_label``; ``tag_sizes`` gives the
	                    text size of each label and ``labels`` the text.
	fname            -- output path passed to ``fig.savefig``.
	"""
	# Unpack inside the body: tuple parameters in a signature were removed in
	# Python 3 (PEP 3113).  Positional callers are unaffected.
	tag_sizes, labels = sizes_and_labels
	fig = Plot.figure()
	# Marker size 0: only the text annotations are meant to be visible.
	Plot.scatter(Y[:,0], Y[:,1], 0)
	for label, x, y, s in zip(labels, Y[:,0], Y[:,1], tag_sizes):
		Plot.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points', size=s)
	Plot.axis('off')
	fig.savefig(fname, dpi=800, bbox_inches='tight')
	
if __name__ == "__main__":
	# A parenthesised single-argument print is valid both as the Python 2
	# statement and as the Python 3 function, and prints the same text.
	print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
	# print "Running example on 2,500 MNIST digits..."
	# X = Math.loadtxt("mnist2500_X.txt");
	# labels = Math.loadtxt("mnist2500_labels.txt");
	# Plot the embedding with only the tags in ``most_freq`` labelled.
	save_embed_plot(get_tag_size_label(most_freq),'embed_tag_mostfreq.png')