コード例 #1
0
def get_training_data(vectors_filename, vocab_filename, vocab_thresh, n=300):
    """Build the input matrix B and the target matrix Y for bigram composition.

    Every bigram label is expected to look like ``first_second``. Row ``i`` of
    B is the vector of the first word followed by the vector of the second
    word; row ``i`` of Y is the vector stored under the full bigram label.

    Args:
        vectors_filename: Forwarded to ``word_vecs.load`` to obtain the
            vector lookup.
        vocab_filename: Forwarded to ``load_bigrams`` to obtain the labels.
        vocab_thresh: Forwarded unchanged to ``load_bigrams``.
        n: Length of a single word vector. Defaults to 300, the value that
            used to be hard-coded in this function.

    Returns:
        A tuple ``(B, Y)``. B is an ``N x 2n`` ``numpy.ndarray`` and Y is an
        ``N x n`` ``numpy.matrix``, where N is the number of bigram labels.

    Raises:
        IndexError: If a label contains no ``'_'`` separator.
        Whatever the vector lookup raises for a missing word or label
        (presumably ``KeyError`` -- confirm against ``word_vecs.load``).
    """
    bigrams_vocab = load_bigrams(vocab_filename, vocab_thresh)
    word_vectors = word_vecs.load(vectors_filename)
    N = len(bigrams_vocab)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('Constructing matrix B row-wise')
    B = np.zeros(shape=(N, 2 * n))
    for idx, label in enumerate(bigrams_vocab):
        # Split once per label; only the first two parts are used.
        words = label.split('_')
        B[idx] = np.concatenate((word_vectors[words[0]], word_vectors[words[1]]))

    # Each column of Y is the target vector for one output dimension.
    print('Constructing matrix Y row-wise')
    Y = np.zeros(shape=(N, n))
    for idx, label in enumerate(bigrams_vocab):
        Y[idx] = word_vectors[label]
    # Y is handed back as np.matrix while B stays a plain ndarray; the return
    # types are kept exactly as before so existing callers are unaffected.
    Y = np.matrix(Y)

    return B, Y
コード例 #2
0
def train(vectors_filename, vocab_filename, vocab_thresh, n=50):
    """Fit the linear map A that composes two word vectors into a bigram vector.

    B holds one row per bigram (first-word vector followed by second-word
    vector) and Y holds the vector stored for the bigram label itself. A is
    obtained from the pseudoinverse of B as ``(pinv(B) * Y)^T`` and has shape
    ``n x 2n``.

    Args:
        vectors_filename: Forwarded to ``word_vecs.load`` to obtain the
            vector lookup.
        vocab_filename: Forwarded to ``load_bigrams`` to obtain the labels.
        vocab_thresh: Forwarded unchanged to ``load_bigrams``.
        n: Length of a single word vector. Defaults to 50, the value that
            used to be hard-coded in this function.

    Returns:
        The ``n x 2n`` ``numpy.ndarray`` A. The same array is also stored in
        the module-level global ``A``, which is how earlier callers read it.

    Raises:
        IndexError: If a label contains no ``'_'`` separator.
        Whatever the vector lookup raises for a missing word or label
        (presumably ``KeyError`` -- confirm against ``word_vecs.load``).
    """
    global A  # existing code reads the result from this module-level name

    bigrams_vocab = load_bigrams(vocab_filename, vocab_thresh)
    word_vectors = word_vecs.load(vectors_filename)
    N = len(bigrams_vocab)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('Constructing matrix B row-wise')
    B = np.zeros(shape=(N, 2 * n))
    for idx, label in enumerate(bigrams_vocab):
        # Split once per label; only the first two parts are used.
        words = label.split('_')
        B[idx] = np.concatenate((word_vectors[words[0]], word_vectors[words[1]]))
    B = np.matrix(B)

    print('Computing pseudoinverse')
    B_inv = np.linalg.pinv(B)

    # Each column of Y is the target vector for one output dimension.
    print('Constructing matrix Y row-wise')
    Y = np.zeros(shape=(N, n))
    for idx, label in enumerate(bigrams_vocab):
        Y[idx] = word_vectors[label]
    Y = np.matrix(Y)

    print('Recovering linear approximation matrix A row-wise')
    # Row i of A is (B_inv * Y[:, i])^T, so all rows come from one matrix
    # product. np.array(...) keeps A a plain, writable ndarray as before.
    A = np.array((B_inv * Y).T)
    return A
コード例 #3
0
def train(vectors_filename, vocab_filename, vocab_thresh, n=50):
	"""Fit the linear map A that composes two word vectors into a bigram vector.

	B holds one row per bigram (first-word vector followed by second-word
	vector) and Y holds the vector stored for the bigram label itself. A is
	obtained from the pseudoinverse of B as ``(pinv(B) * Y)^T`` and has shape
	``n x 2n``.

	Args:
		vectors_filename: Forwarded to ``word_vecs.load`` to obtain the
			vector lookup.
		vocab_filename: Forwarded to ``load_bigrams`` to obtain the labels.
		vocab_thresh: Forwarded unchanged to ``load_bigrams``.
		n: Length of a single word vector. Defaults to 50, the value that
			used to be hard-coded in this function.

	Returns:
		The ``n x 2n`` ``numpy.ndarray`` A. The same array is also stored in
		the module-level global ``A``, which is how earlier callers read it.

	Raises:
		IndexError: If a label contains no ``'_'`` separator.
		Whatever the vector lookup raises for a missing word or label
		(presumably ``KeyError`` -- confirm against ``word_vecs.load``).
	"""
	global A  # existing code reads the result from this module-level name

	bigrams_vocab = load_bigrams(vocab_filename, vocab_thresh)
	word_vectors = word_vecs.load(vectors_filename)
	N = len(bigrams_vocab)

	# print(...) with a single argument behaves the same on Python 2 and 3.
	print('Constructing matrix B row-wise')
	B = np.zeros(shape=(N, 2 * n))
	for idx, label in enumerate(bigrams_vocab):
		# Split once per label; only the first two parts are used.
		words = label.split('_')
		B[idx] = np.concatenate((word_vectors[words[0]], word_vectors[words[1]]))
	B = np.matrix(B)

	print('Computing pseudoinverse')
	B_inv = np.linalg.pinv(B)

	# Each column of Y is the target vector for one output dimension.
	print('Constructing matrix Y row-wise')
	Y = np.zeros(shape=(N, n))
	for idx, label in enumerate(bigrams_vocab):
		Y[idx] = word_vectors[label]
	Y = np.matrix(Y)

	print('Recovering linear approximation matrix A row-wise')
	# Row i of A is (B_inv * Y[:, i])^T, so all rows come from one matrix
	# product. np.array(...) keeps A a plain, writable ndarray as before.
	A = np.array((B_inv * Y).T)
	return A
コード例 #4
0
def get_training_data(vectors_filename, vocab_filename, vocab_thresh, n=300):
	"""Build the input matrix B and the target matrix Y for bigram composition.

	Every bigram label is expected to look like ``first_second``. Row ``i`` of
	B is the vector of the first word followed by the vector of the second
	word; row ``i`` of Y is the vector stored under the full bigram label.

	Args:
		vectors_filename: Forwarded to ``word_vecs.load`` to obtain the
			vector lookup.
		vocab_filename: Forwarded to ``load_bigrams`` to obtain the labels.
		vocab_thresh: Forwarded unchanged to ``load_bigrams``.
		n: Length of a single word vector. Defaults to 300, the value that
			used to be hard-coded in this function.

	Returns:
		A tuple ``(B, Y)``. B is an ``N x 2n`` ``numpy.ndarray`` and Y is an
		``N x n`` ``numpy.matrix``, where N is the number of bigram labels.

	Raises:
		IndexError: If a label contains no ``'_'`` separator.
		Whatever the vector lookup raises for a missing word or label
		(presumably ``KeyError`` -- confirm against ``word_vecs.load``).
	"""
	bigrams_vocab = load_bigrams(vocab_filename, vocab_thresh)
	word_vectors = word_vecs.load(vectors_filename)
	N = len(bigrams_vocab)

	# print(...) with a single argument behaves the same on Python 2 and 3.
	print('Constructing matrix B row-wise')
	B = np.zeros(shape=(N, 2 * n))
	for idx, label in enumerate(bigrams_vocab):
		# Split once per label; only the first two parts are used.
		words = label.split('_')
		B[idx] = np.concatenate((word_vectors[words[0]], word_vectors[words[1]]))

	# Each column of Y is the target vector for one output dimension.
	print('Constructing matrix Y row-wise')
	Y = np.zeros(shape=(N, n))
	for idx, label in enumerate(bigrams_vocab):
		Y[idx] = word_vectors[label]
	# Y is handed back as np.matrix while B stays a plain ndarray; the return
	# types are kept exactly as before so existing callers are unaffected.
	Y = np.matrix(Y)

	return B, Y
コード例 #5
0
ファイル: pca.py プロジェクト: MyHumbleSelf/cs224n
#!/usr/bin/env python

import numpy as np
import load_word_vecs as word_vecs
from sklearn.decomposition import PCA
# import matplotlib.pyplot as plt
# from mpl_toolkits.mplot3d import Axes3D
import test_bigrams as tb
import matplotlib
import pylab as pl

import train_neural_network as tnn


# Script entry point: load the word vectors and prepare a 2-component PCA.
if __name__ == '__main__':
	# Load the vectors from the hard-coded relative path, then unpack them
	# into the two values returned by get_matrix.
	vecs = word_vecs.load("../data/vectors.txt")
	labels, wordVecsMatrix = word_vecs.get_matrix(vecs)
	# The PCA object is only constructed here; every fit/transform call in
	# the visible part of the script is commented out.
	pca = PCA(n_components=2)


	# Disabled: fit PCA on the full matrix and project it.
	# pca.fit(wordVecsMatrix);
	# reduced_X = pca.transform(wordVecsMatrix)

	# Disabled: second copy of the PCA run plus figure setup.
	# NOTE(review): the axes request projection='3d' while PCA is configured
	# with n_components=2 -- confirm the intended dimensionality before
	# re-enabling.
	# print "Running PCA"
	# pca = PCA(n_components=2)
	# pca.fit(wordVecsMatrix);
	# reduced_X = pca.transform(wordVecsMatrix)
	# fig = pl.figure()
	# ax = fig.add_subplot(111, projection='3d')

	# plot full data
コード例 #6
0
ファイル: run_tsne.py プロジェクト: MyHumbleSelf/cs224n
#!/usr/bin/env python
import numpy as np
import load_word_vecs as word_vecs
import bh_tsne.bhtsne as tsne

# Script entry point: run Barnes-Hut t-SNE over the loaded word vectors.
if __name__ == '__main__':
	# Load the vectors from the hard-coded relative path and unpack them into
	# the two values returned by get_matrix.
	labels, wordVecsMatrix = word_vecs.get_matrix(word_vecs.load("../data/vectors.txt"))

	# Run t-SNE on the whole of wordVecsMatrix (pass a subset instead to look
	# at only some of the bigrams).
	points = tsne.bh_tsne(wordVecsMatrix)
	# NOTE(review): `points` is not used while the save below stays disabled.
	# np.save("../data/tsne_coordinates", points)
コード例 #7
0
ファイル: evaluate.py プロジェクト: MyHumbleSelf/cs224n
#!/usr/bin/env python

import numpy as np
import load_word_vecs as word_vecs

# Script entry point.
if __name__ == '__main__':
	# Load the word vectors from the hard-coded relative path (presumably
	# resolved against the current working directory -- confirm in
	# word_vecs.load).
	vecs = word_vecs.load("../../glove/vectors.txt")
コード例 #8
0
ファイル: pca.py プロジェクト: runngezhang/cs224n
#!/usr/bin/env python

import numpy as np
import load_word_vecs as word_vecs
from sklearn.decomposition import PCA
# import matplotlib.pyplot as plt
# from mpl_toolkits.mplot3d import Axes3D
import test_bigrams as tb
import matplotlib
import pylab as pl

import train_neural_network as tnn

# Script entry point: load the word vectors and prepare a 2-component PCA.
if __name__ == '__main__':
    # Load the vectors from the hard-coded relative path, then unpack them
    # into the two values returned by get_matrix.
    vecs = word_vecs.load("../data/vectors.txt")
    labels, wordVecsMatrix = word_vecs.get_matrix(vecs)
    # The PCA object is only constructed here; every fit/transform call
    # below is commented out, so nothing is fitted or plotted.
    pca = PCA(n_components=2)

    # Disabled: fit PCA on the full matrix and project it.
    # pca.fit(wordVecsMatrix);
    # reduced_X = pca.transform(wordVecsMatrix)

    # Disabled: second copy of the PCA run plus figure setup.
    # print "Running PCA"
    # pca = PCA(n_components=2)
    # pca.fit(wordVecsMatrix);
    # reduced_X = pca.transform(wordVecsMatrix)
    # fig = pl.figure()
    # ax = fig.add_subplot(111, projection='3d')

    # Disabled: scatter plot of the projected data.
    # NOTE(review): the scatter call reads reduced_X[:, 2], a third column,
    # while PCA is configured with n_components=2 -- raise n_components or
    # drop the third axis before re-enabling. It also calls plt.show() while
    # the matplotlib.pyplot import at the top of the file is commented out.
    # ax.scatter(reduced_X[:, 0], reduced_X[:, 1], reduced_X[:, 2])
    # plt.show()
コード例 #9
0
#!/usr/bin/env python

import numpy as np
import load_word_vecs as word_vecs

# Script entry point.
if __name__ == '__main__':
    # Load the word vectors from the hard-coded relative path (presumably
    # resolved against the current working directory -- confirm in
    # word_vecs.load).
    vecs = word_vecs.load("../../glove/vectors.txt")