def main():
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary=True) for t in train]
    train = [t for t in train if t[3][-1] >= 0] # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary=True) for t in test]
    test = [t for t in test if t[3][-1] >= 0] # for filtering binary labels

    train = shuffle(train)
    train = train[:1000]
    test = shuffle(test)
    test = test[:500]

    V = len(word2idx)
    print "vocab size:", V
    D = 80
    K = 5
    model = RecursiveNN(V, D, K)
    model.fit(train, reg=0, activation=T.nnet.relu)
    print "train accuracy:", model.score(train)
    print "test accuracy:", model.score(test)
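# The main() variants in this file call add_idx_to_tree() before flattening each
# tree with tree2list(). A minimal sketch of what that helper is assumed to do:
# a post-order traversal that assigns every node a consecutive index, children
# before parents, so the root ends up last (which is why t[3][-1] is treated as
# the root label below). The Tree attributes (.left, .right, .idx) are
# assumptions for illustration, not necessarily the real class.
def add_idx_to_tree(tree, current_idx):
    if tree is None:
        return current_idx
    current_idx = add_idx_to_tree(tree.left, current_idx)
    current_idx = add_idx_to_tree(tree.right, current_idx)
    tree.idx = current_idx
    current_idx += 1
    return current_idx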
def main(is_binary=True):
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary) for t in train]
    if is_binary:
        train = [t for t in train if t[3][-1] >= 0] # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary) for t in test]
    if is_binary:
        test = [t for t in test if t[3][-1] >= 0] # for filtering binary labels

    train = shuffle(train)
    train = train[:5000]
    # n_pos = sum(t[3][-1] for t in train)
    # print "n_pos train:", n_pos
    test = shuffle(test)
    test = test[:1000]
    # n_pos = sum(t[3][-1] for t in test)
    # print "n_pos test:", n_pos

    V = len(word2idx)
    print("vocab size:", V)
    D = 20
    K = 2 if is_binary else 5
    model = RecursiveNN(V, D, K)
    model.fit(train)
    print("train accuracy:", model.score(train))
    print("test accuracy:", model.score(test))
    print("train f1:", model.f1_score(train))
    print("test f1:", model.f1_score(test))
def main(is_binary=True):
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary) for t in train]
    if is_binary:
        train = [t for t in train if t[3][-1] >= 0] # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary) for t in test]
    if is_binary:
        test = [t for t in test if t[3][-1] >= 0] # for filtering binary labels

    train = shuffle(train)
    train = train[:5000]
    # n_pos = sum(t[3][-1] for t in train)
    # print "n_pos train:", n_pos
    test = shuffle(test)
    test = test[:1000]
    # n_pos = sum(t[3][-1] for t in test)
    # print "n_pos test:", n_pos

    V = len(word2idx)
    print "vocab size:", V
    D = 20
    K = 2 if is_binary else 5
    model = RecursiveNN(V, D, K)
    model.fit(train)
    print "train accuracy:", model.score(train)
    print "test accuracy:", model.score(test)
    print "train f1:", model.f1_score(train)
    print "test f1:", model.f1_score(test)
def main():
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, True) for t in train]
    train = [t for t in train if t[3][-1] >= 0] # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, True) for t in test]
    test = [t for t in test if t[3][-1] >= 0] # for filtering binary labels

    train = shuffle(train)
    train = train[:1000]
    # n_pos = sum(t[3][-1] for t in train)
    # print "n_pos train:", n_pos
    test = shuffle(test)
    test = test[:100]
    # n_pos = sum(t[3][-1] for t in test)
    # print "n_pos test:", n_pos

    V = len(word2idx)
    print "vocab size:", V
    D = 80
    K = 5
    model = RecursiveNN(V, D, K)
    model.fit(train, epochs=3, activation=T.nnet.relu)
    print "train accuracy:", model.score(train)
    print "test accuracy:", model.score(test)
    print "train f1:", model.f1_score(train)
    print "test f1:", model.f1_score(test)
def main(is_binary=True):
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary) for t in train]
    if is_binary:
        train = [t for t in train if t[3][-1] >= 0] # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary) for t in test]
    if is_binary:
        test = [t for t in test if t[3][-1] >= 0] # for filtering binary labels

    # check imbalance
    # pos = 0
    # neg = 0
    # mid = 0
    # label_counts = np.zeros(5)
    # for t in train + test:
    #     words, left_child, right_child, labels = t
    #     # for l in labels:
    #     #     if l == 0:
    #     #         neg += 1
    #     #     elif l == 1:
    #     #         pos += 1
    #     #     else:
    #     #         mid += 1
    #     for l in labels:
    #         label_counts[l] += 1
    # # print("pos / total:", float(pos) / (pos + neg + mid))
    # # print("mid / total:", float(mid) / (pos + neg + mid))
    # # print("neg / total:", float(neg) / (pos + neg + mid))
    # print("label proportions:", label_counts / label_counts.sum())
    # exit()

    train = shuffle(train)
    # train = train[:5000]
    # n_pos = sum(t[3][-1] for t in train)
    # print("n_pos train:", n_pos)
    test = shuffle(test)
    smalltest = test[:1000]
    # n_pos = sum(t[3][-1] for t in test)
    # print("n_pos test:", n_pos)

    V = len(word2idx)
    print("vocab size:", V)
    D = 20
    K = 2 if is_binary else 5
    model = RecursiveNN(V, D, K)
    model.fit(train, smalltest, epochs=20, train_inner_nodes=True)
    print("train accuracy:", model.score(train))
    print("test accuracy:", model.score(test))
    print("train f1:", model.f1_score(train))
    print("test f1:", model.f1_score(test))
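# The commented-out imbalance check above can be pulled into a small standalone
# helper. This is a sketch distilled from that block, not part of the original
# script: the helper name is ours, and it assumes every label is an int in
# [0, n_classes), as the commented code does.
import numpy as np

def print_label_proportions(trees, n_classes=5):
    label_counts = np.zeros(n_classes)
    for words, left_child, right_child, labels in trees:
        for l in labels:
            label_counts[l] += 1
    print("label proportions:", label_counts / label_counts.sum())

# usage (mirrors the commented block): print_label_proportions(train + test)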
def main():
    train, test, word2idx = get_ptb_data()
    train = train[:100]
    test = test[:100]

    V = len(word2idx)
    D = 80
    K = 5
    model = TNN(V, D, K, tf.nn.relu)
    model.fit(train)
def main(is_binary=True):
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary) for t in train]
    if is_binary:
        train = [t for t in train if t[3][-1] >= 0] # for filtering binary labels

    # sanity check
    # check that last node has no parent
    # for t in train:
    #     assert(t[1][-1] == -1 and t[2][-1] == -1)

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary) for t in test]
    if is_binary:
        test = [t for t in test if t[3][-1] >= 0] # for filtering binary labels

    train = shuffle(train)
    # train = train[:2000]
    n_pos = sum(t[3][-1] for t in train)
    # print("num pos train:", n_pos)
    # idx2word = {v:k for k, v in word2idx.items()}
    # for i in range(4):
    #     words, _, _, labels = train[i]
    #     print_sentence(words, idx2word)
    #     print("label:", labels[-1])

    test = shuffle(test)
    test = test[:1000]

    V = len(word2idx)
    print("vocab size:", V)
    D = 10
    K = 2 if is_binary else 5
    model = RecursiveNN(V, D, K)
    model.fit(train, learning_rate=1e-2, reg=1e-2, mu=0, epochs=20, activation=T.tanh, train_inner_nodes=False)
    print("train accuracy:", model.score(train))
    print("test accuracy:", model.score(test))

    # make sure program doesn't end until we close the plot
    plt.show()
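# The commented debug block above references a print_sentence() helper. A
# hypothetical sketch of what it might look like, assuming `words` holds
# vocabulary indices for leaf nodes and a negative sentinel (e.g. -1) for
# internal nodes; the real helper may differ.
def print_sentence(words, idx2word):
    sentence = " ".join(idx2word[w] for w in words if w >= 0)
    print(sentence)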
def main():
    train, test, word2idx = get_ptb_data()
    train = train[:100]
    test = test[:100]

    V = len(word2idx)
    D = 80
    K = 5
    model = TNN(V, D, K, tf.nn.relu)
    model.fit(train)
    # score(None) is presumably intended to reuse the training trees cached by fit()
    print "train accuracy:", model.score(None)
    print "test accuracy:", model.score(test)
def main():
    train, test, word2idx = get_ptb_data()
    train = train[:5]
    # print (word2idx)
    # exit()  # debug early-exit disabled; leaving it active would skip the fit below
    test = test[:5]

    V = len(word2idx)
    D = 20
    K = 5
    model = TNN(V, D, K, tf.nn.relu)
    model.fit(train)
def main(is_binary=True):
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary) for t in train]
    if is_binary:
        train = [t for t in train if t[3][-1] >= 0] # for filtering binary labels

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary) for t in test]
    if is_binary:
        test = [t for t in test if t[3][-1] >= 0] # for filtering binary labels

    train = shuffle(train)
def main(is_binary=True):
    train, test, word2idx = get_ptb_data()

    for t in train:
        add_idx_to_tree(t, 0)
    train = [tree2list(t, -1, is_binary) for t in train]
    if is_binary:
        train = [t for t in train if t[3][-1] >= 0] # for filtering binary labels

    # sanity check
    # check that last node has no parent
    # for t in train:
    #     assert(t[1][-1] == -1 and t[2][-1] == -1)

    for t in test:
        add_idx_to_tree(t, 0)
    test = [tree2list(t, -1, is_binary) for t in test]
    if is_binary:
        test = [t for t in test if t[3][-1] >= 0] # for filtering binary labels

    train = shuffle(train)
            wordSequence.append(reverseLookUp[element])
        else:
            wordSequence.append('.')
    return wordSequence


datasetPath = "trees/treeSentences.txt"
lookUp, reverseLookUp = getLookUps(datasetPath)
sentences = getSentences(datasetPath, lookUp)
#print(lookUp)
#print(reverseLookUp)
#print(sentences)
print("hello")
vocabsize = len(lookUp)
train, test, word2idx = get_ptb_data()
# vocabsize = len(word2idx)
ans = []
print(word_yield(train[0]))
exit()
#print("vocab size: "+str(len(lookUp))+" number of sentences: "+str(len(sentences)))

# word vector dimensions
dime = 200

# word embedding
# all variables will get changed during back propagation
embeddings = tf.Variable(tf.random_uniform([vocabsize, dime], -1.0, 1.0),
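# A minimal TF1-style sketch of how an embedding matrix like `embeddings` above
# is typically used once its (truncated) definition is completed: look up the
# vectors for a sequence of word ids. The placeholder name is ours, not from
# the original script; it continues the `import tensorflow as tf` context above.
word_ids = tf.placeholder(tf.int32, shape=[None], name='word_ids')
word_vectors = tf.nn.embedding_lookup(embeddings, word_ids)  # shape: (n_words, dime)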