def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data( 300, 70, 100) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) self.w2v_vocab, w2v_embd, embedding_dict = self.load_w2v() self.embedding_dim = len(w2v_embd[0]) self.w2v_vocab_size = len(self.w2v_vocab) self.vocab_size = len(self.vocab) embeddings_tmp = [] for i in range(self.vocab_size): item = self.vocab.decode(i) if item in self.w2v_vocab: embeddings_tmp.append(embedding_dict[item]) # print("Found word {}".format(item)) else: # print("Couldn't find {}.".format(item)) rand_num = np.random.uniform(low=-0.2, high=0.2, size=self.embedding_dim) embeddings_tmp.append(rand_num) self.embed = np.asarray(embeddings_tmp)
def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))
def load_data(self, LOAD_DATA=False):
    """Loads train/dev/test data and builds vocabulary."""
    if LOAD_DATA:
        # only initialize the Vocab class because of the embedding matrix
        self.vocab = Vocab()
    else:
        self.train_data, self.dev_data, self.test_data = tr.simplified_data(600, 40)
        # self.train_data, self.dev_data, self.test_data = tr.simplified_data(2000, 500)

        # build vocab from training data
        self.vocab = Vocab()
        train_sents = [t.get_words() for t in self.train_data]
        self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))
def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data( 700, 100, 200) # build vocab from training data self.vocab = Vocab() # train_sents = [t.get_words() for t in self.train_data] # self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) all_sents = [t.get_words() for t in self.train_data] + [ t.get_words() for t in self.dev_data ] + [t.get_words() for t in self.test_data] self.vocab.construct(list(itertools.chain.from_iterable(all_sents))) for k in self.vocab.word_to_index.keys(): print '\t {} : {}'.format(k, self.vocab.word_to_index[k])
def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data = tree.simplified_data(700, 100, 200)
    self.vocab = utils.Vocab()
    train_sents = [t.get_words() for t in self.train_data]
    self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    # add input placeholders
    self.is_leaf_placeholder = tf.placeholder(tf.int32, (None),
                                              name='is_leaf_placeholder')
    self.node_word_indices_placeholder = tf.placeholder(
        tf.int32, (None), name='node_word_indices_placeholder')
    self.labels_placeholder = tf.placeholder(tf.int32, (None),
                                             name='labels_placeholder')
    self.cons_placeholder = tf.placeholder(tf.int32, (None), name='cons')

    # add model variables
    # making initialization deterministic for now
    # initializer = tf.random_normal_initializer(seed=1)
    with tf.variable_scope('Embeddings'):
        self.embeddings = tf.get_variable(
            'embeddings', [len(self.vocab), self.config.embed_size])
    with tf.variable_scope('Composition'):
        W1 = tf.get_variable(
            'W1', [2 * self.config.embed_size, self.config.embed_size])
        b1 = tf.get_variable('b1', [1, self.config.embed_size])
    with tf.variable_scope('Projection'):
        U = tf.get_variable(
            'U', [self.config.embed_size, self.config.label_size])
        bs = tf.get_variable('bs', [1, self.config.label_size])

    # Build recursive graph
    def embed_word(word_index, embeddings):
        return tf.expand_dims(tf.gather(embeddings, word_index), 0)

    def combine_children(left_tensor, right_tensor, W, b):
        return tf.nn.relu(
            tf.matmul(tf.concat([left_tensor, right_tensor], 1), W) + b)

    def find_loss(node_tensor, i, labels, U, bs):
        # add projection layer
        node_logits = tf.matmul(node_tensor, U) + bs
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=node_logits, labels=labels[i:i + 1])
        return loss

    def base_case(node_word_indices, i, embeddings, labels, U, bs):
        word_index = tf.gather(node_word_indices, i)
        node_tensor = embed_word(word_index, embeddings)
        loss = find_loss(node_tensor, i, labels, U, bs)
        return [node_tensor, loss]

    def rec_case(i, is_leaf, node_word_indices, embeddings, W, b, labels, U, bs):
        left_node, left_loss = rec(i * 2, is_leaf, node_word_indices,
                                   embeddings, W, b, labels, U, bs)
        right_node, right_loss = rec(i * 2 + 1, is_leaf, node_word_indices,
                                     embeddings, W, b, labels, U, bs)
        node_tensor = combine_children(left_node, right_node, W, b)
        node_loss = find_loss(node_tensor, i, labels, U, bs)
        loss = tf.concat([left_loss, node_loss, right_loss], 0)
        return [node_tensor, loss]

    # Function Declaration
    rec = function.Declare(
        "Rec",
        [("i", tf.int32), ("is_leaf", tf.int32),
         ("node_word_indices", tf.int32), ("embeddings", tf.float32),
         ("W", tf.float32), ("b", tf.float32), ("labels", tf.int32),
         ("U", tf.float32), ("bs", tf.float32)],
        [("ret", tf.float32), ("ret1", tf.float32)])

    # Function Definition
    @function.Defun(tf.int32, tf.int32, tf.int32, tf.float32, tf.float32,
                    tf.float32, tf.int32, tf.float32, tf.float32,
                    func_name="Rec", grad_func="GradFac",
                    create_grad_func=True, out_names=["ret", "ret1"])
    def RecImpl(i, is_leaf, node_word_indices, embeddings, W, b, labels, U, bs):
        node_tensor, loss = \
            tf.cond(tf.equal(tf.gather(is_leaf, i), tf.constant(1)),
                    lambda: base_case(node_word_indices, i, embeddings,
                                      labels, U, bs),
                    lambda: rec_case(i, is_leaf, node_word_indices,
                                     embeddings, W, b, labels, U, bs))
        return [node_tensor, loss]

    RecImpl.add_to_graph(tf.get_default_graph())

    self.node_tensor, self.full_loss = rec(
        self.cons_placeholder, self.is_leaf_placeholder,
        self.node_word_indices_placeholder, self.embeddings, W1, b1,
        self.labels_placeholder, U, bs)

    # add projection layer
    self.root_logits = tf.matmul(self.node_tensor, U) + bs
    self.root_prediction = tf.squeeze(tf.argmax(self.root_logits, 1))

    # add loss layer
    self.root_loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.root_logits, labels=self.labels_placeholder[1:2]))
    regularization_loss = self.config.l2 * (tf.nn.l2_loss(W1) +
                                            tf.nn.l2_loss(U))
    self.full_loss = regularization_loss + tf.reduce_sum(self.full_loss)

    # add training op
    self.train_op = tf.train.AdamOptimizer(self.config.lr).minimize(
        self.full_loss)
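
# A minimal sketch (assumed, not from the original code) of how a parse
# tree could be flattened into the heap-indexed feed arrays this recursive
# model expects: node i keeps its children at 2*i and 2*i + 1, the root
# sits at index 1 (the value fed to cons_placeholder), and the arrays are
# pre-sized for a complete binary tree. The node attributes (isLeaf, word,
# label, left, right) and vocab.encode are assumed interfaces of the
# tree/utils modules.
def heap_flatten(node, vocab, is_leaf, word_indices, labels, i=1):
    is_leaf[i] = 1 if node.isLeaf else 0
    word_indices[i] = vocab.encode(node.word) if node.isLeaf else -1
    labels[i] = node.label
    if not node.isLeaf:
        heap_flatten(node.left, vocab, is_leaf, word_indices, labels, 2 * i)
        heap_flatten(node.right, vocab, is_leaf, word_indices, labels, 2 * i + 1)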
def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data = tree.simplified_data(700, 100, 200)
    self.vocab = utils.Vocab()
    train_sents = [t.get_words() for t in self.train_data]
    self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    # add input placeholders
    self.is_leaf_placeholder = tf.compat.v1.placeholder(
        tf.bool, (None), name='is_leaf_placeholder')
    self.left_children_placeholder = tf.compat.v1.placeholder(
        tf.int32, (None), name='left_children_placeholder')
    self.right_children_placeholder = tf.compat.v1.placeholder(
        tf.int32, (None), name='right_children_placeholder')
    self.node_word_indices_placeholder = tf.compat.v1.placeholder(
        tf.int32, (None), name='node_word_indices_placeholder')
    self.labels_placeholder = tf.compat.v1.placeholder(
        tf.int32, (None), name='labels_placeholder')

    # add model variables
    with tf.compat.v1.variable_scope('Embeddings'):
        embeddings = tf.compat.v1.get_variable(
            'embeddings', [len(self.vocab), self.config.embed_size])
    with tf.compat.v1.variable_scope('Composition'):
        W1 = tf.compat.v1.get_variable(
            'W1', [2 * self.config.embed_size, self.config.embed_size])
        b1 = tf.compat.v1.get_variable('b1', [1, self.config.embed_size])
    with tf.compat.v1.variable_scope('Projection'):
        U = tf.compat.v1.get_variable(
            'U', [self.config.embed_size, self.config.label_size])
        bs = tf.compat.v1.get_variable('bs', [1, self.config.label_size])

    # build recursive graph
    tensor_array = tf.TensorArray(tf.float32,
                                  size=0,
                                  dynamic_size=True,
                                  clear_after_read=False,
                                  infer_shape=False)

    def embed_word(word_index):
        with tf.device('/cpu:0'):
            return tf.expand_dims(tf.gather(embeddings, word_index), 0)

    def combine_children(left_tensor, right_tensor):
        return tf.nn.relu(
            tf.matmul(tf.concat([left_tensor, right_tensor], 1), W1) + b1)

    def loop_body(tensor_array, i):
        node_is_leaf = tf.gather(self.is_leaf_placeholder, i)
        node_word_index = tf.gather(self.node_word_indices_placeholder, i)
        left_child = tf.gather(self.left_children_placeholder, i)
        right_child = tf.gather(self.right_children_placeholder, i)
        node_tensor = tf.cond(
            node_is_leaf,
            lambda: embed_word(node_word_index),
            lambda: combine_children(tensor_array.read(left_child),
                                     tensor_array.read(right_child)))
        tensor_array = tensor_array.write(i, node_tensor)
        i = tf.add(i, 1)
        return tensor_array, i

    loop_cond = lambda tensor_array, i: \
        tf.less(i, tf.squeeze(tf.shape(self.is_leaf_placeholder)))
    self.tensor_array, _ = tf.while_loop(loop_cond,
                                         loop_body,
                                         [tensor_array, 0],
                                         parallel_iterations=1)

    # add projection layer
    self.logits = tf.matmul(self.tensor_array.concat(), U) + bs
    self.root_logits = tf.matmul(
        self.tensor_array.read(self.tensor_array.size() - 1), U) + bs
    self.root_prediction = tf.squeeze(tf.argmax(self.root_logits, 1))

    # add loss layer
    regularization_loss = self.config.l2 * (tf.nn.l2_loss(W1) +
                                            tf.nn.l2_loss(U))
    included_indices = tf.where(tf.less(self.labels_placeholder, 2))
    self.full_loss = regularization_loss + tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=tf.gather(self.logits, included_indices),
            labels=tf.gather(self.labels_placeholder, included_indices)))
    self.root_loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.root_logits, labels=self.labels_placeholder[-1:]))

    # add training op (also under compat.v1: the v1 optimizers are not
    # exposed as tf.train in TF2)
    self.train_op = tf.compat.v1.train.GradientDescentOptimizer(
        self.config.lr).minimize(self.full_loss)
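
# A minimal sketch (assumed, not from the original code) of the feed
# construction this while_loop model depends on: nodes are emitted in
# post-order so every child's TensorArray slot is written before its
# parent reads it, which is also why parallel_iterations=1 is set above.
# Node attributes and vocab.encode are assumed interfaces.
def postorder_flatten(root, vocab):
    is_leaf, word_indices, left, right, labels = [], [], [], [], []

    def visit(node):
        l = -1 if node.isLeaf else visit(node.left)
        r = -1 if node.isLeaf else visit(node.right)
        is_leaf.append(node.isLeaf)
        word_indices.append(vocab.encode(node.word) if node.isLeaf else -1)
        left.append(l)
        right.append(r)
        labels.append(node.label)
        return len(is_leaf) - 1  # TensorArray index assigned to this node

    visit(root)
    return is_leaf, word_indices, left, right, labels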
def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data = tree.simplified_data(700, 100, 200)
    self.config.max_tree_nodes = tree.get_max_tree_nodes(
        self.train_data + self.dev_data + self.test_data)
    print(self.config.max_tree_nodes)
    self.vocab = utils.Vocab()
    train_sents = [t.get_words() for t in self.train_data]
    self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    # add input placeholders
    dim1 = self.config.batch_size
    dim2 = self.config.max_tree_nodes
    self.is_leaf_placeholder = tf.placeholder(
        tf.bool, [dim1, dim2], name='is_leaf_placeholder')
    self.left_children_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='left_children_placeholder')
    self.right_children_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='right_children_placeholder')
    self.node_word_indices_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='node_word_indices_placeholder')
    self.labels_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='labels_placeholder')
    self.tree_size_placeholder = tf.placeholder(
        tf.int32, [dim1], name='tree_size_placeholder')

    # add model variables
    # making initialization deterministic for now
    # initializer = tf.random_normal_initializer(seed=1)
    with tf.variable_scope('Embeddings'):
        self.embeddings = tf.get_variable(
            'embeddings', [len(self.vocab), self.config.embed_size])
    with tf.variable_scope('Composition'):
        self.W1 = tf.get_variable(
            'W1', [2 * self.config.embed_size, self.config.embed_size])
        self.b1 = tf.get_variable('b1', [1, self.config.embed_size])
    with tf.variable_scope('Projection'):
        self.U = tf.get_variable(
            'U', [self.config.embed_size, self.config.label_size])
        self.bs = tf.get_variable('bs', [1, self.config.label_size])

    # Build recursive graph, one tree per batch slot
    outloss = []
    prediction = []
    root_loss = []
    for idx_batch in range(self.config.batch_size):
        self.root_prediction, self.full_loss, self.root_loss = \
            self.compute_tree(idx_batch)
        prediction.append(self.root_prediction)
        outloss.append(self.full_loss)
        root_loss.append(self.root_loss)
    batch_loss = tf.stack(outloss)
    self.pred = tf.stack(prediction)
    self.rloss = tf.stack(root_loss)

    # Compute batch loss
    self.total_loss = tf.reduce_mean(batch_loss)

    # Add training op
    self.train_op = tf.train.AdamOptimizer(self.config.lr).minimize(self.total_loss)
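
# A minimal sketch (assumed, not from the original code) of padding the
# per-tree arrays (e.g. from a postorder_flatten-style helper) up to
# max_tree_nodes so trees of different sizes fit the fixed
# [batch_size, max_tree_nodes] placeholders above; the true size would be
# fed to tree_size_placeholder so padded slots are never read.
def pad_tree_arrays(arrays, max_nodes, fill=-1):
    # arrays: list of equal-length per-node lists for one tree
    size = len(arrays[0])
    padded = [list(a) + [fill] * (max_nodes - size) for a in arrays]
    return padded, size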
def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data = tree.simplified_data(700, 100, 200)
    self.vocab = utils.Vocab()
    train_sents = [t.get_words() for t in self.train_data]
    self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    # add input placeholders
    self.is_leaf_placeholder = tf.placeholder(
        tf.int32, (None), name='is_leaf_placeholder')
    self.node_word_indices_placeholder = tf.placeholder(
        tf.int32, (None), name='node_word_indices_placeholder')
    self.labels_placeholder = tf.placeholder(
        tf.int32, (None), name='labels_placeholder')
    self.cons_placeholder = tf.placeholder(
        tf.int32, (None), name='cons')

    # add model variables
    # making initialization deterministic for now (currently unused: the
    # per-variable initializer arguments below are commented out)
    initializer = tf.random_normal_initializer(seed=1)
    with tf.variable_scope('Embeddings'):
        self.embeddings = tf.get_variable(
            'embeddings', [len(self.vocab), self.config.embed_size])
        # initializer=initializer or tf.constant_initializer(2.0)
    with tf.variable_scope('Composition'):
        W1 = tf.get_variable(
            'W1', [2 * self.config.embed_size, self.config.embed_size])
        b1 = tf.get_variable('b1', [1, self.config.embed_size])
    with tf.variable_scope('Projection'):
        U = tf.get_variable(
            'U', [self.config.embed_size, self.config.label_size])
        bs = tf.get_variable('bs', [1, self.config.label_size])

    # Build recursive graph
    def embed_word(word_index, embeddings):
        # with tf.device('/cpu:0'):
        return tf.expand_dims(tf.gather(embeddings, word_index), 0)

    def combine_children(left_tensor, right_tensor, W, b):
        return tf.nn.relu(
            tf.matmul(tf.concat([left_tensor, right_tensor], 1), W) + b)

    # Function Declaration
    rec = function.Declare(
        "Rec",
        [("i", tf.int32), ("is_leaf", tf.int32),
         ("node_word_indices", tf.int32), ("embeddings", tf.float32),
         ("W", tf.float32), ("b", tf.float32)],
        [("ret", tf.float32)])

    # Function Definition
    @function.Defun(tf.int32, tf.int32, tf.int32, tf.float32, tf.float32,
                    tf.float32, func_name="Rec", grad_func="GradFac",
                    create_grad_func=True, out_names=["ret"])
    def RecImpl(i, is_leaf, node_word_indices, embeddings, W, b):
        node_word_index = tf.gather(node_word_indices, i)
        node_tensor = \
            tf.cond(tf.equal(tf.gather(is_leaf, i), tf.constant(1)),
                    lambda: embed_word(node_word_index, embeddings),
                    lambda: combine_children(
                        rec(i * 2, is_leaf, node_word_indices, embeddings, W, b),
                        rec(i * 2 + 1, is_leaf, node_word_indices, embeddings, W, b),
                        W, b))
        return node_tensor

    RecImpl.add_to_graph(tf.get_default_graph())

    self.node_tensor = rec(self.cons_placeholder, self.is_leaf_placeholder,
                           self.node_word_indices_placeholder,
                           self.embeddings, W1, b1)

    # add projection layer
    # self.logits = tf.matmul(self.tensor_array.concat(), U) + bs
    # 1x35 * 35x35 + 1x35 -> 1x35 projection
    self.root_logits = tf.matmul(self.node_tensor, U) + bs
    self.root_prediction = tf.squeeze(tf.argmax(self.root_logits, 1))

    # add loss layer: the regularized full per-node loss is commented out;
    # only the root label is trained here
    # regularization_loss = self.config.l2 * (tf.nn.l2_loss(W1) + tf.nn.l2_loss(U))
    # included_indices = tf.where(tf.less(self.labels_placeholder, 2))
    # self.full_loss = regularization_loss + tf.reduce_sum(
    #     tf.nn.sparse_softmax_cross_entropy_with_logits(
    #         logits=tf.gather(self.logits, included_indices),
    #         labels=tf.gather(self.labels_placeholder, included_indices)))
    self.root_loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.root_logits, labels=self.labels_placeholder[1:2]))

    # add training op
    self.train_op = tf.train.GradientDescentOptimizer(
        self.config.lr).minimize(self.root_loss)
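
# A minimal usage sketch (assumed, not from the original code) for the
# root-loss model above: feed one heap-flattened tree (see the
# heap_flatten sketch earlier) and start the recursion at the root.
# `model` and `sess` are an assumed model instance and tf.Session.
feed_dict = {
    model.is_leaf_placeholder: is_leaf,
    model.node_word_indices_placeholder: word_indices,
    model.labels_placeholder: labels,
    model.cons_placeholder: 1,  # root of the heap layout
}
_, step_loss = sess.run([model.train_op, model.root_loss],
                        feed_dict=feed_dict)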
def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data, self.real_test = \
        tree.simplified_data(4000, 500, 500)

    # add input placeholders
    self.is_leaf_placeholder = tf.placeholder(
        tf.bool, (None), name='is_leaf_placeholder')
    self.left_children_placeholder = tf.placeholder(
        tf.int32, (None), name='left_children_placeholder')
    self.right_children_placeholder = tf.placeholder(
        tf.int32, (None), name='right_children_placeholder')
    self.node_word_indices_placeholder = tf.placeholder(
        tf.int32, (None), name='node_word_indices_placeholder')
    self.labels_placeholder = tf.placeholder(
        tf.int32, (None), name='labels_placeholder')

    self.vocab = utils.Vocab()
    data = self.train_data
    train_sents = [t.get_words() for t in data]
    vocab_size = self.vocab.construct(
        list(itertools.chain.from_iterable(train_sents)))

    # GloVe initialization, currently disabled:
    '''
    def loadGloveModel(gloveFile):
        print("Loading Glove Model")
        f = open(gloveFile, 'r')
        words = []
        embeddings = []
        for line in f:
            splitLine = line.split()
            word = splitLine[0]
            embedding = [float(val) for val in splitLine[1:]]
            words.append(word)
            embeddings.append(embedding)
        print("Done.", len(words), " words loaded!")
        return words, embeddings

    self.glove_words, self.embeddings = loadGloveModel("filtered_glove_300.txt")
    num = 0
    embed = np.zeros((vocab_size, self.config.embed_size), dtype='f')
    for i in range(vocab_size):
        word = self.vocab.decode(i)
        if word in self.glove_words:
            indx = self.glove_words.index(word)
            num += 1
            np.append(embed, self.embeddings[indx])
        else:
            np.append(embed, np.random.uniform(-0.1, 0.1, self.config.embed_size))
    print(num)
    '''

    with tf.variable_scope('Embeddings'):
        embeddings = tf.get_variable(
            'embeddings', [len(self.vocab), self.config.embed_size])
        # embeddings = tf.get_variable('embeddings', initializer=embed, trainable=True)

    # Initialize W1 as two stacked 0.5-scaled identity blocks, so that
    # composition starts out as the average of the two child vectors.
    a = np.zeros((self.config.embed_size, self.config.embed_size), dtype='f')
    np.fill_diagonal(a, 0.5)
    b = np.vstack((a, a))
    print(b)
    with tf.variable_scope('Composition'):
        W1 = tf.Variable(b, name='W1', dtype=tf.float32)
        b1 = tf.get_variable('b1', [1, self.config.embed_size])
    with tf.variable_scope('Projection'):
        U = tf.get_variable('U', [self.config.embed_size, self.config.label_size])
        bs = tf.get_variable('bs', [1, self.config.label_size])

    # build recursive graph
    tensor_array = tf.TensorArray(
        tf.float32,
        size=0,
        dynamic_size=True,
        clear_after_read=False,
        infer_shape=False)

    def embed_word(word_index):
        with tf.device('/cpu:0'):
            return tf.expand_dims(tf.gather(embeddings, word_index), 0)

    def combine_children(left_tensor, right_tensor):
        return tf.nn.relu(
            tf.matmul(tf.concat([left_tensor, right_tensor], 1), W1) + b1)

    def loop_body(tensor_array, i):
        node_is_leaf = tf.gather(self.is_leaf_placeholder, i)
        node_word_index = tf.gather(self.node_word_indices_placeholder, i)
        left_child = tf.gather(self.left_children_placeholder, i)
        right_child = tf.gather(self.right_children_placeholder, i)
        node_tensor = tf.cond(
            node_is_leaf,
            lambda: embed_word(node_word_index),
            lambda: combine_children(tensor_array.read(left_child),
                                     tensor_array.read(right_child)))
        tensor_array = tensor_array.write(i, node_tensor)
        i = tf.add(i, 1)
        return tensor_array, i

    loop_cond = lambda tensor_array, i: \
        tf.less(i, tf.squeeze(tf.shape(self.is_leaf_placeholder)))
    self.tensor_array, _ = tf.while_loop(
        loop_cond, loop_body, [tensor_array, 0], parallel_iterations=1)

    # add projection layer
    self.logits = tf.matmul(self.tensor_array.concat(), U) + bs
    self.root_logits = tf.matmul(
        self.tensor_array.read(self.tensor_array.size() - 1), U) + bs
    self.root_prediction = tf.squeeze(tf.argmax(self.root_logits, 1))

    # add loss layer: the full per-node loss is commented out; only the
    # root label is trained
    regularization_loss = self.config.l2 * (
        tf.nn.l2_loss(W1) + tf.nn.l2_loss(U))
    included_indices = tf.where(tf.less(self.labels_placeholder, 2))
    # self.full_loss = regularization_loss + tf.reduce_sum(
    #     tf.nn.sparse_softmax_cross_entropy_with_logits(
    #         logits=tf.gather(self.logits, included_indices),
    #         labels=tf.gather(self.labels_placeholder, included_indices)))
    self.root_loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.root_logits, labels=self.labels_placeholder[-1:]))

    # add training op
    self.train_op = tf.train.AdamOptimizer(self.config.lr).minimize(
        self.root_loss)
parser.add_argument("--intermediate", action="store_true", help="train intermediate labels") parser.add_argument("--inter_alpha", type=float, default=0.1, help="adjust the penalty on intermediate labels") parser.add_argument("--corpus", type=str, default='raw', help="acd|raw") parser.add_argument("--mode", type=str, default='lstm', help="rnn|lstm") params, _ = parser.parse_known_args() if __name__ == '__main__': data = params.corpus print(data) assert data == 'acd_trees_128d' or data == 'acd_trees_512d' or data == 'raw' or data == 'acd_trees_512d_rand' train_data, dev_data, test_data = tr.simplified_data(0, 0, 0, data) print(len(train_data), len(dev_data), len(test_data)) print(train_data[0]) vocab = Vocab() train_sents = [t.get_words() for t in train_data] vocab.construct(list(itertools.chain.from_iterable(train_sents))) if params.mode == 'lstm': model = RNN_LSTM_Model(vocab, embed_size=embed_size).cuda() else: model = RNN_Model(vocab, embed_size=embed_size).cuda() loss_history = [] optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0.0)
def forward(self, x):
    """Forward function accepts input data and returns a Variable of
    output data."""
    self.node_list = []
    root_node = self.walk_tree(x.root)
    all_nodes = torch.cat(self.node_list)
    # now I need to project out
    return all_nodes


def main():
    print("do nothing")


if __name__ == '__main__':
    train_data, dev_data, test_data = tr.simplified_data(train_size, 100, 200)

    vocab = Vocab()
    train_sents = [t.get_words() for t in train_data]
    vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    model = RNN_Model(vocab, embed_size=50)
    main()

    lr = 0.01
    loss_history = []
    # params (iterable): parameters to optimize or dicts defining parameter groups
    # lr (float): learning rate
    # momentum (float, optional): momentum factor (default: 0)
    # weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr,
                                momentum=0.9, dampening=0.0)
    # torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0, weight_decay=0)
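
# A minimal training-step sketch (assumed, not from the original code) for
# the PyTorch model above. forward() returns the stacked node vectors, so
# a projection to label logits is still needed; the `projection` layer,
# the per-node `get_labels` helper, and `max_epochs` are assumptions, not
# part of the original model.
import torch
import torch.nn.functional as F

for epoch in range(max_epochs):
    for tree in train_data:
        optimizer.zero_grad()
        all_nodes = model(tree)                   # runs walk_tree over the parse tree
        logits = model.projection(all_nodes)      # assumed projection layer
        labels = torch.tensor(tree.get_labels())  # assumed per-node labels helper
        loss = F.cross_entropy(logits, labels)
        loss.backward()
        optimizer.step()
        loss_history.append(loss.item())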
def __init__(self, config):
    self.config = config

    # Load train data and build vocabulary
    self.train_data, self.dev_data, self.test_data = tree.simplified_data(700, 100, 200)
    max_height = tree.get_max_tree_height(
        self.train_data + self.dev_data + self.test_data)
    # heap indexing (children at 2i, 2i+1) needs arrays sized for a
    # complete binary tree of this height
    self.config.max_tree_height = pow(2, max_height + 1)
    print(self.config.max_tree_height)
    self.vocab = utils.Vocab()
    train_sents = [t.get_words() for t in self.train_data]
    self.vocab.construct(list(itertools.chain.from_iterable(train_sents)))

    # add input placeholders
    dim1 = self.config.batch_size
    dim2 = self.config.max_tree_height
    self.is_leaf_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='is_leaf_placeholder')
    self.node_word_indices_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='node_word_indices_placeholder')
    self.labels_placeholder = tf.placeholder(
        tf.int32, [dim1, dim2], name='labels_placeholder')
    self.cons_placeholder = tf.placeholder(
        tf.int32, (None), name='cons')

    # add model variables
    with tf.variable_scope('Embeddings'):
        self.embeddings = tf.get_variable(
            'embeddings', [len(self.vocab), self.config.embed_size])
    with tf.variable_scope('Composition'):
        self.W1 = tf.get_variable(
            'W1', [2 * self.config.embed_size, self.config.embed_size])
        self.b1 = tf.get_variable('b1', [1, self.config.embed_size])
    with tf.variable_scope('Projection'):
        self.U = tf.get_variable(
            'U', [self.config.embed_size, self.config.label_size])
        self.bs = tf.get_variable('bs', [1, self.config.label_size])

    # Build recursive graph
    def embed_word(word_index, embeddings):
        return tf.expand_dims(tf.gather(embeddings, word_index), 0)

    def combine_children(left_tensor, right_tensor, W, b):
        return tf.nn.relu(
            tf.matmul(tf.concat([left_tensor, right_tensor], 1), W) + b)

    def find_loss(node_tensor, i, labels, U, bs):
        # add projection layer
        node_logits = tf.matmul(node_tensor, U) + bs
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=node_logits, labels=labels[i:i + 1])
        return loss

    def base_case(node_word_indices, i, embeddings, labels, U, bs):
        word_index = tf.gather(node_word_indices, i)
        node_tensor = embed_word(word_index, embeddings)
        loss = find_loss(node_tensor, i, labels, U, bs)
        return [node_tensor, loss]

    def rec_case(i, is_leaf, node_word_indices, embeddings, W, b, labels, U, bs):
        left_node, left_loss = self.rec(i * 2, is_leaf, node_word_indices,
                                        embeddings, W, b, labels, U, bs)
        right_node, right_loss = self.rec(i * 2 + 1, is_leaf, node_word_indices,
                                          embeddings, W, b, labels, U, bs)
        node_tensor = combine_children(left_node, right_node, W, b)
        node_loss = find_loss(node_tensor, i, labels, U, bs)
        loss = tf.concat([left_loss, node_loss, right_loss], 0)
        return [node_tensor, loss]

    # Function Declaration
    self.rec = function.Declare(
        "Rec",
        [("i", tf.int32), ("is_leaf", tf.int32),
         ("node_word_indices", tf.int32), ("embeddings", tf.float32),
         ("W", tf.float32), ("b", tf.float32), ("labels", tf.int32),
         ("U", tf.float32), ("bs", tf.float32)],
        [("ret", tf.float32), ("ret1", tf.float32)])

    # Function Definition
    @function.Defun(tf.int32, tf.int32, tf.int32, tf.float32, tf.float32,
                    tf.float32, tf.int32, tf.float32, tf.float32,
                    func_name="Rec", grad_func="GradFac",
                    create_grad_func=True, out_names=["ret", "ret1"])
    def RecImpl(i, is_leaf, node_word_indices, embeddings, W, b, labels, U, bs):
        node_tensor, loss = \
            tf.cond(tf.equal(tf.gather(is_leaf, i), tf.constant(1)),
                    lambda: base_case(node_word_indices, i, embeddings,
                                      labels, U, bs),
                    lambda: rec_case(i, is_leaf, node_word_indices,
                                     embeddings, W, b, labels, U, bs))
        return [node_tensor, loss]

    RecImpl.add_to_graph(tf.get_default_graph())

    outloss = []
    prediction = []
    root_loss = []
    for idx_batch in range(self.config.batch_size):
        self.root_prediction, self.full_loss, self.root_loss = \
            self.compute_tree(idx_batch)
        prediction.append(self.root_prediction)
        outloss.append(self.full_loss)
        root_loss.append(self.root_loss)
    batch_loss = tf.stack(outloss)
    self.pred = tf.stack(prediction)
    self.rloss = tf.stack(root_loss)

    # Compute batch loss
    # reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    # regpart = tf.add_n(reg_losses)
    # loss = tf.reduce_mean(batch_loss)
    # self.total_loss = loss + 0.5*regpart
    self.total_loss = tf.reduce_mean(batch_loss)

    # Add training op
    self.train_op = tf.train.AdamOptimizer(self.config.lr).minimize(self.total_loss)
""" Forward function accepts input data and returns a Variable of output data """ self.node_list = [] root_node = self.walk_tree(x.root) all_nodes = torch.cat(self.node_list) #now I need to project out return all_nodes def main(): print("do nothing") if __name__ == '__main__': train_data, dev_data, test_data = tr.simplified_data(train_size, 100, 200) vocab = Vocab() train_sents = [t.get_words() for t in train_data] vocab.construct(list(itertools.chain.from_iterable(train_sents))) model = RNN_Model(vocab, embed_size=50) main() lr = 0.01 loss_history = [] optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0.0) # params (iterable): iterable of parameters to optimize or dicts defining # parameter groups # lr (float): learning rate