def buid_sentence_expression():
    """Build the recursive block that embeds a JSON-encoded sentence tree.

    The raw input is first passed through ``WNJsonDecoder``.  Each node is
    then dispatched on its ``'leaf'`` item:

    * ``True``  -- the node's ``'word_vec'`` is used directly as a vector of
      size ``embedding_size``;
    * ``False`` -- the mean of the recursively embedded ``'children'`` is
      concatenated with the node's own ``'word_vec'`` and fed through a fully
      connected layer of size ``embedding_size``.

    Returns:
        The composed block ``InputTransform >> OneOf``.
    """
    decode_input = td.InputTransform(
        lambda sentence_json: WNJsonDecoder(sentence_json))

    # Forward declaration: lets a node refer to the embedding of its
    # children before the complete expression has been defined.
    subtree = td.ForwardDeclaration(td.PyObjectType())

    # Leaf nodes: just read the stored word vector.
    leaf_branch = (td.GetItem('word_vec', name='leaf_in')
                   >> td.Vector(embedding_size))

    # Inner nodes: combine the averaged child embeddings with the node's
    # own word vector.
    children_mean = td.Map(subtree()) >> td.Mean()
    own_vector = td.Vector(embedding_size)
    node_record = td.Record(
        {'children': children_mean, 'word_vec': own_vector},
        name='index_in')
    inner_branch = (node_record
                    >> td.Concat(name='concat_root_child')
                    >> td.FC(embedding_size, name='FC_root_child'))

    node_expr = td.OneOf(
        td.GetItem('leaf'),
        {True: leaf_branch, False: inner_branch},
        name='recur_in')
    subtree.resolve_to(node_expr)

    return decode_input >> node_expr
def tree_sum_blk(loss_blk):
    """Build a block that sums a per-node loss over a whole tree.

    For every node the block evaluates ``loss_blk()`` on the node itself,
    recursively evaluates the same sum on each element of the node's
    ``'children'`` item, reduces the child results with ``tf.add`` and finally
    adds the node's own loss.

    Args:
        loss_blk: Zero-argument callable returning a fresh block that maps a
            node to its loss (declared here as a scalar ``TensorType([])``).

    Returns:
        The ``td.Composition`` implementing the recursive sum.
    """
    # traverse the tree to sum up the loss
    recurse = td.ForwardDeclaration(td.PyObjectType(), td.TensorType([]))

    node_sum = td.Composition()
    with node_sum.scope():
        own_loss = loss_blk().reads(node_sum.input)
        child_nodes = td.GetItem('children').reads(node_sum.input)
        child_sums = td.Map(recurse()).reads(child_nodes)
        children_total = td.Reduce(td.Function(tf.add)).reads(child_sums)
        total = td.Function(tf.add).reads(children_total, own_loss)
        node_sum.output.reads(total)

    recurse.resolve_to(node_sum)
    return node_sum
def __init__(self, weight_matrix, word_idx, ModelConfig):
    """Build the tree-LSTM model, its training op and a ready session.

    Args:
        weight_matrix: Initial embedding matrix; its ``shape`` is unpacked
            into the ``td.Embedding`` constructor and it is used as the
            initializer.
        word_idx: Word-to-index mapping, stored on the instance.
        ModelConfig: Configuration object; this method reads
            ``lstm_num_units``, ``num_classes``, ``learning_rate`` and
            ``embedding_learning_rate`` from it.
    """
    self.ModelConfig = ModelConfig
    self.word_embedding = td.Embedding(
        *weight_matrix.shape,
        initializer=weight_matrix,
        name='word_embedding')
    self.word_idx = word_idx

    # Keep probability defaults to 1.0 (no dropout) unless fed explicitly.
    self.keep_prob_ph = tf.placeholder_with_default(1.0, [])
    lstm_cell = BinaryTreeLSTMCell(
        self.ModelConfig.lstm_num_units, keep_prob=self.keep_prob_ph)
    dropout_cell = tf.contrib.rnn.DropoutWrapper(
        lstm_cell,
        input_keep_prob=self.keep_prob_ph,
        output_keep_prob=self.keep_prob_ph)
    self.tree_lstm = td.ScopedLayer(dropout_cell, name_or_scope='tree_lstm')

    self.output_layer = td.FC(
        self.ModelConfig.num_classes, activation=None, name='output_layer')

    # The root model and the recursive sub-tree model are built by the
    # same method and differ only in the ``is_root`` flag.
    self.embed_subtree = td.ForwardDeclaration(name='embed_subtree')
    self.model = self.embed_tree(is_root=True)
    self.embed_subtree.resolve_to(self.embed_tree(is_root=False))

    self.compiler = td.Compiler.create(self.model)
    print('input type: %s' % self.model.input_type)
    print('output type: %s' % self.model.output_type)

    # Every metric is reported as its mean over the batch.
    self.metrics = {}
    for metric_name, metric_tensor in self.compiler.metric_tensors.items():
        self.metrics[metric_name] = tf.reduce_mean(metric_tensor)

    self.loss = tf.reduce_sum(self.compiler.metric_tensors['all_loss'])
    opt = tf.train.AdagradOptimizer(ModelConfig.learning_rate)

    # Scale the gradient of the embedding matrix by its own factor and
    # leave every other gradient untouched.
    rescaled = []
    found = 0
    for grad, var in opt.compute_gradients(self.loss):
        if var == self.word_embedding.weights:
            found += 1
            grad = tf.scalar_mul(ModelConfig.embedding_learning_rate, grad)
        rescaled.append((grad, var))
    assert found == 1  # internal consistency check
    self.train_op = opt.apply_gradients(rescaled)

    self.saver = tf.train.Saver()
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
def buid_sentence_expression():
    """Build the recursive block that embeds a binary sentence tree.

    The raw input is wrapped in a ``WordNode``.  Each node is dispatched on
    its ``'leaf'`` item: leaves look up their int32 ``'word_id'`` through
    ``embedding``; inner nodes embed their ``'left'`` and ``'right'``
    sub-trees recursively, concatenate the two results and apply ``fc``.

    Returns:
        The composed block ``InputTransform >> OneOf``.
    """
    to_word_node = td.InputTransform(
        lambda sentence_json: WordNode(sentence_json))

    # Forward declaration so inner nodes can recurse into their children.
    subtree = td.ForwardDeclaration(td.PyObjectType())

    # Leaf: word id -> int32 scalar -> embedding lookup.
    word_id = td.GetItem('word_id', name='leaf_in')
    leaf_branch = word_id >> td.Scalar(dtype=tf.int32) >> embedding

    # Inner node: embed both children, join them, project with ``fc``.
    child_pair = td.Record({'left': subtree(), 'right': subtree()})
    inner_branch = child_pair >> td.Concat(name='concat_root_child') >> fc

    branches = {True: leaf_branch, False: inner_branch}
    node_expr = td.OneOf(td.GetItem('leaf'), branches, name='recur_in')
    subtree.resolve_to(node_expr)

    return to_word_node >> node_expr
def __init__(self, weights, vocab, tree_lstm_num_units, tree_binarizer=None):
    """Create the layers shared by the tree embedding model.

    Args:
        weights: Initial word-embedding matrix; its ``shape`` is unpacked
            into ``td.Embedding`` and it is used as the initializer.
        vocab: Vocabulary object, stored on the instance and handed to the
            default ``TreeBinarizer``.
        tree_lstm_num_units: Number of units of the binary tree LSTM cell.
        tree_binarizer: Optional binarizer; when falsy, a
            ``TreeBinarizer(vocab, {})`` is created instead.
    """
    # Any falsy value (not only None) triggers the default binarizer.
    self.tree_binarizer = tree_binarizer or TreeBinarizer(vocab, {})

    # Keep probability defaults to 1.0 (no dropout) unless fed explicitly;
    # the same placeholder drives recurrent, input and output dropout.
    keep_prob = tf.placeholder_with_default(1.0, [])
    self.tree_lstm_keep_prob_ph = keep_prob

    lstm_cell = BinaryTreeLSTMCell(tree_lstm_num_units, keep_prob)
    dropout_cell = tf.contrib.rnn.DropoutWrapper(
        lstm_cell, keep_prob, keep_prob)
    self.tree_lstm_cell = td.ScopedLayer(
        dropout_cell,
        name_or_scope=self._tree_lstm_cell_default_scope_name)

    self.word_embedding = td.Embedding(
        *weights.shape,
        initializer=weights,
        name=self._word_embedding_default_scope_name)

    self.embed_subtree = td.ForwardDeclaration(name='embed_subtree')
    self.vocab = vocab
def dynamic_pooling_blk():
    """Build the recursive max-pooling block over a tree.

    Input:  root node dict.
    Output: pooled features, ``TensorType([hyper.conv_dim, ])``.

    A node whose ``'clen'`` equals 0 is handled by ``feature_detector_blk()``
    alone.  Any other node takes the element-wise maximum of its own detected
    features and the pooled results of all its ``'children'``.
    """
    leaf_branch = feature_detector_blk()

    pooled_type = td.TensorType([hyper.conv_dim, ])
    recurse = td.ForwardDeclaration(td.PyObjectType(), pooled_type)

    inner_branch = td.Composition()
    with inner_branch.scope():
        own_features = feature_detector_blk().reads(inner_branch.input)
        child_nodes = td.GetItem('children').reads(inner_branch.input)
        child_pools = td.Map(recurse()).reads(child_nodes)
        children_max = td.Reduce(td.Function(tf.maximum)).reads(child_pools)
        node_max = td.Function(tf.maximum).reads(children_max, own_features)
        inner_branch.output.reads(node_max)

    # Dispatch on whether the node has any children.
    pool = td.OneOf(
        lambda x: x['clen'] == 0,
        {True: leaf_branch, False: inner_branch})
    recurse.resolve_to(pool)
    return pool
def __init__(self, image_feat_grid, text_seq_batch, seq_length_batch,
             T_decoder, num_vocab_txt, embed_dim_txt, num_vocab_nmn,
             embed_dim_nmn, lstm_dim, num_layers, assembler,
             encoder_dropout, decoder_dropout, decoder_sampling,
             num_choices, use_qpn, qpn_dropout, reduce_visfeat_dim=False,
             new_visfeat_dim=256, use_gt_layout=None, gt_layout_batch=None,
             scope='neural_module_network', reuse=None):
    """Assemble the layout generator, the module network and regularizers.

    Args:
        image_feat_grid: Visual feature tensor.  Its static shape without the
            first and last dimension (plus a trailing 1) is used as the
            attention map shape.
        text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
        embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim, num_layers,
        assembler, encoder_dropout, decoder_dropout, decoder_sampling,
        use_gt_layout, gt_layout_batch: Forwarded unchanged to
            ``AttentionSeq2Seq``.
        num_choices: Length of the dummy score vector produced for invalid
            layouts; also passed to ``Modules`` and the question prior net.
        use_qpn: When true, ``question_prior_net`` scores are added to the
            module network scores.
        qpn_dropout: Passed to ``question_prior_net``.
        reduce_visfeat_dim: When true, a 1x1 ``conv`` layer first maps the
            visual features to ``new_visfeat_dim`` channels.
        new_visfeat_dim: Output dimension of that optional layer.
        scope: Name of the enclosing variable scope; also used as the
            substring filter when collecting weights for L2 regularization.
        reuse: ``reuse`` flag of the enclosing variable scope.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Part 0: Visual feature from CNN
        self.reduce_visfeat_dim = reduce_visfeat_dim
        if reduce_visfeat_dim:
            # An extra linear 1x1 conv layer (without ReLU) reduces the
            # feature dimension.
            with tf.variable_scope('reduce_visfeat_dim'):
                image_feat_grid = conv(
                    'conv_reduce_visfeat_dim', image_feat_grid,
                    kernel_size=1, stride=1, output_dim=new_visfeat_dim)
            print('visual feature dimension reduced to %d' % new_visfeat_dim)
        self.image_feat_grid = image_feat_grid

        # Part 1: Seq2seq RNN to generate module layout tokens
        with tf.variable_scope('layout_generation'):
            att_seq2seq = AttentionSeq2Seq(
                text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
                embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim,
                num_layers, assembler, encoder_dropout, decoder_dropout,
                decoder_sampling, use_gt_layout, gt_layout_batch)
            self.att_seq2seq = att_seq2seq
            self.predicted_tokens = att_seq2seq.predicted_tokens
            self.token_probs = token_probs = att_seq2seq.token_probs
            self.word_vecs = word_vecs = att_seq2seq.word_vecs
            self.neg_entropy = neg_entropy = att_seq2seq.neg_entropy
            self.atts = att_seq2seq.atts

            # log probability of each generated sequence
            self.log_seq_prob = tf.reduce_sum(tf.log(token_probs), axis=0)

        # Part 2: Neural Module Network
        with tf.variable_scope('layout_execution'):
            modules = Modules(image_feat_grid, word_vecs, None, num_choices)
            self.modules = modules

            # Recursion of modules
            att_shape = image_feat_grid.get_shape().as_list()[1:-1] + [1]
            # Forward declaration of module recursion
            att_expr_decl = td.ForwardDeclaration(
                td.PyObjectType(), td.TensorType(att_shape))

            def module_case(module_fn, num_att_inputs):
                """Record of attention inputs + indices, fed to module_fn."""
                fields = [('input_%d' % idx, att_expr_decl())
                          for idx in range(num_att_inputs)]
                fields.append(('time_idx', td.Scalar('int32')))
                fields.append(('batch_idx', td.Scalar('int32')))
                return td.Record(fields) >> td.Function(module_fn)

            # (layout token, module function, number of attention inputs)
            # for modules that output an attention map ...
            attention_specs = [
                ('_Scene', modules.SceneModule, 0),
                ('_Find', modules.FindModule, 0),
                ('_Filter', modules.FilterModule, 1),
                ('_FindSameProperty', modules.FindSamePropertyModule, 1),
                ('_Transform', modules.TransformModule, 1),
                ('_And', modules.AndModule, 2),
                ('_Or', modules.OrModule, 2),
            ]
            # ... and for modules that output answer scores.
            answer_specs = [
                ('_Exist', modules.ExistModule, 1),
                ('_Count', modules.CountModule, 1),
                ('_EqualNum', modules.EqualNumModule, 2),
                ('_MoreNum', modules.MoreNumModule, 2),
                ('_LessNum', modules.LessNumModule, 2),
                ('_SameProperty', modules.SamePropertyModule, 2),
                ('_Describe', modules.DescribeModule, 1),
            ]
            attention_cases = {
                token: module_case(module_fn, arity)
                for token, module_fn, arity in attention_specs
            }
            answer_cases = {
                token: module_case(module_fn, arity)
                for token, module_fn, arity in answer_specs
            }

            recursion_cases = td.OneOf(td.GetItem('module'), attention_cases)
            att_expr_decl.resolve_to(recursion_cases)

            # For invalid expressions, define a dummy answer
            # so that all answers have the same form
            dummy_scores = td.Void() >> td.FromTensor(
                np.zeros(num_choices, np.float32))
            answer_cases[INVALID_EXPR] = dummy_scores
            output_scores = td.OneOf(td.GetItem('module'), answer_cases)

            # compile and get the output scores
            self.compiler = td.Compiler.create(output_scores)
            self.scores_nmn = self.compiler.output_tensors[0]

        # Add a question prior network if specified
        self.use_qpn = use_qpn
        self.qpn_dropout = qpn_dropout
        if use_qpn:
            self.scores_qpn = question_prior_net(
                att_seq2seq.encoder_states, num_choices, qpn_dropout)
            self.scores = self.scores_nmn + self.scores_qpn
        else:
            self.scores = self.scores_nmn

        # Regularization: Entropy + L2
        self.entropy_reg = tf.reduce_mean(neg_entropy)
        weight_losses = []
        for var in tf.trainable_variables():
            var_name = var.op.name
            if scope in var_name and var_name.endswith('weights'):
                weight_losses.append(tf.nn.l2_loss(var))
        self.l2_reg = tf.add_n(weight_losses)
# Keep probability defaults to 1.0 (no dropout) unless fed explicitly.
keep_prob_ph = tf.placeholder_with_default(1.0, [])

lstm_num_units = 256  # Tai et al. used 150
tree_lstm = td.ScopedLayer(
    binaryTreeLSTMCell(lstm_num_units, keep_prob=keep_prob_ph),
    name_or_scope='tree_lstm')

NUM_CLASSES = 6  # number of distinct labels
output_layer = td.FC(NUM_CLASSES, activation=None, name='output_layer')

# The embedding matrix is initialised from ``weight_matrix`` and frozen.
word_embedding = td.Embedding(
    *weight_matrix.shape,
    initializer=weight_matrix,
    name='word_embedding',
    trainable=False)

# declare recursive model
embed_subtree = td.ForwardDeclaration(name='embed_subtree')


def makeContextMat(input1):
    """Return ten 1s when ``int(input1)`` is 0, otherwise ten 0s."""
    flag = 1 if int(input1) == 0 else 0
    return [flag] * 10


def makeDepthMat(input2):
    """Return a 20-element 0/1 list with 1 at every index below ``int(input2)``."""
    depth = int(input2)
    return [int(position < depth) for position in range(20)]
def run(write_to, batch_size_setting):
    """Train a binary tree-LSTM sentiment model and log loss and timings.

    Word embeddings are read from ``./small_glove.txt`` and training trees
    from ``./dev.txt``.  The model is trained for ``EPOCHS`` (6) epochs with
    Adagrad; after training, the per-epoch average loss and a
    ``run time: <prepare time> <median epoch time>`` line are written to
    ``write_to``.

    NOTE(review): ``sess`` is used but never defined inside this function;
    presumably it is a module-level TensorFlow session -- confirm.

    Args:
        write_to: Path of the text file that receives the results.
        batch_size_setting: Batch size used when grouping the loom inputs.
    """
    startTime = time.time()
    data_dir = "../senti/"
    # Disabled download / GloVe-filtering helpers.  This is a bare string
    # expression, i.e. dead code that is never executed.
    """
    def download_and_unzip(url_base, zip_name, *file_names):
        zip_path = os.path.join(data_dir, zip_name)
        url = url_base + zip_name
        print('downloading %s to %s' % (url, zip_path))
        urllib.request.urlretrieve(url, zip_path)
        out_paths = []
        with zipfile.ZipFile(zip_path, 'r') as f:
            for file_name in file_names:
                print('extracting %s' % file_name)
                out_paths.append(f.extract(file_name, path=data_dir))
        return out_paths

    def download(url_base, zip_name):
        zip_path = os.path.join(data_dir, zip_name)
        url = url_base + zip_name
        print('downloading %s to %s' % (url, zip_path))
        urllib.request.urlretrieve(url, zip_path)

    full_glove_path, = download_and_unzip(
        'http://nlp.stanford.edu/data/', 'glove.840B.300d.zip',
        'glove.840B.300d.txt')

    train_path, dev_path, test_path = download_and_unzip(
        'http://nlp.stanford.edu/sentiment/', 'trainDevTestTrees_PTB.zip',
        'trees/train.txt', 'trees/dev.txt', 'trees/test.txt')

    filtered_glove_path = os.path.join(data_dir, 'filtered_glove.txt')

    def filter_glove():
        vocab = set()
        # Download the full set of unlabeled sentences separated by '|'.
        sentence_path, = download_and_unzip(
            'http://nlp.stanford.edu/~socherr/', 'stanfordSentimentTreebank.zip',
            'stanfordSentimentTreebank/SOStr.txt')
        with codecs.open(sentence_path, encoding='utf-8') as f:
            for line in f:
                # Drop the trailing newline and strip backslashes. Split into words.
                vocab.update(line.strip().replace('\\', '').split('|'))
        nread = 0
        nwrote = 0
        with codecs.open(full_glove_path, encoding='utf-8') as f:
            with codecs.open(filtered_glove_path, 'w', encoding='utf-8') as out:
                for line in f:
                    nread += 1
                    line = line.strip()
                    if not line: continue
                    if line.split(u' ', 1)[0] in vocab:
                        out.write(line + '\n')
                        nwrote += 1
        print('read %s lines, wrote %s' % (nread, nwrote))
    #filter_glove()
    """
    dev_glove_path = os.path.join('./', 'small_glove.txt')

    def load_embeddings(embedding_path):
        """Loads embeddings, returns weight matrix and dict from words to indices.

        Each line is split at the first space into a word and its vector.
        One extra random row is appended for unknown words, so the matrix
        has ``len(word_idx) + 1`` rows.
        """
        print('loading word embeddings from %s' % embedding_path)
        weight_vectors = []
        word_idx = {}
        with codecs.open(embedding_path, encoding='utf-8') as f:
            for line in f:
                word, vec = line.split(u' ', 1)
                word_idx[word] = len(weight_vectors)
                weight_vectors.append(np.array(vec.split(), dtype=np.float32))
        # Annoying implementation detail; '(' and ')' are replaced by '-LRB-' and
        # '-RRB-' respectively in the parse-trees.
        #word_idx[u'-LRB-'] = word_idx.pop(u'(')
        #word_idx[u'-RRB-'] = word_idx.pop(u')')
        # Random embedding vector for unknown words.
        weight_vectors.append(
            np.random.uniform(-0.05, 0.05, weight_vectors[0].shape).astype(np.float32))
        return np.stack(weight_vectors), word_idx

    weight_matrix, word_idx = load_embeddings(dev_glove_path)

    def load_trees(filename):
        """Read one tree per line, stripped and with backslashes removed."""
        with codecs.open(filename, encoding='utf-8') as f:
            # Drop the trailing newline and strip \s.
            trees = [line.strip().replace('\\', '') for line in f]
            print('loaded %s trees from %s' % (len(trees), filename))
            return trees

    #train_path = './senti/trees/train.txt'
    #train_path = os.path.join(data_dir, 'trees/dev.txt')
    train_path = './dev.txt'
    #dev_path = './senti/trees/dev.txt'
    #test_path = './senti/trees/test.txt'

    train_trees = load_trees(train_path)
    # Number of training trees; used to turn the summed loss into an average.
    trainSIZE = len(train_trees)
    #dev_trees = load_trees(dev_path)
    #test_trees = load_trees(test_path)

    class BinaryTreeLSTMCell(tf.contrib.rnn.BasicLSTMCell):
        """LSTM with two state inputs.

        This is the model described in section 3.2 of 'Improved Semantic
        Representations From Tree-Structured Long Short-Term Memory
        Networks' <http://arxiv.org/pdf/1503.00075.pdf>, with recurrent
        dropout as described in 'Recurrent Dropout without Memory Loss'
        <http://arxiv.org/pdf/1603.05118.pdf>.
        """

        def __init__(self, num_units, keep_prob=1.0):
            """Initialize the cell.

            Args:
              num_units: int, The number of units in the LSTM cell.
              keep_prob: Keep probability for recurrent dropout.
            """
            super(BinaryTreeLSTMCell, self).__init__(num_units)
            self._keep_prob = keep_prob

        def __call__(self, inputs, state, scope=None):
            """Combine an input with two child states; returns (new_h, new_state)."""
            with tf.variable_scope(scope or type(self).__name__):
                # ``state`` is a pair of (c, h) pairs, one per child.
                lhs, rhs = state
                c0, h0 = lhs
                c1, h1 = rhs
                # One linear layer produces all five gate pre-activations.
                concat = tf.contrib.layers.linear(
                    tf.concat([inputs, h0, h1], 1), 5 * self._num_units)

                # i = input_gate, j = new_input, f = forget_gate, o = output_gate
                i, j, f0, f1, o = tf.split(value=concat, num_or_size_splits=5, axis=1)

                j = self._activation(j)
                # Dropout is skipped only when keep_prob is a Python float
                # that is >= 1; any non-float (e.g. a placeholder) applies it.
                if not isinstance(self._keep_prob, float) or self._keep_prob < 1:
                    j = tf.nn.dropout(j, self._keep_prob)

                # Each child cell state has its own forget gate.
                new_c = (c0 * tf.sigmoid(f0 + self._forget_bias) +
                         c1 * tf.sigmoid(f1 + self._forget_bias) +
                         tf.sigmoid(i) * j)
                new_h = self._activation(new_c) * tf.sigmoid(o)

                new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)

                return new_h, new_state

    # Defaults to 1.0 (no dropout) unless a value is fed.
    keep_prob_ph = tf.placeholder_with_default(1.0, [])

    lstm_num_units = 150  # Tai et al. used 150, but our regularization strategy is more effective
    tree_lstm = td.ScopedLayer(
        tf.contrib.rnn.DropoutWrapper(
            BinaryTreeLSTMCell(lstm_num_units, keep_prob=keep_prob_ph),
            input_keep_prob=keep_prob_ph, output_keep_prob=keep_prob_ph),
        name_or_scope='tree_lstm')

    NUM_CLASSES = 5  # number of distinct sentiment labels
    output_layer = td.FC(NUM_CLASSES, activation=None, name='output_layer')

    # The embedding matrix is created with trainable=False.
    word_embedding = td.Embedding(
        *weight_matrix.shape, initializer=weight_matrix, name='word_embedding', trainable=False)

    embed_subtree = td.ForwardDeclaration(name='embed_subtree')

    def logits_and_state():
        """Creates a block that goes from tokens to (logits, state) tuples."""
        # Words missing from word_idx map to the extra last embedding row.
        unknown_idx = len(word_idx)
        lookup_word = lambda word: word_idx.get(word, unknown_idx)

        word2vec = (td.GetItem(0) >> td.InputTransform(lookup_word) >>
                    td.Scalar('int32') >> word_embedding)

        pair2vec = (embed_subtree(), embed_subtree())

        # Trees are binary, so the tree layer takes two states as its input_state.
        zero_state = td.Zeros((tree_lstm.state_size, ) * 2)
        # Input is a word vector.
        zero_inp = td.Zeros(word_embedding.output_type.shape[0])

        word_case = td.AllOf(word2vec, zero_state)
        pair_case = td.AllOf(zero_inp, pair2vec)

        # Dispatch on token count: 1 -> single word, 2 -> pair of sub-trees.
        tree2vec = td.OneOf(len, [(1, word_case), (2, pair_case)])

        return tree2vec >> tree_lstm >> (output_layer, td.Identity())

    def tf_node_loss(logits, labels):
        """Sparse softmax cross-entropy between logits and integer labels."""
        return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)

    def tf_fine_grained_hits(logits, labels):
        """1.0 where the argmax class equals the label, else 0.0 (float64)."""
        predictions = tf.cast(tf.argmax(logits, 1), tf.int32)
        return tf.cast(tf.equal(predictions, labels), tf.float64)

    def tf_binary_hits(logits, labels):
        """1.0 where the binary prediction agrees with ``labels > 2``.

        The prediction is true when the probability mass of classes 3 and 4
        exceeds that of classes 0 and 1.
        """
        softmax = tf.nn.softmax(logits)
        binary_predictions = (softmax[:, 3] + softmax[:, 4]) > (softmax[:, 0] + softmax[:, 1])
        binary_labels = labels > 2
        return tf.cast(
            tf.equal(binary_predictions, binary_labels), tf.float64)

    def add_metrics(is_root, is_neutral):
        """A block that adds metrics for loss and hits; output is the LSTM state."""
        c = td.Composition(
            name='predict(is_root=%s, is_neutral=%s)' % (is_root, is_neutral))
        with c.scope():
            # destructure the input; (labels, (logits, state))
            labels = c.input[0]
            logits = td.GetItem(0).reads(c.input[1])
            state = td.GetItem(1).reads(c.input[1])

            # calculate loss
            loss = td.Function(tf_node_loss)
            td.Metric('all_loss').reads(loss.reads(logits, labels))
            if is_root: td.Metric('root_loss').reads(loss)

            # calculate fine-grained hits
            hits = td.Function(tf_fine_grained_hits)
            td.Metric('all_hits').reads(hits.reads(logits, labels))
            if is_root: td.Metric('root_hits').reads(hits)

            # calculate binary hits, if the label is not neutral
            if not is_neutral:
                binary_hits = td.Function(tf_binary_hits).reads(logits, labels)
                td.Metric('all_binary_hits').reads(binary_hits)
                if is_root: td.Metric('root_binary_hits').reads(binary_hits)

            # output the state, which will be read by our parent's LSTM cell
            c.output.reads(state)
        return c

    def tokenize(s):
        """Split a tree string into (label, tokens) after dropping its first and last character."""
        label, phrase = s[1:-1].split(None, 1)
        return label, sexpr.sexpr_tokenize(phrase)

    def embed_tree(logits_and_state, is_root):
        """Creates a block that embeds trees; output is tree LSTM state."""
        return td.InputTransform(tokenize) >> td.OneOf(
            key_fn=lambda pair: pair[0] == '2',  # label 2 means neutral
            case_blocks=(add_metrics(is_root, is_neutral=False),
                         add_metrics(is_root, is_neutral=True)),
            pre_block=(td.Scalar('int32'), logits_and_state))

    model = embed_tree(logits_and_state(), is_root=True)

    # Close the recursion: sub-trees use the non-root variant of the model.
    embed_subtree.resolve_to(embed_tree(logits_and_state(), is_root=False))

    compiler = td.Compiler.create(model)
    print('input type: %s' % model.input_type)
    print('output type: %s' % model.output_type)

    # Mean of every metric tensor (only referenced by the disabled dev_eval).
    metrics = {k: tf.reduce_mean(v) for k, v in compiler.metric_tensors.items()}

    LEARNING_RATE = 0.05
    KEEP_PROB = 1.0
    BATCH_SIZE = batch_size_setting #20
    EPOCHS = 6
    EMBEDDING_LEARNING_RATE_FACTOR = 0

    train_feed_dict = {keep_prob_ph: KEEP_PROB}
    loss = tf.reduce_sum(compiler.metric_tensors['all_loss'])
    opt = tf.train.AdagradOptimizer(LEARNING_RATE)

    # Scale the gradient of the embedding weights, if they appear among the
    # optimizer's gradients at all (the matching assert is disabled).
    grads_and_vars = opt.compute_gradients(loss)
    found = 0
    for i, (grad, var) in enumerate(grads_and_vars):
        if var == word_embedding.weights:
            found += 1
            grad = tf.scalar_mul(EMBEDDING_LEARNING_RATE_FACTOR, grad)
            grads_and_vars[i] = (grad, var)
    #assert found == 1  # internal consistency check
    train = opt.apply_gradients(grads_and_vars)
    saver = tf.train.Saver()

    # NOTE(review): ``sess`` is not defined in this function -- see docstring.
    sess.run(tf.global_variables_initializer())

    def train_step(batch):
        """Run one optimizer step on ``batch`` and return its summed loss."""
        train_feed_dict[compiler.loom_input_tensor] = batch
        _, batch_loss = sess.run([train, loss], train_feed_dict)
        return batch_loss

    def train_epoch(train_set):
        """Train on every batch of ``train_set``; return the total loss."""
        return sum(train_step(batch) for batch in td.group_by_batches(train_set, BATCH_SIZE))

    train_set = compiler.build_loom_inputs(train_trees)
    # Disabled dev-set evaluation (bare string expression, never executed).
    """
    dev_feed_dict = compiler.build_feed_dict(dev_trees)

    def dev_eval(epoch, train_loss):
        dev_metrics = sess.run(metrics, dev_feed_dict)
        dev_loss = dev_metrics['all_loss']
        dev_accuracy = ['%s: %.2f' % (k, v * 100) for k, v in
                        sorted(dev_metrics.items()) if k.endswith('hits')]
        print('epoch:%4d, train_loss: %.3e, dev_loss_avg: %.3e, dev_accuracy:\n [%s]'
              % (epoch, train_loss, dev_loss, ' '.join(dev_accuracy)))
        return dev_metrics['root_hits']
    """
    # best_accuracy and save_path are only referenced by the disabled
    # checkpointing code in the loop below.
    best_accuracy = 0.0
    save_path = os.path.join(data_dir, 'sentiment_model')

    loopTime = time.time()
    #print('prepare time %s ' % (loopTime - startTime))

    loss_save = []
    time_save = []
    epoch_start_time = loopTime
    for epoch, shuffled in enumerate(td.epochs(train_set, EPOCHS), 1):
        train_loss = train_epoch(shuffled)
        av_loss = train_loss / trainSIZE
        epoch_end_time = time.time()
        epoch_time = epoch_end_time - epoch_start_time
        time_save.append(epoch_time)
        epoch_start_time = epoch_end_time
        print('train loss is %s at time %s' % (av_loss, epoch_time))
        loss_save.append(av_loss)
        #accuracy = dev_eval(epoch, train_loss)
        #if accuracy > best_accuracy:
        #    best_accuracy = accuracy
        #    checkpoint_path = saver.save(sess, save_path, global_step=epoch)
        #    print('model saved in file: %s' % checkpoint_path)

    loopEndTime = time.time()
    #print('loop time %s ' % (loopEndTime - loopTime))
    # Everything before the training loop counts as preparation time.
    prepareTime = loopTime - startTime
    # From here on loopTime holds the loop duration, not a timestamp.
    loopTime = loopEndTime - loopTime
    timePerEpoch = loopTime / EPOCHS

    # use median time instead
    # (element at index EPOCHS/2 of the sorted epoch times; assumes one
    # entry per epoch was recorded -- TODO confirm td.epochs yields EPOCHS items)
    time_save.sort()
    median_time = time_save[int(EPOCHS / 2)]

    with open(write_to, "w") as f:
        f.write("unit: " + "1 epoch\n")
        # Note: this loop variable rebinds the name ``loss`` (the loss
        # tensor above); training has already finished at this point.
        for loss in loss_save:
            f.write(str(loss) + "\n")
        f.write("run time: " + str(prepareTime) + " " + str(median_time) + "\n")
def __init__(self, config, kb, text_seq_batch, seq_length_batch,
             num_vocab_txt, num_vocab_nmn, EOS_idx, num_choices,
             decoder_sampling, use_gt_layout=None, gt_layout_batch=None,
             scope='neural_module_network', reuse=None):
    """Assemble the layout generator and the knowledge-base module network.

    Args:
        config: Configuration object; ``embed_dim_txt`` is read here and the
            object is also passed on to ``AttentionSeq2Seq`` and ``Modules``.
        kb: Knowledge base; ``len(kb)`` is the attention vector size.
        text_seq_batch, seq_length_batch, num_vocab_nmn, EOS_idx,
        decoder_sampling, use_gt_layout, gt_layout_batch: Forwarded
            unchanged to ``netgen_att.AttentionSeq2Seq``.
        num_vocab_txt: Number of rows of the shared embedding matrix.
        num_choices: Length of the dummy score vector produced for invalid
            layouts; also passed to ``Modules``.
        scope: Name of the enclosing variable scope; also used as the
            substring filter when collecting weights for L2 regularization.
        reuse: ``reuse`` flag of the enclosing variable scope.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Part 1: Seq2seq RNN to generate module layout tokens
        # The embedding matrix is shared by the generator and the modules.
        embedding_mat = tf.get_variable(
            'embedding_mat',
            [num_vocab_txt, config.embed_dim_txt],
            initializer=tf.contrib.layers.xavier_initializer())

        with tf.variable_scope('layout_generation'):
            att_seq2seq = netgen_att.AttentionSeq2Seq(
                config, text_seq_batch, seq_length_batch, num_vocab_txt,
                num_vocab_nmn, EOS_idx, decoder_sampling, embedding_mat,
                use_gt_layout, gt_layout_batch)
            self.att_seq2seq = att_seq2seq
            self.predicted_tokens = att_seq2seq.predicted_tokens
            self.token_probs = token_probs = att_seq2seq.token_probs
            self.word_vecs = word_vecs = att_seq2seq.word_vecs
            self.neg_entropy = neg_entropy = att_seq2seq.neg_entropy
            self.atts = att_seq2seq.atts

            # log probability of each generated sequence
            self.log_seq_prob = tf.reduce_sum(tf.log(token_probs), axis=0)

        # Part 2: Neural Module Network
        with tf.variable_scope('layout_execution'):
            modules = Modules(config, kb, word_vecs, num_choices,
                              embedding_mat)
            self.modules = modules

            # Recursion of modules
            att_shape = [len(kb)]
            # Forward declaration of module recursion
            att_expr_decl = td.ForwardDeclaration(
                td.PyObjectType(), td.TensorType(att_shape))

            def scoped_case(module_fn, layer_name, inputs=()):
                """Record of inputs + indices, fed to a scoped module layer."""
                fields = [('input_%d' % idx, block)
                          for idx, block in enumerate(inputs)]
                fields.append(('time_idx', td.Scalar('int32')))
                fields.append(('batch_idx', td.Scalar('int32')))
                record = td.Record(fields)
                return record >> td.ScopedLayer(
                    module_fn, name_or_scope=layer_name)

            # _key_find
            case_key_find = scoped_case(
                modules.KeyFindModule, 'KeyFindModule')
            # _key_filter
            case_key_filter = scoped_case(
                modules.KeyFilterModule, 'KeyFilterModule',
                [att_expr_decl()])

            recursion_cases = td.OneOf(
                td.GetItem('module'),
                {'_key_find': case_key_find,
                 '_key_filter': case_key_filter})
            att_expr_decl.resolve_to(recursion_cases)

            # _val_desc: output scores for choice (for valid expressions)
            predicted_scores = scoped_case(
                modules.ValDescribeModule, 'ValDescribeModule',
                [recursion_cases])

            # For invalid expressions, define a dummy answer
            # so that all answers have the same form
            # (``assembler`` is not a parameter of this method; it is
            # resolved from the enclosing scope.)
            INVALID = assembler.INVALID_EXPR
            dummy_scores = td.Void() >> td.FromTensor(
                np.zeros(num_choices, np.float32))
            output_scores = td.OneOf(
                td.GetItem('module'),
                {'_val_desc': predicted_scores, INVALID: dummy_scores})

            # compile and get the output scores
            self.compiler = td.Compiler.create(output_scores)
            self.scores = self.compiler.output_tensors[0]

        # Regularization: Entropy + L2
        self.entropy_reg = tf.reduce_mean(neg_entropy)
        weight_losses = []
        for var in tf.trainable_variables():
            var_name = var.op.name
            if scope in var_name and var_name.endswith('weights'):
                weight_losses.append(tf.nn.l2_loss(var))
        self.l2_reg = tf.add_n(weight_losses)
def __init__(self, image_data_batch, image_mean, text_seq_batch,
             seq_length_batch, T_decoder, num_vocab_txt, embed_dim_txt,
             num_vocab_nmn, embed_dim_nmn, lstm_dim, num_layers, assembler,
             encoder_dropout, decoder_dropout, decoder_sampling, num_choices,
             use_qpn, qpn_dropout, reduce_visfeat_dim=False,
             new_visfeat_dim=128, use_gt_layout=None, gt_layout_batch=None,
             map_dim=1024, scope='neural_module_network', reuse=None):
    """Assemble the image CNN, layout generator and module network.

    Args:
        image_data_batch: Raw image batch; it is divided by 255.0 and
            ``image_mean`` is subtracted before ``nlvr_convnet`` is applied.
        image_mean: Value subtracted from the rescaled images.
        text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
        embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim, num_layers,
        assembler, encoder_dropout, decoder_dropout, decoder_sampling,
        use_gt_layout, gt_layout_batch: Forwarded unchanged to
            ``AttentionSeq2Seq``.
        num_choices: Length of the dummy score vector produced for invalid
            layouts; also passed to ``Modules`` and the question prior net.
        use_qpn: When true, ``question_prior_net`` scores are added to the
            module network scores.
        qpn_dropout: Passed to ``question_prior_net``.
        reduce_visfeat_dim, new_visfeat_dim: Accepted but not used by this
            method.
        map_dim: Passed to ``Modules``.
        scope: Name of the enclosing variable scope; also used as the
            substring filter when collecting weights for L2 regularization.
        reuse: ``reuse`` flag of the enclosing variable scope.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Part 0: Visual feature from CNN
        with tf.variable_scope('image_feature_cnn'):
            normalized_images = image_data_batch / 255.0 - image_mean
            image_feat_grid = nlvr_convnet(normalized_images)
            self.image_feat_grid = image_feat_grid

        # Part 1: Seq2seq RNN to generate module layout tokens
        with tf.variable_scope('layout_generation'):
            att_seq2seq = AttentionSeq2Seq(
                text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
                embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim,
                num_layers, assembler, encoder_dropout, decoder_dropout,
                decoder_sampling, use_gt_layout, gt_layout_batch)
            self.att_seq2seq = att_seq2seq
            self.predicted_tokens = att_seq2seq.predicted_tokens
            self.token_probs = token_probs = att_seq2seq.token_probs
            self.word_vecs = word_vecs = att_seq2seq.word_vecs
            self.neg_entropy = neg_entropy = att_seq2seq.neg_entropy
            self.atts = att_seq2seq.atts

            # log probability of each generated sequence
            self.log_seq_prob = tf.reduce_sum(tf.log(token_probs), axis=0)

        # Part 2: Neural Module Network
        with tf.variable_scope('layout_execution'):
            modules = Modules(image_feat_grid, word_vecs, None, num_choices,
                              map_dim)
            self.modules = modules

            # Recursion of modules
            att_shape = image_feat_grid.get_shape().as_list()[1:-1] + [1]
            # Forward declaration of module recursion
            att_expr_decl = td.ForwardDeclaration(
                td.PyObjectType(), td.TensorType(att_shape))

            def module_case(module_fn, num_att_inputs):
                """Record of attention inputs + indices, fed to module_fn."""
                fields = [('input_%d' % idx, att_expr_decl())
                          for idx in range(num_att_inputs)]
                fields.append(('time_idx', td.Scalar('int32')))
                fields.append(('batch_idx', td.Scalar('int32')))
                return td.Record(fields) >> td.Function(module_fn)

            # _Find
            case_find = module_case(modules.FindModule, 0)
            # _Transform
            case_transform = module_case(modules.TransformModule, 1)
            # _And
            case_and = module_case(modules.AndModule, 2)
            # _Describe
            case_describe = module_case(modules.DescribeModule, 1)

            attention_cases = {
                '_Find': case_find,
                '_Transform': case_transform,
                '_And': case_and,
            }
            recursion_cases = td.OneOf(td.GetItem('module'), attention_cases)
            att_expr_decl.resolve_to(recursion_cases)

            # For invalid expressions, define a dummy answer
            # so that all answers have the same form
            dummy_scores = td.Void() >> td.FromTensor(
                np.zeros(num_choices, np.float32))
            answer_cases = {
                '_Describe': case_describe,
                INVALID_EXPR: dummy_scores,
            }
            output_scores = td.OneOf(td.GetItem('module'), answer_cases)

            # compile and get the output scores
            self.compiler = td.Compiler.create(output_scores)
            self.scores_nmn = self.compiler.output_tensors[0]

        # Add a question prior network if specified
        self.use_qpn = use_qpn
        self.qpn_dropout = qpn_dropout
        if use_qpn:
            self.scores_qpn = question_prior_net(
                att_seq2seq.encoder_states, num_choices, qpn_dropout)
            self.scores = self.scores_nmn + self.scores_qpn
        else:
            self.scores = self.scores_nmn

        # Regularization: Entropy + L2
        self.entropy_reg = tf.reduce_mean(neg_entropy)
        weight_losses = []
        for var in tf.trainable_variables():
            var_name = var.op.name
            if scope in var_name and var_name.endswith('weights'):
                weight_losses.append(tf.nn.l2_loss(var))
        self.l2_reg = tf.add_n(weight_losses)
def __init__(self, image_batch, text_seq_batch, seq_length_batch, T_decoder,
             num_vocab_txt, embed_dim_txt, num_vocab_nmn, embed_dim_nmn,
             lstm_dim, num_layers, EOS_idx, encoder_dropout, decoder_dropout,
             decoder_sampling, num_choices, use_gt_layout=None,
             gt_layout_batch=None, scope='neural_module_network',
             reuse=None):
    """Assemble the image CNN, layout generator and module network.

    Args:
        image_batch: Image batch fed to ``shapes_convnet``.
        text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
        embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim, num_layers,
        EOS_idx, encoder_dropout, decoder_dropout, decoder_sampling,
        use_gt_layout, gt_layout_batch: Forwarded unchanged to
            ``nmn3_netgen_att.AttentionSeq2Seq``.
        num_choices: Length of the dummy score vector produced for invalid
            layouts; also passed to ``Modules``.
        scope: Name of the enclosing variable scope; also used as the
            substring filter when collecting weights for L2 regularization.
        reuse: ``reuse`` flag of the enclosing variable scope.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Part 0: Visual feature from CNN
        with tf.variable_scope('image_feature_cnn'):
            image_feat_grid = shapes_convnet(image_batch)
            self.image_feat_grid = image_feat_grid

        # Part 1: Seq2seq RNN to generate module layout tokens
        with tf.variable_scope('layout_generation'):
            att_seq2seq = nmn3_netgen_att.AttentionSeq2Seq(
                text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
                embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim,
                num_layers, EOS_idx, encoder_dropout, decoder_dropout,
                decoder_sampling, use_gt_layout, gt_layout_batch)
            self.att_seq2seq = att_seq2seq
            self.predicted_tokens = att_seq2seq.predicted_tokens
            self.token_probs = token_probs = att_seq2seq.token_probs
            self.word_vecs = word_vecs = att_seq2seq.word_vecs
            self.neg_entropy = neg_entropy = att_seq2seq.neg_entropy
            self.atts = att_seq2seq.atts

            # log probability of each generated sequence
            self.log_seq_prob = tf.reduce_sum(tf.log(token_probs), axis=0)

        # Part 2: Neural Module Network
        with tf.variable_scope('layout_execution'):
            modules = Modules(image_feat_grid, word_vecs, num_choices)
            self.modules = modules

            # Recursion of modules
            att_shape = image_feat_grid.get_shape().as_list()[1:-1] + [1]
            # Forward declaration of module recursion
            att_expr_decl = td.ForwardDeclaration(
                td.PyObjectType(), td.TensorType(att_shape))

            def scoped_case(module_fn, layer_name, inputs=()):
                """Record of inputs + indices, fed to a scoped module layer."""
                fields = [('input_%d' % idx, block)
                          for idx, block in enumerate(inputs)]
                fields.append(('time_idx', td.Scalar('int32')))
                fields.append(('batch_idx', td.Scalar('int32')))
                record = td.Record(fields)
                return record >> td.ScopedLayer(
                    module_fn, name_or_scope=layer_name)

            # _Find
            case_find = scoped_case(modules.FindModule, 'FindModule')
            # _Transform
            case_transform = scoped_case(
                modules.TransformModule, 'TransformModule',
                [att_expr_decl()])
            # _And
            case_and = scoped_case(
                modules.AndModule, 'AndModule',
                [att_expr_decl(), att_expr_decl()])

            attention_cases = {
                '_Find': case_find,
                '_Transform': case_transform,
                '_And': case_and,
            }
            recursion_cases = td.OneOf(td.GetItem('module'), attention_cases)
            att_expr_decl.resolve_to(recursion_cases)

            # _Answer: output scores for choice (for valid expressions)
            predicted_scores = scoped_case(
                modules.AnswerModule, 'AnswerModule', [recursion_cases])

            # For invalid expressions, define a dummy answer
            # so that all answers have the same form
            INVALID = nmn3_assembler.INVALID_EXPR
            dummy_scores = td.Void() >> td.FromTensor(
                np.zeros(num_choices, np.float32))
            output_scores = td.OneOf(
                td.GetItem('module'),
                {'_Answer': predicted_scores, INVALID: dummy_scores})

            # compile and get the output scores
            self.compiler = td.Compiler.create(output_scores)
            self.scores = self.compiler.output_tensors[0]

        # Regularization: Entropy + L2
        self.entropy_reg = tf.reduce_mean(neg_entropy)
        weight_losses = []
        for var in tf.trainable_variables():
            var_name = var.op.name
            if scope in var_name and var_name.endswith('weights'):
                weight_losses.append(tf.nn.l2_loss(var))
        self.l2_reg = tf.add_n(weight_losses)