def buid_sentence_expression():
    """Build the TensorFlow Fold block that encodes a sentence tree.

    The raw input is wrapped in ``WNJsonDecoder`` and then dispatched on the
    node's ``'leaf'`` item:

    * ``True``: the node's ``'word_vec'`` item is read as a vector of
      ``embedding_size`` elements.
    * ``False``: the recursively encoded ``'children'`` are averaged, the
      mean is concatenated with the node's own ``'word_vec'`` and the result
      goes through a fully connected layer of ``embedding_size`` units.

    Returns:
        The input transform composed with the recursive node expression.
    """
    to_tree = td.InputTransform(lambda raw_json: WNJsonDecoder(raw_json))

    # Declared up front so the internal-node case can refer to itself.
    node_decl = td.ForwardDeclaration(td.PyObjectType())

    # Leaf nodes: just the stored word vector.
    leaf_lookup = td.GetItem('word_vec', name='leaf_in')
    leaf_block = leaf_lookup >> td.Vector(embedding_size)

    # Internal nodes: mean of the encoded children combined with the node's
    # own word vector.
    internal_record = td.Record(
        {
            'children': td.Map(node_decl()) >> td.Mean(),
            'word_vec': td.Vector(embedding_size),
        },
        name='index_in')
    internal_block = (
        internal_record
        >> td.Concat(name='concat_root_child')
        >> td.FC(embedding_size, name='FC_root_child')
    )

    node_block = td.OneOf(
        td.GetItem('leaf'),
        {True: leaf_block, False: internal_block},
        name='recur_in')
    node_decl.resolve_to(node_block)

    return to_tree >> node_block
def tree_sum_blk(loss_blk):
    """Build a block that traverses a tree and sums up the loss of every node.

    Args:
        loss_blk: Zero-argument callable returning a block; that block reads
            a node and its output is added to the summed result of the
            node's ``'children'``.

    Returns:
        A ``td.Composition`` that, for a node, adds the node's own loss to
        the recursively computed sums of all its children.
    """
    # The forward declaration lets the composition apply itself to children.
    subtree_decl = td.ForwardDeclaration(td.PyObjectType(), td.TensorType([]))

    subtree_sum = td.Composition()
    with subtree_sum.scope():
        node_loss = loss_blk().reads(subtree_sum.input)
        child_nodes = td.GetItem('children').reads(subtree_sum.input)
        child_sums = td.Map(subtree_decl()).reads(child_nodes)
        children_total = td.Reduce(td.Function(tf.add)).reads(child_sums)
        node_total = td.Function(tf.add).reads(children_total, node_loss)
        subtree_sum.output.reads(node_total)

    subtree_decl.resolve_to(subtree_sum)
    return subtree_sum
def buid_sentence_expression():
    """Build the TensorFlow Fold block that encodes a binary sentence tree.

    The raw input is wrapped in ``WordNode`` and then dispatched on the
    node's ``'leaf'`` item:

    * ``True``: the node's ``'word_id'`` item is read as an int32 scalar and
      passed to ``embedding``.
    * ``False``: the ``'left'`` and ``'right'`` items are encoded
      recursively, concatenated and passed to ``fc``.

    Returns:
        The input transform composed with the recursive node expression.
    """
    to_tree = td.InputTransform(lambda raw_json: WordNode(raw_json))

    # Declared up front so the internal-node case can refer to itself.
    node_decl = td.ForwardDeclaration(td.PyObjectType())

    # Leaf nodes: word id -> embedding.
    leaf_lookup = td.GetItem('word_id', name='leaf_in')
    leaf_block = leaf_lookup >> td.Scalar(dtype=tf.int32) >> embedding

    # Internal nodes: combine both recursively encoded children.
    left_child = node_decl()
    right_child = node_decl()
    child_pair = td.Record({'left': left_child, 'right': right_child})
    internal_block = child_pair >> td.Concat(name='concat_root_child') >> fc

    node_block = td.OneOf(
        td.GetItem('leaf'),
        {True: leaf_block, False: internal_block},
        name='recur_in')
    node_decl.resolve_to(node_block)

    return to_tree >> node_block
def dynamic_pooling_blk():
    """Build the recursive block that max-pools detected features over a tree.

    Input: root node dic (its ``'clen'`` and ``'children'`` items are read)
    Output: pooled, TensorType([hyper.conv_dim, ])

    A node whose ``'clen'`` equals 0 is handled by ``feature_detector_blk()``
    alone. Any other node yields the element-wise maximum of its own detected
    features and the pooled results of all of its children.
    """
    # Built first, before the detector used inside the composition.
    leaf_block = feature_detector_blk()

    # The forward declaration lets the pooling block apply itself to children.
    pooled_decl = td.ForwardDeclaration(
        td.PyObjectType(), td.TensorType([hyper.conv_dim, ]))

    inner = td.Composition()
    with inner.scope():
        own_features = feature_detector_blk().reads(inner.input)
        child_nodes = td.GetItem('children').reads(inner.input)
        child_pooled = td.Map(pooled_decl()).reads(child_nodes)
        children_max = td.Reduce(td.Function(tf.maximum)).reads(child_pooled)
        node_max = td.Function(tf.maximum).reads(children_max, own_features)
        inner.output.reads(node_max)

    def _has_no_children(node):
        return node['clen'] == 0

    pooled = td.OneOf(_has_no_children, {True: leaf_block, False: inner})
    pooled_decl.resolve_to(pooled)
    return pooled
def __init__(self, image_feat_grid, text_seq_batch, seq_length_batch,
             T_decoder, num_vocab_txt, embed_dim_txt, num_vocab_nmn,
             embed_dim_nmn, lstm_dim, num_layers, assembler,
             encoder_dropout, decoder_dropout, decoder_sampling,
             num_choices, use_qpn, qpn_dropout,
             reduce_visfeat_dim=False, new_visfeat_dim=256,
             use_gt_layout=None, gt_layout_batch=None,
             scope='neural_module_network', reuse=None):
    """Build the layout generator and the recursive module network.

    Args:
        image_feat_grid: Image feature tensor. Its dimensions other than the
            first and the last, followed by a trailing 1, give the shape of
            the attention maps passed between modules.
        text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
        embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim, num_layers,
        assembler, encoder_dropout, decoder_dropout, decoder_sampling,
        use_gt_layout, gt_layout_batch: Forwarded unchanged to
            ``AttentionSeq2Seq``.
        num_choices: Passed to ``Modules`` and to ``question_prior_net``;
            also the length of the all-zero score vector emitted for
            ``INVALID_EXPR`` layouts.
        use_qpn: When truthy, ``question_prior_net`` scores are added to the
            module network scores.
        qpn_dropout: Passed to ``question_prior_net``.
        reduce_visfeat_dim: When truthy, ``image_feat_grid`` first goes
            through a 1x1 ``conv`` with ``new_visfeat_dim`` outputs.
        new_visfeat_dim: Output dimension of that optional convolution.
        scope: Name of the enclosing variable scope; also used as a
            substring filter when collecting variables for the L2 term.
        reuse: Passed to ``tf.variable_scope``.

    Sets ``image_feat_grid``, ``att_seq2seq``, ``atts``, ``predicted_tokens``,
    ``token_probs``, ``word_vecs``, ``neg_entropy``, ``log_seq_prob``,
    ``modules``, ``compiler``, ``scores_nmn``, ``scores`` (and ``scores_qpn``
    when ``use_qpn``), ``entropy_reg`` and ``l2_reg`` on the instance.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Part 0: Visual feature from CNN
        self.reduce_visfeat_dim = reduce_visfeat_dim
        if reduce_visfeat_dim:
            # Use an extra linear 1x1 conv layer (without ReLU) to reduce
            # the feature dimension.
            with tf.variable_scope('reduce_visfeat_dim'):
                image_feat_grid = conv(
                    'conv_reduce_visfeat_dim', image_feat_grid,
                    kernel_size=1, stride=1, output_dim=new_visfeat_dim)
            print('visual feature dimension reduced to %d' % new_visfeat_dim)
        self.image_feat_grid = image_feat_grid

        # Part 1: Seq2seq RNN to generate module layout tokens
        with tf.variable_scope('layout_generation'):
            seq2seq = AttentionSeq2Seq(
                text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
                embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim,
                num_layers, assembler, encoder_dropout, decoder_dropout,
                decoder_sampling, use_gt_layout, gt_layout_batch)
            self.att_seq2seq = seq2seq
            self.predicted_tokens = seq2seq.predicted_tokens
            token_probs = self.token_probs = seq2seq.token_probs
            word_vecs = self.word_vecs = seq2seq.word_vecs
            neg_entropy = self.neg_entropy = seq2seq.neg_entropy
            self.atts = seq2seq.atts

            # Log probability of each generated sequence
            self.log_seq_prob = tf.reduce_sum(tf.log(token_probs), axis=0)

        # Part 2: Neural Module Network
        with tf.variable_scope('layout_execution'):
            modules = Modules(image_feat_grid, word_vecs, None, num_choices)
            self.modules = modules

            # Attention maps keep the grid's inner dimensions plus a
            # trailing singleton dimension.
            inner_dims = image_feat_grid.get_shape().as_list()[1:-1]
            att_shape = inner_dims + [1]

            # Forward declaration of module recursion
            att_expr_decl = td.ForwardDeclaration(
                td.PyObjectType(), td.TensorType(att_shape))

            def _module_case(module_fn, *att_input_keys):
                # Record of recursive attention inputs followed by the two
                # int32 indices, fed into the given module function.
                fields = [(key, att_expr_decl()) for key in att_input_keys]
                fields.append(('time_idx', td.Scalar('int32')))
                fields.append(('batch_idx', td.Scalar('int32')))
                return td.Record(fields) >> td.Function(module_fn)

            # Modules whose output is an attention map; they take part in
            # the recursion.
            attention_cases = {
                '_Scene': _module_case(modules.SceneModule),
                '_Find': _module_case(modules.FindModule),
                '_Filter': _module_case(modules.FilterModule, 'input_0'),
                '_FindSameProperty': _module_case(
                    modules.FindSamePropertyModule, 'input_0'),
                '_Transform': _module_case(
                    modules.TransformModule, 'input_0'),
                '_And': _module_case(
                    modules.AndModule, 'input_0', 'input_1'),
                '_Or': _module_case(
                    modules.OrModule, 'input_0', 'input_1'),
            }

            # Modules that sit at the root of a layout and produce scores.
            answer_cases = {
                '_Exist': _module_case(modules.ExistModule, 'input_0'),
                '_Count': _module_case(modules.CountModule, 'input_0'),
                '_EqualNum': _module_case(
                    modules.EqualNumModule, 'input_0', 'input_1'),
                '_MoreNum': _module_case(
                    modules.MoreNumModule, 'input_0', 'input_1'),
                '_LessNum': _module_case(
                    modules.LessNumModule, 'input_0', 'input_1'),
                '_SameProperty': _module_case(
                    modules.SamePropertyModule, 'input_0', 'input_1'),
                '_Describe': _module_case(
                    modules.DescribeModule, 'input_0'),
            }

            recursion_cases = td.OneOf(td.GetItem('module'), attention_cases)
            att_expr_decl.resolve_to(recursion_cases)

            # For invalid expressions, define a dummy answer so that all
            # answers have the same form.
            zero_scores = np.zeros(num_choices, np.float32)
            answer_cases[INVALID_EXPR] = td.Void() >> td.FromTensor(zero_scores)
            output_scores = td.OneOf(td.GetItem('module'), answer_cases)

            # Compile and get the output scores
            self.compiler = td.Compiler.create(output_scores)
            self.scores_nmn = self.compiler.output_tensors[0]

        # Add a question prior network if specified
        self.use_qpn = use_qpn
        self.qpn_dropout = qpn_dropout
        if not use_qpn:
            self.scores = self.scores_nmn
        else:
            self.scores_qpn = question_prior_net(
                seq2seq.encoder_states, num_choices, qpn_dropout)
            self.scores = self.scores_nmn + self.scores_qpn

        # Regularization: Entropy + L2
        self.entropy_reg = tf.reduce_mean(neg_entropy)
        l2_terms = []
        for var in tf.trainable_variables():
            var_name = var.op.name
            if scope in var_name and var_name.endswith('weights'):
                l2_terms.append(tf.nn.l2_loss(var))
        self.l2_reg = tf.add_n(l2_terms)
def __init__(self, config, kb, text_seq_batch, seq_length_batch,
             num_vocab_txt, num_vocab_nmn, EOS_idx, num_choices,
             decoder_sampling, use_gt_layout=None, gt_layout_batch=None,
             scope='neural_module_network', reuse=None):
    """Build the layout generator and the recursive module network.

    Args:
        config: Configuration object; ``config.embed_dim_txt`` sizes the
            embedding matrix, and the object itself is forwarded to
            ``netgen_att.AttentionSeq2Seq`` and ``Modules``.
        kb: Passed to ``Modules``; ``len(kb)`` is the length of the
            attention vectors passed between modules.
        text_seq_batch, seq_length_batch, num_vocab_nmn, EOS_idx,
        decoder_sampling, use_gt_layout, gt_layout_batch: Forwarded
            unchanged to ``netgen_att.AttentionSeq2Seq``.
        num_vocab_txt: Number of rows of the embedding matrix; also
            forwarded to ``netgen_att.AttentionSeq2Seq``.
        num_choices: Passed to ``Modules``; also the length of the all-zero
            score vector emitted for invalid layouts.
        scope: Name of the enclosing variable scope; also used as a
            substring filter when collecting variables for the L2 term.
        reuse: Passed to ``tf.variable_scope``.

    Sets ``att_seq2seq``, ``atts``, ``predicted_tokens``, ``token_probs``,
    ``word_vecs``, ``neg_entropy``, ``log_seq_prob``, ``modules``,
    ``compiler``, ``scores``, ``entropy_reg`` and ``l2_reg`` on the instance.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Part 1: Seq2seq RNN to generate module layout tokens
        # The same embedding matrix is handed to both the layout generator
        # and the modules.
        embedding_mat = tf.get_variable(
            'embedding_mat',
            [num_vocab_txt, config.embed_dim_txt],
            initializer=tf.contrib.layers.xavier_initializer())

        with tf.variable_scope('layout_generation'):
            seq2seq = netgen_att.AttentionSeq2Seq(
                config, text_seq_batch, seq_length_batch, num_vocab_txt,
                num_vocab_nmn, EOS_idx, decoder_sampling, embedding_mat,
                use_gt_layout, gt_layout_batch)
            self.att_seq2seq = seq2seq
            self.predicted_tokens = seq2seq.predicted_tokens
            token_probs = self.token_probs = seq2seq.token_probs
            word_vecs = self.word_vecs = seq2seq.word_vecs
            neg_entropy = self.neg_entropy = seq2seq.neg_entropy
            self.atts = seq2seq.atts

            # Log probability of each generated sequence
            self.log_seq_prob = tf.reduce_sum(tf.log(token_probs), axis=0)

        # Part 2: Neural Module Network
        with tf.variable_scope('layout_execution'):
            modules = Modules(config, kb, word_vecs, num_choices,
                              embedding_mat)
            self.modules = modules

            # Recursion of modules
            att_shape = [len(kb)]

            # Forward declaration of module recursion
            att_expr_decl = td.ForwardDeclaration(
                td.PyObjectType(), td.TensorType(att_shape))

            def _layer_case(module_fn, layer_scope, *leading_fields):
                # Record of the given leading fields followed by the two
                # int32 indices, fed into a scoped layer around module_fn.
                fields = list(leading_fields)
                fields.append(('time_idx', td.Scalar('int32')))
                fields.append(('batch_idx', td.Scalar('int32')))
                layer = td.ScopedLayer(module_fn, name_or_scope=layer_scope)
                return td.Record(fields) >> layer

            # _key_find
            case_key_find = _layer_case(
                modules.KeyFindModule, 'KeyFindModule')
            # _key_filter
            case_key_filter = _layer_case(
                modules.KeyFilterModule, 'KeyFilterModule',
                ('input_0', att_expr_decl()))

            recursion_cases = td.OneOf(
                td.GetItem('module'),
                {'_key_find': case_key_find, '_key_filter': case_key_filter})
            att_expr_decl.resolve_to(recursion_cases)

            # _val_desc: output scores for choice (for valid expressions)
            predicted_scores = _layer_case(
                modules.ValDescribeModule, 'ValDescribeModule',
                ('input_0', recursion_cases))

            # For invalid expressions, define a dummy answer so that all
            # answers have the same form.
            invalid_key = assembler.INVALID_EXPR
            zero_scores = np.zeros(num_choices, np.float32)
            dummy_scores = td.Void() >> td.FromTensor(zero_scores)
            output_scores = td.OneOf(
                td.GetItem('module'),
                {'_val_desc': predicted_scores, invalid_key: dummy_scores})

            # Compile and get the output scores
            self.compiler = td.Compiler.create(output_scores)
            self.scores = self.compiler.output_tensors[0]

        # Regularization: Entropy + L2
        self.entropy_reg = tf.reduce_mean(neg_entropy)
        l2_terms = []
        for var in tf.trainable_variables():
            var_name = var.op.name
            if scope in var_name and var_name.endswith('weights'):
                l2_terms.append(tf.nn.l2_loss(var))
        self.l2_reg = tf.add_n(l2_terms)
def __init__(self, image_data_batch, image_mean, text_seq_batch,
             seq_length_batch, T_decoder, num_vocab_txt, embed_dim_txt,
             num_vocab_nmn, embed_dim_nmn, lstm_dim, num_layers, assembler,
             encoder_dropout, decoder_dropout, decoder_sampling,
             num_choices, use_qpn, qpn_dropout,
             reduce_visfeat_dim=False, new_visfeat_dim=128,
             use_gt_layout=None, gt_layout_batch=None, map_dim=1024,
             scope='neural_module_network', reuse=None):
    """Build the image CNN, the layout generator and the module network.

    Args:
        image_data_batch: Image batch; it is divided by 255.0 and
            ``image_mean`` is subtracted before it is given to
            ``nlvr_convnet``.
        image_mean: Value subtracted from the rescaled images.
        text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
        embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim, num_layers,
        assembler, encoder_dropout, decoder_dropout, decoder_sampling,
        use_gt_layout, gt_layout_batch: Forwarded unchanged to
            ``AttentionSeq2Seq``.
        num_choices: Passed to ``Modules`` and to ``question_prior_net``;
            also the length of the all-zero score vector emitted for
            ``INVALID_EXPR`` layouts.
        use_qpn: When truthy, ``question_prior_net`` scores are added to the
            module network scores.
        qpn_dropout: Passed to ``question_prior_net``.
        reduce_visfeat_dim, new_visfeat_dim: Accepted but not referenced by
            this constructor.
        map_dim: Passed to ``Modules``.
        scope: Name of the enclosing variable scope; also used as a
            substring filter when collecting variables for the L2 term.
        reuse: Passed to ``tf.variable_scope``.

    Sets ``image_feat_grid``, ``att_seq2seq``, ``atts``, ``predicted_tokens``,
    ``token_probs``, ``word_vecs``, ``neg_entropy``, ``log_seq_prob``,
    ``modules``, ``compiler``, ``scores_nmn``, ``scores`` (and ``scores_qpn``
    when ``use_qpn``), ``entropy_reg`` and ``l2_reg`` on the instance.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Part 0: Visual feature from CNN
        with tf.variable_scope('image_feature_cnn'):
            normalized_images = image_data_batch / 255.0 - image_mean
            image_feat_grid = nlvr_convnet(normalized_images)
            self.image_feat_grid = image_feat_grid

        # Part 1: Seq2seq RNN to generate module layout tokens
        with tf.variable_scope('layout_generation'):
            seq2seq = AttentionSeq2Seq(
                text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
                embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim,
                num_layers, assembler, encoder_dropout, decoder_dropout,
                decoder_sampling, use_gt_layout, gt_layout_batch)
            self.att_seq2seq = seq2seq
            self.predicted_tokens = seq2seq.predicted_tokens
            token_probs = self.token_probs = seq2seq.token_probs
            word_vecs = self.word_vecs = seq2seq.word_vecs
            neg_entropy = self.neg_entropy = seq2seq.neg_entropy
            self.atts = seq2seq.atts

            # Log probability of each generated sequence
            self.log_seq_prob = tf.reduce_sum(tf.log(token_probs), axis=0)

        # Part 2: Neural Module Network
        with tf.variable_scope('layout_execution'):
            modules = Modules(image_feat_grid, word_vecs, None, num_choices,
                              map_dim)
            self.modules = modules

            # Attention maps keep the grid's inner dimensions plus a
            # trailing singleton dimension.
            inner_dims = image_feat_grid.get_shape().as_list()[1:-1]
            att_shape = inner_dims + [1]

            # Forward declaration of module recursion
            att_expr_decl = td.ForwardDeclaration(
                td.PyObjectType(), td.TensorType(att_shape))

            def _module_case(module_fn, *att_input_keys):
                # Record of recursive attention inputs followed by the two
                # int32 indices, fed into the given module function.
                fields = [(key, att_expr_decl()) for key in att_input_keys]
                fields.append(('time_idx', td.Scalar('int32')))
                fields.append(('batch_idx', td.Scalar('int32')))
                return td.Record(fields) >> td.Function(module_fn)

            # _Find
            case_find = _module_case(modules.FindModule)
            # _Transform
            case_transform = _module_case(modules.TransformModule, 'input_0')
            # _And
            case_and = _module_case(modules.AndModule, 'input_0', 'input_1')
            # _Describe
            case_describe = _module_case(modules.DescribeModule, 'input_0')

            recursion_cases = td.OneOf(
                td.GetItem('module'),
                {
                    '_Find': case_find,
                    '_Transform': case_transform,
                    '_And': case_and,
                })
            att_expr_decl.resolve_to(recursion_cases)

            # For invalid expressions, define a dummy answer so that all
            # answers have the same form.
            zero_scores = np.zeros(num_choices, np.float32)
            dummy_scores = td.Void() >> td.FromTensor(zero_scores)
            output_scores = td.OneOf(
                td.GetItem('module'),
                {'_Describe': case_describe, INVALID_EXPR: dummy_scores})

            # Compile and get the output scores
            self.compiler = td.Compiler.create(output_scores)
            self.scores_nmn = self.compiler.output_tensors[0]

        # Add a question prior network if specified
        self.use_qpn = use_qpn
        self.qpn_dropout = qpn_dropout
        if not use_qpn:
            self.scores = self.scores_nmn
        else:
            self.scores_qpn = question_prior_net(
                seq2seq.encoder_states, num_choices, qpn_dropout)
            self.scores = self.scores_nmn + self.scores_qpn

        # Regularization: Entropy + L2
        self.entropy_reg = tf.reduce_mean(neg_entropy)
        l2_terms = []
        for var in tf.trainable_variables():
            var_name = var.op.name
            if scope in var_name and var_name.endswith('weights'):
                l2_terms.append(tf.nn.l2_loss(var))
        self.l2_reg = tf.add_n(l2_terms)
def __init__(self, image_batch, text_seq_batch, seq_length_batch,
             T_decoder, num_vocab_txt, embed_dim_txt, num_vocab_nmn,
             embed_dim_nmn, lstm_dim, num_layers, EOS_idx, encoder_dropout,
             decoder_dropout, decoder_sampling, num_choices,
             use_gt_layout=None, gt_layout_batch=None,
             scope='neural_module_network', reuse=None):
    """Build the image CNN, the layout generator and the module network.

    Args:
        image_batch: Image batch given to ``shapes_convnet``.
        text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
        embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim, num_layers,
        EOS_idx, encoder_dropout, decoder_dropout, decoder_sampling,
        use_gt_layout, gt_layout_batch: Forwarded unchanged to
            ``nmn3_netgen_att.AttentionSeq2Seq``.
        num_choices: Passed to ``Modules``; also the length of the all-zero
            score vector emitted for invalid layouts.
        scope: Name of the enclosing variable scope; also used as a
            substring filter when collecting variables for the L2 term.
        reuse: Passed to ``tf.variable_scope``.

    Sets ``image_feat_grid``, ``att_seq2seq``, ``atts``, ``predicted_tokens``,
    ``token_probs``, ``word_vecs``, ``neg_entropy``, ``log_seq_prob``,
    ``modules``, ``compiler``, ``scores``, ``entropy_reg`` and ``l2_reg`` on
    the instance.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Part 0: Visual feature from CNN
        with tf.variable_scope('image_feature_cnn'):
            image_feat_grid = shapes_convnet(image_batch)
            self.image_feat_grid = image_feat_grid

        # Part 1: Seq2seq RNN to generate module layout tokens
        with tf.variable_scope('layout_generation'):
            seq2seq = nmn3_netgen_att.AttentionSeq2Seq(
                text_seq_batch, seq_length_batch, T_decoder, num_vocab_txt,
                embed_dim_txt, num_vocab_nmn, embed_dim_nmn, lstm_dim,
                num_layers, EOS_idx, encoder_dropout, decoder_dropout,
                decoder_sampling, use_gt_layout, gt_layout_batch)
            self.att_seq2seq = seq2seq
            self.predicted_tokens = seq2seq.predicted_tokens
            token_probs = self.token_probs = seq2seq.token_probs
            word_vecs = self.word_vecs = seq2seq.word_vecs
            neg_entropy = self.neg_entropy = seq2seq.neg_entropy
            self.atts = seq2seq.atts

            # Log probability of each generated sequence
            self.log_seq_prob = tf.reduce_sum(tf.log(token_probs), axis=0)

        # Part 2: Neural Module Network
        with tf.variable_scope('layout_execution'):
            modules = Modules(image_feat_grid, word_vecs, num_choices)
            self.modules = modules

            # Attention maps keep the grid's inner dimensions plus a
            # trailing singleton dimension.
            inner_dims = image_feat_grid.get_shape().as_list()[1:-1]
            att_shape = inner_dims + [1]

            # Forward declaration of module recursion
            att_expr_decl = td.ForwardDeclaration(
                td.PyObjectType(), td.TensorType(att_shape))

            def _layer_case(module_fn, layer_scope, *leading_fields):
                # Record of the given leading fields followed by the two
                # int32 indices, fed into a scoped layer around module_fn.
                fields = list(leading_fields)
                fields.append(('time_idx', td.Scalar('int32')))
                fields.append(('batch_idx', td.Scalar('int32')))
                layer = td.ScopedLayer(module_fn, name_or_scope=layer_scope)
                return td.Record(fields) >> layer

            # _Find
            case_find = _layer_case(modules.FindModule, 'FindModule')
            # _Transform
            case_transform = _layer_case(
                modules.TransformModule, 'TransformModule',
                ('input_0', att_expr_decl()))
            # _And
            case_and = _layer_case(
                modules.AndModule, 'AndModule',
                ('input_0', att_expr_decl()),
                ('input_1', att_expr_decl()))

            recursion_cases = td.OneOf(
                td.GetItem('module'),
                {
                    '_Find': case_find,
                    '_Transform': case_transform,
                    '_And': case_and,
                })
            att_expr_decl.resolve_to(recursion_cases)

            # _Answer: output scores for choice (for valid expressions)
            predicted_scores = _layer_case(
                modules.AnswerModule, 'AnswerModule',
                ('input_0', recursion_cases))

            # For invalid expressions, define a dummy answer so that all
            # answers have the same form.
            invalid_key = nmn3_assembler.INVALID_EXPR
            zero_scores = np.zeros(num_choices, np.float32)
            dummy_scores = td.Void() >> td.FromTensor(zero_scores)
            output_scores = td.OneOf(
                td.GetItem('module'),
                {'_Answer': predicted_scores, invalid_key: dummy_scores})

            # Compile and get the output scores
            self.compiler = td.Compiler.create(output_scores)
            self.scores = self.compiler.output_tensors[0]

        # Regularization: Entropy + L2
        self.entropy_reg = tf.reduce_mean(neg_entropy)
        l2_terms = []
        for var in tf.trainable_variables():
            var_name = var.op.name
            if scope in var_name and var_name.endswith('weights'):
                l2_terms.append(tf.nn.l2_loss(var))
        self.l2_reg = tf.add_n(l2_terms)