def get_outputs(inputs, params):
    "Return the outputs from the model which will be used in the loss function."
    embedding_size = params['embedding_size']
    num_blocks = params['num_blocks']
    vocab_size = params['vocab_size']

    story = inputs['story']
    query = inputs['query']

    batch_size = tf.shape(story)[0]

    normal_initializer = tf.random_normal_initializer(stddev=0.1)
    ortho_initializer = tf.orthogonal_initializer(gain=1.0)
    ones_initializer = tf.constant_initializer(1.0)

    # Extend the vocab to include keys for the dynamic memory cell,
    # allowing the initialization of the memory to be learned.
    vocab_size = vocab_size + num_blocks

    with tf.variable_scope('EntityNetwork', initializer=normal_initializer):
        # PReLU activations have their alpha parameters initialized to 1
        # so they may be identity before training.
        alpha = tf.get_variable(
            name='alpha',
            shape=embedding_size,
            initializer=ones_initializer)
        activation = partial(prelu, alpha=alpha)

        # Embeddings
        embedding_params = tf.get_variable(
            name='embedding_params',
            shape=[vocab_size, embedding_size])

        # The embedding mask forces the special "pad" embedding to zeros.
        embedding_mask = tf.constant(
            value=[0 if i == 0 else 1 for i in range(vocab_size)],
            shape=[vocab_size, 1],
            dtype=tf.float32)
        embedding_params_masked = embedding_params * embedding_mask

        story_embedding = tf.nn.embedding_lookup(embedding_params_masked, story)
        query_embedding = tf.nn.embedding_lookup(embedding_params_masked, query)

        # Input Module
        encoded_story = get_input_encoding(
            inputs=story_embedding,
            initializer=ones_initializer,
            scope='StoryEncoding')
        encoded_query = get_input_encoding(
            inputs=query_embedding,
            initializer=ones_initializer,
            scope='QueryEncoding')

        # Memory Module
        # We define the keys outside of the cell so they may be used for memory initialization.
        # Keys are initialized to a range outside of the main vocab.
        keys = [key for key in range(vocab_size - num_blocks, vocab_size)]
        keys = tf.nn.embedding_lookup(embedding_params_masked, keys)
        keys = tf.split(keys, num_blocks, axis=0)
        keys = [tf.squeeze(key, axis=0) for key in keys]

        cell = DynamicMemoryCell(
            num_blocks=num_blocks,
            num_units_per_block=embedding_size,
            keys=keys,
            initializer=normal_initializer,
            recurrent_initializer=ortho_initializer,
            activation=activation)

        # Recurrence
        initial_state = cell.zero_state(batch_size, tf.float32)
        sequence_length = get_sequence_length(encoded_story)
        _, last_state = tf.nn.dynamic_rnn(
            cell=cell,
            inputs=encoded_story,
            sequence_length=sequence_length,
            initial_state=initial_state)

        # Output Module
        outputs = get_output_module(
            last_state=last_state,
            encoded_query=encoded_query,
            num_blocks=num_blocks,
            vocab_size=vocab_size,
            initializer=normal_initializer,
            activation=activation)

        parameters = count_parameters()
        print('Parameters: {}'.format(parameters))

        return outputs
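# The helpers `prelu` and `get_sequence_length` used above are defined elsewhere
# in the project. The sketches below show one plausible form for them, assuming
# a per-dimension learned alpha and zero-padded story encodings; they are
# illustrative, not the exact implementations used here.
def prelu(features, alpha, scope=None):
    """Parametric ReLU: max(0, x) + alpha * min(0, x), with an externally created alpha."""
    with tf.variable_scope(scope, 'PReLU'):
        pos = tf.nn.relu(features)
        neg = alpha * (features - tf.abs(features)) * 0.5  # equals alpha * min(0, x)
        return pos + neg

def get_sequence_length(sequence, scope=None):
    """Length of a zero-padded sequence: count of timesteps with any non-zero entry."""
    with tf.variable_scope(scope, 'SequenceLength'):
        used = tf.sign(tf.reduce_max(tf.abs(sequence), axis=-1))
        return tf.cast(tf.reduce_sum(used, axis=-1), tf.int32)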
def model_fn(features, labels, params, mode, scope=None):
    embedding_size = params['embedding_size']
    num_blocks = params['num_blocks']
    vocab_size = params['vocab_size']
    debug = params['debug']

    story = features['story']
    query = features['query']

    batch_size = tf.shape(story)[0]

    normal_initializer = tf.random_normal_initializer(stddev=0.1)
    ones_initializer = tf.constant_initializer(1.0)

    # PReLU activations have their alpha parameters initialized to 1
    # so they may be identity before training.
    activation = partial(prelu, initializer=ones_initializer)

    with tf.variable_scope(scope, 'EntityNetwork', initializer=normal_initializer):
        # Embeddings
        # The embedding mask forces the special "pad" embedding to zeros.
        embedding_params = tf.get_variable('embedding_params', [vocab_size, embedding_size])
        embedding_mask = tf.constant(
            [0 if i == 0 else 1 for i in range(vocab_size)],
            dtype=tf.float32,
            shape=[vocab_size, 1])
        story_embedding = tf.nn.embedding_lookup(embedding_params * embedding_mask, story)
        query_embedding = tf.nn.embedding_lookup(embedding_params * embedding_mask, query)

        # Input Module
        encoded_story = get_input_encoding(story_embedding, ones_initializer, 'StoryEncoding')
        encoded_query = get_input_encoding(query_embedding, ones_initializer, 'QueryEncoding')

        # Memory Module
        # We define the keys outside of the cell so they may be used for state initialization.
        keys = [tf.get_variable('key_{}'.format(j), [embedding_size]) for j in range(num_blocks)]
        cell = DynamicMemoryCell(num_blocks, embedding_size, keys,
                                 initializer=normal_initializer,
                                 activation=activation)

        # Recurrence
        initial_state = cell.zero_state(batch_size, tf.float32)
        sequence_length = get_sequence_length(encoded_story)
        _, last_state = tf.nn.dynamic_rnn(cell, encoded_story,
                                          sequence_length=sequence_length,
                                          initial_state=initial_state)

        # Output Module
        output = get_output(last_state, encoded_query,
                            num_blocks=num_blocks,
                            vocab_size=vocab_size,
                            initializer=normal_initializer,
                            activation=activation)
        prediction = tf.argmax(output, 1)

        # Training
        loss = get_loss(output, labels, mode)
        train_op = get_train_op(loss, params, mode)

        if debug:
            tf.contrib.layers.summarize_tensor(sequence_length, 'sequence_length')
            tf.contrib.layers.summarize_tensor(encoded_story, 'encoded_story')
            tf.contrib.layers.summarize_tensor(encoded_query, 'encoded_query')
            tf.contrib.layers.summarize_tensor(last_state, 'last_state')
            tf.contrib.layers.summarize_tensor(output, 'output')
            tf.contrib.layers.summarize_variables()

            tf.add_check_numerics_ops()

        return prediction, loss, train_op
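# `get_loss` and `get_train_op` are defined elsewhere in the project. A minimal
# sketch of what they could look like, assuming single-word answer ids as labels
# and the tf.contrib.learn ModeKeys convention; the optimizer, learning rate,
# and clipping value are assumptions, not the project's actual settings.
def get_loss(output, labels, mode):
    """Mean sparse softmax cross-entropy between the output logits and answer ids."""
    if mode == tf.contrib.learn.ModeKeys.INFER:
        return None
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=output, labels=labels)
    return tf.reduce_mean(cross_entropy)

def get_train_op(loss, params, mode):
    """Adam with gradient clipping, driven by the learning rate in `params`."""
    if mode != tf.contrib.learn.ModeKeys.TRAIN:
        return None
    return tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=params['learning_rate'],
        optimizer='Adam',
        clip_gradients=params.get('clip_gradients', 40.0))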
def get_outputs(inputs, params):
    "Return the outputs from the model which will be used in the loss function."
    embedding_size = params['embedding_size']
    num_blocks = params['num_blocks']
    vocab_size = params['vocab_size']

    story = inputs['story']
    query = inputs['query']

    is_general = params['is_general']
    candidates = inputs['candidates']
    print("get outputs candi ", candidates)

    batch_size = tf.shape(story)[0]

    normal_initializer = tf.random_normal_initializer(stddev=0.1)
    ones_initializer = tf.constant_initializer(1.0)

    # Unlike the version above, this variant does not extend the vocab with extra
    # key slots; the memory keys are looked up from the candidate entities instead.
    # vocab_size = vocab_size + num_blocks

    with tf.variable_scope('EntityNetwork', initializer=normal_initializer):
        # PReLU activations have their alpha parameters initialized to 1
        # so they may be identity before training.
        alpha = tf.get_variable(
            name='alpha',
            shape=embedding_size,
            initializer=ones_initializer)
        if is_general:
            activation = partial(prelu, alpha=alpha)
        else:
            activation = tf.identity

        # Embeddings
        embedding_params = tf.get_variable(
            name='embedding_params',
            shape=[vocab_size, embedding_size])
        # embedding_params = tf.nn.dropout(embedding_params, 0.5)

        # The embedding mask forces the special "pad" embedding to zeros.
        embedding_mask = tf.constant(
            value=[0 if i == 0 else 1 for i in range(vocab_size)],
            shape=[vocab_size, 1],
            dtype=tf.float32)
        embedding_params_masked = embedding_params * embedding_mask

        print("story ", story)
        story_embedding = tf.nn.embedding_lookup(embedding_params_masked, story)
        query_embedding = tf.nn.embedding_lookup(embedding_params_masked, query)
        # NOTE: in TF 1.x the second argument of tf.nn.dropout is keep_prob,
        # so 0.2 keeps only 20% of the activations.
        story_embedding = tf.nn.dropout(story_embedding, 0.2)
        query_embedding = tf.nn.dropout(query_embedding, 0.2)
        print("story embedding ", story_embedding)
        print("query embedding ", query_embedding)

        # Input Module
        encoded_story = get_input_encoding(
            inputs=story_embedding,
            initializer=ones_initializer,
            scope='StoryEncoding')
        encoded_story_for_gate = get_input_encoding_for_gate(
            inputs=story_embedding,
            initializer=ones_initializer,
            scope='StoryEncoding_for_gate')
        encoded_query = get_input_encoding(
            inputs=query_embedding,
            initializer=ones_initializer,
            scope='QueryEncoding')
        print("encoded query ", encoded_query)

        # Memory Module
        # We define the keys outside of the cell so they may be used for memory
        # initialization. Keys are taken from the candidate entities rather than
        # from a range outside of the main vocab.
        print('model candidates', candidates)
        # keys = [key for key in range(vocab_size - num_blocks, vocab_size)]
        keys = tf.nn.embedding_lookup(embedding_params_masked, candidates)
        print("keys ", keys)

        cell = DynamicMemoryCell(
            num_blocks=num_blocks,
            num_units_per_block=embedding_size,
            keys=keys,
            initializer=normal_initializer,
            recurrent_initializer=normal_initializer,
            activation=activation,
            is_general=is_general)

        # Concatenate the story encoding with the separate gate encoding along the
        # feature axis before feeding the cell.
        print(encoded_story)
        print(encoded_story.get_shape())
        inputcat = tf.concat([encoded_story, encoded_story_for_gate], axis=2)
        print(cell.output_size)
        print("inputcat ", inputcat)

        # Recurrence
        initial_state = cell.zero_state(batch_size, tf.float32)
        sequence_length = get_sequence_length(encoded_story)
        _, last_state = tf.nn.dynamic_rnn(
            cell=cell,
            inputs=inputcat,
            sequence_length=sequence_length,
            initial_state=initial_state)

        # Output Module
        outputs = get_output_module(
            last_state=last_state,
            encoded_query=encoded_query,
            num_blocks=num_blocks,
            vocab_size=vocab_size,
            candidates=candidates,
            initializer=normal_initializer,
            activation=activation)

        parameters = count_parameters()
        print('Parameters: {}'.format(parameters))

        return outputs
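# `get_input_encoding` is the Input Module used by every variant above. Below is
# a sketch consistent with the Recurrent Entity Networks paper: a learned
# multiplicative position mask applied to each sentence, summed over tokens.
# `get_input_encoding_for_gate` presumably follows the same pattern with its own
# mask variable. This is an illustrative assumption, not the exact implementation.
def get_input_encoding(inputs, initializer=None, scope=None):
    """Encode [batch, num_sentences, sentence_len, embed] into [batch, num_sentences, embed]."""
    with tf.variable_scope(scope, 'Encoding', initializer=initializer):
        _, _, max_sentence_length, embedding_size = inputs.get_shape().as_list()
        # One learned weight per (position, dimension); with a ones initializer the
        # encoder starts out as a plain bag-of-words sum.
        positional_mask = tf.get_variable(
            name='positional_mask',
            shape=[max_sentence_length, embedding_size])
        return tf.reduce_sum(inputs * positional_mask, axis=2)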