Example #1
def get_outputs(inputs, params):
    "Return the outputs from the model which will be used in the loss function."
    embedding_size = params['embedding_size']
    num_blocks = params['num_blocks']
    vocab_size = params['vocab_size']

    story = inputs['story']
    query = inputs['query']

    batch_size = tf.shape(story)[0]

    normal_initializer = tf.random_normal_initializer(stddev=0.1)
    ortho_initializer = tf.orthogonal_initializer(gain=1.0)
    ones_initializer = tf.constant_initializer(1.0)

    # Extend the vocab to include keys for the dynamic memory cell,
    # allowing the initialization of the memory to be learned.
    vocab_size = vocab_size + num_blocks

    with tf.variable_scope('EntityNetwork', initializer=normal_initializer):
        # PReLU activations have their alpha parameters initialized to 1
        # so they may be identity before training.
        alpha = tf.get_variable(name='alpha',
                                shape=embedding_size,
                                initializer=ones_initializer)
        activation = partial(prelu, alpha=alpha)

        # Embeddings
        embedding_params = tf.get_variable(name='embedding_params',
                                           shape=[vocab_size, embedding_size])

        # The embedding mask forces the special "pad" embedding to zeros.
        embedding_mask = tf.constant(
            value=[0 if i == 0 else 1 for i in range(vocab_size)],
            shape=[vocab_size, 1],
            dtype=tf.float32)
        embedding_params_masked = embedding_params * embedding_mask

        story_embedding = tf.nn.embedding_lookup(embedding_params_masked,
                                                 story)
        query_embedding = tf.nn.embedding_lookup(embedding_params_masked,
                                                 query)

        # Input Module
        encoded_story = get_input_encoding(inputs=story_embedding,
                                           initializer=ones_initializer,
                                           scope='StoryEncoding')
        encoded_query = get_input_encoding(inputs=query_embedding,
                                           initializer=ones_initializer,
                                           scope='QueryEncoding')

        # Memory Module
        # We define the keys outside of the cell so they may be used for memory initialization.
        # Keys are initialized to a range outside of the main vocab.
        keys = [key for key in range(vocab_size - num_blocks, vocab_size)]
        keys = tf.nn.embedding_lookup(embedding_params_masked, keys)
        keys = tf.split(keys, num_blocks, axis=0)
        keys = [tf.squeeze(key, axis=0) for key in keys]

        cell = DynamicMemoryCell(num_blocks=num_blocks,
                                 num_units_per_block=embedding_size,
                                 keys=keys,
                                 initializer=normal_initializer,
                                 recurrent_initializer=ortho_initializer,
                                 activation=activation)

        # Recurrence
        initial_state = cell.zero_state(batch_size, tf.float32)
        sequence_length = get_sequence_length(encoded_story)
        _, last_state = tf.nn.dynamic_rnn(cell=cell,
                                          inputs=encoded_story,
                                          sequence_length=sequence_length,
                                          initial_state=initial_state)

        # Output Module
        outputs = get_output_module(last_state=last_state,
                                    encoded_query=encoded_query,
                                    num_blocks=num_blocks,
                                    vocab_size=vocab_size,
                                    initializer=normal_initializer,
                                    activation=activation)

        parameters = count_parameters()
        print('Parameters: {}'.format(parameters))

        return outputs
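The example binds its activation with activation = partial(prelu, alpha=alpha), but the prelu helper itself (and the module-level imports such as tensorflow and functools.partial) are not shown. A minimal, self-contained sketch of what such a helper might look like; the body is an assumption, only the name and the alpha argument come from the call site. With alpha initialized to ones it reduces to the identity, matching the comment above.

import tensorflow as tf

def prelu(features, alpha, name=None):
    "Parametric ReLU: identity on positive inputs, alpha-scaled on negative inputs."
    with tf.name_scope(name, 'PReLU', [features, alpha]):
        pos = tf.nn.relu(features)
        neg = alpha * (features - tf.abs(features)) * 0.5  # alpha * min(features, 0)
        return pos + neg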
Example #2
def model_fn(features, labels, params, mode, scope=None):
    embedding_size = params['embedding_size']
    num_blocks = params['num_blocks']
    vocab_size = params['vocab_size']
    debug = params['debug']

    story = features['story']
    query = features['query']

    batch_size = tf.shape(story)[0]

    normal_initializer = tf.random_normal_initializer(stddev=0.1)
    ones_initializer = tf.constant_initializer(1.0)

    # PReLU activations have their alpha parameters initialized to 1
    # so they may be identity before training.
    activation = partial(prelu, initializer=ones_initializer)

    with tf.variable_scope(scope, 'EntityNetwork', initializer=normal_initializer):
        # Embeddings
        # The embedding mask forces the special "pad" embedding to zeros.
        embedding_params = tf.get_variable('embedding_params', [vocab_size, embedding_size])
        embedding_mask = tf.constant([0 if i == 0 else 1 for i in range(vocab_size)],
            dtype=tf.float32,
            shape=[vocab_size, 1])

        story_embedding = tf.nn.embedding_lookup(embedding_params * embedding_mask, story)
        query_embedding = tf.nn.embedding_lookup(embedding_params * embedding_mask, query)

        # Input Module
        encoded_story = get_input_encoding(story_embedding, ones_initializer, 'StoryEncoding')
        encoded_query = get_input_encoding(query_embedding, ones_initializer, 'QueryEncoding')

        # Memory Module
        # We define the keys outside of the cell so they may be used for state initialization.
        keys = [tf.get_variable('key_{}'.format(j), [embedding_size]) for j in range(num_blocks)]

        cell = DynamicMemoryCell(num_blocks, embedding_size, keys,
            initializer=normal_initializer,
            activation=activation)

        # Recurrence
        initial_state = cell.zero_state(batch_size, tf.float32)
        sequence_length = get_sequence_length(encoded_story)
        _, last_state = tf.nn.dynamic_rnn(cell, encoded_story,
            sequence_length=sequence_length,
            initial_state=initial_state)

        # Output Module
        output = get_output(last_state, encoded_query,
            num_blocks=num_blocks,
            vocab_size=vocab_size,
            initializer=normal_initializer,
            activation=activation)
        prediction = tf.argmax(output, 1)

        # Training
        loss = get_loss(output, labels, mode)
        train_op = get_train_op(loss, params, mode)

        if debug:
            tf.contrib.layers.summarize_tensor(sequence_length, 'sequence_length')
            tf.contrib.layers.summarize_tensor(encoded_story, 'encoded_story')
            tf.contrib.layers.summarize_tensor(encoded_query, 'encoded_query')
            tf.contrib.layers.summarize_tensor(last_state, 'last_state')
            tf.contrib.layers.summarize_tensor(output, 'output')
            tf.contrib.layers.summarize_variables()

            tf.add_check_numerics_ops()

        return prediction, loss, train_op
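Both get_outputs and model_fn call a get_sequence_length helper that is not included in the listings. A minimal sketch under the assumption that padded sentences encode to all-zero vectors (only the name and call signature come from the examples; the body is an assumption):

import tensorflow as tf

def get_sequence_length(sequence, scope=None):
    "Per-example number of time steps whose encoding is not all zeros."
    with tf.name_scope(scope, 'SequenceLength', [sequence]):
        # Padded sentences embed to zero vectors (see the embedding mask above),
        # so any non-zero feature marks a real time step.
        used = tf.sign(tf.reduce_max(tf.abs(sequence), axis=-1))
        return tf.cast(tf.reduce_sum(used, axis=-1), tf.int32)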
Example #3
def get_outputs(inputs, params):
    "Return the outputs from the model which will be used in the loss function."
    embedding_size = params['embedding_size']
    num_blocks = params['num_blocks']
    vocab_size = params['vocab_size']
    story = inputs['story']
    query = inputs['query']
    is_general = params['is_general']
    candidates = inputs['candidates']
    print("get outputs candi ", candidates)
    batch_size = tf.shape(story)[0]

    normal_initializer = tf.random_normal_initializer(stddev=0.1)
    ones_initializer = tf.constant_initializer(1.0)

    # Unlike Example #1, the vocab is not extended with extra key ids here;
    # the memory keys are instead looked up from the candidate tokens below.

    with tf.variable_scope('EntityNetwork', initializer=normal_initializer):
        # PReLU activations have their alpha parameters initialized to 1
        # so they may be identity before training.
        alpha = tf.get_variable(name='alpha',
                                shape=embedding_size,
                                initializer=ones_initializer)
        if is_general:
            activation = partial(prelu, alpha=alpha)
        else:
            activation = tf.identity

        # Embeddings
        embedding_params = tf.get_variable(name='embedding_params',
                                           shape=[vocab_size, embedding_size])

        # The embedding mask forces the special "pad" embedding to zeros.
        embedding_mask = tf.constant(
            value=[0 if i == 0 else 1 for i in range(vocab_size)],
            shape=[vocab_size, 1],
            dtype=tf.float32)
        embedding_params_masked = embedding_params * embedding_mask

        story_embedding = tf.nn.embedding_lookup(embedding_params_masked,
                                                 story)
        query_embedding = tf.nn.embedding_lookup(embedding_params_masked,
                                                 query)
        # NOTE: tf.nn.dropout's second argument is keep_prob, so 0.2 keeps only
        # 20% of the activations; use a value closer to 1.0 for mild dropout.
        story_embedding = tf.nn.dropout(story_embedding, 0.2)
        query_embedding = tf.nn.dropout(query_embedding, 0.2)

        # Input Module
        encoded_story = get_input_encoding(inputs=story_embedding,
                                           initializer=ones_initializer,
                                           scope='StoryEncoding')
        encoded_story_for_gate = get_input_encoding_for_gate(
            inputs=story_embedding,
            initializer=ones_initializer,
            scope='StoryEncoding_for_gate')
        encoded_query = get_input_encoding(inputs=query_embedding,
                                           initializer=ones_initializer,
                                           scope='QueryEncoding')
        print("encoded query ", encoded_query)
        # Memory Module
        # We define the keys outside of the cell so they may be used for memory initialization.
        # Keys are initialized to a range outside of the main vocab.
        print('model candidates', candidates)
        #keys = [key for key in range(vocab_size - num_blocks, vocab_size)]

        keys = tf.nn.embedding_lookup(embedding_params_masked, candidates)
        print("keys ", keys)
        #keys = tf.split(keys, num_blocks, axis=0)
        #print("split keys ", keys)
        #keys = [tf.squeeze(key, axis=0) for key in keys]
        #print("squeezed keys ", keys)
        cell = DynamicMemoryCell(num_blocks=num_blocks,
                                 num_units_per_block=embedding_size,
                                 keys=keys,
                                 initializer=normal_initializer,
                                 recurrent_initializer=normal_initializer,
                                 activation=activation,
                                 is_general=is_general)

        # Concatenate the story encoding with the gate encoding along the
        # feature axis so the cell sees both when updating its memories.
        inputcat = tf.concat([encoded_story, encoded_story_for_gate], axis=2)

        # Recurrence
        initial_state = cell.zero_state(batch_size, tf.float32)
        sequence_length = get_sequence_length(encoded_story)
        _, last_state = tf.nn.dynamic_rnn(cell=cell,
                                          inputs=inputcat,
                                          sequence_length=sequence_length,
                                          initial_state=initial_state)
        # Output Module
        outputs = get_output_module(last_state=last_state,
                                    encoded_query=encoded_query,
                                    num_blocks=num_blocks,
                                    vocab_size=vocab_size,
                                    candidates=candidates,
                                    initializer=normal_initializer,
                                    activation=activation)

        parameters = count_parameters()
        print('Parameters: {}'.format(parameters))

        return outputs
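All three examples also depend on a get_input_encoding helper. A sketch of the multiplicative positional-mask encoding used by Recurrent Entity Networks, assuming 4-D inputs of shape [batch, num_sentences, sentence_length, embedding_size] with the query treated as a one-sentence story; the variable name positional_mask and the exact shapes are assumptions:

import tensorflow as tf

def get_input_encoding(inputs, initializer=None, scope=None):
    "Sum word embeddings under a learned per-position mask."
    with tf.variable_scope(scope, 'Encoding', initializer=initializer):
        _, _, max_sentence_length, embedding_size = inputs.get_shape().as_list()
        positional_mask = tf.get_variable(name='positional_mask',
                                          shape=[max_sentence_length, embedding_size])
        # Broadcast the [sentence_length, embedding_size] mask over the batch
        # and sentence dimensions, then collapse the word dimension.
        return tf.reduce_sum(inputs * positional_mask, axis=2)

Because the call sites pass ones_initializer, the mask starts out as all ones, so the encoding begins as a plain bag-of-words sum and learns position weights during training.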