Example #1
def input_hidden(input_words, input_roles, n_word_vocab, n_role_vocab, emb_init, missing_word_id, 
    n_factors_emb=256, n_hidden=256, n_sample=1, mask_zero=True, using_dropout=False, dropout_rate=0.3, 
    activation='linear', a_target=False):
    """Input layer designed by Ottokar

        Embedding layers are initialized with glorot uniform.
        batch_size is None during compile time.
        input_length is length of input_words/input_roles

    # Arguments:
        input_words:        place holder for input words, shape is (batch_size, input_length)
        input_roles:        place holder for input roles, shape is (batch_size, input_length)
        n_word_vocab:       size of word vocabulary
        n_role_vocab:       size of role vocabulary
        emb_init:           initializer of embedding
        missing_word_id:    the id used as place-holder for the role without a word appearing
        n_factors_emb:      tensor factorization number
        n_hidden:           number of hidden units
        n_sample:           number of samples, useful when there are negative samples # QUESTION: what is number of samples/negative samples? (team1-change)
        mask_zero:          bool, zero out the weight of missing word
        using_dropout:      bool, using drop-out layer or not
        dropout_rate:       rate of drop-out layer
        activation:         activation function in fully connected layer
        is_target:          bool, True if this is a target embedding

    # if a_target:
    #     input_length = n_sample
    # else:
    #     input_length = n_role_vocab - 1
    """
    hidden = role_based_word_embedding(input_words, input_roles, n_word_vocab, n_role_vocab, emb_init, 
        missing_word_id, n_factors_emb, mask_zero, using_dropout, dropout_rate)

    if a_target: # QUESTION: What is a_target controlling? (team1-change) 
        # fully connected layer, output shape is (batch_size, n_sample, n_hidden)
        output = Dense(n_hidden, 
            activation=activation, 
            use_bias=False,
            input_shape=(n_sample, n_factors_emb,),
            name='target_role_based_embedding')(hidden)

    else:
        # sum over the input_length dimension;
        # obtaining the context embedding, shape is (batch_size, n_factors_emb)
        context_hidden = Lambda(lambda x: K.sum(x, axis=1), 
            name='context_hidden',
            output_shape=(n_factors_emb,))(hidden)

        # fully connected layer, output shape is (batch_size, n_hidden)
        output = Dense(n_hidden, 
            activation=activation, 
            use_bias=True,
            input_shape=(n_factors_emb,), 
            name='role_based_embedding')(context_hidden)

    # if using_dropout:
    #     # Drop-out layer after fully connected layer
    #     output = Dropout(0.5)(output)

    return output
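For reference, here is a minimal, hypothetical sketch of calling input_hidden on the context branch (a_target=False). It assumes the usual Keras imports and that role_based_word_embedding is importable from the same module; the vocabulary sizes and the missing_word_id value are illustrative placeholders, not values taken from the original code.

from keras.layers import Input
from keras.initializers import glorot_uniform

n_word_vocab, n_role_vocab = 50001, 7
input_length = n_role_vocab - 1   # one role is held out as the target role

input_words = Input(shape=(input_length,), dtype='int32', name='example_input_words')
input_roles = Input(shape=(input_length,), dtype='int32', name='example_input_roles')

# context branch: summed role-based embeddings -> Dense, output shape (batch_size, n_hidden)
context = input_hidden(input_words, input_roles, n_word_vocab, n_role_vocab,
                       glorot_uniform(), missing_word_id=n_word_vocab - 1,
                       a_target=False)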
Example #2

    def __init__(self, n_word_vocab=50001, n_role_vocab=7, n_factors_emb=256, n_factors_cls=512, n_hidden=256, word_vocabulary={}, role_vocabulary={}, 
        unk_word_id=50000, unk_role_id=7, missing_word_id=50001, 
        using_dropout=False, dropout_rate=0.3, optimizer='adagrad', loss='sparse_categorical_crossentropy', metrics=['accuracy']):
        super(NNRF_ResROFA, self).__init__(n_word_vocab, n_role_vocab, n_factors_emb, n_hidden, word_vocabulary, role_vocabulary, 
            unk_word_id, unk_role_id, missing_word_id, using_dropout, dropout_rate, optimizer, loss, metrics)

        # minus 1 here because one of the roles is the target role
        self.input_length = n_role_vocab - 1

        # each input is a fixed window over the frame set; each word corresponds to one role
        input_words = Input(shape=(self.input_length, ), dtype='int32', name='input_words')
        input_roles = Input(shape=(self.input_length, ), dtype='int32', name='input_roles')
        target_role = Input(shape=(1, ), dtype='int32', name='target_role')

        # role based embedding layer
        embedding_layer = role_based_word_embedding(input_words, input_roles, n_word_vocab, n_role_vocab, glorot_uniform(), 
            missing_word_id, self.input_length, n_factors_emb, True, using_dropout, dropout_rate)
        
        # fully connected layer, output shape is (batch_size, input_length, n_factors_emb)
        lin_proj = Dense(n_factors_emb, 
            activation='linear', 
            use_bias=False,
            input_shape=(n_factors_emb,), 
            name='lin_proj')(embedding_layer)

        non_lin = PReLU(
            alpha_initializer='ones',
            name='non_lin')(lin_proj)
        
        # fully connected layer, output shape is (batch_size, input_length, n_factors_emb)
        lin_proj2 = Dense(n_factors_emb, 
            activation='linear', 
            use_bias=False,
            input_shape=(n_factors_emb,), 
            name='lin_proj2')(non_lin)

        residual_0 = Add(name='residual_0')([embedding_layer, lin_proj2])

        # mean over the input_length dimension;
        # obtaining the context embedding, shape is (batch_size, n_factors_emb)
        context_embedding = Lambda(lambda x: K.mean(x, axis=1), 
            name='context_embedding',
            output_shape=(n_factors_emb,))(residual_0)

        # hidden layer
        hidden_layer2 = target_word_hidden(context_embedding, target_role, n_word_vocab, n_role_vocab, glorot_uniform(), n_factors_cls, n_hidden, 
            using_dropout=using_dropout, dropout_rate=dropout_rate)

        # softmax output layer
        output_layer = Dense(n_word_vocab, 
            activation='softmax', 
            input_shape=(n_factors_cls, ), 
            name='softmax_word_output')(hidden_layer2)

        self.model = Model(inputs=[input_words, input_roles, target_role], outputs=[output_layer])

        self.model.compile(optimizer, loss, metrics)
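The lin_proj → non_lin → lin_proj2 → residual_0 chain above is an ordinary residual block applied to the role-based embeddings. Below is a self-contained sketch of just that block using standard Keras layers; the shapes and names are illustrative and the code is not taken verbatim from the repository.

from keras.layers import Input, Dense, PReLU, Add
from keras.models import Model

n_factors_emb, input_length = 256, 6

x = Input(shape=(input_length, n_factors_emb), name='role_based_embeddings')
h = Dense(n_factors_emb, activation='linear', use_bias=False, name='lin_proj')(x)
h = PReLU(alpha_initializer='ones', name='non_lin')(h)
h = Dense(n_factors_emb, activation='linear', use_bias=False, name='lin_proj2')(h)
y = Add(name='residual_0')([x, h])   # y = x + F(x), shape (batch_size, input_length, n_factors_emb)

residual_block = Model(inputs=x, outputs=y, name='residual_block_sketch')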
Example #3
    def __init__(self,
                 n_word_vocab=50001,
                 n_role_vocab=7,
                 n_factors_emb=256,
                 n_factors_cls=512,
                 n_hidden=256,
                 word_vocabulary={},
                 role_vocabulary={},
                 unk_word_id=50000,
                 unk_role_id=7,
                 missing_word_id=50001,
                 using_dropout=False,
                 dropout_rate=0.3,
                 optimizer='adagrad',
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy']):
        super(NNRF, self).__init__(n_word_vocab, n_role_vocab, n_factors_emb,
                                   n_hidden, word_vocabulary, role_vocabulary,
                                   unk_word_id, unk_role_id, missing_word_id,
                                   using_dropout, dropout_rate, optimizer,
                                   loss, metrics)

        # minus 1 here because one of the roles is the target role
        self.input_length = n_role_vocab - 1

        # each input is a fixed window over the frame set; each word corresponds to one role
        input_words = Input(
            shape=(self.input_length, ), dtype=tf.uint32,
            name='input_words')  # Switched dtype to tf specific (team1-change)
        input_roles = Input(
            shape=(self.input_length, ), dtype=tf.uint32,
            name='input_roles')  # Switched dtype to tf specific (team1-change)
        target_role = Input(
            shape=(1, ), dtype=tf.uint32,
            name='target_role')  # Switched dtype to tf specific (team1-change)

        # role based embedding layer
        embedding_layer = role_based_word_embedding(
            input_words, input_roles, n_word_vocab, n_role_vocab,
            glorot_uniform(), missing_word_id, self.input_length,
            n_factors_emb, True, using_dropout, dropout_rate)

        # sum over the input_length dimension;
        # obtaining the event embedding, shape is (batch_size, n_factors_emb)
        event_embedding = Lambda(
            lambda x: K.sum(x, axis=1),
            name='event_embedding',
            output_shape=(n_factors_emb, ))(embedding_layer)

        # fully connected layer, output shape is (batch_size, n_hidden)
        hidden = Dense(n_hidden,
                       activation='linear',
                       input_shape=(n_factors_emb, ),
                       name='projected_event_embedding')(event_embedding)

        # non-linear layer; PReLU alphas are initialized to 1
        non_linearity = PReLU(alpha_initializer='ones',
                              name='context_embedding')(hidden)

        # hidden layer
        hidden_layer2 = target_word_hidden(non_linearity,
                                           target_role,
                                           n_word_vocab,
                                           n_role_vocab,
                                           glorot_uniform(),
                                           n_factors_cls,
                                           n_hidden,
                                           using_dropout=using_dropout,
                                           dropout_rate=dropout_rate)

        # softmax output layer
        output_layer = Dense(n_word_vocab,
                             activation='softmax',
                             input_shape=(n_factors_cls, ),
                             name='softmax_word_output')(hidden_layer2)

        self.model = Model(inputs=[input_words, input_roles, target_role],
                           outputs=[output_layer])

        self.model.compile(optimizer, loss, metrics)
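As a rough smoke test, the compiled model could be driven with dummy arrays whose shapes mirror the three Input layers. This is a hypothetical sketch that assumes NNRF is importable from this module, that the integer dummies are accepted by the uint32 placeholders (or that the dtypes are kept as 'int32' as in the previous example), and that the base-class wiring shown above has run.

import numpy as np

nnrf = NNRF()                            # defaults: n_word_vocab=50001, n_role_vocab=7
batch_size = 8
input_length = nnrf.input_length         # n_role_vocab - 1 == 6 with the defaults

words = np.random.randint(0, 50001, size=(batch_size, input_length))
roles = np.tile(np.arange(input_length), (batch_size, 1))
target_role = np.random.randint(0, 7, size=(batch_size, 1))
target_word = np.random.randint(0, 50001, size=(batch_size, 1))   # labels for sparse_categorical_crossentropy

nnrf.model.fit([words, roles, target_role], target_word, epochs=1, batch_size=batch_size)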