def input_hidden(input_words, input_roles, n_word_vocab, n_role_vocab, emb_init,
                 missing_word_id, n_factors_emb=256, n_hidden=256, n_sample=1,
                 mask_zero=True, using_dropout=False, dropout_rate=0.3,
                 activation='linear', a_target=False):
    """Input layer designed by Ottokar.

    Embedding layers are initialized with glorot uniform.
    batch_size is None at compile time.
    input_length is the length of input_words/input_roles.

    # Arguments:
        input_words:     placeholder for input words, shape is (batch_size, input_length)
        input_roles:     placeholder for input roles, shape is (batch_size, input_length)
        n_word_vocab:    size of word vocabulary
        n_role_vocab:    size of role vocabulary
        emb_init:        initializer of the embedding
        missing_word_id: the id used as a placeholder for a role without a word
        n_factors_emb:   tensor factorization number
        n_hidden:        number of hidden units
        n_sample:        number of samples, useful when there are negative samples
                         # QUESTION: what is number of samples/negative samples? (team1-change)
        mask_zero:       bool, zero out the weight of the missing word
        using_dropout:   bool, whether to use a dropout layer
        dropout_rate:    rate of the dropout layer
        activation:      activation function in the fully connected layer
        a_target:        bool, True if this is a target embedding

        # if a_target:
        #     input_length = n_sample
        # else:
        #     input_length = n_role_vocab - 1
    """
    hidden = role_based_word_embedding(input_words, input_roles, n_word_vocab,
                                       n_role_vocab, emb_init, missing_word_id,
                                       n_factors_emb, mask_zero,
                                       using_dropout, dropout_rate)

    if a_target:  # QUESTION: What is a_target controlling? (team1-change)
        # fully connected layer, output shape is (batch_size, n_sample, n_hidden)
        output = Dense(n_hidden,
                       activation=activation,
                       use_bias=False,
                       input_shape=(n_sample, n_factors_emb,),
                       name='target_role_based_embedding')(hidden)
    else:
        # sum along the input_length direction;
        # obtaining the context embedding layer, shape is (batch_size, n_factors_emb)
        context_hidden = Lambda(lambda x: K.sum(x, axis=1),
                                name='context_hidden',
                                output_shape=(n_factors_emb,))(hidden)

        # fully connected layer, output shape is (batch_size, n_hidden)
        output = Dense(n_hidden,
                       activation=activation,
                       use_bias=True,
                       input_shape=(n_factors_emb,),
                       name='role_based_embedding')(context_hidden)

    # if using_dropout:
    #     # Drop-out layer after the fully connected layer
    #     output = Dropout(0.5)(output)

    return output
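# Usage sketch: a minimal, hedged example of how input_hidden can be wired up for
# the context path (a_target=False). It assumes the Keras objects imported
# elsewhere in this module (Input, glorot_uniform) and the repo's
# role_based_word_embedding; the vocabulary sizes, ids, and names below are
# placeholders, not values from the original code.
def _input_hidden_usage_sketch():
    n_word_vocab, n_role_vocab = 50001, 7
    input_length = n_role_vocab - 1  # one role is held out as the target

    words = Input(shape=(input_length,), dtype='int32', name='demo_words')
    roles = Input(shape=(input_length,), dtype='int32', name='demo_roles')

    # context embedding over all observed roles, shape (batch_size, n_hidden);
    # with a_target=True the same call would instead return a target embedding
    # of shape (batch_size, n_sample, n_hidden)
    context = input_hidden(words, roles, n_word_vocab, n_role_vocab,
                           glorot_uniform(), missing_word_id=n_word_vocab,
                           a_target=False)
    return context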
def __init__(self, n_word_vocab=50001, n_role_vocab=7,
             n_factors_emb=256, n_factors_cls=512, n_hidden=256,
             word_vocabulary={}, role_vocabulary={},
             unk_word_id=50000, unk_role_id=7, missing_word_id=50001,
             using_dropout=False, dropout_rate=0.3,
             optimizer='adagrad', loss='sparse_categorical_crossentropy',
             metrics=['accuracy']):
    super(NNRF_ResROFA, self).__init__(n_word_vocab, n_role_vocab, n_factors_emb,
                                       n_hidden, word_vocabulary, role_vocabulary,
                                       unk_word_id, unk_role_id, missing_word_id,
                                       using_dropout, dropout_rate,
                                       optimizer, loss, metrics)

    # minus 1 here because one of the roles is the target role
    self.input_length = n_role_vocab - 1

    # each input is a fixed window of a frame set; each word corresponds to one role
    input_words = Input(shape=(self.input_length,), dtype='int32', name='input_words')
    input_roles = Input(shape=(self.input_length,), dtype='int32', name='input_roles')
    target_role = Input(shape=(1,), dtype='int32', name='target_role')

    # role-based embedding layer
    embedding_layer = role_based_word_embedding(input_words, input_roles,
                                                n_word_vocab, n_role_vocab,
                                                glorot_uniform(), missing_word_id,
                                                self.input_length, n_factors_emb,
                                                True, using_dropout, dropout_rate)

    # fully connected layer, output shape is (batch_size, input_length, n_factors_emb)
    lin_proj = Dense(n_factors_emb,
                     activation='linear',
                     use_bias=False,
                     input_shape=(n_factors_emb,),
                     name='lin_proj')(embedding_layer)

    non_lin = PReLU(alpha_initializer='ones',
                    name='non_lin')(lin_proj)

    # fully connected layer, output shape is (batch_size, input_length, n_factors_emb)
    lin_proj2 = Dense(n_factors_emb,
                      activation='linear',
                      use_bias=False,
                      input_shape=(n_factors_emb,),
                      name='lin_proj2')(non_lin)

    residual_0 = Add(name='residual_0')([embedding_layer, lin_proj2])

    # mean along the input_length direction;
    # obtaining the context embedding layer, shape is (batch_size, n_factors_emb)
    context_embedding = Lambda(lambda x: K.mean(x, axis=1),
                               name='context_embedding',
                               output_shape=(n_factors_emb,))(residual_0)

    # hidden layer
    hidden_layer2 = target_word_hidden(context_embedding, target_role,
                                       n_word_vocab, n_role_vocab,
                                       glorot_uniform(), n_factors_cls, n_hidden,
                                       using_dropout=using_dropout,
                                       dropout_rate=dropout_rate)

    # softmax output layer
    output_layer = Dense(n_word_vocab,
                         activation='softmax',
                         input_shape=(n_factors_cls,),
                         name='softmax_word_output')(hidden_layer2)

    self.model = Model(inputs=[input_words, input_roles, target_role],
                       outputs=[output_layer])

    self.model.compile(optimizer, loss, metrics)
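# Residual block sketch: a minimal, self-contained illustration of the
# Dense -> PReLU -> Dense projection added back onto its input, as used above
# between embedding_layer and residual_0. It is built on a standalone dummy
# input so it runs without the rest of this repo; the layer names, shapes, and
# the plain `keras` import path are assumptions, not the original model's.
from keras.layers import Input, Dense, PReLU, Add
from keras.models import Model

def _residual_block_sketch(input_length=6, n_factors_emb=256):
    x = Input(shape=(input_length, n_factors_emb), name='demo_embeddings')
    h = Dense(n_factors_emb, activation='linear', use_bias=False, name='demo_lin_proj')(x)
    h = PReLU(alpha_initializer='ones', name='demo_non_lin')(h)
    h = Dense(n_factors_emb, activation='linear', use_bias=False, name='demo_lin_proj2')(h)
    # residual connection: output keeps shape (input_length, n_factors_emb)
    y = Add(name='demo_residual')([x, h])
    return Model(inputs=x, outputs=y)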
def __init__(self, n_word_vocab=50001, n_role_vocab=7,
             n_factors_emb=256, n_factors_cls=512, n_hidden=256,
             word_vocabulary={}, role_vocabulary={},
             unk_word_id=50000, unk_role_id=7, missing_word_id=50001,
             using_dropout=False, dropout_rate=0.3,
             optimizer='adagrad', loss='sparse_categorical_crossentropy',
             metrics=['accuracy']):
    super(NNRF, self).__init__(n_word_vocab, n_role_vocab, n_factors_emb, n_hidden,
                               word_vocabulary, role_vocabulary,
                               unk_word_id, unk_role_id, missing_word_id,
                               using_dropout, dropout_rate,
                               optimizer, loss, metrics)

    # minus 1 here because one of the roles is the target role
    self.input_length = n_role_vocab - 1

    # each input is a fixed window of a frame set; each word corresponds to one role
    input_words = Input(shape=(self.input_length,),
                        dtype=tf.uint32,
                        name='input_words')  # Switched dtype to tf specific (team1-change)
    input_roles = Input(shape=(self.input_length,),
                        dtype=tf.uint32,
                        name='input_roles')  # Switched dtype to tf specific (team1-change)
    target_role = Input(shape=(1,),
                        dtype=tf.uint32,
                        name='target_role')  # Switched dtype to tf specific (team1-change)

    # role-based embedding layer
    embedding_layer = role_based_word_embedding(input_words, input_roles,
                                                n_word_vocab, n_role_vocab,
                                                glorot_uniform(), missing_word_id,
                                                self.input_length, n_factors_emb,
                                                True, using_dropout, dropout_rate)

    # sum along the input_length direction;
    # obtaining the event embedding layer, shape is (batch_size, n_factors_emb)
    event_embedding = Lambda(lambda x: K.sum(x, axis=1),
                             name='event_embedding',
                             output_shape=(n_factors_emb,))(embedding_layer)

    # fully connected layer, output shape is (batch_size, n_hidden)
    hidden = Dense(n_hidden,
                   activation='linear',
                   input_shape=(n_factors_emb,),
                   name='projected_event_embedding')(event_embedding)

    # non-linear layer, alphas initialized to ones
    non_linearity = PReLU(alpha_initializer='ones',
                          name='context_embedding')(hidden)

    # hidden layer
    hidden_layer2 = target_word_hidden(non_linearity, target_role,
                                       n_word_vocab, n_role_vocab,
                                       glorot_uniform(), n_factors_cls, n_hidden,
                                       using_dropout=using_dropout,
                                       dropout_rate=dropout_rate)

    # softmax output layer
    output_layer = Dense(n_word_vocab,
                         activation='softmax',
                         input_shape=(n_factors_cls,),
                         name='softmax_word_output')(hidden_layer2)

    self.model = Model(inputs=[input_words, input_roles, target_role],
                       outputs=[output_layer])

    self.model.compile(optimizer, loss, metrics)
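# Instantiation sketch: a minimal, hedged example of how this constructor is
# expected to be driven once the surrounding NNRF class and its helpers are
# available: one row of (n_role_vocab - 1) word ids and role ids plus a single
# target role id per sample, yielding a softmax over the word vocabulary.
# The dummy ids below are placeholders, not output of the repo's preprocessing.
import numpy as np

def _nnrf_usage_sketch():
    net = NNRF(n_word_vocab=50001, n_role_vocab=7)
    input_length = net.input_length  # n_role_vocab - 1 == 6

    batch_size = 2
    words = np.random.randint(0, 50001, size=(batch_size, input_length))
    roles = np.tile(np.arange(input_length), (batch_size, 1))
    target_role = np.full((batch_size, 1), 6)  # dummy id for the held-out target role

    # probabilities over the word vocabulary, shape (batch_size, n_word_vocab)
    return net.model.predict([words, roles, target_role])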