def __init__(self, n_word_vocab=50001, n_role_vocab=7, n_factors_emb=256,
             n_factors_cls=512, n_hidden=256, word_vocabulary={},
             role_vocabulary={}, unk_word_id=50000, unk_role_id=7,
             missing_word_id=50001, using_dropout=False, dropout_rate=0.3,
             optimizer='adagrad', loss='sparse_categorical_crossentropy',
             metrics=['accuracy']):
    """Build and compile the residual role-filler word-prediction network.

    The graph takes a fixed window of (word id, role id) pairs plus one
    target role id and ends in a softmax over ``n_word_vocab`` words.
    The compiled Keras model is stored in ``self.model``.

    Args:
        n_word_vocab: Unit count of the softmax output; also forwarded to
            the embedding and hidden-layer helpers.
        n_role_vocab: Forwarded to the helpers; the input window holds
            ``n_role_vocab - 1`` slots.
        n_factors_emb: Unit count of both residual projections and the
            declared size of the context embedding.
        n_factors_cls: Passed to ``target_word_hidden`` and declared as
            the input size of the softmax layer.
        n_hidden: Passed to ``target_word_hidden`` and to the base class.
        word_vocabulary: Only forwarded to the base-class constructor.
        role_vocabulary: Only forwarded to the base-class constructor.
        unk_word_id: Only forwarded to the base-class constructor.
        unk_role_id: Only forwarded to the base-class constructor.
        missing_word_id: Forwarded to the base class and to
            ``role_based_word_embedding``.
        using_dropout: Forwarded to the base class and both helpers.
        dropout_rate: Forwarded to the base class and both helpers.
        optimizer: Forwarded to the base class and to ``Model.compile``.
        loss: Forwarded to the base class and to ``Model.compile``.
        metrics: Forwarded to the base class and to ``Model.compile``.
    """
    # NOTE(review): the dict/list defaults above are mutable objects shared
    # between calls. This method only forwards them; whether the base class
    # stores or mutates them is not visible from here -- confirm.
    super(NNRF_ResROFA, self).__init__(
        n_word_vocab, n_role_vocab, n_factors_emb, n_hidden,
        word_vocabulary, role_vocabulary, unk_word_id, unk_role_id,
        missing_word_id, using_dropout, dropout_rate, optimizer, loss,
        metrics)

    # minus 1 here because one of the roles is the target role
    self.input_length = n_role_vocab - 1

    # each input is a fixed window of the frame set; each word corresponds
    # to one role
    input_words = Input(shape=(self.input_length, ), dtype='int32',
                        name='input_words')
    input_roles = Input(shape=(self.input_length, ), dtype='int32',
                        name='input_roles')
    target_role = Input(shape=(1, ), dtype='int32', name='target_role')

    # role based embedding layer
    # (presumably (batch_size, input_length, n_factors_emb); the residual
    # Add below needs it to match the n_factors_emb-wide projection)
    embedding_layer = role_based_word_embedding(
        input_words, input_roles, n_word_vocab, n_role_vocab,
        glorot_uniform(), missing_word_id, self.input_length,
        n_factors_emb, True, using_dropout, dropout_rate)

    # residual branch: bias-free projection -> PReLU -> bias-free
    # projection, each n_factors_emb units wide
    lin_proj = Dense(n_factors_emb,
                     activation='linear',
                     use_bias=False,
                     input_shape=(n_factors_emb, ),
                     name='lin_proj')(embedding_layer)
    non_lin = PReLU(alpha_initializer='ones', name='non_lin')(lin_proj)
    lin_proj2 = Dense(n_factors_emb,
                      activation='linear',
                      use_bias=False,
                      input_shape=(n_factors_emb, ),
                      name='lin_proj2')(non_lin)

    # skip connection around the two projections
    residual_0 = Add(name='residual_0')([embedding_layer, lin_proj2])

    # mean on input_length direction (axis 1);
    # declared output shape of the context embedding is (n_factors_emb,)
    context_embedding = Lambda(lambda x: K.mean(x, axis=1),
                               name='context_embedding',
                               output_shape=(n_factors_emb, ))(residual_0)

    # hidden layer
    hidden_layer2 = target_word_hidden(
        context_embedding, target_role, n_word_vocab, n_role_vocab,
        glorot_uniform(), n_factors_cls, n_hidden,
        using_dropout=using_dropout, dropout_rate=dropout_rate)

    # softmax output layer
    output_layer = Dense(n_word_vocab,
                         activation='softmax',
                         input_shape=(n_factors_cls, ),
                         name='softmax_word_output')(hidden_layer2)

    self.model = Model(inputs=[input_words, input_roles, target_role],
                       outputs=[output_layer])

    # Keyword arguments keep `metrics` bound to the right parameter: the
    # positional order of compile() after `loss` is not the same in every
    # Keras release.
    self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
def __init__(self, n_word_vocab=50001, n_role_vocab=7, n_factors_emb=300,
             n_hidden=300, word_vocabulary=None, role_vocabulary=None,
             unk_word_id=50000, unk_role_id=7, missing_word_id=50001,
             using_dropout=False, dropout_rate=0.3, optimizer='adagrad',
             loss='sparse_categorical_crossentropy', metrics=['accuracy'],
             loss_weights=[1., 1.]):
    """Build and compile the residual multi-task role-filler network.

    The graph takes a fixed window of (word id, role id) pairs plus a
    target word id and a target role id, and produces two softmax
    outputs: one over ``n_word_vocab`` words and one over
    ``n_role_vocab`` roles.  The compiled Keras model is stored in
    ``self.model``.

    Args:
        n_word_vocab: Row count of the word embedding and its mask, and
            unit count of the word softmax.
        n_role_vocab: Row count of the role embedding and unit count of
            the role softmax; the input window holds
            ``n_role_vocab - 1`` slots.
        n_factors_emb: Embedding width and unit count of both residual
            projections.
        n_hidden: Passed twice to each of ``target_word_hidden`` and
            ``target_role_hidden``; declared input size of both softmax
            layers.
        word_vocabulary: Only forwarded to the base-class constructor.
        role_vocabulary: Only forwarded to the base-class constructor.
        unk_word_id: Only forwarded to the base-class constructor.
        unk_role_id: Only forwarded to the base-class constructor.
        missing_word_id: Row of the word mask that is set to zero.
        using_dropout: Enables dropout after the embeddings and is
            forwarded to both hidden-layer helpers.
        dropout_rate: Rate used for those dropout layers.
        optimizer: Forwarded to the base class and to ``Model.compile``.
        loss: Forwarded to the base class and to ``Model.compile``.
        metrics: Forwarded to the base class and to ``Model.compile``.
        loss_weights: Passed to ``Model.compile`` only.
    """
    super(MTRFv4Res, self).__init__(
        n_word_vocab, n_role_vocab, n_factors_emb, n_hidden,
        word_vocabulary, role_vocabulary, unk_word_id, unk_role_id,
        missing_word_id, using_dropout, dropout_rate, optimizer, loss,
        metrics)

    # minus 1 here because one of the roles is the target role
    input_length = n_role_vocab - 1

    # each input is a fixed window of the frame set; each word corresponds
    # to one role
    input_words = Input(shape=(input_length, ), dtype='int32',
                        name='input_words')
    input_roles = Input(shape=(input_length, ), dtype='int32',
                        name='input_roles')
    target_word = Input(shape=(1, ), dtype='int32', name='target_word')
    target_role = Input(shape=(1, ), dtype='int32', name='target_role')

    # one initializer instance is shared by the word and role embeddings
    emb_init = glorot_uniform()

    # word embedding; shape is (batch_size, input_length, n_factors_emb)
    word_embedding = Embedding(n_word_vocab,
                               n_factors_emb,
                               embeddings_initializer=emb_init,
                               name='org_word_embedding')(input_words)

    # a hack that zeros out the missing-word inputs: a frozen embedding of
    # ones whose missing_word_id row is zero, multiplied onto the real one
    # NOTE(review): the mask has n_word_vocab rows, so missing_word_id must
    # be a valid row index; the signature defaults (50001 for both) do not
    # satisfy that, so callers presumably pass matching values -- confirm.
    weights = np.ones((n_word_vocab, n_factors_emb))
    weights[missing_word_id] = 0
    mask = Embedding(n_word_vocab,
                     n_factors_emb,
                     weights=[weights],
                     trainable=False,
                     name='word_mask')(input_words)

    # masked word embedding
    word_embedding = Multiply(name='word_embedding')([word_embedding, mask])

    # role embedding; shape is (batch_size, input_length, n_factors_emb)
    role_embedding = Embedding(n_role_vocab,
                               n_factors_emb,
                               embeddings_initializer=emb_init,
                               name='role_embedding')(input_roles)

    if using_dropout:
        # drop-out layer after embeddings
        word_embedding = Dropout(dropout_rate)(word_embedding)
        role_embedding = Dropout(dropout_rate)(role_embedding)

    # element-wise combination of the two embeddings; shape is the same as
    # the embeddings
    product = Multiply()([word_embedding, role_embedding])

    # residual branch: bias-free projection -> PReLU -> bias-free
    # projection, each n_factors_emb units wide
    lin_proj = Dense(n_factors_emb,
                     activation='linear',
                     use_bias=False,
                     input_shape=(n_factors_emb, ),
                     name='lin_proj')(product)
    non_lin = PReLU(alpha_initializer='ones', name='non_lin')(lin_proj)
    lin_proj2 = Dense(n_factors_emb,
                      activation='linear',
                      use_bias=False,
                      input_shape=(n_factors_emb, ),
                      name='lin_proj2')(non_lin)

    # skip connection around the two projections
    residual_0 = Add(name='residual_0')([product, lin_proj2])

    # mean on input_length direction (axis 1);
    # declared output shape of the context embedding is (n_factors_emb,)
    context_embedding = Lambda(lambda x: K.mean(x, axis=1),
                               name='context_embedding',
                               output_shape=(n_factors_emb, ))(residual_0)

    # target word hidden layer; the dropout settings are forwarded here as
    # well so that both task branches honour `using_dropout`
    tw_hidden = target_word_hidden(
        context_embedding, target_role, n_word_vocab, n_role_vocab,
        glorot_uniform(), n_hidden, n_hidden,
        using_dropout=using_dropout, dropout_rate=dropout_rate)

    # target role hidden layer
    tr_hidden = target_role_hidden(
        context_embedding, target_word, n_word_vocab, n_role_vocab,
        glorot_uniform(), n_hidden, n_hidden,
        using_dropout=using_dropout, dropout_rate=dropout_rate)

    # softmax output layer for the target word
    target_word_output = Dense(n_word_vocab,
                               activation='softmax',
                               input_shape=(n_hidden, ),
                               name='softmax_word_output')(tw_hidden)

    # softmax output layer for the target role
    target_role_output = Dense(n_role_vocab,
                               activation='softmax',
                               input_shape=(n_hidden, ),
                               name='softmax_role_output')(tr_hidden)

    self.model = Model(
        inputs=[input_words, input_roles, target_word, target_role],
        outputs=[target_word_output, target_role_output])

    # Keyword arguments keep `metrics` and `loss_weights` bound to the
    # right parameters: their positional order in compile() is not the
    # same in every Keras release.
    self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics,
                       loss_weights=loss_weights)
def __init__(self, n_word_vocab=50001, n_role_vocab=7, n_factors_emb=256,
             n_factors_cls=512, n_hidden=256, word_vocabulary={},
             role_vocabulary={}, unk_word_id=50000, unk_role_id=7,
             missing_word_id=50001, using_dropout=False, dropout_rate=0.3,
             optimizer='adagrad', loss='sparse_categorical_crossentropy',
             metrics=['accuracy']):
    """Build and compile the role-filler word-prediction network.

    The graph takes a fixed window of (word id, role id) pairs plus one
    target role id and ends in a softmax over ``n_word_vocab`` words.
    The compiled Keras model is stored in ``self.model``.

    Args:
        n_word_vocab: Unit count of the softmax output; also forwarded to
            the embedding and hidden-layer helpers.
        n_role_vocab: Forwarded to the helpers; the input window holds
            ``n_role_vocab - 1`` slots.
        n_factors_emb: Passed to ``role_based_word_embedding`` and
            declared as the size of the summed event embedding.
        n_factors_cls: Passed to ``target_word_hidden`` and declared as
            the input size of the softmax layer.
        n_hidden: Unit count of the projection applied to the event
            embedding; also passed to ``target_word_hidden``.
        word_vocabulary: Only forwarded to the base-class constructor.
        role_vocabulary: Only forwarded to the base-class constructor.
        unk_word_id: Only forwarded to the base-class constructor.
        unk_role_id: Only forwarded to the base-class constructor.
        missing_word_id: Forwarded to the base class and to
            ``role_based_word_embedding``.
        using_dropout: Forwarded to the base class and both helpers.
        dropout_rate: Forwarded to the base class and both helpers.
        optimizer: Forwarded to the base class and to ``Model.compile``.
        loss: Forwarded to the base class and to ``Model.compile``.
        metrics: Forwarded to the base class and to ``Model.compile``.
    """
    # NOTE(review): the dict/list defaults above are mutable objects shared
    # between calls. This method only forwards them; whether the base class
    # stores or mutates them is not visible from here -- confirm.
    super(NNRF, self).__init__(
        n_word_vocab, n_role_vocab, n_factors_emb, n_hidden,
        word_vocabulary, role_vocabulary, unk_word_id, unk_role_id,
        missing_word_id, using_dropout, dropout_rate, optimizer, loss,
        metrics)

    # minus 1 here because one of the roles is the target role
    self.input_length = n_role_vocab - 1

    # each input is a fixed window of the frame set; each word corresponds
    # to one role
    input_words = Input(
        shape=(self.input_length, ), dtype=tf.uint32,
        name='input_words')  # Switched dtype to tf specific (team1-change)
    input_roles = Input(
        shape=(self.input_length, ), dtype=tf.uint32,
        name='input_roles')  # Switched dtype to tf specific (team1-change)
    target_role = Input(
        shape=(1, ), dtype=tf.uint32,
        name='target_role')  # Switched dtype to tf specific (team1-change)

    # role based embedding layer
    embedding_layer = role_based_word_embedding(
        input_words, input_roles, n_word_vocab, n_role_vocab,
        glorot_uniform(), missing_word_id, self.input_length,
        n_factors_emb, True, using_dropout, dropout_rate)

    # sum on input_length direction (axis 1);
    # declared output shape of the event embedding is (n_factors_emb,)
    event_embedding = Lambda(lambda x: K.sum(x, axis=1),
                             name='event_embedding',
                             output_shape=(n_factors_emb, ))(embedding_layer)

    # fully connected layer with n_hidden units, applied to the summed
    # event embedding (the window axis has already been reduced)
    hidden = Dense(n_hidden,
                   activation='linear',
                   input_shape=(n_factors_emb, ),
                   name='projected_event_embedding')(event_embedding)

    # non-linear layer, using 1 to initialize
    non_linearity = PReLU(alpha_initializer='ones',
                          name='context_embedding')(hidden)

    # hidden layer
    hidden_layer2 = target_word_hidden(
        non_linearity, target_role, n_word_vocab, n_role_vocab,
        glorot_uniform(), n_factors_cls, n_hidden,
        using_dropout=using_dropout, dropout_rate=dropout_rate)

    # softmax output layer
    output_layer = Dense(n_word_vocab,
                         activation='softmax',
                         input_shape=(n_factors_cls, ),
                         name='softmax_word_output')(hidden_layer2)

    self.model = Model(inputs=[input_words, input_roles, target_role],
                       outputs=[output_layer])

    # Keyword arguments keep `metrics` bound to the right parameter: the
    # positional order of compile() after `loss` is not the same in every
    # Keras release.
    self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
def __init__(self, n_word_vocab=50001, n_role_vocab=7, n_factors_emb=300,
             n_hidden=300, word_vocabulary=None, role_vocabulary=None,
             unk_word_id=50000, unk_role_id=7, missing_word_id=50001,
             using_dropout=False, dropout_rate=0.3, optimizer='adagrad',
             loss='sparse_categorical_crossentropy', metrics=['accuracy'],
             loss_weights=[1., 1.]):
    """Build and compile the multi-task role-filler network.

    The graph takes a fixed window of (word id, role id) pairs plus a
    target word id and a target role id, and produces two softmax
    outputs: one over ``n_word_vocab`` words and one over
    ``n_role_vocab`` roles.  The compiled Keras model is stored in
    ``self.model``.

    Args:
        n_word_vocab: Unit count of the word softmax; also forwarded to
            the embedding and hidden-layer helpers.
        n_role_vocab: Unit count of the role softmax; also forwarded to
            the helpers.  The input window holds ``n_role_vocab - 1``
            slots.
        n_factors_emb: Passed to ``factored_embedding``.
        n_hidden: Passed to ``factored_embedding`` and twice to each
            hidden-layer helper; declared size of the context embedding
            and input size of both softmax layers.
        word_vocabulary: Only forwarded to the base-class constructor.
        role_vocabulary: Only forwarded to the base-class constructor.
        unk_word_id: Only forwarded to the base-class constructor.
        unk_role_id: Only forwarded to the base-class constructor.
        missing_word_id: Forwarded to the base class and to
            ``factored_embedding``.
        using_dropout: Forwarded to the base class and all three helpers.
        dropout_rate: Forwarded to the base class and all three helpers.
        optimizer: Forwarded to the base class and to ``Model.compile``.
        loss: Forwarded to the base class and to ``Model.compile``.
        metrics: Forwarded to the base class and to ``Model.compile``.
        loss_weights: Passed to ``Model.compile`` only.
    """
    super(MTRFv4, self).__init__(
        n_word_vocab, n_role_vocab, n_factors_emb, n_hidden,
        word_vocabulary, role_vocabulary, unk_word_id, unk_role_id,
        missing_word_id, using_dropout, dropout_rate, optimizer, loss,
        metrics)

    # minus 1 here because one of the roles is the target role
    input_length = n_role_vocab - 1

    # each input is a fixed window of the frame set; each word corresponds
    # to one role
    input_words = Input(
        shape=(input_length, ), dtype=tf.uint32,
        name='input_words')  # Switched dtype to tf specific (team1-change)
    input_roles = Input(
        shape=(input_length, ), dtype=tf.uint32,
        name='input_roles')  # Switched dtype to tf specific (team1-change)
    target_word = Input(
        shape=(1, ), dtype=tf.uint32,
        name='target_word')  # Switched dtype to tf specific (team1-change)
    target_role = Input(
        shape=(1, ), dtype=tf.uint32,
        name='target_role')  # Switched dtype to tf specific (team1-change)

    # role based embedding layer
    embedding_layer = factored_embedding(
        input_words, input_roles, n_word_vocab, n_role_vocab,
        glorot_uniform(), missing_word_id, input_length, n_factors_emb,
        n_hidden, True, using_dropout, dropout_rate)

    # non-linear layer, using 1 to initialize
    non_linearity = PReLU(alpha_initializer='ones')(embedding_layer)

    # mean on input_length direction (axis 1);
    # declared output shape of the context embedding is (n_hidden,)
    context_embedding = Lambda(lambda x: K.mean(x, axis=1),
                               name='context_embedding',
                               output_shape=(n_hidden, ))(non_linearity)

    # target word hidden layer
    tw_hidden = target_word_hidden(
        context_embedding, target_role, n_word_vocab, n_role_vocab,
        glorot_uniform(), n_hidden, n_hidden,
        using_dropout=using_dropout, dropout_rate=dropout_rate)

    # target role hidden layer
    tr_hidden = target_role_hidden(
        context_embedding, target_word, n_word_vocab, n_role_vocab,
        glorot_uniform(), n_hidden, n_hidden,
        using_dropout=using_dropout, dropout_rate=dropout_rate)

    # softmax output layer for the target word
    target_word_output = Dense(n_word_vocab,
                               activation='softmax',
                               input_shape=(n_hidden, ),
                               name='softmax_word_output')(tw_hidden)

    # softmax output layer for the target role
    target_role_output = Dense(n_role_vocab,
                               activation='softmax',
                               input_shape=(n_hidden, ),
                               name='softmax_role_output')(tr_hidden)

    self.model = Model(
        inputs=[input_words, input_roles, target_word, target_role],
        outputs=[target_word_output, target_role_output])

    # Keyword arguments keep `metrics` and `loss_weights` bound to the
    # right parameters: their positional order in compile() is not the
    # same in every Keras release.
    self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics,
                       loss_weights=loss_weights)