import numpy as np
import theano

from keras import backend as K
from keras.layers import (Activation, Dense, Embedding, Flatten, Input,
                          Lambda, LSTM, RepeatVector, merge)
from keras.layers.core import TimeDistributedDense
# Graph is the legacy container from the Keras 1.0-era API this code targets.
from keras.models import Graph, Model, Sequential
# AttentionSeq2seq comes from the seq2seq add-on library (farizrahman4u/seq2seq).
from seq2seq.models import AttentionSeq2seq

# Project-local layers and losses used below (import paths assumed; adjust to
# this repo's layout): make_fixed_embeddings, LstmAttentionLayer, FeedLSTM,
# HierarchicalSoftmax, hs_categorical_crossentropy, word_loss, cc_loss,
# update_gen_weights.


def autoe_train(hidden_size, noise_dim, glove, hypo_len, version):
    prem_input = Input(shape=(None,), dtype='int32', name='prem_input')
    hypo_input = Input(shape=(hypo_len + 1,), dtype='int32', name='hypo_input')
    train_input = Input(shape=(None,), dtype='int32', name='train_input')
    class_input = Input(shape=(3,), name='class_input')

    prem_embeddings = make_fixed_embeddings(glove, None)(prem_input)
    hypo_embeddings = make_fixed_embeddings(glove, hypo_len + 1)(hypo_input)

    premise_encoder = LSTM(output_dim=hidden_size, return_sequences=True,
                           inner_activation='sigmoid',
                           name='premise_encoder')(prem_embeddings)
    hypo_encoder = LSTM(output_dim=hidden_size, return_sequences=True,
                        inner_activation='sigmoid',
                        name='hypo_encoder')(hypo_embeddings)
    class_encoder = Dense(hidden_size, activation='tanh')(class_input)
    encoder = LstmAttentionLayer(output_dim=hidden_size, return_sequences=False,
                                 feed_state=True, name='encoder')(
                                     [hypo_encoder, premise_encoder, class_encoder])

    if version == 6:
        # Plain autoencoder: deterministic bottleneck.
        reduction = Dense(noise_dim, name='reduction', activation='tanh')(encoder)
    elif version == 7:
        # Variational autoencoder: sample the latent code.
        z_mean = Dense(noise_dim, name='z_mean')(encoder)
        z_log_sigma = Dense(noise_dim, name='z_log_sigma')(encoder)

        def sampling(args):
            z_mean, z_log_sigma = args
            # Note: batch size 64 is hard-coded into the sampler.
            epsilon = K.random_normal(shape=(64, noise_dim), mean=0., std=0.01)
            return z_mean + K.exp(z_log_sigma) * epsilon

        reduction = Lambda(sampling,
                           output_shape=lambda sh: (sh[0][0], noise_dim),
                           name='reduction')([z_mean, z_log_sigma])

        def vae_loss(args):
            z_mean, z_log_sigma = args
            return -0.5 * K.mean(1 + z_log_sigma - K.square(z_mean)
                                 - K.exp(z_log_sigma), axis=-1)

        vae = Lambda(vae_loss, output_shape=lambda sh: (sh[0][0], 1),
                     name='vae_output')([z_mean, z_log_sigma])

    merged = merge([class_input, reduction], mode='concat')
    creative = Dense(hidden_size, name='expansion', activation='tanh')(merged)

    premise_decoder = LSTM(output_dim=hidden_size, return_sequences=True,
                           inner_activation='sigmoid',
                           name='premise')(prem_embeddings)
    hypo_decoder = LSTM(output_dim=hidden_size, return_sequences=True,
                        inner_activation='sigmoid', name='hypo')(hypo_embeddings)
    attention = LstmAttentionLayer(output_dim=hidden_size, return_sequences=True,
                                   feed_state=True, name='attention')(
                                       [hypo_decoder, premise_decoder, creative])
    hs = HierarchicalSoftmax(len(glove), trainable=True,
                             name='hs')([attention, train_input])

    inputs = [prem_input, hypo_input, train_input, class_input]
    model_name = 'version' + str(version)
    model = Model(input=inputs, output=(hs if version == 6 else [hs, vae]),
                  name=model_name)

    if version == 6:
        model.compile(loss=hs_categorical_crossentropy, optimizer='adam')
    elif version == 7:
        def minimize(y_true, y_pred):
            return y_pred

        def metric(y_true, y_pred):
            return K.mean(y_pred)

        model.compile(loss=[hs_categorical_crossentropy, minimize],
                      metrics={'hs': word_loss, 'vae_output': metric},
                      optimizer='adam')
    return model
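
# Usage sketch for the VAE variant (version 7). The model has two outputs, so
# fit() needs a second target array for 'vae_output' even though `minimize`
# ignores y_true. All argument names here are illustrative assumptions, not
# part of this repo; batch_size=64 matches the shape hard-coded in `sampling`.
def _demo_autoe_vae_fit(model, prem, hypo, train_ids, classes, hs_targets):
    dummy_kl = np.zeros((len(prem), 1))  # placeholder target for the KL output
    model.fit([prem, hypo, train_ids, classes], [hs_targets, dummy_kl],
              batch_size=64, nb_epoch=1)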
def baseline_train(noise_examples, hidden_size, noise_dim, glove, hypo_len,
                   version):
    prem_input = Input(shape=(None,), dtype='int32', name='prem_input')
    hypo_input = Input(shape=(hypo_len + 1,), dtype='int32', name='hypo_input')
    noise_input = Input(shape=(1,), dtype='int32', name='noise_input')
    train_input = Input(shape=(None,), dtype='int32', name='train_input')
    class_input = Input(shape=(3,), name='class_input')
    concat_dim = hidden_size + noise_dim + 3

    prem_embeddings = make_fixed_embeddings(glove, None)(prem_input)
    hypo_embeddings = make_fixed_embeddings(glove, hypo_len + 1)(hypo_input)
    premise_layer = LSTM(output_dim=hidden_size, return_sequences=False,
                         inner_activation='sigmoid',
                         name='premise')(prem_embeddings)

    noise_layer = Embedding(noise_examples, noise_dim, input_length=1,
                            name='noise_embeddings')(noise_input)
    flat_noise = Flatten(name='noise_flatten')(noise_layer)

    merged = merge([premise_layer, class_input, flat_noise], mode='concat')
    creative = Dense(concat_dim, name='cmerge')(merged)
    # Pass hypo_embeddings through unchanged while wiring `creative` into the
    # graph so FeedLSTM can consume it as its feed layer.
    fake_merge = Lambda(lambda x: x[0],
                        output_shape=lambda x: x[0])([hypo_embeddings, creative])
    hypo_layer = FeedLSTM(output_dim=concat_dim, return_sequences=True,
                          feed_layer=creative, inner_activation='sigmoid',
                          name='attention')([fake_merge])

    hs = HierarchicalSoftmax(len(glove), trainable=True,
                             name='hs')([hypo_layer, train_input])
    inputs = [prem_input, hypo_input, noise_input, train_input, class_input]

    model_name = 'version' + str(version)
    model = Model(input=inputs, output=hs, name=model_name)
    model.compile(loss=hs_categorical_crossentropy, optimizer='adam')
    return model
def gen_train(noise_examples, hidden_size, noise_dim, glove, hypo_len, version):
    if version == 9:
        return baseline_train(noise_examples, hidden_size, noise_dim, glove,
                              hypo_len, version)
    elif version == 6 or version == 7:
        return autoe_train(hidden_size, noise_dim, glove, hypo_len, version)

    prem_input = Input(shape=(None,), dtype='int32', name='prem_input')
    hypo_input = Input(shape=(hypo_len + 1,), dtype='int32', name='hypo_input')
    noise_input = Input(shape=(1,), dtype='int32', name='noise_input')
    train_input = Input(shape=(None,), dtype='int32', name='train_input')
    class_input = Input(shape=(3,), name='class_input')

    prem_embeddings = make_fixed_embeddings(glove, None)(prem_input)
    hypo_embeddings = make_fixed_embeddings(glove, hypo_len + 1)(hypo_input)
    premise_layer = LSTM(output_dim=hidden_size, return_sequences=True,
                         inner_activation='sigmoid',
                         name='premise')(prem_embeddings)
    hypo_layer = LSTM(output_dim=hidden_size, return_sequences=True,
                      inner_activation='sigmoid', name='hypo')(hypo_embeddings)

    noise_layer = Embedding(noise_examples, noise_dim, input_length=1,
                            name='noise_embeddings')(noise_input)
    flat_noise = Flatten(name='noise_flatten')(noise_layer)

    if version == 8:
        create_input = merge([class_input, flat_noise], mode='concat')
    if version == 5:
        create_input = flat_noise
    creative = Dense(hidden_size, name='cmerge')(create_input)

    attention = LstmAttentionLayer(output_dim=hidden_size, return_sequences=True,
                                   feed_state=True, name='attention')(
                                       [hypo_layer, premise_layer, creative])
    hs = HierarchicalSoftmax(len(glove), trainable=True,
                             name='hs')([attention, train_input])

    inputs = [prem_input, hypo_input, noise_input, train_input, class_input]
    if version == 5:
        inputs = inputs[:4]  # version 5 is unconditioned: drop class_input

    model_name = 'version' + str(version)
    model = Model(input=inputs, output=hs, name=model_name)
    model.compile(loss=hs_categorical_crossentropy, optimizer='adam')
    return model
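
# Usage sketch: `gen_train` dispatches on `version` (9 -> baseline_train,
# 6/7 -> autoe_train, 5/8 built here). Hyper-parameter values are
# illustrative; `glove` is the embedding matrix loaded elsewhere in the repo.
def _demo_gen_train(glove, n_train_examples):
    # version 8 conditions on the class label; version 5 would drop class_input
    return gen_train(noise_examples=n_train_examples, hidden_size=150,
                     noise_dim=50, glove=glove, hypo_len=12, version=8)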
def baseline_test(train_model, glove, batch_size):
    version = int(train_model.name[-1])
    hidden_size = train_model.get_layer('attention').output_shape[-1]

    premise_input = Input(batch_shape=(batch_size, None, None))
    hypo_input = Input(batch_shape=(batch_size, 1), dtype='int32')
    creative_input = Input(batch_shape=(batch_size, None))
    train_input = Input(batch_shape=(batch_size, 1), dtype='int32')

    hypo_embeddings = make_fixed_embeddings(glove, 1)(hypo_input)
    hypo_layer = FeedLSTM(output_dim=hidden_size, return_sequences=True,
                          stateful=True, trainable=False,
                          feed_layer=premise_input,
                          name='attention')([hypo_embeddings])
    hs = HierarchicalSoftmax(len(glove), trainable=False,
                             name='hs')([hypo_layer, train_input])

    inputs = [hypo_input, creative_input, train_input]
    outputs = [hs]
    model = Model(input=inputs, output=outputs, name=train_model.name)
    model.compile(loss=hs_categorical_crossentropy, optimizer='adam')
    update_gen_weights(model, train_model)

    f_inputs = [train_model.get_layer('noise_embeddings').output,
                train_model.get_layer('class_input').input,
                train_model.get_layer('prem_input').input]
    func_noise = theano.function(f_inputs,
                                 train_model.get_layer('cmerge').output,
                                 allow_input_downcast=True)
    return model, None, func_noise
def attention_model(hidden_size, glove):
    prem_input = Input(shape=(None,), dtype='int32')
    hypo_input = Input(shape=(None,), dtype='int32')
    prem_embeddings = make_fixed_embeddings(glove, None)(prem_input)
    hypo_embeddings = make_fixed_embeddings(glove, None)(hypo_input)

    premise_layer = LSTM(output_dim=hidden_size, return_sequences=True,
                         inner_activation='sigmoid')(prem_embeddings)
    hypo_layer = LSTM(output_dim=hidden_size, return_sequences=True,
                      inner_activation='sigmoid')(hypo_embeddings)
    attention = LstmAttentionLayer(output_dim=hidden_size)(
        [hypo_layer, premise_layer])
    final_dense = Dense(3, activation='softmax')(attention)

    model = Model(input=[prem_input, hypo_input], output=final_dense)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
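
# Usage sketch: the classifier consumes padded premise/hypothesis id arrays
# and one-hot labels over the three NLI classes. Array names and the
# hidden size are illustrative assumptions.
def _demo_attention_model(glove, prem_ids, hypo_ids, labels):
    model = attention_model(hidden_size=150, glove=glove)
    model.fit([prem_ids, hypo_ids], labels, batch_size=64, nb_epoch=1)
    return model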
def discriminator(glove, hidden_size):
    hypo_input = Input(shape=(None,), dtype='int32')
    embeds = make_fixed_embeddings(glove, None)(hypo_input)
    lstm = LSTM(hidden_size, inner_activation='sigmoid')(embeds)
    output = Dense(1, activation='sigmoid')(lstm)

    discriminator = Model([hypo_input], output)
    discriminator.compile(loss='binary_crossentropy', optimizer='adam')
    return discriminator
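
# Usage sketch: train the discriminator to separate human-written hypotheses
# (label 1) from generated ones (label 0). Inputs are hypothesis token ids;
# names are illustrative assumptions.
def _demo_discriminator(glove, real_hypos, fake_hypos):
    disc = discriminator(glove, hidden_size=150)
    x = np.concatenate([real_hypos, fake_hypos])
    y = np.concatenate([np.ones(len(real_hypos)), np.zeros(len(fake_hypos))])
    disc.fit(x, y, batch_size=64, nb_epoch=1)
    return disc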
def create_f_model(examples, glove, hidden_size=10, embed_size=50,
                   batch_size=64, hs=True, ci=True, prem_len=22, hypo_len=12):
    batch_input_shape = (batch_size, prem_len, embed_size)

    em_model = Sequential()
    em_model.add(Embedding(examples, embed_size, input_length=1,
                           batch_input_shape=(batch_size, 1)))
    em_model.add(Flatten())
    em_model.add(Dense(embed_size))
    em_model.add(RepeatVector(prem_len))

    input_dim = embed_size * 2
    if ci:
        input_dim += 3
    seq2seq = AttentionSeq2seq(
        batch_input_shape=batch_input_shape,
        input_dim=input_dim,
        hidden_dim=embed_size,
        output_dim=embed_size,
        output_length=hypo_len,
        depth=1,
        bidirectional=False,
    )

    class_model = Sequential()
    class_model.add(RepeatVector(prem_len))

    graph = Graph()
    graph.add_input(name='premise_input',
                    batch_input_shape=(batch_size, prem_len), dtype='int')
    graph.add_node(make_fixed_embeddings(glove, prem_len), name='word_vec',
                   input='premise_input')
    graph.add_input(name='embed_input', batch_input_shape=(batch_size, 1),
                    dtype='int')
    graph.add_node(em_model, name='em_model', input='embed_input')

    seq_inputs = ['word_vec', 'em_model']
    if ci:
        graph.add_input(name='class_input', batch_input_shape=(batch_size, 3))
        graph.add_node(class_model, name='class_model', input='class_input')
        seq_inputs += ['class_model']
    graph.add_node(seq2seq, name='seq2seq', inputs=seq_inputs,
                   merge_mode='concat')

    if hs:
        graph.add_input(name='train_input',
                        batch_input_shape=(batch_size, hypo_len), dtype='int32')
        graph.add_node(HierarchicalSoftmax(len(glove), input_dim=embed_size,
                                           input_length=hypo_len),
                       name='softmax', inputs=['seq2seq', 'train_input'],
                       merge_mode='join')
    else:
        graph.add_node(TimeDistributedDense(len(glove)), name='tdd',
                       input='seq2seq')
        graph.add_node(Activation('softmax'), name='softmax', input='tdd')
    graph.add_output(name='output', input='softmax')

    loss_fun = hs_categorical_crossentropy if hs else 'categorical_crossentropy'
    graph.compile(loss={'output': loss_fun}, optimizer='adam',
                  sample_weight_modes={'output': 'temporal'})
    return graph
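
# Usage sketch: the legacy Graph API trains from a dict keyed by input and
# output names. Shown with hs=True and ci=True, so all five keys are needed;
# array names and shapes are assumptions matching the batch_input_shapes above.
def _demo_f_model_fit(graph, prem_ids, noise_ids, classes, train_ids, targets):
    graph.fit({'premise_input': prem_ids,   # (n, 22) premise word ids
               'embed_input': noise_ids,    # (n, 1) per-example noise index
               'class_input': classes,      # (n, 3) one-hot class labels
               'train_input': train_ids,    # (n, 12) target word ids for HS
               'output': targets},
              batch_size=64, nb_epoch=1)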
def gen_test(train_model, glove, batch_size):
    version = int(train_model.name[-1])
    hidden_size = train_model.get_layer('premise').output_shape[-1]

    premise_input = Input(batch_shape=(batch_size, None, None))
    hypo_input = Input(batch_shape=(batch_size, 1), dtype='int32')
    creative_input = Input(batch_shape=(batch_size, None))
    train_input = Input(batch_shape=(batch_size, 1), dtype='int32')

    hypo_embeddings = make_fixed_embeddings(glove, 1)(hypo_input)
    if version == 1 or version == 3 or version == 4:
        hypo_layer = LSTM(output_dim=hidden_size, return_sequences=True,
                          stateful=True, unroll=True, trainable=False,
                          inner_activation='sigmoid',
                          name='hypo')(hypo_embeddings)
    elif version == 2:
        pre_hypo_layer = LSTM(output_dim=hidden_size - 3, return_sequences=True,
                              stateful=True, trainable=False,
                              inner_activation='sigmoid',
                              name='hypo')(hypo_embeddings)
        # Was hard-coded to a batch of 64; use batch_size like the other inputs.
        class_input = Input(batch_shape=(batch_size, 3), name='class_input')
        class_repeat = RepeatVector(1)(class_input)
        hypo_layer = merge([pre_hypo_layer, class_repeat], mode='concat')

    attention = LstmAttentionLayer(output_dim=hidden_size, return_sequences=True,
                                   stateful=True, unroll=True, trainable=False,
                                   feed_state=False, name='attention')(
                                       [hypo_layer, premise_input, creative_input])
    hs = HierarchicalSoftmax(len(glove), trainable=False,
                             name='hs')([attention, train_input])

    inputs = [premise_input, hypo_input, creative_input, train_input]
    if version == 2:
        inputs.append(class_input)
    outputs = [hs]
    model = Model(input=inputs, output=outputs, name=train_model.name)
    model.compile(loss=hs_categorical_crossentropy, optimizer='adam')
    update_gen_weights(model, train_model)

    func_premise = theano.function([train_model.get_layer('prem_input').input],
                                   train_model.get_layer('premise').output,
                                   allow_input_downcast=True)
    if version == 1 or version == 4:
        f_inputs = [train_model.get_layer('noise_embeddings').output,
                    train_model.get_layer('class_input').input]
        func_noise = theano.function(f_inputs,
                                     train_model.get_layer('cmerge').output,
                                     allow_input_downcast=True)
    elif version == 2 or version == 3:
        noise = train_model.get_layer('noise_flatten')
        func_noise = theano.function([noise.get_input_at(0)], noise.output,
                                     allow_input_downcast=True)
    return model, func_premise, func_noise
def gen_test(train_model, glove, batch_size):
    version = int(train_model.name[-1])
    if version == 9:
        return baseline_test(train_model, glove, batch_size)
    hidden_size = train_model.get_layer('premise').output_shape[-1]

    premise_input = Input(batch_shape=(batch_size, None, None))
    hypo_input = Input(batch_shape=(batch_size, 1), dtype='int32')
    creative_input = Input(batch_shape=(batch_size, None))
    train_input = Input(batch_shape=(batch_size, 1), dtype='int32')

    hypo_embeddings = make_fixed_embeddings(glove, 1)(hypo_input)
    hypo_layer = LSTM(output_dim=hidden_size, return_sequences=True,
                      stateful=True, unroll=False, trainable=False,
                      inner_activation='sigmoid', name='hypo')(hypo_embeddings)

    att_inputs = ([hypo_layer, premise_input] if version == 5
                  else [hypo_layer, premise_input, creative_input])
    attention = LstmAttentionLayer(output_dim=hidden_size, return_sequences=True,
                                   stateful=True, unroll=False, trainable=False,
                                   feed_state=False, name='attention')(att_inputs)
    hs = HierarchicalSoftmax(len(glove), trainable=False,
                             name='hs')([attention, train_input])

    inputs = [premise_input, hypo_input, creative_input, train_input]
    outputs = [hs]
    model = Model(input=inputs, output=outputs, name=train_model.name)
    model.compile(loss=hs_categorical_crossentropy, optimizer='adam')
    update_gen_weights(model, train_model)

    func_premise = theano.function([train_model.get_layer('prem_input').input],
                                   train_model.get_layer('premise').output,
                                   allow_input_downcast=True)
    if version == 5 or version == 8:
        f_inputs = [train_model.get_layer('noise_embeddings').output]
        if version == 8:
            f_inputs += [train_model.get_layer('class_input').input]
        func_noise = theano.function(f_inputs,
                                     train_model.get_layer('cmerge').output,
                                     allow_input_downcast=True)
    elif version == 6 or version == 7:
        noise_input = train_model.get_layer('reduction').output
        class_input = train_model.get_layer('class_input').input
        noise_output = train_model.get_layer('expansion').output
        func_noise = theano.function([noise_input, class_input], noise_output,
                                     allow_input_downcast=True,
                                     on_unused_input='ignore')
    return model, func_premise, func_noise
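
# Decoding sketch: the test model is stateful and consumes one word per call,
# so generation precomputes the premise encoding and the noise vector with the
# returned theano functions, resets the LSTM states, then feeds each predicted
# word back in. Greedy argmax and all array names are assumptions; the repo's
# own generation code may sample differently, and the inputs that func_noise
# expects vary by version (see the branches above).
def _demo_generate(model, func_premise, func_noise, prem_ids, noise_args,
                   batch_size, hypo_len):
    premise = func_premise(prem_ids)         # precomputed premise encoding
    creative = func_noise(*noise_args)       # latent 'creative' vector
    model.reset_states()                     # clear the stateful LSTMs
    word = np.zeros((batch_size, 1), dtype='int32')   # assumed start token id 0
    dummy = np.zeros((batch_size, 1), dtype='int32')  # placeholder train_input
    words = []
    for _ in range(hypo_len):
        probs = model.predict([premise, word, creative, dummy],
                              batch_size=batch_size)
        word = probs.argmax(axis=-1).reshape((batch_size, 1))
        words.append(word)
    return np.hstack(words)                  # (batch_size, hypo_len) word ids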
def create_o_train_model(examples, hidden_size, embed_size, glove,
                         batch_size=64, prem_len=22, hypo_len=13):
    premise_layer = LSTM(output_dim=hidden_size, return_sequences=True)
    hypo_layer = LSTM(output_dim=hidden_size, return_sequences=True)
    attention = LstmAttentionLayer(hidden_size, return_sequences=True,
                                   feed_state=True)
    noise_layer = Embedding(examples, embed_size, input_length=1)

    graph = Graph()
    graph.add_input(name='premise_input',
                    batch_input_shape=(batch_size, prem_len), dtype='int32')
    graph.add_node(make_fixed_embeddings(glove, prem_len), name='prem_word_vec',
                   input='premise_input')
    graph.add_node(premise_layer, name='premise', input='prem_word_vec')

    graph.add_input(name='noise_input', batch_input_shape=(batch_size, 1),
                    dtype='int32')
    graph.add_node(noise_layer, name='noise_embeddings_pre', input='noise_input')
    graph.add_node(Flatten(), name='noise_embeddings',
                   input='noise_embeddings_pre')
    graph.add_input(name='class_input', batch_input_shape=(batch_size, 3))
    graph.add_node(Dense(hidden_size), name='creative',
                   inputs=['noise_embeddings', 'class_input'],
                   merge_mode='concat')

    graph.add_input(name='hypo_input',
                    batch_input_shape=(batch_size, hypo_len), dtype='int32')
    graph.add_node(make_fixed_embeddings(glove, hypo_len), name='hypo_word_vec',
                   input='hypo_input')
    graph.add_node(hypo_layer, name='hypo', input='hypo_word_vec')

    graph.add_node(attention, name='attention',
                   inputs=['premise', 'hypo', 'creative'], merge_mode='join')
    graph.add_input(name='train_input',
                    batch_input_shape=(batch_size, hypo_len), dtype='int32')
    graph.add_node(HierarchicalSoftmax(len(glove), input_dim=hidden_size,
                                       input_length=hypo_len),
                   name='softmax', inputs=['attention', 'train_input'],
                   merge_mode='join')
    graph.add_output(name='output', input='softmax')

    graph.compile(loss={'output': hs_categorical_crossentropy}, optimizer='adam')
    return graph
def create_o_test_model(train_model, examples, hidden_size, embed_size, glove,
                        batch_size=64, prem_len=22):
    graph = Graph()
    hypo_layer = LSTM(output_dim=hidden_size,
                      batch_input_shape=(batch_size, 1, embed_size),
                      return_sequences=True, stateful=True, trainable=False)
    graph.add_input(name='hypo_input', batch_input_shape=(batch_size, 1),
                    dtype='int32')
    graph.add_node(make_fixed_embeddings(glove, 1), name='hypo_word_vec',
                   input='hypo_input')
    graph.add_node(hypo_layer, name='hypo', input='hypo_word_vec')

    graph.add_input(name='premise',
                    batch_input_shape=(batch_size, prem_len, embed_size))
    graph.add_input(name='creative', batch_input_shape=(batch_size, embed_size))
    attention = LstmAttentionLayer(hidden_size, return_sequences=True,
                                   stateful=True, trainable=False,
                                   feed_state=False)
    graph.add_node(attention, name='attention',
                   inputs=['premise', 'hypo', 'creative'], merge_mode='join')

    graph.add_input(name='train_input', batch_input_shape=(batch_size, 1),
                    dtype='int32')
    hs = HierarchicalSoftmax(len(glove), input_dim=hidden_size, input_length=1,
                             trainable=False)
    graph.add_node(hs, name='softmax', inputs=['attention', 'train_input'],
                   merge_mode='join')
    graph.add_output(name='output', input='softmax')

    # Copy the trained weights into the stateful test-time layers.
    hypo_layer.set_weights(train_model.nodes['hypo'].get_weights())
    attention.set_weights(train_model.nodes['attention'].get_weights())
    hs.set_weights(train_model.nodes['softmax'].get_weights())
    graph.compile(loss={'output': hs_categorical_crossentropy}, optimizer='adam')

    func_premise = theano.function(
        [train_model.inputs['premise_input'].get_input()],
        train_model.nodes['premise'].get_output(False),
        allow_input_downcast=True)
    func_noise = theano.function(
        [train_model.inputs['noise_input'].get_input(),
         train_model.inputs['class_input'].get_input()],
        train_model.nodes['creative'].get_output(False),
        allow_input_downcast=True)
    return graph, func_premise, func_noise
def gen_train(noise_examples, hidden_size, glove, hypo_len, version=1,
              control_layer=True, class_w=0.1):
    noise_dim = hidden_size - 3 if version == 1 else hidden_size
    prem_input = Input(shape=(None,), dtype='int32', name='prem_input')
    hypo_input = Input(shape=(hypo_len + 1,), dtype='int32', name='hypo_input')
    noise_input = Input(shape=(1,), dtype='int32', name='noise_input')
    train_input = Input(shape=(None,), dtype='int32', name='train_input')
    class_input = Input(shape=(3,), name='class_input')

    prem_embeddings = make_fixed_embeddings(glove, None)(prem_input)
    hypo_embeddings = make_fixed_embeddings(glove, hypo_len + 1)(hypo_input)
    premise_layer = LSTM(output_dim=hidden_size, return_sequences=True,
                         inner_activation='sigmoid',
                         name='premise')(prem_embeddings)

    if version == 1 or version == 3 or version == 4:
        hypo_layer = LSTM(output_dim=hidden_size, return_sequences=True,
                          inner_activation='sigmoid',
                          name='hypo')(hypo_embeddings)
    elif version == 2:
        pre_hypo_layer = LSTM(output_dim=hidden_size - 3, return_sequences=True,
                              inner_activation='sigmoid',
                              name='hypo')(hypo_embeddings)
        class_repeat = RepeatVector(hypo_len + 1)(class_input)
        hypo_layer = merge([pre_hypo_layer, class_repeat], mode='concat')

    noise_layer = Embedding(noise_examples, noise_dim, input_length=1,
                            name='noise_embeddings')(noise_input)
    flat_noise = Flatten(name='noise_flatten')(noise_layer)

    if version == 1:
        creative = merge([class_input, flat_noise], mode='concat', name='cmerge')
    elif version == 2 or version == 3:
        creative = flat_noise
    elif version == 4:
        # Fixed class-gating weights: each class opens its own 50-unit slice
        # of the noise vector (assumes noise_dim == 150, i.e. hidden_size == 150
        # for version 4).
        W = [np.zeros((3, 150)), np.zeros(150)]
        W[0][0][:50] = np.ones(50)
        W[0][1][50:100] = np.ones(50)
        W[0][2][-50:] = np.ones(50)
        class_sig = Dense(noise_dim, name='class_sig',
                          trainable=False)(class_input)
        creative = merge([flat_noise, class_sig], mode='mul', name='cmerge')

    attention = LstmAttentionLayer(output_dim=hidden_size, return_sequences=True,
                                   feed_state=True, name='attention')(
                                       [hypo_layer, premise_layer, creative])
    hs = HierarchicalSoftmax(len(glove), trainable=True,
                             name='hs')([attention, train_input])

    if control_layer:
        control_lstm = LstmAttentionLayer(output_dim=hidden_size)(
            [attention, premise_layer])
        control = Dense(3, activation='softmax', name='control')(control_lstm)

    inputs = [prem_input, hypo_input, noise_input, train_input, class_input]
    if version == 3:
        inputs = inputs[:4]
    outputs = [hs, control] if control_layer else [hs]

    model_name = 'version' + str(version)
    model = Model(input=inputs, output=outputs, name=model_name)
    if control_layer:
        model.compile(loss=[hs_categorical_crossentropy,
                            'categorical_crossentropy'],
                      optimizer='adam', loss_weights=[1.0, class_w],
                      metrics={'hs': word_loss, 'control': [cc_loss, 'acc']})
    else:
        model.compile(loss=hs_categorical_crossentropy, optimizer='adam')
    if version == 4:
        model.get_layer('class_sig').set_weights(W)
    return model
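
# Usage sketch for the control-layer variant: two outputs need two targets,
# the per-word HS target and the one-hot class the control head must recover
# from the generated hypothesis. Argument names are illustrative assumptions.
def _demo_control_fit(model, prem, hypo, noise_ids, train_ids, classes,
                      hs_targets):
    model.fit([prem, hypo, noise_ids, train_ids, classes],
              [hs_targets, classes], batch_size=64, nb_epoch=1)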