def autoe_train(hidden_size, noise_dim, glove, hypo_len, version):

    prem_input = Input(shape=(None,), dtype='int32', name='prem_input')
    hypo_input = Input(shape=(hypo_len + 1,), dtype='int32', name='hypo_input')
    train_input = Input(shape=(None,), dtype='int32', name='train_input')
    class_input = Input(shape=(3,), name='class_input')

    prem_embeddings = make_fixed_embeddings(glove, None)(prem_input)
    hypo_embeddings = make_fixed_embeddings(glove, hypo_len + 1)(hypo_input)
    premise_encoder = LSTM(output_dim=hidden_size, return_sequences=True,
                            inner_activation='sigmoid', name='premise_encoder')(prem_embeddings)

    hypo_encoder = LSTM(output_dim=hidden_size, return_sequences=True,
                            inner_activation='sigmoid', name='hypo_encoder')(hypo_embeddings)
    class_encoder = Dense(hidden_size, activation='tanh')(class_input)

    encoder = LstmAttentionLayer(output_dim=hidden_size, return_sequences=False,
                  feed_state=True, name='encoder')([hypo_encoder, premise_encoder, class_encoder])
    if version == 6:
        reduction = Dense(noise_dim, name='reduction', activation='tanh')(encoder)
    elif version == 7:
        z_mean = Dense(noise_dim, name='z_mean')(encoder)
        z_log_sigma = Dense(noise_dim, name='z_log_sigma')(encoder)
          
        # Reparameterization trick: sample z = z_mean + exp(z_log_sigma) * epsilon so the
        # gradient can flow through the random draw. Note that the batch size (64) and the
        # noise standard deviation (0.01) are hardcoded here.
        def sampling(args):
            z_mean, z_log_sigma = args
            epsilon = K.random_normal(shape=(64, noise_dim), mean=0., std=0.01)
            return z_mean + K.exp(z_log_sigma) * epsilon
        reduction = Lambda(sampling, output_shape=lambda sh: (sh[0][0], noise_dim), name='reduction')([z_mean, z_log_sigma])

        # KL-style regularizer (as in the standard Keras VAE example), exposed as an extra
        # model output ('vae_output') and minimized directly by the compile step below.
        def vae_loss(args):
            z_mean, z_log_sigma = args
            return -0.5 * K.mean(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
        vae = Lambda(vae_loss, output_shape=lambda sh: (sh[0][0], 1), name='vae_output')([z_mean, z_log_sigma])

    # Decoder: expand the class label + latent code back to hidden_size and use it to
    # seed the attention over freshly encoded premise/hypothesis states.
    merged = merge([class_input, reduction], mode='concat')
    creative = Dense(hidden_size, name='expansion', activation='tanh')(merged)
    premise_decoder = LSTM(output_dim=hidden_size, return_sequences=True,
                            inner_activation='sigmoid', name='premise')(prem_embeddings)

    hypo_decoder = LSTM(output_dim=hidden_size, return_sequences=True,
                            inner_activation='sigmoid', name='hypo')(hypo_embeddings)
    attention = LstmAttentionLayer(output_dim=hidden_size, return_sequences=True,
                     feed_state=True, name='attention')([hypo_decoder, premise_decoder, creative])

    hs = HierarchicalSoftmax(len(glove), trainable = True, name='hs')([attention, train_input])

    inputs = [prem_input, hypo_input, train_input, class_input]

    model_name = 'version' + str(version)
    model = Model(input=inputs, output=(hs if version == 6 else [hs, vae]), name = model_name)
    if version == 6:
        model.compile(loss=hs_categorical_crossentropy, optimizer='adam')
    elif version == 7:
        # The 'vae_output' head already computes its regularization term, so its loss
        # simply passes the prediction through and the metric reports its batch mean.
        def minimize(y_true, y_pred):
            return y_pred
        def metric(y_true, y_pred):
            return K.mean(y_pred)
        model.compile(loss=[hs_categorical_crossentropy, minimize],
                      metrics={'hs': word_loss, 'vae_output': metric}, optimizer='adam')
    return model
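
# A small, self-contained numpy illustration of what the two Lambda layers in the
# version-7 branch above compute (the reparameterization sample and the KL-style
# penalty). Values and sizes are illustrative only and play no role in the model.
def _vae_lambdas_numpy_sketch(noise_dim=47, batch=64):
    import numpy as np
    z_mean = np.random.randn(batch, noise_dim).astype('float32')
    z_log_sigma = 0.01 * np.random.randn(batch, noise_dim).astype('float32')
    epsilon = np.random.normal(0.0, 0.01, size=(batch, noise_dim)).astype('float32')
    z = z_mean + np.exp(z_log_sigma) * epsilon  # what the 'reduction' Lambda returns
    kl = -0.5 * np.mean(1 + z_log_sigma - z_mean ** 2 - np.exp(z_log_sigma), axis=-1)
    return z, kl  # kl has one value per example in the batch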
def baseline_train(noise_examples, hidden_size, noise_dim, glove, hypo_len, version):
    prem_input = Input(shape=(None,), dtype='int32', name='prem_input')
    hypo_input = Input(shape=(hypo_len + 1,), dtype='int32', name='hypo_input')
    noise_input = Input(shape=(1,), dtype='int32', name='noise_input')
    train_input = Input(shape=(None,), dtype='int32', name='train_input')
    class_input = Input(shape=(3,), name='class_input')
    concat_dim = hidden_size + noise_dim + 3
    prem_embeddings = make_fixed_embeddings(glove, None)(prem_input)
    hypo_embeddings = make_fixed_embeddings(glove, hypo_len + 1)(hypo_input)

    premise_layer = LSTM(output_dim=hidden_size, return_sequences=False,
                            inner_activation='sigmoid', name='premise')(prem_embeddings)
    
    noise_layer = Embedding(noise_examples, noise_dim,
                            input_length = 1, name='noise_embeddings')(noise_input)
    flat_noise = Flatten(name='noise_flatten')(noise_layer)
    merged = merge([premise_layer, class_input, flat_noise], mode='concat')
    creative = Dense(concat_dim, name='cmerge')(merged)
    # Identity Lambda over [hypo_embeddings, creative]: it only forwards the embeddings,
    # but it pulls 'creative' into the graph so the FeedLSTM (which receives it through
    # feed_layer=) stays connected to the premise, class and noise inputs.
    fake_merge = Lambda(lambda x: x[0], output_shape=lambda x: x[0])([hypo_embeddings, creative])
    hypo_layer = FeedLSTM(output_dim=concat_dim, return_sequences=True,
                          feed_layer=creative, inner_activation='sigmoid',
                          name='attention')([fake_merge])

    hs = HierarchicalSoftmax(len(glove), trainable = True, name='hs')([hypo_layer, train_input])
    inputs = [prem_input, hypo_input, noise_input, train_input, class_input]


    model_name = 'version' + str(version)
    model = Model(input=inputs, output=hs, name = model_name)
    model.compile(loss=hs_categorical_crossentropy, optimizer='adam')

    return model
def gen_train(noise_examples, hidden_size, noise_dim, glove, hypo_len,
              version):
    if version == 9:
        return baseline_train(noise_examples, hidden_size, noise_dim, glove,
                              hypo_len, version)
    elif version == 6 or version == 7:
        return autoe_train(hidden_size, noise_dim, glove, hypo_len, version)

    prem_input = Input(shape=(None, ), dtype='int32', name='prem_input')
    hypo_input = Input(shape=(hypo_len + 1, ),
                       dtype='int32',
                       name='hypo_input')
    noise_input = Input(shape=(1, ), dtype='int32', name='noise_input')
    train_input = Input(shape=(None, ), dtype='int32', name='train_input')
    class_input = Input(shape=(3, ), name='class_input')

    prem_embeddings = make_fixed_embeddings(glove, None)(prem_input)
    hypo_embeddings = make_fixed_embeddings(glove, hypo_len + 1)(hypo_input)
    premise_layer = LSTM(output_dim=hidden_size,
                         return_sequences=True,
                         inner_activation='sigmoid',
                         name='premise')(prem_embeddings)

    hypo_layer = LSTM(output_dim=hidden_size,
                      return_sequences=True,
                      inner_activation='sigmoid',
                      name='hypo')(hypo_embeddings)
    noise_layer = Embedding(noise_examples,
                            noise_dim,
                            input_length=1,
                            name='noise_embeddings')(noise_input)
    flat_noise = Flatten(name='noise_flatten')(noise_layer)
    if version == 8:
        # version 8: condition the generator on both the class label and the noise
        create_input = merge([class_input, flat_noise], mode='concat')
    if version == 5:
        # version 5: noise only (class_input is dropped from the model inputs below)
        create_input = flat_noise

    creative = Dense(hidden_size, name='cmerge')(create_input)
    attention = LstmAttentionLayer(
        output_dim=hidden_size,
        return_sequences=True,
        feed_state=True,
        name='attention')([hypo_layer, premise_layer, creative])

    hs = HierarchicalSoftmax(len(glove), trainable=True,
                             name='hs')([attention, train_input])

    inputs = [prem_input, hypo_input, noise_input, train_input, class_input]
    if version == 5:
        inputs = inputs[:4]

    model_name = 'version' + str(version)
    model = Model(input=inputs, output=hs, name=model_name)
    model.compile(loss=hs_categorical_crossentropy, optimizer='adam')

    return model
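
# Hedged sketch of how the version id selects a generator variant; the sizes are
# illustrative and `glove` stands for the embedding matrix the data-loading code
# passes to make_fixed_embeddings. Not called anywhere; for reference only.
def _gen_train_dispatch_sketch(glove):
    gen_v5 = gen_train(noise_examples=100000, hidden_size=150, noise_dim=150,
                       glove=glove, hypo_len=12, version=5)   # noise-only conditioning
    gen_v7 = gen_train(noise_examples=100000, hidden_size=150, noise_dim=47,
                       glove=glove, hypo_len=12, version=7)   # routed to autoe_train (VAE)
    gen_v9 = gen_train(noise_examples=100000, hidden_size=150, noise_dim=150,
                       glove=glove, hypo_len=12, version=9)   # routed to baseline_train (FeedLSTM)
    return gen_v5, gen_v7, gen_v9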
def baseline_test(train_model, glove, batch_size):
    version = int(train_model.name[-1])
    hidden_size = train_model.get_layer('attention').output_shape[-1]    
    
    premise_input = Input(batch_shape=(batch_size, None, None))
    hypo_input = Input(batch_shape=(batch_size, 1), dtype='int32')
    creative_input = Input(batch_shape=(batch_size, None))
    train_input = Input(batch_shape=(batch_size, 1), dtype='int32')

    hypo_embeddings = make_fixed_embeddings(glove, 1)(hypo_input)
    hypo_layer = FeedLSTM(output_dim=hidden_size, return_sequences=True, 
                         stateful = True, trainable= False, feed_layer = premise_input,
                         name='attention')([hypo_embeddings])
    hs = HierarchicalSoftmax(len(glove), trainable = False, name ='hs')([hypo_layer, train_input])

    inputs = [hypo_input, creative_input, train_input]
    outputs = [hs]

    model = Model(input=inputs, output=outputs, name=train_model.name)
    model.compile(loss=hs_categorical_crossentropy, optimizer='adam')

    update_gen_weights(model, train_model)
    f_inputs = [train_model.get_layer('noise_embeddings').output,
                train_model.get_layer('class_input').input,
                train_model.get_layer('prem_input').input]
    # Computes the 'cmerge' vector (premise encoding + class label + per-example noise)
    # that drives generation in the baseline model.
    func_noise = theano.function(f_inputs, train_model.get_layer('cmerge').output,
                                 allow_input_downcast=True)

    return model, None, func_noise
def attention_model(hidden_size, glove):
        
    prem_input = Input(shape=(None,), dtype='int32')
    hypo_input = Input(shape=(None,), dtype='int32')
    
    prem_embeddings = make_fixed_embeddings(glove, None)(prem_input)
    hypo_embeddings = make_fixed_embeddings(glove, None)(hypo_input)
    premise_layer = LSTM(output_dim=hidden_size, return_sequences=True, 
                            inner_activation='sigmoid')(prem_embeddings)
    hypo_layer = LSTM(output_dim=hidden_size, return_sequences=True, 
                            inner_activation='sigmoid')(hypo_embeddings)    
    attention = LstmAttentionLayer(output_dim=hidden_size)([hypo_layer, premise_layer])
    final_dense = Dense(3, activation='softmax')(attention)
    
    model = Model(input=[prem_input, hypo_input], output=final_dense)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model  
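
# Hedged usage sketch for the 3-way NLI classifier above; `glove`, the padded id
# arrays and the one-hot labels are assumed to come from the data-loading code.
def _attention_model_usage_sketch(glove, premise_ids, hypo_ids, labels_one_hot):
    clf = attention_model(hidden_size=150, glove=glove)          # size is illustrative
    clf.fit([premise_ids, hypo_ids], labels_one_hot, batch_size=64, nb_epoch=1)
    return clf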
def discriminator(glove, hidden_size):
    
    hypo_input = Input(shape=(None,), dtype='int32')
    embeds = make_fixed_embeddings(glove, None)(hypo_input)
    lstm = LSTM(hidden_size, inner_activation='sigmoid')(embeds)
    output = Dense(1, activation='sigmoid')(lstm)
    discriminator = Model([hypo_input], output)
    discriminator.compile(loss='binary_crossentropy', optimizer='adam')
    return discriminator
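
# Minimal training sketch for the discriminator, with random stand-ins for data;
# `glove` is assumed to be the embedding matrix used by make_fixed_embeddings and
# the 0/1 label convention here is purely illustrative.
def _discriminator_usage_sketch(glove):
    import numpy as np
    d = discriminator(glove, hidden_size=150)
    hypo_ids = np.random.randint(1, len(glove), size=(64, 12))   # padded hypothesis word ids
    labels = np.random.randint(0, 2, size=(64, 1))               # e.g. 1 = human-written, 0 = generated
    d.train_on_batch(hypo_ids, labels)
    return d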
def create_f_model(examples, glove, hidden_size = 10, embed_size = 50, batch_size = 64, 
                 hs = True, ci = True, prem_len = 22, hypo_len = 12):
    
    batch_input_shape = (batch_size, prem_len, embed_size)
    
    em_model = Sequential()    
    em_model.add(Embedding(examples, embed_size, input_length = 1, batch_input_shape=(batch_size,1)))
    em_model.add(Flatten())
    em_model.add(Dense(embed_size))
    em_model.add(RepeatVector(prem_len))
    
    input_dim = embed_size * 2
    if ci:
        input_dim += 3
    seq2seq = AttentionSeq2seq(
        batch_input_shape = batch_input_shape,
        input_dim = input_dim,
        hidden_dim=embed_size,
        output_dim=embed_size,
        output_length=hypo_len,
        depth=1,
        bidirectional=False,
    )

    class_model = Sequential()
    class_model.add(RepeatVector(prem_len))
    
    graph = Graph()
    graph.add_input(name='premise_input', batch_input_shape=(batch_size, prem_len), dtype = 'int')
    graph.add_node(make_fixed_embeddings(glove, prem_len), name = 'word_vec', input='premise_input')
    
    graph.add_input(name='embed_input', batch_input_shape=(batch_size,1), dtype='int')
    graph.add_node(em_model, name='em_model', input='embed_input')
    
    seq_inputs = ['word_vec', 'em_model']
    
    if ci:
        graph.add_input(name='class_input', batch_input_shape=(batch_size,3))
        graph.add_node(class_model, name='class_model', input='class_input')
        seq_inputs += ['class_model']
   
    graph.add_node(seq2seq, name='seq2seq', inputs=seq_inputs, merge_mode='concat')
    
    if hs: 
        graph.add_input(name='train_input', batch_input_shape=(batch_size, hypo_len), dtype='int32')
        graph.add_node(HierarchicalSoftmax(len(glove), input_dim = embed_size, input_length = hypo_len), 
                   name = 'softmax', inputs=['seq2seq','train_input'], 
                   merge_mode = 'join')
    else:
        graph.add_node(TimeDistributedDense(len(glove)), name='tdd', input='seq2seq')
        graph.add_node(Activation('softmax'), name='softmax', input='tdd')

    graph.add_output(name='output', input='softmax')
    loss_fun = hs_categorical_crossentropy if hs else 'categorical_crossentropy'
    graph.compile(loss={'output':loss_fun}, optimizer='adam', sample_weight_modes={'output':'temporal'})
    return graph
def gen_test(train_model, glove, batch_size):
    
    version = int(train_model.name[-1])
    
    hidden_size = train_model.get_layer('premise').output_shape[-1] 
    
    premise_input = Input(batch_shape=(batch_size, None, None))
    hypo_input = Input(batch_shape=(batch_size, 1), dtype='int32')
    creative_input = Input(batch_shape=(batch_size, None))
    train_input = Input(batch_shape=(batch_size, 1), dtype='int32')
    
    hypo_embeddings = make_fixed_embeddings(glove, 1)(hypo_input) 
    
    if version == 1 or version == 3 or version == 4:
        hypo_layer = LSTM(output_dim=hidden_size, return_sequences=True, stateful=True, unroll=True,
            trainable=False, inner_activation='sigmoid', name='hypo')(hypo_embeddings)
    elif version == 2:
        pre_hypo_layer = LSTM(output_dim=hidden_size - 3, return_sequences=True, stateful=True,
            trainable=False, inner_activation='sigmoid', name='hypo')(hypo_embeddings)
        class_input = Input(batch_shape=(batch_size, 3), name='class_input')
        class_repeat = RepeatVector(1)(class_input)
        hypo_layer = merge([pre_hypo_layer, class_repeat], mode='concat')
    
    attention = LstmAttentionLayer(output_dim=hidden_size, return_sequences=True, stateful=True, unroll=True,
        trainable=False, feed_state=False, name='attention')([hypo_layer, premise_input, creative_input])

    hs = HierarchicalSoftmax(len(glove), trainable = False, name ='hs')([attention, train_input])
    
    
    inputs = [premise_input, hypo_input, creative_input, train_input]
    if version == 2:
        inputs.append(class_input)
    outputs = [hs]    
         
    model = Model(input=inputs, output=outputs, name=train_model.name)
    model.compile(loss=hs_categorical_crossentropy, optimizer='adam')
    
    update_gen_weights(model, train_model)
    
    func_premise = theano.function([train_model.get_layer('prem_input').input],
                                    train_model.get_layer('premise').output, 
                                    allow_input_downcast=True)
    if version == 1 or version == 4:   
        f_inputs = [train_model.get_layer('noise_embeddings').output,
                    train_model.get_layer('class_input').input]
        func_noise = theano.function(f_inputs, train_model.get_layer('cmerge').output, 
                                     allow_input_downcast=True)                            
    elif version == 2 or version == 3:
        noise = train_model.get_layer('noise_flatten')
        func_noise = theano.function([noise.get_input_at(0)], noise.output, 
                                      allow_input_downcast=True) 
    return model, func_premise, func_noise
def gen_test(train_model, glove, batch_size):
    
    version = int(train_model.name[-1])
    if version == 9:
        return baseline_test(train_model, glove, batch_size)
    hidden_size = train_model.get_layer('premise').output_shape[-1] 
    
    premise_input = Input(batch_shape=(batch_size, None, None))
    hypo_input = Input(batch_shape=(batch_size, 1), dtype='int32')
    creative_input = Input(batch_shape=(batch_size, None))
    train_input = Input(batch_shape=(batch_size, 1), dtype='int32')
    
    hypo_embeddings = make_fixed_embeddings(glove, 1)(hypo_input) 
    
    hypo_layer = LSTM(output_dim=hidden_size, return_sequences=True, stateful=True, unroll=False,
            trainable=False, inner_activation='sigmoid', name='hypo')(hypo_embeddings)

    att_inputs = [hypo_layer, premise_input] if version == 5 else [hypo_layer, premise_input, creative_input]
    attention = LstmAttentionLayer(output_dim=hidden_size, return_sequences=True, stateful=True, unroll=False,
        trainable=False, feed_state=False, name='attention')(att_inputs)

    hs = HierarchicalSoftmax(len(glove), trainable = False, name ='hs')([attention, train_input])
    
    inputs = [premise_input, hypo_input, creative_input, train_input]
    outputs = [hs]    
         
    model = Model(input=inputs, output=outputs, name=train_model.name)
    model.compile(loss=hs_categorical_crossentropy, optimizer='adam')
    
    update_gen_weights(model, train_model)
    
    func_premise = theano.function([train_model.get_layer('prem_input').input],
                                    train_model.get_layer('premise').output, 
                                    allow_input_downcast=True)
    if version == 5 or version == 8:
        # versions 5/8: the conditioning vector is the per-example noise embedding
        # (plus the class label for version 8), projected by the 'cmerge' Dense layer.
        f_inputs = [train_model.get_layer('noise_embeddings').output]
        if version == 8:
            f_inputs += [train_model.get_layer('class_input').input]

        func_noise = theano.function(f_inputs, train_model.get_layer('cmerge').output,
                                     allow_input_downcast=True)
    elif version == 6 or version == 7:
        # versions 6/7: a latent code (the 'reduction' output) and the class label are
        # expanded back to hidden_size by the 'expansion' Dense layer.
        noise_input = train_model.get_layer('reduction').output
        class_input = train_model.get_layer('class_input').input
        noise_output = train_model.get_layer('expansion').output

        func_noise = theano.function([noise_input, class_input], noise_output,
                                     allow_input_downcast=True, on_unused_input='ignore')
              
    return model, func_premise, func_noise
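
# Hedged sketch of how the three objects returned by gen_test fit together at
# generation time; the real decoding loop lives elsewhere in this codebase and the
# argument names below are illustrative.
def _gen_test_dataflow_sketch(train_model, glove, premise_word_ids, latent_codes, class_one_hot):
    test_model, func_premise, func_noise = gen_test(train_model, glove, batch_size=64)
    premise_states = func_premise(premise_word_ids)      # premise LSTM states fed to the attention layer
    creative = func_noise(latent_codes, class_one_hot)   # versions 6/7: latent code + label -> 'expansion'
    test_model.reset_states()                            # stateful layers: reset before decoding a batch
    # test_model is then called one hypothesis token at a time, feeding back its own predictions.
    return test_model, premise_states, creative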
def create_o_train_model(examples, hidden_size, embed_size, glove, batch_size = 64, prem_len = 22, hypo_len = 13):
   
    premise_layer = LSTM(output_dim=hidden_size, return_sequences=True)
   
    hypo_layer = LSTM(output_dim= hidden_size, return_sequences=True)
    attention = LstmAttentionLayer(hidden_size, return_sequences=True, feed_state = True)
    noise_layer = Embedding(examples, embed_size, input_length = 1)
    

    graph = Graph()
    graph.add_input(name='premise_input', batch_input_shape = (batch_size, prem_len), dtype = 'int32')
    graph.add_node(make_fixed_embeddings(glove, prem_len), name = 'prem_word_vec', input='premise_input')
    graph.add_node(premise_layer, name = 'premise', input='prem_word_vec')
    
    graph.add_input(name='noise_input', batch_input_shape=(batch_size,1), dtype='int32')
    graph.add_node(noise_layer, name='noise_embeddings_pre', input='noise_input')
    graph.add_node(Flatten(), name='noise_embeddings', input='noise_embeddings_pre')
    
    graph.add_input(name='class_input', batch_input_shape=(batch_size, 3))
    graph.add_node(Dense(hidden_size), inputs=['noise_embeddings', 'class_input'], name ='creative', merge_mode='concat')
    
    graph.add_input(name='hypo_input', batch_input_shape=(batch_size, hypo_len), dtype = 'int32')
    graph.add_node(make_fixed_embeddings(glove, hypo_len), name = 'hypo_word_vec', input='hypo_input')
    graph.add_node(hypo_layer, name = 'hypo', input='hypo_word_vec')
    
    graph.add_node(attention, name='attention', inputs=['premise', 'hypo', 'creative'], 
                   merge_mode='join')
    
    graph.add_input(name='train_input', batch_input_shape=(batch_size, hypo_len), dtype='int32')
    graph.add_node(HierarchicalSoftmax(len(glove), input_dim = hidden_size, input_length = hypo_len), 
                   name = 'softmax', inputs=['attention','train_input'], 
                   merge_mode = 'join')
    graph.add_output(name='output', input='softmax')
    
    graph.compile(loss={'output': hs_categorical_crossentropy}, optimizer='adam')
    return graph
def create_o_test_model(train_model, examples, hidden_size, embed_size, glove, batch_size = 64, prem_len = 22):
    
    
    graph = Graph()
    
    hypo_layer = LSTM(output_dim= hidden_size, batch_input_shape=(batch_size, 1, embed_size), 
                      return_sequences=True, stateful = True, trainable = False)
    
    
    graph.add_input(name='hypo_input', batch_input_shape=(batch_size, 1), dtype = 'int32')
    graph.add_node(make_fixed_embeddings(glove, 1), name = 'hypo_word_vec', input='hypo_input')
    graph.add_node(hypo_layer, name = 'hypo', input='hypo_word_vec')
    
    graph.add_input(name='premise', batch_input_shape=(batch_size, prem_len, embed_size))
    graph.add_input(name='creative', batch_input_shape=(batch_size, embed_size))
    
    attention = LstmAttentionLayer(hidden_size, return_sequences=True, stateful = True, trainable = False, feed_state = False)
    
    
    graph.add_node(attention, name='attention', inputs=['premise', 'hypo', 'creative'], merge_mode='join')
   
    
    graph.add_input(name='train_input', batch_input_shape=(batch_size, 1), dtype='int32')
    hs = HierarchicalSoftmax(len(glove), input_dim = hidden_size, input_length = 1, trainable = False)
    
    graph.add_node(hs, 
                   name = 'softmax', inputs=['attention','train_input'], 
                   merge_mode = 'join')
    graph.add_output(name='output', input='softmax')
    
    hypo_layer.set_weights(train_model.nodes['hypo'].get_weights())
    attention.set_weights(train_model.nodes['attention'].get_weights())
    hs.set_weights(train_model.nodes['softmax'].get_weights())    
    
    graph.compile(loss={'output': hs_categorical_crossentropy}, optimizer='adam')
    
    func_premise = theano.function([train_model.inputs['premise_input'].get_input()],
                                    train_model.nodes['premise'].get_output(False), 
                                    allow_input_downcast=True)
    func_noise = theano.function([train_model.inputs['noise_input'].get_input(),
                                  train_model.inputs['class_input'].get_input()],
                                  train_model.nodes['creative'].get_output(False),
                                  allow_input_downcast=True)                            

    return graph, func_premise, func_noise
def gen_train(noise_examples, hidden_size, glove, hypo_len, version = 1, 
                 control_layer = True, class_w = 0.1):
    
    noise_dim = hidden_size - 3 if version == 1 else hidden_size
    prem_input = Input(shape=(None,), dtype='int32', name='prem_input')
    hypo_input = Input(shape=(hypo_len + 1,), dtype='int32', name='hypo_input')
    noise_input = Input(shape=(1,), dtype='int32', name='noise_input')
    train_input = Input(shape=(None,), dtype='int32', name='train_input')
    class_input = Input(shape=(3,), name='class_input')
    
    prem_embeddings = make_fixed_embeddings(glove, None)(prem_input)
    hypo_embeddings = make_fixed_embeddings(glove, hypo_len + 1)(hypo_input)
    premise_layer = LSTM(output_dim=hidden_size, return_sequences=True, 
                            inner_activation='sigmoid', name='premise')(prem_embeddings)
    
    if version == 1 or version == 3 or version == 4:
        hypo_layer = LSTM(output_dim=hidden_size, return_sequences=True, 
                            inner_activation='sigmoid', name='hypo')(hypo_embeddings)
    elif version == 2:
        pre_hypo_layer = LSTM(output_dim=hidden_size - 3, return_sequences=True, 
                            inner_activation='sigmoid', name='hypo')(hypo_embeddings)
        class_repeat = RepeatVector(hypo_len + 1)(class_input)
        hypo_layer = merge([pre_hypo_layer, class_repeat], mode='concat')
    
    noise_layer = Embedding(noise_examples, noise_dim, 
                            input_length = 1, name='noise_embeddings')(noise_input)
    flat_noise = Flatten(name='noise_flatten')(noise_layer)
    if version == 1:
        creative = merge([class_input, flat_noise], mode='concat', name = 'cmerge')
    elif version == 2 or version == 3:
        creative = flat_noise
    elif version == 4:
        # Fixed gating weights for 'class_sig' (this assumes noise_dim == 150): each class
        # maps to a disjoint 50-dimensional block of ones, so the elementwise product below
        # lets only that block of the noise vector through.
        W = [np.zeros((3, 150)), np.zeros(150)]
        W[0][0][:50] = np.ones(50)
        W[0][1][50:100] = np.ones(50)
        W[0][2][-50:] = np.ones(50)

        class_sig = Dense(noise_dim, name='class_sig', trainable=False)(class_input)
        creative = merge([flat_noise, class_sig], mode='mul', name='cmerge')
            
    attention = LstmAttentionLayer(output_dim=hidden_size, return_sequences=True,
                    feed_state=True, name='attention')([hypo_layer, premise_layer, creative])
               
    hs = HierarchicalSoftmax(len(glove), trainable = True, name='hs')([attention, train_input])
    
    if control_layer:
        # Auxiliary 'control' head: attend over the premise again from the decoder states
        # and predict the 3-way class, weighted by class_w in the compile step below.
        control_lstm = LstmAttentionLayer(output_dim=hidden_size)([attention, premise_layer])
        control = Dense(3, activation='softmax', name='control')(control_lstm)
    
    inputs = [prem_input, hypo_input, noise_input, train_input, class_input]
    if version == 3:
        inputs = inputs[:4]
    outputs = [hs, control] if control_layer else [hs]         
    
    model_name = 'version' + str(version)
    model = Model(input=inputs, output=outputs, name = model_name)
    if control_layer:                                                          
        model.compile(loss=[hs_categorical_crossentropy, 'categorical_crossentropy'],  
                      optimizer='adam', loss_weights = [1.0, class_w],
                      metrics={'hs':word_loss, 'control':[cc_loss, 'acc']})
    else:                                                                              
        model.compile(loss=hs_categorical_crossentropy, optimizer='adam')              
    if version == 4:
        model.get_layer('class_sig').set_weights(W) 
    return model