Example #1
def prep_model(model, N, s0pad, s1pad, c):
    model.add_shared_node(name='bow',
                          inputs=['e0_', 'e1_'],
                          outputs=['e0b', 'e1b'],
                          layer=TimeDistributedMerge(mode='ave'))
    bow_last = ('e0b', 'e1b')

    for i in range(c['deep']):
        bow_next = ('e0b[%d]' % (i, ), 'e1b[%d]' % (i, ))
        model.add_shared_node(name='deep[%d]' % (i, ),
                              inputs=bow_last,
                              outputs=bow_next,
                              layer=Dense(output_dim=N,
                                          init=c['nninit'],
                                          activation=c['nnact'],
                                          W_regularizer=l2(c['l2reg'])))
        bow_last = bow_next

    # Projection
    if c['project']:
        model.add_shared_node(name='proj',
                              inputs=bow_last,
                              outputs=['e0p', 'e1p'],
                              layer=Dense(input_dim=N,
                                          output_dim=int(N * c['pdim']),
                                          activation=c['pact'],
                                          W_regularizer=l2(c['l2reg'])))
        return ('e0p', 'e1p')
    else:
        return bow_last
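
A hedged usage sketch for prep_model: it expects a Keras 0.x Graph that already holds embedded sentence-pair nodes named 'e0_' and 'e1_'. Only the config keys below are taken from the code above; the values, dimensions, and node setup are illustrative assumptions.

model = Graph()
# ... 'e0_' and 'e1_' (embedded, padded token sequences) are assumed
# to have been added to the graph before this point ...
c = {'deep': 2, 'nninit': 'glorot_uniform', 'nnact': 'relu',
     'l2reg': 1e-4, 'project': True, 'pdim': 0.5, 'pact': 'tanh'}
final_nodes = prep_model(model, N=300, s0pad=60, s1pad=60, c=c)  # e.g. ('e0p', 'e1p')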
Example #2
def build_model(args):
    np.random.seed(args.seed)

    graph = Graph()

    graph.add_input('input', input_shape=(args.input_width, ), dtype='int')

    graph.add_node(build_embedding_layer(args),
                   input='input',
                   name='embedding')

    graph.add_node(LSTM(args.n_units,
                        truncate_gradient=args.truncate_gradient,
                        return_sequences=True),
                   input='embedding',
                   name='lstm0')

    graph.add_node(LSTM(args.n_units,
                        truncate_gradient=args.truncate_gradient,
                        return_sequences=True),
                   input='lstm0',
                   name='lstm1')

    # Attention module.
    graph.add_node(TimeDistributedDense(args.n_units, activation='relu'),
                   input='lstm1',
                   name='attention0')
    graph.add_node(TimeDistributedDense(args.n_units, activation='relu'),
                   input='attention0',
                   name='attention1')
    graph.add_node(TimeDistributedDense(args.n_units, activation='softmax'),
                   input='attention1',
                   name='attention2')

    # Apply mask from output of attention module to LSTM output.
    graph.add_node(TimeDistributedMerge(mode='sum'),
                   inputs=['lstm1', 'attention2'],
                   name='applyattn',
                   merge_mode='mul')

    graph.add_node(Dense(args.n_classes, activation='softmax'),
                   input='applyattn',
                   name='softmax')

    graph.add_output(input='softmax', name='output')

    load_weights(args, graph)

    optimizer = build_optimizer(args)

    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph
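
For reference, the 'applyattn' node above combines merge_mode='mul' with TimeDistributedMerge(mode='sum'): each LSTM timestep is weighted element-wise by the attention output, then the weighted steps are summed over time. A small NumPy sketch of that computation (the shapes are illustrative assumptions):

import numpy as np

T, d = 12, 8                     # timesteps, LSTM units (illustrative)
h = np.random.rand(T, d)         # 'lstm1' output for one sample
a = np.random.rand(T, d)         # 'attention2' output for the same sample

applyattn = (h * a).sum(axis=0)  # merge_mode='mul', then mode='sum' over time
assert applyattn.shape == (d,)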
Example #3
def hybrid_model(W):
    '''
    Return a hybrid model combining a CNN branch and a DAN branch.
    :param W: initial weights of the embedding layer
    :return: keras model
    '''
    max_features = W.shape[0]
    N_fm = 300
    # kernel size of convolutional layer
    kernel_size = 8
    conv_input_width = W.shape[1]
    conv_input_height = 200  # maxlen of sentence

    cnn = Sequential()
    cnn.add(Embedding(input_dim=max_features, output_dim=300, weights=[W]))
    cnn.add(Dropout(.5))
    cnn.add(Reshape(dims=(1, conv_input_height, conv_input_width)))
    # first convolutional layer
    cnn.add(
        Convolution2D(nb_filter=N_fm,
                      nb_row=kernel_size,
                      nb_col=conv_input_width,
                      border_mode='valid',
                      W_regularizer=l2(0.0001),
                      activation='relu'))
    # ReLU activation
    cnn.add(Dropout(0.5))
    # aggregate data in every feature map to scalar using MAX operation
    cnn.add(
        MaxPooling2D(pool_size=(conv_input_height - kernel_size + 1, 1),
                     border_mode='valid'))
    cnn.add(Dropout(0.5))
    cnn.add(Flatten())
    cnn.add(Dense(output_dim=N_fm, activation='relu'))

    dan = Sequential()
    dan.add(Embedding(input_dim=max_features, output_dim=300, weights=[W]))
    dan.add(Dropout(.5))
    dan.add(TimeDistributedMerge(mode='ave'))
    dan.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    dan.add(Dropout(.5))
    dan.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    dan.add(Dropout(.5))
    dan.add(Dense(input_dim=300, output_dim=300, activation='relu'))

    model = Sequential()
    model.add(Merge([cnn, dan], mode='sum'))
    model.add(Dense(300, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(2, activation='softmax'))
    return model
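
Since the model starts with a Merge over two branches that embed the same sentence, fitting it takes a list with one copy of the input per branch. A hedged usage sketch (X is an assumed integer token matrix of shape (n_samples, 200), Y assumed one-hot labels of shape (n_samples, 2)):

model = hybrid_model(W)
model.compile(loss='categorical_crossentropy', optimizer='adadelta')
model.fit([X, X], Y, batch_size=32, nb_epoch=5)  # same input fed to both branches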
Example #4
def dan_simplified(max_features, weights=None):
    '''
    DAN model with pre-trained embeddings, using just one non-linear layer.
    :param max_features: the number of words
    :param weights: initial weights of the embedding layer
    :return: keras model
    '''
    print('Build model...')
    model = Sequential()
    model.add(Embedding(input_dim=max_features, output_dim=300, weights=[weights]))
    model.add(TimeDistributedMerge(mode='ave'))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(input_dim=300, output_dim=1, activation='sigmoid'))
    return model
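
With a single sigmoid unit on top, this model pairs naturally with binary cross-entropy. A minimal sketch, assuming W is the pre-trained embedding matrix and X_train/y_train are placeholders (class_mode='binary' follows the old Keras compile signature):

model = dan_simplified(max_features=W.shape[0], weights=W)
model.compile(loss='binary_crossentropy', optimizer='adam', class_mode='binary')
model.fit(X_train, y_train, batch_size=32, nb_epoch=10)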
Example #5
def rmv(W=None):
    max_features = W.shape[0]  # weights.shape = (vocabulary size, vector dimension)
    print('Build model...')
    model = Sequential()
    model.add(Embedding(input_dim=max_features, output_dim=300, weights=[W]))
    model.add(Dropout(.5))
    model.add(TimeDistributedMerge(mode='ave'))
    # model.add(Dense(input_dim=300, output_dim=300, activation='relu', W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    # model.add(Dropout(.4))
    # model.add(Dense(input_dim=300, output_dim=300, activation='relu', W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    # model.add(Dropout(.2))
    # # model.add(Dense(input_dim=300, output_dim=300, activation = 'relu', W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    # # model.add(Dropout(.2))
    model.add(Dense(input_dim=300, output_dim=1, activation='linear'))
    return model
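
The linear output unit makes this a regression head (a real-valued score) rather than a classifier, so a squared-error loss is the natural pairing. A minimal sketch under that assumption:

model = rmv(W)
model.compile(loss='mean_squared_error', optimizer='adam')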
Example #6
def dan_dropout_position(weights=None):
    '''
    DAN model with pre-trained embeddings; dropout is applied after the
    averaging layer instead of directly after the embedding.
    :param weights: initial weights of the embedding layer
    :return: keras model
    '''
    max_features = weights.shape[0]
    print('Build model...')
    model = Sequential()
    model.add(Embedding(input_dim=max_features, output_dim=300, weights=[weights]))
    model.add(TimeDistributedMerge(mode='ave'))
    model.add(Dropout(.5))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(input_dim=300, output_dim=1, activation='sigmoid'))
    return model
Example #7
def dan_original(max_features):
    '''
    DAN model
    :param max_features: the number of words
    :return: keras model
    '''
    print('Build model...')
    model = Sequential()
    model.add(Embedding(max_features, 300))
    model.add(TimeDistributedMerge(mode='ave'))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(input_dim=300, output_dim=1, activation='sigmoid'))
    return model
Example #8
    def __prepare_model(self):
        print('Build model...')
        model = Sequential()
        model.add(
            TimeDistributedDense(output_dim=self.hidden_cnt,
                                 input_dim=self.input_dim,
                                 input_length=self.input_length,
                                 activation='sigmoid'))
        model.add(TimeDistributedMerge(mode='ave'))
        model.add(Dropout(0.5))
        model.add(Dense(self.hidden_cnt, activation='tanh'))
        model.add(Dense(self.output_dim, activation='softmax'))

        # try using different optimizers and different optimizer configs
        print('Compile model...')
        sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(loss='categorical_crossentropy', optimizer=sgd)
        return model
Example #9
def dan_pre_trained(weights=None, p1=0.5, p2=0.4, p3=0.2):
    '''
    DAN model with pre-trained embeddings.
    :param weights: initial weights of the embedding layer
    :param p1, p2, p3: dropout rates for the three dropout layers
    :return: keras model
    '''
    max_features = weights.shape[0]      # weights.shape = (vocabulary size, vector dimension)
    print('Build model...')
    model = Sequential()
    model.add(Embedding(input_dim=max_features, output_dim=300, weights=[weights], W_regularizer=l2(1e-5)))
    model.add(Dropout(p1))
    model.add(TimeDistributedMerge(mode='ave'))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu', W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    model.add(Dropout(p2))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu', W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    model.add(Dropout(p3))
    # model.add(Dense(input_dim=300, output_dim=300, activation='relu', W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    # model.add(Dropout(.2))
    model.add(Dense(input_dim=300, output_dim=2, activation='softmax', W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    return model
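
Unlike the sigmoid variants above, this model ends in a two-way softmax, so the labels need one-hot encoding and categorical cross-entropy. A hedged sketch (X_train/y_train are assumed placeholders):

from keras.utils.np_utils import to_categorical

model = dan_pre_trained(weights=W)
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.fit(X_train, to_categorical(y_train, 2), batch_size=32, nb_epoch=10)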
Example #10
"""
Per-sample output shapes through the network:

I/P               dim = 1*12*3
LSTM              dim = 1*12
TDD               dim = 1*12*8
TDM               dim = 1*8
"""

model = Sequential()
model.add(
    LSTM(tsteps,
         batch_input_shape=(batch_size, tsteps, attsize),
         return_sequences=True))
model.add(TimeDistributedDense(8))
model.add(TimeDistributedMerge(mode='sum'))
model.compile(loss='mse', optimizer='rmsprop')

print "Network Built Sucessfully"

print "Training"
for j in range(epochs):
    for i in range(len(train_inps)):
        model.fit(np.array([train_inps[i]]),
                  np.array([train_outs[i]]),
                  verbose=1,
                  nb_epoch=j)

print "Finished Training"

open('lstm_y_8_10epch.json', 'w').write(model.to_json())
Example #11
def CreateGraph(emb_dim, hops, activation, mlp_unit, mlp_layer, word_vec_dim,
                aspect_dim, img_dim, emb_size, polarity_num):
    # model
    model = Graph()
    model.add_input(name='sentence', input_shape=(emb_size, img_dim))
    model.add_input(name='aspect', input_shape=(aspect_dim, ))

    model.add_node(TimeDistributedDense(emb_dim),
                   name='embA',
                   input='sentence')
    model.add_node(TimeDistributedDense(emb_dim),
                   name='embB',
                   input='sentence')
    model.add_node(Dense(emb_dim), name='embC0', input='aspect')

    for i in range(hops):
        model.add_node(Lambda(transpose,
                              input_shape=(emb_size, emb_dim),
                              output_shape=(emb_dim, emb_size)),
                       name='tmp%i_0' % i,
                       input='embA')
        model.add_node(RepeatVector(emb_size),
                       name='tmp%i_1' % i,
                       input='embC%i' % i)
        model.add_node(Lambda(transpose, output_shape=(emb_dim, emb_size)),
                       name='tmp%i_2' % i,
                       input='tmp%i_1' % i)
        model.add_node(Layer(),
                       merge_mode='mul',
                       name='tmp%i_3' % i,
                       inputs=['tmp%i_0' % i, 'tmp%i_2' % i])
        model.add_node(TimeDistributedMerge(),
                       name='dot_%i' % i,
                       input='tmp%i_3' % i)
        model.add_node(Activation('softmax'),
                       name='weights_%i' % i,
                       input='dot_%i' % i)
        model.add_node(RepeatVector(emb_dim),
                       name='tmp%i_4' % i,
                       input='weights_%i' % i)
        model.add_node(Lambda(transpose, output_shape=(emb_size, emb_dim)),
                       name='tmp%i_5' % i,
                       input='tmp%i_4' % i)
        model.add_node(Layer(),
                       merge_mode='mul',
                       name='tmp%i_6' % i,
                       inputs=['embB', 'tmp%i_5' % i])
        model.add_node(TimeDistributedMerge(),
                       name='output_%i' % i,
                       input='tmp%i_6' % i)
        model.add_node(Layer(),
                       name='embC%i' % (i + 1),
                       merge_mode='sum',
                       inputs=['embC%i' % i, 'output_%i' % i])

    if mlp_layer == 0:
        model.add_node(Dense(word_vec_dim), name='mlp0', input='embC%i' % hops)
        model.add_output(name='output', input='mlp0')
        return model
    else:
        model.add_node(Dense(mlp_unit, activation=activation),
                       name='mlp0',
                       input='embC%i' % hops)

    if mlp_layer > 1:
        for j in range(mlp_layer - 1):
            model.add_node(Dense(mlp_unit, activation=activation),
                           name='mlp' + str(j + 1),
                           input='mlp' + str(j))
    model.add_node(Dense(polarity_num, activation='softmax'),
                   name='out',
                   input='mlp' + str(mlp_layer - 1))
    model.add_output(name='output', input='out')
    return model
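
Both this example and the next pass a transpose function to Lambda without defining it. Given the declared output shapes, it is presumably a batch-wise swap of the last two axes; a plausible definition (an assumption, not taken from the source) using the Keras backend:

from keras import backend as K

def transpose(x):
    # Swap the time/feature axes, leaving the batch axis in place:
    # (batch, emb_size, emb_dim) -> (batch, emb_dim, emb_size)
    return K.permute_dimensions(x, (0, 2, 1))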
Example #12
def CreateGraph(emb_dim, hops, activation, mlp_unit, mlp_layer, word_vec_dim,
                img_dim, emb_size, dropout):
    # model
    model = Graph()
    model.add_input(name='image', input_shape=(emb_size, img_dim))
    model.add_input(name='question', input_shape=(30, word_vec_dim))
    model.add_node(LSTM(output_dim=word_vec_dim,
                        return_sequences=False,
                        input_shape=(30, word_vec_dim)),
                   name='query',
                   input='question')

    model.add_node(TimeDistributedDense(emb_dim), name='embA', input='image')
    model.add_node(TimeDistributedDense(emb_dim), name='embB', input='image')
    model.add_node(Dense(emb_dim), name='embC0', input='query')

    for i in range(hops):
        model.add_node(Lambda(transpose,
                              input_shape=(emb_size, emb_dim),
                              output_shape=(emb_dim, emb_size)),
                       name='tmp%i_0' % i,
                       input='embA')
        model.add_node(RepeatVector(emb_size),
                       name='tmp%i_1' % i,
                       input='embC%i' % i)
        model.add_node(Lambda(transpose, output_shape=(emb_dim, emb_size)),
                       name='tmp%i_2' % i,
                       input='tmp%i_1' % i)
        model.add_node(Layer(),
                       merge_mode='mul',
                       name='tmp%i_3' % i,
                       inputs=['tmp%i_0' % i, 'tmp%i_2' % i])
        model.add_node(TimeDistributedMerge(),
                       name='dot_%i' % i,
                       input='tmp%i_3' % i)
        model.add_node(Activation('softmax'),
                       name='weights_%i' % i,
                       input='dot_%i' % i)
        model.add_node(RepeatVector(emb_dim),
                       name='tmp%i_4' % i,
                       input='weights_%i' % i)
        model.add_node(Lambda(transpose, output_shape=(emb_size, emb_dim)),
                       name='tmp%i_5' % i,
                       input='tmp%i_4' % i)
        model.add_node(Layer(),
                       merge_mode='mul',
                       name='tmp%i_6' % i,
                       inputs=['embB', 'tmp%i_5' % i])
        model.add_node(TimeDistributedMerge(),
                       name='output_%i' % i,
                       input='tmp%i_6' % i)
        model.add_node(Layer(),
                       name='embC%i' % (i + 1),
                       merge_mode='sum',
                       inputs=['embC%i' % i, 'output_%i' % i])

    if mlp_layer == 0:
        model.add_node(Dense(word_vec_dim), name='mlp0', input='embC%i' % hops)
        model.add_output(name='output', input='mlp0')
        return model
    else:
        model.add_node(Dense(mlp_unit, activation=activation),
                       name='mlp0',
                       input='embC%i' % hops)
        model.add_node(Dropout(dropout), name='dropout0', input='mlp0')
    if mlp_layer > 1:
        for j in range(mlp_layer - 1):
            model.add_node(Dense(mlp_unit, activation=activation),
                           name='mlp%i' % (j + 1),
                           input='dropout%i' % j)
            model.add_node(Dropout(dropout),
                           name='dropout%i' % (j + 1),
                           input='mlp%i' % (j + 1))
    model.add_node(Dense(word_vec_dim),
                   name='out',
                   input='dropout%i' % (mlp_layer - 1))
    model.add_output(name='output', input='out')
    return model