def prep_model(model, N, s0pad, s1pad, c):
    # Average each embedded sentence over time into a fixed-size
    # bag-of-words vector; weights are shared between the two inputs.
    model.add_shared_node(name='bow', inputs=['e0_', 'e1_'], outputs=['e0b', 'e1b'],
                          layer=TimeDistributedMerge(mode='ave'))
    bow_last = ('e0b', 'e1b')
    for i in range(c['deep']):
        bow_next = ('e0b[%d]' % (i,), 'e1b[%d]' % (i,))
        model.add_shared_node(name='deep[%d]' % (i,), inputs=bow_last, outputs=bow_next,
                              layer=Dense(output_dim=N, init=c['nninit'],
                                          activation=c['nnact'],
                                          W_regularizer=l2(c['l2reg'])))
        bow_last = bow_next

    # Projection
    if c['project']:
        model.add_shared_node(name='proj', inputs=bow_last, outputs=['e0p', 'e1p'],
                              layer=Dense(input_dim=N, output_dim=int(N * c['pdim']),
                                          activation=c['pact'],
                                          W_regularizer=l2(c['l2reg'])))
        return ('e0p', 'e1p')
    else:
        return bow_last

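# Hypothetical caller sketch for prep_model(): it assumes a Graph that already
# exposes the embedded sentence pair under the node names 'e0_' and 'e1_'
# (note that s0pad and s1pad are accepted but unused in the body). The
# vocabulary size, pad lengths, and config dict below are illustrative.
from keras.models import Graph
from keras.layers.embeddings import Embedding

N, s0pad, s1pad, vocab = 300, 60, 60, 10000
model = Graph()
model.add_input(name='s0', input_shape=(s0pad,), dtype='int')
model.add_input(name='s1', input_shape=(s1pad,), dtype='int')
model.add_shared_node(name='emb', inputs=['s0', 's1'], outputs=['e0_', 'e1_'],
                      layer=Embedding(input_dim=vocab, output_dim=N,
                                      input_length=s0pad))
c = {'deep': 2, 'nninit': 'glorot_uniform', 'nnact': 'relu',
     'l2reg': 1e-4, 'project': True, 'pdim': 0.5, 'pact': 'tanh'}
e0_final, e1_final = prep_model(model, N, s0pad, s1pad, c)
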
def build_model(args):
    np.random.seed(args.seed)

    graph = Graph()
    graph.add_input('input', input_shape=(args.input_width,), dtype='int')
    graph.add_node(build_embedding_layer(args), input='input', name='embedding')
    graph.add_node(LSTM(args.n_units, truncate_gradient=args.truncate_gradient,
                        return_sequences=True),
                   input='embedding', name='lstm0')
    graph.add_node(LSTM(args.n_units, truncate_gradient=args.truncate_gradient,
                        return_sequences=True),
                   input='lstm0', name='lstm1')

    # Attention module.
    graph.add_node(TimeDistributedDense(args.n_units, activation='relu'),
                   input='lstm1', name='attention0')
    graph.add_node(TimeDistributedDense(args.n_units, activation='relu'),
                   input='attention0', name='attention1')
    graph.add_node(TimeDistributedDense(args.n_units, activation='softmax'),
                   input='attention1', name='attention2')

    # Apply mask from output of attention module to LSTM output,
    # then sum over time to get a fixed-size representation.
    graph.add_node(TimeDistributedMerge(mode='sum'),
                   inputs=['lstm1', 'attention2'],
                   name='applyattn', merge_mode='mul')

    graph.add_node(Dense(args.n_classes, activation='softmax'),
                   input='applyattn', name='softmax')
    graph.add_output(input='softmax', name='output')

    load_weights(args, graph)

    optimizer = build_optimizer(args)
    graph.compile(loss={'output': args.loss}, optimizer=optimizer)

    return graph

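# Hypothetical invocation sketch for build_model(); build_embedding_layer(),
# load_weights(), and build_optimizer() are assumed to be defined elsewhere
# in the module (args most likely carries further fields they consume, e.g.
# weight paths and optimizer settings).
from argparse import Namespace

args = Namespace(
    seed=17,                # RNG seed
    input_width=100,        # padded token-sequence length
    n_units=128,            # LSTM / attention width
    truncate_gradient=-1,   # -1 = full backpropagation through time
    n_classes=5,
    loss='categorical_crossentropy',
)
graph = build_model(args)
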
def hybrid_model(W):
    '''
    Return a hybrid model of CNN and DAN.
    :param W: initial weights of the embedding layer
    :return: model
    '''
    max_features = W.shape[0]
    N_fm = 300  # number of feature maps
    # kernel size of convolutional layer
    kernel_size = 8
    conv_input_width = W.shape[1]
    conv_input_height = 200  # maxlen of sentence

    # CNN branch
    cnn = Sequential()
    cnn.add(Embedding(input_dim=max_features, output_dim=300, weights=[W]))
    cnn.add(Dropout(.5))
    cnn.add(Reshape(dims=(1, conv_input_height, conv_input_width)))
    # first convolutional layer
    cnn.add(Convolution2D(nb_filter=N_fm, nb_row=kernel_size,
                          nb_col=conv_input_width, border_mode='valid',
                          W_regularizer=l2(0.0001),
                          activation='relu'))  # ReLU activation
    cnn.add(Dropout(0.5))
    # aggregate data in every feature map to a scalar using MAX operation
    cnn.add(MaxPooling2D(pool_size=(conv_input_height - kernel_size + 1, 1),
                         border_mode='valid'))
    cnn.add(Dropout(0.5))
    cnn.add(Flatten())
    cnn.add(Dense(output_dim=N_fm, activation='relu'))

    # DAN branch
    dan = Sequential()
    dan.add(Embedding(input_dim=max_features, output_dim=300, weights=[W]))
    dan.add(Dropout(.5))
    dan.add(TimeDistributedMerge(mode='ave'))
    dan.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    dan.add(Dropout(.5))
    dan.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    dan.add(Dropout(.5))
    dan.add(Dense(input_dim=300, output_dim=300, activation='relu'))

    # Merge the two branches and classify.
    model = Sequential()
    model.add(Merge([cnn, dan], mode='sum'))
    model.add(Dense(300, activation='relu'))
    model.add(Dropout(.5))  # fixed: original added this to `dan` after merging
    model.add(Dense(2, activation='softmax'))
    return model

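# Hedged usage sketch: with Merge([cnn, dan], mode='sum'), fit() expects one
# input array per branch. Both branches embed the same token matrix, so the
# same padded index array is passed twice. All data below is illustrative.
import numpy as np

W = np.random.uniform(-0.25, 0.25, (10000, 300))  # stand-in embedding matrix
X = np.random.randint(0, 10000, (500, 200))       # padded token indices (maxlen 200)
Y = np.eye(2)[np.random.randint(0, 2, 500)]       # one-hot binary labels

model = hybrid_model(W)
model.compile(loss='categorical_crossentropy', optimizer='adadelta')
model.fit([X, X], Y, batch_size=50, nb_epoch=10, validation_split=0.1)
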
def dan_simplified(max_features, weights=None):
    '''
    DAN model with pre-trained embeddings; uses just one non-linear layer.
    :param max_features: the number of words
    :return: keras model
    '''
    print('Build model...')
    model = Sequential()
    model.add(Embedding(input_dim=max_features, output_dim=300, weights=[weights]))
    model.add(TimeDistributedMerge(mode='ave'))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(input_dim=300, output_dim=1, activation='sigmoid'))
    return model

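# Minimal training sketch for the binary (sigmoid) DAN variants in this file;
# the embedding matrix, data, and optimizer choice are illustrative.
import numpy as np

weights = np.random.uniform(-0.25, 0.25, (10000, 300))
X = np.random.randint(0, 10000, (500, 60))  # padded token indices
y = np.random.randint(0, 2, 500)            # binary labels

model = dan_simplified(weights.shape[0], weights=weights)
model.compile(loss='binary_crossentropy', optimizer='adagrad')
model.fit(X, y, batch_size=32, nb_epoch=5, validation_split=0.1)
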
def rmv(W=None):
    max_features = W.shape[0]  # weights.shape = (vocabulary size, vector dimension)
    print('Build model...')
    model = Sequential()
    model.add(Embedding(input_dim=max_features, output_dim=300, weights=[W]))
    model.add(Dropout(.5))
    model.add(TimeDistributedMerge(mode='ave'))
    # model.add(Dense(input_dim=300, output_dim=300, activation='relu',
    #                 W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    # model.add(Dropout(.4))
    # model.add(Dense(input_dim=300, output_dim=300, activation='relu',
    #                 W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    # model.add(Dropout(.2))
    # Single linear output unit: a regression head (compile with e.g. 'mse').
    model.add(Dense(input_dim=300, output_dim=1, activation='linear'))
    return model

def dan_dropout_position(weights=None):
    '''
    DAN model with pre-trained embeddings; the dropout layer is moved after
    the averaging step and the dropout rate is 0.5.
    :param weights: pre-trained embedding matrix (vocabulary size x 300)
    :return: keras model
    '''
    max_features = weights.shape[0]
    print('Build model...')
    model = Sequential()
    model.add(Embedding(input_dim=max_features, output_dim=300, weights=[weights]))
    model.add(TimeDistributedMerge(mode='ave'))
    model.add(Dropout(.5))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(input_dim=300, output_dim=1, activation='sigmoid'))
    return model

def dan_original(max_features):
    '''
    DAN model
    :param max_features: the number of words
    :return: keras model
    '''
    print('Build model...')
    model = Sequential()
    model.add(Embedding(max_features, 300))
    model.add(TimeDistributedMerge(mode='ave'))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(input_dim=300, output_dim=1, activation='sigmoid'))
    return model

def __prepare_model(self):
    print('Build model...')
    model = Sequential()
    model.add(TimeDistributedDense(output_dim=self.hidden_cnt,
                                   input_dim=self.input_dim,
                                   input_length=self.input_length,
                                   activation='sigmoid'))
    model.add(TimeDistributedMerge(mode='ave'))
    model.add(Dropout(0.5))
    model.add(Dense(self.hidden_cnt, activation='tanh'))
    model.add(Dense(self.output_dim, activation='softmax'))

    # try using different optimizers and different optimizer configs
    print('Compile model...')
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd)
    return model

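# Illustrative fit sketch: the final softmax expects one-hot targets of width
# self.output_dim. `model` is the compiled Sequential returned by
# __prepare_model() above; dimensions and data are made-up stand-ins.
import numpy as np
from keras.utils import np_utils

input_length, input_dim, n_classes = 20, 50, 4
X = np.random.randn(300, input_length, input_dim)  # (samples, timesteps, features)
Y = np_utils.to_categorical(np.random.randint(0, n_classes, 300), n_classes)
model.fit(X, Y, batch_size=16, nb_epoch=10, validation_split=0.1)
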
def dan_pre_trained(weights=None, p1=0.5, p2=0.4, p3=0.2):
    '''
    DAN model with pre-trained embeddings
    :param weights: pre-trained embedding matrix (vocabulary size x 300)
    :param p1, p2, p3: dropout rates after the embedding and each hidden layer
    :return: keras model
    '''
    max_features = weights.shape[0]  # weights.shape = (vocabulary size, vector dimension)
    print('Build model...')
    model = Sequential()
    model.add(Embedding(input_dim=max_features, output_dim=300,
                        weights=[weights], W_regularizer=l2(1e-5)))
    model.add(Dropout(p1))
    model.add(TimeDistributedMerge(mode='ave'))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu',
                    W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    model.add(Dropout(p2))
    model.add(Dense(input_dim=300, output_dim=300, activation='relu',
                    W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    model.add(Dropout(p3))
    # model.add(Dense(input_dim=300, output_dim=300, activation='relu',
    #                 W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    # model.add(Dropout(.2))
    model.add(Dense(input_dim=300, output_dim=2, activation='softmax',
                    W_regularizer=l2(1e-5), b_regularizer=l2(1e-5)))
    return model

"""
Shapes through the network:
I/P dim  = 1*12*3
LSTM dim = 1*12
TDD dim  = 1*12*8
TDM dim  = 1*8
"""
model = Sequential()
model.add(LSTM(tsteps,
               batch_input_shape=(batch_size, tsteps, attsize),
               return_sequences=True))
model.add(TimeDistributedDense(8))
model.add(TimeDistributedMerge('sum'))
model.compile(loss='mse', optimizer='rmsprop')
print('Network Built Successfully')

print('Training')
for j in range(epochs):
    for i in range(len(train_inps)):
        # One pass per call; the outer loop already iterates over epochs
        # (the original passed nb_epoch=j, which fits zero epochs on the
        # first iteration).
        model.fit(np.array([train_inps[i]]), np.array([train_outs[i]]),
                  verbose=1, nb_epoch=1)
print('Finished Training')

open('lstm_y_8_10epch.json', 'w').write(model.to_json())

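# The snippet above saves only the architecture (model.to_json()); a hedged
# sketch of restoring it. Trained weights would additionally need
# model.save_weights()/load_weights(), which the original code does not call.
from keras.models import model_from_json

restored = model_from_json(open('lstm_y_8_10epch.json').read())
restored.compile(loss='mse', optimizer='rmsprop')  # recompile before use
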
def CreateGraph(emb_dim, hops, activation, mlp_unit, mlp_layer, word_vec_dim,
                aspect_dim, img_dim, emb_size, polarity_num):
    # model
    model = Graph()
    model.add_input(name='sentence', input_shape=(emb_size, img_dim))
    model.add_input(name='aspect', input_shape=(aspect_dim,))
    # Two memory embeddings of the sentence (keys embA, values embB) and the
    # initial query embedding of the aspect (embC0).
    model.add_node(TimeDistributedDense(emb_dim), name='embA', input='sentence')
    model.add_node(TimeDistributedDense(emb_dim), name='embB', input='sentence')
    model.add_node(Dense(emb_dim), name='embC0', input='aspect')
    for i in range(hops):
        # Attention: dot the current query against every memory slot in embA.
        model.add_node(Lambda(transpose, input_shape=(emb_size, emb_dim),
                              output_shape=(emb_dim, emb_size)),
                       name='tmp%i_0' % i, input='embA')
        model.add_node(RepeatVector(emb_size), name='tmp%i_1' % i,
                       input='embC%i' % i)
        model.add_node(Lambda(transpose, output_shape=(emb_dim, emb_size)),
                       name='tmp%i_2' % i, input='tmp%i_1' % i)
        model.add_node(Layer(), merge_mode='mul', name='tmp%i_3' % i,
                       inputs=['tmp%i_0' % i, 'tmp%i_2' % i])
        model.add_node(TimeDistributedMerge(), name='dot_%i' % i,
                       input='tmp%i_3' % i)
        model.add_node(Activation('softmax'), name='weights_%i' % i,
                       input='dot_%i' % i)
        # Take the attention-weighted sum of the embB memories.
        model.add_node(RepeatVector(emb_dim), name='tmp%i_4' % i,
                       input='weights_%i' % i)
        model.add_node(Lambda(transpose, output_shape=(emb_size, emb_dim)),
                       name='tmp%i_5' % i, input='tmp%i_4' % i)
        model.add_node(Layer(), merge_mode='mul', name='tmp%i_6' % i,
                       inputs=['embB', 'tmp%i_5' % i])
        model.add_node(TimeDistributedMerge(), name='output_%i' % i,
                       input='tmp%i_6' % i)
        # Next query = previous query + hop output.
        model.add_node(Layer(), name='embC%i' % (i + 1), merge_mode='sum',
                       inputs=['embC%i' % i, 'output_%i' % i])
    if mlp_layer == 0:
        model.add_node(Dense(word_vec_dim), name='mlp0', input='embC%i' % hops)
        model.add_output(name='output', input='mlp0')
        return model
    else:
        model.add_node(Dense(mlp_unit, activation=activation), name='mlp0',
                       input='embC%i' % hops)
        if mlp_layer > 1:
            for j in range(mlp_layer - 1):
                model.add_node(Dense(mlp_unit, activation=activation),
                               name='mlp' + str(j + 1), input='mlp' + str(j))
        model.add_node(Dense(polarity_num, activation='softmax'), name='out',
                       input='mlp' + str(mlp_layer - 1))
        model.add_output(name='output', input='out')
        return model

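# Assumed definition of the `transpose` helper used by the Lambda nodes in
# this function and in the VQA variant below: swap the last two axes of a 3D
# tensor, (batch, a, b) -> (batch, b, a). Sketched against the old Keras
# backend API.
from keras import backend as K

def transpose(x):
    return K.permute_dimensions(x, (0, 2, 1))
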
def CreateGraph(emb_dim, hops, activation, mlp_unit, mlp_layer, word_vec_dim,
                img_dim, emb_size, dropout):
    # model
    model = Graph()
    model.add_input(name='image', input_shape=(emb_size, img_dim))
    model.add_input(name='question', input_shape=(30, word_vec_dim))
    # Encode the question into a single query vector with an LSTM.
    model.add_node(LSTM(output_dim=word_vec_dim, return_sequences=False,
                        input_shape=(30, word_vec_dim)),
                   name='query', input='question')
    model.add_node(TimeDistributedDense(emb_dim), name='embA', input='image')
    model.add_node(TimeDistributedDense(emb_dim), name='embB', input='image')
    model.add_node(Dense(emb_dim), name='embC0', input='query')
    # Same attention-hop wiring as the aspect variant above, with image
    # regions serving as the memory slots.
    for i in range(hops):
        model.add_node(Lambda(transpose, input_shape=(emb_size, emb_dim),
                              output_shape=(emb_dim, emb_size)),
                       name='tmp%i_0' % i, input='embA')
        model.add_node(RepeatVector(emb_size), name='tmp%i_1' % i,
                       input='embC%i' % i)
        model.add_node(Lambda(transpose, output_shape=(emb_dim, emb_size)),
                       name='tmp%i_2' % i, input='tmp%i_1' % i)
        model.add_node(Layer(), merge_mode='mul', name='tmp%i_3' % i,
                       inputs=['tmp%i_0' % i, 'tmp%i_2' % i])
        model.add_node(TimeDistributedMerge(), name='dot_%i' % i,
                       input='tmp%i_3' % i)
        model.add_node(Activation('softmax'), name='weights_%i' % i,
                       input='dot_%i' % i)
        model.add_node(RepeatVector(emb_dim), name='tmp%i_4' % i,
                       input='weights_%i' % i)
        model.add_node(Lambda(transpose, output_shape=(emb_size, emb_dim)),
                       name='tmp%i_5' % i, input='tmp%i_4' % i)
        model.add_node(Layer(), merge_mode='mul', name='tmp%i_6' % i,
                       inputs=['embB', 'tmp%i_5' % i])
        model.add_node(TimeDistributedMerge(), name='output_%i' % i,
                       input='tmp%i_6' % i)
        model.add_node(Layer(), name='embC%i' % (i + 1), merge_mode='sum',
                       inputs=['embC%i' % i, 'output_%i' % i])
    if mlp_layer == 0:
        model.add_node(Dense(word_vec_dim), name='mlp0', input='embC%i' % hops)
        model.add_output(name='output', input='mlp0')
        return model
    else:
        model.add_node(Dense(mlp_unit, activation=activation), name='mlp0',
                       input='embC%i' % hops)
        model.add_node(Dropout(dropout), name='dropout0', input='mlp0')
        if mlp_layer > 1:
            for j in range(mlp_layer - 1):
                model.add_node(Dense(mlp_unit, activation=activation),
                               name='mlp%i' % (j + 1), input='dropout%i' % j)
                model.add_node(Dropout(dropout), name='dropout%i' % (j + 1),
                               input='mlp%i' % (j + 1))
        model.add_node(Dense(word_vec_dim), name='out',
                       input='dropout%i' % (mlp_layer - 1))
        model.add_output(name='output', input='out')
        return model

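# Hedged end-to-end sketch for the VQA variant. With mlp_layer=0 the graph
# emits a raw word_vec_dim vector, so regression against target answer
# embeddings (mse) is assumed; all dimensions and data are illustrative.
import numpy as np

emb_size, img_dim, word_vec_dim = 196, 512, 300
graph = CreateGraph(emb_dim=256, hops=3, activation='relu', mlp_unit=512,
                    mlp_layer=0, word_vec_dim=word_vec_dim,
                    img_dim=img_dim, emb_size=emb_size, dropout=0.5)
graph.compile(optimizer='rmsprop', loss={'output': 'mse'})
graph.fit({'image': np.random.randn(100, emb_size, img_dim),
           'question': np.random.randn(100, 30, word_vec_dim),
           'output': np.random.randn(100, word_vec_dim)},
          batch_size=32, nb_epoch=2)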