def build_model2():
    """Binary classifier: three MaxoutDense blocks with batch-norm and a
    decaying dropout schedule (0.5 -> 0.25 -> 0.125), softmax output.

    Uses the old Keras 0.x API (layers take explicit input/output dims).
    """
    net = Sequential()
    # Input block: 289 features -> 512 units, L1+L2 weight penalty.
    net.add(Dense(289, 512, W_regularizer=l1l2()))
    net.add(MaxoutDense(512, 512, nb_feature=4))
    net.add(Dropout(0.5))
    net.add(BatchNormalization((512,)))
    # Second block: 512 -> 256.
    net.add(Dense(512, 256))
    net.add(MaxoutDense(256, 256, nb_feature=4))
    net.add(Dropout(0.25))
    net.add(BatchNormalization((256,)))
    # Third block: 256 -> 128.
    net.add(Dense(256, 128))
    net.add(MaxoutDense(128, 128, nb_feature=4))
    net.add(Dropout(0.125))
    net.add(BatchNormalization((128,)))
    # Two-way softmax head, trained as a binary problem.
    net.add(Dense(128, 2, activation='softmax'))
    net.compile(loss='binary_crossentropy', optimizer='adam',
                class_mode="binary")
    return net
def build_gmodel():
    """Graph model with two branches: a JZS3 recurrent path over the
    3-D time-series input and an MLP path over the noised enrollment
    features; their 1-unit outputs are fused into a single sigmoid.

    Uses the old Keras Graph API.
    """
    g = Graph()

    # --- recurrent branch over the time series ---
    g.add_input(name='time_series', ndim=3)
    g.add_node(JZS3(63, 40), name='rnn1', input='time_series')
    g.add_node(Dense(40, 40), name='dense1', input='rnn1')
    g.add_node(MaxoutDense(40, 20, nb_feature=4),
               name='maxout1', input='dense1')
    g.add_node(Dropout(0.5), name='dropout1', input='maxout1')
    g.add_node(Dense(20, 1, activation='sigmoid'),
               name='out_dense', input='dropout1')

    # --- MLP branch over the enrollment features (with input noise) ---
    g.add_input(name='enrollment', ndim=2)
    g.add_node(GaussianNoise(0.05), name='noise', input='enrollment')
    g.add_node(Dense(53, 64), name='mlp_dense', input='noise')
    g.add_node(MaxoutDense(64, 64, nb_feature=4),
               name='mlp_maxout', input='mlp_dense')
    g.add_node(Dropout(0.5), name='mlp_dropout', input='mlp_maxout')
    g.add_node(Dense(64, 1, activation='sigmoid'),
               name='mlp_dense3', input='mlp_dropout')

    # --- fusion: both 1-unit branch outputs (each listed twice) are
    # concatenated, squashed to 2 softmax units, then to one sigmoid ---
    g.add_node(Dense(4, 2, activation='softmax'), name='mlp_dense4',
               inputs=['mlp_dense3', 'out_dense', 'mlp_dense3', 'out_dense'],
               merge_mode='concat')
    g.add_node(Dense(2, 1, activation='sigmoid'),
               name='mlp_dense5', input='mlp_dense4')

    g.add_output(name='output', input='mlp_dense5')
    g.compile('adam', {'output': 'binary_crossentropy'})
    return g
def get_class_net(model_path, n_inp_frms, num_spks):
    """Build, summarize, plot, and compile the speaker-classification MLP.

    model_path -- directory prefix where 'architecture.png' is written
    n_inp_frms -- number of input frames (60 MFCCs per frame)
    num_spks   -- number of output speaker classes
    """
    from keras.models import Sequential
    from keras.layers.core import MaxoutDense, Dense, Dropout, Activation
    from keras.layers.advanced_activations import LeakyReLU
    from keras.utils.visualize_util import plot
    from keras.optimizers import Adam

    # Add batch normalization: keras.layers.normalization.BatchNormalization()
    net = Sequential()
    net.add(Dense(512, input_shape=(n_inp_frms * 60,)))  # 60 MFCCs / frame
    net.add(LeakyReLU())
    net.add(Dropout(0.3))
    net.add(MaxoutDense(128))
    net.add(LeakyReLU())
    net.add(Dropout(0.3))
    net.add(Dense(64))
    net.add(LeakyReLU())
    net.add(Dropout(0.3))
    net.add(Dense(num_spks))
    net.add(Activation('softmax'))

    net.summary()
    plot(net, to_file=model_path + 'architecture.png')
    net.compile(loss='categorical_crossentropy', optimizer=Adam(),
                metrics=['accuracy', 'precision', 'recall'])
    return net
def build(self):
    """Assemble the NPI ('multiplication') model: a state encoder, a
    program embedding, two stacked stateful LSTMs, and the end/program/
    argument output heads; the combined Model is stored on self.model.
    """
    print("building the multiplication model")
    enc_size = self.size_of_env_observation()
    argument_size = IntegerArguments.size_of_arguments
    # Three inputs: environment observation, integer arguments, program id.
    input_enc = InputLayer(batch_input_shape=(self.batch_size, enc_size), name='input_enc')
    input_arg = InputLayer(batch_input_shape=(self.batch_size, argument_size), name='input_arg')
    input_prg = Embedding(input_dim=PROGRAM_VEC_SIZE, output_dim=PROGRAM_KEY_VEC_SIZE,
                          input_length=1, batch_input_shape=(self.batch_size, 1))
    # f_enc: fuse observation + arguments into a 128-d state vector.
    f_enc = Sequential(name='f_enc')
    f_enc.add(Merge([input_enc, input_arg], mode='concat'))
    f_enc.add(MaxoutDense(128, nb_feature=FIELD_ROW))
    self.f_enc = f_enc
    program_embedding = Sequential(name='program_embedding')
    program_embedding.add(input_prg)
    # RepeatVector(1) adds the length-1 time axis the LSTM stack expects.
    f_enc_convert = Sequential(name='f_enc_convert')
    f_enc_convert.add(f_enc)
    f_enc_convert.add(RepeatVector(1))
    # Core: two stateful LSTMs over the fused (state, program) input;
    # statefulness carries context across successive NPI steps.
    f_lstm = Sequential(name='f_lstm')
    f_lstm.add(Merge([f_enc_convert, program_embedding], mode='concat'))
    f_lstm.add(LSTM(256, return_sequences=False, stateful=True, W_regularizer=l2(0.0000001)))
    f_lstm.add(Activation('relu', name='relu_lstm_1'))
    f_lstm.add(RepeatVector(1))
    f_lstm.add(LSTM(256, return_sequences=False, stateful=True, W_regularizer=l2(0.0000001)))
    f_lstm.add(Activation('relu', name='relu_lstm_2'))
    plot(f_lstm, to_file='f_lstm.png', show_shapes=True)
    # Head 1: probability that the current program should terminate.
    f_end = Sequential(name='f_end')
    f_end.add(f_lstm)
    f_end.add(Dense(1, W_regularizer=l2(0.001)))
    f_end.add(Activation('sigmoid', name='sigmoid_end'))
    # Head 2: softmax distribution over the next program to call.
    f_prog = Sequential(name='f_prog')
    f_prog.add(f_lstm)
    f_prog.add(Dense(PROGRAM_KEY_VEC_SIZE, activation="relu"))
    f_prog.add(Dense(PROGRAM_VEC_SIZE, W_regularizer=l2(0.0001)))
    f_prog.add(Activation('softmax', name='softmax_prog'))
    plot(f_prog, to_file='f_prog.png', show_shapes=True)
    # Heads 3..N: one softmax per integer-argument slot.
    f_args = []
    for ai in range(1, IntegerArguments.max_arg_num+1):
        f_arg = Sequential(name='f_arg%s' % ai)
        f_arg.add(f_lstm)
        f_arg.add(Dense(IntegerArguments.depth, W_regularizer=l2(0.0001)))
        f_arg.add(Activation('softmax', name='softmax_arg%s' % ai))
        f_args.append(f_arg)
    # NOTE(review): assumed to sit after the loop (plots only the last
    # argument head; all heads share the same shape) — confirm placement.
    plot(f_arg, to_file='f_arg.png', show_shapes=True)
    # Combine all heads into one trainable Model with shared trunk.
    self.model = Model([input_enc.input, input_arg.input, input_prg.input],
                       [f_end.output, f_prog.output] + [fa.output for fa in f_args],
                       name="npi")
    self.compile_model()
    plot(self.model, to_file='model.png', show_shapes=True)
def get_maxout(size, loss='categorical_crossentropy', optimizer=Adam,
               optimizer_kwargs=None):
    """Build and compile a MaxOut network.

    size             -- number of input features
    loss             -- Keras loss identifier
    optimizer        -- optimizer *class* (instantiated here)
    optimizer_kwargs -- kwargs forwarded to the optimizer constructor
                        (defaults to no overrides)
    """
    # FIX: the original signature used a mutable default ({}), which is
    # shared across calls; use the None-sentinel idiom instead.
    if optimizer_kwargs is None:
        optimizer_kwargs = {}
    model = Sequential()
    model.add(MaxoutDense(256, input_shape=(size,), nb_feature=5,
                          init='he_uniform'))
    model.add(MaxoutDense(128, nb_feature=5))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(25))
    model.add(Activation('relu'))
    model.add(Dense(2))
    model.add(Activation('sigmoid'))
    model.compile(loss=loss, optimizer=optimizer(**optimizer_kwargs))
    return model
def comp_double(self):
    '''
    double model. Similar to two-pathway, except takes in a 4x33x33 patch
    and its center 4x5x5 patch. merges paths at flatten layer.
    '''
    print('Compiling double model...')
    # Main path: convolutional stack over the full 4x33x33 patch.
    single = Sequential()
    single.add(Convolution2D(64, 7, 7, border_mode='valid',
                             W_regularizer=l1l2(l1=0.01, l2=0.01),
                             input_shape=(4, 33, 33)))
    single.add(Activation('relu'))
    single.add(BatchNormalization(mode=0, axis=1))
    single.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
    single.add(Dropout(0.5))
    single.add(Convolution2D(nb_filter=128, nb_row=5, nb_col=5,
                             activation='relu', border_mode='valid',
                             W_regularizer=l1l2(l1=0.01, l2=0.01)))
    single.add(BatchNormalization(mode=0, axis=1))
    single.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
    single.add(Dropout(0.5))
    single.add(Convolution2D(nb_filter=256, nb_row=5, nb_col=5,
                             activation='relu', border_mode='valid',
                             W_regularizer=l1l2(l1=0.01, l2=0.01)))
    single.add(BatchNormalization(mode=0, axis=1))
    single.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
    single.add(Dropout(0.5))
    single.add(Convolution2D(nb_filter=128, nb_row=3, nb_col=3,
                             activation='relu', border_mode='valid',
                             W_regularizer=l1l2(l1=0.01, l2=0.01)))
    single.add(Dropout(0.25))
    single.add(Flatten())
    # Secondary path: the small center 4x5x5 patch, trained via maxout.
    five = Sequential()
    five.add(Reshape((100, 1), input_shape=(4, 5, 5)))
    five.add(Flatten())
    five.add(MaxoutDense(128, nb_feature=5))
    five.add(Dropout(0.5))
    # Merge both paths at the flatten layer; 5-class softmax head.
    model = Sequential()
    model.add(Merge([five, single], mode='concat', concat_axis=1))
    model.add(Dense(5))
    model.add(Activation('softmax'))
    sgd = SGD(lr=0.001, decay=0.01, momentum=0.9)
    # BUG FIX: compile() was previously passed the string 'sgd', which
    # silently discarded the configured SGD instance above (Keras then
    # used default lr/decay/momentum). Pass the object instead.
    model.compile(loss='categorical_crossentropy', optimizer=sgd)
    print('Done.')
    return model
def build_model():
    """Combined binary classifier: a JZS3 recurrent branch over the
    63-feature time series merged with four parallel MLP branches over
    the 53 static features; single sigmoid output.

    Old Keras 0.x API (layers take explicit input/output dims).
    """
    # Recurrent branch.
    rnn = Sequential()
    rnn.add(Dropout(0.5))
    rnn.add(JZS3(63, 64))
    rnn.add(Dropout(0.5))
    rnn.add(Dense(64, 1, activation='sigmoid'))

    # Four identically-shaped MLP branches (independent weights).
    branches = []
    for _ in range(4):
        branch = Sequential()
        branch.add(Dense(53, 64))
        branch.add(PReLU((64,)))
        branch.add(BatchNormalization((64,)))
        branch.add(MaxoutDense(64, 1, nb_feature=256))
        branches.append(branch)

    # Concatenate the four 1-unit branch outputs and reduce to one unit.
    mlp = Sequential()
    mlp.add(Merge(branches, mode='concat'))
    mlp.add(Dense(4, 1, activation='sigmoid'))

    # Final fusion of the recurrent and MLP paths.
    model = Sequential()
    model.add(Merge([rnn, mlp], mode='concat'))
    model.add(Dense(2, 1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  class_mode="binary")
    return model
def ArcticVideoCaptionWithInit(self, params):
    """
    Video captioning with:
        * Attention mechanism on video frames
        * Conditional LSTM for processing the video
        * Feed forward layers:
            + Context projected to output
            + Last word projected to output

    Builds self.model (training model); when params['BEAM_SEARCH'] is set,
    also builds self.model_init / self.model_next plus the input/output
    name matchings used to chain them during decoding.

    :param params: model hyper-parameter dictionary
    :return: None (models are stored on self)
    """
    # Video model
    video = Input(name=self.ids_inputs[0],
                  shape=tuple([params['NUM_FRAMES'], params['IMG_FEAT_SIZE']]))
    input_video = video

    ##################################################################
    #                           ENCODER
    ##################################################################
    # Optional per-frame embedding layers (time-distributed MLP).
    for activation, dimension in params['IMG_EMBEDDING_LAYERS']:
        input_video = TimeDistributed(
            Dense(dimension, name='%s_1' % activation, activation=activation,
                  W_regularizer=l2(params['WEIGHT_DECAY'])))(input_video)
        input_video = Regularize(input_video, params, name='%s_1' % activation)

    # Recurrent encoder over the frame sequence; its output is
    # concatenated with the (embedded) frames along the feature axis.
    if params['ENCODER_HIDDEN_SIZE'] > 0:
        if params['BIDIRECTIONAL_ENCODER']:
            encoder = Bidirectional(eval(params['RNN_TYPE'])(
                params['ENCODER_HIDDEN_SIZE'],
                W_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                U_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                b_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                dropout_W=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                dropout_U=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                return_sequences=True),
                name='bidirectional_encoder_' + params['RNN_TYPE'],
                merge_mode='concat')(input_video)
        else:
            encoder = eval(params['RNN_TYPE'])(
                params['ENCODER_HIDDEN_SIZE'],
                W_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                U_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                b_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                dropout_W=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                dropout_U=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                return_sequences=True,
                name='encoder_' + params['RNN_TYPE'])(input_video)
        input_video = merge([input_video, encoder], mode='concat', concat_axis=2)
        input_video = Regularize(input_video, params, name='input_video')

        # 2.3. Potentially deep encoder: extra recurrent layers added with
        # residual ('sum') connections onto the running representation.
        for n_layer in range(1, params['N_LAYERS_ENCODER']):
            if params['BIDIRECTIONAL_DEEP_ENCODER']:
                current_input_video = Bidirectional(
                    eval(params['RNN_TYPE'])(
                        params['ENCODER_HIDDEN_SIZE'],
                        W_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                        U_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                        b_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                        dropout_W=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                        dropout_U=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                        return_sequences=True,
                    ),
                    merge_mode='concat',
                    name='bidirectional_encoder_' + str(n_layer))(input_video)
            else:
                current_input_video = eval(params['RNN_TYPE'])(
                    params['ENCODER_HIDDEN_SIZE'],
                    W_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                    U_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                    b_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
                    dropout_W=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                    dropout_U=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
                    return_sequences=True,
                    name='encoder_' + str(n_layer))(input_video)
            current_input_video = Regularize(current_input_video, params,
                                             name='input_video_' + str(n_layer))
            input_video = merge([input_video, current_input_video], mode='sum')

    # Previously generated words as inputs for training (teacher forcing).
    next_words = Input(name=self.ids_inputs[1],
                       batch_shape=tuple([None, None]), dtype='int32')
    emb = Embedding(params['OUTPUT_VOCABULARY_SIZE'],
                    params['TARGET_TEXT_EMBEDDING_SIZE'],
                    name='target_word_embedding',
                    W_regularizer=l2(params['WEIGHT_DECAY']),
                    trainable=self.trg_embedding_weights_trainable,
                    weights=self.trg_embedding_weights,
                    mask_zero=True)(next_words)
    emb = Regularize(emb, params, name='target_word_embedding')

    # LSTM initialization perceptrons with ctx mean
    # 3.2. Decoder's RNN initialization perceptrons with ctx mean
    # (mean of the encoded video over the time axis).
    ctx_mean = Lambda(lambda x: K.mean(x, axis=1),
                      output_shape=lambda s: (s[0], s[2]),
                      name='lambda_mean')(input_video)
    if len(params['INIT_LAYERS']) > 0:
        for n_layer_init in range(len(params['INIT_LAYERS']) - 1):
            ctx_mean = Dense(params['DECODER_HIDDEN_SIZE'],
                             name='init_layer_%d' % n_layer_init,
                             W_regularizer=l2(params['WEIGHT_DECAY']),
                             activation=params['INIT_LAYERS'][n_layer_init])(ctx_mean)
            ctx_mean = Regularize(ctx_mean, params, name='ctx' + str(n_layer_init))
        initial_state = Dense(params['DECODER_HIDDEN_SIZE'],
                              name='initial_state',
                              W_regularizer=l2(params['WEIGHT_DECAY']),
                              activation=params['INIT_LAYERS'][-1])(ctx_mean)
        initial_state = Regularize(initial_state, params, name='initial_state')
        input_attentional_decoder = [emb, input_video, initial_state]
        if params['RNN_TYPE'] == 'LSTM':
            # An LSTM decoder also needs an initial memory (cell) state.
            initial_memory = Dense(params['DECODER_HIDDEN_SIZE'],
                                   name='initial_memory',
                                   W_regularizer=l2(params['WEIGHT_DECAY']),
                                   activation=params['INIT_LAYERS'][-1])(ctx_mean)
            initial_memory = Regularize(initial_memory, params, name='initial_memory')
            input_attentional_decoder.append(initial_memory)
    else:
        # No init layers: decoder starts from its default state.
        input_attentional_decoder = [emb, input_video]

    ##################################################################
    #                           DECODER
    ##################################################################
    # 3.3. Attentional decoder (e.g. AttLSTMCond); this layer instance is
    # shared with the beam-search models built below.
    sharedAttRNNCond = eval('Att' + params['RNN_TYPE'] + 'Cond')(
        params['DECODER_HIDDEN_SIZE'],
        W_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
        U_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
        V_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
        b_regularizer=l2(params['RECURRENT_WEIGHT_DECAY']),
        wa_regularizer=l2(params['WEIGHT_DECAY']),
        Wa_regularizer=l2(params['WEIGHT_DECAY']),
        Ua_regularizer=l2(params['WEIGHT_DECAY']),
        ba_regularizer=l2(params['WEIGHT_DECAY']),
        dropout_W=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
        dropout_U=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
        dropout_V=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
        dropout_wa=params['DROPOUT_P'] if params['USE_DROPOUT'] else None,
        dropout_Wa=params['DROPOUT_P'] if params['USE_DROPOUT'] else None,
        dropout_Ua=params['DROPOUT_P'] if params['USE_DROPOUT'] else None,
        return_sequences=True,
        return_extra_variables=True,
        return_states=True,
        name='decoder_Att' + params['RNN_TYPE'] + 'Cond')
    rnn_output = sharedAttRNNCond(input_attentional_decoder)
    proj_h = rnn_output[0]   # decoder hidden-state sequence
    x_att = rnn_output[1]    # attended context sequence
    alphas = rnn_output[2]   # attention weights
    h_state = rnn_output[3]  # last hidden state
    if params['RNN_TYPE'] == 'LSTM':
        h_memory = rnn_output[4]  # last memory (cell) state
    [proj_h, shared_reg_proj_h] = Regularize(proj_h, params,
                                             shared_layers=True, name='proj_h0')

    # Skip connections: decoder state, attended context and word embedding
    # are each projected to the embedding size and summed.
    shared_FC_mlp = TimeDistributed(Dense(
        params['TARGET_TEXT_EMBEDDING_SIZE'],
        W_regularizer=l2(params['WEIGHT_DECAY']),
        activation='linear',
    ), name='logit_lstm')
    out_layer_mlp = shared_FC_mlp(proj_h)
    shared_FC_ctx = TimeDistributed(Dense(
        params['TARGET_TEXT_EMBEDDING_SIZE'],
        W_regularizer=l2(params['WEIGHT_DECAY']),
        activation='linear',
    ), name='logit_ctx')
    out_layer_ctx = shared_FC_ctx(x_att)
    shared_Lambda_Permute = PermuteGeneral((1, 0, 2))
    out_layer_ctx = shared_Lambda_Permute(out_layer_ctx)
    shared_FC_emb = TimeDistributed(Dense(
        params['TARGET_TEXT_EMBEDDING_SIZE'],
        W_regularizer=l2(params['WEIGHT_DECAY']),
        activation='linear'), name='logit_emb')
    out_layer_emb = shared_FC_emb(emb)

    [out_layer_mlp, shared_reg_out_layer_mlp] = Regularize(
        out_layer_mlp, params, shared_layers=True, name='out_layer_mlp')
    [out_layer_ctx, shared_reg_out_layer_ctx] = Regularize(
        out_layer_ctx, params, shared_layers=True, name='out_layer_ctx')
    [out_layer_emb, shared_reg_out_layer_emb] = Regularize(
        out_layer_emb, params, shared_layers=True, name='out_layer_emb')

    additional_output = merge([out_layer_mlp, out_layer_ctx, out_layer_emb],
                              mode='sum', name='additional_input')
    shared_activation_tanh = Activation('tanh')
    out_layer = shared_activation_tanh(additional_output)

    shared_deep_list = []
    shared_reg_deep_list = []
    # 3.6. Optional deep output layers (maxout or plain dense).
    for i, (activation, dimension) in enumerate(params['DEEP_OUTPUT_LAYERS']):
        if activation.lower() == 'maxout':
            shared_deep_list.append(TimeDistributed(
                MaxoutDense(dimension,
                            W_regularizer=l2(params['WEIGHT_DECAY'])),
                name='maxout_%d' % i))
        else:
            shared_deep_list.append(TimeDistributed(
                Dense(dimension, activation=activation,
                      W_regularizer=l2(params['WEIGHT_DECAY'])),
                name=activation + '_%d' % i))
        out_layer = shared_deep_list[-1](out_layer)
        [out_layer, shared_reg_out_layer] = Regularize(
            out_layer, params, shared_layers=True,
            name='out_layer' + str(activation))
        shared_reg_deep_list.append(shared_reg_out_layer)

    # 3.7. Output layer: Softmax over the target vocabulary.
    shared_FC_soft = TimeDistributed(Dense(
        params['OUTPUT_VOCABULARY_SIZE'],
        activation=params['CLASSIFIER_ACTIVATION'],
        W_regularizer=l2(params['WEIGHT_DECAY']),
        name=params['CLASSIFIER_ACTIVATION']),
        name=self.ids_outputs[0])
    softout = shared_FC_soft(out_layer)
    self.model = Model(input=[video, next_words], output=softout)

    ##################################################################
    #                      BEAM SEARCH MODEL                         #
    ##################################################################
    # Now that we have the basic training model ready, let's prepare the
    # model for applying decoding. The beam-search model will include all
    # the minimum required set of layers (decoder stage) which offer the
    # possibility to generate the next state in the sequence given a
    # pre-processed input (encoder stage).
    if params['BEAM_SEARCH']:
        # First, we need a model that outputs the preprocessed input +
        # initial h state for applying the initial forward pass.
        model_init_input = [video, next_words]
        model_init_output = [softout, input_video, h_state]
        if params['RNN_TYPE'] == 'LSTM':
            model_init_output.append(h_memory)
        self.model_init = Model(input=model_init_input,
                                output=model_init_output)

        # Store inputs and outputs names for model_init.
        self.ids_inputs_init = self.ids_inputs
        # first output must be the output probs.
        self.ids_outputs_init = self.ids_outputs + [
            'preprocessed_input', 'next_state'
        ]
        if params['RNN_TYPE'] == 'LSTM':
            self.ids_outputs_init.append('next_memory')

        # Second, we need to build an additional model with the capability
        # to have the following inputs:
        #   - preprocessed_input
        #   - prev_word
        #   - prev_state
        # and the following outputs:
        #   - softmax probabilities
        #   - next_state
        if params['ENCODER_HIDDEN_SIZE'] > 0:
            # Encoded feature size depends on encoder (bi)directionality.
            if params['BIDIRECTIONAL_ENCODER']:
                preprocessed_size = params[
                    'ENCODER_HIDDEN_SIZE'] * 2 + params['IMG_FEAT_SIZE']
            else:
                preprocessed_size = params['ENCODER_HIDDEN_SIZE'] + params[
                    'IMG_FEAT_SIZE']
        else:
            preprocessed_size = params['IMG_FEAT_SIZE']

        # Define inputs
        preprocessed_annotations = Input(
            name='preprocessed_input',
            shape=tuple([params['NUM_FRAMES'], preprocessed_size]))
        prev_h_state = Input(name='prev_state',
                             shape=tuple([params['DECODER_HIDDEN_SIZE']]))
        input_attentional_decoder = [
            emb, preprocessed_annotations, prev_h_state
        ]
        if params['RNN_TYPE'] == 'LSTM':
            prev_h_memory = Input(name='prev_memory',
                                  shape=tuple([params['DECODER_HIDDEN_SIZE']]))
            input_attentional_decoder.append(prev_h_memory)

        # Apply decoder (same shared layers as in the training model).
        rnn_output = sharedAttRNNCond(input_attentional_decoder)
        proj_h = rnn_output[0]
        x_att = rnn_output[1]
        alphas = rnn_output[2]
        h_state = rnn_output[3]
        if params['RNN_TYPE'] == 'LSTM':
            h_memory = rnn_output[4]
        for reg in shared_reg_proj_h:
            proj_h = reg(proj_h)
        out_layer_mlp = shared_FC_mlp(proj_h)
        out_layer_ctx = shared_FC_ctx(x_att)
        out_layer_ctx = shared_Lambda_Permute(out_layer_ctx)
        out_layer_emb = shared_FC_emb(emb)
        for (reg_out_layer_mlp, reg_out_layer_ctx,
             reg_out_layer_emb) in zip(shared_reg_out_layer_mlp,
                                       shared_reg_out_layer_ctx,
                                       shared_reg_out_layer_emb):
            out_layer_mlp = reg_out_layer_mlp(out_layer_mlp)
            out_layer_ctx = reg_out_layer_ctx(out_layer_ctx)
            out_layer_emb = reg_out_layer_emb(out_layer_emb)
        additional_output = merge(
            [out_layer_mlp, out_layer_ctx, out_layer_emb],
            mode='sum', name='additional_input_model_next')
        out_layer = shared_activation_tanh(additional_output)
        for (deep_out_layer, reg_list) in zip(shared_deep_list,
                                              shared_reg_deep_list):
            out_layer = deep_out_layer(out_layer)
            for reg in reg_list:
                out_layer = reg(out_layer)

        # Softmax
        softout = shared_FC_soft(out_layer)
        model_next_inputs = [
            next_words, preprocessed_annotations, prev_h_state
        ]
        model_next_outputs = [softout, preprocessed_annotations, h_state]
        if params['RNN_TYPE'] == 'LSTM':
            model_next_inputs.append(prev_h_memory)
            model_next_outputs.append(h_memory)
        self.model_next = Model(input=model_next_inputs,
                                output=model_next_outputs)

        # Store inputs and outputs names for model_next.
        # first input must be previous word
        self.ids_inputs_next = [self.ids_inputs[1]
                                ] + ['preprocessed_input', 'prev_state']
        # first output must be the output probs.
        self.ids_outputs_next = self.ids_outputs + [
            'preprocessed_input', 'next_state'
        ]

        # Input -> Output matchings from model_init to model_next and from
        # model_next to model_next.
        self.matchings_init_to_next = {
            'preprocessed_input': 'preprocessed_input',
            'next_state': 'prev_state'
        }
        self.matchings_next_to_next = {
            'preprocessed_input': 'preprocessed_input',
            'next_state': 'prev_state'
        }
        if params['RNN_TYPE'] == 'LSTM':
            self.ids_inputs_next.append('prev_memory')
            self.ids_outputs_next.append('next_memory')
            self.matchings_init_to_next['next_memory'] = 'prev_memory'
            self.matchings_next_to_next['next_memory'] = 'prev_memory'
# language model language_model = Sequential() language_model.add(Reshape(input_shape = (arg.language_feature_dim,), dims=(arg.language_feature_dim,))) # merge model if arg.image_only == 'True': model = image_model elif arg.language_only == 'True': model = language_model else: model = Sequential() model.add(Merge([image_model, language_model], mode = 'concat', concat_axis = 1)) if arg.activation == 'maxout': for cur_units in arg.units: model.add(MaxoutDense(output_dim = cur_units, nb_feature = 2, init = 'uniform')) if arg.dropout < 1: model.add(Dropout(arg.dropout)) else: for cur_units in arg.units: model.add(Dense(output_dim = cur_units, init = 'uniform')) model.add(Activation(arg.activation)) if arg.dropout < 1: model.add(Dropout(arg.dropout)) model.add(Dense(output_dim = word_vec_dim, init = 'uniform')) print '*** save model ***' model_file_name = './model/' model_file_name += basename(arg.question_feature).replace('_300_train.pkl.gz', '').replace('_300_test.pkl.gz', '') model_file_name += '_ionly_{}_lonly_{}_ifdim_{:d}_iidim_{:d}_lfdim_{:d}_dropout_{:.1f}_activation_{}_unit'.format(arg.image_only, arg.language_only,
model.add(Conv2D(64, (3, 3), input_shape=(424, 424, 3))) #3x3 is default model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(3, 3))) #model.add(Dropout(.1))#test model.add(Dense(32, activation='relu')) #test model.add(Conv2D(64, (3, 3))) #input_shape=(424,424,3) model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(3, 3))) model.add(Dense(64, activation='relu')) model.add(Dropout(.2)) #test model.add(Conv2D(64, (3, 3))) #input_shape=(424,424,3) model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(3, 3))) model.add(Dropout(.2)) model.add(Flatten(input_shape=(424, 424, 3))) model.add(MaxoutDense(128)) ###testing model.add(BatchNormalization()) model.add(Dense(2)) model.add(Activation('softmax')) model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy']) tensorboard = TensorBoard(log_dir="logs/{}".format( time())) #allows me to visualize results model.fit_generator(train_batches, steps_per_epoch=2, validation_data=valid_batches, validation_steps=2, epochs=40, verbose=2,
# Convolutional feature extractor (old Keras 0.x API: filters take
# explicit input-channel counts; `model` is created earlier in the file).
model.add(Activation('relu'))
model.add(Convolution2D(16, 8, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2), ignore_border=False))
model.add(Dropout(0.2))

model.add(Convolution2D(32, 16, 3, 3, border_mode='full'))
model.add(Activation('relu'))
model.add(Convolution2D(64, 32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2), ignore_border=False))
model.add(Dropout(0.2))

# Classifier head: maxout on the flattened features, then three
# identically-shaped Dense/PReLU/Dropout blocks, then 48-way softmax.
model.add(Flatten())
model.add(MaxoutDense(128 * 4 * 9, 256, nb_feature=10))
model.add(Dropout(0.4))
for fan_in, fan_out in ((256, 2048), (2048, 1500), (1500, 1200)):
    model.add(Dense(fan_in, fan_out, activation='linear'))
    model.add(PReLU(fan_out))
    model.add(Dropout(0.4))
model.add(Dense(1200, 48, activation='softmax'))
def nn_model(X_train, y_train, X_test, y_test,
             batch_size=20, nb_classes=4, nb_epoch=40):
    '''
    input: X_train (4D np array), y_train (1D np array),
           X_test (4D np array), y_test (1D np array)
    optional: batch_size (int), nb_classes (int), nb_epoch (int)
    output: (fitted model, evaluation result on the test set)
    '''
    # get number of test and train obs
    n_train, n_test = X_train.shape[0], X_test.shape[0]
    # scale images
    X_train, X_test = scale_features(X_train), scale_features(X_test)
    # reshape images because keras is being picky
    X_train = X_train.reshape(n_train, 60, 60, 3)
    X_test = X_test.reshape(n_test, 60, 60, 3)
    # convert class vectors to binary class matrices
    Y_train, Y_test = convert_targets(y_train), convert_targets(y_test)

    # initialize sequential model
    model = Sequential()
    # first convolutional layer and subsequent pooling
    model.add(Convolution2D(32, 5, 5, border_mode='valid',
                            input_shape=(60, 60, 3), activation='relu',
                            dim_ordering='tf', init='glorot_normal'))
    model.add(MaxPooling2D(pool_size=(2, 2), dim_ordering='tf'))
    # second convolutional layer and subsequent pooling
    model.add(Convolution2D(64, 5, 5, border_mode='valid',
                            activation='relu', init='glorot_normal',
                            dim_ordering='tf'))
    model.add(MaxPooling2D(pool_size=(2, 2), dim_ordering='tf'))
    # third convolutional layer
    model.add(Convolution2D(128, 3, 3, border_mode='valid',
                            activation='relu', init='glorot_normal',
                            dim_ordering='tf'))
    # fourth convolutional layer and subsequent pooling
    model.add(Convolution2D(128, 3, 3, border_mode='valid',
                            activation='relu', init='glorot_normal',
                            dim_ordering='tf'))
    model.add(MaxPooling2D(pool_size=(2, 2), dim_ordering='tf'))
    # flattens images to go into dense layers
    model.add(Flatten())
    # first dense layer
    model.add(Dense(2048, init='glorot_normal'))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    # second dense layer
    model.add(MaxoutDense(2048, init='glorot_normal'))
    model.add(Dropout(0.5))
    # third dense layer
    model.add(MaxoutDense(2048, init='glorot_normal'))
    model.add(Dropout(0.5))
    # fourth dense layer
    model.add(Dense(1024, init='glorot_uniform'))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    # output layer
    model.add(Dense(4, init='glorot_uniform'))
    model.add(Activation('softmax'))

    # initializes optimizer
    sgd = SGD(lr=0.005, decay=1e-6, momentum=0.9, nesterov=True)
    # initializes early stopping callback
    early_stopping = EarlyStopping(monitor='val_loss', patience=2,
                                   verbose=1, mode='auto')
    # BUG FIX: the targets are mutually-exclusive one-hot vectors over a
    # 4-way softmax, so the proper objective is categorical_crossentropy;
    # binary_crossentropy here optimized the wrong (per-unit) objective
    # and inflated the reported accuracy.
    model.compile(loss='categorical_crossentropy', optimizer=sgd)
    model.fit(X_train, Y_train, show_accuracy=True, verbose=1,
              callbacks=[early_stopping], batch_size=batch_size,
              nb_epoch=nb_epoch, validation_data=(X_test, Y_test))
    return model, model.evaluate(X_test, Y_test,
                                 show_accuracy=True, verbose=1)
# --------
# -- training
# Two parallel 625->500 relu encoders; NOTE(review): currently unused —
# the merge of these branches into `dl` is disabled below.
raw = Sequential()
raw.add(Dense(625, 500))
raw.add(Activation('relu'))

gaussian = Sequential()
gaussian.add(Dense(625, 500))
gaussian.add(Activation('relu'))

# -- build the model: a deep maxout stack straight off the 625-d input
#    (the concat-merge of raw/gaussian was tried and commented out).
dl = Sequential()
dl.add(MaxoutDense(625, 512, 10))
dl.add(Dropout(0.1))
dl.add(MaxoutDense(512, 256, 6))
dl.add(Dropout(0.1))
dl.add(MaxoutDense(256, 64, 6))
dl.add(Dropout(0.1))
dl.add(MaxoutDense(64, 25, 10))
dl.add(Dropout(0.1))
dl.add(Dense(25, 1))
dl.add(Activation('sigmoid'))
print '{} of {}'.format(i, len(sentences)) data.append(tokens_to_mean_vec(txt, w2v)) X = np.array(data) X = X.astype('float32') Y = np.array(funny_votes).astype('float32') Y = np.log(Y + 1) from keras.models import Sequential from keras.layers.core import MaxoutDense, Dense, Dropout, Activation from keras.optimizers import SGD, Adam, RMSprop, Adagrad model_basic = Sequential() model_basic.add(MaxoutDense(100, 100, 20)) model_basic.add(Activation('relu')) model_basic.add(Dropout(0.2)) model_basic.add(MaxoutDense(100, 20, 10)) model_basic.add(Activation('relu')) model_basic.add(Dropout(0.2)) model_basic.add(MaxoutDense(20, 1)) model_basic.add(Activation('relu')) # model_basic.add(Dropout(0.1)) # model_basic.add(Dense(10, 1)) # model_basic.add(Activation('relu')) ada = Adagrad()
# Classifier on top of the pretrained encoder (first layer of `model`).
clf = Sequential()
clf.add(model.layers[0].encoder)
clf.add(Dropout(0.1))
clf.add(Dense(64, 1))
clf.add(Activation('sigmoid'))
clf.compile(loss='binary_crossentropy', optimizer=Adam(),
            class_mode='binary')
clf.fit(X, y, validation_data=(X_val, y_val),
        batch_size=100, nb_epoch=10, show_accuracy=True)

if not PRETRAINING:
    # -- test a big maxout net
    maxout = Sequential()
    maxout.add(MaxoutDense(625, 200, 5))
    maxout.add(Dropout(0.3))
    maxout.add(Dense(200, 64))
    maxout.add(Activation('tanh'))
    maxout.add(Dropout(0.3))
    maxout.add(Dense(64, 16))
    maxout.add(Activation('tanh'))
    maxout.add(Dropout(0.3))
    maxout.add(Dense(16, 1))
    maxout.add(Activation('sigmoid'))
    maxout.compile(loss='binary_crossentropy', optimizer=Adam(),
                   class_mode='binary')
    # BUG FIX: the fit/predict calls below referenced an undefined name
    # `mo`; the network that was just built and compiled is `maxout`.
    # NOTE(review): assumes `mo` was not a deliberate alias defined
    # elsewhere in the file — verify against the surrounding script.
    maxout.fit(X, y, validation_data=(X_val, y_val),
               batch_size=100, nb_epoch=10, show_accuracy=True)
    maxout.fit(X, y, validation_split=0.2,
               batch_size=100, nb_epoch=10, show_accuracy=True)
    y_dl = maxout.predict(X_, verbose=True).ravel()
def build_model0():
    """Binary classifier: three parallel JZS3 recurrent branches over the
    63-feature time series, concatenated and reduced through a softmax
    bottleneck and a maxout layer to one sigmoid unit.

    Old Keras 0.x API (layers take explicit input/output dims).
    """
    # Three identically-shaped recurrent branches (independent weights).
    branches = []
    for _ in range(3):
        branch = Sequential()
        branch.add(JZS3(63, 40))
        branch.add(Dense(40, 40))
        branch.add(MaxoutDense(40, 80, nb_feature=4))
        branch.add(Dropout(0.5))
        branches.append(branch)

    net = Sequential()
    net.add(Merge(branches, mode='concat'))
    net.add(Dense(240, 120, activation='softmax'))
    net.add(MaxoutDense(120, 120, nb_feature=4))
    net.add(Dropout(0.5))
    net.add(Dense(120, 1, activation='sigmoid'))
    net.compile(loss='binary_crossentropy', optimizer='adam',
                class_mode="binary")
    return net
concat_axis=-1) graph.add_node(GRU(72, go_backwards=True), name='gru_backwards', inputs=['flatten{}gram'.format(n) for n in NGRAMS], concat_axis=-1) # graph.add_node(GRU(16), name='gru', input='flatten4gram') ADDITIONAL_FC = True graph.add_node(Dropout(0.7), name='gru_dropout', inputs=['gru_forwards', 'gru_backwards']) if ADDITIONAL_FC: graph.add_node(MaxoutDense(64, 16, init='he_uniform'), name='maxout', input='gru_dropout') graph.add_node(Dropout(0.5), name='maxout_dropout', input='maxout') graph.add_node(Dense(1, activation='sigmoid'), name='probability', input='maxout_dropout') else: graph.add_node(Dense(1, activation='sigmoid'), name='probability', input='gru_dropout') graph.add_output(name='prediction', input='probability')