def buildModel(embeddingMatrix):
    """Constructs the architecture of the model.

    Input:
        embeddingMatrix : The embedding matrix to be loaded in the embedding layer.
    Output:
        model : A basic LSTM model
    """
    sequence = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='float32')
    embeddingLayer = Embedding(embeddingMatrix.shape[0],
                               EMBEDDING_DIM,
                               weights=[embeddingMatrix],
                               input_length=MAX_SEQUENCE_LENGTH,
                               trainable=False)(sequence)
    embedded = Highway()(embeddingLayer)
    # embedded = Dropout(0.25)(embedded)
    embedded = Bidirectional(
        LSTM(LSTM_DIM, dropout=DROPOUT, return_sequences=True))(embedded)
    enc = Bidirectional(LSTM(LSTM_DIM, dropout=DROPOUT))(embedded)
    fc1 = Dense(128, activation="relu")(enc)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(NUM_CLASSES, activation='sigmoid')(fc2_dropout)

    rmsprop = optimizers.rmsprop(lr=LEARNING_RATE)
    model = Model(inputs=sequence, outputs=output)
    model.compile(loss='categorical_crossentropy',
                  optimizer=rmsprop,
                  metrics=['acc'])
    return model
def naics_hw_noproj_encoder(input_layer, n_layers=2):
    layers = [input_layer]
    for n in range(n_layers):
        previous_layer = layers[n]
        layer = Highway(activation='relu', name='naics_hw%s' % n)(previous_layer)
        layers.append(layer)
    return layers[-1]
def _enhance_D(self, c):
    # adding another enhancing layer for D
    if self.enhanceD == "LSTM":
        l = int(c.get_shape()[-1])
        c = Reshape((1, l))(c)
        c = LSTM(l)(c)
    elif self.enhanceD == "HW":
        c = Highway()(c)
    return c
def blink_net(shape=(64, 64), nb_channels=1):
    logger.info('generating net with input shape ({})'.format(', '.join(
        str(s) for s in shape)))
    img_width, img_height = shape
    eye = Input(shape=(nb_channels, img_width, img_height))

    eye_model = Sequential()
    eye_model.add(
        Convolution2D(64, 3, 3,
                      border_mode='valid',
                      activation='relu',
                      input_shape=(nb_channels, img_width, img_height)))
    eye_model.add(Dropout(0.25))
    eye_model.add(
        Convolution2D(32, 3, 3, border_mode='valid', activation='relu'))
    eye_model.add(Dropout(0.25))
    eye_model.add(Flatten(input_shape=(nb_channels, img_width, img_height)))
    eye_model.add(Dense(1024, activation='relu'))
    eye_model.add(Dropout(0.25))
    eye_model.add(Highway(activation='relu'))
    eye_model.add(Dropout(0.25))
    eye_model.add(Highway(activation='relu'))
    eye_model.add(Dropout(0.25))
    eye_model.add(Highway(activation='relu'))
    eye_model.add(Dense(512, activation='relu'))
    eye_model.add(Dropout(0.2))
    eye_model.add(Dense(128, activation='relu'))
    eye_model.add(Dropout(0.2))
    eye_model.add(Dense(2, activation='softmax', name='pose'))

    logger.info('compiling with Adam and categorical cross-entropy')
    eye_model.compile('adam', 'categorical_crossentropy', metrics=['acc'])
    return eye_model
def lstm():
    model = kr.Sequential()
    model.add(BatchNormalization(input_shape=(1, 465)))
    model.add(
        LSTM(64,
             return_sequences=True,
             kernel_initializer='he_normal',
             use_bias=True,
             bias_initializer=kr.initializers.one(),
             unit_forget_bias=True,
             kernel_regularizer=kr.regularizers.l1_l2(0.001, 0.0001)))
    model.add(LeakyReLU())
    model.add(
        LSTM(64,
             return_sequences=False,
             go_backwards=True,
             kernel_initializer='he_normal'))
    model.add(Highway())
    model.add(GaussianDropout(0.5))
    model.add(LeakyReLU())
    model.add(BatchNormalization())
    model.add(Dense(32))
    model.add(LeakyReLU())
    model.add(BatchNormalization())
    model.add(Highway())
    model.add(Dense(64))
    model.add(LeakyReLU())
    model.add(BatchNormalization())
    model.add(Highway())
    model.add(Dense(128))
    model.add(LeakyReLU())
    model.add(BatchNormalization())
    model.add(Highway())
    model.add(Dense(256))
    model.add(LeakyReLU())
    model.add(BatchNormalization())
    model.add(Dense(1))

    sgd = kr.optimizers.sgd(lr=0.1,
                            momentum=0.1,
                            decay=0.001,
                            nesterov=True,
                            clipnorm=3)
    model.compile(loss='mape', optimizer=sgd, metrics=['mae', 'mse'])
    return model
def build_network(concept_dic, embeddings_file, EMBEDDING_DIM=100, MAX_SENSE_LENGTH=5,
                  CONTEXT_WINDOW_SIZE=5, PRE_TRAINED=True, UPDATABLE=True,
                  dropout_rate=0.3, hidden_activation="relu", highway_activation="sigmoid",
                  output_activation="linear", optimizer="adam", print_model=False):
    INPUTS = []
    LEFT_RIGHT_CENTER = []
    embedding_layer = create_embedding(concept_dic, embeddings_file, EMBEDDING_DIM,
                                       MAX_SENSE_LENGTH, PRE_TRAINED, UPDATABLE)

    # Creating network's pipes one-by-one (from left to right)
    for i in range(2 * CONTEXT_WINDOW_SIZE + 1):
        context_term_input = Input(shape=(MAX_SENSE_LENGTH,), dtype='int32')
        INPUTS.append(context_term_input)
        context_term_embedding = embedding_layer(context_term_input)
        pipe = MaxPooling1D(pool_size=MAX_SENSE_LENGTH)(context_term_embedding)
        pipe = Flatten()(pipe)
        LEFT_RIGHT_CENTER.append(pipe)

    left = Merge(mode='max')(LEFT_RIGHT_CENTER[0:CONTEXT_WINDOW_SIZE])
    left_dense = Dense(units=EMBEDDING_DIM, activation=hidden_activation)(left)
    left_dense_dropout = Dropout(dropout_rate)(left_dense)

    right = Merge(mode='max')(LEFT_RIGHT_CENTER[CONTEXT_WINDOW_SIZE:CONTEXT_WINDOW_SIZE * 2])
    right_dense = Dense(units=EMBEDDING_DIM, activation=hidden_activation)(right)
    right_dense_dropout = Dropout(dropout_rate)(right_dense)

    context = Merge(mode='max')([left_dense_dropout, right_dense_dropout])
    centre = LEFT_RIGHT_CENTER[-1]
    # centre_dense = Dense(units=EMBEDDING_DIM, activation=hidden_activation)(centre)
    # centre__dense_dropout = Dense(units=EMBEDDING_DIM, activation=hidden_activation)(centre_dense)

    merge_instance = Concatenate(axis=-1)([context, centre])
    merge_instance = Highway(activation=highway_activation)(merge_instance)
    # merge_instance = Dense(units=EMBEDDING_DIM * 2, activation=hidden_activation)(merge_instance)
    # merge_instance = Dropout(dropout_rate)(merge_instance)
    merge_instance = Dense(units=EMBEDDING_DIM, activation=hidden_activation)(merge_instance)
    merge_instance = Dropout(dropout_rate)(merge_instance)

    prediction = Dense(units=1, activation=output_activation)(merge_instance)

    model = Model(inputs=INPUTS, outputs=prediction)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    if print_model:
        print(model.summary())

    return model, embedding_layer
def _add_skip_layers(self, tensor_list, layer_count, activation="relu", regularization=None):
    for layer_index in range(layer_count):
        new_list = []
        for i, item in enumerate(tensor_list):
            item = TimeDistributed(
                Highway(activation=activation,
                        W_regularizer=regularization,
                        b_regularizer=regularization))(item)
            new_list.append(item)
        tensor_list = new_list
    return tensor_list
def default_model(self):
    product_ecfp4 = Input(shape=(16384,))
    reaction_ecfp4 = Input(shape=(2048,))

    product = Dense(activation='elu', units=1024)(product_ecfp4)
    reaction = Dense(activation='elu', units=1024)(reaction_ecfp4)

    product = Dropout(0.3)(product)
    product = Highway(activation='elu')(product)
    product = Highway(activation='elu')(product)
    product = Highway(activation='elu')(product)
    product = Highway(activation='elu')(product)
    product = Highway(activation='elu')(product)

    cosine_similarities = Dot(normalize=True, axes=-1)([product, reaction])
    Y = Activation('sigmoid')(cosine_similarities)

    model = Model(input=[product_ecfp4, reaction_ecfp4], output=Y)
    return model
def get_fc_model():
    width = 40
    depth = 6

    model = Sequential()
    model.add(Dense(width, input_dim=3))
    model.add(PReLU())
    for d in range(depth):
        model.add(Highway())
        model.add(PReLU())
    model.add(Dense(3))
    model.compile(loss='mae', optimizer='rmsprop')
    return model
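# --- Hedged usage sketch (not part of the original snippet) ---
# Illustrates how get_fc_model() above could be trained on toy data; the array
# shapes follow the Dense(width, input_dim=3) / Dense(3) layers, but the random
# data, epoch count, and batch size are placeholder values.
import numpy as np

fc_model = get_fc_model()
X_toy = np.random.rand(256, 3).astype('float32')
y_toy = np.random.rand(256, 3).astype('float32')
fc_model.fit(X_toy, y_toy, nb_epoch=5, batch_size=32, verbose=0)
preds = fc_model.predict(X_toy[:10])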
def default_model(self):
    product_ecfp4 = Input(shape=(self.n_feats,))
    product = Dense(512, activation='elu')(product_ecfp4)
    product = Dropout(0.3)(product)
    product = Highway(activation='elu')(product)
    product = Dropout(rate=0.1)(product)
    product = Highway(activation='elu')(product)
    product = Dropout(rate=0.1)(product)
    product = Highway(activation='elu')(product)
    product = Dropout(rate=0.1)(product)
    product = Highway(activation='elu')(product)
    product = Dropout(rate=0.1)(product)
    product = Highway(activation='elu')(product)
    product = Dropout(rate=0.1)(product)
    product = Dense(self.n_classes, activation="relu")(product)
    Y = Activation('softmax')(product)

    model = Model(input=product_ecfp4, output=Y)
    return model
def build_model(config):
    config = get_data(config)
    base_name = 'out'
    if config['hedge'] == True:
        outs = [''] * config['n_layers']
        out_name = [''] * config['n_layers']
        N = config['n_layers']
        for i in range(len(outs)):
            outs[i] = base_name + str(i)
            out_name[i] = base_name + str(i)
    else:
        outs = base_name
        out_name = [base_name]
        N = config['n_layers'] - 1

    in_name = 'in0'
    inputs = Input(config['input_size'], name=in_name)

    for j in range(N):
        if j == 0:
            layer = Dense(config['hidden_num'])(inputs)
            layer = Activation(config['activation'])(layer)
            if config['hedge'] == True:
                outs[j] = Dense(config['output_size'], activation='softmax',
                                name=outs[j])(layer)
            continue
        if config['Highway'] == False:
            layer = Dense(config['hidden_num'])(layer)
            layer = Activation(config['activation'])(layer)
        else:
            layer = Highway(activation=config['activation'])(layer)
        if config['hedge'] == True:
            outs[j] = Dense(config['output_size'], activation='softmax',
                            name=outs[j])(layer)

    if config['hedge'] == False:
        outs = Dense(config['output_size'], activation='softmax', name=outs)(layer)

    model = Model(input=inputs, output=outs)
    return (model, in_name, out_name)
def add_layer(model, width, activation="tanh"):
    model.add(Highway(bias=True))
    # model.add(Dense(bias=True, output_dim=width))
    model.add(Activation(activation))
    return model
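# --- Hedged usage sketch (not part of the original snippet) ---
# Shows one way add_layer() above might be called to stack Highway blocks on a
# Sequential model; the 32-dim input, layer count, and final Dense(1) head are
# illustrative placeholders (note that `width` is unused by the Highway branch).
deep_model = Sequential()
deep_model.add(Dense(32, input_dim=32))
for _ in range(3):
    deep_model = add_layer(deep_model, width=32, activation='tanh')
deep_model.add(Dense(1))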
def naics_hw3_encoder(input_layer, output_dim):
    h1 = Highway(activation='relu')(input_layer)
    h2 = Highway(activation='relu')(h1)
    h3 = Highway(activation='relu')(h2)
    return Dense(output_dim, name='naics_linear_proj')(h3)
def PPI_model_builder(EMBEDDING_DIM, model_ind, MAX_SEQUENCE_LENGTH, WORD_EMBEDDINGS,
                      SUB_ONTOLOGY_work, word_indeces, ACTIVATION_HIDDEN,
                      ACTIVATION_HIGHWAY, ACTIVATION_OUTPUT, DROPOUT, OPTIMIZER,
                      TRANSFER_LEARNING=False, PRE_TRAINED=True, UPDATABLE=True,
                      PRINT_deepSimDEF_SUMMARY=False):
    EMBEDDINGS = {}
    INPUTS = []
    DENSES = []
    CHANNELS = []
    CHANNELS2 = []
    Dense1_weights = []

    if TRANSFER_LEARNING:
        # load json and create model
        # json_file = open('model_repository/model_PPI_' + str(ind) + '.json', 'r')
        json_file = open('model_repository/model_0.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        loaded_model = model_from_json(loaded_model_json)
        # load weights into new model
        # loaded_model.load_weights('model_repository/model_PPI_' + str(ind) + '.h5')
        loaded_model.load_weights('model_repository/model_0.h5')
        # Dense1_weights = loaded_model.get_layer('gene_product_dense').get_weights()
        print("Loaded model from disk")
        model = loaded_model
        model.compile(loss='binary_crossentropy', optimizer=OPTIMIZER, metrics=[fmeasure])
        return model, 0

    for i in range(2):
        for sbo in SUB_ONTOLOGY_work:
            protein_input = Input(shape=(MAX_SEQUENCE_LENGTH[sbo],), dtype='int32')
            INPUTS.append(protein_input)
            if sbo in EMBEDDINGS:
                embedding_layer = EMBEDDINGS[sbo]
            else:
                if PRE_TRAINED:  # using pre-trained word embeddings
                    file_reader = open(WORD_EMBEDDINGS[sbo])
                    word_embeddings = {}
                    for line in file_reader:
                        values = line.split()
                        word = values[0]
                        vector = np.asarray(values[1:], dtype='float32')
                        word_embeddings[word] = vector
                    file_reader.close()
                    print 'Loaded', len(word_embeddings), 'word vectors for', sbo, '(Model ' + str(model_ind + 1) + ')'
                    embedding_size = len(word_embeddings[np.random.choice(word_embeddings.keys())])
                    embedding_matrix = np.zeros((len(word_indeces[sbo]) + 1, embedding_size)) - 300.0
                    for word, i in word_indeces[sbo].items():
                        embedding_vector = word_embeddings.get(word)
                        if embedding_vector is not None:
                            # words not found in embedding index will be all-zeros.
                            embedding_matrix[i] = embedding_vector
                    embedding_layer = Embedding(input_dim=len(word_indeces[sbo]) + 1,
                                                output_dim=embedding_size,
                                                weights=[embedding_matrix],
                                                input_length=MAX_SEQUENCE_LENGTH[sbo],
                                                trainable=UPDATABLE)
                else:  # without using pre-trained word embeddings
                    embedding_layer = Embedding(input_dim=len(word_indeces[sbo]) + 1,
                                                output_dim=EMBEDDING_DIM,
                                                input_length=MAX_SEQUENCE_LENGTH[sbo])
                EMBEDDINGS[sbo] = embedding_layer

            # protein_input = Input(shape=(MAX_SEQUENCE_LENGTH[sbo],), dtype='int32')
            # INPUTS.append(protein_input)
            GO_term = embedding_layer(protein_input)
            Ch = MaxPooling1D(pool_size=MAX_SEQUENCE_LENGTH[sbo])(GO_term)
            Ch = Flatten()(Ch)
            CHANNELS.append(Ch)

    num_pair = 2
    for i in range(num_pair):  # for j in range(len(CHANNELS)/2):
        if len(SUB_ONTOLOGY_work) > 1:
            Mrg = Concatenate(axis=-1)(CHANNELS[i * len(SUB_ONTOLOGY_work):len(SUB_ONTOLOGY_work) * (i + 1)])
        else:
            Mrg = CHANNELS[i]
        if len(DENSES) == 1:
            Dns = DENSES[0]
        else:
            Dns = Dense(units=EMBEDDING_DIM * len(SUB_ONTOLOGY_work), activation=ACTIVATION_HIDDEN)
            # Dns = Dense(units=EMBEDDING_DIM * len(SUB_ONTOLOGY_work), activation=ACTIVATION_HIDDEN,
            #             name='gene_product_dense', weights=Dense1_weights, trainable=UPDATABLE)
            DENSES.append(Dns)
        Ch = Dns(Mrg)
        DrpOut = Dropout(DROPOUT)
        Ch = DrpOut(Ch)
        CHANNELS2.append(Ch)

    merge = Concatenate(axis=-1)(CHANNELS2)
    merge = Highway(activation=ACTIVATION_HIGHWAY, name="highway_layer")(merge)
    merge = Dropout(DROPOUT)(merge)
    merge = Dense(units=EMBEDDING_DIM * len(SUB_ONTOLOGY_work), activation=ACTIVATION_HIDDEN)(merge)
    merge = Dropout(DROPOUT)(merge)

    preds = Dense(units=1, activation=ACTIVATION_OUTPUT)(merge)

    model = Model(inputs=INPUTS, outputs=preds)
    model.compile(loss='binary_crossentropy', optimizer=OPTIMIZER, metrics=[fmeasure])

    if PRINT_deepSimDEF_SUMMARY:
        print model.summary()

    print "Model for Fold Number", model_ind + 1, "Instantiated!!\n"

    return model, EMBEDDINGS
def naics_lp_hw_encoder(input_layer, output_dim):
    l1 = Dense(output_dim, name='naics_linear_proj')(input_layer)
    h1 = Highway(activation='relu')(l1)
    return h1
def naics_hw_only_encoder(input_layer):
    return Highway(activation='relu')(input_layer)
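# --- Hedged wiring example (not from the original code) ---
# Demonstrates how one of the NAICS encoder builders above could be plugged into
# a small functional-API classifier; the 64-dim input and 10-class output are
# arbitrary placeholder values.
naics_in = Input(shape=(64,), name='naics_in')
naics_encoded = naics_lp_hw_encoder(naics_in, output_dim=32)
naics_out = Dense(10, activation='softmax', name='naics_out')(naics_encoded)
naics_model = Model(inputs=naics_in, outputs=naics_out)
naics_model.compile(optimizer='adam', loss='categorical_crossentropy')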
def deep_neural_net_gru(train_data_1, train_data_2, train_labels, test_data_1,
                        test_data_2, test_labels, max_len, len_chars, bidirectional,
                        hidden_units, selfattention, maxpooling, alignment, shortcut,
                        multiplerlu, onlyconcat, n):
    early_stop = EarlyStopping(monitor='loss', patience=0, verbose=1)
    checkpointer = ModelCheckpoint(
        filepath="/home/amarinho/data-amarinho/checkpoint" + str(n) + ".hdf5",
        verbose=1,
        save_best_only=True)

    gru1 = GRU(hidden_units, consume_less='gpu', return_sequences=True)
    gru2 = GRU(hidden_units, consume_less='gpu',
               return_sequences=(alignment or selfattention or maxpooling))
    if bidirectional:
        gru1 = Bidirectional(gru1)
        gru2 = Bidirectional(gru2)

    # definition for left branch of the network
    left_branch = Sequential()
    left_branch.add(Masking(mask_value=0, input_shape=(max_len, len_chars)))
    if shortcut:
        left_branch_aux1 = Sequential()
        left_branch_aux1.add(left_branch)
        left_branch_aux1.add(gru1)
        left_branch_aux2 = Sequential()
        left_branch_aux2.add(
            Merge([left_branch, left_branch_aux1], mode='concat'))
        left_branch = left_branch_aux2
    else:
        left_branch.add(gru1)
    left_branch.add(Dropout(0.01))
    left_branch.add(gru2)
    left_branch.add(Dropout(0.01))

    # definition for right branch of the network
    right_branch = Sequential()
    right_branch.add(Masking(mask_value=0, input_shape=(max_len, len_chars)))
    if shortcut:
        right_branch_aux1 = Sequential()
        right_branch_aux1.add(right_branch)
        right_branch_aux1.add(gru1)
        right_branch_aux2 = Sequential()
        right_branch_aux2.add(
            Merge([right_branch, right_branch_aux1], mode='concat'))
        right_branch = right_branch_aux2
    else:
        right_branch.add(gru1)
    right_branch.add(Dropout(0.01))
    right_branch.add(gru2)
    right_branch.add(Dropout(0.01))

    # mechanisms used for building representations from the GRU states (e.g., through attention)
    if alignment:
        left_branch, right_branch = AlignmentAttention(left_branch, right_branch)
    if selfattention:
        att = SelfAttLayer()
        left_branch.add(att)
        right_branch.add(att)
    elif maxpooling:
        left_branch.add(GlobalMaxPooling1DMasked())
        right_branch.add(GlobalMaxPooling1DMasked())
    elif alignment:
        gru3 = GRU(hidden_units, consume_less='gpu', return_sequences=False)
        if bidirectional:
            gru3 = Bidirectional(gru3)
        left_branch.add(gru3)
        right_branch.add(gru3)

    # combine the two representations and produce the final classification
    con_layer = Sequential(name="con_layer")
    con_layer.add(
        Merge([left_branch, right_branch], mode='concat', name="merge_con"))
    mul_layer = Sequential(name="mul_layer")
    mul_layer.add(
        Merge([left_branch, right_branch], mode='mul', name="merge_mul"))
    dif_layer = Sequential(name="dif_layer")
    dif_layer.add(
        Merge([left_branch, right_branch],
              mode=lambda x: x[0] - x[1],
              output_shape=lambda x: x[0],
              name="merge_dif"))

    final_model = Sequential(name="final_model")
    if onlyconcat:
        final_model.add(con_layer)
    else:
        final_model.add(
            Merge([con_layer, mul_layer, dif_layer],
                  mode='concat',
                  name="merge_threeconcat"))
    final_model.add(Dropout(0.01))
    final_model.add(Dense(hidden_units, activation='relu'))
    final_model.add(Dropout(0.01))
    if multiplerlu:
        final_model.add(Highway(activation='relu'))
        final_model.add(Dropout(0.01))
        final_model.add(Highway(activation='relu'))
        final_model.add(Dropout(0.01))
    final_model.add(Dense(1, activation='sigmoid'))

    print('Compiling...')
    final_model.compile(optimizer='adam',
                        loss='binary_crossentropy',
                        metrics=['accuracy'])
    print('Fitting...')
    final_model.fit([train_data_1, train_data_2],
                    train_labels,
                    verbose=0,
                    validation_data=([test_data_1, test_data_2], test_labels),
                    callbacks=[early_stop, checkpointer],
                    nb_epoch=20)

    start_time = time.time()
    print("Evaluating ...")
    aux = final_model.predict_classes([test_data_1, test_data_2]).ravel()
    return aux, (time.time() - start_time)
def main(MODEL_FILE): print "Loading hdf5's..." test_dict = io.load('./data/test_dict_IPConv_ntuple_'+ RUN_NAME +'.h5') train_dict = io.load('./data/train_dict_IPConv_ntuple_'+ RUN_NAME +'.h5') X_train = train_dict['X'] y_train = train_dict['y'] X_test = test_dict['X'] y_test = test_dict['y'] n_features = X_test.shape[2] # this is a df ip3d = test_dict['ip3d'] print 'Building model...' if (MODEL_FILE == 'CRNN'): graph = build_graph(n_features) model = Sequential() model.add(graph) # remove Maxout for tensorflow model.add(MaxoutDense(64, 5, input_shape=graph.nodes['dropout'].output_shape[1:])) model.add(Dense(64)) elif (MODEL_FILE == 'RNN'): model = Sequential() model.add(Masking(mask_value=-999, input_shape=(N_TRACKS, n_features))) model.add(GRU(25))#, input_shape=(N_TRACKS, n_features))) #GRU model.add(Dropout(0.2)) #0.2 # remove Maxout for tensorflow model.add(MaxoutDense(64, 5)) #, input_shape=graph.nodes['dropout'].output_shape[1:])) model.add(Dense(64)) model.add(Dropout(0.4)) model.add(Highway(activation = 'relu')) model.add(Dropout(0.3)) model.add(Dense(4)) model.add(Activation('softmax')) print 'Compiling model...' model.compile('adam', 'categorical_crossentropy') model.summary() print 'Training:' try: model.fit(X_train, y_train, batch_size=512, callbacks = [ EarlyStopping(verbose=True, patience=20, monitor='val_loss'), ModelCheckpoint(MODEL_FILE + RUN_NAME +'-progress', monitor='val_loss', verbose=True, save_best_only=True) ], nb_epoch=100, validation_split = 0.2, show_accuracy=True) except KeyboardInterrupt: print 'Training ended early.' # -- load in best network model.load_weights(MODEL_FILE + RUN_NAME +'-progress') if (SAVE_PROTOBUF): print 'Saving protobuf' # write out to a new directory called models # the actual graph file is graph.pb # the graph def is in the global session import tensorflow as tf import keras.backend.tensorflow_backend as tfbe sess = tfbe._SESSION saver = tf.train.Saver() tf.train.write_graph(sess.graph_def, 'models/', 'graph.pb', as_text=False) save_path = saver.save(sess, "./model-weights.ckpt") print "Model saved in file: %s" % save_path print saver.as_saver_def().filename_tensor_name print saver.as_saver_def().restore_op_name print model.get_output() print 'Saving weights...' model.save_weights('./weights/ip3d-replacement_' + MODEL_FILE + RUN_NAME +'.h5', overwrite=True) json_string = model.to_json() open(MODEL_FILE + RUN_NAME +'.json', 'w').write(json_string) print 'Testing...' yhat = model.predict(X_test, verbose = True, batch_size = 512) io.save('yhat'+ RUN_NAME +'.h5', yhat) print 'Plotting ROC...' fg = plot_ROC(y_test, yhat, ip3d, MODEL_FILE) #plt.show() fg.savefig('./plots/roc' + MODEL_FILE + RUN_NAME +'.pdf')
def columbia_net(shape=(64, 64), nb_channels=1):
    logger.info('generating net with input shape ({})'.format(', '.join(
        str(s) for s in shape)))
    img_width, img_height = shape
    nb_poses = 5
    nb_vertical = 3
    nb_horiz = 7

    face = Input(shape=(nb_channels, img_width, img_height))
    left_eye = Input(shape=(nb_channels, img_width, img_height))
    right_eye = Input(shape=(nb_channels, img_width, img_height))

    face_model = Sequential()
    face_model.add(Flatten(input_shape=(nb_channels, img_width, img_height)))
    face_model.add(Dense(1024, activation='relu'))
    face_model.add(Dropout(0.25))
    face_model.add(Highway(activation='relu'))
    face_model.add(Dropout(0.25))
    face_model.add(Highway(activation='relu'))
    face_model.add(Dropout(0.25))
    face_model.add(Highway(activation='relu'))
    face_model.add(Dense(512, activation='relu'))
    face_h = face_model(face)

    eye_model = Sequential()
    eye_model.add(Flatten(input_shape=(nb_channels, img_width, img_height)))
    eye_model.add(Dense(1024, activation='relu'))
    eye_model.add(Dropout(0.25))
    eye_model.add(Highway(activation='relu'))
    eye_model.add(Dropout(0.25))
    eye_model.add(Highway(activation='relu'))
    eye_model.add(Dropout(0.25))
    eye_model.add(Highway(activation='relu'))
    eye_model.add(Dense(512, activation='relu'))
    # eye_model.add(Flatten())
    left_eye_h = eye_model(left_eye)
    right_eye_h = eye_model(right_eye)

    # combined = merge([face_h, left_eye_h, right_eye_h], mode='concat', concat_axis=1)
    eyes = merge([left_eye_h, right_eye_h], mode='sum')
    combined = merge([face_h, eyes], mode='concat', concat_axis=1)

    h = Dense(128)(combined)
    h = Activation('relu')(h)
    h = Dropout(0.2)(h)
    out_pose = Dense(nb_poses, activation='softmax', name='pose')(h)

    h = Dense(128)(combined)
    h = Activation('relu')(h)
    h = Dropout(0.2)(h)
    out_vertical = Dense(nb_vertical, activation='softmax', name='vertical')(h)

    h = Dense(128)(combined)
    h = Activation('relu')(h)
    h = Dropout(0.2)(h)
    out_horiz = Dense(nb_horiz, activation='softmax', name='horizontal')(h)

    model = Model(input=[face, left_eye, right_eye],
                  output=[out_pose, out_vertical, out_horiz])

    logger.info('compiling with Adam and sparse categorical cross-entropy')
    model.compile('adam', 3 * ['sparse_categorical_crossentropy'], metrics=['acc'])
    return model
def LSTMCNN(opt):
    # opt.seq_length = number of time steps (words) in each batch
    # opt.rnn_size = dimensionality of hidden layers
    # opt.num_layers = number of layers
    # opt.dropout = dropout probability
    # opt.word_vocab_size = num words in the vocab
    # opt.word_vec_size = dimensionality of word embeddings
    # opt.char_vocab_size = num chars in the character vocab
    # opt.char_vec_size = dimensionality of char embeddings
    # opt.feature_maps = table of feature map sizes for each kernel width
    # opt.kernels = table of kernel widths
    # opt.length = max length of a word
    # opt.use_words = 1 if use word embeddings, otherwise not
    # opt.use_chars = 1 if use char embeddings, otherwise not
    # opt.highway_layers = number of highway layers to use, if any
    # opt.batch_size = number of sequences in each batch

    if opt.use_words:
        word = Input(batch_shape=(opt.batch_size, opt.seq_length),
                     dtype='int32', name='word')
        word_vecs = Embedding(opt.word_vocab_size,
                              opt.word_vec_size,
                              input_length=opt.seq_length)(word)

    if opt.use_chars:
        chars = Input(batch_shape=(opt.batch_size, opt.seq_length, opt.max_word_l),
                      dtype='int32', name='chars')
        chars_embedding = TimeDistributed(
            Embedding(opt.char_vocab_size, opt.char_vec_size,
                      name='chars_embedding'))(chars)
        cnn = CNN(opt.seq_length, opt.max_word_l, opt.char_vec_size,
                  opt.feature_maps, opt.kernels, chars_embedding)
        if opt.use_words:
            x = Merge(mode='concat')([cnn, word_vecs])
            inputs = [chars, word]
        else:
            x = cnn
            inputs = chars
    else:
        x = word_vecs
        inputs = word

    if opt.batch_norm:
        x = BatchNormalization()(x)

    for l in range(opt.highway_layers):
        x = TimeDistributed(Highway(activation='relu'))(x)

    for l in range(opt.num_layers):
        x = LSTM(opt.rnn_size, activation='tanh', inner_activation='sigmoid',
                 return_sequences=True, stateful=True)(x)
        if opt.dropout > 0:
            x = Dropout(opt.dropout)(x)

    output = TimeDistributed(Dense(opt.word_vocab_size, activation='softmax'))(x)

    model = sModel(input=inputs, output=output)
    print model.summary()

    optimizer = sSGD(lr=opt.learning_rate, clipnorm=opt.max_grad_norm,
                     scale=float(opt.seq_length))
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer)
    return model
model.add(merged)
# model.add(BatchNormalization())

# multiply passage by the dot product
# add softmax here

# model.add(Dropout(.5))
# model.add(Dense(100, activation='softmax'))

# model.add(MaxPooling1D(pool_length=4, stride=None, border_mode='valid'))
# model.add(Activation('relu'))
# model.add(Permute((2, 1)))
# model.add(AveragePooling1D(pool_length=5, stride=None, border_mode='valid'))
# model.add(MaxPooling1D(pool_length=MAX_QUESTION_LENGTH/5, stride=None, border_mode='valid'))
# model.add(Permute((2, 1)))

model.add(Flatten())
model.add(Dropout(.2))
model.add(Highway())  # looks like this kind of worked
model.add(Dropout(.2))
model.add(Dense(MAX_PASSAGE_LENGTH, activation='softmax'))

plot(model, to_file='model.png', show_shapes=True)

# train a 1D convnet with global maxpooling
# adam = Adam(lr=.0001, clipnorm=10)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc', 'recall'])
# metrics=['recall'])

# happy learning!
# model.fit(x=[passages, questions], y=labels, nb_epoch=2, batch_size=128)
model.fit([p_train, q_train],
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(60000, img_channels, img_rows, img_cols)
X_test = X_test.reshape(10000, img_channels, img_rows, img_cols)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

inp_img = Input(shape=(img_channels, img_rows, img_cols,))

x = Flatten()(inp_img)
x = Dense(32, activation='relu')(x)
for _ in range(nb_layer):
    x = Highway(activation='relu')(x)
y = Dense(nb_classes, activation='softmax')(x)

classifier = Model(input=inp_img, output=y)
classifier.compile(optimizer='rmsprop',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])
classifier.fit(X_train, Y_train,
               nb_epoch=100,
               batch_size=batch_size,
               verbose=1)
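# --- Hedged follow-up (not part of the original snippet) ---
# Evaluates the Highway classifier above on the held-out MNIST test split;
# index 1 of the returned list is the 'accuracy' metric requested in compile().
score = classifier.evaluate(X_test, Y_test, verbose=0)
print('test loss: %.4f, test accuracy: %.4f' % (score[0], score[1]))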
def build_GRU(dim, lsize, bsize, cost='mse'):
    def filt(args):
        return subsample(args[0]) * K.abs(subsample(args[1]))

    def subtract(args):
        return K.abs(args[0] - args[1])

    def myfun(args):
        return K.abs(subsample(args[0]))

    def rect(args):
        return K.abs(args[0])

    def flipsum(args):
        return args[0][:, ::-1, :] + args[1]

    def subsample(args):
        return args[:, 3:15, :]

    xin = Input(batch_shape=(bsize, lsize, dim))

    # Encoding part using bi-directional GRUs
    rnnAF = GRU(dim, init='glorot_normal', activation='tanh',
                inner_activation='hard_sigmoid',
                W_regularizer=None, U_regularizer=None,
                stateful=False, return_sequences=True,
                consume_less='gpu')(xin)
    mr = merge([xin, rnnAF], mode='sum')

    rnnBF = GRU(dim, init='glorot_normal', activation='tanh',
                inner_activation='hard_sigmoid',
                W_regularizer=None, U_regularizer=None,
                go_backwards=True, return_sequences=True,
                consume_less='mem')(xin)
    mrB = merge([xin, rnnBF], mode=flipsum, output_shape=(lsize, dim))

    # The return of Bi-GRUs
    mrBDIR = merge([mr, mrB], mode='concat')

    # Decoding part
    rnnDsv = GRU(dim, init='glorot_normal', activation='tanh',
                 inner_activation='hard_sigmoid',
                 W_regularizer=None, U_regularizer=None,
                 stateful=False, return_sequences=True,
                 consume_less='gpu')(mrBDIR)
    svout = merge([xin, rnnDsv], output_shape=(lsize / 2, dim), mode=filt)

    # Post filtering with sparsity constraint
    hC = TimeDistributed(Highway(input_dim=dim, activation='relu',
                                 activity_regularizer=activity_l2(1e-4)))(svout)

    model = Model(xin, [svout, hC, rnnDsv])

    # Cost functions
    def mseloss(ytrue, ypred):
        # Update of the true
        ytrue = xin * (K.pow(ytrue, 1.) + K.epsilon()) / (K.pow(xin, 1.) + K.epsilon())
        return K.sum(K.pow(ytrue - ypred, 2.), axis=-1)

    def KL(ytrue, ypred):
        # Update of the true
        ytrue = subsample(xin) * (K.pow(ytrue, 1.) + K.epsilon()) / (K.pow(subsample(xin), 1.) + K.epsilon()) + 1e-6
        ypred += 1e-6
        return K.sum(ytrue * (K.log(ytrue) - K.log(ypred)) + (ypred - ytrue), axis=-1)

    def KLbkg(ytrue, ypred):
        # Update of the true
        ytrue = subsample(xin) * K.abs(1. - ((K.pow(ytrue, 1.) + K.epsilon()) / (K.pow(subsample(xin), 1.) + K.epsilon()))) + 1e-6
        ypred += 1e-6
        return K.sum(ytrue * (K.log(ytrue) - K.log(ypred)) + (ypred - ytrue), axis=-1)

    if cost == 'mse':
        print('MSE')
        model.compile(optimizer=opt, loss=[mseloss, mseloss])
    elif cost == 'kl':
        print('Kullback-Leibler')
        model.compile(optimizer=opt, loss=[KL, KL])
    elif cost == 'klbkg':
        print('Kullback-Leibler for Accompaniment Instrument')
        model.compile(optimizer=opt, loss=[KLbkg, KLbkg])

    return model
model_left.add(Dense(5, input_dim=4, init='glorot_uniform'))
model_left.add(BatchNormalization(mode=2))
model_left.add(Activation('relu'))
model_left.add(Dense(5))
model_left.add(BatchNormalization(mode=2))
model_left.add(Activation('relu'))
model_left.add(Dense(3))
model_left.add(Activation('relu'))
model_left.add(Dense(4))

for i in range(0, 6):
    print(i, model_left.layers[i].name)

model_right = Sequential()
model_right.add(Dense(4, input_shape=(4,)))
model_right.add(Highway())
model_right.add(BatchNormalization(mode=2))

model2 = Sequential()
model2.add(Merge([model_left, model_right], mode='concat'))
model2.add(Activation('relu'))
model2.add(Reshape((8,)))
model2.add(Dense(5))
model2.add(BatchNormalization(mode=2))
model2.add(Activation('relu'))
model2.add(Dense(3))
model2.add(Activation('relu'))
model2.add(Dense(4))

for i in range(0, 6):
    print(i, model2.layers[i].name)
def __init__(self, n_in, hidden_layer_size, n_out, L1_reg, L2_reg,
             hidden_layer_type, output_type='LINEAR', dropout_rate=0.0):
    logger = logging.getLogger("DNN initialization")

    self.n_in = int(n_in)
    self.n_out = int(n_out)

    self.n_layers = len(hidden_layer_size)

    self.dropout_rate = dropout_rate

    self.L1_reg = L1_reg
    self.L2_reg = L2_reg

    self.optimizer = 'adam'

    # fix random seed for reproducibility
    seed = 123
    np.random.seed(seed=seed)

    # Model must have at least one hidden layer
    assert self.n_layers > 0, 'Model must have at least one hidden layer'

    # Number of hidden layers and their types should be equal
    assert len(hidden_layer_size) == len(hidden_layer_type)

    ### Create model graph ###
    self.model = Sequential()
    self.model.add(
        Dense(output_dim=128,
              input_dim=n_in,
              init='glorot_uniform',
              activation='relu',
              W_regularizer=l1l2(l1=self.L1_reg, l2=self.L2_reg)))
    self.model.add(Dropout(self.dropout_rate))

    num_layers = 15
    for i in xrange(num_layers):
        self.model.add(Highway(activation='relu'))
        self.model.add(Dropout(self.dropout_rate))
        # self.model.add(Dropout(dropout))

    # add output layer
    if output_type.lower() == 'linear':
        self.final_layer = self.model.add(
            Dense(output_dim=n_out,
                  input_dim=hidden_layer_size[-1],
                  init='glorot_uniform',
                  activation='linear',
                  W_regularizer=l1l2(l1=self.L1_reg, l2=self.L2_reg)))
    elif output_type.lower() == 'sigmoid':
        self.final_layer = self.model.add(
            Dense(output_dim=n_out,
                  input_dim=hidden_layer_size[-1],
                  init='glorot_uniform',
                  activation='sigmoid',
                  W_regularizer=l1l2(l1=self.L1_reg, l2=self.L2_reg)))
    else:
        logger.critical(
            "This output activation function: %s is not supported right now!"
            % (output_type))
        sys.exit(1)

    # Compile the model
    self.model.compile(loss='mse', optimizer=self.optimizer)
data = pd.DataFrame(np.concatenate((X_train2, Y_train), axis=1))
data2 = shuffle(data)
X_train2 = np.array(data2.ix[:, 0:3])
Y_train = np.array(pd.get_dummies(data2.ix[:, 4]))

sgd = SGD(lr=learning_rate, momentum=momentum, decay=decay_rate, nesterov=False)

np.var(X_train2.T)

model = Sequential()
model.add(Dense(7, input_dim=4, init='glorot_uniform'))
model.add(Activation('relu'))
model.add(Dense(7, init='glorot_uniform'))
model.add(Highway())
model.add(Dense(3, init='glorot_uniform'))
model.add(Activation('sigmoid'))

model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model.fit(X_train2, Y_train, batch_size=30, nb_epoch=1000, verbose=1, validation_split=0.9)

res22 = model.predict_classes([X_train2, X_train2, X_train2], batch_size=30)
acc22 = ((res22 - data2.ix[:, 4]) == 0).sum() / len(res22)
acc22
# Note, there is a bug in original implementation of TimeDistributed in keras
input_tensor = tf.placeholder(tf.int32, shape=(opts.batch_size, opts.sequence_length))
a = Input(batch_shape=(opts.batch_size, opts.sequence_length),
          tensor=input_tensor, name='input')
e = Embedding(opts.vocabulary_size, opts.embedding_size,
              input_length=opts.sequence_length, name='embedding')(a)
x = e
for i in range(1, opts.interaction_times + 1):
    x = PairwiseInteraction(gate_type=opts.gate_type,
                            activation=opts.activation_type,
                            dropout=opts.dropout,
                            name='interaction layer %d' % i)(x)
    if opts.highway:
        x = TimeDistributed(Highway())(x)
    x = KMaxTensorPooling(opts.sequence_length, name='kmaxpooling layer %d' % i)(x)

x = Lambda(lambda t: tf.reshape(
    t, [-1, opts.sequence_length * opts.embedding_size]))(x)
e = Lambda(lambda t: tf.reshape(
    t, [-1, opts.sequence_length * opts.embedding_size]))(e)
x = merge([x, e], mode='concat', concat_axis=1)

x = Dense(128, activation='sigmoid')(x)
x = Dense(32, activation='sigmoid')(x)
x = Dense(1, activation='sigmoid', name='prob')(x)

model = Model(input=a, output=x)
model.compile(optimizer='nadam', loss='binary_crossentropy', metrics=['accuracy'])
def _generate_model(self, lembedding, num_classes=2, ngrams=[1, 2, 3, 4, 5],
                    nfilters=64, rnn_type=GRU, rnn_dim=80, train_vectors=True):
    CHARACTERS_PER_WORD = lembedding.size_level1
    WORDS_PER_DOCUMENT = lembedding.size_level2
    EMBEDDING_DIM = lembedding.vector_box.vector_dim
    INPUT_SHAPE = (CHARACTERS_PER_WORD * WORDS_PER_DOCUMENT, )
    EMBEDDING_SHAPE = (WORDS_PER_DOCUMENT, CHARACTERS_PER_WORD, EMBEDDING_DIM)

    doc = Input(shape=(INPUT_SHAPE[0], ), dtype='int32')

    embedded = Sequential([
        Embedding(input_dim=lembedding.vector_box.size,
                  output_dim=EMBEDDING_DIM,
                  input_length=INPUT_SHAPE[0]),
        Reshape(EMBEDDING_SHAPE)
    ])(doc)

    def sub_model(n):
        return Sequential([
            Convolution1D(nfilters, n,
                          activation='relu',
                          input_shape=EMBEDDING_SHAPE[1:]),
            Lambda(lambda x: K.max(x, axis=1), output_shape=(nfilters,))
        ])

    rep = Dropout(0.5)(
        merge([TimeDistributed(sub_model(n))(embedded) for n in ngrams],
              mode='concat', concat_axis=-1)
    )

    out = Dropout(0.5)(
        merge([rnn_type(rnn_dim)(rep), rnn_type(rnn_dim, go_backwards=True)(rep)],
              mode='concat', concat_axis=-1)
    )

    mapping = [
        Highway(activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.4)
    ]
    for f in mapping:
        out = f(out)

    if num_classes == 2:
        out = Dense(1, activation='sigmoid')(out)
        model = Model(input=doc, output=out)
        if self.optimizer is None:
            self.optimizer = 'rmsprop'
        model.compile(loss='binary_crossentropy', optimizer=self.optimizer,
                      metrics=["accuracy"])
    else:
        out = Dense(num_classes, activation='softmax')(out)
        model = Model(input=doc, output=out)
        if self.optimizer is None:
            self.optimizer = 'adam'
        model.compile(loss='categorical_crossentropy', optimizer=self.optimizer,
                      metrics=["accuracy"])

    return model
    return (x - np.min(x)) / (np.max(x) - np.min(x))

part = 8
thre = 1

## The correct value is 256
recog = Sequential()
recog.add(Dense(64, activation='relu', input_shape=(784,), init='glorot_uniform'))

recog_left = recog
recog_left.add(Dense(64, input_shape=(64,), activation='relu'))

recog_right = recog
recog_right.add(Dense(64, input_shape=(64,), activation='relu'))
recog_right.add(Lambda(lambda x: x + K.exp(x / 2) * K.random_normal(shape=(1, 64),
                                                                    mean=0.,
                                                                    std=epsilon_std),
                       output_shape=(64,)))
recog_right.add(Highway())
recog_right.add(Activation('sigmoid'))

recog1 = Sequential()
recog1.add(Merge([recog_left, recog_right], mode='ave'))
recog1.add(Dense(784))

#### HERE***
recog11 = Sequential()
layer = Dense(64, init='glorot_uniform', input_shape=(784,))
layer.trainable = False
recog11.add(layer)
layer2 = Dense(784, activation='sigmoid', init='glorot_uniform')
layer2.trainable = False
recog11.add(layer2)
recog11.layers[0].W.set_value(np.ones((784, 64)).astype(np.float32))
def main(MODEL_FILE):
    test_dict = io.load('./data/test_dict_IPConv.h5')
    train_dict = io.load('./data/train_dict_IPConv.h5')

    X_train = train_dict['X']
    y_train = train_dict['y']

    n_features = X_train.shape[2]

    X_test = test_dict['X']
    y_test = test_dict['y']

    ip3d = test_dict['ip3d']  # this is a df

    print 'Building model...'

    if (MODEL_FILE == 'CRNN'):
        graph = build_graph(n_features)
        model = Sequential()
        model.add(graph)
        model.add(Dense(64))

    elif (MODEL_FILE == 'RNN'):
        graph = build_graph_noCNN(n_features)
        model = Sequential()
        model.add(graph)
        model.add(Dense(64))

    model.add(Dropout(0.4))
    model.add(Highway(activation='relu'))
    model.add(Dropout(0.4))  # 3
    model.add(Dense(4))
    model.add(Activation('softmax'))

    print 'Compiling model...'
    model.compile('adam', 'categorical_crossentropy')
    model.summary()

    print 'Training:'
    try:
        model.fit(X_train, y_train, batch_size=512,
                  callbacks=[
                      EarlyStopping(verbose=True, patience=20, monitor='val_loss'),
                      ModelCheckpoint(MODEL_FILE + '-progress',
                                      monitor='val_loss',
                                      verbose=True,
                                      save_best_only=True)
                  ],
                  nb_epoch=200,
                  validation_split=0.2,
                  show_accuracy=True)
    except KeyboardInterrupt:
        print 'Training ended early.'

    # -- load in best network
    model.load_weights(MODEL_FILE + '-progress')

    print 'Saving weights...'
    model.save_weights('./weights/ip3d-replacement_' + MODEL_FILE + '.h5',
                       overwrite=True)

    print 'Testing...'
    yhat = model.predict(X_test, verbose=True, batch_size=512)

    print 'Plotting ROC...'
    fg = plot_ROC(y_test, yhat, ip3d, MODEL_FILE)
    # plt.show()
    fg.savefig('./plots/roc_' + MODEL_FILE + '.pdf')