def build_model():
    model = None
    if embedding_layer:
        input = Input(shape=(None,))
    else:
        input = Input(shape=(None, n_words))
    profiles_input = Input(shape=(None, X_aug_train.shape[2]))

    # Defining an embedding layer mapping from the words (n_words) to a vector of len 250
    if embedding_layer:
        x1 = Embedding(input_dim=n_words, output_dim=250, input_length=None)(input)
    else:
        x1 = input
    x1 = concatenate([x1, profiles_input])
    x1 = Dense(1200, activation="relu")(x1)
    x1 = Dropout(0.5)(x1)
    # Defining a bidirectional GRU using the embedded representation of the inputs
    x1 = Bidirectional(CuDNNGRU(units=500, return_sequences=True))(x1)
    x1 = Bidirectional(CuDNNGRU(units=100, return_sequences=True))(x1)

    if embedding_layer:
        x2 = Embedding(input_dim=n_words, output_dim=125, input_length=None)(input)
    else:
        x2 = input
    x2 = concatenate([x2, profiles_input])
    # Defining a bidirectional GRU using the embedded representation of the inputs
    x2 = Bidirectional(CuDNNGRU(units=500, return_sequences=True))(x2)
    x2 = Bidirectional(CuDNNGRU(units=100, return_sequences=True))(x2)

    COMBO_MOVE = concatenate([x1, x2])
    w = Dense(500, activation="relu")(COMBO_MOVE)  # try 500
    w = Dropout(0.4)(w)
    w = tcn.TCN(return_sequences=True)(w)
    y = TimeDistributed(Dense(n_tags, activation="softmax"))(w)

    # Defining the model as a whole and printing the summary
    model = Model([input, profiles_input], y)
    # model.summary()

    # Setting up the model with categorical x-entropy loss and the custom accuracy function as accuracy
    adamOptimizer = Adam(lr=0.001, beta_1=0.8, beta_2=0.8, epsilon=None,
                         decay=0.0001, amsgrad=False)
    model.compile(optimizer=adamOptimizer, loss=nll1, metrics=["accuracy", accuracy])
    return model
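# The snippets in this section reference a custom `accuracy` metric (and globals
# such as n_words, n_tags, embedding_layer, nll1) that are defined elsewhere.
# Below is a minimal, hypothetical sketch of such a masked per-position accuracy,
# only to make the compile() calls above and below self-contained; it is an
# assumption, not the authors' exact metric.
import keras.backend as K

def accuracy(y_true, y_pred):
    # Compare argmax predictions per position, ignoring padded positions
    # whose one-hot target rows are all zero.
    y = K.argmax(y_true, axis=-1)
    y_ = K.argmax(y_pred, axis=-1)
    mask = K.cast(K.greater(K.sum(y_true, axis=-1), 0.), K.floatx())
    matches = K.cast(K.equal(y, y_), K.floatx()) * mask
    return K.sum(matches) / K.maximum(K.sum(mask), 1.)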
def build_model():
    model = None
    input = Input(shape=(MAXLEN_SEQ, NB_AS,))
    if hmm:
        profiles_input = Input(shape=(MAXLEN_SEQ, NB_FEATURES,))
        x1 = concatenate([input, profiles_input])
        x2 = concatenate([input, profiles_input])
        inp = [input, profiles_input]
    else:
        x1 = input
        x2 = input
        inp = input

    x1 = Dense(1200, activation="relu")(x1)
    x1 = Dropout(0.5)(x1)
    # x1 = Bidirectional(CuDNNGRU(units=100, return_sequences=True))(x1)

    # Defining a bidirectional GRU using the embedded representation of the inputs
    x2 = Bidirectional(CuDNNGRU(units=500, return_sequences=True))(x2)
    # x2 = Dropout(0.5)(x2)
    x2 = Bidirectional(CuDNNGRU(units=100, return_sequences=True))(x2)
    # x2 = Dropout(0.5)(x2)

    COMBO_MOVE = concatenate([x1, x2])
    w = Dense(500, activation="relu")(COMBO_MOVE)  # try 500
    w = Dropout(0.4)(w)
    w = tcn.TCN(return_sequences=True)(w)
    y = TimeDistributed(Dense(NB_CLASSES_Q8, activation="softmax"))(w)

    # Defining the model as a whole and printing the summary
    model = Model(inp, y)
    # model.summary()

    # Setting up the model with categorical x-entropy loss and the custom accuracy function as accuracy
    adamOptimizer = Adam(lr=0.001, beta_1=0.8, beta_2=0.8, epsilon=None,
                         decay=0.0001, amsgrad=False)
    model.compile(optimizer=adamOptimizer, loss="categorical_crossentropy",
                  metrics=["accuracy", accuracy])
    return model
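# A minimal usage sketch for the fixed-length variant above, assuming the
# globals it reads (MAXLEN_SEQ, NB_AS, NB_FEATURES, NB_CLASSES_Q8, hmm) and
# dummy data shaped like the model's inputs. All values here are illustrative
# assumptions; CuDNNGRU additionally requires a GPU build of TensorFlow.
import numpy as np

MAXLEN_SEQ, NB_AS, NB_FEATURES, NB_CLASSES_Q8 = 700, 21, 30, 8
hmm = True

model = build_model()
X = np.random.rand(4, MAXLEN_SEQ, NB_AS)
profiles = np.random.rand(4, MAXLEN_SEQ, NB_FEATURES)
# One-hot Q8 targets, shape (batch, MAXLEN_SEQ, NB_CLASSES_Q8)
y = np.eye(NB_CLASSES_Q8)[np.random.randint(NB_CLASSES_Q8, size=(4, MAXLEN_SEQ))]
model.fit([X, profiles], y, epochs=1, batch_size=2)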
def createTCNModel(self, embedding_matrix=[], useTrainedEmbedding=False):
    print("Creating Keras model..\n")
    i = Input(batch_shape=(None, self.max_sentence_length))

    # If using pre-trained embeddings like GloVe or Word2Vec, use them as fixed
    # weights; else learn the embedding weights from scratch.
    if useTrainedEmbedding:
        e = Embedding(self.vocab_size, self.embeddingDim, weights=[embedding_matrix],
                      input_length=self.max_sentence_length, trainable=False)(i)
    else:
        e = Embedding(self.vocab_size, self.embeddingDim,
                      input_length=self.max_sentence_length, trainable=True)(i)

    # TCN is a layer: configure it first, then apply it to the embeddings
    x = tcn.TCN(nb_filters=64, nb_stacks=2, kernel_size=2, activation='tanh',
                dilations=[1, 2, 4, 8, 16, 32, 64], return_sequences=False)(e)

    # Add a dense layer with dropout
    x = Dense(units=20, activation='relu')(x)
    x = Dropout(0.5)(x)

    if self.num_classes > 2 or self.num_classes == 1:
        output_dim = self.num_classes
    elif self.num_classes == 2:
        output_dim = self.num_classes - 1
    else:
        raise ValueError("Invalid number of classes = %d" % self.num_classes)

    # Final output layer:
    # use softmax for multiclass and sigmoid for binary classification
    if self.num_classes > 2:
        x = Dense(output_dim, activation='softmax')(x)
        loss = 'categorical_crossentropy'
    else:
        x = Dense(output_dim, activation='sigmoid')(x)
        loss = 'binary_crossentropy'

    self.model = Model(inputs=[i], outputs=[x])

    # compile the model; the loss must match the output activation chosen above
    self.model.compile(optimizer='adam', loss=loss, metrics=['acc'])

    # summarize the model
    self.model.summary()
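# createTCNModel() is a method on a wrapper class that is not shown in this
# section. A hypothetical minimal host class, just to make the method callable;
# the attribute names are taken from the method body, everything else (class
# name, constructor defaults) is assumed.
class TCNTextClassifier:
    def __init__(self, vocab_size, max_sentence_length,
                 embeddingDim=100, num_classes=2):
        self.vocab_size = vocab_size
        self.max_sentence_length = max_sentence_length
        self.embeddingDim = embeddingDim
        self.num_classes = num_classes
        self.model = None

    # attach the function above as a method
    createTCNModel = createTCNModel

clf = TCNTextClassifier(vocab_size=10000, max_sentence_length=120, num_classes=2)
clf.createTCNModel()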
def tcnn_model(dataset):
    semantic_dropout = 0.1
    lstm_in_dropout = 0.1
    final_dropout = 0.1

    # Sentence embedding vocabulary
    vocab_size = len(dataset.word2index)

    # Image-feature branch
    inputs1 = Input(shape=(4096,), name='f_input')  # means BATCHx4096
    fe1 = Dropout(0.25, name='fe1')(inputs1)
    fe2 = Dense(BEFORE_CONCAT, name='fe2', activation='relu')(fe1)

    # Sequence model.
    # Note that shapes are per sample (ignoring the batch dim): (50, 4096) (50, 62) (50, 62, 3626)
    inputs2 = Input(shape=(None,), name='s_input')  # TODO: remove dim here
    # mask_zero=False here; masking is handled by the explicit Masking branch below
    se1 = Embedding(vocab_size, EMBEDDING_D, mask_zero=False, name='se1')(inputs2)
    # se2 = Dropout(lstm_in_dropout, name='se2')(se1)

    # Benchmark adapted from Penn Treebank: k=3, n=4, hidden=600, dropout=0.5.
    # TCN is a layer: configure it first, then apply it to the embeddings.
    se = tcn.TCN(nb_filters=BEFORE_CONCAT, kernel_size=3, nb_stacks=2,
                 dilations=[1, 2, 4, 8], dropout_rate=0.2,
                 return_sequences=True)(se1)
    # se3 = TimeDistributed(Dense(BEFORE_CONCAT, name='se4'))(se2)

    # Zeroed masking branch: keeps the padding mask in the graph without
    # contributing to the sum
    msk1 = keras.layers.Masking(mask_value=0.0)(se1)
    msk1 = keras.layers.Lambda(lambda x: x * 0.0)(msk1)

    # decoder model
    decoder1 = add([fe2, se, msk1], name='concat/add')
    decoder2 = TimeDistributed(
        Dense(BEFORE_SOFTMAX_D, activation='relu', name='decoder2'))(decoder1)
    decoder2 = TimeDistributed(Dropout(final_dropout, name='final_dropout'))(decoder2)
    outputs = TimeDistributed(
        Dense(vocab_size, name='outputs', activation='softmax'))(decoder2)

    # tie it together: [image, seq] -> [word]
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.summary()
    return model
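# tcnn_model() assumes several module-level constants and a dataset object.
# A hypothetical stub, only to illustrate the expected shapes: since add()
# merges the per-image vector fe2 with the per-timestep TCN output, the three
# branches must agree on the feature dimension, so EMBEDDING_D is set equal to
# BEFORE_CONCAT here. All names and values below are assumptions.
EMBEDDING_D = 256
BEFORE_CONCAT = 256
BEFORE_SOFTMAX_D = 512

class DummyDataset:  # stand-in for the real dataset object
    word2index = {'<pad>': 0, 'a': 1, 'cat': 2, 'sat': 3}

model = tcnn_model(DummyDataset())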
def build_model_ho_3(params):
    print(params)
    input = Input(shape=(X_train_aug[0].shape[1], X_train_aug[0].shape[2],))
    profiles_input = Input(shape=(X_train_aug[1].shape[1], X_train_aug[1].shape[2],))

    x1 = concatenate([input, profiles_input])
    x2 = concatenate([input, profiles_input])

    x1 = Dense(params['dense1'], activation="relu")(x1)
    x1 = Dropout(params['dropout1'])(x1)

    # Stack of up to three bidirectional GRUs, controlled by the nested params
    x2 = Bidirectional(CuDNNGRU(units=params['gru1'], return_sequences=True))(x2)
    if params['gru2']:
        x2 = Bidirectional(CuDNNGRU(units=params['gru2']['gru2_units'],
                                    return_sequences=True))(x2)
    if params['gru2'] and params['gru2']['gru3']:
        x2 = Bidirectional(CuDNNGRU(units=params['gru2']['gru3']['gru3_units'],
                                    return_sequences=True))(x2)

    COMBO_MOVE = concatenate([x1, x2])
    w = Dense(params['dense2'], activation="relu")(COMBO_MOVE)
    w = Dropout(params['dropout2'])(w)
    w = tcn.TCN(return_sequences=True)(w)
    y = TimeDistributed(Dense(8, activation="softmax"))(w)

    model = Model([input, profiles_input], y)
    adamOptimizer = Adam(lr=params['lr'], beta_1=0.8, beta_2=0.8, epsilon=None,
                         decay=params['decay'], amsgrad=False)
    model.compile(optimizer=adamOptimizer, loss="categorical_crossentropy",
                  metrics=["accuracy", accuracy, matthews_correlation])

    earlyStopping = EarlyStopping(monitor='val_accuracy', patience=3,
                                  verbose=1, mode='max')
    checkpointer = ModelCheckpoint(filepath=load_file, monitor='val_accuracy',
                                   verbose=1, save_best_only=True, mode='max')

    model.fit(X_train_aug, y_train, validation_data=(X_val_aug, y_val),
              epochs=20, batch_size=params['batch_size'],
              callbacks=[checkpointer, earlyStopping], verbose=1, shuffle=True)

    model.load_weights(load_file)
    score = model.evaluate(X_test_aug, y_test)
    K.clear_session()

    # hyperopt minimizes 'loss', so return the negated custom accuracy (metric index 2)
    result = {'loss': -score[2], 'status': STATUS_OK}
    return result
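# build_model_ho_3() is written as a hyperopt objective. A sketch of a matching
# search space and fmin() call follows; the key structure (including the nested
# gru2/gru3 choices) is dictated by the function above, while the value ranges
# and max_evals are illustrative assumptions.
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

search_space = {
    'dense1': hp.choice('dense1', [600, 1200]),
    'dropout1': hp.uniform('dropout1', 0.2, 0.6),
    'gru1': hp.choice('gru1', [250, 500]),
    'gru2': hp.choice('gru2', [
        False,
        {'gru2_units': hp.choice('gru2_units', [100, 200]),
         'gru3': hp.choice('gru3', [
             False,
             {'gru3_units': hp.choice('gru3_units', [50, 100])},
         ])},
    ]),
    'dense2': hp.choice('dense2', [250, 500]),
    'dropout2': hp.uniform('dropout2', 0.2, 0.6),
    'lr': hp.loguniform('lr', -8, -4),
    'decay': hp.loguniform('decay', -10, -6),
    'batch_size': hp.choice('batch_size', [16, 32, 64]),
}

trials = Trials()
best = fmin(build_model_ho_3, search_space, algo=tpe.suggest,
            max_evals=50, trials=trials)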
def build_model(hype_space):
    """Create model according to the hyperparameter space given."""
    print("Hyperspace:")
    print(hype_space)

    # Use different inputs according to the hyperparameters 'input' and 'use_profiles'
    input_onehot = Input(shape=(None, n_words))
    input_seqs = Input(shape=(None,))
    input_elmo = Input(shape=(None, 1024))
    input_pssm = Input(shape=(None, 22))
    input_hmm = Input(shape=(None, 30))
    inp = [input_onehot, input_seqs, input_elmo, input_pssm, input_hmm]

    x0 = None
    if hype_space['input'] == 'onehot':
        x0 = input_onehot
        print('Onehot shape:', x0._keras_shape)
    if hype_space['input'] == 'seqs':
        # have to use an embedding: 'embedding' selects the ELMo input,
        # otherwise a trainable embedding layer is learned
        if hype_space['embedding']:
            x0 = input_elmo
        else:
            x0 = Embedding(input_dim=n_words,
                           output_dim=int(hype_space['dense_output']),
                           input_length=None)(input_seqs)
        print('Seqs shape after embedding:', x0._keras_shape)
    if hype_space['input'] == 'both':
        # insert ELMo here
        if hype_space['embedding']:
            x_seq = input_elmo
        else:
            x_seq = Embedding(input_dim=n_words,
                              output_dim=int(hype_space['dense_output']),
                              input_length=None)(input_seqs)
        print('Seqs shape after embedding:', x_seq._keras_shape)
        print('Onehot shape:', input_onehot._keras_shape)
        x0 = concatenate([input_onehot, x_seq])
        print('Both concat input shape:', x0._keras_shape)

    if hype_space['use_profiles'] is not None:
        if hype_space['use_profiles'] == 'pssm':
            print('Use pssm profiles.')
            x0 = concatenate([x0, input_pssm])
        if hype_space['use_profiles'] == 'hmm':
            print('Use hmm profiles.')
            x0 = concatenate([x0, input_hmm])
        if hype_space['use_profiles'] == 'both':
            print('Use both profiles.')
            x0 = concatenate([x0, input_pssm, input_hmm])

    # NN starts here:
    if hype_space['tcn_position'] == 'first':
        print('TCN first.')
        x0 = tcn.TCN(return_sequences=True)(x0)

    print('First layer is', hype_space['first_layer']['type'])
    x1 = x0
    if hype_space['first_layer']['type'] == 'LSTM':
        for i in range(hype_space['first_layer']['lstm_nb']):
            i = i + 1
            print(i)
            x1 = Bidirectional(
                CuDNNLSTM(units=int(hype_space['first_layer']['lstm_units'] / i),
                          return_sequences=True))(x1)
            # float(), not int(): int() would truncate a dropout rate < 1 to 0
            x1 = Dropout(float(hype_space['dropout']))(x1)
        x2 = x1
        x1 = x0
    if hype_space['first_layer']['type'] == 'GRU':
        x1 = Bidirectional(
            CuDNNGRU(units=int(hype_space['first_layer']['gru1'] * 100),
                     return_sequences=True))(x1)
        if hype_space['first_layer']['gru2']:
            x1 = Bidirectional(
                CuDNNGRU(units=int(hype_space['first_layer']['gru2']['gru2_units'] * 100),
                         return_sequences=True))(x1)
        if hype_space['first_layer']['gru2'] and hype_space['first_layer']['gru2']['gru3']:
            x1 = Bidirectional(
                CuDNNGRU(units=int(hype_space['first_layer']['gru2']['gru3']['gru3_units'] * 100),
                         return_sequences=True))(x1)
        x2 = x1
        x1 = x0

    COMBO_MOVE = concatenate([x0, x2])
    x0 = COMBO_MOVE

    if hype_space['second_layer']:
        print('Second layer is', hype_space['second_layer']['type'])
        x1 = x0
        if hype_space['second_layer']['type'] == 'LSTM':
            for i in range(hype_space['second_layer']['lstm_nb_2']):
                i = i + 1
                print(i)
                x1 = Bidirectional(
                    CuDNNLSTM(units=int(hype_space['second_layer']['lstm_units_2'] / i),
                              return_sequences=True))(x1)
                x1 = Dropout(float(hype_space['dropout']))(x1)
            x2 = x1
            x1 = x0
        if hype_space['second_layer']['type'] == 'GRU':
            x1 = Bidirectional(
                CuDNNGRU(units=int(hype_space['second_layer']['gru1_2'] * 100),
                         return_sequences=True))(x1)
            if hype_space['second_layer']['gru2_2']:
                x1 = Bidirectional(
                    CuDNNGRU(units=int(hype_space['second_layer']['gru2_2']['gru2_units_2'] * 100),
                             return_sequences=True))(x1)
            if hype_space['second_layer']['gru2_2'] and hype_space['second_layer']['gru2_2']['gru3_2']:
                x1 = Bidirectional(
                    CuDNNGRU(units=int(hype_space['second_layer']['gru2_2']['gru3_2']['gru3_units_2'] * 100),
                             return_sequences=True))(x1)
            x2 = x1
        COMBO_MOVE = concatenate([x0, x2])

    '''
    current_layer = input
    if hype_space['first_conv'] is not None:
        k = hype_space['first_conv']
        current_layer = keras.layers.convolutional.Conv1D(
            filters=16, kernel_size=k, strides=1, padding='same',
            activation=hype_space['activation'],
            kernel_regularizer=keras.regularizers.l2(
                STARTING_L2_REG * hype_space['l2_weight_reg_mult'])
        )(current_layer)

    # Core loop that stacks multiple conv+pool layers, with maybe some
    # residual connections and other fluffs:
    n_filters = int(40 * hype_space['conv_hiddn_units_mult'])
    for i in range(hype_space['nb_conv_pool_layers']):
        print(i)
        print(n_filters)
        print(current_layer._keras_shape)

        current_layer = convolution(current_layer, n_filters, hype_space)
        if hype_space['use_BN']:
            current_layer = bn(current_layer)
        print(current_layer._keras_shape)

        n_filters *= 2

    # Fully Connected (FC) part:
    current_layer = TimeDistributed(Dense(
        units=int(1000 * hype_space['fc_units_1_mult']),
        activation=hype_space['activation'],
        kernel_regularizer=keras.regularizers.l2(
            STARTING_L2_REG * hype_space['l2_weight_reg_mult'])
    ))(current_layer)
    print(current_layer._keras_shape)

    current_layer = dropout(current_layer, hype_space, for_convolution_else_fc=False)

    if hype_space['one_more_fc'] is not None:
        current_layer = TimeDistributed(Dense(
            units=int(750 * hype_space['one_more_fc']),
            activation=hype_space['activation'],
            kernel_regularizer=keras.regularizers.l2(
                STARTING_L2_REG * hype_space['l2_weight_reg_mult'])
        ))(current_layer)
        print(current_layer._keras_shape)
        current_layer = dropout(current_layer, hype_space, for_convolution_else_fc=False)

    y = TimeDistributed(Dense(
        units=NB_CLASSES_Q8, activation="softmax",
        kernel_regularizer=keras.regularizers.l2(
            STARTING_L2_REG * hype_space['l2_weight_reg_mult']),
        name='y'
    ))(current_layer)
    print(y._keras_shape)

    # Finalize model:
    inp = [input, profiles_input]
    model = Model(inp, y)
    model.compile(
        optimizer=OPTIMIZER_STR_TO_CLASS[hype_space['optimizer']](
            # lr=0.001 * hype_space['lr_rate_mult']
        ),
        loss=LOSS_STR_TO_CLASS[hype_space['loss']],
        metrics=[accuracy]  # add more metrics here
    )
    '''

    w = Dense(int(hype_space['dense_output']) * 2, activation="relu")(COMBO_MOVE)  # try 500
    # float(), not int(): int() would truncate a dropout rate < 1 to 0
    w = Dropout(float(hype_space['dropout']))(w)
    if hype_space['tcn_position'] == 'last':
        print('TCN last.')
        w = tcn.TCN(return_sequences=True)(w)
    y = TimeDistributed(Dense(n_tags, activation="softmax"))(w)

    # Defining the model as a whole and printing the summary
    model = Model(inp, y)
    # model.summary()

    # Setting up the model with the chosen loss and the custom accuracy function as a metric
    # adamOptimizer = Adam(lr=0.001, beta_1=0.8, beta_2=0.8, epsilon=None, decay=0.0001, amsgrad=False)
    model.compile(
        optimizer=OPTIMIZER_STR_TO_CLASS[hype_space['optimizer']](
            # lr=0.001 * hype_space['lr_rate_mult']
        ),
        loss=LOSS_STR_TO_CLASS[hype_space['loss']],
        metrics=[
            accuracy,
            # weighted_accuracy,
            kullback_leibler_divergence
        ])
    return model
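# An illustrative example of the nested hyperparameter dictionary that the
# hyperparameter-search build_model() above expects. The keys are taken from
# the lookups in the function body; the values are assumptions.
example_hype_space = {
    'input': 'both',             # 'onehot' | 'seqs' | 'both'
    'embedding': False,          # True -> use the ELMo input instead of a trained Embedding
    'use_profiles': 'both',      # None | 'pssm' | 'hmm' | 'both'
    'dense_output': 250,
    'dropout': 0.4,
    'tcn_position': 'last',      # 'first' | 'last'
    'first_layer': {
        'type': 'GRU',           # 'LSTM' | 'GRU'
        'gru1': 2.5,             # units = int(gru1 * 100)
        'gru2': {'gru2_units': 1.0,
                 'gru3': {'gru3_units': 0.5}},
    },
    'second_layer': False,       # or a dict mirroring 'first_layer' with *_2 keys
    'optimizer': 'adam',         # key into OPTIMIZER_STR_TO_CLASS
    'loss': 'categorical_crossentropy',  # key into LOSS_STR_TO_CLASS
}

model = build_model(example_hype_space)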