import os
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from keras.models import Sequential, Model
from keras.layers import (Dense, Dropout, Input, Masking, LSTM, GRU,
                          LeakyReLU, BatchNormalization, concatenate)
from keras.optimizers import SGD, Adam
from keras.regularizers import l1, l2
from keras.callbacks import EarlyStopping


def trainNN(X_train, X_test, y_train, y_test, w_train, w_test, netDim,
            epochs, batchSize, dropout, optimizer, activation, initializer,
            regularizer, classWeight='SumOfWeights', learningRate=0.01,
            decay=0.0, momentum=0.0, nesterov=False, multiclass=False,
            reproduce=False):
    print "Performing a Deep Neural Net!"

    if reproduce:
        print 'Constant seed is activated, TensorFlow is forced to use a single thread'
        import tensorflow as tf
        import random as rn
        from keras import backend as K
        # The below is necessary in Python 3.2.3 onwards to have reproducible
        # behavior for certain hash-based operations. See these references:
        # https://docs.python.org/3.4/using/cmdline.html#envvar-PYTHONHASHSEED
        # https://github.com/keras-team/keras/issues/2280#issuecomment-306959926
        os.environ['PYTHONHASHSEED'] = '0'
        # Start Numpy-generated random numbers in a well-defined initial state.
        np.random.seed(42)
        # Start core Python generated random numbers in a well-defined state.
        rn.seed(12345)
        # Force TensorFlow to use a single thread; multiple threads are a
        # potential source of non-reproducible results. For further details, see:
        # https://stackoverflow.com/questions/42022950/which-seeds-have-to-be-set-where-to-realize-100-reproducibility-of-training-res
        session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                      inter_op_parallelism_threads=1)
        # tf.set_random_seed() makes random number generation in the TensorFlow
        # backend have a well-defined initial state. For further details, see:
        # https://www.tensorflow.org/api_docs/python/tf/set_random_seed
        tf.set_random_seed(1234)
        sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
        K.set_session(sess)
        # from https://keras.io/getting-started/faq/#how-can-i-obtain-reproducible-results-using-keras-during-development

    classes = len(np.bincount(y_train.astype(int)))
    if classWeight.lower() == 'balanced':
        print 'Using method balanced for class weights'
        w = compute_class_weight('balanced', np.unique(y_train), y_train)
        class_weight = {i: w[i] for i in range(len(w))}
    elif classWeight.lower() == 'sumofweights':
        print 'Using method SumOfWeights for class weights'
        sumofweights = w_train.sum()
        w = sumofweights / (classes * np.bincount(y_train.astype(int)))
        class_weight = {i: w[i] for i in range(len(w))}
    else:
        print 'Using no class weights'
        class_weight = None

    # Create 'one-hot' vector for y (not needed with a sparse categorical loss)
    #y_train = np_utils.to_categorical(y_train, classes)
    #y_test = np_utils.to_categorical(y_test, classes)

    model = Sequential()
    # First hidden layer carries the input dimension.
    model.add(Dense(netDim[0], input_dim=X_train.shape[1],
                    activation=activation, kernel_initializer=initializer))
    if activation.lower() == 'linear':
        model.add(LeakyReLU(alpha=0.1))
    #model.add(BatchNormalization())
    for layer in netDim[1:]:
        model.add(Dense(layer, activation=activation,
                        kernel_initializer=initializer,
                        kernel_regularizer=l1(regularizer)))
        if activation.lower() == 'linear':
            model.add(LeakyReLU(alpha=0.1))
        #model.add(Dropout(dropout))
        #model.add(BatchNormalization())

    # Softmax output; sparse categorical crossentropy expects integer labels.
    if multiclass:
        model.add(Dense(classes, activation='softmax'))
    else:
        model.add(Dense(2, activation='softmax'))
    loss = 'sparse_categorical_crossentropy'

    # Set loss and optimizer
    if optimizer.lower() == 'sgd':
        print 'Going to use stochastic gradient descent method for learning!'
        optimizer = SGD(lr=learningRate, decay=decay, momentum=momentum,
                        nesterov=nesterov)
    elif optimizer.lower() == 'adam':
        print 'Going to use Adam optimizer for learning!'
        optimizer = Adam(lr=learningRate, decay=decay)
    else:
        print 'Going to use %s as optimizer. Learning rate, decay and momentum will not be used during training!' % optimizer

    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    model.summary()

    print "Training..."
    history = None  # stays None if training is interrupted before the first epoch
    try:
        #history = model.fit(X_train, y_train, epochs=epochs, batch_size=batchSize, shuffle=True,
        #                    class_weight={i: class_weight[i] for i in range(len(class_weight))},
        #                    validation_data=(X_test, y_test),
        #                    callbacks=[EarlyStopping(verbose=True, patience=20)])
        #history = model.fit(X_train, y_train, epochs=epochs, batch_size=batchSize, shuffle=True,
        #                    class_weight={i: class_weight[i] for i in range(len(class_weight))},
        #                    sample_weight=w_train, validation_data=(X_test, y_test, w_test),
        #                    callbacks=[EarlyStopping(verbose=True, patience=10, monitor='val_acc')])
        history = model.fit(X_train, y_train, epochs=epochs,
                            batch_size=batchSize, shuffle=True,
                            class_weight=class_weight, sample_weight=None,
                            validation_data=(X_test, y_test),
                            callbacks=[EarlyStopping(verbose=True, patience=5,
                                                     monitor='loss')])
    except KeyboardInterrupt:
        print '--- Training ended early ---'

    print 'Testing...'
    score = model.evaluate(X_test, y_test, sample_weight=None)
    print("\n%s: %.4f" % (model.metrics_names[0], score[0]))          # loss
    print("\n%s: %.2f%%" % (model.metrics_names[1], score[1] * 100))  # accuracy
    y_predicted = model.predict(X_test)
    print "DNN finished!"
    return model, history, y_predicted
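
# A minimal, hypothetical usage sketch for trainNN: a smoke test on toy data.
# Every value below (toy arrays, layer sizes, hyper-parameters) is an
# illustrative assumption, not taken from any real analysis. Uncomment to run:
#
#if __name__ == '__main__':
#    X = np.random.randn(1000, 10)
#    y = np.random.randint(0, 2, 1000).astype(float)
#    w = np.ones(1000)
#    split = 700
#    model, history, y_pred = trainNN(
#        X[:split], X[split:], y[:split], y[split:], w[:split], w[split:],
#        netDim=[64, 32], epochs=5, batchSize=128, dropout=0.2,
#        optimizer='adam', activation='relu', initializer='glorot_uniform',
#        regularizer=1e-5, classWeight='balanced')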
def trainRNN(X_train, X_test, y_train, y_test, w_train, w_test, sequence,
             collection, unit_type, n_units, combinedDim, epochs, batchSize,
             dropout, optimizer, activation, initializer, regularizer,
             learningRate=0.01, decay=0.0, momentum=0.0, nesterov=False,
             mergeModels=False, multiclass=False, classWeight='SumOfWeights'):
    print "Performing a Deep Recurrent Neural Net!"

    # Build one recurrent channel per sequential collection.
    if isinstance(sequence, list):
        for seq in sequence:
            print 'Prepare channel for {} collection...'.format(seq['name'])
            SHAPE = seq['X_train'].shape[1:]
            seq['input'] = Input(SHAPE)
            # Mask padded time steps so the recurrent unit skips them.
            seq['channel'] = Masking(mask_value=-999,
                                     name=seq['name'] + '_masking')(seq['input'])
            if unit_type.lower() == 'lstm':
                seq['channel'] = LSTM(n_units,
                                      name=seq['name'] + '_lstm')(seq['channel'])
            if unit_type.lower() == 'gru':
                seq['channel'] = GRU(n_units,
                                     name=seq['name'] + '_gru')(seq['channel'])
            #seq['channel'] = Dropout(dropout, name=seq['name'] + '_dropout')(seq['channel'])

    #if mergeModels:
    #    print 'Going to merge sequence model with common NN!'
    #    model_inputs = Input(shape=(X_train.shape[1],))
    #    layer = Dense(n_units, activation=activation, kernel_initializer=initializer)(model_inputs)
    #    if activation.lower() == 'linear':
    #        layer = LeakyReLU(alpha=0.1)(layer)
    #    layer = BatchNormalization()(layer)
    #    #layer = Dropout(dropout)(layer)

    if mergeModels:
        print 'Going to merge sequence model with common NN!'
        model_inputs = Input(shape=(X_train.shape[1],))
        combined = concatenate([c['channel'] for c in sequence] + [model_inputs])
    else:
        if len(sequence) > 1:
            combined = concatenate([c['channel'] for c in sequence])
        else:
            combined = sequence[0]['channel']

    # Fully connected layers on top of the concatenated channels.
    for l in combinedDim:
        combined = Dense(l, activation=activation,
                         kernel_initializer=initializer,
                         kernel_regularizer=l2(regularizer))(combined)
        if activation.lower() == 'linear':
            combined = LeakyReLU(alpha=0.1)(combined)
        combined = BatchNormalization()(combined)
        #combined = Dropout(dropout)(combined)

    # Softmax output; sparse categorical crossentropy expects integer labels.
    if multiclass:
        combined_output = Dense(len(np.bincount(y_train.astype(int))),
                                activation='softmax')(combined)
    else:
        combined_output = Dense(2, activation='softmax')(combined)
    loss = 'sparse_categorical_crossentropy'

    if mergeModels:
        combined_rnn = Model(inputs=[seq['input'] for seq in sequence] + [model_inputs],
                             outputs=combined_output)
    else:
        if len(sequence) > 1:
            combined_rnn = Model(inputs=[seq['input'] for seq in sequence],
                                 outputs=combined_output)
        else:
            combined_rnn = Model(inputs=sequence[0]['input'],
                                 outputs=combined_output)
    combined_rnn.summary()

    # Set loss and optimizer
    if optimizer.lower() == 'sgd':
        print 'Going to use stochastic gradient descent method for learning!'
        optimizer = SGD(lr=learningRate, decay=decay, momentum=momentum,
                        nesterov=nesterov)
    elif optimizer.lower() == 'adam':
        print 'Going to use Adam optimizer for learning!'
        optimizer = Adam(lr=learningRate, decay=decay)
    else:
        print 'Going to use %s as optimizer. Learning rate, decay and momentum will not be used during training!' % optimizer

    combined_rnn.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    print 'Training...'
    classes = len(np.bincount(y_train.astype(int)))
    if classWeight.lower() == 'balanced':
        print 'Using method balanced for class weights'
        w = compute_class_weight('balanced', np.unique(y_train), y_train)
        class_weight = {i: w[i] for i in range(len(w))}
    elif classWeight.lower() == 'sumofweights':
        print 'Using method SumOfWeights for class weights'
        sumofweights = w_train.sum()
        w = sumofweights / (classes * np.bincount(y_train.astype(int)))
        class_weight = {i: w[i] for i in range(len(w))}
    else:
        print 'Using no class weights'
        class_weight = None

    history = None  # stays None if training is interrupted before the first epoch
    try:
        if mergeModels:
            history = combined_rnn.fit(
                [seq['X_train'] for seq in sequence] + [X_train], y_train,
                class_weight=class_weight, epochs=epochs, batch_size=batchSize,
                validation_data=([seq['X_test'] for seq in sequence] + [X_test], y_test),
                callbacks=[EarlyStopping(verbose=True, patience=5, monitor='loss')])
            #ModelCheckpoint('./models/combinedrnn_tutorial-progress', monitor='val_loss', verbose=True, save_best_only=True)
        else:
            history = combined_rnn.fit(
                [seq['X_train'] for seq in sequence], y_train,
                class_weight=class_weight, epochs=epochs, batch_size=batchSize,
                callbacks=[EarlyStopping(verbose=True, patience=5, monitor='loss')])
    except KeyboardInterrupt:
        print 'Training ended early.'

    print 'Testing...'
    if mergeModels:
        score = combined_rnn.evaluate([seq['X_test'] for seq in sequence] + [X_test],
                                      y_test, batch_size=batchSize)
        y_predicted = combined_rnn.predict([seq['X_test'] for seq in sequence] + [X_test],
                                           batch_size=batchSize)
    else:
        if len(sequence) > 1:
            score = combined_rnn.evaluate([seq['X_test'] for seq in sequence], y_test)
            y_predicted = combined_rnn.predict([seq['X_test'] for seq in sequence],
                                               batch_size=batchSize)
        else:
            score = combined_rnn.evaluate(sequence[0]['X_test'], y_test)
            y_predicted = combined_rnn.predict(sequence[0]['X_test'],
                                               batch_size=batchSize)
    #print("\n%s: %.2f%%" % (combined_rnn.metrics_names[0], score[0] * 100))
    #print("\n%s: %.2f%%" % (combined_rnn.metrics_names[1], score[1] * 100))
    print "RNN finished!"
    return combined_rnn, history, y_predicted
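
# A minimal, hypothetical usage sketch for trainRNN, mainly to document the
# expected structure of `sequence`: a list of dicts, each with a 'name' and
# padded 3-D arrays 'X_train'/'X_test' of shape (events, timesteps, features),
# where -999 marks the padded time steps skipped by the Masking layer.
# All values below are illustrative assumptions. Uncomment to run:
#
#if __name__ == '__main__':
#    tracks_train = -999 * np.ones((1000, 20, 4))
#    tracks_test = -999 * np.ones((300, 20, 4))
#    tracks_train[:, :5, :] = np.random.randn(1000, 5, 4)
#    tracks_test[:, :5, :] = np.random.randn(300, 5, 4)
#    sequence = [{'name': 'tracks', 'X_train': tracks_train, 'X_test': tracks_test}]
#    y_tr = np.random.randint(0, 2, 1000).astype(float)
#    y_te = np.random.randint(0, 2, 300).astype(float)
#    # X_train/X_test are only used when mergeModels=True, so None is fine here.
#    rnn, history, y_pred = trainRNN(
#        None, None, y_tr, y_te, np.ones(1000), np.ones(300), sequence,
#        collection='tracks', unit_type='lstm', n_units=25, combinedDim=[32, 16],
#        epochs=3, batchSize=128, dropout=0.2, optimizer='adam',
#        activation='relu', initializer='glorot_uniform', regularizer=1e-5,
#        mergeModels=False)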