def fit(self, X_train, y_train):
    def save_graph(self, sess):
        if not os.path.exists(self.save_folder):
            os.makedirs(self.save_folder)
        if not os.path.exists(self.save_folder + str(self.random_file_ext_) + "/"):
            os.makedirs(self.save_folder + str(self.random_file_ext_) + "/")
        permanent_saver = tf.train.Saver()
        permanent_saver.save(sess, self.save_folder + str(self.random_file_ext_) + "/" + "model")

    def convert_data_to_one_hot(y_train):
        # y_test_temp = np.zeros((y_test.size, y_test.max() + 1), dtype=np.int)
        # y_test_temp[np.arange(y_test.size), y_test] = 1
        # Other option: tf.one_hot. y_train is a tensor then, but feed_dict only accepts
        # numpy arrays => y_train would have to be replaced with sess.run(y_train), see
        # http://stackoverflow.com/questions/34410654/tensorflow-valueerror-setting-an-array-element-with-a-sequence
        # return tf.one_hot(y_train, 4), tf.one_hot(y_test, 4)
        y_train_temp = np.zeros((y_train.size, y_train.max() + 1), dtype=int)
        y_train_temp[np.arange(y_train.size), y_train] = 1
        return y_train_temp

    y_train_conv = convert_data_to_one_hot(y_train)

    self.graph = tf.Graph()
    with self.graph.as_default():
        self.prediction, self.prob = self.neural_network_model(X_train)

        if self.use_class_weights == True:
            # https://stackoverflow.com/questions/35155655/loss-function-for-class-imbalanced-binary-classifier-in-tensor-flow#answer-38912982
            class_weights = calculate_class_weight(y_train)  # doesn't work really well
            class_weight_mod = tf.constant([[class_weights[0], class_weights[1]]])
            weight_per_label = tf.transpose(tf.matmul(self.y, tf.transpose(class_weight_mod)))
            xent = tf.multiply(weight_per_label,
                               tf.nn.softmax_cross_entropy_with_logits(logits=self.prediction, labels=self.y))
        else:
            xent = tf.nn.softmax_cross_entropy_with_logits(logits=self.prediction, labels=self.y)
        cost = tf.reduce_mean(xent)

        # simple explanations of some optimizers:
        # http://stackoverflow.com/questions/33919948/how-to-set-adaptive-learning-rate-for-gradientdescentoptimizer
        # http://cs231n.github.io/neural-networks-3/
        # http://sebastianruder.com/optimizing-gradient-descent/index.html#adam
        # parameters: http://tflearn.org/optimizers
        if self.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate_tensor).minimize(cost)
        elif self.optimizer == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate=self.learning_rate_tensor).minimize(cost)
        elif self.optimizer == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate_tensor).minimize(cost)
        elif self.optimizer == 'graddesc':
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate_tensor).minimize(cost)
        elif self.optimizer == 'momentum':
            optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate_tensor,
                                                   momentum=self.momentum).minimize(cost)
        elif self.optimizer == 'nesterov_momentum':
            optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate_tensor,
                                                   momentum=self.momentum,
                                                   use_nesterov=True).minimize(cost)
        elif self.optimizer == 'proxada':
            optimizer = tf.train.ProximalAdagradOptimizer(learning_rate=self.learning_rate_tensor).minimize(cost)
        elif self.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate_tensor).minimize(cost)
        else:
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate_tensor).minimize(cost)

        with tf.Session(graph=self.graph, config=self.config) as sess:
            sess.run(tf.global_variables_initializer())

            momentum_start = 0.5
            momentum_end = 0.99
            calc_learning_rate = self.learning_rate

            for epoch in range(self.hm_epochs):
                epoch_loss = 0
                i = 0

                # increase momentum steadily with the epochs
                calc_momentum = momentum_start + (float((momentum_end - momentum_start) / self.hm_epochs) * epoch)

                # step decay of the learning rate at fixed epochs
                if self.step_decay_LR == True and (epoch == 20 or epoch == 35 or epoch == 45) and epoch > 0:
                    calc_learning_rate = float(calc_learning_rate / 10.0)

                while i < len(X_train):
                    start = i
                    end = i + self.batch_size
                    batch_x = np.array(X_train[start:end])
                    batch_y = np.array(y_train_conv[start:end])

                    _, c = sess.run([optimizer, cost],
                                    feed_dict={self.x: batch_x,
                                               self.y: batch_y,
                                               self.keep_prob: self.keep_prob_const,
                                               # self.learning_rate: CHANGE EVERY FEW EPOCHS,
                                               self.momentum: calc_momentum,
                                               self.learning_rate_tensor: calc_learning_rate})
                    epoch_loss += c
                    i += self.batch_size

                self.learning_rate_output += str(epoch_loss) + "\n"
                print('Epoch', epoch + 1, 'completed out of', self.hm_epochs,
                      'loss:', epoch_loss, 'LR=', calc_learning_rate)

            # get second weight matrix
            # test = sess.run(self.graph.get_tensor_by_name("weight1:0"))
            # print(test)

            # save the graph permanently
            save_graph(self, sess)

            # correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
            # accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
            # print('Accuracy by TensorFlow:', accuracy.eval({x: X_test, y: y_test, keep_prob: keep_prob_const}))
            # value = sess.run(tf.argmax(self.prediction, 1), feed_dict={self.x: X_test, self.keep_prob: self.keep_prob_const})
            # return value

    return self
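
# ---------------------------------------------------------------------------
# NOTE (illustrative sketch, not part of the original code): `calculate_class_weight`
# is called in the fit() above and in the Keras-based fit() below, but its
# implementation is not shown in this file. The helper here is a hedged guess at an
# inverse-frequency weighting that matches both call sites (two classes by default,
# optional `no_classes`); its name, signature, and weighting scheme are assumptions,
# not the project's actual implementation. It only relies on `numpy as np`, which the
# surrounding code already imports.
def calculate_class_weight_sketch(y, no_classes=2):
    # weight_c = N / (no_classes * count_c): rare classes receive larger weights
    counts = np.bincount(np.asarray(y), minlength=no_classes)
    total = float(len(y))
    return {c: total / (no_classes * max(int(counts[c]), 1)) for c in range(no_classes)}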
def fit(self, X_train, y_train, X_test, y_test, test_fold, loss_filename):
    self.test_fold = test_fold
    self.y_test = y_test

    # set session config with GPU variable growth,
    # see https://github.com/fchollet/keras/issues/1538
    self.sess = tf.Session(config=self.config)
    K.set_session(self.sess)

    # convert y_train and y_test to one-hot vectors
    y_train_one_hot = convert_data_to_one_hot(y_train)
    y_test_one_hot = convert_data_to_one_hot(self.y_test)

    # load feature dict for LSTM_1000_GloVe
    with open(self.FEATURES_DIR + self.PARAM_DICT_FILENAME, "rb") as f:
        param_dict = pickle.load(f)

    # load parameters needed for the embedding layer
    EMBEDDING_DIM = param_dict["EMBEDDING_DIM"]  # e.g. 50
    self.MAX_SEQ_LENGTH = param_dict["MAX_SEQ_LENGTH"]  # e.g. 100

    X_train_LSTM, X_train_MLP = split_X(X_train, self.MAX_SEQ_LENGTH)
    X_test_LSTM, X_test_MLP = split_X(X_test, self.MAX_SEQ_LENGTH)

    # load embeddings
    EMBEDDING_FILE = np.load(self.FEATURES_DIR + param_dict["EMBEDDING_FILE"])
    print("EMBEDDING_FILE.shape = " + str(EMBEDDING_FILE.shape))

    # calc class weights
    class_weights = calculate_class_weight(y_train, no_classes=4)

    ################
    # CLAIMS LAYER #
    ################
    lstm_input = Input(shape=(self.MAX_SEQ_LENGTH, ),
                       dtype='int32',
                       name='lstm_input')  # receives sequences of MAX_SEQ_LENGTH integers

    embedding = Embedding(input_dim=len(EMBEDDING_FILE),  # lookup table size
                          output_dim=EMBEDDING_DIM,  # output dim for each number in a sequence
                          weights=[EMBEDDING_FILE],
                          input_length=self.MAX_SEQ_LENGTH,
                          mask_zero=False,
                          trainable=True)(lstm_input)

    att_vec = GlobalMaxPooling1D()(embedding)
    orig_docs_att = InnerAttentionLayer(att_vec,
                                        EMBEDDING_DIM,
                                        return_att_weights=True,
                                        return_sequence=True,
                                        name='lstm_attention')

    data_LSTM = LSTM(100,
                     return_sequences=True,
                     stateful=False,
                     dropout=0.2,
                     batch_input_shape=(self.batch_size, self.MAX_SEQ_LENGTH, EMBEDDING_DIM),
                     input_shape=(self.MAX_SEQ_LENGTH, EMBEDDING_DIM),
                     implementation=self.LSTM_implementation)(orig_docs_att[0])
    data_LSTM = LSTM(100,
                     return_sequences=False,
                     stateful=False,
                     dropout=0.2,
                     batch_input_shape=(self.batch_size, self.MAX_SEQ_LENGTH, EMBEDDING_DIM),
                     input_shape=(self.MAX_SEQ_LENGTH, EMBEDDING_DIM),
                     implementation=self.LSTM_implementation)(data_LSTM)

    ###############################
    # MLP (NON-TIMESTEP) FEATURES #
    ###############################
    mlp_input = Input(shape=(len(X_train_MLP[0]), ), dtype='float32', name='mlp_input')

    ###############
    # MERGE LAYER #
    ###############
    merged = concatenate([data_LSTM, mlp_input])

    dense_mid = Dense(600,
                      kernel_regularizer=self.regularizer,
                      kernel_initializer=self.kernel_initializer,
                      activity_regularizer=self.dense_activity_regularizer,
                      activation='relu')(merged)
    dense_mid = Dense(600,
                      kernel_regularizer=self.regularizer,
                      kernel_initializer=self.kernel_initializer,
                      activity_regularizer=self.dense_activity_regularizer,
                      activation='relu')(dense_mid)
    dense_mid = Dense(600,
                      kernel_regularizer=self.regularizer,
                      kernel_initializer=self.kernel_initializer,
                      activity_regularizer=self.dense_activity_regularizer,
                      activation='relu')(dense_mid)
    dense_out = Dense(4, activation='softmax', name='dense_out')(dense_mid)

    # build model
    self.model = Model(inputs=[lstm_input, mlp_input], outputs=[dense_out])

    # print summary
    self.model.summary()

    # optimizers
    if self.optimizer_name == "adagrad":
        optimizer = optimizers.Adagrad(lr=self.lr)
        print("Used optimizer: adagrad, lr=" + str(self.lr))
    elif self.optimizer_name == "adamax":
        optimizer = optimizers.Adamax(lr=self.lr)  # recommended for sparse data like embeddings
        print("Used optimizer: adamax, lr=" + str(self.lr))
    elif self.optimizer_name == "nadam":
        optimizer = optimizers.Nadam(lr=self.lr)  # recommended to leave at default params
        print("Used optimizer: nadam, lr=" + str(self.lr))
    elif self.optimizer_name == "rms":
        optimizer = optimizers.RMSprop(lr=self.lr)  # recommended for RNNs
        print("Used optimizer: rms, lr=" + str(self.lr))
    elif self.optimizer_name == "SGD":
        optimizer = optimizers.SGD(lr=self.lr)
        print("Used optimizer: SGD, lr=" + str(self.lr))
    elif self.optimizer_name == "adadelta":
        optimizer = optimizers.Adadelta(lr=self.lr)
        print("Used optimizer: adadelta, lr=" + str(self.lr))
    else:
        optimizer = optimizers.Adam(lr=self.lr)
        print("Used optimizer: Adam, lr=" + str(self.lr))

    # compile model; for loss functions see https://github.com/fchollet/keras/blob/master/keras/losses.py
    self.model.compile(optimizer,
                       'kullback_leibler_divergence',  # alternative: categorical_crossentropy
                       metrics=['accuracy'])

    if self.use_class_weights == True:
        self.model.fit([X_train_LSTM, X_train_MLP],
                       y_train_one_hot,
                       validation_data=([X_test_LSTM, X_test_MLP], y_test_one_hot),
                       batch_size=self.batch_size,
                       epochs=self.epochs,
                       verbose=1,
                       class_weight=class_weights,
                       callbacks=[
                           EarlyStoppingOnF1(self.epochs,
                                             X_test_LSTM,
                                             X_test_MLP,
                                             self.y_test,
                                             loss_filename,
                                             epsilon=0.0,
                                             min_epoch=self.min_epoch),
                       ])
    else:
        self.model.fit([X_train_LSTM, X_train_MLP],
                       y_train_one_hot,
                       validation_data=([X_test_LSTM, X_test_MLP], y_test_one_hot),
                       batch_size=self.batch_size,
                       epochs=self.epochs,
                       verbose=1,
                       callbacks=[
                           EarlyStoppingOnF1(self.epochs,
                                             X_test_LSTM,
                                             X_test_MLP,
                                             self.y_test,
                                             loss_filename,
                                             epsilon=0.0,
                                             min_epoch=self.min_epoch),
                       ])

    self.model.save(self.save_folder + "save.h5")

    return self
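
# ---------------------------------------------------------------------------
# NOTE (illustrative sketch, not part of the original code): `split_X` is used in the
# Keras-based fit() above to split the combined feature matrix into the token-id
# sequence for the LSTM branch and the dense features for the MLP branch. The helper
# below is a hedged guess based only on how its return values are consumed; the exact
# column layout and dtypes are assumptions, not the project's actual implementation.
def split_X_sketch(X, max_seq_length):
    X = np.asarray(X)
    # first `max_seq_length` columns: padded word-index sequence for the Embedding/LSTM input
    X_lstm = X[:, :max_seq_length].astype('int32')
    # remaining columns: non-timestep (MLP) features
    X_mlp = X[:, max_seq_length:].astype('float32')
    return X_lstm, X_mlp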