def __train_merge(self):
    '''
    Merge training datasets into a single dataset.
    Sample mini-batches from the merged dataset for training.
    '''
    ## Merge training datasets
    X_train, time_train, event_train = utils.combine_datasets(
        self.datasets_train)

    ## To fetch mini-batches
    next_batch, num_batches = utils.batch_factory(
        X_train, time_train, event_train, self.batch_size)

    ## Start training
    self.__sess = tf.Session()
    self.__sess.run(tf.global_variables_initializer())
    for epoch in range(self.epochs):
        for _ in range(num_batches):
            X_batch, time_batch, event_batch = next_batch()
            self.__sess.run(self.__train_op,
                            feed_dict={
                                self.__X: X_batch,
                                self.__time: time_batch,
                                self.__event: event_batch,
                                K.learning_phase(): 1
                            })
        if epoch % 100 == 0:
            print('-' * 20 + 'Epoch: {0}'.format(epoch) + '-' * 20)
            self.__print_loss_ci()
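# The loop above relies on utils.batch_factory returning a sampling
# closure plus a batch count. A minimal sketch of what such a factory
# might look like, assuming shuffled in-memory arrays -- an illustration
# only, not the repository's actual implementation:
import numpy as np

def batch_factory(X, time, event, batch_size):
    n = X.shape[0]
    num_batches = n // batch_size
    order = np.random.permutation(n)
    cursor = {'i': 0}

    def next_batch():
        # Reshuffle once the current permutation is exhausted.
        if cursor['i'] + batch_size > n:
            order[:] = np.random.permutation(n)
            cursor['i'] = 0
        idx = order[cursor['i']:cursor['i'] + batch_size]
        cursor['i'] += batch_size
        return X[idx], time[idx], event[idx]

    return next_batch, num_batches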
def __train_merge(self):
    '''
    Merge training datasets into a single dataset.
    Sample mini-batches from the merged dataset for training.
    '''
    #weights_path = 'checkpoints/resnet101_weights_tf.h5'
    weights_path = 'checkpoints/resnet50_weights_tf_dim_ordering_tf_kernels.h5'

    ## Merge training datasets for both input streams; the second stream
    ## shares the (time, event) labels of the first.
    X_train, time_train, event_train = utils.combine_datasets(
        self.datasets_train)
    X_train_1, time_train_1, event_train_1 = utils.combine_datasets(
        self.datasets_train_1)

    ## get training datasets ___ by heng
    #X_train, time_train, event_train = utils.get_datasets(self.datasets_train)

    ## To fetch mini-batches (replaced by datagen.flow below)
    #next_batch, num_batches = utils.batch_factory(X_train, time_train, event_train, self.batch_size)

    ## Start training
    self.__sess = tf.Session()
    K.set_session(self.__sess)
    self.__sess.run(tf.global_variables_initializer())
    #self.model.load_weights(weights_path, by_name=True)

    print('pre epoch train log:')
    #self.__print_loss_ci_yh_dsnet(self.datasets_train, self.datasets_train_1)

    # Concatenate the two streams along the channel axis so datagen applies
    # the same random transform to both, and pack (time, event) into a
    # single label array for datagen.flow.
    X_merge = np.concatenate([X_train, X_train_1], axis=-1)
    time_event = np.hstack((time_train[..., None], event_train[..., None]))

    for epoch in range(self.epochs):
        batches = 0
        # datagen.flow loops forever; break after one pass over the data.
        for X_batch_merge, Y_batch in datagen.flow(
                X_merge, time_event, batch_size=self.batch_size):
            batches += 1
            if batches >= X_train.shape[0] // self.batch_size:
                break
            time_batch = Y_batch[:, 0]
            event_batch = Y_batch[:, 1]
            # Split the merged batch back into the two 3-channel streams.
            X_batch = X_batch_merge[..., :3]
            X_batch1 = X_batch_merge[..., 3:]
            self.__sess.run(self.__train_op,
                            feed_dict={
                                self.__X: X_batch,
                                self.__X1: X_batch1,
                                self.__time: time_batch,
                                self.__event: event_batch,
                                K.learning_phase(): 1
                            })
        print(f'epoch {epoch} train log:')
        self.__print_loss_ci_yh_dsnet(self.datasets_train,
                                      self.datasets_train_1)
        if epoch % 2 == 0:
            print('-' * 20 + 'Epoch: {0}'.format(epoch) + '-' * 20)
            print(f'epoch {epoch} val log:')
            self.__print_loss_ci_yh_dsnet(self.datasets_val,
                                          self.datasets_val_1)
            print(f'epoch {epoch} test log:')
            self.__print_loss_ci_yh_dsnet(self.datasets_test,
                                          self.datasets_test_1)
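# The training loop above assumes a module-level `datagen`, presumably a
# Keras ImageDataGenerator defined elsewhere in the repository. A plausible
# configuration is sketched below; the specific augmentation settings are
# assumptions, not taken from the source:
from keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rotation_range=20,       # random rotations
    width_shift_range=0.1,   # random horizontal shifts
    height_shift_range=0.1,  # random vertical shifts
    horizontal_flip=True)    # random mirroring

# Because the two 3-channel streams are concatenated along the channel axis
# before flow(), each random geometric transform is applied identically to
# both streams, keeping the paired inputs spatially aligned.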
def __train_merge(self):
    '''
    Merge training datasets into a single dataset.
    Sample mini-batches from the merged dataset for training.
    '''
    #weights_path = 'checkpoints/resnet101_weights_tf.h5'
    weights_path = 'checkpoints/resnet50_weights_tf_dim_ordering_tf_kernels.h5'

    ## Merge training datasets
    X_train, time_train, event_train = utils.combine_datasets(
        self.datasets_train)

    ## get training datasets ___ by heng
    #X_train, time_train, event_train = utils.get_datasets(self.datasets_train)

    ## To fetch mini-batches (replaced by datagen.flow below)
    #next_batch, num_batches = utils.batch_factory(X_train, time_train, event_train, self.batch_size)

    ## Start training
    self.__sess = tf.Session()
    K.set_session(self.__sess)
    self.__sess.run(tf.global_variables_initializer())
    #self.model.load_weights(weights_path, by_name=True)

    file_train = open("train_log.txt", "w")
    file_val = open("val_log.txt", "w")
    file_test = open("test_log.txt", "w")

    print('pre epoch train log:')
    ci_train, loss_train = self.__print_loss_ci_yh(self.datasets_train)
    ci_val, loss_val = self.__print_loss_ci_yh(self.datasets_val)
    ci_test, loss_test = self.__print_loss_ci_yh(self.datasets_test)
    file_train.write(str(ci_train) + " " + str(loss_train) + "\n")
    file_val.write(str(ci_val) + " " + str(loss_val) + "\n")
    file_test.write(str(ci_test) + " " + str(loss_test) + "\n")

    # Pack (time, event) into a single label array for datagen.flow.
    time_event = np.hstack((time_train[..., None], event_train[..., None]))

    ci = 0
    for epoch in range(self.epochs):
        batches = 0
        # datagen.flow loops forever; break after one pass over the data.
        for X_batch, Y_batch in datagen.flow(X_train, time_event,
                                             batch_size=self.batch_size):
            batches += 1
            if batches >= X_train.shape[0] // self.batch_size:
                break
            time_batch = Y_batch[:, 0]
            event_batch = Y_batch[:, 1]
            self.__sess.run(self.__train_op,
                            feed_dict={
                                self.__X: X_batch,
                                self.__time: time_batch,
                                self.__event: event_batch,
                                K.learning_phase(): 1
                            })

        print('-' * 20 + 'Epoch: {0}'.format(epoch) + '-' * 20)
        print(f'epoch {epoch} train log:')
        ci_train, loss_train = self.__print_loss_ci_yh(self.datasets_train)
        file_train.write(str(ci_train) + " " + str(loss_train) + "\n")

        print(f'epoch {epoch} val log:')
        ci_val, loss_val = self.__print_loss_ci_yh(self.datasets_val)
        file_val.write(str(ci_val) + " " + str(loss_val) + "\n")
        #if val_ci > ci:
        #    ci = val_ci
        #    self.model.save_weights(f'my_model_weights_{epoch}.h5')
        #    print("model saved!")

        print(f'epoch {epoch} test log:')
        ci_test, loss_test = self.__print_loss_ci_yh(self.datasets_test)
        file_test.write(str(ci_test) + " " + str(loss_test) + "\n")
        #if ci_test > ci:
        if ci_test > 0.62:
            ci = ci_test
            # Saving is currently disabled; re-enable to persist weights.
            #self.model.save_weights(f'my_fusion_model_weights_{epoch}.h5')
            print("model saved!")

        # Hard stop after 300 epochs.
        if epoch > 300:
            break

    file_train.close()
    file_val.close()
    file_test.close()
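# __print_loss_ci_yh reports a concordance index (CI). For reference, a
# minimal O(n^2) sketch of Harrell's C-index over (time, event, score)
# arrays, ignoring ties in time -- an illustration of the metric, not the
# repository's evaluation code:
import numpy as np

def concordance_index(time, event, score):
    # A pair (i, j) is comparable when subject i had an event and j
    # survived longer; it is concordant when i's predicted risk is higher.
    concordant, comparable = 0.0, 0.0
    n = len(time)
    for i in range(n):
        if not event[i]:
            continue  # censored subjects cannot anchor a comparable pair
        for j in range(n):
            if time[j] > time[i]:
                comparable += 1
                if score[i] > score[j]:
                    concordant += 1
                elif score[i] == score[j]:
                    concordant += 0.5  # ties in risk count as half
    return concordant / comparable if comparable else 0.0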
def __train_merge(self):
    ## Merge training datasets
    X_train, time_value_train, event_train = utils.combine_datasets(
        self.datasets_train)

    ## Start training
    self.__sess = tf.Session()
    K.set_session(self.__sess)
    self.__sess.run(tf.global_variables_initializer())
    #self.model.load_weights(weights_path, by_name=True)

    file_train = open("train_log.txt", "w")
    file_val = open("val_log.txt", "w")
    file_test = open("test_log.txt", "w")

    print('pre epoch train log:')
    ci_train, loss_train = self.__print_loss_ci(self.datasets_train)
    ci_val, loss_val = self.__print_loss_ci(self.datasets_val)
    ci_test, loss_test = self.__print_loss_ci(self.datasets_test)
    file_train.write(str(ci_train) + " " + str(loss_train) + "\n")
    file_val.write(str(ci_val) + " " + str(loss_val) + "\n")
    file_test.write(str(ci_test) + " " + str(loss_test) + "\n")

    # Pack (time, event) into a single label array for datagen.flow.
    time_value_event = np.hstack(
        (time_value_train[..., None], event_train[..., None]))

    ci = 0
    for epoch in range(self.epochs):
        batches = 0
        # datagen.flow loops forever; break after one pass over the data.
        for X_batch, Y_batch in datagen.flow(X_train, time_value_event,
                                             batch_size=self.batch_size):
            batches += 1
            if batches >= X_train.shape[0] // self.batch_size:
                break
            time_value_batch = Y_batch[:, 0]
            event_batch = Y_batch[:, 1]
            self.__sess.run(self.__train_op,
                            feed_dict={
                                self.__X: X_batch,
                                self.__time_value: time_value_batch,
                                self.__event: event_batch,
                                K.learning_phase(): 1
                            })

        print(f'epoch {epoch} train log:')
        ci_train, loss_train = self.__print_loss_ci(self.datasets_train)
        file_train.write(str(ci_train) + " " + str(loss_train) + "\n")

        print('-' * 20 + 'Epoch: {0}'.format(epoch) + '-' * 20)
        print(f'epoch {epoch} val log:')
        ci_val, loss_val = self.__print_loss_ci(self.datasets_val)
        file_val.write(str(ci_val) + " " + str(loss_val) + "\n")
        #if val_ci > ci:
        #    ci = val_ci
        #    self.model.save_weights(f'my_model_weights_{epoch}.h5')
        #    print("model saved!")

        print(f'epoch {epoch} test log:')
        ci_test, loss_test = self.__print_loss_ci(self.datasets_test)
        file_test.write(str(ci_test) + " " + str(loss_test) + "\n")
        if ci_test > ci:
            ci = ci_test
            # Saving is currently disabled; re-enable to persist weights.
            #self.model.save_weights(f'my_model_weights_{epoch}.h5')
            print("model saved!")

        # Hard stop after 500 epochs.
        if epoch > 500:
            break

    file_train.close()
    file_val.close()
    file_test.close()
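# The objective behind self.__train_op is not defined in this section.
# Deep survival networks that feed (time, event) pairs and report a
# concordance index typically minimize the negative Cox partial
# log-likelihood; a hedged TF1-style sketch, assuming the batch is sorted
# by survival time in descending order and `event` is a float tensor:
import tensorflow as tf

def cox_neg_log_likelihood(score, event):
    hazard_ratio = tf.exp(score)
    # With descending-time ordering, cumsum of the hazards gives each
    # subject's risk set (everyone still at risk at that time).
    log_risk = tf.log(tf.cumsum(hazard_ratio))
    uncensored_likelihood = (score - log_risk) * event
    num_events = tf.maximum(tf.reduce_sum(event), 1.0)  # guard div-by-zero
    return -tf.reduce_sum(uncensored_likelihood) / num_events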
parser = argparse.ArgumentParser(
    description="Combine multiple datasets to make one submission or a new feature")
parser.add_argument("dataset_files", type=argparse.FileType('r'), nargs="+")
parser.add_argument("--output_type", type=str, default="submission")
parser.add_argument("outfile", type=argparse.FileType('w'))
args = parser.parse_args()

is_submission = False
if args.output_type == "dataset":
    is_submission = False
elif args.output_type == "submission":
    is_submission = True
else:
    raise ValueError("Must specify output type to be submission or dataset.")

trainf, trainl, testf, test_ids, feature_names = utils.combine_datasets(
    args.dataset_files)

# Log-shift transform of the target; predictions must later be inverted
# with np.exp(pred) - shift.
shift = 200
y = np.log(trainl + shift)
ids = test_ids

RANDOM_STATE = 2016
params = {
    'min_child_weight': 1,
    'eta': 0.01,
    'colsample_bytree': 0.5,
    'max_depth': 12,
    'subsample': 0.8,
    'alpha': 1,
    'gamma': 1,
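# The script is cut off above partway through the params dict. A hedged
# sketch of how training and prediction would proceed with the log-shift
# target; xgb.train usage, num_boost_round, and the objective are
# assumptions, while shift, y, trainf, and testf come from the source:
import numpy as np
import xgboost as xgb

dtrain = xgb.DMatrix(trainf, label=y)
dtest = xgb.DMatrix(testf)
model = xgb.train(params, dtrain, num_boost_round=1000)

# Undo the log-shift transform: y = log(target + shift), so
# target = exp(prediction) - shift.
preds = np.exp(model.predict(dtest)) - shift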