def _load_data(self):
    """Load the preprocessed test set and print the loaded shapes.

    The data directory is ``../data/<DATABASE_MODE>/<DATABASE_NAME>`` when
    ``cfg.DATABASE_MODE`` is set, otherwise
    ``<DPP_DATA_PATH>/<DATABASE_NAME>``. Every pickle name is suffixed
    with ``self.append_info``.

    Returns:
        tuple of (x, y, imgs) as loaded by ``utils.load_pkls``
    """
    utils.thick_line()
    print('Loading data...')
    utils.thin_line()

    # Pick the root directory first, then append the database name once.
    db_mode = self.cfg.DATABASE_MODE
    if db_mode is None:
        data_root = self.cfg.DPP_DATA_PATH
    else:
        data_root = '../data/{}'.format(db_mode)
    data_dir = join(data_root, self.cfg.DATABASE_NAME)

    suffix = self.append_info
    # Only the inputs take the transfer-learning flag and add_n_batch.
    x = utils.load_pkls(data_dir,
                        'x_test' + suffix,
                        tl=self.tl_encode,
                        add_n_batch=1)
    y = utils.load_pkls(data_dir, 'y_test' + suffix)
    imgs = utils.load_pkls(data_dir, 'imgs_test' + suffix)

    utils.thin_line()
    print('Data info:')
    utils.thin_line()
    shapes = (x.shape, y.shape, imgs.shape)
    print('x_test: {}\ny_test: {}\nimgs_test: {}'.format(*shapes))

    return x, y, imgs
def _get_restore_vars_dict(self):
    """Load pre-trained variables.

    Restores the latest checkpoint found in
    ``self.restore_checkpoint_path`` into a fresh graph and evaluates the
    classifier variables listed below.

    Returns:
        dict with the keys 'w_conv_0', 'b_conv_0', 'w_caps_0', 'b_caps_0'
        and 'w_caps_1', each holding the evaluated value of the matching
        tensor of the restored graph.

    Raises:
        ValueError: if no checkpoint is found in
            ``self.restore_checkpoint_path``.
    """
    utils.thick_line()
    print('Loading pre-trained variables from:\n',
          self.restore_checkpoint_path)
    utils.thin_line()

    # Result key -> tensor name in the restored graph, in lookup order.
    # 'classifier/caps_1/biases:0' is not restored (its lookup was
    # commented out in the previous implementation).
    tensor_names = (
        ('w_conv_0', 'classifier/conv_0/weights:0'),
        ('b_conv_0', 'classifier/conv_0/biases:0'),
        ('w_caps_0', 'classifier/caps_0/weights:0'),
        ('b_caps_0', 'classifier/caps_0/biases:0'),
        ('w_caps_1', 'classifier/caps_1/weights:0'),
    )

    tf.reset_default_graph()
    loaded_graph = tf.Graph()

    with tf.Session(graph=loaded_graph) as sess:
        ckp_path = tf.train.latest_checkpoint(self.restore_checkpoint_path)
        # latest_checkpoint() returns None when nothing is found; fail with
        # a clear message instead of a TypeError on `None + '.meta'`.
        if ckp_path is None:
            raise ValueError('No checkpoint found in: {}'.format(
                self.restore_checkpoint_path))

        loader = tf.train.import_meta_graph(ckp_path + '.meta')
        loader.restore(sess, ckp_path)

        restore_vars_dict = dict()
        for key, tensor_name in tensor_names:
            restore_vars_dict[key] = sess.run(
                loaded_graph.get_tensor_by_name(tensor_name))

    return restore_vars_dict
def test(self):
    """Restore the saved model and run ``self.tester`` on it.

    The meta graph and weights are loaded from ``self.checkpoint_path``.
    The reconstruction tensor is only fetched when ``cfg.TEST_WITH_REC``
    is set; otherwise ``None`` is handed to the tester in its place.
    """
    start_time = time.time()
    tf.reset_default_graph()
    loaded_graph = tf.Graph()

    utils.thick_line()
    print('Testing on {} test set...'.format(self.info[1]))

    with tf.Session(graph=loaded_graph) as sess:

        # Restore graph structure and weights of the saved model
        meta_path = self.checkpoint_path + '.meta'
        loader = tf.train.import_meta_graph(meta_path)
        loader.restore(sess, self.checkpoint_path)

        # _get_tensors yields one extra element when testing with
        # reconstruction
        with_rec = self.cfg.TEST_WITH_REC
        tensors = self._get_tensors(loaded_graph)
        if with_rec:
            (inputs, labels, input_imgs,
             is_training, clf_preds, rec_imgs) = tensors
        else:
            inputs, labels, input_imgs, is_training, clf_preds = tensors
            rec_imgs = None

        self.tester(sess, inputs, labels, input_imgs,
                    is_training, clf_preds, rec_imgs, start_time)
def _test(self,
          sess,
          during_training=False,
          epoch=None,
          step=None,
          mode='single'):
    """Evaluate on the test set with the tester matching ``mode``.

    Args:
        sess: session handed to the tester
        during_training: forwarded to the tester constructor
        epoch: forwarded to the tester as ``epoch_train``
        step: forwarded to the tester as ``step_train``
        mode: 'single' selects ``Test``, 'multi_obj' selects
            ``TestMultiObjects``

    Raises:
        ValueError: if ``mode`` is neither 'single' nor 'multi_obj'.
    """
    utils.thick_line()
    start_time_test = time.time()

    tester_kwargs = {
        'cfg': self.cfg,
        'multi_gpu': self.multi_gpu,
        'version': self.cfg.VERSION,
        'during_training': during_training,
        'epoch_train': epoch,
        'step_train': step,
        'model_arch_info': self.model.model_arch_info,
    }

    if mode == 'single':
        banner, tester_cls = 'Testing on Single-object test set...', Test
    elif mode == 'multi_obj':
        banner, tester_cls = \
            'Testing on Multi-object test set...', TestMultiObjects
    else:
        raise ValueError('Wrong mode name')
    print(banner)

    tester_obj = tester_cls(**tester_kwargs)
    tester_obj.tester(
        sess, self.inputs, self.labels, self.input_imgs,
        self.is_training, self.clf_preds, self.rec_imgs,
        start_time_test, self.loss, self.accuracy,
        self.clf_loss, self.rec_loss)
def __init__(self, cfg, model_arch, mode='normal'):
    """Load data, build the training graph and save the configuration.

    Args:
        cfg: configuration object
        model_arch: architecture handed to the model constructor
        mode: 'multi-tasks' builds ``CapsNetMultiTasks``, 'multi-gpu'
            builds ``CapsNetDistribute``, any other value builds
            ``CapsNet``
    """
    # Global start time
    self.start_time = time.time()

    # Config
    self.cfg = cfg

    # Only the two distributed modes count as multi-GPU
    self.multi_gpu = mode in ('multi-tasks', 'multi-gpu')
    if mode == 'multi-tasks':
        model = CapsNetMultiTasks(cfg, model_arch)
    elif mode == 'multi-gpu':
        model = CapsNetDistribute(cfg, model_arch)
    else:
        model = CapsNet(cfg, model_arch)

    # Use encode transfer learning
    self.tl_encode = self.cfg.TRANSFER_LEARNING == 'encode'

    # Get paths from configuration
    (self.preprocessed_path,
     self.train_log_path,
     self.summary_path,
     self.checkpoint_path,
     self.train_image_path) = self._get_paths()

    # Load data
    (self.x_train, self.y_train, self.imgs_train,
     self.x_valid, self.y_valid, self.imgs_valid) = self._load_data()

    # Calculate number of batches (a trailing partial batch is not counted)
    batch_size = cfg.BATCH_SIZE
    self.n_batch_train = len(self.y_train) // batch_size
    self.n_batch_valid = len(self.y_valid) // batch_size

    # Build graph
    utils.thick_line()
    print('Building graph...')
    tf.reset_default_graph()
    graph_parts = model.build_graph(
        input_size=self.x_train.shape[1:],
        image_size=self.imgs_train.shape[1:],
        num_class=self.y_train.shape[1])
    (self.step, self.train_graph, self.inputs, self.labels,
     self.input_imgs, self.is_training, self.optimizer, self.saver,
     self.summary, self.loss, self.accuracy, self.clf_loss,
     self.rec_loss, self.rec_images, self.preds) = graph_parts

    # Save config together with the architecture info of the model
    self.clf_arch_info = model.clf_arch_info
    self.rec_arch_info = model.rec_arch_info
    utils.save_config_log(self.train_log_path,
                          cfg,
                          self.clf_arch_info,
                          self.rec_arch_info)
def download_data(data_base_name):
    """Download and extract the source files of a database.

    Args:
        data_base_name: 'mnist' or 'cifar10'

    Raises:
        ValueError: if ``data_base_name`` is not a supported database.
    """
    utils.thick_line()
    print('Downloading {} data set...'.format(data_base_name))
    utils.thin_line()

    if data_base_name == 'mnist':
        base_url = 'http://yann.lecun.com/exdb/mnist/'
        mnist_dir = join(cfg.SOURCE_DATA_PATH, 'mnist')
        utils.check_dir([mnist_dir])

        # (archive file, extraction sub-directory, data type), listed in
        # download order.
        archives = (
            ('train-images-idx3-ubyte.gz', 'train_images', 'images'),
            ('train-labels-idx1-ubyte.gz', 'train_labels', 'labels'),
            ('t10k-images-idx3-ubyte.gz', 'test_images', 'images'),
            ('t10k-labels-idx1-ubyte.gz', 'test_labels', 'labels'),
        )
        for archive, extract_dir, data_type in archives:
            utils.download_and_extract_mnist(
                url=base_url + archive,
                save_path=join(mnist_dir, archive),
                extract_path=join(mnist_dir, extract_dir),
                data_type=data_type)

    elif data_base_name == 'cifar10':
        base_url = 'https://www.cs.toronto.edu/~kriz/'
        archive = 'cifar-10-python.tar.gz'
        source_dir = cfg.SOURCE_DATA_PATH
        utils.check_dir([source_dir])
        utils.download_and_extract_cifar10(url=base_url + archive,
                                           save_path=source_dir,
                                           file_name=archive,
                                           extract_path=source_dir)

    else:
        raise ValueError('Wrong database name!')

    utils.thick_line()
def test(self):
    """Restore the saved model, evaluate it on the test set and log it.

    Losses and accuracy are computed with ``_eval_on_batches``, printed,
    and written with ``utils.save_test_log``. The classifier and
    reconstruction tensors are only fetched when
    ``cfg.TEST_WITH_RECONSTRUCTION`` is set.
    """
    start_time = time.time()
    tf.reset_default_graph()
    loaded_graph = tf.Graph()

    with tf.Session(graph=loaded_graph) as sess:

        # Restore graph structure and weights of the saved model
        meta_path = self.checkpoint_path + '.meta'
        loader = tf.train.import_meta_graph(meta_path)
        loader.restore(sess, self.checkpoint_path)

        # _get_tensors yields three extra tensors with reconstruction
        with_rec = self.cfg.TEST_WITH_RECONSTRUCTION
        tensors = self._get_tensors(loaded_graph)
        if with_rec:
            (inputs, labels, loss, accuracy,
             clf_loss, rec_loss, rec_images) = tensors
        else:
            inputs, labels, loss, accuracy = tensors
            clf_loss = rec_loss = rec_images = None

        utils.thick_line()
        print('Testing on test set...')

        utils.thin_line()
        print('Calculating loss and accuracy of test set...')
        test_metrics = self._eval_on_batches(
            sess, inputs, labels, loss, accuracy,
            clf_loss, rec_loss, rec_images,
            self.x_test, self.y_test, self.n_batch_test)
        loss_test, clf_loss_test, rec_loss_test, acc_test = test_metrics

        # Print losses and accuracy
        utils.thin_line()
        print('Test_Loss: {:.4f}'.format(loss_test))
        if with_rec:
            clf_line = 'Test_Classifier_Loss: {:.4f}\n'.format(clf_loss_test)
            rec_line = 'Test_Reconstruction_Loss: {:.4f}'.format(
                rec_loss_test)
            print(clf_line, rec_line)
        print('Test_Accuracy: {:.2f}%'.format(acc_test * 100))

        # Save test log
        utils.save_test_log(self.test_log_path,
                            loss_test,
                            acc_test,
                            clf_loss_test,
                            rec_loss_test,
                            with_rec)

        utils.thin_line()
        elapsed = time.time() - start_time
        print('Testing finished! Using time: {:.2f}'.format(elapsed))
        utils.thick_line()
def _eval_on_full_set(self, sess, epoch_i, step, silent=False):
    """Evaluate on the full data set, then print and log the results.

    The validation set is always evaluated; the train set only when
    ``cfg.EVAL_WITH_FULL_TRAIN_SET`` is set (its metrics are ``None``
    otherwise). Results are appended to ``full_set_eval_log.csv`` in the
    train log directory.

    Args:
        sess: session handed to ``_eval_on_batches``
        epoch_i: epoch index; written to the log as ``epoch_i + 1``
        step: current training step
        silent: if True, nothing is printed
    """
    eval_start_time = time.time()
    verbose = not silent

    if verbose:
        utils.thick_line()
        print('Calculating losses using full data set...')

    # Calculate losses and accuracies of full train set (optional)
    train_metrics = (None, None, None, None)
    if self.cfg.EVAL_WITH_FULL_TRAIN_SET:
        train_metrics = self._eval_on_batches(
            'train', sess, self.x_train, self.y_train,
            self.n_batch_train, silent=silent)
    loss_train, clf_loss_train, rec_loss_train, acc_train = train_metrics

    # Calculate losses and accuracies of full valid set
    valid_metrics = self._eval_on_batches(
        'valid', sess, self.x_valid, self.y_valid,
        self.n_batch_valid, silent=silent)
    loss_valid, clf_loss_valid, rec_loss_valid, acc_valid = valid_metrics

    if verbose:
        utils.print_full_set_eval(
            epoch_i, self.cfg.EPOCHS, step, self.start_time,
            loss_train, clf_loss_train, rec_loss_train, acc_train,
            loss_valid, clf_loss_valid, rec_loss_valid, acc_valid,
            self.cfg.EVAL_WITH_FULL_TRAIN_SET,
            self.cfg.WITH_RECONSTRUCTION)

    file_path = join(self.train_log_path, 'full_set_eval_log.csv')
    if verbose:
        utils.thin_line()
        print('Saving {}...'.format(file_path))
    utils.save_log(
        file_path, epoch_i + 1, step, time.time() - self.start_time,
        loss_train, clf_loss_train, rec_loss_train, acc_train,
        loss_valid, clf_loss_valid, rec_loss_valid, acc_valid,
        self.cfg.WITH_RECONSTRUCTION)

    if verbose:
        utils.thin_line()
        elapsed = time.time() - eval_start_time
        print('Evaluation done! Using time: {:.2f}'.format(elapsed))
def __init__(self, cfg):
    """Resolve checkpoint and log paths, save the config, load test data.

    Args:
        cfg: configuration object
    """
    # Config
    self.cfg = cfg

    version = self.cfg.TEST_VERSION
    ckp_idx = self.cfg.TEST_CKP_IDX

    # Get checkpoint path
    self.checkpoint_path = join(
        cfg.CHECKPOINT_PATH,
        '{}/models.ckpt-{}'.format(version, ckp_idx))

    # Get log path; append '(n)' with the first n that does not collide
    # with an existing directory.
    base_log_path = join(self.cfg.TEST_LOG_PATH,
                         '{}-{}'.format(version, ckp_idx))
    log_path = base_log_path
    n_existing = 0
    while isdir(log_path):
        n_existing += 1
        log_path = '{}({})'.format(base_log_path, n_existing)
    self.test_log_path = log_path

    # Path for saving images
    self.test_image_path = join(self.test_log_path, 'images')

    # Check directory of paths; the image directory is only needed when
    # reconstruction images will be saved.
    utils.check_dir([self.test_log_path])
    if (self.cfg.TEST_WITH_RECONSTRUCTION
            and self.cfg.TEST_SAVE_IMAGE_STEP is not None):
        utils.check_dir([self.test_image_path])

    # Save config
    utils.save_config_log(self.test_log_path, self.cfg)

    # Load data
    utils.thick_line()
    print('Loading data...')
    utils.thin_line()
    data_dir = join(cfg.DPP_DATA_PATH, cfg.DATABASE_NAME)
    self.x_test = utils.load_data_from_pkl(join(data_dir, 'x_test.p'))
    self.y_test = utils.load_data_from_pkl(join(data_dir, 'y_test.p'))

    # Calculate number of batches (a trailing partial batch is not counted)
    self.n_batch_test = len(self.y_test) // self.cfg.TEST_BATCH_SIZE
def pipeline(self, data_base_name):
    """Run the whole data preprocessing pipeline for one database.

    Steps, in order: load, augment, scale, one-hot encode, split, check
    and save. Shuffling is currently disabled.

    Args:
        data_base_name: name of data base; also used as the sub-directory
            name under ``cfg.DPP_DATA_PATH`` and ``cfg.SOURCE_DATA_PATH``
    """
    utils.thick_line()
    print('Start Preprocessing...')

    start_time = time.time()

    self.data_base_name = data_base_name
    self.preprocessed_path = join(self.cfg.DPP_DATA_PATH, data_base_name)
    self.source_data_path = join(self.cfg.SOURCE_DATA_PATH, data_base_name)

    # Load data
    self._load_data()

    # Augment data
    self._augment_data()

    # Shuffle data set (disabled)
    # self._shuffle()

    # Scaling images to (0, 1)
    self._scaling()

    # One-hot-encoding labels
    self._one_hot_encoding()

    # Split data set into train/valid/test
    self._split_data()

    # Check data format.
    self._check_data()

    # Save data to pickles
    self._save_data()

    utils.thin_line()
    # Fixed-point format: a bare '.3' precision falls back to general
    # formatting and prints long runs in scientific notation (1.23e+03).
    print('Done! Using {:.3f}s'.format(time.time() - start_time))
    utils.thick_line()
def pipeline(cfg_list, architecture, mode):
    """Train one model per configuration, one after another.

    Args:
        cfg_list: sequence of configuration objects, one training task each
        architecture: model architecture handed to ``Main``
        mode: run mode handed to ``Main``
    """
    global_start_time = time.time()
    n_tasks = len(cfg_list)

    for i, cfg in enumerate(cfg_list):

        start_time = time.time()

        utils.thick_line()
        print('Training task: {}/{}'.format(i + 1, n_tasks))
        model = Main(cfg, architecture, mode)
        model.train()

        # Fixed-point format: a bare '.4' precision falls back to general
        # formatting and prints long runs in scientific notation.
        utils.thick_line()
        print('Task done! Using {:.4f}s'.format(time.time() - start_time))
        print('Total time {:.4f}s'.format(time.time() - global_start_time))
        utils.thick_line()

    utils.thick_line()
    print('All Task done! Using {:.4f}s'.format(
        time.time() - global_start_time))
    utils.thick_line()
def _load_bottleneck_features(self):
    """Load the train/valid bottleneck features and print their shapes.

    Returns:
        tuple of (x_train, y_train, x_valid, y_valid) as loaded by
        ``utils.load_pkls`` from ``self.preprocessed_path``
    """
    utils.thick_line()
    print('Loading data...')
    utils.thin_line()

    data_dir = self.preprocessed_path
    # Inputs first, then labels; only x_valid is loaded with add_n_batch.
    x_train = utils.load_pkls(data_dir, 'x_train')
    x_valid = utils.load_pkls(data_dir, 'x_valid', add_n_batch=1)
    y_train = utils.load_pkls(data_dir, 'y_train')
    y_valid = utils.load_pkls(data_dir, 'y_valid')

    utils.thin_line()
    print('Data info:')
    utils.thin_line()
    shapes = (x_train.shape, y_train.shape, x_valid.shape, y_valid.shape)
    print('x_train: {}\ny_train: {}\nx_valid: {}\ny_valid: {}'.format(
        *shapes))

    return x_train, y_train, x_valid, y_valid
def _load_data(self):
    """Load the preprocessed train/valid data and print the shapes.

    When the ground-truth images have the same shape as the inputs, the
    separately loaded image arrays are dropped and the input arrays are
    returned in their place.

    Returns:
        tuple of (x_train, y_train, imgs_train, x_valid, y_valid,
        imgs_valid)
    """
    utils.thick_line()
    print('Loading data...')
    utils.thin_line()

    data_dir = self.preprocessed_path

    x_train = utils.load_pkls(data_dir, 'x_train', tl=self.tl_encode)
    x_valid = utils.load_pkls(data_dir,
                              'x_valid',
                              tl=self.tl_encode,
                              add_n_batch=1)

    imgs_train = utils.load_pkls(data_dir, 'imgs_train')
    imgs_valid = utils.load_pkls(data_dir, 'imgs_valid')

    same_shape = imgs_train.shape == x_train.shape
    if same_shape:
        print('[W] imgs_train.shape == x_train.shape')
        # Release the image arrays before aliasing the inputs
        del imgs_train, imgs_valid
        gc.collect()
        imgs_train, imgs_valid = x_train, x_valid

    y_train = utils.load_pkls(data_dir, 'y_train')
    y_valid = utils.load_pkls(data_dir, 'y_valid')

    utils.thin_line()
    print('Data info:')
    utils.thin_line()
    xy_shapes = (x_train.shape, y_train.shape, x_valid.shape, y_valid.shape)
    print('x_train: {}\ny_train: {}\nx_valid: {}\ny_valid: {}'.format(
        *xy_shapes))
    print('imgs_train: {}\nimgs_valid: {}'.format(imgs_train.shape,
                                                  imgs_valid.shape))

    return x_train, y_train, imgs_train, x_valid, y_valid, imgs_valid
def tester(self,
           sess,
           inputs,
           labels,
           input_imgs,
           is_training,
           clf_preds,
           rec_imgs,
           start_time,
           loss=None,
           acc=None,
           clf_loss=None,
           rec_loss=None):
    """Run the multi-objects test on the given graph tensors.

    Computes the prediction vectors, turns them into binary predictions,
    scores them, and saves reconstruction images when
    ``cfg.TEST_WITH_REC`` is set.

    ``input_imgs``, ``loss``, ``acc``, ``clf_loss`` and ``rec_loss`` are
    accepted but not used by this method.

    Args:
        sess: session used for evaluation
        inputs: inputs tensor
        labels: labels tensor
        is_training: is_training tensor
        clf_preds: classifier predictions tensor
        rec_imgs: reconstructed images tensor
        start_time: time stamp used to report the elapsed time
    """
    utils.thin_line()
    print('Calculating loss and accuracy of test set...')

    # Raw prediction vectors of the test set
    preds_vec = self._get_preds_vector(sess, inputs, clf_preds, is_training)

    # Binary predictions derived from the vectors
    preds_binary = self._get_preds_binary(preds_vec=preds_vec)

    # Evaluation scores for multi-objects detection
    self._get_multi_obj_scores(preds_binary, preds_vec)

    # Reconstruction images of multi-objects detection
    if self.cfg.TEST_WITH_REC:
        self._save_images_mo(sess, rec_imgs, inputs, labels,
                             is_training, preds_binary, preds_vec)

    utils.thin_line()
    elapsed = time.time() - start_time
    print('Testing finished! Using time: {:.2f}'.format(elapsed))
    utils.thick_line()
def __init__(self, model, cfg):
    """Resolve paths, load train/valid data and build the graph.

    Args:
        model: the model which will be trained; must provide
            ``build_graph``, ``clf_arch_info`` and ``rec_arch_info``
        cfg: configuration object
    """
    # Global start time
    self.start_time = time.time()

    # Config
    self.cfg = cfg

    # Base paths from configuration
    version = cfg.VERSION
    train_log_base = join(cfg.TRAIN_LOG_PATH, version)
    test_log_base = join(cfg.TEST_LOG_PATH, version)
    summary_base = join(cfg.SUMMARY_PATH, version)
    checkpoint_base = join(cfg.CHECKPOINT_PATH, version)
    self.preprocessed_path = join(cfg.DPP_DATA_PATH, cfg.DATABASE_NAME)

    # Find the first '(n)' suffix whose train log directory does not
    # exist yet; the same suffix is shared by all four paths.
    suffix = ''
    n_existing = 0
    while isdir(train_log_base + suffix):
        n_existing += 1
        suffix = '({})'.format(n_existing)
    self.train_log_path = train_log_base + suffix
    self.summary_path = summary_base + suffix
    self.checkpoint_path = checkpoint_base + suffix
    self.test_log_path = test_log_base + suffix

    # Images saving path
    self.train_image_path = join(self.train_log_path, 'images')
    self.test_image_path = join(self.test_log_path, 'images')

    # Check directory of paths; the image directory is only needed when
    # reconstruction images will be saved.
    utils.check_dir([self.train_log_path, self.checkpoint_path])
    if cfg.WITH_RECONSTRUCTION and cfg.SAVE_IMAGE_STEP is not None:
        utils.check_dir([self.train_image_path])

    # Load data
    utils.thick_line()
    print('Loading data...')
    utils.thin_line()
    for name in ('x_train', 'y_train', 'x_valid', 'y_valid'):
        pkl_path = join(self.preprocessed_path, name + '.p')
        setattr(self, name, utils.load_data_from_pkl(pkl_path))

    # Calculate number of batches (a trailing partial batch is not counted)
    batch_size = cfg.BATCH_SIZE
    self.n_batch_train = len(self.y_train) // batch_size
    self.n_batch_valid = len(self.y_valid) // batch_size

    # Build graph
    utils.thick_line()
    print('Building graph...')
    tf.reset_default_graph()
    graph_parts = model.build_graph(image_size=self.x_train.shape[1:],
                                    num_class=self.y_train.shape[1])
    (self.step, self.train_graph, self.inputs, self.labels,
     self.is_training, self.optimizer, self.saver, self.summary,
     self.loss, self.accuracy, self.clf_loss, self.rec_loss,
     self.rec_images) = graph_parts

    # Save config
    utils.save_config_log(self.train_log_path,
                          cfg,
                          model.clf_arch_info,
                          model.rec_arch_info)
self._trainer(sess) else: with tf.Session(graph=self.train_graph, config=session_cfg) as sess: self._trainer(sess) if __name__ == '__main__': opts, args = getopt.getopt(sys.argv[1:], "g", ['gpu-id']) for op, value in opts: if op == "-g": print('Using /gpu: %d' % int(value)) environ["CUDA_VISIBLE_DEVICES"] = str(int(value)) utils.thick_line() print('Input [ 1 ] to run normal version.') print('Input [ 2 ] to run multi-gpu version.') utils.thin_line() input_ = input('Input: ') if input_ == '1': CapsNet_ = CapsNet(config) elif input_ == '2': CapsNet_ = CapsNetDistribute(config) else: raise ValueError('Wrong input! Found: ', input_) Main_ = Main(CapsNet_, config) Main_.train()
def tester(self,
           sess,
           inputs,
           labels,
           input_imgs,
           is_training,
           clf_preds,
           rec_imgs,
           start_time,
           loss=None,
           acc=None,
           clf_loss=None,
           rec_loss=None):
    """Evaluate on the test set, print the metrics and save a test log.

    Top-N accuracies are computed and printed when ``cfg.TOP_N_LIST`` is
    set. During training (except for the final 'end' run) the log is
    written with ``utils.save_test_log_is_training``; otherwise with
    ``utils.save_test_log``.

    Args:
        sess: session used for evaluation
        inputs, labels, input_imgs, is_training: input tensors
        clf_preds: classifier predictions tensor
        rec_imgs: reconstructed images tensor
        start_time: time stamp used to report the elapsed time
        loss, acc, clf_loss, rec_loss: metric tensors handed to
            ``_eval_on_batches``
    """
    utils.thin_line()
    print('Calculating loss and accuracy of test set...')

    # Get losses and accuracies
    eval_results = self._eval_on_batches(
        sess, inputs, labels, input_imgs, is_training, clf_preds,
        loss, acc, clf_loss, rec_loss, rec_imgs)
    (preds_vec, loss_test, clf_loss_test,
     rec_loss_test, acc_test) = eval_results

    # Get integer predictions (return value is not needed here)
    self._get_preds_int(preds_vec=preds_vec)

    # Get top N accuracy
    top_n_list = self.cfg.TOP_N_LIST
    acc_top_n_list = None
    if top_n_list is not None:
        acc_top_n_list = self._get_top_n_accuracy(preds_vec)

    # Print losses and accuracy
    utils.thin_line()
    print('Test Loss: {:.4f}'.format(loss_test))
    if self.cfg.TEST_WITH_REC:
        clf_line = 'Test Classifier Loss: {:.4f}\n'.format(clf_loss_test)
        rec_line = 'Test Reconstruction Loss: {:.4f}'.format(rec_loss_test)
        print(clf_line, rec_line)
    print('Test Accuracy: {:.4f}%'.format(acc_test * 100))
    if self.cfg.TOP_N_LIST is not None:
        utils.thin_line()
        for i, top_n in enumerate(self.cfg.TOP_N_LIST):
            top_n_pct = acc_top_n_list[i] * 100
            print('Top_{} Test Accuracy: {:.4f}% \n'.format(top_n, top_n_pct))

    # Save test log; the plain log is used outside of training and for
    # the final 'end' run.
    if not self.during_training or self.epoch_train == 'end':
        utils.save_test_log(
            self.test_log_path, loss_test, acc_test,
            clf_loss_test, rec_loss_test, self.cfg.TEST_WITH_REC,
            self.cfg.TOP_N_LIST, acc_top_n_list)
    else:
        utils.save_test_log_is_training(
            self.test_log_path, self.epoch_train, self.step_train,
            loss_test, acc_test, clf_loss_test, rec_loss_test,
            self.cfg.TEST_WITH_REC, self.cfg.TOP_N_LIST, acc_top_n_list)

    utils.thin_line()
    elapsed = time.time() - start_time
    print('Testing finished! Using time: {:.2f}'.format(elapsed))
    utils.thick_line()
def grid_search(self,
                param_grid,
                save_every_result=False,
                save_shifted_result=False,
                append_info=None):
    """Train every parameter combination of every grid and collect results.

    For each combination the trainer selected by ``fill_mode`` is run,
    the run is logged with ``utils.save_log_to_csv``, and the final and
    validation predictions (each with the cost appended) are stored as
    one column per run. Both tables are written to ``cfg.log_path`` at
    the end.

    Args:
        param_grid: iterable of grids; each is expanded with
            ``_generate_grid_combinations``
        save_every_result: forwarded to ``train`` as ``save_result``
        save_shifted_result: forwarded to ``train``
        append_info: optional suffix for log and result names
    """
    start_time = time.time()

    # Start-year -> first training date handed to the trainer
    train_start = {
        2009: '2009-01-05',
        2010: '2010-01-04',
        2011: '2011-01-04',
        2012: '2010-01-04',
        2013: '2010-01-04'
    }

    df_total = pd.read_csv(
        join(cfg.source_path, 'z_hack_submit_new_with_cost.csv'),
        index_col=['FORECASTDATE'],
        usecols=['FORECASTDATE'])
    forecast_num = len(df_total) - 1
    # 34 validation predictions plus one cost row per run
    df_valid = pd.DataFrame(index=range(35))

    idx = 0
    for i, grid_i in enumerate(param_grid):

        task_time = time.time()

        utils.thick_line()
        print('Grid Searching Task {}...'.format(i))

        grid_combs = self._generate_grid_combinations(grid_i)
        progress = tqdm(grid_combs,
                        total=len(grid_combs),
                        ncols=100,
                        unit=' comb')

        for grid_search_tuple_dict in progress:
            comb = grid_search_tuple_dict
            model_name = comb['model_name']
            start_year = comb['start_year']
            valid_range = comb['valid_range']
            feature_num = comb['feature_num']
            fill_mode = comb['fill_mode']
            time_features = comb['time_features']
            use_month_features = comb['use_month_features']

            valid_start, valid_end = valid_range[0], valid_range[1]
            data_range = {
                'train_start': train_start[start_year],
                'valid_start': valid_start,
                'valid_end': valid_end
            }

            # Normalised lazily, on the first combination only
            if append_info is None:
                append_info = ''

            trainer = self.T[fill_mode]
            pred_final, cost, pred_valid = trainer.train(
                model_name=model_name,
                feature_num=feature_num,
                forecast_num=forecast_num,
                time_features=time_features,
                use_month_features=use_month_features,
                data_range=data_range,
                save_result=save_every_result,
                save_shifted_result=save_shifted_result,
                append_info='_' + str(idx) + append_info,
                idx=idx)

            utils.save_log_to_csv(
                log_path=cfg.log_path,
                grid_search_tuple_dict=grid_search_tuple_dict,
                cost=cost,
                idx=idx,
                append_info='_' + self.sample_mode + append_info)

            run_key = str(idx)
            df_total[run_key] = np.append(pred_final, cost)

            # Zero-pad short validation predictions up to 34 entries
            n_missing = 34 - len(pred_valid)
            if n_missing > 0:
                pred_valid = np.append(pred_valid, np.zeros(n_missing))
            df_valid[run_key] = np.append(pred_valid, cost)

            idx += 1

        utils.thin_line()
        print('Task {} Done! Using {:.2f}s...'.format(
            i, time.time() - task_time))
        utils.thick_line()

    utils.check_dir([cfg.result_path])
    df_total = df_total.stack().unstack(0)
    results_name = 'all_results_{}_{}.csv'.format(self.sample_mode,
                                                  append_info)
    df_total.to_csv(join(cfg.log_path, results_name))
    valid_name = 'all_valid_{}_{}.csv'.format(self.sample_mode, append_info)
    df_valid.to_csv(join(cfg.log_path, valid_name))

    utils.thick_line()
    print('All Task Done! Using {:.2f}s...'.format(time.time() - start_time))
    utils.thick_line()
def _test_after_training(self, sess):
    """Evaluate on the test set after training and save a test log.

    The test pickles are loaded from ``self.preprocessed_path`` and
    evaluated batch by batch; the per-batch metrics are averaged. With
    ``cfg.WITH_RECONSTRUCTION`` the classifier and reconstruction losses
    are evaluated too, and reconstruction images are saved every
    ``cfg.TEST_SAVE_IMAGE_STEP`` batches when that step is set.

    Args:
        sess: session holding the trained model
    """
    test_start_time = time.time()

    utils.thick_line()
    print('Testing...')

    with_rec = self.cfg.WITH_RECONSTRUCTION

    # Check directory of paths
    utils.check_dir([self.test_log_path])
    if with_rec and self.cfg.TEST_SAVE_IMAGE_STEP is not None:
        utils.check_dir([self.test_image_path])

    # Load data
    utils.thin_line()
    print('Loading test set...')
    utils.thin_line()
    x_test = utils.load_data_from_pkl(
        join(self.preprocessed_path, 'x_test.p'))
    y_test = utils.load_data_from_pkl(
        join(self.preprocessed_path, 'y_test.p'))
    batch_size = self.cfg.BATCH_SIZE
    n_batch_test = len(y_test) // batch_size

    utils.thin_line()
    print('Calculating loss and accuracy on test set...')
    losses, accuracies = [], []
    clf_losses, rec_losses = [], []
    step = 0
    batches = utils.get_batches(x_test, y_test, batch_size)

    progress = tqdm(range(n_batch_test),
                    total=n_batch_test,
                    ncols=100,
                    unit=' batches')
    for _ in progress:
        batch_x, batch_y = next(batches)
        feed = {
            self.inputs: batch_x,
            self.labels: batch_y,
            self.is_training: False
        }

        if not with_rec:
            loss_i, acc_i = sess.run([self.loss, self.accuracy],
                                     feed_dict=feed)
            losses.append(loss_i)
            accuracies.append(acc_i)
            continue

        step += 1
        loss_i, clf_loss_i, rec_loss_i, acc_i = sess.run(
            [self.loss, self.clf_loss, self.rec_loss, self.accuracy],
            feed_dict=feed)
        losses.append(loss_i)
        accuracies.append(acc_i)
        clf_losses.append(clf_loss_i)
        rec_losses.append(rec_loss_i)

        # Save reconstruct images
        if (self.cfg.TEST_SAVE_IMAGE_STEP is not None
                and step % self.cfg.TEST_SAVE_IMAGE_STEP == 0):
            self._save_images(sess, self.test_image_path,
                              batch_x, batch_y, step, silent=False)

    if with_rec:
        clf_loss_test = sum(clf_losses) / len(clf_losses)
        rec_loss_test = sum(rec_losses) / len(rec_losses)
    else:
        clf_loss_test, rec_loss_test = None, None

    loss_test = sum(losses) / len(losses)
    acc_test = sum(accuracies) / len(accuracies)

    # Print losses and accuracy
    utils.thin_line()
    print('Test_Loss: {:.4f}\n'.format(loss_test),
          'Test_Accuracy: {:.2f}%'.format(acc_test * 100))
    if with_rec:
        utils.thin_line()
        print('Test_Train_Loss: {:.4f}\n'.format(clf_loss_test),
              'Test_Reconstruction_Loss: {:.4f}'.format(rec_loss_test))

    # Save test log
    utils.save_test_log(self.test_log_path, loss_test, acc_test,
                        clf_loss_test, rec_loss_test, with_rec)

    utils.thin_line()
    elapsed = time.time() - test_start_time
    print('Testing finished! Using time: {:.2f}'.format(elapsed))
def build_graph(self,
                input_size=(None, None, None),
                image_size=(None, None, None),
                num_class=None,
                n_train_samples=None):
    """Build the multi-tower (one tower per GPU) graph of CapsNet.

    The input, label and image tensors are split along axis 0 into
    ``cfg.GPU_NUMBER`` pieces. Each piece is processed by
    ``_calc_on_gpu`` on its own GPU device; variables are reused by every
    tower after the first. The per-tower gradients and metrics are then
    averaged and a single train op is created.

    Args:
      input_size: size of input tensor
      image_size: the size of ground truth images, should be 3 dimensional
      num_class: number of class of label
      n_train_samples: number of train samples

    Returns:
      tuple of (global_step, train_graph, inputs, labels, input_imgs,
                is_training, train_op, saver, summary_op, loss, accuracy,
                classifier_loss, classifier_predictions,
                reconstruct_loss, reconstructed_images)
    """
    tf.reset_default_graph()
    train_graph = tf.Graph()

    with train_graph.as_default(), tf.device('/cpu:0'):

        # Get inputs tensor
        inputs, labels, input_imgs, is_training = \
            self._get_inputs(input_size, num_class, image_size=image_size)

        # Global step, fed from outside as a placeholder.
        # NOTE(review): tf.int16 caps the step value at 32767 - confirm
        # that training never feeds a larger step.
        global_step = tf.placeholder(tf.int16, name='global_step')

        # Optimizer
        optimizer = self._optimizer(opt_name=self.cfg.OPTIMIZER,
                                    n_train_samples=n_train_samples,
                                    global_step=global_step)

        # Split data for each tower
        x_splits_tower = tf.split(axis=0,
                                  num_or_size_splits=self.cfg.GPU_NUMBER,
                                  value=inputs)
        y_splits_tower = tf.split(axis=0,
                                  num_or_size_splits=self.cfg.GPU_NUMBER,
                                  value=labels)
        imgs_splits_tower = tf.split(
            axis=0,
            num_or_size_splits=self.cfg.GPU_NUMBER,
            value=input_imgs)

        # Calculate the gradients for each models tower.
        grads_all, loss_all, acc_all, clf_loss_all, clf_preds_all, \
            rec_loss_all, rec_imgs_all = [], [], [], [], [], [], []
        for i in range(self.cfg.GPU_NUMBER):
            utils.thin_line()
            print('Building tower: ', i)

            # Take the slice of the batch that belongs to this tower
            x_tower, y_tower, imgs_tower = \
                x_splits_tower[i], y_splits_tower[i], imgs_splits_tower[i]

            # Variables are created by tower 0 and reused by the others
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=bool(i != 0)):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        grads_tower, loss_tower, acc_tower, clf_loss_tower, \
                            clf_preds_tower, rec_loss_tower, rec_imgs_tower = \
                            self._calc_on_gpu(i, x_tower, y_tower,
                                              imgs_tower, num_class,
                                              is_training, optimizer)

                        # Keep track of the gradients across all towers.
                        grads_all.append(grads_tower)

                        # Collect metrics of each tower
                        loss_all.append(loss_tower)
                        acc_all.append(acc_tower)
                        clf_loss_all.append(clf_loss_tower)
                        clf_preds_all.append(clf_preds_tower)
                        rec_loss_all.append(rec_loss_tower)
                        rec_imgs_all.append(rec_imgs_tower)

        # Calculate the mean of each gradient.
        grads = self._average_gradients(grads_all)

        # Calculate means of metrics
        loss, accuracy, clf_loss, clf_preds, rec_loss, rec_imgs = \
            self._average_metrics(loss_all, acc_all, clf_loss_all,
                                  clf_preds_all, rec_loss_all, rec_imgs_all)

        # Show variables
        utils.thick_line()
        print('Variables: ')
        for v in tf.global_variables():
            print(v)

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = optimizer.apply_gradients(grads)

        # Track the moving averages of all trainable variables
        # (only when a decay is configured).
        if self.cfg.MOVING_AVERAGE_DECAY:
            variable_averages = tf.train.ExponentialMovingAverage(
                self.cfg.MOVING_AVERAGE_DECAY)
            variables_averages_op = variable_averages.apply(
                tf.trainable_variables())
            # Group all updates to into a single train op.
            train_op = tf.group(apply_gradient_op, variables_averages_op)
        else:
            train_op = apply_gradient_op

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(),
                               max_to_keep=self.cfg.MAX_TO_KEEP_CKP)

        # Build the summary operation from the averaged metrics.
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('loss', loss)
        if self.cfg.WITH_REC:
            tf.summary.scalar('clf_loss', clf_loss)
            tf.summary.scalar('rec_loss', rec_loss)
        summary_op = tf.summary.merge_all()

        return global_step, train_graph, inputs, labels, input_imgs, \
            is_training, train_op, saver, summary_op, loss, accuracy, \
            clf_loss, clf_preds, rec_loss, rec_imgs
def _trainer(self, sess):
    """Run the training loop inside the given session.

    Initializes all global variables, then trains for ``cfg.EPOCHS``
    epochs. When ``cfg.DISPLAY_STEP`` is set, the status is printed every
    ``DISPLAY_STEP`` steps; otherwise a tqdm progress bar is shown and the
    per-step hooks run silently. Depending on the configuration, logs,
    reconstruction images and checkpoints are saved and the full data set
    is evaluated either per batch or per epoch. After training, the test
    set is evaluated when ``cfg.TEST_AFTER_TRAINING`` is set.

    Args:
        sess: session bound to the training graph
    """
    utils.thick_line()
    print('Training...')

    # Merge all the summaries and create writers
    train_summary_path = join(self.summary_path, 'train')
    valid_summary_path = join(self.summary_path, 'valid')
    utils.check_dir([train_summary_path, valid_summary_path])
    train_writer = tf.summary.FileWriter(train_summary_path, sess.graph)
    valid_writer = tf.summary.FileWriter(valid_summary_path)

    sess.run(tf.global_variables_initializer())
    # Counts batches across all epochs; the graph is fed `step - 1`.
    step = 0

    for epoch_i in range(self.cfg.EPOCHS):

        epoch_start_time = time.time()
        utils.thick_line()
        print('Training on epoch: {}/{}'.format(epoch_i + 1,
                                                self.cfg.EPOCHS))

        if self.cfg.DISPLAY_STEP is not None:

            # Verbose mode: iterate every batch the generator yields
            for x_batch, y_batch in utils.get_batches(
                    self.x_train, self.y_train, self.cfg.BATCH_SIZE):
                step += 1

                # Training optimizer
                sess.run(self.optimizer,
                         feed_dict={
                             self.inputs: x_batch,
                             self.labels: y_batch,
                             self.step: step - 1,
                             self.is_training: True
                         })

                # Display training information
                if step % self.cfg.DISPLAY_STEP == 0:
                    self._display_status(sess, x_batch, y_batch,
                                         epoch_i, step)

                # Save training logs
                if self.cfg.SAVE_LOG_STEP is not None:
                    if step % self.cfg.SAVE_LOG_STEP == 0:
                        self._save_logs(sess, train_writer, valid_writer,
                                        x_batch, y_batch, epoch_i, step)

                # Save reconstruction images
                if self.cfg.SAVE_IMAGE_STEP is not None:
                    if self.cfg.WITH_RECONSTRUCTION:
                        if step % self.cfg.SAVE_IMAGE_STEP == 0:
                            self._save_images(sess, self.train_image_path,
                                              x_batch, y_batch, step,
                                              epoch_i=epoch_i)

                # Save models
                if self.cfg.SAVE_MODEL_MODE == 'per_batch':
                    if step % self.cfg.SAVE_MODEL_STEP == 0:
                        self._save_model(sess, self.saver, step)

                # Evaluate on full set
                if self.cfg.FULL_SET_EVAL_MODE == 'per_batch':
                    if step % self.cfg.FULL_SET_EVAL_STEP == 0:
                        self._eval_on_full_set(sess, epoch_i, step)
                        utils.thick_line()
        else:
            # Silent mode: progress bar over exactly n_batch_train batches
            utils.thin_line()
            train_batch_generator = utils.get_batches(
                self.x_train, self.y_train, self.cfg.BATCH_SIZE)
            for _ in tqdm(range(self.n_batch_train),
                          total=self.n_batch_train,
                          ncols=100, unit=' batches'):

                step += 1
                x_batch, y_batch = next(train_batch_generator)

                # Training optimizer
                sess.run(self.optimizer,
                         feed_dict={
                             self.inputs: x_batch,
                             self.labels: y_batch,
                             self.step: step - 1,
                             self.is_training: True
                         })

                # Save training logs
                if self.cfg.SAVE_LOG_STEP is not None:
                    if step % self.cfg.SAVE_LOG_STEP == 0:
                        self._save_logs(sess, train_writer, valid_writer,
                                        x_batch, y_batch, epoch_i, step)

                # Save reconstruction images
                if self.cfg.SAVE_IMAGE_STEP is not None:
                    if self.cfg.WITH_RECONSTRUCTION:
                        if step % self.cfg.SAVE_IMAGE_STEP == 0:
                            self._save_images(sess, self.train_image_path,
                                              x_batch, y_batch, step,
                                              silent=True, epoch_i=epoch_i)

                # Save models
                if self.cfg.SAVE_MODEL_MODE == 'per_batch':
                    if step % self.cfg.SAVE_MODEL_STEP == 0:
                        self._save_model(sess, self.saver, step, silent=True)

                # Evaluate on full set
                if self.cfg.FULL_SET_EVAL_MODE == 'per_batch':
                    if step % self.cfg.FULL_SET_EVAL_STEP == 0:
                        self._eval_on_full_set(sess, epoch_i, step,
                                               silent=True)

        # Per-epoch checkpoint; the epoch index is passed in place of
        # the step
        if self.cfg.SAVE_MODEL_MODE == 'per_epoch':
            if (epoch_i + 1) % self.cfg.SAVE_MODEL_STEP == 0:
                self._save_model(sess, self.saver, epoch_i)
        # Per-epoch evaluation on the full set
        if self.cfg.FULL_SET_EVAL_MODE == 'per_epoch':
            if (epoch_i + 1) % self.cfg.FULL_SET_EVAL_STEP == 0:
                self._eval_on_full_set(sess, epoch_i, step)

        utils.thin_line()
        print('Epoch done! Using time: {:.2f}'.format(time.time() -
                                                      epoch_start_time))

    utils.thick_line()
    print('Training finished! Using time: {:.2f}'.format(time.time() -
                                                         self.start_time))
    utils.thick_line()

    # Evaluate on test set after training
    if self.cfg.TEST_AFTER_TRAINING:
        self._test_after_training(sess)

    utils.thick_line()
    print('All task finished! Total time: {:.2f}'.format(time.time() -
                                                         self.start_time))
    utils.thick_line()
def _trainer(self, sess):
    """Run the training loop inside the given session.

    Initializes all global variables, then trains for ``cfg.EPOCHS``
    epochs of ``self.n_batch_train`` batches each. With a truthy
    ``cfg.DISPLAY_STEP`` the status is printed periodically; otherwise a
    tqdm progress bar is shown and the per-step hooks run silently.
    Logging, image saving, checkpointing, full-set evaluation and the
    single-/multi-object tests are triggered per batch, per epoch or
    after training, as configured.

    Args:
        sess: session bound to the training graph
    """
    utils.thick_line()
    print('Training...')

    # Merge all the summaries and create writers
    train_summary_path = join(self.summary_path, 'train')
    valid_summary_path = join(self.summary_path, 'valid')
    utils.check_dir([train_summary_path, valid_summary_path])

    utils.thin_line()
    print('Generating TensorFLow summary writer...')
    train_writer = tf.summary.FileWriter(train_summary_path, sess.graph)
    valid_writer = tf.summary.FileWriter(valid_summary_path)

    sess.run(tf.global_variables_initializer())
    # One-based batch counter across all epochs; the graph and the
    # per-step hooks receive the zero-based value `step - 1`.
    step = 0

    for epoch_i in range(self.cfg.EPOCHS):

        epoch_start_time = time.time()
        utils.thick_line()
        print('Training on epoch: {}/{}'.format(epoch_i + 1,
                                                self.cfg.EPOCHS))

        utils.thin_line()
        train_batch_generator = utils.get_batches(
            x=self.x_train,
            y=self.y_train,
            imgs=self.imgs_train,
            batch_size=self.cfg.BATCH_SIZE)

        # Without a display step, fall back to a silent progress bar
        silent = not self.cfg.DISPLAY_STEP
        iterator = range(self.n_batch_train)
        if silent:
            iterator = tqdm(iterator,
                            total=self.n_batch_train,
                            ncols=100,
                            unit=' batch')

        for _ in iterator:

            step += 1
            x_batch, y_batch, imgs_batch = next(train_batch_generator)

            # Training optimizer
            feed = {
                self.inputs: x_batch,
                self.labels: y_batch,
                self.input_imgs: imgs_batch,
                self.step: step - 1,
                self.is_training: True
            }
            sess.run(self.optimizer, feed_dict=feed)

            # Display training information
            if self.cfg.DISPLAY_STEP and step % self.cfg.DISPLAY_STEP == 0:
                self._display_status(sess, x_batch, y_batch, imgs_batch,
                                     epoch_i, step - 1)

            # Save training logs
            if self.cfg.SAVE_LOG_STEP and step % self.cfg.SAVE_LOG_STEP == 0:
                self._save_logs(sess, train_writer, valid_writer,
                                x_batch, y_batch, imgs_batch,
                                epoch_i, step - 1)

            # Save reconstruction images
            if (self.cfg.SAVE_IMAGE_STEP
                    and self.cfg.WITH_REC
                    and step % self.cfg.SAVE_IMAGE_STEP == 0):
                self._save_images(sess, self.train_image_path,
                                  x_batch, y_batch, imgs_batch, step - 1,
                                  epoch_i=epoch_i, silent=silent)

            # Save models
            if (self.cfg.SAVE_MODEL_MODE == 'per_batch'
                    and step % self.cfg.SAVE_MODEL_STEP == 0):
                self._save_model(sess, self.saver, step - 1, silent=silent)

            # Evaluate on full set
            if (self.cfg.FULL_SET_EVAL_MODE == 'per_batch'
                    and step % self.cfg.FULL_SET_EVAL_STEP == 0):
                self._eval_on_full_set(sess, epoch_i, step - 1,
                                       silent=silent)

        epoch_number = epoch_i + 1

        # Save model per epoch
        if (self.cfg.SAVE_MODEL_MODE == 'per_epoch'
                and epoch_number % self.cfg.SAVE_MODEL_STEP == 0):
            self._save_model(sess, self.saver, epoch_i)

        # Evaluate on valid set per epoch
        if (self.cfg.FULL_SET_EVAL_MODE == 'per_epoch'
                and epoch_number % self.cfg.FULL_SET_EVAL_STEP == 0):
            self._eval_on_full_set(sess, epoch_i, step - 1)

        # Evaluate on test set per epoch
        if self.cfg.TEST_SO_MODE == 'per_epoch':
            self._test(sess, during_training=True,
                       epoch=epoch_i, step=step, mode='single')

        # Evaluate on multi-objects test set per epoch
        if self.cfg.TEST_MO_MODE == 'per_epoch':
            self._test(sess, during_training=True,
                       epoch=epoch_i, step=step, mode='multi_obj')

        utils.thin_line()
        print('Epoch {}/{} done! Using time: {:.2f}'.format(
            epoch_number, self.cfg.EPOCHS, time.time() - epoch_start_time))

    utils.thick_line()
    print('Training finished! Using time: {:.2f}'.format(
        time.time() - self.start_time))
    utils.thick_line()

    # Evaluate on test set after training
    if self.cfg.TEST_SO_MODE == 'after_training':
        self._test(sess, during_training=True, epoch='end', mode='single')

    # Evaluate on multi-objects test set after training
    if self.cfg.TEST_MO_MODE == 'after_training':
        self._test(sess, during_training=True, epoch='end', mode='multi_obj')

    utils.thick_line()
    print('All task finished! Total time: {:.2f}'.format(
        time.time() - self.start_time))
    utils.thick_line()