def one_stage_train(myModel,
                    data_reader_trn,
                    my_optimizer,
                    loss_criterion,
                    snapshot_dir,
                    log_dir,
                    i_iter,
                    start_epoch,
                    best_val_accuracy=0,
                    data_reader_eval=None,
                    scheduler=None):
    """Train ``myModel`` until ``cfg.training_parameters.max_iter`` iterations.

    Every iteration steps the (optional) scheduler, runs one batch through
    ``compute_a_batch``, back-propagates, clips gradients and steps the
    optimizer. Reports and snapshots are emitted at the intervals read from
    ``cfg.training_parameters``; a snapshot is also taken on the last
    iteration. When the loop ends the scalars are exported to
    ``<log_dir>/all_scalars.json`` and a summary line is printed.

    Args:
        myModel: model handed to ``compute_a_batch`` and ``clip_gradients``.
        data_reader_trn: iterable of batches; iterated once per epoch.
        my_optimizer: object exposing ``zero_grad()`` and ``step()``.
        loss_criterion: forwarded to the batch, report and snapshot helpers.
        snapshot_dir: directory forwarded to ``save_a_snapshot``.
        log_dir: directory for the ``SummaryWriter`` and the JSON export.
        i_iter: iteration counter to resume from.
        start_epoch: epoch counter to resume from.
        best_val_accuracy: best validation accuracy seen so far.
        data_reader_eval: evaluation data forwarded to the report and
            snapshot helpers.
        scheduler: optional object exposing ``step(iteration)``. It is
            skipped when ``None`` (the declared default), which previously
            raised ``AttributeError``.
    """
    report_interval = cfg.training_parameters.report_interval
    snapshot_interval = cfg.training_parameters.snapshot_interval
    max_iter = cfg.training_parameters.max_iter

    avg_accuracy = 0
    accuracy_decay = 0.99
    best_epoch = 0
    writer = SummaryWriter(log_dir)
    best_iter = i_iter
    iepoch = start_epoch
    snapshot_timer = Timer('m')
    report_timer = Timer('s')

    while i_iter < max_iter:
        iepoch += 1
        for batch in data_reader_trn:
            i_iter += 1
            if i_iter > max_iter:
                break

            if scheduler is not None:
                scheduler.step(i_iter)

            my_optimizer.zero_grad()
            scores, total_loss, n_sample = compute_a_batch(
                batch,
                myModel,
                eval_mode=False,
                loss_criterion=loss_criterion,
                add_graph=False,
                log_dir=log_dir)
            total_loss.backward()

            accuracy = scores / n_sample
            # Exponential moving average of the per-batch accuracy.
            avg_accuracy += (1 - accuracy_decay) * (accuracy - avg_accuracy)

            clip_gradients(myModel, i_iter, writer)
            my_optimizer.step()

            if i_iter % report_interval == 0:
                save_a_report(i_iter,
                              total_loss.detach().cpu().item(), accuracy,
                              avg_accuracy, report_timer, writer,
                              data_reader_eval, myModel, loss_criterion)

            if i_iter % snapshot_interval == 0 or i_iter == max_iter:
                best_val_accuracy, best_epoch, best_iter = save_a_snapshot(
                    snapshot_dir, i_iter, iepoch, myModel, my_optimizer,
                    loss_criterion, best_val_accuracy, best_epoch, best_iter,
                    snapshot_timer, data_reader_eval)

    writer.export_scalars_to_json(os.path.join(log_dir, "all_scalars.json"))
    writer.close()
    print("best_acc:%.6f after epoch: %d/%d at iter %d" %
          (best_val_accuracy, best_epoch, iepoch, best_iter))
    sys.stdout.flush()
def download_all_from(conn, file_size):
    """Read from ``conn`` until it returns an empty chunk; return the bytes.

    While receiving, a carriage-return-prefixed status line is printed after
    each chunk: the transfer rate alone when ``file_size`` is None, otherwise
    the progress bar followed by the rate. When ``get_progress_bar`` returns
    None, nothing is printed for that chunk.
    """
    chunk_size = 1024 * 16  # 16KB
    received = bytearray()
    timer = Timer()

    while True:
        chunk = conn.recv(chunk_size)
        if len(chunk) == 0:
            break
        received.extend(chunk)

        if file_size is not None:
            bar = get_progress_bar(
                compute_download_percentage(file_size, len(received)))
            if bar is None:
                continue
            prefix = '\r' + bar + ' '
        else:
            prefix = '\r'

        rate_kb = len(received) / timer.elapsed / 1024
        print(prefix + str(rate_kb) + " KB/s", end='')

    print('', end="\n\n")
    return bytes(received)
def __init__(self, size, length=40):
    """Store the total ``size`` and line ``length`` and start a ``Timer``.

    Raises:
        ValueError: if ``length`` is 2 or less.
    """
    too_short = length <= 2
    if too_short:
        raise ValueError('percent line length must be greater than 2')

    self.__size, self.__length = size, length
    self.__current = 0
    self.__max_len = 1
    self.__timer = Timer()
def test_inference_time(self, sess):
    """Run ``self.network.fixed_images[0]`` 1000 times and log the timer's
    ``average_time``."""
    inference_timer = Timer()
    with sess.as_default(), sess.graph.as_default():
        sess.run(self.init_ops)
        for _ in range(1000):
            inference_timer.tic()
            sess.run(self.network.fixed_images[0])
            inference_timer.toc()
        tf.logging.info(
            'inference time is %f' % inference_timer.average_time)
def processor(self, sess):
    """Run the network over every training ROI and save each result box.

    After loading weights, each minibatch is fed through ``self.result`` /
    ``self.label``. Every returned box is cast to ``int8`` and written as a
    zero-padded, sequentially numbered ``.npy`` file under
    ``<box_savepath>/<split>/POSITIVE`` or ``.../NEGATIVE`` depending on the
    truthiness of its label.
    """
    sess.run(tf.global_variables_initializer())
    self.net.load_weigths(self.arg.weights, sess, self.saver)
    timer = Timer()
    vispy_init()

    # Running file counters, one per output folder.
    saved = {'POSITIVE': 0, 'NEGATIVE': 0}

    split = 'train'
    if split == 'valid':
        n_items = self.dataset.validing_rois_length
    elif split == 'train':
        n_items = self.dataset.training_rois_length
    else:
        assert False, 'There is something wrong in dataset description'

    for idx in range(n_items):
        blobs = self.dataset.get_minibatch(idx, split)
        feed_dict = {
            self.net.lidar3d_data: blobs['lidar3d_data'],
            self.net.lidar_bv_data: blobs['lidar_bv_data'],
            self.net.im_info: blobs['im_info'],
            self.net.keep_prob: 0.5,
            self.net.gt_boxes_bv: blobs['gt_boxes_bv'],
            self.net.gt_boxes_3d: blobs['gt_boxes_3d'],
            self.net.gt_boxes_corners: blobs['gt_boxes_corners'],
            self.net.calib: blobs['calib']
        }

        timer.tic()
        result_, label_ = sess.run([self.result, self.label],
                                   feed_dict=feed_dict)
        timer.toc()

        print('Begin to save data_cnt: ', idx)
        out_dirs = {}
        for folder in ('POSITIVE', 'NEGATIVE'):
            out_dirs[folder] = os.path.join(self.arg.box_savepath, split,
                                            folder)
        for folder in ('POSITIVE', 'NEGATIVE'):
            if not os.path.exists(out_dirs[folder]):
                os.makedirs(out_dirs[folder])

        for box_cnt in range(result_.shape[0]):
            box = result_[box_cnt].astype(np.int8)
            folder = 'POSITIVE' if label_[box_cnt] else 'NEGATIVE'
            filename = os.path.join(out_dirs[folder],
                                    str(saved[folder]).zfill(6) + '.npy')
            saved[folder] += 1
            np.save(filename, box)
def __init__(self, machine: Machine, parent: QWidget=None):
    """Build the widget for ``machine`` and start its periodic redraw."""
    super().__init__(parent)
    self._machine = machine
    self._init_ui()

    # Redraw handler registered on a Timer with interval 1/30
    # (presumably seconds, i.e. ~30 redraws per second - confirm in Timer).
    self._draw_timer = redraw_timer = Timer(interval=1.0 / 30)
    redraw_timer.add_handler(self._draw_state_event)
    redraw_timer.start()

    self._instruction_factory = InstructionFactory()
def __init__(self, screen: Screen, parent: QWidget = None):
    """Build the widget for ``screen`` and start its periodic redraw.

    The on-screen size of one pixel is square: 1/100 of the width reported
    by ``QDesktopWidget().screenGeometry(-1)``, truncated to an int.
    """
    super().__init__(parent)
    self._screen = screen

    desktop_geometry = QDesktopWidget().screenGeometry(-1)
    pixel_side = int(desktop_geometry.width() * 1 / 100)
    self._pixel_width = pixel_side
    self._pixel_height = pixel_side

    self.init_ui()

    # Redraw handler registered on a Timer with interval 1/30
    # (presumably seconds - confirm in Timer).
    self._draw_timer = redraw_timer = Timer(interval=1.0 / 30)
    redraw_timer.add_handler(self._draw_screen_event)
    redraw_timer.start()
def __init__(self,
             machine: Machine,
             sound: bool=False,
             instruction_per_second: int = 500):
    """Build the window, start the machine timers and set up the key map.

    Args:
        machine: stored as ``self._machine``.
        sound: stored as ``self._sound_support``.
        instruction_per_second: the instruction timer interval is
            ``1.0 / instruction_per_second``.
    """
    super().__init__()
    self._machine = machine
    self.init_ui()

    self._sound = QSound('beep.wav')
    self._sound_support = sound

    self._machine_update_timer = cpu_timer = Timer(
        interval=1.0 / instruction_per_second)
    cpu_timer.add_handler(self._execute_instruction)
    cpu_timer.start()

    self._machine_sound_delay_timer = delay_timer = Timer(
        interval=1.0 / 60)  # 60 Hz
    delay_timer.add_handler(self._update_sound_delay)
    delay_timer.start()

    # Keyboard key -> value, listed one keyboard row per line.
    key_rows = (
        ((Qt.Key_1, 1), (Qt.Key_2, 2), (Qt.Key_3, 3), (Qt.Key_4, 0xC)),
        ((Qt.Key_Q, 4), (Qt.Key_W, 5), (Qt.Key_E, 6), (Qt.Key_R, 0xD)),
        ((Qt.Key_A, 7), (Qt.Key_S, 8), (Qt.Key_D, 9), (Qt.Key_F, 0xE)),
        ((Qt.Key_Z, 0xA), (Qt.Key_X, 0x0), (Qt.Key_C, 0xB),
         (Qt.Key_V, 0xF)),
    )
    self._key_dict = {
        qt_key: value
        for row in key_rows for qt_key, value in row
    }
def train_model(self):
    """Network training loop.

    Steps ``self.solver`` one iteration at a time until ``self.solver.iter``
    reaches ``self.max_iters``. The timer's average step time is printed
    every ``10 * self.solver_param.display`` iterations and
    ``self.snapshot()`` is called every ``self.snapshot_iters`` iterations.

    Returns:
        list: the values returned by ``self.snapshot()``, in the order they
        were taken. (Previously the list was built and then discarded.)
    """
    timer = Timer()
    model_paths = []
    # A display value of 0 made the modulo below raise ZeroDivisionError;
    # treat it as "never report" instead.
    report_every = 10 * self.solver_param.display

    while self.solver.iter < self.max_iters:
        # Make one SGD update
        timer.tic()
        self.solver.step(1)
        timer.toc()

        if report_every and self.solver.iter % report_every == 0:
            # Parenthesised single-argument form prints identically on
            # Python 2 and Python 3.
            print('speed: {:.3f}s / iter'.format(timer.average_time))

        if self.solver.iter % self.snapshot_iters == 0:
            model_paths.append(self.snapshot())

    return model_paths
def training(self, sess):
    """Restore checkpoint variables, then plot class-1 score vs. rotation.

    Every variable name found in the checkpoint at ``self.weights`` is
    assigned into the graph; names for which ``tf.get_variable`` raises
    ``ValueError`` are reported and skipped. The softmax of ``self.result``
    is then evaluated for angles 0, 2, ..., 358 (fed through
    ``self.cubic_rpn_grid``) and the ``[0, 1]`` score of each run is plotted,
    saved to ``Rotation.png`` and shown.
    """
    sess.run(tf.global_variables_initializer())

    ckpt_reader = pywrap_tensorflow.NewCheckpointReader(self.weights)
    ckpt_shapes = ckpt_reader.get_variable_to_shape_map()
    glb_var = tf.global_variables()  # unused; kept from the original
    with tf.variable_scope('', reuse=tf.AUTO_REUSE) as scope:
        for var_name in ckpt_shapes:
            try:
                target = tf.get_variable(var_name, trainable=False)
                sess.run(target.assign(ckpt_reader.get_tensor(var_name)))
                print(" Assign pretrain model: " + var_name)
            except ValueError:
                print(" Ignore variable:" + var_name)

    softmax_scores = tf.nn.softmax(self.result)
    timer = Timer()
    vispy_init()

    angles = np.arange(0, 360, 2)
    positive_scores = []
    # DO NOT EDIT the "training_series",for the latter shuffle
    for angle in angles:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        feed_dict = self.cubic_rpn_grid(
            30,
            box_idx=0,
            angel=angle,
            scalar=1.0,  # float(data_idx)/180.*1.0,
            translation=[0, 0, 0])

        timer.tic()
        batch_scores = sess.run(softmax_scores,
                                feed_dict=feed_dict,
                                options=run_options,
                                run_metadata=run_metadata)
        timer.toc()

        batch_scores = np.array(batch_scores)
        predicted = batch_scores.argmax(axis=1)  # unused; kept from original
        positive_scores.append(batch_scores[0, 1])

    plt.plot(angles, positive_scores)
    plt.grid(True, color='black', linestyle='--', linewidth='1')
    plt.title('Rubust Test')
    plt.xlabel('rotated angle metric:degree')
    plt.ylabel('score')
    plt.legend(['positive'])
    plt.savefig('Rotation.png')
    plt.show()
def run_wild(self, sess):
    """Run the test pipeline and save every output strip as a captioned PNG.

    Results go to ``<config.logdir>/wild_results/image_<n>.png``. Each batch
    from ``sess.run`` is a strip made of ``test_real_image`` followed by
    ``fixed_images`` concatenated on axis 2. A black header of the same size
    is drawn with the caption "original" and one caption per entry of
    ``config.selected_attrs``, then stacked on top of the strip.

    The loop ends when the input pipeline raises
    ``tf.errors.OutOfRangeError``.
    """
    # Horizontal distance between captions; presumably the pixel width of
    # one image tile in the strip - confirm against the network config.
    tile_width = 128

    self.wild_results = os.path.join(self.config.logdir, "wild_results")
    tf.gfile.MakeDirs(self.wild_results)
    fnt = ImageFont.truetype('Pillow/Tests/fonts/FreeMono.ttf', 12)

    with sess.as_default(), sess.graph.as_default():
        sess.run(self.init_ops)
        tf.logging.info("Generating results...")
        count = 0
        # (x / 2 + 0.5) * 255: presumably maps [-1, 1] outputs to [0, 255].
        tf_images = (tf.concat([self.network.test_real_image] +
                               self.network.fixed_images, 2) / 2.0 +
                     0.5) * 255

        while True:
            # Only the fetch can exhaust the dataset, so only it is guarded.
            try:
                images = sess.run(tf_images)
            except tf.errors.OutOfRangeError:
                tf.logging.info("End of training dataset.")
                break

            for image in images:
                outputs_img = Image.fromarray(image.astype(np.uint8),
                                              mode='RGB')
                size = outputs_img.size
                txt = Image.new('RGB', (size[0], size[1]), (0, 0, 0))
                dr = ImageDraw.Draw(txt)
                dr.text((0, 60), "original", font=fnt, fill=(255, 255, 255))
                for column, label in enumerate(self.config.selected_attrs,
                                               start=1):
                    dr.text((tile_width * column, 60),
                            label,
                            font=fnt,
                            fill=(255, 255, 255))

                rez = np.concatenate((txt, outputs_img), 0)
                rez = Image.fromarray(rez.astype(np.uint8), mode='RGB')
                # Save the captioned composite. The original built ``rez``
                # and then saved the uncaptioned ``outputs_img``, so the
                # header was never written to disk.
                rez.save(
                    os.path.join(self.wild_results,
                                 "image_{}.png".format(count)))
                count += 1
def get_minibatch(self, _idx=0):
    """Build the input blob for entry ``_idx`` of ``self.test_set``.

    Loads the point cloud, rotates it (the angle is fixed at 0), voxelises
    it with ``voxel_grid``, and loads the matching ``.npy`` feature file
    from ``<data_path>/<dir three levels up>/feature_pcd_name/``.

    Returns:
        dict with keys ``serial_num``, ``lidar3d_data``, ``grid_stack``,
        ``coord_stack``, ``ptsnum_stack``, ``apollo_8feature`` and
        ``voxel_gen_time`` (three ``Timer.average_time`` readings).
    """
    fname = self.test_set[_idx]
    stopwatch = Timer()

    # NOTE(review): all three readings come from one shared Timer. If
    # ``average_time`` is a running mean over tic/toc pairs, the second and
    # third values are not per-phase durations - confirm against Timer.

    # Phase 1: load and rotate the raw point cloud.
    stopwatch.tic()
    lidar_data = pcd2np.from_path(fname)
    angel = 0  # (np_random.rand() - 0.500) * np.pi * 0.9
    points_rot = self.rotation(lidar_data.pc_data, angel)
    stopwatch.toc()
    load_time = stopwatch.average_time

    # Phase 2: voxelise.
    stopwatch.tic()
    grid_voxel = voxel_grid(points_rot, cfg, thread_sum=cfg.CPU_CNT)
    stopwatch.toc()
    voxel_time = stopwatch.average_time

    # Phase 3: load and rotate the pre-computed 8-channel features.
    stopwatch.tic()
    path_parts = fname.split('/')
    feature_file = path_add(self.data_path, path_parts[-3],
                            'feature_pcd_name',
                            path_parts[-1][0:-4] + '.npy')
    apollo_8feature = np.load(feature_file).reshape(
        -1, cfg.CUBIC_SIZE[0], cfg.CUBIC_SIZE[1], 8)
    apollo_8feature_rot = self.apollo_feature_rotation(
        apollo_8feature, degree=angel * 57.29578)
    stopwatch.toc()
    feature_time = stopwatch.average_time

    return {
        'serial_num': path_parts[-1],
        'lidar3d_data': lidar_data.pc_data,
        'grid_stack': grid_voxel['feature_buffer'],
        'coord_stack': grid_voxel['coordinate_buffer'],
        'ptsnum_stack': grid_voxel['number_buffer'],
        'apollo_8feature': apollo_8feature_rot,
        'voxel_gen_time': (load_time, voxel_time, feature_time)
    }
def one_stage_train(main_model,
                    adv_model,
                    data_reader_trn,
                    main_optimizer,
                    adv_optimizer,
                    loss_criterion,
                    snapshot_dir,
                    log_dir,
                    i_iter,
                    start_epoch,
                    best_val_accuracy=0,
                    data_reader_eval=None,
                    data_reader_test=None,
                    scheduler=None,
                    adv_scheduler=None):
    """Jointly train the main model and the adversary until ``max_iter``.

    Each iteration trains the main model on the batch. When
    ``cfg.training_parameters.lambda_q`` is positive, it then trains the
    adversary on the same batch with its loss scaled by ``lambda_q``.
    Learning rates and the gradient norm of the question-embedding
    parameters are logged to two ``SummaryWriter`` instances under
    ``<log_dir>/main`` and ``<log_dir>/adversary``.

    Args:
        main_model: primary model; may be wrapped (exposes ``.module``).
        adv_model: adversary; must expose ``set_lambda``.
        data_reader_trn: iterable of batches; iterated once per epoch.
        main_optimizer: optimizer for ``main_model``.
        adv_optimizer: optimizer for ``adv_model``.
        loss_criterion: forwarded to the batch, report and snapshot helpers.
        snapshot_dir: directory forwarded to ``save_a_snapshot``.
        log_dir: root directory for the summary writers and JSON export.
        i_iter: iteration counter to resume from.
        start_epoch: epoch counter to resume from.
        best_val_accuracy: best validation accuracy seen so far.
        data_reader_eval: evaluation data forwarded to the helpers.
        data_reader_test: test data forwarded to the helpers.
        scheduler: optional LR scheduler for the main optimizer. Stepping
            and LR logging are skipped when ``None`` (the declared default),
            which previously raised ``AttributeError``.
        adv_scheduler: optional LR scheduler for the adversary optimizer;
            handled like ``scheduler``.
    """
    report_interval = cfg.training_parameters.report_interval
    snapshot_interval = cfg.training_parameters.snapshot_interval
    max_iter = cfg.training_parameters.max_iter
    lambda_q = cfg.training_parameters.lambda_q

    main_avg_accuracy = adv_avg_accuracy = 0
    accuracy_decay = 0.99
    best_epoch = 0
    main_writer = SummaryWriter(os.path.join(log_dir, 'main'))
    adv_writer = SummaryWriter(os.path.join(log_dir, 'adversary'))
    best_iter = i_iter
    iepoch = start_epoch
    snapshot_timer = Timer('m')
    report_timer = Timer('s')

    # Unwrap a wrapped model (one exposing ``.module``) if present.
    if hasattr(main_model, 'module'):
        q_emb = main_model.module.question_embedding_models
    else:
        q_emb = main_model.question_embedding_models

    print("MAX ITER: {}".format(max_iter))

    while i_iter < max_iter:
        iepoch += 1
        main_score_epoch = 0
        n_sample_tot = 0
        for batch in data_reader_trn:
            i_iter += 1
            if i_iter > max_iter:
                break

            if scheduler is not None:
                scheduler.step(i_iter)
                main_writer.add_scalar('learning_rate',
                                       scheduler.get_lr()[0], i_iter)
            if adv_scheduler is not None:
                adv_scheduler.step(i_iter)
                adv_writer.add_scalar('learning_rate',
                                      adv_scheduler.get_lr()[0], i_iter)

            # Run main model
            main_optimizer.zero_grad()
            main_scores, main_loss, n_sample = compute_a_batch(
                batch,
                main_model,
                run_fn=one_stage_run_model,
                eval_mode=False,
                loss_criterion=loss_criterion)
            main_loss.backward()
            main_qnorm = get_grad_norm(q_emb.parameters())
            main_writer.add_scalar('Q_norm', main_qnorm, i_iter)

            main_accuracy = main_scores / n_sample
            # Exponential moving average of the per-batch accuracy.
            main_avg_accuracy += (1 - accuracy_decay) * (main_accuracy -
                                                         main_avg_accuracy)
            main_score_epoch += main_scores
            n_sample_tot += n_sample

            clip_gradients(main_model, i_iter, main_writer)
            assert (check_params_and_grads(main_model))
            main_optimizer.step()

            # Run adv model
            if lambda_q > 0:
                lambda_grl = lambda_grl_scheduler(i_iter)
                adv_model.set_lambda(lambda_grl)

                adv_optimizer.zero_grad()
                adv_scores, adv_loss_q, n_sample = compute_a_batch(
                    batch,
                    adv_model,
                    run_fn=one_stage_run_adv,
                    eval_mode=False,
                    loss_criterion=loss_criterion)
                adv_accuracy = adv_scores / n_sample
                adv_avg_accuracy += (1 - accuracy_decay) * (adv_accuracy -
                                                            adv_avg_accuracy)
                adv_loss = lambda_q * adv_loss_q
                adv_loss.backward()
                adv_qnorm = get_grad_norm(q_emb.parameters())
                adv_writer.add_scalar('Q_norm', adv_qnorm, i_iter)

                clip_gradients(adv_model, i_iter, adv_writer)
                assert (check_params_and_grads(adv_model))
                adv_optimizer.step()
            else:
                # Adversary disabled: placeholders for the report below.
                adv_accuracy = 0
                adv_loss = torch.zeros(1)

            if i_iter % report_interval == 0:
                save_a_report(i_iter, main_loss.item(), main_accuracy,
                              main_avg_accuracy, report_timer, main_writer,
                              data_reader_eval, data_reader_test, main_model,
                              'main', loss_criterion)
                save_a_report(i_iter, adv_loss.item(), adv_accuracy,
                              adv_avg_accuracy, report_timer, adv_writer,
                              data_reader_eval, data_reader_test, adv_model,
                              'adv', loss_criterion)

            if i_iter % snapshot_interval == 0 or i_iter == max_iter:
                main_train_acc = main_score_epoch / n_sample_tot
                best_val_accuracy, best_epoch, best_iter = save_a_snapshot(
                    snapshot_dir, i_iter, iepoch, main_model, adv_model,
                    main_optimizer, adv_optimizer, loss_criterion,
                    best_val_accuracy, best_epoch, best_iter, snapshot_timer,
                    data_reader_eval, data_reader_test, main_train_acc)

    main_writer.export_scalars_to_json(
        os.path.join(log_dir, "all_scalars.json"))
    main_writer.close()
    adv_writer.close()
    print("best_acc:%.6f after epoch: %d/%d at iter %d" %
          (best_val_accuracy, best_epoch, iepoch, best_iter))
    sys.stdout.flush()
def main(argv):
    """Entry point: configure, train the main/adversary models, then test.

    Steps, in order: parse arguments and seed torch; derive the snapshot and
    tensorboard directories (wiping them when ``--force_restart`` is set);
    dump the config; build the models and optimizers; resume from the most
    recently created ``model_*.pth`` snapshot if one exists; train with
    ``one_stage_train``; finally evaluate ``best_model.pth`` on the test set
    if that file exists.

    Args:
        argv: not used inside this function.

    Returns:
        tuple: ``(acc_test, loss_test)``; both are 0 when no
        ``best_model.pth`` was found.
    """
    prg_timer = Timer()
    args = parse_args()
    config_file = args.config
    # A non-positive seed argument means "pick a random seed".
    seed = args.seed if args.seed > 0 else random.randint(1, 100000)
    process_config(config_file, args.config_overwrite)
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)
    basename = 'default' \
        if args.config is None else os.path.basename(args.config)
    cmd_cfg_obj = demjson.decode(args.config_overwrite) \
        if args.config_overwrite is not None else None
    middle_name, final_name = get_output_folder_name(basename, cmd_cfg_obj, seed, args.suffix)
    out_dir = args.out_dir if args.out_dir is not None else os.getcwd()
    snapshot_dir = os.path.join(out_dir, "results", middle_name, final_name)
    boards_dir = os.path.join(out_dir, "boards", middle_name, final_name)
    # A forced restart discards earlier snapshots and tensorboard logs.
    if args.force_restart:
        if os.path.exists(snapshot_dir):
            shutil.rmtree(snapshot_dir)
        if os.path.exists(boards_dir):
            shutil.rmtree(boards_dir)
    os.makedirs(snapshot_dir, exist_ok=True)
    os.makedirs(boards_dir, exist_ok=True)
    print("Results: {}".format(snapshot_dir))
    print("Tensorboard: {}".format(boards_dir))
    print("fast data reader = " + str(cfg['data']['image_fast_reader']))
    print("use cuda = " + str(use_cuda))
    print("Adversary nhid: {}".format(cfg.adv_model.nhid))
    print("lambda_q: {}".format(cfg.training_parameters.lambda_q))
    print("lambda_grl: {}".format(cfg.training_parameters.lambda_grl))
    print("lambda_grl_start: {}".format(
        cfg.training_parameters.lambda_grl_start))
    print("lambda_grl_steps: {}".format(
        cfg.training_parameters.lambda_grl_steps))
    if cfg.training_parameters.lambda_grl > 0:
        print("WARNING: lambda_grl {} is pos., but GRL expects neg. values".
              format(cfg.training_parameters.lambda_grl))
    print("LRs: {} {}".format(cfg.optimizer.par.lr, cfg.adv_optimizer.par.lr))
    print("Static LR: {}".format(cfg.training_parameters.static_lr))
    # dump the config file to snap_shot_dir
    config_to_write = os.path.join(snapshot_dir, "config.yaml")
    dump_config(cfg, config_to_write)
    train_dataSet = prepare_train_data_set(**cfg['data'], **cfg['model'])
    print("=> Loaded trainset: {} examples".format(len(train_dataSet)))
    main_model, adv_model = build_model(cfg, train_dataSet)
    # Reach through a wrapper (one exposing ``.module``) to the sub-modules.
    model = main_model
    if hasattr(main_model, 'module'):
        model = main_model.module
    # Per-group optimizer settings: the image feature encoders use a
    # learning rate of 0.1 x the configured base rate.
    params = [{
        'params': model.image_embedding_models_list.parameters()
    }, {
        'params': model.question_embedding_models.parameters()
    }, {
        'params': model.multi_modal_combine.parameters()
    }, {
        'params': model.classifier.parameters()
    }, {
        'params': model.image_feature_encode_list.parameters(),
        'lr': cfg.optimizer.par.lr * 0.1
    }]
    main_optim = getattr(optim, cfg.optimizer.method)(params, **cfg.optimizer.par)
    # NOTE(review): the adversary uses cfg.optimizer.method (the main
    # optimizer's class) with cfg.adv_optimizer.par - confirm this is
    # intended rather than cfg.adv_optimizer.method.
    adv_optim = getattr(optim, cfg.optimizer.method)(adv_model.parameters(), **cfg.adv_optimizer.par)
    i_epoch = 0
    i_iter = 0
    best_accuracy = 0
    # Resume from the most recently created snapshot unless restarting.
    if not args.force_restart:
        md_pths = os.path.join(snapshot_dir, "model_*.pth")
        files = glob.glob(md_pths)
        if len(files) > 0:
            latest_file = max(files, key=os.path.getctime)
            print("=> Loading save from {}".format(latest_file))
            info = torch.load(latest_file)
            i_epoch = info['epoch']
            i_iter = info['iter']
            main_model.load_state_dict(info['state_dict'])
            main_optim.load_state_dict(info['optimizer'])
            adv_model.load_state_dict(info['adv_state_dict'])
            adv_optim.load_state_dict(info['adv_optimizer'])
            if 'best_val_accuracy' in info:
                best_accuracy = info['best_val_accuracy']
    scheduler = get_optim_scheduler(main_optim)
    adv_scheduler = get_optim_scheduler(adv_optim)
    my_loss = get_loss_criterion(cfg.loss)
    dataset_val = prepare_eval_data_set(**cfg['data'], **cfg['model'])
    print("=> Loaded valset: {} examples".format(len(dataset_val)))
    dataset_test = prepare_test_data_set(**cfg['data'], **cfg['model'])
    print("=> Loaded testset: {} examples".format(len(dataset_test)))
    data_reader_trn = DataLoader(dataset=train_dataSet, batch_size=cfg.data.batch_size, shuffle=True, num_workers=cfg.data.num_workers)
    data_reader_val = DataLoader(dataset_val, shuffle=True, batch_size=cfg.data.batch_size, num_workers=cfg.data.num_workers)
    data_reader_test = DataLoader(dataset_test, shuffle=True, batch_size=cfg.data.batch_size, num_workers=cfg.data.num_workers)
    main_model.train()
    adv_model.train()
    print("=> Start training...")
    one_stage_train(main_model, adv_model, data_reader_trn, main_optim, adv_optim, my_loss, data_reader_eval=data_reader_val, data_reader_test=data_reader_test, snapshot_dir=snapshot_dir, log_dir=boards_dir, start_epoch=i_epoch, i_iter=i_iter, scheduler=scheduler, adv_scheduler=adv_scheduler, best_val_accuracy=best_accuracy)
    print("=> Training complete.")
    model_file = os.path.join(snapshot_dir, "best_model.pth")
    if os.path.isfile(model_file):
        print("=> Testing best model...")
        # A fresh model is built so the best weights replace the final ones.
        main_model, _ = build_model(cfg, dataset_test)
        main_model.load_state_dict(torch.load(model_file)['state_dict'])
        main_model.eval()
        print("=> Loaded model from file {}".format(model_file))
        print("=> Start testing...")
        acc_test, loss_test, _ = one_stage_eval_model(data_reader_test, main_model, one_stage_run_model, my_loss)
        print("Final results:\nacc: {:.4f}\nloss: {:.4f}".format(
            acc_test, loss_test))
        # NOTE(review): the file is named result_on_val.txt but the line
        # appended here is the TEST accuracy, and it has no trailing newline.
        result_file = os.path.join(snapshot_dir, 'result_on_val.txt')
        with open(result_file, 'a') as fid:
            fid.write('FINAL RESULT ON TEST: {:.6f}'.format(acc_test))
    else:
        print("File {} not found. Skipping testing.".format(model_file))
        acc_test = loss_test = 0
    # print("BEGIN PREDICTING ON TEST/VAL set...")
    # if 'predict' in cfg.run:
    #     print_eval(prepare_test_data_set, "test")
    # if cfg.run == 'train+val':
    #     print_eval(prepare_eval_data_set, "val")
    print("total runtime(h): %s" % prg_timer.end())
    return (acc_test, loss_test)
def train(self, fold_num):
    """Train the model for fold ``fold_num``.

    Builds the inference, loss and optimizer ops, then runs
    ``self.train_step`` steps. Every step fetches one training and one
    validation batch from ``self.provider``, runs a training update followed
    by a validation pass (with ``self.is_training`` fed as False), prints
    the losses and accuracies, and writes both summaries. The printed line
    is also appended to ``<output_path>/loss/loss_<cfg.name><fold_num>.txt``
    every 5 steps, and a checkpoint is saved every ``self.save_interval``
    steps under ``<output_path>/model/model_saved_<fold_num>``.

    Args:
        fold_num: fold index; used in the loss-log and checkpoint names.
    """
    train_holder, seg_holder, dst_holder = self.provider.get_train_holder()
    model = self.model_class(self.is_training)
    inference_op = model.inference_op(train_holder)

    # Exact comparison kept so that only a value equal to True enables the
    # distance-weight path, as before.
    use_dst_weight = cfg.use_dst_weight == True  # noqa: E712
    if use_dst_weight:
        loss_op, acc_op = model.loss_op(inference_op, seg_holder, dst_holder)
        feed_keys = (train_holder, seg_holder, dst_holder)
    else:
        loss_op, acc_op = model.loss_op(inference_op, seg_holder)
        feed_keys = (train_holder, seg_holder)

    train_op = self._get_optimizer(loss_op)
    merged = tf.summary.merge_all()
    self._count_trainables()

    log_output_path = os.path.join(self.output_path, "log")
    model_output_path = os.path.join(self.output_path, "model")
    loss_txt_path = os.path.join(self.output_path, "loss")
    for directory in (log_output_path, model_output_path, loss_txt_path):
        # exist_ok avoids the check-then-create race.
        os.makedirs(directory, exist_ok=True)

    train_writer = tf.summary.FileWriter(
        os.path.join(log_output_path, "train"))
    test_writer = tf.summary.FileWriter(os.path.join(log_output_path, "val"))
    line_buffer = 1

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    loss_log_file = (loss_txt_path + '/loss_' + cfg.name + str(fold_num) +
                     '.txt')
    try:
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(max_to_keep=1)
            train_timer = Timer()
            load_timer = Timer()
            # if model checkpoint exist, then load last checkpoint
            # self._load_model(saver, sess, model_output_path)
            with open(file=loss_log_file, mode='w',
                      buffering=line_buffer) as loss_log:
                for step in range(self.train_step):
                    load_timer.tic()
                    train_batch = self.provider.get_train_value(
                        with_weight=cfg.use_dst_weight)
                    val_batch = self.provider.get_val_value(
                        with_weight=cfg.use_dst_weight)
                    load_timer.toc()

                    # Pair placeholders with batch parts: (image, label)
                    # or (image, label, weights) depending on the mode.
                    train_feed = dict(zip(feed_keys, train_batch))
                    val_feed = dict(zip(feed_keys, val_batch))
                    val_feed[self.is_training] = False

                    train_timer.tic()
                    train_merge, train_loss, _, train_acc = sess.run(
                        [merged, loss_op, train_op, acc_op],
                        feed_dict=train_feed)
                    valid_merge, val_loss, val_acc = sess.run(
                        [merged, loss_op, acc_op], feed_dict=val_feed)
                    train_timer.toc()

                    if np.mod(step + 1, self.save_interval) == 0:
                        saver.save(
                            sess,
                            os.path.join(self.output_path,
                                         "model/model_saved_%d" % fold_num))

                    output_format = "train loss: %f, valid loss: %f, train accuracy: %f, val accuracy: %f, step: %d" % \
                                    (train_loss, val_loss, train_acc, val_acc, step)
                    print(output_format)
                    train_writer.add_summary(train_merge, step)
                    test_writer.add_summary(valid_merge, step)
                    if step % 5 == 0:
                        loss_log.write(output_format + '\n')
    finally:
        # Close the event files even when training aborts.
        train_writer.close()
        test_writer.close()
def testing(self, sess, test_writer):
    """Run inference over ``self.epoch`` minibatches and visualise results.

    For every index the network is run for the cubic classification scores,
    the 3D RPN ROIs and the merged summaries. Depending on module-level
    flags the results are published to ROS topics or shown with vispy, a
    Chrome trace is written, and summaries are added to ``test_writer``.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source; the nesting of the ``with`` blocks and of the statements that
    follow each ``pass`` is a best guess - confirm against the original.
    """
    # =======================================
    # Optional ROS publishers (only when the USE_ROS flag is set).
    if USE_ROS:
        import rospy
        from sensor_msgs.msg import PointCloud,Image
        from visualization_msgs.msg import MarkerArray, Marker
        from tools.data_visualize import Boxes_labels_Gen, Image_Gen,PointCloud_Gen
        rospy.init_node('rostensorflow')
        pub = rospy.Publisher('prediction', PointCloud, queue_size=1000)
        img_pub = rospy.Publisher('images_rgb', Image, queue_size=1000)
        box_pub = rospy.Publisher('label_boxes', MarkerArray, queue_size=1000)
        rospy.loginfo("ROS begins ...")
    # =======================================
    with tf.name_scope("Inference"):
        # RNet_rpn_yaw_pred = self.net.get_output('RNet_theta')[1]
        # RNet_rpn_yaw_gt_delta = self.net.get_output('cubic_grid')[1]
        # RNet_rpn_yaw_pred_toshow = RNet_rpn_yaw_pred+RNet_rpn_yaw_gt_delta
        rpn_rois_3d = self.net.get_output('rpn_rois')[1]
    with tf.name_scope('view_rpn_bv_tb'):
        # roi_bv = self.net.get_output('rpn_rois')[0]
        # data_bv = self.net.lidar_bv_data
        # image_rpn = tf.reshape(test_show_rpn_tf(data_bv,roi_bv), (1, 601, 601, -1))
        # tf.summary.image('lidar_bv_test', image_rpn)
        # Sum watcher[0] over axis -2, move axis 2 to the front, and reshape
        # to [-1, 30, 30, 1] for the image summary.
        feature = tf.reshape(tf.transpose(tf.reduce_sum(self.net.watcher[0],axis=-2),[2,0,1]),[-1,30,30,1])
        tf.summary.image('shape_extractor_P1', feature,max_outputs=50)
        # feature = tf.reshape(tf.transpose(tf.reduce_sum(self.net.watcher[1],axis=-1),[2,0,1]),[-1,30,30,1])
        # tf.summary.image('shape_extractor_P2', feature,max_outputs=10)
        # feature = tf.reshape(tf.transpose(tf.reduce_sum(self.net.watcher[-1],axis=-1),[2,0,1]),[-1,30,30,1])
        # tf.summary.image('shape_extractor_N1', feature,max_outputs=3)
        # feature = tf.reshape(tf.transpose(tf.reduce_sum(self.net.watcher[-2],axis=-1),[2,0,1]),[-1,30,30,1])
        # tf.summary.image('shape_extractor_N2', feature,max_outputs=3)
        merged = tf.summary.merge_all()
    with tf.name_scope('load_weights'):
        print 'Loading pre-trained model weights from {:s}'.format(self.args.weights)
        # Two weight files are loaded; the second call passes
        # specical_flag=True.
        self.net.load_weigths(self.args.weights, sess, self.saver)
        self.net.load_weigths(self.args.weights_cube, sess, self.saver,specical_flag=True)
    vispy_init() # TODO: Essential step (before sess.run) for using vispy because of a bug in OpenGL or TensorFlow
    timer = Timer()
    cubic_cls_score = tf.reshape(self.net.get_output('cubic_cnn'), [-1, 2])
    for idx in range(0,self.epoch,1):
        # index_ = input('Type a new index: ')
        blobs = self.dataset.get_minibatch(idx)
        feed_dict = {
            self.net.lidar3d_data: blobs['lidar3d_data'],
            self.net.lidar_bv_data: blobs['lidar_bv_data'],
            self.net.im_info: blobs['im_info'],
            # self.net.calib: blobs['calib']
        }
        # Full tracing is requested on every run; the trace is only written
        # to disk in the DEBUG_TIMELINE branch below.
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        timer.tic()
        cubic_cls_score_,rpn_rois_3d_,summary = sess.run([cubic_cls_score,rpn_rois_3d,merged] ,feed_dict=feed_dict, options=run_options, run_metadata=run_metadata)
        timer.toc()
        if idx % 3 ==0 and cfg.TEST.DEBUG_TIMELINE:
            # chrome://tracing
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            trace_file = open(cfg.LOG_DIR + '/' +'testing-step-'+ str(idx).zfill(7) + '.ctf.json', 'w')
            trace_file.write(trace.generate_chrome_trace_format(show_memory=False))
            trace_file.close()
        if idx % cfg.TEST.ITER_DISPLAY == 0:
            pass
            print 'Test: %06d/%06d speed: %.4f s / iter' % (idx+1, self.epoch, timer.average_time)
        if VISION_DEBUG:
            scan = blobs['lidar3d_data']
            img = blobs['image_data']
            # Class index with the higher score for each row.
            cubic_cls_value = cubic_cls_score_.argmax(axis=1)
            if USE_ROS:
                import numpy as np
                from tools.data_visualize import PointCloud_Gen,Boxes_labels_Gen,Image_Gen
                pointcloud = PointCloud_Gen(scan)
                label_boxes = Boxes_labels_Gen(rpn_rois_3d_, ns='Predict')
                img_ros = Image_Gen(img)
                pub.publish(pointcloud)
                img_pub.publish(img_ros)
                box_pub.publish(label_boxes)
            else:
                boxes = BoxAry_Theta(pre_box3d=rpn_rois_3d_,pre_cube_cls=cubic_cls_value) # RNet_rpn_yaw_pred_toshow_ rpn_rois_3d_[:,-1]
                pcd_vispy(scan, img, boxes,index=idx,
                          save_img=False,#cfg.TEST.SAVE_IMAGE,
                          visible=True,
                          name='CubicNet testing')
        # ``idx % 1 == 0`` is always true, so only the TENSORBOARD flag
        # gates the summary write.
        if idx % 1 == 0 and cfg.TEST.TENSORBOARD:
            test_writer.add_summary(summary, idx)
            pass
    print 'Testing process has done, happy every day !'
def train(self):
    """Train the network; TFRecords are not used for full images here.

    Builds the loss and optimizer ops for the configured model, then runs
    ``self.train_step`` steps. Every step fetches one training and one
    validation batch (each with its own weight map), runs a training update
    followed by a validation pass (``self.is_training`` fed as False),
    prints timing and loss information, and writes both summaries. A short
    line is written to ``<output_path>/loss/loss_<cfg.name>.txt`` every 10
    steps and a checkpoint is saved every ``self.save_interval`` steps under
    ``<output_path>/model/model_saved``.
    """
    train_holder, seg_holder, dst_holder = self.provider.get_train_holder()
    model = self.model_class(self.is_training)

    if self.model_name == 'cnn_v2':
        model.build_model(train_holder, seg_holder)
        total_loss = model.total_loss
        loss_op = model.entropy_loss
        train_op = self._get_optimizer(total_loss)
    else:
        inference_op = model.inference_op(train_holder)
        if cfg.use_dst_weight == True:  # noqa: E712 - exact match kept
            loss_op = model.loss_op(inference_op, seg_holder, dst_holder)
        else:
            loss_op = model.loss_op(inference_op, seg_holder)
        train_op = self._get_optimizer(loss_op)

    total_dice_loss = model.total_dice_loss
    total_weight_loss = model.total_weight_loss
    # The 'cnn_v2' branch never defined these two names, so the step loop
    # raised NameError for that model. Reading them for every model fails
    # early, with a clearer AttributeError, if the model lacks them.
    # NOTE(review): confirm the cnn_v2 model exposes both attributes.
    main_dice_loss = model.main_dice_loss
    main_weight_loss = model.main_weight_loss

    merged = tf.summary.merge_all()
    self._count_trainables()

    log_output_path = os.path.join(self.output_path, "log")
    model_output_path = os.path.join(self.output_path, "model")
    loss_txt_path = os.path.join(self.output_path, "loss")
    for directory in (log_output_path, model_output_path, loss_txt_path):
        # exist_ok avoids the check-then-create race.
        os.makedirs(directory, exist_ok=True)

    train_writer = tf.summary.FileWriter(
        os.path.join(log_output_path, "train"))
    test_writer = tf.summary.FileWriter(os.path.join(log_output_path, "val"))
    line_buffer = 1

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    try:
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            train_timer = Timer()
            load_timer = Timer()
            # if model checkpoint exist, then load last checkpoint
            # self._load_model(saver, sess, model_output_path)
            with open(file=loss_txt_path + '/loss_' + cfg.name + '.txt',
                      mode='w',
                      buffering=line_buffer) as loss_log:
                for step in range(self.train_step):
                    load_timer.tic()
                    # NOTE(review): batches are requested with
                    # cfg.use_weight while the loss above is selected by
                    # cfg.use_dst_weight - confirm both flags are intended.
                    image, label, weight = self.provider.get_train_value(
                        with_weight=cfg.use_weight)
                    # Separate name: the validation weights used to
                    # overwrite ``weight`` before the training step, so
                    # training was fed the validation weight map.
                    image_val, label_val, weight_val = \
                        self.provider.get_val_value(
                            with_weight=cfg.use_weight)
                    load_timer.toc()

                    train_timer.tic()
                    (train_merge, train_loss, t_dice_loss, t_weight_loss,
                     m_dice_loss, m_weight_loss, _) = sess.run(
                         [
                             merged, loss_op, total_dice_loss,
                             total_weight_loss, main_dice_loss,
                             main_weight_loss, train_op
                         ],
                         feed_dict={
                             train_holder: image,
                             seg_holder: label,
                             dst_holder: weight
                         })
                    valid_merge, val_loss = sess.run(
                        [merged, loss_op],
                        feed_dict={
                            train_holder: image_val,
                            seg_holder: label_val,
                            dst_holder: weight_val,
                            self.is_training: False
                        })
                    train_timer.toc()

                    output_format = '[Epoch]%d, Speed: %.3fs/iter,Load: %.3fs/iter, Remain: %s' \
                                    ' train_loss: %.8f, valid_loss: %.8f\n' \
                                    '[Loss]dice_loss: %.8f, weight_loss: %.8f, main_dice_loss: %.8f, main_weight_loss: %.8f\n' \
                                    % (step, train_timer.average_time, load_timer.average_time,
                                       train_timer.remain(step, self.train_step), train_loss, val_loss,
                                       t_dice_loss, t_weight_loss, m_dice_loss, m_weight_loss)
                    print(output_format)
                    train_writer.add_summary(train_merge, step)
                    test_writer.add_summary(valid_merge, step)

                    if step % 10 == 0:
                        loss_log.write(
                            'train loss: %.5f, valid_loss: %.5f, glabl step: %d'
                            % (train_loss, val_loss, step) + '\n')
                    if np.mod(step + 1, self.save_interval) == 0:
                        saver.save(
                            sess,
                            os.path.join(self.output_path,
                                         "model/model_saved"))
    finally:
        # Close the event files even when training aborts.
        train_writer.close()
        test_writer.close()
def training(self, sess, train_writer): with tf.name_scope('loss_cubic'): rpn_cls_score = tf.reshape(self.net.get_output('rpn_cls_score'), [-1, 2]) rpn_label = tf.reshape( self.net.get_output('rpn_anchors_label')[0], [-1]) rpn_keep = tf.where(tf.not_equal(rpn_label, -1)) rpn_bbox_keep = tf.where(tf.equal( rpn_label, 1)) # only regression positive anchors rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, rpn_keep), [-1, 2]) rpn_label = tf.reshape(tf.gather(rpn_label, rpn_keep), [-1]) # cubic_cls_score = tf.reshape(self.net.get_output('cubic_cnn'), [-1, 2]) # cubic_cls_labels = tf.reshape(tf.cast(self.net.get_output('rpn_rois')[0][:, -2], tf.int64), [-1]) if not cfg.TRAIN.FOCAL_LOSS: rpn_cross_entropy = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=rpn_cls_score, labels=rpn_label)) # cubic_cross_entropy = tf.reduce_mean( # tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cubic_cls_score, labels=cubic_cls_labels)) else: #### use as reference for pos&neg proposal balance # self.cls_loss = alpha * ( # -self.pos_equal_one * tf.log(self.p_pos + small_addon_for_BCE)) / self.pos_equal_one_sum \ # + beta * (-self.neg_equal_one * tf.log( # 1 - self.p_pos + small_addon_for_BCE)) / self.neg_equal_one_sum # self.cls_loss = tf.reduce_sum(self.cls_loss) #### # alpha = [0.75,0.25] # 0.25 for label=1 gamma = 3 rpn_cls_probability = tf.nn.softmax(rpn_cls_score) # cubic_cls_probability = tf.nn.softmax(cubic_cls_score) # formula : Focal Loss for Dense Object Detection: FL(p)= -((1-p)**gama)*log(p) rpn_cross_entropy = tf.reduce_mean(-tf.reduce_sum( tf.one_hot(rpn_label, depth=2) * ((1 - rpn_cls_probability)**gamma) * tf.log([cfg.EPS, cfg.EPS] + rpn_cls_probability), axis=1)) # cubic_cross_entropy = tf.reduce_mean(-tf.reduce_sum( # tf.one_hot(cubic_cls_labels, depth=2) * ((1 - cubic_cls_probability) ** gamma) * tf.log( # [cfg.EPS, cfg.EPS] + cubic_cls_probability), axis=1)) # bounding box regression L1 loss rpn_bbox_pred = 
self.net.get_output('rpn_bbox_pred') rpn_bbox_targets = self.net.get_output('rpn_anchors_label')[1] rpn_bbox_pred = tf.reshape( tf.gather(tf.reshape(rpn_bbox_pred, [-1, 3]), rpn_bbox_keep), [-1, 3]) rpn_bbox_targets = tf.reshape( tf.gather(tf.reshape(rpn_bbox_targets, [-1, 3]), rpn_bbox_keep), [-1, 3]) rpn_smooth_l1 = self.modified_smooth_l1(3.0, rpn_bbox_pred, rpn_bbox_targets) rpn_loss_box = tf.multiply( tf.reduce_mean( tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1])), 1.0) # loss = rpn_cross_entropy + rpn_loss_box + cubic_cross_entropy loss = rpn_cross_entropy with tf.name_scope('train_op'): global_step = tf.Variable(1, trainable=False, name='Global_Step') lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step, 10000, 0.92, name='decay-Lr') # train_op = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step) with tf.name_scope('train_cubic'): tf.summary.scalar('total_loss', loss) # tf.summary.scalar('rpn_loss_box', rpn_loss_box) # tf.summary.scalar('rpn_cross_entropy', rpn_cross_entropy) # tf.summary.scalar('cubic_cross_entropy', cubic_cross_entropy) recall_RPN = 0. 
# bv_anchors = self.net.get_output('rpn_anchors_label')[2] # roi_bv = self.net.get_output('rpn_rois')[0] # (x1,y1),(x2,y2),score,label # data_bv = self.net.lidar_bv_data # data_gt = self.net.gt_boxes_bv # (x1,y1),(x2,y2),label # # gt_box = tf.concat([data_gt,data_gt[:, 4]], axis=1) # bbox = tf.concat([roi_bv,data_gt],axis=0) # image_rpn = tf.reshape(show_rpn_tf(data_bv, bbox), (1, 601, 601, -1)) # tf.summary.image('lidar_bv_test', image_rpn) glb_var = tf.global_variables() for i in range(len(glb_var)): # print glb_var[i].name if 'moving' not in str(glb_var[i].name): if 'Adam' not in str(glb_var[i].name): if 'weights' not in str(glb_var[i].name): if 'rpn' not in str(glb_var[i].name): if 'biases' not in str(glb_var[i].name): if 'beta' not in str(glb_var[i].name): if 'gamma' not in str(glb_var[i].name): if 'batch' not in str( glb_var[i].name): tf.summary.histogram( glb_var[i].name, glb_var[i]) merged = tf.summary.merge_all() with tf.name_scope('valid_cubic'): epoch_rpn_recall = tf.placeholder(dtype=tf.float32) rpn_recall_smy_op = tf.summary.scalar('rpn_recall', epoch_rpn_recall) epoch_cubic_recall = tf.placeholder(dtype=tf.float32) cubic_recall_smy_op = tf.summary.scalar('cubic_recall', epoch_cubic_recall) epoch_cubic_precise = tf.placeholder(dtype=tf.float32) cubic_prec_smy_op = tf.summary.scalar('cubic_precise', epoch_cubic_precise) sess.run(tf.global_variables_initializer()) if self.args.fine_tune: print 'Loading pre-trained model weights from {:s}'.format( self.args.weights) self.net.load_weigths(self.args.weights, sess, self.saver) trainable_var_for_chk = tf.trainable_variables( ) #tf.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) print 'Variables to train: ', trainable_var_for_chk timer = Timer() rpn_rois = self.net.get_output('rpn_rois') cubic_grid = self.net.get_output('cubic_grid') # cubic_cnn= self.net.get_output('cubic_cnn') if DEBUG: vispy_init( ) # TODO: Essential step(before sess.run) for using vispy beacuse of the bug of opengl or tensorflow # 
station = pcd_vispy_client(MSG_QUEUE,title='Vision') # vision_qt = Process(target=station.get_thread_data, args=(MSG_QUEUE,)) # vision_qt.start() # print 'Process vision_qt started ...' training_series = range(17, self.epoch) # self.epoch for epo_cnt in range(self.args.epoch_iters): for data_idx in training_series: # DO NOT EDIT the "training_series",for the latter shuffle iter = global_step.eval( ) # function "minimize()"will increase global_step blobs = self.dataset.get_minibatch(data_idx, 'train') # get one batch feed_dict = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 0.5, self.net.gt_boxes_bv: blobs['gt_boxes_bv'], self.net.gt_boxes_3d: blobs['gt_boxes_3d'], self.net.gt_boxes_corners: blobs['gt_boxes_corners'], self.net.calib: blobs['calib'] } run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() timer.tic() rpn_rois_, cubic_grid_, loss_, merged_ = sess.run( [rpn_rois, cubic_grid, loss, merged], feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) timer.toc() recall_RPN = recall_RPN + rpn_rois_[2][0] # cubic_result = cubic_cls_score_.argmax(axis=1) # one_hist = fast_hist(cubic_cls_labels_, cubic_result) cubic_car_cls_prec = 0 #one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1]+1e-5) cubic_car_cls_recall = 0 #one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0]+1e-5) if iter % cfg.TRAIN.ITER_DISPLAY == 0: print 'Iter: %d/%d, Serial_num: %s, speed: %.3fs/iter, loss: %.3f, rpn_recall: %.3f, cubic classify precise: %.3f,recall: %.3f' % \ (iter,self.args.epoch_iters * self.epoch, blobs['serial_num'],timer.average_time,loss_,recall_RPN / cfg.TRAIN.ITER_DISPLAY,cubic_car_cls_prec,cubic_car_cls_recall) recall_RPN = 0. 
# print 'divine: ', str(cubic_result).translate(None,'\n') # print 'labels: ', str(cubic_cls_labels_).translate(None,'\n'),'\n' if iter % 20 == 0 and cfg.TRAIN.TENSORBOARD: train_writer.add_summary(merged_, iter) pass if (iter % 4000 == 0 and cfg.TRAIN.DEBUG_TIMELINE) or (iter == 100): #chrome://tracing trace = timeline.Timeline( step_stats=run_metadata.step_stats) trace_file = open( cfg.LOG_DIR + '/' + 'training-step-' + str(iter).zfill(7) + '.ctf.json', 'w') trace_file.write( trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() if DEBUG: scan = blobs['lidar3d_data'] gt_box3d = blobs['gt_boxes_3d'][:, (0, 1, 2, 3, 4, 5, 6, 7)] gt_box3d = np.hstack( (gt_box3d, np.ones([gt_box3d.shape[0], 2]) * 4)) pred_boxes = rpn_rois_[1] # pred_boxes = np.hstack((rpn_rois_[1],cubic_result.reshape(-1,1)*2)) # bbox = np.vstack((pred_boxes, gt_box3d)) # pcd_vispy(scan, boxes=BoxAry_Theta(gt_box3d,pred_boxes,pre_cube_cls=cubic_result), name='CubicNet training') if cfg.TRAIN.EPOCH_MODEL_SAVE: self.snapshot(sess, iter) pass if cfg.TRAIN.USE_VALID: with tf.name_scope('valid_cubic_' + str(epo_cnt + 1)): print 'Valid the net at the end of epoch_{} ...'.format( epo_cnt + 1) # roi_bv = self.net.get_output('rpn_rois')[0] # cubu_bv = np.hstack((roi_bv,cubic_cls_labels.reshape(-1,1))) # pred_rpn_ = show_rpn_tf(self.net.lidar_bv_data,cubu_bv) # pred_rpn = tf.reshape(pred_rpn_,(1, 601, 601, -1)) # predicted_bbox = tf.summary.image('predict_bbox_bv', pred_rpn) # valid_result = tf.summary.merge([predicted_bbox]) recalls = self.net.get_output('rpn_rois')[2] pred_tp_cnt, gt_cnt = 0., 0. 
hist = np.zeros((cfg.NUM_CLASS, cfg.NUM_CLASS), dtype=np.float32) for data_idx in range(self.val_epoch): # self.val_epoch blobs = self.dataset.get_minibatch(data_idx, 'valid') feed_dict_ = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 0.5, self.net.gt_boxes_bv: blobs['gt_boxes_bv'], self.net.gt_boxes_3d: blobs['gt_boxes_3d'], self.net.gt_boxes_corners: blobs['gt_boxes_corners'], self.net.calib: blobs['calib'] } cubic_cls_score_, cubic_cls_labels_, recalls_ = sess.run( [cubic_cls_score, cubic_cls_labels, recalls], feed_dict=feed_dict_) # train_writer.add_summary(valid, data_idx) pred_tp_cnt = pred_tp_cnt + recalls_[1] gt_cnt = gt_cnt + recalls_[2] cubic_class = cubic_cls_score_.argmax(axis=1) one_hist = fast_hist(cubic_cls_labels_, cubic_class) if not math.isnan(one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1])): if not math.isnan( one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0])): hist += one_hist if cfg.TRAIN.VISUAL_VALID: print 'Valid step: {:d}/{:d} , rpn recall = {:.3f}'\ .format(data_idx + 1,self.val_epoch,float(recalls_[1]) / recalls_[2]) print( ' class bg precision = {:.3f} recall = {:.3f}' .format( (one_hist[0, 0] / (one_hist[0, 0] + one_hist[1, 0] + 1e-6)), (one_hist[0, 0] / (one_hist[0, 0] + one_hist[0, 1] + 1e-6))) ) print( ' class car precision = {:.3f} recall = {:.3f}' .format( (one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1] + 1e-6)), (one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0] + 1e-6))) ) if data_idx % 20 == 0 and cfg.TRAIN.TENSORBOARD: pass # train_writer.add_summary(valid_result_, data_idx/20+epo_cnt*1000) precise_total = hist[1, 1] / (hist[1, 1] + hist[0, 1] + 1e-6) recall_total = hist[1, 1] / (hist[1, 1] + hist[1, 0] + 1e-6) recall_rpn = pred_tp_cnt / gt_cnt valid_summary = tf.summary.merge([ rpn_recall_smy_op, cubic_recall_smy_op, cubic_prec_smy_op ]) valid_res = sess.run(valid_summary, feed_dict={ epoch_rpn_recall: recall_rpn, 
epoch_cubic_recall: recall_total, epoch_cubic_precise: precise_total }) train_writer.add_summary(valid_res, epo_cnt + 1) print 'Validation of epoch_{}: rpn_recall {:.3f} cubic_precision = {:.3f} cubic_recall = {:.3f}'\ .format(epo_cnt + 1,recall_rpn,precise_total,recall_total) random.shuffle(training_series) # shuffle the training series print 'Training process has done, enjoy every day !'
def training(self, sess, train_writer): with tf.name_scope('loss_cubic'): cubic_cls_score = tf.reshape(self.net.get_output('cubic_cnn'), [-1, 2]) cubic_cls_labels = tf.reshape( tf.cast(self.net.get_output('rpn_rois')[:, -2], tf.int64), [-1]) if not cfg.TRAIN.FOCAL_LOSS: cubic_cross_entropy = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=cubic_cls_score, labels=cubic_cls_labels)) else: # alpha = [0.75,0.25] # 0.25 for label=1 gamma = 2 cubic_cls_probability = tf.nn.softmax(cubic_cls_score) # formula : Focal Loss for Dense Object Detection: FL(p)= -((1-p)**gama)*log(p) cubic_cross_entropy = tf.reduce_mean(-tf.reduce_sum( tf.one_hot(cubic_cls_labels, depth=2) * ((1 - cubic_cls_probability)**gamma) * tf.log([cfg.EPS, cfg.EPS] + cubic_cls_probability), axis=1)) loss = cubic_cross_entropy with tf.name_scope('train_op'): global_step = tf.Variable(1, trainable=False, name='Global_Step') lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step, 10000, 0.996, name='decay-Lr') train_op = tf.train.AdamOptimizer(lr).minimize( loss, global_step=global_step) with tf.name_scope('train_cubic'): tf.summary.scalar('total_loss', loss) # bv_anchors = self.net.get_output('rpn_anchors_label')[2] # roi_bv = self.net.get_output('rpn_rois')[0] # data_bv = self.net.lidar_bv_data # data_gt = self.net.gt_boxes_bv # image_rpn = tf.reshape(show_rpn_tf(data_bv, data_gt, bv_anchors, roi_bv), (1, 601, 601, -1)) # tf.summary.image('lidar_bv_test', image_rpn) glb_var = tf.global_variables() for i in range(len(glb_var)): # print glb_var[i].name if 'moving' not in str(glb_var[i].name): if 'Adam' not in str(glb_var[i].name): if 'weights' not in str(glb_var[i].name): if 'rpn' not in str(glb_var[i].name): if 'biases' not in str(glb_var[i].name): if 'beta' not in str(glb_var[i].name): if 'gamma' not in str(glb_var[i].name): if 'batch' not in str( glb_var[i].name): tf.summary.histogram( glb_var[i].name, glb_var[i]) merged = tf.summary.merge_all() with 
tf.name_scope('valid_cubic'): epoch_rpn_recall = tf.placeholder(dtype=tf.float32) rpn_recall_smy_op = tf.summary.scalar('rpn_recall', epoch_rpn_recall) epoch_cubic_recall = tf.placeholder(dtype=tf.float32) cubic_recall_smy_op = tf.summary.scalar('cubic_recall', epoch_cubic_recall) epoch_cubic_precise = tf.placeholder(dtype=tf.float32) cubic_prec_smy_op = tf.summary.scalar('cubic_precise', epoch_cubic_precise) sess.run(tf.global_variables_initializer()) if self.args.fine_tune: if True: # #full graph restore print 'Loading pre-trained model weights from {:s}'.format( self.args.weights) self.net.load(self.args.weights, sess, self.saver, True) else: # #part graph restore # # METHOD one # ref_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope=['vgg_feat_fc']) # saver1 = tf.train.Saver(ref_vars) # saver1.restore(sess, self.args.weights) # # METHOD two reader = pywrap_tensorflow.NewCheckpointReader( self.args.weights) var_to_shape_map = reader.get_variable_to_shape_map() with tf.variable_scope('', reuse=tf.AUTO_REUSE) as scope: for key in var_to_shape_map: try: var = tf.get_variable(key, trainable=False) sess.run(var.assign(reader.get_tensor(key))) print " Assign pretrain model: " + key except ValueError: print " Ignore variable:" + key trainable_var_for_chk = tf.trainable_variables( ) #tf.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) print 'Variables to training: ', trainable_var_for_chk timer = Timer() rpn_rois = self.net.get_output('rpn_rois') cubic_grid = self.net.get_output('cubic_grid') cubic_cnn = self.net.get_output('cubic_cnn') if DEBUG: vispy_init( ) # TODO: Essential step(before sess.run) for using vispy beacuse of the bug of opengl or tensorflow # vision_qt = Process(target=pcd_vispy_client, args=(MSG_QUEUE,)) # vision_qt.start() # print 'Process vision_qt started ...' 
training_series = range(self.epoch) # self.epoch for epo_cnt in range(self.args.epoch_iters): for data_idx in training_series: # DO NOT EDIT the "training_series",for the latter shuffle iter = global_step.eval( ) # function "minimize()"will increase global_step blobs = self.dataset.get_minibatch(data_idx, 'train') # get one batch feed_dict = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.gt_boxes_3d: blobs['gt_boxes_3d'] } run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() timer.tic() cubic_cls_score_, cubic_cls_labels_, rpn_rois_, cubic_cnn_, cubic_grid_, loss_, merged_, _ = sess.run( [ cubic_cls_score, cubic_cls_labels, rpn_rois, cubic_cnn, cubic_grid, loss, merged, train_op ], feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) timer.toc() cubic_result = cubic_cls_score_.argmax(axis=1) one_hist = fast_hist(cubic_cls_labels_, cubic_result) cubic_car_cls_prec = one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1] + 1e-5) cubic_car_cls_recall = one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0] + 1e-5) if iter % 1000 == 0 and cfg.TRAIN.DEBUG_TIMELINE: #chrome://tracing trace = timeline.Timeline( step_stats=run_metadata.step_stats) trace_file = open( cfg.LOG_DIR + '/' + 'training-StiData-step-' + str(iter).zfill(7) + '.ctf.json', 'w') trace_file.write( trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() if iter % cfg.TRAIN.ITER_DISPLAY == 0: print 'Iter: %d / %d, loss: %.3f' % ( iter, self.args.epoch_iters * self.epoch, loss_, ) print 'Cubic classify precise: {:.3f} recall: {:.3f}'.format( cubic_car_cls_prec, cubic_car_cls_recall) print 'Speed: {:.3f}s / iter'.format(timer.average_time) print 'divine: ', cubic_result print 'labels: ', cubic_cls_labels_ if iter % 10 == 0 and cfg.TRAIN.TENSORBOARD: train_writer.add_summary(merged_, iter) pass if iter % cfg.TRAIN.SNAPSHOT_ITERS == 0: self.snapshot(sess, iter) pass if DEBUG: scan = blobs['lidar3d_data'] gt_box3d = 
blobs['gt_boxes_3d'][:, (0, 1, 2, 3, 4, 5, 6)] gt_box3d = np.hstack( (gt_box3d, np.ones([gt_box3d.shape[0], 2]) * 4)) pred_boxes = np.hstack( (rpn_rois_, cubic_result.reshape(-1, 1) * 2)) bbox = np.vstack((pred_boxes, gt_box3d)) # msg = msg_qt(scans=scan, boxes=bbox,name='CubicNet training') # MSG_QUEUE.put(msg) pcd_vispy(scan, boxes=bbox, name='CubicNet training') random.shuffle(training_series) # shuffle the training series if cfg.TRAIN.USE_VALID: with tf.name_scope('valid_cubic_' + str(epo_cnt + 1)): print 'Valid the net at the end of epoch_{} ...'.format( epo_cnt + 1) # roi_bv = self.net.get_output('rpn_rois')[0] # bv_anchors = self.net.get_output('rpn_anchors_label')[2] # pred_rpn_ = show_rpn_tf(self.net.lidar_bv_data, self.net.gt_boxes_bv, bv_anchors, roi_bv) # pred_rpn = tf.reshape(pred_rpn_,(1, 601, 601, -1)) # predicted_bbox = tf.summary.image('predict_bbox_bv', pred_rpn) # valid_result = tf.summary.merge([predicted_bbox]) recalls = self.net.get_output('rpn_rois')[2] pred_tp_cnt, gt_cnt = 0., 0. 
hist = np.zeros((cfg.NUM_CLASS, cfg.NUM_CLASS), dtype=np.float32) for data_idx in range(self.val_epoch): # self.val_epoch blobs = self.dataset.get_minibatch(data_idx, 'valid') feed_dict_ = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 0.5, self.net.gt_boxes_bv: blobs['gt_boxes_bv'], self.net.gt_boxes_3d: blobs['gt_boxes_3d'], self.net.gt_boxes_corners: blobs['gt_boxes_corners'], self.net.calib: blobs['calib'] } cubic_cls_score_, cubic_cls_labels_, recalls_ = sess.run( [cubic_cls_score, cubic_cls_labels, recalls], feed_dict=feed_dict_) # train_writer.add_summary(valid, data_idx) pred_tp_cnt = pred_tp_cnt + recalls_[1] gt_cnt = gt_cnt + recalls_[2] cubic_class = cubic_cls_score_.argmax(axis=1) one_hist = fast_hist(cubic_cls_labels_, cubic_class) if not math.isnan(one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1])): if not math.isnan( one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0])): hist += one_hist if cfg.TRAIN.VISUAL_VALID: print 'Valid step: {:d}/{:d} , rpn recall = {:.3f}'\ .format(data_idx + 1,self.val_epoch,float(recalls_[1]) / recalls_[2]) print( ' class bg precision = {:.3f} recall = {:.3f}' .format((one_hist[0, 0] / (one_hist[0, 0] + one_hist[1, 0])), (one_hist[0, 0] / (one_hist[0, 0] + one_hist[0, 1])))) print( ' class car precision = {:.3f} recall = {:.3f}' .format((one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1])), (one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0])))) precise_total = hist[1, 1] / (hist[1, 1] + hist[0, 1]) recall_total = hist[1, 1] / (hist[1, 1] + hist[1, 0]) recall_rpn = pred_tp_cnt / gt_cnt valid_summary = tf.summary.merge([ rpn_recall_smy_op, cubic_recall_smy_op, cubic_prec_smy_op ]) valid_res = sess.run(valid_summary, feed_dict={ epoch_rpn_recall: recall_rpn, epoch_cubic_recall: recall_total, epoch_cubic_precise: precise_total }) train_writer.add_summary(valid_res, epo_cnt + 1) print 'Validation of epoch_{}: rpn_recall {:.3f} 
cubic_precision = {:.3f} cubic_recall = {:.3f}'\ .format(epo_cnt + 1,recall_rpn,precise_total,recall_total) self.snapshot(sess, iter, final=True) print 'Training process has done, enjoy every day !'
def test_epoch(self):
    """Predict labels for the test loader and write them to ``pred.csv``.

    Rows of ``../sample_submission.csv`` are filled in order, starting at
    the first index label of that file.  For each image the predicted
    label set is the union of

    * every class in ``self.config.v('check_id_list')`` whose score is at
      least 0.3 (0.4 for class id 0) and that is not listed in
      ``minor_type_class``, and
    * the entries of ``self.xgb_test_result`` for the same row.

    An image with no label at all is written as ``'0'``.  ``self.idx_df``
    is left pointing one past the last row written.
    """
    # NOTE(review): the model is put in train mode and called with
    # phase='train' for inference; kept as found -- confirm it is intended.
    self.model.train()

    df = pd.read_csv('../sample_submission.csv')
    self.idx_df = int(0)
    df = df.astype({"Predicted": str})
    epoch_size = int(len(self.test_loader))
    print('epoch_size ', epoch_size)

    # Start writing at the first index label of the submission file.
    self.idx_df = next(iter(df.index), self.idx_df)
    print('start idx ', self.idx_df)

    # Looked up once; assumed to return the same list on every call.
    class_ids = self.config.v('check_id_list')

    for images, name_list in self.test_loader:
        if self.use_gpu:
            images = Variable(images.cuda())
        out = self.model(images, phase='train')

        for i_im, imname in enumerate(name_list):
            # DataFrame.set_value was removed from pandas; .at is the
            # documented scalar setter.
            df.at[self.idx_df, 'Id'] = imname
            data = out[i_im]
            result_all = []
            print(' pre ', data)
            for t_i, tar_rat in enumerate(data):
                class_id = class_ids[t_i]
                if tar_rat >= 0.3 and class_id not in minor_type_class:
                    # Class 0 needs the stricter 0.4 threshold.
                    if class_id != 0 or tar_rat >= 0.4:
                        result_all.append(class_id)

            result_xgb = self.xgb_test_result[self.idx_df]
            print('idx ', self.idx_df, 'result_xgb ', result_xgb)
            result_all.extend(result_xgb)

            result = ' '.join(str(label) for label in result_all)
            if len(result) == 0:
                result = '0'
                print('idx ', self.idx_df, 'print none ------')
            print('idx ', self.idx_df, 'result ', result)
            df.at[self.idx_df, 'Predicted'] = result
            self.idx_df += 1

    df.to_csv('pred.csv', index=None)
    print('Evaluating detections')
def get_minibatch(self, _idx=0, name='train'):
    """Load, randomly rotate and voxelise one sample of the chosen split.

    :param _idx: position of the sample inside the selected split.
    :param name: ``'train'`` or ``'valid'``; any other value selects the
        test split.
    :return: dict holding the rotated point cloud, rotated box and object
        labels, the voxel buffers, the rotated 8-channel feature map and
        the three timer readings taken after each preparation stage.
    """
    if name == 'train':
        split = self.train_set
    elif name == 'valid':
        split = self.valid_set
    else:
        split = self.test_set

    record = split[_idx]
    fname = record['files_name']
    # File names look like "<folder>/<pcd file>".
    name_parts = fname.split('/')
    folder, pcd_file = name_parts[0], name_parts[1]

    timer = Timer()

    # Stage 1: read the point cloud and apply a random yaw rotation.
    timer.tic()
    lidar_data = pcd2np.from_path(
        path_add(self.data_path, folder, 'pcd', pcd_file))
    angel = (np_random.rand() - 0.500) * np.pi * 0.95
    angel_degree = angel * 57.29578  # radians -> degrees
    points_rot = self.rotation(lidar_data.pc_data, angel)
    yaw_offset = [0., 0., 0., 0., 0., 0., angel, 0.]  # yaw
    boxes_rot = np.add(record['boxes_labels'], yaw_offset)
    category_rot = self.label_rotation(record['object_labels'],
                                       degree=angel_degree)
    timer.toc()
    # NOTE(review): average_time is read after every stage of the same
    # timer; if Timer averages over all tic/toc pairs these are running
    # averages rather than per-stage times -- confirm against Timer.
    time1 = timer.average_time

    # Stage 2: voxelise the rotated points.
    timer.tic()
    grid_voxel = voxel_grid(points_rot, cfg, thread_sum=cfg.CPU_CNT)
    timer.toc()
    time2 = timer.average_time

    # Stage 3: load and rotate the precomputed 8-channel feature map.
    timer.tic()
    feature_path = path_add(self.data_path, folder, 'feature_pcd_name',
                            pcd_file[0:-4] + '.npy')
    apollo_8feature = np.load(feature_path).reshape(
        -1, cfg.CUBIC_SIZE[0], cfg.CUBIC_SIZE[1], 8)
    apollo_8feature_rot = self.apollo_feature_rotation(apollo_8feature,
                                                       degree=angel_degree)
    timer.toc()
    time3 = timer.average_time

    # Rotated coordinates joined with the original column 3 of the cloud.
    lidar3d = np.hstack((points_rot, lidar_data.pc_data[:, 3:4]))
    return {
        'serial_num': fname,
        'voxel_gen_time': (time1, time2, time3),
        'lidar3d_data': lidar3d,
        'boxes_labels': boxes_rot,
        'object_labels': category_rot,
        'grid_stack': grid_voxel['feature_buffer'],
        'coord_stack': grid_voxel['coordinate_buffer'],
        'ptsnum_stack': grid_voxel['number_buffer'],
        'apollo_8feature': apollo_8feature_rot,
    }
# Output file that will receive the detections; the folder is created on
# first use.
if not os.path.exists(args.save_folder):
    os.makedirs(args.save_folder)
dets_path = os.path.join(args.save_folder, args.dataset + '_dets.txt')
fw = open(dets_path, 'w')

# Test set layout: data/<dataset>/images/ plus a whitespace-separated list
# of image names (without extension) in data/<dataset>/img_list.txt.
testset_folder = os.path.join('data', args.dataset, 'images/')
testset_list = os.path.join('data', args.dataset, 'img_list.txt')
with open(testset_list, 'r') as fr:
    test_dataset = fr.read().split()
num_images = len(test_dataset)

# Images are fed at their original size unless this factor is changed.
resize = 1

_t = {'forward_pass': Timer(), 'misc': Timer()}

# Load and preprocess every test image.
for i, img_name in enumerate(test_dataset):
    image_path = '{}{}.jpg'.format(testset_folder, img_name)
    img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
    img = np.float32(img_raw)
    if resize != 1:
        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    # (width, height, width, height) of the image being processed.
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    # Subtract the per-channel constants in place.
    img -= (104, 117, 123)
    # Move channels first, add a batch axis, then move to the device.
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)
    img = img.to(device)
def test_epoch(self):
    """Classify every merged test image and write the result to ``pred.csv``.

    Images returned by ``self.get_testimg_merge_list`` are processed in
    consecutive batches of ``self.config.v('batch_size')``.  For each image
    the highest-scoring class is always reported; the second-highest class
    is appended when its score exceeds 0.5.  The CSV has the columns
    ``Id`` and ``Predicted``.  ``self.idx_df`` ends up equal to the number
    of rows written.
    """
    # NOTE(review): the model is left in train mode although it is called
    # with phase='eval'; kept as found -- confirm it is intended.
    self.model.train()
    test_image_dir = os.path.join('../', 'test/')
    self.idx_df = 0

    test_image_merge_list = list(
        self.get_testimg_merge_list(test_image_dir))
    banch_num = int(self.config.v('batch_size'))
    total = len(test_image_merge_list)
    print('len ', total)

    rows = []
    # Plain chunking.  The previous index arithmetic never ran the model on
    # the last image when total % batch_size == 1 (including a one-image
    # list), silently dropping it from the submission.
    for start in range(0, total, banch_num):
        name_list = test_image_merge_list[start:start + banch_num]
        img_list = []
        for img_name in name_list:
            img = self.get_merge_image(test_image_dir + img_name)
            img = Variable(img, volatile=True)
            if self.use_gpu:
                img = img.cuda()
            img_list.append(img.unsqueeze(0))
        batch = torch.cat(img_list, 0)

        self.visTest(self.model, batch[0], self.priorbox, self.writer, 1,
                     self.use_gpu)
        out = self.model(batch, phase='eval')

        for i_im, imname in enumerate(name_list):
            data = out[i_im]
            # Best class first ...
            cla = data.argmax(0).item()
            result = str(cla)
            # ... then zero it out so argmax yields the runner-up.
            data[cla] = 0
            cla = data.argmax(0).item()
            if data[cla] > 0.5:
                result += ' ' + str(cla)
            rows.append((imname, result))
            self.idx_df += 1

    # Build the frame in one go instead of growing it cell by cell with the
    # removed DataFrame.set_value.
    df = pd.DataFrame(rows, columns=["Id", "Predicted"])
    df.to_csv('pred.csv', index=None)
    print('Evaluating detections')
def train_per_epoch(self, epoch):
    """Run one epoch: train on the first ~80% of batches, evaluate the rest.

    Batches ``0 .. train_end`` (``train_end = int(0.8 * epoch_size)``) are
    used for optimisation; later batches are only forwarded with
    ``phase='eval'`` and their loss is accumulated.  Batches whose loss is
    ``+inf`` or NaN do not contribute to the optimiser step or to the
    running sums.  ``Train/conf_loss``, ``Train/lr`` and ``Eval/conf_loss``
    are written to ``self.writer`` with ``epoch`` as the step.

    :param epoch: epoch number, used as the TensorBoard step.
    """

    def _bar(done, total):
        # Ten-character textual progress bar.
        return ('#' * int(round(10 * done / total)) +
                '-' * int(round(10 * (1 - done / total))))

    def _usable(value):
        # Some annotations yield an infinite or NaN loss; skip those.
        return value != float("Inf") and not math.isnan(value)

    epoch_size = int(len(self.train_loader))
    batch_iterator = iter(self.train_loader)
    train_end = int(epoch_size * 0.8)
    print('epoch_size ', epoch_size, " train_end ", train_end)

    conf_loss = 0
    conf_loss_v = 0
    _t = Timer()

    for iteration in range(epoch_size):
        images, targets = next(batch_iterator)
        targets = np.array(targets)

        # Visualise the first few evaluation batches.
        if train_end < iteration < train_end + 10:
            if self.use_gpu:
                images = Variable(images.cuda())
            self.visualize_epoch(images, epoch)

        if iteration <= train_end:
            if self.use_gpu:
                images = Variable(images.cuda())
            else:
                images = Variable(images)
            self.model.train()

            _t.tic()
            out = self.model(images, phase='train', targets=targets)
            self.optimizer.zero_grad()
            loss_c = self.criterion(out, targets)
            loss_value = loss_c.data[0]

            # A bad loss used to `continue`, which also skipped the epoch
            # summary below when it happened on the boundary batch.
            if _usable(loss_value):
                loss_c.backward()
                self.optimizer.step()
                time = _t.toc()
                conf_loss += loss_value

                # log per iter
                log = '\r==>Train: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || cls_loss: {cls_loss:.4f}\r'.format(
                    prograss=_bar(iteration, epoch_size),
                    iters=iteration,
                    epoch_size=epoch_size,
                    time=time,
                    cls_loss=loss_value)
                sys.stdout.write(log)
                sys.stdout.flush()

        if iteration == train_end:
            # log per epoch
            sys.stdout.write('\r')
            sys.stdout.flush()
            lr = self.optimizer.param_groups[0]['lr']
            # NOTE(review): the sum covers only the training batches but is
            # divided by the full epoch_size; kept so logged values stay
            # comparable with earlier runs.
            log = '\r==>Train: || Total_time: {time:.3f}s || conf_loss: {conf_loss:.4f} || lr: {lr:.6f}\n'.format(
                lr=lr,
                time=_t.total_time,
                conf_loss=conf_loss / epoch_size)
            sys.stdout.write(log)
            sys.stdout.flush()

            # log for tensorboard
            self.writer.add_scalar('Train/conf_loss',
                                   conf_loss / epoch_size, epoch)
            self.writer.add_scalar('Train/lr', lr, epoch)
            conf_loss = 0

        if iteration > train_end:
            if self.use_gpu:
                images = Variable(images.cuda())
            else:
                images = Variable(images)

            # Start the timer for this batch; previously toc() measured the
            # time since the last *training* tic().
            _t.tic()
            out = self.model(images, phase='eval')
            loss_c = self.criterion(out, targets)
            loss_value = loss_c.data[0]
            time = _t.toc()

            if _usable(loss_value):
                conf_loss_v += loss_value

                # log per iter
                log = '\r==>Eval: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || cls_loss: {cls_loss:.4f}\r'.format(
                    prograss=_bar(iteration, epoch_size),
                    iters=iteration,
                    epoch_size=epoch_size,
                    time=time,
                    cls_loss=loss_value)
                sys.stdout.write(log)
                sys.stdout.flush()

            if iteration == (epoch_size - 1):
                # log per epoch
                sys.stdout.write('\r')
                sys.stdout.flush()
                log = '\r==>Eval: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || cls_loss: {cls_loss:.4f}\r'.format(
                    prograss=_bar(iteration, epoch_size),
                    iters=iteration,
                    epoch_size=epoch_size,
                    time=time,
                    cls_loss=loss_value)
                sys.stdout.write(log)
                sys.stdout.flush()

                # log for tensorboard
                self.writer.add_scalar('Eval/conf_loss',
                                       conf_loss_v / epoch_size, epoch)
dataset_folder = args.dataset_folder
anno_file = "label.txt"

# Collect image names from the annotation file: only lines that start with
# '#' name an image, and the name follows the two-character prefix.
image_list = []
with open(os.path.join(dataset_folder, anno_file), 'r') as f:
    lines = f.readlines()
    for line in lines:
        line = line.strip()
        if not line.startswith('#'):
            continue
        line = line[2:]
        image_list.append(line)
num_images = len(image_list)

timer = {'forward_pass': Timer(), 'misc': Timer()}

# Load and preprocess every listed image.
for i, img_name in enumerate(image_list):
    image_path = os.path.join(dataset_folder, "images", img_name)
    img_bgr = cv2.imread(image_path)
    # Convert BGR to RGB, then to float32 for the mean subtraction.
    img = np.float32(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
    im_height, im_width, _ = img.shape
    # Record the first two shape entries of the current image in the config.
    cfg.DATA.image_size = img.shape[:2]
    img -= cfg.DATA.rgb_mean
    # Move channels first, add a batch axis, then move to the device.
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0)
    img = img.to(device)
def train_per_epoch(self, epoch):
    """Train one epoch for ``self.train_class`` and validate on ``val_loader``.

    Training iterates over every batch of ``self.train_loader`` (batches
    of size 1 are skipped).  Validation iterates over ``self.val_loader``
    except its final batch, accumulating the loss and per-sample binary
    predictions (``out[:, 1] >= 0.5``) against whether
    ``self.train_class`` appears in the sample's label string.  The
    training loss, learning rate, mean validation loss and macro F1 score
    are written to ``self.writer`` under ``<train_class>/...`` tags.

    :param epoch: epoch number, used as the TensorBoard step.
    """

    def _bar(done, total):
        # Ten-character textual progress bar.
        return ('#' * int(round(10 * done / total)) +
                '-' * int(round(10 * (1 - done / total))))

    def _usable(value):
        # Some annotations yield an infinite or NaN loss; skip those.
        return value != float("Inf") and not math.isnan(value)

    conf_loss = 0
    conf_loss_v = 0
    _t = Timer()
    epoch_size = int(len(self.train_loader))
    train_end = int(epoch_size)
    batch_iterator = iter(self.train_loader)

    for iteration in range(epoch_size):
        images, targets, targets_src = next(batch_iterator)
        # Single-sample batches are skipped entirely.
        if len(images) == 1:
            continue
        targets = np.array(targets)

        if iteration == (train_end - 2):
            if self.use_gpu:
                images = Variable(images.cuda())
            self.visualize_epoch(images, epoch)

        if iteration <= train_end:
            if self.use_gpu:
                images = Variable(images.cuda())
            else:
                images = Variable(images)
            self.model.train()

            _t.tic()
            out = self.model(images, phase='train')
            self.optimizer.zero_grad()
            loss_c = self.criterion(out, targets)
            loss_value = loss_c.data[0]

            # A bad loss used to `continue`, which also skipped the epoch
            # summary below when it happened on the summary batch.
            if _usable(loss_value):
                loss_c.backward()
                self.optimizer.step()
                time = _t.toc()
                conf_loss += loss_value

                # log per iter
                log = '\r==>Train_class{}: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || cls_loss: {cls_loss:.4f}\r'.format(
                    self.train_class,
                    prograss=_bar(iteration, epoch_size),
                    iters=iteration,
                    epoch_size=epoch_size,
                    time=time,
                    cls_loss=loss_value)
                sys.stdout.write(log)
                sys.stdout.flush()

        if iteration == (train_end - 2):
            # log per epoch
            sys.stdout.write('\r')
            sys.stdout.flush()
            lr = self.optimizer.param_groups[0]['lr']
            log = '\r==>Train: || Total_time: {time:.3f}s || conf_loss: {conf_loss:.4f} || lr: {lr:.6f}\n'.format(
                lr=lr,
                time=_t.total_time,
                conf_loss=conf_loss / epoch_size)
            sys.stdout.write(log)
            sys.stdout.flush()

            # log for tensorboard
            title = str(self.train_class) + '/conf_loss'
            self.writer.add_scalar(title, conf_loss / epoch_size, epoch)
            title = str(self.train_class) + '/lr'
            self.writer.add_scalar(title, lr, epoch)
            conf_loss = 0

    val_epoch_size = int(len(self.val_loader))
    val_batch_iterator = iter(self.val_loader)
    pre_for_f1 = []
    t_for_f1 = []
    val_batches = 0  # batches that actually contributed to conf_loss_v

    for iteration in range(val_epoch_size):
        images, targets, tar_srcs = next(val_batch_iterator)
        # The final validation batch is fetched but not evaluated.
        if iteration >= (val_epoch_size - 1):
            continue

        targets = np.array(targets)
        if self.use_gpu:
            images = Variable(images.cuda())
        else:
            images = Variable(images)
        self.model.eval()

        # Start the timer for this batch; previously toc() measured the
        # time since the last *training* tic().
        _t.tic()
        out = self.model(images, phase='eval')
        loss_c = self.criterion(out, targets)
        loss_value = loss_c.data[0]
        if not _usable(loss_value):
            continue

        print('out ', out)
        for i_ys, ys in enumerate(out):
            tail = ''
            t_val = 0
            # Ground truth is a space-separated list of class ids.
            targets_t = [int(tthis) for tthis in tar_srcs[i_ys].split(' ')]
            if self.train_class in targets_t:
                tail = '-----------'
                t_val = 1
            t_for_f1.append(t_val)
            if ys[1] >= 0.5:
                mid = '||||||||'
                pre_for_f1.append(1)
                print('ci ', self.train_class, ' i_ys ', i_ys, ' pre ',
                      ys[1], mid, ' t ', tar_srcs[i_ys], tail)
            else:
                pre_for_f1.append(0)
                print('ci ', self.train_class, ' i_ys ', i_ys, ' pre ',
                      ys[1], ' t ', tar_srcs[i_ys], tail)

        time = _t.toc()
        conf_loss_v += loss_value
        val_batches += 1

        # log per iter
        log = '\r==>Eval_class{}: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || cls_loss: {cls_loss:.4f}\r'.format(
            self.train_class,
            prograss=_bar(iteration, val_epoch_size),
            iters=iteration,
            epoch_size=val_epoch_size,
            time=time,
            cls_loss=loss_value)
        sys.stdout.write(log)
        sys.stdout.flush()

    # log per epoch
    sys.stdout.write('\r')
    sys.stdout.flush()
    log = '\r==>Eval: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || cls_loss: {cls_loss:.4f}\r'.format(
        prograss=_bar(iteration, val_epoch_size),
        iters=iteration,
        epoch_size=val_epoch_size,
        time=time,
        cls_loss=loss_c.data[0])
    sys.stdout.write(log)
    sys.stdout.flush()

    # log for tensorboard
    title = str(self.train_class) + '/e_conf_loss'
    # Average over the validation batches that were accumulated; the old
    # code divided by the *training* loader length.
    self.writer.add_scalar(title, conf_loss_v / max(val_batches, 1), epoch)

    f1 = f1_score(t_for_f1, pre_for_f1, average="macro")
    print('c--- ', self.train_class, '---------f1 ', f1)
    title = str(self.train_class) + '/f'
    self.writer.add_scalar(title, f1, epoch)
def training(self, sess): sess.run(tf.global_variables_initializer()) reader = pywrap_tensorflow.NewCheckpointReader(self.weights) var_to_shape_map = reader.get_variable_to_shape_map() glb_var = tf.global_variables() with tf.variable_scope('', reuse=tf.AUTO_REUSE) as scope: for key in var_to_shape_map: try: var = tf.get_variable(key, trainable=False) sess.run(var.assign(reader.get_tensor(key))) print " Assign pretrain model: " + key except ValueError: print " Ignore variable:" + key timer = Timer() vispy_init() res = [] input_series = [] merge_op = tf.summary.merge_all() train_writer = tf.summary.FileWriter(cfg.LOG_DIR, sess.graph, max_queue=1000, flush_secs=1) loop_parameters = np.arange(-90, 90, 1) data_id = 1 box_cnt = 0 for data_idx in loop_parameters: # DO NOT EDIT the "training_series",for the latter shuffle run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() debug_mod = True if data_idx == 0 else False # debug_mod = True feed_dict = self.cubic_rpn_grid( data_id, box_idx=box_cnt, angel=data_idx, scalar=1.00, #float(data_idx)/180.*1.0, translation=[0, 0, 0], DEBUG=debug_mod) timer.tic() img_tf_, cubic_theta_, merge_op_ = sess.run( [self.cubic_theta.img_tf, self.cubic_theta.res, merge_op], feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) timer.toc() input_series.append(img_tf_) res.append(cubic_theta_[0] * 180 / 3.1415926) # print 'rotation: {:3d} score: {:>8,.7f} {:>8,.7f} result: {}'.format(data_idx,cubic_cls_score_[0,0],cubic_cls_score_[0,1],cubic_result[0]) train_writer.add_summary(merge_op_, data_idx) imge_op = tf.summary.image("imagesss", np.array(input_series, dtype=np.float32).reshape( -1, 30, 30, 1), max_outputs=180) imge_op_ = sess.run(imge_op) train_writer.add_summary(imge_op_, 1) plt.plot(loop_parameters, res) plt.grid(True, color='black', linestyle='--', linewidth='1') plt.title('Car_{}_{}'.format(data_id, box_cnt)) plt.xlabel('gt_yaw+') plt.ylabel('pred-yaw') 
plt.legend(['positive']) plt.savefig('Roation_of_Car2.png') xmajorLocator = MultipleLocator(10) # 将x主刻度标签设置为20的倍数 xmajorFormatter = FormatStrFormatter('%1.0f') # 设置x轴标签文本的格式 xminorLocator = MultipleLocator(5) # 将x轴次刻度标签设置为5的倍数 ymajorLocator = MultipleLocator(10) # 将y轴主刻度标签设置为0.5的倍数 ymajorFormatter = FormatStrFormatter('%1.0f') # 设置y轴标签文本的格式 yminorLocator = MultipleLocator(5) # 将此y轴次刻度标签设置为0.1的倍数 ax = plt.axes() # 设置主刻度标签的位置,标签文本的格式 ax.xaxis.set_major_locator(xmajorLocator) ax.xaxis.set_major_formatter(xmajorFormatter) ax.yaxis.set_major_locator(ymajorLocator) ax.yaxis.set_major_formatter(ymajorFormatter) # 显示次刻度标签的位置,没有标签文本 ax.xaxis.set_minor_locator(xminorLocator) ax.yaxis.set_minor_locator(yminorLocator) ax.xaxis.grid(True, which='major') # x坐标轴的网格使用主刻度 ax.yaxis.grid(True, which='minor') # y坐标轴的网格使用次刻度 plt.show()
def training(self, sess, train_writer): with tf.name_scope('loss_function'): RNet_rpn_yaw_pred = self.net.get_output('RNet_theta')[1] RNet_rpn_yaw_gt_delta = self.net.get_output('cubic_grid')[1] RNet_rpn_yaw_gt = self.net.get_output( 'rpn_rois' )[1][:, -1] #rpn_3d_boxes:(x1,y1,z1),(x2,y2,z2),score,rpn_cls_label,yaw RNet_rpn_yaw_gt_new = RNet_rpn_yaw_gt - RNet_rpn_yaw_gt_delta RNet_rpn_yaw_pred_toshow = RNet_rpn_yaw_pred + RNet_rpn_yaw_gt_delta rpn_cls_labels = self.net.get_output( 'rpn_rois' )[1][:, -2] #rpn_3d_boxes:(x1,y1,z1),(x2,y2,z2),score,rpn_cls_label,yaw RNet_rpn_yaw_pred = self.angle_trans(RNet_rpn_yaw_pred) RNet_rpn_yaw_gt_new = self.angle_trans(RNet_rpn_yaw_gt_new) debug_pred = tf.multiply(rpn_cls_labels, self.angle_trans(RNet_rpn_yaw_pred)) debug_gt = tf.multiply(rpn_cls_labels, self.angle_trans(RNet_rpn_yaw_gt_new)) tower_l1_loss = self.Rnet_modified_smooth_l1( sigma=3, bbox_pred=RNet_rpn_yaw_pred, bbox_targets=RNet_rpn_yaw_gt_new) tower_l1_loss_keep_positive = tf.multiply(rpn_cls_labels, tower_l1_loss) loss = tf.reduce_sum(tower_l1_loss_keep_positive) / ( 1e-5 + tf.reduce_sum( tf.cast(tf.not_equal(tower_l1_loss_keep_positive, 0.0), dtype=tf.float32))) with tf.name_scope('train_op'): global_step = tf.Variable(1, trainable=False, name='Global_Step') lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step, 10000, 0.90, name='decay-Lr') Optimizer = tf.train.AdamOptimizer(lr) var_and_grad = Optimizer.compute_gradients( loss, var_list=tf.trainable_variables()) train_op = Optimizer.minimize(loss, global_step=global_step) with tf.name_scope('debug_board'): tf.summary.scalar('total_loss', loss) glb_var = tf.trainable_variables() for i in range(len(glb_var)): tf.summary.histogram(glb_var[i].name, glb_var[i]) tf.summary.image('theta', self.net.get_output('RNet_theta')[0], max_outputs=50) merged = tf.summary.merge_all() #hxd: before the next summary ops with tf.name_scope('epoch_valid'): epoch_cube_theta = tf.placeholder(dtype=tf.float32) 
epoch_cube_theta_sum_op = tf.summary.scalar( 'valid_los', epoch_cube_theta) sess.run(tf.global_variables_initializer()) if self.args.fine_tune: if True: # #full graph restore print 'Loading pre-trained model weights from {:s}'.format( self.args.weights) self.net.load(self.args.weights, sess, self.saver, True) else: # #part graph restore # # METHOD one # ref_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope=['vgg_feat_fc']) # saver1 = tf.train.Saver(ref_vars) # saver1.restore(sess, self.args.weights) # # METHOD two reader = pywrap_tensorflow.NewCheckpointReader( self.args.weights) var_to_shape_map = reader.get_variable_to_shape_map() with tf.variable_scope('', reuse=tf.AUTO_REUSE) as scope: for key in var_to_shape_map: try: var = tf.get_variable(key, trainable=False) sess.run(var.assign(reader.get_tensor(key))) print " Assign pretrain model: " + key except ValueError: print " Ignore variable:" + key trainable_var_for_chk = tf.trainable_variables( ) #tf.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) print 'Variables to train: ', trainable_var_for_chk timer = Timer() rpn_rois_3d = self.net.get_output('rpn_rois')[1] if DEBUG: pass # TODO: Essential step(before sess.run) for using vispy beacuse of the bug of opengl or tensorflow vispy_init() i = 0 training_series = range(10) #self.epoch for epo_cnt in range(self.args.epoch_iters): for data_idx in training_series: # DO NOT EDIT the "training_series",for the latter shuffle iter = global_step.eval( ) # function "minimize()"will increase global_step blobs = self.dataset.get_minibatch(data_idx, 'train') # get one batch feed_dict = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 0.5, self.net.gt_boxes_bv: blobs['gt_boxes_bv'], self.net.gt_boxes_3d: blobs['gt_boxes_3d'], self.net.gt_boxes_corners: blobs['gt_boxes_corners'], self.net.calib: blobs['calib'], } run_options = tf.RunOptions( 
trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() timer.tic() # debug_pred_,delta_,RNet_rpn_yaw_gt_delta_,rpn_rois_3d_,loss_,RNet_rpn_yaw_pred_toshow_,debug_gt_,merged_,_ = \ # sess.run([debug_pred,tower_l1_loss_keep_positive,RNet_rpn_yaw_gt_delta,rpn_rois_3d,loss,RNet_rpn_yaw_pred_toshow,debug_gt,merged,train_op,] # ,feed_dict=feed_dict,options=run_options, run_metadata=run_metadata) debug_pred_,delta_,RNet_rpn_yaw_gt_delta_,rpn_rois_3d_,RNet_rpn_yaw_pred_toshow_,debug_gt_,merged_, = \ sess.run([debug_pred,tower_l1_loss_keep_positive,RNet_rpn_yaw_gt_delta,rpn_rois_3d,RNet_rpn_yaw_pred_toshow,debug_gt,merged,] ,feed_dict=feed_dict,options=run_options, run_metadata=run_metadata) loss_ = 0 timer.toc() if iter % cfg.TRAIN.ITER_DISPLAY == 0: print 'Iter: %d/%d, Serial_num: %s, Speed: %.3fs/iter, Loss: %.3f ' % ( iter, self.args.epoch_iters * self.epoch, blobs['serial_num'], timer.average_time, loss_) print 'theta_delta: ', for i in range(50): if delta_[i] != 0.0: print '%6.3f' % (delta_[i]), print '\nPredicted angle: ', for j in range(50): if debug_pred_[j] != 0.0: print '%6.3f' % (debug_pred_[j]), print '\nGt yaw angle: ', for j in range(50): if debug_gt_[j] != 0.0: print '%6.3f' % (debug_gt_[j]), print '\n' if iter % 20 == 0 and cfg.TRAIN.TENSORBOARD: train_writer.add_summary(merged_, iter) pass if (iter % 4000 == 0 and cfg.TRAIN.DEBUG_TIMELINE) or (iter == 100): #chrome://tracing trace = timeline.Timeline( step_stats=run_metadata.step_stats) trace_file = open( cfg.LOG_DIR + '/' + 'training-step-' + str(iter).zfill(7) + '.ctf.json', 'w') trace_file.write( trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() if DEBUG: scan = blobs['lidar3d_data'] cubic_cls_value = np.ones([cfg.TRAIN.RPN_POST_NMS_TOP_N], dtype=np.float32) * 0 boxes = BoxAry_Theta( gt_box3d=blobs['gt_boxes_3d'], pre_box3d=rpn_rois_3d_, pre_theta_value=RNet_rpn_yaw_pred_toshow_, pre_cube_cls=cubic_cls_value ) # RNet_rpn_yaw_pred_toshow_ rpn_rois_3d_[:,-1] 
pcd_vispy(scan, boxes=boxes, name='CubicNet training', index=i, vis_size=(800, 600), save_img=False, visible=False) i += 1 if cfg.TRAIN.EPOCH_MODEL_SAVE: #iter % 2000==0 and : self.snapshot(sess, iter) pass if cfg.TRAIN.USE_VALID and True: #TODO: to complete the valid process with tf.name_scope('valid_cubic_' + str(epo_cnt + 1)): print 'Valid the net at the end of epoch_{} ...'.format( epo_cnt + 1) valid_loss_total = 0.0 for data_idx in range(self.val_epoch): # self.val_epoch blobs = self.dataset.get_minibatch(data_idx, 'valid') feed_dict_ = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 0.5, self.net.gt_boxes_bv: blobs['gt_boxes_bv'], self.net.gt_boxes_3d: blobs['gt_boxes_3d'], self.net.gt_boxes_corners: blobs['gt_boxes_corners'], self.net.calib: blobs['calib'], } loss_valid = sess.run(loss, feed_dict=feed_dict_) # train_writer.add_summary(valid, data_idx) valid_loss_total += loss_valid if cfg.TRAIN.VISUAL_VALID and data_idx % 20 == 0: print 'Valid step: {:d}/{:d} , theta_loss = {:.3f}'\ .format(data_idx + 1,self.val_epoch,float(loss_valid)) if data_idx % 20 == 0 and cfg.TRAIN.TENSORBOARD: pass # train_writer.add_summary(valid_result_, data_idx/20+epo_cnt*1000) valid_summary = tf.summary.merge([epoch_cube_theta_sum_op]) valid_res = sess.run(valid_summary, feed_dict={ epoch_cube_theta: float(valid_loss_total) / self.val_epoch }) train_writer.add_summary(valid_res, epo_cnt + 1) print 'Validation of epoch_{}:theta_loss_total = {:.3f}\n'\ .format(epo_cnt + 1,float(valid_loss_total)/self.val_epoch) random.shuffle(training_series) # shuffle the training series print 'Training process has done, enjoy every day !'
model.eval() question_ids, soft_max_result = run_model(model, data_reader_test, ans_dic.UNK_idx) print_result( question_ids, soft_max_result, ans_dic, out_file, json_only=False, pkl_res_file=pkl_res_file, ) if __name__ == "__main__": prg_timer = Timer() args = parse_args() config_file = args.config seed = args.seed if args.seed > 0 else random.randint(1, 100000) process_config(config_file, args.config_overwrite) torch.manual_seed(seed) if use_cuda: torch.cuda.manual_seed(seed) basename = "default" if args.config is None else os.path.basename( args.config) cmd_cfg_obj = (demjson.decode(args.config_overwrite) if args.config_overwrite is not None else None)
def training(self, sess): with tf.name_scope('loss_cube'): cube_score = self.network.cube_score cube_label = self.network.cube_label if self.arg.focal_loss: alpha = [1.0, 1.0] gamma = 2 cube_probi = tf.nn.softmax(cube_score) tmp = tf.one_hot(cube_label, depth=2) * ( (1 - cube_probi)** gamma) * tf.log([cfg.EPS, cfg.EPS] + cube_probi) * alpha cube_cross_entropy = tf.reduce_mean( -tf.reduce_sum(tmp, axis=1)) else: cube_probi = tf.nn.softmax(cube_score) # use for debug tmp = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=cube_score, labels=cube_label) cube_cross_entropy = tf.reduce_mean(tmp) loss = cube_cross_entropy with tf.name_scope('train_op'): global_step = tf.Variable(1, trainable=False, name='Global_Step') lr = tf.train.exponential_decay(self.arg.lr, global_step, 1000, 0.90, name='decay-Lr') train_op = tf.train.MomentumOptimizer(lr, momentum=0.9).minimize( loss, global_step=global_step) with tf.name_scope('train_cubic'): extractor_int = self.network.extractor_int extractor_float = self.network.extractor_weighs_float extractor_outs = self.network.extractor_outs #(160, 30, 30, 15, 32) # extractor_F_grad = tf.gradients(loss, extractor_float) # extractor_Int_grad = tf.gradients(loss, extractor_int) # conv1_grad = tf.gradients(loss, self.network.conv1) # conv2_grad = tf.gradients(loss, self.network.conv2) # conv3_grad = tf.gradients(loss, self.network.conv3) # fc1_grad = tf.gradients(loss, self.network.fc1) # fc2_grad = tf.gradients(loss, self.network.fc2) watch_data_idx = 0 inputs_cube = tf.reshape( tf.reduce_sum(tf.squeeze( self.network.cube_input[watch_data_idx, ...]), axis=-1, keep_dims=True), [-1, 30, 30, 1]) tf.summary.image('extractor_int', tf.reshape(extractor_int, [1, 27, -1, 1])) data0_kernel0_outs = tf.transpose( tf.reshape(extractor_outs[0, :, :, 2, :], [1, 30, 30, -1]), [3, 1, 2, 0]) data0_kernel1_outs = tf.transpose( tf.reshape(extractor_outs[1, :, :, 2, :], [1, 30, 30, -1])) data0_kernel2_outs = tf.transpose( tf.reshape(extractor_outs[2, :, :, 
2, :], [1, 30, 30, -1])) data0_kernel3_outs = tf.transpose( tf.reshape(extractor_outs[3, :, :, 2, :], [1, 30, 30, -1])) tf.summary.image('extractor_inputs_cube', inputs_cube) tf.summary.image('extractor_outs1', data0_kernel0_outs, max_outputs=50) # tf.summary.image('extractor_outs2', data0_kernel1_outs,max_outputs=50) # tf.summary.image('extractor_outs3', data0_kernel2_outs,max_outputs=50) # tf.summary.image('extractor_outs2', data0_kernel3_outs,max_outputs=50) # tf.summary.image('extractor_two', tf.reshape(tf.transpose(extractor_int),[32,9,3,1])) # tf.summary.image('extractor_float', tf.reshape(extractor_float, [-1, 27, 32, 1])) # tf.summary.image('conv1_kernel', tf.reshape(self.network.conv1[0], [-1, 27, 32, 1]), max_outputs=3) # tf.summary.image('conv2_kernel', tf.reshape(self.network.conv2[0], [-1, 27, 64, 1]), max_outputs=3) # tf.summary.image('conv3_kernel', tf.reshape(self.network.conv3[0], [-1, 27, 128, 1]), max_outputs=3) # # tf.summary.histogram('float_grad', extractor_F_grad) # tf.summary.histogram('Int_grad', extractor_Int_grad) # tf.summary.histogram('conv1_grad', conv1_grad[0]) # tf.summary.histogram('conv2_grad', conv2_grad[0]) # tf.summary.histogram('conv3_grad', conv3_grad[0]) # tf.summary.histogram('fc1_grad', fc1_grad[0]) # tf.summary.histogram('fc2_grad', fc2_grad[0]) tf.summary.scalar('total_loss', loss) glb_var = tf.global_variables() # for var in glb_var: # tf.summary.histogram(var.name, var) merged_op = tf.summary.merge_all() with tf.name_scope('valid_cubic'): epoch_cubic_recall = tf.placeholder(dtype=tf.float32) cubic_recall_smy_op = tf.summary.scalar('cubic_recall', epoch_cubic_recall) epoch_cubic_precise = tf.placeholder(dtype=tf.float32) cubic_precise_smy_op = tf.summary.scalar('cubic_precise', epoch_cubic_precise) epoch_extractor_occupy = tf.placeholder(dtype=tf.float32) cubic_occupy_smy_op = tf.summary.scalar('extractor_occupy', epoch_extractor_occupy) valid_summary_op = tf.summary.merge([ cubic_recall_smy_op, cubic_precise_smy_op, 
cubic_occupy_smy_op ]) with tf.name_scope('load_weights'): sess.run(tf.global_variables_initializer()) if self.arg.weights is not None: self.network.load_weigths(self.arg.weights, sess, self.saver) print 'Loading pre-trained model weights from {:s}'.format( red(self.arg.weights)) else: print 'The network will be {} from default initialization!'.format( yellow('re-trained')) timer = Timer() if DEBUG: pass vispy_init() cube_label_gt = np.concatenate( (np.ones([self.arg.batch_size]), np.zeros([self.arg.batch_size ]))).astype(np.int32) train_epoch_cnt = int(self.dataset.train_positive_cube_cnt / self.arg.batch_size / 2) training_series = range( train_epoch_cnt) # range(train_epoch_cnt) # train_epoch_cnt for epo_cnt in range(self.arg.epoch_iters): for data_idx in training_series: iter = global_step.eval() timer.tic() series = self.train_series_Gen(self.arg.batch_size, 'train') data_batchP = self.dataset.get_minibatch(series[0], data_type='train', classify='positive') data_batchN = self.dataset.get_minibatch(series[1], data_type='train', classify='negative') data_batch = np.vstack((data_batchP, data_batchN)) timer.toc() time1 = timer.average_time timer.tic() if self.arg.use_aug_data_method: data_aug = self.cube_augmentation(data_batch, aug_data=True, DEBUG=False) else: data_aug = data_batch timer.toc() time2 = timer.average_time if DEBUG: a = data_batch[data_idx].sum() b = data_batch[data_idx].sum() if a != b: print 'There is some points loss' else: print 'points cnt: ', a box_np_view(data_aug[data_idx], data_aug[data_idx + self.arg.batch_size]) feed_dict = { self.network.cube_input: data_aug, self.network.cube_label: cube_label_gt, } timer.tic() extractor_outs_,extractor_int_, extractor_float_, cube_probi_, cube_label_, loss_, merge_op_, _ = \ sess.run([extractor_outs, extractor_int, extractor_float, cube_probi, cube_label, loss, merged_op, train_op], feed_dict=feed_dict) timer.toc() # print extractor_outs_.shape,"Look here!" 
if iter % 4 == 0: predict_result = cube_probi_.argmax(axis=1) one_train_hist = fast_hist(cube_label_gt, predict_result) occupy_part_pos = (extractor_int_.reshape( -1) == 1.0).astype(float).sum() / extractor_int_.size occupy_part_neg = (extractor_int_.reshape( -1) == -1.0).astype(float).sum() / extractor_int_.size print 'Training step: {:3d} loss: {:.4f} occupy: +{}% vs -{}% inference_time: {:.3f} '. \ format(iter, loss_, int(occupy_part_pos * 100), int(occupy_part_neg * 100), timer.average_time) # print(' class bg precision = {:.3f} recall = {:.3f}'.format( # (one_train_hist[0, 0] / (one_train_hist[0, 0] + one_train_hist[1, 0] + 1e-6)), # (one_train_hist[0, 0] / (one_train_hist[0, 0] + one_train_hist[0, 1] + 1e-6)))) print ' class car precision = {:.3f} recall = {:.3f}'.format( (one_train_hist[1, 1] / (one_train_hist[1, 1] + one_train_hist[0, 1] + 1e-6)), (one_train_hist[1, 1] / (one_train_hist[1, 1] + one_train_hist[1, 0] + 1e-6))), '\n' if socket.gethostname() == "szstdzcp0325" and False: with self.printoptions(precision=2, suppress=False, linewidth=10000): print 'scores: {}'.format(cube_probi_[:, 1]) print 'divine:', str(predict_result) print 'labels:', str(cube_label_), '\n' if iter % 1 == 0 and cfg.TRAIN.TENSORBOARD: pass self.writer.add_summary(merge_op_, iter) if (iter % 3000 == 0 and cfg.TRAIN.DEBUG_TIMELINE) or iter == 200: if socket.gethostname() == "szstdzcp0325": run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() _ = sess.run([cube_score], feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) # chrome://tracing trace = timeline.Timeline( step_stats=run_metadata.step_stats) trace_file = open( cfg.LOG_DIR + '/' + 'training-step-' + str(iter).zfill(7) + '.ctf.json', 'w') trace_file.write( trace.generate_chrome_trace_format( show_memory=False)) trace_file.close() if epo_cnt % 10 == 0 and cfg.TRAIN.EPOCH_MODEL_SAVE: pass self.snapshot(sess, epo_cnt) if cfg.TRAIN.USE_VALID: with 
tf.name_scope('valid_cubic_' + str(epo_cnt + 1)): print 'Valid the net at the end of epoch_{} ...'.format( epo_cnt + 1) hist = np.zeros((cfg.NUM_CLASS, cfg.NUM_CLASS), dtype=np.float32) valid_epoch_cnt = int( self.dataset.valid_positive_cube_cnt / self.arg.batch_size / 2) for data_idx in range(valid_epoch_cnt): series = self.train_series_Gen(self.arg.batch_size, 'valid') data_batchP = self.dataset.get_minibatch( series[0], data_type='valid', classify='positive') data_batchN = self.dataset.get_minibatch( series[1], data_type='valid', classify='negative') data_batch = np.vstack((data_batchP, data_batchN)) feed_dict_ = { self.network.cube_input: data_batch, self.network.cube_label: cube_label_gt, } valid_cls_score_ = sess.run(cube_score, feed_dict=feed_dict_) valid_result = valid_cls_score_.argmax(axis=1) one_hist = fast_hist(cube_label_gt, valid_result) hist += one_hist if cfg.TRAIN.VISUAL_VALID: print 'Valid step: {:d}/{:d}'.format( data_idx + 1, valid_epoch_cnt) print( ' class bg precision = {:.3f} recall = {:.3f}' .format( (one_hist[0, 0] / (one_hist[0, 0] + one_hist[1, 0] + 1e-6)), (one_hist[0, 0] / (one_hist[0, 0] + one_hist[0, 1] + 1e-6))) ) print( ' class car precision = {:.3f} recall = {:.3f}' .format( (one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1] + 1e-6)), (one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0] + 1e-6))) ) if data_idx % 20 == 0 and cfg.TRAIN.TENSORBOARD: pass # train_writer.add_summary(valid_result_, data_idx/20+epo_cnt*1000) valid_extractor_int_ = sess.run(extractor_int) extractor_occupy = valid_extractor_int_.sum( ) / valid_extractor_int_.size precise_total = hist[1, 1] / (hist[1, 1] + hist[0, 1] + 1e-6) recall_total = hist[1, 1] / (hist[1, 1] + hist[1, 0] + 1e-6) valid_res = sess.run(valid_summary_op, feed_dict={ epoch_cubic_recall: recall_total, epoch_cubic_precise: precise_total, epoch_extractor_occupy: extractor_occupy }) self.writer.add_summary(valid_res, epo_cnt + 1) print 'Validation of epoch_{}: cubic_precision = {:.3f} 
cubic_recall = {:.3f}' \ .format(epo_cnt + 1, precise_total, recall_total) self.shuffle_series() print yellow('Training process has done, enjoy every day !')