def visualize(model, model_path, nr_visualize=100, output_dir='output'):
    """
    Visualize some intermediate results (proposals, raw predictions) inside the pipeline.

    Args:
        model: detection model whose graph defines the tensors fetched below.
        model_path: checkpoint path to load weights from.
        nr_visualize (int): number of training datapoints to visualize.
        output_dir (str): directory for the output PNGs; wiped and recreated
            on every call.
    """
    df = get_train_dataflow()   # we don't visualize mask stuff
    df.reset_state()

    pred = OfflinePredictor(
        PredictConfig(model=model,
                      session_init=get_model_loader(model_path),
                      input_names=['image', 'gt_boxes', 'gt_labels'],
                      output_names=[
                          # proposal tensors live under a mode-dependent name scope
                          'generate_{}_proposals/boxes'.format(
                              'fpn' if cfg.MODE_FPN else 'rpn'),
                          'generate_{}_proposals/probs'.format(
                              'fpn' if cfg.MODE_FPN else 'rpn'),
                          'fastrcnn_all_probs',
                          'final_boxes',
                          'final_probs',
                          'final_labels',
                      ]))

    # start from a clean output directory
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    utils.fs.mkdir_p(output_dir)
    with tqdm.tqdm(total=nr_visualize) as pbar:
        for idx, dp in itertools.islice(enumerate(df.get_data()), nr_visualize):
            img = dp[0]
            # gt fields sit at the end of the datapoint; masks present only in mask mode
            if cfg.MODE_MASK:
                gt_boxes, gt_labels, gt_masks = dp[-3:]
            else:
                gt_boxes, gt_labels = dp[-2:]

            rpn_boxes, rpn_scores, all_probs, \
                final_boxes, final_probs, final_labels = pred(img, gt_boxes, gt_labels)

            # draw groundtruth boxes
            gt_viz = draw_annotation(img, gt_boxes, gt_labels)
            # draw best proposals for each groundtruth, to show recall
            proposal_viz, good_proposals_ind = draw_proposal_recall(
                img, rpn_boxes, rpn_scores, gt_boxes)
            # draw the scores for the above proposals
            score_viz = draw_predictions(img, rpn_boxes[good_proposals_ind],
                                         all_probs[good_proposals_ind])

            # final detections carry no mask here, hence the trailing Nones
            results = [
                DetectionResult(*args)
                for args in zip(final_boxes, final_probs, final_labels,
                                [None] * len(final_labels))
            ]
            final_viz = draw_final_outputs(img, results)

            # 2x2 grid: GT | proposals / proposal scores | final detections
            viz = tpviz.stack_patches(
                [gt_viz, proposal_viz, score_viz, final_viz], 2, 2)

            if os.environ.get('DISPLAY', None):
                tpviz.interactive_imshow(viz)
            cv2.imwrite("{}/{:03d}.png".format(output_dir, idx), viz)
            pbar.update()
def visualize(model, model_path, nr_visualize=100, output_dir='output'):
    """
    Render intermediate pipeline results (proposals and raw predictions) for a
    few training images and write them as PNGs under ``output_dir``.

    Args:
        model: detection model whose graph defines the fetched tensors.
        model_path: checkpoint path to load weights from.
        nr_visualize (int): number of training datapoints to visualize.
        output_dir (str): output directory; wiped and recreated on every call.
    """
    dataflow = get_train_dataflow()   # masks are not visualized here
    dataflow.reset_state()

    # proposal tensors live under a mode-dependent name scope
    proposal_scope = 'fpn' if cfg.MODE_FPN else 'rpn'
    fetch_names = [
        'generate_{}_proposals/boxes'.format(proposal_scope),
        'generate_{}_proposals/scores'.format(proposal_scope),
        'fastrcnn_all_scores',
        'output/boxes',
        'output/scores',
        'output/labels',
    ]
    predictor = OfflinePredictor(PredictConfig(
        model=model,
        session_init=get_model_loader(model_path),
        input_names=['image', 'gt_boxes', 'gt_labels'],
        output_names=fetch_names))

    # start from a clean output directory
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    utils.fs.mkdir_p(output_dir)

    with tqdm.tqdm(total=nr_visualize) as pbar:
        for img_idx, datapoint in itertools.islice(enumerate(dataflow), nr_visualize):
            img = datapoint[0]
            # gt fields sit at the end of the datapoint; masks only in mask mode
            if cfg.MODE_MASK:
                gt_boxes, gt_labels, gt_masks = datapoint[-3:]
            else:
                gt_boxes, gt_labels = datapoint[-2:]

            (rpn_boxes, rpn_scores, all_scores,
             final_boxes, final_scores, final_labels) = predictor(
                img, gt_boxes, gt_labels)

            # panel 1: groundtruth boxes
            gt_viz = draw_annotation(img, gt_boxes, gt_labels)
            # panel 2: best proposal per groundtruth box (shows recall)
            proposal_viz, good_proposals_ind = draw_proposal_recall(
                img, rpn_boxes, rpn_scores, gt_boxes)
            # panel 3: classification scores for those proposals
            score_viz = draw_predictions(img, rpn_boxes[good_proposals_ind],
                                         all_scores[good_proposals_ind])
            # panel 4: final detections (no masks, hence the trailing None)
            detections = [
                DetectionResult(box, score, label, None)
                for box, score, label in zip(final_boxes, final_scores,
                                             final_labels)
            ]
            final_viz = draw_final_outputs(img, detections)

            viz = tpviz.stack_patches(
                [gt_viz, proposal_viz, score_viz, final_viz], 2, 2)
            if os.environ.get('DISPLAY', None):
                tpviz.interactive_imshow(viz)
            cv2.imwrite("{}/{:03d}.png".format(output_dir, img_idx), viz)
            pbar.update()
def visualize(model_path, nr_visualize=50, output_dir='output'):
    """
    Visualize proposals and raw predictions for a few training images
    (older, RPN-only variant: builds its own Model()).

    Args:
        model_path: checkpoint path to load weights from.
        nr_visualize (int): number of training datapoints to visualize.
        output_dir (str): output directory; wiped and recreated on every call.
    """
    pred = OfflinePredictor(
        PredictConfig(model=Model(),
                      session_init=get_model_loader(model_path),
                      input_names=['image', 'gt_boxes', 'gt_labels'],
                      output_names=[
                          'generate_rpn_proposals/boxes',
                          'generate_rpn_proposals/probs',
                          'fastrcnn_all_probs',
                          'final_boxes',
                          'final_probs',
                          'final_labels',
                      ]))
    df = get_train_dataflow()
    df.reset_state()

    # start from a clean output directory
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    utils.fs.mkdir_p(output_dir)
    with tqdm.tqdm(total=nr_visualize) as pbar:
        for idx, dp in itertools.islice(enumerate(df.get_data()), nr_visualize):
            # datapoint layout here: image, anchor labels/boxes (unused), gt boxes/labels
            img, _, _, gt_boxes, gt_labels = dp

            rpn_boxes, rpn_scores, all_probs, \
                final_boxes, final_probs, final_labels = pred(img, gt_boxes, gt_labels)

            # draw groundtruth boxes
            gt_viz = draw_annotation(img, gt_boxes, gt_labels)
            # draw best proposals for each groundtruth, to show recall
            proposal_viz, good_proposals_ind = draw_proposal_recall(
                img, rpn_boxes, rpn_scores, gt_boxes)
            # draw the scores for the above proposals
            score_viz = draw_predictions(img, rpn_boxes[good_proposals_ind],
                                         all_probs[good_proposals_ind])

            # NOTE(review): fields are zipped as (label, box, prob) here, unlike the
            # (box, prob, label, mask) order used in sibling versions -- presumably
            # matches this version's DetectionResult namedtuple; confirm against
            # its definition.
            results = [
                DetectionResult(*args)
                for args in zip(final_labels, final_boxes, final_probs)
            ]
            final_viz = draw_final_outputs(img, results)

            # 2x2 grid: GT | proposals / proposal scores | final detections
            viz = tpviz.stack_patches(
                [gt_viz, proposal_viz, score_viz, final_viz], 2, 2)

            if os.environ.get('DISPLAY', None):
                tpviz.interactive_imshow(viz)
            cv2.imwrite("{}/{:03d}.png".format(output_dir, idx), viz)
            pbar.update()
stepnum = cfg.TRAIN.STEPS_PER_EPOCH # warmup is step based, lr is epoch based init_lr = cfg.TRAIN.WARMUP_INIT_LR * min(8. / cfg.TRAIN.NUM_GPUS, 1.) warmup_schedule = [(0, init_lr), (cfg.TRAIN.WARMUP, cfg.TRAIN.BASE_LR)] warmup_end_epoch = cfg.TRAIN.WARMUP * 1. / stepnum lr_schedule = [(int(warmup_end_epoch + 0.5), cfg.TRAIN.BASE_LR)] factor = 8. / cfg.TRAIN.NUM_GPUS for idx, steps in enumerate(cfg.TRAIN.LR_SCHEDULE[:-1]): mult = 0.1 ** (idx + 1) lr_schedule.append( (steps * factor // stepnum, cfg.TRAIN.BASE_LR * mult)) logger.info("Warm Up Schedule (steps, value): " + str(warmup_schedule)) logger.info("LR Schedule (epochs, value): " + str(lr_schedule)) train_dataflow = get_train_dataflow() # This is what's commonly referred to as "epochs" total_passes = cfg.TRAIN.LR_SCHEDULE[-1] * 8 / train_dataflow.size() logger.info("Total passes of the training set is: {:.5g}".format(total_passes)) callbacks = [ PeriodicCallback( ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1), every_k_epochs=20), # linear warmup ScheduledHyperParamSetter( 'learning_rate', warmup_schedule, interp='linear', step_based=True), ScheduledHyperParamSetter('learning_rate', lr_schedule), PeakMemoryTracker(), EstimatedTimeLeft(median=True), SessionRunTimeout(60000).set_chief_only(True), # 1 minute timeout
        # (fragment: starts inside the `if args.evaluate:` branch of a larger
        # evaluate / predict / train dispatch)
        offline_evaluate(pred, args.evaluate)
    elif args.predict:
        COCODetection(
            config.BASEDIR,
            'train2014')   # to load the class names into caches
        predict(pred, args.predict)
    else:
        logger.set_logger_dir(args.logdir)
        print_config()
        stepnum = 500
        warmup_epoch = 3
        factor = get_batch_factor()

        cfg = TrainConfig(
            model=Model(),
            data=QueueInput(get_train_dataflow(add_mask=config.MODE_MASK)),
            callbacks=[
                ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1),
                # linear warmup
                ScheduledHyperParamSetter('learning_rate',
                                          [(0, 3e-3),
                                           (warmup_epoch * factor, 1e-2)],
                                          interp='linear'),
                # step decay
                ScheduledHyperParamSetter(
                    'learning_rate',
                    [(warmup_epoch * factor, 1e-2),
                     (150000 * factor // stepnum, 1e-3),
                     (230000 * factor // stepnum, 1e-4)]),
                EvalCallback(),
                GPUUtilizationTracker(),
            ],
EvalCallback(*MODEL.get_inference_tensor_names()), PeakMemoryTracker(), EstimatedTimeLeft(median=True), SessionRunTimeout(60000).set_chief_only(True), # 1 minute timeout ] if not is_horovod: callbacks.append(GPUUtilizationTracker()) if args.load: session_init = get_model_loader(args.load) else: session_init = get_model_loader( cfg.BACKBONE.WEIGHTS) if cfg.BACKBONE.WEIGHTS else None traincfg = TrainConfig( model=MODEL, data=QueueInput(get_train_dataflow()), callbacks=callbacks, steps_per_epoch=stepnum, max_epoch=cfg.TRAIN.LR_SCHEDULE[-1] * factor // stepnum, session_init=session_init, ) if is_horovod: trainer = HorovodTrainer(average=False) else: # nccl mode has better speed than cpu mode trainer = SyncMultiGPUTrainerReplicated(cfg.TRAIN.NUM_GPUS, average=False, mode='nccl') launch_train_with_config(traincfg, trainer)
    # (fragment: schedule setup + TrainConfig from an older config-module version)
    print_config()
    factor = get_batch_factor()
    stepnum = config.STEPS_PER_EPOCH

    # warmup is step based, lr is epoch based
    warmup_schedule = [(0, config.BASE_LR / 3),
                       (config.WARMUP * factor, config.BASE_LR)]
    warmup_end_epoch = config.WARMUP * factor * 1. / stepnum
    lr_schedule = [(int(np.ceil(warmup_end_epoch)), warmup_schedule[-1][1])]
    for idx, steps in enumerate(config.LR_SCHEDULE[:-1]):
        mult = 0.1 ** (idx + 1)   # decay LR by 10x at each listed boundary
        lr_schedule.append(
            (steps * factor // stepnum, config.BASE_LR * mult))

    cfg = TrainConfig(
        model=Model(),
        data=QueueInput(get_train_dataflow(add_mask=config.MODE_MASK)),
        callbacks=[
            ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1),
            # linear warmup
            ScheduledHyperParamSetter(
                'learning_rate', warmup_schedule, interp='linear', step_based=True),
            ScheduledHyperParamSetter('learning_rate', lr_schedule),
            EvalCallback(),
            GPUUtilizationTracker(),
            EstimatedTimeLeft(),
        ],
        steps_per_epoch=stepnum,
        # NOTE(review): indexes LR_SCHEDULE[2] rather than [-1]; assumes a
        # 3-entry schedule -- confirm against this version's config defaults.
        max_epoch=config.LR_SCHEDULE[2] * factor // stepnum,
        session_init=get_model_loader(args.load) if args.load else None,
    )
    trainer = SyncMultiGPUTrainerReplicated(get_nr_gpu())
def train():
    """
    Build and run a raw-TF1 multi-GPU FPN Faster-RCNN training loop:
    python dataflow -> tf.data pipeline -> per-GPU loss towers ->
    averaged gradients -> momentum update with weight EMA and batch-norm
    updates, logging summaries periodically and checkpointing every
    1000 steps.
    """
    import multiprocessing as mp
    mp.set_start_method('spawn', force=True)

    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.TRAIN.GPU_LIST
    gpus = list(range(len(cfg.TRAIN.GPU_LIST.split(','))))
    num_gpus = len(gpus)

    # A fresh log dir => initialize from the original backbone checkpoint;
    # an existing one => resume from our own latest checkpoint below.
    restore_from_original_checkpoint = True
    checkpoint_path = cfg.TRAIN.LOG_DIR + COMMON_POSTFIX
    if not tf.io.gfile.exists(checkpoint_path):
        tf.io.gfile.makedirs(checkpoint_path)
    else:
        restore_from_original_checkpoint = False

    register_coco(os.path.expanduser(cfg.DATA.BASEDIR))
    data_iter = get_train_dataflow(
        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU * num_gpus)

    # Wrap the python generator in tf.data; one tensor per dict key below,
    # dtypes and (mostly unknown) shapes declared positionally.
    ds = tf.data.Dataset.from_generator(
        lambda: map(
            lambda x: tuple([
                x[k] for k in [
                    'images', 'gt_boxes', 'gt_labels', 'orig_gt_counts',
                    'all_anchors_level2', 'anchor_labels_level2',
                    'anchor_boxes_level2', 'all_anchors_level3',
                    'anchor_labels_level3', 'anchor_boxes_level3',
                    'all_anchors_level4', 'anchor_labels_level4',
                    'anchor_boxes_level4', 'all_anchors_level5',
                    'anchor_labels_level5', 'anchor_boxes_level5',
                    'all_anchors_level6', 'anchor_labels_level6',
                    'anchor_boxes_level6'
                ]
            ]), data_iter),
        (tf.float32, tf.float32, tf.int64, tf.int32,
         tf.float32, tf.int32, tf.float32,
         tf.float32, tf.int32, tf.float32,
         tf.float32, tf.int32, tf.float32,
         tf.float32, tf.int32, tf.float32,
         tf.float32, tf.int32, tf.float32),
        (
            tf.TensorShape([None, None, None, 3]),   # images (NHWC)
            tf.TensorShape([None, None, 4]),         # gt_boxes
            tf.TensorShape([None, None]),            # gt_labels
            tf.TensorShape([
                None,
            ]),                                      # orig_gt_counts
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None, 4]),  #lv2
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None, 4]),  #lv3
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None, 4]),  #lv4
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None, 4]),  #lv5
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None, 4])   #lv6
        ))
    ds = ds.prefetch(buffer_size=128)
    ds = ds.make_one_shot_iterator()
    images, gt_boxes, gt_labels, orig_gt_counts, \
        all_anchors_level2, anchor_labels_level2, anchor_boxes_level2, \
        all_anchors_level3, anchor_labels_level3, anchor_boxes_level3, \
        all_anchors_level4, anchor_labels_level4, anchor_boxes_level4, \
        all_anchors_level5, anchor_labels_level5, anchor_boxes_level5, \
        all_anchors_level6, anchor_labels_level6, anchor_boxes_level6 \
        = ds.get_next()

    # build optimizers
    global_step = tf.train.get_or_create_global_step()
    learning_rate = warmup_lr_schedule(init_learning_rate=cfg.TRAIN.BASE_LR,
                                       global_step=global_step,
                                       warmup_step=cfg.TRAIN.WARMUP_STEP)
    opt = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)

    sess_config = tf.ConfigProto()
    sess_config.allow_soft_placement = True
    sess_config.log_device_placement = False
    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)

    if num_gpus > 1:
        # split the batched inputs along axis 0, one slice per tower
        base_inputs_list = [
            tf.split(value, num_or_size_splits=num_gpus, axis=0)
            for value in [images, gt_boxes, gt_labels, orig_gt_counts]
        ]
        # anchors are replicated (tf.identity) rather than split per tower
        fpn_all_anchors_list = \
            [[tf.identity(value) for _ in range(num_gpus)]
             for value in [all_anchors_level2, all_anchors_level3,
                           all_anchors_level4, all_anchors_level5,
                           all_anchors_level6]]
        fpn_anchor_gt_labels_list = \
            [tf.split(value, num_or_size_splits=num_gpus, axis=0)
             for value in [anchor_labels_level2, anchor_labels_level3,
                           anchor_labels_level4, anchor_labels_level5,
                           anchor_labels_level6]]
        fpn_anchor_gt_boxes_list = \
            [tf.split(value, num_or_size_splits=num_gpus, axis=0)
             for value in [anchor_boxes_level2, anchor_boxes_level3,
                           anchor_boxes_level4, anchor_boxes_level5,
                           anchor_boxes_level6]]

        tower_grads = []
        total_loss_dict = {
            'rpn_cls_loss': tf.constant(0.),
            'rpn_box_loss': tf.constant(0.),
            'rcnn_cls_loss': tf.constant(0.),
            'rcnn_box_loss': tf.constant(0.)
        }
        for i, gpu_id in enumerate(gpus):
            with tf.device('/gpu:%d' % gpu_id):
                with tf.name_scope('model_%d' % gpu_id) as scope:
                    inputs1 = [input[i] for input in base_inputs_list]
                    inputs2 = [[input[i] for input in fpn_all_anchors_list]]
                    inputs3 = [[
                        input[i] for input in fpn_anchor_gt_labels_list
                    ]]
                    inputs4 = [[
                        input[i] for input in fpn_anchor_gt_boxes_list
                    ]]
                    net_inputs = inputs1 + inputs2 + inputs3 + inputs4
                    # variables are shared across towers via reuse
                    tower_loss_dict = tower_loss_func(net_inputs,
                                                      reuse=(gpu_id > 0))
                    batch_norm_updates = tf.get_collection(
                        tf.GraphKeys.UPDATE_OPS, scope)
                    tower_loss = tf.add_n(
                        [v for k, v in tower_loss_dict.items()])
                    for k, v in tower_loss_dict.items():
                        total_loss_dict[k] += v
                    if i == num_gpus - 1:
                        # weight decay is added once, on the last tower only
                        wd_loss = regularize_cost('.*/kernel',
                                                  l2_regularizer(
                                                      cfg.TRAIN.WEIGHT_DECAY),
                                                  name='wd_cost')
                        tower_loss = tower_loss + wd_loss
                        # Retain the summaries from the final tower.
                        summaries = tf.get_collection(
                            tf.GraphKeys.SUMMARIES, scope)
                        if cfg.FRCNN.VISUALIZATION:
                            with tf.device('/cpu:0'):
                                with tf.name_scope('loss-summaries'):
                                    for k, v in tower_loss_dict.items():
                                        summaries.append(
                                            tf.summary.scalar(k, v))
                    grads = opt.compute_gradients(tower_loss)
                    tower_grads.append(grads)
        grads = average_gradients(tower_grads)
        # report per-GPU-averaged losses
        for k, v in total_loss_dict.items():
            total_loss_dict[k] = v / tf.cast(num_gpus, tf.float32)
        average_total_loss = tf.add_n(
            [v for k, v in total_loss_dict.items()] + [wd_loss])
    else:
        # single-GPU path: feed the full tensors straight into the network
        fpn_all_anchors = \
            [all_anchors_level2, all_anchors_level3, all_anchors_level4,
             all_anchors_level5, all_anchors_level6]
        fpn_anchor_gt_labels = \
            [anchor_labels_level2, anchor_labels_level3, anchor_labels_level4,
             anchor_labels_level5, anchor_labels_level6]
        fpn_anchor_gt_boxes = \
            [anchor_boxes_level2, anchor_boxes_level3, anchor_boxes_level4,
             anchor_boxes_level5, anchor_boxes_level6]
        net_inputs = [
            images, gt_boxes, gt_labels, orig_gt_counts, fpn_all_anchors,
            fpn_anchor_gt_labels, fpn_anchor_gt_boxes
        ]
        tower_loss_dict = tower_loss_func(net_inputs)
        batch_norm_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        wd_loss = regularize_cost('.*/kernel',
                                  l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
                                  name='wd_cost')
        average_total_loss = tf.add_n(
            [v for k, v in tower_loss_dict.items()] + [wd_loss])
        grads = opt.compute_gradients(average_total_loss)
        total_loss_dict = tower_loss_dict
        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
        if cfg.FRCNN.VISUALIZATION:
            with tf.device('/cpu:0'):
                with tf.name_scope('loss-summaries'):
                    for k, v in tower_loss_dict.items():
                        summaries.append(tf.summary.scalar(k, v))

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    summaries.append(tf.summary.scalar('learning_rate', learning_rate))
    # add histograms for gradients
    for grad, var in grads:
        # print(grad, var)
        if grad is not None:
            summaries.append(
                tf.summary.histogram(var.op.name + '/gradients', grad))
    # add histograms for trainable variables
    for var in tf.trainable_variables():
        summaries.append(tf.summary.histogram(var.op.name, var))

    # exponential moving average of the weights
    variable_averages = tf.train.ExponentialMovingAverage(
        cfg.TRAIN.MOVING_AVERAGE_DECAY, num_updates=global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    # dump variable listings to text files for offline debugging
    all_global_vars = []
    for var in tf.global_variables():
        all_global_vars.append(var.name + '\n')
        # print(var.name, var.shape)
    with open('all_global_vars.txt', 'w') as fp:
        fp.writelines(all_global_vars)
    all_trainable_vars = []
    for var in tf.trainable_variables():
        all_trainable_vars.append(var.name + '\n')
    with open('all_trainable_vars.txt', 'w') as fp:
        fp.writelines(all_trainable_vars)
    all_moving_average_vars = []
    for var in tf.moving_average_variables():
        all_moving_average_vars.append(var.name + '\n')
    with open('all_moving_average_variables.txt', 'w') as fp:
        fp.writelines(all_moving_average_vars)

    # batch norm updates
    batch_norm_updates_op = tf.group(*batch_norm_updates)
    with tf.control_dependencies(
            [apply_gradient_op, variable_averages_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge(summaries)
    summary_writer = tf.summary.FileWriter(checkpoint_path,
                                           tf.get_default_graph())

    init_op = tf.group(
        [tf.global_variables_initializer(),
         tf.local_variables_initializer()])
    sess.run(init_op)

    # Dead code path kept for reference: manual weight loading from the
    # tensorpack MSRA-R50.npz release, mapping npz keys to this graph's names.
    if False:
        print('load weights ...')
        ckpt_params = dict(np.load('MSRA-R50.npz'))
        assign_ops = []
        all_variables = []
        for var in tf.global_variables():
            dst_name = var.name
            all_variables.append(dst_name + '\n')
            if 'resnet50' in dst_name:
                src_name = dst_name.replace('resnet50/', ''). \
                    replace('conv2d/kernel:0', 'W') \
                    .replace('conv2d/bias:0', 'b') \
                    .replace('batch_normalization/gamma:0', 'gamma') \
                    .replace('batch_normalization/beta:0', 'beta') \
                    .replace('batch_normalization/moving_mean:0', 'mean/EMA') \
                    .replace('batch_normalization/moving_variance:0', 'variance/EMA') \
                    .replace('kernel:0', 'W').replace('bias:0', 'b')
                if 'batch_normalization' in dst_name:
                    src_name = src_name.replace('res', 'bn')
                    if 'conv1' in src_name:
                        src_name = 'bn_' + src_name
                if src_name == 'fc1000/W':
                    # fc weights need reshaping to the npz layout
                    print('{} --> {} {}'.format('fc1000/W', dst_name, var.shape))
                    assign_ops.append(
                        tf.assign(
                            var,
                            np.reshape(ckpt_params[src_name], [2048, 1000])))
                    continue
                if src_name in ckpt_params:
                    print('{} --> {} {}'.format(src_name, dst_name, var.shape))
                    assign_ops.append(tf.assign(var, ckpt_params[src_name]))
        print('load weights done.')
        with open('all_vars.txt', 'w') as fp:
            fp.writelines(all_variables)
        all_update_ops = []
        for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS):
            all_update_ops.append(op.name + '\n')
        with open('all_update_ops.txt', 'w') as fp:
            fp.writelines(all_update_ops)
        sess.run(assign_ops)
    else:
        if False:
            # another dead path: restore backbone variables by raw name
            all_vars = []
            restore_var_dict = {}
            for var in tf.global_variables():
                all_vars.append(var.name + '\n')
                if 'rpn' not in var.name and 'rcnn' not in var.name and 'global_step' not in var.name and \
                        'Momentum' not in var.name and 'ExponentialMovingAverage' not in var.name:
                    restore_var_dict[var.name.replace(':0', '')] = var
            with open('all_vars.txt', 'w') as fp:
                fp.writelines(all_vars)
            restorer = tf.train.Saver(var_list=restore_var_dict)
            restorer.restore(sess, cfg.BACKBONE.CHECKPOINT_PATH)
        else:
            if restore_from_original_checkpoint:
                # restore from official ResNet checkpoint
                all_vars = []
                restore_var_dict = {}
                for var in tf.global_variables():
                    all_vars.append(var.name + '\n')
                    # only backbone variables; skip heads and optimizer state
                    if 'rpn' not in var.name and 'rcnn' not in var.name and 'fpn' not in var.name \
                            and 'global_step' not in var.name and \
                            'Momentum' not in var.name and 'ExponentialMovingAverage' not in var.name:
                        restore_var_dict[var.name.replace('resnet50/', '').replace(
                            ':0', '')] = var
                        print(var.name, var.shape)
                with open('all_vars.txt', 'w') as fp:
                    fp.writelines(all_vars)
                restore_vars_names = [
                    k + '\n' for k in restore_var_dict.keys()
                ]
                with open('all_restore_vars.txt', 'w') as fp:
                    fp.writelines(restore_vars_names)
                restorer = tf.train.Saver(var_list=restore_var_dict)
                restorer.restore(sess, cfg.BACKBONE.CHECKPOINT_PATH)
            else:
                all_vars = []
                restore_var_dict = {}
                for var in tf.global_variables():
                    all_vars.append(var.name + '\n')
                    restore_var_dict[var.name.replace(':0', '')] = var
                with open('all_vars.txt', 'w') as fp:
                    fp.writelines(all_vars)
                # restore from local checkpoint
                restorer = tf.train.Saver(tf.global_variables())
                try:
                    restorer.restore(
                        sess, tf.train.latest_checkpoint(checkpoint_path))
                # NOTE(review): bare except silently ignores a failed resume and
                # continues from random init -- consider narrowing and logging.
                except:
                    pass

    # record all ops
    all_operations = []
    for op in sess.graph.get_operations():
        all_operations.append(op.name + '\n')
    with open('all_ops.txt', 'w') as fp:
        fp.writelines(all_operations)

    loss_names = [
        'rpn_cls_loss', 'rpn_box_loss', 'rcnn_cls_loss', 'rcnn_box_loss'
    ]

    # fetches for one training step (order matches the unpack below)
    sess2run = list()
    sess2run.append(train_op)
    sess2run.append(learning_rate)
    sess2run.append(average_total_loss)
    sess2run.append(wd_loss)
    sess2run.extend([total_loss_dict[k] for k in loss_names])

    print('begin training ...')
    step = sess.run(global_step)
    step0 = step
    start = time.time()
    for step in range(step, cfg.TRAIN.MAX_STEPS):
        if step % cfg.TRAIN.SAVE_SUMMARY_STEPS == 0:
            # full fetch including summaries and losses for logging
            _, lr_, tl_, wd_loss_, \
                rpn_cls_loss_, rpn_box_loss_, \
                rcnn_cls_loss_, rcnn_box_loss_, \
                summary_str = sess.run(sess2run + [summary_op])
            avg_time_per_step = (time.time() -
                                 start) / cfg.TRAIN.SAVE_SUMMARY_STEPS
            avg_examples_per_second = (cfg.TRAIN.SAVE_SUMMARY_STEPS *
                                       cfg.TRAIN.BATCH_SIZE_PER_GPU * num_gpus) \
                / (time.time() - start)
            start = time.time()
            print('Step {:06d}, LR: {:.6f} LOSS: {:.4f}, '
                  'RPN: {:.4f}, {:.4f}, RCNN: {:.4f}, {:.4f}, wd: {:.4f}, '
                  '{:.2f} s/step, {:.2f} samples/s'.format(
                      step, lr_, tl_, rpn_cls_loss_, rpn_box_loss_,
                      rcnn_cls_loss_, rcnn_box_loss_, wd_loss_,
                      avg_time_per_step, avg_examples_per_second))
            summary_writer.add_summary(summary_str, global_step=step)
        else:
            # cheap step: only run the training op
            sess.run(train_op)
        if step % 1000 == 0:
            saver.save(sess, checkpoint_path + '/model.ckpt', global_step=step)
############################################################## ############################################################## ############################################################## # # # # # # # # # # # # Testing Type 2 # # # # # # # # # # # # ############################################################## ############################################################## ############################################################## src_Test = '/media/ayan/Drive/IMI-Research/Datasets/Datasets_OP_Test/' save_path = './generated_outputLast/' #initialize_FasterRCNN(args.load) saver = tf.train.Saver() itr, _ = load_weights(saver, './model/') output_file = 'out.json' all_results = [] df = get_train_dataflow(src_Test) df.reset_state() iter = 0 data_generator = df.get_data() max_iters = df.size() save_folder = '/media/ayan/Drive/All_Object/tensorpack-master/Faster_RCNN_Test/Object-Detection-Metrics-master_2/' while iter < max_iters: iter = iter + 1 print(iter) try: batch_image, batch_anchor_labels, batch_anchor_boxes, batch_gt_boxes, batch_gt_labels = next( data_generator) except StopIteration: break orig_shape = batch_image.shape[:2]
PeriodicCallback( ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1), every_k_epochs=20), # linear warmup ScheduledHyperParamSetter( 'learning_rate', warmup_schedule, interp='linear', step_based=True), ScheduledHyperParamSetter('learning_rate', lr_schedule), EvalCallback(), PeakMemoryTracker(), EstimatedTimeLeft(), SessionRunTimeout(60000).set_chief_only(True), # 1 minute timeout ] if not is_horovod: callbacks.append(GPUUtilizationTracker()) cfg = TrainConfig( model=get_model(), data=QueueInput(get_train_dataflow()), callbacks=callbacks, steps_per_epoch=stepnum, max_epoch=config.LR_SCHEDULE[-1] * factor // stepnum, session_init=get_model_loader(args.load) if args.load else None, ) if is_horovod: # horovod mode has the best speed for this model trainer = HorovodTrainer() else: # nccl mode has better speed than cpu mode trainer = SyncMultiGPUTrainerReplicated(config.NUM_GPUS, mode='nccl') launch_train_with_config(cfg, trainer)
def do_visualize(model, model_path, nr_visualize=100, output_dir='output'):
    """
    Visualize some intermediate results (proposals, raw predictions) inside the pipeline.

    This variant feeds a batched model: every input gets a leading batch
    dimension of 1 before prediction.

    Args:
        model: detection model whose graph defines the tensors fetched below.
        model_path: checkpoint path to load weights from.
        nr_visualize (int): number of training datapoints to visualize.
        output_dir (str): output directory; wiped and recreated on every call.
    """
    df = get_train_dataflow()   # we don't visualize mask stuff
    df.reset_state()

    pred = OfflinePredictor(
        PredictConfig(
            model=model,
            session_init=get_model_loader(model_path),
            input_names=['images', 'orig_image_dims', 'gt_boxes', 'gt_labels'],
            output_names=[
                # proposal tensors live under a mode-dependent name scope
                'generate_{}_proposals_topk_per_image/boxes'.format(
                    'fpn' if cfg.MODE_FPN else 'rpn'),
                'generate_{}_proposals_topk_per_image/scores'.format(
                    'fpn' if cfg.MODE_FPN else 'rpn'),
                'fastrcnn_all_scores',
                'output/boxes',
                'output/scores',
                'output/labels',
            ]))

    # start from a clean output directory
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    utils.fs.mkdir_p(output_dir)
    with tqdm.tqdm(total=nr_visualize) as pbar:
        for idx, dp in itertools.islice(enumerate(df), nr_visualize):
            img, gt_boxes, gt_labels = dp['images'], dp['gt_boxes'], dp[
                'gt_labels']
            # NOTE(review): orig_shape is computed but never used below
            orig_shape = img.shape[:2]

            # the model is batched: expand every input to batch size 1
            rpn_boxes, rpn_scores, all_scores, \
                final_boxes, final_scores, final_labels = pred(
                    np.expand_dims(img, axis=0),
                    np.expand_dims(np.array(img.shape), axis=0),
                    np.expand_dims(gt_boxes, axis=0),
                    np.expand_dims(gt_labels, axis=0))

            # draw groundtruth boxes
            gt_viz = draw_annotation(img, gt_boxes, gt_labels)
            # draw best proposals for each groundtruth, to show recall
            # custom op creates different shape for boxes, convert back to original
            rpn_boxes = np.array([i[1:] for i in rpn_boxes])
            proposal_viz, good_proposals_ind = draw_proposal_recall(
                img, rpn_boxes, rpn_scores, gt_boxes)
            # draw the scores for the above proposals
            score_viz = draw_predictions(img, rpn_boxes[good_proposals_ind],
                                         all_scores[good_proposals_ind])

            # final detections carry no mask here, hence the trailing Nones
            results = [
                DetectionResult(*args)
                for args in zip(final_boxes, final_scores, final_labels,
                                [None] * len(final_labels))
            ]
            final_viz = draw_final_outputs(img, results)

            # 2x2 grid: GT | proposals / proposal scores | final detections
            viz = tpviz.stack_patches(
                [gt_viz, proposal_viz, score_viz, final_viz], 2, 2)

            if os.environ.get('DISPLAY', None):
                tpviz.interactive_imshow(viz)
            cv2.imwrite("{}/{:03d}.png".format(output_dir, idx), viz)
            pbar.update()
def build_training_dataflow(self) -> tp.DataFlow:
    """Construct the training dataflow from the experiment's storage hyperparameters."""
    hparam = self.context.get_hparam
    return get_train_dataflow(hparam("is_aws"), hparam("is_gcs"))
stepnum = cfg.TRAIN.STEPS_PER_EPOCH # warmup is step based, lr is epoch based init_lr = cfg.TRAIN.BASE_LR * 0.33 * min(8. / cfg.TRAIN.NUM_GPUS, 1.) warmup_schedule = [(0, init_lr), (cfg.TRAIN.WARMUP, cfg.TRAIN.BASE_LR)] warmup_end_epoch = cfg.TRAIN.WARMUP * 1. / stepnum lr_schedule = [(int(warmup_end_epoch + 0.5), cfg.TRAIN.BASE_LR)] factor = 8. / cfg.TRAIN.NUM_GPUS for idx, steps in enumerate(cfg.TRAIN.LR_SCHEDULE[:-1]): mult = 0.1 ** (idx + 1) lr_schedule.append( (steps * factor // stepnum, cfg.TRAIN.BASE_LR * mult)) logger.info("Warm Up Schedule (steps, value): " + str(warmup_schedule)) logger.info("LR Schedule (epochs, value): " + str(lr_schedule)) train_dataflow = get_train_dataflow() # This is what's commonly referred to as "epochs" total_passes = cfg.TRAIN.LR_SCHEDULE[-1] * 8 / train_dataflow.size() logger.info("Total passes of the training set is: {}".format(total_passes)) callbacks = [ PeriodicCallback( ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1), every_k_epochs=20), # linear warmup ScheduledHyperParamSetter( 'learning_rate', warmup_schedule, interp='linear', step_based=True), ScheduledHyperParamSetter('learning_rate', lr_schedule), EvalCallback(*MODEL.get_inference_tensor_names()), PeakMemoryTracker(), EstimatedTimeLeft(median=True),