def __init__(
    self,
    cfg,
    confidence_threshold=0.7,
    show_mask_heatmaps=False,
    masks_per_dim=2,
    min_image_size=224,
):
    """Build the demo predictor: model, weights, transforms and mask tools.

    Args:
        cfg: detectron config node; cloned so later mutation is harmless.
        confidence_threshold: minimum detection score kept for display.
        show_mask_heatmaps: if True, the masker keeps soft heatmaps.
        masks_per_dim: grid size used when tiling mask heatmaps.
        min_image_size: shortest side the input image is resized to.
    """
    self.cfg = cfg.clone()
    self.device = torch.device(cfg.MODEL.DEVICE)

    # Inference-only model on the configured device.
    self.model = build_detection_model(cfg)
    self.model.eval()
    self.model.to(self.device)

    # Restore the weights referenced by the config.
    weight_loader = DetectronCheckpointer(cfg, self.model)
    _ = weight_loader.load(cfg.MODEL.WEIGHT)

    self.min_image_size = min_image_size
    self.transforms = self.build_transform()

    # A threshold of -1 keeps raw probabilities so heatmaps can be rendered.
    self.masker = Masker(
        threshold=-1 if show_mask_heatmaps else 0.5,
        padding=1,
    )

    # used to make colors for each class
    self.palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
    self.cpu_device = torch.device("cpu")
    self.confidence_threshold = confidence_threshold
    self.show_mask_heatmaps = show_mask_heatmaps
    self.masks_per_dim = masks_per_dim
def train(cfg, local_rank, distributed):
    """Train a detection model and return it.

    Builds the model from ``cfg``, optionally wraps it in the (deprecated)
    distributed data-parallel module, restores the checkpoint referenced by
    ``cfg.MODEL.WEIGHT``, then runs the generic training loop.
    """
    device = torch.device(cfg.MODEL.DEVICE)

    model = build_detection_model(cfg)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.deprecated.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    train_args = {"iteration": 0}

    saver = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, cfg.OUTPUT_DIR, get_rank() == 0
    )
    # Resume from cfg.MODEL.WEIGHT (or the checkpoint it points at).
    train_args.update(saver.load(cfg.MODEL.WEIGHT))

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=train_args["iteration"],
    )

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        saver,
        device,
        cfg.SOLVER.CHECKPOINT_PERIOD,
        train_args,
    )
    return model
def train(cfg, local_rank, distributed, logger):
    """Train the listener network against a (mostly frozen) scene-graph speaker.

    Per batch, the speaker ``model`` emits one scene graph per image; the
    ``listener`` scores every (scene graph, image) pair and a margin ranking
    loss pushes each matching (diagonal) pair against its row/column rivals.
    Metrics and losses are logged to wandb from the main process only.
    Returns the trained listener.

    NOTE(review): this block was reconstructed from a whitespace-collapsed
    source; comment boundaries (especially around amp.initialize) are
    best-effort — confirm against the original file.
    """
    if is_main_process():
        # Experiment tracking lives on the main process only.
        wandb.init(project='scene-graph', entity='sgg-speaker-listener', config=cfg.LISTENER)
    debug_print(logger, 'prepare training')
    model = build_detection_model(cfg)
    listener = build_listener(cfg)
    if is_main_process():
        wandb.watch(listener)
    debug_print(logger, 'end model construction')

    # modules that should be always set in eval mode
    # their eval() method should be called after model.train() is called
    eval_modules = (model.rpn, model.backbone, model.roi_heads.box,)
    fix_eval_modules(eval_modules)

    # NOTE, we slow down the LR of the layers start with the names in slow_heads
    if cfg.MODEL.ROI_RELATION_HEAD.PREDICTOR == "IMPPredictor":
        slow_heads = [
            "roi_heads.relation.box_feature_extractor",
            "roi_heads.relation.union_feature_extractor.feature_extractor",
        ]
    else:
        slow_heads = []

    # load pretrain layers to new layers
    load_mapping = {
        "roi_heads.relation.box_feature_extractor": "roi_heads.box.feature_extractor",
        "roi_heads.relation.union_feature_extractor.feature_extractor": "roi_heads.box.feature_extractor"
    }
    if cfg.MODEL.ATTRIBUTE_ON:
        load_mapping["roi_heads.relation.att_feature_extractor"] = "roi_heads.attribute.feature_extractor"
        load_mapping["roi_heads.relation.union_feature_extractor.att_feature_extractor"] = "roi_heads.attribute.feature_extractor"

    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    listener.to(device)

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    num_batch = cfg.SOLVER.IMS_PER_BATCH
    # rl_factor rescales the detector LR by the per-step batch size.
    optimizer = make_optimizer(cfg, model, logger, slow_heads=slow_heads, slow_ratio=10.0, rl_factor=float(num_batch))
    listener_optimizer = make_listener_optimizer(cfg, listener)
    scheduler = make_lr_scheduler(cfg, optimizer, logger)
    listener_scheduler = None  # no LR schedule for the listener
    debug_print(logger, 'end optimizer and shcedule')

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    #listener, listener_optimizer = amp.initialize(listener, listener_optimizer, opt_level='O0')
    # Register speaker and listener with apex amp; loss scaling pinned to 1.
    [model, listener], [optimizer, listener_optimizer] = amp.initialize([model, listener], [optimizer, listener_optimizer], opt_level='O1', loss_scale=1)
    # NOTE(review): re-initialising `model` with amp a second time looks
    # redundant/suspect — confirm this line was active in the original file.
    model = amp.initialize(model, opt_level='O1')

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
            find_unused_parameters=True,
        )
        listener = torch.nn.parallel.DistributedDataParallel(
            listener,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
            find_unused_parameters=True,
        )
    debug_print(logger, 'end distributed')

    arguments = {}
    arguments["iteration"] = 0
    output_dir = cfg.OUTPUT_DIR
    listener_dir = cfg.LISTENER_DIR
    save_to_disk = get_rank() == 0
    # Separate checkpointers: detector/speaker state vs listener state.
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler, output_dir, save_to_disk, custom_scheduler=True)
    listener_checkpointer = Checkpointer(listener, optimizer=listener_optimizer, save_dir=listener_dir, save_to_disk=save_to_disk, custom_scheduler=False)

    if checkpointer.has_checkpoint():
        extra_checkpoint_data = checkpointer.load(cfg.MODEL.PRETRAINED_DETECTOR_CKPT, update_schedule=cfg.SOLVER.UPDATE_SCHEDULE_DURING_LOAD)
        arguments.update(extra_checkpoint_data)
    else:
        # load_mapping is only used when we init current model from detection model.
        checkpointer.load(cfg.MODEL.PRETRAINED_DETECTOR_CKPT, with_optim=False, load_mapping=load_mapping)

    # if there is certain checkpoint in output_dir, load it, else load pretrained detector
    if listener_checkpointer.has_checkpoint():
        extra_listener_checkpoint_data = listener_checkpointer.load()
        # Restore amp's loss-scaling state together with the listener weights.
        amp.load_state_dict(extra_listener_checkpoint_data['amp'])
        '''
        print('Weights after load: ')
        print('****************************')
        print(listener.gnn.conv1.node_model.node_mlp_1[0].weight)
        print('****************************')
        '''
        # arguments.update(extra_listener_checkpoint_data)
    debug_print(logger, 'end load checkpointer')

    train_data_loader = make_data_loader(cfg, mode='train', is_distributed=distributed, start_iter=arguments["iteration"], ret_images=True)
    val_data_loaders = make_data_loader(cfg, mode='val', is_distributed=distributed, ret_images=True)
    debug_print(logger, 'end dataloader')
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    if cfg.SOLVER.PRE_VAL:
        # NOTE(review): pre-training validation is currently disabled; only
        # the log line remains.
        logger.info("Validate before training")
        #output = run_val(cfg, model, listener, val_data_loaders, distributed, logger)
        #print('OUTPUT: ', output)
        #(sg_loss, img_loss, sg_acc, img_acc) = output

    logger.info("Start training")
    meters = MetricLogger(delimiter=" ")
    max_iter = len(train_data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()

    print_first_grad = True
    # reduction='none' keeps per-pair losses; they are aggregated manually below.
    listener_loss_func = torch.nn.MarginRankingLoss(margin=1, reduction='none')
    mistake_saver = None
    if is_main_process():
        # Build id -> name maps so mis-ranked pairs can be rendered to HTML.
        ds_catalog = DatasetCatalog()
        dict_file_path = os.path.join(ds_catalog.DATA_DIR, ds_catalog.DATASETS['VG_stanford_filtered_with_attribute']['dict_file'])
        ind_to_classes, ind_to_predicates = load_vg_info(dict_file_path)
        ind_to_classes = {k: v for k, v in enumerate(ind_to_classes)}
        ind_to_predicates = {k: v for k, v in enumerate(ind_to_predicates)}
        # NOTE(review): '/n' below is probably a typo for '\n' (cosmetic only).
        print('ind to classes:', ind_to_classes, '/n ind to predicates:', ind_to_predicates)
        mistake_saver = MistakeSaver('/Scene-Graph-Benchmark.pytorch/filenames_masked', ind_to_classes, ind_to_predicates)

    #is_printed = False
    # Outer loop restarts the (finite) data loader forever; see NOTE at the end.
    while True:
        try:
            listener_iteration = 0
            for iteration, (images, targets, image_ids) in enumerate(train_data_loader, start_iter):
                listener_optimizer.zero_grad()
                #print(f'ITERATION NUMBER: {iteration}')
                if any(len(target) < 1 for target in targets):
                    # NOTE(review): `_` is not defined in this scope; this log
                    # line would raise NameError if ever reached — confirm.
                    logger.error(f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}")
                # The ranking loss needs at least two images per batch.
                if len(images) <= 1:
                    continue

                data_time = time.time() - end
                iteration = iteration + 1
                listener_iteration += 1
                arguments["iteration"] = iteration

                model.train()
                # Keep the frozen submodules (rpn/backbone/box head) in eval mode.
                fix_eval_modules(eval_modules)

                # The speaker consumes the original (padded) image list...
                images_list = deepcopy(images)
                images_list = to_image_list(images_list, cfg.DATALOADER.SIZE_DIVISIBILITY).to(device)

                #SAVE IMAGE TO PC
                '''
                transform = transforms.Compose([
                    transforms.ToPILImage(),
                    #transforms.Resize((cfg.LISTENER.IMAGE_SIZE, cfg.LISTENER.IMAGE_SIZE)),
                    transforms.ToTensor(),
                ])
                '''
                # turn images to a uniform size
                #print('IMAGE BEFORE Transform: ', images[0], 'GPU: ', get_rank())
                '''
                if is_main_process():
                    if not is_printed:
                        transform = transforms.ToPILImage()
                        print('SAVING IMAGE')
                        img = transform(images[0].cpu())
                        print('DONE TRANSFORM')
                        img.save('image.png')
                        print('DONE SAVING IMAGE')
                        print('ids ', image_ids[0])
                '''
                # ...while the listener gets a fixed 224x224 resize of each image.
                for i in range(len(images)):
                    images[i] = images[i].unsqueeze(0)
                    images[i] = F.interpolate(images[i], size=(224, 224), mode='bilinear', align_corners=False)
                    images[i] = images[i].squeeze()
                images = torch.stack(images).to(device)
                #images.requires_grad_()
                targets = [target.to(device) for target in targets]
                #print('IMAGE BEFORE Model: ', images[0], 'GPU: ', get_rank())
                # Speaker forward pass: one scene graph per image.
                _, sgs = model(images_list, targets)
                #print('IMAGE AFTER Model: ', images)
                '''
                is_printed = False
                if is_main_process():
                    if not is_printed:
                        print('PRINTING OBJECTS')
                        (obj, rel_pair, rel) = sgs[0]
                        obj = torch.argmax(obj, dim=1)
                        for i in range(obj.size(0)):
                            print(f'OBJECT {i}: ', obj[i])
                        print('DONE PRINTING OBJECTS')
                        is_printed=True
                '''
                image_list = None
                sgs = collate_sgs(sgs, cfg.MODEL.DEVICE)
                '''
                if is_main_process():
                    if not is_printed:
                        mistake_saver.add_mistake((image_ids[0], image_ids[1]), (sgs[0], sgs[1]), 231231, 'SG')
                        mistake_saver.toHtml('/www')
                        is_printed = True
                '''
                listener_loss = 0
                gap_reward = 0
                avg_acc = 0
                num_correct = 0
                # score_matrix[i][j] = listener score of (scene graph i, image j).
                score_matrix = torch.zeros((images.size(0), images.size(0)))
                # fill score matrix
                for true_index, sg in enumerate(sgs):
                    acc = 0
                    # Detach speaker outputs so listener gradients do not flow
                    # back into the speaker.
                    detached_sg = (sg[0].detach().requires_grad_().to(torch.float32), sg[1].long(), sg[2].detach().requires_grad_().to(torch.float32))
                    #scores = listener(sg, images)
                    with amp.disable_casts():
                        scores = listener(detached_sg, images)
                    score_matrix[true_index] = scores
                #print('Score matrix:', score_matrix)
                score_matrix = score_matrix.to(device)

                # loss_matrix[0]: scene-graph-centred rows; loss_matrix[1]: image-centred.
                loss_matrix = torch.zeros((2, images.size(0), images.size(0)), device=device)
                # sg centered scores
                for true_index in range(loss_matrix.size(1)):
                    row_score = score_matrix[true_index]
                    (true_scores, predicted_scores, binary) = format_scores(row_score, true_index, device)
                    loss_vec = listener_loss_func(true_scores, predicted_scores, binary)
                    loss_matrix[0][true_index] = loss_vec
                # image centered scores
                transposted_score_matrix = score_matrix.t()
                for true_index in range(loss_matrix.size(1)):
                    row_score = transposted_score_matrix[true_index]
                    (true_scores, predicted_scores, binary) = format_scores(row_score, true_index, device)
                    loss_vec = listener_loss_func(true_scores, predicted_scores, binary)
                    loss_matrix[1][true_index] = loss_vec

                print('iteration:', listener_iteration)
                sg_acc = 0
                img_acc = 0
                # calculate accuracy
                # NOTE(review): a pair counts as "correct" when the diagonal
                # loss exceeds the off-diagonal one; whether that direction is
                # intended depends on format_scores' semantics — confirm.
                for i in range(loss_matrix.size(1)):
                    temp_sg_acc = 0
                    temp_img_acc = 0
                    for j in range(loss_matrix.size(2)):
                        if loss_matrix[0][i][i] > loss_matrix[0][i][j]:
                            temp_sg_acc += 1
                        else:
                            if cfg.LISTENER.HTML:
                                # Periodically record mis-ranked SG pairs for the HTML report.
                                if is_main_process() and listener_iteration >= 600 and listener_iteration % 25 == 0 and i != j:
                                    detached_sg_i = (sgs[i][0].detach(), sgs[i][1], sgs[i][2].detach())
                                    detached_sg_j = (sgs[j][0].detach(), sgs[j][1], sgs[j][2].detach())
                                    mistake_saver.add_mistake((image_ids[i], image_ids[j]), (detached_sg_i, detached_sg_j), listener_iteration, 'SG')
                        if loss_matrix[1][i][i] > loss_matrix[1][j][i]:
                            temp_img_acc += 1
                        else:
                            if cfg.LISTENER.HTML:
                                if is_main_process() and listener_iteration >= 600 and listener_iteration % 25 == 0 and i != j:
                                    detached_sg_i = (sgs[i][0].detach(), sgs[i][1], sgs[i][2].detach())
                                    detached_sg_j = (sgs[j][0].detach(), sgs[j][1], sgs[j][2].detach())
                                    mistake_saver.add_mistake((image_ids[i], image_ids[j]), (detached_sg_i, detached_sg_j), listener_iteration, 'IMG')
                    # Normalise to a percentage over the N-1 off-diagonal pairs.
                    temp_sg_acc = temp_sg_acc * 100 / (loss_matrix.size(1) - 1)
                    temp_img_acc = temp_img_acc * 100 / (loss_matrix.size(1) - 1)
                    sg_acc += temp_sg_acc
                    img_acc += temp_img_acc
                if cfg.LISTENER.HTML:
                    if is_main_process() and listener_iteration % 100 == 0 and listener_iteration >= 600:
                        mistake_saver.toHtml('/www')
                sg_acc /= loss_matrix.size(1)
                img_acc /= loss_matrix.size(1)

                avg_sg_acc = torch.tensor([sg_acc]).to(device)
                avg_img_acc = torch.tensor([img_acc]).to(device)
                # reduce acc over all gpus
                avg_acc = {'sg_acc': avg_sg_acc, 'img_acc': avg_img_acc}
                avg_acc_reduced = reduce_loss_dict(avg_acc)
                sg_acc = sum(acc for acc in avg_acc_reduced['sg_acc'])
                img_acc = sum(acc for acc in avg_acc_reduced['img_acc'])
                # log acc to wadb
                if is_main_process():
                    wandb.log({"Train SG Accuracy": sg_acc.item(), "Train IMG Accuracy": img_acc.item()})

                sg_loss = 0
                img_loss = 0
                # Zero the diagonal so the per-row max below picks the hardest
                # *negative* pair.
                for i in range(loss_matrix.size(0)):
                    for j in range(loss_matrix.size(1)):
                        loss_matrix[i][j][j] = 0.
                for i in range(loss_matrix.size(1)):
                    sg_loss += torch.max(loss_matrix[0][i])
                    # NOTE(review): loss_matrix[1][:][i] equals loss_matrix[1][i]
                    # ([:] is a no-op copy), so this is a row max, not a column max.
                    img_loss += torch.max(loss_matrix[1][:][i])
                sg_loss = sg_loss / loss_matrix.size(1)
                img_loss = img_loss / loss_matrix.size(1)
                sg_loss = sg_loss.to(device)
                img_loss = img_loss.to(device)

                loss_dict = {'sg_loss': sg_loss, 'img_loss': img_loss}
                losses = sum(loss for loss in loss_dict.values())
                # reduce losses over all GPUs for logging purposes
                loss_dict_reduced = reduce_loss_dict(loss_dict)
                sg_loss_reduced = loss_dict_reduced['sg_loss']
                img_loss_reduced = loss_dict_reduced['img_loss']
                if is_main_process():
                    wandb.log({"Train SG Loss": sg_loss_reduced})
                    wandb.log({"Train IMG Loss": img_loss_reduced})
                losses_reduced = sum(loss for loss in loss_dict_reduced.values())
                meters.update(loss=losses_reduced, **loss_dict_reduced)

                # Note: If mixed precision is not used, this ends up doing nothing
                # Otherwise apply loss scaling for mixed-precision recipe
                losses.backward()
                #with amp.scale_loss(losses, listener_optimizer) as scaled_losses:
                #    scaled_losses.backward()

                verbose = (iteration % cfg.SOLVER.PRINT_GRAD_FREQ) == 0 or print_first_grad  # print grad or not
                print_first_grad = False
                #clip_grad_value([(n, p) for n, p in listener.named_parameters() if p.requires_grad], cfg.LISTENER.CLIP_VALUE, logger=logger, verbose=True, clip=True)
                listener_optimizer.step()

                batch_time = time.time() - end
                end = time.time()
                meters.update(time=batch_time, data=data_time)
                eta_seconds = meters.time.global_avg * (max_iter - iteration)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

                if iteration % 200 == 0 or iteration == max_iter:
                    logger.info(
                        meters.delimiter.join([
                            "eta: {eta}",
                            "iter: {iter}",
                            "{meters}",
                            "lr: {lr:.6f}",
                            "max mem: {memory:.0f}",
                        ]).format(
                            eta=eta_string,
                            iter=iteration,
                            meters=str(meters),
                            lr=listener_optimizer.param_groups[-1]["lr"],
                            memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                        ))

                if iteration % checkpoint_period == 0:
                    """
                    print('Model before save')
                    print('****************************')
                    print(listener.gnn.conv1.node_model.node_mlp_1[0].weight)
                    print('****************************')
                    """
                    # Persist listener weights together with amp state.
                    listener_checkpointer.save("model_{:07d}".format(listener_iteration), amp=amp.state_dict())
                    #listener_checkpointer.save("model_{:07d}".format(listener_iteration))
                if iteration == max_iter:
                    listener_checkpointer.save("model_final", amp=amp.state_dict())
                    #listener_checkpointer.save("model_final")

                val_result = None  # used for scheduler updating
                if cfg.SOLVER.TO_VAL and iteration % cfg.SOLVER.VAL_PERIOD == 0:
                    logger.info("Start validating")
                    val_result = run_val(cfg, model, listener, val_data_loaders, distributed, logger)
                    (sg_loss, img_loss, sg_acc, img_acc, speaker_val) = val_result
                    if is_main_process():
                        wandb.log({
                            "Validation SG Accuracy": sg_acc,
                            "Validation IMG Accuracy": img_acc,
                            "Validation SG Loss": sg_loss,
                            "Validation IMG Loss": img_loss,
                            "Speaker Val": speaker_val,
                        })
        except Exception as err:
            # Transparent pass-through: any failure aborts training.
            raise (err)
        # Reached only when the loader is exhausted normally: rebuild it and
        # keep looping.
        print('Dataset finished, creating new')
        train_data_loader = make_data_loader(cfg, mode='train', is_distributed=distributed, start_iter=arguments["iteration"], ret_images=True)

    # NOTE(review): with `while True` above and no break, this epilogue
    # (including `return listener`) is unreachable as written — confirm intent.
    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / (max_iter)))
    return listener
def train(cfg, local_rank, distributed):
    """Train a detector, using the domain-adaptation loop when enabled.

    When ``cfg.MODEL.DOMAIN_ADAPTATION_ON`` is set, two loaders (labelled
    source domain, unlabelled target domain) feed ``do_da_train``; otherwise
    the standard single-loader ``do_train`` runs. Returns the model.
    """
    device = torch.device(cfg.MODEL.DEVICE)
    model = build_detection_model(cfg)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    train_args = {"iteration": 0}
    saver = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, cfg.OUTPUT_DIR, get_rank() == 0
    )
    train_args.update(saver.load(cfg.MODEL.WEIGHT))

    period = cfg.SOLVER.CHECKPOINT_PERIOD
    start = train_args["iteration"]

    if cfg.MODEL.DOMAIN_ADAPTATION_ON:
        # One loader per domain.
        source_loader = make_data_loader(
            cfg,
            is_train=True,
            is_source=True,
            is_distributed=distributed,
            start_iter=start,
        )
        target_loader = make_data_loader(
            cfg,
            is_train=True,
            is_source=False,
            is_distributed=distributed,
            start_iter=start,
        )
        do_da_train(
            model,
            source_loader,
            target_loader,
            optimizer,
            scheduler,
            saver,
            device,
            period,
            train_args,
            cfg,
        )
    else:
        loader = make_data_loader(
            cfg,
            is_train=True,
            is_distributed=distributed,
            start_iter=start,
        )
        do_train(
            model,
            loader,
            optimizer,
            scheduler,
            saver,
            device,
            period,
            train_args,
        )
    return model
def main():
    """Run detection inference, loading weights from --model-path/--ckpt/
    the latest checkpoint/cfg.MODEL.WEIGHT (in that order of precedence).

    BUG FIX: the previous version built a *second* checkpointer after the amp
    init that (a) reloaded weights while ignoring ``--model-path`` entirely and
    (b) reset ``output_dir`` to the plain string ``cfg.OUTPUT_DIR``, which broke
    the ``output_dir / dataset_name`` Path arithmetic below. Weights are now
    resolved and loaded exactly once and ``output_dir`` stays a ``Path``.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        required=True,
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        '--model-path',
        type=Path,
        help=('Path to model pickle file. If not specified, the latest '
              'checkpoint, if it exists, or cfg.MODEL.WEIGHT is loaded.'))
    parser.add_argument(
        '--output-dir',
        default='{cfg_OUTPUT_DIR}/inference-{model_stem}',
        help=('Output directory. Can use variables {cfg_OUTPUT_DIR}, which is '
              'replaced by cfg.OUTPUT_DIR, and {model_stem}, which is '
              'replaced by the stem of the file used to load weights.'))
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    assert cfg.OUTPUT_DIR, 'cfg.OUTPUT_DIR must not be empty.'
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)

    # Resolve which weights to load. Precedence: --model-path, --ckpt,
    # latest checkpoint in OUTPUT_DIR, then cfg.MODEL.WEIGHT.
    if args.model_path:
        load_path = str(args.model_path.resolve())
        load_msg = 'Loading model from --model-path: %s' % load_path
    elif args.ckpt is not None:
        load_path = args.ckpt
        load_msg = 'Loading model from --ckpt: %s' % load_path
    elif checkpointer.has_checkpoint():
        load_path = checkpointer.get_checkpoint_file()
        load_msg = 'Loading model from latest checkpoint: %s' % load_path
    else:
        load_path = cfg.MODEL.WEIGHT
        load_msg = 'Loading model from cfg.MODEL.WEIGHT: %s' % load_path

    output_dir = Path(
        args.output_dir.format(cfg_OUTPUT_DIR=cfg.OUTPUT_DIR,
                               model_stem=Path(load_path).stem))
    output_dir.mkdir(exist_ok=True, parents=True)
    file_logger = common_setup(__file__, output_dir, args)
    # We can't log the load_msg until we setup the output directory, but we
    # can't get the output directory until we figure out which model to load.
    # So we save load_msg and log it here.
    logging.info(load_msg)
    logging.info('Output inference results to: %s' % output_dir)

    logger = logging.getLogger("maskrcnn_benchmark")
    logger.info("Using {} GPUs".format(num_gpus))
    file_logger.info('Config:')
    file_logger.info(cfg)
    file_logger.info("Collecting env info (might take some time)")
    file_logger.info("\n" + collect_env_info())

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    # Load the resolved weights once; use_latest=False because the
    # latest-checkpoint case was already handled when resolving load_path.
    _ = checkpointer.load(load_path, use_latest=False)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )

    dataset_names = cfg.DATASETS.TEST
    output_folders = [None] * len(dataset_names)
    for idx, dataset_name in enumerate(dataset_names):
        # One sub-directory per test dataset.
        output_folder = output_dir / dataset_name
        mkdir(output_folder)
        output_folders[idx] = output_folder

    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
def train(cfg, local_rank, distributed):
    """MLPerf-instrumented training entry point; returns the trained model.

    Emits the MLPerf compliance tags for the input pipeline, RPN and
    optimizer settings, then runs the standard training loop with optional
    per-epoch evaluation/early-exit, and finally prints the throughput.
    """

    def log_tags(entries):
        # Emit MLPerf compliance tags in order; 1-tuples are key-only tags.
        for entry in entries:
            if len(entry) == 1:
                print_mlperf(key=entry[0])
            else:
                print_mlperf(key=entry[0], value=entry[1])

    # Model logging
    log_tags([
        (mlperf_log.INPUT_BATCH_SIZE, cfg.SOLVER.IMS_PER_BATCH),
        (mlperf_log.BATCH_SIZE_TEST, cfg.TEST.IMS_PER_BATCH),
        (mlperf_log.INPUT_MEAN_SUBTRACTION, cfg.INPUT.PIXEL_MEAN),
        (mlperf_log.INPUT_NORMALIZATION_STD, cfg.INPUT.PIXEL_STD),
        (mlperf_log.INPUT_RESIZE,),
        (mlperf_log.INPUT_RESIZE_ASPECT_PRESERVING,),
        (mlperf_log.MIN_IMAGE_SIZE, cfg.INPUT.MIN_SIZE_TRAIN),
        (mlperf_log.MAX_IMAGE_SIZE, cfg.INPUT.MAX_SIZE_TRAIN),
        (mlperf_log.INPUT_RANDOM_FLIP,),
        (mlperf_log.RANDOM_FLIP_PROBABILITY, 0.5),
        (mlperf_log.FG_IOU_THRESHOLD, cfg.MODEL.RPN.FG_IOU_THRESHOLD),
        (mlperf_log.BG_IOU_THRESHOLD, cfg.MODEL.RPN.BG_IOU_THRESHOLD),
        (mlperf_log.RPN_PRE_NMS_TOP_N_TRAIN, cfg.MODEL.RPN.PRE_NMS_TOP_N_TRAIN),
        (mlperf_log.RPN_PRE_NMS_TOP_N_TEST, cfg.MODEL.RPN.PRE_NMS_TOP_N_TEST),
        (mlperf_log.RPN_POST_NMS_TOP_N_TRAIN, cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN),
        (mlperf_log.RPN_POST_NMS_TOP_N_TEST, cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST),
        (mlperf_log.ASPECT_RATIOS, cfg.MODEL.RPN.ASPECT_RATIOS),
        (mlperf_log.BACKBONE, cfg.MODEL.BACKBONE.CONV_BODY),
        (mlperf_log.NMS_THRESHOLD, cfg.MODEL.RPN.NMS_THRESH),
    ])

    device = torch.device(cfg.MODEL.DEVICE)
    model = build_detection_model(cfg)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    # Optimizer logging
    log_tags([
        (mlperf_log.OPT_NAME, mlperf_log.SGD_WITH_MOMENTUM),
        (mlperf_log.OPT_LR, cfg.SOLVER.BASE_LR),
        (mlperf_log.OPT_MOMENTUM, cfg.SOLVER.MOMENTUM),
        (mlperf_log.OPT_WEIGHT_DECAY, cfg.SOLVER.WEIGHT_DECAY),
    ])
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {
        "iteration": 0,
        "save_checkpoints": cfg.SAVE_CHECKPOINTS,
    }
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, cfg.OUTPUT_DIR, get_rank() == 0
    )
    arguments.update(checkpointer.load(cfg.MODEL.WEIGHT))

    data_loader, iters_per_epoch = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    # set the callback function to evaluate and potentially
    # early exit each epoch
    if cfg.PER_EPOCH_EVAL:
        per_iter_callback_fn = functools.partial(
            mlperf_test_early_exit,
            iters_per_epoch=iters_per_epoch,
            tester=functools.partial(test, cfg=cfg),
            model=model,
            distributed=distributed,
            min_bbox_map=cfg.MLPERF.MIN_BBOX_MAP,
            min_segm_map=cfg.MLPERF.MIN_SEGM_MAP)
    else:
        per_iter_callback_fn = None

    start_train_time = time.time()
    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        cfg.SOLVER.CHECKPOINT_PERIOD,
        arguments,
        per_iter_start_callback_fn=functools.partial(
            mlperf_log_epoch_start, iters_per_epoch=iters_per_epoch),
        per_iter_end_callback_fn=per_iter_callback_fn,
    )
    total_training_time = time.time() - start_train_time
    print(
        "&&&& MLPERF METRIC THROUGHPUT per GPU={:.4f} iterations / s".format((arguments["iteration"] * 1.0) / total_training_time)
    )
    return model
def main():
    """CLI entry point: run box-only (proposal) inference on the test sets."""
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Logger writes to stdout only (empty save_dir).
    logger = setup_logger("maskrcnn_benchmark", "", get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    _ = DetectronCheckpointer(cfg, model).load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)

    output_folders = [None] * len(cfg.DATASETS.TEST)
    if cfg.OUTPUT_DIR:
        # One "inference/<dataset>" folder per test dataset.
        for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
            folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(folder)
            output_folders[idx] = folder

    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, data_loader_val in zip(output_folders, data_loaders_val):
        inference(
            model,
            data_loader_val,
            iou_types=iou_types,
            #box_only=cfg.MODEL.RPN_ONLY,
            #box_only=False if cfg.RETINANET.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            # box_only is forced on in this script: evaluate RPN proposals only.
            box_only=True,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
def main():
    """Run detector inference over video frames and dump per-frame results.

    Per-frame detections (optionally with ROI features, --save_features) are
    saved as ``.pth`` files under ``<output_dir>/<video_name>/<frame_id>.pth``.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "-i",
        "--image_folder",
        default="/media/DATA/HEVI_dataset/frames",
        metavar="FILE",
        help="path to the RGB frames",
    )
    parser.add_argument(
        "-o",
        "--output_dir",
        default="/media/DATA/HEVI_dataset/detections",
        metavar="FILE",
        help="path to save detection results as numpy",
    )
    parser.add_argument('--gpu', default='0', type=str)
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    parser.add_argument("--save_features", type=bool, default=False)
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    # Restrict CUDA to the requested GPU before any device is touched.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    # initialize model, load checkpointys
    model = build_detection_model(cfg, save_features=args.save_features)
    model.to(cfg.MODEL.DEVICE)
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)
    model.eval()

    # get image transform operator
    transform = build_transform(cfg)

    data_loader = make_data_loader(cfg,
                                   is_train=False,
                                   is_distributed=distributed)
    # make_data_loader returns one loader per test dataset; use the first.
    data_loader = data_loader[0]
    # NOTE(review): weights were already loaded above; this second load is
    # redundant.
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    results_dict = {}
    for i, batch in enumerate(tqdm(data_loader)):
        images, targets, image_ids = batch
        images = images.to(cfg.MODEL.DEVICE)
        with torch.no_grad():
            output = model(images)
        tmp = []  # NOTE(review): unused
        # Move predictions to CPU before serialising.
        for j, o in enumerate(output):
            o = o.to('cpu')
            # if convert_pred_coco2cityscapes:
            #     o = coco2cityscapes_label(o)
            output[j] = o
        # results_dict.update(
        #     {img_id: result for img_id, result in zip(image_ids, output)}
        # )
        for o, t in zip(output, targets):
            # if t['video_name'] not in results_dict:
            #     results_dict[t['video_name']] = [o]
            # else:
            #     results_dict[t['video_name']].append(o)
            #-------------------
            # NOTE: convert from Cityscapes ID to BDD id
            # labels = o.get_field('labels')
            # for i in range(len(labels)):
            #     labels[i] = City2BDD_id_map[int(labels[i])]
            # o.add_field('labels', labels)
            # o.resize((1280, 720))
            #-------------------
            # Save as <output_dir>/<video_name>/<zero-padded frame_id>.pth
            save_path = os.path.join(output_dir, t.extra_fields['video_name'])
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            save_path = os.path.join(
                save_path,
                str(t.extra_fields['frame_id']).zfill(6) + '.pth')
            torch.save(o, save_path)
    # NOTE(review): the ''' below opens a commented-out region that continues
    # beyond this chunk of the file.
    '''
def train(cfg, local_rank, distributed, ckpt=None, cls_id=1):
    """Optimize an adversarial patch against a frozen detector.

    The detector weights come from ``ckpt`` and are not updated; the only
    learnable tensor is the patch itself, optimized with Adam. Returns the
    (possibly DDP-wrapped) detector.
    """
    model = build_detection_model(cfg)
    output_dir = cfg.OUTPUT_DIR

    # Load the frozen detector weights.
    weight_loader = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = weight_loader.load(ckpt)

    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    # The patch (kept on CPU) is the only tensor being optimized.
    adv_patch_cpu = torch.rand(3, 250, 150)
    # adv_patch_cpu = torch.rand(3, 1, 1)
    adv_patch_cpu.requires_grad_(True)

    optimizer = torch.optim.Adam([adv_patch_cpu], lr=0.1, amsgrad=True)
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.SOLVER.STEPS)
    # optimizer = torch.optim.SGD([adv_patch_cpu], lr=cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM)
    # optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
            find_unused_parameters=True,
        )

    arguments = {"iteration": 0, "cls_id": cls_id}

    patch_applier = PatchApplier().to(device)
    patch_transformer = PatchTransformer().to(device)

    # This checkpointer tracks the patch (not the model) so runs can resume.
    checkpointer = DetectronCheckpointer(
        cfg, adv_patch_cpu, optimizer, scheduler, output_dir, get_rank() == 0
    )
    # extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    # arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        cfg.SOLVER.CHECKPOINT_PERIOD,
        adv_patch_cpu,
        patch_transformer,
        patch_applier,
        arguments,
    )
    return model
def train(cfg, local_rank, distributed):
    """Train a detector, with special handling for NAS 'search' backbones.

    For backbones whose CONV_BODY name contains 'search', AMP initialization
    is skipped, DDP runs with find_unused_parameters=True, and forward
    pre-hooks force requires_grad back on for conv/linear/BN parameters.
    """
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)
    if not 'search' in cfg.MODEL.BACKBONE.CONV_BODY:
        # Initialize mixed-precision training (apex amp); 'O0' is a no-op.
        use_mixed_precision = cfg.DTYPE == "float16"
        amp_opt_level = 'O1' if use_mixed_precision else 'O0'
        model, optimizer = amp.initialize(model, optimizer, opt_level=amp_opt_level)
    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
            # search backbones may leave sub-paths unused in a given forward
            find_unused_parameters=True if 'search' in cfg.MODEL.BACKBONE.CONV_BODY else False,
        )
    if 'search' in cfg.MODEL.BACKBONE.CONV_BODY:
        def forward_hook(module: Module, inp: (Tensor, )):
            # Re-enable gradients just before each forward; presumably the
            # search phase freezes these elsewhere — TODO confirm.
            if module.weight is not None:
                module.weight.requires_grad = True
            if module.bias is not None:
                module.bias.requires_grad = True
        all_modules = (
            nn.Conv2d,
            nn.Linear,
            nn.BatchNorm2d,
        )
        for m in model.modules():
            if isinstance(m, all_modules):
                m.register_forward_pre_hook(forward_hook)
    arguments = {}
    arguments["iteration"] = 0
    output_dir = cfg.OUTPUT_DIR
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)
    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )
    test_period = cfg.SOLVER.TEST_PERIOD
    if test_period > 0:
        data_loader_val = make_data_loader(cfg, is_train=False,
                                           is_distributed=distributed,
                                           is_for_period=True)
    else:
        data_loader_val = None
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    do_train(
        cfg,
        model,
        data_loader,
        data_loader_val,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        test_period,
        arguments,
    )
    return model
def main():
    """Demo inference: run a rotated-box detector on every image in
    `demo_imgs` and write visualizations with drawn polygons to `demo_res`.

    The standard dataset-evaluation path is disabled (kept below in a dead
    string literal) in favor of this ad-hoc per-image loop.
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)
    # Dead code preserved as a discarded string literal (never executed):
    """
    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
    """
    from maskrcnn_benchmark.data.transforms.build import build_transforms
    from PIL import Image
    import torchvision.transforms.functional as F
    transform = build_transforms(cfg, is_train=False)
    img_dir = "demo_imgs"
    res_dir = "demo_res"
    model.eval()
    imgs = os.listdir(img_dir)
    for img in imgs:
        img_path = os.path.join(img_dir, img)
        img_pil = Image.open(img_path)
        # for i in range( 360 ):
        original_img = img_pil
        # original_img = F.rotate( img_pil, 45, expand=True )
        origin_w, origin_h = original_img.size
        # NOTE(review): `img` (the filename) is shadowed by the tensor here.
        img, target = transform(original_img, None)
        print(img.shape)
        img = img.view((1, img.shape[0], img.shape[1], img.shape[2]))
        h, w = img.shape[2:]
        # Zero-pad H and W up to the next multiple of 32 — presumably the
        # backbone stride requirement; TODO confirm against the model config.
        if h % 32 != 0:
            new_h = (h // 32 + 1) * 32
        else:
            new_h = h
        if w % 32 != 0:
            new_w = (w // 32 + 1) * 32
        else:
            new_w = w
        ratio_w = 1. * new_w / w
        ratio_h = 1. * new_h / h
        padded_img = torch.zeros((1, 3, new_h, new_w)).float()
        padded_img[:, :, :h, :w] = img
        # Assumes CUDA is available (`.cuda()`), single-image batch.
        prediction = model(padded_img.cuda())[0]
        # Scale predictions back to the original (unpadded) image size.
        prediction = prediction.resize(
            (origin_w * ratio_w, origin_h * ratio_h))
        hboxes = prediction.bbox.cpu()
        rboxes = prediction.get_field("rboxes").cpu()
        ratios = prediction.get_field("ratios").cpu()
        scores = prediction.get_field("scores").cpu()
        # labels = prediction.get_field( "labels" ).cpu()
        for rbox, ratio, score in zip(rboxes, ratios, scores):
            print(rbox)
            print(ratio, score)
        # Boxes with high ratio are treated as horizontal: replace their
        # rotated quad with the axis-aligned box corners.
        h_idx = ratios > 0.8
        # print(hboxes)
        # NOTE(review): `h` (image height above) is shadowed here.
        h = hboxes[h_idx]
        hboxes_vtx = torch.stack([
            h[:, 0], h[:, 1], h[:, 2], h[:, 1], h[:, 2], h[:, 3], h[:, 0],
            h[:, 3]
        ]).permute(1, 0)
        rboxes[h_idx] = hboxes_vtx
        # rboxes = rboxes.data.numpy().astype( np.int32 )
        rboxes = rboxes.data.numpy()
        # Polygon NMS at IoU 0.1, then a 0.6 score threshold.
        keep = poly_nms(
            np.hstack([rboxes, scores.cpu().data.numpy()[:, np.newaxis]
                       ]).astype(np.double), 0.1)
        rboxes = rboxes[keep].astype(np.int32)
        scores = scores[keep]
        hboxes = hboxes[keep]
        keep = np.where(scores > 0.6)
        rboxes = rboxes[keep]
        scores = scores[keep].tolist()
        hboxes = hboxes[keep]
        # rboxes = list( map( minAreaRect, rboxes ) )
        if len(rboxes) > 0:
            rboxes = np.vstack(rboxes)
        else:
            rboxes = np.array(rboxes)
        # vis( img_info["file_name"], rboxes )
        # img = cv2.imread( original_img )
        # PIL RGB -> OpenCV BGR for drawing.
        img = np.array(original_img.convert("RGB"))[:, :, ::-1].copy()
        cv2.polylines(img,
                      rboxes.reshape(-1, 4, 2).astype(np.int32),
                      True, (0, 255, 255),
                      thickness=2,
                      lineType=cv2.LINE_AA)
        filename = img_path.split("/")[-1]
        cv2.imwrite("{}/{}".format(res_dir, filename), img)
# logger.info("Using {} GPUs".format(num_gpus)) # logger.info(args) # # logger.info("Collecting env info (might take some time)") # logger.info("\n" + collect_env_info()) # # logger.info("Loaded configuration file {}".format(args.config_file)) # with open(args.config_file, "r") as cf: # config_str = "\n" + cf.read() # logger.info(config_str) logger.info("Running with config:\n{}".format(cfg)) save_to_disk = get_rank() == 0 checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler, output_dir, save_to_disk) extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT) arguments.update(extra_checkpoint_data) checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD do_train( model, data_loader, optimizer, scheduler, checkpointer, device, checkpoint_period, arguments, )
def train(cfg, local_rank, distributed, use_tensorboard=False):
    """Build the detector, restore weights, and run the training loop.

    When `use_tensorboard` is true, metrics go through a TensorboardLogger;
    otherwise a plain MetricLogger is used. Returns the trained model.
    """
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # Mixed precision via apex amp: 'O1' when fp16 is configured, 'O0' (no-op)
    # otherwise.
    opt_level = 'O1' if cfg.DTYPE == "float16" else 'O0'
    model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # should be removed if BatchNorm stats are updated
            broadcast_buffers=False,
        )

    arguments = {"iteration": 0}
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, get_rank() == 0)
    # Prefer the scheduler from the config over the checkpointed one; only
    # its last_epoch is restored from the checkpoint.
    extra_checkpoint_data = checkpointer.load(
        cfg.MODEL.WEIGHT,
        load_model_only=cfg.MODEL.LOAD_ONLY_WEIGHTS,
        load_scheduler_only_epoch=True)
    if not cfg.MODEL.LOAD_ONLY_WEIGHTS:
        arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    if use_tensorboard:
        meters = TensorboardLogger(log_dir=output_dir,
                                   exp_name=cfg.TENSORBOARD_EXP_NAME,
                                   start_iter=arguments['iteration'],
                                   delimiter=" ")
    else:
        meters = MetricLogger(delimiter=" ")

    do_train(model, data_loader, optimizer, scheduler, checkpointer, device,
             cfg.SOLVER.CHECKPOINT_PERIOD, arguments, meters)
    return model
def main(cfg_text, cfg_segment):
    """Evaluate a language model + segmentation model pipeline.

    For every test dataset, pick the class predicted by the language model,
    take the segmentation model's highest-scoring instance of that class,
    and compare it to the annotated target; write mean and per-class
    segmentation/bbox IOU reports to OUTPUT_DIR.
    """
    # Load saved LSTM network
    language_model = build_detection_model(cfg_text)
    language_model.to(cfg_text.MODEL.DEVICE)
    output_dir = cfg_text.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg_text, language_model,
                                         save_dir=output_dir)
    _ = checkpointer.load(cfg_text.MODEL.WEIGHT)
    language_model.eval()
    # Load saved segmentation network
    seg_model = SegmentationHelper(cfg_segment)
    # Split=False is Test set
    data_loaders = make_data_loader(cfg_text, split=False, is_distributed=False)
    for dataset_index, data_loader in enumerate(data_loaders):
        fine_gt = []
        seg_iou = []
        bbox_iou = []
        for index, instance in tqdm(
                enumerate(data_loader),
                desc=cfg_text.DATASETS.TEST[dataset_index]):
            # Group images: several instances can share one image, so run
            # segmentation once per unique image and map results back.
            image_indexes = [x.get_field('img_id')[0] for x in instance[0][2]]
            unique_indexes, unique_mask, unique_inverse = np.unique(
                image_indexes, return_index=True, return_inverse=True)
            with torch.no_grad():
                prediction = language_model(instance[0],
                                            device=cfg_text.MODEL.DEVICE)
                segmentation_prediction = seg_model.run_on_image(
                    instance[0][0][unique_mask])
            # Class choice comes from the last timestep of the LM output.
            _, pred_ind = prediction[:, -1, :].max(1)
            for j in range(len(pred_ind)):
                segs = segmentation_prediction[unique_inverse[j]]
                label = pred_ind[j]
                ann_seg = instance[0][2][j].get_field('ann_target')[0]
                fine_gt.append(ann_seg.get_field('labels').item())
                label_mask = segs.get_field('labels') == label
                if any(label_mask):
                    # Highest-scoring detection of the predicted class.
                    score, top_ind = segs[label_mask].get_field('scores').max(
                        0)
                    top_seg = segs[label_mask][top_ind]
                    bbox_iou.append(
                        IOU(top_seg.bbox.tolist()[0],
                            ann_seg.bbox.tolist()[0]))
                    if top_seg.has_field('mask'):
                        top_mask = top_seg.get_field('mask').squeeze()
                        ann_mask = ann_seg.get_field('masks').masks[0].mask
                        seg_iou.append(IOU(top_mask, ann_mask))
                    else:
                        seg_iou.append(0.0)
                else:
                    # Predicted class not detected at all: count as 0 IOU.
                    bbox_iou.append(0.0)
                    seg_iou.append(0.0)
        with open(
                '{}/{}_baseline_report.txt'.format(
                    cfg_text.OUTPUT_DIR,
                    cfg_text.DATASETS.TEST[dataset_index]), 'w') as f:
            f.write("Mean Segmentation IOU: {}\n".format(np.mean(seg_iou)))
            f.write("Mean Bounding Box IOU: {}\n".format(np.mean(bbox_iou)))
            f.write("\n Class \t Seg IOU \t BBox IOU \t Support")
            for label in data_loaders[0].dataset.coco.cats.values():
                mask = torch.Tensor(fine_gt) == label['id']
                # NOTE(review): the lists are rebound to tensors on the first
                # pass through this loop, then re-wrapped each iteration.
                seg_iou = torch.Tensor(seg_iou)
                bbox_iou = torch.Tensor(bbox_iou)
                f.write("\n{} \t {:.2f} \t {:.2f} \t{:d}".format(
                    label['name'], torch.mean(seg_iou[mask]),
                    torch.mean(bbox_iou[mask]), torch.sum(mask)))
def train(cfg, local_rank, distributed):
    """Standard training entry point: build the model, restore the latest
    checkpoint, run the training loop, and return the trained model."""
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # apex amp: 'O1' enables mixed precision when fp16 is configured, 'O0'
    # leaves everything in fp32.
    amp_opt_level = 'O1' if cfg.DTYPE == "float16" else 'O0'
    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # should be removed once BatchNorm stats are broadcast
            broadcast_buffers=False,
        )

    arguments = {"iteration": 0}

    output_dir = cfg.OUTPUT_DIR
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    # Resume iteration counters and other bookkeeping from the checkpoint.
    arguments.update(checkpointer.load(cfg.MODEL.WEIGHT))

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        cfg.SOLVER.CHECKPOINT_PERIOD,
        arguments,
    )
    return model
def main():
    """Evaluate a trained detector on every dataset in cfg.DATASETS.TEST."""
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is set by torch.distributed.launch; absent means single GPU.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    # Evaluate boxes always; masks/keypoints only when the model has them.
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types += ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types += ("keypoints", )

    dataset_names = cfg.DATASETS.TEST
    output_folders = [None] * len(cfg.DATASETS.TEST)
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(folder)
            output_folders[idx] = folder

    data_loaders_val = make_data_loader(cfg, split=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        predictions = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            device=cfg.MODEL.DEVICE,
            output_folder=output_folder,
        )
        synchronize()
def train(cfg, local_rank, distributed):
    """Training entry point with optional apex AMP and apex DDP.

    NOTE(review): `use_amp` and `use_apex_ddp` are flags defined at module
    level, outside this excerpt. The data loader also reports iterations per
    epoch, used by the optional MLPerf per-epoch early-exit callback.
    """
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)
    if use_amp:
        # Initialize mixed-precision training ('O0' is a no-op passthrough).
        use_mixed_precision = cfg.DTYPE == "float16"
        amp_opt_level = 'O1' if use_mixed_precision else 'O0'
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=amp_opt_level)
    if distributed:
        if use_apex_ddp:
            model = DDP(model, delay_allreduce=True)
        else:
            model = torch.nn.parallel.DistributedDataParallel(
                model,
                device_ids=[local_rank],
                output_device=local_rank,
                # this should be removed if we update BatchNorm stats
                broadcast_buffers=False,
            )
    arguments = {}
    arguments["iteration"] = 0
    output_dir = cfg.OUTPUT_DIR
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)
    data_loader, iters_per_epoch = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    # set the callback function to evaluate and potentially
    # early exit each epoch
    if cfg.PER_EPOCH_EVAL:
        per_iter_callback_fn = functools.partial(
            mlperf_test_early_exit,
            iters_per_epoch=iters_per_epoch,
            tester=functools.partial(test, cfg=cfg),
            model=model,
            distributed=distributed,
            min_bbox_map=cfg.MIN_BBOX_MAP,
            min_segm_map=cfg.MIN_MASK_MAP)
    else:
        per_iter_callback_fn = None
    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        use_amp,
        cfg,
        per_iter_end_callback_fn=per_iter_callback_fn,
    )
    return model
def test_once(cfg, save_dir, weight_name, distributed):
    """Load `weight_name` into a fresh model and run inference once.

    In evaluation mode (cfg.TEST.GEN false) returns the list of per-dataset
    result objects; in generation mode (cfg.TEST.GEN true) inference is run
    only for its data-generation side effects and None is returned, as
    before.
    """
    torch.cuda.empty_cache()
    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    output_dir = save_dir
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(weight_name, test=True)

    iou_types = ()  # ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)

    # GEN mode evaluates the generation datasets instead of the test split.
    dataset_names = cfg.DATASETS.TEST if not cfg.TEST.GEN else cfg.DATASETS.GEN
    # BUGFIX: output_folders was sized from cfg.DATASETS.TEST even when GEN
    # mode selected cfg.DATASETS.GEN, so a longer GEN list was silently
    # truncated by zip() below. Size it from the list actually in use.
    output_folders = [None] * len(dataset_names)
    if output_dir:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(output_dir, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=distributed)
    results = []
    if not cfg.TEST.GEN:
        for output_folder, dataset_name, data_loader_val in zip(
                output_folders, dataset_names, data_loaders_val):
            result, _ = inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                generate_data=cfg.TEST.GEN,
                visual_num=cfg.TEST.VISUAL_NUM
            )
            # pdb.set_trace()
            results.append(result)
            synchronize()
        return results
    else:
        for output_folder, dataset_name, data_loader_val in zip(
                output_folders, dataset_names, data_loaders_val):
            inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                generate_data=cfg.TEST.GEN
            )
def main():
    """Run inference on the configured test datasets; --get_feature asks the
    inference routine to also save ROI features."""
    # torch.cuda.set_device(7)
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/home/SelfDriving/maskrcnn/maskrcnn-benchmark/configs/e2e_faster_rcnn_R_50_C4_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    parser.add_argument(
        "--get_feature",
        help="get roi features and save",
        action='store_true',
        default=False,
    )
    args = parser.parse_args()
    # WORLD_SIZE is set by torch.distributed.launch; absent means 1 GPU.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())
    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    # --ckpt overrides the config weight; otherwise resume the latest.
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)
    # Evaluate boxes always; masks/keypoints only when the model has them.
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            get_feature=args.get_feature,
        )
        synchronize()
def main():
    """Extract ROI features for pre-computed boxes and save one HDF5 file per
    image.

    Images are listed in `name_file` (a numpy array of relative paths);
    boxes come from `bbox_file` (JSON mapping name -> list of xyxy boxes).
    Only the [start_id, end_id) slice of the name list is processed, and
    already-existing output files are skipped, so the job is resumable.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('config_file')
    parser.add_argument('ckpt_file')
    parser.add_argument('image_dir')
    parser.add_argument('name_file')
    parser.add_argument('bbox_file')
    parser.add_argument('output_dir')
    parser.add_argument('--layer_name', default='fc7')
    parser.add_argument('--start_id', type=int, default=0)
    parser.add_argument('--end_id', type=int, default=None)
    opts = parser.parse_args()

    bbox_data = json.load(open(opts.bbox_file))

    if not os.path.exists(opts.output_dir):
        os.makedirs(opts.output_dir)

    ########### build model #############
    # update the config options with the config file
    cfg.merge_from_file(opts.config_file)
    # manual override some options
    cfg.merge_from_list(['MODEL.DEVICE', 'cuda:0'])
    cfg.freeze()

    device = torch.device(cfg.MODEL.DEVICE)
    cpu_device = torch.device("cpu")
    model = build_detection_model(cfg)
    model.to(device)
    model.eval()
    checkpointer = DetectronCheckpointer(cfg, model)
    _ = checkpointer.load(f=opts.ckpt_file, use_latest=False)
    transform_fn = build_transform(cfg)

    ########### extract feature #############
    names = np.load(opts.name_file)
    if opts.end_id is None:
        opts.end_id = len(names)
    total_images = opts.end_id - opts.start_id

    for i, name in enumerate(names):
        if i < opts.start_id or i >= opts.end_id:
            continue
        # '/' in image names is flattened so each image maps to one file.
        outname = name.replace('/', '_')
        outfile = os.path.join(opts.output_dir, '%s.hdf5' % outname)
        if os.path.exists(outfile):
            continue
        img_file = os.path.join(opts.image_dir, name)
        # apply pre-processing to image
        # NOTE(review): cv2.imread returns None for a missing/corrupt file,
        # which would raise on .shape below — paths are assumed valid.
        original_image = cv2.imread(img_file)
        height, width = original_image.shape[:-1]
        image = transform_fn(original_image)
        nheight, nwidth = image.size(1), image.size(2)
        # convert to an ImageList, padded so that it is divisible by
        # cfg.DATALOADER.SIZE_DIVISIBILITY
        image_list = to_image_list(image, cfg.DATALOADER.SIZE_DIVISIBILITY)
        image_list = image_list.to(device)
        # compute predictions: one image one mini-batch
        with torch.no_grad():
            # features: tuples in FPN (batch, dim_ft: 256, h, w)
            features = model.backbone(image_list.tensors)
            if name in bbox_data:
                cpu_boxes = bbox_data[name]
                boxes = torch.FloatTensor(cpu_boxes).to(device)
                cand_proposals = BoxList(boxes, (width, height), mode='xyxy')
                # Rescale the boxes from original to transformed image size.
                cand_proposals = cand_proposals.resize((nwidth, nheight))
                bbox_fts, _, _ = model.roi_heads.extract_features(
                    features, [cand_proposals])
                bbox_fts = bbox_fts[opts.layer_name].cpu()
                # save to file
                with h5py.File(outfile, 'w') as outf:
                    outf.create_dataset(outname, bbox_fts.size(),
                                        dtype='float', compression='gzip')
                    outf[outname][...] = bbox_fts.data.numpy()
                    outf[outname].attrs['image_w'] = width
                    outf[outname].attrs['image_h'] = height
                    outf[outname].attrs['boxes'] = np.array(
                        cpu_boxes).astype(np.int32)
        if i % 1000 == 0:
            # NOTE(review): bbox_fts is unbound here if the very first
            # reported image has no entry in bbox_data — TODO confirm.
            print('name %s shape %s, processing %d/%d (%.2f%% done)' %
                  (name, bbox_fts.shape, i - opts.start_id, total_images,
                   (i - opts.start_id) * 100 / total_images))
def train(cfg, local_rank, distributed):
    """Domain-adaptive training: a labeled source loader and a target loader
    are built separately and both handed to do_train. Returns the model."""
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # apex amp: mixed precision ('O1') only when the config asks for fp16.
    amp_opt_level = 'O1' if cfg.DTYPE == "float16" else 'O0'
    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {"iteration": 0}

    output_dir = cfg.OUTPUT_DIR
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    arguments.update(checkpointer.load(cfg.MODEL.WEIGHT))

    start_iter = arguments["iteration"]
    # One loader per domain: source (labeled) and target.
    data_loader_src = make_data_loader(
        cfg,
        is_source=True,
        is_train=True,
        is_distributed=distributed,
        start_iter=start_iter,
    )
    data_loader_trg = make_data_loader(
        cfg,
        is_source=False,
        is_train=True,
        is_distributed=distributed,
        start_iter=start_iter,
    )

    # Optional periodic validation during training.
    test_period = cfg.SOLVER.TEST_PERIOD
    if test_period > 0:
        data_loader_val = make_data_loader(cfg, is_train=False,
                                           is_distributed=distributed,
                                           is_for_period=True)
    else:
        data_loader_val = None

    do_train(
        cfg,
        model,
        data_loader_src,
        data_loader_trg,
        data_loader_val,
        optimizer,
        scheduler,
        checkpointer,
        device,
        cfg.SOLVER.CHECKPOINT_PERIOD,
        test_period,
        arguments,
    )
    return model
def train(cfg, local_rank, distributed):
    """MLPerf-instrumented training: log hyperparameters, train with a
    per-epoch early-exit tester, and print per-GPU throughput at the end."""
    # Model logging
    print_mlperf(key=mlperf_log.INPUT_BATCH_SIZE, value=cfg.SOLVER.IMS_PER_BATCH)
    print_mlperf(key=mlperf_log.BATCH_SIZE_TEST, value=cfg.TEST.IMS_PER_BATCH)
    print_mlperf(key=mlperf_log.INPUT_MEAN_SUBTRACTION, value = cfg.INPUT.PIXEL_MEAN)
    print_mlperf(key=mlperf_log.INPUT_NORMALIZATION_STD, value=cfg.INPUT.PIXEL_STD)
    print_mlperf(key=mlperf_log.INPUT_RESIZE)
    print_mlperf(key=mlperf_log.INPUT_RESIZE_ASPECT_PRESERVING)
    print_mlperf(key=mlperf_log.MIN_IMAGE_SIZE, value=cfg.INPUT.MIN_SIZE_TRAIN)
    print_mlperf(key=mlperf_log.MAX_IMAGE_SIZE, value=cfg.INPUT.MAX_SIZE_TRAIN)
    print_mlperf(key=mlperf_log.INPUT_RANDOM_FLIP)
    print_mlperf(key=mlperf_log.RANDOM_FLIP_PROBABILITY, value=0.5)
    print_mlperf(key=mlperf_log.FG_IOU_THRESHOLD, value=cfg.MODEL.RPN.FG_IOU_THRESHOLD)
    print_mlperf(key=mlperf_log.BG_IOU_THRESHOLD, value=cfg.MODEL.RPN.BG_IOU_THRESHOLD)
    print_mlperf(key=mlperf_log.RPN_PRE_NMS_TOP_N_TRAIN, value=cfg.MODEL.RPN.PRE_NMS_TOP_N_TRAIN)
    print_mlperf(key=mlperf_log.RPN_PRE_NMS_TOP_N_TEST, value=cfg.MODEL.RPN.PRE_NMS_TOP_N_TEST)
    print_mlperf(key=mlperf_log.RPN_POST_NMS_TOP_N_TRAIN, value=cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN)
    print_mlperf(key=mlperf_log.RPN_POST_NMS_TOP_N_TEST, value=cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST)
    print_mlperf(key=mlperf_log.ASPECT_RATIOS, value=cfg.MODEL.RPN.ASPECT_RATIOS)
    print_mlperf(key=mlperf_log.BACKBONE, value=cfg.MODEL.BACKBONE.CONV_BODY)
    print_mlperf(key=mlperf_log.NMS_THRESHOLD, value=cfg.MODEL.RPN.NMS_THRESH)
    # /root/ssy/maskrcnn-benchmark/maskrcnn_benchmark/modeling/detector/detectors.py
    # building bare mode without doing anthing
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    optimizer = make_optimizer(cfg, model)
    # Optimizer logging
    print_mlperf(key=mlperf_log.OPT_NAME, value=mlperf_log.SGD_WITH_MOMENTUM)
    print_mlperf(key=mlperf_log.OPT_LR, value=cfg.SOLVER.BASE_LR)
    print_mlperf(key=mlperf_log.OPT_MOMENTUM, value=cfg.SOLVER.MOMENTUM)
    print_mlperf(key=mlperf_log.OPT_WEIGHT_DECAY, value=cfg.SOLVER.WEIGHT_DECAY)
    scheduler = make_lr_scheduler(cfg, optimizer)
    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )
    arguments = {}
    arguments["iteration"] = 0
    output_dir = cfg.OUTPUT_DIR
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )
    # no such SAVE_CHECKPOINTS
    # maskrcnn_benchmark/engine/trainer.py will use save_checkpoints
    #arguments["save_checkpoints"] = cfg.SAVE_CHECKPOINTS
    arguments["save_checkpoints"] = True
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)
    data_loader, iters_per_epoch = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"]
    )
    print("SSY iters_per_epoch "+str(iters_per_epoch))
    #print("SSY iters_per_epoch change to 100 ")
    #iters_per_epoch = 100
    #checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    # Checkpoint exactly once per epoch instead of using the config period.
    checkpoint_period = iters_per_epoch
    #checkpoint_period =100
    # set the callback function to evaluate and potentially
    # early exit each epoch
    # SSY
    # I already add PER_EPOCH_EVAL and MIN_BBOX_MAP MIN_SEGM_MAP to ./configs/e2e_mask_rcnn_R_50_FPN_1x.yaml
    # but it still can not find it
    # so I manually set them here
    #if cfg.PER_EPOCH_EVAL:
    #    per_iter_callback_fn = functools.partial(
    #        mlperf_test_early_exit,
    #        iters_per_epoch=iters_per_epoch,
    #        tester=functools.partial(test, cfg=cfg),
    #        model=model,
    #        distributed=distributed,
    #        min_bbox_map=cfg.MLPERF.MIN_BBOX_MAP,
    #        min_segm_map=cfg.MLPERF.MIN_SEGM_MAP)
    #else:
    #    per_iter_callback_fn = None
    # NOTE(review): target mAPs are hard-coded here (MLPerf Mask R-CNN
    # quality targets) instead of being read from the config — see the
    # commented block above for the intended config-driven version.
    per_iter_callback_fn = functools.partial(
        mlperf_test_early_exit,
        iters_per_epoch=iters_per_epoch,
        # /root/ssy/maskrcnn-benchmark/maskrcnn_benchmark/engine/tester.py
        tester=functools.partial(test, cfg=cfg),
        model=model,
        distributed=distributed,
        min_bbox_map=0.377,
        min_segm_map=0.339)
    start_train_time = time.time()
    # /root/ssy/maskrcnn-benchmark/maskrcnn_benchmark/engine/trainer.py
    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        per_iter_start_callback_fn=functools.partial(mlperf_log_epoch_start, iters_per_epoch=iters_per_epoch),
        per_iter_end_callback_fn=per_iter_callback_fn,
    )
    end_train_time = time.time()
    total_training_time = end_train_time - start_train_time
    print(
        "&&&& MLPERF METRIC THROUGHPUT per GPU={:.4f} iterations / s".format((arguments["iteration"] * 1.0) / total_training_time)
    )
    return model
def main():
    """Entry point for DRG human-object-interaction inference on V-COCO.

    Parses CLI options, loads the detection model and a checkpoint, reads the
    pre-computed test detections / prior masks / word embeddings for the chosen
    dataset, runs `run_test`, then scores the dump with the V-COCO evaluator.

    The prior_flag values select how the learned prior is applied
    (apply_prior / prior_mask):
    #   0:   -   -
    #   1:   Y   -
    #   2:   -   Y
    #   3:   Y   Y
    """
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    # NOTE(review): default is None, which would flow into DatasetCatalog.get(None)
    # below — presumably callers always pass this flag; verify.
    parser.add_argument(
        "--dataset_name",
        help="vcoco_test or vcoco_val_test",
        default=None,
    )
    # -1 means "use cfg.MODEL.WEIGHT / latest checkpoint" instead of a numbered one.
    parser.add_argument('--num_iteration', dest='num_iteration',
                        help='Specify which weight to load',
                        default=-1, type=int)
    parser.add_argument('--object_thres', dest='object_thres',
                        help='Object threshold',
                        default=0.1, type=float)  # used to be 0.4 or 0.05
    parser.add_argument('--human_thres', dest='human_thres',
                        help='Human threshold',
                        default=0.8, type=float)
    parser.add_argument('--prior_flag', dest='prior_flag',
                        help='whether use prior_flag',
                        default=1, type=int)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # Multi-GPU bookkeeping: torch.distributed launchers export WORLD_SIZE.
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1 and torch.cuda.is_available()
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    print('prior flag: {}'.format(args.prior_flag))

    # Config paths are given relative to the repository root (one level up
    # from this script's directory).
    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    # DATA_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'Data'))
    args.config_file = os.path.join(ROOT_DIR, args.config_file)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("DRG", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    # model.to(cfg.MODEL.DEVICE)
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    # Initialize mixed-precision if necessary.
    # NOTE(review): amp_handle is never used afterwards — amp.init appears to
    # patch torch globally as a side effect; confirm against the apex version in use.
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    # A numbered iteration overrides any explicit --ckpt path.
    if args.num_iteration != -1:
        args.ckpt = os.path.join(cfg.OUTPUT_DIR,
                                 'model_%07d.pth' % args.num_iteration)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    logger.info("Testing checkpoint {}".format(ckpt))
    # use_latest only when the user gave no checkpoint at all.
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    # iou_types = ("bbox",)
    # Evaluate only the dataset named on the command line, not cfg.DATASETS.TEST.
    output_folders = [None] * len(cfg.DATASETS.TEST)
    # dataset_names = cfg.DATASETS.TEST
    dataset_names = (args.dataset_name, )
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            if args.num_iteration != -1:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_sp",
                                             dataset_name,
                                             "model_%07d" % args.num_iteration)
            else:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_sp",
                                             dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    opt = {}
    # opt['word_dim'] = 300
    for output_folder, dataset_name in zip(output_folders, dataset_names):
        # Pull the per-dataset file paths registered in the project catalog.
        data = DatasetCatalog.get(dataset_name)
        data_args = data["args"]
        im_dir = data_args['im_dir']
        # latin1 encoding: these pickles were produced with Python 2.
        test_detection = pickle.load(open(data_args['test_detection_file'],
                                          "rb"),
                                     encoding='latin1')
        prior_mask = pickle.load(open(data_args['prior_mask'], "rb"),
                                 encoding='latin1')
        action_dic = json.load(open(data_args['action_index']))
        action_dic_inv = {y: x for x, y in action_dic.items()}
        # NOTE(review): file handle is never closed — consider a `with` block.
        vcoco_test_ids = open(data_args['vcoco_test_ids_file'], 'r')
        test_image_id_list = [int(line.rstrip()) for line in vcoco_test_ids]
        vcocoeval = VCOCOeval(data_args['vcoco_test_file'],
                              data_args['ann_file'],
                              data_args['vcoco_test_ids_file'])
        word_embeddings = pickle.load(open(data_args['word_embedding_file'],
                                           "rb"),
                                      encoding='latin1')
        output_file = os.path.join(output_folder, 'detection.pkl')
        output_dict_file = os.path.join(
            output_folder, 'detection_human_{}_new.pkl'.format(dataset_name))

        logger.info("Output will be saved in {}".format(output_file))
        logger.info("Start evaluation on {} dataset({} images).".format(
            dataset_name, len(test_image_id_list)))

        run_test(model,
                 dataset_name=dataset_name,
                 im_dir=im_dir,
                 test_detection=test_detection,
                 word_embeddings=word_embeddings,
                 test_image_id_list=test_image_id_list,
                 prior_mask=prior_mask,
                 action_dic_inv=action_dic_inv,
                 output_file=output_file,
                 output_dict_file=output_dict_file,
                 object_thres=args.object_thres,
                 human_thres=args.human_thres,
                 prior_flag=args.prior_flag,
                 device=device,
                 cfg=cfg)

        synchronize()

        # Official V-COCO scoring over the detections just written to disk.
        vcocoeval._do_eval(output_file, ovr_thresh=0.5)
def main():
    """Run detection inference over cfg.DATASETS.TEST and dump per-image
    features (e.g. fc6) to --save_path.

    The work can be sharded across independent processes via --n_groups /
    --group_id, which are forwarded to `inference`.
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="configs/visual_genome_vqa/e2e_faster_rcnn_X-101-64x4d-FPN_1x_MLP_2048_FPN_512_vqa_single.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--save_path",
        default="/checkpoint/meetshah/features/vqa/pytorch/resnext101_64x4d_mlp_2048_fpn_512/",
    )
    parser.add_argument(
        "--feat_name",
        default="fc6",
    )
    # BUG FIX: both sharding options previously lacked type=int, so values
    # supplied on the command line were parsed as strings while the defaults
    # were ints — inconsistent types reached inference(group_id=..., n_groups=...).
    parser.add_argument(
        "--n_groups",
        type=int,
        default=0,
    )
    parser.add_argument(
        "--group_id",
        type=int,
        default=1,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    # exist_ok avoids the check-then-create race of the old
    # `if not os.path.exists(...): os.makedirs(...)` pattern.
    os.makedirs(args.save_path, exist_ok=True)

    # torch.distributed launchers export WORLD_SIZE.
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # Load weights from cfg.MODEL.WEIGHT (or the checkpointer's resolution of it).
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)

    # One output folder per test dataset (created only when OUTPUT_DIR is set).
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
        output_folders, dataset_names, data_loaders_val
    ):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            save_path=args.save_path,
            feat_name=args.feat_name,
            group_id=args.group_id,
            n_groups=args.n_groups,
        )
        synchronize()
def main():
    """Run COCO-style evaluation for every dataset listed in cfg.DATASETS.TEST."""
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # WORLD_SIZE is exported by the torch.distributed launcher.
    world_size = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    is_distributed = world_size > 1
    if is_distributed:
        torch.cuda.set_device(args.local_rank)
        # Legacy distributed API kept as-is for compatibility with this codebase.
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    # Overlay file options first, then any "opts" given on the command line.
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger = setup_logger("maskrcnn_benchmark", "", get_rank())
    logger.info("Using {} GPUs".format(world_size))
    logger.info(cfg)
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    # Build the detector and restore its weights.
    detector = build_detection_model(cfg)
    detector.to(cfg.MODEL.DEVICE)
    checkpointer = DetectronCheckpointer(cfg, detector, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(cfg.MODEL.WEIGHT)

    # Evaluate segmentation too when the mask head is enabled.
    iou_types = ("bbox", "segm") if cfg.MODEL.MASK_ON else ("bbox",)

    test_sets = cfg.DATASETS.TEST
    result_dirs = [None] * len(test_sets)
    if cfg.OUTPUT_DIR:
        for index, set_name in enumerate(test_sets):
            folder = os.path.join(cfg.OUTPUT_DIR, "inference", set_name)
            mkdir(folder)
            result_dirs[index] = folder

    val_loaders = make_data_loader(cfg, is_train=False, is_distributed=is_distributed)
    for result_dir, set_name, loader in zip(result_dirs, test_sets, val_loaders):
        inference(
            detector,
            loader,
            dataset_name=set_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=result_dir,
        )
        synchronize()
def train(cfg, local_rank, distributed, d_path=None):
    """Adversarial Mask R-CNN training loop.

    Builds a Mask R-CNN generator plus a combined mask/box discriminator,
    trains both with separate optimizers/schedulers under apex AMP, logs to
    TensorBoard, checkpoints both nets, and periodically runs validation.

    Args:
        cfg: frozen project config node.
        local_rank: this process's GPU index for DistributedDataParallel.
        distributed: whether to wrap the combined model in DDP.
        d_path: optional path to a discriminator checkpoint to restore.
    """
    MaskDnet = MaskDiscriminator(nc=256)
    BBoxDnet = BoxDiscriminator(nc=256, ndf=64)
    Dnet = CombinedDiscriminator(MaskDnet, BBoxDnet)
    model = Mask_RCNN(cfg)
    # g_rcnn runs generator and discriminator together in one forward pass.
    g_rcnn = GAN_RCNN(model, Dnet)
    device = torch.device(cfg.MODEL.DEVICE)
    g_rcnn.to(device)
    # NOTE(review): the generator optimizer is built from `model` (not g_rcnn),
    # so discriminator params are only updated via d_optimizer — confirm intended.
    g_optimizer = make_optimizer(cfg, model)
    d_optimizer = make_D_optimizer(cfg, Dnet)
    g_scheduler = make_lr_scheduler(cfg, g_optimizer)
    d_scheduler = make_lr_scheduler(cfg, d_optimizer)
    # model.BoxDnet = BBoxDnet

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, g_optimizer = amp.initialize(model, g_optimizer, opt_level=amp_opt_level)
    Dnet, d_optimizer = amp.initialize(Dnet, d_optimizer, opt_level=amp_opt_level)

    if distributed:
        # NOTE(review): DDP wraps g_rcnn while amp.initialize returned new
        # `model`/`Dnet` references above — verify g_rcnn still holds the
        # amp-patched submodules.
        g_rcnn = torch.nn.parallel.DistributedDataParallel(
            g_rcnn, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    # Mutable training state; the checkpointer persists/restores it.
    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    # Only rank 0 writes checkpoints to disk.
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, g_optimizer, g_scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    # Separate checkpointer for the discriminator ("dnet_*" files below).
    d_checkpointer = DetectronCheckpointer(cfg, Dnet, d_optimizer, d_scheduler,
                                           output_dir, save_to_disk)

    if d_path:
        d_checkpointer.load(d_path, use_latest=False)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    test_period = cfg.SOLVER.TEST_PERIOD
    data_loader_val = make_data_loader(cfg, is_train=False,
                                       is_distributed=distributed,
                                       is_for_period=True)

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    ## START TRAINING
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")

    meters = TensorboardLogger(log_dir=cfg.OUTPUT_DIR + "/tensorboardX",
                               start_iter=arguments['iteration'],
                               delimiter=" ")

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    g_rcnn.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        # Skip batches containing an image with no annotations.
        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        # NOTE
        # Single forward produces both generator and discriminator loss dicts.
        g_loss_dict, d_loss_dict = g_rcnn(images, targets)

        g_losses = sum(loss for loss in g_loss_dict.values())
        d_losses = sum(loss for loss in d_loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        g_loss_dict_reduced = reduce_loss_dict(g_loss_dict)
        g_losses_reduced = sum(loss for loss in g_loss_dict_reduced.values())

        d_loss_dict_reduced = reduce_loss_dict(d_loss_dict)
        d_losses_reduced = sum(loss for loss in d_loss_dict_reduced.values())

        meters.update(total_g_loss=g_losses_reduced, **g_loss_dict_reduced)
        meters.update(total_d_loss=d_losses_reduced, **d_loss_dict_reduced)

        # Generator step.
        g_optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(g_losses, g_optimizer) as g_scaled_losses:
            g_scaled_losses.backward()
        g_optimizer.step()
        g_scheduler.step()

        # Discriminator step.
        d_optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(d_losses, d_optimizer) as d_scaled_losses:
            d_scaled_losses.backward()
        d_optimizer.step()
        d_scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        # ETA from the running average of per-iteration time.
        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=g_optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
            d_checkpointer.save("dnet_{:07d}".format(iteration), **arguments)

        # Periodic validation: run full inference metrics, then compute
        # validation losses with the training-mode forward under no_grad.
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter=" ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e. g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg, is_train=False, is_distributed=False,
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=cfg.OUTPUT_DIR,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            # eta_string/lr here reflect the most recent *training* iteration.
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=g_optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))