def main_worker(cfg):
    """Run distributed test-set evaluation and merge the per-rank result files."""
    # Only the global-rank-0 process gets a tensorboard writer.
    writer = None
    if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0:
        writer = SummaryWriter(log_dir=build_log_dir(cfg))
    cfg.freeze()

    # Build the model and wrap it for (possibly distributed) deployment.
    net = get_model(cfg)
    net = deploy_model(net, cfg)

    # Dataloader over the test split.
    test_loader = build_dataloader_test(cfg)

    eval_path = cfg.CONFIG.LOG.EVAL_DIR
    if not os.path.exists(eval_path):
        os.makedirs(eval_path)

    loss_fn = nn.CrossEntropyLoss().cuda()
    # Each rank dumps its predictions into its own "<rank>.txt" file.
    rank_file = os.path.join(eval_path,
                             str(cfg.DDP_CONFIG.GPU_WORLD_RANK) + '.txt')
    test_classification(net, test_loader, loss_fn, cfg, rank_file)
    # Wait until every rank has finished writing before merging.
    torch.distributed.barrier()

    if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0:
        print("Start merging results...")
        merge(eval_path, cfg)
    else:
        print(cfg.DDP_CONFIG.GPU_WORLD_RANK, "Evaluation done!")
def test_get_all_models():
    """Smoke-test that every registered model name builds into an nn.Module."""
    cfg = get_cfg_defaults()
    for model_name in get_model_list():
        cfg.CONFIG.MODEL.NAME = model_name
        built = get_model(cfg)
        assert isinstance(built, nn.Module), '{}'.format(model_name)
def main_worker(cfg):
    """Evaluate a classification model on the validation split."""
    # Tensorboard logging only on the global-rank-0 process.
    writer = None
    if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0:
        writer = SummaryWriter(log_dir=build_log_dir(cfg))
    cfg.freeze()

    # Model construction + deployment wrapper.
    net = get_model(cfg)
    net = deploy_model(net, cfg)

    # Validation dataloader (sampler is unused here).
    val_loader, _ = build_dataloader_val(cfg)
    if cfg.CONFIG.MODEL.LOAD:
        net, _ = load_model(net, cfg, load_fc=True)

    loss_fn = nn.CrossEntropyLoss().cuda()
    # adversarial_classification(net, val_loader, -1, loss_fn, cfg, writer)
    validation_classification(net, val_loader, -1, loss_fn, cfg, writer)

    if writer is not None:
        writer.close()
def main_worker(cfg):
    """Train a video classification model with SGD and multi-step LR decay."""
    # Tensorboard writer exists only on global rank 0.
    writer = None
    if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0:
        writer = SummaryWriter(log_dir=build_log_dir(cfg))
    cfg.freeze()

    # Model construction + deployment wrapper (DDP etc.).
    model = get_model(cfg)
    model = deploy_model(model, cfg)

    # Train/val loaders plus their samplers (multigrid sampler unused here).
    (train_loader, val_loader,
     train_sampler, val_sampler, mg_sampler) = build_dataloader(cfg)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=cfg.CONFIG.TRAIN.LR,
                                momentum=cfg.CONFIG.TRAIN.MOMENTUM,
                                weight_decay=cfg.CONFIG.TRAIN.W_DECAY)
    if cfg.CONFIG.MODEL.LOAD:
        model, _ = load_model(model, optimizer, cfg, load_fc=True)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=cfg.CONFIG.TRAIN.LR_MILESTONE,
        gamma=cfg.CONFIG.TRAIN.STEP)
    criterion = nn.CrossEntropyLoss().cuda()

    base_iter = 0
    final_epoch = cfg.CONFIG.TRAIN.EPOCH_NUM - 1
    for epoch in range(cfg.CONFIG.TRAIN.EPOCH_NUM):
        if cfg.DDP_CONFIG.DISTRIBUTED:
            # Re-shuffle shards deterministically per epoch under DDP.
            train_sampler.set_epoch(epoch)

        base_iter = train_classification(base_iter, model, train_loader, epoch,
                                         criterion, optimizer, cfg,
                                         writer=writer)
        scheduler.step()

        if epoch % cfg.CONFIG.VAL.FREQ == 0 or epoch == final_epoch:
            validation_classification(model, val_loader, epoch, criterion,
                                      cfg, writer)

        if epoch % cfg.CONFIG.LOG.SAVE_FREQ == 0:
            # Only rank 0 saves checkpoints in distributed runs.
            if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0 or cfg.DDP_CONFIG.DISTRIBUTED == False:
                save_model(model, optimizer, epoch, cfg)

    if writer is not None:
        writer.close()
def _test_model_list(model_list, use_cuda, x, pretrained, num_classes, **kwargs):
    """Instantiate each named model and run a single forward pass on *x*."""
    cfg = get_cfg_defaults()
    for name in model_list:
        cfg.CONFIG.MODEL.NAME = name
        cfg.CONFIG.MODEL.PRETRAINED = pretrained
        cfg.CONFIG.DATA.NUM_CLASSES = num_classes
        net = get_model(cfg)
        if use_cuda:
            net.cuda()
            x = x.cuda()
        # Forward pass is the smoke test; the output is discarded.
        net(x)
def main(cfg, save_path):
    """Extract per-clip features for every validation video and save as .npy."""
    # Build the (pretrained) model in inference mode on the GPU.
    print('Building model for feature extraction')
    model = get_model(cfg)
    model.cuda()
    model.eval()
    print('Pre-trained model is successfully loaded from the model zoo.')

    # Validation-mode dataset: each item is (clip_tensor, label, video_name).
    val_dataset = VideoClsDataset(
        anno_path=cfg.CONFIG.DATA.VAL_ANNO_PATH,
        data_path=cfg.CONFIG.DATA.VAL_DATA_PATH,
        mode='validation',
        use_multigrid=cfg.CONFIG.DATA.MULTIGRID,
        clip_len=cfg.CONFIG.DATA.CLIP_LEN,
        frame_sample_rate=cfg.CONFIG.DATA.FRAME_RATE,
        num_segment=cfg.CONFIG.DATA.NUM_SEGMENT,
        num_crop=cfg.CONFIG.DATA.NUM_CROP,
        keep_aspect_ratio=cfg.CONFIG.DATA.KEEP_ASPECT_RATIO,
        crop_size=cfg.CONFIG.DATA.CROP_SIZE,
        short_side_size=cfg.CONFIG.DATA.SHORT_SIDE_SIZE,
        new_height=cfg.CONFIG.DATA.NEW_HEIGHT,
        new_width=cfg.CONFIG.DATA.NEW_WIDTH)

    print('Extracting features from %d videos.' % len(val_dataset))
    start_time = time.time()
    for vid, (video_clip, video_label, video_name) in enumerate(val_dataset):
        # Add a batch dimension and move the clip to the GPU.
        video_clip = torch.unsqueeze(video_clip, dim=0).cuda()
        with torch.no_grad():
            feat = model(video_clip).cpu().numpy()
        # One feature file per video: "<model>_<video>_feat.npy".
        feat_file = '%s_%s_feat.npy' % (cfg.CONFIG.MODEL.NAME, video_name)
        np.save(os.path.join(save_path, feat_file), feat)
        if vid > 0 and vid % 10 == 0:
            print('%04d/%04d is done' % (vid, len(val_dataset)))
    end_time = time.time()
    print('Total feature extraction time is %4.2f minutes' %
          ((end_time - start_time) / 60))
def main_worker(cfg):
    """Train a COOT video-text embedding model.

    Builds the model and COOT dataloaders, optimizes with RAdam, validates
    every ``cfg.CONFIG.VAL.FREQ`` epochs, tracks the best clip retrieval
    metric, and early-stops after 15 epochs without improvement.

    Fixes vs. the previous version:
    - the "unsupported LR schedule" message now actually substitutes the
      ``%s`` placeholder with the configured policy name;
    - ``best_epoch`` is initialized before the training loop, so the
      early-stop check cannot raise ``NameError`` when no epoch improves.
    """
    # create tensorboard and logs (rank 0 only)
    if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0:
        tb_logdir = build_log_dir(cfg)
        writer = SummaryWriter(log_dir=tb_logdir)
    else:
        writer = None
    cfg.freeze()
    # NOTE(review): tb_logdir is only assigned on rank 0; on other ranks this
    # call would raise NameError — confirm this script is launched rank-0 only
    # or hoist build_log_dir(cfg) above the rank check.
    logger = get_logger(tb_logdir, "trainer", log_file=True)

    # create model
    model = get_model(cfg)
    model = deploy_model(model, cfg)

    # create dataset and dataloader
    data_path_dict = create_dataloader_path(
        cfg.CONFIG.COOT_DATA.DATA_PATH,
        cfg.CONFIG.COOT_DATA.DATASET_NAME,
        video_feature_name=cfg.CONFIG.COOT_DATA.FEATURE)
    train_set, val_set = create_datasets(data_path_dict, cfg,
                                         cfg.CONFIG.COOT_DATA.VIDEO_PRELOAD,
                                         cfg.CONFIG.COOT_DATA.TEXT_PRELOAD)
    train_loader, val_loader = create_loaders(train_set, val_set,
                                              cfg.CONFIG.TRAIN.BATCH_SIZE,
                                              cfg.CONFIG.DATA.NUM_WORKERS)

    optimizer = RAdam(model.get_params(),
                      lr=cfg.CONFIG.TRAIN.LR,
                      betas=(cfg.CONFIG.TRAIN.MOMENTUM,
                             cfg.CONFIG.TRAIN.ADAM_BETA2),
                      eps=cfg.CONFIG.TRAIN.ADAM_EPS,
                      weight_decay=cfg.CONFIG.TRAIN.W_DECAY)

    if cfg.CONFIG.MODEL.LOAD:
        model, _ = load_model(model, optimizer, cfg, load_fc=True)

    if cfg.CONFIG.TRAIN.LR_POLICY == 'Step':
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=cfg.CONFIG.TRAIN.LR_MILESTONE,
            gamma=cfg.CONFIG.TRAIN.STEP)
    elif cfg.CONFIG.TRAIN.LR_POLICY == 'Cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=cfg.CONFIG.TRAIN.EPOCH_NUM - cfg.CONFIG.TRAIN.WARMUP_EPOCHS,
            eta_min=0,
            last_epoch=cfg.CONFIG.TRAIN.RESUME_EPOCH)
    elif cfg.CONFIG.TRAIN.LR_POLICY == 'LR_Warmup':
        scheduler = ReduceLROnPlateauWarmup(optimizer,
                                            cfg.CONFIG.TRAIN.WARMUP_EPOCHS,
                                            mode="max",
                                            patience=cfg.CONFIG.TRAIN.PATIENCE,
                                            cooldown=cfg.CONFIG.TRAIN.COOLDOWN)
    else:
        # BUG FIX: the %s placeholder was previously never substituted.
        # NOTE(review): `scheduler` stays undefined on this path and the
        # step_rop call below will raise — consider raising ValueError here.
        print('Learning rate schedule %s is not supported yet. Please use Step or Cosine.'
              % cfg.CONFIG.TRAIN.LR_POLICY)

    criterion_cycleconsistency = CycleConsistencyCootLoss(num_samples=1,
                                                          use_cuda=True)
    criterion_alignment = MaxMarginRankingLoss(use_cuda=True)

    base_iter = 0
    det_best_field_best = 0
    # BUG FIX: initialize so the early-stop check below cannot NameError
    # when no epoch ever improves on the initial best metric.
    best_epoch = 0
    for epoch in range(cfg.CONFIG.TRAIN.EPOCH_NUM):
        # ======== Training step ===============
        base_iter = train_coot(cfg, base_iter, model, train_loader, epoch,
                               criterion_alignment, criterion_cycleconsistency,
                               optimizer, writer, logger)

        # ======= Validation step ================
        if epoch % cfg.CONFIG.VAL.FREQ == 0 or epoch == cfg.CONFIG.TRAIN.EPOCH_NUM - 1:
            vid_metrics, clip_metrics = validate_coot(
                cfg, model, val_loader, epoch, criterion_alignment,
                criterion_cycleconsistency, writer, logger, True)

            # Check if the performance of model is improving
            logger.info("---------- Validating epoch {} ----------".format(epoch))
            c2s_res, s2c_res, clip_best_at_1 = None, None, None
            if clip_metrics is not None:
                c2s_res, s2c_res, clip_best_at_1 = clip_metrics

            # find field which determines is_best
            det_best_field_current = clip_best_at_1

            # check if best
            is_best = compare_metrics(det_best_field_current, det_best_field_best)
            if is_best:
                det_best_field_best = det_best_field_current
                best_epoch = epoch

            # step lr scheduler (reduce-on-plateau variant)
            scheduler.step_rop(det_best_field_current, True)
            logger.info(f"ROP: model improved: {is_best}, "
                        f"value {det_best_field_current:.3f},"
                        f"new LR: {optimizer.param_groups[0]['lr']:5.3e}")

        if epoch % cfg.CONFIG.LOG.SAVE_FREQ == 0:
            if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0 or cfg.DDP_CONFIG.DISTRIBUTED == False:
                model.save_model(optimizer, epoch, cfg)

        # check if model did not improve for too long
        term_after = 15
        if epoch - best_epoch > term_after:
            logger.info(f"NO improvements for {term_after} epochs (current "
                        f"{epoch} best {best_epoch}) STOP training.")
            break

    if writer is not None:
        writer.close()
    if logger is not None:
        close_logger(logger)
def main_worker(cfg):
    """Train a video classification model with optional LR warmup/multigrid.

    Fix vs. the previous version: the "unsupported LR schedule" message now
    actually substitutes the ``%s`` placeholder with the configured policy.
    """
    # create tensorboard and logs (rank 0 only)
    if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0:
        tb_logdir = build_log_dir(cfg)
        writer = SummaryWriter(log_dir=tb_logdir)
    else:
        writer = None
    cfg.freeze()

    # create model
    model = get_model(cfg)
    model = deploy_model(model, cfg)

    # create dataset and dataloader (mg_sampler drives multigrid long cycles)
    train_loader, val_loader, train_sampler, val_sampler, mg_sampler = build_dataloader(
        cfg)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=cfg.CONFIG.TRAIN.LR,
                                momentum=cfg.CONFIG.TRAIN.MOMENTUM,
                                weight_decay=cfg.CONFIG.TRAIN.W_DECAY)
    if cfg.CONFIG.MODEL.LOAD:
        model, _ = load_model(model, optimizer, cfg, load_fc=True)

    if cfg.CONFIG.TRAIN.LR_POLICY == 'Step':
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=cfg.CONFIG.TRAIN.LR_MILESTONE,
            gamma=cfg.CONFIG.TRAIN.STEP)
    elif cfg.CONFIG.TRAIN.LR_POLICY == 'Cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=cfg.CONFIG.TRAIN.EPOCH_NUM - cfg.CONFIG.TRAIN.WARMUP_EPOCHS,
            eta_min=0,
            last_epoch=cfg.CONFIG.TRAIN.RESUME_EPOCH)
    else:
        # BUG FIX: the %s placeholder was previously never substituted.
        # NOTE(review): `scheduler` stays undefined on this path; the
        # scheduler.step() call below will raise — consider raising here.
        print('Learning rate schedule %s is not supported yet. Please use Step or Cosine.'
              % cfg.CONFIG.TRAIN.LR_POLICY)

    if cfg.CONFIG.TRAIN.USE_WARMUP:
        # Linearly ramp LR up to WARMUP_END_LR, then hand over to `scheduler`.
        scheduler_warmup = GradualWarmupScheduler(
            optimizer,
            multiplier=(cfg.CONFIG.TRAIN.WARMUP_END_LR / cfg.CONFIG.TRAIN.LR),
            total_epoch=cfg.CONFIG.TRAIN.WARMUP_EPOCHS,
            after_scheduler=scheduler)

    criterion = nn.CrossEntropyLoss().cuda()

    base_iter = 0
    for epoch in range(cfg.CONFIG.TRAIN.EPOCH_NUM):
        if cfg.DDP_CONFIG.DISTRIBUTED:
            # Re-shuffle shards deterministically per epoch under DDP.
            train_sampler.set_epoch(epoch)

        base_iter = train_classification(base_iter, model, train_loader, epoch,
                                         criterion, optimizer, cfg,
                                         writer=writer)
        if cfg.CONFIG.TRAIN.USE_WARMUP:
            scheduler_warmup.step()
        else:
            scheduler.step()

        if cfg.CONFIG.TRAIN.MULTIGRID.USE_LONG_CYCLE:
            if epoch in cfg.CONFIG.TRAIN.MULTIGRID.LONG_CYCLE_EPOCH:
                mg_sampler.step_long_cycle()

        if epoch % cfg.CONFIG.VAL.FREQ == 0 or epoch == cfg.CONFIG.TRAIN.EPOCH_NUM - 1:
            validation_classification(model, val_loader, epoch, criterion,
                                      cfg, writer)

        if epoch % cfg.CONFIG.LOG.SAVE_FREQ == 0:
            # Only rank 0 saves checkpoints in distributed runs.
            if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0 or cfg.DDP_CONFIG.DISTRIBUTED == False:
                save_model(model, optimizer, epoch, cfg)

    if writer is not None:
        writer.close()
return args  # NOTE(review): tail of parse_args(); its `def` is outside this chunk


if __name__ == '__main__':
    args = parse_args()
    # Inference only: disable autograd globally.
    torch.set_grad_enabled(False)
    if args.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    cfg = get_cfg_defaults(name='directpose')
    if args.model_name:
        # Named model from the zoo: force pretrained weights and TVM mode.
        cfg.CONFIG.MODEL.PRETRAINED = True
        cfg.CONFIG.MODEL.NAME = args.model_name
        cfg.CONFIG.MODEL.TVM_MODE = True
        net = model_zoo.get_model(cfg)
    else:
        # Otherwise build from a config file and a local checkpoint.
        assert os.path.isfile(args.config_file)
        assert os.path.isfile(cfg.CONFIG.MODEL.PRETRAINED_PATH)
        cfg.merge_from_file(args.config_file)
        cfg.CONFIG.MODEL.TVM_MODE = True
        net = model_zoo.directpose_resnet_lpf_fpn(cfg).to(device).eval()
        load_model = torch.load(cfg.CONFIG.MODEL.PRETRAINED_PATH)
        # strict=False: tolerate missing/unexpected keys in the checkpoint.
        net.load_state_dict(load_model, strict=False)
    # Download and preprocess a single test image.
    images, orig_image = get_image(
        'soccer.png',
        img_url=
        'https://github.com/dmlc/web-data/blob/master/gluoncv/pose/soccer.png?raw=true',
        img_width=args.image_width,
        img_height=args.image_height)
    y = net(images.to(device))
'--num-runs',  # NOTE(review): continuation of a parser.add_argument( opened outside this chunk
    type=int,
    default=105,
    help='number of runs to compute average forward timing. default is 105')
parser.add_argument(
    '--num-warmup-runs',
    type=int,
    default=5,
    help='number of warmup runs to avoid initial slow speed. default is 5')
args = parser.parse_args()

cfg = get_cfg_defaults()
cfg.merge_from_file(args.config_file)
model = get_model(cfg)
model.eval()
model.cuda()
# Random NCTHW clip used purely for timing the forward pass.
input_tensor = torch.autograd.Variable(
    torch.rand(1, 3, args.num_frames, args.input_size,
               args.input_size)).cuda()
print('Model is loaded, start forwarding.')

with torch.no_grad():
    for i in range(args.num_runs):
        # Start the clock only after the warmup runs are done.
        if i == args.num_warmup_runs:
            start_time = time.time()
        pred = model(input_tensor)
end_time = time.time()

# Wall-clock time for the timed (non-warmup) runs.
total_forward = end_time - start_time
def test_model_zoo_tvm_export_directpose():
    """Trace a pretrained DirectPose model with TorchScript and compile it via TVM relay."""
    from gluoncv.torch.utils.tvm_utils.nms import nms
    from tvm.contrib.download import download_testdata

    def get_image(img_name='street_small.jpg',
                  img_url=None,
                  img_width=1280,
                  img_height=736):
        # Download (cached via download_testdata), resize and normalize one image.
        def get_single_image_input(img_url):
            img_name = img_url.split("/")[-1]
            img_path = download_testdata(img_url, img_name, module="data")
            orig_img = Image.open(img_path)
            orig_img = orig_img.resize((img_width, img_height), Image.LANCZOS)
            # Keep RGB channel order as uint8.
            img = np.array(orig_img)[:, :, (0, 1, 2)].astype('uint8')
            return img, orig_img, img_path

        def get_transforms():
            # ImageNet mean/std normalization after ToTensor.
            tforms = T.Compose([
                T.ToTensor(),
                T.Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225])
            ])
            return tforms

        if img_url is None:
            img_url = f"https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/detection/{img_name}"
        img, orig_img, img_path = get_single_image_input(img_url)
        tforms = get_transforms()
        # Add a batch dimension: (1, C, H, W).
        input_data = tforms(img).unsqueeze(0)
        return input_data, orig_img

    images, orig_img = get_image()
    model_list = ['directpose_resnet50_lpf_fpn_coco']
    cfg = get_cfg_defaults(name='directpose')
    torch.set_grad_enabled(False)
    # Fall back to CPU compilation when no GPU is visible.
    use_cuda = True
    if torch.cuda.device_count() == 0:
        use_cuda = False
    if use_cuda:
        device = torch.device('cuda')
        target = "cuda"
        ctx = tvm.gpu(0)
    else:
        device = torch.device('cpu')
        target = "llvm"
        ctx = tvm.cpu()
    for model in model_list:
        # TVM_MODE presumably adapts the model graph for tracing — confirm in model zoo.
        cfg.CONFIG.MODEL.PRETRAINED = True
        cfg.CONFIG.MODEL.NAME = model
        cfg.CONFIG.MODEL.TVM_MODE = True
        net = model_zoo.get_model(cfg).to(device).eval()
        # y = net(images.to(device))
        with torch.no_grad():
            # Trace rather than script: the model is run once on a real input.
            scripted_model = torch.jit.trace(net.forward,
                                             images.to(device)).eval()
        input_name = "input0"
        shape_list = [(input_name, images.shape)]
        # Map torchvision's NMS op to the project-provided TVM implementation.
        mod, params = relay.frontend.from_pytorch(scripted_model, shape_list,
                                                  {"torchvision::nms": nms})
        target_host = "llvm"
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(mod,
                              target=target,
                              target_host=target_host,
                              params=params)
if __name__ == '__main__':
    args = parse_args()
    # Inference only: disable autograd globally.
    torch.set_grad_enabled(False)
    cfg = get_cfg_defaults(name='directpose')
    if args.use_gpu:
        print('Using GPU...')
        device = torch.device('cuda')
    else:
        print('Using CPU...')
        device = torch.device('cpu')
    if args.model_name:
        # Named model from the zoo with pretrained weights.
        cfg.CONFIG.MODEL.PRETRAINED = True
        cfg.CONFIG.MODEL.NAME = args.model_name
        net = model_zoo.get_model(cfg).to(device).eval()
    else:
        # Otherwise build from a config file and a local checkpoint.
        assert os.path.isfile(args.config_file)
        assert os.path.isfile(cfg.CONFIG.MODEL.PRETRAINED_PATH)
        cfg.merge_from_file(args.config_file)
        net = model_zoo.directpose_resnet_lpf_fpn(cfg).to(device).eval()
        load_model = torch.load(cfg.CONFIG.MODEL.PRETRAINED_PATH,
                                map_location=torch.device('cpu'))
        # strict=False: tolerate missing/unexpected keys in the checkpoint.
        net.load_state_dict(load_model, strict=False)
    # Download and preprocess a single test image.
    images, orig_image = get_image(
        'soccer.png',
        img_url=
        'https://github.com/dmlc/web-data/blob/master/gluoncv/pose/soccer.png?raw=true',
        img_width=args.image_width,
        img_height=args.image_height)
    with torch.no_grad():  # NOTE(review): body continues beyond this chunk