def main():
    """Dispatch the requested operation (info, train, test, time, or scale)."""
    args = parse_args()
    config.load_cfg(args.cfg)
    cfg.merge_from_list(args.opts)
    config.assert_cfg()
    cfg.freeze()
    operation = args.mode
    if operation == "info":
        # Show the instantiated model followed by its complexity stats.
        print(builders.get_model()())
        print("complexity:", net.complexity(builders.get_model()))
        return
    if operation in ("train", "test", "time"):
        runner = {
            "train": trainer.train_model,
            "test": trainer.test_model,
            "time": trainer.time_model,
        }[operation]
        dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=runner)
        return
    if operation == "scale":
        # Scaling mutates the config, so it must be unfrozen first.
        cfg.defrost()
        complexity_before = net.complexity(builders.get_model())
        scaler.scale_model()
        complexity_after = net.complexity(builders.get_model())
        dumped_path = config.dump_cfg()
        print("Scaled config dumped to:", dumped_path)
        print("Original model complexity:", complexity_before)
        print("Scaled model complexity:", complexity_after)
def main():
    """Load options from the command line and launch distributed training."""
    config.load_cfg_fom_args("Train a classification model.")
    config.assert_and_infer_cfg()
    cfg.freeze()
    # One training process per requested GPU.
    dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.train_model)
def build_model():
    """Build an EfficientNet-B5 from the dds config and restore trained weights.

    Returns:
        EffNet: the model in eval mode with weights loaded from
        ``result/model.pyth``.
    """
    config.merge_from_file('configs/dds_baselines/effnet/EN-B5_dds_8npu.yaml')
    cfg.freeze()
    model = EffNet()
    # NOTE(review): no map_location here, so the checkpoint's original device
    # must be available on this host — confirm if this runs on CPU-only boxes.
    checkpoint = torch.load('result/model.pyth')
    # Keyword form for the boolean flag (was a bare positional `False`);
    # strict=False tolerates missing/unexpected state-dict keys.
    model.load_state_dict(checkpoint["model_state"], strict=False)
    model.eval()
    return model
def main():
    """Configure the run and launch training through a pluggable trainer module."""
    config.load_cfg_fom_args("Train a classification model.")
    config.assert_and_infer_cfg()
    D2Utils.cfg_merge_from_easydict(cfg, global_cfg)
    cfg.freeze()
    # The trainer implementation is selectable via cfg; default to pycls's.
    module_path = cfg.get('trainer_module', 'pycls.core.trainer')
    trainer_impl = importlib.import_module(module_path)
    dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer_impl.train_model)
def main():
    """Evaluate a trained classification model."""
    # Load config options
    config.load_cfg_fom_args("Test a trained classification model.")
    config.assert_and_infer_cfg()
    cfg.freeze()
    # Multi-GPU evaluation goes through the distributed launcher;
    # single-GPU runs call the test function inline.
    if cfg.NUM_GPUS > 1:
        dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=test_model)
    else:
        test_model()
def main():
    """Build the configured model and sanity-check it with one dummy forward.

    Prints the model type, the output shape for a 1x3x224x224 input, and the
    complexity reported by ``complexity``.
    """
    config.load_cfg_fom_args("Train a classification model.")
    config.assert_and_infer_cfg()
    cfg.freeze()
    print("building model {}".format(cfg.MODEL.TYPE))
    model = build_model()
    model.eval()
    x = torch.randn(1, 3, 224, 224)
    # Inference only: disable autograd so the forward pass does not build
    # (and keep alive) a gradient graph that is never used.
    with torch.no_grad():
        y = model(x)
    print(y.shape)
    model_complex = complexity(model)
    print(model_complex)
def __init__(self, num_classes=1, ckpt=None):
    """Wrap a pycls backbone and swap its classifier head.

    Args:
        num_classes: output dimension of the replacement head.
        ckpt: optional checkpoint path whose 'model_state' is loaded first.
    """
    super(Regnet, self).__init__()
    # Imported lazily so pycls config/arg parsing only happens on instantiation.
    from pycls.core.config import cfg
    import pycls.core.config as model_config
    from pycls.core.builders import build_model
    model_config.load_cfg_fom_args("Train a cls model")
    cfg.freeze()
    backbone = build_model()
    if ckpt:
        state = torch.load(ckpt)['model_state']
        backbone.load_state_dict(state)
    self.model = backbone
    # Replace the final classifier with one sized for our label set.
    head_width = backbone.head.fc.in_features
    self.model.head.fc = nn.Linear(head_width, num_classes)
def main():
    """Evaluate a model using options parsed from the command line."""
    cli = parse_args()
    # Merge config from file, then apply command-line overrides.
    cfg.merge_from_file(cli.cfg_file)
    cfg.merge_from_list(cli.opts)
    assert_cfg()
    cfg.freeze()
    # Fan out across GPUs only when more than one is requested.
    if cfg.NUM_GPUS > 1:
        mpu.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=single_proc_test)
    else:
        single_proc_test()
def main():
    """Train a classification model, snapshotting the config to OUT_DIR first."""
    config.load_cfg_fom_args("Train a classification model.")
    config.assert_and_infer_cfg()
    cfg.freeze()
    # Make sure there is somewhere to write results, then save the config there.
    os.makedirs(cfg.OUT_DIR, exist_ok=True)
    config.dump_cfg()
    # Single-GPU runs skip the multiprocessing machinery entirely.
    if cfg.NUM_GPUS > 1:
        dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=train_model)
    else:
        train_model()
def setup_distributed(cfg_state):
    """Initialize torch.distributed and pin this process to its CUDA device.

    Expects the standard torch.distributed environment variables
    (https://pytorch.org/docs/stable/distributed.html#environment-variable-initialization)
    plus "LOCAL_RANK", which selects the CUDA device. This runs inside a
    freshly spawned process, so the global cfg is reset and must be restored
    from ``cfg_state``.
    """
    # Rebuild the process-local config from the snapshot passed by the parent.
    cfg.defrost()
    cfg.update(**cfg_state)
    cfg.freeze()
    device_index = int(os.environ["LOCAL_RANK"])
    torch.distributed.init_process_group(backend=cfg.DIST_BACKEND)
    torch.cuda.set_device(device_index)
def __init__(self, num_clusters, num_tiles, num_classes, ckpt):
    """Build a pycls encoder followed by NetVLAD pooling and a linear classifier.

    Args:
        num_clusters: number of NetVLAD clusters.
        num_tiles: maximum number of tiles (frames) pooled per sample.
        num_classes: output dimension of the final classifier.
        ckpt: optional checkpoint path whose 'model_state' initializes the
            backbone.
    """
    super().__init__()
    # Imported lazily so pycls config/arg parsing only happens on instantiation.
    from pycls.core.config import cfg
    import pycls.core.config as model_config
    from pycls.core.builders import build_model
    model_config.load_cfg_fom_args("Train a cls model")
    cfg.freeze()
    backbone = build_model()
    if ckpt:
        backbone.load_state_dict(torch.load(ckpt)['model_state'])
    # Encoder: backbone stages up to global average pooling, flattened,
    # with dropout before the pooling head.
    self.enc = nn.Sequential(
        backbone.stem,
        backbone.s1,
        backbone.s2,
        backbone.s3,
        backbone.s4,
        nn.AdaptiveAvgPool2d(output_size=(1, 1)),
        nn.Flatten(),
        nn.Dropout(p=0.3),
    )
    self.nc = backbone.head.fc.in_features
    self.netvlad = NetVLAD(
        cluster_size=num_clusters,
        max_frames=num_tiles,
        feature_size=self.nc,
        truncate=False,
    )
    self.fc = nn.Linear(num_clusters * self.nc, num_classes)
def main():
    """Train a model using options parsed from the command line."""
    cli = parse_args()
    # Merge config from file, then apply command-line overrides.
    cfg.merge_from_file(cli.cfg_file)
    cfg.merge_from_list(cli.opts)
    assert_and_infer_cfg()
    cfg.freeze()
    # Create the output directory and snapshot the effective config there.
    os.makedirs(cfg.OUT_DIR, exist_ok=True)
    dump_cfg()
    # Use the multiprocessing launcher only when several GPUs are requested.
    if cfg.NUM_GPUS > 1:
        mpu.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=single_proc_train)
    else:
        single_proc_train()
def convert():
    """Load a trained EfficientNet-B5 checkpoint and export it to ONNX.

    Reads ``result/model.pyth`` on CPU, rebuilds the model from the dds
    config, restores weights, and writes ``efficientnetB5_npu_16.onnx``.
    """
    loc = 'cpu'
    with g_pathmgr.open("result/model.pyth", "rb") as f:
        checkpoint = torch.load(f, map_location=loc)
    print(checkpoint.keys())
    config.merge_from_file('configs/dds_baselines/effnet/EN-B5_dds_8npu.yaml')
    cfg.freeze()
    model = EffNet()
    print(model)
    # presumably remaps state-dict key names (e.g. strips wrapper prefixes);
    # verify against proc_node_module's definition.
    checkpoint['model_state'] = proc_node_module(checkpoint, 'model_state')
    # Keyword form for the boolean flag (was a bare positional `False`);
    # strict=False tolerates missing/unexpected state-dict keys.
    model.load_state_dict(checkpoint["model_state"], strict=False)
    model.eval()
    input_names = ["actual_input_1"]
    output_names = ["output1"]
    dummy_input = torch.randn(32, 3, 456, 456).to(loc)
    torch.onnx.export(model, dummy_input, "efficientnetB5_npu_16.onnx",
                      input_names=input_names, output_names=output_names,
                      opset_version=11)
def main():
    """Measure precise per-iteration timings for the configured model."""
    config.load_cfg_fom_args("Compute precise time for a model on 1 GPU.")
    config.assert_and_infer_cfg()
    cfg.freeze()
    # Timing runs through the same distributed launcher as training.
    dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.time_model)
def main():
    """Load config options and run the evaluation entry point."""
    config.load_cfg_fom_args("Test a trained classification model.")
    config.assert_and_infer_cfg()
    cfg.freeze()
    test()
def main():
    """Benchmark model and data-loader timings for the configured setup."""
    config.load_cfg_fom_args("Compute model and loader timings.")
    config.assert_and_infer_cfg()
    cfg.freeze()
    dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.time_model)
def main():
    """Evaluate a trained classification model via the distributed launcher."""
    config.load_cfg_fom_args("Test a trained classification model.")
    config.assert_and_infer_cfg()
    cfg.freeze()
    dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.test_model)
def main():
    """Evaluate a pretrained dynamic-inference (glance-and-focus) model.

    Loads a checkpoint holding backbone/policy/fc weights plus precomputed
    statistics; when ``args.eval_mode > 0`` it regenerates logits on the
    validation set (and, for ``eval_mode == 2``, re-tunes the dynamic
    thresholds on a training subset) before printing the results.

    NOTE(review): the block nesting below is reconstructed from
    whitespace-collapsed source — confirm against the upstream file.
    """
    # load pretrained model
    checkpoint = torch.load(args.checkpoint_path)
    try:
        model_arch = checkpoint['model_name']
        patch_size = checkpoint['patch_size']
        prime_size = checkpoint['patch_size']
        flops = checkpoint['flops']
        model_flops = checkpoint['model_flops']
        policy_flops = checkpoint['policy_flops']
        fc_flops = checkpoint['fc_flops']
        anytime_classification = checkpoint['anytime_classification']
        budgeted_batch_classification = checkpoint[
            'budgeted_batch_classification']
        dynamic_threshold = checkpoint['dynamic_threshold']
        maximum_length = len(checkpoint['flops'])
    except KeyError:
        # Fail fast: the code below cannot run without these fields. The
        # original bare `except:` printed this message and then fell through
        # to a confusing NameError on `model_arch`.
        print(
            'Error: \n'
            'Please provide essential information'
            'for customized models (as we have done '
            'in pre-trained models)!\n'
            'At least the following information should be Given: \n'
            '--model_name: name of the backbone CNNs (e.g., resnet50, densenet121)\n'
            '--patch_size: size of image patches (i.e., H\' or W\' in the paper)\n'
            '--flops: a list containing the Multiply-Adds corresponding to each '
            'length of the input sequence during inference')
        raise
    model_configuration = model_configurations[model_arch]
    if args.eval_mode > 0:
        # create model (a "prime" twin processes the low-resolution glance)
        if 'resnet' in model_arch:
            model = resnet.resnet50(pretrained=False)
            model_prime = resnet.resnet50(pretrained=False)
        elif 'densenet' in model_arch:
            # getattr is equivalent to the original eval('densenet.' + ...)
            # lookup without executing an arbitrary string.
            model = getattr(densenet, model_arch)(pretrained=False)
            model_prime = getattr(densenet, model_arch)(pretrained=False)
        elif 'efficientnet' in model_arch:
            model = create_model(model_arch, pretrained=False, num_classes=1000,
                                 drop_rate=0.3, drop_connect_rate=0.2)
            model_prime = create_model(model_arch, pretrained=False,
                                       num_classes=1000, drop_rate=0.3,
                                       drop_connect_rate=0.2)
        elif 'mobilenetv3' in model_arch:
            model = create_model(model_arch, pretrained=False, num_classes=1000,
                                 drop_rate=0.2, drop_connect_rate=0.2)
            model_prime = create_model(model_arch, pretrained=False,
                                       num_classes=1000, drop_rate=0.2,
                                       drop_connect_rate=0.2)
        elif 'regnet' in model_arch:
            import pycls.core.model_builder as model_builder
            from pycls.core.config import cfg
            cfg.merge_from_file(model_configuration['cfg_file'])
            cfg.freeze()
            model = model_builder.build_model()
            model_prime = model_builder.build_model()
        else:
            # Previously an unmatched arch fell through to a NameError below.
            raise ValueError('unsupported model_arch: {}'.format(model_arch))

        traindir = args.data_url + 'train/'
        valdir = args.data_url + 'val/'
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        train_set = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(
                    model_configuration['image_size'],
                    interpolation=model_configuration['dataset_interpolation']),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize]))
        train_set_index = torch.randperm(len(train_set))
        # Threshold tuning uses only the last 200k training images.
        train_loader = torch.utils.data.DataLoader(
            train_set, batch_size=256, num_workers=32, pin_memory=False,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                train_set_index[-200000:]))
        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(
                valdir,
                transforms.Compose([
                    transforms.Resize(
                        int(model_configuration['image_size'] /
                            model_configuration['crop_pct']),
                        interpolation=model_configuration[
                            'dataset_interpolation']),
                    transforms.CenterCrop(model_configuration['image_size']),
                    transforms.ToTensor(),
                    normalize])),
            batch_size=256, shuffle=False, num_workers=16, pin_memory=False)

        # Policy input size: channels x ceil(patch/32)^2 flattened feature map.
        state_dim = model_configuration['feature_map_channels'] * math.ceil(
            patch_size / 32) * math.ceil(patch_size / 32)
        memory = Memory()
        policy = ActorCritic(model_configuration['feature_map_channels'],
                             state_dim,
                             model_configuration['policy_hidden_dim'],
                             model_configuration['policy_conv'])
        fc = Full_layer(model_configuration['feature_num'],
                        model_configuration['fc_hidden_dim'],
                        model_configuration['fc_rnn'])
        model = nn.DataParallel(model.cuda())
        model_prime = nn.DataParallel(model_prime.cuda())
        policy = policy.cuda()
        fc = fc.cuda()
        model.load_state_dict(checkpoint['model_state_dict'])
        model_prime.load_state_dict(checkpoint['model_prime_state_dict'])
        fc.load_state_dict(checkpoint['fc'])
        policy.load_state_dict(checkpoint['policy'])

        budgeted_batch_flops_list = []
        budgeted_batch_acc_list = []
        print('generate logits on test samples...')
        test_logits, test_targets, anytime_classification = generate_logits(
            model_prime, model, fc, memory, policy, val_loader, maximum_length,
            prime_size, patch_size, model_arch)
        if args.eval_mode == 2:
            print('generate logits on training samples...')
            dynamic_threshold = torch.zeros([39, maximum_length])
            train_logits, train_targets, _ = generate_logits(
                model_prime, model, fc, memory, policy, train_loader,
                maximum_length, prime_size, patch_size, model_arch)

        for p in range(1, 40):
            print('inference: {}/40'.format(p))
            _p = torch.FloatTensor(1).fill_(p * 1.0 / 20)
            # torch.arange(1, L + 1) matches the deprecated torch.range(1, L):
            # both yield [1, 2, ..., L] with the endpoint included.
            probs = torch.exp(torch.log(_p) *
                              torch.arange(1, maximum_length + 1))
            probs /= probs.sum()
            if args.eval_mode == 2:
                dynamic_threshold[p - 1] = dynamic_find_threshold(
                    train_logits, train_targets, probs)
            acc_step, flops_step = dynamic_evaluate(
                test_logits, test_targets, flops, dynamic_threshold[p - 1])
            budgeted_batch_acc_list.append(acc_step)
            budgeted_batch_flops_list.append(flops_step)
        budgeted_batch_classification = [
            budgeted_batch_flops_list, budgeted_batch_acc_list
        ]

    print('model_arch :', model_arch)
    print('patch_size :', patch_size)
    print('flops :', flops)
    print('model_flops :', model_flops)
    print('policy_flops :', policy_flops)
    print('fc_flops :', fc_flops)
    print('anytime_classification :', anytime_classification)
    print('budgeted_batch_classification :', budgeted_batch_classification)
def main():
    """Train a glance-and-focus model (backbones, fc head, and PPO policy).

    Stage 1 trains the backbones + fc from separately supplied weights,
    stage 2 trains the PPO policy only (backbone optimizer disabled), and
    stage 3 fine-tunes everything from a stage-2 checkpoint. Records and
    checkpoints are written under ``args.work_dirs``.

    NOTE(review): block nesting reconstructed from whitespace-collapsed
    source — confirm against the upstream file.
    """
    if not os.path.isdir(args.work_dirs):
        mkdir_p(args.work_dirs)
    # One record directory per (arch, patch size, T, stage) combination.
    record_path = args.work_dirs + '/GF-' + str(args.model_arch) \
        + '_patch-size-' + str(args.patch_size) \
        + '_T' + str(args.T) \
        + '_train-stage' + str(args.train_stage)
    if not os.path.isdir(record_path):
        mkdir_p(record_path)
    record_file = record_path + '/record.txt'

    # *create model* (a "prime" twin processes the low-resolution glance)
    model_configuration = model_configurations[args.model_arch]
    if 'resnet' in args.model_arch:
        model_arch = 'resnet'
        model = resnet.resnet50(pretrained=False)
        model_prime = resnet.resnet50(pretrained=False)
    elif 'densenet' in args.model_arch:
        model_arch = 'densenet'
        # getattr replaces the original eval('densenet.' + ...): same attribute
        # lookup, without executing a string built from CLI input.
        model = getattr(densenet, args.model_arch)(pretrained=False)
        model_prime = getattr(densenet, args.model_arch)(pretrained=False)
    elif 'efficientnet' in args.model_arch:
        model_arch = 'efficientnet'
        model = create_model(args.model_arch, pretrained=False,
                             num_classes=1000, drop_rate=0.3,
                             drop_connect_rate=0.2)
        model_prime = create_model(args.model_arch, pretrained=False,
                                   num_classes=1000, drop_rate=0.3,
                                   drop_connect_rate=0.2)
    elif 'mobilenetv3' in args.model_arch:
        model_arch = 'mobilenetv3'
        model = create_model(args.model_arch, pretrained=False,
                             num_classes=1000, drop_rate=0.2,
                             drop_connect_rate=0.2)
        model_prime = create_model(args.model_arch, pretrained=False,
                                   num_classes=1000, drop_rate=0.2,
                                   drop_connect_rate=0.2)
    elif 'regnet' in args.model_arch:
        model_arch = 'regnet'
        import pycls.core.model_builder as model_builder
        from pycls.core.config import cfg
        cfg.merge_from_file(model_configuration['cfg_file'])
        cfg.freeze()
        model = model_builder.build_model()
        model_prime = model_builder.build_model()
    else:
        # Previously an unmatched arch fell through to a NameError below.
        raise ValueError('unsupported model_arch: {}'.format(args.model_arch))

    fc = Full_layer(model_configuration['feature_num'],
                    model_configuration['fc_hidden_dim'],
                    model_configuration['fc_rnn'])

    if args.train_stage == 1:
        model.load_state_dict(torch.load(args.model_path))
        model_prime.load_state_dict(torch.load(args.model_prime_path))
    else:
        checkpoint = torch.load(args.checkpoint_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        model_prime.load_state_dict(checkpoint['model_prime_state_dict'])
        fc.load_state_dict(checkpoint['fc'])

    train_configuration = train_configurations[model_arch]

    if args.train_stage != 2:
        if train_configuration['train_model_prime']:
            optimizer = torch.optim.SGD(
                [{'params': model.parameters()},
                 {'params': model_prime.parameters()},
                 {'params': fc.parameters()}],
                lr=0,  # specify in adjust_learning_rate()
                momentum=train_configuration['momentum'],
                nesterov=train_configuration['Nesterov'],
                weight_decay=train_configuration['weight_decay'])
        else:
            optimizer = torch.optim.SGD(
                [{'params': model.parameters()},
                 {'params': fc.parameters()}],
                lr=0,  # specify in adjust_learning_rate()
                momentum=train_configuration['momentum'],
                nesterov=train_configuration['Nesterov'],
                weight_decay=train_configuration['weight_decay'])
        training_epoch_num = train_configuration['epoch_num']
    else:
        # Stage 2 trains the PPO policy only; no backbone optimizer.
        optimizer = None
        training_epoch_num = 15
    criterion = nn.CrossEntropyLoss().cuda()

    model = nn.DataParallel(model.cuda())
    model_prime = nn.DataParallel(model_prime.cuda())
    fc = fc.cuda()

    traindir = args.data_url + 'train/'
    valdir = args.data_url + 'val/'
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_set = datasets.ImageFolder(traindir, transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))
    train_set_index = torch.randperm(len(train_set))
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=256, num_workers=32, pin_memory=False,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            train_set_index[:]))
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=train_configuration['batch_size'], shuffle=False,
        num_workers=32, pin_memory=False)

    if args.train_stage != 1:
        # Policy input size: channels x ceil(patch/32)^2 flattened feature map.
        state_dim = model_configuration['feature_map_channels'] * math.ceil(
            args.patch_size / 32) * math.ceil(args.patch_size / 32)
        ppo = PPO(model_configuration['feature_map_channels'], state_dim,
                  model_configuration['policy_hidden_dim'],
                  model_configuration['policy_conv'])
        if args.train_stage == 3:
            ppo.policy.load_state_dict(checkpoint['policy'])
            ppo.policy_old.load_state_dict(checkpoint['policy'])
    else:
        ppo = None
    memory = Memory()

    if args.resume:
        resume_ckp = torch.load(args.resume)
        start_epoch = resume_ckp['epoch']
        print('resume from epoch: {}'.format(start_epoch))
        model.module.load_state_dict(resume_ckp['model_state_dict'])
        model_prime.module.load_state_dict(resume_ckp['model_prime_state_dict'])
        fc.load_state_dict(resume_ckp['fc'])
        if optimizer:
            optimizer.load_state_dict(resume_ckp['optimizer'])
        if ppo:
            ppo.policy.load_state_dict(resume_ckp['policy'])
            ppo.policy_old.load_state_dict(resume_ckp['policy'])
            ppo.optimizer.load_state_dict(resume_ckp['ppo_optimizer'])
        best_acc = resume_ckp['best_acc']
    else:
        start_epoch = 0
        best_acc = 0

    for epoch in range(start_epoch, training_epoch_num):
        if args.train_stage != 2:
            print('Training Stage: {}, lr:'.format(args.train_stage))
            adjust_learning_rate(optimizer, train_configuration, epoch,
                                 training_epoch_num, args)
        else:
            print('Training Stage: {}, train ppo only'.format(args.train_stage))
        train(model_prime, model, fc, memory, ppo, optimizer, train_loader,
              criterion, args.print_freq, epoch,
              train_configuration['batch_size'], record_file,
              train_configuration, args)
        acc = validate(model_prime, model, fc, memory, ppo, optimizer,
                       val_loader, criterion, args.print_freq, epoch,
                       train_configuration['batch_size'], record_file,
                       train_configuration, args)
        is_best = acc > best_acc
        if is_best:
            best_acc = acc
        save_checkpoint({
            'epoch': epoch + 1,
            'model_state_dict': model.module.state_dict(),
            'model_prime_state_dict': model_prime.module.state_dict(),
            'fc': fc.state_dict(),
            'acc': acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict() if optimizer else None,
            'ppo_optimizer': ppo.optimizer.state_dict() if ppo else None,
            'policy': ppo.policy.state_dict() if ppo else None,
        }, is_best, checkpoint=record_path)