def main():
    """Profile a genotype-defined network with torchscope.

    Loads a JSON mapping of layer index -> genotype string from
    ``cell_file``, builds a ``ModelTest`` network from the parsed
    genotypes, and runs torchscope's ``scope`` profiler with an input
    size appropriate for the dataset named by ``model_type``.

    Relies on module-level configuration (``cell_file``, ``model_type``,
    ``init_channels``, ``stem_multiplier``, ``n_classes``) and helpers
    (``gt``, ``ModelTest``, ``scope``) defined elsewhere in the file.

    Raises:
        Exception: if ``model_type`` names an unsupported dataset.
    """
    # Context manager guarantees the handle is closed even if parsing
    # raises (the original open()/read()/close() leaked it on error).
    with open(cell_file, 'r') as f:
        r_dict = json.load(f)
    # JSON object keys arrive as strings; restore integer layer indices.
    genotypes_dict = {int(layer_idx): gt.from_str(genotype)
                      for layer_idx, genotype in r_dict.items()}
    model_main = ModelTest(genotypes_dict, model_type, res_stem=False,
                           init_channel=init_channels,
                           stem_multiplier=stem_multiplier, n_nodes=4,
                           num_classes=n_classes)
    # Pick the profiling input resolution from the dataset family.
    if 'cifar' in model_type:
        input_x = (3, 32, 32)
    elif 'imagenet' in model_type:
        input_x = (3, 224, 224)
    else:
        raise Exception("Not support dataset!")
    scope(model_main, input_size=input_x)
def main():
    """Distributed retraining entry point for a searched child network.

    Parses CLI args, decodes the searched architecture (``path_name``)
    into a backbone path, builds the child network, sets up (optionally
    distributed) training with AMP/EMA/SyncBN support, and runs the
    train/validate loop with checkpointing and TensorBoard logging.

    NOTE(review): ``local_rank``, ``word_size`` and ``dist_url`` are read
    as module-level globals — confirm they are defined at file top.
    """
    args, args_text = _parse_args()
    if args.local_rank == 0:
        args.local_rank = local_rank
        print("rank:{0},word_size:{1},dist_url:{2}".format(
            local_rank, word_size, dist_url))
    # Stage-depth tuple for the two supported model sizes (FLOPs-named).
    if args.model_selection == 470:
        sta_num = (2, 4, 4, 4, 4)
    elif args.model_selection == 600:
        sta_num = (4, 4, 4, 4, 4)
    else:
        raise ValueError('Unsupported model selection')
    # arch_list = name2path(args.path_name, sta_num=sta_num, backbone_only=True)[0]
    # Decode the encoded architecture name into a backbone block list.
    path_dict = name2path_ablation(args.path_name, sta_num=sta_num)
    arch_list = path_dict['back']
    # Superset block definitions per stage; arch_list selects among them.
    arch_def = [
        # stage 0, 112x112 in
        ['ds_r1_k3_s1_e1_c16_se0.25'],
        # stage 1, 112x112 in
        [
            'ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25',
            'ir_r1_k3_s1_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25'
        ],
        # stage 2, 56x56 in
        [
            'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s1_e4_c40_se0.25',
            'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'
        ],
        # stage 3, 28x28 in
        [
            'ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
            'ir_r1_k3_s1_e4_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25'
        ],
        # stage 4, 14x14in
        [
            'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
            'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'
        ],
        # stage 5, 14x14in
        [
            'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
            'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'
        ],
        # stage 6, 7x7 in
        ['cn_r1_k1_s1_c320_se0.25'],
    ]
    args.img_size = 224
    model = _gen_childnet(arch_list,
                          arch_def,
                          num_classes=args.num_classes,
                          drop_rate=args.drop,
                          drop_path_rate=args.drop_path,
                          global_pool=args.gp,
                          bn_momentum=args.bn_momentum,
                          bn_eps=args.bn_eps,
                          pool_bn=args.pool_bn,
                          zero_gamma=args.zero_gamma)
    data_config = resolve_data_config(vars(args),
                                      model=model,
                                      verbose=args.local_rank == 0)
    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    output_dir = ''
    # Only rank 0 writes logs, checkpoints, and TensorBoard summaries.
    if args.local_rank == 0:
        output_base = args.output if args.output else './experiments'
        exp_name = args.path_name
        output_dir = get_outdir(output_base, 'retrain', exp_name)
        logger = get_logger(os.path.join(output_dir, 'retrain.log'))
        writer = SummaryWriter(os.path.join(output_dir, 'runs'))
        # Lower-is-better only when tracking validation loss.
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(checkpoint_dir=output_dir,
                                decreasing=decreasing)
        with open(os.path.join(output_dir, 'config.yaml'), 'w') as f:
            f.write(args_text)
    else:
        writer = None
        logger = None
    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    if args.distributed and args.num_gpu > 1 and args.local_rank == 0:
        logger.warning(
            'Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.'
        )
        args.num_gpu = 1
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    # NOTE(review): distributed mode is forced on here regardless of the
    # WORLD_SIZE probe above — confirm this launcher is always torchrun-style.
    args.distributed = True
    if args.distributed:
        args.num_gpu = 1
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        # torch.distributed.init_process_group(backend='nccl', init_method='env://')
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
    assert args.rank >= 0
    if args.local_rank == 0:
        if args.distributed:
            logger.info(
                'Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
                % (args.rank, args.world_size))
        else:
            logger.info('Training with a single process on %d GPUs.'
                        % args.num_gpu)
    # Seed everything for reproducibility; disable cudnn autotuning.
    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    if args.local_rank == 0:
        scope(model, input_size=(3, 224, 224))
    if os.path.exists(args.initial_checkpoint):
        load_checkpoint(model, args.initial_checkpoint)
    if args.local_rank == 0:
        logger.info('Model %s created, param count: %d' %
                    (args.model, sum([m.numel()
                                      for m in model.parameters()])))
    if args.num_gpu > 1:
        if args.amp:
            if args.local_rank == 0:
                logger.warning(
                    'AMP does not work well with nn.DataParallel, disabling. Use distributed mode for multi-GPU AMP.'
                )
            args.amp = False
        model = nn.DataParallel(model,
                                device_ids=list(range(args.num_gpu))).cuda()
    else:
        model.cuda()
    optimizer = create_optimizer(args, model)
    use_amp = False
    if has_apex and args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        use_amp = True
    if args.local_rank == 0:
        logger.info('NVIDIA APEX {}. '
                    ' {}.'.format('installed' if has_apex else 'not installed',
                                  'on' if use_amp else 'off'))
    # optionally resume from a checkpoint
    resume_state = {}
    resume_epoch = None
    if args.resume:
        resume_state, resume_epoch = resume_checkpoint(model, args.resume)
    if resume_state and not args.no_resume_opt:
        if 'optimizer' in resume_state:
            if args.local_rank == 0:
                logging.info('Restoring Optimizer state from checkpoint')
            optimizer.load_state_dict(resume_state['optimizer'])
        if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__:
            if args.local_rank == 0:
                logging.info('Restoring NVIDIA AMP state from checkpoint')
            amp.load_state_dict(resume_state['amp'])
    del resume_state
    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEma(model,
                             decay=args.model_ema_decay,
                             device='cpu' if args.model_ema_force_cpu else '',
                             resume=args.resume)
    if args.distributed:
        if args.sync_bn:
            assert not args.split_bn
            try:
                if has_apex:
                    model = convert_syncbn_model(model)
                else:
                    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                        model)
                if args.local_rank == 0:
                    logger.info(
                        'Converted model to use Synchronized BatchNorm.')
            except Exception as e:
                if args.local_rank == 0:
                    logger.error(
                        'Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1'
                    )
        if has_apex:
            model = DDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                logger.info(
                    "Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP."
                )
            model = DDP(model,
                        device_ids=[args.local_rank
                                    ])  # can use device str in Torch >= 1.1
        # NOTE: EMA model does not need to be wrapped by DDP
    train_dir = os.path.join(args.data, 'train')
    if not os.path.exists(train_dir) and args.local_rank == 0:
        logger.error('Training folder does not exist at: {}'.format(train_dir))
        exit(1)
    dataset_train = Dataset(train_dir)
    eval_dir = os.path.join(args.data, 'val')
    if not os.path.exists(eval_dir) and args.local_rank == 0:
        logger.error(
            'Validation folder does not exist at: {}'.format(eval_dir))
        exit(1)
    dataset_eval = Dataset(eval_dir)
    loader_train = create_loader(
        dataset_train,
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        re_prob=args.reprob,
        re_mode=args.remode,
        re_count=args.recount,
        re_split=args.resplit,
        color_jitter=args.color_jitter,
        auto_augment=args.aa,
        num_aug_splits=0,
        interpolation=args.train_interpolation,
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        collate_fn=None,
        pin_memory=args.pin_mem,
    )
    loader_eval = create_loader(
        dataset_eval,
        input_size=data_config['input_size'],
        batch_size=args.validation_batch_size_multiplier * args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        crop_pct=data_config['crop_pct'],
        pin_memory=args.pin_mem,
    )
    if args.smoothing:
        train_loss_fn = LabelSmoothingCrossEntropy(
            smoothing=args.smoothing).cuda()
        validate_loss_fn = nn.CrossEntropyLoss().cuda()
    else:
        train_loss_fn = nn.CrossEntropyLoss().cuda()
        validate_loss_fn = train_loss_fn
    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    # Resume epoch wins over 0, but an explicit --start-epoch wins over both.
    start_epoch = 0
    if args.start_epoch is not None:
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch
    if lr_scheduler is not None and start_epoch > 0:
        lr_scheduler.step(start_epoch)
    if args.local_rank == 0:
        logger.info('Scheduled epochs: {}'.format(num_epochs))
    try:
        best_record = 0
        best_ep = 0
        total_epochs = min(args.early_stop_epoch, num_epochs)
        for epoch in range(start_epoch, total_epochs):
            if args.distributed:
                loader_train.sampler.set_epoch(epoch)
            train_metrics = train_epoch(epoch,
                                        model,
                                        loader_train,
                                        optimizer,
                                        train_loss_fn,
                                        args,
                                        lr_scheduler=lr_scheduler,
                                        saver=saver,
                                        output_dir=output_dir,
                                        use_amp=use_amp,
                                        model_ema=model_ema,
                                        logger=logger,
                                        writer=writer)
            if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                if args.local_rank == 0:
                    logging.info(
                        "Distributing BatchNorm running means and vars")
                distribute_bn(model, args.world_size,
                              args.dist_bn == 'reduce')
            eval_metrics = validate(epoch,
                                    model,
                                    loader_eval,
                                    validate_loss_fn,
                                    args,
                                    logger=logger,
                                    writer=writer)
            # When EMA is active (and not CPU-offloaded), report EMA metrics.
            if model_ema is not None and not args.model_ema_force_cpu:
                if args.distributed and args.dist_bn in ('broadcast',
                                                         'reduce'):
                    distribute_bn(model_ema, args.world_size,
                                  args.dist_bn == 'reduce')
                ema_eval_metrics = validate(epoch,
                                            model_ema.ema,
                                            loader_eval,
                                            validate_loss_fn,
                                            args,
                                            log_suffix=' (EMA)',
                                            logger=logger,
                                            writer=writer)
                eval_metrics = ema_eval_metrics
            if lr_scheduler is not None:
                lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])
            update_summary(epoch,
                           train_metrics,
                           eval_metrics,
                           os.path.join(output_dir, 'summary.csv'),
                           write_header=best_metric is None)
            if saver is not None:
                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                best_metric, best_epoch = saver.save_checkpoint(
                    model,
                    optimizer,
                    args,
                    epoch=epoch,
                    model_ema=model_ema,
                    metric=save_metric,
                    use_amp=use_amp)
            if best_record < eval_metrics[eval_metric]:
                best_record = eval_metrics[eval_metric]
                best_ep = epoch
            if args.local_rank == 0:
                logger.info('*** Best metric: {0} (epoch {1})'.format(
                    best_record, best_ep))
    except KeyboardInterrupt:
        pass
    if best_metric is not None:
        logger.info('*** Best metric: {0} (epoch {1})'.format(
            best_metric, best_epoch))
# NOTE(review): the next two lines are the tail of a factory whose `def`
# line lies before this chunk (presumably mobilenet_1 — confirm in full file).
                      input_channel=input_channel)
    return model


def mobilenet_075(num_classes=62, input_channel=3):
    """Build a MobileNet with width multiplier 0.75."""
    model = MobileNet(widen_factor=0.75,
                      num_classes=num_classes,
                      input_channel=input_channel)
    return model


def mobilenet_05(num_classes=62, input_channel=3):
    """Build a MobileNet with width multiplier 0.5."""
    model = MobileNet(widen_factor=0.5,
                      num_classes=num_classes,
                      input_channel=input_channel)
    return model


def mobilenet_025(num_classes=62, input_channel=3):
    """Build a MobileNet with width multiplier 0.25."""
    model = MobileNet(widen_factor=0.25,
                      num_classes=num_classes,
                      input_channel=input_channel)
    return model


if __name__ == "__main__":
    # Profile the full-width model at the project's 120x120 input size.
    from torchscope import scope
    model = mobilenet_1()
    scope(model, (3, 120, 120))
import torch
from torchscope import scope
from config import num_classes
from models.deeplab import DeepLab

if __name__ == "__main__":
    # Smoke-test: build a MobileNet-backboned DeepLab, run one forward
    # pass, then profile parameters/FLOPs with torchscope.
    model = DeepLab(backbone='mobilenet',
                    output_stride=16,
                    num_classes=num_classes)
    model.eval()
    # NOTE(review): single-channel 256x256 input — assumes the backbone
    # was modified for 1-channel images (stock MobileNet takes 3). Confirm.
    input = torch.rand(1, 1, 256, 256)
    output = model(input)
    print(output.size())
    scope(model, (1, 256, 256))
    # model = models.segmentation.deeplabv3_resnet101(pretrained=True, num_classes=num_classes)
    # model.eval()
    # input = torch.rand(1, 3, 256, 256)
    # output = model(input)['out']
    # print(output.size())
    # scope(model, (3, 256, 256))
    # NOTE(review): tail of an ArcFace-style margin head's __init__; the
    # class header and the lines defining self.m/self.weight lie outside
    # this chunk — confirm against the full file.
        self.s = args.margin_s          # feature scale applied to logits
        self.cos_m = math.cos(self.m)
        self.sin_m = math.sin(self.m)
        # Thresholds keeping cos(theta + m) monotonic past theta = pi - m.
        self.th = math.cos(math.pi - self.m)
        self.mm = math.sin(math.pi - self.m) * self.m

    def forward(self, input, label):
        """Apply the additive angular margin transform to class logits.

        Computes cos(theta + m) for each sample's target class (selected
        via a one-hot mask built from `label`) and plain cos(theta) for
        all other classes, then scales by `self.s`.
        """
        x = F.normalize(input)
        W = F.normalize(self.weight)
        cosine = F.linear(x, W)
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # One-hot mask selects the ground-truth class to receive the margin.
        one_hot = torch.zeros(cosine.size(), device=device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output


if __name__ == "__main__":
    # Profile the ResNet-152 backbone at face-crop resolution.
    from utils import parse_args
    args = parse_args()
    model = resnet152(args)
    # model = MobileNet(1.0)
    scope(model, (3, 112, 112))
score = self.bn4(self.relu( self.deconv4(score))) # size=(N, 64, x.H/2, x.W/2)W) score = self.bn5(self.relu( self.deconv5(score))) # size=(N, 64, x.H/2, x.W/2)W) # score=torch.nn.functional.interpolate(score, size=None, scale_factor=2, mode='bilinear', align_corners=None) score = self.classifier(score) # f=time.time() # print('time-',f-start,end-start,f-end) return score # size=(N, n_class, x.H/1, x.W/1) if __name__ == '__main__': from torchscope import scope net = Shuffle_Skip(3, 1, 0.5) scope(net, input_size=(3, 256, 256)) device = torch.device( "cuda" if torch.cuda.is_available() else "cpu") # PyTorch v0.4.0 dummy_input = torch.rand(3, 3, 256, 256).float().to(device) net.cuda() net(dummy_input) with torch.no_grad(): start = time.time() result = net(dummy_input) end = time.time() print('time', end - start, 's') start = time.time() result = net(dummy_input) end = time.time()
                # NOTE(review): tail of a weight-init helper; its def line
                # and earlier isinstance branches lie outside this chunk.
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)


def mobilenetv3(pretrained=False, **kwargs):
    """Construct a MobileNetV3 model.

    NOTE(review): `pretrained` is accepted but unused here — no weights
    are loaded by this factory. Confirm whether that is intentional.
    """
    model = MobileNetV3(**kwargs)
    return model


if __name__ == '__main__':
    net = mobilenetv3()
    print('mobilenetv3:\n', net)
    print('Total params: %.2fM' %
          (sum(p.numel() for p in net.parameters()) / 1000000.0))
    # Profile at face-crop resolution, then export to ONNX.
    from torchscope import scope
    scope(net, (3, 112, 112))
    input_size = (1, 3, 112, 112)
    x = torch.randn(input_size)
    torch_out = torch.onnx._export(net,
                                   x,
                                   "test.onnx",
                                   verbose=True,
                                   input_names=["input0"],
                                   output_names=['output0'],
                                   example_outputs=True,
                                   keep_initializers_as_inputs=True)
# FLOPs/params comparison harness for EfficientNet variants on
# CIFAR-sized (32x32) inputs; swap the commented lines to profile a
# different model.
from efficientnet import efficientnet_b0, efficientnet_b3
from efficientnet_ex import efficientnet_ex, efficientnet_exx
from torchscope import scope
import torchvision.models as models

# model = models.resnet18()
# model = efficientnet_b0()
model = efficientnet_ex()
# model = efficientnet_exx()
# print(model)
scope(model, input_size=(3, 32, 32), batch_size=2, device='cpu')
        # NOTE(review): tail of a segmentation forward(); `outputs`, `c3`
        # and `size` are defined before this chunk.
        if self.aux:
            # Auxiliary head on the c3 feature map, upsampled to input size.
            auxout = self.auxlayer(c3)
            auxout = F.interpolate(auxout,
                                   size,
                                   mode='bilinear',
                                   align_corners=True)
            outputs.append(auxout)
        return tuple(outputs)


def get_efficientnet_seg(dataset='citys',
                         pretrained=False,
                         root='~/.torch/models',
                         pretrained_base=False,
                         **kwargs):
    """Build an EfficientNet-backboned segmentation model.

    Args:
        dataset: key into `light.data.datasets` (sets the class count).
        pretrained: if True, load a pre-trained checkpoint for `dataset`.
        root: checkpoint search directory.
        pretrained_base: forwarded to the backbone constructor.
    """
    # Maps dataset keys to the short names used in checkpoint file names.
    acronyms = {
        'pascal_voc': 'pascal_voc',
        'pascal_aug': 'pascal_aug',
        'ade20k': 'ade',
        'coco': 'coco',
        'citys': 'citys',
    }
    from light.data import datasets
    model = EfficientNetSeg(datasets[dataset].NUM_CLASS,
                            backbone='efficientnet',
                            pretrained_base=pretrained_base,
                            **kwargs)
    if pretrained:
        from ..model import get_model_file
        model.load_state_dict(
            torch.load(
                get_model_file('efficientnet_%s_best_model' %
                               (acronyms[dataset]),
                               root=root)))
    return model


if __name__ == '__main__':
    from torchscope import scope
    model = get_efficientnet_seg()
    scope(model, (3, 224, 224))
# create new OrderedDict that does not contain `module.` from collections import OrderedDict new_state_dict = OrderedDict() for k, v in state_dict.items(): head = k[:7] if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) net.eval() print('Finished loading model!') scope(net, input_size=(3, 300, 300)) if args.cuda: net = net.cuda() cudnn.benchmark = True else: net = net.cpu() #scope(net, input_size=(3,300,300)) device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu") #print(device) compute_speed(net, (1, 3, 300, 300), device, 1000) detector = Detect(num_classes, 0, cfg) transform = BaseTransform(img_dim, rgb_means, (2, 0, 1)) object_detector = ObjectDetector(net, detector, transform) img_list = os.listdir(args.img_dir)
def main():
    """Evaluation entry point for hand-picked CREAM child networks.

    Selects a hard-coded architecture by `args.model_selection` (a
    FLOPs-style name), builds the child network, sets up (optionally
    distributed) inference with optional AMP/EMA/SyncBN, and validates
    the EMA weights on the `val` split.

    NOTE(review): `local_rank`, `word_size`, `dist_url` are read as
    module-level globals — confirm they are defined at file top.
    """
    args, args_text = _parse_args()
    # Seed everything for reproducibility; disable cudnn autotuning.
    seed = args.seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    if args.local_rank == 0:
        print("rank:{0},word_size:{1},dist_url:{2}".format(
            local_rank, word_size, dist_url))
    # NOTE(review): the assert allows 114 but the branch below tests 112;
    # a selection of 114 would pass the assert, match no branch, and leave
    # arch_list/arch_def undefined (NameError later). One of the two
    # constants is almost certainly a typo — confirm against the model zoo.
    assert args.model_selection in [14, 114, 470, 600, 285, 42]
    if args.model_selection == 470:
        arch_list = [[0], [3, 4, 3, 1], [3, 2, 3, 0], [3, 3, 3, 1],
                     [3, 3, 3, 3], [3, 3, 3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            [
                'ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25',
                'ir_r1_k3_s1_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25'
            ],
            # stage 2, 56x56 in
            [
                'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s1_e4_c40_se0.25',
                'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'
            ],
            # stage 3, 28x28 in
            [
                'ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
                'ir_r1_k3_s1_e4_c80_se0.25', 'ir_r2_k3_s1_e4_c80_se0.25'
            ],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            [
                'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
                'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'
            ],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 224
    elif args.model_selection == 42:
        arch_list = [[0], [3], [3, 1], [3, 1], [3, 3, 3], [3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25'],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            ['ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 96
    elif args.model_selection == 14:
        arch_list = [[0], [3], [3, 3], [3, 3], [3], [3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k3_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e4_c80_se0.25'],
            # stage 4, 14x14in
            ['ir_r1_k3_s1_e6_c96_se0.25'],
            # stage 5, 14x14in
            ['ir_r1_k5_s2_e6_c192_se0.25'],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 64
    elif args.model_selection == 112:
        arch_list = [[0], [3], [3, 3], [3, 3], [3, 3, 3], [3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k3_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25'],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            ['ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 160
    elif args.model_selection == 285:
        arch_list = [[0], [3], [3, 3], [3, 1, 3], [3, 3, 3, 3], [3, 3, 3],
                     [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            [
                'ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25',
                'ir_r1_k3_s2_e6_c80_se0.25'
            ],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            [
                'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25',
                'ir_r1_k5_s2_e6_c192_se0.25'
            ],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 224
    elif args.model_selection == 600:
        arch_list = [[0], [3, 3, 2, 3, 3], [3, 2, 3, 2, 3], [3, 2, 3, 2, 3],
                     [3, 3, 2, 2, 3, 3], [3, 3, 2, 3, 3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            [
                'ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s2_e4_c24_se0.25',
                'ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s2_e4_c24_se0.25',
                'ir_r1_k3_s2_e4_c24_se0.25'
            ],
            # stage 2, 56x56 in
            [
                'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25',
                'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25',
                'ir_r1_k5_s2_e4_c40_se0.25'
            ],
            # stage 3, 28x28 in
            [
                'ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
                'ir_r1_k3_s1_e4_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
                'ir_r1_k3_s1_e4_c80_se0.25'
            ],
            # stage 4, 14x14in
            [
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
                'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'
            ],
            # stage 5, 14x14in
            [
                'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
                'ir_r1_k5_s1_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
                'ir_r1_k5_s1_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25'
            ],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 224
    model = _gen_childnet(arch_list,
                          arch_def,
                          num_classes=args.num_classes,
                          drop_rate=args.drop,
                          drop_path_rate=args.drop_path,
                          global_pool=args.gp,
                          bn_momentum=args.bn_momentum,
                          bn_eps=args.bn_eps,
                          pool_bn=args.pool_bn,
                          zero_gamma=args.zero_gamma)
    data_config = resolve_data_config(vars(args),
                                      model=model,
                                      verbose=args.local_rank == 0)
    if args.local_rank == 0:
        img_size = args.img_size or 224
        scope(model, input_size=(3, img_size, img_size))
    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    output_dir = ''
    # Only rank 0 writes logs, checkpoints, and TensorBoard summaries.
    if args.local_rank == 0:
        output_base = args.output if args.output else './experiments/'
        exp_name = '-'.join([
            args.name,
            datetime.now().strftime("%Y%m%d-%H%M%S"), args.model,
            str(data_config['input_size'][-1])
        ])
        output_dir = get_outdir(output_base, 'test', exp_name)
        logger = get_logger(os.path.join(output_dir, 'test.log'))
        writer = SummaryWriter(os.path.join(output_dir, 'runs'))
        decreasing = True if eval_metric == 'loss' else False
        if not args.nosave:
            saver = CheckpointSaver(checkpoint_dir=output_dir,
                                    decreasing=decreasing)
        with open(os.path.join(output_dir, 'config.yaml'), 'w') as f:
            f.write(args_text)
    else:
        writer = None
        logger = None
    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    if args.distributed and args.num_gpu > 1 and args.local_rank == 0:
        logger.warning(
            'Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.'
        )
        args.num_gpu = 1
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    # NOTE(review): distributed mode is forced on regardless of the
    # WORLD_SIZE probe above — confirm the launcher is always torchrun-style.
    args.distributed = True
    if args.distributed:
        args.num_gpu = 1
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
    assert args.rank >= 0
    if args.local_rank == 0:
        if args.distributed:
            logger.info(
                'Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
                % (args.rank, args.world_size))
        else:
            logger.info('Training with a single process on %d GPUs.' %
                        args.num_gpu)
    num_aug_splits = 0
    if args.aug_splits > 0:
        assert args.aug_splits > 1, 'A split of 1 makes no sense'
        num_aug_splits = args.aug_splits
    if args.split_bn:
        assert num_aug_splits > 1 or args.resplit
        model = convert_splitbn_model(model, max(num_aug_splits, 2))
    if os.path.exists(args.initial_checkpoint):
        load_checkpoint(model, args.initial_checkpoint)
    if args.local_rank == 0:
        logger.info('Model %s created, param count: %d' %
                    (args.model, sum([m.numel()
                                      for m in model.parameters()])))
    if args.num_gpu > 1:
        if args.amp:
            if args.local_rank == 0:
                logger.warning(
                    'AMP does not work well with nn.DataParallel, disabling. Use distributed mode for multi-GPU AMP.'
                )
            args.amp = False
        model = nn.DataParallel(model,
                                device_ids=list(range(args.num_gpu))).cuda()
    else:
        model.cuda()
    optimizer = create_optimizer(args, model)
    use_amp = False
    if has_apex and args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        use_amp = True
    if args.local_rank == 0:
        logger.info('NVIDIA APEX {}. '
                    ' {}.'.format('installed' if has_apex else 'not installed',
                                  'on' if use_amp else 'off'))
    # optionally resume from a checkpoint
    resume_state = {}
    resume_epoch = None
    if args.resume:
        resume_state, resume_epoch = resume_checkpoint(model, args.resume)
    if resume_state and not args.no_resume_opt:
        if 'optimizer' in resume_state:
            if args.local_rank == 0:
                logging.info('Restoring Optimizer state from checkpoint')
            optimizer.load_state_dict(resume_state['optimizer'])
        if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__:
            if args.local_rank == 0:
                logging.info('Restoring NVIDIA AMP state from checkpoint')
            amp.load_state_dict(resume_state['amp'])
    del resume_state
    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEma(model,
                             decay=args.model_ema_decay,
                             device='cpu' if args.model_ema_force_cpu else '',
                             resume=args.resume)
    if args.distributed:
        if args.sync_bn:
            assert not args.split_bn
            try:
                if has_apex:
                    model = convert_syncbn_model(model)
                else:
                    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                        model)
                if args.local_rank == 0:
                    logger.info(
                        'Converted model to use Synchronized BatchNorm.')
            except Exception as e:
                if args.local_rank == 0:
                    logger.error(
                        'Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1'
                    )
        if has_apex:
            model = DDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                logger.info(
                    "Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP."
                )
            model = DDP(model,
                        device_ids=[args.local_rank
                                    ])  # can use device str in Torch >= 1.1
        # NOTE: EMA model does not need to be wrapped by DDP
    eval_dir = os.path.join(args.data, 'val')
    if not os.path.exists(eval_dir) and args.local_rank == 0:
        logger.error(
            'Validation folder does not exist at: {}'.format(eval_dir))
        exit(1)
    dataset_eval = Dataset(eval_dir)
    loader_eval = create_loader(
        dataset_eval,
        input_size=data_config['input_size'],
        batch_size=args.validation_batch_size_multiplier * args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        crop_pct=data_config['crop_pct'],
        pin_memory=args.pin_mem,
    )
    validate_loss_fn = nn.CrossEntropyLoss().cuda()
    # NOTE(review): evaluates the EMA weights only; crashes with
    # AttributeError if --model-ema was not set (model_ema is None).
    validate(0,
             model_ema.ema,
             loader_eval,
             validate_loss_fn,
             args,
             log_suffix=' (EMA)',
             logger=logger,
             writer=writer)
        # NOTE(review): tail of a descriptor network's forward(); the class
        # header and earlier layers lie outside this chunk.
        x_features = self.features(self.input_norm(input))
        x = x_features.view(x_features.size(0), -1)
        # L2-normalize to unit-length descriptors.
        feature = x / torch.norm(x, p=2, dim=-1, keepdim=True)
        return feature


# Profile each detector/descriptor variant with torchscope: detectors on
# full frames, descriptors on 32x32 patches.
lfdet = LFDet()  # .cuda()
lfdes = LFDes()  # .cuda()
rfdet = RFDet()  # .cuda()
rfdes = L2Net()  # .cuda()
l2net = L2Net()  # .cuda()
print("lfdet")
print(
    "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
)
scope(lfdet, input_size=(3, 640, 480))
print("lfdes")
print(
    "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
)
scope(lfdes, input_size=(1, 32, 32))
print("rfdet")
print(
    "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
)
scope(rfdet, input_size=(1, 640, 480))
print("rfdes")
print(
    "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
)
scope(rfdes, input_size=(1, 32, 32))
    # NOTE(review): tail of an ArcFace-style margin head's __init__; the
    # class header and the lines defining self.m/self.s/self.cos_m/
    # self.weight lie outside this chunk.
        self.sin_m = math.sin(self.m)
        # Thresholds keeping cos(theta + m) monotonic past theta = pi - m.
        self.th = math.cos(math.pi - self.m)
        self.mm = math.sin(math.pi - self.m) * self.m  # self.sin_m * self.m

    def forward(self, input, label):
        """Apply the additive angular margin transform to class logits.

        Computes cos(theta + m) for each sample's target class and plain
        cos(theta) for all other classes, then scales by `self.s`.
        """
        x = F.normalize(input)
        W = F.normalize(self.weight)
        cosine = F.linear(x, W)
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))  # + epsilon in sqrt
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # One-hot mask selects the ground-truth class to receive the margin.
        one_hot = torch.zeros(cosine.size(), device=device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        # if random.randint(0, 50) == 10:
        #     print("Weight FC: ", self.weight[0][0])
        return output


if __name__ == "__main__":
    # Profile MobileFaceNet at face-crop resolution.
    from torchscope import scope
    model = MobileFaceNet()
    print(model)
    scope(model, input_size=(3, 112, 112), batch_size=1)
def __init__(self, pretrained=True): super(ResNetRankModel, self).__init__() resnet = models.resnet50(pretrained=True) # Remove linear layer modules = list(resnet.children())[:-1] self.features = nn.Sequential(*modules) self.fc = nn.Linear(2048, 16) self.sigmoid = nn.Sigmoid() def forward(self, input1, input2, input3): e1 = self.predict(input1) e2 = self.predict(input2) e3 = self.predict(input3) d12 = F.pairwise_distance(e1, e2, p=2) d13 = F.pairwise_distance(e1, e3, p=2) d23 = F.pairwise_distance(e2, e3, p=2) return self.sigmoid(d12 - (d13 + d23) / 2) def predict(self, input): x = self.features(input) x = x.view(x.size(0), -1) x = self.fc(x) x = F.normalize(x) return x if __name__ == "__main__": model = ResNetEmotionModel() scope(model, input_size=(3, 224, 224))
# NOTE(review): middle of a license-plate detection test script; `args`,
# `cfg_plate`, `BaseModel`, `load_model`, helpers and the rest of the loop
# body lie outside this chunk.
cfg = None
net = None
if args.trained_model is not None:
    cfg = cfg_plate
    net = BaseModel(cfg=cfg, phase='test')
else:
    print("Don't support network!")
    exit(0)
net = load_model(net, args.trained_model, args.cpu)
net.eval()
print('Finished loading model!')
print(net)
# Profile FLOPs/params before running detection.
from torchscope import scope
scope(net, input_size=(3, 480, 850))
cudnn.benchmark = True
device = torch.device("cpu" if args.cpu else "cuda")
net = net.to(device)
image_paths = get_image_path(args.image_path)
# Per-stage timers for preprocessing / inference / postprocessing.
_t = {'pre': Timer(), 'forward_pass': Timer(), 'misc': Timer()}
# testing begin
for path in image_paths:
    _t['pre'].tic()
    # path = "/home/can/AI_Camera/License_Plate/LP_Detection/data/val/images/40000/61539302914508AF4442_B.jpg_out-full_1.jpg"
    img_raw = cv2.imread(path, cv2.IMREAD_COLOR)
    h, w, _ = img_raw.shape
    # Downscale 3x before detection.
    img_raw = cv2.resize(img_raw, (int(w / 3), int(h / 3)))
    # cv2.imshow("test111", img_raw)
    # cv2.waitKey()
    img = np.float32(img_raw)
    # (loop body continues beyond this chunk)
    # NOTE(review): tail of an ArcFace-style margin head's __init__; the
    # class header and the lines defining self.m/self.weight lie outside
    # this chunk.
        self.s = args.margin_s          # feature scale applied to logits
        self.cos_m = math.cos(self.m)
        self.sin_m = math.sin(self.m)
        # Thresholds keeping cos(theta + m) monotonic past theta = pi - m.
        self.th = math.cos(math.pi - self.m)
        self.mm = math.sin(math.pi - self.m) * self.m

    def forward(self, input, label):
        """Apply the additive angular margin transform to class logits.

        Computes cos(theta + m) for each sample's target class and plain
        cos(theta) for all other classes, then scales by `self.s`.
        """
        x = F.normalize(input)
        W = F.normalize(self.weight)
        cosine = F.linear(x, W)
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # One-hot mask selects the ground-truth class to receive the margin.
        one_hot = torch.zeros(cosine.size(), device=device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output


if __name__ == "__main__":
    from torchscope import scope
    model = MobileFaceNet()
    # print(model)
    # NOTE(review): 1-channel input — assumes a grayscale MobileFaceNet
    # variant; confirm the first conv's in_channels.
    scope(model, input_size=(1, 112, 112))
import torch
from models.deeplab import DeepLab
from torchscope import scope

if __name__ == "__main__":
    # Build a MobileNet-backed DeepLab head for a single output class.
    net = DeepLab(backbone='mobilenet', output_stride=16, num_classes=1)
    net.eval()

    # Smoke-test one forward pass on a 4-channel 320x320 dummy batch,
    # then profile parameters/FLOPs at the same (C, H, W) shape.
    dummy = torch.rand(1, 4, 320, 320)
    prediction = net(dummy)
    print(prediction.size())
    scope(net, (4, 320, 320))