def build_model(config, device, strict=True, mode='train'):
    '''Build the model and change its layers depending on the loss type.'''
    parameters = dict(width_mult=config.model.width_mult,
                      prob_dropout=config.dropout.prob_dropout,
                      type_dropout=config.dropout.type,
                      mu=config.dropout.mu,
                      sigma=config.dropout.sigma,
                      embeding_dim=config.model.embeding_dim,
                      prob_dropout_linear=config.dropout.classifier,
                      theta=config.conv_cd.theta,
                      multi_heads=config.multi_task_learning)

    if config.model.model_type == 'Mobilenet2':
        model = mobilenetv2(**parameters)

        if config.model.pretrained and mode == "train":
            checkpoint_path = config.model.imagenet_weights
            load_checkpoint(checkpoint_path, model, strict=strict, map_location=device)
        elif mode == 'convert':
            model.forward = model.forward_to_onnx

        # replace the default classification head according to the loss type
        if (config.loss.loss_type == 'amsoftmax') and (config.loss.amsoftmax.margin_type != 'cross_entropy'):
            model.spoofer = AngleSimpleLinear(config.model.embeding_dim, 2)
        elif config.loss.loss_type == 'soft_triple':
            model.spoofer = SoftTripleLinear(config.model.embeding_dim, 2,
                                             num_proxies=config.loss.soft_triple.K)
    else:
        assert config.model.model_type == 'Mobilenet3'
        if config.model.model_size == 'large':
            model = mobilenetv3_large(**parameters)
            if config.model.pretrained and mode == "train":
                checkpoint_path = config.model.imagenet_weights
                load_checkpoint(checkpoint_path, model, strict=strict, map_location=device)
            elif mode == 'convert':
                model.forward = model.forward_to_onnx
        else:
            assert config.model.model_size == 'small'
            model = mobilenetv3_small(**parameters)
            if config.model.pretrained and mode == "train":
                checkpoint_path = config.model.imagenet_weights
                load_checkpoint(checkpoint_path, model, strict=strict, map_location=device)
            elif mode == 'convert':
                model.forward = model.forward_to_onnx

        # for MobileNetV3 the spoofing head is a container, so only its last layer is swapped
        if (config.loss.loss_type == 'amsoftmax') and (config.loss.amsoftmax.margin_type != 'cross_entropy'):
            model.scaling = config.loss.amsoftmax.s
            model.spoofer[3] = AngleSimpleLinear(config.model.embeding_dim, 2)
        elif config.loss.loss_type == 'soft_triple':
            model.scaling = config.loss.soft_triple.s
            model.spoofer[3] = SoftTripleLinear(config.model.embeding_dim, 2,
                                                num_proxies=config.loss.soft_triple.K)
    return model
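# The head swap above (reassigning `model.spoofer` after the backbone is built) is the usual
# PyTorch pattern of replacing a submodule in place. Below is a minimal, self-contained sketch
# of the same idea with stock torch.nn layers; `TinyBackbone` and `CosineHead` are illustrative
# stand-ins, not this repo's MobileNet backbones or AngleSimpleLinear/SoftTripleLinear heads.
import torch
import torch.nn as nn
import torch.nn.functional as F


class TinyBackbone(nn.Module):
    """Stand-in feature extractor with a default linear classification head."""

    def __init__(self, embedding_dim=128, num_classes=2):
        super().__init__()
        self.features = nn.Sequential(nn.Flatten(), nn.LazyLinear(embedding_dim), nn.ReLU())
        self.spoofer = nn.Linear(embedding_dim, num_classes)  # default cross-entropy head

    def forward(self, x):
        return self.spoofer(self.features(x))


class CosineHead(nn.Module):
    """Cosine-similarity head, playing the role AngleSimpleLinear plays in build_model()."""

    def __init__(self, in_features, out_features):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(out_features, in_features))

    def forward(self, x):
        # cosine similarities between normalized embeddings and normalized class weights
        return F.linear(F.normalize(x), F.normalize(self.weight))


if __name__ == '__main__':
    demo = TinyBackbone(embedding_dim=128)
    demo.spoofer = CosineHead(128, 2)           # swap the head, as done for the 'amsoftmax' loss
    print(demo(torch.randn(4, 3, 8, 8)).shape)  # torch.Size([4, 2])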
def main(args):
    if args.checkpoint == '':
        args.checkpoint = "checkpoints/ctw1500_%s_bs_%d_ep_%d" % (
            args.arch, args.batch_size, args.n_epoch)
        if args.pretrain:
            if 'synth' in args.pretrain:
                args.checkpoint += "_pretrain_synth"
            else:
                args.checkpoint += "_pretrain_ic17"

    print('checkpoint path: %s' % args.checkpoint)
    print('init lr: %.8f' % args.lr)
    print('schedule: ', args.schedule)
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    kernel_num = 7
    min_scale = 0.4
    start_epoch = 0

    data_loader = CTW1500Loader(is_transform=True,
                                img_size=args.img_size,
                                kernel_num=kernel_num,
                                min_scale=min_scale)
    # train_loader = ctw_train_loader(data_loader, batch_size=args.batch_size)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)
    # pretrained weights are not implemented for resnet18 and resnet34
    elif args.arch == "resnet18":
        model = models.resnet18(pretrained=False, num_classes=kernel_num)
    elif args.arch == "resnet34":
        model = models.resnet34(pretrained=False, num_classes=kernel_num)
    elif args.arch == "mobilenetv2":
        # assumes the models package exposes a mobilenetv2 constructor alongside the v3 variants
        model = models.mobilenetv2(pretrained=True, num_classes=kernel_num)
    elif args.arch == "mobilenetv3large":
        model = models.mobilenetv3_large(pretrained=False, num_classes=kernel_num)
    elif args.arch == "mobilenetv3small":
        model = models.mobilenetv3_small(pretrained=False, num_classes=kernel_num)

    optimizer = tf.keras.optimizers.SGD(learning_rate=args.lr, momentum=0.99, decay=5e-4)

    title = 'CTW1500'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(args.pretrain), 'Error: no checkpoint directory found!'
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')
        model.load_weights(args.resume)
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    for epoch in range(start_epoch, args.n_epoch):
        optimizer = get_new_optimizer(args, optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.n_epoch, optimizer.get_config()['learning_rate']))

        train_loader = ctw_train_loader(data_loader, batch_size=args.batch_size)

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch)

        model.save_weights('%s%s' % (args.checkpoint, '/model_tf/weights'))

        logger.append([optimizer.get_config()['learning_rate'],
                       train_loss, train_te_acc, train_te_iou])

    logger.close()
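# `get_new_optimizer` is defined elsewhere in this repo; the loop above only relies on it
# returning a tf.keras optimizer whose get_config()['learning_rate'] reflects the current
# schedule. A hypothetical sketch, assuming a simple step decay at the epochs listed in
# args.schedule (the 0.1 decay factor is an assumption, not taken from the source):
import tensorflow as tf


def get_new_optimizer_sketch(args, optimizer, epoch, gamma=0.1):
    """Rebuild the SGD optimizer with a step-decayed learning rate (illustrative only)."""
    # `optimizer` is accepted for signature parity with get_new_optimizer and is not reused here
    lr = args.lr
    for milestone in args.schedule:
        if epoch >= milestone:
            lr *= gamma
    # momentum/decay mirror the values used when the optimizer is first created in main()
    return tf.keras.optimizers.SGD(learning_rate=lr, momentum=0.99, decay=5e-4)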
def main():
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()

    weight_bits = int(args.weight_bits)
    activ_bits = int(args.activ_bits)

    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')

    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    writer = SummaryWriter()

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        print('Selected GPUs: ', args.gpus)
        # torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    if args.model == 'mobilenet':
        model = models.__dict__[args.model]
        model = model(**model_config)
    elif args.model == 'mobilenetv2':
        model = torch.hub.load('pytorch/vision:v0.6.0', 'mobilenet_v2', pretrained=True)
    elif args.model == 'resnet18':
        model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
    else:  # if args.model == 'mobilenet_v3':
        model = models.mobilenetv3_large(width_mult=float(args.mobilenet_width))
        model.load_state_dict(
            torch.load("models/mobilenet_v3/mobilenetv3-large-0.75-9632d2a8.pth"))

    nClasses = get_num_classes(args.dataset)
    model_config = {'input_size': args.input_size, 'dataset': args.dataset,
                    'num_classes': nClasses,
                    'width_mult': float(args.mobilenet_width),
                    'input_dim': float(args.mobilenet_input)}

    if args.model_config != '':
        model_config = dict(model_config, **literal_eval(args.model_config))

    logging.info("created model with configuration: %s", model_config)
    print(model)

    num_parameters = sum([l.nelement() for l in model.parameters()])
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train': get_transform(args.dataset, input_size=args.input_size, augment=True),
        'eval': get_transform(args.dataset, input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(model, 'regime', {
        0: {
            'optimizer': args.optimizer,
            'lr': args.lr,
            'momentum': args.momentum,
            'weight_decay': args.weight_decay
        }
    })
    print(transform)

    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)()
    criterion.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    # small random subset of the validation set for quick checks
    fast_val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=True,
        sampler=torch.utils.data.RandomSampler(val_data, replacement=True, num_samples=1000))

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    # random subset of the training set for faster epochs
    fast_train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=True,
        sampler=torch.utils.data.RandomSampler(train_data, replacement=True, num_samples=100000))

    # define optimizer parameter groups: clipping parameters get a larger weight decay
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if 'alpha' in key or 'beta' in key:
            params += [{'params': value, 'weight_decay': 1e-4}]
        else:
            params += [{'params': value, 'weight_decay': 1e-5}]

    mixed_prec_dict = None
    if args.mixed_prec_dict is not None:
        mixed_prec_dict = nemo.utils.precision_dict_from_json(args.mixed_prec_dict)
        print("Loading mixed-precision dict from outside")
    elif args.mem_constraint != '':
        mem_constraints = json.loads(args.mem_constraint)
        print('This is the memory constraint:', mem_constraints)
        if mem_constraints is not None:
            x_test = torch.Tensor(1, 3, 224, 224)
            mixed_prec_dict = memory_driven_quant(model, x_test, mem_constraints[0],
                                                  mem_constraints[1], args.mixed_prec_quant,
                                                  use_sawb=args.use_sawb)

    # multiple GPUs
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

    # mobilenet_width = float(args.mobilenet_width)
    # mobilenet_width_s = args.mobilenet_width
    # mobilenet_input = int(args.mobilenet_input)

    if args.resume is None:
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None)
        print("[NEMO] Full-precision model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))

    if args.quantize:
        # transform the model into a NEMO FakeQuantized representation
        model = nemo.transform.quantize_pact(
            model, dummy_input=torch.randn((1, 3, 224, 224)).to('cuda'))

    if args.resume is not None:
        checkpoint_file = args.resume
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint_loaded = torch.load(checkpoint_file)
            checkpoint = checkpoint_loaded['state_dict']
            model.load_state_dict(checkpoint, strict=True)
            prec_dict = checkpoint_loaded.get('precision')
        else:
            logging.error("no checkpoint found at '%s'", args.resume)
            import sys
            sys.exit(1)

    if args.resume is None:
        print("[NEMO] Model calibration")
        model.change_precision(bits=20)
        model.reset_alpha_weights()

        if args.initial_folding:
            model.fold_bn()
            # use DFQ for weight equalization
            if args.initial_equalization:
                model.equalize_weights_dfq()
        elif args.initial_equalization:
            model.equalize_weights_lsq(verbose=True)
        model.reset_alpha_weights()
        # model.reset_alpha_weights(use_method='dyn_range', dyn_range_cutoff=0.05, verbose=True)

        # calibrate activations after equalization
        with model.statistics_act():
            val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None)
        model.reset_alpha_act()

        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None)
        print("[NEMO] 20-bit calibrated model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))
        nemo.utils.save_checkpoint(model, None, 0, acc=val_prec1,
                                   checkpoint_name='resnet18_calibrated',
                                   checkpoint_suffix=args.suffix)

        model.change_precision(bits=activ_bits)
        model.change_precision(bits=weight_bits, scale_activations=False)

        # init weight clipping parameters to their reset value and disable their gradient
        model.reset_alpha_weights()
        if args.use_sawb:
            model.disable_grad_sawb()
            model.weight_clip_sawb()

        # hand-tuned mixed-precision assignment on top of the exported precision dict
        mixed_prec_dict_all = model.export_precision()
        mixed_prec_dict_all['relu']['x_bits'] = 2
        mixed_prec_dict_all['layer1.0.relu']['x_bits'] = 4
        mixed_prec_dict_all['layer3.1.conv1']['W_bits'] = 4
        mixed_prec_dict_all['layer3.1.conv2']['W_bits'] = 4
        mixed_prec_dict_all['layer4.0.conv1']['W_bits'] = 2
        mixed_prec_dict_all['layer4.0.conv2']['W_bits'] = 2
        mixed_prec_dict_all['layer4.1.conv1']['W_bits'] = 2
        mixed_prec_dict_all['layer4.1.conv2']['W_bits'] = 2
        model.change_precision(bits=1, min_prec_dict=mixed_prec_dict_all)
    else:
        print("[NEMO] Not calibrating model, as it is pretrained")
        model.change_precision(bits=1, min_prec_dict=prec_dict)

    optimizer = torch.optim.Adam([
        {'params': model.get_nonclip_parameters(), 'lr': args.lr, 'weight_decay': 1e-5},
        {'params': model.get_clip_parameters(), 'lr': args.lr, 'weight_decay': 0.001},
    ])

    reset_grad_flow(model, __global_ave_grads, __global_max_grads)

    for epoch in range(args.start_epoch, args.epochs):
        # optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(
            train_loader, model, criterion, epoch, optimizer,
            freeze_bn=(epoch > 0),
            absorb_bn=(epoch == 0),
            writer=writer)

        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, epoch)
        writer.add_scalar('Loss/val', val_loss, epoch * len(train_loader))
        writer.add_scalar('Accuracy/val', val_prec1, epoch * len(train_loader))

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)

        # save model
        if args.save_check:
            nemo.utils.save_checkpoint(
                model, optimizer, 0, acc=val_prec1,
                checkpoint_name='resnet18%s_checkpoint' %
                ("_mixed" if mixed_prec_dict is not None else ""),
                checkpoint_suffix=args.suffix)
            if is_best:
                nemo.utils.save_checkpoint(
                    model, optimizer, 0, acc=val_prec1,
                    checkpoint_name='resnet18%s_best' %
                    ("_mixed" if mixed_prec_dict is not None else ""),
                    checkpoint_suffix=args.suffix)

        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \t'.format(
                         epoch + 1,
                         train_loss=train_loss, val_loss=val_loss,
                         train_prec1=train_prec1, val_prec1=val_prec1,
                         train_prec5=train_prec5, val_prec5=val_prec5))

        results.add(epoch=epoch + 1,
                    train_loss=train_loss, val_loss=val_loss,
                    train_error1=100 - train_prec1, val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5, val_error5=100 - val_prec5)
        results.save()
def main():
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()

    weight_bits = int(args.weight_bits)
    activ_bits = int(args.activ_bits)

    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')

    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        print('Selected GPUs: ', args.gpus)
        torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    if args.model == 'mobilenet':
        model = models.__dict__[args.model]
    elif args.model == 'mobilenetv2':
        model = torch.hub.load('pytorch/vision:v0.6.0', 'mobilenet_v2', pretrained=True)
    else:  # if args.model == 'mobilenet_v3':
        model = models.mobilenetv3_large(width_mult=float(args.mobilenet_width))
        model.load_state_dict(
            torch.load("models/mobilenet_v3/mobilenetv3-large-0.75-9632d2a8.pth"))

    nClasses = get_num_classes(args.dataset)
    model_config = {'input_size': args.input_size, 'dataset': args.dataset,
                    'num_classes': nClasses,
                    'width_mult': float(args.mobilenet_width),
                    'input_dim': float(args.mobilenet_input)}

    if args.model_config != '':
        model_config = dict(model_config, **literal_eval(args.model_config))

    # only the plain 'mobilenet' branch above leaves `model` as a constructor;
    # the other branches already produce an instantiated (pretrained) network
    if args.model == 'mobilenet':
        model = model(**model_config)

    logging.info("created model with configuration: %s", model_config)
    print(model)

    num_parameters = sum([l.nelement() for l in model.parameters()])
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train': get_transform(args.dataset, input_size=args.input_size, augment=True),
        'eval': get_transform(args.dataset, input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(model, 'regime', {
        0: {
            'optimizer': args.optimizer,
            'lr': args.lr,
            'momentum': args.momentum,
            'weight_decay': args.weight_decay
        }
    })
    print(transform)

    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)()
    criterion.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    # define optimizer parameter groups: clipping parameters get a larger weight decay
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if 'alpha' in key or 'beta' in key:
            params += [{'params': value, 'weight_decay': 1e-4}]
        else:
            params += [{'params': value, 'weight_decay': 1e-5}]

    mixed_prec_dict = None
    if args.mixed_prec_dict is not None:
        mixed_prec_dict = nemo.utils.precision_dict_from_json(args.mixed_prec_dict)
        print("Loading mixed-precision dict from outside")
    elif args.mem_constraint != '':
        mem_constraints = json.loads(args.mem_constraint)
        print('This is the memory constraint:', mem_constraints)
        if mem_constraints is not None:
            x_test = torch.Tensor(1, 3, args.mobilenet_input, args.mobilenet_input)
            mixed_prec_dict = memory_driven_quant(model, x_test, mem_constraints[0],
                                                  mem_constraints[1], args.mixed_prec_quant)

    # multiple GPUs
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model.type(args.type)

    mobilenet_width = float(args.mobilenet_width)
    mobilenet_width_s = args.mobilenet_width
    mobilenet_input = int(args.mobilenet_input)

    if args.resume is None:
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None)
        print("[NEMO] Full-precision model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))

    if args.quantize:
        # transform the model into a NEMO FakeQuantized representation
        model = nemo.transform.quantize_pact(
            model,
            dummy_input=torch.randn((1, 3, mobilenet_input, mobilenet_input)).to('cuda'))

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5)

    if args.resume is not None:
        checkpoint_file = args.resume
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint_loaded = torch.load(checkpoint_file)
            checkpoint = checkpoint_loaded['state_dict']
            model.load_state_dict(checkpoint, strict=True)
            prec_dict = checkpoint_loaded.get('precision')
        else:
            logging.error("no checkpoint found at '%s'", args.resume)
            import sys
            sys.exit(1)

    if args.resume is None:
        print("[NEMO] Model calibration")
        model.change_precision(bits=20)
        model.reset_alpha_weights()

        if args.initial_folding:
            model.fold_bn()
            # use DFQ for weight equalization
            if args.initial_equalization:
                model.equalize_weights_dfq()
        elif args.initial_equalization:
            model.equalize_weights_lsq(verbose=True)
        model.reset_alpha_weights()
        # model.reset_alpha_weights(use_method='dyn_range', dyn_range_cutoff=0.05, verbose=True)

        # calibrate activations after equalization
        with model.statistics_act():
            val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None)

        # # use this in place of the usual calibration, because PACT_Act's descend from ReLU6 and
        # # the trained weights already assume the presence of a clipping effect
        # # this should be integrated in NEMO by saving the "origin" of the PACT_Act!
        # for i in range(0, 27):
        #     model.model[i][3].alpha.data[:] = min(model.model[i][3].alpha.item(), model.model[i][3].max)

        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None)
        print("[NEMO] 20-bit calibrated model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))
        nemo.utils.save_checkpoint(
            model, optimizer, 0, acc=val_prec1,
            checkpoint_name='mobilenet_%s_%d_calibrated' % (mobilenet_width_s, mobilenet_input),
            checkpoint_suffix=args.suffix)

        model.change_precision(bits=activ_bits)
        model.change_precision(bits=weight_bits, scale_activations=False)

        import IPython
        IPython.embed()
    else:
        print("[NEMO] Not calibrating model, as it is pretrained")
        model.change_precision(bits=1, min_prec_dict=prec_dict)

    ### val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None)
    ### print("[NEMO] pretrained model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))

    if mixed_prec_dict is not None:
        mixed_prec_dict_all = model.export_precision()
        for k in mixed_prec_dict.keys():
            mixed_prec_dict_all[k] = mixed_prec_dict[k]
        model.change_precision(bits=1, min_prec_dict=mixed_prec_dict_all)

    # freeze and quantize BN parameters
    # nemo.transform.bn_quantizer(model, precision=nemo.precision.Precision(bits=20))
    # model.freeze_bn()
    # model.fold_bn()
    # model.equalize_weights_dfq(verbose=True)

    val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None)

    # print("[NEMO] Rounding weights")
    # model.round_weights()

    if args.pure_export:
        model.freeze_bn(reset_stats=True, disable_grad=True)
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None,
                                                  shorten=10)
        print("[NEMO] FQ model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))

        input_bias_dict = {'model.0.0': +1.0, 'model.0.1': +1.0}
        remove_bias_dict = {'model.0.1': 'model.0.2'}
        input_bias = math.floor(1.0 / (2. / 255)) * (2. / 255)

        model.qd_stage(eps_in=2. / 255,
                       add_input_bias_dict=input_bias_dict,
                       remove_bias_dict=remove_bias_dict,
                       int_accurate=True)
        model.model[0][0].value = input_bias
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None,
                                                  input_bias=input_bias, eps_in=2. / 255,
                                                  mode='qd', shorten=10)
        print("[NEMO] QD model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))

        model.id_stage()
        model.model[0][0].value = input_bias * (255. / 2)
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None,
                                                  input_bias=input_bias, eps_in=2. / 255,
                                                  mode='id', shorten=10)
        print("[NEMO] ID model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))

        nemo.utils.export_onnx('mobilenet_%s_%d.onnx' % (mobilenet_width_s, mobilenet_input),
                               model, model, (3, mobilenet_input, mobilenet_input), perm=None)
        import sys
        sys.exit(0)

    if args.terminal:
        fqs = copy.deepcopy(model.state_dict())
        model.freeze_bn(reset_stats=True, disable_grad=True)
        bin_fq, bout_fq, _ = nemo.utils.get_intermediate_activations(
            model, validate, val_loader, model, criterion, 0, None, shorten=1)
        torch.save({'in': bin_fq['model.0.0'][0]}, "input_fq.pth")

        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None)
        print("[NEMO] FQ model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))

        input_bias_dict = {'model.0.0': +1.0, 'model.0.1': +1.0}
        remove_bias_dict = {'model.0.1': 'model.0.2'}
        input_bias = math.floor(1.0 / (2. / 255)) * (2. / 255)

        model.qd_stage(eps_in=2. / 255,
                       add_input_bias_dict=input_bias_dict,
                       remove_bias_dict=remove_bias_dict,
                       int_accurate=True)
        # fix ConstantPad2d
        model.model[0][0].value = input_bias
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None,
                                                  input_bias=input_bias, eps_in=2. / 255,
                                                  mode='qd', shorten=50)
        print("[NEMO] QD model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))

        qds = copy.deepcopy(model.state_dict())
        bin_qd, bout_qd, _ = nemo.utils.get_intermediate_activations(
            model, validate, val_loader, model, criterion, 0, None,
            input_bias=input_bias, eps_in=2. / 255, mode='qd', shorten=1)
        torch.save({'qds': qds, 'fqs': fqs}, "states.pth")
        torch.save({'in': bin_qd['model.0.0'][0]}, "input_qd.pth")

        # compare FQ vs QD activations layer by layer
        diff = collections.OrderedDict()
        for k in bout_fq.keys():
            diff[k] = (bout_fq[k] - bout_qd[k]).to('cpu').abs()
        for i in range(0, 26):
            for j in range(3, 4):
                k = 'model.%d.%d' % (i, j)
                kn = 'model.%d.%d' % (i if j < 3 else i + 1, j + 1 if j < 3 else 0)
                eps = model.get_eps_at(kn, eps_in=2. / 255)[0]
                print("%s:" % k)
                idx = diff[k] > eps
                n = idx.sum()
                t = (diff[k] > -1e9).sum()
                max_eps = torch.ceil(
                    diff[k].max() / model.get_eps_at('model.%d.0' % (i + 1), 2. / 255)[0]).item()
                mean_eps = torch.ceil(
                    diff[k][idx].mean() / model.get_eps_at('model.%d.0' % (i + 1), 2. / 255)[0]).item()
                try:
                    print(" max: %.3f (%d eps)" % (diff[k].max().item(), max_eps))
                    print(" mean: %.3f (%d eps) (only diff. elements)" %
                          (diff[k][idx].mean().item(), mean_eps))
                    print(" #diff: %d/%d (%.1f%%)" % (n, t, float(n) / float(t) * 100))
                except ValueError:
                    print(" #diff: 0/%d (0%%)" % (t, ))

        model.id_stage()
        # fix ConstantPad2d
        model.model[0][0].value = input_bias * (255. / 2)
        ids = model.state_dict()
        bin_id, bout_id, _ = nemo.utils.get_intermediate_activations(
            model, validate, val_loader, model, criterion, 0, None,
            input_bias=input_bias, eps_in=2. / 255, mode='id', shorten=1)
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None,
                                                  input_bias=input_bias, eps_in=2. / 255,
                                                  mode='id', shorten=50)
        print("[NEMO] ID model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))

        try:
            os.makedirs("golden")
        except Exception:
            pass
        torch.save({'in': bin_fq['model.0.0'][0]}, "input_id.pth")

        # compare ID vs QD activations layer by layer
        diff = collections.OrderedDict()
        for i in range(0, 26):
            for j in range(3, 4):
                k = 'model.%d.%d' % (i, j)
                kn = 'model.%d.%d' % (i if j < 3 else i + 1, j + 1 if j < 3 else 0)
                eps = model.get_eps_at(kn, eps_in=2. / 255)[0]
                diff[k] = (bout_id[k] * eps - bout_qd[k]).to('cpu').abs()
                print("%s:" % k)
                idx = diff[k] >= eps
                n = idx.sum()
                t = (diff[k] > -1e9).sum()
                max_eps = torch.ceil(diff[k].max() / eps).item()
                mean_eps = torch.ceil(diff[k][idx].mean() / eps).item()
                try:
                    print(" max: %.3f (%d eps)" % (diff[k].max().item(), max_eps))
                    print(" mean: %.3f (%d eps) (only diff. elements)" %
                          (diff[k][idx].mean().item(), mean_eps))
                    print(" #diff: %d/%d (%.1f%%)" % (n, t, float(n) / float(t) * 100))
                except ValueError:
                    print(" #diff: 0/%d (0%%)" % (t, ))

        import IPython
        IPython.embed()

        # dump golden input/output vectors for each module (HWC layout where possible)
        bidx = 0
        for n, m in model.named_modules():
            try:
                actbuf = bin_id[n][0][bidx].permute((1, 2, 0))
            except RuntimeError:
                actbuf = bin_id[n][0][bidx]
            np.savetxt("golden/golden_input_%s.txt" % n,
                       actbuf.cpu().detach().numpy().flatten(),
                       header="input (shape %s)" % (list(actbuf.shape)),
                       fmt="%.3f", delimiter=',', newline=',\n')
        for n, m in model.named_modules():
            try:
                actbuf = bout_id[n][bidx].permute((1, 2, 0))
            except RuntimeError:
                actbuf = bout_id[n][bidx]
            np.savetxt("golden/golden_%s.txt" % n,
                       actbuf.cpu().detach().numpy().flatten(),
                       header="%s (shape %s)" % (n, list(actbuf.shape)),
                       fmt="%.3f", delimiter=',', newline=',\n')

        nemo.utils.export_onnx("model_int.onnx", model, model, (3, 224, 224), perm=None)

        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None,
                                                  input_bias=input_bias, eps_in=2. / 255)
        print("[NEMO] ID model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))
        import IPython
        IPython.embed()
        import sys
        sys.exit(0)

    for epoch in range(args.start_epoch, args.epochs):
        # optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(
            train_loader, model, criterion, epoch, optimizer,
            freeze_bn=(epoch > 0),
            absorb_bn=(epoch == 0))

        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)

        # save model
        if args.save_check:
            nemo.utils.save_checkpoint(
                model, optimizer, 0, acc=val_prec1,
                checkpoint_name='mobilenet_%s_%d%s_checkpoint' %
                (mobilenet_width_s, mobilenet_input,
                 "_mixed" if mixed_prec_dict is not None else ""),
                checkpoint_suffix=args.suffix)
            if is_best:
                nemo.utils.save_checkpoint(
                    model, optimizer, 0, acc=val_prec1,
                    checkpoint_name='mobilenet_%s_%d%s_best' %
                    (mobilenet_width_s, mobilenet_input,
                     "_mixed" if mixed_prec_dict is not None else ""),
                    checkpoint_suffix=args.suffix)

        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \t'.format(
                         epoch + 1,
                         train_loss=train_loss, val_loss=val_loss,
                         train_prec1=train_prec1, val_prec1=val_prec1,
                         train_prec5=train_prec5, val_prec5=val_prec5))

        results.add(epoch=epoch + 1,
                    train_loss=train_loss, val_loss=val_loss,
                    train_error1=100 - train_prec1, val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5, val_error5=100 - val_prec5)
        results.save()