def build_model(config, device, strict=True, mode='train'):
    '''Build the model and swap its layers depending on the loss type.'''
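    # backbone / dropout / embedding options shared by all MobileNet variants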
    parameters = dict(width_mult=config.model.width_mult,
                      prob_dropout=config.dropout.prob_dropout,
                      type_dropout=config.dropout.type,
                      mu=config.dropout.mu,
                      sigma=config.dropout.sigma,
                      embeding_dim=config.model.embeding_dim,
                      prob_dropout_linear=config.dropout.classifier,
                      theta=config.conv_cd.theta,
                      multi_heads=config.multi_task_learning)

    if config.model.model_type == 'Mobilenet2':
        model = mobilenetv2(**parameters)

        if config.model.pretrained and mode == "train":
            checkpoint_path = config.model.imagenet_weights
            load_checkpoint(checkpoint_path, model, strict=strict, map_location=device)
        elif mode == 'convert':
            model.forward = model.forward_to_onnx

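        # swap the default classifier for a metric-learning head when the
        # loss requires one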
        if (config.loss.loss_type == 'amsoftmax') and (config.loss.amsoftmax.margin_type != 'cross_entropy'):
            model.spoofer = AngleSimpleLinear(config.model.embeding_dim, 2)
        elif config.loss.loss_type == 'soft_triple':
            model.spoofer = SoftTripleLinear(config.model.embeding_dim, 2,
                                             num_proxies=config.loss.soft_triple.K)
    else:
        assert config.model.model_type == 'Mobilenet3'
        if config.model.model_size == 'large':
            model = mobilenetv3_large(**parameters)

            if config.model.pretrained and mode == "train":
                checkpoint_path = config.model.imagenet_weights
                load_checkpoint(checkpoint_path, model, strict=strict, map_location=device)
            elif mode == 'convert':
                model.forward = model.forward_to_onnx
        else:
            assert config.model.model_size == 'small'
            model = mobilenetv3_small(**parameters)

            if config.model.pretrained and mode == "train":
                checkpoint_path = config.model.imagenet_weights
                load_checkpoint(checkpoint_path, model, strict=strict, map_location=device)
            elif mode == 'convert':
                model.forward = model.forward_to_onnx

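        # same head swap for MobileNetV3: the head sits at index 3 of the
        # spoofer block, and the loss scale s is kept on the model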
        if (config.loss.loss_type == 'amsoftmax') and (config.loss.amsoftmax.margin_type != 'cross_entropy'):
            model.scaling = config.loss.amsoftmax.s
            model.spoofer[3] = AngleSimpleLinear(config.model.embeding_dim, 2)
        elif config.loss.loss_type == 'soft_triple':
            model.scaling = config.loss.soft_triple.s
            model.spoofer[3] = SoftTripleLinear(config.model.embeding_dim, 2, num_proxies=config.loss.soft_triple.K)
    return model
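
# A minimal usage sketch (the device string and mode are illustrative;
# build_model only relies on the config fields read above):
#
#   model = build_model(config, device='cuda:0', strict=True, mode='train')
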
def main(args):
    if args.checkpoint == '':
        args.checkpoint = "checkpoints/ctw1500_%s_bs_%d_ep_%d" % (
            args.arch, args.batch_size, args.n_epoch)
    if args.pretrain:
        if 'synth' in args.pretrain:
            args.checkpoint += "_pretrain_synth"
        else:
            args.checkpoint += "_pretrain_ic17"

    print('checkpoint path: %s' % args.checkpoint)
    print('init lr: %.8f' % args.lr)
    print('schedule: ', args.schedule)
    sys.stdout.flush()

    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

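    # PSENet-style settings: 7 progressively shrunk text kernels, the smallest
    # at 0.4 of the full text-region scale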
    kernel_num = 7
    min_scale = 0.4
    start_epoch = 0

    data_loader = CTW1500Loader(is_transform=True,
                                img_size=args.img_size,
                                kernel_num=kernel_num,
                                min_scale=min_scale)
    #train_loader = ctw_train_loader(data_loader, batch_size=args.batch_size)

    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=kernel_num)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)

    # resnet18 and resnet34 have no pretrained weights implemented
    elif args.arch == "resnet18":
        model = models.resnet18(pretrained=False, num_classes=kernel_num)
    elif args.arch == "resnet34":
        model = models.resnet34(pretrained=False, num_classes=kernel_num)

    elif args.arch == "mobilenetv2":
        model = models.resnet152(pretrained=True, num_classes=kernel_num)
    elif args.arch == "mobilenetv3large":
        model = models.mobilenetv3_large(pretrained=False,
                                         num_classes=kernel_num)

    elif args.arch == "mobilenetv3small":
        model = models.mobilenetv3_small(pretrained=False,
                                         num_classes=kernel_num)

    optimizer = tf.keras.optimizers.SGD(learning_rate=args.lr,
                                        momentum=0.99,
                                        decay=5e-4)

    title = 'CTW1500'
    if args.pretrain:
        print('Using pretrained model.')
        assert os.path.isfile(
            args.pretrain), 'Error: no pretrained weights file found!'
        # assumes the pretrain file holds TF weights loadable like a resume
        # checkpoint
        model.load_weights(args.pretrain)

        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])
    elif args.resume:
        print('Resuming from checkpoint.')

        model.load_weights(args.resume)

        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        print('Training from scratch.')
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(
            ['Learning Rate', 'Train Loss', 'Train Acc.', 'Train IOU.'])

    for epoch in range(start_epoch, args.n_epoch):
        optimizer = get_new_optimizer(args, optimizer, epoch)
        print(
            '\nEpoch: [%d | %d] LR: %f' %
            (epoch + 1, args.n_epoch, optimizer.get_config()['learning_rate']))

        train_loader = ctw_train_loader(data_loader,
                                        batch_size=args.batch_size)

        train_loss, train_te_acc, train_ke_acc, train_te_iou, train_ke_iou = train(
            train_loader, model, dice_loss, optimizer, epoch)

        model.save_weights(os.path.join(args.checkpoint, 'model_tf', 'weights'))

        logger.append([
            optimizer.get_config()['learning_rate'], train_loss, train_te_acc,
            train_te_iou
        ])
    logger.close()
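

# A minimal sketch of the schedule helper assumed above (its name and the use
# of args.schedule come from the call sites; the step-decay behaviour is a
# hypothetical reconstruction):
def get_new_optimizer(args, optimizer, epoch, gamma=0.1):
    # decay the base LR once for every milestone in args.schedule already passed
    lr = args.lr * gamma ** sum(epoch >= m for m in args.schedule)
    config = optimizer.get_config()
    config['learning_rate'] = lr
    return type(optimizer).from_config(config)
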
def main():
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()

    weight_bits = int(args.weight_bits)
    activ_bits = int(args.activ_bits)

    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')

    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    writer = SummaryWriter()

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        print('Selected GPUs: ', args.gpus)
        # torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    if args.model == 'mobilenet':
        model = models.__dict__[args.model]
        model = model(**model_config)
    elif args.model == 'mobilenetv2':
        model = torch.hub.load('pytorch/vision:v0.6.0',
                               'mobilenet_v2',
                               pretrained=True)
    elif args.model == 'resnet18':
        model = torch.hub.load('pytorch/vision:v0.6.0',
                               'resnet18',
                               pretrained=True)
    else:  #if args.model == 'mobilenet_v3':
        model = models.mobilenetv3_large(
            width_mult=float(args.mobilenet_width))
        model.load_state_dict(
            torch.load(
                "models/mobilenet_v3/mobilenetv3-large-0.75-9632d2a8.pth"))
    nClasses = get_num_classes(args.dataset)
    model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': nClasses, \
                    'width_mult': float(args.mobilenet_width), 'input_dim': float(args.mobilenet_input) }

    if args.model_config is not '':
        model_config = dict(model_config, **literal_eval(args.model_config))

    logging.info("created model with configuration: %s", model_config)
    print(model)

    num_parameters = sum(l.nelement() for l in model.parameters())
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train':
        get_transform(args.dataset, input_size=args.input_size, augment=True),
        'eval':
        get_transform(args.dataset, input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(
        model, 'regime', {
            0: {
                'optimizer': args.optimizer,
                'lr': args.lr,
                'momentum': args.momentum,
                'weight_decay': args.weight_decay
            }
        })
    print(transform)
    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)()
    criterion.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

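    # the "fast" loaders draw fixed-size random subsets (with replacement) for
    # quick spot checks instead of a full pass over the dataset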
    fast_val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=True,
        sampler=torch.utils.data.RandomSampler(val_data,
                                               replacement=True,
                                               num_samples=1000))

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    fast_train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=True,
        sampler=torch.utils.data.RandomSampler(train_data,
                                               replacement=True,
                                               num_samples=100000))

    # define optimizer
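    # give the PACT clipping parameters ('alpha', 'beta') a stronger weight
    # decay than the plain network weights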
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if 'alpha' in key or 'beta' in key:
            params += [{'params': value, 'weight_decay': 1e-4}]
        else:
            params += [{'params': value, 'weight_decay': 1e-5}]

    mixed_prec_dict = None
    if args.mixed_prec_dict is not None:
        mixed_prec_dict = nemo.utils.precision_dict_from_json(
            args.mixed_prec_dict)
        print("Load mixed precision dict from outside")
    elif args.mem_constraint != '':
        mem_constraints = json.loads(args.mem_constraint)
        print('This is the memory constraint:', mem_constraints)
        if mem_constraints is not None:
            x_test = torch.Tensor(1, 3, 224, 224)
            mixed_prec_dict = memory_driven_quant(model,
                                                  x_test,
                                                  mem_constraints[0],
                                                  mem_constraints[1],
                                                  args.mixed_prec_quant,
                                                  use_sawb=args.use_sawb)

    # multi-GPU setup
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

    # mobilenet_width = float(args.mobilenet_width)
    # mobilenet_width_s = args.mobilenet_width
    # mobilenet_input = int(args.mobilenet_input)

    if args.resume is None:
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion,
                                                  0, None)
        print("[NEMO] Full-precision model: top-1=%.2f top-5=%.2f" %
              (val_prec1, val_prec5))

    if args.quantize:

        # transform the model in a NEMO FakeQuantized representation
        model = nemo.transform.quantize_pact(model,
                                             dummy_input=torch.randn(
                                                 (1, 3, 224, 224)).to('cuda'))
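        # in the FakeQuantized stage weights and activations stay in float but
        # are fake-quantized on the fly, so the network remains trainable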

        if args.resume is not None:
            checkpoint_file = args.resume
            if os.path.isfile(checkpoint_file):
                logging.info("loading checkpoint '%s'", args.resume)
                checkpoint_loaded = torch.load(checkpoint_file)
                checkpoint = checkpoint_loaded['state_dict']
                model.load_state_dict(checkpoint, strict=True)
                prec_dict = checkpoint_loaded.get('precision')
            else:
                logging.error("no checkpoint found at '%s'", args.resume)
                import sys
                sys.exit(1)

        if args.resume is None:
            print("[NEMO] Model calibration")
            model.change_precision(bits=20)
            model.reset_alpha_weights()

            if args.initial_folding:
                model.fold_bn()
                # use DFQ for weight equalization
                if args.initial_equalization:
                    model.equalize_weights_dfq()
            elif args.initial_equalization:
                model.equalize_weights_lsq(verbose=True)
                model.reset_alpha_weights()
                # model.reset_alpha_weights(use_method='dyn_range', dyn_range_cutoff=0.05, verbose=True)

            # calibrate after equalization
            with model.statistics_act():
                val_loss, val_prec1, val_prec5 = validate(
                    val_loader, model, criterion, 0, None)
            model.reset_alpha_act()

            val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                      criterion, 0, None)

            print("[NEMO] 20-bit calibrated model: top-1=%.2f top-5=%.2f" %
                  (val_prec1, val_prec5))
            nemo.utils.save_checkpoint(model,
                                       None,
                                       0,
                                       acc=val_prec1,
                                       checkpoint_name='resnet18_calibrated',
                                       checkpoint_suffix=args.suffix)

            model.change_precision(bits=activ_bits)
            model.change_precision(bits=weight_bits, scale_activations=False)

            # init weight clipping parameters to their reset value and disable their gradient
            model.reset_alpha_weights()
            if args.use_sawb:
                model.disable_grad_sawb()
                model.weight_clip_sawb()

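            # hand-tuned per-layer overrides on top of the exported precision
            # dict (layer names follow the torchvision resnet18 layout)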
            mixed_prec_dict_all = model.export_precision()
            mixed_prec_dict_all['relu']['x_bits'] = 2
            mixed_prec_dict_all['layer1.0.relu']['x_bits'] = 4
            mixed_prec_dict_all['layer3.1.conv1']['W_bits'] = 4
            mixed_prec_dict_all['layer3.1.conv2']['W_bits'] = 4
            mixed_prec_dict_all['layer4.0.conv1']['W_bits'] = 2
            mixed_prec_dict_all['layer4.0.conv2']['W_bits'] = 2
            mixed_prec_dict_all['layer4.1.conv1']['W_bits'] = 2
            mixed_prec_dict_all['layer4.1.conv2']['W_bits'] = 2
            model.change_precision(bits=1, min_prec_dict=mixed_prec_dict_all)

        else:
            print("[NEMO] Not calibrating model, as it is pretrained")
            model.change_precision(bits=1, min_prec_dict=prec_dict)

    optimizer = torch.optim.Adam([
        {
            'params': model.get_nonclip_parameters(),
            'lr': args.lr,
            'weight_decay': 1e-5
        },
        {
            'params': model.get_clip_parameters(),
            'lr': args.lr,
            'weight_decay': 0.001
        },
    ])

    reset_grad_flow(model, __global_ave_grads, __global_max_grads)
    for epoch in range(args.start_epoch, args.epochs):
        # optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(
            train_loader,
            model,
            criterion,
            epoch,
            optimizer,
            freeze_bn=epoch > 0,
            absorb_bn=epoch == 0,
            writer=writer)
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion,
                                                  epoch)

        writer.add_scalar('Loss/val', val_loss, epoch * len(train_loader))
        writer.add_scalar('Accuracy/val', val_prec1, epoch * len(train_loader))

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)

        # save model
        if args.save_check:
            nemo.utils.save_checkpoint(
                model,
                optimizer,
                0,
                acc=val_prec1,
                checkpoint_name='resnet18%s_checkpoint' %
                ("_mixed" if mixed_prec_dict is not None else ""),
                checkpoint_suffix=args.suffix)

        if is_best:
            nemo.utils.save_checkpoint(
                model,
                optimizer,
                0,
                acc=val_prec1,
                checkpoint_name='resnet18%s_best' %
                ("_mixed" if mixed_prec_dict is not None else ""),
                checkpoint_suffix=args.suffix)

        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \t'.format(
                         epoch + 1,
                         train_loss=train_loss,
                         val_loss=val_loss,
                         train_prec1=train_prec1,
                         val_prec1=val_prec1,
                         train_prec5=train_prec5,
                         val_prec5=val_prec5))

        results.add(epoch=epoch + 1,
                    train_loss=train_loss,
                    val_loss=val_loss,
                    train_error1=100 - train_prec1,
                    val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5,
                    val_error5=100 - val_prec5)
        results.save()
def main():
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()

    weight_bits = int(args.weight_bits)
    activ_bits = int(args.activ_bits)

    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')

    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        print('Selected GPUs: ', args.gpus)
        torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    if args.model == 'mobilenet':
        model = models.__dict__[args.model]
    elif args.model == 'mobilenetv2':
        model = torch.hub.load('pytorch/vision:v0.6.0',
                               'mobilenet_v2',
                               pretrained=True)
    else:  #if args.model == 'mobilenet_v3':
        model = models.mobilenetv3_large(
            width_mult=float(args.mobilenet_width))
        model.load_state_dict(
            torch.load(
                "models/mobilenet_v3/mobilenetv3-large-0.75-9632d2a8.pth"))
    nClasses = get_num_classes(args.dataset)
    model_config = {'input_size': args.input_size, 'dataset': args.dataset, 'num_classes': nClasses, \
                    'width_mult': float(args.mobilenet_width), 'input_dim': float(args.mobilenet_input) }

    if args.model_config is not '':
        model_config = dict(model_config, **literal_eval(args.model_config))

    model = model(**model_config)
    logging.info("created model with configuration: %s", model_config)
    print(model)

    num_parameters = sum(l.nelement() for l in model.parameters())
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train':
        get_transform(args.dataset, input_size=args.input_size, augment=True),
        'eval':
        get_transform(args.dataset, input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(
        model, 'regime', {
            0: {
                'optimizer': args.optimizer,
                'lr': args.lr,
                'momentum': args.momentum,
                'weight_decay': args.weight_decay
            }
        })
    print(transform)
    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)()
    criterion.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    # define optimizer
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if 'alpha' in key or 'beta' in key:
            params += [{'params': value, 'weight_decay': 1e-4}]
        else:
            params += [{'params': value, 'weight_decay': 1e-5}]

    mixed_prec_dict = None
    if args.mixed_prec_dict is not None:
        mixed_prec_dict = nemo.utils.precision_dict_from_json(
            args.mixed_prec_dict)
        print("Load mixed precision dict from outside")
    elif args.mem_constraint != '':
        mem_constraints = json.loads(args.mem_constraint)
        print('This is the memory constraint:', mem_constraints)
        if mem_constraints is not None:
            x_test = torch.Tensor(1, 3, int(args.mobilenet_input),
                                  int(args.mobilenet_input))
            mixed_prec_dict = memory_driven_quant(model, x_test,
                                                  mem_constraints[0],
                                                  mem_constraints[1],
                                                  args.mixed_prec_quant)

    # multi-GPU setup
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model.type(args.type)

    mobilenet_width = float(args.mobilenet_width)
    mobilenet_width_s = args.mobilenet_width
    mobilenet_input = int(args.mobilenet_input)

    if args.resume is None:
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion,
                                                  0, None)
        print("[NEMO] Full-precision model: top-1=%.2f top-5=%.2f" %
              (val_prec1, val_prec5))

    if args.quantize:

        # transform the model in a NEMO FakeQuantized representation
        model = nemo.transform.quantize_pact(model,
                                             dummy_input=torch.randn(
                                                 (1, 3, mobilenet_input,
                                                  mobilenet_input)).to('cuda'))
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=1e-5)

        if args.resume is not None:
            checkpoint_file = args.resume
            if os.path.isfile(checkpoint_file):
                logging.info("loading checkpoint '%s'", args.resume)
                checkpoint_loaded = torch.load(checkpoint_file)
                checkpoint = checkpoint_loaded['state_dict']
                model.load_state_dict(checkpoint, strict=True)
                prec_dict = checkpoint_loaded.get('precision')
            else:
                logging.error("no checkpoint found at '%s'", args.resume)
                import sys
                sys.exit(1)

        if args.resume is None:
            print("[NEMO] Model calibration")
            model.change_precision(bits=20)
            model.reset_alpha_weights()

            if args.initial_folding:
                model.fold_bn()
                # use DFQ for weight equalization
                if args.initial_equalization:
                    model.equalize_weights_dfq()
            elif args.initial_equalization:
                model.equalize_weights_lsq(verbose=True)
                model.reset_alpha_weights()
                # model.reset_alpha_weights(use_method='dyn_range', dyn_range_cutoff=0.05, verbose=True)

            # calibrate after equalization
            with model.statistics_act():
                val_loss, val_prec1, val_prec5 = validate(
                    val_loader, model, criterion, 0, None)

            # # use this in place of the usual calibration, because PACT_Act's descend from ReLU6 and
            # # the trained weights already assume the presence of a clipping effect
            # # this should be integrated in NEMO by saving the "origin" of the PACT_Act!
            # for i in range(0,27):
            #     model.model[i][3].alpha.data[:] = min(model.model[i][3].alpha.item(), model.model[i][3].max)

            val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                      criterion, 0, None)

            print("[NEMO] 20-bit calibrated model: top-1=%.2f top-5=%.2f" %
                  (val_prec1, val_prec5))
            nemo.utils.save_checkpoint(
                model,
                optimizer,
                0,
                acc=val_prec1,
                checkpoint_name='mobilenet_%s_%d_calibrated' %
                (mobilenet_width_s, mobilenet_input),
                checkpoint_suffix=args.suffix)

            model.change_precision(bits=activ_bits)
            model.change_precision(bits=weight_bits, scale_activations=False)
            import IPython
            IPython.embed()

        else:
            print("[NEMO] Not calibrating model, as it is pretrained")
            model.change_precision(bits=1, min_prec_dict=prec_dict)

            ### val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None)
            ### print("[NEMO] pretrained model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))

        if mixed_prec_dict is not None:
            mixed_prec_dict_all = model.export_precision()
            for k in mixed_prec_dict.keys():
                mixed_prec_dict_all[k] = mixed_prec_dict[k]
            model.change_precision(bits=1, min_prec_dict=mixed_prec_dict_all)

            # freeze and quantize BN parameters
            # nemo.transform.bn_quantizer(model, precision=nemo.precision.Precision(bits=20))
            # model.freeze_bn()
            # model.fold_bn()
            # model.equalize_weights_dfq(verbose=True)
            val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                      criterion, 0, None)

            # print("[NEMO] Rounding weights")
            # model.round_weights()

    if args.pure_export:
        model.freeze_bn(reset_stats=True, disable_grad=True)
        val_loss, val_prec1, val_prec5 = validate(val_loader,
                                                  model,
                                                  criterion,
                                                  0,
                                                  None,
                                                  shorten=10)
        print("[NEMO] FQ model: top-1=%.2f top-5=%.2f" %
              (val_prec1, val_prec5))
        input_bias_dict = {'model.0.0': +1.0, 'model.0.1': +1.0}
        remove_bias_dict = {'model.0.1': 'model.0.2'}
        input_bias = math.floor(1.0 / (2. / 255)) * (2. / 255)
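        # QuantizedDeployable (QD) stage: integer-accurate arithmetic, with
        # tensors still expressed in real units of the input quantum eps_in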
        model.qd_stage(eps_in=2. / 255,
                       add_input_bias_dict=input_bias_dict,
                       remove_bias_dict=remove_bias_dict,
                       int_accurate=True)
        model.model[0][0].value = input_bias
        val_loss, val_prec1, val_prec5 = validate(val_loader,
                                                  model,
                                                  criterion,
                                                  0,
                                                  None,
                                                  input_bias=input_bias,
                                                  eps_in=2. / 255,
                                                  mode='qd',
                                                  shorten=10)
        print("[NEMO] QD model: top-1=%.2f top-5=%.2f" %
              (val_prec1, val_prec5))
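        # IntegerDeployable (ID) stage: tensors become true integers, so the
        # input bias is rescaled by 1/eps_in = 255/2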
        model.id_stage()
        model.model[0][0].value = input_bias * (255. / 2)
        val_loss, val_prec1, val_prec5 = validate(val_loader,
                                                  model,
                                                  criterion,
                                                  0,
                                                  None,
                                                  input_bias=input_bias,
                                                  eps_in=2. / 255,
                                                  mode='id',
                                                  shorten=10)
        print("[NEMO] ID model: top-1=%.2f top-5=%.2f" %
              (val_prec1, val_prec5))
        nemo.utils.export_onnx('mobilenet_%s_%d.onnx' %
                               (mobilenet_width_s, mobilenet_input),
                               model,
                               model, (3, mobilenet_input, mobilenet_input),
                               perm=None)
        import sys
        sys.exit(0)

    if args.terminal:
        fqs = copy.deepcopy(model.state_dict())
        model.freeze_bn(reset_stats=True, disable_grad=True)
        bin_fq, bout_fq, _ = nemo.utils.get_intermediate_activations(
            model, validate, val_loader, model, criterion, 0, None, shorten=1)

        torch.save({'in': bin_fq['model.0.0'][0]}, "input_fq.pth")

        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion,
                                                  0, None)
        print("[NEMO] FQ model: top-1=%.2f top-5=%.2f" %
              (val_prec1, val_prec5))

        input_bias_dict = {'model.0.0': +1.0, 'model.0.1': +1.0}
        remove_bias_dict = {'model.0.1': 'model.0.2'}
        input_bias = math.floor(1.0 / (2. / 255)) * (2. / 255)

        model.qd_stage(eps_in=2. / 255,
                       add_input_bias_dict=input_bias_dict,
                       remove_bias_dict=remove_bias_dict,
                       int_accurate=True)

        # fix ConstantPad2d
        model.model[0][0].value = input_bias

        val_loss, val_prec1, val_prec5 = validate(val_loader,
                                                  model,
                                                  criterion,
                                                  0,
                                                  None,
                                                  input_bias=input_bias,
                                                  eps_in=2. / 255,
                                                  mode='qd',
                                                  shorten=50)
        print("[NEMO] QD model: top-1=%.2f top-5=%.2f" %
              (val_prec1, val_prec5))

        qds = copy.deepcopy(model.state_dict())
        bin_qd, bout_qd, _ = nemo.utils.get_intermediate_activations(
            model,
            validate,
            val_loader,
            model,
            criterion,
            0,
            None,
            input_bias=input_bias,
            eps_in=2. / 255,
            mode='qd',
            shorten=1)

        torch.save({'qds': qds, 'fqs': fqs}, "states.pth")
        torch.save({'in': bin_qd['model.0.0'][0]}, "input_qd.pth")

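        # element-wise FQ-vs-QD activation mismatch, reported in multiples of
        # each layer's quantum eps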
        diff = collections.OrderedDict()
        for k in bout_fq.keys():
            diff[k] = (bout_fq[k] - bout_qd[k]).to('cpu').abs()

        for i in range(0, 26):
            for j in range(3, 4):
                k = 'model.%d.%d' % (i, j)
                kn = 'model.%d.%d' % (i if j < 3 else i + 1,
                                      j + 1 if j < 3 else 0)
                eps = model.get_eps_at(kn, eps_in=2. / 255)[0]
                print("%s:" % k)
                idx = diff[k] > eps
                n = idx.sum()
                t = (diff[k] > -1e9).sum()
                max_eps = torch.ceil(
                    diff[k].max() /
                    model.get_eps_at('model.%d.0' %
                                     (i + 1), 2. / 255)[0]).item()
                mean_eps = torch.ceil(
                    diff[k][idx].mean() /
                    model.get_eps_at('model.%d.0' %
                                     (i + 1), 2. / 255)[0]).item()
                try:
                    print("  max:   %.3f (%d eps)" %
                          (diff[k].max().item(), max_eps))
                    print("  mean:  %.3f (%d eps) (only diff. elements)" %
                          (diff[k][idx].mean().item(), mean_eps))
                    print("  #diff: %d/%d (%.1f%%)" %
                          (n, t, float(n) / float(t) * 100))
                except ValueError:
                    print("  #diff: 0/%d (0%%)" % (t, ))

        model.id_stage()
        # fix ConstantPad2d
        model.model[0][0].value = input_bias * (255. / 2)

        ids = model.state_dict()
        bin_id, bout_id, _ = nemo.utils.get_intermediate_activations(
            model,
            validate,
            val_loader,
            model,
            criterion,
            0,
            None,
            input_bias=input_bias,
            eps_in=2. / 255,
            mode='id',
            shorten=1)

        val_loss, val_prec1, val_prec5 = validate(val_loader,
                                                  model,
                                                  criterion,
                                                  0,
                                                  None,
                                                  input_bias=input_bias,
                                                  eps_in=2. / 255,
                                                  mode='id',
                                                  shorten=50)
        print("[NEMO] ID model: top-1=%.2f top-5=%.2f" %
              (val_prec1, val_prec5))

        os.makedirs("golden", exist_ok=True)

        torch.save({'in': bin_fq['model.0.0'][0]}, "input_id.pth")

        diff = collections.OrderedDict()
        for i in range(0, 26):
            for j in range(3, 4):
                k = 'model.%d.%d' % (i, j)
                kn = 'model.%d.%d' % (i if j < 3 else i + 1,
                                      j + 1 if j < 3 else 0)
                eps = model.get_eps_at(kn, eps_in=2. / 255)[0]
                diff[k] = (bout_id[k] * eps - bout_qd[k]).to('cpu').abs()
                print("%s:" % k)
                idx = diff[k] >= eps
                n = idx.sum()
                t = (diff[k] > -1e9).sum()
                max_eps = torch.ceil(diff[k].max() / eps).item()
                mean_eps = torch.ceil(diff[k][idx].mean() / eps).item()
                try:
                    print("  max:   %.3f (%d eps)" %
                          (diff[k].max().item(), max_eps))
                    print("  mean:  %.3f (%d eps) (only diff. elements)" %
                          (diff[k][idx].mean().item(), mean_eps))
                    print("  #diff: %d/%d (%.1f%%)" %
                          (n, t, float(n) / float(t) * 100))
                except ValueError:
                    print("  #diff: 0/%d (0%%)" % (t, ))
        import IPython
        IPython.embed()

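        # dump per-layer inputs and outputs (HWC order) as golden reference
        # vectors for validating a deployed integer implementation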
        bidx = 0
        for n, m in model.named_modules():
            try:
                actbuf = bin_id[n][0][bidx].permute((1, 2, 0))
            except RuntimeError:
                actbuf = bin_id[n][0][bidx]
            np.savetxt("golden/golden_input_%s.txt" % n,
                       actbuf.cpu().detach().numpy().flatten(),
                       header="input (shape %s)" % (list(actbuf.shape)),
                       fmt="%.3f",
                       delimiter=',',
                       newline=',\n')
        for n, m in model.named_modules():
            try:
                actbuf = bout_id[n][bidx].permute((1, 2, 0))
            except RuntimeError:
                actbuf = bout_id[n][bidx]
            np.savetxt("golden/golden_%s.txt" % n,
                       actbuf.cpu().detach().numpy().flatten(),
                       header="%s (shape %s)" % (n, list(actbuf.shape)),
                       fmt="%.3f",
                       delimiter=',',
                       newline=',\n')
        nemo.utils.export_onnx("model_int.onnx",
                               model,
                               model, (3, 224, 224),
                               perm=None)

        val_loss, val_prec1, val_prec5 = validate(val_loader,
                                                  model,
                                                  criterion,
                                                  0,
                                                  None,
                                                  input_bias=input_bias,
                                                  eps_in=2. / 255)
        print("[NEMO] ID model: top-1=%.2f top-5=%.2f" %
              (val_prec1, val_prec5))

        import IPython
        IPython.embed()
        import sys
        sys.exit(0)

    for epoch in range(args.start_epoch, args.epochs):
        # optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(
            train_loader,
            model,
            criterion,
            epoch,
            optimizer,
            freeze_bn=epoch > 0,
            absorb_bn=epoch == 0)
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion,
                                                  epoch)

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)

        # save model
        if args.save_check:
            nemo.utils.save_checkpoint(
                model,
                optimizer,
                0,
                acc=val_prec1,
                checkpoint_name='mobilenet_%s_%d%s_checkpoint' %
                (mobilenet_width_s, mobilenet_input,
                 "_mixed" if mixed_prec_dict is not None else ""),
                checkpoint_suffix=args.suffix)

        if is_best:
            nemo.utils.save_checkpoint(
                model,
                optimizer,
                0,
                acc=val_prec1,
                checkpoint_name='mobilenet_%s_%d%s_best' %
                (mobilenet_width_s, mobilenet_input,
                 "_mixed" if mixed_prec_dict is not None else ""),
                checkpoint_suffix=args.suffix)

        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \t'.format(
                         epoch + 1,
                         train_loss=train_loss,
                         val_loss=val_loss,
                         train_prec1=train_prec1,
                         val_prec1=val_prec1,
                         train_prec5=train_prec5,
                         val_prec5=val_prec5))

        results.add(epoch=epoch + 1,
                    train_loss=train_loss,
                    val_loss=val_loss,
                    train_error1=100 - train_prec1,
                    val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5,
                    val_error5=100 - val_prec5)
        results.save()