def train(trained_model, trained_optimizer, max_steps=None):
    """Train the tutorial sketch model, optionally resuming from checkpoints.

    Args:
        trained_model: path to a model checkpoint to restore, or falsy to
            start from freshly initialized variables.
        trained_optimizer: path to an optimizer checkpoint to restore.
        max_steps: optional number of training steps to run before saving.
            ``None`` (the default) preserves the original behaviour of
            looping forever; note that in that case the final save below is
            only reached if the process is interrupted, so pass a number to
            actually produce 'mdl/*.ckpt'.
    """
    train_data_pattern = '../tutorial_dataset/training.tfrecord-?????-of-?????'
    class_file = '../tutorial_dataset/training.tfrecord.classes'
    eval_data_pattern = '../tutorial_dataset/eval.tfrecord-?????-of-?????'

    param = util_model.Param()
    param.class_num = data.get_num_classes(class_file)
    # Training input pipeline; evaluation uses a double-sized batch.
    train_inks, train_lengths, train_labels = data.load_data(
        train_data_pattern, data.SessionMode.TRAIN, param.batch_size)
    eval_inks, eval_lengths, eval_labels = data.load_data(
        eval_data_pattern, data.SessionMode.PREDICT, param.batch_size * 2)

    model = util_model.Model(param)
    # Separate savers so model weights and optimizer state can be
    # checkpointed/restored independently.
    saver_model = tf.train.Saver(model.model_variables)
    saver_optimizer = tf.train.Saver(model.optimizer_variables)

    with tf.Session() as sess:
        writer = tf.summary.FileWriter("./nn_logs", sess.graph)
        tf.summary.scalar('cost', model.cross_entropy)
        #merged = tf.summary.merge_all()

        # Initialize once; restoring afterwards overwrites the checkpointed
        # variables.  (Bug fix: the original ran the initializer a second,
        # redundant time in the no-checkpoint branch.)
        sess.run(tf.global_variables_initializer())
        if trained_model and trained_optimizer:
            saver_model.restore(sess, trained_model)
            saver_optimizer.restore(sess, trained_optimizer)

        idx = 0
        while max_steps is None or idx < max_steps:
            train_vinks, train_vlengths, train_vlabels = sess.run(
                [train_inks, train_lengths, train_labels])
            vloss, vacc, _ = sess.run(
                [model.cross_entropy, model.accuracy, model.train_op],
                feed_dict={
                    model.if_train: True,
                    model.input_inks: train_vinks,
                    model.input_lengths: train_vlengths,
                    model.input_labels: train_vlabels
                })
            print(vloss, vacc)
            # Report validation accuracy every 10 steps.
            if (idx + 1) % 10 == 0:
                eval_vinks, eval_vlengths, eval_vlabels = sess.run(
                    [eval_inks, eval_lengths, eval_labels])
                vacc = sess.run(model.accuracy,
                                feed_dict={
                                    model.if_train: False,
                                    model.input_inks: eval_vinks,
                                    model.input_lengths: eval_vlengths,
                                    model.input_labels: eval_vlabels
                                })
                print('val acc: ', vacc)
            idx = idx + 1
            #writer.add_summary(vsummary, i)

        writer.close()
        saver_model.save(sess, 'mdl/model.ckpt')
        saver_optimizer.save(sess, 'mdl/optimizer.ckpt')
def main():
    """Rebuild a quantized network from a checkpoint and export it.

    Pipeline: load checkpoint -> re-create model + QuantOp wrapper -> copy
    per-layer quantizer state -> fold batch-norm and generate the deployment
    model -> round/clamp biases and scaling factors to deployable integer
    ranges -> (optionally) evaluate -> dump binary weight/bias/scale files
    and an AutoTiler graph description ``<network_name>.c``.

    Reads everything from the module-level ``args`` (ckp_file, evaluate,
    img_file, bin_path, dump, network_name).  Exits with status 1 on a
    missing or invalid checkpoint.
    """
    # parsing the input
    args = parser.parse_args()

    # read checkpoint file
    checkpoint_file = args.ckp_file
    if checkpoint_file == '':
        print('No checkpoint file provided!')
        exit(1)
    if os.path.isfile(checkpoint_file):
        checkpoint_loaded = torch.load(checkpoint_file)
        print('Model pretrained at ', checkpoint_file)
    else:
        print('Model pretrainined not Loaded')
        exit(1)

    # Load configuration from checkpoint
    model_name = checkpoint_loaded['model']
    model_config = checkpoint_loaded['config']
    activ_bits = model_config['activ_bits']
    activ_type = model_config['activ_type']
    dataset = model_config['dataset']
    input_dim = model_config['input_dim']
    input_size = input_dim
    num_classes = model_config['num_classes']
    type_quant = model_config['type_quant']
    weight_bits = model_config['weight_bits']
    width_mult = model_config['width_mult']
    additional_config = ''
    quant_add_config = checkpoint_loaded['add_config']
    fold_type = checkpoint_loaded['fold_type']

    # Load quantizer & model state.
    # NOTE(review): model_state is read but never applied via
    # load_state_dict -- the weights are copied layer-by-layer from the
    # pickled quantizer below instead; confirm that is intentional.
    quantizer_load = checkpoint_loaded['quantizer']
    model_state = checkpoint_loaded['state_dict']

    # Create model with same characteristics
    model = models.__dict__[model_name]
    nClasses = get_num_classes(dataset)
    model_config = {'input_size': input_size, 'dataset': dataset,
                    'num_classes': nClasses, 'type_quant': type_quant,
                    'weight_bits': weight_bits, 'activ_bits': activ_bits,
                    'activ_type': activ_type, 'width_mult': width_mult,
                    'input_dim': input_size}
    # Bug fix: `is not ''` identity comparison replaced by `!= ''` (the
    # original relied on CPython string interning and raises SyntaxWarning).
    if additional_config != '':
        model_config = dict(model_config, **literal_eval(additional_config))
    model = models.__dict__[model_name]
    model = model(**model_config, pretrained=False)
    if model is None:
        print('ERROR: model is none')
        exit(1)

    # wrap the model with quantop operator
    dummy_input = torch.Tensor(1, 3, int(input_dim), int(input_dim))
    quantizer = quantization.QuantOp(model, quant_type=type_quant,
                                     weight_bits=weight_bits, bias_bits=32,
                                     batch_fold_delay=0,
                                     batch_fold_type=fold_type,
                                     act_bits=activ_bits,
                                     add_config=quant_add_config,
                                     dummy_input=dummy_input)

    # load features from quantizer_load into quantizer and model state
    for i, item in enumerate(quantizer_load.param_to_quantize):
        item2 = quantizer.param_to_quantize[i]
        item2['w_max_thr'] = item['w_max_thr']
        item2['bias_bits'] = item['bias_bits']
        if 'w_min_thr' in item2.keys():
            item2['w_min_thr'] = item['w_min_thr']
        if item2['conv'] is not None:
            item2['conv'].load_state_dict(item['conv'].state_dict())
        if item2['batch_norm'] is not None:
            item2['batch_norm'].load_state_dict(
                item['batch_norm'].state_dict())
        if item2['act'] is not None:
            item2['act'].load_state_dict(item['act'].state_dict())

    # enable folding of batch norm in to convolutional layers
    quantizer.batch_fold = True

    # generate model (with cuda)
    quantizer.generate_deployment_model()
    quantizer.deployment_model.cuda()
    quantizer.deployment_model.eval()

    # adjust bias and scaling factor
    for i, item in enumerate(quantizer.param_to_quantize):
        # clamping bias of last layer
        if type(item['quant_conv']) is nn.Linear:
            bias_bits = item['bias_bits']
            if item['quant_conv'].bias is not None:
                item['quant_conv'].bias.data.clamp_(-2**(bias_bits - 1),
                                                    2**(bias_bits - 1) - 1)
        # clamping bias of other layers
        else:
            # mult bias rounding: truncate the activation scaling factor M0
            # to MULTBIAS_BITS-1 fractional bits
            if item['quant_act'] is not None:
                M0 = item['quant_act'].M_ZERO
                N0 = item['quant_act'].N_ZERO
                M0_new = np.floor(
                    M0 * 2**(MULTBIAS_BITS - 1)) / 2**(MULTBIAS_BITS - 1)
                item['quant_act'].M_ZERO = M0_new
                item['quant_act'].N_ZERO = N0
            # bias clamping and extraction of normbias
            if item['quant_conv'].bias is not None:
                bias_bits = item['bias_bits']
                bias = item['quant_conv'].bias.data
                bias = bias.clamp_(-2**(bias_bits - 1), 2**(bias_bits - 1) - 1)
                # extra right-shift so the bias magnitude fits into 8 bits
                N0_bias = -max(bias.abs().max().log2().ceil().item() - 7, 0)
                item['quant_conv'].bias.data = bias.mul(
                    2**N0_bias).round().clamp(-2**7,
                                              (2**7) - 1).div(2**N0_bias)
                item['quant_conv'].bias.N0_bias = N0_bias

    # run validation
    if args.evaluate:
        import torchvision.transforms as transforms
        normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                         std=[0.5, 0.5, 0.5])
        # Bug fix: transforms.Scale was removed from torchvision; Resize is
        # its documented drop-in replacement.
        input_eval_transform = transforms.Compose([
            transforms.Resize(int(input_size)),
            transforms.ToTensor(), normalize
        ])
        # NOTE(review): when the model lacks 'input_transform' the fallback
        # Compose object is not subscriptable, so ['eval'] below would raise
        # -- confirm every supported model defines input_transform.
        input_transform = getattr(model, 'input_transform',
                                  input_eval_transform)
        val_data = get_dataset(dataset, 'val', input_transform['eval'])
        val_loader = torch.utils.data.DataLoader(val_data,
                                                 batch_size=32,
                                                 shuffle=False,
                                                 num_workers=8,
                                                 pin_memory=True)

        def forward(data_loader, model, epoch=0, training=False,
                    quantizer=None):
            """Evaluate `model` on `data_loader`; return (top1, top5) avg."""
            batch_time = AverageMeter()
            data_time = AverageMeter()
            losses = AverageMeter()
            top1 = AverageMeter()
            top5 = AverageMeter()
            end = time.time()
            model.eval()
            # 8-bit uniform quantization grid over the input range [-1, 1]
            max_i = 1
            min_i = -1
            bit_i = 8
            n_steps = (2**bit_i) - 1
            eps = (max_i - min_i) / n_steps
            for i, (inputs, target) in enumerate(data_loader):
                # measure data loading time
                data_time.update(time.time() - end)
                # Bug fix: `async=True` is a SyntaxError on Python >= 3.7
                # ('async' became a keyword); torch renamed it non_blocking.
                target = target.cuda(non_blocking=True)
                # NOTE(review): Variable/volatile is deprecated; kept for the
                # legacy torch version this script targets.
                input_var = Variable(inputs.cuda(), volatile=not training)
                target_var = Variable(target)
                # quantize the input onto the integer grid
                input_var = input_var.clamp(min_i, max_i).div(eps).round()
                if quantizer is not None:
                    input_var = input_var.mul(eps)
                    quantizer.store_and_quantize(training=False)
                # compute output
                output = model(input_var)
                if type(output) is list:
                    output = output[0]
                values, indices = output.max(1)
                # measure accuracy and record loss
                prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
                top1.update(prec1.item(), inputs.size(0))
                top5.update(prec5.item(), inputs.size(0))
                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                if quantizer is not None:
                    quantizer.restore_real_value()
                if i % 100 == 0:
                    print('{phase} - Epoch: [{0}][{1}/{2}]\t'
                          'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                          'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                          'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                              epoch, i, len(data_loader),
                              phase='TRAINING' if training else 'EVALUATING',
                              batch_time=batch_time, data_time=data_time,
                              top1=top1, top5=top5))
            print('Top1: ', top1.avg)
            return top1.avg, top5.avg

        forward(val_loader, quantizer.deployment_model.cuda())

    # dumping of the input image
    input_dim = int(input_dim)
    if args.img_file == '':
        print('No input image provide. Going to generate a rondom matrix!')
        x = torch.Tensor(1, 3, int(input_dim),
                         int(input_dim)).cuda().random_(-4, 4)
    else:
        image = cv2.imread(args.img_file)
        scale_dim = 256
        off = int((scale_dim - input_dim) / 2)
        # resize to 256x256 then center-crop input_dim x input_dim
        image = cv2.resize(image, (scale_dim, scale_dim))
        image = image[off:input_dim + off, off:off + input_dim, :]
        # HWC uint8 -> NCHW, shifted to signed 8-bit range
        x = torch.Tensor(image).permute(2, 0, 1).unsqueeze(0).cuda().add(-128)
    y = quantizer.deployment_model(x)
    print('This is the inference result: ', y.argmax().item())

    #file generation
    if args.bin_path == '':
        root_folder = './'
    else:
        root_folder = args.bin_path
    folder_bindump = root_folder + 'binFiles/'
    if args.dump:
        os.makedirs(root_folder, exist_ok=True)
        os.makedirs(folder_bindump, exist_ok=True)

    def get_4D_act_tensor_to_list_HWC(x):
        # Flatten the first batch element of an NCHW activation tensor into
        # a channel-major int list.
        tensor_list = []
        n_b, in_ch, in_dim_w, in_dim_h = x.size()
        for c in range(in_ch):
            for i in range(in_dim_w):
                for j in range(in_dim_h):
                    tensor_list.append(int(x[0][c][i][j].item()))
        return tensor_list

    def get_4D_wgt_tensor_to_list_HWC(w):
        # Flatten an (out_ch, in_ch, kh, kw) weight tensor into an int list.
        tensor_list = []
        out_ch, in_ch, ker_w, ker_h = w.size()
        for co in range(out_ch):
            for ci in range(in_ch):
                for i in range(ker_w):
                    for j in range(ker_h):
                        tensor_list.append(int(w[co][ci][i][j].item()))
        return tensor_list

    def get_2D_wgt_tensor_to_list_HWC(w):
        # Flatten an (out_features, in_features) linear weight into ints.
        tensor_list = []
        out_ch, in_ch = w.size()
        for co in range(out_ch):
            for ci in range(in_ch):
                tensor_list.append(int(w[co][ci].item()))
        return tensor_list

    def get_1D_bias_tensor_to_list_HWC(b):
        # Flatten a 1-D bias tensor into an int list.
        tensor_list = []
        out_ch, = b.size()
        for co in range(out_ch):
            tensor_list.append(int(b[co].item()))
        return tensor_list

    def dump_int16_tensor_to_file(file_name, data):
        newFile = open(file_name, "wb")
        for item in data:
            newFile.write(struct.pack('h', item))  # 2 bytes (int16)
        newFile.close()

    def dump_int8_tensor_to_file(file_name, data):
        newFile = open(file_name, "wb")
        for item in data:
            newFile.write(struct.pack('b', item))  # 1 byte
        newFile.close()

    def print_size(model, input, output):
        # Forward hook: record the hooked module's input/output sizes into
        # the module-level globals si / so.
        global si, so
        si = input[0].size()
        so = output[0].size()

    # graph buffers
    Layers = []
    WeightEdges = []
    ActivEdges = []
    GraphNodes = []

    # dump input data
    input_tensor = "In"  # this must match the input tensor of the network inference function
    if args.dump:
        data = get_4D_act_tensor_to_list_HWC(x)
        dump_int8_tensor_to_file(folder_bindump + 'L0_INPUT.bin', data)
    txt = 'TCArgInfo("signed char *__restrict", "{}", ARG_SCOPE_ARG, ARG_DIR_IN, 1,AT_MEM_L3_HRAM, 0)'.format(
        input_tensor)
    WeightEdges.append(txt)

    # dump the network layer-by-layer
    i_l = 0
    for i, item in enumerate(quantizer.param_to_quantize):
        #convolution parameters
        conv = item['quant_conv']
        # Run one forward pass with a hook attached to capture this layer's
        # input size (written into the global `si`).
        hook = conv.register_forward_hook(print_size)
        quantizer.deployment_model(x)
        hook.remove()
        input_size = si
        # NOTE(review): M0/N0 are only assigned when quant_act is present but
        # are used unconditionally below; a layer without an activation
        # quantizer would silently reuse the previous layer's values.
        if item['quant_act'] is not None:
            M0 = int(item['quant_act'].M_ZERO * 2**(MULTBIAS_BITS - 1))
            N0 = MULTBIAS_BITS - 1 - item['quant_act'].N_ZERO
        if type(conv) in [
                models.linear_quantized_modules.Conv2d_SAME, nn.Conv2d
        ]:
            out_ch = conv.out_channels
            in_ch = conv.in_channels
            # depthwise convolution when every channel is its own group
            if in_ch == conv.groups:
                is_dw = True
            else:
                is_dw = False
            ker_size = conv.kernel_size
            ker_stride = conv.stride
            ker_dilation = conv.dilation
            if is_dw:
                num_params = out_ch * ker_size[0] * ker_size[0]
            else:
                num_params = out_ch * in_ch * ker_size[0] * ker_size[0]
            ker_padding = conv.padding

            #weight parameters
            file_txt = 'L{}_weight_L3.bin'.format(i_l)
            if args.dump:
                data = get_4D_wgt_tensor_to_list_HWC(conv.weight.data)
                dump_int8_tensor_to_file(folder_bindump + file_txt, data)
            txt = 'TCArgInfo ("signed char *__restrict", "FL{}", ARG_SCOPE_GLOBAL, ARG_DIR_CONSTIN, 0, AT_MEM_UNDEF, ConstInfo("{}", 1, 1, 1, 0))'.format(
                i_l, folder_bindump + file_txt)
            WeightEdges.append(txt)

            #bias parameters: re-quantize the bias with the per-layer shift
            N0_bias = int(-conv.bias.N0_bias)
            bias_int = conv.bias.div(2**N0_bias).round().clamp(
                -2**7, (2**7) - 1)
            file_txt = 'L{}_bias_L3.bin'.format(i_l)
            if args.dump:
                data = get_1D_bias_tensor_to_list_HWC(bias_int)
                dump_int8_tensor_to_file(folder_bindump + file_txt, data)
            txt = 'TCArgInfo ( "signed char * __restrict__", "BL{}", ARG_SCOPE_GLOBAL, ARG_DIR_CONSTIN, 0, AT_MEM_UNDEF, ConstInfo( "{}", 1, 1, 1, 0))'.format(
                i_l, folder_bindump + file_txt)
            WeightEdges.append(txt)

            # scaling factor parameters (per-channel copies of M0)
            file_txt = 'L{}_M0_L3.bin'.format(i_l)
            if args.dump:
                data = [M0 for x in range(out_ch)]
                dump_int8_tensor_to_file(folder_bindump + file_txt, data)
            txt = 'TCArgInfo ("signed char *__restrict", "ML{}", ARG_SCOPE_GLOBAL, ARG_DIR_CONSTIN, 0, AT_MEM_UNDEF, ConstInfo("{}", 1, 1, 1, 0))'.format(
                i_l, folder_bindump + file_txt)
            WeightEdges.append(txt)

            # add node to the graph
            NormMul = 7  # fixed
            Norm = N0 - NormMul
            NormBias = N0_bias
            #if ker_size[0]==1 and ker_size[1]==1:
            #
            #else:
            #    NormBias = 2*Norm - N0_bias
            # convolution layer
            txt = 'CNN_ConvolutionMulBiasPoolReLU("Layer{}", &CtrlH, 1,1,1,1,1,{},{},{},{},{},1,1,1,0,1,{},{},{},{},{},{},{},{},{},{},{}, 1, KOP_NONE, 3,3, 1,1, 2,2, 1, KOP_RELU)'.format(
                i_l, 0, 0, -NormBias, N0, 0,
                in_ch, out_ch, input_size[2], input_size[3],
                'KOP_CONV_DWDP' if is_dw else 'KOP_CONV_DP',
                ker_size[0], ker_size[1], ker_dilation[0], ker_dilation[1],
                ker_stride[0], ker_stride[1])
            Layers.append(txt)

            # temporary tensors
            # NOTE(review): the name uses the dataset-loop index i while the
            # pooling branch below uses i_l -- confirm they cannot collide.
            output_tensor = "OutL{}".format(i)
            txt = 'TCArgInfo ("signed char *__restrict", "{}", ARG_SCOPE_LOCAL, ARG_DIR_INOUT, 0, AT_MEM_UNDEF, 0)'.format(
                output_tensor)
            ActivEdges.append(txt)
            txt = 'AddNode("Layer{}",Bindings(5,GNodeArg(GNA_IN, "{}", 0),GNodeArg(GNA_IN, "FL{}", 0),GNodeArg(GNA_IN, "BL{}", 0),GNodeArg(GNA_IN, "ML{}", 0),GNodeArg(GNA_OUT, "{}", 0) ))'.format(
                i_l, input_tensor, i_l, i_l, i_l, output_tensor)
            GraphNodes.append(txt)
            #
            input_tensor = output_tensor
            i_l += 1

        elif type(conv) in [nn.Linear]:
            out_features = conv.out_features
            in_features = conv.in_features

            file_txt = 'L{}_weight_L3.bin'.format(i_l)
            data = get_2D_wgt_tensor_to_list_HWC(conv.weight.data)
            if args.dump:
                dump_int8_tensor_to_file(folder_bindump + file_txt, data)
            txt = 'TCArgInfo ("signed char *__restrict", "FL{}", ARG_SCOPE_GLOBAL, ARG_DIR_CONSTIN, 0, AT_MEM_UNDEF, ConstInfo("{}", 1, 1, 1, 0))'.format(
                i_l, folder_bindump + file_txt)
            WeightEdges.append(txt)

            # Bug fix: the bias used to be dumped to (and referenced from)
            # the *weight* file above because file_txt was never updated --
            # give the bias its own file, matching the conv branch naming.
            file_txt = 'L{}_bias_L3.bin'.format(i_l)
            data = get_1D_bias_tensor_to_list_HWC(conv.bias.data)
            if args.dump:
                dump_int16_tensor_to_file(folder_bindump + file_txt, data)
            txt = 'TCArgInfo ( "short int * __restrict__", "BL{}", ARG_SCOPE_GLOBAL, ARG_DIR_CONSTIN, 0, AT_MEM_UNDEF, ConstInfo( "{}", 1, 1, 1, 0))'.format(
                i_l, folder_bindump + file_txt)
            WeightEdges.append(txt)

            txt = 'CNN_LinearReLU("Layer{}",&CtrlH, 1,1,2,2,0,0,0,0,1,1,1,1,{},{},KOP_LINEAR, KOP_NONE)'.format(
                i_l, in_features, out_features)
            Layers.append(txt)

            # network output tensor
            output_tensor = "Out"
            txt = 'TCArgInfoA("short int *__restrict", "{}", ARG_SCOPE_ARG, ARG_DIR_OUT, AT_MEM_L2, AT_MEM_L2, 0)'.format(
                output_tensor)
            WeightEdges.append(txt)
            txt = 'AddNode("Layer{}",Bindings(4,GNodeArg(GNA_IN, "{}", 0),GNodeArg(GNA_IN, "FL{}", 0),GNodeArg(GNA_IN, "BL{}", 0),GNodeArg(GNA_OUT, "{}", 0)))'.format(
                i_l, input_tensor, i_l, i_l, output_tensor)
            GraphNodes.append(txt)
            input_tensor = output_tensor
            i_l += 1

        # pooling layer append to the last convolution layer
        if item['pool'] is not None:
            pool = item['pool'][0]
            if type(pool) is nn.AvgPool2d:
                txt = 'CNN_PoolReLU("Layer{}",&CtrlH, 1,1,0,0, 1,1, {},{}, {},{},KOP_AVGPOOL,{},{}, 1,1,{},{}, 1, KOP_NONE)'.format(
                    i_l, input_size[1], input_size[1], input_size[2],
                    input_size[3], pool.kernel_size, pool.kernel_size,
                    pool.stride, pool.stride)
                Layers.append(txt)
                output_tensor = "OutL{}".format(i_l)
                txt = 'TCArgInfo ("signed char *__restrict", "{}", ARG_SCOPE_LOCAL, ARG_DIR_INOUT, 0, AT_MEM_UNDEF, 0)'.format(
                    output_tensor)
                ActivEdges.append(txt)
                txt = 'AddNode("Layer{}",Bindings(2,GNodeArg(GNA_IN, "{}", 0),GNodeArg(GNA_OUT, "{}", 0)))'.format(
                    i_l, input_tensor, output_tensor)
                GraphNodes.append(txt)
                input_tensor = output_tensor
                i_l += 1

    # print Graph Model
    f_txt = ''
    f_txt += 'void {}()'.format(args.network_name) + '{\n'
    f_txt += '\tCNN_GenControl_T CtrlH;\n'
    f_txt += '\tCNN_InitGenCtrl(&CtrlH);\n'
    f_txt += '\tCNN_SetGenCtrl(&CtrlH, "EnableIm2Col", AT_OPT_ON);\n'
    f_txt += '\tCNN_SetGenCtrl(&CtrlH, "PADTYPE", PAD_BALANCED_RIGHT);\n\n'
    for item in Layers:
        f_txt += '\t' + item + ';\n'
    f_txt += '\n\tCreateGraph("{}",\n'.format(args.network_name)
    f_txt += '\t\tCArgs({},\n'.format(len(WeightEdges))
    for i, item in enumerate(WeightEdges):
        f_txt += '\t\t\t' + item
        f_txt += '\n' if i == len(WeightEdges) - 1 else ',\n'
    f_txt += '\t\t),\n\t\tCArgs({},\n'.format(len(ActivEdges))
    for i, item in enumerate(ActivEdges):
        f_txt += '\t\t\t' + item
        f_txt += '\n' if i == len(ActivEdges) - 1 else ',\n'
    f_txt += '\t\t)\n\t);\n'
    for i, item in enumerate(GraphNodes):
        f_txt += '\t' + item + ';\n'
    f_txt += '\tCloseGraph();\n}'

    #if args.dump:
    # Write the generated AutoTiler graph; a context manager guarantees the
    # file handle is closed even if write() raises.
    file_name = args.network_name + '.c'
    with open(file_name, "w") as newFile:
        newFile.write(f_txt)
def main():
    """Train/evaluate a quantization-aware model driven by the CLI ``args``.

    Sets up logging and result files, builds the model from ``args.model``,
    optionally wraps it in a ``quantization.QuantOp``, resumes from a
    checkpoint if requested, then either runs a single evaluation
    (``args.evaluate``) or the full train/validate epoch loop, saving the
    best checkpoint and appending metrics to ``results``.
    """
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()
    weight_bits = int(args.weight_bits)
    activ_bits = int(args.activ_bits)

    # Bug fix: all `is ''` / `is not ''` literal identity comparisons
    # replaced with `==` / `!=` (identity on str literals is an
    # implementation detail and raises SyntaxWarning).
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')
    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        print('Selected GPUs: ', args.gpus)
        torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    model = models.__dict__[args.model]
    nClasses = get_num_classes(args.dataset)
    model_config = {'input_size': args.input_size, 'dataset': args.dataset,
                    'num_classes': nClasses, 'type_quant': args.type_quant,
                    'weight_bits': weight_bits, 'activ_bits': activ_bits,
                    'activ_type': args.activ_type,
                    'width_mult': float(args.mobilenet_width),
                    'input_dim': float(args.mobilenet_input)}
    if args.model_config != '':
        model_config = dict(model_config, **literal_eval(args.model_config))
    model = model(**model_config)
    logging.info("created model with configuration: %s", model_config)
    print(model)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train':
        get_transform(args.dataset, input_size=args.input_size, augment=True),
        'eval':
        get_transform(args.dataset, input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(
        model, 'regime', {
            0: {
                'optimizer': args.optimizer,
                'lr': args.lr,
                'momentum': args.momentum,
                'weight_decay': args.weight_decay
            }
        })
    print(transform)
    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)()
    criterion.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    if args.quantizer:
        # smaller fixed batch for evaluating the deployment model
        val_quant_loader = torch.utils.data.DataLoader(
            val_data,
            batch_size=32,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)
    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    # define optimizer: clipping parameters get their own weight decay
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if 'clip_val' in key:
            params += [{'params': value, 'weight_decay': 1e-4}]
        else:
            params += [{'params': value}]
    optimizer = torch.optim.SGD(params, lr=0.1)
    logging.info('training regime: %s', regime)

    # define quantizer
    # Bug fix: add_config is referenced in save_checkpoint below even when
    # the quantizer is disabled -- give it a safe default up front.
    add_config = []
    if args.quantizer:
        if args.mem_constraint != '':
            mem_contraints = json.loads(args.mem_constraint)
            print('This is the memory constraint:', mem_contraints)
            if mem_contraints is not None:
                x_test = torch.Tensor(1, 3, args.mobilenet_input,
                                      args.mobilenet_input)
                add_config = memory_driven_quant(model, x_test,
                                                 mem_contraints[0],
                                                 mem_contraints[1],
                                                 args.mixed_prec_quant)
                if add_config == -1:
                    # NOTE(review): execution continues with add_config == -1
                    # here, matching the original behaviour -- confirm this
                    # should not abort instead.
                    print('The quantization process failed!')
            else:
                add_config = []
        else:
            mem_constraint = None
            if args.quant_add_config != '':
                add_config = json.loads(args.quant_add_config)
            else:
                add_config = []
        quantizer = quantization.QuantOp(
            model, args.type_quant, weight_bits,
            batch_fold_type=args.batch_fold_type,
            batch_fold_delay=args.batch_fold_delay,
            act_bits=activ_bits,
            add_config=add_config)
        quantizer.deployment_model.type(args.type)
        quantizer.add_params_to_optimizer(optimizer)
    else:
        quantizer = None
    #exit(0)

    # multi gpus
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model.type(args.type)

    if args.resume:
        checkpoint_file = args.resume
        if os.path.isdir(checkpoint_file):
            checkpoint_file = os.path.join(checkpoint_file,
                                           'model_best.pth.tar')
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint_loaded = torch.load(checkpoint_file)
            checkpoint = checkpoint_loaded['state_dict']
            model.load_state_dict(checkpoint, strict=False)
            print('Model pretrained')
        else:
            logging.error("no checkpoint found at '%s'", args.resume)

    if args.quantizer:
        quantizer.init_parameters()

    if args.evaluate:
        # evaluate on validation set
        if args.quantizer:
            # evaluate deployment model on validation set
            quantizer.generate_deployment_model()
            val_quant_loss, val_quant_prec1, val_quant_prec5 = validate(
                val_quant_loader, quantizer.deployment_model, criterion, 0,
                'deployment')
        else:
            val_quant_loss, val_quant_prec1, val_quant_prec5 = 0, 0, 0
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, 0, quantizer)
        logging.info('\n This is the results from evaluation only: '
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \t'
                     'Validation Quant Prec@1 {val_quant_prec1:.3f} \t'
                     'Validation Quant Prec@5 {val_quant_prec5:.3f} \n'.format(
                         val_prec1=val_prec1,
                         val_prec5=val_prec5,
                         val_quant_prec1=val_quant_prec1,
                         val_quant_prec5=val_quant_prec5))
        exit(0)

    for epoch in range(args.start_epoch, args.epochs):
        optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(train_loader, model,
                                                     criterion, epoch,
                                                     optimizer, quantizer)

        # evaluate on validation set
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, epoch, quantizer)
        if args.quantizer:
            # evaluate deployment model on validation set
            quantizer.generate_deployment_model()
            val_quant_loss, val_quant_prec1, val_quant_prec5 = validate(
                val_quant_loader, quantizer.deployment_model, criterion,
                epoch, 'deployment')
        else:
            val_quant_loss, val_quant_prec1, val_quant_prec5 = 0, 0, 0

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)

        #save_model
        if args.save_check:
            print('Saving Model!! Accuracy : ', best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model': args.model,
                    'config': model_config,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'regime': regime,
                    'quantizer': quantizer,
                    'add_config': add_config,
                    'fold_type': args.batch_fold_type
                },
                is_best,
                path=save_path)

        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \t'
                     'Validation Quant Prec@1 {val_quant_prec1:.3f} \t'
                     'Validation Quant Prec@5 {val_quant_prec5:.3f} \n'.format(
                         epoch + 1,
                         train_loss=train_loss,
                         val_loss=val_loss,
                         train_prec1=train_prec1,
                         val_prec1=val_prec1,
                         train_prec5=train_prec5,
                         val_prec5=val_prec5,
                         val_quant_prec1=val_quant_prec1,
                         val_quant_prec5=val_quant_prec5))

        results.add(epoch=epoch + 1,
                    train_loss=train_loss,
                    val_loss=val_loss,
                    train_error1=100 - train_prec1,
                    val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5,
                    val_error5=100 - val_prec5,
                    val_quant_error1=100 - val_quant_prec1,
                    val_quant_error5=100 - val_quant_prec5)
        results.save()
def main():
    """NEMO quantization-aware training driver.

    Builds the selected torchvision/hub/project model, optionally transforms
    it into a NEMO fake-quantized network (``args.quantize``), calibrates or
    resumes it, then runs the train/validate loop logging to TensorBoard and
    ``results``, saving checkpoint/best models via ``nemo.utils``.
    """
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()
    weight_bits = int(args.weight_bits)
    activ_bits = int(args.activ_bits)

    # Bug fix: `is ''` / `is not ''` literal identity comparisons replaced
    # with `==` / `!=` (interning-dependent, SyntaxWarning on modern Python).
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')
    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)
    writer = SummaryWriter()

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        print('Selected GPUs: ', args.gpus)
        # torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    # Bug fix: model_config must exist before the 'mobilenet' branch below
    # calls model(**model_config); the original built it *after* model
    # creation, raising NameError for args.model == 'mobilenet'.
    nClasses = get_num_classes(args.dataset)
    model_config = {'input_size': args.input_size, 'dataset': args.dataset,
                    'num_classes': nClasses,
                    'width_mult': float(args.mobilenet_width),
                    'input_dim': float(args.mobilenet_input)}
    if args.model_config != '':
        model_config = dict(model_config, **literal_eval(args.model_config))

    if args.model == 'mobilenet':
        model = models.__dict__[args.model]
        model = model(**model_config)
    elif args.model == 'mobilenetv2':
        model = torch.hub.load('pytorch/vision:v0.6.0',
                               'mobilenet_v2',
                               pretrained=True)
    elif args.model == 'resnet18':
        model = torch.hub.load('pytorch/vision:v0.6.0',
                               'resnet18',
                               pretrained=True)
    else:  #if args.model == 'mobilenet_v3':
        model = models.mobilenetv3_large(
            width_mult=float(args.mobilenet_width))
        model.load_state_dict(
            torch.load(
                "models/mobilenet_v3/mobilenetv3-large-0.75-9632d2a8.pth"))

    logging.info("created model with configuration: %s", model_config)
    print(model)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train':
        get_transform(args.dataset, input_size=args.input_size, augment=True),
        'eval':
        get_transform(args.dataset, input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(
        model, 'regime', {
            0: {
                'optimizer': args.optimizer,
                'lr': args.lr,
                'momentum': args.momentum,
                'weight_decay': args.weight_decay
            }
        })
    print(transform)
    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)()
    criterion.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    # small random subset for quick validation passes
    fast_val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=True,
        sampler=torch.utils.data.RandomSampler(val_data,
                                               replacement=True,
                                               num_samples=1000))
    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    # Bug fix: the fast train loader sampled indices over val_data while
    # loading from train_data (copy-paste error) -- sample train_data.
    fast_train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=True,
        sampler=torch.utils.data.RandomSampler(train_data,
                                               replacement=True,
                                               num_samples=100000))

    # define optimizer parameter groups (clipping params get extra decay)
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if 'alpha' in key or 'beta' in key:
            params += [{'params': value, 'weight_decay': 1e-4}]
        else:
            params += [{'params': value, 'weight_decay': 1e-5}]

    mixed_prec_dict = None
    if args.mixed_prec_dict is not None:
        mixed_prec_dict = nemo.utils.precision_dict_from_json(
            args.mixed_prec_dict)
        print("Load mixed precision dict from outside")
    elif args.mem_constraint != '':
        mem_contraints = json.loads(args.mem_constraint)
        print('This is the memory constraint:', mem_contraints)
        if mem_contraints is not None:
            x_test = torch.Tensor(1, 3, 224, 224)
            mixed_prec_dict = memory_driven_quant(model, x_test,
                                                  mem_contraints[0],
                                                  mem_contraints[1],
                                                  args.mixed_prec_quant,
                                                  use_sawb=args.use_sawb)

    # multi gpus
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

    # mobilenet_width = float(args.mobilenet_width)
    # mobilenet_width_s = args.mobilenet_width
    # mobilenet_input = int(args.mobilenet_input)

    if args.resume is None:
        # baseline accuracy before any quantization
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, 0, None)
        print("[NEMO] Full-precision model: top-1=%.2f top-5=%.2f" %
              (val_prec1, val_prec5))

    if args.quantize:
        # transform the model in a NEMO FakeQuantized representation
        model = nemo.transform.quantize_pact(model,
                                             dummy_input=torch.randn(
                                                 (1, 3, 224,
                                                  224)).to('cuda'))

    if args.resume is not None:
        checkpoint_file = args.resume
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint_loaded = torch.load(checkpoint_file)
            checkpoint = checkpoint_loaded['state_dict']
            model.load_state_dict(checkpoint, strict=True)
            prec_dict = checkpoint_loaded.get('precision')
        else:
            logging.error("no checkpoint found at '%s'", args.resume)
            import sys
            sys.exit(1)

    if args.resume is None:
        print("[NEMO] Model calibration")
        model.change_precision(bits=20)
        model.reset_alpha_weights()
        if args.initial_folding:
            model.fold_bn()
            # use DFQ for weight equalization
            if args.initial_equalization:
                model.equalize_weights_dfq()
        elif args.initial_equalization:
            # NOTE(review): nesting reconstructed from a whitespace-mangled
            # source -- confirm reset_alpha_weights belongs to this branch.
            model.equalize_weights_lsq(verbose=True)
            model.reset_alpha_weights()
            # model.reset_alpha_weights(use_method='dyn_range', dyn_range_cutoff=0.05, verbose=True)
        # calibrate after equalization
        with model.statistics_act():
            val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                      criterion, 0, None)
        model.reset_alpha_act()
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, 0, None)
        print("[NEMO] 20-bit calibrated model: top-1=%.2f top-5=%.2f" %
              (val_prec1, val_prec5))
        nemo.utils.save_checkpoint(model,
                                   None,
                                   0,
                                   acc=val_prec1,
                                   checkpoint_name='resnet18_calibrated',
                                   checkpoint_suffix=args.suffix)
        model.change_precision(bits=activ_bits)
        model.change_precision(bits=weight_bits, scale_activations=False)
        # init weight clipping parameters to their reset value and disable
        # their gradient
        model.reset_alpha_weights()
        if args.use_sawb:
            model.disable_grad_sawb()
            model.weight_clip_sawb()
        # hand-tuned per-layer mixed-precision assignment
        mixed_prec_dict_all = model.export_precision()
        mixed_prec_dict_all['relu']['x_bits'] = 2
        mixed_prec_dict_all['layer1.0.relu']['x_bits'] = 4
        mixed_prec_dict_all['layer3.1.conv1']['W_bits'] = 4
        mixed_prec_dict_all['layer3.1.conv2']['W_bits'] = 4
        mixed_prec_dict_all['layer4.0.conv1']['W_bits'] = 2
        mixed_prec_dict_all['layer4.0.conv2']['W_bits'] = 2
        mixed_prec_dict_all['layer4.1.conv1']['W_bits'] = 2
        mixed_prec_dict_all['layer4.1.conv2']['W_bits'] = 2
        model.change_precision(bits=1, min_prec_dict=mixed_prec_dict_all)
    else:
        print("[NEMO] Not calibrating model, as it is pretrained")
        model.change_precision(bits=1, min_prec_dict=prec_dict)

    optimizer = torch.optim.Adam([
        {
            'params': model.get_nonclip_parameters(),
            'lr': args.lr,
            'weight_decay': 1e-5
        },
        {
            'params': model.get_clip_parameters(),
            'lr': args.lr,
            'weight_decay': 0.001
        },
    ])
    reset_grad_flow(model, __global_ave_grads, __global_max_grads)

    for epoch in range(args.start_epoch, args.epochs):
        # optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch; BN is absorbed on the first epoch and frozen
        # afterwards
        train_loss, train_prec1, train_prec5 = train(
            train_loader,
            model,
            criterion,
            epoch,
            optimizer,
            freeze_bn=True if epoch > 0 else False,
            absorb_bn=True if epoch == 0 else False,
            writer=writer)

        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, epoch)
        writer.add_scalar('Loss/val', val_loss, epoch * len(train_loader))
        writer.add_scalar('Accuracy/val', val_prec1,
                          epoch * len(train_loader))

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)

        #save_model
        if args.save_check:
            nemo.utils.save_checkpoint(
                model,
                optimizer,
                0,
                acc=val_prec1,
                checkpoint_name='resnet18%s_checkpoint' %
                ("_mixed" if mixed_prec_dict is not None else ""),
                checkpoint_suffix=args.suffix)
            if is_best:
                nemo.utils.save_checkpoint(
                    model,
                    optimizer,
                    0,
                    acc=val_prec1,
                    checkpoint_name='resnet18%s_best' %
                    ("_mixed" if mixed_prec_dict is not None else ""),
                    checkpoint_suffix=args.suffix)

        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \t'.format(
                         epoch + 1,
                         train_loss=train_loss,
                         val_loss=val_loss,
                         train_prec1=train_prec1,
                         val_prec1=val_prec1,
                         train_prec5=train_prec5,
                         val_prec5=val_prec5))

        results.add(epoch=epoch + 1,
                    train_loss=train_loss,
                    val_loss=val_loss,
                    train_error1=100 - train_prec1,
                    val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5,
                    val_error5=100 - val_prec5)
        results.save()
def main():
    """Entry point for MobileNet quantization with NEMO.

    Builds a MobileNet (v1/v2/v3) classifier, optionally quantizes it to a
    NEMO FakeQuantized (FQ) representation, calibrates or resumes it, and then
    either exports it (QD/ID stages + ONNX) or fine-tunes it for
    ``args.epochs`` epochs, logging results via ``results``/``logging``.

    Relies on module-level globals: ``parser``, ``models``, ``validate``,
    ``train``, ``get_dataset``, ``get_transform``, ``get_num_classes``,
    ``setup_logging``, ``ResultsLog``, ``memory_driven_quant``, and ``nemo``.
    """
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()

    weight_bits = int(args.weight_bits)
    activ_bits = int(args.activ_bits)

    # BUGFIX: was `args.save is ''` — identity comparison with a string
    # literal is implementation-dependent (SyntaxWarning on CPython >= 3.8);
    # use equality instead.
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')

    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        print('Selected GPUs: ', args.gpus)
        torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    if args.model == 'mobilenet':
        # class/constructor looked up by name; instantiated below
        model = models.__dict__[args.model]
    elif args.model == 'mobilenetv2':
        model = torch.hub.load('pytorch/vision:v0.6.0', 'mobilenet_v2',
                               pretrained=True)
    else:  # if args.model == 'mobilenet_v3':
        model = models.mobilenetv3_large(
            width_mult=float(args.mobilenet_width))
        model.load_state_dict(
            torch.load(
                "models/mobilenet_v3/mobilenetv3-large-0.75-9632d2a8.pth"))

    nClasses = get_num_classes(args.dataset)
    model_config = {'input_size': args.input_size, 'dataset': args.dataset,
                    'num_classes': nClasses,
                    'width_mult': float(args.mobilenet_width),
                    'input_dim': float(args.mobilenet_input)}

    # BUGFIX: was `args.model_config is not ''` — same literal-identity
    # issue as above.
    if args.model_config != '':
        model_config = dict(model_config, **literal_eval(args.model_config))

    # NOTE(review): for the 'mobilenetv2'/'mobilenet_v3' branches `model` is
    # already an nn.Module instance here, so this call invokes the module
    # with config kwargs rather than constructing it — looks intended only
    # for the 'mobilenet' constructor path; confirm against the other paths.
    model = model(**model_config)
    logging.info("created model with configuration: %s", model_config)
    print(model)

    # generator avoids materializing a throwaway list
    num_parameters = sum(l.nelement() for l in model.parameters())
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train': get_transform(args.dataset,
                               input_size=args.input_size, augment=True),
        'eval': get_transform(args.dataset,
                              input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(
        model, 'regime',
        {0: {'optimizer': args.optimizer,
             'lr': args.lr,
             'momentum': args.momentum,
             'weight_decay': args.weight_decay}})
    print(transform)

    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)()
    criterion.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    # define optimizer parameter groups
    # NOTE(review): `params` is built but never handed to the optimizer
    # below (Adam is created from model.parameters()) — confirm whether the
    # per-group weight decay was meant to be used.
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if 'alpha' in key or 'beta' in key:
            # clipping parameters get a stronger weight decay
            params += [{'params': value, 'weight_decay': 1e-4}]
        else:
            params += [{'params': value, 'weight_decay': 1e-5}]

    # Mixed-precision assignment: either loaded from a JSON file or derived
    # from a memory constraint.
    mixed_prec_dict = None
    if args.mixed_prec_dict is not None:
        mixed_prec_dict = nemo.utils.precision_dict_from_json(
            args.mixed_prec_dict)
        print("Load mixed precision dict from outside")
    # BUGFIX: was `args.mem_constraint is not ''` — literal-identity issue.
    elif args.mem_constraint != '':
        mem_contraints = json.loads(args.mem_constraint)
        print('This is the memory constraint:', mem_contraints)
        if mem_contraints is not None:
            x_test = torch.Tensor(1, 3, args.mobilenet_input,
                                  args.mobilenet_input)
            mixed_prec_dict = memory_driven_quant(model, x_test,
                                                  mem_contraints[0],
                                                  mem_contraints[1],
                                                  args.mixed_prec_quant)

    # multi gpus
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model.type(args.type)

    mobilenet_width_s = args.mobilenet_width
    mobilenet_input = int(args.mobilenet_input)

    # Baseline accuracy of the full-precision network (fresh runs only).
    if args.resume is None:
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, 0, None)
        print("[NEMO] Full-precision model: top-1=%.2f top-5=%.2f"
              % (val_prec1, val_prec5))

    if args.quantize:
        # transform the model in a NEMO FakeQuantized representation
        model = nemo.transform.quantize_pact(
            model,
            dummy_input=torch.randn(
                (1, 3, mobilenet_input, mobilenet_input)).to('cuda'))

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=1e-5)

    if args.resume is not None:
        checkpoint_file = args.resume
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint_loaded = torch.load(checkpoint_file)
            checkpoint = checkpoint_loaded['state_dict']
            model.load_state_dict(checkpoint, strict=True)
            # per-layer precision dict saved with the checkpoint (may be None)
            prec_dict = checkpoint_loaded.get('precision')
        else:
            logging.error("no checkpoint found at '%s'", args.resume)
            import sys
            sys.exit(1)

    if args.resume is None:
        # Calibration path: run at a very high precision (20 bits) to gather
        # activation statistics, then drop to the target precision.
        print("[NEMO] Model calibration")
        model.change_precision(bits=20)
        model.reset_alpha_weights()
        if args.initial_folding:
            model.fold_bn()
            # use DFQ for weight equalization
            if args.initial_equalization:
                model.equalize_weights_dfq()
        elif args.initial_equalization:
            model.equalize_weights_lsq(verbose=True)
            model.reset_alpha_weights()
        # model.reset_alpha_weights(use_method='dyn_range', dyn_range_cutoff=0.05, verbose=True)
        # calibrate after equalization
        with model.statistics_act():
            val_loss, val_prec1, val_prec5 = validate(
                val_loader, model, criterion, 0, None)
        # # use this in place of the usual calibration, because PACT_Act's descend from ReLU6 and
        # # the trained weights already assume the presence of a clipping effect
        # # this should be integrated in NEMO by saving the "origin" of the PACT_Act!
        # for i in range(0,27):
        #     model.model[i][3].alpha.data[:] = min(model.model[i][3].alpha.item(), model.model[i][3].max)
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, 0, None)
        print("[NEMO] 20-bit calibrated model: top-1=%.2f top-5=%.2f"
              % (val_prec1, val_prec5))
        nemo.utils.save_checkpoint(
            model, optimizer, 0, acc=val_prec1,
            checkpoint_name='mobilenet_%s_%d_calibrated'
            % (mobilenet_width_s, mobilenet_input),
            checkpoint_suffix=args.suffix)
        model.change_precision(bits=activ_bits)
        model.change_precision(bits=weight_bits, scale_activations=False)
        # NOTE(review): interactive debug breakpoint left in — drops into an
        # IPython shell here on every calibration run; remove for unattended use.
        import IPython
        IPython.embed()
    else:
        print("[NEMO] Not calibrating model, as it is pretrained")
        model.change_precision(bits=1, min_prec_dict=prec_dict)
        ### val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion, 0, None)
        ### print("[NEMO] pretrained model: top-1=%.2f top-5=%.2f" % (val_prec1, val_prec5))

    # Overlay the mixed-precision assignment on top of the current precision.
    if mixed_prec_dict is not None:
        mixed_prec_dict_all = model.export_precision()
        for k in mixed_prec_dict.keys():
            mixed_prec_dict_all[k] = mixed_prec_dict[k]
        model.change_precision(bits=1, min_prec_dict=mixed_prec_dict_all)

    # freeze and quantize BN parameters
    # nemo.transform.bn_quantizer(model, precision=nemo.precision.Precision(bits=20))
    # model.freeze_bn()
    # model.fold_bn()
    # model.equalize_weights_dfq(verbose=True)

    val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion,
                                              0, None)

    # print("[NEMO] Rounding weights")
    # model.round_weights()

    if args.pure_export:
        # Export-only path: FQ -> QD -> ID stages, then ONNX, then exit.
        model.freeze_bn(reset_stats=True, disable_grad=True)
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, 0, None,
                                                  shorten=10)
        print("[NEMO] FQ model: top-1=%.2f top-5=%.2f"
              % (val_prec1, val_prec5))
        input_bias_dict = {'model.0.0': +1.0, 'model.0.1': +1.0}
        remove_bias_dict = {'model.0.1': 'model.0.2'}
        # input bias quantized on the input grid (eps_in = 2/255)
        input_bias = math.floor(1.0 / (2. / 255)) * (2. / 255)
        model.qd_stage(eps_in=2. / 255,
                       add_input_bias_dict=input_bias_dict,
                       remove_bias_dict=remove_bias_dict,
                       int_accurate=True)
        model.model[0][0].value = input_bias
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, 0, None,
                                                  input_bias=input_bias,
                                                  eps_in=2. / 255,
                                                  mode='qd', shorten=10)
        print("[NEMO] QD model: top-1=%.2f top-5=%.2f"
              % (val_prec1, val_prec5))
        model.id_stage()
        # in the integer-deployable stage the pad value is on the integer grid
        model.model[0][0].value = input_bias * (255. / 2)
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, 0, None,
                                                  input_bias=input_bias,
                                                  eps_in=2. / 255,
                                                  mode='id', shorten=10)
        print("[NEMO] ID model: top-1=%.2f top-5=%.2f"
              % (val_prec1, val_prec5))
        nemo.utils.export_onnx('mobilenet_%s_%d.onnx'
                               % (mobilenet_width_s, mobilenet_input),
                               model, model,
                               (3, mobilenet_input, mobilenet_input),
                               perm=None)
        import sys
        sys.exit(0)

    if args.terminal:
        # Debug/verification path: compare FQ vs QD vs ID activations
        # layer-by-layer and dump golden vectors, then exit.
        fqs = copy.deepcopy(model.state_dict())
        model.freeze_bn(reset_stats=True, disable_grad=True)
        bin_fq, bout_fq, _ = nemo.utils.get_intermediate_activations(
            model, validate, val_loader, model, criterion, 0, None,
            shorten=1)
        torch.save({'in': bin_fq['model.0.0'][0]}, "input_fq.pth")
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, 0, None)
        print("[NEMO] FQ model: top-1=%.2f top-5=%.2f"
              % (val_prec1, val_prec5))
        input_bias_dict = {'model.0.0': +1.0, 'model.0.1': +1.0}
        remove_bias_dict = {'model.0.1': 'model.0.2'}
        input_bias = math.floor(1.0 / (2. / 255)) * (2. / 255)
        model.qd_stage(eps_in=2. / 255,
                       add_input_bias_dict=input_bias_dict,
                       remove_bias_dict=remove_bias_dict,
                       int_accurate=True)
        # fix ConstantPad2d
        model.model[0][0].value = input_bias
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, 0, None,
                                                  input_bias=input_bias,
                                                  eps_in=2. / 255,
                                                  mode='qd', shorten=50)
        print("[NEMO] QD model: top-1=%.2f top-5=%.2f"
              % (val_prec1, val_prec5))
        qds = copy.deepcopy(model.state_dict())
        bin_qd, bout_qd, _ = nemo.utils.get_intermediate_activations(
            model, validate, val_loader, model, criterion, 0, None,
            input_bias=input_bias, eps_in=2. / 255, mode='qd', shorten=1)
        torch.save({'qds': qds, 'fqs': fqs}, "states.pth")
        torch.save({'in': bin_qd['model.0.0'][0]}, "input_qd.pth")
        # FQ-vs-QD per-layer absolute differences, reported in multiples of
        # the layer's quantum (eps).
        diff = collections.OrderedDict()
        for k in bout_fq.keys():
            diff[k] = (bout_fq[k] - bout_qd[k]).to('cpu').abs()
        for i in range(0, 26):
            for j in range(3, 4):
                k = 'model.%d.%d' % (i, j)
                # next layer's name (wraps to the next block after index 3)
                kn = 'model.%d.%d' % (i if j < 3 else i + 1,
                                      j + 1 if j < 3 else 0)
                eps = model.get_eps_at(kn, eps_in=2. / 255)[0]
                print("%s:" % k)
                idx = diff[k] > eps
                n = idx.sum()
                t = (diff[k] > -1e9).sum()  # element count
                max_eps = torch.ceil(
                    diff[k].max()
                    / model.get_eps_at('model.%d.0' % (i + 1),
                                       2. / 255)[0]).item()
                mean_eps = torch.ceil(
                    diff[k][idx].mean()
                    / model.get_eps_at('model.%d.0' % (i + 1),
                                       2. / 255)[0]).item()
                try:
                    print(" max: %.3f (%d eps)"
                          % (diff[k].max().item(), max_eps))
                    print(" mean: %.3f (%d eps) (only diff. elements)"
                          % (diff[k][idx].mean().item(), mean_eps))
                    print(" #diff: %d/%d (%.1f%%)"
                          % (n, t, float(n) / float(t) * 100))
                except ValueError:
                    # mean() of an empty selection raises — no differences
                    print(" #diff: 0/%d (0%%)" % (t, ))
        model.id_stage()
        # fix ConstantPad2d
        model.model[0][0].value = input_bias * (255. / 2)
        ids = model.state_dict()
        bin_id, bout_id, _ = nemo.utils.get_intermediate_activations(
            model, validate, val_loader, model, criterion, 0, None,
            input_bias=input_bias, eps_in=2. / 255, mode='id', shorten=1)
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, 0, None,
                                                  input_bias=input_bias,
                                                  eps_in=2. / 255,
                                                  mode='id', shorten=50)
        print("[NEMO] ID model: top-1=%.2f top-5=%.2f"
              % (val_prec1, val_prec5))
        try:
            os.makedirs("golden")
        except Exception:
            # directory may already exist; best-effort
            pass
        torch.save({'in': bin_fq['model.0.0'][0]}, "input_id.pth")
        # ID-vs-QD comparison (ID outputs are integers, rescaled by eps).
        diff = collections.OrderedDict()
        for i in range(0, 26):
            for j in range(3, 4):
                k = 'model.%d.%d' % (i, j)
                kn = 'model.%d.%d' % (i if j < 3 else i + 1,
                                      j + 1 if j < 3 else 0)
                eps = model.get_eps_at(kn, eps_in=2. / 255)[0]
                diff[k] = (bout_id[k] * eps - bout_qd[k]).to('cpu').abs()
                print("%s:" % k)
                idx = diff[k] >= eps
                n = idx.sum()
                t = (diff[k] > -1e9).sum()
                max_eps = torch.ceil(diff[k].max() / eps).item()
                mean_eps = torch.ceil(diff[k][idx].mean() / eps).item()
                try:
                    print(" max: %.3f (%d eps)"
                          % (diff[k].max().item(), max_eps))
                    print(" mean: %.3f (%d eps) (only diff. elements)"
                          % (diff[k][idx].mean().item(), mean_eps))
                    print(" #diff: %d/%d (%.1f%%)"
                          % (n, t, float(n) / float(t) * 100))
                except ValueError:
                    print(" #diff: 0/%d (0%%)" % (t, ))
        # NOTE(review): interactive debug breakpoint left in.
        import IPython
        IPython.embed()
        # Dump golden input/output vectors (first sample of the batch) for
        # every module, in HWC order when the tensor is an image map.
        bidx = 0
        for n, m in model.named_modules():
            try:
                actbuf = bin_id[n][0][bidx].permute((1, 2, 0))
            except RuntimeError:
                # non-3D activation: dump as-is
                actbuf = bin_id[n][0][bidx]
            np.savetxt("golden/golden_input_%s.txt" % n,
                       actbuf.cpu().detach().numpy().flatten(),
                       header="input (shape %s)" % (list(actbuf.shape)),
                       fmt="%.3f", delimiter=',', newline=',\n')
        for n, m in model.named_modules():
            try:
                actbuf = bout_id[n][bidx].permute((1, 2, 0))
            except RuntimeError:
                actbuf = bout_id[n][bidx]
            np.savetxt("golden/golden_%s.txt" % n,
                       actbuf.cpu().detach().numpy().flatten(),
                       header="%s (shape %s)" % (n, list(actbuf.shape)),
                       fmt="%.3f", delimiter=',', newline=',\n')
        nemo.utils.export_onnx("model_int.onnx", model, model,
                               (3, 224, 224), perm=None)
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, 0, None,
                                                  input_bias=input_bias,
                                                  eps_in=2. / 255)
        print("[NEMO] ID model: top-1=%.2f top-5=%.2f"
              % (val_prec1, val_prec5))
        # NOTE(review): interactive debug breakpoint left in.
        import IPython
        IPython.embed()
        import sys
        sys.exit(0)

    # Fine-tuning loop (reached only when neither export path exits above).
    for epoch in range(args.start_epoch, args.epochs):
        # optimizer = adjust_optimizer(optimizer, epoch, regime)

        # train for one epoch; BN is absorbed on the first epoch and frozen
        # afterwards
        train_loss, train_prec1, train_prec5 = train(
            train_loader, model, criterion, epoch, optimizer,
            freeze_bn=True if epoch > 0 else False,
            absorb_bn=True if epoch == 0 else False)
        val_loss, val_prec1, val_prec5 = validate(val_loader, model,
                                                  criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)

        # save_model
        if args.save_check:
            nemo.utils.save_checkpoint(
                model, optimizer, 0, acc=val_prec1,
                checkpoint_name='mobilenet_%s_%d%s_checkpoint'
                % (mobilenet_width_s, mobilenet_input,
                   "_mixed" if mixed_prec_dict is not None else ""),
                checkpoint_suffix=args.suffix)
            if is_best:
                nemo.utils.save_checkpoint(
                    model, optimizer, 0, acc=val_prec1,
                    checkpoint_name='mobilenet_%s_%d%s_best'
                    % (mobilenet_width_s, mobilenet_input,
                       "_mixed" if mixed_prec_dict is not None else ""),
                    checkpoint_suffix=args.suffix)

        logging.info('\n Epoch: {0}\t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \t'.format(
                         epoch + 1, train_loss=train_loss,
                         val_loss=val_loss, train_prec1=train_prec1,
                         val_prec1=val_prec1, train_prec5=train_prec5,
                         val_prec5=val_prec5))

        results.add(epoch=epoch + 1, train_loss=train_loss,
                    val_loss=val_loss,
                    train_error1=100 - train_prec1,
                    val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5,
                    val_error5=100 - val_prec5)
        results.save()
def main():
    """Entry point for the majority/binary-network training script.

    Parses CLI args, prepares a per-run results directory (prompting before
    reusing an existing one), builds the model, optionally resumes from or
    evaluates a checkpoint, then runs the train/validate loop for
    ``args.epochs`` epochs, logging to CSV/HTML (`ResultsLog`) and
    TensorBoard (`SummaryWriter`).

    Relies on module-level globals: ``parser``, ``models``, ``train``,
    ``validate``, ``get_dataset``, ``get_transform``, ``get_num_classes``,
    ``setup_logging``, ``ResultsLog``, ``save_checkpoint``,
    ``adjust_optimizer``, ``adjust_learning_rate``, ``lr_schedule``,
    ``log_result``.
    """
    global args, best_prec1
    best_prec1 = 0
    args = parser.parse_args()

    if args.evaluate:
        # evaluation runs don't need persistent results
        args.results_dir = '/tmp'
    # BUGFIX: was `args.save is ''` — identity comparison with a string
    # literal is implementation-dependent (SyntaxWarning on CPython >= 3.8).
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    save_name = args.model + "_" + args.majority + "_pad=" + \
        str(args.padding) + "_Data=" + args.dataset
    if args.resume != '':
        save_name = save_name + "_resume"
    save_path = os.path.join(args.results_dir, save_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    else:
        # directory exists: ask whether to overwrite or create a
        # timestamp-suffixed sibling directory
        tim = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        overwrite = input(
            "Directory {} already exists. Would you like to overwrite (y/n): "
            .format(save_path))
        # (removed no-op `save_path = save_path` branch for the "y" answer)
        if overwrite != "y":
            save_path = save_path + "_{}".format(tim)
            os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))
    results_file = os.path.join(save_path, 'results.%s')
    results = ResultsLog(results_file % 'csv', results_file % 'html')

    logging.info("saving to %s", save_path)
    logging.debug("run arguments: %s", args)
    logging.info("setting up tensorboard")
    writer = SummaryWriter(log_dir=save_path)

    if 'cuda' in args.type:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        torch.cuda.set_device(args.gpus[0])
        cudnn.benchmark = True
    else:
        args.gpus = None

    # create model
    logging.info("creating model %s", args.model)
    model = models.__dict__[args.model]
    args.num_classes = get_num_classes(args.dataset)
    model_config = {'input_size': args.input_size, 'dataset': args.dataset,
                    'backprop': args.backprop, 'majority': args.majority,
                    'padding': args.padding,
                    'num_classes': args.num_classes, 'depth': args.depth}

    # BUGFIX: was `args.model_config is not ''` — same literal-identity issue.
    if args.model_config != '':
        model_config = dict(model_config, **literal_eval(args.model_config))

    model = model(**model_config)
    logging.info("created model with configuration: %s", model_config)

    # optionally resume from a checkpoint
    if args.evaluate:
        if not os.path.isfile(args.evaluate):
            parser.error('invalid checkpoint: {}'.format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        model.load_state_dict(checkpoint['state_dict'])
        logging.info("loaded checkpoint '%s' (epoch %s)",
                     args.evaluate, checkpoint['epoch'])
    elif args.resume:
        checkpoint_file = args.resume
        if os.path.isdir(checkpoint_file):
            # resume from a run directory: reload its results log and pick
            # its best checkpoint
            results.load(os.path.join(checkpoint_file, 'results.csv'))
            checkpoint_file = os.path.join(checkpoint_file,
                                           'model_best.pth.tar')
        if os.path.isfile(checkpoint_file):
            logging.info("loading checkpoint '%s'", args.resume)
            checkpoint = torch.load(checkpoint_file)
            args.start_epoch = checkpoint['epoch'] - 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            logging.info("loaded checkpoint '%s' (epoch %s)",
                         checkpoint_file, checkpoint['epoch'])
        else:
            logging.error("no checkpoint found at '%s'", args.resume)

    # generator avoids materializing a throwaway list
    num_parameters = sum(l.nelement() for l in model.parameters())
    logging.info("number of parameters: %d", num_parameters)

    # Data loading code
    default_transform = {
        'train': get_transform(args.dataset,
                               input_size=args.input_size, augment=True),
        'eval': get_transform(args.dataset,
                              input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)
    regime = getattr(model, 'regime',
                     {0: {'optimizer': args.optimizer,
                          'lr': args.lr,
                          'momentum': args.momentum,
                          'weight_decay': args.weight_decay}})

    # define loss function (criterion) and optimizer
    criterion = getattr(model, 'criterion', nn.CrossEntropyLoss)()
    criterion.type(args.type)
    model.type(args.type)

    val_data = get_dataset(args.dataset, 'val', transform['eval'])
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0)
        return

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    logging.info('training regime: %s', regime)

    for epoch in range(args.start_epoch, args.epochs):
        optimizer = adjust_optimizer(optimizer, epoch, regime)
        lr = optimizer.param_groups[0]['lr']
        # user function in utils.py to override regime and update learning rate
        lr = lr_schedule(lr, epoch, args.epochs,
                         start_epoch=args.start_epoch)
        writer.add_scalar("lr", lr, epoch)
        adjust_learning_rate(optimizer, lr)

        # train for one epoch
        train_loss, train_prec1, train_prec5 = train(
            train_loader, model, criterion, epoch, optimizer)

        # evaluate on validation set
        val_loss, val_prec1, val_prec5 = validate(
            val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = val_prec1 > best_prec1
        best_prec1 = max(val_prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': args.model,
            'config': args.model_config,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'regime': regime
        }, is_best, path=save_path)

        logging.info('\n Epoch: {0}\t'
                     'lr {lr: .5f} \t'
                     'Training Loss {train_loss:.4f} \t'
                     'Training Prec@1 {train_prec1:.3f} \t'
                     'Training Prec@5 {train_prec5:.3f} \t'
                     'Validation Loss {val_loss:.4f} \t'
                     'Validation Prec@1 {val_prec1:.3f} \t'
                     'Validation Prec@5 {val_prec5:.3f} \n'
                     .format(epoch + 1, lr=lr,
                             train_loss=train_loss, val_loss=val_loss,
                             train_prec1=train_prec1, val_prec1=val_prec1,
                             train_prec5=train_prec5, val_prec5=val_prec5))

        # adds results to html log file
        results.add(epoch=epoch + 1, train_loss=train_loss,
                    val_loss=val_loss,
                    train_error1=100 - train_prec1,
                    val_error1=100 - val_prec1,
                    train_error5=100 - train_prec5,
                    val_error5=100 - val_prec5, lr=lr)
        # also add results to tensorboard summary writer
        train_res = {
            'loss': train_loss,
            'accuracy': train_prec1,
        }
        log_result(writer, "train", train_res, epoch + 1)
        val_res = {
            'loss': val_loss,
            'accuracy': val_prec1,
        }
        log_result(writer, "val", val_res, epoch + 1)
        results.save()