def eval_and_print(model, ds, is_imagenet, is_train, prefix_str=""):
    """Evaluate ``model`` on ``ds`` and print a one-line metric summary.

    Args:
        model: network to evaluate (forwarded to ``misc.eval_model``).
        ds: dataset/loader to evaluate on.
        is_imagenet: forwarded to ``misc.eval_model``.
        is_train: only selects the word "training"/"validation" in the
            printed message; the evaluation itself is identical either way.
        prefix_str: free-form text prepended to the printed line.

    Returns:
        ``(acc1, acc5, loss)`` as returned by ``misc.eval_model``.
    """
    # The two branches previously duplicated the exact same eval call; only
    # the label in the printed message differed, so evaluate once and pick
    # the label.  Printed output is byte-identical to the original.
    acc1, acc5, loss = misc.eval_model(model, ds, ngpu=args.ngpu,
                                       is_imagenet=is_imagenet)
    split = "training" if is_train else "validation"
    print(prefix_str +
          " model, type={}, {} acc1={:.4f}, acc5={:.4f}, loss={:.6f}".format(
              args.type, split, acc1, acc5, loss))
    return acc1, acc5, loss
def retrain(model, train_ds, val_ds, valid_ind, mask_list, is_imagenet):
    """Fine-tune a pruned model for ``args.prune_finetune_epoch`` epochs.

    The model is trained in place; every ``args.eval_epoch`` epochs the
    current validation metrics are printed via ``eval_and_print``.

    Args:
        model: (pruned) network, updated in place by ``train``.
        train_ds: training dataset/loader.
        val_ds: validation dataset/loader for periodic evaluation.
        valid_ind: pruning bookkeeping forwarded to ``train``
            (presumably indices of prunable layers — confirm against ``train``).
        mask_list: pruning masks forwarded to ``train``.
        is_imagenet: forwarded to the evaluation helpers.
    """
    # Baseline validation metrics before fine-tuning starts; best_acc is also
    # used below to track the best accuracy seen during retraining.
    best_acc, best_acc5, best_loss = misc.eval_model(model, val_ds, ngpu=args.ngpu,
                                                     is_imagenet=is_imagenet)
    criterion = nn.CrossEntropyLoss()
    epochs = args.prune_finetune_epoch
    lrs = args.prune_finetune_lr
    # Inception-family models (or the explicit rmsprop flag) follow the
    # RMSprop recipe; everything else uses SGD with momentum + weight decay.
    if 'inception' in args.type or args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lrs,
                                        alpha=0.9, eps=1.0, momentum=0.9)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=lrs, momentum=0.9,
                                    weight_decay=args.decay)
    for epoch in range(epochs):
        train(train_ds, model, criterion, optimizer, epoch, valid_ind,
              mask_list, is_imagenet)
        if (epoch + 1) % args.eval_epoch == 0:
            # BUG FIX: the previous call was
            #   eval_and_print(model, train_ds, val_ds, is_imagenet, ...)
            # which mis-binds against eval_and_print's
            # (model, ds, is_imagenet, is_train, prefix_str) signature:
            # val_ds landed in is_imagenet and is_imagenet in is_train.
            # Evaluate the validation set with the arguments in the right
            # slots, and track the best accuracy (the original had this
            # tracking commented out).
            acc1, acc5, loss = eval_and_print(
                model, val_ds, is_imagenet, False,
                prefix_str="retraining epoch {}".format(epoch + 1))
            if acc1 > best_acc:
                best_acc = acc1
    # NOTE(review): the original ended with `model = best_model`, a local
    # rebind with no effect on the caller (the model is trained in place),
    # so the dead best_model alias was removed.
num_gpu=args.ngpu, selected_gpus=args.gpu) args.ngpu = len(args.gpu) args.model_root = misc.expand_user(args.model_root) args.data_root = misc.expand_user(args.data_root) args.input_size = 299 if 'inception' in args.type else args.input_size print("=================FLAGS==================") for k, v in args.__dict__.items(): print('{}: {}'.format(k, v)) print("========================================") assert torch.cuda.is_available(), 'no cuda' torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) # load model and dataset fetcher model_raw, ds_fetcher = selector.select(args.type, model_root=args.model_root) # eval model val_ds = ds_fetcher(args.batch_size, data_root=args.data_root, train=False, input_size=args.input_size) acc1, acc5 = misc.eval_model(model_raw, val_ds, ngpu=args.ngpu) # print sf print(model_raw) res_str = "type={}, acc1={:.4f}, acc5={:.4f}".format(args.type, acc1, acc5) print(res_str) with open('acc1_acc5.txt', 'a') as f: f.write(res_str + '\n')
# quantize forward activation if args.fwd_bits < 32: model_quant = quant.quantize_model_layer_output( model_quant, bits=args.fwd_bits, overflow_rate=args.overflow_rate, counter=args.n_sample, type=args.quant_method) if args.fwd_bits <= 16: model_quant.half() # print(model_quant) save_model(model_quant, model_name=args.type + '_quant') # eval quant model start = time.time() acc1, acc5 = misc.eval_model(model_quant, val_ds_quant, ngpu=args.ngpu, is_imagenet=is_imagenet) duration = time.time() - start print('Quant model eval duration: {}'.format(duration)) print(model_quant) res_str = "type={}, quant_method={}, param_bits={}, bn_bits={}, fwd_bits={}, overflow_rate={}, acc1={:.4f}, acc5={:.4f}".format( args.type, args.quant_method, args.param_bits, args.bn_bits, args.fwd_bits, args.overflow_rate, acc1, acc5) print(res_str) with open('acc1_acc5.txt', 'a') as f: f.write('quant: ' + res_str + '\n')
# quantize forward activation print("=================quantize activation==================") if args.fwd_bits < 32: model = quant.duplicate_model_with_scalequant(model, bits=args.fwd_bits, counter=args.n_sample) # ds_fetcher is in path: /imagenet/dataset.get val_ds_tmp = ds_fetcher(batch_size=args.batch_size, data_root=args.data_root, train=False, val=True, shuffle=args.shuffle, input_size=args.input_size) print("load dataset done") misc.eval_model(model, val_ds_tmp, ngpu=1, n_sample=args.n_sample) print("======================================================") print("===================eval model=========================") print(model) if args.test: args.batch_size = 1 else: args.batch_size = 50 val_ds = ds_fetcher(batch_size=args.batch_size, data_root=args.data_root, train=False, val=True, shuffle=args.shuffle, input_size=args.input_size)
# quantize forward activation if args.fwd_bits < 32: model_raw = quant.duplicate_model_with_quant( model_raw, bits=args.fwd_bits, overflow_rate=args.overflow_rate, counter=args.n_sample, type=args.quant_method) print(model_raw) val_ds_tmp = ds_fetcher(10, data_root=args.data_root, train=False, input_size=args.input_size) misc.eval_model(model_raw, val_ds_tmp, ngpu=1, n_sample=args.n_sample, is_imagenet=is_imagenet) # eval model val_ds = ds_fetcher(args.batch_size, data_root=args.data_root, train=False, input_size=args.input_size) acc = misc.eval_model(model_raw, val_ds, ngpu=args.ngpu, is_imagenet=is_imagenet) # print sf print(model_raw)
def train_model(args, model, criterion, optimizer, scheduler, num_epochs,
                dataset_sizes, dataloders, device_ids):
    """Fine-tune a quantized model and return the best-validation weights.

    Per epoch: (1) re-runs the activation-quantization counter pass on a
    small validation subset (presumably to re-estimate scaling factors —
    confirm against ``quant.add_counter``), (2) one training pass,
    (3) validation eval, logging acc1/acc5 to a per-bit-width text file.
    The full model object is checkpointed every ``args.save_epoch_freq``
    epochs.

    Args:
        args: parsed CLI namespace (start_epoch, n_sample, batch_size,
            print_freq, save_epoch_freq, save_path, quantization flags, ...).
        model: a ``torch.nn.DataParallel``-wrapped network.
        criterion: loss function.
        optimizer: optimizer over ``model.parameters()``.
        scheduler: LR scheduler stepped once per epoch.
        num_epochs: last epoch index (inclusive).
        dataset_sizes: dict phase -> number of samples.
        dataloders: dict phase -> DataLoader for 'train' and 'val'.
        device_ids: GPU ids used for DataParallel.

    Returns:
        The model with the best-validation-accuracy weights loaded.
    """
    since = time.time()
    # BUG FIX: the original captured best_model_wts once from the INITIAL
    # weights and never updated it, so the load_state_dict at the end
    # silently discarded all fine-tuning.  Track the best validation top-1
    # and snapshot the weights whenever it improves.  (The unused local
    # `resumed` was also dropped.)
    best_acc = 0.0
    best_model_wts = {k: v.clone() for k, v in model.state_dict().items()}
    # Small loader used only for the activation-counter pass each epoch.
    val_ds_tmp = ds_fetcher(batch_size=8, data_root=args.data_root,
                            train=False, val=True, shuffle=args.shuffle,
                            input_size=args.input_size)
    for epoch in range(args.start_epoch, num_epochs + 1):
        # Unwrap to the bare module, run the counter pass on a single GPU,
        # then re-wrap for multi-GPU training.  (Typo "qauntize" kept: it is
        # a runtime log string reproduced verbatim.)
        print("qauntize activation")
        model = model.module
        model = torch.nn.DataParallel(model.cuda(), device_ids=[device_ids[0]])
        quant.add_counter(model, args.n_sample)
        misc.eval_model(model, val_ds_tmp, device_ids=device_ids[0],
                        n_sample=args.n_sample)
        model = model.module
        model = torch.nn.DataParallel(model.cuda(), device_ids=device_ids)
        for phase in ['train', 'val']:
            if phase == 'train':
                print("train phase")
                scheduler.step(epoch)
                model.train(True)  # Set model to training mode
                running_loss = 0.0
                running_corrects = 0
                tic_batch = time.time()
                # Iterate over data for 1 epoch
                for i, (inputs, labels) in enumerate(dataloders[phase]):
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    # zero the parameter gradients
                    optimizer.zero_grad()
                    # forward
                    outputs = model(inputs)
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)
                    # We are always in the train phase inside this branch, so
                    # the original's redundant inner `if phase == 'train'`
                    # guard around backward/step was removed.
                    loss.backward()
                    optimizer.step()
                    # statistics
                    running_loss += loss.item()
                    running_corrects += torch.sum(preds == labels.data)
                    batch_loss = running_loss / ((i + 1) * args.batch_size)
                    batch_acc = float(running_corrects) / (
                        (i + 1) * args.batch_size)
                    if i % args.print_freq == 0:
                        print(
                            '[Epoch {}/{}]-[batch:{}/{}] lr:{:.8f} {} Loss: {:.6f} Acc: {:.4f} Time: {:.4f}batch/sec'
                            .format(
                                epoch, num_epochs, i,
                                round(dataset_sizes[phase]) - 1,
                                scheduler.get_lr()[0], phase, batch_loss,
                                batch_acc,
                                args.print_freq / (time.time() - tic_batch)))
                        tic_batch = time.time()
                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = float(running_corrects) / dataset_sizes[phase]
                print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                    phase, epoch_loss, epoch_acc))
            else:
                print("val phase")
                model.eval()  # Set model to evaluate mode
                acc1, acc5 = misc.eval_model(model, dataloders[phase],
                                             device_ids=device_ids)
                res_str = "epoch={}, type={}, quant_method={}, \n \
param_bits={}, fwd_bits={},\n \
acc1={:.4f}, acc5={:.4f}".format(
                    epoch, args.type, args.quant_method, args.param_bits,
                    args.fwd_bits, acc1, acc5)
                print(res_str)
                with open(
                        str(args.param_bits) + "-" + str(args.fwd_bits) +
                        'bits_quant_acc1_acc5.txt', 'a') as f:
                    f.write(res_str + '\n')
                # Snapshot the weights whenever validation top-1 improves so
                # the returned model is the best one, not the initial one.
                if acc1 > best_acc:
                    best_acc = acc1
                    best_model_wts = {k: v.clone()
                                      for k, v in model.state_dict().items()}
        if (epoch + 1) % args.save_epoch_freq == 0:
            if not os.path.exists(args.save_path):
                os.makedirs(args.save_path)
            torch.save(
                model,
                os.path.join(args.save_path,
                             "epoch_" + str(epoch) + ".pth.tar"))
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
for q_method in quant_methods: for p_bits in model_bits[typ]['param_bits']: for b_bits in model_bits[typ]['batch_norm_bits']: for l_bits in model_bits[typ]['layer_output_bits']: model = quantize_model(model_raw, q_method, p_bits, b_bits, l_bits, overflow_rate=args.overflow_rate, n_sample=len(val_ds)) start = time.time() acc1, acc5 = misc.eval_model(model, val_ds, is_imagenet=is_imagenet) duration = time.time() - start print(f"{typ}, {q_method}, {p_bits}, {b_bits}, {l_bits}") print( f"Eval duration: {duration}, acc1: {acc1}, acc5: {acc5}" ) rec = { 'type': typ, 'quant_method': q_method, 'param_bits': p_bits, 'batch_norm_bits': b_bits, 'layer_output_bits': l_bits, 'freq(test/s)': len(val_ds) / duration, 'top1': acc1,
model=model, param_bits=args.param_bits, fwd_bits=args.fwd_bits, overflow_rate=args.overflow_rate, counter=args.n_sample) model = model.eval() model = torch.nn.DataParallel(model.cuda(device_ids[0]), device_ids=device_ids) # ds_fetcher is in path: /imagenet/dataset.get val_ds_tmp = ds_fetcher(batch_size=args.batch_size, data_root=args.data_root, train=False, val = True, shuffle=args.shuffle, input_size=args.input_size) print("load dataset done") misc.eval_model(model, val_ds_tmp, device_ids=device_ids, n_sample=args.n_sample) print("======================================================") print("===================eval model=========================") #print(model) if args.test: args.batch_size = 1 val_ds = ds_fetcher(batch_size=args.batch_size, data_root=args.data_root, train=False, val = True, shuffle=args.shuffle, input_size=args.input_size) if args.test: acc1, acc5 = misc.eval_model(model, val_ds, device_ids=device_ids, n_sample=1)
#primetices da se ni onda broj ne slaze # to je zato sto jedan sloj koji ima tezine, kao fc1, zasebno ima tezine i biase, # pa je drugi element liste zapravo deo prvog sloja (bias, po jedan za svaki izlazni neuron) #%% # za evaluaciju modela, moras da ga stavis u odgovarajuci mode: model_raw.model.eval() # ovo je bitno jer neke stvari, poput dropout koji nasumicno iskljucuje neurone, rade samo u trening fazi, a za test ne rade nista, # medjutim ako ga ukljucis za test, on ce da unakazi rezultate #with torch.no_grad(): # Y_ = model_raw.model(data2) #<--ne radi zbog dimenzija. srecom neko je vec pisao fju (utee/misc.py/eval_model). # Oni tu i prvo normalizuju sliku acc1, acc5 = misc.eval_model( model_raw, ds_val) # kad ja tamo, a ono ne radi. vraca prazne tenzore. izdebaguj!! # kad proradi ( XD ) tu ces imati tacnost originalne mreze. #%% quantize weights bits = 8 # ukupno bitova quantized_weights = [] for layer in w: sf = 4 temp = quant.linear_quantize(layer, sf, bits) quantized_weights.append(temp) #ucitaj nove tezine, tj napravi novu mrezu kvantizovanu model_q = model_raw
print('Starting second step!') else: args.epochs = 8 print('Starting first step!') for epoch in range(args.start_epoch, args.epochs): #if args.distributed: # train_sampler.set_epoch(epoch) #adjust_learning_rate(optimizer, epoch) # train for one epoch train_mode(train_ds, model_raw, criterion, optimizer, epoch, args, masks, masks_amul, threshold) # evaluate on validation set prec1, prec5 = misc.eval_model(model_raw, val_ds, ngpu=args.ngpu, is_imagenet=is_imagenet) print(' * Prec@1 {top1:.3f} Prec@5 {top5:.3f}'.format(top1=prec1 * 100, top5=prec5 * 100)) # remember best prec@1 and save checkpoint is_best = prec1 > best_prec1 best_prec1 = max(prec1, best_prec1) save_checkpoint( { 'epoch': epoch + 1, 'arch': args.type, 'state_dict': model_raw.state_dict(), 'best_prec1': best_prec1, 'optimizer': optimizer.state_dict(), 'masks': masks,
model_raw.cuda() model_raw.eval() model_new = compress.CompressedModel(model_raw, input_scale=255, act_bits=params['act_bits'], weight_bits=params['weight_bits']) model_new = model_new.cuda() print(model_new) val_ds = ds_fetcher(params['batch_size'], data_root=params['data_dir'], train=False) acc1, acc5 = misc.eval_model(model_new, val_ds, ngpu=1, n_sample=params['n_sample'], is_imagenet=False) print("FP accuracy Top1: %g Top5: %g" % (acc1, acc5)) model_new.quantize_params() acc1, acc5 = misc.eval_model(model_new, val_ds, ngpu=1, n_sample=params['n_sample'], is_imagenet=False) print("Quant accuracy Top1: %g Top5: %g" % (acc1, acc5)) print(acc1, acc5) print(model_new) new_file = os.path.join(
#if args.distributed: # train_sampler.set_epoch(epoch) #adjust_learning_rate(optimizer, epoch) # train for one epoch import time time.sleep(3) print('>>>>epoch: ' + str(epoch) + '\n') train_mode(train_ds, model_raw, rnn_ins, target_rnn, rnn_optimizer, GAMMA, memory, criterion, optimizer, epoch, args) if 1: acc1, acc5 = misc.eval_model(model_raw, rnn_ins, target_rnn, rnn_optimizer, GAMMA, memory, val_ds, ngpu=args.ngpu, is_imagenet=is_imagenet) res_str = "type={}, quant_method={}, param_bits={}, bn_bits={}, fwd_bits={}, overflow_rate={}, acc1={:.4f}, acc5={:.4f}".format( args.type, args.quant_method, args.param_bits, args.bn_bits, args.fwd_bits, args.overflow_rate, acc1, acc5) print(res_str) # evaluate on validation set ''' prec1, prec5 = misc.eval_model(model_raw, rnn_ins, val_ds, ngpu=args.ngpu, is_imagenet=is_imagenet) print(' * Prec@1 {top1:.3f} Prec@5 {top5:.3f}' .format(top1=prec1*100, top5=prec5*100))
def main():
    """Post-training quantization driver.

    Parses CLI flags, loads a pretrained model, quantizes its parameters
    (and optionally BN running stats and forward activations) at the
    requested bit-widths, evaluates top-1/top-5 on the validation set, and
    appends the result line to acc1_acc5.txt.
    """
    parser = argparse.ArgumentParser(description='PyTorch SVHN Example')
    parser.add_argument('--type', default='cifar10', help='|'.join(selector.known_models))
    parser.add_argument('--quant_method', default='linear', help='linear|minmax|log|tanh')
    parser.add_argument('--batch_size', type=int, default=100, help='input batch size for training (default: 64)')
    parser.add_argument('--gpu', default=None, help='index of gpus to use')
    parser.add_argument('--ngpu', type=int, default=8, help='number of gpus to use')
    parser.add_argument('--seed', type=int, default=117, help='random seed (default: 1)')
    parser.add_argument('--model_root', default='~/.torch/models/', help='folder to save the model')
    parser.add_argument('--data_root', default='/data/public_dataset/pytorch/', help='folder to save the model')
    parser.add_argument('--logdir', default='log/default', help='folder to save to the log')
    parser.add_argument('--input_size', type=int, default=224, help='input size of image')
    parser.add_argument('--n_sample', type=int, default=20, help='number of samples to infer the scaling factor')
    parser.add_argument('--param_bits', type=int, default=8, help='bit-width for parameters')
    parser.add_argument('--bn_bits', type=int, default=32, help='bit-width for running mean and std')
    parser.add_argument('--fwd_bits', type=int, default=8, help='bit-width for layer output')
    parser.add_argument('--overflow_rate', type=float, default=0.0, help='overflow rate')
    args = parser.parse_args()
    # auto_select_gpu returns the selected gpu index list; ngpu is then the
    # actual number of GPUs granted, which may differ from the flag.
    args.gpu = misc.auto_select_gpu(utility_bound=0,
                                    num_gpu=args.ngpu, selected_gpus=args.gpu)
    args.ngpu = len(args.gpu)
    misc.ensure_dir(args.logdir)
    args.model_root = misc.expand_user(args.model_root)
    args.data_root = misc.expand_user(args.data_root)
    # Inception models require 299x299 inputs; everything else keeps the flag.
    args.input_size = 299 if 'inception' in args.type else args.input_size
    # NOTE(review): assert-based validation is stripped under `python -O`.
    assert args.quant_method in ['linear', 'minmax', 'log', 'tanh']
    print("=================FLAGS==================")
    for k, v in args.__dict__.items():
        print('{}: {}'.format(k, v))
    print("========================================")
    assert torch.cuda.is_available(), 'no cuda'
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # load model and dataset fetcher
    model_raw, ds_fetcher, is_imagenet = selector.select(args.type,
                                                         model_root=args.model_root)
    # Non-ImageNet models are evaluated on a single GPU.
    args.ngpu = args.ngpu if is_imagenet else 1

    # quantize parameters
    if args.param_bits < 32:
        state_dict = model_raw.state_dict()
        state_dict_quant = OrderedDict()
        sf_dict = OrderedDict()  # NOTE(review): populated nowhere; unused.
        for k, v in state_dict.items():
            # BN running mean/var get their own bit-width; 32 means "leave
            # this tensor untouched".
            if 'running' in k:
                if args.bn_bits >= 32:
                    print("Ignoring {}".format(k))
                    state_dict_quant[k] = v
                    continue
                else:
                    bits = args.bn_bits
            else:
                bits = args.param_bits
            if args.quant_method == 'linear':
                # sf = scaling factor: bits minus sign bit minus the integer
                # part inferred from the tensor's value range.
                sf = bits - 1. - quant.compute_integral_part(v,
                                                             overflow_rate=args.overflow_rate)
                v_quant = quant.linear_quantize(v, sf, bits=bits)
            elif args.quant_method == 'log':
                v_quant = quant.log_minmax_quantize(v, bits=bits)
            elif args.quant_method == 'minmax':
                v_quant = quant.min_max_quantize(v, bits=bits)
            else:
                v_quant = quant.tanh_quantize(v, bits=bits)
            state_dict_quant[k] = v_quant
            print(k, bits)
        model_raw.load_state_dict(state_dict_quant)

    # quantize forward activation
    if args.fwd_bits < 32:
        model_raw = quant.duplicate_model_with_quant(model_raw,
                                                     bits=args.fwd_bits,
                                                     overflow_rate=args.overflow_rate,
                                                     counter=args.n_sample,
                                                     type=args.quant_method)
        print(model_raw)
        # Warm-up pass over a few batches — presumably to let the inserted
        # quant layers estimate activation ranges (confirm against
        # quant.duplicate_model_with_quant's counter semantics).
        val_ds_tmp = ds_fetcher(10, data_root=args.data_root, train=False,
                                input_size=args.input_size)
        misc.eval_model(model_raw, val_ds_tmp, ngpu=1,
                        n_sample=args.n_sample, is_imagenet=is_imagenet)

    # eval model
    val_ds = ds_fetcher(args.batch_size, data_root=args.data_root,
                        train=False, input_size=args.input_size)
    acc1, acc5 = misc.eval_model(model_raw, val_ds, ngpu=args.ngpu,
                                 is_imagenet=is_imagenet)

    # print sf
    print(model_raw)
    res_str = "type={}, quant_method={}, param_bits={}, bn_bits={}, fwd_bits={}, overflow_rate={}, acc1={:.4f}, acc5={:.4f}".format(
        args.type, args.quant_method, args.param_bits, args.bn_bits, args.fwd_bits,
        args.overflow_rate, acc1, acc5)
    print(res_str)
    # Append, so repeated runs accumulate one result line each.
    with open('acc1_acc5.txt', 'a') as f:
        f.write(res_str + '\n')
v_quant = quant.log_minmax_quantize(v, bits=bits) elif args.quant_method == 'minmax': v_quant = quant.min_max_quantize(v, bits=bits) else: v_quant = quant.tanh_quantize(v, bits=bits) state_dict_quant[k] = v_quant print(k, bits) model_raw.load_state_dict(state_dict_quant) # quantize forward activation if args.fwd_bits < 32: model_raw = quant.duplicate_model_with_quant(model_raw, bits=args.fwd_bits, overflow_rate=args.overflow_rate, counter=args.n_sample, type=args.quant_method) print(model_raw) val_ds_tmp = ds_fetcher(10, data_root=args.data_root, train=False, input_size=args.input_size) misc.eval_model(model_raw, val_ds_tmp, ngpu=1, n_sample=args.n_sample, is_imagenet=is_imagenet) # eval model val_ds = ds_fetcher(args.batch_size, data_root=args.data_root, train=False, input_size=args.input_size) acc1, acc5 = misc.eval_model(model_raw, val_ds, ngpu=args.ngpu, is_imagenet=is_imagenet) # print sf print(model_raw) res_str = "type={}, quant_method={}, param_bits={}, bn_bits={}, fwd_bits={}, overflow_rate={}, acc1={:.4f}, acc5={:.4f}".format( args.type, args.quant_method, args.param_bits, args.bn_bits, args.fwd_bits, args.overflow_rate, acc1, acc5) print(res_str) with open('acc1_acc5.txt', 'a') as f: f.write(res_str + '\n')