Example No. 1
def eval_and_print(model, ds, is_imagenet, is_train, prefix_str=""):
    # both branches ran the same evaluation; only the printed split name differs
    acc1, acc5, loss = misc.eval_model(model, ds, ngpu=args.ngpu, is_imagenet=is_imagenet)
    split_name = "training" if is_train else "validation"
    print(prefix_str + " model, type={}, {} acc1={:.4f}, acc5={:.4f}, loss={:.6f}".format(
        args.type, split_name, acc1, acc5, loss))
    return acc1, acc5, loss
Example No. 2
def retrain(model, train_ds, val_ds, valid_ind, mask_list, is_imagenet):
    best_acc, best_acc5, best_loss = misc.eval_model(model,
                                                     val_ds,
                                                     ngpu=args.ngpu,
                                                     is_imagenet=is_imagenet)
    best_model = model
    criterion = nn.CrossEntropyLoss()
    epochs = args.prune_finetune_epoch
    lrs = args.prune_finetune_lr

    if 'inception' in args.type or args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(model.parameters(),
                                        lrs,
                                        alpha=0.9,
                                        eps=1.0,
                                        momentum=0.9)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lrs,
                                    momentum=0.9,
                                    weight_decay=args.decay)

    for epoch in range(epochs):
        #adjust_learning_rate(optimizer, epoch)
        train(train_ds, model, criterion, optimizer, epoch, valid_ind,
              mask_list, is_imagenet)
        if (epoch + 1) % args.eval_epoch == 0:
            eval_and_print(model,
                           train_ds,
                           val_ds,
                           is_imagenet,
                           prefix_str="retraining epoch {}".format(epoch + 1))

        #if acc1 > best_acc:
        #    best_acc = acc1
        #    best_model = model

    model = best_model
Example No. 3
args.gpu = misc.auto_select_gpu(utility_bound=0,
                                num_gpu=args.ngpu,
                                selected_gpus=args.gpu)
args.ngpu = len(args.gpu)
args.model_root = misc.expand_user(args.model_root)
args.data_root = misc.expand_user(args.data_root)
args.input_size = 299 if 'inception' in args.type else args.input_size
print("=================FLAGS==================")
for k, v in args.__dict__.items():
    print('{}: {}'.format(k, v))
print("========================================")

assert torch.cuda.is_available(), 'no cuda'
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

# load model and dataset fetcher
model_raw, ds_fetcher = selector.select(args.type, model_root=args.model_root)

# eval model
val_ds = ds_fetcher(args.batch_size,
                    data_root=args.data_root,
                    train=False,
                    input_size=args.input_size)
acc1, acc5 = misc.eval_model(model_raw, val_ds, ngpu=args.ngpu)

# print sf
print(model_raw)
res_str = "type={}, acc1={:.4f}, acc5={:.4f}".format(args.type, acc1, acc5)
print(res_str)
with open('acc1_acc5.txt', 'a') as f:
    f.write(res_str + '\n')
Example No. 4
# quantize forward activation
if args.fwd_bits < 32:
    model_quant = quant.quantize_model_layer_output(
        model_quant,
        bits=args.fwd_bits,
        overflow_rate=args.overflow_rate,
        counter=args.n_sample,
        type=args.quant_method)
    if args.fwd_bits <= 16:
        model_quant.half()

    # print(model_quant)
    save_model(model_quant, model_name=args.type + '_quant')

# eval quant model
start = time.time()
acc1, acc5 = misc.eval_model(model_quant,
                             val_ds_quant,
                             ngpu=args.ngpu,
                             is_imagenet=is_imagenet)
duration = time.time() - start
print('Quant model eval duration: {}'.format(duration))

print(model_quant)
res_str = "type={}, quant_method={}, param_bits={}, bn_bits={}, fwd_bits={}, overflow_rate={}, acc1={:.4f}, acc5={:.4f}".format(
    args.type, args.quant_method, args.param_bits, args.bn_bits, args.fwd_bits,
    args.overflow_rate, acc1, acc5)
print(res_str)
with open('acc1_acc5.txt', 'a') as f:
    f.write('quant: ' + res_str + '\n')
Example No. 5
# quantize forward activation
print("=================quantize activation==================")
if args.fwd_bits < 32:
    model = quant.duplicate_model_with_scalequant(model,
                                                  bits=args.fwd_bits,
                                                  counter=args.n_sample)

    # ds_fetcher is in path: /imagenet/dataset.get
    val_ds_tmp = ds_fetcher(batch_size=args.batch_size,
                            data_root=args.data_root,
                            train=False,
                            val=True,
                            shuffle=args.shuffle,
                            input_size=args.input_size)
    print("load dataset done")
    misc.eval_model(model, val_ds_tmp, ngpu=1, n_sample=args.n_sample)

print("======================================================")

print("===================eval model=========================")
print(model)
if args.test:
    args.batch_size = 1
else:
    args.batch_size = 50
val_ds = ds_fetcher(batch_size=args.batch_size,
                    data_root=args.data_root,
                    train=False,
                    val=True,
                    shuffle=args.shuffle,
                    input_size=args.input_size)
Example No. 6
# quantize forward activation
if args.fwd_bits < 32:
    model_raw = quant.duplicate_model_with_quant(
        model_raw,
        bits=args.fwd_bits,
        overflow_rate=args.overflow_rate,
        counter=args.n_sample,
        type=args.quant_method)
    print(model_raw)
    val_ds_tmp = ds_fetcher(10,
                            data_root=args.data_root,
                            train=False,
                            input_size=args.input_size)
    misc.eval_model(model_raw,
                    val_ds_tmp,
                    ngpu=1,
                    n_sample=args.n_sample,
                    is_imagenet=is_imagenet)

# eval model
val_ds = ds_fetcher(args.batch_size,
                    data_root=args.data_root,
                    train=False,
                    input_size=args.input_size)
acc = misc.eval_model(model_raw,
                      val_ds,
                      ngpu=args.ngpu,
                      is_imagenet=is_imagenet)

# print sf
print(model_raw)
Example No. 7
def train_model(args, model, criterion, optimizer, scheduler, num_epochs,
                dataset_sizes, dataloders, device_ids):
    since = time.time()
    resumed = False

    best_model_wts = model.state_dict()

    val_ds_tmp = ds_fetcher(batch_size=8,
                            data_root=args.data_root,
                            train=False,
                            val=True,
                            shuffle=args.shuffle,
                            input_size=args.input_size)

    for epoch in range(args.start_epoch, num_epochs + 1):
        print("qauntize activation")
        model = model.module
        model = torch.nn.DataParallel(model.cuda(), device_ids=[device_ids[0]])
        quant.add_counter(model, args.n_sample)
        misc.eval_model(model,
                        val_ds_tmp,
                        device_ids=device_ids[0],
                        n_sample=args.n_sample)

        model = model.module
        model = torch.nn.DataParallel(model.cuda(), device_ids=device_ids)

        for phase in ['train', 'val']:
            if phase == 'train':
                print("train phase")
                scheduler.step(epoch)
                model.train(True)  # Set model to training mode

                running_loss = 0.0
                running_corrects = 0

                tic_batch = time.time()
                # Iterate over data for 1 epoch
                for i, (inputs, labels) in enumerate(dataloders[phase]):
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    outputs = model(inputs)
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                    # statistics
                    running_loss += loss.item()
                    running_corrects += torch.sum(preds == labels.data)

                    batch_loss = running_loss / ((i + 1) * args.batch_size)
                    batch_acc = float(running_corrects) / (
                        (i + 1) * args.batch_size)
                    if i % args.print_freq == 0:
                        print(
                            '[Epoch {}/{}]-[batch:{}/{}] lr:{:.8f} {} Loss: {:.6f}  Acc: {:.4f}  Speed: {:.4f} batch/sec'
                            .format(
                                epoch, num_epochs, i,
                                round(dataset_sizes[phase]) - 1,
                                scheduler.get_lr()[0], phase, batch_loss,
                                batch_acc,
                                args.print_freq / (time.time() - tic_batch)))
                        tic_batch = time.time()
                    #if i>=3:
                    #    break

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = float(running_corrects) / dataset_sizes[phase]
                print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                    phase, epoch_loss, epoch_acc))

            else:
                print("val phase")
                model.eval()  # Set model to evaluate mode
                acc1, acc5 = misc.eval_model(model,
                                             dataloders[phase],
                                             device_ids=device_ids)
                res_str = "epoch={}, type={}, quant_method={}, \n \
                            param_bits={}, fwd_bits={},\n \
                            acc1={:.4f}, acc5={:.4f}".format(
                    epoch, args.type, args.quant_method, args.param_bits,
                    args.fwd_bits, acc1, acc5)
                print(res_str)
                with open(
                        str(args.param_bits) + "-" + str(args.fwd_bits) +
                        'bits_quant_acc1_acc5.txt', 'a') as f:
                    f.write(res_str + '\n')

        if (epoch + 1) % args.save_epoch_freq == 0:
            if not os.path.exists(args.save_path):
                os.makedirs(args.save_path)
            torch.save(
                model,
                os.path.join(args.save_path,
                             "epoch_" + str(epoch) + ".pth.tar"))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
Example No. 8
    for q_method in quant_methods:
        for p_bits in model_bits[typ]['param_bits']:
            for b_bits in model_bits[typ]['batch_norm_bits']:
                for l_bits in model_bits[typ]['layer_output_bits']:
                    model = quantize_model(model_raw,
                                           q_method,
                                           p_bits,
                                           b_bits,
                                           l_bits,
                                           overflow_rate=args.overflow_rate,
                                           n_sample=len(val_ds))

                    start = time.time()
                    acc1, acc5 = misc.eval_model(model,
                                                 val_ds,
                                                 is_imagenet=is_imagenet)
                    duration = time.time() - start
                    print(f"{typ}, {q_method}, {p_bits}, {b_bits}, {l_bits}")
                    print(
                        f"Eval duration: {duration}, acc1: {acc1}, acc5: {acc5}"
                    )

                    rec = {
                        'type': typ,
                        'quant_method': q_method,
                        'param_bits': p_bits,
                        'batch_norm_bits': b_bits,
                        'layer_output_bits': l_bits,
                        'freq(test/s)': len(val_ds) / duration,
                        'top1': acc1,
Example No. 9
            model=model, 
            param_bits=args.param_bits, 
            fwd_bits=args.fwd_bits,
            overflow_rate=args.overflow_rate,
            counter=args.n_sample)
    model = model.eval()
    model = torch.nn.DataParallel(model.cuda(device_ids[0]), device_ids=device_ids)
    # ds_fetcher is in path: /imagenet/dataset.get
    val_ds_tmp = ds_fetcher(batch_size=args.batch_size,
                            data_root=args.data_root,
                            train=False,
                            val=True,
                            shuffle=args.shuffle,
                            input_size=args.input_size)
    print("load dataset done")
    misc.eval_model(model, val_ds_tmp, device_ids=device_ids, n_sample=args.n_sample)
print("======================================================")


print("===================eval model=========================")
#print(model)
if args.test:
    args.batch_size = 1
val_ds = ds_fetcher(batch_size=args.batch_size,
                    data_root=args.data_root,
                    train=False,
                    val=True,
                    shuffle=args.shuffle,
                    input_size=args.input_size)
if args.test:
    acc1, acc5 = misc.eval_model(model, val_ds, device_ids=device_ids, n_sample=1)
Example No. 10
# you'll notice that even then the count doesn't match
# that's because a single layer that has weights, like fc1, stores its weights and biases separately,
# so the second element of the list is actually part of the first layer (the bias, one per output neuron)
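#%%
# a small self-contained sketch (not part of this snippet) of the interleaving described above:
# every layer that has weights contributes two consecutive entries to the parameter list.
import torch.nn as nn

_toy = nn.Sequential(nn.Linear(4, 3), nn.Linear(3, 2))
for _name, _p in _toy.named_parameters():
    print(_name, tuple(_p.shape))
# 0.weight (3, 4)
# 0.bias (3,)     <- the "second element of the list", still belongs to the first layer
# 1.weight (2, 3)
# 1.bias (2,)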

#%%
# to evaluate the model you have to put it into the appropriate mode:
model_raw.model.eval()
# this matters because some things, like dropout (which randomly switches neurons off), only act during the training phase and do nothing at test time,
# but if you leave them switched on for testing they will wreck the results
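#%%
# a minimal, self-contained sketch (not part of this snippet) of what .train()/.eval() changes for dropout:
import torch
import torch.nn as nn

_drop = nn.Dropout(p=0.5)
_x = torch.ones(8)
_drop.train()
print(_drop(_x))  # roughly half the entries zeroed, the survivors scaled by 1/(1-p) = 2
_drop.eval()
print(_drop(_x))  # identity: all ones, dropout does nothing at test time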

#with torch.no_grad():
#    Y_ = model_raw.model(data2)  # <-- doesn't work because of the dimensions; luckily someone already wrote a function for this (utee/misc.py/eval_model).
# They also normalize the image there first

acc1, acc5 = misc.eval_model(
    model_raw,
    ds_val)  # turns out it doesn't work: it returns empty tensors. debug it!!
# once it works ( XD ) this is where you'll get the accuracy of the original network.
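#%%
# a hedged sketch (not from utee/misc.py) of what the direct call above would roughly need:
# add the missing batch dimension and normalize the image first, the way eval_model is said to do.
# _predict_one and the mean/std values are placeholders for illustration only.
import torch

def _predict_one(net, img, mean=0.5, std=0.5):
    x = (img - mean) / std   # normalize (placeholder statistics)
    x = x.unsqueeze(0)       # add the batch dimension the raw call was missing
    with torch.no_grad():
        return net(x)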

#%% quantize weights

bits = 8  # total number of bits

quantized_weights = []
for layer in w:
    sf = 4
    temp = quant.linear_quantize(layer, sf, bits)
    quantized_weights.append(temp)
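# a minimal sketch of what a fixed-point linear quantizer of this shape typically does
# (an assumption for illustration, not the repo's quant.linear_quantize): with sf fractional
# bits and `bits` total bits, values are rounded to multiples of 2**-sf and clipped to the
# signed representable range.
import torch

def linear_quantize_sketch(x, sf, bits):
    delta = 2.0 ** (-sf)                       # quantization step
    bound = 2.0 ** (bits - 1)
    x_q = torch.round(x / delta)               # snap to the fixed-point grid
    x_q = torch.clamp(x_q, -bound, bound - 1)  # clip to the signed range
    return x_q * delta                         # back to float, now quantized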

# load the new weights, i.e. build a new, quantized network
model_q = model_raw
Example No. 11
    print('Starting second step!')
else:
    args.epochs = 8
    print('Starting first step!')

for epoch in range(args.start_epoch, args.epochs):
    #if args.distributed:
    #    train_sampler.set_epoch(epoch)
    #adjust_learning_rate(optimizer, epoch)
    # train for one epoch
    train_mode(train_ds, model_raw, criterion, optimizer, epoch, args, masks,
               masks_amul, threshold)

    # evaluate on validation set
    prec1, prec5 = misc.eval_model(model_raw,
                                   val_ds,
                                   ngpu=args.ngpu,
                                   is_imagenet=is_imagenet)

    print(' * Prec@1 {top1:.3f} Prec@5 {top5:.3f}'.format(top1=prec1 * 100,
                                                          top5=prec5 * 100))
    # remember best prec@1 and save checkpoint
    is_best = prec1 > best_prec1
    best_prec1 = max(prec1, best_prec1)
    save_checkpoint(
        {
            'epoch': epoch + 1,
            'arch': args.type,
            'state_dict': model_raw.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
            'masks': masks,
Example No. 12
model_raw.cuda()
model_raw.eval()

model_new = compress.CompressedModel(model_raw,
                                     input_scale=255,
                                     act_bits=params['act_bits'],
                                     weight_bits=params['weight_bits'])
model_new = model_new.cuda()
print(model_new)

val_ds = ds_fetcher(params['batch_size'],
                    data_root=params['data_dir'],
                    train=False)
acc1, acc5 = misc.eval_model(model_new,
                             val_ds,
                             ngpu=1,
                             n_sample=params['n_sample'],
                             is_imagenet=False)
print("FP accuracy Top1: %g Top5: %g" % (acc1, acc5))

model_new.quantize_params()
acc1, acc5 = misc.eval_model(model_new,
                             val_ds,
                             ngpu=1,
                             n_sample=params['n_sample'],
                             is_imagenet=False)
print("Quant accuracy Top1: %g Top5: %g" % (acc1, acc5))
print(acc1, acc5)

print(model_new)
new_file = os.path.join(
Example No. 13
    #if args.distributed:
    #    train_sampler.set_epoch(epoch)
    #adjust_learning_rate(optimizer, epoch)
    # train for one epoch
    import time
    time.sleep(3)
    print('>>>>epoch: ' + str(epoch) + '\n')
    train_mode(train_ds, model_raw, rnn_ins, target_rnn, rnn_optimizer, GAMMA,
               memory, criterion, optimizer, epoch, args)

    if 1:
        acc1, acc5 = misc.eval_model(model_raw,
                                     rnn_ins,
                                     target_rnn,
                                     rnn_optimizer,
                                     GAMMA,
                                     memory,
                                     val_ds,
                                     ngpu=args.ngpu,
                                     is_imagenet=is_imagenet)

        res_str = "type={}, quant_method={}, param_bits={}, bn_bits={}, fwd_bits={}, overflow_rate={}, acc1={:.4f}, acc5={:.4f}".format(
            args.type, args.quant_method, args.param_bits, args.bn_bits,
            args.fwd_bits, args.overflow_rate, acc1, acc5)
        print(res_str)
    # evaluate on validation set
    '''
    prec1, prec5 = misc.eval_model(model_raw, rnn_ins, val_ds, ngpu=args.ngpu, is_imagenet=is_imagenet)

    print(' * Prec@1 {top1:.3f} Prec@5 {top5:.3f}'
          .format(top1=prec1*100, top5=prec5*100))
Example No. 14
def main():
    parser = argparse.ArgumentParser(description='PyTorch SVHN Example')
    parser.add_argument('--type', default='cifar10', help='|'.join(selector.known_models))
    parser.add_argument('--quant_method', default='linear', help='linear|minmax|log|tanh')
    parser.add_argument('--batch_size', type=int, default=100, help='input batch size for training (default: 100)')
    parser.add_argument('--gpu', default=None, help='index of gpus to use')
    parser.add_argument('--ngpu', type=int, default=8, help='number of gpus to use')
    parser.add_argument('--seed', type=int, default=117, help='random seed (default: 117)')
    parser.add_argument('--model_root', default='~/.torch/models/', help='folder to save the model')
    parser.add_argument('--data_root', default='/data/public_dataset/pytorch/', help='root folder of the dataset')
    parser.add_argument('--logdir', default='log/default', help='folder to save to the log')

    parser.add_argument('--input_size', type=int, default=224, help='input size of image')
    parser.add_argument('--n_sample', type=int, default=20, help='number of samples to infer the scaling factor')
    parser.add_argument('--param_bits', type=int, default=8, help='bit-width for parameters')
    parser.add_argument('--bn_bits', type=int, default=32, help='bit-width for running mean and std')
    parser.add_argument('--fwd_bits', type=int, default=8, help='bit-width for layer output')
    parser.add_argument('--overflow_rate', type=float, default=0.0, help='overflow rate')
    args = parser.parse_args()

    args.gpu = misc.auto_select_gpu(utility_bound=0, num_gpu=args.ngpu, selected_gpus=args.gpu)
    args.ngpu = len(args.gpu)
    misc.ensure_dir(args.logdir)
    args.model_root = misc.expand_user(args.model_root)
    args.data_root = misc.expand_user(args.data_root)
    args.input_size = 299 if 'inception' in args.type else args.input_size
    assert args.quant_method in ['linear', 'minmax', 'log', 'tanh']
    print("=================FLAGS==================")
    for k, v in args.__dict__.items():
        print('{}: {}'.format(k, v))
    print("========================================")

    assert torch.cuda.is_available(), 'no cuda'
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # load model and dataset fetcher
    model_raw, ds_fetcher, is_imagenet = selector.select(args.type, model_root=args.model_root)
    args.ngpu = args.ngpu if is_imagenet else 1

    # quantize parameters
    if args.param_bits < 32:
        state_dict = model_raw.state_dict()
        state_dict_quant = OrderedDict()
        sf_dict = OrderedDict()
        for k, v in state_dict.items():
            if 'running' in k:
                if args.bn_bits >= 32:
                    print("Ignoring {}".format(k))
                    state_dict_quant[k] = v
                    continue
                else:
                    bits = args.bn_bits
            else:
                bits = args.param_bits

            if args.quant_method == 'linear':
                sf = bits - 1. - quant.compute_integral_part(v, overflow_rate=args.overflow_rate)
                v_quant = quant.linear_quantize(v, sf, bits=bits)
            elif args.quant_method == 'log':
                v_quant = quant.log_minmax_quantize(v, bits=bits)
            elif args.quant_method == 'minmax':
                v_quant = quant.min_max_quantize(v, bits=bits)
            else:
                v_quant = quant.tanh_quantize(v, bits=bits)
            state_dict_quant[k] = v_quant
            print(k, bits)
        model_raw.load_state_dict(state_dict_quant)

    # quantize forward activation
    if args.fwd_bits < 32:
        model_raw = quant.duplicate_model_with_quant(model_raw, bits=args.fwd_bits, overflow_rate=args.overflow_rate,
                                                     counter=args.n_sample, type=args.quant_method)
        print(model_raw)
        val_ds_tmp = ds_fetcher(10, data_root=args.data_root, train=False, input_size=args.input_size)
        misc.eval_model(model_raw, val_ds_tmp, ngpu=1, n_sample=args.n_sample, is_imagenet=is_imagenet)

    # eval model
    val_ds = ds_fetcher(args.batch_size, data_root=args.data_root, train=False, input_size=args.input_size)
    acc1, acc5 = misc.eval_model(model_raw, val_ds, ngpu=args.ngpu, is_imagenet=is_imagenet)

    # print sf
    print(model_raw)
    res_str = "type={}, quant_method={}, param_bits={}, bn_bits={}, fwd_bits={}, overflow_rate={}, acc1={:.4f}, acc5={:.4f}".format(
        args.type, args.quant_method, args.param_bits, args.bn_bits, args.fwd_bits, args.overflow_rate, acc1, acc5)
    print(res_str)
    with open('acc1_acc5.txt', 'a') as f:
        f.write(res_str + '\n')
Example No. 15
            v_quant = quant.log_minmax_quantize(v, bits=bits)
        elif args.quant_method == 'minmax':
            v_quant = quant.min_max_quantize(v, bits=bits)
        else:
            v_quant = quant.tanh_quantize(v, bits=bits)
        state_dict_quant[k] = v_quant
        print(k, bits)
    model_raw.load_state_dict(state_dict_quant)

# quantize forward activation
if args.fwd_bits < 32:
    model_raw = quant.duplicate_model_with_quant(model_raw, bits=args.fwd_bits, overflow_rate=args.overflow_rate,
                                                 counter=args.n_sample, type=args.quant_method)
    print(model_raw)
    val_ds_tmp = ds_fetcher(10, data_root=args.data_root, train=False, input_size=args.input_size)
    misc.eval_model(model_raw, val_ds_tmp, ngpu=1, n_sample=args.n_sample, is_imagenet=is_imagenet)

# eval model
val_ds = ds_fetcher(args.batch_size, data_root=args.data_root, train=False, input_size=args.input_size)
acc1, acc5 = misc.eval_model(model_raw, val_ds, ngpu=args.ngpu, is_imagenet=is_imagenet)

# print sf
print(model_raw)
res_str = "type={}, quant_method={}, param_bits={}, bn_bits={}, fwd_bits={}, overflow_rate={}, acc1={:.4f}, acc5={:.4f}".format(
    args.type, args.quant_method, args.param_bits, args.bn_bits, args.fwd_bits, args.overflow_rate, acc1, acc5)
print(res_str)
with open('acc1_acc5.txt', 'a') as f:
    f.write(res_str + '\n')