# NOTE: these examples come from an ADMM weight-pruning project; `admm`,
# `Config`, `LeNet*`, `CrossEntropyLossMaybeSmooth`, `train`, `test` and
# `save_checkpoint` are repo-local modules/helpers, the rest is standard PyTorch.
import argparse
import os

import torch
import torch.optim as optim
from torchvision import datasets, transforms

best_acc = 0.0  # module-level best-accuracy tracker used by main()


def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--config_file',
                        type=str,
                        default='',
                        help="config file")
    parser.add_argument('--stage',
                        type=str,
                        default='',
                        help="select the pruning stage")

    args = parser.parse_args()

    config = Config(args)

    use_cuda = True

    torch.manual_seed(1)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # 0.1307 / 0.3081 are the MNIST training-set mean and std
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data',
                       train=True,
                       download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=64,
        shuffle=True,
        **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data',
                       train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=1000,
        shuffle=True,
        **kwargs)

    model = None
    if config.arch == 'lenet_bn':
        model = LeNet_BN().to(device)
    elif config.arch == 'lenet':
        model = LeNet().to(device)
    elif config.arch == 'lenet_adv':
        model = LeNet_adv(config.width_multiplier).to(device)
    else:
        raise Exception("unknown model architecture")
    torch.cuda.set_device(config.gpu)
    model.cuda(config.gpu)

    config.model = model

    ADMM = None

    config.prepare_pruning()

    if config.admm:
        ADMM = admm.ADMM(config)

    criterion = CrossEntropyLossMaybeSmooth(smooth_eps=config.smooth_eps).cuda(
        config.gpu)
    config.smooth = config.smooth_eps > 0.0
    config.mixup = config.alpha > 0.0

    config.warmup = (not config.admm) and config.warmup_epochs > 0
    optimizer_init_lr = config.warmup_lr if config.warmup else config.lr

    if (config.optimizer == 'sgd'):
        optimizer = torch.optim.SGD(config.model.parameters(),
                                    optimizer_init_lr,
                                    momentum=0.9,
                                    weight_decay=1e-4)
    elif (config.optimizer == 'adam'):
        optimizer = torch.optim.Adam(config.model.parameters(),
                                     optimizer_init_lr)
    else:
        raise Exception("unknown optimizer")

    scheduler = None
    if config.lr_scheduler == 'cosine':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=config.epochs *
                                                         len(train_loader),
                                                         eta_min=4e-08)
    elif config.lr_scheduler == 'default':
        pass
    else:
        raise Exception("unknown lr scheduler")

    if config.load_model:
        # unlike resume, load model does not care optimizer status or start_epoch
        print('==> Loading from {}'.format(config.load_model))
        config.model.load_state_dict(
            torch.load(config.load_model,
                       map_location={'cuda:0': 'cuda:{}'.format(config.gpu)}))
        test(config, device, test_loader)

    global best_acc
    if config.resume:
        if os.path.isfile(config.resume):
            checkpoint = torch.load(config.resume)
            config.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                config.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(config.resume))

    if config.masked_retrain:
        # make sure small weights are pruned and confirm the acc
        print("<============masking both weights and gradients for retrain")
        admm.masking(config)
        print("<============testing sparsity before retrain")
        admm.test_sparsity(config)
        test(config, device, test_loader)
    if config.masked_progressive:
        admm.zero_masking(config)

    # start from config.start_epoch (set by --resume) instead of always 0
    for epoch in range(config.start_epoch, config.epochs + 1):

        train(config, ADMM, device, train_loader, criterion, optimizer,
              scheduler, epoch)
        test(config, device, test_loader)
        save_checkpoint(
            config, {
                'epoch': epoch + 1,
                'arch': config.arch,
                'state_dict': config.model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict()
            })

    print('overall best_acc is {}'.format(best_acc))

    if (config.save_model and config.admm):
        print('saving model {}'.format(config.save_model))
        torch.save(config.model.state_dict(), config.save_model)
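
# ---------------------------------------------------------------------------
# All of these examples lean on a repo-local `admm` module. As orientation
# only (a hedged sketch, not the repo's actual implementation): the usual ADMM
# pruning bookkeeping keeps, per pruned tensor W, a sparsity-projected copy Z
# and a scaled dual variable U, adds a quadratic penalty pulling W toward Z,
# and refreshes Z and U every few epochs.
# ---------------------------------------------------------------------------
import torch


class ADMMSketch:
    def __init__(self, model, prune_ratios, rho=1e-4):
        self.rho = rho
        self.prune_ratios = prune_ratios        # {param_name: fraction to prune}
        self.Z, self.U = {}, {}
        for name, W in model.named_parameters():
            if name in prune_ratios:
                self.Z[name] = self._project(W.detach().clone(), prune_ratios[name])
                self.U[name] = torch.zeros_like(W)

    @staticmethod
    def _project(W, ratio):
        # magnitude pruning: zero out the smallest `ratio` fraction of entries
        k = int(W.numel() * ratio)
        if k == 0:
            return W
        thresh = W.abs().flatten().kthvalue(k).values
        return W * (W.abs() > thresh).float()

    def update(self, model):
        # Z-step: project (W + U) onto the sparsity set; U-step: dual ascent
        for name, W in model.named_parameters():
            if name in self.Z:
                self.Z[name] = self._project((W + self.U[name]).detach(), self.prune_ratios[name])
                self.U[name] = self.U[name] + W.detach() - self.Z[name]

    def penalty(self, model):
        # quadratic term added to the task loss: (rho / 2) * ||W - Z + U||^2
        loss = 0.0
        for name, W in model.named_parameters():
            if name in self.Z:
                loss = loss + 0.5 * self.rho * torch.norm(W - self.Z[name] + self.U[name]) ** 2
        return loss
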

# Example #2

def main_worker(gpu, ngpus_per_node, config):
    global best_acc1
    config.gpu = gpu

    if config.gpu is not None:
        print("Use GPU: {} for training".format(config.gpu))

    if config.distributed:
        if config.dist_url == "env://" and config.rank == -1:
            config.rank = int(os.environ["RANK"])
        if config.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            config.rank = config.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=config.dist_backend,
                                init_method=config.dist_url,
                                world_size=config.world_size,
                                rank=config.rank)
    # create model
    if config.pretrained:
        print("=> using pre-trained model '{}'".format(config.arch))

        model = models.__dict__[config.arch](pretrained=True)
        print(model)
        param_names = []
        module_names = []
        for name, W in model.named_modules():
            module_names.append(name)
        print(module_names)
        for name, W in model.named_parameters():
            param_names.append(name)
        print(param_names)
    else:
        print("=> creating model '{}'".format(config.arch))
        if config.arch == "alexnet_bn":
            model = AlexNet_BN()
            print(model)
            for i, (name, W) in enumerate(model.named_parameters()):
                print(name)
        else:
            model = models.__dict__[config.arch]()
            print(model)

    if config.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if config.gpu is not None:
            torch.cuda.set_device(config.gpu)
            model.cuda(config.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            config.batch_size = int(config.batch_size / ngpus_per_node)
            config.workers = int(config.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[config.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif config.gpu is not None:
        torch.cuda.set_device(config.gpu)
        model = model.cuda(config.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if config.arch.startswith('alexnet') or config.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    config.model = model
    # define loss function (criterion) and optimizer

    criterion = CrossEntropyLossMaybeSmooth(smooth_eps=config.smooth_eps).cuda(
        config.gpu)

    config.smooth = config.smooth_eps > 0.0
    config.mixup = config.alpha > 0.0

    # note that loading a pretrain model does not inherit optimizer info
    # will use resume to resume admm training
    if config.load_model:
        if os.path.isfile(config.load_model):
            if config.gpu is not None:  # plain `if config.gpu:` would skip GPU 0
                model.load_state_dict(
                    torch.load(
                        config.load_model,
                        map_location={'cuda:0': 'cuda:{}'.format(config.gpu)}))
            else:
                model.load_state_dict(torch.load(config.load_model))
        else:
            print("=> no checkpoint found at '{}'".format(config.resume))

    config.prepare_pruning()

    # measure the compression rate of the loaded (pruned) model and stop;
    # this listing exits here, so everything below it is effectively dead code
    nonzero = 0
    zero = 0
    for name, W in model.named_parameters():
        if name in config.conv_names:
            W = W.cpu().detach().numpy()
            zero += np.sum(W == 0)
            nonzero += np.sum(W != 0)
    total = nonzero + zero
    print('compression rate is {}'.format(total * 1.0 / nonzero))
    import sys
    sys.exit()

    # optionally resume from a checkpoint
    # NOTE: this block references `optimizer` and `ADMM`, which are only
    # constructed further below in this listing; it must move after they exist
    if config.resume:
        ## will add logic for loading admm variables
        if os.path.isfile(config.resume):
            print("=> loading checkpoint '{}'".format(config.resume))
            checkpoint = torch.load(config.resume)
            config.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']

            ADMM.ADMM_U = checkpoint['admm']['ADMM_U']
            ADMM.ADMM_Z = checkpoint['admm']['ADMM_Z']

            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                config.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(config.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(config.data, 'train')
    valdir = os.path.join(config.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if config.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=config.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=config.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=config.batch_size,
                                             shuffle=False,
                                             num_workers=config.workers,
                                             pin_memory=True)

    config.warmup = (not config.admm) and config.warmup_epochs > 0
    optimizer_init_lr = config.warmup_lr if config.warmup else config.lr

    optimizer = None
    if (config.optimizer == 'sgd'):
        optimizer = torch.optim.SGD(model.parameters(),
                                    optimizer_init_lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)
    elif (config.optimizer == 'adam'):
        optimizer = torch.optim.Adam(model.parameters(), optimizer_init_lr)
    else:
        raise Exception("unknown optimizer")

    scheduler = None
    if config.lr_scheduler == 'cosine':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=config.epochs *
                                                         len(train_loader),
                                                         eta_min=4e-08)
    elif config.lr_scheduler == 'default':
        # sets the learning rate to the initial LR decayed by gamma every 30 epochs
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=30 * len(train_loader),
                                              gamma=0.1)
    else:
        raise Exception("unknown lr scheduler")

    if config.warmup:
        scheduler = GradualWarmupScheduler(
            optimizer,
            multiplier=config.lr / config.warmup_lr,
            total_iter=config.warmup_epochs * len(train_loader),
            after_scheduler=scheduler)

    # evaluation-only shortcut, disabled (`if False:`) in the original listing:
    # validate(val_loader, criterion, config)
    # return
    ADMM = None

    if config.verify:
        admm.masking(config)
        admm.test_sparsity(config)
        validate(val_loader, criterion, config)
        import sys
        sys.exit()
    if config.admm:
        ADMM = admm.ADMM(config)

    if config.masked_retrain:
        # make sure small weights are pruned and confirm the acc
        admm.masking(config)
        print("before retrain starts")
        admm.test_sparsity(config)
        validate(val_loader, criterion, config)
    if config.masked_progressive:
        admm.zero_masking(config)
    for epoch in range(config.start_epoch, config.epochs):
        if config.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch

        train(train_loader, config, ADMM, criterion, optimizer, scheduler,
              epoch)

        # evaluate on validation set
        acc1 = validate(val_loader, criterion, config)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if is_best and not config.admm:  # we don't need admm to have best validation acc
            print('saving new best model {}'.format(config.save_model))
            torch.save(model.state_dict(), config.save_model)

        if not config.multiprocessing_distributed or (
                config.multiprocessing_distributed
                and config.rank % ngpus_per_node == 0):
            save_checkpoint(
                config, {
                    'admm': {},
                    'epoch': epoch + 1,
                    'arch': config.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)
    # save last model for admm, optimizer detail is not necessary
    if config.save_model and config.admm:
        print('saving model {}'.format(config.save_model))
        torch.save(model.state_dict(), config.save_model)
    if config.masked_retrain:
        print("after masked retrain")
        admm.test_sparsity(config)
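
# ---------------------------------------------------------------------------
# GradualWarmupScheduler, used in main_worker() above, is not part of
# torch.optim. A minimal linear-warmup wrapper with the same constructor
# arguments (multiplier, total_iter, after_scheduler) could look like this
# sketch; the project's own class may well differ in detail.
# ---------------------------------------------------------------------------
from torch.optim.lr_scheduler import _LRScheduler


class GradualWarmupScheduler(_LRScheduler):
    def __init__(self, optimizer, multiplier, total_iter, after_scheduler=None):
        self.multiplier = multiplier            # target_lr / warmup_lr
        self.total_iter = total_iter            # warmup length, in iterations
        self.after_scheduler = after_scheduler
        super().__init__(optimizer)

    def get_lr(self):
        if self.last_epoch >= self.total_iter and self.after_scheduler is not None:
            return self.after_scheduler.get_last_lr()
        # ramp linearly from base_lr up to base_lr * multiplier
        scale = 1.0 + (self.multiplier - 1.0) * min(self.last_epoch, self.total_iter) / self.total_iter
        return [base_lr * scale for base_lr in self.base_lrs]

    def step(self, epoch=None):
        if self.last_epoch >= self.total_iter and self.after_scheduler is not None:
            self.after_scheduler.step()         # hand over after the warmup phase
            self._last_lr = self.after_scheduler.get_last_lr()
        else:
            super().step(epoch)
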
# Example #3 (fragment: this example's header and most of its body were lost
# in extraction; what survives is the tail of its validate() and its main())
def validate(val_loader, criterion, config):
    # ... (beginning truncated in the original listing) ...
    global best_acc
    if top1.avg.item() > best_acc and not config.admm:
        best_acc = top1.avg.item()
        print('new best_acc is {top1.avg:.3f}'.format(top1=top1))
        print('saving model {}'.format(config.save_model))
        torch.save(config.model.state_dict(), config.save_model)

    return top1.avg


def main():
    # ... (beginning truncated in the original listing) ...
    if config.admm:
        validate(testloader, criterion, config)

    if config.masked_retrain:
        # make sure small weights are pruned and confirm the acc
        print("<============masking both weights and gradients for retrain")
        admm.masking(config)
        print("<============testing sparsity before retrain")
        admm.test_sparsity(config)
        validate(testloader, criterion, config)
    if config.masked_progressive:
        admm.zero_masking(config)

    for epoch in range(start_epoch, start_epoch + config.epochs):
        train(trainloader, criterion, optimizer, epoch, config)
        validate(testloader, criterion, config)

    # final logging
    if config.logging:
        logger.info('---Final Results---')
        logger.info(f'overall best_acc is {best_acc}')
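
# ---------------------------------------------------------------------------
# admm.masking / admm.zero_masking / the gradient masking in the loops above
# are repo-local. In spirit they reduce to a pair of helpers like this hedged
# sketch: build a 0/1 mask from the currently nonzero weights, then multiply
# gradients by the mask after each backward pass so pruned weights stay zero.
# ---------------------------------------------------------------------------
import torch


def build_masks(model, prune_names):
    masks = {}
    for name, W in model.named_parameters():
        if name in prune_names:
            masks[name] = (W.detach() != 0).float()   # keep the current nonzeros
            W.data.mul_(masks[name])                  # force exact zeros
    return masks


def apply_grad_masks(model, masks):
    with torch.no_grad():
        for name, W in model.named_parameters():
            if name in masks and W.grad is not None:
                W.grad.mul_(masks[name])
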
# Example #4
def run_admm(data_name, data_set, data_end_index, fea_dict, lab_dict, arch_dict, cfg_file, processed_first, next_config_file, ADMM, masks, ep, ck):

    # This function processes the current chunk using the information in cfg_file.
    # In parallel, the next chunk is loaded into CPU memory.

    # Reading chunk-specific cfg file (first argument-mandatory file)
    if not os.path.exists(cfg_file):
        sys.stderr.write('ERROR: The config file %s does not exist!\n' % (cfg_file))
        sys.exit(1)  # exit with a nonzero status on a fatal error
    else:
        config = configparser.ConfigParser()
        config.read(cfg_file)

    # Setting torch seed
    seed=int(config['exp']['seed'])
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)


    # Reading config parameters
    output_folder=config['exp']['out_folder']
    multi_gpu=strtobool(config['exp']['multi_gpu'])

    to_do=config['exp']['to_do']
    info_file=config['exp']['out_info']

    model=config['model']['model'].split('\n')

    forward_outs=config['forward']['forward_out'].split(',')
    forward_normalize_post=list(map(strtobool,config['forward']['normalize_posteriors'].split(',')))
    forward_count_files=config['forward']['normalize_with_counts_from'].split(',')
    require_decodings=list(map(strtobool,config['forward']['require_decoding'].split(',')))

    use_cuda=strtobool(config['exp']['use_cuda'])
    save_gpumem=strtobool(config['exp']['save_gpumem'])
    is_production=strtobool(config['exp']['production'])

    if to_do=='train':
        batch_size=int(config['batches']['batch_size_train'])

    if to_do=='valid':
        batch_size=int(config['batches']['batch_size_valid'])

    if to_do=='forward':
        batch_size=1


    # ***** Reading the Data********
    if processed_first:  # do all of the ADMM initialization work here

        # Reading all the features and labels for this chunk
        shared_list=[]

        p=threading.Thread(target=read_lab_fea, args=(cfg_file,is_production,shared_list,output_folder,))
        p.start()
        p.join()

        data_name=shared_list[0]
        data_end_index=shared_list[1]
        fea_dict=shared_list[2]
        lab_dict=shared_list[3]
        arch_dict=shared_list[4]
        data_set=shared_list[5]



        # converting numpy tensors into pytorch tensors and put them on GPUs if specified
        if not(save_gpumem) and use_cuda:
           data_set=torch.from_numpy(data_set).float().cuda()
        else:
           data_set=torch.from_numpy(data_set).float()




    # Reading all the features and labels for the next chunk
    shared_list=[]
    p=threading.Thread(target=read_lab_fea, args=(next_config_file,is_production,shared_list,output_folder,))
    p.start()

    # Reading model and initialize networks
    inp_out_dict=fea_dict

    [nns,costs]=model_init(inp_out_dict,model,config,arch_dict,use_cuda,multi_gpu,to_do)

    if processed_first:
        ADMM = admm.ADMM(config, nns)

    # optimizers initialization
    optimizers=optimizer_init(nns,config,arch_dict)


    # pre-training and multi-gpu init
    for net in nns.keys():
        pt_file_arch=config[arch_dict[net][0]]['arch_pretrain_file']

        if pt_file_arch!='none':
            checkpoint_load = torch.load(pt_file_arch)
            nns[net].load_state_dict(checkpoint_load['model_par'])
            optimizers[net].load_state_dict(checkpoint_load['optimizer_par'])
            optimizers[net].param_groups[0]['lr']=float(config[arch_dict[net][0]]['arch_lr']) # loading lr of the cfg file for pt

        if multi_gpu:
            nns[net] = torch.nn.DataParallel(nns[net])


    if to_do=='forward':

        post_file={}
        for out_id in range(len(forward_outs)):
            if require_decodings[out_id]:
                out_file=info_file.replace('.info','_'+forward_outs[out_id]+'_to_decode.ark')
            else:
                out_file=info_file.replace('.info','_'+forward_outs[out_id]+'.ark')
            post_file[forward_outs[out_id]]=open_or_fd(out_file,output_folder,'wb')


    if strtobool(config['exp']['retrain']) and processed_first and strtobool(config['exp']['masked_progressive']):
        # make sure small weights are pruned and confirm the acc
        print ("<============masking both weights and gradients for retrain")
        masks = admm.masking(config, ADMM, nns)
        print("<============all masking statistics")
        masks = admm.zero_masking(config, nns)
        print ("<============testing sparsity before retrain")
        admm.test_sparsity(config, nns, ADMM)


    if strtobool(config['exp']['masked_progressive']) and processed_first and strtobool(config['exp']['admm']):
        masks = admm.zero_masking(config, nns)


    # check automatically if the model is sequential
    seq_model=is_sequential_dict(config,arch_dict)

    # ***** Minibatch Processing loop********
    if seq_model or to_do=='forward':
        N_snt=len(data_name)
        N_batches=int(N_snt/batch_size)
    else:
        N_ex_tr=data_set.shape[0]
        N_batches=int(N_ex_tr/batch_size)


    beg_batch=0
    end_batch=batch_size

    snt_index=0
    beg_snt=0


    start_time = time.time()

    # recover per-sentence lengths from the cumulative sentence end indices:
    # length[i] = end_index[i] - end_index[i-1], with length[0] = end_index[0]
    arr_snt_len = shift(shift(data_end_index, -1, 0) - data_end_index, 1, 0)
    arr_snt_len[0] = data_end_index[0]


    loss_sum=0
    err_sum=0

    inp_dim=data_set.shape[1]
    for i in range(N_batches):

        max_len=0

        if seq_model:
            max_len = int(max(arr_snt_len[snt_index:snt_index + batch_size]))
            inp = torch.zeros(max_len, batch_size, inp_dim).contiguous()

            for k in range(batch_size):
                snt_len = data_end_index[snt_index] - beg_snt
                N_zeros = max_len - snt_len

                # Append a random number of initial zeros; the others go at the end.
                N_zeros_left = random.randint(0, N_zeros)

                # randomizing could have a regularization effect
                inp[N_zeros_left:N_zeros_left + snt_len, k, :] = data_set[beg_snt:beg_snt + snt_len, :]

                beg_snt = data_end_index[snt_index]
                snt_index = snt_index + 1

        else:
            # features and labels for batch i
            if to_do!='forward':
                inp= data_set[beg_batch:end_batch,:].contiguous()
            else:
                snt_len=data_end_index[snt_index]-beg_snt
                inp= data_set[beg_snt:beg_snt+snt_len,:].contiguous()
                beg_snt=data_end_index[snt_index]
                snt_index=snt_index+1

        # use cuda
        if use_cuda:
            inp=inp.cuda()

        if to_do=='train':
            # Forward input, with autograd graph active
            outs_dict=forward_model(fea_dict,lab_dict,arch_dict,model,nns,costs,inp,inp_out_dict,max_len,batch_size,to_do,forward_outs)

            if strtobool(config['exp']['admm']):
                batch_idx = i + ck
                admm.admm_update(config,ADMM,nns, ep,batch_idx)   # update Z and U
                outs_dict['loss_final'], admm_loss, mixed_loss = admm.append_admm_loss(config, ADMM, nns, outs_dict['loss_final'])  # append the ADMM loss

            for opt in optimizers.keys():
                optimizers[opt].zero_grad()

            if strtobool(config['exp']['admm']):
                mixed_loss.backward()
            else:
                outs_dict['loss_final'].backward()

            # the two original branches (masked_progressive and retrain) performed
            # identical gradient masking, so they are merged here; together they
            # fire whenever either flag is set
            if strtobool(config['exp']['masked_progressive']) or strtobool(config['exp']['retrain']):
                with torch.no_grad():
                    for net in nns.keys():
                        for name, W in nns[net].named_parameters():
                            if name in masks:
                                W.grad *= masks[name]
                        break  # NOTE: as in the original, only the first network is masked

            # Gradient Clipping (th 0.1)
            #for net in nns.keys():
            #    torch.nn.utils.clip_grad_norm_(nns[net].parameters(), 0.1)


            for opt in optimizers.keys():
                if not(strtobool(config[arch_dict[opt][0]]['arch_freeze'])):
                    optimizers[opt].step()
        else:
            with torch.no_grad(): # Forward input without autograd graph (save memory)
                outs_dict=forward_model(fea_dict,lab_dict,arch_dict,model,nns,costs,inp,inp_out_dict,max_len,batch_size,to_do,forward_outs)


        if to_do=='forward':
            for out_id in range(len(forward_outs)):

                out_save=outs_dict[forward_outs[out_id]].data.cpu().numpy()

                if forward_normalize_post[out_id]:
                    # divide the posteriors by the class priors (in the log domain)
                    # so the decoder receives scaled likelihoods
                    counts = load_counts(forward_count_files[out_id])
                    out_save = out_save - np.log(counts / np.sum(counts))

                # save the output
                write_mat(output_folder,post_file[forward_outs[out_id]], out_save, data_name[i])
        else:
            loss_sum=loss_sum+outs_dict['loss_final'].detach()
            err_sum=err_sum+outs_dict['err_final'].detach()

        # update it to the next batch
        beg_batch=end_batch
        end_batch=beg_batch+batch_size

        # Progress bar
        if to_do == 'train':
          status_string="Training | (Batch "+str(i+1)+"/"+str(N_batches)+")"+" | L:" +str(round(loss_sum.cpu().item()/(i+1),3))
          if i==N_batches-1:
             status_string="Training | (Batch "+str(i+1)+"/"+str(N_batches)+")"


        if to_do == 'valid':
          status_string="Validating | (Batch "+str(i+1)+"/"+str(N_batches)+")"
        if to_do == 'forward':
          status_string="Forwarding | (Batch "+str(i+1)+"/"+str(N_batches)+")"

        progress(i, N_batches, status=status_string)

    elapsed_time_chunk=time.time() - start_time

    loss_tot=loss_sum/N_batches
    err_tot=err_sum/N_batches

    # clearing memory
    del inp, outs_dict, data_set

    # save the model
    if to_do=='train':


         for net in nns.keys():
             checkpoint={}
             if multi_gpu:
                checkpoint['model_par']=nns[net].module.state_dict()
             else:
                checkpoint['model_par']=nns[net].state_dict()

             checkpoint['optimizer_par']=optimizers[net].state_dict()

             out_file=info_file.replace('.info','_'+arch_dict[net][0]+'.pkl')
             torch.save(checkpoint, out_file)

    if to_do=='forward':
        for out_name in forward_outs:
            post_file[out_name].close()



    # Write info file
    with open(info_file, "w") as text_file:
        text_file.write("[results]\n")
        if to_do!='forward':
            text_file.write("loss=%s\n" % loss_tot.cpu().numpy())
            text_file.write("err=%s\n" % err_tot.cpu().numpy())
        text_file.write("elapsed_time_chunk=%f\n" % elapsed_time_chunk)



    # Getting the data for the next chunk (read in parallel)
    p.join()
    data_name=shared_list[0]
    data_end_index=shared_list[1]
    fea_dict=shared_list[2]
    lab_dict=shared_list[3]
    arch_dict=shared_list[4]
    data_set=shared_list[5]


    # converting numpy tensors into pytorch tensors and put them on GPUs if specified
    if not(save_gpumem) and use_cuda:
       data_set=torch.from_numpy(data_set).float().cuda()
    else:
       data_set=torch.from_numpy(data_set).float()


    return [data_name,data_set,data_end_index,fea_dict,lab_dict,arch_dict,masks,ADMM]
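
# ---------------------------------------------------------------------------
# run_admm() overlaps I/O with compute: it starts a thread that reads the next
# chunk while the current chunk trains, then joins it and returns the freshly
# loaded tensors to the caller. Stripped of the Kaldi specifics, the pattern
# is just double buffering (illustrative sketch, hypothetical helper names):
# ---------------------------------------------------------------------------
import threading


def process_chunks(chunk_paths, load_chunk, train_on):
    shared = []
    loader = threading.Thread(target=lambda p=chunk_paths[0]: shared.append(load_chunk(p)))
    loader.start()
    for nxt in list(chunk_paths[1:]) + [None]:
        loader.join()                           # wait for the prefetched chunk
        current = shared.pop()
        if nxt is not None:                     # prefetch the next chunk...
            loader = threading.Thread(target=lambda p=nxt: shared.append(load_chunk(p)))
            loader.start()
        train_on(current)                       # ...while training on this one
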

# Example #5

lr = args.lr
# best_val_loss = None
stage = 'train'

# At any point you can hit Ctrl + C to break out of training early.

try:
    if args.admm:
        stage = 'admm'
        args.masked_retrain = True
        lr = args.lr / 4
        train_procedure(lr)
    if args.masked_retrain:
        stage = 'masked_retrain'
        lr = args.lr 
        admm.masking(args, config, model)
        admm.test_sparsity(args, config, model)
        train_procedure(lr)
        admm.test_sparsity(args, config, model)

except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')

# Load the best saved model.
with open(args.save, 'rb') as f:
    model = torch.load(f)
    # after load the rnn params are not a continuous chunk of memory
    # this makes them a continuous chunk, and will speed up forward pass
    model.rnn.flatten_parameters()
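
# ---------------------------------------------------------------------------
# Aside: the block above pickles and reloads the whole model object, which
# ties the checkpoint to the exact class definition. The state_dict
# round-trip used by the other examples is the more portable pattern:
# ---------------------------------------------------------------------------
# torch.save(model.state_dict(), args.save)
# model.load_state_dict(torch.load(args.save, map_location='cpu'))
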

# Example #6

def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--config_file', type=str, default='', help="config file")
    parser.add_argument('--stage', type=str, default='', help="select the pruning stage")

    args = parser.parse_args()

    config = Config(args)

    use_cuda = True

    init = Init_Func(config.init_func)

    torch.manual_seed(config.random_seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor()
                           #transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=64, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
                           transforms.ToTensor()
                           #transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=1000, shuffle=True, **kwargs)

    model = None
    if config.arch == 'lenet_bn':
        model = LeNet_BN().to(device)
    elif config.arch == 'lenet':
        model = LeNet().to(device)
    elif config.arch == 'lenet_adv':
        model = LeNet_adv(w=config.width_multiplier).to(device)
    if model is None:
        raise Exception("unknown model architecture")

    ### for initialization experiments
    for name, W in model.named_parameters():
        if 'conv' in name and 'bias' not in name:
            print('re-initializing {} with {}'.format(name, config.init_func))
            # W.data = torch.nn.init.uniform_(W.data)
            W.data = init.init(W.data)
    model = AttackPGD(model, config)
    #### loading initialization
    '''
    ### for lottery tickets experiments
    read_dict = np.load('lenet_adv_retrained_w16_1_cut.pt_init.npy').item()
    for name,W in model.named_parameters():
        if name not in read_dict:
            continue
        print (name)

        #print ('{} has shape {}'.format(name,read_dict[name].shape))
        print (read_dict[name].shape)
        W.data = torch.from_numpy(read_dict[name])
    '''
    config.model = model

    if config.load_model:
        # unlike resume, load_model does not restore optimizer state or start_epoch
        print('==> Loading from {}'.format(config.load_model))
        config.model.load_state_dict(torch.load(config.load_model, map_location=lambda storage, loc: storage))
        # config.model.load_state_dict(torch.load(config.load_model, map_location={'cuda:0': 'cuda:{}'.format(config.gpu)}))

    torch.cuda.set_device(config.gpu)
    config.model.cuda(config.gpu)
    test(config, device, test_loader)
    ADMM = None

    config.prepare_pruning()

    if config.admm:
        ADMM = admm.ADMM(config)

    optimizer = None
    if config.optimizer == 'sgd':
        optimizer = torch.optim.SGD(config.model.parameters(), config.lr,
                                    momentum=0.9,
                                    weight_decay=1e-6)
    elif config.optimizer == 'adam':
        optimizer = torch.optim.Adam(config.model.parameters(), config.lr)
    else:
        raise Exception("unknown optimizer")

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                     T_max=config.epochs * len(train_loader),
                                                     eta_min=4e-08)

    if config.resume:
        if os.path.isfile(config.resume):
            checkpoint = torch.load(config.resume)
            config.start_epoch = checkpoint['epoch']
            best_adv_acc = checkpoint['best_adv_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(config.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(config.resume))            

        
    if config.masked_retrain:
        # make sure small weights are pruned and confirm the acc
        print("<============masking both weights and gradients for retrain")
        admm.masking(config)
        print("<============testing sparsity before retrain")
        admm.test_sparsity(config)
        test(config, device, test_loader)
    if config.masked_progressive:
        admm.zero_masking(config)

    for epoch in range(config.start_epoch, config.epochs + 1):

        if config.admm:
            admm.admm_adjust_learning_rate(optimizer, epoch, config)
        else:
            if config.lr_scheduler == 'cosine':
                # NOTE: T_max above is in iterations, but the scheduler is only
                # stepped once per epoch here, so the cosine decay is very slow
                scheduler.step()
            elif config.lr_scheduler == 'sgd':
                if epoch == 20:
                    config.lr /= 10
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = config.lr
            else:
                pass  # adam runs at a fixed learning rate here

        train(config, ADMM, device, train_loader, optimizer, epoch)
        test(config, device, test_loader)

    admm.test_sparsity(config)
    test(config, device, test_loader)
    if config.save_model and config.admm:
        print('saving model {}'.format(config.save_model))
        torch.save(config.model.state_dict(), config.save_model)
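
# ---------------------------------------------------------------------------
# admm.admm_adjust_learning_rate, called in the epoch loop above, is also
# repo-local. ADMM pruning runs typically restart the LR schedule at the start
# of each ADMM interval instead of decaying monotonically. A plausible sketch,
# assuming a config.admm_epoch interval length (hypothetical attribute):
# ---------------------------------------------------------------------------
def admm_adjust_learning_rate(optimizer, epoch, config):
    step = epoch % config.admm_epoch            # position inside the current interval
    lr = config.lr * (0.1 ** (3 * step // config.admm_epoch))  # 10x decay at 1/3 and 2/3
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr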