Exemple #1
0
def changeDues(msg):
    """Move the given cards to random due days inside a requested window.

    ``msg`` must contain ``cardIds`` (list) together with ``minDue`` and
    ``maxDue`` (ints).  Both bounds are turned into day numbers by
    subtracting ``col.crt`` and floor-dividing by 86400.  Cards whose
    ``queue`` or ``type`` is 0 are skipped.  Every other card is set to
    type 2 / queue 2, receives a random due day drawn from
    ``[minDue, maxDue]`` (inclusive), and has its interval shifted by the
    same number of days as its due date.

    Returns:
        Whatever ``emit.emitResult(True)`` returns.

    Raises:
        ValueError: from ``random.randint`` when the converted ``minDue``
            is greater than ``maxDue``.
    """
    typeCheck(msg, {'cardIds': list, 'minDue': int, 'maxDue': int})
    cids = msg['cardIds']
    with Col() as col:
        checkpoint(col, 'Change card dues')
        minDue = (msg['minDue'] - col.crt) // 86400
        maxDue = (msg['maxDue'] - col.crt) // 86400
        for cid in cids:
            card = col.getCard(cid)

            if card.queue == 0 or card.type == 0:  # Ignore for new cards
                continue

            # TODO: Properly calculate the next interval using exponential learning curve
            oldDue = card.due
            newDue = random.randint(minDue, maxDue)

            card.type = 2
            card.queue = 2
            card.due = newDue
            # Keep the interval in step with the due date: shift it by the
            # same number of days the due date moved.
            card.ivl += newDue - oldDue
            card.flush()
        col.reset()
        return emit.emitResult(True)
Exemple #2
0
def test(epoch):
    """Evaluate ``net`` on ``testloader`` and checkpoint on a new best accuracy.

    The network is switched to eval mode and run over every batch under
    ``torch.no_grad()``.  When ``args.private`` is truthy each batch loss is
    divided by the batch size (``inputs.shape[0]``) before being
    accumulated.  If the resulting accuracy beats the module-level
    ``best_acc``, ``best_acc`` is updated and ``checkpoint`` is called.

    Args:
        epoch: Epoch number, forwarded to ``checkpoint``.

    Returns:
        ``(mean_loss, acc)``: the accumulated loss divided by the number of
        batches, and the accuracy in percent.

    Raises:
        ZeroDivisionError: if ``testloader`` yields no samples.
    """
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    # Number of batches seen so far; stays 0 if the loader is empty.
    num_batches = 0
    with torch.no_grad():
        for num_batches, (inputs, targets) in enumerate(testloader, start=1):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = loss_func(outputs, targets)
            step_loss = loss.item()
            if args.private:
                step_loss /= inputs.shape[0]

            test_loss += step_loss
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct_idx = predicted.eq(targets.data).cpu()
            correct += correct_idx.sum()

        acc = 100. * float(correct) / float(total)
        print('test loss:%.5f' % (test_loss / num_batches), 'test acc:', acc)
        ## Save checkpoint.
        if acc > best_acc:
            best_acc = acc
            checkpoint(net, acc, epoch, args.sess)

    # Use the same batch count as the printed loss so the reported and
    # returned averages agree (and a single-batch loader cannot divide by 0).
    return (test_loss / num_batches, acc)
Exemple #3
0
def resetScheduling(msg):
    """Reset scheduling and learning state for the cards listed in ``msg``.

    ``msg['cardIds']`` must be a list.  Inside one collection session a
    checkpoint is recorded, then the scheduler's ``resetCards`` and
    ``removeLrn`` are applied to those ids.

    Returns:
        Whatever ``emit.emitResult(True)`` returns.
    """
    typeCheck(msg, {'cardIds': list})
    selected_ids = msg['cardIds']
    with Col() as collection:
        # Record a checkpoint before touching any card.
        checkpoint(
            collection,
            "Reset scheduling and learning on selected cards",
        )
        collection.sched.resetCards(selected_ids)
        collection.sched.removeLrn(selected_ids)
        return emit.emitResult(True)
Exemple #4
0
def main(hparams):

    # Set up some stuff accoring to hparams
    hparams.n_input = np.prod(hparams.image_shape)
    utils.set_num_measurements(hparams)
    utils.print_hparams(hparams)

    # get inputs
    data_dict = model_input(hparams)

    estimator = utils.get_estimator(hparams, 'vae')
    utils.setup_checkpointing(hparams)
    measurement_losses, l2_losses = utils.load_checkpoints(hparams)

    h_hats_dict = {model_type: {} for model_type in hparams.model_types}
    for key, x in data_dict.iteritems():
        if not hparams.not_lazy:
            # If lazy, first check if the image has already been
            # saved before by *all* estimators. If yes, then skip this image.
            save_paths = utils.get_save_paths(hparams, key)
            is_saved = all([
                os.path.isfile(save_path) for save_path in save_paths.values()
            ])
            if is_saved:
                continue

        # Get Rx data
        Rx = data_dict[key]['Rx_data']
        Tx = data_dict[key]['Rx_data']
        H = data_dict[key]['H_data']

        # Construct estimates using each estimator
        h_hat = estimator(Tx, Rx, hparams)

        # Save the estimate
        h_hats_dict['vae'][key] = h_hat

        # Compute and store measurement and l2 loss
        measurement_losses['vae'][key] = utils.get_measurement_loss(
            h_hat, Tx, Rx)
        l2_losses['vae'][key] = utils.get_l2_loss(h_hat, H)

        print 'Processed upto image {0} / {1}'.format(key + 1, len(data_dict))

        # Checkpointing
        if (hparams.save_images) and ((key + 1) % hparams.checkpoint_iter
                                      == 0):
            utils.checkpoint(key, h_hat, measurement_losses, l2_losses,
                             save_image, hparams)
            print '\nProcessed and saved first ', key + 1, 'channels\n'
Exemple #5
0
def test(epoch, train_loss):
    """Switch to eval mode and save model/projector checkpoints on schedule.

    Checkpoints are written only when
    ``args.local_rank % ngpus_per_node == 0``.  On the last epoch
    (``args.epoch - 1``) the model is saved under the default name and the
    projector with the ``'_projector'`` suffix.  Otherwise, on every epoch
    divisible by 100, both are saved with an ``'_epoch_<n>'`` suffix.

    Args:
        epoch: Current epoch number.
        train_loss: Training loss forwarded to ``checkpoint``.
    """
    model.eval()
    projector.eval()

    is_last_epoch = epoch == args.epoch - 1

    # Nothing to save unless this is the final epoch or a multiple of 100.
    if not is_last_epoch and epoch % 100 != 0:
        return

    # Only the first process on each node writes checkpoints.
    if args.local_rank % ngpus_per_node != 0:
        return

    if is_last_epoch:
        # Save at the last epoch #
        checkpoint(model, train_loss, epoch, args, optimizer)
        checkpoint(projector, train_loss, epoch, args, optimizer,
                   save_name_add='_projector')
        return

    # Save at every 100 epoch #
    epoch_suffix = '_epoch_' + str(epoch)
    checkpoint(model, train_loss, epoch, args, optimizer,
               save_name_add=epoch_suffix)
    checkpoint(projector, train_loss, epoch, args, optimizer,
               save_name_add='_projector' + epoch_suffix)
Exemple #6
0
    def encode(self, x):
        """Push ``x`` through every block and collect the split-off latents.

        For each of ``self.num_blocks`` blocks the tensor is squeezed, run
        through ``self.num_layers_per_block`` permute + flow layers, and then
        chunked in two along dim 1: the first half is appended to the output
        list and the second half is carried into the next block.

        Returns:
            ``(zout, loglikelihood_accum)``: ``zout`` is a list with one
            tensor per block plus the tensor left after the last block;
            ``loglikelihood_accum`` is the running sum of every
            ``logdet + plogdet`` returned by the layers.
        """
        loglikelihood_accum = 0

        zout = []
        # Clone the input and flag the copy as requiring grad.
        # NOTE(review): presumably needed so the checkpointed flows below
        # receive an input that requires grad -- confirm.
        z = x.clone()
        z.requires_grad = True

        for L in range(self.num_blocks):
            # squeeze - ensures the channel dimension is divisible by 2
            z = self.squeezes[L](z)

            for K in range(self.num_layers_per_block):
                # permute
                z, plogdet = self.permutes[L][K](z)

                # Run the flow either through utils.checkpoint or directly,
                # depending on self.checkpoint_gradients.
                if self.checkpoint_gradients:
                    z, logdet = utils.checkpoint(self.flows[L][K], z)
                else:
                    z, logdet = self.flows[L][K](z)

                loglikelihood_accum = loglikelihood_accum + (logdet + plogdet)

                # Drop the per-layer terms as soon as they have been summed.
                del plogdet
                del logdet

            # split hierarchical
            # this operation returns two non-contiguous blocks
            # with references to the original z tensor
            # if we do not call .contiguous() (or .clone()) on BOTH z1 and z2,
            # then the entire z tensor must be kept around
            # the del operators just do a little cleanup to avoid a (very) slight memory bump
            z1, z2 = torch.chunk(z, 2, dim=1)
            z1 = z1.contiguous()
            z2 = z2.contiguous()
            zout.append(z1)
            del z
            z = z2
            del z2

        # The half carried out of the last block is the final output entry.
        zout.append(z)

        return zout, loglikelihood_accum
Exemple #7
0
def main(hparams):
    # Set up some stuff according to hparams
    hparams.n_input = np.prod(hparams.image_shape)
    maxiter = hparams.max_outer_iter
    utils.print_hparams(hparams)

    # get inputs
    xs_dict = model_input(hparams)

    estimators = utils.get_estimators(hparams)
    utils.setup_checkpointing(hparams)
    measurement_losses, l2_losses = utils.load_checkpoints(hparams)

    x_hats_dict = {'dcgan' : {}}
    x_batch_dict = {}
    for key, x in xs_dict.iteritems():
        if hparams.lazy:
            # If lazy, first check if the image has already been
            # saved before by *all* estimators. If yes, then skip this image.
            save_paths = utils.get_save_paths(hparams, key)
            is_saved = all([os.path.isfile(save_path) for save_path in save_paths.values()])
            if is_saved:
                continue

        x_batch_dict[key] = x
        if len(x_batch_dict) < hparams.batch_size:
            continue

        # Reshape input
        x_batch_list = [x.reshape(1, hparams.n_input) for _, x in x_batch_dict.iteritems()]
        x_batch = np.concatenate(x_batch_list)

        # Construct measurements
        A_outer = utils.get_outer_A(hparams)

        y_batch_outer=np.matmul(x_batch, A_outer)


        x_main_batch = 0.0 * x_batch
        z_opt_batch = np.random.randn(hparams.batch_size, 100)
        for k in range(maxiter):

            x_est_batch=x_main_batch + hparams.outer_learning_rate*(np.matmul((y_batch_outer-np.matmul(x_main_batch,A_outer)),A_outer.T))



            estimator = estimators['dcgan']
            x_hat_batch,z_opt_batch = estimator(x_est_batch,z_opt_batch, hparams)
            x_main_batch=x_hat_batch


        for i, key in enumerate(x_batch_dict.keys()):
            x = xs_dict[key]
            y = y_batch_outer[i]
            x_hat = x_hat_batch[i]

            # Save the estimate
            x_hats_dict['dcgan'][key] = x_hat

            # Compute and store measurement and l2 loss
            measurement_losses['dcgan'][key] = utils.get_measurement_loss(x_hat, A_outer, y)
            l2_losses['dcgan'][key] = utils.get_l2_loss(x_hat, x)
        print 'Processed upto image {0} / {1}'.format(key+1, len(xs_dict))

        # Checkpointing
        if (hparams.save_images) and ((key+1) % hparams.checkpoint_iter == 0):
            utils.checkpoint(x_hats_dict, measurement_losses, l2_losses, save_image, hparams)
            #x_hats_dict = {'dcgan' : {}}
            print '\nProcessed and saved first ', key+1, 'images\n'

        x_batch_dict = {}

    # Final checkpoint
    if hparams.save_images:
        utils.checkpoint(x_hats_dict, measurement_losses, l2_losses, save_image, hparams)
        print '\nProcessed and saved all {0} image(s)\n'.format(len(xs_dict))

    if hparams.print_stats:
        for model_type in hparams.model_types:
            print model_type
            mean_m_loss = np.mean(measurement_losses[model_type].values())
            mean_l2_loss = np.mean(l2_losses[model_type].values())
            print 'mean measurement loss = {0}'.format(mean_m_loss)
            print 'mean l2 loss = {0}'.format(mean_l2_loss)

    if hparams.image_matrix > 0:
        utils.image_matrix(xs_dict, x_hats_dict, view_image, hparams)

    # Warn the user that some things were not processsed
    if len(x_batch_dict) > 0:
        print '\nDid NOT process last {} images because they did not fill up the last batch.'.format(len(x_batch_dict))
        print 'Consider rerunning lazily with a smaller batch size.'
Exemple #8
0
def main(hparams):
    hparams.n_input = np.prod(hparams.image_shape)
    maxiter = hparams.max_outer_iter
    utils.print_hparams(hparams)
    xs_dict = model_input(hparams)
    estimators = utils.get_estimators(hparams)
    utils.setup_checkpointing(hparams)
    measurement_losses, l2_losses = utils.load_checkpoints(hparams)
    x_hats_dict = {'dcgan': {}}
    x_batch_dict = {}
    for key, x in xs_dict.iteritems():
        x_batch_dict[key] = x
        if len(x_batch_dict) < hparams.batch_size:
            continue
        x_coll = [
            x.reshape(1, hparams.n_input) for _, x in x_batch_dict.iteritems()
        ]
        x_batch = np.concatenate(x_coll)
        A_outer = utils.get_outer_A(hparams)
        # 1bitify
        y_batch_outer = np.sign(np.matmul(x_batch, A_outer))

        x_main_batch = 0.0 * x_batch
        z_opt_batch = np.random.randn(hparams.batch_size, 100)
        for k in range(maxiter):
            x_est_batch = x_main_batch + hparams.outer_learning_rate * (
                np.matmul(
                    (y_batch_outer -
                     np.sign(np.matmul(x_main_batch, A_outer))), A_outer.T))
            estimator = estimators['dcgan']
            x_hat_batch, z_opt_batch = estimator(x_est_batch, z_opt_batch,
                                                 hparams)
            x_main_batch = x_hat_batch

        for i, key in enumerate(x_batch_dict.keys()):
            x = xs_dict[key]
            y = y_batch_outer[i]
            x_hat = x_hat_batch[i]
            x_hats_dict['dcgan'][key] = x_hat
            measurement_losses['dcgan'][key] = utils.get_measurement_loss(
                x_hat, A_outer, y)
            l2_losses['dcgan'][key] = utils.get_l2_loss(x_hat, x)
        print 'Processed upto image {0} / {1}'.format(key + 1, len(xs_dict))
        if (hparams.save_images) and ((key + 1) % hparams.checkpoint_iter
                                      == 0):
            utils.checkpoint(x_hats_dict, measurement_losses, l2_losses,
                             save_image, hparams)
            print '\nProcessed and saved first ', key + 1, 'images\n'

        x_batch_dict = {}

    if hparams.save_images:
        utils.checkpoint(x_hats_dict, measurement_losses, l2_losses,
                         save_image, hparams)
        print '\nProcessed and saved all {0} image(s)\n'.format(len(xs_dict))

    if hparams.print_stats:
        for model_type in hparams.model_types:
            print model_type
            mean_m_loss = np.mean(measurement_losses[model_type].values())
            mean_l2_loss = np.mean(l2_losses[model_type].values())
            print 'mean measurement loss = {0}'.format(mean_m_loss)
            print 'mean l2 loss = {0}'.format(mean_l2_loss)

    if hparams.image_matrix > 0:
        utils.image_matrix(xs_dict, x_hats_dict, view_image, hparams)

    # Warn the user that some things were not processsed
    if len(x_batch_dict) > 0:
        print '\nDid NOT process last {} images because they did not fill up the last batch.'.format(
            len(x_batch_dict))
        print 'Consider rerunning lazily with a smaller batch size.'
Exemple #9
0
import torch

import utils
from option import args  #set the args in option.py
from data import data
from trainer import Trainer

if __name__ == '__main__':
    # Seed torch's RNG first so later random draws are reproducible.
    torch.manual_seed(args.seed)

    # utils.checkpoint is the log-related helper for this run.
    checkpoint = utils.checkpoint(args)

    if checkpoint.ok:
        # Build the DataLoader, then alternate train/test passes until the
        # trainer reports that it is finished.
        my_loader = data(args).get_loader()
        t = Trainer(my_loader, checkpoint, args)
        while True:
            if t.terminate():
                break
            t.train()
            t.test()

        checkpoint.done()
Exemple #10
0
def main():
    """Set up a run directory, data, and model, then launch architecture_search.

    Steps, in order:
      1. Seed numpy / torch and pick the device from ``args.gpus``.
      2. Extend ``args.job_dir`` with dataset, architecture, optional mission
         and a timestamp, then create it and the logging helpers.
      3. Build the per-layer bit-width list from ``args.k_bits``.
      4. Load the training set (only 'imagenet' and 'cifar10' are handled).
      5. Import and instantiate the model named by ``args.arch``.
      6. Optionally restore weights from ``args.resume``.
      7. Call ``architecture_search`` with everything prepared above.
    """
    start_epoch = 0
    best_prec1 = 0.0

    # Draw a fresh seed for this run.
    # NOTE(review): np.random.randint always returns an integer, so the
    # `is not None` test below is always true.
    seed = np.random.randint(10000)

    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

    # Use the first listed GPU when any are given, otherwise the CPU.
    if args.gpus is not None:
        device = torch.device("cuda:{}".format(args.gpus[0]))
        cudnn.benchmark = False
        cudnn.deterministic = True
        cudnn.enabled = True
    else:
        device = torch.device("cpu")

    # Build the run directory:
    #   <job_dir>/<dataset>/<arch-tag>[/<mission>]/<timestamp>
    # where the arch tag is "<arch><num_layers>_bn" for batch-norm VGG,
    # plain "<arch>" for resnet20 (only in the mission branch), and
    # "<arch><num_layers>" otherwise.
    now = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
    if args.mission is not None:
        if 'vgg' == args.arch and args.batchnorm:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}_bn/{args.mission}/{now}'
        elif 'resnet20' == args.arch:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}/{args.mission}/{now}'
        else:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}/{args.mission}/{now}'
    else:
        if 'vgg' == args.arch and args.batchnorm:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}_bn/{now}'
        else:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}/{now}'

    # Create the directory and the logging / checkpoint helpers inside it.
    _make_dir(args.job_dir)
    ckpt = utils.checkpoint(args)
    print_logger = utils.get_logger(os.path.join(args.job_dir, "logger.log"))
    utils.print_params(vars(args), print_logger.info)
    log_file = os.path.join(args.job_dir, 'search_log.csv')
    writer_train = SummaryWriter(args.job_dir + '/run/train')
    writer_test = SummaryWriter(args.job_dir + '/run/test')

    ## hyperparameters settings ##
    # Every one of the (num_layers - 2) * 2 entries starts at args.k_bits.
    n_layers = (args.num_layers - 2) * 2
    unit_k_bits = int(args.k_bits)
    kbits_list = [unit_k_bits for i in range(n_layers)]
    print_logger.info(f'k_bits_list {kbits_list}')

    # Data loading
    print('=> Preparing data..')

    if args.dataset in ['cifar10', 'cifar100', 'mnist']:
        IMAGE_SIZE = 32
    else:
        IMAGE_SIZE = 224

    # NOTE(review): only 'imagenet' and 'cifar10' assign train_data; any
    # other dataset reaches architecture_search below with it unbound.
    if args.dataset == 'imagenet':
        # train_loader = get_imagenet_iter_dali(type = 'train',image_dir=args.data_dir, batch_size=args.train_batch_size,num_threads=args.workers,crop=IMAGE_SIZE,device_id=0,num_gpus=1)
        # val_loader = get_imagenet_iter_dali(type='val', image_dir=args.data_dir, batch_size=args.eval_batch_size,num_threads=args.workers,crop=IMAGE_SIZE,device_id=0,num_gpus=1)
        train_data = get_imagenet_iter_torch(type='train',
                                             image_dir=args.base_data_dir,
                                             batch_size=args.train_batch_size,
                                             num_threads=args.workers,
                                             crop=IMAGE_SIZE,
                                             device_id=0,
                                             num_gpus=1)

    elif args.dataset == 'cifar10':
        train_transform, test_transform = utils._data_transforms_cifar10(
            cutout=args.cutout)
        train_data = torchvision.datasets.CIFAR10(args.data_dir,
                                                  train=True,
                                                  transform=train_transform,
                                                  download=True)
        # test_data = torchvision.datasets.CIFAR10(args.data_dir,train=False, transform=test_transform, download=True)
        # train_loader = get_cifar_iter_dali(type='train', image_dir=args.data_dir, batch_size=args.train_batch_size,num_threads=args.workers)
        # val_loader = get_cifar_iter_dali(type='val', image_dir=args.data_dir, batch_size=args.eval_batch_size,num_threads=args.workers)

    # Create model
    print('=> Building model...')
    # NOTE(review): the same train/eval lengths are used for both branches,
    # and datasets outside these branches leave the lengths unbound --
    # confirm the intended values.
    if args.dataset == 'cifar10' or args.dataset == 'mnist':
        num_classes = 10
        train_data_length = 50000
        eval_data_length = 10000
    elif args.dataset == 'imagenet':
        num_classes = 1000
        train_data_length = 50000
        eval_data_length = 10000

    # mobilenetv2 takes one extra config entry (width_mult).
    if args.arch == 'mobilenetv2':
        model_config = {
            'k_bits': kbits_list,
            'num_layers': args.num_layers,
            'pre_k_bits': args.pre_k_bits,
            'ratio': args.ratio,
            'width_mult': args.width_mult
        }
    else:
        model_config = {
            'k_bits': kbits_list,
            'num_layers': args.num_layers,
            'pre_k_bits': args.pre_k_bits,
            'ratio': args.ratio
        }

    # Look the model factory up by name inside
    # models.<dataset>.<archtype>.<arch>; the factory name follows the same
    # arch-tag rules as the job directory above.
    if 'vgg' == args.arch and args.batchnorm:
        model, model_k_bits = import_module(
            f"models.{args.dataset}.{args.archtype}.{args.arch}"
        ).__dict__[f'{args.arch}{args.num_layers}_bn'](model_config)
    elif 'resnet20' == args.arch:
        model, model_k_bits = import_module(
            f"models.{args.dataset}.{args.archtype}.{args.arch}"
        ).__dict__[f'{args.arch}'](model_config)
    else:
        model, model_k_bits = import_module(
            f"models.{args.dataset}.{args.archtype}.{args.arch}"
        ).__dict__[f'{args.arch}{args.num_layers}'](model_config)

    model = model.to(device)

    print_logger.info(f'model_k_bits_list {model_k_bits}')

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()

    # Optionally resume from a checkpoint
    resume = args.resume

    # When resuming, `checkpoint` is the loaded file's content; otherwise it
    # is the freshly built model's state dict.
    if resume:
        print('=> Loading checkpoint {}'.format(resume))
        checkpoint = torch.load(resume, map_location=device)
        state_dict = checkpoint['state_dict']
        start_epoch = checkpoint['epoch']
        pre_train_best_prec1 = checkpoint['best_prec1']
        model_check = load_check(state_dict, model)
        model.load_state_dict(model_check)
        print('Prec@1:', pre_train_best_prec1)
    else:
        checkpoint = model.state_dict()

    # Hand everything to the search routine.
    choose_model,k_bits = architecture_search(args=args,nn_model=model,device = device,checkpoint=checkpoint, \
                            step=args.step,criterion=criterion,train_data=train_data,train_batch_size=args.train_batch_size, \
                            eval_batch_size=args.eval_batch_size,train_data_length = train_data_length, \
                            eval_data_length = eval_data_length,clip_value=args.grad_clip,lam=args.lam,\
                            gpu_id = 0,print_logger = print_logger,ckpt = ckpt,log_file=log_file)
Exemple #11
0
    # Have wandb watch both networks (gradient metrics)
    wandb.watch(gen)
    wandb.watch(critic)
    # Put both networks in training mode
    gen.train()
    critic.train()

    # Time the whole training run.
    start_time = time.time()
    for epoch in range(start_epoch, end_epoch + 1):
        train_one_epoch(epoch,
                        dataloader,
                        gen,
                        critic,
                        opt_gen,
                        opt_critic,
                        fixed_noise,
                        device,
                        metric_logger,
                        num_samples=cfg.NUM_SAMPLES,
                        freq=cfg.FREQ)
        # Save a checkpoint either at epoch NUM_EPOCHS + 1 or on every
        # SAVE_EACH_EPOCH-th epoch.
        # NOTE(review): both branches issue the identical checkpoint() call;
        # whether epoch can ever equal cfg.NUM_EPOCHS + 1 depends on
        # end_epoch, which is set outside this fragment -- confirm.
        if epoch == cfg.NUM_EPOCHS + 1:
            checkpoint(epoch, end_epoch, gen, critic, opt_gen, opt_critic,
                       fixed_noise)
        elif epoch % cfg.SAVE_EACH_EPOCH == 0:
            checkpoint(epoch, end_epoch, gen, critic, opt_gen, opt_critic,
                       fixed_noise)

    total_time = time.time() - start_time
    print(f"=> Training time:{total_time}")
def main(hparams):
    """Reconstruct every input image with each configured estimator.

    Images from ``model_input(hparams)`` are collected into batches of
    ``hparams.batch_size``.  Each full batch is measured with
    ``utils.get_measurements`` and passed to every estimator listed in
    ``hparams.model_types``.  For each image the estimate, measurement
    loss, l2 loss, LPIPS score and latent are stored, and checkpoints are
    written periodically when ``hparams.save_images`` is set.  The noise
    batch is drawn once, before the loop, and reused for every batch.
    Images left in an incomplete final batch are not processed; a warning
    is printed for them.
    """
    # set up perceptual loss
    device = 'cuda:0'
    percept = PerceptualLoss(
            model="net-lin", net="vgg", use_gpu=device.startswith("cuda")
    )

    utils.print_hparams(hparams)

    # get inputs
    xs_dict = model_input(hparams)

    estimators = utils.get_estimators(hparams)
    utils.setup_checkpointing(hparams)
    measurement_losses, l2_losses, lpips_scores, z_hats = utils.load_checkpoints(hparams)

    x_hats_dict = {model_type : {} for model_type in hparams.model_types}
    x_batch_dict = {}

    # Measurement matrix and noise are built once and shared by all batches.
    A = utils.get_A(hparams)
    noise_batch = hparams.noise_std * np.random.standard_t(2, size=(hparams.batch_size, hparams.num_measurements))

    for key, x in xs_dict.items():
        if not hparams.not_lazy:
            # If lazy, first check if the image has already been
            # saved before by *all* estimators. If yes, then skip this image.
            save_paths = utils.get_save_paths(hparams, key)
            is_saved = all([os.path.isfile(save_path) for save_path in save_paths.values()])
            if is_saved:
                continue

        x_batch_dict[key] = x
        if len(x_batch_dict) < hparams.batch_size:
            continue

        # Reshape input: one row per image, in the batch dict's order
        x_batch_list = [img.reshape(1, hparams.n_input) for img in x_batch_dict.values()]
        x_batch = np.concatenate(x_batch_list)

        # Construct measurements
        y_batch = utils.get_measurements(x_batch, A, noise_batch, hparams)

        # Construct estimates using each estimator
        for model_type in hparams.model_types:
            estimator = estimators[model_type]
            x_hat_batch, z_hat_batch, m_loss_batch = estimator(A, y_batch, hparams)

            # `i` is the row inside the batch, `batch_key` the image's key
            # in xs_dict.  Everything the estimator returns is per-batch and
            # so must be indexed by `i`; the measurement loss was previously
            # indexed by the image key, unlike its siblings.
            for i, batch_key in enumerate(x_batch_dict):
                x_true = xs_dict[batch_key]
                x_hat = x_hat_batch[i]

                # Save the estimate
                x_hats_dict[model_type][batch_key] = x_hat

                # Compute and store the per-image metrics
                measurement_losses[model_type][batch_key] = m_loss_batch[i]
                l2_losses[model_type][batch_key] = utils.get_l2_loss(x_hat, x_true)
                lpips_scores[model_type][batch_key] = utils.get_lpips_score(percept, x_hat, x_true, hparams.image_shape)
                z_hats[model_type][batch_key] = z_hat_batch[i]

        print('Processed upto image {0} / {1}'.format(key+1, len(xs_dict)))

        # Checkpointing
        if (hparams.save_images) and ((key+1) % hparams.checkpoint_iter == 0):
            utils.checkpoint(x_hats_dict, measurement_losses, l2_losses, lpips_scores, z_hats, save_image, hparams)
            # Start a fresh estimate dict once the current one is saved.
            x_hats_dict = {model_type : {} for model_type in hparams.model_types}
            print('\nProcessed and saved first ', key+1, 'images\n')

        x_batch_dict = {}

    # Final checkpoint
    if hparams.save_images:
        utils.checkpoint(x_hats_dict, measurement_losses, l2_losses, lpips_scores, z_hats, save_image, hparams)
        print('\nProcessed and saved all {0} image(s)\n'.format(len(xs_dict)))

    if hparams.print_stats:
        for model_type in hparams.model_types:
            print(model_type)
            measurement_loss_list = list(measurement_losses[model_type].values())
            l2_loss_list = list(l2_losses[model_type].values())
            mean_m_loss = np.mean(measurement_loss_list)
            mean_l2_loss = np.mean(l2_loss_list)
            print('mean measurement loss = {0}'.format(mean_m_loss))
            print('mean l2 loss = {0}'.format(mean_l2_loss))

    if hparams.image_matrix > 0:
        utils.image_matrix(xs_dict, x_hats_dict, view_image, hparams)

    # Warn the user that some things were not processed
    if len(x_batch_dict) > 0:
        print('\nDid NOT process last {} images because they did not fill up the last batch.'.format(len(x_batch_dict)))
        print('Consider rerunning lazily with a smaller batch size.')
Exemple #13
0
            # Run validation and append the three metrics to the history.
            print('Evaluating model!')
            val_miou, val_ciou, val_acc = validation(model, loader_val, cfg)
            history['val_miou'].append(val_miou)
            history['val_ciou'].append(val_ciou)
            history['val_acc'].append(val_acc)

            # best_pred[0] tracks the best mIoU and best_pred[1] the best
            # accuracy; only an mIoU improvement marks this as the best model.
            if val_miou > best_pred[0]:
                best_pred[0] = val_miou
                best_model= True
            else:
                best_model=False
            if val_acc > best_pred[1]:
                best_pred[1] = val_acc
            print('Best validation IOU and Acc:', best_pred)

            # Hand the encoder/decoder/optimizer state to checkpoint(),
            # flagged with whether this was the best model so far.
            checkpoint(state={'epoch': epoch,
                        'encoder': net_encoder.state_dict(),
                        'decoder': net_decoder.state_dict(),
                        'optimizer': optimizer.state_dict()},
                        cfg=cfg, best= best_model, history= history)


    







    # Report the test metrics: first to tensorboard, then to stdout
    # underneath a separator line of 80 '$' characters.
    utils.tensorboard_log_test(avg_accuracy, avg_loss, std)
    print('$' * 80)
    print('TEST: Epoch {} loss: {:.4f} accuracy: {:%} std: {:.4f}'.format(
        epoch, avg_loss, avg_accuracy, std))


# Run the experiment ==========================================================
try:
    for epoch in range(params.n_epochs):
        train(epoch)

        # Validate every `valid_eval_freq` epochs.
        if not epoch % params.valid_eval_freq:
            valid(epoch)

        # Snapshot the model every `checkpoint_freq` epochs, naming the
        # file after the epoch number.
        if not epoch % params.checkpoint_freq:
            checkpoint_path = os.path.join(
                params.checkpoint_dir, 'model_{}.pt'.format(epoch))
            utils.checkpoint(model, checkpoint_path)

    # Normal completion: save the final weights, then evaluate.
    print('Training finished')
    checkpoint_path = os.path.join(params.checkpoint_dir, 'model_last.pt')
    utils.checkpoint(model, checkpoint_path)
    print('Model saved. Evaluating test set')
    test()

except KeyboardInterrupt:
    # Interrupted by the user: still save the current weights and run the
    # test set before exiting.
    print('Keyboard Interrupt received. Saving, testing, and shutting down')
    checkpoint_path = os.path.join(params.checkpoint_dir, 'model_last.pt')
    utils.checkpoint(model, checkpoint_path)
    print('Model saved. Evaluating test set')
    test()
Exemple #15
0
def main(args):
    """Train model ``f`` with the loss terms enabled in ``args``.

    Builds the data loaders, the model with its replay buffer and the
    optimizer, then for every epoch in
    ``range(args.start_epoch, args.n_epochs)``:

    * multiplies the learning rate by ``args.decay_rate`` on the epochs
      listed in ``args.decay_epochs`` (with a linear warm-up while
      ``cur_iter <= args.warmup_iters``);
    * accumulates every loss whose weight is positive (``args.pyxce``,
      ``args.pxsgld``, ``args.pxcontrast``, ``args.pxysgld``,
      ``args.pxycontrast``), each multiplied by its *own* weight, and takes
      one optimizer step per training batch;
    * plots samples every ``args.plot_every`` epochs, checkpoints every
      ``args.ckpt_every`` epochs, and evaluates on the validation and test
      loaders every ``args.eval_every`` epochs.

    Relies on module-level names (``device``, ``logger``, ``plot``,
    ``checkpoint``, ``eval_classification`` ...).

    Args:
        args: parsed configuration namespace; see the attributes read below.

    Raises:
        AssertionError: when the absolute value of the summed loss exceeds
            1e8; restart information is printed first.
    """
    # Setup datasets
    dload_train, dload_train_labeled, dload_valid, dload_test = get_data(args)

    # Model and buffer
    sample_q = get_sample_q(args)
    f, replay_buffer = get_model_and_buffer(args, sample_q)

    # Setup Optimizer
    params = f.class_output.parameters() if args.clf_only else f.parameters()
    if args.optimizer == "adam":
        optim = torch.optim.Adam(params,
                                 lr=args.lr,
                                 betas=[0.9, 0.999],
                                 weight_decay=args.weight_decay)
    else:
        optim = torch.optim.SGD(params,
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=args.weight_decay)

    best_valid_acc = 0.0
    cur_iter = 0
    for epoch in range(args.start_epoch, args.n_epochs):

        # Decay lr
        if epoch in args.decay_epochs:
            for param_group in optim.param_groups:
                new_lr = param_group["lr"] * args.decay_rate
                param_group["lr"] = new_lr

        # Load data
        for i, (x_p_d, _) in tqdm(enumerate(dload_train)):
            # Warmup
            if cur_iter <= args.warmup_iters:
                lr = args.lr * cur_iter / float(args.warmup_iters)
                for param_group in optim.param_groups:
                    param_group["lr"] = lr

            x_p_d = x_p_d.to(device)
            x_lab, y_lab = next(dload_train_labeled)
            x_lab, y_lab = x_lab.to(device), y_lab.to(device)

            # Label smoothing
            dist = smooth_one_hot(y_lab, args.n_classes, args.smoothing)

            # Starts as a float and becomes a tensor once a term is added.
            L = 0.0

            # log p(y|x) cross entropy loss
            if args.pyxce > 0:
                logits = f.classify(x_lab)
                l_pyxce = KHotCrossEntropyLoss()(logits, dist)
                if cur_iter % args.print_every == 0:
                    acc = (logits.max(1)[1] == y_lab).float().mean()
                    print("p(y|x)CE {}:{:>d} loss={:>14.9f}, acc={:>14.9f}".
                          format(epoch, cur_iter, l_pyxce.item(), acc.item()))
                    logger.record_dict({
                        "l_pyxce": l_pyxce.cpu().data.item(),
                        "acc_pyxce": acc.item()
                    })
                L += args.pyxce * l_pyxce

            # log p(x) using sgld
            if args.pxsgld > 0:
                if args.class_cond_p_x_sample:
                    assert not args.uncond, "can only draw class-conditional samples if EBM is class-cond"
                    y_q = torch.randint(0, args.n_classes,
                                        (args.sgld_batch_size, )).to(device)
                    x_q = sample_q(f, replay_buffer, y=y_q)
                else:
                    x_q = sample_q(f, replay_buffer)  # sample from log-sumexp
                fp_all = f(x_p_d)
                fq_all = f(x_q)
                fp = fp_all.mean()
                fq = fq_all.mean()
                l_pxsgld = -(fp - fq)
                if cur_iter % args.print_every == 0:
                    print(
                        "p(x)SGLD | {}:{:>d} loss={:>14.9f} f(x_p_d)={:>14.9f} f(x_q)={:>14.9f}"
                        .format(epoch, i, l_pxsgld, fp, fq))
                    logger.record_dict(
                        {"l_pxsgld": l_pxsgld.cpu().data.item()})
                L += args.pxsgld * l_pxsgld

            # log p(x) using contrastive learning
            if args.pxcontrast > 0:
                # ones like dist to use all indexes
                ones_dist = torch.ones_like(dist).to(device)
                output, target, ce_output, neg_num = f.joint(img=x_lab,
                                                             dist=ones_dist)
                l_pxcontrast = nn.CrossEntropyLoss(reduction="mean")(output,
                                                                     target)
                if cur_iter % args.print_every == 0:
                    acc = (ce_output.max(1)[1] == y_lab).float().mean()
                    print(
                        "p(x)Contrast {}:{:>d} loss={:>14.9f}, acc={:>14.9f}".
                        format(epoch, cur_iter, l_pxcontrast.item(),
                               acc.item()))
                    logger.record_dict({
                        "l_pxcontrast":
                        l_pxcontrast.cpu().data.item(),
                        "acc_pxcontrast":
                        acc.item()
                    })
                # Weighted by its own coefficient (was args.pxycontrast,
                # which zeroed this term whenever pxycontrast was disabled).
                L += args.pxcontrast * l_pxcontrast

            # log p(x|y) using sgld
            if args.pxysgld > 0:
                x_q_lab = sample_q(f, replay_buffer, y=y_lab)
                fp, fq = f(x_lab).mean(), f(x_q_lab).mean()
                l_pxysgld = -(fp - fq)
                if cur_iter % args.print_every == 0:
                    print(
                        "p(x|y)SGLD | {}:{:>d} loss={:>14.9f} f(x_p_d)={:>14.9f} f(x_q)={:>14.9f}"
                        .format(epoch, i, l_pxysgld.item(), fp, fq))
                    logger.record_dict(
                        {"l_pxysgld": l_pxysgld.cpu().data.item()})
                # Weighted by its own coefficient (was args.pxsgld).
                L += args.pxysgld * l_pxysgld

            # log p(x|y) using contrastive learning
            if args.pxycontrast > 0:
                output, target, ce_output, neg_num = f.joint(img=x_lab,
                                                             dist=dist)
                l_pxycontrast = nn.CrossEntropyLoss(reduction="mean")(output,
                                                                      target)
                if cur_iter % args.print_every == 0:
                    acc = (ce_output.max(1)[1] == y_lab).float().mean()
                    print(
                        "p(x|y)Contrast {}:{:>d} loss={:>14.9f}, acc={:>14.9f}"
                        .format(epoch, cur_iter, l_pxycontrast.item(),
                                acc.item()))
                    logger.record_dict({
                        "l_pxycontrast":
                        l_pxycontrast.cpu().data.item(),
                        "acc_pxycontrast":
                        acc.item()
                    })
                L += args.pxycontrast * l_pxycontrast

            # SGLD training of log q(x) may diverge
            # break here and record information to restart
            if L.abs().item() > 1e8:
                restart_info = [
                    "restart epoch: {}".format(epoch),
                    "save dir: {}".format(args.log_dir),
                    "id: {}".format(args.id),
                    "steps: {}".format(args.n_steps),
                    "seed: {}".format(args.seed),
                    "exp prefix: {}".format(args.exp_prefix),
                ]
                # Printed twice on purpose: once to whatever stdout currently
                # is, and once more after restoring the real streams.
                for line in restart_info:
                    print(line)
                sys.stdout = sys.__stdout__
                sys.stderr = sys.__stderr__
                for line in restart_info:
                    print(line)
                # Explicit raise: a bare `assert False` disappears under -O
                # and training would continue on a diverged loss.
                raise AssertionError("shit loss explode...")

            optim.zero_grad()
            L.backward()
            optim.step()
            cur_iter += 1

        # NOTE: `i` below is the index of the last training batch of the epoch.
        if epoch % args.plot_every == 0:
            if args.plot_uncond:
                if args.class_cond_p_x_sample:
                    assert not args.uncond, "can only draw class-conditional samples if EBM is class-cond"
                    y_q = torch.randint(0, args.n_classes,
                                        (args.sgld_batch_size, )).to(device)
                    x_q = sample_q(f, replay_buffer, y=y_q)
                    plot(
                        "{}/x_q_{}_{:>06d}.png".format(args.log_dir, epoch, i),
                        x_q)
                    if args.plot_contrast:
                        x_q = sample_q(f, replay_buffer, y=y_q, contrast=True)
                        plot(
                            "{}/contrast_x_q_{}_{:>06d}.png".format(
                                args.log_dir, epoch, i), x_q)
                else:
                    x_q = sample_q(f, replay_buffer)
                    plot(
                        "{}/x_q_{}_{:>06d}.png".format(args.log_dir, epoch, i),
                        x_q)
                    if args.plot_contrast:
                        x_q = sample_q(f, replay_buffer, contrast=True)
                        plot(
                            "{}/contrast_x_q_{}_{:>06d}.png".format(
                                args.log_dir, epoch, i), x_q)
            if args.plot_cond:  # generate class-conditional samples
                # n_classes copies of every class label, grouped by class.
                y = torch.arange(0, args.n_classes)[None].repeat(
                    args.n_classes,
                    1).transpose(1, 0).contiguous().view(-1).to(device)
                x_q_y = sample_q(f, replay_buffer, y=y)
                plot("{}/x_q_y{}_{:>06d}.png".format(args.log_dir, epoch, i),
                     x_q_y)
                if args.plot_contrast:
                    y = torch.arange(0, args.n_classes)[None].repeat(
                        args.n_classes,
                        1).transpose(1, 0).contiguous().view(-1).to(device)
                    x_q_y = sample_q(f, replay_buffer, y=y, contrast=True)
                    plot(
                        "{}/contrast_x_q_y_{}_{:>06d}.png".format(
                            args.log_dir, epoch, i), x_q_y)

        if args.ckpt_every > 0 and epoch % args.ckpt_every == 0:
            checkpoint(f, replay_buffer, f"ckpt_{epoch}.pt", args)

        if epoch % args.eval_every == 0:
            # Validation set
            val_correct, val_loss = eval_classification(f, dload_valid)
            if val_correct > best_valid_acc:
                best_valid_acc = val_correct
                print("Best Valid!: {}".format(val_correct))
                checkpoint(f, replay_buffer, "best_valid_ckpt.pt", args)
            # Test set (kept in its own variable so the validation accuracy
            # is not overwritten before it is printed and logged)
            test_correct, test_loss = eval_classification(f, dload_test)
            print("Epoch {}: Valid Loss {}, Valid Acc {}".format(
                epoch, val_loss, val_correct))
            print("Epoch {}: Test Loss {}, Test Acc {}".format(
                epoch, test_loss, test_correct))
            f.train()
            logger.record_dict({
                "Epoch":
                epoch,
                "Valid Loss":
                val_loss,
                "Valid Acc":
                val_correct.detach().cpu().numpy(),
                "Test Loss":
                test_loss,
                "Test Acc":
                test_correct.detach().cpu().numpy(),
                # best_valid_acc is still the float 0.0 until a validation
                # accuracy beats it; as_tensor accepts both cases.
                "Best Valid":
                torch.as_tensor(best_valid_acc).detach().cpu().numpy(),
                "Loss":
                L.cpu().data.item(),
            })
        checkpoint(f, replay_buffer, "last_ckpt.pt", args)

        logger.dump_tabular()
            output = output.view(-1, opt.embed_dim).float()
            loss = criterion_lm(output, target) / batch.token.size(0)
            loss.backward()
            forward_losses += float(loss)
            optimizer_lm.step()

        print("\nforward LM loss: {}".format(forward_losses))
        print("backward LM loss: {}".format(backward_losses))

    ############ transport pretrained layers ###############
    for key, state in lm_model.state_dict().items():
        model.state_dict()[key] = state
    checkpoint(opt.lm_epoch,
               model,
               os.path.join(CURRENT_DIR,
                            '../models/pretrained_bilstm_crf.pth'),
               opt.batch_size,
               interrupted=False,
               use_gpu=opt.gpu)

    if opt.use_pretrain:
        print("========== use pretrain model ===========")
        model.load_state_dict(torch.load(opt.use_pretrain))

    print(model)

    ############ start training ################
    train_iter = BucketIterator(train_dataset,
                                batch_size=opt.batch_size,
                                shuffle=True,
                                repeat=False,
Exemple #17
0
            log[i].append({})
            model.train()
            for k, batch in tqdm(zip(ts, trainLoader)):
                t = time.time()
                loss_train = process_batch(batch, loss_train, i, k, 'train', t0)
                t_optim += time.time() - t
            for key, value in loss_train.items():
                log[i][j][key] = float(np.mean(value[-opt.nbatch_train:]))
            log[i][j]['train_batch'] = k
            model.eval()
            for k, batch in zip(vs, valLoader):
                t = time.time()
                loss_val = process_batch(batch, loss_val, i, k, 'val', t0)
                t_optim += time.time() - t
            for key, value in loss_val.items():
                log[i][j][key] = float(np.mean(value[-opt.nbatch_val:]))
            # optionally update LR after each epoch/minibatch
            model.lr_step()
            utils.checkpoint('%d_%d' %(i, j), model, log, opt)
            log[i][j]['time(optim)'] = '%.2f(%.2f)' %(time.time() - t0, t_optim)
            print(log[i][j])

except KeyboardInterrupt:
    time.sleep(2) # waiting for all threads to stop
    print('-' * 89)
    save = input('Exiting early, save the last model?[y/n]')
    if save == 'y':
        print('Saving...')
        utils.checkpoint('final', model, log, opt)

def train(config):
    """Train the generator/discriminator pair described by ``config``.

    Splits ``TrainDataset(config)`` into train and validation parts, builds
    ``Generator`` and ``Discriminator`` (optionally loading weights from
    ``config.gen_init`` / ``config.dis_init``), then runs ``config.epoch``
    epochs. After every epoch the generator is evaluated with ``test`` on the
    validation loader, loss graphs are saved, and every
    ``config.snapshot_interval`` epochs ``checkpoint`` is called.

    Args:
        config: configuration object; the attributes read are visible below
            (``validation_size``, ``threads``, ``batchsize``, ``lr``,
            ``beta1``, ``lamb``, ``minimax``, ``cuda``, ``out_dir`` ...).
    """
    gpu_manage(config)

    ### DATASET LOAD ###
    print('===> Loading datasets')

    dataset = TrainDataset(config)
    print('dataset:', len(dataset))
    # config.validation_size is the held-out fraction; the validation part
    # takes whatever remains after truncating the train size to an int.
    train_size = int((1 - config.validation_size) * len(dataset))
    validation_size = len(dataset) - train_size
    train_dataset, validation_dataset = torch.utils.data.random_split(
        dataset, [train_size, validation_size])
    print('train dataset:', len(train_dataset))
    print('validation dataset:', len(validation_dataset))
    training_data_loader = DataLoader(dataset=train_dataset,
                                      num_workers=config.threads,
                                      batch_size=config.batchsize,
                                      shuffle=True)
    validation_data_loader = DataLoader(dataset=validation_dataset,
                                        num_workers=config.threads,
                                        batch_size=config.validation_batchsize,
                                        shuffle=False)

    ### MODELS LOAD ###
    print('===> Loading models')

    gen = Generator(gpu_ids=config.gpu_ids)

    # Optional warm start of the generator from a saved state dict.
    if config.gen_init is not None:
        param = torch.load(config.gen_init)
        gen.load_state_dict(param)
        print('load {} as pretrained model'.format(config.gen_init))

    dis = Discriminator(in_ch=config.in_ch,
                        out_ch=config.out_ch,
                        gpu_ids=config.gpu_ids)

    # Optional warm start of the discriminator from a saved state dict.
    if config.dis_init is not None:
        param = torch.load(config.dis_init)
        dis.load_state_dict(param)
        print('load {} as pretrained model'.format(config.dis_init))

    # setup optimizer
    opt_gen = optim.Adam(gen.parameters(),
                         lr=config.lr,
                         betas=(config.beta1, 0.999),
                         weight_decay=0.00001)
    opt_dis = optim.Adam(dis.parameters(),
                         lr=config.lr,
                         betas=(config.beta1, 0.999),
                         weight_decay=0.00001)

    # Reusable (uninitialised) buffers; each batch is copied into them after
    # an in-place resize inside the training loop.
    real_a = torch.FloatTensor(config.batchsize, config.in_ch, config.width,
                               config.height)
    real_b = torch.FloatTensor(config.batchsize, config.out_ch, config.width,
                               config.height)
    M = torch.FloatTensor(config.batchsize, config.width, config.height)

    criterionL1 = nn.L1Loss()
    criterionMSE = nn.MSELoss()
    criterionSoftplus = nn.Softplus()

    # Move models, criteria and the batch buffers to the GPU when requested.
    if config.cuda:
        gen = gen.cuda()
        dis = dis.cuda()
        criterionL1 = criterionL1.cuda()
        criterionMSE = criterionMSE.cuda()
        criterionSoftplus = criterionSoftplus.cuda()
        real_a = real_a.cuda()
        real_b = real_b.cuda()
        M = M.cuda()

    # Only real_a and real_b are wrapped in Variable; M is left as a tensor.
    real_a = Variable(real_a)
    real_b = Variable(real_b)

    logreport = LogReport(log_dir=config.out_dir)
    validationreport = TestReport(log_dir=config.out_dir)

    print('===> begin')
    start_time = time.time()
    # main
    for epoch in range(1, config.epoch + 1):
        epoch_start_time = time.time()
        # Iterations are numbered from 1.
        for iteration, batch in enumerate(training_data_loader, 1):
            # batch = (input, target, mask); copy each into its buffer.
            real_a_cpu, real_b_cpu, M_cpu = batch[0], batch[1], batch[2]
            real_a.data.resize_(real_a_cpu.size()).copy_(real_a_cpu)
            real_b.data.resize_(real_b_cpu.size()).copy_(real_b_cpu)
            M.data.resize_(M_cpu.size()).copy_(M_cpu)
            # The generator returns an attention tensor and the generated image.
            att, fake_b = gen.forward(real_a)

            ################
            ### Update D ###
            ################

            opt_dis.zero_grad()

            # train with fake
            fake_ab = torch.cat((real_a, fake_b), 1)
            # detach(): the discriminator loss must not back-propagate into
            # the generator.
            pred_fake = dis.forward(fake_ab.detach())
            batchsize, _, w, h = pred_fake.size()

            # Softplus of the fake scores, summed and divided by batch size
            # and the two spatial dims of the prediction.
            loss_d_fake = torch.sum(
                criterionSoftplus(pred_fake)) / batchsize / w / h

            # train with real
            real_ab = torch.cat((real_a, real_b), 1)
            pred_real = dis.forward(real_ab)
            # Same normalisation, applied to the negated real scores.
            loss_d_real = torch.sum(
                criterionSoftplus(-pred_real)) / batchsize / w / h

            # Combined loss
            loss_d = loss_d_fake + loss_d_real

            loss_d.backward()

            # The discriminator weights are only stepped on epochs divisible
            # by config.minimax; its gradients are computed every iteration.
            if epoch % config.minimax == 0:
                opt_dis.step()

            ################
            ### Update G ###
            ################

            opt_gen.zero_grad()

            # First, G(A) should fake the discriminator
            # (no detach here: gradients flow back into the generator)
            fake_ab = torch.cat((real_a, fake_b), 1)
            pred_fake = dis.forward(fake_ab)
            loss_g_gan = torch.sum(
                criterionSoftplus(-pred_fake)) / batchsize / w / h

            # Second, G(A) = B
            # L1 reconstruction term scaled by config.lamb, plus an MSE term
            # between channel 0 of the attention output and the mask M.
            loss_g_l1 = criterionL1(fake_b, real_b) * config.lamb
            loss_g_att = criterionMSE(att[:, 0, :, :], M)
            loss_g = loss_g_gan + loss_g_l1 + loss_g_att

            loss_g.backward()

            opt_gen.step()

            # log
            # Every 10th iteration: print the loss components and hand the
            # totals to the log report.
            if iteration % 10 == 0:
                print(
                    "===> Epoch[{}]({}/{}): loss_d_fake: {:.4f} loss_d_real: {:.4f} loss_g_gan: {:.4f} loss_g_l1: {:.4f}"
                    .format(epoch, iteration, len(training_data_loader),
                            loss_d_fake.item(), loss_d_real.item(),
                            loss_g_gan.item(), loss_g_l1.item()))

                log = {}
                log['epoch'] = epoch
                # Global iteration counter across epochs.
                log['iteration'] = len(training_data_loader) * (epoch -
                                                                1) + iteration
                log['gen/loss'] = loss_g.item()
                log['dis/loss'] = loss_d.item()

                logreport(log)

        print('epoch', epoch, 'finished, use time',
              time.time() - epoch_start_time)
        # Validation runs without gradient tracking.
        with torch.no_grad():
            log_validation = test(config, validation_data_loader, gen,
                                  criterionMSE, epoch)
            validationreport(log_validation)
        print('validation finished')
        # Periodic snapshot of both networks.
        if epoch % config.snapshot_interval == 0:
            checkpoint(config, epoch, gen, dis)

        logreport.save_lossgraph()
        validationreport.save_lossgraph()
    print('training time:', time.time() - start_time)
                                      opt.batch_size,
                                      token2id[labels.PAD],
                                      char2id[labels.PAD],
                                      label2id[labels.O],
                                      shuffle=True))):
            batch_start = time.time()
            model.zero_grad()
            model.train()
            token_batch = get_variable(torch.LongTensor(token_batch),
                                       use_gpu=opt.gpu).transpose(1, 0)
            char_batch = get_variable(torch.LongTensor(char_batch),
                                      use_gpu=opt.gpu).transpose(1, 0)
            label_batch = get_variable(torch.LongTensor(label_batch),
                                       use_gpu=opt.gpu).transpose(1, 0)
            # loss = model.loss(token_batch, char_batch, label_batch) / token_batch.shape[0]
            loss = model.loss(token_batch, char_batch, label_batch)
            optimizer.zero_grad()
            print("loss: {}".format(loss))
            loss.backward()
            optimizer.step()
            #print("loss: {}".format(float(loss)))
            loss_per_epoch += float(loss)

        print('{}epoch\nloss: {}\nvalid: {}\ntime: {} sec.\n'.format(
            epoch, loss_per_epoch, 0,
            time.time() - start))
        if epoch % 10 == 0:
            print("model save {}epoch".format(epoch))
            checkpoint(model, opt.model_path)
    checkpoint(model, opt.model_path)
Exemple #20
0
def main(hparams):
    """Reconstruct images from sign measurements using the 'vae' estimator.

    NOTE: this function is Python 2 code (print statements, dict.iteritems).

    For every full batch of ``hparams.batch_size`` images from
    ``model_input(hparams)`` it draws a matrix ``A_outer``, forms
    ``y = sign(x A_outer)``, then repeats ``hparams.max_outer_iter`` times:
    an update step on the current estimate followed by a call to the 'vae'
    estimator. Estimates and losses are stored per image key and optionally
    checkpointed; summary statistics are printed at the end.

    Args:
        hparams: hyper-parameter object; ``n_input`` and ``model_type`` are
            overwritten here.
    """
    hparams.n_input = np.prod(hparams.image_shape)
    hparams.model_type = 'vae'
    maxiter = hparams.max_outer_iter
    utils.print_hparams(hparams)
    xs_dict = model_input(hparams)  # returns the images
    estimators = utils.get_estimators(hparams)
    utils.setup_checkpointing(hparams)
    measurement_losses, l2_losses = utils.load_checkpoints(hparams)

    x_hats_dict = {'vae': {}}
    x_batch_dict = {}

    for key, x in xs_dict.iteritems():
        print key
        x_batch_dict[key] = x  # accumulate images until a full batch is ready
        if len(x_batch_dict) < hparams.batch_size:
            continue
        x_coll = [
            x.reshape(1, hparams.n_input) for _, x in x_batch_dict.iteritems()
        ]  # one (1, n_input) row per image
        x_batch = np.concatenate(x_coll)  # stack the rows into the batch matrix

        A_outer = utils.get_outer_A(hparams)  # measurement matrix for this batch

        # NOTE(review): noise_batch is never used below, but the call still
        # advances NumPy's random state; the width 100 is hard-coded.
        noise_batch = hparams.noise_std * np.random.randn(
            hparams.batch_size, 100)

        y_batch_outer = np.sign(
            np.matmul(x_batch, A_outer)
        )  # sign of x_batch . A_outer (np.sign yields -1, 0 or +1)

        #y_batch_outer = np.matmul(x_batch, A_outer)

        x_main_batch = 0.0 * x_batch  # start the iterations from all zeros
        z_opt_batch = np.random.randn(hparams.batch_size,
                                      20)  # initial z passed to the estimator (width 20 hard-coded)

        for k in range(maxiter):

            # Step from the current estimate along
            # (y - sign(x_main A_outer)) A_outer^T, scaled by outer_learning_rate.
            x_est_batch = x_main_batch + hparams.outer_learning_rate * (
                np.matmul(
                    (y_batch_outer -
                     np.sign(np.matmul(x_main_batch, A_outer))), A_outer.T))
            #x_est_batch = x_main_batch + hparams.outer_learning_rate * (np.matmul((y_batch_outer - np.matmul(x_main_batch, A_outer)), A_outer.T))
            # The update step in x is done; now hand the result to the estimator.
            estimator = estimators['vae']
            x_hat_batch, z_opt_batch = estimator(
                x_est_batch, z_opt_batch, hparams)  # estimator returns a new x and z
            x_main_batch = x_hat_batch

        # NOTE(review): 784 is hard-coded here rather than hparams.n_input.
        dist = np.linalg.norm(x_batch - x_main_batch) / 784
        print 'cool'
        print dist

        # Relies on x_batch_dict iterating in the same order as when x_batch
        # was built, so row i belongs to this key.
        for i, key in enumerate(x_batch_dict.keys()):
            x = xs_dict[key]
            y = y_batch_outer[i]
            x_hat = x_hat_batch[i]

            # Save the estimate
            x_hats_dict['vae'][key] = x_hat

            # Compute and store measurement and l2 loss
            measurement_losses['vae'][key] = utils.get_measurement_loss(
                x_hat, A_outer, y)
            l2_losses['vae'][key] = utils.get_l2_loss(x_hat, x)
        # `key + 1` assumes numeric image keys -- TODO confirm.
        print 'Processed upto image {0} / {1}'.format(key + 1, len(xs_dict))

        # Checkpointing
        if (hparams.save_images) and ((key + 1) % hparams.checkpoint_iter
                                      == 0):
            utils.checkpoint(x_hats_dict, measurement_losses, l2_losses,
                             save_image, hparams)
            #x_hats_dict = {'dcgan' : {}}
            print '\nProcessed and saved first ', key + 1, 'images\n'

        # Start collecting the next batch.
        x_batch_dict = {}

    # Final checkpoint
    if hparams.save_images:
        utils.checkpoint(x_hats_dict, measurement_losses, l2_losses,
                         save_image, hparams)
        print '\nProcessed and saved all {0} image(s)\n'.format(len(xs_dict))

    # NOTE(review): iterates hparams.model_types although only the 'vae'
    # entries are written in this function -- confirm the two agree.
    if hparams.print_stats:
        for model_type in hparams.model_types:
            print model_type
            mean_m_loss = np.mean(measurement_losses[model_type].values())
            mean_l2_loss = np.mean(l2_losses[model_type].values())
            print 'mean measurement loss = {0}'.format(mean_m_loss)
            print 'mean l2 loss = {0}'.format(mean_l2_loss)

    if hparams.image_matrix > 0:
        utils.image_matrix(xs_dict, x_hats_dict, view_image, hparams)

    # Warn the user that some things were not processed
    if len(x_batch_dict) > 0:
        print '\nDid NOT process last {} images because they did not fill up the last batch.'.format(
            len(x_batch_dict))
        print 'Consider rerunning lazily with a smaller batch size.'
Exemple #21
0
if args.epochwise:
    # One linear-evaluation run per k = 100, 200, ..., 900: load the state
    # for k, train/test for args.epoch epochs, save both networks and append
    # one CSV row with the last accuracies.
    for k in range(100, 1000, 100):
        model, linear, projector, loptim, attacker = load(args, k)
        print('loading.......epoch ', str(k))

        ##### Linear evaluation #####
        for i in range(args.epoch):
            print('Epoch ', i)
            train_acc, model, linear, projector, loptim = linear_train(
                i, model, linear, projector, loptim, attacker)
            test_acc, model, linear = test(model, linear)
            adjust_lr(i, loptim)

        # Save the backbone and the linear head under names derived from k.
        checkpoint(model, test_acc, args.epoch, args, loptim,
                   save_name_add='epochwise{}'.format(k))
        checkpoint(linear, test_acc, args.epoch, args, loptim,
                   save_name_add='epochwise{}_linear'.format(k))

        # Only the process whose local rank is a multiple of
        # ngpus_per_node writes the log row.
        if not args.local_rank % ngpus_per_node:
            with open(logname, 'a') as logfile:
                logwriter = csv.writer(logfile, delimiter=',')
                logwriter.writerow([k, train_acc, test_acc])

# Reload with index 0 for the code that follows.
model, linear, projector, loptim, attacker = load(args, 0)
Exemple #22
0
def main():
    """Build the model, optionally restore it, then evaluate or train.

    Reads the module-level ``args``. Steps:

    1. Seed NumPy and PyTorch with a freshly drawn random seed and pick the
       device from ``args.gpus``.
    2. Extend ``args.job_dir`` with dataset / architecture / mission /
       timestamp components, create it, and set up the checkpoint helper,
       logger and TensorBoard writers.
    3. Create the data loaders and the model for ``args.dataset`` and
       ``args.arch``.
    4. If ``args.resume`` is set, load the stored weights.
    5. If ``args.test_only`` is set, run a single evaluation and return;
       otherwise train for ``args.train_epochs`` epochs, saving a checkpoint
       after each one.

    Returns:
        None.
    """
    start_epoch = 0
    best_prec1 = 0.0

    # The seed is always drawn here, so seeding is unconditional.
    seed = np.random.randint(10000)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    if args.gpus is not None:
        device = torch.device("cuda:{}".format(args.gpus[0]))
        cudnn.benchmark = False
        # cudnn.deterministic = True
        cudnn.enabled = True
    else:
        device = torch.device("cpu")

    now = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
    if args.mission is not None:
        if 'vgg' == args.arch and args.batchnorm:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}_bn/{args.mission}/{now}'
        elif 'resnet20' == args.arch:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}/{args.mission}/{now}'
        else:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}/{args.mission}/{now}'

    else:
        if 'vgg' == args.arch and args.batchnorm:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}_bn/{now}'
        else:
            args.job_dir = f'{args.job_dir}/{args.dataset}/{args.arch}{args.num_layers}/{now}'

    _make_dir(args.job_dir)
    ckpt = utils.checkpoint(args)
    print_logger = utils.get_logger(os.path.join(args.job_dir, "logger.log"))
    utils.print_params(vars(args), print_logger.info)
    writer_train = SummaryWriter(args.job_dir + '/run/train')
    writer_test = SummaryWriter(args.job_dir + '/run/test')

    ## hyperparameters settings ##
    # Every one of the (num_layers - 2) * 2 entries gets the same bit-width.
    n_layers = (args.num_layers - 2) * 2
    unit_k_bits = int(args.k_bits)
    kbits_list = [unit_k_bits for i in range(n_layers)]
    print_logger.info(f'k_bits_list {kbits_list}')

    # Data loading
    print('=> Preparing data..')

    if args.dataset in ['cifar10', 'cifar100', 'mnist']:
        IMAGE_SIZE = 32
    elif args.dataset == 'tinyimagenet':
        IMAGE_SIZE = 64
    else:
        IMAGE_SIZE = 224

    # imagenet and tinyimagenet share the same loader; only the crop differs.
    if args.dataset in ('imagenet', 'tinyimagenet'):
        train_loader = get_imagenet_iter_dali(type='train', image_dir=args.data_dir, batch_size=args.train_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
        val_loader = get_imagenet_iter_dali(type='val', image_dir=args.data_dir, batch_size=args.eval_batch_size, num_threads=args.workers, crop=IMAGE_SIZE, device_id=0, num_gpus=1)
    elif args.dataset == 'cifar10':
        train_loader = get_cifar_iter_dali(type='train', image_dir=args.data_dir, batch_size=args.train_batch_size, num_threads=args.workers)
        val_loader = get_cifar_iter_dali(type='val', image_dir=args.data_dir, batch_size=args.eval_batch_size, num_threads=args.workers)

    # Create model
    print('=> Building model...')
    # NOTE(review): the lengths below are only defined for cifar10 and
    # imagenet, and both use 50000 / 10000 -- confirm the imagenet values.
    if args.dataset == 'cifar10':
        num_classes = 10
        train_data_length = 50000
        eval_data_length = 10000
    elif args.dataset == 'imagenet':
        num_classes = 1000
        train_data_length = 50000
        eval_data_length = 10000

    # arch = args.arch
    # model = models.__dict__[arch]

    model_config = {'k_bits': kbits_list, 'num_layers': args.num_layers, 'pre_k_bits': args.pre_k_bits, 'ratio': args.ratio}
    if args.arch == 'mobilenetv2':
        model_config = {'k_bits': kbits_list, 'num_layers': args.num_layers, 'pre_k_bits': args.pre_k_bits, 'ratio': args.ratio, 'width_mult': args.width_mult}
    # The constructor is looked up by name inside
    # models.<dataset>.<archtype>.<arch>.
    if 'vgg' == args.arch and args.batchnorm:
        model, model_k_bits = import_module(f"models.{args.dataset}.{args.archtype}.{args.arch}").__dict__[f'{args.arch}{args.num_layers}_bn'](model_config)
    elif 'resnet20' == args.arch:
        model, model_k_bits = import_module(f"models.{args.dataset}.{args.archtype}.{args.arch}").__dict__[f'{args.arch}'](model_config)
    else:
        model, model_k_bits = import_module(f"models.{args.dataset}.{args.archtype}.{args.arch}").__dict__[f'{args.arch}{args.num_layers}'](model_config)

    model = model.to(device)
    print_logger.info(f'model_k_bits_list {model_k_bits}')
    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=[0.5 * args.train_epochs, 0.75 * args.train_epochs], gamma=0.1)

    # Optionally resume from a checkpoint
    resume = args.resume
    if resume:
        print('=> Loading checkpoint {}'.format(resume))
        checkpoint = torch.load(resume, map_location=device)
        state_dict = checkpoint['state_dict']
        start_epoch = checkpoint['epoch']
        pre_train_best_prec1 = checkpoint['best_prec1']
        model_check = load_check(state_dict, model)
        # (a leftover pdb.set_trace() was removed here: it blocked every
        # non-interactive resume)
        model.load_state_dict(model_check)
        print('Prec@1:', pre_train_best_prec1)

    if args.test_only:
        # Same argument list as the per-epoch call below, including
        # eval_data_length, which the earlier call omitted.
        test_prec1 = test(args, device, val_loader, eval_data_length, model, criterion, writer_test, print_logger, start_epoch)
        print('=> Test Prec@1: {:.2f}'.format(test_prec1))
        print(f'sample k_bits {kbits_list}')
        return

    # NOTE(review): training always starts at epoch 0; start_epoch from a
    # resumed checkpoint is only used by the test_only path -- confirm.
    for epoch in range(0, args.train_epochs):
        scheduler.step(epoch)
        train_loss, train_prec1 = train(args, device, train_loader, train_data_length, model, criterion, optimizer, writer_train, print_logger, epoch)
        test_prec1 = test(args, device, val_loader, eval_data_length, model, criterion, writer_test, print_logger, epoch)

        is_best = best_prec1 < test_prec1
        best_prec1 = max(test_prec1, best_prec1)

        state = {
                'state_dict': model.state_dict(),
                'test_prec1': test_prec1,
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'epoch': epoch + 1
            }
        ckpt.save_model(state, epoch + 1, is_best, mode='train')
        print_logger.info('==> BEST ACC {:.3f}'.format(best_prec1.item()))
def main(hparams):
    """Reconstruct every input image with each configured estimator.

    Images returned by ``model_input(hparams)`` are collected into batches of
    ``hparams.batch_size``.  For every full batch, noisy measurements are
    generated, each estimator in ``hparams.model_types`` produces an estimate,
    and the measurement / l2 losses are stored per image key.  Results are
    checkpointed periodically and once more after the loop.

    Args:
        hparams: hyper-parameter object.  Attributes read here:
            ``image_shape``, ``not_lazy``, ``batch_size``, ``noise_std``,
            ``num_measurements``, ``measurement_type``, ``model_types``,
            ``save_images``, ``checkpoint_iter``, ``print_stats`` and
            ``image_matrix``.  ``n_input`` is set on it as a side effect.

    Note:
        Image keys are used arithmetically (``key + 1``), so they are assumed
        to be integers -- confirm against ``model_input``.
    """
    # Set up some stuff according to hparams
    hparams.n_input = np.prod(hparams.image_shape)
    utils.set_num_measurements(hparams)
    utils.print_hparams(hparams)

    # Get inputs
    xs_dict = model_input(hparams)

    estimators = utils.get_estimators(hparams)
    utils.setup_checkpointing(hparams)
    measurement_losses, l2_losses = utils.load_checkpoints(hparams)

    x_hats_dict = {model_type: {} for model_type in hparams.model_types}
    x_batch_dict = {}
    # `.items()` (rather than `.iteritems()`) works on Python 2 and 3 alike.
    for key, x in xs_dict.items():
        if not hparams.not_lazy:
            # If lazy, first check if the image has already been
            # saved before by *all* estimators. If yes, then skip this image.
            save_paths = utils.get_save_paths(hparams, key)
            if all(os.path.isfile(path) for path in save_paths.values()):
                continue

        x_batch_dict[key] = x
        if len(x_batch_dict) < hparams.batch_size:
            continue

        # Freeze the key order once so that row i of every batch array is
        # guaranteed to belong to batch_keys[i].
        batch_keys = list(x_batch_dict)

        # Reshape input: one flattened row per image
        x_batch = np.concatenate([
            x_batch_dict[batch_key].reshape(1, hparams.n_input)
            for batch_key in batch_keys
        ])

        # Construct noise and measurements
        A = utils.get_A(hparams)
        noise_batch = hparams.noise_std * np.random.randn(
            hparams.batch_size, hparams.num_measurements)
        if hparams.measurement_type == 'project':
            y_batch = x_batch + noise_batch
        else:
            y_batch = np.matmul(x_batch, A) + noise_batch

        # Construct estimates using each estimator
        for model_type in hparams.model_types:
            estimator = estimators[model_type]
            x_hat_batch = estimator(A, y_batch, hparams)

            # Use dedicated names here: re-using `key` / `x` would clobber the
            # outer loop variables that the progress message and the
            # checkpoint test below rely on.
            for i, batch_key in enumerate(batch_keys):
                x_true = xs_dict[batch_key]
                y = y_batch[i]
                x_hat = x_hat_batch[i]

                # Save the estimate
                x_hats_dict[model_type][batch_key] = x_hat

                # Compute and store measurement and l2 loss
                measurement_losses[model_type][
                    batch_key] = utils.get_measurement_loss(x_hat, A, y)
                l2_losses[model_type][batch_key] = utils.get_l2_loss(
                    x_hat, x_true)

        print('Processed upto image {0} / {1}'.format(key + 1, len(xs_dict)))

        # Checkpointing
        if (hparams.save_images) and ((key + 1) % hparams.checkpoint_iter
                                      == 0):
            utils.checkpoint(x_hats_dict, measurement_losses, l2_losses,
                             save_image, hparams)
            # Estimates already written out need not be kept in memory
            x_hats_dict = {
                model_type: {}
                for model_type in hparams.model_types
            }
            print('\nProcessed and saved first ', key + 1, 'images\n')

        x_batch_dict = {}

    # Final checkpoint
    if hparams.save_images:
        utils.checkpoint(x_hats_dict, measurement_losses, l2_losses,
                         save_image, hparams)
        print('\nProcessed and saved all {0} image(s)\n'.format(len(xs_dict)))

    if hparams.print_stats:
        for model_type in hparams.model_types:
            print(model_type)
            # np.mean cannot consume a Python 3 dict view; materialise a list
            mean_m_loss = np.mean(list(measurement_losses[model_type].values()))
            mean_l2_loss = np.mean(list(l2_losses[model_type].values()))
            print('mean measurement loss = {0}'.format(mean_m_loss))
            print('mean l2 loss = {0}'.format(mean_l2_loss))

    if hparams.image_matrix > 0:
        utils.image_matrix(xs_dict, x_hats_dict, view_image, hparams)

    # Warn the user that some things were not processed
    if len(x_batch_dict) > 0:
        print(
            '\nDid NOT process last {} images because they did not fill up the last batch.'
            .format(len(x_batch_dict)))
        print('Consider rerunning lazily with a smaller batch size.')
Exemple #24
0
def train(gpu: int, args: Namespace):
    """Run the fold-wise training and validation loop for a PyTorch model.

    For every fold the model is trained for ``args.epochs`` epochs, evaluated
    on the held-out fold, and then passed through ``checkpoint``.

    Args:
        gpu: the GPU device
        args: user defined arguments
    """
    # join the process group under this process' global rank
    rank = args.nr * args.gpus + gpu
    setup(rank, args)

    # build the network, move it to the GPU, then wrap it in DDP
    model = ResNext().architecture
    model.cuda(gpu)
    model = DDP(model, device_ids=[gpu])

    # loss function (criterion) and optimizer
    criterion = nn.BCEWithLogitsLoss()
    optimizer = Adam(model.parameters(), args.lr)

    train_df = split_data(args.folds)
    is_first_gpu = gpu == 0  # only this process prints and saves

    for fold in range(args.folds):
        train_loader, valid_loader = get_data(args, train_df, fold, rank)
        fold_losses, fold_scores = [], []

        if is_first_gpu:
            print(f"Training started using fold {fold} for validation") 

        # ---- training ----
        model.train()
        for epoch in range(args.epochs):
            for step, (images, labels) in enumerate(train_loader):
                images, labels = images.cuda(gpu), labels.cuda(gpu)
                loss = criterion(model(images), labels)
                loss.backward()
                optimizer.step()
                # gradients are cleared after the step, ready for the next batch
                optimizer.zero_grad()

                if step % args.log_interval == 0 and is_first_gpu:
                    n_batches = len(train_loader)
                    print("Train Epoch: {} [{}/{} ({:.0f}%)]\tloss={:.4f}".format(
                          epoch+1, step, n_batches,
                          100. * step / n_batches, loss.item()))

        # ---- validation ----
        model.eval()
        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.cuda(gpu), labels.cuda(gpu)
                output = model(images)
                batch_loss = criterion(output, labels).item()
                batch_score = get_score(labels.detach().cpu(), output.detach().cpu())
                fold_losses.append(batch_loss)
                fold_scores.append(batch_score)

            if is_first_gpu:
                print("Validation loss={:.4f}\tAUC score={:.4f}".format(
                      statistics.mean(fold_losses), statistics.mean(fold_scores)))

        # checkpoint model; training continues with whatever it returns
        model = checkpoint(model, gpu, fold)

    if args.save_model and is_first_gpu:
        torch.save(model.module.state_dict(), "model.pt")

    cleanup()
Exemple #25
0
import torch

import utils
from option import args
from data import data
if args.fullTrain:
    from trainer import Trainer
else:
    from preTrainer import Trainer

# Seed torch's RNG from the command-line arguments
torch.manual_seed(args.seed)
# Checkpoint helper built from the arguments; it is handed to the Trainer below
checkpoint = utils.checkpoint(args)

# Only build the loader and the trainer when the checkpoint reports `ok`
if checkpoint.ok:
    my_loader = data(args).get_loader()
    # `Trainer` comes from `trainer` or `preTrainer` depending on args.fullTrain
    t = Trainer(my_loader, checkpoint, args)
    # Alternate one train pass and one test pass until the trainer says stop
    while not t.terminate():
        t.train()
        t.test()

    checkpoint.done()
""" 
my_loader = data(args).get_loader()

loader_train, loader_test = my_loader
check = 0

#for batch, (input, target,input_4,input_2, idx_scale) in enumerate(loader_train):
#    check = check+1
for batch, (input, target, idx_scale) in enumerate(loader_train):
    check = check+1
Exemple #26
0
def train(config):
    """Train a generator / discriminator pair on a paired-image dataset.

    The dataset is split 90/10 into train and test subsets.  Each iteration
    first computes the discriminator loss (softplus on the fake and the real
    pair), then the generator loss (adversarial term plus an L1 term weighted
    by ``config.lamb``).  After every epoch the generator is evaluated with
    ``test`` and, every ``config.snapshot_interval`` epochs, both networks
    are checkpointed.

    Args:
        config: configuration object.  Attributes read here: ``threads``,
            ``batchsize``, ``test_batchsize``, ``gen_model``, ``in_ch``,
            ``out_ch``, ``gpu_ids``, ``gen_init``, ``dis_init``, ``lr``,
            ``beta1``, ``cuda``, ``out_dir``, ``epoch``, ``minimax``,
            ``lamb`` and ``snapshot_interval``.

    Raises:
        ValueError: if ``config.gen_model`` names an unsupported generator.
    """
    gpu_manage(config)

    ### DATASET LOAD ###
    print('===> Loading datasets')

    dataset = Dataset(config)
    train_size = int(0.9 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size])
    training_data_loader = DataLoader(dataset=train_dataset,
                                      num_workers=config.threads,
                                      batch_size=config.batchsize,
                                      shuffle=True)
    test_data_loader = DataLoader(dataset=test_dataset,
                                  num_workers=config.threads,
                                  batch_size=config.test_batchsize,
                                  shuffle=False)

    ### MODELS LOAD ###
    print('===> Loading models')

    if config.gen_model == 'unet':
        gen = UNet(in_ch=config.in_ch,
                   out_ch=config.out_ch,
                   gpu_ids=config.gpu_ids)
    else:
        # Fail immediately with a clear message instead of continuing and
        # crashing later on an unbound `gen`.
        raise ValueError('The generator model does not exist: {!r}'.format(
            config.gen_model))

    if config.gen_init is not None:
        param = torch.load(config.gen_init)
        gen.load_state_dict(param)
        print('load {} as pretrained model'.format(config.gen_init))
    dis = Discriminator(in_ch=config.in_ch,
                        out_ch=config.out_ch,
                        gpu_ids=config.gpu_ids)
    if config.dis_init is not None:
        param = torch.load(config.dis_init)
        dis.load_state_dict(param)
        print('load {} as pretrained model'.format(config.dis_init))

    # setup optimizer
    opt_gen = optim.Adam(gen.parameters(),
                         lr=config.lr,
                         betas=(config.beta1, 0.999),
                         weight_decay=0.00001)
    opt_dis = optim.Adam(dis.parameters(),
                         lr=config.lr,
                         betas=(config.beta1, 0.999),
                         weight_decay=0.00001)

    # Reusable input buffers; each batch is copied into them below
    real_a = torch.FloatTensor(config.batchsize, config.in_ch, 256, 256)
    real_b = torch.FloatTensor(config.batchsize, config.out_ch, 256, 256)

    criterionL1 = nn.L1Loss()
    criterionMSE = nn.MSELoss()
    criterionSoftplus = nn.Softplus()

    if config.cuda:
        gen = gen.cuda(0)
        dis = dis.cuda(0)
        criterionL1 = criterionL1.cuda(0)
        criterionMSE = criterionMSE.cuda(0)
        criterionSoftplus = criterionSoftplus.cuda(0)
        real_a = real_a.cuda(0)
        real_b = real_b.cuda(0)

    real_a = Variable(real_a)
    real_b = Variable(real_b)

    logreport = LogReport(log_dir=config.out_dir)
    testreport = TestReport(log_dir=config.out_dir)

    # main
    for epoch in range(1, config.epoch + 1):
        for iteration, batch in enumerate(training_data_loader, 1):
            real_a_cpu, real_b_cpu = batch[0], batch[1]
            # Resize the buffers to the batch (the last one may be smaller)
            real_a.data.resize_(real_a_cpu.size()).copy_(real_a_cpu)
            real_b.data.resize_(real_b_cpu.size()).copy_(real_b_cpu)
            fake_b = gen.forward(real_a)

            ################
            ### Update D ###
            ################

            opt_dis.zero_grad()

            # train with fake; detach so this loss does not reach the generator
            fake_ab = torch.cat((real_a, fake_b), 1)
            pred_fake = dis.forward(fake_ab.detach())
            batchsize, _, w, h = pred_fake.size()

            loss_d_fake = torch.sum(
                criterionSoftplus(pred_fake)) / batchsize / w / h

            # train with real
            real_ab = torch.cat((real_a, real_b), 1)
            pred_real = dis.forward(real_ab)
            loss_d_real = torch.sum(
                criterionSoftplus(-pred_real)) / batchsize / w / h

            # Combined loss
            loss_d = loss_d_fake + loss_d_real

            loss_d.backward()

            # The discriminator is only stepped on epochs that are a multiple
            # of config.minimax; the generator is stepped every iteration.
            if epoch % config.minimax == 0:
                opt_dis.step()

            ################
            ### Update G ###
            ################

            opt_gen.zero_grad()

            # First, G(A) should fake the discriminator
            fake_ab = torch.cat((real_a, fake_b), 1)
            pred_fake = dis.forward(fake_ab)
            loss_g_gan = torch.sum(
                criterionSoftplus(-pred_fake)) / batchsize / w / h

            # Second, G(A) = B
            loss_g_l1 = criterionL1(fake_b, real_b) * config.lamb

            loss_g = loss_g_gan + loss_g_l1

            loss_g.backward()

            opt_gen.step()

            # log
            if iteration % 100 == 0:
                print(
                    "===> Epoch[{}]({}/{}): loss_d_fake: {:.4f} loss_d_real: {:.4f} loss_g_gan: {:.4f} loss_g_l1: {:.4f}"
                    .format(epoch, iteration, len(training_data_loader),
                            loss_d_fake.item(), loss_d_real.item(),
                            loss_g_gan.item(), loss_g_l1.item()))

                log = {}
                log['epoch'] = epoch
                # Global iteration counter across epochs
                log['iteration'] = len(training_data_loader) * (epoch -
                                                                1) + iteration
                log['gen/loss'] = loss_g.item()
                log['dis/loss'] = loss_d.item()

                logreport(log)

        with torch.no_grad():
            log_test = test(config, test_data_loader, gen, criterionMSE, epoch)
            testreport(log_test)

        if epoch % config.snapshot_interval == 0:
            checkpoint(config, epoch, gen, dis)

        logreport.save_lossgraph()
        testreport.save_lossgraph()
Exemple #27
0
    for i in range(opt.n_epochs):
        log.append([])
        t_optim = 0
        t0 = time.time()
        train_slices = utils.slice_epoch(opt.nbatch_train, opt.n_slices)
        val_slices = utils.slice_epoch(opt.nbatch_val, opt.n_slices)
        for ts, vs, j in zip(train_slices, val_slices, range(opt.n_slices)):
            log[i].append({})
            for k, batch in zip(ts, trainLoader):
                t = time.time()
                loss_train = process_batch(batch, loss_train, i, k, "train", t0)
                t_optim += time.time() - t
            for key, value in loss_train.items():
                log[i][j]["train_" + key] = np.mean(value[-opt.nbatch_train :])
            for k, batch in zip(vs, valLoader):
                loss_val = process_batch(batch, loss_val, i, k, "val", t0)
                t_optim += time.time() - t
            for key, value in loss_val.items():
                log[i][j]["val_" + key] = np.mean(value[-opt.nbatch_val :])
            utils.checkpoint("%d_%d" % (i, j), model, log, opt)
            log[i][j]["time(optim)"] = "%.2f(%.2f)" % (time.time() - t0, t_optim)
            print(log[i][j])

except KeyboardInterrupt:
    time.sleep(2)  # waiting for all threads to stop
    print("-" * 89)
    save = input("Exiting early, save the last model?[y/n]")
    if save == "y":
        print("Saving...")
        utils.checkpoint("final", model, log, opt)
# Script entry point: epsilon-greedy training loop over NUM_EPISODES + 1
# episodes, updating the table Q after every environment step.
if __name__ == '__main__':
	
	print("\nObservation\n--------------------------------")
	print("Shape :", obs_dim)
	print("\nAction\n--------------------------------")
	print("Shape :", action_dim, "\n")

	total_reward = 0
	# NOTE(review): `deltas` is initialised but never appended to in the
	# visible code -- presumably meant to collect `biggest_change`; confirm.
	deltas = []

	for episode in range(NUM_EPISODES + 1):
		# Exploration rate: START_EPSILON / (1 + episode * EPSILON_TAPER)
		eps = START_EPSILON / (1.0 + episode * EPSILON_TAPER)

		# Save Q every 10000 episodes (this includes episode 0)
		if episode%10000 == 0:
			cp_file = checkpoint(Q, CHECKPOINT_DIR, GAME, episode)
			print('Saved Checkpoint to: ', cp_file)

		# Largest absolute change of any Q entry during this episode
		biggest_change = 0
		curr_state = env.reset()
		for step in range(MAX_STEPS):
			prev_state = curr_state
			# Count how often each state has been the starting state of a step
			state_visit_counts[prev_state] = state_visit_counts.get(prev_state,0)+1
			action = epsilon_action(curr_state, eps)
			curr_state, reward, done, info = env.step(action)
			total_reward += reward
			# Remember the value before the update so the change can be measured
			old_qsa = Q[prev_state][action]
			update_Q(prev_state, action, reward, curr_state)
			biggest_change = max(biggest_change, np.abs(old_qsa - Q[prev_state][action]))
			if done:
				break
def main():
    """Entry point: parse the CLI, build the capsule network, train and test.

    Steps performed:
      1. Parse the hyper-parameters into the module-level ``args``.
      2. Seed the RNGs and load the train/test data loaders.
      3. Build the model (wrapped in ``DataParallel`` when CUDA is used) and
         print its architecture and parameter count.
      4. When ``--is-training`` is 0, restore model and optimizer state from
         ``results/trained_model/model_epoch_50.pth``.
      5. For every epoch: train (only when training), test, and save a
         checkpoint through ``utils.checkpoint``.
    """
    global args

    # Setting the hyper parameters
    parser = argparse.ArgumentParser(description='Example of Capsule Network')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        help='number of training epochs. default=10')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        help='learning rate. default=0.01')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        help='training batch size. default=128')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=128,
                        help='testing batch size. default=128')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        help=
        'how many batches to wait before logging training status. default=10')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training. default=false')
    parser.add_argument(
        '--threads',
        type=int,
        default=4,
        help='number of threads for data loader to use. default=4')
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help='random seed for training. default=42')
    parser.add_argument(
        '--num-conv-out-channel',
        type=int,
        default=256,
        help='number of channels produced by the convolution. default=256')
    parser.add_argument(
        '--num-conv-in-channel',
        type=int,
        default=1,
        help='number of input channels to the convolution. default=1')
    parser.add_argument('--num-primary-unit',
                        type=int,
                        default=8,
                        help='number of primary unit. default=8')
    parser.add_argument('--primary-unit-size',
                        type=int,
                        default=1152,
                        help='primary unit size is 32 * 6 * 6. default=1152')
    # The help texts of the next two options state the defaults actually
    # configured here (2 and 1).
    parser.add_argument(
        '--num-classes',
        type=int,
        default=2,
        help='number of classes, one output unit per class. default=2')
    parser.add_argument('--output-unit-size',
                        type=int,
                        default=1,
                        help='output unit size. default=1')
    parser.add_argument('--num-routing',
                        type=int,
                        default=3,
                        help='number of routing iteration. default=3')
    parser.add_argument(
        '--use-reconstruction-loss',
        type=utils.str2bool,
        nargs='?',
        default=True,
        help='use an additional reconstruction loss. default=True')
    parser.add_argument(
        '--regularization-scale',
        type=float,
        default=0.0005,
        help=
        'regularization coefficient for reconstruction loss. default=0.0005')
    parser.add_argument('--dataset',
                        help='the name of dataset (mnist, cifar10)',
                        default='mnist')
    parser.add_argument(
        '--input-width',
        type=int,
        default=28,
        help='input image width to the convolution. default=28 for MNIST')
    parser.add_argument(
        '--input-height',
        type=int,
        default=28,
        help='input image height to the convolution. default=28 for MNIST')
    parser.add_argument('--is-training',
                        type=int,
                        default=1,
                        help='Whether or not is training, default is yes')
    # NOTE(review): --weights is parsed but not read anywhere in this
    # function; the checkpoint path below is hard-coded.
    parser.add_argument('--weights',
                        type=str,
                        default=None,
                        help='Load pretrained weights, default is none')

    args = parser.parse_args()

    print(args)

    # Check GPU or CUDA is available
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Get reproducible results by manually seed the random number generator
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Load data
    train_loader, test_loader = utils.load_data(args)

    # Build Capsule Network
    print('===> Building model')
    model = Net(num_conv_in_channel=args.num_conv_in_channel,
                num_conv_out_channel=args.num_conv_out_channel,
                num_primary_unit=args.num_primary_unit,
                primary_unit_size=args.primary_unit_size,
                num_classes=args.num_classes,
                output_unit_size=args.output_unit_size,
                num_routing=args.num_routing,
                use_reconstruction_loss=args.use_reconstruction_loss,
                regularization_scale=args.regularization_scale,
                input_width=args.input_width,
                input_height=args.input_height,
                cuda_enabled=args.cuda)

    if args.cuda:
        print('Utilize GPUs for computation')
        print('Number of GPU available', torch.cuda.device_count())
        model.cuda()
        cudnn.benchmark = True
        model = torch.nn.DataParallel(model)

    # Print the model architecture and parameters
    print('Model architectures:\n{}\n'.format(model))

    print('Parameters and size:')
    for name, param in model.named_parameters():
        print('{}: {}'.format(name, list(param.size())))

    # CapsNet has:
    # - 8.2M parameters and 6.8M parameters without the reconstruction subnet on MNIST.
    # - 11.8M parameters and 8.0M parameters without the reconstruction subnet on CIFAR10.
    num_params = sum(param.nelement() for param in model.parameters())

    # The coupling coefficients c_ij are not included in the parameter list,
    # we need to add them manually, which is 1152 * 10 = 11520 (on MNIST) or 2048 * 10 (on CIFAR10)
    # NOTE(review): these constants assume 10 classes, while --num-classes
    # defaults to 2 -- confirm the reported total is what is wanted.
    print('\nTotal number of parameters: {}\n'.format(
        num_params + (11520 if args.dataset == 'mnist' else 20480)))

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Make model checkpoint directory (exist_ok avoids the check-then-create race)
    os.makedirs('results/trained_model', exist_ok=True)

    # Set the logger
    writer = SummaryWriter()

    if not args.is_training:
        print("Loading checkpoint")
        # Map every tensor to CPU storage regardless of where it was saved
        checkpoint = torch.load('results/trained_model/model_epoch_50.pth',
                                map_location=lambda storage, loc: storage)

        from collections import OrderedDict
        state_dict = checkpoint['state_dict']
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            # Remove the `module.` prefix, but only when it is really there so
            # that keys saved without it are not truncated.
            name = k[7:] if k.startswith('module.') else k
            if name.startswith('fc'):
                name = 'decoder.' + name
            new_state_dict[name] = v

        model.load_state_dict(new_state_dict)
        optimizer.load_state_dict(checkpoint['optimizer'])

    # Train and test
    for epoch in range(1, args.epochs + 1):
        if args.is_training:
            train(model, train_loader, optimizer, epoch, writer)
        test(model, test_loader, len(train_loader), epoch, writer)

        # Save model checkpoint
        utils.checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }, epoch)

    writer.close()
def main(hparams):
    """Reconstruct the input images with each estimator and record the losses.

    Images returned by ``model_input(hparams)`` are collected into batches of
    ``hparams.batch_size``.  For each full batch the measurements are loaded
    through ``utils.load_meas`` and, depending on
    ``hparams.measurement_type``, either a plot is produced
    (``'sample_distribution'`` / ``'autoencoder'``) or every estimator in
    ``hparams.model_types`` is run and its per-image losses are stored on a
    ``utils.SaveHandler``.  Results are checkpointed periodically and once
    more at the end; summary statistics are printed when images are saved.

    Args:
        hparams: hyper-parameter object.  Besides being read, it is used as a
            scratch pad: ``n_input``, ``bol``, ``key_field``, ``x``,
            ``optim`` and ``recovered`` are set on it here.

    Note:
        Image keys are used arithmetically (``key + 1``), so they are assumed
        to be integers -- confirm against ``model_input``.
    """
    # Set up some stuff according to hparams
    hparams.n_input = np.prod(hparams.image_shape)
    utils.set_num_measurements(hparams)
    utils.print_hparams(hparams)

    hparams.bol = False
    # get inputs
    if hparams.input_type == 'dict-input':
        # Load the candidate dictionary that was stored by a run with
        # full-input / project / zero z-prior settings.
        hparams_load_key = copy.copy(hparams)
        hparams_load_key.input_type = 'full-input'
        hparams_load_key.measurement_type = 'project'
        hparams_load_key.zprior_weight = 0.0
        hparams.key_field = np.load(utils.get_checkpoint_dir(hparams_load_key, hparams.model_types[0])+'candidates.npy').item()
        print(hparams.measurement_type)
    xs_dict, label_dict = model_input(hparams)

    estimators = utils.get_estimators(hparams)
    utils.setup_checkpointing(hparams)
    sh = utils.SaveHandler()
    sh.load_or_init_all(hparams.save_images,hparams.model_types,sh.get_pkl_filepaths(hparams,use_all=True))
    if label_dict is None:
        print('No labels exist.')
        # NOTE(review): the attribute used further down is `class_losses`;
        # confirm that `class_loss` is the name intended here.
        del sh.class_loss

    if hparams.input_type == 'gen-span':
        np.save(utils.get_checkpoint_dir(hparams, hparams.model_types[0])+'z.npy',hparams.z_from_gen)
        np.save(utils.get_checkpoint_dir(hparams, hparams.model_types[0])+'images.npy',hparams.images_mat)

    x_hats_dict = {model_type : {} for model_type in hparams.model_types}
    x_batch_dict = {}
    x_batch = []
    x_hat_batch = []
    image_distance = np.zeros((len(xs_dict),1))
    hparams.x = [] # TO REMOVE
    # Each batch is processed once; x_batch_dict is emptied after processing.
    # `.items()` (rather than `.iteritems()`) works on Python 2 and 3 alike.
    for key, x in xs_dict.items():
        if not hparams.not_lazy:
            # If lazy, first check if the image has already been
            # saved before by *all* estimators. If yes, then skip this image.
            save_paths = utils.get_save_paths(hparams, key)
            if all(os.path.isfile(save_path) for save_path in save_paths.values()):
                continue

        x_batch_dict[key] = x
        hparams.x.append(x) # TO REMOVE
        if len(x_batch_dict) < hparams.batch_size:
            continue

        # Freeze the key order once so that row i of every batch array is
        # guaranteed to belong to batch_keys[i].
        batch_keys = list(x_batch_dict)

        # Reshape input: one flattened row per image
        x_batch = np.concatenate(
            [x_batch_dict[batch_key].reshape(1, hparams.n_input) for batch_key in batch_keys])

        # Re-use a stored optimisation result only if it belongs to this batch
        recovered, optim = utils.load_if_optimized(hparams)
        if recovered and np.linalg.norm(optim.x_orig-x_batch) < 1e-10:
            hparams.optim = optim
            hparams.recovered = True
        else:
            hparams.recovered = False
            optim.x_orig = x_batch
            hparams.optim = optim

        # Construct noise and measurements
        A, noise_batch, y_batch, c_val = utils.load_meas(hparams,sh,x_batch,xs_dict)
        hparams.optim.noise_batch = noise_batch
        if c_val:
            # NOTE(review): x_batch_dict is not reset on this path, so the
            # next batch keeps growing -- confirm this is intended.
            continue

        if hparams.measurement_type == 'sample_distribution':
            plot_distribution(hparams,x_batch)
        elif hparams.measurement_type == 'autoencoder':
            plot_reconstruction(hparams,x_batch)
        else:
            # Construct estimates using each estimator
            for model_type in hparams.model_types:
                estimator = estimators[model_type]
                start = time.time()

                # An estimator may return either x_hat alone or (x_hat, z_rec)
                tmp = estimator(A, y_batch, hparams)
                if isinstance(tmp,tuple):
                    x_hat_batch = tmp[0]
                    sh.z_rec = tmp[1]
                else:
                    x_hat_batch = tmp
                    del sh.z_rec
                end = time.time()
                duration = end-start
                print('The calculation needed {} time'.format(datetime.timedelta(seconds=duration)))
                np.save(utils.get_checkpoint_dir(hparams, model_type)+'elapsed_time',duration)

                # Use dedicated names here: re-using `key` / `x` would clobber
                # the outer loop variables that the progress message and the
                # checkpoint test below rely on.
                for i, batch_key in enumerate(batch_keys):
                    x_true = xs_dict[batch_key]
                    y = y_batch[i]
                    x_hat = x_hat_batch[i]

                    # Save the estimate
                    x_hats_dict[model_type][batch_key] = x_hat

                    # Compute and store measurement and l2 loss
                    sh.measurement_losses[model_type][batch_key] = utils.get_measurement_loss(x_hat, A, y)
                    sh.l2_losses[model_type][batch_key] = utils.get_l2_loss(x_hat, x_true)
                    if hparams.class_bol and label_dict is not None:
                        try:
                            sh.class_losses[model_type][batch_key] = utils.get_classifier_loss(hparams,x_hat,label_dict[batch_key])
                        except Exception:
                            # Best effort: record NaN and keep going, but let
                            # KeyboardInterrupt / SystemExit propagate.
                            sh.class_losses[model_type][batch_key] = np.nan
                            warnings.warn('Class loss unsuccessfull, most likely due to corrupted memory. Simply retry.')
                    if hparams.emd_bol:
                        try:
                            _,sh.emd_losses[model_type][batch_key] = utils.get_emd_loss(x_hat, x_true)
                            if 'nonneg' not in hparams.tv_or_lasso_mode and 'pca'  in model_type:
                                warnings.warn('EMD requires nonnegative images, for safety insert nonneg into tv_or_lasso_mode')
                        except ValueError:
                            warnings.warn('EMD calculation unsuccesfull (most likely due to negative images)')
                    # Indexed by position in the batch, so each batch and each
                    # model type overwrites the previous values.
                    image_distance[i] = np.linalg.norm(x_hat-x_true)
            print('Processed upto image {0} / {1}'.format(key+1, len(xs_dict)))
            sh.x_orig = x_batch
            sh.x_rec = x_hat_batch
            sh.noise = noise_batch

            # ACTIVATE ON DEMAND
            # plot_bad_reconstruction(measurement_losses,x_batch)
            # Checkpointing
            if (hparams.save_images) and ((key+1) % hparams.checkpoint_iter == 0):
                utils.checkpoint(x_hats_dict, save_image, sh, hparams)
                x_hats_dict = {model_type : {} for model_type in hparams.model_types}
                print('\nProcessed and saved first ', key+1, 'images\n')
            x_batch_dict = {}

    if 'wavelet' in hparams.model_types[0]:
        # Parenthesised call form: valid on Python 2 and Python 3
        print(np.abs(sh.x_rec))
        print('The average sparsity is {}'.format(np.sum(np.abs(sh.x_rec)>=0.0001)/float(hparams.batch_size)))

    # Final checkpoint
    if hparams.save_images:
        utils.checkpoint(x_hats_dict, save_image, sh, hparams)
        print('\nProcessed and saved all {0} image(s)\n'.format(len(xs_dict)))
        if hparams.dataset in ['mnist', 'fashion-mnist']:
            if np.array(x_batch).size:
                utilsM.save_images(np.reshape(x_batch, [-1, 28, 28]),
                                          [8, 8],utils.get_checkpoint_dir(hparams, hparams.model_types[0])+'original.png')
            if np.array(x_hat_batch).size:
                utilsM.save_images(np.reshape(x_hat_batch, [-1, 28, 28]),
                                          [8, 8],utils.get_checkpoint_dir(hparams, hparams.model_types[0])+'reconstruction.png')

        for model_type in hparams.model_types:
            # np.mean cannot consume a Python 3 dict view; materialise lists
            mean_m_loss = np.mean(list(sh.measurement_losses[model_type].values()))
            mean_l2_loss = np.mean(list(sh.l2_losses[model_type].values())) #\|XHUT-X\|**2/784/64
            if hparams.emd_bol:
                mean_emd_loss = np.mean(list(sh.emd_losses[model_type].values()))
            if label_dict is not None:
                mean_class_loss = np.mean(list(sh.class_losses[model_type].values()))
                print('mean class loss = {0}'.format(mean_class_loss))
            mean_norm_loss = np.mean(image_distance) #sum_i(\|xhut_i-x_i\|)/64
            print('mean measurement loss = {0}'.format(mean_m_loss))
            print('mean l2 loss = {0}'.format(mean_l2_loss))
            if hparams.emd_bol:
                print('mean emd loss = {0}'.format(mean_emd_loss))
            print('mean distance = {0}'.format(mean_norm_loss))
            # Normalise by the length of the first image in xs_dict
            first_key = next(iter(xs_dict))
            print('mean distance pixelwise = {0}'.format(mean_norm_loss/len(xs_dict[first_key])))

    if hparams.image_matrix > 0:
        utils.image_matrix(xs_dict, x_hats_dict, view_image, hparams)

    # Warn the user that some things were not processed
    if len(x_batch_dict) > 0:
        print('\nDid NOT process last {} images because they did not fill up the last batch.'.format(len(x_batch_dict)))
        print('Consider rerunning lazily with a smaller batch size.')