Example #1
    def train(self, dataset_map_fn, batch_size, epochs, model, device,
              optimizer):
        for epoch in range(1, epochs + 1):
            if self.verbose:
                print(f"Epoch {epoch}\n-------------------------------")

            train_dataloader, test_dataloader = get_dataloaders(
                dataset_map_fn=dataset_map_fn, batch_size=batch_size)

            model.train()
            self.train_epoch(model, train_dataloader, optimizer, device)

            model.eval()
            test_eval_dict = self.evaluate_epoch(model, test_dataloader,
                                                 device)
            test_accuracy = test_eval_dict['accuracy'] * 100
            test_loss = test_eval_dict['loss']
            if self.verbose:
                print(
                    f"Test Error: \n Accuracy: {test_accuracy:>0.1f}%, Avg loss: {test_loss:>8f}\n"
                )

            if epoch % 10 == 0:
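                # tc is assumed to be torch, imported as "import torch as tc"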
                tc.save(model.state_dict(), "model.pth")
                tc.save(optimizer.state_dict(), "optimizer.pth")
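None of the examples on this page include the definition of get_dataloaders itself. As a point of reference, a minimal sketch compatible with the call in Example #1 might look like the following; the dataset_map_fn contract (a callable that builds the train or test Dataset) is an assumption, not taken from the original project.

# Minimal sketch only; the real helper is project-specific.
from torch.utils.data import DataLoader

def get_dataloaders(dataset_map_fn, batch_size):
    # dataset_map_fn is assumed to return a torch Dataset for the requested split
    train_ds = dataset_map_fn(train=True)
    test_ds = dataset_map_fn(train=False)
    train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    return train_dataloader, test_dataloader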
Example #2
def start_evaluating(writer, seed, dataset_name, test_experiments_and_kwargs,
                     model_logdir, checkpoint, normalize_inputs, batch_size,
                     device_id):
    torch.manual_seed(seed)
    np.random.seed(seed)

    train_loader, _, test_loader, _, _ = get_dataloaders(
        dataset_name=dataset_name,
        batch_size=batch_size,
        train_size='max',
        val_size=0,
        device_id=device_id,
        normalize_inputs=normalize_inputs,
        num_workers=0)

    model = load_model_from_checkpoint(writer, model_logdir, checkpoint)
    device = torch.device(device_id if torch.cuda.is_available() else "cpu")
    model.to(device)

    test_experiments = []
    for te, kwargs in test_experiments_and_kwargs:
        test_experiments.append(experiments.__dict__[te](
            writer=writer,
            model=model,
            train_loader=train_loader,
            val_loader=test_loader,
            **kwargs))
    results = {}
    for ex in test_experiments:
        results.update(ex.run(0, 0))
    pprint(results)
    return results
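For context, test_experiments_and_kwargs is expected to be a list of (experiment class name, kwargs) pairs that are resolved against the experiments module. An illustrative value, reusing the OODDetection experiment shown in Example #5, might be:

# Illustrative only; any experiment class defined in the experiments module works.
test_experiments_and_kwargs = [
    ('OODDetection', {'run_interval': 1}),
]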
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('data_dir', help='Path to image files.', type=str)
    parser.add_argument('--save_dir', dest="save_dir", type=str, action="store",
                        default="./", help="Directory to save checkpoints")
    parser.add_argument('--arch', dest="arch", type=str, action="store",
                        default="densenet121", help="Architecture type default is densenet121")
    parser.add_argument('--learning_rate', dest="learning_rate", type=float, action="store", default=0.003)
    parser.add_argument('--epochs', dest="epochs", type=int, action="store", default=5)
    parser.add_argument('--hidden_units', dest="hidden_units", type=int, nargs='+', action="store", default=[512])
    parser.add_argument('--gpu', action='store_true')
    num_outputs = 102
    args = parser.parse_args()
    device = utils.get_device(args.gpu)
    dataloaders, class_to_idx = utils.get_dataloaders(args.data_dir)
    model, optimizer, hidden_layers = utils.get_model_and_optimizer(
        args.arch, args.learning_rate,
        num_outputs, device, args.hidden_units
    )
    if not model:
        return
    
    model.class_to_idx = class_to_idx
    with active_session():
        utils.train_model(
            model, optimizer, dataloaders, device,
            epochs=args.epochs, print_every=20
        )
        
    utils.save_model(model, args.learning_rate, args.epochs, optimizer, num_outputs, args.hidden_units, args.save_dir)
Example #4
def main(dataset_name):
    print('Training resnet model for', dataset_name)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load the dataset
    print('loading', dataset_name, 'dataset')
    dataloaders_dict = get_dataloaders(DATASET_DIR[dataset_name], TRAIN_CLASSES[dataset_name])
    
    # load the resnet18 model
    print('loading the resnet model')
    model = models.resnet18()
    num_feat = model.fc.in_features
    print(len(TRAIN_CLASSES[dataset_name]))
    model.fc = nn.Linear(num_feat, len(TRAIN_CLASSES[dataset_name]))

    loss = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-04)
    print('training the model')
    model, val_loss = train(model, dataloaders_dict, loss, optimizer, device, no_of_epochs=20)

    model_path = 'save/dogs.pt'
    create_dirs(model_path)
    print('saving the model')
    save_model(model, model_path)
    
    print('done!')
Example #5
    def __init__(self, **kwargs):
        super(OODDetection, self).__init__(kwargs['writer'])
        self.model = kwargs['model']
        self.train_loader = kwargs['train_loader']
        self.val_loader = kwargs['val_loader']
        self.run_interval = kwargs.get('run_interval', None)
        OOD_loader, _, _, _, _ = get_dataloaders('SVHN',
                                                 self.val_loader.batch_size,
                                                 len(self.val_loader.dataset),
                                                 0,
                                                 str(self.model.device),
                                                 normalize_inputs=True)
        self.OOD_loader = OOD_loader
Example #6
def main(model_name):
    print(f'Fine-tuning {model_name} model for cats')

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load the pretrained resnet18 model
    print('loading the pretrained model')
    model = models.resnet18(pretrained=True)

    num_feat = model.fc.in_features
    if model_name == 'dogs':
        model.fc = nn.Linear(num_feat, len(TRAIN_CLASSES["dogs"]))
        model.load_state_dict(torch.load('save/dogs.pt'))

    for param in model.parameters():
        param.requires_grad = False

    model.fc = nn.Linear(num_feat, len(TRAIN_CLASSES['cats']))

    # load the dataset
    print('loading cats dataset')
    dataloaders_dict = get_dataloaders(DATASET_DIR, TRAIN_CLASSES['cats'])
    loss = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-04)
    print('fine-tuning the model')
    model, val_loss = train(model,
                            dataloaders_dict,
                            loss,
                            optimizer,
                            device,
                            no_of_epochs=20)

    if model_name == 'dogs':
        model_path = 'save/dogs_to_cats.pt'
    else:
        model_path = 'save/imagenet_to_cats.pt'

    save_model(model, model_path)

    print('done!')
Example #7
def main():
    args = parser.parse_args()
    assert args.n_views == 2, "Only two view training is supported. Please use --n-views 2."
    # check if gpu training is available
    if not args.disable_cuda and torch.cuda.is_available():
        args.device = torch.device('cuda')
        cudnn.deterministic = True
        cudnn.benchmark = True
    else:
        args.device = torch.device('cpu')
        args.gpu_index = -1
    set_random_seed(args.seed)

    train_loader, valid_loader = get_dataloaders(args)

    if args.mode == 'simclr':
        model = ResNetSimCLR(base_model=args.arch, out_dim=args.out_dim)
        trainer_class = SimCLRTrainer
    elif args.mode == 'supervised':
        model = ResNetSimCLR(base_model=args.arch, out_dim=len(train_loader.dataset.classes))
        trainer_class = SupervisedTrainer
    else:
        raise InvalidTrainingMode()

    if args.optimizer_mode == 'simclr':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader),
                                                               eta_min=0, last_epoch=-1)

    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                    momentum=0.9, weight_decay=args.weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

    #  It’s a no-op if the 'gpu_index' argument is a negative integer or None.
    with torch.cuda.device(args.gpu_index):
        trainer = trainer_class(model=model, optimizer=optimizer, scheduler=scheduler, args=args)
        trainer.train(train_loader, valid_loader)
Example #8
    def __init__(self, config: dict):
        """
        Initialize the trainer.

        Parameters
        ----------
        config: dict
            Configuration dictionary; keys used by the trainer include:
            "exp_name", "debug", "seed", "batch_size", "epochs",
            "encoder", "hidden_dim", "num_layers", "device",
            "learning_rate", and optionally "load_checkpoint".
        """
        self.config = config
        
        self.exp_dir = RESULTS / config['exp_name']
        self.exp_dir.mkdir(parents=True, exist_ok=True)
        
        self.checkpoint_dir = CHECKPOINTS / config['exp_name']
        self.checkpoint_dir.mkdir(parents=True, exist_ok=True)

        self.log_dir = self.exp_dir / LOG_DIR
        self.log_dir.mkdir(parents=True, exist_ok=True)
        self.writer = SummaryWriter(log_dir=self.log_dir)

        log_name = config["exp_name"]+".log"
        self.logger = logging.getLogger(__name__)
        logfile_handler = logging.FileHandler(filename=self.exp_dir / log_name)
        logfile_handler.setLevel(level = (logging.DEBUG if config["debug"] else logging.INFO))
        logfile_format = logging.Formatter('%(asctime)s - %(levelname)10s - %(funcName)15s : %(message)s')
        logfile_handler.setFormatter(logfile_format)
        self.logger.addHandler(logfile_handler)
        self.logger.setLevel(level = (logging.DEBUG if config["debug"] else logging.INFO))

        self.logger.info("-"*50)
        self.logger.info(f"EXPERIMENT: {config['exp_name']}")
        self.logger.info("-"*50)

        self.logger.info(f"Setting seed: {config['seed']}")
        np.random.seed(config['seed'])
        torch.manual_seed(config['seed'])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

        self.logger.info(f"Loading data ...")
        self.train_dl, self.valid_dl, self.test_dl, self.vocab = get_dataloaders(config['batch_size'], DATA_DIR)
        vocab_emb = self.vocab.vectors

        # Init trackers
        self.current_iter = 0
        self.current_epoch = 0
        self.best_accuracy = 0.

        if config['encoder'] == 'EmbeddingEncoder':
            encoded_dim = vocab_emb.shape[-1]
            encoder = EmbeddingEncoder(embeddings=vocab_emb)
        elif config['encoder'] == 'UniLSTM':
            encoded_dim = config['hidden_dim']
            encoder = UniLSTM(embeddings=vocab_emb, 
                              batch_size=config['batch_size'],
                              hidden_size=config['hidden_dim'],
                              device=config['device'],
                              num_layers=config['num_layers'])
        elif config['encoder'] == 'BiLSTM':
            encoded_dim = 2*config['hidden_dim']
            encoder = BiLSTM(embeddings=vocab_emb, 
                             batch_size=config['batch_size'],
                             hidden_size=config['hidden_dim'],
                             device=config['device'],
                             num_layers=config['num_layers'])
        elif config['encoder'] == 'BiLSTMPool':
            encoded_dim = 2*config['hidden_dim']
            encoder = BiLSTMPool(embeddings=vocab_emb,
                                 batch_size=config['batch_size'],
                                 hidden_size=config['hidden_dim'],
                                 device=config['device'],
                                 num_layers=config['num_layers'])
        else:
            self.logger.error("Encoder not available")
            sys.exit(1)

        self.model = Classifier(encoder, encoded_dim)
        self.logger.info(f"Using device: {config['device']}")
        self.model.to(config['device'])
        self.opt = optim.Adam(self.model.parameters(), lr=config['learning_rate'])
        self.criterion = nn.CrossEntropyLoss()

        if 'load_checkpoint' in config:
            self.load_checkpoint(config['load_checkpoint'])
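For context, a hypothetical configuration for this trainer could look like the snippet below. Only the key names are taken from the code above; the class name Trainer and all values are illustrative.

config = {
    "exp_name": "bilstm_baseline",
    "debug": False,
    "seed": 42,
    "batch_size": 64,
    "epochs": 10,
    "encoder": "BiLSTM",
    "hidden_dim": 512,
    "num_layers": 1,
    "device": "cuda",
    "learning_rate": 1e-3,
}
trainer = Trainer(config)  # hypothetical class name; only __init__ is shown above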
Example #9
def train_model(
        writer,
        seed,
        dataset_name,
        model_class_name,
        model_kwargs,
        normalize_inputs,
        batch_size,
        train_size,
        val_size,
        epochs,
        total_batches,
        optimizer_class_name,
        optimizer_kwargs,
        lr_scheduler_class_name,
        lr_scheduler_kwargs,
        model_logdir=None,
        checkpoint=None,
        train_experiments_and_kwargs=[],
        device_id='cpu'):
    torch.manual_seed(seed)
    np.random.seed(seed)
    device = torch.device(device_id if torch.cuda.is_available() else "cpu")

    model_class = models.__dict__[model_class_name]
    train_loader, val_loader, _, in_shape, n_classes = get_dataloaders(dataset_name=dataset_name,
                                                                       batch_size=batch_size,
                                                                       train_size=train_size,
                                                                       val_size=val_size,
                                                                       device_id=device_id,
                                                                       normalize_inputs=normalize_inputs)

    if model_logdir or checkpoint:
        model = load_model_from_checkpoint(writer, model_logdir, checkpoint)
    else:
        model_kwargs['n_classes'] = n_classes
        model_kwargs['net_kwargs']['in_shape'] = in_shape
        model = model_class(writer, **model_kwargs)
    save_model_kwargs(writer, model_class_name, model_kwargs)

    optimizer = model.get_optimizer(optimizer_class_name, optimizer_kwargs)
    scheduler = torch.optim.lr_scheduler.__dict__[lr_scheduler_class_name](optimizer, **lr_scheduler_kwargs)

    train_experiments = []
    for ex in train_experiments_and_kwargs:
        train_experiments.append(experiments.__dict__[ex[0]](writer=writer,
                                                             model=model,
                                                             train_loader=train_loader,
                                                             val_loader=val_loader,
                                                             **ex[1]))
    model.initialize(train_loader)
    model.to(device)
    if epochs is None:
        epochs = ceil(total_batches / len(train_loader))
    for epoch in range(1, epochs + 1):
        train_epoch(writer,
                    model,
                    train_loader,
                    optimizer,
                    scheduler,
                    epoch,
                    total_batches,
                    train_experiments)
Example #10
    args = parse_args()

    SEED = args.seed

    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Preprocessing
    pp_start_time = time.time()

    trainloader, testloader = get_dataloaders(args)

    pp_end_time = time.time()
    pp_mins, pp_secs = epoch_time(pp_end_time - pp_start_time)
    print(f'Preprocessing time: {pp_mins}m {pp_secs}s')

    with wandb.init(project='RegulQuant', entity='womeiyouleezi', config=args):
        if args.run_name:
            wandb.run.name = args.run_name
        if (not args.save_file):
            file_name = wandb.run.name
        else:
            file_name = args.save_file

        # make model
        net = get_model(args).to(device)
Example #11
    parser.add_argument('--verbose', default=1, type=int)
    args = parser.parse_args()

    args.cuda = torch.cuda.device_count() != 0

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)

    enc = Encoder()
    dec = Decoder()
    generator = AEGenerator(enc, dec)
    discriminator = Discriminator()
    dagan = DAGAN(generator, discriminator)

    d_learning_rate = 2e-4
    g_learning_rate = 2e-4
    optim_betas = (0.9, 0.999)
    opt_g = torch.optim.Adam(generator.parameters(),
                             lr=g_learning_rate,
                             betas=optim_betas)
    opt_d = torch.optim.Adam(discriminator.parameters(),
                             lr=d_learning_rate,
                             betas=optim_betas)

    trainloader, testloader = utils.get_dataloaders(data=args.data,
                                                    train_bs=args.batch_size,
                                                    ohe_labels=True)

    train(trainloader, testloader, dagan, opt_g, opt_d, args)
Example #12
def main():
    avaliable_modelnames = [
        m for m in dir(models)
        if m[0] != '_' and type(getattr(models, m)).__name__ != 'module'
    ]
    parser = argparse.ArgumentParser(description='PyTorch RICAP Training')

    # hardware
    parser.add_argument('--num_workers',
                        type=int,
                        default=4,
                        help='number of workers loading data')

    # dataset
    parser.add_argument('--dataset',
                        type=str,
                        default='cifar10',
                        choices=['cifar10', 'cifar100', 'ImageNet'],
                        help='dataset for training')
    parser.add_argument('--dataroot',
                        type=str,
                        default='data/',
                        help='path to dataset')

    # model
    parser.add_argument('--model',
                        '-m',
                        type=str,
                        required=True,
                        choices=avaliable_modelnames,
                        help='model name')
    parser.add_argument('--depth',
                        '-d',
                        type=int,
                        required=True,
                        help='number of layers')
    parser.add_argument(
        '--params',
        '-p',
        type=str,
        default=None,
        help='model parameters such as widen factor for Wide ResNet')
    parser.add_argument('--postfix',
                        type=str,
                        default='',
                        help='postfix for saved model name')

    # hyperparameters
    parser.add_argument(
        '--epoch',
        '-e',
        type=int,
        default=200,
        help='number of epochs: (default: 200 for Wide ResNet)')
    parser.add_argument('--batch', type=int, default=128, help='batchsize')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        help='default learning rate')
    parser.add_argument(
        '--droplr',
        type=float,
        default=0.2,
        help='adaptive learning rate ratio: (default: 0.2 for Wide ResNet)')
    parser.add_argument(
        '--adlr',
        type=str,
        default=None,
        help=
        'epochs at which learning rate is adapted (x droplr); e.g., \'60,120,160\' for Wide ResNet'
    )
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
    parser.add_argument('--wd',
                        type=float,
                        default=0.0005,
                        help='weight decay: (default: 0.0005 for Wide ResNet)')

    # data augmentation
    parser.add_argument('--crop', type=int, default=None, help='crop size')
    parser.add_argument('--beta_of_ricap',
                        type=float,
                        default=0.0,
                        help='beta of ricap augmentation')

    # save and resume
    parser.add_argument(
        '--resume',
        '-r',
        type=int,
        default=0,
        help='epoch at which resume from checkpoint. -1 for latest')
    parser.add_argument(
        '--savefreq',
        type=int,
        default=5,
        help='frequency to save model and to mark it the latest')
    parser.add_argument('--nocuda',
                        action='store_true',
                        default=False,
                        help='disable cuda devices.')
    args = parser.parse_args()

    print('==> Preparing dataset loaders..')
    dataloaders = utils.get_dataloaders(datasetname=args.dataset,
                                        dataroot=args.dataroot,
                                        batchsize=args.batch,
                                        num_workers=args.num_workers,
                                        cropsize=args.crop)

    # prepare log saving file name
    # save target : model information (.dat), result (.log), model parameters (.pth), optimizer parameters (.opt)
    savefilename_prefix = 'checkpoint/{model}-{depth}{params}_{dataset}{postfix}'.format(
        model=args.model,
        depth=args.depth,
        params='-{}'.format(args.params) if args.params is not None else '',
        dataset=args.dataset,
        postfix='_{}'.format(args.postfix) if args.postfix != '' else '',
    )

    # define learning rate strategy
    if args.adlr is None:
        args.adlr = np.array([60, 120, 160])
    else:
        assert re.match(
            '[0-9 ,]+',
            args.adlr), 'Error: invalid adaptive learning rate: {}'.format(
                args.adlr)
        args.adlr = np.array(sorted(eval('[{}]'.format(args.adlr))))
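        # Illustrative eval-free alternative:
        #   args.adlr = np.array(sorted(int(x) for x in args.adlr.split(',')))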
    lr_current = args.lr

    # prepare cnn model and optimizer
    print('==> Building model..')
    if not os.path.isdir('checkpoint'):
        os.mkdir('checkpoint')
    network = getattr(models, args.model)(args.dataset, args.depth,
                                          args.params)
    optimizer = optim.SGD(network.parameters(),
                          lr=lr_current,
                          momentum=args.momentum,
                          weight_decay=args.wd,
                          nesterov=True)

    # write model information to save file (.dat)
    with open('{}.dat'.format(savefilename_prefix), 'w') as of:
        print('==> Command', file=of)
        import sys
        print(' '.join(sys.argv), file=of)
        print('\n', file=of)
        print('==> Parameters', file=of)
        arg_str = '\n'.join([
            '--{} {}'.format(k, str(getattr(args, k))) for k in dir(args)
            if '_' not in k
        ])
        print(arg_str, file=of)
        print('\n', file=of)
        print('==> Network', file=of)
        num_params = 0
        for param in network.parameters():
            num_params += param.numel()
        print('Number of parameters: %d' % num_params, file=of)
        print(network, file=of)

    # prepare trainer
    datasetname = args.dataset
    if datasetname == "cifar10":
        num_class = 10
    elif datasetname == "cifar100":
        num_class = 100
    elif datasetname == "ImageNet":
        num_class = 1000
    use_cuda = torch.cuda.is_available() and not args.nocuda
    trainer = trainers.make_trainer(network,
                                    dataloaders,
                                    optimizer,
                                    use_cuda=use_cuda,
                                    beta_of_ricap=args.beta_of_ricap)

    # initialize logs and epoch num
    if args.resume == 0:
        logs = []
        epoch_start = 0
    else:
        # if resuming
        # load model and optimizer parameter, start from pre-saved checkpoint
        print('==> Resuming from checkpoint..')
        if args.resume < 0:
            args.resume = 'latest'
        checkpoint = '{}_{}'.format(savefilename_prefix, args.resume)
        map_location = (lambda storage, location:
                        storage.cuda() if use_cuda else storage)
        network.load_state_dict(
            torch.load(checkpoint + '.pth', map_location=map_location))
        optimizer.load_state_dict(
            torch.load(checkpoint + '.opt', map_location=map_location))
        logs = list(np.loadtxt(checkpoint + '.log', ndmin=2))
        epoch_start = len(logs)

    # update learning rate based on the learning rate strategy defined above
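    # Worked example (illustrative): with --lr 0.1, --droplr 0.2, --adlr 60,120,160,
    # epoch 130 has passed two milestones, so lr = 0.1 * 0.2**2 = 0.004.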
    def update_learning_rate(epoch, ite):
        lr_adapted = args.lr * args.droplr**np.sum(args.adlr < epoch)
        if not lr_current == lr_adapted:
            print('Learning rate is adapted: {} -> {}'.format(
                lr_current, lr_adapted))
            utils.adjust_learning_rate(optimizer, lr_adapted)
        return lr_adapted

    # save network and optimizer parameter to save files (.pth, .opt)
    def savemodel(savefilename):
        torch.save(network.state_dict(), savefilename + '.pth')
        torch.save(optimizer.state_dict(), savefilename + '.opt')
        np.savetxt(savefilename + '.log', logs)

    # train and test loop
    epoch_end = args.epoch
    for epoch in range(epoch_start + 1, epoch_end + 1):
        lr_current = update_learning_rate(epoch,
                                          len(dataloaders[0]) * (epoch - 1))
        print('Epoch: {} / Iterations: {}'.format(
            epoch,
            len(dataloaders[0]) * (epoch - 1)))
        ret_train = trainer.epoch(train=True, lr=lr_current)
        ret_test = trainer.epoch(train=False, lr=lr_current)
        logs.append([epoch] + ret_train + ret_test +
                    [lr_current, len(dataloaders[0]) * epoch])

        # save model and optimizer parameters
        if epoch % args.savefreq == 0 or epoch == epoch_end:
            print('Saving model as the latest..')
            savefilename = '{}_{}'.format(savefilename_prefix, 'latest')
            savemodel(savefilename)
Example #13
def train_model(args):
    dataloaders = get_dataloaders(args)

    dataset_sizes = {
        'train': len(dataloaders['train'].dataset),
        'val': len(dataloaders['val'].dataset),
        'test': len(dataloaders['test'].dataset)
    }

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # set up
    model = load_model(args, device)
    loss_fn = get_loss_fn(args.dataset, args.loss_type)

    if args.train_decoders:
        parameters = list(model.photo_decoder.parameters()) + list(
            model.sketch_decoder.parameters())
    elif args.model in ['EmbedGAN']:
        parameters = list(model.G.parameters()) + list(model.D.parameters())
    else:
        parameters = model.parameters()

    if args.optim == 'sgd':
        optimizer = optim.SGD(parameters,
                              lr=args.lr,
                              weight_decay=args.wd,
                              momentum=.9,
                              nesterov=True)
    elif args.optim == 'adam':
        optimizer = optim.Adam(parameters, lr=args.lr, weight_decay=args.wd)

    scheduler = optim.lr_scheduler.StepLR(
        optimizer, step_size=len(dataloaders['train']) // 10, gamma=.9)
    writer = SummaryWriter(args.log_dir + "/{}".format(args.name))

    save_dir = Path(args.save_dir) / ('{}'.format(args.name))
    if not save_dir.exists():
        os.mkdir(save_dir)

    best_model = None
    best_loss = float('inf')
    batch_num = 0

    for epoch in range(args.num_epochs):
        print('Epoch {}/{}'.format(epoch, args.num_epochs - 1))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            epoch_metrics = defaultdict(float)

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                # zero the parameter gradients
                optimizer.zero_grad()

                N = len(inputs)

                # converts list of tuples of images paths of length N into flattened
                # tensor of size N * args.loss_type
                inputs = load_sketchy_images(inputs, args.loss_type, device,
                                             args.img_size)
                labels = labels.to(device)
                with torch.set_grad_enabled(phase == 'train'):
                    if args.loss_type in [
                            "vae", "vae+embed", "vae+embed+classify"
                    ]:
                        batch_metrics = vae_forward(
                            inputs,
                            labels,
                            model,
                            loss_fn,
                            writer,
                            device,
                            batch_num,
                            args.alpha,
                            N,
                            args.name,
                            modality=args.modality,
                            compare_embed=args.loss_type
                            in ["vae+embed", "vae+embed+classify"],
                            classify=args.loss_type
                            in ['vae+embed+classify', 'single_vae'])
                    elif args.loss_type in [
                            "ae", "ae+embed", "ae+embed+classify"
                    ]:
                        batch_metrics = ae_forward(
                            inputs,
                            labels,
                            model,
                            loss_fn,
                            writer,
                            device,
                            batch_num,
                            args.alpha,
                            N,
                            args.name,
                            modality=args.modality,
                            compare_embed=args.loss_type
                            in ["ae+embed", "ae+embed+classify"],
                            classify=args.loss_type
                            in ['ae+embed+classify', 'single_ae'])
                    elif args.loss_type in ['gan']:
                        batch_metrics = gan_forward(inputs, labels, model,
                                                    loss_fn, writer, device,
                                                    batch_num, N)
                    else:
                        batch_metrics = classify_contrast_forward(
                            inputs, labels, model, loss_fn, writer, device,
                            batch_num, args.alpha, args.loss_type, N)

                    for criteria_name in batch_metrics:
                        epoch_metrics[criteria_name] += batch_metrics[
                            criteria_name] / dataset_sizes[phase]

                    loss = batch_metrics['loss']

                    del batch_metrics

                    if phase == "train":
                        batch_num += 1
                        loss.backward()
                        optimizer.step()

            epoch_loss = epoch_metrics['loss'].item()
            log_metrics(epoch_metrics, writer, phase, epoch)

        # deep copy the model
        if phase == 'val' and epoch_loss < best_loss:
            best_loss = epoch_loss
            now = datetime.datetime.now()
            torch.save(
                model.state_dict(), save_dir /
                f"{now.month}{now.day}{now.hour}{now.minute}_{best_loss}")
            best_model = copy.deepcopy(model.state_dict())

    writer.close()
    now = datetime.datetime.now()
    torch.save(
        model.state_dict(), save_dir /
        f"end_{now.month}{now.day}{now.hour}{now.minute}_{best_loss}")

    # load best model weights
    model.load_state_dict(best_model)
    now = datetime.datetime.now()
    torch.save(model.state_dict(), save_dir / "best")
Example #14
compression_factors = [1, 0.5, 0.25, 0.1, 0.05, 0.01]
sensing_schemes = [RandomProjection, RSTD]
scheme_names = ["Gaussian Sensing", "Random Subsampling"]
S = 200  # 200 achieves ~91.8% accuracy at 100% MR

test_accuracy = np.zeros((len(sensing_schemes), len(compression_factors)))

# Loop over sensing schemes and compression factors
for i, ss in enumerate(sensing_schemes):
    for j, cf in enumerate(compression_factors):
        # Define the data transformation for this network
        sensing_transform = ss(cf, IM_DIM)
        trans = transforms.Compose([transforms.ToTensor(), sensing_transform])

        # Build the dataloaders
        trainloader, valloader, testloader = get_dataloaders(
            batch_size, val_split, trans, n_workers)  # regular / proxy images
        # (uncomment the line below if you want results for sparse recovered images)
        # trainloader, valloader, testloader = get_sparse_recovered_dataloaders(sensing_transform, S, batch_size, val_split, n_workers)
        # Construct the model
        net = MNISTClassifier(resnet20(), lr, lr_milestones)

        if torch.cuda.is_available():
            trainer = pl.Trainer(gpus=2,
                                 accelerator='ddp',
                                 max_epochs=num_epochs,
                                 progress_bar_refresh_rate=bar_refresh_rate)
        else:
            trainer = pl.Trainer(gpus=0,
                                 max_epochs=num_epochs,
                                 progress_bar_refresh_rate=bar_refresh_rate)
Example #15
import torch
from globals import *

from init import init
from net import SentimentAnalyzer

from train_eval import train_loop, evaluate
from utils import get_dataloaders

if __name__ == "__main__":
    # Load the datasets and used vocabulary
    init(config)
    # Get dataloaders for train/validation/test sets
    train_loader, valid_loader, test_loader = get_dataloaders(
        config['train'], config['val'], config['test'])
    if net_config.mode == "train":
        # Create fresh new instance of the RNN
        net = SentimentAnalyzer(config['vocab'], net_config.hidden_dim,
                                net_config.layers, net_config.dropout,
                                net_config.bidirectional).to(device)
        # Train the network
        train_loop(net, train_loader, valid_loader, test_loader)
    else:
        # Create fresh new instance of the RNN which
        # holds loaded pretrained weights
        net = SentimentAnalyzer(config['vocab'], net_config.hidden_dim,
                                net_config.layers, net_config.dropout,
                                net_config.bidirectional).to(device)
        # Load pretrained model parameters
        net.load_state_dict(torch.load(net_config.pretrained_loc))
Example #16
def create_cams(args):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    model = load_model(args, device)

    features, weight_softmax = set_up_model(model)

    with open("/home/robincheong/data/sketchy/idx_to_class_dict.pkl",
              "rb") as f:
        classes = pickle.load(f)

    loader = get_dataloaders(args)[args.phase]

    num_cams = 0

    for inputs, labels in loader:
        print("Getting logits")
        labels = labels.numpy()

        file_paths = [example.split('++') for example in inputs]

        N = len(inputs)
        inputs = load_sketchy_images(inputs, args.loss_type, device,
                                     args.img_size)
        sketches, photos = torch.split(inputs, N)

        sketch_probs, sketch_idx = get_probs_and_idx(sketches,
                                                     model,
                                                     device,
                                                     is_sketch=True)
        photo_probs, photo_idx = get_probs_and_idx(photos,
                                                   model,
                                                   device,
                                                   is_sketch=False)

        print(sketch_probs.shape)

        print("Generating CAMs")

        for i in range(N):
            if num_cams > args.num_cams:
                break
            num_cams += 1
            print_top_5(sketch_probs[i], sketch_idx[i], classes, labels[i],
                        "sketch")
            print_top_5(photo_probs[i], photo_idx[i], classes, labels[i],
                        "photo")
            CAMs = {
                "sketch":
                return_CAM(
                    features[0][i], weight_softmax,
                    [sketch_idx[i][np.where(sketch_idx[i] == labels[i])]]),
                "photo":
                return_CAM(features[1][i], weight_softmax,
                           [photo_idx[i][np.where(photo_idx[i] == labels[i])]])
            }

            # render the CAM and output
            for modality, path in [("sketch", file_paths[i][0]),
                                   ("photo", file_paths[i][1])]:
                print('Rendering {} CAMs for the correct class: {}'.format(
                    modality, classes[labels[i]]))
                img = cv2.imread(str(path))
                height, width, _ = img.shape
                heatmap = cv2.applyColorMap(
                    cv2.resize(CAMs[modality][0], (width, height)),
                    cv2.COLORMAP_JET)
                result = heatmap * 0.3 + img * 0.5

                cam_fname = 'cams/{}_{}{}.jpg'.format(modality,
                                                      classes[labels[i]],
                                                      args.suffix)

                cv2.imwrite(cam_fname, result)

        break
Example #17
                    help='dropout ratio for AlexNet.')

args = parser.parse_args()
print("Script arguments:\n", args)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0
start_epoch = 0
working_dir = os.path.join(args.output_dir, args.train_id)
os.makedirs(working_dir, exist_ok=True)
writer = SummaryWriter(working_dir)

# Setup data.
print('==> Preparing data..')
trainloader, testloader = utils.get_dataloaders(dataset=args.dataset,
                                                batch_size=args.batch_size,
                                                data_root=args.data_root)

# Setup model
# ----------------------------------------
print('==> Building model..')
if args.dataset == "imagenet":
    models = {
        'presnet18': PreActResNet18,
        'glouncv-alexnet': alexnet,
        'glouncv-presnet34': preresnet34,
        'glouncv-mobilenetv2_w1': mobilenetv2_w1
    }
    net = models.get(args.arch, None)()

elif args.dataset == "cifar100":
Example #18
def main(cfg: DictConfig) -> None:
    print("Params: \n")
    print(OmegaConf.to_yaml(cfg))
    time.sleep(10)

    best_acc = 0
    start_epoch = 0
    working_dir = os.path.join(get_original_cwd(), cfg.output_dir,
                               cfg.train_id)
    os.makedirs(working_dir, exist_ok=True)
    writer = SummaryWriter(working_dir)

    # Setup data.
    # --------------------
    print('=> Preparing data..')
    trainloader, testloader = utils.get_dataloaders(
        dataset=cfg.dataset.name,
        batch_size=cfg.dataset.batch_size,
        data_root=cfg.dataset.data_root)

    net = setup_network(cfg.dataset.name, cfg.dataset.arch)
    net = tweak_network(net,
                        bit=cfg.quantizer.bit,
                        train_conf=cfg.train_conf,
                        quant_mode=cfg.quant_mode,
                        arch=cfg.dataset.arch,
                        cfg=cfg)
    net = net.to(device)

    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    print(net)
    print("Number of learnable parameters: ",
          sum(p.numel() for p in net.parameters() if p.requires_grad) / 1e6,
          "M")
    time.sleep(5)
    load_checkpoint(net, init_from=cfg.dataset.init_from)
    params = create_train_params(model=net,
                                 main_wd=cfg.quantizer.wd,
                                 delta_wd=0,
                                 skip_keys=['.delta', '.alpha'],
                                 verbose=cfg.verbose)
    criterion = nn.CrossEntropyLoss()

    # Setup optimizer
    # ----------------------------
    if cfg.quantizer.optimizer == 'sgd':
        print("=> Use SGD optimizer")
        optimizer = optim.SGD(params,
                              lr=cfg.quantizer.lr,
                              momentum=0.9,
                              weight_decay=cfg.quantizer.wd)

    elif cfg.quantizer.optimizer == 'adam':
        print("=> Use Adam optimizer")
        optimizer = optim.Adam(params,
                               lr=cfg.quantizer.lr,
                               weight_decay=cfg.quantizer.wd)

    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=cfg.dataset.epochs)

    if cfg.evaluate:
        print("==> Start evaluating ...")
        test(net, testloader, criterion, -1)
        exit()

    # -----------------------------------------------
    # Reset to 'warmup_lr' if we are using warmup strategy.
    if cfg.quantizer.enable_warmup:
        assert cfg.quantizer.bit == 1
        for param_group in optimizer.param_groups:
            param_group['lr'] = cfg.quantizer.warmup_lr

    # Initialization
    # ------------------------------------------------
    if cfg.quantizer.bit != 32 and "quan" in cfg.train_conf:
        simple_initialization(net,
                              trainloader,
                              num_batches=cfg.dataset.num_calibration_batches,
                              train_conf=cfg.train_conf)

    # Training
    # -----------------------------------------------
    save_checkpoint_epochs = list(range(10))

    for epoch in range(start_epoch, cfg.dataset.epochs):
        train_loss, train_acc1 = train(net,
                                       optimizer,
                                       trainloader,
                                       criterion,
                                       epoch,
                                       cfg=cfg)
        test_loss, test_acc1, curr_acc = test(net, testloader, criterion,
                                              epoch)

        # Save checkpoint.
        if curr_acc > best_acc:
            best_acc = curr_acc
            utils.save_checkpoint(net,
                                  lr_scheduler,
                                  optimizer,
                                  curr_acc,
                                  epoch,
                                  filename=os.path.join(
                                      working_dir, 'ckpt_best.pth'))
            print('Saving..')
            print('Best accuracy: ', best_acc)

        if lr_scheduler is not None:
            lr_scheduler.step()

        write_metrics(writer, epoch, net, optimizer, train_loss, train_acc1,
                      test_loss, test_acc1, prefix="Standard_Training")

    print('Best accuracy: ', best_acc)
Example #19
def main():

    print("\n_________________________________________________\n")
    print(now(), "train_model.py main() running.")

    parser = argparse.ArgumentParser(description="Deep Thinking")
    parser.add_argument("--checkpoint",
                        default="check_default",
                        type=str,
                        help="where to save the network")
    parser.add_argument("--dataset",
                        default="CIFAR10",
                        type=str,
                        help="dataset")
    parser.add_argument("--depth",
                        default=1,
                        type=int,
                        help="depth of the network")
    parser.add_argument("--epochs",
                        default=200,
                        type=int,
                        help="number of epochs for training")
    parser.add_argument("--lr", default=0.1, type=float, help="learning rate")
    parser.add_argument("--lr_factor",
                        default=0.1,
                        type=float,
                        help="learning rate decay factor")
    parser.add_argument("--lr_schedule",
                        nargs="+",
                        default=[100, 150],
                        type=int,
                        help="how often to decrease lr")
    parser.add_argument("--mode",
                        default="default",
                        type=str,
                        help="which  testing mode?")
    parser.add_argument("--model",
                        default="resnet18",
                        type=str,
                        help="model for training")
    parser.add_argument("--model_path",
                        default=None,
                        type=str,
                        help="where is the model saved?")
    parser.add_argument("--no_save_log",
                        action="store_true",
                        help="do not save log file")
    parser.add_argument("--optimizer",
                        default="SGD",
                        type=str,
                        help="optimizer")
    parser.add_argument("--output",
                        default="output_default",
                        type=str,
                        help="output subdirectory")
    parser.add_argument("--problem",
                        default="classification",
                        type=str,
                        help="problem type (classification or segmentation)")
    parser.add_argument("--save_json", action="store_true", help="save json")
    parser.add_argument("--save_period",
                        default=None,
                        type=int,
                        help="how often to save")
    parser.add_argument("--test_batch_size",
                        default=50,
                        type=int,
                        help="batch size for testing")
    parser.add_argument("--test_dataset",
                        type=str,
                        default=None,
                        help="name of the testing dataset")
    parser.add_argument("--test_iterations",
                        default=None,
                        type=int,
                        help="how many, if testing with a different "
                        "number iterations than training")
    parser.add_argument("--train_batch_size",
                        default=128,
                        type=int,
                        help="batch size for training")
    parser.add_argument("--train_log",
                        default="train_log.txt",
                        type=str,
                        help="name of the log file")
    parser.add_argument("--val_period",
                        default=20,
                        type=int,
                        help="how often to validate")
    parser.add_argument("--width",
                        default=4,
                        type=int,
                        help="width of the network")

    args = parser.parse_args()

    if args.save_period is None:
        args.save_period = args.epochs
    print(args)

    # summary writer
    train_log = args.train_log
    try:
        array_task_id = train_log[:-4].split("_")[-1]
    except:
        array_task_id = 1
    writer = SummaryWriter(log_dir=f"{args.output}/runs/{train_log[:-4]}")

    if not args.no_save_log:
        to_log_file(args, args.output, train_log)

    # set device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    ####################################################
    #               Dataset and Network and Optimizer
    trainloader, testloader = get_dataloaders(
        args.dataset,
        args.train_batch_size,
        test_batch_size=args.test_batch_size)

    # load model from path if a path is provided
    if args.model_path is not None:
        print(f"Loading model from checkpoint {args.model_path}...")
        net, start_epoch, optimizer_state_dict = load_model_from_checkpoint(
            args.model, args.model_path, args.dataset, args.width, args.depth)
        start_epoch += 1

    else:
        net = get_model(args.model, args.dataset, args.width, args.depth)
        start_epoch = 0
        optimizer_state_dict = None

    net = net.to(device)
    pytorch_total_params = sum(p.numel() for p in net.parameters())
    optimizer = get_optimizer(args.optimizer, args.model, net, args.lr,
                              args.dataset)

    print(net)
    print(
        f"This {args.model} has {pytorch_total_params/1e6:0.3f} million parameters."
    )
    print(f"Training will start at epoch {start_epoch}.")

    if optimizer_state_dict is not None:
        print(f"Loading optimizer from checkpoint {args.model_path}...")
        optimizer.load_state_dict(optimizer_state_dict)
        warmup_scheduler = warmup.ExponentialWarmup(optimizer, warmup_period=0)
    else:
        warmup_scheduler = warmup.ExponentialWarmup(optimizer, warmup_period=5)

    lr_scheduler = MultiStepLR(optimizer,
                               milestones=args.lr_schedule,
                               gamma=args.lr_factor,
                               last_epoch=-1)
    optimizer_obj = OptimizerWithSched(optimizer, lr_scheduler,
                                       warmup_scheduler)
    np.set_printoptions(precision=2)
    torch.backends.cudnn.benchmark = True
    test_setup = TestingSetup(args.problem.lower(), args.mode.lower())
    ####################################################

    ####################################################
    #        Train
    print(f"==> Starting training for {args.epochs - start_epoch} epochs...")

    for epoch in range(start_epoch, args.epochs):

        loss, acc = train(net, trainloader, args.problem.lower(),
                          optimizer_obj, device)

        print(f"{now()} Training loss at epoch {epoch}: {loss}")
        print(f"{now()} Training accuracy at epoch {epoch}: {acc}")

        # if the loss is nan, then stop the training
        if np.isnan(float(loss)):
            print("Loss is nan, exiting...")
            sys.exit()

        # tensorboard loss writing
        writer.add_scalar("Loss/loss", loss, epoch)
        writer.add_scalar("Accuracy/acc", acc, epoch)

        for i in range(len(optimizer.param_groups)):
            writer.add_scalar(f"Learning_rate/group{i}",
                              optimizer.param_groups[i]["lr"], epoch)

        if (epoch + 1) % args.val_period == 0:
            train_acc = test(net, trainloader, test_setup, device)
            test_acc = test(net, testloader, test_setup, device)

            print(f"{now()} Training accuracy: {train_acc}")
            print(f"{now()} Testing accuracy: {test_acc}")

            stats = [train_acc, test_acc]
            stat_names = ["train_acc", "test_acc"]
            for stat_idx, stat in enumerate(stats):
                stat_name = os.path.join("val", stat_names[stat_idx])
                writer.add_scalar(stat_name, stat, epoch)

        if (epoch + 1) % args.save_period == 0 or (epoch + 1) == args.epochs:
            state = {
                "net": net.state_dict(),
                "epoch": epoch,
                "optimizer": optimizer.state_dict()
            }
            out_str = os.path.join(
                args.checkpoint,
                f"{args.model}_{args.dataset}_{args.optimizer}"
                f"_depth={args.depth}"
                f"_width={args.width}"
                f"_lr={args.lr}"
                f"_batchsize={args.train_batch_size}"
                f"_epoch={args.epochs-1}"
                f"_{array_task_id}.pth")

            print("saving model to: ", args.checkpoint, " out_str: ", out_str)
            if not os.path.isdir(args.checkpoint):
                os.makedirs(args.checkpoint)
            torch.save(state, out_str)

    writer.flush()
    writer.close()
    ####################################################

    ####################################################
    #        Test
    print("==> Starting testing...")

    if args.test_iterations is not None:
        assert isinstance(
            net.iters, int), "Cannot test feed-forward model with iterations."
        net.iters = args.test_iterations

    train_acc = test(net, trainloader, test_setup, device)
    test_acc = test(net, testloader, test_setup, device)

    print(f"{now()} Training accuracy: {train_acc}")
    print(f"{now()} Testing accuracy: {test_acc}")

    model_name_str = f"{args.model}_depth={args.depth}_width={args.width}"
    stats = OrderedDict([("model", model_name_str),
                         ("num_params", pytorch_total_params),
                         ("learning rate", args.lr),
                         ("lr_factor", args.lr_factor), ("lr", args.lr),
                         ("epochs", args.epochs),
                         ("train_batch_size", args.train_batch_size),
                         ("optimizer", args.optimizer),
                         ("dataset", args.dataset), ("train_acc", train_acc),
                         ("test_acc", test_acc),
                         ("test_iter", args.test_iterations)])

    if args.save_json:
        to_json(stats, args.output)