Example #1
    def __init__(self,
                 bert_model,
                 dataset,
                 mapping,
                 discriminator,
                 args,
                 bert_model1=None):
        """
        Initialize trainer script.
        """
        self.bert_model = bert_model
        self.bert_model1 = bert_model1
        if args.adversarial:
            self.dataset = dataset
            #sampler = SequentialSampler(dataset)
            sampler = RandomSampler(dataset)
            self.dataloader = DataLoader(dataset,
                                         sampler=sampler,
                                         batch_size=args.batch_size)
            # _DataLoaderIter is a private PyTorch API removed in recent
            # releases; iter(self.dataloader) is the supported equivalent.
            self.iter_loader = iter(self.dataloader)
        self.mapping = mapping
        self.discriminator = discriminator
        self.args = args

        if self.args.local_rank == -1 or self.args.no_cuda:
            self.device = torch.device("cuda" if torch.cuda.is_available()
                                       and not self.args.no_cuda else "cpu")
        else:
            self.device = torch.device("cuda", self.args.local_rank)

        # optimizers
        if hasattr(args, 'map_optimizer'):
            optim_fn, optim_args = get_optimizer(args.map_optimizer)
            self.map_optimizer = optim_fn(mapping.parameters(), **optim_args)
        if hasattr(args, 'dis_optimizer'):
            optim_fn, optim_args = get_optimizer(args.dis_optimizer)
            self.dis_optimizer = optim_fn(discriminator.parameters(),
                                          **optim_args)
        else:
            assert discriminator is None

        # best validation score
        self.best_valid_metric = -1e12

        self.decrease_lr = False
        self.decrease_dis_lr = False
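
Examples #1 and #5 assume a `get_optimizer` that parses an optimizer spec string (e.g. "sgd,lr=0.1") and returns an optimizer class together with its keyword arguments. That implementation is not shown above; the following is only a minimal sketch of the behavior these call sites rely on.

import torch.optim as optim


def get_optimizer(spec):
    """Sketch: parse a spec such as "sgd,lr=0.1" into (optimizer_class, kwargs)."""
    parts = spec.split(',')
    name = parts[0].lower()
    optim_classes = {'sgd': optim.SGD, 'adam': optim.Adam,
                     'rmsprop': optim.RMSprop, 'adagrad': optim.Adagrad}
    if name not in optim_classes:
        raise ValueError("Unknown optimizer: %s" % name)
    kwargs = {}
    for assignment in parts[1:]:
        key, value = assignment.split('=')
        kwargs[key] = float(value)
    return optim_classes[name], kwargs


# usage, mirroring Example #1:
# optim_fn, optim_args = get_optimizer("sgd,lr=0.1")
# map_optimizer = optim_fn(mapping.parameters(), **optim_args)
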
Example #2
    def __init__(self,
                 params,
                 inputdim,
                 nclasses,
                 l2reg=0.,
                 batch_size=64,
                 seed=1111,
                 cudaEfficient=False):
        # super(self.__class__, self) breaks under further subclassing;
        # the zero-argument Python 3 form is safe
        super().__init__(inputdim, nclasses, l2reg, batch_size, seed,
                         cudaEfficient)
        """
        PARAMETERS:
        -nhid:       number of hidden units (0: Logistic Regression)
        -optim:      optimizer ("sgd,lr=0.1", "adam", "rmsprop" ..)
        -tenacity:   how many times dev acc does not increase before stopping
        -epoch_size: each epoch corresponds to epoch_size pass on the train set
        -max_epoch:  max number of epochs
        -dropout:    dropout for MLP
        """

        self.nhid = params.get("nhid", 0)
        self.optim = params.get("optim", "adam")
        self.tenacity = params.get("tenacity", 5)
        self.epoch_size = params.get("epoch_size", 4)
        self.max_epoch = params.get("max_epoch", 200)
        self.dropout = params.get("dropout", 0.)
        self.batch_size = params.get("batch_size", 64)

        if self.nhid == 0:
            self.model = nn.Sequential(nn.Linear(self.inputdim,
                                                 self.nclasses)).cuda()
        else:
            self.model = nn.Sequential(
                nn.Linear(self.inputdim, self.nhid),
                nn.Dropout(p=self.dropout),
                nn.Sigmoid(),
                nn.Linear(self.nhid, self.nclasses),
            ).cuda()

        # reduction='sum' replaces the deprecated size_average=False flag
        self.loss_fn = nn.CrossEntropyLoss(reduction='sum').cuda()

        optim_fn, optim_params = utils.get_optimizer(self.optim)
        self.optimizer = optim_fn(self.model.parameters(), **optim_params)
        self.optimizer.param_groups[0]['weight_decay'] = self.l2reg
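
For context, a hypothetical call site for the classifier in Example #2 might build a params dict like the one below; the keys mirror the docstring above, while the class name "MLP" and the feature dimensions are placeholders.

# Hypothetical usage of the Example #2 classifier.
params = {
    "nhid": 128,            # hidden units (0 would fall back to logistic regression)
    "optim": "sgd,lr=0.1",  # spec string handed to utils.get_optimizer
    "tenacity": 5,          # epochs without dev-accuracy improvement before stopping
    "epoch_size": 4,
    "max_epoch": 200,
    "dropout": 0.1,
    "batch_size": 64,
}
# clf = MLP(params, inputdim=300, nclasses=5, l2reg=1e-4, cudaEfficient=True)
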
Example #3
    def __init__(self, encoder, decoder, data, test_data, params, num_updates):
        self.encoder = encoder
        self.decoder = decoder
        self.data = data
        self.test_data = test_data
        self.params = params

        self.enc_dec_params = list(self.encoder.parameters()) + list(
            self.decoder.parameters())
        # optimizers
        self.optimizer = get_optimizer(self.enc_dec_params, self.params.optim)
        self.optimizer._num_updates = num_updates
        # training statistics
        self.epoch = getattr(params, 'now_epoch', 0)
        self.n_iter = 0
        self.oom = 0
        self.n_sentences = 0
        self.stats = {'processed_s': 0, 'processed_w': 0, 'loss': []}
        self.sample_sizes = []
Example #4
  def __init__(self,
               train_dataset, val_dataset, test_dataset,
               model, hyper_dict, experiment_name,
               device, cross_validation=False):
    self.train_dataset = train_dataset
    self.val_dataset = val_dataset
    self.test_dataset = test_dataset

    self.handler = LockableModelSaveHandler(self)

    self.model = model
    self.best_model = copy.deepcopy(model)
    self.best_val_loss = None

    self.epochs = hyper_dict['epochs']
    self.batch_size = hyper_dict['batch_size']
    self.num_workers = hyper_dict['num_workers']
    self.hyper_dict = hyper_dict

    self.experiment_name = experiment_name
    self.device = device
    self.cross_validation = cross_validation

    key_lst = ['time']
    for split in ('train', 'val', 'test'):
      for metric in ('loss', 'acc'):
        key_lst.append(f"{split}_{metric}")

    self.avg_meter = {key: AverageMeter() for key in key_lst}
    self.tag_str = {key: "" for key in key_lst}

    self.train_ldr = DataLoader(train_dataset, batch_size=self.batch_size,
                                num_workers=self.num_workers, shuffle=True)
    self.val_ldr = DataLoader(val_dataset, batch_size=self.batch_size,
                              num_workers=self.num_workers, shuffle=False)
    self.test_ldr = DataLoader(test_dataset, batch_size=self.batch_size,
                               num_workers=self.num_workers, shuffle=False)
    self.optimizer = get_optimizer(model.parameters(), hyper_dict)

    # state variables
    self.current_iter = 0
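
Example #4 expects a `get_optimizer(parameters, hyper_dict)` that reads its settings from the same hyperparameter dict. The sketch below shows one plausible shape for such a factory; the key names 'optimizer', 'lr', 'momentum', and 'weight_decay' are assumptions, not taken from the original project.

import torch.optim as optim


def get_optimizer(parameters, hyper_dict):
    """Sketch of a dict-driven optimizer factory matching the call in Example #4."""
    name = hyper_dict.get('optimizer', 'adam').lower()
    lr = hyper_dict.get('lr', 1e-3)
    weight_decay = hyper_dict.get('weight_decay', 0.0)
    if name == 'sgd':
        return optim.SGD(parameters, lr=lr,
                         momentum=hyper_dict.get('momentum', 0.9),
                         weight_decay=weight_decay)
    if name == 'adam':
        return optim.Adam(parameters, lr=lr, weight_decay=weight_decay)
    raise ValueError("Unknown optimizer: %s" % name)
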
Example #5
    def __init__(self,
                 bert_model,
                 mapping,
                 args,
                 bert_model1=None,
                 trans_types=[
                     'self_attention', 'attention', 'linear_self_attention',
                     'nonlinear_self_attention'
                 ]):
        """
        Initialize trainer script.
        """
        self.transformer_types = trans_types
        self.args = args
        self.bert_model = bert_model
        self.bert_model1 = bert_model1
        self.mapping = mapping

        if self.args.local_rank == -1 or self.args.no_cuda:
            self.device = torch.device("cuda" if torch.cuda.is_available()
                                       and not self.args.no_cuda else "cpu")
        else:
            self.device = torch.device("cuda", self.args.local_rank)

        # optimizers
        if hasattr(args, 'map_optimizer'):
            optim_fn, optim_args = get_optimizer(args.map_optimizer)
            if self.args.map_type == 'fine_tune':
                self.map_optimizer = optim_fn(bert_model.parameters(),
                                              **optim_args)
            else:
                self.map_optimizer = optim_fn(mapping.parameters(),
                                              **optim_args)

        # best validation score
        self.best_valid_metric = -1e12
        self.decrease_lr = False
Example #6
def pretrain(cfg):
    print(cfg.pretty())
    pretrain_config_validator(cfg)
    fix_seed(cfg.seed)

    controller = load_pretrained_weights(
        NAO(**cfg.controller).to(0), cfg.pretrained_model_path)
    models = {'trunk': controller}
    dataset = get_dataset(seed=cfg.seed, **cfg.dataset)
    optimizers = {
        'trunk_optimizer':
        get_optimizer(parameters=models['trunk'].parameters(), **cfg.optimizer)
    }
    lr_schedulers = {
        'trunk_scheduler_by_iteration':
        get_scheduler(optimizer=optimizers['trunk_optimizer'], **cfg.scheduler)
    }
    loss_funcs = {
        'reconstruction_loss': torch.nn.NLLLoss(),
        'metric_loss': get_loss(**cfg.loss)
    }
    mining_funcs = {"tuple_miner": get_miner(**cfg.miner)}
    visualizers = [umap.UMAP(**params) for params in cfg.visualizers]
    end_of_iteration_hook = TensorboardHook(visualizers).end_of_iteration_hook
    end_of_epoch_hook = ModelSaverHook().end_of_epoch_hook
    get_trainer(
        models=models,
        optimizers=optimizers,
        lr_schedulers=lr_schedulers,
        loss_funcs=loss_funcs,
        mining_funcs=mining_funcs,
        dataset=dataset,
        end_of_iteration_hook=end_of_iteration_hook,
        end_of_epoch_hook=end_of_epoch_hook,
        **cfg.trainer,
    ).train()
Example #7
def train(cfg):
    print(cfg.pretty())
    train_config_validator(cfg)
    fix_seed(cfg.seed)

    writer = SummaryWriter(log_dir='logs')
    controller = load_pretrained_weights(
        NAO(**cfg.controller).to(0), cfg.pretrained_model_path)
    dataset = get_dataset(writer=writer, seed=cfg.seed, **cfg.dataset)
    optimizer = get_optimizer(parameters=_get_target_parameters(
        controller, cfg.freeze_encoder_decoder),
                              **cfg.optimizer)
    lr_scheduler = get_scheduler(optimizer=optimizer, **cfg.scheduler)
    end_of_epoch_hook = ModelSaverHook().end_of_epoch_hook

    get_trainer(
        controller=controller,
        dataset=dataset,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        writer=writer,
        end_of_epoch_hook=end_of_epoch_hook,
        **cfg.trainer,
    ).train()
Example #8
def setup_data_and_model(params, model):
    # Variables that may not otherwise be assigned
    writer = perturbation_loader = generator = training_smiles = None

    # setup random seeds
    if params.val_seed is None: params.val_seed = params.seed
    set_seed_if(params.seed)

    exp_path = os.path.join(params.dump_path, params.exp_name)
    # create exp path if it doesn't exist
    if not os.path.exists(exp_path):
        os.makedirs(exp_path)
    # create logger
    logger = create_logger(os.path.join(exp_path, 'train.log'), 0)
    pp = pprint.PrettyPrinter()
    logger.info("============ Initialized logger ============")
    logger.info("Random seed is {}".format(params.seed))
    if params.suppress_params is False:
        logger.info("\n".join("%s: %s" % (k, str(v))
                          for k, v in sorted(dict(vars(params)).items())))
        logger.info("Running command: %s" % 'python ' + ' '.join(sys.argv))
    logger.info("The experiment will be stored in %s\n" % exp_path)
    logger.info("")
    # load data
    train_data, val_dataset, train_loader, val_loader = load_graph_data(params)

    logger.info('train_loader len is {}'.format(len(train_loader)))
    logger.info('val_loader len is {}'.format(len(val_loader)))

    if params.num_binary_graph_properties > 0 and params.pretrained_property_embeddings_path:
        model.binary_graph_property_embedding_layer.weight.data = \
            torch.Tensor(np.load(params.pretrained_property_embeddings_path).T)
    if params.load_latest is True:
        load_prefix = 'latest'
    elif params.load_best is True:
        load_prefix = 'best'
    else:
        load_prefix = None

    if load_prefix is not None:
        if params.local_cpu is True:
            model.load_state_dict(torch.load(os.path.join(exp_path, '{}_model'.format(load_prefix)), map_location='cpu'))
        else:
            model.load_state_dict(torch.load(os.path.join(exp_path, '{}_model'.format(load_prefix))))
    if params.local_cpu is False:
        model = model.cuda()
    if params.gen_num_samples > 0:
        generator = GraphGenerator(train_data, model, params.gen_random_init, params.gen_num_iters, params.gen_predict_deterministically, params.local_cpu)
        with open(params.smiles_path) as f:
            smiles = f.read().split('\n')
            training_smiles = smiles[:int(params.smiles_train_split * len(smiles))]
            del smiles
    opt = get_optimizer(model.parameters(), params.optimizer)
    if load_prefix is not None:
        opt.load_state_dict(torch.load(os.path.join(exp_path, '{}_opt_sd'.format(load_prefix))))

    lr = opt.param_groups[0]['lr']
    lr_lambda = lambda iteration: lr_decay_multiplier(iteration, params.warm_up_iters, params.decay_start_iter,
                                                      params.lr_decay_amount, params.lr_decay_frac,
                                                      params.lr_decay_interval, params.min_lr, lr)
    scheduler = LambdaLR(opt, lr_lambda)
    index_method = get_index_method()

    best_loss = 9999
    if params.tensorboard:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(exp_path)

    total_iter, grad_accum_iters = params.first_iter, 0

    return params, model, opt, scheduler, train_data, train_loader, val_dataset, val_loader, perturbation_loader,\
           generator, index_method, exp_path, training_smiles, pp, logger, writer, best_loss, total_iter,\
           grad_accum_iters
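
Example #8 wraps its optimizer in a LambdaLR scheduler whose multiplier combines warm-up and decay (`lr_decay_multiplier` is project code and is not shown above). As a reminder of the contract, LambdaLR multiplies each param group's initial lr by whatever the lambda returns for the current step; the toy warm-up multiplier below is only a stand-in for the project's schedule.

import torch
from torch.optim.lr_scheduler import LambdaLR

warm_up_iters = 1000


def toy_lambda(step):
    # linear warm-up to the base lr, then constant; a stand-in for
    # lr_decay_multiplier, which is not shown above
    return min(1.0, (step + 1) / warm_up_iters)


param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.SGD([param], lr=0.1)
scheduler = LambdaLR(opt, toy_lambda)

for it in range(5):
    opt.step()
    scheduler.step()  # lr is now 0.1 * toy_lambda(it + 1)
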
Example #9
def main(params):
    # setup random seeds
    set_seed(params.seed)
    params.ar = True

    exp_path = os.path.join(params.dump_path, params.exp_name)
    # create exp path if it doesn't exist
    if not os.path.exists(exp_path):
        os.makedirs(exp_path)
    # create logger
    logger = create_logger(os.path.join(exp_path, 'train.log'), 0)
    logger.info("============ Initialized logger ============")
    logger.info("Random seed is {}".format(params.seed))
    logger.info("\n".join("%s: %s" % (k, str(v))
                          for k, v in sorted(dict(vars(params)).items())))
    logger.info("The experiment will be stored in %s\n" % exp_path)
    logger.info("Running command: %s" % 'python ' + ' '.join(sys.argv))
    logger.info("")
    # load data
    data, loader = load_smiles_data(params)
    if params.data_type == 'ChEMBL':
        all_smiles_mols = open(os.path.join(params.data_path, 'guacamol_v1_all.smiles'), 'r').readlines()
    else:
        all_smiles_mols = open(os.path.join(params.data_path, 'QM9_all.smiles'), 'r').readlines()
    train_data, val_data = data['train'], data['valid']
    dico = data['dico']
    logger.info('train_data len is {}'.format(len(train_data)))
    logger.info('val_data len is {}'.format(len(val_data)))

    # keep cycling through train_loader forever
    # stop when max iters is reached
    def rcycle(iterable):
        saved = []                 # in-memory cache of everything yielded so far
        for element in iterable:
            yield element
            saved.append(element)
        while saved:
            random.shuffle(saved)  # reshuffle the cache on every full pass
            for element in saved:
                yield element
    train_loader = rcycle(train_data.get_iterator(shuffle=True, group_by_size=True, n_sentences=-1))

    # extra param names for transformermodel
    params.n_langs = 1
    # build Transformer model
    model = TransformerModel(params, is_encoder=False, with_output=True)

    if params.local_cpu is False:
        model = model.cuda()
    opt = get_optimizer(model.parameters(), params.optimizer)
    scores = {'ppl': float('inf'), 'acc': 0}

    if params.load_path:
        reloaded_iter, scores = load_model(params, model, opt, logger)

    for total_iter, train_batch in enumerate(train_loader):
        if params.load_path is not None:
            total_iter += reloaded_iter + 1

        epoch = total_iter // params.epoch_size
        if total_iter == params.max_steps:
            logger.info("============ Done training ... ============")
            break
        elif total_iter % params.epoch_size == 0:
            logger.info("============ Starting epoch %i ... ============" % epoch)
        model.train()
        opt.zero_grad()
        train_loss = calculate_loss(model, train_batch, params)
        train_loss.backward()
        if params.clip_grad_norm > 0:
            clip_grad_norm_(model.parameters(), params.clip_grad_norm)
        opt.step()
        if total_iter % params.print_after == 0:
            logger.info("Step {} ; Loss = {}".format(total_iter, train_loss))

        if total_iter > 0 and total_iter % params.epoch_size == (params.epoch_size - 1):
            # run eval step (calculate validation loss)
            model.eval()
            n_chars = 0
            xe_loss = 0
            n_valid = 0
            logger.info("============ Evaluating ... ============")
            val_loader = val_data.get_iterator(shuffle=True)
            for val_iter, val_batch in enumerate(val_loader):
                with torch.no_grad():
                    val_scores, val_loss, val_y = calculate_loss(model, val_batch, params, get_scores=True)
                # update stats
                n_chars += val_y.size(0)
                xe_loss += val_loss.item() * len(val_y)
                n_valid += (val_scores.max(1)[1] == val_y).sum().item()

            ppl = np.exp(xe_loss / n_chars)
            acc = 100. * n_valid / n_chars
            logger.info("Acc={}, PPL={}".format(acc, ppl))
            if acc > scores['acc']:
                scores['acc'] = acc
                scores['ppl'] = ppl
                save_model(params, data, model, opt, dico, logger, 'best_model', epoch, total_iter, scores)
                logger.info('Saving new best_model {}'.format(epoch))
                logger.info("Best Acc={}, PPL={}".format(scores['acc'], scores['ppl']))

            logger.info("============ Generating ... ============")
            number_samples = 100
            gen_smiles = generate_smiles(params, model, dico, number_samples)
            generator = ARMockGenerator(gen_smiles)

            try:
                benchmark = ValidityBenchmark(number_samples=number_samples)
                validity_score = benchmark.assess_model(generator).score
            except Exception:
                validity_score = -1
            try:
                benchmark = UniquenessBenchmark(number_samples=number_samples)
                uniqueness_score = benchmark.assess_model(generator).score
            except Exception:
                uniqueness_score = -1

            try:
                benchmark = KLDivBenchmark(number_samples=number_samples, training_set=all_smiles_mols)
                kldiv_score = benchmark.assess_model(generator).score
            except Exception:
                kldiv_score = -1
            logger.info('Validity Score={}, Uniqueness Score={}, KlDiv Score={}'.format(validity_score, uniqueness_score, kldiv_score))
            save_model(params, data, model, opt, dico, logger, 'model', epoch, total_iter, {'ppl': ppl, 'acc': acc})
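
The `rcycle` helper in Example #9 yields the underlying iterator once in its original order, then keeps re-yielding its cached elements in a new random order on every subsequent pass. A small standalone demonstration of that behavior:

import itertools
import random


def rcycle(iterable):
    saved = []
    for element in iterable:
        yield element
        saved.append(element)
    while saved:
        random.shuffle(saved)
        for element in saved:
            yield element


batches = ['b0', 'b1', 'b2']
print(list(itertools.islice(rcycle(batches), 9)))
# the first pass ('b0', 'b1', 'b2') arrives in order; the remaining six
# items are two reshuffled passes over the same cached batches
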
Example #10
assert len(params.name.strip()) > 0
assert not params.reload or os.path.isfile(params.reload)

# initialize experiment / load dataset
logger = initialize_exp(params)
data, attributes = load_images(params)
train_data = DataSampler(data[0], attributes[0], params)
valid_data = DataSampler(data[1], attributes[1], params)
test_data = DataSampler(data[2], attributes[2], params)

# build the model / reload / optimizer
classifier = Classifier(params).cuda()
if params.reload:
    reload_model(classifier, params.reload,
                 ['img_sz', 'img_fm', 'init_fm', 'hid_dim', 'attr', 'n_attr'])
optimizer = get_optimizer(classifier, params.optimizer)


def save_model(name):
    """
    Save the model.
    """
    path = os.path.join(params.dump_path, '%s.pth' % name)
    logger.info('Saving the classifier to %s ...' % path)
    torch.save(classifier, path)

# best accuracy
best_accu = -1e12

for n_epoch in range(params.n_epochs):
Example #11
def train(opt):
    if torch.cuda.is_available():
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": custom_collate_fn
    }

    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": custom_collate_fn
    }

    training_set = VOCDataset(opt.data_path, opt.dataset, opt.image_size)
    training_generator = DataLoader(training_set, **training_params)

    test_set = VOCDataset(opt.data_path,
                          opt.dataset,
                          opt.image_size,
                          is_training=False)
    test_generator = DataLoader(test_set, **test_params)

    model = Deeplab(num_classes=training_set.num_classes + 1)
    #model.load_state_dict(torch.load(opt.pre_trained_model))
    log_path = os.path.join(opt.log_path, "{}".format(opt.dataset))
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    # os.makedirs(log_path)
    writer = SummaryWriter(log_path)
    writer.add_graph(
        model, torch.rand(opt.batch_size, 3, opt.image_size, opt.image_size))
    if torch.cuda.is_available():
        model.cuda()

    best_loss = 1e10
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epoches):
        for iter, batch in enumerate(training_generator):
            current_step = epoch * num_iter_per_epoch + iter
            current_lr = update_lr(opt.lr, current_step,
                                   num_iter_per_epoch * opt.num_epoches)
            optimizer = get_optimizer(model, current_lr, opt.momentum,
                                      opt.decay)
            if torch.cuda.is_available():
                batch = [torch.Tensor(record).cuda() for record in batch]
            else:
                batch = [torch.Tensor(record) for record in batch]
            image, gt1, gt2 = batch
            gt1 = gt1.long()
            gt2 = gt2.long()
            optimizer.zero_grad()
            results = model(image)

            mul_losses = multiple_losses(results, [gt1, gt1, gt2, gt1])
            mul_losses[4].backward()
            optimizer.step()
            print(
                "Epoch: {}/{}, Iteration: {}/{}, Lr: {}, Loss: {:.2f} (1xloss: {:.2f} 0.75xloss: {:.2f} 0.5xloss: {:.2f} Max_merged_loss: {:.2f})"
                .format(epoch + 1, opt.num_epoches, iter + 1,
                        num_iter_per_epoch, optimizer.param_groups[0]['lr'],
                        mul_losses[4], mul_losses[0], mul_losses[1],
                        mul_losses[2], mul_losses[3]))
            writer.add_scalar('Train/Total_loss', mul_losses[4], current_step)
            writer.add_scalar('Train/1x_scale_loss', mul_losses[0],
                              current_step)
            writer.add_scalar('Train/0.75x_scale_loss', mul_losses[1],
                              current_step)
            writer.add_scalar('Train/0.5x_scale_loss', mul_losses[2],
                              current_step)
            writer.add_scalar('Train/Max_merged_loss', mul_losses[3],
                              current_step)

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_ls = []
            loss_scale_1_ls = []
            loss_scale_2_ls = []
            loss_scale_3_ls = []
            loss_max_merged_ls = []

            for te_batch in test_generator:
                if torch.cuda.is_available():
                    te_batch = [
                        torch.Tensor(record).cuda() for record in te_batch
                    ]
                else:
                    te_batch = [torch.Tensor(record) for record in te_batch]
                te_image, te_gt1, te_gt2 = te_batch
                te_gt1 = te_gt1.long()
                te_gt2 = te_gt2.long()
                num_sample = len(te_gt1)

                with torch.no_grad():
                    te_results = model(te_image)
                    te_mul_losses = multiple_losses(
                        te_results, [te_gt1, te_gt1, te_gt2, te_gt1])
                loss_ls.append(te_mul_losses[4] * num_sample)
                loss_scale_1_ls.append(te_mul_losses[0] * num_sample)
                loss_scale_2_ls.append(te_mul_losses[1] * num_sample)
                loss_scale_3_ls.append(te_mul_losses[2] * num_sample)
                loss_max_merged_ls.append(te_mul_losses[3] * num_sample)

            te_loss = sum(loss_ls) / len(test_set)
            te_scale_1_loss = sum(loss_scale_1_ls) / len(test_set)
            te_scale_2_loss = sum(loss_scale_2_ls) / len(test_set)
            te_scale_3_loss = sum(loss_scale_3_ls) / len(test_set)
            te_max_merged_loss = sum(loss_max_merged_ls) / len(test_set)

            print(
                "Epoch: {}/{}, Lr: {}, Loss: {:.2f} (1xloss: {:.2f} 0.75xloss: {:.2f} 0.5xloss: {:.2f} Max_merged_loss: {:.2f})"
                .format(epoch + 1, opt.num_epoches,
                        optimizer.param_groups[0]['lr'], te_loss,
                        te_scale_1_loss, te_scale_2_loss, te_scale_3_loss,
                        te_max_merged_loss))

            writer.add_scalar('Test/Total_loss', te_loss, epoch)
            writer.add_scalar('Test/1x_scale_loss', te_scale_1_loss, epoch)
            writer.add_scalar('Test/0.75x_scale_loss', te_scale_2_loss, epoch)
            writer.add_scalar('Test/0.5x_scale_loss', te_scale_3_loss, epoch)
            writer.add_scalar('Test/Max_merged_loss', te_max_merged_loss,
                              epoch)

            model.train()
            if te_loss + opt.es_min_delta < best_loss:
                best_loss = te_loss
                best_epoch = epoch
                torch.save(
                    model.state_dict(), opt.saved_path + os.sep +
                    "only_params_trained_deeplab_voc")
                torch.save(
                    model, opt.saved_path + os.sep +
                    "whole_model_trained_deeplab_voc")

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, te_loss))
                break
    writer.close()
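
Example #11 rebuilds the optimizer on every iteration just to apply a new learning rate, which also discards its momentum buffers each time. A common alternative, sketched here with a placeholder polynomial schedule standing in for update_lr, is to create the optimizer once and update its param groups in place.

import torch


def poly_lr(base_lr, step, total_steps, power=0.9):
    # placeholder schedule; the actual update_lr in Example #11 is not shown
    return base_lr * (1.0 - step / float(total_steps)) ** power


model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9,
                            weight_decay=5e-4)
total_steps = 100
for step in range(total_steps):
    for group in optimizer.param_groups:
        group['lr'] = poly_lr(0.01, step, total_steps)
    # ... forward pass, loss.backward() ...
    optimizer.step()
    optimizer.zero_grad()

Because the optimizer object persists, momentum state carries over from step to step, which recreating the optimizer each iteration does not allow.
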
Example #12
def main(params):
    logger = initialize_exp(params)

    if params.img_list is None:
        params.img_paths = [s.strip() for s in params.img_paths.split(",")]
    else:
        assert ":" in params.img_paths
        chunks = params.img_paths.split(":")
        assert len(chunks) == 2
        n_start, n_end = int(chunks[0]), int(chunks[1])

        img_list = torch.load(params.img_list)
        params.img_paths = [img_list[i] for i in range(n_start, n_end)]
    print("Image paths", params.img_paths)

    # Build model / cuda
    ckpt = torch.load(params.marking_network)
    params.num_classes = ckpt["params"]["num_classes"]
    params.architecture = ckpt['params']['architecture']
    print("Building %s model ..." % params.architecture)
    model = build_model(params)
    model.cuda()
    model.load_state_dict(
        {k.replace("module.", ""): v
         for k, v in ckpt['model'].items()},
        strict=False)
    model = model.eval()
    model.fc = nn.Sequential()

    loader = default_loader
    transform = getImagenetTransform("none",
                                     img_size=params.img_size,
                                     crop_size=params.crop_size)
    img_orig = [transform(loader(p)).unsqueeze(0) for p in params.img_paths]

    # Loading carriers
    direction = torch.load(params.carrier_path).cuda()
    assert direction.dim() == 2
    direction = direction[params.carrier_id:params.carrier_id + 1]

    rho = -1
    if params.angle is not None:
        rho = 1 + np.tan(params.angle)**2

    img = [x.clone() for x in img_orig]

    # Load differentiable data augmentations
    center_da = CenterCrop(params.img_size, params.crop_size)
    random_da = RandomResizedCropFlip(params.crop_size)
    if params.data_augmentation == "center":
        data_augmentation = center_da
    elif params.data_augmentation == "random":
        data_augmentation = random_da
    else:
        raise ValueError(
            "Unknown data_augmentation: %s" % params.data_augmentation)

    for i in range(len(img)):
        img[i].requires_grad = True

    optimizer, schedule = get_optimizer(img, params.optimizer)
    if schedule is not None:
        schedule = repeat_to(schedule, params.epochs)

    img_center = torch.cat(
        [center_da(x, 0).cuda(non_blocking=True) for x in img_orig], dim=0)
    # ft_orig = model(center_da(img_orig, 0).cuda(non_blocking=True)).detach()
    ft_orig = model(img_center).detach()

    if params.angle is not None:
        ft_orig = torch.load(
            "/checkpoint/asablayrolles/radioactive_data/imagenet_ckpt_2/features/valid_resnet18_center.pth"
        ).cuda()

    for iteration in range(params.epochs):
        if schedule is not None:
            lr = schedule[iteration]
            logger.info("New learning rate for %f" % lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        # Differentially augment images
        batch = []
        for x in img:
            aug_params = data_augmentation.sample_params(x)
            aug_img = data_augmentation(x, aug_params)
            batch.append(aug_img.cuda(non_blocking=True))
        batch = torch.cat(batch, dim=0)

        # Forward augmented images
        ft = model(batch)

        if params.angle is None:
            loss_ft = -torch.sum((ft - ft_orig) * direction)
            loss_ft_l2 = params.lambda_ft_l2 * torch.norm(ft - ft_orig,
                                                          dim=1).sum()
        else:
            dot_product = torch.sum((ft - ft_orig) * direction)
            print("Dot product: ", dot_product.item())
            if params.half_cone:
                loss_ft = -rho * dot_product * torch.abs(dot_product)
            else:
                loss_ft = -rho * (dot_product**2)
            loss_ft_l2 = torch.norm(ft - ft_orig)**2

        loss_norm = 0
        for i in range(len(img)):
            loss_norm += params.lambda_l2_img * torch.norm(img[i].cuda(
                non_blocking=True) - img_orig[i].cuda(non_blocking=True))**2
        loss = loss_ft + loss_norm + loss_ft_l2

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        logs = {
            "keyword": "iteration",
            "loss": loss.item(),
            "loss_ft": loss_ft.item(),
            "loss_norm": loss_norm.item(),
            "loss_ft_l2": loss_ft_l2.item(),
        }
        if params.angle is not None:
            logs["R"] = -(loss_ft + loss_ft_l2).item()
        if schedule is not None:
            logs["lr"] = schedule[iteration]
        logger.info("__log__:%s" % json.dumps(logs))

        for i in range(len(img)):
            img[i].data[0] = project_linf(img[i].data[0], img_orig[i][0],
                                          params.radius)
            if iteration % 10 == 0:
                img[i].data[0] = roundPixel(img[i].data[0])

    img_new = [numpyPixel(x.data[0]).astype(np.float32) for x in img]
    img_old = [numpyPixel(x[0]).astype(np.float32) for x in img_orig]

    img_totest = torch.cat(
        [center_da(x, 0).cuda(non_blocking=True) for x in img])
    with torch.no_grad():
        ft_new = model(img_totest)

    logger.info("__log__:%s" % json.dumps({
        "keyword":
        "final",
        "psnr":
        np.mean(
            [psnr(x_new - x_old) for x_new, x_old in zip(img_new, img_old)]),
        "ft_direction":
        torch.mv(ft_new - ft_orig, direction[0]).mean().item(),
        "ft_norm":
        torch.norm(ft_new - ft_orig, dim=1).mean().item(),
        "rho":
        rho,
        "R": (rho * torch.dot(ft_new[0] - ft_orig[0], direction[0])**2 -
              torch.norm(ft_new - ft_orig)**2).item(),
    }))

    for i in range(len(img)):
        img_name = basename(params.img_paths[i])

        extension = ".%s" % (img_name.split(".")[-1])
        np.save(
            join(params.dump_path, img_name).replace(extension, ".npy"),
            img_new[i].astype(np.uint8))
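
Example #12 relies on two image-space helpers that are not shown: `project_linf`, which keeps the perturbed image inside an L-infinity ball of radius `params.radius` around the original, and `roundPixel`, which snaps values back onto the valid pixel grid. The sketches below illustrate the typical behavior; the real helpers may additionally account for ImageNet normalization.

import torch


def project_linf(x, x_orig, radius):
    # keep the perturbation within an L-infinity ball around the original image
    return x_orig + torch.clamp(x - x_orig, min=-radius, max=radius)


def round_pixel(x):
    # snap a [0, 1]-range image back onto the 8-bit pixel grid
    return torch.round(x * 255.0) / 255.0
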
Example #13

imv = mean0(train_dataset.y)
if args.ensemble_models is None:
    model = Model(args, vocab, imv)
else:
    model_name = args.ensemble_models
    model = EnsembleModel(model_name, args.ensemble_method)
if args.cuda:
    model.cuda()
    model = torch.nn.DataParallel(model)
    print('Model is on GPU')
torch.save(model, model_save)
optimizable_parameters = model.parameters()
loss_fn = F.mse_loss if args.loss == 'mse' else F.l1_loss
optimizer = U.get_optimizer(args, optimizable_parameters)

lcount = 0
model.train()
for epoch in range(args.epochs):
    losses = []
    batch_idx = -1
    # pdb.set_trace()
    loader = ASAPDataLoader(train_dataset, train_dataset.maxlen, args.batch_size)
    for xs, ys, ps, padding_mask, lens, (lhs, rhs) in loader:
        batch_idx += 1
        print('Starting batch %d' % batch_idx)
        if args.pos:
            indexes = train_dataset.tags_x[lhs:rhs]
        else:
            indexes = None
Example #14
    def configure_optimizers(self):
        self.optimizer = get_optimizer(self.hparams, self.models)
        scheduler = get_scheduler(self.hparams, self.optimizer)
        return [self.optimizer], [scheduler]
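
Example #14 follows the PyTorch Lightning convention in which `configure_optimizers` returns a list of optimizers and a list of schedulers. A minimal self-contained module using the same return shape, with plain torch objects instead of the project's get_optimizer/get_scheduler helpers:

import torch
import pytorch_lightning as pl


class TinyModule(pl.LightningModule):
    # minimal sketch; only the optimizer/scheduler wiring is of interest here
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(4, 2)

    def training_step(self, batch, batch_idx):
        x, y = batch
        return torch.nn.functional.mse_loss(self.layer(x), y)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
        return [optimizer], [scheduler]
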
Example #15
def run(args):
    save_args(args, with_tensorboard=True)

    tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
    tokenizer.bos_token = BOS_OUTPUT_TOKEN  # For decoding specifically

    train_dataset, eval_dataset, test_dataset = [
        SequentialJSONIterableDataset(
            os.path.join(args.datadir, f"{split}_*.clf.jsonl"),
            args=args,
            process_lines=False,
            reservoir_shuffle=shuffle,
            repeat=repeat,
            reservoir_size=args.reservoir_shuffle_size,
        )
        for (split, shuffle, repeat) in [
            ("train", True, True),
            ("valid", False, True),
            ("test", False, False),
        ]
    ]

    # Multiple inputs. Use Informativeness
    if args.input_type == "all":
        # ControlCode or generic Bart
        model = MultiInputBartForConditionalGeneration.from_pretrained(
            args.model_name_or_path
        )
        # MultiHead
        if args.use_multi_head:
            model = MultiInputMultiHeadBartForConditionalGeneration.from_pretrained_multi(
                args.model_name_or_path
            )

    elif args.use_multi_head:
        # MultiHead
        model = MultiHeadBartForConditionalGeneration.from_pretrained_multi(
            args.model_name_or_path
        )
    else:
        # ControlCode or generic Bart
        model = BartForConditionalGeneration.from_pretrained(args.model_name_or_path)

    # Set special token IDs for eval function
    model.config.decoder_start_token_id = tokenizer.bos_token_id
    model.config.pad_token_id = tokenizer.pad_token_id  # may be redundant, but set explicitly

    if args.cuda:
        model = model.to("cuda")
    if args.distributed:
        model = utils_dist.wrap(model, args)

    optimizer = get_optimizer(args, model)
    if args.use_apex and HAS_APEX:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

    collate_fn_filled = functools.partial(
        collate_fn,
        input_type=args.input_type,
        modify_prefix=(not args.no_modify_prefix),
        target_type=args.target_type,
    )

    if args.test_only:  # run on test set
        print("=== TEST/EVAL ONLY, no training")

        named_splits = {
            "train": train_dataset,
            "valid": eval_dataset,
            "test": test_dataset,
        }

        selected_split = named_splits[args.test_split]
        eval_iter = DataLoader(
            selected_split,
            batch_size=args.eval_batch_size,
            collate_fn=collate_fn_filled,
            num_workers=1,
            worker_init_fn=worker_init_fn,
        )
        results = evaluation(args, model, 0, tokenizer, eval_iter, write_summary=False)

        print(results["rouge_scores"])

        # Save results in JSON file
        results_filename = Path(args.logdir) / f"{args.test_split}_results.json"

        with results_filename.open("w") as f:
            json.dump(results, f, indent=2, sort_keys=True)
        return

    model.train()

    global_step = 0
    grad_acc_step = 0
    loss_tensor_log = []

    train_iter = DataLoader(
        train_dataset,
        batch_size=args.per_unit_batch_size,
        collate_fn=collate_fn_filled,
        num_workers=args.num_data_workers,
        worker_init_fn=worker_init_fn,
    )

    eval_iter = DataLoader(
        eval_dataset,
        batch_size=args.eval_batch_size,
        collate_fn=collate_fn_filled,
        num_workers=args.num_data_workers,
        worker_init_fn=worker_init_fn,
    )

    for _, (_, input_texts, output_texts) in enumerate(train_iter):
        if len(input_texts) == 0:
            continue

        # Prohibit batches with no contribution summaries at all
        if sum(len(out) for out in output_texts) == 0:
            continue

        # MultiHead + Auxiliary loss (Informativeness)
        if args.target_type == "both" and args.use_multi_head:
            if args.input_type == "paper":
                ips = [i for ip in input_texts for i in ip]
            elif args.input_type == "all":
                ips = [list(ip[0]) for ip in input_texts]

            ops = [o for op in output_texts for o in op]
            tok_input, tok_output, labels = tokenize_batch(
                ips, ops, model, tokenizer, args
            )

            # Prepare inputs
            if args.input_type == "paper":
                tok_input["input_ids"] = tok_input["input_ids"].view(
                    args.per_unit_batch_size, 2, -1
                )[:, 0, :]
                tok_input["attention_mask"] = tok_input["attention_mask"].view(
                    args.per_unit_batch_size, 2, -1
                )[:, 0, :]
                additional_kwargs = {}
            elif args.input_type == "all":
                new_tok_input = {}
                new_tok_input["input_ids"] = [t["input_ids"] for t in tok_input]
                new_tok_input["attention_mask"] = [
                    t["attention_mask"] for t in tok_input
                ]
                tok_input = new_tok_input
                additional_kwargs = {
                    "final_layer": [None, None, None],
                    "input_modes": ["LogL", "MI_inbound", "MI_outbound"],
                    "informativeness": args.use_informativeness,
                }

            # b x [cont, ctx] x seq_len
            tok_output["input_ids"] = tok_output["input_ids"].view(
                args.per_unit_batch_size, 2, -1
            )
            tok_output["attention_mask"] = tok_output["attention_mask"].view(
                args.per_unit_batch_size, 2, -1
            )
            labels = labels.view(args.per_unit_batch_size, 2, -1)

            # Fixing the strange behavior of torch.distributed where some values
            # are overwritten when the sequence length is just one.
            for b in range(args.per_unit_batch_size):
                tok_output["input_ids"][b][tok_output["input_ids"][b][:, 0] == 1, 0] = 2
                tok_output["attention_mask"][b][
                    tok_output["attention_mask"][b][:, 0] == 0, 0
                ] = 1
                labels[b][labels[b][:, 0] == -100, 0] = 2

            all_labels = []
            all_dec_inputs = []
            for idx in range(2):
                all_dec_inputs.append(
                    dict(
                        input_ids=tok_output["input_ids"][:, idx, :],
                        attention_mask=tok_output["attention_mask"][:, idx, :],
                    )
                )
                all_labels.append(labels[:, idx, :])

            # Disable sync except at the beginning and the end of gradient accumulation
            if args.distributed:
                if (grad_acc_step == 0) or (
                    (grad_acc_step + 1) % args.gradient_accumulation_steps == 0
                ):
                    model.require_forward_param_sync = True
                    model.require_backward_grad_sync = True
                else:
                    model.require_forward_param_sync = False
                    model.require_backward_grad_sync = False

            outs = model(
                input_ids=tok_input["input_ids"],
                attention_mask=tok_input["attention_mask"],
                decoder_input_ids=[
                    shift_left(tok_output["input_ids"], tokenizer.bos_token_id)
                    for tok_output in all_dec_inputs
                ],
                decoder_attention_mask=[
                    tok_output["attention_mask"] for tok_output in all_dec_inputs
                ],
                lm_labels=all_labels,
                **additional_kwargs,
            )

            # MultiHead + Informativeness
            if args.input_type == "all":
                # losses for generating both contrib & context
                if args.use_informativeness:
                    # MI_outbound: informativeness
                    contrib_loss = (
                        outs["LogL"][0][0] + args.aux_scale * outs["MI_outbound"][0][0]
                    )
                    context_loss = (
                        outs["LogL"][1][0] + args.aux_scale * outs["MI_inbound"][1][0]
                    )
                else:
                    contrib_loss = (
                        outs["LogL"][0][0]
                        - args.aux_scale * outs["MI_inbound"][0][0]
                        + (
                            args.aux_scale * outs["MI_outbound"][0][0]
                            if not args.use_adaptive_scale
                            else 0
                        )
                    )
                    context_loss = (
                        outs["LogL"][1][0]
                        + (
                            args.aux_scale * outs["MI_inbound"][1][0]
                            if not args.use_adaptive_scale
                            else 0
                        )
                        - args.aux_scale * outs["MI_outbound"][1][0]
                    )
                loss = (contrib_loss + context_loss) / 2
                losses = [
                    outs["LogL"][0][0],
                    outs["MI_inbound"][0][0],
                    outs["MI_outbound"][0][0],
                    outs["LogL"][1][0],
                    outs["MI_inbound"][1][0],
                    outs["MI_outbound"][1][0],
                ]

            # multihead
            else:
                # contrib, context
                losses = [o[0] for o in outs]
                loss = sum(losses) / len(losses)

            check_nan(loss)

            # reporting logL only
            loss_tensor_log.append(
                (losses[0] if args.input_type == "all" else loss).detach()
            )

            global_step, grad_acc_step, loss_tensor_log = update_step(
                args,
                model,
                tokenizer,
                optimizer,
                loss,
                losses,
                eval_iter,
                global_step,
                grad_acc_step,
                loss_tensor_log,
            )

        else:
            # For compatibility of training loop
            if args.target_type != "both":
                input_texts, output_texts = ([input_texts], [output_texts])

            input_texts, output_texts = zip(*input_texts), zip(*output_texts)
            heads = ["contrib", "context"]
            losses = []

            # loop over the two targets
            for input_text, output_text, head in zip(input_texts, output_texts, heads):
                tok_input, tok_output, labels = tokenize_batch(
                    input_text, output_text, model, tokenizer, args
                )

                if args.distributed:
                    if (grad_acc_step == 0) or (
                        (grad_acc_step + 1) % args.gradient_accumulation_steps == 0
                    ):
                        model.require_forward_param_sync = True
                        model.require_backward_grad_sync = True
                    else:
                        model.require_forward_param_sync = False
                        model.require_backward_grad_sync = False

                # Auxiliary loss: informativeness
                if args.input_type == "all":
                    outs = model(
                        input_ids=[t["input_ids"] for t in tok_input],
                        attention_mask=[t["attention_mask"] for t in tok_input],
                        decoder_input_ids=shift_left(
                            tok_output["input_ids"], tokenizer.bos_token_id
                        ),
                        decoder_attention_mask=tok_output["attention_mask"],
                        lm_labels=labels,
                    )
                else:
                    outs = model(
                        input_ids=tok_input["input_ids"],
                        attention_mask=tok_input["attention_mask"],
                        decoder_input_ids=shift_left(
                            tok_output["input_ids"], tokenizer.bos_token_id
                        ),
                        decoder_attention_mask=tok_output["attention_mask"],
                        lm_labels=labels,
                    )

                if args.input_type == "all":
                    losses += outs[0]
                    if args.target_type == "contrib":
                        if args.use_informativeness:
                            coeff = [
                                1,
                                0,
                                args.aux_scale,
                            ]
                        else:
                            coeff = [
                                1,
                                -args.aux_scale,
                                args.aux_scale if not args.use_adaptive_scale else 0,
                            ]
                    elif args.target_type == "context":
                        if args.use_informativeness:
                            coeff = [
                                1,
                                args.aux_scale,
                                0,
                            ]
                        else:
                            coeff = [
                                1,
                                args.aux_scale if not args.use_adaptive_scale else 0,
                                -args.aux_scale,
                            ]

                    loss = sum(l * c for l, c in zip(outs[0], coeff))

                elif args.use_multi_head:
                    loss = outs[0 if head == "contrib" else 1][0]

                else:
                    loss = outs[0]

                check_nan(loss)

                loss_tensor_log.append(
                    (losses[0] if args.input_type == "all" else loss).detach()
                )

                global_step, grad_acc_step, loss_tensor_log = update_step(
                    args,
                    model,
                    tokenizer,
                    optimizer,
                    loss,
                    losses,
                    eval_iter,
                    global_step,
                    grad_acc_step,
                    loss_tensor_log,
                )
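
Example #15 toggles require_forward_param_sync / require_backward_grad_sync by hand so that DistributedDataParallel only all-reduces gradients on the step that will actually call optimizer.step(). The documented way to get the same effect is DDP's no_sync() context manager; a minimal sketch, assuming the model is already wrapped in torch.nn.parallel.DistributedDataParallel:

import contextlib


def backward_with_optional_sync(ddp_model, loss, grad_acc_step, accumulation_steps):
    """Run backward(), skipping gradient all-reduce on intermediate accumulation steps."""
    sync_now = (grad_acc_step + 1) % accumulation_steps == 0
    # no_sync() suppresses DDP's gradient all-reduce for this backward pass;
    # the locally accumulated grads are reduced on the next synchronized one.
    ctx = contextlib.nullcontext() if sync_now else ddp_model.no_sync()
    with ctx:
        loss.backward()
    return sync_now

On the intermediate steps, gradients simply accumulate in each rank's param.grad and are reduced together during the next synchronized backward pass.
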