Example No. 1
def main(rank, args):
    dataset = MoleculeDataset('ChEMBL', 'canonical', ['train', 'val'])
    train_loader = DataLoader(dataset.train_set,
                              batch_size=args['batch_size'],
                              shuffle=True,
                              collate_fn=dataset.collate)
    val_loader = DataLoader(dataset.val_set,
                            batch_size=args['batch_size'],
                            shuffle=True,
                            collate_fn=dataset.collate)
    model = SGVAE(atom_types=dataset.atom_types,
                  bond_types=dataset.bond_types,
                  node_hidden_size=args['node_hidden_size'],
                  num_prop_rounds=args['num_propagation_rounds'],
                  dropout=args['dropout'])
    if args['num_processes'] == 1:
        from utils import Optimizer
        optimizer = Optimizer(args['lr'],
                              Adam(model.parameters(), lr=args['lr']))
    else:
        from utils import MultiProcessOptimizer
        optimizer = MultiProcessOptimizer(
            args['num_processes'], args['lr'],
            Adam(model.parameters(), lr=args['lr']))

    if rank == 0:
        t2 = time.time()
    best_val_prob = 0

    # Training
    for epoch in range(args['nepochs']):
        model.train()
        if rank == 0:
            print('Training')

        for i, data in enumerate(train_loader):
            log_prob = model(actions=data, compute_log_prob=True)
            prob = log_prob.detach().exp()

            loss_averaged = -log_prob
            prob_averaged = prob
            optimizer.backward_and_step(loss_averaged)
            if rank == 0:
                train_printer.update(epoch + 1, loss_averaged.item(),
                                     prob_averaged.item())
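
The `Optimizer` imported from `utils` is not shown in this listing. A minimal sketch of the interface this example (and Example No. 8, which uses the same pattern) relies on, namely a constructor taking the base learning rate and a wrapped torch optimizer plus `backward_and_step`, `decay_lr`, and an `lr` attribute, might look like the following; the decay factor and the absence of gradient clipping are assumptions, not the actual `utils.Optimizer`.

class Optimizer:
    """Sketch of the single-process wrapper assumed by this example."""

    def __init__(self, lr, optimizer):
        self.lr = lr
        self.optimizer = optimizer

    def backward_and_step(self, loss):
        # One gradient update: zero grads, backpropagate, step.
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def decay_lr(self, decay_factor=0.99):
        # Multiplicative decay applied to every parameter group.
        self.lr *= decay_factor
        for group in self.optimizer.param_groups:
            group['lr'] = self.lr
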
Example No. 2
    def train(self, model, data_loader, batch_size, n_epoch, template_flag,
              resume=False, optimizer=None, mode=0, teacher_forcing_ratio=0,
              post_flag=False):
        self.evaluator = Evaluator(
            vocab_dict=self.vocab_dict,
            vocab_list=self.vocab_list,
            decode_classes_dict=self.decode_classes_dict,
            decode_classes_list=self.decode_classes_list,
            loss=NLLLoss(),
            cuda_use=self.cuda_use)
        if resume:
            checkpoint_path = Checkpoint.get_certain_checkpoint(
                "./experiment", "best")
            resume_checkpoint = Checkpoint.load(checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            start_epoch = resume_checkpoint.epoch
            start_step = resume_checkpoint.step
            self.train_acc_list = resume_checkpoint.train_acc_list
            self.test_acc_list = resume_checkpoint.test_acc_list
            self.loss_list = resume_checkpoint.loss_list
        else:
            start_epoch = 1
            start_step = 0
            self.train_acc_list = []
            self.test_acc_list = []
            self.loss_list = []
            model_opt = NoamOpt(
                512, 1, 2000,
                torch.optim.Adam(model.parameters(),
                                 lr=0,
                                 betas=(0.9, 0.98),
                                 eps=1e-9))
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters()),
                                      max_grad_norm=0)
            self.optimizer = model_opt

        self._train_epoches(data_loader=data_loader,
                            model=model,
                            batch_size=batch_size,
                            start_epoch=start_epoch,
                            start_step=start_step,
                            n_epoch=n_epoch,
                            mode=mode,
                            template_flag=template_flag,
                            teacher_forcing_ratio=teacher_forcing_ratio,
                            post_flag=post_flag)
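
`NoamOpt(512, 1, 2000, ...)` points to the warmup schedule popularized by the Transformer: the learning rate grows roughly linearly for `warmup` steps and then decays with the inverse square root of the step count. Below is a sketch of that schedule, assuming the usual formulation from the Annotated Transformer rather than this repository's exact class.

class NoamOpt:
    """Sketch of the Noam learning-rate schedule assumed above."""

    def __init__(self, model_size, factor, warmup, optimizer):
        self.model_size = model_size
        self.factor = factor
        self.warmup = warmup
        self.optimizer = optimizer
        self._step = 0

    def rate(self, step):
        # lr = factor * d_model^-0.5 * min(step^-0.5, step * warmup^-1.5)
        return self.factor * (self.model_size ** -0.5 *
                              min(step ** -0.5, step * self.warmup ** -1.5))

    def step(self):
        # Refresh the learning rate of every parameter group, then step.
        self._step += 1
        lr = self.rate(self._step)
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        self.optimizer.step()
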
Example No. 3
def get_model():
    net = Net(config)
    net = net.cuda()

    loss = Loss(config).cuda()
    post_process = PostProcess(config).cuda()

    params = net.parameters()
    opt = Optimizer(params, config)

    return config, ArgoDataset, collate_fn, net, loss, post_process, opt
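
`Optimizer(params, config)` builds the optimizer from a configuration dictionary. A hedged sketch of such a config-driven wrapper follows; the key names (`opt`, `lr`, `momentum`) are illustrative assumptions, not the repository's actual schema.

import torch


class Optimizer:
    """Sketch of a config-driven optimizer wrapper; key names are assumptions."""

    def __init__(self, params, config):
        self.config = config
        if config.get('opt', 'adam') == 'adam':
            self.opt = torch.optim.Adam(params, lr=config.get('lr', 1e-3))
        else:
            self.opt = torch.optim.SGD(params, lr=config.get('lr', 1e-3),
                                       momentum=config.get('momentum', 0.9))

    def zero_grad(self):
        self.opt.zero_grad()

    def step(self):
        self.opt.step()
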
Example No. 4
    def __init__(self, hparams, trainable):

        self.trainable = trainable
        self.hparams = hparams
        self.image_shape = [224, 224, 3]
        self.license_number_list = hparams.license_number_list
        self.is_train = tf.placeholder_with_default(False,
                                                    shape=[],
                                                    name='is_train')

        self.layers = NeuralLayers(trainable=self.trainable,
                                   is_train=self.is_train,
                                   hparams=self.hparams)
        self.optimizer_builder = Optimizer(hparams=hparams)
        self.saver = None
        self.build_resnet50()
        if trainable:
            self.build_optimizer()
            self.build_metrics()
            self.build_summary()
Example No. 5
 def Loading(self):
     self.device.set_device(self.arg.device)
     print("Loading model")
     if self.arg.model:
         model_class = import_class(self.arg.model)
         model = self.device.model_to_device(
             model_class(**self.arg.model_args))
         if self.arg.weights:
             try:
                 print("Loading pretrained model...")
                 state_dict = torch.load(self.arg.weights)
                 for w in self.arg.ignore_weights:
                     if state_dict.pop(w, None) is not None:
                         print('Successfully Removed Weights: {}.'.format(w))
                     else:
                         print('Could Not Remove Weights: {}.'.format(w))
                 model.load_state_dict(state_dict, strict=True)
                 optimizer = Optimizer(model, self.arg.optimizer_args)
             except RuntimeError:
                 print("Loading from checkpoint...")
                 state_dict = torch.load(self.arg.weights)
                 self.rng.set_rng_state(state_dict['rng_state'])
                 self.arg.optimizer_args[
                     'start_epoch'] = state_dict["epoch"] + 1
                 self.recoder.print_log(
                     "Resuming from checkpoint: epoch {}".format(
                         self.arg.optimizer_args['start_epoch']))
                 model = self.device.load_weights(model, self.arg.weights,
                                                  self.arg.ignore_weights)
                 optimizer = Optimizer(model, self.arg.optimizer_args)
                 optimizer.optimizer.load_state_dict(
                     state_dict["optimizer_state_dict"])
                 optimizer.scheduler.load_state_dict(
                     state_dict["scheduler_state_dict"])
         else:
             optimizer = Optimizer(model, self.arg.optimizer_args)
     else:
         raise ValueError("No Models.")
     print("Loading model finished.")
     self.load_data()
     return model, optimizer
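
The resume branch above expects the checkpoint file to contain 'rng_state', 'epoch', 'optimizer_state_dict' and 'scheduler_state_dict' in addition to the model weights. A hedged sketch of the matching save side follows; the 'model_state_dict' key and the `rng.get_rng_state()` helper are assumptions, since the example only shows how the file is read back.

import torch


def save_checkpoint(path, model, optimizer, epoch, rng):
    # Writes a checkpoint compatible with the resume branch shown above
    # (key layout is assumed, not taken from the original repository).
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.optimizer.state_dict(),
        'scheduler_state_dict': optimizer.scheduler.state_dict(),
        'rng_state': rng.get_rng_state(),
        'epoch': epoch,
    }, path)
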
Example No. 6
def main(args, path_to_candidate_bonds):
    if args['train_path'] is None:
        train_set = USPTORank(
            subset='train',
            candidate_bond_path=path_to_candidate_bonds['train'],
            max_num_change_combos_per_reaction=args[
                'max_num_change_combos_per_reaction_train'],
            num_processes=args['num_processes'])
    else:
        train_set = WLNRankDataset(
            path_to_reaction_file=args['train_path'],
            candidate_bond_path=path_to_candidate_bonds['train'],
            mode='train',
            max_num_change_combos_per_reaction=args[
                'max_num_change_combos_per_reaction_train'],
            num_processes=args['num_processes'])
    train_set.ignore_large()
    if args['val_path'] is None:
        val_set = USPTORank(subset='val',
                            candidate_bond_path=path_to_candidate_bonds['val'],
                            max_num_change_combos_per_reaction=args[
                                'max_num_change_combos_per_reaction_eval'],
                            num_processes=args['num_processes'])
    else:
        val_set = WLNRankDataset(
            path_to_reaction_file=args['val_path'],
            candidate_bond_path=path_to_candidate_bonds['val'],
            mode='val',
            max_num_change_combos_per_reaction=args[
                'max_num_change_combos_per_reaction_eval'],
            num_processes=args['num_processes'])

    if args['num_workers'] > 1:
        torch.multiprocessing.set_sharing_strategy('file_system')

    train_loader = DataLoader(train_set,
                              batch_size=args['batch_size'],
                              collate_fn=collate_rank_train,
                              shuffle=True,
                              num_workers=args['num_workers'])
    val_loader = DataLoader(val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_rank_eval,
                            shuffle=False,
                            num_workers=args['num_workers'])

    model = WLNReactionRanking(
        node_in_feats=args['node_in_feats'],
        edge_in_feats=args['edge_in_feats'],
        node_hidden_feats=args['hidden_size'],
        num_encode_gnn_layers=args['num_encode_gnn_layers']).to(args['device'])
    criterion = CrossEntropyLoss(reduction='sum')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    from utils import Optimizer
    optimizer = Optimizer(model,
                          args['lr'],
                          optimizer,
                          max_grad_norm=args['max_norm'])

    acc_sum = 0
    grad_norm_sum = 0
    dur = []
    total_samples = 0
    for epoch in range(args['num_epochs']):
        t0 = time.time()
        model.train()
        for batch_id, batch_data in enumerate(train_loader):
            batch_reactant_graphs, batch_product_graphs, \
            batch_combo_scores, batch_labels, batch_num_candidate_products = batch_data

            batch_reactant_graphs = batch_reactant_graphs.to(args['device'])
            batch_product_graphs = batch_product_graphs.to(args['device'])
            batch_combo_scores = batch_combo_scores.to(args['device'])
            batch_labels = batch_labels.to(args['device'])
            reactant_node_feats = batch_reactant_graphs.ndata.pop('hv').to(
                args['device'])
            reactant_edge_feats = batch_reactant_graphs.edata.pop('he').to(
                args['device'])
            product_node_feats = batch_product_graphs.ndata.pop('hv').to(
                args['device'])
            product_edge_feats = batch_product_graphs.edata.pop('he').to(
                args['device'])

            pred = model(
                reactant_graph=batch_reactant_graphs,
                reactant_node_feats=reactant_node_feats,
                reactant_edge_feats=reactant_edge_feats,
                product_graphs=batch_product_graphs,
                product_node_feats=product_node_feats,
                product_edge_feats=product_edge_feats,
                candidate_scores=batch_combo_scores,
                batch_num_candidate_products=batch_num_candidate_products)

            # Check if the ground truth candidate has the highest score
            batch_loss = 0
            product_graph_start = 0
            for i in range(len(batch_num_candidate_products)):
                product_graph_end = product_graph_start + batch_num_candidate_products[
                    i]
                reaction_pred = pred[product_graph_start:product_graph_end, :]
                acc_sum += float(
                    reaction_pred.max(
                        dim=0)[1].detach().cpu().data.item() == 0)
                batch_loss += criterion(reaction_pred.reshape(1, -1),
                                        batch_labels[i, :])
                product_graph_start = product_graph_end

            grad_norm_sum += optimizer.backward_and_step(batch_loss)
            total_samples += args['batch_size']
            if total_samples % args['print_every'] == 0:
                progress = 'Epoch {:d}/{:d}, iter {:d}/{:d} | time {:.4f} | ' \
                           'accuracy {:.4f} | grad norm {:.4f}'.format(
                    epoch + 1, args['num_epochs'],
                    (batch_id + 1) * args['batch_size'] // args['print_every'],
                    len(train_set) // args['print_every'],
                    (sum(dur) + time.time() - t0) / total_samples * args['print_every'],
                    acc_sum / args['print_every'],
                    grad_norm_sum / args['print_every'])
                print(progress)
                acc_sum = 0
                grad_norm_sum = 0

            if total_samples % args['decay_every'] == 0:
                dur.append(time.time() - t0)
                old_lr = optimizer.lr
                optimizer.decay_lr(args['lr_decay_factor'])
                new_lr = optimizer.lr
                print('Learning rate decayed from {:.4f} to {:.4f}'.format(
                    old_lr, new_lr))
                torch.save({'model_state_dict': model.state_dict()},
                           args['result_path'] +
                           '/model_{:d}.pkl'.format(total_samples))
                prediction_summary = 'total samples {:d}, (epoch {:d}/{:d}, iter {:d}/{:d})\n'.format(
                    total_samples, epoch + 1, args['num_epochs'],
                    (batch_id + 1) * args['batch_size'] // args['print_every'],
                    len(train_set) //
                    args['print_every']) + candidate_ranking_eval(
                        args, model, val_loader)
                print(prediction_summary)
                with open(args['result_path'] + '/val_eval.txt', 'a') as f:
                    f.write(prediction_summary)
                t0 = time.time()
                model.train()
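
Here `backward_and_step` is expected to return a gradient norm and the wrapper is built with `max_grad_norm`. A minimal sketch of such a clipping wrapper, assuming the reported value is the total norm returned by the clipping call (not necessarily the actual `utils.Optimizer`):

from torch.nn.utils import clip_grad_norm_


class Optimizer:
    """Sketch of a clipping optimizer wrapper matching the calls above."""

    def __init__(self, model, lr, optimizer, max_grad_norm=None):
        self.model = model
        self.lr = lr
        self.optimizer = optimizer
        self.max_grad_norm = max_grad_norm

    def backward_and_step(self, loss):
        # Backpropagate, optionally clip, step, and report the gradient norm.
        self.optimizer.zero_grad()
        loss.backward()
        grad_norm = 0.0
        if self.max_grad_norm is not None:
            grad_norm = clip_grad_norm_(self.model.parameters(),
                                        self.max_grad_norm).item()
        self.optimizer.step()
        return grad_norm

    def decay_lr(self, decay_factor):
        self.lr *= decay_factor
        for group in self.optimizer.param_groups:
            group['lr'] = self.lr
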
Example No. 7
def main(args):

    ts = datetime.datetime.now().timestamp()

    logger = SummaryWriter(
        os.path.join('exp/qgen_rl/', '{}_{}'.format(args.exp_name, ts)))
    logger.add_text('exp_name', args.exp_name)
    logger.add_text('args', str(args))

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    vocab = Vocab(os.path.join(args.data_dir, 'vocab.csv'), 3)
    category_vocab = CategoryVocab(
        os.path.join(args.data_dir, 'categories.csv'))

    data_loader = OrderedDict()
    splits = ['train', 'valid'] + (['test'] if args.test_set else list())
    for split in splits:
        file = os.path.join(args.data_dir, 'guesswhat.' + split + '.jsonl.gz')
        data_loader[split] = DataLoader(
            dataset=InferenceDataset(split,
                                     file,
                                     vocab,
                                     category_vocab,
                                     new_object=split == 'train',
                                     load_vgg_features=True),
            batch_size=args.batch_size,
            collate_fn=InferenceDataset.get_collate_fn(device),
            shuffle=split == 'train')

    if not args.belief_state:
        qgen = QGen.load(device, file=args.qgen_file)
    else:
        qgen = QGenBelief.load(device, file=args.qgen_file)
    guesser = Guesser.load(device, file=args.guesser_file)
    oracle = Oracle.load(device, file=args.oracle_file)

    generation_wrapper = GenerationWrapper(qgen, guesser, oracle)

    baseline = MLP(
        sizes=[qgen.hidden_size, args.baseline_hidden_size, 1],
        activation='relu', final_activation='relu', bias=[True, False])\
        .to(device)

    baseline_loss_fn = torch.nn.MSELoss(reduction='sum')
    baseline_optimizer = Optimizer(torch.optim.SGD,
                                   baseline.parameters(),
                                   lr=args.baseline_lr)
    qgen_optimizer = Optimizer(torch.optim.SGD,
                               qgen.parameters(),
                               lr=args.qgen_lr)

    split2strat = {
        'train': args.train_strategy,
        'valid': args.eval_strategy,
        'test': args.eval_strategy
    }

    best_val_acc = 0
    for epoch in range(args.epochs):

        for split in splits:

            if split == 'train':
                qgen.train()
                baseline.train()
            else:
                qgen.eval()
                baseline.eval()
            # torch.enable_grad()/torch.no_grad() do nothing when called outside a
            # `with` block, so set the grad mode explicitly for this split instead.
            torch.set_grad_enabled(split == 'train')

            total_acc = list()
            for iteration, sample in enumerate(data_loader[split]):

                return_dict = generation_wrapper.generate(
                    sample,
                    vocab,
                    split2strat[split],
                    args.max_num_questions,
                    device,
                    args.belief_state,
                    return_keys=[
                        'mask', 'object_logits', 'hidden_states', 'log_probs',
                        'generations'
                    ])

                mask = return_dict['mask']
                object_logits = return_dict['object_logits']
                hidden_states = return_dict['hidden_states']
                log_probs = return_dict['log_probs']

                acc = accuarcy(object_logits, sample['target_id'])
                total_acc += [acc]

                mask = mask.float()

                rewards = torch.eq(
                    object_logits.topk(1)[1].view(-1),
                    sample['target_id'].view(-1)).float()
                rewards = rewards.unsqueeze(1).repeat(1, mask.size(1))
                rewards *= mask


                baseline_preds = baseline(hidden_states.detach_()).squeeze(2)
                baseline_preds *= mask
                baseline_loss = baseline_loss_fn(
                    baseline_preds.view(-1), rewards.view(-1)) \
                    / baseline_preds.size(0)

                log_probs *= mask
                baseline_preds = baseline_preds.detach()
                policy_gradient_loss = torch.sum(log_probs *
                                                 (rewards - baseline_preds),
                                                 dim=1)
                policy_gradient_loss = -torch.mean(policy_gradient_loss)

                if split == 'train':
                    qgen_optimizer.optimize(policy_gradient_loss,
                                            clip_norm_args=[args.clip_value])
                    baseline_optimizer.optimize(
                        baseline_loss, clip_norm_args=[args.clip_value])

                logger.add_scalar('{}_accuracy'.format(split), acc,
                                  iteration + len(data_loader[split]) * epoch)

                logger.add_scalar('{}_reward'.format(split),
                                  torch.mean(rewards).item(),
                                  iteration + len(data_loader[split]) * epoch)

                logger.add_scalar('{}_bl_loss'.format(split),
                                  baseline_loss.item(),
                                  iteration + len(data_loader[split]) * epoch)

                logger.add_scalar('{}_pg_loss'.format(split),
                                  policy_gradient_loss.item(),
                                  iteration + len(data_loader[split]) * epoch)

            model_saved = False
            if split == 'valid':
                if np.mean(total_acc) > best_val_acc:
                    best_val_acc = np.mean(total_acc)
                    qgen.save(file='bin/qgen_rl_{}_{}.pt'.format(
                        args.exp_name, ts),
                              accuarcy=np.mean(total_acc))
                    model_saved = True

            logger.add_scalar('epoch_{}_accuracy'.format(split),
                              np.mean(total_acc), epoch)

            print("Epoch {:3d}: {} Accuracy {:5.3f} {}".format(
                epoch, split.upper(),
                np.mean(total_acc) * 100, '*' if model_saved else ''))
        print("-" * 50)
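
This example constructs `Optimizer(torch.optim.SGD, parameters, lr=...)` and calls `.optimize(loss, clip_norm_args=[clip_value])`. A minimal sketch of a wrapper with that shape, assuming `clip_norm_args` holds just the maximum norm:

from torch.nn.utils import clip_grad_norm_


class Optimizer:
    """Sketch of the optimize()-style wrapper assumed by this example."""

    def __init__(self, optimizer_cls, parameters, **kwargs):
        self.parameters = list(parameters)
        self.optimizer = optimizer_cls(self.parameters, **kwargs)

    def optimize(self, loss, clip_norm_args=None):
        # One backward / clip / step cycle.
        self.optimizer.zero_grad()
        loss.backward()
        if clip_norm_args is not None:
            clip_grad_norm_(self.parameters, *clip_norm_args)
        self.optimizer.step()
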
Example No. 8
def main(rank, args):
    """
    Parameters
    ----------
    rank : int
        Subprocess id
    args : dict
        Configuration
    """
    if rank == 0:
        t1 = time.time()

    set_random_seed(args['seed'])
    # Removing the line below will cause problems for multiprocessing
    torch.set_num_threads(1)

    # Setup dataset and data loader
    dataset = MoleculeDataset(args['dataset'],
                              args['order'], ['train', 'val'],
                              subset_id=rank,
                              n_subsets=args['num_processes'])

    # Note that currently the batch size for the loaders should only be 1.
    train_loader = DataLoader(dataset.train_set,
                              batch_size=args['batch_size'],
                              shuffle=True,
                              collate_fn=dataset.collate)
    val_loader = DataLoader(dataset.val_set,
                            batch_size=args['batch_size'],
                            shuffle=True,
                            collate_fn=dataset.collate)

    if rank == 0:
        try:
            from tensorboardX import SummaryWriter
            writer = SummaryWriter(args['log_dir'])
        except ImportError:
            print(
                'If you want to use tensorboard, install tensorboardX with pip.'
            )
            writer = None
        train_printer = Printer(args['nepochs'], len(dataset.train_set),
                                args['batch_size'], writer)
        val_printer = Printer(args['nepochs'], len(dataset.val_set),
                              args['batch_size'])
    else:
        val_printer = None

    # Initialize model
    model = DGMG(atom_types=dataset.atom_types,
                 bond_types=dataset.bond_types,
                 node_hidden_size=args['node_hidden_size'],
                 num_prop_rounds=args['num_propagation_rounds'],
                 dropout=args['dropout'])

    if args['num_processes'] == 1:
        from utils import Optimizer
        optimizer = Optimizer(args['lr'],
                              Adam(model.parameters(), lr=args['lr']))
    else:
        from utils import MultiProcessOptimizer
        optimizer = MultiProcessOptimizer(
            args['num_processes'], args['lr'],
            Adam(model.parameters(), lr=args['lr']))

    if rank == 0:
        t2 = time.time()
    best_val_prob = 0

    # Training
    for epoch in range(args['nepochs']):
        model.train()
        if rank == 0:
            print('Training')

        for i, data in enumerate(train_loader):
            log_prob = model(actions=data, compute_log_prob=True)
            prob = log_prob.detach().exp()

            loss_averaged = -log_prob
            prob_averaged = prob
            optimizer.backward_and_step(loss_averaged)
            if rank == 0:
                train_printer.update(epoch + 1, loss_averaged.item(),
                                     prob_averaged.item())

        synchronize(args['num_processes'])

        # Validation
        val_log_prob = evaluate(epoch, model, val_loader, val_printer)
        if args['num_processes'] > 1:
            dist.all_reduce(val_log_prob, op=dist.ReduceOp.SUM)
        val_log_prob /= args['num_processes']
        # Strictly speaking, the probability computed here differs from the one computed on
        # the training set: we first average the log likelihood and then exponentiate.
        # By Jensen's inequality, the result is a lower bound of the true average probability.
        val_prob = (-val_log_prob).exp().item()
        val_log_prob = val_log_prob.item()
        if val_prob >= best_val_prob:
            if rank == 0:
                torch.save({'model_state_dict': model.state_dict()},
                           args['checkpoint_dir'])
                print(
                    'Old val prob {:.10f} | new val prob {:.10f} | model saved'
                    .format(best_val_prob, val_prob))
            best_val_prob = val_prob
        elif epoch >= args['warmup_epochs']:
            optimizer.decay_lr()

        if rank == 0:
            print('Validation')
            if writer is not None:
                writer.add_scalar('validation_log_prob', val_log_prob, epoch)
                writer.add_scalar('validation_prob', val_prob, epoch)
                writer.add_scalar('lr', optimizer.lr, epoch)
            print('Validation log prob {:.4f} | prob {:.10f}'.format(
                val_log_prob, val_prob))

        synchronize(args['num_processes'])

    if rank == 0:
        t3 = time.time()
        print('It took {} to setup.'.format(datetime.timedelta(seconds=t2 -
                                                               t1)))
        print('It took {} to finish training.'.format(
            datetime.timedelta(seconds=t3 - t2)))
        print(
            '--------------------------------------------------------------------------'
        )
        print('On average, an epoch takes {}.'.format(
            datetime.timedelta(seconds=(t3 - t2) / args['nepochs'])))
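
When several processes are used, `MultiProcessOptimizer` is expected to synchronize gradients before each step. A hedged sketch of that behavior, averaging gradients across processes with `all_reduce` (the decay factor is an assumption):

import torch.distributed as dist


class MultiProcessOptimizer:
    """Sketch of the multiprocess wrapper assumed above."""

    def __init__(self, n_processes, lr, optimizer):
        self.n_processes = n_processes
        self.lr = lr
        self.optimizer = optimizer

    def _average_gradients(self):
        # Sum gradients over all processes, then divide by the process count.
        for group in self.optimizer.param_groups:
            for p in group['params']:
                if p.grad is not None:
                    dist.all_reduce(p.grad.data, op=dist.ReduceOp.SUM)
                    p.grad.data /= self.n_processes

    def backward_and_step(self, loss):
        self.optimizer.zero_grad()
        loss.backward()
        self._average_gradients()
        self.optimizer.step()

    def decay_lr(self, decay_factor=0.99):
        self.lr *= decay_factor
        for group in self.optimizer.param_groups:
            group['lr'] = self.lr
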
Example No. 9
def main(rank, dev_id, args):
    set_seed()
    # Removing the line below will cause problems for multiprocessing
    if args['num_devices'] > 1:
        torch.set_num_threads(1)
    if dev_id == -1:
        args['device'] = torch.device('cpu')
    else:
        args['device'] = torch.device('cuda:{}'.format(dev_id))
        # Set current device
        torch.cuda.set_device(args['device'])

    train_set, val_set = load_dataset(args)
    get_center_subset(train_set, rank, args['num_devices'])
    train_loader = DataLoader(train_set, batch_size=args['batch_size'],
                              collate_fn=collate_center, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=args['batch_size'],
                            collate_fn=collate_center, shuffle=False)

    model = WLNReactionCenter(node_in_feats=args['node_in_feats'],
                              edge_in_feats=args['edge_in_feats'],
                              node_pair_in_feats=args['node_pair_in_feats'],
                              node_out_feats=args['node_out_feats'],
                              n_layers=args['n_layers'],
                              n_tasks=args['n_tasks']).to(args['device'])
    model.train()
    if rank == 0:
        print('# trainable parameters in the model: ', count_parameters(model))

    criterion = BCEWithLogitsLoss(reduction='sum')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    if args['num_devices'] <= 1:
        from utils import Optimizer
        optimizer = Optimizer(model, args['lr'], optimizer, max_grad_norm=args['max_norm'])
    else:
        from utils import MultiProcessOptimizer
        optimizer = MultiProcessOptimizer(args['num_devices'], model, args['lr'],
                                          optimizer, max_grad_norm=args['max_norm'])

    total_iter = 0
    rank_iter = 0
    grad_norm_sum = 0
    loss_sum = 0
    dur = []

    for epoch in range(args['num_epochs']):
        t0 = time.time()
        for batch_id, batch_data in enumerate(train_loader):
            total_iter += args['num_devices']
            rank_iter += 1

            batch_reactions, batch_graph_edits, batch_mol_graphs, \
            batch_complete_graphs, batch_atom_pair_labels = batch_data
            labels = batch_atom_pair_labels.to(args['device'])
            pred, biased_pred = reaction_center_prediction(
                args['device'], model, batch_mol_graphs, batch_complete_graphs)
            loss = criterion(pred, labels) / len(batch_reactions)
            loss_sum += loss.cpu().detach().data.item()
            grad_norm_sum += optimizer.backward_and_step(loss)

            if rank_iter % args['print_every'] == 0 and rank == 0:
                progress = 'Epoch {:d}/{:d}, iter {:d}/{:d} | ' \
                           'loss {:.4f} | grad norm {:.4f}'.format(
                    epoch + 1, args['num_epochs'], batch_id + 1, len(train_loader),
                    loss_sum / args['print_every'], grad_norm_sum / args['print_every'])
                print(progress)
                grad_norm_sum = 0
                loss_sum = 0

            if total_iter % args['decay_every'] == 0:
                optimizer.decay_lr(args['lr_decay_factor'])
            if total_iter % args['decay_every'] == 0 and rank == 0:
                if epoch >= 1:
                    dur.append(time.time() - t0)
                    print('Training time per {:d} iterations: {:.4f}'.format(
                        rank_iter, np.mean(dur)))
                total_samples = total_iter * args['batch_size']
                prediction_summary = 'total samples {:d}, (epoch {:d}/{:d}, iter {:d}/{:d}) '.format(
                    total_samples, epoch + 1, args['num_epochs'], batch_id + 1, len(train_loader)) + \
                      reaction_center_final_eval(args, args['top_ks_val'], model, val_loader, easy=True)
                print(prediction_summary)
                with open(args['result_path'] + '/val_eval.txt', 'a') as f:
                    f.write(prediction_summary)
                torch.save({'model_state_dict': model.state_dict()},
                           args['result_path'] + '/model_{:d}.pkl'.format(total_samples))
                t0 = time.time()
                model.train()
        synchronize(args['num_devices'])
Example No. 10
class Recognizer:
    def __init__(self, hparams, trainable):

        self.trainable = trainable
        self.hparams = hparams
        self.image_shape = [224, 224, 3]
        self.license_number_list = hparams.license_number_list
        self.is_train = tf.placeholder_with_default(False,
                                                    shape=[],
                                                    name='is_train')

        self.layers = NeuralLayers(trainable=self.trainable,
                                   is_train=self.is_train,
                                   hparams=self.hparams)
        self.optimizer_builder = Optimizer(hparams=hparams)
        self.saver = None
        self.build_resnet50()
        if trainable:
            self.build_optimizer()
            self.build_metrics()
            self.build_summary()

    def build_resnet50(self):
        hparams = self.hparams

        images = tf.placeholder(dtype=tf.float32,
                                shape=[None] + self.image_shape)

        conv1_feats = self.layers.conv2d(images,
                                         filters=64,
                                         kernel_size=(7, 7),
                                         strides=(2, 2),
                                         activation=None,
                                         name='conv1')
        conv1_feats = self.layers.batch_norm(conv1_feats, 'bn_conv1')
        conv1_feats = tf.nn.relu(conv1_feats)
        pool1_feats = self.layers.max_pool2d(conv1_feats,
                                             pool_size=(3, 3),
                                             strides=(2, 2),
                                             name='pool1')

        res2a_feats = self.identity_block_with_output_reduced(
            pool1_feats, 'res2a', 'bn2a', 64, (1, 1))
        res2b_feats = self.identity_block(res2a_feats, 'res2b', 'bn2b', 64)
        res2c_feats = self.identity_block(res2b_feats, 'res2c', 'bn2c', 64)

        res3a_feats = self.identity_block_with_output_reduced(
            res2c_feats, 'res3a', 'bn3a', 128)
        res3b_feats = self.identity_block(res3a_feats, 'res3b', 'bn3b', 128)
        res3c_feats = self.identity_block(res3b_feats, 'res3c', 'bn3c', 128)
        res3d_feats = self.identity_block(res3c_feats, 'res3d', 'bn3d', 128)

        res4a_feats = self.identity_block_with_output_reduced(
            res3d_feats, 'res4a', 'bn4a', 256)
        res4b_feats = self.identity_block(res4a_feats, 'res4b', 'bn4b', 256)
        res4c_feats = self.identity_block(res4b_feats, 'res4c', 'bn4c', 256)
        res4d_feats = self.identity_block(res4c_feats, 'res4d', 'bn4d', 256)
        res4e_feats = self.identity_block(res4d_feats, 'res4e', 'bn4e', 256)
        res4f_feats = self.identity_block(res4e_feats, 'res4f', 'bn4f', 256)

        res5a_feats = self.identity_block_with_output_reduced(
            res4f_feats, 'res5a', 'bn5a', 512)
        res5b_feats = self.identity_block(res5a_feats, 'res5b', 'bn5b', 512)
        res5c_feats = self.identity_block(res5b_feats, 'res5c', 'bn5c', 512)

        global_avg_pool = self.layers.global_avg_pool2d(res5c_feats,
                                                        keepdims=False,
                                                        name='global_avg_pool')
        global_avg_pool = self.layers.dropout(global_avg_pool,
                                              name='global_avg_pool_dropout')

        logits = []
        probabilities = []
        predictions = []
        for i, num_list in enumerate(self.license_number_list):
            logit = self.layers.dense(global_avg_pool,
                                      units=len(num_list),
                                      activation=None,
                                      name='num_{}'.format(i))
            probability = tf.nn.softmax(logit)
            prediction = tf.argmax(probability, axis=1)

            logits.append(logit)
            probabilities.append(probability)
            predictions.append(prediction)

        self.images = images
        self.logits = logits
        self.probabilities = probabilities
        self.predictions = predictions

    def identity_block_with_output_reduced(self,
                                           inputs,
                                           name1,
                                           name2,
                                           filters,
                                           strides=(2, 2)):
        """ A basic block of ResNet. """
        branch1_feats = self.layers.conv2d(inputs,
                                           filters=4 * filters,
                                           kernel_size=(1, 1),
                                           strides=strides,
                                           activation=None,
                                           use_bias=False,
                                           name=name1 + '_branch1')
        branch1_feats = self.layers.batch_norm(branch1_feats,
                                               name2 + '_branch1')

        branch2a_feats = self.layers.conv2d(inputs,
                                            filters=filters,
                                            kernel_size=(1, 1),
                                            strides=(1, 1),
                                            activation=None,
                                            use_bias=False,
                                            name=name1 + '_branch2a')
        branch2a_feats = self.layers.batch_norm(branch2a_feats,
                                                name2 + '_branch2a')
        branch2a_feats = tf.nn.relu(branch2a_feats)

        branch2b_feats = self.layers.conv2d(branch2a_feats,
                                            filters=filters,
                                            kernel_size=(3, 3),
                                            strides=strides,
                                            activation=None,
                                            use_bias=False,
                                            name=name1 + '_branch2b')
        branch2b_feats = self.layers.batch_norm(branch2b_feats,
                                                name2 + '_branch2b')
        branch2b_feats = tf.nn.relu(branch2b_feats)

        branch2c_feats = self.layers.conv2d(branch2b_feats,
                                            filters=4 * filters,
                                            kernel_size=(1, 1),
                                            strides=(1, 1),
                                            activation=None,
                                            use_bias=False,
                                            name=name1 + '_branch2c')
        branch2c_feats = self.layers.batch_norm(branch2c_feats,
                                                name2 + '_branch2c')

        outputs = branch1_feats + branch2c_feats
        outputs = tf.nn.relu(outputs)
        return outputs

    def identity_block(self, inputs, name1, name2, filters):
        """ Another basic block of ResNet. """
        branch2a_feats = self.layers.conv2d(inputs,
                                            filters=filters,
                                            kernel_size=(1, 1),
                                            strides=(1, 1),
                                            activation=None,
                                            use_bias=False,
                                            name=name1 + '_branch2a')
        branch2a_feats = self.layers.batch_norm(branch2a_feats,
                                                name2 + '_branch2a')
        branch2a_feats = tf.nn.relu(branch2a_feats)

        branch2b_feats = self.layers.conv2d(branch2a_feats,
                                            filters=filters,
                                            kernel_size=(3, 3),
                                            strides=(1, 1),
                                            activation=None,
                                            use_bias=False,
                                            name=name1 + '_branch2b')
        branch2b_feats = self.layers.batch_norm(branch2b_feats,
                                                name2 + '_branch2b')
        branch2b_feats = tf.nn.relu(branch2b_feats)

        branch2c_feats = self.layers.conv2d(branch2b_feats,
                                            filters=4 * filters,
                                            kernel_size=(1, 1),
                                            strides=(1, 1),
                                            activation=None,
                                            use_bias=False,
                                            name=name1 + '_branch2c')
        branch2c_feats = self.layers.batch_norm(branch2c_feats,
                                                name2 + '_branch2c')

        outputs = inputs + branch2c_feats
        outputs = tf.nn.relu(outputs)
        return outputs

    def identity_block_without_bottleneck(self, inputs, name1, name2, filters):
        """ Another basic block of ResNet. """
        branch2a_feats = self.layers.conv2d(inputs,
                                            filters=filters,
                                            kernel_size=(3, 3),
                                            strides=(1, 1),
                                            activation=None,
                                            use_bias=False,
                                            name=name1 + '_branch2a')
        branch2a_feats = self.layers.batch_norm(branch2a_feats,
                                                name2 + '_branch2a')
        branch2a_feats = tf.nn.relu(branch2a_feats)

        branch2b_feats = self.layers.conv2d(branch2a_feats,
                                            filters=filters,
                                            kernel_size=(3, 3),
                                            strides=(1, 1),
                                            activation=None,
                                            use_bias=False,
                                            name=name1 + '_branch2b')
        branch2b_feats = self.layers.batch_norm(branch2b_feats,
                                                name2 + '_branch2b')

        outputs = inputs + branch2b_feats
        outputs = tf.nn.relu(outputs)
        return outputs

    def se_block(self, inputs, filters, name, ratio=16):
        avgpool = self.layers.global_avg_pool2d(inputs=inputs,
                                                keepdims=False,
                                                name=name + '_avgpool')
        dense1 = self.layers.dense(inputs=avgpool,
                                   units=filters / ratio,
                                   activation=tf.nn.relu,
                                   name=name + '_dense')
        weighted = self.layers.dense(inputs=dense1,
                                     units=filters,
                                     activation=tf.nn.sigmoid,
                                     name=name + '_weighted')
        weighted = tf.reshape(weighted, (-1, 1, 1, filters))
        outputs = tf.multiply(inputs, weighted)
        return outputs

    def build_optimizer(self):
        hparams = self.hparams

        global_step = tf.train.get_or_create_global_step()

        labels = tf.placeholder(dtype=tf.int64,
                                shape=[None,
                                       len(self.license_number_list)])

        num_losses = []
        min_len = np.min([len(n) for n in self.license_number_list])
        losses = []
        for i, num_list in enumerate(self.license_number_list):
            loss = tf.losses.sparse_softmax_cross_entropy(
                labels=labels[:, i], logits=self.logits[i])
            num_losses.append(loss)
            weight = len(num_list) / min_len
            loss = weight * loss
            losses.append(loss)

        cross_entropy_loss = tf.add_n(losses)

        regularization_loss = tf.losses.get_regularization_loss()

        total_loss = cross_entropy_loss + regularization_loss

        learning_rate = self.optimizer_builder.compute_learning_rate(
            global_step)

        optimizer = self.optimizer_builder.build(name=hparams.optimizer,
                                                 learning_rate=learning_rate)

        gradients, variables = zip(*optimizer.compute_gradients(total_loss))
        gradients, _ = tf.clip_by_global_norm(gradients,
                                              hparams.clip_gradients)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        train_op = optimizer.apply_gradients(zip(gradients, variables),
                                             global_step=global_step)
        train_op = tf.group([train_op, update_ops])

        self.global_step = global_step
        self.labels = labels
        self.num_losses = num_losses
        self.cross_entropy_loss = cross_entropy_loss
        self.regularization_loss = regularization_loss
        self.total_loss = total_loss
        self.learning_rate = learning_rate
        self.optimizer = optimizer
        self.train_op = train_op

    def build_metrics(self):

        avg_cross_entropy_loss, avg_cross_entropy_loss_op = tf.metrics.mean_tensor(
            self.cross_entropy_loss)
        avg_reg_loss, avg_reg_loss_op = tf.metrics.mean_tensor(
            self.regularization_loss)
        avg_total_loss, avg_total_loss_op = tf.metrics.mean_tensor(
            self.total_loss)

        predictions = tf.stack(self.predictions, axis=1)
        partial_accuracy, partial_accuracy_op = tf.metrics.accuracy(
            labels=self.labels, predictions=predictions)
        matches = tf.reduce_all(tf.equal(self.labels, predictions), axis=1)
        accuracy, accuracy_op = tf.metrics.accuracy(
            labels=tf.ones_like(matches), predictions=matches)

        self.metrics = {
            'cross_entropy_loss': avg_cross_entropy_loss,
            'regularization_loss': avg_reg_loss,
            'total_loss': avg_total_loss,
            'partial_accuracy': partial_accuracy,
            'accuracy': accuracy
        }
        self.metric_ops = {
            'cross_entropy_loss': avg_cross_entropy_loss_op,
            'regularization_loss': avg_reg_loss_op,
            'total_loss': avg_total_loss_op,
            'partial_accuracy': partial_accuracy_op,
            'accuracy': accuracy_op
        }

        for i, num_list in enumerate(self.license_number_list):
            loss, loss_op = tf.metrics.mean_tensor(self.num_losses[i])
            accuracy, accuracy_op = tf.metrics.accuracy(
                labels=self.labels[:, i], predictions=self.predictions[i])
            self.metrics.update({
                'num{}_loss'.format(i): loss,
                'num{}_accuracy'.format(i): accuracy
            })
            self.metric_ops.update({
                'num{}_loss'.format(i): loss_op,
                'num{}_accuracy'.format(i): accuracy_op
            })

        self.metric_vars = tf.get_collection(tf.GraphKeys.METRIC_VARIABLES)
        self.reset_metric_op = tf.variables_initializer(self.metric_vars)

    def build_summary(self):

        with tf.name_scope('metric'):
            for metric_name, metric_tensor in self.metrics.items():
                tf.summary.scalar(metric_name, metric_tensor)

        with tf.name_scope('hyperparam'):
            tf.summary.scalar('learning_rate', self.learning_rate)

        self.summary = tf.summary.merge_all()

    def cache_metric_values(self, sess):
        metric_values = sess.run(self.metric_vars)
        self.metric_values = metric_values

    def restore_metric_values(self, sess):
        for var, value in zip(self.metric_vars, self.metric_values):
            sess.run(var.assign(value))

    def encode_labels(self, labels):
        encoded_labels = []
        for label in labels:
            mapped_label = []
            for i, num in enumerate(label):
                assert len(label) == len(self.license_number_list)
                idx = self.license_number_list[i].index(num)
                mapped_label.append(idx)
            encoded_labels.append(mapped_label)
        encoded_labels = np.array(encoded_labels)

        return encoded_labels

    def decode_predictions(self, predictions):
        predictions = np.column_stack(predictions)
        decoded_predictions = []
        for prediction in predictions:
            decoded_prediction = []
            for i, num_idx in enumerate(prediction):
                decoded_prediction.append(self.license_number_list[i][num_idx])
            decoded_prediction = ''.join(decoded_prediction)
            decoded_predictions.append(decoded_prediction)

        return decoded_predictions

    def train(self,
              sess,
              train_dataset,
              val_dataset,
              test_dataset=None,
              load_checkpoint=False,
              checkpoint=None):
        hparams = self.hparams

        if not os.path.exists(hparams.summary_dir):
            os.mkdir(hparams.summary_dir)
        train_writer = tf.summary.FileWriter(hparams.summary_dir + '/train',
                                             sess.graph)
        val_writer = tf.summary.FileWriter(hparams.summary_dir + '/val')
        if test_dataset is not None:
            test_writer = tf.summary.FileWriter(hparams.summary_dir + '/test')

        train_fetches = {
            'train_op': self.train_op,
            'global_step': self.global_step
        }
        train_fetches.update(self.metric_ops)
        val_fetches = self.metric_ops

        if test_dataset is not None:
            test_fetches = self.metric_ops

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        if load_checkpoint:
            self.load(sess, checkpoint)

        # Training
        for _ in tqdm(range(self.hparams.num_epochs), desc='epoch'):
            for _ in tqdm(range(train_dataset.num_batches),
                          desc='batch',
                          leave=False):
                images, labels = train_dataset.next_batch()
                labels = self.encode_labels(labels)

                feed_dict = {
                    self.images: images,
                    self.labels: labels,
                    self.is_train: True
                }

                train_record = sess.run(train_fetches, feed_dict=feed_dict)

                tqdm.write(
                    "Train step {}: total loss: {:>10.5f}   partial accuracy: {:8.2f}   accuracy: {:8.2f}"
                    .format(train_record['global_step'],
                            train_record['total_loss'],
                            train_record['partial_accuracy'] * 100,
                            train_record['accuracy'] * 100))
                if train_record['global_step'] % hparams.summary_period == 0:
                    summary = sess.run(self.summary)
                    train_writer.add_summary(summary,
                                             train_record['global_step'])

                # Validation
                if (train_record['global_step'] +
                        1) % hparams.eval_period == 0:
                    self.cache_metric_values(sess)
                    sess.run(self.reset_metric_op)
                    for _ in tqdm(range(val_dataset.num_batches),
                                  desc='val',
                                  leave=False):
                        images, labels = val_dataset.next_batch()
                        labels = self.encode_labels(labels)

                        feed_dict = {self.images: images, self.labels: labels}

                        val_record = sess.run(val_fetches, feed_dict=feed_dict)

                    tqdm.write(
                        "Validation step {}: total loss: {:>10.5f}   partial accuracy: {:8.2f}   accuracy: {:8.2f}"
                        .format(train_record['global_step'],
                                val_record['total_loss'],
                                val_record['partial_accuracy'] * 100,
                                val_record['accuracy'] * 100))
                    summary = sess.run(self.summary)
                    val_writer.add_summary(summary,
                                           train_record['global_step'])
                    val_writer.flush()
                    val_dataset.reset()

                    self.restore_metric_values(sess)

            sess.run(self.reset_metric_op)

            self.save(sess, global_step=train_record['global_step'])

            train_dataset.reset()

        train_writer.close()
        val_writer.close()

        # Testing
        if test_dataset is not None:
            sess.run(self.reset_metric_op)
            for _ in tqdm(range(test_dataset.num_batches),
                          desc='testing',
                          leave=False):
                images, labels = test_dataset.next_batch()
                labels = self.encode_labels(labels)

                feed_dict = {self.images: images, self.labels: labels}

                test_record = sess.run(test_fetches, feed_dict=feed_dict)

            tqdm.write(
                "Testing: total loss: {:>10.5f}   partial accuracy: {:8.2f}   accuracy: {:8.2f}"
                .format(test_record['total_loss'],
                        test_record['partial_accuracy'] * 100,
                        test_record['accuracy'] * 100))
            summary = sess.run(self.summary)
            test_writer.add_summary(summary, train_record['global_step'])
            test_writer.flush()
            test_writer.close()

    def eval(self, sess, test_dataset, checkpoint=None):
        hparams = self.hparams

        result = {'image': [], 'ground truth': [], 'prediction': []}

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        self.load(sess, checkpoint)

        # Testing
        for _ in tqdm(range(test_dataset.num_batches),
                      desc='batch',
                      leave=False):
            images, labels = test_dataset.next_batch()
            encoded_labels = self.encode_labels(labels)

            predictions, _ = sess.run([self.predictions, self.metric_ops],
                                      feed_dict={
                                          self.images: images,
                                          self.labels: encoded_labels
                                      })

            predictions = self.decode_predictions(predictions)

            for image, file, label, prediction in zip(
                    images, test_dataset.current_image_files, labels,
                    predictions):
                result['image'].append(file)
                result['ground truth'].append(label)
                result['prediction'].append(prediction)

                plt.imshow(image)
                plt.title(prediction)
                plt.savefig('{}/{}'.format(hparams.test_result_dir, file))
                plt.close()

        result = pd.DataFrame.from_dict(result)
        result.to_csv('result.txt')

        eval_result = sess.run(self.metrics)
        with open('eval.txt', 'w') as f:
            for name, value in eval_result.items():
                print('{}: {}'.format(name, value))
                print('{}: {}'.format(name, value), file=f, end='\n')

    def test(self, sess, test_dataset, checkpoint=None):
        hparams = self.hparams

        result = {'image': [], 'prediction': []}

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        self.load(sess, checkpoint)

        # Testing
        for _ in tqdm(range(test_dataset.num_batches),
                      desc='batch',
                      leave=False):
            images = test_dataset.next_batch()

            predictions = sess.run(self.predictions,
                                   feed_dict={self.images: images})

            predictions = self.decode_predictions(predictions)

            for image, file, prediction in zip(
                    images, test_dataset.current_image_files, predictions):
                result['image'].append(file)
                result['prediction'].append(prediction)

                plt.imshow(image)
                plt.title(prediction)
                plt.savefig('{}/{}'.format(hparams.test_result_dir, file))
                plt.close()

        result = pd.DataFrame.from_dict(result)
        result.to_csv('result.txt')

    def save(self, sess, save_dir=None, global_step=None):
        if self.saver is None:
            self.saver = tf.train.Saver()
        save_dir = save_dir or self.hparams.save_dir
        global_step = global_step or self.global_step.eval(session=sess)

        self.saver.save(sess,
                        save_dir + '/recognizer-model.ckpt',
                        global_step=global_step)

    def load(self, sess, checkpoint=None):
        if self.saver is None:
            self.saver = tf.train.Saver()
        if checkpoint is None:
            checkpoint = tf.train.latest_checkpoint(self.hparams.save_dir)
            if checkpoint is None:
                return
        self.saver.restore(sess, checkpoint)
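
In this example `Optimizer(hparams=hparams)` acts as a builder: `compute_learning_rate(global_step)` produces a schedule tensor and `build(name, learning_rate)` returns a TF1 optimizer. A hedged sketch of such a builder; the hparams field names and the exponential-decay schedule are assumptions:

import tensorflow as tf


class Optimizer:
    """Sketch of an hparams-driven TF1 optimizer builder."""

    def __init__(self, hparams):
        self.hparams = hparams

    def compute_learning_rate(self, global_step):
        # One plausible schedule; the actual one is not shown in the example.
        return tf.train.exponential_decay(self.hparams.initial_learning_rate,
                                          global_step,
                                          self.hparams.decay_steps,
                                          self.hparams.decay_rate,
                                          staircase=True)

    def build(self, name, learning_rate):
        if name == 'adam':
            return tf.train.AdamOptimizer(learning_rate)
        if name == 'momentum':
            return tf.train.MomentumOptimizer(learning_rate,
                                              self.hparams.momentum)
        return tf.train.GradientDescentOptimizer(learning_rate)
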
Example No. 11
def main():
    logging = get_root_logger(args.log_path, mode='a')
    logging.info('Command Line Arguments:')
    for key, i in vars(args).items():
        logging.info(key + ' = ' + str(i))
    logging.info('End Command Line Arguments')

    batch_size = args.batch_size
    num_epochs = args.num_epochs

    resume_from = args.resume_from
    steps_per_checkpoint = args.steps_per_checkpoint

    gpu_id = args.gpu_id

    configure_process(args, gpu_id)
    if gpu_id > -1:
        logging.info('Using CUDA on GPU ' + str(gpu_id))
        args.cuda = True
    else:
        logging.info('Using CPU')
        args.cuda = False

    '''Load data'''
    logging.info('Data base dir ' + args.data_base_dir)
    logging.info('Loading vocab from ' + args.vocab_file)
    with open(args.vocab_file, "r", encoding='utf-8') as f:
        args.target_vocab_size = len(f.readlines()) + 4
    logging.info('Load training data from ' + args.data_path)
    train_data = UIDataset(args.data_base_dir, args.data_path, args.label_path, args.vocab_file)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                              num_workers=2, drop_last=True, collate_fn=collate_fn)

    logging.info('Load validation data from ' + args.val_data_path)
    val_data = UIDataset(args.data_base_dir, args.val_data_path, args.label_path, args.vocab_file)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True,
                            num_workers=2, drop_last=True, collate_fn=collate_fn)

    # Build model
    logging.info('Building model')
    if args.resume_from:
        logging.info('Loading checkpoint from %s' % resume_from)
        checkpoint = torch.load(resume_from)
    else:
        checkpoint = None
        logging.info('Creating model with fresh parameters')
    model = build_model(args, gpu_id, checkpoint)
    logging.info(model)

    n_params, enc, dec = cal_parameters(model)
    logging.info('encoder: %d' % enc)
    logging.info('decoder: %d' % dec)
    logging.info('number of parameters: %d' % n_params)

    # Build optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate)
    optim = Optimizer(optimizer)
    if checkpoint:
        optim.load_state_dict(checkpoint['optim'])
        optim.training_step += 1

    # Build model saver
    model_saver = ModelSaver(args.model_dir, model, optim)

    train(model, optim, model_saver, num_epochs, train_loader, val_loader, steps_per_checkpoint,
          args.valid_steps, args.lr_decay, args.start_decay_at, args.cuda)
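
Here the wrapper is expected to expose `load_state_dict` and a `training_step` counter that the resume path bumps by one. A minimal sketch consistent with those calls (the saved-state layout is an assumption):

class Optimizer:
    """Sketch of a stateful optimizer wrapper matching the usage above."""

    def __init__(self, optimizer):
        self.optimizer = optimizer
        self.training_step = 1

    def step(self):
        self.optimizer.step()
        self.training_step += 1

    def zero_grad(self):
        self.optimizer.zero_grad()

    def state_dict(self):
        return {'training_step': self.training_step,
                'optimizer': self.optimizer.state_dict()}

    def load_state_dict(self, state):
        self.training_step = state['training_step']
        self.optimizer.load_state_dict(state['optimizer'])
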
Example No. 12
    cg_train_set = NYU_Depth_V2_v2('train', loadSize, fineSize)
    print('Loaded training set')
    cg_val_set = NYU_Depth_V2_v2('val', loadSize, fineSize)
    print('Loaded val set')

dataset = {0: train_set, 1: val_set}

if len(sys.argv) == 3:
    cg_dataset = {0: cg_train_set, 1: cg_val_set}
    p2p_dataset = {0: cg_train_set, 1: cg_val_set}

else:
    cg_dataset = {0: train_set, 1: val_set}
    p2p_dataset = {0: train_set, 1: val_set}

opt = Optimizer(lr=1e-4, beta1=0.5, lambda_L1=0.01, n_epochs=100, batch_size=4)

p2p_opt = p2pOptimizer(input_nc=3,
                       output_nc=3,
                       num_downs=8,
                       ngf=64,
                       norm_layer=nn.BatchNorm2d,
                       use_dropout=True,
                       ndf=64,
                       n_layers_D=3,
                       lr=0.0002,
                       beta1=0.5,
                       lambda_L1=5,
                       n_blocks=9,
                       padding_type='reflect')