Example #1
0
def get_model():
    """Assemble everything needed for a training run.

    Relies on module-level ``config``, ``Net``, ``Loss``, ``PostProcess``,
    ``Optimizer``, ``ArgoDataset`` and ``collate_fn`` being in scope.

    Returns:
        Tuple of (config, ArgoDataset, collate_fn, net, loss, post_process, opt).
    """
    # Network, loss and post-processing modules, all moved to GPU.
    model = Net(config).cuda()
    criterion = Loss(config).cuda()
    post_proc = PostProcess(config).cuda()

    # Optimizer over every trainable parameter of the network.
    opt = Optimizer(model.parameters(), config)

    return config, ArgoDataset, collate_fn, model, criterion, post_proc, opt
    def train(self, model, data_loader, batch_size, n_epoch, template_flag, \
                        resume=False, optimizer=None, mode=0, teacher_forcing_ratio=0, post_flag=False):
        """Train ``model`` on ``data_loader``, optionally resuming from the
        best checkpoint under ``./experiment``.

        Sets up ``self.evaluator`` and ``self.optimizer``, restores or
        initialises epoch/step counters and accuracy/loss histories, then
        delegates the actual epoch loop to ``self._train_epoches``.
        """
        # Evaluator used by the training loop for periodic validation.
        self.evaluator = Evaluator(
            vocab_dict=self.vocab_dict,
            vocab_list=self.vocab_list,
            decode_classes_dict=self.decode_classes_dict,
            decode_classes_list=self.decode_classes_list,
            loss=NLLLoss(),
            cuda_use=self.cuda_use)
        if resume:
            # Restore model and optimizer state from the "best" checkpoint.
            checkpoint_path = Checkpoint.get_certain_checkpoint(
                "./experiment", "best")
            resume_checkpoint = Checkpoint.load(checkpoint_path)
            model = resume_checkpoint.model
            self.optimizer = resume_checkpoint.optimizer

            # Rebuild the inner optimizer so it tracks the restored model's
            # parameters while keeping the checkpointed hyper-parameters;
            # the stale 'params' entry must be dropped from the defaults first.
            resume_optim = self.optimizer.optimizer
            defaults = resume_optim.param_groups[0]
            defaults.pop('params', None)
            self.optimizer.optimizer = resume_optim.__class__(
                model.parameters(), **defaults)

            # Resume bookkeeping: counters and accuracy/loss histories.
            start_epoch = resume_checkpoint.epoch
            start_step = resume_checkpoint.step
            self.train_acc_list = resume_checkpoint.train_acc_list
            self.test_acc_list = resume_checkpoint.test_acc_list
            self.loss_list = resume_checkpoint.loss_list
        else:
            # Fresh run: start counters and histories from scratch.
            start_epoch = 1
            start_step = 0
            self.train_acc_list = []
            self.test_acc_list = []
            self.loss_list = []
            # Noam (Transformer-style warmup) learning-rate schedule.
            model_opt = NoamOpt(
                512, 1, 2000,
                torch.optim.Adam(model.parameters(),
                                 lr=0,
                                 betas=(0.9, 0.98),
                                 eps=1e-9))
            # NOTE(review): this fallback `optimizer` is built but never used —
            # `self.optimizer` is unconditionally set to `model_opt` below;
            # confirm whether the Adam/Optimizer branch is dead code.
            if optimizer is None:
                optimizer = Optimizer(optim.Adam(model.parameters()),
                                      max_grad_norm=0)
            self.optimizer = model_opt

        self._train_epoches(data_loader=data_loader,
                            model=model,
                            batch_size=batch_size,
                            start_epoch=start_epoch,
                            start_step=start_step,
                            n_epoch=n_epoch,
                            mode=mode,
                            template_flag=template_flag,
                            teacher_forcing_ratio=teacher_forcing_ratio,
                            post_flag=post_flag)
Example #3
0
 def Loading(self):
     """Build the model and optimizer, loading pretrained weights or a full
     training checkpoint when ``self.arg.weights`` is given.

     Returns:
         Tuple of ``(model, optimizer)``.

     Raises:
         ValueError: if ``self.arg.model`` is not set.
     """
     self.device.set_device(self.arg.device)
     print("Loading model")
     if self.arg.model:
         # Resolve the model class from its dotted path and place it on device.
         model_class = import_class(self.arg.model)
         model = self.device.model_to_device(
             model_class(**self.arg.model_args))
         if self.arg.weights:
             try:
                 # First attempt: treat the file as a plain pretrained
                 # state dict and load it strictly.
                 print("Loading pretrained model...")
                 state_dict = torch.load(self.arg.weights)
                 # Drop any entries the user explicitly asked to ignore.
                 for w in self.arg.ignore_weights:
                     if state_dict.pop(w, None) is not None:
                         print('Sucessfully Remove Weights: {}.'.format(w))
                     else:
                         print('Can Not Remove Weights: {}.'.format(w))
                 model.load_state_dict(state_dict, strict=True)
                 optimizer = Optimizer(model, self.arg.optimizer_args)
             except RuntimeError:
                 # Fallback: the file is a full training checkpoint
                 # (model + optimizer + scheduler + RNG state) — resume
                 # training from the epoch after the saved one.
                 print("Loading from checkpoint...")
                 state_dict = torch.load(self.arg.weights)
                 self.rng.set_rng_state(state_dict['rng_state'])
                 self.arg.optimizer_args[
                     'start_epoch'] = state_dict["epoch"] + 1
                 self.recoder.print_log(
                     "Resuming from checkpoint: epoch {}".format(
                         self.arg.optimizer_args['start_epoch']))
                 model = self.device.load_weights(model, self.arg.weights,
                                                  self.arg.ignore_weights)
                 optimizer = Optimizer(model, self.arg.optimizer_args)
                 optimizer.optimizer.load_state_dict(
                     state_dict["optimizer_state_dict"])
                 optimizer.scheduler.load_state_dict(
                     state_dict["scheduler_state_dict"])
         else:
             # No weights supplied: start with a fresh optimizer.
             optimizer = Optimizer(model, self.arg.optimizer_args)
     else:
         raise ValueError("No Models.")
     print("Loading model finished.")
     self.load_data()
     return model, optimizer
Example #4
0
def main():
    """Train an SGVAE generative model on the ChEMBL molecule dataset.

    NOTE(review): ``rank``, ``train_printer``, ``args`` and ``time`` are not
    defined in this fragment — presumably module-level globals set up by a
    multiprocess launcher; confirm against the caller before relying on them.
    """
    dataset = MoleculeDataset('ChEMBL', 'canonical', ['train', 'val'])
    train_loader = DataLoader(dataset.train_set,
                              batch_size=args['batch_size'],
                              shuffle=True,
                              collate_fn=dataset.collate)
    val_loader = DataLoader(dataset.val_set,
                            batch_size=args['batch_size'],
                            shuffle=True,
                            collate_fn=dataset.collate)
    model = SGVAE(atom_types=dataset.atom_types,
                  bond_types=dataset.bond_types,
                  node_hidden_size=args['node_hidden_size'],
                  num_prop_rounds=args['num_propagation_rounds'],
                  dropout=args['dropout'])
    # Choose the single-process or multiprocess optimizer wrapper.
    if args['num_processes'] == 1:
        from utils import Optimizer
        optimizer = Optimizer(args['lr'],
                              Adam(model.parameters(), lr=args['lr']))
    else:
        from utils import MultiProcessOptimizer
        optimizer = MultiProcessOptimizer(
            args['num_processes'], args['lr'],
            Adam(model.parameters(), lr=args['lr']))

    if rank == 0:
        t2 = time.time()
    best_val_prob = 0

    # Training
    for epoch in range(args['nepochs']):
        model.train()
        if rank == 0:
            print('Training')

        for i, data in enumerate(train_loader):
            # Log-likelihood of the generation action sequence; the loss is
            # its negative (maximum-likelihood training).
            log_prob = model(actions=data, compute_log_prob=True)
            prob = log_prob.detach().exp()

            loss_averaged = -log_prob
            prob_averaged = prob
            optimizer.backward_and_step(loss_averaged)
            if rank == 0:
                train_printer.update(epoch + 1, loss_averaged.item(),
                                     prob_averaged.item())
Example #5
0
    def __init__(self, hparams, trainable):
        """Set up the recognition network.

        Builds the shared layer factory and optimizer builder, constructs the
        ResNet-50 graph, and — only when ``trainable`` — adds the optimizer,
        metric and summary graph pieces.
        """
        self.hparams = hparams
        self.trainable = trainable

        # Fixed input resolution: height, width, channels.
        self.image_shape = [224, 224, 3]
        self.license_number_list = hparams.license_number_list

        # Train/eval switch fed at session-run time; defaults to eval (False).
        self.is_train = tf.placeholder_with_default(False,
                                                    shape=[],
                                                    name='is_train')

        self.layers = NeuralLayers(trainable=self.trainable,
                                   is_train=self.is_train,
                                   hparams=self.hparams)
        self.optimizer_builder = Optimizer(hparams=hparams)
        self.saver = None

        self.build_resnet50()
        if trainable:
            self.build_optimizer()
            self.build_metrics()
            self.build_summary()
def main(args, path_to_candidate_bonds):
    """Train a WLN reaction-ranking model on USPTO (or user-supplied) data.

    Builds train/val datasets and loaders, trains ``WLNReactionRanking`` with
    a summed cross-entropy over candidate products, periodically prints
    accuracy/grad-norm statistics, decays the learning rate, checkpoints the
    model and runs candidate-ranking evaluation on the validation set.
    """
    # Training set: the standard USPTO subset unless a custom path is given.
    if args['train_path'] is None:
        train_set = USPTORank(
            subset='train',
            candidate_bond_path=path_to_candidate_bonds['train'],
            max_num_change_combos_per_reaction=args[
                'max_num_change_combos_per_reaction_train'],
            num_processes=args['num_processes'])
    else:
        train_set = WLNRankDataset(
            path_to_reaction_file=args['train_path'],
            candidate_bond_path=path_to_candidate_bonds['train'],
            mode='train',
            max_num_change_combos_per_reaction=args[
                'max_num_change_combos_per_reaction_train'],
            num_processes=args['num_processes'])
    train_set.ignore_large()
    # Validation set, mirroring the training-set choice.
    if args['val_path'] is None:
        val_set = USPTORank(subset='val',
                            candidate_bond_path=path_to_candidate_bonds['val'],
                            max_num_change_combos_per_reaction=args[
                                'max_num_change_combos_per_reaction_eval'],
                            num_processes=args['num_processes'])
    else:
        val_set = WLNRankDataset(
            path_to_reaction_file=args['val_path'],
            candidate_bond_path=path_to_candidate_bonds['val'],
            mode='val',
            max_num_change_combos_per_reaction=args[
                'max_num_change_combos_per_reaction_eval'],
            num_processes=args['num_processes'])

    # With multiple workers, avoid file-descriptor sharing issues.
    if args['num_workers'] > 1:
        torch.multiprocessing.set_sharing_strategy('file_system')

    train_loader = DataLoader(train_set,
                              batch_size=args['batch_size'],
                              collate_fn=collate_rank_train,
                              shuffle=True,
                              num_workers=args['num_workers'])
    val_loader = DataLoader(val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_rank_eval,
                            shuffle=False,
                            num_workers=args['num_workers'])

    model = WLNReactionRanking(
        node_in_feats=args['node_in_feats'],
        edge_in_feats=args['edge_in_feats'],
        node_hidden_feats=args['hidden_size'],
        num_encode_gnn_layers=args['num_encode_gnn_layers']).to(args['device'])
    criterion = CrossEntropyLoss(reduction='sum')
    # Wrap Adam in the project Optimizer helper for gradient clipping etc.
    optimizer = Adam(model.parameters(), lr=args['lr'])
    from utils import Optimizer
    optimizer = Optimizer(model,
                          args['lr'],
                          optimizer,
                          max_grad_norm=args['max_norm'])

    # Running statistics, reset after each progress report.
    acc_sum = 0
    grad_norm_sum = 0
    dur = []
    total_samples = 0
    for epoch in range(args['num_epochs']):
        t0 = time.time()
        model.train()
        for batch_id, batch_data in enumerate(train_loader):
            batch_reactant_graphs, batch_product_graphs, \
            batch_combo_scores, batch_labels, batch_num_candidate_products = batch_data

            # Move graphs, scores, labels and node/edge features to device.
            batch_reactant_graphs = batch_reactant_graphs.to(args['device'])
            batch_product_graphs = batch_product_graphs.to(args['device'])
            batch_combo_scores = batch_combo_scores.to(args['device'])
            batch_labels = batch_labels.to(args['device'])
            reactant_node_feats = batch_reactant_graphs.ndata.pop('hv').to(
                args['device'])
            reactant_edge_feats = batch_reactant_graphs.edata.pop('he').to(
                args['device'])
            product_node_feats = batch_product_graphs.ndata.pop('hv').to(
                args['device'])
            product_edge_feats = batch_product_graphs.edata.pop('he').to(
                args['device'])

            pred = model(
                reactant_graph=batch_reactant_graphs,
                reactant_node_feats=reactant_node_feats,
                reactant_edge_feats=reactant_edge_feats,
                product_graphs=batch_product_graphs,
                product_node_feats=product_node_feats,
                product_edge_feats=product_edge_feats,
                candidate_scores=batch_combo_scores,
                batch_num_candidate_products=batch_num_candidate_products)

            # Check if the ground truth candidate has the highest score.
            # Predictions for all reactions are concatenated along dim 0, so
            # slice per reaction using the per-reaction candidate counts.
            batch_loss = 0
            product_graph_start = 0
            for i in range(len(batch_num_candidate_products)):
                product_graph_end = product_graph_start + batch_num_candidate_products[
                    i]
                reaction_pred = pred[product_graph_start:product_graph_end, :]
                # Index 0 is the ground-truth candidate by construction.
                acc_sum += float(
                    reaction_pred.max(
                        dim=0)[1].detach().cpu().data.item() == 0)
                batch_loss += criterion(reaction_pred.reshape(1, -1),
                                        batch_labels[i, :])
                product_graph_start = product_graph_end

            grad_norm_sum += optimizer.backward_and_step(batch_loss)
            total_samples += args['batch_size']
            # Periodic progress report; statistics reset afterwards.
            if total_samples % args['print_every'] == 0:
                progress = 'Epoch {:d}/{:d}, iter {:d}/{:d} | time {:.4f} | ' \
                           'accuracy {:.4f} | grad norm {:.4f}'.format(
                    epoch + 1, args['num_epochs'],
                    (batch_id + 1) * args['batch_size'] // args['print_every'],
                    len(train_set) // args['print_every'],
                    (sum(dur) + time.time() - t0) / total_samples * args['print_every'],
                    acc_sum / args['print_every'],
                    grad_norm_sum / args['print_every'])
                print(progress)
                acc_sum = 0
                grad_norm_sum = 0

            # Periodic LR decay + checkpoint + validation evaluation.
            if total_samples % args['decay_every'] == 0:
                dur.append(time.time() - t0)
                old_lr = optimizer.lr
                optimizer.decay_lr(args['lr_decay_factor'])
                new_lr = optimizer.lr
                print('Learning rate decayed from {:.4f} to {:.4f}'.format(
                    old_lr, new_lr))
                torch.save({'model_state_dict': model.state_dict()},
                           args['result_path'] +
                           '/model_{:d}.pkl'.format(total_samples))
                prediction_summary = 'total samples {:d}, (epoch {:d}/{:d}, iter {:d}/{:d})\n'.format(
                    total_samples, epoch + 1, args['num_epochs'],
                    (batch_id + 1) * args['batch_size'] // args['print_every'],
                    len(train_set) //
                    args['print_every']) + candidate_ranking_eval(
                        args, model, val_loader)
                print(prediction_summary)
                with open(args['result_path'] + '/val_eval.txt', 'a') as f:
                    f.write(prediction_summary)
                t0 = time.time()
                # Evaluation put the model in eval mode; restore training mode.
                model.train()
Example #7
0
def main(args):
    """REINFORCE-style fine-tuning of a GuessWhat?! question generator.

    Loads pretrained QGen/Guesser/Oracle models, generates dialogues, and
    optimizes the QGen with a policy-gradient loss using an MLP baseline.
    Logs metrics to TensorBoard and saves the best model by validation
    accuracy.

    NOTE(review): this function contains debug leftovers (prints and a bare
    ``raise``) that abort the first training iteration — see below.
    """
    ts = datetime.datetime.now().timestamp()

    # TensorBoard logger, tagged with the experiment name and a timestamp.
    logger = SummaryWriter(
        os.path.join('exp/qgen_rl/', '{}_{}'.format(args.exp_name, ts)))
    logger.add_text('exp_name', args.exp_name)
    logger.add_text('args', str(args))

    # Seed numpy and torch (CPU and all GPUs) for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    vocab = Vocab(os.path.join(args.data_dir, 'vocab.csv'), 3)
    category_vocab = CategoryVocab(
        os.path.join(args.data_dir, 'categories.csv'))

    # One DataLoader per split; new_object/shuffle only for training.
    data_loader = OrderedDict()
    splits = ['train', 'valid'] + (['test'] if args.test_set else list())
    for split in splits:
        file = os.path.join(args.data_dir, 'guesswhat.' + split + '.jsonl.gz')
        data_loader[split] = DataLoader(
            dataset=InferenceDataset(split,
                                     file,
                                     vocab,
                                     category_vocab,
                                     new_object=split == 'train',
                                     load_vgg_features=True),
            batch_size=args.batch_size,
            collate_fn=InferenceDataset.get_collate_fn(device),
            shuffle=split == 'train')

    # Pretrained components: question generator, guesser and oracle.
    if not args.belief_state:
        qgen = QGen.load(device, file=args.qgen_file)
    else:
        qgen = QGenBelief.load(device, file=args.qgen_file)
    guesser = Guesser.load(device, file=args.guesser_file)
    oracle = Oracle.load(device, file=args.oracle_file)

    generation_wrapper = GenerationWrapper(qgen, guesser, oracle)

    # MLP baseline predicting per-step reward to reduce gradient variance.
    baseline = MLP(
        sizes=[qgen.hidden_size, args.baseline_hidden_size, 1],
        activation='relu', final_activation='relu', bias=[True, False])\
        .to(device)

    baseline_loss_fn = torch.nn.MSELoss(reduction='sum')
    baseline_optimizer = Optimizer(torch.optim.SGD,
                                   baseline.parameters(),
                                   lr=args.baseline_lr)
    qgen_optimizer = Optimizer(torch.optim.SGD,
                               qgen.parameters(),
                               lr=args.qgen_lr)

    # Decoding strategy per split (e.g. sampling for train, greedy for eval).
    split2strat = {
        'train': args.train_strategy,
        'valid': args.eval_strategy,
        'test': args.eval_strategy
    }

    best_val_acc = 0
    for epoch in range(args.epochs):

        for split in splits:

            if split == 'train':
                qgen.train()
                baseline.train()
                # NOTE(review): torch.enable_grad()/torch.no_grad() below are
                # instantiated but never entered (no `with`), so they do not
                # change grad mode — confirm intent.
                torch.enable_grad()
            else:
                qgen.eval()
                baseline.eval()
                torch.no_grad()

            total_acc = list()
            for iteration, sample in enumerate(data_loader[split]):

                # Roll out a full dialogue for the batch.
                return_dict = generation_wrapper.generate(
                    sample,
                    vocab,
                    split2strat[split],
                    args.max_num_questions,
                    device,
                    args.belief_state,
                    return_keys=[
                        'mask', 'object_logits', 'hidden_states', 'log_probs',
                        'generations'
                    ])

                mask = return_dict['mask']
                object_logits = return_dict['object_logits']
                hidden_states = return_dict['hidden_states']
                log_probs = return_dict['log_probs']

                acc = accuarcy(object_logits, sample['target_id'])
                total_acc += [acc]

                mask = mask.float()

                # Binary reward: 1 if the guesser picked the target object,
                # broadcast over dialogue steps and masked to valid steps.
                rewards = torch.eq(
                    object_logits.topk(1)[1].view(-1),
                    sample['target_id'].view(-1)).float()
                rewards = rewards.unsqueeze(1).repeat(1, mask.size(1))
                rewards *= mask

                print("dialogue", return_dict['dialogue'][0],
                      return_dict['dialogue'].size())
                #print("log_probs", log_probs, log_probs.size())
                #print("mask", mask, mask.size())
                #print("rewards", rewards, rewards.size())

                # Baseline regression toward the observed rewards; detach_
                # keeps qgen's hidden states out of the baseline's graph.
                baseline_preds = baseline(hidden_states.detach_()).squeeze(2)
                baseline_preds *= mask
                baseline_loss = baseline_loss_fn(
                    baseline_preds.view(-1), rewards.view(-1)) \
                    / baseline_preds.size(0)

                # REINFORCE with baseline: -E[log_prob * (reward - baseline)].
                log_probs *= mask
                baseline_preds = baseline_preds.detach()
                policy_gradient_loss = torch.sum(log_probs *
                                                 (rewards - baseline_preds),
                                                 dim=1)
                print(policy_gradient_loss)
                policy_gradient_loss = -torch.mean(policy_gradient_loss)
                print()
                # NOTE(review): debug leftover — this bare `raise` aborts the
                # first iteration (with no active exception it raises a
                # RuntimeError); remove before real training runs.
                raise
                # policy_gradient_loss = - torch.sum(log_probs) / torch.sum(mask)
                #print(policy_gradient_loss_old.item(), policy_gradient_loss.item())

                if split == 'train':
                    qgen_optimizer.optimize(policy_gradient_loss,
                                            clip_norm_args=[args.clip_value])
                    baseline_optimizer.optimize(
                        baseline_loss, clip_norm_args=[args.clip_value])

                # Per-iteration metrics to TensorBoard.
                logger.add_scalar('{}_accuracy'.format(split), acc,
                                  iteration + len(data_loader[split]) * epoch)

                logger.add_scalar('{}_reward'.format(split),
                                  torch.mean(rewards).item(),
                                  iteration + len(data_loader[split]) * epoch)

                logger.add_scalar('{}_bl_loss'.format(split),
                                  baseline_loss.item(),
                                  iteration + len(data_loader[split]) * epoch)

                logger.add_scalar('{}_pg_loss'.format(split),
                                  policy_gradient_loss.item(),
                                  iteration + len(data_loader[split]) * epoch)

            # Save the qgen whenever validation accuracy improves.
            model_saved = False
            if split == 'valid':
                if np.mean(total_acc) > best_val_acc:
                    best_val_acc = np.mean(total_acc)
                    qgen.save(file='bin/qgen_rl_{}_{}.pt'.format(
                        args.exp_name, ts),
                              accuarcy=np.mean(total_acc))
                    model_saved = True

            logger.add_scalar('epoch_{}_accuracy'.format(split),
                              np.mean(total_acc), epoch)

            print("Epoch {:3d}: {} Accuracy {:5.3f} {}".format(
                epoch, split.upper(),
                np.mean(total_acc) * 100, '*' if model_saved else ''))
        print("-" * 50)
Example #8
0
def main(rank, args):
    """
    Train a DGMG molecule generator, optionally across multiple processes.

    Each process trains on its own subset of the data; validation
    log-likelihoods are all-reduced across processes. The best model by
    validation probability is checkpointed, and the learning rate is decayed
    after warmup when validation stops improving.

    Parameters
    ----------
    rank : int
        Subprocess id
    args : dict
        Configuration
    """
    if rank == 0:
        t1 = time.time()

    set_random_seed(args['seed'])
    # Removing the line below would cause problems for multiprocess runs.
    torch.set_num_threads(1)

    # Setup dataset and data loader; each process sees subset `rank`.
    dataset = MoleculeDataset(args['dataset'],
                              args['order'], ['train', 'val'],
                              subset_id=rank,
                              n_subsets=args['num_processes'])

    # Note that currently the batch size for the loaders should only be 1.
    train_loader = DataLoader(dataset.train_set,
                              batch_size=args['batch_size'],
                              shuffle=True,
                              collate_fn=dataset.collate)
    val_loader = DataLoader(dataset.val_set,
                            batch_size=args['batch_size'],
                            shuffle=True,
                            collate_fn=dataset.collate)

    # Only rank 0 logs; tensorboardX is optional.
    if rank == 0:
        try:
            from tensorboardX import SummaryWriter
            writer = SummaryWriter(args['log_dir'])
        except ImportError:
            print(
                'If you want to use tensorboard, install tensorboardX with pip.'
            )
            writer = None
        train_printer = Printer(args['nepochs'], len(dataset.train_set),
                                args['batch_size'], writer)
        val_printer = Printer(args['nepochs'], len(dataset.val_set),
                              args['batch_size'])
    else:
        val_printer = None

    # Initialize model
    model = DGMG(atom_types=dataset.atom_types,
                 bond_types=dataset.bond_types,
                 node_hidden_size=args['node_hidden_size'],
                 num_prop_rounds=args['num_propagation_rounds'],
                 dropout=args['dropout'])

    # Single-process vs multiprocess optimizer wrapper.
    if args['num_processes'] == 1:
        from utils import Optimizer
        optimizer = Optimizer(args['lr'],
                              Adam(model.parameters(), lr=args['lr']))
    else:
        from utils import MultiProcessOptimizer
        optimizer = MultiProcessOptimizer(
            args['num_processes'], args['lr'],
            Adam(model.parameters(), lr=args['lr']))

    if rank == 0:
        t2 = time.time()
    best_val_prob = 0

    # Training
    for epoch in range(args['nepochs']):
        model.train()
        if rank == 0:
            print('Training')

        for i, data in enumerate(train_loader):
            # Maximum-likelihood training: minimize negative log-likelihood
            # of the generation action sequence.
            log_prob = model(actions=data, compute_log_prob=True)
            prob = log_prob.detach().exp()

            loss_averaged = -log_prob
            prob_averaged = prob
            optimizer.backward_and_step(loss_averaged)
            if rank == 0:
                train_printer.update(epoch + 1, loss_averaged.item(),
                                     prob_averaged.item())

        synchronize(args['num_processes'])

        # Validation: average log prob across processes.
        val_log_prob = evaluate(epoch, model, val_loader, val_printer)
        if args['num_processes'] > 1:
            dist.all_reduce(val_log_prob, op=dist.ReduceOp.SUM)
        val_log_prob /= args['num_processes']
        # Strictly speaking, the computation of probability here is different from what is
        # performed on the training set as we first take an average of log likelihood and then
        # take the exponentiation. By Jensen's inequality, the resulting value is then a
        # lower bound of the real probabilities.
        val_prob = (-val_log_prob).exp().item()
        val_log_prob = val_log_prob.item()
        # Checkpoint on improvement; otherwise decay LR after warmup epochs.
        if val_prob >= best_val_prob:
            if rank == 0:
                torch.save({'model_state_dict': model.state_dict()},
                           args['checkpoint_dir'])
                print(
                    'Old val prob {:.10f} | new val prob {:.10f} | model saved'
                    .format(best_val_prob, val_prob))
            best_val_prob = val_prob
        elif epoch >= args['warmup_epochs']:
            optimizer.decay_lr()

        if rank == 0:
            print('Validation')
            if writer is not None:
                writer.add_scalar('validation_log_prob', val_log_prob, epoch)
                writer.add_scalar('validation_prob', val_prob, epoch)
                writer.add_scalar('lr', optimizer.lr, epoch)
            print('Validation log prob {:.4f} | prob {:.10f}'.format(
                val_log_prob, val_prob))

        synchronize(args['num_processes'])

    # Final timing report from the main process.
    if rank == 0:
        t3 = time.time()
        print('It took {} to setup.'.format(datetime.timedelta(seconds=t2 -
                                                               t1)))
        print('It took {} to finish training.'.format(
            datetime.timedelta(seconds=t3 - t2)))
        print(
            '--------------------------------------------------------------------------'
        )
        print('On average, an epoch takes {}.'.format(
            datetime.timedelta(seconds=(t3 - t2) / args['nepochs'])))
Example #9
0
def main(rank, dev_id, args):
    """Train a WLN reaction-center model, optionally across multiple devices.

    Each process trains on its own subset of reaction centers (``rank``),
    on CPU (``dev_id == -1``) or the given CUDA device. Periodically prints
    loss/grad-norm statistics, decays the learning rate, evaluates on the
    validation set and checkpoints the model (rank 0 only).
    """
    set_seed()
    # Removing the line below would cause problems for multiprocess runs.
    if args['num_devices'] > 1:
        torch.set_num_threads(1)
    if dev_id == -1:
        args['device'] = torch.device('cpu')
    else:
        args['device'] = torch.device('cuda:{}'.format(dev_id))
        # Set current device
        torch.cuda.set_device(args['device'])

    # Each rank trains on its own slice of the training set.
    train_set, val_set = load_dataset(args)
    get_center_subset(train_set, rank, args['num_devices'])
    train_loader = DataLoader(train_set, batch_size=args['batch_size'],
                              collate_fn=collate_center, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=args['batch_size'],
                            collate_fn=collate_center, shuffle=False)

    model = WLNReactionCenter(node_in_feats=args['node_in_feats'],
                              edge_in_feats=args['edge_in_feats'],
                              node_pair_in_feats=args['node_pair_in_feats'],
                              node_out_feats=args['node_out_feats'],
                              n_layers=args['n_layers'],
                              n_tasks=args['n_tasks']).to(args['device'])
    model.train()
    if rank == 0:
        print('# trainable parameters in the model: ', count_parameters(model))

    # Summed BCE over atom pairs, normalized by reactions per batch below.
    criterion = BCEWithLogitsLoss(reduction='sum')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    # Wrap Adam in the single- or multi-device project optimizer helper.
    if args['num_devices'] <= 1:
        from utils import Optimizer
        optimizer = Optimizer(model, args['lr'], optimizer, max_grad_norm=args['max_norm'])
    else:
        from utils import MultiProcessOptimizer
        optimizer = MultiProcessOptimizer(args['num_devices'], model, args['lr'],
                                          optimizer, max_grad_norm=args['max_norm'])

    # total_iter counts global iterations across devices; rank_iter is local.
    total_iter = 0
    rank_iter = 0
    grad_norm_sum = 0
    loss_sum = 0
    dur = []

    for epoch in range(args['num_epochs']):
        t0 = time.time()
        for batch_id, batch_data in enumerate(train_loader):
            total_iter += args['num_devices']
            rank_iter += 1

            batch_reactions, batch_graph_edits, batch_mol_graphs, \
            batch_complete_graphs, batch_atom_pair_labels = batch_data
            labels = batch_atom_pair_labels.to(args['device'])
            pred, biased_pred = reaction_center_prediction(
                args['device'], model, batch_mol_graphs, batch_complete_graphs)
            loss = criterion(pred, labels) / len(batch_reactions)
            loss_sum += loss.cpu().detach().data.item()
            grad_norm_sum += optimizer.backward_and_step(loss)

            # Periodic progress report (rank 0 only); stats reset afterwards.
            if rank_iter % args['print_every'] == 0 and rank == 0:
                progress = 'Epoch {:d}/{:d}, iter {:d}/{:d} | ' \
                           'loss {:.4f} | grad norm {:.4f}'.format(
                    epoch + 1, args['num_epochs'], batch_id + 1, len(train_loader),
                    loss_sum / args['print_every'], grad_norm_sum / args['print_every'])
                print(progress)
                grad_norm_sum = 0
                loss_sum = 0

            # Periodic LR decay (all ranks) + eval/checkpoint (rank 0 only).
            if total_iter % args['decay_every'] == 0:
                optimizer.decay_lr(args['lr_decay_factor'])
            if total_iter % args['decay_every'] == 0 and rank == 0:
                if epoch >= 1:
                    dur.append(time.time() - t0)
                    print('Training time per {:d} iterations: {:.4f}'.format(
                        rank_iter, np.mean(dur)))
                total_samples = total_iter * args['batch_size']
                prediction_summary = 'total samples {:d}, (epoch {:d}/{:d}, iter {:d}/{:d}) '.format(
                    total_samples, epoch + 1, args['num_epochs'], batch_id + 1, len(train_loader)) + \
                      reaction_center_final_eval(args, args['top_ks_val'], model, val_loader, easy=True)
                print(prediction_summary)
                with open(args['result_path'] + '/val_eval.txt', 'a') as f:
                    f.write(prediction_summary)
                torch.save({'model_state_dict': model.state_dict()},
                           args['result_path'] + '/model_{:d}.pkl'.format(total_samples))
                t0 = time.time()
                # Evaluation switched to eval mode; restore training mode.
                model.train()
        synchronize(args['num_devices'])
Example #10
0
def main():
    """Train the UI-to-code model: load data, build the model and optimizer
    (optionally from a checkpoint), then hand off to ``train``."""
    log = get_root_logger(args.log_path, mode='a')

    # Echo every command-line argument into the log for reproducibility.
    log.info('Command Line Arguments:')
    for name, value in vars(args).items():
        log.info(name + ' = ' + str(value))
    log.info('End Command Line Arguments')

    batch_size = args.batch_size
    num_epochs = args.num_epochs
    resume_from = args.resume_from
    steps_per_checkpoint = args.steps_per_checkpoint
    gpu_id = args.gpu_id

    configure_process(args, gpu_id)
    args.cuda = gpu_id > -1
    if args.cuda:
        log.info('Using CUDA on GPU ' + str(gpu_id))
    else:
        log.info('Using CPU')

    # Load data. Vocab size = number of vocab lines + 4 special tokens.
    log.info('Data base dir ' + args.data_base_dir)
    log.info('Loading vocab from ' + args.vocab_file)
    with open(args.vocab_file, "r", encoding='utf-8') as f:
        args.target_vocab_size = len(f.readlines()) + 4

    log.info('Load training data from ' + args.data_path)
    train_loader = DataLoader(
        UIDataset(args.data_base_dir, args.data_path, args.label_path,
                  args.vocab_file),
        batch_size=batch_size, shuffle=True,
        num_workers=2, drop_last=True, collate_fn=collate_fn)

    log.info('Load validation data from ' + args.val_data_path)
    val_loader = DataLoader(
        UIDataset(args.data_base_dir, args.val_data_path, args.label_path,
                  args.vocab_file),
        batch_size=batch_size, shuffle=True,
        num_workers=2, drop_last=True, collate_fn=collate_fn)

    # Build model, restoring from a checkpoint when requested.
    log.info('Building model')
    checkpoint = None
    if args.resume_from:
        log.info('Loading checkpoint from %s' % resume_from)
        checkpoint = torch.load(resume_from)
    else:
        log.info('Creating model with fresh parameters')
    model = build_model(args, gpu_id, checkpoint)
    log.info(model)

    n_params, enc, dec = cal_parameters(model)
    log.info('encoder: %d' % enc)
    log.info('decoder: %d' % dec)
    log.info('number of parameters: %d' % n_params)

    # Build optimizer (SGD wrapped in the project Optimizer helper),
    # restoring its state when resuming.
    optim = Optimizer(
        torch.optim.SGD(model.parameters(), lr=args.learning_rate))
    if checkpoint:
        optim.load_state_dict(checkpoint['optim'])
        optim.training_step += 1

    # Build model saver
    model_saver = ModelSaver(args.model_dir, model, optim)

    train(model, optim, model_saver, num_epochs, train_loader, val_loader,
          steps_per_checkpoint, args.valid_steps, args.lr_decay,
          args.start_decay_at, args.cuda)
Example #11
0
    cg_train_set = NYU_Depth_V2_v2('train', loadSize, fineSize)
    print('Loaded training set')
    cg_val_set = NYU_Depth_V2_v2('val', loadSize, fineSize)
    print('Loaded val set')

# Index datasets by phase: 0 = train, 1 = val.
# NOTE(review): train_set/val_set and cg_train_set/cg_val_set are defined in
# the (truncated) code above this fragment — confirm they exist on all paths.
dataset = {0: train_set, 1: val_set}

# With an extra CLI argument, the CG/p2p pipelines use the CG-rendered
# NYU Depth V2 splits; otherwise they fall back to the real-image splits.
if len(sys.argv) == 3:
    cg_dataset = {0: cg_train_set, 1: cg_val_set}
    p2p_dataset = {0: cg_train_set, 1: cg_val_set}

else:
    cg_dataset = {0: train_set, 1: val_set}
    p2p_dataset = {0: train_set, 1: val_set}

# Training hyper-parameters for the main optimizer.
opt = Optimizer(lr=1e-4, beta1=0.5, lambda_L1=0.01, n_epochs=100, batch_size=4)

# pix2pix-style optimizer/config: U-Net generator (8 downsamplings) and a
# 3-layer PatchGAN-style discriminator, with L1-weighted adversarial training.
p2p_opt = p2pOptimizer(input_nc=3,
                       output_nc=3,
                       num_downs=8,
                       ngf=64,
                       norm_layer=nn.BatchNorm2d,
                       use_dropout=True,
                       ndf=64,
                       n_layers_D=3,
                       lr=0.0002,
                       beta1=0.5,
                       lambda_L1=5,
                       n_blocks=9,
                       padding_type='reflect')