def _infer(model, root_path, test_loader=None):
    """Run partial-convolution inference and return ``(fnames, x_hats)``.

    If *test_loader* is None, a loader is built from
    ``<root_path>/test_data`` and batches are unpacked as 3-tuples
    ``(fname, x_input, mask)``; otherwise 4-tuples
    ``(fname, x_input, mask, _)`` are expected unless ``args.test_debug``
    forces test-style unpacking.

    NOTE: a second ``_infer`` later in this file shadows this one.
    """
    args = get_args()

    # NOTE(review): the original (Korean) comment here warned this must be
    # switched back to a plain False before submission — confirm that
    # test_debug is only used for local debugging.
    is_test = bool(args.test_debug)
    if test_loader is None:
        is_test = True
        test_loader = get_dataloader(root=os.path.join(root_path, 'test_data'),
                                     fnames=None,
                                     split='test',
                                     mask_channels=args.mask_channels,
                                     batch_size=args.batch_size,
                                     num_workers=args.num_workers)

    x_hats = []
    fnames = []
    model.eval()
    with torch.no_grad():
        # enumerate() dropped: the step index was only used by commented-out
        # debugging save_image() calls.
        for data in tqdm(test_loader,
                         desc='infer...',
                         total=len(test_loader),
                         disable=use_nsml):
            if not is_test:
                fname, x_input, mask, _ = data
            else:
                fname, x_input, mask = data
            x_input = x_input.cuda()
            mask = mask.cuda()

            # PConvNet returns (output, updated_mask); only the output is used.
            x_hat, _ = model(x_input, mask)

            # Keep known pixels from the input; fill only the masked region.
            x_hat = compose(x_input, x_hat, mask)

            x_hats.append(x_hat.cpu())
            # extend() instead of repeated list concatenation (was O(n^2)).
            fnames.extend(fname)

    x_hats = torch.cat(x_hats, dim=0)

    return fnames, x_hats
def _infer(model, root_path, test_loader=None):
    """Run generator inference and return ``(fnames, x_hats)``.

    When *test_loader* is None, a test loader is built from
    ``<root_path>/test_data`` and batches carry no ground truth
    (3-tuples); a caller-supplied loader is assumed to yield 4-tuples
    ``(fname, x_input, mask, x_GT)`` whose last element is ignored.
    """
    args = get_args()

    built_loader = test_loader is None
    if built_loader:
        test_loader = get_dataloader(root=os.path.join(root_path, 'test_data'),
                                     fnames=None,
                                     split='test',
                                     bbox_constraint=None,
                                     mask_channels=args.mask_channels,
                                     batch_size=args.batch_size,
                                     num_workers=args.num_workers)

    collected_names = []
    collected_outputs = []
    model.eval()
    with torch.no_grad():
        progress = tqdm(test_loader,
                        desc='infer...',
                        total=len(test_loader),
                        disable=not use_nsml)
        for data in progress:
            if built_loader:
                fname, x_input, mask = data
            else:
                fname, x_input, mask, _ = data
            x_input = x_input.cuda()
            mask = mask.cuda()
            # Stack image and mask along the channel axis for the generator,
            # then paste the prediction back only over the masked region.
            x_hat = model(torch.cat([x_input, mask], dim=1))
            x_hat = compose(x_input, x_hat, mask)
            collected_outputs.append(x_hat.cpu())
            collected_names = collected_names + list(fname)

    return collected_names, torch.cat(collected_outputs, dim=0)
def main():
    """Train the GAN-based inpainting model (generator + discriminator).

    Builds the light generator and discriminator, optionally restores a
    local checkpoint, then alternates per-batch discriminator/generator
    updates. Checkpoints and sample images go through NSML when
    ``use_nsml`` is set, otherwise to local ``checkpoints/``, ``samples/``
    and TensorBoard logs.

    Fixes relative to the original:
      * the generator's gradients are now zeroed before its update
        (the original called ``netD.zero_grad()`` twice, so netG grads
        accumulated across steps);
      * the discriminator's fake pass uses ``x_hat.detach()`` so the D
        loss no longer backpropagates into the generator, which also
        removes the need for ``retain_graph=True``.
    """
    seed_everything()
    args = get_args()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    netG = InpaintGeneratorLight()
    netD = Discriminator()
    print('################################################################')
    # *4 approximates the float32 parameter byte size (submission limit check).
    print('Total number of parameters * 4:',
          (count_parameters(netG) + count_parameters(netD)) * 4)
    print('################################################################')
    netG = netG.to(device)
    netD = netD.to(device)

    optimG = torch.optim.Adam(netG.parameters(),
                              lr=args.lr,
                              betas=(0.0, 0.999))
    # D trains with a 10x smaller lr to keep the adversarial game balanced.
    optimD = torch.optim.Adam(netD.parameters(),
                              lr=args.lr * 0.1,
                              betas=(0.0, 0.999))
    save, load = bind_nsml(netG, optimG)
    if args.pause == 1:
        nsml.paused(scope=locals())

    adversarial_loss = AdversarialLoss()
    l1_loss = nn.L1Loss()

    # Resume from a local checkpoint when not running under NSML.
    current_epoch = 0
    if not use_nsml:
        writer = SummaryWriter(os.path.join('logs', args.nickname))
        if args.load:
            netG_name = os.path.join('checkpoints', args.nickname,
                                     'netG_%03d.pth' % args.load_epoch)
            netD_name = os.path.join('checkpoints', args.nickname,
                                     'netD_%03d.pth' % args.load_epoch)
            netG_dict = torch.load(netG_name)
            netD_dict = torch.load(netD_name)
            netG.load_state_dict(netG_dict['state_dict'])
            netD.load_state_dict(netD_dict['state_dict'])
            current_epoch = args.load_epoch + 1
            print('loaded')

    if args.mode == 'train':
        path_train = os.path.join(dir_data_root, 'train')
        path_train_data = os.path.join(dir_data_root, 'train', 'train_data')

        # Random 90/10 train/val split over file names.
        fnames = os.listdir(path_train_data)
        if args.debug:
            fnames = fnames[:1000]
        random.shuffle(fnames)
        val_ratio = 0.1
        train_fnames = fnames[:-int(len(fnames) * val_ratio)]
        val_fnames = fnames[-int(len(fnames) * val_ratio):]

        postfix = dict()
        total_step = 0
        start = time.time()
        for epoch in range(current_epoch, args.num_epochs):
            # Curriculum on mask size: small boxes first, full-size later.
            if epoch < args.bbox_epochs[0]:
                bbox_constraint = 0.25
            elif epoch < args.bbox_epochs[1]:
                bbox_constraint = 0.75
            else:
                bbox_constraint = 1.0

            # Loaders are rebuilt each epoch because bbox_constraint changes.
            tr_loader = get_dataloader(path_train_data, train_fnames, 'train',
                                       bbox_constraint, args.mask_channels,
                                       args.batch_size, args.num_workers)
            val_loader = get_dataloader(path_train_data, val_fnames, 'val',
                                        bbox_constraint, args.mask_channels,
                                        args.batch_size, args.num_workers)
            print('train:',
                  len(tr_loader) * args.batch_size, 'val:',
                  len(val_loader) * args.batch_size)

            pbar = tqdm(enumerate(tr_loader),
                        total=len(tr_loader),
                        disable=True)
            for step, (_, x_input, mask, x_GT) in pbar:
                total_step += 1

                x_input = x_input.to(device)
                mask = mask.to(device)
                x_GT = x_GT.to(device)

                x_mask = torch.cat([x_input, mask], dim=1)
                x_hat = netG(x_mask)
                x_composed = compose(x_input, x_hat, mask)

                ###########################################
                # update D network
                ###########################################
                netD.zero_grad()

                netD_real = netD(x_GT)
                net_D_real_loss = adversarial_loss(netD_real, True)

                # detach(): the D update must not push gradients into netG.
                netD_fake = netD(x_hat.detach())
                netD_fake_loss = adversarial_loss(netD_fake, False)

                netD_loss = net_D_real_loss + netD_fake_loss
                # retain_graph no longer needed: this backward does not touch
                # the generator's graph, which is reused below.
                netD_loss.backward()
                optimD.step()

                ###########################################
                # update G network
                ###########################################
                # Zero the *generator* here (the original zeroed netD again,
                # so netG gradients accumulated across iterations).
                netG.zero_grad()

                # (translated) may need .view(-1) on the D output here
                netG_fake = netD(x_hat)
                netG_fake_loss = adversarial_loss(netG_fake, True) * 0.1

                # L1 normalized by mean mask coverage so small holes are not
                # under-weighted relative to large ones.
                netG_L1_loss = l1_loss(x_hat, x_GT) / torch.mean(mask)

                netG_loss = netG_fake_loss + netG_L1_loss
                netG_loss.backward()
                optimG.step()

                postfix['netD_loss'] = netD_loss.item()
                postfix['netG_loss'] = netG_loss.item()
                postfix['epoch'] = epoch
                postfix['step_'] = step
                postfix['total_step'] = total_step
                postfix['steps_per_epoch'] = len(tr_loader)

                if step != 0 and step % (args.eval_every - 1) == 0:
                    metric_eval = local_eval(netG, val_loader, path_train_data)
                    postfix['metric_eval'] = metric_eval
                    print('metric eval:', metric_eval)

                    if not use_nsml:
                        sample_dir = os.path.join('samples', args.nickname)
                        os.makedirs(sample_dir, exist_ok=True)
                        vutils.save_image(
                            x_GT,
                            os.path.join(sample_dir, 'x_GT_%03d.png' % epoch),
                            normalize=True)
                        vutils.save_image(x_input,
                                          os.path.join(
                                              sample_dir,
                                              'x_input_%03d.png' % epoch),
                                          normalize=True)
                        vutils.save_image(x_hat,
                                          os.path.join(
                                              sample_dir,
                                              'x_hat_%03d.png' % epoch),
                                          normalize=True)
                        vutils.save_image(
                            mask,
                            os.path.join(sample_dir, 'mask_%03d.png' % epoch),
                            normalize=True)
                        vutils.save_image(x_composed,
                                          os.path.join(
                                              sample_dir,
                                              'x_composed_%03d_%.1f.png' %
                                              (epoch, metric_eval)),
                                          normalize=True)
                        writer.add_scalar('train/netD_loss', netD_loss.item(),
                                          epoch)
                        writer.add_scalar('train/netG_loss', netG_loss.item(),
                                          epoch)

                if step % args.print_every == 0:
                    print(
                        "[%d/%d][%d/%d] time: %.2f,"
                        "netG_gan_loss: %.2f, netG_L1_loss: %.2f, netD_loss: %.2f"
                        % (epoch, args.num_epochs, step, len(tr_loader),
                           time.time() - start, netG_fake_loss.item(),
                           netG_L1_loss.item(), netD_loss.item()))

                if use_nsml:
                    nsml.report(**postfix, scope=locals(), step=total_step)

            if use_nsml:
                nsml.save(epoch)
            else:
                checkpoint_dir = os.path.join('checkpoints', args.nickname)
                os.makedirs(checkpoint_dir, exist_ok=True)

                netG_dict = {'state_dict': netG.state_dict()}
                netD_dict = {'state_dict': netD.state_dict()}
                torch.save(
                    netG_dict,
                    os.path.join(checkpoint_dir, 'netG_%03d.pth' % epoch))
                torch.save(
                    netD_dict,
                    os.path.join(checkpoint_dir, 'netD_%03d.pth' % epoch))
                print('saved')
# Exemple #4  (scrape-artifact separator; commented out so the file parses)
# 0
def train(params):
    """Train a NER tagger on the target domain with early stopping on dev F1.

    Model selection by flags: BiLSTM-CRF (``params.bilstm``), the Coach
    two-stage tagger (``params.coach``), or a BERT-based tagger (default).
    Each epoch trains, then evaluates F1 on train/dev/test; training stops
    after ``params.early_stop`` epochs without dev improvement.
    """
    # initialize experiment
    logger = init_experiment(params, logger_filename=params.logger_filename)

    if params.bilstm:
        # dataloader
        dataloader_train, dataloader_dev, dataloader_test, vocab = get_dataloader_for_bilstmtagger(params)
        # bilstm-crf model
        model = BiLSTMTagger(params, vocab)
        model.cuda()
        # trainer
        trainer = BaseTrainer(params, model)
    elif params.coach:
        # dataloader
        dataloader_train, dataloader_dev, dataloader_test, vocab = get_dataloader_for_coach(params)
        # coach model: binary span tagger + entity-type predictor
        binary_tagger = BiLSTMTagger(params, vocab)
        entity_predictor = EntityPredictor(params)
        binary_tagger.cuda()
        entity_predictor.cuda()
        # trainer
        trainer = CoachTrainer(params, binary_tagger, entity_predictor)
    else:
        # dataloader
        dataloader_train, dataloader_dev, dataloader_test = get_dataloader(params)
        # BERT-based NER Tagger
        model = BertTagger(params)
        model.cuda()
        # trainer
        trainer = BaseTrainer(params, model)

    # Optional CoNLL-2003 pre-training phase (skipped under joint training).
    if params.conll and not params.joint:
        conll_trainloader, conll_devloader, conll_testloader = get_conll2003_dataloader(params.batch_size, params.tgt_dm)
        trainer.train_conll(conll_trainloader, conll_devloader, conll_testloader, params.tgt_dm)

    no_improvement_num = 0
    best_f1 = 0
    logger.info("Training on target domain ...")
    for e in range(params.epoch):
        logger.info("============== epoch %d ==============" % e)

        pbar = tqdm(enumerate(dataloader_train), total=len(dataloader_train))
        if params.bilstm:
            loss_list = []
            # NOTE(review): label tensor y is passed to train_step_for_bilstm
            # without .cuda() — presumably moved to GPU inside the trainer;
            # confirm.
            for i, (X, lengths, y) in pbar:
                X, lengths = X.cuda(), lengths.cuda()
                loss = trainer.train_step_for_bilstm(X, lengths, y)
                loss_list.append(loss)
                pbar.set_description("(Epoch {}) LOSS:{:.4f}".format(e, np.mean(loss_list)))

            logger.info("Finish training epoch %d. loss: %.4f" % (e, np.mean(loss_list)))

        elif params.coach:
            loss_bin_list, loss_entity_list = [], []
            for i, (X, lengths, y_bin, y_final) in pbar:
                X, lengths = X.cuda(), lengths.cuda()
                loss_bin, loss_entityname = trainer.train_step(X, lengths, y_bin, y_final)
                loss_bin_list.append(loss_bin)
                loss_entity_list.append(loss_entityname)
                pbar.set_description("(Epoch {}) LOSS BIN:{:.4f}; LOSS ENTITY:{:.4f}".format(e, np.mean(loss_bin_list), np.mean(loss_entity_list)))

            logger.info("Finish training epoch %d. loss_bin: %.4f. loss_entity: %.4f" % (e, np.mean(loss_bin_list), np.mean(loss_entity_list)))

        else:
            loss_list = []
            for i, (X, y) in pbar:
                X, y = X.cuda(), y.cuda()
                loss = trainer.train_step(X, y)
                loss_list.append(loss)
                pbar.set_description("(Epoch {}) LOSS:{:.4f}".format(e, np.mean(loss_list)))

            logger.info("Finish training epoch %d. loss: %.4f" % (e, np.mean(loss_list)))

        logger.info("============== Evaluate epoch %d on Train Set ==============" % e)
        f1_train = trainer.evaluate(dataloader_train, params.tgt_dm, use_bilstm=params.bilstm)
        logger.info("Evaluate on Train Set. F1: %.4f." % f1_train)

        logger.info("============== Evaluate epoch %d on Dev Set ==============" % e)
        f1_dev = trainer.evaluate(dataloader_dev, params.tgt_dm, use_bilstm=params.bilstm)
        logger.info("Evaluate on Dev Set. F1: %.4f." % f1_dev)

        logger.info("============== Evaluate epoch %d on Test Set ==============" % e)
        f1_test = trainer.evaluate(dataloader_test, params.tgt_dm, use_bilstm=params.bilstm)
        logger.info("Evaluate on Test Set. F1: %.4f." % f1_test)

        # Early stopping tracks dev F1 only; model saving is disabled here.
        if f1_dev > best_f1:
            logger.info("Found better model!!")
            best_f1 = f1_dev
            no_improvement_num = 0
            # trainer.save_model()
        else:
            no_improvement_num += 1
            logger.info("No better model found (%d/%d)" % (no_improvement_num, params.early_stop))

        if no_improvement_num >= params.early_stop:
            break
# Exemple #5  (scrape-artifact separator; commented out so the file parses)
# 0
def main(params):
    """Train the coarse-to-fine SLU tagger with optional template regularization.

    Pipeline per epoch: during "chunking" epochs (the pretraining window plus
    several hard-coded later epochs) only the coarse chunking objective is
    trained and binary slot-F1 is reported; otherwise the full coarse+fine
    (and, with ``params.tr``, template) objective runs, followed by dev/test
    evaluation. Training stops when the trainer raises its stop flag.
    """
    logger = init_experiment(params, logger_filename=params.logger_filename)

    dataloader_tr, dataloader_val, dataloader_test, vocab = get_dataloader(
        params.tgt_dm, params.batch_size, params.tr, params.n_samples)

    coarse_slutagger = CoarseSLUTagger(params, vocab)

    coarse_slutagger = coarse_slutagger.cuda()
    dm_coarse = get_coarse_labels_for_domains()

    fine_predictor = FinePredictor(params, dm_coarse)
    fine_predictor = fine_predictor.cuda()

    # Sentence-representation generator is always built here (the original
    # "if params.tr" guard is commented out).
    sent_repre_generator = SentRepreGenerator(params, vocab)
    sent_repre_generator = sent_repre_generator.cuda()

    slu_trainer = SLUTrainer(params,
                             coarse_slutagger,
                             fine_predictor,
                             sent_repre_generator=sent_repre_generator)

    for e in range(params.epoch):
        loss_c_list = []
        pbar = tqdm(enumerate(dataloader_tr), total=len(dataloader_tr))
        logger.info("============== epoch {} ==============".format(e + 1))
        # NOTE(review): epochs 7, 8, 12, 13, 17 and 20 are hard-coded extra
        # chunking-only epochs interleaved with full training — presumably an
        # experiment-specific schedule; confirm before reuse.
        if e < params.pretrained_epoch or e == 7 or e == 8 or e == 12 or e == 13 \
        or e == 17 or e == 20:
            if params.tr:
                # Template variant: batches carry templates, but chunking
                # pretraining only consumes (X, lengths, y_0).
                for i, (X, lengths, y_0, y_bin, y_final, y_dm, templates,
                        tem_lengths) in pbar:
                    X, lengths = X.cuda(), lengths.cuda()
                    loss_chunking = slu_trainer.chunking_pretrain(
                        X, lengths, y_0)
                    loss_c_list.append(loss_chunking)
                    pbar.set_description(
                        "(Epoch {}) LOSS CHUNKING:{:.4f}".format(
                            (e + 1), np.mean(loss_c_list)))

            else:
                for i, (X, lengths, y_0, y_bin, y_final, y_dm) in pbar:
                    X, lengths = X.cuda(), lengths.cuda()
                    loss_chunking = slu_trainer.chunking_pretrain(
                        X, lengths, y_0)
                    loss_c_list.append(loss_chunking)
                    pbar.set_description(
                        "(Epoch {}) LOSS CHUNKING:{:.4f}".format(
                            (e + 1), np.mean(loss_c_list)))

            logger.info(
                "============== Evaluate Epoch {} ==============".format(e +
                                                                         1))
            bin_f1 = slu_trainer.chunking_eval(dataloader_val)
            logger.info(
                "Eval on dev set. Binary Slot-F1: {:.4f}".format(bin_f1))

            bin_f1 = slu_trainer.chunking_eval(dataloader_test)
            logger.info(
                "Eval on test set. Binary Slot-F1: {:.4f}".format(bin_f1))

            # Chunking-only epoch done; skip the full training pass below.
            continue

        loss_bin_list, loss_slotname_list = [], []
        if params.tr:
            loss_tem0_list, loss_tem1_list = [], []

        # record = int(len(dataloader_tr) / 4)
        if params.tr:
            for i, (X, lengths, y_0, y_bin, y_final, y_dm, templates,
                    tem_lengths) in pbar:
                X, lengths, templates, tem_lengths = X.cuda(), lengths.cuda(
                ), templates.cuda(), tem_lengths.cuda()
                loss_bin, loss_slotname, loss_tem0, loss_tem1 = slu_trainer.train_step(
                    X,
                    lengths,
                    y_bin,
                    y_final,
                    y_dm,
                    templates=templates,
                    tem_lengths=tem_lengths,
                    epoch=e)
                loss_bin_list.append(loss_bin)
                loss_slotname_list.append(loss_slotname)
                loss_tem0_list.append(loss_tem0)
                loss_tem1_list.append(loss_tem1)

                pbar.set_description(
                    "(Epoch {}) LOSS BIN:{:.4f} LOSS SLOT:{:.4f} LOSS TEM0:{:.4f} LOSS TEM1:{:.4f}"
                    .format((e + 1), np.mean(loss_bin_list),
                            np.mean(loss_slotname_list),
                            np.mean(loss_tem0_list), np.mean(loss_tem1_list)))
        else:
            for i, (X, lengths, y_0, y_bin, y_final, y_dm) in pbar:
                X, lengths = X.cuda(), lengths.cuda()
                loss_bin, loss_slotname = slu_trainer.train_step(
                    X, lengths, y_bin, y_final, y_dm)
                loss_bin_list.append(loss_bin)
                loss_slotname_list.append(loss_slotname)
                pbar.set_description(
                    "(Epoch {}) LOSS BIN:{:.4f} LOSS SLOT:{:.4f}".format(
                        (e + 1), np.mean(loss_bin_list),
                        np.mean(loss_slotname_list)))

        if params.tr:
            logger.info(
                "Finish training epoch {}. LOSS BIN:{:.4f} LOSS SLOT:{:.4f} LOSS TEM0:{:.4f} LOSS TEM1:{:.4f}"
                .format((e + 1), np.mean(loss_bin_list),
                        np.mean(loss_slotname_list), np.mean(loss_tem0_list),
                        np.mean(loss_tem1_list)))
        else:
            logger.info(
                "Finish training epoch {}. LOSS BIN:{:.4f} LOSS SLOT:{:.4f}".
                format((e + 1), np.mean(loss_bin_list),
                       np.mean(loss_slotname_list)))

        logger.info(
            "============== Evaluate Epoch {} ==============".format(e + 1))
        bin_f1, final_f1, stop_training_flag = slu_trainer.evaluate(
            dataloader_val, istestset=False)
        logger.info(
            "Eval on dev set. Binary Slot-F1: {:.4f}. Final Slot-F1: {:.4f}.".
            format(bin_f1, final_f1))

        bin_f1, final_f1, stop_training_flag = slu_trainer.evaluate(
            dataloader_test, istestset=True)
        logger.info(
            "Eval on test set. Binary Slot-F1: {:.4f}. Final Slot-F1: {:.4f}.".
            format(bin_f1, final_f1))

        # The trainer decides early stopping; the flag from the *test* eval
        # overwrites the dev one above.
        if stop_training_flag == True:
            break
def test_coach(params):
    """Reload a saved coach SLU model (and optimizer) and continue training/eval.

    Restores coarse and fine taggers from ``params.model_path`` plus a
    hard-coded optimizer state file, then runs the same train/evaluate loop
    as ``main`` (without the chunking-pretrain phase).
    """
    logger = init_experiment(params, logger_filename='test')
    # get dataloader
    dataloader_tr, dataloader_val, dataloader_test, vocab = get_dataloader(
        params.tgt_dm, params.batch_size, params.tr, params.n_samples)
    # _, _, dataloader_test, _ = get_dataloader(params.tgt_dm, params.batch_size, params.tr, params.n_samples)

    print(params.model_path)
    model_path = params.model_path
    # NOTE(review): optimizer state path is hard-coded to one experiment dir —
    # it will not match an arbitrary params.model_path; parameterize if reused.
    opti_path = './experiments/coach_patience/atp_0/opti.pth'

    assert os.path.isfile(model_path)

    reloaded = torch.load(model_path)
    coarse_slutagger = CoarseSLUTagger(params, vocab)

    coarse_slutagger = coarse_slutagger.cuda()
    dm_coarse = get_coarse_labels_for_domains()

    fine_tagger = FinePredictor(params, dm_coarse)
    fine_tagger = fine_tagger.cuda()

    # Restore both sub-models from the checkpoint dict.
    coarse_slutagger.load_state_dict(reloaded["coarse_tagger"])

    fine_tagger.load_state_dict(reloaded["fine_tagger"])
    coarse_tagger = coarse_slutagger.cuda()
    # fine_tagger.cuda()

    # model_parameters = [
    #             {"params": coarse_tagger.parameters()},
    #             {"params": fine_tagger.parameters()}
    #         ]

    # optimizer = torch.optim.Adam(model_parameters, lr=self.lr)

    # optimizer.load_state_dict(torch.load(opti_path))

    slu_trainer = SLUTrainer(params, coarse_tagger, fine_tagger)
    slu_trainer.optimizer.load_state_dict(torch.load(opti_path))

    for e in range(params.epoch):
        logger.info("============== epoch {} ==============".format(e + 1))
        loss_bin_list, loss_slotname_list = [], []
        if params.tr:
            loss_tem0_list, loss_tem1_list = [], []
        pbar = tqdm(enumerate(dataloader_tr), total=len(dataloader_tr))
        # record = int(len(dataloader_tr) / 4)
        # NOTE(review): batches here are unpacked without the y_0 field used
        # in main() — presumably this loader variant yields shorter tuples;
        # confirm against get_dataloader.
        if params.tr:
            for i, (X, lengths, y_bin, y_final, y_dm, templates,
                    tem_lengths) in pbar:
                X, lengths, templates, tem_lengths = X.cuda(), lengths.cuda(
                ), templates.cuda(), tem_lengths.cuda()
                loss_bin, loss_slotname, loss_tem0, loss_tem1 = slu_trainer.train_step(
                    X,
                    lengths,
                    y_bin,
                    y_final,
                    y_dm,
                    templates=templates,
                    tem_lengths=tem_lengths,
                    epoch=e)
                loss_bin_list.append(loss_bin)
                loss_slotname_list.append(loss_slotname)
                loss_tem0_list.append(loss_tem0)
                loss_tem1_list.append(loss_tem1)

                pbar.set_description(
                    "(Epoch {}) LOSS BIN:{:.4f} LOSS SLOT:{:.4f} LOSS TEM0:{:.4f} LOSS TEM1:{:.4f}"
                    .format((e + 1), np.mean(loss_bin_list),
                            np.mean(loss_slotname_list),
                            np.mean(loss_tem0_list), np.mean(loss_tem1_list)))
        else:
            for i, (X, lengths, y_bin, y_final, y_dm) in pbar:
                # NOTE(review): debug leftover — only the first two batches
                # are trained per epoch in this branch.
                if i == 2:
                    break
                # if i %record == 0 and i > 0:
                #     logger.info("============== Evaluate Epoch {} {}==============".format(e+1, i))
                #     bin_f1, final_f1, stop_training_flag = slu_trainer.evaluate(dataloader_val, istestset=False)
                #     logger.info("Eval on dev set. Binary Slot-F1: {:.4f}. Final Slot-F1: {:.4f}.".format(bin_f1, final_f1))

                #     bin_f1, final_f1, stop_training_flag = slu_trainer.evaluate(dataloader_test, istestset=True)
                #     logger.info("Eval on test set. Binary Slot-F1: {:.4f}. Final Slot-F1: {:.4f}.".format(bin_f1, final_f1))
                X, lengths = X.cuda(), lengths.cuda()
                loss_bin, loss_slotname = slu_trainer.train_step(
                    X, lengths, y_bin, y_final, y_dm)
                loss_bin_list.append(loss_bin)
                loss_slotname_list.append(loss_slotname)
                pbar.set_description(
                    "(Epoch {}) LOSS BIN:{:.4f} LOSS SLOT:{:.4f}".format(
                        (e + 1), np.mean(loss_bin_list),
                        np.mean(loss_slotname_list)))

        if params.tr:
            logger.info(
                "Finish training epoch {}. LOSS BIN:{:.4f} LOSS SLOT:{:.4f} LOSS TEM0:{:.4f} LOSS TEM1:{:.4f}"
                .format((e + 1), np.mean(loss_bin_list),
                        np.mean(loss_slotname_list), np.mean(loss_tem0_list),
                        np.mean(loss_tem1_list)))
        else:
            logger.info(
                "Finish training epoch {}. LOSS BIN:{:.4f} LOSS SLOT:{:.4f}".
                format((e + 1), np.mean(loss_bin_list),
                       np.mean(loss_slotname_list)))

        logger.info(
            "============== Evaluate Epoch {} ==============".format(e + 1))
        bin_f1, final_f1, stop_training_flag = slu_trainer.evaluate(
            dataloader_val, istestset=False)
        logger.info(
            "Eval on dev set. Binary Slot-F1: {:.4f}. Final Slot-F1: {:.4f}.".
            format(bin_f1, final_f1))

        bin_f1, final_f1, stop_training_flag = slu_trainer.evaluate(
            dataloader_test, istestset=True)
        logger.info(
            "Eval on test set. Binary Slot-F1: {:.4f}. Final Slot-F1: {:.4f}.".
            format(bin_f1, final_f1))

        if stop_training_flag == True:
            break
def main():
    """Train (or resume) the PConvUNet inpainting model.

    Builds the model, optionally restores a local checkpoint, then trains
    with the partial-convolution inpainting loss, applying a fixed lr decay
    and an encoder-BN freeze after configured epochs. Saves via NSML (best
    val metric only) or locally every epoch.
    """
    seed_everything()
    args = get_args()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = PConvUNetNew()

    print('################################################################')
    # *4 approximates the float32 parameter byte size (submission limit check).
    print('Total number of parameters * 4:', count_parameters(model) * 4)
    print('################################################################')
    model = model.to(device)

    optim = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0, 0.999))
    save, load = bind_nsml(model)
    if args.pause == 1:
        nsml.paused(scope=locals())

    # Resume from a local checkpoint when not running under NSML.
    current_epoch = 0
    if not use_nsml:
        writer = SummaryWriter(os.path.join('logs', args.nickname))
        if args.load:
            fname = os.path.join('checkpoints', args.nickname,
                                 'model_%03d.pth' % args.load_epoch)
            state = torch.load(fname)
            model.load_state_dict(state['model'])
            current_epoch = args.load_epoch + 1
            print('loaded')

        path_test_data = 'data/test_data_original'
        # NOTE(review): this get_dataloader call (and the ones below) omits
        # the bbox_constraint argument used by other call sites in this file —
        # presumably a different project/signature; confirm.
        test_loader = get_dataloader(root=os.path.join('data', 'test_data'),
                                     fnames=None,
                                     split='test',
                                     mask_channels=args.mask_channels,
                                     batch_size=args.batch_size,
                                     num_workers=args.num_workers)
        # test_loader = data_loader(root=os.path.join('data', 'test_data'), phase='test',
        #                           batch_size=args.batch_size, num_workers=args.num_workers)

    if args.mode == 'train':
        path_train = os.path.join(dir_data_root, 'train')
        path_train_data = os.path.join(dir_data_root, 'train', 'train_data')
        # tr_loader, val_loader = data_loader_with_split(path_train, train_split=(1 - args.val_ratio),
        #                                                batch_size=args.batch_size, num_workers=args.num_workers)

        # Random train/val split over file names (ratio from args.val_ratio).
        fnames = os.listdir(path_train_data)
        if args.debug:
            fnames = fnames[:1000]
        random.shuffle(fnames)
        train_fnames = fnames[:-int(len(fnames) * args.val_ratio)]
        val_fnames = fnames[-int(len(fnames) * args.val_ratio):]

        tr_loader = get_dataloader(path_train_data, train_fnames, 'train',
                                   args.mask_channels, args.batch_size,
                                   args.num_workers)
        val_loader = get_dataloader(path_train_data, val_fnames, 'val',
                                    args.mask_channels, args.batch_size,
                                    args.num_workers)
        print('train:',
              len(tr_loader) * args.batch_size, 'val:',
              len(val_loader) * args.batch_size)

        # test_debug: run one evaluation pass and exit without training.
        if args.test_debug:
            metric_eval = local_eval(model, test_loader, path_test_data)
            # metric_eval = local_eval(model, val_loader, path_train_data)
            return

        postfix = dict()
        total_step = 0
        start = time.time()

        best_val_loss = float('inf')

        for epoch in range(current_epoch, args.num_epochs):
            # Two-stage schedule from the PConv paper: drop lr, then freeze
            # encoder BatchNorm and drop lr again.
            if epoch >= args.lr_decay_epoch:
                optim.param_groups[0]['lr'] = 0.0001

            if epoch >= args.bn_freeze_epoch:
                model.freeze_enc_bn = True
                optim.param_groups[0]['lr'] = 0.00005

            model.train()

            # if epoch < args.bbox_epochs[0]:
            #     bbox_constraint = 0.3
            # elif epoch < args.bbox_epochs[1]:
            #     bbox_constraint = 0.7
            # else:
            #     bbox_constraint = 1.0

            # tr_loader = get_dataloader(path_train_data, train_fnames, 'train', bbox_constraint, args.mask_channels, args.batch_size, args.num_workers)
            # val_loader = get_dataloader(path_train_data, val_fnames, 'val', bbox_constraint, args.mask_channels, args.batch_size, args.num_workers)
            # print('train:', len(tr_loader) * args.batch_size, 'val:', len(val_loader) * args.batch_size)

            for step, (fname, x_input, mask, x_GT) in enumerate(tr_loader):
                total_step += 1

                x_GT = x_GT.to(device)
                x_input = x_input.to(device)
                mask = mask.to(device)

                model.zero_grad()

                # PConvNet returns (output, updated_mask); only output is used.
                x_hat, _ = model(x_input, mask)  # PConvnet
                # x_mask = torch.cat([x_input, mask], dim=1)  #else
                # x_hat = model(x_mask)  #else

                # x_composed = compose(x_input, x_hat, mask)

                # loss = l1_loss(x_composed, x_GT)
                # loss = l1_loss(x_hat, x_GT)
                loss = inpainting_loss(x_hat, x_GT, mask)
                loss.backward()
                optim.step()

                if use_nsml:
                    postfix['loss'] = loss.item()
                    postfix['epoch'] = epoch
                    postfix['step_'] = step
                    postfix['total_step'] = total_step
                    postfix['steps_per_epoch'] = len(tr_loader)
                    nsml.report(**postfix, scope=locals(), step=total_step)

                if step % args.print_every == 0:
                    print(
                        "[%d/%d][%d/%d] time: %.2f, train_loss: %.6f, lr: %f" %
                        (epoch, args.num_epochs, step, len(tr_loader),
                         time.time() - start, loss.item(),
                         optim.param_groups[0]['lr']))

            # Per-epoch validation metric drives NSML best-checkpoint saving.
            metric_eval = local_eval(model, val_loader, path_train_data)

            if use_nsml:
                postfix['metric_eval'] = metric_eval
                nsml.report(**postfix, scope=locals(), step=total_step)
            else:
                writer.add_scalar('train/metric_eval', metric_eval, epoch)
                writer.add_scalar('train/loss', loss.item(), epoch)

                # sample_dir = os.path.join('samples', args.nickname)
                # os.makedirs(sample_dir, exist_ok=True)
                # vutils.save_image(x_GT, os.path.join(sample_dir, 'x_GT_%03d.png' % epoch), normalize=True)
                # vutils.save_image(x_input, os.path.join(sample_dir, 'x_input_%03d.png' % epoch), normalize=True)
                # vutils.save_image(x_hat, os.path.join(sample_dir, 'x_hat_%03d.png' % epoch), normalize=True)
                # vutils.save_image(mask, os.path.join(sample_dir, 'mask_%03d.png' % epoch), normalize=True)
                # vutils.save_image(x_composed, os.path.join(sample_dir, 'x_composed_%03d.png' % epoch), normalize=True)
                # save_image(x_GT, os.path.join(sample_dir, 'x_GT_%03d.png' % epoch))
                # save_image(x_input, os.path.join(sample_dir, 'x_input_%03d.png' % epoch))
                # save_image(x_hat, os.path.join(sample_dir, 'x_hat_%03d.png' % epoch))
                # save_image(mask, os.path.join(sample_dir, 'x_mask_%03d.png' % epoch))
                # save_image(x_composed, os.path.join(sample_dir, 'x_composed_%03d_%.2f.png' % (epoch, metric_eval)))

            if use_nsml:
                # Under NSML, save only when validation improves.
                if metric_eval < best_val_loss:
                    nsml.save(epoch)
                    best_val_loss = metric_eval
            else:
                checkpoint_dir = os.path.join('checkpoints', args.nickname)
                os.makedirs(checkpoint_dir, exist_ok=True)

                state = {'model': model.state_dict()}
                torch.save(
                    state,
                    os.path.join(checkpoint_dir, 'model_%03d.pth' % epoch))
                print('saved')