Beispiel #1
0
    def __init__(self,
                 epochs=100,
                 sample=25,
                 batch=64,
                 input_h_w=112,
                 latent_v=64,
                 data_path='data/png_clasificados/',
                 transforms=None):
        """Configure the GAN trainer: hyperparameters, networks, loss and data.

        Args:
            epochs: number of training epochs.
            sample: number of samples to generate for previews.
            batch: mini-batch size.
            input_h_w: input image height/width fed to the networks.
            latent_v: dimensionality of the latent noise vector.
            data_path: root folder of the image dataset.
            transforms: list of torchvision transforms composed for loading.
        """
        # Training hyperparameters and bookkeeping paths.
        self.epoch = epochs
        self.sample_num = sample
        self.batch_size = batch
        self.save_dir = '/tmp/'
        self.result_dir = '/tmp/'
        self.log_dir = '/tmp/'
        self.gpu_mode = True
        self.dataset = 'vasijas'
        self.model_name = 'GAN'
        self.z_dim = latent_v

        # Generator/discriminator and one Adam optimizer each.
        self.G = _G(input_h_w, latent_v)
        self.D = _D(input_h_w)
        adam_kwargs = dict(lr=0.0002, betas=(0.5, 0.999))
        self.G_optimizer = optim.Adam(self.G.parameters(), **adam_kwargs)
        self.D_optimizer = optim.Adam(self.D.parameters(), **adam_kwargs)

        # Binary cross-entropy for the real/fake decision; on GPU if enabled.
        if self.gpu_mode:
            self.G.cuda()
            self.D.cuda()
            self.BCE_loss = nn.BCELoss().cuda()
        else:
            self.BCE_loss = nn.BCELoss()

        print('---------- Networks architecture -------------')
        utils.print_network(self.G)
        utils.print_network(self.D)

        # Image-folder dataset with the caller-supplied transform pipeline.
        imagenet_data = datasets.ImageFolder(data_path,
                                             tfs.Compose(transforms))
        self.data_loader = data.DataLoader(imagenet_data,
                                           batch_size=self.batch_size,
                                           shuffle=True)

        # Fixed noise reused when sampling preview images during training.
        noise = torch.rand((self.batch_size, self.z_dim))
        if self.gpu_mode:
            noise = noise.cuda()
        self.sample_z_ = Variable(noise)
Beispiel #2
0
def test_main():
    """Entry point for evaluation: load config, build the model, run inference.

    Exits with status 1 when the configuration cannot be parsed, so shell
    scripts and CI can detect the failure.
    """
    print('[INFO] Retrieving configuration...')
    parser = None
    config = None

    try:
        args, parser = get_test_args()
        config = process_config(args.config)
    except Exception as e:
        print('[Exception] Configuration is invalid, %s' % e)
        if parser:
            parser.print_help()
        print(
            '[Exception] Refer to: python main_train.py -c experiments/wmcnn/wmcnn.json'
        )
        # Signal failure to the shell; the previous exit(0) reported success.
        exit(1)

    print('[INFO] Building graph...')
    try:
        # Resolve the network class dynamically from the configured module name.
        Net = importlib.import_module('models.{}'.format(
            config['trainer']['net'])).Net
        model = Net(config=config['model'])
        print_network(model)
    except ModuleNotFoundError:
        raise RuntimeWarning(
            "The model name is incorrect or does not exist! Please check!")

    print('[INFO] Loading data...')
    dl = TestImageLoader(config=config['test_data_loader'])

    print('[INFO] Predicting...')
    infer = SRInfer(model, config['trainer'])
    infer.predict(dl.get_test_data(),
                  testset=config['test_data_loader']['test_path'],
                  upscale=config['test_data_loader']['upscale'])
    def train_test(self):
        """Train the model and evaluate on the held-out set after every epoch.

        Checkpoints every ``config.save_epochs`` epochs, logs each step's loss
        to tensorboard, plots the averaged train/test losses, and saves the
        final model parameters.
        """
        # Load a checkpoint if requested, otherwise initialize weights.
        if self.config.load_model is True:
            self.model.load_model()
        else:
            self.model.weight_init()
            print('weight is initilized')

        # optimizer
        # NOTE(review): weight_decay=1.0 is an unusually strong L2 penalty for
        # Adam — confirm it is intentional.
        self.momentum = 0.9
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.config.lr, weight_decay=1.0)

        # LR decays by 10% per epoch.
        scheduler = lr_scheduler.ExponentialLR(self.optimizer, gamma=0.9)

        # loss function (moved to GPU together with the model when enabled)
        if self.config.gpu_mode:
            self.model.cuda()
            self.MSE_loss = nn.MSELoss().cuda()
        else:
            self.MSE_loss = nn.MSELoss()

        print('---------- Networks architecture -------------')
        utils.print_network(self.model)
        print('----------------------------------------------')

        # load dataset
        train_data_loader = self.data_train
        test_data_loader = self.data_test

        ################# Train #################
        print('Training is started.')
        avg_loss = []
        avg_loss_test = []
        avg_loss_log_test = []
        step = 0

        es = EarlyStopping(patience=8)  # early-stop helper (use is commented out below)

        self.model.train()  # sets training mode; model.eval() sets testing mode
        for epoch in range(self.config.num_epochs):
            epoch_loss = 0.0
            for iter, (input, target, _) in enumerate(train_data_loader):
                # input data (low resolution image)
                if self.config.gpu_mode:
                    x_ = Variable(input.cuda())
                    y_ = Variable(target.cuda())
                else:
                    x_ = Variable(input)
                    y_ = Variable(target)

                # update network (RMSE objective)
                self.optimizer.zero_grad()
                model_out = self.model(x_)
                loss = torch.sqrt(self.MSE_loss(model_out, y_))
                loss.backward()
                self.optimizer.step()

                # Accumulate a Python float, not the loss tensor: keeping the
                # tensor would retain every iteration's autograd graph in
                # memory for the whole epoch.
                loss_val = loss.item()
                epoch_loss += loss_val
                print("Epoch: [%2d] [%4d/%4d] loss: %.8f" % ((epoch + 1), (iter + 1), len(train_data_loader), loss_val))

                # tensorboard logging
                self.logger.scalar_summary('loss', loss_val, step + 1)
                step += 1

            # Step the LR schedule after the epoch's optimizer updates
            # (required ordering since PyTorch 1.1; previously stepped before).
            scheduler.step()

            # avg. loss per epoch (plain float now that epoch_loss is a float)
            avg_loss.append(epoch_loss / len(train_data_loader))

            if (epoch + 1) % self.config.save_epochs == 0:
                self.model.save_model(epoch + 1)

            # calculate test loss without building autograd graphs
            with torch.no_grad():
                loss_test, loss_log_test = self.test(test_data_loader)

            epoch_loss_test = loss_test / len(test_data_loader)
            epoch_loss_log_test = loss_log_test / len(test_data_loader)

            avg_loss_test.append(float(epoch_loss_test))
            avg_loss_log_test.append(float(epoch_loss_log_test))

            # if es.step(float(epoch_loss_test)):
            #     self.model.save_model(epoch=None)
            #     print('Early stop at %2d epoch' % (epoch + 1))
            #     break

        # Plot avg. loss
        utils.plot_loss(self.config, [avg_loss, avg_loss_log_test])
        utils.plot_loss(self.config, [avg_loss_test], origin=True)

        print('avg_loss: ', avg_loss[-1])
        print('avg_loss_log with original data: ', avg_loss_test[-1])
        print('avg_loss_log with log data: ', avg_loss_log_test[-1])
        print("Training and test is finished.")

        # Save final trained parameters of model
        self.model.save_model(epoch=None)
Beispiel #4
0
    def train(self):
        """Train the model with SGD, checkpointing every ``config.save_epochs``
        epochs and plotting the per-epoch average loss at the end.
        """
        # Load a checkpoint if requested, otherwise initialize weights.
        if self.config.load_model is True:
            self.model.load_model()
        else:
            self.model.weight_init()
            print('weight is initilized')

        # optimizer
        self.momentum = 0.9
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=self.config.lr,
                                   momentum=self.momentum)

        # loss function (moved to GPU together with the model when enabled)
        if self.config.gpu_mode:
            self.model.cuda()
            self.MSE_loss = nn.MSELoss().cuda()
        else:
            self.MSE_loss = nn.MSELoss()

        print('---------- Networks architecture -------------')
        utils.print_network(self.model)
        print('----------------------------------------------')

        # load dataset
        train_data_loader = self.data

        ################# Train #################
        print('Training is started.')
        avg_loss = []
        step = 0

        self.model.train()  # sets training mode; model.eval() sets testing mode
        for epoch in range(self.config.num_epochs):

            epoch_loss = 0.0
            for iter, (input, _, target) in enumerate(train_data_loader):
                # input data (low resolution image)
                if self.config.gpu_mode:
                    x_ = Variable(input.cuda())
                    y_ = Variable(target.cuda())
                else:
                    x_ = Variable(input)
                    y_ = Variable(target)

                # update network (RMSE objective)
                self.optimizer.zero_grad()
                model_out = self.model(x_)
                loss = torch.sqrt(self.MSE_loss(model_out, y_))
                loss.backward()
                self.optimizer.step()

                # Accumulate a Python float, not the loss tensor: keeping the
                # tensor would retain every iteration's autograd graph in
                # memory for the whole epoch.
                loss_val = loss.item()
                epoch_loss += loss_val
                print("Epoch: [%2d] [%4d/%4d] loss: %.8f" %
                      ((epoch + 1), (iter + 1), len(train_data_loader), loss_val))

                # tensorboard logging
                # self.logger.scalar_summary('loss', loss, step + 1)
                step += 1

            # avg. loss per epoch (plain float now that epoch_loss is a float)
            avg_loss.append(epoch_loss / len(train_data_loader))

            if (epoch + 1) % self.config.save_epochs == 0:
                self.model.save_model(epoch + 1)

        # Plot avg. loss
        utils.plot_loss(self.config, [avg_loss])
        print('avg_loss: ', avg_loss[-1])
        print("Training is finished.")

        # Save final trained parameters of model
        self.model.save_model(epoch=None)
Beispiel #5
0
def train_main():
    """Entry point for training: load config, build the graph, train, and
    record the best checkpoint path back into the experiment's JSON config.

    Exits with status 1 when the configuration cannot be parsed, so shell
    scripts and CI can detect the failure.
    """
    print('[INFO] Retrieving configuration...')
    parser = None
    args = None
    config = None
    # TODO: modify the path of best checkpoint after training
    try:
        args, parser = get_train_args()
        config = process_config(args.config)
        # Keep a copy of the config alongside the experiment's outputs.
        shutil.copy2(args.config,
                     os.path.join("experiments", config['exp_name']))
    except Exception as e:
        print('[Exception] Configuration is invalid, %s' % e)
        if parser:
            parser.print_help()
        print(
            '[Exception] Refer to: python main_train.py -c configs/rrgun.json')
        # Signal failure to the shell; the previous exit(0) reported success.
        exit(1)

    print('[INFO] Loading data...')
    torch.backends.cudnn.benchmark = True
    dl = ImageLoader(config=config['train_data_loader'])

    print('[INFO] Building graph...')
    try:
        # Resolve the network class dynamically from the configured module name.
        Net = importlib.import_module('models.{}'.format(
            config['trainer']['net'])).Net
        model = Net(config=config['model'])
        # Use every visible GPU when more than one is available.
        if torch.cuda.device_count() > 1:
            model = torch.nn.DataParallel(model)
        print_network(model)
    except ModuleNotFoundError:
        raise RuntimeWarning(
            "The model name is incorrect or does not exist! Please check!")

    print('[INFO] Training the graph...')
    trainer = SRTrainer(
        model=model,
        data={
            'train': dl.get_hdf5_sample_data(),
            'test': dl.get_test_data()
        },
        config=config['trainer'])

    highest_score, best_model = trainer.train()
    # Record the best score; write the whole string at once instead of the
    # original writelines(str(...)), which wrote it character by character.
    with open(
            os.path.join("experiments", config['exp_name'], 'performance.txt'),
            'w') as f:
        f.write(str(highest_score))

    # Persist the best checkpoint path into the copied config JSON.
    json_file = os.path.join("./experiments", config['exp_name'],
                             os.path.basename(args.config))
    with open(json_file, 'w') as file_out:
        config['trainer']['checkpoint'] = best_model
        json.dump(config, file_out, indent=2)

    print('[INFO] Training is completed.')
def train(args):
    """Train the sketch GAN (generator ``Gnet`` + discriminator ``Dnet``).

    Seeds every RNG for reproducibility, builds the face dataset/loader, the
    G/D networks (optionally wrapped in DataParallel), AdamW optimizers with
    MultiStepLR decay, and a VGG-19-based reference-style feature loss.
    Saves G/D weight files into ``args.save_weight_path`` after every epoch.
    """
    # Determinism/performance flags, then seed Python, NumPy and torch RNGs.
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # -------------------- Load data ----------------------------------
    # Resize to 224x224, jitter colors, convert to tensors.
    transform = transforms.Compose([
        Rescale((224, 224)),
        ColorJitter(0.5, 0.5, 0.5, 0.3, 0.5),
        ToTensor(),
    ])
    dataset = FaceDataset(args.train_data, True, transform=transform)
    data_loader = DataLoader(dataset,
                             shuffle=True,
                             batch_size=args.batch_size,
                             drop_last=True,
                             num_workers=4)

    # ----------------- Define networks ---------------------------------
    Gnet = SketchNet(in_channels=3, out_channels=1, norm_type=args.Gnorm)
    Dnet = DNet(norm_type=args.Dnorm)
    vgg19_model = vgg19(args.vgg19_weight)

    # Comma-separated GPU id list, e.g. "0,1"; wrap in DataParallel when set.
    gpu_ids = [int(x) for x in args.gpus.split(',')]
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    Gnet.to(device)
    Dnet.to(device)
    if len(gpu_ids) > 0:
        Gnet = nn.DataParallel(Gnet, device_ids=gpu_ids)
        Dnet = nn.DataParallel(Dnet, device_ids=gpu_ids)
        vgg19_model = nn.DataParallel(vgg19_model, device_ids=gpu_ids)

    Gnet.train()
    Dnet.train()

    # Resume from the most recent weight pair; files are named
    # "epochs-EEE-G.pth" / "epochs-EEE-D.pth", so sorting picks the latest
    # and slicing off the last 5 chars ("G.pth") leaves the common prefix.
    if args.resume:
        weights = glob(os.path.join(args.save_weight_path, '*-*.pth'))
        weight_path = sorted(weights)[-1][:-5]
        Gnet.load_state_dict(torch.load(weight_path + 'G.pth'))
        Dnet.load_state_dict(torch.load(weight_path + 'D.pth'))

    # ---------------- set optimizer and learning rate ---------------------
    # Rescale the epoch budget relative to dataset size (floor of 4 epochs);
    # LR milestones at 1/4 and 2/4 of the adjusted schedule.
    args.epochs = np.ceil(args.epochs * 1000 / len(dataset))
    args.epochs = max(int(args.epochs), 4)
    ms = [int(1. / 4 * args.epochs), int(2. / 4 * args.epochs)]

    # optim_G = torch.optim.SGD(Gnet.parameters(), args.lr, momentum=0.9, weight_decay=1e-4)
    optim_G = torch.optim.AdamW(Gnet.parameters(), args.glr)
    optim_D = torch.optim.AdamW(Dnet.parameters(), args.dlr)
    # NOTE(review): the schedulers are stepped once per *batch* in the loop
    # below while the milestones `ms` are expressed in epochs — the LR will
    # decay after `ms` batches, far earlier than the milestone epochs suggest.
    # Confirm this is intended.
    scheduler_G = MultiStepLR(optim_G, milestones=ms, gamma=0.1)
    scheduler_D = MultiStepLR(optim_D, milestones=ms, gamma=0.1)
    mse_crit = nn.MSELoss()

    # ---------------------- Define reference styles and feature loss layers ----------
    # Each style selects a reference dataset, a precomputed feature bank, and
    # an image list used for photo/sketch retrieval.
    if args.train_style == 'cufs':
        ref_style_dataset = ['CUHK_student', 'AR', 'XM2VTS']
        ref_feature = './data/cufs_feature_dataset.pth'
        ref_img_list = './data/cufs_reference_img_list.txt'
    elif args.train_style == 'cufsf':
        ref_style_dataset = ['CUFSF']
        ref_feature = './data/cufsf_feature_dataset.pth'
        ref_img_list = './data/cufsf_reference_img_list.txt'
    else:
        # NOTE(review): prefer raising ValueError here — assert is stripped
        # when Python runs with -O.
        assert 1 == 0, 'Train style {} not supported.'.format(args.train_style)

    # args.flayers is a boolean mask selecting which VGG layers feed the loss.
    vgg_feature_layers = ['r11', 'r21', 'r31', 'r41', 'r51']
    feature_loss_layers = list(
        itertools.compress(vgg_feature_layers, args.flayers))
    utils.print_network(Gnet)
    utils.print_network(Dnet)
    print("Initialized")
    log = logger.Logger(args.save_weight_path)

    for e in range(args.epochs):
        sample_count = 0
        for batch_idx, batch_data in enumerate(data_loader):
            # ---------------- Load data -------------------
            start = time()
            train_img, train_img_org = [
                utils.tensorToVar(x) for x in batch_data
            ]
            # Retrieve the top-k most similar reference sketch/photo pairs.
            topk_sketch_img, topk_photo_img = search_dataset.find_photo_sketch_batch(
                train_img_org,
                ref_feature,
                ref_img_list,
                vgg19_model,
                dataset_filter=ref_style_dataset,
                topk=args.topk)
            # Random real sketches serve as the discriminator's "real" batch.
            random_real_sketch = search_dataset.get_real_sketch_batch(
                train_img.size(0),
                ref_img_list,
                dataset_filter=ref_style_dataset)
            end = time()
            data_time = end - start
            sample_count += train_img.size(0)

            # ---------------- Model forward -------------------
            start = time()
            fake_sketch = Gnet(train_img)
            fake_score = Dnet(fake_sketch)
            real_score = Dnet(random_real_sketch)

            # LSGAN-style targets: 1 for real, 0 for fake.
            real_label = torch.ones_like(fake_score)
            fake_label = torch.zeros_like(fake_score)

            # ----------------- Calculate loss and backward -------------------
            # Mean-subtract inputs for the VGG feature extractor.
            train_img_org_vgg = img_process.subtract_mean_batch(
                train_img_org, 'face')
            topk_sketch_img_vgg = img_process.subtract_mean_batch(
                topk_sketch_img, 'sketch')
            topk_photo_img_vgg = img_process.subtract_mean_batch(
                topk_photo_img, 'face')
            fake_sketch_vgg = img_process.subtract_mean_batch(
                fake_sketch.expand_as(train_img_org), 'sketch', args.meanshift)

            # Style loss: MRF feature matching against the retrieved sketches.
            style_loss = loss.feature_mrf_loss_func(
                fake_sketch_vgg,
                topk_sketch_img_vgg,
                vgg19_model,
                feature_loss_layers, [train_img_org_vgg, topk_photo_img_vgg],
                topk=args.topk)

            tv_loss = loss.total_variation(fake_sketch)

            # GAN Loss (weights: [style, adversarial, total-variation])
            adv_loss = mse_crit(fake_score, real_label) * args.weight[1]
            tv_loss = tv_loss * args.weight[2]
            loss_G = style_loss * args.weight[0] + adv_loss + tv_loss
            loss_D = 0.5 * mse_crit(fake_score, fake_label) + 0.5 * mse_crit(
                real_score, real_label)

            # Update parameters: D first (retain_graph keeps the shared
            # forward graph alive for the subsequent G backward), then G.
            optim_D.zero_grad()
            loss_D.backward(retain_graph=True)
            optim_D.step()

            optim_G.zero_grad()
            loss_G.backward()
            optim_G.step()

            scheduler_G.step()
            scheduler_D.step()

            end = time()
            train_time = end - start

            # ----------------- Print result and log the output -------------------
            log.iterLogUpdate(loss_G.item())
            if batch_idx % 100 == 0:
                log.draw_loss_curve()

            msg = "{:%Y-%m-%d %H:%M:%S}\tEpoch [{:03d}/{:03d}]\tBatch [{:03d}/{:03d}]\tData: {:.2f}  Train: {" \
                  ":.2f}\tLoss: G-{:.4f}, Adv-{:.4f}, tv-{:.4f}, D-{:.4f}".format(
                datetime.now(),
                e, args.epochs, sample_count, len(dataset),
                data_time, train_time, *[x for x in [loss_G.item(), adv_loss, tv_loss, loss_D]])
            print(msg)
            # NOTE(review): log.txt is re-opened for every batch; a single
            # context-managed open outside the loop would be cheaper.
            log_file = open(os.path.join(args.save_weight_path, 'log.txt'),
                            'a+')
            log_file.write(msg + '\n')
            log_file.close()

        # Snapshot both networks on CPU at the end of every epoch.
        save_weight_name = "epochs-{:03d}-".format(e)
        G_cpu_model = copy.deepcopy(Gnet).cpu()
        D_cpu_model = copy.deepcopy(Dnet).cpu()
        torch.save(
            G_cpu_model.state_dict(),
            os.path.join(args.save_weight_path, save_weight_name + 'G.pth'))
        torch.save(
            D_cpu_model.state_dict(),
            os.path.join(args.save_weight_path, save_weight_name + 'D.pth'))
Beispiel #7
0
def train_main():
    """Entry point for training: load config, build the graph, train, and
    record the best checkpoint path back into the experiment's JSON config.

    Exits with status 1 when the configuration cannot be parsed, so shell
    scripts and CI can detect the failure.
    """
    print('[INFO] Retrieving configuration...')
    parser = None
    args = None
    config = None
    # TODO: modify the path of best checkpoint after training
    try:
        args, parser = get_train_args()
        config = process_config(args.config)
        # Keep a copy of the config alongside the experiment's outputs.
        shutil.copy2(args.config, os.path.join("experiments", config['exp_name']))
    except Exception as e:
        print('[Exception] Configuration is invalid, %s' % e)
        if parser:
            parser.print_help()
        print('[Exception] Refer to: python main_train.py -c configs/wmcnn.json')
        # Signal failure to the shell; the previous exit(0) reported success.
        exit(1)

    print('[INFO] Loading data...')
    dl = ImageLoader(config=config['train_data_loader'])

    print('[INFO] Building graph...')
    try:
        # Resolve the network class dynamically from the configured module name.
        Net = importlib.import_module('models.{}'.format(config['trainer']['net'])).Net
        model = Net(config=config['model'])
        print_network(model)
    except ModuleNotFoundError:
        raise RuntimeWarning("The model name is incorrect or does not exist! Please check!")

    print('[INFO] Training the graph...')
    # Synchronous CUDA launches make kernel errors surface at the failing call.
    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    trainer = SRTrainer(
        model=model,
        data={'train': dl.get_wmcnn_hdf5_data(), 'test': dl.get_test_data()},
        config=config['trainer'])

    highest_score, best_model = trainer.train()
    # Record the best score; write the whole string at once instead of the
    # original writelines(str(...)), which wrote it character by character.
    with open(os.path.join("experiments", config['exp_name'], 'performance.txt'), 'w') as f:
        f.write(str(highest_score))

    # Persist the best checkpoint path into the copied config JSON.
    json_file = os.path.join("./experiments", config['exp_name'], os.path.basename(args.config))
    with open(json_file, 'w') as file_out:
        config['trainer']['checkpoint'] = best_model
        json.dump(config, file_out, indent=2)

    print('[INFO] Training is completed.')
Beispiel #8
0
    def train_test(self):
        """Train the model, evaluate on the test set every epoch, and finish
        with a DTW evaluation pass.

        Checkpoints every ``config.save_epochs`` epochs, plots train/test loss
        curves every 10 epochs and at the end, and saves the final model.
        """
        # Load a checkpoint if requested, otherwise initialize weights.
        if self.config.load_model is True:
            self.load_model()
            # self.load_spec_model()
        else:
            self.weight_init()

        # loss function
        if self.config.gpu_mode:
            self.model.cuda()
            self.MSE_loss = nn.MSELoss().cuda()  # by default averaged over each sample
        else:
            self.MSE_loss = nn.MSELoss()

        # optimizer
        # NOTE(review): weight_decay=1.0 is an unusually strong L2 penalty for
        # Adam — confirm it is intentional.
        self.momentum = 0.9
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=self.config.lr,
                                    weight_decay=1.0)

        # LR drops by 10x every 100 epochs.
        scheduler = lr_scheduler.StepLR(self.optimizer,
                                        step_size=100,
                                        gamma=0.1)
        # scheduler = lr_scheduler.ExponentialLR(self.optimizer, gamma=0.9)

        print('---------- Networks architecture -------------')
        utils.print_network(self.model)
        print('----------------------------------------------')

        # load dataset
        train_data_loader = self.data_train
        test_data_loader = self.data_test

        ################# Train #################
        print('Training is started.')
        avg_loss = []
        avg_loss_test = []
        avg_loss_log_test = []
        # step = 0

        es = EarlyStopping(patience=50)

        self.model.train(
        )  # It just sets the training mode.model.eval() to set testing mode
        for epoch in range(self.config.num_epochs):
            # NOTE(review): since PyTorch 1.1 scheduler.step() should run after
            # the epoch's optimizer.step() calls, not before — confirm.
            scheduler.step()
            epoch_loss = 0
            for iter, (input, target,
                       groundtruth) in enumerate(train_data_loader):
                # input data (low resolution image)
                if self.config.gpu_mode:
                    x_ = Variable(input.cuda())
                    y_ = Variable(groundtruth.cuda())
                else:
                    x_ = Variable(input)
                    y_ = Variable(groundtruth)

                # if scale is 10, x_.shape is (batchsize, 1, 300)
                # if scale is 100, x_.shape is (batchsize, 1, 30)

                # update network (RMSE objective)
                self.optimizer.zero_grad()
                model_out = self.model(x_)
                loss = torch.sqrt(self.MSE_loss(model_out, y_))
                loss.backward()  # the result is a tensor
                self.optimizer.step()
                # NOTE(review): this accumulates the live loss tensor, which
                # keeps each iteration's autograd graph alive until the
                # epoch-end detach below; loss.item() would be lighter.
                epoch_loss += loss

                # NOTE: len(train_data_loader) is #train samples / batchsize,
                # i.e. the number of batches iterated per epoch.
                print("Epoch: [%2d] [%4d/%4d] loss: %.8f" %
                      ((epoch + 1), (iter + 1), len(train_data_loader), loss))

                # tensorboard logging
                # self.logger.scalar_summary('loss', loss, step + 1)
                # step += 1

            # avg. loss per epoch
            # dividing by len(train_data_loader) gives the average loss per sample
            avg_loss.append(
                (epoch_loss / len(train_data_loader)).detach().cpu().numpy())

            if (epoch + 1) % self.config.save_epochs == 0:
                self.save_model(epoch + 1)

            # calculate test loss without building autograd graphs
            with torch.no_grad():
                loss_test, _ = self.test(test_data_loader)

            epoch_loss_test = loss_test / len(test_data_loader)

            avg_loss_test.append(float(epoch_loss_test))

            #nni.report_intermediate_result(
            #    {"default": float(epoch_loss_test), "epoch_loss": float(avg_loss[-1])})

            # if es.step(avg_loss[-1]):
            #     self.save_model(epoch=None)
            #     print('Early stop at %2d epoch' % (epoch + 1))
            #     break

            # Refresh the loss plot every 10 epochs.
            if epoch % 10 == 0 and epoch != 0:
                utils.plot_loss(self.config, [avg_loss, avg_loss_test])

        #nni.report_final_result({"default": float(avg_loss_test[-1]), "epoch_loss": float(avg_loss[-1])})

        # Plot avg. loss
        utils.plot_loss(self.config, [avg_loss, avg_loss_test])

        # Final evaluation pass computing the DTW metric.
        with torch.no_grad():
            _, dtw_test = self.test(test_data_loader, True)
            avg_dtw_test = dtw_test / len(test_data_loader)

        print('avg_loss: ', avg_loss[-1])
        print('avg_loss_log with original data: ', avg_loss_test[-1])
        print('dtw with original data: ', avg_dtw_test)
        print("Training and test is finished.")

        # Save final trained parameters of model
        self.save_model(epoch=None)