Example #1
    def __init__(self,
                 cuda,
                 model,
                 optimizer,
                 loss_fcn,
                 scheduler,
                 train_loader,
                 val_loader,
                 out,
                 max_iter,
                 logFile,
                 size_average=False,
                 interval_validate=None):
        """

        :param cuda:
        :param model:
        :param optimizer:
                scheduler:学习率调整策略
        :param loss_fcn:
        :param train_loader:
        :param val_loader:
        :param out: 字符串,模型输出的路径,用于保存模型
        :param max_iter:
        :param size_average:
        :param interval_validate:
        """

        self.cuda = cuda

        self.model = model
        self.optim = optimizer
        self.scheduler = scheduler
        self.loss_fcn = loss_fcn

        self.train_loader = train_loader
        self.val_loader = val_loader

        self.out = out
        self.epoch = 0
        self.iteration = 0
        self.max_iter = max_iter
        self.best_mean_iu = 0
        self.visual = utils.Visualizer()

        self.valid_loss = 0
        self.valid_acc = 0
        self.valMeanIu = 0
        self.train_loss = 0
        self.train_acc = 0
        self.trainMeanIu = 0

        self.logFile = logFile

        if interval_validate is None:
            self.interval_validate = len(self.train_loader)
        else:
            self.interval_validate = interval_validate
Example #2
def train(**kwargs):

    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    vis = utils.Visualizer(opt.env)

    # Data loading
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    loader = get_loader(batch_size=1,
                        data_path=opt.data_path,
                        img_shape=opt.img_shape,
                        transform=transform)

    # Transformer (style transfer) network
    transformer = TransformerNet().cuda()
    # transformer.load_state_dict(t.load(opt.model_path, ))

    #if opt.model_path:
    #    transformer.load_state_dict(t.load(opt.model_path,map_location=lambda _s, _: _s))

    # Loss network (VGG19)
    vgg = Vgg19().eval()
    depthnet = HourGlass().eval()
    depthnet.load_state_dict(t.load(opt.depth_path))
    # print(vgg)
    # BASNET
    net = BASNet(3, 1).cuda()
    net.load_state_dict(torch.load('./basnet.pth'))
    net.eval()

    # Optimizer
    optimizer = t.optim.Adam(transformer.parameters(), lr=opt.lr)

    # Load the style image

    img = Image.open(opt.style_path)
    img = img.resize(opt.img_shape)
    img = transform(img).float()
    style = Variable(img, requires_grad=True).unsqueeze(0)
    vis.img('style', (style[0] * 0.225 + 0.45).clamp(min=0, max=1))

    if opt.use_gpu:
        transformer.cuda()
        style = style.cuda()
        vgg.cuda()
        depthnet.cuda()

    # Gram matrices of the style image
    style_v = Variable(style, volatile=True)
    features_style = vgg(style_v)
    gram_style = [Variable(utils.gram_matrix(y.data)) for y in features_style]

    # Loss meters
    style_meter = tnt.meter.AverageValueMeter()
    content_meter = tnt.meter.AverageValueMeter()
    temporal_meter = tnt.meter.AverageValueMeter()
    long_temporal_meter = tnt.meter.AverageValueMeter()
    depth_meter = tnt.meter.AverageValueMeter()
    # tv_meter = tnt.meter.AverageValueMeter()
    kk = 0
    for count in range(opt.epoch):
        print('Training Start!!')
        content_meter.reset()
        style_meter.reset()
        temporal_meter.reset()
        long_temporal_meter.reset()
        depth_meter.reset()
        # tv_meter.reset()
        for step, frames in enumerate(loader):
            for i in tqdm.tqdm(range(1, len(frames))):
                kk += 1
                if (kk + 1) % 3000 == 0:
                    print('LR had changed')
                    for param in optimizer.param_groups:
                        param['lr'] = max(param['lr'] / 1.2, 1e-4)

                optimizer.zero_grad()
                x_t = frames[i].cuda()

                x_t1 = frames[i - 1].cuda()

                h_xt = transformer(x_t)

                h_xt1 = transformer(x_t1)
                depth_x_t = depthnet(x_t)
                depth_x_t1 = depthnet(x_t1)
                depth_h_xt = depthnet(h_xt)
                depth_h_xt1 = depthnet(h_xt1)

                img1 = h_xt1.data.cpu().squeeze(0).numpy().transpose(1, 2, 0)
                img2 = h_xt.data.cpu().squeeze(0).numpy().transpose(1, 2, 0)

                flow, mask = opticalflow(img1, img2)
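                # opticalflow (defined elsewhere in this repo) is assumed to return
                # the dense flow between consecutive stylized frames together with
                # an occlusion mask. Below, BASNet returns eight saliency maps at
                # decreasing resolutions; only the finest map d1 is kept, and
                # PROCESS is assumed to combine it with the frame before it is fed
                # to the loss network.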

                d1, d2, d3, d4, d5, d6, d7, d8 = net(x_t)
                alpha1 = PROCESS(d1, x_t)
                del d1, d2, d3, d4, d5, d6, d7, d8

                d1, d2, d3, d4, d5, d6, d7, d8 = net(x_t1)
                alpha2 = PROCESS(d1, x_t1)
                del d1, d2, d3, d4, d5, d6, d7, d8

                h_xt_features = vgg(h_xt)
                h_xt1_features = vgg(h_xt1)
                x_xt_features = vgg(alpha1)
                x_xt1_features = vgg(alpha2)

                # ContentLoss, conv3_2
                content_t = F.mse_loss(x_xt_features[2], h_xt_features[2])
                content_t1 = F.mse_loss(x_xt1_features[2], h_xt1_features[2])
                content_loss = opt.content_weight * (content_t1 + content_t)
                # StyleLoss
                style_t = 0
                style_t1 = 0
                for ft_y, gm_s in zip(h_xt_features, gram_style):
                    gram_y = gram_matrix(ft_y)
                    style_t += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
                for ft_y, gm_s in zip(h_xt1_features, gram_style):
                    gram_y = gram_matrix(ft_y)
                    style_t1 += F.mse_loss(gram_y, gm_s.expand_as(gram_y))

                style_loss = opt.style_weight * (style_t1 + style_t)

                # # depth loss
                depth_loss1 = F.mse_loss(depth_h_xt, depth_x_t)
                depth_loss2 = F.mse_loss(depth_h_xt1, depth_x_t1)
                depth_loss = opt.depth_weight * (depth_loss1 + depth_loss2)
                # # TVLoss
                # print(type(s_hxt[layer]),s_hxt[layer].size())
                # tv_loss = TVLoss(h_xt)

                # Long-temporal loss
                if (i - 1) % opt.sample_frames == 0:
                    frames0 = h_xt1.cpu()
                    long_img1 = frames0.data.cpu().squeeze(
                        0).numpy().transpose(1, 2, 0)
                # long_img2 = h_xt.data.cpu().squeeze(0).numpy().transpose(1,2,0)
                long_flow, long_mask = opticalflow(long_img1, img2)

                # Optical flow

                flow = torch.from_numpy(flow).permute(2, 0, 1).unsqueeze(0).to(
                    torch.float32)
                long_flow = torch.from_numpy(long_flow).permute(
                    2, 0, 1).unsqueeze(0).to(torch.float32)

                # print(flow.size())
                # print(h_xt1.size())
                warped = warp(h_xt1.cpu().permute(0, 2, 3, 1), flow,
                              opt.img_shape[1], opt.img_shape[0]).cuda()
                long_warped = warp(frames0.cpu().permute(0, 2, 3,
                                                         1), long_flow,
                                   opt.img_shape[1], opt.img_shape[0]).cuda()
                long_temporal_loss = F.mse_loss(
                    h_xt, long_mask * long_warped.permute(0, 3, 1, 2))
                # print(warped.size())
                # tv.utils.save_image((warped.permute(0,3,1,2).data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1),
                #                     './warped.jpg')
                mask = mask.transpose(2, 0, 1)
                mask = torch.from_numpy(mask).cuda().to(torch.float32)
                # print(mask.shape)
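                # Short-term temporal loss: MSE between the current stylized frame
                # and the previous stylized frame warped by optical flow, with the
                # occlusion mask zeroing out unreliable pixels.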
                temporal_loss = F.mse_loss(h_xt,
                                           mask * warped.permute(0, 3, 1, 2))

                temporal_loss = opt.temporal_weight * temporal_loss
                long_temporal_loss = opt.long_temporal_weight * long_temporal_loss

                # Spatial Loss
                spatial_loss = content_loss + style_loss

                Loss = spatial_loss + depth_loss + temporal_loss + long_temporal_loss

                Loss.backward(retain_graph=True)
                optimizer.step()
                content_meter.add(float(content_loss.data))
                style_meter.add(float(style_loss.data))
                temporal_meter.add(float(temporal_loss.data))
                long_temporal_meter.add(float(long_temporal_loss.data))
                depth_meter.add(float(depth_loss.data))
                # tv_meter.add(float(tv_loss.data))

                vis.plot('temporal_loss', temporal_meter.value()[0])
                vis.plot('long_temporal_loss', long_temporal_meter.value()[0])
                vis.plot('content_loss', content_meter.value()[0])
                vis.plot('style_loss', style_meter.value()[0])
                vis.plot('depth_loss', depth_meter.value()[0])
                # vis.plot('tv_loss', tv_meter.value()[0])

                if i % 10 == 0:
                    vis.img('input(t)',
                            (x_t.data.cpu()[0] * 0.225 + 0.45).clamp(min=0,
                                                                     max=1))
                    vis.img('output(t)',
                            (h_xt.data.cpu()[0] * 0.225 + 0.45).clamp(min=0,
                                                                      max=1))
                    vis.img('output(t-1)',
                            (h_xt1.data.cpu()[0] * 0.225 + 0.45).clamp(min=0,
                                                                       max=1))
                    print(
                        'epoch{},content loss:{},style loss:{},temporal loss:{},long temporal loss:{},depth loss:{},total loss{}'
                        .format(count, content_loss, style_loss, temporal_loss,
                                long_temporal_loss, depth_loss, Loss))
                    # print('epoch{},content loss:{},style loss:{},depth loss:{},total loss{}'
                    #       .format(count,content_loss, style_loss,depth_loss,Loss))

            vis.save([opt.env])
            torch.save(transformer.state_dict(), opt.model_path)
Example #3
def train(**kwargs):
    '''
    param:
        kwargs: keyword options used to override the defaults in opts
    return:
        the trained model
    '''
    opts.parse_kwargs(**kwargs)
    print "train begin!"
    viz = utils.Visualizer(opts.env)
    #model
    our_model = getattr(models, opts.model)(opts)
    our_model.load_state_dict(
        torch.load(
            "./check_point/<class 'models.RDN.rdn'>_0823_17:59:06.path"))
    #step2 data_set
    data_size, data_loader = data.dataset.data_loader(opts)
    #step 3 criterion optimer
    #l1
    criterion = getattr(models, opts.loss_function)()
    print(data_size)
    optimer = torch.optim.Adam(our_model.parameters(), lr=opts.lr)
    sche = torch.optim.lr_scheduler.MultiStepLR(optimer,
                                                milestones=opts.stone,
                                                gamma=0.5)
    #step 4 device
    best_loss = 1e11
    device = torch.device("cuda:0" if opts.use_gpu else "cpu")
    our_model.to(device)
    since = time.time()
    plot_line_win = None
    plot_img_win = None
    plot_label_win = None
    plot_test_img_win = None
    plot_test_label_win = None
    #step 5 trainning
    for epoch in range(10, opts.max_epoch):
        print("*****************" * 10)
        print("epoch {}/{}".format(epoch, opts.max_epoch))
        our_model.train()
        epoch_loss = 0.0
        for _, datas in enumerate(data_loader['train'], 1):
            inputs = datas['input'].to(device)
            labels = datas['label'].to(device)
            optimer.zero_grad()
            with torch.set_grad_enabled(True):
                outputs = our_model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimer.step()
                show_outputs = copy.deepcopy(outputs.detach())
                show_outputs[show_outputs > 1.0] = 1.0
                show_outputs[show_outputs < 0.0] = 0.0
                plot_img_win = viz.images(show_outputs,
                                          win=plot_img_win,
                                          title='img_test')
                plot_label_win = viz.images(labels,
                                            win=plot_label_win,
                                            title='img_label')
            epoch_loss += loss.item() * inputs.shape[0]
            if _ % 40 == 0:
                # draw the loss line
                print("iteration: {}".format(_))
                times = time.time() - since
                times = 1.0 * times / 60
                x = times
                train_loss = (epoch_loss * 1.0 / (opts.batch_size * _))
                plot_test_img_win, plot_test_label_win, test_loss = val(
                    our_model, data_loader['val'], data_size['val'], device,
                    criterion, plot_test_img_win, plot_test_label_win, viz)
                x = np.column_stack((np.asarray(x), np.asarray(x)))
                y = np.column_stack(
                    (np.asarray(train_loss), np.asarray(test_loss)))
                plot_line_win = viz.plot(x, y, plot_line_win, 'Loss',
                                         'train_loss', 'val_loss')
                if (test_loss < best_loss):
                    best_loss = test_loss
                    best_model = our_model.state_dict()
                    our_model.save()
        epoch_loss = epoch_loss / data_size['train']
        plot_test_img_win, plot_test_label_win, test_loss = val(
            our_model, data_loader['val'], data_size['val'], device, criterion,
            plot_test_img_win, plot_test_label_win, viz)
        print("{} :train_loss{:.8f},val_loss{:.8f}".format(
            'lossing', epoch_loss, test_loss))
        sche.step()
    print("best_loss for val{:.8f}".format(best_loss))
    torch.save(best_model, opts.load_model_path)
Example #4
def train(**kwargs):
    opt = Config()
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)
    # visualization
    vis = utils.Visualizer(opt.env)

    # Data loading
    transforms = tv.transforms.Compose([
        # resize the input PIL.Image so that its shorter edge equals image_size
        tv.transforms.Resize(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        # convert to a tensor in [0, 1]
        tv.transforms.ToTensor(),
        # scale to [0, 255]
        tv.transforms.Lambda(lambda x: x * 255)
    ])
    # wrap the dataset and apply the transforms
    dataset = tv.datasets.ImageFolder(opt.data_root, transforms)
    # data loader
    dataloader = data.DataLoader(dataset, opt.batch_size)

    # Transformer (style transfer) network
    transformer = TransformerNet()
    if opt.model_path:
        transformer.load_state_dict(
            t.load(opt.model_path, map_location=lambda _s, _: _s))

    # Loss network: VGG16, set to evaluation mode
    vgg = Vgg16().eval()

    # Optimizer (only the style transfer network's parameters are trained)
    optimizer = t.optim.Adam(transformer.parameters(), opt.lr)

    # Load the style image: shape 1*c*h*w, values roughly in -2~2 (preset normalization)
    style = utils.get_style_data(opt.style_path)
    # visualize the style image: map -2~2 back to 0~1
    vis.img('style', (style[0] * 0.225 + 0.45).clamp(min=0, max=1))

    if opt.use_gpu:
        transformer.cuda()
        style = style.cuda()
        vgg.cuda()

    # Gram matrices of the style image
    style_v = Variable(style, volatile=True)
    # VGG returns the features of four intermediate layers
    # (later compared with the four-layer features of the input images to compute the loss)
    features_style = vgg(style_v)
    # gram_matrix: input b*c*h*w, output b*c*c (one Gram matrix per layer, four in total)
    gram_style = [Variable(utils.gram_matrix(y.data)) for y in features_style]
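    # For reference, a minimal sketch of what utils.gram_matrix is assumed to
    # compute, matching the shape note above (input b*c*h*w -> output b*c*c);
    # the actual helper lives in utils and may normalize differently:
    #
    #   def gram_matrix(y):
    #       b, c, h, w = y.size()
    #       features = y.view(b, c, h * w)
    #       return features.bmm(features.transpose(1, 2)) / (c * h * w)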

    # Loss meters for visualization (average loss over all batches in each epoch)
    # style loss
    style_meter = tnt.meter.AverageValueMeter()
    # content loss
    content_meter = tnt.meter.AverageValueMeter()

    for epoch in range(opt.epoches):
        # reset the meters
        content_meter.reset()
        style_meter.reset()

        for ii, (x, _) in tqdm.tqdm(enumerate(dataloader)):

            # training
            optimizer.zero_grad()
            if opt.use_gpu:
                x = x.cuda()
            # x is the real input image
            x = Variable(x)
            # y is the stylized output image
            y = transformer(x)
            # input:  b, ch, h, w  in 0~255
            # output: b, ch, h, w  in roughly -2~2
            # normalize_batch maps x and y from 0~255 to about -2~2
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)
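            # For reference, a plausible sketch of utils.normalize_batch, mapping
            # 0~255 inputs to roughly -2~2 with ImageNet statistics (consistent
            # with the "* 0.225 + 0.45" de-normalization used for display below);
            # the real helper may differ:
            #
            #   def normalize_batch(batch):
            #       mean = batch.new_tensor([0.485, 0.456, 0.406]).view(1, -1, 1, 1)
            #       std = batch.new_tensor([0.229, 0.224, 0.225]).view(1, -1, 1, 1)
            #       return (batch / 255.0 - mean) / std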
            # returns the features of the four intermediate layers
            features_y = vgg(y)
            features_x = vgg(x)

            # content loss: only computed at the relu2_2 layer, i.e. the prediction
            # and the original image are compared at relu2_2
            # (content_weight is the content weight, mse_loss the mean squared error)
            content_loss = opt.content_weight * F.mse_loss(
                features_y.relu2_2, features_x.relu2_2)

            # style loss
            style_loss = 0.
            # the style loss is the sum of the MSE losses over the four layers
            # features_y: the four-layer features of the prediction
            # gram_style: the Gram matrices of the style image's four-layer features
            # zip pairs up the corresponding elements of the two iterables
            for ft_y, gm_s in zip(features_y, gram_style):
                # Gram matrix of the prediction's features at this layer
                gram_y = utils.gram_matrix(ft_y)
                style_loss += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight
            # total loss = style loss + content loss
            total_loss = content_loss + style_loss
            # backpropagation
            total_loss.backward()
            # update the parameters
            optimizer.step()

            # add the losses to the meters for visualization
            content_meter.add(content_loss.data[0])
            style_meter.add(style_loss.data[0])
            # visualize every plot_every forward passes
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # visualization
                vis.plot('content_loss', content_meter.value()[0])
                vis.plot('style_loss', style_meter.value()[0])
                # x and y were normalized by utils.normalize_batch,
                # so map them from [-2, 2] back to [0, 1] for display
                vis.img('output',
                        (y.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))
                vis.img('input', (x.data.cpu()[0] * 0.225 + 0.45).clamp(min=0,
                                                                        max=1))

        # save the visdom state and the model after each epoch
        vis.save([opt.env])
        t.save(transformer.state_dict(), 'checkpoints/%s_style.pth' % epoch)
Example #5
def train(**kwargs):
    opt = Config()
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    device = t.device('cuda') if opt.use_gpu else t.device('cpu')
    vis = utils.Visualizer(opt.env)

    # Data loading
    transforms = tv.transforms.Compose([
        tv.transforms.Resize(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x * 255)
    ])
    dataset = tv.datasets.ImageFolder(opt.data_root, transforms)
    dataloader = data.DataLoader(dataset, opt.batch_size)

    # Transformer (style transfer) network
    transformer = TransformerNet()
    if opt.model_path:
        transformer.load_state_dict(
            t.load(opt.model_path, map_location=lambda _s, _: _s))
    transformer.to(device)

    # Loss network (VGG16)
    vgg = Vgg16().eval()
    vgg.to(device)
    for param in vgg.parameters():
        param.requires_grad = False

    # Optimizer
    optimizer = t.optim.Adam(transformer.parameters(), opt.lr)

    # Load the style image
    style = utils.get_style_data(opt.style_path)
    vis.img('style', (style.data[0] * 0.225 + 0.45).clamp(min=0, max=1))
    style = style.to(device)

    # Gram matrices of the style image
    with t.no_grad():
        features_style = vgg(style)
        gram_style = [utils.gram_matrix(y) for y in features_style]

    # Loss meters
    style_meter = tnt.meter.AverageValueMeter()
    content_meter = tnt.meter.AverageValueMeter()

    for epoch in range(opt.epoches):
        content_meter.reset()
        style_meter.reset()

        for ii, (x, _) in tqdm.tqdm(enumerate(dataloader)):

            # training
            optimizer.zero_grad()
            x = x.to(device)
            y = transformer(x)
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)
            features_y = vgg(y)
            features_x = vgg(x)

            # content loss
            content_loss = opt.content_weight * F.mse_loss(
                features_y.relu2_2, features_x.relu2_2)

            # style loss
            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gram_y = utils.gram_matrix(ft_y)
                style_loss += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            # add the losses to the meters
            content_meter.add(content_loss.item())
            style_meter.add(style_loss.item())

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # visualization
                vis.plot('content_loss', content_meter.value()[0])
                vis.plot('style_loss', style_meter.value()[0])
                # x and y were normalized by utils.normalize_batch, so map them back to [0, 1] for display
                vis.img('output',
                        (y.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))
                vis.img('input', (x.data.cpu()[0] * 0.225 + 0.45).clamp(min=0,
                                                                        max=1))

        # save the visdom state and the model
        vis.save([opt.env])
        t.save(transformer.state_dict(), 'checkpoints/%s_style.pth' % epoch)
Example #6
folder_num = 8
valid_folder_num = 5
num_epoch = 100
use_cuda = torch.cuda.is_available()
path = os.path.expanduser('~/codedata/ice/')

print('loading data.....')
images_all, labels_all, inc_angle_all = read_clean(path,
                                                   'train_clean_size.json')
train_set_folders = train_cross(images_all, labels_all, inc_angle_all,
                                folder_num)

best_test_loss_stl = [np.inf] * folder_num
best_train_loss_stl = [np.inf] * folder_num

vis = utils.Visualizer(env='lxg')

for folder in range(folder_num):
    if folder == valid_folder_num:
        break

    train_data, train_label, train_inc, test_data, test_label, test_inc = \
                        train_set_folders.getset(folder)
    train_dataset = DataSet(train_data, train_label, train_inc, train=True)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=5)
    test_dataset = DataSet(test_data, test_label, test_inc, train=False)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
Example #7
 def start_visdom(self):
     self.vis = utils.Visualizer(env='Adversarial AutoEncoder Training',
                                 port=8888)
Example #8
 def start_visdom(self):
     self.vis = utils.Visualizer(port=8888)
Example #9
        '''
        Return the number of images in the dataset.
        :return:
        '''
        return len(self.imgs)


if __name__ == '__main__':
    picFulPath(
        '/home/mlxuan/project/DeepLearning/data/benchmark/benchmark_RELEASE/dataset/train.txt',
        '/home/mlxuan/project/DeepLearning/data/benchmark/benchmark_RELEASE/dataset/img/',
        '/home/mlxuan/project/DeepLearning/data/benchmark/benchmark_RELEASE/dataset/cls/'
    )
    train_dataset = SBDClassSeg('./ImagAndLal.txt')
    from torch.utils.data import DataLoader
    trainloader = DataLoader(train_dataset,
                             batch_size=2,
                             shuffle=False,
                             drop_last=True)
    V = utils.Visualizer()
    # iterate over trainloader one batch at a time
    for i, (data, label) in enumerate(
            trainloader
    ):  # if batch_size is not 1, every Tensor in a batch must have the same size or iteration fails; `for data, label in trainloader:` also works
        data, label = trainloader.dataset.untransforms(data, label)

        V.plot_many('imgs' + str(i), data)
        V.plot_img('imgs' + str(i), label)

    V.close()
Example #10
def train(**kwargs):
    opt = Config()
    for _k, _v in kwargs.items():
        setattr(opt, _k, _v)

    device = t.device("cuda" if t.cuda.is_available() else "cpu")
    vis = utils.Visualizer(opt.env)

    # Data loading
    transforms = tv.transforms.Compose([
        tv.transforms.Resize(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x * 255)
    ])
    dataset = tv.datasets.ImageFolder(opt.data_root, transforms)
    dataloader = data.DataLoader(dataset, opt.batch_size)

    # Style transfer network
    transformer = TransformerNet()
    if opt.model_path:
        transformer.load_state_dict(
            t.load(opt.model_path, map_location=t.device('cpu')))
    transformer.to(device)

    # Loss network (VGG16)
    vgg = Vgg16().eval()
    vgg.to(device)
    for param in vgg.parameters():
        param.requires_grad = False

    # Optimizer
    optimizer = t.optim.Adam(transformer.parameters(), opt.lr)

    # Load the style image
    style = utils.get_style_data(opt.style_path)
    vis.img('style', (style.data[0] * 0.225 + 0.45).clamp(min=0, max=1))
    style = style.to(device)

    # Gram matrices of the style image
    with t.no_grad():
        features_style = vgg(style)
        gram_style = [utils.gram_matrix(y) for y in features_style]

    # Loss statistics
    style_loss_avg = 0
    content_loss_avg = 0

    for epoch in range(opt.epoches):
        for ii, (x, _) in tqdm(enumerate(dataloader)):

            # training
            optimizer.zero_grad()
            x = x.to(device)
            y = transformer(x)
            # print(y.size())
            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)
            features_x = vgg(x)
            features_y = vgg(y)

            # content loss
            content_loss = opt.content_weight * F.mse_loss(
                features_y.relu3_3, features_x.relu3_3)

            # style loss
            style_loss = 0
            for ft_y, gm_s in zip(features_y, gram_style):
                gram_y = utils.gram_matrix(ft_y)
                style_loss += F.mse_loss(gram_y, gm_s.expand_as(gram_y))
            style_loss *= opt.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            content_loss_avg += content_loss.item()
            style_loss_avg += style_loss.item()

            if (ii + 1) % opt.plot_every == 0:
                vis.plot('content_loss', content_loss_avg / opt.plot_every)
                vis.plot('style_loss', style_loss_avg / opt.plot_every)
                content_loss_avg = 0
                style_loss_avg = 0
                vis.img('output',
                        (y.data.cpu()[0] * 0.225 + 0.45).clamp(min=0, max=1))
                vis.img('input', (x.data.cpu()[0] * 0.225 + 0.45).clamp(min=0,
                                                                        max=1))

            if (ii + 1) % opt.save_every == 0:
                vis.save([opt.env])
                t.save(transformer.state_dict(),
                       'checkpoints/%s_style.pth' % (ii + 1))
Example #11
if use_gpu:
    net = net.cuda()

print(net)

train_data = YoloDataloader(img_root=train_img_root, label_path=train_label_path, train=True, transforms=trans)
train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
val_data = YoloDataloader(img_root=val_img_root, label_path=val_label_path, train=False, transforms=trans)
val_dataloader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=4)

## define the YOLOv1 loss
criterion = Lossv1(Ceils, Box, coor_l, noor_l)
## only optimize the fully connected weights
optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                            lr=learning_rate, momentum=momentum, weight_decay=decay)
vis = utils.Visualizer(env="handsome")   ## visdom

def train(epoch):
    net.train()
    total_loss = 0.
    for index,(imgs,labels) in enumerate(train_dataloader):
        if use_gpu:
            imgs = imgs.cuda()
            labels = labels.cuda()
        imgs = Variable(imgs)
        labels = Variable(labels)

        optimizer.zero_grad()
        out = net(imgs)
        out = out.view(batch_size,Ceils,Ceils,30)
        loss = criterion(out,labels)