Example #1
def main():
    BASE_DIR = r'./'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    draw_path = os.path.join(BASE_DIR, 'sample_run_MAE')
    if not os.path.exists(draw_path):
        os.makedirs(draw_path)

    writer = SummaryWriter(draw_path)

    # Read the image and resize it to a size the model accepts
    img_raw = Image.open(os.path.join(BASE_DIR, 'photo.jpg'))
    h, w = img_raw.height, img_raw.width
    ratio = h / w
    print(f"image hxw: {h} x {w} mode: {img_raw.mode}")

    img_size, patch_size = (224, 224), (16, 16)
    img = img_raw.resize(img_size)
    rh, rw = img.height, img.width
    print(f'resized image hxw: {rh} x {rw} mode: {img.mode}')
    img.save(os.path.join(BASE_DIR, 'resized_photo.jpg'))

    # Convert the image to a tensor
    from torchvision.transforms import ToTensor, ToPILImage

    img_ts = ToTensor()(img).unsqueeze(0).to(device)
    print(
        f"input tensor shape: {img_ts.shape} dtype: {img_ts.dtype} device: {img_ts.device}"
    )

    # Instantiate the model
    # encoder = ViT(img_size, patch_size, dim=512, mlp_dim=1024, dim_per_head=64)
    encoder = timm.create_model('vit_base_patch16_224',
                                pretrained=True,
                                num_classes=1000)
    decoder_dim = 512
    mae = MAE(encoder, decoder_dim, decoder_depth=6)
    # weight = torch.load(os.path.join(BASE_DIR, 'mae.pth'), map_location='cpu')  # load trained weights
    mae.to(device)

    # Training
    criterion = nn.MSELoss()
    optimizer = optim.Adam(mae.parameters(), lr=0.0001, weight_decay=0.01)

    train(mae, img_ts, optimizer, criterion, epoch=1000,
          writer=writer)  # TODO: this still uses a fake dataloader

    # Inference
    # Reconstructed image and masked image produced by the model
    recons_img_ts, masked_img_ts = mae.predict(img_ts)
    recons_img_ts, masked_img_ts = recons_img_ts.cpu().squeeze(
        0), masked_img_ts.cpu().squeeze(0)

    # Save the results so they can be compared with the original
    recons_img = ToPILImage()(recons_img_ts)
    recons_img.save(os.path.join(BASE_DIR, 'reconstructed_photo.jpg'))

    masked_img = ToPILImage()(masked_img_ts)
    masked_img.save(os.path.join(BASE_DIR, 'masked_photo.jpg'))

    # Plot
    img = Image.open(os.path.join(BASE_DIR, 'photo.jpg'))
    plt.figure("photo")  # figure window name
    plt.imshow(img)
    plt.axis('off')  # turn the axes off
    plt.title('photo')  # figure title
    plt.show()

    img = Image.open(os.path.join(BASE_DIR, 'masked_photo.jpg'))
    plt.figure("masked_photo")  # figure window name
    plt.imshow(img)
    plt.axis('off')  # turn the axes off
    plt.title('masked_photo')  # figure title
    plt.show()

    img = Image.open(os.path.join(BASE_DIR, 'reconstructed_photo.jpg'))
    plt.figure("reconstructed_photo")  # figure window name
    plt.imshow(img)
    plt.axis('off')  # turn the axes off
    plt.title('reconstructed_photo')  # figure title
    plt.show()
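A side note on the shape handling in this example: ToTensor yields a [C, H, W] tensor in [0, 1], the model wants a [B, C, H, W] batch, and ToPILImage wants [C, H, W] again, hence the unsqueeze(0)/squeeze(0) pair around the model. A minimal self-contained sketch of that round trip (the MAE is replaced by an identity, so shapes are the only point):

from PIL import Image
from torchvision.transforms import ToTensor, ToPILImage

img = Image.new('RGB', (224, 224))            # stand-in for the resized photo
x = ToTensor()(img)                           # [3, 224, 224], float32 in [0, 1]
batch = x.unsqueeze(0)                        # [1, 3, 224, 224], what the model consumes
out = batch                                   # stand-in for mae.predict(batch)
back = ToPILImage()(out.squeeze(0).cpu())     # [3, 224, 224] tensor -> PIL image
print(batch.shape, back.size)                 # torch.Size([1, 3, 224, 224]) (224, 224)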
Example #2
def to_image():
    return Compose([
        ToPILImage(),
        ToTensor()
    ])
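One caveat with this composition: ToPILImage followed by ToTensor is not a lossless identity, because the intermediate PIL image is 8-bit, so float values are clamped to [0, 1] and quantized to multiples of 1/255. A quick sketch demonstrating that:

import torch
from torchvision.transforms import Compose, ToPILImage, ToTensor

round_trip = Compose([ToPILImage(), ToTensor()])
x = torch.tensor([[[0.0, 0.3, 1.0]]])  # [1, 1, 3] single-channel float tensor
print(round_trip(x))                   # 0.3 comes back as 76/255 ~= 0.2980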
Example #3
        self.imgs = [os.path.join(root, img) for img in imgs]
        self.transforms = transforms

    def __getitem__(self, index):
        img_path = self.imgs[index]
        label = 0 if 'dog' in img_path.split('/')[-1] else 1
        data = Image.open(img_path)
        if self.transforms:
            data = self.transforms(data)
        return data, label

    def __len__(self):
        return len(self.imgs)


#Two frequently used torchvision utilities:
#make_grid stitches multiple images into a single grid image
#save_image saves a tensor directly as an image file
dataset = DogCat('./dogcat/', transforms=transform)

dataloader = DataLoader(dataset, shuffle=True, batch_size=16)

dataiter = iter(dataloader)
#print(len(next(dataiter)[0])) prints 8: the folder holds only 8 images, so even with batch_size=16 only 8 come back
img = make_grid(next(dataiter)[0], 4)  #arrange 4 images per row; 1-channel images are expanded to 3 channels
#img is now a single [3, H, W] tensor holding the whole grid
a = ToPILImage()(img)  #convert the tensor to a PIL image (note the double call)
#print(a)#<PIL.Image.Image image mode=RGB size=906x454 at 0x7F2C637872B0>
save_image(img, 'a.png')  #save the grid tensor as a.png
#Image.open('a.png')
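For reference, here is the same make_grid / save_image pattern as a self-contained sketch, with random tensors standing in for the DogCat batch:

import torch
from torchvision.utils import make_grid, save_image
from torchvision.transforms import ToPILImage

batch = torch.rand(8, 3, 64, 64)              # fake batch of 8 RGB images
grid = make_grid(batch, nrow=4)               # 4 per row -> 2x4 grid, one [3, H, W] tensor
ToPILImage()(grid).save('grid_pil.png')       # tensor -> PIL -> file
save_image(batch, 'grid_direct.png', nrow=4)  # or let save_image build the grid itself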
Example #4
MODEL_NAME = opt.model_name

path = 'drive/My Drive/Aerocosmos/10june/esrgan/main1/'
path_img = 'drive/My Drive/Aerocosmos/data/pan_png/SRF_4/data/'
IMAGE_NAME = path_img + IMAGE_NAME0

model = Generator(UPSCALE_FACTOR).eval()
if TEST_MODE:
    model.cuda()
    model.load_state_dict(torch.load(path + 'epochs/' + MODEL_NAME))
    # model.load_state_dict(torch.load('epochs/' + MODEL_NAME))
else:
    model.load_state_dict(
        torch.load(path + 'epochs/' + MODEL_NAME,
                   map_location=lambda storage, loc: storage))

image = Image.open(IMAGE_NAME)
with torch.no_grad():
    image = ToTensor()(image).unsqueeze(0)  # Variable(..., volatile=True) is deprecated; no_grad covers it

if TEST_MODE:
    image = image.cuda()

start = time.perf_counter()  # time.clock() was removed in Python 3.8
out = model(image)
elapsed = (time.perf_counter() - start)
#print('cost' + str(elapsed) + 's')
out_img = ToPILImage()(out[0].data.cpu())
out_img.save(path + 'test/' + 'out_srf_' + str(UPSCALE_FACTOR) + '_' +
             IMAGE_NAME0)
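The TEST_MODE branch above is essentially device-agnostic checkpoint loading. A hedged, minimal version of the same idea (the Sequential stands in for Generator; the commented-out path is the one from this example):

import torch
from torch import nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = nn.Sequential(nn.Conv2d(3, 3, 3, padding=1)).eval()  # stand-in for Generator(UPSCALE_FACTOR)
# map_location='cpu' makes a GPU-trained checkpoint loadable on any machine;
# .to(device) then moves the weights to wherever inference actually runs.
# model.load_state_dict(torch.load(path + 'epochs/' + MODEL_NAME, map_location='cpu'))
model = model.to(device)
with torch.no_grad():  # replaces the deprecated volatile=True
    out = model(torch.rand(1, 3, 64, 64, device=device))
print(out.shape)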
Example #5
    def validate(self, val_loader, epoch, use_cuda, log_path, **kwargs):
        loss_day2night2day_sum, loss_night2day2night_sum, loss_day2day_sum, loss_night2night_sum = 0, 0, 0, 0
        day_img, night_img = None, None

        with torch.no_grad():
            for day_img, night_img in val_loader:
                if use_cuda:
                    day_img, night_img = day_img.cuda(), night_img.cuda()

                # Day -> Night -> Day  and  Day -> Day
                loss_day2night2day, loss_day2day = \
                    self.cycle_plus_reconstruction_loss(day_img, self.ae_day, self.ae_night)

                # Night -> Day -> Night  and  Night -> Night
                loss_night2day2night, loss_night2night = \
                    self.cycle_plus_reconstruction_loss(night_img, self.ae_night, self.ae_day)

                loss_day2night2day_sum += loss_day2night2day
                loss_day2day_sum += loss_day2day
                loss_night2day2night_sum += loss_night2day2night
                loss_night2night_sum += loss_night2night

        loss_day2night2day_mean = loss_day2night2day_sum / len(val_loader)
        loss_night2day2night_mean = loss_night2day2night_sum / len(val_loader)
        loss_day2day_mean = loss_day2day_sum / len(val_loader)
        loss_night2night_mean = loss_night2night_sum / len(val_loader)

        # log losses
        log_str = f'[Epoch {epoch}] ' \
            f'Val loss day -> night -> day: {loss_day2night2day_mean} ' \
            f'Val loss night -> day -> night: {loss_night2day2night_mean} ' \
            f'Val loss day -> day: {loss_day2day_mean} ' \
            f'Val loss night -> night: {loss_night2night_mean}'
        print(log_str)
        with open(os.path.join(log_path, 'log.txt'), 'a+') as f:
            f.write(log_str + '\n')

        # create sample images

        latent_day = self.ae_day.encode(day_img[0].unsqueeze(0))
        latent_night = self.ae_night.encode(night_img[0].unsqueeze(0))
        # reconstruction
        day2day = self.ae_day.decode(latent_day)
        night2night = self.ae_night.decode(latent_night)
        # domain translation
        day2night = self.ae_night.decode(latent_day)
        night2day = self.ae_day.decode(latent_night)
        # cycle
        day2night2day = self.ae_day.decode(self.ae_night.encode(day2night))
        night2day2night = self.ae_night.decode(self.ae_day.encode(night2day))

        # save sample images
        samples = {
            'day_img': day_img[0],
            'night_img': night_img[0],
            'day2day': day2day[0],
            'night2night': night2night[0],
            'day2night': day2night[0],
            'night2day': night2day[0],
            'day2night2day': day2night2day[0],
            'night2day2night': night2day2night[0],
        }

        for name, img in samples.items():
            ToPILImage()(img.cpu()).save(os.path.join(log_path, f'{epoch}_{name}.jpeg'), 'JPEG')
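The method cycle_plus_reconstruction_loss is called above but not shown. Judging from the call sites (an image, its own-domain autoencoder, the other domain's autoencoder; the cycle loss returned first, the reconstruction loss second) and from the sample-image code, a plausible sketch is the following. This is an assumption about the missing method, not the original implementation; it presumes torch.nn.functional is imported as F, and the choice of L1 over MSE is a guess.

    def cycle_plus_reconstruction_loss(self, img, ae_own, ae_other):
        # Hypothetical reconstruction of the helper used in validate() above.
        latent = ae_own.encode(img)
        # reconstruction: img -> latent -> img within the same domain
        recon = ae_own.decode(latent)
        loss_reconstruction = F.l1_loss(recon, img)
        # cycle: translate to the other domain, then translate back
        translated = ae_other.decode(latent)
        cycled = ae_own.decode(ae_other.encode(translated))
        loss_cycle = F.l1_loss(cycled, img)
        return loss_cycle, loss_reconstruction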
Example #6
def main():
    # Augment the data to reduce overfitting
    show = ToPILImage()  # converts a Tensor back to an Image, handy for visualization
    transform = transforms.Compose([  # transforms.Compose chains the image-processing steps
        transforms.RandomHorizontalFlip(),  # random horizontal flip
        transforms.RandomCrop((32, 32), padding=4),
        transforms.ToTensor(),  # convert to Tensor
        # Normalize works on tensors, so the PIL Image must pass through ToTensor first;
        # resize/crop operate on PIL images and therefore come earlier.
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # normalize to [-1, 1]
    ])
    # Training set
    # Download the training data
    trainset = tv.datasets.CIFAR10(
        root='D:\\AI\\practise',  # dataset root directory  D:\AI\practise
        train=True,  # True selects the training split
        download=True,
        transform=transform
    )
    # How the training data is loaded
    trainloader = t.utils.data.DataLoader(
        trainset,  # use the training set
        batch_size=4,  # 4 samples per batch
        shuffle=True,  # reshuffle the data every epoch
        num_workers=2  # 2 worker subprocesses load the data
    )
    # Test set
    # Download the test data
    testset = tv.datasets.CIFAR10(
        'D:\\AI\\practise',
        train=False,
        download=True,
        transform=transform  # reuse the transform defined above
    )
    # How the test data is loaded
    testloader = t.utils.data.DataLoader(
        testset,
        batch_size=4,
        shuffle=False,
        num_workers=2)
    # The dataset's 10 class labels
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    net = Net()
    criterion = nn.CrossEntropyLoss()  # cross-entropy loss
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)  # SGD: stochastic gradient descent with momentum

    for epoch in range(10):  # number of training epochs
        running_loss = 0.0
        # Iterate over the training set
        for i, data in enumerate(trainloader, 0):  # enumerate the batches starting from index 0

            # Input data
            # Unpack the batch into inputs and labels
            inputs, labels = data
            inputs, labels = Variable(inputs), Variable(labels)  # Variable is a no-op in modern PyTorch

            # Zero the gradients, i.e. reset d(loss)/d(weight) to 0
            optimizer.zero_grad()

            # forward + backward
            # Run the network to get its output
            outputs = net(inputs)

            # Compute the loss: feed the network outputs and the labels to the loss function
            loss = criterion(outputs, labels)  # cross-entropy between outputs and labels

            loss.backward()
            # loss.backward() computes the gradients. When not every Variable needs a gradient,
            # subgraphs can be excluded from backward via requires_grad (and, in old PyTorch, volatile).

            # Update the parameters

            # Backpropagation computes the gradients; optimizer.step() then uses them to update
            # the parameters. Afterwards the layers' weights can be inspected via
            # optimizer.param_groups[0]['params'].
            optimizer.step()  # apply the computed gradients to the parameters

            # Print log info
            running_loss += loss.item()  # .item() extracts a Python number from the tensor
            if i % 2000 == 1999:  # print training status every 2000 batches
                print('[%d, %5d] loss: %.3f' \
                      % (epoch + 1, i + 1, running_loss / 2000))

                running_loss = 0.0
    print('Finished Training')
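Since Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) maps pixels from [0, 1] to [-1, 1], the show helper above only yields a sensible picture after that shift is undone. A minimal sketch of the display path (random data stands in for a CIFAR-10 sample):

import torch
from torchvision.transforms import ToPILImage

show = ToPILImage()
normalized = torch.rand(3, 32, 32) * 2 - 1   # pretend sample already normalized to [-1, 1]
img = show(normalized * 0.5 + 0.5)           # invert Normalize: x * std + mean
print(img.size, img.mode)                    # (32, 32) RGB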
Example #7
import torchvision as tv
import torchvision.transforms as transforms
from torchvision.transforms import ToPILImage
import torch as t
show = ToPILImage()  # converts a Tensor back to an Image for visualization
transform = transforms.Compose([  # renamed from `transforms` to avoid shadowing the module
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = tv.datasets.CIFAR10(
    root='G:/CIFAR10/',
    train=True,
    download=True,
    transform=transform,
)
train_loader = t.utils.data.DataLoader(trainset,  # DataLoader, not Dataloader
                                       batch_size=4,
                                       shuffle=True,
                                       num_workers=0)

testset = tv.datasets.CIFAR10('G:/CIFAR10/',
                              train=False,
                              download=True,
                              transform=transform)

test_loader = t.utils.data.DataLoader(testset,
                                      batch_size=4,
                                      shuffle=True,
                                      num_workers=0)
Example #8
def display_transform():
    return Compose([ToPILImage(), Resize(400), CenterCrop(400), ToTensor()])
Example #9
 def __init__(self, pre_trained="Waifu2x/model_check_points/DCSCN/DCSCN_weights_387epos_L12_noise_1.pt"):
     self.model = DCSCN(color_channel=3, up_scale=2, feature_layers=12, first_feature_filters=196,
                        last_feature_filters=48, reconstruction_filters=128, up_sampler_filters=32)
     self.model.load_state_dict(torch.load(pre_trained, map_location='cpu'))
     self.model = self.model.cuda()
     self.unloader = ToPILImage()
Example #10
import torch
from torch import nn
from math import floor
from torch.nn import functional as F
from torch.nn.init import xavier_uniform_, xavier_normal_, kaiming_uniform_, kaiming_normal_
from torch.utils.data import Dataset
from torchvision.transforms import CenterCrop, ToTensor, ToPILImage, RandomHorizontalFlip, RandomResizedCrop, RandomVerticalFlip, RandomRotation
import numpy as np
import h5py
import math
from numbers import Number
to_tensor = ToTensor()
to_PIL_image = ToPILImage()

from torchvision import transforms
import random
''' ---------------------------------------------
               PYTORCH DATASET HELPERS
-------------------------------------------------'''


class Dataset(Dataset):  # note: shadows torch.utils.data.Dataset imported above
    """ Dataset to train auto-encoder representations during exploration"""
    def __init__(self, img_size, preprocess=None, data_augmentation=False):

        self.n_images = 0
        self.images = []
        self.labels = []

        self.img_size = img_size
Example #11
                    help='name of the rollout')
parser.add_argument('--num_classes',
                    default=125,
                    type=int,
                    help='number of classes in dataset')
parser.add_argument('--dataloader_num_workers',
                    default=4,
                    type=int,
                    help='number of workers for dataloader')
parser.add_argument('--resume_training',
                    action='store_true',
                    help='whether to resume training')  # type=bool is a trap: bool('False') is True
args = parser.parse_args()

IMAGE_TRANSFORM = Compose([UnnormImage(), ToPILImage()])
IMAGE_TRANSFORM_TB = Compose([UnnormImage()])  # for tensorboard

CLASSIFIER_PATH = 'pretrained_models/resnet_classifier.pt'


def test(model,
         test_loader,
         outpath,
         device,
         epoch,
         log=None,
         writer=None,
         comparator=None):
    log.info(f'Classifier weights: {CLASSIFIER_PATH}')
    classifier = ResnetClassifier()
Example #12
def draw(index, output):
    img = Colorize()(output.unsqueeze(0))
    img = ToPILImage()(img)
    img.save('./pic/' + str(index) + '.png')
Example #13
def train(args, model, enc=False):
    global best_acc

    weight = torch.ones(NUM_CLASSES)
    weight[0] = 121.21
    weight[1] = 947.02
    weight[2] = 151.92
    weight[3] = 428.31
    weight[4] = 25.88
    weight[5] = 235.97
    weight[6] = 885.72
    weight[7] = 911.87
    weight[8] = 307.49
    weight[9] = 204.69
    weight[10] = 813.92
    weight[11] = 5.83
    weight[12] = 34.22
    weight[13] = 453.34
    weight[14] = 346.10
    weight[15] = 250.19
    weight[16] = 119.99
    weight[17] = 75.28
    weight[18] = 76.71
    weight[19] = 8.58
    weight[20] = 281.68
    weight[21] = 924.07
    weight[22] = 3.91
    weight[23] = 7.14
    weight[24] = 88.89
    weight[25] = 59.00
    weight[26] = 126.59
    weight[27] = 0

    assert os.path.exists(
        args.datadir), "Error: datadir (dataset directory) could not be loaded"

    co_transform = MyCoTransform(enc, augment=True, height=args.height)  #1024)
    co_transform_val = MyCoTransform(enc, augment=False,
                                     height=args.height)  #1024)
    dataset_train = cityscapes(args.datadir, co_transform, 'train')
    dataset_val = cityscapes(args.datadir, co_transform_val, 'val')

    loader = DataLoader(dataset_train,
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)
    loader_val = DataLoader(dataset_val,
                            num_workers=args.num_workers,
                            batch_size=args.batch_size,
                            shuffle=False)

    if args.cuda:
        #criterion = CrossEntropyLoss2d(weight.cuda())
        criterion = FocalLoss2d(weight.cuda())
    else:
        #criterion = CrossEntropyLoss2d(weight)
        criterion = FocalLoss2d(weight)  # no .cuda() on the CPU path

    print(type(criterion))

    savedir = f'../save/{args.savedir}'

    if (enc):
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"

    if not os.path.exists(automated_log_path):  # don't write the header row if the file already exists
        with open(automated_log_path, "a") as myfile:
            myfile.write(
                "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate"
            )

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    #optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),  eps=1e-08, weight_decay=2e-4)     ## scheduler 1
    optimizer = Adam(model.parameters(),
                     5e-5, (0.9, 0.999),
                     eps=1e-08,
                     weight_decay=2e-4)  ## scheduler 2

    start_epoch = 1

    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5) # set up scheduler     ## scheduler 1
    lambda1 = lambda epoch: pow(
        (1 - ((epoch - 1) / args.num_epochs)), 0.9)  ## scheduler 2
    scheduler = lr_scheduler.LambdaLR(optimizer,
                                      lr_lambda=lambda1)  ## scheduler 2

    for epoch in range(start_epoch, args.num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")

        scheduler.step(epoch)  ## scheduler 2

        epoch_loss = []
        time_train = []

        doIouTrain = args.iouTrain
        doIouVal = args.iouVal

        #TODO: remake the evalIoU.py code to avoid using "evalIoU.args"
        confMatrix = evalIoU.generateMatrixTrainId(evalIoU.args)
        perImageStats = {}
        nbPixels = 0

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        model.train()
        for step, (images, labels) in enumerate(loader):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs, only_encode=enc)
            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])
            #loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.item())  # loss.data[0] breaks on 0-dim tensors in modern PyTorch
            time_train.append(time.time() - start_time)

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(
                    f'loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_train) / len(time_train) / args.batch_size))

        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)
        #evalIoU.printConfMatrix(confMatrix, evalIoU.args)

        #Validate on 500 val images after each epoch of training
        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        epoch_loss_val = []
        time_val = []

        #New confusion matrix data
        confMatrix = evalIoU.generateMatrixTrainId(evalIoU.args)
        perImageStats = {}
        nbPixels = 0

        for step, (images, labels) in enumerate(loader_val):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(
                images, volatile=True
            )  # volatile skipped gradient tracking during eval; modern code would use torch.no_grad()
            targets = Variable(labels, volatile=True)
            outputs = model(inputs, only_encode=enc)

            loss = criterion(outputs, targets[:, 0])
            epoch_loss_val.append(loss.item())
            time_val.append(time.time() - start_time)

            #Add outputs to confusion matrix
            if (doIouVal):
                #compatibility with criterion dataparallel
                if isinstance(outputs, list):  #merge gpu tensors
                    outputs_cpu = outputs[0].cpu()
                    for i in range(1, len(outputs)):
                        outputs_cpu = torch.cat(
                            (outputs_cpu, outputs[i].cpu()), 0)
                    #print(outputs_cpu.size())
                else:
                    outputs_cpu = outputs.cpu()

                #start_time_iou = time.time()
                for i in range(0, outputs_cpu.size(0)):  #args.batch_size
                    prediction = ToPILImage()(
                        outputs_cpu[i].max(0)[1].data.unsqueeze(0).byte())
                    groundtruth = ToPILImage()(labels[i].cpu().byte())
                    nbPixels += evalIoU.evaluatePairPytorch(
                        prediction, groundtruth, confMatrix, perImageStats,
                        evalIoU.args)
                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss_val) / len(epoch_loss_val)
                print(
                    f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_val) / len(time_val) / args.batch_size))

        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)
        #scheduler.step(average_epoch_loss_val, epoch)  ## scheduler 1   # update lr if needed

        # Calculate IOU scores on class level from matrix
        iouVal = 0
        iouTrain = 0
        if (doIouVal):
            #start_time_iou = time.time()
            classScoreList = {}
            for label in evalIoU.args.evalLabels:
                labelName = evalIoU.trainId2label[label].name
                classScoreList[labelName] = evalIoU.getIouScoreForTrainLabel(
                    label, confMatrix, evalIoU.args)

            iouAvgStr = evalIoU.getColorEntry(
                evalIoU.getScoreAverage(classScoreList, evalIoU.args),
                evalIoU.args) + "{avg:5.3f}".format(
                    avg=evalIoU.getScoreAverage(
                        classScoreList, evalIoU.args)) + evalIoU.args.nocol
            iouVal = float(
                evalIoU.getScoreAverage(classScoreList, evalIoU.args))
            print("EPOCH IoU on VAL set: ", iouAvgStr)

        # remember best valIoU and save checkpoint
        if iouVal == 0:
            current_acc = average_epoch_loss_val
        else:
            current_acc = iouVal
        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)
        if (enc and epoch == args.num_epochs):
            best_acc = 0

        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth'
            filenameBest = savedir + '/model_best_enc.pth'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth'
            filenameBest = savedir + '/model_best.pth'
        save_checkpoint({
            'state_dict': model.state_dict(),
        }, is_best, filenameCheckpoint, filenameBest)

        #SAVE MODEL AFTER EPOCH
        if (enc):
            filename = f'{savedir}/model_encoder-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_encoder_best_each.pth'
        else:
            filename = f'{savedir}/model-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_best_each.pth'
        if args.epochs_save > 0 and epoch > 0 and epoch % args.epochs_save == 0:  # 'step' here was a bug: it held the last batch index
            torch.save(model.state_dict(), filename)
            print(f'save: {filename} (epoch: {epoch})')
        #if (True) #(is_best):
        torch.save(model.state_dict(), filenamebest)
        print(f'save: {filenamebest} (epoch: {epoch})')
        filenameSuperBest = f'{savedir}/model_superbest.pth'
        if (is_best):
            torch.save(model.state_dict(), filenameSuperBest)
            print(f'saving superbest')
        if (not enc):
            with open(savedir + "/best.txt", "w") as myfile:
                myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                             (epoch, iouVal))
        else:
            with open(savedir + "/best_encoder.txt", "w") as myfile:
                myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                             (epoch, iouVal))

        #SAVE TO FILE A ROW WITH THE EPOCH RESULT (train loss, val loss, train IoU, val IoU)
        #Epoch		Train-loss		Test-loss	Train-IoU	Test-IoU		learningRate
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" %
                         (epoch, average_epoch_loss_train,
                          average_epoch_loss_val, iouTrain, iouVal, usedLr))

    return model  # convenience for encoder-decoder training
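The lambda1 schedule above is the common "poly" decay: at epoch e the learning rate is base_lr * (1 - (e - 1) / num_epochs)^0.9. A standalone sketch of the same schedule using LambdaLR's own 0-indexed counter (the explicit scheduler.step(epoch) call above is the older, deprecated calling style):

from torch import nn
from torch.optim import Adam
from torch.optim import lr_scheduler

num_epochs = 10
model = nn.Linear(4, 2)
optimizer = Adam(model.parameters(), lr=5e-5, betas=(0.9, 0.999), eps=1e-08, weight_decay=2e-4)
poly = lambda epoch: (1 - epoch / num_epochs) ** 0.9   # 0-indexed variant of lambda1
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=poly)
for epoch in range(num_epochs):
    optimizer.step()   # the training epoch would run here
    scheduler.step()   # then decay the lr once per epoch
    print(epoch, optimizer.param_groups[0]['lr'])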
Example #14
def t2pil(t):
    return ToPILImage()(t)
Example #15
from torchvision.transforms import Compose, CenterCrop, Normalize, Resize  # Scale was renamed Resize in newer torchvision
from torchvision.transforms import ToTensor, ToPILImage

import network, dataset, criterion, transform
from dataset import VOC12
from network import PSPNet
from criterion import CrossEntropyLoss2d
from transform import Relabel, ToLabel, Colorize
import deeplab_resnet
#from visualize import Dashboard

NUM_CHANNELS = 3
NUM_CLASSES = 22

color_transform = Colorize()
image_transform = ToPILImage()
input_transform = Compose([
    CenterCrop(256),
    Resize(136),
    ToTensor(),
    Normalize([.485, .456, .406], [.229, .224, .225]),
])
target_transform = Compose([
    CenterCrop(256),
    Resize(136),
    ToLabel(),
    Relabel(255, 21),
])

def train(args, model):
    model.train()
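ToLabel and Relabel come from this repository's own transform module rather than torchvision, so they are not shown here. Given how target_transform uses them, plausible minimal definitions are the following; this is an assumption about the codebase, not a verbatim copy:

import torch
import numpy as np

class ToLabel:
    # PIL label image -> LongTensor of class ids with a leading channel dim
    def __call__(self, image):
        return torch.from_numpy(np.array(image)).long().unsqueeze(0)

class Relabel:
    # Replace one class id with another, e.g. Relabel(255, 21)
    # maps the 'ignore' id 255 onto class 21.
    def __init__(self, olabel, nlabel):
        self.olabel, self.nlabel = olabel, nlabel

    def __call__(self, tensor):
        tensor[tensor == self.olabel] = self.nlabel
        return tensor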
Example #16
def plot_video(orig: Tensor,
               recons: Tensor,
               model_name: str,
               epoch: int,
               out_path: str,
               rows: int,
               cols: int,
               fps: int,
               thumbnail_width: int = None,
               thumbnail_height: int = None):
    if orig.shape[-2:] != (thumbnail_height, thumbnail_width):
        # Resize each frame
        to_pil = ToPILImage()
        to_tensor = ToTensor()
        resize = Resize((thumbnail_height, thumbnail_width))

        def transform(x):
            return to_tensor(resize(to_pil(x)))

        def resize_batch(batch):
            return torch.cat([
                torch.cat(
                    [transform(frame).unsqueeze(dim=0)
                     for frame in video]).unsqueeze(dim=0) for video in batch
            ])

        recons = resize_batch(recons)
        orig = resize_batch(orig)

    # Convert [B, T, C, H, W] to [T, C, H, W]

    # Distributing the batch dimension in a grid
    n = min(rows * cols, orig.shape[0])
    i = 0
    video_rows = []
    for _ in range(rows):
        done = False
        # Build each row, one column at a time
        video_cols = []
        for _ in range(cols):
            if i >= n:
                done = True
                break
            # Original on left, recons on right
            video = torch.cat([orig[i], recons[i]], dim=-1)
            video *= 255.0
            video = video.byte()
            video_cols.append(video)
            i += 1
        while len(video_cols) < cols:
            # Append black videos to the empty spaces
            video_cols.append(torch.zeros(video.shape))
        # Concatenate all columns into a row
        video_row = torch.cat(video_cols, dim=-1)
        video_rows.append(video_row)
        if done:
            break

    # Concatenate all rows into a single video
    video_array = torch.cat(video_rows, dim=-2)
    # [T, C, H, W] -> [T, W, H, C] -> [T, H, W, C]
    video_array = torch.transpose(video_array, 1, -1)
    video_array = torch.transpose(video_array, 1, 2)
    # Monochrome to RGB
    video_array = video_array.repeat(1, 1, 1, 3)

    # Export the tensor as a video
    # TODO: improve video quality
    write_video(out_path + '.mp4', video_array, fps)
Example #17
import torch as t
import torchvision as tv
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.transforms import ToPILImage
from torch import optim
show = ToPILImage()


# Define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.conv3 = nn.Conv2d(16, 32, 5)  # note: defined but never used in forward
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(x.size()[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

Example #18
def fmri_stat_map_video(orig: Tensor,
                        recons: Tensor,
                        model_name: str,
                        epoch: int,
                        out_path: str,
                        bg_img: str,
                        mask_path: str,
                        rows: int,
                        cols: int,
                        fps: int = 1,
                        format: str = 'gif'):
    mask = nl.image.load_img(mask_path)
    num_frames = orig.shape[1]
    n = min(orig.shape[0], rows * cols)

    def plot_frame(x, out_path):
        x = nl.image.new_img_like(mask,
                                  x.numpy(),
                                  affine=mask.affine,
                                  copy_header=True)
        try:
            nlplt.plot_stat_map(x,
                                bg_img=bg_img,
                                threshold=3,
                                colorbar=False,
                                output_file=out_path)
            img = ToTensor()(Image.open(out_path))
        finally:
            try:
                os.remove(out_path)
            except OSError:  # only swallow filesystem errors, not a bare except
                pass
        return img

    frames = []
    for frame in range(num_frames):
        i = 0
        frame_rows = []
        for _ in range(rows):
            done = False
            frame_cols = []
            for _ in range(cols):
                if i >= n:
                    done = True
                    break
                o = plot_frame(orig[i, :, :, :, frame],
                               f'{out_path}_{i}_{frame}_orig.tmp.png')
                r = plot_frame(recons[i, :, :, :, frame],
                               f'{out_path}_{i}_{frame}_recons.tmp.png')
                f = torch.cat([o, r], dim=-2)
                frame_cols.append(f)
                i += 1
            frame_cols = torch.cat(frame_cols, dim=-1)
            frame_rows.append(frame_cols)
            if done:
                break
        frame_rows = torch.cat(frame_rows, dim=-2)
        ToPILImage()(frame_rows).save(f'{out_path}_{frame}.tmp.png')
        frames.append(frame_rows.unsqueeze(0))

    def path(p):
        if os.name == 'nt':
            return f'$(wslpath {p})'
        return p

    def run(cmd):
        if os.name == 'nt':
            return subprocess.run(['debian.exe', 'run', cmd],
                                  capture_output=True)
        proc = subprocess.run(['sh', '-c', cmd], capture_output=True)
        if proc.returncode != 0:
            msg = f'expected exit code 0 from ffmpeg, got exit code {proc.returncode}: {proc.stdout.decode("unicode_escape")}'
            if proc.stderr:
                msg += ' ' + proc.stderr.decode('unicode_escape')
            raise ValueError(msg)

    in_path = path(f'{out_path}_%d.tmp.png')
    webm_path = path(f'{out_path}.webm')
    run(f'ffmpeg -y -framerate {fps} -i {in_path} -c:v libvpx-vp9 -pix_fmt yuva420p -lossless 1 {webm_path}'
        )
    if format == 'gif':
        gif_path = path(f'{out_path}.gif')
        run(f'ffmpeg -y -i {webm_path} {gif_path}')
        os.remove(f'{out_path}.webm')
    elif format != 'webm':
        raise ValueError('unknown format')
    for i in range(num_frames):
        os.remove(f'{out_path}_{i}.tmp.png')
Example #19
def convert_sr(image):
    return ToPILImage()(image.squeeze(0))
Example #20
def main(args):
    logger = CompleteLogger(args.log, args.phase)

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    cudnn.benchmark = True

    # Data loading code
    train_transform = T.Compose([
        T.RandomResizedCrop(size=args.train_size,
                            ratio=args.resize_ratio,
                            scale=(0.5, 1.)),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    source_dataset = datasets.__dict__[args.source]
    train_source_dataset = source_dataset(root=args.source_root,
                                          transforms=train_transform)
    train_source_loader = DataLoader(train_source_dataset,
                                     batch_size=args.batch_size,
                                     shuffle=True,
                                     num_workers=args.workers,
                                     pin_memory=True,
                                     drop_last=True)

    target_dataset = datasets.__dict__[args.target]
    train_target_dataset = target_dataset(root=args.target_root,
                                          transforms=train_transform)
    train_target_loader = DataLoader(train_target_dataset,
                                     batch_size=args.batch_size,
                                     shuffle=True,
                                     num_workers=args.workers,
                                     pin_memory=True,
                                     drop_last=True)

    train_source_iter = ForeverDataIterator(train_source_loader)
    train_target_iter = ForeverDataIterator(train_target_loader)

    # define networks (both generators and discriminators)
    netG_S2T = cyclegan.generator.__dict__[args.netG](
        ngf=args.ngf, norm=args.norm, use_dropout=False).to(device)
    netG_T2S = cyclegan.generator.__dict__[args.netG](
        ngf=args.ngf, norm=args.norm, use_dropout=False).to(device)
    netD_S = cyclegan.discriminator.__dict__[args.netD](
        ndf=args.ndf, norm=args.norm).to(device)
    netD_T = cyclegan.discriminator.__dict__[args.netD](
        ndf=args.ndf, norm=args.norm).to(device)

    # create image buffer to store previously generated images
    fake_S_pool = ImagePool(args.pool_size)
    fake_T_pool = ImagePool(args.pool_size)

    # define optimizer and lr scheduler
    optimizer_G = Adam(itertools.chain(netG_S2T.parameters(),
                                       netG_T2S.parameters()),
                       lr=args.lr,
                       betas=(args.beta1, 0.999))
    optimizer_D = Adam(itertools.chain(netD_S.parameters(),
                                       netD_T.parameters()),
                       lr=args.lr,
                       betas=(args.beta1, 0.999))
    lr_decay_function = lambda epoch: 1.0 - max(0, epoch - args.epochs
                                                ) / float(args.epochs_decay)
    lr_scheduler_G = LambdaLR(optimizer_G, lr_lambda=lr_decay_function)
    lr_scheduler_D = LambdaLR(optimizer_D, lr_lambda=lr_decay_function)

    # optionally resume from a checkpoint
    if args.resume:
        print("Resume from", args.resume)
        checkpoint = torch.load(args.resume, map_location='cpu')
        netG_S2T.load_state_dict(checkpoint['netG_S2T'])
        netG_T2S.load_state_dict(checkpoint['netG_T2S'])
        netD_S.load_state_dict(checkpoint['netD_S'])
        netD_T.load_state_dict(checkpoint['netD_T'])
        optimizer_G.load_state_dict(checkpoint['optimizer_G'])
        optimizer_D.load_state_dict(checkpoint['optimizer_D'])
        lr_scheduler_G.load_state_dict(checkpoint['lr_scheduler_G'])
        lr_scheduler_D.load_state_dict(checkpoint['lr_scheduler_D'])
        args.start_epoch = checkpoint['epoch'] + 1

    if args.phase == 'test':
        transform = T.Compose([
            T.Resize(image_size=args.test_input_size),
            T.wrapper(cyclegan.transform.Translation)(netG_S2T, device),
        ])
        train_source_dataset.translate(transform, args.translated_root)
        return

    # define loss function
    criterion_gan = cyclegan.LeastSquaresGenerativeAdversarialLoss()
    criterion_cycle = nn.L1Loss()
    criterion_identity = nn.L1Loss()
    criterion_semantic = SemanticConsistency(
        ignore_index=[args.ignore_label] +
        train_source_dataset.ignore_classes).to(device)
    interp_train = nn.Upsample(size=args.train_size[::-1],
                               mode='bilinear',
                               align_corners=True)

    # define segmentation model and predict function
    model = models.__dict__[args.arch](
        num_classes=train_source_dataset.num_classes).to(device)
    if args.pretrain:
        print("Loading pretrain segmentation model from", args.pretrain)
        checkpoint = torch.load(args.pretrain, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
    model.eval()

    cycle_gan_tensor_to_segmentation_tensor = Compose([
        Denormalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        Lambda(lambda image: image.mul(255).permute((1, 2, 0))),
        NormalizeAndTranspose(),
    ])

    def predict(image):
        image = cycle_gan_tensor_to_segmentation_tensor(image.squeeze())
        image = image.unsqueeze(dim=0).to(device)
        prediction = model(image)
        return interp_train(prediction)

    # define visualization function
    tensor_to_image = Compose(
        [Denormalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
         ToPILImage()])
    decode = train_source_dataset.decode_target

    def visualize(image, name, pred=None):
        """
        Args:
            image (tensor): image in shape 3 x H x W
            name: name of the saving image
            pred (tensor): predictions in shape C x H x W
        """
        tensor_to_image(image).save(
            logger.get_image_path("{}.png".format(name)))
        if pred is not None:
            pred = pred.detach().max(dim=0).indices.cpu().numpy()
            pred = decode(pred)
            pred.save(logger.get_image_path("pred_{}.png".format(name)))

    # start training
    for epoch in range(args.start_epoch, args.epochs + args.epochs_decay):
        logger.set_epoch(epoch)
        print(lr_scheduler_G.get_lr())

        # train for one epoch
        train(train_source_iter, train_target_iter, netG_S2T, netG_T2S, netD_S,
              netD_T, predict, criterion_gan, criterion_cycle,
              criterion_identity, criterion_semantic, optimizer_G, optimizer_D,
              fake_S_pool, fake_T_pool, epoch, visualize, args)

        # update learning rates
        lr_scheduler_G.step()
        lr_scheduler_D.step()

        # save checkpoint
        torch.save(
            {
                'netG_S2T': netG_S2T.state_dict(),
                'netG_T2S': netG_T2S.state_dict(),
                'netD_S': netD_S.state_dict(),
                'netD_T': netD_T.state_dict(),
                'optimizer_G': optimizer_G.state_dict(),
                'optimizer_D': optimizer_D.state_dict(),
                'lr_scheduler_G': lr_scheduler_G.state_dict(),
                'lr_scheduler_D': lr_scheduler_D.state_dict(),
                'epoch': epoch,
                'args': args
            }, logger.get_checkpoint_path(epoch))

    if args.translated_root is not None:
        transform = T.Compose([
            T.Resize(image_size=args.test_input_size),
            T.wrapper(cyclegan.transform.Translation)(netG_S2T, device),
        ])
        train_source_dataset.translate(transform, args.translated_root)

    logger.close()
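Denormalize here is imported from this project, not from torchvision. Since Normalize computes x_norm = (x - mean) / std, its inverse is x = x_norm * std + mean, which Normalize itself can express with mean' = -mean/std and std' = 1/std. A sketch of that equivalence (an assumed stand-in, not the project's actual class):

from torchvision.transforms import Compose, Normalize, ToPILImage

def denormalize(mean, std):
    # Inverse of Normalize(mean, std): a Normalize that computes x * std + mean
    inv_mean = [-m / s for m, s in zip(mean, std)]
    inv_std = [1 / s for s in std]
    return Normalize(inv_mean, inv_std)

tensor_to_image = Compose([
    denormalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ToPILImage()
])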
Example #21
import torchvision as tv
import torchvision.transforms as transforms
from torchvision.transforms import ToPILImage
import torch as t
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
show = ToPILImage()  # converts a Tensor back to an Image, handy for visualization

# On the first run torchvision downloads the CIFAR-10 dataset automatically,
# about 100 MB, which takes some time;
# if CIFAR-10 is already downloaded, point the root parameter at it

# Define the preprocessing applied to the data
transform = transforms.Compose([
    transforms.ToTensor(),  # convert to Tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # normalize
])

# Training set
trainset = tv.datasets.CIFAR10(root='/home/cy/tmp/data/',
                               train=True,
                               download=True,
                               transform=transform)

trainloader = t.utils.data.DataLoader(trainset,
                                      batch_size=4,
                                      shuffle=True,
                                      num_workers=2)

# Test set
Example #22
def x_ray_pred(image, is_cnn_feat):

    int_to_class = ['NORMAL', 'PNEUMONIA']
    img, convert = img_to_tensor(image)
    if is_cnn_feat:
        visualize_cnn(img)
    pred, prob = check_image(img)
    if int(pred) == 1:
        grad_cam(image, pred)

    return pred, prob, int_to_class[pred], convert


#--------------------------------------------------------
from torchvision.transforms import ToPILImage
to_img = ToPILImage()


def save_visual(output, name):
    for i in range(int(output.size(0))):
        img = to_img(output[i])
        basewidth = 150
        wpercent = (basewidth / float(img.size[0]))
        hsize = int((float(img.size[1]) * float(wpercent)))
        img = img.resize((basewidth, hsize), Image.LANCZOS)  # Image.ANTIALIAS was removed in Pillow 10
        img.save('static/visual_img/{}_{}.jpg'.format(name, i))


def visualize_cnn(x):
    conv1 = nn.Sequential(*list(model.features.children()))[:1](x)[0,
                                                                   0:10, :, :]
Example #23
THRESH = 100

font = cv2.FONT_HERSHEY_SIMPLEX

faceCascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

video_capture = cv2.VideoCapture(0, cv2.CAP_DSHOW)

model = torch.load('modelFaceMask.pt')
model.eval()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
labels = ['Mask', 'No Mask']
labelColor = [(10, 255, 0), (10, 0, 255)]
transformations = Compose([
    ToPILImage(),
    Resize((128, 128)),
    ToTensor(),
    # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

while True:
    ret, frame = video_capture.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    faces = faceCascade.detectMultiScale(gray,
                                         scaleFactor=1.1,
                                         minNeighbors=5,
                                         minSize=(30, 30),
                                         flags=cv2.CASCADE_SCALE_IMAGE)
Example #24
    [fitvec],
    lr=1e-3,
)
#%%
for step in range(250):
    fitimg = G.visualize(fitvec)
    dsim = ImDist(fitimg, refimg)
    L1 = L1loss(fitimg, refimg)
    loss = L1 + dsim
    optimizer.zero_grad()  # without this, gradients accumulate across steps
    loss.backward()
    optimizer.step()
    if step % 10 == 0:
        print("step %d L1 %.3f dsim %.3f" % (step, L1.item(), dsim.item()))

imgcmp = torch.cat((refimg, fitimg))
ToPILImage()(make_grid(imgcmp).cpu()).show()
#%%

# basis = evc_tsr
basis = torch.eye(120).float().cuda()
#%%
fitvec = torch.randn(4, 512).cuda()
fitproj = fitvec @ basis
fitproj.requires_grad_(True)
optimizer = Adam(
    [fitproj],
    lr=5e-3,
)
for step in range(500):
    fitimg = G.visualize(fitproj @ basis.T)
    dsim = ImDist(fitimg, refimg)
Example #25
def train_lr_transform(crop_size, upscale_factor):
    return Compose([
        ToPILImage(),
        Resize(crop_size // upscale_factor, interpolation=Image.BICUBIC),
        ToTensor()
    ])
Example #26
def main(MODEL_NAME, NUM_CHANNELS, ARGS_LOAD_WEIGHTS, ARGS_LOAD_MODEL,
         ARGS_SAVE_DIR):
    """
    Main inference code
    """
    module = __import__(MODEL_NAME)
    Net = getattr(module, "Net")
    display_time = False

    print("---------- DATA PATHS: ----------")
    print("Model File: " + ARTIFACT_DETECTION_DIR + ARGS_LOAD_MODEL +
          MODEL_NAME)
    print("Weight File: " + ARTIFACT_DETECTION_DIR + ARGS_LOAD_WEIGHTS)

    # Initialize model
    model = Net(NUM_CHANNELS, NUM_CLASSES)
    if MODEL_NAME in [
            'mavnet', 'mavnet_rgb', 'original_unet', 'original_unet_rgb',
            'resunet', 'erfnet_rgb', 'erfnet', 'pstnet', 'pstnet_thermal'
    ]:
        model = torch.nn.DataParallel(model)
    model = model.cuda()

    # Load weights
    def load_my_state_dict(model, state_dict):
        own_state = model.state_dict()
        for name, param in state_dict.items():
            if name not in own_state:
                print("[weight not copied for %s]" % (name))
                continue
            own_state[name].copy_(param)
        return model

    model = load_my_state_dict(
        model, torch.load(ARTIFACT_DETECTION_DIR + ARGS_LOAD_WEIGHTS))
    print("Model and weights loaded..")
    print("---------------------------------")
    model.eval()

    if (not os.path.exists(ARGS_INFERENCE_DIR)):
        print("Problem finding Inference Directory. Check path and try again.")

    # Setup image transforms
    co_transform = ImageTransform(height=IMG_HEIGHT) if MODEL_NAME not in [
        'pstnet_thermal', 'erfnet'
    ] else ImageTransform2(height=IMG_HEIGHT)

    # Initialize dataset and loader
    dataset = SemanticSegmentation(root=ARGS_INFERENCE_DIR,
                                   co_transform=co_transform,
                                   NUM_CHANNELS=NUM_CHANNELS)
    loader = DataLoader(dataset, num_workers=8, batch_size=1, shuffle=False)

    # Initialize evaluation meters
    iouEvalVal = iouEval(NUM_CLASSES)
    inf_ctr = 0

    for step, (images, labels) in enumerate(loader):
        # Load {image, label} pair, enable GPU access
        images = images.cuda()
        labels = labels.cuda()
        # Setup as PyTorch variable
        inputs = Variable(images, requires_grad=False)
        targets = Variable(labels, requires_grad=False)

        # Setup clock
        inf_time_in = time.time()
        # Perform inference
        outputs = model(inputs)
        # Stop clock
        inf_time_out = time.time()

        # Add result to running evaluation
        iouEvalVal.addBatch(outputs.max(1)[1].unsqueeze(1).data, targets.data)

        # Colormap output and save
        label = outputs[0].max(0)[1].byte().cpu().data
        label_color = label.unsqueeze(0)
        filenameSave = ARGS_SAVE_DIR + "/inference_" + str(inf_ctr).zfill(
            6) + ".png"
        os.makedirs(os.path.dirname(filenameSave), exist_ok=True)
        label_save = ToPILImage()(label_color)
        label_save.save(filenameSave)

        # Print inference time if required
        if display_time:
            print("Val image: {} | Latency: {} ms".format(
                step, (inf_time_out - inf_time_in) * 1000.0))

        inf_ctr += 1

    iouVal, iou_val_classes = iouEvalVal.getIoU()
    print("=============== " + MODEL_NAME + " VALIDATION ==============")
    print("[Validation] mIOU                : {}".format(iouVal))
    print("[Validation] Background          : {}".format(iou_val_classes[0]))
    print("[Validation] Fire extinguisher   : {}".format(iou_val_classes[1]))
    print("[Validation] Backpack            : {}".format(iou_val_classes[2]))
    print("[Validation] Hand drill          : {}".format(iou_val_classes[3]))
    print("[Validation] Rescue randy        : {}".format(iou_val_classes[4]))
    print("=============================================================")
Example #27
 def __init__(self, dataset_dir, crop_size, upscale_factor):
     super(TrainDataset, self).__init__()
     self.image_filenames = [join(dataset_dir, x) for x in listdir(dataset_dir) if is_image_file(x)]
     crop_size = calculate_valid_crop_size(crop_size, upscale_factor)
     self.hr_preprocess = Compose([CenterCrop(384), RandomCrop(crop_size), ToTensor()])
     self.lr_preprocess = Compose([ToPILImage(), Resize(crop_size // upscale_factor, interpolation=Image.BICUBIC), ToTensor()])
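With hr_preprocess and lr_preprocess defined as above, the rest of this dataset plausibly yields (lr, hr) pairs by cropping the HR image first and downscaling that same crop. A hedged sketch of the missing methods (assumes from PIL import Image; the real class body is not shown here):

 def __getitem__(self, index):
     hr_image = self.hr_preprocess(Image.open(self.image_filenames[index]))
     lr_image = self.lr_preprocess(hr_image)  # ToPILImage -> bicubic Resize -> ToTensor
     return lr_image, hr_image

 def __len__(self):
     return len(self.image_filenames)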
Example #28
                distance = data_i['distance'].cuda()

                # Evaluating
                if prior:
                    loss, _ = trainer.run_validation(original, synthesis, semantic, label)
                else:
                    loss, _ = trainer.run_validation(original, synthesis, semantic, label, entropy, mae, distance)
                val_loss += loss
                outputs = softmax(outputs)
                (softmax_pred, predictions) = torch.max(outputs, dim=1)

                # post processing for semantic, label and prediction
                semantic_post = torch.zeros([original.shape[0], 3, 256, 512])
                for idx, semantic_ in enumerate(semantic):
                    (_, semantic_) = torch.max(semantic_, dim = 0)
                    semantic_ = 256 - np.asarray(ToPILImage()(semantic_.type(torch.FloatTensor).cpu()))
                    semantic_[semantic_ == 256] = 0
                    semantic_ = visualization.colorize_mask(semantic_)
                    semantic_ = ToTensor()(semantic_.convert('RGB'))
                    semantic_post[idx, :, :, :] = semantic_

                label_post = torch.zeros([original.shape[0], 3, 256, 512])
                for idx, label_ in enumerate(label):
                    label_ = 256 - np.asarray(ToPILImage()(label_.type(torch.FloatTensor).cpu()))
                    # There must be a better way...
                    label_[label_ == 256] = 0
                    label_[label_ == 255] = 100
                    label_[label_ == 1] = 255
                    label_ = ToTensor()(Image.fromarray(label_).convert('RGB'))
                    label_post[idx, :, :, :] = label_
Example #29
import torchvision as tv
from torchvision import transforms as tfs
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from PIL import Image
from torchvision.transforms import ToTensor, ToPILImage
from torch import nn
show = ToPILImage()  #tensor->img


def train_tf(x):
    im_aug = tfs.Compose([
        tfs.Resize(120),  # upscale to 120 first, then crop to 96
        tfs.RandomHorizontalFlip(),
        tfs.RandomCrop(96),
        tfs.ColorJitter(brightness=0.5, contrast=0.5, hue=0.5),
        tfs.ToTensor(),
        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    x = im_aug(x)
    return x


def test_tf(x):
    im_aug = tfs.Compose([
        tfs.Resize(96),
        tfs.ToTensor(),
        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    x = im_aug(x)
    return x
Example #30
def main(args: argparse.Namespace):
    logger = CompleteLogger(args.log, args.phase)
    print(args)

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    cudnn.benchmark = True

    # Data loading code
    normalize = T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    train_transform = T.Compose([
        T.RandomRotation(args.rotation),
        T.RandomResizedCrop(size=args.image_size, scale=args.resize_scale),
        T.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25),
        T.GaussianBlur(),
        T.ToTensor(), normalize
    ])
    val_transform = T.Compose(
        [T.Resize(args.image_size),
         T.ToTensor(), normalize])
    image_size = (args.image_size, args.image_size)
    heatmap_size = (args.heatmap_size, args.heatmap_size)
    source_dataset = datasets.__dict__[args.source]
    train_source_dataset = source_dataset(root=args.source_root,
                                          transforms=train_transform,
                                          image_size=image_size,
                                          heatmap_size=heatmap_size)
    train_source_loader = DataLoader(train_source_dataset,
                                     batch_size=args.batch_size,
                                     shuffle=True,
                                     num_workers=args.workers,
                                     pin_memory=True,
                                     drop_last=True)
    val_source_dataset = source_dataset(root=args.source_root,
                                        split='test',
                                        transforms=val_transform,
                                        image_size=image_size,
                                        heatmap_size=heatmap_size)
    val_source_loader = DataLoader(val_source_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=False,
                                   pin_memory=True)

    target_dataset = datasets.__dict__[args.target]
    train_target_dataset = target_dataset(root=args.target_root,
                                          transforms=train_transform,
                                          image_size=image_size,
                                          heatmap_size=heatmap_size)
    train_target_loader = DataLoader(train_target_dataset,
                                     batch_size=args.batch_size,
                                     shuffle=True,
                                     num_workers=args.workers,
                                     pin_memory=True,
                                     drop_last=True)
    val_target_dataset = target_dataset(root=args.target_root,
                                        split='test',
                                        transforms=val_transform,
                                        image_size=image_size,
                                        heatmap_size=heatmap_size)
    val_target_loader = DataLoader(val_target_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=False,
                                   pin_memory=True)

    print("Source train:", len(train_source_loader))
    print("Target train:", len(train_target_loader))
    print("Source test:", len(val_source_loader))
    print("Target test:", len(val_target_loader))

    train_source_iter = ForeverDataIterator(train_source_loader)
    train_target_iter = ForeverDataIterator(train_target_loader)

    # create model
    model = models.__dict__[args.arch](
        num_keypoints=train_source_dataset.num_keypoints).to(device)
    criterion = JointsMSELoss()

    # define optimizer and lr scheduler
    optimizer = Adam(model.get_parameters(lr=args.lr))
    lr_scheduler = MultiStepLR(optimizer, args.lr_step, args.lr_factor)

    # optionally resume from a checkpoint
    start_epoch = 0
    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        start_epoch = checkpoint['epoch'] + 1

    # define visualization function
    tensor_to_image = Compose([
        Denormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ToPILImage()
    ])

    def visualize(image, keypoint2d, name):
        """
        Args:
            image (tensor): image in shape 3 x H x W
            keypoint2d (tensor): keypoints in shape K x 2
            name: name of the saving image
        """
        train_source_dataset.visualize(
            tensor_to_image(image), keypoint2d,
            logger.get_image_path("{}.jpg".format(name)))

    if args.phase == 'test':
        # evaluate on validation set
        source_val_acc = validate(val_source_loader, model, criterion, None,
                                  args)
        target_val_acc = validate(val_target_loader, model, criterion,
                                  visualize, args)
        print("Source: {:4.3f} Target: {:4.3f}".format(source_val_acc['all'],
                                                       target_val_acc['all']))
        for name, acc in target_val_acc.items():
            print("{}: {:4.3f}".format(name, acc))
        return

    # start training
    best_acc = 0
    for epoch in range(start_epoch, args.epochs):
        logger.set_epoch(epoch)
        lr_scheduler.step()

        # train for one epoch
        train(train_source_iter, train_target_iter, model, criterion,
              optimizer, epoch, visualize if args.debug else None, args)

        # evaluate on validation set
        source_val_acc = validate(val_source_loader, model, criterion, None,
                                  args)
        target_val_acc = validate(val_target_loader, model, criterion,
                                  visualize if args.debug else None, args)

        # remember best acc and save checkpoint
        torch.save(
            {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch,
                'args': args
            }, logger.get_checkpoint_path(epoch))
        if target_val_acc['all'] > best_acc:
            shutil.copy(logger.get_checkpoint_path(epoch),
                        logger.get_checkpoint_path('best'))
            best_acc = target_val_acc['all']
        print("Source: {:4.3f} Target: {:4.3f} Target(best): {:4.3f}".format(
            source_val_acc['all'], target_val_acc['all'], best_acc))
        for name, acc in target_val_acc.items():
            print("{}: {:4.3f}".format(name, acc))

    logger.close()