if TEST_MODE:
    model.cuda()
    model.load_state_dict(torch.load('epochs/' + MODEL_NAME))
else:
    model.load_state_dict(
        torch.load('epochs/' + MODEL_NAME,
                   map_location=lambda storage, loc: storage))

dir = '/home/lucas/Lab/SRGAN/SRGAN/data/test2'
files = os.listdir(dir)
for file in files:
    image = Image.open(dir + '/' + file)
    print(image.size)
    min_edge = min(image.size)  # shorter edge of the image
    image_lr = Scale(min_edge // 2, interpolation=Image.BICUBIC)(image)

    image = Variable(ToTensor()(image_lr), volatile=True).unsqueeze(0)
    if TEST_MODE:
        image = image.cuda()

    start = time.perf_counter()  # time.clock() is deprecated and was removed in Python 3.8
    out = model(image)
    elapsed = time.perf_counter() - start
    print('cost ' + str(elapsed) + 's')
    out_img = ToPILImage()(out[0].data.cpu())
    #out_img.save(dir + '/' + file[:-4] + '_generate' + str(UPSCALE_FACTOR) + '_init.jpg')
    #out_img = Scale(256, interpolation=Image.BICUBIC)(out_img)
    out_img.save(dir + '/' + file[:-4] + '_generate' + '.jpg')
    image_lr = Scale(min_edge * 2, interpolation=Image.BICUBIC)(image_lr)
    image_lr.save(dir + '/' + file[:-4] + '_init' + '.jpg')
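Note: Scale was deprecated in torchvision in favor of Resize, and Variable(..., volatile=True) was replaced by torch.no_grad() in PyTorch 0.4+. A minimal sketch of the same inference step on a recent stack, assuming the same model, TEST_MODE and min_edge as above (newer torchvision also prefers transforms.InterpolationMode.BICUBIC over Image.BICUBIC):

from torchvision.transforms import Resize, ToPILImage, ToTensor

image_lr = Resize(min_edge // 2, interpolation=Image.BICUBIC)(image)
tensor = ToTensor()(image_lr).unsqueeze(0)
if TEST_MODE:
    tensor = tensor.cuda()
with torch.no_grad():  # replaces volatile=True
    out = model(tensor)
out_img = ToPILImage()(out[0].cpu())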
Example no. 2
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, CenterCrop, Normalize, Scale
from torchvision.transforms import ToTensor, ToPILImage

from dataset import cityscapes
from erfnet import ERFNet
from transform import Relabel, ToLabel, Colorize

import visdom

NUM_CHANNELS = 3
NUM_CLASSES = 20

image_transform = ToPILImage()
input_transform_cityscapes = Compose([
    Scale(512),
    ToTensor(),
    #Normalize([.485, .456, .406], [.229, .224, .225]),
])
target_transform_cityscapes = Compose([
    Scale(512),
    ToLabel(),
    Relabel(255, 19),  #ignore label to 19
])

cityscapes_trainIds2labelIds = Compose([
    Relabel(19, 255),
    Relabel(18, 33),
    Relabel(17, 32),
    Relabel(16, 31),
    Relabel(15, 28),
Example no. 3
    parser_train.add_argument('--batch', type=int, default=1)
    parser_train.add_argument('--steps-loss', type=int, default=50)
    parser_train.add_argument('--steps-plot', type=int, default=0)
    parser_train.add_argument('--steps-save', type=int, default=500)
    parser_train.add_argument('--exp', default='default')

    return parser.parse_args()

NUM_CHANNELS = 3
NUM_CLASSES = 2

color_transform = Colorize()
image_transform = ToPILImage()

input_transform = Compose([
    Scale(256),
    CenterCrop(256),
    ToTensor(),
    Normalize([.485, .456, .406], [.229, .224, .225]),
])

target_transform = Compose([
    Scale(256),
    CenterCrop(256),
    ToLabel(),
    Relabel(255, 1)
])


def get_model():
    Net = FCN8
Example no. 4
    def __call__(self, input, target):
        # do something to both images
        input = Scale(self.height, Image.BILINEAR)(input)
        target = Scale(self.height, Image.NEAREST)(target)

        if (self.augment):
            # Random hflip
            hflip = random.random()
            if (hflip < 0.5):
                input = input.transpose(Image.FLIP_LEFT_RIGHT)
                target = target.transpose(Image.FLIP_LEFT_RIGHT)

            degree = random.randint(-20, 20)
            input = input.rotate(degree, resample=Image.BILINEAR, expand=True)
            target = target.rotate(degree, resample=Image.NEAREST, expand=True)

            w, h = input.size
            nratio = random.uniform(0.5, 1.0)
            ni = random.randint(0, int(h - nratio * h))
            nj = random.randint(0, int(w - nratio * w))
            input = input.crop(
                (nj, ni, int(nj + nratio * w), int(ni + nratio * h)))
            target = target.crop(
                (nj, ni, int(nj + nratio * w), int(ni + nratio * h)))
            input = Resize((480, 640), Image.BILINEAR)(input)
            target = Resize((480, 640), Image.NEAREST)(target)

            brightness_factor = random.uniform(0.8, 1.2)
            contrast_factor = random.uniform(0.8, 1.2)
            saturation_factor = random.uniform(0.8, 1.2)
            #sharpness_factor=random.uniform(0.0,2.0)
            hue_factor = random.uniform(-0.2, 0.2)

            enhancer1 = ImageEnhance.Brightness(input)
            input = enhancer1.enhance(brightness_factor)

            enhancer2 = ImageEnhance.Contrast(input)
            input = enhancer2.enhance(contrast_factor)

            enhancer3 = ImageEnhance.Color(input)
            input = enhancer3.enhance(saturation_factor)

            #enhancer4=ImageEnhance.Sharpness(input)
            #input=enhancer4.enhance(sharpness_factor)

            input_mode = input.mode
            h, s, v = input.convert('HSV').split()
            np_h = np.array(h, dtype=np.uint8)
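            # uint8 addition below wraps modulo 256, so the shift acts as a cyclic hue rotation;
            # np.errstate(over='ignore') only silences the overflow warning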
            with np.errstate(over='ignore'):
                np_h += np.uint8(hue_factor * 255)
            h = Image.fromarray(np_h, 'L')
            input = Image.merge('HSV', (h, s, v)).convert(input_mode)

        else:
            input = Resize((480, 640), Image.BILINEAR)(input)
            target = Resize((480, 640), Image.NEAREST)(target)

        input = ToTensor()(input)

        if (self.enc):
            target = Resize((60, 80), Image.NEAREST)(target)
        target = ToLabel()(target)
        target = Relabel(255, 27)(target)

        return input, target
Example no. 5
def display_transform():
    return Compose([ToPILImage(), Scale(400), CenterCrop(400), ToTensor()])
Example no. 6
def target_transform1(crop_size):
    return Compose([
        CenterCrop(crop_size),
        Scale((64, 64)),
        ToTensor(),
    ])
Example no. 7
from torchvision.transforms import Compose, Normalize, Scale, ToTensor

from PIL import Image

img = Image.open("../../Documents/2017_07/test/21094803716_da3cea21b8_o.jpg")

ready_image = Compose([
    Scale([224, 224]),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406],
              std=[0.229, 0.224, 0.225]),

])


Example no. 8
def input_transform(crop_size, upscale_factor):
    return Compose([
        CenterCrop(crop_size),
        Scale(crop_size // upscale_factor, interpolation=Image.BICUBIC)
    ])
Example no. 9
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, CenterCrop, Normalize, Scale
from torchvision.transforms import ToTensor, ToPILImage

from dataset import cityscapes
from erfnet import ERFNet
from transform import Relabel, ToLabel, Colorize


NUM_CHANNELS = 3
NUM_CLASSES = 20

image_transform = ToPILImage()
input_transform_cityscapes = Compose([
    Scale(512),
    ToTensor(),
    #Normalize([.485, .456, .406], [.229, .224, .225]),
])
target_transform_cityscapes = Compose([
    Scale(512),
    ToLabel(),
    Relabel(255, 19),   #ignore label to 19
])

cityscapes_trainIds2labelIds = Compose([
    Relabel(19, 255),  
    Relabel(18, 33),
    Relabel(17, 32),
    Relabel(16, 31),
    Relabel(15, 28),
Example no. 10
def input_transform(crop_size, scale):

    return Compose([
        CenterCrop(crop_size),
        Scale(crop_size // scale, interpolation=Image.BICUBIC)
    ])
Example no. 11
def test(args):

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    testdata = data_loader(data_path,
                           split="val",
                           is_transform=False,
                           img_size=(512, 512))
    n_classes = testdata.n_classes
    eps = 1e-10

    # (TODO): Choose the scale according to dataset requirements
    scales = [0.5, 0.75, 1.0, 1.25]
    base_size = min(testdata.img_size)
    crop_size = (args.img_rows, args.img_cols)
    stride = [0, 0]
    stride[0] = int(np.ceil(float(crop_size[0]) * 2 / 3))
    stride[1] = int(np.ceil(float(crop_size[1]) * 2 / 3))
    size_transform_img = [Scale(int(base_size * i)) for i in scales]

    mask1_len = np.zeros(n_classes, dtype=float)
    mask2_len = np.zeros(n_classes, dtype=float)
    correct_len = np.zeros(n_classes, dtype=float)

    # Setup Model
    model = torch.nn.DataParallel(
        get_model(args.arch,
                  n_classes,
                  ignore_index=testdata.ignore_index,
                  output_stride=args.ost))
    model_name = args.model_path.split('.')
    checkpoint_name = model_name[0] + '_optimizer.pkl'
    checkpoint = torch.load(checkpoint_name)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    soft = nn.Softmax2d()
    cm = np.zeros((n_classes, n_classes), dtype=np.float64)
    if torch.cuda.is_available():
        model.cuda()
        soft.cuda()

    for f_no, line in enumerate(testdata.files):
        imgr, lblr = testdata.readfile(line)
        lbl = np.array(lblr)
        origw, origh = imgr.size

        # Maintain final prediction array for each image
        pred = np.zeros((n_classes, origh, origw), dtype=np.float32)

        # Loop over all scales for single image
        for i in range(len(scales)):
            img = size_transform_img[i](imgr)
            imsw, imsh = img.size

            imwstart, imhstart = 0, 0
            imw, imh = imsw, imsh
            # Zero padding if either side is smaller than crop_size
            if imsw < crop_size[1] or imsh < crop_size[0]:
                padw, padh = max(crop_size[1] - imsw,
                                 0), max(crop_size[0] - imsh, 0)
                imw += padw
                imh += padh
                im = Image.new(img.mode, (imw, imh), tuple(testdata.filler))
                im.paste(img, (int(padw / 2), int(padh / 2)))
                imwstart += int(padw / 2)
                imhstart += int(padh / 2)
                img = im

            # Now tile image - each of crop_size and loop over them
            h_grid = int(np.ceil(float(imh - crop_size[0]) / stride[0])) + 1
            w_grid = int(np.ceil(float(imw - crop_size[1]) / stride[1])) + 1

            # maintain prediction probability for each pixel
            datascale = torch.zeros(n_classes, imh, imw).cuda()
            countscale = torch.zeros(n_classes, imh, imw).cuda()
            for w in range(w_grid):
                for h in range(h_grid):
                    # crop portion from image - crop_size
                    x1, y1 = w * stride[1], h * stride[0]
                    x2, y2 = int(min(x1 + crop_size[1],
                                     imw)), int(min(y1 + crop_size[0], imh))
                    x1, y1 = x2 - crop_size[1], y2 - crop_size[0]
                    img_cropped = img.crop((x1, y1, x2, y2))

                    # Input image as well its flipped version
                    img1 = testdata.image_transform(img_cropped)
                    img2 = testdata.image_transform(
                        img_cropped.transpose(Image.FLIP_LEFT_RIGHT))
                    images = torch.stack((img1, img2), dim=0)

                    if torch.cuda.is_available():
                        images = Variable(images.cuda(), volatile=True)
                    else:
                        images = Variable(images, volatile=True)

                    # Output prediction for image and its flip version
                    outputs = model(images)

                    # Sum prediction from the image and its flip (flipped back along the width axis), then normalize
                    flip_index = torch.arange(outputs.size(3) - 1, -1, -1).cuda().long()
                    prob = outputs[0] + outputs[1][:, :, flip_index]
                    prob = soft(prob.view(-1, *prob.size()))

                    # Place the score in the proper position
                    datascale[:, y1:y2, x1:x2] += prob.data
                    countscale[:, y1:y2, x1:x2] += 1
            # After looping over all tiles of the image, normalize the scores and bilinearly interpolate back to the original image size
            datascale /= (countscale + eps)
            datascale = datascale[:, imhstart:imhstart + imsh,
                                  imwstart:imwstart + imsw]
            datascale = datascale.cpu().numpy()
            datascale = np.transpose(datascale, (1, 2, 0))
            datascale = resize(datascale, (origh, origw),
                               order=1,
                               preserve_range=True,
                               mode='symmetric',
                               clip=False)
            datascale = np.transpose(datascale, (2, 0, 1))

            # Sum up all the scores for all scales
            pred += (datascale / (np.sum(datascale, axis=0) + eps))

        pred = pred / len(scales)
        pred = pred.argmax(0)

        pred[lbl == testdata.ignore_index] = testdata.ignore_index

        for m in range(n_classes):
            mask1 = lbl == m
            mask2 = pred == m
            diff = pred[mask1] - lbl[mask1]
            mask1_len[m] += float(np.sum(mask1))
            mask2_len[m] += float(np.sum(mask2))
            correct_len[m] += np.sum(diff == 0)

        cm += confusion_matrix(lbl.ravel(),
                               pred.ravel(),
                               labels=range(n_classes))
        indexes_to_avg = mask1_len > 0
        print("pixel accuracy")
        print(
            np.sum(correct_len[indexes_to_avg]) /
            np.sum(mask1_len[indexes_to_avg]))
        print("Class_wise_IOU")
        print(correct_len[indexes_to_avg] /
              (mask1_len[indexes_to_avg] + mask2_len[indexes_to_avg] -
               correct_len[indexes_to_avg]))
        print("mean IOU")
        print(
            np.mean(correct_len[indexes_to_avg] /
                    (mask1_len[indexes_to_avg] + mask2_len[indexes_to_avg] -
                     correct_len[indexes_to_avg])))
        print("mean accuracy")
        print(np.mean(correct_len[indexes_to_avg] / mask1_len[indexes_to_avg]))

        decoded = testdata.decode_segmap(pred)
        pickle.dump(
            np.transpose(np.array(imgr, dtype=np.uint8), [2, 0, 1]),
            open("results/saved_test_images/" + str(f_no) + "_input.p", "wb"))
        pickle.dump(
            np.transpose(decoded, [2, 0, 1]),
            open("results/saved_test_images/" + str(f_no) + "_output.p", "wb"))
        pickle.dump(
            np.transpose(testdata.decode_segmap(lbl), [2, 0, 1]),
            open("results/saved_test_images/" + str(f_no) + "_target.p", "wb"))

    sio.savemat("results/cm.mat", {'cm': cm})
Example no. 12
class ConvolutionalCaptionPredictor(Predictor):
    ts = transforms.Compose([
        Scale([224, 224]),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    def __init__(self, beam_size: int = 20):
        self.max_tokens = 15
        self.beam_size = beam_size
        num_layers = 3
        worddict_tmp = pickle.load(
            open(FilePathManager.resolve('image_to_text/data/wordlist.p'),
                 'rb'))
        wordlist = [l for l in iter(list(worddict_tmp.keys())) if l != '</S>']
        wordlist = ['EOS'] + sorted(wordlist)
        numwords = len(wordlist)
        self.wordlist = wordlist

        model_imgcnn = Vgg16Feats()
        model_imgcnn.cuda()

        model_convcap = Convcap(numwords, num_layers, is_attention=True)
        model_convcap.cuda()
        checkpoint = torch.load(
            FilePathManager.resolve(
                "image_to_text/models/convolutional_caption-model.pth"))
        model_convcap.load_state_dict(checkpoint['state_dict'])
        model_imgcnn.load_state_dict(checkpoint['img_state_dict'])

        model_imgcnn.train(False)
        model_convcap.train(False)
        self.model_imgcnn = model_imgcnn
        self.model_convcap = model_convcap

    @staticmethod
    def convert_image(image):
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(image)
        image = ConvolutionalCaptionPredictor.ts(image)
        image = image.unsqueeze(0)
        return image

    def predict(self, image):
        image = self.convert_image(image)
        img_v = Variable(image.cuda())
        imgfeats, imgfc7 = self.model_imgcnn(img_v)

        b, f_dim, f_h, f_w = imgfeats.size()
        imgfeats = imgfeats.unsqueeze(1).expand(b, self.beam_size, f_dim, f_h,
                                                f_w)
        imgfeats = imgfeats.contiguous().view(b * self.beam_size, f_dim, f_h,
                                              f_w)

        b, f_dim = imgfc7.size()
        imgfc7 = imgfc7.unsqueeze(1).expand(b, self.beam_size, f_dim)
        imgfc7 = imgfc7.contiguous().view(b * self.beam_size, f_dim)

        beam_searcher = beamsearch(self.beam_size, 1, self.max_tokens)

        wordclass_feed = np.zeros((self.beam_size * 1, self.max_tokens),
                                  dtype='int64')
        wordclass_feed[:, 0] = self.wordlist.index('<S>')
        outcaps = np.empty((1, 0)).tolist()
        for j in range(self.max_tokens - 1):
            wordclass = Variable(torch.from_numpy(wordclass_feed)).cuda()
            wordact, attn = self.model_convcap(imgfeats, imgfc7, wordclass)
            wordact = wordact[:, :, :-1]
            wordact_j = wordact[..., j]

            beam_indices, wordclass_indices = beam_searcher.expand_beam(
                wordact_j)

            if len(beam_indices) == 0 or j == (self.max_tokens - 2):
                generated_captions = beam_searcher.get_results()
                for k in range(1):
                    g = generated_captions[:, k]
                    outcaps[k] = [self.wordlist[x] for x in g]
            else:
                wordclass_feed = wordclass_feed[beam_indices]
                imgfc7 = imgfc7.index_select(
                    0, Variable(torch.cuda.LongTensor(beam_indices)))
                imgfeats = imgfeats.index_select(
                    0, Variable(torch.cuda.LongTensor(beam_indices)))
                for i, wordclass_idx in enumerate(wordclass_indices):
                    wordclass_feed[i, j + 1] = wordclass_idx
        outcap = outcaps[0]
        num_words = len(outcap)
        if 'EOS' in outcap:
            num_words = outcap.index('EOS')
        outcap = outcap[:num_words]
        attn = attn[0, :num_words].cpu()
        return outcap, attn
Example no. 13
from torchvision.transforms import Compose, CenterCrop, Normalize, Scale
from torchvision.transforms import ToTensor, ToPILImage

from transform import ToLabel
from net import NetS

import time
import cv2
EXTENSIONS = ['.jpg', '.png']

NUM_CHANNELS = 1
NUM_CLASSES = 2


input_transform = Compose([
    Scale((224,224)),
    ToTensor(),
])
target_transform = Compose([
    Scale((224,224)),
    ToLabel(),
])

def infer(model):
    # print 'ok'
    label_np = array(Image.open('./test/Labels/333.png'))
    #label_np = cv2.cvtColor(label_np, cv2.COLOR_BGR2GRAY)
    img = Image.open('./test/Images/333.png')
    img_n = array(img)

    img_np = np.resize(img_n, (400, 400, 3))
Example no. 14
def LR_transform(crop_size):
    return Compose([
        Scale(crop_size//8),
        ToTensor(),
    ])
Example no. 15
class ToLabel:
    def __call__(self, image):
        return torch.from_numpy(np.array(image)).long().unsqueeze(0)

class Relabel:
    def __init__(self, olabel, nlabel):
        self.olabel = olabel
        self.nlabel = nlabel
    def __call__(self, tensor):
        assert isinstance(tensor, torch.LongTensor), 'tensor type should be long'
        # Note: unlike the usual Relabel, this variant ignores olabel and simply
        # binarizes the mask: values <= 10 become 0, values > 10 become nlabel.
        tensor[tensor <= 10] = 0
        tensor[tensor > 10] = self.nlabel
        return tensor

image_transform = Compose([
    Scale(128),
    ToTensor(),
    Normalize([.485, .456, .406], [.229, .224, .225]),
])

label_transform = Compose([
    Scale(128),
    ToLabel(),
    Relabel(255, 1),
])

class TGSDS(Dataset):
    def __init__(self, root, input_trans=None, target_trans=None):
        self.root = root
        self.root_images = os.path.join(self.root, 'images')
        self.root_masks = os.path.join(self.root, 'masks')
Example no. 16
def HR_4_transform(crop_size):
    return Compose([
        Scale(crop_size//2),
        ToTensor(),
    ])
Example no. 17
def transform_input(crop_size, upscale_factor):
    """LR of target image
    """
    return Compose([
        Scale(crop_size // upscale_factor),
    ])
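A hedged usage sketch for the transform above; the crop size, upscale factor and hr_image name are illustrative assumptions, not part of the example:

lr_transform = transform_input(crop_size=128, upscale_factor=4)
lr_image = lr_transform(hr_image)  # hr_image: a PIL image; Scale(32) resizes its shorter edge to 32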
Example no. 18
def train_lr_transform(crop_size, upscale_factor):
    return Compose([
        ToPILImage(),
        Scale(crop_size // upscale_factor, interpolation=Image.BICUBIC),
    ])
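In SRGAN-style pipelines this LR transform is usually paired with an HR transform that random-crops the original image; a minimal sketch under that assumption (the function name and pairing are assumed, not shown in the example above):

from torchvision.transforms import Compose, RandomCrop, ToTensor

def train_hr_transform(crop_size):
    # assumed counterpart: random HR crop that train_lr_transform then downscales
    return Compose([
        RandomCrop(crop_size),
        ToTensor(),
    ])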
Example no. 19
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, CenterCrop, Normalize, Scale
from torchvision.transforms import ToTensor, ToPILImage

from dataset import cityscapes
from erfnet import ERFNet
from transform import Relabel, ToLabel, Colorize
from iouEval import iouEval, getColorEntry

NUM_CHANNELS = 3
NUM_CLASSES = 20

image_transform = ToPILImage()
input_transform_cityscapes = Compose([
    Scale(512, Image.BILINEAR),
    ToTensor(),
])
target_transform_cityscapes = Compose([
    Scale(512, Image.NEAREST),
    ToLabel(),
    Relabel(255, 19),  #ignore label to 19
])


def main(args):

    modelpath = args.loadDir + args.loadModel
    weightspath = args.loadDir + args.loadWeights

    print("Loading model: " + modelpath)
Example no. 20
    parser_train.add_argument('--steps-loss', type=int, default=50)
    parser_train.add_argument('--steps-plot', type=int, default=0)
    parser_train.add_argument('--steps-save', type=int, default=500)
    parser_train.add_argument('--exp', default='default')

    return parser.parse_args()


NUM_CHANNELS = 3
NUM_CLASSES = 2

color_transform = Colorize()
image_transform = ToPILImage()

input_transform = Compose([
    Scale(256),
    CenterCrop(256),
    ToTensor(),
    Normalize([.485, .456, .406], [.229, .224, .225]),
])

target_transform = Compose(
    [Scale(256), CenterCrop(256),
     ToLabel(), Relabel(255, 1)])


def get_model():
    Net = FCN8
    model = Net(NUM_CLASSES, './vgg_16.pth')
    return model
Example no. 21
'''
Net_G = Generator(depth=128)
Net_D = Discriminator(depth=128)
Net_G.weight_init(mean=0.0, std=0.02)
Net_D.weight_init(mean=0.0, std=0.02)

Net_G = DataParallel(Net_G)
Net_D = DataParallel(Net_D)
if GPU_NUMS > 1:
    Net_G.cuda()
    Net_D.cuda()
'''
Read in the data and preprocess it
'''
transform = Compose([
    Scale(IMG_SIZE),
    ToTensor(),
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])
train_loader = torch.utils.data.DataLoader(
    # MNIST('data', train=True, download=True, transform=transform),
    MNISTDataSet('../ganData/mnist.npz', train=True, transform=transform),
    batch_size=BATCH_SIZE,
    shuffle=True)
'''
Start training
'''
BCE_loss = BCELoss()
G_optimizer = Adam(Net_G.parameters(), lr=LR, betas=(0.5, 0.999))
D_optimizer = Adam(Net_D.parameters(), lr=LR, betas=(0.5, 0.999))
bar = ProgressBar(EPOCHS, len(train_loader), "D Loss:%.3f; G Loss:%.3f")
Example no. 22
def target_transform2(crop_size, upscale_factor):
    crop_size = calculate_valid_crop_size(crop_size, upscale_factor)
    return Compose([
        Scale((crop_size, crop_size)),
        ToTensor(),
    ])
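calculate_valid_crop_size is not defined in this excerpt; a common definition in super-resolution code (an assumption here, not necessarily this project's) rounds the crop size down to the nearest multiple of the upscale factor:

def calculate_valid_crop_size(crop_size, upscale_factor):
    # largest size <= crop_size that is divisible by upscale_factor
    return crop_size - (crop_size % upscale_factor)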
Example no. 23
    def __call__(self, input, target):
        # do something to both images
        input = Scale(self.height, Image.BILINEAR)(input)
        target = Scale(self.height, Image.NEAREST)(target)

        if (self.augment):
            # Random hflip
            hflip = random.random()
            if (hflip < 0.5):
                input = input.transpose(Image.FLIP_LEFT_RIGHT)
                target = target.transpose(Image.FLIP_LEFT_RIGHT)

            # Random translation of 0-2 pixels (fill the rest with padding)
            transX = random.randint(-2, 2)
            transY = random.randint(-2, 2)

            input = ImageOps.expand(input,
                                    border=(transX, transY, 0, 0),
                                    fill=0)
            target = ImageOps.expand(target,
                                     border=(transX, transY, 0, 0),
                                     fill=255)  #pad label filling with 255
            input = input.crop(
                (0, 0, input.size[0] - transX, input.size[1] - transY))
            target = target.crop(
                (0, 0, target.size[0] - transX, target.size[1] - transY))

        input = ToTensor()(input)
        if (self.enc):
            target = Scale(int(self.height / 8), Image.NEAREST)(target)
        target = ToLabel()(target)
        target = Relabel(1, 1)(target)
        target = Relabel(2, 2)(target)
        target = Relabel(3, 255)(target)
        target = Relabel(4, 255)(target)
        target = Relabel(5, 255)(target)
        target = Relabel(6, 255)(target)
        target = Relabel(7, 255)(target)
        target = Relabel(8, 3)(target)
        target = Relabel(9, 255)(target)
        target = Relabel(10, 4)(target)
        target = Relabel(11, 255)(target)
        target = Relabel(12, 255)(target)
        target = Relabel(13, 5)(target)
        target = Relabel(14, 255)(target)
        target = Relabel(15, 255)(target)
        target = Relabel(16, 255)(target)
        target = Relabel(17, 255)(target)
        target = Relabel(18, 255)(target)
        target = Relabel(19, 255)(target)

        target = Relabel(255, 6)(target)

        return input, target
Example no. 24
def demo_transform1(crop_size, upscale_factor):
    return Compose([
        CenterCrop(crop_size),
        Scale(crop_size // upscale_factor),
        ToTensor(),
    ])
Example no. 25
    def __call__(self, input, target):
        # do something to both images
        input = Scale(self.height, Image.BILINEAR)(input)
        target = Scale(self.height, Image.NEAREST)(target)

        if(self.augment):
            # Random hflip
            hflip = random.random()
            if (hflip < 0.5):
                input = input.transpose(Image.FLIP_LEFT_RIGHT)
                target = target.transpose(Image.FLIP_LEFT_RIGHT)

        input = ToTensor()(input)
        
        target = ToLabel()(target)
        
        for iter in range(1,19):
            target = Relabel(iter, 255)(target)
        
        target = Relabel(19, 1)(target) 
        target = Relabel(20, 2)(target)
        target = Relabel(21, 3)(target)
        target = Relabel(22, 4)(target)
        target = Relabel(23, 5)(target)
        target = Relabel(24, 6)(target)
        target = Relabel(25, 7)(target)
        target = Relabel(26, 8)(target)
        target = Relabel(27, 9)(target) 
        
        return input, target
Example no. 26
    def __call__(self, input, target):
        # do something to both images
        input = Scale(self.height, Image.BILINEAR)(input)
        target = Scale(self.height, Image.NEAREST)(target)

        if (self.augment):
            # Random hflip
            hflip = random.random()
            if (hflip < 0.5):
                input = input.transpose(Image.FLIP_LEFT_RIGHT)
                target = target.transpose(Image.FLIP_LEFT_RIGHT)

            # Random translation of 0-2 pixels (fill the rest with padding)
            transX = random.randint(-2, 2)
            transY = random.randint(-2, 2)

            input = ImageOps.expand(input,
                                    border=(transX, transY, 0, 0),
                                    fill=0)
            target = ImageOps.expand(target,
                                     border=(transX, transY, 0, 0),
                                     fill=255)  #pad label filling with 255
            input = input.crop(
                (0, 0, input.size[0] - transX, input.size[1] - transY))
            target = target.crop(
                (0, 0, target.size[0] - transX, target.size[1] - transY))

            #TODO future: additional augments
            #CenterCrop(256)
            #Normalize([.485, .456, .406], [.229, .224, .225]),

        input = ToTensor()(input)
        if (self.enc):
            target = Scale(int(self.height / 8), Image.NEAREST)(target)
        target = ToLabel()(target)
        target = Relabel(255, 19)(target)

        return input, target
Example no. 27

face_dataset = FaceLandmarksDataset(csv_file='faces/face_landmarks.csv',
                                    root_dir='faces/')

fig = plt.figure()

# for i,sample in enumerate(face_dataset):
# 	ax = plt.subplot(2, 2, i + 1)
# 	plt.tight_layout()
# 	ax.set_title('Sample #{}'.format(i))
# 	show_landmarks(**sample)
# 	if i==3:
# 		plt.show()
# 		break
from torchvision.transforms import Scale, RandomCrop, ToTensor

transformed_dataset = FaceLandmarksDataset(
    csv_file='faces/face_landmarks.csv',
    root_dir='faces/',
    transform=transforms.Compose([Scale(size=256),
                                  RandomCrop(size=224)]))
for i, sample in enumerate(transformed_dataset):
    ax = plt.subplot(2, 2, i + 1)
    plt.tight_layout()
    ax.set_title('Sample #{}'.format(i))
    show_landmarks(**sample)
    if i == 3:
        plt.show()
        break