import argparse

# Dataset-level constants for this script.
NUM_CHANNELS = 3
NUM_CLASSES = 22

color_transform = Colorize()
image_transform = ToPILImage()

# Image pipeline: 256 center crop -> tensor -> per-channel normalisation.
input_transform = Compose([
    CenterCrop(256),
    ToTensor(),
    Normalize([.485, .456, .406], [.229, .224, .225]),
])
# Label pipeline: same 256 center crop, then ToLabel and Relabel(255, 21).
# Presumably 255 is the "void" value being folded into the last class index
# (NUM_CLASSES - 1) -- confirm against the Relabel implementation.
target_transform = Compose([
    CenterCrop(256),
    ToLabel(),
    Relabel(255, 21),
])

# NOTE(review): the description below mentions SSD although the transforms
# above are label-map based; it looks like a copy-paste leftover. Left
# unchanged because it is user-visible text.
parser = argparse.ArgumentParser(
    description='Single Shot MultiBox Detector Training With Pytorch')
parser.add_argument('--datadir', required=False,
                    default="/data_1/data/VOC2012/VOCdevkit/VOC2012")
parser.add_argument('--epochs', default=50, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--save_interval', default=10, type=int, metavar='N',
                    help='number of epochs to save the model')
parser.add_argument('--batch_size', default=4, type=int,
                    help='Batch size for training')
parser.add_argument('--resume', default=None, type=str,
                    help='Checkpoint state_dict file to resume training from')
# `store_true` combined with `default=True` means `--cuda` alone can never
# yield False. `--no-cuda` writes to the same destination so CUDA can be
# disabled, while existing invocations keep their behaviour.
parser.add_argument('--cuda', action='store_true', default=True,
                    help='Use CUDA (enabled by default)')
parser.add_argument('--no-cuda', dest='cuda', action='store_false',
                    help='Disable CUDA')
def __call__(self, input, target):
    """Jointly transform an image and its label map.

    Both images are first rescaled with ``Scale(self.height, ...)``. When
    ``self.augment`` is truthy the pair then goes through a shared random
    horizontal flip, a shared random rotation in [-20, 20] degrees, a shared
    random crop keeping 50-100% of each side, and a resize to (480, 640).
    The image alone additionally receives brightness / contrast / saturation
    jitter in [0.8, 1.2] and a hue shift in [-0.2, 0.2]. Without augmentation
    the pair is only resized to (480, 640).

    The label map always uses NEAREST resampling so class ids are never
    interpolated.

    Args:
        input: PIL image (it is used via ``transpose``/``rotate``/``crop``).
        target: PIL label image matching ``input``.

    Returns:
        ``(input, target)`` where ``input`` is the output of ``ToTensor()``
        and ``target`` is the output of ``ToLabel()`` followed by
        ``Relabel(255, 27)``. When ``self.enc`` is truthy the target is
        first downsized to (60, 80), i.e. 1/8 of the image size.
    """
    input = Scale(self.height, Image.BILINEAR)(input)
    target = Scale(self.height, Image.NEAREST)(target)

    if self.augment:
        # Random horizontal flip, applied to both images together.
        if random.random() < 0.5:
            input = input.transpose(Image.FLIP_LEFT_RIGHT)
            target = target.transpose(Image.FLIP_LEFT_RIGHT)

        # One angle for both images so pixels and labels stay aligned.
        degree = random.randint(-20, 20)
        input = input.rotate(degree, resample=Image.BILINEAR, expand=True)
        target = target.rotate(degree, resample=Image.NEAREST, expand=True)

        # Random crop keeping `nratio` of each side; same box for both.
        w, h = input.size
        nratio = random.uniform(0.5, 1.0)
        ni = random.randint(0, int(h - nratio * h))
        nj = random.randint(0, int(w - nratio * w))
        crop_box = (nj, ni, int(nj + nratio * w), int(ni + nratio * h))
        input = input.crop(crop_box)
        target = target.crop(crop_box)

        input = Resize((480, 640), Image.BILINEAR)(input)
        target = Resize((480, 640), Image.NEAREST)(target)

        # Photometric jitter: image only, the labels are unaffected.
        brightness_factor = random.uniform(0.8, 1.2)
        contrast_factor = random.uniform(0.8, 1.2)
        saturation_factor = random.uniform(0.8, 1.2)
        hue_factor = random.uniform(-0.2, 0.2)

        input = ImageEnhance.Brightness(input).enhance(brightness_factor)
        input = ImageEnhance.Contrast(input).enhance(contrast_factor)
        input = ImageEnhance.Color(input).enhance(saturation_factor)

        # Hue shift: rotate the H channel of the HSV representation.
        input_mode = input.mode
        hue_band, sat_band, val_band = input.convert('HSV').split()
        np_h = np.array(hue_band, dtype=np.uint8)
        # Wrap the (possibly negative) shift into 0..255 with integer
        # arithmetic. Casting a negative float straight to uint8 is an
        # out-of-range conversion whose result is platform dependent.
        hue_shift = np.uint8(int(hue_factor * 255) % 256)
        # uint8 addition wraps around, which is the desired hue rotation.
        with np.errstate(over='ignore'):
            np_h += hue_shift
        hue_band = Image.fromarray(np_h, 'L')
        input = Image.merge(
            'HSV', (hue_band, sat_band, val_band)).convert(input_mode)
    else:
        input = Resize((480, 640), Image.BILINEAR)(input)
        target = Resize((480, 640), Image.NEAREST)(target)

    input = ToTensor()(input)

    if self.enc:
        # Smaller target for the encoder-only case: (480, 640) / 8.
        target = Resize((60, 80), Image.NEAREST)(target)
    target = ToLabel()(target)
    # Presumably folds the 255 "void" value into class 27 -- confirm
    # against the Relabel implementation.
    target = Relabel(255, 27)(target)

    return input, target
# Module-level configuration (NUM_CLASSES = 28) followed by the paired
# image/label augmentation class MyCoTransform.
#   * input_transform: CenterCrop(240) -> ToTensor() -> Normalize(mean, std).
#   * target_transform: CenterCrop(240) -> ToLabel() -> Relabel(255, 27).
#     Relabel(255, 27) presumably remaps the value 255 to the last class
#     index (NUM_CLASSES - 1) -- confirm against the Relabel implementation.
#   * MyCoTransform(enc, augment=True, height=480) only stores its three
#     arguments; the trailing `pass` in __init__ is a no-op.
#   * MyCoTransform.__call__(input, target) begins by rescaling both images
#     with Scale(self.height, ...): BILINEAR for the image, NEAREST for the
#     target.
# NOTE(review): __call__ appears to be cut off after those two statements in
# this view (no return is visible).
# NOTE(review): the statements below are collapsed onto one physical line, so
# everything after the first `#` is lexically a comment; the original line
# breaks must be restored before this fragment can run.
from shutil import copyfile NUM_CHANNELS = 3 NUM_CLASSES = 28 color_transform = Colorize(NUM_CLASSES) image_transform = ToPILImage() input_transform = Compose([ CenterCrop(240), ToTensor(), Normalize([.485, .456, .406], [.229, .224, .225]), ]) target_transform = Compose([ CenterCrop(240), ToLabel(), Relabel(255, 27), ]) #Augmentations - different function implemented to perform random augments on both image and target class MyCoTransform(object): def __init__(self, enc, augment=True, height=480): self.enc = enc self.augment = augment self.height = height pass def __call__(self, input, target): # do something to both images input = Scale(self.height, Image.BILINEAR)(input) target = Scale(self.height, Image.NEAREST)(target)
# Transform definitions followed by the start of train(args, model).
#   * input_transform: RandomGrayscale(0.02) -> CenterCrop((512,512)) ->
#     ToTensor() -> Normalize(mean, std).
#   * target_transform: CenterCrop((512,512)) -> ToLabel() -> Relabel(255, 22).
#   * train() puts the model in training mode and builds a 22-element class
#     weight vector of ones with entry 0 set to 0, which is handed to
#     CrossEntropyLoss2d (moved to the GPU when args.cuda is set). Presumably
#     this removes class 0 from the loss -- confirm against CrossEntropyLoss2d.
# NOTE(review): Relabel(255, 22) appears to produce label value 22, while the
# weight vector has only 22 entries (indices 0..21); verify the class count.
# NOTE(review): the lone `)` before `def train` has no visible opening
# bracket, and train() is cut off after the CUDA branch in this view.
# NOTE(review): the statements below are collapsed onto one physical line; the
# original line breaks must be restored before this fragment can run.
image_transform = ToPILImage() to_img=transforms.ToPILImage() input_transform = Compose([ RandomGrayscale(0.02), CenterCrop((512,512)), ToTensor(), Normalize([.485, .456, .406], [.229, .224, .225]), ]) target_transform = Compose([ CenterCrop((512,512)), ToLabel(), Relabel(255, 22), ]) ) def train(args, model): model.train() weight = torch.ones(22) weight[0] = 0 if args.cuda: criterion = CrossEntropyLoss2d(weight.cuda())
def __call__(self, input, target):
    """Jointly transform an image and its label map.

    Both images are resized to (512, 1024). When ``self.augment`` is truthy
    the pair then receives one shared random affine warp (rotation and shear
    each within +/-1 degree), a shared random crop keeping 50-100% of each
    side, a resize back to (512, 1024) and a shared random horizontal flip.
    The image alone additionally goes through ``ColorJitter(0.1, 0.1, 0.1,
    0.1)``. Without augmentation the pair is only resized again.

    The label map always uses NEAREST resampling so class ids are never
    interpolated; pixels exposed by the affine warp are filled with 255 in
    the target and 0 in the image.

    Note that ``self.height`` is not consulted here; sizes are fixed.

    Args:
        input: PIL image (it is used via ``size``/``crop``/``transpose``).
        target: PIL label image matching ``input``.

    Returns:
        ``(input, target)`` where ``input`` is the output of ``ToTensor()``
        and ``target`` is the output of ``ToLabel()`` followed by
        ``Relabel(255, 7)``. When ``self.enc`` is truthy the target is first
        downsized to (64, 128), i.e. 1/8 of the image size.
    """
    # Other working resolutions that were tried: (1086, 1351), (256, 512).
    input = Resize((512, 1024), Image.BILINEAR)(input)
    target = Resize((512, 1024), Image.NEAREST)(target)

    if self.augment:
        # Local import: only this branch needs the functional API.
        from torchvision.transforms import functional as TF

        rotation_degree = 1
        shear_degree = 1
        # Draw the affine parameters ONCE and apply them to both images.
        # Calling RandomAffine(...) separately on image and target samples
        # two independent warps, which misaligns pixels and labels.
        # A scalar range `d` means (-d, +d), as in the RandomAffine
        # constructor.
        affine_params = RandomAffine.get_params(
            (-rotation_degree, rotation_degree),
            None,
            None,
            (-shear_degree, shear_degree),
            input.size,
        )
        input = TF.affine(input, *affine_params,
                          resample=Image.BILINEAR, fillcolor=0)
        target = TF.affine(target, *affine_params,
                           resample=Image.NEAREST, fillcolor=255)

        # Random crop keeping `nratio` of each side; same box for both.
        w, h = input.size
        nratio = random.uniform(0.5, 1.0)
        ni = random.randint(0, int(h - nratio * h))
        nj = random.randint(0, int(w - nratio * w))
        crop_box = (nj, ni, int(nj + nratio * w), int(ni + nratio * h))
        input = input.crop(crop_box)
        target = target.crop(crop_box)

        input = Resize((512, 1024), Image.BILINEAR)(input)
        target = Resize((512, 1024), Image.NEAREST)(target)

        # Photometric jitter: image only, the labels are unaffected.
        brightness = 0.1
        contrast = 0.1
        saturation = 0.1
        hue = 0.1
        input = ColorJitter(brightness, contrast, saturation, hue)(input)

        # Random horizontal flip, applied to both images together.
        if random.random() < 0.5:
            input = input.transpose(Image.FLIP_LEFT_RIGHT)
            target = target.transpose(Image.FLIP_LEFT_RIGHT)
    else:
        input = Resize((512, 1024), Image.BILINEAR)(input)
        target = Resize((512, 1024), Image.NEAREST)(target)

    input = ToTensor()(input)

    if self.enc:
        # Smaller target for the encoder-only case: (512, 1024) / 8.
        target = Resize((64, 128), Image.NEAREST)(target)
    target = ToLabel()(target)
    # Presumably folds the 255 "void" value into class 7 -- confirm
    # against the Relabel implementation.
    target = Relabel(255, 7)(target)

    return input, target
# Module-level configuration (NUM_CLASSES = 8) followed by the paired
# image/label augmentation class MyCoTransform.
#   * input_transform: CenterCrop(240) -> ToTensor() -> Normalize(mean, std).
#   * target_transform: CenterCrop(240) -> ToLabel() -> Relabel(255, 7).
#     Relabel(255, 7) presumably remaps the value 255 to the last class
#     index (NUM_CLASSES - 1) -- confirm against the Relabel implementation.
#   * MyCoTransform(enc, augment=True, height=512) only stores its three
#     arguments; the trailing `pass` in __init__ is a no-op.
# NOTE(review): MyCoTransform.__call__ has no body in this view; the
# definition continues past the visible text.
# NOTE(review): the statements below are collapsed onto one physical line, so
# everything after the first `#` is lexically a comment; the original line
# breaks must be restored before this fragment can run.
from shutil import copyfile NUM_CHANNELS = 3 NUM_CLASSES = 8 color_transform = Colorize(NUM_CLASSES) image_transform = ToPILImage() input_transform = Compose([ CenterCrop(240), ToTensor(), Normalize([.485, .456, .406], [.229, .224, .225]), ]) target_transform = Compose([ CenterCrop(240), ToLabel(), Relabel(255, 7), ]) #Important Data Augmentations #Data Augmentations (Here Only Traditional Augmentations: Geometry + Texture) - different function implemented to perform random augments on both image and target class MyCoTransform(object): def __init__(self, enc, augment=True, height=512): self.enc = enc self.augment = augment self.height = height pass def __call__(self, input, target): ## do something to both images