def rotate_horizontal(self, path, export_path, rot_range_start, rot_range_end):
    """Export centre-cropped rotations of the image at ``path``.

    The image is loaded into an ``ImageTransformer`` (with no explicit
    shape) and rotated with ``phi`` set to 135, 155, 175, 195 and 215 in
    turn.  Each result is cropped with ``self.center_crop(..., 128, 128)``
    and passed to ``self.save_image`` together with the tag
    ``"horizontal"`` and the angle.

    NOTE(review): ``rot_range_start`` and ``rot_range_end`` are accepted but
    never read; the sweep is hard-coded.  Confirm with callers before wiring
    them in.
    """
    transformer = ImageTransformer(path, None)
    for angle in range(135, 225, 20):
        rotated = transformer.rotate_along_axis(phi=angle)
        cropped = self.center_crop(rotated, 128, 128)
        self.save_image(path, export_path, cropped, "horizontal", angle)
def rotate_horizontal_vertical(self, path, export_path, rot_range_start, rot_range_end):
    """Export centre-cropped rotations using the same angle for phi and gamma.

    The image at ``path`` is loaded into an ``ImageTransformer`` (with no
    explicit shape) and rotated with ``phi`` and ``gamma`` both set to 150,
    165, 180 and 195 in turn.  Each result is cropped with
    ``self.center_crop(..., 128, 128)`` and passed to ``self.save_image``
    together with the tag ``"vertical-horizontal"`` and the angle.

    NOTE(review): ``rot_range_start`` and ``rot_range_end`` are accepted but
    never read; the sweep is hard-coded.  Confirm with callers before wiring
    them in.
    """
    transformer = ImageTransformer(path, None)
    for angle in range(150, 210, 15):
        rotated = transformer.rotate_along_axis(phi=angle, gamma=angle)
        cropped = self.center_crop(rotated, 128, 128)
        self.save_image(path, export_path, cropped, "vertical-horizontal", angle)
def perspectiveTransform(self, maxXangle, maxYangle, maxZangle, bgColor=255):
    """Apply a random rotation to the working image and rebuild the mask.

    The transform starts from ``self.modifiedImg`` when an earlier
    modification has been recorded (``self.modifiedFlag == 1``) and from
    ``self.image`` otherwise.  One angle per axis is drawn uniformly from
    ``[-max, +max]``; the rotated image (shifted by dx=25, dy=-25) is stored
    in ``self.modifiedImg`` and ``self.maskImage`` is recomputed with
    ``cv.inRange`` against ``self.lower`` / ``self.upper``.

    Args:
        maxXangle: bound of the uniform draw passed as ``theta``.
        maxYangle: bound of the uniform draw passed as ``phi``.
        maxZangle: bound of the uniform draw passed as ``gamma``.
        bgColor: forwarded to ``rotate_along_axis`` as ``bgColor``.

    Returns:
        The tuple ``(angX, angY, angZ)`` of angles that were drawn.
    """
    source = self.modifiedImg if self.modifiedFlag == 1 else self.image
    it = ImageTransformer(source, (self.height, self.width))
    self.modifiedFlag = 1

    # Draw order (X, then Y, then Z) is kept so seeded runs stay reproducible.
    angX = np.random.uniform(-maxXangle, maxXangle)
    angY = np.random.uniform(-maxYangle, maxYangle)
    angZ = np.random.uniform(-maxZangle, maxZangle)

    self.modifiedImg = it.rotate_along_axis(
        theta=angX,
        phi=angY,
        gamma=angZ,
        dx=25,
        dy=-25,
        dz=0,
        bgColor=bgColor,
    )
    self.maskImage = cv.inRange(self.modifiedImg, self.lower, self.upper)
    return angX, angY, angZ
#
# Output:
#   image : the rotated image

# Input image path
img_path = sys.argv[1]

# Rotation range (defaults to a full turn when no second argument is given)
rot_range = 360 if len(sys.argv) <= 2 else int(sys.argv[2])

# Ideal image shape (w, h); only used when both values are supplied
img_shape = None if len(sys.argv) <= 4 else (int(sys.argv[3]), int(sys.argv[4]))

# Instantiate the class
it = ImageTransformer(img_path, img_shape)

# Make output dir (exist_ok avoids the check-then-create race)
os.makedirs('output', exist_ok=True)

# Iterate through rotation range.  `range` replaces the Python-2-only
# `xrange`, which raises NameError on Python 3.
for ang in range(0, rot_range):
    # NOTE: Here we can change which angle, axis, shift

    # Example of rotating an image along y-axis from 0 to 360 degree with a
    # 5 pixel shift in +X direction
    rotated_img = it.rotate_along_axis(phi=ang, dx=5)

    # Example of rotating an image along yz-axis from 0 to 360 degree
    # (the rest of this loop body is not part of this excerpt)
def _grad_l2_norm(parameters):
    """Return the global L2 norm over the gradients of ``parameters``.

    Parameters without a gradient (``p.grad is None``) are skipped instead of
    raising ``AttributeError``.
    """
    squared = 0.0
    for p in parameters:
        if p.grad is None:
            continue
        squared += p.grad.data.norm(2).item() ** 2
    return squared ** 0.5


def main():
    """Train an ImageTransformer / ImageReformer and log to TensorBoard.

    CIFAR-10 is used unless ``args.img64`` is given, in which case
    ``Imagenet64`` is used (with weight decay 0.1 on the optimizer).  Every
    ``config.train.log_iter`` steps the loss, bits per dimension, accuracy
    and gradient norms are logged.  Every ``config.train.sample_iter`` steps
    the per-dimension loss heatmaps and image grids are written and the model
    weights are saved to ``transformer_logs/<args.doc>/model.pth``.

    Returns:
        0 once ``config.train.epochs`` epochs have run.
    """
    args, config = parse_args_and_config()
    tb_logger = tensorboardX.SummaryWriter(
        log_dir=os.path.join('transformer_logs', args.doc))

    use_img64 = args.img64 is not None

    # NOTE(review): these are the ImageNet mean/std, which do not map data to
    # [-1, 1], while revert_samples() below undoes a 0.5/0.5 normalisation.
    # Confirm which of the two is intended.
    to_tensor_steps = [
        transforms.Resize(config.model.image_size),
        transforms.ToTensor(),
    ]
    if use_img64 or config.model.distr == "dmol":
        to_tensor_steps.append(
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]))
    transform = transforms.Compose(to_tensor_steps)

    if use_img64:
        # NOTE(review): the dataset location is hard-coded; args.img64 only
        # selects this branch.
        dataset = Imagenet64('../data/train_64x64/', transform=transform)
        weight_decay = 0.1
    else:
        dataset = datasets.CIFAR10('datasets/transformer',
                                   transform=transform,
                                   download=True)
        weight_decay = 0.

    loader = DataLoader(dataset,
                        batch_size=config.train.batch_size,
                        shuffle=True,
                        num_workers=4)
    input_dim = config.model.image_size**2 * config.model.channels

    model_cls = ImageReformer if args.reformer else ImageTransformer
    model = model_cls(config.model).to(config.device)
    # Base lr is 1.0, so the LambdaLR factor is the effective learning rate.
    optimizer = optim.Adam(model.parameters(),
                           lr=1.,
                           betas=(0.9, 0.98),
                           eps=1e-9,
                           weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda step: get_lr(step, config))

    # Initialize as in their code
    gain = config.model.initializer_gain
    for name, p in model.named_parameters():
        if "layernorm" in name:
            continue
        if p.dim() > 1:
            # Need sqrt for inconsistency between pytorch / TF
            nn.init.xavier_uniform_(p, gain=np.sqrt(gain))
        else:
            a = np.sqrt(3. * gain / p.shape[0])
            nn.init.uniform_(p, -a, a)

    def revert_samples(samples):
        """Map model-space samples back to image space for display."""
        if config.model.distr == "dmol":
            return samples * 0.5 + 0.5
        return samples

    step = 0
    losses_per_dim = torch.zeros(config.model.channels,
                                 config.model.image_size,
                                 config.model.image_size).to(config.device)
    for _ in range(config.train.epochs):
        for batch in loader:
            # Labelled datasets such as CIFAR10 produce (images, labels)
            # batches; keep only the images.  Bare image batches pass through.
            imgs = batch[0] if isinstance(batch, (list, tuple)) else batch
            imgs = imgs.to(config.device)

            model.train()
            optimizer.zero_grad()
            preds = model(imgs)
            loss = model.loss(preds, imgs)

            # Exponential moving average of the per-dimension loss in bits;
            # the first step seeds the average.
            decay = 0. if step == 0 else 0.99
            loss_bits = loss.detach().mean(0) / np.log(2)
            if config.model.distr == "dmol":
                losses_per_dim[0, :, :] = (losses_per_dim[0, :, :] * decay +
                                           (1 - decay) * loss_bits)
            else:
                losses_per_dim = (losses_per_dim * decay +
                                  (1 - decay) * loss_bits)

            loss = loss.view(loss.shape[0], -1).sum(1)
            loss = loss.mean(0)
            loss.backward()

            total_norm = _grad_l2_norm(model.parameters())
            if config.train.clip_grad_norm > 0.0:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         config.train.clip_grad_norm)
            total_norm_post = _grad_l2_norm(model.parameters())

            # NOTE(review): this second, hard-coded clip runs after the "post"
            # norm is measured, so the logged value does not reflect it.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)

            optimizer.step()
            scheduler.step()

            bits_per_dim = loss / (np.log(2.) * input_dim)
            acc = model.accuracy(preds, imgs)

            if step % config.train.log_iter == 0:
                logging.info(
                    'step: {}; loss: {:.3f}; bits_per_dim: {:.3f}, acc: {:.3f}, grad norm pre: {:.3f}, post: {:.3f}'
                    .format(step, loss.item(), bits_per_dim.item(),
                            acc.item(), total_norm, total_norm_post))
                tb_logger.add_scalar('loss', loss.item(), global_step=step)
                tb_logger.add_scalar('bits_per_dim',
                                     bits_per_dim.item(),
                                     global_step=step)
                tb_logger.add_scalar('acc', acc.item(), global_step=step)
                tb_logger.add_scalar('grad_norm',
                                     total_norm,
                                     global_step=step)

            if step % config.train.sample_iter == 0:
                logging.info("Sampling from model: {}".format(args.doc))
                if config.model.distr == "cat":
                    palettes = (('r', 'Reds'), ('g', 'Greens'),
                                ('b', 'Blues'))
                    for idx, (channel, cmap) in enumerate(palettes):
                        ax = sns.heatmap(
                            losses_per_dim[idx, :, :].cpu().numpy(),
                            linewidth=0.5,
                            cmap=cmap)
                        tb_logger.add_figure(
                            "losses_per_dim/{}".format(channel),
                            ax.get_figure(),
                            close=True,
                            global_step=step)
                else:
                    ax = sns.heatmap(losses_per_dim[0, :, :].cpu().numpy(),
                                     linewidth=0.5,
                                     cmap='Blues')
                    tb_logger.add_figure("losses_per_dim",
                                         ax.get_figure(),
                                         close=True,
                                         global_step=step)

                model.eval()
                with torch.no_grad():
                    imgs = revert_samples(imgs)
                    imgs_grid = torchvision.utils.make_grid(imgs[:8, ...], 3)
                    tb_logger.add_image('imgs', imgs_grid, global_step=step)

                    # Evaluate model predictions for the input
                    pred_samples = revert_samples(
                        model.sample_from_preds(preds))
                    pred_samples_grid = torchvision.utils.make_grid(
                        pred_samples[:8, ...], 3)
                    tb_logger.add_image('pred_samples/random',
                                        pred_samples_grid,
                                        global_step=step)

                    pred_samples = revert_samples(
                        model.sample_from_preds(preds, argmax=True))
                    pred_samples_grid = torchvision.utils.make_grid(
                        pred_samples[:8, ...], 3)
                    tb_logger.add_image('pred_samples/argmax',
                                        pred_samples_grid,
                                        global_step=step)

                    if args.sample:
                        # Argmax samples are not useful for unconditional
                        # generation, so only random samples are drawn.
                        samples = revert_samples(
                            model.sample(config.train.sample_size,
                                         config.device))
                        samples_grid = torchvision.utils.make_grid(
                            samples[:8, ...], 3)
                        tb_logger.add_image('samples',
                                            samples_grid,
                                            global_step=step)

                torch.save(
                    model.state_dict(),
                    os.path.join('transformer_logs', args.doc, "model.pth"))
            step += 1
    return 0
def _sample_angle(sigma, limit):
    """Draw from N(0, sigma); redraw once from N(0, 1) if outside +/-limit."""
    angle = np.random.normal(0, sigma)
    if angle < -limit or angle > limit:
        angle = np.random.normal(0, 1)
    return angle


def _paste_opaque(background, overlay, x_start, y_start, alpha_threshold):
    """Copy overlay colour channels onto ``background`` in place.

    Only pixels whose alpha (channel 3) exceeds ``alpha_threshold`` are
    copied; the overlay's top-left corner lands at ``(x_start, y_start)``.
    """
    over_h, over_w = overlay.shape[:2]
    opaque = overlay[:, :, 3] > alpha_threshold
    # Basic slicing returns a view, so the assignment writes into `background`.
    target = background[y_start:y_start + over_h, x_start:x_start + over_w]
    target[opaque, :3] = overlay[opaque, :3]


def generateComposite(composite_count):
    """Write ``composite_count`` composites of one sign on a background.

    Each iteration crops a random background, picks and resizes a random
    sign, applies each optional augmentation with probability 0.1, rotates
    the sign with ``ImageTransformer`` and pastes its opaque pixels at a
    random position.  ``Composite<i>.png`` and a matching ``Composite<i>.xml``
    are written to ``output_path``.

    A sign that does not have four channels is printed and skipped without
    counting towards ``composite_count``.
    """
    augmentations = (
        augment_brightness_camera_images,
        add_random_shadow,
        overlap,
        blend,
    )
    iterator = 0
    while iterator < composite_count:
        background = cv2.imread(get_random_background(), cv2.IMREAD_UNCHANGED)
        background = background[200:915, 100:1800, :]
        rand_sign, index = get_random_sign()
        sign = cv2.imread(rand_sign, cv2.IMREAD_UNCHANGED)
        height, width, _ = background.shape

        augmented_img = resize_sign(sign)
        for augment in augmentations:
            if random.random() > 0.90:
                augmented_img = augment(augmented_img)

        h, w, c = augmented_img.shape
        if c != 4:
            # No alpha channel to paste with: report the sign and try again.
            print(rand_sign)
            continue

        it = ImageTransformer(augmented_img, (h, w))
        theta = _sample_angle(10, 20)
        phi = _sample_angle(30, 45)
        gamma = _sample_angle(3, 20)
        rotated = it.rotate_along_axis(theta=theta, phi=phi, gamma=gamma,
                                       dx=h / 2, dy=w / 2)
        # The 3D rotation changes the image size (width and height appear
        # swapped), so resize back to the pre-rotation size here.
        rotated = cv2.resize(rotated, (w, h))
        rotated = cut_the_empty_bounding(rotated)
        rotated = cv2.cvtColor(rotated, cv2.COLOR_RGB2RGBA)
        rotated_h, rotated_w, _ = rotated.shape

        # Pick a top-left corner that keeps the whole sign inside the
        # background.  (Original author noted inconsistent results with the
        # x and y offsets here.)
        x_start = random.randint(0, int(width) - rotated_w)
        y_start = random.randint(0, int(height) - rotated_h)
        _paste_opaque(background, rotated, x_start, y_start, 50)

        base_name = "Composite" + str(iterator)
        cv2.imwrite(output_path + "/" + base_name + ".png", background)
        XML_data = XMLPackage(rand_sign.replace('\\', '/'),
                              base_name + ".png", "Unknown", width, height,
                              "4", "0", sign_list[index][0], "Unspecified",
                              "0", "0", int(x_start), int(y_start),
                              int(x_start + rotated_w),
                              int(y_start + rotated_h))
        generate_XML_File(output_path + "/" + base_name + ".xml", XML_data)
        iterator = iterator + 1
        print(iterator)
parser.add_argument('-j', help='make GIF of output', action='store_true')
parser.add_argument('--bg', metavar='bg', type=rgb_tuple,
                    help='background color', default=(0, 0, 0))
options = parser.parse_args()

if options.image_path is None:
    raise ValueError('Image not found, check image path')

filename, file_extension = os.path.splitext(options.image_path)
it = ImageTransformer(options.image_path,
                      height=options.height,
                      width=options.width)

# exist_ok avoids the race between checking for the directory and creating it.
os.makedirs('output', exist_ok=True)

if options.mode == 'single':
    # One rotation with the angles and shifts given on the command line;
    # the result keeps the input file's extension.
    rotated_img = it.rotate_along_axis(bg=options.bg,
                                       theta=options.theta,
                                       phi=options.phi,
                                       gamma=options.gamma,
                                       dx=options.dx,
                                       dy=options.dy,
                                       dz=options.dz)
    save_image(f'{options.output}{file_extension}', rotated_img)
# generateComposite(composite_count): writes `composite_count` synthetic images by pasting a
# randomly chosen sign onto a cropped random background, saving Composite<i>.png and a
# matching Composite<i>.xml (built with XMLPackage / generate_XML_File) under `output_path`.
#
# Signs whose name in `sign_list` matches r'\d\d\smph' get a second image pasted next to
# them: a "speed limit" image when random.random() >= 0.40, otherwise an "mph" image. Both
# bounding boxes are appended to the xmin/ymin/xmax/ymax lists and both names to `name`.
# All other signs are pasted alone.
#
# Each augmentation (brightness, shadow, overlap, blend) fires when random.random() > 0.90,
# and the ImageTransformer rotation is gated on random.random() > 0.20.
#
# NOTE(review): this definition is collapsed onto a few physical lines, so the original
# indentation (and with it the exact nesting of the `if` blocks around the rotation, resize
# and crop steps) cannot be recovered from here. Restore it from version control before
# changing the logic.
# NOTE(review): the in-code comment says the mph caption goes "80% below, 20% right", but the
# condition used is `random.random() >= 0.40` (60% / 40%). Confirm which is intended.
# NOTE(review): the caption paste uses alpha threshold 10 everywhere except the
# "mph right of the number" case, which uses 50. Confirm whether that is deliberate.
def generateComposite(composite_count): iterator = 0 while iterator < composite_count: background = cv2.imread(get_random_background(), cv2.IMREAD_UNCHANGED) background = background[200:915, 100:1800, :] height, width, _ = background.shape rand_sign, sign_index = get_random_sign() # Each auto-generated image may have multiple signs in it name = [] xmin = [] ymin = [] xmax = [] ymax = [] print(iterator) # For speed limit sign, here 60% of them we add speed limit word, (80% word on top, 20% word below) # 40% add mph word(80% below, 20% right) if re.match(r'\d\d\smph', sign_list[sign_index][0]): sign = cv2.imread(rand_sign, cv2.IMREAD_UNCHANGED) resizedImage = resize_sign(sign) if random.random() >= 0.40: speed_limit, concat_index = get_random_speed_limit() speed_limit = cv2.imread(speed_limit, cv2.IMREAD_UNCHANGED) resized_speed_limit = resize_sign(speed_limit) augmented_concat = resized_speed_limit concate_type = 'speed_limit' else: mph, concat_index = get_random_mph() mph = cv2.imread(mph, cv2.IMREAD_UNCHANGED) resized_speed_limit = resize_sign(mph) augmented_concat = resized_speed_limit concate_type = 'mph' # TODO: we can add km/h and so on augmented_img = resizedImage augmented_img = cv2.cvtColor(augmented_img, cv2.COLOR_RGB2RGBA) augmented_concat = cv2.cvtColor(augmented_concat, cv2.COLOR_RGB2RGBA) if random.random() > 0.90: img_list = augment_brightness_camera_images( [augmented_img, augmented_concat]) augmented_img = img_list[0] augmented_concat = img_list[1] if random.random() > 0.90: img_list = add_random_shadow([augmented_img, augmented_concat]) augmented_img = img_list[0] augmented_concat = img_list[1] if random.random() > 0.90: img_list = overlap([augmented_img, augmented_concat]) augmented_img = img_list[0] augmented_concat = img_list[1] if random.random() > 0.90: img_list = blend([augmented_img, augmented_concat]) augmented_img = img_list[0] augmented_concat = img_list[1] h, w, c = augmented_img.shape hc, wc, cc = augmented_concat.shape # 
cv2.imshow('1', augmented_concat) # cv2.waitKey(0) if random.random() > 0.20: if c == 4: it = ImageTransformer(augmented_img, (h, w)) itc = ImageTransformer(augmented_concat, (hc, wc)) theta = np.random.normal(0, 10) if theta < -20 or theta > 20: theta = np.random.normal(0, 1) phi = np.random.normal(0, 30) if phi < -45 or phi > 45: phi = np.random.normal(0, 1) gamma = np.random.normal(0, 3) if gamma < -20 or gamma > 20: gamma = np.random.normal(0, 1) augmented_img = it.rotate_along_axis(theta=theta, phi=phi, gamma=gamma, dx=h / 2, dy=w / 2) augmented_concat = itc.rotate_along_axis(theta=theta, phi=phi, gamma=gamma, dx=hc / 2, dy=wc / 2) # 3D rotate function somehow resize the image(swap the width and height), I don't know how to fix # in the matrix, so I have to resize it back here augmented_img = cv2.resize(augmented_img, (w, h)) augmented_img = cut_the_empty_bounding(augmented_img) augmented_img = cv2.cvtColor(augmented_img, cv2.COLOR_RGB2RGBA) augmented_concat = cv2.resize(augmented_concat, (wc, hc)) augmented_concat = cut_the_empty_bounding(augmented_concat) augmented_concat = cv2.cvtColor(augmented_concat, cv2.COLOR_RGB2RGBA) aug_h, aug_w, _ = augmented_img.shape aug_concat_h, aug_concat_w, _ = augmented_concat.shape # Let's put the sign on x, y position x_start_max = int(width) - aug_w - aug_concat_w y_start_max = int(height) - aug_h - aug_concat_h x_start = random.randint(0, x_start_max) y_start = random.randint(aug_concat_h, y_start_max) xmin.append(x_start) ymin.append(y_start) xmax.append(x_start + aug_w) ymax.append(y_start + aug_h) for x in range(aug_w): for y in range(aug_h): if augmented_img[y][x][3] > 50: for i in range(3): background[y + y_start][ x + x_start][i] = augmented_img[y][x][i] # After we put the sign position, we concatenate the word to it if concate_type == 'speed_limit': if random.random() >= 0.20: # 80% cases we put speed limit sign above the number y_start = y_start - aug_concat_h xmin.append(x_start) ymin.append(y_start) 
xmax.append(x_start + aug_concat_w) ymax.append(y_start + aug_concat_h) for x in range(aug_concat_w): for y in range(aug_concat_h): if augmented_concat[y][x][3] > 10: for i in range(3): background[y + y_start][ x + x_start][i] = augmented_concat[y][x][i] else: # 20% cases we put speed limit sign below the number y_start = y_start + aug_h xmin.append(x_start) ymin.append(y_start) xmax.append(x_start + aug_concat_w) ymax.append(y_start + aug_concat_h) for x in range(aug_concat_w): for y in range(aug_concat_h): if augmented_concat[y][x][3] > 10: for i in range(3): background[y + y_start][ x + x_start][i] = augmented_concat[y][x][i] elif concate_type == 'mph': if random.random() >= 0.40: # 60% cases we put mph sign below the number y_start = y_start + aug_h xmin.append(x_start) ymin.append(y_start) xmax.append(x_start + aug_concat_w) ymax.append(y_start + aug_concat_h) for x in range(aug_concat_w): for y in range(aug_concat_h): if augmented_concat[y][x][3] > 10: for i in range(3): background[y + y_start][ x + x_start][i] = augmented_concat[y][x][i] else: # 40% cases we put mph right to the number x_start = x_start + aug_w xmin.append(x_start) ymin.append(y_start) xmax.append(x_start + aug_concat_w) ymax.append(y_start + aug_concat_h) for x in range(aug_concat_w): for y in range(aug_concat_h): if augmented_concat[y][x][3] > 50: for i in range(3): background[y + y_start][ x + x_start][i] = augmented_concat[y][x][i] cv2.imwrite( output_path + "/" + "Composite" + str(iterator) + ".png", background) name.append(sign_list[sign_index][0]) name.append(sign_list[concat_index][0]) XML_data = XMLPackage(path=rand_sign.replace('\\', '/'), filename="Composite" + str(iterator) + ".png", width=width, height=height, depth="4", name=name, xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax) generate_XML_File( (output_path + "/" + "Composite" + str(iterator) + ".xml"), XML_data) iterator = iterator + 1 else: sign = cv2.imread(rand_sign, cv2.IMREAD_UNCHANGED) resizedImage = resize_sign(sign) 
augmented_img = resizedImage augmented_img = cv2.cvtColor(augmented_img, cv2.COLOR_RGB2RGBA) if random.random() > 0.90: augmented_img = augment_brightness_camera_images( [augmented_img])[0] if random.random() > 0.90: augmented_img = add_random_shadow([augmented_img])[0] if random.random() > 0.90: augmented_img = overlap([augmented_img])[0] if random.random() > 0.90: augmented_img = blend([augmented_img])[0] h, w, c = augmented_img.shape # cv2.imshow('1', augmented_concat) # cv2.waitKey(0) if random.random() > 0.20: if c == 4: it = ImageTransformer(augmented_img, (h, w)) theta = np.random.normal(0, 10) if theta < -20 or theta > 20: theta = np.random.normal(0, 1) phi = np.random.normal(0, 30) if phi < -45 or phi > 45: phi = np.random.normal(0, 1) gamma = np.random.normal(0, 3) if gamma < -20 or gamma > 20: gamma = np.random.normal(0, 1) augmented_img = it.rotate_along_axis(theta=theta, phi=phi, gamma=gamma, dx=h / 2, dy=w / 2) # 3D rotate function somehow resize the image(swap the width and height), I don't know how to fix # in the matrix, so I have to resize it back here augmented_img = cv2.resize(augmented_img, (w, h)) augmented_img = cut_the_empty_bounding(augmented_img) augmented_img = cv2.cvtColor(augmented_img, cv2.COLOR_RGB2RGBA) aug_h, aug_w, _ = augmented_img.shape # Let's put the sign on x, y position x_start_max = int(width) - aug_w y_start_max = int(height) - aug_h x_start = random.randint(0, x_start_max) y_start = random.randint(0, y_start_max) xmin.append(x_start) ymin.append(y_start) xmax.append(x_start + aug_w) ymax.append(y_start + aug_h) for x in range(aug_w): for y in range(aug_h): if augmented_img[y][x][3] > 50: for i in range(3): background[y + y_start][ x + x_start][i] = augmented_img[y][x][i] cv2.imwrite( output_path + "/" + "Composite" + str(iterator) + ".png", background) name.append(sign_list[sign_index][0]) XML_data = XMLPackage(path=rand_sign.replace('\\', '/'), filename="Composite" + str(iterator) + ".png", width=width, height=height, 
depth="4", name=name, xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax) generate_XML_File( (output_path + "/" + "Composite" + str(iterator) + ".xml"), XML_data) iterator = iterator + 1
#   phi   : the rotation around the y axis
#   gamma : the rotation around the z axis (basically a 2D rotation)
#   dx    : translation along the x axis
#   dy    : translation along the y axis
#   dz    : translation along the z axis (distance to the image)
#
# Output:
#   image : the rotated image

# Input image path
img_path = sys.argv[1]
img_shape = (500, 500)

# Instantiate the class
it = ImageTransformer(img_path, img_shape)

# Make output dir (exist_ok avoids the check-then-create race)
os.makedirs('output', exist_ok=True)

# NOTE: Here we can change which angle, axis, shift
rot_val = 0
# One output image per pose; the three values are passed positionally to
# rotate_along_axis and also make up the output file name.
for rx, ry, rz in [(0, 0, -45), (0, 0, 0), (0, 0, 45), (45, 0, 0),
                   (-45, 0, 0), (0, 45, 0), (0, -45, 0)]:
    rotated_img = it.rotate_along_axis(rx, ry, rz, dz=600)
    save_image('output/{}x{}y{}z.png'.format(rx, ry, rz), rotated_img)
# Excerpt collapsed onto one line; the tail of the `while` loop is not visible here.
# Creates the `output` directory if it is missing, then for each existing
# `<img_root_path>video_<index>.jpg` builds an ImageTransformer, rotates the frame with
# phi=rot_range and dx=5, and saves it as `<output>/video_<index>.jpg`.
# For the first frame (index == 1) the points (width-1, 0) and (width-1, height-1) are
# mapped through the transform matrix with cv2.perspectiveTransform to fill in
# min_col / min_row / max_col / max_row.
# NOTE(review): nothing visible advances `index` or clears `retval`; presumably that happens
# in the truncated remainder of the loop -- confirm before relying on this excerpt.
if not os.path.isdir(output): os.mkdir(output) retval = True index = 1 min_col = 0 min_row = 0 max_col = 0 max_row = 0 print(output) while (retval): img_path = img_root_path + 'video_%d.jpg' % index if os.path.isfile(img_path): it = ImageTransformer(img_path, None) rotated_img = it.rotate_along_axis(phi=rot_range, dx=5) if index == 1: mat = it.getTransfrom() width, height = it.getImageSize() points = np.array([[[width - 1, 0], [width - 1, height - 1]]], dtype='float32') new_points = cv2.perspectiveTransform(points, mat) print(new_points) min_col = 0 min_row = new_points[0][0][1] max_col = min(new_points[0][0][0], new_points[0][1][0]) max_row = new_points[0][1][1] save_image(output + '/video_%d.jpg' % index, rotated_img) print('save_image:%d' % index)