Example #1
0
 def rotate_horizontal(self, path, export_path, rot_range_start,
                       rot_range_end):
     """Save center-cropped copies of an image rotated via ``phi``.

     The image at ``path`` is loaded into an ``ImageTransformer`` (no target
     shape) and rotated with ``phi`` set to each angle of
     ``range(135, 225, 20)``. Every result is cropped with
     ``self.center_crop(..., 128, 128)`` and passed to ``self.save_image``
     together with the "horizontal" label and the angle.

     NOTE(review): ``rot_range_start`` and ``rot_range_end`` are accepted
     but never read -- the sweep is hard-coded. Confirm whether callers
     expect them to drive the range.
     """
     transformer = ImageTransformer(path, None)
     # Same angles as range(135, 225, 20), spelled out.
     for angle in (135, 155, 175, 195, 215):
         cropped = self.center_crop(
             transformer.rotate_along_axis(phi=angle), 128, 128)
         self.save_image(path, export_path, cropped, "horizontal", angle)
Example #2
0
 def rotate_horizontal_vertical(self, path, export_path, rot_range_start,
                                rot_range_end):
     """Save center-cropped copies rotated by the same ``phi`` and ``gamma``.

     The image at ``path`` is loaded into an ``ImageTransformer`` (no target
     shape). For each angle of ``range(150, 210, 15)`` the image is rotated
     with both ``phi`` and ``gamma`` set to that angle, cropped with
     ``self.center_crop(..., 128, 128)`` and passed to ``self.save_image``
     with the "vertical-horizontal" label and the angle.

     NOTE(review): ``rot_range_start`` and ``rot_range_end`` are accepted
     but never read -- the sweep is hard-coded. Confirm whether callers
     expect them to drive the range.
     """
     transformer = ImageTransformer(path, None)
     # Same angles as range(150, 210, 15), spelled out.
     for angle in (150, 165, 180, 195):
         cropped = self.center_crop(
             transformer.rotate_along_axis(phi=angle, gamma=angle), 128, 128)
         self.save_image(path, export_path, cropped, "vertical-horizontal",
                         angle)
Example #3
0
 def perspectiveTransform(self,
                          maxXangle,
                          maxYangle,
                          maxZangle,
                          bgColor=255):
     """Apply a random rotation to the working image and rebuild the mask.

     The transform starts from ``self.modifiedImg`` when ``self.modifiedFlag``
     is 1, otherwise from ``self.image`` (and the flag is then set to 1).
     One angle per axis is drawn uniformly from ``[-max, max]`` and passed to
     ``rotate_along_axis`` together with the fixed shift ``dx=25``,
     ``dy=-25``, ``dz=0`` and ``bgColor``. The result is stored in
     ``self.modifiedImg``; ``self.maskImage`` is recomputed with
     ``cv.inRange`` over ``self.lower``/``self.upper``.

     Returns:
         The sampled ``(angX, angY, angZ)`` angles.
     """
     already_modified = self.modifiedFlag == 1
     source = self.modifiedImg if already_modified else self.image
     transformer = ImageTransformer(source, (self.height, self.width))
     if not already_modified:
         # Later calls build on the modified image.
         self.modifiedFlag = 1

     # Drawn in X, Y, Z order, one uniform sample each.
     angX, angY, angZ = [
         np.random.uniform(-limit, limit)
         for limit in (maxXangle, maxYangle, maxZangle)
     ]

     rotation = dict(theta=angX, phi=angY, gamma=angZ)
     shift = dict(dx=25, dy=-25, dz=0)
     self.modifiedImg = transformer.rotate_along_axis(bgColor=bgColor,
                                                      **rotation,
                                                      **shift)
     self.maskImage = cv.inRange(self.modifiedImg, self.lower, self.upper)
     return angX, angY, angZ
Example #4
0
#
# Output:
#     image     : the rotated image


# Input image path (first command-line argument, required)
img_path = sys.argv[1]

# Rotation range: angles 0 .. rot_range-1 are rendered; defaults to 360
rot_range = 360 if len(sys.argv) <= 2 else int(sys.argv[2])

# Ideal image shape (w, h); only used when both values are supplied
img_shape = None if len(sys.argv) <= 4 else (int(sys.argv[3]), int(sys.argv[4]))

# Instantiate the class
it = ImageTransformer(img_path, img_shape)

# Make output dir
if not os.path.isdir('output'):
    os.mkdir('output')

# Iterate through rotation range.
# `range` is used instead of the Python 2-only `xrange`, which raises
# NameError on Python 3; iteration order and values are unchanged.
for ang in range(0, rot_range):

    # NOTE: Here we can change which angle, axis, shift

    # Example of rotating an image along y-axis from 0 to 360 degree
    # with a 5 pixel shift in +X direction
    rotated_img = it.rotate_along_axis(phi = ang, dx = 5)

    # Example of rotating an image along yz-axis from 0 to 360 degree
Example #5
0
def _batch_images(batch):
    """Return the image tensor from one loader batch.

    A ``DataLoader`` over ``datasets.CIFAR10`` yields ``[images, labels]``
    while the training loop only needs the images. A batch that is not a
    list/tuple is returned unchanged.
    """
    if isinstance(batch, (list, tuple)):
        return batch[0]
    return batch


def _total_grad_norm(parameters):
    """Return the global L2 norm over the gradients of ``parameters``.

    Parameters that have no gradient (``grad is None``) are skipped instead
    of raising ``AttributeError``.
    """
    squared_sum = 0.
    for p in parameters:
        if p.grad is None:
            continue
        squared_sum += p.grad.data.norm(2).item()**2
    return squared_sum**(1. / 2)


def main():
    """Train an ``ImageTransformer``/``ImageReformer`` and log to TensorBoard.

    Configuration comes from ``parse_args_and_config()``. CIFAR10 is used
    when ``args.img64`` is None, otherwise the ``Imagenet64`` dataset (with
    Adam weight decay 0.1). Every ``config.train.log_iter`` steps scalars are
    logged; every ``config.train.sample_iter`` steps loss heatmaps, input and
    prediction image grids (plus free samples when ``args.sample`` is set)
    are logged and the model weights are written to
    ``transformer_logs/<args.doc>/model.pth``.

    Returns:
        0 once all epochs have completed.
    """
    args, config = parse_args_and_config()
    tb_logger = tensorboardX.SummaryWriter(
        log_dir=os.path.join('transformer_logs', args.doc))

    use_cifar = args.img64 is None

    # Inputs are normalized with a fixed per-channel mean/std for the "dmol"
    # likelihood, and always for the 64x64 dataset.
    steps = [
        transforms.Resize(config.model.image_size),
        transforms.ToTensor(),
    ]
    if config.model.distr == "dmol" or not use_cifar:
        steps.append(
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]))
    transform = transforms.Compose(steps)

    if use_cifar:
        dataset = datasets.CIFAR10('datasets/transformer',
                                   transform=transform,
                                   download=True)
        weight_decay = 0.
    else:
        # NOTE(review): args.img64 only selects this branch; the data path
        # itself is hard-coded.
        dataset = Imagenet64('../data/train_64x64/', transform=transform)
        weight_decay = 0.1

    loader = DataLoader(dataset,
                        batch_size=config.train.batch_size,
                        shuffle=True,
                        num_workers=4)
    input_dim = config.model.image_size**2 * config.model.channels
    model_cls = ImageReformer if args.reformer else ImageTransformer
    model = model_cls(config.model).to(config.device)
    # The base lr of 1. means the LambdaLR factor from get_lr is the
    # effective learning rate.
    optimizer = optim.Adam(model.parameters(),
                           lr=1.,
                           betas=(0.9, 0.98),
                           eps=1e-9,
                           weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda step: get_lr(step, config))

    # Initialize as in their code
    gain = config.model.initializer_gain
    for name, p in model.named_parameters():
        if "layernorm" in name:
            continue
        if p.dim() > 1:
            # Need sqrt for inconsistency between pytorch / TF
            nn.init.xavier_uniform_(p, gain=np.sqrt(gain))
        else:
            a = np.sqrt(3. * gain / p.shape[0])
            nn.init.uniform_(p, -a, a)

    def revert_samples(samples):
        """Map model-space samples back for display.

        "cat" samples are returned unchanged, "dmol" samples are mapped via
        ``x * 0.5 + 0.5``; any other ``distr`` yields None.
        """
        if config.model.distr == "cat":
            return samples
        elif config.model.distr == "dmol":
            return samples * 0.5 + 0.5

    step = 0
    # Exponential moving average of the per-position loss, in bits.
    losses_per_dim = torch.zeros(config.model.channels,
                                 config.model.image_size,
                                 config.model.image_size).to(config.device)
    for _ in range(config.train.epochs):
        for batch in loader:
            # CIFAR10 batches carry labels; only the images are used.
            imgs = _batch_images(batch).to(config.device)
            model.train()
            optimizer.zero_grad()
            preds = model(imgs)
            loss = model.loss(preds, imgs)

            # The first step seeds the moving average with the raw loss.
            decay = 0. if step == 0 else 0.99
            loss_bits = loss.detach().mean(0) / np.log(2)
            if config.model.distr == "dmol":
                losses_per_dim[0, :, :] = losses_per_dim[0, :, :] * \
                    decay + (1 - decay) * loss_bits
            else:
                losses_per_dim = losses_per_dim * decay + \
                    (1 - decay) * loss_bits

            # Sum over all dimensions of each sample, then average the batch.
            loss = loss.view(loss.shape[0], -1).sum(1)
            loss = loss.mean(0)

            loss.backward()

            total_norm = _total_grad_norm(model.parameters())

            if config.train.clip_grad_norm > 0.0:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         config.train.clip_grad_norm)

            total_norm_post = _total_grad_norm(model.parameters())
            # NOTE(review): this second, hard-coded clip runs after
            # total_norm_post is measured, so the logged "post" norm does not
            # reflect it. Kept as-is to preserve training behavior.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
            optimizer.step()
            scheduler.step()
            bits_per_dim = loss / (np.log(2.) * input_dim)
            acc = model.accuracy(preds, imgs)

            if step % config.train.log_iter == 0:
                logging.info(
                    'step: {}; loss: {:.3f}; bits_per_dim: {:.3f}, acc: {:.3f}, grad norm pre: {:.3f}, post: {:.3f}'
                    .format(step, loss.item(), bits_per_dim.item(), acc.item(),
                            total_norm, total_norm_post))
                tb_logger.add_scalar('loss', loss.item(), global_step=step)
                tb_logger.add_scalar('bits_per_dim',
                                     bits_per_dim.item(),
                                     global_step=step)
                tb_logger.add_scalar('acc', acc.item(), global_step=step)
                tb_logger.add_scalar('grad_norm', total_norm, global_step=step)

            if step % config.train.sample_iter == 0:
                logging.info("Sampling from model: {}".format(args.doc))
                if config.model.distr == "cat":
                    # One heatmap per color channel.
                    channels = ['r', 'g', 'b']
                    color_codes = ['Reds', "Greens", 'Blues']
                    for idx, c in enumerate(channels):
                        ax = sns.heatmap(
                            losses_per_dim[idx, :, :].cpu().numpy(),
                            linewidth=0.5,
                            cmap=color_codes[idx])
                        tb_logger.add_figure("losses_per_dim/{}".format(c),
                                             ax.get_figure(),
                                             close=True,
                                             global_step=step)
                else:
                    # Only channel 0 of the average is maintained here.
                    ax = sns.heatmap(losses_per_dim[0, :, :].cpu().numpy(),
                                     linewidth=0.5,
                                     cmap='Blues')
                    tb_logger.add_figure("losses_per_dim",
                                         ax.get_figure(),
                                         close=True,
                                         global_step=step)

                model.eval()
                with torch.no_grad():
                    imgs = revert_samples(imgs)
                    imgs_grid = torchvision.utils.make_grid(imgs[:8, ...], 3)
                    tb_logger.add_image('imgs', imgs_grid, global_step=step)

                    # Evaluate model predictions for the input
                    pred_samples = revert_samples(
                        model.sample_from_preds(preds))
                    pred_samples_grid = torchvision.utils.make_grid(
                        pred_samples[:8, ...], 3)
                    tb_logger.add_image('pred_samples/random',
                                        pred_samples_grid,
                                        global_step=step)
                    pred_samples = revert_samples(
                        model.sample_from_preds(preds, argmax=True))
                    pred_samples_grid = torchvision.utils.make_grid(
                        pred_samples[:8, ...], 3)
                    tb_logger.add_image('pred_samples/argmax',
                                        pred_samples_grid,
                                        global_step=step)

                    if args.sample:
                        samples = revert_samples(
                            model.sample(config.train.sample_size,
                                         config.device))
                        samples_grid = torchvision.utils.make_grid(
                            samples[:8, ...], 3)
                        tb_logger.add_image('samples',
                                            samples_grid,
                                            global_step=step)

                    # Argmax samples are not useful for unconditional
                    # generation, so none are logged.
                torch.save(
                    model.state_dict(),
                    os.path.join('transformer_logs', args.doc, "model.pth"))
            step += 1

    return 0
Example #6
0
def generateComposite(composite_count):
    """Generate ``composite_count`` background+sign images with XML labels.

    Each attempt picks a random background (cropped to rows 200:915 and
    columns 100:1800) and a random sign, resizes the sign, applies each of
    four optional augmentations with probability 0.1, rotates the sign in 3D
    by randomly drawn angles, and pastes its sufficiently opaque pixels
    (alpha > 50) at a random position. The composite is written to
    ``output_path`` as ``Composite<i>.png`` next to ``Composite<i>.xml``
    describing the pasted bounding box.

    A sign whose augmented image does not have 4 channels is reported with
    ``print`` and the attempt is repeated with new random picks; the counter
    is not advanced in that case.
    """
    iterator = 0

    while iterator < composite_count:

        background = cv2.imread(get_random_background(), cv2.IMREAD_UNCHANGED)
        background = background[200:915, 100:1800, :]
        rand_sign, index = get_random_sign()
        sign = cv2.imread(rand_sign, cv2.IMREAD_UNCHANGED)
        height, width, _ = background.shape

        augmented_img = resize_sign(sign)
        if random.random() > 0.90:
            augmented_img = augment_brightness_camera_images(augmented_img)
        if random.random() > 0.90:
            augmented_img = add_random_shadow(augmented_img)
        if random.random() > 0.90:
            augmented_img = overlap(augmented_img)
        if random.random() > 0.90:
            augmented_img = blend(augmented_img)
        h, w, c = augmented_img.shape

        if c != 4:
            # No alpha channel to paste with: report the sign and retry.
            print(rand_sign)
            continue

        it = ImageTransformer(augmented_img, (h, w))

        # Each angle is drawn from a wide normal; draws outside the allowed
        # window are replaced by a single draw from a narrow normal.
        theta = np.random.normal(0, 10)
        if theta < -20 or theta > 20:
            theta = np.random.normal(0, 1)

        phi = np.random.normal(0, 30)
        if phi < -45 or phi > 45:
            phi = np.random.normal(0, 1)

        gamma = np.random.normal(0, 3)
        if gamma < -20 or gamma > 20:
            gamma = np.random.normal(0, 1)

        rotated = it.rotate_along_axis(theta=theta, phi=phi,
                                       gamma=gamma, dx=h / 2, dy=w / 2)
        # The 3D rotation returns the image with width and height swapped
        # (per the original author's note), so it is resized back here.
        rotated = cv2.resize(rotated, (w, h))
        rotated = cut_the_empty_bounding(rotated)
        rotated = cv2.cvtColor(rotated, cv2.COLOR_RGB2RGBA)

        rotated_h, rotated_w, _ = rotated.shape

        # Choose a top-left corner that keeps the sign inside the background.
        x_start_max = int(width) - rotated_w
        y_start_max = int(height) - rotated_h

        x_start = random.randint(0, x_start_max)
        y_start = random.randint(0, y_start_max)

        # Copy the RGB values of all pixels with alpha > 50 in one masked
        # assignment instead of a per-pixel Python loop. `target` is a view,
        # so `background` is modified in place.
        opaque = rotated[:, :, 3] > 50
        target = background[y_start:y_start + rotated_h,
                            x_start:x_start + rotated_w]
        target[opaque, :3] = rotated[opaque, :3]

        base_name = "Composite" + str(iterator)
        cv2.imwrite(output_path + "/" + base_name + ".png", background)

        XML_data = XMLPackage(rand_sign.replace('\\', '/'), base_name + ".png", "Unknown", width,
                              height, "4", "0", sign_list[index][0], "Unspecified", "0", "0", int(x_start),
                              int(y_start), int(x_start + rotated_w), int(y_start + rotated_h))
        generate_XML_File((output_path + "/" + base_name + ".xml"), XML_data)

        iterator = iterator + 1
        print(iterator)
Example #7
0
# Flag option: when given, options.j is True ("make GIF of output").
parser.add_argument('-j', help='make GIF of output', action='store_true')
# Background color, parsed by rgb_tuple; defaults to (0, 0, 0).
parser.add_argument('--bg',
                    metavar='bg',
                    type=rgb_tuple,
                    help='background color',
                    default=(0, 0, 0))

options = parser.parse_args()

# Only a missing path argument is detected here; whether the file exists is
# not checked at this point.
if options.image_path is None:
    raise ValueError('Image not found, check image path')

# The input's extension is reused for the output file name below.
filename, file_extension = os.path.splitext(options.image_path)

it = ImageTransformer(options.image_path,
                      height=options.height,
                      width=options.width)

# Make output dir
if not os.path.isdir('output'):
    os.mkdir('output')

# Single mode: one rotation/translation taken straight from the CLI options.
if options.mode == 'single':
    rotated_img = it.rotate_along_axis(bg=options.bg,
                                       theta=options.theta,
                                       phi=options.phi,
                                       gamma=options.gamma,
                                       dx=options.dx,
                                       dy=options.dy,
                                       dz=options.dz)
    # The result is saved as <options.output><input extension>.
    save_image(f'{options.output}{file_extension}', rotated_img)
def _augment_together(images):
    """Run the optional augmentations over ``images`` as one group.

    Each of the four augmentations is tried in a fixed order and applied
    when ``random.random() > 0.90``. An applied augmentation receives the
    whole list, and its results are read back by index.
    """
    count = len(images)
    for augment in (augment_brightness_camera_images, add_random_shadow,
                    overlap, blend):
        if random.random() > 0.90:
            result = augment(list(images))
            images = [result[i] for i in range(count)]
    return images


def _sample_rotation_angles():
    """Draw ``(theta, phi, gamma)`` for the 3D rotation.

    Each angle comes from a zero-mean normal (std 10, 30 and 3); a draw
    outside its window (+-20, +-45, +-20) is replaced by a single draw from
    a unit normal.
    """
    angles = []
    for std, limit in ((10, 20), (30, 45), (3, 20)):
        angle = np.random.normal(0, std)
        if angle < -limit or angle > limit:
            angle = np.random.normal(0, 1)
        angles.append(angle)
    return tuple(angles)


def _rotate_and_trim(image, theta, phi, gamma):
    """Rotate ``image`` in 3D, restore its size, trim it and return RGBA."""
    h, w = image.shape[:2]
    rotated = ImageTransformer(image, (h, w)).rotate_along_axis(theta=theta,
                                                                phi=phi,
                                                                gamma=gamma,
                                                                dx=h / 2,
                                                                dy=w / 2)
    # The 3D rotation returns the image with width and height swapped (per
    # the original author's note), so it is resized back here.
    rotated = cv2.resize(rotated, (w, h))
    rotated = cut_the_empty_bounding(rotated)
    return cv2.cvtColor(rotated, cv2.COLOR_RGB2RGBA)


def _paste_opaque(background, overlay, x_start, y_start, alpha_threshold):
    """Copy RGB of ``overlay`` pixels with alpha above the threshold.

    ``background`` is modified in place, with the overlay's top-left corner
    at ``(x_start, y_start)``.
    """
    h, w = overlay.shape[:2]
    mask = overlay[:, :, 3] > alpha_threshold
    # `region` is a view, so the masked assignment writes into `background`.
    region = background[y_start:y_start + h, x_start:x_start + w]
    region[mask, :3] = overlay[mask, :3]


def generateComposite(composite_count):
    """Generate ``composite_count`` background+sign images with XML labels.

    Each image uses a random background (cropped to rows 200:915 and columns
    100:1800) and a random sign. When the sign name matches
    ``r'\\d\\d\\smph'`` a companion image is added: a "speed limit" word
    image when ``random.random() >= 0.40``, otherwise an "mph" image. All
    images are converted to RGBA and augmented together. When
    ``random.random() > 0.20`` (and the sign has 4 channels) they are
    rotated in 3D by the same angles.

    The sign is pasted at a random position (alpha > 50). The companion is
    placed relative to it:

    * speed limit word: above when ``random.random() >= 0.20``, else below
      (alpha > 10);
    * mph: below when ``random.random() >= 0.40`` (alpha > 10), else to the
      right (alpha > 50).

    The composite is written to ``output_path`` as ``Composite<i>.png``
    together with ``Composite<i>.xml`` listing one name and bounding box per
    pasted image.
    """
    iterator = 0
    while iterator < composite_count:
        background = cv2.imread(get_random_background(), cv2.IMREAD_UNCHANGED)
        background = background[200:915, 100:1800, :]
        height, width, _ = background.shape
        rand_sign, sign_index = get_random_sign()
        print(iterator)

        # images[0] is the sign; images[1], when present, is the companion.
        images = [resize_sign(cv2.imread(rand_sign, cv2.IMREAD_UNCHANGED))]
        concate_type = None
        concat_index = None
        if re.match(r'\d\d\smph', sign_list[sign_index][0]):
            if random.random() >= 0.40:
                concat_path, concat_index = get_random_speed_limit()
                concate_type = 'speed_limit'
            else:
                concat_path, concat_index = get_random_mph()
                concate_type = 'mph'
            # TODO: we can add km/h and so on
            images.append(
                resize_sign(cv2.imread(concat_path, cv2.IMREAD_UNCHANGED)))

        images = [cv2.cvtColor(img, cv2.COLOR_RGB2RGBA) for img in images]
        images = _augment_together(images)

        _, _, channels = images[0].shape
        # The random draw happens before the channel check, as before.
        if random.random() > 0.20 and channels == 4:
            theta, phi, gamma = _sample_rotation_angles()
            images = [
                _rotate_and_trim(img, theta, phi, gamma) for img in images
            ]

        augmented_img = images[0]
        aug_h, aug_w, _ = augmented_img.shape
        if concate_type is None:
            augmented_concat = None
            aug_concat_h = aug_concat_w = 0
        else:
            augmented_concat = images[1]
            aug_concat_h, aug_concat_w, _ = augmented_concat.shape

        # Leave room for the companion above, below or to the right of the
        # sign; with no companion this is the full background.
        x_start_max = int(width) - aug_w - aug_concat_w
        y_start_max = int(height) - aug_h - aug_concat_h
        x_start = random.randint(0, x_start_max)
        y_start = random.randint(aug_concat_h, y_start_max)

        # Each auto-generated image may have multiple signs in it
        xmin = [x_start]
        ymin = [y_start]
        xmax = [x_start + aug_w]
        ymax = [y_start + aug_h]
        _paste_opaque(background, augmented_img, x_start, y_start, 50)

        # After we put the sign position, we concatenate the word to it
        if concate_type is not None:
            alpha_threshold = 10
            if concate_type == 'speed_limit':
                if random.random() >= 0.20:
                    # Word above the number.
                    y_start = y_start - aug_concat_h
                else:
                    # Word below the number.
                    y_start = y_start + aug_h
            else:
                if random.random() >= 0.40:
                    # mph below the number.
                    y_start = y_start + aug_h
                else:
                    # mph to the right of the number; this placement uses the
                    # stricter alpha threshold.
                    x_start = x_start + aug_w
                    alpha_threshold = 50
            xmin.append(x_start)
            ymin.append(y_start)
            xmax.append(x_start + aug_concat_w)
            ymax.append(y_start + aug_concat_h)
            _paste_opaque(background, augmented_concat, x_start, y_start,
                          alpha_threshold)

        base_name = "Composite" + str(iterator)
        cv2.imwrite(output_path + "/" + base_name + ".png", background)

        name = [sign_list[sign_index][0]]
        if concat_index is not None:
            name.append(sign_list[concat_index][0])

        XML_data = XMLPackage(path=rand_sign.replace('\\', '/'),
                              filename=base_name + ".png",
                              width=width,
                              height=height,
                              depth="4",
                              name=name,
                              xmin=xmin,
                              ymin=ymin,
                              xmax=xmax,
                              ymax=ymax)
        generate_XML_File((output_path + "/" + base_name + ".xml"), XML_data)

        iterator = iterator + 1
Example #9
0
#     phi       : the rotation around the y axis
#     gamma     : the rotation around the z axis (basically a 2D rotation)
#     dx        : translation along the x axis
#     dy        : translation along the y axis
#     dz        : translation along the z axis (distance to the image)
#
# Output:
#     image     : the rotated image


# Input image path (first command-line argument)
img_path = sys.argv[1]

# Fixed shape handed to the transformer
img_shape = (500, 500)
it = ImageTransformer(img_path, img_shape)

# Make sure the output directory exists
if not os.path.isdir('output'):
    os.mkdir('output')


# NOTE: Here we can change which angle, axis, shift

rot_val = 0
# One +-45 degree rotation per axis, plus the unrotated view.
rotations = [
    (0, 0, -45),
    (0, 0, 0),
    (0, 0, 45),
    (45, 0, 0),
    (-45, 0, 0),
    (0, 45, 0),
    (0, -45, 0),
]
for rx, ry, rz in rotations:
    rotated_img = it.rotate_along_axis(rx, ry, rz, dz=600)
    # The file name encodes the three angles passed above.
    out_name = 'output/{}x{}y{}z.png'.format(rx, ry, rz)
    save_image(out_name, rotated_img)
Example #10
0
# Create the output directory if it does not exist yet.
if not os.path.isdir(output):
    os.mkdir(output)

# Loop flag and the frame number used to build 'video_<index>.jpg' names.
retval = True
index = 1

# Bounds filled in from the first frame's transformed corner points below.
min_col = 0
min_row = 0
max_col = 0
max_row = 0
print(output)
# NOTE(review): nothing in the visible part of this loop updates `retval` or
# `index`; presumably that happens further down -- confirm.
while (retval):
    img_path = img_root_path + 'video_%d.jpg' % index
    if os.path.isfile(img_path):
        # Rotate the frame by `rot_range` via phi with a 5 pixel x shift.
        it = ImageTransformer(img_path, None)
        rotated_img = it.rotate_along_axis(phi=rot_range, dx=5)
        if index == 1:
            # On the first frame only: push the two right-edge corners
            # (top-right, bottom-right) through the transform and derive the
            # row/column bounds from where they land.
            mat = it.getTransfrom()
            width, height = it.getImageSize()
            points = np.array([[[width - 1, 0], [width - 1, height - 1]]],
                              dtype='float32')
            new_points = cv2.perspectiveTransform(points, mat)
            print(new_points)
            min_col = 0
            min_row = new_points[0][0][1]
            # Use the smaller x of the two transformed corners.
            max_col = min(new_points[0][0][0], new_points[0][1][0])
            max_row = new_points[0][1][1]

        save_image(output + '/video_%d.jpg' % index, rotated_img)
        print('save_image:%d' % index)