Esempio n. 1
0
class Synthetic2Data(object):
    def __init__(self, args):
        """Copy the generator settings from ``args`` and build the motion tables.

        ``args`` must expose batch_size, num_objects, image_size,
        image_channel, motion_range, motion_type, num_frame, bg_move,
        bg_noise, save_display, save_display_dir, fixed_data and (when
        fixed_data is truthy) seed.
        """
        # Tensor sizes.
        self.batch_size, self.num_objects = args.batch_size, args.num_objects
        self.im_size, self.im_channel = args.image_size, args.image_channel
        self.num_frame = args.num_frame
        # Motion settings.
        self.m_range, self.m_type = args.motion_range, args.motion_type
        # Background settings.
        self.bg_move, self.bg_noise = args.bg_move, args.bg_noise
        # motion_dict() reads self.m_range, so it has to run after it is set.
        tables = self.motion_dict()
        self.m_dict, self.reverse_m_dict, self.m_kernel = tables
        self.visualizer = BaseVisualizer(args, self.reverse_m_dict)
        # Display settings.
        self.save_display = args.save_display
        self.save_display_dir = args.save_display_dir
        # Seed the global NumPy generator when reproducible data is requested.
        if args.fixed_data:
            numpy.random.seed(args.seed)

    def motion_dict(self):
        m_range = self.m_range
        m_dict, reverse_m_dict = {}, {}
        x = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        y = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        m_x, m_y = numpy.meshgrid(x, y)
        m_x, m_y, = m_x.reshape(-1).astype(int), m_y.reshape(-1).astype(int)
        m_kernel = numpy.zeros((1, len(m_x), 2 * m_range + 1, 2 * m_range + 1))
        for i in range(len(m_x)):
            m_dict[(m_x[i], m_y[i])] = i
            reverse_m_dict[i] = (m_x[i], m_y[i])
            m_kernel[:, i, -m_y[i] + m_range, -m_x[i] + m_range] = 1
        return m_dict, reverse_m_dict, m_kernel

    def generate_data(self, src_image, src_mask):
        batch_size, im_size, im_channel = self.batch_size, self.im_size, self.im_channel
        # generate foreground motion
        src_motion, m = self.generate_motion(self.num_objects)
        # move foreground
        fg_im, fg_motion, fg_mask = self.move_foreground(src_image, src_motion, src_mask, m)
        # generate background
        src_bg = numpy.random.rand(batch_size, im_size, im_size, im_channel) * self.bg_noise
        # generate background motion
        if self.bg_move:
            src_motion, m = self.generate_motion(num_objects=1)
        else:
            src_motion = numpy.zeros((1, batch_size, im_size, im_size, 2))
            m = numpy.zeros((1, batch_size, 2, 3))
            m[0, :, 0, 0] = 1
            m[0, :, 1, 1] = 1
        # move background
        bg_im, bg_motion = self.move_background(src_bg, src_motion, m)
        # merge foreground and background, merge foreground motion and background motion
        mask = fg_mask == 0
        motion_mask = numpy.concatenate((mask, mask), 4)
        fg_motion[motion_mask] = bg_motion[motion_mask]
        if self.im_channel == 1:
            im_mask = mask
        elif self.im_channel == 3:
            im_mask = numpy.concatenate((mask, mask, mask), 4)
        fg_im[im_mask] = bg_im[im_mask]
        # swap axes between bacth size and frame
        im = fg_im.swapaxes(0, 1).swapaxes(3, 4).swapaxes(2, 3)
        motion = fg_motion.swapaxes(0, 1).swapaxes(3, 4).swapaxes(2, 3)
        seg_layer = fg_mask.swapaxes(0, 1).swapaxes(3, 4).swapaxes(2, 3)
        return im, motion, seg_layer.astype(int)

    def generate_motion(self, num_objects):
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        m_type = self.m_type
        m = numpy.zeros((num_objects, batch_size, 2, 3))
        if m_type == 'translation_discrete':
            t_x = numpy.random.randint(-m_range, m_range, size=(num_objects, batch_size))
            t_y = numpy.random.randint(-m_range, m_range, size=(num_objects, batch_size))
            m[:, :, 0, 0] = 1
            m[:, :, 1, 1] = 1
            m[:, :, 0, 2] = t_x
            m[:, :, 1, 2] = t_y
        elif m_type == 'translation':
            t_x = numpy.random.uniform(-m_range, m_range, size=(num_objects, batch_size))
            t_y = numpy.random.uniform(-m_range, m_range, size=(num_objects, batch_size))
            m[:, :, 0, 0] = 1
            m[:, :, 1, 1] = 1
            m[:, :, 0, 2] = t_x
            m[:, :, 1, 2] = t_y
        elif m_type == 'rotation':
            max_angle = m_range * 1.0 / im_size * 180 / math.pi * 1.8
            angle = numpy.random.uniform(-max_angle, max_angle, size=(num_objects, batch_size))
            for i in range(num_objects):
                for j in range(batch_size):
                    m[i, j, :, :] = cv2.getRotationMatrix2D((im_size/2, im_size/2), angle[i, j], 1)
        elif m_type == 'affine':
            rand_idx = numpy.random.randint(0, 4)
            if rand_idx == 0:
                pts1 = numpy.float32([[0, 0], [im_size, 0], [im_size, im_size]])
            elif rand_idx == 1:
                pts1 = numpy.float32([[im_size, 0], [im_size, im_size], [0, im_size]])
            elif rand_idx == 2:
                pts1 = numpy.float32([[im_size, im_size], [0, im_size], [0, 0]])
            elif rand_idx == 3:
                pts1 = numpy.float32([[0, im_size], [0, 0], [im_size, 0]])
            max_shift = m_range * 0.33
            for i in range(num_objects):
                for j in range(batch_size):
                    shift = numpy.random.uniform(-max_shift, max_shift, size=(3, 2))
                    pts2 = pts1 + shift.astype(numpy.float32)
                    m[i, j, :, :] = cv2.getAffineTransform(pts1, pts2)
        elif m_type == 'perspective':
            logging.error('motion generation for perspective transformation is not implemented')
            pts1 = numpy.float32([[0, 0], [im_size, 0], [0, im_size], [im_size, im_size]])
            for i in range(num_objects):
                for j in range(batch_size):
                    shift = numpy.random.uniform(-m_range, m_range, size=(4, 2))
                    pts2 = pts1 + shift.astype(numpy.float32)
                    m[i, j, :, :] = cv2.getPerspectiveTransform(pts1, pts2)
        src_motion = numpy.zeros((num_objects, batch_size, im_size, im_size, 2))
        pts = numpy.mgrid[0:im_size, 0:im_size].reshape(2, -1)
        pts = pts[::-1, :]
        pts = numpy.concatenate((pts, numpy.ones((1, pts.shape[1]))), 0)
        for i in range(num_objects):
            for j in range(batch_size):
                if m_type == 'perspective':
                    logging.error('ground truth for perspective transformation is not implemented')
                motion = numpy.dot(m[i, j, :, :], pts) - pts[:2, :]
                src_motion[i, j, :, :, :] = motion.swapaxes(0, 1).reshape(im_size, im_size, 2)
        return src_motion, m

    def move_foreground(self, src_image, src_motion, src_mask, m):
        batch_size, num_frame, im_size = self.batch_size, self.num_frame, self.im_size
        im = numpy.zeros((num_frame, batch_size, im_size, im_size, self.im_channel))
        motion = numpy.zeros((num_frame, batch_size, im_size, im_size, 2))
        mask = numpy.zeros((num_frame, batch_size, im_size, im_size, 1))
        for i in range(num_frame):
            im[i, ...], motion[i, ...], mask[i, ...] = \
                self.merge_objects(src_image, src_motion, src_mask)
            src_image = self.move_image_fg(src_image, m)
            src_mask = self.move_mask(src_mask, m)
        return im, motion, mask

    def merge_objects(self, src_image, src_motion, src_mask):
        batch_size, num_objects, im_size = self.batch_size, self.num_objects, self.im_size
        im = numpy.zeros((batch_size, im_size, im_size, self.im_channel))
        motion = numpy.zeros((batch_size, im_size, im_size, 2))
        mask = numpy.zeros((batch_size, im_size, im_size, 1))
        for i in range(num_objects):
            zero_mask = mask == 0
            zero_motion_mask = numpy.concatenate((zero_mask, zero_mask), 3)
            if self.im_channel == 1:
                zero_im_mask = zero_mask
            elif self.im_channel == 3:
                zero_im_mask = numpy.concatenate((zero_mask, zero_mask, zero_mask), 3)
            im[zero_im_mask] = src_image[i, zero_im_mask]
            motion[zero_motion_mask] = src_motion[i, zero_motion_mask]
            mask[zero_mask] = src_mask[i, zero_mask] * (num_objects - i)
        return im, motion, mask

    def move_image_fg(self, im, m):
        batch_size, num_objects, im_size, = self.batch_size, self.num_objects, self.im_size
        m_type = self.m_type
        im_new = numpy.zeros((num_objects, batch_size, im_size, im_size, self.im_channel))
        for i in range(num_objects):
            for j in range(batch_size):
                curr_im = im[i, j, :, :, :].squeeze()
                if m_type in ['translation', 'translation_discrete', 'rotation', 'affine']:
                    if self.im_channel == 1:
                        im_new[i, j, :, :, 0] = cv2.warpAffine(curr_im, m[i, j, :, :], (im_size, im_size))
                    elif self.im_channel == 3:
                        im_new[i, j, ...] = cv2.warpAffine(curr_im, m[i, j, :, :], (im_size, im_size))
                elif m_type == 'perspective':
                    if self.im_channel == 1:
                        im_new[i, j, :, :, 0] = cv2.warpAffine(curr_im, m[i, j, :, :], (im_size, im_size))
                    elif self.im_channel == 3:
                        im_new[i, j, ...] = cv2.warpPerspective(curr_im, m[i, j, :, :], (im_size, im_size))
        return im_new

    def move_mask(self, mask, m):
        batch_size, num_objects, im_size, = self.batch_size, self.num_objects, self.im_size
        m_type = self.m_type
        mask_new = numpy.zeros((num_objects, batch_size, im_size, im_size, 1))
        for i in range(num_objects):
            for j in range(batch_size):
                curr_mask = mask[i, j, :, :, :].squeeze()
                if m_type in ['translation', 'translation_discrete', 'rotation', 'affine']:
                    mask_new[i, j, :, :, 0] = cv2.warpAffine(curr_mask, m[i, j, :, :], (im_size, im_size))
                elif m_type == 'perspective':
                    mask_new[i, j, :, :, 0] = cv2.warpPerspective(curr_mask, m[i, j, :, :], (im_size, im_size))
        mask_new = numpy.round(mask_new)
        return mask_new

    def move_background(self, src_image, src_motion, m):
        batch_size, num_frame, im_size = self.batch_size, self.num_frame, self.im_size
        im = numpy.zeros((num_frame, batch_size, im_size, im_size, self.im_channel))
        motion = numpy.zeros((num_frame, batch_size, im_size, im_size, 2))
        for i in range(num_frame):
            im[i, :, :, :, :] = src_image
            src_image = self.move_image_bg(src_image, m)
            motion[i, :, :, :, :] = src_motion
        return im, motion

    def move_image_bg(self, bg_im, m):
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        m_type = self.m_type
        im_new = numpy.zeros((batch_size, im_size, im_size, self.im_channel))
        for i in range(batch_size):
            curr_im = bg_im[i, :, :, :].squeeze()
            if m_type in ['translation', 'translation_discrete', 'rotation', 'affine']:
                if self.im_channel == 1:
                    im_new[i, :, :, 0] = cv2.warpAffine(curr_im, m[0, i, :, :], (im_size, im_size))
                elif self.im_channel == 3:
                    im_new[i, ...] = cv2.warpAffine(curr_im, m[0, i, :, :], (im_size, im_size))
            elif m_type == 'perspective':
                if self.im_channel == 1:
                    im_new[i, :, :, 0] = cv2.warpPerspective(curr_im, m[0, i, :, :], (im_size, im_size))
                elif self.im_channel == 3:
                    im_new[i, ...] = cv2.warpPerspective(curr_im, m[0, i, :, :], (im_size, im_size))
        return im_new

    def display(self, im, motion, seg_layer):
        """Show or save a 4-row overview of the first sequence in the batch.

        Rows, top to bottom: frames, absolute difference to the previous
        frame, colour-coded flow and the segmentation rendered with the
        'jet' colour map. Tile positions come from ``self.visualizer``.
        The picture is written to ``<save_display_dir>/data.png`` when
        ``self.save_display`` is set, otherwise shown with matplotlib.
        """
        vis = self.visualizer
        frames = self.num_frame
        canvas_w, canvas_h = vis.get_img_size(4, frames)
        canvas = numpy.ones((canvas_h, canvas_w, 3))

        def paste(row, col, tile):
            # Copy one tile into its grid cell on the canvas.
            left, top, right, bottom = vis.get_img_coordinate(row, col)
            canvas[top:bottom, left:right, :] = tile

        previous = None
        for t in range(frames):
            col = t + 1
            # [channel, row, col] -> [row, col, channel]
            frame = im[0, t, :, :, :].transpose(1, 2, 0)
            paste(1, col, frame)
            if t > 0:
                paste(2, col, abs(frame - previous))
            previous = frame

            flow = motion[0, t, :, :, :].transpose(1, 2, 0)
            paste(3, col, flowlib.visualize_flow(flow, self.m_range) / 255.0)

            labels = seg_layer[0, t, :, :, :]
            # Scale by the largest label before colour mapping.
            scaled = labels.squeeze() * 1.0 / labels.max()
            paste(4, col, plt.get_cmap('jet')(scaled)[:, :, 0:3])

        if not self.save_display:
            plt.figure(1)
            plt.imshow(canvas)
            plt.axis('off')
            plt.show()
            return
        picture = Image.fromarray((canvas * 255.0).astype(numpy.uint8))
        picture.save(os.path.join(self.save_display_dir, 'data.png'))
Esempio n. 2
0
class Synthetic2Data(object):
    def __init__(self, args):
        """Copy the generator settings from ``args`` and build the motion tables.

        ``args`` must expose batch_size, num_objects, image_size,
        image_channel, motion_range, num_frame, bg_move, save_display,
        save_display_dir, fixed_data and (when fixed_data is truthy) seed.
        """
        # Tensor sizes.
        self.batch_size, self.num_objects = args.batch_size, args.num_objects
        self.im_size, self.im_channel = args.image_size, args.image_channel
        self.num_frame = args.num_frame
        # Motion and background settings.
        self.m_range = args.motion_range
        self.bg_move = args.bg_move
        # motion_dict() reads self.m_range, so it has to run after it is set.
        tables = self.motion_dict()
        self.m_dict, self.reverse_m_dict, self.m_kernel = tables
        self.visualizer = BaseVisualizer(args, self.reverse_m_dict)
        # Display settings.
        self.save_display = args.save_display
        self.save_display_dir = args.save_display_dir
        # Seed the global NumPy generator when reproducible data is requested.
        if args.fixed_data:
            numpy.random.seed(args.seed)

    def motion_dict(self):
        m_range = self.m_range
        m_dict, reverse_m_dict = {}, {}
        x = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        y = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        m_x, m_y = numpy.meshgrid(x, y)
        m_x, m_y, = m_x.reshape(-1).astype(int), m_y.reshape(-1).astype(int)
        m_kernel = numpy.zeros((1, len(m_x), 2 * m_range + 1, 2 * m_range + 1))
        for i in range(len(m_x)):
            m_dict[(m_x[i], m_y[i])] = i
            reverse_m_dict[i] = (m_x[i], m_y[i])
            m_kernel[:, i, m_y[i] + m_range, m_x[i] + m_range] = 1
        return m_dict, reverse_m_dict, m_kernel

    def generate_data(self, src_fg, src_mask, src_bg):
        """Render a batch of sequences of moving objects over a background.

        Args:
            src_fg: foreground objects, ``[object, batch, channel, row, col]``.
            src_mask: foreground masks, ``[object, batch, 1, row, col]``.
            src_bg: background images, ``[batch, channel, height, width]``;
                crops of ``im_size`` are taken from them.

        Returns:
            ``(im, motion, motion_label, seg_layer)``, each laid out as
            ``[batch, frame, channel, row, col]``; all but ``im`` are cast
            to int.
        """
        batch_size, im_size, im_channel = self.batch_size, self.im_size, self.im_channel
        m_dict = self.m_dict
        # generate foreground motion
        src_motion, src_motion_label, m_x, m_y = self.generate_motion(
            self.num_objects, src_mask)
        # move foreground
        fg_im, fg_motion, fg_motion_label, fg_mask = \
            self.move_foreground(src_fg, src_motion, src_motion_label, src_mask, m_x, m_y)
        # generate background motion
        if self.bg_move:
            bg_motion, bg_motion_label, m_x, m_y = self.generate_motion(
                num_objects=1)
        else:
            # Static background: zero flow carrying the zero-motion label.
            bg_motion = numpy.zeros((1, batch_size, 2, im_size, im_size))
            bg_motion_label = m_dict[(0, 0)] * numpy.ones(
                (1, batch_size, 1, im_size, im_size))
            m_x = numpy.zeros((1, batch_size)).astype(int)
            m_y = numpy.zeros((1, batch_size)).astype(int)
        # move background
        bg_im, bg_motion, bg_motion_label = \
            self.move_background(src_bg, bg_motion, bg_motion_label, m_x, m_y)
        # Wherever no foreground object is present, take image, motion and
        # label from the background.
        mask = fg_mask == 0
        fg_motion_label[mask] = bg_motion_label[mask]
        motion_mask = numpy.repeat(mask, 2, axis=2)
        fg_motion[motion_mask] = bg_motion[motion_mask]
        # numpy.repeat works for any channel count; the previous 1-or-3
        # branching left the mask undefined for every other value.
        im_mask = numpy.repeat(mask, im_channel, axis=2)
        fg_im[im_mask] = bg_im[im_mask]
        # swap axes between batch size and frame
        im, motion, motion_label, seg_layer = fg_im.swapaxes(0, 1), fg_motion.swapaxes(0, 1), \
            fg_motion_label.swapaxes(0, 1), fg_mask.swapaxes(0, 1)
        return im, motion.astype(int), motion_label.astype(
            int), seg_layer.astype(int)

    def generate_motion(self, num_objects, src_mask=None):
        batch_size, im_size = self.batch_size, self.im_size
        m_dict, reverse_m_dict = self.m_dict, self.reverse_m_dict
        m_label = numpy.random.randint(0,
                                       len(m_dict),
                                       size=(num_objects, batch_size))
        m_x = numpy.zeros((num_objects, batch_size)).astype(int)
        m_y = numpy.zeros((num_objects, batch_size)).astype(int)
        for i in range(num_objects):
            for j in range(batch_size):
                (m_x[i, j], m_y[i, j]) = reverse_m_dict[m_label[i, j]]
        src_motion = numpy.zeros(
            (num_objects, batch_size, 2, im_size, im_size))
        src_motion_label = \
            m_dict[(0, 0)] * numpy.ones((num_objects, batch_size, 1, im_size, im_size))
        if src_mask is None:
            src_mask = numpy.ones(
                (num_objects, batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                mask = src_mask[i, j, 0, :, :] > 0
                src_motion[i, j, 0, mask] = m_x[i, j]
                src_motion[i, j, 1, mask] = m_y[i, j]
                src_motion_label[i, j, 0, mask] = m_label[i, j]
        return src_motion, src_motion_label, m_x, m_y

    def move_foreground(self, src_image, src_motion, src_motion_label,
                        src_mask, m_x, m_y):
        batch_size, num_frame, im_size = self.batch_size, self.num_frame, self.im_size
        im = numpy.zeros(
            (num_frame, batch_size, self.im_channel, im_size, im_size))
        motion = numpy.zeros((num_frame, batch_size, 2, im_size, im_size))
        motion_label = numpy.zeros(
            (num_frame, batch_size, 1, im_size, im_size))
        mask = numpy.zeros((num_frame, batch_size, 1, im_size, im_size))
        for i in range(num_frame):
            im[i, ...], motion[i, ...], motion_label[i, ...], mask[i, ...] = \
                self.merge_objects(src_image, src_motion, src_motion_label, src_mask)
            src_image = self.move_image_fg(src_image, m_x, m_y)
            src_motion = self.move_motion(src_motion, m_x, m_y)
            src_motion_label = self.move_motion_label(src_motion_label, m_x,
                                                      m_y)
            src_mask = self.move_mask(src_mask, m_x, m_y)
        return im, motion, motion_label, mask

    def merge_objects(self, src_image, src_motion, src_motion_label, src_mask):
        batch_size, num_objects, im_size = self.batch_size, self.num_objects, self.im_size
        im = numpy.zeros((batch_size, self.im_channel, im_size, im_size))
        motion = numpy.zeros((batch_size, 2, im_size, im_size))
        motion_label = numpy.zeros((batch_size, 1, im_size, im_size))
        mask = numpy.zeros((batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            zero_mask = mask == 0
            zero_motion_mask = numpy.concatenate((zero_mask, zero_mask), 1)
            if self.im_channel == 1:
                zero_im_mask = zero_mask
            elif self.im_channel == 3:
                zero_im_mask = numpy.concatenate(
                    (zero_mask, zero_mask, zero_mask), 1)
            im[zero_im_mask] = src_image[i, zero_im_mask]
            motion[zero_motion_mask] = src_motion[i, zero_motion_mask]
            motion_label[zero_mask] = src_motion_label[i, zero_mask]
            mask[zero_mask] = src_mask[i, zero_mask]
        return im, motion, motion_label, mask

    def move_image_fg(self, im, m_x, m_y):
        batch_size, im_size, im_channel = self.batch_size, self.im_size, self.im_channel
        m_range, num_objects = self.m_range, self.num_objects
        im_big = numpy.zeros((num_objects, batch_size, im_channel,
                              im_size + m_range * 2, im_size + m_range * 2))
        im_big[:, :, :, m_range:-m_range, m_range:-m_range] = im
        im_new = numpy.zeros(
            (num_objects, batch_size, im_channel, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                im_new[i, j, :, :, :] = im_big[i, j, :, y:y + im_size,
                                               x:x + im_size]
        return im_new

    def move_motion(self, motion, m_x, m_y):
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        m_dict = self.m_dict
        motion_big = m_dict[(0, 0)] * numpy.ones(
            (num_objects, batch_size, 2, im_size + m_range * 2,
             im_size + m_range * 2))
        motion_big[:, :, :, m_range:-m_range, m_range:-m_range] = motion
        motion_new = numpy.zeros(
            (num_objects, batch_size, 2, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                motion_new[i, j, :, :, :] = motion_big[i, j, :, y:y + im_size,
                                                       x:x + im_size]
        return motion_new

    def move_motion_label(self, motion_label, m_x, m_y):
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        motion_label_big = numpy.zeros(
            (num_objects, batch_size, 1, im_size + m_range * 2,
             im_size + m_range * 2))
        motion_label_big[:, :, :, m_range:-m_range,
                         m_range:-m_range] = motion_label
        motion_label_new = numpy.zeros(
            (num_objects, batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                motion_label_new[i, j, :, :, :] = \
                    motion_label_big[i, j, :, y:y + im_size, x:x + im_size]
        return motion_label_new

    def move_mask(self, mask, m_x, m_y):
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        mask_big = numpy.zeros((num_objects, batch_size, 1,
                                im_size + m_range * 2, im_size + m_range * 2))
        mask_big[:, :, :, m_range:-m_range, m_range:-m_range] = mask
        mask_new = numpy.zeros((num_objects, batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                mask_new[i, j, :, :, :] = mask_big[i, j, :, y:y + im_size,
                                                   x:x + im_size]
        return mask_new

    def move_background(self, src_bg, src_motion, src_motion_label, m_x, m_y):
        batch_size, num_frame, im_size = self.batch_size, self.num_frame, self.im_size
        im = numpy.zeros(
            (num_frame, batch_size, self.im_channel, im_size, im_size))
        motion = numpy.zeros((num_frame, batch_size, 2, im_size, im_size))
        motion_label = numpy.zeros(
            (num_frame, batch_size, 1, im_size, im_size))
        height, width = src_bg.shape[2], src_bg.shape[3]
        xc, yc = numpy.ones((batch_size)) * width / 2, numpy.ones(
            (batch_size)) * height / 2
        xc, yc = xc.astype(numpy.int), yc.astype(numpy.int)
        for i in range(num_frame):
            x1, y1, x2, y2 = xc - im_size / 2, yc - im_size / 2, xc + im_size / 2, yc + im_size / 2
            for j in range(batch_size):
                im[i, j, :, :, :] = src_bg[j, :, y1[j]:y2[j], x1[j]:x2[j]]
            motion[i, :, :, :, :] = src_motion
            motion_label[i, :, :, :, :] = src_motion_label
            xc, yc = xc + m_x[0, :], yc + m_y[0, :]
        return im, motion, motion_label

    def display(self, im, motion, seg_layer):
        """Show or save a 4-row overview of the first sequence in the batch.

        Rows, top to bottom: frames, absolute difference to the previous
        frame, colour-coded flow and the segmentation rendered with the
        'jet' colour map. Tile positions come from ``self.visualizer``.
        The picture is written to ``<save_display_dir>/data.png`` when
        ``self.save_display`` is set, otherwise shown with matplotlib.
        """
        vis = self.visualizer
        frames = self.num_frame
        canvas_w, canvas_h = vis.get_img_size(4, frames)
        canvas = numpy.ones((canvas_h, canvas_w, 3))

        def paste(row, col, tile):
            # Copy one tile into its grid cell on the canvas.
            left, top, right, bottom = vis.get_img_coordinate(row, col)
            canvas[top:bottom, left:right, :] = tile

        previous = None
        for t in range(frames):
            col = t + 1
            # [channel, row, col] -> [row, col, channel]
            frame = im[0, t, :, :, :].transpose(1, 2, 0)
            paste(1, col, frame)
            if t > 0:
                paste(2, col, abs(frame - previous))
            previous = frame

            flow = motion[0, t, :, :, :].transpose(1, 2, 0)
            paste(3, col, flowlib.visualize_flow(flow, self.m_range) / 255.0)

            labels = seg_layer[0, t, :, :, :]
            # Scale by the largest label before colour mapping.
            scaled = labels.squeeze() * 1.0 / labels.max()
            paste(4, col, plt.get_cmap('jet')(scaled)[:, :, 0:3])

        if not self.save_display:
            plt.figure(1)
            plt.imshow(canvas)
            plt.axis('off')
            plt.show()
            return
        picture = Image.fromarray((canvas * 255.0).astype(numpy.uint8))
        picture.save(os.path.join(self.save_display_dir, 'data.png'))
Esempio n. 3
0
class RealData(object):
    def __init__(self, args):
        """Copy the loader settings from ``args`` and build the motion tables.

        ``args`` must expose batch_size, image_size, image_channel,
        motion_range, num_frame, save_display, save_display_dir,
        min_diff_thresh, max_diff_thresh, diff_div_thresh, fixed_data,
        rand_noise, augment_reverse and (when fixed_data is truthy) seed.
        """
        # Tensor sizes.
        self.batch_size = args.batch_size
        self.im_size, self.im_channel = args.image_size, args.image_channel
        self.num_frame = args.num_frame
        # motion_dict() reads self.m_range, so set it first.
        self.m_range = args.motion_range
        tables = self.motion_dict()
        self.m_dict, self.reverse_m_dict, self.m_kernel = tables
        self.visualizer = BaseVisualizer(args, self.reverse_m_dict)
        # Display settings.
        self.save_display = args.save_display
        self.save_display_dir = args.save_display_dir
        # Thresholds used by generate_data to reject sequences.
        self.min_diff_thresh = args.min_diff_thresh
        self.max_diff_thresh = args.max_diff_thresh
        self.diff_div_thresh = args.diff_div_thresh
        # Reproducibility and augmentation.
        self.fixed_data = args.fixed_data
        if self.fixed_data:
            numpy.random.seed(args.seed)
        self.rand_noise = args.rand_noise
        self.augment_reverse = args.augment_reverse

    def motion_dict(self):
        m_range = self.m_range
        m_dict, reverse_m_dict = {}, {}
        x = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        y = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        m_x, m_y = numpy.meshgrid(x, y)
        m_x, m_y, = m_x.reshape(-1).astype(int), m_y.reshape(-1).astype(int)
        m_kernel = numpy.zeros((1, len(m_x), 2 * m_range + 1, 2 * m_range + 1))
        for i in range(len(m_x)):
            m_dict[(m_x[i], m_y[i])] = i
            reverse_m_dict[i] = (m_x[i], m_y[i])
            m_kernel[:, i, -m_y[i] + m_range, -m_x[i] + m_range] = 1
        return m_dict, reverse_m_dict, m_kernel

    def get_meta(self, image_dir):
        meta, cnt = {}, 0
        for sub_dir in os.listdir(image_dir):
            for sub_sub_dir in os.listdir(os.path.join(image_dir, sub_dir)):
                image_files = os.listdir(os.path.join(image_dir, sub_dir, sub_sub_dir))
                image_files.sort(key=lambda f: int(filter(str.isdigit, f)))
                image_names = [os.path.join(image_dir, sub_dir, sub_sub_dir, f) for f in image_files]
                num_images = len(image_names)
                if num_images < self.num_frame:
                    continue
                for i in range(num_images - self.num_frame + 1):
                    meta[cnt] = image_names[i:i + self.num_frame]
                    cnt += 1
        return meta

    def generate_data(self, meta):
        batch_size, im_size, num_frame = self.batch_size, self.im_size, self.num_frame
        im_channel = self.im_channel
        min_diff_thresh, max_diff_thresh = self.min_diff_thresh, self.max_diff_thresh
        diff_div_thresh = self.diff_div_thresh
        idx = numpy.random.permutation(len(meta))
        im = numpy.zeros((batch_size, num_frame, im_channel, im_size, im_size))
        i, cnt = 0, 0
        while i < batch_size:
            image_names = meta[idx[cnt]]
            for j in range(len(image_names)):
                if im_channel == 1:
                    image = numpy.array(Image.open(image_names[j]).convert('L'))
                    image = image / 255.0
                    image = numpy.expand_dims(image, 3)
                elif im_channel == 3:
                    image = numpy.array(Image.open(image_names[j]))
                    image = image / 255.0
                if j == 0:
                    height, width = image.shape[0], image.shape[1]
                    idx_h = numpy.random.randint(0, height + 1 - im_size)
                    idx_w = numpy.random.randint(0, width + 1 - im_size)
                image = image.transpose((2, 0, 1))
                im[i, j, :, :, :] = image[:, idx_h:idx_h+im_size, idx_w:idx_w+im_size]
            cnt = cnt + 1
            im_diff = numpy.zeros((num_frame - 1))
            for j in range(num_frame - 1):
                diff = numpy.abs(im[i, j, :, :, :] - im[i, j+1, :, :, :])
                im_diff[j] = numpy.sum(diff) / im_channel / im_size / im_size
            if any(im_diff < min_diff_thresh) or any(im_diff > max_diff_thresh):
                continue
            if num_frame > 2:
                im_diff_div = im_diff / (numpy.median(im_diff) + 1e-5)
                if any(im_diff_div > diff_div_thresh) or any(im_diff_div < 1/diff_div_thresh):
                    continue
            if self.augment_reverse:
                if numpy.random.rand() < 0.5:
                    im[i, :, :, :, :] = im[i, ::-1, :, :, :]
            i = i + 1
        im = im + (numpy.random.rand(batch_size, num_frame, im_channel, im_size, im_size) - 0.5) * self.rand_noise
        im[im > 1] = 1
        im[im < 0] = 0
        return im

    def display(self, im):
        """Show or save a 2-row overview of the first sequence in the batch.

        The top row holds the frames, the bottom row the absolute difference
        to the previous frame. Tile positions come from ``self.visualizer``.
        The picture is written to ``<save_display_dir>/data.png`` when
        ``self.save_display`` is set, otherwise shown with matplotlib.
        """
        vis = self.visualizer
        frames = self.num_frame
        canvas_w, canvas_h = vis.get_img_size(2, frames)
        canvas = numpy.ones((canvas_h, canvas_w, 3))
        previous = None
        for t in range(frames):
            # [channel, row, col] -> [row, col, channel]
            frame = im[0, t, :, :, :].transpose(1, 2, 0)
            left, top, right, bottom = vis.get_img_coordinate(1, t + 1)
            canvas[top:bottom, left:right, :] = frame
            if t > 0:
                change = abs(frame - previous)
                left, top, right, bottom = vis.get_img_coordinate(2, t + 1)
                canvas[top:bottom, left:right, :] = change
            previous = frame

        if not self.save_display:
            plt.figure(1)
            plt.imshow(canvas)
            plt.axis('off')
            plt.show()
            return
        picture = Image.fromarray((canvas * 255.0).astype(numpy.uint8))
        picture.save(os.path.join(self.save_display_dir, 'data.png'))
Esempio n. 4
0
class SyntheticDataBidirect(object):
    def __init__(self, args):
        """Configure the bidirectional synthetic data generator.

        Args:
            args: options object. This constructor reads ``batch_size``,
                ``num_objects``, ``image_size``, ``motion_range``,
                ``num_frame``, ``bg_move``, ``bg_noise``, ``save_display`` and
                ``save_display_dir`` from it, and also passes it on to
                ``BaseVisualizer``.
        """
        self.batch_size = args.batch_size
        self.num_objects = args.num_objects
        self.im_size = args.image_size
        self.m_range = args.motion_range
        self.num_frame = args.num_frame
        # Number of frames rolled out in each temporal direction. The value is
        # used as an array dimension and a range() bound, so it must be an
        # int: floor division keeps it one on Python 3 as well as Python 2.
        self.num_frame_one_direction = (self.num_frame + 1) // 2
        self.bg_move = args.bg_move
        self.bg_noise = args.bg_noise
        self.m_dict, self.reverse_m_dict, self.m_kernel = self.motion_dict()
        self.visualizer = BaseVisualizer(args, self.reverse_m_dict)
        self.save_display = args.save_display
        self.save_display_dir = args.save_display_dir

    def motion_dict(self):
        m_range = self.m_range
        m_dict, reverse_m_dict = {}, {}
        x = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        y = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        m_x, m_y = numpy.meshgrid(x, y)
        m_x, m_y, = m_x.reshape(-1).astype(int), m_y.reshape(-1).astype(int)
        m_kernel = numpy.zeros((1, len(m_x), 2 * m_range + 1, 2 * m_range + 1))
        for i in range(len(m_x)):
            m_dict[(m_x[i], m_y[i])] = i
            reverse_m_dict[i] = (m_x[i], m_y[i])
            m_kernel[:, i, m_y[i] + m_range, m_x[i] + m_range] = 1
        return m_dict, reverse_m_dict, m_kernel

    def generate_data(self, src_image, src_mask):
        batch_size, im_size = self.batch_size, self.im_size
        m_dict = self.m_dict
        # generate forward foreground motion
        src_motion_f, src_motion_label_f, m_x_f, m_y_f, src_motion_b, src_motion_label_b, m_x_b, m_y_b = \
            self.generate_motion(self.num_objects, src_mask)
        # move foreground forward
        fg_im_f, fg_motion_f, fg_motion_f_r, fg_motion_label_f, fg_motion_label_f_r, fg_mask_f = \
            self.move_foreground(src_image, src_motion_f, src_motion_b, src_motion_label_f, src_motion_label_b, src_mask, m_x_f, m_y_f)
        # move foreground backward
        fg_im_b, fg_motion_b, fg_motion_b_r, fg_motion_label_b, fg_motion_label_b_r, fg_mask_b = \
            self.move_foreground(src_image, src_motion_b, src_motion_f, src_motion_label_b, src_motion_label_f, src_mask, m_x_b, m_y_b)
        # generate background
        src_bg = numpy.random.rand(batch_size, 3, im_size,
                                   im_size) * self.bg_noise
        # generate background motion
        if self.bg_move:
            src_motion_f, src_motion_label_f, m_x_f, m_y_f, src_motion_b, src_motion_label_b, m_x_b, m_y_b = self.generate_motion(
                1)
        else:
            src_motion_f = numpy.zeros((1, batch_size, 2, im_size, im_size))
            src_motion_label_f = m_dict[(0, 0)] * numpy.ones(
                (1, batch_size, 1, im_size, im_size))
            m_x_f = numpy.zeros((1, batch_size)).astype(int)
            m_y_f = numpy.zeros((1, batch_size)).astype(int)
            src_motion_b = numpy.zeros((1, batch_size, 2, im_size, im_size))
            src_motion_label_b = m_dict[(0, 0)] * numpy.ones(
                (1, batch_size, 1, im_size, im_size))
            m_x_b = numpy.zeros((1, batch_size)).astype(int)
            m_y_b = numpy.zeros((1, batch_size)).astype(int)
        # move background
        bg_im_f, bg_motion_f, bg_motion_f_r, bg_motion_label_f, bg_motion_label_f_r = \
            self.move_background(src_bg, src_motion_f, src_motion_b, src_motion_label_f, src_motion_label_b, m_x_f, m_y_f)
        bg_im_b, bg_motion_b, bg_motion_b_r, bg_motion_label_b, bg_motion_label_b_r = \
            self.move_background(src_bg, src_motion_b, src_motion_f, src_motion_label_b, src_motion_label_f, m_x_b, m_y_b)
        # merge foreground and background, merge foreground motion and background motion
        mask = fg_mask_f == 0
        fg_motion_label_f[mask] = bg_motion_label_f[mask]
        fg_motion_label_f_r[mask] = bg_motion_label_f_r[mask]
        motion_mask = numpy.concatenate((mask, mask), 2)
        fg_motion_f[motion_mask] = bg_motion_f[motion_mask]
        fg_motion_f_r[motion_mask] = bg_motion_f_r[motion_mask]
        im_mask = numpy.concatenate((mask, mask, mask), 2)
        fg_im_f[im_mask] = bg_im_f[im_mask]
        mask = fg_mask_b == 0
        fg_motion_label_b[mask] = bg_motion_label_b[mask]
        fg_motion_label_b_r[mask] = bg_motion_label_b_r[mask]
        motion_mask = numpy.concatenate((mask, mask), 2)
        fg_motion_b[motion_mask] = bg_motion_b[motion_mask]
        fg_motion_b_r[motion_mask] = bg_motion_b_r[motion_mask]
        im_mask = numpy.concatenate((mask, mask, mask), 2)
        fg_im_b[im_mask] = bg_im_b[im_mask]
        # merge forward and backward frames and motions
        im, motion, motion_r, motion_label, motion_label_r, seg_layer = \
            self.merge_forward_backward(fg_im_f, fg_motion_f, fg_motion_f_r, fg_motion_label_f, fg_motion_label_f_r, fg_im_b, fg_motion_b, fg_motion_b_r, fg_motion_label_b, fg_motion_label_b_r, fg_mask_f, fg_mask_b)
        # swap axes between batch size and frame
        im, motion, motion_r, motion_label, motion_label_r, seg_layer = im.swapaxes(
            0, 1), motion.swapaxes(0, 1), motion_r.swapaxes(
                0, 1), motion_label.swapaxes(0, 1), motion_label_r.swapaxes(
                    0, 1), seg_layer.swapaxes(0, 1)
        return im, motion.astype(int), motion_r.astype(
            int), motion_label.astype(int), motion_label_r.astype(
                int), seg_layer.astype(int)

    def generate_motion(self, num_objects, src_mask=None):
        batch_size, im_size = self.batch_size, self.im_size
        m_dict, reverse_m_dict = self.m_dict, self.reverse_m_dict
        m_label = numpy.random.randint(0,
                                       len(m_dict),
                                       size=(num_objects, batch_size))
        m_x_f = numpy.zeros((num_objects, batch_size)).astype(int)
        m_y_f = numpy.zeros((num_objects, batch_size)).astype(int)
        for i in range(num_objects):
            for j in range(batch_size):
                (m_x_f[i, j], m_y_f[i, j]) = reverse_m_dict[m_label[i, j]]
        m_x_b = numpy.zeros((num_objects, batch_size)).astype(int)
        m_y_b = numpy.zeros((num_objects, batch_size)).astype(int)
        for i in range(num_objects):
            for j in range(batch_size):
                (m_x_b[i, j], m_y_b[i, j]) = (-m_x_f[i, j], -m_y_f[i, j])
        src_motion_f = numpy.zeros(
            (num_objects, batch_size, 2, im_size, im_size))
        src_motion_b = numpy.zeros(
            (num_objects, batch_size, 2, im_size, im_size))
        src_motion_label_f = m_dict[(0, 0)] * numpy.ones(
            (num_objects, batch_size, 1, im_size, im_size))
        src_motion_label_b = m_dict[(0, 0)] * numpy.ones(
            (num_objects, batch_size, 1, im_size, im_size))
        if src_mask is None:
            src_mask = numpy.ones(
                (num_objects, batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                mask = src_mask[i, j, 0, :, :] > 0
                src_motion_f[i, j, 0, mask] = m_x_f[i, j]
                src_motion_f[i, j, 1, mask] = m_y_f[i, j]
                src_motion_b[i, j, 0, mask] = m_x_b[i, j]
                src_motion_b[i, j, 1, mask] = m_y_b[i, j]
                src_motion_label_f[i, j, 0, mask] = m_label[i, j]
                src_motion_label_b[i, j, 0,
                                   mask] = m_dict[(m_x_b[i, j], m_y_b[i, j])]
        return src_motion_f, src_motion_label_f, m_x_f, m_y_f, src_motion_b, src_motion_label_b, m_x_b, m_y_b

    def move_foreground(self, src_image, src_motion, src_motion_r,
                        src_motion_label, src_motion_label_r, src_mask, m_x,
                        m_y):
        batch_size, num_frame, im_size = self.batch_size, self.num_frame_one_direction, self.im_size
        im = numpy.zeros((num_frame, batch_size, 3, im_size, im_size))
        motion = numpy.zeros((num_frame, batch_size, 2, im_size, im_size))
        motion_r = numpy.zeros((num_frame, batch_size, 2, im_size, im_size))
        motion_label = numpy.zeros(
            (num_frame, batch_size, 1, im_size, im_size))
        motion_label_r = numpy.zeros(
            (num_frame, batch_size, 1, im_size, im_size))
        mask = numpy.zeros((num_frame, batch_size, 1, im_size, im_size))
        for i in range(num_frame):
            im[i, ...], motion[i, ...], motion_r[i, ...], motion_label[i, ...], motion_label_r[i, ...], mask[i, ...] = \
                self.merge_objects(src_image, src_motion, src_motion_r, src_motion_label, src_motion_label_r, src_mask)
            src_image = self.move_image_fg(src_image, m_x, m_y)
            src_motion = self.move_motion(src_motion, m_x, m_y)
            src_motion_r = self.move_motion(src_motion_r, m_x, m_y)
            src_motion_label = self.move_motion_label(src_motion_label, m_x,
                                                      m_y)
            src_motion_label_r = self.move_motion_label(
                src_motion_label_r, m_x, m_y)
            src_mask = self.move_mask(src_mask, m_x, m_y)
        return im, motion, motion_r, motion_label, motion_label_r, mask

    def merge_objects(self, src_image, src_motion, src_motion_r,
                      src_motion_label, src_motion_label_r, src_mask):
        batch_size, num_objects, im_size = self.batch_size, self.num_objects, self.im_size
        im = numpy.zeros((batch_size, 3, im_size, im_size))
        motion = numpy.zeros((batch_size, 2, im_size, im_size))
        motion_r = numpy.zeros((batch_size, 2, im_size, im_size))
        motion_label = numpy.zeros((batch_size, 1, im_size, im_size))
        motion_label_r = numpy.zeros((batch_size, 1, im_size, im_size))
        mask = numpy.zeros((batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            zero_mask = mask == 0
            zero_motion_mask = numpy.concatenate((zero_mask, zero_mask), 1)
            zero_im_mask = numpy.concatenate((zero_mask, zero_mask, zero_mask),
                                             1)
            im[zero_im_mask] = src_image[i, zero_im_mask]
            motion[zero_motion_mask] = src_motion[i, zero_motion_mask]
            motion_r[zero_motion_mask] = src_motion_r[i, zero_motion_mask]
            motion_label[zero_mask] = src_motion_label[i, zero_mask]
            motion_label_r[zero_mask] = src_motion_label_r[i, zero_mask]
            mask[zero_mask] = src_mask[i, zero_mask]
        return im, motion, motion_r, motion_label, motion_label_r, mask

    def move_image_fg(self, im, m_x, m_y):
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        im_big = numpy.zeros((num_objects, batch_size, 3,
                              im_size + m_range * 2, im_size + m_range * 2))
        im_big[:, :, :, m_range:-m_range, m_range:-m_range] = im
        im_new = numpy.zeros((num_objects, batch_size, 3, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                im_new[i, j, :, :, :] = im_big[i, j, :, y:y + im_size,
                                               x:x + im_size]
        return im_new

    def move_motion(self, motion, m_x, m_y):
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        m_dict = self.m_dict
        motion_big = m_dict[(0, 0)] * numpy.ones(
            (num_objects, batch_size, 2, im_size + m_range * 2,
             im_size + m_range * 2))
        motion_big[:, :, :, m_range:-m_range, m_range:-m_range] = motion
        motion_new = numpy.zeros(
            (num_objects, batch_size, 2, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                motion_new[i, j, :, :, :] = motion_big[i, j, :, y:y + im_size,
                                                       x:x + im_size]
        return motion_new

    def move_motion_label(self, motion_label, m_x, m_y):
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        motion_label_big = numpy.zeros(
            (num_objects, batch_size, 1, im_size + m_range * 2,
             im_size + m_range * 2))
        motion_label_big[:, :, :, m_range:-m_range,
                         m_range:-m_range] = motion_label
        motion_label_new = numpy.zeros(
            (num_objects, batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                motion_label_new[i, j, :, :, :] = \
                    motion_label_big[i, j, :, y:y + im_size, x:x + im_size]
        return motion_label_new

    def move_mask(self, mask, m_x, m_y):
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        mask_big = numpy.zeros((num_objects, batch_size, 1,
                                im_size + m_range * 2, im_size + m_range * 2))
        mask_big[:, :, :, m_range:-m_range, m_range:-m_range] = mask
        mask_new = numpy.zeros((num_objects, batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                mask_new[i, j, :, :, :] = mask_big[i, j, :, y:y + im_size,
                                                   x:x + im_size]
        return mask_new

    def move_background(self, src_image, src_motion, src_motion_r,
                        src_motion_label, src_motion_label_r, m_x, m_y):
        batch_size, num_frame, im_size = self.batch_size, self.num_frame_one_direction, self.im_size
        im = numpy.zeros((num_frame, batch_size, 3, im_size, im_size))
        motion = numpy.zeros((num_frame, batch_size, 2, im_size, im_size))
        motion_r = numpy.zeros((num_frame, batch_size, 2, im_size, im_size))
        motion_label = numpy.zeros(
            (num_frame, batch_size, 1, im_size, im_size))
        motion_label_r = numpy.zeros(
            (num_frame, batch_size, 1, im_size, im_size))
        for i in range(num_frame):
            im[i, :, :, :, :] = src_image
            src_image = self.move_image_bg(src_image, m_x, m_y)
            motion[i, :, :, :, :] = src_motion
            motion_r[i, :, :, :] = src_motion_r
            motion_label[i, :, :, :, :] = src_motion_label
            motion_label_r[i, :, :, :, :] = src_motion_label_r
        return im, motion, motion_r, motion_label, motion_label_r

    def move_image_bg(self, bg_im, m_x, m_y):
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        bg_noise = self.bg_noise
        im_big = numpy.random.rand(batch_size, 3, im_size + m_range * 2,
                                   im_size + m_range * 2) * bg_noise
        im_big[:, :, m_range:-m_range, m_range:-m_range] = bg_im
        im_new = numpy.zeros((batch_size, 3, im_size, im_size))
        for i in range(batch_size):
            x = m_range + m_x[0, i]
            y = m_range + m_y[0, i]
            im_new[i, :, :, :] = im_big[i, :, y:y + im_size, x:x + im_size]
        return im_new

    @staticmethod
    def merge_forward_backward(im_f, motion_f, motion_f_r, motion_label_f,
                               motion_label_f_r, im_b, motion_b, motion_b_r,
                               motion_label_b, motion_label_b_r, mask_f,
                               mask_b):
        im_f = im_f[1:, ...]
        im_b = im_b[::-1, ...]
        im = numpy.concatenate((im_b, im_f), 0)
        motion_f = motion_f[1:, ...]
        motion_b_r = motion_b_r[::-1, ...]
        motion = numpy.concatenate((motion_b_r, motion_f), 0)
        motion_f_r = motion_f_r[1:, ...]
        motion_b = motion_b[::-1, ...]
        motion_r = numpy.concatenate((motion_b, motion_f_r), 0)
        motion_label_f = motion_label_f[1:, ...]
        motion_label_b_r = motion_label_b_r[::-1, ...]
        motion_label = numpy.concatenate((motion_label_b_r, motion_label_f), 0)
        motion_label_f_r = motion_label_f_r[1:, ...]
        motion_label_b = motion_label_b[::-1, ...]
        motion_label_r = numpy.concatenate((motion_label_b, motion_label_f_r),
                                           0)
        mask_f = mask_f[1:, ...]
        mask_b = mask_b[::-1, ...]
        mask = numpy.concatenate((mask_b, mask_f), 0)
        return im, motion, motion_r, motion_label, motion_label_r, mask

    def display(self, im, motion, motion_r, seg_layer):
        """Draw the first sample of a batch as a five-row image grid.

        Per frame, the grid rows receive: (1) the frame, (2) its absolute
        difference from the previous frame (skipped for the first frame),
        (3) ``motion`` rendered by ``flowlib.visualize_flow``, (4)
        ``motion_r`` rendered the same way, and (5) the segmentation layer
        divided by its per-frame maximum and coloured with the 'jet' colormap.
        When ``self.save_display`` is set the grid is written to ``data.png``
        inside ``self.save_display_dir``; otherwise it is shown with
        matplotlib.

        Args:
            im: frames indexed ``[sample, frame, channel, y, x]``.
            motion: motion fields, same leading layout with 2 channels.
            motion_r: second set of motion fields, same layout as ``motion``.
            seg_layer: segmentation layer, same leading layout.
        """
        num_frame = self.num_frame
        width, height = self.visualizer.get_img_size(5, num_frame)
        img = numpy.ones((height, width, 3))
        # The colormap does not change between frames; look it up once.
        cmap = plt.get_cmap('jet')
        prev_im = None
        for i in range(num_frame):
            curr_im = im[0, i, :, :, :].squeeze().transpose(1, 2, 0)
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(1, i + 1)
            img[y1:y2, x1:x2, :] = curr_im

            if i > 0:
                im_diff = abs(curr_im - prev_im)
                x1, y1, x2, y2 = self.visualizer.get_img_coordinate(2, i + 1)
                img[y1:y2, x1:x2, :] = im_diff
            prev_im = curr_im

            # NOTE(review): visualize_flow output is divided by 255 below,
            # so it presumably returns values on a 0-255 scale -- confirm.
            flow = motion[0, i, :, :, :].squeeze().transpose(1, 2, 0)
            optical_flow = flowlib.visualize_flow(flow, self.m_range)
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(3, i + 1)
            img[y1:y2, x1:x2, :] = optical_flow / 255.0

            flow = motion_r[0, i, :, :, :].squeeze().transpose(1, 2, 0)
            optical_flow = flowlib.visualize_flow(flow, self.m_range)
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(4, i + 1)
            img[y1:y2, x1:x2, :] = optical_flow / 255.0

            # `numpy.float` was only an alias of the builtin float and no
            # longer exists in current NumPy; the builtin behaves the same.
            # NOTE(review): an all-zero segmentation frame divides by zero
            # here and produces NaNs.
            seg_frame = seg_layer[0, i, :, :, :]
            seg = seg_frame.squeeze() * 1.0 / seg_frame.max().astype(float)
            seg_map = cmap(seg)[:, :, 0:3]
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(5, i + 1)
            img[y1:y2, x1:x2, :] = seg_map

        if self.save_display:
            img = img * 255.0
            img = img.astype(numpy.uint8)
            img = Image.fromarray(img)
            img.save(os.path.join(self.save_display_dir, 'data.png'))
        else:
            plt.figure(1)
            plt.imshow(img)
            plt.axis('off')
            plt.show()
Esempio n. 5
0
class BaseDemo(object):
    def __init__(self, args):
        """Store the run options and build the dataset and visualizer.

        Args:
            args: options object. This constructor reads ``learning_rate``,
                ``train_epoch``, ``test_epoch``, ``test_interval``,
                ``save_dir``, ``display``, ``display_all``, ``batch_size``,
                ``image_size``, ``image_channel``, ``num_frame``,
                ``motion_range`` and ``data`` from it, and passes it on to
                the dataset class and to ``BaseVisualizer``.

        Raises:
            ValueError: if ``args.data`` is not one of the supported names.
        """
        self.learning_rate = args.learning_rate
        self.train_epoch = args.train_epoch
        self.test_epoch = args.test_epoch
        self.test_interval = args.test_interval
        self.save_dir = args.save_dir
        self.display = args.display
        self.display_all = args.display_all
        self.best_improve_percent = -1e10
        self.batch_size = args.batch_size
        self.im_size = args.image_size
        self.im_channel = args.image_channel
        self.num_frame = args.num_frame
        self.m_range = args.motion_range
        if args.data == 'box':
            self.data = BoxData(args)
        elif args.data == 'mnist':
            self.data = MnistData(args)
        elif args.data == 'box2':
            self.data = Box2Data(args)
        elif args.data == 'mnist2':
            self.data = Mnist2Data(args)
        elif args.data == 'robot64':
            self.data = Robot64Data(args)
        elif args.data == 'mpii64':
            self.data = Mpii64Data(args)
        elif args.data == 'mpii64_sample':
            self.data = Mpii64Sample(args)
        elif args.data == 'nyuv2':
            self.data = Nyuv2Data(args)
        else:
            # Stop here with the real cause. Falling through would fail on
            # the next line with an unrelated AttributeError, because
            # self.data was never assigned.
            message = '%s data not supported' % args.data
            logging.error(message)
            raise ValueError(message)
        self.visualizer = BaseVisualizer(args, self.data.reverse_m_dict)

    def compare(self):
        """Evaluate simple frame-copy, interpolation and extrapolation baselines.

        For ``self.test_epoch`` test batches the first three frames are used
        to compute four losses, each the sum of absolute differences over the
        whole batch:

        * base1: frame 1 against frame 2.
        * interp: the average of frames 1 and 3 against frame 2.
        * base2: frame 2 against frame 3.
        * extrap: ``2 * frame2 - frame1`` clamped to [0, 1] against frame 3.

        Each batch's first sample is drawn into a grid and shown with
        matplotlib. The mean of every loss is printed at the end, as
        "base1 interp" and then "base2 extrap".
        """
        width, height = self.visualizer.get_img_size(4, 3)
        img = numpy.ones((height, width, 3))

        def paste(row, col, tensor):
            # Draw one channel-first tensor into grid cell (row, col).
            pixels = tensor.cpu().data.numpy().transpose(1, 2, 0)
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(row, col)
            img[y1:y2, x1:x2, :] = pixels

        base1_loss, base2_loss, interp_loss, extrap_loss = [], [], [], []
        for _ in range(self.test_epoch):
            name = self.data.name
            if name in ['box', 'mnist']:
                im, _, _, _ = self.data.get_next_batch(self.data.test_images)
            elif name in ['box2', 'mnist2']:
                im, _, _ = self.data.get_next_batch(self.data.test_images)
            elif name in ['robot64', 'mpii64', 'nyuv2', 'mpii64_sample']:
                # NOTE(review): a later, separate 'mpii64_sample' branch that
                # also kept only the last num_frame frames
                # (im[:, -self.num_frame:]) could never run because this list
                # already matches that name. It was dropped; restore it ahead
                # of this branch if the trimming is wanted.
                im = self.data.get_next_batch(self.data.test_meta)
            else:
                logging.error('%s data not supported' % self.data.name)
                sys.exit()
            im = Variable(torch.from_numpy(im).float())
            if torch.cuda.is_available():
                im = im.cuda()
            im1 = im[:, 0, :, :, :]
            im2 = im[:, 1, :, :, :]
            im3 = im[:, 2, :, :, :]

            # Row 1: the three frames. Row 2: consecutive differences.
            paste(1, 1, im1[0])
            paste(1, 2, im2[0])
            paste(1, 3, im3[0])
            paste(2, 1, torch.abs(im1[0] - im2[0]))
            paste(2, 2, torch.abs(im2[0] - im3[0]))

            # Row 3: predict frame 2 as the average of frames 1 and 3.
            base1_loss.append(torch.abs(im1 - im2).sum().data[0])
            im_base = (im1 + im3) / 2
            interp_loss.append(torch.abs(im_base - im2).sum().data[0])
            paste(3, 1, im_base[0])
            paste(3, 2, torch.abs(im_base[0] - im2[0]))

            # Row 4: predict frame 3 by linear extrapolation, clamped to the
            # valid pixel range.
            base2_loss.append(torch.abs(im2 - im3).sum().data[0])
            im_base = 2 * im2 - im1
            im_base[im_base < 0] = 0
            im_base[im_base > 1] = 1
            extrap_loss.append(torch.abs(im_base - im3).sum().data[0])
            paste(4, 1, im_base[0])
            paste(4, 2, torch.abs(im_base[0] - im3[0]))

            plt.figure(1)
            plt.imshow(img)
            plt.axis('off')
            plt.show()

        base1_loss = numpy.mean(numpy.asarray(base1_loss))
        interp_loss = numpy.mean(numpy.asarray(interp_loss))
        base2_loss = numpy.mean(numpy.asarray(base2_loss))
        extrap_loss = numpy.mean(numpy.asarray(extrap_loss))
        # A single formatted string prints the same text on Python 2 and 3.
        print('%s %s' % (base1_loss, interp_loss))
        print('%s %s' % (base2_loss, extrap_loss))