def main(label_type):
    """Convert ILSVRC DET XML annotations into a flat bounding-box array.

    Globs every ``*.xml`` under ``Annotations/DET/<label_type>``, derives the
    matching image path under ``Data/``, and collects one row per annotated
    object: ``[xmin, ymin, xmax, ymax, image_index]`` where ``image_index`` is
    the position of the image in the sorted annotation list.  Objects whose
    box area is below 1% of the image area are dropped.

    Outputs (relative to the current working directory):
      * ``labels/<label_type>/image_names.txt`` - one image path per line.
        The file is always created; lines are only written when ``DEBUG`` is
        false.
      * ``labels/<label_type>/labels.npy`` - the box array, only written when
        ``DEBUG`` is false.

    When ``DEBUG`` is true each image is shown with its kept boxes drawn and
    the loop blocks on ``cv2.waitKey(0)`` after every image.

    Args:
        label_type: Sub-directory of the DET split to process.  ``'train'``
            uses a two-level directory wildcard; any other value reads the
            XML files directly inside the split directory.
    """
    wildcard = '/*/*/' if label_type == 'train' else '/'

    # dataset path on google
    dataset_path = '/home/detection/ILSVRC/'
    annotationPath = dataset_path + 'Annotations/'

    output_dir = os.path.join('labels', label_type)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    labels = glob.glob(annotationPath + 'DET/' + label_type + wildcard +
                       '*.xml')
    print('len labels = ', len(labels))
    labels.sort()
    print('label = ', len(labels))
    # Swap only the file extension; a blanket replace('xml', 'JPEG') would
    # also rewrite any directory component that happens to contain "xml".
    images = [
        os.path.splitext(label.replace('Annotations', 'Data'))[0] + '.JPEG'
        for label in labels
    ]

    print(len(images))

    bboxes = []
    # The context manager guarantees the names file is flushed and closed even
    # if parsing one of the annotations raises.
    with open('labels/' + label_type + '/image_names.txt',
              'w') as imageNameFile:
        for ii, (label, imageName) in enumerate(zip(labels, images)):
            if ii % 100 == 0:
                print('iter %d of %d = %.2f%%' %
                      (ii, len(images), ii * 1.0 / len(images) * 100))
            if not DEBUG:
                imageNameFile.write(imageName + '\n')
            labelTree = ET.parse(label)
            imgSize = get_image_size(imageName)
            # Boxes covering less than 1% of the image are ignored.
            area_cutoff = imgSize[0] * imgSize[1] * 0.01
            if DEBUG:
                print('\nimage name\n\n%s\n' % imageName)
                image = cv2.imread(imageName)
                print('image size', image.shape)
                print(label)
                print(labelTree)
                print(labelTree.findall('object'))
            for obj in labelTree.findall('object'):
                bndbox = obj.find('bndbox')
                bbox = [
                    int(bndbox.find('xmin').text),
                    int(bndbox.find('ymin').text),
                    int(bndbox.find('xmax').text),
                    int(bndbox.find('ymax').text), ii
                ]
                if (bbox[3] - bbox[1]) * (bbox[2] - bbox[0]) < area_cutoff:
                    continue
                if DEBUG:
                    print('name', obj.find('name').text, '\n')
                    print(bbox)
                    image = image.squeeze()
                    # Promote single-channel images to 3 channels so the
                    # colored rectangle can be drawn.
                    if len(image.shape) < 3:
                        image = np.tile(image[:, :, np.newaxis], (1, 1, 3))
                    drawing.drawRect(image, bbox[:-1], 3, [0, 0, 255])
                bboxes.append(bbox)

            if DEBUG:
                if len(image.shape) == 2:
                    image = np.tile(image[:, :, np.newaxis], (1, 1, 3))
                cv2.imshow('image', image)
                cv2.waitKey(0)

    bboxes = np.array(bboxes)
    if not DEBUG:
        np.save('labels/' + label_type + '/labels.npy', bboxes)
Beispiel #2
0
    def draw_state(self):
        """Render the graph, the goal(s) and the current pose onto ``self.board``.

        On the first call (``self.board is None``) a ``uint8`` board covering
        the graph's bounding box at ``scale`` pixels per graph unit is
        created, every graph point is stamped with value 4 and the goal(s)
        with value 5.  A single (non-list) goal additionally gets a smaller
        marker of value 6 offset half a cell in its heading direction.

        On every call the previous pose markers (values 2 and 3) are reset to
        4, then the current pose is stamped with value 2 and its heading
        marker with value 3.

        Poses are read as ``(x, y, rotation)``; the rotation component is
        multiplied by 90 before being compared against 0/90/180/270.

        Returns:
            The board flipped vertically (``np.flipud``).
        """
        from utils import drawing
        scale = 8
        pose_scale = np.array([scale, scale, 90])
        origin = np.array([self.graph.xMin, self.graph.yMin, 0]) * scale

        def to_board(pose):
            # World pose -> integer (col, row, heading) board coordinates.
            return (np.array(pose) * pose_scale - origin).astype(int)

        def heading_tip(loc):
            # Half-cell offset from loc in the direction of its heading:
            # 90 -> +x, 270 -> -x, 0 -> +y, 180 -> -y.
            return [
                loc[0] + scale / 2 * (loc[2] == 90) - scale / 2 *
                (loc[2] == 270), loc[1] + scale / 2 *
                (loc[2] == 0) - scale / 2 * (loc[2] == 180)
            ]

        def stamp(center, padding, value):
            # Degenerate rectangle at center; drawRect grows it by padding.
            drawing.drawRect(self.board,
                             [center[0], center[1], center[0], center[1]],
                             padding, value)

        if self.board is None:
            locs = self.graph.points * scale
            self.board = np.zeros(
                ((self.graph.yMax - self.graph.yMin) * scale,
                 (self.graph.xMax - self.graph.xMin) * scale),
                dtype=np.uint8)
            locs -= np.array([self.graph.xMin, self.graph.yMin]) * scale
            for loc in locs:
                stamp(loc, scale / 2, 4)
            # isinstance (rather than an exact type comparison) also accepts
            # list subclasses as "multiple goals".
            if isinstance(self.end_point, list):
                for end_point in self.end_point:
                    stamp(to_board(end_point), scale / 2, 5)
            else:
                goal_loc = to_board(self.end_point)
                goal_arrow = heading_tip(goal_loc)
                stamp(goal_loc, scale / 2, 5)
                stamp(goal_arrow, scale / 4, 6)

        # Erase the pose drawn by the previous call back to the graph value.
        self.board[np.logical_or(self.board == 2, self.board == 3)] = 4
        curr_loc = to_board(self.pose[:3])
        curr_arrow = heading_tip(curr_loc)
        stamp(curr_loc, scale / 2, 2)
        stamp(curr_arrow, scale / 4, 3)
        # NOTE(review): presumably pins the maximum value so a downstream
        # colormap keeps a fixed range - confirm with the consumer.
        self.board[0, 0] = 6
        return np.flipud(self.board)
Beispiel #3
0
        Images[num_seq, ...] = tImage.copy()
        Labels[num_seq, ...] = xyxyLabels.copy()
        num_seq += 1
        print('current seq # = ', num_seq)

    # np.save('Images.npy', Images)
    # np.save('Labels.npy', Labels)

    print('final seq idx = ', dataset.seq_idx)
    print('done!')
    print('Checking images... ')

    path = './test/'
    idx = 15  # random
    image = Images[idx, ...].copy()
    labels = Labels[idx, ...].copy()
    for i in range(image.shape[0]):
        # print(i)
        im = image[i, ...].transpose(1, 2, 0).copy()
        bbox = 227 * labels[i // 2, ...].copy() / 10
        print('bbox = ', bbox)
        patch = drawing.drawRect(im, bbox, 1, (255, 255, 0))
        # print(im.shape)

        cv2.imwrite(path + str(i) + '.png', patch)
    # Images_load = np.load('Images.npy')
    # Labels_load = np.load('Labels.npy')
    # print(Images_load.shape, Labels_load.shape)
    # print('images load = ', np.sum(Images_load))
    # print('label load = ', Labels_load[0,0:5,:])
    def get_data_sequence(self):
        """Build one unrolled training sample from the next cached sequence.

        Fetches ``(key, images)`` from ``self.getData()`` where ``key`` is
        ``[data_idx, video_idx, track_id, image_seq]``.  For every unroll step
        ``u`` and memory slot ``m``:

          * if ``u >= m`` the slot holds the object crop and normalised box
            of frame ``u - m`` (i.e. ``m`` frames before the current one);
          * otherwise the slot keeps the crop/box of frame 0, which is used
            to pre-fill every slot.

        The label of unroll ``u`` is the crop and normalised box of frame
        ``u + 1``.  Crops are resized to ``IMG_SIZE x IMG_SIZE`` and divided
        by 255; boxes are ``[x1, y1, x2, y2]`` divided by the frame width /
        height.

        Returns:
            ``((dataImage, dataMotion), (labelImage, labelMotion))`` with
            shapes ``(num_unrolls * memory_size, IMG_SIZE, IMG_SIZE, 3)``,
            ``(num_unrolls * memory_size, 4)``,
            ``(num_unrolls, IMG_SIZE, IMG_SIZE, 3)`` and ``(num_unrolls, 4)``.
            If anything raises, the traceback is printed, a ``pdb`` session
            is opened and ``None`` is returned once it is left.
        """
        try:

            # Preallocate the space for the datas and labels.
            dataImage = np.zeros((self.num_unrolls, self.memory_size, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32)
            dataMotion = np.zeros((self.num_unrolls, self.memory_size, 4), dtype=np.float32)

            labelImage = np.zeros((self.num_unrolls, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32)
            labelMotion = np.zeros((self.num_unrolls, 4), dtype=np.float32)

            # Read a new data sequence from batch cache and get the ground truth.
            (batchKey, images) = self.getData()
            # key = [data_idx, video_idx, track_id, image_seq]
            gtKey = batchKey
            if self.debug:
                print('Inital gtKey: ', gtKey)
                print('')

            def load_frame(offset):
                # Crop and normalised box for the frame `offset` steps after
                # the sequence start.
                key = list(gtKey)
                key[3] += offset
                key = tuple(key)
                index = self.key_lookup[key]
                box = self.datasets[key[0]][index, :4].copy()
                bx1, by1, bx2, by2 = box

                frame = images[offset]
                crop = frame[int(by1):int(by2), int(bx1):int(bx2)]
                crop = cv2.resize(crop, (IMG_SIZE, IMG_SIZE)) / 255.

                height, width, _ = frame.shape
                motion = [
                    float(bx1 / width),
                    float(by1 / height),
                    float(bx2 / width),
                    float(by2 / height),
                ]
                return key, index, box, crop, motion

            # Pre-fill every memory slot with the first frame so that slots
            # which have no history yet (unroll < memory_idx) are not empty.
            newKey, imageIndex, bbox, crop, motion = load_frame(0)
            dataImage[:] = crop
            dataMotion[:] = motion

            # data
            for unroll in range(self.num_unrolls):

                if self.debug:
                    print('Unroll : ', unroll)
                    print('')

                for memory_idx in range(self.memory_size):

                    if unroll >= memory_idx:
                        newKey, imageIndex, bbox, crop, motion = load_frame(unroll - memory_idx)
                        dataImage[unroll, memory_idx] = crop
                        # Write only this slot; assigning to dataMotion[:]
                        # here would clobber every previously filled slot.
                        dataMotion[unroll, memory_idx] = motion

                    if self.debug:
                        # When unroll < memory_idx these values are the ones
                        # left over from the most recent load.
                        path = self.image_paths[newKey[0]][newKey[-1]]
                        print('Memory idx  : ', memory_idx)
                        print('gtKey       : ', newKey)
                        print('bbox        : ', bbox)
                        print('bbox(float) : ', motion)
                        print('Image idx   : ', imageIndex)
                        print('Image path  : ', path)
                        print('')

                # label: the frame right after the current unroll step
                newKey, imageIndex, bbox, crop, motion = load_frame(unroll + 1)
                labelImage[unroll] = crop
                # Per-unroll row; assigning to labelMotion[:] would leave all
                # rows equal to the last unroll's label.
                labelMotion[unroll] = motion

                if self.debug:
                    path = self.image_paths[newKey[0]][newKey[-1]]
                    print('[label]')
                    print('gtKey       : ', newKey)
                    print('bbox        : ', bbox)
                    print('bbox(float) : ', motion)
                    print('Image idx   : ', imageIndex)
                    print('Image path  : ', path)
                    print('')

                    # Show the memory slots stacked vertically next to the
                    # label crop and wait for a key press.
                    plots = [dataImage[unroll, idx] for idx in range(self.memory_size)]

                    subplot = np.zeros((IMG_SIZE * self.memory_size, IMG_SIZE, 3), dtype=np.float32)
                    cv2.vconcat(tuple(plots), subplot)
                    cv2.imshow('external memory', subplot)
                    cv2.imshow('prediction', labelImage[unroll])
                    cv2.waitKey(0)

            # Flatten (unroll, memory) into a single leading axis.
            dataImage = dataImage.reshape(([self.num_unrolls * self.memory_size] + list(dataImage.shape[2:])))
            dataMotion = dataMotion.reshape(([self.num_unrolls * self.memory_size] + list(dataMotion.shape[2:])))

            return (dataImage, dataMotion), (labelImage, labelMotion)

        except Exception as e:
            # NOTE(review): development aid kept as-is - drops into the
            # debugger on any failure and then falls through returning None.
            import traceback
            traceback.print_exc()
            import pdb
            pdb.set_trace()
            print('exception')
def main(label_type):
    """Convert ILSVRC VID XML annotations into per-folder label arrays.

    Iterates over three hard-coded training folder patterns.  For
    ``label_type == 'train'`` each pattern selects its own videos; for any
    other value every iteration uses the ``'/*/'`` wildcard.  Each iteration
    collects one row per annotated object::

        [xmin, ymin, xmax, ymax, video_index, track_id, image_number,
         class_index, occluded]

    where ``image_number`` counts images across all videos of the iteration.
    Rows are sorted by video, then track, then image number.

    Outputs (only when ``DEBUG`` is false), with ``<k>`` being ``fol[-4]``
    (the last digit of the folder number):
      * ``labels/<label_type>/image_names<k>.txt`` - one image path per line.
      * ``labels/<label_type>/labels<k>.npy`` - the sorted row array; an
        empty ``(0, 9)`` array if no objects were found.

    When ``DEBUG`` is true the boxes are drawn per track in a random color
    and each image is displayed instead.

    Args:
        label_type: Name of the VID split directory to process.
    """
    folder = [
        '/ILSVRC2015_VID_train_0001/*/', '/ILSVRC2015_VID_train_0002/*/',
        '/ILSVRC2015_VID_train_0003/*/'
    ]
    # Annotation <name> text -> 1-based class index.  Built once; it does not
    # depend on the folder being processed.
    classes = {
        'n01674464': 1,
        'n01662784': 2,
        'n02342885': 3,
        'n04468005': 4,
        'n02509815': 5,
        'n02084071': 6,
        'n01503061': 7,
        'n02324045': 8,
        'n02402425': 9,
        'n02834778': 10,
        'n02419796': 11,
        'n02374451': 12,
        'n04530566': 13,
        'n02118333': 14,
        'n02958343': 15,
        'n02510455': 16,
        'n03790512': 17,
        'n02391049': 18,
        'n02121808': 19,
        'n01726692': 20,
        'n02062744': 21,
        'n02503517': 22,
        'n02691156': 23,
        'n02129165': 24,
        'n02129604': 25,
        'n02355227': 26,
        'n02484322': 27,
        'n02411705': 28,
        'n02924116': 29,
        'n02131653': 30,
    }
    # google root directory
    dataset_path = '/home/ILSVRC/'
    annotationPath = dataset_path + 'Annotations/'

    for fol in folder:
        wildcard = fol if label_type == 'train' else '/*/'

        imageNameFile = None
        if not DEBUG:
            if not os.path.exists(os.path.join('labels', label_type)):
                os.makedirs(os.path.join('labels', label_type))
            imageNameFile = open(
                'labels/' + label_type + '/image_names' + str(fol[-4]) +
                '.txt', 'w')

        # try/finally so the names file is closed even if an annotation
        # fails to parse part-way through the folder.
        try:
            videos = sorted(
                glob.glob(annotationPath + 'VID/' + label_type + wildcard))

            bboxes = []
            imNum = 0
            totalImages = len(
                glob.glob(annotationPath + 'VID/' + label_type + wildcard +
                          '*.xml'))
            print('totalImages', totalImages)

            for vv, video in enumerate(videos):
                labels = sorted(glob.glob(video + '*.xml'))
                trackColor = dict()
                for ii, label in enumerate(labels):
                    # Swap only the extension; replace('xml', 'JPEG') would
                    # also rewrite any path component containing "xml".
                    imageName = os.path.splitext(
                        label.replace('Annotations', 'Data'))[0] + '.JPEG'
                    if imNum % 100 == 0:
                        print('imNum %d of %d = %.2f%%' %
                              (imNum, totalImages,
                               imNum * 100.0 / totalImages))
                    if imageNameFile is not None:
                        # The full image path is written, one per line.
                        imageNameFile.write(imageName + '\n')
                    labelTree = ET.parse(label)
                    if DEBUG:
                        print('\n%s' % imageName)
                        image = cv2.imread(imageName)
                        print('video', vv, 'image', ii)
                    for obj in labelTree.findall('object'):
                        cls = obj.find('name').text
                        assert cls in classes
                        classInd = classes[cls]

                        occl = int(obj.find('occluded').text)
                        trackId = int(obj.find('trackid').text)
                        bndbox = obj.find('bndbox')
                        bbox = [
                            int(bndbox.find('xmin').text),
                            int(bndbox.find('ymin').text),
                            int(bndbox.find('xmax').text),
                            int(bndbox.find('ymax').text), vv, trackId,
                            imNum, classInd, occl
                        ]

                        if DEBUG:
                            print('name', obj.find('name').text, '\n')
                            print(bbox)
                            # One random color per track, stable within the
                            # current video.
                            if trackId not in trackColor:
                                trackColor[trackId] = [
                                    random.random() * 255 for _ in range(3)
                                ]
                            drawing.drawRect(image, bbox[:4], 3,
                                             trackColor[trackId])
                        bboxes.append(bbox)
                    if DEBUG:
                        cv2.imshow('image', image)
                        cv2.waitKey(1)

                    imNum += 1
        finally:
            if imageNameFile is not None:
                imageNameFile.close()

        # An empty list would become a 1-D array and break the column
        # indexing below, so fall back to an explicit (0, 9) array.
        if bboxes:
            bboxes = np.array(bboxes)
        else:
            bboxes = np.zeros((0, 9), dtype=int)
        # Reorder by video_id, then track_id, then video image number so all labels for a single track are next to each other.
        # This only matters if a single image could have multiple tracks.
        order = np.lexsort((bboxes[:, 6], bboxes[:, 5], bboxes[:, 4]))
        bboxes = bboxes[order, :]
        if not DEBUG:
            np.save('labels/' + label_type + '/labels' + str(fol[-4]) + '.npy',
                    bboxes)