# Imports used by the examples below. Project helpers such as find_images, fileName,
# hashFile, make_coco_dataset, map_result_to_data, give_anno_info_pascal_coco,
# give_anno_info_batch_yolo_coco and the _PARALLEL_READS constant are assumed to come
# from the surrounding module.
import datetime
import os
import random
import xml.etree.ElementTree as ET
from concurrent.futures import ThreadPoolExecutor
from itertools import repeat

import pandas as pd
import tqdm
from PIL import Image


def get_dataset_info_pascal(image_dir, anno_dir, img_to_anno=None, split=None):
    images = find_images(image_dir)
    info_images = []
    info_annotations = []
    image_num = 0
    anno_num = 0
    image_paths = []
    images_ids = []
    for curr_image_path in tqdm.tqdm(images):

        if img_to_anno is not None:
            curr_anno_name = img_to_anno(fileName(curr_image_path))
        else:
            curr_anno_name = fileName(curr_image_path)

        curr_anno_path = os.path.join(anno_dir, curr_anno_name + '.xml')
        if os.path.isfile(curr_anno_path):
            image_paths.append(curr_image_path)
            images_ids.append(image_num)
            file = ET.parse(curr_anno_path)
            curr_anno_info = []
            for anno in file.iter('object'):
                curr_anno_info.append(
                    give_anno_info_pascal_coco(anno, image_num, anno_num))
                anno_num += 1
            image_num += 1
            info_annotations.append(curr_anno_info)

            if len(image_paths) == _PARALLEL_READS:
                with ThreadPoolExecutor() as executor:
                    results = executor.map(give_image_info_coco, image_paths,
                                           images_ids)
                info_images.extend(results)
                image_paths = []
                images_ids = []

    if len(image_paths) > 0:
        with ThreadPoolExecutor() as executor:
            results = executor.map(give_image_info_coco, image_paths,
                                   images_ids)
        info_images.extend(results)
        image_paths = []
        images_ids = []

    image_with_anno = list(zip(info_images, info_annotations))
    random.shuffle(image_with_anno)
    return make_coco_dataset(image_with_anno, split)
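
# A minimal usage sketch, not part of the original example: the directory paths and the
# identity img_to_anno mapping are placeholders, and the project helpers imported above
# (find_images, make_coco_dataset, ...) are assumed to live in this module.
voc_coco = get_dataset_info_pascal(
    image_dir='data/VOC/JPEGImages',
    anno_dir='data/VOC/Annotations',
    img_to_anno=lambda name: name)  # annotation XML shares the image's file stem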
Example #2
def get_datapoint_yolo(image_path, anno_path, _CLASSES):
    dataset_info = {
        'image_name': [],
        'image_hash': [],
        'image_width': [],
        'image_height': [],
        'xmin': [],
        'ymin': [],
        'xmax': [],
        'ymax': [],
        'label_name': []
    }
    num_annos = 0
    with Image.open(image_path) as im:
        image_shape = im.size
    _hash = hashFile(image_path)
    if anno_path is not None:
        # Each YOLO label line is '<class_id> <x_center> <y_center> <width> <height>' with
        # coordinates normalised to [0, 1]. Open the file only when a path was given, so
        # images without annotations do not hit open(None), and close it promptly.
        with open(anno_path) as file:
            annos = file.readlines()
        for anno in annos:
            anno = anno.split(' ')
            anno[0] = int(anno[0])
            anno[1] = float(anno[1])
            anno[2] = float(anno[2])
            anno[3] = float(anno[3])
            anno[4] = float(anno[4])
            label = anno[0]
            xmin = image_shape[0] * (anno[1] - anno[3] / 2.0)
            ymin = image_shape[1] * (anno[2] - anno[4] / 2.0)
            xmax = image_shape[0] * (anno[1] + anno[3] / 2.0)
            ymax = image_shape[1] * (anno[2] + anno[4] / 2.0)
            dataset_info['xmin'].append(xmin)
            dataset_info['ymin'].append(ymin)
            dataset_info['xmax'].append(xmax)
            dataset_info['ymax'].append(ymax)
            if len(_CLASSES) != 0:
                dataset_info['label_name'].append(_CLASSES[label])
            else:
                dataset_info['label_name'].append(label)
            num_annos += 1
    else:
        num_annos = 1
        dataset_info['xmin'].append(-1.0)
        dataset_info['ymin'].append(-1.0)
        dataset_info['xmax'].append(-1.0)
        dataset_info['ymax'].append(-1.0)
        dataset_info['label_name'].append('background_empty')

    dataset_info['image_name'].extend(
        repeat(fileName(image_path, ext=True), num_annos))
    dataset_info['image_width'].extend(repeat(image_shape[0], num_annos))
    dataset_info['image_height'].extend(repeat(image_shape[1], num_annos))
    dataset_info['image_hash'].extend(repeat(_hash, num_annos))
    return dataset_info
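
# For reference (values chosen purely for illustration): the arithmetic above converts
# YOLO's normalised (x_center, y_center, width, height) boxes into absolute pixel corners.
W, H = 640, 480                     # image width, height
xc, yc, w, h = 0.5, 0.5, 0.25, 0.5  # one YOLO box
assert (W * (xc - w / 2), H * (yc - h / 2)) == (240.0, 120.0)  # (xmin, ymin)
assert (W * (xc + w / 2), H * (yc + h / 2)) == (400.0, 360.0)  # (xmax, ymax)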
Example #3
def get_datapoint_pascal(image_path, anno_path):
    dataset_info = {
        'image_name': [],
        'image_hash': [],
        'image_width': [],
        'image_height': [],
        'xmin': [],
        'ymin': [],
        'xmax': [],
        'ymax': [],
        'label_name': []
    }
    num_annos = 0
    _hash = hashFile(image_path)
    with Image.open(image_path) as im:
        image_shape = im.size

    if anno_path is not None:
        file = ET.parse(anno_path)
        for anno in file.iter('object'):
            dataset_info['xmin'].append(
                float(anno.find('bndbox').find('xmin').text))
            dataset_info['ymin'].append(
                float(anno.find('bndbox').find('ymin').text))
            dataset_info['xmax'].append(
                float(anno.find('bndbox').find('xmax').text))
            dataset_info['ymax'].append(
                float(anno.find('bndbox').find('ymax').text))
            dataset_info['label_name'].append(anno.find('name').text)
            num_annos += 1
    else:
        num_annos = 1
        dataset_info['xmin'].append(-1.0)
        dataset_info['ymin'].append(-1.0)
        dataset_info['xmax'].append(-1.0)
        dataset_info['ymax'].append(-1.0)
        dataset_info['label_name'].append('background_empty')

    dataset_info['image_name'].extend(
        repeat(fileName(image_path, ext=True), num_annos))
    dataset_info['image_width'].extend(repeat(image_shape[0], num_annos))
    dataset_info['image_height'].extend(repeat(image_shape[1], num_annos))
    dataset_info['image_hash'].extend(repeat(_hash, num_annos))
    return dataset_info
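
# A hedged single-file sketch (hypothetical paths): the result holds one row per <object>
# element in the XML, or a single 'background_empty' row when anno_path is None.
point = get_datapoint_pascal('data/VOC/JPEGImages/000001.jpg',
                             'data/VOC/Annotations/000001.xml')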
def get_dataset_info_yolo(image_dir, anno_dir, img_to_anno=None, split=None):
    images = find_images(image_dir)
    info_images = []
    info_annotations = []
    image_num = 0
    anno_num = 0
    image_paths = []
    images_ids = []
    for curr_image_path in tqdm.tqdm(images):
        image_name = fileName(curr_image_path)
        if img_to_anno is not None:
            curr_anno_name = img_to_anno(image_name)
        else:
            curr_anno_name = image_name

        curr_anno_path = os.path.join(anno_dir, curr_anno_name + '.txt')
        if os.path.isfile(curr_anno_path):
            image_paths.append(curr_image_path)
            images_ids.append(image_num)
            info_annotations.append(
                give_anno_info_batch_yolo_coco(curr_anno_path, curr_image_path,
                                               image_num, anno_num))
            anno_num += len(info_annotations[-1])
            image_num += 1

            if len(image_paths) == _PARALLEL_READS:
                with ThreadPoolExecutor() as executor:
                    results = executor.map(give_image_info_coco, image_paths,
                                           images_ids)
                info_images.extend(results)
                image_paths = []
                images_ids = []

    if len(image_paths) > 0:
        with ThreadPoolExecutor() as executor:
            results = executor.map(give_image_info_coco, image_paths,
                                   images_ids)
        info_images.extend(results)
        image_paths = []
        images_ids = []

    image_with_anno = list(zip(info_images, info_annotations))
    # random.shuffle(image_with_anno)
    return make_coco_dataset(image_with_anno, split)
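
# Illustrative call only (placeholder directories); YOLO .txt labels are expected in
# anno_dir under the same file stem as each image.
yolo_coco = get_dataset_info_yolo(
    image_dir='data/yolo/images',
    anno_dir='data/yolo/labels')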
def give_image_info_coco(
        image_path,
        image_id,
        date_captured=datetime.datetime.utcnow().isoformat(' '),
        license_id=1,
        coco_url='',
        flickr_url=''):
    with Image.open(image_path) as im:
        image_size = im.size

    return {
        'id': image_id,
        'file_name': fileName(image_path, ext=True),
        'width': image_size[0],
        'height': image_size[1],
        'date_captured': date_captured,
        'license': license_id,
        'coco_url': coco_url,
        'flickr_url': flickr_url,
        'segmind_image_hash': hashFile(image_path)
    }
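
# Note: the date_captured default above is evaluated once, at function-definition time, so
# every record built with the default shares the same timestamp. Illustrative call with a
# hypothetical path:
record = give_image_info_coco('data/images/000001.jpg', image_id=0)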
Example #6
def get_dataset_info_yolo(image_dir,
                          anno_dir,
                          names_file=None,
                          img_to_anno=None,
                          start_id=0,
                          split=None,
                          train_only=True,
                          *args,
                          **kwargs):
    _CLASSES = []
    if names_file is not None:
        with open(names_file) as file:
            _CLASSES = [x.strip() for x in file]  # strip() also handles a missing final newline

    images = find_images(image_dir)
    dataset_info = {
        'image_id': [],
        'image_name': [],
        'image_hash': [],
        'image_width': [],
        'image_height': [],
        'xmin': [],
        'ymin': [],
        'xmax': [],
        'ymax': [],
        'label_name': [],
        'train_only': []
    }

    image_paths = []
    anno_paths = []
    subset = 'Test'
    if train_only:
        subset = 'Training'
    print(f'\nProcessing {subset} Dataset')

    for curr_image_path in tqdm.tqdm(images, ncols=100):
        if img_to_anno is not None:
            curr_anno_name = img_to_anno(fileName(curr_image_path))
        else:
            curr_anno_name = fileName(curr_image_path)
        curr_anno_path = os.path.join(anno_dir, curr_anno_name + '.txt')
        image_paths.append(curr_image_path)
        if os.path.isfile(curr_anno_path):
            anno_paths.append(curr_anno_path)
        else:
            anno_paths.append(None)
        if len(image_paths) == _PARALLEL_READS:
            with ThreadPoolExecutor() as executor:
                results = executor.map(get_datapoint_yolo, image_paths,
                                       anno_paths, repeat(_CLASSES))
            map_result_to_data(results, dataset_info)
            image_paths = []
            anno_paths = []
    if len(image_paths) > 0:
        with ThreadPoolExecutor() as executor:
            results = executor.map(get_datapoint_yolo, image_paths, anno_paths,
                                   repeat(_CLASSES))
        map_result_to_data(results, dataset_info)
        image_paths = []
        anno_paths = []
    dataset_info['train_only'] = repeat(train_only,
                                        len(dataset_info['image_name']))
    dataset_info['image_id'] = repeat(0, len(dataset_info['image_name']))
    dataset_df = pd.DataFrame.from_dict(dataset_info)
    grouped = [df for _, df in dataset_df.groupby('image_name')]
    total_images = len(grouped)
    random.shuffle(grouped)
    dataset_df = pd.concat(grouped).reset_index(drop=True)
    dataset_df['image_id'] = dataset_df.groupby('image_name',
                                                sort=False).ngroup() + start_id
    if split != 0 and split is not None:
        split_len = split * total_images
        dataset_df.loc[dataset_df['image_id'] < start_id + split_len,
                       'train_only'] = False
    return dataset_df
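
# Illustrative call for the DataFrame variant (placeholder paths and split fraction;
# names_file is a Darknet-style .names file with one class name per line). With split=0.2,
# roughly the first 20% of the shuffled image ids are flagged train_only=False.
train_df = get_dataset_info_yolo(
    image_dir='data/yolo/images',
    anno_dir='data/yolo/labels',
    names_file='data/yolo/obj.names',
    split=0.2,
    train_only=True)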