Beispiel #1
0
def parse_keypoints(content, outdir):
    """Write one keypoint-annotation XML file per image into ``outdir``.

    Args:
        content: Loaded annotation dict. The code reads
            ``content['categories'][0]['keypoints']``, ``content['images']``
            (joined on ``'id'``) and ``content['annotations']`` (joined on
            ``'image_id'``).
        outdir: Directory that receives ``<image stem>.xml`` files. It is
            not created here.

    Every merged record has its ``'category_id'`` overwritten with the
    string ``"person"`` before the XML is built.
    """
    # Map 1-based keypoint index -> keypoint name.
    keypoints = dict(enumerate(content['categories'][0]['keypoints'], 1))
    # Merge images and annotations: id in images vs image_id in annotations.
    # The result is traversed twice (relabel loop + groupby), so it must be a
    # real list: on Python 3 a bare ``map`` would be exhausted by the first
    # loop and groupby would see nothing.
    merged_info_list = list(
        map(
            cytoolz.merge,
            cytoolz.join('id', content['images'], 'image_id',
                         content['annotations'])))
    # convert category name to person
    for keypoint in merged_info_list:
        keypoint['category_id'] = "person"
    # group by filename to pool all bbox and keypoint in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        filename = os.path.join(outdir, os.path.splitext(name)[0] + ".xml")
        anno_tree = keypoints2xml_base(groups[0])
        for group in groups:
            anno_tree = keypoints2xml_object(group,
                                             anno_tree,
                                             keypoints,
                                             bbox_type="xyxy")
        doc = etree.ElementTree(anno_tree)
        # Pass the path rather than a text-mode handle: the serializer emits
        # bytes, and letting it open the file also guarantees it is closed.
        doc.write(filename, pretty_print=True)
        print("Formating keypoints xml file {} done!".format(name))
Beispiel #2
0
def parse_instance(content, outdir):
    """Write per-category XML annotation files for every image.

    Args:
        content: Loaded annotation dict; ``'categories'``, ``'images'`` and
            ``'annotations'`` are read.
        outdir: Root directory. Files are written to
            ``outdir/<category name with spaces replaced by "_">/<stem>.xml``;
            the sub-directories are not created here.

    Only annotations whose ``'iscrowd'`` is 0 contribute a bounding box and
    a target path. An image with objects of several categories gets the same
    XML document (containing all its non-crowd boxes) in each category
    directory.
    """
    categories = {d['id']: d['name'] for d in content['categories']}
    # Merge images and annotations: id in images vs image_id in annotations.
    # Materialised because it is iterated twice; a lazy Python 3 ``map``
    # would be empty by the time groupby runs.
    merged_info_list = list(
        map(
            cytoolz.merge,
            cytoolz.join('id', content['images'], 'image_id',
                         content['annotations'])))
    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]
    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        xml_name = os.path.splitext(name)[0] + ".xml"
        # One target path per distinct category present in this image.
        filenames = []
        for group in groups:
            if group[u'iscrowd'] == 0:
                filename = os.path.join(
                    outdir, re.sub(" ", "_", group['category_id']), xml_name)
                # Several objects of one category map to the same path;
                # record it once so the file is not rewritten repeatedly.
                if filename not in filenames:
                    filenames.append(filename)
                anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        for filename in filenames:
            etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        # Parenthesised single-argument form works on Python 2 and 3.
        print("Formating instance xml file {} done!".format(name))
def extract_urls(args):
    """Export ``<image stem> <coco_url>`` lines from an annotation file.

    Args:
        args: Object exposing ``output_dir``, ``anno_file`` and ``type``.

    When ``args.type == 'instance'`` one line per annotation is appended to
    ``<category name with spaces replaced by "_">_image_urls.txt`` inside
    ``args.output_dir`` (files are opened in append mode, so earlier content
    is kept). For any other type a single file
    ``person_keypoints_imag_urls.txt`` is (re)written with one line per
    distinct ``file_name``.
    """
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    # Context manager so the annotation file handle is closed after parsing.
    with open(args.anno_file, 'r') as anno_file:
        content = json.load(anno_file)
    # Iterated exactly once in either branch, so a lazy map is fine here.
    merge_info_list = map(
        cytoolz.merge,
        cytoolz.join('id', content['images'], 'image_id',
                     content['annotations']))
    if args.type == 'instance':
        outfiles = {
            category['id']: os.path.join(
                args.output_dir,
                re.sub(" ", "_", category['name']) + "_image_urls.txt")
            for category in content['categories']
        }
        for info in merge_info_list:
            print("Saving file name: ", info['file_name'])
            # ``with`` closes the file; no explicit close() is needed.
            with open(outfiles[info['category_id']], "a") as f:
                f.write(
                    os.path.splitext(info['file_name'])[0] + " " +
                    info['coco_url'] + "\n")
        print("Exporting coco image urls for instance done!")
    else:
        outfile = os.path.join(args.output_dir,
                               "person_keypoints_imag_urls.txt")
        # Keyed by file name so each image is listed once.
        url_dict = {
            info['file_name']: info['coco_url']
            for info in merge_info_list
        }
        with open(outfile, "w") as f:
            for name, url in url_dict.items():
                f.write(os.path.splitext(name)[0] + " " + url + "\n")
        print("Exporting coco image urls for keypoints done!")
def parse_instance(content, outdir):
    """Write XML annotations for images whose URL lies under ``multiple``.

    Args:
        content: Loaded annotation dict; ``'categories'``, ``'images'`` and
            ``'annotations'`` are read, and each merged record must carry a
            ``'url'`` with at least four ``/``-separated components.
        outdir: Root output directory.

    For each image, the URL of its first record is split on ``/``. Images
    whose fourth-from-last component is not ``'multiple'`` are skipped. The
    rest are written to ``outdir/<third-from-last>/<second-from-last>/
    <image stem>.xml``, with one bounding box per annotation.
    """
    categories = {d['id']: d['name'] for d in content['categories']}
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(
        map(
            cytoolz.merge,
            cytoolz.join('id', content['images'], 'image_id',
                         content['annotations'])))
    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]
    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        # Split once instead of three times.
        url_parts = groups[0]['url'].split('/')
        if url_parts[-4] != 'multiple':
            continue
        folder, subfolder = url_parts[-3], url_parts[-2]
        target_dir = os.path.join(outdir, folder, subfolder)
        # makedirs creates both levels in one call.
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        anno_tree = instance2xml_base(groups[0])
        for group in groups:
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        # Every object of an image shares the same target path, so the
        # completed tree is written exactly once (the path used to be queued
        # once per object and the file rewritten each time).
        filename = os.path.join(target_dir, os.path.splitext(name)[0] + ".xml")
        etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print("Formating instance xml file {} done!".format(name))
Beispiel #5
0
def parse_instance(content, outdir):
    """Export XML annotations and image copies for the target classes.

    Args:
        content: Loaded annotation dict; ``'categories'``, ``'images'`` and
            ``'annotations'`` are read.
        outdir: Root directory; XML goes to ``outdir/annotations/<stem>.xml``.

    Relies on names defined outside this function: ``dataDir`` (source image
    directory), ``target_classes`` (category names to keep) and
    ``output_dir`` (root for the copied images).

    Annotations are kept only if their category name is in
    ``target_classes`` and the image decodes to a 3-dimensional array. The
    number of copied images is printed at the end.
    """
    categories = {d['id']: d['name'] for d in content['categories']}
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations'])))

    filtered_info_list = []
    # file_name -> whether the image has a 3-D pixel array. Cached so an
    # image with many annotations is opened and decoded only once.
    is_three_dim = {}

    # convert category id to name && get target object info
    for instance in merged_info_list:
        cat_name = categories[instance['category_id']]
        if cat_name not in target_classes:
            continue

        # Filter out images that do not qualify for VOC: anything whose
        # array is not 3-dimensional (presumably single-channel images).
        file_name = instance['file_name']
        if file_name not in is_three_dim:
            filepath = os.path.join(dataDir, file_name)
            # Context manager releases the file handle after decoding.
            with Image.open(filepath) as origimg:
                is_three_dim[file_name] = len(np.asarray(origimg).shape) == 3
        if not is_three_dim[file_name]:
            continue

        instance['category_id'] = cat_name
        filtered_info_list.append(instance)

    # group by filename to pool all bbox in same file
    target_images = []
    for name, groups in cytoolz.groupby('file_name', filtered_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        for group in groups:
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        # All objects of an image go to one file, so write it once after the
        # tree is complete.
        filename = os.path.join(outdir, 'annotations', os.path.splitext(name)[0] + ".xml")
        etree.ElementTree(anno_tree).write(filename, pretty_print=True)

        print("Formating instance xml file {} done!".format(name))

        # Copy the image next to its annotation. groupby keys are unique, so
        # each name is seen exactly once and needs no membership check.
        # NOTE(review): this uses the outer ``output_dir`` while the XML uses
        # the ``outdir`` parameter - confirm the two are meant to differ.
        img_path = os.path.join(dataDir, name)
        target_path = os.path.join(output_dir, 'images', name)
        shutil.copyfile(img_path, target_path)
        target_images.append(name)

    print(len(target_images))
Beispiel #6
0
def broadcast_dimensions(argpairs,
                         numblocks,
                         sentinels=(1, (1, )),
                         consolidate=None):
    """ Determine the block dimension associated with each index letter

    Parameters
    ----------
    argpairs: iterable
        (name, index) pairs; pairs whose index is None are ignored
    numblocks: dict
        maps {name: per-dimension block information}
    sentinels: iterable (optional)
        values treated as singleton dimensions and dropped when an index
        has more than one candidate
    consolidate: func (optional)
        if given, applied to each index's candidate set and the resulting
        mapping is returned without the alignment check

    Raises
    ------
    ValueError
        if, without ``consolidate``, some index does not end up with exactly
        one candidate

    Examples
    --------
    >>> argpairs = [('x', 'ij'), ('y', 'ji')]
    >>> numblocks = {'x': (2, 3), 'y': (3, 2)}
    >>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Supports numpy broadcasting rules

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> numblocks = {'x': (2, 1), 'y': (1, 3)}
    >>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Works in other contexts too

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> d = {'x': ('Hello', 1), 'y': (1, (2, 3))}
    >>> broadcast_dimensions(argpairs, d)
    {'i': 'Hello', 'j': (2, 3)}
    """
    indexed = [(name, ind) for name, ind in argpairs if ind is not None]
    matched = toolz.join(toolz.first, indexed, toolz.first,
                         numblocks.items())
    # Flat stream like [('i', 2), ('j', 1), ('i', 1), ('j', 2)]
    index_dim_pairs = toolz.concat(
        [zip(inds, dims) for (_, inds), (_, dims) in matched])

    grouped = toolz.groupby(0, index_dim_pairs)
    candidates = {
        index: set([dim for _, dim in members])
        for index, members in grouped.items()
    }

    # Sentinels are only discarded when there is something else to keep.
    trimmed = {
        index: (dims - set(sentinels) if len(dims) > 1 else dims)
        for index, dims in candidates.items()
    }

    if consolidate:
        return toolz.valmap(consolidate, trimmed)

    if trimmed and set(map(len, trimmed.values())) != set([1]):
        raise ValueError("Shapes do not align %s" % candidates)

    return toolz.valmap(toolz.first, trimmed)
Beispiel #7
0
def broadcast_dimensions(argpairs, numblocks, sentinels=(1, (1,)),
                         consolidate=None):
    """ Resolve one block dimension per index from named arguments

    Parameters
    ----------
    argpairs: iterable
        name, ijk index pairs (entries with a None index are skipped)
    numblocks: dict
        maps {name: number of blocks}
    sentinels: iterable (optional)
        values for singleton dimensions; removed from an index's candidates
        only when that index has more than one candidate
    consolidate: func (optional)
        use this to reduce each set of common blocks into a smaller set;
        when supplied its results are returned directly

    Examples
    --------
    >>> argpairs = [('x', 'ij'), ('y', 'ji')]
    >>> numblocks = {'x': (2, 3), 'y': (3, 2)}
    >>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Supports numpy broadcasting rules

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> numblocks = {'x': (2, 1), 'y': (1, 3)}
    >>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Works in other contexts too

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> d = {'x': ('Hello', 1), 'y': (1, (2, 3))}
    >>> broadcast_dimensions(argpairs, d)
    {'i': 'Hello', 'j': (2, 3)}
    """
    # Keep only the arguments that actually carry an index.
    named_indices = []
    for name, ind in argpairs:
        if ind is not None:
            named_indices.append((name, ind))

    # Pair every index letter with the matching entry of numblocks,
    # e.g. [('i', 2), ('j', 1), ('i', 1), ('j', 2)] once flattened.
    zipped = []
    for (_, inds), (_, dims) in toolz.join(toolz.first, named_indices,
                                           toolz.first, numblocks.items()):
        zipped.append(zip(inds, dims))
    flat_pairs = toolz.concat(zipped)

    # Collect the distinct values seen for each index letter.
    seen = {}
    for index, pairs in toolz.groupby(0, flat_pairs).items():
        seen[index] = set([dim for _, dim in pairs])

    # Drop sentinel values wherever an index has competing candidates.
    resolved = {}
    for index, dims in seen.items():
        if len(dims) > 1:
            resolved[index] = dims - set(sentinels)
        else:
            resolved[index] = dims

    if consolidate:
        return toolz.valmap(consolidate, resolved)

    sizes = set(map(len, resolved.values()))
    if resolved and sizes != set([1]):
        raise ValueError("Shapes do not align %s" % seen)

    return toolz.valmap(toolz.first, resolved)
def parse_instance(content, outdir):
    """Write one XML annotation file per image directly into ``outdir``.

    ``content`` supplies ``'categories'``, ``'images'`` and
    ``'annotations'``; images and annotations are matched on
    ``id == image_id`` and each record's ``'category_id'`` is replaced by
    the category name before the XML is built.
    """
    id_to_name = {}
    for category in content['categories']:
        id_to_name[category['id']] = category['name']

    # Each joined (image, annotation) pair is flattened into a single dict.
    joined_pairs = cytoolz.join(
        'id', content['images'], 'image_id', content['annotations'])
    records = [cytoolz.merge(pair) for pair in joined_pairs]

    for record in records:
        record['category_id'] = id_to_name[record['category_id']]

    # All records sharing a file name end up in the same document.
    per_image = cytoolz.groupby('file_name', records)
    for name, groups in per_image.items():
        xml_stem = os.path.splitext(name)[0]
        anno_tree = instance2xml_base(groups[0])
        for group in groups:
            bbox_node = instance2xml_bbox(group, bbox_type='xyxy')
            anno_tree.append(bbox_node)
        filename = os.path.join(outdir, xml_stem + ".xml")
        document = etree.ElementTree(anno_tree)
        document.write(filename, pretty_print=True)
        print("Formating instance xml file {} done!".format(name))
def parse_keypoints(content, outdir):
    """Write one keypoint-annotation XML file per image into ``outdir``.

    Args:
        content: Loaded annotation dict. Reads
            ``content['categories'][0]['keypoints']``, ``content['images']``
            and ``content['annotations']``.
        outdir: Existing directory that receives ``<image stem>.xml`` files.

    Each merged record's ``'category_id'`` is set to ``"person"``.
    """
    keypoint_names = content['categories'][0]['keypoints']
    # 1-based index -> keypoint name
    keypoints = dict(zip(range(1, len(keypoint_names) + 1), keypoint_names))
    # Merge images and annotations: id in images vs image_id in annotations.
    # Wrapped in list() because the records are walked twice; on Python 3
    # the bare map would already be exhausted when groupby runs.
    merged_info_list = list(map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations'])))
    # convert category name to person
    for keypoint in merged_info_list:
        keypoint['category_id'] = "person"
    # group by filename to pool all bbox and keypoint in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        filename = os.path.join(outdir, os.path.splitext(name)[0]+".xml")
        anno_tree = keypoints2xml_base(groups[0])
        for group in groups:
            anno_tree = keypoints2xml_object(group, anno_tree, keypoints, bbox_type="xyxy")
        doc = etree.ElementTree(anno_tree)
        # Hand over the path instead of an unclosed text-mode handle; the
        # serializer writes bytes and closes the file itself.
        doc.write(filename, pretty_print=True)
        # Single-argument call form is valid on both Python 2 and 3.
        print("Formating keypoints xml file {} done!".format(name))
def parse_instance(content, outdir):
    """Write per-category XML annotation files for every image.

    Args:
        content: Loaded annotation dict; ``'categories'``, ``'images'`` and
            ``'annotations'`` are read.
        outdir: Root directory. Each image's document is written to
            ``outdir/<category name with spaces replaced by "_">/<stem>.xml``
            for every category present in the image; the sub-directories
            are not created here.
    """
    categories = {d['id']: d['name'] for d in content['categories']}
    # Merge images and annotations: id in images vs image_id in annotations.
    # list() keeps the records available for the second pass (groupby) on
    # Python 3, where map is a one-shot iterator.
    merged_info_list = list(map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations'])))
    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]
    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        xml_name = os.path.splitext(name)[0] + ".xml"
        # if one file have multiple different objects, save it in each category sub-directory
        filenames = []
        for group in groups:
            filename = os.path.join(outdir, re.sub(" ", "_", group['category_id']), xml_name)
            # Objects of the same category share a path; queue it only once
            # so the identical document is not written again and again.
            if filename not in filenames:
                filenames.append(filename)
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        for filename in filenames:
            etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print("Formating instance xml file {} done!".format(name))
Beispiel #11
0
def coco2xml_convert(anno_file,
                     output_dir='./result',
                     bbox_type='xywh',
                     folder_split=False):
    """Convert a COCO-format annotation file to per-image xml files.

        Args:
            anno_file (str): Path to annotations of data.
            output_dir (str): Path to save folder. Created if missing.
            bbox_type (str): Bbox format. Default: 'xywh'.
            folder_split (bool): Whether to store file by category. When
                True the image's xml is written to
                ``output_dir/<category name>/`` for every category present
                in the image; otherwise a single file goes to
                ``output_dir``.
        """

    os.makedirs(output_dir, exist_ok=True)
    # Context manager so the annotation file is closed once parsed.
    with open(anno_file, 'r') as anno_fp:
        annotation = json.load(anno_fp)
    categories = {d['id']: d['name'] for d in annotation['categories']}
    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(map(cytoolz.merge,
                                cytoolz.join('id', annotation['images'],
                                             'image_id',
                                             annotation['annotations'])))
    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]
    # group by filename to pool all bbox in same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        xml_name = os.path.splitext(name)[0] + '.xml'
        # Without folder splitting there is exactly one target; compute it
        # once instead of rebuilding the list for every object.
        filenames = [] if folder_split else [
            os.path.join(output_dir, xml_name)]
        for group in groups:
            if folder_split:
                subdir = os.path.join(output_dir, group['category_id'])
                filename = os.path.join(subdir, xml_name)
                # Objects of one category share a path: create the folder
                # and queue the file only the first time it is seen.
                if filename not in filenames:
                    os.makedirs(subdir, exist_ok=True)
                    filenames.append(filename)

            anno_tree.append(instance2xml_bbox(group, bbox_type=bbox_type))
        for filename in filenames:
            etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print(f'Formating instance xml file {name} done!')
def extract_urls(args):
    """Dump ``<image stem> <coco_url>`` lines from an annotation file.

    Args:
        args: Object with ``output_dir``, ``anno_file`` and ``type``
            attributes.

    For ``args.type == 'instance'`` a line is appended, per annotation, to
    the file of its category
    (``<name with spaces replaced by "_">_image_urls.txt``). For any other
    type, ``person_keypoints_imag_urls.txt`` is overwritten with one line
    per distinct file name. ``args.output_dir`` is created when absent.
    """
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    # Close the annotation file deterministically after loading.
    with open(args.anno_file, 'r') as anno_file:
        content = json.load(anno_file)
    # Consumed once by whichever branch runs, so it may stay lazy.
    merge_info_list = map(cytoolz.merge, cytoolz.join('id', content['images'], 'image_id', content['annotations']))
    if args.type == 'instance':
        outfiles = {category['id']: os.path.join(args.output_dir, re.sub(" ", "_", category['name'])+"_image_urls.txt") for category in content['categories']}
        for info in merge_info_list:
            print("Saving file name: ", info['file_name'])
            # The with-block closes the file, so no trailing close() call.
            with open(outfiles[info['category_id']], "a") as f:
                f.write(os.path.splitext(info['file_name'])[0]+" "+info['coco_url']+"\n")
        print("Exporting coco image urls for instance done!")
    else:
        outfile = os.path.join(args.output_dir, "person_keypoints_imag_urls.txt")
        # Dict keyed by file name collapses repeated images to one entry.
        url_dict = {info['file_name']: info['coco_url'] for info in merge_info_list}
        with open(outfile, "w") as f:
            for name, url in url_dict.items():
                f.write(os.path.splitext(name)[0]+" "+url+"\n")
        print("Exporting coco image urls for keypoints done!")
Beispiel #13
0
def parse_instance(content, outdir):
    """
    Emit a single XML annotation file per image, all in ``outdir``.

    Unlike the per-category variant, no file is replicated: every bounding
    box of an image is appended to one tree, which is written to
    ``outdir/<image stem>.xml``.
    """
    name_by_id = dict((cat['id'], cat['name']) for cat in content['categories'])

    # Join images to their annotations (images.id == annotations.image_id)
    # and flatten each matched pair into one record.
    matched = cytoolz.join('id', content['images'], 'image_id',
                           content['annotations'])
    merged_records = []
    for pair in matched:
        merged_records.append(cytoolz.merge(pair))

    # Replace numeric category ids with their names.
    for record in merged_records:
        category_key = record['category_id']
        record['category_id'] = name_by_id[category_key]

    # One output document per distinct file name.
    grouped = cytoolz.groupby('file_name', merged_records)
    for name, groups in grouped.items():
        anno_tree = instance2xml_base(groups[0])
        stem, _ext = os.path.splitext(name)
        target = os.path.join(outdir, stem + ".xml")
        for entry in groups:
            anno_tree.append(instance2xml_bbox(entry, bbox_type='xyxy'))
        etree.ElementTree(anno_tree).write(target, pretty_print=True)
Beispiel #14
0
def left_join2(lseq, rseq, key):
    """Join two sequences of dicts on ``key`` and return the sorted rows.

    ``key`` is an iterable of field names fed to ``operator.itemgetter``.
    Rows are paired through ``cytoolz.join`` with ``right_default={}``, so a
    left row is merged with an empty dict when the join supplies the
    default. On a field collision the right row's value wins. The merged
    rows are returned as a list ordered by the join key.
    """
    key_fn = operator.itemgetter(*key)
    row_pairs = cytoolz.join(key_fn, lseq, key_fn, rseq, right_default={})
    merged_rows = [dict(left, **right) for left, right in row_pairs]
    merged_rows.sort(key=key_fn)
    return merged_rows
# Fold step for reduceby: add the third field of a record to the running total.
binop = lambda total, acc: total + acc[2]
# The initial total (0) belongs to reduceby, not to print. Without it each
# group is seeded with its first record (a tuple) rather than a number, so
# the sums shown below could not be produced.
print(reduceby(get(3), binop, accounts, 0))  # {'M': 400, 'F': 400}

# SEMI-STREAMING `JOIN`
# We register multiple datasets together with `join`.
# Consider a second dataset storing addresses by ID
addresses = [
    (1, '123 Main Street'),  # id, address
    (2, '5 Adams Way'),
    (5, '34 Rue St Michel')
]

# SELECT accounts.name, addresses.address FROM accounts, addresses
# WHERE accounts.id = addresses.id;

result = join(first, accounts, first, addresses)

# Distinct names for the two id fields avoid shadowing the builtin `id`;
# the join guarantees both hold the same value.
for ((acct_id, name, bal, gender), (addr_id, address)) in result:
    print((addr_id, name, bal, gender), (addr_id, address))
    print((name, address))

# toolz.itertoolz.join(leftkey, leftseq, rightkey, rightseq, left_default='__no__default__', right_default='__no__default__')
# This is a semi-streaming operation. The LEFT sequence is fully evaluated
# and placed into memory. The RIGHT sequence is evaluated lazily and so
# can be arbitrarily large.


# `JOIN` ON ARBITRARY FUNCTIONS / DATA
def isodd(x):
    """Return True when ``x`` leaves a remainder of 1 on division by 2."""
    remainder = x % 2
    return remainder == 1
Beispiel #16
0
    'year': 1,
    'acct': 6,
    'y': 'y6'
}, {
    'year': 1,
    'acct': 7,
    'y': 'y7'
}]

# Five equivalent ways to inner-join the records of `lx` and `ly` on the
# composite key (year, acct). `lx` and `ly` are defined above this excerpt -
# presumably lists of dicts carrying 'year' and 'acct' fields; verify there.
key = ('year', 'acct')

# key_fn(record) -> (record['year'], record['acct'])
key_fn = operator.itemgetter(*key)

# 1. Nested generator loops, `ly` outermost; merge every pair with equal keys.
list(dict(x, **y) for y in ly for x in lx if key_fn(x) == key_fn(y))

# 2. Let cytoolz.join do the key matching and merge each yielded pair.
list(dict(x, **y) for x, y in cytoolz.join(key_fn, lx, key_fn, ly))

# 3. Filter the full cartesian product down to the pairs with equal keys.
list(
    dict(x, **y) for x, y in itertools.product(lx, ly)
    if key_fn(x) == key_fn(y))

# 4. Group the concatenated records by key and merge the first two members of
#    every group holding more than one record (any further members of a group
#    are ignored).
list(
    dict(_[0], **_[1]) for _ in cytoolz.groupby(key_fn, lx + ly).values()
    if len(_) > 1)

# 5. Nested list comprehension, `lx` outermost, merging with cytoolz.merge.
[cytoolz.merge(x, y) for x in lx for y in ly if key_fn(x) == key_fn(y)]

# OUTPUT:
# [{'acct': 3, 'x': 'x3', 'y': 'y3', 'year': 1},
#  {'acct': 4, 'x': 'x4', 'y': 'y4', 'year': 1},
#  {'acct': 5, 'x': 'x5', 'y': 'y5', 'year': 1}]
Beispiel #17
0
def parse_instance(content, outdir, download_images=False):
    """Write one XML annotation per image, optionally downloading images.

    Args:
        content: Loaded annotation dict; ``'categories'``, ``'images'`` and
            ``'annotations'`` are read. Annotation ``'image_id'`` values are
            converted to ``int`` in place, and with ``download_images`` each
            image's ``'file_name'`` is rewritten in place to a local
            ``<stem>.jpg`` name.
        outdir: Root directory. XML goes to ``outdir/annotations`` and
            downloaded images to ``outdir/images``; both are created if
            missing.
        download_images: When true, fetch every image from its
            ``'coco_url'`` before converting.

    Raises:
        AssertionError: If a merged record has no ``'category_id'``, or an
            image's file name still looks like an http(s) URL.
    """
    categories = {d['id']: d['name'] for d in content['categories']}

    # EDITED - make sure image_id is of type int (and not of type string)
    for annotation in content['annotations']:
        annotation['image_id'] = int(annotation['image_id'])

    # EDITED - save all annotation .xml files into same sub-directory
    anno_dir = os.path.join(outdir, "annotations")
    if not os.path.exists(anno_dir):
        os.makedirs(anno_dir)

    # EDITED - download images
    if download_images:
        im_dir = os.path.join(outdir, "images")
        if not os.path.exists(im_dir):
            os.makedirs(im_dir)

        for index, obj in enumerate(content['images']):
            print(
                f"Downloading image {index} of {len(content['images'])} from: {obj['coco_url']}"
            )

            # Update 'filename' field to be a (local) filename and not a url
            im_local_filename = os.path.splitext(
                os.path.basename(obj['file_name']))[0] + ".jpg"
            obj['file_name'] = im_local_filename

            # download image
            dst_path = os.path.join(im_dir, im_local_filename)
            urllib.request.urlretrieve(obj['coco_url'], dst_path)

    # merge images and annotations: id in images vs image_id in annotations
    merged_info_list = list(
        map(
            cytoolz.merge,
            cytoolz.join('id', content['images'], 'image_id',
                         content['annotations'])))

    # convert category id to name
    for instance in merged_info_list:
        assert 'category_id' in instance, f"WARNING: annotation error: image {instance['file_name']} has a rectangle without a 'category_id' field."
        instance['category_id'] = categories[instance['category_id']]

    # group by filename to pool all bbox in same file
    names_groups = cytoolz.groupby('file_name', merged_info_list).items()
    for index, (name, groups) in enumerate(names_groups):
        print(
            f"Converting annotations for image {index} of {len(names_groups)}: {name}"
        )
        # File names are only rewritten to local names when download_images
        # is true, so a URL here means downloading was switched off; the
        # message now points at the setting that actually resolves it.
        assert not name.lower().startswith(
            ("http:", "https:")
        ), "Image seems to be a url rather than local. Need to set 'download_images' = True"

        anno_tree = instance2xml_base(groups[0], download_images)
        for group in groups:
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))

        # EDITED - save all annotations in single folder, rather than separate folders for each object.
        # Every object of the image shares this one path, so the finished
        # tree is written once instead of once per object.
        filename = os.path.join(anno_dir, os.path.splitext(name)[0] + ".xml")
        etree.ElementTree(anno_tree).write(filename, pretty_print=True)
Beispiel #18
0
 def join(self, rightseq, leftkey, rightkey):
     """Join this sequence (as the left side) with ``rightseq``.

     Note the parameter order: the right sequence comes first, followed by
     the left and right keys. The result of ``cytoolz.join`` is wrapped in
     a new instance of this object's own class.
     """
     joined = cytoolz.join(leftkey, self, rightkey, rightseq)
     wrapper = self.__class__
     return wrapper(joined)
Beispiel #19
0
def keyjoin(leftkey, leftseq, rightkey, rightseq):
    """Join two sequences on their keys and merge each matched pair.

    Returns a lazy ``starmap`` that calls ``merge(left, right)`` for every
    pair produced by ``join``.
    """
    matched_pairs = join(leftkey, leftseq, rightkey, rightseq)
    return starmap(merge, matched_pairs)
Beispiel #20
0
def key_join(left_key, left_seq, right_key, right_seq):
    """Lazily merge every (left, right) pair that ``join`` matches by key."""
    pairs = join(
        left_key,
        left_seq,
        right_key,
        right_seq,
    )
    merged = starmap(merge, pairs)
    return merged