Esempio n. 1
0
def main():
    """Convert a CCT-format database into MegaDB sequences and save them as JSON.

    Command-line arguments:
        dataset_name: short string used as the partition key in MegaDB;
            must not be empty.
        --image_db: path to the json containing the image DB in CCT format.
        --bbox_db: path to the json containing the bbox DB in CCT format.
        --docs: path to an embedded CCT format json; when given it is loaded
            and used instead of embedding image_db / bbox_db.
        --partial_mega_db: (required) path to store the resulting json.

    Invalid arguments are reported through ``parser.error``, which prints the
    usage message and exits with status 2.
    """
    # Local import so this function does not depend on a module-level
    # `import json` being present in the file.
    import json

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'dataset_name',
        help='A short string representing the dataset to be used as a partition key in MegaDB')
    parser.add_argument(
        '--image_db',
        help='Path to the json containing the image DB in CCT format')
    parser.add_argument(
        '--bbox_db',
        help='Path to the json containing the bbox DB in CCT format')
    parser.add_argument(
        '--docs',
        help='Embedded CCT format json to use instead of image_db or bbox_db')
    parser.add_argument(
        '--partial_mega_db',
        required=True,
        help='Path to store the resulting json')
    args = parser.parse_args()

    # Explicit checks instead of `assert`: assert statements are removed when
    # Python runs with -O, which would silently disable the validation.
    if not args.dataset_name:
        parser.error('dataset_name cannot be an empty string')

    if args.image_db and not os.path.exists(args.image_db):
        parser.error('image_db file path provided does not point to a file')
    if args.bbox_db and not os.path.exists(args.bbox_db):
        parser.error('bbox_db file path provided does not point to a file')

    if args.docs:
        # --docs was previously parsed but never consulted. Honor it as its
        # help text describes: use it instead of image_db / bbox_db.
        # NOTE(review): assumes the file holds the same structure that
        # make_cct_embedded() returns - confirm against process_sequences().
        if not os.path.exists(args.docs):
            parser.error('docs file path provided does not point to a file')
        with open(args.docs, encoding='utf-8') as f:
            docs = json.load(f)
    else:
        docs = make_cct_embedded(args.image_db, args.bbox_db)

    sequences = process_sequences(docs, args.dataset_name)

    # Validate the generated sequences before anything is written to disk.
    sequences_schema_check.sequences_schema_check(sequences)

    write_json(args.partial_mega_db, sequences)
Esempio n. 2
0
def megadb_to_cct(dataset_name, mega_db, output_path, bbox_only):
    """Convert one dataset's MegaDB entries to a COCO Camera Traps json file.

    Args:
        dataset_name: only entries whose 'dataset' field equals this value
            are converted.
        mega_db: list of dict entries, each with a 'dataset' key. The entries
            that belong to dataset_name are modified in place: their
            'dataset' key is deleted.
        output_path: destination handed to write_json for the resulting DB.
        bbox_only: forwarded to break_into_images_annotations; when truthy,
            the categories 'animal', 'person', 'group' and 'vehicle' are
            pinned to ids 1 through 4.

    Raises:
        AssertionError: if no entry belongs to dataset_name.
    """
    mega_db = [entry for entry in mega_db if entry['dataset'] == dataset_name]
    assert len(mega_db) > 0, 'There are no entries from the dataset {}'.format(dataset_name)
    for entry in mega_db:
        del entry['dataset']  # all remaining fields will be added to the CCT database
    print('Number of entries belonging to dataset {}: {}'.format(dataset_name, len(mega_db)))

    cct_images, cct_annotations = break_into_images_annotations(mega_db, bbox_only)

    # consolidate categories
    category_names = {anno['category_name'] for anno in cct_annotations}

    cat_name_to_id = {
        'empty': 0  # always set empty to 0 even for dataset without 'empty' labeled images
    }

    if bbox_only:
        cat_name_to_id['animal'] = 1
        cat_name_to_id['person'] = 2
        cat_name_to_id['group'] = 3
        cat_name_to_id['vehicle'] = 4

    # Iterate in sorted order: the iteration order of a set of strings can
    # differ between interpreter runs (hash randomization), which would give
    # the same dataset different category ids on every conversion.
    # key=str keeps the sort from failing should a non-string name appear.
    for cat in sorted(category_names, key=str):
        if cat not in cat_name_to_id:
            cat_name_to_id[cat] = len(cat_name_to_id)

    # Replace each annotation's category name with its numeric id.
    for anno in cct_annotations:
        anno['category_id'] = cat_name_to_id[anno['category_name']]
        del anno['category_name']

    cct_categories = [
        {'id': num_id, 'name': name}
        for name, num_id in cat_name_to_id.items()
    ]

    print('Final CCT DB has {} image entries, and {} annotation entries.'.format(len(cct_images), len(cct_annotations)))
    cct_db = {
        'info': {
            'version': str(datetime.now()),
            'date_created': str(datetime.today().date()),
            'description': ''  # filled in below, after the keys are ordered
        },
        'images': cct_images,
        'categories': cct_categories,
        'annotations': cct_annotations
    }
    cct_db = CameraTrapJsonUtils.order_db_keys(cct_db)

    cct_db['info']['description'] = 'COCO Camera Traps database converted from sequences in dataset {}'.format(
        dataset_name)
    print('Writing to output file...')
    write_json(output_path, cct_db)
    print('Done!')
def main():
    """Run get_image_dims over one dataset's sequences and save the result.

    Command-line arguments:
        dataset_name: name of the dataset; must not be empty.
        mega_db_seqs: path to a json containing a list of sequence objects.
        out_file: path for the resulting json; must end in '.json' and differ
            from mega_db_seqs.
        --ncores: optional integer passed through to get_image_dims.

    The COSMOS_ENDPOINT and COSMOS_KEY environment variables must be set.
    Invalid arguments or a missing environment variable are reported through
    ``parser.error``, which prints the usage message and exits with status 2.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'dataset_name',
        help=
        'The name of the dataset; only entries from this dataset will be used')
    parser.add_argument('mega_db_seqs',
                        help='A json containing a list of sequence objects')
    parser.add_argument(
        'out_file',
        help='Path to store the resulting json to input to megadb_to_cct.py')
    parser.add_argument(
        '--ncores',
        type=int,
        default=None,
        help=
        'Number of cores to use when downloading images to read their dimensions'
    )
    args = parser.parse_args()

    # Explicit checks instead of `assert`: assert statements are removed when
    # Python runs with -O, which would silently disable the validation.
    if not args.dataset_name:
        parser.error('dataset_name cannot be an empty string')
    if not os.path.exists(args.mega_db_seqs):
        parser.error('File at mega_db_seqs path does not exist')
    if not args.out_file.endswith('.json'):
        parser.error('out_file path needs to end in .json')
    if args.out_file == args.mega_db_seqs:
        # Writing the output over the input would destroy the source data.
        parser.error('out_file must be different from mega_db_seqs')
    # NOTE(review): presumably MegadbUtils() reads these variables - confirm.
    if 'COSMOS_ENDPOINT' not in os.environ or 'COSMOS_KEY' not in os.environ:
        parser.error(
            'COSMOS_ENDPOINT and COSMOS_KEY environment variables must be set')

    print('Loading entries...')
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(args.mega_db_seqs, encoding='utf-8') as f:
        mega_db_entries = json.load(f)
    print('Number of entries in the mega_db: {}'.format(len(mega_db_entries)))

    megadb_utils = MegadbUtils()
    datasets_table = megadb_utils.get_datasets_table()

    start_time = time.time()

    updated_seqs = get_image_dims(mega_db_entries, args.dataset_name,
                                  datasets_table, args.ncores)
    write_json(args.out_file, updated_seqs)

    elapsed = time.time() - start_time
    print('Time elapsed: {}'.format(humanfriendly.format_timespan(elapsed)))