import json
import os
from collections import OrderedDict
from copy import deepcopy
from itertools import product
from shutil import copy
from time import time

import h5py
import numpy as np

# NOTE: the helpers referenced below (pack_helper, load_json, load_h5,
# get_desc_file, get_uuid, get_descriptor_properties, get_kp_category,
# get_desc_category) come from the surrounding benchmark codebase; their
# import paths are repo-specific and assumed here, e.g.:
# from utils import pack_helper
# from utils.io_helper import load_h5, load_json


def main(cfg):
    '''Main function. Takes config as input.
    '''

    # Back up config
    cfg_orig = deepcopy(cfg)
    method = cfg_orig.method_dict

    # Add config options to the dict
    master_dict = OrderedDict()
    master_dict['config'] = method

    # Add date
    master_dict['properties'] = OrderedDict()
    master_dict['properties'][
        'processing_date'] = pack_helper.get_current_date()
    print('Adding processing date: {}'.format(
        master_dict['properties']['processing_date']))

    # Add submission flag
    master_dict['properties']['is_submission'] = cfg.is_submission
    print('Flagging as user submission: {}'.format(cfg.is_submission))

    # Add descriptor properties
    cfg_desc = deepcopy(cfg_orig)
    cfg_desc.dataset = 'phototourism'
    cfg_desc.scene = 'british_museum'
    try:
        descriptors_dict = load_h5(get_desc_file(cfg_desc))
        desc_type, desc_size, desc_nbytes = pack_helper.get_descriptor_properties(
            cfg_desc, descriptors_dict)
    except Exception:
        desc_type = 'none'
        desc_size = 0
        desc_nbytes = 0
    master_dict['properties']['descriptor_type'] = desc_type
    master_dict['properties']['descriptor_size'] = desc_size
    master_dict['properties']['descriptor_nbytes'] = desc_nbytes
    print('Adding descriptor properties: {} {} ({} bytes)'.format(
        master_dict['properties']['descriptor_size'],
        master_dict['properties']['descriptor_type'],
        master_dict['properties']['descriptor_nbytes']))

    deprecated_images_all = load_json(cfg.json_deprecated_images)
    if cfg.dataset in deprecated_images_all and cfg.scene in deprecated_images_all[
            cfg.dataset]:
        deprecated_images = deprecated_images_all[cfg.dataset][cfg.scene]
    else:
        deprecated_images = []

    # Read data and splits
    DATASET_LIST = ['phototourism', 'pragueparks', 'googleurban']
    for dataset in DATASET_LIST:
        # Skip if not in config
        if 'config_{}_stereo'.format(
                dataset) not in method and 'config_{}_multiview'.format(
                    dataset) not in method:
            continue

        # Create empty dictionary
        master_dict[dataset] = OrderedDict()
        res_dict = OrderedDict()
        master_dict[dataset]['results'] = res_dict

        # Save number of runs
        master_dict[dataset]['num_runs_stereo'] = getattr(
            cfg_orig, 'num_runs_{}_stereo'.format(cfg_orig.subset))
        master_dict[dataset]['num_runs_multiview'] = getattr(
            cfg_orig, 'num_runs_{}_multiview'.format(cfg_orig.subset))

        # Load data config
        scene_list = load_json(
            getattr(cfg_orig, 'scenes_{}_{}'.format(dataset, cfg_orig.subset)))
        bag_size_json = load_json(
            getattr(cfg_orig, 'splits_{}_{}'.format(dataset, cfg_orig.subset)))
        bag_size_list = [b['bag_size'] for b in bag_size_json]
        bag_size_num = [b['num_in_bag'] for b in bag_size_json]
        bag_size_str = ['{}bag'.format(b) for b in bag_size_list]

        # Create empty dicts
        for scene in ['allseq'] + scene_list:
            res_dict[scene] = OrderedDict()
            for task in ['stereo', 'multiview']:
                res_dict[scene][task] = OrderedDict()
                res_dict[scene][task]['run_avg'] = OrderedDict()
                if task == 'multiview':
                    for bag in bag_size_str + ['bag_avg']:
                        res_dict[scene]['multiview']['run_avg'][
                            bag] = OrderedDict()

        # Stereo -- multiple runs
        t = time()
        cur_key = 'config_{}_stereo'.format(dataset)
        if cfg_orig.eval_stereo and cur_key in method and method[cur_key]:
            num_runs = getattr(cfg_orig,
                               'num_runs_{}_stereo'.format(cfg_orig.subset))
            cfg = deepcopy(cfg_orig)
            cfg.dataset = dataset
            cfg.task = 'stereo'
            for scene in scene_list:
                cfg.scene = scene

                res_dict[scene]['stereo']['run_avg'] = OrderedDict()
                for run in range(num_runs):
                    res_dict[scene]['stereo']['run_{}'.format(
                        run)] = OrderedDict()

                # Create list of things to gather
                metric_list = []
                metric_list += ['avg_num_keypoints']
                # metric_list += ['matching_scores_epipolar']
                metric_list += ['num_inliers']
                if dataset != 'googleurban':
                    metric_list += ['matching_scores_depth_projection']
                    metric_list += ['repeatability']
                metric_list += ['qt_auc']
                metric_list += ['timings']

                for run in range(num_runs):
                    # Compute and pack results
                    cfg.run = run
                    cur_dict = res_dict[scene]['stereo']['run_{}'.format(run)]
                    for metric in metric_list:
                        t_cur = time()
                        getattr(pack_helper,
                                'compute_' + metric)(cur_dict,
                                                     deprecated_images, cfg)
                        print(
                            ' -- Packing "{}"/"{}"/stereo, run: {}/{}, metric: {} [{:.02f} s]'
                            .format(dataset, scene, run + 1, num_runs, metric,
                                    time() - t_cur))

            # Compute average across runs, for stereo
            t_cur = time()
            pack_helper.average_stereo_over_runs(cfg, res_dict, num_runs)
            print(
                ' -- Packing "{}"/stereo: averaging over {} run(s) [{:.02f} s]'
                .format(dataset, num_runs,
                        time() - t_cur))

            # Compute average across scenes, for stereo
            t_cur = time()
            pack_helper.average_stereo_over_scenes(cfg, res_dict, num_runs)
            print(
                ' -- Packing "{}"/stereo: averaging over {} scene(s) [{:.02f} s]'
                .format(dataset, len(scene_list),
                        time() - t_cur))

            print(' -- Finished packing stereo in {:.01f} sec.'.format(time() -
                                                                       t))
        else:
            print('Skipping "{}/stereo"'.format(dataset))

        # Multiview -- multiple runs
        t = time()
        cur_key = 'config_{}_multiview'.format(dataset)
        if cfg_orig.eval_multiview and cur_key in method and method[cur_key]:
            num_runs = getattr(
                cfg_orig, 'num_runs_{}_multiview'.format(cfg_orig.subset))
            cfg = deepcopy(cfg_orig)
            cfg.dataset = dataset
            cfg.task = 'multiview'
            for scene in scene_list:
                cfg.scene = scene

                for run in ['run_avg'
                            ] + ['run_{}'.format(f) for f in range(num_runs)]:
                    res_dict[scene]['multiview'][run] = OrderedDict()
                    for bags_label in ['bag_avg'] + bag_size_str:
                        res_dict[scene]['multiview'][run][
                            bags_label] = OrderedDict()

                # Create list of things to gather
                metric_list = []
                metric_list += ['avg_num_keypoints']
                metric_list += ['num_input_matches']
                metric_list += ['qt_auc_colmap']
                metric_list += ['ATE']
                metric_list += ['colmap_stats']

                for run in range(num_runs):
                    for bag_size in bag_size_list:
                        # Compute and pack results
                        cfg.run = run
                        cfg.bag_size = bag_size
                        cur_dict = res_dict[scene]['multiview']
                        for metric in metric_list:
                            t_cur = time()
                            getattr(pack_helper, 'compute_' + metric)(
                                cur_dict['run_{}'.format(run)]['{}bag'.format(
                                    bag_size)], deprecated_images, cfg)
                            print(
                                ' -- Packing "{}"/"{}"/multiview, run {}/{}, "{}", metric: {} [{:.02f} s]'
                                .format(dataset, scene, run + 1, num_runs,
                                        '{}bag'.format(bag_size), metric,
                                        time() - t_cur))

                        # Compute average across bags; the helper takes no
                        # per-metric argument, so a single call covers all
                        # metrics
                        pack_helper.average_multiview_over_bags(
                            cfg, cur_dict['run_{}'.format(run)],
                            bag_size_list)

            # Compute average across runs, for multiview
            t_cur = time()
            pack_helper.average_multiview_over_runs(cfg, res_dict, num_runs,
                                                    bag_size_str + ['bag_avg'])
            print(
                ' -- Packing "{}"/multiview: averaging over {} run(s) [{:.02f} s]'
                .format(dataset, num_runs,
                        time() - t_cur))

            # Compute average across scenes, for multiview
            t_cur = time()
            pack_helper.average_multiview_over_scenes(
                cfg, res_dict, num_runs, ['bag_avg'] + bag_size_str)
            print(
                ' -- Packing "{}"/multiview: averaging over {} scene(s) [{:.02f} s]'
                .format(dataset, len(scene_list),
                        time() - t_cur))

            print(' -- Finished packing multiview in {:.01f} sec.'.format(
                time() - t))
        else:
            print('Skipping "{}/multiview"'.format(dataset))

    # Add a unique identifier (equivalent to "submission id" in previous versions).
    if cfg.is_challenge:
        master_dict['uuid'] = get_uuid(cfg)

    # Dump packed result
    if not os.path.exists(cfg.path_pack):
        os.makedirs(cfg.path_pack)
    json_dump_file = os.path.join(
        cfg.path_pack,
        '{}.json'.format(cfg.method_dict['config_common']['json_label']))

    print(' -- Saving to: "{}"'.format(json_dump_file))
    with open(json_dump_file, 'w') as outfile:
        json.dump(master_dict, outfile, indent=2)

    # Add a short results summary.
    print()
    print('-- SUMMARY --')
    print('Subset: "{}"'.format(cfg.subset))
    for dataset in DATASET_LIST:
        print()
        print('Dataset "{}"'.format(dataset))
        if dataset in master_dict:
            # Stereo
            if 'stereo' in master_dict[dataset]['results'][
                    'allseq'] and cfg.eval_stereo:
                print('-- Stereo mAA(10 deg): {:.05f}'.format(
                    master_dict[dataset]['results']['allseq']['stereo']
                    ['run_avg']['qt_auc_10_th_0.1']['mean']))
                for scene in master_dict[dataset]['results']:
                    if scene != 'allseq':
                        print('---- Scene "{}" -> Stereo mAA(10 deg): {:.05f}'.
                              format(
                                  scene, master_dict[dataset]['results'][scene]
                                  ['stereo']['run_avg']['qt_auc_10_th_0.1']
                                  ['mean']))
            if 'multiview' in master_dict[dataset]['results'][
                    'allseq'] and cfg.eval_multiview:
                print('-- Multiview mAA(10 deg): {:.05f}'.format(
                    master_dict[dataset]['results']['allseq']['multiview']
                    ['run_avg']['bag_avg']['qt_auc_colmap_10']['mean']))
                for scene in master_dict[dataset]['results']:
                    if scene != 'allseq':
                        print(
                            '---- Scene "{}" -> Multiview mAA(10 deg): {:.05f}'
                            .format(
                                scene, master_dict[dataset]['results'][scene]
                                ['multiview']['run_avg']['bag_avg']
                                ['qt_auc_colmap_10']['mean']))
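

# A small, hedged sketch of consuming the packed JSON written by main() above.
# The key layout mirrors the summary block at the end of main(): 'results' ->
# scene -> task -> 'run_avg' -> metric. The file name, dataset, and scene
# below are placeholders, not values mandated by the benchmark.
def read_stereo_maa(json_path, dataset='phototourism', scene='allseq'):
    '''Return the packed stereo mAA(10 deg) for one scene ('allseq' = average).'''
    with open(json_path, 'r') as f:
        packed = json.load(f)
    return packed[dataset]['results'][scene]['stereo']['run_avg'][
        'qt_auc_10_th_0.1']['mean']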


# Example 2: a second variant of the same packing script (phototourism only,
# no deprecated-image filtering, relocalization stubbed out as TODO).
# Note that it redefines main() from Example 1.
def main(cfg):
    '''Main function. Takes config as input.
    '''

    # Back up config
    cfg_orig = deepcopy(cfg)
    method = cfg_orig.method_dict

    # Add config options to the dict
    master_dict = OrderedDict()
    master_dict['config'] = method

    # Add date
    master_dict['properties'] = OrderedDict()
    master_dict['properties'][
        'processing_date'] = pack_helper.get_current_date()
    print('Adding processing date: {}'.format(
        master_dict['properties']['processing_date']))

    # Add descriptor properties
    cfg_desc = deepcopy(cfg_orig)
    cfg_desc.dataset = 'phototourism'
    cfg_desc.scene = 'british_museum'
    try:
        descriptors_dict = load_h5(get_desc_file(cfg_desc))
        desc_type, desc_size, desc_nbytes = pack_helper.get_descriptor_properties(
            cfg_desc, descriptors_dict)
    except Exception:
        desc_type = 'none'
        desc_size = 0
        desc_nbytes = 0
    master_dict['properties']['descriptor_type'] = desc_type
    master_dict['properties']['descriptor_size'] = desc_size
    master_dict['properties']['descriptor_nbytes'] = desc_nbytes
    print('Adding descriptor properties: {} {} ({} bytes)'.format(
        master_dict['properties']['descriptor_size'],
        master_dict['properties']['descriptor_type'],
        master_dict['properties']['descriptor_nbytes']))

    # Read data and splits
    for dataset in ['phototourism']:
        setattr(cfg_orig, 'scenes_{}_{}'.format(dataset, cfg_orig.subset),
                './json/data/{}_{}.json'.format(dataset, cfg_orig.subset))
        setattr(cfg_orig, 'splits_{}_{}'.format(dataset, cfg_orig.subset),
                './json/bag_size/{}_{}.json'.format(dataset, cfg_orig.subset))

        # Create empty dictionary
        master_dict[dataset] = OrderedDict()
        res_dict = OrderedDict()
        master_dict[dataset]['results'] = res_dict

        # Save number of runs
        master_dict[dataset]['num_runs_stereo'] = getattr(
            cfg_orig, 'num_runs_{}_stereo'.format(cfg_orig.subset))
        master_dict[dataset]['num_runs_multiview'] = getattr(
            cfg_orig, 'num_runs_{}_multiview'.format(cfg_orig.subset))

        # Load data config
        scene_list = load_json(
            getattr(cfg_orig, 'scenes_{}_{}'.format(dataset, cfg_orig.subset)))
        bag_size_json = load_json(
            getattr(cfg_orig, 'splits_{}_{}'.format(dataset, cfg_orig.subset)))
        bag_size_list = [b['bag_size'] for b in bag_size_json]
        bag_size_num = [b['num_in_bag'] for b in bag_size_json]
        bag_size_str = ['{}bag'.format(b) for b in bag_size_list]

        # Create empty dicts
        for scene in ['allseq'] + scene_list:
            res_dict[scene] = OrderedDict()
            for task in ['stereo', 'multiview', 'relocalization']:
                res_dict[scene][task] = OrderedDict()
                res_dict[scene][task]['run_avg'] = OrderedDict()
                if task == 'multiview':
                    for bag in bag_size_str + ['bag_avg']:
                        res_dict[scene]['multiview']['run_avg'][
                            bag] = OrderedDict()

        # Stereo -- multiple runs
        t = time()
        cur_key = 'config_{}_stereo'.format(dataset)
        if cfg_orig.eval_stereo and cur_key in method and method[cur_key]:
            num_runs = getattr(cfg_orig,
                               'num_runs_{}_stereo'.format(cfg_orig.subset))
            cfg = deepcopy(cfg_orig)
            cfg.dataset = dataset
            cfg.task = 'stereo'
            for scene in scene_list:
                cfg.scene = scene

                res_dict[scene]['stereo']['run_avg'] = OrderedDict()
                for run in range(num_runs):
                    res_dict[scene]['stereo']['run_{}'.format(
                        run)] = OrderedDict()

                # Create list of things to gather
                metric_list = []
                metric_list += ['avg_num_keypoints']
                # metric_list += ['matching_scores_epipolar']
                metric_list += ['num_inliers']
                metric_list += ['matching_scores_depth_projection']
                metric_list += ['repeatability']
                metric_list += ['qt_auc']
                metric_list += ['timings']

                for run in range(num_runs):
                    # Compute and pack results
                    cfg.run = run
                    cur_dict = res_dict[scene]['stereo']['run_{}'.format(run)]
                    for metric in metric_list:
                        t_cur = time()
                        getattr(pack_helper, 'compute_' + metric)(cur_dict,
                                                                  cfg)
                        print(
                            ' -- Packing "{}"/"{}"/stereo, run: {}/{}, metric: {} [{:.02f} s]'
                            .format(dataset, scene, run + 1, num_runs, metric,
                                    time() - t_cur))

            # Compute average across runs, for stereo
            t_cur = time()
            pack_helper.average_stereo_over_runs(cfg, res_dict, num_runs)
            print(
                ' -- Packing "{}"/stereo: averaging over {} run(s) [{:.02f} s]'
                .format(dataset, num_runs,
                        time() - t_cur))

            # Compute average across scenes, for stereo
            t_cur = time()
            pack_helper.average_stereo_over_scenes(cfg, res_dict, num_runs)
            print(
                ' -- Packing "{}"/stereo: averaging over {} scene(s) [{:.02f} s]'
                .format(dataset, len(scene_list),
                        time() - t_cur))

            print(' -- Finished packing stereo in {:.01f} sec.'.format(time() -
                                                                       t))
        else:
            print('Skipping "{}/stereo"'.format(dataset))

        # Multiview -- multiple runs
        t = time()
        cur_key = 'config_{}_multiview'.format(dataset)
        if cfg_orig.eval_multiview and cur_key in method and method[cur_key]:
            num_runs = getattr(
                cfg_orig, 'num_runs_{}_multiview'.format(cfg_orig.subset))
            cfg = deepcopy(cfg_orig)
            cfg.dataset = dataset
            cfg.task = 'multiview'
            for scene in scene_list:
                cfg.scene = scene
                for run in ['run_avg'
                            ] + ['run_{}'.format(f) for f in range(num_runs)]:
                    res_dict[scene]['multiview'][run] = OrderedDict()
                    for bags_label in ['bag_avg'] + bag_size_str:
                        res_dict[scene]['multiview'][run][
                            bags_label] = OrderedDict()

                # Create list of things to gather
                metric_list = []
                metric_list += ['avg_num_keypoints']
                metric_list += ['num_input_matches']
                metric_list += ['qt_auc_colmap']
                metric_list += ['ATE']
                metric_list += ['colmap_stats']

                for run in range(num_runs):
                    for bag_size in bag_size_list:
                        # Compute and pack results
                        cfg.run = run
                        cfg.bag_size = bag_size
                        cur_dict = res_dict[scene]['multiview']
                        for metric in metric_list:
                            t_cur = time()
                            getattr(pack_helper, 'compute_' + metric)(
                                cur_dict['run_{}'.format(run)]['{}bag'.format(
                                    bag_size)], cfg)
                            print(
                                ' -- Packing "{}"/"{}"/multiview, run {}/{}, "{}", metric: {} [{:.02f} s]'
                                .format(dataset, scene, run + 1, num_runs,
                                        '{}bag'.format(bag_size), metric,
                                        time() - t_cur))

                        # Compute average across bags; the helper takes no
                        # per-metric argument, so a single call covers all
                        # metrics (the original iterated over the hardcoded
                        # '25bag' keys, calling it once per metric)
                        pack_helper.average_multiview_over_bags(
                            cfg, cur_dict['run_{}'.format(run)],
                            bag_size_list)

            # Compute average across runs, for multiview
            t_cur = time()
            pack_helper.average_multiview_over_runs(cfg, res_dict, num_runs,
                                                    bag_size_str + ['bag_avg'])
            print(
                ' -- Packing "{}"/multiview: averaging over {} run(s) [{:.02f} s]'
                .format(dataset, num_runs,
                        time() - t_cur))

            # Compute average across scenes, for multiview
            t_cur = time()
            pack_helper.average_multiview_over_scenes(
                cfg, res_dict, num_runs, ['bag_avg'] + bag_size_str)
            print(
                ' -- Packing "{}"/multiview: averaging over {} scene(s) [{:.02f} s]'
                .format(dataset, len(scene_list),
                        time() - t_cur))

            print(' -- Finished packing multiview in {:.01f} sec.'.format(
                time() - t))

            # Relocalization -- multiple runs
            # TODO
        else:
            print('Skipping "{}/multiview"'.format(dataset))

    # Dump packed result
    if not os.path.exists(cfg.path_pack):
        os.makedirs(cfg.path_pack)
    json_dump_file = os.path.join(
        cfg.path_pack,
        '{}.json'.format(cfg.method_dict['config_common']['json_label']))

    print(' -- Saving to: "{}"'.format(json_dump_file))
    with open(json_dump_file, 'w') as outfile:
        json.dump(master_dict, outfile, indent=2)
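

# validate_submission_files() below only relies on a logger that exposes
# add_new_log(); the class here is a minimal stand-in (the real benchmark
# ships its own logger), shown only to make the example self-contained.
class ValidationLogger(object):
    def __init__(self):
        self.logs = []

    def add_new_log(self, message):
        '''Record one validation problem and echo it immediately.'''
        self.logs.append(message)
        print(message)
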
def validate_submission_files(sub_path, benchmark_repo_path, datasets,
                              raw_data_path, logger):
    for dataset in datasets:

        raw_dataset_path = os.path.join(raw_data_path, dataset)
        # check if dataset folder exists
        sub_dataset_path = os.path.join(sub_path, dataset)
        if not os.path.isdir(sub_dataset_path):
            logger.add_new_log(
                'Submission does not contain {} dataset.'.format(dataset))
            continue
        # read seqs from json
        seqs = load_json(
            os.path.join(benchmark_repo_path,
                         'json/data/{}_test.json'.format(dataset)))
        for seq in seqs:
            # get the list of images in the sequence
            raw_seq_path = os.path.join(raw_dataset_path, seq)
            im_list = [
                os.path.splitext(f)[0] for f in os.listdir(raw_seq_path)
                if (os.path.isfile(os.path.join(raw_seq_path, f))
                    and f.endswith(('png', 'jpg')))
            ]
            num_im = len(im_list)

            # get all key pairs
            key_pairs = [
                pair[0] + '-' + pair[1]
                for pair in list(product(im_list, im_list))
                if pair[0] > pair[1]
            ]

            # check if seq folder exists
            sub_seq_path = os.path.join(sub_dataset_path, seq)
            if not os.path.isdir(sub_seq_path):
                logger.add_new_log(
                    'Submission does not contain {} sequence in {} dataset.'.
                    format(seq, dataset))
                continue
            # validate keypoints file
            kp_path = os.path.join(sub_seq_path, 'keypoints.h5')
            if not os.path.isfile(kp_path):
                logger.add_new_log(
                    'Submission does not contain keypoints file for {} sequence in {} dataset.'
                    .format(seq, dataset))
            else:
                keypoints = load_h5(kp_path)

                if sorted(list(keypoints.keys())) != sorted(im_list):
                    logger.add_new_log(
                        '{}-{}: Keypoints file does not contain all the image keys.'
                        .format(dataset, seq))
                if len(list(keypoints.values())[0].shape) != 2:
                    logger.add_new_log(
                        '{}-{}: Keypoints file is in wrong format.'.format(
                            dataset, seq))
                if list(keypoints.values())[0].shape[1] != 2:
                    logger.add_new_log(
                        '{}-{}: Keypoints file is in wrong format.'.format(
                            dataset, seq))
                # check number of keypoints
                if list(keypoints.values())[0].shape[0] > 8000:
                    logger.add_new_log(
                        '{}-{}: Keypoints file contains more than 8000 points.'
                        .format(dataset, seq))

            # check if match file exists first
            match_files = [
                file for file in os.listdir(sub_seq_path)
                if os.path.isfile(os.path.join(sub_seq_path, file))
                and file.startswith('match')
            ]

            # validate descriptor file
            desc_path = os.path.join(sub_seq_path, 'descriptors.h5')

            # must provide either a descriptors file or a match file
            if not os.path.isfile(desc_path) and len(match_files) == 0:
                logger.add_new_log(
                    'Submission does not contain descriptors file for {} sequence in {} dataset.'
                    .format(seq, dataset))
            elif not os.path.isfile(desc_path):
                pass
            else:
                descriptors = load_h5(desc_path)

                if sorted(list(descriptors.keys())) != sorted(im_list):
                    logger.add_new_log(
                        '{}-{}: Descriptors file does not contain all the image keys.'
                        .format(dataset, seq))
                if len(list(descriptors.values())[0].shape) != 2:
                    logger.add_new_log(
                        '{}-{}: Descriptors file is in wrong format'.format(
                            dataset, seq))
                if list(descriptors.values())[0].shape[1] < 64 or list(
                        descriptors.values())[0].shape[1] > 2048:
                    logger.add_new_log(
                        '{}-{}: Descriptors file is in wrong format'.format(
                            dataset, seq))

                # check descriptor size
                desc_type, desc_size, desc_nbytes = get_descriptor_properties(
                    {}, descriptors)
                if desc_nbytes > 512 and len(match_files) == 0:
                    logger.add_new_log(
                        '{}-{}: Descriptor size is larger than 512 bytes; '
                        'you need to provide a custom match file.'.format(
                            dataset, seq))

            # validate match file
            # check match file name
            if 'matches.h5' in match_files:
                if len(match_files) != 1:
                    logger.add_new_log(
                        '{}-{}: matches.h5 exists. Do not need to provide any other match files.'
                        .format(dataset, seq))
            elif 'matches_multiview.h5' in match_files or 'matches_stereo_0.h5' in match_files or 'matches_stereo.h5' in match_files:
                if 'matches_multiview.h5' not in match_files:
                    logger.add_new_log(
                        '{}-{}: missing matches_multiview.h5'.format(
                            dataset, seq))
                if 'matches_stereo_0.h5' not in match_files and 'matches_stereo.h5' not in match_files:
                    logger.add_new_log(
                        '{}-{}: missing matches_stereo.h5'.format(
                            dataset, seq))
                if 'matches_stereo_1.h5' in match_files or 'matches_stereo_2.h5' in match_files:
                    logger.add_new_log(
                        '{}-{}: for 2021 challenge, we only run stereo once, no need to provide matches_stereo_1 and matches_stereo_2'
                        .format(dataset, seq))

            for match_file in match_files:
                matches = load_h5(os.path.join(sub_seq_path, match_file))
                if len(matches.keys()) != len(key_pairs):
                    logger.add_new_log(
                        '{}-{}: Matches file contains wrong number of keys, should have {} keys, have {}.'
                        .format(dataset, seq, len(key_pairs),
                                len(matches.keys())))
                elif sorted(list(matches.keys())) != sorted(key_pairs):
                    logger.add_new_log(
                        '{}-{}: Matches file contains wrong keys; the image '
                        'names may be in reverse order. Please refer to the '
                        'submission instructions for the proper custom match '
                        'key naming convention.'.format(dataset, seq))
                if len(list(matches.values())[0].shape) != 2:
                    logger.add_new_log(
                        '{}-{}: Matches file is in wrong format.'.format(
                            dataset, seq))
                if list(matches.values())[0].shape[0] != 2:
                    logger.add_new_log(
                        '{}-{}: Matches file is in wrong format.'.format(
                            dataset, seq))
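

# A minimal sketch of writing a keypoints.h5 that satisfies the checks above:
# one (N, 2) array per image key, with N <= 8000. The path, image names, and
# random coordinates are placeholders for illustration only.
def write_dummy_keypoints(kp_path, im_list, num_kp=512):
    with h5py.File(kp_path, 'w') as f:
        for im in im_list:
            # Keys are image names without extension; values are x/y locations
            f[im] = np.random.rand(num_kp, 2).astype(np.float32)
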
def import_features(cfg, data_list):
    '''
    Import features with the third (2020) submission format (separate files for
    scores, orientations, and scales). Keypoint category is set by looking at
    the maximum value over all files, instead of the mean.
    '''

    # Retrieve stats
    print('Retrieving number of keypoints...')
    size_kp_file = []
    for _data in data_list:
        print('--- On "{}"...'.format(_data))
        with h5py.File(os.path.join(cfg.path_features, _data, 'keypoints.h5'),
                       'r') as f_kp:
            for k in f_kp:
                size_kp_file.append(f_kp[k].shape[0])

    if len(size_kp_file) == 0:
        raise RuntimeError('No keypoints?')

    print('------ Found {} keypoint files'.format(len(size_kp_file)))
    print('------ Min: {}, max: {}, mean: {}'.format(np.min(size_kp_file),
                                                     np.max(size_kp_file),
                                                     np.mean(size_kp_file)))

    # If no category is selected, determine it automatically
    if cfg.num_keypoints == -1:
        numkp = get_kp_category(np.max(size_kp_file))
        print('Setting number of keypoints category to: {}'.format(numkp))
    # Otherwise, hand-pick it
    else:
        numkp = cfg.num_keypoints
        print('Pre-selected number of keypoints: {}'.format(numkp))

    # Only check the descriptor size if a descriptors file is provided
    if os.path.isfile(
            os.path.join(cfg.path_features, data_list[0], 'descriptors.h5')):
        # Open a descriptors file to get their size
        print('Retrieving descriptor_size...')
        for _data in data_list:
            print('--- On "{}"...'.format(_data))
            with h5py.File(
                    os.path.join(cfg.path_features, _data, 'descriptors.h5'),
                    'r') as f_desc:
                desc_type, desc_size, desc_nbytes = get_descriptor_properties(
                    cfg, f_desc)
            # Only the first file is needed to determine the descriptor size
            break
        print('Descriptor type: {} {} ({} bytes)'.format(desc_size, desc_type,
                                                         desc_nbytes))
        nbytes_category = get_desc_category(desc_nbytes)
        print('Falling under challenge category: {} bytes'.format(nbytes_category))
    else:
        print('Descriptors file not provided; skipping the size check')
    # Import
    print('Importing features...')
    for _data in data_list:
        print('--- On "{}"...'.format(_data))

        fn_kp = os.path.join(cfg.path_features, _data, 'keypoints.h5')
        fn_desc = os.path.join(cfg.path_features, _data, 'descriptors.h5')
        fn_score = os.path.join(cfg.path_features, _data, 'scores.h5')
        fn_scale = os.path.join(cfg.path_features, _data, 'scales.h5')
        fn_ori = os.path.join(cfg.path_features, _data, 'orientations.h5')
        fn_match = os.path.join(cfg.path_features, _data, 'matches.h5')
        fn_multiview_match = os.path.join(cfg.path_features, _data,
                                          'matches_multiview.h5')
        fn_stereo_match_list = [
            os.path.join(cfg.path_features, _data,
                         'matches_stereo_{}.h5'.format(idx))
            for idx in range(3)
        ]

        # create keypoints folder
        tgt_cur = os.path.join(
            cfg.path_results, _data,
            '_'.join([cfg.kp_name, str(numkp), cfg.desc_name]))
        if not os.path.isdir(tgt_cur):
            os.makedirs(tgt_cur)
            
        # Both keypoints and descriptors files are provided
        if os.path.isfile(fn_kp) and os.path.isfile(fn_desc) and not \
           (os.path.isfile(fn_match) or (
           (os.path.isfile(fn_multiview_match) and os.path.isfile(fn_stereo_match_list[0])))):
            # We cannot downsample the keypoints without scores
            if numkp < max(size_kp_file) and not os.path.isfile(fn_score):
                raise RuntimeError(
                    '------ No scores, and subsampling is required! '
                    '(wanted: {}, found: {})'.format(numkp,
                                                     max(size_kp_file)))

            # Import keypoints
            print('------ Importing keypoints and descriptors')

            # If there is no need to subsample, we can just copy the files
            if numkp >= max(size_kp_file):
                copy(fn_kp, tgt_cur)
                copy(fn_desc, tgt_cur)
                if os.path.isfile(fn_score):
                    copy(fn_score, tgt_cur)
                if os.path.isfile(fn_scale):
                    copy(fn_scale, tgt_cur)
                if os.path.isfile(fn_ori):
                    copy(fn_ori, tgt_cur)
            # Otherwise, crop each file separately
            else:
                subsampled_indices = {}
                with h5py.File(fn_score, 'r') as h5_r, \
                        h5py.File(os.path.join(tgt_cur, 'scores.h5'), 'w') as h5_w:
                    for k in h5_r:
                        sorted_indices = np.argsort(h5_r[k])[::-1]
                        subsampled_indices[k] = sorted_indices[:min(
                            h5_r[k].size, numkp)]
                        # h5py 3.x removed Dataset.value; index with [()]
                        crop = h5_r[k][()][subsampled_indices[k]]
                        h5_w[k] = crop
                with h5py.File(fn_kp, 'r') as h5_r, \
                        h5py.File(
                                os.path.join(tgt_cur, 'keypoints.h5'),
                                'w') as h5_w:
                    for k in h5_r:
                        crop = h5_r[k][()][subsampled_indices[k], :]
                        h5_w[k] = crop
                with h5py.File(fn_desc, 'r') as h5_r, \
                        h5py.File(
                                os.path.join(
                                    tgt_cur, 'descriptors.h5'), 'w') as h5_w:
                    for k in h5_r:
                        crop = h5_r[k][()][subsampled_indices[k], :]
                        h5_w[k] = crop
                if os.path.isfile(fn_scale):
                    with h5py.File(fn_scale, 'r') as h5_r, \
                            h5py.File(
                                    os.path.join(tgt_cur, 'scales.h5'),
                                    'w') as h5_w:
                        for k in h5_r:
                            crop = h5_r[k][()][subsampled_indices[k]]
                            h5_w[k] = crop
                if os.path.isfile(fn_ori):
                    with h5py.File(fn_ori, 'r') as h5_r, \
                            h5py.File(
                                    os.path.join(tgt_cur, 'orientations.h5'),
                                    'w') as h5_w:
                        for k in h5_r:
                            crop = h5_r[k][()][subsampled_indices[k]]
                            h5_w[k] = crop
        elif os.path.isfile(fn_kp) and \
             (os.path.isfile(fn_match) or \
             (os.path.isfile(fn_multiview_match) and os.path.isfile(fn_stereo_match_list[0]))):

            if os.path.isfile(fn_desc):
                print('------ Matches file is provided; ignoring descriptors')
            print('------ Importing matches')
            if not cfg.match_name:
                raise RuntimeError('Must define match_name')

            # For match only submission, no downsampling is performed.
            if numkp < max(size_kp_file):
                raise RuntimeError(
                    '------ Number of keypoints exceeds the maximum allowed '
                    'limit (wanted: {}, found: {})'.format(
                        numkp, max(size_kp_file)))

            # copy keypoints file to raw results folder
            copy(fn_kp, tgt_cur)
            # create match folder with match method name
            match_folder_path = os.path.join(tgt_cur, cfg.match_name)
            if not os.path.isdir(match_folder_path):
                os.makedirs(match_folder_path)
            # copy match file to raw results folder

            if os.path.isfile(fn_multiview_match) and os.path.isfile(
                    fn_stereo_match_list[0]):
                print('------ Multiview and stereo match files are provided '
                      'separately')
                fn_match = fn_multiview_match
            else:
                print('------ Only one match file is provided for both '
                      'stereo and multiview tasks')

            copy(fn_match, os.path.join(match_folder_path, 'matches.h5'))
            # make dummy cost file
            with h5py.File(
                    os.path.join(match_folder_path, 'matching_cost.h5'),
                    'w') as h5_w:
                h5_w.create_dataset('cost', data=0.0)

            # create post filter folder with 'no filter'
            filter_folder_path = os.path.join(match_folder_path, 'no_filter')
            if not os.path.isdir(filter_folder_path):
                os.makedirs(filter_folder_path)
            # copy match file to post filter folder
            copy(fn_match,
                 os.path.join(filter_folder_path, 'matches_inlier.h5'))
            # make dummy cost file
            with h5py.File(
                    os.path.join(filter_folder_path,
                                 'matches_inlier_cost.h5'), 'w') as h5_w:
                h5_w.create_dataset('cost', data=0.0)

            # check if three stereo matches are provided
            if all([os.path.isfile(fn_stereo_match_list[idx]) for idx in range(3)]):
                print('------ Three stereo match files are provided')
            # if only one stereo match is provided, copy it three times
            elif os.path.isfile(fn_stereo_match_list[0]):
                print('------ Only one stereo match file is provided; '
                      'copying it three times')
                fn_stereo_match_list = [fn_stereo_match_list[0]] * 3
            # if only one match is provided for both stereo and multiview, copy it three times
            else:
                fn_stereo_match_list = [fn_match]*3

            for idx, fn_stereo_match in enumerate(fn_stereo_match_list):
                copy(fn_stereo_match,
                     os.path.join(filter_folder_path,
                                  'matches_imported_stereo_{}.h5'.format(idx)))
        else:
            raise RuntimeError('Neither descriptors nor matches are provided!')
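

# Hypothetical driver for import_features(): `cfg` is reduced to a bare
# namespace carrying only the attributes the function reads. Every value
# below is a placeholder; the real benchmark builds its config elsewhere.
if __name__ == '__main__':
    from types import SimpleNamespace
    cfg = SimpleNamespace(path_features='features',
                          path_results='results',
                          num_keypoints=-1,
                          kp_name='my_kp',
                          desc_name='my_desc',
                          match_name='my_matcher')
    import_features(cfg, data_list=['phototourism'])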