Example 1
        def _heuristic_weighting(nodes, avail_uv):
            avail_uv = np.array(avail_uv)
            weights = np.ones(len(avail_uv))

            if 'view_weight' in enabled_heuristics:
                from vtool import _rhomb_dist

                view_edge = [(node_to_view[u], node_to_view[v]) for (u, v) in avail_uv]
                view_weight = np.array(
                    [_rhomb_dist.VIEW_CODE_DIST[(v1, v2)] for (v1, v2) in view_edge]
                )
                # Assume comparable by default and prefer undefined
                # more than probably not, but less than definitely so.
                view_weight[np.isnan(view_weight)] = 1.5
                # Prefer viewpoint 10x more than time
                weights += 10 * view_weight

            if 'time_weight' in enabled_heuristics:
                # Prefer linking annotations closer in time
                times = ut.take(node_to_time, nodes)
                maxtime = vt.safe_max(times, fill=1, nans=False)
                mintime = vt.safe_min(times, fill=0, nans=False)
                time_denom = maxtime - mintime
                # Try linking by time for lynx data
                time_delta = np.array(
                    [abs(node_to_time[u] - node_to_time[v]) for u, v in avail_uv]
                )
                time_weight = time_delta / time_denom
                weights += time_weight

            weights = np.array(weights)
            weights[np.isnan(weights)] = 1.0

            avail = [(u, v, {'weight': w}) for (u, v), w in zip(avail_uv, weights)]
            return avail
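
The vt.safe_max / vt.safe_min helpers used above are only assumed here to mean "max/min that tolerates empty input (returning fill) and, with nans=False, ignores NaNs". A minimal NumPy-only sketch of that assumed behavior (not the actual vtool implementation):

    import numpy as np

    def safe_max(arr, fill=np.nan, nans=True):
        # Sketch: return `fill` for empty/None input; with nans=False, skip NaNs.
        arr = np.asarray(arr, dtype=float) if arr is not None else np.array([])
        if arr.size == 0:
            return fill
        return arr.max() if nans else np.nanmax(arr)

    def safe_min(arr, fill=np.nan, nans=True):
        arr = np.asarray(arr, dtype=float) if arr is not None else np.array([])
        if arr.size == 0:
            return fill
        return arr.min() if nans else np.nanmin(arr)

    print(safe_max([], fill=1, nans=False))           # -> 1
    print(safe_max([3.0, np.nan, 5.0], nans=False))   # -> 5.0
    print(safe_min([2.0, np.nan, 7.0], nans=False))   # -> 2.0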
Example 2
 def _make_lnbnn_scores(infr, edges):
     edge_to_data = infr._get_cm_edge_data(edges)
     edges = list(edge_to_data.keys())
     edge_scores = list(ut.take_column(edge_to_data.values(), 'score'))
     edge_scores = ut.replace_nones(edge_scores, np.nan)
     edge_scores = np.array(edge_scores)
     # take the inf-norm
     normscores = edge_scores / vt.safe_max(edge_scores, nans=False)
     return normscores
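
For intuition, the normalization step simply rescales scores into [0, 1] by dividing by the largest finite score (the inf-norm); a standalone illustration with made-up scores:

    import numpy as np

    edge_scores = np.array([12.0, np.nan, 3.0, 30.0])
    # Divide by the largest finite score; NaN scores stay NaN.
    normscores = edge_scores / np.nanmax(edge_scores)
    print(normscores)  # [0.4 nan 0.1 1. ]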
Example 3
    def apply_match_scores(infr):
        """

        Applies precomputed matching scores to edges that already exist in the
        graph. Typically you should run infr.apply_match_edges() before running
        this.

        CommandLine:
            python -m wbia.algo.graph.core apply_match_scores --show

        Example:
            >>> # xdoctest: +REQUIRES(--slow)
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.graph.core import *  # NOQA
            >>> infr = testdata_infr('PZ_MTEST')
            >>> infr.exec_matching()
            >>> infr.apply_match_edges()
            >>> infr.apply_match_scores()
            >>> infr.get_edge_attrs('score')
        """
        if infr.cm_list is None:
            infr.print('apply_match_scores - no scores to apply!')
            return
        infr.print('apply_match_scores', 1)
        edges = list(infr.graph.edges())
        edge_to_data = infr._get_cm_edge_data(edges)

        # Remove existing attrs
        ut.nx_delete_edge_attr(infr.graph, 'score')
        ut.nx_delete_edge_attr(infr.graph, 'rank')
        ut.nx_delete_edge_attr(infr.graph, 'normscore')

        edges = list(edge_to_data.keys())
        edge_scores = list(ut.take_column(edge_to_data.values(), 'score'))
        edge_scores = ut.replace_nones(edge_scores, np.nan)
        edge_scores = np.array(edge_scores)
        edge_ranks = np.array(ut.take_column(edge_to_data.values(), 'rank'))
        # take the inf-norm
        normscores = edge_scores / vt.safe_max(edge_scores, nans=False)

        # Add new attrs
        infr.set_edge_attrs('score', ut.dzip(edges, edge_scores))
        infr.set_edge_attrs('rank', ut.dzip(edges, edge_ranks))

        # Hack away zero probabilities
        # probs = np.vstack([p_nomatch, p_match, p_notcomp]).T + 1e-9
        # probs = vt.normalize(probs, axis=1, ord=1, out=probs)
        # entropy = -(np.log2(probs) * probs).sum(axis=1)
        infr.set_edge_attrs('normscore', dict(zip(edges, normscores)))
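
The set_edge_attrs calls presumably amount to writing per-edge attributes onto the underlying networkx graph; a minimal plain-networkx illustration of that step (the wbia wrapper is not used here):

    import numpy as np
    import networkx as nx

    graph = nx.Graph([(1, 2), (2, 3), (3, 4)])
    edges = list(graph.edges())
    normscores = np.array([0.4, np.nan, 1.0])

    # Roughly what infr.set_edge_attrs('normscore', dict(zip(edges, normscores))) does
    nx.set_edge_attributes(graph, name='normscore', values=dict(zip(edges, normscores)))
    print(nx.get_edge_attributes(graph, 'normscore'))
    # {(1, 2): 0.4, (2, 3): nan, (3, 4): 1.0}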
Example 4
def split_analysis(ibs):
    """
    CommandLine:
        python -m wbia.other.dbinfo split_analysis --show
        python -m wbia split_analysis --show
        python -m wbia split_analysis --show --good

    Ignore:
        # mount
        sshfs -o idmap=user lev:/ ~/lev

        # unmount
        fusermount -u ~/lev

    Example:
        >>> # DISABLE_DOCTEST GGR
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> dbdir = '/media/danger/GGR/GGR-IBEIS'
        >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
        >>> ibs = wbia.opendb(dbdir=dbdir, allow_newdir=False)
        >>> import wbia.guitool as gt
        >>> gt.ensure_qtapp()
        >>> win = split_analysis(ibs)
        >>> ut.quit_if_noshow()
        >>> import wbia.plottool as pt
        >>> gt.qtapp_loop(qwin=win)
        >>> #ut.show_if_requested()
    """
    # nid_list = ibs.get_valid_nids(filter_empty=True)
    import datetime

    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    filter_kw = {
        'multiple': None,
        # 'view': ['right'],
        # 'minqual': 'good',
        'is_known': True,
        'min_pername': 1,
    }
    aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw,
        {
            'min_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)),
        },
    ))
    aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw,
        {
            'min_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)),
            'max_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        },
    ))
    all_aids = aids1 + aids2
    all_annots = ibs.annots(all_aids)
    logger.info('%d annots on day 1' % (len(aids1)))
    logger.info('%d annots on day 2' % (len(aids2)))
    logger.info('%d annots overall' % (len(all_annots)))
    logger.info('%d names overall' % (len(ut.unique(all_annots.nids))))

    nid_list, annots_list = all_annots.group(all_annots.nids)

    REVIEWED_EDGES = True
    if REVIEWED_EDGES:
        aids_list = [annots.aids for annots in annots_list]
        # aid_pairs = [annots.get_am_aidpairs() for annots in annots_list]  # Slower
        aid_pairs = ibs.get_unflat_am_aidpairs(aids_list)  # Faster
    else:
        # ALL EDGES
        aid_pairs = [annots.get_aidpairs() for annots in annots_list]

    speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs)
    import vtool as vt

    max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list])

    nan_idx = np.where(np.isnan(max_speeds))[0]
    inf_idx = np.where(np.isinf(max_speeds))[0]
    bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx])))
    ok_idx = ut.index_complement(bad_idx, len(max_speeds))

    logger.info('#nan_idx = %r' % (len(nan_idx), ))
    logger.info('#inf_idx = %r' % (len(inf_idx), ))
    logger.info('#ok_idx = %r' % (len(ok_idx), ))

    ok_speeds = max_speeds[ok_idx]
    ok_nids = ut.take(nid_list, ok_idx)
    ok_annots = ut.take(annots_list, ok_idx)
    sortx = np.argsort(ok_speeds)[::-1]

    sorted_speeds = np.array(ut.take(ok_speeds, sortx))
    sorted_annots = np.array(ut.take(ok_annots, sortx))
    sorted_nids = np.array(ut.take(ok_nids, sortx))  # NOQA

    sorted_speeds = np.clip(sorted_speeds, 0, 100)

    # idx = vt.find_elbow_point(sorted_speeds)
    # EXCESSIVE_SPEED = sorted_speeds[idx]
    # http://www.infoplease.com/ipa/A0004737.html
    # http://www.speedofanimals.com/animals/zebra
    # ZEBRA_SPEED_MAX  = 64  # km/h
    # ZEBRA_SPEED_RUN  = 50  # km/h
    ZEBRA_SPEED_SLOW_RUN = 20  # km/h
    # ZEBRA_SPEED_FAST_WALK = 10  # km/h
    # ZEBRA_SPEED_WALK = 7  # km/h

    MAX_SPEED = ZEBRA_SPEED_SLOW_RUN
    # MAX_SPEED = ZEBRA_SPEED_WALK
    # MAX_SPEED = EXCESSIVE_SPEED

    flags = sorted_speeds > MAX_SPEED
    flagged_ok_annots = ut.compress(sorted_annots, flags)
    inf_annots = ut.take(annots_list, inf_idx)
    flagged_annots = inf_annots + flagged_ok_annots

    logger.info('MAX_SPEED = %r km/h' % (MAX_SPEED, ))
    logger.info('%d annots with infinite speed' % (len(inf_annots), ))
    logger.info('%d annots with large speed' % (len(flagged_ok_annots), ))
    logger.info(
        'Marking all pairs of annots above the threshold as non-matching')

    from wbia.algo.graph import graph_iden
    import networkx as nx

    progkw = dict(freq=1, bs=True, est_window=len(flagged_annots))

    bad_edges_list = []
    good_edges_list = []
    for annots in ut.ProgIter(flagged_annots,
                              lbl='flag speeding names',
                              **progkw):
        edge_to_speeds = annots.get_speeds()
        bad_edges = [
            edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED
        ]
        good_edges = [
            edge for edge, speed in edge_to_speeds.items()
            if speed <= MAX_SPEED
        ]
        bad_edges_list.append(bad_edges)
        good_edges_list.append(good_edges)
    all_bad_edges = ut.flatten(bad_edges_list)
    good_edges_list = ut.flatten(good_edges_list)
    logger.info('num_bad_edges = %r' % (len(all_bad_edges), ))
    logger.info('num_good_edges = %r' % (len(good_edges_list), ))

    if 1:
        from wbia.viz import viz_graph2
        import wbia.guitool as gt

        gt.ensure_qtapp()

        if ut.get_argflag('--good'):
            logger.info('Looking at GOOD (no speed problems) edges')
            aid_pairs = good_edges_list
        else:
            logger.info('Looking at BAD (speed problems) edges')
            aid_pairs = all_bad_edges
        aids = sorted(list(set(ut.flatten(aid_pairs))))
        infr = graph_iden.AnnotInference(ibs, aids, verbose=False)
        infr.initialize_graph()

        # Use random scores to randomize sort order
        rng = np.random.RandomState(0)
        scores = (-rng.rand(len(aid_pairs)) * 10).tolist()
        infr.graph.add_edges_from(aid_pairs)

        if True:
            edge_sample_size = 250
            pop_nids = ut.unique(
                ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs))))
            sorted_pairs = ut.sortedby(aid_pairs,
                                       scores)[::-1][0:edge_sample_size]
            sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0))
            sample_size = len(ut.unique(sorted_nids))
            am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(
                *sorted_pairs))
            flags = ut.not_list(ut.flag_None_items(am_rowids))
            # am_rowids = ut.compress(am_rowids, flags)
            positive_tags = ['SplitCase', 'Photobomb']
            flags_list = [
                ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0)
                for tag in positive_tags
            ]
            logger.info('edge_case_hist: ' + ut.repr3([
                '%s %s' % (txt, sum(flags_))
                for flags_, txt in zip(flags_list, positive_tags)
            ]))
            is_positive = ut.or_lists(*flags_list)
            num_positive = sum(
                ut.lmap(any,
                        ut.group_items(is_positive, sorted_nids).values()))
            pop = len(pop_nids)
            logger.info('A positive is any edge flagged as a %s' %
                        (ut.conj_phrase(positive_tags, 'or'), ))
            logger.info('--- Sampling wrt edges ---')
            logger.info('edge_sample_size  = %r' % (edge_sample_size, ))
            logger.info('edge_population_size = %r' % (len(aid_pairs), ))
            logger.info('num_positive_edges = %r' % (sum(is_positive)))
            logger.info('--- Sampling wrt names ---')
            logger.info('name_population_size = %r' % (pop, ))
            vt.calc_error_bars_from_sample(sample_size,
                                           num_positive,
                                           pop,
                                           conf_level=0.95)

        nx.set_edge_attributes(infr.graph,
                               name='score',
                               values=dict(zip(aid_pairs, scores)))

        win = viz_graph2.AnnotGraphWidget(infr=infr,
                                          use_image=False,
                                          init_mode=None)
        win.populate_edge_model()
        win.show()
        return win
        # Make review interface for only bad edges

    infr_list = []
    iter_ = list(zip(flagged_annots, bad_edges_list))
    for annots, bad_edges in ut.ProgIter(iter_,
                                         lbl='creating inference',
                                         **progkw):
        aids = annots.aids
        nids = [1] * len(aids)
        infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False)
        infr.initialize_graph()
        infr.reset_feedback()
        infr_list.append(infr)

    # Check which ones are user defined as incorrect
    # num_positive = 0
    # for infr in infr_list:
    #    flag = np.any(infr.get_feedback_probs()[0] == 0)
    #    num_positive += flag
    # logger.info('num_positive = %r' % (num_positive,))
    # pop = len(infr_list)
    # logger.info('pop = %r' % (pop,))

    iter_ = list(zip(infr_list, bad_edges_list))
    for infr, bad_edges in ut.ProgIter(iter_,
                                       lbl='adding speed edges',
                                       **progkw):
        flipped_edges = []
        for aid1, aid2 in bad_edges:
            if infr.graph.has_edge(aid1, aid2):
                flipped_edges.append((aid1, aid2))
            infr.add_feedback((aid1, aid2), NEGTV)
        nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig')
        nx.set_edge_attributes(infr.graph,
                               name='_speed_split',
                               values={edge: 'new'
                                       for edge in bad_edges})
        nx.set_edge_attributes(
            infr.graph,
            name='_speed_split',
            values={edge: 'flip'
                    for edge in flipped_edges},
        )

    # for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw):
    #    annots = ibs.annots(infr.aids)
    #    edge_to_speeds = annots.get_speeds()
    #    bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]

    def inference_stats(infr_list_):
        relabel_stats = []
        for infr in infr_list_:
            num_ccs, num_inconsistent = infr.relabel_using_reviews()
            state_hist = ut.dict_hist(
                nx.get_edge_attributes(infr.graph, 'decision').values())
            if POSTV not in state_hist:
                state_hist[POSTV] = 0
            hist = ut.dict_hist(
                nx.get_edge_attributes(infr.graph, '_speed_split').values())

            subgraphs = infr.positive_connected_compoments()
            subgraph_sizes = [len(g) for g in subgraphs]

            info = ut.odict([
                ('num_nonmatch_edges', state_hist[NEGTV]),
                ('num_match_edges', state_hist[POSTV]),
                (
                    'frac_nonmatch_edges',
                    state_hist[NEGTV] /
                    (state_hist[POSTV] + state_hist[NEGTV]),
                ),
                ('num_inconsistent', num_inconsistent),
                ('num_ccs', num_ccs),
                ('edges_flipped', hist.get('flip', 0)),
                ('edges_unchanged', hist.get('orig', 0)),
                ('bad_unreviewed_edges', hist.get('new', 0)),
                ('orig_size', len(infr.graph)),
                ('new_sizes', subgraph_sizes),
            ])
            relabel_stats.append(info)
        return relabel_stats

    relabel_stats = inference_stats(infr_list)

    logger.info('\nAll Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append(
            'stats(%s) = %s' %
            (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    logger.info('\n'.join(ut.align_lines(lines, '=')))

    num_incon_list = np.array(ut.take_column(relabel_stats,
                                             'num_inconsistent'))
    can_split_flags = num_incon_list == 0
    logger.info('Can trivially split %d / %d' %
                (sum(can_split_flags), len(can_split_flags)))

    splittable_infrs = ut.compress(infr_list, can_split_flags)

    relabel_stats = inference_stats(splittable_infrs)

    logger.info('\nTrivial Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        if key in ['num_inconsistent']:
            continue
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append(
            'stats(%s) = %s' %
            (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    logger.info('\n'.join(ut.align_lines(lines, '=')))

    num_match_edges = np.array(ut.take_column(relabel_stats,
                                              'num_match_edges'))
    num_nonmatch_edges = np.array(
        ut.take_column(relabel_stats, 'num_nonmatch_edges'))
    flags1 = np.logical_and(num_match_edges > num_nonmatch_edges,
                            num_nonmatch_edges < 3)
    reasonable_infr = ut.compress(splittable_infrs, flags1)

    new_sizes_list = ut.take_column(relabel_stats, 'new_sizes')
    flags2 = [
        len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > 0.3
        for sizes in new_sizes_list
    ]
    reasonable_infr = ut.compress(splittable_infrs, flags2)
    logger.info('#reasonable_infr = %r' % (len(reasonable_infr), ))

    for infr in ut.InteractiveIter(reasonable_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        logger.info('max_speed = %r' % (max(edge_to_speeds.values())), )
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    rest = ~np.logical_or(flags1, flags2)
    nonreasonable_infr = ut.compress(splittable_infrs, rest)
    rng = np.random.RandomState(0)
    random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng)
    random_infr = ut.take(nonreasonable_infr, random_idx)
    for infr in ut.InteractiveIter(random_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        logger.info('max_speed = %r' % (max(edge_to_speeds.values())), )
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    # import scipy.stats as st
    # conf_interval = .95
    # st.norm.cdf(conf_interval)
    # view-source:http://www.surveysystem.com/sscalc.htm
    # zval = 1.96  # 95 percent confidence
    # zValC = 3.8416  #
    # zValC = 6.6564

    # import statsmodels.stats.api as sms
    # es = sms.proportion_effectsize(0.5, 0.75)
    # sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1)

    pop = 279
    num_positive = 3
    sample_size = 15
    conf_level = 0.95
    # conf_level = .99
    vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level)
    logger.info('---')
    vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop,
                                   conf_level)
    logger.info('---')
    vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop,
                                   conf_level)
    logger.info('---')

    vt.calc_error_bars_from_sample(15 + 38,
                                   num_positive=3,
                                   pop=675,
                                   conf_level=0.95)
    vt.calc_error_bars_from_sample(15,
                                   num_positive=3,
                                   pop=675,
                                   conf_level=0.95)

    pop = 279
    # err_frac = .05  # 5%
    err_frac = 0.10  # 10%
    conf_level = 0.95
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)

    pop = 675
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)
    vt.calc_sample_from_error_bars(0.05, pop, conf_level=0.95, prior=0.1)
    vt.calc_sample_from_error_bars(0.05, pop, conf_level=0.68, prior=0.2)
    vt.calc_sample_from_error_bars(0.10, pop, conf_level=0.68)

    vt.calc_error_bars_from_sample(100,
                                   num_positive=5,
                                   pop=675,
                                   conf_level=0.95)
    vt.calc_error_bars_from_sample(100,
                                   num_positive=5,
                                   pop=675,
                                   conf_level=0.68)
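
vt.calc_error_bars_from_sample is called above as a black box; as a rough sketch of the kind of statistic involved (a normal-approximation confidence interval for a sampled proportion with finite population correction; the formula below is an assumption about, not a copy of, the vtool routine):

    import numpy as np
    from scipy.stats import norm

    def error_bars_from_sample(sample_size, num_positive, pop, conf_level=0.95):
        # Normal-approximation CI for a proportion, with finite population
        # correction; illustrative only, not necessarily what vtool computes.
        p_hat = num_positive / sample_size
        z = norm.ppf(1.0 - (1.0 - conf_level) / 2.0)
        se = np.sqrt(p_hat * (1.0 - p_hat) / sample_size)
        fpc = np.sqrt((pop - sample_size) / (pop - 1.0))
        margin = z * se * fpc
        return p_hat, margin

    p_hat, margin = error_bars_from_sample(15, 3, 279, conf_level=0.95)
    print('p = %.3f +/- %.3f' % (p_hat, margin))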
Example 5
def compute_occurrence_groups(ibs, gid_list, config={}, use_gps=False, verbose=None):
    r"""
    Args:
        ibs (IBEISController):  wbia controller object
        gid_list (list):

    Returns:
        tuple: (occur_labels, occur_gids)

    CommandLine:
        python -m wbia compute_occurrence_groups

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.preproc.preproc_occurrence import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb(defaultdb='testdb1')
        >>> verbose = True
        >>> images = ibs.images()
        >>> gid_list = images.gids
        >>> config = {}  # wbia.algo.Config.OccurrenceConfig().asdict()
        >>> tup = wbia_compute_occurrences(ibs, gid_list)
        >>> (flat_imgsetids, flat_gids) = tup
        >>> aids_list = list(ut.group_items(aid_list_, flat_imgsetids).values())
        >>> metric = list(map(len, aids_list))
        >>> sortx = ut.list_argsort(metric)[::-1]
        >>> index = sortx[1]
        >>> aids = aids_list[index]
        >>> gids = list(set(ibs.get_annot_gids(aids)))
    """
    if verbose is None:
        verbose = ut.NOT_QUIET
    # Config info
    gid_list = np.unique(gid_list)
    if verbose:
        logger.info('[occur] Computing occurrences on %r images.' % (len(gid_list)))
        logger.info('[occur] config = ' + ut.repr3(config))

    use_gps = config.get('use_gps', use_gps)
    datas = prepare_X_data(ibs, gid_list, use_gps=use_gps)

    from wbia.algo.preproc import occurrence_blackbox

    cluster_algo = config.get('cluster_algo', 'agglomerative')
    km_per_sec = config.get('km_per_sec', occurrence_blackbox.KM_PER_SEC)
    thresh_sec = config.get('seconds_thresh', 30 * 60.0)
    min_imgs_per_occurence = config.get('min_imgs_per_occurence', 1)
    # 30 minutes = 3.6 kilometers
    # 5 minutes = 0.6 kilometers

    assert cluster_algo == 'agglomerative', 'only agglomerative is supported'

    # Group datas with different values separately
    all_gids = []
    all_labels = []
    for key in datas.keys():
        val = datas[key]
        gids, latlons, posixtimes = val
        labels = occurrence_blackbox.cluster_timespace_sec(
            latlons, posixtimes, thresh_sec, km_per_sec=km_per_sec
        )
        if labels is None:
            labels = np.zeros(len(gids), dtype=int)
        all_gids.append(gids)
        all_labels.append(labels)

    # Combine labels across different groups
    pads = [vt.safe_max(ys, fill=0) + 1 for ys in all_labels]
    offsets = np.array([0] + pads[:-1]).cumsum()
    all_labels_ = [ys + offset for ys, offset in zip(all_labels, offsets)]
    label_arr = np.array(ut.flatten(all_labels_))
    gid_arr = np.array(ut.flatten(all_gids))

    # Group images by unique label
    labels, label_gids = group_images_by_label(label_arr, gid_arr)
    # Remove occurrences less than the threshold
    occur_labels = labels
    occur_gids = label_gids
    occur_unixtimes = compute_occurrence_unixtime(ibs, occur_gids)
    occur_labels, occur_gids = filter_and_relabel(
        labels, label_gids, min_imgs_per_occurence, occur_unixtimes
    )
    if verbose:
        logger.info('[occur] Found %d clusters.' % len(occur_labels))
    if len(label_gids) > 0 and verbose:
        logger.info('[occur] Cluster image size stats:')
        ut.print_dict(
            ut.get_stats(list(map(len, occur_gids)), use_median=True, use_sum=True),
            'occur image stats',
        )
    return occur_labels, occur_gids
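
The pads/offsets step that merges per-group cluster labels into one global labeling can be illustrated on its own:

    import numpy as np

    # Two groups clustered independently; labels restart at 0 in each group.
    all_labels = [np.array([0, 0, 1]), np.array([0, 1, 1, 2])]

    # Shift each group's labels past the labels already used by earlier groups
    # so the concatenated labeling stays unique (mirrors the pads/offsets code).
    pads = [labels.max() + 1 if labels.size else 0 for labels in all_labels]
    offsets = np.array([0] + pads[:-1]).cumsum()
    combined = np.concatenate([labels + off for labels, off in zip(all_labels, offsets)])
    print(combined)  # [0 0 1 2 3 3 4]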