Example #1
def compute_word_metrics(invindex):
    invindex.idx2_wxs = np.array(invindex.idx2_wxs)
    wx2_idxs  = invindex.wx2_idxs
    idx2_dvec = invindex.idx2_dvec
    words     = invindex.words
    wx2_pdist = {}
    wx2_wdist = {}
    wx2_nMembers = {}
    wx2_pdist_stats = {}
    wx2_wdist_stats = {}
    wordidx_iter = ut.progiter(six.iteritems(wx2_idxs), lbl='Word Dists: ', num=len(wx2_idxs), freq=200)

    for _item in wordidx_iter:
        wx, idxs = _item
        dvecs = idx2_dvec.take(idxs, axis=0)
        word = words[wx:wx + 1]
        wx2_pdist[wx] = spdist.pdist(dvecs)  # pairwise dists among descriptors assigned to this word
        wx2_wdist[wx] = ut.euclidean_dist(dvecs, word)  # dist to word center
        wx2_nMembers[wx] = len(idxs)

    for wx, pdist in ut.progiter(six.iteritems(wx2_pdist), lbl='Word pdist Stats: ', num=len(wx2_idxs), freq=2000):
        wx2_pdist_stats[wx] = ut.get_stats(pdist)

    for wx, wdist in ut.progiter(six.iteritems(wx2_wdist), lbl='Word wdist Stats: ', num=len(wx2_idxs), freq=2000):
        wx2_wdist_stats[wx] = ut.get_stats(wdist)

    ut.print_stats(wx2_nMembers.values(), 'word members')
    metrics = Metrics(wx2_nMembers, wx2_pdist_stats, wx2_wdist_stats)
    return metrics
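
All of the snippets on this page revolve around ut.get_stats, which returns a plain dict of summary statistics. A minimal standalone sketch of consuming that dict (the key names are the ones the examples on this page already rely on; the exact set depends on the utool version and flags such as use_nan or use_median):

import utool as ut

num_members = [3, 5, 8, 2, 9]
stats = ut.get_stats(num_members)
# keys used elsewhere on this page include 'min', 'max', 'mean', 'std' and 'shape'
print(ut.repr2(stats, precision=2))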
Example #2
def dictinfo(dict_):
    if not isinstance(dict_, dict):
        return 'expected dict got %r' % type(dict_)

    keys = list(dict_.keys())
    vals = list(dict_.values())
    num_keys = len(keys)
    key_types = list(set(map(type, keys)))
    val_types = list(set(map(type, vals)))

    fmtstr_ = '\n' + ut.unindent('''
    * num_keys  = {num_keys}
    * key_types = {key_types}
    * val_types = {val_types}
    '''.strip('\n'))

    if len(val_types) == 1:
        if val_types[0] == np.ndarray:
            # each key holds an ndarray
            val_shape_stats = ut.get_stats(set(map(np.shape, vals)), axis=0)
            val_shape_stats_str = ut.dict_str(val_shape_stats,
                                              strvals=True,
                                              newlines=False)
            val_dtypes = list(set([val.dtype for val in vals]))
            fmtstr_ += ut.unindent('''
            * val_shape_stats = {val_shape_stats_str}
            * val_dtypes = {val_dtypes}
            '''.strip('\n'))
        elif val_types[0] == list:
            # each key holds a list
            val_len_stats = ut.get_stats(set(map(len, vals)))
            val_len_stats_str = ut.dict_str(val_len_stats,
                                            strvals=True,
                                            newlines=False)
            depth = ut.list_depth(vals)
            deep_val_types = list(set(ut.list_deep_types(vals)))
            fmtstr_ += ut.unindent('''
            * list_depth = {depth}
            * val_len_stats = {val_len_stats_str}
            * deep_types = {deep_val_types}
            '''.strip('\n'))
            if len(deep_val_types) == 1:
                if deep_val_types[0] == np.ndarray:
                    deep_val_dtypes = list(set([val.dtype for val in vals]))
                    fmtstr_ += ut.unindent('''
                    * deep_val_dtypes = {deep_val_dtypes}
                    ''').strip('\n')
        elif val_types[0] in [
                np.uint8, np.int8, np.int32, np.int64, np.float16, np.float32,
                np.float64
        ]:
            # each key holds a scalar
            val_stats = ut.get_stats(vals)
            fmtstr_ += ut.unindent('''
            * val_stats = {val_stats}
            ''').strip('\n')

    fmtstr = fmtstr_.format(**locals())
    return ut.indent(fmtstr)
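
A small usage sketch for dictinfo; the demo dict is hypothetical, and numpy/utool are assumed importable just as the function body assumes:

import numpy as np

# every value is an ndarray, so the ndarray branch above reports shape stats and dtypes
demo = {name: np.zeros((4, 2), dtype=np.float32) for name in ['a', 'b', 'c']}
print(dictinfo(demo))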
Example #3
def print_feat_stats(kpts, vecs):
    # NOTE: 'prefix' is a label string resolved from the enclosing scope;
    # define it before calling this function standalone.
    assert len(vecs) == len(kpts), 'disagreement'
    print('keypoints and vecs agree')
    flat_kpts = np.vstack(kpts)
    num_kpts = list(map(len, kpts))
    kpt_scale = vt.get_scales(flat_kpts)
    num_kpts_stats = ut.get_stats(num_kpts)
    scale_kpts_stats = ut.get_stats(kpt_scale)
    print('Number of ' + prefix + ' keypoints: ' + ut.repr3(num_kpts_stats, nl=0, precision=2))
    print('Scale of ' + prefix + ' keypoints: ' + ut.repr3(scale_kpts_stats, nl=0, precision=2))
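
A hypothetical standalone call for print_feat_stats. It assumes vtool-style keypoints (6-column arrays) and 128-dimensional descriptors; the data and package aliases are placeholders, and prefix is defined here because the function reads it from module scope:

import numpy as np
import utool as ut
import vtool as vt  # or vtool_ibeis, depending on the install

prefix = 'demo'  # label read by print_feat_stats
kpts = [np.abs(np.random.rand(n, 6)) + 0.1 for n in (10, 25)]  # fake keypoints per chip
vecs = [np.random.randint(0, 255, (n, 128)).astype(np.uint8) for n in (10, 25)]
print_feat_stats(kpts, vecs)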
Example #4
def print_feat_stats(kpts, vecs):
    # NOTE: 'prefix' is a label string resolved from the enclosing scope;
    # define it before calling this function standalone.
    assert len(vecs) == len(kpts), 'disagreement'
    print('keypoints and vecs agree')
    flat_kpts = np.vstack(kpts)
    num_kpts = list(map(len, kpts))
    kpt_scale = vt.get_scales(flat_kpts)
    num_kpts_stats = ut.get_stats(num_kpts)
    scale_kpts_stats = ut.get_stats(kpt_scale)
    print('Number of ' + prefix + ' keypoints: ' + ut.repr3(num_kpts_stats, nl=0, precision=2))
    print('Scale of ' + prefix + ' keypoints: ' + ut.repr3(scale_kpts_stats, nl=0, precision=2))
Example #5
def dictinfo(dict_):
    if not isinstance(dict_, dict):
        return 'expected dict got %r' % type(dict_)

    keys = list(dict_.keys())
    vals = list(dict_.values())
    num_keys  = len(keys)
    key_types = list(set(map(type, keys)))
    val_types = list(set(map(type, vals)))

    fmtstr_ = '\n' + ut.unindent('''
    * num_keys  = {num_keys}
    * key_types = {key_types}
    * val_types = {val_types}
    '''.strip('\n'))

    if len(val_types) == 1:
        if val_types[0] == np.ndarray:
            # each key holds an ndarray
            val_shape_stats = ut.get_stats(set(map(np.shape, vals)), axis=0)
            val_shape_stats_str = ut.dict_str(val_shape_stats, strvals=True, newlines=False)
            val_dtypes = list(set([val.dtype for val in vals]))
            fmtstr_ += ut.unindent('''
            * val_shape_stats = {val_shape_stats_str}
            * val_dtypes = {val_dtypes}
            '''.strip('\n'))
        elif val_types[0] == list:
            # each key holds a list
            val_len_stats =  ut.get_stats(set(map(len, vals)))
            val_len_stats_str = ut.dict_str(val_len_stats, strvals=True, newlines=False)
            depth = ut.list_depth(vals)
            deep_val_types = list(set(ut.list_deep_types(vals)))
            fmtstr_ += ut.unindent('''
            * list_depth = {depth}
            * val_len_stats = {val_len_stats_str}
            * deep_types = {deep_val_types}
            '''.strip('\n'))
            if len(deep_val_types) == 1:
                if deep_val_types[0] == np.ndarray:
                    deep_val_dtypes = list(set([val.dtype for val in vals]))
                    fmtstr_ += ut.unindent('''
                    * deep_val_dtypes = {deep_val_dtypes}
                    ''').strip('\n')
        elif val_types[0] in [np.uint8, np.int8, np.int32, np.int64, np.float16, np.float32, np.float64]:
            # each key holds a scalar
            val_stats = ut.get_stats(vals)
            fmtstr_ += ut.unindent('''
            * val_stats = {val_stats}
            ''').strip('\n')

    fmtstr = fmtstr_.format(**locals())
    return ut.indent(fmtstr)
Example #6
def vector_normal_stats(vectors):
    import numpy.linalg as npl
    norm_list = npl.norm(vectors, axis=1)
    #norm_list2 = np.sqrt((vectors ** 2).sum(axis=1))
    #assert np.all(norm_list == norm_list2)
    norm_stats = ut.get_stats(norm_list)
    print('normal_stats:' + ut.dict_str(norm_stats, newlines=False))
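
A quick synthetic call for vector_normal_stats (utool is assumed importable, since the function resolves ut at module scope):

import numpy as np
import utool as ut  # the function body uses the module-level name 'ut'

# 100 random 128-dimensional vectors; the printed stats summarize their L2 norms
vector_normal_stats(np.random.rand(100, 128))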
Example #7
def vector_normal_stats(vectors):
    import numpy.linalg as npl
    norm_list = npl.norm(vectors, axis=1)
    #norm_list2 = np.sqrt((vectors ** 2).sum(axis=1))
    #assert np.all(norm_list == norm_list2)
    norm_stats = ut.get_stats(norm_list)
    print('normal_stats:' + ut.dict_str(norm_stats, newlines=False))
Example #8
def __init__(invassign, fstack, vocab, wx2_idxs, wx2_maws, wx2_fxs, wx2_axs):
    invassign.fstack = fstack
    invassign.vocab = vocab
    invassign.wx2_idxs = wx2_idxs
    invassign.wx2_maws = wx2_maws
    invassign.wx2_fxs = wx2_fxs
    invassign.wx2_axs = wx2_axs
    invassign.wx2_num = ut.map_dict_vals(len, invassign.wx2_axs)
    invassign.wx_list = sorted(invassign.wx2_num.keys())
    invassign.num_list = ut.take(invassign.wx2_num, invassign.wx_list)
    invassign.perword_stats = ut.get_stats(invassign.num_list)
Example #9
def print_dataset_info(data, labels, key):
    labelhist = {key: len(val) for key, val in ut.group_items(labels, labels).items()}
    stats_dict = ut.get_stats(data.ravel())
    ut.delete_keys(stats_dict, ['shape', 'nMax', 'nMin'])
    print('[dataset] Dataset Info: ')
    print('[dataset] * Data:')
    print('[dataset]     %s_data(shape=%r, dtype=%r)' % (key, data.shape, data.dtype))
    print('[dataset]     %s_memory(data) = %r' % (key, ut.get_object_size_str(data),))
    print('[dataset]     %s_stats(data) = %s' % (key, ut.repr2(stats_dict, precision=2),))
    print('[dataset] * Labels:')
    print('[dataset]     %s_labels(shape=%r, dtype=%r)' % (key, labels.shape, labels.dtype))
    print('[dataset]     %s_label histogram = %s' % (key, ut.repr2(labelhist)))
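
A synthetic call for print_dataset_info; the data, labels and the 'train' key are all hypothetical:

import numpy as np
import utool as ut  # resolved at module scope by the function body

data = np.random.rand(32, 8).astype(np.float32)
labels = (np.arange(32) % 2).astype(np.int64)
print_dataset_info(data, labels, 'train')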
Example #10
def __init__(invassign, fstack, vocab, wx2_idxs, wx2_maws, wx2_fxs,
             wx2_axs):
    invassign.fstack = fstack
    invassign.vocab = vocab
    invassign.wx2_idxs = wx2_idxs
    invassign.wx2_maws = wx2_maws
    invassign.wx2_fxs = wx2_fxs
    invassign.wx2_axs = wx2_axs
    invassign.wx2_num = ut.map_dict_vals(len, invassign.wx2_axs)
    invassign.wx_list = sorted(invassign.wx2_num.keys())
    invassign.num_list = ut.take(invassign.wx2_num, invassign.wx_list)
    invassign.perword_stats = ut.get_stats(invassign.num_list)
Example #11
def compute_word_metrics(invindex):
    invindex.idx2_wxs = np.array(invindex.idx2_wxs)
    wx2_idxs = invindex.wx2_idxs
    idx2_dvec = invindex.idx2_dvec
    words = invindex.words
    wx2_pdist = {}
    wx2_wdist = {}
    wx2_nMembers = {}
    wx2_pdist_stats = {}
    wx2_wdist_stats = {}
    wordidx_iter = ut.progiter(six.iteritems(wx2_idxs),
                               lbl='Word Dists: ',
                               num=len(wx2_idxs),
                               freq=200)

    for _item in wordidx_iter:
        wx, idxs = _item
        dvecs = idx2_dvec.take(idxs, axis=0)
        word = words[wx:wx + 1]
        wx2_pdist[wx] = spdist.pdist(dvecs)  # pairwise dists among descriptors assigned to this word
        wx2_wdist[wx] = ut.euclidean_dist(dvecs, word)  # dist to word center
        wx2_nMembers[wx] = len(idxs)

    for wx, pdist in ut.progiter(six.iteritems(wx2_pdist),
                                 lbl='Word pdist Stats: ',
                                 num=len(wx2_idxs),
                                 freq=2000):
        wx2_pdist_stats[wx] = ut.get_stats(pdist)

    for wx, wdist in ut.progiter(six.iteritems(wx2_wdist),
                                 lbl='Word wdist Stats: ',
                                 num=len(wx2_idxs),
                                 freq=2000):
        wx2_wdist_stats[wx] = ut.get_stats(wdist)

    ut.print_stats(wx2_nMembers.values(), 'word members')
    metrics = Metrics(wx2_nMembers, wx2_pdist_stats, wx2_wdist_stats)
    return metrics
Example #12
def inspect_deck(deck):
    def get_card_tags(card, deck):
        tags = []
        stats = card.mana_source_stats(deck)
        if stats is not None:
            tags.append("land")
            if len(stats[1]) > 0:
                tags.append("tapland")
            else:
                tags.append("untapland")
        return tags

    # ------------
    print("len(deck) = %r" % (len(deck),))
    tags_list = [get_card_tags(card, deck) for card in deck.card_list]
    print("Deck Counts:")
    print(ut.repr2(ut.dict_hist(ut.flatten(tags_list)), nl=True))

    hand = deck.sample_hand()
    manastats_list = [card.mana_source_stats(deck) for card in hand]
    print(ut.list_str([card.name + ": " + text_type(stats) for card, stats in zip(hand, manastats_list)]))
    tags_list = [get_card_tags(card, deck) for card in hand]
    print("Hand Counts")
    print(ut.repr2(ut.dict_hist(ut.flatten(tags_list)), nl=True))

    valid_tags = ["land", "tapland", "untapland"]
    x = {tag: [] for tag in valid_tags}

    for _ in range(500):
        hand = deck.sample_hand()
        tags_list = [get_card_tags(card, deck) for card in hand]
        taghist = ut.dict_hist(ut.flatten(tags_list))
        for key, val in x.items():
            val.append(taghist.get(key, 0))

    print("Monte Stats:")
    for key, val in list(x.items()):
        print("%15s: %s" % (key, ut.repr2(ut.get_stats(val), precision=2)))

    def hand_stats():
        # [card.types for card in hand]
        # [card.rrr() for card in hand]
        [card.mana_source_stats(deck) for card in hand]
        card.types
Example #13
def get_timestats_dict(unixtime_list, full=True, isutc=False):
    import utool as ut
    unixtime_stats = ut.get_stats(unixtime_list, use_nan=True)
    datetime_stats = {}
    if unixtime_stats.get('empty_list', False):
        datetime_stats = unixtime_stats
        return datetime_stats
    for key in ['max', 'min', 'mean']:
        try:
            datetime_stats[key] = ut.unixtime_to_datetimestr(
                unixtime_stats[key], isutc=isutc)
        except KeyError:
            pass
        except ValueError as ex:
            datetime_stats[key] = 'NA'
        except Exception as ex:
            ut.printex(ex, keys=['key', 'unixtime_stats'])
            raise
    for key in ['std']:
        try:
            datetime_stats[key] = str(
                ut.get_unix_timedelta(int(round(unixtime_stats[key]))))
        except KeyError:
            pass
        except ValueError as ex:
            datetime_stats[key] = 'NA'
    try:
        datetime_stats['range'] = str(
            ut.get_unix_timedelta(
                int(round(unixtime_stats['max'] - unixtime_stats['min']))))
    except KeyError:
        pass
    except ValueError as ex:
        datetime_stats['range'] = 'NA'

    if full:
        #unused_keys = (set(unixtime_stats.keys()) - set(datetime_stats.keys())
        for key in ['nMax', 'num_nan', 'shape', 'nMin']:
            datetime_stats[key] = unixtime_stats[key]
    #print('Unused keys = %r' % (set(unixtime_stats.keys()) - set(datetime_stats.keys()),))
    return datetime_stats
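
A hypothetical call for get_timestats_dict with two unixtimes one hour apart:

times = [1454000000.0, 1454003600.0]
print(get_timestats_dict(times, full=False, isutc=True))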
Example #14
def get_timestats_dict(unixtime_list, full=True, isutc=False):
    import utool as ut
    unixtime_stats = ut.get_stats(unixtime_list, use_nan=True)
    datetime_stats = {}
    if unixtime_stats.get('empty_list', False):
        datetime_stats = unixtime_stats
        return datetime_stats
    # elif unixtime_stats.get('num_nan', 0) == unixtime_stats.get('shape')[0]:
    #     datetime_stats = unixtime_stats
    #     return datetime_stats
    for key in ['max', 'min', 'mean']:
        try:
            datetime_stats[key] = ut.unixtime_to_datetimestr(unixtime_stats[key], isutc=isutc)
        except KeyError:
            pass
        except (ValueError, OSError) as ex:
            datetime_stats[key]  = 'NA'
        except Exception as ex:
            ut.printex(ex, keys=['key', 'unixtime_stats'])
            raise
    for key in ['std']:
        try:
            datetime_stats[key] = str(ut.get_unix_timedelta(int(round(unixtime_stats[key]))))
        except KeyError:
            pass
        except (ValueError, OSError) as ex:
            datetime_stats[key]  = 'NA'
    try:
        datetime_stats['range'] = str(ut.get_unix_timedelta(int(round(unixtime_stats['max'] - unixtime_stats['min']))))
    except KeyError:
        pass
    except (ValueError, OSError) as ex:
        datetime_stats['range']  = 'NA'

    if full:
        #unused_keys = (set(unixtime_stats.keys()) - set(datetime_stats.keys())
        for key in ['nMax', 'num_nan', 'shape', 'nMin']:
            datetime_stats[key] = unixtime_stats[key]
    #print('Unused keys = %r' % (set(unixtime_stats.keys()) - set(datetime_stats.keys()),))
    return datetime_stats
Example #15
def latex_get_stats(lbl, data, mode=0):
    import utool as ut
    stats_ = ut.get_stats(data)
    if stats_.get('empty_list', False):
        return '% NA: latex_get_stats, data=[]'
    try:
        max_ = stats_['max']
        min_ = stats_['min']
        mean = stats_['mean']
        std  = stats_['std']
        shape = stats_['shape']
    except KeyError as ex:
        stat_keys = stats_.keys()  # NOQA
        ut.printex(ex, key_list=['stat_keys', 'stats_', 'data'])
        raise

    #int_fmt = lambda num: util.num_fmt(int(num))
    def float_fmt(num):
        return util_num.num_fmt(float(num))

    def tup_fmt(tup):
        return str(tup)
    fmttup = (float_fmt(min_), float_fmt(max_), float_fmt(mean), float_fmt(std), tup_fmt(shape))
    lll = ' ' * len(lbl)
    if mode == 0:
        prefmtstr = r'''
        {label} stats & min ; max = %s ; %s\\
        {space}       & mean; std = %s ; %s\\
        {space}       & shape = %s \\'''
    if mode == 1:
        prefmtstr = r'''
        {label} stats & min  = $%s$\\
        {space}       & max  = $%s$\\
        {space}       & mean = $%s$\\
        {space}       & std  = $%s$\\
        {space}       & shape = $%s$\\'''
    fmtstr = prefmtstr.format(label=lbl, space=lll)
    latex_str = textwrap.dedent(fmtstr % fmttup).strip('\n') + '\n'
    return latex_str
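
latex_get_stats leans on two names from its original module, textwrap and a util_num.num_fmt helper, so a standalone sketch has to stub the latter (the stub below is hypothetical, not the real utool formatter):

import textwrap  # used by latex_get_stats

class util_num:  # minimal stand-in for the number formatter the function expects
    @staticmethod
    def num_fmt(num):
        return '%g' % num

print(latex_get_stats('runtime', [0.1, 0.4, 0.9, 0.7], mode=1))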
Example #16
def wx_len_stats(wx2_xxx):
    """
    Example:
        >>> from ibeis.algo.hots.smk.smk_debug import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> from ibeis.algo.hots.smk import smk_repr
        >>> ibs, annots_df, taids, daids, qaids, qreq_, nWords = smk_debug.testdata_dataframe()
        >>> qreq_ = query_request.new_ibeis_query_request(ibs, qaids, daids)
        >>> qparams = qreq_.qparams
        >>> invindex = smk_repr.index_data_annots(annots_df, daids, words)
        >>> qaid = qaids[0]
        >>> wx2_qrvecs, wx2_qaids, wx2_qfxs, query_sccw = smk_repr.new_qindex(annots_df, qaid, invindex, qparams)
        >>> print(ut.dict_str(wx2_rvecs_stats(wx2_qrvecs)))
    """
    import utool as ut
    if wx2_xxx is None:
        return 'None'
    if isinstance(wx2_xxx, dict):
        #len_list = [len(xxx) for xxx in ]
        val_list = wx2_xxx.values()
    else:
        val_list = wx2_xxx
    try:
        len_list = [len(xxx) for xxx in val_list]
        statdict = ut.get_stats(len_list)
        return ut.dict_str(statdict, strvals=True, newlines=False)
    except Exception as ex:
        ut.printex(ex)
        for count, xxx in enumerate(val_list):
            try:
                len(xxx)
            except Exception:
                print('failed on count=%r' % (count,))
                print('failed on xxx=%r' % (xxx,))
                pass
        raise
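
A tiny synthetic call for wx_len_stats, mapping word indices to lists of feature indices:

demo_wx2_idxs = {0: [1, 2, 3], 1: [4, 5], 2: [6]}
print(wx_len_stats(demo_wx2_idxs))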
Example #17
def wx_len_stats(wx2_xxx):
    """
    Example:
        >>> from ibeis.algo.hots.smk.smk_debug import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> from ibeis.algo.hots.smk import smk_repr
        >>> ibs, annots_df, taids, daids, qaids, qreq_, nWords = smk_debug.testdata_dataframe()
        >>> qreq_ = query_request.new_ibeis_query_request(ibs, qaids, daids)
        >>> qparams = qreq_.qparams
        >>> invindex = smk_repr.index_data_annots(annots_df, daids, words)
        >>> qaid = qaids[0]
        >>> wx2_qrvecs, wx2_qaids, wx2_qfxs, query_sccw = smk_repr.new_qindex(annots_df, qaid, invindex, qparams)
        >>> print(ut.dict_str(wx2_rvecs_stats(wx2_qrvecs)))
    """
    import utool as ut
    if wx2_xxx is None:
        return 'None'
    if isinstance(wx2_xxx, dict):
        #len_list = [len(xxx) for xxx in ]
        val_list = wx2_xxx.values()
    else:
        val_list = wx2_xxx
    try:
        len_list = [len(xxx) for xxx in val_list]
        statdict = ut.get_stats(len_list)
        return ut.dict_str(statdict, strvals=True, newlines=False)
    except Exception as ex:
        ut.printex(ex)
        for count, xxx in enumerate(val_list):
            try:
                len(xxx)
            except Exception:
                print('failed on count=%r' % (count, ))
                print('failed on xxx=%r' % (xxx, ))
                pass
        raise
Example #18
def get_timestats_dict(unixtime_list, full=True):
    import utool as ut
    unixtime_stats = ut.get_stats(unixtime_list, use_nan=True)
    datetime_stats = {}
    for key in ['max', 'min', 'mean']:
        try:
            datetime_stats[key] = ut.unixtime_to_datetime(unixtime_stats[key])
        except KeyError:
            pass
    for key in ['std']:
        try:
            datetime_stats[key] = str(ut.get_unix_timedelta(int(round(unixtime_stats[key]))))
        except KeyError:
            pass
    try:
        datetime_stats['range'] = str(ut.get_unix_timedelta(int(round(unixtime_stats['max'] - unixtime_stats['min']))))
    except KeyError:
        pass

    if full:
        for key in ['nMax', 'num_nan', 'shape', 'nMin']:
            datetime_stats[key] = unixtime_stats[key]
    #print('Unused keys = %r' % (set(unixtime_stats.keys()) - set(datetime_stats.keys()),))
    return datetime_stats
Example #19
def split_analysis(ibs):
    """
    CommandLine:
        python -m ibeis.other.dbinfo split_analysis --show
        python -m ibeis split_analysis --show
        python -m ibeis split_analysis --show --good

    Ignore:
        # mount
        sshfs -o idmap=user lev:/ ~/lev

        # unmount
        fusermount -u ~/lev

    Example:
        >>> # DISABLE_DOCTEST GGR
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> dbdir = '/media/danger/GGR/GGR-IBEIS'
        >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
        >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False)
        >>> import guitool_ibeis as gt
        >>> gt.ensure_qtapp()
        >>> win = split_analysis(ibs)
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> gt.qtapp_loop(qwin=win)
        >>> #ut.show_if_requested()
    """
    #nid_list = ibs.get_valid_nids(filter_empty=True)
    import datetime
    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    filter_kw = {
        'multiple': None,
        #'view': ['right'],
        #'minqual': 'good',
        'is_known': True,
        'min_pername': 1,
    }
    aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)),
        })
    )
    aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        })
    )
    all_aids = aids1 + aids2
    all_annots = ibs.annots(all_aids)
    print('%d annots on day 1' % (len(aids1)) )
    print('%d annots on day 2' % (len(aids2)) )
    print('%d annots overall' % (len(all_annots)) )
    print('%d names overall' % (len(ut.unique(all_annots.nids))) )

    nid_list, annots_list = all_annots.group(all_annots.nids)

    REVIEWED_EDGES = True
    if REVIEWED_EDGES:
        aids_list = [annots.aids for annots in annots_list]
        #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list]  # Slower
        aid_pairs = ibs.get_unflat_am_aidpairs(aids_list)  # Faster
    else:
        # ALL EDGES
        aid_pairs = [annots.get_aidpairs() for annots in annots_list]

    speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs)
    import vtool_ibeis as vt
    max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list])

    nan_idx = np.where(np.isnan(max_speeds))[0]
    inf_idx = np.where(np.isinf(max_speeds))[0]
    bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx])))
    ok_idx = ut.index_complement(bad_idx, len(max_speeds))

    print('#nan_idx = %r' % (len(nan_idx),))
    print('#inf_idx = %r' % (len(inf_idx),))
    print('#ok_idx = %r' % (len(ok_idx),))

    ok_speeds = max_speeds[ok_idx]
    ok_nids = ut.take(nid_list, ok_idx)
    ok_annots = ut.take(annots_list, ok_idx)
    sortx = np.argsort(ok_speeds)[::-1]

    sorted_speeds = np.array(ut.take(ok_speeds, sortx))
    sorted_annots = np.array(ut.take(ok_annots, sortx))
    sorted_nids = np.array(ut.take(ok_nids, sortx))  # NOQA

    sorted_speeds = np.clip(sorted_speeds, 0, 100)

    #idx = vt.find_elbow_point(sorted_speeds)
    #EXCESSIVE_SPEED = sorted_speeds[idx]
    # http://www.infoplease.com/ipa/A0004737.html
    # http://www.speedofanimals.com/animals/zebra
    #ZEBRA_SPEED_MAX  = 64  # km/h
    #ZEBRA_SPEED_RUN  = 50  # km/h
    ZEBRA_SPEED_SLOW_RUN  = 20  # km/h
    #ZEBRA_SPEED_FAST_WALK = 10  # km/h
    #ZEBRA_SPEED_WALK = 7  # km/h

    MAX_SPEED = ZEBRA_SPEED_SLOW_RUN
    #MAX_SPEED = ZEBRA_SPEED_WALK
    #MAX_SPEED = EXCESSIVE_SPEED

    flags = sorted_speeds > MAX_SPEED
    flagged_ok_annots = ut.compress(sorted_annots, flags)
    inf_annots = ut.take(annots_list, inf_idx)
    flagged_annots = inf_annots + flagged_ok_annots

    print('MAX_SPEED = %r km/h' % (MAX_SPEED,))
    print('%d annots with infinite speed' % (len(inf_annots),))
    print('%d annots with large speed' % (len(flagged_ok_annots),))
    print('Marking all pairs of annots above the threshold as non-matching')

    from ibeis.algo.graph import graph_iden
    import networkx as nx
    progkw = dict(freq=1, bs=True, est_window=len(flagged_annots))

    bad_edges_list = []
    good_edges_list = []
    for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw):
        edge_to_speeds = annots.get_speeds()
        bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]
        good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED]
        bad_edges_list.append(bad_edges)
        good_edges_list.append(good_edges)
    all_bad_edges = ut.flatten(bad_edges_list)
    good_edges_list = ut.flatten(good_edges_list)
    print('num_bad_edges = %r' % (len(ut.flatten(bad_edges_list)),))
    print('num_good_edges = %r' % (len(good_edges_list),))

    if 1:
        from ibeis.viz import viz_graph2
        import guitool_ibeis as gt
        gt.ensure_qtapp()

        if ut.get_argflag('--good'):
            print('Looking at GOOD (no speed problems) edges')
            aid_pairs = good_edges_list
        else:
            print('Looking at BAD (speed problems) edges')
            aid_pairs = all_bad_edges
        aids = sorted(list(set(ut.flatten(aid_pairs))))
        infr = graph_iden.AnnotInference(ibs, aids, verbose=False)
        infr.initialize_graph()

        # Use random scores to randomize sort order
        rng = np.random.RandomState(0)
        scores = (-rng.rand(len(aid_pairs)) * 10).tolist()
        infr.graph.add_edges_from(aid_pairs)

        if True:
            edge_sample_size = 250
            pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs))))
            sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size]
            sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0))
            sample_size = len(ut.unique(sorted_nids))
            am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs))
            flags = ut.not_list(ut.flag_None_items(am_rowids))
            #am_rowids = ut.compress(am_rowids, flags)
            positive_tags = ['SplitCase', 'Photobomb']
            flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0)
                          for tag in positive_tags]
            print('edge_case_hist: ' + ut.repr3(
                ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)]))
            is_positive = ut.or_lists(*flags_list)
            num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values()))
            pop = len(pop_nids)
            print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),))
            print('--- Sampling wrt edges ---')
            print('edge_sample_size  = %r' % (edge_sample_size,))
            print('edge_population_size = %r' % (len(aid_pairs),))
            print('num_positive_edges = %r' % (sum(is_positive)))
            print('--- Sampling wrt names ---')
            print('name_population_size = %r' % (pop,))
            vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95)

        nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores)))

        win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False,
                                          init_mode=None)
        win.populate_edge_model()
        win.show()
        return win
        # Make review interface for only bad edges

    infr_list = []
    iter_ = list(zip(flagged_annots, bad_edges_list))
    for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw):
        aids = annots.aids
        nids = [1] * len(aids)
        infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False)
        infr.initialize_graph()
        infr.reset_feedback()
        infr_list.append(infr)

    # Check which ones are user defined as incorrect
    #num_positive = 0
    #for infr in infr_list:
    #    flag = np.any(infr.get_feedback_probs()[0] == 0)
    #    num_positive += flag
    #print('num_positive = %r' % (num_positive,))
    #pop = len(infr_list)
    #print('pop = %r' % (pop,))

    iter_ = list(zip(infr_list, bad_edges_list))
    for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw):
        flipped_edges = []
        for aid1, aid2 in bad_edges:
            if infr.graph.has_edge(aid1, aid2):
                flipped_edges.append((aid1, aid2))
            infr.add_feedback((aid1, aid2), NEGTV)
        nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig')
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges})
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges})

    #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw):
    #    annots = ibs.annots(infr.aids)
    #    edge_to_speeds = annots.get_speeds()
    #    bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]

    def inference_stats(infr_list_):
        relabel_stats = []
        for infr in infr_list_:
            num_ccs, num_inconsistent = infr.relabel_using_reviews()
            state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values())
            if POSTV not in state_hist:
                state_hist[POSTV] = 0
            hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values())

            subgraphs = infr.positive_connected_compoments()
            subgraph_sizes = [len(g) for g in subgraphs]

            info = ut.odict([
                ('num_nonmatch_edges', state_hist[NEGTV]),
                ('num_match_edges', state_hist[POSTV]),
                ('frac_nonmatch_edges',  state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])),
                ('num_inconsistent', num_inconsistent),
                ('num_ccs', num_ccs),
                ('edges_flipped', hist.get('flip', 0)),
                ('edges_unchanged', hist.get('orig', 0)),
                ('bad_unreviewed_edges', hist.get('new', 0)),
                ('orig_size', len(infr.graph)),
                ('new_sizes', subgraph_sizes),
            ])
            relabel_stats.append(info)
        return relabel_stats

    relabel_stats = inference_stats(infr_list)

    print('\nAll Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent'))
    can_split_flags = num_incon_list == 0
    print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags)))

    splittable_infrs = ut.compress(infr_list, can_split_flags)

    relabel_stats = inference_stats(splittable_infrs)

    print('\nTrivial Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        if key in ['num_inconsistent']:
            continue
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (
            key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges'))
    num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges'))
    flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3)
    reasonable_infr = ut.compress(splittable_infrs, flags1)

    new_sizes_list = ut.take_column(relabel_stats, 'new_sizes')
    flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3
              for sizes in new_sizes_list]
    reasonable_infr = ut.compress(splittable_infrs, flags2)
    print('#reasonable_infr = %r' % (len(reasonable_infr),))

    for infr in ut.InteractiveIter(reasonable_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values())),)
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    rest = ~np.logical_or(flags1, flags2)
    nonreasonable_infr = ut.compress(splittable_infrs, rest)
    rng = np.random.RandomState(0)
    random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng)
    random_infr = ut.take(nonreasonable_infr, random_idx)
    for infr in ut.InteractiveIter(random_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values())),)
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    #import scipy.stats as st
    #conf_interval = .95
    #st.norm.cdf(conf_interval)
    # view-source:http://www.surveysystem.com/sscalc.htm
    #zval = 1.96  # 95 percent confidence
    #zValC = 3.8416  #
    #zValC = 6.6564

    #import statsmodels.stats.api as sms
    #es = sms.proportion_effectsize(0.5, 0.75)
    #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1)

    pop = 279
    num_positive = 3
    sample_size = 15
    conf_level = .95
    #conf_level = .99
    vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level)
    print('---')

    vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95)

    pop = 279
    #err_frac = .05  # 5%
    err_frac = .10  # 10%
    conf_level = .95
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)

    pop = 675
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2)
    vt.calc_sample_from_error_bars(.10, pop, conf_level=.68)

    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
Example #20
def compute_occurrence_groups(ibs, gid_list, cluster_algo, cfgdict={}, use_gps=False, verbose=None):
    r"""
    Args:
        ibs (IBEISController):  ibeis controller object
        gid_list (list):

    Returns:
        tuple: (None, None)

    CommandLine:
        python -m ibeis --tf compute_occurrence_groups
        python -m ibeis --tf compute_occurrence_groups --show --zoom=.3

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.preproc.preproc_occurrence import *  # NOQA
        >>> import ibeis
        >>> import vtool as vt
        >>> #ibs = ibeis.opendb(defaultdb='testdb1')
        >>> ibs = ibeis.opendb(defaultdb='PZ_Master1')
        >>> gid_list = ibs.get_valid_gids(require_unixtime=True, require_gps=True)
        >>> use_gps = True
        >>> cluster_algo = 'meanshift'
        >>> cfgdict = dict(quantile=.005, min_imgs_per_occurence=2)
        >>> (occur_labels, occur_gids) = compute_occurrence_groups(ibs, gid_list, cluster_algo, cfgdict, use_gps=use_gps)
        >>> aidsgroups_list = ibs.unflat_map(ibs.get_image_aids, occur_gids)
        >>> aids_list = list(map(ut.flatten, aidsgroups_list))
        >>> nids_list = list(map(np.array, ibs.unflat_map(ibs.get_annot_name_rowids, aids_list)))
        >>> metric = [len(np.unique(nids[nids > -1])) for nids in nids_list]
        >>> metric = [vt.safe_max(np.array(ut.dict_hist(nids).values())) for nids in nids_list]
        >>> #metric = list(map(len, aids_list))
        >>> sortx = ut.list_argsort(metric)[::-1]
        >>> index = sortx[20]
        >>> #gids = occur_gids[index]
        >>> aids = aids_list[index]
        >>> aids = ibs.filter_annots_general(aids, min_qual='ok', is_known=True)
        >>> gids = list(set(ibs.get_annot_gids(aids)))
        >>> print('len(aids) = %r' % (len(aids),))
        >>> img_list = ibs.get_images(gids)
        >>> ut.quit_if_noshow()
        >>> from ibeis.viz import viz_graph
        >>> import plottool as pt
        >>> #pt.imshow(bigimg)
        >>> #aids = ibs.group_annots_by_name(aids)[0][0]
        >>> self = viz_graph.make_name_graph_interaction(ibs, aids=aids,
        >>>                                              with_all=False,
        >>>                                              prog='neato')
        >>> ut.show_if_requested()

        ibs.unflat_map(ibs.get_annot_case_tags, aids_list)
        ibs.filter_aidpairs_by_tags(has_any='photobomb')

        photobomb_aids = ibs.filter_aidpairs_by_tags(has_any='photobomb')
        aids = photobomb_aids[0:10].flatten()
        _gt_aids = ibs.get_annot_groundtruth(aids)
        gt_aids = ut.get_list_column_slice(_gt_aids, slice(0, 3))
        aid_set = np.unique(np.append(aids.flatten(), ut.flatten(gt_aids)))
        aid_set = ibs.filter_annots_general(aid_set, minqual='ok')

        # This is the set of annotations used for testing intraoccurrence photobombs
        #print(ut.repr3(ibeis.other.dbinfo.get_dbinfo(ibs, aid_list=aid_set), strvals=True, nl=1))
        print(ut.repr3(ibs.get_annot_stats_dict(aid_set, forceall=True), strvals=True, nl=1))

    """
    if verbose is None:
        verbose = ut.NOT_QUIET
    # Config info
    gid_list = np.unique(gid_list)
    if verbose:
        print("[occur] Computing %r occurrences on %r images." % (cluster_algo, len(gid_list)))
    if len(gid_list) == 0:
        print("[occur] WARNING: len(gid_list) == 0. " "No images to compute occurrences with")
        occur_labels, occur_gids = [], []
    else:
        if len(gid_list) == 1:
            print("[occur] WARNING: custering 1 image into its own occurrence")
            gid_arr = np.array(gid_list)
            label_arr = np.zeros(gid_arr.shape)
        else:
            X_data, gid_arr = prepare_X_data(ibs, gid_list, use_gps=use_gps)
            # Agglomerative clustering of unixtimes
            if cluster_algo == "agglomerative":
                seconds_thresh = cfgdict.get("seconds_thresh", 60.0)
                label_arr = agglomerative_cluster_occurrences(X_data, seconds_thresh)
            elif cluster_algo == "meanshift":
                quantile = cfgdict.get("quantile", 0.01)
                label_arr = meanshift_cluster_occurrences(X_data, quantile)
            else:
                raise AssertionError("[occurrence] Uknown clustering algorithm: %r" % cluster_algo)
        # Group images by unique label
        labels, label_gids = group_images_by_label(label_arr, gid_arr)
        # Remove occurrences less than the threshold
        occur_labels = labels
        occur_gids = label_gids
        occur_unixtimes = compute_occurrence_unixtime(ibs, occur_gids)
        min_imgs_per_occurence = cfgdict.get("min_imgs_per_occurence", 1)
        occur_labels, occur_gids = filter_and_relabel(labels, label_gids, min_imgs_per_occurence, occur_unixtimes)
        if verbose:
            print("[occur] Found %d clusters." % len(occur_labels))
        if len(label_gids) > 0 and verbose:
            print("[occur] Cluster image size stats:")
            ut.print_dict(ut.get_stats(list(map(len, occur_gids)), use_median=True, use_sum=True), "occur image stats")
    return occur_labels, occur_gids
Example #21
def find_consistent_labeling(grouped_oldnames,
                             extra_prefix='_extra_name',
                             verbose=False):
    r"""
    Solves a maximum bipartite matching problem to find a consistent name
    assignment that minimizes the number of annotations with different names.
    Each group of annotations (grouped by the new labeling) must be assigned a
    single name, chosen either from the old names occurring in that group or
    from a newly generated name built with extra_prefix.

    To reduce the running time, groups whose old names conflict with no other
    group are resolved trivially, and the remaining groups are partitioned into
    disjoint subproblems that are rectified independently.

    Args:
        grouped_oldnames (list): A group of old names where the grouping is
            based on new names. For instance:

                Given:
                    aids      = [1, 2, 3, 4, 5]
                    old_names = [0, 1, 1, 1, 0]
                    new_names = [0, 0, 1, 1, 0]

                The grouping is
                    [[0, 1, 0], [1, 1]]

                This lets us keep the old names in a split case and
                re-use existing names and make minimal changes to
                current annotation names while still being consistent
                with the new and improved grouping.

                The output will be:
                    [0, 1]

                Meaning that all annots in the first group are assigned the
                name 0 and all annots in the second group are assigned the name
                1.

    References:
        http://stackoverflow.com/questions/1398822/assignment-problem-numpy

    CommandLine:
        python -m ibeis.scripts.name_recitifer find_consistent_labeling


    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = testdata_oldnames(25, 15,  5, n_per_incon=5)
        >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1)
        >>> grouped_oldnames = testdata_oldnames(0, 15,  5, n_per_incon=1)
        >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1)
        >>> grouped_oldnames = testdata_oldnames(0, 0, 0, n_per_incon=1)
        >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1)

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> ydata = []
        >>> xdata = list(range(10, 150, 50))
        >>> for x in xdata:
        >>>     print('x = %r' % (x,))
        >>>     grouped_oldnames = testdata_oldnames(x, 15,  5, n_per_incon=5)
        >>>     t = ut.Timerit(3, verbose=1)
        >>>     for timer in t:
        >>>         with timer:
        >>>             new_names = find_consistent_labeling(grouped_oldnames)
        >>>     ydata.append(t.ave_secs)
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> pt.qtensure()
        >>> pt.multi_plot(xdata, [ydata])
        >>> ut.show_if_requested()

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b', 'c'], ['b', 'c'], ['c', 'e', 'e']]
        >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        ['a', 'b', 'e']

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b'], ['a', 'a', 'b'], ['a']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        ['b', 'a', '_extra_name0']

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b'], ['e'], ['a', 'a', 'b'], [], ['a'], ['d']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        ['b', 'e', 'a', '_extra_name0', '_extra_name1', 'd']

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [[], ['a', 'a'], [],
        >>>                     ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'b'], ['a']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        ['_extra_name0', 'a', '_extra_name1', 'b', '_extra_name2']
    """
    unique_old_names = ut.unique(ut.flatten(grouped_oldnames))
    n_old_names = len(unique_old_names)
    n_new_names = len(grouped_oldnames)

    # Initialize assignment to all Nones
    assignment = [None for _ in range(n_new_names)]

    if verbose:
        print('finding maximally consistent labeling')
        print('n_old_names = %r' % (n_old_names, ))
        print('n_new_names = %r' % (n_new_names, ))

    # For each old_name, determine now many new_names use it.
    oldname_sets = list(map(set, grouped_oldnames))
    oldname_usage = ut.dict_hist(ut.flatten(oldname_sets))

    # Any name used more than once is a conflict and must be resolved
    conflict_oldnames = {k for k, v in oldname_usage.items() if v > 1}

    # Partition into trivial and non-trivial cases
    nontrivial_oldnames = []
    nontrivial_new_idxs = []

    trivial_oldnames = []
    trivial_new_idxs = []
    for new_idx, group in enumerate(grouped_oldnames):
        if set(group).intersection(conflict_oldnames):
            nontrivial_oldnames.append(group)
            nontrivial_new_idxs.append(new_idx)
        else:
            trivial_oldnames.append(group)
            trivial_new_idxs.append(new_idx)

    # Rectify trivial cases
    # Any new-name that does not share any of its old-names with other
    # new-names can be resolved trivially
    n_trivial_unchanged = 0
    n_trivial_ignored = 0
    n_trivial_merges = 0
    for group, new_idx in zip(trivial_oldnames, trivial_new_idxs):
        if len(group) > 0:
            # new-names that use more than one old-name are simple merges
            h = ut.dict_hist(group)
            if len(h) > 1:
                n_trivial_merges += 1
            else:
                n_trivial_unchanged += 1
            hitems = list(h.items())
            hvals = [i[1] for i in hitems]
            maxval = max(hvals)
            g = min([k for k, v in hitems if v == maxval])
            assignment[new_idx] = g
        else:
            # new-names that use no old-names can be ignored
            n_trivial_ignored += 1

    if verbose:
        n_trivial = len(trivial_oldnames)
        n_nontrivial = len(nontrivial_oldnames)
        print('rectify %d trivial groups' % (n_trivial, ))
        print('  * n_trivial_unchanged = %r' % (n_trivial_unchanged, ))
        print('  * n_trivial_merges = %r' % (n_trivial_merges, ))
        print('  * n_trivial_ignored = %r' % (n_trivial_ignored, ))
        print('rectify %d non-trivial groups' % (n_nontrivial, ))

    # Partition nontrivial_oldnames into smaller disjoint sets
    nontrivial_oldnames_sets = list(map(set, nontrivial_oldnames))
    import networkx as nx
    g = nx.Graph()
    g.add_nodes_from(range(len(nontrivial_oldnames_sets)))
    for u, group1 in enumerate(nontrivial_oldnames_sets):
        rest = nontrivial_oldnames_sets[u + 1:]
        for v, group2 in enumerate(rest, start=u + 1):
            if group1.intersection(group2):
                g.add_edge(u, v)
    nontrivial_partition = list(nx.connected_components(g))
    if verbose:
        print('  * partitioned non-trivial into %d subgroups' %
              (len(nontrivial_partition)))
        part_size_stats = ut.get_stats(map(len, nontrivial_partition))
        stats_str = ut.repr2(part_size_stats, precision=2, strkeys=True)
        print('  * partition size stats = %s' % (stats_str, ))

    # Rectify nontrivial cases
    for part_idxs in ut.ProgIter(nontrivial_partition,
                                 lbl='rectify parts',
                                 enabled=verbose):
        part_oldnames = ut.take(nontrivial_oldnames, part_idxs)
        part_newidxs = ut.take(nontrivial_new_idxs, part_idxs)
        # Rectify this part
        assignment_ = simple_munkres(part_oldnames)
        for new_idx, new_name in zip(part_newidxs, assignment_):
            assignment[new_idx] = new_name

    # Any unassigned name is now given a new unique label with a prefix
    if extra_prefix is not None:
        num_extra = 0
        for idx, val in enumerate(assignment):
            if val is None:
                assignment[idx] = '%s%d' % (
                    extra_prefix,
                    num_extra,
                )
                num_extra += 1
    return assignment
Example #22
def get_dbinfo(ibs, verbose=True,
               with_imgsize=False,
               with_bytes=False,
               with_contrib=False,
               with_agesex=False,
               with_header=True,
               short=False,
               tag='dbinfo',
               aid_list=None):
    """

    Returns a dictionary of digestible database information.
    infostr is a string summary of all the stats; it is printed in addition to
    being included in the returned locals.

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    CommandLine:
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0
        python -m ibeis.other.dbinfo --test-get_dbinfo:1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example1:
        >>> # SCRIPT
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = ibeis.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = ibeis.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from ibeis.expt import cfghelpers
        >>> #from ibeis.expt import annotation_configs
        >>> #from ibeis.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> verbose = True
        >>> short = True
        >>> #ibs = ibeis.opendb(db='GZ_ALL')
        >>> #ibs = ibeis.opendb(db='PZ_Master0')
        >>> ibs = ibeis.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contrib_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = ibeis.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info:  testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names                      = 7
        # Names (unassociated)       = 0
        # Names (singleton)          = 5
        # Names (multiton)           = 2
        ----------
        # Annots                     = 13
        # Annots (unknown)           = 4
        # Annots (singleton)         = 5
        # Annots (multiton)          = 4
        ----------
        # Img                        = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            print('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from ibeis.expt import experiment_helpers
            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list)
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            #aid_list =
        if verbose:
            print('Specified %d custom aids' % (len(aid_list,)))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) -
            {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    #associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    FILTER_HACK = True
    if FILTER_HACK:
        # HUGE HACK - get only images and names with filtered aids
        valid_aids_ = ibs.filter_aids_custom(valid_aids)
        valid_nids_ = ibs.filter_nids_custom(valid_nids)
        valid_gids_ = ibs.filter_gids_custom(valid_gids)
        if verbose:
            print('Filtered %d names' % (len(valid_nids) - len(valid_nids_)))
            print('Filtered %d images' % (len(valid_gids) - len(valid_gids_)))
            print('Filtered %d annots' % (len(valid_aids) - len(valid_aids_)))
        valid_gids = valid_gids_
        valid_nids = valid_nids_
        valid_aids = valid_aids_
        #associated_nids = ut.compress(associated_nids, map(any,
        #ibs.unflat_map(ibs.get_annot_custom_filterflags,
        #               ibs.get_name_aids(associated_nids))))

    # Image info
    if verbose:
        print('Checking Image Info')
    gx2_aids = ibs.get_image_aids(valid_gids)
    if FILTER_HACK:
        gx2_aids = [ibs.filter_aids_custom(aids) for aids in gx2_aids]  # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats  = ut.get_stats_str(gx2_nAnnots, newlines=True, use_median=True)
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        print('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if FILTER_HACK:
        nx2_aids =  [ibs.filter_aids_custom(aids) for aids in nx2_aids]    # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in nx2_aids]
    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    # Occurrence Info
    def compute_annot_occurrence_ids(ibs, aid_list):
        from ibeis.algo.preproc import preproc_occurrence
        gid_list = ibs.get_annot_gids(aid_list)
        gid2_aids = ut.group_items(aid_list, gid_list)
        flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False)
        occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
        occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()}
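        # occurid2_aids maps each occurrence id to the annots of its grouped images;
        # ibeis_compute_occurrences presumably clusters images by timestamp using the
        # 4-hour seconds_thresh passed above.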
        return occurid2_aids

    import utool
    with utool.embed_on_exception_context:
        occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
        occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
        occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
        nid2_occurxs = ut.ddict(list)
        for occurx, nids in enumerate(occur_unique_nids):
            for nid in nids:
                nid2_occurxs[nid].append(occurx)

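    # Names seen in at most one occurrence are treated as single sightings;
    # names seen in more than one occurrence are resights.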
    nid2_occurx_single = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1}
    nid2_occurx_resight = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1}
    singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

    singlesight_annot_stats = ut.get_stats(list(map(len, singlesight_encounters)), use_median=True, use_sum=True)
    resight_name_stats = ut.get_stats(list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True)

    try:
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0)
        undirected_tags = ibs.get_aidpair_tags(aid_pairs.T[0], aid_pairs.T[1], directed=False)
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)

        num_reviewed_pairs = sum(ibs.get_annot_pair_is_reviewed(aid_pairs.T[0], aid_pairs.T[1]))
        pair_tag_info['num_reviewed'] = num_reviewed_pairs
    except Exception:
        pair_tag_info = {}

    #print(ut.dict_str(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        print('Checking Annot Species')
    unknown_aids = ut.compress(valid_aids, ibs.is_aid_unknown(valid_aids))
    species_list = ibs.get_annot_species_texts(valid_aids)
    species2_aids = ut.group_items(valid_aids, species_list)
    species2_nAids = {key: len(val) for key, val in species2_aids.items()}

    if verbose:
        print('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Separate singletons / multitons
    multiton_nxs  = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs      = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        print('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        multiton_aids = np.array([], dtype=int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            print('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)
        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = OrderedDict(
                [( 'max', wh_list.max(0)),
                 ( 'min', wh_list.min(0)),
                 ('mean', wh_list.mean(0)),
                 ( 'std', wh_list.std(0))])
            def arr2str(var):
                return ('[' + (
                    ', '.join(list(map(lambda x: '%.1f' % x, var)))
                ) + ']')
            ret = (',\n    '.join([
                '%s:%s' % (key, arr2str(val))
                for key, val in stat_dict.items()
            ]))
            return '{\n    ' + ret + '\n}'

        print('reading image sizes')
        # Image size stats
        img_size_list  = ibs.get_image_sizes(valid_gids)
        img_size_stats  = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir                 = %d' % len(gpath_list)),
            (' Image Size Stats  = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        print('Building Stats String')

    multiton_stats = ut.get_stats_str(multiton_nid2_nannots, newlines=True, use_median=True)

    # Time stats
    unixtime_list = ibs.get_image_unixtime(valid_gids)
    unixtime_list = ut.list_replace(unixtime_list, -1, float('nan'))
    #valid_unixtime_list = [time for time in unixtime_list if time != -1]
    #unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.get_timestats_str(unixtime_list, newlines=True, full=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list  = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
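        # Bucket each annot by its estimated age range (in months):
        # Infant < 12, Juvenile 12-35, Adult >= 36; anything else is UNKNOWN.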
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict((lambda : 0))
        for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
            # Substitute missing bounds so the comparisons below work in Python 3
            if max_age is None:
                max_age = min_age
            if min_age is None:
                min_age = max_age
            if min_age is None and max_age is None:
                print('Found UNKNOWN Age: %r, %r' % (min_age, max_age,))
                age_dict['UNKNOWN'] += 1
            elif min_age < 12 and max_age < 12:
                age_dict['Infant'] += 1
            elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
                age_dict['Juvenile'] += 1
            elif 36 <= min_age and 36 <= max_age:
                age_dict['Adult'] += 1
            else:
                print('Found UNKNOWN Age: %r, %r' % (min_age, max_age,))
                age_dict['UNKNOWN'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys))
        sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys])
        # Filter 0's
        sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0}
        return sextext2_nAnnots

    if verbose:
        print('Checking Other Annot Stats')

    qualtext2_nAnnots = ibs.get_annot_qual_stats(valid_aids)
    yawtext2_nAnnots = ibs.get_annot_yaw_stats(valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        print('Checking Contrib Stats')

    # Contributor Statistics
    # hack: remove colons for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]
    image_contrib_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contrib_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contrib_tag_to_gids = ut.group_items(valid_gids, image_contrib_tags)
    contrib_tag_to_aids = ut.group_items(valid_aids, annot_contrib_tags)

    contrib_tag_to_qualstats = {key: ibs.get_annot_qual_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)}
    contrib_tag_to_viewstats = {key: ibs.get_annot_yaw_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)}

    contrib_tag_to_nImages = {key: len(val) for key, val in six.iteritems(contrib_tag_to_gids)}
    contrib_tag_to_nAnnots = {key: len(val) for key, val in six.iteritems(contrib_tag_to_aids)}

    if verbose:
        print('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton =  len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_aids)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            print('Checking Disk Space')
        ibsdir_space   = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space    = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space   = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            print('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_aids)
            _num_names_total_check = num_names_singleton + num_names_unassociated + num_names_multiton
            _num_annots_total_check = num_unknown_annots + num_singleton_annots + num_multiton_annots
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            #if not request_annot_subset:
            # don't check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(ex, keys=[
                '_num_names_total_check',
                'num_names',
                '_num_annots_total_check',
                'num_annots',
                'num_names_singleton',
                'num_names_multiton',
                'num_unknown_annots',
                'num_multiton_annots',
                'num_singleton_annots',
            ])
            raise

    # Get contributor statistics
    contrib_rowids = ibs.get_valid_contrib_rowids()
    num_contributors = len(contrib_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.dict_str(dict_)
        return align2(str_)

    header_block_lines = (
        [('+============================'), ] + (
            [
                ('+ singleton := single sighting'),
                ('+ multiton  := multiple sightings'),
                ('--' * num_tabs),
            ] if not short and with_header else []
        )
    )

    source_block_lines = [
        ('DB Info:  ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = [
        ('--' * num_tabs),
        ('DB Bytes: '),
        ('     +- dbdir nBytes:         ' + dbdir_space),
        ('     |  +- _ibsdb nBytes:     ' + ibsdir_space),
        ('     |  |  +-imgdir nBytes:   ' + imgdir_space),
        ('     |  |  +-cachedir nBytes: ' + cachedir_space),
    ] if with_bytes else []

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names                      = %d' % num_names),
        ('# Names (unassociated)       = %d' % num_names_unassociated),
        ('# Names (singleton)          = %d' % num_names_singleton),
        ('# Names (multiton)           = %d' % num_names_multiton),
    ]

    subset_str = '        ' if not request_annot_subset else '(SUBSET)'

    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s            = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown)           = %d' % num_unknown_annots),
        ('# Annots (singleton)         = %d' % num_singleton_annots),
        ('# Annots (multiton)          = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = [
        ('--' * num_tabs),
        ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
        ('# Annots per Image           = %s' % (align2(gx2_nAnnots_stats),)),
        ('# Annots per Species         = %s' % (align_dict2(species2_nAids),)),
    ] if not short else []

    occurrence_block_lines = [
        ('--' * num_tabs),
        ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
        ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
        ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
    ] if not short else []

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(yawtext2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = [
        '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
        '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
    ] if not short and with_agesex else []

    contrib_block_lines = [
        '# Images per contributor       = ' + align_dict2(contrib_tag_to_nImages),
        '# Annots per contributor       = ' + align_dict2(contrib_tag_to_nAnnots),
        '# Quality per contributor      = ' + ut.dict_str(contrib_tag_to_qualstats, sorted_=True),
        '# Viewpoint per contributor    = ' + ut.dict_str(contrib_tag_to_viewstats, sorted_=True),
    ] if with_contrib else []

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img                        = %d' % len(valid_gids)),
        None if short else ('# Img reviewed               = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps               = %d' % len(gps_list)),
        #('# Img with timestamp         = %d' % len(valid_unixtime_list)),
        None if short else ('Img Time Stats               = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines +
        bytes_block_lines +
        source_block_lines +
        name_block_lines +
        annot_block_lines +
        annot_per_basic_block_lines +
        occurrence_block_lines +
        annot_per_qualview_block_lines +
        annot_per_agesex_block_lines +
        img_block_lines +
        contrib_block_lines +
        imgsize_stat_lines +
        [('L============================'), ]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        print(info_str2)
    locals_ = locals()
    return locals_
Ejemplo n.º 23
0
def run_asmk_script():
    with ut.embed_on_exception_context:  # NOQA
        """
    >>> from wbia.algo.smk.script_smk import *
    """

  # NOQA

        # ==============================================
        # PREPROCESSING CONFIGURATION
        # ==============================================
        config = {
            # 'data_year': 2013,
            'data_year': None,
            'dtype': 'float32',
            # 'root_sift': True,
            'root_sift': False,
            # 'centering': True,
            'centering': False,
            'num_words': 2**16,
            # 'num_words': 1E6
            # 'num_words': 8000,
            'kmeans_impl': 'sklearn.mini',
            'extern_words': False,
            'extern_assign': False,
            'assign_algo': 'kdtree',
            'checks': 1024,
            'int_rvec': True,
            'only_xy': False,
        }
        # Define which params are relevant for which operations
        relevance = {}
        relevance['feats'] = ['dtype', 'root_sift', 'centering', 'data_year']
        relevance['words'] = relevance['feats'] + [
            'num_words',
            'extern_words',
            'kmeans_impl',
        ]
        relevance['assign'] = relevance['words'] + [
            'checks',
            'extern_assign',
            'assign_algo',
        ]
        # relevance['ydata'] = relevance['assign'] + ['int_rvec']
        # relevance['xdata'] = relevance['assign'] + ['only_xy', 'int_rvec']

        nAssign = 1

        class SMKCacher(ut.Cacher):
            def __init__(self, fname, ext='.cPkl'):
                relevant_params = relevance[fname]
                relevant_cfg = ut.dict_subset(config, relevant_params)
                cfgstr = ut.get_cfg_lbl(relevant_cfg)
                dbdir = ut.truepath('/raid/work/Oxford/')
                super(SMKCacher, self).__init__(fname,
                                                cfgstr,
                                                cache_dir=dbdir,
                                                ext=ext)

        # ==============================================
        # LOAD DATASET, EXTRACT AND POSTPROCESS FEATURES
        # ==============================================
        if config['data_year'] == 2007:
            data = load_oxford_2007()
        elif config['data_year'] == 2013:
            data = load_oxford_2013()
        elif config['data_year'] is None:
            data = load_oxford_wbia()

        offset_list = data['offset_list']
        all_kpts = data['all_kpts']
        raw_vecs = data['all_vecs']
        query_uri_order = data['query_uri_order']
        data_uri_order = data['data_uri_order']
        # del data

        # ================
        # PRE-PROCESS
        # ================
        import vtool as vt

        # Alias names to avoid errors in interactive sessions
        proc_vecs = raw_vecs
        del raw_vecs

        feats_cacher = SMKCacher('feats', ext='.npy')
        all_vecs = feats_cacher.tryload()
        if all_vecs is None:
            if config['dtype'] == 'float32':
                logger.info('Converting vecs to float32')
                proc_vecs = proc_vecs.astype(np.float32)
            else:
                proc_vecs = proc_vecs
                raise NotImplementedError('other dtype')

            if config['root_sift']:
                with ut.Timer('Apply root sift'):
                    np.sqrt(proc_vecs, out=proc_vecs)
                    vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs)

            if config['centering']:
                with ut.Timer('Apply centering'):
                    mean_vec = np.mean(proc_vecs, axis=0)
                    # Center and then re-normalize
                    np.subtract(proc_vecs, mean_vec[None, :], out=proc_vecs)
                    vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs)

            if config['dtype'] == 'int8':
                # int8 descriptor casting is not implemented here; integer residuals
                # are handled later via smk_funcs.cast_residual_integer when int_rvec
                # is enabled
                pass

            all_vecs = proc_vecs
            feats_cacher.save(all_vecs)
        del proc_vecs

        # =====================================
        # BUILD VISUAL VOCABULARY
        # =====================================
        if config['extern_words']:
            words = data['words']
            assert config['num_words'] is None or len(words) == config['num_words']
        else:
            word_cacher = SMKCacher('words')
            words = word_cacher.tryload()
            if words is None:
                with ut.embed_on_exception_context:
                    if config['kmeans_impl'] == 'sklearn.mini':
                        import sklearn.cluster

                        rng = np.random.RandomState(13421421)
                        # init_size = int(config['num_words'] * 8)
                        init_size = int(config['num_words'] * 4)
                        # converged after 26043 iterations
                        clusterer = sklearn.cluster.MiniBatchKMeans(
                            config['num_words'],
                            init_size=init_size,
                            batch_size=1000,
                            compute_labels=False,
                            max_iter=20,
                            random_state=rng,
                            n_init=1,
                            verbose=1,
                        )
                        clusterer.fit(all_vecs)
                        words = clusterer.cluster_centers_
                    elif config['kmeans_impl'] == 'yael':
                        from yael import ynumpy

                        centroids, qerr, dis, assign, nassign = ynumpy.kmeans(
                            all_vecs,
                            config['num_words'],
                            init='kmeans++',
                            verbose=True,
                            output='all',
                        )
                        words = centroids
                    word_cacher.save(words)

        # =====================================
        # ASSIGN EACH VECTOR TO ITS NEAREST WORD
        # =====================================
        if config['extern_assign']:
            assert config['extern_words'], 'need extern cluster to extern assign'
            idx_to_wxs = vt.atleast_nd(data['idx_to_wx'], 2)
            idx_to_maws = np.ones(idx_to_wxs.shape, dtype=np.float32)
            idx_to_wxs = np.ma.array(idx_to_wxs)
            idx_to_maws = np.ma.array(idx_to_maws)
        else:
            from wbia.algo.smk import vocab_indexer

            vocab = vocab_indexer.VisualVocab(words)
            dassign_cacher = SMKCacher('assign')
            assign_tup = dassign_cacher.tryload()
            if assign_tup is None:
                vocab.flann_params['algorithm'] = config['assign_algo']
                vocab.build()
                # Takes 12 minutes to assign Jegou's vecs to 2**16 vocab
                with ut.Timer('assign vocab neighbors'):
                    _idx_to_wx, _idx_to_wdist = vocab.nn_index(
                        all_vecs, nAssign, checks=config['checks'])
                    if nAssign > 1:
                        idx_to_wxs, idx_to_maws = smk_funcs.weight_multi_assigns(
                            _idx_to_wx,
                            _idx_to_wdist,
                            massign_alpha=1.2,
                            massign_sigma=80.0,
                            massign_equal_weights=True,
                        )
                    else:
                        idx_to_wxs = np.ma.masked_array(_idx_to_wx,
                                                        fill_value=-1)
                        idx_to_maws = np.ma.ones(idx_to_wxs.shape,
                                                 fill_value=-1,
                                                 dtype=np.float32)
                        idx_to_maws.mask = idx_to_wxs.mask
                assign_tup = (idx_to_wxs, idx_to_maws)
                dassign_cacher.save(assign_tup)

        idx_to_wxs, idx_to_maws = assign_tup

        # Breakup vectors, keypoints, and word assignments by annotation
        wx_lists = [
            idx_to_wxs[left:right] for left, right in ut.itertwo(offset_list)
        ]
        maw_lists = [
            idx_to_maws[left:right] for left, right in ut.itertwo(offset_list)
        ]
        vecs_list = [
            all_vecs[left:right] for left, right in ut.itertwo(offset_list)
        ]
        kpts_list = [
            all_kpts[left:right] for left, right in ut.itertwo(offset_list)
        ]
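        # For example (hypothetical values): with offset_list = [0, 3, 7] the slices
        # above give wx_lists == [idx_to_wxs[0:3], idx_to_wxs[3:7]], i.e. one chunk of
        # word assignments, maws, vecs, and kpts per annotation.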

        # =======================
        # FIND QUERY SUBREGIONS
        # =======================

        ibs, query_annots, data_annots, qx_to_dx = load_ordered_annots(
            data_uri_order, query_uri_order)
        daids = data_annots.aids
        qaids = query_annots.aids

        query_super_kpts = ut.take(kpts_list, qx_to_dx)
        query_super_vecs = ut.take(vecs_list, qx_to_dx)
        query_super_wxs = ut.take(wx_lists, qx_to_dx)
        query_super_maws = ut.take(maw_lists, qx_to_dx)
        # Mark which keypoints are within the bbox of the query
        query_flags_list = []
        only_xy = config['only_xy']
        for kpts_, bbox in zip(query_super_kpts, query_annots.bboxes):
            flags = kpts_inside_bbox(kpts_, bbox, only_xy=only_xy)
            query_flags_list.append(flags)

        logger.info('Queries are crops of existing database images.')
        logger.info('Looking at average percents')
        percent_list = [
            flags_.sum() / flags_.shape[0] for flags_ in query_flags_list
        ]
        percent_stats = ut.get_stats(percent_list)
        logger.info('percent_stats = %s' % (ut.repr4(percent_stats), ))

        import vtool as vt

        query_kpts = vt.zipcompress(query_super_kpts, query_flags_list, axis=0)
        query_vecs = vt.zipcompress(query_super_vecs, query_flags_list, axis=0)
        query_wxs = vt.zipcompress(query_super_wxs, query_flags_list, axis=0)
        query_maws = vt.zipcompress(query_super_maws, query_flags_list, axis=0)
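        # zipcompress keeps only the keypoints/vecs/word-assignments whose flags are
        # True, i.e. the features that fall inside each query's bounding box.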

        # =======================
        # CONSTRUCT QUERY / DATABASE REPR
        # =======================

        # int_rvec = not config['dtype'].startswith('float')
        int_rvec = config['int_rvec']

        X_list = []
        _prog = ut.ProgPartial(length=len(qaids),
                               label='new X',
                               bs=True,
                               adjust=True)
        for aid, fx_to_wxs, fx_to_maws in _prog(
                zip(qaids, query_wxs, query_maws)):
            X = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec)
            X_list.append(X)

        # ydata_cacher = SMKCacher('ydata')
        # Y_list = ydata_cacher.tryload()
        # if Y_list is None:
        Y_list = []
        _prog = ut.ProgPartial(length=len(daids),
                               label='new Y',
                               bs=True,
                               adjust=True)
        for aid, fx_to_wxs, fx_to_maws in _prog(zip(daids, wx_lists,
                                                    maw_lists)):
            Y = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec)
            Y_list.append(Y)
        # ydata_cacher.save(Y_list)

        # ======================
        # Add in some groundtruth

        logger.info('Add in some groundtruth')
        for Y, nid in zip(Y_list, ibs.get_annot_nids(daids)):
            Y.nid = nid

        for X, nid in zip(X_list, ibs.get_annot_nids(qaids)):
            X.nid = nid

        for Y, qual in zip(Y_list, ibs.get_annot_quality_texts(daids)):
            Y.qual = qual

        # ======================
        # Add in other properties
        for Y, vecs, kpts in zip(Y_list, vecs_list, kpts_list):
            Y.vecs = vecs
            Y.kpts = kpts

        imgdir = ut.truepath('/raid/work/Oxford/oxbuild_images')
        for Y, imgid in zip(Y_list, data_uri_order):
            gpath = ut.unixjoin(imgdir, imgid + '.jpg')
            Y.gpath = gpath

        for X, vecs, kpts in zip(X_list, query_vecs, query_kpts):
            X.kpts = kpts
            X.vecs = vecs

        # ======================
        logger.info('Building inverted list')
        daids = [Y.aid for Y in Y_list]
        # wx_list = sorted(ut.list_union(*[Y.wx_list for Y in Y_list]))
        wx_list = sorted(set.union(*[Y.wx_set for Y in Y_list]))
        assert daids == data_annots.aids
        assert len(wx_list) <= config['num_words']

        wx_to_aids = smk_funcs.invert_lists(daids, [Y.wx_list for Y in Y_list],
                                            all_wxs=wx_list)
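        # wx_to_aids is an inverted index: each word maps to the database annots
        # (documents) whose descriptors were assigned to it.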

        # Compute IDF weights
        logger.info('Compute IDF weights')
        ndocs_total = len(daids)
        # Use only the unique number of words
        ndocs_per_word = np.array([len(set(wx_to_aids[wx])) for wx in wx_list])
        logger.info('ndocs_perword stats: ' +
                    ut.repr4(ut.get_stats(ndocs_per_word)))
        idf_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word)
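        # Assumption: inv_doc_freq follows the usual tf-idf convention, roughly
        # idf_w = ln(ndocs_total / ndocs_per_word[w]); the exact smoothing is an
        # smk_funcs implementation detail.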
        wx_to_weight = dict(zip(wx_list, idf_per_word))
        logger.info('idf stats: ' +
                    ut.repr4(ut.get_stats(wx_to_weight.values())))

        # Filter junk
        Y_list_ = [Y for Y in Y_list if Y.qual != 'junk']

        # =======================
        # CHOOSE QUERY KERNEL
        # =======================
        params = {
            'asmk': dict(alpha=3.0, thresh=0.0),
            'bow': dict(),
            'bow2': dict(),
        }
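        # Assumption: for 'asmk' these parameters feed the ASMK selectivity function,
        # roughly sigma_alpha(u) = sign(u) * |u|**alpha applied above thresh;
        # alpha=3.0 and thresh=0.0 are the values commonly used in the ASMK literature.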
        # method = 'bow'
        # method = 'bow2'
        method = 'asmk'
        smk = SMK(wx_to_weight, method=method, **params[method])

        # Specific info for the type of query
        if method == 'asmk':
            # Make residual vectors
            if True:
                # The stacked way is 50x faster
                # TODO: extend for multi-assignment and record fxs
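                # Assumption: compute_stacked_agg_rvecs aggregates, per assigned word,
                # the residuals (descriptor - word center) of all descriptors mapped
                # to that word, returning stacked aggregates plus per-annot offsets.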
                flat_query_vecs = np.vstack(query_vecs)
                flat_query_wxs = np.vstack(query_wxs)
                flat_query_offsets = np.array(
                    [0] + ut.cumsum(ut.lmap(len, query_wxs)))
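                # flat_query_offsets marks where each query annot's features start in
                # the stacked arrays, so per-annot slices can be recovered with
                # ut.itertwo further below.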

                flat_wxs_assign = flat_query_wxs
                flat_offsets = flat_query_offsets
                flat_vecs = flat_query_vecs
                tup = smk_funcs.compute_stacked_agg_rvecs(
                    words, flat_wxs_assign, flat_vecs, flat_offsets)
                all_agg_vecs, all_error_flags, agg_offset_list = tup
                if int_rvec:
                    all_agg_vecs = smk_funcs.cast_residual_integer(
                        all_agg_vecs)
                agg_rvecs_list = [
                    all_agg_vecs[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]
                agg_flags_list = [
                    all_error_flags[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]

                for X, agg_rvecs, agg_flags in zip(X_list, agg_rvecs_list,
                                                   agg_flags_list):
                    X.agg_rvecs = agg_rvecs
                    X.agg_flags = agg_flags[:, None]

                flat_wxs_assign = idx_to_wxs
                flat_offsets = offset_list
                flat_vecs = all_vecs
                tup = smk_funcs.compute_stacked_agg_rvecs(
                    words, flat_wxs_assign, flat_vecs, flat_offsets)
                all_agg_vecs, all_error_flags, agg_offset_list = tup
                if int_rvec:
                    all_agg_vecs = smk_funcs.cast_residual_integer(
                        all_agg_vecs)

                agg_rvecs_list = [
                    all_agg_vecs[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]
                agg_flags_list = [
                    all_error_flags[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]

                for Y, agg_rvecs, agg_flags in zip(Y_list, agg_rvecs_list,
                                                   agg_flags_list):
                    Y.agg_rvecs = agg_rvecs
                    Y.agg_flags = agg_flags[:, None]
            else:
                # This non-stacked way is about 500x slower
                _prog = ut.ProgPartial(label='agg Y rvecs',
                                       bs=True,
                                       adjust=True)
                for Y in _prog(Y_list_):
                    make_agg_vecs(Y, words, Y.vecs)

                _prog = ut.ProgPartial(label='agg X rvecs',
                                       bs=True,
                                       adjust=True)
                for X in _prog(X_list):
                    make_agg_vecs(X, words, X.vecs)
        elif method == 'bow2':
            # Hack for orig tf-idf bow vector
            nwords = len(words)
            for X in ut.ProgIter(X_list, label='make bow vector'):
                ensure_tf(X)
                bow_vector(X, wx_to_weight, nwords)

            for Y in ut.ProgIter(Y_list_, label='make bow vector'):
                ensure_tf(Y)
                bow_vector(Y, wx_to_weight, nwords)

        if method != 'bow2':
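            # Assumption: smk.gamma(X) is the SMK self-similarity normalizer,
            # gamma(X) = 1 / sqrt(K(X, X)), so each annot scores 1.0 against itself.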
            for X in ut.ProgIter(X_list, 'compute X gamma'):
                X.gamma = smk.gamma(X)
            for Y in ut.ProgIter(Y_list_, 'compute Y gamma'):
                Y.gamma = smk.gamma(Y)

        # Execute matches (could go faster by enumerating candidates)
        scores_list = []
        for X in ut.ProgIter(X_list, label='query %s' % (smk, )):
            scores = [smk.kernel(X, Y) for Y in Y_list_]
            scores = np.array(scores)
            scores = np.nan_to_num(scores)
            scores_list.append(scores)

        import sklearn.metrics

        avep_list = []
        _iter = list(zip(scores_list, X_list))
        _iter = ut.ProgIter(_iter, label='evaluate %s' % (smk, ))
        for scores, X in _iter:
            truth = [X.nid == Y.nid for Y in Y_list_]
            avep = sklearn.metrics.average_precision_score(truth, scores)
            avep_list.append(avep)
        avep_list = np.array(avep_list)
        mAP = np.mean(avep_list)
        logger.info('mAP  = %r' % (mAP, ))
Ejemplo n.º 24
0
def latex_dbstats(ibs_list, **kwargs):
    r"""
    Args:
        ibs (IBEISController):  wbia controller object

    CommandLine:
        python -m wbia.other.dbinfo --exec-latex_dbstats --dblist testdb1
        python -m wbia.other.dbinfo --exec-latex_dbstats --dblist testdb1 --show
        python -m wbia.other.dbinfo --exec-latex_dbstats --dblist PZ_Master0 testdb1 --show
        python -m wbia.other.dbinfo --exec-latex_dbstats --dblist PZ_Master0 PZ_MTEST GZ_ALL --show
        python -m wbia.other.dbinfo --test-latex_dbstats --dblist GZ_ALL NNP_MasterGIRM_core --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> db_list = ut.get_argval('--dblist', type_=list, default=['testdb1'])
        >>> ibs_list = [wbia.opendb(db=db) for db in db_list]
        >>> tabular_str = latex_dbstats(ibs_list)
        >>> tabular_cmd = ut.latex_newcommand(ut.latex_sanitize_command_name('DatabaseInfo'), tabular_str)
        >>> ut.copy_text_to_clipboard(tabular_cmd)
        >>> write_fpath = ut.get_argval('--write', type_=str, default=None)
        >>> if write_fpath is not None:
        >>>     fpath = ut.truepath(write_fpath)
        >>>     text = ut.readfrom(fpath)
        >>>     new_text = ut.replace_between_tags(text, tabular_cmd, '% <DBINFO>', '% </DBINFO>')
        >>>     ut.writeto(fpath, new_text)
        >>> ut.print_code(tabular_cmd, 'latex')
        >>> ut.quit_if_noshow()
        >>> ut.render_latex_text('\\noindent \n' + tabular_str)
    """
    import wbia

    # Parse for aids test data
    aids_list = [wbia.testdata_aids(ibs=ibs) for ibs in ibs_list]

    # dbinfo_list = [get_dbinfo(ibs, with_contrib=False, verbose=False) for ibs in ibs_list]
    dbinfo_list = [
        get_dbinfo(ibs, with_contrib=False, verbose=False, aid_list=aids)
        for ibs, aids in zip(ibs_list, aids_list)
    ]

    # title = db_name + ' database statistics'
    title = 'Database statistics'
    stat_title = '# Annotations per name (multiton)'

    # col_lbls = [
    #    'multiton',
    #    #'singleton',
    #    'total',
    #    'multiton',
    #    'singleton',
    #    'total',
    # ]
    key_to_col_lbls = {
        'num_names_multiton': 'multiton',
        'num_names_singleton': 'singleton',
        'num_names': 'total',
        'num_multiton_annots': 'multiton',
        'num_singleton_annots': 'singleton',
        'num_unknown_annots': 'unknown',
        'num_annots': 'total',
    }
    # Structure of columns / multicolumns
    multi_col_keys = [
        (
            '# Names',
            (
                'num_names_multiton',
                # 'num_names_singleton',
                'num_names',
            ),
        ),
        (
            '# Annots',
            (
                'num_multiton_annots',
                'num_singleton_annots',
                # 'num_unknown_annots',
                'num_annots',
            ),
        ),
    ]
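    # Each entry pairs a multicolumn header with the dbinfo keys that become its
    # sub-columns; the keys are flattened into col_keys below.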
    # multicol_lbls = [('# Names', 3), ('# Annots', 3)]
    multicol_lbls = [(mcolname, len(mcols)) for mcolname, mcols in multi_col_keys]

    # Flatten column labels
    col_keys = ut.flatten(ut.get_list_column(multi_col_keys, 1))
    col_lbls = ut.dict_take(key_to_col_lbls, col_keys)

    row_lbls = []
    row_values = []

    # stat_col_lbls = ['max', 'min', 'mean', 'std', 'nMin', 'nMax']
    stat_col_lbls = ['max', 'min', 'mean', 'std', 'med']
    # stat_row_lbls = ['# Annot per Name (multiton)']
    stat_row_lbls = []
    stat_row_values = []

    SINGLE_TABLE = False
    EXTRA = True

    for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list):
        row_ = ut.dict_take(dbinfo_locals, col_keys)
        dbname = ibs.get_dbname_alias()
        row_lbls.append(dbname)
        multiton_annot_stats = ut.get_stats(
            dbinfo_locals['multiton_nid2_nannots'], use_median=True, nl=1
        )
        stat_rows = ut.dict_take(multiton_annot_stats, stat_col_lbls)
        if SINGLE_TABLE:
            row_.extend(stat_rows)
        else:
            stat_row_lbls.append(dbname)
            stat_row_values.append(stat_rows)

        row_values.append(row_)

    CENTERLINE = False
    AS_TABLE = True
    tablekw = dict(
        astable=AS_TABLE,
        centerline=CENTERLINE,
        FORCE_INT=False,
        precision=2,
        col_sep='',
        multicol_sep='|',
        **kwargs
    )

    if EXTRA:
        extra_keys = [
            # 'species2_nAids',
            'qualtext2_nAnnots',
            'viewcode2_nAnnots',
        ]
        extra_titles = {
            'species2_nAids': 'Annotations per species.',
            'qualtext2_nAnnots': 'Annotations per quality.',
            'viewcode2_nAnnots': 'Annotations per viewpoint.',
        }
        extra_collbls = ut.ddict(list)
        extra_rowvalues = ut.ddict(list)
        extra_tables = ut.ddict(list)

        for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list):
            for key in extra_keys:
                extra_collbls[key] = ut.unique_ordered(
                    extra_collbls[key] + list(dbinfo_locals[key].keys())
                )

        extra_collbls['qualtext2_nAnnots'] = [
            'excellent',
            'good',
            'ok',
            'poor',
            'junk',
            'UNKNOWN',
        ]
        # extra_collbls['viewcode2_nAnnots'] = ['backleft', 'left', 'frontleft', 'front', 'frontright', 'right', 'backright', 'back', None]
        extra_collbls['viewcode2_nAnnots'] = [
            'BL',
            'L',
            'FL',
            'F',
            'FR',
            'R',
            'BR',
            'B',
            None,
        ]

        for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list):
            for key in extra_keys:
                extra_rowvalues[key].append(
                    ut.dict_take(dbinfo_locals[key], extra_collbls[key], 0)
                )

        qualalias = {'UNKNOWN': None}

        extra_collbls['viewcode2_nAnnots'] = [
            ibs.const.YAWALIAS.get(val, val) for val in extra_collbls['viewcode2_nAnnots']
        ]
        extra_collbls['qualtext2_nAnnots'] = [
            qualalias.get(val, val) for val in extra_collbls['qualtext2_nAnnots']
        ]

        for key in extra_keys:
            extra_tables[key] = ut.util_latex.make_score_tabular(
                row_lbls,
                extra_collbls[key],
                extra_rowvalues[key],
                title=extra_titles[key],
                col_align='r',
                table_position='[h!]',
                **tablekw
            )

    # tabular_str = util_latex.tabular_join(tabular_body_list)
    if SINGLE_TABLE:
        col_lbls += stat_col_lbls
        multicol_lbls += [(stat_title, len(stat_col_lbls))]

    count_tabular_str = ut.util_latex.make_score_tabular(
        row_lbls,
        col_lbls,
        row_values,
        title=title,
        multicol_lbls=multicol_lbls,
        table_position='[ht!]',
        **tablekw
    )

    # logger.info(row_lbls)

    if SINGLE_TABLE:
        tabular_str = count_tabular_str
    else:
        stat_tabular_str = ut.util_latex.make_score_tabular(
            stat_row_lbls,
            stat_col_lbls,
            stat_row_values,
            title=stat_title,
            col_align='r',
            table_position='[h!]',
            **tablekw
        )

        # Make a table of statistics
        if tablekw['astable']:
            tablesep = '\n%--\n'
        else:
            tablesep = '\\\\\n%--\n'
        if EXTRA:
            tabular_str = tablesep.join(
                [count_tabular_str, stat_tabular_str]
                + ut.dict_take(extra_tables, extra_keys)
            )
        else:
            tabular_str = tablesep.join([count_tabular_str, stat_tabular_str])

    return tabular_str
Ejemplo n.º 25
0
def get_dbinfo(
    ibs,
    verbose=True,
    with_imgsize=False,
    with_bytes=False,
    with_contrib=False,
    with_agesex=False,
    with_header=True,
    short=False,
    tag='dbinfo',
    aid_list=None,
    aids=None,
):
    """

    Returns a dictionary of digestible database information.
    Infostr is a string summary of all the stats. Prints infostr in addition to
    returning locals.

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    SeeAlso:
        python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --use-hist=True --old=False --per_name_vpedge=False
        python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --all

    CommandLine:
        python -m wbia.other.dbinfo --exec-get_dbinfo:0
        python -m wbia.other.dbinfo --test-get_dbinfo:1
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db LF_Bajo_bonito -a default
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db DETECT_SEATURTLES -a default --readonly

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example1:
        >>> # SCRIPT
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = wbia.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = wbia.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from wbia.expt import cfghelpers
        >>> #from wbia.expt import annotation_configs
        >>> #from wbia.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> verbose = True
        >>> short = True
        >>> #ibs = wbia.opendb(db='GZ_ALL')
        >>> #ibs = wbia.opendb(db='PZ_Master0')
        >>> ibs = wbia.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contributor_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = wbia.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info:  testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names                      = 7
        # Names (unassociated)       = 0
        # Names (singleton)          = 5
        # Names (multiton)           = 2
        ----------
        # Annots                     = 13
        # Annots (unknown)           = 4
        # Annots (singleton)         = 5
        # Annots (multiton)          = 4
        ----------
        # Img                        = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...
    if aids is not None:
        aid_list = aids

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            logger.info('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from wbia.expt import experiment_helpers

            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list
            )
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            # aid_list =
        if verbose:
            logger.info('Specified %d custom aids' % (len(aid_list),))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False))
            - {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    # associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    valid_images = ibs.images(valid_gids)
    valid_annots = ibs.annots(valid_aids)

    # Image info
    if verbose:
        logger.info('Checking Image Info')
    gx2_aids = valid_images.aids
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats = ut.repr4(
        ut.get_stats(gx2_nAnnots, use_median=True), nl=0, precision=2, si=True
    )
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        logger.info('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in nx2_aids]
    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    if False:
        # Occurrence Info
        def compute_annot_occurrence_ids(ibs, aid_list):
            from wbia.algo.preproc import preproc_occurrence

            gid_list = ibs.get_annot_gids(aid_list)
            gid2_aids = ut.group_items(aid_list, gid_list)
            config = {'seconds_thresh': 4 * 60 * 60}
            flat_imgsetids, flat_gids = preproc_occurrence.wbia_compute_occurrences(
                ibs, gid_list, config=config, verbose=False
            )
            occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
            occurid2_aids = {
                oid: ut.flatten(ut.take(gid2_aids, gids))
                for oid, gids in occurid2_gids.items()
            }
            return occurid2_aids

        import utool

        with utool.embed_on_exception_context:
            occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
            occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
            occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
            nid2_occurxs = ut.ddict(list)
            for occurx, nids in enumerate(occur_unique_nids):
                for nid in nids:
                    nid2_occurxs[nid].append(occurx)

        nid2_occurx_single = {
            nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1
        }
        nid2_occurx_resight = {
            nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1
        }
        singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

        singlesight_annot_stats = ut.get_stats(
            list(map(len, singlesight_encounters)), use_median=True, use_sum=True
        )
        resight_name_stats = ut.get_stats(
            list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True
        )

    # Encounter Info
    def break_annots_into_encounters(aids):
        from wbia.algo.preproc import occurrence_blackbox
        import datetime

        thresh_sec = datetime.timedelta(minutes=30).seconds
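        # Annots whose image timestamps fall within 30 minutes of each other are
        # presumably clustered into the same encounter label by cluster_timespace2.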
        posixtimes = np.array(ibs.get_annot_image_unixtimes_asfloat(aids))
        # latlons = ibs.get_annot_image_gps(aids)
        labels = occurrence_blackbox.cluster_timespace2(
            posixtimes, None, thresh_sec=thresh_sec
        )
        return labels
        # ave_enc_time = [np.mean(times) for lbl, times in ut.group_items(posixtimes, labels).items()]
        # ut.square_pdist(ave_enc_time)

    try:
        am_rowids = ibs.get_annotmatch_rowids_between_groups([valid_aids], [valid_aids])[
            0
        ]
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0, am_rowids=am_rowids)
        undirected_tags = ibs.get_aidpair_tags(
            aid_pairs.T[0], aid_pairs.T[1], directed=False
        )
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)
    except Exception:
        pair_tag_info = {}

    # logger.info(ut.repr2(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        logger.info('Checking Annot Species')
    unknown_annots = valid_annots.compress(ibs.is_aid_unknown(valid_annots))
    species_list = valid_annots.species_texts
    species2_annots = valid_annots.group_items(valid_annots.species_texts)
    species2_nAids = {key: len(val) for key, val in species2_annots.items()}

    if verbose:
        logger.info('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Separate singletons / multitons
    multiton_nxs = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        logger.info('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        multiton_aids = np.array([], dtype=int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            logger.info('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)

        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = collections.OrderedDict(
                [
                    ('max', wh_list.max(0)),
                    ('min', wh_list.min(0)),
                    ('mean', wh_list.mean(0)),
                    ('std', wh_list.std(0)),
                ]
            )

            def arr2str(var):
                return '[' + (', '.join(list(map(lambda x: '%.1f' % x, var)))) + ']'

            ret = ',\n    '.join(
                ['%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items()]
            )
            return '{\n    ' + ret + '\n}'

        logger.info('reading image sizes')
        # Image size stats
        img_size_list = ibs.get_image_sizes(valid_gids)
        img_size_stats = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir                 = %d' % len(gpath_list)),
            (' Image Size Stats  = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        logger.info('Building Stats String')

    multiton_stats = ut.repr3(
        ut.get_stats(multiton_nid2_nannots, use_median=True), nl=0, precision=2, si=True
    )

    # Time stats
    unixtime_list = valid_images.unixtime2
    # valid_unixtime_list = [time for time in unixtime_list if time != -1]
    # unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.repr3(ut.get_timestats_dict(unixtime_list, full=True), si=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict((lambda: 0))
        for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
            if max_age is None:
                max_age = min_age
            if min_age is None:
                min_age = max_age
            if max_age is None and min_age is None:
                logger.info('Found UNKNOWN Age: %r, %r' % (min_age, max_age,))
                age_dict['UNKNOWN'] += 1
            elif (min_age is None or min_age < 12) and max_age < 12:
                age_dict['Infant'] += 1
            elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
                age_dict['Juvenile'] += 1
            elif 36 <= min_age and (max_age is None or 36 <= max_age):
                age_dict['Adult'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(
            set(annot_sextext_list) - set(sex_keys)
        )
        sextext2_nAnnots = ut.odict(
            [(key, len(sextext2_aids.get(key, []))) for key in sex_keys]
        )
        # Filter 0's
        sextext2_nAnnots = {
            key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0
        }
        return sextext2_nAnnots

    def get_annot_qual_stats(ibs, aid_list):
        annots = ibs.annots(aid_list)
        qualtext2_nAnnots = ut.order_dict_by(
            ut.map_vals(len, annots.group_items(annots.quality_texts)),
            list(ibs.const.QUALITY_TEXT_TO_INT.keys()),
        )
        return qualtext2_nAnnots

    def get_annot_viewpoint_stats(ibs, aid_list):
        annots = ibs.annots(aid_list)
        viewcode2_nAnnots = ut.order_dict_by(
            ut.map_vals(len, annots.group_items(annots.viewpoint_code)),
            list(ibs.const.VIEW.CODE_TO_INT.keys()) + [None],
        )
        return viewcode2_nAnnots

    if verbose:
        logger.info('Checking Other Annot Stats')

    qualtext2_nAnnots = get_annot_qual_stats(ibs, valid_aids)
    viewcode2_nAnnots = get_annot_viewpoint_stats(ibs, valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        logger.info('Checking Contrib Stats')

    # Contributor Statistics
    # hack remove colon for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]

    image_contributor_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contributor_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contributor_tag_to_gids = ut.group_items(valid_gids, image_contributor_tags)
    contributor_tag_to_aids = ut.group_items(valid_aids, annot_contributor_tags)

    contributor_tag_to_qualstats = {
        key: get_annot_qual_stats(ibs, aids)
        for key, aids in six.iteritems(contributor_tag_to_aids)
    }
    contributor_tag_to_viewstats = {
        key: get_annot_viewpoint_stats(ibs, aids)
        for key, aids in six.iteritems(contributor_tag_to_aids)
    }

    contributor_tag_to_nImages = {
        key: len(val) for key, val in six.iteritems(contributor_tag_to_gids)
    }
    contributor_tag_to_nAnnots = {
        key: len(val) for key, val in six.iteritems(contributor_tag_to_aids)
    }

    if verbose:
        logger.info('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton = len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_annots)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            logger.info('Checking Disk Space')
        ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            logger.info('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_annots)
            _num_names_total_check = (
                num_names_singleton + num_names_unassociated + num_names_multiton
            )
            _num_annots_total_check = (
                num_unknown_annots + num_singleton_annots + num_multiton_annots
            )
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            # if not request_annot_subset:
            # dont check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(
                ex,
                keys=[
                    '_num_names_total_check',
                    'num_names',
                    '_num_annots_total_check',
                    'num_annots',
                    'num_names_singleton',
                    'num_names_multiton',
                    'num_unknown_annots',
                    'num_multiton_annots',
                    'num_singleton_annots',
                ],
            )
            raise

    # Get contributor statistics
    contributor_rowids = ibs.get_valid_contributor_rowids()
    num_contributors = len(contributor_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.repr2(dict_, si=True)
        return align2(str_)

    header_block_lines = [('+============================')] + (
        [
            ('+ singleton := single sighting'),
            ('+ multiton  := multiple sightings'),
            ('--' * num_tabs),
        ]
        if not short and with_header
        else []
    )

    source_block_lines = [
        ('DB Info:  ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = (
        [
            ('--' * num_tabs),
            ('DB Bytes: '),
            ('     +- dbdir nBytes:         ' + dbdir_space),
            ('     |  +- _ibsdb nBytes:     ' + ibsdir_space),
            ('     |  |  +-imgdir nBytes:   ' + imgdir_space),
            ('     |  |  +-cachedir nBytes: ' + cachedir_space),
        ]
        if with_bytes
        else []
    )

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names                      = %d' % num_names),
        ('# Names (unassociated)       = %d' % num_names_unassociated),
        ('# Names (singleton)          = %d' % num_names_singleton),
        ('# Names (multiton)           = %d' % num_names_multiton),
    ]

    subset_str = '        ' if not request_annot_subset else '(SUBSET)'

    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s            = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown)           = %d' % num_unknown_annots),
        ('# Annots (singleton)         = %d' % num_singleton_annots),
        ('# Annots (multiton)          = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = (
        [
            ('--' * num_tabs),
            ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
            ('# Annots per Image           = %s' % (align2(gx2_nAnnots_stats),)),
            ('# Annots per Species         = %s' % (align_dict2(species2_nAids),)),
        ]
        if not short
        else []
    )

    occurrence_block_lines = (
        [
            ('--' * num_tabs),
            # ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
            # ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
            ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
        ]
        if not short
        else []
    )

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(viewcode2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = (
        [
            '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
            '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
        ]
        if not short and with_agesex
        else []
    )

    contributor_block_lines = (
        [
            '# Images per contributor       = ' + align_dict2(contributor_tag_to_nImages),
            '# Annots per contributor       = ' + align_dict2(contributor_tag_to_nAnnots),
            '# Quality per contributor      = '
            + ut.repr2(contributor_tag_to_qualstats, sorted_=True),
            '# Viewpoint per contributor    = '
            + ut.repr2(contributor_tag_to_viewstats, sorted_=True),
        ]
        if with_contrib
        else []
    )

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img                        = %d' % len(valid_gids)),
        None
        if short
        else ('# Img reviewed               = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps               = %d' % len(gps_list)),
        # ('# Img with timestamp         = %d' % len(valid_unixtime_list)),
        None
        if short
        else ('Img Time Stats               = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines
        + bytes_block_lines
        + source_block_lines
        + name_block_lines
        + annot_block_lines
        + annot_per_basic_block_lines
        + occurrence_block_lines
        + annot_per_qualview_block_lines
        + annot_per_agesex_block_lines
        + img_block_lines
        + contributor_block_lines
        + imgsize_stat_lines
        + [('L============================')]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        logger.info(info_str2)
    locals_ = locals()
    return locals_
Example #26
0
def compute_occurrence_groups(ibs, gid_list, config={}, use_gps=False, verbose=None):
    r"""
    Args:
        ibs (IBEISController):  wbia controller object
        gid_list (list):

    Returns:
        tuple: (None, None)

    CommandLine:
        python -m wbia compute_occurrence_groups

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.preproc.preproc_occurrence import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb(defaultdb='testdb1')
        >>> verbose = True
        >>> images = ibs.images()
        >>> gid_list = images.gids
        >>> config = {}  # wbia.algo.Config.OccurrenceConfig().asdict()
        >>> (flat_imgsetids, flat_gids) = wbia_compute_occurrences(ibs, gid_list)
        >>> aids_list = list(ut.group_items(aid_list_, flat_imgsetids).values())
        >>> metric = list(map(len, aids_list))
        >>> sortx = ut.list_argsort(metric)[::-1]
        >>> index = sortx[1]
        >>> aids = aids_list[index]
        >>> gids = list(set(ibs.get_annot_gids(aids)))
    """
    if verbose is None:
        verbose = ut.NOT_QUIET
    # Config info
    gid_list = np.unique(gid_list)
    if verbose:
        logger.info('[occur] Computing occurrences on %r images.' % (len(gid_list)))
        logger.info('[occur] config = ' + ut.repr3(config))

    use_gps = config.get('use_gps', use_gps)
    datas = prepare_X_data(ibs, gid_list, use_gps=use_gps)

    from wbia.algo.preproc import occurrence_blackbox

    cluster_algo = config.get('cluster_algo', 'agglomerative')
    km_per_sec = config.get('km_per_sec', occurrence_blackbox.KM_PER_SEC)
    thresh_sec = config.get('seconds_thresh', 30 * 60.0)
    min_imgs_per_occurence = config.get('min_imgs_per_occurence', 1)
    # 30 minutes = 3.6 kilometers
    # 5 minutes = 0.6 kilometers

    assert cluster_algo == 'agglomerative', 'only agglomerative is supported'

    # Group datas with different values separately
    all_gids = []
    all_labels = []
    for key in datas.keys():
        val = datas[key]
        gids, latlons, posixtimes = val
        labels = occurrence_blackbox.cluster_timespace_sec(
            latlons, posixtimes, thresh_sec, km_per_sec=km_per_sec
        )
        if labels is None:
            labels = np.zeros(len(gids), dtype=int)
        all_gids.append(gids)
        all_labels.append(labels)

    # Combine labels across different groups
    pads = [vt.safe_max(ys, fill=0) + 1 for ys in all_labels]
    offsets = np.array([0] + pads[:-1]).cumsum()
    all_labels_ = [ys + offset for ys, offset in zip(all_labels, offsets)]
    label_arr = np.array(ut.flatten(all_labels_))
    gid_arr = np.array(ut.flatten(all_gids))

    # Group images by unique label
    labels, label_gids = group_images_by_label(label_arr, gid_arr)
    # Remove occurrences less than the threshold
    occur_labels = labels
    occur_gids = label_gids
    occur_unixtimes = compute_occurrence_unixtime(ibs, occur_gids)
    occur_labels, occur_gids = filter_and_relabel(
        labels, label_gids, min_imgs_per_occurence, occur_unixtimes
    )
    if verbose:
        logger.info('[occur] Found %d clusters.' % len(occur_labels))
    if len(label_gids) > 0 and verbose:
        logger.info('[occur] Cluster image size stats:')
        ut.print_dict(
            ut.get_stats(list(map(len, occur_gids)), use_median=True, use_sum=True),
            'occur image stats',
        )
    return occur_labels, occur_gids
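
The label-combination step above makes cluster ids from independently clustered groups globally unique by offsetting each group's labels by the total number of labels seen in the preceding groups. A small self-contained sketch of the same arithmetic with plain numpy:

import numpy as np

# Two groups clustered independently; both reuse label ids starting at 0.
all_labels = [np.array([0, 0, 1]), np.array([0, 1, 1, 2])]

# Same offset logic as above: pad = max label + 1 per group, offsets are the
# cumulative pads of the preceding groups.
pads = [labels.max() + 1 for labels in all_labels]
offsets = np.array([0] + pads[:-1]).cumsum()
all_labels_ = [labels + offset for labels, offset in zip(all_labels, offsets)]

print(all_labels_)  # [array([0, 0, 1]), array([2, 3, 3, 4])] -> no id collisions
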
Example #27
0
def get_toy_data_1vM(num_annots, num_names=None, **kwargs):
    r"""
    Args:
        num_annots (int):
        num_names (int): (default = None)

    Kwargs:
        initial_aids, initial_nids, nid_sequence, seed

    Returns:
        tuple: (pair_list, feat_list)

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_data_1vM --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> num_annots = 1000
        >>> num_names = 40
        >>> get_toy_data_1vM(num_annots, num_names)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import vtool as vt
    tup_ = get_toy_annots(num_annots, num_names, **kwargs)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    rng = vt.ensure_rng(None)

    # Test a simple SVM classifier
    nid2_nexemp = ut.dict_hist(nids1)
    aid2_nid = dict(zip(aids, nids))

    ut.fix_embed_globals()

    #def add_to_globals(globals_, subdict):
    #    globals_.update(subdict)

    unique_nids = list(nid2_nexemp.keys())

    def annot_to_class_feats2(aid, aid2_nid, top=None):
        pair_list = []
        score_list = []
        nexemplar_list = []
        for nid in unique_nids:
            label = (aid2_nid[aid] == nid)
            num_exemplars = nid2_nexemp.get(nid, 0)
            if num_exemplars == 0:
                continue
            params = toy_params[label]
            mu, sigma = ut.dict_take(params, ['mu', 'sigma'])
            score_ = rng.normal(mu, sigma, size=num_exemplars).max()
            score = np.clip(score_, 0, np.inf)
            pair_list.append((aid, nid))
            score_list.append(score)
            nexemplar_list.append(num_exemplars)
        rank_list = ut.argsort(score_list, reverse=True)
        feat_list = np.array([score_list, rank_list, nexemplar_list]).T
        sortx = np.argsort(rank_list)
        feat_list = feat_list.take(sortx, axis=0)
        pair_list = np.array(pair_list).take(sortx, axis=0)
        if top is not None:
            feat_list = feat_list[:top]
            pair_list = pair_list[0:top]
        return pair_list, feat_list

    toclass_features = [
        annot_to_class_feats2(aid, aid2_nid, top=5) for aid in aids
    ]
    aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0))
    feat_list = np.vstack(ut.get_list_column(toclass_features, 1))
    score_list = feat_list.T[0:1].T
    lbl_list = [aid2_nid[aid] == nid for aid, nid in aidnid_pairs]

    from sklearn import svm
    #clf1 = svm.LinearSVC()
    print('Learning classifiers')

    clf3 = svm.SVC(probability=True)
    clf3.fit(feat_list, lbl_list)
    #prob_true, prob_false = clf3.predict_proba(feat_list).T

    clf1 = svm.LinearSVC()
    clf1.fit(score_list, lbl_list)

    # Score new annots against the training database
    tup_ = get_toy_annots(num_annots * 2,
                          num_names,
                          initial_aids=all_aids,
                          initial_nids=all_nids)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    aid2_nid = dict(zip(aids, nids))
    toclass_features = [annot_to_class_feats2(aid, aid2_nid) for aid in aids]
    aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0))
    feat_list = np.vstack(ut.get_list_column(toclass_features, 1))
    lbl_list = np.array([aid2_nid[aid] == nid for aid, nid in aidnid_pairs])

    print('Running tests')

    score_list = feat_list.T[0:1].T

    tp_feat_list = feat_list[lbl_list]
    tn_feat_list = feat_list[~lbl_list]
    tp_lbls = lbl_list[lbl_list]
    tn_lbls = lbl_list[~lbl_list]
    print('num tp: %d' % len(tp_lbls))
    print('num tn: %d' % len(tn_lbls))

    tp_score_list = score_list[lbl_list]
    tn_score_list = score_list[~lbl_list]

    print('tp_feat' +
          ut.repr3(ut.get_stats(tp_feat_list, axis=0), precision=2))
    print('tn_feat' +
          ut.repr3(ut.get_stats(tn_feat_list, axis=0), precision=2))

    print('tp_score' + ut.repr2(ut.get_stats(tp_score_list), precision=2))
    print('tn_score' + ut.repr2(ut.get_stats(tn_score_list), precision=2))

    tp_pred3 = clf3.predict(tp_feat_list)
    tn_pred3 = clf3.predict(tn_feat_list)
    print((tp_pred3.sum(), tp_pred3.shape))
    print((tn_pred3.sum(), tn_pred3.shape))

    tp_score3 = clf3.score(tp_feat_list, tp_lbls)
    tn_score3 = clf3.score(tn_feat_list, tn_lbls)

    tp_pred1 = clf1.predict(tp_score_list)
    tn_pred1 = clf1.predict(tn_score_list)
    print((tp_pred1.sum(), tp_pred1.shape))
    print((tn_pred1.sum(), tn_pred1.shape))

    tp_score1 = clf1.score(tp_score_list, tp_lbls)
    tn_score1 = clf1.score(tn_score_list, tn_lbls)
    print('tp score with rank    = %r' % (tp_score3, ))
    print('tn score with rank    = %r' % (tn_score3, ))

    print('tp score without rank = %r' % (tp_score1, ))
    print('tn score without rank = %r' % (tn_score1, ))
    toy_data = {}

    return toy_data
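
annot_to_class_feats2 above turns each (annot, name) comparison into a [score, rank, num_exemplars] row and orders the rows best-first. A simplified numpy sketch of that row construction, with made-up scores standing in for the rng.normal draws from toy_params:

import numpy as np

# Hypothetical per-name scores and exemplar counts for one query annot.
scores = np.array([4.2, 9.1, 0.7])
nexemplars = np.array([3, 5, 2])

order = np.argsort(scores)[::-1]        # name indices, best score first
ranks = np.empty_like(order)
ranks[order] = np.arange(len(scores))   # rank of each name (0 = best)

# One [score, rank, num_exemplars] row per candidate name, best-ranked row first.
feat_rows = np.column_stack([scores, ranks, nexemplars])[order]
print(feat_rows)
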
Example #28
0
    def select_ith_match(self, mx):
        """
        Selects the ith match and visualizes and prints information concerning
        feature weights, keypoint details, and SIFT descriptors
        """
        import wbia.plottool as pt
        from wbia.plottool import viz_featrow
        from wbia.plottool import interact_helpers as ih

        fnum = self.fnum
        same_fig = self.same_fig
        rchip1 = self.rchip1
        rchip2 = self.rchip2
        self.mx = mx
        print('+--- SELECT --- ')
        print('... selecting mx-th=%r feature match' % mx)
        fsv = self.fsv
        fs = self.fs
        print('score stats:')
        print(ut.repr2(ut.get_stats(fsv, axis=0), nl=1))
        print('fsv[mx] = %r' % (fsv[mx], ))
        print('fs[mx] = %r' % (fs[mx], ))
        # ----------------------
        # Get info for the select_ith_match plot
        self.mode = 1
        # Get the mx-th feature match
        fx1, fx2 = self.fm[mx]

        # Older info
        fscore2 = self.fs[mx]
        fk2 = None if self.fk is None else self.fk[mx]
        kp1, kp2 = self.kpts1[fx1], self.kpts2[fx2]
        vecs1, vecs2 = self.vecs1[fx1], self.vecs2[fx2]
        info1 = '\nquery'
        info2 = '\nk=%r fscore=%r' % (fk2, fscore2)
        # self.last_fx = fx1
        self.last_fx = fx1

        # Extracted keypoints to draw
        extracted_list = [
            (rchip1, kp1, vecs1, fx1, 'aid1', info1),
            (rchip2, kp2, vecs2, fx2, 'aid2', info2),
        ]
        # Normalizing keypoint
        # if hasattr(cm, 'filt2_meta') and 'lnbnn' in cm.filt2_meta:
        #    qfx2_norm = cm.filt2_meta['lnbnn']
        #    # Normalizing chip and feature
        #    (aid3, fx3, normk) = qfx2_norm[fx1]
        #    rchip3 = ibs.get_annot_chips(aid3)
        #    kp3 = ibs.get_annot_kpts(aid3)[fx3]
        #    sift3 = ibs.get_annot_vecs(aid3)[fx3]
        #    info3 = '\nnorm %s k=%r' % (vh.get_aidstrs(aid3), normk)
        #    extracted_list.append((rchip3, kp3, sift3, fx3, aid3, info3))
        # else:
        #    pass
        #    #print('WARNING: meta doesnt exist')

        # ----------------------
        # Draw the select_ith_match plot
        nRows, nCols = len(extracted_list) + same_fig, 3
        # Draw matching chips and features
        sel_fm = np.array([(fx1, fx2)])
        pnum1 = (nRows, 1, 1) if same_fig else (1, 1, 1)
        vert = self.vert if self.vert is not None else False
        self.chipmatch_view(
            pnum=pnum1,
            ell_alpha=0.4,
            ell_linewidth=1.8,
            colors=pt.BLUE,
            sel_fm=sel_fm,
            vert=vert,
        )
        # Draw selected feature matches
        px = nCols * same_fig  # plot offset
        prevsift = None
        if not same_fig:
            # fnum2 = fnum + len(viz.FNUMS)
            fnum2 = self.fnum2
            fig2 = pt.figure(fnum=fnum2, docla=True, doclf=True)
        else:
            fnum2 = fnum

        for (rchip, kp, sift, fx, aid, info) in extracted_list:
            px = viz_featrow.draw_feat_row(
                rchip,
                fx,
                kp,
                sift,
                fnum2,
                nRows,
                nCols,
                px,
                prevsift=prevsift,
                aid=aid,
                info=info,
            )
            prevsift = sift
        if not same_fig:
            ih.connect_callback(fig2, 'button_press_event', self.on_click)
Example #29
0
def get_toy_data_1vM(num_annots, num_names=None, **kwargs):
    r"""
    Args:
        num_annots (int):
        num_names (int): (default = None)

    Kwargs:
        initial_aids, initial_nids, nid_sequence, seed

    Returns:
        tuple: (pair_list, feat_list)

    CommandLine:
        python -m ibeis.algo.hots.demobayes --exec-get_toy_data_1vM --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.demobayes import *  # NOQA
        >>> num_annots = 1000
        >>> num_names = 40
        >>> get_toy_data_1vM(num_annots, num_names)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import vtool as vt
    tup_ = get_toy_annots(num_annots, num_names, **kwargs)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    rng = vt.ensure_rng(None)

    # Test a simple SVM classifier
    nid2_nexemp = ut.dict_hist(nids1)
    aid2_nid = dict(zip(aids, nids))

    ut.fix_embed_globals()

    #def add_to_globals(globals_, subdict):
    #    globals_.update(subdict)

    unique_nids = list(nid2_nexemp.keys())

    def annot_to_class_feats2(aid, aid2_nid, top=None):
        pair_list = []
        score_list = []
        nexemplar_list = []
        for nid in unique_nids:
            label = (aid2_nid[aid] == nid)
            num_exemplars = nid2_nexemp.get(nid, 0)
            if num_exemplars == 0:
                continue
            params = toy_params[label]
            mu, sigma = ut.dict_take(params, ['mu', 'sigma'])
            score_ = rng.normal(mu, sigma, size=num_exemplars).max()
            score = np.clip(score_, 0, np.inf)
            pair_list.append((aid, nid))
            score_list.append(score)
            nexemplar_list.append(num_exemplars)
        rank_list = ut.argsort(score_list, reverse=True)
        feat_list = np.array([score_list, rank_list, nexemplar_list]).T
        sortx = np.argsort(rank_list)
        feat_list = feat_list.take(sortx, axis=0)
        pair_list = np.array(pair_list).take(sortx, axis=0)
        if top is not None:
            feat_list = feat_list[:top]
            pair_list = pair_list[0:top]
        return pair_list, feat_list

    toclass_features = [annot_to_class_feats2(aid, aid2_nid, top=5) for aid in aids]
    aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0))
    feat_list = np.vstack(ut.get_list_column(toclass_features, 1))
    score_list = feat_list.T[0:1].T
    lbl_list = [aid2_nid[aid] == nid for aid, nid in aidnid_pairs]

    from sklearn import svm
    #clf1 = svm.LinearSVC()
    print('Learning classifiers')

    clf3 = svm.SVC()
    clf3.fit(feat_list, lbl_list)

    clf1 = svm.LinearSVC()
    clf1.fit(score_list, lbl_list)

    # Score new annots against the training database
    tup_ = get_toy_annots(num_annots * 2, num_names, initial_aids=all_aids, initial_nids=all_nids)
    aids, nids, aids1, nids1, all_aids, all_nids = tup_
    aid2_nid = dict(zip(aids, nids))
    toclass_features = [annot_to_class_feats2(aid, aid2_nid) for aid in aids]
    aidnid_pairs = np.vstack(ut.get_list_column(toclass_features, 0))
    feat_list = np.vstack(ut.get_list_column(toclass_features, 1))
    lbl_list = np.array([aid2_nid[aid] == nid for aid, nid in aidnid_pairs])

    print('Running tests')

    score_list = feat_list.T[0:1].T

    tp_feat_list = feat_list[lbl_list]
    tn_feat_list = feat_list[~lbl_list]
    tp_lbls = lbl_list[lbl_list]
    tn_lbls = lbl_list[~lbl_list]
    print('num tp: %d' % len(tp_lbls))
    print('num tn: %d' % len(tn_lbls))

    tp_score_list = score_list[lbl_list]
    tn_score_list = score_list[~lbl_list]

    print('tp_feat' + ut.repr3(ut.get_stats(tp_feat_list, axis=0), precision=2))
    print('tn_feat' + ut.repr3(ut.get_stats(tn_feat_list, axis=0), precision=2))

    print('tp_score' + ut.repr2(ut.get_stats(tp_score_list), precision=2))
    print('tn_score' + ut.repr2(ut.get_stats(tn_score_list), precision=2))

    tp_pred3 = clf3.predict(tp_feat_list)
    tn_pred3 = clf3.predict(tn_feat_list)
    print((tp_pred3.sum(), tp_pred3.shape))
    print((tn_pred3.sum(), tn_pred3.shape))

    tp_score3 = clf3.score(tp_feat_list, tp_lbls)
    tn_score3 = clf3.score(tn_feat_list, tn_lbls)

    tp_pred1 = clf1.predict(tp_score_list)
    tn_pred1 = clf1.predict(tn_score_list)
    print((tp_pred1.sum(), tp_pred1.shape))
    print((tn_pred1.sum(), tn_pred1.shape))

    tp_score1 = clf1.score(tp_score_list, tp_lbls)
    tn_score1 = clf1.score(tn_score_list, tn_lbls)
    print('tp score with rank    = %r' % (tp_score3,))
    print('tn score with rank    = %r' % (tn_score3,))

    print('tp score without rank = %r' % (tp_score1,))
    print('tn score without rank = %r' % (tn_score1,))
    toy_data = {}

    return toy_data
Example #30
0
def ggr_random_name_splits():
    """
    CommandLine:
        python -m wbia.viz.viz_graph2 ggr_random_name_splits --show

    Ignore:
        sshfs -o idmap=user lev:/ ~/lev

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.viz.viz_graph2 import *  # NOQA
        >>> ggr_random_name_splits()
    """
    import wbia.guitool as gt

    gt.ensure_qtapp()
    # nid_list = ibs.get_valid_nids(filter_empty=True)
    import wbia

    dbdir = '/media/danger/GGR/GGR-IBEIS'
    dbdir = (dbdir if ut.checkpath(dbdir) else
             ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS'))
    ibs = wbia.opendb(dbdir=dbdir, allow_newdir=False)

    import datetime

    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    orig_filter_kw = {
        'multiple': None,
        # 'view': ['right'],
        # 'minqual': 'good',
        'is_known': True,
        'min_pername': 2,
    }
    orig_aids = ibs.filter_annots_general(filter_kw=ut.dict_union(
        orig_filter_kw,
        {
            'min_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        },
    ))
    orig_all_annots = ibs.annots(orig_aids)
    orig_unique_nids, orig_grouped_annots_ = orig_all_annots.group(
        orig_all_annots.nids)
    # Ensure we get everything
    orig_grouped_annots = [
        ibs.annots(aids_) for aids_ in ibs.get_name_aids(orig_unique_nids)
    ]

    # pip install quantumrandom
    if False:
        import quantumrandom

        data = quantumrandom.uint16()
        seed = data.sum()
        print('seed = %r' % (seed, ))
        # import Crypto.Random
        # from Crypto import Random
        # quantumrandom.get_data()
        # StrongRandom = Crypto.Random.random.StrongRandom
        # aes.reseed(3340258)
        # chars = [str(chr(x)) for x in data.view(np.uint8)]
        # aes_seed = str('').join(chars)
        # aes = Crypto.Random.Fortuna.FortunaGenerator.AESGenerator()
        # aes.reseed(aes_seed)
        # aes.pseudo_random_data(10)

    orig_rand_idxs = ut.random_indexes(len(orig_grouped_annots), seed=3340258)
    orig_sample_size = 75
    random_annot_groups = ut.take(orig_grouped_annots, orig_rand_idxs)
    orig_annot_sample = random_annot_groups[:orig_sample_size]

    # OOOPS MADE ERROR REDO ----

    filter_kw = {
        'multiple': None,
        'view': ['right'],
        'minqual': 'good',
        'is_known': True,
        'min_pername': 2,
    }
    filter_kw_ = ut.dict_union(
        filter_kw,
        {
            'min_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        },
    )
    refiltered_sample = [
        ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_)
        for annot in orig_annot_sample
    ]
    is_ok = np.array(ut.lmap(len, refiltered_sample)) >= 2
    ok_part_orig_sample = ut.compress(orig_annot_sample, is_ok)
    ok_part_orig_nids = [x.nids[0] for x in ok_part_orig_sample]

    # Now compute real sample
    aids = ibs.filter_annots_general(filter_kw=filter_kw_)
    all_annots = ibs.annots(aids)
    unique_nids, grouped_annots_ = all_annots.group(all_annots.nids)
    grouped_annots = grouped_annots_
    # Ensure we get everything
    # grouped_annots = [ibs.annots(aids_) for aids_ in ibs.get_name_aids(unique_nids)]

    pop = len(grouped_annots)
    pername_list = ut.lmap(len, grouped_annots)
    groups = wbia.annots.AnnotGroups(grouped_annots, ibs)
    match_tags = [ut.unique(ut.flatten(t)) for t in groups.match_tags]
    tag_case_hist = ut.dict_hist(ut.flatten(match_tags))
    print('name_pop = %r' % (pop, ))
    print('Annots per Multiton Name' +
          ut.repr3(ut.get_stats(pername_list, use_median=True)))
    print('Name Tag Hist ' + ut.repr3(tag_case_hist))
    print('Percent Photobomb: %.2f%%' %
          (tag_case_hist.get('photobomb', 0) / pop * 100))
    print('Percent Split: %.2f%%' % (tag_case_hist.get('splitcase', 0) / pop * 100))

    # Remove the ok part from this sample
    remain_unique_nids = ut.setdiff(unique_nids, ok_part_orig_nids)
    remain_grouped_annots = [
        ibs.annots(aids_) for aids_ in ibs.get_name_aids(remain_unique_nids)
    ]

    sample_size = 75
    import vtool as vt

    vt.calc_sample_from_error_bars(0.05, pop, conf_level=0.95, prior=0.05)

    remain_rand_idxs = ut.random_indexes(len(remain_grouped_annots),
                                         seed=3340258)
    remain_sample_size = sample_size - len(ok_part_orig_nids)
    remain_random_annot_groups = ut.take(remain_grouped_annots,
                                         remain_rand_idxs)
    remain_annot_sample = remain_random_annot_groups[:remain_sample_size]

    annot_sample_nofilter = ok_part_orig_sample + remain_annot_sample
    # Filter out all bad parts
    annot_sample_filter = [
        ibs.annots(ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_))
        for annot in annot_sample_nofilter
    ]
    annot_sample = annot_sample_filter

    win = None
    from wbia.viz import viz_graph2

    for annots in ut.InteractiveIter(annot_sample):
        if win is not None:
            win.close()
        win = viz_graph2.make_qt_graph_interface(ibs,
                                                 aids=annots.aids,
                                                 init_mode='rereview')
        print(win)

    sample_groups = wbia.annots.AnnotGroups(annot_sample, ibs)

    flat_tags = [ut.unique(ut.flatten(t)) for t in sample_groups.match_tags]

    print('Using Split and Photobomb')
    is_positive = ['photobomb' in t or 'splitcase' in t for t in flat_tags]
    num_positive = sum(is_positive)
    vt.calc_error_bars_from_sample(sample_size,
                                   num_positive,
                                   pop,
                                   conf_level=0.95)

    print('Only Photobomb')
    is_positive = ['photobomb' in t for t in flat_tags]
    num_positive = sum(is_positive)
    vt.calc_error_bars_from_sample(sample_size,
                                   num_positive,
                                   pop,
                                   conf_level=0.95)

    print('Only SplitCase')
    is_positive = ['splitcase' in t for t in flat_tags]
    num_positive = sum(is_positive)
    vt.calc_error_bars_from_sample(sample_size,
                                   num_positive,
                                   pop,
                                   conf_level=0.95)
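
vt.calc_error_bars_from_sample above presumably turns a sample proportion into a confidence interval on the population rate. A hedged back-of-the-envelope version using the usual normal approximation with a finite-population correction (the actual vtool implementation may use a different estimator); sample_size=75 matches the sample used above, while num_positive and pop in the demo call are hypothetical:

import math

def approx_error_bars(sample_size, num_positive, pop, conf_level=0.95):
    """Normal-approximation CI for a proportion with finite-population correction.

    Assumption about what vt.calc_error_bars_from_sample computes; this sketch
    only handles 95% and 99% confidence levels.
    """
    z = 1.96 if conf_level == 0.95 else 2.576
    p_hat = num_positive / sample_size
    fpc = math.sqrt((pop - sample_size) / (pop - 1.0)) if pop > sample_size else 0.0
    margin = z * math.sqrt(p_hat * (1.0 - p_hat) / sample_size) * fpc
    return p_hat, margin

p_hat, margin = approx_error_bars(sample_size=75, num_positive=6, pop=1000)
print('estimated rate = %.3f +/- %.3f' % (p_hat, margin))
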
Example #31
0
def latex_dbstats(ibs_list, **kwargs):
    r"""
    Args:
        ibs_list (list): list of IBEISController objects

    CommandLine:
        python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist testdb1
        python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist testdb1 --show
        python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist PZ_Master0 testdb1 --show
        python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist PZ_Master0 PZ_MTEST GZ_ALL --show
        python -m ibeis.other.dbinfo --test-latex_dbstats --dblist GZ_ALL NNP_MasterGIRM_core --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> db_list = ut.get_argval('--dblist', type_=list, default=['testdb1'])
        >>> ibs_list = [ibeis.opendb(db=db) for db in db_list]
        >>> tabular_str = latex_dbstats(ibs_list)
        >>> tabular_cmd = ut.latex_newcommand(ut.latex_sanitize_command_name('DatabaseInfo'), tabular_str)
        >>> ut.copy_text_to_clipboard(tabular_cmd)
        >>> write_fpath = ut.get_argval('--write', type_=str, default=None)
        >>> if write_fpath is not None:
        >>>     fpath = ut.truepath(write_fpath)
        >>>     text = ut.readfrom(fpath)
        >>>     new_text = ut.replace_between_tags(text, tabular_cmd, '% <DBINFO>', '% </DBINFO>')
        >>>     ut.writeto(fpath, new_text)
        >>> ut.print_code(tabular_cmd, 'latex')
        >>> ut.quit_if_noshow()
        >>> ut.render_latex_text('\\noindent \n' + tabular_str)
    """

    import ibeis
    # Parse for aids test data
    aids_list = [ibeis.testdata_aids(ibs=ibs) for ibs in ibs_list]

    #dbinfo_list = [get_dbinfo(ibs, with_contrib=False, verbose=False) for ibs in ibs_list]
    dbinfo_list = [get_dbinfo(ibs, with_contrib=False, verbose=False, aid_list=aids)
                   for ibs, aids in zip(ibs_list, aids_list)]

    #title = db_name + ' database statistics'
    title = 'Database statistics'
    stat_title = '# Annotations per name (multiton)'

    #col_lbls = [
    #    'multiton',
    #    #'singleton',
    #    'total',
    #    'multiton',
    #    'singleton',
    #    'total',
    #]
    key_to_col_lbls = {
        'num_names_multiton':   'multiton',
        'num_names_singleton':  'singleton',
        'num_names':            'total',

        'num_multiton_annots':  'multiton',
        'num_singleton_annots': 'singleton',
        'num_unknown_annots':   'unknown',
        'num_annots':           'total',
    }
    # Structure of columns / multicolumns
    multi_col_keys = [
        ('# Names', (
            'num_names_multiton',
            #'num_names_singleton',
            'num_names',
        )),

        ('# Annots', (
            'num_multiton_annots',
            'num_singleton_annots',
            #'num_unknown_annots',
            'num_annots')),
    ]
    #multicol_lbls = [('# Names', 3), ('# Annots', 3)]
    multicol_lbls = [(mcolname, len(mcols)) for mcolname, mcols in multi_col_keys]

    # Flatten column labels
    col_keys = ut.flatten(ut.get_list_column(multi_col_keys, 1))
    col_lbls = ut.dict_take(key_to_col_lbls, col_keys)

    row_lbls   = []
    row_values = []

    #stat_col_lbls = ['max', 'min', 'mean', 'std', 'nMin', 'nMax']
    stat_col_lbls = ['max', 'min', 'mean', 'std', 'med']
    #stat_row_lbls = ['# Annot per Name (multiton)']
    stat_row_lbls = []
    stat_row_values = []

    SINGLE_TABLE = False
    EXTRA = True

    for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list):
        row_ = ut.dict_take(dbinfo_locals, col_keys)
        dbname = ibs.get_dbname_alias()
        row_lbls.append(dbname)
        multiton_annot_stats = ut.get_stats(dbinfo_locals['multiton_nid2_nannots'], use_median=True)
        stat_rows = ut.dict_take(multiton_annot_stats, stat_col_lbls)
        if SINGLE_TABLE:
            row_.extend(stat_rows)
        else:
            stat_row_lbls.append(dbname)
            stat_row_values.append(stat_rows)

        row_values.append(row_)

    CENTERLINE = False
    AS_TABLE = True
    tablekw = dict(
        astable=AS_TABLE, centerline=CENTERLINE, FORCE_INT=False, precision=2,
        col_sep='', multicol_sep='|',
        **kwargs)

    if EXTRA:
        extra_keys = [
            #'species2_nAids',
            'qualtext2_nAnnots',
            'yawtext2_nAnnots',
        ]
        extra_titles = {
            'species2_nAids': 'Annotations per species.',
            'qualtext2_nAnnots': 'Annotations per quality.',
            'yawtext2_nAnnots': 'Annotations per viewpoint.',
        }
        extra_collbls = ut.ddict(list)
        extra_rowvalues = ut.ddict(list)
        extra_tables = ut.ddict(list)

        for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list):
            for key in extra_keys:
                extra_collbls[key] = ut.unique_ordered(extra_collbls[key] + list(dbinfo_locals[key].keys()))

        extra_collbls['qualtext2_nAnnots'] = ['excellent', 'good', 'ok', 'poor', 'junk', 'UNKNOWN']
        #extra_collbls['yawtext2_nAnnots'] = ['backleft', 'left', 'frontleft', 'front', 'frontright', 'right', 'backright', 'back', None]
        extra_collbls['yawtext2_nAnnots'] = ['BL', 'L', 'FL', 'F', 'FR', 'R', 'BR', 'B', None]

        for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list):
            for key in extra_keys:
                extra_rowvalues[key].append(ut.dict_take(dbinfo_locals[key], extra_collbls[key], 0))

        qualalias = {'UNKNOWN': None}

        extra_collbls['yawtext2_nAnnots'] = [ibs.const.YAWALIAS.get(val, val) for val in extra_collbls['yawtext2_nAnnots']]
        extra_collbls['qualtext2_nAnnots'] = [qualalias.get(val, val) for val in extra_collbls['qualtext2_nAnnots']]

        for key in extra_keys:
            extra_tables[key] = ut.util_latex.make_score_tabular(
                row_lbls, extra_collbls[key], extra_rowvalues[key],
                title=extra_titles[key], col_align='r', table_position='[h!]', **tablekw)

    #tabular_str = util_latex.tabular_join(tabular_body_list)
    if SINGLE_TABLE:
        col_lbls += stat_col_lbls
        multicol_lbls += [(stat_title, len(stat_col_lbls))]

    count_tabular_str = ut.util_latex.make_score_tabular(
        row_lbls, col_lbls, row_values, title=title, multicol_lbls=multicol_lbls, table_position='[ht!]', **tablekw)

    #print(row_lbls)

    if SINGLE_TABLE:
        tabular_str = count_tabular_str
    else:
        stat_tabular_str = ut.util_latex.make_score_tabular(
            stat_row_lbls, stat_col_lbls, stat_row_values, title=stat_title,
            col_align='r', table_position='[h!]', **tablekw)

        # Make a table of statistics
        if tablekw['astable']:
            tablesep = '\n%--\n'
        else:
            tablesep = '\\\\\n%--\n'
        if EXTRA:
            tabular_str = tablesep.join([count_tabular_str, stat_tabular_str] + ut.dict_take(extra_tables, extra_keys))
        else:
            tabular_str = tablesep.join([count_tabular_str, stat_tabular_str])

    return tabular_str
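
make_score_tabular is a utool helper; as a rough illustration of the kind of LaTeX it assembles from row_lbls / col_lbls / row_values (not the actual utool output, which also handles multicolumn headers, alignment, and table floats):

def simple_tabular(row_lbls, col_lbls, row_values):
    """Toy stand-in for ut.util_latex.make_score_tabular; illustration only."""
    header = ' & '.join([''] + list(col_lbls)) + r' \\ \hline'
    body = [
        ' & '.join([lbl] + ['%s' % (v,) for v in vals]) + r' \\'
        for lbl, vals in zip(row_lbls, row_values)
    ]
    colspec = 'l' + 'r' * len(col_lbls)
    return '\n'.join(
        [r'\begin{tabular}{%s}' % colspec, header] + body + [r'\end{tabular}']
    )

print(simple_tabular(['testdb1'], ['multiton', 'total'], [[10, 13]]))
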