Example #1
def color_by_nids(graph, unique_nids=None, ibs=None, nid2_color_=None):
    """ Colors edges and nodes by nid """
    # TODO use ut.color_nodes
    import plottool as pt

    ensure_graph_nid_labels(graph, unique_nids, ibs=ibs)
    node_to_nid = nx.get_node_attributes(graph, 'nid')
    unique_nids = ut.unique(node_to_nid.values())
    ncolors = len(unique_nids)
    if ncolors == 1:
        unique_colors = [pt.UNKNOWN_PURP]
    else:
        if nid2_color_ is not None:
            unique_colors = pt.distinct_colors(ncolors + len(nid2_color_) * 2)
        else:
            unique_colors = pt.distinct_colors(ncolors)
    # Find edges and aids strictly between two nids
    nid_to_color = dict(zip(unique_nids, unique_colors))
    if nid2_color_ is not None:
        # HACK NEED TO ENSURE COLORS ARE NOT REUSED
        nid_to_color.update(nid2_color_)
    edge_aids = list(graph.edges())
    edge_nids = ut.unflat_take(node_to_nid, edge_aids)
    flags = [nids[0] == nids[1] for nids in edge_nids]
    flagged_edge_aids = ut.compress(edge_aids, flags)
    flagged_edge_nids = ut.compress(edge_nids, flags)
    flagged_edge_colors = [nid_to_color[nids[0]] for nids in flagged_edge_nids]
    edge_to_color = dict(zip(flagged_edge_aids, flagged_edge_colors))
    node_to_color = ut.map_dict_vals(ut.partial(ut.take, nid_to_color),
                                     node_to_nid)
    nx.set_edge_attributes(graph, 'color', edge_to_color)
    nx.set_node_attributes(graph, 'color', node_to_color)
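Every snippet on this page leans on utool's map_dict_vals, which applies a function to each value of a dictionary and returns a new dictionary with the same keys. A minimal sketch of that pattern in plain Python, with made-up data, shown only for orientation (this is not the utool implementation itself):

def map_dict_vals(func, dict_):
    # Apply func to every value; keys are left untouched.
    return {key: func(val) for key, val in dict_.items()}

node_to_nid = {1: 'a', 2: 'a', 3: 'b'}
nid_to_color = {'a': (1.0, 0.0, 0.0), 'b': (0.0, 0.0, 1.0)}
# Roughly what the node_to_color line above computes: each node gets its name's color.
node_to_color = map_dict_vals(nid_to_color.get, node_to_nid)
# {1: (1.0, 0.0, 0.0), 2: (1.0, 0.0, 0.0), 3: (0.0, 0.0, 1.0)}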
Example #3
    def send_wbia_request(suffix, type_='post', json=True, **kwargs):
        """
        Posts a request to a url suffix
        """
        import requests
        import utool as ut

        if not suffix.endswith('/'):
            raise Exception('YOU PROBABLY WANT A / AT THE END OF YOUR URL')
        payload = ut.map_dict_vals(ut.to_json, kwargs)
        if type_ == 'post':
            resp = requests.post(baseurl + suffix, data=payload)
            content = resp._content
        elif type_ == 'get':
            resp = requests.get(baseurl + suffix, data=payload)
            content = resp.content
        if json:
            try:
                content = ut.from_json(content)
            except ValueError:
                raise Exception('Expected JSON string but got content=%r' %
                                (content, ))
            else:
                # logger.info('content = %r' % (content,))
                if content['status']['code'] != 200:
                    logger.info(content['status']['message'])
                    raise Exception(content['status']['message'])
            content = content['response']
        return content
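The request helpers on this page all follow the same recipe: JSON-encode each keyword argument value, post the form payload, and unwrap a status/response envelope. A rough sketch using only requests and the standard json module; the helper name send_request and the envelope layout are taken from the snippet above, not from any documented API:

import json
import requests

def send_request(baseurl, suffix, **kwargs):
    # JSON-encode each keyword value, like ut.map_dict_vals(ut.to_json, kwargs).
    payload = {key: json.dumps(val) for key, val in kwargs.items()}
    resp = requests.post(baseurl + suffix, data=payload)
    content = json.loads(resp.content)
    # Assumed envelope: {'status': {'code': ..., 'message': ...}, 'response': ...}
    if content['status']['code'] != 200:
        raise Exception(content['status']['message'])
    return content['response']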
Example #4
def _debug_repr_cpd(cpd):
    import re
    import utool as ut
    code_fmt = ut.codeblock(
        '''
        {variable} = pgmpy.factors.TabularCPD(
            variable={variable_repr},
            variable_card={variable_card_repr},
            values={get_cpd_repr},
            evidence={evidence_repr},
            evidence_card={evidence_card_repr},
        )
        ''')
    keys = ['variable', 'variable_card', 'values', 'evidence', 'evidence_card']
    dict_ = ut.odict(zip(keys, [getattr(cpd, key) for key in keys]))
    # HACK
    dict_['values'] = cpd.get_cpd()
    r = ut.repr2(dict_, explicit=True, nobraces=True, nl=True)
    print(r)

    # Parse props that are needed for this fmtstr
    fmt_keys = [match.groups()[0] for match in re.finditer('{(.*?)}', code_fmt)]
    need_reprs = [key[:-5] for key in fmt_keys if key.endswith('_repr')]
    need_keys = [key for key in fmt_keys if not key.endswith('_repr')]
    # Get corresponding props
    # Call methods if need be
    tmp = [(prop, getattr(cpd, prop)) for prop in need_reprs]
    tmp = [(x, y()) if ut.is_funclike(y) else (x, y) for (x, y) in tmp]
    fmtdict = dict(tmp)
    fmtdict = ut.map_dict_vals(ut.repr2, fmtdict)
    fmtdict = ut.map_dict_keys(lambda x: x + '_repr', fmtdict)
    tmp2 = [(prop, getattr(cpd, prop)) for prop in need_keys]
    fmtdict.update(dict(tmp2))
    code = code_fmt.format(**fmtdict)
    return code
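The last few lines build the format dictionary in two passes: every needed property is converted to its repr, then each key gets a '_repr' suffix so it matches the placeholders in code_fmt. A tiny sketch of those two dictionary transforms with made-up props, using plain dict comprehensions as stand-ins for the utool helpers:

props = {'variable': 'name', 'variable_card': 3}
fmtdict = {key: repr(val) for key, val in props.items()}        # like ut.map_dict_vals(ut.repr2, fmtdict)
fmtdict = {key + '_repr': val for key, val in fmtdict.items()}  # like ut.map_dict_keys(lambda x: x + '_repr', fmtdict)
code = '{variable} = TabularCPD(variable={variable_repr})'.format(variable='name', **fmtdict)
# "name = TabularCPD(variable='name')"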
Example #5
 def send_ibeis_request(suffix, type_='post', **kwargs):
     """
     Posts a request to a url suffix
     """
     import requests
     import utool as ut
     if not suffix.endswith('/'):
         raise Exception('YOU PROBABLY WANT A / AT THE END OF YOUR URL')
     payload = ut.map_dict_vals(ut.to_json, kwargs)
     if type_ == 'post':
         resp = requests.post(baseurl + suffix, data=payload)
         json_content = resp._content
     elif type_ == 'get':
         resp = requests.get(baseurl + suffix, data=payload)
         json_content = resp.content
     try:
         content = ut.from_json(json_content)
     except ValueError:
         raise Exception('Expected JSON string but got json_content=%r' % (json_content,))
     else:
         # print('content = %r' % (content,))
         if content['status']['code'] != 200:
             print(content['status']['message'])
             raise Exception(content['status']['message'])
     request_response = content['response']
     return request_response
Example #6
 def print_size_info(inva):
     sizes = inva.get_size_info()
     sizes = ut.sort_dict(sizes, 'vals', ut.identity)
     total_nbytes = sum(sizes.values())
     logger.info(
         ut.align(ut.repr3(ut.map_dict_vals(ut.byte_str2, sizes), strvals=True), ':')
     )
     logger.info('total_nbytes = %r' % (ut.byte_str2(total_nbytes),))
Example #7
 def __init__(invassign, fstack, vocab, wx2_idxs, wx2_maws, wx2_fxs, wx2_axs):
     invassign.fstack = fstack
     invassign.vocab = vocab
     invassign.wx2_idxs = wx2_idxs
     invassign.wx2_maws = wx2_maws
     invassign.wx2_fxs = wx2_fxs
     invassign.wx2_axs = wx2_axs
     invassign.wx2_num = ut.map_dict_vals(len, invassign.wx2_axs)
     invassign.wx_list = sorted(invassign.wx2_num.keys())
     invassign.num_list = ut.take(invassign.wx2_num, invassign.wx_list)
     invassign.perword_stats = ut.get_stats(invassign.num_list)
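The wx2_num line is the core trick here: mapping len over a dict of grouped items gives per-key counts while keeping the keys. A small illustration with made-up word-to-annotation groups:

wx2_axs = {0: [1, 4, 9], 1: [2], 2: [3, 5]}               # word index -> annot indices (made up)
wx2_num = {wx: len(axs) for wx, axs in wx2_axs.items()}    # what ut.map_dict_vals(len, wx2_axs) returns
wx_list = sorted(wx2_num.keys())                           # [0, 1, 2]
num_list = [wx2_num[wx] for wx in wx_list]                 # [3, 1, 2], mirroring ut.take(wx2_num, wx_list)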
Example #9
 def infohist(group):
     cardnames = [six.text_type(c) for c in group.cards]
     types_ = [c.types[-1] for c in group.cards]
     hgroup = dict(ut.hierarchical_group_items(cardnames, [types_, cardnames]))
     infohist1 = ut.map_dict_vals(ut.dict_hist, hgroup).items()
     # Grouped infohist
     infohist = [(key, [(vals[n], n) for n in vals]) for key, vals in infohist1]
     # list_ = [six.text_type(c) for c in group.cards]
     # dict_ = ut.dict_hist(list_)
     # ulist_ = ut.unique_ordered(list_)
     # infohist = [(dict_[item], item) for item in ulist_]
     # ut.embed()
     return infohist
Example #10
 def send_ibeis_request(suffix, type_='post', **kwargs):
     """
     Posts a request to a url suffix
     """
     import requests
     import utool as ut
     payload = ut.map_dict_vals(ut.to_json, kwargs)
     if type_ == 'post':
         resp = requests.post(baseurl + suffix, data=payload)
         content = ut.from_json(resp._content)
     elif type_ == 'get':
         resp = requests.get(baseurl + suffix, data=payload)
         content = ut.from_json(resp.content)
     response = content['response']
     return response
Example #11
    def apply_hard_soft_evidence(cpd_list, evidence_list):
        for cpd, ev in zip(cpd_list, evidence_list):
            if isinstance(ev, int):
                # hard internal evidence
                evidence[cpd.variable] = ev
            if isinstance(ev, six.string_types):
                # hard external evidence
                evidence[cpd.variable] = cpd._internal_varindex(
                    cpd.variable, ev)
            if isinstance(ev, dict):
                # soft external evidence
                # HACK THAT MODIFIES CPD IN PLACE
                def rectify_evidence_val(_v, card=cpd.variable_card):
                    # rectify hacky string structures
                    tmp = 1 / (2 * card**2)
                    return (1 + tmp) / (card + tmp) if _v == '+eps' else _v

                ev_ = ut.map_dict_vals(rectify_evidence_val, ev)
                fill = (1.0 - sum(ev_.values())) / (cpd.variable_card -
                                                    len(ev_))
                # HACK fix for float problems
                if len(ev_) == cpd.variable_card - 1:
                    fill = 0

                assert fill > -1e7, 'fill=%r' % (fill, )
                row_labels = list(ut.iprod(*cpd.statenames))

                for i, lbl in enumerate(row_labels):
                    if lbl in ev_:
                        # external case1
                        cpd.values[i] = ev_[lbl]
                    elif len(lbl) == 1 and lbl[0] in ev_:
                        # external case2
                        cpd.values[i] = ev_[lbl[0]]
                    elif i in ev_:
                        # internal case
                        cpd.values[i] = ev_[i]
                    else:
                        cpd.values[i] = fill
                cpd.normalize()
                soft_evidence[cpd.variable] = True
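The '+eps' rectification maps the string to a probability just above uniform, and the leftover mass is then spread evenly over the unspecified states. Worked through for a two-state variable (made-up numbers, mirroring the arithmetic above):

card = 2                                  # two states, for illustration
tmp = 1 / (2 * card ** 2)                 # 0.125
eps_val = (1 + tmp) / (card + tmp)        # ~0.529, slightly above the uniform 1 / card = 0.5
ev_ = {'state_a': eps_val}
fill = (1.0 - sum(ev_.values())) / (card - len(ev_))   # ~0.471 assigned to the remaining state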
Example #13
def _debug_repr_cpd(cpd):
    import re
    import utool as ut

    code_fmt = ut.codeblock("""
        {variable} = pgmpy.factors.TabularCPD(
            variable={variable_repr},
            variable_card={variable_card_repr},
            values={get_cpd_repr},
            evidence={evidence_repr},
            evidence_card={evidence_card_repr},
        )
        """)
    keys = ['variable', 'variable_card', 'values', 'evidence', 'evidence_card']
    dict_ = ut.odict(zip(keys, [getattr(cpd, key) for key in keys]))
    # HACK
    dict_['values'] = cpd.get_cpd()
    r = ut.repr2(dict_, explicit=True, nobraces=True, nl=True)
    logger.info(r)

    # Parse props that are needed for this fmtstr
    fmt_keys = [
        match.groups()[0] for match in re.finditer('{(.*?)}', code_fmt)
    ]
    need_reprs = [key[:-5] for key in fmt_keys if key.endswith('_repr')]
    need_keys = [key for key in fmt_keys if not key.endswith('_repr')]
    # Get corresponding props
    # Call methods if need be
    tmp = [(prop, getattr(cpd, prop)) for prop in need_reprs]
    tmp = [(x, y()) if ut.is_funclike(y) else (x, y) for (x, y) in tmp]
    fmtdict = dict(tmp)
    fmtdict = ut.map_dict_vals(ut.repr2, fmtdict)
    fmtdict = ut.map_dict_keys(lambda x: x + '_repr', fmtdict)
    tmp2 = [(prop, getattr(cpd, prop)) for prop in need_keys]
    fmtdict.update(dict(tmp2))
    code = code_fmt.format(**fmtdict)
    return code
Example #14
def nx_dag_node_rank(graph, nodes=None):
    """
    Returns the rank of each node, i.e. the "level" it occupies in a
    topological sort. This is the same as the Graphviz dot rank.

    Ignore:
        simple_graph = ut.simplify_graph(exi_graph)
        adj_dict = ut.nx_to_adj_dict(simple_graph)
        import plottool as pt
        pt.qt4ensure()
        pt.show_nx(graph)

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_graph import *  # NOQA
        >>> import utool as ut
        >>> adj_dict = {0: [5], 1: [5], 2: [1], 3: [4], 4: [0], 5: [], 6: [4], 7: [9], 8: [6], 9: [1]}
        >>> import networkx as nx
        >>> nodes = [2, 1, 5]
        >>> f_graph = ut.nx_from_adj_dict(adj_dict, nx.DiGraph)
        >>> graph = f_graph.reverse()
        >>> #ranks = ut.nx_dag_node_rank(graph, nodes)
        >>> ranks = ut.nx_dag_node_rank(graph, nodes)
        >>> result = ('ranks = %r' % (ranks,))
        >>> print(result)
        ranks = [3, 2, 1]
    """
    import utool as ut
    source = list(ut.nx_source_nodes(graph))[0]
    longest_paths = dict([(target, dag_longest_path(graph, source, target))
                          for target in graph.nodes()])
    node_to_rank = ut.map_dict_vals(len, longest_paths)
    if nodes is None:
        return node_to_rank
    else:
        ranks = ut.dict_take(node_to_rank, nodes)
        return ranks
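Here a node's rank is simply the length of the longest path reaching it from the graph's single source. A sketch of the same computation with plain networkx, using a topological-order sweep instead of the utool helpers and reusing the reversed adjacency from the doctest:

import networkx as nx

graph = nx.DiGraph([(5, 0), (5, 1), (0, 4), (4, 3), (4, 6), (6, 8), (1, 2), (1, 9), (9, 7)])
source = [n for n in graph.nodes() if graph.in_degree(n) == 0][0]   # assumes a single source, as above
rank = {source: 1}
for node in nx.topological_sort(graph):
    for succ in graph.successors(node):
        # The longest path to succ is one longer than the longest path to its deepest predecessor.
        rank[succ] = max(rank.get(succ, 0), rank.get(node, 0) + 1)
print([rank[n] for n in [2, 1, 5]])   # [3, 2, 1], matching the doctest result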
Example #15
 def add_split(dataset, key, idxs):
     print('[dataset] adding split %r' % (key,))
     # Build subset filenames
     ut.ensuredir(dataset.split_dpath)
     ext = dataset._ext
     fmtdict = dict(key=key, ext=ext, size=len(idxs))
     fmtstr = dataset.get_split_fmtstr(forward=True)
     splitset = {
         type_: join(dataset.split_dpath, fmtstr.format(type_=type_, **fmtdict))
         for type_ in ['data', 'labels', 'metadata']
     }
     # Partition data into the subset
     part_dict = {
         'data': dataset.data.take(idxs, axis=0),
         'labels': dataset.labels.take(idxs, axis=0),
     }
     if dataset.metadata is not None:
         taker = ut.partial(ut.take, index_list=idxs)
         part_dict['metadata'] = ut.map_dict_vals(taker, dataset.metadata)
     # Write splitset data to files
     for type_ in part_dict.keys():
         ut.save_data(splitset[type_], part_dict[type_])
     # Register filenames with dataset
     dataset.fpath_dict[key] = splitset
Example #16
def draw_feat_scoresep(testres, f=None, disttype=None):
    r"""
    SeeAlso:
        ibeis.algo.hots.scorenorm.train_featscore_normalizer

    CommandLine:
        python -m ibeis --tf TestResult.draw_feat_scoresep --show
        python -m ibeis --tf TestResult.draw_feat_scoresep --show -t default:sv_on=[True,False]
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --disttype=L2_sift,fg
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --disttype=L2_sift
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True --namemode=True
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True --namemode=False

        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=L2_sift
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=L2_sift -t best:SV=False

        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1
        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --fsvx=1:2
        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 --fsvx=0:1

        utprof.py -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 -t best:lnbnn_on=False,bar_l2_on=True  --fsvx=0:1

        # We want to query the oxford annots tagged as query
        # and we want the database to contain
        # K correct images per query, as well as the distractors

        python -m ibeis --tf TestResult.draw_feat_scoresep  --show --db Oxford -a default:qhas_any=\(query,\),dpername=1,exclude_reference=True,minqual=ok
        python -m ibeis --tf TestResult.draw_feat_scoresep  --show --db Oxford -a default:qhas_any=\(query,\),dpername=1,exclude_reference=True,minqual=good

        python -m ibeis --tf get_annotcfg_list  --db PZ_Master1 -a timectrl --acfginfo --verbtd  --veryverbtd --nocache-aid

        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST --disttype=ratio

    Example:
        >>> # SCRIPT
        >>> from ibeis.expt.test_result import *  # NOQA
        >>> from ibeis.init import main_helpers
        >>> disttype = ut.get_argval('--disttype', type_=list, default=None)
        >>> ibs, testres = main_helpers.testdata_expts(
        >>>     defaultdb='PZ_MTEST', a=['timectrl'], t=['best'])
        >>> f = ut.get_argval(('--filt', '-f'), type_=list, default=[''])
        >>> testres.draw_feat_scoresep(f=f)
        >>> ut.show_if_requested()
    """
    print('[testres] draw_feat_scoresep')
    import plottool as pt

    def load_feat_scores(qreq_, qaids):
        import ibeis  # NOQA
        from os.path import dirname, join  # NOQA
        # HACKY CACHE
        cfgstr = qreq_.get_cfgstr(with_input=True)
        cache_dir = join(dirname(dirname(ibeis.__file__)),
                         'TMP_FEATSCORE_CACHE')
        namemode = ut.get_argval('--namemode', default=True)
        fsvx = ut.get_argval('--fsvx',
                             type_='fuzzy_subset',
                             default=slice(None, None, None))
        threshx = ut.get_argval('--threshx', type_=int, default=None)
        thresh = ut.get_argval('--thresh', type_=float, default=.9)
        num = ut.get_argval('--num', type_=int, default=1)
        cfg_components = [
            cfgstr, disttype, namemode, fsvx, threshx, thresh, f, num
        ]
        cache_cfgstr = ','.join(ut.lmap(six.text_type, cfg_components))
        cache_hashid = ut.hashstr27(cache_cfgstr + '_v1')
        cache_name = ('get_cfgx_feat_scores_' + cache_hashid)

        @ut.cached_func(cache_name,
                        cache_dir=cache_dir,
                        key_argx=[],
                        use_cache=True)
        def get_cfgx_feat_scores(qreq_, qaids):
            from ibeis.algo.hots import scorenorm
            cm_list = qreq_.execute(qaids)
            # print('Done loading cached chipmatches')
            tup = scorenorm.get_training_featscores(qreq_,
                                                    cm_list,
                                                    disttype,
                                                    namemode,
                                                    fsvx,
                                                    threshx,
                                                    thresh,
                                                    num=num)
            # print(ut.depth_profile(tup))
            tp_scores, tn_scores, scorecfg = tup
            return tp_scores, tn_scores, scorecfg

        tp_scores, tn_scores, scorecfg = get_cfgx_feat_scores(qreq_, qaids)
        return tp_scores, tn_scores, scorecfg

    valid_case_pos = testres.case_sample2(filt_cfg=f, return_mask=False)
    cfgx2_valid_qxs = ut.group_items(valid_case_pos.T[0], valid_case_pos.T[1])
    test_qaids = testres.get_test_qaids()
    cfgx2_valid_qaids = ut.map_dict_vals(ut.partial(ut.take, test_qaids),
                                         cfgx2_valid_qxs)

    join_acfgs = True

    # TODO: option to average over pipeline configurations
    if join_acfgs:
        groupxs = testres.get_cfgx_groupxs()
    else:
        groupxs = list(zip(range(len(testres.cfgx2_qreq_))))
    grouped_qreqs = ut.apply_grouping(testres.cfgx2_qreq_, groupxs)

    grouped_scores = []
    for cfgxs, qreq_group in zip(groupxs, grouped_qreqs):
        # testres.print_pcfg_info()
        score_group = []
        for cfgx, qreq_ in zip(cfgxs, testres.cfgx2_qreq_):
            print('Loading cached chipmatches')
            qaids = cfgx2_valid_qaids[cfgx]
            tp_scores, tn_scores, scorecfg = load_feat_scores(qreq_, qaids)
            score_group.append((tp_scores, tn_scores, scorecfg))
        grouped_scores.append(score_group)

    cfgx2_shortlbl = testres.get_short_cfglbls(join_acfgs=join_acfgs)
    for score_group, lbl in zip(grouped_scores, cfgx2_shortlbl):
        tp_scores = np.hstack(ut.take_column(score_group, 0))
        tn_scores = np.hstack(ut.take_column(score_group, 1))
        scorecfg = '+++'.join(ut.unique(ut.take_column(score_group, 2)))
        score_group
        # TODO: learn this score normalizer as a model
        # encoder = vt.ScoreNormalizer(adjust=4, monotonize=False)
        encoder = vt.ScoreNormalizer(adjust=2, monotonize=True)
        encoder.fit_partitioned(tp_scores, tn_scores, verbose=False)
        figtitle = 'Feature Scores: %s, %s' % (scorecfg, lbl)
        fnum = None

        vizkw = {}
        sephack = ut.get_argflag('--sephack')
        if not sephack:
            vizkw['target_tpr'] = .95
            vizkw['score_range'] = (0, 1.0)

        encoder.visualize(
            figtitle=figtitle,
            fnum=fnum,
            with_scores=False,
            #with_prebayes=True,
            with_prebayes=False,
            with_roc=True,
            with_postbayes=False,
            #with_postbayes=True,
            **vizkw)
        icon = testres.ibs.get_database_icon()
        if icon is not None:
            pt.overlay_icon(icon, coords=(1, 0), bbox_alignment=(1, 0))

        if ut.get_argflag('--contextadjust'):
            pt.adjust_subplots(left=.1, bottom=.25, wspace=.2, hspace=.2)
            pt.adjust_subplots(use_argv=True)
    return encoder
Example #17
    def make_graph(infr, show=False):
        import networkx as nx
        import itertools
        cm_list = infr.cm_list
        unique_nids, prob_names = infr.make_prob_names()
        thresh = infr.choose_thresh()

        # Simply cut any edge with a weight less than a threshold
        qaid_list = [cm.qaid for cm in cm_list]
        postcut = prob_names > thresh
        qxs, nxs = np.where(postcut)
        if False:
            kw = dict(precision=2, max_line_width=140, suppress_small=True)
            print(
                ut.hz_str('prob_names = ', ut.array2string2((prob_names),
                                                            **kw)))
            print(
                ut.hz_str('postcut = ',
                          ut.array2string2((postcut).astype(np.int), **kw)))
        matching_qaids = ut.take(qaid_list, qxs)
        matched_nids = ut.take(unique_nids, nxs)

        qreq_ = infr.qreq_

        nodes = ut.unique(qreq_.qaids.tolist() + qreq_.daids.tolist())
        if not hasattr(qreq_, 'dnids'):
            qreq_.dnids = qreq_.ibs.get_annot_nids(qreq_.daids)
            qreq_.qnids = qreq_.ibs.get_annot_nids(qreq_.qaids)
        dnid2_daids = ut.group_items(qreq_.daids, qreq_.dnids)
        grouped_aids = dnid2_daids.values()
        matched_daids = ut.take(dnid2_daids, matched_nids)
        name_cliques = [
            list(itertools.combinations(aids, 2)) for aids in grouped_aids
        ]
        aid_matches = [
            list(ut.product([qaid], daids))
            for qaid, daids in zip(matching_qaids, matched_daids)
        ]

        graph = nx.Graph()
        graph.add_nodes_from(nodes)
        graph.add_edges_from(ut.flatten(name_cliques))
        graph.add_edges_from(ut.flatten(aid_matches))

        #matchless_quries = ut.take(qaid_list, ut.index_complement(qxs, len(qaid_list)))
        name_nodes = [('nid', l) for l in qreq_.dnids]
        db_aid_nid_edges = list(zip(qreq_.daids, name_nodes))
        #query_aid_nid_edges = list(zip(matching_qaids, [('nid', l) for l in matched_nids]))
        #G = nx.Graph()
        #G.add_nodes_from(matchless_quries)
        #G.add_edges_from(db_aid_nid_edges)
        #G.add_edges_from(query_aid_nid_edges)

        graph.add_edges_from(db_aid_nid_edges)

        if infr.user_feedback is not None:
            user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
            p_bg = 0.0
            part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
            part2 = p_bg * user_feedback['p_notcomp']
            p_same_list = part1 + part2
            for aid1, aid2, p_same in zip(user_feedback['aid1'],
                                          user_feedback['aid2'], p_same_list):
                if p_same > .5:
                    if not graph.has_edge(aid1, aid2):
                        graph.add_edge(aid1, aid2)
                else:
                    if graph.has_edge(aid1, aid2):
                        graph.remove_edge(aid1, aid2)
        if show:
            import plottool as pt
            nx.set_node_attributes(graph, 'color',
                                   {aid: pt.LIGHT_PINK
                                    for aid in qreq_.daids})
            nx.set_node_attributes(graph, 'color',
                                   {aid: pt.TRUE_BLUE
                                    for aid in qreq_.qaids})
            nx.set_node_attributes(
                graph, 'color', {
                    aid: pt.LIGHT_PURPLE
                    for aid in np.intersect1d(qreq_.qaids, qreq_.daids)
                })
            nx.set_node_attributes(
                graph, 'label',
                {node: 'n%r' % (node[1], )
                 for node in name_nodes})
            nx.set_node_attributes(
                graph, 'color', {node: pt.LIGHT_GREEN
                                 for node in name_nodes})
        if show:
            import plottool as pt
            pt.show_nx(graph, layoutkw={'prog': 'neato'}, verbose=False)
        return graph
Example #18
def show_top_featmatches(qreq_, cm_list):
    """
    Args:
        qreq_ (ibeis.QueryRequest):  query request object with hyper-parameters
        cm_list (list):

    SeeAlso:
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True,lnbnn_normalizer=normlnbnn-test -a default --sephack

        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 -t best:lnbnn_on=True -a timectrl --sephack
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True -a default:size=30 --sephack
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:K=1,Knorm=5,lnbnn_on=True -a default:size=30 --sephack
        python -m ibeis --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:K=1,Knorm=3,lnbnn_on=True -a default --sephack


    CommandLine:
        python -m ibeis.viz.viz_nearest_descriptors --exec-show_top_featmatches --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.viz.viz_nearest_descriptors import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist(defaultdb='PZ_MTEST',
        >>>                                        a=['default:has_none=mother,size=30'])
        >>> show_top_featmatches(qreq_, cm_list)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    # for cm in cm_list:
    #     cm.score_csum(qreq_)
    import numpy as np
    import vtool as vt
    from functools import partial
    # Stack chipmatches
    ibs = qreq_.ibs
    infos = [cm.get_flat_fm_info() for cm in cm_list]
    flat_metadata = dict([(k, np.concatenate(v))
                          for k, v in ut.dict_stack2(infos).items()])
    fsv_flat = flat_metadata['fsv']
    flat_metadata['fs'] = fsv_flat.prod(axis=1)
    aids1 = flat_metadata['aid1'][:, None]
    aids2 = flat_metadata['aid2'][:, None]
    flat_metadata['aid_pairs'] = np.concatenate([aids1, aids2], axis=1)

    # Take sample of metadata
    sortx = flat_metadata['fs'].argsort()[::-1]
    num = len(cm_list) * 3
    # num = 10
    taker = partial(np.take, indices=sortx[:num], axis=0)
    flat_metadata_top = ut.map_dict_vals(taker, flat_metadata)
    aid1s, aid2s, fms = ut.dict_take(flat_metadata_top, ['aid1', 'aid2', 'fm'])

    annots = {}
    aids = np.unique(np.hstack((aid1s, aid2s)))
    annots = {aid: ibs.get_annot_lazy_dict(aid, config2_=qreq_.qparams)
              for aid in aids}

    label_lists = ibs.get_aidpair_truths(aid1s, aid2s) == ibs.const.TRUTH_MATCH
    patch_size = 64

    def extract_patches(annots, aid, fxs):
        """ custom_func(lazydict, key, subkeys) for multigroup_lookup """
        annot = annots[aid]
        kpts = annot['kpts']
        rchip = annot['rchip']
        kpts_m = kpts.take(fxs, axis=0)
        warped_patches, warped_subkpts = vt.get_warped_patches(rchip, kpts_m,
                                                               patch_size=patch_size)
        return warped_patches

    data_lists = vt.multigroup_lookup(annots, [aid1s, aid2s], fms.T, extract_patches)

    import plottool as pt
    pt.ensure_pylab_qt4()
    import ibeis_cnn
    inter = ibeis_cnn.draw_results.interact_patches(
        label_lists, data_lists, flat_metadata_top, chunck_sizes=(2, 4),
        ibs=ibs, hack_one_per_aid=False, sortby='fs', qreq_=qreq_)
    inter.show()
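The sampling step above is a common pattern: sort once, then take the same top rows from every parallel array in the metadata dict so the columns stay aligned. A compact sketch with made-up arrays:

import numpy as np
from functools import partial

flat_metadata = {
    'fs':   np.array([0.2, 0.9, 0.5, 0.7]),   # made-up feature scores
    'aid1': np.array([10, 11, 12, 13]),
    'aid2': np.array([20, 21, 22, 23]),
}
sortx = flat_metadata['fs'].argsort()[::-1]           # row order by descending score
taker = partial(np.take, indices=sortx[:2], axis=0)   # keep the top 2 rows
flat_metadata_top = {key: taker(arr) for key, arr in flat_metadata.items()}
# flat_metadata_top['aid1'] -> array([11, 13]); every column stays aligned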
Example #19
def get_dbinfo(
    ibs,
    verbose=True,
    with_imgsize=False,
    with_bytes=False,
    with_contrib=False,
    with_agesex=False,
    with_header=True,
    short=False,
    tag='dbinfo',
    aid_list=None,
    aids=None,
):
    """

    Returns a dictionary of digestible database information.
    Infostr is a string summary of all the stats. Prints infostr in addition to
    returning locals.

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    SeeAlso:
        python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --use-hist=True --old=False --per_name_vpedge=False
        python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --all

    CommandLine:
        python -m wbia.other.dbinfo --exec-get_dbinfo:0
        python -m wbia.other.dbinfo --test-get_dbinfo:1
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db LF_Bajo_bonito -a default
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db DETECT_SEATURTLES -a default --readonly

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example1:
        >>> # SCRIPT
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = wbia.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = wbia.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from wbia.expt import cfghelpers
        >>> #from wbia.expt import annotation_configs
        >>> #from wbia.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example2:
        >>> # ENABLE_DOCTEST
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> verbose = True
        >>> short = True
        >>> #ibs = wbia.opendb(db='GZ_ALL')
        >>> #ibs = wbia.opendb(db='PZ_Master0')
        >>> ibs = wbia.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contributor_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = wbia.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info:  testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names                      = 7
        # Names (unassociated)       = 0
        # Names (singleton)          = 5
        # Names (multiton)           = 2
        ----------
        # Annots                     = 13
        # Annots (unknown)           = 4
        # Annots (singleton)         = 5
        # Annots (multiton)          = 4
        ----------
        # Img                        = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...
    if aids is not None:
        aid_list = aids

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            logger.info('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from wbia.expt import experiment_helpers

            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list
            )
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            # aid_list =
        if verbose:
            logger.info('Specified %d custom aids' % (len(aid_list),))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False))
            - {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    # associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    valid_images = ibs.images(valid_gids)
    valid_annots = ibs.annots(valid_aids)

    # Image info
    if verbose:
        logger.info('Checking Image Info')
    gx2_aids = valid_images.aids
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats = ut.repr4(
        ut.get_stats(gx2_nAnnots, use_median=True), nl=0, precision=2, si=True
    )
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        logger.info('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in nx2_aids]
    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    if False:
        # Occurrence Info
        def compute_annot_occurrence_ids(ibs, aid_list):
            from wbia.algo.preproc import preproc_occurrence

            gid_list = ibs.get_annot_gids(aid_list)
            gid2_aids = ut.group_items(aid_list, gid_list)
            config = {'seconds_thresh': 4 * 60 * 60}
            flat_imgsetids, flat_gids = preproc_occurrence.wbia_compute_occurrences(
                ibs, gid_list, config=config, verbose=False
            )
            occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
            occurid2_aids = {
                oid: ut.flatten(ut.take(gid2_aids, gids))
                for oid, gids in occurid2_gids.items()
            }
            return occurid2_aids

        import utool

        with utool.embed_on_exception_context:
            occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
            occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
            occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
            nid2_occurxs = ut.ddict(list)
            for occurx, nids in enumerate(occur_unique_nids):
                for nid in nids:
                    nid2_occurxs[nid].append(occurx)

        nid2_occurx_single = {
            nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1
        }
        nid2_occurx_resight = {
            nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1
        }
        singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

        singlesight_annot_stats = ut.get_stats(
            list(map(len, singlesight_encounters)), use_median=True, use_sum=True
        )
        resight_name_stats = ut.get_stats(
            list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True
        )

    # Encounter Info
    def break_annots_into_encounters(aids):
        from wbia.algo.preproc import occurrence_blackbox
        import datetime

        thresh_sec = datetime.timedelta(minutes=30).seconds
        posixtimes = np.array(ibs.get_annot_image_unixtimes_asfloat(aids))
        # latlons = ibs.get_annot_image_gps(aids)
        labels = occurrence_blackbox.cluster_timespace2(
            posixtimes, None, thresh_sec=thresh_sec
        )
        return labels
        # ave_enc_time = [np.mean(times) for lbl, times in ut.group_items(posixtimes, labels).items()]
        # ut.square_pdist(ave_enc_time)

    try:
        am_rowids = ibs.get_annotmatch_rowids_between_groups([valid_aids], [valid_aids])[
            0
        ]
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0, am_rowids=am_rowids)
        undirected_tags = ibs.get_aidpair_tags(
            aid_pairs.T[0], aid_pairs.T[1], directed=False
        )
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)
    except Exception:
        pair_tag_info = {}

    # logger.info(ut.repr2(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        logger.info('Checking Annot Species')
    unknown_annots = valid_annots.compress(ibs.is_aid_unknown(valid_annots))
    species_list = valid_annots.species_texts
    species2_annots = valid_annots.group_items(valid_annots.species_texts)
    species2_nAids = {key: len(val) for key, val in species2_annots.items()}

    if verbose:
        logger.info('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Separate singletons / multitons
    multiton_nxs = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        logger.info('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        multiton_aids = np.array([], dtype=np.int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            logger.info('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)

        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = collections.OrderedDict(
                [
                    ('max', wh_list.max(0)),
                    ('min', wh_list.min(0)),
                    ('mean', wh_list.mean(0)),
                    ('std', wh_list.std(0)),
                ]
            )

            def arr2str(var):
                return '[' + (', '.join(list(map(lambda x: '%.1f' % x, var)))) + ']'

            ret = ',\n    '.join(
                ['%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items()]
            )
            return '{\n    ' + ret + '\n}'

        logger.info('reading image sizes')
        # Image size stats
        img_size_list = ibs.get_image_sizes(valid_gids)
        img_size_stats = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir                 = %d' % len(gpath_list)),
            (' Image Size Stats  = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        logger.info('Building Stats String')

    multiton_stats = ut.repr3(
        ut.get_stats(multiton_nid2_nannots, use_median=True), nl=0, precision=2, si=True
    )

    # Time stats
    unixtime_list = valid_images.unixtime2
    # valid_unixtime_list = [time for time in unixtime_list if time != -1]
    # unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.repr3(ut.get_timestats_dict(unixtime_list, full=True), si=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict((lambda: 0))
        for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
            if max_age is None:
                max_age = min_age
            if min_age is None:
                min_age = max_age
            if max_age is None and min_age is None:
                logger.info('Found UNKNOWN Age: %r, %r' % (min_age, max_age,))
                age_dict['UNKNOWN'] += 1
            elif (min_age is None or min_age < 12) and max_age < 12:
                age_dict['Infant'] += 1
            elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
                age_dict['Juvenile'] += 1
            elif 36 <= min_age and (max_age is None or 36 <= max_age):
                age_dict['Adult'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(
            set(annot_sextext_list) - set(sex_keys)
        )
        sextext2_nAnnots = ut.odict(
            [(key, len(sextext2_aids.get(key, []))) for key in sex_keys]
        )
        # Filter 0's
        sextext2_nAnnots = {
            key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0
        }
        return sextext2_nAnnots

    def get_annot_qual_stats(ibs, aid_list):
        annots = ibs.annots(aid_list)
        qualtext2_nAnnots = ut.order_dict_by(
            ut.map_vals(len, annots.group_items(annots.quality_texts)),
            list(ibs.const.QUALITY_TEXT_TO_INT.keys()),
        )
        return qualtext2_nAnnots

    def get_annot_viewpoint_stats(ibs, aid_list):
        annots = ibs.annots(aid_list)
        viewcode2_nAnnots = ut.order_dict_by(
            ut.map_vals(len, annots.group_items(annots.viewpoint_code)),
            list(ibs.const.VIEW.CODE_TO_INT.keys()) + [None],
        )
        return viewcode2_nAnnots

    if verbose:
        logger.info('Checking Other Annot Stats')

    qualtext2_nAnnots = get_annot_qual_stats(ibs, valid_aids)
    viewcode2_nAnnots = get_annot_viewpoint_stats(ibs, valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        logger.info('Checking Contrib Stats')

    # Contributor Statistics
    # hack remove colon for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]

    image_contributor_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contributor_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contributor_tag_to_gids = ut.group_items(valid_gids, image_contributor_tags)
    contributor_tag_to_aids = ut.group_items(valid_aids, annot_contributor_tags)

    contributor_tag_to_qualstats = {
        key: get_annot_qual_stats(ibs, aids)
        for key, aids in six.iteritems(contributor_tag_to_aids)
    }
    contributor_tag_to_viewstats = {
        key: get_annot_viewpoint_stats(ibs, aids)
        for key, aids in six.iteritems(contributor_tag_to_aids)
    }

    contributor_tag_to_nImages = {
        key: len(val) for key, val in six.iteritems(contributor_tag_to_gids)
    }
    contributor_tag_to_nAnnots = {
        key: len(val) for key, val in six.iteritems(contributor_tag_to_aids)
    }

    if verbose:
        logger.info('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton = len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_annots)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            logger.info('Checking Disk Space')
        ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            logger.info('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_annots)
            _num_names_total_check = (
                num_names_singleton + num_names_unassociated + num_names_multiton
            )
            _num_annots_total_check = (
                num_unknown_annots + num_singleton_annots + num_multiton_annots
            )
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            # if not request_annot_subset:
            # dont check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(
                ex,
                keys=[
                    '_num_names_total_check',
                    'num_names',
                    '_num_annots_total_check',
                    'num_annots',
                    'num_names_singleton',
                    'num_names_multiton',
                    'num_unknown_annots',
                    'num_multiton_annots',
                    'num_singleton_annots',
                ],
            )
            raise

    # Get contributor statistics
    contributor_rowids = ibs.get_valid_contributor_rowids()
    num_contributors = len(contributor_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.repr2(dict_, si=True)
        return align2(str_)

    header_block_lines = [('+============================')] + (
        [
            ('+ singleton := single sighting'),
            ('+ multiton  := multiple sightings'),
            ('--' * num_tabs),
        ]
        if not short and with_header
        else []
    )

    source_block_lines = [
        ('DB Info:  ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = (
        [
            ('--' * num_tabs),
            ('DB Bytes: '),
            ('     +- dbdir nBytes:         ' + dbdir_space),
            ('     |  +- _ibsdb nBytes:     ' + ibsdir_space),
            ('     |  |  +-imgdir nBytes:   ' + imgdir_space),
            ('     |  |  +-cachedir nBytes: ' + cachedir_space),
        ]
        if with_bytes
        else []
    )

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names                      = %d' % num_names),
        ('# Names (unassociated)       = %d' % num_names_unassociated),
        ('# Names (singleton)          = %d' % num_names_singleton),
        ('# Names (multiton)           = %d' % num_names_multiton),
    ]

    subset_str = '        ' if not request_annot_subset else '(SUBSET)'

    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s            = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown)           = %d' % num_unknown_annots),
        ('# Annots (singleton)         = %d' % num_singleton_annots),
        ('# Annots (multiton)          = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = (
        [
            ('--' * num_tabs),
            ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
            ('# Annots per Image           = %s' % (align2(gx2_nAnnots_stats),)),
            ('# Annots per Species         = %s' % (align_dict2(species2_nAids),)),
        ]
        if not short
        else []
    )

    occurrence_block_lines = (
        [
            ('--' * num_tabs),
            # ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
            # ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
            ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
        ]
        if not short
        else []
    )

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(viewcode2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = (
        [
            '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
            '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
        ]
        if not short and with_agesex
        else []
    )

    contributor_block_lines = (
        [
            '# Images per contributor       = ' + align_dict2(contributor_tag_to_nImages),
            '# Annots per contributor       = ' + align_dict2(contributor_tag_to_nAnnots),
            '# Quality per contributor      = '
            + ut.repr2(contributor_tag_to_qualstats, sorted_=True),
            '# Viewpoint per contributor    = '
            + ut.repr2(contributor_tag_to_viewstats, sorted_=True),
        ]
        if with_contrib
        else []
    )

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img                        = %d' % len(valid_gids)),
        None
        if short
        else ('# Img reviewed               = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps               = %d' % len(gps_list)),
        # ('# Img with timestamp         = %d' % len(valid_unixtime_list)),
        None
        if short
        else ('Img Time Stats               = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines
        + bytes_block_lines
        + source_block_lines
        + name_block_lines
        + annot_block_lines
        + annot_per_basic_block_lines
        + occurrence_block_lines
        + annot_per_qualview_block_lines
        + annot_per_agesex_block_lines
        + img_block_lines
        + contributor_block_lines
        + imgsize_stat_lines
        + [('L============================')]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        logger.info(info_str2)
    locals_ = locals()
    return locals_
Example #20
0
def get_annotmatch_subgraph(ibs):
    r"""
    http://bokeh.pydata.org/en/latest/
    https://github.com/jsexauer/networkx_viewer

    TODO: Need a special visualization
        In the web I need:
            * graph of annotations matches.
            * can move them around.
            * edit lines between them.
            * http://stackoverflow.com/questions/15373530/web-graph-visualization-tool

            This should share functionality with a name view.

    Args:
        ibs (IBEISController):  ibeis controller object

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_subgraph --show

        # Networkx example
        python -m ibeis.viz.viz_graph --test-show_chipmatch_graph:0 --show

    Ignore:

        from ibeis import viz

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> result = get_annotmatch_subgraph(ibs)
        >>> ut.show_if_requested()
    """
    #import ibeis
    #ibs = ibeis.opendb(db='PZ_MTEST')
    #rowids = ibs._get_all_annotmatch_rowids()
    #aids1 = ibs.get_annotmatch_aid1(rowids)
    #aids2 = ibs.get_annotmatch_aid2(rowids)
    #
    #
    nids = ibs.get_valid_nids()
    nids = nids[0:5]
    aids_list = ibs.get_name_aids(nids)
    import itertools
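    # Build every ordered pair of annotations within each name (self-pairs excluded)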
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    # Enumerate annotmatch properties
    rng = np.random.RandomState(0)
    edge_props = {
        'weight': rng.rand(len(aids1)),
        'reviewer_confidence': rng.rand(len(aids1)),
        'algo_confidence': rng.rand(len(aids1)),
    }

    # Remove data that does not need to be visualized
    # (don't show all the aids if you don't have to)
    thresh = .5
    flags = edge_props['weight'] > thresh
    aids1_ = ut.compress(aids1, flags)
    aids2_ = ut.compress(aids2, flags)
    chosen_props = ut.dict_subset(edge_props, ['weight'])
    edge_props = ut.map_dict_vals(ut.partial(ut.compress, flag_list=flags),
                                  chosen_props)

    edge_keys = list(edge_props.keys())
    edge_vals = ut.dict_take(edge_props, edge_keys)
    edge_attr_list = [dict(zip(edge_keys, vals_)) for vals_ in zip(*edge_vals)]

    unique_aids = list(set(aids1_ + aids2_))
    # Make a graph between the chips
    nodes = unique_aids
    edges = list(zip(aids1_, aids2_, edge_attr_list))
    import networkx as nx
    graph = nx.DiGraph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)
    from ibeis.viz import viz_graph
    fnum = None
    #zoom = kwargs.get('zoom', .4)
    viz_graph.viz_netx_chipgraph(ibs,
                                 graph,
                                 fnum=fnum,
                                 with_images=True,
                                 augment_graph=False)
Example #21
0
def update_bindings():
    r"""
    Returns:
        dict: matchtups

    CommandLine:
        python ~/local/build_scripts/flannscripts/autogen_bindings.py --exec-update_bindings
        utprof.py ~/local/build_scripts/flannscripts/autogen_bindings.py --exec-update_bindings

    Example:
        >>> # DISABLE_DOCTEST
        >>> from autogen_bindings import *  # NOQA
        >>> import sys
        >>> import utool as ut
        >>> sys.path.append(ut.truepath('~/local/build_scripts/flannscripts'))
        >>> matchtups = update_bindings()
        >>> result = ('matchtups = %s' % (ut.repr2(matchtups),))
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    from os.path import basename
    import difflib
    import numpy as np
    import re
    binding_names = [
        'build_index',
        'used_memory',
        'add_points',
        'remove_point',
        'compute_cluster_centers',
        'load_index',
        'save_index',
        'find_nearest_neighbors',
        'radius_search',
        'remove_points',
        'free_index',
        'find_nearest_neighbors_index',

        # 'size',
        # 'veclen',
        # 'get_point',
        # 'flann_get_distance_order',
        # 'flann_get_distance_type',
        # 'flann_log_verbosity',

        # 'clean_removed_points',
    ]

    _places = [
        '~/code/flann/src/cpp/flann/flann.cpp',
        '~/code/flann/src/cpp/flann/flann.h',
        '~/code/flann/src/python/pyflann/flann_ctypes.py',
        '~/code/flann/src/python/pyflann/index.py',
    ]

    eof_sentinals = {
        # 'flann_ctypes.py': '# END DEFINE BINDINGS',
        'flann_ctypes.py': 'def ensure_2d_array(arr',
        # 'flann.h': '// END DEFINE BINDINGS',
        'flann.h': '#ifdef __cplusplus',
        'flann.cpp': None,
        'index.py': None,
    }
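    # Regexes that delimit an existing generated block in each file so it can
    # be located and overwritten in place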
    block_sentinals = {
        'flann.h': re.escape('/**'),
        'flann.cpp': 'template *<typename Distance>',
        # 'flann_ctypes.py': '\n',
        'flann_ctypes.py': r'flann\.[a-z_.]* =',
        # 'index.py': '    def .*',
        'index.py': '    [^ ].*',
    }
    places = {
        basename(fpath): fpath
        for fpath in ut.lmap(ut.truepath, _places)
    }
    text_dict = ut.map_dict_vals(ut.readfrom, places)
    lines_dict = {key: val.split('\n') for key, val in text_dict.items()}
    orig_texts = text_dict.copy()  # NOQA
    binding_defs = {}
    named_blocks = {}

    print('binding_names = %r' % (binding_names, ))
    for binding_name in binding_names:
        blocks, defs = autogen_parts(binding_name)
        binding_defs[binding_name] = defs
        named_blocks[binding_name] = blocks

    for binding_name in ut.ProgIter(binding_names):
        ut.colorprint('+--- GENERATE BINDING %s -----' % (binding_name, ),
                      'yellow')
        blocks_dict = named_blocks[binding_name]
        for key in places.keys():
            ut.colorprint(
                '---- generating %s for %s -----' % (
                    binding_name,
                    key,
                ), 'yellow')
            # key = 'flann_ctypes.py'
            # print(text_dict[key])
            old_text = text_dict[key]
            line_list = lines_dict[key]
            #text = old_text
            block = blocks_dict[key]

            debug = ut.get_argflag('--debug')
            # debug = True
            # if debug:
            #     print(ut.highlight_code(block, splitext(key)[1]))

            # Find a place in the code that already exists

            searchblock = block
            if key.endswith('.cpp') or key.endswith('.h'):
                searchblock = re.sub(ut.REGEX_C_COMMENT,
                                     '',
                                     searchblock,
                                     flags=re.MULTILINE | re.DOTALL)
            searchblock = '\n'.join(searchblock.splitlines()[0:3])

            # @ut.cached_func(verbose=False)
            def cached_match(old_text, searchblock):
                def isjunk(x):
                    return False
                    return x in ' \t,*()'

                def isjunk2(x):
                    return x in ' \t,*()'

                # Not sure why the first one just doesn't find it
                # isjunk = None
                sm = difflib.SequenceMatcher(isjunk,
                                             old_text,
                                             searchblock,
                                             autojunk=False)
                sm0 = difflib.SequenceMatcher(isjunk,
                                              old_text,
                                              searchblock,
                                              autojunk=True)
                sm1 = difflib.SequenceMatcher(isjunk2,
                                              old_text,
                                              searchblock,
                                              autojunk=False)
                sm2 = difflib.SequenceMatcher(isjunk2,
                                              old_text,
                                              searchblock,
                                              autojunk=True)
                matchtups = (sm.get_matching_blocks() +
                             sm0.get_matching_blocks() +
                             sm1.get_matching_blocks() +
                             sm2.get_matching_blocks())
                return matchtups

            matchtups = cached_match(old_text, searchblock)
            # Find a reasonable match in matchtups

            found = False
            if debug:
                # print('searchblock =\n%s' % (searchblock,))
                print('searchblock = %r' % (searchblock, ))
            for (a, b, size) in matchtups:
                matchtext = old_text[a:a + size]
                pybind = binding_defs[binding_name]['py_binding_name']
                if re.search(binding_name + '\\b', matchtext) or re.search(
                        pybind + '\\b', matchtext):
                    found = True
                    pos = a + size
                    if debug:
                        print('MATCHING TEXT')
                        print(matchtext)
                    break
                else:
                    if debug and 0:
                        print('Not matching')
                        print('matchtext = %r' % (matchtext, ))
                        matchtext2 = old_text[a - 10:a + size + 20]
                        print('matchtext2 = %r' % (matchtext2, ))

            if found:
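                # Convert the character offset ``pos`` into a line index by
                # cumulatively summing line lengths (+1 for each newline)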
                linelens = np.array(ut.lmap(len, line_list)) + 1
                sumlen = np.cumsum(linelens)
                row = np.where(sumlen < pos)[0][-1] + 1
                #print(line_list[row])
                # Search for extents of the block to overwrite
                block_sentinal = block_sentinals[key]
                row1 = ut.find_block_end(row, line_list, block_sentinal,
                                         -1) - 1
                row2 = ut.find_block_end(row + 1, line_list, block_sentinal,
                                         +1)
                eof_sentinal = eof_sentinals[key]
                if eof_sentinal is not None:
                    print('eof_sentinal = %r' % (eof_sentinal, ))
                    row2 = min([
                        count for count, line in enumerate(line_list)
                        if line.startswith(eof_sentinal)
                    ][-1], row2)
                nr = len((block + '\n\n').splitlines())
                new_line_list = ut.insert_block_between_lines(
                    block + '\n', row1, row2, line_list)
                rtext1 = '\n'.join(line_list[row1:row2])
                rtext2 = '\n'.join(new_line_list[row1:row1 + nr])
                if debug:
                    print('-----')
                    ut.colorprint('FOUND AND REPLACING %s' % (binding_name, ),
                                  'yellow')
                    print(ut.highlight_code(rtext1))
                if debug:
                    print('-----')
                    ut.colorprint(
                        'FOUND AND REPLACED WITH %s' % (binding_name, ),
                        'yellow')
                    print(ut.highlight_code(rtext2))
                if not ut.get_argflag('--diff') and not debug:
                    print(
                        ut.color_diff_text(
                            ut.difftext(rtext1,
                                        rtext2,
                                        num_context_lines=7,
                                        ignore_whitespace=True)))
            else:
                # Append to end of the file
                eof_sentinal = eof_sentinals[key]
                if eof_sentinal is None:
                    row2 = len(line_list) - 1
                else:
                    row2_choice = [
                        count for count, line in enumerate(line_list)
                        if line.startswith(eof_sentinal)
                    ]
                    if len(row2_choice) == 0:
                        row2 = len(line_list) - 1
                        assert False
                    else:
                        row2 = row2_choice[-1] - 1

                # row1 = row2 - 1
                # row2 = row2 - 1
                row1 = row2

                new_line_list = ut.insert_block_between_lines(
                    block + '\n', row1, row2, line_list)
                # block + '\n\n\n', row1, row2, line_list)

                rtext1 = '\n'.join(line_list[row1:row2])
                nr = len((block + '\n\n').splitlines())
                rtext2 = '\n'.join(new_line_list[row1:row1 + nr])

                if debug:
                    print('-----')
                    ut.colorprint(
                        'NOT FOUND AND REPLACING %s' % (binding_name, ),
                        'yellow')
                    print(ut.highlight_code(rtext1))
                if debug:
                    print('-----')
                    ut.colorprint(
                        'NOT FOUND AND REPLACED WITH %s' % (binding_name, ),
                        'yellow')
                    print(ut.highlight_code(rtext2))

                if not ut.get_argflag('--diff') and not debug:
                    print(
                        ut.color_diff_text(
                            ut.difftext(rtext1,
                                        rtext2,
                                        num_context_lines=7,
                                        ignore_whitespace=True)))
            text_dict[key] = '\n'.join(new_line_list)
            lines_dict[key] = new_line_list
        ut.colorprint('L___  GENERATED BINDING %s ___' % (binding_name, ),
                      'yellow')

    for key in places:
        new_text = '\n'.join(lines_dict[key])
        #ut.writeto(ut.augpath(places[key], '.new'), new_text)
        ut.writeto(ut.augpath(places[key]), new_text)

    for key in places:
        if ut.get_argflag('--diff'):
            difftext = ut.get_textdiff(orig_texts[key],
                                       new_text,
                                       num_context_lines=7,
                                       ignore_whitespace=True)
            difftext = ut.color_diff_text(difftext)
            print(difftext)
Example #22
0
    def make_inference(infr):
        cm_list = infr.cm_list
        unique_nids, prob_names = infr.make_prob_names()
        cluster_tuples = infr.make_clusters()

        # Make pair list for output
        if infr.user_feedback is not None:
            keys = list(zip(infr.user_feedback['aid1'], infr.user_feedback['aid2']))
            feedback_lookup = ut.make_index_lookup(keys)
            user_feedback = infr.user_feedback
            p_bg = 0
            user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
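            # Expected probability the pair is the same animal: p_match when the
            # pair is comparable, falling back to the background prior otherwise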
            part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
            part2 = p_bg * user_feedback['p_notcomp']
            p_same_list = part1 + part2
        else:
            feedback_lookup = {}
        infr.user_feedback
        needs_review_list = []
        num_top = 4
        for cm, row in zip(cm_list, prob_names):
            # Find top scoring names for this chip match in the posterior distribution
            idxs = row.argsort()[::-1]
            top_idxs = idxs[:num_top]
            nids = ut.take(unique_nids, top_idxs)
            # Find the matched annotations in the pairwise prior distributions
            nidxs = ut.dict_take(cm.nid2_nidx, nids, None)
            name_groupxs = ut.take(cm.name_groupxs, ut.filter_Nones(nidxs))
            daids_list = ut.take(cm.daid_list, name_groupxs)
            for daids in daids_list:
                ut.take(cm.score_list, ut.take(cm.daid2_idx, daids))
                scores_all = cm.annot_score_list / cm.annot_score_list.sum()
                idxs = ut.take(cm.daid2_idx, daids)
                scores = scores_all.take(idxs)
                raw_scores = cm.score_list.take(idxs)
                scorex = scores.argmax()
                raw_score = raw_scores[scorex]
                daid = daids[scorex]
                import scipy.special
                # SUPER HACK: these are not probabilities
                # TODO: set a and b based on dbsize and param configuration
                # python -m plottool.draw_func2 --exec-plot_func --show --range=0,3 --func="lambda x: scipy.special.expit(2 * x - 2)"
                #a = 2.0
                a = 1.5
                b = 2
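                # expit squashes the raw score into (0, 1); with a=1.5, b=2 the
                # curve crosses 0.5 at raw_score = a / b = 0.75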
                p_same = scipy.special.expit(b * raw_score - a)
                #confidence = scores[scorex]
                #p_diff = 1 - p_same
                #decision = 'same' if confidence > thresh else 'diff'
                #confidence = p_same if confidence > thresh else p_diff
                #tup = (cm.qaid, daid, decision, confidence, raw_score)
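                # Confidence grows with distance from the undecided point 0.5:
                # 0 when p_same == 0.5 and 1 when p_same is 0 or 1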
                confidence = (2 * np.abs(0.5 - p_same)) ** 2
                #if infr.user_feedback is not None:
                #    import utool
                #    utool.embed(
                key = (cm.qaid, daid)
                fb_idx = feedback_lookup.get(key)
                if fb_idx is not None:
                    confidence = p_same_list[fb_idx]
                tup = (cm.qaid, daid, p_same, confidence, raw_score)
                needs_review_list.append(tup)

        # Sort resulting list by confidence
        sortx = ut.argsort(ut.take_column(needs_review_list, 3))
        needs_review_list = ut.take(needs_review_list, sortx)

        infr.needs_review_list = needs_review_list
        infr.cluster_tuples = cluster_tuples
Example #23
0
def analyize_multiple_drives(drives):
    """
    CommandLine:
        export PYTHONPATH=$PYTHONPATH:~/local/scripts

        python -m register_files --exec-analyize_multiple_drives --drives ~ E:/ D:/

        python -m register_files --exec-analyize_multiple_drives --drives ~ /media/Store
        python register_files.py --exec-analyize_multiple_drives --drives /media/joncrall/media/ /media/joncrall/store/
        /media/joncrall/backup

        cd ~/local/scripts

    Example:
        >>> from register_files import *  # NOQA
        >>> dpaths = ut.get_argval('--drives', type_=list, default=['E://', 'D://'])  # ['D:/', 'E:/', 'F:/']
        >>> drives = [Drive(root_dpath) for root_dpath in dpaths]
        >>> drive = Broadcaster(drives)
        >>> drive.compute_info()
        >>> #drive.build_fpath_hashes()
        >>> drive.check_consistency()
        >>> E = drive = drives[0]
        >>> analyize_multiple_drives(drives)
        >>> #D, E, F = drives
        >>> #drive = D
    """
    # -----
    ## Find the files shared on all disks
    #allhave = reduce(ut.dict_isect_combine, [drive.hash_to_fpaths for drive in drives])
    #print('#allhave = %r' % (len(allhave),))
    #allhave.keys()[0:3]
    #allhave.values()[0:3]
    #ut.embed()
    #for drive in drives:
    #drive.rrr()
    #print(drive.root_dpath)
    #print(len(drive.hash_to_unique_fpaths))
    #print(len(drive.hash_to_fpaths))
    #print(len(drive.hash_to_unique_fpaths) / len(drive.hash_to_fpaths))

    # Build dict to map from dpath to file pointers of unique descendants
    #unique_fidxs_list = drive.hash_to_fidxs.values()
    #fidxs = ut.flatten(unique_fidxs_list)

    esc = re.escape

    # Find which files exist on all drives
    hashes_list = [set(drive_.hash_to_fidxs.keys()) for drive_ in drives]
    allhave_hashes = reduce(set.intersection, hashes_list)
    print('Drives %r have %d file hashes in common' %
          (drives, len(allhave_hashes)))

    lbls = [drive_.root_dpath for drive_ in drives]
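    # Pairwise matrix of shared file-hash counts between drives (the diagonal
    # holds each drive's total hash count)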
    isect_lens = np.zeros((len(drives), len(drives)))
    for idx1, (hashes1, drive1) in enumerate(zip(hashes_list, drives)):
        for idx2, (hashes2, drive2) in enumerate(zip(hashes_list, drives)):
            if drive1 is not drive2:
                common = set.intersection(hashes1, hashes2)
                isect_lens[idx1, idx2] = len(common)
            else:
                isect_lens[idx1, idx2] = len(hashes2)
    import pandas as pd
    print(pd.DataFrame(isect_lens, index=lbls, columns=lbls))

    # for drive in drives
    drive = drives[0]
    print('Finding unique files in drive=%r' % (drive, ))
    # Get subset of fidxs on this drive
    unflat_valid_fidxs = ut.take(drive.hash_to_fidxs, allhave_hashes)
    valid_fidxs = sorted(ut.flatten(unflat_valid_fidxs))

    # Filter fpaths by patterns
    ignore_patterns = [esc('Thumbs.db')]
    ignore_paths = ['Spotify']
    patterns = ignore_paths + ignore_patterns
    valid_fpaths = ut.take(drive.fpath_list, valid_fidxs)
    valid_flags = [
        not any([re.search(p, fpath) for p in patterns])
        for fpath in valid_fpaths
    ]
    valid_flags = np.array(valid_flags)
    valid_fidxs = ut.compress(valid_fidxs, valid_flags)

    print(ut.filtered_infostr(valid_flags, 'invalid fpaths'))

    fidxs = valid_fidxs
    valid_fpaths = sorted(ut.take(drive.fpath_list, fidxs))

    dpath_to_unique_fidx = build_dpath_to_fidx(valid_fpaths, valid_fidxs,
                                               drive.root_dpath)

    def make_tree_structure(valid_fpaths):
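        # Build a nested dict keyed by path component; each directory's files
        # are accumulated in a list under the special key '.'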
        root = {}

        def dict_getitem_default(dict_, key, type_):
            try:
                val = dict_[key]
            except KeyError:
                val = type_()
                dict_[key] = val
            return val

        for fpath in ut.ProgIter(valid_fpaths, 'building tree', freq=30000):
            path_components = ut.dirsplit(fpath)
            current = root
            for comp in path_components[:-1]:
                current = dict_getitem_default(current, comp, dict)
            contents = dict_getitem_default(current, '.', list)
            contents.append(path_components[-1])
        return root

    root = make_tree_structure(valid_fpaths)

    def print_tree(root,
                   path,
                   dpath_to_unique_fidx=dpath_to_unique_fidx,
                   drive=drive,
                   depth=None):
        print('path = %r' % (path, ))
        print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path])))
        path_components = ut.dirsplit(path)
        # Navigate to correct spot in tree
        current = root
        for c in path_components:
            current = current[c]
        print(ut.repr3(current, truncate=1))

    def get_tree_info(root,
                      path,
                      dpath_to_unique_fidx=dpath_to_unique_fidx,
                      drive=drive,
                      depth=0):
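        # Recursively summarize each child directory as (name, size, #files),
        # descending ``depth`` additional levels before stopping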
        path_components = ut.dirsplit(path)
        current = root
        for c in path_components:
            current = current[c]
        if isinstance(current, list):
            tree_tmp = []
        else:
            key_list = list(current.keys())
            child_list = [join(path, key) for key in key_list]
            dpath_nbytes_list = [
                drive.get_total_nbytes(dpath_to_unique_fidx.get(child, []))
                for child in child_list
            ]
            nfiles_list = [
                len(dpath_to_unique_fidx.get(child, []))
                for child in child_list
            ]
            tree_tmp = sorted([
                (key, ut.byte_str2(nbytes), nfiles) if depth == 0 else
                (key, ut.byte_str2(nbytes), nfiles,
                 get_tree_info(root,
                               path=child,
                               dpath_to_unique_fidx=dpath_to_unique_fidx,
                               drive=drive,
                               depth=depth - 1))
                for key, child, nbytes, nfiles in zip(
                    key_list, child_list, dpath_nbytes_list, nfiles_list)
            ])
        return tree_tmp

    def print_tree_struct(*args, **kwargs):
        tree_str = (ut.indent(ut.repr3(get_tree_info(*args, **kwargs), nl=1)))
        print(tree_str)
        #bytes_str = ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path]))
        #print('path = %r, %s' % (path, bytes_str))
        #print(ut.repr3(key_list))
        return tree_str

    dpath_to_unique_fidx
    dpath_to_fidxs = ut.map_dict_vals(set, drive.dpath_to_fidx)
    complete_unique_dpaths = ut.dict_isect(dpath_to_fidxs,
                                           dpath_to_unique_fidx)
    complete_root = make_tree_structure(complete_unique_dpaths.keys())

    globals()['ut'] = ut
    globals()['os'] = os
    globals()['join'] = join

    print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx['E:\\'])))
    get_tree_info(root, path='E:\\', depth=0)

    get_tree_info(complete_root, path='E:\\', depth=0)

    get_tree_info(root, path='E:\\', depth=1)
    print(print_tree_struct(root, path='E:\\Clutter', depth=0))
    print_tree(root, path=r'E:\TV')
    print_tree(root, path=r'E:\Movies')
    print_tree(root, path=r'E:\Boot')

    print_tree(root, path='E:\\')
    print_tree(root, path=r'E:\Downloaded')
    print_tree(root, path=r'E:\Recordings')
    print_tree(root, path=r'E:\Clutter')
    print_tree(root, path=r'E:\Audio Books')

    # TODO:
    # * Ignore list
    # * Find and rectify internal duplicates
    # * Update registry with new files and deleted ones
    # * Ensure that all unique files are backed up
    # Index the C: Drive as well.
    # * Lazy properties of drive
    # * Multiple types of identifiers (hash, fname, ext, fsize)
    # Drive subsets
    # Export/Import Drive for analysis on other machines

    ut.embed()
Example #24
0
def get_injured_sharks():
    """
    >>> from wbia.scripts.getshark import *  # NOQA
    """
    import requests

    url = 'http://www.whaleshark.org/getKeywordImages.jsp'
    resp = requests.get(url)
    assert resp.status_code == 200
    keywords = resp.json()['keywords']
    key_list = ut.take_column(keywords, 'indexName')
    key_to_nice = {k['indexName']: k['readableName'] for k in keywords}

    injury_patterns = [
        'injury',
        'net',
        'hook',
        'trunc',
        'damage',
        'scar',
        'nicks',
        'bite',
    ]

    injury_keys = [
        key for key in key_list if any([pat in key for pat in injury_patterns])
    ]
    noninjury_keys = ut.setdiff(key_list, injury_keys)
    injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys)  # NOQA
    noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys)  # NOQA
    key_list = injury_keys

    keyed_images = {}
    for key in ut.ProgIter(key_list, lbl='reading index', bs=True):
        key_url = url + '?indexName={indexName}'.format(indexName=key)
        key_resp = requests.get(key_url)
        assert key_resp.status_code == 200
        key_imgs = key_resp.json()['images']
        keyed_images[key] = key_imgs

    key_hist = {key: len(imgs) for key, imgs in keyed_images.items()}
    key_hist = ut.sort_dict(key_hist, 'vals')
    logger.info(ut.repr3(key_hist))
    nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist)
    nice_key_hist = ut.sort_dict(nice_key_hist, 'vals')
    logger.info(ut.repr3(nice_key_hist))

    key_to_urls = {
        key: ut.take_column(vals, 'url')
        for key, vals in keyed_images.items()
    }
    overlaps = {}
    import itertools

    overlap_img_list = []
    for k1, k2 in itertools.combinations(key_to_urls.keys(), 2):
        overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2])
        num_overlap = len(overlap_imgs)
        overlaps[(k1, k2)] = num_overlap
        overlaps[(k1, k1)] = len(key_to_urls[k1])
        if num_overlap > 0:
            # logger.info('[%s][%s], overlap=%r' % (k1, k2, num_overlap))
            overlap_img_list.extend(overlap_imgs)

    all_img_urls = list(set(ut.flatten(key_to_urls.values())))
    num_all = len(all_img_urls)  # NOQA
    logger.info('num_all = %r' % (num_all, ))

    # Determine super-categories
    categories = ['nicks', 'scar', 'trunc']

    # Force these keys into these categories
    key_to_cat = {'scarbite': 'other_injury'}

    cat_to_keys = ut.ddict(list)

    for key in key_to_urls.keys():
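        # Forced mappings take priority; otherwise assign by category substring,
        # defaulting to 'other_injury'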
        flag = 1
        if key in key_to_cat:
            cat = key_to_cat[key]
            cat_to_keys[cat].append(key)
            continue
        for cat in categories:
            if cat in key:
                cat_to_keys[cat].append(key)
                flag = 0
        if flag:
            cat = 'other_injury'
            cat_to_keys[cat].append(key)

    cat_urls = ut.ddict(list)
    for cat, keys in cat_to_keys.items():
        for key in keys:
            cat_urls[cat].extend(key_to_urls[key])

    cat_hist = {}
    for cat in list(cat_urls.keys()):
        cat_urls[cat] = list(set(cat_urls[cat]))
        cat_hist[cat] = len(cat_urls[cat])

    logger.info(ut.repr3(cat_to_keys))
    logger.info(ut.repr3(cat_hist))

    key_to_cat = dict([(val, key) for key, vals in cat_to_keys.items()
                       for val in vals])

    # ingestset = {
    #    '__class__': 'ImageSet',
    #    'images': ut.ddict(dict)
    # }
    # for key, key_imgs in keyed_images.items():
    #    for imgdict in key_imgs:
    #        url = imgdict['url']
    #        encid = imgdict['correspondingEncounterNumber']
    #        # Make structure
    #        encdict = encounters[encid]
    #        encdict['__class__'] = 'Encounter'
    #        imgdict = ut.delete_keys(imgdict.copy(), ['correspondingEncounterNumber'])
    #        imgdict['__class__'] = 'Image'
    #        cat = key_to_cat[key]
    #        annotdict = {'relative_bbox': [.01, .01, .98, .98], 'tags': [cat, key]}
    #        annotdict['__class__'] = 'Annotation'

    #        # Ensure structures exist
    #        encdict['images'] = encdict.get('images', [])
    #        imgdict['annots'] = imgdict.get('annots', [])

    #        # Add an image to this encounter
    #        encdict['images'].append(imgdict)
    #        # Add an annotation to this image
    #        imgdict['annots'].append(annotdict)

    # # http://springbreak.wildbook.org/rest/org.ecocean.Encounter/1111
    # get_enc_url = 'http://www.whaleshark.org/rest/org.ecocean.Encounter/%s' % (encid,)
    # resp = requests.get(get_enc_url)
    # logger.info(ut.repr3(encdict))
    # logger.info(ut.repr3(encounters))

    # Download the files to the local disk
    # fpath_list =

    all_urls = ut.unique(
        ut.take_column(
            ut.flatten(
                ut.dict_subset(keyed_images,
                               ut.flatten(cat_to_keys.values())).values()),
            'url',
        ))

    dldir = ut.truepath('~/tmpsharks')
    from os.path import commonprefix, basename  # NOQA

    prefix = commonprefix(all_urls)
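    # Turn each url's unique suffix into a flat filename so every download can
    # live in a single directory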
    suffix_list = [url_[len(prefix):] for url_ in all_urls]
    fname_list = [suffix.replace('/', '--') for suffix in suffix_list]

    fpath_list = []
    for url, fname in ut.ProgIter(zip(all_urls, fname_list),
                                  lbl='downloading imgs',
                                  freq=1):
        fpath = ut.grab_file_url(url,
                                 download_dir=dldir,
                                 fname=fname,
                                 verbose=False)
        fpath_list.append(fpath)

    # Make sure we keep orig info
    # url_to_keys = ut.ddict(list)
    url_to_info = ut.ddict(dict)
    for key, imgdict_list in keyed_images.items():
        for imgdict in imgdict_list:
            url = imgdict['url']
            info = url_to_info[url]
            for k, v in imgdict.items():
                info[k] = info.get(k, [])
                info[k].append(v)
            info['keys'] = info.get('keys', [])
            info['keys'].append(key)
            # url_to_keys[url].append(key)

    info_list = ut.take(url_to_info, all_urls)
    for info in info_list:
        if len(set(info['correspondingEncounterNumber'])) > 1:
            assert False, 'url with two different encounter nums'
    # Combine duplicate tags

    hashid_list = [
        ut.get_file_uuid(fpath_, stride=8)
        for fpath_ in ut.ProgIter(fpath_list, bs=True)
    ]
    groupxs = ut.group_indices(hashid_list)[1]

    # Group properties by duplicate images
    # groupxs = [g for g in groupxs if len(g) > 1]
    fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0)
    url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0)
    info_list_ = [
        ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_))
        for info_ in ut.apply_grouping(info_list, groupxs)
    ]

    encid_list_ = [
        ut.unique(info_['correspondingEncounterNumber'])[0]
        for info_ in info_list_
    ]
    keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_]
    cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_]

    clist = ut.ColumnLists({
        'gpath': fpath_list_,
        'url': url_list_,
        'encid': encid_list_,
        'key': keys_list_,
        'cat': cats_list_,
    })

    # for info_ in ut.apply_grouping(info_list, groupxs):
    #    info = ut.dict_accum(*info_)
    #    info = ut.map_dict_vals(ut.flatten, info)
    #    x = ut.unique(ut.flatten(ut.dict_accum(*info_)['correspondingEncounterNumber']))
    #    if len(x) > 1:
    #        info = info.copy()
    #        del info['keys']
    #        logger.info(ut.repr3(info))

    flags = ut.lmap(ut.fpath_has_imgext, clist['gpath'])
    clist = clist.compress(flags)

    import wbia

    ibs = wbia.opendb('WS_Injury', allow_newdir=True)

    gid_list = ibs.add_images(clist['gpath'])
    clist['gid'] = gid_list

    failed_flags = ut.flag_None_items(clist['gid'])
    logger.info('# failed %s' % (sum(failed_flags), ))
    passed_flags = ut.not_list(failed_flags)
    clist = clist.compress(passed_flags)
    ut.assert_all_not_None(clist['gid'])
    # ibs.get_image_uris_original(clist['gid'])
    ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True)

    # ut.zipflat(clist['cat'], clist['key'])
    if False:
        # Can run detection instead
        clist['tags'] = ut.zipflat(clist['cat'])
        aid_list = ibs.use_images_as_annotations(clist['gid'],
                                                 adjust_percent=0.01,
                                                 tags_list=clist['tags'])
        aid_list

    import wbia.plottool as pt
    from wbia import core_annots

    pt.qt4ensure()
    # annots = ibs.annots()
    # aids = [1, 2]
    # ibs.depc_annot.get('hog', aids , 'hog')
    # ibs.depc_annot.get('chip', aids, 'img')
    for aid in ut.InteractiveIter(ibs.get_valid_aids()):
        hogs = ibs.depc_annot.d.get_hog_hog([aid])
        chips = ibs.depc_annot.d.get_chips_img([aid])
        chip = chips[0]
        hogimg = core_annots.make_hog_block_image(hogs[0])
        pt.clf()
        pt.imshow(hogimg, pnum=(1, 2, 1))
        pt.imshow(chip, pnum=(1, 2, 2))
        fig = pt.gcf()
        fig.show()
        fig.canvas.draw()

    # logger.info(len(groupxs))

    # if False:
    # groupxs = ut.find_duplicate_items(ut.lmap(basename, suffix_list)).values()
    # logger.info(ut.repr3(ut.apply_grouping(all_urls, groupxs)))
    #    # FIX
    #    for fpath, fname in zip(fpath_list, fname_list):
    #        if ut.checkpath(fpath):
    #            ut.move(fpath, join(dirname(fpath), fname))
    #            logger.info('fpath = %r' % (fpath,))

    # import wbia
    # from wbia.dbio import ingest_dataset
    # dbdir = wbia.sysres.lookup_dbdir('WS_ALL')
    # self = ingest_dataset.Ingestable2(dbdir)

    if False:
        # Show overlap matrix
        import wbia.plottool as pt
        import pandas as pd
        import numpy as np

        dict_ = overlaps
        s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps))
        df = s.unstack()
        lhs, rhs = df.align(df.T)
        df = lhs.add(rhs, fill_value=0).fillna(0)

        label_texts = df.columns.values

        def label_ticks(label_texts):
            import wbia.plottool as pt

            truncated_labels = [repr(lbl[0:100]) for lbl in label_texts]
            ax = pt.gca()
            ax.set_xticks(list(range(len(label_texts))))
            ax.set_xticklabels(truncated_labels)
            [lbl.set_rotation(-55) for lbl in ax.get_xticklabels()]
            [
                lbl.set_horizontalalignment('left')
                for lbl in ax.get_xticklabels()
            ]

            # xgrid, ygrid = np.meshgrid(range(len(label_texts)), range(len(label_texts)))
            # pt.plot_surface3d(xgrid, ygrid, disjoint_mat)
            ax.set_yticks(list(range(len(label_texts))))
            ax.set_yticklabels(truncated_labels)
            [
                lbl.set_horizontalalignment('right')
                for lbl in ax.get_yticklabels()
            ]
            [
                lbl.set_verticalalignment('center')
                for lbl in ax.get_yticklabels()
            ]
            # [lbl.set_rotation(20) for lbl in ax.get_yticklabels()]

        # df = df.sort(axis=0)
        # df = df.sort(axis=1)

        sortx = np.argsort(df.sum(axis=1).values)[::-1]
        df = df.take(sortx, axis=0)
        df = df.take(sortx, axis=1)

        fig = pt.figure(fnum=1)
        fig.clf()
        mat = df.values.astype(np.int32)
        mat[np.diag_indices(len(mat))] = 0
        vmax = mat[(1 - np.eye(len(mat))).astype(bool)].max()
        import matplotlib.colors

        norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True)
        pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none')
        pt.plt.colorbar()
        pt.plt.grid('off')
        label_ticks(label_texts)
        fig.tight_layout()

    # overlap_df = pd.DataFrame.from_dict(overlap_img_list)

    class TmpImage(ut.NiceRepr):
        pass

    from skimage.feature import hog
    from skimage import data, color, exposure
    import wbia.plottool as pt

    image2 = color.rgb2gray(data.astronaut())  # NOQA

    fpath = './GOPR1120.JPG'

    import vtool as vt

    for fpath in [fpath]:
        """
        http://scikit-image.org/docs/dev/auto_examples/plot_hog.html
        """

        image = vt.imread(fpath, grayscale=True)
        image = pt.color_funcs.to_base01(image)

        fig = pt.figure(fnum=2)
        fd, hog_image = hog(
            image,
            orientations=8,
            pixels_per_cell=(16, 16),
            cells_per_block=(1, 1),
            visualise=True,
        )

        fig, (ax1, ax2) = pt.plt.subplots(1,
                                          2,
                                          figsize=(8, 4),
                                          sharex=True,
                                          sharey=True)

        ax1.axis('off')
        ax1.imshow(image, cmap=pt.plt.cm.gray)
        ax1.set_title('Input image')
        ax1.set_adjustable('box-forced')

        # Rescale histogram for better display
        hog_image_rescaled = exposure.rescale_intensity(hog_image,
                                                        in_range=(0, 0.02))

        ax2.axis('off')
        ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray)
        ax2.set_title('Histogram of Oriented Gradients')
        ax2.set_adjustable('box-forced')
        pt.plt.show()
Example #25
0
    def make_graph(infr, show=False):
        import networkx as nx
        import itertools
        cm_list = infr.cm_list
        unique_nids, prob_names = infr.make_prob_names()
        thresh = infr.choose_thresh()

        # Simply cut any edge with a weight less than a threshold
        qaid_list = [cm.qaid for cm in cm_list]
        postcut = prob_names > thresh
        qxs, nxs = np.where(postcut)
        if False:
            kw = dict(precision=2, max_line_width=140, suppress_small=True)
            print(ut.hz_str('prob_names = ', ut.array2string2((prob_names), **kw)))
            print(ut.hz_str('postcut = ', ut.array2string2((postcut).astype(int), **kw)))
        matching_qaids = ut.take(qaid_list, qxs)
        matched_nids = ut.take(unique_nids, nxs)

        qreq_ = infr.qreq_

        nodes = ut.unique(qreq_.qaids.tolist() + qreq_.daids.tolist())
        if not hasattr(qreq_, 'dnids'):
            qreq_.dnids = qreq_.ibs.get_annot_nids(qreq_.daids)
            qreq_.qnids = qreq_.ibs.get_annot_nids(qreq_.qaids)
        dnid2_daids = ut.group_items(qreq_.daids, qreq_.dnids)
        grouped_aids = dnid2_daids.values()
        matched_daids = ut.take(dnid2_daids, matched_nids)
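        # Connect annotations of the same database name into cliques and connect
        # each query annotation to the annotations of its matched names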
        name_cliques = [list(itertools.combinations(aids, 2)) for aids in grouped_aids]
        aid_matches = [list(ut.product([qaid], daids)) for qaid, daids in
                       zip(matching_qaids, matched_daids)]

        graph = nx.Graph()
        graph.add_nodes_from(nodes)
        graph.add_edges_from(ut.flatten(name_cliques))
        graph.add_edges_from(ut.flatten(aid_matches))

        #matchless_quries = ut.take(qaid_list, ut.index_complement(qxs, len(qaid_list)))
        name_nodes = [('nid', l) for l in qreq_.dnids]
        db_aid_nid_edges = list(zip(qreq_.daids, name_nodes))
        #query_aid_nid_edges = list(zip(matching_qaids, [('nid', l) for l in matched_nids]))
        #G = nx.Graph()
        #G.add_nodes_from(matchless_quries)
        #G.add_edges_from(db_aid_nid_edges)
        #G.add_edges_from(query_aid_nid_edges)

        graph.add_edges_from(db_aid_nid_edges)

        if infr.user_feedback is not None:
            user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
            p_bg = 0.0
            part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
            part2 = p_bg * user_feedback['p_notcomp']
            p_same_list = part1 + part2
            for aid1, aid2, p_same in zip(user_feedback['aid1'],
                                          user_feedback['aid2'], p_same_list):
                if p_same > .5:
                    if not graph.has_edge(aid1, aid2):
                        graph.add_edge(aid1, aid2)
                else:
                    if graph.has_edge(aid1, aid2):
                        graph.remove_edge(aid1, aid2)
        if show:
            import plottool as pt
            nx.set_node_attributes(graph, 'color', {aid: pt.LIGHT_PINK
                                                    for aid in qreq_.daids})
            nx.set_node_attributes(graph, 'color', {aid: pt.TRUE_BLUE
                                                    for aid in qreq_.qaids})
            nx.set_node_attributes(graph, 'color', {
                aid: pt.LIGHT_PURPLE
                for aid in np.intersect1d(qreq_.qaids, qreq_.daids)})
            nx.set_node_attributes(graph, 'label', {node: 'n%r' % (node[1],)
                                                    for node in name_nodes})
            nx.set_node_attributes(graph, 'color', {node: pt.LIGHT_GREEN
                                                    for node in name_nodes})
        if show:
            import plottool as pt
            pt.show_nx(graph, layoutkw={'prog': 'neato'}, verbose=False)
        return graph
Example #26
0
def update_bindings():
    r"""
    Returns:
        dict: matchtups

    CommandLine:
        python ~/local/build_scripts/flannscripts/autogen_bindings.py --exec-update_bindings
        utprof.py ~/local/build_scripts/flannscripts/autogen_bindings.py --exec-update_bindings

    Example:
        >>> # DISABLE_DOCTEST
        >>> from autogen_bindings import *  # NOQA
        >>> import sys
        >>> import utool as ut
        >>> sys.path.append(ut.truepath('~/local/build_scripts/flannscripts'))
        >>> matchtups = update_bindings()
        >>> result = ('matchtups = %s' % (ut.repr2(matchtups),))
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    from os.path import basename
    import difflib
    import numpy as np
    import re
    binding_names = [
        'build_index',
        'used_memory',
        'add_points',
        'remove_point',

        'compute_cluster_centers',
        'load_index',
        'save_index',
        'find_nearest_neighbors',

        'radius_search',
        'remove_points',
        'free_index',
        'find_nearest_neighbors_index',

        # 'size',
        # 'veclen',
        # 'get_point',
        # 'flann_get_distance_order',
        # 'flann_get_distance_type',
        # 'flann_log_verbosity',

        # 'clean_removed_points',
    ]

    _places = [
        '~/code/flann/src/cpp/flann/flann.cpp',
        '~/code/flann/src/cpp/flann/flann.h',
        '~/code/flann/src/python/pyflann/flann_ctypes.py',
        '~/code/flann/src/python/pyflann/index.py',
    ]

    eof_sentinals = {
        # 'flann_ctypes.py': '# END DEFINE BINDINGS',
        'flann_ctypes.py': 'def ensure_2d_array(arr',
        # 'flann.h': '// END DEFINE BINDINGS',
        'flann.h': '#ifdef __cplusplus',
        'flann.cpp': None,
        'index.py': None,
    }
    block_sentinals = {
        'flann.h': re.escape('/**'),
        'flann.cpp': 'template *<typename Distance>',
        # 'flann_ctypes.py': '\n',
        'flann_ctypes.py': r'flann\.[a-z_.]* =',
        # 'index.py': '    def .*',
        'index.py': '    [^ ].*',
    }
    places = {basename(fpath): fpath for fpath in ut.lmap(ut.truepath, _places)}
    text_dict = ut.map_dict_vals(ut.readfrom, places)
    lines_dict = {key: val.split('\n') for key, val in text_dict.items()}
    orig_texts = text_dict.copy()  # NOQA
    binding_defs = {}
    named_blocks = {}

    print('binding_names = %r' % (binding_names,))
    for binding_name in binding_names:
        blocks, defs = autogen_parts(binding_name)
        binding_defs[binding_name] = defs
        named_blocks[binding_name] = blocks

    for binding_name in ut.ProgIter(binding_names):
        ut.colorprint('+--- GENERATE BINDING %s -----' % (binding_name,), 'yellow')
        blocks_dict = named_blocks[binding_name]
        for key in places.keys():
            ut.colorprint('---- generating %s for %s -----' % (binding_name, key,), 'yellow')
            # key = 'flann_ctypes.py'
            # print(text_dict[key])
            old_text = text_dict[key]
            line_list = lines_dict[key]
            #text = old_text
            block = blocks_dict[key]

            debug = ut.get_argflag('--debug')
            # debug = True
            # if debug:
            #     print(ut.highlight_code(block, splitext(key)[1]))

            # Find a place in the code that already exists

            searchblock = block
            if key.endswith('.cpp') or key.endswith('.h'):
                searchblock = re.sub(ut.REGEX_C_COMMENT, '', searchblock,
                                     flags=re.MULTILINE | re.DOTALL)
            searchblock = '\n'.join(searchblock.splitlines()[0:3])

            # @ut.cached_func(verbose=False)
            def cached_match(old_text, searchblock):
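                # Try several SequenceMatcher configurations (different junk
                # heuristics and autojunk settings) and pool their matching blocks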
                def isjunk(x):
                    return False
                    return x in ' \t,*()'
                def isjunk2(x):
                    return x in ' \t,*()'
                # Not sure why the first one just doesn't find it
                # isjunk = None
                sm = difflib.SequenceMatcher(isjunk, old_text, searchblock,
                                             autojunk=False)
                sm0 = difflib.SequenceMatcher(isjunk, old_text, searchblock,
                                              autojunk=True)
                sm1 = difflib.SequenceMatcher(isjunk2, old_text, searchblock,
                                              autojunk=False)
                sm2 = difflib.SequenceMatcher(isjunk2, old_text, searchblock,
                                              autojunk=True)
                matchtups = (sm.get_matching_blocks() +
                             sm0.get_matching_blocks() +
                             sm1.get_matching_blocks() +
                             sm2.get_matching_blocks())
                return matchtups
            matchtups = cached_match(old_text, searchblock)
            # Find a reasonable match in matchtups

            found = False
            if debug:
                # print('searchblock =\n%s' % (searchblock,))
                print('searchblock = %r' % (searchblock,))
            for (a, b, size) in matchtups:
                matchtext = old_text[a: a + size]
                pybind = binding_defs[binding_name]['py_binding_name']
                if re.search(binding_name + '\\b', matchtext) or re.search(pybind + '\\b', matchtext):
                    found = True
                    pos = a + size
                    if debug:
                        print('MATCHING TEXT')
                        print(matchtext)
                    break
                else:
                    if debug and 0:
                        print('Not matching')
                        print('matchtext = %r' % (matchtext,))
                        matchtext2 = old_text[a - 10: a + size + 20]
                        print('matchtext2 = %r' % (matchtext2,))

            if found:
                linelens = np.array(ut.lmap(len, line_list)) + 1
                sumlen = np.cumsum(linelens)
                row = np.where(sumlen < pos)[0][-1] + 1
                #print(line_list[row])
                # Search for extents of the block to overwrite
                block_sentinal = block_sentinals[key]
                row1 = ut.find_block_end(row, line_list, block_sentinal, -1) - 1
                row2 = ut.find_block_end(row + 1, line_list, block_sentinal, +1)
                eof_sentinal = eof_sentinals[key]
                if eof_sentinal is not None:
                    print('eof_sentinal = %r' % (eof_sentinal,))
                    row2 = min([count for count, line in enumerate(line_list) if line.startswith(eof_sentinal)][-1], row2)
                nr = len((block + '\n\n').splitlines())
                new_line_list = ut.insert_block_between_lines(
                    block + '\n', row1, row2, line_list)
                rtext1 = '\n'.join(line_list[row1:row2])
                rtext2 = '\n'.join(new_line_list[row1:row1 + nr])
                if debug:
                    print('-----')
                    ut.colorprint('FOUND AND REPLACING %s' % (binding_name,), 'yellow')
                    print(ut.highlight_code(rtext1))
                if debug:
                    print('-----')
                    ut.colorprint('FOUND AND REPLACED WITH %s' % (binding_name,), 'yellow')
                    print(ut.highlight_code(rtext2))
                if not ut.get_argflag('--diff') and not debug:
                    print(ut.color_diff_text(ut.difftext(rtext1, rtext2, num_context_lines=7, ignore_whitespace=True)))
            else:
                # Append to end of the file
                eof_sentinal = eof_sentinals[key]
                if eof_sentinal is None:
                    row2 = len(line_list) - 1
                else:
                    row2_choice = [count for count, line in enumerate(line_list)
                                   if line.startswith(eof_sentinal)]
                    if len(row2_choice) == 0:
                        row2 = len(line_list) - 1
                        assert False
                    else:
                        row2 = row2_choice[-1] - 1

                # row1 = row2 - 1
                # row2 = row2 - 1
                row1 = row2

                new_line_list = ut.insert_block_between_lines(
                    block + '\n', row1, row2, line_list)
                # block + '\n\n\n', row1, row2, line_list)

                rtext1 = '\n'.join(line_list[row1:row2])
                nr = len((block + '\n\n').splitlines())
                rtext2 = '\n'.join(new_line_list[row1:row1 + nr])

                if debug:
                    print('-----')
                    ut.colorprint('NOT FOUND AND REPLACING %s' % (binding_name,), 'yellow')
                    print(ut.highlight_code(rtext1))
                if debug:
                    print('-----')
                    ut.colorprint('NOT FOUND AND REPLACED WITH %s' % (binding_name,), 'yellow')
                    print(ut.highlight_code(rtext2))

                if not ut.get_argflag('--diff') and not debug:
                    print(ut.color_diff_text(ut.difftext(rtext1, rtext2, num_context_lines=7, ignore_whitespace=True)))
            text_dict[key] = '\n'.join(new_line_list)
            lines_dict[key] = new_line_list
        ut.colorprint('L___  GENERATED BINDING %s ___' % (binding_name,), 'yellow')

    for key in places:
        new_text = '\n'.join(lines_dict[key])
        #ut.writeto(ut.augpath(places[key], '.new'), new_text)
        ut.writeto(ut.augpath(places[key]), new_text)

    for key in places:
        if ut.get_argflag('--diff'):
            difftext = ut.get_textdiff(orig_texts[key], new_text,
                                       num_context_lines=7, ignore_whitespace=True)
            difftext = ut.color_diff_text(difftext)
            print(difftext)
Example #27
0
def get_dbinfo(ibs, verbose=True,
               with_imgsize=False,
               with_bytes=False,
               with_contrib=False,
               with_agesex=False,
               with_header=True,
               short=False,
               tag='dbinfo',
               aid_list=None):
    """

    Returns a dictionary of digestible database information.
    Infostr is a string summary of all the stats; it is printed in addition to
    being returned in locals.

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    CommandLine:
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0
        python -m ibeis.other.dbinfo --test-get_dbinfo:1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example1:
        >>> # SCRIPT
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = ibeis.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = ibeis.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from ibeis.expt import cfghelpers
        >>> #from ibeis.expt import annotation_configs
        >>> #from ibeis.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> verbose = True
        >>> short = True
        >>> #ibs = ibeis.opendb(db='GZ_ALL')
        >>> #ibs = ibeis.opendb(db='PZ_Master0')
        >>> ibs = ibeis.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contrib_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = ibeis.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info:  testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names                      = 7
        # Names (unassociated)       = 0
        # Names (singleton)          = 5
        # Names (multiton)           = 2
        ----------
        # Annots                     = 13
        # Annots (unknown)           = 4
        # Annots (singleton)         = 5
        # Annots (multiton)          = 4
        ----------
        # Img                        = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            print('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from ibeis.expt import experiment_helpers
            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list)
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            #aid_list =
        if verbose:
            print('Specified %d custom aids' % (len(aid_list),))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) -
            {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    #associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    FILTER_HACK = True
    if FILTER_HACK:
        # HUGE HACK - get only images and names with filtered aids
        valid_aids_ = ibs.filter_aids_custom(valid_aids)
        valid_nids_ = ibs.filter_nids_custom(valid_nids)
        valid_gids_ = ibs.filter_gids_custom(valid_gids)
        if verbose:
            print('Filtered %d names' % (len(valid_nids) - len(valid_nids_)))
            print('Filtered %d images' % (len(valid_gids) - len(valid_gids_)))
            print('Filtered %d annots' % (len(valid_aids) - len(valid_aids_)))
        valid_gids = valid_gids_
        valid_nids = valid_nids_
        valid_aids = valid_aids_
        #associated_nids = ut.compress(associated_nids, map(any,
        #ibs.unflat_map(ibs.get_annot_custom_filterflags,
        #               ibs.get_name_aids(associated_nids))))

    # Image info
    if verbose:
        print('Checking Image Info')
    gx2_aids = ibs.get_image_aids(valid_gids)
    if FILTER_HACK:
        gx2_aids = [ibs.filter_aids_custom(aids) for aids in gx2_aids]  # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats  = ut.get_stats_str(gx2_nAnnots, newlines=True, use_median=True)
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        print('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if FILTER_HACK:
        nx2_aids =  [ibs.filter_aids_custom(aids) for aids in nx2_aids]    # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in nx2_aids]
    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    # Occurrence Info
    def compute_annot_occurrence_ids(ibs, aid_list):
        from ibeis.algo.preproc import preproc_occurrence
        gid_list = ibs.get_annot_gids(aid_list)
        gid2_aids = ut.group_items(aid_list, gid_list)
        flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False)
        occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
        occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()}
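        # e.g. (hypothetical rowids) occurid2_aids == {0: [1, 2, 5], 1: [3, 4]}:
        # each occurrence id maps to the flat list of annot rowids whose images
        # fall into that occurrence under the 4-hour grouping above.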
        return occurid2_aids

    import utool
    with utool.embed_on_exception_context:
        occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
        occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
        occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
        nid2_occurxs = ut.ddict(list)
        for occurx, nids in enumerate(occur_unique_nids):
            for nid in nids:
                nid2_occurxs[nid].append(occurx)

    nid2_occurx_single = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1}
    nid2_occurx_resight = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1}
    singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

    singlesight_annot_stats = ut.get_stats(list(map(len, singlesight_encounters)), use_median=True, use_sum=True)
    resight_name_stats = ut.get_stats(list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True)

    try:
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0)
        undirected_tags = ibs.get_aidpair_tags(aid_pairs.T[0], aid_pairs.T[1], directed=False)
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)

        num_reviewed_pairs = sum(ibs.get_annot_pair_is_reviewed(aid_pairs.T[0], aid_pairs.T[1]))
        pair_tag_info['num_reviewed'] = num_reviewed_pairs
    except Exception:
        pair_tag_info = {}

    #print(ut.dict_str(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        print('Checking Annot Species')
    unknown_aids = ut.compress(valid_aids, ibs.is_aid_unknown(valid_aids))
    species_list = ibs.get_annot_species_texts(valid_aids)
    species2_aids = ut.group_items(valid_aids, species_list)
    species2_nAids = {key: len(val) for key, val in species2_aids.items()}

    if verbose:
        print('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Separate singletons / multitons
    multiton_nxs  = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs      = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        print('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        multiton_aids = np.array([], dtype=np.int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            print('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)
        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = OrderedDict(
                [( 'max', wh_list.max(0)),
                 ( 'min', wh_list.min(0)),
                 ('mean', wh_list.mean(0)),
                 ( 'std', wh_list.std(0))])
            def arr2str(var):
                return ('[' + (
                    ', '.join(list(map(lambda x: '%.1f' % x, var)))
                ) + ']')
            ret = (',\n    '.join([
                '%s:%s' % (key, arr2str(val))
                for key, val in stat_dict.items()
            ]))
            return '{\n    ' + ret + '\n}'

        print('reading image sizes')
        # Image size stats
        img_size_list  = ibs.get_image_sizes(valid_gids)
        img_size_stats  = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir                 = %d' % len(gpath_list)),
            (' Image Size Stats  = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        print('Building Stats String')

    multiton_stats = ut.get_stats_str(multiton_nid2_nannots, newlines=True, use_median=True)

    # Time stats
    unixtime_list = ibs.get_image_unixtime(valid_gids)
    unixtime_list = ut.list_replace(unixtime_list, -1, float('nan'))
    #valid_unixtime_list = [time for time in unixtime_list if time != -1]
    #unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.get_timestats_str(unixtime_list, newlines=True, full=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list  = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict((lambda : 0))
        for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
            # Bin into Infant (< 12 months), Juvenile (12-36), or Adult (>= 36),
            # guarding against missing (None) estimates before comparing.
            if (min_age is None or min_age < 12) and (max_age is not None and max_age < 12):
                age_dict['Infant'] += 1
            elif (min_age is not None and 12 <= min_age < 36) and (max_age is not None and 12 <= max_age < 36):
                age_dict['Juvenile'] += 1
            elif (min_age is not None and 36 <= min_age) and (max_age is None or 36 <= max_age):
                age_dict['Adult'] += 1
            else:
                print('Found UNKNOWN Age: %r, %r' % (min_age, max_age))
                age_dict['UNKNOWN'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys))
        sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys])
        # Filter 0's
        sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0}
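        # e.g. (hypothetical counts) sextext2_nAnnots == {'Male': 3, 'Female': 5}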
        return sextext2_nAnnots

    if verbose:
        print('Checking Other Annot Stats')

    qualtext2_nAnnots = ibs.get_annot_qual_stats(valid_aids)
    yawtext2_nAnnots = ibs.get_annot_yaw_stats(valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        print('Checking Contrib Stats')

    # Contributor Statistics
    # hack remove colon for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]
    image_contrib_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contrib_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contrib_tag_to_gids = ut.group_items(valid_gids, image_contrib_tags)
    contrib_tag_to_aids = ut.group_items(valid_aids, annot_contrib_tags)

    contrib_tag_to_qualstats = {key: ibs.get_annot_qual_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)}
    contrib_tag_to_viewstats = {key: ibs.get_annot_yaw_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)}

    contrib_tag_to_nImages = {key: len(val) for key, val in six.iteritems(contrib_tag_to_gids)}
    contrib_tag_to_nAnnots = {key: len(val) for key, val in six.iteritems(contrib_tag_to_aids)}

    if verbose:
        print('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton =  len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_aids)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            print('Checking Disk Space')
        ibsdir_space   = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space    = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space   = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            print('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_aids)
            _num_names_total_check = num_names_singleton + num_names_unassociated + num_names_multiton
            _num_annots_total_check = num_unknown_annots + num_singleton_annots + num_multiton_annots
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            #if not request_annot_subset:
            # dont check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(ex, keys=[
                '_num_names_total_check',
                'num_names',
                '_num_annots_total_check',
                'num_annots',
                'num_names_singleton',
                'num_names_multiton',
                'num_unknown_annots',
                'num_multiton_annots',
                'num_singleton_annots',
            ])
            raise

    # Get contributor statistics
    contrib_rowids = ibs.get_valid_contrib_rowids()
    num_contributors = len(contrib_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.dict_str(dict_)
        return align2(str_)

    header_block_lines = (
        [('+============================'), ] + (
            [
                ('+ singleton := single sighting'),
                ('+ multiton  := multiple sightings'),
                ('--' * num_tabs),
            ] if not short and with_header else []
        )
    )

    source_block_lines = [
        ('DB Info:  ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = [
        ('--' * num_tabs),
        ('DB Bytes: '),
        ('     +- dbdir nBytes:         ' + dbdir_space),
        ('     |  +- _ibsdb nBytes:     ' + ibsdir_space),
        ('     |  |  +-imgdir nBytes:   ' + imgdir_space),
        ('     |  |  +-cachedir nBytes: ' + cachedir_space),
    ] if with_bytes else []

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names                      = %d' % num_names),
        ('# Names (unassociated)       = %d' % num_names_unassociated),
        ('# Names (singleton)          = %d' % num_names_singleton),
        ('# Names (multiton)           = %d' % num_names_multiton),
    ]

    subset_str = '        ' if not request_annot_subset else '(SUBSET)'

    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s            = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown)           = %d' % num_unknown_annots),
        ('# Annots (singleton)         = %d' % num_singleton_annots),
        ('# Annots (multiton)          = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = [
        ('--' * num_tabs),
        ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
        ('# Annots per Image           = %s' % (align2(gx2_nAnnots_stats),)),
        ('# Annots per Species         = %s' % (align_dict2(species2_nAids),)),
    ] if not short else []

    occurrence_block_lines = [
        ('--' * num_tabs),
        ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
        ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
        ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
    ] if not short else []

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(yawtext2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = [
        '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
        '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
    ] if not short  and with_agesex else []

    contrib_block_lines = [
        '# Images per contributor       = ' + align_dict2(contrib_tag_to_nImages),
        '# Annots per contributor       = ' + align_dict2(contrib_tag_to_nAnnots),
        '# Quality per contributor      = ' + ut.dict_str(contrib_tag_to_qualstats, sorted_=True),
        '# Viewpoint per contributor    = ' + ut.dict_str(contrib_tag_to_viewstats, sorted_=True),
    ] if with_contrib else []

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img                        = %d' % len(valid_gids)),
        None if short else ('# Img reviewed               = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps               = %d' % len(gps_list)),
        #('# Img with timestamp         = %d' % len(valid_unixtime_list)),
        None if short else ('Img Time Stats               = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines +
        bytes_block_lines +
        source_block_lines +
        name_block_lines +
        annot_block_lines +
        annot_per_basic_block_lines +
        occurrence_block_lines +
        annot_per_qualview_block_lines +
        annot_per_agesex_block_lines +
        img_block_lines +
        contrib_block_lines +
        imgsize_stat_lines +
        [('L============================'), ]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        print(info_str2)
    locals_ = locals()
    return locals_
Example #28
0
def merge_level_order(level_orders, topsort):
    """
    Merge orders of individual subtrees into a total ordering for
    computation.

    >>> level_orders = {
    >>>     'multi_chip_multitest': [['dummy_annot'], ['chip'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'multi_fgweight_multitest': [ ['dummy_annot'], ['chip', 'probchip'],
    >>>         ['keypoint'], ['fgweight'], ['multitest'], ['multitest_score'], ],
    >>>     'multi_keypoint_nnindexer': [ ['dummy_annot'], ['chip'], ['keypoint'],
    >>>         ['nnindexer'], ['multitest'], ['multitest_score'], ],
    >>>     'normal': [ ['dummy_annot'], ['chip', 'probchip'], ['keypoint'],
    >>>         ['fgweight'], ['spam'], ['multitest'], ['multitest_score'], ],
    >>>     'nwise_notch_multitest_1': [ ['dummy_annot'], ['notch'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'nwise_notch_multitest_2': [ ['dummy_annot'], ['notch'], ['multitest'],
    >>>         ['multitest_score'], ],
    >>>     'nwise_notch_notchpair_1': [ ['dummy_annot'], ['notch'], ['notchpair'],
    >>>         ['multitest'], ['multitest_score'], ],
    >>>     'nwise_notch_notchpair_2': [ ['dummy_annot'], ['notch'], ['notchpair'],
    >>>         ['multitest'], ['multitest_score'], ],
    >>> }
    >>> topsort = [u'dummy_annot', u'notch', u'probchip', u'chip', u'keypoint',
    >>>            u'fgweight', u'nnindexer', u'spam', u'notchpair', u'multitest',
    >>>            u'multitest_score']
    >>> print(ut.repr3(ut.merge_level_order(level_orders, topsort)))

    EG2:
        level_orders = {u'normal': [[u'dummy_annot'], [u'chip', u'probchip'], [u'keypoint'], [u'fgweight'], [u'spam']]}
        topsort = [u'dummy_annot', u'probchip', u'chip', u'keypoint', u'fgweight', u'spam']
    """

    import utool as ut
    if False:
        compute_order = []
        level_orders = ut.map_dict_vals(ut.total_flatten, level_orders)
        level_sets = ut.map_dict_vals(set, level_orders)
        for tablekey in topsort:
            compute_order.append((tablekey, [groupkey for groupkey, set_ in level_sets.items() if tablekey in set_]))
        return compute_order
    else:
        # Do on common subgraph
        import itertools
        # Pointer to the current level: start at the end and work your way up.
        main_ptr = len(topsort) - 1
        stack = []
        #from six.moves import zip_longest
        keys = list(level_orders.keys())
        type_to_ptr = {key: -1 for key in keys}
        print('level_orders = %s' % (ut.repr3(level_orders),))
        for count in itertools.count(0):
            print('----')
            print('count = %r' % (count,))
            ptred_levels = []
            for key in keys:
                levels = level_orders[key]
                ptr = type_to_ptr[key]
                try:
                    level = tuple(levels[ptr])
                except IndexError:
                    level = None
                ptred_levels.append(level)
            print('ptred_levels = %r' % (ptred_levels,))
            print('main_ptr = %r' % (main_ptr,))
            # groupkeys, groupxs = ut.group_indices(ptred_levels)
            # Group keys are tablenames
            # They point to the (type) of the input
            # num_levelkeys = len(ut.total_flatten(ptred_levels))
            groupkeys, groupxs = ut.group_indices(ptred_levels)
            main_idx = None
            while main_idx is None and main_ptr >= 0:
                target = topsort[main_ptr]
                print('main_ptr = %r' % (main_ptr,))
                print('target = %r' % (target,))
                # main_idx = ut.listfind(groupkeys, (target,))
                # if main_idx is None:
                possible_idxs = [idx for idx, keytup in enumerate(groupkeys) if keytup is not None and target in keytup]
                if len(possible_idxs) == 1:
                    main_idx = possible_idxs[0]
                else:
                    main_idx = None
                if main_idx is None:
                    main_ptr -= 1
            if main_idx is None:
                print('break I')
                break
            found_groups = ut.apply_grouping(keys, groupxs)[main_idx]
            print('found_groups = %r' % (found_groups,))
            stack.append((target, found_groups))
            for k in found_groups:
                type_to_ptr[k] -= 1

            if len(found_groups) == len(keys):
                main_ptr -= 1
                if main_ptr < 0:
                    print('break E')
                    break
        print('stack = %s' % (ut.repr3(stack),))
        print('have = %r' % (sorted(ut.take_column(stack, 0)),))
        print('need = %s' % (sorted(ut.total_flatten(level_orders.values())),))
        compute_order = stack[::-1]
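        # compute_order is now a list of (tablename, [subtree keys]) pairs ordered
        # from the roots toward the leaves, e.g. (hypothetically)
        # [('dummy_annot', ['normal', ...]), ('chip', ['normal', ...]), ...]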

    return compute_order
Example #29
0
    def make_inference(infr):
        cm_list = infr.cm_list
        unique_nids, prob_names = infr.make_prob_names()
        cluster_tuples = infr.make_clusters()

        # Make pair list for output
        if infr.user_feedback is not None:
            keys = list(
                zip(infr.user_feedback['aid1'], infr.user_feedback['aid2']))
            feedback_lookup = ut.make_index_lookup(keys)
            user_feedback = infr.user_feedback
            p_bg = 0
            user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
            part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
            part2 = p_bg * user_feedback['p_notcomp']
            p_same_list = part1 + part2
        else:
            feedback_lookup = {}
        needs_review_list = []
        num_top = 4
        for cm, row in zip(cm_list, prob_names):
            # Find top scoring names for this chip match in the posterior distribution
            idxs = row.argsort()[::-1]
            top_idxs = idxs[:num_top]
            nids = ut.take(unique_nids, top_idxs)
            # Find the matched annotations in the pairwise prior distributions
            nidxs = ut.dict_take(cm.nid2_nidx, nids, None)
            name_groupxs = ut.take(cm.name_groupxs, ut.filter_Nones(nidxs))
            daids_list = ut.take(cm.daid_list, name_groupxs)
            for daids in daids_list:
                scores_all = cm.annot_score_list / cm.annot_score_list.sum()
                idxs = ut.take(cm.daid2_idx, daids)
                scores = scores_all.take(idxs)
                raw_scores = cm.score_list.take(idxs)
                scorex = scores.argmax()
                raw_score = raw_scores[scorex]
                daid = daids[scorex]
                import scipy.special
                # SUPER HACK: these are not probabilities
                # TODO: set a and b based on dbsize and param configuration
                # python -m plottool.draw_func2 --exec-plot_func --show --range=0,3 --func="lambda x: scipy.special.expit(2 * x - 2)"
                #a = 2.0
                a = 1.5
                b = 2
                p_same = scipy.special.expit(b * raw_score - a)
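                # Illustrative values of this squash (not from a real database):
                #   raw_score 0.0  -> expit(-1.5) ~= 0.18
                #   raw_score 0.75 -> expit(0.0)   = 0.50
                #   raw_score 2.0  -> expit(2.5)  ~= 0.92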
                #confidence = scores[scorex]
                #p_diff = 1 - p_same
                #decision = 'same' if confidence > thresh else 'diff'
                #confidence = p_same if confidence > thresh else p_diff
                #tup = (cm.qaid, daid, decision, confidence, raw_score)
                confidence = (2 * np.abs(0.5 - p_same))**2
                #if infr.user_feedback is not None:
                #    import utool
                #    utool.embed(
                key = (cm.qaid, daid)
                fb_idx = feedback_lookup.get(key)
                if fb_idx is not None:
                    confidence = p_same_list[fb_idx]
                tup = (cm.qaid, daid, p_same, confidence, raw_score)
                needs_review_list.append(tup)

        # Sort resulting list by confidence
        sortx = ut.argsort(ut.take_column(needs_review_list, 3))
        needs_review_list = ut.take(needs_review_list, sortx)

        infr.needs_review_list = needs_review_list
        infr.cluster_tuples = cluster_tuples
Example #30
0
def intraoccurrence_connected():
    r"""
    CommandLine:
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --postcut
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --smaller

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.specialdraw import *  # NOQA
        >>> result = intraoccurrence_connected()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import ibeis
    import plottool as pt
    from ibeis.viz import viz_graph
    import networkx as nx
    pt.ensure_pylab_qt4()
    ibs = ibeis.opendb(defaultdb='PZ_Master1')
    nid2_aid = {
        #4880: [3690, 3696, 3703, 3706, 3712, 3721],
        4880: [3690, 3696, 3703],
        6537: [3739],
        6653: [7671],
        6610: [7566, 7408],
        #6612: [7664, 7462, 7522],
        #6624: [7465, 7360],
        #6625: [7746, 7383, 7390, 7477, 7376, 7579],
        6630: [7586, 7377, 7464, 7478],
        #6677: [7500]
    }
    nid2_dbaids = {4880: [33, 6120, 7164], 6537: [7017, 7206], 6653: [7660]}
    if ut.get_argflag('--small') or ut.get_argflag('--smaller'):
        del nid2_aid[6630]
        del nid2_aid[6537]
        del nid2_dbaids[6537]
        if ut.get_argflag('--smaller'):
            nid2_dbaids[4880].remove(33)
            nid2_aid[4880].remove(3690)
            nid2_aid[6610].remove(7408)
        #del nid2_aid[4880]
        #del nid2_dbaids[4880]

    aids = ut.flatten(nid2_aid.values())

    temp_nids = [1] * len(aids)
    postcut = ut.get_argflag('--postcut')
    aids_list = ibs.group_annots_by_name(aids)[0]
    ensure_edges = 'all' if True or not postcut else None
    unlabeled_graph = viz_graph.make_netx_graph_from_aid_groups(
        ibs,
        aids_list,
        #invis_edges=invis_edges,
        ensure_edges=ensure_edges,
        temp_nids=temp_nids)
    viz_graph.color_by_nids(unlabeled_graph,
                            unique_nids=[1] *
                            len(list(unlabeled_graph.nodes())))
    viz_graph.ensure_node_images(ibs, unlabeled_graph)
    nx.set_node_attributes(unlabeled_graph, 'shape', 'rect')
    #unlabeled_graph = unlabeled_graph.to_undirected()

    # Find the "database exemplars for these annots"
    if False:
        gt_aids = ibs.get_annot_groundtruth(aids)
        gt_aids = [ut.setdiff(s, aids) for s in gt_aids]
        dbaids = ut.unique(ut.flatten(gt_aids))
        dbaids = ibs.filter_annots_general(dbaids, minqual='good')
        ibs.get_annot_quality_texts(dbaids)
    else:
        dbaids = ut.flatten(nid2_dbaids.values())
    exemplars = nx.DiGraph()
    #graph = exemplars  # NOQA
    exemplars.add_nodes_from(dbaids)

    def add_clique(graph, nodes, edgeattrs={}, nodeattrs={}):
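        # ut.upper_diag_self_prodx yields each unordered pair of nodes once,
        # e.g. [1, 2, 3] -> [(1, 2), (1, 3), (2, 3)], so the nodes form a clique.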
        edge_list = ut.upper_diag_self_prodx(nodes)
        graph.add_edges_from(edge_list, **edgeattrs)
        return edge_list

    for aids_, nid in zip(*ibs.group_annots_by_name(dbaids)):
        add_clique(exemplars, aids_)
    viz_graph.ensure_node_images(ibs, exemplars)
    viz_graph.color_by_nids(exemplars, ibs=ibs)

    nx.set_node_attributes(unlabeled_graph, 'framewidth', False)
    nx.set_node_attributes(exemplars, 'framewidth', 4.0)

    nx.set_node_attributes(unlabeled_graph, 'group', 'unlab')
    nx.set_node_attributes(exemplars, 'group', 'exemp')

    #big_graph = nx.compose_all([unlabeled_graph])
    big_graph = nx.compose_all([exemplars, unlabeled_graph])

    # add sparse connections from unlabeled to exemplars
    import numpy as np
    rng = np.random.RandomState(0)
    if True or not postcut:
        for aid_ in unlabeled_graph.nodes():
            flags = rng.rand(len(exemplars)) > .5
            nid_ = ibs.get_annot_nids(aid_)
            exnids = np.array(ibs.get_annot_nids(list(exemplars.nodes())))
            flags = np.logical_or(exnids == nid_, flags)
            exmatches = ut.compress(list(exemplars.nodes()), flags)
            big_graph.add_edges_from(list(ut.product([aid_], exmatches)),
                                     color=pt.ORANGE,
                                     implicit=True)
    else:
        for aid_ in unlabeled_graph.nodes():
            flags = rng.rand(len(exemplars)) > .5
            exmatches = ut.compress(list(exemplars.nodes()), flags)
            nid_ = ibs.get_annot_nids(aid_)
            exnids = np.array(ibs.get_annot_nids(exmatches))
            exmatches = ut.compress(exmatches, exnids == nid_)
            big_graph.add_edges_from(list(ut.product([aid_], exmatches)))
        pass

    nx.set_node_attributes(big_graph, 'shape', 'rect')
    #if False and postcut:
    #    ut.nx_delete_node_attr(big_graph, 'nid')
    #    ut.nx_delete_edge_attr(big_graph, 'color')
    #    viz_graph.ensure_graph_nid_labels(big_graph, ibs=ibs)
    #    viz_graph.color_by_nids(big_graph, ibs=ibs)
    #    big_graph = big_graph.to_undirected()

    layoutkw = {
        'sep': 1 / 5,
        'prog': 'neato',
        'overlap': 'false',
        #'splines': 'ortho',
        'splines': 'spline',
    }

    as_directed = False
    #as_directed = True
    #hacknode = True
    hacknode = 0

    graph = big_graph
    ut.nx_ensure_agraph_color(graph)
    if hacknode:
        nx.set_edge_attributes(graph, 'taillabel',
                               {e: str(e[0])
                                for e in graph.edges()})
        nx.set_edge_attributes(graph, 'headlabel',
                               {e: str(e[1])
                                for e in graph.edges()})

    explicit_graph = pt.get_explicit_graph(graph)
    _, layout_info = pt.nx_agraph_layout(explicit_graph,
                                         orig_graph=graph,
                                         inplace=True,
                                         **layoutkw)

    if ut.get_argflag('--smaller'):
        graph.node[7660]['pos'] = np.array([550, 350])
        graph.node[6120]['pos'] = np.array([200, 600]) + np.array([350, -400])
        graph.node[7164]['pos'] = np.array([200, 480]) + np.array([350, -400])
        nx.set_node_attributes(graph, 'pin', 'true')
        _, layout_info = pt.nx_agraph_layout(graph, inplace=True, **layoutkw)
    elif ut.get_argflag('--small'):
        graph.node[7660]['pos'] = np.array([750, 350])
        graph.node[33]['pos'] = np.array([300, 600]) + np.array([350, -400])
        graph.node[6120]['pos'] = np.array([500, 600]) + np.array([350, -400])
        graph.node[7164]['pos'] = np.array([410, 480]) + np.array([350, -400])
        nx.set_node_attributes(graph, 'pin', 'true')
        _, layout_info = pt.nx_agraph_layout(graph, inplace=True, **layoutkw)

    if not postcut:
        #pt.show_nx(graph.to_undirected(), layout='agraph', layoutkw=layoutkw,
        #           as_directed=False)
        #pt.show_nx(graph, layout='agraph', layoutkw=layoutkw,
        #           as_directed=as_directed, hacknode=hacknode)

        pt.show_nx(graph,
                   layout='custom',
                   layoutkw=layoutkw,
                   as_directed=as_directed,
                   hacknode=hacknode)
    else:
        #explicit_graph = pt.get_explicit_graph(graph)
        #_, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph,
        #                                     **layoutkw)

        #layout_info['edge']['alpha'] = .8
        #pt.apply_graph_layout_attrs(graph, layout_info)

        #graph_layout_attrs = layout_info['graph']
        ##edge_layout_attrs  = layout_info['edge']
        ##node_layout_attrs  = layout_info['node']

        #for key, vals in layout_info['node'].items():
        #    #print('[special] key = %r' % (key,))
        #    nx.set_node_attributes(graph, key, vals)

        #for key, vals in layout_info['edge'].items():
        #    #print('[special] key = %r' % (key,))
        #    nx.set_edge_attributes(graph, key, vals)

        #nx.set_edge_attributes(graph, 'alpha', .8)
        #graph.graph['splines'] = graph_layout_attrs.get('splines', 'line')
        #graph.graph['splines'] = 'polyline'   # graph_layout_attrs.get('splines', 'line')
        #graph.graph['splines'] = 'line'

        cut_graph = graph.copy()
        edge_list = list(cut_graph.edges())
        edge_nids = np.array(ibs.unflat_map(ibs.get_annot_nids, edge_list))
        cut_flags = edge_nids.T[0] != edge_nids.T[1]
        cut_edges = ut.compress(edge_list, cut_flags)
        cut_graph.remove_edges_from(cut_edges)
        ut.nx_delete_node_attr(cut_graph, 'nid')
        viz_graph.ensure_graph_nid_labels(cut_graph, ibs=ibs)

        #ut.nx_get_default_node_attributes(exemplars, 'color', None)
        ut.nx_delete_node_attr(cut_graph,
                               'color',
                               nodes=unlabeled_graph.nodes())
        aid2_color = ut.nx_get_default_node_attributes(cut_graph, 'color',
                                                       None)
        nid2_colors = ut.group_items(aid2_color.values(),
                                     ibs.get_annot_nids(aid2_color.keys()))
        nid2_colors = ut.map_dict_vals(ut.filter_Nones, nid2_colors)
        nid2_colors = ut.map_dict_vals(ut.unique, nid2_colors)
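        # nid2_colors now maps each name id to the distinct non-None colors already
        # assigned to its annots; names with exactly one color seed nid2_color_ below.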
        #for val in nid2_colors.values():
        #    assert len(val) <= 1
        # Get initial colors
        nid2_color_ = {
            nid: colors_[0]
            for nid, colors_ in nid2_colors.items() if len(colors_) == 1
        }

        graph = cut_graph
        viz_graph.color_by_nids(cut_graph, ibs=ibs, nid2_color_=nid2_color_)
        nx.set_node_attributes(cut_graph, 'framewidth', 4)

        pt.show_nx(cut_graph,
                   layout='custom',
                   layoutkw=layoutkw,
                   as_directed=as_directed,
                   hacknode=hacknode)

    pt.zoom_factory()
Example #31
0
def check_database_overlap(ibs1, ibs2):
    """
    CommandLine:
        python -m wbia.other.dbinfo --test-get_dbinfo:1 --db PZ_MTEST
        dev.py -t listdbs
        python -m wbia.dbio.export_subset check_database_overlap --db PZ_MTEST --db2 PZ_MOTHERS

    CommandLine:
        python -m wbia.dbio.export_subset check_database_overlap

        python -m wbia.dbio.export_subset check_database_overlap --db1=PZ_MTEST --db2=PZ_Master0  # NOQA
        python -m wbia.dbio.export_subset check_database_overlap --db1=NNP_Master3 --db2=PZ_Master0  # NOQA

        python -m wbia.dbio.export_subset check_database_overlap --db1=GZ_Master0 --db2=GZ_ALL
        python -m wbia.dbio.export_subset check_database_overlap --db1=GZ_ALL --db2=lewa_grevys

        python -m wbia.dbio.export_subset check_database_overlap --db1=PZ_FlankHack --db2=PZ_Master1
        python -m wbia.dbio.export_subset check_database_overlap --db1=PZ_PB_RF_TRAIN --db2=PZ_Master1


    Example:
        >>> # SCRIPT
        >>> from wbia.dbio.export_subset import *  # NOQA
        >>> import wbia
        >>> import utool as ut
        >>> #ibs1 = wbia.opendb(db='PZ_Master0')
        >>> #ibs2 = wbia.opendb(dbdir='/raid/work2/Turk/PZ_Master')
        >>> db1 = ut.get_argval('--db1', str, default='PZ_MTEST')
        >>> db2 = ut.get_argval('--db2', str, default='testdb1')
        >>> dbdir1 = ut.get_argval('--dbdir1', str, default=None)
        >>> dbdir2 = ut.get_argval('--dbdir2', str, default=None)
        >>> ibs1 = wbia.opendb(db=db1, dbdir=dbdir1)
        >>> ibs2 = wbia.opendb(db=db2, dbdir=dbdir2)
        >>> check_database_overlap(ibs1, ibs2)
    """
    import numpy as np

    def print_isect(items1, items2, lbl=''):
        set1_ = set(items1)
        set2_ = set(items2)
        items_isect = set1_.intersection(set2_)
        fmtkw1 = dict(
            part=1,
            lbl=lbl,
            num=len(set1_),
            num_isect=len(items_isect),
            percent=100 * len(items_isect) / len(set1_),
        )
        fmtkw2 = dict(
            part=2,
            lbl=lbl,
            num=len(set2_),
            num_isect=len(items_isect),
            percent=100 * len(items_isect) / len(set2_),
        )
        fmt_a = '  * Num {lbl} {part}: {num_isect} / {num} = {percent:.2f}%'
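        # A rendered line looks roughly like (numbers hypothetical):
        #   * Num images 1: 42 / 1000 = 4.20%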
        # fmt_b = '  * Num {lbl} isect: {num}'
        logger.info('Checking {lbl} intersection'.format(lbl=lbl))
        logger.info(fmt_a.format(**fmtkw1))
        logger.info(fmt_a.format(**fmtkw2))
        # logger.info(fmt_b.format(lbl=lbl, num=len(items_isect)))
        # items = items_isect
        # list_ = items1
        x_list1 = ut.find_list_indexes(items1, items_isect)
        x_list2 = ut.find_list_indexes(items2, items_isect)
        return x_list1, x_list2

    gids1 = ibs1.images()
    gids2 = ibs2.images()

    # Find common images
    # items1, items2, lbl, = gids1.uuids, gids2.uuids, 'images'
    gx_list1, gx_list2 = print_isect(gids1.uuids, gids2.uuids, 'images')
    gids_isect1 = gids1.take(gx_list1)
    gids_isect2 = gids2.take(gx_list2)
    assert gids_isect2.uuids == gids_isect1.uuids, 'sequence must be aligned'

    SHOW_ISECT_GIDS = False
    if SHOW_ISECT_GIDS:
        if len(gx_list1) > 0:
            logger.info('gids_isect1 = %r' % (gids_isect1, ))
            logger.info('gids_isect2 = %r' % (gids_isect2, ))
            if False:
                # Debug code
                import wbia.viz
                import wbia.plottool as pt

                gid_pairs = list(zip(gids_isect1, gids_isect2))
                pairs_iter = ut.ichunks(gid_pairs, chunksize=8)
                for fnum, pairs in enumerate(pairs_iter, start=1):
                    pnum_ = pt.make_pnum_nextgen(nRows=len(pairs), nCols=2)
                    for gid1, gid2 in pairs:
                        wbia.viz.show_image(ibs1,
                                            gid1,
                                            pnum=pnum_(),
                                            fnum=fnum)
                        wbia.viz.show_image(ibs2,
                                            gid2,
                                            pnum=pnum_(),
                                            fnum=fnum)

    # if False:
    #     aids1 = ibs1.get_valid_aids()
    #     aids2 = ibs2.get_valid_aids()
    #     ibs1.update_annot_visual_uuids(aids1)
    #     ibs2.update_annot_visual_uuids(aids2)
    #     ibs1.update_annot_semantic_uuids(aids1)
    #     ibs2.update_annot_semantic_uuids(aids2)

    # Check to see which intersecting images have different annotations
    image_aids_isect1 = gids_isect1.aids
    image_aids_isect2 = gids_isect2.aids
    image_avuuids_isect1 = np.array(
        ibs1.unflat_map(ibs1.get_annot_visual_uuids, image_aids_isect1))
    image_avuuids_isect2 = np.array(
        ibs2.unflat_map(ibs2.get_annot_visual_uuids, image_aids_isect2))
    changed_image_xs = np.nonzero(
        image_avuuids_isect1 != image_avuuids_isect2)[0]
    if len(changed_image_xs) > 0:
        logger.info(
            'There are %d images with changes in annotation visual information'
            % (len(changed_image_xs), ))
        changed_gids1 = ut.take(gids_isect1, changed_image_xs)
        changed_gids2 = ut.take(gids_isect2, changed_image_xs)

        SHOW_CHANGED_GIDS = False
        if SHOW_CHANGED_GIDS:
            logger.info('changed_gids1 = %r' % (changed_gids1, ))
            logger.info('changed_gids2 = %r' % (changed_gids2, ))
            # if False:
            #     # Debug code
            #     import wbia.viz
            #     import wbia.plottool as pt
            #     gid_pairs = list(zip(changed_gids1, changed_gids2))
            #     pairs_iter = ut.ichunks(gid_pairs, chunksize=8)
            #     for fnum, pairs in enumerate(pairs_iter, start=1):
            #         pnum_ = pt.make_pnum_nextgen(nRows=len(pairs), nCols=2)
            #         for gid1, gid2 in pairs:
            #             wbia.viz.show_image(
            #                 ibs1, gid1, pnum=pnum_(), fnum=fnum)
            #             wbia.viz.show_image(
            #                 ibs2, gid2, pnum=pnum_(), fnum=fnum)

    # Check for overlapping annotations (visual info only) in general
    aids1 = ibs1.annots()
    aids2 = ibs2.annots()

    # Check for overlapping annotations (visual + semantic info) in general
    aux_list1, aux_list2 = print_isect(aids1.uuids, aids2.uuids, 'uuids')
    avx_list1, avx_list2 = print_isect(aids1.visual_uuids, aids2.visual_uuids,
                                       'vuuids')
    asx_list1, asx_list2 = print_isect(aids1.semantic_uuids,
                                       aids2.semantic_uuids, 'suuids')

    # Check which images with the same visual uuids have different semantic
    # uuids
    changed_ax_list1 = ut.setdiff_ordered(avx_list1, asx_list1)
    changed_ax_list2 = ut.setdiff_ordered(avx_list2, asx_list2)
    assert len(changed_ax_list1) == len(changed_ax_list2)
    assert ut.take(aids1.visual_uuids,
                   changed_ax_list1) == ut.take(aids2.visual_uuids,
                                                changed_ax_list2)

    changed_aids1 = np.array(ut.take(aids1, changed_ax_list1))
    changed_aids2 = np.array(ut.take(aids2, changed_ax_list2))

    changed_sinfo1 = ibs1.get_annot_semantic_uuid_info(changed_aids1)
    changed_sinfo2 = ibs2.get_annot_semantic_uuid_info(changed_aids2)
    sinfo1_arr = np.array(changed_sinfo1)
    sinfo2_arr = np.array(changed_sinfo2)
    is_semantic_diff = sinfo2_arr != sinfo1_arr
    # Inspect semantic differences
    if np.any(is_semantic_diff):
        colxs, rowxs = np.nonzero(is_semantic_diff)
        colx2_rowids = ut.group_items(rowxs, colxs)
        prop2_rowids = ut.map_dict_keys(changed_sinfo1._fields.__getitem__,
                                        colx2_rowids)
        logger.info('changed_value_counts = ' +
                    ut.repr2(ut.map_dict_vals(len, prop2_rowids)))
        yawx = changed_sinfo1._fields.index('yaw')

        # Show change in viewpoints
        if len(colx2_rowids[yawx]) > 0:
            vp_category_diff = ibsfuncs.viewpoint_diff(
                sinfo1_arr[yawx], sinfo2_arr[yawx]).astype(np.float)
            # Look for category changes
            # any_diff = np.floor(vp_category_diff) > 0
            # _xs    = np.nonzero(any_diff)[0]
            # _aids1 = changed_aids1.take(_xs)
            # _aids2 = changed_aids2.take(_xs)
            # Look for significant changes
            is_significant_diff = np.floor(vp_category_diff) > 1
            significant_xs = np.nonzero(is_significant_diff)[0]
            significant_aids1 = changed_aids1.take(significant_xs)
            significant_aids2 = changed_aids2.take(significant_xs)
            logger.info('There are %d significant viewpoint changes' %
                        (len(significant_aids2), ))
            # vt.ori_distance(sinfo1_arr[yawx], sinfo2_arr[yawx])
            # zip(ibs1.get_annot_viewpoint_code(significant_aids1),
            # ibs2.get_annot_viewpoint_code(significant_aids2))
            # logger.info('yawdiff = %r' % )
            # if False:
            # Hack: Apply fixes
            # good_yaws = ibs2.get_annot_yaws(significant_aids2)
            # ibs1.set_annot_yaws(significant_aids1, good_yaws)
            #    pass
            if False:
                # Debug code
                import wbia.viz
                import wbia.plottool as pt

                # aid_pairs = list(zip(_aids1, _aids2))
                aid_pairs = list(zip(significant_aids1, significant_aids2))
                pairs_iter = ut.ichunks(aid_pairs, chunksize=8)
                for fnum, pairs in enumerate(pairs_iter, start=1):
                    pnum_ = pt.make_pnum_nextgen(nRows=len(pairs), nCols=2)
                    for aid1, aid2 in pairs:
                        wbia.viz.show_chip(
                            ibs1,
                            aid1,
                            pnum=pnum_(),
                            fnum=fnum,
                            show_viewcode=True,
                            nokpts=True,
                        )
                        wbia.viz.show_chip(
                            ibs2,
                            aid2,
                            pnum=pnum_(),
                            fnum=fnum,
                            show_viewcode=True,
                            nokpts=True,
                        )

    #
    nAnnots_per_image1 = np.array(ibs1.get_image_num_annotations(gids1))
    nAnnots_per_image2 = np.array(ibs2.get_image_num_annotations(gids2))
    #
    images_without_annots1 = sum(nAnnots_per_image1 == 0)
    images_without_annots2 = sum(nAnnots_per_image2 == 0)
    logger.info('images_without_annots1 = %r' % (images_without_annots1, ))
    logger.info('images_without_annots2 = %r' % (images_without_annots2, ))

    nAnnots_per_image1
Example #32
0
def get_annotmatch_subgraph(ibs):
    r"""
    http://bokeh.pydata.org/en/latest/
    https://github.com/jsexauer/networkx_viewer

    TODO: Needs a special visualization
        In the web interface I need:
            * a graph of annotation matches.
            * the ability to move them around.
            * the ability to edit the lines between them.
            * http://stackoverflow.com/questions/15373530/web-graph-visualization-tool

            This should share functionality with a name view.

    Args:
        ibs (IBEISController):  ibeis controller object

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_subgraph --show

        # Networkx example
        python -m ibeis.viz.viz_graph --test-show_chipmatch_graph:0 --show

    Ignore:

        from ibeis import viz

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> result = get_annotmatch_subgraph(ibs)
        >>> ut.show_if_requested()
    """
    #import ibeis
    #ibs = ibeis.opendb(db='PZ_MTEST')
    #rowids = ibs._get_all_annotmatch_rowids()
    #aids1 = ibs.get_annotmatch_aid1(rowids)
    #aids2 = ibs.get_annotmatch_aid2(rowids)
    #
    #
    nids = ibs.get_valid_nids()
    nids = nids[0:5]
    aids_list = ibs.get_name_aids(nids)
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    # Enumerate annotmatch properties
    rng = np.random.RandomState(0)
    edge_props = {
        'weight': rng.rand(len(aids1)),
        'reviewer_confidence': rng.rand(len(aids1)),
        'algo_confidence': rng.rand(len(aids1)),
    }

    # Remove data that does not need to be visualized
    # (dont show all the aids if you dont have to)
    thresh = .5
    flags = edge_props['weight'] > thresh
    aids1_ = ut.compress(aids1, flags)
    aids2_ = ut.compress(aids2, flags)
    chosen_props = ut.dict_subset(edge_props, ['weight'])
    edge_props = ut.map_dict_vals(ut.partial(ut.compress, flag_list=flags), chosen_props)
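    # After the map_dict_vals(compress) pass each surviving property list is
    # aligned with aids1_/aids2_: only pairs whose weight cleared the threshold remain.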

    edge_keys = list(edge_props.keys())
    edge_vals = ut.dict_take(edge_props, edge_keys)
    edge_attr_list = [dict(zip(edge_keys, vals_)) for vals_ in zip(*edge_vals)]

    unique_aids = list(set(aids1_ + aids2_))
    # Make a graph between the chips
    nodes = unique_aids
    edges = list(zip(aids1_, aids2_, edge_attr_list))
    import networkx as nx
    graph = nx.DiGraph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)
    from ibeis.viz import viz_graph
    fnum = None
    #zoom = kwargs.get('zoom', .4)
    viz_graph.viz_netx_chipgraph(ibs, graph, fnum=fnum, with_images=True, augment_graph=False)
Example #33
0
def show_top_featmatches(qreq_, cm_list):
    """
    Args:
        qreq_ (wbia.QueryRequest):  query request object with hyper-parameters
        cm_list (list):

    SeeAlso:
        python -m wbia --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True,lnbnn_normalizer=normlnbnn-test -a default --sephack

        python -m wbia --tf TestResult.draw_feat_scoresep --show --db PZ_Master1 -t best:lnbnn_on=True -a timectrl --sephack
        python -m wbia --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:lnbnn_on=True -a default:size=30 --sephack
        python -m wbia --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:K=1,Knorm=5,lnbnn_on=True -a default:size=30 --sephack
        python -m wbia --tf TestResult.draw_feat_scoresep --show --db PZ_MTEST -t best:K=1,Knorm=3,lnbnn_on=True -a default --sephack


    CommandLine:
        python -m wbia.viz.viz_nearest_descriptors --exec-show_top_featmatches --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.viz.viz_nearest_descriptors import *  # NOQA
        >>> import wbia
        >>> cm_list, qreq_ = wbia.testdata_cmlist(defaultdb='PZ_MTEST',
        >>>                                        a=['default:has_none=mother,size=30'])
        >>> show_top_featmatches(qreq_, cm_list)
        >>> ut.quit_if_noshow()
        >>> import wbia.plottool as pt
        >>> ut.show_if_requested()
    """
    # for cm in cm_list:
    #     cm.score_annot_csum(qreq_)
    import numpy as np
    import vtool as vt
    from functools import partial

    # Stack chipmatches
    ibs = qreq_.ibs
    infos = [cm.get_flat_fm_info() for cm in cm_list]
    flat_metadata = dict([(k, np.concatenate(v))
                          for k, v in ut.dict_stack2(infos).items()])
    fsv_flat = flat_metadata['fsv']
    flat_metadata['fs'] = fsv_flat.prod(axis=1)
    aids1 = flat_metadata['aid1'][:, None]
    aids2 = flat_metadata['aid2'][:, None]
    flat_metadata['aid_pairs'] = np.concatenate([aids1, aids2], axis=1)

    # Take sample of metadata
    sortx = flat_metadata['fs'].argsort()[::-1]
    num = len(cm_list) * 3
    # num = 10
    taker = partial(np.take, indices=sortx[:num], axis=0)
    flat_metadata_top = ut.map_dict_vals(taker, flat_metadata)
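    # taker slices every stacked array down to the same top-`num` rows (highest
    # feature scores first), keeping the per-match metadata aligned across keys.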
    aid1s, aid2s, fms = ut.dict_take(flat_metadata_top, ['aid1', 'aid2', 'fm'])

    annots = {}
    aids = np.unique(np.hstack((aid1s, aid2s)))
    annots = {
        aid: ibs.get_annot_lazy_dict(aid, config2_=qreq_.qparams)
        for aid in aids
    }

    label_lists = (ibs.get_match_truths(
        aid1s, aid2s) == ibs.const.EVIDENCE_DECISION.POSITIVE)
    patch_size = 64

    def extract_patches(annots, aid, fxs):
        """ custom_func(lazydict, key, subkeys) for multigroup_lookup """
        annot = annots[aid]
        kpts = annot['kpts']
        rchip = annot['rchip']
        kpts_m = kpts.take(fxs, axis=0)
        warped_patches, warped_subkpts = vt.get_warped_patches(
            rchip, kpts_m, patch_size=patch_size)
        return warped_patches

    data_lists = vt.multigroup_lookup(annots, [aid1s, aid2s], fms.T,
                                      extract_patches)

    import wbia.plottool as pt  # NOQA

    pt.ensureqt()
    import wbia_cnn

    inter = wbia_cnn.draw_results.interact_patches(
        label_lists,
        data_lists,
        flat_metadata_top,
        chunck_sizes=(2, 4),
        ibs=ibs,
        hack_one_per_aid=False,
        sortby='fs',
        qreq_=qreq_,
    )
    inter.show()
Example #34
0
def analyize_multiple_drives(drives):
    """
    CommandLine:
        export PYTHONPATH=$PYTHONPATH:~/local/scripts

        python -m register_files --exec-analyize_multiple_drives --drives ~ E:/ D:/

        python -m register_files --exec-analyize_multiple_drives --drives ~ /media/Store
        python register_files.py --exec-analyize_multiple_drives --drives /media/joncrall/media/ /media/joncrall/store/
        /media/joncrall/backup

        cd ~/local/scripts

    Example:
        >>> from register_files import *  # NOQA
        >>> dpaths = ut.get_argval('--drives', type_=list, default=['E://', 'D://'])#'D:/', 'E:/', 'F:/'])
        >>> drives = [Drive(root_dpath) for root_dpath in dpaths]
        >>> drive = Broadcaster(drives)
        >>> drive.compute_info()
        >>> #drive.build_fpath_hashes()
        >>> drive.check_consistency()
        >>> E = drive = drives[0]
        >>> analyize_multiple_drives(drives)
        >>> #D, E, F = drives
        >>> #drive = D
    """
    # -----
    ## Find the files shared on all disks
    #allhave = reduce(ut.dict_isect_combine, [drive.hash_to_fpaths for drive in drives])
    #print('#allhave = %r' % (len(allhave),))
    #allhave.keys()[0:3]
    #allhave.values()[0:3]
    #ut.embed()
    #for drive in drives:
    #drive.rrr()
    #print(drive.root_dpath)
    #print(len(drive.hash_to_unique_fpaths))
    #print(len(drive.hash_to_fpaths))
    #print(len(drive.hash_to_unique_fpaths) / len(drive.hash_to_fpaths))

    # Build dict to map from dpath to file pointers of unique descendants
    #unique_fidxs_list = drive.hash_to_fidxs.values()
    #fidxs = ut.flatten(unique_fidxs_list)

    esc = re.escape

    # Find which files exist on all drives
    hashes_list = [set(drive_.hash_to_fidxs.keys()) for drive_ in drives]
    allhave_hashes = reduce(set.intersection, hashes_list)
    print('Drives %r have %d file hashes in common' % (drives, len(allhave_hashes)))

    lbls = [drive_.root_dpath for drive_ in drives]
    isect_lens = np.zeros((len(drives), len(drives)))
    for idx1, (hashes1, drive1) in enumerate(zip(hashes_list, drives)):
        for idx2, (hashes2, drive2) in enumerate(zip(hashes_list, drives)):
            if drive1 is not drive2:
                common = set.intersection(hashes1, hashes2)
                isect_lens[idx1, idx2] = len(common)
            else:
                isect_lens[idx1, idx2] = len(hashes2)
    import pandas as pd
    print(pd.DataFrame(isect_lens, index=lbls, columns=lbls))

    # for drive in drives
    drive = drives[0]
    print('Finding unique files in drive=%r' % (drive,))
    # Get subset of fidxs on this drive
    unflat_valid_fidxs = ut.take(drive.hash_to_fidxs, allhave_hashes)
    valid_fidxs = sorted(ut.flatten(unflat_valid_fidxs))

    # Filter fpaths by patterns
    ignore_patterns = [
        esc('Thumbs.db')
    ]
    ignore_paths = [
        'Spotify'
    ]
    patterns = ignore_paths + ignore_patterns
    valid_fpaths = ut.take(drive.fpath_list, valid_fidxs)
    valid_flags = [not any([re.search(p, fpath) for p in patterns])
                   for fpath in valid_fpaths]
    valid_flags = np.array(valid_flags)
    valid_fidxs = ut.compress(valid_fidxs, valid_flags)

    print(ut.filtered_infostr(valid_flags, 'invalid fpaths'))

    fidxs = valid_fidxs
    valid_fpaths = sorted(ut.take(drive.fpath_list, fidxs))

    dpath_to_unique_fidx = build_dpath_to_fidx(valid_fpaths, valid_fidxs,
                                                drive.root_dpath)

    def make_tree_structure(valid_fpaths):
        root = {}

        def dict_getitem_default(dict_, key, type_):
            # Like dict_.setdefault(key, type_()), but only constructs the
            # default value when the key is actually missing
            try:
                val = dict_[key]
            except KeyError:
                val = type_()
                dict_[key] = val
            return val

        for fpath in ut.ProgIter(valid_fpaths, 'building tree', freq=30000):
            path_components = ut.dirsplit(fpath)
            current = root
            for comp in path_components[:-1]:
                current = dict_getitem_default(current, comp, dict)
            contents = dict_getitem_default(current, '.', list)
            contents.append(path_components[-1])
        return root

    root = make_tree_structure(valid_fpaths)

    def print_tree(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=None):
        print('path = %r' % (path,))
        print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path])))
        path_components = ut.dirsplit(path)
        # Navigate to correct spot in tree
        current = root
        for c in path_components:
            current = current[c]
        print(ut.repr3(current, truncate=1))

    def get_tree_info(root, path, dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive, depth=0):
        path_components = ut.dirsplit(path)
        current = root
        for c in path_components:
            current = current[c]
        if isinstance(current, list):
            tree_tmp = []
        else:
            key_list = list(current.keys())
            child_list = [join(path, key) for key in key_list]
            dpath_nbytes_list = [
                drive.get_total_nbytes(dpath_to_unique_fidx.get(child, []))
                for child in child_list
            ]
            nfiles_list = [
                len(dpath_to_unique_fidx.get(child, []))
                for child in child_list
            ]
            tree_tmp = sorted([
                (key, ut.byte_str2(nbytes), nfiles)
                if depth == 0 else
                (key, ut.byte_str2(nbytes), nfiles,
                    get_tree_info(root, path=child,
                                  dpath_to_unique_fidx=dpath_to_unique_fidx, drive=drive,
                                  depth=depth - 1))
                for key, child, nbytes, nfiles in zip(key_list, child_list, dpath_nbytes_list, nfiles_list)
            ])
        return tree_tmp

    def print_tree_struct(*args, **kwargs):
        tree_str = (ut.indent(ut.repr3(get_tree_info(*args, **kwargs), nl=1)))
        print(tree_str)
        #bytes_str = ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx[path]))
        #print('path = %r, %s' % (path, bytes_str))
        #print(ut.repr3(key_list))
        return tree_str

    dpath_to_fidxs = ut.map_dict_vals(set, drive.dpath_to_fidx)
    complete_unique_dpaths = ut.dict_isect(dpath_to_fidxs, dpath_to_unique_fidx)
    complete_root = make_tree_structure(complete_unique_dpaths.keys())

    globals()['ut'] = ut
    globals()['os'] = os
    globals()['join'] = join

    print(ut.byte_str2(drive.get_total_nbytes(dpath_to_unique_fidx['E:\\'])))
    get_tree_info(root, path='E:\\', depth=0)

    get_tree_info(complete_root, path='E:\\', depth=0)

    get_tree_info(root, path='E:\\', depth=1)
    print(print_tree_struct(root, path='E:\\Clutter', depth=0))
    print_tree(root, path=r'E:\TV')
    print_tree(root, path=r'E:\Movies')
    print_tree(root, path=r'E:\Boot')

    print_tree(root, path='E:\\')
    print_tree(root, path=r'E:\Downloaded')
    print_tree(root, path=r'E:\Recordings')
    print_tree(root, path=r'E:\Clutter')
    print_tree(root, path=r'E:\Audio Books')

    # TODO:
    # * Ignore list
    # * Find and rectify internal duplicates
    # * Update registry with new files and deleted ones
    # * Ensure that all unique files are backed up
    # Index the C: Drive as well.
    # * Lazy properties of drive
    # * Multiple types of identifiers (hash, fname, ext, fsize)
    # Drive subsets
    # Export/Import Drive for analysis on other machines

    ut.embed()
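The pairwise loop over hashes_list above is the heart of the drive comparison: the diagonal of isect_lens holds each drive's own file count and the off-diagonal entries hold the number of hashes two drives share. A self-contained sketch with toy data (the drive labels and hash sets below are made up for illustration):

import numpy as np
import pandas as pd

# Stand-in for one set of file hashes per drive (drive.hash_to_fidxs.keys())
hashes_list = [{'a', 'b', 'c', 'd'}, {'b', 'c', 'e'}, {'c', 'd', 'e', 'f'}]
lbls = ['E:/', 'D:/', '/media/Store']

isect_lens = np.zeros((len(hashes_list), len(hashes_list)), dtype=int)
for idx1, hashes1 in enumerate(hashes_list):
    for idx2, hashes2 in enumerate(hashes_list):
        # Diagonal: a drive's own count; off-diagonal: overlap with the other drive
        isect_lens[idx1, idx2] = len(hashes1 if idx1 == idx2 else hashes1 & hashes2)
print(pd.DataFrame(isect_lens, index=lbls, columns=lbls))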
Example #35
0
def ensure_tf(X):
    termfreq = ut.dict_hist(X.wx_list)
    # Normalize raw counts by document length (term frequency), as in the
    # Video Google bag-of-words weighting
    termfreq = ut.map_dict_vals(lambda x: x / len(X.wx_list), termfreq)
    X.termfreq = termfreq
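ensure_tf turns raw visual-word counts into term frequencies by dividing each count by the total number of word assignments, following the Video Google bag-of-words weighting referenced in the comment. A pure-Python equivalent on a toy word list (the values are illustrative):

from collections import Counter

wx_list = [3, 7, 3, 3, 9, 7]             # hypothetical visual-word assignments
counts = Counter(wx_list)                # raw counts, analogous to ut.dict_hist
termfreq = {wx: n / len(wx_list) for wx, n in counts.items()}
print(termfreq)                          # {3: 0.5, 7: 0.333..., 9: 0.166...}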
Example #36
0
def intraoccurrence_connected():
    r"""
    CommandLine:
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --postcut
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --smaller

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.specialdraw import *  # NOQA
        >>> result = intraoccurrence_connected()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import ibeis
    import plottool as pt
    from ibeis.viz import viz_graph
    import networkx as nx
    pt.ensure_pylab_qt4()
    ibs = ibeis.opendb(defaultdb='PZ_Master1')
    nid2_aid = {
        #4880: [3690, 3696, 3703, 3706, 3712, 3721],
        4880: [3690, 3696, 3703],
        6537: [3739],
        6653: [7671],
        6610: [7566, 7408],
        #6612: [7664, 7462, 7522],
        #6624: [7465, 7360],
        #6625: [7746, 7383, 7390, 7477, 7376, 7579],
        6630: [7586, 7377, 7464, 7478],
        #6677: [7500]
    }
    nid2_dbaids = {
        4880: [33, 6120, 7164],
        6537: [7017, 7206],
        6653: [7660]
    }
    if ut.get_argflag('--small') or ut.get_argflag('--smaller'):
        del nid2_aid[6630]
        del nid2_aid[6537]
        del nid2_dbaids[6537]
        if ut.get_argflag('--smaller'):
            nid2_dbaids[4880].remove(33)
            nid2_aid[4880].remove(3690)
            nid2_aid[6610].remove(7408)
        #del nid2_aid[4880]
        #del nid2_dbaids[4880]

    aids = ut.flatten(nid2_aid.values())

    temp_nids = [1] * len(aids)
    postcut = ut.get_argflag('--postcut')
    aids_list = ibs.group_annots_by_name(aids)[0]
    ensure_edges = 'all' if True or not postcut else None
    unlabeled_graph = viz_graph.make_netx_graph_from_aid_groups(
        ibs, aids_list,
        #invis_edges=invis_edges,
        ensure_edges=ensure_edges, temp_nids=temp_nids)
    viz_graph.color_by_nids(unlabeled_graph,
                            unique_nids=[1] * len(list(unlabeled_graph.nodes())))
    viz_graph.ensure_node_images(ibs, unlabeled_graph)
    nx.set_node_attributes(unlabeled_graph, 'shape', 'rect')
    #unlabeled_graph = unlabeled_graph.to_undirected()

    # Find the "database exemplars for these annots"
    if False:
        gt_aids = ibs.get_annot_groundtruth(aids)
        gt_aids = [ut.setdiff(s, aids) for s in gt_aids]
        dbaids = ut.unique(ut.flatten(gt_aids))
        dbaids = ibs.filter_annots_general(dbaids, minqual='good')
        ibs.get_annot_quality_texts(dbaids)
    else:
        dbaids = ut.flatten(nid2_dbaids.values())
    exemplars = nx.DiGraph()
    #graph = exemplars  # NOQA
    exemplars.add_nodes_from(dbaids)

    def add_clique(graph, nodes, edgeattrs={}, nodeattrs={}):
        edge_list = ut.upper_diag_self_prodx(nodes)
        graph.add_edges_from(edge_list, **edgeattrs)
        return edge_list

    for aids_, nid in zip(*ibs.group_annots_by_name(dbaids)):
        add_clique(exemplars, aids_)
    viz_graph.ensure_node_images(ibs, exemplars)
    viz_graph.color_by_nids(exemplars, ibs=ibs)

    nx.set_node_attributes(unlabeled_graph, 'framewidth', False)
    nx.set_node_attributes(exemplars,  'framewidth', 4.0)

    nx.set_node_attributes(unlabeled_graph, 'group', 'unlab')
    nx.set_node_attributes(exemplars,  'group', 'exemp')

    #big_graph = nx.compose_all([unlabeled_graph])
    big_graph = nx.compose_all([exemplars, unlabeled_graph])

    # add sparse connections from unlabeled to exemplars
    import numpy as np
    rng = np.random.RandomState(0)
    # NOTE: the leading `True or` means this branch always runs, so the
    # else branch below is effectively dead code
    if True or not postcut:
        for aid_ in unlabeled_graph.nodes():
            flags = rng.rand(len(exemplars)) > .5
            nid_ = ibs.get_annot_nids(aid_)
            exnids = np.array(ibs.get_annot_nids(list(exemplars.nodes())))
            flags = np.logical_or(exnids == nid_, flags)
            exmatches = ut.compress(list(exemplars.nodes()), flags)
            big_graph.add_edges_from(list(ut.product([aid_], exmatches)),
                                     color=pt.ORANGE, implicit=True)
    else:
        for aid_ in unlabeled_graph.nodes():
            flags = rng.rand(len(exemplars)) > .5
            exmatches = ut.compress(list(exemplars.nodes()), flags)
            nid_ = ibs.get_annot_nids(aid_)
            exnids = np.array(ibs.get_annot_nids(exmatches))
            exmatches = ut.compress(exmatches, exnids == nid_)
            big_graph.add_edges_from(list(ut.product([aid_], exmatches)))
        pass

    nx.set_node_attributes(big_graph, 'shape', 'rect')
    #if False and postcut:
    #    ut.nx_delete_node_attr(big_graph, 'nid')
    #    ut.nx_delete_edge_attr(big_graph, 'color')
    #    viz_graph.ensure_graph_nid_labels(big_graph, ibs=ibs)
    #    viz_graph.color_by_nids(big_graph, ibs=ibs)
    #    big_graph = big_graph.to_undirected()

    layoutkw = {
        'sep' : 1 / 5,
        'prog': 'neato',
        'overlap': 'false',
        #'splines': 'ortho',
        'splines': 'spline',
    }

    as_directed = False
    #as_directed = True
    #hacknode = True
    hacknode = 0

    graph = big_graph
    ut.nx_ensure_agraph_color(graph)
    if hacknode:
        nx.set_edge_attributes(graph, 'taillabel', {e: str(e[0]) for e in graph.edges()})
        nx.set_edge_attributes(graph, 'headlabel', {e: str(e[1]) for e in graph.edges()})

    explicit_graph = pt.get_explicit_graph(graph)
    _, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph,
                                         inplace=True, **layoutkw)

    if ut.get_argflag('--smaller'):
        graph.node[7660]['pos'] = np.array([550, 350])
        graph.node[6120]['pos'] = np.array([200, 600]) + np.array([350, -400])
        graph.node[7164]['pos'] = np.array([200, 480]) + np.array([350, -400])
        nx.set_node_attributes(graph, 'pin', 'true')
        _, layout_info = pt.nx_agraph_layout(graph,
                                             inplace=True, **layoutkw)
    elif ut.get_argflag('--small'):
        graph.node[7660]['pos'] = np.array([750, 350])
        graph.node[33]['pos'] = np.array([300, 600]) + np.array([350, -400])
        graph.node[6120]['pos'] = np.array([500, 600]) + np.array([350, -400])
        graph.node[7164]['pos'] = np.array([410, 480]) + np.array([350, -400])
        nx.set_node_attributes(graph, 'pin', 'true')
        _, layout_info = pt.nx_agraph_layout(graph,
                                             inplace=True, **layoutkw)

    if not postcut:
        #pt.show_nx(graph.to_undirected(), layout='agraph', layoutkw=layoutkw,
        #           as_directed=False)
        #pt.show_nx(graph, layout='agraph', layoutkw=layoutkw,
        #           as_directed=as_directed, hacknode=hacknode)

        pt.show_nx(graph, layout='custom', layoutkw=layoutkw,
                   as_directed=as_directed, hacknode=hacknode)
    else:
        #explicit_graph = pt.get_explicit_graph(graph)
        #_, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph,
        #                                     **layoutkw)

        #layout_info['edge']['alpha'] = .8
        #pt.apply_graph_layout_attrs(graph, layout_info)

        #graph_layout_attrs = layout_info['graph']
        ##edge_layout_attrs  = layout_info['edge']
        ##node_layout_attrs  = layout_info['node']

        #for key, vals in layout_info['node'].items():
        #    #print('[special] key = %r' % (key,))
        #    nx.set_node_attributes(graph, key, vals)

        #for key, vals in layout_info['edge'].items():
        #    #print('[special] key = %r' % (key,))
        #    nx.set_edge_attributes(graph, key, vals)

        #nx.set_edge_attributes(graph, 'alpha', .8)
        #graph.graph['splines'] = graph_layout_attrs.get('splines', 'line')
        #graph.graph['splines'] = 'polyline'   # graph_layout_attrs.get('splines', 'line')
        #graph.graph['splines'] = 'line'

        cut_graph = graph.copy()
        edge_list = list(cut_graph.edges())
        edge_nids = np.array(ibs.unflat_map(ibs.get_annot_nids, edge_list))
        cut_flags = edge_nids.T[0] != edge_nids.T[1]
        cut_edges = ut.compress(edge_list, cut_flags)
        cut_graph.remove_edges_from(cut_edges)
        ut.nx_delete_node_attr(cut_graph, 'nid')
        viz_graph.ensure_graph_nid_labels(cut_graph, ibs=ibs)

        #ut.nx_get_default_node_attributes(exemplars, 'color', None)
        ut.nx_delete_node_attr(cut_graph, 'color', nodes=unlabeled_graph.nodes())
        aid2_color = ut.nx_get_default_node_attributes(cut_graph, 'color', None)
        nid2_colors = ut.group_items(aid2_color.values(), ibs.get_annot_nids(aid2_color.keys()))
        nid2_colors = ut.map_dict_vals(ut.filter_Nones, nid2_colors)
        nid2_colors = ut.map_dict_vals(ut.unique, nid2_colors)
        #for val in nid2_colors.values():
        #    assert len(val) <= 1
        # Get initial colors
        nid2_color_ = {nid: colors_[0] for nid, colors_ in nid2_colors.items()
                       if len(colors_) == 1}

        graph = cut_graph
        viz_graph.color_by_nids(cut_graph, ibs=ibs, nid2_color_=nid2_color_)
        nx.set_node_attributes(cut_graph, 'framewidth', 4)

        pt.show_nx(cut_graph, layout='custom', layoutkw=layoutkw,
                   as_directed=as_directed, hacknode=hacknode)

    pt.zoom_factory()
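The --postcut branch above keeps only edges whose endpoints share a name id: it looks up the nid of both endpoints of every edge, flags the pairs that disagree, and removes those edges before relabeling and recoloring. The same cut on a toy networkx graph (the annotation ids and name ids here are made up; the real code queries ibs.get_annot_nids):

import networkx as nx

g = nx.Graph([(1, 2), (2, 3), (3, 4)])
node_to_nid = {1: 'A', 2: 'A', 3: 'B', 4: 'B'}

# Remove every edge that spans two different name ids
cut_edges = [(u, v) for u, v in g.edges() if node_to_nid[u] != node_to_nid[v]]
g.remove_edges_from(cut_edges)
print(sorted(g.edges()))  # [(1, 2), (3, 4)] -- only same-name edges remain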