Example #1
def modify_tags(tags_list, direct_map=None, regex_map=None, regex_aug=None,
                delete_unmapped=False, return_unmapped=False,
                return_map=False):
    import utool as ut
    tag_vocab = ut.unique(ut.flatten(tags_list))
    alias_map = ut.odict()
    if regex_map is not None:
        alias_map.update(**ut.build_alias_map(regex_map, tag_vocab))
    if direct_map is not None:
        alias_map.update(ut.odict(direct_map))

    new_tags_list = tags_list
    new_tags_list = ut.alias_tags(new_tags_list, alias_map)

    if regex_aug is not None:
        alias_aug = ut.build_alias_map(regex_aug, tag_vocab)
        aug_tags_list = ut.alias_tags(new_tags_list, alias_aug)
        new_tags_list = [ut.unique(t1 + t2) for t1, t2 in zip(new_tags_list, aug_tags_list)]

    unmapped = list(set(tag_vocab) - set(alias_map.keys()))
    if delete_unmapped:
        new_tags_list = [ut.setdiff(tags, unmapped) for tags in new_tags_list]

    toreturn = None
    if return_map:
        toreturn = (alias_map,)

    if return_unmapped:
        toreturn = toreturn + (unmapped,)

    if toreturn is None:
        toreturn = new_tags_list
    else:
        toreturn = (new_tags_list,) + toreturn
    return toreturn
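This example leans on several utool helpers. As a point of reference, here is a minimal pure-Python sketch of the order-preserving de-duplication that ut.unique and ut.flatten are used for above (these stand-ins only assume the obvious behavior; they are not the utool implementation):

def flatten(list_of_lists):
    # Concatenate the sublists into one flat list.
    return [item for sub in list_of_lists for item in sub]

def unique(items):
    # Drop duplicates while preserving first-seen order.
    seen = set()
    out = []
    for item in items:
        if item not in seen:
            seen.add(item)
            out.append(item)
    return out

tags_list = [['zebra', 'left'], ['left', 'blurry'], ['zebra']]
tag_vocab = unique(flatten(tags_list))
print(tag_vocab)  # ['zebra', 'left', 'blurry']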
Example #2
def export_annots(ibs, aid_list, new_dbpath=None):
    r"""
    exports a subset of annotations and other required info

    TODO:
        PZ_Master1 needs to backproject information back onto NNP_Master3 and
        PZ_Master0

    Args:
        ibs (IBEISController):  ibeis controller object
        aid_list (list):  list of annotation rowids
        new_dbpath (None): (default = None)

    Returns:
        str: new_dbpath

    CommandLine:
        python -m ibeis.dbio.export_subset export_annots
        python -m ibeis.dbio.export_subset export_annots --db NNP_Master3 \
            -a viewpoint_compare --nocache-aid --verbtd --new_dbpath=PZ_ViewPoints

        python -m ibeis.expt.experiment_helpers get_annotcfg_list:0 \
            --db NNP_Master3 \
            -a viewpoint_compare --nocache-aid --verbtd
        python -m ibeis.expt.experiment_helpers get_annotcfg_list:0 --db NNP_Master3 \
            -a viewpoint_compare --nocache-aid --verbtd
        python -m ibeis.expt.experiment_helpers get_annotcfg_list:0 --db NNP_Master3 \
            -a default:aids=all,is_known=True,view_pername=#primary>0&#primary1>0,per_name=4,size=200
        python -m ibeis.expt.experiment_helpers get_annotcfg_list:0 --db NNP_Master3 \
            -a default:aids=all,is_known=True,view_pername='#primary>0&#primary1>0',per_name=4,size=200 --acfginfo

        python -m ibeis.expt.experiment_helpers get_annotcfg_list:0 --db PZ_Master1 \
            -a default:has_any=photobomb --acfginfo

    Example:
        >>> # SCRIPT
        >>> from ibeis.dbio.export_subset import *  # NOQA
        >>> import ibeis
        >>> from ibeis.expt import experiment_helpers
        >>> ibs = ibeis.opendb(defaultdb='NNP_Master3')
        >>> acfg_name_list = ut.get_argval(('--aidcfg', '--acfg', '-a'), type_=list, default=[''])
        >>> acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(ibs, acfg_name_list)
        >>> aid_list = expanded_aids_list[0][0]
        >>> ibs.print_annot_stats(aid_list, viewcode_isect=True, per_image=True)
        >>> # Expand to get all annots in each chosen image
        >>> gid_list = ut.unique_ordered(ibs.get_annot_gids(aid_list))
        >>> aid_list = ut.flatten(ibs.get_image_aids(gid_list))
        >>> ibs.print_annot_stats(aid_list, viewcode_isect=True, per_image=True)
        >>> new_dbpath = ut.get_argval('--new-dbpath', default='PZ_ViewPoints')
        >>> new_dbpath = export_annots(ibs, aid_list, new_dbpath)
        >>> result = ('new_dbpath = %s' % (str(new_dbpath),))
        >>> print(result)
    """
    print('Exporting annotations aid_list=%r' % (aid_list,))
    if new_dbpath is None:
        new_dbpath = make_new_dbpath(ibs, 'aid', aid_list)
    gid_list = ut.unique(ibs.get_annot_gids(aid_list))
    nid_list = ut.unique(ibs.get_annot_nids(aid_list))
    return export_data(ibs, gid_list, aid_list, nid_list, new_dbpath=new_dbpath)
Example #3
def color_by_nids(graph, unique_nids=None, ibs=None, nid2_color_=None):
    """ Colors edges and nodes by nid """
    # TODO use ut.color_nodes
    import plottool as pt

    ensure_graph_nid_labels(graph, unique_nids, ibs=ibs)
    node_to_nid = nx.get_node_attributes(graph, 'nid')
    unique_nids = ut.unique(node_to_nid.values())
    ncolors = len(unique_nids)
    if (ncolors) == 1:
        unique_colors = [pt.UNKNOWN_PURP]
    else:
        if nid2_color_ is not None:
            unique_colors = pt.distinct_colors(ncolors + len(nid2_color_) * 2)
        else:
            unique_colors = pt.distinct_colors(ncolors)
    # Assign a color per nid; only edges whose endpoints share a nid get colored
    nid_to_color = dict(zip(unique_nids, unique_colors))
    if nid2_color_ is not None:
        # HACK NEED TO ENSURE COLORS ARE NOT REUSED
        nid_to_color.update(nid2_color_)
    edge_aids = list(graph.edges())
    edge_nids = ut.unflat_take(node_to_nid, edge_aids)
    flags = [nids[0] == nids[1] for nids in edge_nids]
    flagged_edge_aids = ut.compress(edge_aids, flags)
    flagged_edge_nids = ut.compress(edge_nids, flags)
    flagged_edge_colors = [nid_to_color[nids[0]] for nids in flagged_edge_nids]
    edge_to_color = dict(zip(flagged_edge_aids, flagged_edge_colors))
    node_to_color = ut.map_dict_vals(ut.partial(ut.take, nid_to_color), node_to_nid)
    nx.set_edge_attributes(graph, 'color', edge_to_color)
    nx.set_node_attributes(graph, 'color', node_to_color)
Example #4
    def __add__(self, other):
        assert self.__class__ is other.__class__, 'incompatible'
        assert self._ibs is other._ibs, 'incompatible'
        assert self._config is other._config, 'incompatible'
        rowids = ut.unique(self._rowids + other._rowids)
        new = self.__class__(rowids, self._ibs, self._config)
        return new
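The __add__ override above merges two configured annotation sets without duplicating rowids. A toy sketch of the same union-with-order pattern on a generic container (the class and attribute names below are illustrative, not part of the original API):

class RowidSet:
    # Toy container mimicking the merge-by-union pattern above.
    def __init__(self, rowids, config=None):
        self._rowids = list(rowids)
        self._config = config

    def __add__(self, other):
        assert self.__class__ is other.__class__, 'incompatible'
        assert self._config is other._config, 'incompatible'
        # Order-preserving union of the two rowid lists.
        merged = list(dict.fromkeys(self._rowids + other._rowids))
        return self.__class__(merged, self._config)

a = RowidSet([1, 2, 3])
b = RowidSet([3, 4])
print((a + b)._rowids)  # [1, 2, 3, 4]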
Example #5
    def _cm_breaking(infr, cm_list=None, review_cfg={}):
        """
        >>> from wbia.algo.graph.core import *  # NOQA
        >>> review_cfg = {}
        """
        if cm_list is None:
            cm_list = infr.cm_list
        ranks_top = review_cfg.get('ranks_top', None)
        ranks_bot = review_cfg.get('ranks_bot', None)

        # Construct K-broken graph
        edges = []

        if ranks_bot is None:
            ranks_bot = 0

        for count, cm in enumerate(cm_list):
            score_list = cm.annot_score_list
            rank_list = ut.argsort(score_list)[::-1]
            sortx = ut.argsort(rank_list)

            top_sortx = sortx[:ranks_top]
            bot_sortx = sortx[len(sortx) - ranks_bot :]
            short_sortx = ut.unique(top_sortx + bot_sortx)

            daid_list = ut.take(cm.daid_list, short_sortx)
            for daid in daid_list:
                u, v = (cm.qaid, daid)
                if v < u:
                    u, v = v, u
                edges.append((u, v))
        return edges
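_cm_breaking keeps only the top ranks_top and bottom ranks_bot database annotations per query before forming undirected edges. A simplified, self-contained sketch of that selection step on plain lists (no wbia ChipMatch objects, and the ranking bookkeeping is condensed):

import numpy as np

def top_bottom_edges(qaid, daids, scores, ranks_top=2, ranks_bot=1):
    # Rank database annots by descending score, then keep the extremes.
    order = np.argsort(scores)[::-1]
    keep = list(order[:ranks_top]) + list(order[len(order) - ranks_bot:])
    keep = list(dict.fromkeys(keep))  # unique, order preserved
    edges = []
    for idx in keep:
        u, v = qaid, daids[idx]
        if v < u:
            u, v = v, u  # canonical undirected orientation
        edges.append((u, v))
    return edges

print(top_bottom_edges(qaid=10, daids=[3, 7, 12, 20],
                       scores=[0.9, 0.1, 0.7, 0.4]))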
Example #6
def color_by_nids(graph, unique_nids=None, ibs=None, nid2_color_=None):
    """ Colors edges and nodes by nid """
    # TODO use ut.color_nodes
    import plottool as pt

    ensure_graph_nid_labels(graph, unique_nids, ibs=ibs)
    node_to_nid = nx.get_node_attributes(graph, 'nid')
    unique_nids = ut.unique(node_to_nid.values())
    ncolors = len(unique_nids)
    if (ncolors) == 1:
        unique_colors = [pt.UNKNOWN_PURP]
    else:
        if nid2_color_ is not None:
            unique_colors = pt.distinct_colors(ncolors + len(nid2_color_) * 2)
        else:
            unique_colors = pt.distinct_colors(ncolors)
    # Assign a color per nid; only edges whose endpoints share a nid get colored
    nid_to_color = dict(zip(unique_nids, unique_colors))
    if nid2_color_ is not None:
        # HACK NEED TO ENSURE COLORS ARE NOT REUSED
        nid_to_color.update(nid2_color_)
    edge_aids = list(graph.edges())
    edge_nids = ut.unflat_take(node_to_nid, edge_aids)
    flags = [nids[0] == nids[1] for nids in edge_nids]
    flagged_edge_aids = ut.compress(edge_aids, flags)
    flagged_edge_nids = ut.compress(edge_nids, flags)
    flagged_edge_colors = [nid_to_color[nids[0]] for nids in flagged_edge_nids]
    edge_to_color = dict(zip(flagged_edge_aids, flagged_edge_colors))
    node_to_color = ut.map_dict_vals(ut.partial(ut.take, nid_to_color),
                                     node_to_nid)
    nx.set_edge_attributes(graph, 'color', edge_to_color)
    nx.set_node_attributes(graph, 'color', node_to_color)
Example #7
    def get_name_image_closure(self):
        ibs = self._ibs
        aids = self.aids
        old_aids = []
        while len(old_aids) != len(aids):
            old_aids = aids
            gids = ut.unique(ibs.get_annot_gids(aids))
            other_aids = list(set(ut.flatten(ibs.get_image_aids(gids))))
            other_nids = list(set(ibs.get_annot_nids(other_aids)))
            aids = ut.flatten(ibs.get_name_aids(other_nids))
        return aids
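get_name_image_closure keeps expanding the annotation set through image and name links until it stops growing. A pure-Python sketch of the same fixed-point loop over toy dictionaries (aid_to_gid, gid_to_aids, aid_to_nid, and nid_to_aids are hypothetical lookups standing in for the ibs getters):

def name_image_closure(aids, aid_to_gid, gid_to_aids, aid_to_nid, nid_to_aids):
    # Iterate until the set of annotations reaches a fixed point.
    old_aids = []
    aids = list(aids)
    while len(old_aids) != len(aids):
        old_aids = aids
        gids = {aid_to_gid[a] for a in aids}
        other_aids = {a for g in gids for a in gid_to_aids[g]}
        nids = {aid_to_nid[a] for a in other_aids}
        aids = sorted({a for n in nids for a in nid_to_aids[n]})
    return aids

aid_to_gid = {1: 'g1', 2: 'g1', 3: 'g2', 4: 'g3'}
gid_to_aids = {'g1': [1, 2], 'g2': [3], 'g3': [4]}
aid_to_nid = {1: 'n1', 2: 'n2', 3: 'n2', 4: 'n3'}
nid_to_aids = {'n1': [1], 'n2': [2, 3], 'n3': [4]}
print(name_image_closure([1], aid_to_gid, gid_to_aids, aid_to_nid, nid_to_aids))
# [1, 2, 3]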
Example #8
def newFileDialog(directory_, other_sidebar_dpaths=[], use_sidebar_cwd=True):
    qdlg = QtGui.QFileDialog()
    sidebar_urls = qdlg.sidebarUrls()[:]
    if use_sidebar_cwd:
        sidebar_urls.append(QtCore.QUrl.fromLocalFile(os.getcwd()))
    if directory_ is not None:
        sidebar_urls.append(QtCore.QUrl.fromLocalFile(directory_))
    sidebar_urls.extend(list(map(QtCore.QUrl.fromUserInput, other_sidebar_dpaths)))
    sidebar_urls = ut.unique(sidebar_urls)
    #print('sidebar_urls = %r' % (sidebar_urls,))
    qdlg.setSidebarUrls(sidebar_urls)
    return qdlg
Example #9
def _set_dialog_sidebar(
    qdlg, directory_=None, use_sidebar_cwd=True, other_sidebar_dpaths=[]
):
    sidebar_urls = qdlg.sidebarUrls()[:]
    if use_sidebar_cwd:
        sidebar_urls.append(QtCore.QUrl.fromLocalFile(os.getcwd()))
    if directory_ is not None:
        sidebar_urls.append(QtCore.QUrl.fromLocalFile(directory_))
    sidebar_urls.extend(list(map(QtCore.QUrl.fromUserInput, other_sidebar_dpaths)))
    sidebar_urls = ut.unique(sidebar_urls)
    # print('sidebar_urls = %r' % (sidebar_urls,))
    qdlg.setSidebarUrls(sidebar_urls)
Example #10
def newFileDialog(directory_, other_sidebar_dpaths=[], use_sidebar_cwd=True):
    qdlg = QtGui.QFileDialog()
    sidebar_urls = qdlg.sidebarUrls()[:]
    if use_sidebar_cwd:
        sidebar_urls.append(QtCore.QUrl.fromLocalFile(os.getcwd()))
    if directory_ is not None:
        sidebar_urls.append(QtCore.QUrl.fromLocalFile(directory_))
    sidebar_urls.extend(
        list(map(QtCore.QUrl.fromUserInput, other_sidebar_dpaths)))
    sidebar_urls = ut.unique(sidebar_urls)
    #print('sidebar_urls = %r' % (sidebar_urls,))
    qdlg.setSidebarUrls(sidebar_urls)
    return qdlg
Example #11
    def aliases(repo):
        aliases = []
        if repo._modname is not None:
            aliases.append(repo._modname)
        aliases.extend(repo._modname_hints[:])
        if repo.dpath and exists(repo.dpath):
            reponame = repo._find_modname_from_repo()
            if reponame is not None:
                aliases.append(reponame)
        aliases.append(repo.reponame)
        import utool as ut
        aliases = ut.unique(aliases)
        return aliases
Example #12
    def aliases(repo):
        aliases = []
        if repo._modname is not None:
            aliases.append(repo._modname)
        aliases.extend(repo._modname_hints[:])
        if repo.dpath and exists(repo.dpath):
            reponame = repo._find_modname_from_repo()
            if reponame is not None:
                aliases.append(reponame)
        aliases.append(repo.reponame)
        import utool as ut

        aliases = ut.unique(aliases)
        return aliases
Example #13
def color_nodes(graph, labelattr='label'):
    """ Colors edges and nodes by nid """
    import plottool as pt
    import utool as ut
    import networkx as nx
    node_to_lbl = nx.get_node_attributes(graph, labelattr)
    unique_lbls = ut.unique(node_to_lbl.values())
    ncolors = len(unique_lbls)
    if (ncolors) == 1:
        unique_colors = [pt.NEUTRAL_BLUE]
    else:
        unique_colors = pt.distinct_colors(ncolors)
    # Map each unique label to a color
    lbl_to_color = dict(zip(unique_lbls, unique_colors))
    node_to_color = {node:  lbl_to_color[lbl] for node, lbl in node_to_lbl.items()}
    nx.set_node_attributes(graph, 'color', node_to_color)
    ut.nx_ensure_agraph_color(graph)
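color_nodes assigns one distinct color per unique label and writes it back onto the graph. A minimal sketch with networkx and a matplotlib colormap standing in for pt.distinct_colors (the colormap choice is illustrative):

import networkx as nx
import matplotlib.cm as cm

def color_nodes_by_label(graph, labelattr='label'):
    node_to_lbl = nx.get_node_attributes(graph, labelattr)
    unique_lbls = list(dict.fromkeys(node_to_lbl.values()))
    # One distinct color per unique label.
    colors = [cm.tab10(i % 10) for i in range(len(unique_lbls))]
    lbl_to_color = dict(zip(unique_lbls, colors))
    node_to_color = {n: lbl_to_color[lbl] for n, lbl in node_to_lbl.items()}
    nx.set_node_attributes(graph, node_to_color, name='color')

g = nx.Graph([(1, 2), (2, 3)])
nx.set_node_attributes(g, {1: 'a', 2: 'a', 3: 'b'}, name='label')
color_nodes_by_label(g)
print(nx.get_node_attributes(g, 'color'))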
Example #14
    def expand_input(inputs, index, inplace=False):
        """
        Pushes the rootmost inputs all the way up to the sources of the graph

        CommandLine:
            python -m dtool.input_helpers expand_input

        Example:
            >>> # ENABLE_DOCTEST
            >>> from dtool.input_helpers import *  # NOQA
            >>> from dtool.example_depcache2 import *  # NOQA
            >>> depc = testdata_depc4()
            >>> inputs = depc['smk_match'].rootmost_inputs
            >>> inputs = depc['neighbs'].rootmost_inputs
            >>> print('(pre-expand)  inputs  = %r' % (inputs,))
            >>> index = 'indexer'
            >>> inputs2 = inputs.expand_input(index)
            >>> print('(post-expand) inputs2 = %r' % (inputs2,))
            >>> assert 'indexer' in str(inputs), 'missing indexer1'
            >>> assert 'indexer' not in str(inputs2), (
            >>>     '(2) unexpected indexer in %s' % (inputs2,))
        """
        if isinstance(index, six.string_types):
            index_list = ut.where(
                [rmi.tablename == index for rmi in inputs.rmi_list])
            if len(index_list) == 0:
                index = 0
            else:
                index = index_list[0]

        rmi = inputs.rmi_list[index]
        parent_level = rmi.parent_level()
        if len(parent_level) == 0:
            #raise AssertionError('no parents to expand')
            new_rmi_list = inputs.rmi_list[:]
        else:
            new_rmi_list = ut.insert_values(inputs.rmi_list, index,
                                            parent_level, inplace)
            new_rmi_list = ut.unique(new_rmi_list)
        if inplace:
            inputs.rmi_list = new_rmi_list
            new_inputs = inputs
        else:
            new_inputs = TableInput(new_rmi_list, inputs.exi_graph,
                                    inputs.table)
        return new_inputs
Example #15
def append_annot_case_tags(ibs, aid_list, tag_list):
    """
    Generally appends tags to annotations. Be careful not to introduce too many
    random tags; maybe we should just let that happen and introduce tag-aliases.

    Note: this is more of a set add than a list append.

    TODO: remove
    """
    # Ensure each item is a list
    #tags_list = [tag if isinstance(tag, list) else [tag] for tag in tag_list]
    if isinstance(tag_list, six.string_types):
        # Apply single tag to everybody
        tag_list = [tag_list] * len(aid_list)
    tags_list = [ut.ensure_iterable(tag) for tag in tag_list]
    text_list = ibs.get_annot_tag_text(aid_list)
    orig_tags_list = [[] if note is None else _parse_tags(note) for note in text_list]
    new_tags_list = [ut.unique(t1 + t2) for t1, t2 in zip(tags_list, orig_tags_list)]
    ibs.set_annot_case_tags(aid_list, new_tags_list)
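The append here is really a per-annotation set union: existing tags plus new tags, de-duplicated while keeping order. A stand-alone sketch of that merge on plain lists (no ibs controller involved):

def merge_tags(orig_tags_list, new_tags_list):
    # Union per item, preserving first-seen order (set add, not list append).
    return [list(dict.fromkeys(new + old))
            for new, old in zip(new_tags_list, orig_tags_list)]

orig = [['occlusion'], [], ['blurry', 'occlusion']]
new = [['occlusion', 'shadow'], ['shadow'], ['blurry']]
print(merge_tags(orig, new))
# [['occlusion', 'shadow'], ['shadow'], ['blurry', 'occlusion']]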
Example #16
    def flat_compute_order(inputs):
        """
        This is basically the scheduler

        TODO:
            We need to verify the correctness of this logic. It seems to
            not be deterministic between versions of python.

        CommandLine:
            python -m dtool.input_helpers flat_compute_order

        Example:
            >>> # xdoctest: +REQUIRES(--fixme)
            >>> from wbia.dtool.input_helpers import *  # NOQA
            >>> from wbia.dtool.example_depcache2 import *  # NOQA
            >>> depc = testdata_depc4()
            >>> inputs = depc['feat'].rootmost_inputs.total_expand()
            >>> flat_compute_order = inputs.flat_compute_order()
            >>> result = ut.repr2(flat_compute_order)
            ...
            >>> print(result)
            [chip[t, t:1, 1:1], probchip[t, t:1, 1:1], feat[t, t:1]]
        """
        # Compute the order in which all nodes must be evaluated
        import networkx as nx  # NOQA

        ordered_compute_nodes = [
            rmi.compute_order() for rmi in inputs.rmi_list
        ]
        flat_node_order_ = ut.unique(ut.flatten(ordered_compute_nodes))

        rgraph = inputs.exi_graph.reverse()
        toprank = ut.nx_topsort_rank(rgraph, flat_node_order_)
        sortx = ut.argsort(toprank)[::-1]
        flat_compute_order = ut.take(flat_node_order_, sortx)
        # Inputs are pre-computed.
        for rmi in inputs.rmi_list:
            try:
                flat_compute_order.remove(rmi.node)
            except ValueError as ex:
                ut.printex(ex, 'something is wrong', keys=['rmi.node'])
                raise
        return flat_compute_order
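flat_compute_order flattens the per-input compute chains, de-duplicates them, and then orders the unique nodes by topological rank in the reversed expanded-input graph. A small sketch of the dedupe-then-toposort idea using networkx directly (toy node names, not the dtool graph):

import networkx as nx

# Toy dependency graph: edges point from a product to what it depends on,
# mirroring the reversed orientation used above.
g = nx.DiGraph([('feat', 'chip'), ('feat', 'probchip'),
                ('chip', 'img'), ('probchip', 'img')])

# Per-input compute chains, possibly overlapping.
chains = [['img', 'chip', 'feat'], ['img', 'probchip', 'feat']]
flat = list(dict.fromkeys(n for chain in chains for n in chain))

# Order the unique nodes so dependencies come before dependents.
rank = {n: i for i, n in enumerate(nx.topological_sort(g.reverse()))}
ordered = sorted(flat, key=rank.get)
print(ordered)  # e.g. ['img', 'chip', 'probchip', 'feat']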
Example #17
def __debug_win_msvcr():
    import utool as ut
    fname = 'msvcr*.dll'
    key_list = ['PATH']
    found = ut.search_env_paths(fname, key_list)
    fpaths = ut.unique(ut.flatten(found.values()))
    fpaths = ut.lmap(ut.ensure_unixslash, fpaths)
    from os.path import basename
    dllnames = [basename(x) for x in fpaths]
    grouped = dict(ut.group_items(fpaths, dllnames))
    print(ut.dict_str(grouped, nl=4))

    keytoid = {}

    for key, vals in grouped.items():
        infos = ut.lmap(ut.get_file_nBytes, vals)
        #infos = ut.lmap(ut.get_file_uuid, vals)
        #uuids = [ut.get_file_uuid(val) for val in vals]
        keytoid[key] = list(zip(infos, vals))
    ut.print_dict(keytoid, nl=2)
Example #18
def __debug_win_msvcr():
    import utool as ut
    fname = 'msvcr*.dll'
    key_list = ['PATH']
    found = ut.search_env_paths(fname, key_list)
    fpaths = ut.unique(ut.flatten(found.values()))
    fpaths = ut.lmap(ut.ensure_unixslash, fpaths)
    from os.path import basename
    dllnames = [basename(x) for x in fpaths]
    grouped = dict(ut.group_items(fpaths, dllnames))
    print(ut.repr4(grouped, nl=4))

    keytoid = {
    }

    for key, vals in grouped.items():
        infos = ut.lmap(ut.get_file_nBytes, vals)
        #infos = ut.lmap(ut.get_file_uuid, vals)
        #uuids = [ut.get_file_uuid(val) for val in vals]
        keytoid[key] = list(zip(infos, vals))
    ut.print_dict(keytoid, nl=2)
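Both variants of __debug_win_msvcr search every directory on PATH for matching DLLs and group the hits by filename. A standard-library-only sketch of that search-and-group step (glob plus collections, no utool):

import os
import glob
import collections
from os.path import basename, join, getsize

def search_path_for(fname_pattern):
    # Find files matching a glob pattern in every directory on PATH,
    # grouped by basename, with duplicates removed.
    hits = []
    for dpath in os.environ.get('PATH', '').split(os.pathsep):
        if dpath:
            hits.extend(glob.glob(join(dpath, fname_pattern)))
    hits = list(dict.fromkeys(p.replace('\\', '/') for p in hits))
    grouped = collections.defaultdict(list)
    for fpath in hits:
        grouped[basename(fpath)].append(fpath)
    return dict(grouped)

for name, paths in search_path_for('msvcr*.dll').items():
    print(name, [(getsize(p), p) for p in paths])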
Example #19
def get_annotmatch_rowids_from_aid(ibs,
                                   aid_list,
                                   eager=True,
                                   nInput=None,
                                   force_method=None):
    """
    Undirected version.
    Returns, for each input aid, the annotmatch rowids in which it was reviewed
    as a candidate match (in either direction).
    aid_list = ibs.get_valid_aids()

    CommandLine:
        python -m wbia.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid
        python -m wbia.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid:1 --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.annotmatch_funcs import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb(defaultdb='testdb1')
        >>> ut.exec_funckw(ibs.get_annotmatch_rowids_from_aid, globals())
        >>> aid_list = ibs.get_valid_aids()[0:4]
        >>> annotmatch_rowid_list = ibs.get_annotmatch_rowids_from_aid(aid_list,
        >>>                                                        eager, nInput)
        >>> result = ('annotmatch_rowid_list = %s' % (str(annotmatch_rowid_list),))
        >>> print(result)
    """
    # from wbia.control import manual_annotmatch_funcs
    if nInput is None:
        nInput = len(aid_list)
    if nInput == 0:
        return []
    rowids1 = ibs.get_annotmatch_rowids_from_aid1(aid_list)
    rowids2 = ibs.get_annotmatch_rowids_from_aid2(aid_list)
    annotmatch_rowid_list = [
        ut.unique(ut.flatten(p)) for p in zip(rowids1, rowids2)
    ]
    # Ensure function output is consistent
    annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list))
    return annotmatch_rowid_list
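ut.unique(ut.flatten(p)) merges the two directed lookups into one undirected result per input aid, and the final sort makes the output deterministic. A sketch of that merge on plain lists (the rowid values are made up):

def merge_directed_rowids(rowids1, rowids2):
    # Per-aid union of the aid1-side and aid2-side match rowids,
    # sorted for a deterministic return value.
    merged = [list(dict.fromkeys(r1 + r2)) for r1, r2 in zip(rowids1, rowids2)]
    return [sorted(rowids) for rowids in merged]

rowids1 = [[10, 12], [], [7]]
rowids2 = [[12, 3], [5], []]
print(merge_directed_rowids(rowids1, rowids2))
# [[3, 10, 12], [5], [7]]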
Example #20
def fix_path(r):
    """ Removes duplicates from the path Variable """
    PATH_SEP = os.path.pathsep
    pathstr = robos.get_env_var('PATH')
    import utool as ut

    pathlist = ut.unique(pathstr.split(PATH_SEP))

    new_path = ''
    failed_bit = False
    for p in pathlist:
        if os.path.exists(p):
            new_path = new_path + p + PATH_SEP
        elif p == '':
            pass
        elif p.find('%') > -1 or p.find('$') > -1:
            print('PATH=%s has an envvar. Not checking existence' % p)
            new_path = new_path + p + PATH_SEP
        else:
            print('PATH=%s does not exist!!' % p)
            failed_bit = True
    # remove trailing path separator

    if failed_bit:
        ans = input('Should I overwrite the path? yes/no?')
        if ans == 'yes':
            failed_bit = False

    if len(new_path) > 0 and new_path[-1] == PATH_SEP:
        new_path = new_path[0:-1]

    if failed_bit is True:
        print("Path FIXING Failed. A Good path should be: \n%s" % new_path)
        print("\n\n====\n\n The old path was:\n%s" % pathstr)
    elif pathstr == new_path:
        print("The path was already clean")
    else:
        robos.set_env_var('PATH', new_path)
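The same de-duplicate-and-validate pass can be written with only the standard library. A sketch that builds the cleaned PATH string without writing it back (the robos.get_env_var/set_env_var calls above are repo-specific helpers):

import os

def cleaned_path(pathstr):
    # Return PATH with duplicates removed, keeping existing directories
    # and unexpanded environment variables, and reporting missing entries.
    sep = os.path.pathsep
    seen, kept = set(), []
    for p in pathstr.split(sep):
        if p == '' or p in seen:
            continue
        seen.add(p)
        if os.path.exists(p) or '%' in p or '$' in p:
            kept.append(p)
        else:
            print('PATH=%s does not exist!!' % p)
    return sep.join(kept)

print(cleaned_path(os.environ.get('PATH', '')))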
Example #21
def get_dbinfo(
    ibs,
    verbose=True,
    with_imgsize=False,
    with_bytes=False,
    with_contrib=False,
    with_agesex=False,
    with_header=True,
    short=False,
    tag='dbinfo',
    aid_list=None,
    aids=None,
):
    """

    Returns a dictionary of digestible database information.
    Infostr is a string summary of all the stats. Prints infostr in addition to
    returning locals.

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    SeeAlso:
        python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --use-hist=True --old=False --per_name_vpedge=False
        python -m wbia.other.ibsfuncs --exec-get_annot_stats_dict --db PZ_PB_RF_TRAIN --all

    CommandLine:
        python -m wbia.other.dbinfo --exec-get_dbinfo:0
        python -m wbia.other.dbinfo --test-get_dbinfo:1
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m wbia.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db LF_Bajo_bonito -a default
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 --db DETECT_SEATURTLES -a default --readonly

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0

        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m wbia.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example1:
        >>> # SCRIPT
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = wbia.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = wbia.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from wbia.expt import cfghelpers
        >>> #from wbia.expt import annotation_configs
        >>> #from wbia.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from wbia.other.dbinfo import *  # NOQA
        >>> import wbia
        >>> verbose = True
        >>> short = True
        >>> #ibs = wbia.opendb(db='GZ_ALL')
        >>> #ibs = wbia.opendb(db='PZ_Master0')
        >>> ibs = wbia.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contributor_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = wbia.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info:  testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names                      = 7
        # Names (unassociated)       = 0
        # Names (singleton)          = 5
        # Names (multiton)           = 2
        ----------
        # Annots                     = 13
        # Annots (unknown)           = 4
        # Annots (singleton)         = 5
        # Annots (multiton)          = 4
        ----------
        # Img                        = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...
    if aids is not None:
        aid_list = aids

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            logger.info('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from wbia.expt import experiment_helpers

            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list
            )
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            # aid_list =
        if verbose:
            logger.info('Specified %d custom aids' % (len(aid_list,)))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False))
            - {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    # associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    valid_images = ibs.images(valid_gids)
    valid_annots = ibs.annots(valid_aids)

    # Image info
    if verbose:
        logger.info('Checking Image Info')
    gx2_aids = valid_images.aids
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats = ut.repr4(
        ut.get_stats(gx2_nAnnots, use_median=True), nl=0, precision=2, si=True
    )
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        logger.info('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids_).intersection(valid_aids_set)) for aids_ in nx2_aids]
    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    if False:
        # Occurrence Info
        def compute_annot_occurrence_ids(ibs, aid_list):
            from wbia.algo.preproc import preproc_occurrence

            gid_list = ibs.get_annot_gids(aid_list)
            gid2_aids = ut.group_items(aid_list, gid_list)
            config = {'seconds_thresh': 4 * 60 * 60}
            flat_imgsetids, flat_gids = preproc_occurrence.wbia_compute_occurrences(
                ibs, gid_list, config=config, verbose=False
            )
            occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
            occurid2_aids = {
                oid: ut.flatten(ut.take(gid2_aids, gids))
                for oid, gids in occurid2_gids.items()
            }
            return occurid2_aids

        import utool

        with utool.embed_on_exception_context:
            occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
            occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
            occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
            nid2_occurxs = ut.ddict(list)
            for occurx, nids in enumerate(occur_unique_nids):
                for nid in nids:
                    nid2_occurxs[nid].append(occurx)

        nid2_occurx_single = {
            nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1
        }
        nid2_occurx_resight = {
            nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1
        }
        singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

        singlesight_annot_stats = ut.get_stats(
            list(map(len, singlesight_encounters)), use_median=True, use_sum=True
        )
        resight_name_stats = ut.get_stats(
            list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True
        )

    # Encounter Info
    def break_annots_into_encounters(aids):
        from wbia.algo.preproc import occurrence_blackbox
        import datetime

        thresh_sec = datetime.timedelta(minutes=30).seconds
        posixtimes = np.array(ibs.get_annot_image_unixtimes_asfloat(aids))
        # latlons = ibs.get_annot_image_gps(aids)
        labels = occurrence_blackbox.cluster_timespace2(
            posixtimes, None, thresh_sec=thresh_sec
        )
        return labels
        # ave_enc_time = [np.mean(times) for lbl, times in ut.group_items(posixtimes, labels).items()]
        # ut.square_pdist(ave_enc_time)

    try:
        am_rowids = ibs.get_annotmatch_rowids_between_groups([valid_aids], [valid_aids])[
            0
        ]
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0, am_rowids=am_rowids)
        undirected_tags = ibs.get_aidpair_tags(
            aid_pairs.T[0], aid_pairs.T[1], directed=False
        )
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)
    except Exception:
        pair_tag_info = {}

    # logger.info(ut.repr2(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        logger.info('Checking Annot Species')
    unknown_annots = valid_annots.compress(ibs.is_aid_unknown(valid_annots))
    species_list = valid_annots.species_texts
    species2_annots = valid_annots.group_items(valid_annots.species_texts)
    species2_nAids = {key: len(val) for key, val in species2_annots.items()}

    if verbose:
        logger.info('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Separate singletons / multitons
    multiton_nxs = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        logger.info('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        multiton_aids = np.array([], dtype=int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            logger.info('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)

        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = collections.OrderedDict(
                [
                    ('max', wh_list.max(0)),
                    ('min', wh_list.min(0)),
                    ('mean', wh_list.mean(0)),
                    ('std', wh_list.std(0)),
                ]
            )

            def arr2str(var):
                return '[' + (', '.join(list(map(lambda x: '%.1f' % x, var)))) + ']'

            ret = ',\n    '.join(
                ['%s:%s' % (key, arr2str(val)) for key, val in stat_dict.items()]
            )
            return '{\n    ' + ret + '\n}'

        logger.info('reading image sizes')
        # Image size stats
        img_size_list = ibs.get_image_sizes(valid_gids)
        img_size_stats = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir                 = %d' % len(gpath_list)),
            (' Image Size Stats  = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        logger.info('Building Stats String')

    multiton_stats = ut.repr3(
        ut.get_stats(multiton_nid2_nannots, use_median=True), nl=0, precision=2, si=True
    )

    # Time stats
    unixtime_list = valid_images.unixtime2
    # valid_unixtime_list = [time for time in unixtime_list if time != -1]
    # unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.repr3(ut.get_timestats_dict(unixtime_list, full=True), si=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict((lambda: 0))
        for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
            if max_age is None:
                max_age = min_age
            if min_age is None:
                min_age = max_age
            if max_age is None and min_age is None:
                logger.info('Found UNKNOWN Age: %r, %r' % (min_age, max_age,))
                age_dict['UNKNOWN'] += 1
            elif (min_age is None or min_age < 12) and max_age < 12:
                age_dict['Infant'] += 1
            elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
                age_dict['Juvenile'] += 1
            elif 36 <= min_age and (max_age is None or 36 <= max_age):
                age_dict['Adult'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(
            set(annot_sextext_list) - set(sex_keys)
        )
        sextext2_nAnnots = ut.odict(
            [(key, len(sextext2_aids.get(key, []))) for key in sex_keys]
        )
        # Filter 0's
        sextext2_nAnnots = {
            key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0
        }
        return sextext2_nAnnots

    def get_annot_qual_stats(ibs, aid_list):
        annots = ibs.annots(aid_list)
        qualtext2_nAnnots = ut.order_dict_by(
            ut.map_vals(len, annots.group_items(annots.quality_texts)),
            list(ibs.const.QUALITY_TEXT_TO_INT.keys()),
        )
        return qualtext2_nAnnots

    def get_annot_viewpoint_stats(ibs, aid_list):
        annots = ibs.annots(aid_list)
        viewcode2_nAnnots = ut.order_dict_by(
            ut.map_vals(len, annots.group_items(annots.viewpoint_code)),
            list(ibs.const.VIEW.CODE_TO_INT.keys()) + [None],
        )
        return viewcode2_nAnnots

    if verbose:
        logger.info('Checking Other Annot Stats')

    qualtext2_nAnnots = get_annot_qual_stats(ibs, valid_aids)
    viewcode2_nAnnots = get_annot_viewpoint_stats(ibs, valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        logger.info('Checking Contrib Stats')

    # Contributor Statistics
    # hack remove colon for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]

    image_contributor_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contributor_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contributor_tag_to_gids = ut.group_items(valid_gids, image_contributor_tags)
    contributor_tag_to_aids = ut.group_items(valid_aids, annot_contributor_tags)

    contributor_tag_to_qualstats = {
        key: get_annot_qual_stats(ibs, aids)
        for key, aids in six.iteritems(contributor_tag_to_aids)
    }
    contributor_tag_to_viewstats = {
        key: get_annot_viewpoint_stats(ibs, aids)
        for key, aids in six.iteritems(contributor_tag_to_aids)
    }

    contributor_tag_to_nImages = {
        key: len(val) for key, val in six.iteritems(contributor_tag_to_gids)
    }
    contributor_tag_to_nAnnots = {
        key: len(val) for key, val in six.iteritems(contributor_tag_to_aids)
    }

    if verbose:
        logger.info('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton = len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_annots)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            logger.info('Checking Disk Space')
        ibsdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            logger.info('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_annots)
            _num_names_total_check = (
                num_names_singleton + num_names_unassociated + num_names_multiton
            )
            _num_annots_total_check = (
                num_unknown_annots + num_singleton_annots + num_multiton_annots
            )
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            # if not request_annot_subset:
            # dont check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(
                ex,
                keys=[
                    '_num_names_total_check',
                    'num_names',
                    '_num_annots_total_check',
                    'num_annots',
                    'num_names_singleton',
                    'num_names_multiton',
                    'num_unknown_annots',
                    'num_multiton_annots',
                    'num_singleton_annots',
                ],
            )
            raise

    # Get contributor statistics
    contributor_rowids = ibs.get_valid_contributor_rowids()
    num_contributors = len(contributor_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.repr2(dict_, si=True)
        return align2(str_)

    header_block_lines = [('+============================')] + (
        [
            ('+ singleton := single sighting'),
            ('+ multiton  := multiple sightings'),
            ('--' * num_tabs),
        ]
        if not short and with_header
        else []
    )

    source_block_lines = [
        ('DB Info:  ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = (
        [
            ('--' * num_tabs),
            ('DB Bytes: '),
            ('     +- dbdir nBytes:         ' + dbdir_space),
            ('     |  +- _ibsdb nBytes:     ' + ibsdir_space),
            ('     |  |  +-imgdir nBytes:   ' + imgdir_space),
            ('     |  |  +-cachedir nBytes: ' + cachedir_space),
        ]
        if with_bytes
        else []
    )

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names                      = %d' % num_names),
        ('# Names (unassociated)       = %d' % num_names_unassociated),
        ('# Names (singleton)          = %d' % num_names_singleton),
        ('# Names (multiton)           = %d' % num_names_multiton),
    ]

    subset_str = '        ' if not request_annot_subset else '(SUBSET)'

    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s            = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown)           = %d' % num_unknown_annots),
        ('# Annots (singleton)         = %d' % num_singleton_annots),
        ('# Annots (multiton)          = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = (
        [
            ('--' * num_tabs),
            ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
            ('# Annots per Image           = %s' % (align2(gx2_nAnnots_stats),)),
            ('# Annots per Species         = %s' % (align_dict2(species2_nAids),)),
        ]
        if not short
        else []
    )

    occurrence_block_lines = (
        [
            ('--' * num_tabs),
            # ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
            # ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
            ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
        ]
        if not short
        else []
    )

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(viewcode2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = (
        [
            '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
            '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
        ]
        if not short and with_agesex
        else []
    )

    contributor_block_lines = (
        [
            '# Images per contributor       = ' + align_dict2(contributor_tag_to_nImages),
            '# Annots per contributor       = ' + align_dict2(contributor_tag_to_nAnnots),
            '# Quality per contributor      = '
            + ut.repr2(contributor_tag_to_qualstats, sorted_=True),
            '# Viewpoint per contributor    = '
            + ut.repr2(contributor_tag_to_viewstats, sorted_=True),
        ]
        if with_contrib
        else []
    )

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img                        = %d' % len(valid_gids)),
        None
        if short
        else ('# Img reviewed               = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps               = %d' % len(gps_list)),
        # ('# Img with timestamp         = %d' % len(valid_unixtime_list)),
        None
        if short
        else ('Img Time Stats               = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines
        + bytes_block_lines
        + source_block_lines
        + name_block_lines
        + annot_block_lines
        + annot_per_basic_block_lines
        + occurrence_block_lines
        + annot_per_qualview_block_lines
        + annot_per_agesex_block_lines
        + img_block_lines
        + contributor_block_lines
        + imgsize_stat_lines
        + [('L============================')]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        logger.info(info_str2)
    locals_ = locals()
    return locals_
Example #22
def bigcache_vsone(qreq_, hyper_params):
    """
    Cached output of one-vs-one matches

        >>> from wbia.scripts.script_vsone import *  # NOQA
        >>> self = OneVsOneProblem()
        >>> qreq_ = self.qreq_
        >>> hyper_params = self.hyper_params
    """
    import vtool as vt
    import wbia

    # Get a set of training pairs
    ibs = qreq_.ibs
    cm_list = qreq_.execute()
    infr = wbia.AnnotInference.from_qreq_(qreq_, cm_list, autoinit=True)

    # Per query choose a set of correct, incorrect, and random training pairs
    aid_pairs_ = infr._cm_training_pairs(
        rng=np.random.RandomState(42), **hyper_params.pair_sample
    )

    aid_pairs_ = vt.unique_rows(np.array(aid_pairs_), directed=False).tolist()

    pb_aid_pairs_ = photobomb_samples(ibs)

    # TODO: try to add in more non-comparable samples
    aid_pairs_ = pb_aid_pairs_ + aid_pairs_
    aid_pairs_ = vt.unique_rows(np.array(aid_pairs_))

    # ======================================
    # Compute one-vs-one scores and local_measures
    # ======================================

    # Prepare lazy attributes for annotations
    qreq_ = infr.qreq_
    ibs = qreq_.ibs
    qconfig2_ = qreq_.extern_query_config2
    dconfig2_ = qreq_.extern_data_config2
    qannot_cfg = ibs.depc.stacked_config(None, 'featweight', qconfig2_)
    dannot_cfg = ibs.depc.stacked_config(None, 'featweight', dconfig2_)

    # Remove any pairs missing features
    if dannot_cfg == qannot_cfg:
        unique_annots = ibs.annots(np.unique(np.array(aid_pairs_)), config=dannot_cfg)
        bad_aids = unique_annots.compress(~(np.array(unique_annots.num_feats) > 0)).aids
        bad_aids = set(bad_aids)
    else:
        annots1_ = ibs.annots(ut.unique(ut.take_column(aid_pairs_, 0)), config=qannot_cfg)
        annots2_ = ibs.annots(ut.unique(ut.take_column(aid_pairs_, 1)), config=dannot_cfg)
        bad_aids1 = annots1_.compress(~(np.array(annots1_.num_feats) > 0)).aids
        bad_aids2 = annots2_.compress(~(np.array(annots2_.num_feats) > 0)).aids
        bad_aids = set(bad_aids1 + bad_aids2)
    subset_idxs = np.where(
        [not (a1 in bad_aids or a2 in bad_aids) for a1, a2 in aid_pairs_]
    )[0]
    # Keep only a random subset
    if hyper_params.subsample:
        rng = np.random.RandomState(3104855634)
        num_max = hyper_params.subsample
        if num_max < len(subset_idxs):
            subset_idxs = rng.choice(subset_idxs, size=num_max, replace=False)
            subset_idxs = sorted(subset_idxs)

    # Take the current selection
    aid_pairs = ut.take(aid_pairs_, subset_idxs)

    if True:
        # NEW WAY
        config = hyper_params.vsone_assign
        # TODO: ensure annot probs like chips and features can be appropriately
        # set via qreq_ config or whatever
        matches = infr.exec_vsone_subset(aid_pairs, config=config)
    else:
        query_aids = ut.take_column(aid_pairs, 0)
        data_aids = ut.take_column(aid_pairs, 1)
        # OLD WAY
        # Determine a unique set of annots per config
        configured_aids = ut.ddict(set)
        configured_aids[qannot_cfg].update(query_aids)
        configured_aids[dannot_cfg].update(data_aids)

        # Make efficient annot-object representation
        configured_obj_annots = {}
        for config, aids in configured_aids.items():
            annots = ibs.annots(sorted(list(aids)), config=config)
            configured_obj_annots[config] = annots

        annots1 = configured_obj_annots[qannot_cfg].loc(query_aids)
        annots2 = configured_obj_annots[dannot_cfg].loc(data_aids)

        # Get hash based on visual annotation appearance of each pair
        # as well as algorithm configurations used to compute those properties
        qvuuids = annots1.visual_uuids
        dvuuids = annots2.visual_uuids
        qcfgstr = annots1._config.get_cfgstr()
        dcfgstr = annots2._config.get_cfgstr()
        annots_cfgstr = ut.hashstr27(qcfgstr) + ut.hashstr27(dcfgstr)
        vsone_uuids = [
            ut.combine_uuids(uuids, salt=annots_cfgstr)
            for uuids in ut.ProgIter(
                zip(qvuuids, dvuuids), length=len(qvuuids), label='hashing ids'
            )
        ]

        # Combine into a big cache for the entire 1-v-1 matching run
        big_uuid = ut.hashstr_arr27(vsone_uuids, '', pathsafe=True)
        cacher = ut.Cacher('vsone_v7', cfgstr=str(big_uuid), appname='vsone_rf_train')

        cached_data = cacher.tryload()
        if cached_data is not None:
            # Caching doesn't work 100% for PairwiseMatch object, so we need to do
            # some postprocessing
            configured_lazy_annots = ut.ddict(dict)
            for config, annots in configured_obj_annots.items():
                annot_dict = configured_lazy_annots[config]
                for _annot in ut.ProgIter(annots.scalars(), label='make lazy dict'):
                    annot_dict[_annot.aid] = _annot._make_lazy_dict()

            # Extract pairs of annot objects (with shared caches)
            lazy_annots1 = ut.take(configured_lazy_annots[qannot_cfg], query_aids)
            lazy_annots2 = ut.take(configured_lazy_annots[dannot_cfg], data_aids)

            # Create a set of PairwiseMatches with the correct annot properties
            matches = [
                vt.PairwiseMatch(annot1, annot2)
                for annot1, annot2 in zip(lazy_annots1, lazy_annots2)
            ]

            # Updating a new matches dictionary ensures the annot1/annot2 properties
            # are set correctly
            for key, cached_matches in list(cached_data.items()):
                fixed_matches = [match.copy() for match in matches]
                for fixed, internal in zip(fixed_matches, cached_matches):
                    dict_ = internal.__dict__
                    ut.delete_dict_keys(dict_, ['annot1', 'annot2'])
                    fixed.__dict__.update(dict_)
                cached_data[key] = fixed_matches
        else:
            cached_data = vsone_(
                qreq_,
                query_aids,
                data_aids,
                qannot_cfg,
                dannot_cfg,
                configured_obj_annots,
                hyper_params,
            )
            cacher.save(cached_data)
        # key_ = 'SV_LNBNN'
        key_ = 'RAT_SV'
        # for key in list(cached_data.keys()):
        #     if key != 'SV_LNBNN':
        #         del cached_data[key]
        matches = cached_data[key_]
    return matches, infr
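A key preprocessing step above is treating annotation pairs as undirected and removing duplicate rows (vt.unique_rows(..., directed=False)). A numpy-only sketch of that canonicalize-then-dedupe operation (not the vtool implementation):

import numpy as np

def unique_undirected_pairs(aid_pairs):
    # Canonicalize each pair so (u, v) and (v, u) compare equal,
    # then drop duplicate rows while preserving first-seen order.
    arr = np.array(aid_pairs)
    canon = np.sort(arr, axis=1)
    _, first_idx = np.unique(canon, axis=0, return_index=True)
    return arr[np.sort(first_idx)].tolist()

pairs = [(1, 2), (2, 1), (3, 4), (1, 2), (4, 3)]
print(unique_undirected_pairs(pairs))  # [[1, 2], [3, 4]]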
Example #23
def remerge_subset():
    """
    Assumes ibs1 is an updated subset of ibs2.
    Re-merges ibs1 back into ibs2.

    TODO: annotmatch table must have non-directional edges for this to work.
    I.e. u < v

    Ignore:

        # Ensure annotmatch and names are up to date with staging

        # Load graph
        import wbia
        ibs = wbia.opendb('PZ_PB_RF_TRAIN')
        infr = wbia.AnnotInference(aids='all', ibs=ibs, verbose=3)
        infr.reset_feedback('staging', apply=True)
        infr.relabel_using_reviews()

        # Check deltas
        infr.wbia_name_group_delta_info()
        infr.wbia_delta_info()

        # Write if it looks good
        infr.write_wbia_annotmatch_feedback()
        infr.write_wbia_name_assignment()

    Ignore:
        import wbia
        ibs = wbia.opendb('PZ_Master1')
        infr = wbia.AnnotInference(ibs, 'all')
        infr.reset_feedback('annotmatch', apply=True)

    CommandLine:
        python -m wbia.dbio.export_subset remerge_subset
    """
    import wbia

    ibs1 = wbia.opendb('PZ_PB_RF_TRAIN')
    ibs2 = wbia.opendb('PZ_Master1')

    gids1, gids2 = ibs1.images(), ibs2.images()
    idxs1, idxs2 = ut.isect_indices(gids1.uuids, gids2.uuids)
    isect_gids1, isect_gids2 = gids1.take(idxs1), gids2.take(idxs2)

    assert all(
        set.issubset(set(a1), set(a2))
        for a1, a2 in zip(isect_gids1.annot_uuids, isect_gids2.annot_uuids))

    annot_uuids = ut.flatten(isect_gids1.annot_uuids)
    # aids1 = ibs1.annots(ibs1.get_annot_aids_from_uuid(annot_uuids), asarray=True)
    # aids2 = ibs2.annots(ibs2.get_annot_aids_from_uuid(annot_uuids), asarray=True)
    aids1 = ibs1.annots(uuids=annot_uuids, asarray=True)
    aids2 = ibs2.annots(uuids=annot_uuids, asarray=True)
    import numpy as np

    to_aids2 = dict(zip(aids1, aids2))
    # to_aids1 = dict(zip(aids2, aids1))

    # Step 1) Update individual annot properties
    # These annots need updates
    # np.where(aids1.visual_uuids != aids2.visual_uuids)
    # np.where(aids1.semantic_uuids != aids2.semantic_uuids)

    annot_unary_props = [
        # 'yaws', 'bboxes', 'thetas', 'qual', 'species', 'unary_tags']
        'yaws',
        'bboxes',
        'thetas',
        'qual',
        'species',
        'case_tags',
        'multiple',
        'age_months_est_max',
        'age_months_est_min',  # 'sex_texts'
    ]
    to_change = {}
    for key in annot_unary_props:
        prop1 = getattr(aids1, key)
        prop2 = getattr(aids2, key)
        diff_idxs = set(np.where(prop1 != prop2)[0])
        if diff_idxs:
            diff_prop1 = ut.take(prop1, diff_idxs)
            diff_prop2 = ut.take(prop2, diff_idxs)
            logger.info('key = %r' % (key, ))
            logger.info('diff_prop1 = %r' % (diff_prop1, ))
            logger.info('diff_prop2 = %r' % (diff_prop2, ))
            to_change[key] = diff_idxs
    if to_change:
        changed_idxs = ut.unique(ut.flatten(to_change.values()))
        logger.info('Found %d annots that need updated properties' %
                    len(changed_idxs))
        logger.info('changing unary attributes: %r' % (to_change, ))
        if False and ut.are_you_sure('apply change'):
            for key, idxs in to_change.items():
                subaids1 = aids1.take(idxs)
                subaids2 = aids2.take(idxs)
                prop1 = getattr(subaids1, key)
                # prop2 = getattr(subaids2, key)
                setattr(subaids2, key, prop1)
    else:
        logger.info('Annot properties are in sync. Nothing to change')

    # Step 2) Update annotmatch - pairwise relationships
    infr1 = wbia.AnnotInference(aids=aids1.aids,
                                ibs=ibs1,
                                verbose=3,
                                autoinit=False)

    # infr2 = wbia.AnnotInference(aids=ibs2.annots().aids, ibs=ibs2, verbose=3)
    aids2 = ibs2.get_valid_aids(is_known=True)
    infr2 = wbia.AnnotInference(aids=aids2, ibs=ibs2, verbose=3)
    infr2.reset_feedback('annotmatch', apply=True)

    # map feedback from ibs1 onto ibs2 using ibs2 aids.
    fb1 = infr1.read_wbia_annotmatch_feedback()
    fb1_t = {(to_aids2[u], to_aids2[v]): val for (u, v), val in fb1.items()}
    fb1_df_t = infr2._pandas_feedback_format(fb1_t).drop('am_rowid', axis=1)

    # Add transformed feedback into ibs2
    infr2.add_feedback_from(fb1_df_t)

    # Now ensure that dummy connectivity exists to preserve original names
    # from wbia.algo.graph import nx_utils
    # for (u, v) in infr2.find_mst_edges('name_label'):
    #     infr2.draw_aids((u, v))
    #     cc1 = infr2.pos_graph.connected_to(u)
    #     cc2 = infr2.pos_graph.connected_to(v)
    #     logger.info(nx_utils.edges_cross(infr2.graph, cc1, cc2))
    #     infr2.neg_redundancy(cc1, cc2)
    #     infr2.pos_redundancy(cc2)

    infr2.relabel_using_reviews(rectify=True)
    infr2.apply_nondynamic_update()

    if False:
        infr2.wbia_delta_info()
        infr2.wbia_name_group_delta_info()

    if len(list(infr2.inconsistent_components())) > 0:
        raise NotImplementedError('need to fix inconsistencies first')
        # Make it so it just loops until inconsistencies are resolved
        infr2.prioritize()
        infr2.qt_review_loop()
    else:
        infr2.write_wbia_staging_feedback()
        infr2.write_wbia_annotmatch_feedback()
        infr2.write_wbia_name_assignment()

    # if False:
    #     # Fix any inconsistency
    #     infr2.start_qt_interface(loop=False)
    #     test_nodes = [5344, 5430, 5349, 5334, 5383, 2280, 2265, 2234, 5399,
    #                   5338, 2654]
    #     import networkx as nx
    #     nx.is_connected(infr2.graph.subgraph(test_nodes))
    #     # infr = wbia.AnnotInference(aids=test_nodes, ibs=ibs2, verbose=5)

    #     # randomly sample some new labels to verify
    #     import wbia.guitool as gt
    #     from wbia.gui import inspect_gui
    #     gt.ensure_qapp()
    #     ut.qtensure()
    #     old_groups = ut.group_items(name_delta.index.tolist(), name_delta['old_name'])
    #     del old_groups['____']

    #     new_groups = ut.group_items(name_delta.index.tolist(), name_delta['new_name'])

    #     from wbia.algo.hots import simulate
    #     c = simulate.compare_groups(
    #         list(new_groups.values()),
    #         list(old_groups.values()),
    #     )
    #     ut.map_vals(len, c)
    #     for aids in c['pred_splits']:
    #         old_nids = ibs2.get_annot_nids(aids)
    #         new_nids = ut.take_column(infr2.gen_node_attrs('name_label', aids), 1)
    #         split_aids = ut.take_column(ut.group_items(aids, new_nids).values(), 0)
    #         aid1, aid2 = split_aids[0:2]

    #         if False:
    #             inspect_gui.show_vsone_tuner(ibs2, aid1, aid2)
    #     infr2.start_qt_interface(loop=False)

    # if False:
    #     # import wbia
    #     ibs1 = wbia.opendb('PZ_PB_RF_TRAIN')
    #     infr1 = wbia.AnnotInference(aids='all', ibs=ibs1, verbose=3)
    #     infr1.initialize_graph()
    #     # infr1.reset_feedback('staging')
    #     infr1.reset_feedback('annotmatch')
    #     infr1.apply_feedback_edges()
    #     infr1.relabel_using_reviews()
    #     infr1.apply_review_inference()
    #     infr1.start_qt_interface(loop=False)
    # delta = infr2.match_state_delta()
    # logger.info('delta = %r' % (delta,))

    # infr2.ensure_mst()
    # infr2.relabel_using_reviews()
    # infr2.apply_review_inference()

    # mst_edges = infr2.find_mst_edges()
    # set(infr2.graph.edges()).intersection(mst_edges)

    return
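The key step in remerge_subset above is re-keying the pairwise feedback from the first database onto the second one through the UUID-based aid correspondence (to_aids2). A minimal standalone sketch of that remapping, using made-up aid values and labels instead of a real database:

# Hypothetical correspondence between annotation ids in the two databases,
# built the same way as dict(zip(aids1, aids2)) above.
to_aids2 = {11: 101, 12: 102, 13: 103}

# Feedback keyed by (aid, aid) pairs in the first database.
fb1 = {(11, 12): 'match', (12, 13): 'nomatch'}

# Re-key every edge so it refers to the second database's annotation ids.
fb1_t = {(to_aids2[u], to_aids2[v]): val for (u, v), val in fb1.items()}
assert fb1_t == {(101, 102): 'match', (102, 103): 'nomatch'}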
    """
Ejemplo n.º 24
0
def split_analysis(ibs):
    """
    CommandLine:
        python -m ibeis.other.dbinfo split_analysis --show
        python -m ibeis split_analysis --show
        python -m ibeis split_analysis --show --good

    Ignore:
        # mount
        sshfs -o idmap=user lev:/ ~/lev

        # unmount
        fusermount -u ~/lev

    Example:
        >>> # DISABLE_DOCTEST GGR
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> dbdir = '/media/danger/GGR/GGR-IBEIS'
        >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
        >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False)
        >>> import guitool_ibeis as gt
        >>> gt.ensure_qtapp()
        >>> win = split_analysis(ibs)
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> gt.qtapp_loop(qwin=win)
        >>> #ut.show_if_requested()
    """
    #nid_list = ibs.get_valid_nids(filter_empty=True)
    import datetime
    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    filter_kw = {
        'multiple': None,
        #'view': ['right'],
        #'minqual': 'good',
        'is_known': True,
        'min_pername': 1,
    }
    aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)),
        })
    )
    aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union(
        filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        })
    )
    all_aids = aids1 + aids2
    all_annots = ibs.annots(all_aids)
    print('%d annots on day 1' % (len(aids1)) )
    print('%d annots on day 2' % (len(aids2)) )
    print('%d annots overall' % (len(all_annots)) )
    print('%d names overall' % (len(ut.unique(all_annots.nids))) )

    nid_list, annots_list = all_annots.group(all_annots.nids)

    REVIEWED_EDGES = True
    if REVIEWED_EDGES:
        aids_list = [annots.aids for annots in annots_list]
        #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list]  # Slower
        aid_pairs = ibs.get_unflat_am_aidpairs(aids_list)  # Faster
    else:
        # ALL EDGES
        aid_pairs = [annots.get_aidpairs() for annots in annots_list]

    speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs)
    import vtool_ibeis as vt
    max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list])

    nan_idx = np.where(np.isnan(max_speeds))[0]
    inf_idx = np.where(np.isinf(max_speeds))[0]
    bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx])))
    ok_idx = ut.index_complement(bad_idx, len(max_speeds))

    print('#nan_idx = %r' % (len(nan_idx),))
    print('#inf_idx = %r' % (len(inf_idx),))
    print('#ok_idx = %r' % (len(ok_idx),))

    ok_speeds = max_speeds[ok_idx]
    ok_nids = ut.take(nid_list, ok_idx)
    ok_annots = ut.take(annots_list, ok_idx)
    sortx = np.argsort(ok_speeds)[::-1]

    sorted_speeds = np.array(ut.take(ok_speeds, sortx))
    sorted_annots = np.array(ut.take(ok_annots, sortx))
    sorted_nids = np.array(ut.take(ok_nids, sortx))  # NOQA

    sorted_speeds = np.clip(sorted_speeds, 0, 100)

    #idx = vt.find_elbow_point(sorted_speeds)
    #EXCESSIVE_SPEED = sorted_speeds[idx]
    # http://www.infoplease.com/ipa/A0004737.html
    # http://www.speedofanimals.com/animals/zebra
    #ZEBRA_SPEED_MAX  = 64  # km/h
    #ZEBRA_SPEED_RUN  = 50  # km/h
    ZEBRA_SPEED_SLOW_RUN  = 20  # km/h
    #ZEBRA_SPEED_FAST_WALK = 10  # km/h
    #ZEBRA_SPEED_WALK = 7  # km/h

    MAX_SPEED = ZEBRA_SPEED_SLOW_RUN
    #MAX_SPEED = ZEBRA_SPEED_WALK
    #MAX_SPEED = EXCESSIVE_SPEED

    flags = sorted_speeds > MAX_SPEED
    flagged_ok_annots = ut.compress(sorted_annots, flags)
    inf_annots = ut.take(annots_list, inf_idx)
    flagged_annots = inf_annots + flagged_ok_annots

    print('MAX_SPEED = %r km/h' % (MAX_SPEED,))
    print('%d annots with infinite speed' % (len(inf_annots),))
    print('%d annots with large speed' % (len(flagged_ok_annots),))
    print('Marking all pairs of annots above the threshold as non-matching')

    from ibeis.algo.graph import graph_iden
    import networkx as nx
    progkw = dict(freq=1, bs=True, est_window=len(flagged_annots))

    bad_edges_list = []
    good_edges_list = []
    for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw):
        edge_to_speeds = annots.get_speeds()
        bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]
        good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED]
        bad_edges_list.append(bad_edges)
        good_edges_list.append(good_edges)
    all_bad_edges = ut.flatten(bad_edges_list)
    all_good_edges = ut.flatten(good_edges_list)
    print('num_bad_edges = %r' % (len(all_bad_edges),))
    print('num_good_edges = %r' % (len(all_good_edges),))

    if 1:
        from ibeis.viz import viz_graph2
        import guitool_ibeis as gt
        gt.ensure_qtapp()

        if ut.get_argflag('--good'):
            print('Looking at GOOD (no speed problems) edges')
            aid_pairs = all_good_edges
        else:
            print('Looking at BAD (speed problems) edges')
            aid_pairs = all_bad_edges
        aids = sorted(list(set(ut.flatten(aid_pairs))))
        infr = graph_iden.AnnotInference(ibs, aids, verbose=False)
        infr.initialize_graph()

        # Use random scores to randomize sort order
        rng = np.random.RandomState(0)
        scores = (-rng.rand(len(aid_pairs)) * 10).tolist()
        infr.graph.add_edges_from(aid_pairs)

        if True:
            edge_sample_size = 250
            pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs))))
            sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size]
            sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0))
            sample_size = len(ut.unique(sorted_nids))
            am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs))
            flags = ut.not_list(ut.flag_None_items(am_rowids))
            #am_rowids = ut.compress(am_rowids, flags)
            positive_tags = ['SplitCase', 'Photobomb']
            flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0)
                          for tag in positive_tags]
            print('edge_case_hist: ' + ut.repr3(
                ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)]))
            is_positive = ut.or_lists(*flags_list)
            num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values()))
            pop = len(pop_nids)
            print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),))
            print('--- Sampling wrt edges ---')
            print('edge_sample_size  = %r' % (edge_sample_size,))
            print('edge_population_size = %r' % (len(aid_pairs),))
            print('num_positive_edges = %r' % (sum(is_positive)))
            print('--- Sampling wrt names ---')
            print('name_population_size = %r' % (pop,))
            vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95)

        nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores)))

        win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False,
                                          init_mode=None)
        win.populate_edge_model()
        win.show()
        return win
        # Make review interface for only bad edges

    infr_list = []
    iter_ = list(zip(flagged_annots, bad_edges_list))
    for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw):
        aids = annots.aids
        nids = [1] * len(aids)
        infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False)
        infr.initialize_graph()
        infr.reset_feedback()
        infr_list.append(infr)

    # Check which ones are user defined as incorrect
    #num_positive = 0
    #for infr in infr_list:
    #    flag = np.any(infr.get_feedback_probs()[0] == 0)
    #    num_positive += flag
    #print('num_positive = %r' % (num_positive,))
    #pop = len(infr_list)
    #print('pop = %r' % (pop,))

    iter_ = list(zip(infr_list, bad_edges_list))
    for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw):
        flipped_edges = []
        for aid1, aid2 in bad_edges:
            if infr.graph.has_edge(aid1, aid2):
                flipped_edges.append((aid1, aid2))
            infr.add_feedback((aid1, aid2), NEGTV)
        nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig')
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges})
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges})

    #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw):
    #    annots = ibs.annots(infr.aids)
    #    edge_to_speeds = annots.get_speeds()
    #    bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]

    def inference_stats(infr_list_):
        relabel_stats = []
        for infr in infr_list_:
            num_ccs, num_inconsistent = infr.relabel_using_reviews()
            state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values())
            if POSTV not in state_hist:
                state_hist[POSTV] = 0
            hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values())

            subgraphs = infr.positive_connected_compoments()
            subgraph_sizes = [len(g) for g in subgraphs]

            info = ut.odict([
                ('num_nonmatch_edges', state_hist[NEGTV]),
                ('num_match_edges', state_hist[POSTV]),
                ('frac_nonmatch_edges',  state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])),
                ('num_inconsistent', num_inconsistent),
                ('num_ccs', num_ccs),
                ('edges_flipped', hist.get('flip', 0)),
                ('edges_unchanged', hist.get('orig', 0)),
                ('bad_unreviewed_edges', hist.get('new', 0)),
                ('orig_size', len(infr.graph)),
                ('new_sizes', subgraph_sizes),
            ])
            relabel_stats.append(info)
        return relabel_stats

    relabel_stats = inference_stats(infr_list)

    print('\nAll Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent'))
    can_split_flags = num_incon_list == 0
    print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags)))

    splittable_infrs = ut.compress(infr_list, can_split_flags)

    relabel_stats = inference_stats(splittable_infrs)

    print('\nTrivial Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        if key in ['num_inconsistent']:
            continue
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (
            key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges'))
    num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges'))
    flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3)
    reasonable_infr = ut.compress(splittable_infrs, flags1)

    new_sizes_list = ut.take_column(relabel_stats, 'new_sizes')
    flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3
              for sizes in new_sizes_list]
    reasonable_infr = ut.compress(splittable_infrs, flags2)
    print('#reasonable_infr = %r' % (len(reasonable_infr),))

    for infr in ut.InteractiveIter(reasonable_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values()),))
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    rest = ~np.logical_or(flags1, flags2)
    nonreasonable_infr = ut.compress(splittable_infrs, rest)
    rng = np.random.RandomState(0)
    random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng)
    random_infr = ut.take(nonreasonable_infr, random_idx)
    for infr in ut.InteractiveIter(random_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values()),))
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    #import scipy.stats as st
    #conf_interval = .95
    #st.norm.cdf(conf_interval)
    # view-source:http://www.surveysystem.com/sscalc.htm
    #zval = 1.96  # 95 percent confidence
    #zValC = 3.8416  #
    #zValC = 6.6564

    #import statsmodels.stats.api as sms
    #es = sms.proportion_effectsize(0.5, 0.75)
    #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1)

    pop = 279
    num_positive = 3
    sample_size = 15
    conf_level = .95
    #conf_level = .99
    vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level)
    print('---')

    vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95)

    pop = 279
    #err_frac = .05  # 5%
    err_frac = .10  # 10%
    conf_level = .95
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)

    pop = 675
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2)
    vt.calc_sample_from_error_bars(.10, pop, conf_level=.68)

    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
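split_analysis leans on vt.calc_error_bars_from_sample to turn a sampled count of positives into a confidence interval over the whole population. That routine's internals are not shown here; a rough sketch under a normal approximation with a finite-population correction looks like this (the real vtool_ibeis implementation may differ in detail):

import math

def approx_error_bars(sample_size, num_positive, pop, conf_level=0.95):
    # Approximate two-sided z-scores for the common confidence levels.
    z = {0.68: 1.0, 0.95: 1.96, 0.99: 2.576}[conf_level]
    phat = num_positive / sample_size
    # Standard error of a proportion, shrunk by the finite-population factor.
    fpc = math.sqrt((pop - sample_size) / (pop - 1))
    margin = z * math.sqrt(phat * (1 - phat) / sample_size) * fpc
    return phat, margin

phat, margin = approx_error_bars(15, num_positive=3, pop=279, conf_level=0.95)
print('estimated positive rate = %.3f +- %.3f' % (phat, margin))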
Ejemplo n.º 25
0
def estimate_twoday_count(ibs, day1, day2, filter_kw):
    #gid_list = ibs.get_valid_gids()
    all_images = ibs.images()
    dates = [dt.date() for dt in all_images.datetime]
    date_to_images = all_images.group_items(dates)
    date_to_images = ut.sort_dict(date_to_images)
    #date_hist = ut.map_dict_vals(len, date2_gids)
    #print('date_hist = %s' % (ut.repr2(date_hist, nl=2),))
    verbose = 0

    visit_dates = [day1, day2]
    visit_info_list_ = []
    for day in visit_dates:
        images = date_to_images[day]
        aids = ut.flatten(images.aids)
        aids = ibs.filter_annots_general(aids, filter_kw=filter_kw,
                                         verbose=verbose)
        nids = ibs.get_annot_name_rowids(aids)
        grouped_aids = ut.group_items(aids, nids)
        unique_nids = ut.unique(list(grouped_aids.keys()))

        if False:
            aids_list = ut.take(grouped_aids, unique_nids)
            for aids in aids_list:
                if len(aids) > 30:
                    break
            timedeltas_list = ibs.get_unflat_annots_timedelta_list(aids_list)
            # Do the five second rule
            marked_thresh = 5
            flags = []
            for nid, timedeltas in zip(unique_nids, timedeltas_list):
                flags.append(timedeltas.max() > marked_thresh)
            print('Unmarking %d names' % (len(flags) - sum(flags)))
            unique_nids = ut.compress(unique_nids, flags)
            grouped_aids = ut.dict_subset(grouped_aids, unique_nids)

        unique_aids = ut.flatten(list(grouped_aids.values()))
        info = {
            'unique_nids': unique_nids,
            'grouped_aids': grouped_aids,
            'unique_aids': unique_aids,
        }
        visit_info_list_.append(info)

    # Estimate statistics
    from ibeis.other import dbinfo
    aids_day1, aids_day2 = ut.take_column(visit_info_list_, 'unique_aids')
    nids_day1, nids_day2 = ut.take_column(visit_info_list_, 'unique_nids')
    resight_nids = ut.isect(nids_day1, nids_day2)
    nsight1 = len(nids_day1)
    nsight2 = len(nids_day2)
    resight = len(resight_nids)
    lp_index, lp_error = dbinfo.sight_resight_count(nsight1, nsight2, resight)

    if False:
        from ibeis.other import dbinfo
        print('DAY 1 STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1)  # NOQA
        print('DAY 2 STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day2)  # NOQA
        print('COMBINED STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1 + aids_day2)  # NOQA

    print('%d annots on day 1' % (len(aids_day1)) )
    print('%d annots on day 2' % (len(aids_day2)) )
    print('%d names on day 1' % (nsight1,))
    print('%d names on day 2' % (nsight2,))
    print('resight = %r' % (resight,))
    print('lp_index = %r ± %r' % (lp_index, lp_error))
    return nsight1, nsight2, resight, lp_index, lp_error
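estimate_twoday_count uses dbinfo.sight_resight_count as a black box. Under the usual mark-recapture assumptions, a sight-resight index and its error can be sketched with the Chapman-corrected Lincoln-Petersen estimator (the actual dbinfo implementation may use a different variant):

import math

def lincoln_petersen(nsight1, nsight2, resight):
    # Chapman-corrected Lincoln-Petersen estimate of population size.
    est = (nsight1 + 1) * (nsight2 + 1) / (resight + 1) - 1
    # Approximate variance of the Chapman estimator.
    var = ((nsight1 + 1) * (nsight2 + 1) * (nsight1 - resight) * (nsight2 - resight)
           / ((resight + 1) ** 2 * (resight + 2)))
    return est, math.sqrt(var)

# Made-up daily name counts, only to illustrate the calculation.
est, err = lincoln_petersen(nsight1=150, nsight2=140, resight=60)
print('population ~ %.1f +- %.1f' % (est, err))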
Ejemplo n.º 26
0
def ggr_random_name_splits():
    """
    CommandLine:
        python -m wbia.viz.viz_graph2 ggr_random_name_splits --show

    Ignore:
        sshfs -o idmap=user lev:/ ~/lev

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.viz.viz_graph2 import *  # NOQA
        >>> ggr_random_name_splits()
    """
    import wbia.guitool as gt

    gt.ensure_qtapp()
    # nid_list = ibs.get_valid_nids(filter_empty=True)
    import wbia

    dbdir = '/media/danger/GGR/GGR-IBEIS'
    dbdir = (dbdir if ut.checkpath(dbdir) else
             ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS'))
    ibs = wbia.opendb(dbdir=dbdir, allow_newdir=False)

    import datetime

    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    orig_filter_kw = {
        'multiple': None,
        # 'view': ['right'],
        # 'minqual': 'good',
        'is_known': True,
        'min_pername': 2,
    }
    orig_aids = ibs.filter_annots_general(filter_kw=ut.dict_union(
        orig_filter_kw,
        {
            'min_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        },
    ))
    orig_all_annots = ibs.annots(orig_aids)
    orig_unique_nids, orig_grouped_annots_ = orig_all_annots.group(
        orig_all_annots.nids)
    # Ensure we get everything
    orig_grouped_annots = [
        ibs.annots(aids_) for aids_ in ibs.get_name_aids(orig_unique_nids)
    ]

    # pip install quantumrandom
    if False:
        import quantumrandom

        data = quantumrandom.uint16()
        seed = data.sum()
        print('seed = %r' % (seed, ))
        # import Crypto.Random
        # from Crypto import Random
        # quantumrandom.get_data()
        # StrongRandom = Crypto.Random.random.StrongRandom
        # aes.reseed(3340258)
        # chars = [str(chr(x)) for x in data.view(np.uint8)]
        # aes_seed = str('').join(chars)
        # aes = Crypto.Random.Fortuna.FortunaGenerator.AESGenerator()
        # aes.reseed(aes_seed)
        # aes.pseudo_random_data(10)

    orig_rand_idxs = ut.random_indexes(len(orig_grouped_annots), seed=3340258)
    orig_sample_size = 75
    random_annot_groups = ut.take(orig_grouped_annots, orig_rand_idxs)
    orig_annot_sample = random_annot_groups[:orig_sample_size]

    # OOOPS MADE ERROR REDO ----

    filter_kw = {
        'multiple': None,
        'view': ['right'],
        'minqual': 'good',
        'is_known': True,
        'min_pername': 2,
    }
    filter_kw_ = ut.dict_union(
        filter_kw,
        {
            'min_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)),
            'max_unixtime':
            ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)),
        },
    )
    refiltered_sample = [
        ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_)
        for annot in orig_annot_sample
    ]
    is_ok = np.array(ut.lmap(len, refiltered_sample)) >= 2
    ok_part_orig_sample = ut.compress(orig_annot_sample, is_ok)
    ok_part_orig_nids = [x.nids[0] for x in ok_part_orig_sample]

    # Now compute real sample
    aids = ibs.filter_annots_general(filter_kw=filter_kw_)
    all_annots = ibs.annots(aids)
    unique_nids, grouped_annots_ = all_annots.group(all_annots.nids)
    grouped_annots = grouped_annots_
    # Ensure we get everything
    # grouped_annots = [ibs.annots(aids_) for aids_ in ibs.get_name_aids(unique_nids)]

    pop = len(grouped_annots)
    pername_list = ut.lmap(len, grouped_annots)
    groups = wbia.annots.AnnotGroups(grouped_annots, ibs)
    match_tags = [ut.unique(ut.flatten(t)) for t in groups.match_tags]
    tag_case_hist = ut.dict_hist(ut.flatten(match_tags))
    print('name_pop = %r' % (pop, ))
    print('Annots per Multiton Name' +
          ut.repr3(ut.get_stats(pername_list, use_median=True)))
    print('Name Tag Hist ' + ut.repr3(tag_case_hist))
    print('Percent Photobomb: %.2f%%' %
          (tag_case_hist['photobomb'] / pop * 100))
    print('Percent Split: %.2f%%' % (tag_case_hist['splitcase'] / pop * 100))

    # Remove the ok part from this sample
    remain_unique_nids = ut.setdiff(unique_nids, ok_part_orig_nids)
    remain_grouped_annots = [
        ibs.annots(aids_) for aids_ in ibs.get_name_aids(remain_unique_nids)
    ]

    sample_size = 75
    import vtool as vt

    vt.calc_sample_from_error_bars(0.05, pop, conf_level=0.95, prior=0.05)

    remain_rand_idxs = ut.random_indexes(len(remain_grouped_annots),
                                         seed=3340258)
    remain_sample_size = sample_size - len(ok_part_orig_nids)
    remain_random_annot_groups = ut.take(remain_grouped_annots,
                                         remain_rand_idxs)
    remain_annot_sample = remain_random_annot_groups[:remain_sample_size]

    annot_sample_nofilter = ok_part_orig_sample + remain_annot_sample
    # Filter out all bad parts
    annot_sample_filter = [
        ibs.annots(ibs.filter_annots_general(annot.aids, filter_kw=filter_kw_))
        for annot in annot_sample_nofilter
    ]
    annot_sample = annot_sample_filter

    win = None
    from wbia.viz import viz_graph2

    for annots in ut.InteractiveIter(annot_sample):
        if win is not None:
            win.close()
        win = viz_graph2.make_qt_graph_interface(ibs,
                                                 aids=annots.aids,
                                                 init_mode='rereview')
        print(win)

    sample_groups = wbia.annots.AnnotGroups(annot_sample, ibs)

    flat_tags = [ut.unique(ut.flatten(t)) for t in sample_groups.match_tags]

    print('Using Split and Photobomb')
    is_positive = ['photobomb' in t or 'splitcase' in t for t in flat_tags]
    num_positive = sum(is_positive)
    vt.calc_error_bars_from_sample(sample_size,
                                   num_positive,
                                   pop,
                                   conf_level=0.95)

    print('Only Photobomb')
    is_positive = ['photobomb' in t for t in flat_tags]
    num_positive = sum(is_positive)
    vt.calc_error_bars_from_sample(sample_size,
                                   num_positive,
                                   pop,
                                   conf_level=0.95)

    print('Only SplitCase')
    is_positive = ['splitcase' in t for t in flat_tags]
    num_positive = sum(is_positive)
    vt.calc_error_bars_from_sample(sample_size,
                                   num_positive,
                                   pop,
                                   conf_level=0.95)
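The call to vt.calc_sample_from_error_bars above answers the inverse question: how many names must be reviewed to reach a desired margin of error. A rough standalone sketch of that calculation, again assuming a normal approximation with a finite-population correction (the real vtool routine may differ):

import math

def approx_sample_size(err_frac, pop, conf_level=0.95, prior=0.5):
    # Sample size for estimating a proportion; prior is the assumed true
    # rate (0.5 is the worst case), then corrected for a finite population.
    z = {0.68: 1.0, 0.95: 1.96, 0.99: 2.576}[conf_level]
    n0 = (z ** 2) * prior * (1 - prior) / (err_frac ** 2)
    n = n0 / (1 + (n0 - 1) / pop)
    return int(math.ceil(n))

print(approx_sample_size(0.05, pop=675, conf_level=0.95, prior=0.1))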
Ejemplo n.º 27
0
def dans_splits(ibs):
    """
    python -m ibeis dans_splits --show

    Example:
        >>> # DISABLE_DOCTEST GGR
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> dbdir = '/media/danger/GGR/GGR-IBEIS'
        >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
        >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False)
        >>> import guitool_ibeis as gt
        >>> gt.ensure_qtapp()
        >>> win = dans_splits(ibs)
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> gt.qtapp_loop(qwin=win)
    """
    #pair = 9262, 932

    dans_aids = [26548, 2190, 9418, 29965, 14738, 26600, 3039, 2742, 8249,
                 20154, 8572, 4504, 34941, 4040, 7436, 31866, 28291,
                 16009, 7378, 14453, 2590, 2738, 22442, 26483, 21640, 19003,
                 13630, 25395, 20015, 14948, 21429, 19740, 7908, 23583, 14301,
                 26912, 30613, 19719, 21887, 8838, 16184, 9181, 8649, 8276,
                 14678, 21950, 4925, 13766, 12673, 8417, 2018, 22434, 21149,
                 14884, 5596, 8276, 14650, 1355, 21725, 21889, 26376, 2867,
                 6906, 4890, 21524, 6690, 14738, 1823, 35525, 9045, 31723,
                 2406, 5298, 15627, 31933, 19535, 9137, 21002, 2448,
                 32454, 12615, 31755, 20015, 24573, 32001, 23637, 3192, 3197,
                 8702, 1240, 5596, 33473, 23874, 9558, 9245, 23570, 33075,
                 23721,  24012, 33405, 23791, 19498, 33149, 9558, 4971,
                 34183, 24853, 9321, 23691, 9723, 9236, 9723,  21078,
                 32300, 8700, 15334, 6050, 23277, 31164, 14103,
                 21231, 8007, 10388, 33387, 4319, 26880, 8007, 31164,
                 32300, 32140]

    is_hybrid = [7123, 7166, 7157, 7158, ]  # NOQA
    needs_mask = [26836, 29742]  # NOQA
    justfine = [19862]  # NOQA

    annots = ibs.annots(dans_aids)
    unique_nids = ut.unique(annots.nids)
    grouped_aids = ibs.get_name_aids(unique_nids)
    annot_groups = ibs._annot_groups(grouped_aids)

    split_props = {'splitcase', 'photobomb'}
    needs_tag = [len(split_props.intersection(ut.flatten(tags))) == 0 for tags in annot_groups.match_tags]
    num_needs_tag = sum(needs_tag)
    num_had_split = len(needs_tag) - num_needs_tag
    print('num_had_split = %r' % (num_had_split,))
    print('num_needs_tag = %r' % (num_needs_tag,))

    #all_annot_groups = ibs._annot_groups(ibs.group_annots_by_name(ibs.get_valid_aids())[0])
    #all_has_split = [len(split_props.intersection(ut.flatten(tags))) > 0 for tags in all_annot_groups.match_tags]
    #num_nondan = sum(all_has_split) - num_had_split
    #print('num_nondan = %r' % (num_nondan,))

    from ibeis.algo.graph import graph_iden
    from ibeis.viz import viz_graph2
    import guitool_ibeis as gt
    import plottool_ibeis as pt
    pt.qt4ensure()
    gt.ensure_qtapp()

    aids_list = ut.compress(grouped_aids, needs_tag)
    aids_list = [a for a in aids_list if len(a) > 1]
    print('len(aids_list) = %r' % (len(aids_list),))

    for aids in aids_list:
        infr = graph_iden.AnnotInference(ibs, aids)
        infr.initialize_graph()
        win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False,
                                          init_mode='rereview')
        win.populate_edge_model()
        win.show()
        return win
    assert False
Ejemplo n.º 28
0
def find_consistent_labeling_old(grouped_oldnames,
                                 extra_prefix='_extra_name',
                                 verbose=False):
    import numpy as np
    import scipy.optimize

    unique_old_names = ut.unique(ut.flatten(grouped_oldnames))

    # TODO: find names that are only used once, and just ignore those for
    # optimization.
    # unique_set = set(unique_old_names)
    oldname_sets = list(map(set, grouped_oldnames))
    usage_hist = ut.dict_hist(ut.flatten(oldname_sets))
    conflicts = {k for k, v in usage_hist.items() if v > 1}
    # nonconflicts = {k for k, v in usage_hist.items() if v == 1}

    conflict_groups = []
    orig_idxs = []
    assignment = [None] * len(grouped_oldnames)
    ntrivial = 0
    for idx, group in enumerate(grouped_oldnames):
        if set(group).intersection(conflicts):
            orig_idxs.append(idx)
            conflict_groups.append(group)
        else:
            ntrivial += 1
            if len(group) > 0:
                h = ut.dict_hist(group)
                hitems = list(h.items())
                hvals = [i[1] for i in hitems]
                maxval = max(hvals)
                g = min([k for k, v in hitems if v == maxval])
                assignment[idx] = g
            else:
                assignment[idx] = None

    if verbose:
        print('rectify %d non-trivial groups' % (len(conflict_groups), ))
        print('rectify %d trivial groups' % (ntrivial, ))

    num_extra = 0

    if len(conflict_groups) > 0:
        grouped_oldnames_ = conflict_groups
        unique_old_names = ut.unique(ut.flatten(grouped_oldnames_))
        num_new_names = len(grouped_oldnames_)
        num_old_names = len(unique_old_names)
        extra_oldnames = []

        # Create padded dummy values.  This accounts for the case where it is
        # impossible to uniquely map to the old db
        num_extra = num_new_names - num_old_names
        if num_extra > 0:
            extra_oldnames = [
                '%s%d' % (
                    extra_prefix,
                    count,
                ) for count in range(num_extra)
            ]
        elif num_extra < 0:
            pass
        else:
            extra_oldnames = []
        assignable_names = unique_old_names + extra_oldnames

        total = len(assignable_names)

        # Allocate assignment matrix
        # Start with a large negative value indicating
        # that you must select from your assignments only
        profit_matrix = -np.ones((total, total), dtype=int) * (2 * total)
        # Populate assignment profit matrix
        oldname2_idx = ut.make_index_lookup(assignable_names)
        name_freq_list = [ut.dict_hist(names) for names in grouped_oldnames_]
        # Initialize base profit for using a previously used name
        for rowx, name_freq in enumerate(name_freq_list):
            for name, freq in name_freq.items():
                colx = oldname2_idx[name]
                profit_matrix[rowx, colx] = 1
        # Now add in the real profit
        for rowx, name_freq in enumerate(name_freq_list):
            for name, freq in name_freq.items():
                colx = oldname2_idx[name]
                profit_matrix[rowx, colx] += freq
        # Set a small profit for using an extra name
        extra_colxs = ut.take(oldname2_idx, extra_oldnames)
        profit_matrix[:, extra_colxs] = 1

        # Convert to minimization problem
        big_value = (profit_matrix.max()) - (profit_matrix.min())
        cost_matrix = big_value - profit_matrix

        # Don't use munkres, it is pure python and very slow. Use scipy instead
        indexes = list(zip(*scipy.optimize.linear_sum_assignment(cost_matrix)))

        # Map output to be aligned with input
        rx2_cx = dict(indexes)
        assignment_ = [
            assignable_names[rx2_cx[rx]] for rx in range(num_new_names)
        ]

        # Reintegrate trivial values
        for idx, g in zip(orig_idxs, assignment_):
            assignment[idx] = g

    for idx, val in enumerate(assignment):
        if val is None:
            assignment[idx] = '%s%d' % (
                extra_prefix,
                num_extra,
            )
            num_extra += 1
    return assignment
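The non-trivial branch above reduces to a single call to scipy.optimize.linear_sum_assignment on the cost matrix. A tiny self-contained example of that call, with a made-up 3x3 profit matrix (the last column plays the role of an extra name with a small profit):

import numpy as np
import scipy.optimize

# Rows are new name groups, columns are candidate names; higher is better.
profit = np.array([
    [3, 1, 0],
    [1, 3, 0],
    [1, 1, 1],
])
# Convert the maximization problem into a minimization problem.
cost = profit.max() - profit
row_ind, col_ind = scipy.optimize.linear_sum_assignment(cost)
# Each row is matched to exactly one column; read off the chosen columns.
print(dict(zip(row_ind.tolist(), col_ind.tolist())))  # {0: 0, 1: 1, 2: 2}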
Ejemplo n.º 29
0
def make_name_graph_interaction(ibs,
                                nids=None,
                                aids=None,
                                selected_aids=[],
                                with_all=True,
                                invis_edges=None,
                                ensure_edges=None,
                                use_image=False,
                                temp_nids=None,
                                **kwargs):
    """
    CommandLine:
        python -m ibeis --tf make_name_graph_interaction --db PZ_MTEST \
            --aids=1,2,3,4,5,6,7,8,9 --show

        python -m ibeis --tf make_name_graph_interaction --db LEWA_splits \
                --nids=1 --show --split

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.viz.viz_graph import *  # NOQA
        >>> import ibeis
        >>> import plottool as pt
        >>> exec(ut.execstr_funckw(make_name_graph_interaction), globals())
        >>> defaultdb='testdb1'
        >>> ibs = ibeis.opendb(defaultdb=defaultdb)
        >>> aids = ut.get_argval('--aids', type_=list, default=None)
        >>> nids = ut.get_argval('--nids', type_=list, default=ibs.get_valid_nids()[0:5])
        >>> nids = None if aids is not None else nids
        >>> with_all = not ut.get_argflag('--no-with-all')
        >>> make_name_graph_interaction(ibs, nids, aids, with_all=with_all)
        >>> #pt.zoom_factory()
        >>> ut.show_if_requested()
    """
    if aids is None and nids is not None:
        aids = ut.flatten(ibs.get_name_aids(nids))
    elif nids is not None and aids is not None:
        aids += ibs.get_name_aids(nids)
        aids = ut.unique(aids)

    if with_all:
        nids = ut.unique(ibs.get_annot_name_rowids(aids))
        aids = ut.flatten(ibs.get_name_aids(nids))

    #aids = aids[0:10]

    nids = ibs.get_annot_name_rowids(aids)
    #from ibeis.algo.graph import graph_iden
    #infr = graph_iden.AnnotInference(aids, nids, temp_nids)  # NOQA
    #import utool
    #utool.embed()

    from ibeis.algo.graph import graph_iden
    infr = graph_iden.AnnotInference(ibs, aids, nids, temp_nids)
    infr.initialize_graph()
    #infr.apply_scores()
    #infr.apply_weights()
    if ut.get_argflag('--cut'):
        infr.apply_all()

    #import guitool as gt
    #gt.ensure_qtapp()
    #print('infr = %r' % (infr,))
    #win = test_qt_graphs(infr=infr, use_image=use_image)
    #self = win
    #gt.qtapp_loop(qwin=win, freq=10)

    self = AnnotGraphInteraction(infr,
                                 selected_aids=selected_aids,
                                 use_image=use_image)
    self.show_page()
    self.show()
    return self
Ejemplo n.º 30
0
def find_consistent_labeling(grouped_oldnames,
                             extra_prefix='_extra_name',
                             verbose=False):
    r"""
    Solves a maximum bipartite matching problem to find a consistent
    name assignment that minimizes the number of annotations with different
    names. For each group of annotations (one group per new name) we assign
    all of its members the same name, preferring an old name already used by
    that group and falling back to a new extra name when none is available.

    To reduce the running time, groups whose old names conflict with no other
    group are rectified trivially, and the remaining groups are partitioned
    into disjoint subproblems that are solved independently.

    Args:
        grouped_oldnames (list): A group of old names where the grouping is
            based on new names. For instance:

                Given:
                    aids      = [1, 2, 3, 4, 5]
                    old_names = [0, 1, 1, 1, 0]
                    new_names = [0, 0, 1, 1, 0]

                The grouping is
                    [[0, 1, 0], [1, 1]]

                This lets us keep the old names in a split case and
                re-use existing names and make minimal changes to
                current annotation names while still being consistent
                with the new and improved grouping.

                The output will be:
                    [0, 1]

                Meaning that all annots in the first group are assigned the
                name 0 and all annots in the second group are assigned the name
                1.

    References:
        http://stackoverflow.com/questions/1398822/assignment-problem-numpy

    CommandLine:
        python -m ibeis.scripts.name_recitifer find_consistent_labeling


    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = testdata_oldnames(25, 15,  5, n_per_incon=5)
        >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1)
        >>> grouped_oldnames = testdata_oldnames(0, 15,  5, n_per_incon=1)
        >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1)
        >>> grouped_oldnames = testdata_oldnames(0, 0, 0, n_per_incon=1)
        >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1)

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> ydata = []
        >>> xdata = list(range(10, 150, 50))
        >>> for x in xdata:
        >>>     print('x = %r' % (x,))
        >>>     grouped_oldnames = testdata_oldnames(x, 15,  5, n_per_incon=5)
        >>>     t = ut.Timerit(3, verbose=1)
        >>>     for timer in t:
        >>>         with timer:
        >>>             new_names = find_consistent_labeling(grouped_oldnames)
        >>>     ydata.append(t.ave_secs)
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> pt.qtensure()
        >>> pt.multi_plot(xdata, [ydata])
        >>> ut.show_if_requested()

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b', 'c'], ['b', 'c'], ['c', 'e', 'e']]
        >>> new_names = find_consistent_labeling(grouped_oldnames, verbose=1)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        ['a', 'b', 'e']

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b'], ['a', 'a', 'b'], ['a']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        ['b', 'a', '_extra_name0']

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b'], ['e'], ['a', 'a', 'b'], [], ['a'], ['d']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        ['b', 'e', 'a', '_extra_name0', '_extra_name1', 'd']

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [[], ['a', 'a'], [],
        >>>                     ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'b'], ['a']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        ['_extra_name0', 'a', '_extra_name1', 'b', '_extra_name2']
    """
    unique_old_names = ut.unique(ut.flatten(grouped_oldnames))
    n_old_names = len(unique_old_names)
    n_new_names = len(grouped_oldnames)

    # Initialize assignment to all Nones
    assignment = [None for _ in range(n_new_names)]

    if verbose:
        print('finding maximally consistent labeling')
        print('n_old_names = %r' % (n_old_names, ))
        print('n_new_names = %r' % (n_new_names, ))

    # For each old_name, determine now many new_names use it.
    oldname_sets = list(map(set, grouped_oldnames))
    oldname_usage = ut.dict_hist(ut.flatten(oldname_sets))

    # Any name used more than once is a conflict and must be resolved
    conflict_oldnames = {k for k, v in oldname_usage.items() if v > 1}

    # Partition into trivial and non-trivial cases
    nontrivial_oldnames = []
    nontrivial_new_idxs = []

    trivial_oldnames = []
    trivial_new_idxs = []
    for new_idx, group in enumerate(grouped_oldnames):
        if set(group).intersection(conflict_oldnames):
            nontrivial_oldnames.append(group)
            nontrivial_new_idxs.append(new_idx)
        else:
            trivial_oldnames.append(group)
            trivial_new_idxs.append(new_idx)

    # Rectify trivial cases
    # Any new-name that does not share any of its old-names with other
    # new-names can be resolved trivially
    n_trivial_unchanged = 0
    n_trivial_ignored = 0
    n_trivial_merges = 0
    for group, new_idx in zip(trivial_oldnames, trivial_new_idxs):
        if len(group) > 0:
            # new-names that use more than one old-name are simple merges
            h = ut.dict_hist(group)
            if len(h) > 1:
                n_trivial_merges += 1
            else:
                n_trivial_unchanged += 1
            hitems = list(h.items())
            hvals = [i[1] for i in hitems]
            maxval = max(hvals)
            g = min([k for k, v in hitems if v == maxval])
            assignment[new_idx] = g
        else:
            # new-names that use no old-names can be ignored
            n_trivial_ignored += 1

    if verbose:
        n_trivial = len(trivial_oldnames)
        n_nontrivial = len(nontrivial_oldnames)
        print('rectify %d trivial groups' % (n_trivial, ))
        print('  * n_trivial_unchanged = %r' % (n_trivial_unchanged, ))
        print('  * n_trivial_merges = %r' % (n_trivial_merges, ))
        print('  * n_trivial_ignored = %r' % (n_trivial_ignored, ))
        print('rectify %d non-trivial groups' % (n_nontrivial, ))

    # Partition nontrivial_oldnames into smaller disjoint sets
    nontrivial_oldnames_sets = list(map(set, nontrivial_oldnames))
    import networkx as nx
    g = nx.Graph()
    g.add_nodes_from(range(len(nontrivial_oldnames_sets)))
    for u, group1 in enumerate(nontrivial_oldnames_sets):
        rest = nontrivial_oldnames_sets[u + 1:]
        for v, group2 in enumerate(rest, start=u + 1):
            if group1.intersection(group2):
                g.add_edge(u, v)
    nontrivial_partition = list(nx.connected_components(g))
    if verbose:
        print('  * partitioned non-trivial into %d subgroups' %
              (len(nontrivial_partition)))
        part_size_stats = ut.get_stats(list(map(len, nontrivial_partition)))
        stats_str = ut.repr2(part_size_stats, precision=2, strkeys=True)
        print('  * partition size stats = %s' % (stats_str, ))

    # Rectify nontrivial cases
    for part_idxs in ut.ProgIter(nontrivial_partition,
                                 lbl='rectify parts',
                                 enabled=verbose):
        part_oldnames = ut.take(nontrivial_oldnames, part_idxs)
        part_newidxs = ut.take(nontrivial_new_idxs, part_idxs)
        # Rectify this part
        assignment_ = simple_munkres(part_oldnames)
        for new_idx, new_name in zip(part_newidxs, assignment_):
            assignment[new_idx] = new_name

    # Any unassigned name is now given a new unique label with a prefix
    if extra_prefix is not None:
        num_extra = 0
        for idx, val in enumerate(assignment):
            if val is None:
                assignment[idx] = '%s%d' % (
                    extra_prefix,
                    num_extra,
                )
                num_extra += 1
    return assignment
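The grouped_oldnames input described in the docstring is simply the old names grouped by the new name of each annotation. A minimal sketch of building it from the parallel lists used in the docstring's example:

# Old and new name labels per annotation, as in the docstring example above.
old_names = [0, 1, 1, 1, 0]
new_names = [0, 0, 1, 1, 0]

# Group the old names by the new name assigned to each annotation.
groups = {}
for old, new in zip(old_names, new_names):
    groups.setdefault(new, []).append(old)
grouped_oldnames = [groups[new] for new in sorted(groups)]
print(grouped_oldnames)  # [[0, 1, 0], [1, 1]]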
Ejemplo n.º 31
0
def find_consistent_labeling(grouped_oldnames):
    """
    Solves a maximum bipartite matching problem to find a consistent
    name assignment.

    Notes:
        # Install module containing the Hungarian algorithm for matching
        pip install munkres

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b'], ['b', 'c'], ['c', 'a', 'a']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> print(new_names)
        [u'b', u'c', u'a']

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b', 'c'], ['b', 'c'], ['c', 'e', 'e']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> print(new_names)
        [u'a', u'b', u'e']

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b'], ['a', 'a', 'b'], ['a']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> print(new_names)
        [u'b', u'a', u'_extra_name0']
    """
    import numpy as np
    try:
        import munkres
    except ImportError:
        print('Need to install Hungarian algorithm bipartite matching solver.')
        print('Run:')
        print('pip install munkres')
        raise
    unique_old_names = ut.unique(ut.flatten(grouped_oldnames))
    num_new_names = len(grouped_oldnames)
    num_old_names = len(unique_old_names)
    extra_oldnames = []

    # Create padded dummy values.  This accounts for the case where it is
    # impossible to uniquely map to the old db
    num_extra = num_new_names - num_old_names
    if num_extra > 0:
        extra_oldnames = ['_extra_name%d' % (count,) for count in
                          range(num_extra)]
    elif num_extra < 0:
        pass
    else:
        extra_oldnames = []
    assignable_names = unique_old_names + extra_oldnames

    total = len(assignable_names)

    # Allocate assignment matrix
    profit_matrix = np.zeros((total, total), dtype=int)
    # Populate assignment profit matrix
    oldname2_idx = ut.make_index_lookup(assignable_names)
    name_freq_list = [ut.dict_hist(names) for names in grouped_oldnames]
    for rowx, name_freq in enumerate(name_freq_list):
        for name, freq in name_freq.items():
            colx = oldname2_idx[name]
            profit_matrix[rowx, colx] += freq
    # Add extra profit for using a previously used name
    profit_matrix[profit_matrix > 0] += 2
    # Add small profit for using an extra name
    extra_colxs = ut.take(oldname2_idx, extra_oldnames)
    profit_matrix[:, extra_colxs] += 1

    # Convert to minimization problem
    big_value = (profit_matrix.max())
    cost_matrix = big_value - profit_matrix
    m = munkres.Munkres()
    indexes = m.compute(cost_matrix)

    # Map output to be aligned with input
    rx2_cx = dict(indexes)
    assignment = [assignable_names[rx2_cx[rx]]
                  for rx in range(num_new_names)]
    return assignment
Ejemplo n.º 32
0
    def make_graph(infr, show=False):
        import networkx as nx
        import itertools
        cm_list = infr.cm_list
        unique_nids, prob_names = infr.make_prob_names()
        thresh = infr.choose_thresh()

        # Simply cut any edge with a weight less than a threshold
        qaid_list = [cm.qaid for cm in cm_list]
        postcut = prob_names > thresh
        qxs, nxs = np.where(postcut)
        if False:
            kw = dict(precision=2, max_line_width=140, suppress_small=True)
            print(
                ut.hz_str('prob_names = ', ut.array2string2((prob_names),
                                                            **kw)))
            print(
                ut.hz_str('postcut = ',
                          ut.array2string2((postcut).astype(int), **kw)))
        matching_qaids = ut.take(qaid_list, qxs)
        matched_nids = ut.take(unique_nids, nxs)

        qreq_ = infr.qreq_

        nodes = ut.unique(qreq_.qaids.tolist() + qreq_.daids.tolist())
        if not hasattr(qreq_, 'dnids'):
            qreq_.dnids = qreq_.ibs.get_annot_nids(qreq_.daids)
            qreq_.qnids = qreq_.ibs.get_annot_nids(qreq_.qaids)
        dnid2_daids = ut.group_items(qreq_.daids, qreq_.dnids)
        grouped_aids = dnid2_daids.values()
        matched_daids = ut.take(dnid2_daids, matched_nids)
        name_cliques = [
            list(itertools.combinations(aids, 2)) for aids in grouped_aids
        ]
        aid_matches = [
            list(ut.product([qaid], daids))
            for qaid, daids in zip(matching_qaids, matched_daids)
        ]

        graph = nx.Graph()
        graph.add_nodes_from(nodes)
        graph.add_edges_from(ut.flatten(name_cliques))
        graph.add_edges_from(ut.flatten(aid_matches))

        #matchless_quries = ut.take(qaid_list, ut.index_complement(qxs, len(qaid_list)))
        name_nodes = [('nid', l) for l in qreq_.dnids]
        db_aid_nid_edges = list(zip(qreq_.daids, name_nodes))
        #query_aid_nid_edges = list(zip(matching_qaids, [('nid', l) for l in matched_nids]))
        #G = nx.Graph()
        #G.add_nodes_from(matchless_quries)
        #G.add_edges_from(db_aid_nid_edges)
        #G.add_edges_from(query_aid_nid_edges)

        graph.add_edges_from(db_aid_nid_edges)

        if infr.user_feedback is not None:
            user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
            p_bg = 0.0
            part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
            part2 = p_bg * user_feedback['p_notcomp']
            p_same_list = part1 + part2
            for aid1, aid2, p_same in zip(user_feedback['aid1'],
                                          user_feedback['aid2'], p_same_list):
                if p_same > .5:
                    if not graph.has_edge(aid1, aid2):
                        graph.add_edge(aid1, aid2)
                else:
                    if graph.has_edge(aid1, aid2):
                        graph.remove_edge(aid1, aid2)
        if show:
            import plottool as pt
            nx.set_node_attributes(graph, 'color',
                                   {aid: pt.LIGHT_PINK
                                    for aid in qreq_.daids})
            nx.set_node_attributes(graph, 'color',
                                   {aid: pt.TRUE_BLUE
                                    for aid in qreq_.qaids})
            nx.set_node_attributes(
                graph, 'color', {
                    aid: pt.LIGHT_PURPLE
                    for aid in np.intersect1d(qreq_.qaids, qreq_.daids)
                })
            nx.set_node_attributes(
                graph, 'label',
                {node: 'n%r' % (node[1], )
                 for node in name_nodes})
            nx.set_node_attributes(
                graph, 'color', {node: pt.LIGHT_GREEN
                                 for node in name_nodes})
        if show:
            import plottool as pt
            pt.show_nx(graph, layoutkw={'prog': 'neato'}, verbose=False)
        return graph
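The user-feedback weighting in make_graph mixes the match probability with the probability that the pair is not comparable: p_same = p_match * (1 - p_notcomp) + p_bg * p_notcomp, so a pair judged not comparable falls back to the background prior p_bg (set to 0.0 above). A quick numeric check of that mixture with made-up values:

import numpy as np

p_match = np.array([0.9, 0.9])
p_notcomp = np.array([0.0, 1.0])   # second pair is judged not comparable
p_bg = 0.0                          # background prior, as used above

p_same = p_match * (1 - p_notcomp) + p_bg * p_notcomp
print(p_same)  # the non-comparable pair collapses to p_bg and adds no edge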
Ejemplo n.º 33
0
def intraoccurrence_connected():
    r"""
    CommandLine:
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --postcut
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --smaller

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.specialdraw import *  # NOQA
        >>> result = intraoccurrence_connected()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import ibeis
    import plottool as pt
    from ibeis.viz import viz_graph
    import networkx as nx
    pt.ensure_pylab_qt4()
    ibs = ibeis.opendb(defaultdb='PZ_Master1')
    nid2_aid = {
        #4880: [3690, 3696, 3703, 3706, 3712, 3721],
        4880: [3690, 3696, 3703],
        6537: [3739],
        6653: [7671],
        6610: [7566, 7408],
        #6612: [7664, 7462, 7522],
        #6624: [7465, 7360],
        #6625: [7746, 7383, 7390, 7477, 7376, 7579],
        6630: [7586, 7377, 7464, 7478],
        #6677: [7500]
    }
    nid2_dbaids = {4880: [33, 6120, 7164], 6537: [7017, 7206], 6653: [7660]}
    if ut.get_argflag('--small') or ut.get_argflag('--smaller'):
        del nid2_aid[6630]
        del nid2_aid[6537]
        del nid2_dbaids[6537]
        if ut.get_argflag('--smaller'):
            nid2_dbaids[4880].remove(33)
            nid2_aid[4880].remove(3690)
            nid2_aid[6610].remove(7408)
        #del nid2_aid[4880]
        #del nid2_dbaids[4880]

    aids = ut.flatten(nid2_aid.values())

    temp_nids = [1] * len(aids)
    postcut = ut.get_argflag('--postcut')
    aids_list = ibs.group_annots_by_name(aids)[0]
    ensure_edges = 'all' if True or not postcut else None
    unlabeled_graph = viz_graph.make_netx_graph_from_aid_groups(
        ibs,
        aids_list,
        #invis_edges=invis_edges,
        ensure_edges=ensure_edges,
        temp_nids=temp_nids)
    viz_graph.color_by_nids(unlabeled_graph,
                            unique_nids=[1] *
                            len(list(unlabeled_graph.nodes())))
    viz_graph.ensure_node_images(ibs, unlabeled_graph)
    nx.set_node_attributes(unlabeled_graph, 'shape', 'rect')
    #unlabeled_graph = unlabeled_graph.to_undirected()

    # Find the "database exemplars for these annots"
    if False:
        gt_aids = ibs.get_annot_groundtruth(aids)
        gt_aids = [ut.setdiff(s, aids) for s in gt_aids]
        dbaids = ut.unique(ut.flatten(gt_aids))
        dbaids = ibs.filter_annots_general(dbaids, minqual='good')
        ibs.get_annot_quality_texts(dbaids)
    else:
        dbaids = ut.flatten(nid2_dbaids.values())
    exemplars = nx.DiGraph()
    #graph = exemplars  # NOQA
    exemplars.add_nodes_from(dbaids)

    def add_clique(graph, nodes, edgeattrs={}, nodeattrs={}):
        edge_list = ut.upper_diag_self_prodx(nodes)
        graph.add_edges_from(edge_list, **edgeattrs)
        return edge_list

    for aids_, nid in zip(*ibs.group_annots_by_name(dbaids)):
        add_clique(exemplars, aids_)
    viz_graph.ensure_node_images(ibs, exemplars)
    viz_graph.color_by_nids(exemplars, ibs=ibs)

    nx.set_node_attributes(unlabeled_graph, 'framewidth', False)
    nx.set_node_attributes(exemplars, 'framewidth', 4.0)

    nx.set_node_attributes(unlabeled_graph, 'group', 'unlab')
    nx.set_node_attributes(exemplars, 'group', 'exemp')

    #big_graph = nx.compose_all([unlabeled_graph])
    big_graph = nx.compose_all([exemplars, unlabeled_graph])

    # add sparse connections from unlabeled to exemplars
    import numpy as np
    rng = np.random.RandomState(0)
    if True or not postcut:
        for aid_ in unlabeled_graph.nodes():
            flags = rng.rand(len(exemplars)) > .5
            nid_ = ibs.get_annot_nids(aid_)
            exnids = np.array(ibs.get_annot_nids(list(exemplars.nodes())))
            flags = np.logical_or(exnids == nid_, flags)
            exmatches = ut.compress(list(exemplars.nodes()), flags)
            big_graph.add_edges_from(list(ut.product([aid_], exmatches)),
                                     color=pt.ORANGE,
                                     implicit=True)
    else:
        for aid_ in unlabeled_graph.nodes():
            flags = rng.rand(len(exemplars)) > .5
            exmatches = ut.compress(list(exemplars.nodes()), flags)
            nid_ = ibs.get_annot_nids(aid_)
            exnids = np.array(ibs.get_annot_nids(exmatches))
            exmatches = ut.compress(exmatches, exnids == nid_)
            big_graph.add_edges_from(list(ut.product([aid_], exmatches)))
        pass

    nx.set_node_attributes(big_graph, 'shape', 'rect')
    #if False and postcut:
    #    ut.nx_delete_node_attr(big_graph, 'nid')
    #    ut.nx_delete_edge_attr(big_graph, 'color')
    #    viz_graph.ensure_graph_nid_labels(big_graph, ibs=ibs)
    #    viz_graph.color_by_nids(big_graph, ibs=ibs)
    #    big_graph = big_graph.to_undirected()

    layoutkw = {
        'sep': 1 / 5,
        'prog': 'neato',
        'overlap': 'false',
        #'splines': 'ortho',
        'splines': 'spline',
    }

    as_directed = False
    #as_directed = True
    #hacknode = True
    hacknode = 0

    graph = big_graph
    ut.nx_ensure_agraph_color(graph)
    if hacknode:
        nx.set_edge_attributes(graph, 'taillabel',
                               {e: str(e[0])
                                for e in graph.edges()})
        nx.set_edge_attributes(graph, 'headlabel',
                               {e: str(e[1])
                                for e in graph.edges()})

    explicit_graph = pt.get_explicit_graph(graph)
    _, layout_info = pt.nx_agraph_layout(explicit_graph,
                                         orig_graph=graph,
                                         inplace=True,
                                         **layoutkw)

    if ut.get_argflag('--smaller'):
        graph.node[7660]['pos'] = np.array([550, 350])
        graph.node[6120]['pos'] = np.array([200, 600]) + np.array([350, -400])
        graph.node[7164]['pos'] = np.array([200, 480]) + np.array([350, -400])
        nx.set_node_attributes(graph, 'pin', 'true')
        _, layout_info = pt.nx_agraph_layout(graph, inplace=True, **layoutkw)
    elif ut.get_argflag('--small'):
        graph.node[7660]['pos'] = np.array([750, 350])
        graph.node[33]['pos'] = np.array([300, 600]) + np.array([350, -400])
        graph.node[6120]['pos'] = np.array([500, 600]) + np.array([350, -400])
        graph.node[7164]['pos'] = np.array([410, 480]) + np.array([350, -400])
        nx.set_node_attributes(graph, 'pin', 'true')
        _, layout_info = pt.nx_agraph_layout(graph, inplace=True, **layoutkw)

    if not postcut:
        #pt.show_nx(graph.to_undirected(), layout='agraph', layoutkw=layoutkw,
        #           as_directed=False)
        #pt.show_nx(graph, layout='agraph', layoutkw=layoutkw,
        #           as_directed=as_directed, hacknode=hacknode)

        pt.show_nx(graph,
                   layout='custom',
                   layoutkw=layoutkw,
                   as_directed=as_directed,
                   hacknode=hacknode)
    else:
        #explicit_graph = pt.get_explicit_graph(graph)
        #_, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph,
        #                                     **layoutkw)

        #layout_info['edge']['alpha'] = .8
        #pt.apply_graph_layout_attrs(graph, layout_info)

        #graph_layout_attrs = layout_info['graph']
        ##edge_layout_attrs  = layout_info['edge']
        ##node_layout_attrs  = layout_info['node']

        #for key, vals in layout_info['node'].items():
        #    #print('[special] key = %r' % (key,))
        #    nx.set_node_attributes(graph, key, vals)

        #for key, vals in layout_info['edge'].items():
        #    #print('[special] key = %r' % (key,))
        #    nx.set_edge_attributes(graph, key, vals)

        #nx.set_edge_attributes(graph, 'alpha', .8)
        #graph.graph['splines'] = graph_layout_attrs.get('splines', 'line')
        #graph.graph['splines'] = 'polyline'   # graph_layout_attrs.get('splines', 'line')
        #graph.graph['splines'] = 'line'

        cut_graph = graph.copy()
        edge_list = list(cut_graph.edges())
        edge_nids = np.array(ibs.unflat_map(ibs.get_annot_nids, edge_list))
        cut_flags = edge_nids.T[0] != edge_nids.T[1]
        cut_edges = ut.compress(edge_list, cut_flags)
        cut_graph.remove_edges_from(cut_edges)
        ut.nx_delete_node_attr(cut_graph, 'nid')
        viz_graph.ensure_graph_nid_labels(cut_graph, ibs=ibs)

        #ut.nx_get_default_node_attributes(exemplars, 'color', None)
        ut.nx_delete_node_attr(cut_graph,
                               'color',
                               nodes=unlabeled_graph.nodes())
        aid2_color = ut.nx_get_default_node_attributes(cut_graph, 'color',
                                                       None)
        nid2_colors = ut.group_items(aid2_color.values(),
                                     ibs.get_annot_nids(aid2_color.keys()))
        nid2_colors = ut.map_dict_vals(ut.filter_Nones, nid2_colors)
        nid2_colors = ut.map_dict_vals(ut.unique, nid2_colors)
        #for val in nid2_colors.values():
        #    assert len(val) <= 1
        # Get initial colors
        nid2_color_ = {
            nid: colors_[0]
            for nid, colors_ in nid2_colors.items() if len(colors_) == 1
        }

        graph = cut_graph
        viz_graph.color_by_nids(cut_graph, ibs=ibs, nid2_color_=nid2_color_)
        nx.set_node_attributes(cut_graph, 'framewidth', 4)

        pt.show_nx(cut_graph,
                   layout='custom',
                   layoutkw=layoutkw,
                   as_directed=as_directed,
                   hacknode=hacknode)

    pt.zoom_factory()
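
The add_clique helper above connects every pair of database annotations that share a name. A minimal sketch of that per-name clique construction, with itertools.combinations standing in for ut.upper_diag_self_prodx and made-up annotation ids:

import itertools
import networkx as nx

exemplars = nx.DiGraph()
name_groups = [[33, 6120, 7164], [7017, 7206]]  # hypothetical database aids grouped by name
for aids_ in name_groups:
    # all pairs within a group form a clique
    exemplars.add_edges_from(itertools.combinations(aids_, 2))
print(sorted(exemplars.edges()))
# [(33, 6120), (33, 7164), (6120, 7164), (7017, 7206)]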
Ejemplo n.º 34
0
    def make_graph(infr, show=False):
        import networkx as nx
        import itertools
        cm_list = infr.cm_list
        unique_nids, prob_names = infr.make_prob_names()
        thresh = infr.choose_thresh()

        # Simply cut any edge with a weight less than a threshold
        qaid_list = [cm.qaid for cm in cm_list]
        postcut = prob_names > thresh
        qxs, nxs = np.where(postcut)
        if False:
            kw = dict(precision=2, max_line_width=140, suppress_small=True)
            print(ut.hz_str('prob_names = ', ut.array2string2((prob_names), **kw)))
            print(ut.hz_str('postcut = ', ut.array2string2((postcut).astype(np.int), **kw)))
        matching_qaids = ut.take(qaid_list, qxs)
        matched_nids = ut.take(unique_nids, nxs)

        qreq_ = infr.qreq_

        nodes = ut.unique(qreq_.qaids.tolist() + qreq_.daids.tolist())
        if not hasattr(qreq_, 'dnids'):
            qreq_.dnids = qreq_.ibs.get_annot_nids(qreq_.daids)
            qreq_.qnids = qreq_.ibs.get_annot_nids(qreq_.qaids)
        dnid2_daids = ut.group_items(qreq_.daids, qreq_.dnids)
        grouped_aids = dnid2_daids.values()
        matched_daids = ut.take(dnid2_daids, matched_nids)
        name_cliques = [list(itertools.combinations(aids, 2)) for aids in grouped_aids]
        aid_matches = [list(ut.product([qaid], daids)) for qaid, daids in
                       zip(matching_qaids, matched_daids)]

        graph = nx.Graph()
        graph.add_nodes_from(nodes)
        graph.add_edges_from(ut.flatten(name_cliques))
        graph.add_edges_from(ut.flatten(aid_matches))

        #matchless_queries = ut.take(qaid_list, ut.index_complement(qxs, len(qaid_list)))
        name_nodes = [('nid', l) for l in qreq_.dnids]
        db_aid_nid_edges = list(zip(qreq_.daids, name_nodes))
        #query_aid_nid_edges = list(zip(matching_qaids, [('nid', l) for l in matched_nids]))
        #G = nx.Graph()
        #G.add_nodes_from(matchless_quries)
        #G.add_edges_from(db_aid_nid_edges)
        #G.add_edges_from(query_aid_nid_edges)

        graph.add_edges_from(db_aid_nid_edges)

        if infr.user_feedback is not None:
            user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
            p_bg = 0.0
            part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
            part2 = p_bg * user_feedback['p_notcomp']
            p_same_list = part1 + part2
            for aid1, aid2, p_same in zip(user_feedback['aid1'],
                                          user_feedback['aid2'], p_same_list):
                if p_same > .5:
                    if not graph.has_edge(aid1, aid2):
                        graph.add_edge(aid1, aid2)
                else:
                    if graph.has_edge(aid1, aid2):
                        graph.remove_edge(aid1, aid2)
        if show:
            import plottool as pt
            nx.set_node_attributes(graph, 'color', {aid: pt.LIGHT_PINK
                                                    for aid in qreq_.daids})
            nx.set_node_attributes(graph, 'color', {aid: pt.TRUE_BLUE
                                                    for aid in qreq_.qaids})
            nx.set_node_attributes(graph, 'color', {
                aid: pt.LIGHT_PURPLE
                for aid in np.intersect1d(qreq_.qaids, qreq_.daids)})
            nx.set_node_attributes(graph, 'label', {node: 'n%r' % (node[1],)
                                                    for node in name_nodes})
            nx.set_node_attributes(graph, 'color', {node: pt.LIGHT_GREEN
                                                    for node in name_nodes})
        if show:
            import plottool as pt
            pt.show_nx(graph, layoutkw={'prog': 'neato'}, verbose=False)
        return graph
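
The core of make_graph is a threshold cut: a query annotation is linked to a candidate name only when its match probability exceeds thresh. A minimal sketch of that step with toy ids and probabilities (not the original data structures):

import numpy as np
import networkx as nx

prob_names = np.array([[0.9, 0.1],
                       [0.2, 0.8]])  # rows: query annots, cols: candidate names
thresh = 0.5
qaids = [101, 102]                   # hypothetical query annotation ids
name_nodes = [('nid', 1), ('nid', 2)]

graph = nx.Graph()
graph.add_nodes_from(qaids)
graph.add_nodes_from(name_nodes)
# keep only query-to-name edges whose probability beats the threshold
qxs, nxs_ = np.where(prob_names > thresh)
graph.add_edges_from((qaids[q], name_nodes[n]) for q, n in zip(qxs, nxs_))
print(list(graph.edges()))  # e.g. [(101, ('nid', 1)), (102, ('nid', 2))]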
Ejemplo n.º 35
0
    def add_column_names(self, new_colnames):
        col_name_list = ut.unique(self.col_name_list + new_colnames)
        self.update_column_names(col_name_list)
Ejemplo n.º 36
0
def simple_munkres(part_oldnames):
    """
    Defines a Munkres assignment problem to solve name rectification.

    Notes:
        We create a matrix where each row represents a group of annotations in
        the same PCC and each column represents an original name. If there are
        more PCCs than original names, the columns are padded with extra
        values. The matrix is first initialized to negative infinity,
        representing impossible assignments. Then, for each column
        representing a padded name, we set its value to $1$, indicating that
        each new name could be assigned to a padded name for some small
        profit. Finally, let $f_{rc}$ be the number of annotations in row $r$
        with an original name of $c$. Each matrix value $(r, c)$ is set to
        $f_{rc} + 1$ if $f_{rc} > 0$, to represent how much each name
        ``wants'' to be labeled with a particular original name; the extra $1$
        ensures that these original names are always preferred over padded
        names.

    CommandLine:
        python -m ibeis.scripts.name_recitifer simple_munkres

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> part_oldnames = [['a', 'b'], ['b', 'c'], ['c', 'a', 'a']]
        >>> new_names = simple_munkres(part_oldnames)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        ['b', 'c', 'a']

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> part_oldnames = [[], ['a', 'a'], [],
        >>>                  ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'b'], ['a']]
        >>> new_names = simple_munkres(part_oldnames)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        [None, 'a', None, 'b', None]

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> part_oldnames = [[], ['b'], ['a', 'b', 'c'], ['b', 'c'], ['c', 'e', 'e']]
        >>> new_names = find_consistent_labeling(part_oldnames)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        ['_extra_name0', 'b', 'a', 'c', 'e']

        Profit Matrix
            b   a   c   e  _0
        0 -10 -10 -10 -10   1
        1   2 -10 -10 -10   1
        2   2   2   2 -10   1
        3   2 -10   2 -10   1
        4 -10 -10   2   3   1
    """
    import numpy as np
    import scipy.optimize
    unique_old_names = ut.unique(ut.flatten(part_oldnames))
    num_new_names = len(part_oldnames)
    num_old_names = len(unique_old_names)

    # Create padded dummy values.  This accounts for the case where it is
    # impossible to uniquely map to the old db
    num_pad = max(num_new_names - num_old_names, 0)
    total = num_old_names + num_pad
    shape = (total, total)

    # Allocate assignment matrix.
    # rows are new-names and cols are old-names.
    # Initially the profit of any assignment is effectively -inf
    # This effectively marks all assignments as invalid
    profit_matrix = np.full(shape, -2 * total, dtype=np.int)
    # Overwrite valid assignments with positive profits
    oldname2_idx = ut.make_index_lookup(unique_old_names)
    name_freq_list = [ut.dict_hist(names) for names in part_oldnames]
    # Initialize profit of a valid assignment as 1 + freq
    # This incentivizes using a previously used name
    for rowx, name_freq in enumerate(name_freq_list):
        for name, freq in name_freq.items():
            colx = oldname2_idx[name]
            profit_matrix[rowx, colx] = freq + 1
    # Set a much smaller profit for using an extra name
    # This allows the solution to always exist
    profit_matrix[:, num_old_names:total] = 1

    # Convert to minimization problem
    big_value = (profit_matrix.max()) - (profit_matrix.min())
    cost_matrix = big_value - profit_matrix

    # Use scipy implementation of munkres algorithm.
    rx2_cx = dict(zip(*scipy.optimize.linear_sum_assignment(cost_matrix)))

    # Each row (new-name) has now been assigned a column (old-name)
    # Map this back to the input-space (using None to indicate extras)
    cx2_name = dict(enumerate(unique_old_names))

    if False:
        import pandas as pd
        columns = unique_old_names + ['_%r' % x for x in range(num_pad)]
        print('Profit Matrix')
        print(pd.DataFrame(profit_matrix, columns=columns))

        print('Cost Matrix')
        print(pd.DataFrame(cost_matrix, columns=columns))

    assignment_ = [
        cx2_name.get(rx2_cx[rx], None) for rx in range(num_new_names)
    ]
    return assignment_
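
For reference, a self-contained sketch of the profit-matrix construction described in the Notes above, applied to the first docstring example and solved directly with scipy.optimize.linear_sum_assignment (variable names here are illustrative):

import numpy as np
import scipy.optimize

part_oldnames = [['a', 'b'], ['b', 'c'], ['c', 'a', 'a']]
unique_old_names = sorted({n for names in part_oldnames for n in names})
num_new = len(part_oldnames)
num_old = len(unique_old_names)
num_pad = max(num_new - num_old, 0)
total = num_old + num_pad

# impossible assignments start with a large negative profit
profit = np.full((total, total), -2 * total, dtype=int)
for rowx, names in enumerate(part_oldnames):
    for name in set(names):
        profit[rowx, unique_old_names.index(name)] = names.count(name) + 1
# padded (extra) name columns get a small constant profit
profit[:, num_old:] = 1

# convert the maximization problem into a minimization problem
cost = profit.max() - profit.min() - profit
rows, cols = scipy.optimize.linear_sum_assignment(cost)
assignment = [unique_old_names[c] if c < num_old else None
              for r, c in sorted(zip(rows, cols)) if r < num_new]
print(assignment)  # ['b', 'c', 'a'] for this input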
Ejemplo n.º 37
0
def main(bib_fpath=None):
    r"""
    Entry point for the fixbib script.

    CommandLine:
        fixbib
        python -m fixtex bib
        python -m fixtex bib --dryrun
        python -m fixtex bib --dryrun --debug
    """

    if bib_fpath is None:
        bib_fpath = 'My Library.bib'

    # DEBUG = ub.argflag('--debug')
    # Read in text and ensure ascii format
    dirty_text = ut.readfrom(bib_fpath)

    from fixtex.fix_tex import find_used_citations, testdata_fpaths

    if exists('custom_extra.bib'):
        extra_parser = bparser.BibTexParser(ignore_nonstandard_types=False)
        parser = bparser.BibTexParser()
        ut.delete_keys(parser.alt_dict, ['url', 'urls'])
        print('Parsing extra bibtex file')
        extra_text = ut.readfrom('custom_extra.bib')
        extra_database = extra_parser.parse(extra_text, partial=False)
        print('Finished parsing extra')
        extra_dict = extra_database.get_entry_dict()
    else:
        extra_dict = None

    #udata = dirty_text.decode("utf-8")
    #dirty_text = udata.encode("ascii", "ignore")
    #dirty_text = udata

    # parser = bparser.BibTexParser()
    # bib_database = parser.parse(dirty_text)
    # d = bib_database.get_entry_dict()

    print('BIBTEXPARSER LOAD')
    parser = bparser.BibTexParser(ignore_nonstandard_types=False,
                                  common_strings=True)
    ut.delete_keys(parser.alt_dict, ['url', 'urls'])
    print('Parsing bibtex file')
    bib_database = parser.parse(dirty_text, partial=False)
    print('Finished parsing')

    bibtex_dict = bib_database.get_entry_dict()
    old_keys = list(bibtex_dict.keys())
    new_keys = []
    for key in ub.ProgIter(old_keys, label='fixing keys'):
        new_key = key
        new_key = new_key.replace(':', '')
        new_key = new_key.replace('-', '_')
        new_key = re.sub('__*', '_', new_key)
        new_keys.append(new_key)

    # assert len(ut.find_duplicate_items(new_keys)) == 0, 'new keys created conflict'
    assert len(ub.find_duplicates(new_keys)) == 0, 'new keys created conflict'

    for key, new_key in zip(old_keys, new_keys):
        if key != new_key:
            entry = bibtex_dict[key]
            entry['ID'] = new_key
            bibtex_dict[new_key] = entry
            del bibtex_dict[key]

    # The bibtext is now clean. Print it to stdout
    #print(clean_text)
    verbose = None
    if verbose is None:
        verbose = 1

    # Find citations from the tex documents
    key_list = None
    if key_list is None:
        cacher = ub.Cacher('texcite1', enabled=0)
        data = cacher.tryload()
        if data is None:
            fpaths = testdata_fpaths()
            key_list, inverse = find_used_citations(fpaths,
                                                    return_inverse=True)
            # ignore = ['JP', '?', 'hendrick']
            # for item in ignore:
            #     try:
            #         key_list.remove(item)
            #     except ValueError:
            #         pass
            if verbose:
                print('Found %d citations used in the document' %
                      (len(key_list), ))
            data = key_list, inverse
            cacher.save(data)
        key_list, inverse = data

    # else:
    #     key_list = None

    unknown_pubkeys = []
    debug_author = ub.argval('--debug-author', default=None)
    # ./fix_bib.py --debug_author=Kappes

    if verbose:
        print('Fixing %d/%d bibtex entries' %
              (len(key_list), len(bibtex_dict)))

    # debug = True
    debug = False
    if debug_author is not None:
        debug = False

    known_keys = list(bibtex_dict.keys())
    missing_keys = set(key_list) - set(known_keys)
    if extra_dict is not None:
        missing_keys.difference_update(set(extra_dict.keys()))

    if missing_keys:
        print('The library is missing keys found in tex files %s' %
              (ub.repr2(missing_keys), ))

    # Search for possible typos:
    candidate_typos = {}
    sedlines = []
    for key in missing_keys:
        candidates = ut.closet_words(key, known_keys, num=3, subset=True)
        if len(candidates) > 1:
            top = candidates[0]
            if ut.edit_distance(key, top) == 1:
                # "sed -i -e 's/{}/{}/g' *.tex".format(key, top)
                import os
                replpaths = ' '.join(
                    [relpath(p, os.getcwd()) for p in inverse[key]])
                sedlines.append("sed -i -e 's/{}/{}/g' {}".format(
                    key, top, replpaths))
        candidate_typos[key] = candidates
        print('Cannot find key = %r' % (key, ))
        print('Did you mean? %r' % (candidates, ))

    print('Quick fixes')
    print('\n'.join(sedlines))

    # group by file
    just = max([0] + list(map(len, missing_keys)))
    missing_fpaths = [inverse[key] for key in missing_keys]
    for fpath in sorted(set(ub.flatten(missing_fpaths))):
        # ut.fix_embed_globals()
        subkeys = [k for k in missing_keys if fpath in inverse[k]]
        print('')
        ut.cprint('--- Missing Keys ---', 'blue')
        ut.cprint('fpath = %r' % (fpath, ), 'blue')
        ut.cprint('{} | {}'.format('Missing'.ljust(just), 'Did you mean?'),
                  'blue')
        for key in subkeys:
            print('{} | {}'.format(ut.highlight_text(key.ljust(just), 'red'),
                                   ' '.join(candidate_typos[key])))

    # for key in list(bibtex_dict.keys()):

    if extra_dict is not None:
        # Extra database takes precedence over regular
        key_list = list(ut.unique(key_list + list(extra_dict.keys())))
        for k, v in extra_dict.items():
            bibtex_dict[k] = v

    full = ub.argflag('--full')

    for key in key_list:
        try:
            entry = bibtex_dict[key]
        except KeyError:
            continue
        self = BibTexCleaner(key, entry, full=full)

        if debug_author is not None:
            debug = debug_author in entry.get('author', '')

        if debug:
            ut.cprint(' --- ENTRY ---', 'yellow')
            print(ub.repr2(entry, nl=1))

        entry = self.fix()
        # self.clip_abstract()
        # self.shorten_keys()
        # self.fix_authors()
        # self.fix_year()
        # old_pubval = self.fix_pubkey()
        # if old_pubval:
        #     unknown_pubkeys.append(old_pubval)
        # self.fix_arxiv()
        # self.fix_general()
        # self.fix_paper_types()

        if debug:
            print(ub.repr2(entry, nl=1))
            ut.cprint(' --- END ENTRY ---', 'yellow')
        bibtex_dict[key] = entry

    unwanted_keys = set(bibtex_dict.keys()) - set(key_list)
    if verbose:
        print('Removing %d unwanted entries' % (len(unwanted_keys),))
    ut.delete_dict_keys(bibtex_dict, unwanted_keys)

    if 0:
        d1 = bibtex_dict.copy()
        full = True
        for key, entry in d1.items():
            self = BibTexCleaner(key, entry, full=full)
            pub = self.publication()
            if pub is None:
                print(self.entry['ENTRYTYPE'])

            old = self.fix_pubkey()
            x1 = self._pubval()
            x2 = self.standard_pubval(full=full)
            # if x2 is not None and len(x2) > 5:
            #     print(ub.repr2(self.entry))

            if x1 != x2:
                print('x2 = %r' % (x2, ))
                print('x1 = %r' % (x1, ))
                print(ub.repr2(self.entry))

            # if 'CVPR' in self.entry.get('booktitle', ''):
            #     if 'CVPR' != self.entry.get('booktitle', ''):
            #         break
            if old:
                print('old = %r' % (old, ))
            d1[key] = self.entry

    if full:
        d1 = bibtex_dict.copy()

        import numpy as np
        import pandas as pd
        df = pd.DataFrame.from_dict(d1, orient='index')

        paged_items = df[~pd.isnull(df['pub_accro'])]
        has_pages = ~pd.isnull(paged_items['pages'])
        print('have pages {} / {}'.format(has_pages.sum(), len(has_pages)))
        print(ub.repr2(paged_items[~has_pages]['title'].values.tolist()))

        entrytypes = dict(list(df.groupby('pub_type')))
        if False:
            # entrytypes['misc']
            g = entrytypes['online']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            entrytypes['book']
            entrytypes['thesis']
            g = entrytypes['article']
            g = entrytypes['incollection']
            g = entrytypes['conference']

        def lookup_pub(e):
            if e == 'article':
                return 'journal', 'journal'
            elif e == 'incollection':
                return 'booksection', 'booktitle'
            elif e == 'conference':
                return 'conference', 'booktitle'
            return None, None

        for e, g in entrytypes.items():
            print('e = %r' % (e, ))
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            if 'pub_full' in g.columns:
                place_title = g['pub_full'].tolist()
                print(ub.repr2(ub.dict_hist(place_title)))
            else:
                print('Unknown publications')

        if 'report' in entrytypes:
            g = entrytypes['report']
            missing = g[pd.isnull(g['title'])]
            if len(missing):
                print('Missing Title')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'journal' in entrytypes:
            g = entrytypes['journal']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['journal'])]
            if len(missing):
                print('Missing Journal')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'conference' in entrytypes:
            g = entrytypes['conference']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'incollection' in entrytypes:
            g = entrytypes['incollection']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]

            missing = g[pd.isnull(g['booktitle'])]
            if len(missing):
                print('Missing Booktitle')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        if 'thesis' in entrytypes:
            g = entrytypes['thesis']
            g = g[g.columns[~np.all(pd.isnull(g), axis=0)]]
            missing = g[pd.isnull(g['institution'])]
            if len(missing):
                print('Missing Institution')
                print(ub.repr2(missing[['title', 'author']].values.tolist()))

        # import utool
        # utool.embed()

    # Overwrite BibDatabase structure
    bib_database._entries_dict = bibtex_dict
    bib_database.entries = list(bibtex_dict.values())

    #conftitle_to_types_set_hist = {key: set(val) for key, val in conftitle_to_types_hist.items()}
    #print(ub.repr2(conftitle_to_types_set_hist))

    print('Unknown conference keys:')
    print(ub.repr2(sorted(unknown_pubkeys)))
    print('len(unknown_pubkeys) = %r' % (len(unknown_pubkeys), ))

    writer = BibTexWriter()
    writer.contents = ['comments', 'entries']
    writer.indent = '  '
    writer.order_entries_by = ('type', 'author', 'year')

    new_bibtex_str = bibtexparser.dumps(bib_database, writer)

    # Need to check
    #jegou_aggregating_2012

    # Fix the Journal Abreviations
    # References:
    # https://www.ieee.org/documents/trans_journal_names.pdf

    # Write out clean bibfile in ascii format
    clean_bib_fpath = ub.augpath(bib_fpath.replace(' ', '_'), suffix='_clean')

    if not ub.argflag('--dryrun'):
        ut.writeto(clean_bib_fpath, new_bibtex_str)
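
The key-fixing loop above strips colons, converts dashes to underscores, and collapses repeated underscores. A minimal sketch of that normalization with made-up citation keys:

import re

old_keys = ['Smith:2019-deep', 'Jones__2020', 'Lee-2021']  # hypothetical keys
new_keys = [re.sub('__*', '_', k.replace(':', '').replace('-', '_'))
            for k in old_keys]
assert len(new_keys) == len(set(new_keys)), 'new keys created conflict'
print(new_keys)  # ['Smith2019_deep', 'Jones_2020', 'Lee_2021']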
Ejemplo n.º 38
0
def get_dbinfo(ibs, verbose=True,
               with_imgsize=False,
               with_bytes=False,
               with_contrib=False,
               with_agesex=False,
               with_header=True,
               short=False,
               tag='dbinfo',
               aid_list=None):
    """

    Returns a dictionary of digestible database information.
    info_str is a string summary of all the stats. Prints info_str in addition
    to returning locals.

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    CommandLine:
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0
        python -m ibeis.other.dbinfo --test-get_dbinfo:1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example1:
        >>> # SCRIPT
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = ibeis.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = ibeis.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from ibeis.expt import cfghelpers
        >>> #from ibeis.expt import annotation_configs
        >>> #from ibeis.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example2:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> verbose = True
        >>> short = True
        >>> #ibs = ibeis.opendb(db='GZ_ALL')
        >>> #ibs = ibeis.opendb(db='PZ_Master0')
        >>> ibs = ibeis.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contrib_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = ibeis.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info:  testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names                      = 7
        # Names (unassociated)       = 0
        # Names (singleton)          = 5
        # Names (multiton)           = 2
        ----------
        # Annots                     = 13
        # Annots (unknown)           = 4
        # Annots (singleton)         = 5
        # Annots (multiton)          = 4
        ----------
        # Img                        = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            print('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from ibeis.expt import experiment_helpers
            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list)
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            #aid_list =
        if verbose:
            print('Specified %d custom aids' % (len(aid_list),))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) -
            {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    #associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    FILTER_HACK = True
    if FILTER_HACK:
        # HUGE HACK - get only images and names with filtered aids
        valid_aids_ = ibs.filter_aids_custom(valid_aids)
        valid_nids_ = ibs.filter_nids_custom(valid_nids)
        valid_gids_ = ibs.filter_gids_custom(valid_gids)
        if verbose:
            print('Filtered %d names' % (len(valid_nids) - len(valid_nids_)))
            print('Filtered %d images' % (len(valid_gids) - len(valid_gids_)))
            print('Filtered %d annots' % (len(valid_aids) - len(valid_aids_)))
        valid_gids = valid_gids_
        valid_nids = valid_nids_
        valid_aids = valid_aids_
        #associated_nids = ut.compress(associated_nids, map(any,
        #ibs.unflat_map(ibs.get_annot_custom_filterflags,
        #               ibs.get_name_aids(associated_nids))))

    # Image info
    if verbose:
        print('Checking Image Info')
    gx2_aids = ibs.get_image_aids(valid_gids)
    if FILTER_HACK:
        gx2_aids = [ibs.filter_aids_custom(aids) for aids in gx2_aids]  # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats  = ut.get_stats_str(gx2_nAnnots, newlines=True, use_median=True)
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        print('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if FILTER_HACK:
        nx2_aids =  [ibs.filter_aids_custom(aids) for aids in nx2_aids]    # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in nx2_aids]
    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    # Occurrence Info
    def compute_annot_occurrence_ids(ibs, aid_list):
        from ibeis.algo.preproc import preproc_occurrence
        gid_list = ibs.get_annot_gids(aid_list)
        gid2_aids = ut.group_items(aid_list, gid_list)
        flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False)
        occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
        occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()}
        return occurid2_aids

    import utool
    with utool.embed_on_exception_context:
        occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
        occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
        occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
        nid2_occurxs = ut.ddict(list)
        for occurx, nids in enumerate(occur_unique_nids):
            for nid in nids:
                nid2_occurxs[nid].append(occurx)

    nid2_occurx_single = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1}
    nid2_occurx_resight = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1}
    singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

    singlesight_annot_stats = ut.get_stats(list(map(len, singlesight_encounters)), use_median=True, use_sum=True)
    resight_name_stats = ut.get_stats(list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True)

    try:
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0)
        undirected_tags = ibs.get_aidpair_tags(aid_pairs.T[0], aid_pairs.T[1], directed=False)
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)

        num_reviewed_pairs = sum(ibs.get_annot_pair_is_reviewed(aid_pairs.T[0], aid_pairs.T[1]))
        pair_tag_info['num_reviewed'] = num_reviewed_pairs
    except Exception:
        pair_tag_info = {}

    #print(ut.dict_str(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        print('Checking Annot Species')
    unknown_aids = ut.compress(valid_aids, ibs.is_aid_unknown(valid_aids))
    species_list = ibs.get_annot_species_texts(valid_aids)
    species2_aids = ut.group_items(valid_aids, species_list)
    species2_nAids = {key: len(val) for key, val in species2_aids.items()}

    if verbose:
        print('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Separate singletons / multitons
    multiton_nxs  = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs      = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        print('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        multiton_aids = np.array([], dtype=np.int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            print('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)
        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = OrderedDict(
                [( 'max', wh_list.max(0)),
                 ( 'min', wh_list.min(0)),
                 ('mean', wh_list.mean(0)),
                 ( 'std', wh_list.std(0))])
            def arr2str(var):
                return ('[' + (
                    ', '.join(list(map(lambda x: '%.1f' % x, var)))
                ) + ']')
            ret = (',\n    '.join([
                '%s:%s' % (key, arr2str(val))
                for key, val in stat_dict.items()
            ]))
            return '{\n    ' + ret + '\n}'

        print('reading image sizes')
        # Image size stats
        img_size_list  = ibs.get_image_sizes(valid_gids)
        img_size_stats  = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir                 = %d' % len(gpath_list)),
            (' Image Size Stats  = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        print('Building Stats String')

    multiton_stats = ut.get_stats_str(multiton_nid2_nannots, newlines=True, use_median=True)

    # Time stats
    unixtime_list = ibs.get_image_unixtime(valid_gids)
    unixtime_list = ut.list_replace(unixtime_list, -1, float('nan'))
    #valid_unixtime_list = [time for time in unixtime_list if time != -1]
    #unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.get_timestats_str(unixtime_list, newlines=True, full=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list  = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict((lambda : 0))
        for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
            if (min_age is None or min_age < 12) and max_age < 12:
                age_dict['Infant'] += 1
            elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
                age_dict['Juvenile'] += 1
            elif 36 <= min_age and (36 <= max_age or max_age is None):
                age_dict['Adult'] += 1
            else:
                print('Found UNKNOWN Age: %r, %r' % (min_age, max_age, ))
                age_dict['UNKNOWN'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys))
        sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys])
        # Filter 0's
        sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0}
        return sextext2_nAnnots

    if verbose:
        print('Checking Other Annot Stats')

    qualtext2_nAnnots = ibs.get_annot_qual_stats(valid_aids)
    yawtext2_nAnnots = ibs.get_annot_yaw_stats(valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        print('Checking Contrib Stats')

    # Contributor Statistics
    # hack remove colon for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]
    image_contrib_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contrib_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contrib_tag_to_gids = ut.group_items(valid_gids, image_contrib_tags)
    contrib_tag_to_aids = ut.group_items(valid_aids, annot_contrib_tags)

    contrib_tag_to_qualstats = {key: ibs.get_annot_qual_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)}
    contrib_tag_to_viewstats = {key: ibs.get_annot_yaw_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)}

    contrib_tag_to_nImages = {key: len(val) for key, val in six.iteritems(contrib_tag_to_gids)}
    contrib_tag_to_nAnnots = {key: len(val) for key, val in six.iteritems(contrib_tag_to_aids)}

    if verbose:
        print('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton =  len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_aids)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            print('Checking Disk Space')
        ibsdir_space   = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space    = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space   = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            print('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_aids)
            _num_names_total_check = num_names_singleton + num_names_unassociated + num_names_multiton
            _num_annots_total_check = num_unknown_annots + num_singleton_annots + num_multiton_annots
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            #if not request_annot_subset:
            # don't check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(ex, keys=[
                '_num_names_total_check',
                'num_names',
                '_num_annots_total_check',
                'num_annots',
                'num_names_singleton',
                'num_names_multiton',
                'num_unknown_annots',
                'num_multiton_annots',
                'num_singleton_annots',
            ])
            raise

    # Get contributor statistics
    contrib_rowids = ibs.get_valid_contrib_rowids()
    num_contributors = len(contrib_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.dict_str(dict_)
        return align2(str_)

    header_block_lines = (
        [('+============================'), ] + (
            [
                ('+ singleton := single sighting'),
                ('+ multiton  := multiple sightings'),
                ('--' * num_tabs),
            ] if not short and with_header else []
        )
    )

    source_block_lines = [
        ('DB Info:  ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = [
        ('--' * num_tabs),
        ('DB Bytes: '),
        ('     +- dbdir nBytes:         ' + dbdir_space),
        ('     |  +- _ibsdb nBytes:     ' + ibsdir_space),
        ('     |  |  +-imgdir nBytes:   ' + imgdir_space),
        ('     |  |  +-cachedir nBytes: ' + cachedir_space),
    ] if with_bytes else []

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names                      = %d' % num_names),
        ('# Names (unassociated)       = %d' % num_names_unassociated),
        ('# Names (singleton)          = %d' % num_names_singleton),
        ('# Names (multiton)           = %d' % num_names_multiton),
    ]

    subset_str = '        ' if not request_annot_subset else '(SUBSET)'

    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s            = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown)           = %d' % num_unknown_annots),
        ('# Annots (singleton)         = %d' % num_singleton_annots),
        ('# Annots (multiton)          = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = [
        ('--' * num_tabs),
        ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
        ('# Annots per Image           = %s' % (align2(gx2_nAnnots_stats),)),
        ('# Annots per Species         = %s' % (align_dict2(species2_nAids),)),
    ] if not short else []

    occurrence_block_lines = [
        ('--' * num_tabs),
        ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
        ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
        ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
    ] if not short else []

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(yawtext2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = [
        '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
        '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
    ] if not short  and with_agesex else []

    contrib_block_lines = [
        '# Images per contributor       = ' + align_dict2(contrib_tag_to_nImages),
        '# Annots per contributor       = ' + align_dict2(contrib_tag_to_nAnnots),
        '# Quality per contributor      = ' + ut.dict_str(contrib_tag_to_qualstats, sorted_=True),
        '# Viewpoint per contributor    = ' + ut.dict_str(contrib_tag_to_viewstats, sorted_=True),
    ] if with_contrib else []

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img                        = %d' % len(valid_gids)),
        None if short else ('# Img reviewed               = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps               = %d' % len(gps_list)),
        #('# Img with timestamp         = %d' % len(valid_unixtime_list)),
        None if short else ('Img Time Stats               = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines +
        bytes_block_lines +
        source_block_lines +
        name_block_lines +
        annot_block_lines +
        annot_per_basic_block_lines +
        occurrence_block_lines +
        annot_per_qualview_block_lines +
        annot_per_agesex_block_lines +
        img_block_lines +
        contrib_block_lines +
        imgsize_stat_lines +
        [('L============================'), ]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        print(info_str2)
    locals_ = locals()
    return locals_
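
Much of get_dbinfo reduces to splitting names by how many annotations they have. A minimal sketch of the singleton/multiton/unassociated split with hypothetical per-name groups:

import numpy as np

nx2_aids = [[1, 2, 3], [4], [], [5, 6]]  # hypothetical annotations grouped by name
nx2_nAnnots = np.array([len(aids) for aids in nx2_aids])
multiton_nxs = np.where(nx2_nAnnots > 1)[0]
singleton_nxs = np.where(nx2_nAnnots == 1)[0]
unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
print(len(multiton_nxs), len(singleton_nxs), len(unassociated_nxs))  # 2 1 1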
Ejemplo n.º 39
0
def clean_tags():
    zotero = get_libzotero()
    # dict of all zotero items
    # items = zotero.index
    # get sql cursor
    cur = zotero.cur
    if False:
        sorted(ut.util_sqlite.get_tablenames(cur))
        ut.print_database_structure(cur)
        # Debug info about tags table in sql

        # The `tags` table stores all tags
        # The itemTags table stores the association between items and tags
        ut.get_table_columninfo_list(cur, 'fields')
        # ut.get_table_columninfo_list(cur, 'relations')
        ut.get_table_columninfo_list(cur, 'fieldsCombined')

        ut.get_table_columninfo_list(cur, 'itemData')
        ut.get_table_columninfo_list(cur, 'itemDataValues')

        ut.get_table_columninfo_list(cur, 'tags')
        ut.get_table_columninfo_list(cur, 'itemTags')

    import pandas as pd
    pd.options.display.max_colwidth = 40
    pd.options.display.max_rows = 20
    def pandas_sql(table, columns):
        return pd.DataFrame(ut.get_table_rows(cur, table, columns),
                            columns=columns)

    item_df = pandas_sql('items', ('itemID', 'itemTypeID', 'libraryID', 'key')).set_index('itemID', drop=False)
    tags_df = pandas_sql('tags', ('tagID', 'name', 'type', 'libraryID', 'key')).set_index('tagID', drop=False)
    itemData_df = pandas_sql('itemData', ('itemID', 'fieldID', 'valueID'))

    itemTag_df = pandas_sql('itemTags', ('itemID', 'tagID'))

    itemDataValues_df = pandas_sql('itemDataValues', ('valueID', 'value')).set_index('valueID')
    field_df = pandas_sql('fields', ('fieldID', 'fieldName', 'fieldFormatID')).set_index('fieldID')

    itemData_df['value'] = itemDataValues_df['value'].loc[itemData_df['valueID'].values].values
    itemData_df['fieldName'] = field_df['fieldName'].loc[itemData_df['fieldID'].values].values

    titles = itemData_df[itemData_df['fieldName'] == 'title']
    assert len(ut.unique(ut.map_vals(len, titles.groupby('itemID').indices).values())) == 1

    # itemTag_df.groupby('itemID').count()
    # Find how often each tag is used
    tagid_to_count = itemTag_df.groupby('tagID').count()
    tagid_to_count = tagid_to_count.rename(columns={'itemID': 'nItems'})
    tagid_to_count['name'] = tags_df.loc[tagid_to_count.index]['name']
    tagid_to_count = tagid_to_count.sort_values('nItems')

    bad_tags = tagid_to_count[tagid_to_count['nItems'] == 1]

    tagid_to_count['tag_ncharsize'] = tagid_to_count['name'].apply(len)
    tagid_to_count = tagid_to_count.sort_values('tag_ncharsize')
    bad_tags = tagid_to_count[tagid_to_count['tag_ncharsize'] > 25]['name'].values.tolist()

    def clean_tags2():
        api_key = 'fBDBqRPwW9O3mYyNLiksBKZy'
        base_url = 'https://api.zotero.org'
        library_id = '1279414'
        library_type = 'user'
        from pyzotero import zotero
        zot = zotero.Zotero(library_id, library_type, api_key)

        for chunk in ut.ProgChunks(bad_tags, 50):
            zot.delete_tags(*chunk)

    if False:
        api_key = 'fBDBqRPwW9O3mYyNLiksBKZy'
        base_url = 'https://api.zotero.org'
        user_id = '1279414'
        userOrGroupPrefix = '/users/' + user_id
        params = {'v': 3, 'key': api_key}

        items_resp = requests.get(base_url + userOrGroupPrefix + '/items', params=params)
        print(items_resp.content)
        print(items_resp)

        json_tags = []
        get_url = base_url + userOrGroupPrefix + '/tags'
        while True:
            print('get_url = %r' % (get_url,))
            tag_resp = requests.get(get_url, params=params)
            if tag_resp.status_code != 200:
                break
            json_tags.extend(tag_resp.json())
            if 'next' in tag_resp.links:
                get_url = tag_resp.links['next']['url']
            else:
                break

        version_to_tags = ut.ddict(list)
        bad_tags = []
        for tag in ut.ProgIter(json_tags, label='parsing tags'):
            # x = requests.get(tag['links']['self']['href'], params=params)
            if tag['meta']['numItems'] == 1:
                import urllib2
                try:
                    bad_tags.append(urllib2.quote(tag['tag']))
                except Exception as ex:
                    print('cant encode tag=%r' % (tag,))
                    pass

        for chunk in ut.ProgIter(ut.ichunks(bad_tags, 50), length=len(bad_tags) / 50):
            search_url = base_url + userOrGroupPrefix + '/items?tag=' + ' || '.join(chunk)
            r = requests.get(search_url, params=params)
            matching_items = r.json()
            # assert len(matching_items) == 1
            for item in matching_items:
                version = item['version']
            version_to_tags[item['version']].append(tag['tag'])

        # DELETE MULTIPLE TAGS
        import requests
        for chunk in ut.ichunks(bad_tags['name'], 50):
            import urllib2
            encoded_chunk = []
            for t in chunk:
                try:
                    encoded_chunk.append(urllib2.quote(t))
                except Exception:
                    print(t)
            suffix = ' || '.join(encoded_chunk)
            delete_url = base_url + userOrGroupPrefix + '/tags?' + suffix
            print('delete_url = %r' % (delete_url,))
            resp = requests.delete(delete_url, params=params)

        bad_tags = tagid_to_count[tagid_to_count['nItems'] == 1]
        bad_tags['tagID'] = bad_tags.index
        # Remove the single-use automatic (type=1) tags directly via sqlite
        cur.execute('DELETE FROM itemTags WHERE tagID IN '
                    '(SELECT tagID FROM tags WHERE type=1)')
    item_df['title'] = titles.set_index('itemID')['value']
    for idx, item in zotero.index.items():
        sql_title = item_df.loc[item.id]['title']
        if item.title != sql_title:
            if pd.isnull(sql_title) and item.title is not None:
                print(item.__dict__)
                print(item_df.loc[item.id])
                print('item.title = %r' % (item.title,))
                print('sql_title = %r' % (sql_title,))
                assert False

    # tag names that map to more than one tagID are duplicates
    duplicate_tags = [
        (name, idxs) for name, idxs in tags_df.groupby('name', sort=True).indices.items()
        if len(idxs) > 1
    ]
    tagname_to_tagid = tags_df.groupby('name', sort=True).first()
    new_to_oldtags = {}
    # Determine which tagID to use for each name
    for tagname, idxs in duplicate_tags:
        tags_subdf = tags_df.iloc[idxs]
        mapping = itemTag_df[itemTag_df['tagID'].isin(tags_subdf['tagID'])]
        tag_hist = mapping.groupby('tagID').count()
        best_tagid = tag_hist['itemID'].idxmax()

        # remap every other tagID that shares this name onto the most-used one
        new_to_oldtags[best_tagid] = set(tag_hist.index.values) - {best_tagid}

        tagname_to_tagid.loc[tagname] = tags_df.loc[best_tagid]
        # for col in tagname_to_tagid.columns:
        #     tagname_to_tagid.loc[tagname][col] = tags_df.loc[best_tagid][col]
        # tags_df.loc[best_tagid]

    if False:
        # Update tagIDs: repoint item-tag relations from duplicate tags onto
        # the canonical tagID chosen above
        import sqlite3
        for newid, oldids in new_to_oldtags.items():
            for oldid in oldids:
                # cur.execute('SELECT itemID, tagID FROM itemTags WHERE tagID=?', (oldid,))
                try:
                    cmd = 'UPDATE itemTags SET tagID=? WHERE tagID=?'
                    args = (newid, oldid)
                    print('(%s) args = %r' % (cmd, args,))
                    cur.execute(cmd, args)
                except sqlite3.IntegrityError:
                    # the item already has the canonical tag attached
                    print('error')

    # tags_df.groupby('name', sort=True)

    # itemTag_df.groupby('itemID')
    # duptags = tags_df.iloc[tags_df.groupby('name', sort=True).indices['animals']]
    # duptags['tagID']
    # flags = itemTag_df['tagID'].isin(duptags['tagID'])
    # dup_rel = itemTag_df[flags]
    # item_df['title'].loc[dup_rel['itemID']].values
    # tags_df.iloc[tags_df.groupby('name', sort=True).indices['animals']]

    # tags_df[tags_df['type'] == 1]
    # tags_df[tags_df['type'] == 0]
    # tags_df['libraryID'].unique()
    # tags_df['type'].unique()

    '''
    SELECT * FROM itemTags
    WHERE tagID IN (SELECT tagID FROM tags WHERE name IN ('animals'))
    '''

    item_tag_pairs = ut.get_table_rows(cur, 'itemTags', ('itemID', 'tagID'))
    # Group tags by item
    itemid_to_tagids = ut.group_pairs(item_tag_pairs)
    # Group items by tags
    tagid_to_itemids = ut.group_pairs(map(tuple, map(reversed, item_tag_pairs)))

    # mapping from tagid to name
    tagid_to_name = dict(ut.get_table_rows(cur, 'tags', ('tagID', 'name')))

    tagid_freq = list(ut.sort_dict(ut.map_vals(len, tagid_to_itemids), 'vals').items())
    freq_name_pairs = [(freq, tagid_to_name.get(tagid, tagid))
                       for tagid, freq in tagid_freq]
    ut.sort_dict(ut.map_vals(sum, ut.group_pairs(freq_name_pairs)), 'vals')
    tagname_freq = ut.map_keys(lambda k: tagid_to_name.get(k, k), tagid_freq)
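    # For reference, a minimal standard-library sketch of the same frequency
    # computation (hypothetical names tag_usage / tagname_usage; the utool
    # calls above are what this script actually relies on):
    from collections import Counter
    tag_usage = Counter(tagid for _itemid, tagid in item_tag_pairs)
    tagname_usage = {tagid_to_name.get(tagid, tagid): n
                     for tagid, n in tag_usage.most_common()}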
Example No. 40
0
def intraoccurrence_connected():
    r"""
    CommandLine:
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --postcut
        python -m ibeis.scripts.specialdraw intraoccurrence_connected --show --smaller

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.specialdraw import *  # NOQA
        >>> result = intraoccurrence_connected()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import ibeis
    import plottool as pt
    from ibeis.viz import viz_graph
    import networkx as nx
    pt.ensure_pylab_qt4()
    ibs = ibeis.opendb(defaultdb='PZ_Master1')
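    # hand-picked query annotation rowids (aids) keyed by name rowid (nid);
    # nid2_dbaids below holds the database exemplars for the same names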
    nid2_aid = {
        #4880: [3690, 3696, 3703, 3706, 3712, 3721],
        4880: [3690, 3696, 3703],
        6537: [3739],
        6653: [7671],
        6610: [7566, 7408],
        #6612: [7664, 7462, 7522],
        #6624: [7465, 7360],
        #6625: [7746, 7383, 7390, 7477, 7376, 7579],
        6630: [7586, 7377, 7464, 7478],
        #6677: [7500]
    }
    nid2_dbaids = {
        4880: [33, 6120, 7164],
        6537: [7017, 7206],
        6653: [7660]
    }
    if ut.get_argflag('--small') or ut.get_argflag('--smaller'):
        del nid2_aid[6630]
        del nid2_aid[6537]
        del nid2_dbaids[6537]
        if ut.get_argflag('--smaller'):
            nid2_dbaids[4880].remove(33)
            nid2_aid[4880].remove(3690)
            nid2_aid[6610].remove(7408)
        #del nid2_aid[4880]
        #del nid2_dbaids[4880]

    aids = ut.flatten(nid2_aid.values())

    temp_nids = [1] * len(aids)
    postcut = ut.get_argflag('--postcut')
    aids_list = ibs.group_annots_by_name(aids)[0]
    # hack: always ensure all edges for now (the postcut-only branch is disabled)
    ensure_edges = 'all' if True or not postcut else None
    unlabeled_graph = viz_graph.make_netx_graph_from_aid_groups(
        ibs, aids_list,
        #invis_edges=invis_edges,
        ensure_edges=ensure_edges, temp_nids=temp_nids)
    viz_graph.color_by_nids(unlabeled_graph, unique_nids=[1] *
                            len(list(unlabeled_graph.nodes())))
    viz_graph.ensure_node_images(ibs, unlabeled_graph)
    nx.set_node_attributes(unlabeled_graph, 'shape', 'rect')
    #unlabeled_graph = unlabeled_graph.to_undirected()

    # Find the "database exemplars for these annots"
    if False:
        gt_aids = ibs.get_annot_groundtruth(aids)
        gt_aids = [ut.setdiff(s, aids) for s in gt_aids]
        dbaids = ut.unique(ut.flatten(gt_aids))
        dbaids = ibs.filter_annots_general(dbaids, minqual='good')
        ibs.get_annot_quality_texts(dbaids)
    else:
        dbaids = ut.flatten(nid2_dbaids.values())
    exemplars = nx.DiGraph()
    #graph = exemplars  # NOQA
    exemplars.add_nodes_from(dbaids)

    def add_clique(graph, nodes, edgeattrs={}, nodeattrs={}):
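        # ut.upper_diag_self_prodx(nodes) enumerates every unordered pair of
        # nodes (the upper triangle of nodes x nodes), so adding those edges
        # forms a clique over the group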
        edge_list = ut.upper_diag_self_prodx(nodes)
        graph.add_edges_from(edge_list, **edgeattrs)
        return edge_list

    for aids_, nid in zip(*ibs.group_annots_by_name(dbaids)):
        add_clique(exemplars, aids_)
    viz_graph.ensure_node_images(ibs, exemplars)
    viz_graph.color_by_nids(exemplars, ibs=ibs)

    nx.set_node_attributes(unlabeled_graph, 'framewidth', False)
    nx.set_node_attributes(exemplars,  'framewidth', 4.0)

    nx.set_node_attributes(unlabeled_graph, 'group', 'unlab')
    nx.set_node_attributes(exemplars,  'group', 'exemp')

    #big_graph = nx.compose_all([unlabeled_graph])
    big_graph = nx.compose_all([exemplars, unlabeled_graph])

    # add sparse connections from unlabeled to exemplars
    import numpy as np
    rng = np.random.RandomState(0)
    if True or not postcut:
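        # connect each unlabeled annot to a random ~half of the exemplars,
        # then force-in the exemplars that share its name so every true-match
        # edge is guaranteed to be present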
        for aid_ in unlabeled_graph.nodes():
            flags = rng.rand(len(exemplars)) > .5
            nid_ = ibs.get_annot_nids(aid_)
            exnids = np.array(ibs.get_annot_nids(list(exemplars.nodes())))
            flags = np.logical_or(exnids == nid_, flags)
            exmatches = ut.compress(list(exemplars.nodes()), flags)
            big_graph.add_edges_from(list(ut.product([aid_], exmatches)),
                                     color=pt.ORANGE, implicit=True)
    else:
        for aid_ in unlabeled_graph.nodes():
            flags = rng.rand(len(exemplars)) > .5
            exmatches = ut.compress(list(exemplars.nodes()), flags)
            nid_ = ibs.get_annot_nids(aid_)
            exnids = np.array(ibs.get_annot_nids(exmatches))
            exmatches = ut.compress(exmatches, exnids == nid_)
            big_graph.add_edges_from(list(ut.product([aid_], exmatches)))
        pass

    nx.set_node_attributes(big_graph, 'shape', 'rect')
    #if False and postcut:
    #    ut.nx_delete_node_attr(big_graph, 'nid')
    #    ut.nx_delete_edge_attr(big_graph, 'color')
    #    viz_graph.ensure_graph_nid_labels(big_graph, ibs=ibs)
    #    viz_graph.color_by_nids(big_graph, ibs=ibs)
    #    big_graph = big_graph.to_undirected()

    layoutkw = {
        'sep': 1 / 5,          # extra margin between nodes when removing overlap
        'prog': 'neato',       # graphviz layout engine
        'overlap': 'false',    # let graphviz remove node overlaps
        #'splines': 'ortho',
        'splines': 'spline',   # draw edges as curved splines
    }

    as_directed = False
    #as_directed = True
    #hacknode = True
    hacknode = 0

    graph = big_graph
    ut.nx_ensure_agraph_color(graph)
    if hacknode:
        nx.set_edge_attributes(graph, 'taillabel', {e: str(e[0]) for e in graph.edges()})
        nx.set_edge_attributes(graph, 'headlabel', {e: str(e[1]) for e in graph.edges()})

    explicit_graph = pt.get_explicit_graph(graph)
    _, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph,
                                         inplace=True, **layoutkw)

    if ut.get_argflag('--smaller'):
        graph.node[7660]['pos'] = np.array([550, 350])
        graph.node[6120]['pos'] = np.array([200, 600]) + np.array([350, -400])
        graph.node[7164]['pos'] = np.array([200, 480]) + np.array([350, -400])
        nx.set_node_attributes(graph, 'pin', 'true')
        _, layout_info = pt.nx_agraph_layout(graph,
                                             inplace=True, **layoutkw)
    elif ut.get_argflag('--small'):
        graph.node[7660]['pos'] = np.array([750, 350])
        graph.node[33]['pos'] = np.array([300, 600]) + np.array([350, -400])
        graph.node[6120]['pos'] = np.array([500, 600]) + np.array([350, -400])
        graph.node[7164]['pos'] = np.array([410, 480]) + np.array([350, -400])
        nx.set_node_attributes(graph, 'pin', 'true')
        _, layout_info = pt.nx_agraph_layout(graph,
                                             inplace=True, **layoutkw)

    if not postcut:
        #pt.show_nx(graph.to_undirected(), layout='agraph', layoutkw=layoutkw,
        #           as_directed=False)
        #pt.show_nx(graph, layout='agraph', layoutkw=layoutkw,
        #           as_directed=as_directed, hacknode=hacknode)

        pt.show_nx(graph, layout='custom', layoutkw=layoutkw,
                   as_directed=as_directed, hacknode=hacknode)
    else:
        #explicit_graph = pt.get_explicit_graph(graph)
        #_, layout_info = pt.nx_agraph_layout(explicit_graph, orig_graph=graph,
        #                                     **layoutkw)

        #layout_info['edge']['alpha'] = .8
        #pt.apply_graph_layout_attrs(graph, layout_info)

        #graph_layout_attrs = layout_info['graph']
        ##edge_layout_attrs  = layout_info['edge']
        ##node_layout_attrs  = layout_info['node']

        #for key, vals in layout_info['node'].items():
        #    #print('[special] key = %r' % (key,))
        #    nx.set_node_attributes(graph, key, vals)

        #for key, vals in layout_info['edge'].items():
        #    #print('[special] key = %r' % (key,))
        #    nx.set_edge_attributes(graph, key, vals)

        #nx.set_edge_attributes(graph, 'alpha', .8)
        #graph.graph['splines'] = graph_layout_attrs.get('splines', 'line')
        #graph.graph['splines'] = 'polyline'   # graph_layout_attrs.get('splines', 'line')
        #graph.graph['splines'] = 'line'

        cut_graph = graph.copy()
        edge_list = list(cut_graph.edges())
        edge_nids = np.array(ibs.unflat_map(ibs.get_annot_nids, edge_list))
        cut_flags = edge_nids.T[0] != edge_nids.T[1]
        cut_edges = ut.compress(edge_list, cut_flags)
        cut_graph.remove_edges_from(cut_edges)
        ut.nx_delete_node_attr(cut_graph, 'nid')
        viz_graph.ensure_graph_nid_labels(cut_graph, ibs=ibs)

        #ut.nx_get_default_node_attributes(exemplars, 'color', None)
        ut.nx_delete_node_attr(cut_graph, 'color', nodes=unlabeled_graph.nodes())
        aid2_color = ut.nx_get_default_node_attributes(cut_graph, 'color', None)
        nid2_colors = ut.group_items(list(aid2_color.values()),
                                     ibs.get_annot_nids(list(aid2_color.keys())))
        nid2_colors = ut.map_dict_vals(ut.filter_Nones, nid2_colors)
        nid2_colors = ut.map_dict_vals(ut.unique, nid2_colors)
        #for val in nid2_colors.values():
        #    assert len(val) <= 1
        # Get initial colors
        nid2_color_ = {nid: colors_[0] for nid, colors_ in nid2_colors.items()
                       if len(colors_) == 1}

        graph = cut_graph
        viz_graph.color_by_nids(cut_graph, ibs=ibs, nid2_color_=nid2_color_)
        nx.set_node_attributes(cut_graph, 'framewidth', 4)

        pt.show_nx(cut_graph, layout='custom', layoutkw=layoutkw,
                   as_directed=as_directed, hacknode=hacknode)

    pt.zoom_factory()