Example #1
def build_dpath_to_fidx(fpath_list, fidx_list, root_dpath):
    dpath_to_fidx = ut.ddict(list)
    nTotal = len(fpath_list)
    _iter = zip(fidx_list, fpath_list)
    for fidx, fpath in ut.ProgIter(_iter, 'making dpath fidx map',
                                   freq=50000, nTotal=nTotal):
        current_path = fpath
        while True:
            current_path = dirname(current_path)
            dpath_to_fidx[current_path].append(fidx)
            if current_path == root_dpath:
                break
    return dpath_to_fidx
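
Throughout these examples, ut.ddict appears to be utool's alias for collections.defaultdict, so each idiom has a direct stdlib counterpart. A minimal sketch of the same ancestor-directory map using only the standard library (the function name is illustrative; like the original, it assumes root_dpath is an ancestor of every fpath, otherwise the walk never terminates):

from collections import defaultdict
from os.path import dirname

def build_dpath_to_fidx_sketch(fpath_list, fidx_list, root_dpath):
    # Map every ancestor directory (up to root_dpath) to the file indices under it
    dpath_to_fidx = defaultdict(list)
    for fidx, fpath in zip(fidx_list, fpath_list):
        current_path = fpath
        while current_path != root_dpath:
            current_path = dirname(current_path)
            dpath_to_fidx[current_path].append(fidx)
    return dpath_to_fidx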
Example #2
    def _get_cm_edge_data(infr, edges, cm_list=None):
        symmetric = True

        if cm_list is None:
            cm_list = infr.cm_list
        # Find scores for the edges that exist in the graph
        edge_to_data = ut.ddict(dict)
        aid_to_cm = {cm.qaid: cm for cm in cm_list}
        for u, v in edges:
            if symmetric:
                u, v = e_(u, v)
            cm1 = aid_to_cm.get(u, None)
            cm2 = aid_to_cm.get(v, None)
            scores = []
            ranks = []
            for cm in ut.filter_Nones([cm1, cm2]):
                for aid in [u, v]:
                    idx = cm.daid2_idx.get(aid, None)
                    if idx is None:
                        continue
                    score = cm.annot_score_list[idx]
                    rank = cm.get_annot_ranks([aid])[0]
                    scores.append(score)
                    ranks.append(rank)
            if len(scores) == 0:
                score = None
                rank = None
            else:
                # Choose whichever one gave the best score
                idx = vt.safe_argmax(scores, nans=False)
                score = scores[idx]
                rank = ranks[idx]
            edge_to_data[(u, v)]['score'] = score
            edge_to_data[(u, v)]['rank'] = rank
        return edge_to_data
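
The ut.ddict(dict) idiom above is what lets edge_to_data[(u, v)]['score'] = score work without creating the inner dict first. A stdlib sketch of the same pattern:

from collections import defaultdict

edge_to_data = defaultdict(dict)
edge_to_data[(1, 2)]['score'] = 0.9  # inner dict autovivifies on first access
edge_to_data[(1, 2)]['rank'] = 0
assert edge_to_data[(1, 2)] == {'score': 0.9, 'rank': 0}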
Example #3
def convert_multigraph_to_graph(G):
    """
    For each duplicate edge make a dummy node.
    TODO: preserve data, keys, and directedness
    """
    import utool as ut
    edge_list = list(G.edges())
    node_list = list(G.nodes())
    dupitem_to_idx = ut.find_duplicate_items(edge_list)
    node_to_freq = ut.ddict(lambda: 0)
    remove_idxs = ut.flatten(dupitem_to_idx.values())
    ut.delete_items_by_index(edge_list, remove_idxs)

    for dup_edge in dupitem_to_idx.keys():
        freq = len(dupitem_to_idx[dup_edge])
        u, v = dup_edge[0:2]
        pair_node = dup_edge
        pair_nodes = [pair_node + tuple([count]) for count in range(freq)]
        for pair_node in pair_nodes:
            node_list.append(pair_node)
            for node in dup_edge:
                node_to_freq[node] += freq
            edge_list.append((u, pair_node))
            edge_list.append((pair_node, v))

    import networkx as nx
    G2 = nx.DiGraph()
    G2.add_edges_from(edge_list)
    G2.add_nodes_from(node_list)
    return G2
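
A hypothetical usage sketch (the input graph is illustrative): each set of parallel edges is rerouted through dummy pair-nodes, so the result contains no duplicate edges.

import networkx as nx

G = nx.MultiGraph()
G.add_edges_from([(1, 2), (1, 2), (2, 3)])  # (1, 2) is a duplicate edge
G2 = convert_multigraph_to_graph(G)
# Both copies of (1, 2) now route through dummy nodes (1, 2, 0) and (1, 2, 1)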
Example #4
def nx_to_adj_dict(graph):
    import utool as ut
    adj_dict = ut.ddict(list)
    for u, edges in graph.adjacency():
        adj_dict[u].extend(list(edges.keys()))
    adj_dict = dict(adj_dict)
    return adj_dict
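
A hypothetical usage sketch on a small undirected graph:

import networkx as nx

g = nx.Graph([(1, 2), (2, 3)])
assert nx_to_adj_dict(g) == {1: [2], 2: [1, 3], 3: [2]}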
Example #5
    def mcc_hack():
        sample_weight = np.ones(len(self.samples), dtype=np.int)
        task_mccs = ut.ddict(dict)
        # Determine threshold levels per score type
        score_to_order = {}
        for scoretype in score_dict.keys():
            y_score = score_dict[scoretype].values
            sortx = np.argsort(y_score, kind='mergesort')[::-1]
            y_score = y_score[sortx]
            distinct_value_indices = np.where(np.diff(y_score))[0]
            threshold_idxs = np.r_[distinct_value_indices, y_score.size - 1]
            thresh = y_score[threshold_idxs]
            score_to_order[scoretype] = (sortx, y_score, thresh)

        classes_ = np.array([0, 1], dtype=np.int)
        for task in task_list:
            labels = self.samples.subtasks[task]
            for sublabels in labels.gen_one_vs_rest_labels():
                for scoretype in score_dict.keys():
                    sortx, y_score, thresh = score_to_order[scoretype]
                    y_true = sublabels.y_enc[sortx]
                    mcc = -np.inf
                    for t in thresh:
                        y_pred = (y_score > t).astype(np.int)
                        C1 = quick_cm(y_true, y_pred, classes_, sample_weight)
                        mcc1 = quick_mcc(C1)
                        if mcc1 < 0:
                            C2 = quick_cm(y_true, 1 - y_pred, classes_, sample_weight)
                            mcc1 = quick_mcc(C2)
                        mcc = max(mcc1, mcc)
                    # logger.info('mcc = %r' % (mcc,))
                    task_mccs[sublabels.task_name][scoretype] = mcc
        return task_mccs
Example #6
def cheetah_stats(ibs):
    filters = [
        dict(view=['right', 'frontright', 'backright'], minqual='good'),
        dict(view=['right', 'frontright', 'backright']),
    ]
    for filtkw in filters:
        annots = ibs.annots(ibs.filter_annots_general(**filtkw))
        unique_nids, grouped_annots = annots.group(annots.nids)
        annots_per_name = ut.lmap(len, grouped_annots)
        annots_per_name_freq = ut.dict_hist(annots_per_name)
        def bin_mapper(num):
            if num < 5:
                return (num, num + 1)
            else:
                for bin, mod in [(20, 5), (50, 10)]:
                    if num < bin:
                        low = (num // mod) * mod
                        high = low + mod
                        return (low, high)
                if num >= bin:
                    return (bin, None)
                else:
                    assert False, str(num)
        hist = ut.ddict(lambda: 0)
        for num in annots_per_name:
            hist[bin_mapper(num)] += 1
        hist = ut.sort_dict(hist)

        print('------------')
        print('filters = %s' % ut.repr4(filtkw))
        print('num_annots = %r' % (len(annots)))
        print('num_names = %r' % (len(unique_nids)))
        print('annots_per_name_freq = %s' % (ut.repr4(annots_per_name_freq)))
        print('annots_per_name_freq (ranges) = %s' % (ut.repr4(hist)))
        assert sum(hist.values()) == len(unique_nids)
Example #7
 def trnps_(dict_list):
     """ tranpose dict list """
     list_dict = ut.ddict(list)
     for dict_ in dict_list:
         for key, val in dict_.items():
             list_dict[key + '_list'].append(val)
     return list_dict
Example #9
def clean_line_profile_text(text):
    """
    Sorts the output from line profile by execution time
    Removes entries which were not run
    """
    #
    # Split the file into blocks along delimiters and put the delimiters back in the list
    delim = 'Total time: '
    #delim = 'File: '
    list_ = utool.regex_split('^' + delim, text)
    for ix in range(1, len(list_)):
        list_[ix] = delim + list_[ix]
    #
    # Build a map from times to line_profile blocks
    prefix_list = []
    timemap = utool.ddict(list)
    for ix in range(len(list_)):
        block = list_[ix]
        total_time = get_block_totaltime(block)
        # Blocks without time go at the front of sorted output
        if total_time is None:
            prefix_list.append(block)
        # Blocks that are not run are not appended to output
        elif total_time != 0:
            timemap[total_time].append(block)
    # Sort the blocks by time
    sorted_lists = sorted(six.iteritems(timemap), key=operator.itemgetter(0))
    newlist = prefix_list[:]
    for key, val in sorted_lists:
        newlist.extend(val)
    # Rejoin output text
    output_text = '\n'.join(newlist)
    return output_text
Example #11
    def make_header(tblname):
        """
        Args:
            table_name - the internal table name
        """
        tblnice    = TABLE_NICE[tblname]
        colnames   = TABLE_COLNAMES[tblname]
        editset    = TABLE_EDITSET[tblname]
        tblgetters = getters[tblname]
        tblsetters = setters[tblname]
        #if levels aren't found, we're not dealing with a tree, so everything is at level 0
        collevel_dict = TABLE_TREE_LEVELS.get(tblname, ut.ddict(lambda: 0))
        collevels  = [collevel_dict[colname] for colname in colnames]
        hiddencols = TABLE_HIDDEN_LIST.get(tblname, [False for _ in range(len(colnames))])
        numstripes = TABLE_STRIPE_LIST.get(tblname, 1)

        colwidths_dict = widths.get(tblname, {})
        colwidths = [colwidths_dict.get(colname, 100) for colname in colnames]

        def get_column_data(colname):
            try:
                coldef_tup = COL_DEF[colname]
                coltype, colnice = coldef_tup
            except KeyError as ex:
                strict = False
                ut.printex(ex, 'Need to add type info for colname=%r to COL_DEF'
                           % colname, iswarning=not strict)
                if strict:
                    raise
                else:
                    # default coldef to give a string type and nice=colname
                    coltype, colnice = (str, colname)
            coledit   = colname in editset
            colgetter = tblgetters[colname]
            colsetter = None if not coledit else tblsetters.get(colname, None)
            return (coltype, colnice, coledit, colgetter, colsetter)
        try:
            _tuplist = list(zip(*list(map(get_column_data, colnames))))
            (coltypes, colnices, coledits, colgetters, colsetters) = _tuplist
        except KeyError as ex:
            ut.printex(ex,  key_list=['tblname', 'colnames'])
            raise
        header = {
            'name'            : tblname,
            'nice'            : tblnice,
            'iders'           : iders[tblname],
            'col_name_list'   : colnames,
            'col_type_list'   : coltypes,
            'col_nice_list'   : colnices,
            'col_edit_list'   : coledits,
            'col_getter_list' : colgetters,
            'col_setter_list' : colsetters,
            'col_level_list'  : collevels,
            'col_hidden_list' : hiddencols,
            'num_duplicates'  : numstripes,
            'get_thumb_size'  : lambda: ibs.cfg.other_cfg.thumb_size,
            'col_width_list'  : colwidths,  # TODO
        }
        return header
Example #13
def init_tablecache():
    r"""
    Returns:
       defaultdict: tablecache

    CommandLine:
        python -m ibeis.control.accessor_decors --test-init_tablecache

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.accessor_decors import *  # NOQA
        >>> result = init_tablecache()
        >>> print(result)
    """
    # 4 levels of dictionaries
    # tablename, colname, kwargs, and then rowids
    tablecache = ut.ddict(lambda: ut.ddict(lambda: ut.ddict(dict)))
    return tablecache
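
Because a defaultdict factory can itself build defaultdicts, the nested lambdas give an autovivifying 4-level cache. A stdlib sketch with illustrative keys:

from collections import defaultdict

tablecache = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
tablecache['annot']['name'][('cfg1',)][42] = 'zebra'  # no intermediate setup needed
assert tablecache['annot']['name'][('cfg1',)][42] == 'zebra'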
Example #14
def build_multindex(list_):
    """
    Creates a mapping from unique items to the indices at which they appear
    """
    multiindex_dict_ = ut.ddict(list)
    for item, index in zip(list_, range(len(list_))):
        if item is not None:
            multiindex_dict_[item].append(index)
    return multiindex_dict_
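
An equivalent stdlib sketch, using enumerate in place of zip(list_, range(len(list_))):

from collections import defaultdict

def build_multindex_sketch(list_):
    # Map each non-None item to every index at which it occurs
    multiindex = defaultdict(list)
    for index, item in enumerate(list_):
        if item is not None:
            multiindex[item].append(index)
    return multiindex

assert build_multindex_sketch(['a', None, 'a', 'b']) == {'a': [0, 2], 'b': [3]}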
Example #17
def accumulate_scores(dscores_list, daids_list):
    """ helper to accumulate grouped scores for database annotations """
    daid2_aggscore = utool.ddict(lambda: 0)
    ### Weirdly iflatten was slower here
    for dscores, daids in zip(dscores_list, daids_list):
        for daid, score in zip(daids, dscores):
            daid2_aggscore[daid] += score
    daid_agg_keys   = np.array(list(daid2_aggscore.keys()))
    daid_agg_scores = np.array(list(daid2_aggscore.values()))
    return daid_agg_keys, daid_agg_scores
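
collections.Counter is a stdlib stand-in for ddict(lambda: 0) here, since it also yields 0 for missing keys. A sketch under that assumption:

from collections import Counter
import numpy as np

def accumulate_scores_sketch(dscores_list, daids_list):
    # Sum the scores contributed to each database annotation id
    daid2_aggscore = Counter()
    for dscores, daids in zip(dscores_list, daids_list):
        for daid, score in zip(daids, dscores):
            daid2_aggscore[daid] += score
    daid_agg_keys = np.array(list(daid2_aggscore.keys()))
    daid_agg_scores = np.array(list(daid2_aggscore.values()))
    return daid_agg_keys, daid_agg_scores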
Example #19
def filter_duplicate_acfgs(expanded_aids_list, acfg_list, acfg_name_list, verbose=None):
    """
    Removes configs with the same expanded aids list

    CommandLine:
        # The following will trigger this function:
        wbia -m wbia get_annotcfg_list:0 -a timectrl timectrl:view=left --db PZ_MTEST

    """
    from wbia.expt import annotation_configs

    if verbose is None:
        verbose = ut.VERBOSE
    acfg_list_ = []
    expanded_aids_list_ = []
    seen_ = ut.ddict(list)
    for acfg, (qaids, daids) in zip(acfg_list, expanded_aids_list):
        key = (ut.hashstr_arr27(qaids, 'qaids'), ut.hashstr_arr27(daids, 'daids'))
        if key in seen_:
            seen_[key].append(acfg)
            continue
        else:
            seen_[key].append(acfg)
            expanded_aids_list_.append((qaids, daids))
            acfg_list_.append(acfg)
    if verbose:
        duplicate_configs = dict(
            [(key_, val_) for key_, val_ in seen_.items() if len(val_) > 1]
        )
        if len(duplicate_configs) > 0:
            logger.info('The following configs produced duplicate annotation configs')
            for key, val in duplicate_configs.items():
                # Print the difference between the duplicate configs
                _tup = annotation_configs.compress_acfg_list_for_printing(val)
                nonvaried_compressed_dict, varied_compressed_dict_list = _tup
                logger.info('+--')
                logger.info('key = %r' % (key,))
                logger.info(
                    'duplicate_varied_cfgs = %s'
                    % (ut.repr2(varied_compressed_dict_list),)
                )
                logger.info(
                    'duplicate_nonvaried_cfgs = %s'
                    % (ut.repr2(nonvaried_compressed_dict),)
                )
                logger.info('L__')

        if verbose >= 1:
            logger.info(
                '[harn.help] parsed %d / %d unique annot configs'
                % (len(acfg_list_), len(acfg_list))
            )
        if verbose > 2:
            logger.info('[harn.help] parsed from: %r' % (acfg_name_list,))
    return expanded_aids_list_, acfg_list_
Example #20
 def __init__(self, rowids, obj1d, cache=None):
     self._rowids = list(rowids)
     self._obj1d = obj1d
     self._unique_rowids = set(self._rowids)
     self._unique_inverse = ut.list_alignment(self._unique_rowids, self._rowids)
     if cache is None:
         self._cache = ut.ddict(dict)
     else:
         self._cache = cache
     # Views always cache data for now
     self._caching = True
Example #21
    def sub(self, other):
        """
        CommandLine:
            python -m mtgmonte.mtgobjs --exec-ManaSet.sub:0
            python -m mtgmonte.mtgobjs --exec-ManaSet.sub:1

        Example:
            >>> # ENABLE_DOCTEST
            >>> from mtgmonte.mtgobjs import *
            >>> from mtgmonte import mtgobjs
            >>> self = mtgobjs.ManaSet('RRRUC')
            >>> other = mtgobjs.ManaSet('RRU')
            >>> mana = self - other
            >>> result = ('mana = %s' % (mana,))
            >>> print(result)
            mana = {RC}

        Example:
            >>> # ENABLE_DOCTEST
            >>> from mtgmonte.mtgobjs import *  # NOQA
            >>> self = ManaSet(['WWURC'])
            >>> other = ManaCost([('W', 'colored'), ('W', 'colored'), ('U', 'colored'), ('1', 'uncolored')])
            >>> mana = self - other
            >>> result = ('mana = %s' % (mana,))
            >>> print(result)
            mana = {R}
        """
        if isinstance(other, ManaCost):
            colored_cost = other.colored.to_manaset()
            remainder1 = self.sub(colored_cost)
            color2_remain = remainder1.get_colordict()
            uncolored_need = other.num_uncolored
            # TODO: value different colors differently for payment
            if uncolored_need > 0:
                for color in list(color2_remain.keys()):
                    using = min(uncolored_need, color2_remain[color])
                    color2_remain[color] -= using
                    uncolored_need -= using
            if uncolored_need > 0:
                raise NotEnoughManaError('Cannot subtract more mana from less')
            # Todo hybrid / phyrexian
        else:
            color2_need = ut.dict_hist(other._manas)
            color2_remain = ut.ddict(lambda: 0, ut.dict_hist(self._manas))
            for color, num_need in color2_need.items():
                num_have = color2_remain[color]
                if num_have < num_need:
                    raise NotEnoughManaError('Cannot subtract more mana from less')
                color2_remain[color] -= num_need
        color2_remain = delete_dict_zeros(color2_remain)
        remainder = ManaSet(color2_remain)
        return remainder
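
Note the two-argument form ut.ddict(lambda: 0, ut.dict_hist(self._manas)): as with collections.defaultdict, the factory may be followed by an initial mapping, so existing counts are preserved while missing colors default to 0. A stdlib sketch with illustrative counts:

from collections import defaultdict

color2_remain = defaultdict(int, {'R': 2, 'U': 1})
assert color2_remain['R'] == 2  # preserved from the initial mapping
assert color2_remain['G'] == 0  # missing color defaults to 0 instead of raising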
Example #22
def search_env_paths(fname, key_list=None, verbose=None):
    r"""
    Searches your PATH to see if fname exists

    Args:
        fname (str): file name to search for (can be glob pattern)

    CommandLine:
        python -m utool search_env_paths --fname msvcr*.dll
        python -m utool search_env_paths --fname '*flann*'

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_cplat import *  # NOQA
        >>> import utool as ut
        >>> fname = 'opencv2/highgui/libopencv_highgui.so'
        >>> fname = ut.get_argval('--fname', default='*')
        >>> print('fname = %r' % (fname,))
        >>> key_list = None # ['PATH']
        >>> found = search_env_paths(fname, key_list)
        >>> print(ut.repr4(found, nl=True, strvals=True))

    Ignore:
        OpenCV_DIR:PATH={share_opencv}
        OpenCV_CONFIG_PATH:FILEPATH={share_opencv}

    """
    import utool as ut
    # from os.path import join
    if key_list is None:
        key_list = [key for key in os.environ if key.find('PATH') > -1]
        print('key_list = %r' % (key_list,))

    found = ut.ddict(list)

    for key in key_list:
        dpath_list = os.environ[key].split(os.pathsep)
        for dpath in dpath_list:
            #if verbose:
            #    print('dpath = %r' % (dpath,))
            # testname = join(dpath, fname)
            matches = ut.glob(dpath, fname)
            found[key].extend(matches)
            #import fnmatch
            #import utool
            #utool.embed()
            #if ut.checkpath(testname, verbose=False):
            #    if verbose:
            #        print('Found in key=%r' % (key,))
            #        ut.checkpath(testname, verbose=True, info=True)
            #    found += [testname]
    return dict(found)
Example #23
    def _get_cm_agg_aid_ranking(infr, cc):
        aid_to_cm = {cm.qaid: cm for cm in infr.cm_list}
        all_scores = ut.ddict(list)
        for qaid in cc:
            cm = aid_to_cm[qaid]
            # should we be doing nids?
            for daid, score in zip(cm.get_top_aids(), cm.get_top_scores()):
                all_scores[daid].append(score)

        max_scores = sorted(
            (max(scores), aid) for aid, scores in all_scores.items())[::-1]
        ranked_aids = ut.take_column(max_scores, 1)
        return ranked_aids
Example #24
 def find_pretrained(self):
     import glob
     import parse
     fname_fmt = self.fname_fmtstr + '.cPkl'
     task_clf_candidates = ut.ddict(list)
     globstr = self.fname_parts[0] + '.*.cPkl'
     for fpath in glob.iglob(join(self.dpath, globstr)):
         fname = basename(fpath)
         result = parse.parse(fname_fmt, fname)
         if result:
             task_key = result.named['task_key']
             task_clf_candidates[task_key].append(fpath)
     return task_clf_candidates
Example #25
def get_photobomber_map(ibs, aids, aid_to_nid=None):
    """
    Builds a map of which names photobomb other names.

    python -m wbia.gui.id_review_api --test-test_review_widget --show --db PZ_MTEST -a default:qindex=0

    >>> import wbia
    >>> dbdir = ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
    >>> ibs = wbia.opendb(dbdir='/home/joncrall/lev/media/danger/GGR/GGR-IBEIS')
    >>> filter_kw = {
    >>>     'multiple': False,
    >>>     'minqual': 'good',
    >>>     'is_known': True,
    >>>     'min_pername': 2,
    >>>     'view': ['right'],
    >>> }
    >>> aids = ibs.filter_annots_general(ibs.get_valid_aids(), filter_kw=filter_kw)
    """
    ams_list = ibs.get_annotmatch_rowids_from_aid(aids)
    flags_list = ibs.unflat_map(
        ut.partial(ibs.get_annotmatch_prop, 'Photobomb'), ams_list)
    pb_ams = ut.zipcompress(ams_list, flags_list)
    has_pb_ams = [len(ams) > 0 for ams in pb_ams]
    pb_ams_ = ut.compress(pb_ams, has_pb_ams)
    # aids_ = ut.compress(aids, has_pb_ams)
    pb_ams_flat = ut.flatten(pb_ams_)

    pb_aids1_ = ibs.get_annotmatch_aid1(pb_ams_flat)
    pb_aids2_ = ibs.get_annotmatch_aid2(pb_ams_flat)

    pb_aid_pairs_ = list(zip(pb_aids1_, pb_aids2_))
    if aid_to_nid is None:
        pb_nid_pairs_ = ibs.unflat_map(ibs.get_annot_nids, pb_aid_pairs_)
    else:
        pb_nid_pairs_ = ibs.unflat_map(ut.partial(ut.take, aid_to_nid),
                                       pb_aid_pairs_)

    # invalid_aid_map = ut.ddict(set)
    # for aid1, aid2 in pb_aid_pairs_:
    #    if aid1 != aid2:
    #        invalid_aid_map[aid1].add(aid2)
    #        invalid_aid_map[aid2].add(aid1)

    invalid_nid_map = ut.ddict(set)
    for nid1, nid2 in pb_nid_pairs_:
        if nid1 != nid2:
            invalid_nid_map[nid1].add(nid2)
            invalid_nid_map[nid2].add(nid1)

    return invalid_nid_map
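
A stdlib sketch of the symmetric ddict(set) insertion used for invalid_nid_map (the name-id pairs are illustrative):

from collections import defaultdict

invalid_nid_map = defaultdict(set)
for nid1, nid2 in [(1, 2), (2, 3), (1, 1)]:
    if nid1 != nid2:
        invalid_nid_map[nid1].add(nid2)
        invalid_nid_map[nid2].add(nid1)
assert dict(invalid_nid_map) == {1: {2}, 2: {1, 3}, 3: {2}}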
Example #26
def search_env_paths(fname, key_list=None, verbose=None):
    r"""
    Searches your PATH to see if fname exists

    Args:
        fname (str): file name to search for (can be glob pattern)

    CommandLine:
        python -m utool search_env_paths --fname msvcr*.dll

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_cplat import *  # NOQA
        >>> import utool as ut
        >>> fname = 'opencv2/highgui/libopencv_highgui.so'
        >>> fname = ut.get_argval('--fname', default='*')
        >>> key_list = ['PATH']
        >>> found = search_env_paths(fname, key_list)
        >>> print(ut.dict_str(found, nl=True, strvals=True))

    Ignore:
        OpenCV_DIR:PATH={share_opencv}
        OpenCV_CONFIG_PATH:FILEPATH={share_opencv}

    """
    import utool as ut
    # from os.path import join
    if key_list is None:
        key_list = [key for key in os.environ if key.find('PATH') > -1]

    found = ut.ddict(list)

    for key in key_list:
        dpath_list = os.environ[key].split(os.pathsep)
        for dpath in dpath_list:
            #if verbose:
            #    print('dpath = %r' % (dpath,))
            # testname = join(dpath, fname)
            matches = ut.glob(dpath, fname)
            found[key].extend(matches)
            #import fnmatch
            #import utool
            #utool.embed()
            #if ut.checkpath(testname, verbose=False):
            #    if verbose:
            #        print('Found in key=%r' % (key,))
            #        ut.checkpath(testname, verbose=True, info=True)
            #    found += [testname]
    return dict(found)
Example #27
 def get_annot_age_stats(aid_list):
     annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
     annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
     age_dict = ut.ddict((lambda : 0))
     for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
         if (min_age is None or min_age < 12) and max_age < 12:
             age_dict['Infant'] += 1
         elif 12 <= min_age and min_age < 36 and 12 <= max_age and max_age < 36:
             age_dict['Juvenile'] += 1
         elif 36 <= min_age and (36 <= max_age or max_age is None):
             age_dict['Adult'] += 1
         else:
             print('Found UNKNOWN Age: %r, %r' % (min_age, max_age, ))
             age_dict['UNKNOWN'] += 1
     return age_dict
Example #28
def compute_data_gamma_(invindex, use_cache=True):
    """
    >>> from ibeis.model.hots.smk.smk import *  # NOQA
    >>> ibs, annots_df, taids, daids, qaids, nWords = testdata()
    >>> words = learn_visual_words(annots_df, taids, nWords)
    >>> with_internals = True
    >>> invindex = index_data_annots(annots_df, daids, words, with_internals)
    >>> daid2_gamma = compute_data_gamma_(invindex, use_cache=True)
    """
    cache_key = utool.hashstr(invindex.get_cfgstr())
    if use_cache:
        try:
            daid2_gamma = utool.global_cache_read(cache_key, appname='smk')
            #print('gamma_dbg cache hit')
            return daid2_gamma
        except Exception:
            pass

    # Grouping by aid and words

    mark, end_ = utool.log_progress(('gamma grouping %s ' % (cache_key, )),
                                    invindex.wx2_drvecs.shape[0],
                                    flushfreq=100)
    daid2_wx2_drvecs = utool.ddict(dict)
    for count, wx in enumerate(invindex.wx2_drvecs.index):
        if count % 100 == 0:
            mark(wx)
        group = invindex.wx2_drvecs[wx].groupby(invindex.idx2_daid)
        for daid, vecs in group:
            daid2_wx2_drvecs[daid][wx] = vecs.values
    end_()

    # Summation over words for each aid
    mark, end_ = utool.log_progress('gamma summation ',
                                    len(daid2_wx2_drvecs),
                                    flushfreq=100)
    daid2_gamma = pd.Series(np.zeros(invindex.daids.shape[0]),
                            index=invindex.daids,
                            name='gamma')
    wx2_weight = invindex.wx2_weight
    for count, (daid,
                wx2_drvecs) in enumerate(six.iteritems(daid2_wx2_drvecs)):
        if count % 100 == 0:
            mark(count)
        wx2_rvecs = wx2_drvecs
        daid2_gamma[daid] = gamma_summation(wx2_rvecs, wx2_weight)
    utool.global_cache_write(cache_key, daid2_gamma, appname='smk')
    return daid2_gamma
Example #29
def parse_timemap_from_blocks(profile_block_list):
    """
    Build a map from times to line_profile blocks
    """
    prefix_list = []
    timemap = ut.ddict(list)
    for ix in range(len(profile_block_list)):
        block = profile_block_list[ix]
        total_time = get_block_totaltime(block)
        # Blocks without time go at the front of sorted output
        if total_time is None:
            prefix_list.append(block)
        # Blocks that are not run are not appended to output
        elif total_time != 0:
            timemap[total_time].append(block)
    return prefix_list, timemap
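
A minimal sketch of consuming the (prefix_list, timemap) pair, mirroring the sorting step in Example #9 (ascending order, so the slowest blocks print last); profile_block_list is assumed to hold the profiler's text blocks:

prefix_list, timemap = parse_timemap_from_blocks(profile_block_list)
newlist = list(prefix_list)
for total_time in sorted(timemap):  # keys are the block total times
    newlist.extend(timemap[total_time])
output_text = '\n'.join(newlist)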
Example #32
    def analyze_internal_duplicats(self):
        multis = self.find_internal_duplicates()
        unique_dnames = set([])
        associations = ut.ddict(lambda: 0)

        # diag_dups = []
        # other_dups = []

        for sub in multis.group_items('hash').values():
            dnames = sub['dname']
            unique_dnames.update(dnames)
            for dn1, dn2 in ut.combinations(dnames, 2):
                # if dn1 == dn2:
                #     diag_dups[dn1] += 1
                key = tuple(sorted([dn1, dn2]))
                associations[key] += 1

            print(sub['dname'])
Example #34
    def make_header(tblname):
        """
        Input:
            table_name - the internal table name
        """
        tblnice    = TABLE_NICE[tblname]
        colnames   = TABLE_COLNAMES[tblname]
        editset    = TABLE_EDITSET[tblname]
        tblgetters = getters[tblname]
        tblsetters = setters[tblname]
        #if levels aren't found, we're not dealing with a tree, so everything is at level 0
        collevel_dict = TABLE_TREE_LEVELS.get(tblname, utool.ddict(lambda: 0))
        collevels  = [collevel_dict[colname] for colname in colnames]
        hiddencols = TABLE_HIDDEN_LIST.get(tblname, [False for _ in xrange(len(colnames))])
        numstripes = TABLE_STRIPE_LIST.get(tblname, 1)

        def get_column_data(colname):
            coltype   = COL_DEF[colname][0]
            colnice   = COL_DEF[colname][1]
            coledit   = colname in editset
            colgetter = tblgetters[colname]
            colsetter = None if not coledit else tblsetters.get(colname, None)
            return (coltype, colnice, coledit, colgetter, colsetter)
        try:
            (coltypes, colnices, coledits, colgetters, colsetters) = zip(*map(get_column_data, colnames))
        except KeyError as ex:
            utool.printex(ex,  key_list=['tblname', 'colnames'])
            raise
        header = {
            'name': tblname,
            'nice': tblnice,
            'iders': iders[tblname],
            'col_name_list': colnames,
            'col_type_list': coltypes,
            'col_nice_list': colnices,
            'col_edit_list': coledits,
            'col_getter_list': colgetters,
            'col_setter_list': colsetters,
            'col_level_list': collevels,
            'col_hidden_list' : hiddencols,
            'num_duplicates'  : numstripes,
        }
        return header
Example #35
def find_used_citations(tex_fpath_list, return_inverse=False):
    """
    fpaths = get_thesis_tex_fpaths()
    """
    citekey_list = []
    inverse = ut.ddict(list)
    for tex_fpath in tex_fpath_list:
        text = ut.read_from(tex_fpath)
        #print('\n\n+-----')
        local_cites = find_citations(text)
        citekey_list.extend(local_cites)
        for key in local_cites:
            inverse[key].append(tex_fpath)

    citekey_list = sorted(set(citekey_list))
    if return_inverse:
        return citekey_list, inverse
    else:
        return citekey_list
Example #36
def compute_idf_label1(aids_list, daid2_label):
    """
    One of our idf extensions

    Example:
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1()
        >>> wx_series = np.arange(len(invindex.words))
        >>> idx2_aid = invindex.idx2_daid
        >>> daid2_label = invindex.daid2_label
        >>> _ = helper_idf_wordgroup(wx2_idxs, idx2_aid, wx_series)
        >>> idxs_list, aids_list = _
        >>> wx2_idf = compute_idf_label1(wx_series, wx2_idxs, idx2_aid, daids)
    """
    nWords = len(aids_list)
    # Computes our novel label idf weight
    lblindex_list = np.array(ut.tuples_to_unique_scalars(daid2_label.values()))
    #daid2_lblindex = dict(zip(daid_list, lblindex_list))
    unique_lblindexes, groupxs = clustertool.group_indices(lblindex_list)
    daid_list = np.array(daid2_label.keys())
    daids_list = [daid_list.take(xs) for xs in groupxs]
    daid2_wxs = ut.ddict(list)
    for wx, daids in enumerate(aids_list):
        for daid in daids:
            daid2_wxs[daid].append(wx)
    lblindex2_daids = list(zip(unique_lblindexes, daids_list))
    nLabels = len(unique_lblindexes)
    pcntLblsWithWord = np.zeros(nWords, np.float64)
    # Get the number of times each word appears for each label
    for lblindex, daids in lblindex2_daids:
        nWordsWithLabel = np.zeros(nWords)
        for daid in daids:
            wxs = daid2_wxs[daid]
            nWordsWithLabel[wxs] += 1
        pcntLblsWithWord += (1 - nWordsWithLabel.astype(np.float64) / len(daids))

    # Labels for each word
    idf_list = np.log(np.divide(nLabels, np.add(pcntLblsWithWord, 1),
                                dtype=hstypes.FLOAT_TYPE),
                      dtype=hstypes.FLOAT_TYPE)
    return idf_list
Example #37
def filter_duplicate_acfgs(expanded_aids_list, acfg_list, acfg_name_list, verbose=ut.NOT_QUIET):
    """
    Removes configs with the same expanded aids list

    CommandLine:
        # The following will trigger this function:
        ibeis -e print_acfg -a timectrl timectrl:view=left --db PZ_MTEST

    """
    from ibeis.expt import annotation_configs
    acfg_list_ = []
    expanded_aids_list_ = []
    seen_ = ut.ddict(list)
    for acfg, (qaids, daids) in zip(acfg_list, expanded_aids_list):
        key = (ut.hashstr_arr27(qaids, 'qaids'), ut.hashstr_arr27(daids, 'daids'))
        if key in seen_:
            seen_[key].append(acfg)
            continue
        else:
            seen_[key].append(acfg)
            expanded_aids_list_.append((qaids, daids))
            acfg_list_.append(acfg)
    if verbose:
        duplicate_configs = dict(
            [(key_, val_) for key_, val_ in seen_.items() if len(val_) > 1])
        if len(duplicate_configs) > 0:
            print('The following configs produced duplicate annotation configs')
            for key, val in duplicate_configs.items():
                # Print the semantic difference between the duplicate configs
                _tup = annotation_configs.compress_acfg_list_for_printing(val)
                nonvaried_compressed_dict, varied_compressed_dict_list = _tup
                print('+--')
                print('key = %r' % (key,))
                print('duplicate_varied_cfgs = %s' % (
                    ut.list_str(varied_compressed_dict_list),))
                print('duplicate_nonvaried_cfgs = %s' % (
                    ut.dict_str(nonvaried_compressed_dict),))
                print('L__')

        print('[harn.help] parsed %d / %d unique annot configs from: %r' % (
            len(acfg_list_), len(acfg_list), acfg_name_list))
    return expanded_aids_list_, acfg_list_
Example #39
    def __init__(back, ibs=None):
        """ Creates GUIBackend object """
        QtCore.QObject.__init__(back)
        print('[back] MainWindowBackend.__init__()')
        back.ibs  = None
        back.cfg = None
        # State variables
        back.sel_aids = []
        back.sel_nids = []
        back.sel_gids = []
        back.sel_qres = []
        back.active_enc = 0
        back.query_mode = 'intra_encounter'
        back.encounter_query_results = utool.ddict(dict)

        # Create GUIFrontend object
        back.mainwin = newgui.IBEISMainWindow(back=back, ibs=ibs)
        back.front = back.mainwin.ibswgt
        back.ibswgt = back.front  # Alias
        # connect signals and other objects
        fig_presenter.register_qt4_win(back.mainwin)
Example #40
def invert_lists(aids, wx_lists, all_wxs=None):
    """
    takes corresponding lists of (aids, wxs) and maps wxs to aids

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.smk.smk_funcs import *  # NOQA
        >>> aids = [1, 2, 3]
        >>> wx_lists = [[0, 1], [20, 0, 1], [3]]
        >>> wx_to_aids = invert_lists(aids, wx_lists)
        >>> result = ('wx_to_aids = %s' % (ut.repr2(wx_to_aids),))
        >>> print(result)
        wx_to_aids = {0: [1, 2], 1: [1, 2], 3: [3], 20: [2]}
    """
    if all_wxs is None:
        wx_to_aids = ut.ddict(list)
    else:
        wx_to_aids = {wx: [] for wx in all_wxs}
    for aid, wxs in zip(aids, wx_lists):
        for wx in wxs:
            wx_to_aids[wx].append(aid)
    return wx_to_aids
Example #41
def prepare_annot_pairs(ibs, qaids, daids, qconfig2_, dconfig2_):
    # Prepare lazy attributes for annotations
    qannot_cfg = ibs.depc.stacked_config(None, 'featweight', qconfig2_)
    dannot_cfg = ibs.depc.stacked_config(None, 'featweight', dconfig2_)

    unique_qaids = set(qaids)
    unique_daids = set(daids)

    # Determine a unique set of annots per config
    configured_aids = ut.ddict(set)
    configured_aids[qannot_cfg].update(unique_qaids)
    configured_aids[dannot_cfg].update(unique_daids)

    # Make efficient annot-object representation
    configured_obj_annots = {}
    for config, aids in configured_aids.items():
        annots = ibs.annots(sorted(list(aids)), config=config)
        configured_obj_annots[config] = annots.view()

    # These annot views behave like annot objects
    # but they use the same internal cache
    annots1 = configured_obj_annots[qannot_cfg].view(qaids)
    annots2 = configured_obj_annots[dannot_cfg].view(daids)
    return annots1, annots2
Example #42
def match_kernel(wx2_qrvecs, wx2_qfxs, invindex, qaid):
    """
    >>> from ibeis.model.hots.smk.smk import *  # NOQA
    >>> ibs, annots_df, taids, daids, qaids, nWords = testdata()
    >>> words = learn_visual_words(annots_df, taids, nWords)
    >>> invindex = index_data_annots(annots_df, daids, words)
    >>> qaid = qaids[0]
    >>> wx2_qfxs, wx2_qrvecs = compute_query_repr(annots_df, qaid, invindex)
    >>> daid2_totalscore = match_kernel(wx2_qrvecs, wx2_qfxs, invindex, qaid)
    """
    _daids = invindex.daids
    idx2_daid = invindex.idx2_daid
    wx2_drvecs = invindex.wx2_drvecs
    wx2_weight = invindex.wx2_weight
    daid2_gamma = invindex.daid2_gamma

    wx2_rvecs = wx2_qrvecs
    query_gamma = gamma_summation(wx2_rvecs, wx2_weight)

    # Accumulate scores over the entire database
    daid2_aggscore = pd.Series(np.zeros(len(_daids)), index=_daids, name='total_score')
    common_wxs = set(wx2_qrvecs.keys()).intersection(set(wx2_drvecs.keys()))

    daid2_wx2_scoremat = utool.ddict(lambda: utool.ddict(list))

    # for each word compute the pairwise scores between matches
    mark, end = utool.log_progress('query word: ', len(common_wxs), flushfreq=100)
    for count, wx in enumerate(common_wxs):
        if count % 100 == 0:
            mark(count)
        # Query and database vectors for wx-th word
        qrvecs = wx2_qrvecs[wx]
        drvecs = wx2_drvecs[wx]
        # Word Weight
        weight = wx2_weight[wx]
        # Compute score matrix
        qfx2_wscore = Match_N(qrvecs, drvecs)
        qfx2_wscore.groupby(idx2_daid)
        # Group scores by database annotation ids
        group = qfx2_wscore.groupby(idx2_daid, axis=1)
        for daid, scoremat in group:
            daid2_wx2_scoremat[daid][wx] = scoremat
        #qfx2_wscore = pd.DataFrame(qfx2_wscore_, index=qfxs, columns=_idxs)
        daid2_wscore = weight * qfx2_wscore.sum(axis=0).groupby(idx2_daid).sum()
        daid2_aggscore = daid2_aggscore.add(daid2_wscore, fill_value=0)
    daid2_totalscore = daid2_aggscore * daid2_gamma * query_gamma
    end()

    daid_fm = {}
    daid_fs = {}
    daid_fk = {}
    mark, end = utool.log_progress('accumulating match info: ', len(daid2_wx2_scoremat), flushfreq=100)
    for count, item in enumerate(daid2_wx2_scoremat.items()):
        daid, wx2_scoremat = item
        if count % 25 == 0:
            mark(count)
        fm_accum = []
        fs_accum = []
        fk_accum = []
        for wx, scoremat in wx2_scoremat.iteritems():
            qfxs = scoremat.index
            dfxs = invindex.idx2_dfx[scoremat.columns]
            fm_ = np.vstack(np.dstack(np.meshgrid(qfxs, dfxs, indexing='ij')))
            fs_ = scoremat.values.flatten()
            lower_thresh = 0.01
            valid = [fs_ > lower_thresh]
            fm = fm_[valid]
            fs = fs_[valid]
            fk = np.ones(len(fm), dtype=np.int32)
            fm_accum.append(fm)
            fs_accum.append(fs)
            fk_accum.append(fk)
        daid_fm[daid] = np.vstack(fm_accum)
        daid_fs[daid] = np.hstack(fs_accum).T
        daid_fk[daid] = np.hstack(fk_accum).T
    chipmatch = (daid_fm, daid_fs, daid_fk,)

    daid2_totalscore.sort(axis=1, ascending=False)
    return daid2_totalscore, chipmatch
Example #43
    def find_latest_remote(self):
        """
        Used to update the published dict

        CommandLine:
            python -m wbia.algo.verif.vsone find_latest_remote

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.algo.verif.vsone import *  # NOQA
            >>> self = Deployer()
            >>> task_clf_names = self.find_latest_remote()
        """
        base_url = 'https://{remote}/public/models/pairclf'.format(
            **self.publish_info)
        import requests
        import bs4

        resp = requests.get(base_url)
        soup = bs4.BeautifulSoup(resp.text, 'html.parser')
        table = soup.findAll('table')[0]

        def parse_bs_table(table):
            n_columns = 0
            n_rows = 0
            column_names = []
            # Find number of rows and columns
            # we also find the column titles if we can
            for row in table.find_all('tr'):
                td_tags = row.find_all('td')
                if len(td_tags) > 0:
                    n_rows += 1
                    if n_columns == 0:
                        n_columns = len(td_tags)
                # Handle column names if we find them
                th_tags = row.find_all('th')
                if len(th_tags) > 0 and len(column_names) == 0:
                    for th in th_tags:
                        column_names.append(th.get_text())

            # Safeguard on Column Titles
            if len(column_names) > 0 and len(column_names) != n_columns:
                raise Exception(
                    'Column titles do not match the number of columns')
            columns = column_names if len(column_names) > 0 else range(
                0, n_columns)
            import pandas as pd

            df = pd.DataFrame(columns=columns, index=list(range(0, n_rows)))
            row_marker = 0
            for row in table.find_all('tr'):
                column_marker = 0
                columns = row.find_all('td')
                for column in columns:
                    df.iat[row_marker,
                           column_marker] = column.get_text().strip()
                    column_marker += 1
                if len(columns) > 0:
                    row_marker += 1
            return df

        df = parse_bs_table(table)
        # Find all available models
        df = df[df['Name'].map(lambda x: x.endswith('.cPkl'))]
        # df = df[df['Last modified'].map(len) > 0]

        fname_fmt = self.fname_fmtstr + '.cPkl'
        task_clf_candidates = ut.ddict(list)
        import parse

        for idx, row in df.iterrows():
            fname = basename(row['Name'])
            result = parse.parse(fname_fmt, fname)
            if result:
                task_key = result.named['task_key']
                species = result.named['species']
                task_clf_candidates[(species, task_key)].append(idx)

        task_clf_fnames = ut.ddict(dict)
        for key, idxs in task_clf_candidates.items():
            species, task_key = key
            # Find the classifier most recently created
            max_idx = ut.argmax(df.loc[idxs]['Last modified'].tolist())
            fname = df.loc[idxs[max_idx]]['Name']
            task_clf_fnames[species][task_key] = fname

        logger.info('published = ' + ut.repr2(task_clf_fnames, nl=2))
        return task_clf_fnames
Example #44
def latex_dbstats(ibs_list, **kwargs):
    r"""
    Args:
        ibs (IBEISController):  ibeis controller object

    CommandLine:
        python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist testdb1
        python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist testdb1 --show
        python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist PZ_Master0 testdb1 --show
        python -m ibeis.other.dbinfo --exec-latex_dbstats --dblist PZ_Master0 PZ_MTEST GZ_ALL --show
        python -m ibeis.other.dbinfo --test-latex_dbstats --dblist GZ_ALL NNP_MasterGIRM_core --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> db_list = ut.get_argval('--dblist', type_=list, default=['testdb1'])
        >>> ibs_list = [ibeis.opendb(db=db) for db in db_list]
        >>> tabular_str = latex_dbstats(ibs_list)
        >>> tabular_cmd = ut.latex_newcommand(ut.latex_sanitize_command_name('DatabaseInfo'), tabular_str)
        >>> ut.copy_text_to_clipboard(tabular_cmd)
        >>> write_fpath = ut.get_argval('--write', type_=str, default=None)
        >>> if write_fpath is not None:
        >>>     fpath = ut.truepath(write_fpath)
        >>>     text = ut.readfrom(fpath)
        >>>     new_text = ut.replace_between_tags(text, tabular_cmd, '% <DBINFO>', '% </DBINFO>')
        >>>     ut.writeto(fpath, new_text)
        >>> ut.print_code(tabular_cmd, 'latex')
        >>> ut.quit_if_noshow()
        >>> ut.render_latex_text('\\noindent \n' + tabular_str)
    """

    import ibeis
    # Parse for aids test data
    aids_list = [ibeis.testdata_aids(ibs=ibs) for ibs in ibs_list]

    #dbinfo_list = [get_dbinfo(ibs, with_contrib=False, verbose=False) for ibs in ibs_list]
    dbinfo_list = [get_dbinfo(ibs, with_contrib=False, verbose=False, aid_list=aids)
                   for ibs, aids in zip(ibs_list, aids_list)]

    #title = db_name + ' database statistics'
    title = 'Database statistics'
    stat_title = '# Annotations per name (multiton)'

    #col_lbls = [
    #    'multiton',
    #    #'singleton',
    #    'total',
    #    'multiton',
    #    'singleton',
    #    'total',
    #]
    key_to_col_lbls = {
        'num_names_multiton':   'multiton',
        'num_names_singleton':  'singleton',
        'num_names':            'total',

        'num_multiton_annots':  'multiton',
        'num_singleton_annots': 'singleton',
        'num_unknown_annots':   'unknown',
        'num_annots':           'total',
    }
    # Structure of columns / multicolumns
    multi_col_keys = [
        ('# Names', (
            'num_names_multiton',
            #'num_names_singleton',
            'num_names',
        )),

        ('# Annots', (
            'num_multiton_annots',
            'num_singleton_annots',
            #'num_unknown_annots',
            'num_annots')),
    ]
    #multicol_lbls = [('# Names', 3), ('# Annots', 3)]
    multicol_lbls = [(mcolname, len(mcols)) for mcolname, mcols in multi_col_keys]

    # Flatten column labels
    col_keys = ut.flatten(ut.get_list_column(multi_col_keys, 1))
    col_lbls = ut.dict_take(key_to_col_lbls, col_keys)

    row_lbls   = []
    row_values = []

    #stat_col_lbls = ['max', 'min', 'mean', 'std', 'nMin', 'nMax']
    stat_col_lbls = ['max', 'min', 'mean', 'std', 'med']
    #stat_row_lbls = ['# Annot per Name (multiton)']
    stat_row_lbls = []
    stat_row_values = []

    SINGLE_TABLE = False
    EXTRA = True

    for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list):
        row_ = ut.dict_take(dbinfo_locals, col_keys)
        dbname = ibs.get_dbname_alias()
        row_lbls.append(dbname)
        multiton_annot_stats = ut.get_stats(dbinfo_locals['multiton_nid2_nannots'], use_median=True)
        stat_rows = ut.dict_take(multiton_annot_stats, stat_col_lbls)
        if SINGLE_TABLE:
            row_.extend(stat_rows)
        else:
            stat_row_lbls.append(dbname)
            stat_row_values.append(stat_rows)

        row_values.append(row_)

    CENTERLINE = False
    AS_TABLE = True
    tablekw = dict(
        astable=AS_TABLE, centerline=CENTERLINE, FORCE_INT=False, precision=2,
        col_sep='', multicol_sep='|',
        **kwargs)

    if EXTRA:
        extra_keys = [
            #'species2_nAids',
            'qualtext2_nAnnots',
            'yawtext2_nAnnots',
        ]
        extra_titles = {
            'species2_nAids': 'Annotations per species.',
            'qualtext2_nAnnots': 'Annotations per quality.',
            'yawtext2_nAnnots': 'Annotations per viewpoint.',
        }
        extra_collbls = ut.ddict(list)
        extra_rowvalues = ut.ddict(list)
        extra_tables = ut.ddict(list)

        for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list):
            for key in extra_keys:
                extra_collbls[key] = ut.unique_ordered(extra_collbls[key] + list(dbinfo_locals[key].keys()))

        extra_collbls['qualtext2_nAnnots'] = ['excellent', 'good', 'ok', 'poor', 'junk', 'UNKNOWN']
        #extra_collbls['yawtext2_nAnnots'] = ['backleft', 'left', 'frontleft', 'front', 'frontright', 'right', 'backright', 'back', None]
        extra_collbls['yawtext2_nAnnots'] = ['BL', 'L', 'FL', 'F', 'FR', 'R', 'BR', 'B', None]

        for ibs, dbinfo_locals in zip(ibs_list, dbinfo_list):
            for key in extra_keys:
                extra_rowvalues[key].append(ut.dict_take(dbinfo_locals[key], extra_collbls[key], 0))

        qualalias = {'UNKNOWN': None}

        extra_collbls['yawtext2_nAnnots'] = [ibs.const.YAWALIAS.get(val, val) for val in extra_collbls['yawtext2_nAnnots']]
        extra_collbls['qualtext2_nAnnots'] = [qualalias.get(val, val) for val in extra_collbls['qualtext2_nAnnots']]

        for key in extra_keys:
            extra_tables[key] = ut.util_latex.make_score_tabular(
                row_lbls, extra_collbls[key], extra_rowvalues[key],
                title=extra_titles[key], col_align='r', table_position='[h!]', **tablekw)

    #tabular_str = util_latex.tabular_join(tabular_body_list)
    if SINGLE_TABLE:
        col_lbls += stat_col_lbls
        multicol_lbls += [(stat_title, len(stat_col_lbls))]

    count_tabular_str = ut.util_latex.make_score_tabular(
        row_lbls, col_lbls, row_values, title=title, multicol_lbls=multicol_lbls, table_position='[ht!]', **tablekw)

    #print(row_lbls)

    if SINGLE_TABLE:
        tabular_str = count_tabular_str
    else:
        stat_tabular_str = ut.util_latex.make_score_tabular(
            stat_row_lbls, stat_col_lbls, stat_row_values, title=stat_title,
            col_align='r', table_position='[h!]', **tablekw)

        # Make a table of statistics
        if tablekw['astable']:
            tablesep = '\n%--\n'
        else:
            tablesep = '\\\\\n%--\n'
        if EXTRA:
            tabular_str = tablesep.join([count_tabular_str, stat_tabular_str] + ut.dict_take(extra_tables, extra_keys))
        else:
            tabular_str = tablesep.join([count_tabular_str, stat_tabular_str])

    return tabular_str
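

# A minimal standalone sketch (hypothetical helper, plain Python) of the
# multicolumn flattening pattern used above with multi_col_keys:
def _flatten_multicols(multi_col_keys, key_to_col_lbls):
    # Each entry is (group_label, (key, key, ...)); flattening preserves group
    # order so col_lbls lines up with the multicol_lbls spans.
    col_keys = [key for _, keys in multi_col_keys for key in keys]
    col_lbls = [key_to_col_lbls[key] for key in col_keys]
    multicol_lbls = [(name, len(keys)) for name, keys in multi_col_keys]
    return col_keys, col_lbls, multicol_lbls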


def bigcache_vsone(qreq_, hyper_params):
    """
    Cached output of one-vs-one matches

        >>> from wbia.scripts.script_vsone import *  # NOQA
        >>> self = OneVsOneProblem()
        >>> qreq_ = self.qreq_
        >>> hyper_params = self.hyper_params
    """
    import vtool as vt
    import wbia

    # Get a set of training pairs
    ibs = qreq_.ibs
    cm_list = qreq_.execute()
    infr = wbia.AnnotInference.from_qreq_(qreq_, cm_list, autoinit=True)

    # Per query choose a set of correct, incorrect, and random training pairs
    aid_pairs_ = infr._cm_training_pairs(
        rng=np.random.RandomState(42), **hyper_params.pair_sample
    )

    aid_pairs_ = vt.unique_rows(np.array(aid_pairs_), directed=False).tolist()
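    # For clarity: directed=False treats (a, b) and (b, a) as the same pair.
    # A toy numpy-only equivalent (illustrative sketch, not vtool's code):
    #   pairs = np.array([(1, 2), (2, 1), (3, 4)])
    #   canon = np.sort(pairs, axis=1)                 # (2, 1) -> (1, 2)
    #   _, keepx = np.unique(canon, axis=0, return_index=True)
    #   pairs[np.sort(keepx)]                          # [[1, 2], [3, 4]]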

    pb_aid_pairs_ = photobomb_samples(ibs)

    # TODO: try to add in more non-comparable samples
    aid_pairs_ = pb_aid_pairs_ + aid_pairs_
    aid_pairs_ = vt.unique_rows(np.array(aid_pairs_))

    # ======================================
    # Compute one-vs-one scores and local_measures
    # ======================================

    # Prepare lazy attributes for annotations
    qreq_ = infr.qreq_
    ibs = qreq_.ibs
    qconfig2_ = qreq_.extern_query_config2
    dconfig2_ = qreq_.extern_data_config2
    qannot_cfg = ibs.depc.stacked_config(None, 'featweight', qconfig2_)
    dannot_cfg = ibs.depc.stacked_config(None, 'featweight', dconfig2_)

    # Remove any pairs missing features
    if dannot_cfg == qannot_cfg:
        unique_annots = ibs.annots(np.unique(np.array(aid_pairs_)), config=dannot_cfg)
        # Flag annots with zero features (they cannot produce matches)
        bad_aids = unique_annots.compress(np.array(unique_annots.num_feats) == 0).aids
        bad_aids = set(bad_aids)
    else:
        annots1_ = ibs.annots(ut.unique(ut.take_column(aid_pairs_, 0)), config=qannot_cfg)
        annots2_ = ibs.annots(ut.unique(ut.take_column(aid_pairs_, 1)), config=dannot_cfg)
        bad_aids1 = annots1_.compress(np.array(annots1_.num_feats) == 0).aids
        bad_aids2 = annots2_.compress(np.array(annots2_.num_feats) == 0).aids
        bad_aids = set(bad_aids1 + bad_aids2)
    subset_idxs = np.where(
        [not (a1 in bad_aids or a2 in bad_aids) for a1, a2 in aid_pairs_]
    )[0]
    # Keep only a random subset
    if hyper_params.subsample:
        rng = np.random.RandomState(3104855634)
        num_max = hyper_params.subsample
        if num_max < len(subset_idxs):
            subset_idxs = rng.choice(subset_idxs, size=num_max, replace=False)
            subset_idxs = sorted(subset_idxs)

    # Take the current selection
    aid_pairs = ut.take(aid_pairs_, subset_idxs)

    if True:
        # NEW WAY
        config = hyper_params.vsone_assign
        # TODO: ensure annot probs like chips and features can be appropriately
        # set via qreq_ config or whatever
        matches = infr.exec_vsone_subset(aid_pairs, config=config)
    else:
        query_aids = ut.take_column(aid_pairs, 0)
        data_aids = ut.take_column(aid_pairs, 1)
        # OLD WAY
        # Determine a unique set of annots per config
        configured_aids = ut.ddict(set)
        configured_aids[qannot_cfg].update(query_aids)
        configured_aids[dannot_cfg].update(data_aids)

        # Make efficient annot-object representation
        configured_obj_annots = {}
        for config, aids in configured_aids.items():
            annots = ibs.annots(sorted(list(aids)), config=config)
            configured_obj_annots[config] = annots

        annots1 = configured_obj_annots[qannot_cfg].loc(query_aids)
        annots2 = configured_obj_annots[dannot_cfg].loc(data_aids)

        # Get hash based on visual annotation appearence of each pair
        # as well as algorithm configurations used to compute those properties
        qvuuids = annots1.visual_uuids
        dvuuids = annots2.visual_uuids
        qcfgstr = annots1._config.get_cfgstr()
        dcfgstr = annots2._config.get_cfgstr()
        annots_cfgstr = ut.hashstr27(qcfgstr) + ut.hashstr27(dcfgstr)
        vsone_uuids = [
            ut.combine_uuids(uuids, salt=annots_cfgstr)
            for uuids in ut.ProgIter(
                zip(qvuuids, dvuuids), length=len(qvuuids), label='hashing ids'
            )
        ]

        # Combine into a big cache for the entire 1-v-1 matching run
        big_uuid = ut.hashstr_arr27(vsone_uuids, '', pathsafe=True)
        cacher = ut.Cacher('vsone_v7', cfgstr=str(big_uuid), appname='vsone_rf_train')

        cached_data = cacher.tryload()
        if cached_data is not None:
            # Caching doesn't work 100% for PairwiseMatch objects, so we need
            # to do some postprocessing
            configured_lazy_annots = ut.ddict(dict)
            for config, annots in configured_obj_annots.items():
                annot_dict = configured_lazy_annots[config]
                for _annot in ut.ProgIter(annots.scalars(), label='make lazy dict'):
                    annot_dict[_annot.aid] = _annot._make_lazy_dict()

            # Extract pairs of annot objects (with shared caches)
            lazy_annots1 = ut.take(configured_lazy_annots[qannot_cfg], query_aids)
            lazy_annots2 = ut.take(configured_lazy_annots[dannot_cfg], data_aids)

            # Create a set of PairwiseMatches with the correct annot properties
            matches = [
                vt.PairwiseMatch(annot1, annot2)
                for annot1, annot2 in zip(lazy_annots1, lazy_annots2)
            ]

            # Updating a new matches dictionary ensures the annot1/annot2
            # properties are set correctly
            for key, cached_matches in list(cached_data.items()):
                fixed_matches = [match.copy() for match in matches]
                for fixed, internal in zip(fixed_matches, cached_matches):
                    dict_ = internal.__dict__
                    ut.delete_dict_keys(dict_, ['annot1', 'annot2'])
                    fixed.__dict__.update(dict_)
                cached_data[key] = fixed_matches
        else:
            cached_data = vsone_(
                qreq_,
                query_aids,
                data_aids,
                qannot_cfg,
                dannot_cfg,
                configured_obj_annots,
                hyper_params,
            )
            cacher.save(cached_data)
        # key_ = 'SV_LNBNN'
        key_ = 'RAT_SV'
        # for key in list(cached_data.keys()):
        #     if key != 'SV_LNBNN':
        #         del cached_data[key]
        matches = cached_data[key_]
    return matches, infr
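

# A standalone sketch of the cache fix-up idiom used above (assumes, as the
# code implies, that vtool's PairwiseMatch keeps its state in __dict__):
# transplant cached match state onto fresh matches while keeping the freshly
# built annot1/annot2 references. Hypothetical helper, for illustration only.
def _transplant_match_state(fresh_match, cached_match):
    state = dict(cached_match.__dict__)
    state.pop('annot1', None)   # keep the fresh annot references
    state.pop('annot2', None)
    fresh_match.__dict__.update(state)
    return fresh_match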
Example #46
0
def get_dbinfo(ibs, verbose=True,
               with_imgsize=False,
               with_bytes=False,
               with_contrib=False,
               with_agesex=False,
               with_header=True,
               short=False,
               tag='dbinfo',
               aid_list=None):
    """

    Returns a dictionary of digestible database information.
    Infostr is a string summary of all the stats; it is printed in addition to
    returning locals.

    Args:
        ibs (IBEISController):
        verbose (bool):
        with_imgsize (bool):
        with_bytes (bool):

    Returns:
        dict:

    CommandLine:
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0
        python -m ibeis.other.dbinfo --test-get_dbinfo:1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db NNP_Master3
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db PZ_Master1
        python -m ibeis.other.dbinfo --test-get_dbinfo:0 --db GZ_ALL
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db PZ_ViewPoints
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 --db GZ_Master1

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a ctrl
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default:minqual=ok,require_timestamp=True --dbdir ~/lev/media/danger/LEWA --loadbackup=0

        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA
        python -m ibeis.other.dbinfo --exec-get_dbinfo:0 -a default: --dbdir ~/lev/media/danger/LEWA --loadbackup=0

    Example1:
        >>> # SCRIPT
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> defaultdb = 'testdb1'
        >>> ibs, aid_list = ibeis.testdata_aids(defaultdb, a='default:minqual=ok,view=primary,view_ext1=1')
        >>> kwargs = ut.get_kwdefaults(get_dbinfo)
        >>> kwargs['verbose'] = False
        >>> kwargs['aid_list'] = aid_list
        >>> kwargs = ut.parse_dict_from_argv(kwargs)
        >>> output = get_dbinfo(ibs, **kwargs)
        >>> result = (output['info_str'])
        >>> print(result)
        >>> #ibs = ibeis.opendb(defaultdb='testdb1')
        >>> # <HACK FOR FILTERING>
        >>> #from ibeis.expt import cfghelpers
        >>> #from ibeis.expt import annotation_configs
        >>> #from ibeis.init import filter_annots
        >>> #named_defaults_dict = ut.dict_take(annotation_configs.__dict__,
        >>> #                                   annotation_configs.TEST_NAMES)
        >>> #named_qcfg_defaults = dict(zip(annotation_configs.TEST_NAMES,
        >>> #                               ut.get_list_column(named_defaults_dict, 'qcfg')))
        >>> #acfg = cfghelpers.parse_argv_cfg(('--annot-filter', '-a'), named_defaults_dict=named_qcfg_defaults, default=None)[0]
        >>> #aid_list = ibs.get_valid_aids()
        >>> # </HACK FOR FILTERING>

    Example2:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> verbose = True
        >>> short = True
        >>> #ibs = ibeis.opendb(db='GZ_ALL')
        >>> #ibs = ibeis.opendb(db='PZ_Master0')
        >>> ibs = ibeis.opendb('testdb1')
        >>> assert ibs.get_dbname() == 'testdb1', 'DO NOT DELETE CONTRIBUTORS OF OTHER DBS'
        >>> ibs.delete_contributors(ibs.get_valid_contrib_rowids())
        >>> ibs.delete_empty_nids()
        >>> #ibs = ibeis.opendb(db='PZ_MTEST')
        >>> output = get_dbinfo(ibs, with_contrib=False, verbose=False, short=True)
        >>> result = (output['info_str'])
        >>> print(result)
        +============================
        DB Info:  testdb1
        DB Notes: None
        DB NumContrib: 0
        ----------
        # Names                      = 7
        # Names (unassociated)       = 0
        # Names (singleton)          = 5
        # Names (multiton)           = 2
        ----------
        # Annots                     = 13
        # Annots (unknown)           = 4
        # Annots (singleton)         = 5
        # Annots (multiton)          = 4
        ----------
        # Img                        = 13
        L============================
    """
    # TODO Database size in bytes
    # TODO: occurrence, contributors, etc...

    # Basic variables
    request_annot_subset = False
    _input_aid_list = aid_list  # NOQA
    if aid_list is None:
        valid_aids = ibs.get_valid_aids()
        valid_nids = ibs.get_valid_nids()
        valid_gids = ibs.get_valid_gids()
    else:
        if isinstance(aid_list, str):
            # Hack to get experiment stats on aids
            acfg_name_list = [aid_list]
            print('Specified custom aids via acfgname %s' % (acfg_name_list,))
            from ibeis.expt import experiment_helpers
            acfg_list, expanded_aids_list = experiment_helpers.get_annotcfg_list(
                ibs, acfg_name_list)
            aid_list = sorted(list(set(ut.flatten(ut.flatten(expanded_aids_list)))))
            #aid_list =
        if verbose:
            print('Specified %d custom aids' % (len(aid_list),))
        request_annot_subset = True
        valid_aids = aid_list
        valid_nids = list(
            set(ibs.get_annot_nids(aid_list, distinguish_unknowns=False)) -
            {const.UNKNOWN_NAME_ROWID}
        )
        valid_gids = list(set(ibs.get_annot_gids(aid_list)))
    #associated_nids = ibs.get_valid_nids(filter_empty=True)  # nids with at least one annotation
    FILTER_HACK = True
    if FILTER_HACK:
        # HUGE HACK - get only images and names with filtered aids
        valid_aids_ = ibs.filter_aids_custom(valid_aids)
        valid_nids_ = ibs.filter_nids_custom(valid_nids)
        valid_gids_ = ibs.filter_gids_custom(valid_gids)
        if verbose:
            print('Filtered %d names' % (len(valid_nids) - len(valid_nids_)))
            print('Filtered %d images' % (len(valid_gids) - len(valid_gids_)))
            print('Filtered %d annots' % (len(valid_aids) - len(valid_aids_)))
        valid_gids = valid_gids_
        valid_nids = valid_nids_
        valid_aids = valid_aids_
        #associated_nids = ut.compress(associated_nids, map(any,
        #ibs.unflat_map(ibs.get_annot_custom_filterflags,
        #               ibs.get_name_aids(associated_nids))))

    # Image info
    if verbose:
        print('Checking Image Info')
    gx2_aids = ibs.get_image_aids(valid_gids)
    if FILTER_HACK:
        gx2_aids = [ibs.filter_aids_custom(aids) for aids in gx2_aids]  # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        gx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in gx2_aids]

    gx2_nAnnots = np.array(list(map(len, gx2_aids)))
    image_without_annots = len(np.where(gx2_nAnnots == 0)[0])
    gx2_nAnnots_stats  = ut.get_stats_str(gx2_nAnnots, newlines=True, use_median=True)
    image_reviewed_list = ibs.get_image_reviewed(valid_gids)

    # Name stats
    if verbose:
        print('Checking Name Info')
    nx2_aids = ibs.get_name_aids(valid_nids)
    if FILTER_HACK:
        nx2_aids =  [ibs.filter_aids_custom(aids) for aids in nx2_aids]    # HACK FOR FILTER
    if request_annot_subset:
        # remove annots not in this subset
        valid_aids_set = set(valid_aids)
        nx2_aids = [list(set(aids).intersection(valid_aids_set)) for aids in nx2_aids]
    associated_nids = ut.compress(valid_nids, list(map(len, nx2_aids)))

    ibs.check_name_mapping_consistency(nx2_aids)

    # Occurrence Info
    def compute_annot_occurrence_ids(ibs, aid_list):
        from ibeis.algo.preproc import preproc_occurrence
        gid_list = ibs.get_annot_gids(aid_list)
        gid2_aids = ut.group_items(aid_list, gid_list)
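        # e.g. ut.group_items([a1, a2, a3], [g1, g1, g2]) -> {g1: [a1, a2], g2: [a3]}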
        flat_imgsetids, flat_gids = preproc_occurrence.ibeis_compute_occurrences(ibs, gid_list, seconds_thresh=4 * 60 * 60, verbose=False)
        occurid2_gids = ut.group_items(flat_gids, flat_imgsetids)
        occurid2_aids = {oid: ut.flatten(ut.take(gid2_aids, gids)) for oid, gids in occurid2_gids.items()}
        return occurid2_aids

    import utool
    with utool.embed_on_exception_context:
        occurid2_aids = compute_annot_occurrence_ids(ibs, valid_aids)
        occur_nids = ibs.unflat_map(ibs.get_annot_nids, occurid2_aids.values())
        occur_unique_nids = [ut.unique(nids) for nids in occur_nids]
        nid2_occurxs = ut.ddict(list)
        for occurx, nids in enumerate(occur_unique_nids):
            for nid in nids:
                nid2_occurxs[nid].append(occurx)

    nid2_occurx_single = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) <= 1}
    nid2_occurx_resight = {nid: occurxs for nid, occurxs in nid2_occurxs.items() if len(occurxs) > 1}
    singlesight_encounters = ibs.get_name_aids(nid2_occurx_single.keys())

    singlesight_annot_stats = ut.get_stats(list(map(len, singlesight_encounters)), use_median=True, use_sum=True)
    resight_name_stats = ut.get_stats(list(map(len, nid2_occurx_resight.values())), use_median=True, use_sum=True)

    try:
        aid_pairs = ibs.filter_aidpairs_by_tags(min_num=0)
        undirected_tags = ibs.get_aidpair_tags(aid_pairs.T[0], aid_pairs.T[1], directed=False)
        tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        pair_tag_info = ut.map_dict_vals(len, tag_dict)

        num_reviewed_pairs = sum(ibs.get_annot_pair_is_reviewed(aid_pairs.T[0], aid_pairs.T[1]))
        pair_tag_info['num_reviewed'] = num_reviewed_pairs
    except Exception:
        pair_tag_info = {}

    #print(ut.dict_str(pair_tag_info))

    # Annot Stats
    # TODO: number of images where chips cover entire image
    # TODO: total image coverage of annotation
    # TODO: total annotation overlap
    """
    ax2_unknown = ibs.is_aid_unknown(valid_aids)
    ax2_nid = ibs.get_annot_name_rowids(valid_aids)
    assert all([nid < 0 if unknown else nid > 0 for nid, unknown in
                zip(ax2_nid, ax2_unknown)]), 'bad annot nid'
    """
    #
    if verbose:
        print('Checking Annot Species')
    unknown_aids = ut.compress(valid_aids, ibs.is_aid_unknown(valid_aids))
    species_list = ibs.get_annot_species_texts(valid_aids)
    species2_aids = ut.group_items(valid_aids, species_list)
    species2_nAids = {key: len(val) for key, val in species2_aids.items()}

    if verbose:
        print('Checking Multiton/Singleton Species')
    nx2_nAnnots = np.array(list(map(len, nx2_aids)))
    # Separate singletons / multitons
    multiton_nxs  = np.where(nx2_nAnnots > 1)[0]
    singleton_nxs = np.where(nx2_nAnnots == 1)[0]
    unassociated_nxs = np.where(nx2_nAnnots == 0)[0]
    assert len(np.intersect1d(singleton_nxs, multiton_nxs)) == 0, 'intersecting names'
    valid_nxs      = np.hstack([multiton_nxs, singleton_nxs])
    num_names_with_gt = len(multiton_nxs)

    # Annot Info
    if verbose:
        print('Checking Annot Info')
    multiton_aids_list = ut.take(nx2_aids, multiton_nxs)
    assert len(set(multiton_nxs)) == len(multiton_nxs)
    if len(multiton_aids_list) == 0:
        multiton_aids = np.array([], dtype=np.int)
    else:
        multiton_aids = np.hstack(multiton_aids_list)
        assert len(set(multiton_aids)) == len(multiton_aids), 'duplicate annot'
    singleton_aids = ut.take(nx2_aids, singleton_nxs)
    multiton_nid2_nannots = list(map(len, multiton_aids_list))

    # Image size stats
    if with_imgsize:
        if verbose:
            print('Checking ImageSize Info')
        gpath_list = ibs.get_image_paths(valid_gids)
        def wh_print_stats(wh_list):
            if len(wh_list) == 0:
                return '{empty}'
            wh_list = np.asarray(wh_list)
            stat_dict = OrderedDict(
                [( 'max', wh_list.max(0)),
                 ( 'min', wh_list.min(0)),
                 ('mean', wh_list.mean(0)),
                 ( 'std', wh_list.std(0))])
            def arr2str(var):
                return ('[' + (
                    ', '.join(list(map(lambda x: '%.1f' % x, var)))
                ) + ']')
            ret = (',\n    '.join([
                '%s:%s' % (key, arr2str(val))
                for key, val in stat_dict.items()
            ]))
            return '{\n    ' + ret + '\n}'

        print('reading image sizes')
        # Image size stats
        img_size_list  = ibs.get_image_sizes(valid_gids)
        img_size_stats  = wh_print_stats(img_size_list)

        # Chip size stats
        annotation_bbox_list = ibs.get_annot_bboxes(valid_aids)
        annotation_bbox_arr = np.array(annotation_bbox_list)
        if len(annotation_bbox_arr) == 0:
            annotation_size_list = []
        else:
            annotation_size_list = annotation_bbox_arr[:, 2:4]
        chip_size_stats = wh_print_stats(annotation_size_list)
        imgsize_stat_lines = [
            (' # Img in dir                 = %d' % len(gpath_list)),
            (' Image Size Stats  = %s' % (img_size_stats,)),
            (' * Chip Size Stats = %s' % (chip_size_stats,)),
        ]
    else:
        imgsize_stat_lines = []

    if verbose:
        print('Building Stats String')

    multiton_stats = ut.get_stats_str(multiton_nid2_nannots, newlines=True, use_median=True)

    # Time stats
    unixtime_list = ibs.get_image_unixtime(valid_gids)
    unixtime_list = ut.list_replace(unixtime_list, -1, float('nan'))
    #valid_unixtime_list = [time for time in unixtime_list if time != -1]
    #unixtime_statstr = ibs.get_image_time_statstr(valid_gids)
    if ut.get_argflag('--hackshow-unixtime'):
        show_time_distributions(ibs, unixtime_list)
        ut.show_if_requested()
    unixtime_statstr = ut.get_timestats_str(unixtime_list, newlines=True, full=True)

    # GPS stats
    gps_list_ = ibs.get_image_gps(valid_gids)
    gpsvalid_list = [gps != (-1, -1) for gps in gps_list_]
    gps_list  = ut.compress(gps_list_, gpsvalid_list)

    def get_annot_age_stats(aid_list):
        annot_age_months_est_min = ibs.get_annot_age_months_est_min(aid_list)
        annot_age_months_est_max = ibs.get_annot_age_months_est_max(aid_list)
        age_dict = ut.ddict(lambda: 0)
        for min_age, max_age in zip(annot_age_months_est_min, annot_age_months_est_max):
            # Guard None before any numeric comparison
            if (min_age is None or min_age < 12) and (max_age is not None and
                                                      max_age < 12):
                age_dict['Infant'] += 1
            elif (min_age is not None and 12 <= min_age < 36 and
                  max_age is not None and 12 <= max_age < 36):
                age_dict['Juvenile'] += 1
            elif (min_age is not None and 36 <= min_age and
                  (max_age is None or 36 <= max_age)):
                age_dict['Adult'] += 1
            else:
                print('Found UNKNOWN Age: %r, %r' % (min_age, max_age))
                age_dict['UNKNOWN'] += 1
        return age_dict

    def get_annot_sex_stats(aid_list):
        annot_sextext_list = ibs.get_annot_sex_texts(aid_list)
        sextext2_aids = ut.group_items(aid_list, annot_sextext_list)
        sex_keys = list(ibs.const.SEX_TEXT_TO_INT.keys())
        assert set(sex_keys) >= set(annot_sextext_list), 'bad keys: ' + str(set(annot_sextext_list) - set(sex_keys))
        sextext2_nAnnots = ut.odict([(key, len(sextext2_aids.get(key, []))) for key in sex_keys])
        # Filter 0's
        sextext2_nAnnots = {key: val for key, val in six.iteritems(sextext2_nAnnots) if val != 0}
        return sextext2_nAnnots

    if verbose:
        print('Checking Other Annot Stats')

    qualtext2_nAnnots = ibs.get_annot_qual_stats(valid_aids)
    yawtext2_nAnnots = ibs.get_annot_yaw_stats(valid_aids)
    agetext2_nAnnots = get_annot_age_stats(valid_aids)
    sextext2_nAnnots = get_annot_sex_stats(valid_aids)

    if verbose:
        print('Checking Contrib Stats')

    # Contributor Statistics
    # hack remove colon for image alignment
    def fix_tag_list(tag_list):
        return [None if tag is None else tag.replace(':', ';') for tag in tag_list]
    image_contrib_tags = fix_tag_list(ibs.get_image_contributor_tag(valid_gids))
    annot_contrib_tags = fix_tag_list(ibs.get_annot_image_contributor_tag(valid_aids))
    contrib_tag_to_gids = ut.group_items(valid_gids, image_contrib_tags)
    contrib_tag_to_aids = ut.group_items(valid_aids, annot_contrib_tags)

    contrib_tag_to_qualstats = {key: ibs.get_annot_qual_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)}
    contrib_tag_to_viewstats = {key: ibs.get_annot_yaw_stats(aids) for key, aids in six.iteritems(contrib_tag_to_aids)}

    contrib_tag_to_nImages = {key: len(val) for key, val in six.iteritems(contrib_tag_to_gids)}
    contrib_tag_to_nAnnots = {key: len(val) for key, val in six.iteritems(contrib_tag_to_aids)}

    if verbose:
        print('Summarizing')

    # Summarize stats
    num_names = len(valid_nids)
    num_names_unassociated = len(valid_nids) - len(associated_nids)
    num_names_singleton = len(singleton_nxs)
    num_names_multiton =  len(multiton_nxs)

    num_singleton_annots = len(singleton_aids)
    num_multiton_annots = len(multiton_aids)
    num_unknown_annots = len(unknown_aids)
    num_annots = len(valid_aids)

    if with_bytes:
        if verbose:
            print('Checking Disk Space')
        ibsdir_space   = ut.byte_str2(ut.get_disk_space(ibs.get_ibsdir()))
        dbdir_space    = ut.byte_str2(ut.get_disk_space(ibs.get_dbdir()))
        imgdir_space   = ut.byte_str2(ut.get_disk_space(ibs.get_imgdir()))
        cachedir_space = ut.byte_str2(ut.get_disk_space(ibs.get_cachedir()))

    if True:
        if verbose:
            print('Check asserts')
        try:
            bad_aids = np.intersect1d(multiton_aids, unknown_aids)
            _num_names_total_check = num_names_singleton + num_names_unassociated + num_names_multiton
            _num_annots_total_check = num_unknown_annots + num_singleton_annots + num_multiton_annots
            assert len(bad_aids) == 0, 'intersecting multiton aids and unknown aids'
            assert _num_names_total_check == num_names, 'inconsistent num names'
            #if not request_annot_subset:
            # dont check this if you have an annot subset
            assert _num_annots_total_check == num_annots, 'inconsistent num annots'
        except Exception as ex:
            ut.printex(ex, keys=[
                '_num_names_total_check',
                'num_names',
                '_num_annots_total_check',
                'num_annots',
                'num_names_singleton',
                'num_names_multiton',
                'num_unknown_annots',
                'num_multiton_annots',
                'num_singleton_annots',
            ])
            raise

    # Get contributor statistics
    contrib_rowids = ibs.get_valid_contrib_rowids()
    num_contributors = len(contrib_rowids)

    # print
    num_tabs = 5

    def align2(str_):
        return ut.align(str_, ':', ' :')

    def align_dict2(dict_):
        str_ = ut.dict_str(dict_)
        return align2(str_)

    header_block_lines = (
        [('+============================'), ] + (
            [
                ('+ singleton := single sighting'),
                ('+ multiton  := multiple sightings'),
                ('--' * num_tabs),
            ] if not short and with_header else []
        )
    )

    source_block_lines = [
        ('DB Info:  ' + ibs.get_dbname()),
        ('DB Notes: ' + ibs.get_dbnotes()),
        ('DB NumContrib: %d' % num_contributors),
    ]

    bytes_block_lines = [
        ('--' * num_tabs),
        ('DB Bytes: '),
        ('     +- dbdir nBytes:         ' + dbdir_space),
        ('     |  +- _ibsdb nBytes:     ' + ibsdir_space),
        ('     |  |  +-imgdir nBytes:   ' + imgdir_space),
        ('     |  |  +-cachedir nBytes: ' + cachedir_space),
    ] if with_bytes else []

    name_block_lines = [
        ('--' * num_tabs),
        ('# Names                      = %d' % num_names),
        ('# Names (unassociated)       = %d' % num_names_unassociated),
        ('# Names (singleton)          = %d' % num_names_singleton),
        ('# Names (multiton)           = %d' % num_names_multiton),
    ]

    subset_str = '        ' if not request_annot_subset else '(SUBSET)'

    annot_block_lines = [
        ('--' * num_tabs),
        ('# Annots %s            = %d' % (subset_str, num_annots,)),
        ('# Annots (unknown)           = %d' % num_unknown_annots),
        ('# Annots (singleton)         = %d' % num_singleton_annots),
        ('# Annots (multiton)          = %d' % num_multiton_annots),
    ]

    annot_per_basic_block_lines = [
        ('--' * num_tabs),
        ('# Annots per Name (multiton) = %s' % (align2(multiton_stats),)),
        ('# Annots per Image           = %s' % (align2(gx2_nAnnots_stats),)),
        ('# Annots per Species         = %s' % (align_dict2(species2_nAids),)),
    ] if not short else []

    occurrence_block_lines = [
        ('--' * num_tabs),
        ('# Occurrence Per Name (Resights) = %s' % (align_dict2(resight_name_stats),)),
        ('# Annots per Encounter (Singlesights) = %s' % (align_dict2(singlesight_annot_stats),)),
        ('# Pair Tag Info (annots) = %s' % (align_dict2(pair_tag_info),)),
    ] if not short else []

    annot_per_qualview_block_lines = [
        None if short else '# Annots per Viewpoint = %s' % align_dict2(yawtext2_nAnnots),
        None if short else '# Annots per Quality = %s' % align_dict2(qualtext2_nAnnots),
    ]

    annot_per_agesex_block_lines = [
        '# Annots per Age = %s' % align_dict2(agetext2_nAnnots),
        '# Annots per Sex = %s' % align_dict2(sextext2_nAnnots),
    ] if not short and with_agesex else []

    contrib_block_lines = [
        '# Images per contributor       = ' + align_dict2(contrib_tag_to_nImages),
        '# Annots per contributor       = ' + align_dict2(contrib_tag_to_nAnnots),
        '# Quality per contributor      = ' + ut.dict_str(contrib_tag_to_qualstats, sorted_=True),
        '# Viewpoint per contributor    = ' + ut.dict_str(contrib_tag_to_viewstats, sorted_=True),
    ] if with_contrib else []

    img_block_lines = [
        ('--' * num_tabs),
        ('# Img                        = %d' % len(valid_gids)),
        None if short else ('# Img reviewed               = %d' % sum(image_reviewed_list)),
        None if short else ('# Img with gps               = %d' % len(gps_list)),
        #('# Img with timestamp         = %d' % len(valid_unixtime_list)),
        None if short else ('Img Time Stats               = %s' % (align2(unixtime_statstr),)),
    ]

    info_str_lines = (
        header_block_lines +
        bytes_block_lines +
        source_block_lines +
        name_block_lines +
        annot_block_lines +
        annot_per_basic_block_lines +
        occurrence_block_lines +
        annot_per_qualview_block_lines +
        annot_per_agesex_block_lines +
        img_block_lines +
        contrib_block_lines +
        imgsize_stat_lines +
        [('L============================'), ]
    )
    info_str = '\n'.join(ut.filter_Nones(info_str_lines))
    info_str2 = ut.indent(info_str, '[{tag}]'.format(tag=tag))
    if verbose:
        print(info_str2)
    locals_ = locals()
    return locals_
Example #47
0
def draw_bayesian_model(model, evidence={}, soft_evidence={}, fnum=None,
                        pnum=None, **kwargs):

    from pgmpy.models import BayesianModel
    if not isinstance(model, BayesianModel):
        model = model.to_bayesian_model()

    import plottool as pt
    import networkx as nx
    kwargs = kwargs.copy()
    factor_list = kwargs.pop('factor_list', [])

    ttype_colors, ttype_scalars = make_colorcodes(model)

    textprops = {
        'horizontalalignment': 'left', 'family': 'monospace', 'size': 8, }

    # build graph attrs
    tup = get_node_viz_attrs(
        model, evidence, soft_evidence, factor_list, ttype_colors, **kwargs)
    node_color, pos_list, pos_dict, takws = tup

    # draw graph
    has_infered = evidence or 'factor_list' in kwargs

    if False:
        fig = pt.figure(fnum=fnum, pnum=pnum, doclf=True)  # NOQA
        ax = pt.gca()
        drawkw = dict(pos=pos_dict, ax=ax, with_labels=True, node_size=1100,
                      node_color=node_color)
        nx.draw(model, **drawkw)
    else:
        # BE VERY CAREFUL
        if 1:
            graph = model.copy()
            graph.__class__ = nx.DiGraph
            graph.graph['groupattrs'] = ut.ddict(dict)
            #graph = model.
            if getattr(graph, 'ttype2_cpds', None) is not None:
                # Add invis edges and ttype groups
                for ttype in model.ttype2_cpds.keys():
                    ttype_cpds = model.ttype2_cpds[ttype]
                    # use defined ordering
                    ttype_nodes = ut.list_getattr(ttype_cpds, 'variable')
                    # ttype_nodes = sorted(ttype_nodes)
                    invis_edges = list(ut.itertwo(ttype_nodes))
                    graph.add_edges_from(invis_edges)
                    nx.set_edge_attributes(graph, 'style', {edge: 'invis' for edge in invis_edges})
                    nx.set_node_attributes(graph, 'groupid', {node: ttype for node in ttype_nodes})
                    graph.graph['groupattrs'][ttype]['rank'] = 'same'
                    graph.graph['groupattrs'][ttype]['cluster'] = False
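                    # (Invisible edges chain same-ttype nodes so graphviz dot
                    #  keeps their defined order; groupid with rank='same'
                    #  places them on one row without drawing visible edges.)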
        else:
            graph = model
        pt.show_nx(graph, layout_kw={'prog': 'dot'}, fnum=fnum, pnum=pnum, verbose=0)
        pt.zoom_factory()
        fig = pt.gcf()
        ax = pt.gca()
        pass
    hacks = [pt.draw_text_annotations(textprops=textprops, **takw)
             for takw in takws if takw]

    xmin, ymin = np.array(pos_list).min(axis=0)
    xmax, ymax = np.array(pos_list).max(axis=0)
    if 'name' in model.ttype2_template:
        num_names = len(model.ttype2_template['name'].basis)
        num_annots = len(model.ttype2_cpds['name'])
        if num_annots > 4:
            ax.set_xlim((xmin - 40, xmax + 40))
            ax.set_ylim((ymin - 50, ymax + 50))
            fig.set_size_inches(30, 7)
        else:
            ax.set_xlim((xmin - 42, xmax + 42))
            ax.set_ylim((ymin - 50, ymax + 50))
            fig.set_size_inches(23, 7)
        title = 'num_names=%r, num_annots=%r' % (num_names, num_annots,)
    else:
        title = ''
    map_assign = kwargs.get('map_assign', None)

    def word_insert(text):
        return '' if len(text) == 0 else text + ' '

    top_assignments = kwargs.get('top_assignments', None)
    if top_assignments is not None:
        map_assign, map_prob = top_assignments[0]
        if map_assign is not None:
            title += '\n%sMAP: ' % (word_insert(kwargs.get('method', '')))
            title += map_assign + ' @' + '%.2f%%' % (100 * map_prob,)
    if kwargs.get('show_title', True):
        pt.set_figtitle(title, size=14)

    for hack in hacks:
        hack()

    if has_infered:
        # Hack in colorbars
        # if ut.list_type(basis) is int:
        #     pt.colorbar(scalars, colors, lbl='score', ticklabels=np.array(basis) + 1)
        # else:
        #     pt.colorbar(scalars, colors, lbl='score', ticklabels=basis)
        keys = ['name', 'score']
        locs = ['left', 'right']
        for key, loc in zip(keys, locs):
            if key in ttype_colors:
                basis = model.ttype2_template[key].basis
                # scalars =
                colors = ttype_colors[key]
                scalars = ttype_scalars[key]
                pt.colorbar(scalars, colors, lbl=key, ticklabels=basis,
                            ticklocation=loc)
Example #48
0
def compute_negentropy_names(aids_list, daid2_label):
    r"""
    One of our idf extensions
    Word weighting based on the negative entropy over all names of p(n_i | word)

    Args:
        aids_list (list of aids):
        daid2_label (dict from daid to label):

    Returns:
        negentropy_list (ndarray[float32]): idf-like weighting for each word based on the negative entropy

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1()
        >>> wx_series = np.arange(len(invindex.words))
        >>> idx2_aid = invindex.idx2_daid
        >>> daid2_label = invindex.daid2_label
        >>> _ = helper_idf_wordgroup(wx2_idxs, idx2_aid, wx_series)
        >>> idxs_list, aids_list = _

    Math::
        p(n_i | \word) = \sum_{\lbl \in L_i} p(\lbl | \word)

        p(\lbl | \word) = \frac{p(\word | \lbl) p(\lbl)}{p(\word)}

        p(\word) = \sum_{\lbl' \in L} p(\word | \lbl') p(\lbl')

        p(\word | \lbl) = NumAnnotOfLabelWithWord / NumAnnotWithLabel =
        \frac{\sum_{\X \in \DB_\lbl} b(\word, \X)}{\card{\DB_\lbl}}

        h(n_i | word) = -\sum_{i=1}^N p(n_i | \word) \log p(n_i | \word)

        word_weight = log(N) - h(n | word)

    CommandLine:
        python dev.py -t smk2 --allgt --db GZ_ALL
        python dev.py -t smk5 --allgt --db GZ_ALL

    Auto:
        python -c "import utool as ut; ut.print_auto_docstr('ibeis.algo.hots.smk.smk_index', 'compute_negentropy_names')"
    """
    nWords = len(aids_list)
    # --- LABEL MEMBERS w.r.t daids ---
    # compute mapping from label to daids
    # Translate tuples into scalars for efficiency
    label_list = list(daid2_label.values())
    lblindex_list = np.array(ut.tuples_to_unique_scalars(label_list))
    #daid2_lblindex = dict(zip(daid_list, lblindex_list))
    unique_lblindexes, groupxs = clustertool.group_indices(lblindex_list)
    daid_list = np.array(list(daid2_label.keys()))
    daids_list = [daid_list.take(xs) for xs in groupxs]

    # --- DAID MEMBERS w.r.t. words ---
    # compute mapping from daid to word indexes
    # finds all the words that belong to an annotation
    daid2_wxs = ut.ddict(list)
    for wx, _daids in enumerate(aids_list):
        for daid in _daids:
            daid2_wxs[daid].append(wx)

    # --- \Pr(\word \given \lbl) for each label ---
    # Compute the number of annotations in a label with the word vs
    # the number of annotations in the label
    lblindex2_daids = list(zip(unique_lblindexes, daids_list))
    # Get num times word appears for each label
    probWordGivenLabel_list = []
    for lblindex, _daids in lblindex2_daids:
        nAnnotOfLabelWithWord = np.zeros(nWords, dtype=np.int32)
        for daid in _daids:
            wxs = np.unique(daid2_wxs[daid])
            nAnnotOfLabelWithWord[wxs] += 1
        probWordGivenLabel = nAnnotOfLabelWithWord.astype(np.float64) / len(_daids)
        probWordGivenLabel_list.append(probWordGivenLabel)
    # (nLabels, nWords)
    probWordGivenLabel_arr = np.array(probWordGivenLabel_list)
    # --- \Pr(\lbl \given \word) ---
    # compute partition function that approximates probability of a word
    # (1, nWords)
    probWord = probWordGivenLabel_arr.sum(axis=0)
    probWord.shape = (1, probWord.size)
    # (nLabels, nWords)
    probLabelGivenWord_arr = (probWordGivenLabel_arr / probWord)
    # --- \Pr(\name \given \lbl) ---
    # get names for each unique label
    nid_list = np.array([label_list[xs[0]][0] for xs in groupxs])
    unique_nids, groupxs_ = clustertool.group_indices(nid_list)
    # (nNames, nWords)
    # add a little wiggle room
    eps = 1E-9
    # http://stackoverflow.com/questions/872544/precision-of-floating-point
    #epsilon = 2^(E-52)    % For a 64-bit float (double precision)
    #epsilon = 2^(E-23)    % For a 32-bit float (single precision)
    #epsilon = 2^(E-10)    % For a 16-bit float (half precision)
    probNameGivenWord = eps + (1.0 - eps) * np.array([probLabelGivenWord_arr.take(xs, axis=0).sum(axis=0) for xs in groupxs_])
    logProbNameGivenWord = np.log(probNameGivenWord)
    wordNameEntropy = -(probNameGivenWord * logProbNameGivenWord).sum(0)
    # Compute negative entropy for weights
    nNames = len(nid_list)
    negentropy_list = np.log(nNames) - wordNameEntropy
    return negentropy_list
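
# A quick numeric sanity check of the weighting above (numpy only, illustrative):
# a word spread uniformly over all N names has entropy log(N), giving weight ~0;
# a word concentrated on a single name has entropy ~0, giving weight ~log(N).
import numpy as np
N = 4
for p in (np.full(N, 1.0 / N), np.array([1.0, 1e-9, 1e-9, 1e-9])):
    p = p / p.sum()
    entropy = -(p * np.log(p)).sum()
    print(np.log(N) - entropy)  # ~0.0 for uniform, ~log(4) for concentrated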
Example #49
0
def make_run_tests_script_text(test_headers, test_argvs, quick_tests=None,
                               repodir=None, exclude_list=[]):
    """
    Autogeneration function

    TODO: move to util_autogen or just deprecate

    Examples:
        >>> from utool.util_tests import *  # NOQA
        >>> import utool  # NOQA
        >>> testdirs = ['~/code/ibeis/test_ibs*.py']
    """
    import utool as ut
    from os.path import relpath, join, dirname  # NOQA

    # Avoid mutating the shared default list
    exclude_list = exclude_list + ['__init__.py']

    # General format of the testing script

    script_fmtstr = ut.codeblock(
        r'''
        #!/bin/bash
        # Runs all tests
        # Win32 path hacks
        export CWD=$(pwd)
        export PYMAJOR="$(python -c "import sys; print(sys.version_info[0])")"

        # <CORRECT_PYTHON>
        # GET CORRECT PYTHON ON ALL PLATFORMS
        export SYSNAME="$(expr substr $(uname -s) 1 10)"
        if [ "$SYSNAME" = "MINGW32_NT" ]; then
            export PYEXE=python
        else
            if [ "$PYMAJOR" = "3" ]; then
                # virtual env?
                export PYEXE=python
            else
                export PYEXE=python2.7
            fi
        fi
        # </CORRECT_PYTHON>

        PRINT_DELIMETER()
        {{
            printf "\n#\n#\n#>>>>>>>>>>> next_test\n\n"
        }}

        export TEST_ARGV="{test_argvs} $@"

        {dirdef_block}

        # Default tests to run
        set_test_flags()
        {{
            export DEFAULT=$1
        {testdefault_block}
        }}
        set_test_flags OFF
        {testdefaulton_block}

        # Parse for bash commandline args
        for i in "$@"
        do
        case $i in --testall)
            set_test_flags ON
            ;;
        esac
        {testcmdline_block}
        done

        BEGIN_TESTS()
        {{
        cat <<EOF
        {runtests_bubbletext}
        EOF
            echo "BEGIN: TEST_ARGV=$TEST_ARGV"
            PRINT_DELIMETER
            num_passed=0
            num_ran=0
            export FAILED_TESTS=''
        }}

        RUN_TEST()
        {{
            echo "RUN_TEST: $@"
            export TEST="$PYEXE $@ $TEST_ARGV"
            $TEST
            export RETURN_CODE=$?
            echo "RETURN_CODE=$RETURN_CODE"
            PRINT_DELIMETER
            num_ran=$(($num_ran + 1))
            if [ "$RETURN_CODE" == "0" ] ; then
                num_passed=$(($num_passed + 1))
            fi
            if [ "$RETURN_CODE" != "0" ] ; then
                export FAILED_TESTS="$FAILED_TESTS\n$TEST"
            fi
        }}

        END_TESTS()
        {{
            echo "RUN_TESTS: DONE"
            if [ "$FAILED_TESTS" != "" ] ; then
                echo "-----"
                printf "Failed Tests:"
                printf "$FAILED_TESTS\n"
                printf "$FAILED_TESTS\n" >> failed_shelltests.txt
                echo "-----"
            fi
            echo "$num_passed / $num_ran tests passed"
        }}

        #---------------------------------------------
        # START TESTS
        BEGIN_TESTS

        {quicktest_block}

        {test_block}

        #---------------------------------------------
        # END TESTING
        END_TESTS
        ''')

    testcmdline_fmtstr = ut.codeblock(
        r'''
        case $i in --notest{header_lower})
            export {testflag}=OFF
            ;;
        esac
        case $i in --test{header_lower})
            export {testflag}=ON
            ;;
        esac
        ''')

    header_test_block_fmstr = ut.codeblock(
        r'''

        #---------------------------------------------
        #{header_text}
        if [ "${testflag}" = "ON" ] ; then
        cat <<EOF
        {header_bubble_text}
        EOF
        {testlines_block}
        fi
        ''')

    #specialargv = '--noshow'
    specialargv = ''
    testline_fmtstr = 'RUN_TEST ${dirvar}/{fpath} {specialargv}'
    testline_fmtstr2 = 'RUN_TEST {fpath} {specialargv}'

    def format_testline(fpath, dirvar):
        if dirvar is None:
            return testline_fmtstr2.format(fpath=fpath, specialargv=specialargv)
        else:
            return testline_fmtstr.format(dirvar=dirvar, fpath=fpath, specialargv=specialargv)

    default_flag_line_list = []
    defaulton_flag_line_list = []
    testcmdline_list = []
    dirdef_list = []
    header_test_block_list = []

    known_tests = ut.ddict(list)

    # Tests to always run
    if quick_tests is not None:
        quicktest_block = '\n'.join(
            ['# Quick Tests (always run)'] +
            ['RUN_TEST ' + testline for testline in quick_tests])
    else:
        quicktest_block = '# No quick tests'

    # Loop over different test types
    for testdef_tup in test_headers:
        header, default, modname, dpath, pats, testcmds = testdef_tup
        # Build individual test type information
        header_upper = header.upper()
        header_lower = header.lower()
        testflag = header_upper + '_TEST'

        if modname is not None:
            dirvar = header_upper + '_DIR'
            dirdef = ''.join([
                'export {dirvar}=$($PYEXE -c "',
                'import os, {modname};',
                'print(str(os.path.dirname(os.path.dirname({modname}.__file__))))',
                '")']).format(dirvar=dirvar, modname=modname)
            dirdef_list.append(dirdef)
        else:
            dirvar = None

        # Build test dir
        #dirvar = header_upper + '_DIR'
        #dirdef = 'export {dirvar}={dirname}'.format(dirvar=dirvar, dirname=dirname)
        #dirdef_list.append(dirdef)

        # Build command line flags
        default_flag_line = 'export {testflag}=$DEFAULT'.format(testflag=testflag)

        if default:
            defaulton_flag_line = 'export {testflag}=ON'.format(testflag=testflag)
            defaulton_flag_line_list.append(defaulton_flag_line)

        testcmdline_fmtdict = dict(header_lower=header_lower,
                                   testflag=testflag)
        testcmdline = testcmdline_fmtstr.format(**testcmdline_fmtdict)

        #ut.ls(dpath)

        # VERY HACK BIT OF CODE

        # Get list of tests from patterns
        if testcmds is None:
            if modname is not None:
                module = __import__(modname)
                repo_path = dirname(dirname(module.__file__))
            else:
                repo_path = repodir
            dpath_ = ut.unixpath(util_path.unixjoin(repo_path, dpath))

            if header_upper == 'OTHER':
                # Hacky way to grab any other tests not explicitly seen in this directory
                _testfpath_list = list(set(ut.glob(dpath_, '*.py')) - set(known_tests[dpath_]))
                #_testfpath_list = ut.glob(dpath_, '*.py')
                #set(known_tests[dpath_])
            else:
                _testfpath_list = ut.flatten([ut.glob(dpath_, pat) for pat in pats])

            def not_excluded(x):
                return not any([x.find(exclude) > -1 for exclude in exclude_list])

            _testfpath_list = list(filter(not_excluded, _testfpath_list))

            known_tests[dpath_].extend(_testfpath_list)
            #print(_testfpath_list)
            testfpath_list = [util_path.unixjoin(dpath, relpath(fpath, dpath_))
                              for fpath in _testfpath_list]

            testline_list = [format_testline(fpath, dirvar) for fpath in testfpath_list]
        else:
            testline_list = testcmds

        testlines_block = ut.indentjoin(testline_list).strip('\n')

        # Construct test block for this type
        header_text = header_upper + ' TESTS'
        headerfont = 'cybermedium'
        header_bubble_text = ut.indent(ut.bubbletext(header_text, headerfont).strip())
        header_test_block_dict = dict(
            testflag=testflag,
            header_text=header_text,
            testlines_block=testlines_block,
            header_bubble_text=header_bubble_text,)
        header_test_block = header_test_block_fmstr.format(**header_test_block_dict)

        # Append to script lists
        header_test_block_list.append(header_test_block)
        default_flag_line_list.append(default_flag_line)
        testcmdline_list.append(testcmdline)

    runtests_bubbletext = ut.bubbletext('RUN TESTS', 'cyberlarge')

    test_block = '\n'.join(header_test_block_list)
    dirdef_block = '\n'.join(dirdef_list)
    testdefault_block = ut.indent('\n'.join(default_flag_line_list))
    testdefaulton_block = '\n'.join(defaulton_flag_line_list)
    testcmdline_block = '\n'.join(testcmdline_list)

    script_fmtdict = dict(
        quicktest_block=quicktest_block,
        runtests_bubbletext=runtests_bubbletext,
        test_argvs=test_argvs, dirdef_block=dirdef_block,
        testdefault_block=testdefault_block,
        testdefaulton_block=testdefaulton_block,
        testcmdline_block=testcmdline_block,
        test_block=test_block,)
    script_text = script_fmtstr.format(**script_fmtdict)

    return script_text
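

# Hypothetical usage sketch (names are examples; test_headers tuples follow the
# unpacking above: (header, default, modname, dpath, pats, testcmds)):
#   test_headers = [('doc', True, None, '.', ['test_*.py'], None)]
#   script_text = make_run_tests_script_text(test_headers, test_argvs='--quiet',
#                                            repodir='~/code/myrepo')
#   ut.writeto('run_tests.sh', script_text)  # then: bash run_tests.sh --testall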
Example #50
0
def draw_bayesian_model(model,
                        evidence={},
                        soft_evidence={},
                        fnum=None,
                        pnum=None,
                        **kwargs):

    from pgmpy.models import BayesianModel
    if not isinstance(model, BayesianModel):
        model = model.to_bayesian_model()

    import plottool as pt
    import networkx as nx
    kwargs = kwargs.copy()
    factor_list = kwargs.pop('factor_list', [])

    ttype_colors, ttype_scalars = make_colorcodes(model)

    textprops = {
        'horizontalalignment': 'left',
        'family': 'monospace',
        'size': 8,
    }

    # build graph attrs
    tup = get_node_viz_attrs(model, evidence, soft_evidence, factor_list,
                             ttype_colors, **kwargs)
    node_color, pos_list, pos_dict, takws = tup

    # draw graph
    has_infered = evidence or 'factor_list' in kwargs

    if False:
        fig = pt.figure(fnum=fnum, pnum=pnum, doclf=True)  # NOQA
        ax = pt.gca()
        drawkw = dict(pos=pos_dict,
                      ax=ax,
                      with_labels=True,
                      node_size=1100,
                      node_color=node_color)
        nx.draw(model, **drawkw)
    else:
        # BE VERY CAREFUL
        if 1:
            graph = model.copy()
            graph.__class__ = nx.DiGraph
            graph.graph['groupattrs'] = ut.ddict(dict)
            #graph = model.
            if getattr(graph, 'ttype2_cpds', None) is not None:
                # Add invis edges and ttype groups
                for ttype in model.ttype2_cpds.keys():
                    ttype_cpds = model.ttype2_cpds[ttype]
                    # use defined ordering
                    ttype_nodes = ut.list_getattr(ttype_cpds, 'variable')
                    # ttype_nodes = sorted(ttype_nodes)
                    invis_edges = list(ut.itertwo(ttype_nodes))
                    graph.add_edges_from(invis_edges)
                    nx.set_edge_attributes(
                        graph, 'style',
                        {edge: 'invis'
                         for edge in invis_edges})
                    nx.set_node_attributes(
                        graph, 'groupid',
                        {node: ttype
                         for node in ttype_nodes})
                    graph.graph['groupattrs'][ttype]['rank'] = 'same'
                    graph.graph['groupattrs'][ttype]['cluster'] = False
        else:
            graph = model
        pt.show_nx(graph,
                   layout_kw={'prog': 'dot'},
                   fnum=fnum,
                   pnum=pnum,
                   verbose=0)
        pt.zoom_factory()
        fig = pt.gcf()
        ax = pt.gca()
        pass
    hacks = [
        pt.draw_text_annotations(textprops=textprops, **takw) for takw in takws
        if takw
    ]

    xmin, ymin = np.array(pos_list).min(axis=0)
    xmax, ymax = np.array(pos_list).max(axis=0)
    if 'name' in model.ttype2_template:
        num_names = len(model.ttype2_template['name'].basis)
        num_annots = len(model.ttype2_cpds['name'])
        if num_annots > 4:
            ax.set_xlim((xmin - 40, xmax + 40))
            ax.set_ylim((ymin - 50, ymax + 50))
            fig.set_size_inches(30, 7)
        else:
            ax.set_xlim((xmin - 42, xmax + 42))
            ax.set_ylim((ymin - 50, ymax + 50))
            fig.set_size_inches(23, 7)
        title = 'num_names=%r, num_annots=%r' % (
            num_names,
            num_annots,
        )
    else:
        title = ''
    map_assign = kwargs.get('map_assign', None)

    def word_insert(text):
        return '' if len(text) == 0 else text + ' '

    top_assignments = kwargs.get('top_assignments', None)
    if top_assignments is not None:
        map_assign, map_prob = top_assignments[0]
        if map_assign is not None:
            title += '\n%sMAP: ' % (word_insert(kwargs.get('method', '')))
            title += map_assign + ' @' + '%.2f%%' % (100 * map_prob, )
    if kwargs.get('show_title', True):
        pt.set_figtitle(title, size=14)

    for hack in hacks:
        hack()

    if has_infered:
        # Hack in colorbars
        # if ut.list_type(basis) is int:
        #     pt.colorbar(scalars, colors, lbl='score', ticklabels=np.array(basis) + 1)
        # else:
        #     pt.colorbar(scalars, colors, lbl='score', ticklabels=basis)
        keys = ['name', 'score']
        locs = ['left', 'right']
        for key, loc in zip(keys, locs):
            if key in ttype_colors:
                basis = model.ttype2_template[key].basis
                # scalars =
                colors = ttype_colors[key]
                scalars = ttype_scalars[key]
                pt.colorbar(scalars,
                            colors,
                            lbl=key,
                            ticklabels=basis,
                            ticklocation=loc)
Example #51
0
def print_acfg_list(acfg_list, expanded_aids_list=None, ibs=None,
                    combined=False, **kwargs):
    r"""
    Args:
        acfg_list (list):
        expanded_aids_list (list): (default = None)
        ibs (IBEISController):  ibeis controller object(default = None)
        combined (bool): (default = False)

    CommandLine:
        python -m ibeis.expt.annotation_configs --exec-print_acfg_list --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.expt.annotation_configs import *  # NOQA
        >>> import ibeis
        >>> acfg_list = '?'
        >>> expanded_aids_list = None
        >>> ibs = None
        >>> combined = False
        >>> result = print_acfg_list(acfg_list, expanded_aids_list, ibs, combined)
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    _tup = compress_acfg_list_for_printing(acfg_list)
    nonvaried_compressed_dict, varied_compressed_dict_list = _tup

    ut.colorprint('+=== <Info acfg_list> ===', 'white')
    #print('Printing acfg_list info. len(acfg_list) = %r' % (len(acfg_list),))
    print('non-varied aidcfg = ' + ut.dict_str(nonvaried_compressed_dict))
    seen_ = ut.ddict(list)

    # get default kwkeys for annot info
    if ibs is not None:
        annotstats_kw = kwargs.copy()
        kwkeys = ut.parse_func_kwarg_keys(ibs.get_annot_stats_dict)
        annotstats_kw.update(ut.argparse_dict(
            dict(zip(kwkeys, [None] * len(kwkeys))), only_specified=True))
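        # (i.e. pull any of get_annot_stats_dict's keyword options that were
        #  explicitly given on the command line into annotstats_kw)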

    for acfgx in range(len(acfg_list)):
        acfg = acfg_list[acfgx]
        title = ('q_cfgname=' + acfg['qcfg']['_cfgname'] +
                 ' d_cfgname=' + acfg['dcfg']['_cfgname'])

        ut.colorprint('+--- acfg %d / %d -- %s ---- ' %
                      (acfgx + 1, len(acfg_list), title), 'lightgray')
        print('acfg = ' + ut.dict_str(varied_compressed_dict_list[acfgx],
                                      strvals=True))

        if expanded_aids_list is not None:
            qaids, daids = expanded_aids_list[acfgx]
            key = (ut.hashstr_arr27(qaids, 'qaids'),
                   ut.hashstr_arr27(daids, 'daids'))
            if key not in seen_:
                if ibs is not None:
                    seen_[key].append(acfgx)
                    annotconfig_stats_strs, _ = ibs.get_annotconfig_stats(
                        qaids, daids, verbose=True, combined=combined,
                        **annotstats_kw)
            else:
                dupindex = seen_[key]
                print('DUPLICATE of index %r' % (dupindex,))
                dupdict = varied_compressed_dict_list[dupindex[0]]
                print('DUP OF acfg = ' + ut.dict_str(dupdict, strvals=True))
    ut.colorprint('L___ </Info acfg_list> ___', 'white')
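The seen_ ddict above gives a compact dedupe-by-key pattern: the first config that produces a given (qaids, daids) hash records its index, and later configs with the same hash are reported as duplicates of that index. A minimal self-contained sketch of the same pattern, with hypothetical keys and config names:

from collections import defaultdict

seen = defaultdict(list)
items = [('hash1', 'cfg_a'), ('hash2', 'cfg_b'), ('hash1', 'cfg_c')]
for idx, (key, cfg) in enumerate(items):
    if key not in seen:
        seen[key].append(idx)          # remember who saw this key first
        print('processing %s (index %d)' % (cfg, idx))
    else:
        print('%s is a DUPLICATE of index %r' % (cfg, seen[key]))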
Example #52
0
import utool as ut
from six.moves import range, zip, map  # NOQA
from ibeis.algo.hots import _pipeline_helpers as plh  # NOQA
from ibeis.algo.hots.neighbor_index import NeighborIndex, get_support_data
(print, rrr, profile) = ut.inject2(__name__, '[neighbor_index]', DEBUG=False)


USE_HOTSPOTTER_CACHE = not ut.get_argflag('--nocache-hs')
NOCACHE_UUIDS = ut.get_argflag('--nocache-uuids') and USE_HOTSPOTTER_CACHE

# LRU cache for nn_indexers. Ensures that only a few are ever in memory
#MAX_NEIGHBOR_CACHE_SIZE = ut.get_argval('--max-neighbor-cachesize', type_=int, default=2)
MAX_NEIGHBOR_CACHE_SIZE = ut.get_argval('--max-neighbor-cachesize', type_=int, default=1)
# Background process for building indexes
CURRENT_THREAD = None
# Global map to keep track of UUID lists with prebuilt indexers.
UUID_MAP = ut.ddict(dict)
NEIGHBOR_CACHE = ut.get_lru_cache(MAX_NEIGHBOR_CACHE_SIZE)


class UUIDMapHyrbridCache(object):
    """
    Class that lets multiple ways of writing to the uuid_map
    be swapped in and out interchangably

    TODO: the global read / write should periodically sync itself to disk and it
    should be loaded from disk initially
    """
    def __init__(self):
        self.uuid_maps = ut.ddict(dict)
        #self.uuid_map_fpath = uuid_map_fpath
        #self.init(uuid_map_fpath, min_reindex_thresh)
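ut.ddict is used throughout these examples exactly like collections.defaultdict (which it appears to alias), so uuid_maps auto-creates the inner dict on first write. A short sketch of the two-level shape, with hypothetical cache directories and config strings:

from collections import defaultdict

uuid_maps = defaultdict(dict)  # same shape as ut.ddict(dict)
# the first write auto-creates the inner dict for this (hypothetical) cache dir
uuid_maps['/cache/flann']['feat_cfgstr_v1'] = ['uuid-1', 'uuid-2']
uuid_maps['/cache/flann']['feat_cfgstr_v2'] = ['uuid-3']
print(sorted(uuid_maps['/cache/flann'].keys()))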
Example #53
0
File: smk1.py Project: whaozl/ibeis
import numpy as np
import pandas as pd
import utool
# NOTE: Match_N and gamma_summation are defined elsewhere in smk1.py

def match_kernel(wx2_qrvecs, wx2_qfxs, invindex, qaid):
    """
    >>> from ibeis.model.hots.smk.smk import *  # NOQA
    >>> ibs, annots_df, taids, daids, qaids, nWords = testdata()
    >>> words = learn_visual_words(annots_df, taids, nWords)
    >>> invindex = index_data_annots(annots_df, daids, words)
    >>> qaid = qaids[0]
    >>> wx2_qfxs, wx2_qrvecs = compute_query_repr(annots_df, qaid, invindex)
    >>> daid2_totalscore = match_kernel(wx2_qrvecs, wx2_qfxs, invindex, qaid)
    """
    _daids = invindex.daids
    idx2_daid = invindex.idx2_daid
    wx2_drvecs = invindex.wx2_drvecs
    wx2_weight = invindex.wx2_weight
    daid2_gamma = invindex.daid2_gamma

    wx2_rvecs = wx2_qrvecs
    query_gamma = gamma_summation(wx2_rvecs, wx2_weight)

    # Accumulate scores over the entire database
    daid2_aggscore = pd.Series(np.zeros(len(_daids)),
                               index=_daids,
                               name='total_score')
    common_wxs = set(wx2_qrvecs.keys()).intersection(set(wx2_drvecs.keys()))

    daid2_wx2_scoremat = utool.ddict(lambda: utool.ddict(list))

    # For each word, compute the pairwise scores between matches
    mark, end = utool.log_progress('query word: ',
                                   len(common_wxs),
                                   flushfreq=100)
    for count, wx in enumerate(common_wxs):
        if count % 100 == 0:
            mark(count)
        # Query and database vectors for wx-th word
        qrvecs = wx2_qrvecs[wx]
        drvecs = wx2_drvecs[wx]
        # Word Weight
        weight = wx2_weight[wx]
        # Compute score matrix
        qfx2_wscore = Match_N(qrvecs, drvecs)
        # Group scores by database annotation ids
        group = qfx2_wscore.groupby(idx2_daid, axis=1)
        for daid, scoremat in group:
            daid2_wx2_scoremat[daid][wx] = scoremat
        #qfx2_wscore = pd.DataFrame(qfx2_wscore_, index=qfxs, columns=_idxs)
        daid2_wscore = weight * qfx2_wscore.sum(
            axis=0).groupby(idx2_daid).sum()
        daid2_aggscore = daid2_aggscore.add(daid2_wscore, fill_value=0)
    daid2_totalscore = daid2_aggscore * daid2_gamma * query_gamma
    end()

    daid_fm = {}
    daid_fs = {}
    daid_fk = {}
    mark, end = utool.log_progress('accumulating match info: ',
                                   len(daid2_wx2_scoremat),
                                   flushfreq=100)
    for count, item in enumerate(daid2_wx2_scoremat.items()):
        daid, wx2_scoremat = item
        if count % 25 == 0:
            mark(count)
        fm_accum = []
        fs_accum = []
        fk_accum = []
        for wx, scoremat in wx2_scoremat.items():
            qfxs = scoremat.index
            dfxs = invindex.idx2_dfx[scoremat.columns]
            fm_ = np.vstack(np.dstack(np.meshgrid(qfxs, dfxs, indexing='ij')))
            fs_ = scoremat.values.flatten()
            lower_thresh = 0.01
            valid = fs_ > lower_thresh
            fm = fm_[valid]
            fs = fs_[valid]
            fk = np.ones(len(fm), dtype=np.int32)
            fm_accum.append(fm)
            fs_accum.append(fs)
            fk_accum.append(fk)
        daid_fm[daid] = np.vstack(fm_accum)
        daid_fs[daid] = np.hstack(fs_accum).T
        daid_fk[daid] = np.hstack(fk_accum).T
    chipmatch = (
        daid_fm,
        daid_fs,
        daid_fk,
    )

    # Sort total scores in descending order (a Series has only one axis)
    daid2_totalscore.sort_values(ascending=False, inplace=True)
    return daid2_totalscore, chipmatch
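One detail worth unpacking from the loop above: np.meshgrid(..., indexing='ij') followed by dstack and vstack enumerates every (query feature, database feature) index pair as an (n * m, 2) array, which is what makes fm_ a feature-match list. A tiny standalone demo:

import numpy as np

qfxs = np.array([0, 1, 2])
dfxs = np.array([5, 7])
fm_ = np.vstack(np.dstack(np.meshgrid(qfxs, dfxs, indexing='ij')))
print(fm_)
# [[0 5]
#  [0 7]
#  [1 5]
#  [1 7]
#  [2 5]
#  [2 7]]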
Example #54
0
def init_tablecache():
    #return utool.ddict(ColumnsCache)
    return utool.ddict(lambda: utool.ddict(dict))
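Because of the nested factory, the returned cache auto-vivifies two levels deep: one level per table, one per column, with a plain dict of rows at the bottom. A quick usage sketch (table, column, and rowid names are hypothetical):

from collections import defaultdict

def init_tablecache():
    return defaultdict(lambda: defaultdict(dict))

cache = init_tablecache()
cache['annotations']['name'][42] = 'zebra_7'    # table -> column -> rowid
cache['annotations']['score'][42] = 0.93
print(cache['annotations']['name'].get(42))     # 'zebra_7'
print(cache['images']['uri'].get(7, '<miss>'))  # empty levels auto-create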
Example #55
0
def compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_weight,
                        alpha=3, thresh=0):
    """
    Internals step4

    Computes gamma normalization scalar for the database annotations
    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_rvecs, wx2_aids = smk_debug.testdata_raw_internals2()
    >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha
    >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh
    >>> idx2_daid  = invindex.idx2_daid
    >>> wx2_weight = wx2_idf
    >>> daids      = invindex.daids
    >>> daid2_gamma = compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_weight, alpha, thresh)
    """
    # Grouping by aid and words
    wx_sublist = pdh.ensure_values(pdh.ensure_index(wx2_rvecs))
    if utool.VERBOSE:
        print('[smk_index] Compute Gamma alpha=%r, thresh=%r: ' % (alpha, thresh))
        mark1, end1_ = utool.log_progress(
            '[smk_index] Gamma Group: ', len(wx_sublist), flushfreq=100, writefreq=50)
    rvecs_list1 = pdh.ensure_values_subset(wx2_rvecs, wx_sublist)
    aids_list  = pdh.ensure_values_subset(wx2_aids, wx_sublist)
    daid2_wx2_drvecs = utool.ddict(lambda: utool.ddict(list))
    # Group by daids first and then by word index
    for wx, aids, rvecs in zip(wx_sublist, aids_list, rvecs_list1):
        group_aids, groupxs = smk_speed.group_indicies(aids)
        rvecs_group = smk_speed.apply_grouping(rvecs, groupxs)  # 2.9 ms
        for aid, rvecs_ in zip(group_aids, rvecs_group):
            daid2_wx2_drvecs[aid][wx] = rvecs_

    if utool.VERBOSE:
        end1_()

    # For every daid, compute its gamma using pregrouped rvecs
    # Summation over words for each aid
    if utool.VERBOSE:
        mark2, end2_ = utool.log_progress(
            '[smk_index] Gamma Sum: ', len(daid2_wx2_drvecs), flushfreq=100, writefreq=25)

    aid_list          = list(daid2_wx2_drvecs.keys())
    wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
    aidwxs_list    = [list(wx2_aidrvecs.keys()) for wx2_aidrvecs in wx2_aidrvecs_list]
    aidrvecs_list  = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in wx2_aidrvecs_list]
    aidweight_list = [[wx2_weight[wx] for wx in aidwxs] for aidwxs in aidwxs_list]

    #gamma_list = []
    #for weight_list, rvecs_list in zip(aidweight_list, aidrvecs_list):
    #    assert len(weight_list) == len(rvecs_list), 'one list for each word'
    #    gamma = smk_core.gamma_summation2(rvecs_list, weight_list, alpha, thresh)  # 66.8 %
    #    #weight_list = np.ones(weight_list.size)
    #    gamma_list.append(gamma)
    gamma_list = [smk_core.gamma_summation2(rvecs_list, weight_list, alpha, thresh)
                  for weight_list, rvecs_list in zip(aidweight_list, aidrvecs_list)]

    daid2_gamma = pdh.IntSeries(gamma_list, index=aid_list, name='gamma')
    if utool.VERBOSE:
        end2_()
    return daid2_gamma
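smk_speed.group_indicies and apply_grouping are not shown in this excerpt, but the grouping they perform (collect the row indices that share each aid, then split an array along those groups) can be sketched with a stable argsort in plain numpy. A minimal version, assuming nonempty input:

import numpy as np

def group_indices(groupids):
    # a stable sort makes rows with equal ids contiguous
    sortx = np.argsort(groupids, kind='mergesort')
    sorted_ids = groupids[sortx]
    # boundaries where the group id changes
    bounds = np.nonzero(sorted_ids[1:] != sorted_ids[:-1])[0] + 1
    groupxs = np.split(sortx, bounds)
    unique_ids = sorted_ids[np.concatenate(([0], bounds))]
    return unique_ids, groupxs

def apply_grouping(arr, groupxs):
    return [arr.take(xs, axis=0) for xs in groupxs]

aids = np.array([3, 1, 3, 2, 1])
rvecs = np.arange(10).reshape(5, 2)
group_aids, groupxs = group_indices(aids)
print(group_aids)                      # [1 2 3]
print(apply_grouping(rvecs, groupxs))  # one rvec block per unique aid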
Example #58
0
    def __init__(self):
        self.uuid_maps = ut.ddict(dict)