Example #1
def get_namescore_nonvoting_feature_flags(fm_list, fs_list, dnid_list, name_groupxs, kpts1=None):
    r"""
    fm_list = [fm[:min(len(fm), 10)] for fm in fm_list]
    fs_list = [fs[:min(len(fs), 10)] for fs in fs_list]
    """
    fx1_list = [fm.T[0] for fm in fm_list]
    # Group annotation matches by name
    name_grouped_fx1_list = vt.apply_grouping_(fx1_list, name_groupxs)
    name_grouped_fs_list  = vt.apply_grouping_(fs_list,  name_groupxs)
    # Stack up all matches to a particular name, keeping track of original indices via offsets
    name_invertable_flat_fx1_list = list(map(ut.invertible_flatten2_numpy, name_grouped_fx1_list))
    name_grouped_fx1_flat = ut.get_list_column(name_invertable_flat_fx1_list, 0)
    name_grouped_invertable_cumsum_list = ut.get_list_column(name_invertable_flat_fx1_list, 1)
    name_grouped_fs_flat = list(map(np.hstack, name_grouped_fs_list))
    if kpts1 is not None:
        xys1_ = vt.get_xys(kpts1).T
        kpts_xyid_list = vt.compute_unique_data_ids(xys1_)
        # Make nested group for every name by query feature index (accounting for duplicate orientation)
        name_grouped_xyid_flat = list(kpts_xyid_list.take(fx1) for fx1 in name_grouped_fx1_flat)
        xyid_groupxs_list = list(vt.group_indices(xyid_flat)[1] for xyid_flat in name_grouped_xyid_flat)
        name_group_fx1_groupxs_list = xyid_groupxs_list
    else:
        # Make nested group for every name by query feature index
        fx1_groupxs_list = [vt.group_indices(fx1_flat)[1] for fx1_flat in name_grouped_fx1_flat]
        name_group_fx1_groupxs_list = fx1_groupxs_list
    name_grouped_fid_grouped_fs_list = [
        vt.apply_grouping(fs_flat, fid_groupxs)
        for fs_flat, fid_groupxs in zip(name_grouped_fs_flat, name_group_fx1_groupxs_list)
    ]

    # Flag which features are valid in this grouped space. Only one keypoint should be able to vote
    # for each group
    name_grouped_fid_grouped_isvalid_list = [
        np.array([fs_group.max() == fs_group for fs_group in fid_grouped_fs_list])
        for fid_grouped_fs_list in name_grouped_fid_grouped_fs_list
    ]

    # Go back to being grouped only in name space
    #dtype = np.bool
    name_grouped_isvalid_flat_list = [
        vt.invert_apply_grouping2(fid_grouped_isvalid_list, fid_groupxs, dtype=bool)
        for fid_grouped_isvalid_list, fid_groupxs in zip(name_grouped_fid_grouped_isvalid_list, name_group_fx1_groupxs_list)
    ]

    name_grouped_isvalid_unflat_list = [
        ut.unflatten2(isvalid_flat, invertable_cumsum_list)
        for isvalid_flat, invertable_cumsum_list in zip(name_grouped_isvalid_flat_list, name_grouped_invertable_cumsum_list)
    ]

    # Reports which features were valid in name scoring for every annotation
    featflag_list = vt.invert_apply_grouping(name_grouped_isvalid_unflat_list, name_groupxs)
    return featflag_list
Example #2
    def _make_anygroup_hashes(annots, nids):
        """helper function

        import wbia
        qreq_ = wbia.testdata_qreq_(
            defaultdb='PZ_MTEST',
            qaid_override=[1, 2, 3, 4, 5, 6, 10, 11],
            daid_override=[2, 3, 5, 6, 20, 21, 22, 23, 24],
            )

        import wbia
        qreq_ = wbia.testdata_qreq_(defaultdb='PZ_Master1')
        %timeit qreq_._make_namegroup_data_hashes()
        %timeit qreq_._make_namegroup_data_uuids()

        """
        # make sure items are sorted to ensure same assignment
        # gives same uuids
        # annots = qreq_.ibs.annots(sorted(qreq_.daids))
        unique_nids, groupxs = vt.group_indices(nids)
        grouped_visual_uuids = ut.apply_grouping(annots.visual_uuids, groupxs)
        group_hashes = [
            ut.combine_hashes(sorted(u.bytes for u in uuids), hasher=hashlib.sha1())
            for uuids in grouped_visual_uuids
        ]
        nid_to_grouphash = dict(zip(unique_nids, group_hashes))
        return nid_to_grouphash
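Most of the examples on this page follow the same vtool grouping pattern: group_indices partitions an array of keys, apply_grouping gathers per-group values, and invert_apply_grouping scatters per-group results back to the original order. As a rough, numpy-only sketch of that pattern (illustrative stand-ins, not the vtool implementation itself):

import numpy as np

def group_indices(keys):
    # stand-in for vt.group_indices: (unique_keys, groupxs), where groupxs[i]
    # holds the positions of unique_keys[i] in the original array
    keys = np.asarray(keys)
    sortx = keys.argsort(kind='stable')
    sorted_keys = keys[sortx]
    cuts = np.flatnonzero(np.diff(sorted_keys)) + 1
    groupxs = np.split(sortx, cuts)
    unique_keys = sorted_keys[np.r_[0, cuts]]
    return unique_keys, groupxs

def apply_grouping(items, groupxs):
    # stand-in for vt.apply_grouping: gather per-group values
    items = np.asarray(items)
    return [items.take(xs, axis=0) for xs in groupxs]

def invert_apply_grouping(grouped_items, groupxs):
    # stand-in for vt.invert_apply_grouping: scatter results back to original order
    out = [None] * sum(len(xs) for xs in groupxs)
    for group, xs in zip(grouped_items, groupxs):
        for item, x in zip(group, xs):
            out[x] = item
    return out

dnid_list = np.array([21, 21, 22, 23, 22])
fs_list = np.array([0.5, 0.9, 0.1, 0.7, 0.3])
unique_nids, groupxs = group_indices(dnid_list)    # [21, 22, 23]
grouped_fs = apply_grouping(fs_list, groupxs)      # [[0.5, 0.9], [0.1, 0.3], [0.7]]
isvalid = [fs == fs.max() for fs in grouped_fs]    # keep only the best score per name
print(invert_apply_grouping(isvalid, groupxs))     # back in the original annotation order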
Example #3
    def get_patches(invassign, wx):
        ax_list = invassign.wx2_axs[wx]
        fx_list = invassign.wx2_fxs[wx]
        config = invassign.fstack.config
        ibs = invassign.fstack.ibs

        unique_axs, groupxs = vt.group_indices(ax_list)
        fxs_groups = vt.apply_grouping(fx_list, groupxs)

        unique_aids = ut.take(invassign.fstack.ax2_aid, unique_axs)

        all_kpts_list = ibs.depc.d.get_feat_kpts(unique_aids, config=config)
        sub_kpts_list = vt.ziptake(all_kpts_list, fxs_groups, axis=0)

        chip_list = ibs.depc_annot.d.get_chips_img(unique_aids)
        # convert to appropriate colorspace
        #if colorspace is not None:
        #    chip_list = vt.convert_image_list_colorspace(chip_list, colorspace)
        # ut.print_object_size(chip_list, 'chip_list')
        patch_size = 64
        grouped_patches_list = [
            vt.get_warped_patches(chip, kpts, patch_size=patch_size)[0]
            for chip, kpts in ut.ProgIter(zip(chip_list, sub_kpts_list),
                                          nTotal=len(unique_aids),
                                          lbl='warping patches')
        ]
        # Make it correspond with original fx_list and ax_list
        word_patches = vt.invert_apply_grouping(grouped_patches_list, groupxs)
        return word_patches
Example #4
    def compute_agg_rvecs(invassign, wx):
        """
        Sums and normalizes all rvecs that belong to the same word and the same
        annotation id
        """
        rvecs_list, error_flags = invassign.compute_nonagg_rvecs(wx)
        ax_list = invassign.wx2_axs[wx]
        maw_list = invassign.wx2_maws[wx]
        # group members of each word by aid; we will collapse these groups
        unique_ax, groupxs = vt.group_indices(ax_list)
        # (weighted aggregation with multi-assign-weights)
        grouped_maws = vt.apply_grouping(maw_list, groupxs)
        grouped_rvecs = vt.apply_grouping(rvecs_list, groupxs)
        grouped_flags = vt.apply_grouping(~error_flags, groupxs)

        grouped_rvecs2_ = vt.zipcompress(grouped_rvecs, grouped_flags, axis=0)
        grouped_maws2_ = vt.zipcompress(grouped_maws, grouped_flags)
        is_good = [len(rvecs) > 0 for rvecs in grouped_rvecs2_]
        aggvecs = [
            aggregate_rvecs(rvecs, maws)[0]
            for rvecs, maws in zip(grouped_rvecs2_, grouped_maws2_)
        ]
        unique_ax2_ = unique_ax.compress(is_good)
        ax2_aggvec = dict(zip(unique_ax2_, aggvecs))
        # Need to recompute flags for consistency
        # flag is true when aggvec is all zeros
        return ax2_aggvec
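The aggregation step above collapses all residual vectors that share an annotation id into one weighted vector. A toy numpy sketch of that idea, using a hypothetical stand-in for aggregate_rvecs (the real wbia function may differ):

import numpy as np

def aggregate_rvecs_sketch(rvecs, maws):
    # hypothetical stand-in: weighted sum of residual vectors, renormalized
    agg = (rvecs * maws[:, None]).sum(axis=0)
    norm = np.linalg.norm(agg)
    return agg / norm if norm > 0 else agg

ax_list = np.array([3, 3, 7])                        # annotation id of each rvec
rvecs_list = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
maw_list = np.array([0.5, 0.5, 1.0])                 # multi-assign weights
ax2_aggvec = {}
for ax in np.unique(ax_list):
    sel = ax_list == ax
    ax2_aggvec[int(ax)] = aggregate_rvecs_sketch(rvecs_list[sel], maw_list[sel])
print(ax2_aggvec)    # one unit-length aggregate vector per annotation id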
Example #5
    def done_part(cand, num_neighbs):
        # Find the first `num_neighbs` complete columns in each row
        rowxs, colxs = np.where(cand.validflags)
        unique_rows, groupxs = vt.group_indices(rowxs, assume_sorted=True)
        first_k_groupxs = [groupx[0:num_neighbs] for groupx in groupxs]
        if DEBUG_REQUERY:
            assert all(ut.issorted(groupx) for groupx in groupxs)
            assert all(
                [len(group) == num_neighbs for group in first_k_groupxs])
        chosen_xs = np.array(ut.flatten(first_k_groupxs), dtype=int)
        # chosen_xs = np.hstack(first_k_groupxs)
        # then convert these to multi-indices
        done_rows = rowxs.take(chosen_xs)
        done_cols = colxs.take(chosen_xs)
        multi_index = (done_rows, done_cols)
        # done_shape = (cand.validflags.shape[0], num_neighbs)
        # flat_xs = np.ravel_multi_index(multi_index, done_shape)
        flat_xs = np.ravel_multi_index(multi_index, cand.idxs.shape)
        _shape = (-1, num_neighbs)
        idxs = cand.idxs.take(flat_xs).reshape(_shape)
        dists = cand.dists.take(flat_xs).reshape(_shape)

        trueks = colxs.take(chosen_xs).reshape(_shape)
        if DEBUG_REQUERY:
            # dists2 = dists.copy()
            for count, (row, cols) in enumerate(zip(unique_rows, groupxs)):
                pass
            assert np.all(np.diff(dists, axis=1) >= 0)
            valid = cand.validflags.take(flat_xs).reshape(_shape)
            assert np.all(valid)
        return idxs, dists, trueks
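done_part converts the chosen (row, column) pairs into flat indices so it can gather from cand.idxs and cand.dists in a single take. The ravel_multi_index step in isolation, on standalone toy data (not the cand object above):

import numpy as np

dists = np.array([[0.1, 0.4, 0.9],
                  [0.2, 0.3, 0.8]])
rows = np.array([0, 0, 1, 1])
cols = np.array([0, 1, 0, 2])
# (row, col) pairs -> indices into the flattened array
flat_xs = np.ravel_multi_index((rows, cols), dists.shape)
# gather, then reshape to (num_rows, num_neighbs)
picked = dists.take(flat_xs).reshape(-1, 2)
print(picked)    # [[0.1 0.4]
                 #  [0.2 0.8]]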
Example #6
def group_aids_by_featweight_species(ibs, aid_list, config2_=None):
    """ helper

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.preproc.preproc_probchip import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> config2_ = None
        >>> aid_list = ibs.get_valid_aids()
        >>> grouped_aids, unique_species, groupxs = group_aids_by_featweight_species(ibs, aid_list, config2_)
    """
    if config2_ is None:
        featweight_species = ibs.cfg.featweight_cfg.featweight_species
    else:
        featweight_species = config2_.get('featweight_species')
        assert featweight_species is not None
    if featweight_species == 'uselabel':
        # Use the labeled species for the detector
        species_list = ibs.get_annot_species_texts(aid_list)
    else:
        species_list = [featweight_species]
    aid_list = np.array(aid_list)
    species_list = np.array(species_list)
    species_rowid = np.array(ibs.get_species_rowids_from_text(species_list))
    unique_species_rowids, groupxs = vtool.group_indices(species_rowid)
    grouped_aids    = vtool.apply_grouping(aid_list, groupxs)
    grouped_species = vtool.apply_grouping(species_list, groupxs)
    unique_species = ut.get_list_column(grouped_species, 0)
    return grouped_aids, unique_species, groupxs
Example #7
def group_aids_by_featweight_species(ibs, aid_list, config2_=None):
    """ helper

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.preproc.preproc_probchip import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> config2_ = None
        >>> aid_list = ibs.get_valid_aids()
        >>> grouped_aids, unique_species, groupxs = group_aids_by_featweight_species(ibs, aid_list, config2_)
    """
    if config2_ is None:
        featweight_species = ibs.cfg.featweight_cfg.featweight_species
    else:
        featweight_species = config2_.get('featweight_species')
        assert featweight_species is not None
    if featweight_species == 'uselabel':
        # Use the labeled species for the detector
        species_list = ibs.get_annot_species_texts(aid_list)
    else:
        species_list = [featweight_species]
    aid_list = np.array(aid_list)
    species_list = np.array(species_list)
    species_rowid = np.array(ibs.get_species_rowids_from_text(species_list))
    unique_species_rowids, groupxs = vtool.group_indices(species_rowid)
    grouped_aids = vtool.apply_grouping(aid_list, groupxs)
    grouped_species = vtool.apply_grouping(species_list, groupxs)
    unique_species = ut.get_list_column(grouped_species, 0)
    return grouped_aids, unique_species, groupxs
Example #8
def report_partitioning_statistics(new_reduced_joint):
    # compute partitioning statistics
    import vtool as vt
    vals, idxs = vt.group_indices(new_reduced_joint.values.ravel())
    #groupsize = list(map(len, idxs))
    #groupassigns = ut.unflat_vecmap(new_reduced_joint.assignment, idxs)
    all_states = new_reduced_joint._row_labels(asindex=True)
    clusterstats = [tuple(sorted(list(ut.dict_hist(a).values())))
                    for a in all_states]
    grouped_vals = ut.group_items(new_reduced_joint.values.ravel(),
                                  clusterstats)

    #probs_assigned_to_clustertype = [(
    #    sorted(np.unique(np.array(b).round(decimals=5)).tolist())[::-1], a)
    #    for a, b in grouped_vals.items()]
    probs_assigned_to_clustertype = [(
        ut.dict_hist(np.array(b).round(decimals=5)), a)
        for a, b in grouped_vals.items()]
    sortx = ut.argsort([max(c[0].keys())
                        for c in probs_assigned_to_clustertype])
    probs_assigned_to_clustertype = ut.take(probs_assigned_to_clustertype, sortx)

    # This is a list of 2-tuples. The first item holds the unique
    # probabilities assigned to a cluster type along with the number of
    # times each was assigned; the cluster type is the second item. Every
    # number represents how many annotations were assigned to a specific
    # label, and the length of that list is the total number of labels. For
    # all low scores you will see [[{somenum: 1}, {0: 800}], [1, 1, 1, ... 1]],
    # indicating that assigning everyone to a different label happened once
    # with probability somenum and 800 times with probability 0.

    #print(sorted([(b, a) for a, b in ut.map_dict_vals(sum, x)]).items())
    #z = sorted([(b, a) for a, b in ut.map_dict_vals(sum, grouped_vals).items()])
    print(ut.repr2(probs_assigned_to_clustertype, nl=2, precision=2, sorted_=True))
Example #9
    def get_patches(invassign, wx):
        ax_list = invassign.wx2_axs[wx]
        fx_list = invassign.wx2_fxs[wx]
        config = invassign.fstack.config
        ibs = invassign.fstack.ibs

        unique_axs, groupxs = vt.group_indices(ax_list)
        fxs_groups = vt.apply_grouping(fx_list, groupxs)

        unique_aids = ut.take(invassign.fstack.ax2_aid, unique_axs)

        all_kpts_list = ibs.depc.d.get_feat_kpts(unique_aids, config=config)
        sub_kpts_list = vt.ziptake(all_kpts_list, fxs_groups, axis=0)

        chip_list = ibs.depc_annot.d.get_chips_img(unique_aids)
        # convert to appropriate colorspace
        #if colorspace is not None:
        #    chip_list = vt.convert_image_list_colorspace(chip_list, colorspace)
        # ut.print_object_size(chip_list, 'chip_list')
        patch_size = 64
        grouped_patches_list = [
            vt.get_warped_patches(chip, kpts, patch_size=patch_size)[0]
            for chip, kpts in ut.ProgIter(zip(chip_list, sub_kpts_list),
                                          nTotal=len(unique_aids),
                                          lbl='warping patches')
        ]
        # Make it correspond with original fx_list and ax_list
        word_patches = vt.invert_apply_grouping(grouped_patches_list, groupxs)
        return word_patches
Example #10
def report_partitioning_statistics(new_reduced_joint):
    # compute partitioning statistics
    import vtool as vt
    vals, idxs = vt.group_indices(new_reduced_joint.values.ravel())
    #groupsize = list(map(len, idxs))
    #groupassigns = ut.unflat_vecmap(new_reduced_joint.assignment, idxs)
    all_states = new_reduced_joint._row_labels(asindex=True)
    clusterstats = [tuple(sorted(list(ut.dict_hist(a).values())))
                    for a in all_states]
    grouped_vals = ut.group_items(new_reduced_joint.values.ravel(),
                                  clusterstats)

    #probs_assigned_to_clustertype = [(
    #    sorted(np.unique(np.array(b).round(decimals=5)).tolist())[::-1], a)
    #    for a, b in grouped_vals.items()]
    probs_assigned_to_clustertype = [(
        ut.dict_hist(np.array(b).round(decimals=5)), a)
        for a, b in grouped_vals.items()]
    sortx = ut.argsort([max(c[0].keys())
                        for c in probs_assigned_to_clustertype])
    probs_assigned_to_clustertype = ut.take(probs_assigned_to_clustertype, sortx)

    # This is a list of 2-tuples. The first item holds the unique
    # probabilities assigned to a cluster type along with the number of
    # times each was assigned; the cluster type is the second item. Every
    # number represents how many annotations were assigned to a specific
    # label, and the length of that list is the total number of labels. For
    # all low scores you will see [[{somenum: 1}, {0: 800}], [1, 1, 1, ... 1]],
    # indicating that assigning everyone to a different label happened once
    # with probability somenum and 800 times with probability 0.

    #print(sorted([(b, a) for a, b in ut.map_dict_vals(sum, x)]).items())
    #z = sorted([(b, a) for a, b in ut.map_dict_vals(sum, grouped_vals).items()])
    print(ut.repr2(probs_assigned_to_clustertype, nl=2, precision=2, sorted_=True))
Example #11
def get_annotmatch_rowids_from_aid2(ibs, aid2_list, eager=True, nInput=None,
                                    force_method=None):
    """
    # This one is slow because aid2 is the second part of the index

    TODO autogenerate

    Returns a list of the aids that were reviewed as candidate matches to the input aid

    aid_list = ibs.get_valid_aids()

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid2 --show

    Example2:
        >>> # TIME TEST
        >>> # setup_pzmtest_subgraph()
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_Master1')
        >>> aid2_list = ibs.get_valid_aids()
        >>> func_list = [
        >>>     partial(ibs.get_annotmatch_rowids_from_aid2, force_method=1),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid2, force_method=2),
        >>> ]
        >>> num_list = [1, 10, 50, 100, 300, 325, 350, 400, 500]
        >>> def args_list(count, aid2_list=aid2_list, num_list=num_list):
        >>>    return (aid2_list[0:num_list[count]],)
        >>> searchkw = dict(
        >>>     func_labels=['sql', 'numpy'],
        >>>     count_to_xtick=lambda count, args: len(args[0]),
        >>>     title='Timings of get_annotmatch_rowids_from_aid2',
        >>> )
        >>> niters = len(num_list)
        >>> time_result = ut.gridsearch_timer(func_list, args_list, niters, **searchkw)
        >>> time_result['plot_timings']()
        >>> ut.show_if_requested()
    """
    from ibeis.control import _autogen_annotmatch_funcs
    if force_method != 2 and (nInput is None or nInput < 128 or force_method == 1):
        colnames = (_autogen_annotmatch_funcs.ANNOTMATCH_ROWID,)
        # FIXME: col_rowid is not correct
        params_iter = zip(aid2_list)
        andwhere_colnames = [_autogen_annotmatch_funcs.ANNOT_ROWID2]
        annotmatch_rowid_list = ibs.db.get_where2(
            ibs.const.ANNOTMATCH_TABLE, colnames, params_iter, andwhere_colnames,
            eager=eager, nInput=nInput, unpack_scalars=False)
    elif force_method == 2:
        import vtool as vt
        all_annotmatch_rowids = np.array(ibs._get_all_annotmatch_rowids())
        aids2 = np.array(ibs.get_annotmatch_aid2(all_annotmatch_rowids))
        unique_aid2, groupxs2 = vt.group_indices(aids2)
        rowids2_ = vt.apply_grouping(all_annotmatch_rowids, groupxs2)
        rowids2_ = [_.tolist() for _ in rowids2_]
        mapping2 = ut.defaultdict(list, zip(unique_aid2, rowids2_))
        annotmatch_rowid_list = ut.dict_take(mapping2, aid2_list)
    annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list))
    return annotmatch_rowid_list
Example #12
def get_aidpair_tags(ibs, aid1_list, aid2_list, directed=True):
    r"""
    Args:
        ibs (IBEISController):  wbia controller object
        aid1_list (list):
        aid2_list (list):
        directed (bool): (default = True)

    Returns:
        list: tags_list

    CommandLine:
        python -m wbia.tag_funcs --exec-get_aidpair_tags --db PZ_Master1 --tags Hard interesting

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.tag_funcs import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb(defaultdb='testdb1')
        >>> has_any = ut.get_argval('--tags', type_=list, default=None)
        >>> min_num = ut.get_argval('--min_num', type_=int, default=1)
        >>> aid_pairs = filter_aidpairs_by_tags(ibs, has_any=has_any, min_num=1)
        >>> aid1_list = aid_pairs.T[0]
        >>> aid2_list = aid_pairs.T[1]
        >>> undirected_tags = get_aidpair_tags(ibs, aid1_list, aid2_list, directed=False)
        >>> tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        >>> print(ut.repr2(tagged_pairs))
        >>> tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        >>> print(ut.repr2(tag_dict, nl=2))
        >>> print(ut.repr2(ut.map_dict_vals(len, tag_dict)))
    """
    aid_pairs = np.vstack([aid1_list, aid2_list]).T
    if directed:
        annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(
            aid_pairs.T[0], aid_pairs.T[1])
        tags_list = ibs.get_annotmatch_case_tags(annotmatch_rowid)
    else:
        annotmatch_rowid = ibs.get_annotmatch_rowid_from_undirected_superkey(
            aid_pairs.T[0], aid_pairs.T[1])
        tags_list = ibs.get_annotmatch_case_tags(annotmatch_rowid)
        if False:
            expanded_aid_pairs = np.vstack([aid_pairs, aid_pairs[:, ::-1]])
            expanded_annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(
                expanded_aid_pairs.T[0], expanded_aid_pairs.T[1])
            expanded_edgeids = vt.get_undirected_edge_ids(expanded_aid_pairs)
            unique_edgeids, groupxs = vt.group_indices(expanded_edgeids)
            expanded_tags_list = ibs.get_annotmatch_case_tags(
                expanded_annotmatch_rowid)
            grouped_tags = vt.apply_grouping(
                np.array(expanded_tags_list, dtype=object), groupxs)
            undirected_tags = [
                list(set(ut.flatten(tags))) for tags in grouped_tags
            ]
            edgeid2_tags = dict(zip(unique_edgeids, undirected_tags))
            input_edgeids = expanded_edgeids[:len(aid_pairs)]
            tags_list = ut.dict_take(edgeid2_tags, input_edgeids)
    return tags_list
Example #13
def get_name_shortlist_aids(
    daid_list,
    dnid_list,
    annot_score_list,
    name_score_list,
    nid2_nidx,
    nNameShortList,
    nAnnotPerName,
):
    r"""
    CommandLine:
        python -m wbia.algo.hots.scoring --test-get_name_shortlist_aids

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.hots.scoring import *  # NOQA
        >>> daid_list        = np.array([11, 12, 13, 14, 15, 16, 17])
        >>> dnid_list        = np.array([21, 21, 21, 22, 22, 23, 24])
        >>> annot_score_list = np.array([ 6,  2,  3,  5,  6,  3,  2])
        >>> name_score_list  = np.array([ 8,  9,  5,  4])
        >>> nid2_nidx        = {21:0, 22:1, 23:2, 24:3}
        >>> nNameShortList, nAnnotPerName = 3, 2
        >>> args = (daid_list, dnid_list, annot_score_list, name_score_list,
        ...         nid2_nidx, nNameShortList, nAnnotPerName)
        >>> top_daids = get_name_shortlist_aids(*args)
        >>> result = str(top_daids)
        >>> print(result)
        [15, 14, 11, 13, 16]
    """
    unique_nids, groupxs = vt.group_indices(np.array(dnid_list))
    grouped_annot_scores = vt.apply_grouping(annot_score_list, groupxs)
    grouped_daids = vt.apply_grouping(np.array(daid_list), groupxs)
    # Ensure name score list is aligned with the unique_nids
    aligned_name_score_list = name_score_list.take(
        ut.dict_take(nid2_nidx, unique_nids))
    # Sort each group by the name score
    group_sortx = aligned_name_score_list.argsort()[::-1]
    _top_daid_groups = ut.take(grouped_daids, group_sortx)
    _top_annot_score_groups = ut.take(grouped_annot_scores, group_sortx)
    top_daid_groups = ut.listclip(_top_daid_groups, nNameShortList)
    top_annot_score_groups = ut.listclip(_top_annot_score_groups,
                                         nNameShortList)
    # Sort within each group by the annotation score
    top_daid_sortx_groups = [
        annot_score_group.argsort()[::-1]
        for annot_score_group in top_annot_score_groups
    ]
    top_sorted_daid_groups = vt.ziptake(top_daid_groups, top_daid_sortx_groups)
    top_clipped_daids = [
        ut.listclip(sorted_daid_group, nAnnotPerName)
        for sorted_daid_group in top_sorted_daid_groups
    ]
    top_daids = ut.flatten(top_clipped_daids)
    return top_daids
Example #14
def general_name_coverage_mask_generator(make_mask_func, qreq_, cm, config,
                                         cov_cfg):
    """
    DEPRICATE

    Yields:
        nid, weight_mask_m, weight_mask

    CommandLine:
        python -m wbia.algo.hots.scoring --test-general_name_coverage_mask_generator --show
        python -m wbia.algo.hots.scoring --test-general_name_coverage_mask_generator --show --qaid 18

    Note:
        Evaluate output one at a time or it will get clobbered

    Example0:
        >>> # SLOW_DOCTEST
        >>> # (IMPORTANT)
        >>> from wbia.algo.hots.scoring import *  # NOQA
        >>> qreq_, cm = plh.testdata_scoring('PZ_MTEST', qaid_list=[18])
        >>> config = qreq_.qparams
        >>> make_mask_func, cov_cfg = get_mask_func(config)
        >>> masks_iter = general_name_coverage_mask_generator(make_mask_func, qreq_, cm, config, cov_cfg)
        >>> dnid_list, score_list, masks_list = evaluate_masks_iter(masks_iter)
        >>> ut.quit_if_noshow()
        >>> nidx = np.where(dnid_list == cm.qnid)[0][0]
        >>> daids = cm.get_groundtruth_daids()
        >>> dnid, weight_mask_m, weight_mask = masks_list[nidx]
        >>> show_single_coverage_mask(qreq_, cm, weight_mask_m, weight_mask, daids)
        >>> ut.show_if_requested()
    """
    import vtool as vt

    if ut.VERYVERBOSE:
        logger.info('[ncov] make_mask_func = %r' % (make_mask_func, ))
        logger.info('[ncov] cov_cfg = %s' % (ut.repr2(cov_cfg), ))
    assert cm.dnid_list is not None, 'eval nids'
    unique_dnids, groupxs = vt.group_indices(cm.dnid_list)
    fm_groups = vt.apply_grouping_(cm.fm_list, groupxs)
    fs_groups = vt.apply_grouping_(cm.fs_list, groupxs)
    fs_name_list = [np.hstack(fs_group) for fs_group in fs_groups]
    fm_name_list = [np.vstack(fm_group) for fm_group in fm_groups]
    return general_coverage_mask_generator(
        make_mask_func,
        qreq_,
        cm.qaid,
        unique_dnids,
        fm_name_list,
        fs_name_list,
        config,
        cov_cfg,
    )
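The name-grouping step above stacks every annotation's matches and scores for the same name into single arrays before handing them to the coverage generator. The stacking itself, on toy data independent of the qreq_/cm objects:

import numpy as np

fm_list = [np.array([[0, 5], [1, 6]]), np.array([[2, 7]]), np.array([[3, 8]])]
fs_list = [np.array([0.9, 0.8]), np.array([0.7]), np.array([0.6])]
dnid_list = np.array([21, 21, 22])        # name id of each database annotation
for nid in np.unique(dnid_list):
    sel = np.flatnonzero(dnid_list == nid)
    fm_name = np.vstack([fm_list[i] for i in sel])   # all matches against this name
    fs_name = np.hstack([fs_list[i] for i in sel])   # all scores against this name
    print(int(nid), fm_name.shape, fs_name.shape)    # 21 (3, 2) (3,)  /  22 (1, 2) (1,)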
Example #15
def group_images_by_label(label_arr, gid_arr):
    """
    Input: Length N list of labels and ids
    Output: Length M list of unique labels, and length M list of lists of ids
    """
    # Reverse the image to cluster index mapping
    import vtool as vt
    labels_, groupxs_ = vt.group_indices(label_arr)
    sortx = np.array(list(map(len, groupxs_))).argsort()[::-1]
    labels  = labels_.take(sortx, axis=0)
    groupxs = ut.take(groupxs_, sortx)
    label_gids = vt.apply_grouping(gid_arr, groupxs)
    return labels, label_gids
Example #16
def group_images_by_label(label_arr, gid_arr):
    """
    Input: Length N list of labels and ids
    Output: Length M list of unique labels, and length M list of lists of ids
    """
    # Reverse the image to cluster index mapping
    import vtool as vt
    labels_, groupxs_ = vt.group_indices(label_arr)
    sortx = np.array(list(map(len, groupxs_))).argsort()[::-1]
    labels = labels_.take(sortx, axis=0)
    groupxs = ut.take(groupxs_, sortx)
    label_gids = vt.apply_grouping(gid_arr, groupxs)
    return labels, label_gids
Example #17
def get_aidpair_tags(ibs, aid1_list, aid2_list, directed=True):
    r"""
    Args:
        ibs (IBEISController):  ibeis controller object
        aid1_list (list):
        aid2_list (list):
        directed (bool): (default = True)

    Returns:
        list: tags_list

    CommandLine:
        python -m ibeis.tag_funcs --exec-get_aidpair_tags --db PZ_Master1 --tags Hard interesting

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.tag_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> has_any = ut.get_argval('--tags', type_=list, default=None)
        >>> min_num = ut.get_argval('--min_num', type_=int, default=1)
        >>> aid_pairs = filter_aidpairs_by_tags(ibs, has_any=has_any, min_num=1)
        >>> aid1_list = aid_pairs.T[0]
        >>> aid2_list = aid_pairs.T[1]
        >>> undirected_tags = get_aidpair_tags(ibs, aid1_list, aid2_list, directed=False)
        >>> tagged_pairs = list(zip(aid_pairs.tolist(), undirected_tags))
        >>> print(ut.list_str(tagged_pairs))
        >>> tag_dict = ut.groupby_tags(tagged_pairs, undirected_tags)
        >>> print(ut.dict_str(tag_dict, nl=2))
        >>> print(ut.dict_str(ut.map_dict_vals(len, tag_dict)))
    """
    aid_pairs = np.vstack([aid1_list, aid2_list]).T
    if directed:
        annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(aid_pairs.T[0], aid_pairs.T[1])
        tags_list = ibs.get_annotmatch_case_tags(annotmatch_rowid)
    else:
        expanded_aid_pairs = np.vstack([aid_pairs, aid_pairs[:, ::-1]])
        expanded_annotmatch_rowid = ibs.get_annotmatch_rowid_from_superkey(
            expanded_aid_pairs.T[0], expanded_aid_pairs.T[1])
        expanded_edgeids = vt.get_undirected_edge_ids(expanded_aid_pairs)
        unique_edgeids, groupxs = vt.group_indices(expanded_edgeids)
        expanded_tags_list = ibs.get_annotmatch_case_tags(expanded_annotmatch_rowid)
        grouped_tags = vt.apply_grouping(np.array(expanded_tags_list, dtype=object), groupxs)
        undirected_tags = [list(set(ut.flatten(tags))) for tags in grouped_tags]
        edgeid2_tags = dict(zip(unique_edgeids, undirected_tags))
        input_edgeids = expanded_edgeids[:len(aid_pairs)]
        tags_list = ut.dict_take(edgeid2_tags, input_edgeids)
    return tags_list
Example #18
def group_correspondences(all_matches, all_scores, all_daids, daid2_sccw):
    daid_keys, groupxs = vt.group_indices(all_daids)
    fs_list = vt.apply_grouping(all_scores, groupxs)
    fm_list = vt.apply_grouping(all_matches, groupxs)
    daid2_fm = {daid: fm for daid, fm in zip(daid_keys, fm_list)}
    daid2_fs = {
        daid: fs * daid2_sccw[daid]
        for daid, fs in zip(daid_keys, fs_list)
    }
    # FIXME: generalize to when nAssign > 1
    daid2_fk = {
        daid: np.ones(fs.size, dtype=hstypes.FK_DTYPE)
        for daid, fs in zip(daid_keys, fs_list)
    }
    daid2_chipmatch = (daid2_fm, daid2_fs, daid2_fk)
    return daid2_chipmatch
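group_correspondences groups the flat match and score arrays by database annotation id, then rescales each group by that annotation's sccw. The same bookkeeping with a plain loop (the daid2_sccw values here are made-up toy numbers):

import numpy as np

all_daids = np.array([4, 7, 4, 7, 7])
all_scores = np.array([0.2, 0.5, 0.3, 0.1, 0.4])
daid2_sccw = {4: 2.0, 7: 0.5}             # per-annotation normalizers (toy values)
daid2_fs = {}
for daid in np.unique(all_daids):
    fs = all_scores[all_daids == daid]
    daid2_fs[int(daid)] = fs * daid2_sccw[int(daid)]
print(daid2_fs)    # {4: array([0.4, 0.6]), 7: array([0.25, 0.05, 0.2])}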
Example #19
def get_name_shortlist_aids(daid_list, dnid_list, annot_score_list,
                            name_score_list, nid2_nidx,
                            nNameShortList, nAnnotPerName):
    r"""
    CommandLine:
        python -m ibeis.algo.hots.scoring --test-get_name_shortlist_aids

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.scoring import *  # NOQA
        >>> # build test data
        >>> daid_list        = np.array([11, 12, 13, 14, 15, 16, 17])
        >>> dnid_list        = np.array([21, 21, 21, 22, 22, 23, 24])
        >>> annot_score_list = np.array([ 6,  2,  3,  5,  6,  3,  2])
        >>> name_score_list  = np.array([ 8,  9,  5,  4])
        >>> nid2_nidx        = {21:0, 22:1, 23:2, 24:3}
        >>> nNameShortList, nAnnotPerName = 3, 2
        >>> # execute function
        >>> args = (daid_list, dnid_list, annot_score_list, name_score_list,
        ...         nid2_nidx, nNameShortList, nAnnotPerName)
        >>> top_daids = get_name_shortlist_aids(*args)
        >>> # verify results
        >>> result = str(top_daids)
        >>> print(result)
        [15, 14, 11, 13, 16]
    """
    unique_nids, groupxs    = vt.group_indices(np.array(dnid_list))
    grouped_annot_scores    = vt.apply_grouping(annot_score_list, groupxs)
    grouped_daids           = vt.apply_grouping(np.array(daid_list), groupxs)
    # Ensure name score list is aligned with the unique_nids
    aligned_name_score_list = name_score_list.take(ut.dict_take(nid2_nidx, unique_nids))
    # Sort each group by the name score
    group_sortx             = aligned_name_score_list.argsort()[::-1]
    _top_daid_groups        = ut.take(grouped_daids, group_sortx)
    _top_annot_score_groups = ut.take(grouped_annot_scores, group_sortx)
    top_daid_groups         = ut.listclip(_top_daid_groups, nNameShortList)
    top_annot_score_groups  = ut.listclip(_top_annot_score_groups, nNameShortList)
    # Sort within each group by the annotation score
    top_daid_sortx_groups   = [annot_score_group.argsort()[::-1]
                               for annot_score_group in top_annot_score_groups]
    top_sorted_daid_groups  = vt.ziptake(top_daid_groups, top_daid_sortx_groups)
    top_clipped_daids = [ut.listclip(sorted_daid_group, nAnnotPerName)
                         for sorted_daid_group in top_sorted_daid_groups]
    top_daids = ut.flatten(top_clipped_daids)
    return top_daids
Example #20
    def consolidate(self, inplace=False):
        """ removes duplicate entries

        Example:
            >>> # UNSTABLE_DOCTEST
            >>> from ibeis.algo.hots.pgm_ext import *  # NOQA
            >>> state_idxs = [[1, 0, 1], [1, 0, 1], [1, 0, 2]]
            >>> weights = [.1, .2, .1]
            >>> variables = ['v1', 'v2', 'v3']
            >>> self = ApproximateFactor(state_idxs, weights, variables)
            >>> inplace = False
            >>> phi = self.consolidate(inplace)
            >>> result = str(phi)
            >>> print(result)
            +------+------+------+-----------------------+
            | v1   | v2   | v3   |   \hat{phi}(v1,v2,v3) |
            |------+------+------+-----------------------|
            | v1_1 | v2_0 | v3_1 |                0.3000 |
            | v1_1 | v2_0 | v3_2 |                0.1000 |
            +------+------+------+-----------------------+
        """
        import vtool as vt

        phi = self.copy() if inplace else self

        data_ids = vt.compute_ndarray_unique_rowids_unsafe(self.state_idxs)
        unique_ids, groupxs = vt.group_indices(data_ids)
        #assert len(unique_ids) == len(np.unique(vt.compute_unique_data_ids_(list(map(tuple, phi.state_idxs)))))
        if len(data_ids) != len(unique_ids):
            # Sum the values in the cpd to marginalize the duplicate probs
            # Take only the unique rows under this induced labeling
            unique_tmp_groupxs = np.array([gxs[0] for gxs in groupxs])
            self.state_idxs = self.state_idxs.take(unique_tmp_groupxs, axis=0)
            self.weights = np.array([
                g.sum() for g in vt.apply_grouping(self.weights, groupxs)
            ])
            #print('[pgm] Consolidated %r states into %r states' % (len(data_ids), len(unique_ids),))
        #else:
        #    print('[pgm] Cannot consolidated %r unique states' % (len(data_ids),))

        if not inplace:
            return phi
Example #21
    def consolidate(self, inplace=False):
        """ removes duplicate entries

        Example:
            >>> # UNSTABLE_DOCTEST
            >>> from ibeis.algo.hots.pgm_ext import *  # NOQA
            >>> state_idxs = [[1, 0, 1], [1, 0, 1], [1, 0, 2]]
            >>> weights = [.1, .2, .1]
            >>> variables = ['v1', 'v2', 'v3']
            >>> self = ApproximateFactor(state_idxs, weights, variables)
            >>> inplace = False
            >>> phi = self.consolidate(inplace)
            >>> result = str(phi)
            >>> print(result)
            +------+------+------+-----------------------+
            | v1   | v2   | v3   |   \hat{phi}(v1,v2,v3) |
            |------+------+------+-----------------------|
            | v1_1 | v2_0 | v3_1 |                0.3000 |
            | v1_1 | v2_0 | v3_2 |                0.1000 |
            +------+------+------+-----------------------+
        """
        import vtool as vt

        phi = self.copy() if inplace else self
        #data_ids = vt.compute_ndarray_unique_rowids_unsafe(self.state_idxs)
        data_ids = self._compute_unique_state_ids()
        unique_ids, groupxs = vt.group_indices(data_ids)
        #assert len(unique_ids) == len(np.unique(vt.compute_unique_data_ids_(list(map(tuple, phi.state_idxs)))))
        if len(data_ids) != len(unique_ids):
            # Sum the values in the cpd to marginalize the duplicate probs
            # Take only the unique rows under this induced labeling
            unique_tmp_groupxs = np.array([gxs[0] for gxs in groupxs])
            self.state_idxs = self.state_idxs.take(unique_tmp_groupxs, axis=0)
            self.weights = np.array([
                g.sum() for g in vt.apply_grouping(self.weights, groupxs)
            ])
            #print('[pgm] Consolidated %r states into %r states' % (len(data_ids), len(unique_ids),))
        #else:
        #    print('[pgm] Cannot consolidated %r unique states' % (len(data_ids),))
        if not inplace:
            return phi
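The core of consolidate is deduplicating identical state rows and summing their weights. With integer state indices, the same effect can be sketched directly with numpy's unique-over-rows plus bincount:

import numpy as np

state_idxs = np.array([[1, 0, 1], [1, 0, 1], [1, 0, 2]])
weights = np.array([0.1, 0.2, 0.1])
# give each distinct row a group id, then sum the weights per group
unique_rows, inverse = np.unique(state_idxs, axis=0, return_inverse=True)
merged_weights = np.bincount(inverse.ravel(), weights=weights)
print(unique_rows)       # [[1 0 1]
                         #  [1 0 2]]
print(merged_weights)    # [0.3 0.1]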
Example #22
def general_name_coverage_mask_generator(make_mask_func, qreq_, cm, config, cov_cfg):
    """
    Yields:
        nid, weight_mask_m, weight_mask

    CommandLine:
        python -m ibeis.algo.hots.scoring --test-general_name_coverage_mask_generator --show
        python -m ibeis.algo.hots.scoring --test-general_name_coverage_mask_generator --show --qaid 18

    Note:
        Evaluate output one at a time or it will get clobbered

    Example0:
        >>> # SLOW_DOCTEST
        >>> # (IMPORTANT)
        >>> from ibeis.algo.hots.scoring import *  # NOQA
        >>> qreq_, cm = plh.testdata_scoring('PZ_MTEST', qaid_list=[18])
        >>> config = qreq_.qparams
        >>> make_mask_func, cov_cfg = get_mask_func(config)
        >>> masks_iter = general_name_coverage_mask_generator(make_mask_func, qreq_, cm, config, cov_cfg)
        >>> dnid_list, score_list, masks_list = evaluate_masks_iter(masks_iter)
        >>> ut.quit_if_noshow()
        >>> nidx = np.where(dnid_list == cm.qnid)[0][0]
        >>> daids = cm.get_groundtruth_daids()
        >>> dnid, weight_mask_m, weight_mask = masks_list[nidx]
        >>> show_single_coverage_mask(qreq_, cm, weight_mask_m, weight_mask, daids)
        >>> ut.show_if_requested()
    """
    if ut.VERYVERBOSE:
        print('[ncov] make_mask_func = %r' % (make_mask_func,))
        print('[ncov] cov_cfg = %s' % (ut.dict_str(cov_cfg),))
    assert cm.dnid_list is not None, 'eval nids'
    unique_dnids, groupxs = vt.group_indices(cm.dnid_list)
    fm_groups = vt.apply_grouping_(cm.fm_list, groupxs)
    fs_groups = vt.apply_grouping_(cm.fs_list, groupxs)
    fs_name_list = [np.hstack(fs_group) for fs_group in fs_groups]
    fm_name_list = [np.vstack(fm_group) for fm_group in fm_groups]
    return general_coverage_mask_generator(make_mask_func, qreq_, cm.qaid, unique_dnids, fm_name_list, fs_name_list, config, cov_cfg)
Example #23
    def compute_agg_rvecs(invassign, wx):
        """
        Sums and normalizes all rvecs that belong to the same word and the same
        annotation id
        """
        rvecs_list, error_flags = invassign.compute_nonagg_rvecs(wx)
        ax_list = invassign.wx2_axs[wx]
        maw_list = invassign.wx2_maws[wx]
        # group members of each word by aid; we will collapse these groups
        unique_ax, groupxs = vt.group_indices(ax_list)
        # (weighted aggregation with multi-assign-weights)
        grouped_maws = vt.apply_grouping(maw_list, groupxs)
        grouped_rvecs = vt.apply_grouping(rvecs_list, groupxs)
        grouped_flags = vt.apply_grouping(~error_flags, groupxs)

        grouped_rvecs2_ = vt.zipcompress(grouped_rvecs, grouped_flags, axis=0)
        grouped_maws2_ = vt.zipcompress(grouped_maws, grouped_flags)
        is_good = [len(rvecs) > 0 for rvecs in grouped_rvecs2_]
        aggvecs = [aggregate_rvecs(rvecs, maws)[0] for rvecs, maws in zip(grouped_rvecs2_, grouped_maws2_)]
        unique_ax2_ = unique_ax.compress(is_good)
        ax2_aggvec = dict(zip(unique_ax2_, aggvecs))
        # Need to recompute flags for consistency
        # flag is true when aggvec is all zeros
        return ax2_aggvec
Example #24
def collapse_labels(model, evidence, reduced_variables, reduced_row_idxs,
                    reduced_values):
    import vtool as vt
    #assert np.all(reduced_joint.values.ravel() == reduced_joint.values.flatten())
    reduced_ttypes = [model.var2_cpd[var].ttype for var in reduced_variables]

    evidence_vars = list(evidence.keys())
    evidence_state_idxs = ut.dict_take(evidence, evidence_vars)
    evidence_ttypes = [model.var2_cpd[var].ttype for var in evidence_vars]

    ttype2_ev_indices = dict(zip(*ut.group_indices(evidence_ttypes)))
    ttype2_re_indices = dict(zip(*ut.group_indices(reduced_ttypes)))
    # ttype2_ev_indices = ut.group_items(range(len(evidence_vars)), evidence_ttypes)
    # ttype2_re_indices = ut.group_items(range(len(reduced_variables)), reduced_ttypes)

    # Allow specific types of labels to change
    # everything is the same, only the names have changed.
    # TODO: allow for multiple different label_ttypes
    # for label_ttype in label_ttypes
    if 'name' not in model.ttype2_template:
        return reduced_row_idxs, reduced_values
    label_ttypes = ['name']
    for label_ttype in label_ttypes:
        ev_colxs = ttype2_ev_indices[label_ttype]
        re_colxs = ttype2_re_indices[label_ttype]

        ev_state_idxs = ut.take(evidence_state_idxs, ev_colxs)
        ev_state_idxs_tile = np.tile(ev_state_idxs, (len(reduced_values), 1)).astype(int)
        num_ev_ = len(ev_colxs)

        aug_colxs = list(range(num_ev_)) + (np.array(re_colxs) + num_ev_).tolist()
        aug_state_idxs = np.hstack([ev_state_idxs_tile, reduced_row_idxs])

        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.

        num_cols = len(aug_state_idxs.T)
        mask = vt.index_to_boolmask(aug_colxs, num_cols)
        other_colxs, = np.where(~mask)
        relbl_states = aug_state_idxs.compress(mask, axis=1)
        other_states = aug_state_idxs.compress(~mask, axis=1)
        tmp_relbl_states = np.array(list(map(make_temp_state, relbl_states)))

        max_tmp_state = -1
        min_tmp_state = tmp_relbl_states.min()

        # rebuild original state structure with temp state idxs
        tmp_state_cols = [None] * num_cols
        for count, colx in enumerate(aug_colxs):
            tmp_state_cols[colx] = tmp_relbl_states[:, count:count + 1]
        for count, colx in enumerate(other_colxs):
            tmp_state_cols[colx] = other_states[:, count:count + 1]
        tmp_state_idxs = np.hstack(tmp_state_cols)

        data_ids = np.array(
            vt.compute_unique_data_ids_(list(map(tuple, tmp_state_idxs))))
        unique_ids, groupxs = vt.group_indices(data_ids)
        print('Collapsed %r states into %r states' % (
            len(data_ids), len(unique_ids),))
        # Sum the values in the cpd to marginalize the duplicate probs
        new_values = np.array([
            g.sum() for g in vt.apply_grouping(reduced_values, groupxs)
        ])
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array(ut.get_list_column(groupxs, 0))
        new_aug_state_idxs = tmp_state_idxs.take(unique_tmp_groupxs, axis=0)

        tmp_idx_set = set((-np.arange(-max_tmp_state,
                                      (-min_tmp_state) + 1)).tolist())
        true_idx_set = set(range(len(model.ttype2_template[label_ttype].basis)))

        # Relabel the rows one more time to agree with initial constraints
        for colx, true_idx in enumerate(ev_state_idxs):
            tmp_idx = np.unique(new_aug_state_idxs.T[colx])
            assert len(tmp_idx) == 1
            tmp_idx_set -= {tmp_idx[0]}
            true_idx_set -= {true_idx}
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx
        # Relabel the remaining idxs
        remain_tmp_idxs = sorted(list(tmp_idx_set))[::-1]
        remain_true_idxs = sorted(list(true_idx_set))
        for tmp_idx, true_idx in zip(remain_tmp_idxs, remain_true_idxs):
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Remove evidence based augmented labels
        new_state_idxs = new_aug_state_idxs.T[num_ev_:].T
        return new_state_idxs, new_values
Example #25
def cluster_timespace_sec(posixtimes,
                          latlons,
                          thresh_sec=5,
                          km_per_sec=KM_PER_SEC):
    """
    Args:
        X_data (ndarray) : Nx3 array where columns are (seconds, lat, lon)
        thresh_sec (float) : threshold in seconds

    Doctest:
        >>> from wbia.algo.preproc.occurrence_blackbox import *  # NOQA
        >>> # Nx1 matrix denoting groundtruth locations (for testing)
        >>> X_name = np.array([0, 1, 1, 1, 1, 1, 2, 2, 2])
        >>> # Nx3 matrix where each columns are (time, lat, lon)
        >>> X_data = np.array([
        >>>     (0, 42.727985, -73.683994),  # MRC
        >>>     (0, 42.657414, -73.774448),  # Park1
        >>>     (0, 42.658333, -73.770993),  # Park2
        >>>     (0, 42.654384, -73.768919),  # Park3
        >>>     (0, 42.655039, -73.769048),  # Park4
        >>>     (0, 42.657872, -73.764148),  # Park5
        >>>     (0, 42.876974, -73.819311),  # CP1
        >>>     (0, 42.862946, -73.804977),  # CP2
        >>>     (0, 42.849809, -73.758486),  # CP3
        >>> ])
        >>> posixtimes = X_data.T[0]
        >>> latlons = X_data.T[1:3].T
        >>> thresh_sec = 250  # seconds
        >>> X_labels = cluster_timespace_sec(posixtimes, latlons, thresh_sec)
        >>> result = ('X_labels = %r' % (X_labels,))
        >>> print(result)
        X_labels = array([6, 4, 4, 4, 4, 5, 1, 2, 3])

    Doctest:
        >>> from wbia.algo.preproc.occurrence_blackbox import *  # NOQA
        >>> # Nx1 matrix denoting groundtruth locations (for testing)
        >>> X_name = np.array([0, 1, 1, 1, 1, 1, 2, 2, 2])
        >>> # Nx3 matrix where each columns are (time, lat, lon)
        >>> X_data = np.array([
        >>>     (np.nan, 42.657414, -73.774448),  # Park1
        >>>     (0, 42.658333, -73.770993),  # Park2
        >>>     (np.nan, np.nan, np.nan),  # Park3
        >>>     (np.nan, np.nan, np.nan),  # Park3.5
        >>>     (0, 42.655039, -73.769048),  # Park4
        >>>     (0, 42.657872, -73.764148),  # Park5
        >>> ])
        >>> posixtimes = X_data.T[0]
        >>> latlons = X_data.T[1:3].T
        >>> thresh_sec = 250  # seconds
        >>> km_per_sec = KM_PER_SEC
        >>> X_labels = cluster_timespace_sec(posixtimes, latlons, thresh_sec)
        >>> result = 'X_labels = {}'.format(ut.repr2(X_labels))
        >>> print(result)
        X_labels = np.array([3, 4, 1, 2, 4, 5])
    """
    X_data, dist_func, columns = prepare_data(posixtimes, latlons, km_per_sec,
                                              'seconds')
    if X_data is None:
        return None

    # Cluster nan distributions differently
    X_bools = ~np.isnan(X_data)
    group_id = (X_bools * np.power(2, [2, 1, 0])).sum(axis=1)
    import vtool as vt

    unique_ids, groupxs = vt.group_indices(group_id)
    grouped_labels = []
    for xs in groupxs:
        X_part = X_data.take(xs, axis=0)
        labels = _cluster_part(X_part, dist_func, columns, thresh_sec,
                               km_per_sec)
        grouped_labels.append((labels, xs))
    # Undo grouping and rectify overlaps
    X_labels = _recombine_labels(grouped_labels)
    # Do clustering
    return X_labels
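cluster_timespace_sec first partitions rows by their missing-data pattern, encoding each row's NaN layout as a small integer before clustering each partition separately. The bitmask trick in isolation:

import numpy as np

X_data = np.array([
    [np.nan, 42.65, -73.77],    # missing time
    [0.0,    42.66, -73.77],    # complete row
    [np.nan, np.nan, np.nan],   # fully missing
])
X_bools = ~np.isnan(X_data)
# bit 2 = time present, bit 1 = lat present, bit 0 = lon present
group_id = (X_bools * np.power(2, [2, 1, 0])).sum(axis=1)
print(group_id)    # [3 7 0]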
Example #26
def convert_category_to_siam_data(category_data, category_labels):
    # CONVERT CATEGORY LABELS TO PAIR LABELS
    # Make genuine and imposter pairs
    import vtool as vt
    unique_labels, groupxs_list = vt.group_indices(category_labels)

    num_categories = len(unique_labels)

    num_genuine = 10000 * num_categories
    num_imposter = 10000 * num_categories

    num_gen_per_category = int(num_genuine / len(unique_labels))
    num_imp_per_category = int(num_imposter / len(unique_labels))

    np.random.seed(0)
    groupxs = groupxs_list[0]

    def find_fix_flags(pairxs):
        is_dup = vt.nonunique_row_flags(pairxs)
        is_eye = pairxs.T[0] == pairxs.T[1]
        needs_fix = np.logical_or(is_dup, is_eye)
        #print(pairxs[needs_fix])
        return needs_fix

    def swap_undirected(pairxs):
        """ ensure left indicies are lower """
        needs_swap = pairxs.T[0] > pairxs.T[1]
        arr = pairxs[needs_swap]
        tmp = arr.T[0].copy()
        arr.T[0, :] = arr.T[1]
        arr.T[1, :] = tmp
        pairxs[needs_swap] = arr
        return pairxs

    def sample_pairs(left_list, right_list, size):
        # Sample initial random left and right indices
        _index1 = np.random.choice(left_list, size=size, replace=True)
        _index2 = np.random.choice(right_list, size=size, replace=True)
        # stack
        _pairxs = np.vstack((_index1, _index2)).T
        # make undirected
        _pairxs = swap_undirected(_pairxs)
        # iterate until feasible
        needs_fix = find_fix_flags(_pairxs)
        while np.any(needs_fix):
            num_fix = needs_fix.sum()
            print('fixing: %d' % num_fix)
            _pairxs.T[1][needs_fix] = np.random.choice(right_list,
                                                       size=num_fix,
                                                       replace=True)
            _pairxs = swap_undirected(_pairxs)
            needs_fix = find_fix_flags(_pairxs)
        return _pairxs

    print('sampling genuine pairs')
    genuine_pairx_list = []
    for groupxs in groupxs_list:
        left_list = groupxs
        right_list = groupxs
        size = num_gen_per_category
        _pairxs = sample_pairs(left_list, right_list, size)
        genuine_pairx_list.extend(_pairxs.tolist())

    print('sampling imposter pairs')
    imposter_pairx_list = []
    for index in range(len(groupxs_list)):
        # Pick random pairs of false matches
        groupxs = groupxs_list[index]
        bar_groupxs = np.hstack(groupxs_list[:index] +
                                groupxs_list[index + 1:])
        left_list = groupxs
        right_list = bar_groupxs
        size = num_imp_per_category
        _pairxs = sample_pairs(left_list, right_list, size)
        imposter_pairx_list.extend(_pairxs.tolist())

    # We might have added duplicate imposters, just remove them for now
    imposter_pairx_list = ut.take(
        imposter_pairx_list,
        vt.unique_row_indexes(np.array(imposter_pairx_list)))

    # structure data for output
    flat_data_pairxs = np.array(genuine_pairx_list + imposter_pairx_list)
    assert np.all(flat_data_pairxs.T[0] < flat_data_pairxs.T[1])
    assert find_fix_flags(flat_data_pairxs).sum() == 0
    # TODO: batch should use indices into data
    flat_index_list = np.array(
        ut.flatten(list(zip(flat_data_pairxs.T[0], flat_data_pairxs.T[1]))))
    data = np.array(category_data.take(flat_index_list, axis=0))
    labels = np.array([True] * len(genuine_pairx_list) +
                      [False] * len(imposter_pairx_list))
    return data, labels
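swap_undirected above canonicalizes each sampled pair so the smaller index is always on the left, which makes duplicate and self-pair detection straightforward. A compact numpy variant of that column swap:

import numpy as np

pairxs = np.array([[5, 2], [1, 4], [3, 3]])
needs_swap = pairxs.T[0] > pairxs.T[1]
# reverse the two columns of every row whose larger index comes first
pairxs[needs_swap] = pairxs[needs_swap][:, ::-1]
is_eye = pairxs.T[0] == pairxs.T[1]      # self-pairs would be resampled
print(pairxs)    # [[2 5]
                 #  [1 4]
                 #  [3 3]]
print(is_eye)    # [False False  True]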
Example #27
def build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs,
                           scores_list, daids_list, query_sccw):
    """
    Builds explicit chipmatches that the rest of the pipeline plays nice with

    Notation:
        An explicit cmtup_old is a tuple (fm, fs, fk) feature_matches,
        feature_scores, and feature_ranks.

        Let N be the number of matches

        A feature match, fm{shape=(N, 2), dtype=int32}, is an array where the first
        column corresponds to query_feature_indexes (qfx) and the second column
        corresponds to database_feature_indexes (dfx).

        A feature score, fs{shape=(N,), dtype=float64} is an array of scores

        A feature rank, fk{shape=(N,), dtype=int16} is an array of ranks

    Returns:
        daid2_chipmatch (dict) : (daid2_fm, daid2_fs, daid2_fk)
        Return Format::
            daid2_fm (dict): {daid: fm, ...}
            daid2_fs (dict): {daid: fs, ...}
            daid2_fk (dict): {daid: fk, ...}

    Example:
        >>> from ibeis.algo.hots.smk.smk_core import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, invindex, qindex, qparams = smk_debug.testdata_match_kernel_L2()
        >>> wx2_qrvecs, wx2_qmaws, wx2_qaids, wx2_qfxs, query_sccw = qindex
        >>> smk_alpha = ibs.cfg.query_cfg.smk_cfg.smk_alpha
        >>> smk_thresh = ibs.cfg.query_cfg.smk_cfg.smk_thresh
        >>> withinfo = True  # takes an 11s vs 2s
        >>> args = (wx2_qrvecs, wx2_qmaws, wx2_qaids, wx2_qfxs, query_sccw, invindex, withinfo, smk_alpha, smk_thresh)
        >>> retL1 =  match_kernel_L1(*args)
        >>> (daid2_totalscore, common_wxs, scores_list, daids_list, idf_list, daid_agg_keys,)  = retL1
        >>> daid2_chipmatch_old = build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        >>> daid2_chipmatch_new = build_daid2_chipmatch3(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        >>> print(utool.is_dicteq(daid2_chipmatch_old[0], daid2_chipmatch_new[0]))
        >>> print(utool.is_dicteq(daid2_chipmatch_old[2], daid2_chipmatch_new[2]))
        >>> print(utool.is_dicteq(daid2_chipmatch_old[1],  daid2_chipmatch_new[1]))

    %timeit build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
    %timeit build_daid2_chipmatch3(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
    """
    # FIXME: move groupby to vtool
    if utool.VERBOSE:
        print('[smk_core] build cmtup_old')

    wx2_dfxs = invindex.wx2_fxs
    daid2_sccw = invindex.daid2_sccw

    qfxs_list = [wx2_qfxs[wx] for wx in common_wxs]
    dfxs_list = [wx2_dfxs[wx] for wx in common_wxs]

    shapes_list = [scores.shape for scores in scores_list]  # 51us
    shape_ranges = [(mem_arange(w), mem_arange(h))
                    for (w, h) in shapes_list]  # 230us
    ijs_list = [
        mem_meshgrid(wrange, hrange) for (wrange, hrange) in shape_ranges
    ]  # 278us
    # Normalize scores for words, nMatches, and query sccw (still need daid sccw)
    nscores_iter = (scores * query_sccw for scores in scores_list)

    # FIXME: Preflatten all of these lists
    out_ijs = [list(zip(_is.flat, _js.flat)) for (_is, _js) in ijs_list]
    out_qfxs = [[qfxs[ix] for (ix, jx) in ijs]
                for (qfxs, ijs) in zip(qfxs_list, out_ijs)]
    out_dfxs = [[dfxs[jx] for (ix, jx) in ijs]
                for (dfxs, ijs) in zip(dfxs_list, out_ijs)]
    out_daids = ([daids[jx] for (ix, jx) in ijs]
                 for (daids, ijs) in zip(daids_list, out_ijs))
    out_scores = ([nscores[ijx] for ijx in ijs]
                  for (nscores, ijs) in zip(nscores_iter, out_ijs))
    nested_fm_iter = [[
        tuple(product(qfxs_, dfxs_)) for qfxs_, dfxs_ in zip(qfxs, dfxs)
    ] for qfxs, dfxs in zip(out_qfxs, out_dfxs)]
    all_fms = np.array(list(utool.iflatten(utool.iflatten(nested_fm_iter))),
                       dtype=hstypes.FM_DTYPE)
    nested_nmatch_list = [[len(fm) for fm in fms] for fms in nested_fm_iter]
    nested_daid_iter = ([
        [daid] * nMatch for nMatch, daid in zip(nMatch_list, daids)
    ] for nMatch_list, daids in zip(nested_nmatch_list, out_daids))
    nested_score_iter = ([
        [score / nMatch] * nMatch
        for nMatch, score in zip(nMatch_list, scores)
    ] for nMatch_list, scores in zip(nested_nmatch_list, out_scores))
    all_daids_ = np.array(list(utool.iflatten(
        utool.iflatten(nested_daid_iter))),
                          dtype=hstypes.INDEX_TYPE)
    all_fss = np.array(list(utool.iflatten(utool.iflatten(nested_score_iter))),
                       dtype=hstypes.FS_DTYPE)

    # Filter out 0 scores
    keep_xs = np.where(all_fss > 0)[0]
    all_fss = all_fss.take(keep_xs)
    all_fms = all_fms.take(keep_xs, axis=0)
    all_daids_ = all_daids_.take(keep_xs)

    daid_keys, groupxs = vt.group_indices(all_daids_)
    fs_list = vt.apply_grouping(all_fss, groupxs)
    fm_list = vt.apply_grouping(all_fms, groupxs)
    daid2_fm = {daid: fm for daid, fm in zip(daid_keys, fm_list)}
    daid2_fs = {
        daid: fs * daid2_sccw[daid]
        for daid, fs in zip(daid_keys, fs_list)
    }
    # FIXME: generalize to when nAssign > 1
    daid2_fk = {
        daid: np.ones(fs.size, dtype=hstypes.FK_DTYPE)
        for daid, fs in zip(daid_keys, fs_list)
    }
    daid2_chipmatch = (daid2_fm, daid2_fs, daid2_fk)

    return daid2_chipmatch
Example #28
def compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets):
    """
    More efficient version of agg on a stacked structure

    Args:
        words (ndarray): entire vocabulary of words
        flat_wxs_assign (ndarray): maps a stacked index to word index
        flat_vecs (ndarray): stacked SIFT descriptors
        flat_offsets (ndarray): offset positions per annotation

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.smk_funcs import *  # NOQA
        >>> data = testdata_rvecs(dim=2, nvecs=1000, nannots=10)
        >>> words = data['words']
        >>> flat_offsets = data['offset_list']
        >>> flat_wxs_assign, flat_vecs = ut.take(data, ['idx_to_wx', 'vecs'])
        >>> tup = compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets)
        >>> all_agg_vecs, all_error_flags, agg_offset_list = tup
        >>> agg_rvecs_list = [all_agg_vecs[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> agg_flags_list = [all_error_flags[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> assert len(agg_flags_list) == len(flat_offsets) - 1

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.smk_funcs import *  # NOQA
        >>> data = testdata_rvecs(dim=2, nvecs=100, nannots=5)
        >>> words = data['words']
        >>> flat_offsets = data['offset_list']
        >>> flat_wxs_assign, flat_vecs = ut.take(data, ['idx_to_wx', 'vecs'])
        >>> tup = compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets)
        >>> all_agg_vecs, all_error_flags, agg_offset_list = tup
        >>> agg_rvecs_list = [all_agg_vecs[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> agg_flags_list = [all_error_flags[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> assert len(agg_flags_list) == len(flat_offsets) - 1
    """
    grouped_wxs = [
        flat_wxs_assign[left:right] for left, right in ut.itertwo(flat_offsets)
    ]

    # Assume single assignment, aggregate everything
    # across the entire database
    flat_offsets = np.array(flat_offsets)

    idx_to_dx = (np.searchsorted(
        flat_offsets, np.arange(len(flat_wxs_assign)), side='right') -
                 1).astype(np.int32)

    if isinstance(flat_wxs_assign, np.ma.masked_array):
        wx_list = flat_wxs_assign.T[0].compressed()
    else:
        wx_list = flat_wxs_assign.T[0].ravel()
    unique_wx, groupxs = vt.group_indices(wx_list)

    dim = flat_vecs.shape[1]
    if isinstance(flat_wxs_assign, np.ma.masked_array):
        dx_to_wxs = [np.unique(wxs.compressed()) for wxs in grouped_wxs]
    else:
        dx_to_wxs = [np.unique(wxs.ravel()) for wxs in grouped_wxs]
    dx_to_nagg = [len(wxs) for wxs in dx_to_wxs]
    num_agg_vecs = sum(dx_to_nagg)
    # all_agg_wxs = np.hstack(dx_to_wxs)
    agg_offset_list = np.array([0] + ut.cumsum(dx_to_nagg))
    # Preallocate agg residuals for all dxs
    all_agg_vecs = np.empty((num_agg_vecs, dim), dtype=np.float32)
    all_agg_vecs[:, :] = np.nan

    # precompute agg residual stack
    i_to_dxs = vt.apply_grouping(idx_to_dx, groupxs)
    subgroup = [vt.group_indices(dxs) for dxs in ut.ProgIter(i_to_dxs)]
    i_to_unique_dxs = ut.take_column(subgroup, 0)
    i_to_dx_groupxs = ut.take_column(subgroup, 1)
    num_words = len(unique_wx)

    # Overall this takes 5 minutes and 21 seconds
    # I think the other method takes about 12 minutes
    for i in ut.ProgIter(range(num_words), 'agg'):
        wx = unique_wx[i]
        xs = groupxs[i]
        dxs = i_to_unique_dxs[i]
        dx_groupxs = i_to_dx_groupxs[i]
        word = words[wx:wx + 1]

        offsets1 = agg_offset_list.take(dxs)
        offsets2 = [np.where(dx_to_wxs[dx] == wx)[0][0] for dx in dxs]
        offsets = np.add(offsets1, offsets2, out=offsets1)

        # if __debug__:
        #     assert np.bincount(dxs).max() < 2
        #     offset = agg_offset_list[dxs[0]]
        #     assert np.all(dx_to_wxs[dxs[0]] == all_agg_wxs[offset:offset +
        #                                                    dx_to_nagg[dxs[0]]])

        # Compute residuals
        rvecs = flat_vecs[xs] - word
        vt.normalize(rvecs, axis=1, out=rvecs)
        rvecs[np.all(np.isnan(rvecs), axis=1)] = 0
        # Aggregate across same images
        grouped_rvecs = vt.apply_grouping(rvecs, dx_groupxs, axis=0)
        agg_rvecs_ = [rvec_group.sum(axis=0) for rvec_group in grouped_rvecs]
        # agg_rvecs = np.vstack(agg_rvecs_)
        all_agg_vecs[offsets, :] = agg_rvecs_

    assert not np.any(np.isnan(all_agg_vecs))
    logger.info('Apply normalization')
    vt.normalize(all_agg_vecs, axis=1, out=all_agg_vecs)
    all_error_flags = np.all(np.isnan(all_agg_vecs), axis=1)
    all_agg_vecs[all_error_flags, :] = 0

    # ndocs_per_word1 = np.array(ut.lmap(len, wx_to_unique_dxs))
    # ndocs_total1 = len(flat_offsets) - 1
    # idf1 = smk_funcs.inv_doc_freq(ndocs_total1, ndocs_per_word1)

    tup = all_agg_vecs, all_error_flags, agg_offset_list
    return tup
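
A small standalone sketch of the per-word aggregation inside the loop above (compute residuals to the word, L2-normalize, sum per owning annotation, renormalize); the values and shapes are purely illustrative:

import numpy as np

word = np.array([1.0, 0.0])                              # one visual word
vecs = np.array([[0.9, 0.1], [1.2, -0.2], [0.5, 0.5]])   # vectors assigned to it
dxs = np.array([0, 0, 1])                                # owning annotation per vector

rvecs = vecs - word
norms = np.linalg.norm(rvecs, axis=1, keepdims=True)
rvecs = np.divide(rvecs, norms, out=np.zeros_like(rvecs), where=norms > 0)

agg = np.zeros((dxs.max() + 1, word.size))
np.add.at(agg, dxs, rvecs)                               # sum residuals per annotation
agg_norms = np.linalg.norm(agg, axis=1, keepdims=True)
agg = np.divide(agg, agg_norms, out=np.zeros_like(agg), where=agg_norms > 0)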
Exemple #29
0
def get_query_result_info(qreq_):
    """
    Helper function.

    Runs queries of a specific configuration and returns the best rank of each query

    Args:
        qaids (list) : query annotation ids
        daids (list) : database annotation ids

    Returns:
        qx2_bestranks

    CommandLine:
        python -m ibeis.expt.harness --test-get_query_result_info
        python -m ibeis.expt.harness --test-get_query_result_info:0
        python -m ibeis.expt.harness --test-get_query_result_info:1
        python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1
        python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1 --cmd

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.harness import *  # NOQA
        >>> import ibeis
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_(a=['default:qindex=0:3,dindex=0:5'])
        >>> #ibs = ibeis.opendb('PZ_MTEST')
        >>> #qaids = ibs.get_valid_aids()[0:3]
        >>> #daids = ibs.get_valid_aids()[0:5]
        >>> #qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={})
        >>> cfgres_info = get_query_result_info(qreq_)
        >>> print(ut.dict_str(cfgres_info))

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.harness import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> #cfgdict = dict(codename='vsone')
        >>> # ibs.cfg.query_cfg.codename = 'vsone'
        >>> qaids = ibs.get_valid_aids()[0:3]
        >>> daids = ibs.get_valid_aids()[0:5]
        >>> qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={})
        >>> cfgres_info = get_query_result_info(qreq_)
        >>> print(ut.dict_str(cfgres_info))

    Ignore:

        ibeis -e rank_cdf --db humpbacks -a default:has_any=hasnotch,mingt=2 -t default:proot=BC_DTW --show --nocache-big

        ibeis -e rank_cdf --db humpbacks -a default:is_known=True,mingt=2 -t default:pipeline_root=BC_DTW
        --show --debug-depc
        ibeis -e rank_cdf --db humpbacks -a default:is_known=True -t default:pipeline_root=BC_DTW --qaid=1,9,15,16,18 --daid-override=1,9,15,16,18,21,22 --show --debug-depc
        --clear-all-depcache
    """
    try:
        ibs = qreq_.ibs
    except AttributeError:
        ibs = qreq_.depc.controller
    import vtool as vt
    cm_list = qreq_.execute()
    #qreq_.ibs.query_chips(qreq_=qreq_, use_bigcache=False)
    qx2_cm = cm_list
    qaids = qreq_.qaids
    #qaids2 = [cm.qaid for cm in cm_list]
    qnids = ibs.get_annot_name_rowids(qaids)

    import utool
    with utool.embed_on_exception_context:
        unique_dnids = np.unique(ibs.get_annot_name_rowids(qreq_.daids))

        unique_qnids, groupxs = vt.group_indices(qnids)
        cm_group_list = ut.apply_grouping(cm_list, groupxs)
        qnid2_aggnamescores = {}

    qnx2_nameres_info = []

    #import utool
    #utool.embed()

    # Ranked list aggregation-ish
    nameres_info_list = []
    for qnid, cm_group in zip(unique_qnids, cm_group_list):
        nid2_name_score_group = [
            dict([(nid, cm.name_score_list[nidx])
                  for nid, nidx in cm.nid2_nidx.items()]) for cm in cm_group
        ]
        aligned_name_scores = np.array([
            ut.dict_take(nid2_name_score, unique_dnids.tolist(), -np.inf)
            for nid2_name_score in nid2_name_score_group
        ]).T
        name_score_list = np.nanmax(aligned_name_scores, axis=1)
        qnid2_aggnamescores[qnid] = name_score_list
        # sort
        sortx = name_score_list.argsort()[::-1]
        sorted_namescores = name_score_list[sortx]
        sorted_dnids = unique_dnids[sortx]

        ## infer agg name results
        is_positive = sorted_dnids == qnid
        is_negative = np.logical_and(~is_positive, sorted_dnids > 0)
        gt_name_rank = None if not np.any(is_positive) else np.where(
            is_positive)[0][0]
        gf_name_rank = None if not np.any(is_negative) else np.nonzero(
            is_negative)[0][0]
        gt_nid = sorted_dnids[gt_name_rank]
        gf_nid = sorted_dnids[gf_name_rank]
        gt_name_score = sorted_namescores[gt_name_rank]
        gf_name_score = sorted_namescores[gf_name_rank]
        qnx2_nameres_info = {}
        qnx2_nameres_info['qnid'] = qnid
        qnx2_nameres_info['gt_nid'] = gt_nid
        qnx2_nameres_info['gf_nid'] = gf_nid
        qnx2_nameres_info['gt_name_rank'] = gt_name_rank
        qnx2_nameres_info['gf_name_rank'] = gf_name_rank
        qnx2_nameres_info['gt_name_score'] = gt_name_score
        qnx2_nameres_info['gf_name_score'] = gf_name_score

        nameres_info_list.append(qnx2_nameres_info)
        nameres_info = ut.dict_stack(nameres_info_list, 'qnx2_')

    qaids = qreq_.qaids
    daids = qreq_.daids
    qx2_gtaids = ibs.get_annot_groundtruth(qaids, daid_list=daids)
    # Get the groundtruth ranks and accuracy measures
    qx2_qresinfo = [get_qres_name_result_info(ibs, cm, qreq_) for cm in qx2_cm]

    cfgres_info = ut.dict_stack(qx2_qresinfo, 'qx2_')
    #for key in qx2_qresinfo[0].keys():
    #    'qx2_' + key
    #    ut.get_list_column(qx2_qresinfo, key)

    if False:
        qx2_avepercision = np.array([
            cm.get_average_percision(ibs=ibs, gt_aids=gt_aids)
            for (cm, gt_aids) in zip(qx2_cm, qx2_gtaids)
        ])
        cfgres_info['qx2_avepercision'] = qx2_avepercision
    # Compute mAP score  # TODO: use mAP score
    # (Actually mAP score doesn't make much sense if using name scoring)
    #mAP = qx2_avepercision[~np.isnan(qx2_avepercision)].mean()  # NOQA
    cfgres_info['qx2_bestranks'] = ut.replace_nones(
        cfgres_info['qx2_bestranks'], -1)
    cfgres_info.update(nameres_info)
    return cfgres_info
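
A tiny sketch of the per-name rank extraction performed in the loop above (hypothetical sorted name ids; qnid is the query's true name id):

import numpy as np

sorted_dnids = np.array([12, 7, 12, -1, 9])
qnid = 7
is_positive = sorted_dnids == qnid
is_negative = np.logical_and(~is_positive, sorted_dnids > 0)
gt_name_rank = None if not is_positive.any() else int(np.flatnonzero(is_positive)[0])
gf_name_rank = None if not is_negative.any() else int(np.flatnonzero(is_negative)[0])
# gt_name_rank == 1 (first correct name), gf_name_rank == 0 (first wrong name)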
Exemple #30
0
def compute_nsum_score(cm, qreq_=None):
    r"""
    nsum

    Args:
        cm (ibeis.ChipMatch):

    Returns:
        tuple: (unique_nids, nsum_score_list)

    CommandLine:
        python -m ibeis.algo.hots.name_scoring --test-compute_nsum_score
        python -m ibeis.algo.hots.name_scoring --test-compute_nsum_score:0
        python -m ibeis.algo.hots.name_scoring --test-compute_nsum_score:2
        utprof.py -m ibeis.algo.hots.name_scoring --test-compute_nsum_score:2
        utprof.py -m ibeis.algo.hots.pipeline --test-request_ibeis_query_L0:0 --db PZ_Master1 -a timectrl:qindex=0:256

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> # build test data
        >>> cm = testdata_chipmatch()
        >>> # execute function
        >>> (unique_nids, nsum_score_list) = compute_nsum_score(cm)
        >>> result = ut.list_str((unique_nids, nsum_score_list), label_list=['unique_nids', 'nsum_score_list'], with_dtype=False)
        >>> print(result)
        unique_nids = np.array([1, 2, 3])
        nsum_score_list = np.array([ 4.,  7.,  5.])

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('testdb1', qaid_list=[1])
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18])
        >>> cm = cm_list[0]
        >>> cm.evaluate_dnids(qreq_.ibs)
        >>> cm._cast_scores()
        >>> #cm.qnid = 1   # Hack for testdb1 names
        >>> nsum_nid_list, nsum_score_list = compute_nsum_score(cm, qreq_)
        >>> assert np.all(nsum_nid_list == cm.unique_nids), 'nids out of alignment'
        >>> flags = (nsum_nid_list == cm.qnid)
        >>> max_true = nsum_score_list[flags].max()
        >>> max_false = nsum_score_list[~flags].max()
        >>> assert max_true > max_false, 'is this truly a hard case?'
        >>> assert max_true > 1.2, 'score=%r should be higher for aid=18' % (max_true,)
        >>> nsum_nid_list2, nsum_score_list2, _ = compute_nsum_score2(cm, qreq_)
        >>> assert np.allclose(nsum_score_list2, nsum_score_list), 'something is very wrong'
        >>> #assert np.all(nsum_score_list2 == nsum_score_list), 'could be a precision issue'

    Example2:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('testdb1', qaid_list=[1])
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18], cfgdict=dict(augment_queryside_hack=True))
        >>> cm = cm_list[0]
        >>> cm.score_nsum(qreq_)
        >>> #cm.evaluate_dnids(qreq_.ibs)
        >>> #cm.qnid = 1   # Hack for testdb1 names
        >>> #nsum_nid_list, nsum_score_list = compute_nsum_score(cm, qreq_=qreq_)
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_, ori=True)

    Example3:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('testdb1', qaid_list=[1])
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('testdb1', qaid_list=[1], cfgdict=dict(augment_queryside_hack=True))
        >>> cm = cm_list[0]
        >>> cm.score_nsum(qreq_)
        >>> #cm.evaluate_dnids(qreq_.ibs)
        >>> #cm.qnid = 1   # Hack for testdb1 names
        >>> #nsum_nid_list, nsum_score_list = compute_nsum_score(cm, qreq_=qreq_)
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_, ori=True)

    Example4:
        >>> # ENABLE_DOCTEST
        >>> # FIXME: breaks when fg_on=True
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> from ibeis.algo.hots import name_scoring
        >>> from ibeis.algo.hots import scoring
        >>> import ibeis
        >>> # Test to make sure name score and chips score are equal when per_name=1
        >>> qreq_, args = plh.testdata_pre(
        >>>     'spatial_verification', defaultdb='PZ_MTEST',
        >>>     a=['default:dpername=1,qsize=1,dsize=10'],
        >>>     p=['default:K=1,fg_on=True,sqrd_dist_on=True'])
        >>> cm = args.cm_list_FILT[0]
        >>> ibs = qreq_.ibs
        >>> # Ensure there is only one aid per database name
        >>> assert isinstance(ibs, ibeis.control.IBEISControl.IBEISController)
        >>> #stats_dict = ibs.get_annot_stats_dict(qreq_.get_external_daids(), prefix='d')
        >>> #stats = stats_dict['dper_name']
        >>> stats = ibs.get_annot_per_name_stats(qreq_.get_external_daids())
        >>> print('per_name_stats = %s' % (ut.dict_str(stats, nl=False),))
        >>> assert stats['mean'] == 1 and stats['std'] == 0, 'this test requires one annot per name in the database'
        >>> cm.evaluate_dnids(qreq_.ibs)
        >>> cm.assert_self(qreq_)
        >>> cm._cast_scores()
        >>> # cm.fs_list = cm.fs_list.astype(np.float)
        >>> nsum_nid_list, nsum_score_list = name_scoring.compute_nsum_score(cm, qreq_)
        >>> nsum_nid_list2, nsum_score_list2, _ = name_scoring.compute_nsum_score2(cm, qreq_)
        >>> csum_score_list = scoring.compute_csum_score(cm)
        >>> vt.asserteq(nsum_score_list, csum_score_list)
        >>> vt.asserteq(nsum_score_list, csum_score_list, thresh=0, iswarning=True)
        >>> vt.asserteq(nsum_score_list2, csum_score_list, thresh=0, iswarning=True)
        >>> #assert np.allclose(nsum_score_list, csum_score_list), 'should be the same when K=1 and per_name=1'
        >>> #assert all(nsum_score_list  == csum_score_list), 'should be the same when K=1 and per_name=1'
        >>> #assert all(nsum_score_list2 == csum_score_list), 'should be the same when K=1 and per_name=1'
        >>> # Evaluate parts of the sourcecode


    Ignore:
        assert all(nsum_score_list3 == csum_score_list), 'should be the same when K=1 and per_name=1'
        fm_list = fm_list[0:1]
        fs_list = fs_list[0:1]
        featflag_list2 = featflag_list2[0:1]
        dnid_list = dnid_list[0:1]
        name_groupxs2 = name_groupxs2[0:1]
        nsum_nid_list2 = nsum_nid_list2[0:1]

    """
    #assert qreq_ is not None
    try:
        HACK_SINGLE_ORI =  qreq_ is not None and (qreq_.qparams.augment_queryside_hack or qreq_.qparams.rotation_invariance)
    except AttributeError:
        HACK_SINGLE_ORI =  qreq_ is not None and (qreq_.config.augment_queryside_hack or qreq_.config.feat_cfg.rotation_invariance)
        pass
    # The core for each feature match
    #
    # The query feature index for each feature match
    fm_list = cm.fm_list
    fs_list = cm.get_fsv_prod_list()
    dnid_list = cm.dnid_list
    #--
    fx1_list = [fm.T[0] for fm in fm_list]
    """
    # Try a rebase?
    fx1_list = list(map(vt.compute_unique_data_ids_, fx1_list))
    """
    # Group annotation matches by name
    nsum_nid_list, name_groupxs = vt.group_indices(dnid_list)
    name_grouped_fx1_list = vt.apply_grouping_(fx1_list, name_groupxs)
    name_grouped_fs_list  = vt.apply_grouping_(fs_list,  name_groupxs)
    # Stack up all matches to a particular name
    name_grouped_fx1_flat = list(map(np.hstack, name_grouped_fx1_list))
    name_grouped_fs_flat  = list(map(np.hstack, name_grouped_fs_list))
    """
    assert np.all(name_grouped_fs_list[0][0] == fs_list[0])
    assert np.all(name_grouped_fs_flat[0] == fs_list[0])
    """
    if HACK_SINGLE_ORI:
        # keypoints with the same xy can only have one of them vote
        kpts1 = qreq_.ibs.get_annot_kpts(cm.qaid, config2_=qreq_.get_external_query_config2())
        xys1_ = vt.get_xys(kpts1).T
        kpts_xyid_list = vt.compute_unique_arr_dataids(xys1_)
        # Make nested group for every name by query feature index (accounting for duplicate orientation)
        name_grouped_xyid_flat = [kpts_xyid_list.take(fx1) for fx1 in name_grouped_fx1_flat]
        feat_groupxs_list = [vt.group_indices(xyid_flat)[1] for xyid_flat in name_grouped_xyid_flat]
    else:
        # make unique indices using feature indexes
        feat_groupxs_list = [vt.group_indices(fx1_flat)[1] for fx1_flat in name_grouped_fx1_flat]
    # Make nested group for every name by unique query feature index
    feat_grouped_fs_list = [[fs_flat.take(xs, axis=0) for xs in feat_groupxs]
                            for fs_flat, feat_groupxs in zip(name_grouped_fs_flat, feat_groupxs_list)]
    """
    np.array(feat_grouped_fs_list)[0].T[0] == fs_list
    """
    if False:
        valid_fs_list = [
            np.array([group.max() for group in grouped_fs])
            #np.array([group[group.argmax()] for group in grouped_fs])
            for grouped_fs in feat_grouped_fs_list
        ]
        nsum_score_list4 = np.array([valid_fs.sum() for valid_fs in valid_fs_list])  # NOQA
    # Prevent a feature from voting twice:
    # take only the max score that a query feature produced
    #name_grouped_valid_fs_list1 =[np.array([fs_group.max() for fs_group in feat_grouped_fs])
    #                            for feat_grouped_fs in feat_grouped_fs_list]
    nsum_score_list = np.array([np.sum([fs_group.max() for fs_group in feat_grouped_fs])
                                for feat_grouped_fs in feat_grouped_fs_list])
    return nsum_nid_list, nsum_score_list
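
The core voting rule above, reduced to a standalone numpy sketch: within one name, each query feature may contribute only its best score (toy values, plain numpy in place of vt.group_indices):

import numpy as np

fx1 = np.array([3, 7, 3, 9])            # query feature index per match (one name)
fs = np.array([0.4, 0.9, 0.6, 0.2])     # score per match

order = fx1.argsort(kind='mergesort')
cuts = np.flatnonzero(np.diff(fx1[order])) + 1
groups = np.split(order, cuts)          # match indices grouped by query feature
name_score = sum(fs[g].max() for g in groups)
# name_score == 0.6 + 0.9 + 0.2 == 1.7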
Exemple #31
0
def get_namescore_nonvoting_feature_flags(fm_list,
                                          fs_list,
                                          dnid_list,
                                          name_groupxs,
                                          kpts1=None):
    r"""
    DEPRICATE

    fm_list = [fm[:min(len(fm), 10)] for fm in fm_list]
    fs_list = [fs[:min(len(fs), 10)] for fs in fs_list]
    """
    fx1_list = [fm.T[0] for fm in fm_list]
    # Group annotation matches by name
    name_grouped_fx1_list = vt.apply_grouping_(fx1_list, name_groupxs)
    name_grouped_fs_list = vt.apply_grouping_(fs_list, name_groupxs)
    # Stack up all matches to a particular name, keep track of original indices via offsets
    name_invertable_flat_fx1_list = list(
        map(ut.invertible_flatten2_numpy, name_grouped_fx1_list))
    name_grouped_fx1_flat = ut.get_list_column(name_invertable_flat_fx1_list,
                                               0)
    name_grouped_invertable_cumsum_list = ut.get_list_column(
        name_invertable_flat_fx1_list, 1)
    name_grouped_fs_flat = list(map(np.hstack, name_grouped_fs_list))
    if kpts1 is not None:
        xys1_ = vt.get_xys(kpts1).T
        kpts_xyid_list = vt.compute_unique_data_ids(xys1_)
        # Make nested group for every name by query feature index (accounting for duplicate orientation)
        name_grouped_comboid_flat = list(
            kpts_xyid_list.take(fx1) for fx1 in name_grouped_fx1_flat)
        xyid_groupxs_list = list(
            vt.group_indices(xyid_flat)[1]
            for xyid_flat in name_grouped_comboid_flat)
        name_group_fx1_groupxs_list = xyid_groupxs_list
    else:
        # Make nested group for every name by query feature index
        fx1_groupxs_list = [
            vt.group_indices(fx1_flat)[1] for fx1_flat in name_grouped_fx1_flat
        ]
        name_group_fx1_groupxs_list = fx1_groupxs_list
    name_grouped_fid_grouped_fs_list = [
        vt.apply_grouping(fs_flat, fid_groupxs) for fs_flat, fid_groupxs in
        zip(name_grouped_fs_flat, name_group_fx1_groupxs_list)
    ]

    # Flag which features are valid in this grouped space. Only one keypoint should be able to vote
    # for each group
    name_grouped_fid_grouped_isvalid_list = [
        np.array(
            [fs_group.max() == fs_group for fs_group in fid_grouped_fs_list])
        for fid_grouped_fs_list in name_grouped_fid_grouped_fs_list
    ]

    # Go back to being grouped only in name space
    #dtype = np.bool
    name_grouped_isvalid_flat_list = [
        vt.invert_apply_grouping2(fid_grouped_isvalid_list,
                                  fid_groupxs,
                                  dtype=bool)  # np.bool was removed from modern numpy
        for fid_grouped_isvalid_list, fid_groupxs in zip(
            name_grouped_fid_grouped_isvalid_list, name_group_fx1_groupxs_list)
    ]

    name_grouped_isvalid_unflat_list = [
        ut.unflatten2(isvalid_flat, invertable_cumsum_list)
        for isvalid_flat, invertable_cumsum_list in zip(
            name_grouped_isvalid_flat_list,
            name_grouped_invertable_cumsum_list)
    ]

    # Reports which features were valid in name scoring for every annotation
    featflag_list = vt.invert_apply_grouping(name_grouped_isvalid_unflat_list,
                                             name_groupxs)
    return featflag_list
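
The same idea expressed as per-match validity flags, which is what this deprecated variant returns (illustrative groups only; note that ties mark every tied match as valid, matching the fs_group.max() == fs_group test above):

import numpy as np

fid_grouped_fs_list = [np.array([0.4, 0.6]), np.array([0.9]), np.array([0.2, 0.2])]
isvalid_list = [fs_group.max() == fs_group for fs_group in fid_grouped_fs_list]
# -> [array([False,  True]), array([ True]), array([ True,  True])]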
Exemple #32
0
def get_query_result_info(qreq_):
    """
    Helper function.

    Runs queries of a specific configuration and returns the best rank of each query

    Args:
        qaids (list) : query annotation ids
        daids (list) : database annotation ids

    Returns:
        qx2_bestranks

    CommandLine:
        python -m ibeis.expt.harness --test-get_query_result_info
        python -m ibeis.expt.harness --test-get_query_result_info:0
        python -m ibeis.expt.harness --test-get_query_result_info:1
        python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1
        python -m ibeis.expt.harness --test-get_query_result_info:0 --db lynx -a default:qsame_imageset=True,been_adjusted=True,excluderef=True -t default:K=1 --cmd

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.harness import *  # NOQA
        >>> import ibeis
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_(a=['default:qindex=0:3,dindex=0:5'])
        >>> #ibs = ibeis.opendb('PZ_MTEST')
        >>> #qaids = ibs.get_valid_aids()[0:3]
        >>> #daids = ibs.get_valid_aids()[0:5]
        >>> #qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={})
        >>> cfgres_info = get_query_result_info(qreq_)
        >>> print(ut.dict_str(cfgres_info))

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.harness import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> #cfgdict = dict(codename='vsone')
        >>> # ibs.cfg.query_cfg.codename = 'vsone'
        >>> qaids = ibs.get_valid_aids()[0:3]
        >>> daids = ibs.get_valid_aids()[0:5]
        >>> qreq_ = ibs.new_query_request(qaids, daids, verbose=True, cfgdict={})
        >>> cfgres_info = get_query_result_info(qreq_)
        >>> print(ut.dict_str(cfgres_info))

    Ignore:

        ibeis -e rank_cdf --db humpbacks -a default:has_any=hasnotch,mingt=2 -t default:proot=BC_DTW --show --nocache-big

        ibeis -e rank_cdf --db humpbacks -a default:is_known=True,mingt=2 -t default:pipeline_root=BC_DTW
        --show --debug-depc
        ibeis -e rank_cdf --db humpbacks -a default:is_known=True -t default:pipeline_root=BC_DTW --qaid=1,9,15,16,18 --daid-override=1,9,15,16,18,21,22 --show --debug-depc
        --clear-all-depcache
    """
    try:
        ibs = qreq_.ibs
    except AttributeError:
        ibs = qreq_.depc.controller
    import vtool as vt
    cm_list = qreq_.execute()
    #qreq_.ibs.query_chips(qreq_=qreq_, use_bigcache=False)
    qx2_cm = cm_list
    qaids = qreq_.qaids
    #qaids2 = [cm.qaid for cm in cm_list]
    qnids = ibs.get_annot_name_rowids(qaids)

    import utool
    with utool.embed_on_exception_context:
        unique_dnids = np.unique(ibs.get_annot_name_rowids(qreq_.daids))

        unique_qnids, groupxs = vt.group_indices(qnids)
        cm_group_list = ut.apply_grouping(cm_list, groupxs)
        qnid2_aggnamescores = {}

    qnx2_nameres_info = []

    #import utool
    #utool.embed()

    # Ranked list aggregation-ish
    nameres_info_list = []
    for qnid, cm_group in zip(unique_qnids, cm_group_list):
        nid2_name_score_group = [
            dict([(nid, cm.name_score_list[nidx]) for nid, nidx in cm.nid2_nidx.items()])
            for cm in cm_group
        ]
        aligned_name_scores = np.array([
            ut.dict_take(nid2_name_score, unique_dnids.tolist(), -np.inf)
            for nid2_name_score in nid2_name_score_group
        ]).T
        name_score_list = np.nanmax(aligned_name_scores, axis=1)
        qnid2_aggnamescores[qnid] = name_score_list
        # sort
        sortx = name_score_list.argsort()[::-1]
        sorted_namescores = name_score_list[sortx]
        sorted_dnids = unique_dnids[sortx]

        ## infer agg name results
        is_positive = sorted_dnids == qnid
        is_negative = np.logical_and(~is_positive, sorted_dnids > 0)
        gt_name_rank = None if not np.any(is_positive) else np.where(is_positive)[0][0]
        gf_name_rank = None if not np.any(is_negative) else np.nonzero(is_negative)[0][0]
        gt_nid = sorted_dnids[gt_name_rank]
        gf_nid = sorted_dnids[gf_name_rank]
        gt_name_score = sorted_namescores[gt_name_rank]
        gf_name_score = sorted_namescores[gf_name_rank]
        qnx2_nameres_info = {}
        qnx2_nameres_info['qnid'] = qnid
        qnx2_nameres_info['gt_nid'] = gt_nid
        qnx2_nameres_info['gf_nid'] = gf_nid
        qnx2_nameres_info['gt_name_rank'] = gt_name_rank
        qnx2_nameres_info['gf_name_rank'] = gf_name_rank
        qnx2_nameres_info['gt_name_score'] = gt_name_score
        qnx2_nameres_info['gf_name_score'] = gf_name_score

        nameres_info_list.append(qnx2_nameres_info)
        nameres_info = ut.dict_stack(nameres_info_list, 'qnx2_')

    qaids = qreq_.qaids
    daids = qreq_.daids
    qx2_gtaids = ibs.get_annot_groundtruth(qaids, daid_list=daids)
    # Get the groundtruth ranks and accuracy measures
    qx2_qresinfo = [get_qres_name_result_info(ibs, cm, qreq_) for cm in qx2_cm]

    cfgres_info = ut.dict_stack(qx2_qresinfo, 'qx2_')
    #for key in qx2_qresinfo[0].keys():
    #    'qx2_' + key
    #    ut.get_list_column(qx2_qresinfo, key)

    if False:
        qx2_avepercision = np.array(
            [cm.get_average_percision(ibs=ibs, gt_aids=gt_aids) for
             (cm, gt_aids) in zip(qx2_cm, qx2_gtaids)])
        cfgres_info['qx2_avepercision'] = qx2_avepercision
    # Compute mAP score  # TODO: use mAP score
    # (Actually mAP score doesn't make much sense if using name scoring)
    #mAP = qx2_avepercision[~np.isnan(qx2_avepercision)].mean()  # NOQA
    cfgres_info['qx2_bestranks'] = ut.replace_nones(cfgres_info['qx2_bestranks'], -1)
    cfgres_info.update(nameres_info)
    return cfgres_info
Exemple #33
0
def group_scores_by_name(ibs, aid_list, score_list):
    r"""
    Converts annotation scores to name scores.
    Over multiple annotations, finds each keypoint's best match and uses that score.

    CommandLine:
        python -m ibeis.algo.hots.name_scoring --test-group_scores_by_name

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *   # NOQA
        >>> import ibeis
        >>> cm, qreq_ = ibeis.testdata_cm('PZ_MTEST')
        >>> ibs = qreq_.ibs
        >>> #print(cm.get_inspect_str(qreq_))
        >>> aid_list = cm.daid_list
        >>> score_list = cm.annot_score_list
        >>> nscoretup = group_scores_by_name(ibs, aid_list, score_list)
        >>> (sorted_nids, sorted_nscore, sorted_aids, sorted_scores) = nscoretup
        >>> ut.assert_eq(sorted_nids[0], cm.qnid)

    TODO:
        # TODO: this code needs a really good test case
        #>>> result = np.array_repr(sorted_nids[0:2])
        #>>> print(result)
        #array([1, 5])

        Ignore::
            # hack in dict of Nones prob for testing
            import six
            qres.aid2_prob = {aid:None for aid in six.iterkeys(qres.aid2_score)}

        array([ 1,  5, 26])
        [2 6 5]

        Timeit::
            import ibeis
            ibs = ibeis.opendb('PZ_MTEST')
            aid_list = ibs.get_valid_aids()
            aid_arr = np.array(aid_list)
            %timeit ibs.get_annot_name_rowids(aid_list)
            %timeit ibs.get_annot_name_rowids(aid_arr)


    """
    assert len(score_list) == len(aid_list), 'scores and aids must be associated'
    score_arr = np.array(score_list)
    nid_list  = np.array(ibs.get_annot_name_rowids(aid_list))
    aid_list  = np.array(aid_list)
    # Group scores by name
    unique_nids, groupxs = vt.group_indices(nid_list)
    grouped_scores = np.array(vt.apply_grouping(score_arr, groupxs))
    grouped_aids   = np.array(vt.apply_grouping(aid_list, groupxs))
    # Build representative score per group
    # (find each keypoint's best match per annotation within the name)
    group_nscore = np.array([scores.max() for scores in grouped_scores])
    group_sortx = group_nscore.argsort()[::-1]
    # Top nids
    sorted_nids = unique_nids.take(group_sortx, axis=0)
    sorted_nscore = group_nscore.take(group_sortx, axis=0)
    # Initial sort of aids
    _sorted_aids   = grouped_aids.take(group_sortx, axis=0)
    _sorted_scores = grouped_scores.take(group_sortx, axis=0)
    # Secondary sort of aids
    sorted_sortx  = [scores.argsort()[::-1] for scores in _sorted_scores]
    sorted_scores = [scores.take(sortx) for scores, sortx in zip(_sorted_scores, sorted_sortx)]
    sorted_aids   = [aids.take(sortx) for aids, sortx in zip(_sorted_aids, sorted_sortx)]
    nscoretup     = NameScoreTup(sorted_nids, sorted_nscore, sorted_aids, sorted_scores)
    return nscoretup
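
A compact numpy-only sketch of the name grouping above: take the best annotation score per name id, then sort names by that score (toy values):

import numpy as np

nid_list = np.array([2, 1, 2, 3])
score_arr = np.array([0.3, 0.8, 0.5, 0.1])

unique_nids, inverse = np.unique(nid_list, return_inverse=True)
group_nscore = np.full(len(unique_nids), -np.inf)
np.maximum.at(group_nscore, inverse, score_arr)   # best annot score per name
group_sortx = group_nscore.argsort()[::-1]
sorted_nids = unique_nids[group_sortx]
sorted_nscore = group_nscore[group_sortx]
# sorted_nids -> [1, 2, 3], sorted_nscore -> [0.8, 0.5, 0.1]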
Exemple #34
0
def get_annotmatch_rowids_from_aid2(ibs,
                                    aid2_list,
                                    eager=True,
                                    nInput=None,
                                    force_method=None):
    """
    # This one is slow because aid2 is the second part of the index

    TODO autogenerate

    Returns a list of the aids that were reviewed as candidate matches to the input aid

    aid_list = ibs.get_valid_aids()

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid2 --show

    Example2:
        >>> # TIME TEST
        >>> # setup_pzmtest_subgraph()
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_Master1')
        >>> aid2_list = ibs.get_valid_aids()
        >>> func_list = [
        >>>     partial(ibs.get_annotmatch_rowids_from_aid2, force_method=1),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid2, force_method=2),
        >>> ]
        >>> num_list = [1, 10, 50, 100, 300, 325, 350, 400, 500]
        >>> def args_list(count, aid2_list=aid2_list, num_list=num_list):
        >>>    return (aid2_list[0:num_list[count]],)
        >>> searchkw = dict(
        >>>     func_labels=['sql', 'numpy'],
        >>>     count_to_xtick=lambda count, args: len(args[0]),
        >>>     title='Timings of get_annotmatch_rowids_from_aid2',
        >>> )
        >>> niters = len(num_list)
        >>> time_result = ut.gridsearch_timer(func_list, args_list, niters, **searchkw)
        >>> time_result['plot_timings']()
        >>> ut.show_if_requested()
    """
    from ibeis.control import _autogen_annotmatch_funcs
    if force_method != 2 and (nInput is None or nInput < 128 or force_method == 1):
        colnames = (_autogen_annotmatch_funcs.ANNOTMATCH_ROWID, )
        # FIXME: col_rowid is not correct
        params_iter = zip(aid2_list)
        andwhere_colnames = [_autogen_annotmatch_funcs.ANNOT_ROWID2]
        annotmatch_rowid_list = ibs.db.get_where2(ibs.const.ANNOTMATCH_TABLE,
                                                  colnames,
                                                  params_iter,
                                                  andwhere_colnames,
                                                  eager=eager,
                                                  nInput=nInput,
                                                  unpack_scalars=False)
    else:
        # Fall back to the numpy grouping method (force_method == 2 or large inputs)
        import vtool as vt
        all_annotmatch_rowids = np.array(ibs._get_all_annotmatch_rowids())
        aids2 = np.array(ibs.get_annotmatch_aid2(all_annotmatch_rowids))
        unique_aid2, groupxs2 = vt.group_indices(aids2)
        rowids2_ = vt.apply_grouping(all_annotmatch_rowids, groupxs2)
        rowids2_ = [_.tolist() for _ in rowids2_]
        maping2 = ut.defaultdict(list, zip(unique_aid2, rowids2_))
        annotmatch_rowid_list = ut.dict_take(maping2, aid2_list)
    annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list))
    return annotmatch_rowid_list
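
A short sketch of what the numpy fallback path computes: group every annotmatch rowid by its aid2, then look up the queried aids (made-up values; aids with no matches map to empty lists):

import numpy as np
from collections import defaultdict

all_annotmatch_rowids = np.array([10, 11, 12, 13])
aids2 = np.array([4, 2, 4, 9])

mapping = defaultdict(list)
for rowid, aid in zip(all_annotmatch_rowids.tolist(), aids2.tolist()):
    mapping[aid].append(rowid)
annotmatch_rowid_list = [sorted(mapping[aid]) for aid in [4, 2, 5]]
# -> [[10, 12], [11], []]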
Exemple #35
0
    def conditional_knn(nnindexer, qfx2_vec, num_neighbors, invalid_axs):
        """
            >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
            >>> qreq_ = ibeis.testdata_qreq_(defaultdb='seaturtles')
            >>> qreq_.load_indexer()
            >>> qfx2_vec = qreq_.ibs.get_annot_vecs(qreq_.qaids[0])
            >>> num_neighbors = 2
            >>> nnindexer = qreq_.indexer
            >>> ibs = qreq_.ibs
            >>> qaid = 1
            >>> qencid = ibs.get_annot_encounter_text([qaid])[0]
            >>> ax2_encid = np.array(ibs.get_annot_encounter_text(nnindexer.ax2_aid))
            >>> invalid_axs = np.where(ax2_encid == qencid)[0]
        """
        #import ibeis
        import itertools

        def in1d_shape(arr1, arr2):
            return np.in1d(arr1, arr2).reshape(arr1.shape)

        get_neighbors = ut.partial(nnindexer.flann.nn_index,
                                   checks=nnindexer.checks,
                                   cores=nnindexer.cores)

        # Alloc space for final results
        K = num_neighbors
        shape = (len(qfx2_vec), K)
        qfx2_idx = np.full(shape, -1, dtype=np.int32)
        qfx2_rawdist = np.full(shape, np.nan, dtype=np.float64)
        qfx2_truek = np.full(shape, -1, dtype=np.int32)

        # Make a set of temporary indexes and loop variables
        limit = None
        limit = 4
        K_ = K
        tx2_qfx = np.arange(len(qfx2_vec))
        tx2_vec = qfx2_vec
        iter_count = 0
        for iter_count in itertools.count():
            if limit is not None and iter_count >= limit:
                break
            # Find a set of neighbors
            (tx2_idx, tx2_rawdist) = get_neighbors(tx2_vec, K_)
            tx2_idx = vt.atleast_nd(tx2_idx, 2)
            tx2_rawdist = vt.atleast_nd(tx2_rawdist, 2)
            tx2_ax = nnindexer.get_nn_axs(tx2_idx)
            # Check to see if they meet the criteria
            tx2_invalid = in1d_shape(tx2_ax, invalid_axs)
            tx2_valid = np.logical_not(tx2_invalid)
            tx2_num_valid = tx2_valid.sum(axis=1)
            tx2_notdone = tx2_num_valid < K
            tx2_done = np.logical_not(tx2_notdone)

            # Move completely valid queries into the results
            if np.any(tx2_done):
                done_qfx = tx2_qfx.compress(tx2_done, axis=0)
                # Need to parse which columns are the completed ones
                done_valid_ = tx2_valid.compress(tx2_done, axis=0)
                done_rawdist_ = tx2_rawdist.compress(tx2_done, axis=0)
                done_idx_ = tx2_idx.compress(tx2_done, axis=0)
                # Get the complete valid indices
                rowxs, colxs = np.where(done_valid_)
                unique_rows, groupxs = vt.group_indices(rowxs)
                first_k_groupxs = [groupx[0:K] for groupx in groupxs]
                chosen_xs = np.hstack(first_k_groupxs)
                multi_index = (rowxs.take(chosen_xs), colxs.take(chosen_xs))
                flat_xs = np.ravel_multi_index(multi_index, done_valid_.shape)
                done_rawdist = done_rawdist_.take(flat_xs).reshape((-1, K))
                done_idx = done_idx_.take(flat_xs).reshape((-1, K))
                # Write done results in output
                qfx2_idx[done_qfx, :] = done_idx
                qfx2_rawdist[done_qfx, :] = done_rawdist
                qfx2_truek[done_qfx, :] = vt.apply_grouping(
                    colxs, first_k_groupxs)
            if np.all(tx2_done):
                break
            K_increase = (K - tx2_num_valid.min())
            K_ += K_increase
            tx2_qfx = tx2_qfx.compress(tx2_notdone, axis=0)
            tx2_vec = tx2_vec.compress(tx2_notdone, axis=0)

        if nnindexer.max_distance_sqrd is not None:
            qfx2_dist = np.divide(qfx2_rawdist, nnindexer.max_distance_sqrd)
        else:
            qfx2_dist = qfx2_rawdist
        return (qfx2_idx, qfx2_dist, iter_count)
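
A minimal sketch of the packing step above, where each finished query row keeps only its first K valid neighbor columns (toy boolean mask; the real code builds the row groups with vt.group_indices):

import numpy as np

K = 2
done_valid_ = np.array([[True, False, True, True],
                        [True, True, False, True]])
rowxs, colxs = np.where(done_valid_)
# group the valid column indices by row and keep the first K per row
first_k = np.array([colxs[rowxs == r][:K] for r in np.unique(rowxs)])
# first_k -> [[0, 2], [0, 1]]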
def flow():
    """
    http://pmneila.github.io/PyMaxflow/maxflow.html#maxflow-fastmin

    pip install PyMaxFlow
    pip install pystruct
    pip install hdbscan
    """
    # Toy problem: attempt to discover names via annotation scores

    import pystruct  # NOQA
    import pystruct.models  # NOQA
    import networkx as netx  # NOQA

    import vtool as vt

    num_annots = 10
    num_names = num_annots
    hidden_nids = np.random.randint(0, num_names, num_annots)
    unique_nids, groupxs = vt.group_indices(hidden_nids)

    toy_params = {
        True: {
            'mu': 1.0,
            'sigma': 2.2
        },
        False: {
            'mu': 7.0,
            'sigma': 0.9
        }
    }

    if True:
        import vtool as vt
        import wbia.plottool as pt

        xdata = np.linspace(0, 100, 1000)
        tp_pdf = vt.gauss_func1d(xdata, **toy_params[True])
        fp_pdf = vt.gauss_func1d(xdata, **toy_params[False])
        pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'TF'], xdata=xdata)

    def metric(aidx1, aidx2, hidden_nids=hidden_nids, toy_params=toy_params):
        if aidx1 == aidx2:
            return 0
        rng = np.random.RandomState(int(aidx1 + aidx2))
        same = hidden_nids[int(aidx1)] == hidden_nids[int(aidx2)]
        mu, sigma = ut.dict_take(toy_params[same], ['mu', 'sigma'])
        return np.clip(rng.normal(mu, sigma), 0, np.inf)

    pairwise_aidxs = list(ut.iprod(range(num_annots), range(num_annots)))
    pairwise_labels = np.array(
        [hidden_nids[a1] == hidden_nids[a2] for a1, a2 in pairwise_aidxs])
    pairwise_scores = np.array([metric(*zz) for zz in pairwise_aidxs])
    pairwise_scores_mat = pairwise_scores.reshape(num_annots, num_annots)
    if num_annots <= 10:
        logger.info(ut.repr2(pairwise_scores_mat, precision=1))

    # aids = list(range(num_annots))
    # g = netx.DiGraph()
    # g.add_nodes_from(aids)
    # g.add_edges_from([(tup[0], tup[1], {'weight': score}) for tup, score in zip(pairwise_aidxs, pairwise_scores) if tup[0] != tup[1]])
    # netx.draw_graphviz(g)
    # pr = netx.pagerank(g)

    X = pairwise_scores
    Y = pairwise_labels

    encoder = vt.ScoreNormalizer()
    encoder.fit(X, Y)
    encoder.visualize()

    # meanshift clustering
    import sklearn

    bandwidth = sklearn.cluster.estimate_bandwidth(
        X[:, None])  # , quantile=quantile, n_samples=500)
    assert bandwidth != 0, '[] bandwidth is 0. Cannot cluster'
    # bandwidth is with respect to the RBF used in clustering
    # ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=True)
    ms = sklearn.cluster.MeanShift(bandwidth=bandwidth,
                                   bin_seeding=True,
                                   cluster_all=False)
    ms.fit(X[:, None])
    label_arr = ms.labels_
    unique_labels = np.unique(label_arr)
    max_label = max(0, unique_labels.max())
    num_orphans = (label_arr == -1).sum()
    label_arr[label_arr == -1] = np.arange(max_label + 1,
                                           max_label + 1 + num_orphans)

    X_data = np.arange(num_annots)[:, None].astype(np.int64)

    # graph = pystruct.models.GraphCRF(
    #    n_states=None,
    #    n_features=None,
    #    inference_method='lp',
    #    class_weight=None,
    #    directed=False,
    # )

    import scipy
    import scipy.cluster
    import scipy.cluster.hierarchy

    thresh = 2.0
    labels = scipy.cluster.hierarchy.fclusterdata(X_data,
                                                  thresh,
                                                  metric=metric)
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    logger.info(groupxs)
    logger.info(lblgroupxs)
    logger.info('groupdiff = %r' %
                (ut.compare_groupings(groupxs, lblgroupxs), ))
    logger.info('common groups = %r' %
                (ut.find_grouping_consistencies(groupxs, lblgroupxs), ))
    # X_data, seconds_thresh, criterion='distance')

    # help(hdbscan.HDBSCAN)

    import hdbscan

    alg = hdbscan.HDBSCAN(metric=metric,
                          min_cluster_size=1,
                          p=1,
                          gen_min_span_tree=1,
                          min_samples=2)
    labels = alg.fit_predict(X_data)
    labels[labels == -1] = np.arange(np.sum(labels == -1)) + labels.max() + 1
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    logger.info(groupxs)
    logger.info(lblgroupxs)
    logger.info('groupdiff = %r' %
                (ut.compare_groupings(groupxs, lblgroupxs), ))
    logger.info('common groups = %r' %
                (ut.find_grouping_consistencies(groupxs, lblgroupxs), ))

    # import ddbscan
    # help(ddbscan.DDBSCAN)
    # alg = ddbscan.DDBSCAN(2, 2)

    # D = np.zeros((len(aids), len(aids) + 1))
    # D.T[-1] = np.arange(len(aids))

    ## Can alpha-expansion be used when the pairwise potentials are not in a grid?

    # hidden_ut.group_items(aids, hidden_nids)
    if False:
        import maxflow

        # from maxflow import fastmin
        # Create a graph with integer capacities.
        g = maxflow.Graph[int](2, 2)
        # Add two (non-terminal) nodes. Get the index to the first one.
        nodes = g.add_nodes(2)
        # Create two edges (forwards and backwards) with the given capacities.
        # The indices of the nodes are always consecutive.
        g.add_edge(nodes[0], nodes[1], 1, 2)
        # Set the capacities of the terminal edges...
        # ...for the first node.
        g.add_tedge(nodes[0], 2, 5)
        # ...for the second node.
        g.add_tedge(nodes[1], 9, 4)
        g = maxflow.Graph[float](2, 2)
        g.maxflow()
        g.get_nx_graph()
        g.get_segment(nodes[0])
Exemple #37
0
def compute_fmech_score(cm, qreq_=None, hack_single_ori=None):
    r"""
    nsum. This is the fmech scoring mechanism.


    Args:
        cm (ibeis.ChipMatch):

    Returns:
        tuple: (unique_nids, nsum_score_list)

    CommandLine:
        python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score
        python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:0
        python -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:2
        utprof.py -m ibeis.algo.hots.name_scoring --test-compute_fmech_score:2
        utprof.py -m ibeis.algo.hots.pipeline --test-request_ibeis_query_L0:0 --db PZ_Master1 -a timectrl:qindex=0:256

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> cm = testdata_chipmatch()
        >>> nsum_score_list = compute_fmech_score(cm)
        >>> assert np.all(nsum_score_list == [ 4.,  7.,  5.])

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18])
        >>> cm = cm_list[0]
        >>> cm.evaluate_dnids(qreq_)
        >>> cm._cast_scores()
        >>> #cm.qnid = 1   # Hack for testdb1 names
        >>> nsum_score_list = compute_fmech_score(cm, qreq_)
        >>> #assert np.all(nsum_nid_list == cm.unique_nids), 'nids out of alignment'
        >>> flags = (cm.unique_nids == cm.qnid)
        >>> max_true = nsum_score_list[flags].max()
        >>> max_false = nsum_score_list[~flags].max()
        >>> assert max_true > max_false, 'is this truly a hard case?'
        >>> assert max_true > 1.2, 'score=%r should be higher for aid=18' % (max_true,)

    Example2:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('PZ_MTEST', qaid_list=[18], cfgdict=dict(query_rotation_heuristic=True))
        >>> cm = cm_list[0]
        >>> cm.score_name_nsum(qreq_)
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_, ori=True)

    Example3:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.name_scoring import *  # NOQA
        >>> #ibs, qreq_, cm_list = plh.testdata_pre_sver('testdb1', qaid_list=[1])
        >>> ibs, qreq_, cm_list = plh.testdata_post_sver('testdb1', qaid_list=[1], cfgdict=dict(query_rotation_heuristic=True))
        >>> cm = cm_list[0]
        >>> cm.score_name_nsum(qreq_)
        >>> ut.quit_if_noshow()
        >>> cm.show_ranked_matches(qreq_, ori=True)
    """
    #assert qreq_ is not None
    if hack_single_ori is None:
        try:
            hack_single_ori = qreq_ is not None and (
                qreq_.qparams.query_rotation_heuristic
                or qreq_.qparams.rotation_invariance)
        except AttributeError:
            hack_single_ori = True
    # The core for each feature match
    #
    # The query feature index for each feature match
    fm_list = cm.fm_list
    fs_list = cm.get_fsv_prod_list()
    fx1_list = [fm.T[0] for fm in fm_list]
    if hack_single_ori:
        # Group keypoints with the same xy-coordinate.
        # Combine these features so each only receives one vote
        kpts1 = qreq_.ibs.get_annot_kpts(cm.qaid,
                                         config2_=qreq_.extern_query_config2)
        xys1_ = vt.get_xys(kpts1).T
        fx1_to_comboid = vt.compute_unique_arr_dataids(xys1_)
        fcombo_ids = [fx1_to_comboid.take(fx1) for fx1 in fx1_list]
    else:
        # use the feature index itself as a combo id
        # so each feature only receives one vote
        fcombo_ids = fx1_list

    if False:
        import ubelt as ub
        for ids in fcombo_ids:
            ub.find_duplicates(ids)

    # Group annotation matches by name
    # nsum_nid_list, name_groupxs = vt.group_indices(cm.dnid_list)
    # nsum_nid_list = cm.unique_nids
    name_groupxs = cm.name_groupxs

    nsum_score_list = []
    # For all indices matched to a particular name
    for name_idxs in name_groupxs:
        # Get feat indices and scores corresponding to the name's annots
        name_combo_ids = ut.take(fcombo_ids, name_idxs)
        name_fss = ut.take(fs_list, name_idxs)
        # Flatten over annots in the name
        fs = np.hstack(name_fss)
        if len(fs) == 0:
            nsum_score_list.append(0)
            continue
        combo_ids = np.hstack(name_combo_ids)
        # Features (with the same id) can't vote for this name twice
        group_idxs = vt.group_indices(combo_ids)[1]
        flagged_idxs = [idxs[fs.take(idxs).argmax()] for idxs in group_idxs]
        # Detail: sorting the idxs preserves summation order
        # this fixes the numerical issue where nsum and csum were off
        flagged_idxs = np.sort(flagged_idxs)
        name_score = fs.take(flagged_idxs).sum()

        nsum_score_list.append(name_score)
    nsum_score_list = np.array(nsum_score_list)

    return nsum_score_list
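
The selection inside the loop above, as a standalone sketch: per combo id, keep the index of the best-scoring match, then sort the kept indices so the summation order is deterministic (toy values):

import numpy as np

combo_ids = np.array([5, 5, 8, 9, 8])
fs = np.array([0.2, 0.7, 0.1, 0.4, 0.3])

order = combo_ids.argsort(kind='mergesort')
cuts = np.flatnonzero(np.diff(combo_ids[order])) + 1
group_idxs = np.split(order, cuts)
flagged_idxs = np.sort([idxs[fs[idxs].argmax()] for idxs in group_idxs])
name_score = fs.take(flagged_idxs).sum()
# flagged_idxs -> [1, 3, 4], name_score -> 0.7 + 0.4 + 0.3 == 1.4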
Exemple #38
0
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True):
    r"""
    CommandLine:
        python -m ibeis.algo.hots.bayes --exec-try_query --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.bayes import *  # NOQA
        >>> verbose = True
        >>> other_evidence = {}
        >>> name_evidence = [1, None, 0, None]
        >>> score_evidence = ['high', 'low', 'low']
        >>> query_vars = None
        >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1)
        >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence)
        >>> interest_ttypes = ['name']
        >>> infr = pgmpy.inference.BeliefPropagation(model)
        >>> evidence = infr._ensure_internal_evidence(evidence, model)
        >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose)
        >>> result = ('query_results = %s' % (str(query_results),))
        >>> ut.quit_if_noshow()
        >>> show_model(model, show_prior=True, **query_results)
        >>> ut.show_if_requested()

    Ignore:
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        probs = infr.query(query_vars, evidence)
        map_assignment = infr.map_query(query_vars, evidence)
    """
    infr = pgmpy.inference.VariableElimination(model)
    #infr = pgmpy.inference.BeliefPropagation(model)
    if True:
        return bruteforce(model, query_vars=None, evidence=evidence)
    else:
        import vtool as vt
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        # hack
        query_vars = ut.setdiff_ordered(query_vars, ut.list_getattr(model.ttype2_cpds['score'], 'variable'))
        if verbose:
            evidence_str = ', '.join(model.pretty_evidence(evidence))
            print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ') = ')
        # Compute MAP joints
        # There is a bug here.
        #map_assign = infr.map_query(query_vars, evidence)
        # (probably an invalid thing to do)
        #joint_factor = pgmpy.factors.factor_product(*factor_list)
        # Brute force MAP

        name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable')
        query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys()))
        # TODO: incorporate case where Na is assigned to Fred
        #evidence_h = ut.delete_keys(evidence.copy(), ['Na'])

        joint = model.joint_distribution()
        joint.evidence_based_reduction(
            query_name_vars, evidence, inplace=True)

        # Find static row labels in the evidence
        given_name_vars = [var for var in name_vars if var in evidence]
        given_name_idx = ut.dict_take(evidence, given_name_vars)
        given_name_val = [joint.statename_dict[var][idx]
                          for var, idx in zip(given_name_vars, given_name_idx)]
        new_vals = joint.values.ravel()
        # Add static evidence variables to the relabeled name states
        new_vars = given_name_vars + joint.variables
        new_rows = [tuple(given_name_val) + row for row in joint._row_labels()]
        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        temp_basis = [i for i in range(model.num_names)]
        def relabel_names(names, temp_basis=temp_basis):
            names = list(map(six.text_type, names))
            mapping = {}
            for n in names:
                if n not in mapping:
                    mapping[n] = len(mapping)
            new_names = tuple([temp_basis[mapping[n]] for n in names])
            return new_names
        relabeled_rows = list(map(relabel_names, new_rows))
        # Combine probability of rows with the same (new) label
        data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows))
        unique_ids, groupxs = vt.group_indices(data_ids)
        reduced_row_lbls = ut.take(relabeled_rows, ut.get_list_column(groupxs, 0))
        reduced_row_lbls = list(map(list, reduced_row_lbls))
        reduced_values = np.array([
            g.sum() for g in vt.apply_grouping(new_vals, groupxs)
        ])
        # Relabel the rows one more time to agree with initial constraints
        used_ = []
        replaced = []
        for colx, (var, val) in enumerate(zip(given_name_vars, given_name_val)):
            # All columns must be the same for this labeling
            alias = reduced_row_lbls[0][colx]
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val)
            replaced.append(alias)
            used_.append(val)
        basis = model.ttype2_cpds['name'][0]._template_.basis
        find_remain_ = ut.setdiff_ordered(temp_basis, replaced)
        repl_remain_ = ut.setdiff_ordered(basis, used_)
        for find, repl in zip(find_remain_, repl_remain_):
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl)

        # Now find the most likely state
        sortx = reduced_values.argsort()[::-1]
        sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist())
        sort_reduced_values = reduced_values[sortx]

        # Remove evidence based labels
        new_vars_ = new_vars[len(given_name_vars):]
        sort_reduced_row_lbls_ = ut.get_list_column(sort_reduced_row_lbls, slice(len(given_name_vars), None))

        sort_reduced_row_lbls_[0]

        # hack into a new joint factor
        var_states = ut.lmap(ut.unique_keep_order, zip(*sort_reduced_row_lbls_))
        statename_dict = dict(zip(new_vars, var_states))
        cardinality = ut.lmap(len, var_states)
        val_lookup = dict(zip(ut.lmap(tuple, sort_reduced_row_lbls_), sort_reduced_values))
        values = np.zeros(np.prod(cardinality))
        for idx, state in enumerate(ut.iprod(*var_states)):
            if state in val_lookup:
                values[idx] = val_lookup[state]
        joint2 = pgmpy.factors.Factor(new_vars_, cardinality, values, statename_dict=statename_dict)
        print(joint2)
        max_marginals = {}
        for i, var in enumerate(query_name_vars):
            one_out = query_name_vars[:i] + query_name_vars[i + 1:]
            max_marginals[var] = joint2.marginalize(one_out, inplace=False)
            # max_marginals[var] = joint2.maximize(one_out, inplace=False)
        print(joint2.marginalize(['Nb', 'Nc'], inplace=False))
        factor_list = max_marginals.values()

        # Better map assignment based on knowledge of labels
        map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0]))

        sort_reduced_rowstr_lbls = [
            ut.repr2(dict(zip(new_vars, lbls)), explicit=True, nobraces=True,
                     strvals=True)
            for lbls in sort_reduced_row_lbls_
        ]

        top_assignments = list(zip(sort_reduced_rowstr_lbls[:3], sort_reduced_values))
        if len(sort_reduced_values) > 3:
            top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))]

        # import utool
        # utool.embed()

        # Compute all marginals
        # probs = infr.query(query_vars, evidence)
        #probs = infr.query(query_vars, evidence)
        # factor_list = probs.values()

        ## Marginalize over non-query, non-evidence
        #irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars)
        #joint.marginalize(irrelevant_vars)
        #joint.normalize()
        #new_rows = joint._row_labels()
        #new_vals = joint.values.ravel()
        #map_vals = new_rows[new_vals.argmax()]
        #map_assign = dict(zip(joint.variables, map_vals))
        # Compute Marginalized MAP joints
        #marginalized_joints = {}
        #for ttype in interest_ttypes:
        #    other_vars = [v for v in joint_factor.scope()
        #                  if model.var2_cpd[v].ttype != ttype]
        #    marginal = joint_factor.marginalize(other_vars, inplace=False)
        #    marginalized_joints[ttype] = marginal
        query_results = {
            'factor_list': factor_list,
            'top_assignments': top_assignments,
            'map_assign': map_assign,
            'marginalized_joints': None,
        }
        return query_results
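
# A minimal numpy-only sketch of the marginalization loop above: each
# per-variable factor is just the joint summed over every other axis. The
# variable names and cardinalities below are made up for illustration; this is
# the underlying array operation, not the pgmpy API.
import numpy as np

var_names = ['Na', 'Nb', 'Nc']
joint_vals = np.random.rand(2, 2, 2)
joint_vals /= joint_vals.sum()  # normalize so entries behave like probabilities

marginals = {
    var: joint_vals.sum(axis=tuple(ax for ax in range(joint_vals.ndim) if ax != ix))
    for ix, var in enumerate(var_names)
}
for var, marg in marginals.items():
    print(var, marg, marg.sum())  # each marginal sums to ~1
# Swapping sum for max here would give max-marginals, which is what the
# commented-out joint2.maximize call would compute instead.
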
def get_annot_kpts_distinctiveness(ibs, aid_list, config2_=None, **kwargs):
    """
    very hacky, but cute way to cache keypoint distinctiveness

    Args:
        ibs (IBEISController):  ibeis controller object
        aid_list (list):
        dstncvs_normer (None):

    Returns:
        list: dstncvs_list

    CommandLine:
        python -m ibeis.control.manual_ibeiscontrol_funcs --test-get_annot_kpts_distinctiveness

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.control.manual_ibeiscontrol_funcs import *  # NOQA
        >>> from ibeis.algo.hots import distinctiveness_normalizer
        >>> import ibeis
        >>> import numpy as np
        >>> config2_ = None
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> aid_list = ibs.get_valid_aids(species=const.TEST_SPECIES.ZEB_PLAIN)
        >>> # execute function
        >>> aid_list1 = aid_list[::2]
        >>> aid_list2 = aid_list[1::3]
        >>> dstncvs_list1 = get_annot_kpts_distinctiveness(ibs, aid_list1)
        >>> dstncvs_list2 = get_annot_kpts_distinctiveness(ibs, aid_list2)
        >>> dstncvs_list = get_annot_kpts_distinctiveness(ibs, aid_list)
        >>> print(ut.depth_profile(dstncvs_list1))
        >>> stats_dict = ut.dict_stack([ut.get_stats(dstncvs) for dstncvs in dstncvs_list])
        >>> print(ut.dict_str(stats_dict))
        >>> assert np.all(np.array(stats_dict['min']) >= 0), 'distinctiveness was out of bounds'
        >>> assert np.all(np.array(stats_dict['max']) <= 1), 'distinctiveness was out of bounds'
    """
    from ibeis.algo.hots import distinctiveness_normalizer as dcvs_normer

    # per-species distinctiveness wrapper around the ibeis cached function
    # get feature rowids
    aid_list = np.array(aid_list)
    fid_list = np.array(ibs.get_annot_feat_rowids(aid_list, ensure=True,
                                                  eager=True, nInput=None,
                                                  config2_=config2_))
    species_rowid_list = np.array(ibs.get_annot_species_rowids(aid_list))
    # Compute distinctiveness separately for each species
    unique_sids, groupxs = vt.group_indices(species_rowid_list)
    fids_groups          = vt.apply_grouping(fid_list, groupxs)
    species_text_list    = ibs.get_species_texts(unique_sids)
    # Map distinctiveness computation
    normer_list = [dcvs_normer.request_species_distinctiveness_normalizer(species)
                   for species in species_text_list]
    # Reduce to get results
    dstncvs_groups = [
        get_feat_kpts_distinctiveness(ibs, fids, dstncvs_normer=dstncvs_normer,
                                      species_rowid=sid, **kwargs)
        for dstncvs_normer, fids, sid in zip(normer_list, fids_groups, unique_sids)
    ]
    dstncvs_list = vt.invert_apply_grouping(dstncvs_groups, groupxs)
    return dstncvs_list
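
# Rough numpy-only sketch of the group/apply/invert idiom this function leans
# on (vt.group_indices / vt.apply_grouping / vt.invert_apply_grouping). The
# helpers below are illustrative stand-ins, not the vtool API.
import numpy as np

def group_indices_sketch(keys):
    """Return unique keys and, for each key, the indices where it occurs."""
    keys = np.asarray(keys)
    sortx = keys.argsort(kind='stable')
    sorted_keys = keys[sortx]
    unique_keys, first = np.unique(sorted_keys, return_index=True)
    groupxs = np.split(sortx, first[1:])
    return unique_keys, groupxs

def apply_grouping_sketch(arr, groupxs):
    return [np.asarray(arr)[xs] for xs in groupxs]

def invert_apply_grouping_sketch(grouped_results, groupxs, size):
    out = np.empty(size, dtype=np.asarray(grouped_results[0]).dtype)
    for xs, vals in zip(groupxs, grouped_results):
        out[xs] = vals
    return out

species = np.array([1, 2, 1, 2, 1])
scores = np.array([0.2, 0.9, 0.4, 0.1, 0.6])
uniq, groupxs = group_indices_sketch(species)
# normalize scores within each species group, then restore the original order
normed = [g / g.sum() for g in apply_grouping_sketch(scores, groupxs)]
print(invert_apply_grouping_sketch(normed, groupxs, len(scores)))
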
Exemple #40
0
    def conditional_knn(nnindexer, qfx2_vec, num_neighbors, invalid_axs):
        """
            >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
            >>> qreq_ = ibeis.testdata_qreq_(defaultdb='seaturtles')
            >>> qreq_.load_indexer()
            >>> qfx2_vec = qreq_.ibs.get_annot_vecs(qreq_.qaids[0])
            >>> num_neighbors = 2
            >>> nnindexer = qreq_.indexer
            >>> ibs = qreq_.ibs
            >>> qaid = 1
            >>> qencid = ibs.get_annot_encounter_text([qaid])[0]
            >>> ax2_encid = np.array(ibs.get_annot_encounter_text(nnindexer.ax2_aid))
            >>> invalid_axs = np.where(ax2_encid == qencid)[0]
        """
        #import ibeis
        import itertools

        def in1d_shape(arr1, arr2):
            return np.in1d(arr1, arr2).reshape(arr1.shape)

        get_neighbors = ut.partial(nnindexer.flann.nn_index,
                                   checks=nnindexer.checks,
                                   cores=nnindexer.cores)

        # Alloc space for final results
        K = num_neighbors
        shape = (len(qfx2_vec), K)
        qfx2_idx = np.full(shape, -1, dtype=np.int32)
        qfx2_rawdist = np.full(shape, np.nan, dtype=np.float64)
        qfx2_truek = np.full(shape, -1, dtype=np.int32)

        # Make a set of temporary indexes and loop variables
        limit = None
        limit = 4
        K_ = K
        tx2_qfx = np.arange(len(qfx2_vec))
        tx2_vec = qfx2_vec
        iter_count = 0
        for iter_count in itertools.count():
            if limit is not None and iter_count >= limit:
                break
            # Find a set of neighbors
            (tx2_idx, tx2_rawdist) = get_neighbors(tx2_vec, K_)
            tx2_idx = vt.atleast_nd(tx2_idx, 2)
            tx2_rawdist = vt.atleast_nd(tx2_rawdist, 2)
            tx2_ax = nnindexer.get_nn_axs(tx2_idx)
            # Check to see if they meet the criteria
            tx2_invalid = in1d_shape(tx2_ax, invalid_axs)
            tx2_valid = np.logical_not(tx2_invalid)
            tx2_num_valid = tx2_valid.sum(axis=1)
            tx2_notdone = tx2_num_valid < K
            tx2_done = np.logical_not(tx2_notdone)

            # Move completely valid queries into the results
            if np.any(tx2_done):
                done_qfx = tx2_qfx.compress(tx2_done, axis=0)
                # Need to parse which columns are the completed ones
                done_valid_ = tx2_valid.compress(tx2_done, axis=0)
                done_rawdist_ = tx2_rawdist.compress(tx2_done, axis=0)
                done_idx_ = tx2_idx.compress(tx2_done, axis=0)
                # Get the complete valid indicies
                rowxs, colxs = np.where(done_valid_)
                unique_rows, groupxs = vt.group_indices(rowxs)
                first_k_groupxs = [groupx[0:K] for groupx in groupxs]
                chosen_xs = np.hstack(first_k_groupxs)
                multi_index = (rowxs.take(chosen_xs), colxs.take(chosen_xs))
                flat_xs = np.ravel_multi_index(multi_index, done_valid_.shape)
                done_rawdist = done_rawdist_.take(flat_xs).reshape((-1, K))
                done_idx = done_idx_.take(flat_xs).reshape((-1, K))
                # Write done results in output
                qfx2_idx[done_qfx, :] = done_idx
                qfx2_rawdist[done_qfx, :] = done_rawdist
                qfx2_truek[done_qfx, :] = vt.apply_grouping(
                    colxs, first_k_groupxs)
            if np.all(tx2_done):
                break
            K_increase = (K - tx2_num_valid.min())
            K_ += K_increase
            tx2_qfx = tx2_qfx.compress(tx2_notdone, axis=0)
            tx2_vec = tx2_vec.compress(tx2_notdone, axis=0)

        if nnindexer.max_distance_sqrd is not None:
            qfx2_dist = np.divide(qfx2_rawdist, nnindexer.max_distance_sqrd)
        else:
            qfx2_dist = qfx2_rawdist
        return (qfx2_idx, qfx2_dist, iter_count)
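
# Small numpy sketch of the selection step in conditional_knn: rows that have
# at least K valid candidates keep only their first K valid columns. The mask
# below is made-up data, not output of the indexer.
import numpy as np

K = 2
valid = np.array([
    [True, False, True, True],    # more than enough valid neighbors
    [False, True, True, False],   # exactly enough
    [False, False, True, False],  # not enough; would trigger another query pass
])
done = valid.sum(axis=1) >= K

rowxs, colxs = np.where(valid[done])
# take the first K valid column positions of every finished row
first_k = np.concatenate([
    np.flatnonzero(rowxs == r)[:K] for r in range(int(done.sum()))
])
chosen_cols = colxs[first_k].reshape(-1, K)
print(chosen_cols)  # rows 0 and 1 keep columns [0, 2] and [1, 2]
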
Exemple #41
0
def get_name_aids(ibs, nid_list, enable_unknown_fix=True):
    r"""
    # TODO: Rename to get_anot_rowids_from_name_rowid

    Returns:
         list: aids_list a list of list of aids in each name

    RESTful:
        Method: GET
        URL:    /api/name/aids/

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_name_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> # Map annotations to name ids
        >>> aid_list = ibs.get_valid_aids()
        >>> nid_list = ibs.get_annot_name_rowids(aid_list)
        >>> # Get annotation ids for each name
        >>> aids_list = ibs.get_name_aids(nid_list)
        >>> # Run Assertion Test
        >>> groupid2_items = ut.group_items(aids_list, nid_list)
        >>> grouped_items = list(six.itervalues(groupid2_items))
        >>> passed_iter = map(ut.list_allsame, grouped_items)
        >>> passed_list = list(passed_iter)
        >>> assert all(passed_list), 'problem in get_name_aids'
        >>> # Print grouped items
        >>> print(ut.dict_str(groupid2_items, newlines=False))

    Ignore:
        from ibeis.control.manual_name_funcs import *  # NOQA
        import ibeis
        #ibs = ibeis.opendb('testdb1')
        #ibs = ibeis.opendb('PZ_MTEST')
        ibs = ibeis.opendb('PZ_Master0')
        #ibs = ibeis.opendb('GZ_ALL')

        nid_list = ibs.get_valid_nids()
        nid_list_ = [const.UNKNOWN_NAME_ROWID if nid <= 0 else nid for nid in nid_list]

        with ut.Timer('sql'):
            #aids_list1 = ibs.get_name_aids(nid_list, enable_unknown_fix=False)
            aids_list1 = ibs.db.get(const.ANNOTATION_TABLE, (ANNOT_ROWID,), nid_list_, id_colname=NAME_ROWID, unpack_scalars=False)

        with ut.Timer('hackquery + group'):
            opstr = '''
            SELECT annot_rowid, name_rowid
            FROM annotations
            WHERE name_rowid IN
                (%s)
                ORDER BY name_rowid ASC, annot_rowid ASC
            ''' % (', '.join(map(str, nid_list)))
            pair_list = ibs.db.connection.execute(opstr).fetchall()
            aids = np.array(ut.get_list_column(pair_list, 0))
            nids = np.array(ut.get_list_column(pair_list, 1))
            unique_nids, groupx = vt.group_indices(nids)
            grouped_aids_ = vt.apply_grouping(aids, groupx)
            aids_list5 = [sorted(arr.tolist()) for arr in grouped_aids_]

        for aids1, aids5 in zip(aids_list1, aids_list5):
            if (aids1) != (aids5):
                print(aids1)
                print(aids5)
                print('-----')

        ut.assert_lists_eq(list(map(tuple, aids_list5)), list(map(tuple, aids_list1)))

        with ut.Timer('numpy'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list2 = [valid_aids.take(np.flatnonzero(valid_nids == nid)).tolist() for nid in nid_list_]

        with ut.Timer('numpy2'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list3 = [valid_aids.take(np.flatnonzero(np.equal(valid_nids, nid))).tolist() for nid in nid_list_]

        with ut.Timer('numpy3'):
            # alt method
            valid_aids = np.array(ibs.get_valid_aids())
            valid_nids = np.array(ibs.db.get_all_col_rows(const.ANNOTATION_TABLE, NAME_ROWID))
            aids_list4 = [valid_aids.take(np.flatnonzero(np.equal(valid_nids, nid))).tolist() for nid in nid_list_]
        assert aids_list2 == aids_list3
        assert aids_list3 == aids_list4
        assert aids_list1 == aids_list2

        valid_aids = ibs.get_valid_aids()
        %timeit ibs.db.get_all_col_rows('annotations', 'rowid')
        %timeit ibs.db.get_all_col_rows('annotations', 'name_rowid')
        %timeit ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)
        %timeit ibs.get_valid_aids()
        %timeit ibs.get_annot_name_rowids(ibs.get_valid_aids(), distinguish_unknowns=False)
        valid_nids1 = ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False)
        valid_nids2 = ibs.db.get_all_col_rows('annotations', 'name_rowid')
        assert valid_nids1 == valid_nids2

    ibs.db.fname
    ibs.db.fpath

    import sqlite3

    con = sqlite3.connect(ibs.db.fpath)

    opstr = '''
    SELECT annot_rowid, name_rowid
    FROM annotations
    WHERE name_rowid IN
        (SELECT name_rowid FROM name)
        ORDER BY name_rowid ASC, annot_rowid ASC
    '''

    annot_rowid_list = con.execute(opstr).fetchall()
    aid_list = ut.get_list_column(annot_rowid_list, 0)
    nid_list = ut.get_list_column(annot_rowid_list, 1)


    # HACKY HACKY HACK

    with ut.Timer('hackquery + group'):
        #nid_list = ibs.get_valid_nids()[10:15]
        nid_list = ibs.get_valid_nids()
        opstr = '''
        SELECT annot_rowid, name_rowid
        FROM annotations
        WHERE name_rowid IN
            (%s)
            ORDER BY name_rowid ASC, annot_rowid ASC
        ''' % (', '.join(map(str, nid_list)))
        pair_list = ibs.db.connection.execute(opstr).fetchall()
        aids = np.array(ut.get_list_column(pair_list, 0))
        nids = np.array(ut.get_list_column(pair_list, 1))
        unique_nids, groupx = vt.group_indices(nids)
        grouped_aids_ = vt.apply_grouping(aids, groupx)
        grouped_aids = [arr.tolist() for arr in grouped_aids_]

    SELECT
       name_rowid, COUNT(annot_rowid) AS number, GROUP_CONCAT(annot_rowid) AS aid_list
    FROM annotations
    WHERE name_rowid in (SELECT name_rowid FROM name)
     GROUP BY name_rowid
    ORDER BY name_rowid ASC


    import vtool as vt
    vt
    vt.aid_list[0]


    annot_rowid_list = con.execute(opstr).fetchall()
    opstr = '''
        SELECT annot_rowid
        FROM annotations
        WHERE name_rowid=?
        '''

    cur = ibs.db.connection.cursor()

    cur = con.execute('BEGIN IMMEDIATE TRANSACTION')
    cur = ibs.db.connection
    res = [cur.execute(opstr, (nid,)).fetchall() for nid in nid_list_]
    cur.execute('COMMIT TRANSACTION')

    res = [ibs.db.cur.execute(opstr, (nid,)).fetchall() for nid in nid_list_]

    """
    # FIXME: THIS FUNCTION IS VERY SLOW
    # ADD A LOCAL CACHE TO FIX THIS SPEED
    # ALSO FIX GET_IMAGE_AIDS
    # really a getter for the annotation table not the name table
    #return [[] for nid in nid_list]
    # TODO: should a query of the UNKNOWN_NAME_ROWID return anything?
    # TODO: don't even run negative aids as queries
    nid_list_ = [const.UNKNOWN_NAME_ROWID if nid <= 0 else nid for nid in nid_list]
    USE_GROUPING_HACK = False
    if USE_GROUPING_HACK:
        # This code doesn't work because it doesn't respect empty names
        input_list, inverse_unique = np.unique(nid_list_, return_inverse=True)
        input_str = ', '.join(list(map(str, input_list)))
        opstr = '''
        SELECT annot_rowid, name_rowid
        FROM {ANNOTATION_TABLE}
        WHERE name_rowid IN
            ({input_str})
            ORDER BY name_rowid ASC, annot_rowid ASC
        '''.format(input_str=input_str, ANNOTATION_TABLE=const.ANNOTATION_TABLE)
        pair_list = ibs.db.connection.execute(opstr).fetchall()
        aidscol = np.array(ut.get_list_column(pair_list, 0))
        nidscol = np.array(ut.get_list_column(pair_list, 1))
        unique_nids, groupx = vt.group_indices(nidscol)
        grouped_aids_ = vt.apply_grouping(aidscol, groupx)
        #aids_list = [sorted(arr.tolist()) for arr in grouped_aids_]
        structured_aids_list = [arr.tolist() for arr in grouped_aids_]
        aids_list = np.array(structured_aids_list)[inverse_unique].tolist()
    else:
        USE_NUMPY_IMPL = True
        #USE_NUMPY_IMPL = False
        # Use qt if getting one at a time otherwise perform bulk operation
        USE_NUMPY_IMPL = len(nid_list_) > 1
        #USE_NUMPY_IMPL = len(nid_list_) > 10
        if USE_NUMPY_IMPL:
            # This seems to be 30x faster for bigger inputs
            valid_aids = np.array(ibs._get_all_aids())
            valid_nids = np.array(ibs.db.get_all_col_rows(const.ANNOTATION_TABLE, NAME_ROWID))
            #np.array(ibs.get_annot_name_rowids(valid_aids, distinguish_unknowns=False))
            aids_list = [
                valid_aids.take(np.flatnonzero(
                    np.equal(valid_nids, nid))).tolist()
                for nid in nid_list_
            ]
        else:
            # SQL IMPL
            aids_list = ibs.db.get(const.ANNOTATION_TABLE, (ANNOT_ROWID,),
                                   nid_list_, id_colname=NAME_ROWID,
                                   unpack_scalars=False)
    if enable_unknown_fix:
        #enable_unknown_fix == distinguish_unknowns
        # negative name rowids correspond to unknown annotations where annot_rowid = -name_rowid
        #aids_list = [None if nid is None else ([-nid] if nid < 0 else aids)
        #             for nid, aids in zip(nid_list, aids_list)]
        # Not sure if this should fail or return empty list on None nid
        aids_list = [[] if nid is None else ([-nid] if nid < 0 else aids)
                     for nid, aids in zip(nid_list, aids_list)]
        #aids_list = [[-nid] if nid < 0 else aids
        #             for nid, aids in zip(nid_list, aids_list)]
    return aids_list
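
# Stripped-down sketch of the numpy branch above: one pass over all annotation
# rows replaces a per-name SQL query. The table contents are made up.
import numpy as np

valid_aids = np.array([11, 12, 13, 14, 15])
valid_nids = np.array([ 1,  2,  1,  3,  2])   # name rowid of each annotation
nid_list   = [2, 1, 4]                        # names we want annotations for

aids_list = [
    valid_aids[np.flatnonzero(valid_nids == nid)].tolist()
    for nid in nid_list
]
print(aids_list)  # [[12, 15], [11, 13], []] -- a name with no annots gives []
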
Exemple #42
0
def make_grid_coverage_mask(kpts, chipsize, weights, pxl_per_bin=4,
                            grid_steps=1, resize=False, out=None, grid_sigma=1.6):
    r"""
    Args:
        kpts (ndarray[float32_t, ndim=2]):  keypoint
        chipsize (tuple):  width, height
        weights (ndarray[float32_t, ndim=1]):
        pxl_per_bin (float):
        grid_steps (int):

    Returns:
        ndarray: weightgrid

    CommandLine:
        python -m vtool.coverage_grid --test-make_grid_coverage_mask --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.coverage_grid import *  # NOQA
        >>> import vtool as vt
        >>> # build test data
        >>> kpts, chipsize, weights = coverage_kpts.testdata_coverage('easy1.png')
        >>> pxl_per_bin = 4
        >>> grid_steps = 2
        >>> # execute function
        >>> weightgrid = make_grid_coverage_mask(kpts, chipsize, weights, pxl_per_bin, grid_steps)
        >>> # verify result
        >>> result = str(weightgrid)
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> pt.imshow(weightgrid)
        >>> ut.show_if_requested()
    """
    import vtool as vt
    coverage_gridtup = sparse_grid_coverage(
        kpts, chipsize, weights,
        pxl_per_bin=pxl_per_bin,
        grid_steps=grid_steps,
        grid_sigma=grid_sigma
    )
    gridshape = coverage_gridtup[0:2]
    neighbor_bin_weights, neighbor_bin_indices = coverage_gridtup[-2:]
    oldshape_indices = neighbor_bin_indices.shape
    newshape_indices = (np.prod(oldshape_indices[0:2]), oldshape_indices[2])
    neighbor_bin_indices =  neighbor_bin_indices.reshape(newshape_indices).T
    neighbor_bin_weights = neighbor_bin_weights.flatten()
    # Get flat indexing into gridbin
    neighbor_bin_flat_indices = np.ravel_multi_index(neighbor_bin_indices, gridshape)
    # Group by bins with weight
    unique_flatxs, grouped_flatxs = vt.group_indices(neighbor_bin_flat_indices)
    grouped_weights = vt.apply_grouping(neighbor_bin_weights, grouped_flatxs)
    # FIXME: boundary cases are not handled right because their vote is split
    # into the same bin and is fighting with itself during the max
    max_weights = list(map(np.max, grouped_weights))
    if out is None:
        weightgrid = np.zeros(gridshape)
    else:
        # outvar specified
        weightgrid = out
        weightgrid[:] = 0
    unique_rows, unique_cols = np.unravel_index(unique_flatxs, gridshape)
    weightgrid[unique_rows, unique_cols] = max_weights
    #flat_weightgrid = np.zeros(np.prod(gridshape))
    #flat_weightgrid[unique_flatxs] = max_weight
    #ut.embed()
    #weightgrid = np.reshape(flat_weightgrid, gridshape)
    if resize:
        weightgrid = cv2.resize(weightgrid, chipsize,
                                interpolation=cv2.INTER_NEAREST)
    return weightgrid
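
# The max-per-bin pooling above can also be written with np.maximum.at, which
# sidesteps the explicit grouping. Bin coordinates and weights below are made
# up; this is a sketch of the array operation, not a drop-in replacement.
import numpy as np

gridshape = (4, 4)
rows = np.array([0, 0, 1, 3, 3])
cols = np.array([1, 1, 2, 0, 0])
weights = np.array([0.2, 0.7, 0.5, 0.3, 0.9])

flat_idx = np.ravel_multi_index((rows, cols), gridshape)
weightgrid = np.zeros(gridshape)
flat_grid = weightgrid.ravel()               # view into weightgrid
np.maximum.at(flat_grid, flat_idx, weights)  # keep the largest weight per bin
print(weightgrid)  # bin (0, 1) -> 0.7, (1, 2) -> 0.5, (3, 0) -> 0.9
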
Exemple #43
0
def flow():
    """
    http://pmneila.github.io/PyMaxflow/maxflow.html#maxflow-fastmin

    pip install PyMaxFlow
    pip install pystruct
    pip install hdbscan
    """
    # Toy problem representing attempting to discover names via annotation
    # scores

    import pystruct  # NOQA
    import pystruct.models  # NOQA
    import networkx as netx  # NOQA

    import vtool as vt
    num_annots = 10
    num_names = num_annots
    hidden_nids = np.random.randint(0, num_names, num_annots)
    unique_nids, groupxs = vt.group_indices(hidden_nids)

    toy_params = {
        True: {'mu': 1.0, 'sigma': 2.2},
        False: {'mu': 7.0, 'sigma': .9}
    }

    if True:
        import vtool as vt
        import plottool as pt
        xdata = np.linspace(0, 100, 1000)
        tp_pdf = vt.gauss_func1d(xdata, **toy_params[True])
        fp_pdf = vt.gauss_func1d(xdata, **toy_params[False])
        pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'FP'], xdata=xdata)

    def metric(aidx1, aidx2, hidden_nids=hidden_nids, toy_params=toy_params):
        if aidx1 == aidx2:
            return 0
        rng = np.random.RandomState(int(aidx1 + aidx2))
        same = hidden_nids[int(aidx1)] == hidden_nids[int(aidx2)]
        mu, sigma = ut.dict_take(toy_params[same], ['mu', 'sigma'])
        return np.clip(rng.normal(mu, sigma), 0, np.inf)

    pairwise_aidxs = list(ut.iprod(range(num_annots), range(num_annots)))
    pairwise_labels = np.array([hidden_nids[a1] == hidden_nids[a2] for a1, a2 in pairwise_aidxs])
    pairwise_scores = np.array([metric(*zz) for zz in pairwise_aidxs])
    pairwise_scores_mat = pairwise_scores.reshape(num_annots, num_annots)
    if num_annots <= 10:
        print(ut.repr2(pairwise_scores_mat, precision=1))

    #aids = list(range(num_annots))
    #g = netx.DiGraph()
    #g.add_nodes_from(aids)
    #g.add_edges_from([(tup[0], tup[1], {'weight': score}) for tup, score in zip(pairwise_aidxs, pairwise_scores) if tup[0] != tup[1]])
    #netx.draw_graphviz(g)
    #pr = netx.pagerank(g)

    X = pairwise_scores
    Y = pairwise_labels

    encoder = vt.ScoreNormalizer()
    encoder.fit(X, Y)
    encoder.visualize()

    # meanshift clustering
    import sklearn
    bandwidth = sklearn.cluster.estimate_bandwidth(X[:, None])  # , quantile=quantile, n_samples=500)
    assert bandwidth != 0, ('[enc] bandwidth is 0. Cannot cluster')
    # bandwidth is with respect to the RBF used in clustering
    #ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=True)
    ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=False)
    ms.fit(X[:, None])
    label_arr = ms.labels_
    unique_labels = np.unique(label_arr)
    max_label = max(0, unique_labels.max())
    num_orphans = (label_arr == -1).sum()
    label_arr[label_arr == -1] = np.arange(max_label + 1, max_label + 1 + num_orphans)

    X_data = np.arange(num_annots)[:, None].astype(np.int64)

    #graph = pystruct.models.GraphCRF(
    #    n_states=None,
    #    n_features=None,
    #    inference_method='lp',
    #    class_weight=None,
    #    directed=False,
    #)

    import scipy
    import scipy.cluster
    import scipy.cluster.hierarchy

    thresh = 2.0
    labels = scipy.cluster.hierarchy.fclusterdata(X_data, thresh, metric=metric)
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    print(groupxs)
    print(lblgroupxs)
    print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs),))
    print('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs),))
    #X_data, seconds_thresh, criterion='distance')

    #help(hdbscan.HDBSCAN)

    import hdbscan
    alg = hdbscan.HDBSCAN(metric=metric, min_cluster_size=1, p=1, gen_min_span_tree=1, min_samples=2)
    labels = alg.fit_predict(X_data)
    labels[labels == -1] = np.arange(np.sum(labels == -1)) + labels.max() + 1
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    print(groupxs)
    print(lblgroupxs)
    print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs),))
    print('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs),))

    #import ddbscan
    #help(ddbscan.DDBSCAN)
    #alg = ddbscan.DDBSCAN(2, 2)

    #D = np.zeros((len(aids), len(aids) + 1))
    #D.T[-1] = np.arange(len(aids))

    ## Can alpha-expansion be used when the pairwise potentials are not in a grid?

    #hidden_ut.group_items(aids, hidden_nids)
    if False:
        import maxflow
        #from maxflow import fastmin
        # Create a graph with integer capacities.
        g = maxflow.Graph[int](2, 2)
        # Add two (non-terminal) nodes. Get the index to the first one.
        nodes = g.add_nodes(2)
        # Create two edges (forwards and backwards) with the given capacities.
        # The indices of the nodes are always consecutive.
        g.add_edge(nodes[0], nodes[1], 1, 2)
        # Set the capacities of the terminal edges...
        # ...for the first node.
        g.add_tedge(nodes[0], 2, 5)
        # ...for the second node.
        g.add_tedge(nodes[1], 9, 4)
        g = maxflow.Graph[float](2, 2)
        g.maxflow()
        g.get_nx_graph()
        g.get_segment(nodes[0])
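
# Sketch of the toy score model used above: same-name pairs draw low scores,
# different-name pairs draw high scores, each from a Gaussian. The threshold
# scan is purely illustrative and not part of the original function.
import numpy as np

rng = np.random.RandomState(0)
toy_params = {True: {'mu': 1.0, 'sigma': 2.2}, False: {'mu': 7.0, 'sigma': 0.9}}

same = np.clip(rng.normal(toy_params[True]['mu'], toy_params[True]['sigma'], 500), 0, None)
diff = np.clip(rng.normal(toy_params[False]['mu'], toy_params[False]['sigma'], 500), 0, None)

# classify a pair as "same" when its score falls below the threshold
candidates = np.linspace(0, 10, 200)
errors = [(same >= t).mean() + (diff < t).mean() for t in candidates]
print('best threshold ~', candidates[int(np.argmin(errors))])
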
Exemple #44
0
def collapse_labels(model, evidence, reduced_variables, reduced_row_idxs,
                    reduced_values):
    import vtool as vt

    # assert np.all(reduced_joint.values.ravel() == reduced_joint.values.flatten())
    reduced_ttypes = [model.var2_cpd[var].ttype for var in reduced_variables]

    evidence_vars = list(evidence.keys())
    evidence_state_idxs = ut.dict_take(evidence, evidence_vars)
    evidence_ttypes = [model.var2_cpd[var].ttype for var in evidence_vars]

    ttype2_ev_indices = dict(zip(*ut.group_indices(evidence_ttypes)))
    ttype2_re_indices = dict(zip(*ut.group_indices(reduced_ttypes)))
    # ttype2_ev_indices = ut.group_items(range(len(evidence_vars)), evidence_ttypes)
    # ttype2_re_indices = ut.group_items(range(len(reduced_variables)), reduced_ttypes)

    # Allow specific types of labels to change
    # everything is the same, only the names have changed.
    # TODO: allow for multiple different label_ttypes
    # for label_ttype in label_ttypes
    if NAME_TTYPE not in model.ttype2_template:
        return reduced_row_idxs, reduced_values
    label_ttypes = [NAME_TTYPE]
    for label_ttype in label_ttypes:
        ev_colxs = ttype2_ev_indices[label_ttype]
        re_colxs = ttype2_re_indices[label_ttype]

        ev_state_idxs = ut.take(evidence_state_idxs, ev_colxs)
        ev_state_idxs_tile = np.tile(ev_state_idxs,
                                     (len(reduced_values), 1)).astype(int)
        num_ev_ = len(ev_colxs)

        aug_colxs = list(
            range(num_ev_)) + (np.array(re_colxs) + num_ev_).tolist()
        aug_state_idxs = np.hstack([ev_state_idxs_tile, reduced_row_idxs])

        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.

        num_cols = len(aug_state_idxs.T)
        mask = vt.index_to_boolmask(aug_colxs, num_cols)
        (other_colxs, ) = np.where(~mask)
        relbl_states = aug_state_idxs.compress(mask, axis=1)
        other_states = aug_state_idxs.compress(~mask, axis=1)
        tmp_relbl_states = np.array(list(map(make_temp_state, relbl_states)))

        max_tmp_state = -1
        min_tmp_state = tmp_relbl_states.min()

        # rebuild original state structure with temp state idxs
        tmp_state_cols = [None] * num_cols
        for count, colx in enumerate(aug_colxs):
            tmp_state_cols[colx] = tmp_relbl_states[:, count:count + 1]
        for count, colx in enumerate(other_colxs):
            tmp_state_cols[colx] = other_states[:, count:count + 1]
        tmp_state_idxs = np.hstack(tmp_state_cols)

        data_ids = np.array(
            vt.compute_unique_data_ids_(list(map(tuple, tmp_state_idxs))))
        unique_ids, groupxs = vt.group_indices(data_ids)
        logger.info('Collapsed %r states into %r states' % (
            len(data_ids),
            len(unique_ids),
        ))
        # Sum the values in the cpd to marginalize the duplicate probs
        new_values = np.array(
            [g.sum() for g in vt.apply_grouping(reduced_values, groupxs)])
        # Take only the unique rows under this induced labeling
        unique_tmp_groupxs = np.array(ut.get_list_column(groupxs, 0))
        new_aug_state_idxs = tmp_state_idxs.take(unique_tmp_groupxs, axis=0)

        tmp_idx_set = set((-np.arange(-max_tmp_state,
                                      (-min_tmp_state) + 1)).tolist())
        true_idx_set = set(range(len(
            model.ttype2_template[label_ttype].basis)))

        # Relabel the rows one more time to agree with initial constraints
        for colx, true_idx in enumerate(ev_state_idxs):
            tmp_idx = np.unique(new_aug_state_idxs.T[colx])
            assert len(tmp_idx) == 1
            tmp_idx_set -= {tmp_idx[0]}
            true_idx_set -= {true_idx}
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx
        # Relabel the remaining idxs
        remain_tmp_idxs = sorted(list(tmp_idx_set))[::-1]
        remain_true_idxs = sorted(list(true_idx_set))
        for tmp_idx, true_idx in zip(remain_tmp_idxs, remain_true_idxs):
            new_aug_state_idxs[new_aug_state_idxs == tmp_idx] = true_idx

        # Remove evidence based augmented labels
        new_state_idxs = new_aug_state_idxs.T[num_ev_:].T
        return new_state_idxs, new_values
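
# Compact sketch of the row-collapse idea in collapse_labels: rows that become
# identical after relabeling are merged and their probabilities summed. The
# state rows and values below are stand-in data, not the model's actual states.
import numpy as np

state_rows = np.array([
    [0, 1],
    [1, 0],
    [0, 1],   # duplicate of the first row after relabeling
    [1, 1],
])
values = np.array([0.10, 0.25, 0.30, 0.35])

merged = {}
for row, val in zip(map(tuple, state_rows), values):
    merged[row] = merged.get(row, 0.0) + val

new_state_rows = np.array(list(merged.keys()))
new_values = np.array(list(merged.values()))
print(new_state_rows)  # three unique rows remain
print(new_values)      # duplicate probabilities summed: [0.4, 0.25, 0.35]
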
def get_annot_kpts_distinctiveness(ibs, aid_list, config2_=None, **kwargs):
    """
    very hacky, but cute way to cache keypoint distinctiveness

    Args:
        ibs (IBEISController):  ibeis controller object
        aid_list (list):
        dstncvs_normer (None):

    Returns:
        list: dstncvs_list

    CommandLine:
        python -m ibeis.control.manual_ibeiscontrol_funcs --test-get_annot_kpts_distinctiveness

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.control.manual_ibeiscontrol_funcs import *  # NOQA
        >>> from ibeis.algo.hots import distinctiveness_normalizer
        >>> import ibeis
        >>> import numpy as np
        >>> config2_ = None
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> aid_list = ibs.get_valid_aids(species=const.TEST_SPECIES.ZEB_PLAIN)
        >>> # execute function
        >>> aid_list1 = aid_list[::2]
        >>> aid_list2 = aid_list[1::3]
        >>> dstncvs_list1 = get_annot_kpts_distinctiveness(ibs, aid_list1)
        >>> dstncvs_list2 = get_annot_kpts_distinctiveness(ibs, aid_list2)
        >>> dstncvs_list = get_annot_kpts_distinctiveness(ibs, aid_list)
        >>> print(ut.depth_profile(dstncvs_list1))
        >>> stats_dict = ut.dict_stack([ut.get_stats(dstncvs) for dstncvs in dstncvs_list])
        >>> print(ut.dict_str(stats_dict))
        >>> assert np.all(np.array(stats_dict['min']) >= 0), 'distinctiveness was out of bounds'
        >>> assert np.all(np.array(stats_dict['max']) <= 1), 'distinctiveness was out of bounds'
    """
    from ibeis.algo.hots import distinctiveness_normalizer as dcvs_normer

    # per-species distinctiveness wrapper around the ibeis cached function
    # get feature rowids
    aid_list = np.array(aid_list)
    fid_list = np.array(
        ibs.get_annot_feat_rowids(aid_list,
                                  ensure=True,
                                  eager=True,
                                  nInput=None,
                                  config2_=config2_))
    species_rowid_list = np.array(ibs.get_annot_species_rowids(aid_list))
    # Compute distinctiveness separately for each species
    unique_sids, groupxs = vt.group_indices(species_rowid_list)
    fids_groups = vt.apply_grouping(fid_list, groupxs)
    species_text_list = ibs.get_species_texts(unique_sids)
    # Map distinctiveness computation
    normer_list = [
        dcvs_normer.request_species_distinctiveness_normalizer(species)
        for species in species_text_list
    ]
    # Reduce to get results
    dstncvs_groups = [
        get_feat_kpts_distinctiveness(ibs,
                                      fids,
                                      dstncvs_normer=dstncvs_normer,
                                      species_rowid=sid,
                                      **kwargs) for dstncvs_normer, fids, sid
        in zip(normer_list, fids_groups, unique_sids)
    ]
    dstncvs_list = vt.invert_apply_grouping(dstncvs_groups, groupxs)
    return dstncvs_list
Exemple #46
0
def get_annotmatch_rowids_from_aid(ibs, aid_list, eager=True, nInput=None, force_method=None):
    """
    Undirected version

    TODO autogenerate

    Returns a list of the aids that were reviewed as candidate matches to the input aid

    aid_list = ibs.get_valid_aids()

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid:1 --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> # setup_pzmtest_subgraph()
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> aid_list = ibs.get_valid_aids()[0:4]
        >>> eager = True
        >>> nInput = None
        >>> annotmatch_rowid_list = get_annotmatch_rowids_from_aid(ibs, aid_list,
        >>>                                                        eager, nInput)
        >>> result = ('annotmatch_rowid_list = %s' % (str(annotmatch_rowid_list),))
        >>> print(result)

    Example2:
        >>> # TIME TEST
        >>> # setup_pzmtest_subgraph()
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_Master1')
        >>> aid_list = ibs.get_valid_aids()
        >>> from functools import partial
        >>> func_list = [
        >>>     partial(ibs.get_annotmatch_rowids_from_aid),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid, force_method=1),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid, force_method=2),
        >>> ]
        >>> num_list = [1, 10, 50, 100, 300, 325, 350, 400, 500]
        >>> def args_list(count, aid_list=aid_list, num_list=num_list):
        >>>    return (aid_list[0:num_list[count]],)
        >>> searchkw = dict(
        >>>     func_labels=['combo', 'sql', 'numpy'],
        >>>     count_to_xtick=lambda count, args: len(args[0]),
        >>>     title='Timings of get_annotmatch_rowids_from_aid',
        >>> )
        >>> niters = len(num_list)
        >>> time_result = ut.gridsearch_timer(func_list, args_list, niters, **searchkw)
        >>> time_result['plot_timings']()
        >>> ut.show_if_requested()
    """
    from ibeis.control import _autogen_annotmatch_funcs
    if nInput is None:
        nInput = len(aid_list)

    if force_method != 2 and (nInput < 256 or (force_method == 1)):
        rowids1 = ibs.get_annotmatch_rowids_from_aid1(aid_list)
        # This one is slow because aid2 is the second part of the index
        rowids2 = ibs.get_annotmatch_rowids_from_aid2(aid_list)
        annotmatch_rowid_list = list(map(ut.flatten, zip(rowids1, rowids2)))  # NOQA
    else:
        # This is much much faster than the other methods for large queries
        import vtool as vt
        all_annotmatch_rowids = np.array(ibs._get_all_annotmatch_rowids())
        aids1 = np.array(ibs.get_annotmatch_aid1(all_annotmatch_rowids))
        aids2 = np.array(ibs.get_annotmatch_aid2(all_annotmatch_rowids))
        unique_aid1, groupxs1 = vt.group_indices(aids1)
        unique_aid2, groupxs2 = vt.group_indices(aids2)
        rowids1_ = vt.apply_grouping(all_annotmatch_rowids, groupxs1)
        rowids2_ = vt.apply_grouping(all_annotmatch_rowids, groupxs2)
        rowids1_ = [_.tolist() for _ in rowids1_]
        rowids2_ = [_.tolist() for _ in rowids2_]
        maping1 = dict(zip(unique_aid1, rowids1_))
        maping2 = dict(zip(unique_aid2, rowids2_))
        mapping = ut.defaultdict(list, ut.dict_union3(maping1, maping2))
        annotmatch_rowid_list = ut.dict_take(mapping, aid_list)

    if False:
        # VERY SLOW
        colnames = (_autogen_annotmatch_funcs.ANNOTMATCH_ROWID,)
        # FIXME: col_rowid is not correct
        params_iter = list(zip(aid_list, aid_list))
        where_colnames = [_autogen_annotmatch_funcs.ANNOT_ROWID1, _autogen_annotmatch_funcs.ANNOT_ROWID2]
        with ut.Timer('one'):
            annotmatch_rowid_list1 = ibs.db.get_where3(  # NOQA
                ibs.const.ANNOTMATCH_TABLE, colnames, params_iter, where_colnames,
                logicop='OR', eager=eager, nInput=nInput, unpack_scalars=False)
    # Ensure function output is consistent
    annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list))
    return annotmatch_rowid_list
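
# Simplified sketch of the bulk branch above: every annotmatch row is indexed
# by both of its annotation columns and the two per-aid lists are merged. The
# small tables here are made up.
import numpy as np
from collections import defaultdict

annotmatch_rowids = np.array([100, 101, 102, 103])
aids1 = np.array([1, 1, 2, 3])   # first annotation in each match
aids2 = np.array([2, 3, 3, 4])   # second annotation in each match

mapping = defaultdict(list)
for rowid, a1, a2 in zip(annotmatch_rowids, aids1, aids2):
    mapping[int(a1)].append(int(rowid))   # undirected: index by both columns
    mapping[int(a2)].append(int(rowid))

aid_list = [1, 3, 5]
print([sorted(mapping[aid]) for aid in aid_list])
# [[100, 101], [101, 102, 103], []] -- aids with no reviewed matches give []
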
Exemple #47
0
def invert_assigns(idx_to_wxs, idx_to_maws, verbose=False):
    r"""
    Inverts the assignment of vectors->to->words into words->to->vectors
    by grouping on word indexes.

    This gives a HUGE speedup over the old invert_assigns

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.smk.smk_funcs import *  # NOQA
        >>> idx_to_wxs = np.ma.array([
        >>>     (0, 4),
        >>>     (2, -1),
        >>>     (2, 0)], dtype=np.int32)
        >>> idx_to_wxs[1, 1] = np.ma.masked
        >>> idx_to_maws = np.ma.array(
        >>>     [(.5, 1.), (1., np.nan), (.5, .5)], dtype=np.float32)
        >>> idx_to_maws[1, 1] = np.ma.masked
        >>> tup = invert_assigns(idx_to_wxs, idx_to_maws)
        >>> wx_to_idxs, wx_to_maws = tup
        >>> result = 'wx_to_idxs = %s' % (ut.repr4(wx_to_idxs, with_dtype=True),)
        >>> result += '\nwx_to_maws = %s' % (ut.repr4(wx_to_maws, with_dtype=True),)
        >>> print(result)
        wx_to_idxs = {
            0: np.array([0, 2], dtype=np.int32),
            2: np.array([1, 2], dtype=np.int32),
            4: np.array([0], dtype=np.int32),
        }
        wx_to_maws = {
            0: np.array([0.5, 0.5], dtype=np.float32),
            2: np.array([1. , 0.5], dtype=np.float32),
            4: np.array([1.], dtype=np.float32),
        }
    """
    assert isinstance(idx_to_wxs, np.ma.masked_array)
    assert isinstance(idx_to_maws, np.ma.masked_array)

    nrows, ncols = idx_to_wxs.shape
    if len(idx_to_wxs.mask.shape) == 0:
        valid_mask = np.ones((nrows, ncols), dtype=bool)
    else:
        valid_mask = ~idx_to_maws.mask
        # idx_to_nAssign = (valid_mask).sum(axis=1)

    _valid_x2d = np.flatnonzero(valid_mask)
    flat_idxs = np.floor_divide(_valid_x2d, ncols, dtype=np.int32)
    flat_wxs = idx_to_wxs.compressed()
    flat_maws = idx_to_maws.compressed()

    sortx = flat_wxs.argsort()
    flat_wxs = flat_wxs.take(sortx)
    flat_idxs = flat_idxs.take(sortx)
    flat_maws = flat_maws.take(sortx)

    wx_keys, groupxs = vt.group_indices(flat_wxs)
    idxs_list = vt.apply_grouping(flat_idxs, groupxs)
    maws_list = vt.apply_grouping(flat_maws, groupxs)

    wx_to_idxs = dict(zip(wx_keys, idxs_list))
    wx_to_maws = dict(zip(wx_keys, maws_list))

    if verbose:
        logger.info('[vocab] L___ End Assign vecs to words.')
    return (wx_to_idxs, wx_to_maws)
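
# Plain-numpy sketch of the same inversion without the masked-array handling:
# per-vector word assignments (and weights) become per-word index lists. The
# toy arrays below are illustrative only.
import numpy as np

idx_to_wxs  = np.array([[0, 4], [2, 0]])          # words assigned to each vector
idx_to_maws = np.array([[0.5, 1.0], [1.0, 0.5]])  # matching assignment weights

flat_idxs = np.repeat(np.arange(idx_to_wxs.shape[0]), idx_to_wxs.shape[1])
flat_wxs = idx_to_wxs.ravel()
flat_maws = idx_to_maws.ravel()

wx_to_idxs, wx_to_maws = {}, {}
for wx in np.unique(flat_wxs):
    sel = flat_wxs == wx
    wx_to_idxs[int(wx)] = flat_idxs[sel]
    wx_to_maws[int(wx)] = flat_maws[sel]
print(wx_to_idxs)  # word 0 <- vectors 0 and 1, word 2 <- vector 1, word 4 <- vector 0
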
Exemple #48
0
def get_annotmatch_rowids_from_aid(ibs,
                                   aid_list,
                                   eager=True,
                                   nInput=None,
                                   force_method=None):
    """
    Undirected version

    TODO autogenerate

    Returns a list of the aids that were reviewed as candidate matches to the input aid

    aid_list = ibs.get_valid_aids()

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_rowids_from_aid:1 --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> # setup_pzmtest_subgraph()
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> aid_list = ibs.get_valid_aids()[0:4]
        >>> eager = True
        >>> nInput = None
        >>> annotmatch_rowid_list = get_annotmatch_rowids_from_aid(ibs, aid_list,
        >>>                                                        eager, nInput)
        >>> result = ('annotmatch_rowid_list = %s' % (str(annotmatch_rowid_list),))
        >>> print(result)

    Example2:
        >>> # TIME TEST
        >>> # setup_pzmtest_subgraph()
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_Master1')
        >>> aid_list = ibs.get_valid_aids()
        >>> from functools import partial
        >>> func_list = [
        >>>     partial(ibs.get_annotmatch_rowids_from_aid),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid, force_method=1),
        >>>     partial(ibs.get_annotmatch_rowids_from_aid, force_method=2),
        >>> ]
        >>> num_list = [1, 10, 50, 100, 300, 325, 350, 400, 500]
        >>> def args_list(count, aid_list=aid_list, num_list=num_list):
        >>>    return (aid_list[0:num_list[count]],)
        >>> searchkw = dict(
        >>>     func_labels=['combo', 'sql', 'numpy'],
        >>>     count_to_xtick=lambda count, args: len(args[0]),
        >>>     title='Timings of get_annotmatch_rowids_from_aid',
        >>> )
        >>> niters = len(num_list)
        >>> time_result = ut.gridsearch_timer(func_list, args_list, niters, **searchkw)
        >>> time_result['plot_timings']()
        >>> ut.show_if_requested()
    """
    from ibeis.control import _autogen_annotmatch_funcs
    if nInput is None:
        nInput = len(aid_list)

    if force_method != 2 and (nInput < 256 or (force_method == 1)):
        rowids1 = ibs.get_annotmatch_rowids_from_aid1(aid_list)
        # This one is slow because aid2 is the second part of the index
        rowids2 = ibs.get_annotmatch_rowids_from_aid2(aid_list)
        annotmatch_rowid_list = list(map(ut.flatten, zip(rowids1,
                                                         rowids2)))  # NOQA
    else:
        # This is much much faster than the other methods for large queries
        import vtool as vt
        all_annotmatch_rowids = np.array(ibs._get_all_annotmatch_rowids())
        aids1 = np.array(ibs.get_annotmatch_aid1(all_annotmatch_rowids))
        aids2 = np.array(ibs.get_annotmatch_aid2(all_annotmatch_rowids))
        unique_aid1, groupxs1 = vt.group_indices(aids1)
        unique_aid2, groupxs2 = vt.group_indices(aids2)
        rowids1_ = vt.apply_grouping(all_annotmatch_rowids, groupxs1)
        rowids2_ = vt.apply_grouping(all_annotmatch_rowids, groupxs2)
        rowids1_ = [_.tolist() for _ in rowids1_]
        rowids2_ = [_.tolist() for _ in rowids2_]
        maping1 = dict(zip(unique_aid1, rowids1_))
        maping2 = dict(zip(unique_aid2, rowids2_))
        mapping = ut.defaultdict(list, ut.dict_union3(maping1, maping2))
        annotmatch_rowid_list = ut.dict_take(mapping, aid_list)

    if False:
        # VERY SLOW
        colnames = (_autogen_annotmatch_funcs.ANNOTMATCH_ROWID, )
        # FIXME: col_rowid is not correct
        params_iter = list(zip(aid_list, aid_list))
        where_colnames = [
            _autogen_annotmatch_funcs.ANNOT_ROWID1,
            _autogen_annotmatch_funcs.ANNOT_ROWID2
        ]
        with ut.Timer('one'):
            annotmatch_rowid_list1 = ibs.db.get_where3(  # NOQA
                ibs.const.ANNOTMATCH_TABLE,
                colnames,
                params_iter,
                where_colnames,
                logicop='OR',
                eager=eager,
                nInput=nInput,
                unpack_scalars=False)
    # Ensure function output is consistent
    annotmatch_rowid_list = list(map(sorted, annotmatch_rowid_list))
    return annotmatch_rowid_list
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True):
    r"""
    CommandLine:
        python -m wbia.algo.hots.bayes --exec-try_query --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots.bayes import *  # NOQA
        >>> verbose = True
        >>> other_evidence = {}
        >>> name_evidence = [1, None, 0, None]
        >>> score_evidence = ['high', 'low', 'low']
        >>> query_vars = None
        >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1)
        >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence)
        >>> interest_ttypes = ['name']
        >>> infr = pgmpy.inference.BeliefPropagation(model)
        >>> evidence = infr._ensure_internal_evidence(evidence, model)
        >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose)
        >>> result = ('query_results = %s' % (str(query_results),))
        >>> ut.quit_if_noshow()
        >>> show_model(model, show_prior=True, **query_results)
        >>> ut.show_if_requested()

    Ignore:
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        probs = infr.query(query_vars, evidence)
        map_assignment = infr.map_query(query_vars, evidence)
    """
    infr = pgmpy.inference.VariableElimination(model)
    # infr = pgmpy.inference.BeliefPropagation(model)
    if True:
        return bruteforce(model, query_vars=None, evidence=evidence)
    else:
        import vtool as vt

        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        # hack
        query_vars = ut.setdiff_ordered(
            query_vars, ut.list_getattr(model.ttype2_cpds['score'],
                                        'variable'))
        if verbose:
            evidence_str = ', '.join(model.pretty_evidence(evidence))
            logger.info('P(' + ', '.join(query_vars) + ' | ' + evidence_str +
                        ') = ')
        # Compute MAP joints
        # There is a bug here.
        # map_assign = infr.map_query(query_vars, evidence)
        # (probably an invalid thing to do)
        # joint_factor = pgmpy.factors.factor_product(*factor_list)
        # Brute force MAP

        name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable')
        query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys()))
        # TODO: incorporate case where Na is assigned to Fred
        # evidence_h = ut.delete_keys(evidence.copy(), ['Na'])

        joint = model.joint_distribution()
        joint.evidence_based_reduction(query_name_vars, evidence, inplace=True)

        # Find static row labels in the evidence
        given_name_vars = [var for var in name_vars if var in evidence]
        given_name_idx = ut.dict_take(evidence, given_name_vars)
        given_name_val = [
            joint.statename_dict[var][idx]
            for var, idx in zip(given_name_vars, given_name_idx)
        ]
        new_vals = joint.values.ravel()
        # Add static evidence variables to the relabeled name states
        new_vars = given_name_vars + joint.variables
        new_rows = [tuple(given_name_val) + row for row in joint._row_labels()]
        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        temp_basis = [i for i in range(model.num_names)]

        def relabel_names(names, temp_basis=temp_basis):
            names = list(map(six.text_type, names))
            mapping = {}
            for n in names:
                if n not in mapping:
                    mapping[n] = len(mapping)
            new_names = tuple([temp_basis[mapping[n]] for n in names])
            return new_names

        relabeled_rows = list(map(relabel_names, new_rows))
        # Combine probability of rows with the same (new) label
        data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows))
        unique_ids, groupxs = vt.group_indices(data_ids)
        reduced_row_lbls = ut.take(relabeled_rows,
                                   ut.get_list_column(groupxs, 0))
        reduced_row_lbls = list(map(list, reduced_row_lbls))
        reduced_values = np.array(
            [g.sum() for g in vt.apply_grouping(new_vals, groupxs)])
        # Relabel the rows one more time to agree with initial constraints
        used_ = []
        replaced = []
        for colx, (var, val) in enumerate(zip(given_name_vars,
                                              given_name_val)):
            # All columns must be the same for this labeling
            alias = reduced_row_lbls[0][colx]
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val)
            replaced.append(alias)
            used_.append(val)
        basis = model.ttype2_cpds['name'][0]._template_.basis
        find_remain_ = ut.setdiff_ordered(temp_basis, replaced)
        repl_remain_ = ut.setdiff_ordered(basis, used_)
        for find, repl in zip(find_remain_, repl_remain_):
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl)

        # Now find the most likely state
        sortx = reduced_values.argsort()[::-1]
        sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist())
        sort_reduced_values = reduced_values[sortx]

        # Remove evidence based labels
        new_vars_ = new_vars[len(given_name_vars):]
        sort_reduced_row_lbls_ = ut.get_list_column(
            sort_reduced_row_lbls, slice(len(given_name_vars), None))

        sort_reduced_row_lbls_[0]

        # hack into a new joint factor
        var_states = ut.lmap(ut.unique_ordered, zip(*sort_reduced_row_lbls_))
        statename_dict = dict(zip(new_vars, var_states))
        cardinality = ut.lmap(len, var_states)
        val_lookup = dict(
            zip(ut.lmap(tuple, sort_reduced_row_lbls_), sort_reduced_values))
        values = np.zeros(np.prod(cardinality))
        for idx, state in enumerate(ut.iprod(*var_states)):
            if state in val_lookup:
                values[idx] = val_lookup[state]
        joint2 = pgmpy.factors.Factor(new_vars_,
                                      cardinality,
                                      values,
                                      statename_dict=statename_dict)
        logger.info(joint2)
        max_marginals = {}
        for i, var in enumerate(query_name_vars):
            one_out = query_name_vars[:i] + query_name_vars[i + 1:]
            max_marginals[var] = joint2.marginalize(one_out, inplace=False)
            # max_marginals[var] = joint2.maximize(one_out, inplace=False)
        logger.info(joint2.marginalize(['Nb', 'Nc'], inplace=False))
        factor_list = max_marginals.values()

        # Better map assignment based on knowledge of labels
        map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0]))

        sort_reduced_rowstr_lbls = [
            ut.repr2(dict(zip(new_vars, lbls)),
                     explicit=True,
                     nobraces=True,
                     strvals=True) for lbls in sort_reduced_row_lbls_
        ]

        top_assignments = list(
            zip(sort_reduced_rowstr_lbls[:3], sort_reduced_values))
        if len(sort_reduced_values) > 3:
            top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))]

        # import utool
        # utool.embed()

        # Compute all marginals
        # probs = infr.query(query_vars, evidence)
        # probs = infr.query(query_vars, evidence)
        # factor_list = probs.values()

        ## Marginalize over non-query, non-evidence
        # irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars)
        # joint.marginalize(irrelevant_vars)
        # joint.normalize()
        # new_rows = joint._row_labels()
        # new_vals = joint.values.ravel()
        # map_vals = new_rows[new_vals.argmax()]
        # map_assign = dict(zip(joint.variables, map_vals))
        # Compute Marginalized MAP joints
        # marginalized_joints = {}
        # for ttype in interest_ttypes:
        #    other_vars = [v for v in joint_factor.scope()
        #                  if model.var2_cpd[v].ttype != ttype]
        #    marginal = joint_factor.marginalize(other_vars, inplace=False)
        #    marginalized_joints[ttype] = marginal
        query_results = {
            'factor_list': factor_list,
            'top_assignments': top_assignments,
            'map_assign': map_assign,
            'marginalized_joints': None,
        }
        return query_results
Exemple #50
0
def get_automatch_candidates(cm_list, ranks_lt=5, directed=True,
                             name_scoring=False, ibs=None, filter_reviewed=False,
                             filter_duplicate_namepair_matches=False):
    """
    THIS IS PROBABLY ONE OF THE ONLY THINGS IN THIS FILE THAT SHOULD NOT BE
    DEPRECATED

    Returns a list of matches that should be inspected
    This function is more lightweight than orgres or allres.
    Used in inspect_gui and interact_qres2

    Args:
        qaid2_qres (dict): mapping from query annotation id to query result object
        ranks_lt (int): put all ranks less than this number into the graph
        directed (bool):

    Returns:
        tuple: candidate_matches = (qaid_arr, daid_arr, score_arr, rank_arr)

    CommandLine:
        python -m ibeis.expt.results_organizer --test-get_automatch_candidates:2
        python -m ibeis.expt.results_organizer --test-get_automatch_candidates:0

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_()
        >>> cm_list = ibs.query_chips(qreq_=qreq_, return_cm=True)
        >>> ranks_lt = 5
        >>> directed = True
        >>> name_scoring = False
        >>> candidate_matches = get_automatch_candidates(cm_list, ranks_lt, directed, ibs=ibs)
        >>> print(candidate_matches)

    Example1:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:5]
        >>> daid_list = ibs.get_valid_aids()[0:20]
        >>> cm_list = ibs.query_chips(qaid_list, daid_list, return_cm=True)
        >>> ranks_lt = 5
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...    cm_list, ranks_lt, directed, name_scoring=name_scoring,
        ...    filter_reviewed=filter_reviewed,
        ...    filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...    ibs=ibs)
        >>> print(candidate_matches)

    Example3:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:1]
        >>> daid_list = ibs.get_valid_aids()[10:100]
        >>> qaid2_cm = ibs.query_chips(qaid_list, daid_list, return_cm=True)
        >>> ranks_lt = 1
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...    cm_list, ranks_lt, directed, name_scoring=name_scoring,
        ...    filter_reviewed=filter_reviewed,
        ...    filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...    ibs=ibs)
        >>> print(candidate_matches)

    Example4:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.expt.results_organizer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qaid_list = ibs.get_valid_aids()[0:10]
        >>> daid_list = ibs.get_valid_aids()[0:10]
        >>> qres_list = ibs.query_chips(qaid_list, daid_list)
        >>> ranks_lt = 3
        >>> directed = False
        >>> name_scoring = False
        >>> filter_reviewed = False
        >>> filter_duplicate_namepair_matches = True
        >>> candidate_matches = get_automatch_candidates(
        ...    qaid2_cm, ranks_lt, directed, name_scoring=name_scoring,
        ...    filter_reviewed=filter_reviewed,
        ...    filter_duplicate_namepair_matches=filter_duplicate_namepair_matches,
        ...    ibs=ibs)
        >>> print(candidate_matches)
    """
    import vtool as vt
    from ibeis.model.hots import chip_match
    print(('[resorg] get_automatch_candidates('
           'filter_reviewed={filter_reviewed},'
           'filter_duplicate_namepair_matches={filter_duplicate_namepair_matches},'
           'directed={directed},'
           'ranks_lt={ranks_lt},'
           ).format(**locals()))
    print('[resorg] len(cm_list) = %d' % (len(cm_list)))
    qaids_stack  = []
    daids_stack  = []
    ranks_stack  = []
    scores_stack = []

    # For each QueryResult, Extract inspectable candidate matches
    if isinstance(cm_list, dict):
        cm_list = list(cm_list.values())

    for cm in cm_list:
        if isinstance(cm, chip_match.ChipMatch2):
            daids  = cm.get_top_aids(ntop=ranks_lt)
            scores = cm.get_top_scores(ntop=ranks_lt)
            ranks  = np.arange(len(daids))
            qaids  = np.full(daids.shape, cm.qaid, dtype=daids.dtype)
        else:
            (qaids, daids, scores, ranks) = cm.get_match_tbldata(
                ranks_lt=ranks_lt, name_scoring=name_scoring, ibs=ibs)
        qaids_stack.append(qaids)
        daids_stack.append(daids)
        scores_stack.append(scores)
        ranks_stack.append(ranks)

    # Stack them into a giant array
    # utool.embed()
    qaid_arr  = np.hstack(qaids_stack)
    daid_arr  = np.hstack(daids_stack)
    score_arr = np.hstack(scores_stack)
    rank_arr  = np.hstack(ranks_stack)

    # Sort by scores
    sortx = score_arr.argsort()[::-1]
    qaid_arr  = qaid_arr[sortx]
    daid_arr   = daid_arr[sortx]
    score_arr = score_arr[sortx]
    rank_arr  = rank_arr[sortx]

    if filter_reviewed:
        _is_reviewed = ibs.get_annot_pair_is_reviewed(qaid_arr.tolist(), daid_arr.tolist())
        is_unreviewed = ~np.array(_is_reviewed, dtype=bool)
        qaid_arr  = qaid_arr.compress(is_unreviewed)
        daid_arr   = daid_arr.compress(is_unreviewed)
        score_arr = score_arr.compress(is_unreviewed)
        rank_arr  = rank_arr.compress(is_unreviewed)

    # Remove directed edges
    if not directed:
        #nodes = np.unique(directed_edges.flatten())
        directed_edges = np.vstack((qaid_arr, daid_arr)).T
        #idx1, idx2 = vt.intersect2d_indices(directed_edges, directed_edges[:, ::-1])

        unique_rowx = vt.find_best_undirected_edge_indexes(directed_edges, score_arr)

        qaid_arr  = qaid_arr.take(unique_rowx)
        daid_arr  = daid_arr.take(unique_rowx)
        score_arr = score_arr.take(unique_rowx)
        rank_arr  = rank_arr.take(unique_rowx)

    # Filter Double Name Matches
    if filter_duplicate_namepair_matches:
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        if not directed:
            directed_name_edges = np.vstack((qnid_arr, dnid_arr)).T
            unique_rowx2 = vt.find_best_undirected_edge_indexes(directed_name_edges, score_arr)
        else:
            namepair_id_list = np.array(vt.compute_unique_data_ids_(list(zip(qnid_arr, dnid_arr))))
            unique_namepair_ids, namepair_groupxs = vt.group_indices(namepair_id_list)
            score_namepair_groups = vt.apply_grouping(score_arr, namepair_groupxs)
            unique_rowx2 = np.array(sorted([
                groupx[score_group.argmax()]
                for groupx, score_group in zip(namepair_groupxs, score_namepair_groups)
            ]), dtype=np.int32)
        qaid_arr  = qaid_arr.take(unique_rowx2)
        daid_arr  = daid_arr.take(unique_rowx2)
        score_arr = score_arr.take(unique_rowx2)
        rank_arr  = rank_arr.take(unique_rowx2)

    candidate_matches = (qaid_arr, daid_arr, score_arr, rank_arr)
    return candidate_matches
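
# Small numpy sketch of the undirected reduction: keep one row per unordered
# (qaid, daid) pair, preferring the higher-scoring direction. This stands in
# for vt.find_best_undirected_edge_indexes and uses made-up data.
import numpy as np

qaids = np.array([1, 2, 1, 3])
daids = np.array([2, 1, 3, 1])
scores = np.array([0.9, 0.4, 0.7, 0.8])

# canonical pair key ignores direction
keys = np.array([tuple(sorted(pair)) for pair in zip(qaids, daids)])
unique_keys, inverse = np.unique(keys, axis=0, return_inverse=True)

keep = []
for group in range(len(unique_keys)):
    members = np.flatnonzero(inverse == group)
    keep.append(members[scores[members].argmax()])
keep = np.sort(np.array(keep))
print(qaids[keep], daids[keep], scores[keep])  # (1, 2) keeps 0.9; (1, 3) keeps 0.8
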