Example #1
def test_pyflann_searches():
    """
    CommandLine:
        python -m vtool.tests.test_pyflann --test-test_pyflann_searches

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.test_pyflann import *  # NOQA
        >>> # build test data
        >>> # execute function
        >>> result = test_pyflann_searches()
        >>> # verify results
        >>> print(result)
    """
    try:
        num_neighbors = 3
        pts = testdata_points(nPts=5743, nDims=2)
        qpts = testdata_points(nPts=7, nDims=2)
        import vtool as vt
        # sample a radius
        radius = vt.L2(pts[0:1], qpts[0:1])[0] * 2 + 1

        flann = pyflann.FLANN()

        print('NN_OnTheFly')
        # build nn_index on the fly
        indices1, dists1 = flann.nn(pts,
                                    qpts,
                                    num_neighbors,
                                    algorithm='hierarchical')
        print(utool.hz_str('indices1, dists1 = ', indices1, dists1))

        _build_params = flann.build_index(pts, algorithm='kmeans')
        del _build_params

        print('NN_Index')
        indices2, dists2 = flann.nn_index(qpts, num_neighbors=num_neighbors)
        print(utool.hz_str('indices2, dists2 = ', indices2, dists2))

        # this can only be called on one query point at a time
        # because the output size is unknown
        print('NN_Radius, radius=%r' % (radius, ))
        indices3, dists3 = flann.nn_radius(pts[0], radius)
        print('indices3 = %r ' % (indices3, ))
        print('dists3 = %r ' % (dists3, ))

        assert np.all(dists3 < radius)
    except Exception as ex:
        utool.printex(ex,
                      key_list=[
                          'query',
                          'query.shape',
                          'pts.shape',
                      ],
                      pad_stdout=True)
        #utool.embed()
        raise
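
The radius query above only accepts a single query point because the number of neighbors inside the radius is not known ahead of time. The brute-force sketch below illustrates what such a query returns; it assumes FLANN's default squared-L2 distance convention and uses only numpy, so the names are illustrative rather than part of pyflann.

import numpy as np

rng = np.random.RandomState(0)
pts = rng.rand(100, 2)
query = pts[0]
radius = 0.05

# squared distances from the single query point to every database point
sq_dists = ((pts - query) ** 2).sum(axis=1)
# indices of all points that fall inside the radius (variable-length result)
indices = np.where(sq_dists < radius)[0]
print(indices)
print(sq_dists[indices])
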
Example #2
def check_expr_eq(expr1, expr2, verbose=True):
    """
    Does not work in general. Problem is not decidable.
    Thanks Richard.

    Args:
        expr1 (?):
        expr2 (?):

    CommandLine:
        python -m vtool.symbolic --test-check_expr_eq

    SeeAlso:
        vt.symbolic_randcheck

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.symbolic import *  # NOQA
        >>> expr1 = sympy.Matrix([ [sx*x + 1.0*tx + w1*y], [sy*y + 1.0*ty + w2*x], [1.0]])
        >>> expr2 = sympy.Matrix([ [sx*x + tx + w1*y], [sy*y + ty + w2*x], [1]])
        >>> result = check_expr_eq(expr1, expr2)
        >>> print(result)
    """
    if isinstance(expr1, six.string_types):
        expr1 = sympy.simplify(expr1)
    if isinstance(expr2, six.string_types):
        expr2 = sympy.simplify(expr2)
    print(ut.hz_str('Checking if ', repr(expr1), ' == ', repr(expr2)))
    random_point_check = expr1.equals(expr2)
    if random_point_check is None:
        failexpr = expr1.equals(expr2, failing_expression=True)
        print('failexpr = %r' % (failexpr,))
        random_point_check = False
    print('... seems %r' % (random_point_check,))
    #return random_point_check
    expr3 = expr1 - expr2
    if not random_point_check and True:
        common_symbols = expr1.free_symbols.intersection(expr2.free_symbols)
        if len(common_symbols):
            y = sympy.symbols('y')  # Hack, should be a new symbol
            symbol = common_symbols.pop()
            soln1 = sympy.solve(sympy.Eq(sympy.simplify(expr1), y), symbol)
            soln2 = sympy.solve(sympy.Eq(sympy.simplify(expr2), y), symbol)
            print('Solving expr1 for common symbol: ' + str(soln1))
            print('Solving expr2 for common symbol: ' + str(soln2))
            if soln1 == soln2:
                print('This seems True')
            else:
                print('This seems False')
        sympy.solve(sympy.Eq(sympy.simplify(expr2), y), 'd')
    print(ut.hz_str('... checking 0 ', repr(expr3)))
    # Does not always work.
    print('(not guaranteed to work) expr3.is_zero = %r' % (expr3.is_zero,))
    return expr3.is_zero
Example #3
def check_expr_eq(expr1, expr2, verbose=True):
    """
    Does not work in general. Problem is not decidable.
    Thanks Richard.

    Args:
        expr1 (?):
        expr2 (?):

    CommandLine:
        python -m vtool.symbolic --test-check_expr_eq

    SeeAlso:
        vt.symbolic_randcheck

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.symbolic import *  # NOQA
        >>> expr1 = sympy.Matrix([ [sx*x + 1.0*tx + w1*y], [sy*y + 1.0*ty + w2*x], [1.0]])
        >>> expr2 = sympy.Matrix([ [sx*x + tx + w1*y], [sy*y + ty + w2*x], [1]])
        >>> result = check_expr_eq(expr1, expr2)
        >>> print(result)
    """
    if isinstance(expr1, six.string_types):
        expr1 = sympy.simplify(expr1)
    if isinstance(expr2, six.string_types):
        expr2 = sympy.simplify(expr2)
    print(ut.hz_str("Checking if ", repr(expr1), " == ", repr(expr2)))
    random_point_check = expr1.equals(expr2)
    if random_point_check is None:
        failexpr = expr1.equals(expr2, failing_expression=True)
        print("failexpr = %r" % (failexpr,))
        random_point_check = False
    print("... seems %r" % (random_point_check,))
    # return random_point_check
    expr3 = expr1 - expr2
    if not random_point_check and True:
        common_symbols = expr1.free_symbols.intersection(expr2.free_symbols)
        if len(common_symbols):
            y = sympy.symbols("y")  # Hack, should be a new symbol
            symbol = common_symbols.pop()
            soln1 = sympy.solve(sympy.Eq(sympy.simplify(expr1), y), symbol)
            soln2 = sympy.solve(sympy.Eq(sympy.simplify(expr2), y), symbol)
            print("Solving expr1 for common symbol: " + str(soln1))
            print("Solving expr2 for common symbol: " + str(soln2))
            if soln1 == soln2:
                print("This seems True")
            else:
                print("This seems False")
        sympy.solve(sympy.Eq(sympy.simplify(expr2), y), "d")
    print(ut.hz_str("... checking 0 ", repr(expr3)))
    # Does not always work.
    print("(not gaurenteed to work) expr3.is_zero = %r" % (expr3.is_zero,))
    return expr3.is_zero
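
For reference, a minimal sympy snippet showing the two checks the function leans on: the random-point test behind expr.equals() and the symbolic is_zero test on the simplified difference. Both can be inconclusive (return None), which is why the docstring warns that the problem is undecidable in general.

import sympy

x = sympy.symbols('x')
expr1 = (x + 1) ** 2
expr2 = x ** 2 + 2 * x + 1

# equals() substitutes random points; it may return True, False, or None
print(expr1.equals(expr2))                      # True
# is_zero on the simplified difference is the symbolic fallback
print(sympy.simplify(expr1 - expr2).is_zero)    # True
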
Example #4
def test_pyflann_searches():
    """
    CommandLine:
        python -m vtool.tests.test_pyflann --test-test_pyflann_searches

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.test_pyflann import *  # NOQA
        >>> # build test data
        >>> # execute function
        >>> result = test_pyflann_searches()
        >>> # verify results
        >>> print(result)
    """
    try:
        num_neighbors = 3
        pts = testdata_points(nPts=5743, nDims=2)
        qpts = testdata_points(nPts=7, nDims=2)
        import vtool as vt
        # sample a radius
        radius = vt.L2(pts[0:1], qpts[0:1])[0] * 2 + 1

        flann = pyflann.FLANN()

        print('NN_OnTheFly')
        # build nn_index on the fly
        indices1, dists1 = flann.nn(pts, qpts, num_neighbors, algorithm='hierarchical')
        print(utool.hz_str('indices1, dists1 = ', indices1,  dists1))

        _build_params = flann.build_index(pts, algorithm='kmeans')
        del _build_params

        print('NN_Index')
        indices2, dists2 = flann.nn_index(qpts, num_neighbors=num_neighbors)
        print(utool.hz_str('indices2, dists2 = ', indices2,  dists2))

        # this can only be called on one query point at a time
        # because the output size is unknown
        print('NN_Radius, radius=%r' % (radius,))
        indices3, dists3  = flann.nn_radius(pts[0], radius)
        print('indices3 = %r ' % (indices3,))
        print('dists3 = %r ' % (dists3,))

        assert np.all(dists3 < radius)
    except Exception as ex:
        utool.printex(ex, key_list=[
            'query',
            'query.shape',
            'pts.shape',
        ], pad_stdout=True)
        #utool.embed()
        raise
Example #5
def print_priors(model, ignore_ttypes=[], title='Priors', color='blue'):
    ut.colorprint('\n --- %s ---' % (title, ), color=color)
    for ttype, cpds in model.ttype2_cpds.items():
        if ttype not in ignore_ttypes:
            for fs_ in ut.ichunks(cpds, 4):
                ut.colorprint(ut.hz_str([f._cpdstr('psql') for f in fs_]),
                              color)
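
All of these examples rely on hz_str to place multi-line strings (CPD tables, arrays) side by side. The simplified stand-in below is an assumption about its behavior, not utool's implementation; the real hz_str also accepts a single list of strings, as used above.

def hz_join(*blocks):
    # pad every block to the same height and join the rows line by line
    blocks = [str(b).split('\n') for b in blocks]
    height = max(len(b) for b in blocks)
    widths = [max(len(line) for line in b) for b in blocks]
    rows = []
    for i in range(height):
        row = ''.join((b[i] if i < len(b) else '').ljust(w)
                      for b, w in zip(blocks, widths))
        rows.append(row.rstrip())
    return '\n'.join(rows)


print(hz_join('cpd1 = ', '1 2\n3 4', '   ', 'a b\nc d'))
# cpd1 = 1 2   a b
#        3 4   c d
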
Example #6
def test_pyflann_tune():
    """
    CommandLine:
        python -m vtool.tests.test_pyflann --test-test_pyflann_tune

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.test_pyflann import *  # NOQA
        >>> # build test data
        >>> # execute function
        >>> result = test_pyflann_tune()
        >>> # verify results
        >>> print(result)
    """
    print('Create random qpts and database data')
    pts = testdata_points(nPts=1009)
    qpts = testdata_points(nPts=7)
    num_neighbors = 3
    #num_data = len(data)
    # untuned query

    flann = pyflann.FLANN()
    index_untuned, dist_untuned = flann.nn(pts, qpts, num_neighbors)

    # tuned query
    flannkw = dict(algorithm='autotuned',
                   target_precision=.01,
                   build_weight=0.01,
                   memory_weight=0.0,
                   sample_fraction=0.001)
    flann_tuned = pyflann.FLANN()
    tuned_params = flann_tuned.build_index(pts, **flannkw)
    index_tuned, dist_tuned = flann_tuned.nn_index(qpts,
                                                   num_neighbors=num_neighbors)

    print(
        utool.hz_str('index_tuned, dist_tuned     = ', index_tuned,
                     dist_tuned))
    print('')
    print(
        utool.hz_str('index_untuned, dist_untuned = ', index_untuned,
                     dist_untuned))

    print(dist_untuned >= dist_tuned)

    return tuned_params
Example #7
def test_pyflann_tune():
    """
    CommandLine:
        python -m vtool.tests.test_pyflann --test-test_pyflann_tune

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.test_pyflann import *  # NOQA
        >>> # build test data
        >>> # execute function
        >>> result = test_pyflann_tune()
        >>> # verify results
        >>> print(result)
    """
    print('Create random qpts and database data')
    pts = testdata_points(nPts=1009)
    qpts = testdata_points(nPts=7)
    num_neighbors = 3
    #num_data = len(data)
    # untuned query

    flann = pyflann.FLANN()
    index_untuned, dist_untuned = flann.nn(pts, qpts, num_neighbors)

    # tuned query
    flannkw = dict(
        algorithm='autotuned',
        target_precision=.01,
        build_weight=0.01,
        memory_weight=0.0,
        sample_fraction=0.001
    )
    flann_tuned = pyflann.FLANN()
    tuned_params = flann_tuned.build_index(pts, **flannkw)
    index_tuned, dist_tuned = flann_tuned.nn_index(qpts, num_neighbors=num_neighbors)

    print(utool.hz_str('index_tuned, dist_tuned     = ', index_tuned,  dist_tuned))
    print('')
    print(utool.hz_str('index_untuned, dist_untuned = ', index_untuned,  dist_untuned))

    print(dist_untuned >= dist_tuned)

    return tuned_params
Example #8
def test_pyflann_add_point():
    """
    CommandLine:
        python -m vtool.tests.test_pyflann --test-test_pyflann_add_point

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.test_pyflann import *  # NOQA
        >>> # build test data
        >>> # execute function
        >>> result = test_pyflann_add_point()
        >>> # verify results
        >>> print(result)
    """
    # Test parameters
    num_neighbors = 3
    pts = testdata_points(nPts=1009)
    qpts = testdata_points(nPts=7)
    newpts = testdata_points(nPts=1013)

    # build index
    print('Build Index')
    flann = pyflann.FLANN()
    _build_params = flann.build_index(pts)
    print(_build_params)

    print('NN_Index')
    indices1, dists1 = flann.nn_index(qpts, num_neighbors=num_neighbors)
    assert np.all(
        indices1 < pts.shape[0]), 'indices should be less than num pts'
    print(utool.hz_str('indices1, dists1 = ', indices1, dists1))

    print('Adding points')
    flann.add_points(newpts, rebuild_threshold=2)

    print('NN_Index')
    indices2, dists2 = flann.nn_index(qpts, num_neighbors=num_neighbors)
    print(utool.hz_str('indices2, dists2 = ', indices2, dists2))
    assert np.any(
        indices2 > pts.shape[0]), 'should be some indexes into new points'
    assert np.all(indices2 < pts.shape[0] +
                  newpts.shape[0]), 'but not more than the points being added'
Example #9
def print_factors(model, factor_list):
    if hasattr(model, 'var2_cpd'):
        semtypes = [model.var2_cpd[f.variables[0]].ttype for f in factor_list]
    else:
        semtypes = [0] * len(factor_list)
    for type_, factors in ut.group_items(factor_list, semtypes).items():
        logger.info('Result Factors (%r)' % (type_, ))
        factors = ut.sortedby(factors, [f.variables[0] for f in factors])
        for fs_ in ut.ichunks(factors, 4):
            ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]),
                          'yellow')
Example #10
def print_factors(model, factor_list):
    if hasattr(model, 'var2_cpd'):
        semtypes = [model.var2_cpd[f.variables[0]].ttype
                    for f in factor_list]
    else:
        semtypes = [0] * len(factor_list)
    for type_, factors in ut.group_items(factor_list, semtypes).items():
        print('Result Factors (%r)' % (type_,))
        factors = ut.sortedby(factors, [f.variables[0] for f in factors])
        for fs_ in ut.ichunks(factors, 4):
            ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]),
                          'yellow')
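
The two utool helpers doing the work here are group_items (bucket items by a parallel list of keys) and ichunks (yield fixed-size chunks). A plain-Python sketch of both, written as an assumption about their behavior:

from itertools import islice


def group_items(items, keys):
    groups = {}
    for item, key in zip(items, keys):
        groups.setdefault(key, []).append(item)
    return groups


def ichunks(seq, size):
    it = iter(seq)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            return
        yield chunk


print(group_items(['f1', 'f2', 'f3'], ['name', 'score', 'name']))
# {'name': ['f1', 'f3'], 'score': ['f2']}
print(list(ichunks(range(7), 4)))
# [[0, 1, 2, 3], [4, 5, 6]]
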
Example #11
def test_pyflann_add_point():
    """
    CommandLine:
        python -m vtool.tests.test_pyflann --test-test_pyflann_add_point

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.test_pyflann import *  # NOQA
        >>> # build test data
        >>> # execute function
        >>> result = test_pyflann_add_point()
        >>> # verify results
        >>> print(result)
    """
    # Test parameters
    num_neighbors = 3
    pts = testdata_points(nPts=1009)
    qpts = testdata_points(nPts=7)
    newpts = testdata_points(nPts=1013)

    # build index
    print('Build Index')
    flann = pyflann.FLANN()
    _build_params = flann.build_index(pts)
    print(_build_params)

    print('NN_Index')
    indices1, dists1 = flann.nn_index(qpts, num_neighbors=num_neighbors)
    assert np.all(indices1 < pts.shape[0]), 'indices should be less than num pts'
    print(utool.hz_str('indices1, dists1 = ', indices1,  dists1))

    print('Adding points')
    flann.add_points(newpts, rebuild_threshold=2)

    print('NN_Index')
    indices2, dists2 = flann.nn_index(qpts, num_neighbors=num_neighbors)
    print(utool.hz_str('indices2, dists2 = ', indices2,  dists2))
    assert np.any(indices2 > pts.shape[0]), 'should be some indexes into new points'
    assert np.all(indices2 < pts.shape[0] + newpts.shape[0]), 'but not more than the points being added'
Example #12
def evalprint(str_, globals_=None, locals_=None, simplify=False):
    if globals_ is None:
        globals_ = ut.get_parent_globals()
    if locals_ is None:
        locals_ = ut.get_parent_locals()
    if isinstance(str_, six.string_types):
        var = eval(str_, globals_, locals_)
    else:
        var = str_
        str_ = ut.get_varname_from_stack(var, N=1)
    if simplify is True:
        var = sympy.simplify(var)
    print(ut.hz_str(str_ + ' = ', repr(var)))
Example #13
def evalprint(str_, globals_=None, locals_=None, simplify=False):
    if globals_ is None:
        globals_ = ut.get_parent_globals()
    if locals_ is None:
        locals_ = ut.get_parent_locals()
    if isinstance(str_, six.string_types):
        var = eval(str_, globals_, locals_)
    else:
        var = str_
        str_ = ut.get_varname_from_stack(var, N=1)
    if simplify is True:
        var = sympy.simplify(var)
    print(ut.hz_str(str_ + " = ", repr(var)))
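
A hedged usage sketch for evalprint: it evaluates a string (or takes a variable directly) in the caller's scope and prints "expression = repr". This assumes the utool frame-inspection helpers it calls resolve names at the call site and that sympy is installed.

import sympy

x = sympy.symbols('x')
expr = (x + 1) ** 2
evalprint('sympy.expand(expr)')   # sympy.expand(expr) = x**2 + 2*x + 1
evalprint(expr, simplify=True)    # prints the (simplified) repr of expr
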
Example #14
    def get_inspect_str(qres, ibs=None, name_scoring=False):
        qres.assert_self()
        #ut.embed()

        top_lbls = [' top aids', ' scores', ' rawscores', ' ranks']

        top_aids   = np.array(qres.get_top_aids(num=6, name_scoring=name_scoring, ibs=ibs), dtype=np.int32)
        top_scores = np.array(qres.get_aid_scores(top_aids), dtype=np.float64)
        top_rawscores = np.array(qres.get_aid_scores(top_aids, rawscore=True), dtype=np.float64)
        top_ranks  = np.array(qres.get_aid_ranks(top_aids), dtype=np.int32)
        top_list   = [top_aids, top_scores, top_rawscores, top_ranks]

        if ibs is not None:
            top_lbls += [' isgt']
            istrue = qres.get_aid_truth(ibs, top_aids)
            top_list.append(np.array(istrue, dtype=np.int32))
        if name_scoring:
            top_lbls = ['top nid'] + top_lbls
            top_list = [ibs.get_annot_name_rowids(top_aids)] + top_list

        top_stack = np.vstack(top_list)
        #top_stack = np.array(top_stack, dtype=object)
        top_stack = np.array(top_stack, dtype=np.float32)
        #np.int32)
        top_str = np.array_str(top_stack, precision=3, suppress_small=True, max_line_width=200)

        top_lbl = '\n'.join(top_lbls)
        inspect_list = ['QueryResult',
                        qres.cfgstr,
                        ]
        if ibs is not None:
            gt_ranks  = qres.get_gt_ranks(ibs=ibs)
            gt_scores = qres.get_gt_scores(ibs=ibs)
            inspect_list.append('gt_ranks = %r' % gt_ranks)
            inspect_list.append('gt_scores = %r' % gt_scores)

        nFeatMatch_list = get_num_feats_in_matches(qres)
        nFeatMatch_stats_str = ut.get_stats_str(nFeatMatch_list, newlines=True, exclude_keys=('nMin', 'nMax'))

        inspect_list.extend([
            'qaid=%r ' % qres.qaid,
            ut.hz_str(top_lbl, ' ', top_str),
            'num feat matches per annotation stats:',
            #ut.indent(ut.dict_str(nFeatMatch_stats)),
            ut.indent(nFeatMatch_stats_str),
        ])

        inspect_str = '\n'.join(inspect_list)

        #inspect_str = ut.indent(inspect_str, '[INSPECT] ')
        return inspect_str
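
The pattern above: stack the per-annotation rows into one array, render it with np.array_str, and use hz_str to put the label column next to it. A reduced sketch with made-up numbers (assumes numpy and utool are importable):

import numpy as np
import utool as ut

top_lbls = [' aids', ' scores', ' ranks']
top_stack = np.array([[1, 2, 3],
                      [0.9, 0.7, 0.1],
                      [0, 1, 2]], dtype=np.float32)
top_str = np.array_str(top_stack, precision=3, suppress_small=True)
# label column on the left, formatted array block on the right
print(ut.hz_str('\n'.join(top_lbls), ' ', top_str))
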
Example #16
def pandas_repr(df):
    import utool as ut
    args = [
        df.values,
    ]
    kwargs = [
        ('columns', df.columns.values.tolist()),
        ('index', df.index.values.tolist()),
    ]
    header = 'pd.DataFrame('
    footer = ')'

    arg_parts = [
        ut.hz_str('    ', ut.repr2(arg))
        for arg in args if arg is not None
    ]
    kwarg_parts = [
        ut.hz_str('    {}={}'.format(key, ut.repr2(val)))
        for key, val in kwargs if val is not None
    ]
    body = ',\n'.join(arg_parts + kwarg_parts)
    dfrepr = '\n'.join([header, body, footer])
    print(dfrepr)
    pass
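
A small usage sketch for pandas_repr; the exact formatting of the printed code depends on ut.repr2, so the commented output below is approximate.

import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'], index=['r1', 'r2'])
pandas_repr(df)
# pd.DataFrame(
#     np.array([[1, 2],
#               [3, 4]]),
#     columns=['a', 'b'],
#     index=['r1', 'r2'],
# )
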
Example #17
    def make_test_similarity(test_case):
        # toy_params = {
        #    True:  {'mu': 0.9, 'sigma': .1},
        #    False: {'mu': 0.1, 'sigma': .4}
        # }
        # tau = np.pi * 2
        from wbia import constants as const

        # view_to_ori = const.VIEWTEXT_TO_YAW_RADIANS
        view_to_ori = ut.map_dict_keys(
            lambda x: const.YAWALIAS[x], const.VIEWTEXT_TO_YAW_RADIANS
        )
        # view_to_ori = {
        #     'F': -1 * tau / 4,
        #     'L':  0 * tau / 4,
        #     'B':  1 * tau / 4,
        #     'R':  2 * tau / 4,
        # }
        import vtool as vt

        nid_list = np.array(ut.dict_take_column(test_case, 'name'))
        yaw_list = np.array(
            ut.dict_take(view_to_ori, ut.dict_take_column(test_case, 'view'))
        )

        rng = np.random.RandomState(0)
        pmat = []
        for idx in range(len(test_case)):
            nid = nid_list[idx]
            yaw = yaw_list[idx]
            p_same = nid == nid_list
            p_comp = 1 - vt.ori_distance(yaw_list, yaw) / np.pi
            # estimate noisy measurements
            p_same_m = np.clip(p_same + rng.normal(0, 0.5, size=len(p_same)), 0, 0.9)
            p_comp_m = np.clip(p_comp + rng.normal(0, 0.5, size=len(p_comp)), 0, 0.9)
            #
            p_same_and_comp = p_same_m * p_comp_m
            pmat.append(p_same_and_comp)
        #
        P = np.array(pmat)
        P[np.diag_indices(len(P))] = 0
        P = P + P.T / 2
        P = np.clip(P, 0.01, 0.99)
        logger.info(ut.hz_str(' P = ', ut.repr2(P, precision=2, max_line_width=140)))
        return P
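
The similarity above combines a same-name indicator with a viewpoint term based on vt.ori_distance. A plain numpy sketch of the circular-distance idea (an assumption about ori_distance's behavior, wrapping angle differences into [0, pi]):

import numpy as np


def ori_distance(a, b, tau=2 * np.pi):
    # smallest absolute difference between two angles on the circle
    d = np.abs(a - b) % tau
    return np.minimum(d, tau - d)


yaws = np.array([0.0, np.pi / 2, np.pi])
print(1 - ori_distance(yaws, 0.0) / np.pi)   # [1.0  0.5  0.0]
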
Example #18
    def make_test_similarity(test_case):
        #toy_params = {
        #    True:  {'mu': 0.9, 'sigma': .1},
        #    False: {'mu': 0.1, 'sigma': .4}
        #}
        # tau = np.pi * 2
        from ibeis import constants as const
        # view_to_ori = const.VIEWTEXT_TO_YAW_RADIANS
        view_to_ori = ut.map_dict_keys(lambda x: const.YAWALIAS[x], const.VIEWTEXT_TO_YAW_RADIANS)
        # view_to_ori = {
        #     'F': -1 * tau / 4,
        #     'L':  0 * tau / 4,
        #     'B':  1 * tau / 4,
        #     'R':  2 * tau / 4,
        # }
        import vtool as vt

        nid_list = np.array(ut.dict_take_column(test_case, 'name'))
        yaw_list = np.array(ut.dict_take(view_to_ori, ut.dict_take_column(test_case, 'view')))

        rng = np.random.RandomState(0)
        pmat = []
        for idx in range(len(test_case)):
            nid = nid_list[idx]
            yaw = yaw_list[idx]
            p_same = nid == nid_list
            p_comp = 1 - vt.ori_distance(yaw_list, yaw) / np.pi
            # estimate noisy measurements
            p_same_m = np.clip(p_same + rng.normal(0, .5, size=len(p_same)), 0, .9)
            p_comp_m = np.clip(p_comp + rng.normal(0, .5, size=len(p_comp)), 0, .9)
            #
            p_same_and_comp = p_same_m * p_comp_m
            pmat.append(p_same_and_comp)
        #
        P = np.array(pmat)
        P[np.diag_indices(len(P))] = 0
        P = P + P.T / 2
        P = np.clip(P, .01, .99)
        print(ut.hz_str(' P = ', ut.array_repr2(P, precision=2, max_line_width=140)))
        return P
Example #19
    def make_graph(infr, show=False):
        import networkx as nx
        import itertools
        cm_list = infr.cm_list
        unique_nids, prob_names = infr.make_prob_names()
        thresh = infr.choose_thresh()

        # Simply cut any edge with a weight less than a threshold
        qaid_list = [cm.qaid for cm in cm_list]
        postcut = prob_names > thresh
        qxs, nxs = np.where(postcut)
        if False:
            kw = dict(precision=2, max_line_width=140, suppress_small=True)
            print(
                ut.hz_str('prob_names = ', ut.array2string2((prob_names),
                                                            **kw)))
            print(
                ut.hz_str('postcut = ',
                          ut.array2string2((postcut).astype(int), **kw)))
        matching_qaids = ut.take(qaid_list, qxs)
        matched_nids = ut.take(unique_nids, nxs)

        qreq_ = infr.qreq_

        nodes = ut.unique(qreq_.qaids.tolist() + qreq_.daids.tolist())
        if not hasattr(qreq_, 'dnids'):
            qreq_.dnids = qreq_.ibs.get_annot_nids(qreq_.daids)
            qreq_.qnids = qreq_.ibs.get_annot_nids(qreq_.qaids)
        dnid2_daids = ut.group_items(qreq_.daids, qreq_.dnids)
        grouped_aids = dnid2_daids.values()
        matched_daids = ut.take(dnid2_daids, matched_nids)
        name_cliques = [
            list(itertools.combinations(aids, 2)) for aids in grouped_aids
        ]
        aid_matches = [
            list(ut.product([qaid], daids))
            for qaid, daids in zip(matching_qaids, matched_daids)
        ]

        graph = nx.Graph()
        graph.add_nodes_from(nodes)
        graph.add_edges_from(ut.flatten(name_cliques))
        graph.add_edges_from(ut.flatten(aid_matches))

        #matchless_quries = ut.take(qaid_list, ut.index_complement(qxs, len(qaid_list)))
        name_nodes = [('nid', l) for l in qreq_.dnids]
        db_aid_nid_edges = list(zip(qreq_.daids, name_nodes))
        #query_aid_nid_edges = list(zip(matching_qaids, [('nid', l) for l in matched_nids]))
        #G = nx.Graph()
        #G.add_nodes_from(matchless_quries)
        #G.add_edges_from(db_aid_nid_edges)
        #G.add_edges_from(query_aid_nid_edges)

        graph.add_edges_from(db_aid_nid_edges)

        if infr.user_feedback is not None:
            user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
            p_bg = 0.0
            part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
            part2 = p_bg * user_feedback['p_notcomp']
            p_same_list = part1 + part2
            for aid1, aid2, p_same in zip(user_feedback['aid1'],
                                          user_feedback['aid2'], p_same_list):
                if p_same > .5:
                    if not graph.has_edge(aid1, aid2):
                        graph.add_edge(aid1, aid2)
                else:
                    if graph.has_edge(aid1, aid2):
                        graph.remove_edge(aid1, aid2)
        if show:
            import plottool as pt
            nx.set_node_attributes(graph, 'color',
                                   {aid: pt.LIGHT_PINK
                                    for aid in qreq_.daids})
            nx.set_node_attributes(graph, 'color',
                                   {aid: pt.TRUE_BLUE
                                    for aid in qreq_.qaids})
            nx.set_node_attributes(
                graph, 'color', {
                    aid: pt.LIGHT_PURPLE
                    for aid in np.intersect1d(qreq_.qaids, qreq_.daids)
                })
            nx.set_node_attributes(
                graph, 'label',
                {node: 'n%r' % (node[1], )
                 for node in name_nodes})
            nx.set_node_attributes(
                graph, 'color', {node: pt.LIGHT_GREEN
                                 for node in name_nodes})
        if show:
            import plottool as pt
            pt.show_nx(graph, layoutkw={'prog': 'neato'}, verbose=False)
        return graph
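
The core of make_graph is "keep only edges whose match probability clears a threshold". A minimal standalone sketch of that cut with networkx and an illustrative probability matrix:

import networkx as nx
import numpy as np

prob = np.array([
    [0.0, 0.9, 0.2],
    [0.9, 0.0, 0.1],
    [0.2, 0.1, 0.0],
])
thresh = 0.5
graph = nx.Graph()
graph.add_nodes_from(range(len(prob)))
# only the upper triangle, so each pair is considered once
for i, j in zip(*np.where(np.triu(prob, k=1) > thresh)):
    graph.add_edge(int(i), int(j))
print(sorted(graph.edges()))   # [(0, 1)]
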
Example #20
def draw_em_graph(P, Pn, PL, gam, num_labels):
    """
        python -m ibeis.algo.hots.testem test_em --show --no-cnn
    """
    num_labels = PL.shape[1]
    name_nodes = ['N%d' % x for x in list(range(1, num_labels + 1))]
    annot_nodes = ['X%d' % x for x in list(range(1, len(Pn) + 1))]

    nodes = name_nodes + annot_nodes

    PL2 = gam[:, num_labels:].T
    PL2 += .01
    PL2 = PL2 / PL2.sum(axis=1)[:, None]
    # PL2 = PL2 / np.linalg.norm(PL2, axis=0)
    zero_part = np.zeros((num_labels, len(Pn) + num_labels))
    prob_part = np.hstack([PL2, Pn])
    print(ut.hz_str(' PL2 = ', ut.repr2(PL2, precision=2)))
    # Redo p with posteriors
    if ut.get_argflag('--postem'):
        P = np.vstack([zero_part, prob_part])

    weight_matrix = P  # NOQA
    graph = ut.nx_from_matrix(P, nodes=nodes)
    graph = graph.to_directed()
    # delete graph
    dup_edges = []
    seen_ = set([])
    for u, v in graph.edges():
        if u < v:
            u, v = v, u
        if (u, v) not in seen_:
            seen_.add((u, v))
        else:
            dup_edges.append((u, v))
    graph.remove_edges_from(dup_edges)
    import plottool_ibeis as pt
    import networkx as nx

    if len(name_nodes) == 3 and len(annot_nodes) == 4:
        graph.nodes[annot_nodes[0]]['pos'] = (20., 200.)
        graph.nodes[annot_nodes[1]]['pos'] = (220., 200.)
        graph.nodes[annot_nodes[2]]['pos'] = (20., 100.)
        graph.nodes[annot_nodes[3]]['pos'] = (220., 100.)
        graph.nodes[name_nodes[0]]['pos'] = (10., 300.)
        graph.nodes[name_nodes[1]]['pos'] = (120., 300.)
        graph.nodes[name_nodes[2]]['pos'] = (230., 300.)
        nx.set_node_attributes(graph, name='pin', values='true')

        print('annot_nodes = %r' % (annot_nodes, ))
        print('name_nodes = %r' % (name_nodes, ))

        for u in annot_nodes:
            for v in name_nodes:
                if graph.has_edge(u, v):
                    print('1) u, v = %r' % ((u, v), ))
                    graph.edges[u, v]['taillabel'] = graph.edges[u, v]['label']
                    graph.edges[u, v]['color'] = pt.ORANGE
                    graph.edges[u, v]['labelcolor'] = pt.BLUE
                    del graph.edges[u, v]['label']
                elif graph.has_edge(v, u):
                    print('2) u, v = %r' % ((u, v), ))
                    graph.edges[v, u]['headlabel'] = graph.edges[v, u]['label']
                    graph.edges[v, u]['color'] = pt.ORANGE
                    graph.edges[v, u]['labelcolor'] = pt.BLUE
                    del graph.edges[v, u]['label']
                else:
                    print((u, v))
                    print('!!')

    # import itertools
    # name_const_edges = [(u, v, {'style': 'invis'}) for u, v in itertools.combinations(name_nodes, 2)]
    # graph.add_edges_from(name_const_edges)
    # nx.set_edge_attributes(graph, name='constraint', values={edge: False for edge in graph.edges() if edge[0] == 'b' or edge[1] == 'b'})
    # nx.set_edge_attributes(graph, name='constraint', values={edge: False for edge in graph.edges() if edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, name='constraint', values={edge: True for edge in graph.edges() if edge[0] in name_nodes or edge[1] in name_nodes})
    # nx.set_edge_attributes(graph, name='constraint', values={edge: True for edge in graph.edges() if (edge[0] in ['a', 'b'] and edge[1] in ['a', 'b']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, name='constraint', values={edge: True for edge in graph.edges() if (edge[0] in ['c'] or edge[1] in ['c']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, name='constraint', values={edge: True for edge in graph.edges() if (edge[0] in ['a'] or edge[1] in ['a']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, name='constraint', values={edge: True for edge in graph.edges() if (edge[0] in ['b'] or edge[1] in ['b']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # graph.add_edges_from([('root', n) for n in nodes])
    # {node: 'names' for node in name_nodes})
    nx.set_node_attributes(graph,
                           name='color',
                           values={node: pt.RED
                                   for node in name_nodes})
    # nx.set_node_attributes(graph, name='width', values={node: 20 for node in nodes})
    # nx.set_node_attributes(graph, name='height', values={node: 20 for node in nodes})
    #nx.set_node_attributes(graph, name='group', values={node: 'names' for node in name_nodes})
    #nx.set_node_attributes(graph, name='group', values={node: 'annots' for node in annot_nodes})
    nx.set_node_attributes(graph,
                           name='groupid',
                           values={node: 'names'
                                   for node in name_nodes})
    nx.set_node_attributes(graph,
                           name='groupid',
                           values={node: 'annots'
                                   for node in annot_nodes})
    graph.graph['clusterrank'] = 'local'
    # graph.graph['groupattrs'] = {
    #     'names': {'rankdir': 'LR', 'rank': 'source'},
    #     'annots': {'rankdir': 'TB', 'rank': 'source'},
    # }
    ut.nx_delete_edge_attr(graph, 'weight')
    # pt.show_nx(graph, fontsize=10, layoutkw={'splines': 'spline', 'prog': 'dot', 'sep': 2.0}, verbose=1)
    layoutkw = {
        # 'rankdir': 'LR',
        'splines': 'spline',
        # 'splines': 'ortho',
        # 'splines': 'curved',
        # 'compound': 'True',
        # 'prog': 'dot',
        'prog': 'neato',
        # 'packMode': 'clust',
        # 'sep': 4,
        # 'nodesep': 1,
        # 'ranksep': 1,
    }
    #pt.show_nx(graph, fontsize=12, layoutkw=layoutkw, verbose=0, as_directed=False)
    pt.show_nx(graph,
               fontsize=6,
               fontname='Ubuntu',
               layoutkw=layoutkw,
               verbose=0,
               as_directed=False)
    pt.interactions.zoom_factory()
Example #21
def try_em2(prob_names, prob_annots=None):
    """
    assert prob_names.shape == (nAnnots, nNames)
    """
    learn_rate = 0.05
    num_iters = 1

    # Matrix of unary probabilities: the probability that each node takes on a
    # given label, independent of its edges.
    num_annots, num_names = prob_names.shape

    # prevent zero probabilities
    prob_names_ = prob_names + 1E-9
    prob_names_ /= prob_names_.sum(axis=1)[:, None]

    if prob_annots is None:
        prob_annots_ = np.full((num_annots, num_annots), 1 / num_annots)
        prob_annots_[np.diag_indices(num_annots)] *= 1.01
        # perturb
        rng = np.random.RandomState(0)
        prob_annots_ += (rng.randn(*prob_annots_.shape)) / 100
        prob_annots_ /= prob_annots_.sum(axis=1)[:, None]
        prob_annots_ = (prob_annots_.T + prob_annots_) / 2
    else:
        prob_annots_ = prob_annots + 1E-9
        prob_annots_ /= prob_annots_.sum(axis=1)[:, None]

    # Stack everything into a single matrix
    prob_part = np.hstack([prob_names_, prob_annots_])
    zero_part = np.zeros((num_names, num_annots + num_names))
    prior = np.vstack([zero_part, prob_part])

    # Gamma will hold a probability distribution over the nodes
    # The labeled nodes must match themselves.
    # The unlabeled nodes are initialized with a uniform distribution.
    gam = np.hstack(
        [np.eye(num_names),
         np.ones((num_names, num_annots)) / num_names])

    verbose = 1
    if verbose:
        print('Initialize')
        print('num_names = %r' % (num_names, ))
        print(
            ut.hz_str(
                'prior = ',
                ut.repr2(prob_part[:, :],
                         precision=2,
                         max_line_width=140,
                         suppress_small=True)))
        print(
            ut.hz_str(
                'gamma = ',
                ut.repr2(gam[:, :],
                         max_line_width=140,
                         precision=2,
                         suppress_small=True)))
    #print(ut.hz_str(' gamma = ', ut.repr2(gam, max_line_width=140, precision=2)))

    delta_i = np.zeros(num_names)

    def dErr(i, gam, prior, delta_i=delta_i, num_names=num_names):
        # expected likelihood is the cross-entropy error
        delta_i[:] = 0
        # Compute the gradient of the cross entropy error
        # This is over both names and annotations
        jdxs = [j for j in range(prior.shape[0]) if j != i]
        prior_ij = prior[i, jdxs]
        np.log(prior_ij / (1 - prior_ij))
        gam[:, jdxs]

        for j in range(prior.shape[0]):
            if i != j:
                delta_i += gam[:, j] * np.log(prior[i, j] / (1 - prior[i, j]))
        # compute the projected gradient
        delta_i_hat = delta_i - delta_i.sum() / num_names
        return delta_i_hat

    # Build node for each annot and each name
    num_nodes = num_annots + num_names

    # Maximizes the expected likelihood of gamma
    dGam = np.zeros(gam.shape)
    # for count in range(num_iters):
    for count in ut.ProgIter(range(num_iters), label='EM', bs=True):
        # Compute error gradient
        for i in range(num_names, num_nodes):
            dGam[:, i] = dErr(i, gam, prior)
        # Make a step in the gradient direction
        # print(ut.hz_str(' dGam = ', ut.repr2(dGam, max_line_width=140, precision=2)))
        gam = gam + learn_rate * dGam
        # Normalize
        gam = np.clip(gam, 0, 1)
        for i in range(num_names, num_nodes):
            gam[:, i] = gam[:, i] / np.sum(gam[:, i])
    # print(ut.hz_str(' gamma = ', ut.repr2(gam, max_line_width=140, precision=2)))
    if verbose:
        print(
            ut.hz_str(
                ' gamma = ',
                ut.repr2(gam[:, num_names:],
                         max_line_width=140,
                         precision=2,
                         suppress_small=True)))
        print('Finished')
    return gam
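
A hedged usage sketch for try_em2 with three annotations and two candidate names; it only assumes numpy and utool (for ProgIter) are importable. The returned gamma holds one soft label distribution per column.

import numpy as np

prob_names = np.array([
    [0.8, 0.2],
    [0.6, 0.4],
    [0.1, 0.9],
])
gam = try_em2(prob_names)
print(gam.shape)   # (2, 5): num_names rows, num_names + num_annots columns
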
Example #22
def try_em():
    """
    CommandLine:
        python -m ibeis.algo.hots.testem test_em --show
        python -m ibeis.algo.hots.testem test_em --show --no-cnn

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.testem import *  # NOQA
        >>> P, Pn, PL, gam, num_labels = test_em()
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> pt.qt4ensure()
        >>> draw_em_graph(P, Pn, PL, gam, num_labels)
        >>> ut.show_if_requested()
    """
    print('EM')

    # Matrix of unary probabilities: the probability that each node takes on a
    # given label, independent of its edges.

    test_case = [
        {
            'name': 1,
            'view': 'L'
        },
        {
            'name': 1,
            'view': 'L'
        },
        {
            'name': 2,
            'view': 'L'
        },
        {
            'name': 2,
            'view': 'R'
        },
        {
            'name': 2,
            'view': 'B'
        },
        {
            'name': 3,
            'view': 'L'
        },
        #{'name': 3, 'view': 'L'},
        #{'name': 4, 'view': 'L'},
    ]

    def make_test_similarity(test_case):
        #toy_params = {
        #    True:  {'mu': 0.9, 'sigma': .1},
        #    False: {'mu': 0.1, 'sigma': .4}
        #}
        # tau = np.pi * 2
        from ibeis import constants as const
        # view_to_ori = const.VIEWTEXT_TO_YAW_RADIANS
        view_to_ori = ut.map_dict_keys(lambda x: const.YAWALIAS[x],
                                       const.VIEWTEXT_TO_YAW_RADIANS)
        # view_to_ori = {
        #     'F': -1 * tau / 4,
        #     'L':  0 * tau / 4,
        #     'B':  1 * tau / 4,
        #     'R':  2 * tau / 4,
        # }
        import vtool_ibeis as vt

        nid_list = np.array(ut.dict_take_column(test_case, 'name'))
        yaw_list = np.array(
            ut.dict_take(view_to_ori, ut.dict_take_column(test_case, 'view')))

        rng = np.random.RandomState(0)
        pmat = []
        for idx in range(len(test_case)):
            nid = nid_list[idx]
            yaw = yaw_list[idx]
            p_same = nid == nid_list
            p_comp = 1 - vt.ori_distance(yaw_list, yaw) / np.pi
            # estimate noisy measurements
            p_same_m = np.clip(p_same + rng.normal(0, .5, size=len(p_same)), 0,
                               .9)
            p_comp_m = np.clip(p_comp + rng.normal(0, .5, size=len(p_comp)), 0,
                               .9)
            #
            p_same_and_comp = p_same_m * p_comp_m
            pmat.append(p_same_and_comp)
        #
        P = np.array(pmat)
        P[np.diag_indices(len(P))] = 0
        P = P + P.T / 2
        P = np.clip(P, .01, .99)
        print(ut.hz_str(' P = ', ut.repr2(P, precision=2, max_line_width=140)))
        return P

    Pn = make_test_similarity(test_case)

    if False:
        Pn = np.array(
            np.matrix(b"""
            .0 .7 .3 .2 .4 .5;
            .7 .0 .4 .4 .3 .5;
            .3 .4 .0 .6 .1 .5;
            .2 .4 .6 .0 .2 .3;
            .4 .3 .1 .2 .0 .8;
            .5 .5 .5 .3 .8 .0
            """))

        PL = np.array(
            np.matrix(b"""
            .7 .5 .5;
            .8 .4 .3;
            .5 .7 .3;
            .5 .8 .4;
            .3 .2 .8;
            .5 .5 .8
            """))

    if True:
        Pn = np.array(
            np.matrix(b"""
            1.0  0.7  0.4  0.2;
            0.7  1.0  0.4  0.4;
            0.4  0.4  1.0  0.6;
            0.2  0.4  0.6  1.0
            """))

        PL = np.array(
            np.matrix(b"""
            0.7  0.5  0.5;
            0.8  0.4  0.3;
            0.5  0.7  0.3;
            0.5  0.8  0.4
            """))
    num_nodes = Pn.shape[0]

    for num_labels in range(1, 2):
        #Pn = np.array(np.matrix(
        #    b"""
        #    .0 .7 .3 .2 .4 .5;
        #    .7 .0 .4 .4 .3 .5;
        #    .3 .4 .0 .6 .1 .5;
        #    .2 .4 .6 .0 .2 .3;
        #    .4 .3 .1 .2 .0 .8;
        #    .5 .5 .5 .3 .8 .0
        #    """))

        # Uniform distribution over labels
        if 0:
            PL = np.ones((num_nodes, num_labels)) / num_labels
            # Give nodes preferences
            PL[np.diag_indices(num_labels)] *= 1.01
            PL /= np.linalg.norm(PL, axis=0)
            # PL[0, :] = .01 / (num_labels - 1)
            # PL[0, 0] = .99
        else:
            PL /= np.linalg.norm(PL, axis=0)

        # Number of nodes
        num_nodes = Pn.shape[0]
        # Number of classes
        num_labels = PL.shape[1]
        #num_labels = num_nodes
        #if 0 or num_labels != 3:
        #    PL = np.ones((num_nodes, num_labels)) / num_labels
        #    # PL[0, :] = .01 / (num_labels - 1)
        #    # PL[0, 0] = .99
        d = num_labels + num_nodes

        # Stack everything into a single matrix
        zero_part = np.zeros((num_labels, num_nodes + num_labels))
        prob_part = np.hstack([PL, Pn])
        #print(ut.hz_str(' prob_part = ', ut.repr2(prob_part[:, :], precision=2)))
        P = np.vstack([zero_part, prob_part])

        # Gamma will hold a probability distribution over the nodes
        # The labeled nodes must match themselves.
        # The unlabeled nodes are initialized with a uniform distribution.
        gam = np.hstack([
            np.eye(num_labels),
            np.ones((num_labels, num_nodes)) / num_labels
        ])

        print('Initialize')
        print('num_labels = %r' % (num_labels, ))
        # print(ut.hz_str(' gamma = ', ut.repr2(gam[:, num_labels:], max_line_width=140, precision=2)))
        print(
            ut.hz_str(' gamma = ',
                      ut.repr2(gam, max_line_width=140, precision=2)))

        delta_i = np.zeros(num_labels)

        def dErr(i, gam, P, delta_i=delta_i):
            # expected likelihood is the cross-entropy error
            delta_i[:] = 0
            # Compute the gradient of the cross entropy error
            # This is over both names and annotations
            for j in range(d):
                if i != j:
                    delta_i += gam[:, j] * np.log(P[i, j] / (1 - P[i, j]))
            # compute the projected gradient
            delta_i_hat = delta_i - delta_i.sum() / num_labels
            return delta_i_hat

        # Maximizes the expected likelihood of gamma
        learn_rate = 0.05
        num_iters = 1000
        dGam = np.zeros(gam.shape)
        # for count in range(num_iters):
        for count in ut.ProgIter(range(num_iters), label='EM', bs=True):
            # Compute error gradient
            for i in range(num_labels, d):
                dGam[:, i] = dErr(i, gam, P)
            # Make a step in the gradient direction
            # print(ut.hz_str(' dGam = ', ut.repr2(dGam, max_line_width=140, precision=2)))
            gam = gam + learn_rate * dGam
            # Normalize
            gam = np.clip(gam, 0, 1)
            for i in range(num_labels, d):
                gam[:, i] = gam[:, i] / np.sum(gam[:, i])
        # print(ut.hz_str(' gamma = ', ut.repr2(gam, max_line_width=140, precision=2)))
        # print(ut.hz_str(' gamma = ', ut.repr2(gam[:, num_labels:], max_line_width=140, precision=2)))
        print('Finished')
    return P, Pn, PL, gam, num_labels
Example #23
def test_em2(prob_names, prob_annots=None):
    """
    assert prob_names.shape == (nAnnots, nNames)
    """
    learn_rate = 0.05
    num_iters = 1

    # Matrix of unary probabilities: the probability that each node takes on a
    # given label, independent of its edges.
    num_annots, num_names = prob_names.shape

    # prevent zero probabilities
    prob_names_ = prob_names + 1E-9
    prob_names_ /= prob_names_.sum(axis=1)[:, None]

    if prob_annots is None:
        prob_annots_ = np.full((num_annots, num_annots), 1 / num_annots)
        prob_annots_[np.diag_indices(num_annots)] *= 1.01
        # perturb
        rng = np.random.RandomState(0)
        prob_annots_ += (rng.randn(*prob_annots_.shape)) / 100
        prob_annots_ /= prob_annots_.sum(axis=1)[:, None]
        prob_annots_ = (prob_annots_.T + prob_annots_) / 2
    else:
        prob_annots_ = prob_annots + 1E-9
        prob_annots_ /= prob_annots_.sum(axis=1)[:, None]

    # Stack everything into a single matrix
    prob_part = np.hstack([prob_names_, prob_annots_])
    zero_part = np.zeros((num_names, num_annots + num_names))
    prior = np.vstack([zero_part, prob_part])

    # Gamma will hold a probability distribution over the nodes
    # The labeled nodes must match themselves.
    # The unlabeled nodes are initialized with a uniform distribution.
    gam = np.hstack([np.eye(num_names), np.ones((num_names, num_annots)) / num_names])

    verbose = 1
    if verbose:
        print('Initialize')
        print('num_names = %r' % (num_names,))
        print(ut.hz_str('prior = ', ut.array2string2(prob_part[:, :], precision=2, max_line_width=140, suppress_small=True)))
        print(ut.hz_str('gamma = ', ut.array2string2(gam[:, :], max_line_width=140, precision=2, suppress_small=True)))
    #print(ut.hz_str(' gamma = ', ut.array_repr2(gam, max_line_width=140, precision=2)))

    delta_i = np.zeros(num_names)
    def dErr(i, gam, prior, delta_i=delta_i, num_names=num_names):
        # expected likelihood is the cross-entropy error
        delta_i[:] = 0
        # Compute the gradient of the cross entropy error
        # This is over both names and annotations
        jdxs = [j for j in range(prior.shape[0]) if j != i]
        prior_ij = prior[i, jdxs]
        np.log(prior_ij / (1 - prior_ij))
        gam[:, jdxs]

        for j in range(prior.shape[0]):
            if i != j:
                delta_i += gam[:, j] * np.log(prior[i, j] / (1 - prior[i, j]))
        # compute the projected gradient
        delta_i_hat = delta_i - delta_i.sum() / num_names
        return delta_i_hat

    # Build node for each annot and each name
    num_nodes = num_annots + num_names

    # Maximizes the expected likelihood of gamma
    dGam = np.zeros(gam.shape)
    # for count in range(num_iters):
    for count in ut.ProgIter(range(num_iters), label='EM', bs=True):
        # Compute error gradient
        for i in range(num_names, num_nodes):
            dGam[:, i] = dErr(i, gam, prior)
        # Make a step in the gradient direction
        # print(ut.hz_str(' dGam = ', ut.array_repr2(dGam, max_line_width=140, precision=2)))
        gam = gam + learn_rate * dGam
        # Normalize
        gam = np.clip(gam, 0, 1)
        for i in range(num_names, num_nodes):
            gam[:, i] = gam[:, i] / np.sum(gam[:, i])
    # print(ut.hz_str(' gamma = ', ut.array_repr2(gam, max_line_width=140, precision=2)))
    if verbose:
        print(ut.hz_str(' gamma = ', ut.array2string2(gam[:, num_names:], max_line_width=140, precision=2, suppress_small=True)))
        print('Finished')
    return gam
Example #24
def draw_em_graph(P, Pn, PL, gam, num_labels):
    """
        python -m ibeis.algo.hots.testem test_em --show --no-cnn
    """
    num_labels = PL.shape[1]
    name_nodes = ['N%d' % x for x in list(range(1, num_labels + 1))]
    #annot_nodes = ut.chr_range(len(Pn), base='A')
    annot_nodes = ['X%d' % x for x in list(range(1, len(Pn) + 1))]

    # name_nodes = ut.chr_range(num_labels, base='A')

    nodes = name_nodes + annot_nodes

    PL2 = gam[:, num_labels:].T
    PL2 += .01
    PL2 = PL2 / PL2.sum(axis=1)[:, None]
    # PL2 = PL2 / np.linalg.norm(PL2, axis=0)
    zero_part = np.zeros((num_labels, len(Pn) + num_labels))
    prob_part = np.hstack([PL2, Pn])
    print(ut.hz_str(' PL2 = ', ut.array_repr2(PL2, precision=2)))
    # Redo p with posteriors
    if ut.get_argflag('--postem'):
        P = np.vstack([zero_part, prob_part])

    weight_matrix = P  # NOQA
    graph = ut.nx_from_matrix(P, nodes=nodes)
    graph = graph.to_directed()
    # delete graph
    dup_edges = []
    seen_ = set([])
    for u, v in graph.edges():
        if u < v:
            u, v = v, u
        if (u, v) not in seen_:
            seen_.add((u, v))
        else:
            dup_edges.append((u, v))
    graph.remove_edges_from(dup_edges)
    import plottool as pt
    import networkx as nx

    if len(name_nodes) == 3 and len(annot_nodes) == 4:
        graph.node[annot_nodes[0]]['pos'] = (20.,  200.)
        graph.node[annot_nodes[1]]['pos'] = (220., 200.)
        graph.node[annot_nodes[2]]['pos'] = (20.,  100.)
        graph.node[annot_nodes[3]]['pos'] = (220., 100.)
        graph.node[name_nodes[0]]['pos'] = (10., 300.)
        graph.node[name_nodes[1]]['pos'] = (120., 300.)
        graph.node[name_nodes[2]]['pos'] = (230., 300.)
        nx.set_node_attributes(graph, 'pin', 'true')

        print('annot_nodes = %r' % (annot_nodes,))
        print('name_nodes = %r' % (name_nodes,))

        for u in annot_nodes:
            for v in name_nodes:
                if graph.has_edge(u, v):
                    print('1) u, v = %r' % ((u, v),))
                    graph.edge[u][v]['taillabel'] = graph.edge[u][v]['label']
                    graph.edge[u][v]['color'] = pt.ORANGE
                    graph.edge[u][v]['labelcolor'] = pt.BLUE
                    del graph.edge[u][v]['label']
                elif graph.has_edge(v, u):
                    print('2) u, v = %r' % ((u, v),))
                    graph.edge[v][u]['headlabel'] = graph.edge[v][u]['label']
                    graph.edge[v][u]['color'] = pt.ORANGE
                    graph.edge[v][u]['labelcolor'] = pt.BLUE
                    del graph.edge[v][u]['label']
                else:
                    print((u, v))
                    print('!!')

    # import itertools
    # name_const_edges = [(u, v, {'style': 'invis'}) for u, v in itertools.combinations(name_nodes, 2)]
    # graph.add_edges_from(name_const_edges)
    # nx.set_edge_attributes(graph, 'constraint', {edge: False for edge in graph.edges() if edge[0] == 'b' or edge[1] == 'b'})
    # nx.set_edge_attributes(graph, 'constraint', {edge: False for edge in graph.edges() if edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, 'constraint', {edge: True for edge in graph.edges() if edge[0] in name_nodes or edge[1] in name_nodes})
    # nx.set_edge_attributes(graph, 'constraint', {edge: True for edge in graph.edges() if (edge[0] in ['a', 'b'] and edge[1] in ['a', 'b']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, 'constraint', {edge: True for edge in graph.edges() if (edge[0] in ['c'] or edge[1] in ['c']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, 'constraint', {edge: True for edge in graph.edges() if (edge[0] in ['a'] or edge[1] in ['a']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, 'constraint', {edge: True for edge in graph.edges() if (edge[0] in ['b'] or edge[1] in ['b']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # graph.add_edges_from([('root', n) for n in nodes])
    # {node: 'names' for node in name_nodes})
    nx.set_node_attributes(graph, 'color', {node: pt.RED for node in name_nodes})
    # nx.set_node_attributes(graph, 'width', {node: 20 for node in nodes})
    # nx.set_node_attributes(graph, 'height', {node: 20 for node in nodes})
    #nx.set_node_attributes(graph, 'group', {node: 'names' for node in name_nodes})
    #nx.set_node_attributes(graph, 'group', {node: 'annots' for node in annot_nodes})
    nx.set_node_attributes(graph, 'groupid', {node: 'names' for node in name_nodes})
    nx.set_node_attributes(graph, 'groupid', {node: 'annots' for node in annot_nodes})
    graph.graph['clusterrank'] = 'local'
    # graph.graph['groupattrs'] = {
    #     'names': {'rankdir': 'LR', 'rank': 'source'},
    #     'annots': {'rankdir': 'TB', 'rank': 'source'},
    # }
    ut.nx_delete_edge_attr(graph, 'weight')
    # pt.show_nx(graph, fontsize=10, layoutkw={'splines': 'spline', 'prog': 'dot', 'sep': 2.0}, verbose=1)
    layoutkw = {
        # 'rankdir': 'LR',
        'splines': 'spline',
        # 'splines': 'ortho',
        # 'splines': 'curved',
        # 'compound': 'True',
        # 'prog': 'dot',
        'prog': 'neato',
        # 'packMode': 'clust',
        # 'sep': 4,
        # 'nodesep': 1,
        # 'ranksep': 1,
    }
    #pt.show_nx(graph, fontsize=12, layoutkw=layoutkw, verbose=0, as_directed=False)
    pt.show_nx(graph, fontsize=6, fontname='Ubuntu', layoutkw=layoutkw, verbose=0, as_directed=False)
    pt.interactions.zoom_factory()
Example #25
def temp_model(num_annots,
               num_names,
               score_evidence=[],
               name_evidence=[],
               other_evidence={},
               noquery=False,
               verbose=None,
               **kwargs):
    if verbose is None:
        verbose = ut.VERBOSE

    method = kwargs.pop('method', None)
    model = make_name_model(num_annots, num_names, verbose=verbose, **kwargs)

    if verbose:
        model.print_priors(ignore_ttypes=[MATCH_TTYPE, SCORE_TTYPE])

    model, evidence, soft_evidence = update_model_evidence(
        model, name_evidence, score_evidence, other_evidence)

    if verbose and len(soft_evidence) != 0:
        model.print_priors(ignore_ttypes=[MATCH_TTYPE, SCORE_TTYPE],
                           title='Soft Evidence',
                           color='green')

    # if verbose:
    #    ut.colorprint('\n --- Soft Evidence ---', 'white')
    #    for ttype, cpds in model.ttype2_cpds.items():
    #        if ttype != MATCH_TTYPE:
    #            for fs_ in ut.ichunks(cpds, 4):
    #                ut.colorprint(ut.hz_str([f._cpdstr('psql') for f in fs_]),
    #                              'green')

    if verbose:
        ut.colorprint('\n --- Inference ---', 'red')

    if (len(evidence) > 0 or len(soft_evidence) > 0) and not noquery:
        evidence = model._ensure_internal_evidence(evidence)
        query_vars = []
        query_vars += ut.list_getattr(model.ttype2_cpds[NAME_TTYPE],
                                      'variable')
        # query_vars += ut.list_getattr(model.ttype2_cpds[MATCH_TTYPE], 'variable')
        query_vars = ut.setdiff(query_vars, evidence.keys())
        # query_vars = ut.setdiff(query_vars, soft_evidence.keys())
        query_results = cluster_query(model, query_vars, evidence,
                                      soft_evidence, method)
    else:
        query_results = {}

    factor_list = query_results['factor_list']

    if verbose:
        if verbose:
            logger.info('+--------')
        semtypes = [model.var2_cpd[f.variables[0]].ttype for f in factor_list]
        for type_, factors in ut.group_items(factor_list, semtypes).items():
            logger.info('Result Factors (%r)' % (type_, ))
            factors = ut.sortedby(factors, [f.variables[0] for f in factors])
            for fs_ in ut.ichunks(factors, 4):
                ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]),
                              'yellow')
        logger.info('MAP assignments')
        top_assignments = query_results.get('top_assignments', [])
        tmp = []
        for lbl, val in top_assignments:
            tmp.append('%s : %.4f' % (ut.repr2(lbl), val))
        logger.info(ut.align('\n'.join(tmp), ' :'))
        logger.info('L_____\n')

    showkw = dict(evidence=evidence,
                  soft_evidence=soft_evidence,
                  **query_results)

    from wbia.algo.hots import pgm_viz

    pgm_viz.show_model(model, **showkw)
    return (model, evidence, query_results)
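The query-variable selection in temp_model reduces to a set difference over variable names; a minimal sketch with invented variable names (not the real NAME_TTYPE variables):

# Hedged illustration: query every name variable that is not already observed.
name_vars = ['N0', 'N1', 'N2', 'N3']        # hypothetical name variables
evidence = {'N1': 0, 'S01': 1}              # hypothetical observed evidence
query_vars = [v for v in name_vars if v not in evidence]
print(query_vars)                           # ['N0', 'N2', 'N3']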
Exemple #26
0
    def make_graph(infr, show=False):
        import networkx as nx
        import itertools
        cm_list = infr.cm_list
        unique_nids, prob_names = infr.make_prob_names()
        thresh = infr.choose_thresh()

        # Simply cut any edge with a weight less than a threshold
        qaid_list = [cm.qaid for cm in cm_list]
        postcut = prob_names > thresh
        qxs, nxs = np.where(postcut)
        if False:
            kw = dict(precision=2, max_line_width=140, suppress_small=True)
            print(ut.hz_str('prob_names = ', ut.array2string2((prob_names), **kw)))
            print(ut.hz_str('postcut = ', ut.array2string2((postcut).astype(int), **kw)))
        matching_qaids = ut.take(qaid_list, qxs)
        matched_nids = ut.take(unique_nids, nxs)

        qreq_ = infr.qreq_

        nodes = ut.unique(qreq_.qaids.tolist() + qreq_.daids.tolist())
        if not hasattr(qreq_, 'dnids'):
            qreq_.dnids = qreq_.ibs.get_annot_nids(qreq_.daids)
            qreq_.qnids = qreq_.ibs.get_annot_nids(qreq_.qaids)
        dnid2_daids = ut.group_items(qreq_.daids, qreq_.dnids)
        grouped_aids = dnid2_daids.values()
        matched_daids = ut.take(dnid2_daids, matched_nids)
        name_cliques = [list(itertools.combinations(aids, 2)) for aids in grouped_aids]
        aid_matches = [list(ut.product([qaid], daids)) for qaid, daids in
                       zip(matching_qaids, matched_daids)]

        graph = nx.Graph()
        graph.add_nodes_from(nodes)
        graph.add_edges_from(ut.flatten(name_cliques))
        graph.add_edges_from(ut.flatten(aid_matches))

        #matchless_quries = ut.take(qaid_list, ut.index_complement(qxs, len(qaid_list)))
        name_nodes = [('nid', l) for l in qreq_.dnids]
        db_aid_nid_edges = list(zip(qreq_.daids, name_nodes))
        #query_aid_nid_edges = list(zip(matching_qaids, [('nid', l) for l in matched_nids]))
        #G = nx.Graph()
        #G.add_nodes_from(matchless_quries)
        #G.add_edges_from(db_aid_nid_edges)
        #G.add_edges_from(query_aid_nid_edges)

        graph.add_edges_from(db_aid_nid_edges)

        if infr.user_feedback is not None:
            user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
            p_bg = 0.0
            part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
            part2 = p_bg * user_feedback['p_notcomp']
            p_same_list = part1 + part2
            for aid1, aid2, p_same in zip(user_feedback['aid1'],
                                          user_feedback['aid2'], p_same_list):
                if p_same > .5:
                    if not graph.has_edge(aid1, aid2):
                        graph.add_edge(aid1, aid2)
                else:
                    if graph.has_edge(aid1, aid2):
                        graph.remove_edge(aid1, aid2)
        if show:
            import plottool as pt
            nx.set_node_attributes(graph, 'color', {aid: pt.LIGHT_PINK
                                                    for aid in qreq_.daids})
            nx.set_node_attributes(graph, 'color', {aid: pt.TRUE_BLUE
                                                    for aid in qreq_.qaids})
            nx.set_node_attributes(graph, 'color', {
                aid: pt.LIGHT_PURPLE
                for aid in np.intersect1d(qreq_.qaids, qreq_.daids)})
            nx.set_node_attributes(graph, 'label', {node: 'n%r' % (node[1],)
                                                    for node in name_nodes})
            nx.set_node_attributes(graph, 'color', {node: pt.LIGHT_GREEN
                                                    for node in name_nodes})
        if show:
            import plottool as pt
            pt.show_nx(graph, layoutkw={'prog': 'neato'}, verbose=False)
        return graph
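The user-feedback weighting in make_graph blends the match probability with a background rate before thresholding edges at 0.5; a small numeric check with made-up feedback values:

import numpy as np

# Hedged numeric check of the feedback combination used above.
p_match = np.array([0.9, 0.2, 0.6])
p_notcomp = np.array([0.1, 0.0, 0.5])   # probability the pair is not comparable
p_bg = 0.0                              # background match rate, as in the example
p_same = p_match * (1 - p_notcomp) + p_bg * p_notcomp
print(p_same)                           # [0.81 0.2  0.3 ]
print(p_same > 0.5)                     # keep edge: [ True False False]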
Exemple #27
0
def dummy_example_depcacahe():
    r"""
    CommandLine:
        python -m dtool.example_depcache --exec-dummy_example_depcacahe

    Example:
        >>> # ENABLE_DOCTEST
        >>> from dtool.example_depcache import *  # NOQA
        >>> depc = dummy_example_depcacahe()
        >>> ut.show_if_requested()
    """
    fname = None
    # fname = 'dummy_default_depcache'
    fname = ':memory:'

    depc = testdata_depc(fname)

    tablename = 'fgweight'
    # print('[test] fgweight_path =\n%s' % (ut.repr3(depc.get_dependencies(tablename), nl=1),))
    # print('[test] keypoint =\n%s' % (ut.repr3(depc.get_dependencies('keypoint'), nl=1),))
    # print('[test] descriptor =\n%s' % (ut.repr3(depc.get_dependencies('descriptor'), nl=1),))
    # print('[test] spam =\n%s' % (ut.repr3(depc.get_dependencies('spam'), nl=1),))

    root_rowids = [5, 3]
    desc_rowids = depc.get_rowids('descriptor', root_rowids)  # NOQA

    table = depc[tablename]  # NOQA

    #example_getter_methods(depc, 'vsmany', root_rowids)
    # example_getter_methods(depc, 'chipmask', root_rowids)
    # example_getter_methods(depc, 'keypoint', root_rowids)
    # example_getter_methods(depc, 'chip', root_rowids)

    test_getters(depc)

    #import plottool as pt
    # pt.ensure_pylab_qt4()

    graph = depc.make_graph()  # NOQA
    #pt.show_nx(graph)

    print('---------- 111 -----------')

    # Try testing the algorithm
    req = depc.new_request('vsmany', root_rowids, root_rowids, {})
    print('req = %r' % (req,))
    req.execute()

    print('---------- 222 -----------')

    cfgdict = {'sver_on': False}
    req = depc.new_request('vsmany', root_rowids, root_rowids, cfgdict)
    req.execute()

    print('---------- 333 -----------')

    cfgdict = {'sver_on': False, 'adapt_shape': False}
    req = depc.new_request('vsmany', root_rowids, root_rowids, cfgdict)
    req.execute()

    print('---------- 444 -----------')

    req = depc.new_request('vsmany', root_rowids, root_rowids, {})
    req.execute()

    #ut.InstanceList(
    db = list(depc.fname_to_db.values())[0]
    #db_list = ut.InstanceList(depc.fname_to_db.values())
    #db_list.print_table_csv('config', exclude_columns='config_strid')

    print('config table')
    column_list, column_names = db.get_table_column_data(tablename,
                                                         ['config_strid'])
    print('\n'.join([ut.hz_str(*list(ut.interleave((r, [', '] * (len(r) - 1)))))
                     for r in list(zip(*[[ut.repr3(r, nl=2) for r in col] for col in column_list]))]))

    return depc
Exemple #28
0
def test_em():
    """
    CommandLine:
        python -m ibeis.algo.hots.testem test_em --show
        python -m ibeis.algo.hots.testem test_em --show --no-cnn

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.testem import *  # NOQA
        >>> P, Pn, PL, gam, num_labels = test_em()
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> pt.qt4ensure()
        >>> draw_em_graph(P, Pn, PL, gam, num_labels)
        >>> ut.show_if_requested()
    """
    print('EM')

    # Matrix of unary probabilities: the probability that each node takes on a
    # given label, independent of its edges.

    test_case = [
        {'name': 1, 'view': 'L'},
        {'name': 1, 'view': 'L'},
        {'name': 2, 'view': 'L'},
        {'name': 2, 'view': 'R'},
        {'name': 2, 'view': 'B'},
        {'name': 3, 'view': 'L'},
        #{'name': 3, 'view': 'L'},
        #{'name': 4, 'view': 'L'},
    ]

    def make_test_similarity(test_case):
        #toy_params = {
        #    True:  {'mu': 0.9, 'sigma': .1},
        #    False: {'mu': 0.1, 'sigma': .4}
        #}
        # tau = np.pi * 2
        from ibeis import constants as const
        # view_to_ori = const.VIEWTEXT_TO_YAW_RADIANS
        view_to_ori = ut.map_dict_keys(lambda x: const.YAWALIAS[x], const.VIEWTEXT_TO_YAW_RADIANS)
        # view_to_ori = {
        #     'F': -1 * tau / 4,
        #     'L':  0 * tau / 4,
        #     'B':  1 * tau / 4,
        #     'R':  2 * tau / 4,
        # }
        import vtool as vt

        nid_list = np.array(ut.dict_take_column(test_case, 'name'))
        yaw_list = np.array(ut.dict_take(view_to_ori, ut.dict_take_column(test_case, 'view')))

        rng = np.random.RandomState(0)
        pmat = []
        for idx in range(len(test_case)):
            nid = nid_list[idx]
            yaw = yaw_list[idx]
            p_same = nid == nid_list
            p_comp = 1 - vt.ori_distance(yaw_list, yaw) / np.pi
            # estimate noisy measurements
            p_same_m = np.clip(p_same + rng.normal(0, .5, size=len(p_same)), 0, .9)
            p_comp_m = np.clip(p_comp + rng.normal(0, .5, size=len(p_comp)), 0, .9)
            #
            p_same_and_comp = p_same_m * p_comp_m
            pmat.append(p_same_and_comp)
        #
        P = np.array(pmat)
        P[np.diag_indices(len(P))] = 0
        P = (P + P.T) / 2  # symmetrize the noisy similarities
        P = np.clip(P, .01, .99)
        print(ut.hz_str(' P = ', ut.array_repr2(P, precision=2, max_line_width=140)))
        return P

    Pn = make_test_similarity(test_case)

    if False:
        Pn = np.array(np.matrix(
            b"""
            .0 .7 .3 .2 .4 .5;
            .7 .0 .4 .4 .3 .5;
            .3 .4 .0 .6 .1 .5;
            .2 .4 .6 .0 .2 .3;
            .4 .3 .1 .2 .0 .8;
            .5 .5 .5 .3 .8 .0
            """))

        PL = np.array(np.matrix(
            b"""
            .7 .5 .5;
            .8 .4 .3;
            .5 .7 .3;
            .5 .8 .4;
            .3 .2 .8;
            .5 .5 .8
            """))

    if True:
        Pn = np.array(np.matrix(
            b"""
            1.0  0.7  0.4  0.2;
            0.7  1.0  0.4  0.4;
            0.4  0.4  1.0  0.6;
            0.2  0.4  0.6  1.0
            """))

        PL = np.array(np.matrix(
            b"""
            0.7  0.5  0.5;
            0.8  0.4  0.3;
            0.5  0.7  0.3;
            0.5  0.8  0.4
            """))
    num_nodes = Pn.shape[0]

    for num_labels in range(1, 2):
        #Pn = np.array(np.matrix(
        #    b"""
        #    .0 .7 .3 .2 .4 .5;
        #    .7 .0 .4 .4 .3 .5;
        #    .3 .4 .0 .6 .1 .5;
        #    .2 .4 .6 .0 .2 .3;
        #    .4 .3 .1 .2 .0 .8;
        #    .5 .5 .5 .3 .8 .0
        #    """))

        # Uniform distribution over labels
        if 0:
            PL = np.ones((num_nodes, num_labels)) / num_labels
            # Give nodes preferences
            PL[np.diag_indices(num_labels)] *= 1.01
            PL /= np.linalg.norm(PL, axis=0)
            # PL[0, :] = .01 / (num_labels - 1)
            # PL[0, 0] = .99
        else:
            PL /= np.linalg.norm(PL, axis=0)

        # Number of nodes
        num_nodes = Pn.shape[0]
        # Number of classes
        num_labels = PL.shape[1]
        #num_labels = num_nodes
        #if 0 or num_labels != 3:
        #    PL = np.ones((num_nodes, num_labels)) / num_labels
        #    # PL[0, :] = .01 / (num_labels - 1)
        #    # PL[0, 0] = .99
        d = num_labels + num_nodes

        # Stack everything into a single matrix
        zero_part = np.zeros((num_labels, num_nodes + num_labels))
        prob_part = np.hstack([PL, Pn])
        #print(ut.hz_str(' prob_part = ', ut.array_repr2(prob_part[:, :], precision=2)))
        P = np.vstack([zero_part, prob_part])

        # Gamma will hold a probability distribution over the nodes
        # The labeled nodes must match themselves.
        # The unlabeled nodes are initialized with a uniform distribution.
        gam = np.hstack([np.eye(num_labels), np.ones((num_labels, num_nodes)) / num_labels])

        print('Initialize')
        print('num_labels = %r' % (num_labels,))
        # print(ut.hz_str(' gamma = ', ut.array_repr2(gam[:, num_labels:], max_line_width=140, precision=2)))
        print(ut.hz_str(' gamma = ', ut.array_repr2(gam, max_line_width=140, precision=2)))

        delta_i = np.zeros(num_labels)
        def dErr(i, gam, P, delta_i=delta_i):
            # expected likelihood is the cross-entropy error
            delta_i[:] = 0
            # Compute the gradient of the cross entropy error
            # This is over both names and annotations
            for j in range(d):
                if i != j:
                    delta_i += gam[:, j] * np.log(P[i, j] / (1 - P[i, j]))
            # compute the projected gradient
            delta_i_hat = delta_i - delta_i.sum() / num_labels
            return delta_i_hat

        # Maximizes the expected likelihood of gamma
        learn_rate = 0.05
        num_iters = 1000
        dGam = np.zeros(gam.shape)
        # for count in range(num_iters):
        for count in ut.ProgIter(range(num_iters), label='EM', bs=True):
            # Compute error gradient
            for i in range(num_labels, d):
                dGam[:, i] = dErr(i, gam, P)
            # Make a step in the gradient direction
            # print(ut.hz_str(' dGam = ', ut.array_repr2(dGam, max_line_width=140, precision=2)))
            gam = gam + learn_rate * dGam
            # Normalize
            gam = np.clip(gam, 0, 1)
            for i in range(num_labels, d):
                gam[:, i] = gam[:, i] / np.sum(gam[:, i])
        # print(ut.hz_str(' gamma = ', ut.array_repr2(gam, max_line_width=140, precision=2)))
        # print(ut.hz_str(' gamma = ', ut.array_repr2(gam[:, num_labels:], max_line_width=140, precision=2)))
        print('Finished')
    return P, Pn, PL, gam, num_labels
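The inner loop of test_em is a projected-gradient update on a per-node label distribution; a stripped-down sketch with plain numpy (the sizes and random affinity matrix are invented for illustration, not the data above):

import numpy as np

# Hedged sketch of the update above: gam[:, i] is a distribution over labels
# for node i; it is pushed along the projected gradient and renormalized.
rng = np.random.RandomState(0)
num_labels, num_nodes = 2, 3
d = num_labels + num_nodes
P = np.clip(rng.rand(d, d), 0.01, 0.99)            # toy pairwise affinities
gam = np.hstack([np.eye(num_labels),
                 np.ones((num_labels, num_nodes)) / num_labels])
learn_rate = 0.05
for _ in range(100):
    dGam = np.zeros_like(gam)
    for i in range(num_labels, d):
        delta = np.zeros(num_labels)
        for j in range(d):
            if i != j:
                delta += gam[:, j] * np.log(P[i, j] / (1 - P[i, j]))
        dGam[:, i] = delta - delta.sum() / num_labels   # project: gradient sums to zero
    gam = np.clip(gam + learn_rate * dGam, 0, 1)
    gam[:, num_labels:] /= gam[:, num_labels:].sum(axis=0)  # renormalize columns
print(np.round(gam, 2))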
Exemple #29
0
def classification_report2(y_true, y_pred, target_names=None,
                           sample_weight=None, verbose=True):
    """
    References:
        https://csem.flinders.edu.au/research/techreps/SIE07001.pdf
        https://www.mathworks.com/matlabcentral/fileexchange/5648-bm-cm-?requestedDomain=www.mathworks.com
        Jurman, Riccadonna, Furlanello, (2012). A Comparison of MCC and CEN
            Error Measures in MultiClass Prediction

    Example:
        >>> from ibeis.algo.verif.sklearn_utils import *  # NOQA
        >>> y_true = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3]
        >>> y_pred = [1, 2, 1, 3, 1, 2, 2, 3, 2, 2, 3, 3, 2, 3, 3, 3, 1, 3]
        >>> target_names = None
        >>> sample_weight = None
        >>> verbose = True
        >>> report = classification_report2(y_true, y_pred, verbose=verbose)

    Ignore:
        >>> size = 100
        >>> rng = np.random.RandomState(0)
        >>> p_classes = np.array([.90, .05, .05][0:2])
        >>> p_classes = p_classes / p_classes.sum()
        >>> p_wrong   = np.array([.03, .01, .02][0:2])
        >>> y_true = testdata_ytrue(p_classes, p_wrong, size, rng)
        >>> rs = []
        >>> for x in range(17):
        >>>     p_wrong += .05
        >>>     y_pred = testdata_ypred(y_true, p_wrong, rng)
        >>>     report = classification_report2(y_true, y_pred, verbose='hack')
        >>>     rs.append(report)
        >>> import plottool as pt
        >>> pt.qtensure()
        >>> df = pd.DataFrame(rs).drop(['raw'], axis=1)
        >>> delta = df.subtract(df['target'], axis=0)
        >>> sqrd_error = np.sqrt((delta ** 2).sum(axis=0))
        >>> print('Error')
        >>> print(sqrd_error.sort_values())
        >>> ys = df.to_dict(orient='list')
        >>> pt.multi_plot(ydata_list=ys)
    """
    import sklearn.metrics
    from sklearn.preprocessing import LabelEncoder

    if target_names is None:
        unique_labels = np.unique(np.hstack([y_true, y_pred]))
        if len(unique_labels) == 1 and (unique_labels[0] == 0 or unique_labels[0] == 1):
            target_names = np.array([False, True])
            y_true_ = y_true
            y_pred_ = y_pred
        else:
            lb = LabelEncoder()
            lb.fit(unique_labels)
            y_true_ = lb.transform(y_true)
            y_pred_ = lb.transform(y_pred)
            target_names = lb.classes_
    else:
        y_true_ = y_true
        y_pred_ = y_pred

    # Real data is on the rows,
    # Pred data is on the cols.

    cm = sklearn.metrics.confusion_matrix(
        y_true_, y_pred_, sample_weight=sample_weight)
    confusion = cm  # NOQA

    k = len(cm)  # number of classes
    N = cm.sum()  # number of examples

    real_total = cm.sum(axis=1)
    pred_total = cm.sum(axis=0)

    # the number of "positive" cases **per class**
    n_pos = real_total  # NOQA
    # the number of "negative" cases **per class**
    n_neg = N - n_pos  # NOQA

    # number of true positives per class
    n_tps = np.diag(cm)
    # number of false positives per class
    n_fps = (cm - np.diagflat(np.diag(cm))).sum(axis=0)

    tprs = n_tps / real_total  # true pos rate (recall)
    tpas = n_tps / pred_total  # true pos accuracy (precision)

    unused = (real_total + pred_total) == 0

    fprs = n_fps / n_neg  # false positive rate
    fprs[unused] = np.nan
    # tnrs = 1 - fprs

    rprob = real_total / N
    pprob = pred_total / N

    if len(cm) == 2:
        [[A, B],
         [C, D]] = cm
        mcc_binary = (A * D - B * C) / np.sqrt((A + C) * (B + D) * (A + B) * (C + D))  # NOQA  # binary MCC

        # c2 = vt.ConfusionMetrics().fit(scores, y)

    # bookmaker is analogous to recall, but unbiased by class frequency
    rprob_mat = np.tile(rprob, [k, 1]).T - (1 - np.eye(k))
    bmcm = cm.T / rprob_mat
    bms = np.sum(bmcm.T, axis=0) / N

    # markedness is analogous to precision, but unbiased by class frequency
    pprob_mat = np.tile(pprob, [k, 1]).T - (1 - np.eye(k))
    mkcm = cm / pprob_mat
    mks = np.sum(mkcm.T, axis=0) / N

    mccs = np.sign(bms) * np.sqrt(np.abs(bms * mks))

    perclass_data = ut.odict([
        ('precision', tpas),
        ('recall', tprs),
        ('fpr', fprs),
        ('markedness', mks),
        ('bookmaker', bms),
        ('mcc', mccs),
        ('support', real_total),
    ])

    tpa = np.nansum(tpas * rprob)
    tpr = np.nansum(tprs * rprob)

    fpr = np.nansum(fprs * rprob)

    mk = np.nansum(mks * rprob)
    bm = np.nansum(bms * pprob)

    # The simple mean seems to do the best
    mccs_ = mccs[~np.isnan(mccs)]
    if len(mccs_) == 0:
        mcc_combo = np.nan
    else:
        mcc_combo = np.nanmean(mccs_)

    combined_data = ut.odict([
        ('precision', tpa),
        ('recall', tpr),
        ('fpr', fpr),
        ('markedness', mk),
        ('bookmaker', bm),
        # ('mcc', np.sign(bm) * np.sqrt(np.abs(bm * mk))),
        ('mcc', mcc_combo),
        # np.sign(bm) * np.sqrt(np.abs(bm * mk))),
        ('support', real_total.sum())
    ])

    # Not sure how to compute this. Should it agree with the sklearn impl?
    if verbose == 'hack':
        verbose = False
        mcc_known = sklearn.metrics.matthews_corrcoef(
            y_true, y_pred, sample_weight=sample_weight)
        mcc_raw = np.sign(bm) * np.sqrt(np.abs(bm * mk))

        import scipy as sp
        def gmean(x, w=None):
            if w is None:
                return sp.stats.gmean(x)
            return np.exp(np.nansum(w * np.log(x)) / np.nansum(w))

        def hmean(x, w=None):
            if w is None:
                return sp.stats.hmean(x)
            return 1 / (np.nansum(w * (1 / x)) / np.nansum(w))

        def amean(x, w=None):
            if w is None:
                return np.mean(x)
            return np.nansum(w * x) / np.nansum(w)

        report = {
            'target': mcc_known,
            'raw': mcc_raw,
        }

        # print('%r <<<' % (mcc_known,))
        means = {
            'a': amean,
            # 'h': hmean,
            'g': gmean,
        }
        weights = {
            'p': pprob,
            'r': rprob,
            '': None,
        }
        for mean_key, mean in means.items():
            for w_key, w in weights.items():
                # Hack: skip mean/weight combinations known to be very wrong
                if mean_key == 'g' and w_key in ['r', 'p', '']:
                    continue
                m = mean(mccs, w)
                r_key = '{} {}'.format(mean_key, w_key)
                report[r_key] = m
                # print(r_key)
                # print(np.abs(m - mcc_known))

        # print(ut.repr4(report, precision=8))
        return report
        # print('mcc_known = %r' % (mcc_known,))
        # print('mcc_combo1 = %r' % (mcc_combo1,))
        # print('mcc_combo2 = %r' % (mcc_combo2,))
        # print('mcc_combo3 = %r' % (mcc_combo3,))

    # if target_names is None:
    #     target_names = list(range(k))
    index = pd.Index(target_names, name='class')

    perclass_df = pd.DataFrame(perclass_data, index=index)
    # combined_df = pd.DataFrame(combined_data, index=['ave/sum'])
    combined_df = pd.DataFrame(combined_data, index=['combined'])

    metric_df = pd.concat([perclass_df, combined_df])
    metric_df.index.name = 'class'
    metric_df.columns.name = 'metric'

    pred_id = ['%s' % m for m in target_names]
    real_id = ['%s' % m for m in target_names]
    confusion_df = pd.DataFrame(confusion, columns=pred_id, index=real_id)

    confusion_df = pd.concat([confusion_df, pd.DataFrame(
        [confusion.sum(axis=0)], columns=pred_id, index=['Σp'])])
    confusion_df['Σr'] = np.hstack([confusion.sum(axis=1), [0]])
    confusion_df.index.name = 'real'
    confusion_df.columns.name = 'pred'

    if np.all(confusion_df - np.floor(confusion_df) < .000001):
        confusion_df = confusion_df.astype(int)
    confusion_df.iloc[(-1, -1)] = N
    if np.all(confusion_df - np.floor(confusion_df) < .000001):
        confusion_df = confusion_df.astype(int)
    # np.nan

    if verbose:
        cfsm_str = confusion_df.to_string(float_format=lambda x: '%.1f' % (x,))
        print('Confusion Matrix (real × pred) :')
        print(ut.hz_str('    ', cfsm_str))

        # ut.cprint('\nExtended Report', 'turquoise')
        print('\nEvaluation Metric Report:')
        float_precision = 2
        float_format = '%.' + str(float_precision) + 'f'
        ext_report = metric_df.to_string(float_format=float_format)
        print(ut.hz_str('    ', ext_report))

    report = {
        'metrics': metric_df,
        'confusion': confusion_df,
    }

    # FIXME: What is the difference between sklearn multiclass-MCC
    # and BM * MK MCC?

    def matthews_corrcoef(y_true, y_pred, sample_weight=None):
        from sklearn.metrics.classification import (
            _check_targets, LabelEncoder, confusion_matrix)
        y_type, y_true, y_pred = _check_targets(y_true, y_pred)
        if y_type not in {"binary", "multiclass"}:
            raise ValueError("%s is not supported" % y_type)
        lb = LabelEncoder()
        lb.fit(np.hstack([y_true, y_pred]))
        y_true = lb.transform(y_true)
        y_pred = lb.transform(y_pred)
        C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
        t_sum = C.sum(axis=1)
        p_sum = C.sum(axis=0)
        n_correct = np.trace(C)
        n_samples = p_sum.sum()
        cov_ytyp = n_correct * n_samples - np.dot(t_sum, p_sum)
        cov_ypyp = n_samples ** 2 - np.dot(p_sum, p_sum)
        cov_ytyt = n_samples ** 2 - np.dot(t_sum, t_sum)
        mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
        if np.isnan(mcc):
            return 0.
        else:
            return mcc

    try:
        # mcc = sklearn.metrics.matthews_corrcoef(
        #     y_true, y_pred, sample_weight=sample_weight)
        mcc = matthews_corrcoef(y_true, y_pred, sample_weight=sample_weight)
        # These scales are chosen somewhat arbitrarily in the context of a
        # computer vision application with relatively reasonable quality data
        # https://stats.stackexchange.com/questions/118219/how-to-interpret
        mcc_significance_scales = ut.odict([
            (1.0, 'perfect'),
            (0.9, 'very strong'),
            (0.7, 'strong'),
            (0.5, 'significant'),
            (0.3, 'moderate'),
            (0.2, 'weak'),
            (0.0, 'negligible'),
        ])
        for k, v in mcc_significance_scales.items():
            if np.abs(mcc) >= k:
                if verbose:
                    print('classifier correlation is %s' % (v,))
                break
        if verbose:
            float_precision = 2
            print(('MCC\' = %.' + str(float_precision) + 'f') % (mcc,))
        report['mcc'] = mcc
    except ValueError:
        pass
    return report
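As a compact sanity check of the per-class quantities computed above (real labels on rows, predictions on columns), here is a hedged worked example on a tiny, invented label set:

import numpy as np
import sklearn.metrics

# Hedged worked example of the per-class metrics above.
y_true = [0, 0, 0, 1, 1, 2, 2, 2]
y_pred = [0, 0, 1, 1, 1, 2, 2, 0]
cm = sklearn.metrics.confusion_matrix(y_true, y_pred)   # real on rows, pred on cols
real_total = cm.sum(axis=1)
pred_total = cm.sum(axis=0)
n_tps = np.diag(cm)
recall = n_tps / real_total             # true positive rate per class
precision = n_tps / pred_total          # positive predictive value per class
print(cm)
print('recall    =', np.round(recall, 2))      # [0.67 1.   0.67]
print('precision =', np.round(precision, 2))   # [0.67 0.67 1.  ]
print('MCC       =', sklearn.metrics.matthews_corrcoef(y_true, y_pred))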
Exemple #30
0
def print_priors(model, ignore_ttypes=[], title='Priors', color='darkblue'):
    ut.colorprint('\n --- %s ---' % (title,), color=color)
    for ttype, cpds in model.ttype2_cpds.items():
        if ttype not in ignore_ttypes:
            for fs_ in ut.ichunks(cpds, 4):
                ut.colorprint(ut.hz_str([f._cpdstr('psql') for f in fs_]), color)
Exemple #31
0
def test_model(num_annots, num_names, score_evidence=[], name_evidence=[],
               other_evidence={}, noquery=False, verbose=None,
               **kwargs):
    if verbose is None:
        verbose = ut.VERBOSE

    method = kwargs.pop('method', None)
    model = make_name_model(num_annots, num_names, verbose=verbose, **kwargs)

    if verbose:
        model.print_priors(ignore_ttypes=['match', 'score'])

    model, evidence, soft_evidence = update_model_evidence(
        model, name_evidence, score_evidence, other_evidence)

    if verbose and len(soft_evidence) != 0:
        model.print_priors(ignore_ttypes=['match', 'score'],
                           title='Soft Evidence', color='green')

    #if verbose:
    #    ut.colorprint('\n --- Soft Evidence ---', 'white')
    #    for ttype, cpds in model.ttype2_cpds.items():
    #        if ttype != 'match':
    #            for fs_ in ut.ichunks(cpds, 4):
    #                ut.colorprint(ut.hz_str([f._cpdstr('psql') for f in fs_]),
    #                              'green')

    if verbose:
        ut.colorprint('\n --- Inference ---', 'red')

    if (len(evidence) > 0 or len(soft_evidence) > 0) and not noquery:
        evidence = model._ensure_internal_evidence(evidence)
        query_vars = []
        query_vars += ut.list_getattr(model.ttype2_cpds['name'], 'variable')
        #query_vars += ut.list_getattr(model.ttype2_cpds['match'], 'variable')
        query_vars = ut.setdiff(query_vars, evidence.keys())
        #query_vars = ut.setdiff(query_vars, soft_evidence.keys())
        query_results = cluster_query(model, query_vars, evidence,
                                      soft_evidence, method)
    else:
        query_results = {}

    factor_list = query_results.get('factor_list', [])

    if verbose:
        print('+--------')
        semtypes = [model.var2_cpd[f.variables[0]].ttype
                    for f in factor_list]
        for type_, factors in ut.group_items(factor_list, semtypes).items():
            print('Result Factors (%r)' % (type_,))
            factors = ut.sortedby(factors, [f.variables[0] for f in factors])
            for fs_ in ut.ichunks(factors, 4):
                ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]),
                              'yellow')
        print('MAP assignments')
        top_assignments = query_results.get('top_assignments', [])
        tmp = []
        for lbl, val in top_assignments:
            tmp.append('%s : %.4f' % (ut.repr2(lbl), val))
        print(ut.align('\n'.join(tmp), ' :'))
        print('L_____\n')

    showkw = dict(evidence=evidence,
                  soft_evidence=soft_evidence,
                  **query_results)

    pgm_viz.show_model(model, **showkw)
    return (model, evidence, query_results)
Exemple #32
0
def setcover_example():
    """
    CommandLine:
        python -m ibeis.scripts.specialdraw setcover_example --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.specialdraw import *  # NOQA
        >>> result = setcover_example()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import ibeis
    import plottool as pt
    from ibeis.viz import viz_graph
    import networkx as nx
    pt.ensure_pylab_qt4()
    ibs = ibeis.opendb(defaultdb='testdb2')

    if False:
        # Select a good set
        aids = ibs.get_name_aids(ibs.get_valid_nids())
        # ibeis.testdata_aids('testdb2', a='default:mingt=2')
        aids = [a for a in aids if len(a) > 1]
        for a in aids:
            print(ut.repr3(ibs.get_annot_stats_dict(a)))
        print(aids[-2])
    #aids = [78, 79, 80, 81, 88, 91]
    aids = [78, 79, 81, 88, 91]
    qreq_ = ibs.depc.new_request('vsone', aids, aids, cfgdict={})
    cm_list = qreq_.execute()
    from ibeis.algo.hots import graph_iden
    infr = graph_iden.AnnotInference(cm_list)
    unique_aids, prob_annots = infr.make_prob_annots()
    import numpy as np
    print(
        ut.hz_str(
            'prob_annots = ',
            ut.array2string2(prob_annots,
                             precision=2,
                             max_line_width=140,
                             suppress_small=True)))
    # ut.setcover_greedy(candidate_sets_dict)
    max_weight = 3
    prob_annots[np.diag_indices(len(prob_annots))] = np.inf
    prob_annots = prob_annots
    thresh_points = np.sort(prob_annots[np.isfinite(prob_annots)])

    # probably not the best way to go about searching for these thresholds
    # but when you have a hammer...
    if False:
        quant = sorted(np.diff(thresh_points))[(len(thresh_points) - 1) // 2]
        candset = {
            point: thresh_points[np.abs(thresh_points - point) < quant]
            for point in thresh_points
        }
        check_thresholds = len(aids) * 2
        thresh_points2 = np.array(list(
            ut.setcover_greedy(candset, max_weight=check_thresholds).keys()))
        thresh_points = thresh_points2

    # pt.plot(sorted(thresh_points), 'rx')
    # pt.plot(sorted(thresh_points2), 'o')

    # prob_annots = prob_annots.T

    # thresh_start = np.mean(thresh_points)
    current_idxs = []
    current_covers = []
    current_val = np.inf
    for thresh in thresh_points:
        covering_sets = [np.where(row >= thresh)[0] for row in (prob_annots)]
        candidate_sets_dict = {
            ax: others
            for ax, others in enumerate(covering_sets)
        }
        soln_cover = ut.setcover_ilp(candidate_sets_dict,
                                     max_weight=max_weight)
        exemplar_idxs = list(soln_cover.keys())
        soln_weight = len(exemplar_idxs)
        val = max_weight - soln_weight
        # print('val = %r' % (val,))
        # print('soln_weight = %r' % (soln_weight,))
        if val < current_val:
            current_val = val
            current_covers = covering_sets
            current_idxs = exemplar_idxs
    exemplars = ut.take(aids, current_idxs)
    ensure_edges = [(aids[ax], aids[ax2])
                    for ax, other_xs in enumerate(current_covers)
                    for ax2 in other_xs]
    graph = viz_graph.make_netx_graph_from_aid_groups(
        ibs, [aids],
        allow_directed=True,
        ensure_edges=ensure_edges,
        temp_nids=[1] * len(aids))
    viz_graph.ensure_node_images(ibs, graph)

    nx.set_node_attributes(graph, 'framewidth', False)
    nx.set_node_attributes(graph, 'framewidth',
                           {aid: 4.0
                            for aid in exemplars})
    nx.set_edge_attributes(graph, 'color', pt.ORANGE)
    nx.set_node_attributes(graph, 'color', pt.LIGHT_BLUE)
    nx.set_node_attributes(graph, 'shape', 'rect')

    layoutkw = {
        'sep': 1 / 10,
        'prog': 'neato',
        'overlap': 'false',
        #'splines': 'ortho',
        'splines': 'spline',
    }
    pt.show_nx(graph, layout='agraph', layoutkw=layoutkw)
    pt.zoom_factory()
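The exemplar search above is a set cover over threshold-induced covering sets, solved here with an ILP helper; a hedged greedy sketch of the same idea in plain Python (the toy covering sets and the greedy loop are invented for illustration, not ut.setcover_ilp):

# Hedged greedy set-cover sketch: each annotation "covers" the annotations it
# matches above the threshold; pick a small set of exemplars covering everything.
candidate_sets = {
    0: {0, 1, 2},
    1: {1, 3},
    2: {2, 4},
    3: {3, 4, 5},
}
universe = set().union(*candidate_sets.values())
chosen, covered = [], set()
while covered != universe:
    # pick the set that covers the most still-uncovered elements
    best = max(candidate_sets, key=lambda k: len(candidate_sets[k] - covered))
    chosen.append(best)
    covered |= candidate_sets[best]
print(chosen)   # -> [0, 3] for this toy data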
Exemple #33
0
def dummy_example_depcacahe():
    r"""
    CommandLine:
        python -m dtool.example_depcache --exec-dummy_example_depcacahe

    Example:
        >>> # ENABLE_DOCTEST
        >>> from dtool.example_depcache import *  # NOQA
        >>> depc = dummy_example_depcacahe()
        >>> ut.show_if_requested()
    """
    fname = None
    # fname = 'dummy_default_depcache'
    fname = ':memory:'

    depc = testdata_depc(fname)

    tablename = 'fgweight'
    # print('[test] fgweight_path =\n%s' % (ut.repr3(depc.get_dependencies(tablename), nl=1),))
    # print('[test] keypoint =\n%s' % (ut.repr3(depc.get_dependencies('keypoint'), nl=1),))
    # print('[test] descriptor =\n%s' % (ut.repr3(depc.get_dependencies('descriptor'), nl=1),))
    # print('[test] spam =\n%s' % (ut.repr3(depc.get_dependencies('spam'), nl=1),))

    root_rowids = [5, 3]
    desc_rowids = depc.get_rowids('descriptor', root_rowids)  # NOQA

    table = depc[tablename]  # NOQA

    #example_getter_methods(depc, 'vsmany', root_rowids)
    # example_getter_methods(depc, 'chipmask', root_rowids)
    # example_getter_methods(depc, 'keypoint', root_rowids)
    # example_getter_methods(depc, 'chip', root_rowids)

    test_getters(depc)

    #import plottool as pt
    # pt.ensureqt()

    graph = depc.make_graph()  # NOQA
    #pt.show_nx(graph)

    print('---------- 111 -----------')

    # Try testing the algorithm
    req = depc.new_request('vsmany', root_rowids, root_rowids, {})
    print('req = %r' % (req, ))
    req.execute()

    print('---------- 222 -----------')

    cfgdict = {'sver_on': False}
    req = depc.new_request('vsmany', root_rowids, root_rowids, cfgdict)
    req.execute()

    print('---------- 333 -----------')

    cfgdict = {'sver_on': False, 'adapt_shape': False}
    req = depc.new_request('vsmany', root_rowids, root_rowids, cfgdict)
    req.execute()

    print('---------- 444 -----------')

    req = depc.new_request('vsmany', root_rowids, root_rowids, {})
    req.execute()

    #ut.InstanceList(
    db = list(depc.fname_to_db.values())[0]
    #db_list = ut.InstanceList(depc.fname_to_db.values())
    #db_list.print_table_csv('config', exclude_columns='config_strid')

    print('config table')
    tablename = 'config'
    column_list, column_names = db.get_table_column_data(
        tablename, ['config_strid'])
    print('\n'.join([
        ut.hz_str(*list(ut.interleave((r, [', '] * (len(r) - 1)))))
        for r in list(
            zip(*[[ut.repr3(r, nl=2) for r in col] for col in column_list]))
    ]))

    return depc
Exemple #34
0
def setcover_example():
    """
    CommandLine:
        python -m ibeis.scripts.specialdraw setcover_example --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.specialdraw import *  # NOQA
        >>> result = setcover_example()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import ibeis
    import plottool as pt
    from ibeis.viz import viz_graph
    import networkx as nx
    pt.ensure_pylab_qt4()
    ibs = ibeis.opendb(defaultdb='testdb2')

    if False:
        # Select a good set
        aids = ibs.get_name_aids(ibs.get_valid_nids())
        # ibeis.testdata_aids('testdb2', a='default:mingt=2')
        aids = [a for a in aids if len(a) > 1]
        for a in aids:
            print(ut.repr3(ibs.get_annot_stats_dict(a)))
        print(aids[-2])
    #aids = [78, 79, 80, 81, 88, 91]
    aids = [78, 79, 81, 88, 91]
    qreq_ = ibs.depc.new_request('vsone', aids, aids, cfgdict={})
    cm_list = qreq_.execute()
    from ibeis.algo.hots import graph_iden
    infr = graph_iden.AnnotInference(cm_list)
    unique_aids, prob_annots = infr.make_prob_annots()
    import numpy as np
    print(ut.hz_str('prob_annots = ', ut.array2string2(prob_annots, precision=2, max_line_width=140, suppress_small=True)))
    # ut.setcover_greedy(candidate_sets_dict)
    max_weight = 3
    prob_annots[np.diag_indices(len(prob_annots))] = np.inf
    prob_annots = prob_annots
    thresh_points = np.sort(prob_annots[np.isfinite(prob_annots)])

    # probably not the best way to go about searching for these thresholds
    # but when you have a hammer...
    if False:
        quant = sorted(np.diff(thresh_points))[(len(thresh_points) - 1) // 2]
        candset = {point: thresh_points[np.abs(thresh_points - point) < quant] for point in thresh_points}
        check_thresholds = len(aids) * 2
        thresh_points2 = np.array(list(ut.setcover_greedy(candset, max_weight=check_thresholds).keys()))
        thresh_points = thresh_points2

    # pt.plot(sorted(thresh_points), 'rx')
    # pt.plot(sorted(thresh_points2), 'o')

    # prob_annots = prob_annots.T

    # thresh_start = np.mean(thresh_points)
    current_idxs = []
    current_covers = []
    current_val = np.inf
    for thresh in thresh_points:
        covering_sets = [np.where(row >= thresh)[0] for row in (prob_annots)]
        candidate_sets_dict = {ax: others for ax, others in enumerate(covering_sets)}
        soln_cover = ut.setcover_ilp(candidate_sets_dict, max_weight=max_weight)
        exemplar_idxs = list(soln_cover.keys())
        soln_weight = len(exemplar_idxs)
        val = max_weight - soln_weight
        # print('val = %r' % (val,))
        # print('soln_weight = %r' % (soln_weight,))
        if val < current_val:
            current_val = val
            current_covers = covering_sets
            current_idxs = exemplar_idxs
    exemplars = ut.take(aids, current_idxs)
    ensure_edges = [(aids[ax], aids[ax2]) for ax, other_xs in enumerate(current_covers) for ax2 in other_xs]
    graph = viz_graph.make_netx_graph_from_aid_groups(
        ibs, [aids], allow_directed=True, ensure_edges=ensure_edges,
        temp_nids=[1] * len(aids))
    viz_graph.ensure_node_images(ibs, graph)

    nx.set_node_attributes(graph, 'framewidth', False)
    nx.set_node_attributes(graph, 'framewidth', {aid: 4.0 for aid in exemplars})
    nx.set_edge_attributes(graph, 'color', pt.ORANGE)
    nx.set_node_attributes(graph, 'color', pt.LIGHT_BLUE)
    nx.set_node_attributes(graph, 'shape', 'rect')

    layoutkw = {
        'sep' : 1 / 10,
        'prog': 'neato',
        'overlap': 'false',
        #'splines': 'ortho',
        'splines': 'spline',
    }
    pt.show_nx(graph, layout='agraph', layoutkw=layoutkw)
    pt.zoom_factory()