Example #1
0
def test_mcc():
    """Plot how the Matthews correlation coefficient falls as labels flip.

    Two label layouts are compared against a straight reference line from
    1 to -1: one with all positives followed by all negatives, and one with
    positives and negatives interleaved. In both cases the predictions start
    out identical to the truth and are flipped one element at a time, from
    the last element towards the first, recording the MCC after every flip.

    Relies on module-level ``np`` and ``ut``.
    """
    import plottool as pt
    import sklearn.metrics
    num = 100
    xdata = np.linspace(0, 1, num * 2)
    ydata = np.linspace(1, -1, num * 2)
    pt.plt.plot(xdata, ydata, '--k', label='linear')

    def _mcc_curve(y_true):
        """Return the MCC after each successive flip, starting at the end."""
        y_pred = list(y_true)
        scores = []
        # Offsets are 1-based: ``y_pred[-0]`` is ``y_pred[0]``, so a 0-based
        # negative index would flip the FIRST element before walking back
        # from the end.
        for offset in range(1, len(y_true) + 1):
            y_pred[-offset] = 1 - y_pred[-offset]
            scores.append(sklearn.metrics.matthews_corrcoef(y_true, y_pred))
        return scores

    y_true = [1] * num + [0] * num
    pt.plot(xdata, _mcc_curve(y_true), label='change one class at a time')

    y_true = ut.flatten(zip([1] * num, [0] * num))
    pt.plot(xdata, _mcc_curve(y_true), label='change classes evenly')
    pt.gca().legend()
Example #2
0
    def choose_thresh(infr):
        """Choose a score threshold from the sorted name probabilities.

        Returns:
            float: ``.5`` when ``infr.make_prob_names()`` reports at most two
                unique name ids. Otherwise the sorted probability located at
                the index found by applying ``vt.find_elbow_point`` twice:
                once to the whole sorted curve and once to the part of the
                curve from the first index onwards.
        """
        unique_nids, prob_names = infr.make_prob_names()

        # Too few names to search the curve; fall back to a fixed value.
        if not len(unique_nids) > 2:
            return .5

        sorted_scores = np.sort(prob_names.flatten())

        first_idx = vt.find_elbow_point(sorted_scores)
        tail = sorted_scores[first_idx:]
        # The second search is relative to the tail, so shift it back.
        second_idx = vt.find_elbow_point(tail) + first_idx

        show_debug_plot = False  # flip by hand to visualize the chosen points
        if show_debug_plot:
            import plottool as pt
            between = sorted_scores[first_idx:second_idx + 1]
            third_idx = vt.find_elbow_point(between) + first_idx
            pt.plot(sorted_scores)
            pt.plot(first_idx, sorted_scores[first_idx], 'bo')
            pt.plot(second_idx, sorted_scores[second_idx], 'ro')
            pt.plot(third_idx, sorted_scores[third_idx], 'go')

        return sorted_scores[second_idx]
Example #3
0
    def choose_thresh(infr):
        """Choose a score threshold from the sorted name probabilities.

        Returns:
            float: ``.5`` when ``infr.make_prob_names()`` reports at most two
                unique name ids. Otherwise the sorted probability located at
                the index found by applying ``vt.find_elbow_point`` twice:
                once to the whole sorted curve and once to the part of the
                curve from the first index onwards.
        """
        unique_nids, prob_names = infr.make_prob_names()

        # Too few names to search the curve; fall back to a fixed value.
        if not len(unique_nids) > 2:
            return .5

        sorted_scores = np.sort(prob_names.flatten())

        first_idx = vt.find_elbow_point(sorted_scores)
        tail = sorted_scores[first_idx:]
        # The second search is relative to the tail, so shift it back.
        second_idx = vt.find_elbow_point(tail) + first_idx

        show_debug_plot = False  # flip by hand to visualize the chosen points
        if show_debug_plot:
            import plottool as pt
            between = sorted_scores[first_idx:second_idx + 1]
            third_idx = vt.find_elbow_point(between) + first_idx
            pt.plot(sorted_scores)
            pt.plot(first_idx, sorted_scores[first_idx], 'bo')
            pt.plot(second_idx, sorted_scores[second_idx], 'ro')
            pt.plot(third_idx, sorted_scores[third_idx], 'go')

        return sorted_scores[second_idx]
Example #4
0
def gridsearch_ratio_thresh(matches):
    """Grid-search the ``ratio_thresh`` value that maximizes ROC AUC.

    Every match is scored by summing the entries of ``match.fs`` that pass
    ``match.ratio_test_flags(cfgdict)`` for each candidate threshold. The AUC
    of those scores is plotted against the threshold, then the same search is
    repeated on the training part of ten stratified folds and the mean of the
    per-fold best thresholds is printed. The function ends by dropping into
    an interactive ``utool.embed()`` session.

    Relies on module-level ``np`` and ``ut``.

    Args:
        matches (list): match objects exposing ``annot1`` / ``annot2``
            mappings with a ``'nid'`` key, an ``fs`` array and a
            ``ratio_test_flags(cfgdict)`` method.
    """
    import sklearn
    import sklearn.metrics
    # Import the submodule explicitly; ``import sklearn`` alone does not
    # guarantee that ``sklearn.model_selection`` is loaded.
    import sklearn.model_selection
    import vtool as vt
    # Param search for vsone
    import plottool as pt
    pt.qt4ensure()

    # ``random_state`` is only meaningful when shuffling; recent scikit-learn
    # raises a ValueError if it is given while ``shuffle`` is False.
    skf = sklearn.model_selection.StratifiedKFold(n_splits=10,
                                                  shuffle=True,
                                                  random_state=119372)

    # A match is a positive example when both annotations share a name id.
    y = np.array([m.annot1['nid'] == m.annot2['nid'] for m in matches])

    basis = {'ratio_thresh': np.linspace(.6, .7, 50).tolist()}
    grid = ut.all_dict_combinations(basis)
    xdata = np.array(ut.take_column(grid, 'ratio_thresh'))

    def _ratio_thresh(y_true, match_list):
        """Return one ROC AUC per grid configuration for ``match_list``."""
        auc_list = []
        for cfgdict in ut.ProgIter(grid, lbl='gridsearch'):
            y_score = [
                match.fs.compress(match.ratio_test_flags(cfgdict)).sum()
                for match in match_list
            ]
            auc = sklearn.metrics.roc_auc_score(y_true, y_score)
            auc_list.append(auc)
        return np.array(auc_list)

    # Search over all matches at once.
    auc_list = _ratio_thresh(y, matches)
    pt.plot(xdata, auc_list)
    subx, suby = vt.argsubmaxima(auc_list, xdata)
    best_ratio_thresh = subx[suby.argmax()]

    # Repeat the search on the training split of each fold.
    skf_results = []
    for train_idx, test_idx in skf.split(matches, y):
        match_list_ = ut.take(matches, train_idx)
        y_true = y.take(train_idx)
        auc_list = _ratio_thresh(y_true, match_list_)
        subx, suby = vt.argsubmaxima(auc_list, xdata, maxima_thresh=.8)
        best_ratio_thresh = subx[suby.argmax()]
        skf_results.append(best_ratio_thresh)
    print('skf_results.append = %r' % (np.mean(skf_results), ))
    # Leave the locals available for interactive inspection.
    import utool
    utool.embed()
Example #5
0
def test_average_contrast():
    """Plot sorted average-contrast values above the matching test images.

    Loads every valid test image in grayscale, scores each one with
    ``compute_average_contrast``, plots the scores in ascending order on the
    top row of a new figure and shows the images in that same order on the
    bottom row.
    """
    import vtool as vt
    # Result is discarded here; the call is kept as-is in case it has side
    # effects (TODO confirm whether it can be dropped).
    ut.get_valid_test_imgkeys()
    fpaths = list(map(ut.grab_test_imgpath, ut.get_valid_test_imgkeys()))
    images = [vt.imread(fpath, grayscale=True) for fpath in fpaths]
    contrasts = np.array(list(map(compute_average_contrast, images)))
    import plottool as pt
    num_imgs = len(images)
    fnum = pt.next_fnum()
    pt.figure(fnum=fnum, pnum=(2, 1, 1))
    order = contrasts.argsort()
    # Half-unit offset applied to the x position of each point.
    pt.plot(np.arange(0, num_imgs) + .5, contrasts[order], 'bo-')
    ordered_imgs = ut.take(images, order)
    # Second row of the 2 x num_imgs grid: one image per column.
    for px, img in ut.ProgressIter(enumerate(ordered_imgs, start=1)):
        pt.imshow(img, fnum=fnum, pnum=(2, num_imgs, num_imgs + px))
Example #6
0
def compare_data(Y_list_):
    """Plot the ``gamma`` of each given item against the pipeline's value.

    Builds an Oxford SMK query request, looks every item's ``aid`` up in the
    request's ``dinva.aid_to_idx`` and pairs the item's own ``gamma`` with
    ``dinva.gamma_list`` at that index. Items whose ``aid`` is not present
    have their ``nid`` printed. Both gamma series are plotted ordered by the
    item-side values.

    Args:
        Y_list_ (list): objects exposing ``aid``, ``gamma`` and ``nid``.
    """
    import ibeis
    qreq_ = ibeis.testdata_qreq_(
        defaultdb='Oxford',
        a='oxford',
        p='smk:nWords=[64000],nAssign=[1],SV=[False],can_match_sameimg=True,dim_size=None'
    )
    qreq_.ensure_data()

    print(len(Y_list_))
    print(len(qreq_.daids))

    dinva = qreq_.dinva
    script_gammas = []
    pipe_gammas = []
    unmatched = []
    for Y in Y_list_:
        aid = Y.aid
        script_gamma = Y.gamma
        if aid not in dinva.aid_to_idx:
            # No counterpart in the pipeline data; report it and move on.
            unmatched.append(Y)
            print(Y.nid)
            continue
        idx = dinva.aid_to_idx[aid]
        pipe_gamma = dinva.gamma_list[idx]
        script_gammas.append(script_gamma)
        pipe_gammas.append(pipe_gamma)

    import plottool as pt
    ut.qtensure()
    script_gammas = np.array(script_gammas)
    pipe_gammas = np.array(pipe_gammas)
    # Order both series by the item-side values so they can be compared.
    order = script_gammas.argsort()
    pt.plot(script_gammas[order], label='script')
    pt.plot(pipe_gammas[order], label='pipe')
    pt.legend()
Example #7
0
def test_average_contrast():
    """Plot sorted average-contrast values above the matching test images.

    Loads every valid test image in grayscale, scores each one with
    ``compute_average_contrast``, plots the scores in ascending order on the
    top row of a new figure and shows the images in that same order on the
    bottom row.
    """
    import vtool as vt
    # Result is discarded here; the call is kept as-is in case it has side
    # effects (TODO confirm whether it can be dropped).
    ut.get_valid_test_imgkeys()
    fpaths = list(map(ut.grab_test_imgpath, ut.get_valid_test_imgkeys()))
    images = [vt.imread(fpath, grayscale=True) for fpath in fpaths]
    contrasts = np.array(list(map(compute_average_contrast, images)))
    import plottool as pt
    num_imgs = len(images)
    fnum = pt.next_fnum()
    pt.figure(fnum=fnum, pnum=(2, 1, 1))
    order = contrasts.argsort()
    # Half-unit offset applied to the x position of each point.
    pt.plot(np.arange(0, num_imgs) + .5, contrasts[order], 'bo-')
    ordered_imgs = ut.take(images, order)
    # Second row of the 2 x num_imgs grid: one image per column.
    for px, img in ut.ProgressIter(enumerate(ordered_imgs, start=1)):
        pt.imshow(img, fnum=fnum, pnum=(2, num_imgs, num_imgs + px))
Example #8
0
def ewma():
    """Scratch investigation of how the EWMA ``span`` controls decay speed.

    The function simulates an exponentially weighted moving average on a
    0/1 signal, derives symbolically how many zero-valued steps ``n`` are
    needed for an initial value of 1 to fall below a threshold for a given
    ``span`` (with ``alpha = 2 / (span + 1)``), plots ``n`` against ``span``
    for several thresholds, and fits a line to ``n`` as a function of
    ``span`` for each threshold.

    Nothing is returned; results are plotted or printed. Relies on the
    module-level ``ut``.
    """
    import plottool as pt
    import ubelt as ub
    import numpy as np
    pt.qtensure()

    # Investigate the span parameter
    span = 20
    alpha = 2 / (span + 1)

    # how long does it take for the estimation to hit 0?
    # (ie, it no longer cares about the initial 1?)
    # about 93 iterations to get to 1e-4
    # about 47 iterations to get to 1e-2
    # about 24 iterations to get to 1e-1
    # 20 iterations goes to .135
    data = ([1] + [0] * 20 + [1] * 40 + [0] * 20 + [1] * 50 + [0] * 20 +
            [1] * 60 + [0] * 20 + [1] * 165 + [0] * 20 + [0])
    mave = []

    # Run the moving average over the data, seeding it with the first sample.
    iter_ = iter(data)
    current = next(iter_)
    mave += [current]
    for x in iter_:
        current = (alpha * x) + (1 - alpha) * current
        mave += [current]

    if False:
        pt.figure(fnum=1, doclf=True)
        pt.plot(data)
        pt.plot(mave)

    np.where(np.array(mave) < 1e-1)

    import sympy as sym

    # From here on ``span`` and ``n`` are sympy symbols. They must not be
    # rebound to numeric values, because ``thresh_expr`` is substituted and
    # solved in terms of them further down.
    n = sym.symbols('n', integer=True, nonnegative=True, finite=True)
    span = sym.symbols('span', integer=True, nonnegative=True, finite=True)
    thresh = sym.symbols('thresh', real=True, nonnegative=True, finite=True)
    # alpha = 2 / (span + 1)

    a, b, c = sym.symbols('a, b, c', real=True, nonnegative=True, finite=True)
    sym.solve(sym.Eq(b**a, c), a)

    # Decay of an initial 1 under ten zero-valued updates; this still uses
    # the numeric ``alpha`` computed for span=20 above.
    current = 1
    x = 0
    steps = []
    for _ in range(10):
        current = (alpha * x) + (1 - alpha) * current
        steps.append(current)

    alpha = sym.symbols('alpha', real=True, nonnegative=True, finite=True)
    base = sym.symbols('base', real=True, finite=True)
    alpha = 2 / (span + 1)
    # Value of the average after n zero-valued updates, starting from 1.
    thresh_expr = (1 - alpha)**n
    # Same expression written against a generic base (not used below).
    thresh_expr_base = base**n  # NOQA
    n_expr = sym.ceiling(sym.log(thresh) / sym.log(1 - 2 / (span + 1)))

    sym.pprint(sym.simplify(thresh_expr))
    sym.pprint(sym.simplify(n_expr))
    print(sym.latex(sym.simplify(n_expr)))

    # def calc_n2(span, thresh):
    #     return np.log(thresh) / np.log(1 - 2 / (span + 1))

    def calc_n(span, thresh):
        """Fractional number of steps needed to decay from 1 to ``thresh``."""
        return np.log(thresh) / np.log((span - 1) / (span + 1))

    def calc_thresh_val(n, span):
        """Value reached after ``n`` zero-valued steps for a given span."""
        alpha = 2 / (span + 1)
        return (1 - alpha)**n

    # Numeric grids get their own names so the sympy symbols stay intact.
    span_vals = np.arange(2, 200)
    n_frac = calc_n(span_vals, thresh=.5)
    n_vals = np.ceil(n_frac)
    calc_thresh_val(n_vals, span_vals)

    pt.figure(fnum=1, doclf=True)
    ydatas = ut.odict([('thresh=%f' % thresh_,
                        np.ceil(calc_n(span_vals, thresh=thresh_)))
                       for thresh_ in [1e-3, .01, .1, .2, .3, .4, .5]])
    pt.multi_plot(
        span_vals,
        ydatas,
        xlabel='span',
        ylabel='n iters to acheive thresh',
        marker='',
        # num_xticks=len(span),
        fnum=1)
    pt.gca().set_aspect('equal')

    def both_sides(eqn, func):
        """Apply ``func`` to both sides of a sympy equation."""
        return sym.Eq(func(eqn.lhs), func(eqn.rhs))

    eqn = sym.Eq(thresh_expr, thresh)
    n_expr = sym.solve(eqn,
                       n)[0].subs(base,
                                  (1 - alpha)).subs(alpha, (2 / (span + 1)))

    eqn = both_sides(eqn, lambda x: sym.log(x, (1 - alpha)))
    lhs = eqn.lhs  # NOQA

    from sympy.solvers.inequalities import solve_univariate_inequality

    def eval_expr(span_value, n_value):
        """Evaluate ``thresh_expr`` at one span for each n in ``n_value``."""
        # ``np.float`` was removed from NumPy; the builtin is equivalent.
        return np.array(
            [thresh_expr.subs(span, span_value).subs(n, n_) for n_ in n_value],
            dtype=float)

    eval_expr(20, np.arange(20))

    def linear(x, a, b):
        return a * x + b

    def sigmoidal_4pl(x, a, b, c, d):
        return d + (a - d) / (1 + (x / c)**b)

    def exponential(x, a, b, c):
        return a + b * np.exp(-c * x)

    import scipy.optimize

    # Determine how to choose span, such that you get to .01 from 1
    # in n timesteps
    thresh_to_span_to_n = []
    thresh_to_n_to_span = []
    for thresh_value in ub.ProgIter([.0001, .001, .01, .1, .2, .3, .4, .5]):
        print('')
        test_vals = sorted([2, 3, 4, 5, 6])
        n_to_span = []
        for n_value in ub.ProgIter(test_vals):
            # In n iterations I want to choose a span that the expression go
            # less than a threshold
            constraint = thresh_expr.subs(n, n_value) < thresh_value
            solution = solve_univariate_inequality(constraint, span)
            try:
                # Compound solution: take the bounds from its two parts.
                lowbound = np.ceil(float(solution.args[0].lhs))
                highbound = np.floor(float(solution.args[1].rhs))
                assert lowbound <= highbound
                span_value = lowbound
            except AttributeError:
                # Single relational: only one bound is available.
                span_value = np.floor(float(solution.rhs))
            n_to_span.append((n_value, span_value))

        # Given a threshold, find a minimum number of steps
        # that brings you up to that threshold given a span
        test_vals = sorted(set(list(range(2, 1000, 50)) + [2, 3, 4, 5, 6]))
        span_to_n = []
        for span_value in ub.ProgIter(test_vals):
            constraint = thresh_expr.subs(span, span_value) < thresh_value
            solution = solve_univariate_inequality(constraint, n)
            n_value = solution.lhs
            span_to_n.append((span_value, n_value))

        thresh_to_n_to_span.append((thresh_value, n_to_span))
        thresh_to_span_to_n.append((thresh_value, span_to_n))

    # Fit a line n = a * span + b for every threshold.
    thresh_to_params = []
    for thresh_value, span_to_n in thresh_to_span_to_n:
        xdata, ydata = [np.array(_, dtype=float) for _ in zip(*span_to_n)]

        p0 = (1 / np.diff((ydata - ydata[0])[1:]).mean(), ydata[0])
        func = linear
        popt, pcov = scipy.optimize.curve_fit(func, xdata, ydata, p0)
        # popt, pcov = scipy.optimize.curve_fit(exponential, xdata, ydata)

        if False:
            yhat = func(xdata, *popt)
            pt.figure(fnum=1, doclf=True)
            pt.plot(xdata, ydata, label='measured')
            pt.plot(xdata, yhat, label='predicteed')
            pt.legend()
        # slope = np.diff(ydata).mean()
        # pt.plot(d)
        thresh_to_params.append((thresh_value, popt))

    # pt.plt.plot(*zip(*thresh_to_slope), 'x-')

    # for thresh_value=.01, we get a rough line with slope ~2.302,
    # for thresh_value=.5, we get a line with slope ~34.66

    # if we want to get to 0 in n timesteps, with a thresh_value of
    # choose span=f(thresh_value) * (n + 2))
    # f is some inverse exponential

    # 0.0001, 460.551314197147
    # 0.001, 345.413485647860,
    # 0.01, 230.275657098573,
    # 0.1, 115.137828549287,
    # 0.2, 80.4778885203347,
    # 0.3, 60.2031233261536,
    # 0.4, 45.8179484913827,
    # 0.5, 34.6599400289520

    # Seems to be 4PL symmetrical sigmoid
    # f(x) = -66500.85 + (66515.88 - -66500.85) / (1 + (x/0.8604672)^0.001503716)
    # f(x) = -66500.85 + (66515.88 - -66500.85)/(1 + (x/0.8604672)^0.001503716)

    def f(x):
        return -66500.85 + (66515.88 -
                            -66500.85) / (1 + (x / 0.8604672)**0.001503716)
        # return (10000 * (-6.65 + (13.3015) / (1 + (x/0.86) ** 0.00150)))

    # f(.5) * (n - 1)

    # The original final statement called ``solve_rational_inequalities``,
    # which was never imported and takes different arguments. Solve the same
    # inequality for n with the univariate solver instead, fixing span at the
    # value investigated at the top of this function.
    solve_univariate_inequality(thresh_expr.subs(span, 20) < .01, n)
Example #9
0
def iters_until_threshold():
    """
    How many iterations of ewma until you hit the poisson / binomial threshold

    This establishes a principled way to choose the threshold for the refresh
    criterion in my thesis. There are parameters --- moving parts --- that we
    need to work with: `a` the patience, `s` the span, and `mu` our ewma.

    `s` is a span parameter indicating how far we look back.

    `mu` is the average number of label-changing reviews in roughly the last
    `s` manual decisions.

    These numbers are used to estimate the probability that any of the next `a`
    manual decisions will be label-changing. When that probability falls below
    a threshold we terminate. The goal is to choose `a`, `s`, and the threshold
    `t`, such that the probability will fall below the threshold after a maximum
    of `a` consecutive non-label-changing reviews. IE we want to tie the patience
    parameter (how far we look ahead) to how far we actually are willing to go.

    The function returns nothing. It prints symbolic expressions, writes
    several PNG figures to the current working directory and prints a small
    table of (a, s, threshold, target) values.
    """
    import numpy as np
    import utool as ut
    import sympy as sym
    i = sym.symbols('i', integer=True, nonnegative=True, finite=True)
    # mu_i = sym.symbols('mu_i', integer=True, nonnegative=True, finite=True)
    s = sym.symbols('s', integer=True, nonnegative=True, finite=True)  # NOQA
    thresh = sym.symbols('tau', real=True, nonnegative=True,
                         finite=True)  # NOQA
    alpha = sym.symbols('alpha', real=True, nonnegative=True,
                        finite=True)  # NOQA
    c_alpha = sym.symbols('c_alpha', real=True, nonnegative=True, finite=True)
    # patience
    a = sym.symbols('a', real=True, nonnegative=True, finite=True)

    # Substitution rules, applied repeatedly by ``dosubs`` until the
    # expression stops changing.
    available_subs = {
        a: 20,
        s: a,
        alpha: 2 / (s + 1),
        c_alpha: (1 - alpha),
    }

    def dosubs(expr, d=available_subs):
        """ recursive expression substitution """
        expr1 = expr.subs(d)
        if expr == expr1:
            return expr1
        else:
            return dosubs(expr1, d=d)

    # mu is either the support for the poisson distribution
    # or is is the p in the binomial distribution
    # It is updated at timestep i based on ewma, assuming each incoming response is 0
    mu_0 = 1.0
    mu_i = c_alpha**i

    # Estimate probability that any event will happen in the next `a` reviews
    # at time `i`.
    poisson_i = 1 - sym.exp(-mu_i * a)
    binom_i = 1 - (1 - mu_i)**a

    # Expand probabilities to be a function of i, s, and a
    # (the rules for `a` and `s` are removed so those symbols stay free)
    part = ut.delete_dict_keys(available_subs.copy(), [a, s])
    mu_i = dosubs(mu_i, d=part)
    poisson_i = dosubs(poisson_i, d=part)
    binom_i = dosubs(binom_i, d=part)

    if True:
        # ewma of mu at time i if review is always not label-changing (meaningful)
        mu_1 = c_alpha * mu_0  # NOQA
        mu_2 = c_alpha * mu_1  # NOQA

    if True:
        # Numeric values for i in [0, 100) using the full substitution table
        # (a=20, s=a).
        i_vals = np.arange(0, 100)
        mu_vals = np.array(
            [dosubs(mu_i).subs({
                i: i_
            }).evalf() for i_ in i_vals])  # NOQA
        binom_vals = np.array(
            [dosubs(binom_i).subs({
                i: i_
            }).evalf() for i_ in i_vals])  # NOQA
        poisson_vals = np.array(
            [dosubs(poisson_i).subs({
                i: i_
            }).evalf() for i_ in i_vals])  # NOQA

        # Find how many iters it actually takes my expt to terminate
        # (the results of these lookups are not stored)
        thesis_draft_thresh = np.exp(-2)
        np.where(mu_vals < thesis_draft_thresh)[0]
        np.where(binom_vals < thesis_draft_thresh)[0]
        np.where(poisson_vals < thesis_draft_thresh)[0]

    sym.pprint(sym.simplify(mu_i))
    sym.pprint(sym.simplify(binom_i))
    sym.pprint(sym.simplify(poisson_i))

    # Find the thresholds that force termination after `a` reviews have passed
    # do this by setting i=a
    poisson_thresh = poisson_i.subs({i: a})
    binom_thresh = binom_i.subs({i: a})

    print('Poisson thresh')
    print(sym.latex(sym.Eq(thresh, poisson_thresh)))
    print(sym.latex(sym.Eq(thresh, sym.simplify(poisson_thresh))))

    poisson_thresh.subs({a: 115, s: 30}).evalf()

    sym.pprint(sym.Eq(thresh, poisson_thresh))
    sym.pprint(sym.Eq(thresh, sym.simplify(poisson_thresh)))

    print('Binomial thresh')
    sym.pprint(sym.simplify(binom_thresh))

    sym.pprint(sym.simplify(poisson_thresh.subs({s: a})))

    def taud(coeff):
        # Scale a fraction by 360 (used for the view_init angles below).
        return coeff * 360

    # ``vars()`` with no argument is the local namespace, so inside this
    # function the caches are always created here.
    if 'poisson_cache' not in vars():
        poisson_cache = {}
        binom_cache = {}

    # Grid of (s, a) pairs over which both thresholds are evaluated.
    S, A = np.meshgrid(np.arange(1, 150, 1), np.arange(0, 150, 1))

    import plottool as pt
    SA_coords = list(zip(S.ravel(), A.ravel()))
    for sval, aval in ut.ProgIter(SA_coords):
        if (sval, aval) not in poisson_cache:
            poisson_cache[(sval, aval)] = float(
                poisson_thresh.subs({
                    a: aval,
                    s: sval
                }).evalf())
    poisson_zdata = np.array([
        poisson_cache[(sval, aval)] for sval, aval in SA_coords
    ]).reshape(A.shape)
    fig = pt.figure(fnum=1, doclf=True)
    pt.gca().set_axis_off()
    pt.plot_surface3d(S,
                      A,
                      poisson_zdata,
                      xlabel='s',
                      ylabel='a',
                      rstride=3,
                      cstride=3,
                      zlabel='poisson',
                      mode='wire',
                      contour=True,
                      title='poisson3d')
    pt.gca().set_zlim(0, 1)
    pt.gca().view_init(elev=taud(1 / 16), azim=taud(5 / 8))
    fig.set_size_inches(10, 6)
    fig.savefig('a-s-t-poisson3d.png',
                dpi=300,
                bbox_inches=pt.extract_axes_extents(fig, combine=True))

    # Same surface for the binomial threshold.
    for sval, aval in ut.ProgIter(SA_coords):
        if (sval, aval) not in binom_cache:
            binom_cache[(sval, aval)] = float(
                binom_thresh.subs({
                    a: aval,
                    s: sval
                }).evalf())
    binom_zdata = np.array([
        binom_cache[(sval, aval)] for sval, aval in SA_coords
    ]).reshape(A.shape)
    fig = pt.figure(fnum=2, doclf=True)
    pt.gca().set_axis_off()
    pt.plot_surface3d(S,
                      A,
                      binom_zdata,
                      xlabel='s',
                      ylabel='a',
                      rstride=3,
                      cstride=3,
                      zlabel='binom',
                      mode='wire',
                      contour=True,
                      title='binom3d')
    pt.gca().set_zlim(0, 1)
    pt.gca().view_init(elev=taud(1 / 16), azim=taud(5 / 8))
    fig.set_size_inches(10, 6)
    fig.savefig('a-s-t-binom3d.png',
                dpi=300,
                bbox_inches=pt.extract_axes_extents(fig, combine=True))

    # Find point on the surface that achieves a reasonable threshold

    # Sympy can't solve this
    # sym.solve(sym.Eq(binom_thresh.subs({s: 50}), .05))
    # sym.solve(sym.Eq(poisson_thresh.subs({s: 50}), .05))
    # Find a numerical solution
    def solve_numeric(expr,
                      target,
                      solve_for,
                      fixed={},
                      method=None,
                      bounds=None):
        """
        Numerically find the value of one symbol that makes `expr` equal
        `target`, by minimizing the squared difference.

        Args:
            expr (Expr): symbolic expression
            target (float): numeric value
            solve_for (sympy.Symbol): The symbol you care about
            fixed (dict): fixed values of the other symbols (never mutated
                here, so the shared default dict is harmless)
            method (str): forwarded to `scipy.optimize.minimize`
            bounds: forwarded to `scipy.optimize.minimize`

        Returns:
            The result object returned by `scipy.optimize.minimize`. It is
            printed when its `success` flag is false.

        solve_numeric(poisson_thresh, .05, a, {s: 30}, method=None)
        solve_numeric(poisson_thresh, .05, a, {s: 30}, method='Nelder-Mead')
        solve_numeric(poisson_thresh, .05, a, {s: 30}, method='BFGS')
        """
        import scipy.optimize
        # Find the symbol you want to solve for
        want_symbols = expr.free_symbols - set(fixed.keys())
        # TODO: can probably extend this to multiple params
        assert len(want_symbols) == 1, 'specify all but one var'
        assert solve_for == list(want_symbols)[0]
        fixed_expr = expr.subs(fixed)

        def func(a1):
            # Squared error between the expression at a1 and the target.
            expr_value = float(fixed_expr.subs({solve_for: a1}).evalf())
            return (expr_value - target)**2

        # Initial guess: 0, or the first fixed value when one is given.
        if not fixed:
            a1 = 0
        else:
            a1 = list(fixed.values())[0]
        # if method is None:
        #     method = 'Nelder-Mead'
        #     method = 'Newton-CG'
        #     method = 'BFGS'
        result = scipy.optimize.minimize(func,
                                         x0=a1,
                                         method=method,
                                         bounds=bounds)
        if not result.success:
            print('\n')
            print(result)
            print('\n')
        return result

    # Numeric measurements of this line

    thresh_vals = [.001, .01, .05, .1, .135]
    svals = np.arange(1, 100)

    # For every target probability, solve for `a` at each span value.
    target_poisson_plots = {}
    for target in ut.ProgIter(thresh_vals, bs=False, freq=1):
        poisson_avals = []
        for sval in ut.ProgIter(svals, 'poisson', freq=1):
            expr = poisson_thresh
            fixed = {s: sval}
            want = a
            aval = solve_numeric(expr,
                                 target,
                                 want,
                                 fixed,
                                 method='Nelder-Mead').x[0]
            poisson_avals.append(aval)
        target_poisson_plots[target] = (svals, poisson_avals)

    fig = pt.figure(fnum=3)
    for target, dat in target_poisson_plots.items():
        pt.plt.plot(*dat, label='prob={}'.format(target))
    pt.gca().set_xlabel('s')
    pt.gca().set_ylabel('a')
    pt.legend()
    pt.gca().set_title('poisson')
    fig.set_size_inches(5, 3)
    fig.savefig('a-vs-s-poisson.png',
                dpi=300,
                bbox_inches=pt.extract_axes_extents(fig, combine=True))

    target_binom_plots = {}
    for target in ut.ProgIter(thresh_vals, bs=False, freq=1):
        binom_avals = []
        for sval in ut.ProgIter(svals, 'binom', freq=1):
            aval = solve_numeric(binom_thresh,
                                 target,
                                 a, {
                                     s: sval
                                 },
                                 method='Nelder-Mead').x[0]
            binom_avals.append(aval)
        target_binom_plots[target] = (svals, binom_avals)

    fig = pt.figure(fnum=4)
    for target, dat in target_binom_plots.items():
        pt.plt.plot(*dat, label='prob={}'.format(target))
    pt.gca().set_xlabel('s')
    pt.gca().set_ylabel('a')
    pt.legend()
    pt.gca().set_title('binom')
    fig.set_size_inches(5, 3)
    fig.savefig('a-vs-s-binom.png',
                dpi=300,
                bbox_inches=pt.extract_axes_extents(fig, combine=True))

    # ----
    if True:

        # Threshold as a function of `a`, one curve per fixed `s`.
        fig = pt.figure(fnum=5, doclf=True)
        s_vals = [1, 2, 3, 10, 20, 30, 40, 50]
        for sval in s_vals:
            pp = poisson_thresh.subs({s: sval})

            a_vals = np.arange(0, 200)
            pp_vals = np.array(
                [float(pp.subs({
                    a: aval
                }).evalf()) for aval in a_vals])  # NOQA

            pt.plot(a_vals, pp_vals, label='s=%r' % (sval, ))
        pt.legend()
        pt.gca().set_xlabel('a')
        pt.gca().set_ylabel('poisson prob after a reviews')
        fig.set_size_inches(5, 3)
        fig.savefig('a-vs-thresh-poisson.png',
                    dpi=300,
                    bbox_inches=pt.extract_axes_extents(fig, combine=True))

        fig = pt.figure(fnum=6, doclf=True)
        s_vals = [1, 2, 3, 10, 20, 30, 40, 50]
        for sval in s_vals:
            pp = binom_thresh.subs({s: sval})
            a_vals = np.arange(0, 200)
            pp_vals = np.array(
                [float(pp.subs({
                    a: aval
                }).evalf()) for aval in a_vals])  # NOQA
            pt.plot(a_vals, pp_vals, label='s=%r' % (sval, ))
        pt.legend()
        pt.gca().set_xlabel('a')
        pt.gca().set_ylabel('binom prob after a reviews')
        fig.set_size_inches(5, 3)
        fig.savefig('a-vs-thresh-binom.png',
                    dpi=300,
                    bbox_inches=pt.extract_axes_extents(fig, combine=True))

        # -------

        # Threshold as a function of `s`, one curve per fixed `a`.
        # NOTE(review): fnum=5 is reused for both figures below; each is
        # cleared (doclf=True) and saved before the next one is drawn.
        fig = pt.figure(fnum=5, doclf=True)
        a_vals = [1, 2, 3, 10, 20, 30, 40, 50]
        for aval in a_vals:
            pp = poisson_thresh.subs({a: aval})
            s_vals = np.arange(1, 200)
            pp_vals = np.array(
                [float(pp.subs({
                    s: sval
                }).evalf()) for sval in s_vals])  # NOQA
            pt.plot(s_vals, pp_vals, label='a=%r' % (aval, ))
        pt.legend()
        pt.gca().set_xlabel('s')
        pt.gca().set_ylabel('poisson prob')
        fig.set_size_inches(5, 3)
        fig.savefig('s-vs-thresh-poisson.png',
                    dpi=300,
                    bbox_inches=pt.extract_axes_extents(fig, combine=True))

        fig = pt.figure(fnum=5, doclf=True)
        a_vals = [1, 2, 3, 10, 20, 30, 40, 50]
        for aval in a_vals:
            pp = binom_thresh.subs({a: aval})
            s_vals = np.arange(1, 200)
            pp_vals = np.array(
                [float(pp.subs({
                    s: sval
                }).evalf()) for sval in s_vals])  # NOQA
            pt.plot(s_vals, pp_vals, label='a=%r' % (aval, ))
        pt.legend()
        pt.gca().set_xlabel('s')
        pt.gca().set_ylabel('binom prob')
        fig.set_size_inches(5, 3)
        fig.savefig('s-vs-thresh-binom.png',
                    dpi=300,
                    bbox_inches=pt.extract_axes_extents(fig, combine=True))

    #---------------------
    # Plot out a table

    mu_i.subs({s: 75, a: 75}).evalf()
    poisson_thresh.subs({s: 75, a: 75}).evalf()

    # For a fixed span, estimate `a` from the median slope of each solved
    # a-vs-s curve and print the threshold that pair actually yields.
    # Note that `thresh` is rebound from the sympy symbol to a float here.
    sval = 50
    for target, dat in target_poisson_plots.items():
        slope = np.median(np.diff(dat[1]))
        aval = int(np.ceil(sval * slope))
        thresh = float(poisson_thresh.subs({s: sval, a: aval}).evalf())
        print('aval={}, sval={}, thresh={}, target={}'.format(
            aval, sval, thresh, target))

    # The binomial counterpart computes the same quantities but does not
    # report them.
    for target, dat in target_binom_plots.items():
        slope = np.median(np.diff(dat[1]))
        aval = int(np.ceil(sval * slope))
        pass
Example #10
0
def theano_gradient_funtimes():
    """Scratch walkthrough of losses and gradients in Theano / Lasagne.

    Builds a single dense unit on top of a (16, 1, 1, 1) input layer, then
    step by step evaluates the network output, the per-sample squared error,
    the mean loss and the gradients of the L2-regularized loss on the first
    16 samples of ``y = x ** 2``. Finally compiles a function returning the
    network output and per-sample losses for a batch and plots the
    predictions for that batch.

    Nothing is returned. Relies on the module-level ``ut``.
    """
    import theano
    import numpy as np
    import theano.tensor as T
    import lasagne
    import ibeis_cnn.theano_ext as theano_ext

    TEST = True

    # 100 samples shaped (N, 1, 1, 1) and their squares shaped (N, 1).
    x_data = np.linspace(-10, 10, 100).astype(np.float32)[:, None, None, None]
    y_data = (x_data**2).flatten()[:, None]

    X = T.tensor4('x')
    y = T.matrix('y')

    #x_data_batch =
    #y_data_batch =
    inputs_to_value = {X: x_data[0:16], y: y_data[0:16]}

    l_in = lasagne.layers.InputLayer((16, 1, 1, 1))
    l_out = lasagne.layers.DenseLayer(
        l_in,
        num_units=1,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Orthogonal())

    network_output = lasagne.layers.get_output(l_out, X)

    # TEST NETWORK OUTPUT

    if TEST:
        result = theano_ext.eval_symbol(network_output, inputs_to_value)
        print('network_output = %r' % (result, ))

    loss_function = lasagne.objectives.squared_error
    #def loss_function(network_output, labels):
    #    return (network_output - labels) ** 2

    losses = loss_function(network_output, y)
    if TEST:
        result = theano_ext.eval_symbol(losses, inputs_to_value)
        print('losses = %r' % (result, ))

    loss = lasagne.objectives.aggregate(losses, mode='mean')

    if TEST:
        result = theano_ext.eval_symbol(loss, inputs_to_value)
        print('loss = %r' % (result, ))

    L2 = lasagne.regularization.regularize_network_params(
        l_out, lasagne.regularization.l2)
    weight_decay = .0001
    loss_regularized = loss + weight_decay * L2
    loss_regularized.name = 'loss_regularized'

    parameters = lasagne.layers.get_all_params(l_out)

    gradients_regularized = theano.grad(loss_regularized,
                                        parameters,
                                        add_names=True)

    if TEST:
        if False:
            s = T.sum(1 / (1 + T.exp(-X)))
            s.name = 's'
            gs = T.grad(s, X, add_names=True)
            theano.pp(gs)
            inputs_to_value = {X: x_data[0:16], y: y_data[0:16]}
            result = theano_ext.eval_symbol(gs, inputs_to_value)
            print('%s = %r' % (
                gs.name,
                result,
            ))
            inputs_to_value = {X: x_data[16:32], y: y_data[16:32]}
            result = theano_ext.eval_symbol(gs, inputs_to_value)
            print('%s = %r' % (
                gs.name,
                result,
            ))

        for grad in gradients_regularized:
            result = theano_ext.eval_symbol(grad, inputs_to_value)
            print('%s = %r' % (
                grad.name,
                result,
            ))

        # ``theano.grad`` needs a scalar cost, so differentiate the summed
        # per-sample losses rather than the loss matrix itself.
        grad_on_losses = theano.grad(losses.sum(), parameters,  # NOQA
                                     add_names=True)

    learning_rate_theano = .0001
    momentum = .9
    updates = lasagne.updates.nesterov_momentum(  # NOQA
        gradients_regularized, parameters, learning_rate_theano, momentum)

    X_batch = T.tensor4('x_batch')
    # ``y`` is a matrix, so the variable substituted for it through
    # ``givens`` has to be a matrix as well (the targets are shaped (N, 1)).
    y_batch = T.matrix('y_batch')

    # Plain variables are accepted as inputs; the deprecated ``theano.Param``
    # wrapper is not needed.
    func = theano.function(
        inputs=[X_batch, y_batch],
        outputs=[network_output, losses],
        #updates=updates,
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    y_predict_batch, loss_batch = func(inputs_to_value[X], inputs_to_value[y])

    if ut.inIPython():
        import IPython
        IPython.get_ipython().magic('pylab qt4')

    import plottool as pt
    # Plot the predictions against the inputs of the same batch. The original
    # referenced an undefined ``y_predict`` and the full 100-sample x axis.
    pt.plot(inputs_to_value[X].ravel(), y_predict_batch.ravel())
    pt.iup()
Example #11
0
def draw_roc_curve(
    fpr,
    tpr,
    fnum=None,
    pnum=None,
    marker="-",
    target_tpr=None,
    target_fpr=None,
    thresholds=None,
    color=None,
    show_operating_point=False,
):
    r"""
    Plot a ROC curve titled with its AUC and optionally mark an operating point.

    An operating point is selected when ``target_fpr`` or ``target_tpr`` is
    given; ``target_fpr`` takes precedence when both are supplied. The target
    is clipped to [0, 1] and the other coordinate is obtained from
    ``interpolate_replbounds``. The selected point is drawn on top of the
    curve as a single ``pt.PINK`` dot.

    Args:
        fpr (ndarray): false positive rates, plotted on the x-axis
            (presumably a numpy array - TODO confirm)
        tpr (ndarray): true positive rates, plotted on the y-axis; compared
            elementwise against the chosen TPR when ``thresholds`` is given
        fnum (int):  figure number; ``pt.next_fnum()`` is used when None
            (default = None)
        pnum (tuple):  plot number forwarded to ``pt.plot2`` (default = None)
        marker (str): marker forwarded to ``pt.plot2`` (default = '-')
        target_tpr (float): TPR at which to place the operating point
            (default = None)
        target_fpr (float): FPR at which to place the operating point
            (default = None)
        thresholds (ndarray): when given, the reported threshold is
            ``thresholds[i]`` for the first ``i`` with ``tpr[i]`` at least the
            chosen TPR, falling back to the last entry when no such index
            exists; otherwise 0 is reported (default = None)
        color (tuple): curve color; when None one of two RGB tuples is picked
            based on ``pt.is_default_dark_bg()`` (default = None)
        show_operating_point (bool): append the fpr / tpr / threshold of the
            operating point to the title (default = False)

    CommandLine:
        python -m vtool.confusion --exec-draw_roc_curve --show --lightbg

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.confusion import *  # NOQA
        >>> scores, labels = testdata_scores_labels()
        >>> confusions = get_confusion_metrics(scores, labels)
        >>> fpr = confusions.fpr
        >>> tpr = confusions.tpr
        >>> thresholds = confusions.thresholds
        >>> fnum = None
        >>> pnum = None
        >>> marker = '-x'
        >>> target_tpr = .85
        >>> target_fpr = None
        >>> color = None
        >>> show_operating_point = True
        >>> draw_roc_curve(fpr, tpr, fnum, pnum, marker, target_tpr, target_fpr,
        >>>   thresholds, color, show_operating_point)
        >>> ut.show_if_requested()
    """
    import plottool as pt

    if fnum is None:
        fnum = pt.next_fnum()

    if color is None:
        if pt.is_default_dark_bg():
            color = (0.4, 1.0, 0.4)
        else:
            color = (0.1, 0.4, 0.4)

    roc_auc = sklearn.metrics.auc(fpr, tpr)

    # Resolve the operating point. A requested FPR wins over a requested TPR.
    if target_fpr is not None:
        target_fpr = np.clip(target_fpr, 0, 1)
        choice_tpr = interpolate_replbounds(fpr, tpr, target_fpr)
        choice_fpr = target_fpr
    elif target_tpr is not None:
        target_tpr = np.clip(target_tpr, 0, 1)
        choice_fpr = interpolate_replbounds(tpr, fpr, target_tpr)
        choice_tpr = target_tpr
    else:
        choice_tpr = choice_fpr = None

    has_operating_point = choice_fpr is not None

    # The suffix stays empty unless an operating point exists AND the caller
    # asked for it to be shown.
    title_suffix = ""
    if has_operating_point:
        choice_thresh = 0
        if thresholds is not None:
            try:
                # first position whose TPR reaches the chosen TPR
                index = np.nonzero(tpr >= choice_tpr)[0][0]
            except IndexError:
                # nothing reaches it: fall back to the last threshold
                index = len(thresholds) - 1
            choice_thresh = thresholds[index]
        if show_operating_point:
            operating_point = (choice_fpr, choice_tpr, choice_thresh)
            title_suffix = ", fpr=%.2f, tpr=%.2f, thresh=%.2f" % operating_point

    title = "".join([
        "Receiver operating characteristic\n",
        "AUC=%.3f" % (roc_auc,),
        title_suffix,
    ])

    plot_kw = dict(
        marker=marker,
        x_label="False Positive Rate",
        y_label="True Positive Rate",
        unitbox=True,
        flipx=False,
        color=color,
        fnum=fnum,
        pnum=pnum,
        title=title,
    )
    pt.plot2(fpr, tpr, **plot_kw)

    if False:
        # Disabled debugging aid.
        # Interp does not work right because of duplicate values
        # in xdomain
        line_ = np.linspace(0.11, 0.9, 20)
        # indices where the rate strictly increases over its predecessor
        rising_tpr_idxs = np.nonzero(np.append([np.inf], np.diff(tpr)) > 0)[0]
        rising_fpr_idxs = np.nonzero(np.append([np.inf], np.diff(fpr)) > 0)[0]

        tpr_on_line = interpolate_replbounds(
            fpr[rising_fpr_idxs], tpr[rising_fpr_idxs], line_)
        pt.plt.plot(line_, tpr_on_line, "b-x")
        fpr_on_line = interpolate_replbounds(
            tpr[rising_tpr_idxs], fpr[rising_tpr_idxs], line_)
        pt.plt.plot(fpr_on_line, line_, "r-x")

    if has_operating_point:
        pt.plot(choice_fpr, choice_tpr, "o", color=pt.PINK)
Example #12
0
def draw_roc_curve(fpr,
                   tpr,
                   fnum=None,
                   pnum=None,
                   marker='-',
                   target_tpr=None,
                   target_fpr=None,
                   thresholds=None,
                   color=None,
                   show_operating_point=False):
    r"""
    Draw a ROC curve (title includes the AUC) with an optional operating point.

    If ``target_fpr`` is given it is clipped to [0, 1] and the matching TPR is
    looked up with ``interpolate_replbounds``; otherwise, if ``target_tpr`` is
    given, the same is done in the other direction. The resulting point, if
    any, is plotted as one ``pt.PINK`` dot over the curve.

    Args:
        fpr (ndarray): false positive rates for the x-axis
            (presumably a numpy array - TODO confirm)
        tpr (ndarray): true positive rates for the y-axis; compared
            elementwise with the chosen TPR when ``thresholds`` is given
        fnum (int):  figure number, taken from ``pt.next_fnum()`` when None
            (default = None)
        pnum (tuple):  plot number passed through to ``pt.plot2``
            (default = None)
        marker (str): marker passed through to ``pt.plot2`` (default = '-')
        target_tpr (float): requested TPR of the operating point
            (default = None)
        target_fpr (float): requested FPR of the operating point; takes
            precedence over ``target_tpr`` (default = None)
        thresholds (ndarray): used only to report a threshold in the title:
            the entry at the first index where ``tpr`` reaches the chosen
            TPR, or the last entry if there is none (default = None)
        color (tuple): curve color; when None it depends on
            ``pt.is_default_dark_bg()`` (default = None)
        show_operating_point (bool): add fpr / tpr / thresh of the operating
            point to the title (default = False)

    CommandLine:
        python -m vtool.confusion --exec-draw_roc_curve --show --lightbg

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.confusion import *  # NOQA
        >>> scores, labels = testdata_scores_labels()
        >>> confusions = get_confusion_metrics(scores, labels)
        >>> fpr = confusions.fpr
        >>> tpr = confusions.tpr
        >>> thresholds = confusions.thresholds
        >>> fnum = None
        >>> pnum = None
        >>> marker = '-x'
        >>> target_tpr = .85
        >>> target_fpr = None
        >>> color = None
        >>> show_operating_point = True
        >>> draw_roc_curve(fpr, tpr, fnum, pnum, marker, target_tpr, target_fpr,
        >>>   thresholds, color, show_operating_point)
        >>> ut.show_if_requested()
    """
    import plottool as pt
    if fnum is None:
        fnum = pt.next_fnum()

    if color is None:
        dark_bg = pt.is_default_dark_bg()
        color = (0.4, 1.0, 0.4) if dark_bg else (0.1, 0.4, 0.4)

    roc_auc = sklearn.metrics.auc(fpr, tpr)

    # Default: no operating point requested
    choice_tpr = None
    choice_fpr = None
    if target_fpr is not None:
        target_fpr = np.clip(target_fpr, 0, 1)
        choice_fpr = target_fpr
        choice_tpr = interpolate_replbounds(fpr, tpr, target_fpr)
    elif target_tpr is not None:
        target_tpr = np.clip(target_tpr, 0, 1)
        choice_tpr = target_tpr
        choice_fpr = interpolate_replbounds(tpr, fpr, target_tpr)

    mark_point = choice_fpr is not None

    title_suffix = ''
    if mark_point:
        if thresholds is None:
            choice_thresh = 0
        else:
            try:
                # first index at which the curve reaches the chosen TPR
                index = np.nonzero(tpr >= choice_tpr)[0][0]
            except IndexError:
                # never reached: report the final threshold instead
                index = len(thresholds) - 1
            choice_thresh = thresholds[index]
        if show_operating_point:
            title_suffix = ', fpr=%.2f, tpr=%.2f, thresh=%.2f' % (
                choice_fpr, choice_tpr, choice_thresh)

    auc_line = 'AUC=%.3f' % (roc_auc, )
    title = 'Receiver operating characteristic\n' + auc_line + title_suffix

    pt.plot2(fpr, tpr, marker=marker,
             x_label='False Positive Rate', y_label='True Positive Rate',
             unitbox=True, flipx=False, color=color,
             fnum=fnum, pnum=pnum, title=title)

    if False:
        # Disabled debugging aid.
        # Interp does not work right because of duplicate values
        # in xdomain
        line_ = np.linspace(.11, .9, 20)
        # positions where each rate strictly increases over the previous one
        tpr_steps = np.append([np.inf], np.diff(tpr)) > 0
        fpr_steps = np.append([np.inf], np.diff(fpr)) > 0
        unique_tpr_idxs = np.nonzero(tpr_steps)[0]
        unique_fpr_idxs = np.nonzero(fpr_steps)[0]

        interp_tpr_line = interpolate_replbounds(
            fpr[unique_fpr_idxs], tpr[unique_fpr_idxs], line_)
        pt.plt.plot(line_, interp_tpr_line, 'b-x')
        interp_fpr_line = interpolate_replbounds(
            tpr[unique_tpr_idxs], fpr[unique_tpr_idxs], line_)
        pt.plt.plot(interp_fpr_line, line_, 'r-x')

    if mark_point:
        pt.plot(choice_fpr, choice_tpr, 'o', color=pt.PINK)
Example #13
0
def find_location_disparate_splits(ibs):
    """
    Report names whose annotations imply an implausibly high travel speed.

    For every name with more than one GPS reading, the pairwise haversine
    distances and the pairwise time differences (in hours) between its
    annotations are computed and divided to get pairwise speeds. A name is
    reported when its largest pairwise speed exceeds 6 (treated as km/h by
    this code). Diagnostic matrices are printed for each reported name.

    NOTE(review): two annotations with identical timestamps have a zero hour
    difference, so the speed division can produce inf / nan values.

    Args:
        ibs: controller object supplying the annotation, name, annotmatch and
            contributor accessors called below

    Returns:
        list: name rowids of the offending names, taken in order of
            increasing maximum speed

    CommandLine:
        python -m ibeis.other.ibsfuncs --test-find_location_disparate_splits

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.other.ibsfuncs import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('NNP_Master3')
        >>> # execute function
        >>> offending_nids = find_location_disparate_splits(ibs)
        >>> # verify results
        >>> print('offending_nids = %r' % (offending_nids,))

    """
    import scipy.spatial.distance as spdist

    candidate_aids = ibs.filter_aids_count()
    nid_list, gps_track_list, aid_track_list = ibs.get_name_gps_tracks(
        aid_list=candidate_aids)

    # Keep only names having more than one GPS reading
    is_multiton = [len(track) > 1 for track in gps_track_list]
    gps_tracks = ut.list_compress(gps_track_list, is_multiton)
    aid_tracks = ut.list_compress(aid_track_list, is_multiton)
    nids = ut.list_compress(nid_list, is_multiton)

    # Timestamps grouped the same way as the annotation tracks
    unixtime_tracks = ibs.unflat_map(
        ibs.get_annot_image_unixtimes_asfloat, aid_tracks)

    # pdist wants 2D input, so each timestamp list becomes a column vector
    gps_arrays = [np.array(track) for track in gps_tracks]
    time_columns = [np.array(times)[:, None] for times in unixtime_tracks]

    def hours_between(t1, t2):
        # absolute difference of two unix timestamps, seconds -> hours
        return np.abs(np.subtract(t1, t2)) / (60**2)

    # Condensed pairwise distance vectors, one per name
    gpsdist_vecs = [
        spdist.pdist(arr, metric=ut.haversine) for arr in gps_arrays
    ]
    hourdist_vecs = [
        spdist.pdist(col, metric=hours_between) for col in time_columns
    ]

    # Pairwise speed (distance per hour) for each name
    speed_vecs = [
        dist / hours for dist, hours in zip(gpsdist_vecs, hourdist_vecs)
    ]

    max_gpsdist = np.array([vec.max() for vec in gpsdist_vecs])
    max_speed = np.array([vec.max() for vec in speed_vecs])
    speed_order = max_speed.argsort()
    max_speed_sorted = max_speed[speed_order]

    if False:
        # Disabled debugging plots
        import plottool as pt
        pt.plot(max_speed_sorted)
        all_gpsdist = np.array(ut.flatten(gpsdist_vecs))
        all_hourdist = np.array(ut.flatten(hourdist_vecs))

        pt.figure(fnum1=1, doclf=True, docla=True)
        by_time = all_hourdist.argsort()
        pt.plot(all_gpsdist[by_time])
        pt.plot(all_hourdist[by_time])
        pt.iup()

        pt.figure(fnum1=2, doclf=True, docla=True)
        by_gps = all_gpsdist.argsort()
        pt.plot(all_gpsdist[by_gps])
        pt.plot(all_hourdist[by_gps])
        pt.iup()

        pt.iup()

    # NOTE(review): this distance-based selection is computed but its result
    # is never used; only the speed-based selection below decides the output.
    gps_order = max_gpsdist.argsort()
    gpsdist_thresh = 15
    max_gpsdist_sorted = max_gpsdist[gps_order]
    gps_offending_sortx = gps_order.compress(
        max_gpsdist_sorted > gpsdist_thresh)

    speed_thresh_kph = 6  # kilometers per hour
    is_too_fast = max_speed_sorted > speed_thresh_kph
    offending_sortx = speed_order.compress(is_too_fast)
    offending_nids = ut.list_take(nids, offending_sortx)
    print('offending_nids = %r' % (offending_nids, ))

    def matrix_str(condensed):
        # square matrix text for a condensed pdist vector
        return ut.numpy_str(spdist.squareform(condensed), precision=2)

    for index in offending_sortx:
        print('\n\n--- Offender index=%d ---' % (index, ))
        aids = aid_tracks[index]
        nid = nids[index]
        # every annotation in the track must belong to this name
        assert np.all(np.array(ibs.get_annot_name_rowids(aids)) == nid)

        # Review state for every ordered pair of annotations (None -> -1)
        aid1_list, aid2_list = zip(*list(ut.product(aids, aids)))
        annotmatch_rowids = ibs.get_annotmatch_rowid_from_superkey(
            aid1_list, aid2_list)
        truth_list = ibs.get_annotmatch_truth(annotmatch_rowids)
        truth_list = ut.replace_nones(truth_list, -1)
        num_aids = len(aids)
        truth_mat = np.array(truth_list).reshape((num_aids, num_aids))

        gids = ibs.get_annot_gids(aids)
        contrib_rowids = ibs.get_image_contributor_rowid(gids)
        contrib_tags = ibs.get_contributor_tag(contrib_rowids)

        print('nid = %r' % (nid, ))
        print('maxspeed = %.2f km/h' % (max_speed[index], ))
        print('aids = %r' % (aids, ))
        print('gpss = %s' % (ut.list_str(gps_tracks[index]), ))
        print('contribs = %s' % (ut.list_str(contrib_tags), ))

        print('speedist_mat = \n' + matrix_str(speed_vecs[index]))
        # show unreviewed pairs as an underscore
        truth_mat_str = ut.numpy_str(truth_mat, precision=2).replace(
            '-1', ' _')

        print('truth_mat = \n' + truth_mat_str)
        print('gpsdist_mat  = \n' + matrix_str(gpsdist_vecs[index]))
        print('hourdist_mat = \n' + matrix_str(hourdist_vecs[index]))

    return offending_nids
Example #14
0
def find_location_disparate_splits(ibs):
    """
    Find names whose sightings are too far apart for the elapsed time.

    Each name with at least two GPS readings gets a condensed vector of
    pairwise haversine distances and one of pairwise time gaps in hours;
    their ratio is the pairwise speed. Names whose maximum pairwise speed is
    above 6 (treated as km/h by this code) are returned, and a diagnostic
    report is printed for each of them.

    NOTE(review): a zero time gap between two annotations makes the speed
    division yield inf / nan values.

    Args:
        ibs: controller object providing the annotation, name, annotmatch and
            contributor accessors used below

    Returns:
        list: name rowids of the offending names, taken in order of
            increasing maximum speed

    CommandLine:
        python -m ibeis.ibsfuncs --test-find_location_disparate_splits

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.ibsfuncs import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('NNP_Master3')
        >>> # execute function
        >>> offending_nids = find_location_disparate_splits(ibs)
        >>> # verify results
        >>> print('offending_nids = %r' % (offending_nids,))

    """
    import scipy.spatial.distance as spdist

    valid_aids = ibs.filter_aids_count()
    nid_list, gps_track_list, aid_track_list = ibs.get_name_gps_tracks(aid_list=valid_aids)

    # Drop names with a single GPS reading; pairs are needed below
    multi_flags = [len(gps_track) > 1 for gps_track in gps_track_list]
    gps_tracks_ = ut.list_compress(gps_track_list, multi_flags)
    aid_tracks_ = ut.list_compress(aid_track_list, multi_flags)
    nids_ = ut.list_compress(nid_list, multi_flags)

    unixtime_tracks_ = ibs.unflat_map(ibs.get_annot_image_unixtimes_asfloat, aid_tracks_)

    # Arrays for pdist; timestamps are reshaped into one column per track
    gps_arrs = [np.array(gps_track) for gps_track in gps_tracks_]
    unixtime_cols = [np.array(unixtimes)[:, None] for unixtimes in unixtime_tracks_]

    def hour_gap(time1, time2):
        # |time1 - time2| converted from seconds to hours
        return np.abs(np.subtract(time1, time2)) / (60 ** 2)

    # Condensed pairwise distances per name
    gpsdist_vectors = [spdist.pdist(gps_arr, metric=ut.haversine) for gps_arr in gps_arrs]
    hourdist_vectors = [spdist.pdist(time_col, metric=hour_gap) for time_col in unixtime_cols]

    # Pairwise speed (distance per hour) per name
    speed_vectors = [gpsdist / hourdist for gpsdist, hourdist in
                     zip(gpsdist_vectors, hourdist_vectors)]

    maxgpsdists = np.array([gpsdist.max() for gpsdist in gpsdist_vectors])
    maxspeeds = np.array([speed.max() for speed in speed_vectors])
    speed_sortx = maxspeeds.argsort()
    sorted_maxspeeds = maxspeeds[speed_sortx]

    if False:
        # Disabled debugging plots
        import plottool as pt
        pt.plot(sorted_maxspeeds)
        flat_gpsdists = np.array(ut.flatten(gpsdist_vectors))
        flat_hourdists = np.array(ut.flatten(hourdist_vectors))

        pt.figure(fnum1=1, doclf=True, docla=True)
        time_sortx = flat_hourdists.argsort()
        pt.plot(flat_gpsdists[time_sortx])
        pt.plot(flat_hourdists[time_sortx])
        pt.iup()

        pt.figure(fnum1=2, doclf=True, docla=True)
        gps_sortx = flat_gpsdists.argsort()
        pt.plot(flat_gpsdists[gps_sortx])
        pt.plot(flat_hourdists[gps_sortx])
        pt.iup()

        pt.iup()

    # NOTE(review): the distance-threshold selection below is never used;
    # the speed-threshold selection that follows determines the result.
    maxgps_sortx = maxgpsdists.argsort()
    gpsdist_thresh = 15
    sorted_maxgpsdists = maxgpsdists[maxgps_sortx]
    unused_gps_offending_sortx = maxgps_sortx.compress(sorted_maxgpsdists > gpsdist_thresh)

    speed_thresh_kph = 6  # kilometers per hour
    offending_sortx = speed_sortx.compress(sorted_maxspeeds > speed_thresh_kph)
    offending_nids = ut.list_take(nids_, offending_sortx)
    print('offending_nids = %r' % (offending_nids,))

    def square_str(condensed_vector):
        # text rendering of the square form of a condensed pdist vector
        square = spdist.squareform(condensed_vector)
        return ut.numpy_str(square, precision=2)

    for index in offending_sortx:
        print('\n\n--- Offender index=%d ---' % (index,))
        aids = aid_tracks_[index]
        nid = nids_[index]
        # sanity check: the whole track belongs to this name
        assert np.all(np.array(ibs.get_annot_name_rowids(aids)) == nid)

        # Review state for all ordered annotation pairs; missing becomes -1
        aid1_list, aid2_list = zip(*list(ut.product(aids, aids)))
        am_rowids = ibs.get_annotmatch_rowid_from_superkey(aid1_list, aid2_list)
        am_truths = ibs.get_annotmatch_truth(am_rowids)
        am_truths = ut.replace_nones(am_truths, -1)
        side = len(aids)
        truth_mat = np.array(am_truths).reshape((side, side))

        gid_list = ibs.get_annot_gids(aids)
        contrib_rowids = ibs.get_image_contributor_rowid(gid_list)
        contrib_tags = ibs.get_contributor_tag(contrib_rowids)

        print('nid = %r' % (nid,))
        print('maxspeed = %.2f km/h' % (maxspeeds[index],))
        print('aids = %r' % (aids,))
        print('gpss = %s' % (ut.list_str(gps_tracks_[index]),))
        print('contribs = %s' % (ut.list_str(contrib_tags),))

        print('speedist_mat = \n' + square_str(speed_vectors[index]))
        # unreviewed pairs are displayed as an underscore
        truth_mat_str = ut.numpy_str(truth_mat, precision=2)
        truth_mat_str = truth_mat_str.replace('-1', ' _')

        print('truth_mat = \n' + truth_mat_str)
        print('gpsdist_mat  = \n' + square_str(gpsdist_vectors[index]))
        print('hourdist_mat = \n' + square_str(hourdist_vectors[index]))

    return offending_nids
Example #15
0
def shadowform_probability():
    """
    Plot, per turn, the chance of holding Raza without any Shadowform.

    This is Hearthstone deck math. The scenarios of interest noted by the
    author are:

        * raza + no shadowform on turn 5
        * raza + shadowform on turn 5
        * kazakus turn 4, raza turn 5, + no shadowform

    The code below only computes the "Raza and no Shadowform" quantity. It is
    evaluated for turns 0 through 25 and plotted twice: once for an opening
    hand of 3 cards ("on play") and once for 4 cards ("with coin"). The model
    assumes a 30 card deck with 2 Shadowform copies and 1 Raza, and simply
    multiplies the two probabilities together.
    """
    from scipy.stats import hypergeom

    def p_badstuff_shadowform(turn=5, hand_size=3):
        deck_size = 30
        num_shadowform = 2
        num_raza = 1
        # cards left in the deck once the opening hand has been dealt
        remaining = deck_size - hand_size

        def p_zero_hits(population, copies, draws):
            # P(none of `copies` shows up in `draws` cards from `population`)
            return hypergeom(population, copies, draws).cdf(0)

        # --- Shadowform ---
        # P(initial_miss)
        p_sf_miss_opening = p_zero_hits(deck_size, num_shadowform, hand_size)
        # P(miss_turn0 | initial_miss): the whole hand is redrawn and misses
        # again. TODO: add constraints about 2 drops
        p_sf_miss_redraw = p_zero_hits(remaining, num_shadowform, hand_size)
        p_no_shadowform_on_turn0 = p_sf_miss_redraw * p_sf_miss_opening
        # P(miss_turnN) = P(miss_turnN | miss_mul) P(miss_mul)
        p_sf_miss_draws = p_zero_hits(remaining, num_shadowform, turn)
        no_shadowform_turnN = p_sf_miss_draws * p_no_shadowform_on_turn0

        # --- Raza (assume you always mul raza) ---
        p_raza_miss_opening = p_zero_hits(deck_size, num_raza, hand_size)
        # probability of still missing if everything is thrown away
        # (TODO: factor in the probability that you need to keep something)
        # for now its fine because if we keep shadowform the end calculation
        # is fine
        p_raza_miss_redraw = p_zero_hits(remaining, num_raza, hand_size)
        # not necessary, but it shows the theory: an opening Raza is always
        # thrown back, so it is certainly not in hand afterwards
        p_miss_given_had = 1
        p_noraza_initial = (
            p_raza_miss_redraw * p_raza_miss_opening +
            (1 - p_raza_miss_opening) * p_miss_given_had
        )
        p_raza_miss_draws = p_zero_hits(remaining, num_raza, turn)
        p_noraza_turnN = p_raza_miss_draws * p_noraza_initial
        p_raza_turnN = 1 - p_noraza_turnN

        # probability that you have raza and no shadowform by the given turn
        return p_raza_turnN * no_shadowform_turnN

    import plottool as pt  # NOQA

    turns = list(range(0, 26))
    # one curve per opening hand size
    for opening_hand, curve_label in [(3, "on play"), (4, "with coin")]:
        probs = [
            p_badstuff_shadowform(turn, hand_size=opening_hand)
            for turn in turns
        ]
        pt.plot(turns, probs, label=curve_label)
    pt.set_xlabel("turn")
    pt.set_ylabel("probability")
    pt.set_title("Probability of Having Raza without a Shadowform")
    pt.legend()
    pt.gca().set_ylim(0, 1)