def test_mcc():
    import plottool as pt
    import sklearn.metrics
    num = 100
    xdata = np.linspace(0, 1, num * 2)
    ydata = np.linspace(1, -1, num * 2)
    pt.plt.plot(xdata, ydata, '--k', label='linear')

    y_true = [1] * num + [0] * num
    y_pred = y_true[:]
    xs = []
    for i in range(0, len(y_true)):
        y_pred[-i] = 1 - y_pred[-i]
        xs.append(sklearn.metrics.matthews_corrcoef(y_true, y_pred))
    pt.plot(xdata, xs, label='change one class at a time')

    y_true = ut.flatten(zip([1] * num, [0] * num))
    y_pred = y_true[:]
    xs = []
    for i in range(0, len(y_true)):
        y_pred[-i] = 1 - y_pred[-i]
        xs.append(sklearn.metrics.matthews_corrcoef(y_true, y_pred))
    pt.plot(xdata, xs, label='change classes evenly')
    pt.gca().legend()

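# A minimal sanity-check sketch (not part of the original script): compute MCC
# directly from the confusion-matrix counts and compare it against
# sklearn.metrics.matthews_corrcoef. Only numpy and sklearn are assumed.
def _check_mcc_by_hand():
    import numpy as np
    import sklearn.metrics
    y_true = np.array([1] * 10 + [0] * 10)
    y_pred = y_true.copy()
    y_pred[:3] = 1 - y_pred[:3]  # corrupt three predictions
    tp = np.sum((y_true == 1) & (y_pred == 1))
    tn = np.sum((y_true == 0) & (y_pred == 0))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))
    denom = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    mcc_manual = (tp * tn - fp * fn) / denom
    mcc_sklearn = sklearn.metrics.matthews_corrcoef(y_true, y_pred)
    assert np.isclose(mcc_manual, mcc_sklearn)
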
def choose_thresh(infr):
    #prob_annots /= prob_annots.sum(axis=1)[:, None]
    # Find connected components
    #thresh = .25
    #thresh = 1 / (1.2 * np.sqrt(prob_names.shape[1]))
    unique_nids, prob_names = infr.make_prob_names()

    if len(unique_nids) <= 2:
        return .5

    nscores = np.sort(prob_names.flatten())
    # x = np.gradient(nscores).argmax()
    # x = (np.gradient(np.gradient(nscores)) ** 2).argmax()
    # thresh = nscores[x]

    curve = nscores
    idx1 = vt.find_elbow_point(curve)
    idx2 = vt.find_elbow_point(curve[idx1:]) + idx1
    if False:
        import plottool as pt
        idx3 = vt.find_elbow_point(curve[idx1:idx2 + 1]) + idx1
        pt.plot(curve)
        pt.plot(idx1, curve[idx1], 'bo')
        pt.plot(idx2, curve[idx2], 'ro')
        pt.plot(idx3, curve[idx3], 'go')
    thresh = nscores[idx2]
    #print('thresh = %r' % (thresh,))
    #thresh = .999
    #thresh = .1
    return thresh

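# A sketch of one common elbow-point construction, for readers without vtool
# on hand: take the point on the curve farthest from the straight line (chord)
# joining its endpoints. This is an assumption about what vt.find_elbow_point
# computes, not a transcription of it.
def _find_elbow_point_sketch(curve):
    import numpy as np
    curve = np.asarray(curve, dtype=float)
    n = len(curve)
    pts = np.column_stack([np.arange(n), curve])
    chord = pts[-1] - pts[0]
    chord = chord / np.linalg.norm(chord)
    # perpendicular distance of every point to the chord
    rel = pts - pts[0]
    proj = np.outer(rel.dot(chord), chord)
    dists = np.linalg.norm(rel - proj, axis=1)
    return int(dists.argmax())
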
def gridsearch_ratio_thresh(matches):
    import sklearn
    import sklearn.metrics
    import sklearn.model_selection
    import vtool as vt
    # Param search for vsone
    import plottool as pt
    pt.qt4ensure()

    skf = sklearn.model_selection.StratifiedKFold(n_splits=10,
                                                  random_state=119372)

    y = np.array([m.annot1['nid'] == m.annot2['nid'] for m in matches])

    basis = {'ratio_thresh': np.linspace(.6, .7, 50).tolist()}
    grid = ut.all_dict_combinations(basis)
    xdata = np.array(ut.take_column(grid, 'ratio_thresh'))

    def _ratio_thresh(y_true, match_list):
        # Try and find the optimal ratio threshold
        auc_list = []
        for cfgdict in ut.ProgIter(grid, lbl='gridsearch'):
            y_score = [
                match.fs.compress(match.ratio_test_flags(cfgdict)).sum()
                for match in match_list
            ]
            auc = sklearn.metrics.roc_auc_score(y_true, y_score)
            auc_list.append(auc)
        auc_list = np.array(auc_list)
        return auc_list

    auc_list = _ratio_thresh(y, matches)
    pt.plot(xdata, auc_list)
    subx, suby = vt.argsubmaxima(auc_list, xdata)
    best_ratio_thresh = subx[suby.argmax()]

    skf_results = []
    y_true = y
    for train_idx, test_idx in skf.split(matches, y):
        match_list_ = ut.take(matches, train_idx)
        y_true = y.take(train_idx)
        auc_list = _ratio_thresh(y_true, match_list_)
        subx, suby = vt.argsubmaxima(auc_list, xdata, maxima_thresh=.8)
        best_ratio_thresh = subx[suby.argmax()]
        skf_results.append(best_ratio_thresh)
    print('mean(skf_results) = %r' % (np.mean(skf_results),))
    import utool
    utool.embed()

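# A self-contained sketch of the same idea on synthetic data: sweep a
# parameter, score each setting by ROC AUC, and keep the argmax. The scoring
# rule below (sum of descriptor scores passing a ratio threshold) is a toy
# stand-in for match.ratio_test_flags, which is not shown in this section.
def _gridsearch_auc_sketch():
    import numpy as np
    import sklearn.metrics
    rng = np.random.RandomState(0)
    n = 200
    y_true = rng.rand(n) > .5
    # toy per-item "ratio" features: positives tend to have lower ratios
    ratios = rng.rand(n, 10) * np.where(y_true, .8, 1.2)[:, None]
    scores = rng.rand(n, 10)
    thresholds = np.linspace(.6, .7, 50)
    auc_list = []
    for t in thresholds:
        y_score = (scores * (ratios < t)).sum(axis=1)
        auc_list.append(sklearn.metrics.roc_auc_score(y_true, y_score))
    best = thresholds[np.argmax(auc_list)]
    return best
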
def test_average_contrast():
    import vtool as vt
    img_fpath_list = [ut.grab_test_imgpath(key)
                      for key in ut.get_valid_test_imgkeys()]
    img_list = [vt.imread(img, grayscale=True) for img in img_fpath_list]
    avecontrast_list = np.array([compute_average_contrast(img)
                                 for img in img_list])
    import plottool as pt
    nCols = len(img_list)
    fnum = None
    if fnum is None:
        fnum = pt.next_fnum()
    # Plot the sorted contrast values on top, the sorted images below
    pt.figure(fnum=fnum, pnum=(2, 1, 1))
    sortx = avecontrast_list.argsort()
    y_list = avecontrast_list[sortx]
    x_list = np.arange(0, nCols) + .5
    pt.plot(x_list, y_list, 'bo-')
    sorted_imgs = ut.take(img_list, sortx)
    for px, img in ut.ProgressIter(enumerate(sorted_imgs, start=1)):
        pt.imshow(img, fnum=fnum, pnum=(2, nCols, nCols + px))

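# compute_average_contrast is defined elsewhere in this module; as a rough
# sketch of the kind of measure it could be, here is the mean local RMS
# contrast over small patches. This is an illustrative assumption, not the
# actual definition used above.
def _average_contrast_sketch(img, patch=8):
    import numpy as np
    img = np.asarray(img, dtype=float)
    h, w = img.shape[:2]
    stds = []
    for r in range(0, h - patch + 1, patch):
        for c in range(0, w - patch + 1, patch):
            stds.append(img[r:r + patch, c:c + patch].std())
    return float(np.mean(stds)) if stds else 0.0
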
def compare_data(Y_list_):
    import ibeis
    qreq_ = ibeis.testdata_qreq_(
        defaultdb='Oxford',
        a='oxford',
        p='smk:nWords=[64000],nAssign=[1],SV=[False],can_match_sameimg=True,dim_size=None'
    )
    qreq_.ensure_data()

    gamma1s = []
    gamma2s = []

    print(len(Y_list_))
    print(len(qreq_.daids))

    dinva = qreq_.dinva
    bady = []
    for Y in Y_list_:
        aid = Y.aid
        gamma1 = Y.gamma
        if aid in dinva.aid_to_idx:
            idx = dinva.aid_to_idx[aid]
            gamma2 = dinva.gamma_list[idx]
            gamma1s.append(gamma1)
            gamma2s.append(gamma2)
        else:
            bady += [Y]
            print(Y.nid)
            # print(Y.qual)

    # ibs = qreq_.ibs
    # z = ibs.annots([a.aid for a in bady])

    import plottool as pt
    ut.qtensure()
    gamma1s = np.array(gamma1s)
    gamma2s = np.array(gamma2s)
    sortx = gamma1s.argsort()
    pt.plot(gamma1s[sortx], label='script')
    pt.plot(gamma2s[sortx], label='pipe')
    pt.legend()

def ewma():
    import plottool as pt
    import ubelt as ub
    import numpy as np
    pt.qtensure()

    # Investigate the span parameter
    span = 20
    alpha = 2 / (span + 1)

    # How long does it take for the estimate to decay to ~0?
    # (i.e., until it no longer cares about the initial 1?)
    # about 93 iterations to get to 1e-4
    # about 47 iterations to get to 1e-2
    # about 24 iterations to get to 1e-1
    # 20 iterations goes to .135
    data = (
        [1] + [0] * 20 + [1] * 40 + [0] * 20 + [1] * 50 + [0] * 20 +
        [1] * 60 + [0] * 20 + [1] * 165 + [0] * 20 + [0]
    )
    mave = []

    iter_ = iter(data)
    current = next(iter_)
    mave += [current]
    for x in iter_:
        current = (alpha * x) + (1 - alpha) * current
        mave += [current]

    if False:
        pt.figure(fnum=1, doclf=True)
        pt.plot(data)
        pt.plot(mave)

    np.where(np.array(mave) < 1e-1)

    import sympy as sym
    # span, alpha, n = sym.symbols('span, alpha, n')
    n = sym.symbols('n', integer=True, nonnegative=True, finite=True)
    span = sym.symbols('span', integer=True, nonnegative=True, finite=True)
    thresh = sym.symbols('thresh', real=True, nonnegative=True, finite=True)
    # alpha = 2 / (span + 1)

    a, b, c = sym.symbols('a, b, c', real=True, nonnegative=True, finite=True)
    sym.solve(sym.Eq(b**a, c), a)

    current = 1
    x = 0
    steps = []
    for _ in range(10):
        current = (alpha * x) + (1 - alpha) * current
        steps.append(current)

    alpha = sym.symbols('alpha', real=True, nonnegative=True, finite=True)
    base = sym.symbols('base', real=True, finite=True)
    alpha = 2 / (span + 1)
    thresh_expr = (1 - alpha)**n
    n_expr = sym.ceiling(sym.log(thresh) / sym.log(1 - 2 / (span + 1)))
    sym.pprint(sym.simplify(thresh_expr))
    sym.pprint(sym.simplify(n_expr))
    print(sym.latex(sym.simplify(n_expr)))

    # def calc_n2(span, thresh):
    #     return np.log(thresh) / np.log(1 - 2 / (span + 1))

    def calc_n(span, thresh):
        return np.log(thresh) / np.log((span - 1) / (span + 1))

    def calc_thresh_val(n, span):
        alpha = 2 / (span + 1)
        return (1 - alpha)**n

    # (use separate names so the sympy symbols span/n are not clobbered)
    span_arr = np.arange(2, 200)
    n_frac = calc_n(span_arr, thresh=.5)
    n_arr = np.ceil(n_frac)
    calc_thresh_val(n_arr, span_arr)

    pt.figure(fnum=1, doclf=True)
    ydatas = ut.odict([
        ('thresh=%f' % thresh_val, np.ceil(calc_n(span_arr, thresh=thresh_val)))
        for thresh_val in [1e-3, .01, .1, .2, .3, .4, .5]
    ])
    pt.multi_plot(
        span_arr, ydatas,
        xlabel='span',
        ylabel='n iters to achieve thresh',
        marker='',
        # num_xticks=len(span_arr),
        fnum=1)
    pt.gca().set_aspect('equal')

    def both_sides(eqn, func):
        return sym.Eq(func(eqn.lhs), func(eqn.rhs))

    eqn = sym.Eq(thresh_expr, thresh)
    n_expr = sym.solve(eqn, n)[0].subs(base, (1 - alpha)).subs(
        alpha, (2 / (span + 1)))

    eqn = both_sides(eqn, lambda x: sym.log(x, (1 - alpha)))
    lhs = eqn.lhs

    from sympy.solvers.inequalities import solve_univariate_inequality

    def eval_expr(span_value, n_value):
        return np.array(
            [thresh_expr.subs(span, span_value).subs(n, n_) for n_ in n_value],
            dtype=float)

    eval_expr(20, np.arange(20))

    def linear(x, a, b):
        return a * x + b

    def sigmoidal_4pl(x, a, b, c, d):
        return d + (a - d) / (1 + (x / c)**b)

    def exponential(x, a, b, c):
        return a + b * np.exp(-c * x)

    import scipy.optimize

    # Determine how to choose span such that the estimate decays from 1
    # to .01 in n timesteps
    thresh_to_span_to_n = []
    thresh_to_n_to_span = []
    for thresh_value in ub.ProgIter([.0001, .001, .01, .1, .2, .3, .4, .5]):
        print('')
        test_vals = sorted([2, 3, 4, 5, 6])
        n_to_span = []
        for n_value in ub.ProgIter(test_vals):
            # In n iterations I want to choose a span such that the
            # expression drops below the threshold
            constraint = thresh_expr.subs(n, n_value) < thresh_value
            solution = solve_univariate_inequality(constraint, span)
            try:
                lowbound = np.ceil(float(solution.args[0].lhs))
                highbound = np.floor(float(solution.args[1].rhs))
                assert lowbound <= highbound
                span_value = lowbound
            except AttributeError:
                span_value = np.floor(float(solution.rhs))
            n_to_span.append((n_value, span_value))

        # Given a threshold, find the minimum number of steps
        # that brings the estimate below that threshold given a span
        test_vals = sorted(set(list(range(2, 1000, 50)) + [2, 3, 4, 5, 6]))
        span_to_n = []
        for span_value in ub.ProgIter(test_vals):
            constraint = thresh_expr.subs(span, span_value) < thresh_value
            solution = solve_univariate_inequality(constraint, n)
            n_value = solution.lhs
            span_to_n.append((span_value, n_value))

        thresh_to_n_to_span.append((thresh_value, n_to_span))
        thresh_to_span_to_n.append((thresh_value, span_to_n))

    thresh_to_params = []
    for thresh_value, span_to_n in thresh_to_span_to_n:
        xdata, ydata = [np.array(_, dtype=float) for _ in zip(*span_to_n)]

        p0 = (1 / np.diff((ydata - ydata[0])[1:]).mean(), ydata[0])
        func = linear
        popt, pcov = scipy.optimize.curve_fit(func, xdata, ydata, p0)
        # popt, pcov = scipy.optimize.curve_fit(exponential, xdata, ydata)

        if False:
            yhat = func(xdata, *popt)
            pt.figure(fnum=1, doclf=True)
            pt.plot(xdata, ydata, label='measured')
            pt.plot(xdata, yhat, label='predicted')
            pt.legend()

        # slope = np.diff(ydata).mean()
        # pt.plot(d)
        thresh_to_params.append((thresh_value, popt))

    # pt.plt.plot(*zip(*thresh_to_slope), 'x-')

    # for thresh_value=.01, we get a rough line with slope ~2.302,
    # for thresh_value=.5, we get a line with slope ~34.66
    # if we want to get to 0 in n timesteps with a given thresh_value,
    # choose span = f(thresh_value) * (n + 2)
    # f is some inverse exponential
    # 0.0001, 460.551314197147
    # 0.001,  345.413485647860
    # 0.01,   230.275657098573
    # 0.1,    115.137828549287
    # 0.2,    80.4778885203347
    # 0.3,    60.2031233261536
    # 0.4,    45.8179484913827
    # 0.5,    34.6599400289520

    # Seems to be a 4PL symmetrical sigmoid
    # f(x) = -66500.85 + (66515.88 - -66500.85) / (1 + (x/0.8604672)^0.001503716)
    def f(x):
        return -66500.85 + (66515.88 - -66500.85) / (
            1 + (x / 0.8604672)**0.001503716)
        # return (10000 * (-6.65 + (13.3015) / (1 + (x/0.86) ** 0.00150)))

    # f(.5) * (n - 1)
    # solve_rational_inequalities(thresh_expr < .01, n)

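# A standalone check of the closed form used above (only numpy assumed): with
# alpha = 2 / (span + 1), an EWMA started at 1 and fed zeros decays as
# (1 - alpha)**n, so the first n with value below `thresh` is
# ceil(log(thresh) / log((span - 1) / (span + 1))).
def _check_ewma_decay(span=20, thresh=1e-1):
    import numpy as np
    alpha = 2 / (span + 1)
    n_pred = int(np.ceil(np.log(thresh) / np.log((span - 1) / (span + 1))))
    # simulate the recurrence directly
    current = 1.0
    n_sim = 0
    while current >= thresh:
        current = (1 - alpha) * current
        n_sim += 1
    assert n_sim == n_pred, (n_sim, n_pred)
    return n_pred
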
def iters_until_threshold():
    """
    How many iterations of ewma until you hit the poisson / binomial
    threshold?

    This establishes a principled way to choose the threshold for the refresh
    criterion in my thesis. There are parameters (moving parts) that we need
    to work with: `a` the patience, `s` the span, and `mu` our ewma.

    `s` is a span parameter indicating how far we look back.

    `mu` is the average number of label-changing reviews in roughly the last
    `s` manual decisions.

    These numbers are used to estimate the probability that any of the next
    `a` manual decisions will be label-changing.

    When that probability falls below a threshold we terminate. The goal is
    to choose `a`, `s`, and the threshold `t`, such that the probability
    will fall below the threshold after a maximum of `a` consecutive
    non-label-changing reviews. I.e. we want to tie the patience parameter
    (how far we look ahead) to how far we actually are willing to go.
    """
    import numpy as np
    import utool as ut
    import sympy as sym
    i = sym.symbols('i', integer=True, nonnegative=True, finite=True)
    # mu_i = sym.symbols('mu_i', integer=True, nonnegative=True, finite=True)
    s = sym.symbols('s', integer=True, nonnegative=True, finite=True)  # NOQA
    thresh = sym.symbols('tau', real=True, nonnegative=True, finite=True)  # NOQA
    alpha = sym.symbols('alpha', real=True, nonnegative=True, finite=True)  # NOQA
    c_alpha = sym.symbols('c_alpha', real=True, nonnegative=True, finite=True)
    # patience
    a = sym.symbols('a', real=True, nonnegative=True, finite=True)

    available_subs = {
        a: 20,
        s: a,
        alpha: 2 / (s + 1),
        c_alpha: (1 - alpha),
    }

    def dosubs(expr, d=available_subs):
        """ recursive expression substitution """
        expr1 = expr.subs(d)
        if expr == expr1:
            return expr1
        else:
            return dosubs(expr1, d=d)

    # mu is either the support for the poisson distribution or the p in the
    # binomial distribution. It is updated at timestep i based on ewma,
    # assuming each incoming response is 0.
    mu_0 = 1.0
    mu_i = c_alpha**i

    # Estimate the probability that any event will happen in the next `a`
    # reviews at time `i`.
    poisson_i = 1 - sym.exp(-mu_i * a)
    binom_i = 1 - (1 - mu_i)**a

    # Expand probabilities to be a function of i, s, and a
    part = ut.delete_dict_keys(available_subs.copy(), [a, s])
    mu_i = dosubs(mu_i, d=part)
    poisson_i = dosubs(poisson_i, d=part)
    binom_i = dosubs(binom_i, d=part)

    if True:
        # ewma of mu at time i if the review is never label-changing
        mu_1 = c_alpha * mu_0  # NOQA
        mu_2 = c_alpha * mu_1  # NOQA

    if True:
        i_vals = np.arange(0, 100)
        mu_vals = np.array(
            [dosubs(mu_i).subs({i: i_}).evalf() for i_ in i_vals])  # NOQA
        binom_vals = np.array(
            [dosubs(binom_i).subs({i: i_}).evalf() for i_ in i_vals])  # NOQA
        poisson_vals = np.array(
            [dosubs(poisson_i).subs({i: i_}).evalf() for i_ in i_vals])  # NOQA

        # Find how many iters it actually takes my expt to terminate
        thesis_draft_thresh = np.exp(-2)
        np.where(mu_vals < thesis_draft_thresh)[0]
        np.where(binom_vals < thesis_draft_thresh)[0]
        np.where(poisson_vals < thesis_draft_thresh)[0]

    sym.pprint(sym.simplify(mu_i))
    sym.pprint(sym.simplify(binom_i))
    sym.pprint(sym.simplify(poisson_i))

    # Find the thresholds that force termination after `a` reviews have
    # passed, by setting i = a
    poisson_thresh = poisson_i.subs({i: a})
    binom_thresh = binom_i.subs({i: a})

    print('Poisson thresh')
    print(sym.latex(sym.Eq(thresh, poisson_thresh)))
    print(sym.latex(sym.Eq(thresh, sym.simplify(poisson_thresh))))

    poisson_thresh.subs({a: 115, s: 30}).evalf()

    sym.pprint(sym.Eq(thresh, poisson_thresh))
    sym.pprint(sym.Eq(thresh, sym.simplify(poisson_thresh)))

    print('Binomial thresh')
    sym.pprint(sym.simplify(binom_thresh))
    sym.pprint(sym.simplify(poisson_thresh.subs({s: a})))

    def taud(coeff):
        # convert fractions of a turn to degrees
        return coeff * 360

    if 'poisson_cache' not in vars():
        poisson_cache = {}
        binom_cache = {}

    S, A = np.meshgrid(np.arange(1, 150, 1), np.arange(0, 150, 1))

    import plottool as pt
    SA_coords = list(zip(S.ravel(), A.ravel()))
    for sval, aval in ut.ProgIter(SA_coords):
        if (sval, aval) not in poisson_cache:
            poisson_cache[(sval, aval)] = float(
                poisson_thresh.subs({a: aval, s: sval}).evalf())
    poisson_zdata = np.array(
        [poisson_cache[(sval, aval)]
         for sval, aval in SA_coords]).reshape(A.shape)
    fig = pt.figure(fnum=1, doclf=True)
    pt.gca().set_axis_off()
    pt.plot_surface3d(S, A, poisson_zdata, xlabel='s', ylabel='a',
                      rstride=3, cstride=3, zlabel='poisson', mode='wire',
                      contour=True, title='poisson3d')
    pt.gca().set_zlim(0, 1)
    pt.gca().view_init(elev=taud(1 / 16), azim=taud(5 / 8))
    fig.set_size_inches(10, 6)
    fig.savefig('a-s-t-poisson3d.png', dpi=300,
                bbox_inches=pt.extract_axes_extents(fig, combine=True))

    for sval, aval in ut.ProgIter(SA_coords):
        if (sval, aval) not in binom_cache:
            binom_cache[(sval, aval)] = float(
                binom_thresh.subs({a: aval, s: sval}).evalf())
    binom_zdata = np.array(
        [binom_cache[(sval, aval)]
         for sval, aval in SA_coords]).reshape(A.shape)
    fig = pt.figure(fnum=2, doclf=True)
    pt.gca().set_axis_off()
    pt.plot_surface3d(S, A, binom_zdata, xlabel='s', ylabel='a',
                      rstride=3, cstride=3, zlabel='binom', mode='wire',
                      contour=True, title='binom3d')
    pt.gca().set_zlim(0, 1)
    pt.gca().view_init(elev=taud(1 / 16), azim=taud(5 / 8))
    fig.set_size_inches(10, 6)
    fig.savefig('a-s-t-binom3d.png', dpi=300,
                bbox_inches=pt.extract_axes_extents(fig, combine=True))

    # Find the point on the surface that achieves a reasonable threshold.
    # Sympy can't solve this:
    # sym.solve(sym.Eq(binom_thresh.subs({s: 50}), .05))
    # sym.solve(sym.Eq(poisson_thresh.subs({s: 50}), .05))
    # so find a numerical solution instead.

    def solve_numeric(expr, target, solve_for, fixed={}, method=None,
                      bounds=None):
        """
        Args:
            expr (Expr): symbolic expression
            target (float): numeric value
            solve_for (sympy.Symbol): the symbol you care about
            fixed (dict): fixed values of the other symbols

        Example:
            solve_numeric(poisson_thresh, .05, a, {s: 30}, method=None)
            solve_numeric(poisson_thresh, .05, a, {s: 30}, method='Nelder-Mead')
            solve_numeric(poisson_thresh, .05, a, {s: 30}, method='BFGS')
        """
        import scipy.optimize
        # Find the symbol you want to solve for
        want_symbols = expr.free_symbols - set(fixed.keys())
        # TODO: can probably extend this to multiple params
        assert len(want_symbols) == 1, 'specify all but one var'
        assert solve_for == list(want_symbols)[0]
        fixed_expr = expr.subs(fixed)

        def func(a1):
            expr_value = float(fixed_expr.subs({solve_for: a1}).evalf())
            return (expr_value - target)**2

        if not fixed:
            a1 = 0
        else:
            a1 = list(fixed.values())[0]
        # if method is None:
        #     method = 'Nelder-Mead'
        #     method = 'Newton-CG'
        #     method = 'BFGS'
        result = scipy.optimize.minimize(func, x0=a1, method=method,
                                         bounds=bounds)
        if not result.success:
            print('\n')
            print(result)
            print('\n')
        return result

    # Numeric measurements of this line
    thresh_vals = [.001, .01, .05, .1, .135]
    svals = np.arange(1, 100)

    target_poisson_plots = {}
    for target in ut.ProgIter(thresh_vals, bs=False, freq=1):
        poisson_avals = []
        for sval in ut.ProgIter(svals, 'poisson', freq=1):
            expr = poisson_thresh
            fixed = {s: sval}
            want = a
            aval = solve_numeric(expr, target, want, fixed,
                                 method='Nelder-Mead').x[0]
            poisson_avals.append(aval)
        target_poisson_plots[target] = (svals, poisson_avals)

    fig = pt.figure(fnum=3)
    for target, dat in target_poisson_plots.items():
        pt.plt.plot(*dat, label='prob={}'.format(target))
    pt.gca().set_xlabel('s')
    pt.gca().set_ylabel('a')
    pt.legend()
    pt.gca().set_title('poisson')
    fig.set_size_inches(5, 3)
    fig.savefig('a-vs-s-poisson.png', dpi=300,
                bbox_inches=pt.extract_axes_extents(fig, combine=True))

    target_binom_plots = {}
    for target in ut.ProgIter(thresh_vals, bs=False, freq=1):
        binom_avals = []
        for sval in ut.ProgIter(svals, 'binom', freq=1):
            aval = solve_numeric(binom_thresh, target, a, {s: sval},
                                 method='Nelder-Mead').x[0]
            binom_avals.append(aval)
        target_binom_plots[target] = (svals, binom_avals)

    fig = pt.figure(fnum=4)
    for target, dat in target_binom_plots.items():
        pt.plt.plot(*dat, label='prob={}'.format(target))
    pt.gca().set_xlabel('s')
    pt.gca().set_ylabel('a')
    pt.legend()
    pt.gca().set_title('binom')
    fig.set_size_inches(5, 3)
    fig.savefig('a-vs-s-binom.png', dpi=300,
                bbox_inches=pt.extract_axes_extents(fig, combine=True))

    # ----
    if True:
        fig = pt.figure(fnum=5, doclf=True)
        s_vals = [1, 2, 3, 10, 20, 30, 40, 50]
        for sval in s_vals:
            pp = poisson_thresh.subs({s: sval})
            a_vals = np.arange(0, 200)
            pp_vals = np.array(
                [float(pp.subs({a: aval}).evalf()) for aval in a_vals])  # NOQA
            pt.plot(a_vals, pp_vals, label='s=%r' % (sval,))
        pt.legend()
        pt.gca().set_xlabel('a')
        pt.gca().set_ylabel('poisson prob after a reviews')
        fig.set_size_inches(5, 3)
        fig.savefig('a-vs-thresh-poisson.png', dpi=300,
                    bbox_inches=pt.extract_axes_extents(fig, combine=True))

        fig = pt.figure(fnum=6, doclf=True)
        s_vals = [1, 2, 3, 10, 20, 30, 40, 50]
        for sval in s_vals:
            pp = binom_thresh.subs({s: sval})
            a_vals = np.arange(0, 200)
            pp_vals = np.array(
                [float(pp.subs({a: aval}).evalf()) for aval in a_vals])  # NOQA
            pt.plot(a_vals, pp_vals, label='s=%r' % (sval,))
        pt.legend()
        pt.gca().set_xlabel('a')
        pt.gca().set_ylabel('binom prob after a reviews')
        fig.set_size_inches(5, 3)
        fig.savefig('a-vs-thresh-binom.png', dpi=300,
                    bbox_inches=pt.extract_axes_extents(fig, combine=True))

    # -------
    fig = pt.figure(fnum=5, doclf=True)
    a_vals = [1, 2, 3, 10, 20, 30, 40, 50]
    for aval in a_vals:
        pp = poisson_thresh.subs({a: aval})
        s_vals = np.arange(1, 200)
        pp_vals = np.array(
            [float(pp.subs({s: sval}).evalf()) for sval in s_vals])  # NOQA
        pt.plot(s_vals, pp_vals, label='a=%r' % (aval,))
    pt.legend()
    pt.gca().set_xlabel('s')
    pt.gca().set_ylabel('poisson prob')
    fig.set_size_inches(5, 3)
    fig.savefig('s-vs-thresh-poisson.png', dpi=300,
                bbox_inches=pt.extract_axes_extents(fig, combine=True))

    fig = pt.figure(fnum=5, doclf=True)
    a_vals = [1, 2, 3, 10, 20, 30, 40, 50]
    for aval in a_vals:
        pp = binom_thresh.subs({a: aval})
        s_vals = np.arange(1, 200)
        pp_vals = np.array(
            [float(pp.subs({s: sval}).evalf()) for sval in s_vals])  # NOQA
        pt.plot(s_vals, pp_vals, label='a=%r' % (aval,))
    pt.legend()
    pt.gca().set_xlabel('s')
    pt.gca().set_ylabel('binom prob')
    fig.set_size_inches(5, 3)
    fig.savefig('s-vs-thresh-binom.png', dpi=300,
                bbox_inches=pt.extract_axes_extents(fig, combine=True))

    # ---------------------
    # Plot out a table

    mu_i.subs({s: 75, a: 75}).evalf()
    poisson_thresh.subs({s: 75, a: 75}).evalf()

    sval = 50
    for target, dat in target_poisson_plots.items():
        slope = np.median(np.diff(dat[1]))
        aval = int(np.ceil(sval * slope))
        thresh = float(poisson_thresh.subs({s: sval, a: aval}).evalf())
        print('aval={}, sval={}, thresh={}, target={}'.format(
            aval, sval, thresh, target))

    for target, dat in target_binom_plots.items():
        slope = np.median(np.diff(dat[1]))
        aval = int(np.ceil(sval * slope))
    pass

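# A plain-numpy restatement (a sketch under the same definitions as above) of
# the two stopping thresholds: after `a` consecutive non-label-changing
# reviews, the ewma has decayed to mu_a = (1 - alpha)**a with
# alpha = 2 / (s + 1), and the probability that any of the next `a` reviews
# is label-changing is modeled as poisson or binomial.
def _refresh_thresholds(s, a):
    import numpy as np
    alpha = 2 / (s + 1)
    mu_a = (1 - alpha)**a
    poisson_thresh = 1 - np.exp(-mu_a * a)
    binom_thresh = 1 - (1 - mu_a)**a
    return poisson_thresh, binom_thresh

# e.g. _refresh_thresholds(s=30, a=115) reproduces the value evaluated
# symbolically above via poisson_thresh.subs({a: 115, s: 30}).
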
def theano_gradient_funtimes():
    import theano
    import numpy as np
    import theano.tensor as T
    import lasagne
    import ibeis_cnn.theano_ext as theano_ext
    TEST = True

    x_data = np.linspace(-10, 10, 100).astype(np.float32)[:, None, None, None]
    y_data = (x_data**2).flatten()[:, None]

    X = T.tensor4('x')
    y = T.matrix('y')

    #x_data_batch =
    #y_data_batch =
    inputs_to_value = {X: x_data[0:16], y: y_data[0:16]}

    l_in = lasagne.layers.InputLayer((16, 1, 1, 1))
    l_out = lasagne.layers.DenseLayer(
        l_in,
        num_units=1,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Orthogonal())

    network_output = lasagne.layers.get_output(l_out, X)

    # TEST NETWORK OUTPUT
    if TEST:
        result = theano_ext.eval_symbol(network_output, inputs_to_value)
        print('network_output = %r' % (result,))

    loss_function = lasagne.objectives.squared_error
    #def loss_function(network_output, labels):
    #    return (network_output - labels) ** 2

    losses = loss_function(network_output, y)
    if TEST:
        result = theano_ext.eval_symbol(losses, inputs_to_value)
        print('losses = %r' % (result,))

    loss = lasagne.objectives.aggregate(losses, mode='mean')

    if TEST:
        result = theano_ext.eval_symbol(loss, inputs_to_value)
        print('loss = %r' % (result,))

    L2 = lasagne.regularization.regularize_network_params(
        l_out, lasagne.regularization.l2)
    weight_decay = .0001
    loss_regularized = loss + weight_decay * L2
    loss_regularized.name = 'loss_regularized'

    parameters = lasagne.layers.get_all_params(l_out)

    gradients_regularized = theano.grad(loss_regularized, parameters,
                                        add_names=True)

    if TEST:
        if False:
            s = T.sum(1 / (1 + T.exp(-X)))
            s.name = 's'
            gs = T.grad(s, X, add_names=True)
            theano.pp(gs)
            inputs_to_value = {X: x_data[0:16], y: y_data[0:16]}
            result = theano_ext.eval_symbol(gs, inputs_to_value)
            print('%s = %r' % (gs.name, result,))
            inputs_to_value = {X: x_data[16:32], y: y_data[16:32]}
            result = theano_ext.eval_symbol(gs, inputs_to_value)
            print('%s = %r' % (gs.name, result,))

        for grad in gradients_regularized:
            result = theano_ext.eval_symbol(grad, inputs_to_value)
            print('%s = %r' % (grad.name, result,))

        grad_on_losses = theano.grad(losses, parameters, add_names=True)

    learning_rate_theano = .0001
    momentum = .9
    updates = lasagne.updates.nesterov_momentum(
        gradients_regularized, parameters, learning_rate_theano, momentum)

    X_batch = T.tensor4('x_batch')
    y_batch = T.fvector('y_batch')
    func = theano.function(
        inputs=[theano.Param(X_batch), theano.Param(y_batch)],
        outputs=[network_output, losses],
        #updates=updates,
        givens={
            X: X_batch,
            y: y_batch,
        },
    )

    y_predict_batch, loss_batch = func(inputs_to_value[X], inputs_to_value[y])

    if ut.inIPython():
        import IPython
        IPython.get_ipython().magic('pylab qt4')

    import plottool as pt
    pt.plot(x_data, y_predict_batch)
    pt.iup()
    pass

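# Theano is long retired; as a framework-free sketch of the same debugging
# idea, here is a finite-difference check of a hand-derived gradient for the
# mean squared error of a one-unit ReLU layer, in plain numpy.
def _numeric_gradient_check():
    import numpy as np
    rng = np.random.RandomState(0)
    x = rng.randn(16, 1)
    y = x**2
    w = rng.randn(1, 1)

    def loss(w):
        pred = np.maximum(x.dot(w), 0)  # ReLU(x @ w)
        return ((pred - y)**2).mean()

    # analytic gradient of the MSE w.r.t. the scalar weight w
    pred = np.maximum(x.dot(w), 0)
    grad = (2 * (pred - y) * (x.dot(w) > 0) * x).mean()
    # central finite difference
    eps = 1e-5
    grad_fd = (loss(w + eps) - loss(w - eps)) / (2 * eps)
    assert np.allclose(grad, grad_fd, atol=1e-4)
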
def draw_roc_curve(fpr, tpr, fnum=None, pnum=None, marker='-',
                   target_tpr=None, target_fpr=None, thresholds=None,
                   color=None, show_operating_point=False):
    r"""
    Args:
        fpr (ndarray): false positive rates
        tpr (ndarray): true positive rates
        fnum (int): figure number (default = None)
        pnum (tuple): plot number (default = None)
        marker (str): (default = '-')
        target_tpr (None): (default = None)
        target_fpr (None): (default = None)
        thresholds (None): (default = None)
        color (None): (default = None)
        show_operating_point (bool): (default = False)

    CommandLine:
        python -m vtool.confusion --exec-draw_roc_curve --show --lightbg

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.confusion import *  # NOQA
        >>> scores, labels = testdata_scores_labels()
        >>> confusions = get_confusion_metrics(scores, labels)
        >>> fpr = confusions.fpr
        >>> tpr = confusions.tpr
        >>> thresholds = confusions.thresholds
        >>> fnum = None
        >>> pnum = None
        >>> marker = '-x'
        >>> target_tpr = .85
        >>> target_fpr = None
        >>> color = None
        >>> show_operating_point = True
        >>> draw_roc_curve(fpr, tpr, fnum, pnum, marker, target_tpr, target_fpr,
        >>>                thresholds, color, show_operating_point)
        >>> ut.show_if_requested()
    """
    import plottool as pt
    if fnum is None:
        fnum = pt.next_fnum()

    if color is None:
        color = (0.4, 1.0, 0.4) if pt.is_default_dark_bg() else (0.1, 0.4, 0.4)

    roc_auc = sklearn.metrics.auc(fpr, tpr)

    title_suffix = ''

    if target_fpr is not None:
        #func = scipy.interpolate.interp1d(fpr, tpr, kind='linear', assume_sorted=False)
        #func = scipy.interpolate.interp1d(xdata, ydata, kind='nearest', assume_sorted=False)
        #interp_vals[interp_mask] = func(pt[interp_mask])
        target_fpr = np.clip(target_fpr, 0, 1)
        interp_tpr = interpolate_replbounds(fpr, tpr, target_fpr)
        choice_tpr = interp_tpr
        choice_fpr = target_fpr
    elif target_tpr is not None:
        target_tpr = np.clip(target_tpr, 0, 1)
        interp_fpr = interpolate_replbounds(tpr, fpr, target_tpr)
        choice_tpr = target_tpr
        choice_fpr = interp_fpr
    else:
        choice_tpr = None
        choice_fpr = None

    if choice_fpr is not None:
        choice_thresh = 0
        if thresholds is not None:
            try:
                index = np.nonzero(tpr >= choice_tpr)[0][0]
            except IndexError:
                index = len(thresholds) - 1
            choice_thresh = thresholds[index]
        #percent = ut.scalar_str(choice_tpr * 100).split('.')[0]
        #title_suffix = ', FPR%s=%05.2f%%' % (percent, choice_fpr)
        title_suffix = ''
        if show_operating_point:
            title_suffix = ', fpr=%.2f, tpr=%.2f, thresh=%.2f' % (
                choice_fpr, choice_tpr, choice_thresh)
    else:
        title_suffix = ''

    #if recall_domain is None:
    #    ave_p = np.nan
    #else:
    #    ave_p = p_interp.sum() / p_interp.size

    title = 'Receiver operating characteristic\n' + 'AUC=%.3f' % (roc_auc,)
    title += title_suffix

    pt.plot2(fpr, tpr, marker=marker,
             x_label='False Positive Rate',
             y_label='True Positive Rate',
             unitbox=True, flipx=False, color=color, fnum=fnum, pnum=pnum,
             title=title)

    if False:
        # Interp does not work right because of duplicate values
        # in xdomain
        line_ = np.linspace(.11, .9, 20)
        #np.append([np.inf], np.diff(fpr)) > 0
        #np.append([np.inf], np.diff(tpr)) > 0
        unique_tpr_idxs = np.nonzero(np.append([np.inf], np.diff(tpr)) > 0)[0]
        unique_fpr_idxs = np.nonzero(np.append([np.inf], np.diff(fpr)) > 0)[0]
        pt.plt.plot(
            line_,
            interpolate_replbounds(fpr[unique_fpr_idxs],
                                   tpr[unique_fpr_idxs], line_), 'b-x')
        pt.plt.plot(
            interpolate_replbounds(tpr[unique_tpr_idxs],
                                   fpr[unique_tpr_idxs], line_),
            line_, 'r-x')

    if choice_fpr is not None:
        pt.plot(choice_fpr, choice_tpr, 'o', color=pt.PINK)

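# A sketch of how the inputs above can be produced with sklearn. The synthetic
# scores/labels here are stand-ins for testdata_scores_labels and
# get_confusion_metrics, which are not shown in this section.
def _draw_roc_curve_demo():
    import numpy as np
    import sklearn.metrics
    rng = np.random.RandomState(0)
    labels = (rng.rand(100) > .5).astype(int)
    scores = labels + rng.randn(100) * .5
    fpr, tpr, thresholds = sklearn.metrics.roc_curve(labels, scores)
    draw_roc_curve(fpr, tpr, thresholds=thresholds, target_tpr=.85,
                   show_operating_point=True)
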
def find_location_disparate_splits(ibs):
    """
    CommandLine:
        python -m ibeis.other.ibsfuncs --test-find_location_disparate_splits

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.other.ibsfuncs import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('NNP_Master3')
        >>> # execute function
        >>> offending_nids = find_location_disparate_splits(ibs)
        >>> # verify results
        >>> print('offending_nids = %r' % (offending_nids,))
    """
    import scipy.spatial.distance as spdist
    import functools
    #aid_list_count = ibs.get_valid_aids()
    aid_list_count = ibs.filter_aids_count()
    nid_list, gps_track_list, aid_track_list = ibs.get_name_gps_tracks(
        aid_list=aid_list_count)

    # Filter to only multitons
    has_multiple_list = [len(gps_track) > 1 for gps_track in gps_track_list]
    gps_track_list_ = ut.list_compress(gps_track_list, has_multiple_list)
    aid_track_list_ = ut.list_compress(aid_track_list, has_multiple_list)
    nid_list_ = ut.list_compress(nid_list, has_multiple_list)

    # Other properties
    unixtime_track_list_ = ibs.unflat_map(
        ibs.get_annot_image_unixtimes_asfloat, aid_track_list_)

    # Move into arrays
    gpsarr_track_list_ = list(map(np.array, gps_track_list_))
    unixtimearr_track_list_ = [np.array(unixtimes)[:, None]
                               for unixtimes in unixtime_track_list_]

    def unixtime_hourdiff(x, y):
        return np.abs(np.subtract(x, y)) / (60**2)

    haversin_pdist = functools.partial(spdist.pdist, metric=ut.haversine)
    unixtime_pdist = functools.partial(spdist.pdist, metric=unixtime_hourdiff)
    # Get distances
    gpsdist_vector_list = list(map(haversin_pdist, gpsarr_track_list_))
    hourdist_vector_list = list(map(unixtime_pdist, unixtimearr_track_list_))

    # Get the speed in kilometers per hour for each animal
    speed_vector_list = [
        gpsdist / hourdist
        for gpsdist, hourdist in zip(gpsdist_vector_list,
                                     hourdist_vector_list)
    ]

    #maxhourdist_list = np.array([hourdist_vector.max() for hourdist_vector in hourdist_vector_list])
    maxgpsdist_list = np.array(
        [gpsdist_vector.max() for gpsdist_vector in gpsdist_vector_list])
    maxspeed_list = np.array(
        [speed_vector.max() for speed_vector in speed_vector_list])
    sortx = maxspeed_list.argsort()
    sorted_maxspeed_list = maxspeed_list[sortx]
    #sorted_nid_list = np.array(ut.list_take(nid_list_, sortx))

    if False:
        import plottool as pt
        pt.plot(sorted_maxspeed_list)
        allgpsdist_list = np.array(ut.flatten(gpsdist_vector_list))
        alltimedist_list = np.array(ut.flatten(hourdist_vector_list))

        pt.figure(fnum=1, doclf=True, docla=True)
        alltime_sortx = alltimedist_list.argsort()
        pt.plot(allgpsdist_list[alltime_sortx])
        pt.plot(alltimedist_list[alltime_sortx])
        pt.iup()

        pt.figure(fnum=2, doclf=True, docla=True)
        allgps_sortx = allgpsdist_list.argsort()
        pt.plot(allgpsdist_list[allgps_sortx])
        pt.plot(alltimedist_list[allgps_sortx])
        pt.iup()

        #maxgps_sortx = maxgpsdist_list.argsort()
        #pt.plot(maxgpsdist_list[maxgps_sortx])
        pt.iup()

    maxgps_sortx = maxgpsdist_list.argsort()
    gpsdist_thresh = 15
    sorted_maxgps_list = maxgpsdist_list[maxgps_sortx]
    offending_sortx = maxgps_sortx.compress(
        sorted_maxgps_list > gpsdist_thresh)

    speed_thresh_kph = 6  # kilometers per hour
    offending_sortx = sortx.compress(sorted_maxspeed_list > speed_thresh_kph)
    #sorted_isoffending = sorted_maxspeed_list > speed_thresh_kph
    #offending_nids = sorted_nid_list.compress(sorted_isoffending)

    offending_nids = ut.list_take(nid_list_, offending_sortx)
    #offending_speeds = ut.list_take(maxspeed_list, offending_sortx)
    print('offending_nids = %r' % (offending_nids,))

    for index in offending_sortx:
        print('\n\n--- Offender index=%d ---' % (index,))
        # Inspect a specific index
        aids = aid_track_list_[index]
        nid = nid_list_[index]
        assert np.all(np.array(ibs.get_annot_name_rowids(aids)) == nid)
        aid1_list, aid2_list = zip(*list(ut.product(aids, aids)))
        annotmatch_rowid_list = ibs.get_annotmatch_rowid_from_superkey(
            aid1_list, aid2_list)
        annotmatch_truth_list = ibs.get_annotmatch_truth(annotmatch_rowid_list)
        annotmatch_truth_list = ut.replace_nones(annotmatch_truth_list, -1)
        truth_mat = np.array(annotmatch_truth_list).reshape(
            (len(aids), len(aids)))

        contrib_rowids = ibs.get_image_contributor_rowid(
            ibs.get_annot_gids(aids))
        contrib_tags = ibs.get_contributor_tag(contrib_rowids)

        print('nid = %r' % (nid,))
        print('maxspeed = %.2f km/h' % (maxspeed_list[index],))
        print('aids = %r' % (aids,))
        print('gpss = %s' % (ut.list_str(gps_track_list_[index]),))
        print('contribs = %s' % (ut.list_str(contrib_tags),))

        print('speedist_mat = \n' + ut.numpy_str(
            spdist.squareform(speed_vector_list[index]), precision=2))
        truth_mat_str = ut.numpy_str(truth_mat, precision=2)
        truth_mat_str = truth_mat_str.replace('-1', ' _')
        print('truth_mat = \n' + truth_mat_str)
        print('gpsdist_mat = \n' + ut.numpy_str(
            spdist.squareform(gpsdist_vector_list[index]), precision=2))
        print('hourdist_mat = \n' + ut.numpy_str(
            spdist.squareform(hourdist_vector_list[index]), precision=2))

    return offending_nids

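# A self-contained sketch of the core speed computation above, with a direct
# haversine implementation standing in for ut.haversine (an assumption about
# its behavior: great-circle distance in kilometers between (lat, lon) pairs
# given in degrees).
def _max_speed_kph_sketch(latlons_deg, unixtimes):
    import numpy as np
    import scipy.spatial.distance as spdist

    def haversine_km(p1, p2):
        lat1, lon1, lat2, lon2 = map(np.radians,
                                     [p1[0], p1[1], p2[0], p2[1]])
        h = (np.sin((lat2 - lat1) / 2)**2 +
             np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2)**2)
        return 2 * 6371.0 * np.arcsin(np.sqrt(h))

    gpsdist = spdist.pdist(np.asarray(latlons_deg, dtype=float),
                           metric=haversine_km)
    hourdist = spdist.pdist(np.asarray(unixtimes, dtype=float)[:, None],
                            metric=lambda x, y: float(np.abs(x[0] - y[0]) /
                                                      3600.))
    return float((gpsdist / hourdist).max())
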
def shadowform_probability():
    """
    It's Hearthstone, but whatever. Compute:
    probability of raza + no shadowform on turn 5
    + probability of raza + shadowform on turn 5
    + probability of kazakus turn 4, raza turn 5, + no shadowform
    """
    from scipy.stats import hypergeom

    def p_badstuff_shadowform(turn=5, hand_size=3):
        deck_size = 30
        num_shadowform = 2

        def prob_nohave_card_never_mulled(copies=2, hand_size=3):
            deck_size = 30
            prb = hypergeom(deck_size, copies, hand_size)
            # P(initial_miss)
            p_none_premul = prb.cdf(0)

            # GIVEN that we mul our first 3, what is the prob we are still
            # unlucky?
            # P(miss_turn0 | initial_miss)
            prb = hypergeom(deck_size - hand_size, copies, hand_size)
            p_none_in_mul = prb.cdf(0)
            # TODO: add constraints about 2 drops
            # P(miss_turn0) = P(miss_turn0 | initial_miss) * P(initial_miss)
            p_none_at_start = p_none_in_mul * p_none_premul
            return p_none_at_start

        def prob_nohave_card_always_mulled(copies=2, hand_size=3):
            # probability of not getting the card initially
            p_none_premul = hypergeom(deck_size, copies, hand_size).cdf(0)
            # probability of not getting the card if everything is thrown
            # away (TODO: factor in the probability that you need to keep
            # something; for now it's fine because if we keep shadowform the
            # end calculation is fine)
            p_nohave_postmul_given_nohave = hypergeom(
                deck_size - hand_size, copies, hand_size).cdf(0)
            # not necessary, but it shows the theory
            p_nohave_postmul_given_had = 1
            p_nohave_turn0 = (
                p_nohave_postmul_given_nohave * p_none_premul +
                (1 - p_none_premul) * p_nohave_postmul_given_had
            )
            return p_nohave_turn0

        def prob_nohave_by_turn(p_none_turn0, turn, copies, hand_size):
            # P(miss_turnN | miss_mul)
            p_none_turnN_given_mulmis = hypergeom(
                deck_size - hand_size, copies, turn).cdf(0)
            # P(miss_turnN) = P(miss_turnN | miss_mul) P(miss_mul)
            p_none_turnN = p_none_turnN_given_mulmis * p_none_turn0
            return p_none_turnN

        p_no_shadowform_on_turn0 = prob_nohave_card_never_mulled(
            copies=num_shadowform, hand_size=hand_size)
        no_shadowform_turnN = prob_nohave_by_turn(
            p_no_shadowform_on_turn0, turn, num_shadowform, hand_size)

        # Assume you always mul raza
        p_noraza_initial = prob_nohave_card_always_mulled(
            copies=1, hand_size=hand_size)
        p_noraza_turnN = prob_nohave_by_turn(
            p_noraza_initial, turn, copies=1, hand_size=hand_size)
        p_raza_turnN = 1 - p_noraza_turnN

        # probability that you have raza and no shadowform by turn 5
        p_raza_and_noshadowform_turnN = p_raza_turnN * no_shadowform_turnN
        return p_raza_and_noshadowform_turnN

    import plottool as pt  # NOQA
    turns = list(range(0, 26))
    probs = [p_badstuff_shadowform(turn, hand_size=3) for turn in turns]
    pt.plot(turns, probs, label='on play')
    probs = [p_badstuff_shadowform(turn, hand_size=4) for turn in turns]
    pt.plot(turns, probs, label='with coin')
    pt.set_xlabel('turn')
    pt.set_ylabel('probability')
    pt.set_title('Probability of Having Raza without a Shadowform')
    pt.legend()
    pt.gca().set_ylim(0, 1)

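# A quick sanity-check sketch of the hypergeometric building block used above
# (only scipy assumed): the chance of seeing zero copies of a 2-of in a
# 3-card opening hand from a 30-card deck.
def _check_hypergeom_miss():
    from scipy.stats import hypergeom
    deck_size, copies, hand_size = 30, 2, 3
    p_miss = hypergeom(deck_size, copies, hand_size).cdf(0)
    # closed form: C(28, 3) / C(30, 3) = (27 * 26) / (30 * 29)
    expected = (27 * 26) / (30 * 29)
    assert abs(p_miss - expected) < 1e-12
    return p_miss
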