def _test_gradient(example, grammar, m):
    """Run `__test_gradient` on `example` for each annealing rate `gamma`.

    For every `gamma` a short report is printed (the rate itself and the
    pruning statistics derived from `m`), then `__test_gradient` is called
    with the grammar to use at that rate.

    Arguments:

     - `example`: object exposing a `nodes` collection.

     - `grammar`: used as-is when `gamma == 1`; otherwise
       `grammar.anneal(gamma)` is used.

     - `m`: mask indexed by node (`m[x]`).  `m*1` is passed on to the
       callee -- presumably to hand it a copy of a numpy array; TODO confirm.

    """
    for gamma in [1]:
        # Single-argument `print(...)` produces the same output whether
        # `print` is the Python 2 statement or the Python 3 function.
        print('gamma = %s' % (gamma,))

        n_nodes = len(example.nodes)
        n_masked = sum(m[x] for x in example.nodes)
        print('prune = %s' % progress(n_nodes - n_masked, n_nodes))

        annealed_grammar = grammar if gamma == 1 else grammar.anneal(gamma)
        __test_gradient(example, annealed_grammar, m*1, gamma)
def __init__(self, expect, got, name=None, data=None, P_LARGER=0.9,
             regression=True, ax=None, alphabet=None, expect_label=None,
             got_label=None, verbose=1):
    """Compare vectors.

    Arguments:

     - Specifying data for comparison two methods:

       1) `expect`, `got`: two numeric one-dimensional arrays which we'd like
          to compare (the argument names come from software testing). This
          method requires argument `data=None`.  Two dicts are also
          accepted (values are aligned by key), as are two numpy arrays of
          equal shape (both are flattened).

       2) `data`: instance of `DataFrame`, expects arguments `expect` and
          `got` to be column labels.

     - `name`: name of this comparison.

     - `P_LARGER`: fraction of the `n` dimensions on which one vector must
       be strictly larger than the other before the corresponding
       "is larger" failure is recorded.

     - `regression`: when true and `n >= 2`, `self.regression()` is called.

     - `ax`: stored as `self.ax`.

     - `alphabet`: names of the dimensions; an `Alphabet` instance is
       converted with `tolist()`.

     - `expect_label`, `got_label`: labels of the two vectors.  Default to
       the column names when `data` is given, else 'expect' and 'got'.

     - `verbose`: when true, `self.message()` is called at the end.

    Note:

     - when plotting `expect` is the y-axis, `got` is the x-axis. This is by
       convention that `expect` is the dependent variable (regression target).

    TODO:

     - Add an option to drop NaNs and continue comparison.

     - Indicate which dimensions have the largest errors.

     - Add option to allow alignment/matchings?

    """
    self.name = name

    if isinstance(alphabet, Alphabet):
        alphabet = alphabet.tolist()

    # Dict inputs: both must have exactly the keys in `alphabet`; values are
    # then read off in that order.
    if isinstance(expect, dict) and isinstance(got, dict):
        alphabet = list(expect.keys()) if alphabet is None else alphabet
        assert set(got.keys()) == set(alphabet), \
            'Keys differ.\n got keys = %s\n want keys = %s' % (set(got.keys()), set(alphabet))
        expect = [expect[k] for k in alphabet]
        got = [got[k] for k in alphabet]

    # Multi-dimensional arrays are compared elementwise after flattening.
    if isinstance(expect, np.ndarray) and isinstance(got, np.ndarray):
        assert expect.shape == got.shape, [expect.shape, got.shape]
        expect = expect.flatten()
        got = got.flatten()

    if data is not None:
        assert isinstance(expect, (int, str)), \
            'expected a column name got %s' % type(expect)
        assert isinstance(got, (int, str)), \
            'expected a column name got %s' % type(got)
        if expect_label is None:
            expect_label = expect
        if got_label is None:
            got_label = got
        expect = data[expect]
        got = data[got]
    else:
        if expect_label is None:
            expect_label = 'expect'
        if got_label is None:
            got_label = 'got'
        expect = np.asarray(expect)
        got = np.asarray(got)
        # NOTE: `data` is not read again in this method.
        data = pd.DataFrame({expect_label: expect, got_label: got})

    assert expect.shape == got.shape, [expect.shape, got.shape]
    [n] = expect.shape   # unpacking fails unless the input is one-dimensional

    self.expect = expect
    self.got = got
    self.alphabet = alphabet
    self.ax = ax
    self.got_label = got_label
    self.expect_label = expect_label
    self.n = n
    self.coeff = None

    # Each entry is `[test name, value, passed]`.
    self.tests = tests = []

    # Check that vectors are finite.
    if not np.isfinite(expect).all():
        tests.append(['expect finite', progress(np.isfinite(expect).sum(), n), False])
    if not np.isfinite(got).all():
        tests.append(['got finite', progress(np.isfinite(got).sum(), n), False])

    ne = norm(expect)
    ng = norm(got)
    ok = abs(ne - ng) / ne < 0.01 if ne != 0 else True
    if n > 1:
        tests.append(['norms', '[%g, %g]' % (ne, ng), ok])

    F = zero_retrieval(expect, got)
    tests.append(['zero F1', F, F > 0.99])

    if n > 1:
        # Correlations must be really high to count as a pass.
        self.pearson = 1.0 if ne == ng == 0 else pearsonr(expect, got)[0]
        tests.append(['pearson', self.pearson, (self.pearson > 0.99999)])
        self.spearman = spearmanr(expect, got)[0]
        tests.append(['spearman', self.spearman, (self.spearman > 0.99999)])

    # TODO: this check should probably take into account the scale of the data.
    d = linf(expect, got)
    self.max_err = d
    tests.append(['Linf', d, d < 1e-8])

    # same sign check (weak agreement, but useful sanity check -- especially
    # for gradients)
    s = np.asarray(~((expect >= 0) ^ (got >= 0)), dtype=int)
    p = s.sum() * 100.0 / len(s)
    tests.append(['same-sign', '%s%% (%s/%s)' % (p, s.sum(), len(s)), p == 100.0])

    # relative error
    r = relative_difference(expect, got)
    finite_r = r[np.isfinite(r)]
    # `np.max` raises ValueError on an empty selection, which is what we get
    # when no relative difference is finite.  Report NaN in that case; the
    # `r <= 0.01` comparison is then False, so the test is marked as failed.
    r = np.max(finite_r) if len(finite_r) else np.nan
    tests.append(['max rel err', r, r <= 0.01])
    self.max_relative_error = r
    self.max_rel_err = r

    # TODO: suggest that if relative error is high and rescaled error is low (or
    # something to do with regression residuals) that maybe there is a
    # (hopefully) simple fix via scale/offset.

    # TODO: can provide descriptive statistics for each vector

    # regression is only valid for n >= 2
    if n >= 2:
        if regression:
            self.regression()

    if n >= 2:
        # These tests check if one of the datasets is consistently larger than the
        # other. The threshold for error is based on `P_LARGER` ("percent larger").
        L = ((expect - got) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['expect is larger', progress(L, n), 0])
        L = ((got - expect) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['got is larger', progress(L, n), 0])

    if verbose:
        self.message()
def __init__(self, expect, got, name=None, data=None, P_LARGER=0.9,
             scatter=False, regression=False, show_regression=False, ax=None,
             alphabet=None, expect_label=None, got_label=None, scatter_kw=None):
    """Compare vectors.

    Arguments:

     - Specifying data for comparison two methods:

       1) `expect`, `got`: two numeric one-dimensional arrays which we'd like
          to compare (the argument names come from software testing). This
          method requires argument `data=None`.

       2) `data`: instance of `DataFrame`, expects arguments `expect` and
          `got` to be column labels.

     - `name`: name of this comparison; also used as the plot title.

     - `P_LARGER`: fraction of the `n` dimensions on which one vector must
       be strictly larger than the other before the corresponding
       "is larger" failure is recorded.

     - `scatter`: when true, draw a scatter plot of `got` (x) against
       `expect` (y) on `ax`; a new figure is created if `ax` is None.

     - `regression`: when true and `n >= 2`, fit `expect ~ a*got + b` by
       least squares and report the coefficients.

     - `show_regression`: implies `regression`; additionally draws the
       fitted line when `scatter` is on and the fit ran.

     - `alphabet`: stored as `self.alphabet`; when not None,
       `self.show_largest_rel_errors()` is called at the end.

     - `expect_label`, `got_label`: axis labels.  Default to the column
       names when `data` is given, else 'expect' and 'got'.

     - `scatter_kw`: extra keyword arguments forwarded to `ax.scatter`.

    Note:

     - when plotting `expect` is the y-axis, `got` is the x-axis. This is by
       convention that `expect` is the dependent variable (regression target).

    TODO:

     - Allow user to specify alternative names to `expect` and `got` since
       they are often confusing.

     - Add an option to drop NaNs and continue comparison.

     - Support dictionaries as input with named dimensions and/or possibly an
       alphabet for naming dimensions.

     - Indicate which dimensions have the largest errors.

    """
    # `basestring` only exists on Python 2; fall back to `str` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if show_regression:
        regression = 1
    scatter_kw = scatter_kw or {}

    if data is not None:
        assert isinstance(expect, (int, string_types)), \
            'expected a column name got %s' % type(expect)
        assert isinstance(got, (int, string_types)), \
            'expected a column name got %s' % type(got)
        if expect_label is None:
            expect_label = expect
        if got_label is None:
            got_label = got
        expect = data[expect]
        got = data[got]
    else:
        if expect_label is None:
            expect_label = 'expect'
        if got_label is None:
            got_label = 'got'
        expect = asarray(expect)
        got = asarray(got)
        # NOTE: `data` is not read again in this method.
        data = DataFrame({expect_label: expect, got_label: got})

    assert expect.shape == got.shape
    [n] = expect.shape   # unpacking fails unless the input is one-dimensional

    self.expect = expect
    self.got = got
    self.alphabet = alphabet

    # Each entry is `[test name, value, passed]`; `passed` equal to 1 prints
    # green, equal to 0 prints red, anything else (-1, 2) prints yellow.
    tests = []

    # Check that vectors are finite.
    if not isfinite(expect).all():
        tests.append(['expect finite', progress(isfinite(expect).sum(), n), False])
    if not isfinite(got).all():
        tests.append(['got finite', progress(isfinite(got).sum(), n), False])

    tests.append(['norms', [norm(expect), norm(got)], -1])
    tests.append(['zeros',
                  '%s %s' % (progress((expect==0).sum(), n),
                             progress((got==0).sum(), n)),
                  -1])

    c = cosine(expect, got)
    tests.append(['cosine-sim', c, (c > 0.99999)])

    if norm(expect) == 0 and norm(got) == 0:
        p = 1.0
    else:
        p = pearsonr(expect, got)[0]
    tests.append(['pearson', p, (p > 0.99999)])

    # TODO: this check should probably take into account the scale of the data.
    d = linf(expect, got)
    tests.append(['Linf', d, d < 1e-8])

    # same sign check (weak agreement, but useful sanity check -- especially
    # for gradients)
    s = asarray(~((expect >= 0) ^ (got >= 0)), dtype=int)
    p = s.sum() * 100.0 / len(s)
    tests.append(['same-sign', '%s%% (%s/%s)' % (p, s.sum(), len(s)), p == 100.0])

    # relative error
    r = relative_difference(expect, got)
    r = mean(r[isfinite(r)])
    tests.append(['mean relative error', r, r <= 0.01])

    # TODO: suggest that if relative error is high and rescaled error is low (or
    # something to do with regression residuals) that maybe there is a
    # (hopefully) simple fix via scale/offset.

    # TODO: can provide descriptive statistics for each vector

    if scatter:
        if ax is None:
            ax = pl.figure().add_subplot(111)
        ax.scatter(got, expect, lw=0, alpha=0.5, **scatter_kw)
        if name is not None:
            ax.set_title(name)
        ax.set_xlabel(got_label)
        ax.set_ylabel(expect_label)

    # Bound up front so the `show_regression` check below never hits an
    # unassigned name when the regression did not run (e.g. n < 2).
    result = None

    # regression and rescaled error only valid for n >= 2
    if n >= 2:
        es = abs(expect).max()
        gs = abs(got).max()
        if es == 0:
            es = 1
        if gs == 0:
            gs = 1

        # rescaled error: each vector is divided by its own largest
        # magnitude before taking the mean absolute difference.
        E = expect / es
        G = got / gs
        R = abs(E - G)
        r = mean(R)
        tests.append(['mean rescaled error', r, r <= 1e-5])

        if regression:
            # least squares linear regression
            #
            # TODO: for regression we want parameters `[1 0]` and a small
            # residual. We want both these conditions to hold. Might be
            # useful to look at R^2 statistic since it normalizes scale and
            # number of data-points. (it's often used for reduction in
            # variance.)
            from scipy.linalg import lstsq
            A = ones((n, 2))
            A[:,0] = got
            if isfinite(got).all() and isfinite(expect).all():
                # data can't contain any NaNs
                result = lstsq(A, expect)
                tests.append(['regression', result[0], 2])
            else:
                # contains a NaN
                result = None
                tests.append(['regression', 'did not run due to NaNs in data', 0])

        if show_regression and regression and scatter and result is not None:
            coeff = result[0]
            # Remember the x-limits so plotting the line does not rescale
            # the axis.
            xa, xb = ax.get_xlim()
            A = ones((n, 2))
            A[:,0] = got
            # plot estimated line
            ys = A.dot(coeff)
            ax.plot(A[:,0], ys, c='r', alpha=0.5)
            # (The y=x target line is deliberately not drawn: sometimes it
            # ruins the plot, e.g. if the data is really off that line.)
            ax.grid(True)
            ax.set_xlim(xa, xb)

    if n >= 2:
        # These tests check if one of the datasets is consistently larger than the
        # other. The threshold for error is based on `P_LARGER` ("percent larger").
        L = ((expect-got) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['expect is larger', progress(L, n), 0])
        L = ((got-expect) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['got is larger', progress(L, n), 0])

    # Report.  Every `print` takes exactly one argument so the output is the
    # same under the Python 2 print statement and the Python 3 function.
    title = ' (%s)' % name if name else ''
    print('')
    print('Comparison%s: n=%s' % (title, n))
    for k, v, passed in tests:
        if passed == 1:
            c = green
        elif passed == 0:
            c = red
        else:
            c = yellow
        try:
            v = '%g' % v
        except TypeError:
            # not a plain number (string, list, array): print it as-is
            pass
        print(' %s: %s' % (k, c % (v,)))
    print('')

    if alphabet is not None:
        self.show_largest_rel_errors()
def __init__(self, want, have, name=None, data=None, P_LARGER=0.9,
             regression=True, ax=None, alphabet=None, want_label=None,
             have_label=None, verbose=1):
    """Compare vectors.

    Arguments:

     - Specifying data for comparison two methods:

       1) `want`, `have`: two numeric one-dimensional arrays which we'd like
          to compare (the argument names come from software testing). This
          method requires argument `data=None`.  Two dicts are also
          accepted (a key missing from one side counts as 0), as are two
          numpy arrays of equal shape (both are flattened).

       2) `data`: instance of `DataFrame`, expects arguments `want` and
          `have` to be column labels.

     - `name`: name of this comparison.

     - `P_LARGER`: fraction of the `n` dimensions on which one vector must
       be strictly larger than the other before the corresponding
       "is larger" failure is recorded.

     - `regression`: when true, `self.regression()` is called.

     - `ax`: stored as `self.ax`.

     - `alphabet`: names of the dimensions.  Must be None when numpy arrays
       are passed (the array indices are used instead).

     - `want_label`, `have_label`: labels of the two vectors.  Default to
       the column names when `data` is given, else 'want' and 'have'.

     - `verbose`: when true, `self.message()` is called at the end.

    Note:

     - when plotting `want` is the y-axis, `have` is the x-axis. This is by
       convention that `want` is the dependent variable (regression target).

    TODO:

     - Add an option to drop NaNs and continue comparison.

     - Indicate which dimensions have the largest errors.

     - Add option to allow alignment/matchings?

    """
    self.name = name

    if isinstance(alphabet, Alphabet):
        alphabet = list(alphabet)

    # Dict inputs: compare over the union of keys, missing entries count as 0.
    if isinstance(want, dict) and isinstance(have, dict):
        alphabet = list(want.keys() | have.keys()) if alphabet is None else alphabet
        want = [want.get(k, 0) for k in alphabet]
        have = [have.get(k, 0) for k in alphabet]

    # Multi-dimensional arrays: name each dimension by its index tuple, then
    # compare elementwise after flattening.
    if isinstance(want, np.ndarray) and isinstance(have, np.ndarray):
        assert alphabet is None
        alphabet = Alphabet(dict(np.ndenumerate(want)).keys())
        assert want.shape == have.shape, [want.shape, have.shape]
        want = want.flatten()
        have = have.flatten()

    if data is not None:
        if want_label is None:
            want_label = want
        if have_label is None:
            have_label = have
        want = data[want]
        have = data[have]
    else:
        if want_label is None:
            want_label = 'want'
        if have_label is None:
            have_label = 'have'
        want = np.asarray(want)
        have = np.asarray(have)
        # NOTE: `data` is not read again in this method.
        data = pd.DataFrame({want_label: want, have_label: have})

    assert want.shape == have.shape, [want.shape, have.shape]
    [n] = want.shape   # unpacking fails unless the input is one-dimensional

    self.want = want
    self.have = have
    self.alphabet = alphabet
    self.ax = ax
    self.have_label = have_label
    self.want_label = want_label
    self.n = n
    self.coeff = None

    # Each entry is `[test name, value, passed]`; `passed` is None for
    # values that are reported without a pass/fail verdict.
    self.tests = tests = []

    # Check that vectors are finite.
    if not np.isfinite(want).all():
        tests.append(['want finite', progress(np.isfinite(want).sum(), n), False])
    if not np.isfinite(have).all():
        tests.append(['have finite', progress(np.isfinite(have).sum(), n), False])

    ne = norm(want)
    ng = norm(have)
    ok = abs(ne-ng)/ne < 0.01 if ne != 0 else True
    if n > 1:
        tests.append(['norms', '[%g, %g]' % (ne, ng), ok])

    # Correlation statistics
    if n > 1:
        self.pearson = 1.0 if ne == ng == 0 else pearsonr(want, have)[0]
        tests.append(['pearson', self.pearson, (self.pearson > 0.99999)])
        self.spearman = spearmanr(want, have)[0]
        tests.append(['spearman', self.spearman, (self.spearman > 0.99999)])

    # TODO: this check should probably take into account the scale of the data.
    d = linf(want, have)
    self.max_err = d
    tests.append(['ℓ∞', d, None])
    tests.append(['ℓ₂', np.linalg.norm(want - have), None])

    # same sign check (weak agreement, but useful sanity check -- especially
    # for gradients)
    s = np.asarray(~((want >= 0) ^ (have >= 0)), dtype=int)
    p = s.sum() * 100.0 / len(s)
    tests.append(['same-sign', f'{p:.2f}% ({s.sum()}/{len(s)})', p == 100.0])

    # relative error (stored on the instance, not added to `tests`)
    r = relative_difference(want, have)
    finite_r = r[np.isfinite(r)]
    # `np.max` raises ValueError on an empty selection, which is what we get
    # when no relative difference is finite; store NaN in that case.
    r = np.max(finite_r) if len(finite_r) else np.nan
    self.max_relative_error = r
    self.max_rel_err = r

    # TODO: suggest that if relative error is high and rescaled error is low (or
    # something to do with regression residuals) that maybe there is a
    # (hopefully) simple fix via scale/offset.

    # TODO: can provide descriptive statistics for each vector

    if regression:
        self.regression()

    if n >= 2:
        # These tests check if one of the datasets is consistently larger than the
        # other. The threshold for error is based on `P_LARGER` ("percent larger").
        L = ((want-have) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['want is larger', progress(L, n), 0])
        L = ((have-want) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['have is larger', progress(L, n), 0])

    if verbose:
        self.message()
for t,p,x,y in sorted(zip(y_true,y_hat,X,Y)): ux = A[x] not in seen uy = A[y] not in seen if ux or uy: unseen += 1 continue # filter-out unseen test examples if p != t: err += 1 # print x, y, colors.red % L[p], colors.green % L[t], 'unseen(x)' if ux else '', 'unseen(y)' if uy else '' # else: # if ux or uy: # correct_unseen += 1 # correct += 1 print 'p(err|seen) = %s' % progress(err, len(y_true)) print 'p(unseen) = %s' % progress(unseen, len(y_true)) #print 'correct unseen = %s' % progress(correct_unseen, correct) """TODO - How transitive is the learned relation? """ """
def __init__(self, expect, got, name=None, data=None, P_LARGER=0.9,
             regression=True, ax=None, alphabet=None, expect_label=None,
             got_label=None, verbose=1):
    """Compare vectors.

    Arguments:

     - Specifying data for comparison two methods:

       1) `expect`, `got`: two numeric one-dimensional arrays which we'd like
          to compare (the argument names come from software testing). This
          method requires argument `data=None`.  Two dicts are also
          accepted (values are aligned by key), as are two numpy arrays of
          equal shape (both are flattened).

       2) `data`: instance of `DataFrame`, expects arguments `expect` and
          `got` to be column labels.

     - `name`: name of this comparison.

     - `P_LARGER`: fraction of the `n` dimensions on which one vector must
       be strictly larger than the other before the corresponding
       "is larger" failure is recorded.

     - `regression`: when true and `n >= 2`, `self.regression()` is called.

     - `ax`: stored as `self.ax`.

     - `alphabet`: names of the dimensions; defaults to the keys of
       `expect` when dicts are passed.

     - `expect_label`, `got_label`: labels of the two vectors.  Default to
       the column names when `data` is given, else 'expect' and 'got'.

     - `verbose`: when true, `self.message()` is called at the end.

    Note:

     - when plotting `expect` is the y-axis, `got` is the x-axis. This is by
       convention that `expect` is the dependent variable (regression target).

    TODO:

     - Add an option to drop NaNs and continue comparison.

     - Indicate which dimensions have the largest errors.

    """
    # `basestring` only exists on Python 2; fall back to `str` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Dict inputs: both must have exactly the keys in `alphabet`; values are
    # then read off in that order.
    if isinstance(expect, dict) and isinstance(got, dict):
        # `list(...)` so that a snapshot is stored rather than a live dict
        # view (which is what `keys()` returns on Python 3).
        alphabet = list(expect.keys()) if alphabet is None else alphabet
        assert set(got.keys()) == set(alphabet), \
            'Keys differ.\n got keys = %s\n want keys = %s' % (got.keys(), alphabet)
        expect = [expect[k] for k in alphabet]
        got = [got[k] for k in alphabet]

    # Multi-dimensional arrays are compared elementwise after flattening.
    if isinstance(expect, np.ndarray) and isinstance(got, np.ndarray):
        assert expect.shape == got.shape
        expect = expect.flatten()
        got = got.flatten()

    if data is not None:
        assert isinstance(expect, (int, string_types)), \
            'expected a column name got %s' % type(expect)
        assert isinstance(got, (int, string_types)), \
            'expected a column name got %s' % type(got)
        if expect_label is None:
            expect_label = expect
        if got_label is None:
            got_label = got
        expect = data[expect]
        got = data[got]
    else:
        if expect_label is None:
            expect_label = 'expect'
        if got_label is None:
            got_label = 'got'
        expect = np.asarray(expect)
        got = np.asarray(got)
        # NOTE: `data` is not read again in this method.
        data = pd.DataFrame({expect_label: expect, got_label: got})

    assert expect.shape == got.shape
    [n] = expect.shape   # unpacking fails unless the input is one-dimensional

    self.expect = expect
    self.got = got
    self.alphabet = alphabet
    self.ax = ax
    self.name = name
    self.got_label = got_label
    self.expect_label = expect_label
    self.n = n
    self.coeff = None

    # Each entry is `[test name, value, passed]`.
    self.tests = tests = []

    # Check that vectors are finite.
    if not np.isfinite(expect).all():
        tests.append(['expect finite', progress(np.isfinite(expect).sum(), n), False])
    if not np.isfinite(got).all():
        tests.append(['got finite', progress(np.isfinite(got).sum(), n), False])

    ne = norm(expect)
    ng = norm(got)
    ok = abs(ne-ng)/ne < 0.01 if ne != 0 else True
    if n > 1:
        tests.append(['norms', '[%g, %g]' % (ne, ng), ok])

    F = zero_retrieval(expect, got)
    tests.append(['zero F1', F, F > 0.99])

    if n > 1:
        # cosine similarities must be really high.
        self.cosine = cosine(expect, got)
        tests.append(['cosine', self.cosine, (self.cosine > 0.99999)])
        self.pearson = 1.0 if ne == ng == 0 else pearsonr(expect, got)[0]
        tests.append(['pearson', self.pearson, (self.pearson > 0.99999)])
        self.spearman = spearmanr(expect, got)[0]
        tests.append(['spearman', self.spearman, (self.spearman > 0.99999)])

    # TODO: this check should probably take into account the scale of the data.
    d = linf(expect, got)
    self.max_err = d
    tests.append(['Linf', d, d < 1e-8])

    # same sign check (weak agreement, but useful sanity check -- especially
    # for gradients)
    s = np.asarray(~((expect >= 0) ^ (got >= 0)), dtype=int)
    p = s.sum() * 100.0 / len(s)
    tests.append(['same-sign', '%s%% (%s/%s)' % (p, s.sum(), len(s)), p == 100.0])

    # relative error (averaged over the finite entries only)
    r = relative_difference(expect, got)
    r = np.mean(r[np.isfinite(r)])
    tests.append(['mean relative error', r, r <= 0.01])
    self.mean_relative_error = r

    # TODO: suggest that if relative error is high and rescaled error is low (or
    # something to do with regression residuals) that maybe there is a
    # (hopefully) simple fix via scale/offset.

    # TODO: can provide descriptive statistics for each vector

    # regression is only valid for n >= 2
    if n >= 2:
        if regression:
            self.regression()

    if n >= 2:
        # These tests check if one of the datasets is consistently larger than the
        # other. The threshold for error is based on `P_LARGER` ("percent larger").
        L = ((expect-got) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['expect is larger', progress(L, n), 0])
        L = ((got-expect) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['got is larger', progress(L, n), 0])

    if verbose:
        self.message()
for t, p, x, y in sorted(zip(y_true, y_hat, X, Y)): ux = A[x] not in seen uy = A[y] not in seen if ux or uy: unseen += 1 continue # filter-out unseen test examples if p != t: err += 1 # print x, y, colors.red % L[p], colors.green % L[t], 'unseen(x)' if ux else '', 'unseen(y)' if uy else '' # else: # if ux or uy: # correct_unseen += 1 # correct += 1 print 'p(err|seen) = %s' % progress(err, len(y_true)) print 'p(unseen) = %s' % progress(unseen, len(y_true)) #print 'correct unseen = %s' % progress(correct_unseen, correct) """TODO - How transitive is the learned relation? """ """ Confusion matrix ================ C[i,j] = "pred i, true j" """
def __init__(self, expect, got, name=None, data=None, P_LARGER=0.9,
             regression=True, ax=None, alphabet=None, expect_label=None,
             got_label=None):
    """Compare vectors and print a colored report.

    Arguments:

     - Specifying data for comparison two methods:

       1) `expect`, `got`: two numeric one-dimensional arrays which we'd like
          to compare (the argument names come from software testing). This
          method requires argument `data=None`.

       2) `data`: instance of `DataFrame`, expects arguments `expect` and
          `got` to be column labels.

     - `name`: name of this comparison; shown in the report header.

     - `P_LARGER`: fraction of the `n` dimensions on which one vector must
       be strictly larger than the other before the corresponding
       "is larger" failure is recorded.

     - `regression`: when true and `n >= 2`, `self.regression()` is called.

     - `ax`: stored as `self.ax`.

     - `alphabet`: stored as `self.alphabet`; when not None,
       `self.show_largest_rel_errors()` is called at the end.

     - `expect_label`, `got_label`: labels of the two vectors.  Default to
       the column names when `data` is given, else 'expect' and 'got'.

    Note:

     - when plotting `expect` is the y-axis, `got` is the x-axis. This is by
       convention that `expect` is the dependent variable (regression target).

    TODO:

     - Allow user to specify alternative names to `expect` and `got` since
       they are often confusing.

     - Add an option to drop NaNs and continue comparison.

     - Support dictionaries as input with named dimensions and/or possibly an
       alphabet for naming dimensions.

     - Indicate which dimensions have the largest errors.

    """
    # `basestring` only exists on Python 2; fall back to `str` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if data is not None:
        assert isinstance(expect, (int, string_types)), \
            'expected a column name got %s' % type(expect)
        assert isinstance(got, (int, string_types)), \
            'expected a column name got %s' % type(got)
        if expect_label is None:
            expect_label = expect
        if got_label is None:
            got_label = got
        expect = data[expect]
        got = data[got]
    else:
        if expect_label is None:
            expect_label = 'expect'
        if got_label is None:
            got_label = 'got'
        expect = asarray(expect)
        got = asarray(got)
        # NOTE: `data` is not read again in this method.
        data = DataFrame({expect_label: expect, got_label: got})

    assert expect.shape == got.shape
    [n] = expect.shape   # unpacking fails unless the input is one-dimensional

    self.expect = expect
    self.got = got
    self.alphabet = alphabet
    self.ax = ax
    self.name = name
    self.got_label = got_label
    self.expect_label = expect_label
    self.n = n
    self.coeff = None

    # Each entry is `[test name, value, passed]`; `passed` equal to 1 prints
    # green, equal to 0 prints red, anything else prints yellow.
    self.tests = tests = []

    # Check that vectors are finite.
    if not isfinite(expect).all():
        tests.append(
            ['expect finite', progress(isfinite(expect).sum(), n), False])
    if not isfinite(got).all():
        tests.append(
            ['got finite', progress(isfinite(got).sum(), n), False])

    ne = norm(expect)
    ng = norm(got)
    # Guard the relative comparison: with a zero `expect` vector the
    # division below would be a division by zero.
    ok = abs(ne - ng) / ne < 0.01 if ne != 0 else True
    tests.append(['norms', '[%g, %g]' % (ne, ng), ok])

    # TODO: what do we want to say about sparsity?

    F = zero_retrieval(expect, got)
    tests.append(['zero F1', F, F > 0.99])

    # cosine similarities must be really high.
    c = cosine(expect, got)
    self.cosine = c
    tests.append(['cosine-sim', c, (c > 0.99999)])

    self.pearsonr = 1.0 if ne == ng == 0 else pearsonr(expect, got)[0]
    tests.append(['pearson', self.pearsonr, (self.pearsonr > 0.99999)])

    p = spearmanr(expect, got)[0]
    tests.append(['spearman', p, (p > 0.99999)])

    # TODO: this check should probably take into account the scale of the data.
    d = linf(expect, got)
    self.max_err = d
    tests.append(['Linf', d, d < 1e-8])

    # same sign check (weak agreement, but useful sanity check -- especially
    # for gradients)
    s = asarray(~((expect >= 0) ^ (got >= 0)), dtype=int)
    p = s.sum() * 100.0 / len(s)
    tests.append(
        ['same-sign', '%s%% (%s/%s)' % (p, s.sum(), len(s)), p == 100.0])

    # relative error (averaged over the finite entries only)
    r = relative_difference(expect, got)
    r = mean(r[isfinite(r)])
    tests.append(['mean relative error', r, r <= 0.01])

    # TODO: suggest that if relative error is high and rescaled error is low (or
    # something to do with regression residuals) that maybe there is a
    # (hopefully) simple fix via scale/offset.

    # TODO: can provide descriptive statistics for each vector

    # regression and rescaled error only valid for n >= 2
    if n >= 2:
        es = abs(expect).max()
        gs = abs(got).max()
        if es == 0:
            es = 1
        if gs == 0:
            gs = 1

        # rescaled error: each vector is divided by its own largest
        # magnitude before taking the mean absolute difference.
        E = expect / es
        G = got / gs
        R = abs(E - G)
        r = mean(R)
        tests.append(['mean rescaled error', r, r <= 1e-5])

        if regression:
            self.regression()

    if n >= 2:
        # These tests check if one of the datasets is consistently larger than the
        # other. The threshold for error is based on `P_LARGER` ("percent larger").
        L = ((expect - got) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['expect is larger', progress(L, n), 0])
        L = ((got - expect) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['got is larger', progress(L, n), 0])

    # Report.  Every `print` takes exactly one argument so the output is the
    # same under the Python 2 print statement and the Python 3 function.
    title = ' (%s)' % name if name else ''
    print('')
    print('Comparison%s: n=%s' % (title, n))
    for k, v, passed in tests:
        if passed == 1:
            c = green
        elif passed == 0:
            c = red
        else:
            c = yellow
        try:
            v = '%g' % v
        except TypeError:
            # not a plain number (e.g. an already formatted string)
            pass
        print(' %s: %s' % (k, c % (v, )))
    print('')

    if alphabet is not None:
        self.show_largest_rel_errors()
def __init__(self, expect, got, name=None, data=None, P_LARGER=0.9,
             regression=True, ax=None, alphabet=None, expect_label=None,
             got_label=None):
    """Compare vectors and print a colored report.

    Arguments:

     - Specifying data for comparison two methods:

       1) `expect`, `got`: two numeric one-dimensional arrays which we'd like
          to compare (the argument names come from software testing). This
          method requires argument `data=None`.

       2) `data`: instance of `DataFrame`, expects arguments `expect` and
          `got` to be column labels.

     - `name`: name of this comparison; shown in the report header.

     - `P_LARGER`: fraction of the `n` dimensions on which one vector must
       be strictly larger than the other before the corresponding
       "is larger" failure is recorded.

     - `regression`: when true and `n >= 2`, `self.regression()` is called.

     - `ax`: stored as `self.ax`.

     - `alphabet`: stored as `self.alphabet`; when not None,
       `self.show_largest_rel_errors()` is called at the end.

     - `expect_label`, `got_label`: labels of the two vectors.  Default to
       the column names when `data` is given, else 'expect' and 'got'.

    Note:

     - when plotting `expect` is the y-axis, `got` is the x-axis. This is by
       convention that `expect` is the dependent variable (regression target).

    TODO:

     - Allow user to specify alternative names to `expect` and `got` since
       they are often confusing.

     - Add an option to drop NaNs and continue comparison.

     - Support dictionaries as input with named dimensions and/or possibly an
       alphabet for naming dimensions.

     - Indicate which dimensions have the largest errors.

    """
    # `basestring` only exists on Python 2; fall back to `str` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if data is not None:
        assert isinstance(expect, (int, string_types)), \
            'expected a column name got %s' % type(expect)
        assert isinstance(got, (int, string_types)), \
            'expected a column name got %s' % type(got)
        if expect_label is None:
            expect_label = expect
        if got_label is None:
            got_label = got
        expect = data[expect]
        got = data[got]
    else:
        if expect_label is None:
            expect_label = 'expect'
        if got_label is None:
            got_label = 'got'
        expect = asarray(expect)
        got = asarray(got)
        # NOTE: `data` is not read again in this method.
        data = DataFrame({expect_label: expect, got_label: got})

    assert expect.shape == got.shape
    [n] = expect.shape   # unpacking fails unless the input is one-dimensional

    self.expect = expect
    self.got = got
    self.alphabet = alphabet
    self.ax = ax
    self.name = name
    self.got_label = got_label
    self.expect_label = expect_label
    self.n = n
    self.coeff = None

    # Each entry is `[test name, value, passed]`; `passed` equal to 1 prints
    # green, equal to 0 prints red, anything else prints yellow.
    self.tests = tests = []

    # Check that vectors are finite.
    if not isfinite(expect).all():
        tests.append(['expect finite', progress(isfinite(expect).sum(), n), False])
    if not isfinite(got).all():
        tests.append(['got finite', progress(isfinite(got).sum(), n), False])

    ne = norm(expect)
    ng = norm(got)
    # Guard the relative comparison: with a zero `expect` vector the
    # division below would be a division by zero.
    ok = abs(ne-ng)/ne < 0.01 if ne != 0 else True
    tests.append(['norms', '[%g, %g]' % (ne, ng), ok])

    # TODO: what do we want to say about sparsity?

    F = zero_retrieval(expect, got)
    tests.append(['zero F1', F, F > 0.99])

    # cosine similarities must be really high.
    c = cosine(expect, got)
    self.cosine = c
    tests.append(['cosine-sim', c, (c > 0.99999)])

    self.pearsonr = 1.0 if ne == ng == 0 else pearsonr(expect, got)[0]
    tests.append(['pearson', self.pearsonr, (self.pearsonr > 0.99999)])

    p = spearmanr(expect, got)[0]
    tests.append(['spearman', p, (p > 0.99999)])

    # TODO: this check should probably take into account the scale of the data.
    d = linf(expect, got)
    self.max_err = d
    tests.append(['Linf', d, d < 1e-8])

    # same sign check (weak agreement, but useful sanity check -- especially
    # for gradients)
    s = asarray(~((expect >= 0) ^ (got >= 0)), dtype=int)
    p = s.sum() * 100.0 / len(s)
    tests.append(['same-sign', '%s%% (%s/%s)' % (p, s.sum(), len(s)), p == 100.0])

    # relative error (averaged over the finite entries only)
    r = relative_difference(expect, got)
    r = mean(r[isfinite(r)])
    tests.append(['mean relative error', r, r <= 0.01])

    # TODO: suggest that if relative error is high and rescaled error is low (or
    # something to do with regression residuals) that maybe there is a
    # (hopefully) simple fix via scale/offset.

    # TODO: can provide descriptive statistics for each vector

    # regression and rescaled error only valid for n >= 2
    if n >= 2:
        es = abs(expect).max()
        gs = abs(got).max()
        if es == 0:
            es = 1
        if gs == 0:
            gs = 1

        # rescaled error: each vector is divided by its own largest
        # magnitude before taking the mean absolute difference.
        E = expect / es
        G = got / gs
        R = abs(E - G)
        r = mean(R)
        tests.append(['mean rescaled error', r, r <= 1e-5])

        if regression:
            self.regression()

    if n >= 2:
        # These tests check if one of the datasets is consistently larger than the
        # other. The threshold for error is based on `P_LARGER` ("percent larger").
        L = ((expect-got) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['expect is larger', progress(L, n), 0])
        L = ((got-expect) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['got is larger', progress(L, n), 0])

    # Report.  Every `print` takes exactly one argument so the output is the
    # same under the Python 2 print statement and the Python 3 function.
    title = ' (%s)' % name if name else ''
    print('')
    print('Comparison%s: n=%s' % (title, n))
    for k, v, passed in tests:
        if passed == 1:
            c = green
        elif passed == 0:
            c = red
        else:
            c = yellow
        try:
            v = '%g' % v
        except TypeError:
            # not a plain number (e.g. an already formatted string)
            pass
        print(' %s: %s' % (k, c % (v,)))
    print('')

    if alphabet is not None:
        self.show_largest_rel_errors()