def __init__(self, expect, got, name=None, data=None, P_LARGER=0.9,
             regression=True, ax=None, alphabet=None, expect_label=None,
             got_label=None, verbose=1):
    """Compare vectors.

    Arguments:

      - Two methods for specifying the data to compare:

        1) `expect`, `got`: two numeric one-dimensional arrays which we'd like
           to compare (the argument names come from software testing). This
           method requires `data=None`.

        2) `data`: an instance of `DataFrame`; the arguments `expect` and
           `got` are expected to be column labels.

      - `name`: name of this comparison.

    Note:

      - When plotting, `expect` is the y-axis and `got` is the x-axis. This is
        by convention because `expect` is the dependent variable (regression
        target).

    TODO:

      - Add an option to drop NaNs and continue comparison.

      - Indicate which dimensions have the largest errors.

      - Add an option to allow alignment/matchings?

    """
    self.name = name

    if isinstance(alphabet, Alphabet):
        alphabet = alphabet.tolist()

    if isinstance(expect, dict) and isinstance(got, dict):
        alphabet = list(expect.keys()) if alphabet is None else alphabet
        assert set(got.keys()) == set(alphabet), \
            'Keys differ.\n got keys    = %s\n expect keys = %s' \
            % (set(got.keys()), set(alphabet))
        expect = [expect[k] for k in alphabet]
        got = [got[k] for k in alphabet]

    if isinstance(expect, np.ndarray) and isinstance(got, np.ndarray):
        assert expect.shape == got.shape, [expect.shape, got.shape]
        expect = expect.flatten()
        got = got.flatten()

    if data is not None:
        assert isinstance(expect, (int, str)), \
            'expected a column name, got %s' % type(expect)
        assert isinstance(got, (int, str)), \
            'expected a column name, got %s' % type(got)

        if expect_label is None:
            expect_label = expect
        if got_label is None:
            got_label = got

        expect = data[expect]
        got = data[got]

    else:
        if expect_label is None:
            expect_label = 'expect'
        if got_label is None:
            got_label = 'got'

        expect = np.asarray(expect)
        got = np.asarray(got)

        data = pd.DataFrame({expect_label: expect, got_label: got})

    assert expect.shape == got.shape, [expect.shape, got.shape]
    [n] = expect.shape

    self.expect = expect
    self.got = got
    self.alphabet = alphabet
    self.ax = ax
    self.got_label = got_label
    self.expect_label = expect_label
    self.n = n
    self.coeff = None

    self.tests = tests = []

    # Check that vectors are finite.
    if not np.isfinite(expect).all():
        tests.append(['expect finite',
                      progress(np.isfinite(expect).sum(), n), False])
    if not np.isfinite(got).all():
        tests.append(['got finite',
                      progress(np.isfinite(got).sum(), n), False])

    ne = norm(expect)
    ng = norm(got)
    ok = abs(ne - ng) / ne < 0.01 if ne != 0 else True
    if n > 1:
        tests.append(['norms', '[%g, %g]' % (ne, ng), ok])

    F = zero_retrieval(expect, got)
    tests.append(['zero F1', F, F > 0.99])

    if n > 1:
        #self.cosine = cosine(expect, got)
        #tests.append(['cosine', self.cosine, (self.cosine > 0.99999)])   # cosine similarities must be really high.
        self.pearson = 1.0 if ne == ng == 0 else pearsonr(expect, got)[0]
        tests.append(['pearson', self.pearson, (self.pearson > 0.99999)])

        self.spearman = spearmanr(expect, got)[0]
        tests.append(['spearman', self.spearman, (self.spearman > 0.99999)])

    # TODO: this check should probably take into account the scale of the data.
    d = linf(expect, got)
    self.max_err = d
    tests.append(['Linf', d, d < 1e-8])

    # Same-sign check (weak agreement, but a useful sanity check -- especially
    # for gradients).
    x = expect
    y = got
    s = np.asarray(~((x >= 0) ^ (y >= 0)), dtype=int)
    p = s.sum() * 100.0 / len(s)
    tests.append(['same-sign', '%s%% (%s/%s)' % (p, s.sum(), len(s)),
                  p == 100.0])

    # relative error
    r = relative_difference(expect, got)
    r = np.max(r[np.isfinite(r)])
    tests.append(['max rel err', r, r <= 0.01])
    self.max_relative_error = r
    self.max_rel_err = r

    # TODO: suggest that if relative error is high and rescaled error is low (or
    # something to do with regression residuals) then maybe there is a
    # (hopefully) simple fix via scale/offset.

    # TODO: can provide descriptive statistics for each vector
    #tests.append(['range (expect)', [expect.min(), expect.max()], 2])
    #tests.append(['range (got)   ', [got.min(), got.max()], 2])

    # Regression and rescaled error are only valid for n >= 2.
    if n >= 2:
        es = abs(expect).max()
        gs = abs(got).max()
        if es == 0:
            es = 1
        if gs == 0:
            gs = 1
        if 0:
            # rescaled error
            E = expect / es
            G = got / gs
            R = abs(E - G)
            r = np.mean(R)
            tests.append(['mean rescaled error', r, r <= 1e-5])

    if regression:
        self.regression()

    if n >= 2:
        # These tests check whether one of the datasets is consistently larger
        # than the other. The threshold for error is based on `P_LARGER`
        # ("percent larger").
        L = ((expect - got) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['expect is larger', progress(L, n), 0])
        L = ((got - expect) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['got is larger', progress(L, n), 0])

    self.tests = tests
    if verbose:
        self.message()
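# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not from the original source). It assumes this
# __init__ belongs to a vector-comparison class -- called `compare` below --
# and that the helpers referenced above (`norm`, `linf`, `progress`,
# `relative_difference`, `zero_retrieval`, `Alphabet`) are provided by the
# surrounding module.
#
#     import numpy as np, pandas as pd
#
#     expect = np.array([1.0, 2.0, 3.0, 4.0])
#     got = expect + 1e-10 * np.random.randn(4)      # tiny numerical noise
#
#     # Method 1: pass the two arrays directly.
#     compare(expect, got, name='gradient check')
#
#     # Method 2: pass column labels along with a DataFrame.
#     df = pd.DataFrame({'e': expect, 'g': got})
#     compare('e', 'g', data=df)
# ---------------------------------------------------------------------------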
def __init__(self, want, have, name=None, data=None, P_LARGER=0.9,
             regression=True, ax=None, alphabet=None, want_label=None,
             have_label=None, verbose=1):
    """Compare vectors.

    Arguments:

      - Two methods for specifying the data to compare:

        1) `want`, `have`: two numeric one-dimensional arrays which we'd like
           to compare (the argument names come from software testing). This
           method requires `data=None`.

        2) `data`: an instance of `DataFrame`; the arguments `want` and `have`
           are expected to be column labels.

      - `name`: name of this comparison.

    Note:

      - When plotting, `want` is the y-axis and `have` is the x-axis. This is
        by convention because `want` is the dependent variable (regression
        target).

    TODO:

      - Add an option to drop NaNs and continue comparison.

      - Indicate which dimensions have the largest errors.

      - Add an option to allow alignment/matchings?

    """
    self.name = name

    if isinstance(alphabet, Alphabet):
        alphabet = list(alphabet)

    if isinstance(want, dict) and isinstance(have, dict):
        alphabet = list(want.keys() | have.keys()) if alphabet is None else alphabet
        #assert set(have.keys()) == set(alphabet), \
        #    'Keys differ.\n have keys = %s\n want keys = %s' \
        #    % (set(have.keys()), set(alphabet))
        want = [want.get(k, 0) for k in alphabet]
        have = [have.get(k, 0) for k in alphabet]

    if isinstance(want, np.ndarray) and isinstance(have, np.ndarray):
        assert alphabet is None
        alphabet = Alphabet(dict(np.ndenumerate(want)).keys())
        assert want.shape == have.shape, [want.shape, have.shape]
        want = want.flatten()
        have = have.flatten()

    if data is not None:
        #assert isinstance(want, (int, str)), \
        #    'expected a column name, have %s' % type(want)
        #assert isinstance(have, (int, str)), \
        #    'expected a column name, have %s' % type(have)

        if want_label is None:
            want_label = want
        if have_label is None:
            have_label = have

        want = data[want]
        have = data[have]

    else:
        if want_label is None:
            want_label = 'want'
        if have_label is None:
            have_label = 'have'

        want = np.asarray(want)
        have = np.asarray(have)

        data = pd.DataFrame({want_label: want, have_label: have})

    assert want.shape == have.shape, [want.shape, have.shape]
    [n] = want.shape

    self.want = want
    self.have = have
    self.alphabet = alphabet
    self.ax = ax
    self.have_label = have_label
    self.want_label = want_label
    self.n = n
    self.coeff = None

    self.tests = tests = []

    # Check that vectors are finite.
    if not np.isfinite(want).all():
        tests.append(['want finite',
                      progress(np.isfinite(want).sum(), n), False])
    if not np.isfinite(have).all():
        tests.append(['have finite',
                      progress(np.isfinite(have).sum(), n), False])

    ne = norm(want)
    ng = norm(have)
    ok = abs(ne - ng) / ne < 0.01 if ne != 0 else True
    if n > 1:
        tests.append(['norms', '[%g, %g]' % (ne, ng), ok])

    #F = zero_retrieval(want, have)
    #tests.append(['zero F1', F, F > 0.99])

    # Correlation statistics
    if n > 1:
        #self.cosine = cosine(want, have)
        #tests.append(['cosine', self.cosine, (self.cosine > 0.99999)])   # cosine similarities must be really high.
        self.pearson = 1.0 if ne == ng == 0 else pearsonr(want, have)[0]
        tests.append(['pearson', self.pearson, (self.pearson > 0.99999)])

        self.spearman = spearmanr(want, have)[0]
        tests.append(['spearman', self.spearman, (self.spearman > 0.99999)])

    # TODO: this check should probably take into account the scale of the data.
    d = linf(want, have)
    self.max_err = d
    tests.append(['ℓ∞', d, None])
    tests.append(['ℓ₂', np.linalg.norm(want - have), None])

    # Same-sign check (weak agreement, but a useful sanity check -- especially
    # for gradients).
    x = want
    y = have
    s = np.asarray(~((x >= 0) ^ (y >= 0)), dtype=int)
    p = s.sum() * 100.0 / len(s)
    tests.append(['same-sign', f'{p:.2f}% ({s.sum()}/{len(s)})', p == 100.0])

    # relative error
    r = relative_difference(want, have)
    r = np.max(r[np.isfinite(r)])
    #tests.append(['max rel err', r, r <= 0.01])
    self.max_relative_error = r
    self.max_rel_err = r

    # TODO: suggest that if relative error is high and rescaled error is low (or
    # something to do with regression residuals) then maybe there is a
    # (hopefully) simple fix via scale/offset.

    # TODO: can provide descriptive statistics for each vector
    #tests.append(['range (want)', [want.min(), want.max()], 2])
    #tests.append(['range (have)', [have.min(), have.max()], 2])

    # Regression and rescaled error are only valid for n >= 2.
    #if n >= 2:
    #    es = abs(want).max()
    #    gs = abs(have).max()
    #    if es == 0:
    #        es = 1
    #    if gs == 0:
    #        gs = 1
    #    if 0:
    #        # rescaled error
    #        E = want / es
    #        G = have / gs
    #        R = abs(E - G)
    #        r = np.mean(R)
    #        tests.append(['mean rescaled error', r, r <= 1e-5])

    if regression:
        self.regression()

    if n >= 2:
        # These tests check whether one of the datasets is consistently larger
        # than the other. The threshold for error is based on `P_LARGER`
        # ("percent larger").
        L = ((want - have) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['want is larger', progress(L, n), 0])
        L = ((have - want) > 0).sum()
        if L >= P_LARGER * n:
            tests.append(['have is larger', progress(L, n), 0])

    self.tests = tests
    if verbose:
        self.message()
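# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not from the original source). It assumes this
# __init__ belongs to the same vector-comparison class -- called `compare`
# below. Unlike the older `expect`/`got` version, this revision takes the
# union of the dictionaries' keys as the alphabet and treats missing keys as
# zero rather than asserting that the key sets match.
#
#     want = {'a': 1.0, 'b': 2.0, 'c': 0.5}
#     have = {'a': 1.0, 'b': 2.1}                    # 'c' is treated as 0
#
#     c = compare(want, have, name='feature weights')
#     print(c.pearson, c.max_rel_err)
# ---------------------------------------------------------------------------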