예제 #1
0
    def __init__(self,
                 expect,
                 got,
                 name=None,
                 data=None,
                 P_LARGER=0.9,
                 regression=True,
                 ax=None,
                 alphabet=None,
                 expect_label=None,
                 got_label=None,
                 verbose=1):
        """Compare vectors.

        Arguments:

          - Specifying data for comparison two methods:

            1) `expect`, `got`: two numeric one-dimensional arrays which we'd like
               to compare (the argument names come for software testing). This
               method requires argument `data=None`.

            2) `data`: instance of `DataFrame`, expects arguments `expect` and `got`
               to be column labels.

          - `name`: name of this comparison.

        Note:

         - when plotting `expect` is the y-axis, `got` is the x-axis. This is by
           convention that `expect` is the dependent variable (regression target).

        TODO:

         - Add an option to drop NaNs and continue comparison.

         - Indicate which dimensions have the largest errors.

         - Add option to allow alignment/matchings?

        """

        self.name = name

        if isinstance(alphabet, Alphabet):
            alphabet = alphabet.tolist()

        if isinstance(expect, dict) and isinstance(got, dict):
            alphabet = list(expect.keys()) if alphabet is None else alphabet
            assert set(got.keys()) == set(alphabet), \
                'Keys differ.\n  got keys  = %s\n  want keys = %s' % (set(got.keys()), set(alphabet))
            expect = [expect[k] for k in alphabet]
            got = [got[k] for k in alphabet]

        if isinstance(expect, np.ndarray) and isinstance(got, np.ndarray):
            assert expect.shape == got.shape, [expect.shape, got.shape]
            expect = expect.flatten()
            got = got.flatten()

        if data is not None:
            assert isinstance(expect, (int, str)), \
                'expected a column name got %s' % type(expect)
            assert isinstance(got, (int, str)), \
                'expected a column name got %s' % type(got)

            if expect_label is None:
                expect_label = expect
            if got_label is None:
                got_label = got

            expect = data[expect]
            got = data[got]

        else:
            if expect_label is None:
                expect_label = 'expect'
            if got_label is None:
                got_label = 'got'

            expect = np.asarray(expect)
            got = np.asarray(got)

            data = pd.DataFrame({expect_label: expect, got_label: got})

        assert expect.shape == got.shape, [expect.shape, got.shape]
        [n] = expect.shape

        self.expect = expect
        self.got = got
        self.alphabet = alphabet
        self.ax = ax
        self.got_label = got_label
        self.expect_label = expect_label
        self.n = n
        self.coeff = None

        self.tests = tests = []

        # Check that vectors are finite.
        if not np.isfinite(expect).all():
            tests.append([
                'expect finite',
                progress(np.isfinite(expect).sum(), n), False
            ])
        if not np.isfinite(got).all():
            tests.append(
                ['got finite',
                 progress(np.isfinite(got).sum(), n), False])

        ne = norm(expect)
        ng = norm(got)
        ok = abs(ne - ng) / ne < 0.01 if ne != 0 else True

        if n > 1:
            tests.append(['norms', '[%g, %g]' % (ne, ng), ok])
            F = zero_retrieval(expect, got)
            tests.append(['zero F1', F, F > 0.99])

        if n > 1:
            #self.cosine = cosine(expect, got)
            #tests.append(['cosine', self.cosine, (self.cosine > 0.99999)])   # cosine similarities must be really high.

            self.pearson = 1.0 if ne == ng == 0 else pearsonr(expect, got)[0]
            tests.append(['pearson', self.pearson, (self.pearson > 0.99999)])

            self.spearman = spearmanr(expect, got)[0]
            tests.append(
                ['spearman', self.spearman, (self.spearman > 0.99999)])

        # TODO: this check should probably take into account the scale of the data.
        d = linf(expect, got)
        self.max_err = d
        tests.append(['Linf', d, d < 1e-8])

        # same sign check (weak agreement, but useful sanity check -- especially
        # for gradients)
        x = expect
        y = got
        s = np.asarray(~((x >= 0) ^ (y >= 0)), dtype=int)
        p = s.sum() * 100.0 / len(s)
        tests.append(
            ['same-sign',
             '%s%% (%s/%s)' % (p, s.sum(), len(s)), p == 100.0])

        # relative error
        r = relative_difference(expect, got)
        r = np.max(r[np.isfinite(r)])
        tests.append(['max rel err', r, r <= 0.01])
        self.max_relative_error = r
        self.max_rel_err = r

        # TODO: suggest that if relative error is high and rescaled error is low (or
        # something to do wtih regression residuals) that maybe there is a
        # (hopefully) simple fix via scale/offset.

        # TODO: can provide descriptive statistics for each vector
        #tests.append(['range (expect)', [expect.min(), expect.max()], 2])
        #tests.append(['range (got)   ', [got.min(), got.max()], 2])

        # regression and rescaled error only valid for n >= 2
        if n >= 2:
            es = abs(expect).max()
            gs = abs(got).max()
            if es == 0:
                es = 1
            if gs == 0:
                gs = 1
            if 0:
                # rescaled error
                E = expect / es
                G = got / gs
                R = abs(E - G)
                r = np.mean(R)
                tests.append(['mean rescaled error', r, r <= 1e-5])

        if regression:
            self.regression()

        if n >= 2:
            # These tests check if one of the datasets is consistently larger than the
            # other. The threshold for error is based on `P_LARGER` ("percent larger").
            L = ((expect - got) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['expect is larger', progress(L, n), 0])
            L = ((got - expect) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['got is larger', progress(L, n), 0])

        self.tests = tests
        if verbose:
            self.message()
예제 #2
0
    def __init__(self, want, have, name=None, data=None, P_LARGER=0.9,
                 regression=True, ax=None, alphabet=None,
                 want_label=None, have_label=None, verbose=1):
        """Compare vectors.

        Arguments:

          - Specifying data for comparison two methods:

            1) `want`, `have`: two numeric one-dimensional arrays which we'd like
               to compare (the argument names come for software testing). This
               method requires argument `data=None`.

            2) `data`: instance of `DataFrame`, expects arguments `want` and `have`
               to be column labels.

          - `name`: name of this comparison.

        Note:

         - when plotting `want` is the y-axis, `have` is the x-axis. This is by
           convention that `want` is the dependent variable (regression target).

        TODO:

         - Add an option to drop NaNs and continue comparison.

         - Indicate which dimensions have the largest errors.

         - Add option to allow alignment/matchings?

        """

        self.name = name

        if isinstance(alphabet, Alphabet):
            alphabet = list(alphabet)

        if isinstance(want, dict) and isinstance(have, dict):
            alphabet = list(want.keys() | have.keys()) if alphabet is None else alphabet
            #assert set(have.keys()) == set(alphabet), \
            #    'Keys differ.\n  have keys  = %s\n  want keys = %s' % (set(have.keys()), set(alphabet))
            want = [want.get(k, 0) for k in alphabet]
            have = [have.get(k, 0) for k in alphabet]

        if isinstance(want, np.ndarray) and isinstance(have, np.ndarray):
            assert alphabet is None
            alphabet = Alphabet(dict(np.ndenumerate(want)).keys())

            assert want.shape == have.shape, [want.shape, have.shape]
            want = want.flatten()
            have = have.flatten()

        if data is not None:
#            assert isinstance(want, (int, str)), \
#                'expected a column name have %s' % type(want)
#            assert isinstance(have, (int, str)), \
#                'expected a column name have %s' % type(have)

            if want_label is None: want_label = want
            if have_label is None: have_label = have

            want = data[want]
            have = data[have]

        else:
            if want_label is None: want_label = 'want'
            if have_label is None: have_label = 'have'

            want = np.asarray(want)
            have = np.asarray(have)

            data = pd.DataFrame({want_label: want, have_label: have})

        assert want.shape == have.shape, [want.shape, have.shape]
        [n] = want.shape

        self.want = want
        self.have = have
        self.alphabet = alphabet
        self.ax = ax
        self.have_label = have_label
        self.want_label = want_label
        self.n = n
        self.coeff = None

        self.tests = tests = []

        # Check that vectors are finite.
        if not np.isfinite(want).all():
            tests.append(['want finite', progress(np.isfinite(want).sum(), n), False])
        if not np.isfinite(have).all():
            tests.append(['have finite', progress(np.isfinite(have).sum(), n), False])

        ne = norm(want)
        ng = norm(have)
        ok = abs(ne-ng)/ne < 0.01 if ne != 0 else True

        if n > 1:
            tests.append(['norms', '[%g, %g]' % (ne, ng), ok])
            #F = zero_retrieval(want, have)
            #tests.append(['zero F1', F, F > 0.99])

        # Correlation statistics
        if n > 1:
            #self.cosine = cosine(want, have)
            #tests.append(['cosine', self.cosine, (self.cosine > 0.99999)])   # cosine similarities must be really high.
            self.pearson = 1.0 if ne == ng == 0 else pearsonr(want, have)[0]
            tests.append(['pearson', self.pearson, (self.pearson > 0.99999)])
            self.spearman = spearmanr(want, have)[0]
            tests.append(['spearman', self.spearman, (self.spearman > 0.99999)])

        # TODO: this check should probably take into account the scale of the data.
        d = linf(want, have)
        self.max_err = d
        tests.append(['ℓ∞', d, None])
        tests.append(['ℓ₂', np.linalg.norm(want - have), None])

        # same sign check (weak agreement, but useful sanity check -- especially
        # for gradients)
        x = want
        y = have
        s = np.asarray(~((x >= 0) ^ (y >= 0)), dtype=int)
        p = s.sum() * 100.0 / len(s)
        tests.append(['same-sign', f'{p:.2f}% ({s.sum()}/{len(s)})', p == 100.0])

        # relative error
        r = relative_difference(want, have)
        r = np.max(r[np.isfinite(r)])
        #tests.append(['max rel err', r, r <= 0.01])
        self.max_relative_error = r
        self.max_rel_err = r

        # TODO: suggest that if relative error is high and rescaled error is low (or
        # something to do wtih regression residuals) that maybe there is a
        # (hopefully) simple fix via scale/offset.

        # TODO: can provide descriptive statistics for each vector
        #tests.append(['range (want)', [want.min(), want.max()], 2])
        #tests.append(['range (have)   ', [have.min(), have.max()], 2])

        # regression and rescaled error only valid for n >= 2
#        if n >= 2:
#            es = abs(want).max()
#            gs = abs(have).max()
#            if es == 0:
#                es = 1
#            if gs == 0:
#                gs = 1
#            if 0:
#                # rescaled error
#                E = want / es
#                G = have / gs
#                R = abs(E - G)
#                r = np.mean(R)
#                tests.append(['mean rescaled error', r, r <= 1e-5])

        if regression:
            self.regression()

        if n >= 2:
            # These tests check if one of the datasets is consistently larger than the
            # other. The threshold for error is based on `P_LARGER` ("percent larger").
            L = ((want-have) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['want is larger', progress(L, n), 0])
            L = ((have-want) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['have is larger', progress(L, n), 0])

        self.tests = tests
        if verbose:
            self.message()