Example #1
def _test_gradient(example, grammar, m):
    for gamma in [1]:
        print 'gamma =', gamma
        print 'prune = %s' % progress(len(example.nodes)-sum(m[x] for x in example.nodes), len(example.nodes))
        if gamma == 1:
            annealed_grammar = grammar
        else:
            annealed_grammar = grammar.anneal(gamma)
        __test_gradient(example, annealed_grammar, m*1, gamma)
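
Several of these snippets rely on a `progress(k, n)` helper that is not shown. Below is a minimal stand-in, under the assumption that it simply renders a count as a ratio with a percentage; the real helper may format things differently.

def progress(k, n):
    # Stand-in for the unshown `progress` helper: render `k` out of `n`.
    if n == 0:
        return '%d/%d' % (k, n)
    return '%d/%d (%.2f%%)' % (k, n, 100.0 * k / n)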
Example #2
    def __init__(self,
                 expect,
                 got,
                 name=None,
                 data=None,
                 P_LARGER=0.9,
                 regression=True,
                 ax=None,
                 alphabet=None,
                 expect_label=None,
                 got_label=None,
                 verbose=1):
        """Compare vectors.

        Arguments:

          - Two ways to specify the data for comparison:

            1) `expect`, `got`: two numeric one-dimensional arrays which we'd like
               to compare (the argument names come from software testing). This
               method requires argument `data=None`.

            2) `data`: instance of `DataFrame`, expects arguments `expect` and `got`
               to be column labels.

          - `name`: name of this comparison.

        Note:

         - When plotting, `expect` is on the y-axis and `got` on the x-axis, by the
           convention that `expect` is the dependent variable (regression target).

        TODO:

         - Add an option to drop NaNs and continue comparison.

         - Indicate which dimensions have the largest errors.

         - Add option to allow alignment/matchings?

        """

        self.name = name

        if isinstance(alphabet, Alphabet):
            alphabet = alphabet.tolist()

        if isinstance(expect, dict) and isinstance(got, dict):
            alphabet = list(expect.keys()) if alphabet is None else alphabet
            assert set(got.keys()) == set(alphabet), \
                'Keys differ.\n  got keys  = %s\n  want keys = %s' % (set(got.keys()), set(alphabet))
            expect = [expect[k] for k in alphabet]
            got = [got[k] for k in alphabet]

        if isinstance(expect, np.ndarray) and isinstance(got, np.ndarray):
            assert expect.shape == got.shape, [expect.shape, got.shape]
            expect = expect.flatten()
            got = got.flatten()

        if data is not None:
            assert isinstance(expect, (int, str)), \
                'expected a column name got %s' % type(expect)
            assert isinstance(got, (int, str)), \
                'expected a column name got %s' % type(got)

            if expect_label is None:
                expect_label = expect
            if got_label is None:
                got_label = got

            expect = data[expect]
            got = data[got]

        else:
            if expect_label is None:
                expect_label = 'expect'
            if got_label is None:
                got_label = 'got'

            expect = np.asarray(expect)
            got = np.asarray(got)

            data = pd.DataFrame({expect_label: expect, got_label: got})

        assert expect.shape == got.shape, [expect.shape, got.shape]
        [n] = expect.shape

        self.expect = expect
        self.got = got
        self.alphabet = alphabet
        self.ax = ax
        self.got_label = got_label
        self.expect_label = expect_label
        self.n = n
        self.coeff = None

        self.tests = tests = []

        # Check that vectors are finite.
        if not np.isfinite(expect).all():
            tests.append([
                'expect finite',
                progress(np.isfinite(expect).sum(), n), False
            ])
        if not np.isfinite(got).all():
            tests.append(
                ['got finite',
                 progress(np.isfinite(got).sum(), n), False])

        ne = norm(expect)
        ng = norm(got)
        ok = abs(ne - ng) / ne < 0.01 if ne != 0 else True

        if n > 1:
            tests.append(['norms', '[%g, %g]' % (ne, ng), ok])
            F = zero_retrieval(expect, got)
            tests.append(['zero F1', F, F > 0.99])

        if n > 1:
            #self.cosine = cosine(expect, got)
            #tests.append(['cosine', self.cosine, (self.cosine > 0.99999)])   # cosine similarities must be really high.

            self.pearson = 1.0 if ne == ng == 0 else pearsonr(expect, got)[0]
            tests.append(['pearson', self.pearson, (self.pearson > 0.99999)])

            self.spearman = spearmanr(expect, got)[0]
            tests.append(
                ['spearman', self.spearman, (self.spearman > 0.99999)])

        # TODO: this check should probably take into account the scale of the data.
        d = linf(expect, got)
        self.max_err = d
        tests.append(['Linf', d, d < 1e-8])

        # same sign check (weak agreement, but useful sanity check -- especially
        # for gradients)
        x = expect
        y = got
        s = np.asarray(~((x >= 0) ^ (y >= 0)), dtype=int)
        p = s.sum() * 100.0 / len(s)
        tests.append(
            ['same-sign',
             '%s%% (%s/%s)' % (p, s.sum(), len(s)), p == 100.0])

        # relative error
        r = relative_difference(expect, got)
        r = np.max(r[np.isfinite(r)])
        tests.append(['max rel err', r, r <= 0.01])
        self.max_relative_error = r
        self.max_rel_err = r

        # TODO: suggest that if relative error is high and rescaled error is low (or
        # something to do with regression residuals) that maybe there is a
        # (hopefully) simple fix via scale/offset.

        # TODO: can provide descriptive statistics for each vector
        #tests.append(['range (expect)', [expect.min(), expect.max()], 2])
        #tests.append(['range (got)   ', [got.min(), got.max()], 2])

        # regression and rescaled error only valid for n >= 2
        if n >= 2:
            es = abs(expect).max()
            gs = abs(got).max()
            if es == 0:
                es = 1
            if gs == 0:
                gs = 1
            if 0:
                # rescaled error
                E = expect / es
                G = got / gs
                R = abs(E - G)
                r = np.mean(R)
                tests.append(['mean rescaled error', r, r <= 1e-5])

        if regression:
            self.regression()

        if n >= 2:
            # These tests check if one of the datasets is consistently larger than the
            # other. The threshold for error is based on `P_LARGER` ("percent larger").
            L = ((expect - got) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['expect is larger', progress(L, n), 0])
            L = ((got - expect) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['got is larger', progress(L, n), 0])

        self.tests = tests
        if verbose:
            self.message()
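
A minimal usage sketch for the constructor above, exercising both input modes described in the docstring. The class name `compare` is an assumption (the snippet only shows its `__init__`), as is the availability of numpy and pandas in the calling code.

import numpy as np
import pandas as pd

expect = np.array([0.0, 1.0, 2.0, 3.0])
got    = np.array([0.0, 1.0, 2.0, 3.0001])

# Mode 1: two one-dimensional numeric arrays.
compare(expect, got, name='toy check')

# Mode 2: a DataFrame plus the two column labels to compare.
df = pd.DataFrame({'expect': expect, 'got': got})
compare('expect', 'got', data=df, name='toy check (columns)')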
Example #3
    def __init__(self, expect, got, name=None, data=None, P_LARGER=0.9,
                 scatter=False, regression=False, show_regression=False, ax=None,
                 alphabet=None, expect_label=None, got_label=None, scatter_kw=None):
        """Compare vectors.

        Arguments:

          - Two ways to specify the data for comparison:

            1) `expect`, `got`: two numeric one-dimensional arrays which we'd like
               to compare (the argument names come from software testing). This
               method requires argument `data=None`.

            2) `data`: instance of `DataFrame`, expects arguments `expect` and `got`
               to be column labels.

          - `name`: name of this comparison.

        Note:

         - When plotting, `expect` is on the y-axis and `got` on the x-axis, by the
           convention that `expect` is the dependent variable (regression target).

        TODO:

         - Allow the user to specify alternative names for `expect` and `got`, since they
           are often confusing.

         - Add an option to drop NaNs and continue comparison.

         - Support dictionaries as input with named dimensions and/or possibly an
           alphabet for naming dimensions.

         - Indicate which dimensions have the largest errors.

        """

        if show_regression:
            regression = 1

        scatter_kw = scatter_kw or {}

        if data is not None:
            assert isinstance(expect, (int, basestring)), \
                'expected a column name got %s' % type(expect)
            assert isinstance(got, (int, basestring)), \
                'expected a column name got %s' % type(got)

            if expect_label is None:
                expect_label = expect
            if got_label is None:
                got_label = got

            expect = data[expect]
            got = data[got]

        else:

            if expect_label is None:
                expect_label = 'expect'
            if got_label is None:
                got_label = 'got'

            expect = asarray(expect)
            got = asarray(got)

            data = DataFrame({expect_label: expect, got_label: got})

        assert expect.shape == got.shape
        [n] = expect.shape

        self.expect = expect
        self.got = got
        self.alphabet = alphabet

        tests = []

        # Check that vectors are finite.
        if not isfinite(expect).all():
            tests.append(['expect finite', progress(isfinite(expect).sum(), n), False])
        if not isfinite(got).all():
            tests.append(['got finite', progress(isfinite(got).sum(), n), False])


        tests.append(['norms', [norm(expect), norm(got)], -1])
        tests.append(['zeros', '%s %s' % (progress((expect==0).sum(), n),
                                          progress((got==0).sum(), n)),
                      -1])

        #print expect
        #print got
        #inds = isfinite(expect) & isfinite(got)
        #if not inds.any():
        #    print red % 'TOO MANY NANS'
        #    return
        #expect = expect[inds]
        #got = got[inds]

        c = cosine(expect, got)
        tests.append(['cosine-sim', c, (c > 0.99999)])

        if norm(expect) == 0 and norm(got) == 0:
            p = 1.0
        else:
            p = pearsonr(expect, got)[0]
        tests.append(['pearson', p, (p > 0.99999)])

        # TODO: this check should probably take into account the scale of the data.
        d = linf(expect, got)
        tests.append(['Linf', d, d < 1e-8])

        # same sign check (weak agreement, but useful sanity check -- especially
        # for gradients)
        x = expect
        y = got
        s = asarray(~((x >= 0) ^ (y >= 0)), dtype=int)
        p = s.sum() * 100.0 / len(s)
        tests.append(['same-sign', '%s%% (%s/%s)' % (p, s.sum(), len(s)), p == 100.0])

        # relative error
        r = relative_difference(expect, got)
        r = mean(r[isfinite(r)])
        tests.append(['mean relative error', r, r <= 0.01])

        # TODO: suggest that if relative error is high and rescaled error is low (or
        # something to do with regression residuals) that maybe there is a
        # (hopefully) simple fix via scale/offset.

        # TODO: can provide descriptive statistics for each vector
        #tests.append(['range (expect)', [expect.min(), expect.max()], 2])
        #tests.append(['range (got)   ', [got.min(), got.max()], 2])

        if scatter:
            if 1:
                if ax is None:
                    ax = pl.figure().add_subplot(111)
                ax.scatter(got, expect, lw=0, alpha=0.5, **scatter_kw)
                if name is not None:
                    ax.set_title(name)
                ax.set_xlabel(got_label)
                ax.set_ylabel(expect_label)
            else:
                import seaborn as sns
                sns.set_context(rc={"figure.figsize": (7, 5)})

                g = sns.JointGrid(got_label, expect_label, data=data)
                g.plot(sns.regplot, sns.distplot, stats.spearmanr)

                print "Pearson's r: {0}".format(pearsonr(got, expect))


        # regression and rescaled error only valid for n >= 2
        if n >= 2:

            es = abs(expect).max()
            gs = abs(got).max()
            if es == 0:
                es = 1
            if gs == 0:
                gs = 1
            # rescaled error
            E = expect / es
            G = got / gs
            R = abs(E - G)
            r = mean(R)
            tests.append(['mean rescaled error', r, r <= 1e-5])

            if regression:
                # least squares linear regression
                #
                # TODO: for regression we want parameters `[1 0]` and a small
                # residual. We want both these conditions to hold. Might be
                # useful to look at R^2 statistic since it normalizes scale and
                # number of data-points. (it's often used for reduction in
                # variance.)
                #
                from scipy.linalg import lstsq
                A = ones((n, 2))
                A[:,0] = got

                if isfinite(got).all() and isfinite(expect).all():
                    # data can't contain any NaNs
                    result = lstsq(A, expect)
                    tests.append(['regression', result[0], 2])
                else:
                    # contains a NaN
                    result = None
                    tests.append(['regression',
                                  'did not run due to NaNs in data',
                                  0])

            if show_regression and regression and scatter and result is not None:
                coeff = result[0]
                xa, xb = ax.get_xlim()
                A = ones((n, 2))
                A[:,0] = got

                # plot estimated line
                ys = A.dot(coeff)
                ax.plot(A[:,0], ys, c='r', alpha=0.5)

                if 0:
                    # plot target line (sometimes this ruins the plot -- e.g. if the
                    # data is really off the y=x line).
                    ys = A.dot([1,0])
                    ax.plot(A[:,0], ys, c='g', alpha=0.5)

                ax.grid(True)
                ax.set_xlim(xa,xb)


        if n >= 2:
            # These tests check if one of the datasets is consistently larger than the
            # other. The threshold for error is based on `P_LARGER` ("percent larger").
            L = ((expect-got) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['expect is larger', progress(L, n), 0])
            L = ((got-expect) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['got is larger', progress(L, n), 0])

        print
        print 'Comparison%s:' % (' (%s)' % name if name else ''), 'n=%s' % n
        #print yellow % 'expected:'
        #print expect
        #print yellow % 'got:'
        #print got
        for k, v, passed in tests:
            if passed == 1:
                c = green
            elif passed == 0:
                c = red
            else:
                c = yellow

            try:
                v = '%g' % v
            except TypeError:
                pass

            print '  %s: %s' % (k, c % (v,))
        print

        if alphabet is not None:
            self.show_largest_rel_errors()
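
A hypothetical call exercising the scatter and regression options of this variant. The class name `compare` is an assumption, and plotting assumes the `pl`/matplotlib backend imported by the surrounding module.

import numpy as np

rng = np.random.RandomState(0)
expect = rng.randn(100)
got = expect + 0.01 * rng.randn(100)     # a slightly perturbed copy

# Draw the scatter plot and overlay the least-squares regression line.
compare(expect, got, name='noisy copy', scatter=True, show_regression=True)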
Example #4
    def __init__(self, want, have, name=None, data=None, P_LARGER=0.9,
                 regression=True, ax=None, alphabet=None,
                 want_label=None, have_label=None, verbose=1):
        """Compare vectors.

        Arguments:

          - Two ways to specify the data for comparison:

            1) `want`, `have`: two numeric one-dimensional arrays which we'd like
               to compare (the argument names come from software testing). This
               method requires argument `data=None`.

            2) `data`: instance of `DataFrame`, expects arguments `want` and `have`
               to be column labels.

          - `name`: name of this comparison.

        Note:

         - When plotting, `want` is on the y-axis and `have` on the x-axis, by the
           convention that `want` is the dependent variable (regression target).

        TODO:

         - Add an option to drop NaNs and continue comparison.

         - Indicate which dimensions have the largest errors.

         - Add option to allow alignment/matchings?

        """

        self.name = name

        if isinstance(alphabet, Alphabet):
            alphabet = list(alphabet)

        if isinstance(want, dict) and isinstance(have, dict):
            alphabet = list(want.keys() | have.keys()) if alphabet is None else alphabet
            #assert set(have.keys()) == set(alphabet), \
            #    'Keys differ.\n  have keys  = %s\n  want keys = %s' % (set(have.keys()), set(alphabet))
            want = [want.get(k, 0) for k in alphabet]
            have = [have.get(k, 0) for k in alphabet]

        if isinstance(want, np.ndarray) and isinstance(have, np.ndarray):
            assert alphabet is None
            alphabet = Alphabet(dict(np.ndenumerate(want)).keys())

            assert want.shape == have.shape, [want.shape, have.shape]
            want = want.flatten()
            have = have.flatten()

        if data is not None:
#            assert isinstance(want, (int, str)), \
#                'expected a column name have %s' % type(want)
#            assert isinstance(have, (int, str)), \
#                'expected a column name have %s' % type(have)

            if want_label is None: want_label = want
            if have_label is None: have_label = have

            want = data[want]
            have = data[have]

        else:
            if want_label is None: want_label = 'want'
            if have_label is None: have_label = 'have'

            want = np.asarray(want)
            have = np.asarray(have)

            data = pd.DataFrame({want_label: want, have_label: have})

        assert want.shape == have.shape, [want.shape, have.shape]
        [n] = want.shape

        self.want = want
        self.have = have
        self.alphabet = alphabet
        self.ax = ax
        self.have_label = have_label
        self.want_label = want_label
        self.n = n
        self.coeff = None

        self.tests = tests = []

        # Check that vectors are finite.
        if not np.isfinite(want).all():
            tests.append(['want finite', progress(np.isfinite(want).sum(), n), False])
        if not np.isfinite(have).all():
            tests.append(['have finite', progress(np.isfinite(have).sum(), n), False])

        ne = norm(want)
        ng = norm(have)
        ok = abs(ne-ng)/ne < 0.01 if ne != 0 else True

        if n > 1:
            tests.append(['norms', '[%g, %g]' % (ne, ng), ok])
            #F = zero_retrieval(want, have)
            #tests.append(['zero F1', F, F > 0.99])

        # Correlation statistics
        if n > 1:
            #self.cosine = cosine(want, have)
            #tests.append(['cosine', self.cosine, (self.cosine > 0.99999)])   # cosine similarities must be really high.
            self.pearson = 1.0 if ne == ng == 0 else pearsonr(want, have)[0]
            tests.append(['pearson', self.pearson, (self.pearson > 0.99999)])
            self.spearman = spearmanr(want, have)[0]
            tests.append(['spearman', self.spearman, (self.spearman > 0.99999)])

        # TODO: this check should probably take into account the scale of the data.
        d = linf(want, have)
        self.max_err = d
        tests.append(['ℓ∞', d, None])
        tests.append(['ℓ₂', np.linalg.norm(want - have), None])

        # same sign check (weak agreement, but useful sanity check -- especially
        # for gradients)
        x = want
        y = have
        s = np.asarray(~((x >= 0) ^ (y >= 0)), dtype=int)
        p = s.sum() * 100.0 / len(s)
        tests.append(['same-sign', f'{p:.2f}% ({s.sum()}/{len(s)})', p == 100.0])

        # relative error
        r = relative_difference(want, have)
        r = np.max(r[np.isfinite(r)])
        #tests.append(['max rel err', r, r <= 0.01])
        self.max_relative_error = r
        self.max_rel_err = r

        # TODO: suggest that if relative error is high and rescaled error is low (or
        # something to do with regression residuals) that maybe there is a
        # (hopefully) simple fix via scale/offset.

        # TODO: can provide descriptive statistics for each vector
        #tests.append(['range (want)', [want.min(), want.max()], 2])
        #tests.append(['range (have)   ', [have.min(), have.max()], 2])

        # regression and rescaled error only valid for n >= 2
#        if n >= 2:
#            es = abs(want).max()
#            gs = abs(have).max()
#            if es == 0:
#                es = 1
#            if gs == 0:
#                gs = 1
#            if 0:
#                # rescaled error
#                E = want / es
#                G = have / gs
#                R = abs(E - G)
#                r = np.mean(R)
#                tests.append(['mean rescaled error', r, r <= 1e-5])

        if regression:
            self.regression()

        if n >= 2:
            # These tests check if one of the datasets is consistently larger than the
            # other. The threshold for error is based on `P_LARGER` ("percent larger").
            L = ((want-have) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['want is larger', progress(L, n), 0])
            L = ((have-want) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['have is larger', progress(L, n), 0])

        self.tests = tests
        if verbose:
            self.message()
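
The `want`/`have` variant above also accepts plain dictionaries: the alphabet becomes the union of the two key sets and missing keys default to 0. A small sketch, again assuming the surrounding class is named `compare`:

want = {'a': 1.0, 'b': 2.0}
have = {'a': 1.0, 'b': 2.0, 'c': 0.0}   # 'c' is absent from `want`, so it defaults to 0 there
compare(want, have, name='dict comparison')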
Example #5
for t,p,x,y in sorted(zip(y_true,y_hat,X,Y)):
    ux = A[x] not in seen
    uy = A[y] not in seen
    if ux or uy:
        unseen += 1
        continue       # filter out unseen test examples
    if p != t:
        err += 1
#        print x, y, colors.red % L[p], colors.green % L[t], 'unseen(x)' if ux else '', 'unseen(y)' if uy else ''
#    else:
#        if ux or uy:
#            correct_unseen += 1
#        correct += 1


print 'p(err|seen) = %s' % progress(err, len(y_true))
print 'p(unseen)   = %s' % progress(unseen, len(y_true))

#print 'correct unseen = %s' % progress(correct_unseen, correct)


"""TODO

 - How transitive is the learned relation?

"""




"""
Example #6
    def __init__(self, expect, got, name=None, data=None, P_LARGER=0.9,
                 regression=True, ax=None, alphabet=None,
                 expect_label=None, got_label=None, verbose=1):
        """Compare vectors.

        Arguments:

          - Two ways to specify the data for comparison:

            1) `expect`, `got`: two numeric one-dimensional arrays which we'd like
               to compare (the argument names come from software testing). This
               method requires argument `data=None`.

            2) `data`: instance of `DataFrame`, expects arguments `expect` and `got`
               to be column labels.

          - `name`: name of this comparison.

        Note:

         - When plotting, `expect` is on the y-axis and `got` on the x-axis, by the
           convention that `expect` is the dependent variable (regression target).

        TODO:

         - Add an option to drop NaNs and continue comparison.

         - Indicate which dimensions have the largest errors.

        """

        if isinstance(expect, dict) and isinstance(got, dict):
            alphabet = expect.keys() if alphabet is None else alphabet
            assert set(got.keys()) == set(alphabet), \
                'Keys differ.\n  got keys  = %s\n  want keys = %s' % (got.keys(), alphabet)
            expect = [expect[k] for k in alphabet]
            got = [got[k] for k in alphabet]

        if isinstance(expect, np.ndarray) and isinstance(got, np.ndarray):
            assert expect.shape == got.shape
            expect = expect.flatten()
            got = got.flatten()

        if data is not None:
            assert isinstance(expect, (int, basestring)), \
                'expected a column name got %s' % type(expect)
            assert isinstance(got, (int, basestring)), \
                'expected a column name got %s' % type(got)

            if expect_label is None:
                expect_label = expect
            if got_label is None:
                got_label = got

            expect = data[expect]
            got = data[got]

        else:
            if expect_label is None:
                expect_label = 'expect'
            if got_label is None:
                got_label = 'got'

            expect = np.asarray(expect)
            got = np.asarray(got)

            data = pd.DataFrame({expect_label: expect, got_label: got})

        assert expect.shape == got.shape
        [n] = expect.shape

        self.expect = expect
        self.got = got
        self.alphabet = alphabet
        self.ax = ax
        self.name = name
        self.got_label = got_label
        self.expect_label = expect_label
        self.n = n
        self.coeff = None

        self.tests = tests = []

        # Check that vectors are finite.
        if not np.isfinite(expect).all():
            tests.append(['expect finite', progress(np.isfinite(expect).sum(), n), False])
        if not np.isfinite(got).all():
            tests.append(['got finite', progress(np.isfinite(got).sum(), n), False])

        ne = norm(expect)
        ng = norm(got)
        ok = abs(ne-ng)/ne < 0.01 if ne != 0 else True

        if n > 1:
            tests.append(['norms', '[%g, %g]' % (ne, ng), ok])
            F = zero_retrieval(expect, got)
            tests.append(['zero F1', F, F > 0.99])

        if n > 1:
            self.cosine = cosine(expect, got)
            tests.append(['cosine', self.cosine, (self.cosine > 0.99999)])   # cosine similarities must be really high.

            self.pearson = 1.0 if ne == ng == 0 else pearsonr(expect, got)[0]
            tests.append(['pearson', self.pearson, (self.pearson > 0.99999)])

            self.spearman = spearmanr(expect, got)[0]
            tests.append(['spearman', self.spearman, (self.spearman > 0.99999)])

        # TODO: this check should probably take into account the scale of the data.
        d = linf(expect, got)
        self.max_err = d
        tests.append(['Linf', d, d < 1e-8])

        # same sign check (weak agreement, but useful sanity check -- especially
        # for gradients)
        x = expect
        y = got
        s = np.asarray(~((x >= 0) ^ (y >= 0)), dtype=int)
        p = s.sum() * 100.0 / len(s)
        tests.append(['same-sign', '%s%% (%s/%s)' % (p, s.sum(), len(s)), p == 100.0])

        # relative error
        r = relative_difference(expect, got)
        r = np.mean(r[np.isfinite(r)])
        tests.append(['mean relative error', r, r <= 0.01])
        self.mean_relative_error = r

        # TODO: suggest that if relative error is high and rescaled error is low (or
        # something to do with regression residuals) that maybe there is a
        # (hopefully) simple fix via scale/offset.

        # TODO: can provide descriptive statistics for each vector
        #tests.append(['range (expect)', [expect.min(), expect.max()], 2])
        #tests.append(['range (got)   ', [got.min(), got.max()], 2])

        # regression and rescaled error only valid for n >= 2
        if n >= 2:
            es = abs(expect).max()
            gs = abs(got).max()
            if es == 0:
                es = 1
            if gs == 0:
                gs = 1
            if 0:
                # rescaled error
                E = expect / es
                G = got / gs
                R = abs(E - G)
                r = np.mean(R)
                tests.append(['mean rescaled error', r, r <= 1e-5])

        if regression:
            self.regression()

        if n >= 2:
            # These tests check if one of the datasets is consistently larger than the
            # other. The threshold for error is based on `P_LARGER` ("percent larger").
            L = ((expect-got) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['expect is larger', progress(L, n), 0])
            L = ((got-expect) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['got is larger', progress(L, n), 0])

        self.tests = tests
        if verbose:
            self.message()
Example #7
for t, p, x, y in sorted(zip(y_true, y_hat, X, Y)):
    ux = A[x] not in seen
    uy = A[y] not in seen
    if ux or uy:
        unseen += 1
        continue  # filter out unseen test examples
    if p != t:
        err += 1
#        print x, y, colors.red % L[p], colors.green % L[t], 'unseen(x)' if ux else '', 'unseen(y)' if uy else ''
#    else:
#        if ux or uy:
#            correct_unseen += 1
#        correct += 1

print 'p(err|seen) = %s' % progress(err, len(y_true))
print 'p(unseen)   = %s' % progress(unseen, len(y_true))

#print 'correct unseen = %s' % progress(correct_unseen, correct)
"""TODO

 - How transitive is the learned relation?

"""
"""
Confusion matrix
================

C[i,j] = "pred i, true j"
"""
Example #8
    def __init__(self,
                 expect,
                 got,
                 name=None,
                 data=None,
                 P_LARGER=0.9,
                 regression=True,
                 ax=None,
                 alphabet=None,
                 expect_label=None,
                 got_label=None):
        """Compare vectors.

        Arguments:

          - Two ways to specify the data for comparison:

            1) `expect`, `got`: two numeric one-dimensional arrays which we'd like
               to compare (the argument names come from software testing). This
               method requires argument `data=None`.

            2) `data`: instance of `DataFrame`, expects arguments `expect` and `got`
               to be column labels.

          - `name`: name of this comparison.

        Note:

         - When plotting, `expect` is on the y-axis and `got` on the x-axis, by the
           convention that `expect` is the dependent variable (regression target).

        TODO:

         - Allow the user to specify alternative names for `expect` and `got`, since they
           are often confusing.

         - Add an option to drop NaNs and continue comparison.

         - Support dictionaries as input with named dimensions and/or possibly an
           alphabet for naming dimensions.

         - Indicate which dimensions have the largest errors.

        """

        if data is not None:
            assert isinstance(expect, (int, basestring)), \
                'expected a column name got %s' % type(expect)
            assert isinstance(got, (int, basestring)), \
                'expected a column name got %s' % type(got)

            if expect_label is None:
                expect_label = expect
            if got_label is None:
                got_label = got

            expect = data[expect]
            got = data[got]

        else:
            if expect_label is None:
                expect_label = 'expect'
            if got_label is None:
                got_label = 'got'

            expect = asarray(expect)
            got = asarray(got)

            data = DataFrame({expect_label: expect, got_label: got})

        assert expect.shape == got.shape
        [n] = expect.shape

        self.expect = expect
        self.got = got
        self.alphabet = alphabet
        self.ax = ax
        self.name = name
        self.got_label = got_label
        self.expect_label = expect_label
        self.n = n
        self.coeff = None

        self.tests = tests = []

        # Check that vectors are finite.
        if not isfinite(expect).all():
            tests.append(
                ['expect finite',
                 progress(isfinite(expect).sum(), n), False])
        if not isfinite(got).all():
            tests.append(
                ['got finite',
                 progress(isfinite(got).sum(), n), False])

        ne = norm(expect)
        ng = norm(got)
        ok = abs(ne - ng) / ne < 0.01 if ne != 0 else True
        tests.append(['norms', '[%g, %g]' % (ne, ng), ok])

        # TODO: what do we want to say about sparsity?
        #tests.append(['zeros', '%s %s' % (progress((expect==0).sum(), n),
        #                                  progress((got==0).sum(), n)),
        #              -1])
        F = zero_retrieval(expect, got)
        tests.append(['zero F1', F, F > 0.99])

        c = cosine(expect, got)
        self.cosine = c
        tests.append(['cosine-sim', c, (c > 0.99999)
                      ])  # cosine similarities must be really high.

        self.pearsonr = 1.0 if ne == ng == 0 else pearsonr(expect, got)[0]
        tests.append(['pearson', self.pearsonr, (self.pearsonr > 0.99999)])

        p = spearmanr(expect, got)[0]
        tests.append(['spearman', p, (p > 0.99999)])

        # TODO: this check should probably take into account the scale of the data.
        d = linf(expect, got)
        self.max_err = d
        tests.append(['Linf', d, d < 1e-8])

        # same sign check (weak agreement, but useful sanity check -- especially
        # for gradients)
        x = expect
        y = got
        s = asarray(~((x >= 0) ^ (y >= 0)), dtype=int)
        p = s.sum() * 100.0 / len(s)
        tests.append(
            ['same-sign',
             '%s%% (%s/%s)' % (p, s.sum(), len(s)), p == 100.0])

        # relative error
        r = relative_difference(expect, got)
        r = mean(r[isfinite(r)])
        tests.append(['mean relative error', r, r <= 0.01])

        # TODO: suggest that if relative error is high and rescaled error is low (or
        # something to do with regression residuals) that maybe there is a
        # (hopefully) simple fix via scale/offset.

        # TODO: can provide descriptive statistics for each vector
        #tests.append(['range (expect)', [expect.min(), expect.max()], 2])
        #tests.append(['range (got)   ', [got.min(), got.max()], 2])

        # regression and rescaled error only valid for n >= 2
        if n >= 2:
            es = abs(expect).max()
            gs = abs(got).max()
            if es == 0:
                es = 1
            if gs == 0:
                gs = 1
            # rescaled error
            E = expect / es
            G = got / gs
            R = abs(E - G)
            r = mean(R)
            tests.append(['mean rescaled error', r, r <= 1e-5])

        if regression:
            self.regression()

        if n >= 2:
            # These tests check if one of the datasets is consistently larger than the
            # other. The threshold for error is based on `P_LARGER` ("percent larger").
            L = ((expect - got) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['expect is larger', progress(L, n), 0])
            L = ((got - expect) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['got is larger', progress(L, n), 0])

        print
        print 'Comparison%s:' % (' (%s)' % name if name else ''), 'n=%s' % n
        #print yellow % 'expected:'
        #print expect
        #print yellow % 'got:'
        #print got
        for k, v, passed in tests:
            if passed == 1:
                c = green
            elif passed == 0:
                c = red
            else:
                c = yellow

            try:
                v = '%g' % v
            except TypeError:
                pass

            print '  %s: %s' % (k, c % (v, ))
        print

        if alphabet is not None:
            self.show_largest_rel_errors()
Example #9
    def __init__(self, expect, got, name=None, data=None, P_LARGER=0.9,
                 regression=True, ax=None, alphabet=None, expect_label=None, got_label=None):
        """Compare vectors.

        Arguments:

          - Two ways to specify the data for comparison:

            1) `expect`, `got`: two numeric one-dimensional arrays which we'd like
               to compare (the argument names come from software testing). This
               method requires argument `data=None`.

            2) `data`: instance of `DataFrame`, expects arguments `expect` and `got`
               to be column labels.

          - `name`: name of this comparison.

        Note:

         - When plotting, `expect` is on the y-axis and `got` on the x-axis, by the
           convention that `expect` is the dependent variable (regression target).

        TODO:

         - Allow the user to specify alternative names for `expect` and `got`, since they
           are often confusing.

         - Add an option to drop NaNs and continue comparison.

         - Support dictionaries as input with named dimensions and/or possibly an
           alphabet for naming dimensions.

         - Indicate which dimensions have the largest errors.

        """

        if data is not None:
            assert isinstance(expect, (int, basestring)), \
                'expected a column name got %s' % type(expect)
            assert isinstance(got, (int, basestring)), \
                'expected a column name got %s' % type(got)

            if expect_label is None:
                expect_label = expect
            if got_label is None:
                got_label = got

            expect = data[expect]
            got = data[got]

        else:
            if expect_label is None:
                expect_label = 'expect'
            if got_label is None:
                got_label = 'got'

            expect = asarray(expect)
            got = asarray(got)

            data = DataFrame({expect_label: expect, got_label: got})

        assert expect.shape == got.shape
        [n] = expect.shape

        self.expect = expect
        self.got = got
        self.alphabet = alphabet
        self.ax = ax
        self.name = name
        self.got_label = got_label
        self.expect_label = expect_label
        self.n = n
        self.coeff = None

        self.tests = tests = []

        # Check that vectors are finite.
        if not isfinite(expect).all():
            tests.append(['expect finite', progress(isfinite(expect).sum(), n), False])
        if not isfinite(got).all():
            tests.append(['got finite', progress(isfinite(got).sum(), n), False])

        ne = norm(expect)
        ng = norm(got)
        ok = abs(ne-ng)/ne < 0.01 if ne != 0 else True
        tests.append(['norms', '[%g, %g]' % (ne, ng), ok])

        # TODO: what do we want to say about sparsity?
        #tests.append(['zeros', '%s %s' % (progress((expect==0).sum(), n),
        #                                  progress((got==0).sum(), n)),
        #              -1])
        F = zero_retrieval(expect, got)
        tests.append(['zero F1', F, F > 0.99])

        c = cosine(expect, got)
        self.cosine = c
        tests.append(['cosine-sim', c, (c > 0.99999)])   # cosine similarities must be really high.

        self.pearsonr = 1.0 if ne == ng == 0 else pearsonr(expect, got)[0]
        tests.append(['pearson', self.pearsonr, (self.pearsonr > 0.99999)])

        p = spearmanr(expect, got)[0]
        tests.append(['spearman', p, (p > 0.99999)])

        # TODO: this check should probably take into account the scale of the data.
        d = linf(expect, got)
        self.max_err = d
        tests.append(['Linf', d, d < 1e-8])

        # same sign check (weak agreement, but useful sanity check -- especially
        # for gradients)
        x = expect
        y = got
        s = asarray(~((x >= 0) ^ (y >= 0)), dtype=int)
        p = s.sum() * 100.0 / len(s)
        tests.append(['same-sign', '%s%% (%s/%s)' % (p, s.sum(), len(s)), p == 100.0])

        # relative error
        r = relative_difference(expect, got)
        r = mean(r[isfinite(r)])
        tests.append(['mean relative error', r, r <= 0.01])

        # TODO: suggest that if relative error is high and rescaled error is low (or
        # something to do with regression residuals) that maybe there is a
        # (hopefully) simple fix via scale/offset.

        # TODO: can provide descriptive statistics for each vector
        #tests.append(['range (expect)', [expect.min(), expect.max()], 2])
        #tests.append(['range (got)   ', [got.min(), got.max()], 2])

        # regression and rescaled error only valid for n >= 2
        if n >= 2:
            es = abs(expect).max()
            gs = abs(got).max()
            if es == 0:
                es = 1
            if gs == 0:
                gs = 1
            # rescaled error
            E = expect / es
            G = got / gs
            R = abs(E - G)
            r = mean(R)
            tests.append(['mean rescaled error', r, r <= 1e-5])

        if regression:
            self.regression()

        if n >= 2:
            # These tests check if one of the datasets is consistently larger than the
            # other. The threshold for error is based on `P_LARGER` ("percent larger").
            L = ((expect-got) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['expect is larger', progress(L, n), 0])
            L = ((got-expect) > 0).sum()
            if L >= P_LARGER * n:
                tests.append(['got is larger', progress(L, n), 0])

        print
        print 'Comparison%s:' % (' (%s)' % name if name else ''), 'n=%s' % n
        #print yellow % 'expected:'
        #print expect
        #print yellow % 'got:'
        #print got
        for k, v, passed in tests:
            if passed == 1:
                c = green
            elif passed == 0:
                c = red
            else:
                c = yellow

            try:
                v = '%g' % v
            except TypeError:
                pass

            print '  %s: %s' % (k, c % (v,))
        print

        if alphabet is not None:
            self.show_largest_rel_errors()