def test_limits():
    """Plot np.cos twice: once without ``xlim`` and once with xlim=(10, 20).

    The final assertion compares the plot object to the string 'limits';
    presumably the test suite hooks ggplot equality to check a stored
    baseline image of that name -- confirm against the test helpers.
    """
    # Blue curve: no explicit limits, arrow head at the first end.
    without_xlim = stat_function(
        fun=np.cos, size=2, color='blue', arrow=arrow(ends='first'))
    # Red curve: explicit x limits, arrow head at the last end.
    with_xlim = stat_function(
        fun=np.cos, xlim=(10, 20), size=2, color='red',
        arrow=arrow(ends='last'))

    p = ggplot(df, aes('x')) + without_xlim + with_xlim
    assert p == 'limits'
def test_limits():
    """Check stat_function with and without an explicit ``xlim``.

    Two np.cos layers are stacked on one plot: a blue one that passes no
    ``xlim`` and a red one restricted to xlim=(10, 20).
    """
    p = ggplot(df, aes('x'))
    p = p + stat_function(
        fun=np.cos,
        size=2,
        color='blue',
        arrow=arrow(ends='first'),
    )
    p = p + stat_function(
        fun=np.cos,
        xlim=(10, 20),
        size=2,
        color='red',
        arrow=arrow(ends='last'),
    )
    # NOTE(review): equality with a string looks like the suite's
    # image-comparison idiom -- confirm.
    assert p == 'limits'
def test_exceptions():
    """Printing a plot with an unusable stat_function raises PlotnineError."""
    # The plot maps no x aesthetic, so there are no x limits to use.
    with pytest.raises(PlotnineError):
        print(ggplot(df) + stat_function(fun=np.sin))

    # ``fun`` is the integer 1, which is not callable.
    with pytest.raises(PlotnineError):
        print(ggplot(df, aes('x')) + stat_function(fun=1))
def test_exceptions():
    """Exercise the two error cases of stat_function.

    Each case builds a plot, adds the layer and prints the result inside a
    ``pytest.raises(PlotnineError)`` block.
    """
    # Case 1: no x limits (the plot has no aesthetics at all).
    with pytest.raises(PlotnineError):
        base = ggplot(df)
        layer = stat_function(fun=np.sin)
        print(base + layer)

    # Case 2: fun not callable.
    with pytest.raises(PlotnineError):
        base = ggplot(df, aes('x'))
        layer = stat_function(fun=1)
        print(base + layer)
def test_args():
    """Pass ``args`` to stat_function in each supported form.

    Four layers use the same helper function: one with no ``args``, one
    with a single value, one with a tuple and one with a dict.
    """
    def fun(x, f=lambda x: x, mul=1, add=0):
        # Apply ``f`` to x, then scale and shift the result.
        return f(x)*mul + add

    no_args = stat_function(fun=fun, size=2, color='blue')
    single_arg = stat_function(fun=fun, size=2, color='red', args=np.cos)
    tuple_args = stat_function(
        fun=fun, size=2, color='green', args=(np.cos, 2, 1))
    dict_args = stat_function(
        fun=fun, size=2, color='purple', args=dict(f=np.cos, mul=3, add=2))

    p = ggplot(df, aes('x')) + no_args + single_arg + tuple_args + dict_args

    assert p == 'args'
def test_args():
    """Check the forms of ``args`` accepted by stat_function.

    Layers, in order: no args (blue), a single arg (red), a tuple of args
    (green) and a dict of args (purple).
    """
    def fun(x, f=lambda x: x, mul=1, add=0):
        return f(x)*mul + add

    # Keyword arguments shared by every layer.
    shared = dict(fun=fun, size=2)

    p = ggplot(df, aes('x'))
    p = p + stat_function(color='blue', **shared)
    p = p + stat_function(color='red', args=np.cos, **shared)
    p = p + stat_function(color='green', args=(np.cos, 2, 1), **shared)
    p = p + stat_function(
        color='purple', args=dict(f=np.cos, mul=3, add=2), **shared)

    assert p == 'args'
Exemple #7
0
    def fit_curve(self):
        """Fit a cubic polynomial to a position/weight curve and plot it.

        The records returned by ``load_protobowl()`` are sorted by
        ``relative_position``. Positions are bucketed to 1/1000 and each
        bucket gets the weight ``1 - correct_so_far / n_records``. A
        degree-3 polynomial regression is fitted to those points, its
        coefficients are printed, and a plot of the points plus the fitted
        curve is saved to ``output/reporting/curve_score.pdf``.

        Returns:
            The fitted ``Pipeline`` (polynomial features followed by
            linear regression).
        """
        df = load_protobowl()
        # Only the literal True counts as correct; every other value
        # (the original comment mentions "prompt") becomes False.
        df.result = df.result.apply(lambda x: x is True)

        xy = sorted(
            zip(df.relative_position.tolist(), df.result.tolist()),
            key=lambda pair: pair[0],
        )

        # Running count of correct results per position bucket. The count
        # is stored *before* the current record is added, and a later
        # record in the same bucket overwrites the earlier value.
        correct_before = dict()
        cnt = 0
        for position, is_correct in xy:
            correct_before[int(position * 1000)] = cnt
            cnt += is_correct
        buckets = sorted(correct_before.items(), key=lambda item: item[0])

        # The curve is normalised by the total number of records, not by
        # the number of correct ones (the previous ``ttl_correct`` count of
        # True values was immediately overwritten and never used).
        n_records = len(xy)
        X = np.asarray([bucket / 1000 for bucket, _ in buckets])
        y = np.asarray([1 - seen / n_records for _, seen in buckets])

        degree = 3
        polynomial_features = PolynomialFeatures(degree=degree,
                                                 include_bias=False)
        linear_regression = LinearRegression()
        pipeline = Pipeline([
            ("polynomial_features", polynomial_features),
            ("linear_regression", linear_regression),
        ])
        # The estimator is fitted on a 2-D column of positions.
        pipeline.fit(X[:, np.newaxis], y)
        print(pipeline.steps[1][1].coef_)

        def get_weight(x):
            # Predict the weight for a single scalar position.
            return pipeline.predict(np.asarray([[x]]))[0]

        ddf = pd.DataFrame({"x": X, "y": y})
        p0 = (ggplot(ddf, aes(x="x", y="y")) +
              geom_point(size=0.3, color="blue", alpha=0.5, shape="+") +
              stat_function(fun=get_weight, color="red", size=2, alpha=0.5) +
              labs(x="Position", y="Weight"))
        p0.save("output/reporting/curve_score.pdf")
        p0.draw()

        return pipeline
Exemple #8
0
    def fit_curve(self):
        """Fit a cubic polynomial to a position/weight curve and plot it.

        Uses the first element returned by ``load_protobowl()``. Records
        are sorted by ``relative_position``, positions are bucketed to
        1/1000, and each bucket gets the weight
        ``1 - correct_so_far / n_records``. A degree-3 polynomial
        regression is fitted to those points, its coefficients are printed,
        and a plot of the points plus the fitted curve is saved to
        ``output/reporting/curve_score.pdf``.

        Returns:
            The fitted ``Pipeline`` (polynomial features followed by
            linear regression).
        """
        # The second element of the returned pair is not needed here.
        df, _ = load_protobowl()
        # Only the literal True counts as correct; every other value
        # (the original comment mentions "prompt") becomes False.
        df.result = df.result.apply(lambda x: x is True)

        xy = sorted(
            zip(df.relative_position.tolist(), df.result.tolist()),
            key=lambda pair: pair[0],
        )

        # Running count of correct results per position bucket. The count
        # is stored *before* the current record is added, and a later
        # record in the same bucket overwrites the earlier value.
        correct_before = dict()
        cnt = 0
        for position, is_correct in xy:
            correct_before[int(position * 1000)] = cnt
            cnt += is_correct
        buckets = sorted(correct_before.items(), key=lambda item: item[0])

        # The curve is normalised by the total number of records, not by
        # the number of correct ones (the previous ``ttl_correct`` count of
        # True values was immediately overwritten and never used).
        n_records = len(xy)
        X = np.asarray([bucket / 1000 for bucket, _ in buckets])
        y = np.asarray([1 - seen / n_records for _, seen in buckets])

        degree = 3
        polynomial_features = PolynomialFeatures(degree=degree, include_bias=False)
        linear_regression = LinearRegression()
        pipeline = Pipeline([("polynomial_features", polynomial_features),
                             ("linear_regression", linear_regression)])
        # The estimator is fitted on a 2-D column of positions.
        pipeline.fit(X[:, np.newaxis], y)
        print(pipeline.steps[1][1].coef_)

        def get_weight(x):
            # Predict the weight for a single scalar position.
            return pipeline.predict(np.asarray([[x]]))[0]

        ddf = pd.DataFrame({'x': X, 'y': y})
        p0 = (
            ggplot(ddf, aes(x='x', y='y'))
            + geom_point(size=0.3, color='blue', alpha=0.5, shape='+')
            + stat_function(fun=get_weight, color='red', size=2, alpha=0.5)
            + labs(x='Position', y='Weight')
        )
        p0.save('output/reporting/curve_score.pdf')
        p0.draw()

        return pipeline