Ejemplo n.º 1
0
def test_standard_scaler_add_remove_columns():
    """Checks that no exceptions are raised whenever columns are dropped and/or added."""

    df = pd.read_csv(datasets.TrumpApproval().path)
    half = len(df.columns) // 2

    scaler = preprocessing.StandardScaler()
    for batch in np.array_split(df, 10):
        # Train on a random half of the columns so that features appear
        # and disappear between successive mini-batches.
        subset = np.random.choice(df.columns, half, replace=False)
        scaler.fit_many(batch[subset])
Ejemplo n.º 2
0
def test_add_remove_columns():
    """Checks that no exceptions are raised whenever columns are dropped and/or added."""

    df = pd.read_csv(datasets.TrumpApproval().path)
    target = df.pop('five_thirty_eight')

    model = lm.LinearRegression()
    batches = zip(np.array_split(df, 10), np.array_split(target, 10))
    for xb, yb in batches:
        # Train on a random half of the features so that columns come and
        # go between successive mini-batches.
        subset = np.random.choice(df.columns, len(df.columns) // 2, replace=False)
        model.fit_many(xb[subset], yb)
Ejemplo n.º 3
0
def test_one_many_consistent():
    """Checks that using fit_one or fit_many produces the same result."""

    df = pd.read_csv(datasets.TrumpApproval().path)
    target = df.pop('five_thirty_eight')

    # Train one model observation by observation.
    single = lm.LinearRegression()
    for x, y in stream.iter_pandas(df, target):
        single.fit_one(x, y)

    # Train another model with mini-batches of size one.
    batched = lm.LinearRegression()
    for xb, yb in zip(np.array_split(df, len(df)), np.array_split(target, len(target))):
        batched.fit_many(xb, yb)

    # Both training regimes must arrive at the same weights.
    for col in df:
        assert math.isclose(single.weights[col], batched.weights[col])
Ejemplo n.º 4
0
def test_standard_scaler_one_many_consistent():
    """Checks that using fit_one or fit_many produces the same result."""

    df = pd.read_csv(datasets.TrumpApproval().path)

    # Feed the observations one at a time.
    single = preprocessing.StandardScaler()
    for x, _ in stream.iter_pandas(df):
        single.fit_one(x)

    # Feed the same observations in ten mini-batches.
    batched = preprocessing.StandardScaler()
    for chunk in np.array_split(df, 10):
        batched.fit_many(chunk)

    # The running statistics must agree for every column.
    for col in df:
        assert math.isclose(single.counts[col], batched.counts[col])
        assert math.isclose(single.means[col], batched.means[col])
        assert math.isclose(single.vars[col], batched.vars[col])
Ejemplo n.º 5
0
def test_shuffle_columns():
    """Checks that fit_many works identically whether columns are shuffled or not."""

    df = pd.read_csv(datasets.TrumpApproval().path)
    target = df.pop('five_thirty_eight')

    # Reference model: columns always in their natural order.
    reference = lm.LinearRegression()
    for xb, yb in zip(np.array_split(df, 10), np.array_split(target, 10)):
        reference.fit_many(xb, yb)

    # Second model: columns re-ordered at random on every mini-batch.
    permuted = lm.LinearRegression()
    for xb, yb in zip(np.array_split(df, 10), np.array_split(target, 10)):
        order = np.random.permutation(df.columns)
        permuted.fit_many(xb[order], yb)

    # Column order must not influence the learned weights.
    for col in df:
        assert math.isclose(reference.weights[col], permuted.weights[col])
Ejemplo n.º 6
0
def test_standard_scaler_shuffle_columns():
    """Checks that fit_many works identically whether columns are shuffled or not."""

    X = pd.read_csv(datasets.TrumpApproval().path)

    # Reference scaler: columns always in their natural order.
    normal = preprocessing.StandardScaler()
    for xb in np.array_split(X, 10):
        normal.fit_many(xb)

    # Second scaler: columns re-ordered at random on every mini-batch.
    shuffled = preprocessing.StandardScaler()
    for xb in np.array_split(X, 10):
        cols = np.random.permutation(X.columns)
        shuffled.fit_many(xb[cols])

    for i in X:
        # Bug fix: the original compared ``shuffled`` against itself, which is
        # vacuously true and never exercised ``normal``. Compare the shuffled
        # scaler against the column-ordered reference instead.
        assert math.isclose(normal.counts[i], shuffled.counts[i])
        assert math.isclose(normal.means[i], shuffled.means[i])
        assert math.isclose(normal.vars[i], shuffled.vars[i])
Ejemplo n.º 7
0
def yield_datasets(model):
    """Yield datasets that match the kind of task ``model`` is able to solve."""

    from creme import base
    from creme import datasets
    from creme import stream
    from sklearn import datasets as sk_datasets

    model = guess_model(model)

    # Binary and multi-class classifiers can both handle a binary task.
    if isinstance(model, (base.BinaryClassifier, base.MultiClassifier)):
        yield datasets.Phishing()

    # Multi-class classification.
    if isinstance(model, base.MultiClassifier):
        yield datasets.ImageSegments().take(500)

    # Regression.
    if isinstance(model, base.Regressor):
        yield datasets.TrumpApproval()

    # Multi-output regression.
    if isinstance(model, base.MultiOutputRegressor):
        yield stream.iter_sklearn_dataset(sk_datasets.load_linnerud())

    # Multi-output classification.
    if isinstance(model, base.MultiOutputClassifier):
        yield datasets.Music()
Ejemplo n.º 8
0
def yield_datasets(model):
    """Yield datasets suited to the capabilities of ``model``."""

    from creme import base
    from creme import compose
    from creme import datasets
    from creme import preprocessing
    from creme import stream
    from sklearn import datasets as sk_datasets

    model = guess_model(model)

    # Classification
    if isinstance(model, (base.BinaryClassifier, base.MultiClassifier)):
        yield datasets.Phishing()

    # Multi-class classification
    if isinstance(model, base.MultiClassifier):
        yield datasets.ImageSegments().take(500)

    # Regression
    if isinstance(model, base.Regressor):
        yield datasets.TrumpApproval()

    # Multi-output regression
    if isinstance(model, base.MultiOutputRegressor):

        # First, a small sklearn toy dataset.
        yield stream.iter_sklearn_dataset(sk_datasets.load_linnerud())

        # Second, a lazily encoded version of the solar flare data.
        class OneHotSolarFlare:
            """One-hot encoded version of `datasets.SolarFlare`."""

            def __iter__(self):
                encoder = (compose.SelectType(str) |
                           preprocessing.OneHotEncoder()) + compose.SelectType(int)
                for x, y in datasets.SolarFlare():
                    yield encoder.transform_one(x), y

        yield OneHotSolarFlare()

    # Multi-output classification
    if isinstance(model, base.MultiOutputClassifier):
        yield datasets.Music()
Ejemplo n.º 9
0
def yield_datasets(model):
    """Yield datasets matching the kind of task ``model`` can tackle."""

    from creme import base
    from creme import datasets
    from creme import stream
    from sklearn import datasets as sk_datasets

    model = guess_model(model)

    # Classification (binary tasks work for multi-class models too).
    if isinstance(model, (base.BinaryClassifier, base.MultiClassifier)):
        yield datasets.Phishing()

    # Multi-class classification.
    if isinstance(model, base.MultiClassifier):
        yield datasets.ImageSegments().take(500)

    # Regression.
    if isinstance(model, base.Regressor):
        yield datasets.TrumpApproval()

    # Multi-output regression.
    if isinstance(model, base.MultiOutputRegressor):
        yield stream.iter_sklearn_dataset(sk_datasets.load_linnerud())

    # Multi-output classification, capped at 100 observations to stay fast.
    if isinstance(model, base.MultiOutputClassifier):
        yeast_stream = stream.iter_sklearn_dataset(sk_datasets.fetch_openml('yeast', version=4))
        yield itertools.islice(yeast_stream, 100)
Ejemplo n.º 10
0
def test_lin_reg_sklearn_coherence():
    """Checks that the sklearn and creme implementations produce the same results."""

    class HalvedSquaredLoss:
        """sklearn removes the leading 2 from the gradient of the squared loss."""

        def gradient(self, y_true, y_pred):
            return y_pred - y_true

    scaler = preprocessing.StandardScaler()
    creme_model = lm.LinearRegression(optimizer=optim.SGD(.01), loss=HalvedSquaredLoss())
    sklearn_model = sklm.SGDRegressor(learning_rate='constant', eta0=.01, alpha=.0)

    # Train both models on the exact same scaled stream of observations.
    for x, y in datasets.TrumpApproval():
        x = scaler.fit_one(x).transform_one(x)
        creme_model.fit_one(x, y)
        sklearn_model.partial_fit([list(x.values())], [y])

    # Weight for weight, both implementations should agree.
    for weight, coef in zip(creme_model.weights.values(), sklearn_model.coef_):
        assert math.isclose(weight, coef)

    assert math.isclose(creme_model.intercept, sklearn_model.intercept_[0])
Ejemplo n.º 11
0
def yield_datasets(model):
    """Yield datasets appropriate for the task(s) ``model`` can handle."""

    from creme import compose
    from creme import datasets
    from creme import preprocessing
    from creme import stream
    from creme import utils
    from sklearn import datasets as sk_datasets

    # Classification
    if utils.inspect.isclassifier(model):
        yield datasets.Phishing()

        # Multi-class classification
        if model._multiclass:
            yield datasets.ImageSegments().take(500)

    # Regression
    if utils.inspect.isregressor(model):
        yield datasets.TrumpApproval()

    # Multi-output regression
    if utils.inspect.ismoregressor(model):

        # First, a small sklearn toy dataset.
        yield stream.iter_sklearn_dataset(sk_datasets.load_linnerud())

        # Second, a lazily encoded version of the solar flare data.
        class EncodedSolarFlare:
            """One-hot encoded version of `datasets.SolarFlare`."""

            def __iter__(self):
                pipeline = (compose.SelectType(str) | preprocessing.OneHotEncoder()) + compose.SelectType(int)
                for x, y in datasets.SolarFlare():
                    yield pipeline.transform_one(x), y

        yield EncodedSolarFlare()

    # Multi-output classification
    if utils.inspect.ismoclassifier(model):
        yield datasets.Music()
Ejemplo n.º 12
0
        norm = utils.math.norm(p, order=2)
        for j in p:
            p[j] /= norm
        yield p


@pytest.mark.parametrize(
    'lm, dataset',
    [
        pytest.param(
            lm(optimizer=copy.deepcopy(optimizer), initializer=initializer, l2=0),
            dataset,
            id=f'{lm.__name__} - {optimizer} - {initializer}'
        )
        for lm, dataset in [
            (lm.LinearRegression, datasets.TrumpApproval()),
            (lm.LogisticRegression, datasets.Bananas())
        ]
        for optimizer, initializer in itertools.product(
            [
                optim.AdaBound(),
                optim.AdaDelta(),
                optim.AdaGrad(),
                optim.AdaMax(),
                optim.Adam(),
                optim.AMSGrad(),
                # TODO: check momentum optimizers
                # optim.Momentum(),
                # optim.NesterovMomentum(),
                optim.RMSProp(),
                optim.SGD()
Ejemplo n.º 13
0
            logger.debug(f"Overall model accuracy: {mod_acc} \n\n")
        if callback is not None:
            history.append(callback(res))
        step_count += 1

    return step_count >= min_steps, history


def stock_data():
    """Load ``<repo>/data/stock_data.csv`` into a DataFrame.

    Returns:
        pandas.DataFrame: the raw contents of the CSV file.
    """
    # Bug fix: ``Path.cwd()`` is a classmethod, so the original chain
    # ``Path(__file__).resolve().parent.parent.cwd()`` silently discarded the
    # file-relative prefix and resolved against the current working directory.
    # Anchor to this file's grandparent directory as the chain intended.
    csv_path = Path(__file__).resolve().parent.parent / 'data' / 'stock_data.csv'
    return pd.read_csv(str(csv_path))


# Module-level dataset handle; presumably consumed by training code below —
# NOTE(review): appears unused in the visible portion of this file, confirm.
X_y = datasets.TrumpApproval()


def main():
    df = stock_data()
    df = ta.utils.dropna(df)
    df = format_timeseries_dataframe(df, "Timestamp")
    df = format_look_ahead(df, "Close", size=-4)
    df.dropna()
    df['log_returns'] = 0
    df['log_returns'] = np.where(df["Close_future"] > df["Close"], 1, 1)
    df['log_returns'] = np.where(df["Close_future"] < df["Close"], -1,
                                 df['log_returns'])
    df = fibonacci(df)
    df = fibonacci_rsi(df)
    # df = super_hyper_mega_average_true_range(df)
Ejemplo n.º 14
0
from creme import datasets
from creme import linear_model
from creme import preprocessing
import dill
import requests

if __name__ == '__main__':

    # Base URL of a locally running model server (e.g. started separately).
    host = 'http://localhost:5000'

    # Set a flavor
    # NOTE(review): 201 is assumed to mean the flavor was accepted — confirm
    # against the server's API documentation.
    r = requests.post(host + '/api/init', json={'flavor': 'regression'})
    assert r.status_code == 201

    # Upload a model
    # The pipeline is serialized with dill so the server can rebuild the
    # creme estimators on its side.
    model = preprocessing.StandardScaler() | linear_model.LinearRegression()
    r = requests.post(host + '/api/model', data=dill.dumps(model))
    assert r.status_code == 201

    # Train on some data
    # Only the first 30 observations are sent, one HTTP request per example.
    for x, y in datasets.TrumpApproval().take(30):
        r = requests.post(host + '/api/learn',
                          json={
                              'features': x,
                              'ground_truth': y
                          })
        assert r.status_code == 201