import pandas as pd
from sklearn.datasets import load_boston  # requires scikit-learn < 1.2
from sklearn.model_selection import train_test_split

from mindsdb_native import Predictor


def test_regressor():
    """
    Sanity check. MindsDB point predictions should be within range
    of predicted bounds by the inductive conformal predictor.
    """
    def _df_from_x(x, columns=None):
        x = pd.DataFrame(x)
        if columns is None:
            x.columns = 'c' + pd.Series(range(len(x.columns))).astype(str)
        else:
            x.columns = columns
        return x

    def _df_from_xy(x, y, target):
        x = _df_from_x(x)
        x[target] = pd.DataFrame(y)
        return x

    X, y = load_boston(return_X_y=True)
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=5)
    target = 'medv'

    x_tr = _df_from_xy(X_train, Y_train, target)
    p = Predictor("ConformalTest")
    p.learn(from_data=x_tr, to_predict=target)

    x_te = _df_from_xy(X_test, Y_test, target)
    r = p.predict(when_data=x_te)
    r = [x.explanation[target] for x in r]

    for x in r:
        lower = x['confidence_interval'][0]
        upper = x['confidence_interval'][1]
        assert lower <= x['predicted_value'] <= upper
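For a quick interactive check of the same behaviour, the explanation dict can be read directly from a single prediction. A minimal sketch, assuming the "ConformalTest" predictor trained above is available and reusing the generated 'c0', 'c1', ... column names from _df_from_x; the input values are placeholders:

import pandas as pd
from mindsdb_native import Predictor

p = Predictor("ConformalTest")

# One-row frame with the generated column names (boston has 13 features);
# the zero values are placeholders, not meaningful inputs.
row = pd.DataFrame([{f'c{i}': 0.0 for i in range(13)}])

for pred in p.predict(when_data=row):
    exp = pred.explanation['medv']
    # The point prediction should sit inside the conformal bounds.
    print(exp['predicted_value'], exp['confidence_interval'])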
Example #2
import pandas as pd
from mindsdb_native import Predictor

mdb = Predictor(name='description_predictor')

mdb.learn(from_data=pd.read_csv('processed_data/train.csv'),
          to_predict='description')

predictions = mdb.predict(when_data='processed_data/test.csv')

for pred in predictions:
    print(pred['description'])
Example #3
    def train(self, train_df):
        mdb = Predictor(name=self.name)
        mdb.learn(to_predict=self.target_column,
                  from_data=train_df,
                  backend='lightwood')
        return mdb
Example #4
from mindsdb_native import Predictor

mdb = Predictor(name='coffee_predictor')
mdb.learn(from_data='data.tsv',
          to_predict=[
              'Coffe_Malt', 'Chocolat', 'Gold', 'Medium_Barley', 'Dark_Barley',
              'Dandelion', 'Beets', 'Chicory_Roots', 'Figs'
          ])
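Reading the multi-target predictions back follows the same indexing pattern as Example #2. A minimal sketch, assuming the 'coffee_predictor' trained above; the input feature name and value are placeholders:

from mindsdb_native import Predictor

mdb = Predictor(name='coffee_predictor')

# 'Roast_Level' is a placeholder feature name; use whichever input
# columns actually exist in data.tsv.
predictions = mdb.predict(when_data={'Roast_Level': 'dark'})

for column in ['Coffe_Malt', 'Chocolat', 'Gold', 'Medium_Barley',
               'Dark_Barley', 'Dandelion', 'Beets', 'Chicory_Roots', 'Figs']:
    print(column, predictions[0][column])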
Example #5
    def test_database_history(self):
        # The early return leaves this test disabled; remove it to run the test.
        # Module-level imports of gc, requests, random_string and Predictor are assumed.
        return
        from mindsdb_datasources import ClickhouseDS

        TEMP_DB = 'test_database_history_' + random_string()
        TEMP_TABLE = 'tmp_test_database_history_' + random_string()

        params = {'user': self.USER, 'password': self.PASSWORD}

        clickhouse_url = f'http://{self.HOST}:{self.PORT}'

        values = []
        for i in range(200):
            values.append([str(i % 4), i, i * 2])

        queries = [
            f'CREATE DATABASE IF NOT EXISTS {TEMP_DB}',
            f'DROP TABLE IF EXISTS {TEMP_DB}.{TEMP_TABLE}',
            f'''
                CREATE TABLE {TEMP_DB}.{TEMP_TABLE}(
                    col1 String
                    ,col2 Int64
                    ,col3 Int64
                ) ENGINE = MergeTree()
                    ORDER BY col2
                    PARTITION BY col1
            ''',
        ]
        gc.collect()

        for value in values:
            value_ins_str = str(value).replace('[','').replace(']','')
            queries.append(f"INSERT INTO {TEMP_DB}.{TEMP_TABLE} VALUES ({value_ins_str})")

        for q in queries:
            r = requests.post(clickhouse_url, data=q, params=params)
            assert r.status_code == 200, r.text

        clickhouse_ds = ClickhouseDS(
            f'SELECT * FROM {TEMP_DB}.{TEMP_TABLE}',
            host=self.HOST,
            port=self.PORT,
            user=self.USER,
            password=self.PASSWORD
        )

        temp_predictor = Predictor(name='query_history_based_ts_predictor')
        temp_predictor.learn(
            to_predict='col3',
            from_data=clickhouse_ds,
            stop_training_in_x_seconds=5,
            timeseries_settings={
                'order_by': ['col2']
                ,'window': 6
                ,'group_by': ['col1']
            }
        )
        del temp_predictor

        ts_predictor = Predictor(name='query_history_based_ts_predictor')
        predictions = ts_predictor.predict(
            when_data={
                'col2': 800
                ,'col1': '2'
            },
            advanced_args={
                'use_database_history': True
            }
        )

        assert predictions[0]['col3'] is not None

        r = requests.post(
            clickhouse_url,
            data=f'DROP DATABASE {TEMP_DB}',
            params=params
        )
        assert r.status_code == 200, f'failed to drop temporary database "{TEMP_DB}"'
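The ClickHouse plumbing above is incidental; the timeseries-specific parts are the timeseries_settings passed to learn() and advanced_args={'use_database_history': True} passed to predict(). A minimal sketch of the same setup trained from an in-memory DataFrame (column names mirror the test; how the prediction window is supplied without database history is an assumption here):

import pandas as pd
from mindsdb_native import Predictor

# Synthetic series mirroring the columns used in the ClickHouse test.
df = pd.DataFrame([{'col1': str(i % 4), 'col2': i, 'col3': i * 2}
                   for i in range(200)])

predictor = Predictor(name='df_based_ts_predictor')
predictor.learn(
    to_predict='col3',
    from_data=df,
    stop_training_in_x_seconds=5,
    timeseries_settings={
        'order_by': ['col2'],
        'window': 6,
        'group_by': ['col1'],
    },
)

# Without a database to pull history from, the recent rows for the group
# are passed in directly (an assumption; the test above instead relies on
# use_database_history=True to fetch them).
recent = df[df['col1'] == '2'].tail(6)
predictions = predictor.predict(when_data=recent)
print(predictions[0]['col3'])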