Example 1
# BasePredictor is the benchmark suite's base class, defined elsewhere in the surrounding code
import pandas as pd

from mindsdb_native import Predictor


class NativeDataFrame(BasePredictor):
    def __init__(self, dataset):
        super().__init__(dataset)
        self.df = pd.read_csv(f"{self.dataset.name}_test.csv")
        self.predictor = Predictor(name=self.dataset.name)

    def predict(self, row_number=1):
        # Predict on the first `row_number` rows of the test CSV
        return self.predictor.predict(self.df[:row_number])
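
A minimal usage sketch for the wrapper above, assuming `BasePredictor` stores the passed object as `self.dataset`, that `<dataset name>_test.csv` exists on disk, and that a Predictor with the same name has already been trained; `SimpleDataset` is a hypothetical stand-in for the benchmark suite's own dataset class:

from collections import namedtuple

# Hypothetical stand-in: only the `name` attribute is needed by NativeDataFrame
SimpleDataset = namedtuple('SimpleDataset', ['name'])

bench = NativeDataFrame(SimpleDataset(name='home_rentals'))  # reads home_rentals_test.csv
result = bench.predict(row_number=10)                        # predict on the first 10 test rows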
Example 2
# BasePredictor and CONFIG are defined elsewhere in the surrounding benchmark suite
from mindsdb_native import Predictor
from mindsdb_datasources import ClickhouseDS


class NativeClickhouse(BasePredictor):
    host = CONFIG['database']['host']
    port = CONFIG['database']['port']
    user = CONFIG['database']['user']
    password = CONFIG['database']['password']

    def __init__(self, dataset):
        super().__init__(dataset)
        self.predictor = Predictor(name=self.dataset.name)
        self.query_template = f"SELECT * FROM test_data.{self.dataset.name} LIMIT %s"

    def predict(self, row_number=1):
        # Pull the first `row_number` rows straight from ClickHouse and predict on them
        _query = self.query_template % row_number
        return self.predictor.predict(when_data=ClickhouseDS(
            _query, host=self.host, port=self.port,
            user=self.user, password=self.password))
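
`CONFIG` is assumed to be a configuration mapping loaded elsewhere in the benchmark suite; a hypothetical shape that satisfies the lookups above (all values are placeholders, not real credentials):

CONFIG = {
    'database': {
        'host': 'localhost',   # placeholder ClickHouse host
        'port': 8123,          # placeholder HTTP port
        'user': 'default',
        'password': '',
    }
}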
Example 3
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

from mindsdb_native import Predictor


def test_regressor():
    """
    Sanity check. MindsDB point predictions should be within range
    of predicted bounds by the inductive conformal predictor.
    """
    def _df_from_x(x, columns=None):
        x = pd.DataFrame(x)
        if columns is None:
            x.columns = 'c' + pd.Series(range(len(x.columns))).astype(str)
        else:
            x.columns = columns
        return x

    def _df_from_xy(x, y, target):
        x = _df_from_x(x)
        x[target] = pd.DataFrame(y)
        return x

    X, y = load_boston(return_X_y=True)
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=5)
    target = 'medv'

    x_tr = _df_from_xy(X_train, Y_train, target)
    p = Predictor("ConformalTest")
    p.learn(from_data=x_tr, to_predict=target)

    x_te = _df_from_xy(X_test, Y_test, target)
    r = p.predict(when_data=x_te)
    r = [x.explanation[target] for x in r]

    for x in r:
        assert x['confidence_interval'][0] <= x['predicted_value'] <= x['confidence_interval'][1]
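
Inside the test, the same explanation fields can also be printed for a quick visual check of the conformal bounds; a small sketch assuming `r` is the explanation list built above:

# Assumes `r` as built in test_regressor(): one explanation dict per predicted row
for x in r:
    lower, upper = x['confidence_interval']
    print(f"{lower:.2f} <= {x['predicted_value']:.2f} <= {upper:.2f}")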
Example 4
import pandas as pd
from mindsdb_native import Predictor

mdb = Predictor(name='description_predictor')

mdb.learn(from_data=pd.read_csv('processed_data/train.csv'),
          to_predict='description')

predictions = mdb.predict(when_data='processed_data/test.csv')

for pred in predictions:
    print(pred['description'])
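
Equivalently, the test split can be loaded into a DataFrame first and passed through `when_data`, matching the style of the other examples; a small sketch:

test_df = pd.read_csv('processed_data/test.csv')

for pred in mdb.predict(when_data=test_df):
    print(pred['description'])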
Example 5
    def test_database_history(self):
        # requests, gc, random_string and Predictor come from the enclosing test module
        from mindsdb_datasources import ClickhouseDS

        TEMP_DB = 'test_database_history_' + random_string()
        TEMP_TABLE = 'tmp_test_database_history_' + random_string()

        params = {'user': self.USER, 'password': self.PASSWORD}

        clickhouse_url = f'http://{self.HOST}:{self.PORT}'

        values = []
        for i in range(200):
            values.append([str(i % 4), i, i * 2])

        queries = [
            f'CREATE DATABASE IF NOT EXISTS {TEMP_DB}',
            f'DROP TABLE IF EXISTS {TEMP_DB}.{TEMP_TABLE}',
            f'''
                CREATE TABLE {TEMP_DB}.{TEMP_TABLE}(
                    col1 String
                    ,col2 Int64
                    ,col3 Int64
                ) ENGINE = MergeTree()
                    ORDER BY col2
                    PARTITION BY col1
            ''',
        ]
        gc.collect()

        for value in values:
            value_ins_str = str(value).replace('[','').replace(']','')
            queries.append(f"INSERT INTO {TEMP_DB}.{TEMP_TABLE} VALUES ({value_ins_str})")

        for q in queries:
            r = requests.post(clickhouse_url, data=q, params=params)
            assert r.status_code == 200, r.text

        clickhouse_ds = ClickhouseDS(
            f'SELECT * FROM {TEMP_DB}.{TEMP_TABLE}',
            host=self.HOST,
            port=self.PORT,
            user=self.USER,
            password=self.PASSWORD
        )

        temp_predictor = Predictor(name='query_history_based_ts_predictor')
        temp_predictor.learn(
            to_predict='col3',
            from_data=clickhouse_ds,
            stop_training_in_x_seconds=5,
            timeseries_settings={
                'order_by': ['col2']
                ,'window': 6
                ,'group_by': ['col1']
            }
        )
        del temp_predictor

        ts_predictor = Predictor(name='query_history_based_ts_predictor')
        predictions = ts_predictor.predict(
            when_data={
                'col2': 800
                ,'col1': '2'
            },
            advanced_args={
                'use_database_history': True
            }
        )

        assert predictions[0]['col3'] is not None

        r = requests.post(
            clickhouse_url,
            data=f'DROP DATABASE {TEMP_DB}',
            params=params
        )
        assert r.status_code == 200, f'failed to drop temporary database "{TEMP_DB}"'
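
`random_string` above is a small helper from the test utilities used to build unique database and table names; a hypothetical equivalent:

import random
import string

def random_string(length=8):
    # Hypothetical stand-in for the test-suite helper: a short lowercase suffix
    return ''.join(random.choices(string.ascii_lowercase, k=length))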