def test_regressor():
    """Sanity check: MindsDB point predictions should fall within the
    confidence bounds produced by the inductive conformal predictor.
    """

    def _to_frame(values, columns=None):
        # Wrap the raw array in a DataFrame; synthesize 'c0'..'cN' column
        # names when no explicit labels are supplied.
        frame = pd.DataFrame(values)
        if columns is None:
            frame.columns = 'c' + pd.Series(range(len(frame.columns))).astype(str)
        else:
            frame.columns = columns
        return frame

    def _to_labeled_frame(features, labels, target):
        # Feature frame plus the target column appended on the right.
        frame = _to_frame(features)
        frame[target] = pd.DataFrame(labels)
        return frame

    X, y = load_boston(return_X_y=True)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, y, test_size=0.1, random_state=5
    )

    target = 'medv'
    train_frame = _to_labeled_frame(X_train, Y_train, target)

    predictor = Predictor("ConformalTest")
    predictor.learn(from_data=train_frame, to_predict=target)

    test_frame = _to_labeled_frame(X_test, Y_test, target)
    results = predictor.predict(when_data=test_frame)

    # Every point prediction must sit inside its own confidence interval.
    for expl in (row.explanation[target] for row in results):
        lower, upper = expl['confidence_interval'][0], expl['confidence_interval'][1]
        assert lower <= expl['predicted_value'] <= upper
# Train a predictor for the 'description' column on the processed training
# split, then print the predicted description for every row of the test split.
import pandas as pd
from mindsdb_native import Predictor

mdb = Predictor(name='description_predictor')

training_frame = pd.read_csv('processed_data/train.csv')
mdb.learn(from_data=training_frame, to_predict='description')

predictions = mdb.predict('processed_data/test.csv')
for pred in predictions:
    print(pred['description'])
def train(self, train_df):
    """Fit and return a new Predictor on *train_df*.

    The predictor is named after ``self.name``, targets
    ``self.target_column``, and uses the lightwood backend.
    """
    predictor = Predictor(name=self.name)
    predictor.learn(
        to_predict=self.target_column,
        from_data=train_df,
        backend='lightwood',
    )
    return predictor
from mindsdb_native import Predictor

# Columns the model learns to predict jointly from the TSV dataset.
# NOTE: column names (including 'Coffe_Malt') must match the data file exactly.
TARGET_COLUMNS = [
    'Coffe_Malt',
    'Chocolat',
    'Gold',
    'Medium_Barley',
    'Dark_Barley',
    'Dandelion',
    'Beets',
    'Chicory_Roots',
    'Figs',
]

mdb = Predictor(name='coffee_predictor')
mdb.learn(from_data='data.tsv', to_predict=TARGET_COLUMNS)
def test_database_history(self):
    """Integration test: a timeseries predictor should be able to pull its
    required history rows directly from the source database
    (``use_database_history``) instead of receiving them in ``when_data``.

    NOTE(review): the bare ``return`` below disables the whole test —
    presumably a deliberate temporary skip; confirm before removing it.
    """
    return
    from mindsdb_datasources import ClickhouseDS
    # Unique names so concurrent/repeated runs don't collide.
    TEMP_DB = 'test_database_history_' + random_string()
    TEMP_TABLE = 'tmp_test_database_history_' + random_string()
    params = {'user': self.USER, 'password': self.PASSWORD}
    clickhouse_url = f'http://{self.HOST}:{self.PORT}'

    # 200 rows: col1 cycles over '0'..'3' (group key), col2 is the order
    # column, col3 = 2 * col2 is the value to predict.
    values = []
    for i in range(200):
        values.append([str(i % 4), i, i * 2])

    # DDL first: create the database, drop any stale table, create the table.
    queries = [
        f'CREATE DATABASE IF NOT EXISTS {TEMP_DB}',
        f'DROP TABLE IF EXISTS {TEMP_DB}.{TEMP_TABLE}',
        f'''
            CREATE TABLE {TEMP_DB}.{TEMP_TABLE}(
                col1 String
                ,col2 Int64
                ,col3 Int64
            ) ENGINE = MergeTree()
                ORDER BY col2
                PARTITION BY col1
        ''',
    ]
    gc.collect()
    # One INSERT per row; str([...]) renders as "'0', 0, 0" after stripping
    # the brackets, which is valid ClickHouse VALUES syntax.
    for value in values:
        value_ins_str = str(value).replace('[','').replace(']','')
        queries.append(f"INSERT INTO {TEMP_DB}.{TEMP_TABLE} VALUES ({value_ins_str})")
    # Run everything through the ClickHouse HTTP interface.
    for q in queries:
        r = requests.post(clickhouse_url, data=q, params=params)
        assert r.status_code == 200, r.text

    clickhouse_ds = ClickhouseDS(
        f'SELECT * FROM {TEMP_DB}.{TEMP_TABLE}',
        host=self.HOST, port=self.PORT, user=self.USER, password=self.PASSWORD
    )
    # Train a grouped timeseries predictor (window of 6 rows ordered by col2).
    temp_predictor = Predictor(name='query_history_based_ts_predictor')
    temp_predictor.learn(
        to_predict='col3',
        from_data=clickhouse_ds,
        stop_training_in_x_seconds=5,
        timeseries_settings={
            'order_by': ['col2']
            ,'window': 6
            ,'group_by': ['col1']
        }
    )
    # Drop the in-memory instance and reload by name, so prediction runs
    # against the persisted predictor rather than the training object.
    del temp_predictor
    ts_predictor = Predictor(name='query_history_based_ts_predictor')
    # Only the order/group keys are supplied; the window history should be
    # fetched from the database because use_database_history is set.
    predictions = ts_predictor.predict(
        when_data={
            'col2': 800
            ,'col1': '2'
        },
        advanced_args={
            'use_database_history': True
        }
    )
    assert predictions[0]['col3'] is not None
    # Cleanup: drop the temporary database.
    r = requests.post(
        clickhouse_url,
        data=f'DROP DATABASE {TEMP_DB}',
        params=params
    )
    assert r.status_code == 200, 'failed to drop temporary database "{}"'.format(TEMP_DB)