def basic_test(backend='ludwig', use_gpu=True, ignore_columns=None, run_extra=False):
    """Smoke-test the learn / predict / inspect cycle of a ``Predictor``.

    Trains the ``home_rentals_price`` predictor on the public home-rentals
    CSV, re-creates the predictor by name, predicts for ``sqft=300`` and then
    reads the result through every accessor used below. The prints are only
    there for debugging; the point is that none of the accessors crash.

    Args:
        backend: Forwarded to ``Predictor.learn``.
        use_gpu: Forwarded to both ``learn`` and ``predict``.
        ignore_columns: Accepted for interface compatibility; this function
            does not read it. Defaults to ``None`` instead of a shared
            mutable list.
        run_extra: When true, first run every ``*.py`` file found in
            ``../functional_testing`` with ``python3``.

    Raises:
        AssertionError: If the first evaluation is not a
            ``ProbabilityEvaluation`` or the model data has 5 keys or fewer.
    """
    if run_extra:
        # Local import keeps this block self-contained.
        import subprocess

        extra_dir = '../functional_testing'
        for py_file in os.listdir(extra_dir):
            # Match on the suffix so that e.g. ``.pyc`` files are not executed.
            if not py_file.endswith('.py'):
                continue
            # Argument list instead of a shell string: file names with spaces
            # or shell metacharacters are passed through untouched.
            subprocess.run(['python3', os.path.join(extra_dir, py_file)])

    # Create & Learn
    mdb = Predictor(name='home_rentals_price')
    mdb.learn(
        to_predict='rental_price',
        from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
        backend=backend,
        stop_training_in_x_seconds=20,
        use_gpu=use_gpu)

    # Reload & Predict
    mdb = Predictor(name='home_rentals_price')
    prediction = mdb.predict(when={'sqft': 300}, use_gpu=use_gpu)

    # Test all different forms of output.
    # No need to print them, we're just doing so for debugging purposes;
    # we just want to see if the interface will crash or not.
    print(prediction)
    print(prediction[0])

    for item in prediction:
        print(item)

    first_evaluation = list(prediction.evaluations.values())[0][0]
    print(type(first_evaluation))
    assert 'ProbabilityEvaluation' in str(type(first_evaluation))

    # Second pass on purpose: the result must be iterable more than once.
    for item in prediction:
        print(item)

    first_row = prediction[0]
    print(first_row.as_dict())
    print(first_row.as_list())
    print(first_row['rental_price_confidence'])
    print(type(first_row['rental_price_confidence']))

    print('\n\n========================\n\n')
    print(first_row.explain())
    print('\n\n')

    # See if we can get the adapted metadata
    amd = mdb.get_model_data('home_rentals_price')
    # Make some simple assertions about it
    assert 5 < len(list(amd.keys()))
def test_mysql_ds():
    """Check that ``MySqlDS`` loads a freshly created 200-row MySQL table.

    Recreates ``test_mindsdb`` on the local MySQL server, fills it with 200
    rows, loads it through ``MySqlDS`` and runs ``analyse_dataset`` on it.

    Raises:
        AssertionError: If the data source does not hold exactly 200 rows.
    """
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DATABASE = 'mysql'
    PORT = 3306

    con = mysql.connector.connect(host=HOST,
                                  port=PORT,
                                  user=USER,
                                  password=PASSWORD,
                                  database=DATABASE)
    try:
        cur = con.cursor()
        cur.execute('DROP TABLE IF EXISTS test_mindsdb')
        cur.execute(
            'CREATE TABLE test_mindsdb(col_1 Text, col_2 BIGINT, col_3 BOOL)')

        # Parameterized insert: the driver handles quoting and type conversion.
        insert_sql = 'INSERT INTO test_mindsdb VALUES (%s, %s, %s)'
        for i in range(200):
            cur.execute(insert_sql,
                        (f'This is string number {i}', i, i % 2 == 0))
        con.commit()
    finally:
        # Release the connection even when one of the statements fails.
        con.close()

    mysql_ds = MySqlDS(table='test_mindsdb',
                       host=HOST,
                       user=USER,
                       password=PASSWORD,
                       database=DATABASE,
                       port=PORT)
    assert len(mysql_ds._df) == 200

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    mdb.analyse_dataset(from_data=mysql_ds)
def test_clickhouse_ds():
    """Check that ``ClickhouseDS`` returns the expected rows from ClickHouse.

    Creates ``test.mock`` over the HTTP interface on localhost:8123, inserts
    three rows, then queries the two rows with the highest ``col2`` through
    ``ClickhouseDS`` and runs ``analyse_dataset`` on the result.
    """
    HOST = 'localhost'
    PORT = 8123
    clickhouse_url = f'http://{HOST}:{PORT}'

    # Statements are posted one by one, in this order.
    setup_statements = (
        'CREATE DATABASE IF NOT EXISTS test',
        'DROP TABLE IF EXISTS test.mock',
        """CREATE TABLE test.mock( col1 String ,col2 Int64 ,col3 Array(UInt8) ) ENGINE=Memory""",
        """INSERT INTO test.mock VALUES ('a',1,[1,2,3])""",
        """INSERT INTO test.mock VALUES ('b',2,[2,3,1])""",
        """INSERT INTO test.mock VALUES ('c',3,[3,1,2])""",
    )
    for statement in setup_statements:
        requests.post(clickhouse_url, data=statement)

    clickhouse_ds = ClickhouseDS(
        'SELECT * FROM test.mock ORDER BY col2 DESC LIMIT 2',
        host=HOST,
        port=PORT)

    frame = clickhouse_ds.df
    assert len(frame) == 2
    # The two selected rows have col2 == 3 and col2 == 2.
    assert sum(int(value) for value in frame['col2']) == 5
    assert len(list(frame['col3'][1])) == 3
    assert set(frame.columns) == {'col1', 'col2', 'col3'}

    mdb = Predictor(name='analyse_dataset_test_predictor')
    mdb.analyse_dataset(from_data=clickhouse_ds)
def run(sample=False):
    """Train and score the ``german_data`` predictor.

    Learns ``class`` from ``processed_data/train.csv`` with the lightwood
    backend, predicts on ``processed_data/test.csv``, prints the confusion
    matrix and returns the balanced accuracy together with the per-row
    explanations.

    Args:
        sample: Not read by this function.

    Returns:
        dict with keys ``accuracy``, ``accuracy_function``, ``backend`` and
        ``single_row_predictions``.
    """
    backend = 'lightwood'
    test_path = 'processed_data/test.csv'

    mdb = Predictor(name='german_data')
    mdb.learn(to_predict='class',
              from_data='processed_data/train.csv',
              backend=backend)
    predictions = mdb.predict(when_data=test_path)

    predicted_val = []
    for row in predictions:
        predicted_val.append(row.explanation['class']['predicted_value'])
    real_val = list(pd.read_csv(test_path)['class'])

    accuracy = balanced_accuracy_score(real_val, predicted_val)
    print(confusion_matrix(real_val, predicted_val))

    # Additional info for each transaction row.
    additional_info = []
    for row in predictions:
        additional_info.append(row.explanation)

    report = {}
    report['accuracy'] = accuracy
    report['accuracy_function'] = 'balanced_accuracy_score'
    report['backend'] = backend
    report['single_row_predictions'] = additional_info
    return report
def test_postgres_ds():
    """Check that ``PostgresDS`` loads a freshly created 200-row table.

    Recreates ``test_mindsdb`` on the local PostgreSQL server with text, int,
    boolean, date and int-array columns, inserts 200 rows, loads the table
    through ``PostgresDS`` and runs ``analyse_dataset`` on it.

    Raises:
        AssertionError: If the data source does not hold exactly 200 rows.
    """
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DBNAME = 'postgres'
    PORT = 5432

    con = pg8000.connect(database=DBNAME,
                         user=USER,
                         password=PASSWORD,
                         host=HOST,
                         port=PORT)
    try:
        cur = con.cursor()
        cur.execute('DROP TABLE IF EXISTS test_mindsdb')
        cur.execute(
            'CREATE TABLE test_mindsdb(col_1 Text, col_2 Int, col_3 Boolean, col_4 Date, col_5 Int [])'
        )
        for i in range(200):
            # One distinct calendar day per row, counting back from today.
            dt = datetime.datetime.now() - datetime.timedelta(days=i)
            dt_str = dt.strftime('%Y-%m-%d')
            # All interpolated values are generated right here, so the
            # string-built statement carries no external input.
            cur.execute(
                f"INSERT INTO test_mindsdb VALUES ('String {i}', {i}, {i % 2 == 0}, '{dt_str}', ARRAY [1, 2, {i}])"
            )
        con.commit()
    finally:
        # Release the connection even when one of the statements fails.
        con.close()

    postgres_ds = PostgresDS(table='test_mindsdb',
                             host=HOST,
                             user=USER,
                             password=PASSWORD,
                             database=DBNAME,
                             port=PORT)
    assert len(postgres_ds._df) == 200

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    mdb.analyse_dataset(from_data=postgres_ds)
from mindsdb import Predictor
import pprint

# Numeric log levels accepted by Predictor(log_level=...).
DEBUG_LOG_LEVEL = 10
INFO_LOG_LEVEL = 20
WARNING_LOG_LEVEL = 30
ERROR_LOG_LEVEL = 40
NO_LOGS_LOG_LEVEL = 50

ANDY_LOGLEVEL = INFO_LOG_LEVEL

print("training")

# Train the 'spam_test' model: learn the 'answer' column from the local
# CSV file (a URL would work too), on CPU, for at most 10 seconds.
Predictor(name='spam_test', log_level=ANDY_LOGLEVEL).learn(
    from_data="spam_small.csv",
    to_predict='answer',
    stop_training_in_x_seconds=10,
    use_gpu=False,
)

# Messages to classify; 'is_spam' and 'confidence' are filled in below.
tests = [
    {'text': message, 'is_spam': None, 'confidence': 0}
    for message in (
        'how are you going, what have you been doing today',
        'ready to buy a new dvd today?',
        'WINNER!!',
    )
]

for test in tests:
    print("predicting...")
    # A fresh Predictor handle is created for every message.
    result = Predictor(name='spam_test', log_level=ANDY_LOGLEVEL).predict(
        when={'conversation': test['text']})
    test.update(
        is_spam=result[0]['answer'],
        confidence=result[0]['answer_confidence'],
    )
def __init__(self):
    """Create the 'temperature' predictor and keep it on ``self.mindsDb``."""
    predictor = Predictor(name='temperature')
    self.mindsDb = predictor
df = self.transaction.input_data.validation_df elif mode == 'test': df = self.transaction.input_data.test_df X = [] for col in self.input_columns: X.append(self.le_arr[col].transform(df[col])) X = np.swapaxes(X, 1, 0) predictions = self.clf.predict(X) formated_predictions = {self.output_columns[0]: predictions} return formated_predictions predictor = Predictor(name='custom_model_test_predictor') dt_model = CustomDTModel() predictor.learn( to_predict='rental_price', from_data= "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", backend=dt_model) predictions = predictor.predict( when_data= "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", backend=dt_model)
from mindsdb import Predictor

# Ask the 'restaurant_score' model for a prediction and show the first
# result row.
result = Predictor(name='restaurant_score').predict(
    when={
        'inspection_score': 92,
        'business_state': 'CA',
    })
print(result[0])
from mindsdb import Predictor

print("learning...")

# Train: learn the 'rental_price' column from the CSV behind the URL
# (a local path works as well).
#
# Timing notes for use_gpu=False:
#   25 seconds using powershell: Measure-Command {python .\main.py}
#   29 seconds on iMac
# With use_gpu=True: 25 seconds also (much less CPU used though).
Predictor(name='home_rentals_price').learn(
    from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
    to_predict='rental_price',
    use_gpu=False,
)

# Predict with the freshly trained model.
result = Predictor(name='home_rentals_price').predict(
    when={'number_of_rooms': 2, 'number_of_bathrooms': 1, 'sqft': 1190})

# Report the first result row.
print(f"The predicted price is ${result[0]['rental_price']} "
      f"with {result[0]['rental_price_confidence']} confidence")

print("done")
PASSWORD = '' DBNAME = 'postgres' PORT = 5432 con = psycopg2.connect(dbname=DBNAME, user=USER, password=PASSWORD, host=HOST, port=PORT) cur = con.cursor() cur.execute('DROP TABLE IF EXISTS test_mindsdb') cur.execute( 'CREATE TABLE test_mindsdb(col_1 Text, col_2 Int, col_3 Boolean)') for i in range(0, 200): cur.execute( f'INSERT INTO test_mindsdb VALUES (\'This is tring number {i}\', {i}, {i % 2 == 0})' ) con.commit() con.close() mysql_ds = PostgresDS(table='test_mindsdb', host=HOST, user=USER, password=PASSWORD, database=DBNAME, port=PORT) assert (len(mysql_ds._df) == 200) mdb = Predictor(name='analyse_dataset_test_predictor') mdb.analyse_dataset(from_data=mysql_ds)
def test_maria_ds():
    """Check ``MariaDS`` loading and the column typing of ``analyse_dataset``.

    Recreates ``test_mindsdb`` on the MariaDB server at localhost:4306 with
    one column per supported type, inserts 200 rows, loads the table through
    ``MariaDS`` and asserts the data type / subtype that ``analyse_dataset``
    reports for each checked column.

    Raises:
        AssertionError: If the row count, the analysis payload or any of the
            checked column typings is not as expected.
    """
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DATABASE = 'mysql'
    PORT = 4306

    con = mysql.connector.connect(host=HOST,
                                  port=PORT,
                                  user=USER,
                                  password=PASSWORD,
                                  database=DATABASE)
    try:
        cur = con.cursor()
        cur.execute('DROP TABLE IF EXISTS test_mindsdb')
        cur.execute("""CREATE TABLE test_mindsdb (
                            col_int BIGINT,
                            col_float FLOAT,
                            col_categorical Text,
                            col_bool BOOL,
                            col_text Text,
                            col_date DATE,
                            col_datetime DATETIME,
                            col_timestamp TIMESTAMP,
                            col_time TIME
                        )
                    """)

        # Loop-invariant, so built once; the %s markers are filled in by the
        # driver (no f-string needed).
        insert_query = """INSERT INTO test_mindsdb (col_int,
                                col_float,
                                col_categorical,
                                col_bool,
                                col_text,
                                col_date,
                                col_datetime,
                                col_timestamp,
                                col_time)
                            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
                        """
        for i in range(200):
            # One distinct calendar day per row, counting back from today.
            dt = datetime.datetime.now() - datetime.timedelta(days=i)
            values = (i, i + 0.01, f"Cat {i}", i % 2 == 0,
                      f"long long long text {i}", dt.date(), dt,
                      dt.strftime('%Y-%m-%d %H:%M:%S.%f'),
                      dt.strftime('%H:%M:%S.%f'))
            cur.execute(insert_query, values)
        con.commit()
    finally:
        # Release the connection even when one of the statements fails.
        con.close()

    maria_ds = MariaDS(table='test_mindsdb',
                       host=HOST,
                       user=USER,
                       password=PASSWORD,
                       database=DATABASE,
                       port=PORT)
    assert len(maria_ds._df) == 200

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    model_data = mdb.analyse_dataset(from_data=maria_ds)

    # Validate the payload before indexing into it, so a missing result
    # fails with a clear assertion instead of a TypeError.
    assert model_data
    analysis = model_data['data_analysis_v2']
    assert analysis

    def assert_expected_type(column_typing, expected_type, expected_subtype):
        """Assert the detected type/subtype and their distribution counts."""
        assert column_typing['data_type'] == expected_type
        assert column_typing['data_subtype'] == expected_subtype
        # NOTE(review): 199 although 200 rows are inserted; presumably the
        # analysis leaves one row out. Confirm against analyse_dataset.
        assert column_typing['data_type_dist'][expected_type] == 199
        assert column_typing['data_subtype_dist'][expected_subtype] == 199

    assert_expected_type(analysis['col_categorical']['typing'],
                         DATA_TYPES.CATEGORICAL, DATA_SUBTYPES.MULTIPLE)
    assert_expected_type(analysis['col_bool']['typing'],
                         DATA_TYPES.CATEGORICAL, DATA_SUBTYPES.SINGLE)
    assert_expected_type(analysis['col_int']['typing'], DATA_TYPES.NUMERIC,
                         DATA_SUBTYPES.INT)
    assert_expected_type(analysis['col_float']['typing'], DATA_TYPES.NUMERIC,
                         DATA_SUBTYPES.FLOAT)
    assert_expected_type(analysis['col_date']['typing'], DATA_TYPES.DATE,
                         DATA_SUBTYPES.DATE)
    assert_expected_type(analysis['col_datetime']['typing'], DATA_TYPES.DATE,
                         DATA_SUBTYPES.TIMESTAMP)
    assert_expected_type(analysis['col_timestamp']['typing'], DATA_TYPES.DATE,
                         DATA_SUBTYPES.TIMESTAMP)
    assert_expected_type(analysis['col_text']['typing'],
                         DATA_TYPES.SEQUENTIAL, DATA_SUBTYPES.TEXT)
from mindsdb import Predictor

# Train 'home_rentals_price': learn the 'rental_price' column from the CSV
# behind the URL (a local path works as well).
mdb = Predictor(name='home_rentals_price')
mdb.learn(
    from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
    to_predict='rental_price',
)

# Predict from a single known feature and show the first result row.
prediction = mdb.predict(when={'sqft': 300})
print(prediction[0])

# Dump the stored model metadata.
amd = mdb.get_model_data('home_rentals_price')
print(amd)
from mindsdb import Predictor

# Train the 'marvel' model to predict FIRST_APPEARANCE from the spreadsheet.
mdb = Predictor(name='marvel')
mdb.learn(to_predict='FIRST_APPEARANCE', from_data="marvel-wikia.xlsx")

print(
    '------------------------------------------------------------Done training------------------------------------------------------------'
)

# Disabled prediction example (it was an inert string literal, kept here as
# a comment).
# NOTE(review): the column names below do not appear in the training call
# above and look like they belong to a different dataset; confirm before
# re-enabling.
#
# predicted = mdb.predict(when={
#     'Date': '11/03/2020',
#     'Time': '18.00.00',
#     'NMHC_GT': 1360.0,
#     'AH': 0.655
# })
# print('------------------------------------------------------------Preidiction output------------------------------------------------------------')
# for val in predicted:
#     print(val['CO_GT'])
#     print(val['CO_GT_confidence'])
""" This example we will walk you over the basics of MindsDB The example code objective here is to: - learn a model to predict the best retal price for a given property. In order to to this we have a dataset "data_sources/home_rentals.csv" """ from mindsdb import Predictor # We tell mindsDB what we want to learn and from what data Predictor(name='home_rentals_price').learn( to_predict='rental_price', # the column we want to learn to predict given all the data in the file from_data="home_rentals.csv" # the path to the file where we can learn from, (note: can be url) )
""" This example we will walk you over the basics of MindsDB The example code objective here is to predict the best retail price for a given property. """ from mindsdb import Predictor # use the model to make predictions result = Predictor(name='home_rentals_price').predict(when={'number_of_rooms': 2,'number_of_bathrooms':1, 'sqft': 1190}) # you can now print the results print('The predicted price is ${price} with {conf} confidence'.format(price=result[0]['rental_price'], conf=result[0]['rental_price_confidence']))
from mindsdb import Predictor

# Train 'home_rentals_price': learn the 'rental_price' column from the CSV
# hosted in the MindsDB GitHub repository (a local path works as well).
Predictor(name='home_rentals_price').learn(
    from_data="https://raw.githubusercontent.com/mindsdb/mindsdb/master/docs/examples/basic/home_rentals.csv",
    to_predict='rental_price',
)
def __init__(self):
    """Create the 'insurance1' predictor and keep it on ``self.mindsDb``."""
    predictor = Predictor(name='insurance1')
    self.mindsDb = predictor
from mindsdb import Predictor

# Ask the 'btc-price' model for a prediction from one day's on-chain and
# exchange figures, then show the first result row.
result = Predictor(name='btc-price').predict(when=dict([
    ('txVolume(USD)', 6739584540.73),
    ('adjustedTxVolume(USD)', 3868097401.91),
    ('txCount', 204913),
    ('exchangeVolume(USD)', 7394019840),
    ('generatedCoins', 1875),
    ('fees', 35.900),
    ('blockCount', 150),
]))
print(result[0])
from mindsdb import Predictor

# Ask the 'beer_consumption' model for a prediction and show the first
# result row. Temperature and precipitation are passed as strings, the
# weekend flag as an int.
result = Predictor(name='beer_consumption').predict(
    when={
        'Temperatura Media': '27.3',
        'Final de Semana': 0,
        'Precipitacao': '1.2',
    })
print(result[0])
def __init__(self):
    """Create the 'demand_30' predictor and keep it on ``self.mindsDb``."""
    predictor = Predictor(name='demand_30')
    self.mindsDb = predictor
from mindsdb import Predictor

# Ask the 'player-stats' model for a prediction for one fixture and show
# the first result row.
result = Predictor(name='player-stats').predict(when=dict(
    home_team='Scotland',
    away_team='England',
    tournament='Friendly',
    country='Scotland',
))
print(result[0])
import mindsdb
from mindsdb import Predictor

# Train 'beer_consumption': learn the 'Consumo de cerveja' column from the
# local training CSV (a URL works as well).
Predictor(name='beer_consumption').learn(
    from_data="dataset/Consumo_cerveja_train.csv",
    to_predict='Consumo de cerveja',
)
from mindsdb import Predictor

# Train 'photo_score_model12' to predict the Score column from the image
# training CSV.
mdb = Predictor(name='photo_score_model12')
mdb.learn(to_predict=['Score'],
          from_data="integration_testing/image_testing/train.csv")
print(
    '------------------------------------------------------------Done training------------------------------------------------------------'
)

# Predict for every row of the prediction CSV and print each result row.
predicted = mdb.predict(
    when_data="integration_testing/image_testing/predict.csv")
print(
    '------------------------------------------------------------Preidiction output------------------------------------------------------------'
)
for val in predicted:
    print(val)
from mindsdb import Predictor
import sys
import pandas as pd
import json
import time

mdb = Predictor(name='test_predictor')

# Train to predict 'neighborhood' (an earlier target was 'rental_price').
mdb.learn(
    to_predict=['neighborhood'],
    from_data="https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv",
    use_gpu=False,
    stop_training_in_x_seconds=3000,
    backend='lightwood',
    unstable_parameters_dict={'use_selfaware_model': True},
)

# Single-row prediction with confidence variation analysis.
p = mdb.predict(
    when={
        'number_of_rooms': 3,
        'number_of_bathrooms': 2,
        'neighborhood': 'south_side',
        'sqft': 2411,
    },
    run_confidence_variation_analysis=True,
    use_gpu=True,
)
e = p[0].explanation
print(e)

# Bulk prediction over the whole training file; touch every explanation.
p_arr = mdb.predict(
    when_data='https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv',
    use_gpu=True,
)
for p in p_arr:
    e = p.explanation

# Same single-row prediction again. Its result is only bound to `p`, which
# the loop below rebinds straight away.
p = mdb.predict(
    when={
        'number_of_rooms': 3,
        'number_of_bathrooms': 2,
        'neighborhood': 'south_side',
        'sqft': 2411,
    },
    run_confidence_variation_analysis=True,
    use_gpu=True,
)

# NOTE(review): every statement below is assumed to sit inside the loop;
# confirm against the originally indented script.
for p in p_arr:
    exp_s = p.epitomize()
    exp = p.explanation
    print(exp_s)

    print(p.as_dict())
    print(p.as_list())
    print(p.raw_predictions())
from mindsdb import Predictor, MySqlDS

# Get data: pull the heart-disease feature columns and the target from the
# local MySQL server.
heart_query = (
    "SELECT age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, "
    "oldpeak, slope, ca, thal, target FROM sys.heartdisease"
)
pg_ds = MySqlDS(
    query=heart_query,
    host="localhost",
    port=3306,
    user="******",
    password="******",
    database="sys",
    table="heartdisease",
)

# Train model
mdb = Predictor(name="heart-disease")
mdb.learn(to_predict="target", from_data=pg_ds)

# Get prediction for one patient described by a subset of the columns.
prediction = mdb.predict(
    when=dict(age="40", sex=0, chol=180, fbs=0, thal=3, exang=0))
print(prediction[0].explanation)
def __init__(self):
    """Create the 'human_activity' predictor and keep it on ``self.mindsDb``."""
    predictor = Predictor(name='human_activity')
    self.mindsDb = predictor
from mindsdb import Predictor
import sys

# Train 'sensor123' to predict the 'output' column, on CPU, for at most
# 40 seconds.
mdb = Predictor(name='sensor123')
mdb.learn(
    from_data="https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/sensor_data.csv",
    to_predict='output',
    stop_training_in_x_seconds=40,
    use_gpu=False,
)

# Bulk prediction over the training file.
p_arr = mdb.predict(
    when_data='https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/sensor_data.csv'
)

# Single-row prediction.
pdct = mdb.predict(
    when={
        'sensor 1': 0.5,
        'sensor 2': 2,
        'sensor 3': 0,
        'sensor4': 5,
    })
print(pdct)

# NOTE(review): both prints are assumed to sit inside the `if`; confirm
# against the originally indented script.
for p in p_arr:
    exp_s = p.epitomize()
    exp = p.explain()
    if len(exp['output']) > 0:
        print(exp)
        print(exp_s)
""" """ from mindsdb import Predictor # Here we use the model to make predictions (NOTE: You need to run train.py first) result = Predictor(name='fuel').predict(when_data='fuel_predict.csv') # you can now print the results print('The predicted main engine fuel consumption') for row in result: print(row)
from mindsdb import Predictor

# Train 'fuel' to predict the daily main-engine fuel consumption.
# The last three arguments are the time series arguments: rows are ordered
# by 'Time', grouped by 'id', with a window of just 24 hours.
Predictor(name='fuel').learn(
    from_data='fuel.csv',
    to_predict='Main_Engine_Fuel_Consumption_MT_day',
    order_by='Time',
    group_by='id',
    window_size=24,
)