Example #1
File: tests.py Project: wh-forker/mindsdb
def basic_test(backend='ludwig',
               use_gpu=True,
               ignore_columns=None,
               run_extra=False):
    if run_extra:
        for py_file in [
                x for x in os.listdir('../functional_testing')
                if x.endswith('.py')
        ]:
            os.system(f'python3 ../functional_testing/{py_file}')

    # Create & Learn
    mdb = Predictor(name='home_rentals_price')
    mdb.learn(
        to_predict='rental_price',
        from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
        backend=backend,
        stop_training_in_x_seconds=20,
        use_gpu=use_gpu)

    # Reload & Predict
    mdb = Predictor(name='home_rentals_price')
    prediction = mdb.predict(when={'sqft': 300}, use_gpu=use_gpu)

    # Test all the different forms of output.
    # There's no need to print them; we do so only for debugging, to check
    # that the interface won't crash.

    print(prediction)
    print(prediction[0])

    for item in prediction:
        print(item)

    print(type(list(prediction.evaluations.values())[0][0]))
    assert ('ProbabilityEvaluation'
            in str(type(list(prediction.evaluations.values())[0][0])))

    for p in prediction:
        print(p)
    print(prediction[0].as_dict())
    print(prediction[0].as_list())
    print(prediction[0]['rental_price_confidence'])
    print(type(prediction[0]['rental_price_confidence']))

    print('\n\n========================\n\n')
    print(prediction[0].explain())
    print('\n\n')

    # See if we can get the adapted metadata
    amd = mdb.get_model_data('home_rentals_price')
    # Make some simple assertions about it
    assert (len(amd) > 5)
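
A minimal sketch of how this helper might be invoked (a hypothetical driver, not part of the original test file), exercising both backends on CPU:

if __name__ == '__main__':
    # Run the smoke test against each backend; use_gpu=False keeps it portable.
    for backend in ('ludwig', 'lightwood'):
        basic_test(backend=backend, use_gpu=False)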
Example #2
def test_mysql_ds():
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DATABASE = 'mysql'
    PORT = 3306

    con = mysql.connector.connect(host=HOST,
                                  port=PORT,
                                  user=USER,
                                  password=PASSWORD,
                                  database=DATABASE)
    cur = con.cursor()

    cur.execute('DROP TABLE IF EXISTS test_mindsdb')
    cur.execute(
        'CREATE TABLE test_mindsdb(col_1 Text, col_2 BIGINT, col_3 BOOL)')
    for i in range(0, 200):
        cur.execute(
            f'INSERT INTO test_mindsdb VALUES ("This is string number {i}", {i}, {i % 2 == 0})'
        )
    con.commit()
    con.close()

    mysql_ds = MySqlDS(table='test_mindsdb',
                       host=HOST,
                       user=USER,
                       password=PASSWORD,
                       database=DATABASE,
                       port=PORT)
    assert (len(mysql_ds._df) == 200)

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    mdb.analyse_dataset(from_data=mysql_ds)
Example #3
def test_clickhouse_ds():
    HOST = 'localhost'
    PORT = 8123

    clickhouse_url = f'http://{HOST}:{PORT}'
    requests.post(clickhouse_url, data='CREATE DATABASE IF NOT EXISTS test')
    requests.post(clickhouse_url, data='DROP TABLE IF EXISTS test.mock')
    requests.post(clickhouse_url,
                  data="""CREATE TABLE test.mock(
            col1 String
            ,col2 Int64
            ,col3 Array(UInt8)
        ) ENGINE=Memory""")
    requests.post(clickhouse_url,
                  data="""INSERT INTO test.mock VALUES ('a',1,[1,2,3])""")
    requests.post(clickhouse_url,
                  data="""INSERT INTO test.mock VALUES ('b',2,[2,3,1])""")
    requests.post(clickhouse_url,
                  data="""INSERT INTO test.mock VALUES ('c',3,[3,1,2])""")

    clickhouse_ds = ClickhouseDS(
        'SELECT * FROM test.mock ORDER BY col2 DESC LIMIT 2',
        host=HOST,
        port=PORT)

    assert (len(clickhouse_ds.df) == 2)
    assert (sum(map(int, clickhouse_ds.df['col2'])) == 5)
    assert (len(list(clickhouse_ds.df['col3'][1])) == 3)
    assert (set(clickhouse_ds.df.columns) == set(['col1', 'col2', 'col3']))

    mdb = Predictor(name='analyse_dataset_test_predictor')
    mdb.analyse_dataset(from_data=clickhouse_ds)
Example #4
def run(sample=False):
    backend = 'lightwood'

    mdb = Predictor(name='german_data')

    mdb.learn(to_predict='class',
              from_data='processed_data/train.csv',
              backend=backend)

    predictions = mdb.predict(when_data='processed_data/test.csv')

    predicted_val = [
        x.explanation['class']['predicted_value'] for x in predictions
    ]
    real_val = list(pd.read_csv('processed_data/test.csv')['class'])

    accuracy = balanced_accuracy_score(real_val, predicted_val)

    cm = confusion_matrix(real_val, predicted_val)
    print(cm)

    # Show additional info for each transaction row
    additional_info = [x.explanation for x in predictions]

    return {
        'accuracy': accuracy,
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend,
        'single_row_predictions': additional_info
    }
Example #5
def test_postgres_ds():
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DBNAME = 'postgres'
    PORT = 5432

    con = pg8000.connect(database=DBNAME,
                         user=USER,
                         password=PASSWORD,
                         host=HOST,
                         port=PORT)
    cur = con.cursor()

    cur.execute('DROP TABLE IF EXISTS test_mindsdb')
    cur.execute(
        'CREATE TABLE test_mindsdb(col_1 Text, col_2 Int,  col_3 Boolean, col_4 Date, col_5 Int [])'
    )
    for i in range(0, 200):
        dt = datetime.datetime.now() - datetime.timedelta(days=i)
        dt_str = dt.strftime('%Y-%m-%d')
        cur.execute(
            f'INSERT INTO test_mindsdb VALUES (\'String {i}\', {i}, {i % 2 == 0}, \'{dt_str}\', ARRAY [1, 2, {i}])'
        )
    con.commit()
    con.close()

    postgres_ds = PostgresDS(table='test_mindsdb',
                             host=HOST,
                             user=USER,
                             password=PASSWORD,
                             database=DBNAME,
                             port=PORT)
    assert (len(postgres_ds._df) == 200)

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    mdb.analyse_dataset(from_data=postgres_ds)
Example #6
from mindsdb import Predictor
import pprint

DEBUG_LOG_LEVEL = 10
INFO_LOG_LEVEL = 20
WARNING_LOG_LEVEL = 30
ERROR_LOG_LEVEL = 40
NO_LOGS_LOG_LEVEL = 50

ANDY_LOGLEVEL = INFO_LOG_LEVEL

print("training")
# tell mindsDB what we want to learn and from what data
Predictor(name='spam_test', log_level=ANDY_LOGLEVEL).learn(
    to_predict='answer',  # the column we want to learn to predict given all the data in the file
    from_data="spam_small.csv",  # the path to the file to learn from (note: this can be a URL)
    use_gpu=False,
    stop_training_in_x_seconds=10
)

# use the model to make predictions
tests = [
    {'text': 'how are you going, what have you been doing today', 'is_spam': None, 'confidence': 0},
    {'text': 'ready to buy a new dvd today?', 'is_spam': None, 'confidence': 0},
    {'text': 'WINNER!!', 'is_spam': None, 'confidence': 0},
]
for test in tests:
    print("predicting...")
    result = Predictor(name='spam_test', log_level=ANDY_LOGLEVEL).predict(when={'conversation': test['text']})
    test['is_spam'] = result[0]['answer']
    test['confidence'] = result[0]['answer_confidence']
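
pprint is imported above but never used, and the loop fills in each test dict without ever displaying it. A small follow-up (an assumption, not part of the original script) that prints the collected results:

# Show each message with its predicted spam label and confidence
pprint.pprint(tests)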
Example #7
    def __init__(self):
        self.mindsDb = Predictor(name='temperature')
Example #8
    def predict(self, mode='predict'):
        # The start of this method is truncated in the source; the signature
        # and the first branch are reconstructed from the elif chain below.
        if mode == 'validate':
            df = self.transaction.input_data.validation_df
        elif mode == 'test':
            df = self.transaction.input_data.test_df

        X = []
        for col in self.input_columns:
            X.append(self.le_arr[col].transform(df[col]))

        X = np.swapaxes(X, 1, 0)

        predictions = self.clf.predict(X)

        formatted_predictions = {self.output_columns[0]: predictions}

        return formatted_predictions


predictor = Predictor(name='custom_model_test_predictor')

dt_model = CustomDTModel()

predictor.learn(
    to_predict='rental_price',
    from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
    backend=dt_model)
predictions = predictor.predict(
    when_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
    backend=dt_model)
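
For context, the truncated predict method above belongs to a custom backend class handed to Predictor.learn via backend=. Below is a minimal sketch of what the rest of CustomDTModel might look like, assuming scikit-learn's DecisionTreeClassifier and one LabelEncoder per column; the attribute names follow the fragment, but the set_transaction and train bodies are reconstructions, not the original code:

import numpy as np
from sklearn import preprocessing, tree


class CustomDTModel:
    def __init__(self):
        # Attributes referenced by the predict fragment above
        self.clf = tree.DecisionTreeClassifier()
        self.le_arr = {}

    def set_transaction(self, transaction):
        # MindsDB hands the backend a transaction object holding the data;
        # the exact metadata keys used here are assumptions.
        self.transaction = transaction
        self.output_columns = self.transaction.lmd['predict_columns']
        self.input_columns = [
            col for col in self.transaction.lmd['columns']
            if col not in self.output_columns
        ]

    def train(self):
        df = self.transaction.input_data.train_df
        # Label-encode every column, inputs and target alike
        for col in df.columns:
            self.le_arr[col] = preprocessing.LabelEncoder()
            self.le_arr[col].fit(df[col])

        X = [self.le_arr[col].transform(df[col]) for col in self.input_columns]
        X = np.swapaxes(X, 1, 0)
        Y = self.le_arr[self.output_columns[0]].transform(df[self.output_columns[0]])
        self.clf.fit(X, Y)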
Example #9
from mindsdb import Predictor

# use the model to make predictions
result = Predictor(name='restaurant_score').predict(when={'inspection_score': 92, 'business_state': 'CA'})
print(result[0])
Example #10
File: main.py Project: abulka/mindsdb-play
from mindsdb import Predictor

print("learning...")

# tell mindsDB what we want to learn and from what data
Predictor(name='home_rentals_price').learn(
    to_predict='rental_price',  # the column we want to learn to predict given all the data in the file
    from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",  # the path to the file to learn from (note: this can be a URL)
    use_gpu=False  # 25 seconds using powershell: Measure-Command {python .\main.py}
                   # 29 seconds on iMac
                   # use_gpu=True  # 25 seconds also (much less CPU used though)
)

# use the model to make predictions
result = Predictor(name='home_rentals_price').predict(when={
    'number_of_rooms': 2,
    'number_of_bathrooms': 1,
    'sqft': 1190
})

# you can now print the results
print('The predicted price is ${price} with {conf} confidence'.format(
    price=result[0]['rental_price'],
    conf=result[0]['rental_price_confidence']))

print("done")
Example #11
def test_postgres_ds():
    # (the opening lines of this snippet are truncated in the source; the
    #  function name, HOST and USER are reconstructed to match Example #5)
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DBNAME = 'postgres'
    PORT = 5432

    con = psycopg2.connect(dbname=DBNAME,
                           user=USER,
                           password=PASSWORD,
                           host=HOST,
                           port=PORT)
    cur = con.cursor()

    cur.execute('DROP TABLE IF EXISTS test_mindsdb')
    cur.execute(
        'CREATE TABLE test_mindsdb(col_1 Text, col_2 Int, col_3 Boolean)')
    for i in range(0, 200):
        cur.execute(
            f'INSERT INTO test_mindsdb VALUES (\'This is string number {i}\', {i}, {i % 2 == 0})'
        )
    con.commit()
    con.close()

    postgres_ds = PostgresDS(table='test_mindsdb',
                             host=HOST,
                             user=USER,
                             password=PASSWORD,
                             database=DBNAME,
                             port=PORT)
    assert (len(postgres_ds._df) == 200)

    mdb = Predictor(name='analyse_dataset_test_predictor')
    mdb.analyse_dataset(from_data=postgres_ds)
Example #12
def test_maria_ds():
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DATABASE = 'mysql'
    PORT = 4306

    con = mysql.connector.connect(host=HOST,
                                  port=PORT,
                                  user=USER,
                                  password=PASSWORD,
                                  database=DATABASE)
    cur = con.cursor()

    cur.execute('DROP TABLE IF EXISTS test_mindsdb')
    cur.execute("""CREATE TABLE test_mindsdb (
                                col_int BIGINT,
                                col_float FLOAT, 
                                col_categorical Text, 
                                col_bool BOOL, 
                                col_text Text,
                                col_date DATE,
                                col_datetime DATETIME,
                                col_timestamp TIMESTAMP,
                                col_time TIME
                                )
                                """)
    for i in range(0, 200):
        dt = datetime.datetime.now() - datetime.timedelta(days=i)

        query = f"""INSERT INTO test_mindsdb (col_int,
                                col_float, 
                                col_categorical, 
                                col_bool, 
                                col_text,
                                col_date,
                                col_datetime,
                                col_timestamp,
                                col_time) 
                                VALUES (%s, %s,  %s,  %s,  %s, %s, %s, %s, %s) 
                                """
        values = (i, i + 0.01, f"Cat {i}", i % 2 == 0,
                  f"long long long text {i}", dt.date(), dt,
                  dt.strftime('%Y-%m-%d %H:%M:%S.%f'),
                  dt.strftime('%H:%M:%S.%f'))
        cur.execute(query, values)
    con.commit()
    con.close()

    maria_ds = MariaDS(table='test_mindsdb',
                       host=HOST,
                       user=USER,
                       password=PASSWORD,
                       database=DATABASE,
                       port=PORT)
    assert (len(maria_ds._df) == 200)

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    model_data = mdb.analyse_dataset(from_data=maria_ds)
    analysis = model_data['data_analysis_v2']
    assert model_data
    assert analysis

    def assert_expected_type(column_typing, expected_type, expected_subtype):
        assert column_typing['data_type'] == expected_type
        assert column_typing['data_subtype'] == expected_subtype
        assert column_typing['data_type_dist'][expected_type] == 199
        assert column_typing['data_subtype_dist'][expected_subtype] == 199

    assert_expected_type(analysis['col_categorical']['typing'],
                         DATA_TYPES.CATEGORICAL, DATA_SUBTYPES.MULTIPLE)
    assert_expected_type(analysis['col_bool']['typing'],
                         DATA_TYPES.CATEGORICAL, DATA_SUBTYPES.SINGLE)
    assert_expected_type(analysis['col_int']['typing'], DATA_TYPES.NUMERIC,
                         DATA_SUBTYPES.INT)
    assert_expected_type(analysis['col_float']['typing'], DATA_TYPES.NUMERIC,
                         DATA_SUBTYPES.FLOAT)
    assert_expected_type(analysis['col_date']['typing'], DATA_TYPES.DATE,
                         DATA_SUBTYPES.DATE)
    assert_expected_type(analysis['col_datetime']['typing'], DATA_TYPES.DATE,
                         DATA_SUBTYPES.TIMESTAMP)
    assert_expected_type(analysis['col_timestamp']['typing'], DATA_TYPES.DATE,
                         DATA_SUBTYPES.TIMESTAMP)
    assert_expected_type(analysis['col_text']['typing'], DATA_TYPES.SEQUENTIAL,
                         DATA_SUBTYPES.TEXT)
Example #13
from mindsdb import Predictor

# We tell mindsDB what we want to learn and from what data
mdb = Predictor(name='home_rentals_price')

mdb.learn(
    to_predict='rental_price',  # the column we want to learn to predict given all the data in the file
    from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv"
    # the path to the file to learn from (note: this can be a URL)
)

prediction = mdb.predict(when={'sqft': 300})
print(prediction[0])
amd = mdb.get_model_data('home_rentals_price')
print(amd)
Example #14
from mindsdb import Predictor

mdb = Predictor(name='marvel')

mdb.learn(from_data="marvel-wikia.xlsx", to_predict='FIRST_APPEARANCE')

print(
    '------------------------------------------------------------Done training------------------------------------------------------------'
)
"""
predicted = mdb.predict(when={
    'Date':'11/03/2020',
    'Time':'18.00.00',
    'NMHC_GT': 1360.0,
    'AH': 0.655
})
print('------------------------------------------------------------Prediction output------------------------------------------------------------')
for val in predicted:
    print(val['CO_GT'])
    print(val['CO_GT_confidence'])
"""
Example #15
"""

This example we will walk you over the basics of MindsDB

The example code objective here is to:

- learn a model to predict the best retal price for a given property.

In order to to this we have a dataset "data_sources/home_rentals.csv"

"""

from mindsdb import Predictor


# We tell mindsDB what we want to learn and from what data
Predictor(name='home_rentals_price').learn(
    to_predict='rental_price',  # the column we want to learn to predict given all the data in the file
    from_data="home_rentals.csv"  # the path to the file to learn from (note: this can be a URL)
)

Example #16
"""

This example we will walk you over the basics of MindsDB

The example code objective here is to predict the best retail price for a given property.

"""

from mindsdb import Predictor

# use the model to make predictions
result = Predictor(name='home_rentals_price').predict(
    when={'number_of_rooms': 2, 'number_of_bathrooms': 1, 'sqft': 1190})

# you can now print the results
print('The predicted price is ${price} with {conf} confidence'.format(
    price=result[0]['rental_price'],
    conf=result[0]['rental_price_confidence']))
Example #17
from mindsdb import Predictor

# We tell mindsDB what we want to learn and from what data
Predictor(name='home_rentals_price').learn(
    to_predict='rental_price',  # the column we want to learn to predict given all the data in the file
    from_data="https://raw.githubusercontent.com/mindsdb/mindsdb/master/docs/examples/basic/home_rentals.csv"
    # the path to the file to learn from (note: this can be a URL)
)
Example #18
    def __init__(self):
        self.mindsDb = Predictor(name='insurance1')
Example #19
from mindsdb import Predictor

# use the model to make predictions
result = Predictor(name='btc-price').predict(
    when={
        'txVolume(USD)': 6739584540.73,
        'adjustedTxVolume(USD)': 3868097401.91,
        'txCount': 204913,
        'exchangeVolume(USD)': 7394019840,
        'generatedCoins': 1875,
        'fees': 35.900,
        'blockCount': 150
    })
print(result[0])
Example #20
from mindsdb import Predictor

# use the model to make predictions
result = Predictor(name='beer_consumption').predict(when={
    'Temperatura Media': '27.3',
    'Final de Semana': 0,
    'Precipitacao': '1.2'
})
print(result[0])
Example #21
    def __init__(self):
        self.mindsDb = Predictor(name='demand_30')
Example #22
from mindsdb import Predictor

# use the model to make predictions
result = Predictor(name='player-stats').predict(
    when={
        'home_team': 'Scotland',
        'away_team': 'England',
        'tournament': 'Friendly',
        'country': 'Scotland'
    })

print(result[0])
Example #23
from mindsdb import Predictor

# We tell mindsDB what we want to learn and from what data
Predictor(name='beer_consumption').learn(
    to_predict='Consumo de cerveja',  # the column we want to learn to predict given all the data in the file
    from_data="dataset/Consumo_cerveja_train.csv",  # the path to the file to learn from (note: this can be a URL)
)
Example #24
File: main.py Project: ye-man/mindsdb
from mindsdb import Predictor

mdb = Predictor(name='photo_score_model12')

mdb.learn(from_data="integration_testing/image_testing/train.csv",
          to_predict=['Score'])
print(
    '------------------------------------------------------------Done training------------------------------------------------------------'
)

predicted = mdb.predict(
    when_data="integration_testing/image_testing/predict.csv")
print(
    '------------------------------------------------------------Prediction output------------------------------------------------------------'
)
for val in predicted:
    print(val)
Example #25
from mindsdb import Predictor

mdb = Predictor(name='test_predictor')
# (alternative target: 'rental_price')
mdb.learn(to_predict=['neighborhood'],
          from_data="https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv",
          use_gpu=False,
          stop_training_in_x_seconds=3000,
          backend='lightwood',
          unstable_parameters_dict={'use_selfaware_model': True})

p = mdb.predict(when={'number_of_rooms': 3, 'number_of_bathrooms': 2,
                      'neighborhood': 'south_side', 'sqft': 2411},
                run_confidence_variation_analysis=True,
                use_gpu=True)
e = p[0].explanation
print(e)

p_arr = mdb.predict(
    when_data='https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv',
    use_gpu=True)

for p in p_arr:
    e = p.explanation

p = mdb.predict(when={'number_of_rooms': 3, 'number_of_bathrooms': 2,
                      'neighborhood': 'south_side', 'sqft': 2411},
                run_confidence_variation_analysis=True,
                use_gpu=True)

for p in p_arr:
    exp_s = p.epitomize()
    exp = p.explanation
    print(exp_s)

    print(p.as_dict())
    print(p.as_list())
    print(p.raw_predictions())
Example #26
from mindsdb import Predictor, MySqlDS

# Get data
mysql_ds = MySqlDS(
    query="SELECT age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal, target FROM sys.heartdisease",
    user="******",
    password="******",
    port=3306,
    host="localhost",
    table="heartdisease",
    database="sys")

# Train model
mdb = Predictor(name="heart-disease")
mdb.learn(from_data=mysql_ds, to_predict="target")

# Get prediction
prediction = mdb.predict(when={
    "age": "40",
    "sex": 0,
    "chol": 180,
    "fbs": 0,
    "thal": 3,
    "exang": 0
})
print(prediction[0].explanation)
Example #27
    def __init__(self):
        self.mindsDb = Predictor(name='human_activity')
Example #28
from mindsdb import Predictor

mdb = Predictor(name='sensor123')

mdb.learn(
    to_predict='output',
    from_data="https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/sensor_data.csv",
    use_gpu=False,
    stop_training_in_x_seconds=40)

p_arr = mdb.predict(
    when_data='https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/sensor_data.csv')

pdct = mdb.predict(when={
    'sensor 1': 0.5,
    'sensor 2': 2,
    'sensor 3': 0,
    'sensor4': 5
})
print(pdct)

for p in p_arr:
    exp_s = p.epitomize()
    exp = p.explain()

    if len(exp['output']) > 0:
        print(exp)
        print(exp_s)
Example #29
"""

"""

from mindsdb import Predictor

# Here we use the model to make predictions (NOTE: You need to run train.py first)
result = Predictor(name='fuel').predict(when_data='fuel_predict.csv')

# you can now print the results
print('The predicted main engine fuel consumption')
for row in result:
    print(row)
Example #30
from mindsdb import Predictor


Predictor(name='fuel').learn(
    to_predict='Main_Engine_Fuel_Consumption_MT_day',
    from_data='fuel.csv',

    # Time series arguments:
    order_by='Time',
    group_by='id',
    window_size=24  # just 24 hours
)
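
Since this is a time-series setup, predictions are typically made from a file that carries the ordered history rather than from a single row. A minimal follow-up sketch, assuming a 'fuel_predict.csv' laid out like the training data (Example #29 queries this same model the same way):

# Load the trained model and predict from a file that includes the
# 'Time' and 'id' columns used for ordering/grouping (assumed layout)
result = Predictor(name='fuel').predict(when_data='fuel_predict.csv')
for row in result:
    print(row['Main_Engine_Fuel_Consumption_MT_day'],
          row['Main_Engine_Fuel_Consumption_MT_day_confidence'])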