コード例 #1
0
def test_clickhouse_ds():
    """Round-trip a small fixture table through ClickhouseDS and analyse it.

    Requires a local ClickHouse HTTP endpoint on port 8123.
    """
    host, port = 'localhost', 8123
    url = f'http://{host}:{port}'

    # Recreate the fixture table from scratch, then load three rows.
    setup_statements = [
        'CREATE DATABASE IF NOT EXISTS test',
        'DROP TABLE IF EXISTS test.mock',
        """CREATE TABLE test.mock(
            col1 String
            ,col2 Int64
            ,col3 Array(UInt8)
        ) ENGINE=Memory""",
        """INSERT INTO test.mock VALUES ('a',1,[1,2,3])""",
        """INSERT INTO test.mock VALUES ('b',2,[2,3,1])""",
        """INSERT INTO test.mock VALUES ('c',3,[3,1,2])""",
    ]
    for statement in setup_statements:
        requests.post(url, data=statement)

    clickhouse_ds = ClickhouseDS(
        'SELECT * FROM test.mock ORDER BY col2 DESC LIMIT 2',
        host=host,
        port=port)

    # LIMIT 2 ordered by col2 DESC keeps rows with col2 = 3 and 2 (sum 5);
    # the array column must survive the trip with all three elements.
    assert len(clickhouse_ds.df) == 2
    assert sum(map(int, clickhouse_ds.df['col2'])) == 5
    assert len(list(clickhouse_ds.df['col3'][1])) == 3
    assert set(clickhouse_ds.df.columns) == {'col1', 'col2', 'col3'}

    mdb = Predictor(name='analyse_dataset_test_predictor')
    mdb.analyse_dataset(from_data=clickhouse_ds)
コード例 #2
0
ファイル: test_mysql_ds.py プロジェクト: anniyanvr/mindsdb
def test_mysql_ds():
    """Load a 200-row MySQL fixture table via MySqlDS and analyse it.

    Requires a local MySQL server on the default port 3306.
    """
    host, port = 'localhost', 3306
    user, password = '******', ''
    database = 'mysql'

    connection = mysql.connector.connect(host=host,
                                         port=port,
                                         user=user,
                                         password=password,
                                         database=database)
    cursor = connection.cursor()

    # Rebuild the fixture table: text, bigint and bool columns, 200 rows.
    cursor.execute('DROP TABLE IF EXISTS test_mindsdb')
    cursor.execute(
        'CREATE TABLE test_mindsdb(col_1 Text, col_2 BIGINT, col_3 BOOL)')
    for i in range(200):
        cursor.execute(
            f'INSERT INTO test_mindsdb VALUES ("This is string number {i}", {i}, {i % 2 == 0})'
        )
    connection.commit()
    connection.close()

    mysql_ds = MySqlDS(table='test_mindsdb',
                       host=host,
                       user=user,
                       password=password,
                       database=database,
                       port=port)
    # Every inserted row must be visible through the data source.
    assert len(mysql_ds._df) == 200

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    mdb.analyse_dataset(from_data=mysql_ds)
コード例 #3
0
ファイル: test_postgres_ds.py プロジェクト: anniyanvr/mindsdb
def test_postgres_ds():
    """Load a 200-row PostgreSQL fixture table via PostgresDS and analyse it.

    Requires a local PostgreSQL server on the default port 5432. The fixture
    table exercises text, int, boolean, date and int-array columns.
    """
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DBNAME = 'postgres'
    PORT = 5432

    con = pg8000.connect(database=DBNAME,
                         user=USER,
                         password=PASSWORD,
                         host=HOST,
                         port=PORT)
    cur = con.cursor()

    # Rebuild the fixture table from scratch.
    cur.execute('DROP TABLE IF EXISTS test_mindsdb')
    cur.execute(
        'CREATE TABLE test_mindsdb(col_1 Text, col_2 Int,  col_3 Boolean, col_4 Date, col_5 Int [])'
    )
    for i in range(0, 200):
        # Dates count back one day per row so the date column has 200
        # distinct values.
        dt = datetime.datetime.now() - datetime.timedelta(days=i)
        dt_str = dt.strftime('%Y-%m-%d')
        cur.execute(
            f'INSERT INTO test_mindsdb VALUES (\'String {i}\', {i}, {i % 2 == 0}, \'{dt_str}\', ARRAY [1, 2, {i}])'
        )
    con.commit()
    con.close()

    # FIX: renamed `mysql_ds` -> `postgres_ds`; the old name was copy-paste
    # residue from the MySQL test and misdescribed a PostgresDS instance.
    postgres_ds = PostgresDS(table='test_mindsdb',
                             host=HOST,
                             user=USER,
                             password=PASSWORD,
                             database=DBNAME,
                             port=PORT)
    # Every inserted row must be visible through the data source.
    assert len(postgres_ds._df) == 200

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    mdb.analyse_dataset(from_data=postgres_ds)
コード例 #4
0
ファイル: postgres_ds.py プロジェクト: SanjayGorur/mindsdb
    # NOTE(review): truncated excerpt — the enclosing function's `def` line
    # and the HOST/USER/PASSWORD bindings used below sit above this view.
    DBNAME = 'postgres'
    PORT = 5432

    # Connect to the local PostgreSQL server (psycopg2 driver here, unlike
    # the pg8000-based variant of this test elsewhere in the project).
    con = psycopg2.connect(dbname=DBNAME,
                           user=USER,
                           password=PASSWORD,
                           host=HOST,
                           port=PORT)
    cur = con.cursor()

    # Rebuild a 200-row fixture table: text, int and boolean columns.
    cur.execute('DROP TABLE IF EXISTS test_mindsdb')
    cur.execute(
        'CREATE TABLE test_mindsdb(col_1 Text, col_2 Int, col_3 Boolean)')
    for i in range(0, 200):
        cur.execute(
            # NOTE(review): "tring" is a typo in the fixture text; left
            # untouched because only the row count is asserted, not contents.
            f'INSERT INTO test_mindsdb VALUES (\'This is tring number {i}\', {i}, {i % 2 == 0})'
        )
    con.commit()
    con.close()

    # NOTE(review): variable is named mysql_ds but holds a PostgresDS —
    # copy-paste residue from the MySQL version of this test.
    mysql_ds = PostgresDS(table='test_mindsdb',
                          host=HOST,
                          user=USER,
                          password=PASSWORD,
                          database=DBNAME,
                          port=PORT)
    # Every inserted row must be visible through the data source.
    assert (len(mysql_ds._df) == 200)

    mdb = Predictor(name='analyse_dataset_test_predictor')
    mdb.analyse_dataset(from_data=mysql_ds)
コード例 #5
0
                  data='CREATE DATABASE IF NOT EXISTS test')
    # NOTE(review): truncated excerpt — the function header, the first
    # requests.post call, and the `log` / ClickhouseDS bindings come from
    # above this view.
    requests.post('http://localhost:8123',
                  data='DROP TABLE IF EXISTS test.mock')
    # Recreate the in-memory fixture table and load three rows.
    requests.post('http://localhost:8123',
                  data="""CREATE TABLE test.mock(
        col1 String
        ,col2 Int64
        ,col3 Array(UInt8)
    ) ENGINE=Memory""")
    requests.post('http://localhost:8123',
                  data="""INSERT INTO test.mock VALUES ('a',1,[1,2,3])""")
    requests.post('http://localhost:8123',
                  data="""INSERT INTO test.mock VALUES ('b',2,[2,3,1])""")
    requests.post('http://localhost:8123',
                  data="""INSERT INTO test.mock VALUES ('c',3,[3,1,2])""")

    log.info('Querying data')
    # No host/port kwargs here — ClickhouseDS presumably defaults to
    # localhost:8123 to match the posts above; TODO confirm.
    clickhouse_ds = ClickhouseDS(
        'SELECT * FROM test.mock ORDER BY col2 DESC LIMIT 2')

    log.info('Validating data integrity')
    # LIMIT 2 ordered by col2 DESC keeps rows with col2 = 3 and 2 (sum 5).
    assert (len(clickhouse_ds.df) == 2)
    assert (sum(map(int, clickhouse_ds.df['col2'])) == 5)
    assert (len(list(clickhouse_ds.df['col3'][1])) == 3)
    assert (set(clickhouse_ds.df.columns) == set(['col1', 'col2', 'col3']))

    mdb = Predictor(name='analyse_dataset_test_predictor')
    mdb.analyse_dataset(from_data=clickhouse_ds)

    log.info('Finished running ClickhouseDS tests successfully !')
コード例 #6
0
ファイル: data_from_s3.py プロジェクト: yuganshjain/mindsdb
import os

from mindsdb import Predictor, S3DS


# Analyse the example home-rentals CSV pulled straight from the public
# S3 bucket.
analyser = Predictor(name='analyse_dataset_test_predictor')
rentals_ds = S3DS(bucket_name='mindsdb-example-data',
                  file_path='home_rentals.csv')
analyser.analyse_dataset(from_data=rentals_ds)
コード例 #7
0
ファイル: analyse_dataset.py プロジェクト: wh-forker/mindsdb
from mindsdb import Predictor


# Analyse the remote home-rentals example CSV directly from its URL.
DATASET_URL = "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv"

analyser = Predictor(name='analyse_dataset_test_predictor')
analyser.analyse_dataset(from_data=DATASET_URL)
コード例 #8
0
ファイル: test_maria_ds.py プロジェクト: anniyanvr/mindsdb
def test_maria_ds():
    """Round-trip typed columns through MariaDS and verify type detection.

    Builds a 200-row fixture table with one column per data type the
    analysis should recognise, then asserts the detected type/subtype for
    each column. Requires a local MariaDB server on port 4306 (non-default;
    presumably to avoid clashing with a MySQL server on 3306 — TODO confirm).
    """
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DATABASE = 'mysql'
    PORT = 4306

    con = mysql.connector.connect(host=HOST,
                                  port=PORT,
                                  user=USER,
                                  password=PASSWORD,
                                  database=DATABASE)
    cur = con.cursor()

    # One column per data type the analysis is expected to classify.
    cur.execute('DROP TABLE IF EXISTS test_mindsdb')
    cur.execute("""CREATE TABLE test_mindsdb (
                                col_int BIGINT,
                                col_float FLOAT, 
                                col_categorical Text, 
                                col_bool BOOL, 
                                col_text Text,
                                col_date DATE,
                                col_datetime DATETIME,
                                col_timestamp TIMESTAMP,
                                col_time TIME
                                )
                                """)
    # 200 rows; the date/datetime values step back one day per row so the
    # temporal columns carry distinct values.
    for i in range(0, 200):
        dt = datetime.datetime.now() - datetime.timedelta(days=i)

        query = f"""INSERT INTO test_mindsdb (col_int,
                                col_float, 
                                col_categorical, 
                                col_bool, 
                                col_text,
                                col_date,
                                col_datetime,
                                col_timestamp,
                                col_time) 
                                VALUES (%s, %s,  %s,  %s,  %s, %s, %s, %s, %s) 
                                """
        # Parameterized insert — values are passed separately, not
        # interpolated into the SQL string.
        values = (i, i + 0.01, f"Cat {i}", i % 2 == 0,
                  f"long long long text {i}", dt.date(), dt,
                  dt.strftime('%Y-%m-%d %H:%M:%S.%f'),
                  dt.strftime('%H:%M:%S.%f'))
        cur.execute(query, values)
    con.commit()
    con.close()

    maria_ds = MariaDS(table='test_mindsdb',
                       host=HOST,
                       user=USER,
                       password=PASSWORD,
                       database=DATABASE,
                       port=PORT)
    # Every inserted row must be visible through the data source.
    assert (len(maria_ds._df) == 200)

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    model_data = mdb.analyse_dataset(from_data=maria_ds)
    analysis = model_data['data_analysis_v2']
    assert model_data
    assert analysis

    # Helper: a column's detected type/subtype and its distribution counts
    # must match. NOTE(review): distributions are asserted at 199 of the 200
    # rows — presumably one row is excluded by the analysis; verify against
    # analyse_dataset's behavior.
    def assert_expected_type(column_typing, expected_type, expected_subtype):
        assert column_typing['data_type'] == expected_type
        assert column_typing['data_subtype'] == expected_subtype
        assert column_typing['data_type_dist'][expected_type] == 199
        assert column_typing['data_subtype_dist'][expected_subtype] == 199

    assert_expected_type(analysis['col_categorical']['typing'],
                         DATA_TYPES.CATEGORICAL, DATA_SUBTYPES.MULTIPLE)
    assert_expected_type(analysis['col_bool']['typing'],
                         DATA_TYPES.CATEGORICAL, DATA_SUBTYPES.SINGLE)
    assert_expected_type(analysis['col_int']['typing'], DATA_TYPES.NUMERIC,
                         DATA_SUBTYPES.INT)
    assert_expected_type(analysis['col_float']['typing'], DATA_TYPES.NUMERIC,
                         DATA_SUBTYPES.FLOAT)
    assert_expected_type(analysis['col_date']['typing'], DATA_TYPES.DATE,
                         DATA_SUBTYPES.DATE)
    assert_expected_type(analysis['col_datetime']['typing'], DATA_TYPES.DATE,
                         DATA_SUBTYPES.TIMESTAMP)
    assert_expected_type(analysis['col_timestamp']['typing'], DATA_TYPES.DATE,
                         DATA_SUBTYPES.TIMESTAMP)
    assert_expected_type(analysis['col_text']['typing'], DATA_TYPES.SEQUENTIAL,
                         DATA_SUBTYPES.TEXT)