Ejemplo n.º 1
0
def test_datasource():
    with pytest.raises(ValidationError):
        MySQLDataSource(name='mycon',
                        domain='mydomain',
                        database='mysql_db',
                        query='')

    with pytest.raises(ValueError) as exc_info:
        MySQLDataSource(name='mycon', domain='mydomain', database='mysql_db')
    assert "'query' or 'table' must be set" in str(exc_info.value)

    with pytest.raises(ValueError) as exc_info:
        MySQLDataSource(name='mycon',
                        domain='mydomain',
                        database='mysql_db',
                        query='myquery',
                        table='mytable')
    assert "Only one of 'query' or 'table' must be set" in str(exc_info.value)

    MySQLDataSource(name='mycon',
                    domain='mydomain',
                    database='mysql_db',
                    table='mytable')
    MySQLDataSource(name='mycon',
                    domain='mydomain',
                    database='mysql_db',
                    query='myquery')
Ejemplo n.º 2
0
def test_get_df_db_follow(mysql_connector):
    """" It should extract the table City and make some merge with some foreign key """
    data_sources_spec = [
        {
            'domain': 'MySQL test',
            'type': 'external_database',
            'name': 'Some MySQL provider',
            'database': 'mysql_db',
            'table': 'City',
            'follow_relations': True
        }
    ]

    expected_columns = ['ID', 'Name_City', 'CountryCode', 'District',
                        'Population_City', 'Name_Country', 'Continent',
                        'Region', 'SurfaceArea', 'IndepYear',
                        'Population_Country', 'LifeExpectancy', 'GNP',
                        'GNPOld', 'LocalName', 'GovernmentForm', 'HeadOfState',
                        'Capital', 'Code2']

    data_source = MySQLDataSource(**data_sources_spec[0])
    df = mysql_connector.get_df(data_source)

    assert not df.empty
    assert len(df.columns) == 19

    assert collections.Counter(df.columns) == collections.Counter(expected_columns)
    assert len(df.columns) == len(expected_columns)

    assert len(df[df['Population_City'] > 5000000]) == 24
Ejemplo n.º 3
0
def test_get_form_empty_query(mysql_connector):
    """It should give suggestions of the databases without changing the rest"""
    current_config = {}
    form = MySQLDataSource.get_form(mysql_connector, current_config)
    assert form['properties']['database'] == {
        'title': 'Database',
        'type': 'string',
        'enum': ['information_schema', 'mysql_db'],
    }
Ejemplo n.º 4
0
def test_get_form_query_with_good_database(mysql_connector):
    """It should give suggestions of the collections"""
    current_config = {'database': 'mysql_db'}
    form = MySQLDataSource.get_form(mysql_connector, current_config)
    assert form['properties']['database'] == {
        'title': 'Database',
        'type': 'string',
        'enum': ['information_schema', 'mysql_db'],
    }
    assert form['properties']['table'] == {
        'title': 'Table',
        'type': 'string',
        'enum': ['City', 'Country', 'CountryLanguage'],
    }
Ejemplo n.º 5
0
def test_get_df_forbidden_table_interpolation(mysql_connector):
    data_source_spec = {
        'domain': 'MySQL test',
        'type': 'external_database',
        'name': 'Some MySQL provider',
        'database': 'mysql_db',
        'query': 'SELECT * FROM %(tablename)s WHERE Population > 5000000',
        'parameters': {
            'tablename': 'City'
        },
    }

    data_source = MySQLDataSource(**data_source_spec)
    with pytest.raises(pd.io.sql.DatabaseError) as e:
        mysql_connector.get_df(data_source)
    assert 'interpolating table name is forbidden' in str(e.value)
Ejemplo n.º 6
0
def test_get_df_db(mysql_connector):
    """" It should extract the table City without merges """
    data_source_spec = {
        'domain': 'MySQL test',
        'type': 'external_database',
        'name': 'Some MySQL provider',
        'database': 'mysql_db',
        'query': 'SELECT * FROM City WHERE Population > %(max_pop)s',
        'parameters': {'max_pop': 5000000},
    }

    expected_columns = {'ID', 'Name', 'CountryCode', 'District', 'Population'}
    data_source = MySQLDataSource(**data_source_spec)
    df = mysql_connector.get_df(data_source)

    assert not df.empty
    assert set(df.columns) == expected_columns
    assert df.shape == (24, 5)
Ejemplo n.º 7
0
def test_get_form_query_with_good_database(mysql_connector):
    """It should give suggestions of the collections"""
    current_config = {'database': 'mysql_db'}
    form = MySQLDataSource.get_form(mysql_connector, current_config)
    assert form['properties']['database'] == {'$ref': '#/definitions/database'}
    assert form['definitions']['database'] == {
        'title': 'database',
        'description': 'An enumeration.',
        'type': 'string',
        'enum': ['information_schema', 'mysql_db'],
    }
    assert form['properties']['table'] == {'$ref': '#/definitions/table'}
    assert form['definitions']['table'] == {
        'title': 'table',
        'description': 'An enumeration.',
        'type': 'string',
        'enum': ['City', 'Country', 'CountryLanguage'],
    }
    assert form['required'] == ['domain', 'name', 'database']
Ejemplo n.º 8
0
def test_get_df(mocker):
    """ It should call the sql extractor """
    snock = mocker.patch('pymysql.connect')
    reasq = mocker.patch('pandas.read_sql')
    mocker.patch(
        'toucan_connectors.mysql.mysql_connector.MySQLConnector.get_foreign_key_info'
    ).return_value = []

    data_sources_spec = [
        {
            'domain': 'MySQL test',
            'type': 'external_database',
            'name': 'Some MySQL provider',
            'database': 'mysql_db',
            'table': 'City'
        }
    ]
    mysql_connector = MySQLConnector(name='mycon', host='localhost', port=22,
                                     user='******', password='******')

    data_source = MySQLDataSource(**data_sources_spec[0])
    mysql_connector.get_df(data_source)

    conv = pymysql.converters.conversions.copy()
    conv[246] = float
    snock.assert_called_once_with(
        host='localhost',
        user='******',
        database='mysql_db',
        password='******',
        port=22,
        charset='utf8mb4',
        conv=conv,
        cursorclass=pymysql.cursors.DictCursor
    )

    reasq.assert_called_once_with(
        'select * from City',
        con=snock(),
        params={}
    )