Example no. 1
0
def test_write_df_to_db_wrong_types(db_transact):
    """A value that cannot be coerced to its DB column type must raise.

    The swapped text/float values make "foo" land in the NUMERIC column,
    so the write is expected to fail with a cast error from the DB.
    """
    ddl = """
        CREATE TABLE public.test_table (
        id INT UNIQUE,
        text_col TEXT,
        float_col NUMERIC NOT NULL
        )
    """
    db_transact.execute(ddl)
    bad_df = pd.DataFrame(
        {
            "id": [1, 2, 3],
            "text_col": [1.01, "bar", "baz"],
            "float_col": ["foo", 2.02, 3.03],
        }
    )
    with pytest.raises(Exception) as excinfo:
        dbu.write_df_to_db(
            df=bad_df,
            db_connection=db_transact,
            tablename='test_table',
            schemaname='public'
        )
    assert 'invalid input syntax for type numeric: "foo"' in str(excinfo.value)
Example no. 2
0
def test_write_df_to_db_simple(db_transact):
    """Round-trip a well-typed DataFrame and read it back unchanged."""
    ddl = """
        CREATE TABLE public.test_table (
        id INT UNIQUE,
        text_col TEXT,
        float_col NUMERIC NOT NULL
        )
    """
    db_transact.execute(ddl)
    df = pd.DataFrame(
        {
            "id": [1, 2, 3],
            "text_col": ["foo", "bar", "baz"],
            "float_col": [1.01, 2.02, 3.03],
        }
    )
    dbu.write_df_to_db(
        df=df,
        db_connection=db_transact,
        tablename='test_table',
        schemaname='public'
    )
    stored = pd.read_sql(
        sql="SELECT * from public.test_table",
        con=db_transact
    )
    # Values and column order must both survive the round trip.
    npt.assert_array_equal(stored, df)
    npt.assert_array_equal(stored.columns, ["id", "text_col", "float_col"])
Example no. 3
0
def test_write_df_to_db_db_missing_empty_columns(db_transact):
    """A DataFrame column absent from the table is rejected, even when empty.

    "bool_col" holds only None, but it has no matching DB column, so the
    write must fail with an explicit mapping error naming the column.
    """
    ddl = """
        CREATE TABLE public.test_table (
        id INT UNIQUE,
        text_col TEXT,
        float_col NUMERIC NOT NULL
        )
    """
    db_transact.execute(ddl)
    df = pd.DataFrame(
        {
            "id": [1, 2, 3],
            "text_col": ["foo", "bar", "baz"],
            "float_col": [1.01, 2.02, 3.03],
            "bool_col": [None, None, None],
        }
    )
    with pytest.raises(Exception) as excinfo:
        dbu.write_df_to_db(
            df=df,
            db_connection=db_transact,
            tablename='test_table',
            schemaname='public'
        )
    assert str(excinfo.value) == (
        "Unexpected column names could not map to DB columns:\n\tbool_col"
    )
Example no. 4
0
def test_write_df_to_db_df_offset_columns(db_transact):
    """Successive writes may each supply a different subset of columns.

    Columns missing from a given write are stored as NULL; the reader
    sees the union of columns in table order.
    """
    ddl = """
        CREATE TABLE public.test_table (
        id INT UNIQUE,
        text_col TEXT,
        float_col NUMERIC NOT NULL,
        bool_col BOOLEAN
        )
    """
    db_transact.execute(ddl)
    first = pd.DataFrame(
        {
            "id": [1, 2],
            "text_col": ["foo", "bar"],
            "float_col": [1.01, 2.02],
        }
    )
    dbu.write_df_to_db(
        df=first,
        db_connection=db_transact,
        tablename='test_table',
        schemaname='public'
    )
    # Second batch omits text_col and adds bool_col instead.
    second = pd.DataFrame(
        {
            "id": [3, 4],
            "bool_col": [False, True],
            "float_col": [3.03, 4.04],
        }
    )
    dbu.write_df_to_db(
        df=second,
        db_connection=db_transact,
        tablename='test_table',
        schemaname='public'
    )
    stored = pd.read_sql(
        sql="SELECT * from public.test_table",
        con=db_transact
    )
    expected = pd.DataFrame(
        data=[
            [1, "foo", 1.01, None],
            [2, "bar", 2.02, None],
            [3, None, 3.03, False],
            [4, None, 4.04, True],
        ],
        columns=["id", "text_col", "float_col", "bool_col"]
    )
    npt.assert_array_equal(stored, expected)
    npt.assert_array_equal(
        stored.columns,
        ["id", "text_col", "float_col", "bool_col"]
    )
def import_single_file(
    filepath,
    db_engine,
    data_files_path=pathlib.PurePosixPath('/')
        ):
    """Read one data file and load it into the raw-data schema.

    Files already present in the import history are skipped. The
    import-history insert and the data write share a single transaction,
    so a failed write rolls both back.

    Returns the new import-history id, or None when the file was skipped.
    """
    if dbu.check_if_file_imported(filepath, db_engine):
        logging.info(
            f"Already imported: {filepath.relative_to(data_files_path)}"
        )
        return None
    logging.info(f"Importing: {filepath.relative_to(data_files_path)}")
    logging.debug(f"Absolute path: {filepath}")

    parsed = _determine_file_type(filepath=filepath)
    schemaname = 'rawdata'

    # Fetch target column names first; the parser uses them to shape the
    # DataFrame it returns.
    with db_engine.begin() as conn:
        col_info = dbu.get_db_column_info(
            db_connection=conn,
            tablename=parsed['tablename'],
            schemaname=schemaname
        )
    df = parsed['parser'](filepath, list(col_info['column_name']))

    # One transaction for both the history row and the data rows: if the
    # write fails, the history entry is rolled back too.
    with db_engine.begin() as conn:
        import_id = dbu.add_to_import_history(
            filepath=filepath,
            db_connection=conn,
            filetype=parsed['filetype']
        )
        df['import_history_id'] = import_id
        dbu.write_df_to_db(
            df=df,
            db_connection=conn,
            tablename=parsed['tablename'],
            schemaname=schemaname
        )
    return import_id