Beispiel #1
0
def process_file(input_file, clean_tables=False, complete=False):
    '''Load, parse and import one extract workbook file.

    The workbook is opened with parser.load_workbook and parsed with
    parser.parse_workbook (which also receives input_file). The parsed
    result is passed to loader.import_extract together with the
    clean_tables and complete flags, unchanged. An info message naming
    the file is logged once the import call has returned.
    '''
    extract_workbook = parser.load_workbook(input_file)
    parsed_sheets = parser.parse_workbook(extract_workbook, input_file)

    loader.import_extract(parsed_sheets, clean_tables, complete)
    logging.info('Successfully processed file %s', input_file)
Beispiel #2
0
def test_load_workbook():
    '''Loading the XLSX test workbook exposes the expected sheet names.

    Checks the total number of sheets and spot-checks the names found
    at the first three and last three positions.
    '''
    expected_by_position = (
        (0, 'WMT_Extract'),
        (1, 'WMT_Extract_Filtered'),
        (2, 'Court_Reports'),
        (13, 'Suspended_Lifers'),
        (14, 'T2A_Detail'),
        (15, 'OMIC_Teams'),
    )

    loaded = parser.load_workbook(TEST_DATA_FILE_PATH)

    assert len(loaded.sheet_names) == 16
    for position, sheet_name in expected_by_position:
        assert loaded.sheet_names[position] == sheet_name
Beispiel #3
0
def test_parse_workbook():
    '''Test that a workbook can be parsed correctly

    Loads the test workbook, parses it, and checks the number of
    resulting dataframes plus the column/row counts and selected column
    names of the 'wmt_extract' and 'court_reports' dataframes.
    '''
    workbook = parser.load_workbook(TEST_DATA_FILE_PATH)
    dataframes = parser.parse_workbook(workbook)

    # No debug print here: on failure pytest's assertion introspection
    # already reports the actual length.
    assert len(dataframes) == 16

    assert not dataframes['wmt_extract'].empty
    assert len(dataframes['wmt_extract'].columns) == 41
    assert len(dataframes['wmt_extract'].index) == 2
    assert len(dataframes['court_reports'].columns) == 17
    assert len(dataframes['court_reports'].index) == 2

    # Spot-check individual column names by position.
    assert dataframes['wmt_extract'].columns[3] == 'ldu_desc'
    assert dataframes['court_reports'].columns[7] == 'om_surname'
def test_import_extract_rollback():
    '''Failed staging load should roll back all operations

    Points config.DB_STG_SCHEMA at a schema named 'invalid', runs the
    import, and expects a ProgrammingError whose text mentions that
    schema name. The previous schema setting is always restored, and
    the staging area is cleaned and the connection closed afterwards.
    '''
    workbook = parser.load_workbook(TEST_DATA_FILE_PATH)
    dataframes = parser.parse_workbook(workbook)
    engine = loader.get_db_engine()
    connection = engine.connect()
    # Remember the configured schema so the test restores exactly what it
    # found instead of assuming a particular value.
    original_schema = config.DB_STG_SCHEMA

    try:
        config.DB_STG_SCHEMA = 'invalid'
        with pytest.raises(ProgrammingError) as error:
            loader.import_extract(dataframes)
            # Only reached when no exception was raised. This replaces the
            # `message=` argument of pytest.raises, which newer pytest
            # releases no longer accept.
            pytest.fail('Expecting ProgrammingError')
        assert 'invalid' in str(error.value)
    finally:
        config.DB_STG_SCHEMA = original_schema
        try:
            cleanup_staging(connection)
        finally:
            # Close the connection even if the staging cleanup fails.
            connection.close()
def test_should_import_extract():
    '''Should import valid dataframes to staging schema

    Cleans the staging area, imports the parsed test workbook, then
    counts the rows of every table named in config.VALID_SHEET_NAMES
    within config.DB_STG_SCHEMA and expects 3 rows in each. The staging
    area is cleaned and the connection closed afterwards, whether or not
    the set-up or the assertions succeed.
    '''
    engine = loader.get_db_engine()
    connection = engine.connect()

    try:
        # Set-up happens inside the try block so that a failure while
        # loading, parsing or importing still releases the connection
        # and cleans up anything already written to staging.
        cleanup_staging(connection)
        workbook = parser.load_workbook(TEST_DATA_FILE_PATH)
        dataframes = parser.parse_workbook(workbook)
        loader.import_extract(dataframes)

        for name in config.VALID_SHEET_NAMES:
            # Schema and table names come from project configuration,
            # not from external input.
            select = 'SELECT COUNT(*) FROM {0}.{1}'.format(
                config.DB_STG_SCHEMA, name)

            results = connection.execute(select)
            for row in results:
                assert row[0] == 3, "%r" % name
    finally:
        try:
            cleanup_staging(connection)
        finally:
            # Close the connection even if the staging cleanup fails.
            connection.close()
Beispiel #6
0
def test_invalid_file_type():
    '''A file that is not a valid workbook is rejected with an XLRDError.

    The error text must mention an unsupported format or corrupt file.
    '''
    expected_text = 'Unsupported format, or corrupt file'

    with pytest.raises(XLRDError) as excinfo:
        parser.load_workbook(INVALID_FILE_TYPE_PATH)

    raised_text = str(excinfo.value)
    assert expected_text in raised_text
Beispiel #7
0
def test_load_workbook_invalid():
    '''A workbook lacking the expected worksheets raises a ValueError.

    The error text must state that the expected worksheets are missing.
    '''
    expected_text = 'Workbook does not contain the expected worksheets'

    with pytest.raises(ValueError) as excinfo:
        parser.load_workbook(INVALID_DATA_FILE_PATH)

    raised_text = str(excinfo.value)
    assert expected_text in raised_text
Beispiel #8
0
def test_load_workbook_missing_file():
    '''Loading a workbook path that does not exist raises an IOError.

    The error text must contain the 'No such file or directory' message.
    '''
    missing_path = './data/missing.xlsx'
    expected_text = 'No such file or directory'

    with pytest.raises(IOError) as excinfo:
        parser.load_workbook(missing_path)

    raised_text = str(excinfo.value)
    assert expected_text in raised_text