def test_convert_to_numeric():
    """Check selected LOCA cells hold the expected floats after convert_to_numeric."""
    # Only the tables are used; the headings returned alongside are discarded.
    tables, _ = AGS4.AGS4_to_dataframe('tests/test_data.ags')
    loca_numeric = AGS4.convert_to_numeric(tables['LOCA'])

    # (row label, column, expected value) for each cell that is checked.
    expected_cells = (
        (0, 'LOCA_NATE', 100000.01),
        (2, 'LOCA_NATN', 5000000.20),
        (3, 'LOCA_FDEP', 50.44),
    )
    for row, column, expected in expected_cells:
        assert loca_numeric.loc[row, column] == expected
def test_AGS4_to_sorted_excel():
    """Check that sort_tables=True writes the worksheets in sorted group order."""
    ags_path = 'tests/test_data.ags'
    xlsx_path = 'tests/test_data.xlsx'

    tables, _ = AGS4.AGS4_to_dataframe(ags_path)
    AGS4.AGS4_to_excel(ags_path, xlsx_path, sort_tables=True)

    # sheet_name=None loads every sheet into a dict keyed by sheet name.
    workbook = pd.read_excel(xlsx_path, sheet_name=None, engine='openpyxl')

    # Dict keys are turned into lists so that their order is compared too.
    expected_order = sorted(tables.keys())
    written_order = list(workbook.keys())
    assert expected_order == written_order
def convert(filename: Path, results_dir: Path) -> Tuple[Optional[Path], dict]:
    """
    Convert filename between .ags and .xlsx.

    The output is written to a file in results_dir that has the same stem as
    filename and the opposite extension.

    :param filename: Path of the .ags or .xlsx file to convert.
    :param results_dir: Directory for the converted file; it is created
        (including missing parents) if it does not exist yet.
    :return: Tuple of (path to the converted file, response dictionary).  The
        path is None when the conversion failed.  The response is the
        dictionary from _prepare_response_metadata with 'message' and 'valid'
        set according to the outcome.
    """
    # Prepare variables and directory
    new_extension = '.ags' if filename.suffix == '.xlsx' else '.xlsx'
    converted_file = results_dir / (filename.stem + new_extension)
    logger.info("Converting %s to %s", filename.name, converted_file.name)
    # exist_ok avoids the check-then-create race when two conversions start
    # at the same time; parents allows a nested results directory.
    results_dir.mkdir(parents=True, exist_ok=True)

    # Prepare response with metadata
    response = _prepare_response_metadata(filename)

    # Do the conversion
    success = True
    if filename.suffix == '.ags':
        try:
            AGS4.AGS4_to_excel(filename, converted_file)
        except IndexError:
            success = False
            error_message = "ERROR: File does not have AGS4 format layout"
        except UnboundLocalError:
            # This error is thrown in response to a bug in the upstream code,
            # which in turn is only triggered if the AGS file has duplicate
            # headers.
            success = False
            error_message = "ERROR: File contains duplicate headers"
        except SystemExit:
            # There are two function calls in python_ags4.AGS4 that throw a
            # sys.exit in response to a bad file.  The associated errors are
            # summarised here.
            success = False
            error_message = "ERROR: UNIT and/or TYPE rows missing OR mismatched column numbers"
    elif filename.suffix == '.xlsx':
        try:
            AGS4.excel_to_AGS4(filename, converted_file)
        except AttributeError as err:
            # Include error details here in case they provide a clue e.g. which
            # attribute is missing
            success = False
            error_message = f"ERROR: Bad spreadsheet layout ({err.args[0]})"
    else:
        success = False
        error_message = f"ERROR: {filename.name} is not .ags or .xlsx format"

    # Update response and clean failed files
    if success:
        response['message'] = f"SUCCESS: {filename.name} converted to {converted_file.name}"
        response['valid'] = True
    else:
        response['message'] = error_message
        response['valid'] = False
        # A failed conversion may leave a partial output file behind.
        converted_file.unlink(missing_ok=True)
        converted_file = None

    return (converted_file, response)
def test_check_file():
    """Run check_file on the sample data and verify a clean file has no rule errors."""
    # The exact errors for the sample file are not asserted at present (the
    # expected list below is disabled), so this call only checks that the
    # checker runs to completion.
    error_list = AGS4.check_file('tests/test_data.ags',
                                 standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags')
    # assert error_list == ['Rule 1\t Line 12:\t Has one or more non-ASCII characters.',
    #                       'Rule 3\t Line 37:\t Consists only of spaces.',
    #                       'Rule 3\t Line 54:\t Does not start with a valid tag (i.e. GROUP, HEADING, TYPE, UNIT, or DATA).']

    # File without any errors
    error_list = AGS4.check_file('tests/test_files/example1.ags',
                                 standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags')
    # Rule keys carry the rule number (e.g. 'Rule 3'), so an exact lookup of
    # the key 'Rule' can never match.  Look for the substring in every key.
    assert not any('Rule' in key for key in error_list.keys())
def test_excel_to_AGS4_with_numeric_column_with_missing_TYPE():
    """Check that LLPL_425 survives an .xlsx -> AGS4 round trip without a TYPE."""
    xlsx_path = 'tests/test.xlsx'
    ags_path = 'tests/test.out'

    # Read LLPL table from xlsx file directly
    # LLPL_425 has numeric data but TYPE is erroneously set to ''
    llpl_sheet = pd.read_excel(xlsx_path, sheet_name='LLPL', engine='openpyxl')
    sheet_data_rows = llpl_sheet.HEADING.eq('DATA')
    expected = llpl_sheet.loc[sheet_data_rows, 'LLPL_425'].apply(
        pd.to_numeric, errors='coerce')

    # Convert .xlsx file to AGS4 file and read it back
    AGS4.excel_to_AGS4(xlsx_path, ags_path)
    tables, _ = AGS4.AGS4_to_dataframe(ags_path)

    llpl_table = tables['LLPL']
    table_data_rows = llpl_table.HEADING.eq('DATA')
    round_tripped = llpl_table.loc[table_data_rows, 'LLPL_425'].apply(
        pd.to_numeric, errors='coerce')

    # Check whether LLPL_425 was exported even though TYPE is not specified
    assert round_tripped.equals(expected)
def test_rule_AGS3():
    """The AGS3.ags sample should raise an AGS Format Rule 3 error mentioning AGS3."""
    rule_key = 'AGS Format Rule 3'
    report = AGS4.check_file(
        'tests/test_files/AGS3.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert 'AGS3' in first_error['desc']
def test_rule_19b_2():
    """The third AGS Format Rule 19b error should flag XXXX_425 in group LLPL."""
    rule_key = 'AGS Format Rule 19b'
    # No standard dictionary is passed for this file.
    report = AGS4.check_file('tests/test_files/4.1-rule19b-2.ags')

    assert rule_key in report
    third_error = report[rule_key][2]
    assert third_error['group'] == 'LLPL'
    assert third_error['desc'] == 'XXXX_425 does not start with the name of this group, nor is it defined in another group.'
def test_checking_without_dictionary_raises_error():
    """check_file should raise AGS4Error when no DICT table is available."""
    # Check file without a DICT table.
    # The same file is passed as the standard dictionary to
    # force exception to be raised.
    file_without_dict = 'tests/test_files/4.1-rule1.ags'
    expected_message = r'No DICT tables available to proceed with checking.*'

    with pytest.raises(AGS4.AGS4Error, match=expected_message):
        AGS4.check_file(file_without_dict,
                        standard_AGS4_dictionary=file_without_dict)
def test_rule_7_1():
    """DuplicateHeaders.ags should raise an AGS Format Rule 7 error on line 81."""
    rule_key = 'AGS Format Rule 7'
    # No standard dictionary is passed for this file.
    report = AGS4.check_file('tests/test_files/DuplicateHeaders.ags')

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['line'] == 81
    assert first_error['desc'] == 'HEADER row has duplicate fields.'
def checkandcompare(AGSFilename):
    """Check an AGS file and compare the result with its stored reference.

    The checker output (JSON dump of the error dictionary without its
    'Metadata' entry, or the text of the AGS4Error if one is raised) is
    written to '<name>.errors' and compared byte-for-byte with
    '<name>.check'.  The '.errors' file is removed when the two match.

    :param AGSFilename: Path of the .ags file to check, as a string.
    :return: A string starting with 'Passed - ' or 'Failed - ' followed by
        the file name and, for failures, the reason.
    """
    # splitext swaps only the final extension.  str.replace would also rewrite
    # '.ags' inside directory names and would leave a name without a
    # lowercase '.ags' untouched, so the report would overwrite the input.
    base_name = os.path.splitext(AGSFilename)[0]
    ErrorFilename = base_name + ".errors"
    CheckFilename = base_name + ".check"

    try:
        error_list = AGS4.check_file(
            AGSFilename,
            standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags'
        )
        # remove Metadata item if it exists as this will change every time
        # the file is checked
        error_list.pop("Metadata", None)
        report_text = json.dumps(error_list)
    except AGS4.AGS4Error as err:
        report_text = str(err)

    # The context manager closes the file even if the write fails.
    with open(ErrorFilename, "w") as f:
        f.write(report_text)

    # The comparison used to live in a 'finally' clause whose 'return'
    # silently discarded any unexpected exception and could compare against a
    # stale or missing '.errors' file.  Unexpected exceptions now propagate.
    if not os.path.exists(CheckFilename):
        return 'Failed - ' + AGSFilename + ' - no file to check against'

    if filecmp.cmp(ErrorFilename, CheckFilename, shallow=False):
        # remove the error file as this is no longer required
        os.remove(ErrorFilename)
        return 'Passed - ' + AGSFilename

    return 'Failed - ' + AGSFilename + ' - errors different from file'
def check_ags(filename: Path, standard_AGS4_dictionary: Optional[str] = None) -> dict:
    """Check an AGS file and return the checker name, errors and dictionary used.

    :param filename: Path of the file to check.
    :param standard_AGS4_dictionary: Passed straight through to
        AGS4.check_file; None by default.
    :return: Dictionary with keys 'checker' (python_ags4 version string),
        'errors' (the checker's error dictionary with 'Metadata' removed, or a
        single 'File read error' entry when the file cannot be decoded) and
        'dictionary' ('' when it cannot be determined).
    """
    # Get error information from file
    try:
        errors = AGS4.check_file(
            filename, standard_AGS4_dictionary=standard_AGS4_dictionary)
        try:
            # This also removes it from returned errors
            metadata = errors.pop('Metadata')
            # next() with a default avoids an uncaught IndexError when the
            # metadata has no 'Dictionary' entry.
            dictionary = next(
                (d['desc'] for d in metadata if d['line'] == 'Dictionary'),
                '')
        except KeyError:
            # 'Metadata' is not created for some files with errors
            dictionary = ''

    except UnicodeDecodeError as err:
        # Line number of the offending byte: count the lines up to and
        # including the end of the undecodable sequence.
        line_no = len(err.object[:err.end].split(b'\n'))
        description = f"UnicodeDecodeError: {err.reason}"
        errors = {
            'File read error': [{
                'line': line_no,
                'group': '',
                'desc': description
            }]
        }
        dictionary = ''

    return dict(checker=f'python_ags4 v{python_ags4.__version__}',
                errors=errors,
                dictionary=dictionary)
def check(input_file, dictionary, output_file):
    '''Check .ags file for error based AGS4 rules.

    INPUT_FILE   Path to .ags file to be checked
    '''
    # Reject anything that is not an .ags file before doing any work.
    if not input_file.endswith('.ags'):
        console.print(
            '[red]ERROR: Only .ags files are accepted as input.[/red]')
        return

    console.print(
        f'[green]Running [bold]python_ags4 v{__version__}[/bold][/green]')
    console.print(
        f'[green]Opening file... [bold]{input_file}[/bold][/green]')
    console.print('')

    ags_errors = AGS4.check_file(input_file,
                                 standard_AGS4_dictionary=dictionary)

    # An empty dictionary is falsy, which means nothing was reported.
    if not ags_errors:
        console.print(
            '\n[green]File check complete! No errors found.[/green]\n')
        if output_file is not None:
            save_to_file(output_file, ags_errors, input_file, 'No')
        return

    # Count number of entries in error log; only keys containing 'Rule' count.
    error_count = sum(
        len(entries) for rule, entries in ags_errors.items() if 'Rule' in rule)
    summary = f'\n[yellow]File check complete! {error_count} errors found![/yellow]'

    # Print errors to screen if list is short enough.
    if error_count < 100:
        print_to_screen(ags_errors)
        console.print(summary)
        if output_file is not None:
            save_to_file(output_file, ags_errors, input_file, error_count)
        return

    console.print(summary)
    console.print(
        '\n[yellow]Error report too long to print to screen.[/yellow]')

    # A long report is always saved; default to error_log.txt next to the
    # input file when no output file was given.
    if output_file is None:
        output_file = os.path.join(os.path.dirname(input_file),
                                   'error_log.txt')
    save_to_file(output_file, ags_errors, input_file, error_count)
def test_AGS4_to_excel(LOCA=LOCA, LLPL=LLPL):
    """Check selected PROJ and LOCA cells in the workbook written by AGS4_to_excel.

    The LOCA and LLPL parameters are not used in the body.
    """
    xlsx_path = 'tests/test_data.xlsx'
    AGS4.AGS4_to_excel('tests/test_data.ags', xlsx_path)

    # sheet_name=None loads every sheet into a dict keyed by sheet name.
    sheets = pd.read_excel(xlsx_path, sheet_name=None, engine='openpyxl')

    proj_ids = sheets['PROJ'].loc[:, 'PROJ_ID'].values
    assert proj_ids[2] == '123456'

    loca_ids = sheets['LOCA'].loc[:, 'LOCA_ID'].values
    assert loca_ids[1] == 'ID'
    assert loca_ids[2] == 'Location_1'
def test_rule_17_2():
    """The 4.1-rule17-2 sample should report a missing TYPE table under Rule 17."""
    rule_key = 'Rule 17'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule17-2.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'TYPE'
    assert first_error['desc'] == 'TYPE table not found.'
def test_rule_16b_5():
    """The 4.1-rule16b-5 sample should report TRAN_RCON missing under Rule 11b."""
    rule_key = 'Rule 11b'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule16b-5.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'TRAN'
    assert first_error['desc'] == 'TRAN_RCON missing.'
def test_rule_17_1():
    """The 4.1-rule17-1 sample should report data type "ID" missing from TYPE."""
    rule_key = 'AGS Format Rule 17'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule17-1.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'TYPE'
    assert first_error['desc'] == 'Data type "ID" not found in TYPE table.'
def test_rule_3():
    """The 4.1-rule3 sample should report an invalid data descriptor on line 58."""
    rule_key = 'Rule 3'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule3.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['line'] == 58
    assert first_error['desc'] == 'Does not start with a valid data descriptor.'
def test_rule_20_3():
    """The 4.1-rule20-3 sample should report a missing file under Rule 20."""
    rule_key = 'Rule 20'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule20-3.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    # The expected message uses the platform's path separator.
    missing_file = os.path.join("FILE", "327-16A", "wrong Report.pdf")

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'FILE'
    assert first_error['desc'] == f'File named "{missing_file}" not found even though it is defined in the FILE table.'
def test_rule_19a_2():
    """The 4.1-rule19a-2 sample should flag heading TEST_D-H under Rule 19a."""
    rule_key = 'Rule 19a'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule19a-2.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'TEST'
    assert first_error['desc'] == 'Heading TEST_D-H should consist of only uppercase letters, numbers, and an underscore character.'
def test_rule_14_1():
    """The 4.1-rule14-1 sample should flag multiple DATA rows in TRAN under Rule 14."""
    rule_key = 'Rule 14'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule14-1.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'TRAN'
    assert first_error['desc'] == 'There should not be more than one DATA row in the TRAN table.'
def test_rule_19():
    """The 4.1-rule19 sample should flag the group name TESTS under Rule 19."""
    rule_key = 'Rule 19'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule19.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'TESTS'
    assert first_error['desc'] == 'GROUP name should consist of four uppercase letters.'
def test_rule_2():
    """The 4.1-rule2 sample should report group SAMP without DATA rows under Rule 2."""
    rule_key = 'Rule 2'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule2.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    # TODO assert first_error['line'] == ?
    assert first_error['group'] == 'SAMP'
    assert first_error['desc'] == 'No DATA rows in group.'
def test_rule_18_2():
    """The 4.1-rule18-2 sample should report out-of-order LOCA headings under Rule 7."""
    rule_key = 'Rule 7'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule18-2.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'LOCA'
    assert first_error['desc'] == 'Headings not in order starting from LOCA_CHKG. Expected order: ...LOCA_APPG|LOCA_CHKG'
def test_rule_20_2():
    """The 4.1-rule20-2 sample should report a missing sub-folder under AGS Format Rule 20."""
    rule_key = 'AGS Format Rule 20'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule20-2.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    # The expected message uses the platform's path separator.
    missing_folder = os.path.join("FILE", "327")

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'FILE'
    assert first_error['desc'] == f'Sub-folder named "{missing_folder}" not found even though it is defined in the FILE table.'
def test_rule_15_2():
    """The 4.1-rule15-2 sample should report unit "%" missing from UNIT under Rule 15."""
    rule_key = 'Rule 15'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule15-2.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'UNIT'
    assert first_error['desc'] == 'Unit "%" not found in UNIT table.'
def test_rule_16_3():
    """The 4.1-rule16-3 sample should report a missing ABBR table under AGS Format Rule 16."""
    rule_key = 'AGS Format Rule 16'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule16-3.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'ABBR'
    assert first_error['desc'] == 'ABBR table not found.'
def test_rule_2b_3():
    """The 4.1-rule2b3 sample should report a missing TYPE row in ABBR under Rule 2b."""
    rule_key = 'Rule 2b'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule2b3.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    # TODO assert first_error['line'] == ?
    assert first_error['group'] == 'ABBR'
    assert first_error['desc'] == 'TYPE row missing from group.'
def test_rule_19a_1():
    """The 4.1-rule19a-1 sample should flag the over-long heading TEST_DEPTH under Rule 19a."""
    rule_key = 'Rule 19a'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule19a-1.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'TEST'
    assert first_error['desc'] == 'Heading TEST_DEPTH is more than 9 characters in length.'
def test_rule_18_1():
    """The 4.1-rule18-1 sample should report LOCA_APPG as undefined under Rule 9."""
    rule_key = 'Rule 9'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule18-1.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    assert first_error['group'] == 'LOCA'
    assert first_error['desc'] == 'LOCA_APPG not found in DICT table or the standard AGS4 dictionary.'
def test_rule_10_2():
    """The 4.1-rule10-2 sample should report key field SAMP_ID missing from LLPL under Rule 10a."""
    rule_key = 'Rule 10a'
    report = AGS4.check_file(
        'tests/test_files/4.1-rule10-2.ags',
        standard_AGS4_dictionary='python_ags4/Standard_dictionary_v4_1.ags',
    )

    assert rule_key in report
    first_error = report[rule_key][0]
    # TODO assert first_error['line'] == ?
    assert first_error['group'] == 'LLPL'
    assert first_error['desc'] == 'Key field SAMP_ID not found.'