def main():
    """
    Command line entry point.

    Builds the parser with ``create_parser``, parses the arguments and runs
    the selected sub-command (``create-template``, ``flatten`` or
    ``unflatten``). With no sub-command the help text is printed. Unless
    ``--verbose`` was given, the non-verbose exception hook and warning
    formatter are installed first.
    """
    parser = create_parser()
    options = parser.parse_args()
    command = options.subparser_name

    # Nothing to run: show usage and stop.
    if command is None:
        parser.print_help()
        return

    if not options.verbose:
        sys.excepthook = non_verbose_error_handler
        warnings.formatwarning = non_verbose_warning_formatter

    if command == "flatten":
        flatten(**kwargs_from_parsed_args(options))
        return
    if command == "unflatten":
        unflatten(**kwargs_from_parsed_args(options))
        return
    if command != "create-template":
        return

    # OS-level errors (the original note mentions a missing schema file)
    # are printed instead of being propagated.
    try:
        # kwargs_from_parsed_args keeps empty arguments out of the call
        # (per the original note).
        create_template(**kwargs_from_parsed_args(options))
    except (OSError, IOError) as error:
        print(str(error))
def test_360_fields_case_insensitive(tmpdir):
    """Unflatten the grants workbook and its ``title_space_case`` variant
    and check that both produce identical JSON."""

    def convert(xlsx_path, json_basename):
        # Unflatten one workbook and return the parsed JSON it produced.
        json_file = tmpdir.join(json_basename)
        unflatten(
            input_name=xlsx_path,
            output_name=json_file.strpath,
            input_format='xlsx',
            schema='flattentool/tests/fixtures/360-giving-schema.json',
            main_sheet_name='grants',
            root_list_path='grants',
            root_id='',
            convert_titles=True)
        return json.load(json_file)

    output_json_grants = convert(
        'flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants.xlsx',
        'output_grant.json')
    output_json_space_case = convert(
        'flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants_title_space_case.xlsx',
        'output_space_case.json')

    assert output_json_grants == output_json_space_case
def test_roundtrip_360(tmpdir, output_format, use_titles):
    """Flatten a 360Giving fixture, unflatten the result and check that the
    JSON comes back unchanged.

    ``output_format`` and ``use_titles`` are supplied by the test
    parametrisation (not visible in this block).
    """
    input_name = (
        "flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json"
    )
    flattened_name = tmpdir.join("flattened").strpath + "." + output_format
    flatten(
        input_name=input_name,
        output_name=flattened_name,
        output_format=output_format,
        schema="flattentool/tests/fixtures/360-giving-schema.json",
        root_list_path="grants",
        root_id="",
        use_titles=use_titles,
        main_sheet_name="grants",
    )
    unflatten(
        input_name=flattened_name,
        output_name=tmpdir.join("roundtrip.json").strpath,
        input_format=output_format,
        schema="flattentool/tests/fixtures/360-giving-schema.json",
        root_list_path="grants",
        root_id="",
        convert_titles=use_titles,
    )
    # Use a context manager so the fixture's file handle is closed instead
    # of being left to the garbage collector.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(tmpdir.join("roundtrip.json"))
    assert original_json == roundtripped_json
def convert_spreadsheet(file_path, file_type, tmp_dir):
    """Unflatten a spreadsheet into ``output.json`` inside *tmp_dir*.

    A CSV file is first copied to ``grants.csv`` in *tmp_dir* and the
    directory is handed to flattentool; its encoding is chosen by trying to
    decode the copy as utf-8, then cp1252, falling back to latin_1. Any
    other file type is passed to flattentool directly with utf-8.

    If unflattening fails, a message naming *file_path* is printed and the
    exception is re-raised.
    """
    converted_path = os.path.join(tmp_dir, 'output.json')
    encoding = 'utf-8'
    input_name = file_path

    if file_type == 'csv':
        destination = os.path.join(tmp_dir, 'grants.csv')
        shutil.copy(file_path, destination)

        # Last-resort encoding, used when neither candidate decodes cleanly.
        encoding = 'latin_1'
        for candidate in ('utf-8', 'cp1252'):
            try:
                with open(destination, encoding=candidate) as main_sheet_file:
                    main_sheet_file.read()
            except UnicodeDecodeError:
                continue
            encoding = candidate
            break
        input_name = tmp_dir

    try:
        flattentool.unflatten(
            input_name,
            output_name=converted_path,
            input_format=file_type,
            main_sheet_name='grants',
            root_id='',
            schema='https://raw.githubusercontent.com/ThreeSixtyGiving/standard/master/schema/360-giving-schema.json',
            convert_titles=True,
            encoding=encoding
        )
    except Exception:
        print("Unflattening failed for file {}".format(file_path))
        raise
def test_roundtrip(tmpdir, output_format):
    """Flatten a releases fixture, unflatten the result (with a base JSON)
    and compare it with the original.

    ``awardCriteriaDetails`` is reset to ``None`` on every round-tripped
    release before comparing, because Nones cannot currently be
    round-tripped (see the issue linked below).
    """
    input_name = 'flattentool/tests/fixtures/tenders_releases_2_releases.json'
    base_name = 'flattentool/tests/fixtures/tenders_releases_base.json'
    flattened_name = tmpdir.join('flattened').strpath + '.' + output_format
    flatten(
        input_name=input_name,
        output_name=flattened_name,
        output_format=output_format,
        schema='flattentool/tests/fixtures/release-schema.json',
        root_list_path='releases',
        main_sheet_name='releases')
    unflatten(
        input_name=flattened_name,
        output_name=tmpdir.join('roundtrip.json').strpath,
        input_format=output_format,
        base_json=base_name,
        schema='flattentool/tests/fixtures/release-schema.json',
        root_list_path='releases')
    # Close the fixture deterministically rather than leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(tmpdir.join('roundtrip.json'))

    # Not currently possible to roundtrip Nones
    # https://github.com/open-contracting/flattening-ocds/issues/35
    for release in roundtripped_json['releases']:
        release['tender']['awardCriteriaDetails'] = None

    assert original_json == roundtripped_json
def test_360_main_sheetname_insensitive(tmpdir):
    """Unflatten the grants workbook and its ``sheet_title_case`` variant
    with ``main_sheet_name="grants"`` and check the JSON is identical."""
    schema_path = "flattentool/tests/fixtures/360-giving-schema.json"

    def convert(xlsx_path, json_basename):
        # Unflatten one workbook and return the parsed JSON output.
        json_file = tmpdir.join(json_basename)
        unflatten(
            input_name=xlsx_path,
            output_name=json_file.strpath,
            input_format="xlsx",
            schema=schema_path,
            main_sheet_name="grants",
            root_list_path="grants",
            root_id="",
            convert_titles=True,
        )
        return json.load(json_file)

    output_json_grants = convert(
        "flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants.xlsx",
        "output_grant.json",
    )
    output_json_Grants = convert(
        "flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants_sheet_title_case.xlsx",
        "output_grant_sheet_title_case.json",
    )

    assert output_json_grants == output_json_Grants
def main():
    """
    Command line entry point.

    Parses the arguments produced by ``create_parser`` and calls
    ``create_template``, ``flatten`` or ``unflatten`` according to the
    chosen sub-command; prints the help text when none was given.
    """
    parser = create_parser()
    parsed = parser.parse_args()
    command = parsed.subparser_name

    if command is None:
        parser.print_help()
    elif command == 'flatten':
        flatten(**kwargs_from_parsed_args(parsed))
    elif command == 'unflatten':
        unflatten(**kwargs_from_parsed_args(parsed))
    elif command == 'create-template':
        # OS-level errors (the original note mentions a missing schema
        # file) are printed instead of being propagated.
        try:
            # kwargs_from_parsed_args keeps empty arguments out of the call
            # (per the original note).
            create_template(**kwargs_from_parsed_args(parsed))
        except (OSError, IOError) as error:
            print(text_type(error))
            return
def test_roundtrip(tmpdir, output_format):
    """Round-trip a releases fixture through flatten and unflatten.

    The round-tripped ``awardCriteriaDetails`` values are forced to ``None``
    before comparing, since Nones cannot currently be round-tripped (see
    the issue linked below).
    """
    input_name = 'flattentool/tests/fixtures/tenders_releases_2_releases.json'
    base_name = 'flattentool/tests/fixtures/tenders_releases_base.json'
    schema_name = 'flattentool/tests/fixtures/release-schema.json'
    flattened_name = tmpdir.join('flattened').strpath + '.' + output_format
    roundtrip_file = tmpdir.join('roundtrip.json')

    flatten(input_name=input_name,
            output_name=flattened_name,
            output_format=output_format,
            schema=schema_name,
            root_list_path='releases',
            main_sheet_name='releases')
    unflatten(input_name=flattened_name,
              output_name=roundtrip_file.strpath,
              input_format=output_format,
              base_json=base_name,
              schema=schema_name,
              root_list_path='releases')

    # The fixture handle is closed by the context manager; the original
    # left it open.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(roundtrip_file)

    # Not currently possible to roundtrip Nones
    # https://github.com/open-contracting/flattening-ocds/issues/35
    for release in roundtripped_json['releases']:
        release['tender']['awardCriteriaDetails'] = None

    assert original_json == roundtripped_json
def test_roundtrip_360_rollup(tmpdir, use_titles):
    """Flatten a 360Giving fixture to CSV with ``rollup=True``, keep only
    the main ``grants.csv`` sheet, unflatten it and compare the result with
    the original JSON."""
    input_name = 'flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json'
    output_format = 'csv'
    output_name = tmpdir.join('flattened').strpath + '.' + output_format
    moved_name = tmpdir.mkdir('flattened_main_only').strpath

    flatten(
        input_name=input_name,
        output_name=output_name,
        output_format=output_format,
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        root_list_path='grants',
        root_id='',
        use_titles=use_titles,
        rollup=True,
        main_sheet_name='grants')

    # Move the main sheet into its own directory so that it is the only
    # CSV passed to unflatten.
    os.rename(os.path.join(output_name, 'grants.csv'),
              os.path.join(moved_name, 'grants.csv'))

    unflatten(
        input_name=moved_name,
        output_name=tmpdir.join('roundtrip.json').strpath,
        input_format=output_format,
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        root_list_path='grants',
        root_id='',
        convert_titles=use_titles)

    # Close the fixture deterministically rather than leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(tmpdir.join('roundtrip.json'))
    assert original_json == roundtripped_json
def test_commands_hashcomments_sourcemap(tmpdir, input_format):
    """Unflatten the ``commands_hashcomments_sourcemap`` fixture and check
    both the JSON output and the column letters recorded in the cell
    source map."""
    fixture_path = "flattentool/tests/fixtures/{}/commands_hashcomments_sourcemap.{}".format(
        input_format, input_format
    )
    unflattened_file = tmpdir.join("commands_hashcomments_unflattened.json")
    cell_map_file = tmpdir.join("commands_hashcomments_source_map.json")
    heading_map_file = tmpdir.join("commands_hashcomments_heading_source_map.json")

    unflatten(
        fixture_path,
        input_format=input_format,
        output_name=unflattened_file.strpath,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
    )

    unflattened = json.load(unflattened_file)
    cell_source_map = json.load(cell_map_file)

    expected = {
        "publishedDate": "2019-06-20T00:00:00Z",
        "publisher": {"name": "Open Data Services Co-operative"},
        "uri": "http://www.example.com",
        "version": "1.1",
        "main": [{"date": "2010-03-15T09:30:00Z", "id": "Ocds-1"}],
    }
    assert unflattened == expected

    # check fields have correct column letters
    assert cell_source_map["main/0/date"][0][1] == "E"
    assert cell_source_map["main/0/id"][0][1] == "C"
def test_metatab_only(tmpdir):
    """Unflatten ``basic_meta.xlsx`` with ``metatab_only=True`` and check
    the JSON output, the cell source map and the heading source map."""
    json_file = tmpdir.join('meta_unflattened.json')
    cell_map_file = tmpdir.join('meta_cell_source_map.json')
    heading_map_file = tmpdir.join('meta_heading_source_map.json')

    unflatten(
        'flattentool/tests/fixtures/xlsx/basic_meta.xlsx',
        input_format='xlsx',
        output_name=json_file.strpath,
        metatab_name='Meta',
        metatab_vertical_orientation=True,
        metatab_only=True,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
    )

    metatab_json = json.load(json_file)
    assert metatab_json == {'a': 'a1', 'b': 'b1', 'c': 'c1'}

    cell_source_map = json.load(cell_map_file)
    assert cell_source_map == {
        '': [['Meta', 2]],
        'a': [['Meta', '1', 2, 'a']],
        'b': [['Meta', '2', 2, 'b']],
        'c': [['Meta', '3', 2, 'c']],
    }

    heading_source_map = json.load(heading_map_file)
    assert heading_source_map == {
        'a': [['Meta', 'a']],
        'b': [['Meta', 'b']],
        'c': [['Meta', 'c']],
    }
def test_roundtrip_360(tmpdir, output_format, use_titles):
    """Round-trip the WellcomeTrust 360Giving fixture through flatten and
    unflatten and compare the result with the original JSON.

    For CSV output (and on Python 2) the original ``duration`` values are
    converted to strings before comparing, because number types are not
    preserved in those cases (see the comment below).
    """
    input_name = 'flattentool/tests/fixtures/WellcomeTrust-grants_fixed_2_grants.json'
    flattened_name = tmpdir.join('flattened').strpath + '.' + output_format
    flatten(input_name=input_name,
            output_name=flattened_name,
            output_format=output_format,
            schema='flattentool/tests/fixtures/360-giving-schema.json',
            main_sheet_name='grants',
            root_list_path='grants',
            root_id='',
            use_titles=use_titles)
    unflatten(input_name=flattened_name,
              output_name=tmpdir.join('roundtrip.json').strpath,
              input_format=output_format,
              schema='flattentool/tests/fixtures/360-giving-schema.json',
              main_sheet_name='grants',
              root_list_path='grants',
              root_id='',
              convert_titles=use_titles)

    # Close the fixture deterministically rather than leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(tmpdir.join('roundtrip.json'))

    # Currently not enough information to successfully roundtrip that values
    # are numbers, when this is not required by the schema
    # for CSV, and for openpyxl under Python 2
    if output_format == 'csv' or sys.version_info < (3, 0):
        for grant in original_json['grants']:
            grant['plannedDates'][0]['duration'] = str(
                grant['plannedDates'][0]['duration'])

    assert original_json == roundtripped_json
def test_roundtrip_360(tmpdir, output_format, use_titles):
    """Flatten the WellcomeTrust 360Giving fixture, unflatten it again and
    check that the JSON matches the original.

    ``duration`` values in the original are stringified before comparing
    when the format is CSV or the interpreter is Python 2, where number
    types do not survive the round trip (see the comment below).
    """
    input_name = 'flattentool/tests/fixtures/WellcomeTrust-grants_fixed_2_grants.json'
    schema_name = 'flattentool/tests/fixtures/360-giving-schema.json'
    flattened_name = tmpdir.join('flattened').strpath + '.' + output_format
    roundtrip_file = tmpdir.join('roundtrip.json')

    flatten(
        input_name=input_name,
        output_name=flattened_name,
        output_format=output_format,
        schema=schema_name,
        main_sheet_name='grants',
        root_list_path='grants',
        root_id='',
        use_titles=use_titles)
    unflatten(
        input_name=flattened_name,
        output_name=roundtrip_file.strpath,
        input_format=output_format,
        schema=schema_name,
        main_sheet_name='grants',
        root_list_path='grants',
        root_id='',
        convert_titles=use_titles)

    # The original opened the fixture without ever closing it.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(roundtrip_file)

    # Currently not enough information to successfully roundtrip that values
    # are numbers, when this is not required by the schema
    # for CSV, and for openpyxl under Python 2
    if output_format == 'csv' or sys.version_info < (3, 0):
        for grant in original_json['grants']:
            planned = grant['plannedDates'][0]
            planned['duration'] = str(planned['duration'])

    assert original_json == roundtripped_json
def test_unflatten_cf_daily_csv_using_base_json():
    """Clean a Contracts Finder CSV export, unflatten it with a base JSON
    and check that the resulting JSON is non-empty.

    The working directory under the ``CF_CSV`` fixtures is wiped and
    recreated on every run.
    """
    CF_DIR = join(TESTS_DIR, "fixtures", "CF_CSV")
    working_dir = join(CF_DIR, "working_files")
    csv_path_or_url = join(CF_DIR, "export-2020-08-05_single_buyer.csv")
    output_file = join(working_dir, "release_packages.json")
    clean_output_dir = join(working_dir, "cleaned")
    clean_output_file = join(clean_output_dir, "cleaned.csv")

    # Start from an empty working directory.
    shutil.rmtree(working_dir, ignore_errors=True)
    os.makedirs(clean_output_dir)

    df = pd.read_csv(csv_path_or_url)
    cf_mapper = CSVMapper(mappings_file=CF_MAPPINGS_FILE)
    fixed_df = fix_contracts_finder_flat_csv(df)
    fixed_df = cf_mapper.convert_cf_to_1_1(fixed_df)

    # Write through a context manager so the CSV is flushed and closed
    # before unflatten reads the directory.
    with open(clean_output_file, "w") as clean_csv:
        fixed_df.to_csv(clean_csv, index=False, header=True)

    base_json_path = join(CF_DIR, "working_files", "base.json")
    # Presumably writes the base JSON to base_json_path - TODO confirm.
    # The return value was never used, so it is no longer bound.
    cf_mapper.prepare_base_json_from_release_df(fixed_df, base_json_path)

    unflatten(clean_output_dir,
              base_json=base_json_path,
              output_name=output_file,
              root_list_path="releases",
              input_format="csv",
              root_id="ocid",
              root_is_list=False,
              schema=OCDS_SCHEMA)

    with open(output_file) as result_file:
        js = json.load(result_file)
    assert js
def convert_spreadsheet(input_path, converted_path, file_type):
    """Unflatten a spreadsheet at *input_path* into JSON at *converted_path*.

    A CSV file is copied to ``grants.csv`` in a fresh temporary directory,
    which is handed to flattentool; the encoding is chosen by trying to
    decode the copy as utf-8-sig, then cp1252, falling back to latin_1.
    Any other *file_type* is passed to flattentool directly.

    The temporary directory is removed once unflattening finishes or fails;
    the original never deleted it.
    """
    encoding = 'utf-8-sig'
    tmp_dir = None
    try:
        if file_type == 'csv':
            tmp_dir = tempfile.mkdtemp()
            destination = os.path.join(tmp_dir, 'grants.csv')
            shutil.copy(input_path, destination)
            try:
                with open(destination, encoding='utf-8-sig') as main_sheet_file:
                    main_sheet_file.read()
            except UnicodeDecodeError:
                try:
                    with open(destination, encoding='cp1252') as main_sheet_file:
                        main_sheet_file.read()
                    encoding = 'cp1252'
                except UnicodeDecodeError:
                    encoding = 'latin_1'
            input_name = tmp_dir
        else:
            input_name = input_path

        flattentool.unflatten(
            input_name,
            output_name=converted_path,
            input_format=file_type,
            root_list_path='grants',
            root_id='',
            schema='https://raw.githubusercontent.com/ThreeSixtyGiving/standard/master/schema/360-giving-schema.json',
            convert_titles=True,
            encoding=encoding)
    finally:
        # Only the CSV branch creates a temporary directory.
        if tmp_dir is not None:
            shutil.rmtree(tmp_dir, ignore_errors=True)
def test_metatab_only(tmpdir, input_format):
    """Unflatten the ``basic_meta`` fixture for *input_format* with
    ``metatab_only=True`` and check the JSON and both source maps."""
    fixture_path = "flattentool/tests/fixtures/{}/basic_meta.{}".format(
        input_format, input_format
    )
    json_file = tmpdir.join("meta_unflattened.json")
    cell_map_file = tmpdir.join("meta_cell_source_map.json")
    heading_map_file = tmpdir.join("meta_heading_source_map.json")

    unflatten(
        fixture_path,
        input_format=input_format,
        output_name=json_file.strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
        metatab_only=True,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
    )

    metatab_json = json.load(json_file)
    assert metatab_json == {"a": "a1", "b": "b1", "c": "c1"}

    cell_source_map = json.load(cell_map_file)
    expected_cells = {
        "": [["Meta", 2]],
        "a": [["Meta", "1", 2, "a"]],
        "b": [["Meta", "2", 2, "b"]],
        "c": [["Meta", "3", 2, "c"]],
    }
    assert cell_source_map == expected_cells

    heading_source_map = json.load(heading_map_file)
    expected_headings = {
        "a": [["Meta", "a"]],
        "b": [["Meta", "b"]],
        "c": [["Meta", "c"]],
    }
    assert heading_source_map == expected_headings
def test_roundtrip_360_rollup(tmpdir, use_titles):
    """Round-trip a 360Giving fixture through a rolled-up CSV main sheet.

    The fixture is flattened to CSV with ``rollup=True``; only
    ``grants.csv`` is moved to a separate directory and unflattened, and
    the result must equal the original JSON.
    """
    input_name = 'flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json'
    schema_name = 'flattentool/tests/fixtures/360-giving-schema.json'
    output_format = 'csv'
    output_name = tmpdir.join('flattened').strpath + '.' + output_format
    moved_name = tmpdir.mkdir('flattened_main_only').strpath
    roundtrip_file = tmpdir.join('roundtrip.json')

    flatten(input_name=input_name,
            output_name=output_name,
            output_format=output_format,
            schema=schema_name,
            root_list_path='grants',
            root_id='',
            use_titles=use_titles,
            rollup=True,
            main_sheet_name='grants')

    # Keep only the main sheet as unflatten input.
    os.rename(os.path.join(output_name, 'grants.csv'),
              os.path.join(moved_name, 'grants.csv'))

    unflatten(input_name=moved_name,
              output_name=roundtrip_file.strpath,
              input_format=output_format,
              schema=schema_name,
              root_list_path='grants',
              root_id='',
              convert_titles=use_titles)

    # The original opened the fixture without ever closing it.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(roundtrip_file)
    assert original_json == roundtripped_json
def test_roundtrip_xml(tmpdir, output_format):
    """Flatten the IATI example XML, unflatten it again and check that the
    parsed XML is the same, ignoring element ordering."""
    input_name = "examples/iati/expected.xml"
    flattened_name = tmpdir.join("flattened").strpath + "." + output_format
    flatten(
        input_name=input_name,
        output_name=flattened_name,
        output_format=output_format,
        root_list_path="iati-activity",
        id_name="iati-identifier",
        xml=True,
    )
    unflatten(
        input_name=flattened_name,
        output_name=tmpdir.join("roundtrip.xml").strpath,
        input_format=output_format,
        root_list_path="iati-activity",
        id_name="iati-identifier",
        xml=True,
    )

    # Compare without ordering, by using dict_constructor=dict instead of
    # OrderedDict. Both files are parsed inside context managers so their
    # handles are closed; the original left them open.
    with open(input_name, "rb") as original_xml:
        original = xmltodict.parse(original_xml, dict_constructor=dict)
    with tmpdir.join("roundtrip.xml").open("rb") as roundtripped_xml:
        roundtripped = xmltodict.parse(roundtripped_xml, dict_constructor=dict)

    assert original == roundtripped
def main():
    """
    Command line entry point.

    Parses the command line with the parser from ``create_parser`` and
    forwards the parsed arguments to ``create_template``, ``flatten`` or
    ``unflatten``, depending on the sub-command. With no sub-command the
    help text is printed. Argument defaults are not set here (per the
    original docstring, they are given in the called function).
    """
    parser = create_parser()
    cli_args = parser.parse_args()
    chosen = cli_args.subparser_name

    if chosen is None:
        parser.print_help()
        return

    if chosen == 'create-template':
        # OS-level errors (the original note mentions a missing schema
        # file) are printed instead of being propagated.
        try:
            # kwargs_from_parsed_args keeps empty arguments out of the call
            # (per the original note).
            create_template(**kwargs_from_parsed_args(cli_args))
        except (OSError, IOError) as error:
            print(text_type(error))
        return

    if chosen == 'flatten':
        flatten(**kwargs_from_parsed_args(cli_args))
    elif chosen == 'unflatten':
        unflatten(**kwargs_from_parsed_args(cli_args))
def test_commands_id_name(tmpdir, input_format):
    """Unflatten the ``commands_id_name`` fixture for *input_format* and
    check the resulting JSON."""
    fixture_path = "flattentool/tests/fixtures/{}/commands_id_name.{}".format(
        input_format, input_format
    )
    json_file = tmpdir.join("commands_id_name_unflattened.json")

    unflatten(
        fixture_path,
        input_format=input_format,
        output_name=json_file.strpath,
        cell_source_map=tmpdir.join("commands_id_name_source_map.json").strpath,
        heading_source_map=tmpdir.join(
            "commands_id_name_heading_source_map.json"
        ).strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
    )

    unflattened = json.load(json_file)
    expected_root = {
        "actual": "actual",
        "headings": "data",
        "someId": "some",
        "someArray": [
            {"heading1": "more data", "heading2": "other data"},
            {"heading1": "more more data", "heading2": "more other data"},
        ],
    }
    assert unflattened == {"someroot": [expected_root], "some": "data"}
def test_roundtrip(tmpdir, output_format):
    """Flatten a releases fixture, unflatten it (with a base JSON) and
    compare with the original.

    ``awardCriteriaDetails`` is reset to ``None`` in the round-tripped
    releases first, since Nones cannot currently be round-tripped (see the
    issue linked below).
    """
    input_name = "flattentool/tests/fixtures/tenders_releases_2_releases.json"
    base_name = "flattentool/tests/fixtures/tenders_releases_base.json"
    flattened_name = tmpdir.join("flattened").strpath + "." + output_format
    flatten(
        input_name=input_name,
        output_name=flattened_name,
        output_format=output_format,
        schema="flattentool/tests/fixtures/release-schema.json",
        root_list_path="releases",
        main_sheet_name="releases",
    )
    unflatten(
        input_name=flattened_name,
        output_name=tmpdir.join("roundtrip.json").strpath,
        input_format=output_format,
        base_json=base_name,
        schema="flattentool/tests/fixtures/release-schema.json",
        root_list_path="releases",
    )
    # Close the fixture deterministically rather than leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(tmpdir.join("roundtrip.json"))

    # Not currently possible to roundtrip Nones
    # https://github.com/open-contracting/flattening-ocds/issues/35
    for release in roundtripped_json["releases"]:
        release["tender"]["awardCriteriaDetails"] = None

    assert original_json == roundtripped_json
def test_commands_single_sheet_default(tmpdir):
    """Unflatten ``commands_defaulted.xlsx`` with two different
    ``default_configuration`` strings and check the JSON each one yields."""
    json_file = tmpdir.join('command_single_unflattened.json')

    def run(configuration):
        # Both runs write to the same output paths, as in the original.
        unflatten(
            'flattentool/tests/fixtures/xlsx/commands_defaulted.xlsx',
            input_format='xlsx',
            output_name=json_file.strpath,
            cell_source_map=tmpdir.join('command_single_source_map.json').strpath,
            heading_source_map=tmpdir.join('command_single_heading_source_map.json').strpath,
            default_configuration=configuration,
        )
        return json.load(json_file)

    unflattened = run("SkipRows 1, headerrows 2")
    assert unflattened == {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]}

    unflattened = run("SkipRows 1")
    assert unflattened == {'main': [
        {'actual': 'other', 'headings': 'headings', 'some': 'some'},
        {'actual': 'actual', 'headings': 'data', 'some': 'some'},
    ]}
def test_commands_hashcomments(tmpdir, input_format):
    """Unflatten the ``commands_hashcomments`` fixture for *input_format*
    and check the resulting JSON."""
    fixture_path = "flattentool/tests/fixtures/{}/commands_hashcomments.{}".format(
        input_format, input_format
    )
    json_file = tmpdir.join("commands_hashcomments_unflattened.json")

    unflatten(
        fixture_path,
        input_format=input_format,
        output_name=json_file.strpath,
        cell_source_map=tmpdir.join("commands_hashcomments_source_map.json").strpath,
        heading_source_map=tmpdir.join(
            "commands_hashcomments_heading_source_map.json"
        ).strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
    )

    unflattened = json.load(json_file)
    expected_rows = [
        {"actual": "actual", "headings": "data", "some": "some"},
        {"actual": "actual", "headings": "Other data", "some": "some"},
    ]
    assert unflattened == {"main": expected_rows, "some": "data"}
def test_commands_default_override(tmpdir, input_format):
    """Unflatten ``commands_in_metatab_defaulted`` with
    ``default_configuration="headerrows 2"`` and check the resulting JSON."""
    fixture_path = "flattentool/tests/fixtures/{}/commands_in_metatab_defaulted.{}".format(
        input_format, input_format
    )
    json_file = tmpdir.join("command_metatab_unflattened.json")

    unflatten(
        fixture_path,
        input_format=input_format,
        output_name=json_file.strpath,
        cell_source_map=tmpdir.join("command_metatab_source_map.json").strpath,
        heading_source_map=tmpdir.join(
            "command_metatab_heading_source_map.json"
        ).strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
        default_configuration="headerrows 2",
    )

    unflattened = json.load(json_file)

    # In this case want both 'headerrows 2' and 'skiprows 1' (which is defined in the metatab) to be used,
    # as we only override individual commands not all of them,
    # So the results in this case will be the same as if using commands_in_metatab.xlsx (where all commands are in metatab).
    expected_rows = [
        {"actual": "actual", "headings": "data", "some": "some"},
        {"actual": "actual", "headings": "Other data", "some": "some"},
    ]
    assert unflattened == {"main": expected_rows, "some": "data"}
def test_360_main_sheetname_insensitive(tmpdir):
    """Unflatten the ``..._2_grants.xlsx`` and ``..._2_Grants.xlsx``
    WellcomeTrust workbooks with ``main_sheet_name='grants'`` and check
    that the JSON is identical."""

    def convert(xlsx_path, json_basename):
        # Unflatten one workbook and return the parsed JSON output.
        json_file = tmpdir.join(json_basename)
        unflatten(
            input_name=xlsx_path,
            output_name=json_file.strpath,
            input_format='xlsx',
            schema='flattentool/tests/fixtures/360-giving-schema.json',
            main_sheet_name='grants',
            root_list_path='grants',
            root_id='',
            convert_titles=True)
        return json.load(json_file)

    output_json_grants = convert(
        'flattentool/tests/fixtures/xlsx/WellcomeTrust-grants_2_grants.xlsx',
        'output_grant.json')
    output_json_Grants = convert(
        'flattentool/tests/fixtures/xlsx/WellcomeTrust-grants_2_Grants.xlsx',
        'output_Grant.json')

    assert output_json_grants == output_json_Grants
def test_roundtrip_360_rollup(tmpdir, use_titles):
    """Flatten a 360Giving fixture to CSV with ``rollup=True``, unflatten
    only the main ``grants.csv`` sheet and check the JSON round-trips."""
    input_name = (
        "flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json"
    )
    output_format = "csv"
    output_name = tmpdir.join("flattened").strpath + "." + output_format
    moved_name = tmpdir.mkdir("flattened_main_only").strpath

    flatten(
        input_name=input_name,
        output_name=output_name,
        output_format=output_format,
        schema="flattentool/tests/fixtures/360-giving-schema.json",
        root_list_path="grants",
        root_id="",
        use_titles=use_titles,
        rollup=True,
        main_sheet_name="grants",
    )

    # Isolate the main sheet so it is the only CSV given to unflatten.
    os.rename(
        os.path.join(output_name, "grants.csv"),
        os.path.join(moved_name, "grants.csv"),
    )

    unflatten(
        input_name=moved_name,
        output_name=tmpdir.join("roundtrip.json").strpath,
        input_format=output_format,
        schema="flattentool/tests/fixtures/360-giving-schema.json",
        root_list_path="grants",
        root_id="",
        convert_titles=use_titles,
    )

    # Close the fixture deterministically rather than leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(tmpdir.join("roundtrip.json"))
    assert original_json == roundtripped_json
def test_unflatten_xslx_unicode(tmpdir):
    """Unflatten ``unicode.xlsx`` and check that the non-ASCII ``id`` value
    survives.

    ``ocid`` is expected as the integer ``1`` on Python 3 and as the string
    ``'1'`` on Python 2.
    """
    output_file = tmpdir.join('release.json')
    unflatten(
        'flattentool/tests/fixtures/xlsx/unicode.xlsx',
        input_format='xlsx',
        output_name=output_file.strpath,
        main_sheet_name='main')
    reloaded_json = json.load(output_file)

    # Compare the version tuple: the original ``sys.version > '3'`` was a
    # lexicographic string comparison, which misorders e.g. '10.0'.
    expected_ocid = 1 if sys.version_info >= (3,) else '1'
    assert reloaded_json == {'main': [{'ocid': expected_ocid, 'id': 'éαГ😼𝒞人'}]}
def test_unflatten_unicode(tmpdir, input_format):
    """Unflatten the ``unicode`` fixture for *input_format* and check that
    the non-ASCII ``id`` value survives."""
    fixture_path = "flattentool/tests/fixtures/{}/unicode.{}".format(
        input_format, input_format
    )
    output_file = tmpdir.join("release.json")

    unflatten(
        fixture_path,
        input_format=input_format,
        output_name=output_file.strpath,
        main_sheet_name="main",
    )

    reloaded_json = json.load(output_file)
    expected_row = {"ocid": 1, "id": "éαГ😼𝒞人"}
    assert reloaded_json == {"main": [expected_row]}
def test_unflatten_org_xml(tmpdir):
    """Unflatten ``iati-org.xlsx`` to XML and check the output text equals
    the ``iati-org.xml`` fixture."""
    output_file = tmpdir.join('output.xml')
    unflatten(input_name='flattentool/tests/fixtures/xlsx/iati-org.xlsx',
              output_name=output_file.strpath,
              input_format='xlsx',
              id_name='organisation-identifier',
              xml=True,
              metatab_name='Meta')

    # Read the expected XML inside a context manager so the handle is
    # closed; the original left it open.
    with open('flattentool/tests/fixtures/iati-org.xml') as expected_file:
        expected_xml = expected_file.read()
    assert expected_xml == output_file.read()
def test_unflatten_csv_latin1(tmpdir):
    """Write a latin1-encoded CSV, unflatten it with ``encoding='latin1'``
    and check the accented value is read correctly."""
    source_dir = tmpdir.ensure('release_input', dir=True)
    csv_file = source_dir.join('main.csv')
    csv_file.write_text('ocid,id\n1,é\n', encoding='latin1')

    output_file = tmpdir.join('release.json')
    unflatten(source_dir.strpath,
              input_format='csv',
              encoding='latin1',
              output_name=output_file.strpath,
              main_sheet_name='main')

    reloaded_json = json.load(output_file)
    expected_row = {'ocid': '1', 'id': 'é'}
    assert reloaded_json == {'main': [expected_row]}
def test_commands_single_sheet_csv(tmpdir):
    """Unflatten the ``csv/commands_in_file`` fixture and check the
    resulting JSON."""
    json_file = tmpdir.join('command_single_unflattened.json')
    cell_map_path = tmpdir.join('command_single_source_map.json').strpath
    heading_map_path = tmpdir.join('command_single_heading_source_map.json').strpath

    unflatten(
        'flattentool/tests/fixtures/csv/commands_in_file',
        input_format='csv',
        output_name=json_file.strpath,
        cell_source_map=cell_map_path,
        heading_source_map=heading_map_path,
    )

    unflattened = json.load(json_file)
    expected_row = {'actual': 'actual', 'headings': 'data', 'some': 'some'}
    assert unflattened == {'main': [expected_row]}
def process_item(self, item, spider):
    """Convert a CSV or XLSX file item to JSON with flattentool's unflatten.

    The item is returned untouched when ``spider.unflatten`` is falsy or the
    item is not a ``File``/``FileItem``. Otherwise ``item["file_name"]`` is
    given a ``.json`` extension and ``item["data"]`` is replaced by the
    unflattened JSON text.

    :raises NotImplementedError: if the file name does not end in ``.csv``
        or ``.xlsx``, or if no schema tag starts with the spider's OCDS
        version.
    """
    if not spider.unflatten or not isinstance(item, (File, FileItem)):
        return item

    input_name = item["file_name"]
    if input_name.endswith(".csv"):
        item["file_name"] = item["file_name"][:-4] + ".json"
        input_format = "csv"
    elif input_name.endswith(".xlsx"):
        item["file_name"] = item["file_name"][:-5] + ".json"
        input_format = "xlsx"
    else:
        raise NotImplementedError(
            f"the file '{input_name}' has no extension or is not CSV or XLSX, "
            f"obtained from: {item['url']}"
        )

    # Tags are scanned in reverse; the first one matching the spider's
    # version prefix selects the schema.
    spider_ocds_version = spider.ocds_version.replace(".", "__")
    for tag in reversed(get_tags()):
        if tag.startswith(spider_ocds_version):
            schema = get_release_schema_url(tag)
            break
    else:
        raise NotImplementedError(f"no schema found for '{spider_ocds_version}'")

    with tempfile.TemporaryDirectory() as directory:
        input_path = os.path.join(directory, input_name)
        output_name = os.path.join(directory, item["file_name"])
        # CSV input is passed to unflatten as a directory, XLSX as a file.
        if input_format == "csv":
            input_name = directory
        elif input_format == "xlsx":
            input_name = input_path

        with open(input_path, "wb") as f:
            f.write(item["data"])

        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore"
            )  # flattentool uses UserWarning, so we can't set a specific category
            unflatten(
                input_name,
                root_list_path="releases",
                root_id="ocid",
                schema=schema,
                input_format=input_format,
                output_name=output_name,
                **spider.unflatten_args,
            )

        # flattentool's JSON output is expected to be UTF-8 (TODO confirm),
        # so read it back as UTF-8 instead of with the locale default.
        with open(output_name, "r", encoding="utf-8") as f:
            item["data"] = f.read()

    return item
def convert_spreadsheet(request, data, file_type):
    """Unflatten an uploaded spreadsheet to ``unflattened.json`` in the
    upload directory and return a context dict describing the conversion.

    An existing ``unflattened.json`` is reused rather than regenerated.
    For CSV input the encoding is chosen by trying to decode the file as
    utf-8, then cp1252, falling back to latin_1.

    :returns: dict with ``converted_file_size``, ``conversion``,
        ``converted_path`` and ``converted_url``.
    :raises CoveInputDataError: if the conversion (or reading the size of
        its output) raises any exception; the error is logged first.
    """
    context = {}
    converted_path = os.path.join(data.upload_dir(), 'unflattened.json')
    encoding = 'utf-8'
    if file_type == 'csv':
        # flatten-tool expects a directory full of CSVs with file names
        # matching what xlsx titles would be.
        # If only one upload file is specified, we rename it and move into
        # a new directory, such that it fits this pattern.
        input_name = os.path.join(data.upload_dir(), 'csv_dir')
        os.makedirs(input_name, exist_ok=True)
        destination = os.path.join(input_name, request.cove_config['main_sheet_name'] + '.csv')
        shutil.copy(data.original_file.file.name, destination)
        try:
            with open(destination, encoding='utf-8') as main_sheet_file:
                main_sheet_file.read()
        except UnicodeDecodeError:
            try:
                with open(destination, encoding='cp1252') as main_sheet_file:
                    main_sheet_file.read()
                encoding = 'cp1252'
            except UnicodeDecodeError:
                encoding = 'latin_1'
    else:
        input_name = data.original_file.file.name

    try:
        if not os.path.exists(converted_path):
            flattentool.unflatten(
                input_name,
                output_name=converted_path,
                input_format=file_type,
                main_sheet_name=request.cove_config['main_sheet_name'],
                root_id=request.cove_config['root_id'],
                schema=request.cove_config['item_schema_url'],
                convert_titles=True,
                encoding=encoding
            )
        context['converted_file_size'] = os.path.getsize(converted_path)
    except Exception as err:
        logger.exception(err, extra={
            'request': request,
        })
        raise CoveInputDataError({
            'sub_title': _("Sorry we can't process that data"),
            'link': 'cove:index',
            'link_text': _('Try Again'),
            # Translate the template first, then insert the error; the
            # original formatted inside _(), so the lookup key already
            # contained the error text and could not match a catalogue entry.
            'msg': _('We think you tried to supply a spreadsheet, but we failed to convert it to JSON.\n\nError message: {}').format(repr(err))
        }) from err

    context.update({
        'conversion': 'unflatten',
        'converted_path': converted_path,
        'converted_url': '{}/unflattened.json'.format(data.upload_url())
    })
    return context
def test_metatab(tmpdir):
    """Unflatten ``basic_meta.xlsx`` with a metatab schema and check the
    JSON output, the cell source map and the heading source map."""
    schema_file = tmpdir.join('metatab_schema.json')
    schema_file.write('{"properties": {}}')

    json_file = tmpdir.join('meta_unflattened.json')
    cell_map_file = tmpdir.join('meta_cell_source_map.json')
    heading_map_file = tmpdir.join('meta_heading_source_map.json')

    unflatten(
        'flattentool/tests/fixtures/xlsx/basic_meta.xlsx',
        input_format='xlsx',
        output_name=json_file.strpath,
        metatab_name='Meta',
        metatab_vertical_orientation=True,
        metatab_schema=schema_file.strpath,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
    )

    metatab_json = json.load(json_file)
    assert metatab_json == {
        'a': 'a1',
        'b': 'b1',
        'c': 'c1',
        'main': [
            {'colA': 'cell1', 'colB': 'cell2'},
            {'colA': 'cell3', 'colB': 'cell4'},
            {'colC': 'cell5', 'colD': 'cell6'},
            {'colC': 'cell7', 'colD': 'cell8'},
        ],
    }

    cell_source_map = json.load(cell_map_file)
    assert cell_source_map == {
        '': [['Meta', 2]],
        'a': [['Meta', '1', 2, 'a']],
        'b': [['Meta', '2', 2, 'b']],
        'c': [['Meta', '3', 2, 'c']],
        'main/0': [['main', 2]],
        'main/0/colA': [['main', 'A', 2, 'colA']],
        'main/0/colB': [['main', 'B', 2, 'colB']],
        'main/1': [['main', 3]],
        'main/1/colA': [['main', 'A', 3, 'colA']],
        'main/1/colB': [['main', 'B', 3, 'colB']],
        'main/2': [['subsheet', 2]],
        'main/2/colC': [['subsheet', 'A', 2, 'colC']],
        'main/2/colD': [['subsheet', 'B', 2, 'colD']],
        'main/3': [['subsheet', 3]],
        'main/3/colC': [['subsheet', 'A', 3, 'colC']],
        'main/3/colD': [['subsheet', 'B', 3, 'colD']],
    }

    heading_source_map = json.load(heading_map_file)
    assert heading_source_map == {
        'a': [['Meta', 'a']],
        'b': [['Meta', 'b']],
        'c': [['Meta', 'c']],
        'main/colA': [['main', 'colA']],
        'main/colB': [['main', 'colB']],
        'main/colC': [['subsheet', 'colC']],
        'main/colD': [['subsheet', 'colD']],
    }
def test_unflatten_empty(tmpdir):
    """A CSV whose data rows are all empty unflattens to an empty 'main' list."""
    csv_dir = tmpdir.ensure('release_input', dir=True)
    csv_dir.join('main.csv').write_text('ocid,id\n,\n,\n,', encoding='utf8')
    output_file = tmpdir.join('release.json')

    unflatten(
        csv_dir.strpath,
        input_format='csv',
        output_name=output_file.strpath,
        main_sheet_name='main',
    )

    expected = '''{
    "main": []
}'''
    actual = output_file.read()
    assert lines_strip_whitespace(actual) == lines_strip_whitespace(expected)
def test_unflatten_csv_latin1(tmpdir):
    """A latin1-encoded CSV is read correctly when encoding='latin1' is passed."""
    csv_dir = tmpdir.ensure('release_input', dir=True)
    csv_dir.join('main.csv').write_text('ocid,id\n1,é\n', encoding='latin1')
    output_file = tmpdir.join('release.json')

    unflatten(
        csv_dir.strpath,
        input_format='csv',
        encoding='latin1',
        output_name=output_file.strpath,
        main_sheet_name='main',
    )

    expected = {'main': [{'ocid': '1', 'id': 'é'}]}
    assert json.load(output_file) == expected
def test_unflatten_csv_utf8(tmpdir):
    """A UTF-8 CSV is read without an explicit encoding and written back as raw UTF-8."""
    csv_dir = tmpdir.ensure('release_input', dir=True)
    csv_dir.join('main.csv').write_text('ocid,id\n1,éαГ😼𝒞人\n', encoding='utf8')
    output_file = tmpdir.join('release.json')

    # No encoding argument is passed: the input should default to utf8.
    unflatten(
        csv_dir.strpath,
        input_format='csv',
        output_name=output_file.strpath,
        main_sheet_name='main',
    )

    expected = {'main': [{'ocid': '1', 'id': 'éαГ😼𝒞人'}]}
    assert json.load(output_file) == expected

    # The JSON we output should be UTF-8, rather than escaped ASCII
    # https://github.com/OpenDataServices/flatten-tool/issues/71
    raw_output = output_file.read_text(encoding='utf-8')
    assert 'éαГ😼𝒞人' in raw_output
def test_roundtrip_360(tmpdir, output_format, use_titles):
    """Flatten a 360Giving JSON fixture, unflatten the result, and compare.

    The fixture is flattened to ``output_format`` (optionally using schema
    titles as headings, per ``use_titles``), then unflattened again with
    ``convert_titles`` set to the same value. The round-tripped JSON must
    equal the original fixture.
    """
    input_name = 'flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json'
    flatten(
        input_name=input_name,
        output_name=tmpdir.join('flattened').strpath+'.'+output_format,
        output_format=output_format,
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        root_list_path='grants',
        root_id='',
        use_titles=use_titles,
        main_sheet_name='grants')
    unflatten(
        input_name=tmpdir.join('flattened').strpath+'.'+output_format,
        output_name=tmpdir.join('roundtrip.json').strpath,
        input_format=output_format,
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        root_list_path='grants',
        root_id='',
        convert_titles=use_titles)
    # Close the fixture deterministically instead of leaking the handle, and
    # read it as UTF-8 rather than whatever the locale default happens to be.
    with open(input_name, encoding='utf-8') as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(tmpdir.join('roundtrip.json'))
    assert original_json == roundtripped_json
def test_roundtrip_xml(tmpdir, output_format):
    """Flatten an IATI XML example, unflatten the result, and compare.

    The XML is flattened to ``output_format`` and unflattened back to XML.
    Both documents are parsed with ``xmltodict`` and compared as plain dicts.
    """
    input_name = 'examples/iati/expected.xml'
    flatten(
        input_name=input_name,
        output_name=tmpdir.join('flattened').strpath+'.'+output_format,
        output_format=output_format,
        root_list_path='iati-activity',
        id_name='iati-identifier',
        xml=True)
    unflatten(
        input_name=tmpdir.join('flattened').strpath+'.'+output_format,
        output_name=tmpdir.join('roundtrip.xml').strpath,
        input_format=output_format,
        root_list_path='iati-activity',
        id_name='iati-identifier',
        xml=True)

    # Compare without ordering, by using dict_constructor=dict instead of
    # OrderedDict. The files are opened in context managers so that the
    # handles are closed even if parsing raises.
    with open(input_name, 'rb') as original_xml:
        original = xmltodict.parse(original_xml, dict_constructor=dict)
    with tmpdir.join('roundtrip.xml').open('rb') as roundtripped_xml:
        roundtripped = xmltodict.parse(roundtripped_xml, dict_constructor=dict)

    assert original == roundtripped
def test_unflatten(tmpdir):
    """
    Run a complete CSV unflatten and compare the output with the expected JSON.

    Things this test is checking in particular:

    Ordering is preserved - both the order of columns and of rows.

    On an id column header, the text following a colon is used as the key
    for the array. If no such text is provided, the sheet name is used.
    """
    csv_dir = tmpdir.ensure('release_input', dir=True)
    output_file = tmpdir.join('release.json')

    main_csv = (
        'ocid,id,testA,test/id,test/C\n'
        '1,2,3,4,5\n'
        '1,2a,3a,4a,5a\n'
        '6,7,8,9,10\n'
        '6,7a,8a,9a,10a\n'
    )
    subsheet_csv = (
        'ocid,main/id:sub,main/test/id,id,testD,test2/E,test2/F\n'
        '1,2,,S1,11,12,13\n'
        '1,2a,,S1,14,15,16\n'
        '1,2,,S2,17,18,19\n'
        '6,7,,S1,20,21,22\n'
        '1,2,4,S3,24,25,26\n'
    )
    subsubsheet_csv = (
        'ocid,main/id,main/sub[]/id:subsub,testG\n'
        '1,2,S1,23\n'
    )
    csv_dir.join('main.csv').write(main_csv)
    csv_dir.join('subsheet.csv').write(subsheet_csv)
    csv_dir.join('subsubsheet.csv').write(subsubsheet_csv)

    unflatten(
        csv_dir.strpath,
        input_format='csv',
        output_name=output_file.strpath,
        main_sheet_name='main',
    )

    expected = '''{
    "main": [
        {
            "ocid": "1",
            "id": "2",
            "testA": "3",
            "test": {
                "id": "4",
                "C": "5",
                "subsheet": [
                    {
                        "id": "S3",
                        "testD": "24",
                        "test2": {
                            "E": "25",
                            "F": "26"
                        }
                    }
                ]
            },
            "sub": [
                {
                    "id": "S1",
                    "testD": "11",
                    "test2": {
                        "E": "12",
                        "F": "13"
                    },
                    "subsub": [
                        {
                            "testG": "23"
                        }
                    ]
                },
                {
                    "id": "S2",
                    "testD": "17",
                    "test2": {
                        "E": "18",
                        "F": "19"
                    }
                }
            ]
        },
        {
            "ocid": "1",
            "id": "2a",
            "testA": "3a",
            "test": {
                "id": "4a",
                "C": "5a"
            },
            "sub": [
                {
                    "id": "S1",
                    "testD": "14",
                    "test2": {
                        "E": "15",
                        "F": "16"
                    }
                }
            ]
        },
        {
            "ocid": "6",
            "id": "7",
            "testA": "8",
            "test": {
                "id": "9",
                "C": "10"
            },
            "sub": [
                {
                    "id": "S1",
                    "testD": "20",
                    "test2": {
                        "E": "21",
                        "F": "22"
                    }
                }
            ]
        },
        {
            "ocid": "6",
            "id": "7a",
            "testA": "8a",
            "test": {
                "id": "9a",
                "C": "10a"
            }
        }
    ]
}'''
    actual = output_file.read()
    assert lines_strip_whitespace(actual) == lines_strip_whitespace(expected)