def test_rollup(self):
    """Rolled-up fields appear on the main sheet; the sub-sheet keeps all columns."""
    schema_parser = SchemaParser(
        root_schema_dict={
            'properties': {
                'testA': {
                    'type': 'array',
                    'rollUp': ['testB'],
                    'items': {
                        'type': 'object',
                        'properties': {
                            'testB': {'type': 'string'},
                            'testC': {'type': 'string'},
                        },
                    },
                },
            }
        },
        rollup=True,
    )
    schema_parser.parse()
    parser = JSONParser(
        root_json_dict=[OrderedDict([
            ('testA', [OrderedDict([('testB', '1'), ('testC', '2')])]),
        ])],
        schema_parser=schema_parser,
    )
    parser.parse()
    # Only the rolled-up column is promoted to the main sheet.
    assert list(parser.main_sheet) == ['testA[]/testB']
    assert parser.main_sheet.lines == [{'testA[]/testB': '1'}]
    # The sub-sheet still carries every column plus the root id.
    assert len(parser.sub_sheets) == 1
    assert set(parser.sub_sheets['testA']) == {'ocid', 'testB', 'testC'}
    assert parser.sub_sheets['testA'].lines == [{'testB': '1', 'testC': '2'}]
def create_template(schema, output_name='releases', output_format='all',
                    main_sheet_name='main', flatten=False, rollup=False,
                    root_id='ocid', use_titles=False, **_):
    """
    Create template file(s) from the given schema.

    Built to handle command-line input and arguments, but also callable
    from elsewhere in future.
    """
    parser = SchemaParser(schema_filename=schema,
                          main_sheet_name=main_sheet_name,
                          rollup=rollup,
                          root_id=root_id,
                          use_titles=use_titles)
    parser.parse()

    def spreadsheet_output(spreadsheet_output_class, name):
        # Write every sheet via the selected output backend.
        writer = spreadsheet_output_class(
            parser=parser,
            main_sheet_name=main_sheet_name,
            output_name=name)
        writer.write_sheets()

    if output_format == 'all':
        for format_name, spreadsheet_output_class in OUTPUT_FORMATS.items():
            spreadsheet_output(spreadsheet_output_class,
                               output_name + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS.keys():  # in dictionary of allowed formats
        spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)
    else:
        raise Exception('The requested format is not available')
def test_flatten_multiplesheets(use_titles, use_schema, root_id, root_id_kwargs,
                                input_list, expected_output_dict, recwarn,
                                comment, warning_messages, tmpdir, reversible):
    """Flattening should distribute data over the expected sub-sheets."""
    # Needed for warnings to be captured on Python 2.7/3.3;
    # 3.4 and 3.5 are fine without it.
    import warnings
    warnings.simplefilter('always')

    extra_kwargs = {'use_titles': use_titles}
    extra_kwargs.update(root_id_kwargs)

    if use_schema:
        schema_parser = SchemaParser(
            root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
            rollup=True,
            **extra_kwargs)
        schema_parser.parse()
    else:
        schema_parser = None

    with tmpdir.join('input.json').open('w') as fp:
        json.dump(
            {'mykey': [inject_root_id(root_id, input_row)
                       for input_row in input_list]},
            fp)

    parser = JSONParser(
        json_filename=tmpdir.join('input.json').strpath,
        root_list_path='mykey',
        schema_parser=schema_parser,
        **extra_kwargs)
    parser.parse()

    expected_output_dict = OrderedDict(
        (sheet_name, [inject_root_id(root_id, line) for line in lines])
        for sheet_name, lines in expected_output_dict.items())
    output = {sheet_name: sheet.lines
              for sheet_name, sheet in parser.sub_sheets.items() if sheet.lines}
    output['custom_main'] = parser.main_sheet.lines
    assert output == expected_output_dict
def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs,
                   input_list, expected_output_list, recwarn, comment,
                   warning_messages, reversible):
    """Unflatten a single flat sheet back into nested JSON and compare."""
    # Needed for warnings to be captured on Python 2.7/3.3;
    # 3.4 and 3.5 are fine without it.
    import warnings
    warnings.simplefilter('always')

    extra_kwargs = {'convert_titles': convert_titles}
    extra_kwargs.update(root_id_kwargs)

    spreadsheet_input = ListInput(
        sheets={
            'custom_main': [inject_root_id(root_id, input_row)
                            for input_row in input_list]
        },
        **extra_kwargs)
    spreadsheet_input.read_sheets()

    parser = SchemaParser(
        root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
        root_id=root_id,
        rollup=True)
    parser.parse()
    spreadsheet_input.parser = parser

    expected_output_list = [inject_root_id(root_id, expected)
                            for expected in expected_output_list]
    if expected_output_list == [{}]:
        # We don't expect an empty dictionary
        expected_output_list = []
    assert list(spreadsheet_input.unflatten()) == expected_output_list
    # We expect no warning_messages
    if not convert_titles:  # TODO what are the warning_messages here
        assert [str(x.message) for x in recwarn.list] == warning_messages
def create_template(schema, output_name=None, output_format='all',
                    main_sheet_name='main', rollup=False, root_id=None,
                    use_titles=False, disable_local_refs=False,
                    truncation_length=3, no_deprecated_fields=False, **_):
    """
    Create template file(s) from the given schema.

    Built to handle command-line input and arguments, but also callable
    from elsewhere in future.
    """
    parser = SchemaParser(
        schema_filename=schema,
        rollup=rollup,
        root_id=root_id,
        use_titles=use_titles,
        disable_local_refs=disable_local_refs,
        truncation_length=truncation_length,
        exclude_deprecated_fields=no_deprecated_fields)
    parser.parse()

    def spreadsheet_output(spreadsheet_output_class, name):
        # Write every sheet via the selected output backend.
        writer = spreadsheet_output_class(
            parser=parser,
            main_sheet_name=main_sheet_name,
            output_name=name)
        writer.write_sheets()

    if output_format == 'all':
        if not output_name:
            output_name = 'template'
        for format_name, spreadsheet_output_class in OUTPUT_FORMATS.items():
            spreadsheet_output(spreadsheet_output_class,
                               output_name + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS.keys():  # in dictionary of allowed formats
        if not output_name:
            output_name = 'template' + FORMATS_SUFFIX[output_format]
        spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)
    else:
        raise Exception('The requested format is not available')
def unflatten(input_name, base_json=None, input_format=None,
              output_name='releases.json', main_sheet_name='releases',
              encoding='utf8', timezone_name='UTC', root_id='ocid', schema='',
              convert_titles=False, **_):
    """
    Unflatten a flat structure (spreadsheet - csv or xlsx) into a nested
    structure (JSON).

    Raises:
        Exception: if input_format is missing or not a recognised format.
    """
    if input_format is None:
        # Fixed: the original message had an unbalanced parenthesis.
        raise Exception('You must specify an input format (may autodetect in future)')
    elif input_format not in INPUT_FORMATS:
        raise Exception('The requested format is not available')

    spreadsheet_input_class = INPUT_FORMATS[input_format]
    spreadsheet_input = spreadsheet_input_class(
        input_name=input_name,
        timezone_name=timezone_name,
        main_sheet_name=main_sheet_name,
        root_id=root_id,
        convert_titles=convert_titles)
    if convert_titles:
        # Title conversion needs the schema to map titles back to field names.
        parser = SchemaParser(
            schema_filename=schema,
            main_sheet_name=main_sheet_name,
            rollup=True,
            root_id=root_id)
        parser.parse()
        spreadsheet_input.parser = parser
    spreadsheet_input.encoding = encoding
    spreadsheet_input.read_sheets()

    if base_json:
        with open(base_json) as fp:
            base = json.load(fp, object_pairs_hook=OrderedDict)
    else:
        base = OrderedDict()
    base[main_sheet_name] = list(spreadsheet_input.unflatten())
    with codecs.open(output_name, 'w', encoding='utf-8') as fp:
        json.dump(base, fp, indent=4, default=decimal_default,
                  ensure_ascii=False)
def test_bad_rollup(recwarn):
    '''
    When rollUp is specified, but the field is missing in the schema, we
    expect a warning.
    '''
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'testA': {
                'type': 'array',
                'rollUp': ['testB'],
                'items': {
                    'type': 'object',
                    'properties': {'testC': type_string},
                },
            },
        }
    }, rollup=True)
    parser.parse()
    warning = recwarn.pop(UserWarning)
    assert 'testB in rollUp but not in schema' in text_type(warning.message)
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'testA'}
    assert set(parser.sub_sheets['testA']) == {'ocid', 'testC'}
def test_column_matching(self, tmpdir):
    """Without column matching, a list of strings becomes a ':array' column."""
    test_schema = tmpdir.join('test.json')
    test_schema.write('''{
        "properties": {
            "c": {
                "type": "array",
                "items": {"type": "string"}
            }
        }
    }''')
    schema_parser = SchemaParser(schema_filename=test_schema.strpath)
    schema_parser.parse()
    parser = JSONParser(
        root_json_dict=[OrderedDict([('c', ['d'])])],
        schema_parser=schema_parser)
    parser.parse()
    assert list(parser.main_sheet) == ['c:array']
    assert parser.main_sheet.lines == [{'c:array': 'd'}]
    assert len(parser.sub_sheets) == 0
def flatten(input_name, schema=None, output_name=None, output_format='all',
            main_sheet_name='main', root_list_path='main', root_is_list=False,
            sheet_prefix='', filter_field=None, filter_value=None,
            rollup=False, root_id=None, use_titles=False, xml=False,
            id_name='id', disable_local_refs=False,
            remove_empty_schema_columns=False, truncation_length=3, **_):
    """
    Flatten a nested structure (JSON) to a flat structure (spreadsheet - csv or xlsx).
    """
    # filter_field and filter_value only make sense as a pair.
    if (filter_field is None) != (filter_value is None):
        raise Exception('You must use filter_field and filter_value together')

    if schema:
        schema_parser = SchemaParser(
            schema_filename=schema,
            rollup=rollup,
            root_id=root_id,
            use_titles=use_titles,
            disable_local_refs=disable_local_refs,
            truncation_length=truncation_length)
        schema_parser.parse()
    else:
        schema_parser = None

    parser = JSONParser(
        json_filename=input_name,
        root_list_path=None if root_is_list else root_list_path,
        schema_parser=schema_parser,
        root_id=root_id,
        use_titles=use_titles,
        xml=xml,
        id_name=id_name,
        filter_field=filter_field,
        filter_value=filter_value,
        remove_empty_schema_columns=remove_empty_schema_columns,
        truncation_length=truncation_length)
    parser.parse()

    def spreadsheet_output(spreadsheet_output_class, name):
        # Write every sheet via the selected output backend.
        writer = spreadsheet_output_class(
            parser=parser,
            main_sheet_name=main_sheet_name,
            output_name=name,
            sheet_prefix=sheet_prefix)
        writer.write_sheets()

    if output_format == 'all':
        if not output_name:
            output_name = 'flattened'
        for format_name, spreadsheet_output_class in OUTPUT_FORMATS.items():
            spreadsheet_output(spreadsheet_output_class,
                               output_name + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS.keys():  # in dictionary of allowed formats
        if not output_name:
            output_name = 'flattened' + FORMATS_SUFFIX[output_format]
        spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)
    else:
        raise Exception('The requested format is not available')
def test_main_sheet_basic():
    """Top-level string properties become main-sheet columns."""
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'testA': type_string,
            'testB': type_string,
        }
    })
    parser.parse()
    assert set(parser.main_sheet) == {'testA', 'testB'}
def test_main_sheet_nested():
    """A nested object's property is flattened with a '/' separated path."""
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'testA': {
                'type': 'object',
                'properties': {'testC': type_string},
            }
        }
    })
    parser.parse()
    assert set(parser.main_sheet) == {'testA/testC'}
def test_parent_is_object(self):
    """An array under an object produces its own sub-sheet linked by id."""
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'testA': {
                'type': 'object',
                'properties': object_in_array_example_properties('testB', 'testC'),
            }
        }
    })
    parser.parse()
    assert set(parser.main_sheet) == {'testA/id'}
    assert set(parser.sub_sheets) == {'testB'}
    assert list(parser.sub_sheets['testB']) == [
        'ocid', 'main/testA/id:testB', 'testC']
def test_sub_sheets(self, tmpdir, remove_empty_schema_columns):
    """Sub-sheets are created per array; empty schema columns can be dropped.

    Fixes a vacuous assertion: ``assert len(x) == 2 if not flag else 1``
    parses as ``assert (len(x) == 2 if not flag else 1)``, so when the flag
    is set it asserted the literal ``1`` (always true). Parenthesising the
    conditional makes the comparison effective in both branches.
    """
    test_schema = tmpdir.join('test.json')
    test_schema.write('''{
        "properties": {
            "c": {
                "type": "array",
                "items": {"$ref": "#/testB"}
            },
            "g": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "h": { "type": "string"}
                    }
                }
            }
        },
        "testB": {
            "type": "object",
            "properties": {
                "d": { "type": "string" },
                "f": { "type": "string" }
            }
        }
    }''')
    schema_parser = SchemaParser(
        schema_filename=test_schema.strpath,
        root_id='ocid'
    )
    schema_parser.parse()
    parser = JSONParser(
        root_json_dict=[OrderedDict([
            ('a', 'b'),
            ('c', [OrderedDict([('d', 'e')])]),
        ])],
        schema_parser=schema_parser,
        remove_empty_schema_columns=remove_empty_schema_columns,
    )
    parser.parse()
    assert list(parser.main_sheet) == ['a']
    assert parser.main_sheet.lines == [{'a': 'b'}]
    # Fixed: parenthesised conditional so both branches compare the length.
    assert len(parser.sub_sheets) == (2 if not remove_empty_schema_columns else 1)
    if not remove_empty_schema_columns:
        assert list(parser.sub_sheets['c']) == ['ocid', 'c/0/d', 'c/0/f']
        assert list(parser.sub_sheets['g']) == ['ocid', 'g/0/h']
    else:
        assert list(parser.sub_sheets['c']) == ['ocid', 'c/0/d']
    assert parser.sub_sheets['c'].lines == [{'c/0/d': 'e'}]
def test_references_sheet_names(tmpdir):
    """The referenced name should be used for the sheet name"""
    tmpfile = tmpdir.join('test_schema.json')
    tmpfile.write('''{
        "properties": {
            "testA": {
                "type": "array",
                "items": {"$ref": "#/testB"}
            }
        },
        "testB": {
            "type": "object",
            "properties": {"testC": {"type": "string"}}
        }
    }''')
    parser = SchemaParser(schema_filename=tmpfile.strpath)
    parser.parse()
    assert set(parser.sub_sheets) == {'testB'}
    assert list(parser.sub_sheets['testB']) == ['ocid', 'testC']
def test_sub_sheet():
    """An array of objects produces a sub-sheet, not main-sheet columns."""
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'testA': {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': {'testB': type_string},
                },
            },
        }
    })
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'testA'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'testB']
def test_simple_array():
    """A typed string array becomes a single ':array' main-sheet column."""
    parser = SchemaParser(
        root_schema_dict={
            'properties': {
                'testA': {
                    'type': 'array',
                    'items': {'type': 'string'},
                }
            }
        },
        main_sheet_name='custom_main_sheet_name')
    parser.parse()
    assert set(parser.main_sheet) == {'testA:array'}
def test_two_parents(self):
    """Flattening and template generation must agree on sheet names."""
    # Copy of test_two_parents from test_schema_parser.py, in order to check
    # that flattening and template generation use the same sheet names.
    schema_parser = SchemaParser(root_schema_dict={
        'properties': OrderedDict([
            ('Atest', {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': object_in_array_example_properties('Btest', 'Ctest'),
                },
            }),
            ('Dtest', {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': object_in_array_example_properties('Btest', 'Etest'),
                },
            }),
        ])
    })
    schema_parser.parse()
    parser = JSONParser(
        root_json_dict=[{
            'Atest': [{'id': 1, 'Btest': [{'Ctest': 2}]}],
            'Dtest': [{'id': 3, 'Btest': [{'Etest': 4}]}],
        }],
        schema_parser=schema_parser)
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'Atest', 'Dtest', 'Ate_Btest', 'Dte_Btest'}
    assert list(parser.sub_sheets['Atest']) == ['Atest/0/id']
    assert list(parser.sub_sheets['Dtest']) == ['Dtest/0/id']
    assert list(parser.sub_sheets['Ate_Btest']) == [
        'Atest/0/id', 'Atest/0/Btest/0/Ctest']
    assert list(parser.sub_sheets['Dte_Btest']) == [
        'Dtest/0/id', 'Dtest/0/Btest/0/Etest']
def test_parent_is_object(self):
    """Ids from the root and the parent object both link into the sub-sheet."""
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'id': type_string,
            'testA': {
                'type': 'object',
                'properties': object_in_array_example_properties('testB', 'testC'),
            },
        }
    })
    parser.parse()
    assert set(parser.main_sheet) == {'id', 'testA/id'}
    assert set(parser.sub_sheets) == {'testB'}
    assert list(parser.sub_sheets['testB']) == [
        'ocid', 'main/id:testB', 'main/testA/id:testB', 'testC']
def test_sub_sheet_empty_string_root_id():
    """With an empty root_id no root-id column is added to sub-sheets."""
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'Atest': {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': {'Btest': type_string},
                },
            },
        }
    }, root_id='')
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'Atest'}
    assert list(parser.sub_sheets['Atest']) == ['Atest/0/Btest']
def test_sub_sheet():
    """An array of objects is split off into a sub-sheet with the root id."""
    sub_sheet_schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': {'testB': type_string},
                },
            },
        }
    }
    parser = SchemaParser(root_schema_dict=sub_sheet_schema)
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'testA'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'testB']
def test_custom_main_sheet_name(self):
    """Sub-sheet names combine truncated parent path parts (Ate_Btest)."""
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'id': type_string,
            'Atest': {
                'type': 'object',
                'properties': object_in_array_example_properties('Btest', 'Ctest'),
            },
        }
    })
    parser.parse()
    assert set(parser.main_sheet) == {'id', 'Atest/id'}
    assert set(parser.sub_sheets) == {'Ate_Btest'}
    assert list(parser.sub_sheets['Ate_Btest']) == [
        'id', 'Atest/id', 'Atest/Btest/0/Ctest']
def test_flatten(use_titles, use_schema, root_id, root_id_kwargs, input_list,
                 expected_output_list, recwarn, comment, warning_messages,
                 tmpdir, reversible):
    """Flatten input_list and compare the main sheet against expectations."""
    # Needed for warnings to be captured on Python 2.7/3.3;
    # 3.4 and 3.5 are fine without it.
    import warnings
    warnings.simplefilter('always')

    extra_kwargs = {'use_titles': use_titles}
    extra_kwargs.update(root_id_kwargs)

    if use_schema:
        schema_parser = SchemaParser(
            root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
            rollup=True,
            **extra_kwargs)
        schema_parser.parse()
    else:
        schema_parser = None

    with tmpdir.join('input.json').open('w') as fp:
        json.dump(
            {'mykey': [inject_root_id(root_id, input_row)
                       for input_row in input_list]},
            fp)
    parser = JSONParser(
        json_filename=tmpdir.join('input.json').strpath,
        root_list_path='mykey',
        schema_parser=schema_parser,
        **extra_kwargs)
    parser.parse()

    expected_output_list = [inject_root_id(root_id, expected)
                            for expected in expected_output_list]
    if expected_output_list == [{}]:
        # We don't expect an empty dictionary
        expected_output_list = []
    assert list(parser.main_sheet.lines) == expected_output_list
def test_column_matching(self, tmpdir):
    """A schema array of strings maps a list value onto a plain column."""
    test_schema = tmpdir.join('test.json')
    test_schema.write('''{
        "properties": {
            "c": {
                "type": "array",
                "items": {"type": "string"}
            }
        }
    }''')
    schema_parser = SchemaParser(schema_filename=test_schema.strpath)
    schema_parser.parse()
    parser = JSONParser(
        root_json_dict=[OrderedDict([('c', ['d'])])],
        schema_parser=schema_parser)
    parser.parse()
    assert list(parser.main_sheet) == ['c']
    assert parser.main_sheet.lines == [{'c': 'd'}]
    assert len(parser.sub_sheets) == 0
def test_rollup():
    """Rolled-up field is exposed on the main sheet; sub-sheet keeps all columns."""
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'testA': {
                'type': 'array',
                'rollUp': ['testB'],
                'items': {
                    'type': 'object',
                    'properties': {
                        'testB': type_string,
                        'testC': type_string,
                    },
                },
            },
        }
    }, rollup=True)
    parser.parse()
    assert set(parser.main_sheet) == {'testA[]/testB'}
    assert set(parser.sub_sheets) == {'testA'}
    assert set(parser.sub_sheets['testA']) == {'ocid', 'testB', 'testC'}
def run(sheets, schema=None, source_maps=False):
    """Unflatten a list of sheet dicts, optionally guided by a schema.

    Returns a (result, cell_source_map, heading_source_map) triple; the
    source maps are None unless source_maps is requested.
    """
    input_headings = OrderedDict()
    input_sheets = OrderedDict()
    for sheet in sheets:
        parsed_rows = [OrderedDict(zip(sheet["headings"], row))
                       for row in sheet["rows"]]
        input_sheets[sheet["name"]] = parsed_rows
        input_headings[sheet["name"]] = sheet["headings"]
    if schema is not None:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id="",  # QUESTION: I don't understand root_id
            convert_titles=True,  # Without this, the titles aren't understood
        )
        # Without this, the $ref entries in the schema aren't resolved.
        dereferenced_schema = JsonRef.replace_refs(schema)
        parser = SchemaParser(root_schema_dict=dereferenced_schema,
                              root_id="main", rollup=True)
        parser.parse()
        spreadsheet_input.parser = parser
    else:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id="",
        )
    spreadsheet_input.read_sheets()
    if source_maps:
        result, cell_source_map_data, heading_source_map_data = \
            spreadsheet_input.fancy_unflatten(
                with_cell_source_map=True, with_heading_source_map=True)
        return result, cell_source_map_data, heading_source_map_data
    return spreadsheet_input.unflatten(), None, None
def test_rollup(self):
    """With root_id and rollup on both parsers, full /0/ paths are used."""
    schema_parser = SchemaParser(
        root_schema_dict={
            "properties": {
                "testA": {
                    "type": "array",
                    "rollUp": ["testB"],
                    "items": {
                        "type": "object",
                        "properties": {
                            "testB": {"type": "string"},
                            "testC": {"type": "string"},
                        },
                    },
                },
            }
        },
        rollup=True,
        root_id="ocid")
    schema_parser.parse()
    parser = JSONParser(
        root_json_dict=[OrderedDict([
            ("testA", [OrderedDict([("testB", "1"), ("testC", "2")])]),
        ])],
        schema_parser=schema_parser,
        root_id="ocid",
        rollup=True)
    parser.parse()
    assert list(parser.main_sheet) == ["testA/0/testB"]
    assert parser.main_sheet.lines == [{"testA/0/testB": "1"}]
    assert len(parser.sub_sheets) == 1
    assert set(parser.sub_sheets["testA"]) == {
        "ocid", "testA/0/testB", "testA/0/testC"}
    assert parser.sub_sheets["testA"].lines == [
        {"testA/0/testB": "1", "testA/0/testC": "2"}]
def test_rollup(self):
    """Rollup with a root_id copies the rolled-up column using its full path."""
    rollup_schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'rollUp': ['testB'],
                'items': {
                    'type': 'object',
                    'properties': {
                        'testB': {'type': 'string'},
                        'testC': {'type': 'string'},
                    },
                },
            },
        }
    }
    schema_parser = SchemaParser(root_schema_dict=rollup_schema,
                                 rollup=True, root_id='ocid')
    schema_parser.parse()
    root_json_dict = [OrderedDict([
        ('testA', [OrderedDict([('testB', '1'), ('testC', '2')])]),
    ])]
    parser = JSONParser(root_json_dict=root_json_dict,
                        schema_parser=schema_parser,
                        root_id='ocid',
                        rollup=True)
    parser.parse()
    assert list(parser.main_sheet) == ['testA/0/testB']
    assert parser.main_sheet.lines == [{'testA/0/testB': '1'}]
    assert len(parser.sub_sheets) == 1
    assert set(parser.sub_sheets['testA']) == {
        'ocid', 'testA/0/testB', 'testA/0/testC'}
    assert parser.sub_sheets['testA'].lines == [
        {'testA/0/testB': '1', 'testA/0/testC': '2'}]
def test_rollup_multiple_values(self, recwarn):
    """Rolling up an array with more than one entry yields a warning placeholder."""
    schema_parser = SchemaParser(root_schema_dict={
        'properties': {
            'testA': {
                'type': 'array',
                'rollUp': ['testB'],
                'items': {
                    'type': 'object',
                    'properties': {
                        'testB': {'type': 'string'},
                        'testC': {'type': 'string'},
                    },
                },
            },
        }
    }, rollup=True)
    schema_parser.parse()
    parser = JSONParser(
        root_json_dict=[OrderedDict([
            ('testA', [
                OrderedDict([('testB', '1'), ('testC', '2')]),
                OrderedDict([('testB', '3'), ('testC', '4')]),
            ]),
        ])],
        schema_parser=schema_parser)
    parser.parse()
    assert list(parser.main_sheet) == ['testA[]/testB']
    # The main sheet can't hold two values, so a warning placeholder is used.
    assert parser.main_sheet.lines == [{
        'testA[]/testB': 'WARNING: More than one value supplied, consult the relevant sub-sheet for the data.'
    }]
    assert len(parser.sub_sheets) == 1
    assert set(parser.sub_sheets['testA']) == {'ocid', 'testB', 'testC'}
    assert parser.sub_sheets['testA'].lines == [
        {'testB': '1', 'testC': '2'},
        {'testB': '3', 'testC': '4'},
    ]
    warning = recwarn.pop(UserWarning)
    assert 'Could not provide rollup' in text_type(warning.message)
def flatten(input_name, schema=None, output_name='releases',
            output_format='all', main_sheet_name='main',
            root_list_path='releases', rollup=False, root_id='ocid',
            use_titles=False, **_):
    """
    Flatten a nested structure (JSON) to a flat structure (spreadsheet - csv or xlsx).
    """
    if schema:
        schema_parser = SchemaParser(
            schema_filename=schema,
            rollup=rollup,
            root_id=root_id,
            use_titles=use_titles,
            main_sheet_name=main_sheet_name)
        schema_parser.parse()
    else:
        schema_parser = None
    parser = JSONParser(
        json_filename=input_name,
        root_list_path=root_list_path,
        schema_parser=schema_parser,
        main_sheet_name=main_sheet_name,
        root_id=root_id,
        use_titles=use_titles)
    parser.parse()

    def spreadsheet_output(spreadsheet_output_class, name):
        # Write every sheet via the selected output backend.
        writer = spreadsheet_output_class(
            parser=parser,
            main_sheet_name=main_sheet_name,
            output_name=name)
        writer.write_sheets()

    if output_format == 'all':
        for format_name, spreadsheet_output_class in OUTPUT_FORMATS.items():
            spreadsheet_output(spreadsheet_output_class,
                               output_name + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS.keys():  # in dictionary of allowed formats
        spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)
    else:
        raise Exception('The requested format is not available')
def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs,
                   input_dict, expected_output_list, recwarn, comment,
                   warning_messages, reversible):
    """Unflatten multiple input sheets and compare with the expected output."""
    extra_kwargs = {'convert_titles': convert_titles}
    extra_kwargs.update(root_id_kwargs)
    sheets = OrderedDict(
        (sheet_name, [inject_root_id(root_id, line) for line in lines])
        for sheet_name, lines in input_dict.items())
    spreadsheet_input = ListInput(sheets=sheets, **extra_kwargs)
    spreadsheet_input.read_sheets()
    parser = SchemaParser(
        root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
        root_id=root_id,
        rollup=True)
    parser.parse()
    spreadsheet_input.parser = parser
    expected_output_list = [inject_root_id(root_id, expected)
                            for expected in expected_output_list]
    assert list(spreadsheet_input.unflatten()) == expected_output_list
def test_column_matching(self, tmpdir):
    """A schema string-array maps a JSON list value onto a single column."""
    test_schema = tmpdir.join("test.json")
    test_schema.write(
        """{
        "properties": {
            "c": {
                "type": "array",
                "items": {"type": "string"}
            }
        }
    }"""
    )
    schema_parser = SchemaParser(schema_filename=test_schema.strpath)
    schema_parser.parse()
    parser = JSONParser(
        root_json_dict=[OrderedDict([("c", ["d"])])],
        schema_parser=schema_parser)
    parser.parse()
    assert list(parser.main_sheet) == ["c"]
    assert parser.main_sheet.lines == [{"c": "d"}]
    assert len(parser.sub_sheets) == 0
def test_rollup():
    """rollUp promotes the named field to the main sheet as 'testA[]/testB'."""
    rollup_schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'rollUp': ['testB'],
                'items': {
                    'type': 'object',
                    'properties': {
                        'testB': type_string,
                        'testC': type_string,
                    },
                },
            },
        }
    }
    parser = SchemaParser(root_schema_dict=rollup_schema, rollup=True)
    parser.parse()
    assert set(parser.main_sheet) == {'testA[]/testB'}
    assert set(parser.sub_sheets) == {'testA'}
    assert set(parser.sub_sheets['testA']) == {'ocid', 'testB', 'testC'}
def test_custom_main_sheet_name(self):
    """A custom main sheet name is used when qualifying sub-sheet link columns."""
    parser = SchemaParser(
        root_schema_dict={
            'properties': {
                'id': type_string,
                'testA': {
                    'type': 'object',
                    'properties': object_in_array_example_properties('testB', 'testC'),
                },
            }
        },
        main_sheet_name='custom_main_sheet_name')
    parser.parse()
    assert set(parser.main_sheet) == {'id', 'testA/id'}
    assert set(parser.sub_sheets) == {'testB'}
    assert list(parser.sub_sheets['testB']) == [
        'ocid',
        'custom_main_sheet_name/id:testB',
        'custom_main_sheet_name/testA/id:testB',
        'testC',
    ]
def test_use_titles3(recwarn, use_titles):
    """A nested object missing a title triggers a warning when titles are used."""
    # Array containing a nested object title missing
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'Atest': {
                'type': 'array',
                'title': 'ATitle',
                'items': {
                    'type': 'object',
                    'properties': {
                        'Btest': {
                            'type': 'object',
                            'properties': {
                                'Ctest': {
                                    'type': 'string',
                                    'title': 'CTitle',
                                },
                            },
                        },
                    },
                },
            },
            'Ctest': {
                'type': 'string',
                'title': 'CTitle',
            },
        }
    }, use_titles=use_titles)
    parser.parse()
    if use_titles:
        assert set(parser.main_sheet) == {'CTitle'}
        assert set(parser.sub_sheets) == {'Atest'}
        # The untitled nested field is dropped from the sub-sheet.
        assert list(parser.sub_sheets['Atest']) == []
        assert len(recwarn) == 1
        warning = recwarn.pop(UserWarning)
        assert 'Field Atest/0/Btest/Ctest is missing a title' in text_type(
            warning.message)
    else:
        assert len(recwarn) == 0
def test_two_parents(self):
    """Two arrays sharing a child sheet merge their link columns into it."""
    parser = SchemaParser(root_schema_dict={
        'properties': OrderedDict([
            ('testA', {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': object_in_array_example_properties('testB', 'testC'),
                },
            }),
            ('testD', {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': object_in_array_example_properties('testB', 'testE'),
                },
            }),
        ])
    })
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'testA', 'testB', 'testD'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'id']
    assert list(parser.sub_sheets['testD']) == ['ocid', 'id']
    assert list(parser.sub_sheets['testB']) == [
        'ocid', 'main/testA[]/id:testB', 'main/testD[]/id:testB',
        'testC', 'testE',
    ]
def test_rollup():
    """Rollup with /0/ style paths keeps both columns on the sub-sheet."""
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'Atest': {
                'type': 'array',
                'rollUp': ['Btest'],
                'items': {
                    'type': 'object',
                    'properties': {
                        'Btest': type_string,
                        'Ctest': type_string,
                    },
                },
            },
        }
    }, rollup=True)
    parser.parse()
    assert set(parser.main_sheet) == {'Atest/0/Btest'}
    assert set(parser.sub_sheets) == {'Atest'}
    assert set(parser.sub_sheets['Atest']) == {'Atest/0/Btest', 'Atest/0/Ctest'}
def create_template(schema, output_name='releases', output_format='all',
                    main_sheet_name='main', flatten=False, rollup=False,
                    root_id='ocid', use_titles=False, **_):
    """
    Create template file(s) from the given schema.

    Built to handle command-line input and arguments, but also callable
    from elsewhere in future.
    """
    parser = SchemaParser(schema_filename=schema,
                          main_sheet_name=main_sheet_name,
                          rollup=rollup,
                          root_id=root_id,
                          use_titles=use_titles)
    parser.parse()

    def spreadsheet_output(spreadsheet_output_class, name):
        # Instantiate the output backend and write all sheets.
        writer = spreadsheet_output_class(
            parser=parser,
            main_sheet_name=main_sheet_name,
            output_name=name)
        writer.write_sheets()

    if output_format == 'all':
        for format_name, spreadsheet_output_class in OUTPUT_FORMATS.items():
            spreadsheet_output(spreadsheet_output_class,
                               output_name + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS.keys():  # in dictionary of allowed formats
        spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)
    else:
        raise Exception('The requested format is not available')
def test_two_parents(self):
    """Top-level id plus two parent arrays produce four linked sub-sheets."""
    parser = SchemaParser(root_schema_dict={
        'properties': OrderedDict([
            ('id', type_string),
            ('Atest', {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': object_in_array_example_properties('Btest', 'Ctest'),
                },
            }),
            ('Dtest', {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': object_in_array_example_properties('Btest', 'Etest'),
                },
            }),
        ])
    })
    parser.parse()
    assert set(parser.main_sheet) == {'id'}
    assert set(parser.sub_sheets) == {'Atest', 'Dtest', 'Ate_Btest', 'Dte_Btest'}
    assert list(parser.sub_sheets['Atest']) == ['id', 'Atest/0/id']
    assert list(parser.sub_sheets['Dtest']) == ['id', 'Dtest/0/id']
    assert list(parser.sub_sheets['Ate_Btest']) == [
        'id', 'Atest/0/id', 'Atest/0/Btest/0/Ctest']
    assert list(parser.sub_sheets['Dte_Btest']) == [
        'id', 'Dtest/0/id', 'Dtest/0/Btest/0/Etest']
def run(sheets, schema=None, source_maps=False):
    """Unflatten sheet dicts via fancy_unflatten, returning source maps.

    Both source maps are requested only when source_maps is truthy.
    """
    input_headings = OrderedDict()
    input_sheets = OrderedDict()
    for sheet in sheets:
        parsed_rows = [OrderedDict(zip(sheet['headings'], row))
                       for row in sheet['rows']]
        input_sheets[sheet['name']] = parsed_rows
        input_headings[sheet['name']] = sheet['headings']
    if schema is not None:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id='',
            # Without this, titles from a schema aren't understood
            convert_titles=True,
        )
        # Without this, the $ref entries in the schema aren't resolved.
        dereferenced_schema = JsonRef.replace_refs(schema)
        parser = SchemaParser(root_schema_dict=dereferenced_schema,
                              root_id='main', rollup=True)
        parser.parse()
        spreadsheet_input.parser = parser
    else:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id='',
        )
    spreadsheet_input.read_sheets()
    # Normalise to a bool so the call matches the original True/False forms.
    want_maps = bool(source_maps)
    result, cell_source_map_data, heading_source_map_data = \
        spreadsheet_input.fancy_unflatten(want_maps, want_maps)
    return result, cell_source_map_data, heading_source_map_data
def run(sheets, schema=None, source_maps=False):
    """Unflatten sheet dicts; optionally also return cell/heading source maps."""
    input_headings = OrderedDict()
    input_sheets = OrderedDict()
    for sheet in sheets:
        parsed_rows = [OrderedDict(zip(sheet['headings'], row))
                       for row in sheet['rows']]
        input_sheets[sheet['name']] = parsed_rows
        input_headings[sheet['name']] = sheet['headings']
    if schema is not None:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id='',  # QUESTION: I don't understand root_id
            convert_titles=True,  # Without this, the titles aren't understood
        )
        # Without this, the $ref entries in the schema aren't resolved.
        dereferenced_schema = JsonRef.replace_refs(schema)
        parser = SchemaParser(root_schema_dict=dereferenced_schema,
                              root_id='main', rollup=True)
        parser.parse()
        spreadsheet_input.parser = parser
    else:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id='',
        )
    spreadsheet_input.read_sheets()
    if not source_maps:
        return spreadsheet_input.unflatten(), None, None
    result, cell_source_map_data, heading_source_map_data = \
        spreadsheet_input.fancy_unflatten(
            with_cell_source_map=True, with_heading_source_map=True)
    return result, cell_source_map_data, heading_source_map_data
def test_flatten(use_titles, use_schema, root_id, root_id_kwargs, input_list,
                 expected_output_list, recwarn, comment, warning_messages,
                 tmpdir, reversible):
    """Flatten input_list via a temp JSON file and compare main-sheet lines."""
    # Needed for warnings to be captured on Python 2.7/3.3;
    # 3.4 and 3.5 are fine without it.
    import warnings
    warnings.simplefilter('always')

    extra_kwargs = {'use_titles': use_titles}
    extra_kwargs.update(root_id_kwargs)

    if use_schema:
        schema_parser = SchemaParser(
            root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
            rollup=True,
            **extra_kwargs)
        schema_parser.parse()
    else:
        schema_parser = None

    with tmpdir.join('input.json').open('w') as fp:
        json.dump(
            {'mykey': [inject_root_id(root_id, input_row)
                       for input_row in input_list]},
            fp)

    parser = JSONParser(
        json_filename=tmpdir.join('input.json').strpath,
        root_list_path='mykey',
        schema_parser=schema_parser,
        **extra_kwargs)
    parser.parse()

    expected_output_list = [inject_root_id(root_id, expected)
                            for expected in expected_output_list]
    if expected_output_list == [{}]:
        # We don't expect an empty dictionary
        expected_output_list = []
    assert list(parser.main_sheet.lines) == expected_output_list
def test_parent_is_array(self):
    # An array of objects that themselves contain an array of objects:
    # both levels should become sub sheets, the main sheet stays empty.
    nested_array_schema = {
        'properties': {
            'Atest': {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': object_in_array_example_properties(
                        'Btest', 'Ctest'),
                },
            },
        },
    }
    parser = SchemaParser(root_schema_dict=nested_array_schema)
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'Atest', 'Ate_Btest'}
    assert list(parser.sub_sheets['Atest']) == ['Atest/0/id']
    assert list(parser.sub_sheets['Ate_Btest']) == [
        'Atest/0/id', 'Atest/0/Btest/0/Ctest'
    ]
def test_two_parents(self):
    # Two sibling arrays ('testA', 'testD') each containing the same child
    # array name ('testB'); the shared child gets one sub sheet with
    # reference columns back to both parents.
    def array_of_objects(child, grandchild):
        return {
            'type': 'array',
            'items': {
                'type': 'object',
                'properties': object_in_array_example_properties(
                    child, grandchild),
            },
        }

    parser = SchemaParser(root_schema_dict={
        'properties': OrderedDict([
            ('id', type_string),
            ('testA', array_of_objects('testB', 'testC')),
            ('testD', array_of_objects('testB', 'testE')),
        ])
    })
    parser.parse()
    assert set(parser.main_sheet) == {'id'}
    assert set(parser.sub_sheets) == {'testA', 'testB', 'testD'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'main/id:testA', 'id']
    assert list(parser.sub_sheets['testD']) == ['ocid', 'main/id:testD', 'id']
    assert list(parser.sub_sheets['testB']) == [
        'ocid', 'main/id:testB', 'main/testA[]/id:testB',
        'main/testD[]/id:testB', 'testC', 'testE'
    ]
def test_sub_sheet_names(self, tmpdir):
    # A schema using $ref: the sub sheet should be named after the
    # referenced definition ('testB'), not the property key ('c').
    test_schema = tmpdir.join('test.json')
    test_schema.write('''{ "properties": { "c": { "type": "array", "items": {"$ref": "#/testB"} } }, "testB": { "type": "object", "properties": { "d": { "type": "string" }, "f": { "type": "string" } } } }''')

    schema_parser = SchemaParser(schema_filename=test_schema.strpath)
    schema_parser.parse()

    parser = JSONParser(
        root_json_dict=[OrderedDict([
            ('a', 'b'),
            ('c', [OrderedDict([('d', 'e')])]),
        ])],
        schema_parser=schema_parser)
    parser.parse()

    assert list(parser.main_sheet) == ['a']
    assert parser.main_sheet.lines == [{'a': 'b'}]
    assert len(parser.sub_sheets) == 1
    assert list(parser.sub_sheets['testB']) == ['ocid', 'd', 'f']
    assert parser.sub_sheets['testB'].lines == [{'d': 'e'}]
def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs, input_dict, expected_output_list, recwarn, comment, warning_messages, reversible):
    """Unflatten ``input_dict`` through ListInput and compare the result."""
    input_kwargs = {'convert_titles': convert_titles}
    input_kwargs.update(root_id_kwargs)

    sheets = OrderedDict(
        (sheet_name, [inject_root_id(root_id, row) for row in rows])
        for sheet_name, rows in input_dict.items()
    )
    spreadsheet_input = ListInput(sheets=sheets, **input_kwargs)
    spreadsheet_input.read_sheets()

    if use_schema:
        root_schema = create_schema(root_id)
    else:
        root_schema = {"properties": {}}
    parser = SchemaParser(root_schema_dict=root_schema,
                          root_id=root_id,
                          rollup=True)
    parser.parse()
    spreadsheet_input.parser = parser

    expected = [inject_root_id(root_id, row) for row in expected_output_list]
    assert list(spreadsheet_input.unflatten()) == expected
def test_use_titles(recwarn):
    # Build the recurring schema shape once: an array 'testA' of objects
    # with a single string 'testB', plus a top-level string 'testC'.
    def build_schema(testB_schema, testC_schema):
        return {
            'properties': {
                'testA': {
                    'type': 'array',
                    'items': {
                        'type': 'object',
                        'properties': {'testB': testB_schema},
                    },
                },
                'testC': testC_schema,
            }
        }

    # All titles present
    parser = SchemaParser(
        root_schema_dict=build_schema(
            {'type': 'string', 'title': 'BTitle'},
            {'type': 'string', 'title': 'CTitle'}),
        use_titles=True)
    parser.parse()
    assert set(parser.main_sheet) == {'CTitle'}
    assert set(parser.sub_sheets) == {'testA'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'BTitle']

    # Main sheet title missing
    parser = SchemaParser(
        root_schema_dict=build_schema(
            {'type': 'string', 'title': 'BTitle'},
            {'type': 'string'}),
        use_titles=True)
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'testA'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'BTitle']
    w = recwarn.pop(UserWarning)
    assert 'does not have a title' in text_type(w.message)

    # Child sheet title missing
    parser = SchemaParser(
        root_schema_dict=build_schema(
            {'type': 'string'},
            {'type': 'string', 'title': 'CTitle'}),
        use_titles=True)
    parser.parse()
    assert set(parser.main_sheet) == {'CTitle'}
    assert set(parser.sub_sheets) == {'testA'}
    assert list(parser.sub_sheets['testA']) == ['ocid']
    w = recwarn.pop(UserWarning)
    assert 'does not have a title' in text_type(w.message)
def test_use_titles2(recwarn, use_titles):
    # Object containing object title missing: the outer object 'Xtest' has
    # no title, so its nested field is skipped (with a warning) when titles
    # are in use.
    nested_object_schema = {
        'properties': {
            'Xtest': {
                'type': 'object',
                'properties': {
                    'Atest': {
                        'type': 'object',
                        'title': 'ATitle',
                        'properties': {
                            'Btest': {'type': 'string', 'title': 'BTitle'},
                        },
                    },
                },
            },
            'Ctest': {'type': 'string', 'title': 'CTitle'},
        }
    }
    parser = SchemaParser(root_schema_dict=nested_object_schema,
                          use_titles=use_titles)
    parser.parse()
    if not use_titles:
        assert len(recwarn) == 0
    else:
        assert set(parser.main_sheet) == {'CTitle'}
        assert set(parser.sub_sheets) == set()
        assert len(recwarn) == 1
        w = recwarn.pop(UserWarning)
        assert 'Field Xtest/Atest/Btest does not have a title, skipping' in text_type(
            w.message)

    # Main sheet title missing: 'Ctest' has no title.
    array_schema = {
        'properties': {
            'Atest': {
                'title': 'ATitle',
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': {
                        'Btest': {'type': 'string', 'title': 'BTitle'},
                    },
                },
            },
            'Ctest': {'type': 'string'},
        }
    }
    parser = SchemaParser(root_schema_dict=array_schema,
                          use_titles=use_titles)
    parser.parse()
    if not use_titles:
        assert len(recwarn) == 0
    else:
        assert set(parser.main_sheet) == set()
        assert set(parser.sub_sheets) == {'Atest'}
        assert list(parser.sub_sheets['Atest']) == ['ATitle:BTitle']
        assert len(recwarn) == 1
        w = recwarn.pop(UserWarning)
        assert 'Field Ctest does not have a title' in text_type(w.message)
def unflatten(input_name, base_json=None, input_format=None, output_name=None,
              root_list_path=None, root_is_list=False, encoding='utf8',
              timezone_name='UTC', root_id=None, schema='', convert_titles=False,
              cell_source_map=None, heading_source_map=None, id_name=None,
              xml=False, vertical_orientation=False, metatab_name=None,
              metatab_only=False, metatab_schema='',
              metatab_vertical_orientation=False, xml_schemas=None,
              default_configuration='', disable_local_refs=False,
              xml_comment=None, truncation_length=3, **_):
    """
    Unflatten a flat structure (spreadsheet - csv or xlsx) into a nested structure (JSON).

    ``input_format`` must be a key of INPUT_FORMATS. ``base_json`` optionally
    names a JSON file whose top-level object the result is merged into (not
    allowed together with ``metatab_name``). If ``cell_source_map`` /
    ``heading_source_map`` are given, source-map JSON files are written to
    those paths. Output goes to ``output_name``, or stdout when it is None.

    Raises Exception for a missing or unknown input format, or when
    base_json is combined with metatab_name.
    """
    if input_format is None:
        # NOTE: closing parenthesis added to the original message, which read
        # "(may autodetect in future" with no ")".
        raise Exception('You must specify an input format (may autodetect in future)')
    elif input_format not in INPUT_FORMATS:
        raise Exception('The requested format is not available')
    if metatab_name and base_json:
        raise Exception('Not allowed to use base_json with metatab')

    # The base object the unflattened rows are attached to: None for a bare
    # list, the contents of base_json if supplied, else a fresh dict.
    if root_is_list:
        base = None
    elif base_json:
        with open(base_json) as fp:
            base = json.load(fp, object_pairs_hook=OrderedDict)
    else:
        base = OrderedDict()

    base_configuration = parse_sheet_configuration(
        [item.strip() for item in default_configuration.split(",")]
    )

    cell_source_map_data = OrderedDict()
    heading_source_map_data = OrderedDict()

    if metatab_name and not root_is_list:
        # First pass: read only the metatab sheet; its row is merged into the
        # top level of `base` and may override configuration defaults.
        spreadsheet_input_class = INPUT_FORMATS[input_format]
        spreadsheet_input = spreadsheet_input_class(
            input_name=input_name,
            timezone_name=timezone_name,
            root_list_path='meta',
            include_sheets=[metatab_name],
            convert_titles=convert_titles,
            vertical_orientation=metatab_vertical_orientation,
            id_name=id_name,
            xml=xml,
            use_configuration=False
        )
        if metatab_schema:
            parser = SchemaParser(schema_filename=metatab_schema,
                                  disable_local_refs=disable_local_refs)
            parser.parse()
            spreadsheet_input.parser = parser
        spreadsheet_input.encoding = encoding
        spreadsheet_input.read_sheets()
        result, cell_source_map_data_meta, heading_source_map_data_meta = spreadsheet_input.fancy_unflatten(
            with_cell_source_map=cell_source_map,
            with_heading_source_map=heading_source_map,
        )
        for key, value in (cell_source_map_data_meta or {}).items():
            ## strip off meta/0/ from start of source map as actually data is at top level
            cell_source_map_data[key[7:]] = value
        for key, value in (heading_source_map_data_meta or {}).items():
            ## strip off meta/ from start of source map as actually data is at top level
            heading_source_map_data[key[5:]] = value
        # update individual keys from base configuration
        base_configuration.update(
            spreadsheet_input.sheet_configuration.get(metatab_name, {}))
        if result:
            base.update(result[0])

    # Configuration (possibly from the metatab) supplies defaults for
    # arguments the caller left unset.
    if root_list_path is None:
        root_list_path = base_configuration.get('RootListPath', 'main')
    if id_name is None:
        id_name = base_configuration.get('IDName', 'id')

    if not metatab_only or root_is_list:
        # Second pass: unflatten all data sheets (excluding the metatab).
        spreadsheet_input_class = INPUT_FORMATS[input_format]
        spreadsheet_input = spreadsheet_input_class(
            input_name=input_name,
            timezone_name=timezone_name,
            root_list_path=root_list_path,
            root_is_list=root_is_list,
            root_id=root_id,
            convert_titles=convert_titles,
            exclude_sheets=[metatab_name],
            vertical_orientation=vertical_orientation,
            id_name=id_name,
            xml=xml,
            base_configuration=base_configuration
        )
        if schema:
            parser = SchemaParser(schema_filename=schema, rollup=True,
                                  root_id=root_id,
                                  disable_local_refs=disable_local_refs,
                                  truncation_length=truncation_length)
            parser.parse()
            spreadsheet_input.parser = parser
        spreadsheet_input.encoding = encoding
        spreadsheet_input.read_sheets()
        result, cell_source_map_data_main, heading_source_map_data_main = spreadsheet_input.fancy_unflatten(
            with_cell_source_map=cell_source_map,
            with_heading_source_map=heading_source_map,
        )
        cell_source_map_data.update(cell_source_map_data_main or {})
        heading_source_map_data.update(heading_source_map_data_main or {})
        if root_is_list:
            base = list(result)
        else:
            base[root_list_path] = list(result)

    if xml:
        xml_root_tag = base_configuration.get('XMLRootTag', 'iati-activities')
        xml_output = toxml(
            base, xml_root_tag, xml_schemas=xml_schemas,
            root_list_path=root_list_path, xml_comment=xml_comment)
        if output_name is None:
            # xml_output is bytes: on Python 3 write via the buffer.
            if sys.version > '3':
                sys.stdout.buffer.write(xml_output)
            else:
                sys.stdout.write(xml_output)
        else:
            with codecs.open(output_name, 'wb') as fp:
                fp.write(xml_output)
    else:
        if output_name is None:
            print(json.dumps(base, indent=4, default=decimal_default,
                             ensure_ascii=False))
        else:
            with codecs.open(output_name, 'w', encoding='utf-8') as fp:
                json.dump(base, fp, indent=4, default=decimal_default,
                          ensure_ascii=False)
    if cell_source_map:
        with codecs.open(cell_source_map, 'w', encoding='utf-8') as fp:
            json.dump(cell_source_map_data, fp, indent=4,
                      default=decimal_default, ensure_ascii=False)
    if heading_source_map:
        with codecs.open(heading_source_map, 'w', encoding='utf-8') as fp:
            json.dump(heading_source_map_data, fp, indent=4,
                      default=decimal_default, ensure_ascii=False)
def flatten(input_name, schema=None, output_name=None, output_format='all',
            main_sheet_name='main', root_list_path='main', root_is_list=False,
            sheet_prefix='', filter_field=None, filter_value=None,
            preserve_fields=None, rollup=False, root_id=None, use_titles=False,
            xml=False, id_name='id', disable_local_refs=False,
            remove_empty_schema_columns=False, truncation_length=3, **_):
    """
    Flatten a nested structure (JSON) to a flat structure (spreadsheet - csv or xlsx).
    """
    # filter_field / filter_value only make sense as a pair.
    if (filter_field is None) != (filter_value is None):
        raise Exception('You must use filter_field and filter_value together')

    schema_parser = None
    if schema:
        schema_parser = SchemaParser(
            schema_filename=schema,
            rollup=rollup,
            root_id=root_id,
            use_titles=use_titles,
            disable_local_refs=disable_local_refs,
            truncation_length=truncation_length)
        schema_parser.parse()

    parser = JSONParser(
        json_filename=input_name,
        root_list_path=None if root_is_list else root_list_path,
        schema_parser=schema_parser,
        rollup=rollup,
        root_id=root_id,
        use_titles=use_titles,
        xml=xml,
        id_name=id_name,
        filter_field=filter_field,
        filter_value=filter_value,
        preserve_fields=preserve_fields,
        remove_empty_schema_columns=remove_empty_schema_columns,
        truncation_length=truncation_length)
    parser.parse()

    def write_sheets_with(writer_class, name):
        # Instantiate the output writer for one format and emit the sheets.
        writer = writer_class(
            parser=parser,
            main_sheet_name=main_sheet_name,
            output_name=name,
            sheet_prefix=sheet_prefix)
        writer.write_sheets()

    if output_format == 'all':
        base_name = output_name or 'flattened'
        for format_name, writer_class in OUTPUT_FORMATS.items():
            write_sheets_with(writer_class,
                              base_name + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS:  # in dictionary of allowed formats
        write_sheets_with(
            OUTPUT_FORMATS[output_format],
            output_name or 'flattened' + FORMATS_SUFFIX[output_format])
    else:
        raise Exception('The requested format is not available')
def unflatten(input_name, base_json=None, input_format=None, output_name=None,
              root_list_path=None, root_is_list=False, encoding='utf8',
              timezone_name='UTC', root_id=None, schema='', convert_titles=False,
              cell_source_map=None, heading_source_map=None, id_name=None,
              xml=False, vertical_orientation=False, metatab_name=None,
              metatab_only=False, metatab_schema='',
              metatab_vertical_orientation=False, xml_schemas=None,
              default_configuration='', disable_local_refs=False,
              xml_comment=None, truncation_length=3, **_):
    """
    Unflatten a flat structure (spreadsheet - csv or xlsx) into a nested structure (JSON).

    ``input_format`` must be a key of INPUT_FORMATS. ``base_json`` optionally
    names a JSON file whose top-level object the result is merged into (not
    allowed together with ``metatab_name``). If ``cell_source_map`` /
    ``heading_source_map`` are given, source-map JSON files are written to
    those paths. Output goes to ``output_name``, or stdout when it is None.

    Raises Exception for a missing or unknown input format, or when
    base_json is combined with metatab_name.
    """
    if input_format is None:
        # NOTE: closing parenthesis added to the original message, which read
        # "(may autodetect in future" with no ")".
        raise Exception(
            'You must specify an input format (may autodetect in future)')
    elif input_format not in INPUT_FORMATS:
        raise Exception('The requested format is not available')
    if metatab_name and base_json:
        raise Exception('Not allowed to use base_json with metatab')

    # The base object the unflattened rows are attached to: None for a bare
    # list, the contents of base_json if supplied, else a fresh dict.
    if root_is_list:
        base = None
    elif base_json:
        with open(base_json) as fp:
            base = json.load(fp, object_pairs_hook=OrderedDict)
    else:
        base = OrderedDict()

    base_configuration = parse_sheet_configuration(
        [item.strip() for item in default_configuration.split(",")])

    cell_source_map_data = OrderedDict()
    heading_source_map_data = OrderedDict()

    if metatab_name and not root_is_list:
        # First pass: read only the metatab sheet; its row is merged into the
        # top level of `base` and may override configuration defaults.
        spreadsheet_input_class = INPUT_FORMATS[input_format]
        spreadsheet_input = spreadsheet_input_class(
            input_name=input_name,
            timezone_name=timezone_name,
            root_list_path='meta',
            include_sheets=[metatab_name],
            convert_titles=convert_titles,
            vertical_orientation=metatab_vertical_orientation,
            id_name=id_name,
            xml=xml,
            use_configuration=False)
        if metatab_schema:
            parser = SchemaParser(schema_filename=metatab_schema,
                                  disable_local_refs=disable_local_refs)
            parser.parse()
            spreadsheet_input.parser = parser
        spreadsheet_input.encoding = encoding
        spreadsheet_input.read_sheets()
        result, cell_source_map_data_meta, heading_source_map_data_meta = spreadsheet_input.fancy_unflatten(
            with_cell_source_map=cell_source_map,
            with_heading_source_map=heading_source_map,
        )
        for key, value in (cell_source_map_data_meta or {}).items():
            ## strip off meta/0/ from start of source map as actually data is at top level
            cell_source_map_data[key[7:]] = value
        for key, value in (heading_source_map_data_meta or {}).items():
            ## strip off meta/ from start of source map as actually data is at top level
            heading_source_map_data[key[5:]] = value
        # update individual keys from base configuration
        base_configuration.update(
            spreadsheet_input.sheet_configuration.get(metatab_name, {}))
        if result:
            base.update(result[0])

    # Configuration (possibly from the metatab) supplies defaults for
    # arguments the caller left unset.
    if root_list_path is None:
        root_list_path = base_configuration.get('RootListPath', 'main')
    if id_name is None:
        id_name = base_configuration.get('IDName', 'id')

    if not metatab_only or root_is_list:
        # Second pass: unflatten all data sheets (excluding the metatab).
        spreadsheet_input_class = INPUT_FORMATS[input_format]
        spreadsheet_input = spreadsheet_input_class(
            input_name=input_name,
            timezone_name=timezone_name,
            root_list_path=root_list_path,
            root_is_list=root_is_list,
            root_id=root_id,
            convert_titles=convert_titles,
            exclude_sheets=[metatab_name],
            vertical_orientation=vertical_orientation,
            id_name=id_name,
            xml=xml,
            base_configuration=base_configuration)
        if schema:
            parser = SchemaParser(schema_filename=schema, rollup=True,
                                  root_id=root_id,
                                  disable_local_refs=disable_local_refs,
                                  truncation_length=truncation_length)
            parser.parse()
            spreadsheet_input.parser = parser
        spreadsheet_input.encoding = encoding
        spreadsheet_input.read_sheets()
        result, cell_source_map_data_main, heading_source_map_data_main = spreadsheet_input.fancy_unflatten(
            with_cell_source_map=cell_source_map,
            with_heading_source_map=heading_source_map,
        )
        cell_source_map_data.update(cell_source_map_data_main or {})
        heading_source_map_data.update(heading_source_map_data_main or {})
        if root_is_list:
            base = list(result)
        else:
            base[root_list_path] = list(result)

    if xml:
        xml_root_tag = base_configuration.get('XMLRootTag', 'iati-activities')
        xml_output = toxml(base, xml_root_tag, xml_schemas=xml_schemas,
                           root_list_path=root_list_path,
                           xml_comment=xml_comment)
        if output_name is None:
            # xml_output is bytes: write via the stdout buffer.
            sys.stdout.buffer.write(xml_output)
        else:
            with codecs.open(output_name, 'wb') as fp:
                fp.write(xml_output)
    else:
        if output_name is None:
            print(
                json.dumps(base, indent=4, default=decimal_default,
                           ensure_ascii=False))
        else:
            with codecs.open(output_name, 'w', encoding='utf-8') as fp:
                json.dump(base, fp, indent=4, default=decimal_default,
                          ensure_ascii=False)
    if cell_source_map:
        with codecs.open(cell_source_map, 'w', encoding='utf-8') as fp:
            json.dump(cell_source_map_data, fp, indent=4,
                      default=decimal_default, ensure_ascii=False)
    if heading_source_map:
        with codecs.open(heading_source_map, 'w', encoding='utf-8') as fp:
            json.dump(heading_source_map_data, fp, indent=4,
                      default=decimal_default, ensure_ascii=False)
def test_two_parents(self):
    # This is a copy of test_two_parents from test_schema_parser.py, in
    # order to check that flattening and template generation use the same
    # sheet names
    def array_of_objects(child, grandchild):
        return {
            "type": "array",
            "items": {
                "type": "object",
                "properties": object_in_array_example_properties(
                    child, grandchild),
            },
        }

    schema_parser = SchemaParser(root_schema_dict={
        "properties": OrderedDict([
            ("Atest", array_of_objects("Btest", "Ctest")),
            ("Dtest", array_of_objects("Btest", "Etest")),
        ])
    })
    schema_parser.parse()

    parser = JSONParser(
        root_json_dict=[{
            "Atest": [{"id": 1, "Btest": [{"Ctest": 2}]}],
            "Dtest": [{"id": 3, "Btest": [{"Etest": 4}]}],
        }],
        schema_parser=schema_parser,
    )
    parser.parse()

    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {
        "Atest", "Dtest", "Ate_Btest", "Dte_Btest"
    }
    assert list(parser.sub_sheets["Atest"]) == ["Atest/0/id"]
    assert list(parser.sub_sheets["Dtest"]) == ["Dtest/0/id"]
    assert list(parser.sub_sheets["Ate_Btest"]) == [
        "Atest/0/id", "Atest/0/Btest/0/Ctest",
    ]
    assert list(parser.sub_sheets["Dte_Btest"]) == [
        "Dtest/0/id", "Dtest/0/Btest/0/Etest",
    ]