def test_filename_and_dict_error(tmpdir):
    """SchemaParser must reject being given both schema sources, or neither."""
    schema_file = tmpdir.join('test_schema.json')
    schema_file.write('{}')
    # Passing both a filename and a dict is ambiguous input.
    with pytest.raises(ValueError):
        SchemaParser(schema_filename=schema_file.strpath, root_schema_dict={})
    # Supplying neither should also raise a ValueError
    with pytest.raises(ValueError):
        SchemaParser()
def test_parent_is_array(self):
    """An array parent yields a sub-sheet carrying the parent's id columns."""
    schema = {
        'properties': {
            'id': type_string,
            'testA': {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': object_in_array_example_properties('testB', 'testC'),
                },
            },
        }
    }
    parser = SchemaParser(root_schema_dict=schema)
    parser.parse()
    assert set(parser.main_sheet) == {'id'}
    assert set(parser.sub_sheets) == {'testA', 'testB'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'main/id:testA', 'id']
    assert list(parser.sub_sheets['testB']) == [
        'ocid', 'main/id:testB', 'main/testA[]/id:testB', 'testC']
def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs,
                   input_list, expected_output_list, recwarn, comment,
                   warning_messages, reversible):
    """Unflatten a single 'custom_main' sheet and compare to the expectation."""
    # Not sure why, but this seems to be necessary to have warnings picked up
    # on Python 2.7 and 3.3, but 3.4 and 3.5 are fine without it
    import warnings
    warnings.simplefilter('always')
    kwargs = {'convert_titles': convert_titles}
    kwargs.update(root_id_kwargs)
    rows = [inject_root_id(root_id, input_row) for input_row in input_list]
    spreadsheet_input = ListInput(sheets={'custom_main': rows}, **kwargs)
    spreadsheet_input.read_sheets()
    schema_dict = create_schema(root_id) if use_schema else {"properties": {}}
    parser = SchemaParser(root_schema_dict=schema_dict, root_id=root_id,
                          rollup=True)
    parser.parse()
    spreadsheet_input.parser = parser
    expected = [inject_root_id(root_id, d) for d in expected_output_list]
    if expected == [{}]:
        # We don't expect an empty dictionary
        expected = []
    assert list(spreadsheet_input.unflatten()) == expected
    # We expect no warning_messages
    if not convert_titles:
        # TODO what are the warning_messages here
        assert [str(x.message) for x in recwarn.list] == warning_messages
def test_two_parents(self):
    """A sub-sheet shared by two array parents gets id columns from both."""
    properties = OrderedDict()
    properties['testA'] = {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': object_in_array_example_properties('testB', 'testC'),
        },
    }
    properties['testD'] = {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': object_in_array_example_properties('testB', 'testE'),
        },
    }
    parser = SchemaParser(root_schema_dict={'properties': properties})
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'testA', 'testB', 'testD'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'id']
    assert list(parser.sub_sheets['testD']) == ['ocid', 'id']
    assert list(parser.sub_sheets['testB']) == [
        'ocid', 'main/testA[]/id:testB', 'main/testD[]/id:testB',
        'testC', 'testE']
def test_bad_rollup(recwarn):
    '''
    When rollUp is specified, but the field is missing in the schema, we
    expect a warning.
    '''
    parser = SchemaParser(
        root_schema_dict={
            'properties': {
                'testA': {
                    'type': 'array',
                    'rollUp': ['testB'],
                    'items': {
                        'type': 'object',
                        'properties': {'testC': type_string},
                    },
                },
            }
        },
        rollup=True)
    parser.parse()
    warning = recwarn.pop(UserWarning)
    assert 'testB in rollUp but not in schema' in text_type(warning.message)
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'testA'}
    assert set(parser.sub_sheets['testA']) == {'ocid', 'testC'}
def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs,
                   input_dict, expected_output_list, recwarn, comment,
                   warning_messages, reversible):
    """Unflatten several named input sheets and compare to the expected list."""
    extra_kwargs = {"convert_titles": convert_titles}
    extra_kwargs.update(root_id_kwargs)
    sheets = OrderedDict(
        (sheet_name, [inject_root_id(root_id, line) for line in lines])
        for sheet_name, lines in input_dict.items())
    spreadsheet_input = ListInput(sheets=sheets, **extra_kwargs)
    spreadsheet_input.read_sheets()
    parser = SchemaParser(
        root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
        root_id=root_id,
        rollup=True,
    )
    parser.parse()
    spreadsheet_input.parser = parser
    expected = [inject_root_id(root_id, d) for d in expected_output_list]
    assert list(spreadsheet_input.unflatten()) == expected
def test_use_titles5(recwarn):
    """A child field without a title is left off the sheet and warned about."""
    # Child sheet title missing
    parser = SchemaParser(
        root_schema_dict={
            'properties': {
                'Atest': {
                    'title': 'ATitle',
                    'type': 'array',
                    'items': {
                        'type': 'object',
                        'properties': {'Btest': {'type': 'string'}},
                    },
                },
                'Ctest': {'type': 'string', 'title': 'CTitle'},
            }
        },
        use_titles=True)
    parser.parse()
    assert set(parser.main_sheet) == {'CTitle'}
    assert set(parser.sub_sheets) == {'Atest'}
    assert list(parser.sub_sheets['Atest']) == []
    warning = recwarn.pop(UserWarning)
    assert 'Field Atest/0/Btest is missing a title' in text_type(warning.message)
def test_titles_rollup():
    """Rolled-up columns should use field titles when use_titles is enabled."""
    schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'title': 'ATitle',
                'rollUp': ['testB'],
                'items': {
                    'type': 'object',
                    'properties': {
                        'testB': {'type': 'string', 'title': 'BTitle'},
                        'testC': {'type': 'string', 'title': 'CTitle'},
                    },
                },
            },
        }
    }
    parser = SchemaParser(root_schema_dict=schema, rollup=True, use_titles=True)
    parser.parse()
    assert set(parser.main_sheet) == {'ATitle:BTitle'}
    assert set(parser.sub_sheets) == {'testA'}
    assert set(parser.sub_sheets['testA']) == {'ocid', 'BTitle', 'CTitle'}
def test_sub_sheet_names(self, tmpdir):
    """The sub-sheet for a $ref'd array is named after the referenced definition."""
    test_schema = tmpdir.join('test.json')
    test_schema.write('''{ "properties": { "c": { "type": "array", "items": {"$ref": "#/testB"} } }, "testB": { "type": "object", "properties": { "d": { "type": "string" }, "f": { "type": "string" } } } }''')
    schema_parser = SchemaParser(schema_filename=test_schema.strpath)
    schema_parser.parse()
    root = OrderedDict([
        ('a', 'b'),
        ('c', [OrderedDict([('d', 'e')])]),
    ])
    parser = JSONParser(root_json_dict=[root], schema_parser=schema_parser)
    parser.parse()
    assert list(parser.main_sheet) == ['a']
    assert parser.main_sheet.lines == [{'a': 'b'}]
    assert len(parser.sub_sheets) == 1
    assert list(parser.sub_sheets['testB']) == ['ocid', 'd', 'f']
    assert parser.sub_sheets['testB'].lines == [{'d': 'e'}]
def test_flatten_multiplesheets(use_titles, use_schema, root_id, root_id_kwargs,
                                input_list, expected_output_dict, recwarn,
                                comment, warning_messages, tmpdir, reversible):
    """Flatten a JSON file and compare every non-empty sheet to the expectation."""
    # Not sure why, but this seems to be necessary to have warnings picked up
    # on Python 2.7 and 3.3, but 3.4 and 3.5 are fine without it
    import warnings
    warnings.simplefilter("always")
    extra_kwargs = {"use_titles": use_titles}
    extra_kwargs.update(root_id_kwargs)
    if use_schema:
        schema_parser = SchemaParser(
            root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
            rollup=True,
            **extra_kwargs)
        schema_parser.parse()
    else:
        schema_parser = None
    with tmpdir.join("input.json").open("w") as fp:
        json.dump(
            {"mykey": [inject_root_id(root_id, row) for row in input_list]},
            fp,
        )
    parser = JSONParser(
        json_filename=tmpdir.join("input.json").strpath,
        root_list_path="mykey",
        schema_parser=schema_parser,
        **extra_kwargs)
    parser.parse()
    expected = OrderedDict(
        (sheet_name, [inject_root_id(root_id, line) for line in lines])
        for sheet_name, lines in expected_output_dict.items())
    # Only sheets that actually have rows are compared.
    output = {name: sheet.lines
              for name, sheet in parser.sub_sheets.items() if sheet.lines}
    output["custom_main"] = parser.main_sheet.lines
    assert output == expected
def test_main_sheet_basic():
    """Two top-level string fields both land on the main sheet."""
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'testA': type_string,
            'testB': type_string,
        }
    })
    parser.parse()
    assert set(parser.main_sheet) == {'testA', 'testB'}
def test_flatten(use_titles, use_schema, root_id, root_id_kwargs, input_list,
                 expected_output_list, recwarn, comment, warning_messages,
                 tmpdir, reversible):
    """Flatten a JSON file and compare the main sheet rows to the expectation."""
    # Not sure why, but this seems to be necessary to have warnings picked up
    # on Python 2.7 and 3.3, but 3.4 and 3.5 are fine without it
    import warnings
    warnings.simplefilter("always")
    extra_kwargs = {"use_titles": use_titles}
    extra_kwargs.update(root_id_kwargs)
    if use_schema:
        schema_parser = SchemaParser(
            root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
            rollup=True,
            **extra_kwargs)
        schema_parser.parse()
    else:
        schema_parser = None
    with tmpdir.join("input.json").open("w") as fp:
        json.dump(
            {"mykey": [inject_root_id(root_id, row) for row in input_list]},
            fp,
        )
    parser = JSONParser(
        json_filename=tmpdir.join("input.json").strpath,
        root_list_path="mykey",
        schema_parser=schema_parser,
        **extra_kwargs)
    parser.parse()
    expected = [inject_root_id(root_id, d) for d in expected_output_list]
    if expected == [{}]:
        # We don't expect an empty dictionary
        expected = []
    assert list(parser.main_sheet.lines) == expected
def test_two_parents(self):
    # This is a copy of test_two_parents from test_schema_parser.py, in
    # order to check that flattening and template generation use the same
    # sheet names
    properties = OrderedDict()
    properties['Atest'] = {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': object_in_array_example_properties('Btest', 'Ctest'),
        },
    }
    properties['Dtest'] = {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': object_in_array_example_properties('Btest', 'Etest'),
        },
    }
    schema_parser = SchemaParser(root_schema_dict={'properties': properties})
    schema_parser.parse()
    root = {
        'Atest': [{'id': 1, 'Btest': [{'Ctest': 2}]}],
        'Dtest': [{'id': 3, 'Btest': [{'Etest': 4}]}],
    }
    parser = JSONParser(root_json_dict=[root], schema_parser=schema_parser)
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'Atest', 'Dtest', 'Ate_Btest', 'Dte_Btest'}
    assert list(parser.sub_sheets['Atest']) == ['Atest/0/id']
    assert list(parser.sub_sheets['Dtest']) == ['Dtest/0/id']
    assert list(parser.sub_sheets['Ate_Btest']) == [
        'Atest/0/id', 'Atest/0/Btest/0/Ctest']
    assert list(parser.sub_sheets['Dte_Btest']) == [
        'Dtest/0/id', 'Dtest/0/Btest/0/Etest']
def test_main_sheet_basic():
    """A field with an empty type spec still appears on the main sheet."""
    schema = {
        'properties': {
            'Atest': type_string,
            # type is allowed to be empty, and we should assume string
            'Btest': {},
        }
    }
    parser = SchemaParser(root_schema_dict=schema)
    parser.parse()
    assert set(parser.main_sheet) == {'Atest', 'Btest'}
def test_rollup_multiple_values(self, recwarn):
    """Rolling up a field that has several values warns and emits a placeholder."""
    schema = {
        "properties": {
            "testA": {
                "type": "array",
                "rollUp": ["testB"],
                "items": {
                    "type": "object",
                    "properties": {
                        "testB": {"type": "string"},
                        "testC": {"type": "string"},
                    },
                },
            },
        }
    }
    schema_parser = SchemaParser(root_schema_dict=schema, rollup=True)
    schema_parser.parse()
    data = OrderedDict([
        ("testA", [
            OrderedDict([("testB", "1"), ("testC", "2")]),
            OrderedDict([("testB", "3"), ("testC", "4")]),
        ]),
    ])
    parser = JSONParser(root_json_dict=[data], schema_parser=schema_parser,
                        rollup=True)
    parser.parse()
    assert list(parser.main_sheet) == ["testA/0/testB"]
    assert parser.main_sheet.lines == [{
        "testA/0/testB": "WARNING: More than one value supplied, consult the relevant sub-sheet for the data."
    }]
    assert len(parser.sub_sheets) == 1
    assert set(parser.sub_sheets["testA"]) == {"testA/0/testB", "testA/0/testC"}
    assert parser.sub_sheets["testA"].lines == [
        {"testA/0/testB": "1", "testA/0/testC": "2"},
        {"testA/0/testB": "3", "testA/0/testC": "4"},
    ]
    warning = recwarn.pop(UserWarning)
    assert "Could not provide rollup" in str(warning.message)
def test_simple_array(type_):
    """An array of any simple item type is a single column on the main sheet."""
    schema = {
        'properties': {
            'Atest': {
                'type': 'array',
                'items': {'type': type_},
            }
        }
    }
    parser = SchemaParser(root_schema_dict=schema)
    parser.parse()
    assert set(parser.main_sheet) == {'Atest'}
def test_sub_sheets(self, tmpdir, remove_empty_schema_columns):
    """Schema-declared arrays become sub-sheets; empty-schema columns are
    dropped when remove_empty_schema_columns is set.
    """
    test_schema = tmpdir.join('test.json')
    test_schema.write('''{ "properties": { "c": { "type": "array", "items": {"$ref": "#/testB"} }, "g": { "type": "array", "items": { "type": "object", "properties": { "h": { "type": "string"} } } } }, "testB": { "type": "object", "properties": { "d": { "type": "string" }, "f": { "type": "string" } } } }''')
    schema_parser = SchemaParser(schema_filename=test_schema.strpath,
                                 root_id='ocid')
    schema_parser.parse()
    parser = JSONParser(
        root_json_dict=[
            OrderedDict([
                ('a', 'b'),
                ('c', [OrderedDict([('d', 'e')])]),
            ])
        ],
        schema_parser=schema_parser,
        remove_empty_schema_columns=remove_empty_schema_columns,
    )
    parser.parse()
    assert list(parser.main_sheet) == ['a']
    assert parser.main_sheet.lines == [{'a': 'b'}]
    # BUG FIX: the original `assert len(...) == 2 if cond else 1` parsed as
    # `assert ((len(...) == 2) if cond else 1)`, so in the
    # remove_empty_schema_columns=True case it asserted the truthy constant 1
    # and could never fail. Parenthesise the conditional so both branches are
    # actually checked.
    assert len(parser.sub_sheets) == (2 if not remove_empty_schema_columns else 1)
    if not remove_empty_schema_columns:
        assert list(parser.sub_sheets['c']) == ['ocid', 'c/0/d', 'c/0/f']
        assert list(parser.sub_sheets['g']) == ['ocid', 'g/0/h']
    else:
        # 'g' has no data, so it is removed entirely; 'f' has no data either.
        assert list(parser.sub_sheets['c']) == ['ocid', 'c/0/d']
    assert parser.sub_sheets['c'].lines == [{'c/0/d': 'e'}]
def test_main_sheet_nested():
    """Nested object properties flatten into slash-separated main sheet columns."""
    schema = {
        'properties': {
            'testA': {
                'type': 'object',
                'properties': {'testC': type_string},
            }
        }
    }
    parser = SchemaParser(root_schema_dict=schema)
    parser.parse()
    assert set(parser.main_sheet) == {'testA/testC'}
def test_rollup_multiple_values(self, recwarn):
    """Rolling up a field with several values warns and points at the sub-sheet."""
    schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'rollUp': ['testB'],
                'items': {
                    'type': 'object',
                    'properties': {
                        'testB': {'type': 'string'},
                        'testC': {'type': 'string'},
                    },
                },
            },
        }
    }
    schema_parser = SchemaParser(root_schema_dict=schema, rollup=True)
    schema_parser.parse()
    data = OrderedDict([
        ('testA', [
            OrderedDict([('testB', '1'), ('testC', '2')]),
            OrderedDict([('testB', '3'), ('testC', '4')]),
        ]),
    ])
    parser = JSONParser(root_json_dict=[data], schema_parser=schema_parser,
                        rollup=True)
    parser.parse()
    assert list(parser.main_sheet) == ['testA/0/testB']
    assert parser.main_sheet.lines == [{
        'testA/0/testB': 'WARNING: More than one value supplied, consult the relevant sub-sheet for the data.'
    }]
    assert len(parser.sub_sheets) == 1
    assert set(parser.sub_sheets['testA']) == {'testA/0/testB', 'testA/0/testC'}
    assert parser.sub_sheets['testA'].lines == [
        {'testA/0/testB': '1', 'testA/0/testC': '2'},
        {'testA/0/testB': '3', 'testA/0/testC': '4'},
    ]
    warning = recwarn.pop(UserWarning)
    assert 'Could not provide rollup' in str(warning.message)
def test_references_sheet_names(tmpdir):
    """The referenced name should be used for the sheet name"""
    schema_file = tmpdir.join('test_schema.json')
    schema_file.write('''{ "properties": { "testA": { "type": "array", "items": {"$ref": "#/testB"} } }, "testB": { "type": "object", "properties": {"testC":{"type": "string"}} } }''')
    parser = SchemaParser(schema_filename=schema_file.strpath)
    parser.parse()
    assert set(parser.sub_sheets) == {'testB'}
    assert list(parser.sub_sheets['testB']) == ['ocid', 'testC']
def test_simple_array():
    """An array of strings appears on the main sheet with an ':array' suffix."""
    schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'items': {'type': 'string'},
            }
        }
    }
    parser = SchemaParser(root_schema_dict=schema,
                          main_sheet_name='custom_main_sheet_name')
    parser.parse()
    assert set(parser.main_sheet) == {'testA:array'}
def unflatten(input_name, base_json=None, input_format=None,
              output_name='releases.json', main_sheet_name='releases',
              encoding='utf8', timezone_name='UTC', root_id='ocid', schema='',
              convert_titles=False, **_):
    """
    Unflatten a flat structure (spreadsheet - csv or xlsx) into a nested
    structure (JSON).

    Args:
        input_name: path of the spreadsheet to read.
        base_json: optional path of a JSON file to merge the output into.
        input_format: key into INPUT_FORMATS; must be supplied explicitly.
        output_name: path of the JSON file to write.
        main_sheet_name: key under which the unflattened rows are stored.
        encoding: text encoding of the input spreadsheet.
        timezone_name: timezone applied by the spreadsheet input class.
        root_id: name of the root identifier column (e.g. 'ocid').
        schema: schema filename, used only when convert_titles is True.
        convert_titles: if True, map human-readable titles back to field
            names using the schema.

    Raises:
        Exception: if input_format is missing or not a known format.
    """
    if input_format is None:
        # Fixed the unbalanced parenthesis in this message.
        raise Exception(
            'You must specify an input format (may autodetect in future)')
    elif input_format not in INPUT_FORMATS:
        raise Exception('The requested format is not available')

    spreadsheet_input_class = INPUT_FORMATS[input_format]
    spreadsheet_input = spreadsheet_input_class(
        input_name=input_name,
        timezone_name=timezone_name,
        main_sheet_name=main_sheet_name,
        root_id=root_id,
        convert_titles=convert_titles)
    if convert_titles:
        # Title conversion needs the schema to map titles back to field names.
        parser = SchemaParser(schema_filename=schema,
                              main_sheet_name=main_sheet_name,
                              rollup=True,
                              root_id=root_id)
        parser.parse()
        spreadsheet_input.parser = parser
    spreadsheet_input.encoding = encoding
    spreadsheet_input.read_sheets()

    if base_json:
        with open(base_json) as fp:
            base = json.load(fp, object_pairs_hook=OrderedDict)
    else:
        base = OrderedDict()
    base[main_sheet_name] = list(spreadsheet_input.unflatten())
    with codecs.open(output_name, 'w', encoding='utf-8') as fp:
        json.dump(base, fp, indent=4, default=decimal_default,
                  ensure_ascii=False)
def test_sub_sheets(self, tmpdir, remove_empty_schema_columns):
    """Schema-declared arrays become sub-sheets; empty-schema columns are
    dropped when remove_empty_schema_columns is set.
    """
    test_schema = tmpdir.join("test.json")
    test_schema.write(
        """{ "properties": { "c": { "type": "array", "items": {"$ref": "#/testB"} }, "g": { "type": "array", "items": { "type": "object", "properties": { "h": { "type": "string"} } } } }, "testB": { "type": "object", "properties": { "d": { "type": "string" }, "f": { "type": "string" } } } }"""
    )
    schema_parser = SchemaParser(
        schema_filename=test_schema.strpath, root_id="ocid"
    )
    schema_parser.parse()
    parser = JSONParser(
        root_json_dict=[
            OrderedDict([("a", "b"), ("c", [OrderedDict([("d", "e")])])])
        ],
        schema_parser=schema_parser,
        remove_empty_schema_columns=remove_empty_schema_columns,
    )
    parser.parse()
    assert list(parser.main_sheet) == ["a"]
    assert parser.main_sheet.lines == [{"a": "b"}]
    # BUG FIX: the original `assert len(...) == 2 if cond else 1` parsed as
    # `assert ((len(...) == 2) if cond else 1)`, so in the
    # remove_empty_schema_columns=True case it asserted the truthy constant 1
    # and could never fail. Parenthesise the conditional so both branches are
    # actually checked.
    assert len(parser.sub_sheets) == (2 if not remove_empty_schema_columns else 1)
    if not remove_empty_schema_columns:
        assert list(parser.sub_sheets["c"]) == ["ocid", "c/0/d", "c/0/f"]
        assert list(parser.sub_sheets["g"]) == ["ocid", "g/0/h"]
    else:
        # 'g' has no data, so it is removed entirely; 'f' has no data either.
        assert list(parser.sub_sheets["c"]) == ["ocid", "c/0/d"]
    assert parser.sub_sheets["c"].lines == [{"c/0/d": "e"}]
def flatten(input_name, schema=None, output_name='releases',
            output_format='all', main_sheet_name='main',
            root_list_path='releases', rollup=False, root_id='ocid',
            use_titles=False, **_):
    """
    Flatten a nested structure (JSON) to a flat structure (spreadsheet -
    csv or xlsx).

    Args:
        input_name: path of the JSON file to read.
        schema: optional schema filename used to drive sheet layout/rollup.
        output_name: base name for output file(s); format suffixes are
            appended when output_format is 'all'.
        output_format: 'all' or a key of OUTPUT_FORMATS.
        main_sheet_name: name of the main sheet.
        root_list_path: key in the JSON holding the list to flatten.
        rollup: pass rollUp handling to the schema parser.
        root_id: name of the root identifier column (e.g. 'ocid').
        use_titles: use schema titles instead of field names for headings.

    Raises:
        Exception: if output_format is not 'all' or a known format.
    """
    if schema:
        schema_parser = SchemaParser(schema_filename=schema,
                                     rollup=rollup,
                                     root_id=root_id,
                                     use_titles=use_titles,
                                     main_sheet_name=main_sheet_name)
        schema_parser.parse()
    else:
        schema_parser = None
    parser = JSONParser(json_filename=input_name,
                        root_list_path=root_list_path,
                        schema_parser=schema_parser,
                        main_sheet_name=main_sheet_name,
                        root_id=root_id,
                        use_titles=use_titles)
    parser.parse()

    def spreadsheet_output(spreadsheet_output_class, name):
        # One writer per output format; each writes every sheet.
        writer = spreadsheet_output_class(parser=parser,
                                          main_sheet_name=main_sheet_name,
                                          output_name=name)
        writer.write_sheets()

    if output_format == 'all':
        for format_name, spreadsheet_output_class in OUTPUT_FORMATS.items():
            spreadsheet_output(spreadsheet_output_class,
                               output_name + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS:  # one of the allowed formats
        # (dropped the redundant .keys() call; `in dict` tests keys directly)
        spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)
    else:
        raise Exception('The requested format is not available')
def create_template(schema, output_name=None, output_format='all',
                    main_sheet_name='main', rollup=False, root_id=None,
                    use_titles=False, disable_local_refs=False,
                    truncation_length=3, no_deprecated_fields=False, **_):
    """
    Creates template file(s) from given inputs.

    This function is built to deal with commandline input and arguments
    but to also be called from elsewhere in future.

    Args:
        schema: schema filename the template is generated from.
        output_name: base output name; defaults to 'template' (plus the
            format suffix when a single output_format is chosen).
        output_format: 'all' or a key of OUTPUT_FORMATS.
        main_sheet_name: name of the main sheet.
        rollup: honour rollUp declarations in the schema.
        root_id: name of the root identifier column, if any.
        use_titles: use schema titles instead of field names for headings.
        disable_local_refs: forwarded to SchemaParser.
        truncation_length: forwarded to SchemaParser (sheet-name truncation).
        no_deprecated_fields: exclude deprecated fields from the template.

    Raises:
        Exception: if output_format is not 'all' or a known format.
    """
    parser = SchemaParser(schema_filename=schema,
                          rollup=rollup,
                          root_id=root_id,
                          use_titles=use_titles,
                          disable_local_refs=disable_local_refs,
                          truncation_length=truncation_length,
                          exclude_deprecated_fields=no_deprecated_fields)
    parser.parse()

    def spreadsheet_output(spreadsheet_output_class, name):
        # One writer per output format; each writes every sheet.
        writer = spreadsheet_output_class(parser=parser,
                                          main_sheet_name=main_sheet_name,
                                          output_name=name)
        writer.write_sheets()

    if output_format == 'all':
        if not output_name:
            output_name = 'template'
        for format_name, spreadsheet_output_class in OUTPUT_FORMATS.items():
            spreadsheet_output(spreadsheet_output_class,
                               output_name + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS:  # one of the allowed formats
        # (dropped the redundant .keys() call; `in dict` tests keys directly)
        if not output_name:
            output_name = 'template' + FORMATS_SUFFIX[output_format]
        spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)
    else:
        raise Exception('The requested format is not available')
def test_parent_is_object(self):
    """An array nested under a top-level object gets the parent's id column."""
    schema = {
        'properties': {
            'Atest': {
                'type': 'object',
                'properties': object_in_array_example_properties('Btest', 'Ctest'),
            }
        }
    }
    parser = SchemaParser(root_schema_dict=schema)
    parser.parse()
    assert set(parser.main_sheet) == {'Atest/id'}
    assert set(parser.sub_sheets) == {'Ate_Btest'}
    assert list(parser.sub_sheets['Ate_Btest']) == [
        'Atest/id', 'Atest/Btest/0/Ctest']
def test_references_sheet_names(tmpdir):
    """
    The referenced name used to be used for the sheet name, but is NOT
    any more.
    """
    schema_file = tmpdir.join('test_schema.json')
    schema_file.write('''{ "properties": { "Atest": { "type": "array", "items": {"$ref": "#/Btest"} } }, "Btest": { "type": "object", "properties": {"Ctest":{"type": "string"}} } }''')
    parser = SchemaParser(schema_filename=schema_file.strpath)
    parser.parse()
    assert set(parser.sub_sheets) == {'Atest'}  # used to be Btest
    assert list(parser.sub_sheets['Atest']) == ['Atest/0/Ctest']
def run(sheets, schema=None, source_maps=False):
    """
    Unflatten a list of sheet dicts (each with 'name', 'headings', 'rows').

    Args:
        sheets: iterable of dicts with keys 'name', 'headings', 'rows'.
        schema: optional JSON schema (dict); when given, titles are
            converted and $refs are resolved before parsing.
        source_maps: when True, cell and heading source maps are produced.

    Returns:
        (result, cell_source_map_data, heading_source_map_data) as returned
        by fancy_unflatten.
    """
    input_headings = OrderedDict()
    input_sheets = OrderedDict()
    for sheet in sheets:
        rows = [OrderedDict(zip(sheet["headings"], row))
                for row in sheet["rows"]]
        input_sheets[sheet["name"]] = rows
        input_headings[sheet["name"]] = sheet["headings"]
    if schema is not None:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id="",
            # Without this, titles from a schema aren't understood
            convert_titles=True,
        )
        # Without this, the $ref entries in the schema aren't resolved.
        dereferenced_schema = JsonRef.replace_refs(schema)
        parser = SchemaParser(root_schema_dict=dereferenced_schema,
                              root_id="main", rollup=True)
        parser.parse()
        spreadsheet_input.parser = parser
    else:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id="",
        )
    spreadsheet_input.read_sheets()
    # The original if/else branches were identical except for the boolean
    # constants passed twice; pass the flag directly instead of duplicating
    # the call.
    with_maps = bool(source_maps)
    (
        result,
        cell_source_map_data,
        heading_source_map_data,
    ) = spreadsheet_input.fancy_unflatten(with_maps, with_maps)
    return result, cell_source_map_data, heading_source_map_data
def test_sub_sheet():
    """An array of objects becomes a sub-sheet keyed by the default root id."""
    schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': {'testB': type_string},
                },
            },
        }
    }
    parser = SchemaParser(root_schema_dict=schema)
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'testA'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'testB']
def test_sub_sheet_empty_string_root_id():
    """With root_id='' the sub-sheet carries no root id ('ocid') column."""
    schema = {
        'properties': {
            'Atest': {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': {'Btest': type_string},
                },
            },
        }
    }
    parser = SchemaParser(root_schema_dict=schema, root_id='')
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'Atest'}
    assert list(parser.sub_sheets['Atest']) == ['Atest/0/Btest']