예제 #1
0
 def test_rollup(self):
     """Rolled-up field appears on the main sheet; the sub-sheet keeps all columns."""
     rollup_schema = {
         'properties': {
             'testA': {
                 'type': 'array',
                 'rollUp': ['testB'],
                 'items': {
                     'type': 'object',
                     'properties': {
                         'testB': {'type': 'string'},
                         'testC': {'type': 'string'},
                     },
                 },
             },
         }
     }
     schema_parser = SchemaParser(root_schema_dict=rollup_schema, rollup=True)
     schema_parser.parse()
     json_parser = JSONParser(
         root_json_dict=[OrderedDict([
             ('testA', [OrderedDict([('testB', '1'), ('testC', '2')])]),
         ])],
         schema_parser=schema_parser,
     )
     json_parser.parse()
     # Only the rolled-up column is promoted to the main sheet.
     assert list(json_parser.main_sheet) == ['testA[]/testB']
     assert json_parser.main_sheet.lines == [{'testA[]/testB': '1'}]
     assert len(json_parser.sub_sheets) == 1
     assert set(json_parser.sub_sheets['testA']) == set(['ocid', 'testB', 'testC'])
     assert json_parser.sub_sheets['testA'].lines == [{'testB': '1', 'testC': '2'}]
예제 #2
0
def create_template(schema, output_name='releases', output_format='all', main_sheet_name='main', flatten=False, rollup=False, root_id='ocid', use_titles=False, **_):
    """
    Create template file(s) from the given inputs.

    Built to handle command-line input and arguments, but may also be
    called from elsewhere in future.
    """
    parser = SchemaParser(schema_filename=schema, main_sheet_name=main_sheet_name,
                          rollup=rollup, root_id=root_id, use_titles=use_titles)
    parser.parse()

    def write_output(output_class, name):
        # Each output class knows how to serialise the parsed sheets.
        output_class(
            parser=parser,
            main_sheet_name=main_sheet_name,
            output_name=name).write_sheets()

    if output_format == 'all':
        for format_name, output_class in OUTPUT_FORMATS.items():
            write_output(output_class, output_name + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS:   # in dictionary of allowed formats
        write_output(OUTPUT_FORMATS[output_format], output_name)
    else:
        raise Exception('The requested format is not available')
def test_flatten_multiplesheets(use_titles, use_schema, root_id, root_id_kwargs, input_list, expected_output_dict, recwarn, comment, warning_messages, tmpdir, reversible):
    """
    Flatten input JSON to multiple sheets and compare the per-sheet lines
    against the expected output.
    """
    # Not sure why, but this seems to be necessary to have warnings picked up
    # on Python 2.7 and 3.3, but 3.4 and 3.5 are fine without it
    import warnings
    warnings.simplefilter('always')

    extra_kwargs = {'use_titles': use_titles}
    extra_kwargs.update(root_id_kwargs)

    if use_schema:
        # use_schema is known truthy here, so build the real schema directly
        # (previously re-tested use_schema in a redundant conditional).
        schema_parser = SchemaParser(
            root_schema_dict=create_schema(root_id),
            rollup=True,
            **extra_kwargs
        )
        schema_parser.parse()
    else:
        schema_parser = None

    with tmpdir.join('input.json').open('w') as fp:
        json.dump({
            'mykey': [inject_root_id(root_id, input_row) for input_row in input_list]
        }, fp)

    parser = JSONParser(
        json_filename=tmpdir.join('input.json').strpath,
        root_list_path='mykey',
        schema_parser=schema_parser,
        **extra_kwargs)
    parser.parse()

    expected_output_dict = OrderedDict([(sheet_name, [inject_root_id(root_id, line) for line in lines]) for sheet_name, lines in expected_output_dict.items()])
    output = {sheet_name: sheet.lines for sheet_name, sheet in parser.sub_sheets.items() if sheet.lines}
    output['custom_main'] = parser.main_sheet.lines
    assert output == expected_output_dict
def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs, input_list, expected_output_list, recwarn, comment, warning_messages, reversible):
    """Unflatten a single sheet and compare with the expected JSON output."""
    # Not sure why, but this seems to be necessary to have warnings picked up
    # on Python 2.7 and 3.3, but 3.4 and 3.5 are fine without it
    import warnings
    warnings.simplefilter('always')

    kwargs = {'convert_titles': convert_titles}
    kwargs.update(root_id_kwargs)
    sheet_rows = [inject_root_id(root_id, input_row) for input_row in input_list]
    spreadsheet_input = ListInput(sheets={'custom_main': sheet_rows}, **kwargs)
    spreadsheet_input.read_sheets()

    schema_dict = create_schema(root_id) if use_schema else {"properties": {}}
    parser = SchemaParser(root_schema_dict=schema_dict, root_id=root_id, rollup=True)
    parser.parse()
    spreadsheet_input.parser = parser

    expected = [
        inject_root_id(root_id, expected_output_dict)
        for expected_output_dict in expected_output_list
    ]
    if expected == [{}]:
        # We don't expect an empty dictionary
        expected = []
    assert list(spreadsheet_input.unflatten()) == expected
    # We expect no warning_messages
    if not convert_titles: # TODO what are the warning_messages here
        assert [str(x.message) for x in recwarn.list] == warning_messages
예제 #5
0
def create_template(schema, output_name=None, output_format='all', main_sheet_name='main',
                    rollup=False, root_id=None, use_titles=False, disable_local_refs=False, truncation_length=3,
                    no_deprecated_fields=False, **_):
    """
    Create template file(s) from the given inputs.

    Built to handle command-line input and arguments, but may also be
    called from elsewhere in future.
    """
    parser = SchemaParser(schema_filename=schema, rollup=rollup, root_id=root_id, use_titles=use_titles,
                          disable_local_refs=disable_local_refs, truncation_length=truncation_length,
                          exclude_deprecated_fields=no_deprecated_fields)
    parser.parse()

    def write_output(output_class, name):
        # Each output class knows how to serialise the parsed sheets.
        output_class(
            parser=parser,
            main_sheet_name=main_sheet_name,
            output_name=name).write_sheets()

    if output_format == 'all':
        # 'template' is the default base name when no output_name was given.
        name_base = output_name or 'template'
        for format_name, output_class in OUTPUT_FORMATS.items():
            write_output(output_class, name_base + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS:   # in dictionary of allowed formats
        write_output(OUTPUT_FORMATS[output_format],
                     output_name or 'template' + FORMATS_SUFFIX[output_format])
    else:
        raise Exception('The requested format is not available')
예제 #6
0
def unflatten(input_name, base_json=None, input_format=None, output_name='releases.json',
              main_sheet_name='releases', encoding='utf8', timezone_name='UTC',
              root_id='ocid', schema='', convert_titles=False, **_):
    """
    Unflatten a flat structure (spreadsheet - csv or xlsx) into a nested structure (JSON).

    Raises:
        Exception: if input_format is None, or not one of INPUT_FORMATS.
    """
    if input_format is None:
        # Bug fix: the message previously ended with an unbalanced '('.
        raise Exception('You must specify an input format (may autodetect in future)')
    elif input_format not in INPUT_FORMATS:
        raise Exception('The requested format is not available')

    spreadsheet_input_class = INPUT_FORMATS[input_format]
    spreadsheet_input = spreadsheet_input_class(
        input_name=input_name,
        timezone_name=timezone_name,
        main_sheet_name=main_sheet_name,
        root_id=root_id,
        convert_titles=convert_titles)
    if convert_titles:
        # Title conversion needs the schema to map titles back to field names.
        parser = SchemaParser(schema_filename=schema, main_sheet_name=main_sheet_name, rollup=True, root_id=root_id)
        parser.parse()
        spreadsheet_input.parser = parser
    spreadsheet_input.encoding = encoding
    spreadsheet_input.read_sheets()
    if base_json:
        with open(base_json) as fp:
            base = json.load(fp, object_pairs_hook=OrderedDict)
    else:
        base = OrderedDict()
    base[main_sheet_name] = list(spreadsheet_input.unflatten())
    with codecs.open(output_name, 'w', encoding='utf-8') as fp:
        json.dump(base, fp, indent=4, default=decimal_default, ensure_ascii=False)
def test_bad_rollup(recwarn):
    '''
    When rollUp is specified, but the field is missing in the schema, we expect
    a warning.
    '''
    schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'rollUp': ['testB'],
                'items': {
                    'type': 'object',
                    'properties': {'testC': type_string},
                },
            },
        }
    }
    parser = SchemaParser(root_schema_dict=schema, rollup=True)
    parser.parse()

    warning = recwarn.pop(UserWarning)
    assert 'testB in rollUp but not in schema' in text_type(warning.message)

    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == set(['testA'])
    assert set(parser.sub_sheets['testA']) == set(['ocid', 'testC'])
예제 #8
0
 def test_column_matching(self, tmpdir):
     """A schema array of strings yields a ':array' column on the main sheet."""
     schema_file = tmpdir.join('test.json')
     schema_file.write('''{
         "properties": {
             "c": {
                 "type": "array",
                 "items": {"type": "string"}
             }
         }
     }''')
     schema_parser = SchemaParser(schema_filename=schema_file.strpath)
     schema_parser.parse()
     json_parser = JSONParser(
         root_json_dict=[OrderedDict([('c', ['d'])])],
         schema_parser=schema_parser,
     )
     json_parser.parse()
     assert list(json_parser.main_sheet) == ['c:array']
     assert json_parser.main_sheet.lines == [{'c:array': 'd'}]
     assert len(json_parser.sub_sheets) == 0
예제 #9
0
def flatten(input_name, schema=None, output_name=None, output_format='all', main_sheet_name='main',
            root_list_path='main', root_is_list=False, sheet_prefix='', filter_field=None, filter_value=None,
            rollup=False, root_id=None, use_titles=False, xml=False, id_name='id', disable_local_refs=False,
            remove_empty_schema_columns=False, truncation_length=3, **_):
    """
    Flatten a nested structure (JSON) to a flat structure (spreadsheet - csv or xlsx).
    """
    # filter_field and filter_value only make sense as a pair (logical xor).
    if (filter_field is None) != (filter_value is None):
        raise Exception('You must use filter_field and filter_value together')

    schema_parser = None
    if schema:
        schema_parser = SchemaParser(
            schema_filename=schema,
            rollup=rollup,
            root_id=root_id,
            use_titles=use_titles,
            disable_local_refs=disable_local_refs,
            truncation_length=truncation_length)
        schema_parser.parse()

    parser = JSONParser(
        json_filename=input_name,
        root_list_path=None if root_is_list else root_list_path,
        schema_parser=schema_parser,
        root_id=root_id,
        use_titles=use_titles,
        xml=xml,
        id_name=id_name,
        filter_field=filter_field,
        filter_value=filter_value,
        remove_empty_schema_columns=remove_empty_schema_columns,
        truncation_length=truncation_length)
    parser.parse()

    def write_output(output_class, name):
        # Each output class knows how to serialise the parsed sheets.
        output_class(
            parser=parser,
            main_sheet_name=main_sheet_name,
            output_name=name,
            sheet_prefix=sheet_prefix).write_sheets()

    if output_format == 'all':
        # 'flattened' is the default base name when no output_name was given.
        base_name = output_name or 'flattened'
        for format_name, output_class in OUTPUT_FORMATS.items():
            write_output(output_class, base_name + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS:   # in dictionary of allowed formats
        write_output(OUTPUT_FORMATS[output_format],
                     output_name or 'flattened' + FORMATS_SUFFIX[output_format])
    else:
        raise Exception('The requested format is not available')
def test_main_sheet_basic():
    """Top-level string properties appear directly on the main sheet."""
    parser = SchemaParser(root_schema_dict={
        'properties': {'testA': type_string, 'testB': type_string}
    })
    parser.parse()
    assert set(parser.main_sheet) == {'testA', 'testB'}
def test_main_sheet_nested():
    """Nested object properties flatten to slash-joined main-sheet columns."""
    nested_schema = {
        'properties': {
            'testA': {
                'type': 'object',
                'properties': {'testC': type_string},
            }
        }
    }
    parser = SchemaParser(root_schema_dict=nested_schema)
    parser.parse()
    assert set(parser.main_sheet) == {'testA/testC'}
 def test_parent_is_object(self):
     """An object holding an array produces an id column and one sub-sheet."""
     schema = {
         'properties': {
             'testA': {
                 'type': 'object',
                 'properties': object_in_array_example_properties('testB', 'testC'),
             }
         }
     }
     parser = SchemaParser(root_schema_dict=schema)
     parser.parse()
     assert set(parser.main_sheet) == {'testA/id'}
     assert set(parser.sub_sheets) == {'testB'}
     assert list(parser.sub_sheets['testB']) == ['ocid', 'main/testA/id:testB', 'testC']
 def test_sub_sheets(self, tmpdir, remove_empty_schema_columns):
     """
     Sub-sheets are created for schema arrays; with
     remove_empty_schema_columns, unused sheets/columns are dropped.
     """
     test_schema = tmpdir.join('test.json')
     test_schema.write('''{
         "properties": {
             "c": {
                 "type": "array",
                 "items": {"$ref": "#/testB"}
             },
             "g": {
                 "type": "array",
                 "items": {
                     "type": "object",
                     "properties": {
                         "h": { "type": "string"}
                     }
                 }
             }
         },
         "testB": {
             "type": "object",
             "properties": {
                 "d": { "type": "string" },
                 "f": { "type": "string" }
             }
         }
     }''')
     schema_parser = SchemaParser(
         schema_filename=test_schema.strpath,
         root_id='ocid'
     )
     schema_parser.parse()
     parser = JSONParser(
         root_json_dict=[OrderedDict([
             ('a', 'b'),
             ('c', [OrderedDict([('d', 'e')])]),
         ])],
         schema_parser=schema_parser,
         remove_empty_schema_columns=remove_empty_schema_columns,
     )
     parser.parse()
     assert list(parser.main_sheet) == [ 'a' ]
     assert parser.main_sheet.lines == [
         {'a': 'b'}
     ]
     # Bug fix: previously written as `assert len(...) == 2 if cond else 1`,
     # which parses as a conditional expression whose else-branch is the
     # truthy constant 1 — so the assertion was vacuous when
     # remove_empty_schema_columns was True. Parenthesise the conditional.
     assert len(parser.sub_sheets) == (2 if not remove_empty_schema_columns else 1)
     if not remove_empty_schema_columns:
         assert list(parser.sub_sheets['c']) == list(['ocid', 'c/0/d', 'c/0/f'])
         assert list(parser.sub_sheets['g']) == list(['ocid', 'g/0/h'])
     else:
         assert list(parser.sub_sheets['c']) == list(['ocid', 'c/0/d'])
     assert parser.sub_sheets['c'].lines == [{'c/0/d':'e'}]
def test_references_sheet_names(tmpdir):
    """The referenced name should be used for the sheet name"""
    schema_file = tmpdir.join('test_schema.json')
    schema_file.write('''{
        "properties": { "testA": {
            "type": "array",
            "items": {"$ref": "#/testB"}
        } },
        "testB": { "type": "object", "properties": {"testC":{"type": "string"}} }
    }''')
    parser = SchemaParser(schema_filename=schema_file.strpath)
    parser.parse()
    assert set(parser.sub_sheets) == {'testB'}
    assert list(parser.sub_sheets['testB']) == ['ocid', 'testC']
def test_sub_sheet():
    """An array of objects creates a sub-sheet named after the property."""
    schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': {'testB': type_string},
                },
            },
        }
    }
    parser = SchemaParser(root_schema_dict=schema)
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'testA'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'testB']
def test_simple_array():
    """An array of plain strings stays on the main sheet as a ':array' column."""
    schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'items': {'type': 'string'},
            }
        }
    }
    parser = SchemaParser(root_schema_dict=schema,
                          main_sheet_name='custom_main_sheet_name')
    parser.parse()
    assert set(parser.main_sheet) == {'testA:array'}
 def test_two_parents(self):
     # This is a copy of test_two_parents from test_schema_parser.py, in
     # order to check that flattening and template generation use the same
     # sheet names
     schema = {
         'properties': OrderedDict([
             ('Atest', {
                 'type': 'array',
                 'items': {
                     'type': 'object',
                     'properties': object_in_array_example_properties('Btest', 'Ctest'),
                 },
             }),
             ('Dtest', {
                 'type': 'array',
                 'items': {
                     'type': 'object',
                     'properties': object_in_array_example_properties('Btest', 'Etest'),
                 },
             }),
         ])
     }
     schema_parser = SchemaParser(root_schema_dict=schema)
     schema_parser.parse()
     json_parser = JSONParser(
         root_json_dict=[{
             'Atest': [{'id': 1, 'Btest': [{'Ctest': 2}]}],
             'Dtest': [{'id': 3, 'Btest': [{'Etest': 4}]}],
         }],
         schema_parser=schema_parser
     )
     json_parser.parse()
     assert set(json_parser.main_sheet) == set()
     assert set(json_parser.sub_sheets) == set(['Atest', 'Dtest', 'Ate_Btest', 'Dte_Btest'])
     assert list(json_parser.sub_sheets['Atest']) == ['Atest/0/id']
     assert list(json_parser.sub_sheets['Dtest']) == ['Dtest/0/id']
     assert list(json_parser.sub_sheets['Ate_Btest']) == ['Atest/0/id', 'Atest/0/Btest/0/Ctest']
     assert list(json_parser.sub_sheets['Dte_Btest']) == ['Dtest/0/id', 'Dtest/0/Btest/0/Etest']
예제 #18
0
 def test_parent_is_object(self):
     """Both the root id and the object's id become prefix columns on the sub-sheet."""
     schema = {
         'properties': {
             'id': type_string,
             'testA': {
                 'type': 'object',
                 'properties': object_in_array_example_properties('testB', 'testC'),
             },
         }
     }
     parser = SchemaParser(root_schema_dict=schema)
     parser.parse()
     assert set(parser.main_sheet) == {'id', 'testA/id'}
     assert set(parser.sub_sheets) == {'testB'}
     assert list(parser.sub_sheets['testB']) == [
         'ocid', 'main/id:testB', 'main/testA/id:testB', 'testC'
     ]
예제 #19
0
def test_sub_sheet_empty_string_root_id():
    """With an empty root_id, no 'ocid' column is added to sub-sheets."""
    schema = {
        'properties': {
            'Atest': {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': {'Btest': type_string},
                },
            },
        }
    }
    parser = SchemaParser(root_schema_dict=schema, root_id='')
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'Atest'}
    assert list(parser.sub_sheets['Atest']) == ['Atest/0/Btest']
예제 #20
0
def test_sub_sheet():
    """An array of objects is moved off the main sheet into its own sub-sheet."""
    array_schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'items': {'type': 'object',
                          'properties': {'testB': type_string}},
            },
        }
    }
    parser = SchemaParser(root_schema_dict=array_schema)
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'testA'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'testB']
예제 #21
0
 def test_custom_main_sheet_name(self):
     """Sub-sheet names are built from truncated parent path components."""
     schema = {
         'properties': {
             'id': type_string,
             'Atest': {
                 'type': 'object',
                 'properties': object_in_array_example_properties('Btest', 'Ctest'),
             },
         }
     }
     parser = SchemaParser(root_schema_dict=schema)
     parser.parse()
     assert set(parser.main_sheet) == {'id', 'Atest/id'}
     assert set(parser.sub_sheets) == {'Ate_Btest'}
     assert list(parser.sub_sheets['Ate_Btest']) == [
         'id', 'Atest/id', 'Atest/Btest/0/Ctest'
     ]
def test_flatten(use_titles, use_schema, root_id, root_id_kwargs, input_list,
                 expected_output_list, recwarn, comment, warning_messages,
                 tmpdir, reversible):
    """
    Flatten the input JSON and compare the main sheet lines against the
    expected output.
    """
    # Not sure why, but this seems to be necessary to have warnings picked up
    # on Python 2.7 and 3.3, but 3.4 and 3.5 are fine without it
    import warnings
    warnings.simplefilter('always')

    extra_kwargs = {'use_titles': use_titles}
    extra_kwargs.update(root_id_kwargs)

    if use_schema:
        # use_schema is known truthy here, so build the real schema directly
        # (previously re-tested use_schema in a redundant conditional).
        schema_parser = SchemaParser(root_schema_dict=create_schema(root_id),
                                     rollup=True,
                                     **extra_kwargs)
        schema_parser.parse()
    else:
        schema_parser = None

    with tmpdir.join('input.json').open('w') as fp:
        json.dump(
            {
                'mykey': [
                    inject_root_id(root_id, input_row)
                    for input_row in input_list
                ]
            }, fp)

    parser = JSONParser(json_filename=tmpdir.join('input.json').strpath,
                        root_list_path='mykey',
                        schema_parser=schema_parser,
                        **extra_kwargs)
    parser.parse()

    expected_output_list = [
        inject_root_id(root_id, expected_output_dict)
        for expected_output_dict in expected_output_list
    ]
    if expected_output_list == [{}]:
        # We don't expect an empty dictionary
        expected_output_list = []
    assert list(parser.main_sheet.lines) == expected_output_list
예제 #23
0
 def test_column_matching(self, tmpdir):
     """With this schema, the string-array is flattened into a plain 'c' column."""
     schema_file = tmpdir.join('test.json')
     schema_file.write('''{
         "properties": {
             "c": {
                 "type": "array",
                 "items": {"type": "string"}
             }
         }
     }''')
     schema_parser = SchemaParser(schema_filename=schema_file.strpath)
     schema_parser.parse()
     json_parser = JSONParser(
         root_json_dict=[OrderedDict([('c', ['d'])])],
         schema_parser=schema_parser,
     )
     json_parser.parse()
     assert list(json_parser.main_sheet) == ['c']
     assert json_parser.main_sheet.lines == [{'c': 'd'}]
     assert len(json_parser.sub_sheets) == 0
def test_rollup():
    """rollUp promotes the named field to the main sheet while keeping the sub-sheet."""
    rollup_schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'rollUp': ['testB'],
                'items': {
                    'type': 'object',
                    'properties': {
                        'testB': type_string,
                        'testC': type_string,
                    },
                },
            },
        }
    }
    parser = SchemaParser(root_schema_dict=rollup_schema, rollup=True)
    parser.parse()
    assert set(parser.main_sheet) == {'testA[]/testB'}
    assert set(parser.sub_sheets) == {'testA'}
    assert set(parser.sub_sheets['testA']) == {'ocid', 'testB', 'testC'}
예제 #25
0
def run(sheets, schema=None, source_maps=False):
    """Unflatten the given sheets, optionally using a schema and source maps."""
    input_headings = OrderedDict()
    input_sheets = OrderedDict()
    for sheet in sheets:
        headings = sheet["headings"]
        input_sheets[sheet["name"]] = [
            OrderedDict(zip(headings, row)) for row in sheet["rows"]
        ]
        input_headings[sheet["name"]] = headings
    if schema is None:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id="",
        )
    else:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id="",  # QUESTION: I don't understand root_id
            convert_titles=True,  # Without this, the titles aren't understood
        )
        # Without this, the $ref entries in the schema aren't resolved.
        dereferenced_schema = JsonRef.replace_refs(schema)
        parser = SchemaParser(root_schema_dict=dereferenced_schema,
                              root_id="main",
                              rollup=True)
        parser.parse()
        spreadsheet_input.parser = parser
    spreadsheet_input.read_sheets()
    if not source_maps:
        return spreadsheet_input.unflatten(), None, None
    result, cell_source_map_data, heading_source_map_data = (
        spreadsheet_input.fancy_unflatten(with_cell_source_map=True,
                                          with_heading_source_map=True))
    return result, cell_source_map_data, heading_source_map_data
예제 #26
0
 def test_rollup(self):
     """Rollup with an explicit root_id keeps full paths on both sheets."""
     rollup_schema = {
         "properties": {
             "testA": {
                 "type": "array",
                 "rollUp": ["testB"],
                 "items": {
                     "type": "object",
                     "properties": {
                         "testB": {"type": "string"},
                         "testC": {"type": "string"},
                     },
                 },
             },
         }
     }
     schema_parser = SchemaParser(
         root_schema_dict=rollup_schema, rollup=True, root_id="ocid"
     )
     schema_parser.parse()
     input_row = OrderedDict([("testB", "1"), ("testC", "2")])
     json_parser = JSONParser(
         root_json_dict=[OrderedDict([("testA", [input_row])])],
         schema_parser=schema_parser,
         root_id="ocid",
         rollup=True,
     )
     json_parser.parse()
     assert list(json_parser.main_sheet) == ["testA/0/testB"]
     assert json_parser.main_sheet.lines == [{"testA/0/testB": "1"}]
     assert len(json_parser.sub_sheets) == 1
     assert set(json_parser.sub_sheets["testA"]) == set(
         ["ocid", "testA/0/testB", "testA/0/testC"]
     )
     assert json_parser.sub_sheets["testA"].lines == [
         {"testA/0/testB": "1", "testA/0/testC": "2"}
     ]
예제 #27
0
 def test_rollup(self):
     """With root_id='ocid' and rollup, full paths appear on main and sub-sheets."""
     schema = {
         'properties': {
             'testA': {
                 'type': 'array',
                 'rollUp': ['testB'],
                 'items': {
                     'type': 'object',
                     'properties': {
                         'testB': {'type': 'string'},
                         'testC': {'type': 'string'},
                     },
                 },
             },
         }
     }
     schema_parser = SchemaParser(root_schema_dict=schema,
                                  rollup=True,
                                  root_id='ocid')
     schema_parser.parse()
     row = OrderedDict([('testB', '1'), ('testC', '2')])
     json_parser = JSONParser(root_json_dict=[OrderedDict([('testA', [row])])],
                              schema_parser=schema_parser,
                              root_id='ocid',
                              rollup=True)
     json_parser.parse()
     assert list(json_parser.main_sheet) == ['testA/0/testB']
     assert json_parser.main_sheet.lines == [{'testA/0/testB': '1'}]
     assert len(json_parser.sub_sheets) == 1
     assert set(json_parser.sub_sheets['testA']) == set(
         ['ocid', 'testA/0/testB', 'testA/0/testC'])
     assert json_parser.sub_sheets['testA'].lines == [{
         'testA/0/testB': '1',
         'testA/0/testC': '2'
     }]
예제 #28
0
 def test_rollup_multiple_values(self, recwarn):
     """A rolled-up array with several entries gets a warning placeholder on the main sheet."""
     rollup_schema = {
         'properties': {
             'testA': {
                 'type': 'array',
                 'rollUp': ['testB'],
                 'items': {
                     'type': 'object',
                     'properties': {
                         'testB': {'type': 'string'},
                         'testC': {'type': 'string'},
                     },
                 },
             },
         }
     }
     schema_parser = SchemaParser(root_schema_dict=rollup_schema, rollup=True)
     schema_parser.parse()
     rows = [
         OrderedDict([('testB', '1'), ('testC', '2')]),
         OrderedDict([('testB', '3'), ('testC', '4')]),
     ]
     json_parser = JSONParser(
         root_json_dict=[OrderedDict([('testA', rows)])],
         schema_parser=schema_parser
     )
     json_parser.parse()
     assert list(json_parser.main_sheet) == ['testA[]/testB']
     assert json_parser.main_sheet.lines == [{
         'testA[]/testB': 'WARNING: More than one value supplied, consult the relevant sub-sheet for the data.'
     }]
     assert len(json_parser.sub_sheets) == 1
     assert set(json_parser.sub_sheets['testA']) == set(['ocid', 'testB', 'testC'])
     assert json_parser.sub_sheets['testA'].lines == [
         {'testB': '1', 'testC': '2'},
         {'testB': '3', 'testC': '4'},
     ]
     warning = recwarn.pop(UserWarning)
     assert 'Could not provide rollup' in text_type(warning.message)
예제 #29
0
def flatten(input_name, schema=None, output_name='releases', output_format='all', main_sheet_name='main', root_list_path='releases', rollup=False, root_id='ocid', use_titles=False, **_):
    """
    Flatten a nested structure (JSON) to a flat structure (spreadsheet - csv or xlsx).
    """
    schema_parser = None
    if schema:
        schema_parser = SchemaParser(
            schema_filename=schema,
            rollup=rollup,
            root_id=root_id,
            use_titles=use_titles,
            main_sheet_name=main_sheet_name)
        schema_parser.parse()

    parser = JSONParser(
        json_filename=input_name,
        root_list_path=root_list_path,
        schema_parser=schema_parser,
        main_sheet_name=main_sheet_name,
        root_id=root_id,
        use_titles=use_titles)
    parser.parse()

    def write_output(output_class, name):
        # Each output class knows how to serialise the parsed sheets.
        output_class(
            parser=parser,
            main_sheet_name=main_sheet_name,
            output_name=name).write_sheets()

    if output_format == 'all':
        for format_name, output_class in OUTPUT_FORMATS.items():
            write_output(output_class, output_name + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS:   # in dictionary of allowed formats
        write_output(OUTPUT_FORMATS[output_format], output_name)
    else:
        raise Exception('The requested format is not available')
def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs, input_dict, expected_output_list, recwarn, comment, warning_messages, reversible):
    """Unflattening the sheet input must yield the expected list of dicts."""
    # root_id_kwargs may override convert_titles, matching the original precedence.
    kwargs = {'convert_titles': convert_titles, **root_id_kwargs}
    sheets = OrderedDict(
        (sheet_name, [inject_root_id(root_id, line) for line in lines])
        for sheet_name, lines in input_dict.items())
    spreadsheet_input = ListInput(sheets=sheets, **kwargs)
    spreadsheet_input.read_sheets()

    # An empty schema dict is used when the case opts out of a schema.
    schema_parser = SchemaParser(
        root_schema_dict=create_schema(root_id) if use_schema else {"properties": {}},
        root_id=root_id,
        rollup=True)
    schema_parser.parse()
    spreadsheet_input.parser = schema_parser

    expected = [inject_root_id(root_id, row) for row in expected_output_list]
    assert list(spreadsheet_input.unflatten()) == expected
예제 #31
0
 def test_column_matching(self, tmpdir):
     """A schema array of strings flattens to one main-sheet column, no sub-sheet."""
     schema_file = tmpdir.join("test.json")
     schema_file.write(
         """{
         "properties": {
             "c": {
                 "type": "array",
                 "items": {"type": "string"}
             }
         }
     }"""
     )
     sparser = SchemaParser(schema_filename=schema_file.strpath)
     sparser.parse()
     json_parser = JSONParser(
         root_json_dict=[OrderedDict([("c", ["d"])])], schema_parser=sparser
     )
     json_parser.parse()
     # String arrays stay on the main sheet rather than spawning a sub-sheet.
     assert list(json_parser.main_sheet) == ["c"]
     assert json_parser.main_sheet.lines == [{"c": "d"}]
     assert len(json_parser.sub_sheets) == 0
예제 #32
0
def test_rollup():
    """Fields named in rollUp appear on the main sheet as well as the sub-sheet."""
    schema = {
        'properties': {
            'testA': {
                'type': 'array',
                'rollUp': ['testB'],
                'items': {
                    'type': 'object',
                    'properties': {
                        'testB': type_string,
                        'testC': type_string,
                    },
                },
            },
        },
    }
    parser = SchemaParser(root_schema_dict=schema, rollup=True)
    parser.parse()
    # Only the rolled-up field is promoted to the main sheet.
    assert set(parser.main_sheet) == set(['testA[]/testB'])
    assert set(parser.sub_sheets) == set(['testA'])
    assert set(parser.sub_sheets['testA']) == set(['ocid', 'testB', 'testC'])
 def test_custom_main_sheet_name(self):
     """Sub-sheet back-reference columns embed the custom main sheet name."""
     schema = {
         'properties': {
             'id': type_string,
             'testA': {
                 'type': 'object',
                 'properties': object_in_array_example_properties('testB', 'testC')
             }
         }
     }
     parser = SchemaParser(root_schema_dict=schema,
                           main_sheet_name='custom_main_sheet_name')
     parser.parse()
     assert set(parser.main_sheet) == set(['id', 'testA/id'])
     assert set(parser.sub_sheets) == set(['testB'])
     # Back-references to the parent use the custom name as their prefix.
     expected_columns = [
         'ocid',
         'custom_main_sheet_name/id:testB',
         'custom_main_sheet_name/testA/id:testB',
         'testC',
     ]
     assert list(parser.sub_sheets['testB']) == expected_columns
예제 #34
0
def test_use_titles3(recwarn, use_titles):
    """Array containing a nested object whose leaf field has no title."""
    schema = {
        'properties': {
            'Atest': {
                'type': 'array',
                'title': 'ATitle',
                'items': {
                    'type': 'object',
                    'properties': {
                        'Btest': {
                            'type': 'object',
                            'properties': {
                                'Ctest': {'type': 'string', 'title': 'CTitle'}
                            }
                        }
                    }
                }
            },
            'Ctest': {'type': 'string', 'title': 'CTitle'}
        }
    }
    parser = SchemaParser(root_schema_dict=schema, use_titles=use_titles)
    parser.parse()
    if not use_titles:
        # Titles disabled: nothing is skipped, so no warning is emitted.
        assert len(recwarn) == 0
        return
    assert set(parser.main_sheet) == set(['CTitle'])
    assert set(parser.sub_sheets) == set(['Atest'])
    # The untitled nested field is dropped, leaving the sub-sheet empty.
    assert list(parser.sub_sheets['Atest']) == []
    assert len(recwarn) == 1
    w = recwarn.pop(UserWarning)
    assert 'Field Atest/0/Btest/Ctest is missing a title' in text_type(
        w.message)
 def test_two_parents(self):
     """A child array shared by two parents gets a back-reference to each."""
     schema = {
         'properties': OrderedDict([
             ('testA', {
                 'type': 'array',
                 'items': {
                     'type': 'object',
                     'properties': object_in_array_example_properties('testB', 'testC'),
                 },
             }),
             ('testD', {
                 'type': 'array',
                 'items': {
                     'type': 'object',
                     'properties': object_in_array_example_properties('testB', 'testE'),
                 },
             }),
         ])
     }
     parser = SchemaParser(root_schema_dict=schema)
     parser.parse()
     assert set(parser.main_sheet) == set()
     assert set(parser.sub_sheets) == set(['testA', 'testB', 'testD'])
     assert list(parser.sub_sheets['testA']) == ['ocid', 'id']
     assert list(parser.sub_sheets['testD']) == ['ocid', 'id']
     # testB is reachable from both parents, so it carries both link columns.
     assert list(parser.sub_sheets['testB']) == [
         'ocid', 'main/testA[]/id:testB', 'main/testD[]/id:testB',
         'testC', 'testE']
예제 #36
0
def test_rollup():
    """Rolled-up array fields are copied to the main sheet under /0/ paths."""
    schema = {
        'properties': {
            'Atest': {
                'type': 'array',
                'rollUp': ['Btest'],
                'items': {
                    'type': 'object',
                    'properties': {
                        'Btest': type_string,
                        'Ctest': type_string,
                    },
                },
            },
        },
    }
    parser = SchemaParser(root_schema_dict=schema, rollup=True)
    parser.parse()
    assert set(parser.main_sheet) == set(['Atest/0/Btest'])
    assert set(parser.sub_sheets) == set(['Atest'])
    # The sub-sheet keeps both fields, named by their full /0/ path.
    assert set(parser.sub_sheets['Atest']) == set(
        ['Atest/0/Btest', 'Atest/0/Ctest'])
예제 #37
0
def create_template(schema,
                    output_name='releases',
                    output_format='all',
                    main_sheet_name='main',
                    flatten=False,
                    rollup=False,
                    root_id='ocid',
                    use_titles=False,
                    **_):
    """
    Creates template file(s) from given inputs
    This function is built to deal with commandline input and arguments
    but to also be called from elswhere in future

    """
    # Parse the schema once; it drives the columns of every output format.
    parser = SchemaParser(schema_filename=schema,
                          main_sheet_name=main_sheet_name,
                          rollup=rollup,
                          root_id=root_id,
                          use_titles=use_titles)
    parser.parse()

    def write_template(output_class, name):
        # Emit all sheets via the chosen spreadsheet backend.
        output_class(parser=parser,
                     main_sheet_name=main_sheet_name,
                     output_name=name).write_sheets()

    if output_format == 'all':
        # One template per known format, suffixing each file name.
        for format_name, output_class in OUTPUT_FORMATS.items():
            write_template(output_class,
                           output_name + FORMATS_SUFFIX[format_name])
    elif output_format in OUTPUT_FORMATS:  # in dictionary of allowed formats
        write_template(OUTPUT_FORMATS[output_format], output_name)
    else:
        raise Exception('The requested format is not available')
예제 #38
0
 def test_two_parents(self):
     """Two arrays with a common child produce truncated per-parent sub-sheets."""
     schema = {
         'properties': OrderedDict([
             ('id', type_string),
             ('Atest', {
                 'type': 'array',
                 'items': {
                     'type': 'object',
                     'properties': object_in_array_example_properties('Btest', 'Ctest'),
                 },
             }),
             ('Dtest', {
                 'type': 'array',
                 'items': {
                     'type': 'object',
                     'properties': object_in_array_example_properties('Btest', 'Etest'),
                 },
             }),
         ])
     }
     parser = SchemaParser(root_schema_dict=schema)
     parser.parse()
     assert set(parser.main_sheet) == set(['id'])
     # Each parent keeps its own copy of the shared child, under a
     # truncated sheet name ("Ate_"/"Dte_" prefixes).
     assert set(parser.sub_sheets) == set(
         ['Atest', 'Dtest', 'Ate_Btest', 'Dte_Btest'])
     assert list(parser.sub_sheets['Atest']) == ['id', 'Atest/0/id']
     assert list(parser.sub_sheets['Dtest']) == ['id', 'Dtest/0/id']
     assert list(parser.sub_sheets['Ate_Btest']) == [
         'id', 'Atest/0/id', 'Atest/0/Btest/0/Ctest']
     assert list(parser.sub_sheets['Dte_Btest']) == [
         'id', 'Dtest/0/id', 'Dtest/0/Btest/0/Etest']
예제 #39
0
def run(sheets, schema=None, source_maps=False):
    """
    Unflatten ``sheets`` (a list of dicts with 'name', 'headings' and 'rows')
    into nested data, optionally guided by ``schema``.

    Returns a (result, cell_source_map_data, heading_source_map_data) tuple;
    the two map entries come from fancy_unflatten's source-map output.
    """
    input_headings = OrderedDict()
    input_sheets = OrderedDict()
    for sheet in sheets:
        # Re-key each row by its sheet's headings, preserving column order.
        input_sheets[sheet['name']] = [
            OrderedDict(zip(sheet['headings'], row)) for row in sheet['rows']
        ]
        input_headings[sheet['name']] = sheet['headings']
    if schema is not None:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id='',
            # Without this, titles from a schema aren't understood
            convert_titles=True,
        )
        # Without this, the $ref entries in the schema aren't resolved.
        dereferenced_schema = JsonRef.replace_refs(schema)
        parser = SchemaParser(root_schema_dict=dereferenced_schema,
                              root_id='main',
                              rollup=True)
        parser.parse()
        spreadsheet_input.parser = parser
    else:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id='',
        )
    spreadsheet_input.read_sheets()
    # The two branches previously duplicated this call with literal
    # (True, True) / (False, False) flag pairs; pass the flag once instead.
    want_maps = bool(source_maps)
    result, cell_source_map_data, heading_source_map_data = spreadsheet_input.fancy_unflatten(
        with_cell_source_map=want_maps, with_heading_source_map=want_maps)
    return result, cell_source_map_data, heading_source_map_data
def run(sheets, schema=None, source_maps=False):
    """Unflatten the given sheets, optionally with a schema and source maps."""
    input_headings = OrderedDict()
    input_sheets = OrderedDict()
    for sheet in sheets:
        # Re-key every row by the sheet's headings, keeping column order.
        converted_rows = [OrderedDict(zip(sheet['headings'], row))
                          for row in sheet['rows']]
        input_sheets[sheet['name']] = converted_rows
        input_headings[sheet['name']] = sheet['headings']
    if schema is None:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id='',
        )
    else:
        spreadsheet_input = HeadingListInput(
            input_sheets,
            input_headings,
            root_id='',                         # QUESTION: I don't understand root_id
            convert_titles=True,                # Without this, the titles aren't understood
        )
        # Without this, the $ref entries in the schema aren't resolved.
        dereferenced_schema = JsonRef.replace_refs(schema)
        parser = SchemaParser(
            root_schema_dict=dereferenced_schema,
            root_id='main',
            rollup=True
        )
        parser.parse()
        spreadsheet_input.parser = parser
    spreadsheet_input.read_sheets()
    if not source_maps:
        # Plain unflatten: no source maps requested.
        return spreadsheet_input.unflatten(), None, None
    return spreadsheet_input.fancy_unflatten(
        with_cell_source_map=True, with_heading_source_map=True)
def test_flatten(use_titles, use_schema, root_id, root_id_kwargs, input_list, expected_output_list, recwarn, comment, warning_messages, tmpdir, reversible):
    """Flatten ``input_list`` via JSONParser and compare the main-sheet lines."""
    # Not sure why, but this seems to be necessary to have warnings picked up
    # on Python 2.7 and 3.3, but 3.4 and 3.5 are fine without it
    import warnings
    warnings.simplefilter('always')

    extra_kwargs = {'use_titles': use_titles}
    extra_kwargs.update(root_id_kwargs)

    if use_schema:
        # NOTE: the previous `... if use_schema else {"properties": {}}` inside
        # this branch was dead code (use_schema is always truthy here).
        schema_parser = SchemaParser(
            root_schema_dict=create_schema(root_id),
            rollup=True,
            **extra_kwargs
        )
        schema_parser.parse()
    else:
        schema_parser = None

    with tmpdir.join('input.json').open('w') as fp:
        json.dump({
            'mykey': [inject_root_id(root_id, input_row) for input_row in input_list]
        }, fp)

    parser = JSONParser(
        json_filename=tmpdir.join('input.json').strpath,
        root_list_path='mykey',
        schema_parser=schema_parser,
        **extra_kwargs)
    parser.parse()

    expected_output_list = [
        inject_root_id(root_id, expected_output_dict) for expected_output_dict in expected_output_list
    ]
    if expected_output_list == [{}]:
        # We don't expect an empty dictionary
        expected_output_list = []
    assert list(parser.main_sheet.lines) == expected_output_list
예제 #42
0
 def test_parent_is_array(self):
     """An array parent yields linked sub-sheets and an empty main sheet."""
     schema = {
         'properties': {
             'Atest': {
                 'type': 'array',
                 'items': {
                     'type': 'object',
                     'properties': object_in_array_example_properties('Btest', 'Ctest'),
                 },
             },
         },
     }
     parser = SchemaParser(root_schema_dict=schema)
     parser.parse()
     assert set(parser.main_sheet) == set()
     assert set(parser.sub_sheets) == set(['Atest', 'Ate_Btest'])
     assert list(parser.sub_sheets['Atest']) == ['Atest/0/id']
     # The nested array gets its own (name-truncated) sub-sheet.
     assert list(parser.sub_sheets['Ate_Btest']) == [
         'Atest/0/id', 'Atest/0/Btest/0/Ctest']
예제 #43
0
 def test_two_parents(self):
     """A child array reachable via two parents collects link columns for both."""
     schema = {
         'properties': OrderedDict([
             ('id', type_string),
             ('testA', {
                 'type': 'array',
                 'items': {
                     'type': 'object',
                     'properties': object_in_array_example_properties('testB', 'testC'),
                 },
             }),
             ('testD', {
                 'type': 'array',
                 'items': {
                     'type': 'object',
                     'properties': object_in_array_example_properties('testB', 'testE'),
                 },
             }),
         ])
     }
     parser = SchemaParser(root_schema_dict=schema)
     parser.parse()
     assert set(parser.main_sheet) == set(['id'])
     assert set(parser.sub_sheets) == set(['testA', 'testB', 'testD'])
     assert list(parser.sub_sheets['testA']) == ['ocid', 'main/id:testA', 'id']
     assert list(parser.sub_sheets['testD']) == ['ocid', 'main/id:testD', 'id']
     # testB carries back-references to the root and to both parent arrays.
     assert list(parser.sub_sheets['testB']) == [
         'ocid', 'main/id:testB', 'main/testA[]/id:testB',
         'main/testD[]/id:testB', 'testC', 'testE']
예제 #44
0
 def test_sub_sheet_names(self, tmpdir):
     """Sub-sheets are named after the $ref target, not the referencing property."""
     schema_file = tmpdir.join('test.json')
     schema_file.write('''{
         "properties": {
             "c": {
                 "type": "array",
                 "items": {"$ref": "#/testB"}
             }
         },
         "testB": {
             "type": "object",
             "properties": {
                 "d": { "type": "string" },
                 "f": { "type": "string" }
             }
         }
     }''')
     sparser = SchemaParser(schema_filename=schema_file.strpath)
     sparser.parse()
     json_parser = JSONParser(
         root_json_dict=[OrderedDict([
             ('a', 'b'),
             ('c', [OrderedDict([('d', 'e')])]),
         ])],
         schema_parser=sparser)
     json_parser.parse()
     assert list(json_parser.main_sheet) == ['a']
     assert json_parser.main_sheet.lines == [{'a': 'b'}]
     assert len(json_parser.sub_sheets) == 1
     # The sheet is named after the referenced definition, "testB".
     assert list(json_parser.sub_sheets['testB']) == ['ocid', 'd', 'f']
     assert json_parser.sub_sheets['testB'].lines == [{'d': 'e'}]
예제 #45
0
def test_unflatten(convert_titles, use_schema, root_id, root_id_kwargs,
                   input_dict, expected_output_list, recwarn, comment,
                   warning_messages, reversible):
    """Unflatten sheet input and compare against the expected dicts."""
    kwargs = {'convert_titles': convert_titles}
    kwargs.update(root_id_kwargs)
    sheets = OrderedDict(
        (sheet_name, [inject_root_id(root_id, line) for line in lines])
        for sheet_name, lines in input_dict.items())
    spreadsheet_input = ListInput(sheets=sheets, **kwargs)
    spreadsheet_input.read_sheets()

    # An empty schema dict is used when the case opts out of a schema.
    root_schema = create_schema(root_id) if use_schema else {"properties": {}}
    parser = SchemaParser(root_schema_dict=root_schema,
                          root_id=root_id,
                          rollup=True)
    parser.parse()
    spreadsheet_input.parser = parser

    expected = [inject_root_id(root_id, row) for row in expected_output_list]
    assert list(spreadsheet_input.unflatten()) == expected
def test_use_titles(recwarn):
    """Titles become column names; untitled fields are skipped with a warning."""

    def build_parser(b_field, c_field):
        # The three scenarios share the same shape; only the leaf specs vary.
        return SchemaParser(root_schema_dict={
            'properties': {
                'testA': {
                    'type': 'array',
                    'items': {
                        'type': 'object',
                        'properties': {'testB': b_field}
                    }
                },
                'testC': c_field
            }
        }, use_titles=True)

    # Everything titled: titles appear on both sheets.
    parser = build_parser({'type': 'string', 'title': 'BTitle'},
                          {'type': 'string', 'title': 'CTitle'})
    parser.parse()
    assert set(parser.main_sheet) == set(['CTitle'])
    assert set(parser.sub_sheets) == set(['testA'])
    assert list(parser.sub_sheets['testA']) == ['ocid', 'BTitle']

    # Main sheet title missing: that field is skipped with a warning.
    parser = build_parser({'type': 'string', 'title': 'BTitle'},
                          {'type': 'string'})
    parser.parse()
    assert set(parser.main_sheet) == set([])
    assert set(parser.sub_sheets) == set(['testA'])
    assert list(parser.sub_sheets['testA']) == ['ocid', 'BTitle']
    w = recwarn.pop(UserWarning)
    assert 'does not have a title' in text_type(w.message)

    # Child sheet title missing: the child column is skipped with a warning.
    parser = build_parser({'type': 'string'},
                          {'type': 'string', 'title': 'CTitle'})
    parser.parse()
    assert set(parser.main_sheet) == set(['CTitle'])
    assert set(parser.sub_sheets) == set(['testA'])
    assert list(parser.sub_sheets['testA']) == ['ocid']
    w = recwarn.pop(UserWarning)
    assert 'does not have a title' in text_type(w.message)
예제 #47
0
def test_use_titles2(recwarn, use_titles):
    """Missing titles along a path cause that field to be skipped, with a warning."""
    # Nested object case: the outer object (Xtest) has no title, so the full
    # titled path for Btest cannot be built.
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'Xtest': {
                'type': 'object',
                'properties': {
                    'Atest': {
                        'type': 'object',
                        'title': 'ATitle',
                        'properties': {
                            'Btest': {'type': 'string', 'title': 'BTitle'}
                        }
                    }
                }
            },
            'Ctest': {'type': 'string', 'title': 'CTitle'}
        }
    }, use_titles=use_titles)
    parser.parse()
    if use_titles:
        assert set(parser.main_sheet) == set(['CTitle'])
        assert set(parser.sub_sheets) == set([])
        assert len(recwarn) == 1
        w = recwarn.pop(UserWarning)
        assert 'Field Xtest/Atest/Btest does not have a title, skipping' in text_type(
            w.message)
    else:
        assert len(recwarn) == 0

    # Main sheet title missing: Ctest is skipped, the array survives.
    parser = SchemaParser(root_schema_dict={
        'properties': {
            'Atest': {
                'title': 'ATitle',
                'type': 'array',
                'items': {
                    'type': 'object',
                    'properties': {
                        'Btest': {'type': 'string', 'title': 'BTitle'}
                    }
                }
            },
            'Ctest': {'type': 'string'}
        }
    }, use_titles=use_titles)
    parser.parse()
    if use_titles:
        assert set(parser.main_sheet) == set([])
        assert set(parser.sub_sheets) == set(['Atest'])
        assert list(parser.sub_sheets['Atest']) == ['ATitle:BTitle']
        assert len(recwarn) == 1
        w = recwarn.pop(UserWarning)
        assert 'Field Ctest does not have a title' in text_type(w.message)
    else:
        assert len(recwarn) == 0
예제 #48
0
def unflatten(input_name, base_json=None, input_format=None, output_name=None,
              root_list_path=None, root_is_list=False, encoding='utf8', timezone_name='UTC',
              root_id=None, schema='', convert_titles=False, cell_source_map=None,
              heading_source_map=None, id_name=None, xml=False,
              vertical_orientation=False,
              metatab_name=None, metatab_only=False, metatab_schema='',
              metatab_vertical_orientation=False,
              xml_schemas=None,
              default_configuration='',
              disable_local_refs=False,
              xml_comment=None,
              truncation_length=3,
              **_):
    """
    Unflatten a flat structure (spreadsheet - csv or xlsx) into a nested structure (JSON).

    ``input_format`` must be a key of INPUT_FORMATS.  The result is written to
    ``output_name`` (stdout when None), as XML when ``xml`` is true, otherwise
    as JSON.  When ``cell_source_map`` / ``heading_source_map`` paths are
    given, source-map JSON files are written there too.  Raises ``Exception``
    on unknown/missing input formats or base_json + metatab being combined.
    """
    if input_format is None:
        raise Exception('You must specify an input format (may autodetect in future')
    elif input_format not in INPUT_FORMATS:
        raise Exception('The requested format is not available')
    if metatab_name and base_json:
        raise Exception('Not allowed to use base_json with metatab')

    # Choose the container the unflattened rows will be merged into:
    # a list at the root, the contents of base_json, or a fresh dict.
    if root_is_list:
        base = None
    elif base_json:
        with open(base_json) as fp:
            base = json.load(fp, object_pairs_hook=OrderedDict)
    else:
        base = OrderedDict()


    # Configuration supplied on the command line; individual keys may be
    # overridden by the metatab sheet below.
    base_configuration = parse_sheet_configuration(
        [item.strip() for item in default_configuration.split(",")]
    )

    cell_source_map_data = OrderedDict()
    heading_source_map_data = OrderedDict()

    if metatab_name and not root_is_list:
        # First pass: read only the metatab sheet, whose (single) row holds
        # top-level metadata and optional per-sheet configuration.
        spreadsheet_input_class = INPUT_FORMATS[input_format]
        spreadsheet_input = spreadsheet_input_class(
            input_name=input_name,
            timezone_name=timezone_name,
            root_list_path='meta',
            include_sheets=[metatab_name],
            convert_titles=convert_titles,
            vertical_orientation=metatab_vertical_orientation,
            id_name=id_name,
            xml=xml,
            use_configuration=False
        )
        if metatab_schema:
            parser = SchemaParser(schema_filename=metatab_schema, disable_local_refs=disable_local_refs)
            parser.parse()
            spreadsheet_input.parser = parser
        spreadsheet_input.encoding = encoding
        spreadsheet_input.read_sheets()
        result, cell_source_map_data_meta, heading_source_map_data_meta = spreadsheet_input.fancy_unflatten(
            with_cell_source_map=cell_source_map,
            with_heading_source_map=heading_source_map,
        )
        for key, value in (cell_source_map_data_meta or {}).items():
            ## strip off meta/0/ from start of source map as actually data is at top level
            cell_source_map_data[key[7:]] = value
        for key, value in (heading_source_map_data_meta or {}).items():
            ## strip off meta/ from start of source map as actually data is at top level
            heading_source_map_data[key[5:]] = value

        # update individual keys from base configuration
        base_configuration.update(spreadsheet_input.sheet_configuration.get(metatab_name, {}))

        if result:
            # The metatab's first row becomes top-level keys of the output.
            base.update(result[0])

    # Fall back to values from the (possibly metatab-updated) configuration.
    if root_list_path is None:
        root_list_path = base_configuration.get('RootListPath', 'main')
    if id_name is None:
        id_name = base_configuration.get('IDName', 'id')

    if not metatab_only or root_is_list:
        # Second pass: read the data sheets proper, excluding the metatab.
        spreadsheet_input_class = INPUT_FORMATS[input_format]
        spreadsheet_input = spreadsheet_input_class(
            input_name=input_name,
            timezone_name=timezone_name,
            root_list_path=root_list_path,
            root_is_list=root_is_list,
            root_id=root_id,
            convert_titles=convert_titles,
            exclude_sheets=[metatab_name],
            vertical_orientation=vertical_orientation,
            id_name=id_name,
            xml=xml,
            base_configuration=base_configuration
        )
        if schema:
            parser = SchemaParser(schema_filename=schema, rollup=True, root_id=root_id,
                                  disable_local_refs=disable_local_refs, truncation_length=truncation_length)
            parser.parse()
            spreadsheet_input.parser = parser
        spreadsheet_input.encoding = encoding
        spreadsheet_input.read_sheets()
        result, cell_source_map_data_main, heading_source_map_data_main = spreadsheet_input.fancy_unflatten(
            with_cell_source_map=cell_source_map,
            with_heading_source_map=heading_source_map,
        )
        cell_source_map_data.update(cell_source_map_data_main or {})
        heading_source_map_data.update(heading_source_map_data_main or {})
        if root_is_list:
            base = list(result)
        else:
            base[root_list_path] = list(result)

    if xml:
        xml_root_tag = base_configuration.get('XMLRootTag', 'iati-activities')
        xml_output = toxml(
            base, xml_root_tag, xml_schemas=xml_schemas, root_list_path=root_list_path, xml_comment=xml_comment)
        if output_name is None:
            # toxml output is bytes; Python 3 needs the stdout byte buffer.
            if sys.version > '3':
                sys.stdout.buffer.write(xml_output)
            else:
                sys.stdout.write(xml_output)
        else:
            with codecs.open(output_name, 'wb') as fp:
                fp.write(xml_output)
    else:
        if output_name is None:
            print(json.dumps(base, indent=4, default=decimal_default, ensure_ascii=False))
        else:
            with codecs.open(output_name, 'w', encoding='utf-8') as fp:
                json.dump(base, fp, indent=4, default=decimal_default, ensure_ascii=False)
    # Optionally write the collected source maps alongside the main output.
    if cell_source_map:
        with codecs.open(cell_source_map, 'w', encoding='utf-8') as fp:
            json.dump(cell_source_map_data, fp, indent=4, default=decimal_default, ensure_ascii=False)
    if heading_source_map:
        with codecs.open(heading_source_map, 'w', encoding='utf-8') as fp:
            json.dump(heading_source_map_data, fp, indent=4, default=decimal_default, ensure_ascii=False)
예제 #49
0
def flatten(input_name,
            schema=None,
            output_name=None,
            output_format='all',
            main_sheet_name='main',
            root_list_path='main',
            root_is_list=False,
            sheet_prefix='',
            filter_field=None,
            filter_value=None,
            preserve_fields=None,
            rollup=False,
            root_id=None,
            use_titles=False,
            xml=False,
            id_name='id',
            disable_local_refs=False,
            remove_empty_schema_columns=False,
            truncation_length=3,
            **_):
    """
    Flatten a nested structure (JSON) to a flat structure (spreadsheet - csv or xlsx).

    ``filter_field`` and ``filter_value`` must be given together (or neither).
    ``output_format`` is either 'all' or a key of OUTPUT_FORMATS; when
    ``output_name`` is falsy it defaults to 'flattened' (plus the format's
    suffix when a single format is requested).  Raises ``Exception`` for an
    unknown format or a half-specified filter.
    """

    # Filtering only makes sense as a (field, value) pair.
    if (filter_field is None
            and filter_value is not None) or (filter_field is not None
                                              and filter_value is None):
        raise Exception('You must use filter_field and filter_value together')

    # Parse the schema first (if given) so it can guide column layout.
    if schema:
        schema_parser = SchemaParser(schema_filename=schema,
                                     rollup=rollup,
                                     root_id=root_id,
                                     use_titles=use_titles,
                                     disable_local_refs=disable_local_refs,
                                     truncation_length=truncation_length)
        schema_parser.parse()
    else:
        schema_parser = None

    # Walk the input document, building the main sheet and any sub-sheets.
    parser = JSONParser(
        json_filename=input_name,
        root_list_path=None if root_is_list else root_list_path,
        schema_parser=schema_parser,
        rollup=rollup,
        root_id=root_id,
        use_titles=use_titles,
        xml=xml,
        id_name=id_name,
        filter_field=filter_field,
        filter_value=filter_value,
        preserve_fields=preserve_fields,
        remove_empty_schema_columns=remove_empty_schema_columns,
        truncation_length=truncation_length)
    parser.parse()

    def spreadsheet_output(spreadsheet_output_class, name):
        # Write all parsed sheets via the given spreadsheet backend.
        spreadsheet_output = spreadsheet_output_class(
            parser=parser,
            main_sheet_name=main_sheet_name,
            output_name=name,
            sheet_prefix=sheet_prefix)
        spreadsheet_output.write_sheets()

    if output_format == 'all':
        if not output_name:
            output_name = 'flattened'
        # Emit one output per known format, each with its own file suffix.
        for format_name, spreadsheet_output_class in OUTPUT_FORMATS.items():
            spreadsheet_output(spreadsheet_output_class,
                               output_name + FORMATS_SUFFIX[format_name])

    elif output_format in OUTPUT_FORMATS.keys(
    ):  # in dictionary of allowed formats
        if not output_name:
            output_name = 'flattened' + FORMATS_SUFFIX[output_format]
        spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)

    else:
        raise Exception('The requested format is not available')
예제 #50
0
def unflatten(input_name,
              base_json=None,
              input_format=None,
              output_name=None,
              root_list_path=None,
              root_is_list=False,
              encoding='utf8',
              timezone_name='UTC',
              root_id=None,
              schema='',
              convert_titles=False,
              cell_source_map=None,
              heading_source_map=None,
              id_name=None,
              xml=False,
              vertical_orientation=False,
              metatab_name=None,
              metatab_only=False,
              metatab_schema='',
              metatab_vertical_orientation=False,
              xml_schemas=None,
              default_configuration='',
              disable_local_refs=False,
              xml_comment=None,
              truncation_length=3,
              **_):
    """
    Unflatten a flat structure (spreadsheet - csv or xlsx) into a nested structure (JSON).

    input_name: path of the spreadsheet to read.
    base_json: path of a JSON file whose top-level keys seed the output
        (mutually exclusive with metatab_name).
    input_format: key into INPUT_FORMATS selecting the reader; required.
    output_name: where to write the result; stdout when None.
    root_list_path: key under which the unflattened rows are placed; when
        None it is taken from the sheet configuration (default 'main').
    root_is_list: emit a bare JSON list instead of an object.
    schema / metatab_schema: optional JSON schemas used to interpret the
        main sheets and the metatab sheet respectively.
    cell_source_map / heading_source_map: optional paths; when given, JSON
        source maps are written there.
    xml: emit XML (via toxml) instead of JSON.
    metatab_name / metatab_only: name of a metadata sheet merged into the
        top level of the output; metatab_only skips the main sheets.

    Raises Exception for a missing/unknown input_format or for the
    disallowed base_json + metatab combination.
    """
    if input_format is None:
        raise Exception(
            'You must specify an input format (may autodetect in future)')
    elif input_format not in INPUT_FORMATS:
        raise Exception('The requested format is not available')
    if metatab_name and base_json:
        raise Exception('Not allowed to use base_json with metatab')

    # Starting point for the output: a list, the contents of base_json, or
    # an empty mapping.
    if root_is_list:
        base = None
    elif base_json:
        with open(base_json) as fp:
            base = json.load(fp, object_pairs_hook=OrderedDict)
    else:
        base = OrderedDict()

    base_configuration = parse_sheet_configuration(
        [item.strip() for item in default_configuration.split(",")])

    cell_source_map_data = OrderedDict()
    heading_source_map_data = OrderedDict()

    # First pass: read just the metatab sheet (if any) and merge its single
    # row into the top level of the output.
    if metatab_name and not root_is_list:
        spreadsheet_input_class = INPUT_FORMATS[input_format]
        spreadsheet_input = spreadsheet_input_class(
            input_name=input_name,
            timezone_name=timezone_name,
            root_list_path='meta',
            include_sheets=[metatab_name],
            convert_titles=convert_titles,
            vertical_orientation=metatab_vertical_orientation,
            id_name=id_name,
            xml=xml,
            use_configuration=False)
        if metatab_schema:
            parser = SchemaParser(schema_filename=metatab_schema,
                                  disable_local_refs=disable_local_refs)
            parser.parse()
            spreadsheet_input.parser = parser
        spreadsheet_input.encoding = encoding
        spreadsheet_input.read_sheets()
        result, cell_source_map_data_meta, heading_source_map_data_meta = spreadsheet_input.fancy_unflatten(
            with_cell_source_map=cell_source_map,
            with_heading_source_map=heading_source_map,
        )
        for key, value in (cell_source_map_data_meta or {}).items():
            ## strip off meta/0/ from start of source map as actually data is at top level
            cell_source_map_data[key[7:]] = value
        for key, value in (heading_source_map_data_meta or {}).items():
            ## strip off meta/ from start of source map as actually data is at top level
            heading_source_map_data[key[5:]] = value

        # update individual keys from base configuration
        base_configuration.update(
            spreadsheet_input.sheet_configuration.get(metatab_name, {}))

        if result:
            base.update(result[0])

    # Defaults that may be overridden by the (possibly metatab-supplied)
    # sheet configuration.
    if root_list_path is None:
        root_list_path = base_configuration.get('RootListPath', 'main')
    if id_name is None:
        id_name = base_configuration.get('IDName', 'id')

    # Second pass: read the main sheets (skipped when metatab_only, unless
    # the output is a bare list).
    if not metatab_only or root_is_list:
        spreadsheet_input_class = INPUT_FORMATS[input_format]
        spreadsheet_input = spreadsheet_input_class(
            input_name=input_name,
            timezone_name=timezone_name,
            root_list_path=root_list_path,
            root_is_list=root_is_list,
            root_id=root_id,
            convert_titles=convert_titles,
            exclude_sheets=[metatab_name],
            vertical_orientation=vertical_orientation,
            id_name=id_name,
            xml=xml,
            base_configuration=base_configuration)
        if schema:
            parser = SchemaParser(schema_filename=schema,
                                  rollup=True,
                                  root_id=root_id,
                                  disable_local_refs=disable_local_refs,
                                  truncation_length=truncation_length)
            parser.parse()
            spreadsheet_input.parser = parser
        spreadsheet_input.encoding = encoding
        spreadsheet_input.read_sheets()
        result, cell_source_map_data_main, heading_source_map_data_main = spreadsheet_input.fancy_unflatten(
            with_cell_source_map=cell_source_map,
            with_heading_source_map=heading_source_map,
        )
        cell_source_map_data.update(cell_source_map_data_main or {})
        heading_source_map_data.update(heading_source_map_data_main or {})
        if root_is_list:
            base = list(result)
        else:
            base[root_list_path] = list(result)

    # Serialise: XML bytes or pretty-printed JSON, to file or stdout.
    if xml:
        xml_root_tag = base_configuration.get('XMLRootTag', 'iati-activities')
        xml_output = toxml(base,
                           xml_root_tag,
                           xml_schemas=xml_schemas,
                           root_list_path=root_list_path,
                           xml_comment=xml_comment)
        if output_name is None:
            sys.stdout.buffer.write(xml_output)
        else:
            with codecs.open(output_name, 'wb') as fp:
                fp.write(xml_output)
    else:
        if output_name is None:
            print(
                json.dumps(base,
                           indent=4,
                           default=decimal_default,
                           ensure_ascii=False))
        else:
            with codecs.open(output_name, 'w', encoding='utf-8') as fp:
                json.dump(base,
                          fp,
                          indent=4,
                          default=decimal_default,
                          ensure_ascii=False)
    # Optional source maps describing where each output value/heading came
    # from in the spreadsheet.
    if cell_source_map:
        with codecs.open(cell_source_map, 'w', encoding='utf-8') as fp:
            json.dump(cell_source_map_data,
                      fp,
                      indent=4,
                      default=decimal_default,
                      ensure_ascii=False)
    if heading_source_map:
        with codecs.open(heading_source_map, 'w', encoding='utf-8') as fp:
            json.dump(heading_source_map_data,
                      fp,
                      indent=4,
                      default=decimal_default,
                      ensure_ascii=False)
예제 #51
0
def test_use_titles(recwarn):
    # Build the recurring test schema: an array 'testA' containing objects
    # with a string 'testB', plus a top-level string 'testC'.  Titles are
    # attached only when supplied, so each scenario below can drop one.
    def make_schema(b_title, c_title):
        field_b = {'type': 'string'}
        if b_title is not None:
            field_b['title'] = b_title
        field_c = {'type': 'string'}
        if c_title is not None:
            field_c['title'] = c_title
        return {
            'properties': {
                'testA': {
                    'type': 'array',
                    'items': {
                        'type': 'object',
                        'properties': {'testB': field_b},
                    },
                },
                'testC': field_c,
            }
        }

    # Both titles present: titles are used everywhere, no warning.
    parser = SchemaParser(root_schema_dict=make_schema('BTitle', 'CTitle'),
                          use_titles=True)
    parser.parse()
    assert set(parser.main_sheet) == {'CTitle'}
    assert set(parser.sub_sheets) == {'testA'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'BTitle']

    # Main sheet title missing: testC is dropped and a warning is issued.
    parser = SchemaParser(root_schema_dict=make_schema('BTitle', None),
                          use_titles=True)
    parser.parse()
    assert set(parser.main_sheet) == set()
    assert set(parser.sub_sheets) == {'testA'}
    assert list(parser.sub_sheets['testA']) == ['ocid', 'BTitle']
    warning = recwarn.pop(UserWarning)
    assert 'does not have a title' in text_type(warning.message)

    # Child sheet title missing: testB is dropped and a warning is issued.
    parser = SchemaParser(root_schema_dict=make_schema(None, 'CTitle'),
                          use_titles=True)
    parser.parse()
    assert set(parser.main_sheet) == {'CTitle'}
    assert set(parser.sub_sheets) == {'testA'}
    assert list(parser.sub_sheets['testA']) == ['ocid']
    warning = recwarn.pop(UserWarning)
    assert 'does not have a title' in text_type(warning.message)
예제 #52
0
 def test_two_parents(self):
     # This is a copy of test_two_parents from test_schema_parser.py, in
     # order to check that flattening and template generation use the same
     # sheet names.
     def array_schema(shared, leaf):
         # Array-of-objects schema whose item properties come from the
         # shared example-properties helper.
         return {
             "type": "array",
             "items": {
                 "type": "object",
                 "properties": object_in_array_example_properties(shared, leaf),
             },
         }

     schema_parser = SchemaParser(
         root_schema_dict={
             "properties": OrderedDict(
                 [
                     ("Atest", array_schema("Btest", "Ctest")),
                     ("Dtest", array_schema("Btest", "Etest")),
                 ]
             )
         }
     )
     schema_parser.parse()

     json_parser = JSONParser(
         root_json_dict=[
             {
                 "Atest": [{"id": 1, "Btest": [{"Ctest": 2}]}],
                 "Dtest": [{"id": 3, "Btest": [{"Etest": 4}]}],
             }
         ],
         schema_parser=schema_parser,
     )
     json_parser.parse()

     assert set(json_parser.main_sheet) == set()
     assert set(json_parser.sub_sheets) == {
         "Atest",
         "Dtest",
         "Ate_Btest",
         "Dte_Btest",
     }
     assert list(json_parser.sub_sheets["Atest"]) == ["Atest/0/id"]
     assert list(json_parser.sub_sheets["Dtest"]) == ["Dtest/0/id"]
     assert list(json_parser.sub_sheets["Ate_Btest"]) == [
         "Atest/0/id",
         "Atest/0/Btest/0/Ctest",
     ]
     assert list(json_parser.sub_sheets["Dte_Btest"]) == [
         "Dtest/0/id",
         "Dtest/0/Btest/0/Etest",
     ]