Example #1
def get_default_yaml_parsers(
        parser_finder: ParserFinder,
        conversion_finder: ConversionFinder) -> List[AnyParser]:
    """
    Utility method to return the default parsers able to parse an object from a YAML file.
    Note that MultifileObjectParser is not provided in this list, as it is already added in a hardcoded way in
    RootParser.
    :return:
    """
    return [  # yaml for any object
        SingleFileParserFunction(
            parser_function=read_object_from_yaml,
            streaming_mode=True,
            supported_exts={'.yaml', '.yml'},
            supported_types={AnyObject},
        ),
        # yaml for collection objects
        SingleFileParserFunction(
            parser_function=read_object_from_yaml,
            custom_name='read_collection_from_yaml',
            streaming_mode=True,
            supported_exts={'.yaml', '.yml'},
            supported_types={Tuple, Dict, List, Set},
        )
    ]
def get_default_primitive_parsers():
    return [
        SingleFileParserFunction(parser_function=read_str_from_txt,
                                 streaming_mode=True,
                                 supported_exts={'.txt'},
                                 supported_types={str})
    ]
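For context, a minimal usage sketch (the file name is hypothetical and the import path is assumed) showing how a default parser such as read_str_from_txt above is reached through a RootParser; as in the tests further down, parse_item takes the location without its file extension.

from parsyfiles import RootParser  # import path assumed

root_parser = RootParser()  # registers the default parsers, including read_str_from_txt
# given a hypothetical file './hello.txt', parse its contents as a plain string
# (the location is passed without the file extension)
text = root_parser.parse_item('./hello', str)
print(text)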
    def test_parse_with_custom_parser(self):
        """
        Parses a collection of XML files into ElementTree objects using a custom parser.
        :return:
        """

        from typing import Type
        from parsyfiles.converting_core import T
        from logging import Logger
        from xml.etree.ElementTree import ElementTree, parse, tostring

        def read_xml(desired_type: Type[T], file_path: str, encoding: str,
                     logger: Logger, **kwargs):
            """
            Opens an XML file and returns the tree parsed from it as an ElementTree.

            :param desired_type:
            :param file_path:
            :param encoding:
            :param logger:
            :param kwargs:
            :return:
            """
            return parse(file_path)

        my_parser = SingleFileParserFunction(parser_function=read_xml,
                                             streaming_mode=False,
                                             supported_exts={'.xml'},
                                             supported_types={ElementTree})

        parser = RootParser('parsyfiles with timeseries')
        parser.register_parser(my_parser)
        xmls = parser.parse_collection(fix_path('./xml_collection'),
                                       ElementTree)
        pprint({name: tostring(x.getroot()) for name, x in xmls.items()})
Example #4
def get_default_pandas_parsers() -> List[AnyParser]:
    """
    Utility method to return the default parsers able to parse a pandas DataFrame or Series from a file.
    :return:
    """
    return [
        SingleFileParserFunction(parser_function=read_dataframe_from_xls,
                                 streaming_mode=False,
                                 supported_exts={'.xls', '.xlsx', '.xlsm'},
                                 supported_types={pd.DataFrame},
                                 option_hints=pandas_parsers_option_hints_xls),
        SingleFileParserFunction(parser_function=read_df_or_series_from_csv,
                                 streaming_mode=False,
                                 supported_exts={'.csv', '.txt'},
                                 supported_types={pd.DataFrame, pd.Series},
                                 option_hints=pandas_parsers_option_hints_csv),
    ]
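A similar hedged sketch (folder and file names are hypothetical, import path assumed) for the CSV parser above, this time parsing a whole folder as a collection of DataFrames:

from parsyfiles import RootParser  # import path assumed
import pandas as pd

root_parser = RootParser()  # default parsers include read_df_or_series_from_csv
# hypothetical folder './timeseries' containing series1.csv, series2.csv, ...
dataframes = root_parser.parse_collection('./timeseries', pd.DataFrame)
for name, df in dataframes.items():
    print(name, df.shape)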
Example #5
def get_default_config_parsers() -> List[AnyParser]:
    """
    Utility method to return the default parsers able to parse a configuration (ConfigParser) from a file.
    :return:
    """
    return [SingleFileParserFunction(parser_function=read_config,
                                     streaming_mode=True,
                                     supported_exts={'.cfg', '.ini'},
                                     supported_types={ConfigParser}),
            ]
Example #6
def get_default_object_parsers(parser_finder: ParserFinder, conversion_finder: ConversionFinder) -> List[AnyParser]:
    """
    Utility method to return the default parsers able to parse an object from a file.
    Note that MultifileObjectParser is not provided in this list, as it is already added in a hardcoded way in
    RootParser
    :return:
    """
    return [SingleFileParserFunction(parser_function=read_object_from_pickle,
                                     streaming_mode=False,
                                     supported_exts={'.pyc'},
                                     supported_types={AnyObject}),
            MultifileObjectParser(parser_finder, conversion_finder)
            ]
def get_default_jprops_parsers(parser_finder: ParserFinder, conversion_finder: ConversionFinder) -> List[AnyParser]:
    """
    Utility method to return the default parsers able to parse a dictionary from a properties file.
    :return:
    """
    return [SingleFileParserFunction(parser_function=read_dict_from_properties,
                                     streaming_mode=True, custom_name='read_dict_from_properties',
                                     supported_exts={'.properties', '.txt'},
                                     supported_types={dict},
                                     function_args={'conversion_finder': conversion_finder}),
            # SingleFileParserFunction(parser_function=read_list_from_properties,
            #                          streaming_mode=True,
            #                          supported_exts={'.properties', '.txt'},
            #                          supported_types={list}),
        ]
Example #8
def get_default_collection_parsers(
        parser_finder: ParserFinder,
        conversion_finder: ConversionFinder) -> List[AnyParser]:
    """
    Utility method to return the default parsers able to parse a collection (dict or list) from a file.
    :return:
    """
    return [
        SingleFileParserFunction(
            parser_function=read_dict_or_list_from_json,
            streaming_mode=True,
            custom_name='read_dict_or_list_from_json',
            supported_exts={'.json'},
            supported_types={dict, list},
            function_args={'conversion_finder': conversion_finder}),
        MultifileCollectionParser(parser_finder)
    ]
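As a rough illustration of what the two parsers above enable (all names here are hypothetical): a dict can come either from a single .json file or from a folder with one file per entry, and the same high-level call covers both.

from parsyfiles import RootParser  # import path assumed

root_parser = RootParser()
# './ratings' may be a single 'ratings.json' file, or a folder whose children
# ('alice.txt', 'bob.txt', ...) become the dictionary keys
ratings = root_parser.parse_item('./ratings', dict)
print(ratings)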
Example #9
def test_custom_parser_ok_for_subclasses():
    """
    Tests that if you register a custom parser for a subclass of A, it is correctly used to parse an A (in non-strict
    mode, which is the default).
    :return:
    """
    root_parser = RootParser()

    class A:
        def __init__(self, txt):
            self.txt = txt

    class B(A):
        """ a subclass of A """
        pass

    def read_B_from_txt(desired_type: Type[B], file_object: TextIOBase,
                        logger: Logger, *args, **kwargs) -> B:
        # read the entire stream into a string
        str_io = StringIO()
        shutil.copyfileobj(file_object, str_io)
        # wrap only the first character in a B
        return B(str_io.getvalue()[0])

    # before registering a parser for B, only generic parsers are able to parse an A
    before_capa = root_parser.get_capabilities_for_type(A)['.txt']
    assert list(before_capa.keys()) == ['3_generic']

    # register a parser for B
    root_parser.register_parser(
        SingleFileParserFunction(parser_function=read_B_from_txt,
                                 streaming_mode=True,
                                 supported_exts={'.txt'},
                                 supported_types={B}))

    # after registering, the new parser appears in the list of parsers able to parse A
    after_capa = root_parser.get_capabilities_for_type(A)['.txt']
    assert str(after_capa['2_approx_match'][0]) == '<read_B_from_txt>'

    a = root_parser.parse_item(get_path('b64pickle-float-1.0=True'), A)
    # check that the custom parser was used, not the generic 'construct from string'
    assert len(a.txt) == 1
    assert a.txt == 'g'
Example #10
    def setUp(self):
        # 2 simple classes with inheritance
        class A(object):
            pass

        class B(A):
            pass

        # 2 generic classes with inheritance
        Y = TypeVar('Y')
        Z = TypeVar('Z')

        class C(Generic[Y]):
            pass

        class D(Generic[Z, Y], C[Y]):
            pass

        self.A = A
        self.B = B
        self.C = C
        self.D = D
        self.all_types = {A, B, C, D, AnyObject}

        self.all_parsers_for_a = set()
        self.all_parsers_for_b = set()
        self.all_parsers_for_c = set()
        self.all_parsers_for_d = set()
        self.all_parsers_generic = set()

        # create all combinations of file extensions
        self.all_a_extensions = []
        all_b_extensions = []
        all_c_extensions = []
        all_d_extensions = []
        self.all_extensions = {MULTIFILE_EXT}
        for a in [False, True]:
            for b in [False, True]:
                for c in [False, True]:
                    for d in [False, True]:
                        ext = '.'
                        if a:
                            ext += 'a'
                        if b:
                            ext += 'b'
                        if c:
                            ext += 'c'
                        if d:
                            ext += 'd'
                        print(ext)
                        if a or b or c or d:
                            self.all_extensions.add(ext)
                        if a:
                            self.all_a_extensions.append(ext)
                        if b:
                            all_b_extensions.append(ext)
                        if c:
                            all_c_extensions.append(ext)
                        if d:
                            all_d_extensions.append(ext)

        # parsers

        # *********** Specific SingleFile ************
        # -- one type
        def parse_a():
            pass

        def parse_b():
            pass

        def parse_c():
            pass

        def parse_d():
            pass

        a_parser_single = SingleFileParserFunction(parse_a,
                                                   supported_types={A},
                                                   supported_exts=set(
                                                       self.all_a_extensions))
        b_parser_single = SingleFileParserFunction(
            parse_b, supported_types={B}, supported_exts=set(all_b_extensions))
        c_parser_single = SingleFileParserFunction(
            parse_c, supported_types={C}, supported_exts=set(all_c_extensions))
        d_parser_single = SingleFileParserFunction(
            parse_d, supported_types={D}, supported_exts=set(all_d_extensions))
        parsers_specific_singlefile_onetype = [
            a_parser_single, b_parser_single, c_parser_single, d_parser_single
        ]
        self.all_parsers_for_a.add(a_parser_single)
        self.all_parsers_for_b.add(b_parser_single)
        self.all_parsers_for_c.add(c_parser_single)
        self.all_parsers_for_d.add(d_parser_single)

        # -- several types
        def parse_a_or_d():
            pass

        def parse_b_or_c():
            pass

        ad_parser_single = SingleFileParserFunction(
            parse_a_or_d,
            supported_types={A, D},
            supported_exts=set(self.all_a_extensions).union(all_d_extensions))
        bc_parser_single = SingleFileParserFunction(
            parse_b_or_c,
            supported_types={B, C},
            supported_exts=set(all_b_extensions).union(all_c_extensions))
        parsers_specific_singlefile_severaltypes = [
            ad_parser_single, bc_parser_single
        ]
        self.all_parsers_for_a.add(ad_parser_single)
        self.all_parsers_for_b.add(bc_parser_single)
        self.all_parsers_for_c.add(bc_parser_single)
        self.all_parsers_for_d.add(ad_parser_single)

        # ******** Specific Multifile ************
        class DummyMultifileParser(MultiFileParser):
            def __init__(self, supported_types):
                super(DummyMultifileParser, self).__init__(supported_types)

            def __str__(self):
                return 'MultifileParser for ' + str([get_pretty_type_str(typ) for typ in self.supported_types]) \
                       + ' for ' + str(self.supported_exts)

            def _get_parsing_plan_for_multifile_children(
                    self, obj_on_fs: PersistedObject, desired_type: Type[T],
                    logger: Logger) -> Dict[str, _BaseParsingPlan[Any]]:
                pass

            def _parse_multifile(self, desired_type: Type[T],
                                 obj: PersistedObject,
                                 parsing_plan_for_children: Dict[
                                     str, AnyParser._RecursiveParsingPlan],
                                 logger: Logger,
                                 options: Dict[str, Dict[str, Any]]) -> T:
                pass

        # -- one type
        a_parser_multi = DummyMultifileParser(supported_types={A})
        b_parser_multi = DummyMultifileParser(supported_types={B})
        c_parser_multi = DummyMultifileParser(supported_types={C})
        d_parser_multi = DummyMultifileParser(supported_types={D})
        parsers_specific_multifile_onetype = [
            a_parser_multi, b_parser_multi, c_parser_multi, d_parser_multi
        ]
        self.all_parsers_for_a.add(a_parser_multi)
        self.all_parsers_for_b.add(b_parser_multi)
        self.all_parsers_for_c.add(c_parser_multi)
        self.all_parsers_for_d.add(d_parser_multi)

        # -- several types
        bd_parser_multi = DummyMultifileParser(supported_types={B, D})
        ac_parser_multi = DummyMultifileParser(supported_types={A, C})
        parsers_specific_multifile_severaltypes = [
            bd_parser_multi, ac_parser_multi
        ]
        self.all_parsers_for_a.add(ac_parser_multi)
        self.all_parsers_for_b.add(bd_parser_multi)
        self.all_parsers_for_c.add(ac_parser_multi)
        self.all_parsers_for_d.add(bd_parser_multi)

        # ******** Specific BOTH **************
        class DummyParser(AnyParser):
            def __init__(self, supported_types, supported_exts):
                super(DummyParser,
                      self).__init__(supported_types=supported_types,
                                     supported_exts=supported_exts)

            def _get_parsing_plan_for_multifile_children(
                    self, obj_on_fs: PersistedObject, desired_type: Type[T],
                    logger: Logger) -> Dict[str, _BaseParsingPlan[Any]]:
                pass

            def _parse_multifile(self, desired_type: Type[T],
                                 obj: PersistedObject,
                                 parsing_plan_for_children: Dict[
                                     str, AnyParser._RecursiveParsingPlan],
                                 logger: Logger,
                                 options: Dict[str, Dict[str, Any]]) -> T:
                pass

            def _parse_singlefile(self, desired_type: Type[T], file_path: str,
                                  encoding: str, logger: Logger,
                                  options: Dict[str, Dict[str, Any]]) -> T:
                pass

        # -- one type
        a_parser_both = DummyParser(supported_types={A},
                                    supported_exts=set(self.all_a_extensions))
        b_parser_both = DummyParser(supported_types={B},
                                    supported_exts=set(all_b_extensions))
        c_parser_both = DummyParser(supported_types={C},
                                    supported_exts=set(all_c_extensions))
        d_parser_both = DummyParser(supported_types={D},
                                    supported_exts=set(all_d_extensions))
        parsers_specific_bothfile_onetype = [
            a_parser_both, b_parser_both, c_parser_both, d_parser_both
        ]
        self.all_parsers_for_a.add(a_parser_both)
        self.all_parsers_for_b.add(b_parser_both)
        self.all_parsers_for_c.add(c_parser_both)
        self.all_parsers_for_d.add(d_parser_both)

        # -- several types
        abc_parser_both = DummyParser(
            supported_types={A, B, C},
            supported_exts=set(self.all_a_extensions).union(
                all_b_extensions).union(all_c_extensions))
        acd_parser_both = DummyParser(
            supported_types={A, C, D},
            supported_exts=set(self.all_a_extensions).union(
                all_c_extensions).union(all_d_extensions))
        parsers_specific_bothfile_severaltypes = [
            abc_parser_both, acd_parser_both
        ]
        self.all_parsers_for_a = self.all_parsers_for_a.union(
            {abc_parser_both, acd_parser_both})
        self.all_parsers_for_b.add(abc_parser_both)
        self.all_parsers_for_c = self.all_parsers_for_c.union(
            {abc_parser_both, acd_parser_both})
        self.all_parsers_for_d.add(acd_parser_both)

        # ******** GENERIC *******
        def parse_any():
            pass

        any_parser_singlefile = SingleFileParserFunction(
            parse_any,
            supported_types={AnyObject},
            supported_exts=set(all_d_extensions))
        parsers_generic_singlefile = [any_parser_singlefile]

        any_parser_multifile = DummyMultifileParser(
            supported_types={AnyObject})
        parsers_generic_multifile = [any_parser_multifile]

        any_parser_bothfile = DummyParser(supported_types={AnyObject},
                                          supported_exts=set(
                                              self.all_a_extensions))
        parsers_generic_bothfile = [any_parser_bothfile]

        self.all_parsers_generic = self.all_parsers_generic.union(set(parsers_generic_singlefile)) \
                                                            .union(set(parsers_generic_multifile)) \
                                                            .union(set(parsers_generic_bothfile))

        self.all_parsers_lists = [
            parsers_specific_singlefile_onetype,
            parsers_specific_singlefile_severaltypes,
            parsers_specific_multifile_onetype,
            parsers_specific_multifile_severaltypes,
            parsers_specific_bothfile_onetype,
            parsers_specific_bothfile_severaltypes, parsers_generic_singlefile,
            parsers_generic_multifile, parsers_generic_bothfile
        ]

        self.all_parsers = {
            A: self.all_parsers_for_a,
            B: self.all_parsers_for_b,
            C: self.all_parsers_for_c,
            D: self.all_parsers_for_d,
            AnyObject: self.all_parsers_generic
        }
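For readability: the nested for-loops in the setUp above simply enumerate every non-empty combination of the letters a, b, c and d as a file extension. An equivalent sketch using itertools (not part of the original test) is:

from itertools import product

extensions = set()
for flags in product([False, True], repeat=4):
    ext = '.' + ''.join(letter for letter, keep in zip('abcd', flags) if keep)
    if ext != '.':
        extensions.add(ext)
# extensions now holds the 15 values '.a', '.b', '.ab', ..., '.abcd'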
Example #11
    def setUp(self):
        """
        This setup function defines:
        * the classes that will be used in the whole test class
        * the file extensions associated with one or several classes
        * the parsers

        :return:
        """
        # defines the classes
        A, B, C, D = self.define_types()
        self.A, self.B, self.C, self.D = A, B, C, D
        self.all_types = {A, B, C, D, AnyObject}

        # create all combinations of file extensions
        all_a_exts, all_b_exts, all_c_exts, all_d_exts, all_extensions = \
            self.define_file_extensions()
        self.all_extensions = all_extensions

        # defines the parsers
        self.all_parsers_for_a = set()
        self.all_parsers_for_b = set()
        self.all_parsers_for_c = set()
        self.all_parsers_for_d = set()
        self.all_parsers_generic = set()

        # *********** Specific SingleFile ************
        # -- one type
        def parse_a():
            pass

        def parse_b():
            pass

        def parse_c():
            pass

        def parse_d():
            pass

        a_parser_single = SingleFileParserFunction(
            parse_a, supported_types={A}, supported_exts=set(all_a_exts))
        b_parser_single = SingleFileParserFunction(
            parse_b, supported_types={B}, supported_exts=set(all_b_exts))
        c_parser_single = SingleFileParserFunction(
            parse_c, supported_types={C}, supported_exts=set(all_c_exts))
        d_parser_single = SingleFileParserFunction(
            parse_d, supported_types={D}, supported_exts=set(all_d_exts))
        parsers_specific_singlefile_onetype = [
            a_parser_single, b_parser_single, c_parser_single, d_parser_single
        ]
        self.all_parsers_for_a.add(a_parser_single)
        self.all_parsers_for_b.add(b_parser_single)
        self.all_parsers_for_c.add(c_parser_single)
        self.all_parsers_for_d.add(d_parser_single)

        # -- several types
        def parse_a_or_d():
            pass

        def parse_b_or_c():
            pass

        ad_parser_single = SingleFileParserFunction(
            parse_a_or_d,
            supported_types={A, D},
            supported_exts=set(all_a_exts).union(all_d_exts))
        bc_parser_single = SingleFileParserFunction(
            parse_b_or_c,
            supported_types={B, C},
            supported_exts=set(all_b_exts).union(all_c_exts))
        parsers_specific_singlefile_severaltypes = [
            ad_parser_single, bc_parser_single
        ]
        self.all_parsers_for_a.add(ad_parser_single)
        self.all_parsers_for_b.add(bc_parser_single)
        self.all_parsers_for_c.add(bc_parser_single)
        self.all_parsers_for_d.add(ad_parser_single)

        # ******** Specific Multifile ************
        class DummyMultifileParser(MultiFileParser):
            def __init__(self, supported_types):
                super(DummyMultifileParser, self).__init__(supported_types)

            def __str__(self):
                return 'MultifileParser for ' + str([get_pretty_type_str(typ) for typ in self.supported_types]) \
                       + ' for ' + str(self.supported_exts)

            def _get_parsing_plan_for_multifile_children(
                    self, obj_on_fs: PersistedObject, desired_type: Type[T],
                    logger: Logger) -> Dict[str, _BaseParsingPlan[Any]]:
                pass

            def _parse_multifile(self, desired_type: Type[T],
                                 obj: PersistedObject,
                                 parsing_plan_for_children: Dict[
                                     str, AnyParser._RecursiveParsingPlan],
                                 logger: Logger,
                                 options: Dict[str, Dict[str, Any]]) -> T:
                pass

        # -- one type
        a_parser_multi = DummyMultifileParser(supported_types={A})
        b_parser_multi = DummyMultifileParser(supported_types={B})
        c_parser_multi = DummyMultifileParser(supported_types={C})
        d_parser_multi = DummyMultifileParser(supported_types={D})
        parsers_specific_multifile_onetype = [
            a_parser_multi, b_parser_multi, c_parser_multi, d_parser_multi
        ]
        self.all_parsers_for_a.add(a_parser_multi)
        self.all_parsers_for_b.add(b_parser_multi)
        self.all_parsers_for_c.add(c_parser_multi)
        self.all_parsers_for_d.add(d_parser_multi)

        # -- several types
        bd_parser_multi = DummyMultifileParser(supported_types={B, D})
        ac_parser_multi = DummyMultifileParser(supported_types={A, C})
        parsers_specific_multifile_severaltypes = [
            bd_parser_multi, ac_parser_multi
        ]
        self.all_parsers_for_a.add(ac_parser_multi)
        self.all_parsers_for_b.add(bd_parser_multi)
        self.all_parsers_for_c.add(ac_parser_multi)
        self.all_parsers_for_d.add(bd_parser_multi)

        # ******** Specific BOTH **************
        class DummyParser(AnyParser):
            def __init__(self, supported_types, supported_exts):
                super(DummyParser,
                      self).__init__(supported_types=supported_types,
                                     supported_exts=supported_exts)

            def _get_parsing_plan_for_multifile_children(
                    self, obj_on_fs: PersistedObject, desired_type: Type[T],
                    logger: Logger) -> Dict[str, _BaseParsingPlan[Any]]:
                pass

            def _parse_multifile(self, desired_type: Type[T],
                                 obj: PersistedObject,
                                 parsing_plan_for_children: Dict[
                                     str, AnyParser._RecursiveParsingPlan],
                                 logger: Logger,
                                 options: Dict[str, Dict[str, Any]]) -> T:
                pass

            def _parse_singlefile(self, desired_type: Type[T], file_path: str,
                                  encoding: str, logger: Logger,
                                  options: Dict[str, Dict[str, Any]]) -> T:
                pass

        # -- one type
        a_parser_both = DummyParser(supported_types={A},
                                    supported_exts=set(all_a_exts))
        b_parser_both = DummyParser(supported_types={B},
                                    supported_exts=set(all_b_exts))
        c_parser_both = DummyParser(supported_types={C},
                                    supported_exts=set(all_c_exts))
        d_parser_both = DummyParser(supported_types={D},
                                    supported_exts=set(all_d_exts))
        parsers_specific_bothfile_onetype = [
            a_parser_both, b_parser_both, c_parser_both, d_parser_both
        ]
        self.all_parsers_for_a.add(a_parser_both)
        self.all_parsers_for_b.add(b_parser_both)
        self.all_parsers_for_c.add(c_parser_both)
        self.all_parsers_for_d.add(d_parser_both)

        # -- several types
        abc_parser_both = DummyParser(
            supported_types={A, B, C},
            supported_exts=set(all_a_exts).union(all_b_exts).union(all_c_exts))
        acd_parser_both = DummyParser(
            supported_types={A, C, D},
            supported_exts=set(all_a_exts).union(all_c_exts).union(all_d_exts))
        parsers_specific_bothfile_severaltypes = [
            abc_parser_both, acd_parser_both
        ]
        self.all_parsers_for_a = self.all_parsers_for_a.union(
            {abc_parser_both, acd_parser_both})
        self.all_parsers_for_b.add(abc_parser_both)
        self.all_parsers_for_c = self.all_parsers_for_c.union(
            {abc_parser_both, acd_parser_both})
        self.all_parsers_for_d.add(acd_parser_both)

        # ******** GENERIC *******
        def parse_any():
            pass

        any_parser_singlefile = SingleFileParserFunction(
            parse_any,
            supported_types={AnyObject},
            supported_exts=set(all_d_exts))
        parsers_generic_singlefile = [any_parser_singlefile]

        any_parser_multifile = DummyMultifileParser(
            supported_types={AnyObject})
        parsers_generic_multifile = [any_parser_multifile]

        any_parser_bothfile = DummyParser(supported_types={AnyObject},
                                          supported_exts=set(all_a_exts))
        parsers_generic_bothfile = [any_parser_bothfile]

        self.all_parsers_generic = self.all_parsers_generic.union(set(parsers_generic_singlefile)) \
                                                           .union(set(parsers_generic_multifile)) \
                                                           .union(set(parsers_generic_bothfile))

        self.all_parsers_lists = [
            parsers_specific_singlefile_onetype,
            parsers_specific_singlefile_severaltypes,
            parsers_specific_multifile_onetype,
            parsers_specific_multifile_severaltypes,
            parsers_specific_bothfile_onetype,
            parsers_specific_bothfile_severaltypes, parsers_generic_singlefile,
            parsers_generic_multifile, parsers_generic_bothfile
        ]

        self.all_parsers = {
            A: self.all_parsers_for_a,
            B: self.all_parsers_for_b,
            C: self.all_parsers_for_c,
            D: self.all_parsers_for_d,
            AnyObject: self.all_parsers_generic
        }
    def test_old_demo(self):
        # In this demonstrative example, we will parse 'test cases' for an imaginary function that performs operations:
        # op_function(a:int, b:int, operation:str = '+') -> int
        #
        # Each of our 'test case' items will be made of several things:
        # * mandatory input data (here, a and b)
        # * optional configuration (here, operation)
        # * mandatory expected result (here, the output)

        # We would like these things stored in four separate files. Typically the reason is that you will want to
        # separate the various formats that you wish to use: csv, xml, json...
        # So our data folder structure looks like this:

        # test_cases
        # ├── case1
        # │   ├── input_a.txt
        # │   ├── input_b.txt
        # │   └── output.txt
        # ├── case2
        # │   ├── input_a.txt
        # │   ├── input_b.txt
        # │   ├── options.txt
        # │   └── output.txt
        # └── case3
        #     ├── input_a.txt
        #     ├── input_b.txt
        #     ├── options.cfg
        #     └── output.txt

        # Note that the configuration file is optional. Here, only `case2` and `case3` have a non-default configuration.
        # You may also have noticed that the configuration file is present with two different extensions:
        # `.txt` (in case2) and `.cfg` (in case3). This framework allows you to register several file extensions for the
        # same type of object to parse. Each extension may have its own parser function.

        # First import the package and create a root parser.

        # for this test we don't register the default parsers, so that only the custom parsers below are used
        root_parser = RootParser('parsyfiles defaults+test custom functions',
                                 register_default_parsers=False)

        # install multifile support at least
        root_parser.install_basic_multifile_support()

        # Then register a parser function for all items that will be represented as **single** files.
        # * In this example, all inputs and outputs are `int`, so we create a first function to parse an int from a text file:

        #from io import TextIOBase
        def test_parse_int_file(desired_type: Type[int],
                                file_object: TextIOBase,
                                logger: logging.Logger, *args,
                                **kwargs) -> int:
            integer_str = file_object.readline()
            return int(integer_str)

        root_parser.register_parser(
            SingleFileParserFunction(test_parse_int_file,
                                     streaming_mode=True,
                                     supported_types={int},
                                     supported_exts={'.txt'}))

        # Note that the parsing framework automatically opens and closes the file for you, even in case of exception.

        # We also need to be able to read a `configuration`, which is a `Dict[str, str]` in our case. We propose two formats:
        # * one `.txt` format where the first row will directly contain the value for the operation, and
        # * one `.cfg` format where the configuration will be available in the configparser format.

        #from typing import Dict
        class OpConfig(dict):
            """
            An OpConfig object is a Dict[str, str] object
            """
            def __init__(self, config: Dict[str, str]):
                check_var(config, var_types=dict, var_name='config')
                super(OpConfig, self).__init__()
                self.__wrapped_impl = config

                # here you may wish to perform additional checks on the wrapped object
                unrecognized = set(config.keys()) - {'operation'}
                if len(unrecognized) > 0:
                    raise ValueError('Unrecognized options : ' +
                                     str(unrecognized))

            def __getitem__(self, item):
                return self.__wrapped_impl.__getitem__(item)

            def __setitem__(self, key, value):
                return self.__wrapped_impl.__setitem__(key, value)

            # Delegate all calls to the implementation:
            def __getattr__(self, name):
                return getattr(self.__wrapped_impl, name)

        def test_parse_configuration_txt_file(desired_type: Type[Dict],
                                              file_object: TextIOBase,
                                              logger: logging.Logger, *args,
                                              **kwargs) -> Dict[str, str]:
            return OpConfig({'operation': file_object.readline()})

        def test_parse_configuration_cfg_file(desired_type: Type[Dict],
                                              file_object: TextIOBase,
                                              logger: logging.Logger, *args,
                                              **kwargs) -> Dict[str, str]:
            import configparser
            config = configparser.ConfigParser()
            config.read_file(file_object)
            return OpConfig(dict(config['main'].items()))

        root_parser.register_parser(
            SingleFileParserFunction(test_parse_configuration_txt_file,
                                     streaming_mode=True,
                                     supported_types={OpConfig},
                                     supported_exts={'.txt'}))
        root_parser.register_parser(
            SingleFileParserFunction(test_parse_configuration_cfg_file,
                                     streaming_mode=True,
                                     supported_types={OpConfig},
                                     supported_exts={'.cfg'}))

        # Finally we define the 'test case' objects
        class OpTestCase(object):
            def __init__(self,
                         input_a: int,
                         input_b: int,
                         output: int,
                         options: OpConfig = None):
                self.input_a, self.input_b, self.output = input_a, input_b, output
                if options is None:
                    self.op = '+'
                else:
                    self.op = options['operation']

            def __str__(self):
                return self.__repr__()

            def __repr__(self):
                return str(self.input_a) + ' ' + self.op + ' ' + str(
                    self.input_b) + ' =? ' + str(self.output)

        # And we parse a collection of these
        results = root_parser.parse_collection(
            fix_path('./test_data/custom_old_demo'), OpTestCase)
        pprint(results)

        conf = FlatFileMappingConfiguration(separator='--')
        results = root_parser.parse_collection(
            fix_path('./test_data/custom_old_demo_flat'),
            OpTestCase,
            file_mapping_conf=conf)
        pprint(results)

        class OpTestCaseColl(object):
            def __init__(self,
                         input_a: int,
                         input_b: int,
                         output: int,
                         input_c: Dict[str, List[int]] = None,
                         options: OpConfig = None):
                self.input_a, self.input_b, self.output = input_a, input_b, output
                if options is None:
                    self.op = '+'
                else:
                    self.op = options['operation']
                self.input_c = input_c or None

            def __str__(self):
                return self.__repr__()

            def __repr__(self):
                return str(self.input_a) + ' ' + self.op + ' ' + str(
                    self.input_b) + ' =? ' + str(self.output) + ' ' + str(
                        self.input_c)

        results = root_parser.parse_collection(
            fix_path('./test_data/custom_old_demo_flat_coll'),
            OpTestCaseColl,
            file_mapping_conf=conf)
        pprint(results['case3'].input_c)

        results = root_parser.parse_collection(
            fix_path('./test_data/custom_old_demo_coll'), OpTestCaseColl)
        pprint(results['case3'].input_c)
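        # For reference, the flat layout parsed with FlatFileMappingConfiguration(separator='--') above would look
        # roughly like this (a sketch only; the exact file set mirrors the per-folder layout shown earlier):
        #
        # custom_old_demo_flat
        # ├── case1--input_a.txt
        # ├── case1--input_b.txt
        # ├── case1--output.txt
        # ├── case2--input_a.txt
        # ├── case2--input_b.txt
        # ├── case2--options.txt
        # ├── case2--output.txt
        # └── ...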
    def _create_root_parser(self):

        # This framework allows users to parse (collections of) objects from files. Each object is a dictionary with
        # user-defined fields. Each field is mapped to a file, or a collection of files in a folder.

        # Step 1: define the applicative data model.
        # ------------------------------------------
        # a) define the *basic* types of the objects you want to read from files.
        # Each of these object types should be readable from *one* (and only one) file.
        #
        # -- Sometimes you will feel the need to use a class of your own, that probably already exists in your program:
        class OneLiner(object):
            """
            A very simple type wrapping some text contents.
            """
            def __init__(self, contents: str):
                self.contents = contents

            def __str__(self):
                """ readable representations """
                return self.contents

            def __repr__(self):
                """ unambiguous representation """
                return self.contents

        # -- In many cases, you will feel the need to wrap an existing object that a parsing library already returns:
        from configparser import ConfigParser

        class Config(ConfigParser):
            """
            A Config object is a ConfigParser object with only one 'main' section
            """
            def __init__(self, config: ConfigParser):
                super(Config, self).__init__()
                self.__wrapped_impl = config

                # here you may wish to perform additional checks on the wrapped object,
                # for example here we check that the config only has one section named 'main'
                if 'main' not in config.sections():
                    raise Exception(
                        'Wrong Config configuration : mandatory main section is missing'
                    )
                if len(config.sections()) != 1:
                    raise Exception(
                        'Wrong Config configuration : there should only be one main section'
                    )

            # Delegate all calls to the implementation:
            def __getattr__(self, name):
                return getattr(self.__wrapped_impl, name)

        # OneLiner = type('OneLiner', (), {}) # NewType('ParamDict', Dict[str, str])

        # b) Define the main schema of the object that you want to parse, i.e. the object that spans across
        # several files. This is the object you *really* want as the outcome of the parsing step.
        # For example it might represent a test case.
        class MainFooBarItem(object):
            """
            The main class of objects that we want to parse. The signature of its constructor will be used to infer
            the parsers to use and to check which attributes are optional or not.
            """
            def __init__(self,
                         input_simple: OneLiner,
                         expected_out: Config,
                         expected_perf: Config = None):
                self.input_simple = input_simple
                self.expected_out = expected_out
                self.expected_perf = expected_perf

            def get_as_dict(self):
                return {
                    'input_simple': self.input_simple,
                    'expected_out': self.expected_out,
                    'expected_perf': self.expected_perf
                }

            def __str__(self):
                return self.get_as_dict().__str__()

            def __repr__(self):
                return self.get_as_dict().__repr__()

        # Step 2: for each basic type, define at least one parsing function, which has one mandatory input
        # (file_object: TextIOBase, a stream opened from the file by the framework) and which returns an object of the
        # expected type. Note that the stream will be closed automatically by the framework.
        #
        def test_custom_read_oneliner_from_txt_file_stream(
                desired_type: Type[OneLiner], file_object: TextIOBase,
                logger: logging.Logger, *args, **kwargs) -> OneLiner:
            """
            Helper method to read a txt file and return its content as a OneLiner object.
            :param file_object: stream opened from the file by the framework, and closed automatically after parsing
            :return: a new object created from the file.
            """

            # read the file - or at least the first row
            first_row = file_object.readline()
            # no need to close, that will be done by caller

            return OneLiner(first_row)

        def test_custom_read_config_from_config_file_stream(
                desired_type: Type[Config], file_object: TextIOBase,
                logger: logging.Logger, *args, **kwargs) -> Config:
            """
            Helper method to read a txt file and return its content as a Config object.
            :param file_object: stream opened from the file by the framework, and closed automatically after parsing
            :return: a new object created from the file.
            """
            import configparser
            config = configparser.ConfigParser()
            config.read_file(file_object)

            return Config(config)

        # Step 3: register all basic types, along with at least one parsing function. The framework allows you to
        # register several file extensions for each basic type, each file extension being associated with a unique
        # parsing function. For example, if an object is available as a .cfg, a .txt or a .csv, three parsing functions
        # would then be registered. Note that you may register the same function twice if it is able to handle several
        # file extensions.
        #
        # parsers = {
        #     OneLiner: {
        #         '.txt': read_oneliner_from_txt_file_stream
        #     },
        #     Config: {
        #         '.cfg': read_config_from_config_file_stream,
        #         '.txt': read_config_from_config_file_stream
        #     }
        # }
        parsers = [
            SingleFileParserFunction(
                parser_function=test_custom_read_oneliner_from_txt_file_stream,
                supported_exts={'.txt'},
                supported_types={OneLiner}),
            SingleFileParserFunction(
                parser_function=test_custom_read_config_from_config_file_stream,
                supported_exts={'.cfg', '.txt'},
                supported_types={Config}),
        ]

        # create root parser
        root_parser = RootParser('parsyfiles defaults+test custom functions')
        root_parser.register_parsers(parsers)

        return root_parser, MainFooBarItem
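To round off the example, a hypothetical caller (method name and folder are assumed, in the spirit of the other tests in this file) would consume the returned parser like this:

    def test_custom_demo(self):  # hypothetical test method
        root_parser, main_type = self._create_root_parser()
        # hypothetical folder where each item is a sub-folder containing input_simple.txt,
        # expected_out.cfg (or .txt) and, optionally, expected_perf.cfg
        items = root_parser.parse_collection(fix_path('./test_data/demo_foobar'), main_type)
        pprint(items)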