def test_parse_with_custom_parser(self):
        """
        Registers a custom XML file parser on a RootParser, then parses the
        './xml_collection' collection into ElementTree objects and
        pretty-prints the serialized root element of every parsed item.
        :return:
        """

        from logging import Logger
        from typing import Type
        from xml.etree.ElementTree import ElementTree, parse, tostring
        from parsyfiles.converting_core import T

        def read_xml(desired_type: Type[T], file_path: str, encoding: str,
                     logger: Logger, **kwargs):
            """
            Parses the XML file located at `file_path` with the standard
            library and returns the resulting tree.

            :param desired_type: not used by this parser
            :param file_path: path of the XML file to parse
            :param encoding: not used by this parser
            :param logger: not used by this parser
            :param kwargs: not used by this parser
            :return: the object returned by xml.etree.ElementTree.parse
            """
            xml_tree = parse(file_path)
            return xml_tree

        # file-based (non streaming) parser: the function receives a path
        xml_parser = SingleFileParserFunction(parser_function=read_xml,
                                              streaming_mode=False,
                                              supported_exts={'.xml'},
                                              supported_types={ElementTree})

        root_parser = RootParser('parsyfiles with timeseries')
        root_parser.register_parser(xml_parser)

        collection_path = fix_path('./xml_collection')
        parsed_trees = root_parser.parse_collection(collection_path,
                                                    ElementTree)

        # serialize each root element so that the printed output is readable
        serialized_roots = {}
        for item_name, tree in parsed_trees.items():
            serialized_roots[item_name] = tostring(tree.getroot())
        pprint(serialized_roots)
Example #2
0
def test_custom_parser_ok_for_subclasses():
    """
    Tests that if you register a custom parser for a subclass of A, it gets correctly used to parse A (in non-strict
    mode, which is the default)

    The test checks the '.txt' capabilities reported for A before and after registering a parser for the subclass B,
    then parses an item as A and verifies that the result was produced by the custom parser.
    :return:
    """
    root_parser = RootParser()

    # the type that will be requested from the root parser
    class A:
        def __init__(self, txt):
            self.txt = txt

    class B(A):
        """ a subclass of A """
        pass

    def read_B_from_txt(desired_type: Type[A], file_object: TextIOBase,
                        logger: Logger, *args, **kwargs) -> B:
        """
        Custom stream parser building a B from the first character of the stream.

        :param desired_type: not used by this parser
        :param file_object: the text stream to read from
        :param logger: not used by this parser
        :param args: not used by this parser
        :param kwargs: not used by this parser
        :return: a B whose `txt` is the first character of the stream contents
        :raises IndexError: if the stream is empty
        """
        # read the entire stream into a string
        content = file_object.read()
        # keep only the first character: a one-character `txt` is what lets the test below tell this parser apart
        # from the generic 'construct from string' one
        return B(content[0])

    # before registering a parser for B, only generic parsers are able to parse a A
    before_capa = root_parser.get_capabilities_for_type(A)['.txt']
    assert list(before_capa.keys()) == ['3_generic']

    # register a parser for B
    root_parser.register_parser(
        SingleFileParserFunction(parser_function=read_B_from_txt,
                                 streaming_mode=True,
                                 supported_exts={'.txt'},
                                 supported_types={B}))

    # after registering the new parser appears in the list able to parse A
    after_capa = root_parser.get_capabilities_for_type(A)['.txt']
    assert str(after_capa['2_approx_match'][0]) == '<read_B_from_txt>'

    a = root_parser.parse_item(get_path('b64pickle-float-1.0=True'), A)
    # check that the custom parser was used, not the generic 'construct from string'
    assert len(a.txt) == 1
    assert a.txt == 'g'