def test_parse_with_custom_parser(self):
    """
    Registers a custom XML parser function on a RootParser, uses it to parse the
    './xml_collection' collection into ElementTree objects, and pretty-prints the
    serialized root element of every parsed item.

    :return:
    """
    from logging import Logger
    from typing import Type
    from xml.etree.ElementTree import ElementTree, parse, tostring

    from parsyfiles.converting_core import T

    def read_xml(desired_type: Type[T], file_path: str, encoding: str, logger: Logger, **kwargs):
        """
        Parses the XML file located at `file_path` and returns the resulting ElementTree.
        Only `file_path` is used; the other arguments are accepted but ignored.

        :param desired_type: not used
        :param file_path: path of the XML file to parse
        :param encoding: not used
        :param logger: not used
        :param kwargs: not used
        :return: the tree returned by xml.etree.ElementTree.parse
        """
        return parse(file_path)

    # wrap the function in a non-streaming parser (it receives a file path rather than
    # an opened stream) declared for '.xml' files and the ElementTree type
    xml_parser = SingleFileParserFunction(parser_function=read_xml,
                                          streaming_mode=False,
                                          supported_exts={'.xml'},
                                          supported_types={ElementTree})

    root = RootParser('parsyfiles with timeseries')
    root.register_parser(xml_parser)

    trees = root.parse_collection(fix_path('./xml_collection'), ElementTree)

    # serialize each root element so that the printed output is readable
    serialized = {}
    for item_name, tree in trees.items():
        serialized[item_name] = tostring(tree.getroot())
    pprint(serialized)
def test_custom_parser_ok_for_subclasses():
    """
    Tests that if you register a custom parser for a subclass of A, it gets correctly used to parse A
    (in non-strict mode, which is the default)

    :return:
    """
    root_parser = RootParser()

    class A:
        def __init__(self, txt):
            self.txt = txt

    class B(A):
        """ a subclass of A """
        pass

    def read_B_from_txt(desired_type: Type[B], file_object: TextIOBase, logger: Logger, *args, **kwargs) -> B:
        """
        Custom parser function: reads the whole text stream and builds a B from its first character.

        :param desired_type: not used
        :param file_object: the opened text stream to read from
        :param logger: not used
        :return: a B whose `txt` is the first character of the stream contents
        """
        # read the entire stream into a string
        contents = file_object.read()

        # only return the first character
        return B(contents[0])

    # before registering a parser for B, only generic parsers are able to parse a A
    before_capa = root_parser.get_capabilities_for_type(A)['.txt']
    assert list(before_capa.keys()) == ['3_generic']

    # register a parser for B
    root_parser.register_parser(
        SingleFileParserFunction(parser_function=read_B_from_txt,
                                 streaming_mode=True,
                                 supported_exts={'.txt'},
                                 supported_types={B}))

    # after registering the new parser appears in the list able to parse A
    after_capa = root_parser.get_capabilities_for_type(A)['.txt']
    assert str(after_capa['2_approx_match'][0]) == '<read_B_from_txt>'

    a = root_parser.parse_item(get_path('b64pickle-float-1.0=True'), A)

    # check that the custom parser was used, not the generic 'construct from string':
    # the custom parser keeps a single character only
    assert len(a.txt) == 1
    # presumably 'g' is the first character of the test file's contents - verify against the fixture
    assert a.txt == 'g'