Example 1
# imports needed by this snippet (get_path, used below, is a helper from the surrounding test module)
import shutil
from io import StringIO, TextIOBase
from logging import Logger
from typing import Type

from parsyfiles import RootParser
from parsyfiles.parsing_core import SingleFileParserFunction


def test_custom_parser_ok_for_subclasses():
    """
    Tests that if you register a custom parser for a subclass of A, it gets correctly used to parse A (in non-strict
    mode, which is the default)
    :return:
    """
    root_parser = RootParser()

    class A:
        def __init__(self, txt):
            self.txt = txt

    class B(A):
        """ a subclass of A """
        pass

    def read_B_from_txt(desired_type: Type[B], file_object: TextIOBase,
                        logger: Logger, *args, **kwargs) -> B:
        # read the entire stream into a string
        str_io = StringIO()
        shutil.copyfileobj(file_object, str_io)
        # wrap only the first character of the content in a B instance
        return B(str_io.getvalue()[0])

    # before registering a parser for B, only generic parsers are able to parse an A
    before_capa = root_parser.get_capabilities_for_type(A)['.txt']
    assert list(before_capa.keys()) == ['3_generic']
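    # note: get_capabilities_for_type groups candidate parsers by match quality:
    # '3_generic' here, '2_approx_match' once the B parser is registered below,
    # and '1_exact_match' for parsers registered for the exact requested type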

    # register a parser for B
    root_parser.register_parser(
        SingleFileParserFunction(parser_function=read_B_from_txt,
                                 streaming_mode=True,
                                 supported_exts={'.txt'},
                                 supported_types={B}))

    # after registering, the new parser appears in the list of parsers able to parse A
    after_capa = root_parser.get_capabilities_for_type(A)['.txt']
    assert str(after_capa['2_approx_match'][0]) == '<read_B_from_txt>'

    a = root_parser.parse_item(get_path('b64pickle-float-1.0=True'), A)
    # check that the custom parser was used, not the generic 'construct from string'
    assert len(a.txt) == 1
    assert a.txt == 'g'
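
Not part of the original test, but get_capabilities_for_type also accepts the strict_type_matching flag that appears in the next example. A minimal sketch, reusing the root_parser, A and the registered B parser from above, simply compares the match tables produced with and without strict matching (no particular outcome is asserted):

capa_strict = root_parser.get_capabilities_for_type(A, strict_type_matching=True)
capa_non_strict = root_parser.get_capabilities_for_type(A, strict_type_matching=False)
# compare which match-quality buckets appear for '.txt' with and without strict matching
print('strict    : ' + str(list(capa_strict.get('.txt', {}).keys())))
print('non-strict: ' + str(list(capa_non_strict.get('.txt', {}).keys())))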
Example 2

    def test_pass_parser_options(self):
        """
        Passes custom options to the pandas parsers (csv and xls)
        :return:
        """
        from pandas import DataFrame
        from parsyfiles import RootParser

        # create a root parser
        parser = RootParser()

        # retrieve the parsers of interest
        parsers = parser.get_capabilities_for_type(DataFrame,
                                                   strict_type_matching=False)
        df_csv_parser = parsers['.csv']['1_exact_match'][0]
        p_id_csv = df_csv_parser.get_id_for_options()
        print('Parser id for csv is : ' + p_id_csv +
              ', implementing function is ' + repr(df_csv_parser._parser_func))
        print('option hints : ' + df_csv_parser.options_hints())
        df_xls_parser = parsers['.xls']['1_exact_match'][0]
        p_id_xls = df_xls_parser.get_id_for_options()
        print('Parser id for xls is : ' + p_id_xls +
              ', implementing function is ' + repr(df_xls_parser._parser_func))
        print('option hints : ' + df_xls_parser.options_hints())

        from parsyfiles import create_parser_options, add_parser_options

        # configure the DataFrame parsers to automatically parse dates and use the first column as index
        opts = create_parser_options()
        opts = add_parser_options(opts, 'read_df_or_series_from_csv', {
            'parse_dates': True,
            'index_col': 0
        })
        opts = add_parser_options(opts, 'read_dataframe_from_xls',
                                  {'index_col': 0})

        dfs = parser.parse_collection(
            fix_path('./test_data/demo/ts_collection'),
            DataFrame,
            options=opts)
        print(dfs)
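
A hypothetical follow-up check, not in the original example: assuming parse_collection returns the parsed items as a dict keyed by file name (which is what the print above displays), one can inspect the index of each DataFrame to see the effect of the index_col option.

# a minimal sketch under the assumption stated above: inspect each parsed DataFrame
for name, df in dfs.items():
    # with 'index_col': 0, the first column of each file should now be the index
    print('%s: index name=%r, shape=%s' % (name, df.index.name, str(df.shape)))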