    def test_parse_with_custom_parser(self):
        """
        Parses a collection of XML files with a custom parser function registered for ElementTree.
        :return:
        """

        from typing import Type
        from parsyfiles.converting_core import T
        from logging import Logger
        from xml.etree.ElementTree import ElementTree, parse, tostring

        def read_xml(desired_type: Type[T], file_path: str, encoding: str,
                     logger: Logger, **kwargs):
            """
            Opens an XML file and returns the tree parsed from it as an ElementTree.

            :param desired_type:
            :param file_path:
            :param encoding:
            :param logger:
            :param kwargs:
            :return:
            """
            return parse(file_path)

        my_parser = SingleFileParserFunction(parser_function=read_xml,
                                             streaming_mode=False,
                                             supported_exts={'.xml'},
                                             supported_types={ElementTree})

        parser = RootParser('parsyfiles with timeseries')
        parser.register_parser(my_parser)
        xmls = parser.parse_collection(fix_path('./xml_collection'),
                                       ElementTree)
        pprint({name: tostring(x.getroot()) for name, x in xmls.items()})
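        # Follow-up sketch: each parsed item is an ElementTree (per the
        # supported_types declared above), so its root can be inspected directly.
        for name, tree in xmls.items():
            print(name, tree.getroot().tag)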
Example #2
def test_union_recursive_1(root_parser: RootParser):
    """ Tests that you can parse infinitely-nested dictionaries from a folder using forward references """
    class A:
        def __init__(self, foo: str):
            self.foo = foo

    # First (preferred) way
    InfiniteRecursiveDictOfA = Dict[str, Union[A, 'InfiniteRecursiveDictOfA']]

    items = root_parser.parse_item(get_path('test2'), InfiniteRecursiveDictOfA)

    assert type(items['a']['a']['a']) == A
    assert type(items['a']['a']['b']) == A
    assert type(items['a']['b']) == A
    assert type(items['b']) == A
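    # sanity check (sketch): given the Dict[str, ...] declaration, the
    # intermediate nodes should come back as plain dictionaries, with A
    # instances only at the leaves (an assumption consistent with the
    # subscripting done in the asserts above)
    assert isinstance(items, dict)
    assert isinstance(items['a'], dict)
    assert isinstance(items['a']['a'], dict)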

    # Less preferred way, but check that it works too
    InfiniteRecursiveDictOfA2 = Union[A, Dict[str,
                                              'InfiniteRecursiveDictOfA2']]

    items = root_parser.parse_collection(get_path('test2'),
                                         InfiniteRecursiveDictOfA2)

    assert type(items['a']['a']['a']) == A
    assert type(items['a']['a']['b']) == A
    assert type(items['a']['b']) == A
    assert type(items['b']) == A

    # This is a forward reference that is equivalent to 'A'.
    # It should be handled correctly by parsyfiles so as not to lead to infinite recursion.
    InfiniteRecursiveDictOfA3 = Union[A, 'InfiniteRecursiveDictOfA3']

    item = root_parser.parse_item(get_path('test2', 'b'),
                                  InfiniteRecursiveDictOfA3)
    assert type(item) == A
Example #3
def test_print_and_get_capabilities_by_type(root_parser: RootParser):
    """ Tests that the declared capabilities by type are correct """

    c = root_parser.get_capabilities_by_type(strict_type_matching=False)
    print('\n' + str(len(c)) + ' Root parser capabilities by type:')
    assert len(c) == 15

    cdict = to_str_coll(c)

    # dump(cdict, 'reference_capabilities_by_type.json')
    assert cdict == load('reference_capabilities_by_type.json')

    root_parser.print_capabilities_by_type(strict_type_matching=False)
Example #4
def test_root_parser_any():
    """
    Tests that we can ask the root parser for its capabilities to parse a given type
    :return:
    """
    root_parser = RootParser()
    # print
    root_parser.print_capabilities_for_type(typ=Any)

    # details
    res = root_parser.find_all_matching_parsers(strict=False,
                                                desired_type=AnyObject,
                                                required_ext='.cfg')
    match_generic, match_approx, match_exact = res[0]
    assert len(match_generic) == 0
    assert len(match_approx) == 0
Example #5
def test_get_all_supported_types_pretty_str(root_parser: RootParser):
    """ Tests that the declared supported types are there and that their number is correct """

    t = root_parser.get_all_supported_types_pretty_str()
    print('\n' + str(len(t)) + ' Root parser supported types:')
    pprint(t)
    assert len(t) == 15
    # dump(list(t), 'reference_supported_types.json')
    assert t == set(load('reference_supported_types.json'))
Example #6
def test_get_all_supported_exts(root_parser: RootParser):
    """ Tests that the declared supported extensions are there and that their number is correct """

    e = root_parser.get_all_supported_exts()
    print('\n' + str(len(e)) + ' Root parser supported extensions:')
    pprint(e)
    assert len(e) == 13

    # dump(list(e), 'reference_supported_exts.json')
    assert e == set(load('reference_supported_exts.json'))
Example #7
def test_get_all_parsers(root_parser: RootParser):
    """ Tests that the default parsers are there and that their number is correct """

    parsers = root_parser.get_all_parsers(strict_type_matching=False)
    print('\n' + str(len(parsers)) + ' Root parser parsers:')
    pprint(parsers)
    assert len(parsers) == 127

    parsers_str = to_str_coll(parsers)
    # dump(parsers_str, 'reference_parsers.json')
    assert parsers_str == load('reference_parsers.json')
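    # complementary check (sketch): strict type matching should never yield
    # more parsers than the non-strict query above
    strict_parsers = root_parser.get_all_parsers(strict_type_matching=True)
    assert len(strict_parsers) <= len(parsers)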
Example #8
    def test_pass_parser_options(self):
        """
        Passes options to the pandas parser
        :return:
        """
        from pandas import DataFrame
        from parsyfiles import RootParser

        # create a root parser
        parser = RootParser()

        # retrieve the parsers of interest
        parsers = parser.get_capabilities_for_type(DataFrame,
                                                   strict_type_matching=False)
        df_csv_parser = parsers['.csv']['1_exact_match'][0]
        p_id_csv = df_csv_parser.get_id_for_options()
        print('Parser id for csv is : ' + p_id_csv +
              ', implementing function is ' + repr(df_csv_parser._parser_func))
        print('option hints : ' + df_csv_parser.options_hints())
        df_xls_parser = parsers['.xls']['1_exact_match'][0]
        p_id_xls = df_xls_parser.get_id_for_options()
        print('Parser id for xls is : ' + p_id_xls +
              ', implementing function is ' + repr(df_xls_parser._parser_func))
        print('option hints : ' + df_xls_parser.options_hints())

        from parsyfiles import create_parser_options, add_parser_options

        # configure the DataFrame parsers to automatically parse dates and use the first column as index
        opts = create_parser_options()
        opts = add_parser_options(opts, 'read_df_or_series_from_csv', {
            'parse_dates': True,
            'index_col': 0
        })
        opts = add_parser_options(opts, 'read_dataframe_from_xls',
                                  {'index_col': 0})

        dfs = parser.parse_collection(
            fix_path('./test_data/demo/ts_collection'),
            DataFrame,
            options=opts)
        print(dfs)
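        # hedged follow-up: with parse_dates=True and index_col=0, the parsed
        # frames should carry datetime-like indexes (this depends on the
        # contents of the ts_collection test data)
        from pandas import DatetimeIndex
        for name, df in dfs.items():
            assert isinstance(df.index, DatetimeIndex)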
Example #9
def test_union_1(root_parser: RootParser):
    """ Tests that parsing a Union works """
    class A:
        def __init__(self, foo: str):
            self.foo = foo

    class B:
        def __init__(self, bar: float):
            self.bar = bar

    item = root_parser.parse_item(get_path('test1', 'a'), Union[A, B])
    assert type(item) == A
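    # symmetric check (sketch): the same Union should resolve file 'b' to B,
    # as the collection variant in 'test_union_2' below also demonstrates
    item = root_parser.parse_item(get_path('test1', 'b'), Union[A, B])
    assert type(item) == B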
Example #10
    def test_simple_collection(self):
        """
        Parsing a collection of dataframes as a dictionary
        :return:
        """
        from pandas import DataFrame
        dfs = parse_collection(fix_path('./simple_collection'), DataFrame)
        pprint(dfs)
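        # the parsed collection comes back as a dict keyed by file name
        # without extension; a quick look at the keys (sketch):
        print(list(dfs.keys()))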

        df = parse_item(fix_path('./simple_collection/c'), DataFrame)
        pprint(df)

        RootParser().print_capabilities_for_type(typ=DataFrame)
Example #11
def test_typevars_3(root_parser: RootParser):
    """ Tests that a TypeVar with 'constraints' may be used as a desired Type -> it will be a Union """
    class A:
        def __init__(self, foo: str):
            self.foo = foo

    class B:
        def __init__(self, bar: float):
            self.bar = bar

    TV = TypeVar('TV', A, B)

    item = root_parser.parse_item(get_path('test2', 'a'), TV)
    assert type(item) == A

    item = root_parser.parse_item(get_path('test2', 'b'), TV)
    assert type(item) == B

    items = root_parser.parse_collection(get_path('test2'), TV)

    assert len(items) == 2
    assert type(items['a']) == A
    assert type(items['b']) == B
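    # equivalence sketch: per the docstring, the constrained TypeVar should
    # behave like the corresponding Union (Union imported from typing)
    items_u = root_parser.parse_collection(get_path('test2'), Union[A, B])
    assert {type(v) for v in items_u.values()} == {A, B}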
Example #12
def test_union_2(root_parser: RootParser):
    """ Tests that parsing a collection of Union works """
    class A:
        def __init__(self, foo: str):
            self.foo = foo

    class B:
        def __init__(self, bar: float):
            self.bar = bar

    items = root_parser.parse_collection(get_path('test1'), Union[A, B])
    assert len(items) == 2
    assert type(items['a']) == A
    assert type(items['b']) == B
Example #13
def test_typevars_2(root_parser: RootParser):
    """ Tests that a TypeVar with 'bound' may be used as a desired Type directly -> it will be replaced with the bound 
    type """
    class A:
        def __init__(self, foo: str):
            self.foo = foo

    class B(A):
        def __init__(self, bar: float):
            super(B, self).__init__(foo=str(bar))

    TV = TypeVar('TV', bound=A)

    item = root_parser.parse_item(get_path('test2', 'a'), TV)
    assert type(item) == A

    item = root_parser.parse_item(get_path('test2', 'b'), TV)
    assert type(item) == B

    items = root_parser.parse_collection(get_path('test2'), TV)

    assert len(items) == 2
    assert type(items['a']) == A
    assert type(items['b']) == B
Example #14
    def test_simple_objects(self):
        """
        Parsing a collection of simple objects_data
        :return:
        """

        # First define the function that we want to test
        # (not useful, but just to show a complete story in the readme...)
        def exec_op(x: float, y: float, op: str) -> float:
            if op == '+':
                return x + y
            elif op == '-':
                return x - y
            else:
                raise ValueError('Unsupported operation : \'' + op + '\'')

        # Then define the simple class representing your test case
        class ExecOpTest(object):
            def __init__(self, x: float, y: float, op: str,
                         expected_result: float):
                self.x = x
                self.y = y
                self.op = op
                self.expected_result = expected_result

            def __str__(self):
                return self.__repr__()

            def __repr__(self):
                return str(self.x) + ' ' + self.op + ' ' + str(
                    self.y) + ' =? ' + str(self.expected_result)

        # Create the parser and parse a single file
        # e = parse_item('./test_data/objects_data/test_diff_1', ExecOpTest)
        # pprint(e)

        # parse all of them as dicts
        sf_tests_dct = parse_collection(fix_path('./simple_objects'), Dict)

        # assert that they are sorted
        assert list(sf_tests_dct.keys()) == list(sorted(sf_tests_dct.keys()))

        # parse all of them as objects_data
        sf_tests = parse_collection(fix_path('./simple_objects'), ExecOpTest)
        pprint(sf_tests)

        #
        RootParser().print_capabilities_for_type(typ=ExecOpTest)
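        # closing the story (sketch): drive the function under test with the
        # parsed cases (assumes the sample files hold consistent values)
        for case_name, case in sf_tests.items():
            if case.op in ('+', '-'):
                assert exec_op(case.x, case.y, case.op) == case.expected_result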
Example #15
def test_parse_subtypes(root_parser: RootParser):
    """ Tests that subclasses can be parsed """
    class A:
        pass

    class B(A):
        def __init__(self, foo: str):
            self.foo = foo

    class C(B):
        def __init__(self, bar: str):
            super(C, self).__init__(foo=bar)

    items = root_parser.parse_collection(get_path('test2'), A)
    assert type(items['b']) == B
    assert type(items['c']) == C
Example #16
def test_custom_parser_ok_for_subclasses():
    """
    Tests that if you register a custom parser for a subclass of A, it gets correctly used to parse A (in non-strict
    mode, which is the default)
    :return:
    """
    root_parser = RootParser()

    class A:
        def __init__(self, txt):
            self.txt = txt

    class B(A):
        """ a subclass of A """
        pass

    def read_B_from_txt(desired_type: Type[B], file_object: TextIOBase,
                        logger: Logger, *args, **kwargs) -> B:
        # read the entire stream into a string
        str_io = StringIO()
        shutil.copyfileobj(file_object, str_io)
        # only return the first character
        return B(str_io.getvalue()[0])

    # before registering a parser for B, only generic parsers are able to parse a A
    before_capa = root_parser.get_capabilities_for_type(A)['.txt']
    assert list(before_capa.keys()) == ['3_generic']

    # register a parser for B
    root_parser.register_parser(
        SingleFileParserFunction(parser_function=read_B_from_txt,
                                 streaming_mode=True,
                                 supported_exts={'.txt'},
                                 supported_types={B}))

    # after registering the new parser appears in the list able to parse A
    after_capa = root_parser.get_capabilities_for_type(A)['.txt']
    assert str(after_capa['2_approx_match'][0]) == '<read_B_from_txt>'

    a = root_parser.parse_item(get_path('b64pickle-float-1.0=True'), A)
    # check that the custom parser was used, not the generic 'construct from string'
    assert len(a.txt) == 1
    assert a.txt == 'g'
Example #17
def test_typevars_1(root_parser: RootParser):
    """ Tests that a constructor containing TypeVars is correctly handled """
    class A:
        def __init__(self, foo: str):
            self.foo = foo

    class B(A):
        def __init__(self, bar: float):
            super(B, self).__init__(foo=bar)

    TV = TypeVar('TV', bound=A)

    class Test(Generic[TV]):
        def __init__(self, obj: TV):
            self.obj = obj

    items = root_parser.parse_collection(get_path('test1'), Test)

    assert len(items) == 2
    assert type(items['a'].obj) == A
    assert type(items['b'].obj) == B
Example #18
def test_get_all_conversion_chains(root_parser: RootParser):
    """ Tests that the default conversion chains are there and that their number is correct """

    chains = root_parser.get_all_conversion_chains()
    print('\n' + str(len(chains[0])) + '(generic) + ' + str(len(chains[2])) +
          '(specific) Root parser converters:')
    pprint(chains)
    assert len(chains[0]) == 22
    assert len(chains[1]) == 0
    assert len(chains[2]) == 200

    generic_chains_str = to_str_coll(chains[0])
    specific_chains_str = to_str_coll(chains[2])

    # dump(generic_chains_str, 'reference_generic_conversion_chains.json')
    assert generic_chains_str == load(
        'reference_generic_conversion_chains.json')

    # dump(specific_chains_str, 'reference_specific_conversion_chains.json')
    assert specific_chains_str == load(
        'reference_specific_conversion_chains.json')
Example #19
    def test_multifile_objects(self):
        """
        Parsing a list of multifile objects_data
        :return:
        """
        from pandas import Series, DataFrame

        class AlgoConf(object):
            def __init__(self, foo_param: str, bar_param: int):
                self.foo_param = foo_param
                self.bar_param = bar_param

        class AlgoResults(object):
            def __init__(self, score: float, perf: float):
                self.score = score
                self.perf = perf

        def exec_op_series(x: Series, y: AlgoConf) -> AlgoResults:
            pass

        class ExecOpSeriesTest(object):
            def __init__(self, x: Series, y: AlgoConf,
                         expected_results: AlgoResults):
                self.x = x
                self.y = y
                self.expected_results = expected_results

        # parse all of them
        mf_tests = parse_collection(fix_path('./complex_objects'),
                                    ExecOpSeriesTest)
        pprint(mf_tests)

        RootParser().print_capabilities_for_type(typ=ExecOpSeriesTest)

        from parsyfiles import FlatFileMappingConfiguration
        dfs = parse_collection(
            fix_path('./complex_objects_flat'),
            DataFrame,
            file_mapping_conf=FlatFileMappingConfiguration())
        pprint(dfs)
Example #20
class AllTests(TestCase):
    def setUp(self):
        """
        Creates the root parser to be used in most tests
        :return:
        """
        self.root_parser = RootParser()

    def test_a_root_parser_capabilities(self):
        """
        Tests that we can print the capabilities of the root parser: registered parsers and converters, supported
        extensions and types, etc.
        :return:
        """
        p = self.root_parser.get_all_parsers(strict_type_matching=False)
        print('\n' + str(len(p)) + ' Root parser parsers:')
        pprint(p)

        print('Testing option hints for parsing chain')
        print(p[0].options_hints())

        c = self.root_parser.get_all_conversion_chains()
        print('\n' + str(len(c[0]) + len(c[2])) + ' Root parser converters:')
        pprint(c)
        e = self.root_parser.get_all_supported_exts()
        print('\n' + str(len(e)) + ' Root parser supported extensions:')
        pprint(e)
        t = self.root_parser.get_all_supported_types_pretty_str()
        print('\n' + str(len(t)) + ' Root parser supported types:')
        pprint(t)
        print('\nRoot parser parsers by extensions:')
        self.root_parser.print_capabilities_by_ext(strict_type_matching=False)
        print('\nRoot parser parsers by types:')
        self.root_parser.print_capabilities_by_type(strict_type_matching=False)
        return

    def test_b_root_parser_any(self):
        """
        Tests that we can ask the root parser for its capabilities to parse a given type
        :return:
        """
        # print
        self.root_parser.print_capabilities_for_type(typ=Any)

        # details
        res = self.root_parser.find_all_matching_parsers(
            strict=False, desired_type=AnyObject, required_ext='.cfg')
        match_generic, match_approx, match_exact = res[0]
        self.assertEqual(len(match_generic), 0)
        self.assertEqual(len(match_approx), 0)

    def test_objects_support(self):
        """
        Tests all the supported ways to parse a simple object
        :return:
        """

        # Then define the simple class representing your test case
        class ExecOpTest(object):
            def __init__(self, x: float, y: float, op: str,
                         expected_result: float):
                self.x = x
                self.y = y
                self.op = op
                self.expected_result = expected_result

            def __str__(self):
                return self.__repr__()

            def __repr__(self):
                return str(self.x) + ' ' + self.op + ' ' + str(
                    self.y) + ' =? ' + str(self.expected_result)

        # create the parser and parse a single file
        e = parse_item(fix_path('./test_data/objects/test_diff_1'), ExecOpTest)
        pprint(e)

        # parse all of them
        e = parse_collection(fix_path('./test_data/objects'), ExecOpTest)
        pprint(e)

    def test_collections(self):
        """
        Tests all the supported ways to parse collections
        :return:
        """
        items = parse_item(
            fix_path('./test_data/collections'),
            Tuple[Dict[str, int], List[int], Set[int], Tuple[str, int, str]])
        print(items)
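        # unpacking follows the declared Tuple order (sketch)
        d, lst, s, t = items
        print(type(d), type(lst), type(s), type(t))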
Example #21
    def test_parse_subclass_of_known_with_custom_converter(self):
        """
        Parses a subclass of DataFrame with a custom converter.
        :return:
        """

        # define your class
        from pandas import DataFrame, DatetimeIndex

        class TimeSeries(DataFrame):
            """
            A basic timeseries class that extends DataFrame
            """
            def __init__(self, df: DataFrame):
                """
                Constructor from a DataFrame. The DataFrame index should be an instance of DatetimeIndex
                :param df:
                """
                if isinstance(df, DataFrame) and isinstance(
                        df.index, DatetimeIndex):
                    if df.index.tz is None:
                        df.index = df.index.tz_localize(
                            tz='UTC'
                        )  # use the UTC hypothesis in absence of other hints
                    self._df = df
                else:
                    raise ValueError(
                        'Error creating TimeSeries from DataFrame: provided DataFrame does not have a '
                        'valid DatetimeIndex')

            def __getattr__(self, item):
                # Redirects anything that is not implemented here to the base dataframe.
                # this is called only if the attribute was not found the usual way

                # easy version of the dynamic proxy just to save time :)
                # see http://code.activestate.com/recipes/496741-object-proxying/ for "the answer"
                df = object.__getattribute__(self, '_df')
                if hasattr(df, item):
                    return getattr(df, item)
                else:
                    raise AttributeError('\'' + self.__class__.__name__ +
                                         '\' object has no attribute \'' +
                                         item + '\'')

            def update(self,
                       other,
                       join='left',
                       overwrite=True,
                       filter_func=None,
                       raise_conflict=False):
                """ For some reason this method was abstract in DataFrame so we have to implement it """
                return self._df.update(other,
                                       join=join,
                                       overwrite=overwrite,
                                       filter_func=filter_func,
                                       raise_conflict=raise_conflict)

        # -- create your converter
        from typing import Type
        from logging import Logger
        from parsyfiles.converting_core import ConverterFunction

        def df_to_ts(desired_type: Type[TimeSeries], df: DataFrame,
                     logger: Logger) -> TimeSeries:
            """ Converter from DataFrame to TimeSeries """
            return TimeSeries(df)

        my_converter = ConverterFunction(from_type=DataFrame,
                                         to_type=TimeSeries,
                                         conversion_method=df_to_ts)

        # -- create a parser and register your converter
        from parsyfiles import RootParser, create_parser_options, add_parser_options

        parser = RootParser('parsyfiles with timeseries')
        parser.register_converter(my_converter)

        # -- you might wish to configure the DataFrame parser, though:
        opts = create_parser_options()
        opts = add_parser_options(opts, 'read_df_or_series_from_csv', {
            'parse_dates': True,
            'index_col': 0
        })
        opts = add_parser_options(opts, 'read_dataframe_from_xls',
                                  {'index_col': 0})

        dfs = parser.parse_collection(fix_path('./ts_collection'),
                                      TimeSeries,
                                      options=opts)
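        # hedged follow-up: each parsed entry should be a TimeSeries built by
        # the converter registered above
        for name, ts in dfs.items():
            assert isinstance(ts, TimeSeries)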
Example #22
def parse_with_new_instance():
    rp = RootParser()
    result = rp.parse_item(
        os.path.join(THIS_DIR, 'test_data/b64pickle-float-1.0=True'), bool)
    assert result is True
Example #23
def root_parser():
    logger = getLogger('parsyfiles')
    logger.setLevel(DEBUG)
    return RootParser()
Example #24
    def setUp(self):
        """
        Creates the root parser to be used in most tests
        :return:
        """
        self.root_parser = RootParser()
Example #25
def test_option_hints(root_parser: RootParser):
    """ Tests the option_hints method on the first parser available """

    print('Testing option hints for parsing chain')
    p = root_parser.get_all_parsers(strict_type_matching=False)
    print(p[0].options_hints())