def test_parse_with_custom_parser(self):
        """
        Registers a custom XML parsing function and uses it to parse a folder of XML files.

        The resulting trees are serialized and pretty-printed by item name; nothing is asserted.
        :return:
        """

        from logging import Logger
        from typing import Type
        from xml.etree.ElementTree import ElementTree, parse, tostring

        from parsyfiles.converting_core import T

        def read_xml(desired_type: Type[T], file_path: str, encoding: str,
                     logger: Logger, **kwargs):
            """
            Parsing function: loads the XML file located at `file_path` as an ElementTree.

            Only `file_path` is used here; the other arguments are accepted and ignored.

            :param desired_type:
            :param file_path: path of the XML file to read
            :param encoding:
            :param logger:
            :param kwargs:
            :return: the tree returned by xml.etree.ElementTree.parse
            """
            xml_tree = parse(file_path)
            return xml_tree

        # wrap the function as a single-file parser for '.xml' files producing ElementTree objects
        xml_parser = SingleFileParserFunction(parser_function=read_xml,
                                              streaming_mode=False,
                                              supported_exts={'.xml'},
                                              supported_types={ElementTree})

        root = RootParser('parsyfiles with timeseries')
        root.register_parser(xml_parser)

        collection_path = fix_path('./xml_collection')
        trees = root.parse_collection(collection_path, ElementTree)

        # serialize each tree's root element so that the printed output is readable
        serialized = {}
        for name, tree in trees.items():
            serialized[name] = tostring(tree.getroot())
        pprint(serialized)
Example #2
0
def test_union_recursive_1(root_parser: RootParser):
    """ Checks that arbitrarily nested dictionaries, described with forward references, can be parsed from a folder """
    class A:
        def __init__(self, foo: str):
            self.foo = foo

    def check_leaves(tree):
        """ Asserts that the four expected leaves of the parsed structure are exactly of type A """
        assert type(tree['a']['a']['a']) is A
        assert type(tree['a']['a']['b']) is A
        assert type(tree['a']['b']) is A
        assert type(tree['b']) is A

    # First (preferred) way: the recursive type is itself a Dict, parsed as a single item
    InfiniteRecursiveDictOfA = Dict[str, Union[A, 'InfiniteRecursiveDictOfA']]

    nested = root_parser.parse_item(get_path('test2'), InfiniteRecursiveDictOfA)
    check_leaves(nested)

    # Less preferred way, but check that it works too: the recursive type is a Union,
    # and the folder is parsed as a collection of it
    InfiniteRecursiveDictOfA2 = Union[A, Dict[str, 'InfiniteRecursiveDictOfA2']]

    nested = root_parser.parse_collection(get_path('test2'), InfiniteRecursiveDictOfA2)
    check_leaves(nested)

    # This is a forward reference that is equivalent to 'A'.
    # It should be handled correctly by parsyfiles so as not to lead to infinite recursion
    InfiniteRecursiveDictOfA3 = Union[A, 'InfiniteRecursiveDictOfA3']

    single = root_parser.parse_item(get_path('test2', 'b'), InfiniteRecursiveDictOfA3)
    assert type(single) is A
Example #3
0
def test_union_2(root_parser: RootParser):
    """ Checks that a folder can be parsed as a collection whose item type is a Union """
    class A:
        def __init__(self, foo: str):
            self.foo = foo

    class B:
        def __init__(self, bar: float):
            self.bar = bar

    folder = get_path('test1')
    parsed = root_parser.parse_collection(folder, Union[A, B])

    # exactly two items are expected, each built with a different member of the Union
    assert len(parsed) == 2
    for name, expected in (('a', A), ('b', B)):
        assert type(parsed[name]) is expected
Example #4
0
def test_parse_subtypes(root_parser: RootParser):
    """ Checks that requesting a base class yields instances of its subclasses """
    class A:
        pass

    class B(A):
        def __init__(self, foo: str):
            self.foo = foo

    class C(B):
        def __init__(self, bar: str):
            super().__init__(foo=bar)

    # the desired type is the root of the hierarchy; items 'b' and 'c' must come back as B and C
    parsed = root_parser.parse_collection(get_path('test2'), A)
    for name, expected in (('b', B), ('c', C)):
        assert type(parsed[name]) is expected
    def test_pass_parser_options(self):
        """
        Passes options to the pandas DataFrame parsers.

        Looks up the parsers able to produce a DataFrame from '.csv' and '.xls' files, prints the id
        under which each of them accepts options together with its option hints, then parses a
        collection of DataFrames with custom options for both parsers. The result is only printed.
        :return:
        """
        from pandas import DataFrame
        from parsyfiles import RootParser, create_parser_options, add_parser_options

        # create a root parser
        parser = RootParser()

        # retrieve the parsers of interest
        parsers = parser.get_capabilities_for_type(DataFrame,
                                                   strict_type_matching=False)

        for ext in ('.csv', '.xls'):
            df_parser = parsers[ext]['1_exact_match'][0]
            parser_id = df_parser.get_id_for_options()
            # the label is derived from the extension so that each parser is reported under its own name
            print('Parser id for ' + ext[1:] + ' is : ' + parser_id +
                  ', implementing function is ' + repr(df_parser._parser_func))
            print('option hints : ' + df_parser.options_hints())

        # configure the DataFrame parsers to automatically parse dates and use the first column as index
        opts = create_parser_options()
        opts = add_parser_options(opts, 'read_df_or_series_from_csv', {
            'parse_dates': True,
            'index_col': 0
        })
        opts = add_parser_options(opts, 'read_dataframe_from_xls',
                                  {'index_col': 0})

        dfs = parser.parse_collection(
            fix_path('./test_data/demo/ts_collection'),
            DataFrame,
            options=opts)
        print(dfs)
def test_typevars_1(root_parser: RootParser):
    """ Tests that a constructor containing TypeVars is correctly handled.

    `Test` is generic in `TV` (bound to `A`) and its constructor takes a `TV`. The 'test1' folder is
    parsed as a collection of `Test`, and the wrapped objects are expected to be an `A` for item 'a'
    and a `B` for item 'b'.
    """
    class A:
        def __init__(self, foo: str):
            self.foo = foo

    class B(A):
        def __init__(self, bar: float):
            # A.__init__ declares foo as str: convert explicitly instead of passing the float through
            super().__init__(foo=str(bar))

    TV = TypeVar('TV', bound=A)

    class Test(Generic[TV]):
        def __init__(self, obj: TV):
            self.obj = obj

    items = root_parser.parse_collection(get_path('test1'), Test)

    assert len(items) == 2
    # exact type checks on purpose: B is a subclass of A, so isinstance would not tell them apart
    assert type(items['a'].obj) is A
    assert type(items['b'].obj) is B
def test_typevars_3(root_parser: RootParser):
    """ Checks that a TypeVar declared with constraints can be used as the desired type -> it acts as a Union """
    class A:
        def __init__(self, foo: str):
            self.foo = foo

    class B:
        def __init__(self, bar: float):
            self.bar = bar

    TV = TypeVar('TV', A, B)

    expectations = (('a', A), ('b', B))

    # first parse the items one at a time...
    for name, expected in expectations:
        single = root_parser.parse_item(get_path('test2', name), TV)
        assert type(single) is expected

    # ...then the whole folder at once
    parsed = root_parser.parse_collection(get_path('test2'), TV)

    assert len(parsed) == 2
    for name, expected in expectations:
        assert type(parsed[name]) is expected
def test_typevars_2(root_parser: RootParser):
    """ Checks that a TypeVar declared with 'bound' can be used directly as the desired type -> the bound
    type is used in its place """
    class A:
        def __init__(self, foo: str):
            self.foo = foo

    class B(A):
        def __init__(self, bar: float):
            super().__init__(foo=str(bar))

    TV = TypeVar('TV', bound=A)

    expectations = (('a', A), ('b', B))

    # first parse the items one at a time...
    for name, expected in expectations:
        single = root_parser.parse_item(get_path('test2', name), TV)
        assert type(single) is expected

    # ...then the whole folder at once
    parsed = root_parser.parse_collection(get_path('test2'), TV)

    assert len(parsed) == 2
    for name, expected in expectations:
        assert type(parsed[name]) is expected
    def test_parse_subclass_of_known_with_custom_converter(self):
        """
        Parses a subclass of DataFrame with a custom converter.

        A `TimeSeries` class extending DataFrame is declared, together with a converter function
        building it from a DataFrame. The converter is registered on a RootParser, which is then
        asked to parse a collection of `TimeSeries`. The result is only assigned to a local
        variable: this test makes no assertion on it.
        :return:
        """

        # define your class
        from pandas import DataFrame, DatetimeIndex

        class TimeSeries(DataFrame):
            """
            A basic timeseries class that extends DataFrame.

            The wrapped DataFrame is stored in `_df`, and attributes that are not found the usual way
            are looked up on it (see `__getattr__`).
            NOTE(review): the DataFrame constructor is not called by `__init__` - confirm that this
            is intended with the pandas version in use.
            """
            def __init__(self, df: DataFrame):
                """
                Constructor from a DataFrame. The DataFrame index should be an instance of DatetimeIndex

                :param df: the DataFrame to wrap. If its index has no timezone, the index is localized
                    to UTC; note that this modifies the index of the provided DataFrame.
                :raises ValueError: if df is not a DataFrame or if its index is not a DatetimeIndex
                """
                if isinstance(df, DataFrame) and isinstance(
                        df.index, DatetimeIndex):
                    if df.index.tz is None:
                        df.index = df.index.tz_localize(
                            tz='UTC'
                        )  # use the UTC hypothesis in absence of other hints
                    self._df = df
                else:
                    raise ValueError(
                        'Error creating TimeSeries from DataFrame: provided DataFrame does not have a '
                        'valid DatetimeIndex')

            def __getattr__(self, item):
                """
                Fallback attribute lookup, delegating to the wrapped DataFrame.

                :param item: name of the attribute that was not found the usual way
                :return: the attribute of the same name on the wrapped DataFrame
                :raises AttributeError: if the wrapped DataFrame has no such attribute either
                """
                # Redirects anything that is not implemented here to the base dataframe.
                # this is called only if the attribute was not found the usual way

                # easy version of the dynamic proxy just to save time :)
                # see http://code.activestate.com/recipes/496741-object-proxying/ for "the answer"

                # object.__getattribute__ is used to read '_df' without going through this method again
                df = object.__getattribute__(self, '_df')
                if hasattr(df, item):
                    return getattr(df, item)
                else:
                    raise AttributeError('\'' + self.__class__.__name__ +
                                         '\' object has no attribute \'' +
                                         item + '\'')

            def update(self,
                       other,
                       join='left',
                       overwrite=True,
                       filter_func=None,
                       raise_conflict=False):
                """ For some reason this method was abstract in DataFrame so we have to implement it.

                All arguments are forwarded unchanged to the `update` method of the wrapped DataFrame.
                """
                # NOTE(review): recent pandas versions appear to take `errors=` instead of
                # `raise_conflict=` in DataFrame.update - confirm against the pandas version in use
                return self._df.update(other,
                                       join=join,
                                       overwrite=overwrite,
                                       filter_func=filter_func,
                                       raise_conflict=raise_conflict)

        # -- create your converter
        from typing import Type
        from logging import Logger
        from parsyfiles.converting_core import ConverterFunction

        def df_to_ts(desired_type: Type[TimeSeries], df: DataFrame,
                     logger: Logger) -> TimeSeries:
            """ Converter from DataFrame to TimeSeries: only `df` is used, the other arguments are ignored """
            return TimeSeries(df)

        my_converter = ConverterFunction(from_type=DataFrame,
                                         to_type=TimeSeries,
                                         conversion_method=df_to_ts)

        # -- create a parser and register your converter
        from parsyfiles import RootParser, create_parser_options, add_parser_options

        parser = RootParser('parsyfiles with timeseries')
        parser.register_converter(my_converter)

        # -- you might wish to configure the DataFrame parser, though:
        # (dates are parsed and the first column is used as index, for both the csv and the xls parser ids)
        opts = create_parser_options()
        opts = add_parser_options(opts, 'read_df_or_series_from_csv', {
            'parse_dates': True,
            'index_col': 0
        })
        opts = add_parser_options(opts, 'read_dataframe_from_xls',
                                  {'index_col': 0})

        # the parsed collection is not inspected any further
        dfs = parser.parse_collection(fix_path('./ts_collection'),
                                      TimeSeries,
                                      options=opts)