def test_convert_to_pymorphy2(self, tmpdir):
        """
        Compile ``self.XML_PATH`` into a pymorphy2 dictionary and use it.

        The dictionary is written to a ``dicts`` folder inside ``tmpdir``,
        loaded with ``pymorphy2.MorphAnalyzer`` and queried for a few tags.
        The test is skipped when ``assert_can_create()`` raises
        ``NotImplementedError``.
        """

        # Uncomment to see the compiler's debug output:
        # import logging
        # from pymorphy2.opencorpora_dict.compile import logger
        # logger.setLevel(logging.DEBUG)
        # logger.addHandler(logging.StreamHandler())

        try:
            assert_can_create()
        except NotImplementedError as e:
            # pytest.skip() raises on its own, so no ``raise`` is needed;
            # it expects the reason as a string, not an exception object.
            pytest.skip(str(e))

        # create a dictionary
        out_path = str(tmpdir.join('dicts'))
        options = {
            'min_paradigm_popularity': 0,
            'min_ending_freq': 0,
            'paradigm_prefixes': lang.ru.PARADIGM_PREFIXES,
        }
        convert_to_pymorphy2(self.XML_PATH,
                             out_path,
                             source_name='toy',
                             language_code='ru',
                             overwrite=True,
                             compile_options=options)

        # use it
        morph = pymorphy2.MorphAnalyzer(out_path)
        assert morph.tag('ёжиться') == [morph.TagClass('INFN,impf,intr')]

        # tag simplification should work
        assert morph.tag("ёж")[0] == morph.tag("ванька-встанька")[0]
    def test_convert_to_pymorphy2(self, tmpdir):
        """
        Compile ``self.XML_PATH`` into a pymorphy2 dictionary and use it.

        The dictionary is written to a ``dicts`` folder inside ``tmpdir``,
        loaded with ``pymorphy2.MorphAnalyzer`` and queried for a few tags.
        The test is skipped when ``assert_can_create()`` raises
        ``NotImplementedError``.
        """

        # Uncomment to see the compiler's debug output:
        # import logging
        # from pymorphy2.opencorpora_dict.compile import logger
        # logger.setLevel(logging.DEBUG)
        # logger.addHandler(logging.StreamHandler())

        try:
            assert_can_create()
        except NotImplementedError as e:
            # pytest.skip() raises on its own, so no ``raise`` is needed;
            # it expects the reason as a string, not an exception object.
            pytest.skip(str(e))

        # create a dictionary
        out_path = str(tmpdir.join('dicts'))
        options = {
            'min_paradigm_popularity': 0,
            'min_ending_freq': 0,
            'paradigm_prefixes': lang.ru.PARADIGM_PREFIXES,
        }
        convert_to_pymorphy2(self.XML_PATH, out_path,
                             source_name='toy', language_code='ru',
                             overwrite=True, compile_options=options)

        # use it
        morph = pymorphy2.MorphAnalyzer(out_path)
        assert morph.tag('ёжиться') == [morph.TagClass('INFN,impf,intr')]

        # tag simplification should work
        assert morph.tag("ёж")[0] == morph.tag("ванька-встанька")[0]
Exemple #3
0
def convert_to_pymorphy2(opencorpora_dict_path,
                         out_path,
                         source_name,
                         language_code,
                         overwrite=False,
                         compile_options=None):
    """
    Build a compacted Pymorphy2 dictionary from an OpenCorpora XML file.

    ``out_path`` is the folder the resulting dictionary is saved to.
    ``source_name`` and ``language_code`` are passed on to the storage
    layer; ``compile_options`` is passed on to the compiler.

    Nothing is parsed or saved when ``_create_out_path`` returns a falsy
    value for ``out_path`` / ``overwrite``.
    """
    from .parse import parse_opencorpora_xml
    from .preprocess import simplify_tags, drop_unsupported_parses
    from .storage import save_compiled_dict

    dawg.assert_can_create()
    if _create_out_path(out_path, overwrite):
        lexicon = parse_opencorpora_xml(opencorpora_dict_path)

        # Preprocessing steps are called for their effect on ``lexicon``;
        # their return values are not used.
        simplify_tags(lexicon)
        drop_unsupported_parses(lexicon)

        save_compiled_dict(
            compile_parsed_dict(lexicon, compile_options),
            out_path,
            source_name=source_name,
            language_code=language_code,
        )
Exemple #4
0
    def test_convert_to_pymorphy2(self, tmpdir):
        """
        Compile ``self.XML_PATH`` into a pymorphy2 dictionary and use it.

        The dictionary is written to a ``dicts`` folder inside ``tmpdir``
        and loaded with ``pymorphy2.MorphAnalyzer`` (with
        ``probability_estimator_cls=None``) to tag a word.
        The test is skipped when ``assert_can_create()`` raises
        ``NotImplementedError``.
        """

        # Uncomment to see the compiler's debug output:
        # import logging
        # from pymorphy2.opencorpora_dict.compile import logger
        # logger.setLevel(logging.DEBUG)
        # logger.addHandler(logging.StreamHandler())

        try:
            assert_can_create()
        except NotImplementedError as e:
            # pytest.skip() raises on its own, so no ``raise`` is needed;
            # it expects the reason as a string, not an exception object.
            pytest.skip(str(e))

        # create a dictionary
        out_path = str(tmpdir.join('dicts'))
        options = {
            'min_paradigm_popularity': 0,
            'min_ending_freq': 0,
        }
        convert_to_pymorphy2(self.XML_PATH,
                             out_path,
                             overwrite=True,
                             prediction_options=options)

        # use it
        morph = pymorphy2.MorphAnalyzer(out_path,
                                        probability_estimator_cls=None)
        assert morph.tag('ёжиться') == [morph.TagClass('INFN,impf,intr')]
Exemple #5
0
    def test_convert_to_pymorphy2_with_allowed_link_types(self, tmpdir):
        """
        Compile ``self.XML_PATH`` with a links-matching file and use the result.

        ``self.ALLOWED_LINK_TYPES_XML_PATH`` is passed to the converter as
        ``links_matching_path``. The compiled dictionary is then loaded
        with ``pymorphy2.MorphAnalyzer`` and checked for tagging and
        normalization results.
        The test is skipped when ``assert_can_create()`` raises
        ``NotImplementedError``.
        """

        try:
            assert_can_create()
        except NotImplementedError as e:
            # pytest.skip() raises on its own, so no ``raise`` is needed;
            # it expects the reason as a string, not an exception object.
            pytest.skip(str(e))

        # create a dictionary
        out_path = str(tmpdir.join('dicts'))
        options = {
            'min_paradigm_popularity': 0,
            'min_ending_freq': 0,
            'paradigm_prefixes': lang.ru.PARADIGM_PREFIXES,
        }
        convert_to_pymorphy2(self.XML_PATH, out_path,
                             source_name='toy', language_code='ru',
                             overwrite=True, compile_options=options,
                             links_matching_path=self.ALLOWED_LINK_TYPES_XML_PATH)

        # use it
        morph = pymorphy2.MorphAnalyzer(out_path)
        assert morph.tag('ёжиться') == [morph.TagClass('INFN,impf,intr')]

        # tag simplification should work
        assert morph.tag("ёж")[0] == morph.tag("ванька-встанька")[0]

        # Init tags should be handled correctly
        assert 'Init' in morph.tag("Ц")[0]
        assert 'Init' not in morph.tag("ц")[0]

        # normalization tests
        assert morph.normal_forms('абсурднее') == ['абсурдный']
        assert morph.normal_forms('а') == ['а']
Exemple #6
0
def convert_to_pymorphy2(opencorpora_dict_path, out_path, overwrite=False,
                         prediction_options=None):
    """
    Build a compacted Pymorphy2 dictionary from an OpenCorpora XML file.

    ``out_path`` is the folder the resulting dictionary is saved to;
    ``prediction_options`` is passed on to the compiler.

    Nothing is parsed or saved when ``_create_out_path`` returns a falsy
    value for ``out_path`` / ``overwrite``.
    """
    from .parse import parse_opencorpora_xml
    from .storage import save_compiled_dict

    dawg.assert_can_create()
    if _create_out_path(out_path, overwrite):
        source_dict = parse_opencorpora_xml(opencorpora_dict_path)
        save_compiled_dict(
            compile_parsed_dict(source_dict, prediction_options),
            out_path,
        )
Exemple #7
0
def convert_to_pymorphy2(opencorpora_dict_path, out_path, overwrite=False,
                         prediction_options=None):
    """
    Build a compacted Pymorphy2 dictionary from an OpenCorpora XML file.

    ``out_path`` is the folder the resulting dictionary is saved to;
    ``prediction_options`` is passed on to the compiler.

    Nothing is parsed or saved when ``_create_out_path`` returns a falsy
    value for ``out_path`` / ``overwrite``.
    """
    from .parse import parse_opencorpora_xml
    from .preprocess import simplify_tags
    from .storage import save_compiled_dict

    dawg.assert_can_create()
    if _create_out_path(out_path, overwrite):
        source_dict = parse_opencorpora_xml(opencorpora_dict_path)

        # Called for its effect on ``source_dict``; the return value is unused.
        simplify_tags(source_dict)

        save_compiled_dict(
            compile_parsed_dict(source_dict, prediction_options),
            out_path,
        )
    def test_convert_to_pymorphy2(self, tmpdir):
        """
        Compile ``self.XML_PATH`` into a pymorphy2 dictionary and use it.

        The dictionary is written to a ``dicts`` folder inside ``tmpdir``
        and loaded with ``pymorphy2.MorphAnalyzer`` to tag a word.
        The test is skipped when ``assert_can_create()`` raises
        ``NotImplementedError``.
        """

        # Uncomment to see the compiler's debug output:
        # import logging
        # from pymorphy2.opencorpora_dict.compile import logger
        # logger.setLevel(logging.DEBUG)
        # logger.addHandler(logging.StreamHandler())

        try:
            assert_can_create()
        except NotImplementedError as e:
            # pytest.skip() raises on its own, so no ``raise`` is needed;
            # it expects the reason as a string, not an exception object.
            pytest.skip(str(e))

        # create a dictionary
        out_path = str(tmpdir.join('dicts'))
        options = {
            'min_paradigm_popularity': 0,
            'min_ending_freq': 0,
        }
        convert_to_pymorphy2(self.XML_PATH, out_path, overwrite=True,
                             prediction_options=options)

        # use it
        morph = pymorphy2.MorphAnalyzer(out_path)
        assert morph.tag('ёжиться') == [morph.TagClass('INFN,impf,intr')]