def test_convert_to_pymorphy2(self, tmpdir):
    """
    Convert the XML dictionary at ``self.XML_PATH`` and load the result.

    The converted dictionary is written to a ``dicts`` folder inside
    ``tmpdir`` and then opened with ``pymorphy2.MorphAnalyzer``.
    The test is skipped when ``assert_can_create()`` raises
    ``NotImplementedError``.
    """
    # Debugging hint: set the level of
    # ``pymorphy2.opencorpora_dict.compile.logger`` to ``logging.DEBUG``
    # and attach a ``logging.StreamHandler`` to it.
    try:
        assert_can_create()
    except NotImplementedError as e:
        # pytest.skip() raises the skip outcome by itself and accepts only
        # a string reason, so the exception is converted explicitly.
        pytest.skip(str(e))

    # create a dictionary
    out_path = str(tmpdir.join('dicts'))
    options = {
        'min_paradigm_popularity': 0,
        'min_ending_freq': 0,
        'paradigm_prefixes': lang.ru.PARADIGM_PREFIXES,
    }
    convert_to_pymorphy2(self.XML_PATH, out_path,
                         source_name='toy', language_code='ru',
                         overwrite=True, compile_options=options)

    # use it
    morph = pymorphy2.MorphAnalyzer(out_path)
    assert morph.tag('ёжиться') == [morph.TagClass('INFN,impf,intr')]

    # tag simplification should work
    assert morph.tag("ёж")[0] == morph.tag("ванька-встанька")[0]
def convert_to_pymorphy2(opencorpora_dict_path, out_path, source_name,
                         language_code, overwrite=False, compile_options=None):
    """
    Build a Pymorphy2 compacted dictionary from an OpenCorpora XML file.

    ``out_path`` is the name of the folder the dictionaries are put into.
    Nothing is parsed or saved when ``_create_out_path`` returns a falsy
    value for that folder.
    """
    from .parse import parse_opencorpora_xml
    from .preprocess import simplify_tags, drop_unsupported_parses
    from .storage import save_compiled_dict

    dawg.assert_can_create()

    if _create_out_path(out_path, overwrite):
        parsed = parse_opencorpora_xml(opencorpora_dict_path)

        # The preprocessing helpers are called for their effect on the
        # parsed dictionary; their return values are not used.
        simplify_tags(parsed)
        drop_unsupported_parses(parsed)

        compiled = compile_parsed_dict(parsed, compile_options)
        save_compiled_dict(
            compiled,
            out_path,
            source_name=source_name,
            language_code=language_code
        )
def test_convert_to_pymorphy2(self, tmpdir):
    """
    Convert the XML dictionary at ``self.XML_PATH`` and load the result.

    The converted dictionary is written to a ``dicts`` folder inside
    ``tmpdir`` and then opened with ``pymorphy2.MorphAnalyzer`` with
    ``probability_estimator_cls=None``. The test is skipped when
    ``assert_can_create()`` raises ``NotImplementedError``.
    """
    # Debugging hint: set the level of
    # ``pymorphy2.opencorpora_dict.compile.logger`` to ``logging.DEBUG``
    # and attach a ``logging.StreamHandler`` to it.
    try:
        assert_can_create()
    except NotImplementedError as e:
        # pytest.skip() raises the skip outcome by itself and accepts only
        # a string reason, so the exception is converted explicitly.
        pytest.skip(str(e))

    # create a dictionary
    out_path = str(tmpdir.join('dicts'))
    options = {
        'min_paradigm_popularity': 0,
        'min_ending_freq': 0,
    }
    convert_to_pymorphy2(self.XML_PATH, out_path, overwrite=True,
                         prediction_options=options)

    # use it
    morph = pymorphy2.MorphAnalyzer(out_path, probability_estimator_cls=None)
    assert morph.tag('ёжиться') == [morph.TagClass('INFN,impf,intr')]
def test_convert_to_pymorphy2_with_allowed_link_types(self, tmpdir):
    """
    Convert ``self.XML_PATH`` with a links matching file and load the result.

    ``self.ALLOWED_LINK_TYPES_XML_PATH`` is passed to the converter as
    ``links_matching_path``. The resulting dictionary is opened with
    ``pymorphy2.MorphAnalyzer`` and checked for tagging and normalization.
    The test is skipped when ``assert_can_create()`` raises
    ``NotImplementedError``.
    """
    try:
        assert_can_create()
    except NotImplementedError as e:
        # pytest.skip() raises the skip outcome by itself and accepts only
        # a string reason, so the exception is converted explicitly.
        pytest.skip(str(e))

    # create a dictionary
    out_path = str(tmpdir.join('dicts'))
    options = {
        'min_paradigm_popularity': 0,
        'min_ending_freq': 0,
        'paradigm_prefixes': lang.ru.PARADIGM_PREFIXES,
    }
    convert_to_pymorphy2(self.XML_PATH, out_path,
                         source_name='toy', language_code='ru',
                         overwrite=True, compile_options=options,
                         links_matching_path=self.ALLOWED_LINK_TYPES_XML_PATH)

    # use it
    morph = pymorphy2.MorphAnalyzer(out_path)
    assert morph.tag('ёжиться') == [morph.TagClass('INFN,impf,intr')]

    # tag simplification should work
    assert morph.tag("ёж")[0] == morph.tag("ванька-встанька")[0]

    # Init tags should be handled correctly
    assert 'Init' in morph.tag("Ц")[0]
    assert 'Init' not in morph.tag("ц")[0]

    # normalization tests
    assert morph.normal_forms('абсурднее') == ['абсурдный']
    assert morph.normal_forms('а') == ['а']
def convert_to_pymorphy2(opencorpora_dict_path, out_path, overwrite=False,
                         prediction_options=None):
    """
    Build a Pymorphy2 compacted dictionary from an OpenCorpora XML file.

    ``out_path`` is the name of the folder the dictionaries are put into.
    Nothing is parsed or saved when ``_create_out_path`` returns a falsy
    value for that folder.
    """
    from .parse import parse_opencorpora_xml
    from .storage import save_compiled_dict

    dawg.assert_can_create()

    if _create_out_path(out_path, overwrite):
        # parse -> compile -> save
        parsed = parse_opencorpora_xml(opencorpora_dict_path)
        compiled = compile_parsed_dict(parsed, prediction_options)
        save_compiled_dict(compiled, out_path)
def convert_to_pymorphy2(opencorpora_dict_path, out_path, overwrite=False,
                         prediction_options=None):
    """
    Build a Pymorphy2 compacted dictionary from an OpenCorpora XML file.

    ``out_path`` is the name of the folder the dictionaries are put into.
    Nothing is parsed or saved when ``_create_out_path`` returns a falsy
    value for that folder.
    """
    from .parse import parse_opencorpora_xml
    from .preprocess import simplify_tags
    from .storage import save_compiled_dict

    dawg.assert_can_create()

    if _create_out_path(out_path, overwrite):
        parsed = parse_opencorpora_xml(opencorpora_dict_path)

        # Called for its effect on the parsed dictionary; the return
        # value is not used.
        simplify_tags(parsed)

        compiled = compile_parsed_dict(parsed, prediction_options)
        save_compiled_dict(compiled, out_path)
def test_convert_to_pymorphy2(self, tmpdir):
    """
    Convert the XML dictionary at ``self.XML_PATH`` and load the result.

    The converted dictionary is written to a ``dicts`` folder inside
    ``tmpdir`` and then opened with ``pymorphy2.MorphAnalyzer``.
    The test is skipped when ``assert_can_create()`` raises
    ``NotImplementedError``.
    """
    # Debugging hint: set the level of
    # ``pymorphy2.opencorpora_dict.compile.logger`` to ``logging.DEBUG``
    # and attach a ``logging.StreamHandler`` to it.
    try:
        assert_can_create()
    except NotImplementedError as e:
        # pytest.skip() raises the skip outcome by itself and accepts only
        # a string reason, so the exception is converted explicitly.
        pytest.skip(str(e))

    # create a dictionary
    out_path = str(tmpdir.join('dicts'))
    options = {
        'min_paradigm_popularity': 0,
        'min_ending_freq': 0,
    }
    convert_to_pymorphy2(self.XML_PATH, out_path, overwrite=True,
                         prediction_options=options)

    # use it
    morph = pymorphy2.MorphAnalyzer(out_path)
    assert morph.tag('ёжиться') == [morph.TagClass('INFN,impf,intr')]