def test_should_generate_converting_circumflexes(self):
    """Verify the roots generated for lexemes that contain a circumflexed vowel.

    For every scenario the generator is expected to return exactly two roots
    for the lexeme: one spelled without the circumflex and one spelled with
    it, both carrying the same phonetic attributes.
    """
    # (circumflexed spelling, spelling without circumflex, category, attributes)
    scenarios = (
        (u"rüzgâr", u"rüzgar", SyntacticCategory.NOUN,
         (LLCont, LVB, LLC, LLNotVless, LVU)),
        (u"alenî", u"aleni", SyntacticCategory.ADJECTIVE,
         (LLNotCont, LVF, LLV, LLNotVless, LVU)),
        (u"cülûs", u"cülus", SyntacticCategory.NOUN,
         (LLCont, LVB, LLC, LLVless, LVR)),
        (u"Âdem", u"Adem", SyntacticCategory.NOUN,
         (LLCont, LVF, LLC, LLNotVless, LVU)),
    )

    for circumflexed, plain, category, attributes in scenarios:
        lexeme = Lexeme(circumflexed, circumflexed, category, None, None)
        generated_roots = CircumflexConvertingRootGenerator.generate(lexeme)

        assert_that(generated_roots, has_length(2))
        # The spelling without the circumflex is checked first, then the
        # original spelling; each expected Root gets its own attribute set.
        for spelling in (plain, circumflexed):
            expected_root = Root(spelling, lexeme, None, set(attributes))
            assert_that(generated_roots, has_item(expected_root))
def initialize():
    """Build the module-level ``contextless_parser``.

    Lexemes are loaded from ``../resources/master_dictionary.txt`` (resolved
    against this file's directory), expanded into roots, and indexed into a
    root map. The suffix graph and the predefined paths are then prepared and
    handed, together with the root finders, to a
    ``ContextlessMorphologicalParser`` that is stored in the global
    ``contextless_parser``.
    """
    global contextless_parser

    dictionary_path = os.path.join(os.path.dirname(__file__), '../resources/master_dictionary.txt')

    # Expand every lexeme of the dictionary into its generated roots.
    roots = []
    for lexeme in LexiconLoader.load_from_file(dictionary_path):
        roots.extend(CircumflexConvertingRootGenerator.generate(lexeme))
    root_map = RootMapGenerator().generate(roots)

    suffix_graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
    suffix_graph.initialize()

    predefined_paths = PredefinedPaths(root_map, suffix_graph)
    predefined_paths.create_predefined_paths()

    # The finders are handed to the parser in exactly this order.
    root_finders = [
        WordRootFinder(root_map),
        TextNumeralRootFinder(root_map),
        DigitNumeralRootFinder(),
        ProperNounFromApostropheRootFinder(),
        ProperNounWithoutApostropheRootFinder(),
    ]

    contextless_parser = ContextlessMorphologicalParser(suffix_graph, predefined_paths, root_finders)
def initialize():
    """Create the contextless parser and publish it as a module global.

    Steps, in order: load the lexemes of ``../resources/master_dictionary.txt``
    (relative to this file), generate the roots of every lexeme, build the
    root map, initialize the suffix graph, create the predefined paths, and
    finally assign a ``ContextlessMorphologicalParser`` to the global name
    ``contextless_parser``.
    """
    master_dictionary = os.path.join(
        os.path.dirname(__file__), '../resources/master_dictionary.txt')
    dictionary_lexemes = LexiconLoader.load_from_file(master_dictionary)

    # Flatten the per-lexeme root lists into a single list.
    collected_roots = [
        root
        for lexeme in dictionary_lexemes
        for root in CircumflexConvertingRootGenerator.generate(lexeme)
    ]
    root_map = RootMapGenerator().generate(collected_roots)

    # Wrap the basic graph layer by layer, innermost first.
    graph = BasicSuffixGraph()
    graph = ProperNounSuffixGraph(graph)
    graph = NumeralSuffixGraph(graph)
    graph = CopulaSuffixGraph(graph)
    graph.initialize()

    paths = PredefinedPaths(root_map, graph)
    paths.create_predefined_paths()

    finders = []
    finders.append(WordRootFinder(root_map))
    finders.append(TextNumeralRootFinder(root_map))
    finders.append(DigitNumeralRootFinder())
    finders.append(ProperNounFromApostropheRootFinder())
    finders.append(ProperNounWithoutApostropheRootFinder())

    global contextless_parser
    contextless_parser = ContextlessMorphologicalParser(graph, paths, finders)
def test_should_generate_converting_circumflexes(self):
    """Check root generation for four lexemes spelled with a circumflex.

    Each lexeme must produce exactly two roots: the spelling with the
    circumflex removed and the original spelling, with identical phonetic
    attributes on both.
    """

    def expect_both_spellings(original, without_circumflex, category, phonetic_attributes):
        # Build the lexeme, generate its roots and verify both spellings.
        lexeme = Lexeme(original, original, category, None, None)
        generated_roots = CircumflexConvertingRootGenerator.generate(lexeme)
        assert_that(generated_roots, has_length(2))
        assert_that(
            generated_roots,
            has_item(Root(without_circumflex, lexeme, None, set(phonetic_attributes))))
        assert_that(
            generated_roots,
            has_item(Root(original, lexeme, None, set(phonetic_attributes))))

    expect_both_spellings(
        u"rüzgâr", u"rüzgar", SyntacticCategory.NOUN,
        [LLCont, LVB, LLC, LLNotVless, LVU])
    expect_both_spellings(
        u"alenî", u"aleni", SyntacticCategory.ADJECTIVE,
        [LLNotCont, LVF, LLV, LLNotVless, LVU])
    expect_both_spellings(
        u"cülûs", u"cülus", SyntacticCategory.NOUN,
        [LLCont, LVB, LLC, LLVless, LVR])
    expect_both_spellings(
        u"Âdem", u"Adem", SyntacticCategory.NOUN,
        [LLCont, LVF, LLC, LLNotVless, LVU])
def setUpClass(cls):
    """Prepare the shared root map and parser for the test class.

    After delegating to the parent ``setUpClass``, the lexemes of
    ``../../../../resources/master_dictionary.txt`` (relative to this file)
    are expanded into roots and indexed as ``cls.root_map``. An
    ``UpperCaseSupportingContextlessMorphologicalParser`` built on top of the
    initialized suffix graph, the predefined paths and the root finders is
    stored as ``cls.parser``.
    """
    super(ParserTestWithSimpleParseSets, cls).setUpClass()

    dictionary_path = os.path.join(
        os.path.dirname(__file__), "../../../../resources/master_dictionary.txt")

    # Gather the roots generated for every lexeme in the dictionary.
    roots = []
    for lexeme in LexiconLoader.load_from_file(dictionary_path):
        roots.extend(CircumflexConvertingRootGenerator.generate(lexeme))
    cls.root_map = RootMapGenerator().generate(roots)

    graph = CopulaSuffixGraph(NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
    graph.initialize()

    paths = PredefinedPaths(cls.root_map, graph)
    paths.create_predefined_paths()

    # Finder order is kept as the parser receives it.
    finders = [
        WordRootFinder(cls.root_map),
        TextNumeralRootFinder(cls.root_map),
        DigitNumeralRootFinder(),
        ProperNounFromApostropheRootFinder(),
        ProperNounWithoutApostropheRootFinder(),
    ]
    cls.parser = UpperCaseSupportingContextlessMorphologicalParser(graph, paths, finders)
from trnltk.morphology.model import formatter
from trnltk.morphology.morphotactics.basicsuffixgraph import BasicSuffixGraph
from trnltk.morphology.morphotactics.copulasuffixgraph import CopulaSuffixGraph
from trnltk.morphology.contextless.parser.parser import (
    logger as parser_logger,
    UpperCaseSupportingContextlessMorphologicalParser,
)
from trnltk.morphology.contextless.parser.rootfinder import (
    WordRootFinder,
    DigitNumeralRootFinder,
    TextNumeralRootFinder,
    ProperNounFromApostropheRootFinder,
    ProperNounWithoutApostropheRootFinder,
)
from trnltk.morphology.contextless.parser.suffixapplier import logger as suffix_applier_logger
from trnltk.morphology.morphotactics.numeralsuffixgraph import NumeralSuffixGraph
from trnltk.morphology.morphotactics.predefinedpaths import PredefinedPaths
from trnltk.morphology.morphotactics.propernounsuffixgraph import ProperNounSuffixGraph

# --- Roots -------------------------------------------------------------
# Expand every lexeme of the master dictionary into its generated roots.
# NOTE: the dictionary path is relative to the current working directory.
all_roots = []
lexemes = LexiconLoader.load_from_file('trnltk/trnltk/resources/master_dictionary.txt')
for di in lexemes:
    all_roots.extend(CircumflexConvertingRootGenerator.generate(di))

# Index the collected roots.
root_map_generator = RootMapGenerator()
root_map = root_map_generator.generate(all_roots)

# --- Suffix graph ------------------------------------------------------
# The basic graph is wrapped by the proper noun, numeral and copula graphs.
suffix_graph = CopulaSuffixGraph(
    NumeralSuffixGraph(ProperNounSuffixGraph(BasicSuffixGraph())))
suffix_graph.initialize()

# --- Predefined paths --------------------------------------------------
predefined_paths = PredefinedPaths(root_map, suffix_graph)
predefined_paths.create_predefined_paths()

# --- Root finders ------------------------------------------------------
# Only the word and text-numeral finders are given the root map.
word_root_finder = WordRootFinder(root_map)
text_numeral_root_finder = TextNumeralRootFinder(root_map)
digit_numeral_root_finder = DigitNumeralRootFinder()
proper_noun_from_apostrophe_root_finder = ProperNounFromApostropheRootFinder()
proper_noun_without_apostrophe_root_finder = ProperNounWithoutApostropheRootFinder()