def build_full_link_set_dictionary(in_tokenized_sentences, in_language=DEFAULT_LANGUAGE):
    """Build a lookup from each full-link-set lexeme to its associated value.

    Args:
        in_tokenized_sentences: sentences given as sequences of tokens.
        in_language: language code passed to the stopword filter.

    Returns:
        collections.defaultdict(int) mapping record[0] -> record[1] for every
        record in the extracted full link set; missing keys read as 0.
    """
    stopped_sentences = common.stop(in_tokenized_sentences, in_language)
    full_link_set = link_lexemes.extract_full_link_set(stopped_sentences, True)
    # int() == 0, so absent lexemes look up as 0 — idiomatic replacement
    # for the original `lambda: 0` default factory.
    result = collections.defaultdict(int)
    for lexeme in full_link_set:
        # each record is (lexeme, value, ...) — presumably (word, weight/position);
        # TODO(review): confirm against extract_full_link_set
        result[lexeme[0]] = lexeme[1]
    return result
 def setUp(self):
     """Load the input fixture and precompute per-lexeme associative powers."""
     # `with` guarantees the fixture file is closed — the original left the
     # handle open (resource leak).
     with open(ExistenceAreasCalculationTest.INPUT_FILE) as fixture:
         raw_text = ' '.join(line.strip() for line in fixture)
     self.sentences = stop(tokenize_sentences(raw_text))
     self.fls = make_lexemes_dict(link_lexemes.extract_full_link_set(self.sentences))
     self.indexed_text = build_text_index(self.sentences, self.fls)
     # `lexeme_id` avoids shadowing the builtin `id`; the original trailing
     # backslash was redundant inside a brace-delimited comprehension.
     self.lexeme_powers = {lexeme_id: lexeme_associative_power(lexeme_id, self.indexed_text)
                           for lexeme_id in self.fls}
 def setUp(self):
     """Load the fixture and extract constant phrases from existence areas."""
     # Context manager closes the fixture file deterministically — the
     # original leaked the open handle.
     with open(ExistenceAreasCalculationTest.INPUT_FILE) as fixture:
         raw_text = ' '.join(line.strip() for line in fixture)
     self.sentences = stop(tokenize_sentences(raw_text))
     self.fls = make_lexemes_dict(link_lexemes.extract_full_link_set(self.sentences))
     self.indexed_text = build_text_index(self.sentences, self.fls)
     existence_areas = calculate_existence_areas(self.indexed_text, self.fls)
     self.const_phrases = extract_constant_phrases(existence_areas)
    def initial_preprocessing(self, in_sentences):
        """Build the full link set, its positional mappings and the indexed text."""
        # just words in the right order
        # (PEP8: no spaces around `=` in keyword arguments — matches the
        # formatting convention used elsewhere in the project)
        self.fls = link_lexemes.extract_full_link_set(in_sentences,
                                                      self.language,
                                                      keep_positions=True)
        # bidirectional lookup between a lexeme (record[0]) and its FLS
        # position (record[1])
        self.pos_to_lex_mapping = {record[1]: record[0] for record in self.fls}
        self.lex_to_pos_mapping = {record[0]: record[1] for record in self.fls}

        # representing sentences as lists of FLS indices, not words themselves
        self.indexed_text = self.build_text_index(in_sentences, self.lex_to_pos_mapping)
    def initial_preprocessing(self, in_sentences):
        """Extract the full link set and index the text against it."""
        # just words in the right order
        self.fls = link_lexemes.extract_full_link_set(
            in_sentences, self.language, keep_positions=True)

        # build both directions of the lexeme <-> position lookup in one pass
        self.pos_to_lex_mapping = {}
        self.lex_to_pos_mapping = {}
        for record in self.fls:
            lexeme, position = record[0], record[1]
            self.pos_to_lex_mapping[position] = lexeme
            self.lex_to_pos_mapping[lexeme] = position

        # representing sentences as lists of FLS indices, not words themselves
        self.indexed_text = self.build_text_index(
            in_sentences, self.lex_to_pos_mapping)
 def setUp(self):
     """Read the fixture and compute each lexeme's associative power."""
     # `with` closes the fixture file; the original left the handle open.
     with open(ExistenceAreasCalculationTest.INPUT_FILE) as fixture:
         raw_text = ' '.join(line.strip() for line in fixture)
     self.sentences = stop(tokenize_sentences(raw_text))
     self.fls = make_lexemes_dict(
         link_lexemes.extract_full_link_set(self.sentences))
     self.indexed_text = build_text_index(self.sentences, self.fls)
     # `lexeme_id` instead of the builtin-shadowing `id`; the trailing
     # backslash was redundant inside a brace-delimited comprehension.
     self.lexeme_powers = {
         lexeme_id: lexeme_associative_power(lexeme_id, self.indexed_text)
         for lexeme_id in self.fls
     }
 def setUp(self):
     """Read the fixture and derive constant phrases from existence areas."""
     # Context manager ensures the fixture file is closed — the original
     # leaked the open handle.
     with open(ExistenceAreasCalculationTest.INPUT_FILE) as fixture:
         raw_text = ' '.join(line.strip() for line in fixture)
     self.sentences = stop(tokenize_sentences(raw_text))
     self.fls = make_lexemes_dict(
         link_lexemes.extract_full_link_set(self.sentences))
     self.indexed_text = build_text_index(self.sentences, self.fls)
     existence_areas = calculate_existence_areas(self.indexed_text, self.fls)
     self.const_phrases = extract_constant_phrases(existence_areas)
 def test_extraction(self):
     """Existence areas computed over the indexed text match the fixture answer."""
     lexeme_dict = make_lexemes_dict(
         link_lexemes.extract_full_link_set(self.sentences))
     text_index = build_text_index(self.sentences, lexeme_dict)
     areas = self.get_existence_areas(text_index, lexeme_dict)
     self.assertEqual(areas, self.answer)
 def test_extraction(self):
     """The extracted full link set equals the expected answer."""
     extracted = link_lexemes.extract_full_link_set(self.sentences, True)
     self.assertEqual(extracted, self.answer)
 def test_extraction(self):
     """Existence areas over the freshly indexed text match the fixture."""
     lexeme_dict = make_lexemes_dict(
         link_lexemes.extract_full_link_set(self.sentences))
     text_index = build_text_index(self.sentences, lexeme_dict)
     self.assertEqual(self.get_existence_areas(text_index, lexeme_dict),
                      self.answer)
 def test_extraction(self):
     """Full link set extraction (positions kept) matches the fixture answer."""
     extracted = link_lexemes.extract_full_link_set(self.sentences, True)
     self.assertEqual(extracted, self.answer)