def build_full_link_set_dictionary(in_tokenized_sentences, in_language=DEFAULT_LANGUAGE):
    """Map each full-link-set lexeme to its associated value.

    Args:
        in_tokenized_sentences: iterable of tokenized sentences.
        in_language: language code passed to the stopword filter
            (defaults to DEFAULT_LANGUAGE).

    Returns:
        collections.defaultdict mapping lexeme -> value, defaulting to 0
        for lexemes absent from the full link set (callers may rely on
        that default, so a plain dict is not returned).
    """
    stopped_sentences = common.stop(in_tokenized_sentences, in_language)
    full_link_set = link_lexemes.extract_full_link_set(stopped_sentences, True)
    # int() == 0, so defaultdict(int) keeps the original "default to 0"
    # behavior without the needless lambda.
    result = collections.defaultdict(int)
    for lexeme in full_link_set:
        result[lexeme[0]] = lexeme[1]
    return result
def setUp(self):
    """Build the fixture: stopped sentences, full link set, text index,
    and the associative power of every lexeme in the full link set."""
    # 'with' guarantees the input file is closed; the original leaked
    # the handle returned by open().
    with open(ExistenceAreasCalculationTest.INPUT_FILE) as input_file:
        raw_text = ' '.join(line.strip() for line in input_file)
    self.sentences = stop(tokenize_sentences(raw_text))
    self.fls = make_lexemes_dict(link_lexemes.extract_full_link_set(self.sentences))
    self.indexed_text = build_text_index(self.sentences, self.fls)
    # 'lexeme_id' instead of 'id' so the builtin is not shadowed.
    self.lexeme_powers = {
        lexeme_id: lexeme_associative_power(lexeme_id, self.indexed_text)
        for lexeme_id in self.fls
    }
def setUp(self):
    """Build the fixture: stopped sentences, full link set, text index,
    and the constant phrases extracted from the existence areas."""
    # 'with' guarantees the input file is closed; the original leaked
    # the handle returned by open().
    with open(ExistenceAreasCalculationTest.INPUT_FILE) as input_file:
        raw_text = ' '.join(line.strip() for line in input_file)
    self.sentences = stop(tokenize_sentences(raw_text))
    self.fls = make_lexemes_dict(link_lexemes.extract_full_link_set(self.sentences))
    self.indexed_text = build_text_index(self.sentences, self.fls)
    existence_areas = calculate_existence_areas(self.indexed_text, self.fls)
    self.const_phrases = extract_constant_phrases(existence_areas)
def initial_preprocessing(self, in_sentences):
    """Compute the full link set, its two position mappings, and the
    indexed form of the text.

    Args:
        in_sentences: tokenized sentences to preprocess.
    """
    # Full link set: just words in the right order, positions retained.
    # PEP 8 spacing (keep_positions=True, 'key: value') to match the
    # identically coded sibling implementation elsewhere in the file.
    self.fls = link_lexemes.extract_full_link_set(
        in_sentences, self.language, keep_positions=True)
    self.pos_to_lex_mapping = {record[1]: record[0] for record in self.fls}
    self.lex_to_pos_mapping = {record[0]: record[1] for record in self.fls}
    # Represent sentences as lists of FLS indices, not the words themselves.
    self.indexed_text = self.build_text_index(in_sentences, self.lex_to_pos_mapping)
def initial_preprocessing(self, in_sentences):
    """Derive the full link set, both lexeme/position mappings, and the
    index-based representation of the text.

    Args:
        in_sentences: tokenized sentences to preprocess.
    """
    # Full link set: just words in the right order (positions retained).
    self.fls = link_lexemes.extract_full_link_set(
        in_sentences, self.language, keep_positions=True)
    # Build both direction mappings in a single pass over the FLS.
    self.pos_to_lex_mapping = {}
    self.lex_to_pos_mapping = {}
    for entry in self.fls:
        lexeme, position = entry[0], entry[1]
        self.pos_to_lex_mapping[position] = lexeme
        self.lex_to_pos_mapping[lexeme] = position
    # Represent sentences as lists of FLS indices, not words themselves.
    self.indexed_text = self.build_text_index(in_sentences, self.lex_to_pos_mapping)
def setUp(self):
    """Build the fixture: stopped sentences, full link set, text index,
    and the associative power of every lexeme in the full link set."""
    # 'with' guarantees the input file is closed; the original leaked
    # the handle returned by open().
    with open(ExistenceAreasCalculationTest.INPUT_FILE) as input_file:
        raw_text = ' '.join(line.strip() for line in input_file)
    self.sentences = stop(tokenize_sentences(raw_text))
    self.fls = make_lexemes_dict(
        link_lexemes.extract_full_link_set(self.sentences))
    self.indexed_text = build_text_index(self.sentences, self.fls)
    # 'lexeme_id' instead of 'id' so the builtin is not shadowed.
    self.lexeme_powers = {
        lexeme_id: lexeme_associative_power(lexeme_id, self.indexed_text)
        for lexeme_id in self.fls
    }
def setUp(self):
    """Build the fixture: stopped sentences, full link set, text index,
    and the constant phrases extracted from the existence areas."""
    # 'with' guarantees the input file is closed; the original leaked
    # the handle returned by open().
    with open(ExistenceAreasCalculationTest.INPUT_FILE) as input_file:
        raw_text = ' '.join(line.strip() for line in input_file)
    self.sentences = stop(tokenize_sentences(raw_text))
    self.fls = make_lexemes_dict(
        link_lexemes.extract_full_link_set(self.sentences))
    self.indexed_text = build_text_index(self.sentences, self.fls)
    existence_areas = calculate_existence_areas(self.indexed_text, self.fls)
    self.const_phrases = extract_constant_phrases(existence_areas)
def test_extraction(self):
    """Existence areas computed from the indexed text must equal the
    expected answer."""
    full_link_set = link_lexemes.extract_full_link_set(self.sentences)
    lexemes = make_lexemes_dict(full_link_set)
    text_index = build_text_index(self.sentences, lexemes)
    actual = self.get_existence_areas(text_index, lexemes)
    self.assertEqual(actual, self.answer)
def test_extraction(self):
    """The extracted full link set must equal the expected answer."""
    extracted = link_lexemes.extract_full_link_set(self.sentences, True)
    self.assertEqual(extracted, self.answer)
def test_extraction(self):
    """Verify that existence-area extraction reproduces the expected
    answer for the fixture sentences."""
    lexeme_dict = make_lexemes_dict(
        link_lexemes.extract_full_link_set(self.sentences))
    index = build_text_index(self.sentences, lexeme_dict)
    self.assertEqual(
        self.get_existence_areas(index, lexeme_dict), self.answer)
def test_extraction(self):
    """Verify full-link-set extraction against the expected answer."""
    result = link_lexemes.extract_full_link_set(self.sentences, True)
    self.assertEqual(result, self.answer)