def process_word(self, text):
    """Run every 'word'-scope particle regex over each word of *text*.

    Each match is reported via ``create_tip_m`` together with the word's
    start position, so tip positions are absolute within *text*.

    :param text: the full text to analyse.
    :return: list of tips accumulated from all particle matches.
    """
    tips = []
    # Hoist particle regex compilation out of the word loop: the original
    # recompiled every particle pattern once per word.
    particles = [(int(p['category_id']), re.compile(p['regex']))
                 for p in self.particles_generator('word')]
    for word in re.compile(self.R_WORDS).finditer(text):
        for category_id, pattern in particles:
            for m in pattern.finditer(word.group()):
                tips += create_tip_m(category_id, m, word.start())
    return tips
def process_sentences(self, text):
    """Run every 'sent'-scope particle regex over each sentence of *text*.

    Matches are searched within each sentence's own string, so the
    sentence's start offset is forwarded to ``create_tip_m`` to make tip
    positions absolute within *text* (mirroring ``process_word``).

    :param text: the full text to analyse.
    :return: list of tips accumulated from all particle matches.
    """
    tips = []
    for sentence in re.compile(self.R_SENTENCE).finditer(text):
        # BUG FIX: 'offset' was computed but never passed on, leaving tip
        # positions relative to the sentence instead of the whole text.
        offset = sentence.start()
        for particle in self.particles_generator('sent'):
            for m in re.compile(particle['regex']).finditer(
                    sentence.group()):
                tips += create_tip_m(particle['category_id'], m, offset)
    return tips
def process(self, text):
    """Look up each word of *text* in the dictionary database and apply
    the word-level rules.

    Words found in the dictionary are passed through every rule in
    ``self.rules``; unknown words are flagged as out-of-dictionary,
    except elided forms containing an apostrophe (e.g. "l'", "d'"),
    which are skipped.

    :param text: the full text to analyse.
    :return: list of tips produced by the rules and dictionary lookups.
    """
    super().process(text)
    tips = []
    cursor = self.db.cursor()
    for word_m in m_regex.get_words_m(text):
        word = word_m.group().lower()
        # SECURITY FIX: parameterized query — the previous version
        # interpolated the analysed word straight into the SQL string,
        # allowing injection through the input text. Debug prints
        # removed as well.
        sql = "SELECT * FROM Mots WHERE fk_dictionnaires=1 AND mot=?"
        word_db = cursor.execute(sql, (word,)).fetchone()
        if word_db:
            for rule in self.rules:
                tips += rule(word, word_m, word_db)
        elif '\'' not in word:
            tips += create_tip_m(falcore.C_NOT_IN_DICTIONARY, word_m)
    return tips
def rule_word_complexity(self, word, word_m, word_db):
    """Classify a word's reading complexity.

    In order to be considered easy, a word has to be frequent and short.
    Frequent long words are flagged as long; infrequent long words as
    complex. Other combinations produce no tip.

    :param word: the lower-cased word.
    :param word_m: the regex match object locating the word in the text.
    :param word_db: the word's database row (must expose 'ponderation').
    :return: list with a single tip, or an empty list.
    """
    tips = []
    ponderation = float(word_db['ponderation'])
    is_frequent = ponderation > self.ponderation_min
    is_short = bool(m_regex.is_short(word))
    is_long = bool(m_regex.is_long(word))
    c_id = None
    if is_frequent and is_short:
        c_id = falcore.C_EASY_WORD
    elif is_frequent and is_long:
        c_id = falcore.C_LONG_WORD
    elif not is_frequent and is_long:
        # BUG FIX: was assigned to the misspelled name 'c_Id', so
        # complex words never produced a tip.
        c_id = falcore.C_COMPLEX_WORD
    if c_id:
        tips += create_tip_m(c_id, word_m)
    return tips
def process_character(self, text):
    """Apply every 'char'-scope particle regex to the whole text.

    :param text: the full text to analyse.
    :return: list of tips accumulated from all particle matches.
    """
    tips = []
    for particle in self.particles_generator('char'):
        pattern = re.compile(particle['regex'])
        for match in pattern.finditer(text):
            tips.extend(create_tip_m(particle['category_id'], match))
    return tips
def rule_multisemic(self, word, word_m, word_db):
    """Emit a multisemic-word tip when the dictionary entry is marked
    as having several meanings; otherwise report nothing.

    :param word: the lower-cased word (unused here).
    :param word_m: the regex match object locating the word in the text.
    :param word_db: the word's database row.
    :return: list with a single tip, or an empty list.
    """
    if not self.is_multisemic(word_db):
        return []
    return create_tip_m(falcore.C_MULTISEMIC_WORD, word_m)