def find(self, lemma, query, date_range):
    """Return the first clause found for ``lemma`` among posts matching ``query``.

    Posts are fetched with ``kuzuha.search`` on the ``text`` field, filtered
    by ``date_range``.  Each post's text is parsed with ``cabocha.parse`` and
    handed to ``self.find_clause`` together with ``lemma`` and ``query[0]``.
    The first truthy clause has every ``re_right`` match replaced by ``')'``
    and is returned.

    Returns:
        The cleaned-up clause, or ``''`` when no post yields a clause.
    """
    hits = kuzuha.search(query, field='text', _filter=date_range, sort=[])
    for hit in hits:
        clause = self.find_clause(lemma, cabocha.parse(hit['text']), query[0])
        if clause:
            # Stop at the first hit; fall back to '' if the substitution
            # somehow leaves nothing behind.
            return re_right.sub(')', clause) or ''
    return ''
def extract575(self, text):
    """Try to extract a 5-7-5 phrase (17 counted units) from ``text``.

    The text is parsed with ``cabocha.parse`` and scanned token by token
    while a window of token surfaces and their counted lengths is kept.
    The scan moves through three phases:

    * phase 0 -- looking for a window whose lengths sum to exactly 5;
      while the sum exceeds 5 the oldest token is dropped from the window.
    * phase 1 -- extending the window until the sum reaches 12 (5 + 7).
    * phase 2 -- extending the window until the sum reaches 17 (5 + 7 + 5)
      and ``self.is_valid575`` accepts the collected tokens.

    Just after a phrase boundary (sum 6, or sum 7 with a length-2 token, in
    phase 1; sum 13, or sum 14 with a length-2 token, in phase 2) the
    current token must pass ``self.is_valid_pos``; otherwise the search is
    abandoned.

    Returns:
        The concatenated token surfaces of the 17-unit window, or ``None``
        when no such window is found or a boundary check fails.
    """
    phase = 0
    tokens = []
    yomi_count = []
    for chunk in cabocha.parse(text):
        for token in chunk['tokens']:
            feature = token.feature.split(',')
            if feature[0] == '記号':
                # Symbols contribute nothing to the count.
                length = 0
            elif len(feature) >= 8:
                # presumably feature[7] is the reading of the token and
                # re_chokuon strips characters that should not be counted
                # separately -- TODO confirm against the dictionary format.
                length = len(re_chokuon.sub('', feature[7]))
            else:
                # No eighth field available: count the surface form instead.
                length = len(re_chokuon.sub('', token.surface))
            yomi_count.append(length)
            tokens.append(token.surface)

            # Sum once per token; every check below uses the same total.
            total = sum(yomi_count)

            if phase == 2:
                if total == 17 and self.is_valid575(tokens):
                    return ''.join(tokens)
                if total == 13 or (total == 14 and length == 2):
                    if not self.is_valid_pos(feature):
                        return None
            elif phase == 1:
                if total == 12:
                    phase = 2
                elif total == 6 or (total == 7 and length == 2):
                    if not self.is_valid_pos(feature):
                        return None
            elif total == 5:
                phase = 1
            elif total > 5:
                # Overshot the first 5: slide the window forward by dropping
                # its oldest token.  Only one token is dropped per step.
                yomi_count.pop(0)
                tokens.pop(0)
            # NOTE(review): once in phase 1 or 2 an overshoot (total > 12 or
            # > 17) is never recovered from; the scan just runs to the end.
    return None