def query(cls, text):
    """Search feeds matching ``text``, trying address, then title, then link.

    Refreshes the feed index first; if no search server is reachable,
    logs and returns an empty list. Each stage returns at most 5 hits
    ordered by subscriber count; the first stage with any hits wins.
    """
    cls.create_elasticsearch_mapping()
    try:
        cls.ES.default_indices = cls.index_name()
        cls.ES.indices.refresh()
    except pyes.exceptions.NoServerAvailable:
        logging.debug(" ***> ~FRNo search server available.")
        return []

    # Stage 1: exact-phrase match on the feed address.
    logging.info("~FGSearch ~FCfeeds~FG by address: ~SB%s" % text)
    address_query = MatchQuery('address', text, operator="and", type="phrase")
    matches = cls.ES.search(query=address_query, sort="num_subscribers:desc",
                            size=5, doc_types=[cls.type_name()])
    if matches.total:
        return matches

    # Stage 2: all-terms match on the feed title.
    logging.info("~FGSearch ~FCfeeds~FG by title: ~SB%s" % text)
    title_query = MatchQuery('title', text, operator="and")
    matches = cls.ES.search(query=title_query, sort="num_subscribers:desc",
                            size=5, doc_types=[cls.type_name()])
    if matches.total:
        return matches

    # Stage 3: all-terms match on the feed link.
    logging.info("~FGSearch ~FCfeeds~FG by link: ~SB%s" % text)
    link_query = MatchQuery('link', text, operator="and")
    return cls.ES.search(query=link_query, sort="num_subscribers:desc",
                         size=5, doc_types=[cls.type_name()])
def query(cls, text):
    """Search feeds matching ``text``, trying address, then title, then link.

    Each stage returns at most 5 hits ordered by subscriber count; a
    later stage only runs when the previous one found nothing.

    Returns an empty list when no search server is available (fix: the
    original crashed with an unhandled NoServerAvailable here, unlike
    the sibling ``query`` implementation in this file which guards the
    refresh call).
    """
    try:
        cls.ES.default_indices = cls.index_name()
        cls.ES.indices.refresh()
    except pyes.exceptions.NoServerAvailable:
        logging.debug(" ***> ~FRNo search server available.")
        return []

    # Stage 1: exact-phrase match on the feed address.
    logging.info("~FGSearch ~FCfeeds~FG by address: ~SB%s" % text)
    q = MatchQuery('address', text, operator="and", type="phrase")
    results = cls.ES.search(query=q, sort="num_subscribers:desc", size=5,
                            doc_types=[cls.type_name()])
    if not results.total:
        # Stage 2: all-terms match on the feed title.
        logging.info("~FGSearch ~FCfeeds~FG by title: ~SB%s" % text)
        q = MatchQuery('title', text, operator="and")
        results = cls.ES.search(query=q, sort="num_subscribers:desc", size=5,
                                doc_types=[cls.type_name()])
        if not results.total:
            # Stage 3: all-terms match on the feed link.
            logging.info("~FGSearch ~FCfeeds~FG by link: ~SB%s" % text)
            q = MatchQuery('link', text, operator="and")
            results = cls.ES.search(query=q, sort="num_subscribers:desc",
                                    size=5, doc_types=[cls.type_name()])
    return results
def query(cls, text):
    """Search feeds matching ``text``, trying address, then title, then link.

    Each stage returns at most 5 hits ordered by subscriber count; a
    later stage only runs when the previous one found nothing.

    Fix: removed leftover debug ``print q.serialize()`` statements that
    wrote raw query JSON to stdout on every search (and are Python-2-only
    syntax); the ``logging`` calls already record each search stage.
    """
    cls.ES.default_indices = cls.index_name()
    cls.ES.indices.refresh()

    # Stage 1: exact-phrase match on the feed address.
    logging.info("~FGSearch ~FCfeeds~FG by address: ~SB%s" % text)
    q = MatchQuery('address', text, operator="and", type="phrase")
    results = cls.ES.search(query=q, sort="num_subscribers:desc", size=5,
                            doc_types=[cls.type_name()])
    if not results.total:
        # Stage 2: all-terms match on the feed title.
        logging.info("~FGSearch ~FCfeeds~FG by title: ~SB%s" % text)
        q = MatchQuery('title', text, operator="and")
        results = cls.ES.search(query=q, sort="num_subscribers:desc", size=5,
                                doc_types=[cls.type_name()])
        if not results.total:
            # Stage 3: all-terms match on the feed link.
            logging.info("~FGSearch ~FCfeeds~FG by link: ~SB%s" % text)
            q = MatchQuery('link', text, operator="and")
            results = cls.ES.search(query=q, sort="num_subscribers:desc",
                                    size=5, doc_types=[cls.type_name()])
    return results
def phrase_search(topics, bibleverses, start, end, ts_field='created_at_date'):
    """For each term of each topic, find one matching document in Elasticsearch
    and annotate the term dict in place with the matched phrase and scores.

    topics      -- iterable of topics; each topic is a list of mutable dicts
                   that carry at least 'text' and 'weight' keys.
    bibleverses -- values for a terms filter on the 'bibleverse' field.
    start, end  -- datetimes bounding the search window on ``ts_field``
                   (upper bound exclusive).
    ts_field    -- name of the date field to range-filter on.

    Returns a new list: one deep-copied, score-sorted list of term dicts per
    topic. The input dicts are also mutated as a side effect.
    """
    conn = get_es_connection()
    es_settings = ESSettings()
    sorted_topics = []
    for topic in topics:
        is_spam = False
        for topic_term in topic:
            # Restrict hits to the date window and the requested bibleverses.
            filters = []
            filters.append(
                RangeFilter(qrange=ESRange(field=ts_field,
                                           from_value=start.strftime("%Y-%m-%d"),
                                           to_value=end.strftime("%Y-%m-%d"),
                                           include_upper=False))
            )
            filters.append(TermsFilter(field="bibleverse", values=bibleverses))
            # Loose phrase match (slop=50) on the term text; only the single
            # top hit is requested (size=1).
            q = MatchQuery('text', topic_term['text'], type='phrase', slop=50)
            q = FilteredQuery(q, ANDFilter(filters))
            q = q.search(size=1)
            resultset = conn.search(indices=es_settings.search_index,
                                    doc_types=[es_settings.search_es_type],
                                    query=q, size=1)
            for r in resultset:
                # Re-locate the matched phrase inside the hit text so we can
                # store the exact surrounding wording.
                # NOTE(review): the format string has exactly two placeholders,
                # so this assumes topic_term['text'] splits into at least two
                # terms (extras are ignored) — TODO confirm with callers.
                terms = topic_term['text'].split()
                regex = u"(?P<phrase>[a-z\s'\u2019]*{}.*{}[a-z\s']*)".format(*terms)
                # print "regex",regex
                ma = re.search(regex, r.text.lower())
                if not ma:
                    # Hit didn't contain a recognizable phrase span; leave the
                    # term un-annotated (it will sort with final_score 0.0).
                    continue
                # Annotate the term dict in place with match details.
                topic_term['es_phrase'] = ma.group('phrase').strip()
                topic_term['es_score'] = r._meta.score
                topic_term['final_score'] = topic_term['weight'] * topic_term['es_score']
                topic_term['tweet_text'] = r.text.encode('ascii', 'ignore')
                topic_term['bibleverse'] = r['bibleverse']
                # One spammy phrase marks the whole topic as spam below.
                if not is_spam and has_spam_text(topic_term['es_phrase']):
                    is_spam = True
        # Sort by combined score, best first; terms without a match sort last.
        sorted_topic = sorted(topic, key=lambda x: x.get('final_score', 0.0),
                              reverse=True)
        if is_spam:
            # Flag every term BEFORE the deepcopy so the copies carry the flag
            # (sorted_topic aliases the same dicts as topic).
            for topic_term in topic:
                topic_term['is_spam'] = True
        sorted_topics.append(copy.deepcopy(sorted_topic))
    return sorted_topics