Exemple #1
0
    def query(cls, text):
        """Look up feeds matching ``text`` and return the search results.

        Calls ``cls.create_elasticsearch_mapping()``, points ``cls.ES`` at
        ``cls.index_name()`` and refreshes the indices, then tries up to three
        match queries in order: ``address`` (phrase match), ``title``, ``link``.
        Every search asks for 5 hits sorted by ``num_subscribers`` descending
        and is restricted to ``cls.type_name()``. A later field is only
        queried when the previous result set has a falsy ``total``.

        Returns the result set of the last search that ran, or ``[]`` when
        ``pyes.exceptions.NoServerAvailable`` is raised while selecting or
        refreshing the index.
        """
        cls.create_elasticsearch_mapping()
        try:
            cls.ES.default_indices = cls.index_name()
            cls.ES.indices.refresh()
        except pyes.exceptions.NoServerAvailable:
            logging.debug(" ***> ~FRNo search server available.")
            return []

        # (log template, field, extra MatchQuery options), in fallback order.
        fallbacks = (
            ("~FGSearch ~FCfeeds~FG by address: ~SB%s", 'address',
             {'operator': "and", 'type': "phrase"}),
            ("~FGSearch ~FCfeeds~FG by title: ~SB%s", 'title',
             {'operator': "and"}),
            ("~FGSearch ~FCfeeds~FG by link: ~SB%s", 'link',
             {'operator': "and"}),
        )

        found = None
        for log_template, field, match_options in fallbacks:
            # Stop as soon as an earlier attempt produced hits. The check is
            # made before each fallback (never after the last search), so the
            # final result set is handed back without being inspected.
            if found is not None and found.total:
                break
            logging.info(log_template % text)
            match = MatchQuery(field, text, **match_options)
            found = cls.ES.search(query=match,
                                  sort="num_subscribers:desc",
                                  size=5,
                                  doc_types=[cls.type_name()])

        return found
Exemple #2
0
    def query(cls, text):
        """Search feeds for ``text`` by address, then title, then link.

        Points ``cls.ES`` at ``cls.index_name()`` and refreshes the indices
        before searching. The address lookup is a phrase match; title and
        link are plain matches. All three use the ``and`` operator, request
        5 hits sorted by ``num_subscribers`` descending, and are limited to
        ``cls.type_name()``.

        Returns the first result set whose ``total`` is truthy; if neither
        the address nor the title search has hits, returns the link result
        set as-is (it is not inspected here).
        """
        cls.ES.default_indices = cls.index_name()
        cls.ES.indices.refresh()

        def attempt(log_template, field, **match_options):
            # One logged search against a single field.
            logging.info(log_template % text)
            match = MatchQuery(field, text, **match_options)
            return cls.ES.search(query=match,
                                 sort="num_subscribers:desc",
                                 size=5,
                                 doc_types=[cls.type_name()])

        hits = attempt("~FGSearch ~FCfeeds~FG by address: ~SB%s", 'address',
                       operator="and", type="phrase")
        if hits.total:
            return hits

        hits = attempt("~FGSearch ~FCfeeds~FG by title: ~SB%s", 'title',
                       operator="and")
        if hits.total:
            return hits

        return attempt("~FGSearch ~FCfeeds~FG by link: ~SB%s", 'link',
                       operator="and")
Exemple #3
0
    def query(cls, text):
        """Search feeds for ``text`` by address, then title, then link.

        Points ``cls.ES`` at ``cls.index_name()`` and refreshes the indices,
        then runs up to three match queries in order: ``address`` (phrase
        match), ``title``, ``link``. Every search asks for 5 hits sorted by
        ``num_subscribers`` descending and is restricted to
        ``cls.type_name()``. A later field is only queried when the previous
        result set has a falsy ``total``.

        The serialized query is reported through ``logging.debug`` rather
        than printed to stdout, so a search no longer writes to the process's
        standard output.

        Returns the result set of the last search that ran.
        """
        cls.ES.default_indices = cls.index_name()
        cls.ES.indices.refresh()

        # (log template, field, extra MatchQuery options), in fallback order.
        fallbacks = (
            ("~FGSearch ~FCfeeds~FG by address: ~SB%s", 'address',
             {'operator': "and", 'type': "phrase"}),
            ("~FGSearch ~FCfeeds~FG by title: ~SB%s", 'title',
             {'operator': "and"}),
            ("~FGSearch ~FCfeeds~FG by link: ~SB%s", 'link',
             {'operator': "and"}),
        )

        results = None
        for log_template, field, match_options in fallbacks:
            # Only fall through to the next field when the previous search
            # came back empty; the final result set is returned uninspected.
            if results is not None and results.total:
                break
            logging.info(log_template % text)
            q = MatchQuery(field, text, **match_options)
            # Debug aid: replaces the former bare ``print`` statements.
            logging.debug("Feed search query: %s (index=%s, type=%s)" % (
                q.serialize(), cls.index_name(), cls.type_name()))
            results = cls.ES.search(query=q, sort="num_subscribers:desc", size=5,
                                    doc_types=[cls.type_name()])

        return results
def _phrase_regex(text):
    """Compile the regex used to cut a topic term's phrase out of tweet text.

    The pattern captures, as group ``phrase``, a run of lowercase letters,
    whitespace and apostrophes, then the first word of ``text``, anything,
    the second word of ``text`` (when there is one), and a trailing run of
    lowercase letters, whitespace and apostrophes. Words beyond the second
    are not part of the pattern. Words are escaped so they always match
    literally.

    Returns ``None`` when ``text`` contains no words.
    """
    terms = text.split()
    if not terms:
        return None
    # At most two words anchor the phrase; a single word simply has no
    # ".*<second word>" part instead of raising on a missing placeholder.
    anchor = u".*".join(re.escape(term) for term in terms[:2])
    return re.compile(u"(?P<phrase>[a-z\\s'\u2019]*" + anchor + u"[a-z\\s']*)")


def phrase_search(topics, bibleverses, start, end, ts_field='created_at_date'):
    """Annotate topic terms with a matching phrase from the search index.

    For every term dict in every topic, runs a phrase query (slop 50) on the
    ``text`` field for ``topic_term['text']``, filtered to documents whose
    ``bibleverse`` is one of ``bibleverses`` and whose ``ts_field`` lies
    between ``start`` and ``end`` (formatted as ``%Y-%m-%d``, upper bound
    excluded). At most one hit is requested per term. When the hit's
    lowercased text matches the term's phrase regex, the term dict gains
    ``es_phrase``, ``es_score``, ``final_score`` (``weight`` * ``es_score``),
    ``tweet_text`` and ``bibleverse``.

    If any extracted phrase in a topic is flagged by ``has_spam_text``, every
    term of that topic gets ``is_spam = True``.

    Args:
        topics: iterable of topics; each topic is an iterable of dicts that
            carry at least ``'text'`` and ``'weight'``. The dicts are
            modified in place.
        bibleverses: values handed to the ``bibleverse`` terms filter.
        start, end: objects providing ``strftime`` (date range bounds).
        ts_field: name of the timestamp field used for the range filter.

    Returns:
        A list with one entry per topic: a deep copy of the topic's terms
        sorted by ``final_score`` descending (terms without one count as 0.0).
    """
    conn = get_es_connection()
    es_settings = ESSettings()

    # The date/verse restriction is the same for every term, so build it once
    # instead of once per term of every topic.
    term_filter = ANDFilter([
        RangeFilter(qrange=ESRange(field=ts_field,
                                   from_value=start.strftime("%Y-%m-%d"),
                                   to_value=end.strftime("%Y-%m-%d"),
                                   include_upper=False)),
        TermsFilter(field="bibleverse", values=bibleverses),
    ])

    sorted_topics = []
    for topic in topics:
        is_spam = False
        for topic_term in topic:
            phrase_re = _phrase_regex(topic_term['text'])
            if phrase_re is None:
                # No words to look for: nothing could be extracted from a hit.
                continue

            q = MatchQuery('text', topic_term['text'], type='phrase', slop=50)
            q = FilteredQuery(q, term_filter)
            q = q.search(size=1)
            resultset = conn.search(indices=es_settings.search_index,
                                    doc_types=[es_settings.search_es_type],
                                    query=q,
                                    size=1)

            for r in resultset:
                ma = phrase_re.search(r.text.lower())
                if not ma:
                    continue
                topic_term['es_phrase'] = ma.group('phrase').strip()
                topic_term['es_score'] = r._meta.score
                topic_term['final_score'] = topic_term['weight'] * topic_term['es_score']
                topic_term['tweet_text'] = r.text.encode('ascii', 'ignore')
                topic_term['bibleverse'] = r['bibleverse']

                # One spammy phrase taints the whole topic; skip further
                # checks once the topic is already flagged.
                if not is_spam and has_spam_text(topic_term['es_phrase']):
                    is_spam = True

        sorted_topic = sorted(topic,
                              key=lambda x: x.get('final_score', 0.0),
                              reverse=True)
        if is_spam:
            # sorted_topic holds the same dict objects, so the flag set here
            # is included in the deep copy below.
            for topic_term in topic:
                topic_term['is_spam'] = True
        sorted_topics.append(copy.deepcopy(sorted_topic))
    return sorted_topics