Example #1
0
    def search(self, args, es_client=client):
        """Search events, applying a query + highlight for each provided arg.

        :param args: mapping of request arguments (hyphenated keys)
        :param es_client: Elasticsearch client (defaults to module client)
        :return: list of result dicts via ``to_dict``
        """
        search = Search(using=es_client, index=SearchableEvent.meta.index)

        # (request-arg key, query type, document field)
        criteria = [
            ('name', 'fuzzy', 'name'),
            ('description', 'match', 'description'),
            ('location-name', 'fuzzy', 'location_name'),
            ('organizer-name', 'fuzzy', 'organizer_name'),
            ('organizer-description', 'fuzzy', 'organizer_description'),
        ]
        for arg_key, query_type, field in criteria:
            value = args.get(arg_key)
            if value:
                # BUG FIX: the original checked args.get('location-name') but
                # then read args['location_name'] (and likewise for the two
                # organizer keys), raising KeyError whenever the hyphenated
                # key was supplied. Use the value of the key actually checked.
                search = search.query(query_type, **{field: value})
                search = search.highlight(field)

        return [to_dict(r) for r in search.execute()]
Example #2
0
        def get_highlights():
            """Return highlighted wiki/qb fragments for the guess named in the
            request form, expanding the search window from 20 to 80 hits when
            the guess is not found among the first page of results."""
            wiki_field = 'wiki_content'
            qb_field = 'qb_content'
            text = request.form['text']

            def run_search(size):
                # Query the top `size` documents, requesting highlights on
                # both content fields.
                s = Search(index='qb')[0:size].query(
                    'multi_match', query=text, fields=[wiki_field, qb_field])
                s = s.highlight(wiki_field).highlight(qb_field)
                return list(s.execute())

            def find_guess(results):
                # Return the hit whose page title matches the requested guess
                # (case-insensitive, first 25 characters only).
                for item in results:
                    if item.page.lower().replace(
                            "_", " ")[0:25] == guessForEvidence:
                        return item
                return None

            results = run_search(20)

            if len(results) == 0:
                highlights = {'wiki': [''], 'qb': [''], 'guess': ''}
            else:
                # Pull the page title out of the anchor markup sent by the UI.
                guessForEvidence = request.form['guessForEvidence']
                guessForEvidence = guessForEvidence.split(
                    "style=\"color:blue\">")[1].split("</a>")[0].lower()

                guess = find_guess(results)
                # PEP 8 fix: compare against None with `is`, not `==`.
                if guess is None:
                    print("expanding search")
                    results = run_search(80)
                    guess = find_guess(results)
                    if guess is None:
                        highlights = {'wiki': [''], 'qb': [''], 'guess': ''}
                        return jsonify(highlights)

                _highlights = guess.meta.highlight
                # A field with no fragments is absent from the payload and
                # raises AttributeError on access.
                try:
                    wiki_content = list(_highlights.wiki_content)
                except AttributeError:
                    wiki_content = ['']

                try:
                    qb_content = list(_highlights.qb_content)
                except AttributeError:
                    qb_content = ['']

                highlights = {
                    'wiki': wiki_content,
                    'qb': qb_content,
                    'guess': guess.page
                }
            return jsonify(highlights)
Example #3
0
        def interface_get_highlights():
            """Return highlighted wiki/qb fragments for the guess named in the
            request form, widening the search from 20 to 80 hits when the
            guess is not in the first page of results."""
            wiki_field = "wiki_content"
            qb_field = "qb_content"
            text = request.form["text"]
            s = Search(index="qb")[0:20].query("multi_match",
                                               query=text,
                                               fields=[wiki_field, qb_field])
            s = s.highlight(wiki_field).highlight(qb_field)
            results = list(s.execute())

            if len(results) == 0:
                highlights = {"wiki": [""], "qb": [""], "guess": ""}
            else:
                # Extract the page title from the anchor markup sent by the UI.
                guessForEvidence = request.form["guessForEvidence"]
                guessForEvidence = (guessForEvidence.split(
                    'style="color:blue">')[1].split("</a>")[0].lower())

                guess = None
                for index, item in enumerate(results):
                    # Case-insensitive match on the first 25 characters.
                    if item.page.lower().replace(
                            "_", " ")[0:25] == guessForEvidence:
                        guess = results[index]
                        break
                # PEP 8 fix: compare against None with `is`, not `==`.
                if guess is None:
                    print("expanding search")
                    s = Search(index="qb")[0:80].query(
                        "multi_match",
                        query=text,
                        fields=[wiki_field, qb_field])
                    s = s.highlight(wiki_field).highlight(qb_field)
                    results = list(s.execute())
                    for index, item in enumerate(results):
                        if (item.page.lower().replace(
                                "_", " ")[0:25] == guessForEvidence):
                            guess = results[index]
                            break
                    if guess is None:
                        highlights = {"wiki": [""], "qb": [""], "guess": ""}
                        return jsonify(highlights)

                _highlights = guess.meta.highlight
                # Fields with no fragments are absent from the payload.
                try:
                    wiki_content = list(_highlights.wiki_content)
                except AttributeError:
                    wiki_content = [""]

                try:
                    qb_content = list(_highlights.qb_content)
                except AttributeError:
                    qb_content = [""]

                highlights = {
                    "wiki": wiki_content,
                    "qb": qb_content,
                    "guess": guess.page,
                }
            return jsonify(highlights)
Example #4
0
    def search_close(self, origin_timestamp, channel, qterm, number_results):
        """
        Find log entries close to origin timestamp, filter by channel, highlight qterm and return them sorted by date.

        :param origin_timestamp: origin timestamp to find logs around
        :param channel: Channel to be filtered
        :param qterm: Term to be highlighted
        :param number_results: how many results
        :return: List of sorted log entries (Elastic-search response)
        :rtype: ``list``
        """
        search = DslSearch(using=self._es, index=self._index_prefix.format('*'))

        # These term clauses exist purely so matching text gets highlighted;
        # the boost is tiny so they barely influence scoring.
        tiny_boost = 1e-15
        highlight_clauses = (
            MatchPhrase(msg={'query': qterm, 'boost': tiny_boost})
            | Match(**{'username': {'query': qterm, 'boost': tiny_boost}})
            | Match(channel={'query': qterm, 'boost': tiny_boost})
            | Match(msg={'query': qterm, 'boost': tiny_boost})
        )

        # Score documents by temporal distance from the origin timestamp.
        decay = SF('exp', **{
            '@timestamp': {
                "origin": origin_timestamp,
                "scale": "1m",
                "decay": 0.999
            }
        })
        search = search.query(
            Q('function_score',
              query=highlight_clauses | Q('match_all'),
              functions=[decay]))

        # Restrict to the requested channel and cap the result count.
        search = search.filter('term', **{'channel.keyword': channel})
        search = search[0:number_results]

        # Highlighting: score-ordered, whole message text un-fragmented.
        search = search.highlight_options(order='score')
        search = search.highlight('msg', number_of_fragments=0)
        search = search.highlight('username')
        search = search.highlight('channel')

        # Execute and return hits in chronological order.
        return sorted(search.execute(), key=lambda hit: hit['@timestamp'])
Example #5
0
        def interface_get_highlights():
            """Return highlighted wiki/qb fragments for the guess named in the
            request form, widening the search from 20 to 80 hits when the
            guess is not in the first page of results."""
            wiki_field = 'wiki_content'
            qb_field = 'qb_content'
            text = request.form['text']
            s = Search(index='qb')[0:20].query(
                'multi_match', query=text, fields=[wiki_field, qb_field])
            s = s.highlight(wiki_field).highlight(qb_field)
            results = list(s.execute())

            if len(results) == 0:
                highlights = {'wiki': [''],
                              'qb': [''],
                              'guess': ''}
            else:
                # Extract the page title from the anchor markup sent by the UI.
                guessForEvidence = request.form['guessForEvidence']
                guessForEvidence = guessForEvidence.split("style=\"color:blue\">")[1].split("</a>")[0].lower()

                guess = None
                for index, item in enumerate(results):
                    # Case-insensitive match on the first 25 characters.
                    if item.page.lower().replace("_", " ")[0:25] == guessForEvidence:
                        guess = results[index]
                        break
                # PEP 8 fix: compare against None with `is`, not `==`.
                if guess is None:
                    print("expanding search")
                    s = Search(index='qb')[0:80].query(
                        'multi_match', query=text, fields=[wiki_field, qb_field])
                    s = s.highlight(wiki_field).highlight(qb_field)
                    results = list(s.execute())
                    for index, item in enumerate(results):
                        if item.page.lower().replace("_", " ")[0:25] == guessForEvidence:
                            guess = results[index]
                            break
                    if guess is None:
                        highlights = {'wiki': [''],
                                      'qb': [''],
                                      'guess': ''}
                        return jsonify(highlights)

                _highlights = guess.meta.highlight
                # Fields with no fragments are absent from the payload.
                try:
                    wiki_content = list(_highlights.wiki_content)
                except AttributeError:
                    wiki_content = ['']

                try:
                    qb_content = list(_highlights.qb_content)
                except AttributeError:
                    qb_content = ['']

                highlights = {'wiki': wiki_content,
                              'qb': qb_content,
                              'guess': guess.page}
            return jsonify(highlights)
Example #6
0
    def run(self,
            key: Union[int, slice] = slice(0,
                                           settings.SEARCH_RESULTS_PER_PAGE)):
        """Perform search, placing the results in `self.results`, and the total
        number of results (across all pages) in `self.total`. Chainable."""

        s = DSLSearch(using=es7_client(), index=self.get_index())
        s = s.params(**settings.ES7_SEARCH_PARAMS)

        # filter from the concrete search class
        s = s.query(self.get_filter())
        # per-field highlight configuration declared by the search class
        for field, opts in self.get_highlight_fields_options():
            s = s.highlight(field, **opts)
        # restrict to the requested page/slice
        s = s[key]

        # run the query and record how it was sliced
        self.hits = s.execute().hits
        self.last_key = key

        self.total = self.hits.total.value
        self.results = [self.make_result(h) for h in self.hits]

        return self
Example #7
0
File: hook.py Project: NPSDC/qb
    def get_highlights(self, text):
        """Return wiki/qb highlight fragments for the best guess on *text*.

        :param text: question text to search with
        :return: dict with 'wiki' and 'qb' (fragment lists, or None when the
                 field produced no highlights) and 'guess' (page title;
                 '' when the query matched nothing)
        """
        # query top 10 guesses
        s = Search(index="qb_ir_instance_of")[0:10].query(
            "multi_match",
            query=text,
            fields=["wiki_content", "qb_content", "source_content"],
        )
        s = s.highlight("qb_content").highlight("wiki_content")
        results = list(s.execute())

        # BUG FIX: the original indexed results[0] unconditionally, raising
        # IndexError when the query matched nothing.
        if not results:
            return {"wiki": None, "qb": None, "guess": ""}

        guess = results[0]  # take the best answer
        _highlights = guess.meta.highlight

        # A field with no fragments is absent from the highlight payload.
        try:
            wiki_content = list(_highlights.wiki_content)
        except AttributeError:
            wiki_content = None

        try:
            qb_content = list(_highlights.qb_content)
        except AttributeError:
            qb_content = None

        return {
            "wiki": wiki_content,
            "qb": qb_content,
            "guess": guess.page,
        }
	def GetAuditDataMain(self, data):
		"""Run a query-string search over audit documents and flatten the
		highlight payload into one row per highlighted field.

		:param data: raw query string (user-supplied Lucene syntax)
		:return: list of dicts (doc_id/endpoint/audittype/field/response);
		         on connection failure, {"connection_error": ...}
		"""
		# Highlight every field across up to 1000 hits; require_field_match
		# off so highlights are not limited to the queried field.
		s = Search()
		s = s[0:1000]
		s = s.highlight('*')
		s = s.highlight_options(require_field_match=False)
		# Match the user query but exclude two noisy generator types.
		t = Q('query_string', query=data) & ~Q('query_string', default_field="AuditType.Generator", query="stateagentinspector") & ~Q('query_string', default_field="AuditType.Generator", query="w32processes-tree")

		query = s.query(t)

		try:
			# POST the rendered DSL body directly to the ES endpoint.
			# NOTE(review): verify=False disables TLS certificate checks -- confirm intentional.
			r = requests.post(self.es_host + ":" + self.es_port + self.index + self.type_audit_type + '/_search', data=json.dumps(query.to_dict()), auth=(self.elastic_user, self.elastic_pass), verify=False)
		except ConnectionError as e:
			ret = {"connection_error": e.args[0]}
			return ret

		# Reuses the `data` parameter name as the output accumulator.
		data = []

		try:
			# Flatten: one output row per (hit, highlighted field) pair.
			# NOTE(review): .iteritems() is Python 2 only -- .items() on Python 3.
			for x in r.json()['hits']['hits']:
				for y, v in x['highlight'].iteritems():
					data.append({
							"doc_id": x['_id'],
							"endpoint": x['_parent'],
							"audittype": x['_source']['AuditType']['Generator'],
							"field": y,
							"response": v
						})
		except KeyError:
			# Hits without a 'highlight' (or expected metadata) end the loop.
			pass

		return data
Example #9
0
        def get_highlights():
            """Serve highlighted wiki/qb snippets for the best guess as JSON."""
            text = request.form["text"]
            fields = ["wiki_content", "qb_content"]
            query = Search(index="qb")[0:10].query(
                "multi_match", query=text, fields=fields)
            query = query.highlight("wiki_content").highlight("qb_content")
            hits = list(query.execute())

            if not hits:
                # No match at all: empty placeholders keep the payload shape.
                return jsonify({"wiki": [""], "qb": [""], "guess": ""})

            best = hits[0]  # top-ranked answer
            fragments = best.meta.highlight
            # Fields with no fragments are simply absent from the payload.
            wiki = list(getattr(fragments, "wiki_content", [""]))
            qb = list(getattr(fragments, "qb_content", [""]))
            return jsonify({"wiki": wiki, "qb": qb, "guess": best.page})
Example #10
0
def get_highlights(text):
    """Fetch highlighted wiki/qb passages for the top guess on *text*."""
    # query top 10 guesses
    query = (Search(index='qb_ir_instance_of')[0:10]
             .query('multi_match', query=text,
                    fields=['wiki_content', 'qb_content', 'source_content'])
             .highlight('qb_content')
             .highlight('wiki_content'))
    hits = list(query.execute())
    if not hits:
        # Nothing matched: placeholder payload with the same shape.
        return {'wiki': [''], 'qb': [''], 'guess': ''}

    best = hits[0]  # highest-scoring document
    frags = best.meta.highlight
    # Missing highlight fields are absent from the payload.
    wiki = list(getattr(frags, 'wiki_content', [''])) 
    qb = list(getattr(frags, 'qb_content', ['']))
    return {'wiki': wiki, 'qb': qb, 'guess': best.page}
Example #11
0
def get_highlights(text):
    """Fetch highlighted wiki/qb passages for the top-ranked guess."""
    # query top 10 guesses
    query = (Search(index="qb_0")[0:10]
             .query("multi_match", query=text,
                    fields=["wiki_content", "qb_content", "source_content"])
             .highlight("qb_content")
             .highlight("wiki_content"))
    hits = list(query.execute())
    if not hits:
        # Nothing matched: placeholder payload with the same shape.
        return {"wiki": [""], "qb": [""], "guess": ""}

    best = hits[0]  # top answer
    frags = best.meta.highlight
    # Fields with no fragments are absent from the payload.
    wiki = list(getattr(frags, "wiki_content", [""]))
    qb = list(getattr(frags, "qb_content", [""]))
    return {"wiki": wiki, "qb": qb, "guess": best.page}
Example #12
0
        def get_highlights():
            """JSON endpoint: highlighted wiki/qb snippets for the top hit."""
            text = request.form['text']
            fields = ['wiki_content', 'qb_content']
            s = (Search(index='qb')[0:10]
                 .query('multi_match', query=text, fields=fields)
                 .highlight('wiki_content')
                 .highlight('qb_content'))
            hits = list(s.execute())

            if not hits:
                # Nothing matched: return empty placeholders.
                payload = {'wiki': [''], 'qb': [''], 'guess': ''}
            else:
                best = hits[0]  # take the best answer
                frags = best.meta.highlight
                # Absent highlight fields fall back to an empty fragment.
                payload = {
                    'wiki': list(getattr(frags, 'wiki_content', [''])),
                    'qb': list(getattr(frags, 'qb_content', [''])),
                    'guess': best.page,
                }
            return jsonify(payload)
Example #13
0
    def run(self, query, page=1, default_operator="AND", **kwargs):
        """Perform search, placing the results in `self.results`, and the total
        number of results (across all pages) in `self.total`. Chainable."""

        s = DSLSearch(using=es7_client(), index=self.get_index())
        s = s.params(**settings.ES7_SEARCH_PARAMS)

        # query through the search class' filter
        s = s.query(
            self.get_filter(query=query, default_operator=default_operator, **kwargs)
        )
        # highlight whatever fields/options the search class declares
        s = s.highlight(*self.get_highlight_fields(), **self.get_highlight_options())

        # pagination window for the requested page
        offset = (page - 1) * self.results_per_page
        s = s[offset:offset + self.results_per_page]

        # run the query and publish results on the instance
        self.hits = s.execute().hits
        self.total = self.hits.total.value if self.hits else 0
        self.results = [self.make_result(h) for h in self.hits]

        return self
Example #14
0
def get_highlights(text):
    """Return highlighted wiki/qb content for the best answer to *text*."""
    def fragments(meta_highlight, field):
        # A field with no highlight fragments is absent from the payload.
        try:
            return list(getattr(meta_highlight, field))
        except AttributeError:
            return ['']

    # query top 10 guesses
    s = Search(index='qb_ir_instance_of')[0:10]
    s = s.query('multi_match', query=text,
                fields=['wiki_content', 'qb_content', 'source_content'])
    s = s.highlight('qb_content').highlight('wiki_content')
    hits = list(s.execute())
    if not hits:
        # Nothing matched: placeholder payload with the same shape.
        return {'wiki': [''], 'qb': [''], 'guess': ''}

    best = hits[0]  # take the best answer
    return {'wiki': fragments(best.meta.highlight, 'wiki_content'),
            'qb': fragments(best.meta.highlight, 'qb_content'),
            'guess': best.page}
Example #15
0
        def get_highlights():
            """Serve highlighted wiki/qb snippets for the best guess as JSON."""
            text = request.form['text']
            search = Search(index='qb')[0:10].query(
                'multi_match', query=text,
                fields=['wiki_content', 'qb_content'])
            search = search.highlight('wiki_content').highlight('qb_content')
            matches = list(search.execute())

            if not matches:
                # Empty result set: keep the payload shape stable.
                return jsonify({'wiki': [''], 'qb': [''], 'guess': ''})

            top = matches[0]  # best-scoring answer
            hl = top.meta.highlight
            return jsonify({
                # Fields without fragments are missing from the payload.
                'wiki': list(getattr(hl, 'wiki_content', [''])),
                'qb': list(getattr(hl, 'qb_content', [''])),
                'guess': top.page,
            })
Example #16
0
    def get(self, request, *args, **kwargs):
        """Search wines, optionally filtered by country/points, returning
        <mark>-highlighted matches on variety/winery/description."""
        params = self.request.query_params
        query = params.get('query')
        country = params.get('country')
        points = params.get('points')

        search = Search(index=constants.ES_INDEX)
        bool_query = {'should': [], 'filter': []}

        if query:
            # Boost variety over winery over description.
            bool_query['should'] = [
                Match(variety={'query': query, 'boost': 3.0}),
                Match(winery={'query': query, 'boost': 2.0}),
                Match(description={'query': query, 'boost': 1.0}),
            ]
            bool_query['minimum_should_match'] = 1

            # Return whole fields (no fragmenting), wrapped in <mark> tags.
            search = search.highlight_options(number_of_fragments=0,
                                              pre_tags=['<mark>'],
                                              post_tags=['</mark>'])
            search = search.highlight('variety', 'winery', 'description')

        if country:
            bool_query['filter'].append(Term(country=country))
        if points:
            bool_query['filter'].append(Term(points=points))

        response = search.query('bool', **bool_query).params(size=100).execute()

        if response.hits.total.value == 0:
            return Response(data=[])

        def highlighted(hit, field):
            # Prefer the highlighted fragment when ES produced one.
            if 'highlight' in hit.meta and field in hit.meta.highlight:
                return hit.meta.highlight[field][0]
            return getattr(hit, field)

        return Response(data=[{
            'id': hit.meta.id,
            'country': hit.country,
            'description': highlighted(hit, 'description'),
            'points': hit.points,
            'price': hit.price,
            'variety': highlighted(hit, 'variety'),
            'winery': highlighted(hit, 'winery'),
        } for hit in response])
Example #17
0
async def fetchArticlBody(*,
                          projectName: str = Path(...),
                          urlItem: str,
                          word: str):
    """Fetch article documents matching *urlItem* and *word*, returning each
    hit's source plus its highlight payload."""
    # Query data in the project database's 'articles' collection.

    # Resolve projectName to projectId.
    projectId = await findProjectIdFromProjectName(
        dbPrefix,
        'Project',
        queryDict={'projectName': projectName},
        showDict={'_id': 1})
    if not projectId:
        raise HTTPException(status_code=503, detail='projectNotExist')

    # Page window start/end.
    # NOTE(review): `end` stays 0, so the `end > 0` branch below never runs
    # and no 'id' field is ever added -- confirm whether paging was intended.
    start = 0
    end = 0
    # ES index to search (equivalent to a database in Mongo).
    _index = f'kwm-{projectId}.articles'.lower()
    #print('_index', _index)

    s = Search()

    q1 = Q("match_phrase", url=f"\"{urlItem}\"")  # url match
    q2 = Q('match_phrase', body=f"\"{word}\"")  # word match
    s = s.query(q1)
    s = s.query(q2)
    s = s.source(includes=[''])  # do not return source fields
    s = s.highlight_options(order='score')
    s = s.highlight_options(
        pre_tags="<strong style=\"background: yellow;color: red\">")
    s = s.highlight_options(post_tags="</strong>")
    s = s.highlight_options(fragment_size=300)  #
    s = s.highlight('body')
    s = s[0:10000]

    # common setting
    #print(s.to_dict())

    # Execute the search via the project's async ES runner.
    response = await esRun(s.to_dict(), _index)  #s.execute(ignore_cache=True)
    #totalCount = response.hits.total.value
    temp = response.to_dict()['hits']['hits']
    result = []
    for item in temp:
        # Re-wrap the ES _id as a Mongo-style ObjectId reference.
        tt = {'_id': {'$oid': item['_id']}}
        tt.update(item['_source'])
        if item.get('highlight'):
            tt.update({'highlight': item['highlight']})
        if start >= 0 and end > 0:
            tt.update({'id': start + 1})
        result.append(tt)
        start = start + 1
    return (result)
Example #18
0
    def portalSearch(expression, start=0, end=25):
        """Full-text portal search over repository and data nodes.

        :param expression: query text matched against all fields (_all)
        :param start: unused in this body -- TODO confirm paging was intended
        :param end: unused in this body -- TODO confirm paging was intended
        :return: dict with 'nodes' (published repositories plus up to 32
                 published data hits) and 'Counts' (left empty here)
        """
        client = Elasticsearch()
        ret = {'nodes': [], 'Counts': {}}
        q = Q("bool", must=[Q('match', _all=expression)])
        s = Search(using=client,
                   index="neo4j-inquisite-node",
                   doc_type="Repository,Data").query(q)
        # Fetch every match: count first, then widen the slice to the total.
        q_total = s.count()
        s = s[0:q_total]
        s = s.highlight_options(require_field_match=False)
        s = s.highlight('*', fragment_size=45)
        res = s.execute()
        data = {}
        uuids = []
        # NOTE(review): pub_uuids is populated but never read afterwards.
        pub_uuids = {}
        if res:
            for r in res:
                d = r.to_dict()
                if r.meta.doc_type == 'Repository':
                    # Skip unpublished repositories.
                    if int(d['published']) == 0:
                        continue
                    repo_id = r.meta.id
                    ret['nodes'].append({
                        "id": r.meta.id,
                        "type": "Repository",
                        "name": d['name'],
                        "description": d['readme']
                    })
                    repo_uuids = SearchManager._getDataUUIDsForRepo(repo_id)
                    pub_uuids[repo_id] = repo_uuids
                else:
                    # Data node: collect its per-field highlight fragments.
                    hits = []
                    highs = r.meta.highlight.to_dict()
                    for high_field, high_value in highs.items():
                        hits.append({high_field: high_value})
                    data[r.meta.id] = {'id': r.meta.id, "hits": hits}
                    uuids.append(r.meta.id)
            # Keep only data nodes belonging to published repositories,
            # resolved through the graph database; cap at 32 results.
            qString = "MATCH (r:Repository)--(t:SchemaType)--(d:Data) WHERE d.uuid IN {uuids} AND r.published = '1' RETURN d.uuid as uuid, r.name as repo_name, r.uuid as repo_id"
            pub_data = db.run(qString, {"uuids": uuids})
            data_max = 0
            for checked in pub_data:
                if data_max >= 32:
                    break
                ret['nodes'].append({
                    "id": checked['uuid'],
                    "type": "Data",
                    "repo_id": checked['repo_id'],
                    "repo_name": checked['repo_name'],
                    "hits": data[checked['uuid']]['hits']
                })
                data_max += 1

            return ret
        else:
            return ret
Example #19
0
def es_search(query_string, branch, ops_type, limit, fuzzy):
    """Multi-field search over an ontology index, with highlighting.

    :param query_string: user query text
    :param branch: index name to search
    :param ops_type: document type
    :param limit: maximum number of hits (coerced to int)
    :param fuzzy: when truthy, allow fuzzy matching (AUTO, prefix_length=5)
    :return: raw response dict from Elasticsearch
    """
    # Shared boosted field list -- was duplicated in both branches before.
    fields = ['label^4', 'title^3', 'prefLabel^4', 'identifier', 'description',
              'altLabel^2', 'Synonym', 'Definition', 'shortName', 'mnemonic',
              'disease_class']
    s = Search(using=elasticsearch(), index=(branch), doc_type=ops_type)
    s = s[0:int(limit)]
    if fuzzy:
        q = Q('multi_match', query=query_string, fields=fields,
              fuzziness="AUTO", prefix_length=5, type='best_fields',
              tie_breaker=0.3)
    else:
        q = Q('multi_match', query=query_string, fields=fields,
              fuzziness=0, type='best_fields', tie_breaker=0.3)
    s = s.highlight('label', 'title', 'identifier', 'description', 'prefLabel',
                    'altLabel', 'Synonym', 'Definition', 'shortName',
                    'mnemonic', 'disease_class')
    s = s.query(q)
    es_response = s.execute()
    return es_response.to_dict()
Example #20
0
def search_content(keyword, limit=50):
    """Full-text search *keyword* over title/content in the pet-index.

    :param keyword: query text matched against 'title' and 'content'
    :param limit: maximum number of hits to return (default 50)
    :return: elasticsearch_dsl Response with score-ordered 'content' highlights
    """
    client = Elasticsearch()
    q = Q("multi_match", query=keyword, fields=['title', 'content'])
    # BUG FIX: the original built a bare Search(using=client) and immediately
    # overwrote it with the index-bound search; build it once.
    s = Search(using=client, index="pet-index").query(q)
    s = s[0:limit]
    s = s.highlight_options(order='score')
    s = s.highlight('content')
    return s.execute()
def match_phrase_in_text(phrase):
    # Exact-phrase search over the 'text' field; the whole field is returned
    # as a single <mark>-wrapped fragment. (Python 2: uses print statements.)
    s = Search(using=client, index="sample_film_index")
    q = Q('match_phrase', text=phrase)
    s = s.query(q)
    s = s.highlight_options(pre_tags='<mark>', post_tags='</mark>')  # for html
    # Huge fragment_size + one fragment = highlight the entire field.
    s = s.highlight('text', fragment_size=999999999, number_of_fragments=1)
    response = s.execute()
    print "Num hits for", phrase, len(response.to_dict()['hits']['hits'])
    for hit in response:
        print hit.meta.score  #doc score
        print hit.meta.highlight  #highlighted snippet
Example #22
0
def test6_highlight():
    '''
    Highlighting demo: match on the 'sport' field and print the highlighted
    fragments for each hit. (Python 2: uses print statements.)
    :return:
    '''
    s = Search(using=client, index='test-index')
    s = s.query('match', sport='足球')
    s = s.highlight('sport')  # field to highlight
    response = s.execute()
    for hit in response:
        for h_result in hit.meta.highlight.sport:  # highlighted fragments
            print h_result
Example #23
0
    def run(self, query, page=1, default_operator="AND"):
        """Perform search, placing the results in `self.results`, and the total
        number of results (across all pages) in `self.total`. Chainable."""

        # dfs_query_then_fetch gives consistent scoring across shards
        s = DSLSearch(using=es7_client(), index=self.get_index())
        s = s.params(search_type="dfs_query_then_fetch")

        # restrict to the search class' filter
        s = s.query("bool", filter=self.get_filter())

        # the user query itself, over the search class' fields;
        # WHITESPACE is deliberately omitted from flags: it would break
        # phrases into tokens before char mappings and multi-word synonyms
        # get a chance to apply
        s = s.query(
            "simple_query_string",
            query=query,
            default_operator=default_operator,
            fields=self.get_fields(),
            flags="AND|ESCAPE|FUZZY|NEAR|NOT|OR|PHRASE|PRECEDENCE|PREFIX|SLOP",
        )

        # fast-vector-highlighter: one score-ordered, sentence-bounded
        # fragment per field, wrapped in the highlight tag
        s = s.highlight(
            *self.get_highlight_fields(),
            type="fvh",
            order="score",
            number_of_fragments=1,
            boundary_scanner="sentence",
            fragment_size=SNIPPET_LENGTH,
            pre_tags=[f"<{HIGHLIGHT_TAG}>"],
            post_tags=[f"</{HIGHLIGHT_TAG}>"],
        )

        # pagination window for the requested page
        offset = (page - 1) * self.results_per_page
        s = s[offset:offset + self.results_per_page]

        # run the query and publish results on the instance
        self.hits = s.execute().hits
        self.total = self.hits.total.value if self.hits else 0
        self.results = [self.make_result(hit) for hit in self.hits]

        return self
Example #24
0
    def doSearch(self, body):
        """Execute the raw DSL *body* against the configured index, adding
        highlights for the standard display fields.

        :param body: search body dict (as produced for/by Search.to_dict)
        :return: elasticsearch_dsl Response, or None when anything fails
        """
        try:
            # Registers the default connection that Search.from_dict() uses.
            connections.create_connection(hosts=[settings.ES_URL])
            # BUG FIX: the original first built a Search(using=..., index=...,
            # doc_type=...) and immediately discarded it via from_dict();
            # build the search once from the body.
            s = Search.from_dict(body)
            s = s.index(settings.ES_INDEX_NAME)
            s = s.doc_type(settings.ES_INDEX_TYPE)

            # highlight the following fields in the search result
            for field in ('title', 'description', 'data_time', 'source'):
                s = s.highlight(field)

            response = s.execute()
        except Exception:
            # NOTE(review): broad catch preserved deliberately -- callers
            # treat None as "search failed"; consider logging the exception.
            return None

        return response
Example #25
0
    def click3(self):                          ### -   ELASTIC SEARCH
        """Search both the 'movies' and 'imdb' indices for the text entered in
        the search box and show the formatted hits in the output widget.

        NOTE(review): the first self.textOut.setText() call is immediately
        overwritten by the second block's setText -- only the 'imdb' results
        are ever visible. The `res`/`sample` pairs are fetched but unused.
        """
        try:                         # search from the elastic search fully function
            client = Elasticsearch()
            # NOTE(review): unused -- fetched via a separate global `es` client.
            res = es.search(index="movies", body={})
            sample = res['hits']['hits']

            # Title match over the 'movies' index with short highlight snippets.
            s = Search(using=client, index="movies")
            #print(s)
            getText = self.textSearch.toPlainText()
            q = Q('match', title=getText)
            s = s.query(q)
            s = s.highlight('text', fragment_size=20)
            response = s.execute()
            allTogether = ''

            for hit in response.hits.hits:
                allTogether = allTogether + "\n" + hit._source.title + " ----> By" + hit._source.cast
                print('FROM FILE 1 >>>>')
            self.textOut.setText(allTogether)

            # NOTE(review): unused, same as above.
            res = es.search(index="imdb", body={})
            sample = res['hits']['hits']

            # Same title search repeated over the 'imdb' index.
            s = Search(using=client, index="imdb")
            print('From FILE 2 ->>>>')
            getText = self.textSearch.toPlainText()
            q = Q('match', title=getText)
            s = s.query(q)
            s = s.highlight('text', fragment_size=20)
            response = s.execute()
            allTogether = ''
            for hit in response.hits.hits:
                allTogether = allTogether + "\n" + hit._source.title + "->>> By " + hit._source.country
                print('FROM FILE 2 >>>>')
            self.textOut.setText(allTogether)
        except NotFoundError:
            print('error not found')
def free_search_in_title(word):
    """Full-text match *word* against the ``title`` field and print each
    hit's score and highlighted snippet.

    Fixes over the previous version:
    - ``Search.highlight()`` accepts only field names plus keyword options;
      *word* was previously passed as a second positional argument and was
      therefore treated as another FIELD to highlight, not as the query;
    - ``print`` statements converted to function calls so the snippet runs
      on Python 3 (the rest of this file already uses f-strings).
    """
    s = Search(using=client, index="sample_film_index")
    # Q is a shortcut for constructing a query object
    q = Q('match', title=word)
    # At some point, q has to be added to the search object.
    s = s.query(q)
    s = s.highlight_options(pre_tags='<mark>', post_tags='</mark>')  # for html
    # huge fragment_size + one fragment => the whole title comes back highlighted
    s = s.highlight('title', fragment_size=999999999, number_of_fragments=1)
    response = s.execute()
    print("Num hits for", word, len(response.to_dict()['hits']['hits']))
    for hit in response:
        print(hit.meta.score)      # doc score
        print(hit.meta.highlight)  # highlighted snippet
Example #27
0
    def get_queryset(self):
        """Build and execute the Elasticsearch query for the selected models.

        Returns [] when Elasticsearch is unreachable or no query was given;
        otherwise returns ``self.index_manager.setup_search()`` applied to
        the fully built, weighted and highlighted queryset.
        """
        if not self.index_manager.connected_to_es:
            messages.warning(self.request, _(u'Impossible de se connecter à Elasticsearch'))
            return []

        if not self.search_query:
            return []

        # find forums the user is allowed to visit
        self.authorized_forums = get_authorized_forums(self.request.user)

        # resolve which model groups were requested (all of them by default)
        search_groups = settings.ZDS_APP['search']['search_groups']
        chosen_groups = self.search_form.cleaned_data['models']
        if chosen_groups:
            model_lists = [search_groups[group][1]
                           for group in chosen_groups if group in search_groups]
        else:
            model_lists = [v[1] for k, v in search_groups.iteritems()]

        models = reduce(operator.concat, model_lists)

        # one sub-queryset per model, OR-ed together
        sub_querysets = [getattr(self, 'get_queryset_{}s'.format(model))()
                         for model in models]
        combined = sub_querysets[0]
        for sub in sub_querysets[1:]:
            combined |= sub

        # per-type global score boosts
        weight_functions = [
            {'filter': Match(_type=_type), 'weight': weights['global']}
            for _type, weights in settings.ZDS_APP['search']['boosts'].items()
            if _type in models
        ]

        scored_queryset = FunctionScore(
            query=combined, boost_mode='multiply', functions=weight_functions)
        search_queryset = Search().query(scored_queryset)

        # highlighting of matched fragments
        search_queryset = search_queryset.highlight_options(
            fragment_size=150, number_of_fragments=5, pre_tags=['[hl]'], post_tags=['[/hl]'])
        search_queryset = search_queryset.highlight('text').highlight('text_html')

        return self.index_manager.setup_search(search_queryset)
Example #28
0
    def run(self, query, page=1, default_operator="AND"):
        """Perform search, placing the results in `self.results`, and the total
        number of results (across all pages) in `self.total`. Chainable."""

        # dfs_query_then_fetch gives more consistent scoring across shards
        dsl_search = (
            DSLSearch(using=es7_client(), index=self.get_index())
            .params(search_type="dfs_query_then_fetch")
            # restrict to the search class' filter
            .query("bool", filter=self.get_filter())
            # match the query against the search class' fields
            .query(
                "simple_query_string",
                query=query,
                default_operator=default_operator,
                fields=self.get_fields(),
            )
            # highlight the search class' highlight_fields
            .highlight(
                *self.get_highlight_fields(),
                type="fvh",
                order="score",                 # rank fragments by relevance
                number_of_fragments=1,         # a single fragment per field
                boundary_scanner="sentence",   # break fragments at sentence ends
                fragment_size=SNIPPET_LENGTH,  # approximate fragment length
                pre_tags=[f"<{HIGHLIGHT_TAG}>"],
                post_tags=[f"</{HIGHLIGHT_TAG}>"],
            )
        )

        # paginate
        offset = (page - 1) * self.results_per_page
        dsl_search = dsl_search[offset:offset + self.results_per_page]

        # run the query and record hits, total and wrapped results
        self.hits = dsl_search.execute().hits
        self.total = self.hits.total.value if self.hits else 0
        self.results = [self.make_result(hit) for hit in self.hits]

        return self
Example #29
0
def find(query, company_id, proposal_id):
    """Search the import index for *query* within one company's documents.

    Title matches are boosted (^4) over content matches so that typing an
    exact title yields that section rather than the same words in a body.
    Returns the first 20 hits, with matched terms wrapped in
    <span class='search-highlight'> markup.
    """
    s = (
        Search(using=get_client(), index=current_app.config["ES_IMPORT_INDEX"])
        .filter("term", company_id=company_id)
        # .filter(~Q("term", proposal_id=proposal_id))
        .query(Q("multi_match", query=query, fields=["title^4", "content"]))
        .highlight_options(order="score",
                           pre_tags=["<span class='search-highlight'>"],
                           post_tags=["</span>"])
        .highlight("title", "content")
    )
    # Only get the first 20 results
    return s[:20].execute().hits
Example #30
0
    def GetAuditDataMain(self, data):
        """Run a free-form query-string search over audit documents and
        return one record per highlighted field.

        *data* is the raw user query string.  Documents produced by the
        noisy 'stateagentinspector' and 'w32processes-tree' generators are
        excluded.  Returns a dict with a "connection_error" key when the
        Elasticsearch endpoint cannot be reached.
        """
        search = Search()[0:1000]
        search = search.highlight('*')
        search = search.highlight_options(require_field_match=False)

        # match the user's query but filter out the noisy generators
        full_query = (
            Q('query_string', query=data)
            & ~Q('query_string',
                 default_field="AuditType.Generator",
                 query="stateagentinspector")
            & ~Q('query_string',
                 default_field="AuditType.Generator",
                 query="w32processes-tree")
        )
        query = search.query(full_query)

        try:
            # POST the rendered DSL body straight at the index's _search endpoint
            r = requests.post(self.es_host + ":" + self.es_port + self.index +
                              self.type_audit_type + '/_search',
                              data=json.dumps(query.to_dict()),
                              auth=(self.elastic_user, self.elastic_pass),
                              verify=False)
        except ConnectionError as e:
            return {"connection_error": e.args[0]}

        records = []
        try:
            # flatten: one output record per (document, highlighted field) pair
            for doc in r.json()['hits']['hits']:
                for field_name, fragments in doc['highlight'].iteritems():
                    records.append({
                        "doc_id": doc['_id'],
                        "endpoint": doc['_parent'],
                        "audittype": doc['_source']['AuditType']['Generator'],
                        "field": field_name,
                        "response": fragments,
                    })
        except KeyError:
            # response had no hits/highlight section: return what we have
            pass

        return records
Example #31
0
    async def get(self):
        """Get the results from Elasticsearch."""
        q = self.request.query.get("q")
        if not q:
            return web.json_response([])

        es = Elasticsearch(
            hosts=[self.request.app["settings"].ELASTICSEARCH_URL],
            timeout=ELASTICSEARCH_TIMEOUT,
            verify_certs=ELASTICSEARCH_VERIFY_CERTS,
        )
        # the mapping drives both query construction and highlight fields
        mapping = es.indices.get_mapping(ELASTICSEARCH_INDEX,
                                         include_type_name=True)

        search = (
            Search(index=ELASTICSEARCH_INDEX, using=es)
            .highlight_options(
                pre_tags=[PRE_HIGHLIGHT_TAG],
                post_tags=[POST_HIGHLIGHT_TAG],
            )
            .query(self.queries(mapping, q))
        )

        # one plain highlighter per mapped field
        for field in self.build_highlight(
                mapping[ELASTICSEARCH_INDEX]["mappings"]["_doc"]["properties"]):
            search = search.highlight(field, type="plain")

        search = search.extra(from_=0, size=MAX_RESULTS)

        values = []
        for hit in search.execute():
            hit._d_.pop(META, None)
            if HIGHLIGHT and hasattr(hit.meta, "highlight"):
                # splice each field's first highlighted fragment back into
                # the document at the field's path before returning it
                doc = DictQuery(hit._d_)
                for key in hit.meta.highlight:
                    path = key.split(".")[:-1]
                    doc.set("/".join(path), hit.meta.highlight[key][0])
                values.append(doc)
            else:
                values.append(hit._d_)
        return web.json_response(values)
Example #32
0
    def portalSearch(expression, start=0, end=25):
        """Match *expression* against published repositories and data nodes.

        Returns {'nodes': [...], 'Counts': {}} where nodes are either
        published Repository summaries or (up to 32) published Data nodes
        carrying their highlight fragments.
        """
        client = Elasticsearch()
        ret = {'nodes': [], 'Counts': {}}
        query = Q("bool", must=[Q('match', _all=expression)])
        s = Search(using=client, index="neo4j-inquisite-node",
                   doc_type="Repository,Data").query(query)
        # fetch every hit in one page
        s = s[0:s.count()]
        s = s.highlight_options(require_field_match=False)
        s = s.highlight('*', fragment_size=45)
        res = s.execute()

        data = {}
        uuids = []
        pub_uuids = {}
        if not res:
            return ret

        for r in res:
            d = r.to_dict()
            if r.meta.doc_type == 'Repository':
                # unpublished repositories are invisible to the portal
                if int(d['published']) == 0:
                    continue
                repo_id = r.meta.id
                ret['nodes'].append({"id": r.meta.id, "type": "Repository",
                                     "name": d['name'],
                                     "description": d['readme']})
                pub_uuids[repo_id] = SearchManager._getDataUUIDsForRepo(repo_id)
            else:
                # keep the highlight fragments; publication is checked below
                hits = [{field: value}
                        for field, value in r.meta.highlight.to_dict().items()]
                data[r.meta.id] = {'id': r.meta.id, "hits": hits}
                uuids.append(r.meta.id)

        # only data nodes attached to a published repository are returned
        qString = "MATCH (r:Repository)--(t:SchemaType)--(d:Data) WHERE d.uuid IN {uuids} AND r.published = '1' RETURN d.uuid as uuid, r.name as repo_name, r.uuid as repo_id"
        pub_data = db.run(qString, {"uuids": uuids})
        data_max = 0
        for checked in pub_data:
            if data_max >= 32:  # cap the number of data nodes returned
                break
            ret['nodes'].append({"id": checked['uuid'], "type": "Data",
                                 "repo_id": checked['repo_id'],
                                 "repo_name": checked['repo_name'],
                                 "hits": data[checked['uuid']]['hits']})
            data_max += 1

        return ret
Example #33
0
    def search_by_keywords(self, keywords, subject):
        """Yield arxiv papers matching *keywords*, optionally restricted to
        *subject*; each yielded dict carries a 'fragment' list with the
        highlighted abstract snippets (empty when nothing was highlighted).
        """
        search = Search(using=self.es, index='arxiv-index')

        # every keyword must appear in the pdf, the abstract or the authors
        content_query = Q()
        cleaned = re.sub('[^A-Za-z0-9 ]+', '', keywords).lower()
        for word in cleaned.split(' '):
            content_query = content_query + (
                Q('wildcard', pdf='*' + word + '*')
                | Q('wildcard', abstract='*' + word + '*')
                | Q('wildcard', authors='*' + word + '*'))

        # optionally prefer papers whose (other_)subject matches
        subject_query = Q()
        other_query = Q()
        if subject and subject != 'all':
            subject_query = Q('wildcard', subject='*' + subject + '.*')
            other_query = Q('wildcard', other_subjects='*' + subject + '.*')

        search = search.query(Q('bool',
                                must=[content_query],
                                should=[subject_query, other_query],
                                minimum_should_match=1))
        search = search.source([
            'title', 'authors', 'subject', 'other_subjects', 'abstract',
            'abstract_url', 'pdf_url', 'submit_date'
        ])
        search = search.highlight_options(order='score')
        search = search.highlight('abstract', fragment_size=400)

        # fetch every match, then apply any extra query refinement
        search = search[0:search.count()]
        search = self._extend_query(search, cleaned)

        for hit in search.execute():
            result = hit.to_dict()
            if 'highlight' in hit.meta:
                result.update({'fragment': hit.meta.highlight.abstract})
            else:
                result.update({'fragment': []})
            yield result
Example #34
0
def get_second_best_wiki_words(question):
    """Return the highlighted words of the SECOND-best IR guess for *question*.

    Result is a dict with 'wiki' and 'qb' keys; each value is an iterator of
    the <em>-wrapped words from that field's highlight fragments, or None
    when the field produced no highlights.
    """
    text = question.flatten_text()
    # query top 10 guesses
    s = Search(index="qb_ir_instance_of")[0:10].query(
        "multi_match",
        query=text,
        fields=["wiki_content", "qb_content", "source_content"],
    )
    s = s.highlight("qb_content").highlight("wiki_content")
    hits = list(s.execute())
    second_guess = hits[1]  # take the second best answer
    highlight_meta = second_guess.meta.highlight

    try:
        wiki_fragments = list(highlight_meta.wiki_content)
    except AttributeError:
        wiki_fragments = None

    try:
        qb_fragments = list(highlight_meta.qb_content)
    except AttributeError:
        qb_fragments = None

    def extract(fragments):
        # pull every <em>-wrapped word out of the highlight fragments
        if fragments is None:
            return None
        return itertools.chain(
            *[re.findall("<em>(.*?)</em>", frag) for frag in list(fragments)])

    return {"wiki": extract(wiki_fragments), "qb": extract(qb_fragments)}
    def _apply_index(self, request):
        """Apply the index to query parameters given in 'request'.

        The argument should be a mapping object.

        If the request does not contain the needed parameters, then
        None is returned.

        If the request contains a parameter with the name of the
        column and this parameter is either a Record or a class
        instance then it is assumed that the parameters of this index
        are passed as attribute (Note: this is the recommended way to
        pass parameters since Zope 2.4)

        Otherwise two objects are returned.  The first object is a
        ResultSet containing the record numbers of the matching
        records.  The second object is a tuple containing the names of
        all data fields used.
        """
        # Pull the request timeout and the (possibly boosted, e.g. 'title^2')
        # field list from configuration, falling back to module defaults.
        config = get_configuration()
        timeout = getattr(config, 'request_timeout', 20)
        search_fields = getattr(config, 'search_fields', None)
        if not search_fields:
            search_fields = SEARCH_FIELDS
        search_fields = search_fields.split()
        logger.info(search_fields)
        # Other code can temporarily block ES queries via this flag.
        if query_blocker.blocked:
            return
        record = parseIndexRequest(request, self.id)
        if record.keys is None:
            return None
        es = get_query_client()
        search = Search(using=es, index=index_name())
        search = search.params(request_timeout=timeout)
        # Deterministic sort so the 'search_after' pagination below is stable.
        search = search.sort('rid', '_id')
        # Only rids are fetched; highlights are carried via annotations instead.
        search = search.source(include='rid')
        query_string = record.keys[0].decode('utf8')
        logger.info(query_string)
        # Wildcard queries are expensive here: treat '*' as whitespace.
        if '*' in query_string:
            query_string = query_string.replace('*', ' ')
        query_string = query_string.strip()
        search = search.query('simple_query_string',
                              query=query_string,
                              fields=search_fields)
        results_count = search.count()
        search = search.params(request_timeout=timeout,
                               size=BATCH_SIZE,
                               track_scores=True)
        # setup highlighting
        for field in search_fields:
            name = field.split('^')[0]  # strip any '^boost' suffix
            if name == 'title':
                # title shows up in results anyway
                continue
            search = search.highlight(name, fragment_size=FRAGMENT_SIZE)

        # initial return value, other batches to be applied
        retval = IIBTree()
        highlights = OOBTree()
        last_seen = None
        count = 0
        # Number of batches; NOTE(review): relies on Python 2 integer division
        # ('xrange' below implies Py2) — confirm there is no
        # 'from __future__ import division' at the top of this module.
        batch_count = results_count / BATCH_SIZE
        if results_count % BATCH_SIZE != 0:
            batch_count = batch_count + 1
        for i in xrange(batch_count):
            # Page with 'search_after' (using the last [rid, _id] pair seen)
            # rather than from/size offsets.
            if last_seen is not None:
                search = search.update_from_dict({'search_after': last_seen})
            try:
                results = search.execute(ignore_cache=True)
            except TransportError:
                # No es client, return empty results
                logger.exception('ElasticSearch client not available.')
                return IIBTree(), (self.id, )

            for r in results:
                rid = getattr(r, 'rid', None)
                if rid is not None:
                    # Scale the float relevance score to an int for the IIBTree.
                    retval[rid] = int(10000 * float(r.meta.score))
                    # Index query returns only rids, so we need
                    # to save highlights for later use
                    highlight_list = []
                    if getattr(r.meta, 'highlight', None) is not None:
                        for key in dir(r.meta.highlight):
                            highlight_list.extend(r.meta.highlight[key])
                    highlights[r.meta.id] = highlight_list
                last_seen = [rid, r.meta.id]
                count = count + 1

        # store highlights
        try:
            annotations = IAnnotations(self.REQUEST)
            annotations[HIGHLIGHT_KEY] = highlights
        except TypeError:
            # maybe we are in a test
            pass

        return retval, (self.id, )
Example #36
0
    def get_queryset(self):
        """Assemble and launch the Elasticsearch query for this search view.

        Returns [] when Elasticsearch is unreachable or no query was given;
        otherwise returns ``self.index_manager.setup_search()`` applied to
        the weighted and highlighted queryset.
        """
        if not self.index_manager.connected_to_es:
            messages.warning(self.request, _('Impossible de se connecter à Elasticsearch'))
            return []

        if not self.search_query:
            return []

        # Forums the current user is allowed to visit
        self.authorized_forums = get_authorized_forums(self.request.user)

        form_data = self.search_form.cleaned_data

        # Optional (sub)category restriction
        if form_data['category']:
            self.content_category = form_data['category']
        if form_data['subcategory']:
            self.content_subcategory = form_data['subcategory']

        # Contents must come from the library when asked to
        self.from_library = form_data['from_library'] == 'on'

        # Resolve which model groups were requested (all of them by default)
        search_groups = settings.ZDS_APP['search']['search_groups']
        chosen_groups = form_data['models']
        if chosen_groups:
            model_lists = [search_groups[group][1]
                           for group in chosen_groups if group in search_groups]
        else:
            model_lists = [v[1] for v in search_groups.values()]

        models = reduce(operator.concat, model_lists)

        # One sub-queryset per model, OR-ed together
        part_querysets = [getattr(self, 'get_queryset_{}s'.format(model))()
                          for model in models]
        queryset = part_querysets[0]
        for part in part_querysets[1:]:
            queryset |= part

        # Per-type global score boosts
        weight_functions = [
            {'filter': Match(_type=_type), 'weight': weights['global']}
            for _type, weights in settings.ZDS_APP['search']['boosts'].items()
            if _type in models
        ]

        scored_queryset = FunctionScore(
            query=queryset, boost_mode='multiply', functions=weight_functions)
        search_queryset = Search().query(scored_queryset)

        # Highlighting of matched fragments
        search_queryset = search_queryset.highlight_options(
            fragment_size=150, number_of_fragments=5, pre_tags=['[hl]'], post_tags=['[/hl]'])
        search_queryset = search_queryset.highlight('text').highlight('text_html')

        return self.index_manager.setup_search(search_queryset)