def test_scan_iterates_through_all_docs(data_client):
    s = Search(index='git').filter('term', _type='commits')

    commits = list(s.scan())

    assert 52 == len(commits)
    assert set(d['_id'] for d in DATA if d['_type'] == 'commits') == set(c.meta.id for c in commits)
	def GetAuditData(self, case, child_id, data_type, start=None, length=None, str_query=None, sort=None, order=None):
		q = ['w32registryraw', 'filedownloadhistory', 'urlhistory', 'timeline', 'w32apifiles', 'w32rawfiles', 'w32eventlogs']

		if data_type in q:
			query = search_queries.GetGeneratorQuery(data_type, str_query, case, child_id, start, length, sort, order)
		else:
			s = Search()
			s = s[0:1000]
			t = Q('query_string', default_field="ComputerName.raw", query=child_id) & Q('query_string', default_field="CaseInfo.case_name", query=case)
			query = s.query(t).filter('term', AuditType__Generator=data_type)

		try:
			r = requests.post(self.es_host + ":" + self.es_port + self.index + self.type_audit_type + '/_search', data=json.dumps(query.to_dict()), auth=(self.elastic_user, self.elastic_pass), verify=False)
		except ConnectionError as e:
			ret = {"connection_error": e.args[0]}
			return ret

		data = []

		try:
			for x in r.json()['hits']['hits']:
				data.append(x)
		except KeyError:
			return data

		return data
	def BuildRootTree(self):
		s = Search()
		t = Q('query_string', query="*")
		aggs_casenum = A('terms', field="CaseInfo.case_name", size=0)

		s.aggs.bucket('casenum', aggs_casenum)
		query = s.query(t)

		try:
			r = requests.post(self.es_host + ":" + self.es_port + self.index + self.type_audit_type + '/_search', data=json.dumps(query.to_dict()), auth=(self.elastic_user, self.elastic_pass), verify=False)
		except ConnectionError as e:
			ret = {"connection_error": e.args[0]}
			return ret

		data = [{
			"id": "current_inv", "parent": "#", "text": "Current Investigations", "type": "root"
		}, {
			"id": "comp_inv", "parent": "#", "text": "Completed Investigations", "type": "root"
		}]

		for x in r.json()['aggregations']['casenum']['buckets']:
			data.append({
				"id": x['key'], "parent": "current_inv", "text": x['key'], "children": True, "type": "case"
			})

		return data
    def get_files_in_path(self, dir_path):
        ''' gets all es file names from es in a given path '''
        dir_hash = FileResource.get_hash(dir_path)
        # Search() does not accept the raw client's index/doc_type/body kwargs;
        # scope the Search itself and express the term query through the DSL.
        s = Search(index=self.index, doc_type=self.type).query(
            "term", file_dir_hash=dir_hash)

        response = s.execute()

        files = []

        for hit in response:
            files.append(hit.file_uri)

        return files
	def BuildAuditAggs(self, child_id, parent_id):
		s = Search()
		s = s[0:0]  # aggregations only; no hits needed
		t = Q('query_string', default_field="CaseInfo.case_name", query=parent_id) & Q('match', ComputerName=child_id)
		aggs_generator = A('terms', field='AuditType.Generator', size=0)

		s.aggs.bucket('datatypes', aggs_generator)
		query = s.query(t)

		try:
			r = requests.post(self.es_host + ":" + self.es_port + self.index + self.type_audit_type + '/_search', data=json.dumps(query.to_dict()), auth=(self.elastic_user, self.elastic_pass), verify=False)
		except ConnectionError as e:
			ret = {"connection_error": e.args[0]}
			return ret

		data = []
		exclude = ['w32processes-memory', 'stateagentinspector', 'w32disks']

		for y in r.json()['aggregations']['datatypes']['buckets']:
			if not y['key'] in exclude:
				data.append({
						"id": y['key'], "parent": child_id, "text": y['key'], "type": "audit", "a_attr": {"href": "#" + y['key'] + '/' + parent_id + "/" + child_id }
					})

		return data
Example #6
    def get(self, request, *args, **kwargs):
        q = request.GET.get('q')

        # Make search.
        queries = [
            query.Q('match', slug=self._phrase(q)),  # Slug.
            query.Q('match', type=self._phrase(q)),  # Type.
            query.Q('match', search_names=self._phrase(q)),  # Name.
            query.Q('prefix', carrier=q),  # Shelf carrier.
            query.Q('term', region=q)  # Shelf region.
        ]
        sq = query.Bool(should=queries)

        # Search.
        res = {'apps': [], 'brands': [], 'collections': [], 'shelves': []}
        es = Search(using=FeedItemIndexer.get_es(),
                    index=self.get_feed_element_index())
        feed_elements = es.query(sq).execute().hits
        if not feed_elements:
            return response.Response(res, status=status.HTTP_404_NOT_FOUND)

        # Deserialize.
        ctx = {'app_map': self.get_apps(request,
                                        self.get_app_ids_all(feed_elements)),
               'request': request}
        for feed_element in feed_elements:
            item_type = feed_element.item_type
            serializer = self.SERIALIZERS[item_type]
            data = serializer(feed_element, context=ctx).data
            res[self.PLURAL_TYPES[item_type]].append(data)

        # Return.
        return response.Response(res, status=status.HTTP_200_OK)
	def GetAuditDataMain(self, data):
		s = Search()
		s = s[0:1000]
		s = s.highlight('*')
		s = s.highlight_options(require_field_match=False)
		t = Q('query_string', query=data) & ~Q('query_string', default_field="AuditType.Generator", query="stateagentinspector") & ~Q('query_string', default_field="AuditType.Generator", query="w32processes-tree")

		query = s.query(t)

		try:
			r = requests.post(self.es_host + ":" + self.es_port + self.index + self.type_audit_type + '/_search', data=json.dumps(query.to_dict()), auth=(self.elastic_user, self.elastic_pass), verify=False)
		except ConnectionError as e:
			ret = {"connection_error": e.args[0]}
			return ret

		data = []

		try:
			for x in r.json()['hits']['hits']:
				for y, v in x['highlight'].items():
					data.append({
							"doc_id": x['_id'],
							"endpoint": x['_parent'],
							"audittype": x['_source']['AuditType']['Generator'],
							"field": y,
							"response": v
						})
		except KeyError:
			pass

		return data
    def search(self):
        self.reindex(Addon)

        qs = Search(using=amo.search.get_es(),
                    index=AddonIndexer.get_index_alias(),
                    doc_type=AddonIndexer.get_doctype_name())
        return qs.filter('term', id=self.addon.pk).execute()[0]
    def query(self, client):
        """Method that actually queries elasticsearch"""
        # Set up our search parameters
        voq = self.config.get("query", "{}_voname".format(self.vo.lower()))
        productioncheck = '*Role=Production*'

        start_date = self.datesplit_pattern.split(self.start_time)
        starttimeq = datetime(*[int(elt) for elt in start_date]).isoformat()

        end_date = self.datesplit_pattern.split(self.end_time)
        endtimeq = datetime(*[int(elt) for elt in end_date]).isoformat()

        # Generate the index pattern based on the start and end dates
        indexpattern = indexpattern_generate(start_date, end_date)

        if self.verbose:
            print(indexpattern, file=sys.stdout)
            sleep(3)

        # Elasticsearch query
        resultset = Search(using=client, index=indexpattern) \
            .query("wildcard", VOName=productioncheck) \
            .filter(Q({"term": {"VOName": voq}})) \
            .filter("range", EndTime={"gte": starttimeq, "lt": endtimeq}) \
            .filter(Q({"term": {"ResourceType": "Payload"}}))

        if self.verbose:
            print(resultset.to_dict())

        return resultset
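The date handling above splits a timestamp string on its separators and rebuilds a datetime for the range filter; a small illustration (this standalone datesplit_pattern is an assumption, consistent with how the attribute is used here):

import re
from datetime import datetime

datesplit_pattern = re.compile(r'[-/ :]')
parts = datesplit_pattern.split('2016-06-01 00:00:00')
print(datetime(*[int(p) for p in parts]).isoformat())  # -> 2016-06-01T00:00:00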
Example #10
 def _filter(self, req=None, data=None):
     req = req or RequestFactory().get('/', data=data or {})
     queryset = Search()
     for filter_class in self.filter_classes:
         queryset = filter_class().filter_queryset(req, queryset,
                                                   self.view_class)
     return queryset.to_dict()
  def searchTweets(keyword, latlondist):
    #Variables that contains the user credentials to access Twitter API 
    if TwitterHelper.AWS_ACCESS_KEY == None:
      raise KeyError("Please set the AWS_ACCESS_KEY env. variable")
    
    if TwitterHelper.AWS_SECRET_KEY == None:
      raise KeyError("Please set the AWS_SECRET_KEY env. variable")

    s = Search()
    if latlondist != None:
      locJson = json.loads(latlondist)
      s = s.query({"filtered" : {"query" : {"match_all" : {}}, "filter" : {"geo_distance" : {"distance" : locJson['dist'], "location" : {"lat" : locJson['lat'], "lon" : locJson['lon']}}}}})

    if keyword != None:
      q = Q("match_phrase", text = keyword)
      s = s.query(q)
    
    scanResp = None
    scanResp = helpers.scan(client = TwitterHelper.ES, query = s.to_dict(), scroll = "1m", index = "tweets", timeout = "1m")

    arr = []
    for resp in scanResp:
      hit = resp['_source']
      d = {}
      d['name'] = hit['name']
      d['text'] = hit['text']
      d['sentiment'] = hit['sentiment']
      d['lat'] = hit['location']['lat']
      d['lon'] = hit['location']['lon']
      arr.append(d)
    allD = {}
    allD['tweets'] = arr
    mapInput = json.dumps(allD)
    return mapInput
def test_scan_iterates_through_all_docs(data_client):
    s = Search(index='flat-git')

    commits = list(s.scan())

    assert 52 == len(commits)
    assert set(d['_id'] for d in FLAT_DATA) == set(c.meta.id for c in commits)
    def handle(self, *args, **options):
        min_id = FailureLine.objects.order_by('id').values_list("id", flat=True)[0] - 1
        chunk_size = options['chunk_size']

        if options["recreate"]:
            connection.indices.delete(TestFailureLine._doc_type.index, ignore=404)
            TestFailureLine.init()
        else:
            if connection.indices.exists(TestFailureLine._doc_type.index):
                self.stderr.write("Index already exists; can't perform import")
                return

        while True:
            rows = (FailureLine.objects
                    .filter(id__gt=min_id)
                    .order_by('id')
                    .values("id", "job_guid", "action", "test", "subtest",
                            "status", "expected", "message", "best_classification_id",
                            "best_is_verified"))[:chunk_size]
            if not rows:
                break
            es_lines = []
            for item in rows:
                es_line = failure_line_from_value(item)
                if es_line:
                    es_lines.append(es_line)
            self.stdout.write("Inserting %i rows" % len(es_lines))
            bulk_insert(es_lines)
            min_id = rows[len(rows) - 1]["id"]
            time.sleep(options['sleep'])
        s = Search(doc_type=TestFailureLine).params(search_type="count")
        self.stdout.write("Index contains %i documents" % s.execute().hits.total)
Example #14
 def authors(self, num_columns=0):
     """
     @param num_columns: int If non-zero, break up list into columns
     """
     s = Search(
         using=docstore._get_connection(settings.DOCSTORE_HOSTS),
         index=settings.DOCSTORE_INDEX,
         doc_type='authors'
     ).fields([
         'url_title', 'title', 'title_sort', 'lastmod'
     ])[0:docstore.MAX_SIZE]
     response = s.execute()
     authors = []
     for hit in response:
         url_title = hit.url_title[0]
         title = hit.title[0]
         title_sort = hit.title_sort[0]
         lastmod = hit.lastmod[0]
         if title and title_sort:
             author = Author()
             author.url_title = url_title
             author.title = title
             author.title_sort = title_sort
             author.lastmod = datetime.strptime(lastmod, mediawiki.TS_FORMAT)
             authors.append(author)
     authors = sorted(authors, key=lambda a: a.title_sort)
     if num_columns:
         return _columnizer(authors, num_columns)
     return authors
def test_inner_hits_are_wrapped_in_response(data_client):
    s = Search(index='git')[0:1].query('has_parent', parent_type='repo', inner_hits={}, query=Q('match_all'))
    response = s.execute()

    commit = response.hits[0]
    assert isinstance(commit.meta.inner_hits.repo, response.__class__)
    assert repr(commit.meta.inner_hits.repo[0]).startswith("<Hit(git/doc/elasticsearch-dsl-py): ")
Example #16
    def from_es_id(cls,es,es_id,access_token,instance,version=None):

        index_exists = es.indices.exists(index=cls.ES_INDEX)
        type_exists =  es.indices.exists_type(index=cls.ES_INDEX,
                                              doc_type=cls.ES_TYPE)

        if not all([index_exists,type_exists]):
            raise Exception('Elastic index or type does not exist. '
                            'Cannot find {c} in Elasticsearch '
                            'to create an instance'.format(c=cls.__name__))

        find_instance = Search(using=es,index=cls.ES_INDEX) \
                        .query(Q("match",_id=es_id))

        r = find_instance.execute()
        if not r:
            raise Exception('Cannot find elasticsearch {t}' \
                            ' instance from elasticsearch ' \
                            'id:{id}'.format(t=cls.__name__,
                                            id=es_id))


        sf_id = r[0]._d_.pop('Id',None)
        if sf_id is None:
            raise Exception('Missing a valid SF Id in '
                            'Elasticsearch document id:{i}'.format(i=es_id))

        sf_data = r[0]._d_

        return cls(es=es,
                   sf_id=sf_id,
                   sf_data=sf_data,
                   access_token=access_token,
                   instance=instance)
Example #17
 def categories(self):
     s = Search(
         using=docstore._get_connection(settings.DOCSTORE_HOSTS),
         index=settings.DOCSTORE_INDEX,
         doc_type='articles'
     ).fields([
         'title', 'title_sort', 'categories',
     ])[0:docstore.MAX_SIZE]
     if not settings.MEDIAWIKI_SHOW_UNPUBLISHED:
         s = s.query('match', published=True)
     response = s.execute()
     pages = []
     for hit in response:
         page = Page()
         page.url_title = hit.title[0]
         page.title = hit.title[0]
         page.title_sort = hit.title_sort[0]
         page.categories = hit.get('categories', [])
         pages.append(page)
     articles = sorted(pages, key=lambda page: page.title_sort)
     categories = {}
     for page in articles:
         for category in page.categories:
             # exclude internal editorial categories
             if category not in settings.MEDIAWIKI_HIDDEN_CATEGORIES:
                 if category not in categories.keys():
                     categories[category] = []
                 # pages already sorted so category lists will be sorted
                 if page not in categories[category]:
                     categories[category].append(page)
     return categories
Example #18
def index_single(es, network, channel, date, lines):
    # Delete existing
    delete_existing = Search(
        using=es,
        index='moffle',
    ).query(
        "term", network=network,
    ).query(
        "term", channel=channel,
    ).query(
        "term", date=date,
    )

    es.delete_by_query(
        index='moffle',
        body=delete_existing.to_dict(),
    )

    actions = [x for x in (line_to_index_action(network, channel, date, i, line) for i, line in lines) if x]
    while actions:
        retries = 0
        try:
            success_count, _ = bulk(es, actions)
            log("{}/{}/{}: indexed {} lines".format(network, channel, date, success_count))
            return success_count
        except Exception as e:
            retries += 1
            log("{}/{}/{}: Attempt {}/3: {}".format(network, channel, date, retries, e))
            if retries > 3:
                raise
Example #19
    def exists(self):

        find_instance = Search(using=self.es,index=self.index) \
                        .query(Q("match",Id=self.sf_id))

        response = find_instance.execute()
        return response
Example #20
	def BuildRootTree(self):
		s = Search()
		t = Q('has_parent', type='hostname', query=Q('query_string', query="*"))
		aggs = A('terms', field='AuditType.Generator', size=16)

		s.aggs.bucket('datatypes', aggs)
		query = s.query(t)

		try:
			r = requests.post(self.es_host + ":" + self.es_port + self.index + self.type_audit_type + '/_search', data=json.dumps(query.to_dict()))
		except ConnectionError as e:
			ret = {"connection_error": e.args[0]}
			return ret

		data = [{
			"id": "stackable", "parent": "#", "text": "Stackable Data"
		}]

		i = ['w32services', 'w32tasks', 'w32scripting-persistence', 'w32prefetch', 'w32network-dns', 'urlhistory']

		for x in r.json()['aggregations']['datatypes']['buckets']:
			if x['key'] in i:
				data.append({
					"id" : x['key'], "parent": "stackable", "text": x['key'], "children": True
				})

		return data
    def es_read(self, log_id, offset):
        """
        Returns the logs matching log_id in Elasticsearch and next offset.
        Returns '' if no log is found or there was an error.
        :param log_id: the log_id of the log to read.
        :type log_id: str
        :param offset: the offset start to read log from.
        :type offset: str
        """

        # Offset is the unique key for sorting logs given log_id.
        s = Search(using=self.client) \
            .query('match', log_id=log_id) \
            .sort('offset')

        s = s.filter('range', offset={'gt': offset})

        logs = []
        if s.count() != 0:
            try:

                logs = s[self.MAX_LINE_PER_PAGE * self.PAGE:self.MAX_LINE_PER_PAGE] \
                    .execute()
            except Exception as e:
                msg = 'Could not read log with log_id: {}, ' \
                      'error: {}'.format(log_id, str(e))
                self.log.exception(msg)

        return logs
Example #22
def search():
    q = request.args.get('q')
    #resp = es.search(index='hoe', doc_type='record', q=q, body=aggs)
    #logging.info(q)

    s = Search(using=es, index='hoe', doc_type='record')
    s.aggs.bucket('library_place', 'terms', field='library-place')
    s.aggs.bucket('type', 'terms', field='type')
    s.aggs.bucket('genre', 'terms', field='genre')
    s.aggs.bucket('keywords', 'terms', field='keywords.label')
    s.aggs.bucket('author', 'terms', field='author.literal')
    s = s.query(Q('multi_match', query=q, fields=['_all']))
    filters = []
    if 'filter' in request.args:
        filters = request.args.getlist('filter')
        logging.info(filters)
        for filter in filters:
            cat, val = filter.split(':')
            cat = cat.replace('_', '-')
            filter_dict = {}
            filter_dict.setdefault(cat, val)
            logging.info(cat)
            s = s.filter(F('term', **filter_dict))
    #if request.args
    resp = s.execute()
    #logging.info(resp)
    #logging.info(resp.aggregations.per_category.buckets)
    return render_template('resultlist.html', records=resp.to_dict().get('hits'), facets=resp.aggregations.to_dict(), header=q, query=q, filters=filters)
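The filter strings are expected in 'category:value' form, with underscores standing in for hyphens in the field name; the parsing step in the loop above boils down to this (the value is hypothetical):

cat, val = 'library_place:Hannover'.split(':')
print(cat.replace('_', '-'), val)  # -> library-place Hannover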
Example #23
 def pages():
     """Returns list of published light Page objects.
     
     @returns: list
     """
     KEY = 'encyc-front:pages'
     TIMEOUT = 60*5
     data = cache.get(KEY)
     if not data:
         s = Search(doc_type='articles').filter('term', published_encyc=True)[0:MAX_SIZE]
         s = s.sort('title_sort')
         s = s.fields([
             'url_title',
             'title',
             'title_sort',
             'published',
             'modified',
             'categories',
         ])
         response = s.execute()
         data = [
             Page(
                 url_title  = hitvalue(hit, 'url_title'),
                 title      = hitvalue(hit, 'title'),
                 title_sort = hitvalue(hit, 'title_sort'),
                 published  = hitvalue(hit, 'published'),
                 modified   = hitvalue(hit, 'modified'),
                 categories = hit.get('categories',[]),
                )
             for hit in response
             if hitvalue(hit, 'published')
         ]
         cache.set(KEY, data, TIMEOUT)
     return data
Example #24
        def get_highlights():
            wiki_field = 'wiki_content'
            qb_field = 'qb_content'
            text = request.form['text']
            s = Search(index='qb')[0:10].query(
                'multi_match', query=text, fields=[wiki_field, qb_field])
            s = s.highlight(wiki_field).highlight(qb_field)
            results = list(s.execute())

            if len(results) == 0:
                highlights = {'wiki': [''],
                              'qb': [''],
                              'guess': ''}
            else:
                guess = results[0] # take the best answer
                _highlights = guess.meta.highlight
                try:
                    wiki_content = list(_highlights.wiki_content)
                except AttributeError:
                    wiki_content = ['']

                try:
                    qb_content = list(_highlights.qb_content)
                except AttributeError:
                    qb_content = ['']

                highlights = {'wiki': wiki_content,
                              'qb': qb_content,
                              'guess': guess.page}
            return jsonify(highlights)
Example #25
 def authors(num_columns=None):
     """Returns list of published light Author objects.
     
     @returns: list
     """
     KEY = 'encyc-front:authors'
     TIMEOUT = 60*5
     data = cache.get(KEY)
     if not data:
         s = Search(doc_type='authors')[0:MAX_SIZE]
         s = s.sort('title_sort')
         s = s.fields([
             'url_title',
             'title',
             'title_sort',
             'published',
             'modified',
         ])
         response = s.execute()
         data = [
             Author(
                 url_title  = hitvalue(hit, 'url_title'),
                 title      = hitvalue(hit, 'title'),
                 title_sort = hitvalue(hit, 'title_sort'),
                 published  = hitvalue(hit, 'published'),
                 modified   = hitvalue(hit, 'modified'),
             )
             for hit in response
             if hitvalue(hit, 'published')
         ]
         cache.set(KEY, data, TIMEOUT)
     if num_columns:
         return _columnizer(data, num_columns)
     return data
Example #26
def get_journals_by_collection_institution(collection_acronym, page_from=0, page_size=1000):

    search = Search(index=INDEX).query(
             "nested", path="collections", query=Q("match", collections__acronym=collection_acronym))

    search = search.filter("exists", field="sponsors")

    search = search[page_from:page_from + page_size]
    search_response = search.execute()

    meta = {
        'total': search_response.hits.total,
    }

    sponsors = {}
    for journal in search_response:

        j = {'jid': journal.jid,
             'title': journal.title,
             'current_status': journal.current_status,
             'last_issue': journal.last_issue,
             'issue_count': journal.issue_count
             }

        for sponsor in journal['sponsors']:
            sponsors.setdefault(sponsor, []).append(j)

    result = {
        'meta': meta,
        'objects': sponsors
    }

    return result
Example #27
    def search(self, text: str, max_n_guesses: int,
               normalize_score_by_length=False,
               wiki_boost=1, qb_boost=1):
        if not self.exists():
            raise ValueError('The index does not exist, you must create it before searching')

        if wiki_boost != 1:
            wiki_field = 'wiki_content^{}'.format(wiki_boost)
        else:
            wiki_field = 'wiki_content'

        if qb_boost != 1:
            qb_field = 'qb_content^{}'.format(qb_boost)
        else:
            qb_field = 'qb_content'

        s = Search(index=self.name)[0:max_n_guesses].query(
            'multi_match', query=text, fields=[wiki_field, qb_field]
        )
        results = s.execute()
        guess_set = set()
        guesses = []
        if normalize_score_by_length:
            query_length = len(text.split())
        else:
            query_length = 1

        for r in results:
            if r.page in guess_set:
                continue
            else:
                guess_set.add(r.page)
                guesses.append((r.page, r.meta.score / query_length))
        return guesses
Example #28
 def sources():
     """Returns list of published light Source objects.
     
     @returns: list
     """
     KEY = 'encyc-front:sources'
     TIMEOUT = 60*5
     data = cache.get(KEY)
     if not data:
         s = Search(doc_type='sources')[0:MAX_SIZE]
         s = s.sort('encyclopedia_id')
         s = s.fields([
             'encyclopedia_id',
             'published',
             'modified',
             'headword',
             'media_format',
             'img_path',
         ])
         response = s.execute()
         data = [
             Source(
                 encyclopedia_id = hitvalue(hit, 'encyclopedia_id'),
                 published = hitvalue(hit, 'published'),
                 modified = hitvalue(hit, 'modified'),
                 headword = hitvalue(hit, 'headword'),
                 media_format = hitvalue(hit, 'media_format'),
                 img_path = hitvalue(hit, 'img_path'),
                )
             for hit in response
             if hitvalue(hit, 'published')
         ]
         cache.set(KEY, data, TIMEOUT)
     return data
def session_times():
    # {"@fields": {}, "@timestamp": "2015-02-23T17:03:41.738412Z", "@source_host": "newair.brainbot.com", "@message": "scenario.p2p_connect.started"}

    start_message = 'scenario.p2p_connect.starting.clients.sequentially'
    stop_message = 'scenario.p2p_connect.stopping.clients'
    s = Search(client)
    s = s.filter('bool',
                 should=[F('term', message=start_message),
                         F('term', message=stop_message)])
    s = s.fields(['message', '@timestamp'])
    s = s[0:100000]
    s = s.sort('-@timestamp')  # desc,  we want the latest events
    response = s.execute()

    events = []  # youngest to oldest; the first (most recent) event should be a stop message
    for h in response:
        msg = 'start' if h['message'][0] == start_message else 'stop'
        ts = h['@timestamp'][0]
        events.append((msg, ts))
    assert not events or events[0][0] == 'stop'
    sessions = []
    while len(events) >= 2:
        stop = events.pop()
        start = events.pop()
        sessions.append(dict([start, stop]))
    return list(reversed(sessions))
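The pairing loop above consumes the newest-first event list from its oldest end, two events at a time; a small worked illustration with made-up timestamps:

# Hypothetical input, newest first, as produced by the '-@timestamp' sort.
events = [('stop', 't3'), ('start', 't2'), ('stop', 't1'), ('start', 't0')]
sessions = []
while len(events) >= 2:
    stop = events.pop()   # oldest remaining event
    start = events.pop()  # next-oldest event
    sessions.append(dict([start, stop]))
assert list(reversed(sessions)) == [{'start': 't2', 'stop': 't3'},
                                    {'start': 't0', 'stop': 't1'}]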
Example #30
    def build_query(self, start_date, end_date, **kwargs):
        """Build the elasticsearch query."""
        agg_query = Search(using=self.client,
                           index=self.index,
                           doc_type=self.doc_type)[0:0]
        if start_date is not None or end_date is not None:
            time_range = {}
            if start_date is not None:
                time_range['gte'] = start_date.isoformat()
            if end_date is not None:
                time_range['lte'] = end_date.isoformat()
            agg_query = agg_query.filter(
                'range',
                **{self.time_field: time_range})

        term_agg = agg_query.aggs
        for term in self.aggregated_fields:
            term_agg = term_agg.bucket(term, 'terms', field=term, size=0)
        term_agg.metric('total', 'sum', field='count')

        if self.copy_fields:
            term_agg.metric(
                'top_hit', 'top_hits', size=1, sort={'timestamp': 'desc'}
            )

        for query_param, filtered_field in self.required_filters.items():
            if query_param in kwargs:
                agg_query = agg_query.filter(
                    'term', **{filtered_field: kwargs[query_param]}
                )

        return agg_query
Example #31
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
from elasticsearch_dsl.connections import connections

BASE_URL = "https://hackzurich-api.migros.ch/hack/recipe/recipes_de/_search"

connections.create_connection(hosts=[BASE_URL],
                              http_auth=('hackzurich2020', 'uhSyJ08KexKn4ZFS'))

s = Search().query("match", title="Suppe")

response = s.execute()

for hit in response:
    print(hit.meta.score, hit.title)
Example #32
    def test_no_querystring(self):
        es = Search()

        query = queries.get_es_query(es, "", "", [])

        assert query == es.query()
Example #33
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

client = Elasticsearch()

s = Search(using=client, index="my-index") \
    .filter("term", category="search") \
    .query("match", title="python")   \
    .exclude("match", description="beta")

s.aggs.bucket('per_tag', 'terms', field='tags') \
    .metric('max_lines', 'max', field='lines')

response = s.execute()

for hit in response:
    print(hit.meta.score, hit.title)

for tag in response.aggregations.per_tag.buckets:
    print(tag.key, tag.max_lines.value)
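Nothing is sent to the cluster until execute() runs, so the assembled request body can be inspected first; the chained filter/query/exclude calls above collapse into a single bool query:

# Roughly: {'query': {'bool': {'filter': [{'term': ...}], 'must': [{'match': ...}],
#                              'must_not': [{'match': ...}]}}, 'aggs': {'per_tag': ...}}
print(s.to_dict())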
Example #34
    parser = argparse.ArgumentParser()
    parser.add_argument('--index',
                        required=True,
                        help='Index to search')
    parser.add_argument('--query',
                        default=None,
                        nargs=argparse.REMAINDER,
                        help='Lucene query')

    args = parser.parse_args()

    index = args.index
    if args.query:
        query = ' '.join(args.query)

    try:
        client = Elasticsearch()
        s = Search(using=client, index=index)

        q = Q('query_string', query=query)
        s = s.query(q)
        response = s[0:10].execute()

        for r in response:
            print('DATE= %s URL=%s' % (r['date'], r['url']))
            print('AUTHOR= %s' % r['author'])
            print('TITLE= %s' % r['title'])
            print('KEYWORDS= %s' % r['keywords'])
            print('----------------------------------------')

        print('%d Documents' % response.hits.total)
    except NotFoundError:
        print('Index %s does not exist' % index)
Example #35
def core_search(query):
    es = Elasticsearch(['http://elasticsearch613:9200/'])
    q = Q("multi_match", query=query, fields=['title', 'text', 'cat', 'tags' ])

    ss = Search(using=es, index='blog').query(q)
    return ss
Example #36
 def search(cls):
     return Search(using=get_es(),
                   index=get_index_name(),
                   doc_type=cls.get_doctype())
Example #37
def search(author):
    s = Search().filter('term', author=author)
    response = s.execute()
    return response
Example #38
 def test_get_es_search(self, es_data_client):
     view = self.create_view(es_data_client)
     expected = Search(using=es_data_client,
                       index='test',
                       doc_type=DataDocType)
     assert view.get_es_search().to_dict() == expected.to_dict()
Example #39
import time
from datetime import datetime
from subprocess import call

import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

start = time.perf_counter()  # time.clock() was removed in Python 3.8

raw_index = "mordred_raw"  #raw index name
enriched_index = "mordred_enriched"  #enriched index name
repo_url = "https://github.com/chaoss/grimoirelab-mordred.git"  #github repository url

es = Elasticsearch('http://localhost:9200', verify_certs=False)

call("p2o.py --enrich --index " + raw_index + " --index-enrich " +
     enriched_index + " -e http://localhost:9200 --no_inc --debug git  " +
     repo_url,
     shell=True)

s = Search(using=es, index=enriched_index)
s.aggs.bucket('by_authors', 'terms', field='author_name', size=10000).metric(
    'first_commit', 'min', field='author_date'
)  #aggregate on the basis of author name and find oldest commit date for each of them
s = s.sort("author_date")

result = s.execute()

buckets_result = result['aggregations']['by_authors']['buckets']
buckets = []
for bucket in buckets_result:
    first_commit = bucket['first_commit']['value'] / 1000
    buckets.append({
        'first_commit': datetime.utcfromtimestamp(first_commit),
        'author': bucket['key'],
        'commit_count': bucket['doc_count']
    })
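The unused pandas import above suggests the buckets were meant to feed a DataFrame next; a minimal sketch of that step (the sorting and column handling are assumptions, not part of the original):

authors_df = pd.DataFrame(buckets)  # columns: first_commit, author, commit_count
print(authors_df.sort_values('first_commit').head())  # earliest contributors first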
Example #40
    def __init__(self,
                 data=None,
                 files=None,
                 auto_id='id_%s',
                 prefix=None,
                 initial=None,
                 error_class=ErrorList,
                 label_suffix=None,
                 empty_permitted=False,
                 field_order=None,
                 use_required_attribute=None,
                 renderer=None,
                 user=None,
                 has_combo=False):
        super().__init__(data, files, auto_id, prefix, initial, error_class,
                         label_suffix, empty_permitted, field_order,
                         use_required_attribute, renderer)

        # Get topic_modellings
        s = Search(using=ES_CLIENT, index=ES_INDEX_TOPIC_MODELLING).filter('term', is_ready=True) \
                .source(['name', 'algorithm', 'number_of_topics', 'number_of_documents',
                         'source', 'datetime_from', 'datetime_to'
                         # 'perplexity', 'purity', 'contrast', 'coherence',
                         # 'tau_smooth_sparse_theta', 'tau_smooth_sparse_phi',
                         # 'tau_decorrelator_phi', 'tau_coherence_phi',
                         ])[:500]
        group = None
        if not user.is_superuser:
            group = get_user_group(user)
        topic_modellings = s.execute()
        topic_modellings = sorted(topic_modellings,
                                  key=lambda x: x.number_of_documents,
                                  reverse=True)
        topic_modellings = ((
            tm.name.lower(),
            f"{tm.name.replace('bigartm', 'tm')} - {tm.number_of_topics} топиков - {tm.number_of_documents} текстов - "
            + (f"{tm.source} - "
               if hasattr(tm, 'source') and tm.source else f"Все СМИ ") +
            (f"С {tm.datetime_from[:10]} - "
             if hasattr(tm, 'datetime_from') and tm.datetime_from else f"") +
            (f"По {tm.datetime_to[:10]} - "
             if hasattr(tm, 'datetime_to') and tm.datetime_to else f""))
                            for tm in topic_modellings
                            if user.is_superuser or (group and tm.name.lower(
                            ) in group.topic_modelling_names.split(",")))
        if has_combo:
            combo_indices = ES_CLIENT.indices.get_alias(
                f"{ES_INDEX_TOPIC_COMBOS}_*").keys()
            tms_with_combo = [
                ind.replace(f"{ES_INDEX_TOPIC_COMBOS}_", "").lower()
                for ind in combo_indices
            ]
            topic_modellings = filter(lambda x: x[0] in tms_with_combo,
                                      topic_modellings)
            self.fields['topic_weight_threshold'].required = False
        self.fields['topic_modelling'].choices = topic_modellings

        # Get topic_weight_thresholds
        self.fields[
            'topic_weight_threshold'].choices = get_topic_weight_threshold_options(
                user.is_superuser or hasattr(user, "expert"))
 def get_data_dsl(self):
     # The 'using' argument selects the Elasticsearch client instance and
     # 'index' narrows the scope. 'index' also accepts a list of indices,
     # e.g. index=["bank", "banner", "country"], or a wildcard pattern such
     # as index=["b*"], which covers every index whose name starts with "b";
     # a Search can likewise pin down a specific doc_type.
     s = Search(using=self.es, index=self.index_name)
     res = s.query("match", serialNo="368400630043389952").query(
         "match", is_result="1").highlight("is_result").execute()
     print(type(res))
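A minimal sketch of the index-scoping options described in the comment above (the index names are placeholders, and a reachable cluster is assumed):

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

es = Elasticsearch()
s_multi = Search(using=es, index=["bank", "banner", "country"])  # several explicit indices
s_glob = Search(using=es, index="b*")  # every index whose name starts with "b"
print(s_multi.count(), s_glob.count())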
Example #42
 def build_brand_es(self, args, category_search_condition):
     keyword_es = Search() \
         .query(category_search_condition)
     keyword_es = keyword_es.sort(*self.sort_condition(args))
     keyword_es = self.add_page_limit_to_brand_es(args, keyword_es)
     return keyword_es
Example #43
    def test_upercase_and_lowercase_search_give_same_results(self):
        """Pretty self-explanatory function name, isn't it ?"""

        if not self.manager.connected_to_es:
            return

        # 1. Index lowercase stuffs
        text_lc = 'test'

        topic_1_lc = TopicFactory(forum=self.forum,
                                  author=self.user,
                                  title=text_lc)

        tag_lc = TagFactory(title=text_lc)
        topic_1_lc.tags.add(tag_lc)
        topic_1_lc.subtitle = text_lc
        topic_1_lc.save()

        post_1_lc = PostFactory(topic=topic_1_lc, author=self.user, position=1)
        post_1_lc.text = post_1_lc.text_html = text_lc
        post_1_lc.save()

        tuto_lc = PublishableContentFactory(type='TUTORIAL')
        tuto_draft_lc = tuto_lc.load_version()

        tuto_lc.title = text_lc
        tuto_lc.authors.add(self.user)
        subcategory_lc = SubCategoryFactory(title=text_lc)
        tuto_lc.subcategory.add(subcategory_lc)
        tuto_lc.tags.add(tag_lc)
        tuto_lc.save()

        tuto_draft_lc.description = text_lc
        tuto_draft_lc.repo_update_top_container(text_lc, tuto_lc.slug, text_lc,
                                                text_lc)

        chapter1_lc = ContainerFactory(parent=tuto_draft_lc, db_object=tuto_lc)
        extract_lc = ExtractFactory(container=chapter1_lc, db_object=tuto_lc)
        extract_lc.repo_update(text_lc, text_lc)

        published_lc = publish_content(tuto_lc,
                                       tuto_draft_lc,
                                       is_major_update=True)

        tuto_lc.sha_public = tuto_draft_lc.current_version
        tuto_lc.sha_draft = tuto_draft_lc.current_version
        tuto_lc.public_version = published_lc
        tuto_lc.save()

        # 2. Index uppercase stuffs
        text_uc = 'TEST'

        topic_1_uc = TopicFactory(forum=self.forum,
                                  author=self.user,
                                  title=text_uc)

        topic_1_uc.tags.add(
            tag_lc)  # Note: a constraint forces tags title to be unique
        topic_1_uc.subtitle = text_uc
        topic_1_uc.save()

        post_1_uc = PostFactory(topic=topic_1_uc, author=self.user, position=1)
        post_1_uc.text = post_1_uc.text_html = text_uc
        post_1_uc.save()

        tuto_uc = PublishableContentFactory(type='TUTORIAL')
        tuto_draft_uc = tuto_uc.load_version()

        tuto_uc.title = text_uc
        tuto_uc.authors.add(self.user)
        tuto_uc.subcategory.add(subcategory_lc)
        tuto_uc.tags.add(tag_lc)
        tuto_uc.save()

        tuto_draft_uc.description = text_uc
        tuto_draft_uc.repo_update_top_container(text_uc, tuto_uc.slug, text_uc,
                                                text_uc)

        chapter1_uc = ContainerFactory(parent=tuto_draft_uc, db_object=tuto_uc)
        extract_uc = ExtractFactory(container=chapter1_uc, db_object=tuto_uc)
        extract_uc.repo_update(text_uc, text_uc)

        published_uc = publish_content(tuto_uc,
                                       tuto_draft_uc,
                                       is_major_update=True)

        tuto_uc.sha_public = tuto_draft_uc.current_version
        tuto_uc.sha_draft = tuto_draft_uc.current_version
        tuto_uc.public_version = published_uc
        tuto_uc.save()

        # 3. Index and search:
        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        result = self.client.get(reverse('search:query') + '?q=' + text_lc,
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response_lc = result.context['object_list'].execute()
        self.assertEqual(response_lc.hits.total, 8)

        result = self.client.get(reverse('search:query') + '?q=' + text_uc,
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response_uc = result.context['object_list'].execute()
        self.assertEqual(response_uc.hits.total, 8)

        for responses in zip(
                response_lc,
                response_uc):  # we should get results in the same order!
            self.assertEqual(responses[0].meta.id, responses[1].meta.id)
Example #44
 def get_queryset(self):
     return Search(using=amo.search.get_es(),
                   index=AddonIndexer.get_index_alias(),
                   doc_type=AddonIndexer.get_doctype_name())
Example #45
    def test_change_topic_impacts_posts(self):

        if not self.manager.connected_to_es:
            return

        # 1. Create a hidden forum belonging to a hidden group and add staff in it.
        text = 'test'

        group = Group.objects.create(name='Les illuminatis anonymes de ZdS')
        _, hidden_forum = create_category_and_forum(group)

        self.staff.groups.add(group)
        self.staff.save()

        # 2. Create a normal topic and index it
        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 2)  # indexing ok

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 1)  # ok
        self.assertEqual(response[0].meta.doc_type,
                         Post.get_es_document_type())
        self.assertEqual(response[0].forum_pk, self.forum.pk)
        self.assertEqual(response[0].topic_pk, topic_1.pk)
        self.assertEqual(response[0].topic_title, topic_1.title)

        # 3. Change topic title and reindex
        topic_1.title = 'new title'
        topic_1.save()

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 1)  # ok

        self.assertEqual(response[0].topic_title,
                         topic_1.title)  # title was changed

        # 4. connect with staff and move topic
        self.assertTrue(
            self.client.login(username=self.staff.username,
                              password='******'))

        data = {'move': '', 'forum': hidden_forum.pk, 'topic': topic_1.pk}
        response = self.client.post(reverse('topic-edit'), data, follow=False)

        self.assertEqual(302, response.status_code)

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(
            response.hits.total,
            1)  # Note: without staff, would not get any results (see below)

        self.assertEqual(response[0].forum_pk,
                         hidden_forum.pk)  # post was updated with new forum

        # 5. Topic is now hidden
        self.client.logout()

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 0)  # ok
Example #46
"""
Configuration is global so no client needs to be passed around.
"""
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, connections

"""
Default connection used where no other connection specified. Any configuration
methods just pass all parameters to the underlying elasticsearch-py client.
"""
connections.create_connection(hosts=["localhost"])

"""
Optionally specify an alias for the connection in case of multiple connections.
"""
connections.create_connection("prod", hosts=["localhost"])
s = Search(using="prod")
s.count()

"""
You can always just pass in your own client instance
"""
s = Search(using=Elasticsearch())
s.count()

"""
Any method on Search returns a clone so you need to always assign it back to
the same variable.
"""
s = Search()
s = s.params(q="fix")
Example #47
    def test_boosts(self):
        """Check if boosts are doing their job"""

        if not self.manager.connected_to_es:
            return

        # 1. Create topics (with identical titles), posts (with identical texts), an article and a tuto
        text = 'test'

        topic_1_solved_sticky = TopicFactory(forum=self.forum,
                                             author=self.user)
        topic_1_solved_sticky.title = text
        topic_1_solved_sticky.subtitle = ''
        topic_1_solved_sticky.solved_by = self.user
        topic_1_solved_sticky.is_sticky = True
        topic_1_solved_sticky.save()

        post_1 = PostFactory(topic=topic_1_solved_sticky,
                             author=self.user,
                             position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        post_2_useful = PostFactory(topic=topic_1_solved_sticky,
                                    author=self.user,
                                    position=2)
        post_2_useful.text = post_2_useful.text_html = text
        post_2_useful.is_useful = True
        post_2_useful.like = 5
        post_2_useful.dislike = 2  # l/d ratio above 1
        post_2_useful.save()

        topic_2_locked = TopicFactory(forum=self.forum,
                                      author=self.user,
                                      title=text)
        topic_2_locked.title = text
        topic_2_locked.subtitle = ''
        topic_2_locked.is_locked = True
        topic_2_locked.save()

        post_3_ld_below_1 = PostFactory(topic=topic_2_locked,
                                        author=self.user,
                                        position=1)
        post_3_ld_below_1.text = post_3_ld_below_1.text_html = text
        post_3_ld_below_1.like = 2
        post_3_ld_below_1.dislike = 5  # l/d ratio below 1
        post_3_ld_below_1.save()

        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, 'Who cares ?', 'Same here')
        ExtractFactory(container=chapter1, db_object=tuto)

        published_tuto = publish_content(tuto,
                                         tuto_draft,
                                         is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published_tuto
        tuto.save()

        article = PublishedContentFactory(type='ARTICLE', title=text)
        published_article = PublishedContent.objects.get(content_pk=article.pk)

        opinion_not_picked = PublishedContentFactory(type='OPINION',
                                                     title=text)
        published_opinion_not_picked = PublishedContent.objects.get(
            content_pk=opinion_not_picked.pk)

        opinion_picked = PublishedContentFactory(type='OPINION', title=text)
        opinion_picked.sha_picked = opinion_picked.sha_draft
        opinion_picked.date_picked = datetime.datetime.now()
        opinion_picked.save()

        published_opinion_picked = PublishedContent.objects.get(
            content_pk=opinion_picked.pk)

        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 10)

        # 2. Reset all boosts to 1
        for doc_type in settings.ZDS_APP['search']['boosts']:
            for key in settings.ZDS_APP['search']['boosts'][doc_type]:
                settings.ZDS_APP['search']['boosts'][doc_type][key] = 1.0

        # 3. Test posts
        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        # scores are equal without boosts:
        self.assertTrue(response[0].meta.score == response[1].meta.score ==
                        response[2].meta.score)

        settings.ZDS_APP['search']['boosts']['post']['if_first'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score == response[1].meta.score >
                        response[2].meta.score)
        self.assertEqual(response[2].meta.id, str(
            post_2_useful.pk))  # post 2 is the only one not first

        settings.ZDS_APP['search']['boosts']['post']['if_first'] = 1.0
        settings.ZDS_APP['search']['boosts']['post']['if_useful'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(post_2_useful.pk))  # post 2 is useful

        settings.ZDS_APP['search']['boosts']['post']['if_useful'] = 1.0
        settings.ZDS_APP['search']['boosts']['post']['ld_ratio_above_1'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(
            post_2_useful.pk))  # post 2 has an l/d ratio of 5/2

        settings.ZDS_APP['search']['boosts']['post']['ld_ratio_above_1'] = 1.0
        settings.ZDS_APP['search']['boosts']['post'][
            'ld_ratio_below_1'] = 2.0  # no one would do that in real life

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(
            post_3_ld_below_1.pk))  # post 3 has an l/d ratio of 2/5

        settings.ZDS_APP['search']['boosts']['post']['ld_ratio_below_1'] = 1.0

        # 4. Test topics
        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        # scores are equal without boosts:
        self.assertTrue(response[0].meta.score == response[1].meta.score)

        settings.ZDS_APP['search']['boosts']['topic']['if_sticky'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(topic_1_solved_sticky.pk))  # topic 1 is sticky

        settings.ZDS_APP['search']['boosts']['topic']['if_sticky'] = 1.0
        settings.ZDS_APP['search']['boosts']['topic']['if_solved'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(topic_1_solved_sticky.pk))  # topic 1 is solved

        settings.ZDS_APP['search']['boosts']['topic']['if_solved'] = 1.0
        settings.ZDS_APP['search']['boosts']['topic'][
            'if_locked'] = 2.0  # no one would do that in real life

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(topic_2_locked.pk))  # topic 2 is locked

        settings.ZDS_APP['search']['boosts']['topic'][
            'if_locked'] = 1.0  # no one would do that in real life

        # 5. Test published contents
        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        # scores are equal without boosts:
        self.assertTrue(
            response[0].meta.score == response[1].meta.score == response[2].
            meta.score == response[3].meta.score == response[4].meta.score)

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_article'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(published_article.pk))  # obvious

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_article'] = 1.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_tutorial'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(published_tuto.pk))  # obvious

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_tutorial'] = 1.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_opinion'] = 2.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_opinion_not_picked'] = 4.0
        # Note: in "real life", unpicked opinion would get a boost < 1.

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score >
                        response[2].meta.score)
        self.assertEqual(
            response[0].meta.id,
            str(published_opinion_not_picked.pk))  # unpicked opinion comes first
        self.assertEqual(response[1].meta.id, str(published_opinion_picked.pk))

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_opinion'] = 1.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_opinion_not_picked'] = 1.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_medium_or_big_tutorial'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(published_tuto.pk))  # obvious

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_medium_or_big_tutorial'] = 1.0

        # 6. Test global boosts
        # NOTE: scores are NOT the same for all documents; however hard one tries, small differences exist

        for model in self.indexable:

            # set a huge number to overcome the small differences:
            settings.ZDS_APP['search']['boosts'][
                model.get_es_document_type()]['global'] = 10.0

            result = self.client.get(reverse('search:query') + '?q=' + text,
                                     follow=False)

            self.assertEqual(result.status_code, 200)
            response = result.context['object_list'].execute()
            self.assertEqual(response.hits.total, 10)

            self.assertEqual(response[0].meta.doc_type,
                             model.get_es_document_type())  # obvious

            settings.ZDS_APP['search']['boosts'][
                model.get_es_document_type()]['global'] = 1.0
Example #48
    def test_change_publishedcontents_impacts_chapter(self):

        if not self.manager.connected_to_es:
            return

        # 1. Create middle-size content and index it
        text = 'test'

        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(
            text, tuto.slug, text,
            text)  # change title to be sure it will match

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract = ExtractFactory(container=chapter1, db_object=tuto)
        extract.repo_update(text, text)

        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 2)  # indexing ok

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()

        self.assertEqual(response.hits.total, 2)

        chapters = [r for r in response if r.meta.doc_type == 'chapter']
        self.assertEqual(chapters[0].meta.doc_type,
                         FakeChapter.get_es_document_type())
        self.assertEqual(chapters[0].meta.id,
                         published.content_public_slug + '__' + chapter1.slug)

        # 2. Change tuto: delete chapter and insert new one !
        tuto = PublishableContent.objects.get(pk=tuto.pk)
        tuto_draft = tuto.load_version()

        tuto_draft.children[0].repo_delete()  # chapter 1 is gone!

        another_text = 'another thing'
        self.assertTrue(
            text not in another_text
        )  # to prevent a future modification from breaking this test

        chapter2 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter2.repo_update(another_text, another_text, another_text)
        extract2 = ExtractFactory(container=chapter2, db_object=tuto)
        extract2.repo_update(another_text, another_text)

        published = publish_content(tuto, tuto_draft, is_major_update=False)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 2)  # 2 objects, not 3!

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()

        contents = [r for r in response if r.meta.doc_type != 'chapter']
        self.assertEqual(response.hits.total,
                         len(contents))  # no chapter found anymore

        result = self.client.get(reverse('search:query') + '?q=' +
                                 another_text + '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()
        chapters = [r for r in response if r.meta.doc_type == 'chapter']
        self.assertEqual(response.hits.total, 1)
        self.assertEqual(chapters[0].meta.doc_type,
                         FakeChapter.get_es_document_type())
        self.assertEqual(chapters[0].meta.id, published.content_public_slug +
                         '__' + chapter2.slug)  # got new chapter
Example #49
    def test_category_and_subcategory_impact_search(self):
        """If two contents do not belong to the same (sub)category"""

        if not self.manager.connected_to_es:
            return

        text = 'Did you ever hear the tragedy of Darth Plagueis The Wise?'

        # 1. Create two contents with different subcategories
        category_1 = 'category 1'
        subcategory_1 = SubCategoryFactory(title=category_1)
        category_2 = 'category 2'
        subcategory_2 = SubCategoryFactory(title=category_2)

        tuto_1 = PublishableContentFactory(type='TUTORIAL')
        tuto_1_draft = tuto_1.load_version()

        tuto_1.title = text
        tuto_1.authors.add(self.user)
        tuto_1.subcategory.add(subcategory_1)
        tuto_1.save()

        tuto_1_draft.description = text
        tuto_1_draft.repo_update_top_container(text, tuto_1.slug, text, text)

        chapter_1 = ContainerFactory(parent=tuto_1_draft, db_object=tuto_1)
        extract_1 = ExtractFactory(container=chapter_1, db_object=tuto_1)
        extract_1.repo_update(text, text)

        published_1 = publish_content(tuto_1,
                                      tuto_1_draft,
                                      is_major_update=True)

        tuto_1.sha_public = tuto_1_draft.current_version
        tuto_1.sha_draft = tuto_1_draft.current_version
        tuto_1.public_version = published_1
        tuto_1.save()

        tuto_2 = PublishableContentFactory(type='TUTORIAL')
        tuto_2_draft = tuto_2.load_version()

        tuto_2.title = text
        tuto_2.authors.add(self.user)
        tuto_2.subcategory.add(subcategory_2)
        tuto_2.save()

        tuto_2_draft.description = text
        tuto_2_draft.repo_update_top_container(text, tuto_2.slug, text, text)

        chapter_2 = ContainerFactory(parent=tuto_2_draft, db_object=tuto_2)
        extract_2 = ExtractFactory(container=chapter_2, db_object=tuto_2)
        extract_2.repo_update(text, text)

        published_2 = publish_content(tuto_2,
                                      tuto_2_draft,
                                      is_major_update=True)

        tuto_2.sha_public = tuto_2_draft.current_version
        tuto_2.sha_draft = tuto_2_draft.current_version
        tuto_2.public_version = published_2
        tuto_2.save()

        # 2. Index:
        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        result = self.client.get(reverse('search:query') + '?q=' + text,
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 4)  # Ok

        # 3. Test
        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&model=content&subcategory=' +
                                 subcategory_1.slug,
                                 follow=False)

        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertEqual([
            int(r.meta.id) for r in response
            if r.meta.doc_type == 'publishedcontent'
        ][0], published_1.pk)
        self.assertEqual([
            r.meta.id for r in response if r.meta.doc_type == 'chapter'
        ][0], tuto_1.slug + '__' + chapter_1.slug)

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&model=content&subcategory=' +
                                 subcategory_2.slug,
                                 follow=False)

        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertEqual([
            int(r.meta.id) for r in response
            if r.meta.doc_type == 'publishedcontent'
        ][0], published_2.pk)
        self.assertEqual([
            r.meta.id for r in response if r.meta.doc_type == 'chapter'
        ][0], tuto_2.slug + '__' + chapter_2.slug)
Example #50
    def test_basic_search(self):
        """Basic search and filtering"""

        if not self.manager.connected_to_es:
            return

        # 1. Index and test search:
        text = 'test'

        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        # create a middle-size content and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(
            text, tuto.slug, text,
            text)  # change title to be sure it will match

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        extract = ExtractFactory(container=chapter1, db_object=tuto)
        extract.repo_update(text, text)

        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        # nothing has been indexed yet:
        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        result = self.client.get(reverse('search:query') + '?q=' + text,
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()

        self.assertEqual(response.hits.total, 4)  # get 4 results

        # 2. Test filtering:
        topic_1 = Topic.objects.get(pk=topic_1.pk)
        post_1 = Post.objects.get(pk=post_1.pk)
        published = PublishedContent.objects.get(pk=published.pk)

        ids = {
            'topic': [topic_1.es_id],
            'post': [post_1.es_id],
            'content': [
                published.es_id,
                published.content_public_slug + '__' + chapter1.slug
            ],
        }

        search_groups = list(settings.ZDS_APP['search']['search_groups'])
        group_to_model = {
            k: v[1]
            for k, v in settings.ZDS_APP['search']['search_groups'].items()
        }

        for doc_type in search_groups:
            result = self.client.get(reverse('search:query') + '?q=' + text +
                                     '&models=' + doc_type,
                                     follow=False)
            self.assertEqual(result.status_code, 200)

            response = result.context['object_list'].execute()

            self.assertEqual(response.hits.total,
                             len(ids[doc_type]))  # get 1 result of each …
            for i, r in enumerate(response):
                self.assertIn(
                    r.meta.doc_type,
                    group_to_model[doc_type])  # … and only of the right type …
                self.assertEqual(r.meta.id,
                                 ids[doc_type][i])  # … with the right id!
Example #51
    def search(self, query_text, locale=None, fields=None):
        """
        Return relevant articles given search text.

        Finding the query term in the title of an article is given twice
        as much weight as finding the text in the body.

        After the most relevant articles are obtained, they are ranked by the
        ranking module (uses view counts here, but can be easily extended).
        
        Args:
            query_text(str): Text to be searched.
            locale(str): If given, restrict results to this locale.
            fields(list(str)): If specified, restrict the fields returned to
                this list.

        Returns:
            list[dict]: A ranked list of dictionaries representing articles:
                [
                    {
                        'id': str,
                        'title': str,
                        'body': str,
                        'locale': str,
                    },
                    .
                    .
                ]
                
        """
        # Create Search object to "match" query text against the title and body
        # of articles stored in the Knowledge base.
        s = Search(
            using=self.client,
            index=self.INDEX,
            doc_type=self.TYPE
        ).query(
            'multi_match',
            query=query_text,
            fields=['title^2', 'body']
        )
        
        # If locale is provided, use it to filter the set of documents that are
        # queried for.
        if locale:
            s = s.filter('term', locale=locale)

        # Restrict the returned fields if specified; source(None) leaves the
        # full document source in place.
        s = s.source(fields)
   
        response = s.execute()
        results, result_dict = [], {}
        for hit in response:
            article_id = hit.meta['id']
            result_dict[article_id] = hit.to_dict()  # hit's source as a plain dict
            result_dict[article_id]['id'] = article_id

            # Retrieve view count for each relevant article.
            results.append((article_id, self.redis.get(article_id)))

        # Rank results using Ranking function. Currently sorts relevant results by
        # view counts.
        ranked_results = Ranker.rank(results)
        ranked_articles = [result_dict[article_id] for article_id in ranked_results]

        return ranked_articles
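
# A minimal, self-contained sketch of the technique above, not the original
# project's API: boost "title" twice as much as "body" with a multi_match
# query, then re-rank the hits by an external popularity counter. The index
# name, query text and the view_counts dict are illustrative stand-ins (the
# method above reads its counters from Redis).
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

client = Elasticsearch()
view_counts = {'a1': 10, 'a2': 3}  # article_id -> view count (illustrative)

s = Search(using=client, index='knowledge-base').query(
    'multi_match', query='reset password', fields=['title^2', 'body'])
response = s.execute()

# Most-viewed articles first, mirroring the Ranker.rank() step above.
ranked = sorted(response, key=lambda hit: view_counts.get(hit.meta.id, 0),
                reverse=True)
for hit in ranked:
    print(hit.meta.id, hit.title)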
Example #52
    def test_hidden_forums_give_no_results_if_user_not_allowed(self):
        """Long name, isn't ?"""

        if not self.manager.connected_to_es:
            return

        # 1. Create a hidden forum belonging to a hidden staff group.
        text = 'test'

        group = Group.objects.create(name='Les illuminatis anonymes de ZdS')
        _, hidden_forum = create_category_and_forum(group)

        self.staff.groups.add(group)
        self.staff.save()

        topic_1 = TopicFactory(forum=hidden_forum,
                               author=self.staff,
                               title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 2)  # indexing ok

        # 2. Search while not logged in and get no result
        result = self.client.get(reverse('search:query') + '?q=' + text,
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 0)

        # 3. Connect with user (not a member of the group), search, and get no result
        self.assertTrue(
            self.client.login(username=self.user.username,
                              password='******'))

        result = self.client.get(reverse('search:query') + '?q=' + text,
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 0)

        # 4. Connect with staff, search, and get the topic and the post
        self.client.logout()
        self.assertTrue(
            self.client.login(username=self.staff.username,
                              password='******'))

        result = self.client.get(reverse('search:query') + '?q=' + text,
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)  # ok!
Example #53
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

client = Elasticsearch()

s = Search().using(client).query("match", title="use")
for hit in s:
    print(hit.title)

print(100 * "*")

## do a term query: exact match on the indexed term, unlike the
## analyzed "match" query above
s = Search().using(client).query("term", title="snowball")
response = s.execute()
if len(response) == 0:
    print("query %s is empty" % s.to_dict())
for hit in response:
    print(hit.title)
print(100 * "*")

## do a terms query (matches documents whose tags contain any of the exact terms)
s = Search().using(client).query("terms", tags=["test"])
for hit in s:
    print(hit.title)

print(100 * "*")
Example #54
def build_query_body(item_uri):
    """Build query dict ready to pass to Elasticsearch search instance for retrieving a single item by URI."""
    search = Search(index='pips').query('term', _id=item_uri)

    return search.to_dict()
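
# For reference, a sketch of what this helper emits: to_dict() renders the
# single term clause (the URI value here is an illustrative stand-in).
from elasticsearch_dsl import Search

body = Search(index='pips').query('term', _id='b0000001').to_dict()
print(body)  # {'query': {'term': {'_id': 'b0000001'}}}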
Example #55
 def get_count(self):
     s = Search(using=self.es, index=self.index_pattern)
     if self.query:
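         # update_from_dict() mutates the Search in place, so the return
         # value does not need to be reassigned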
         s.update_from_dict({"query": self.query})
     return s.count()
Example #56
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q
import numpy as np
from matplotlib_venn import venn3
from matplotlib import pyplot as plt

# Define a default Elasticsearch client
#connections.create_connection(hosts=['http://172.20.30.70:9200/'])

#elasticServer = 'http://172.20.30.70:9200/'	#prod
elasticServer = 'http://172.20.31.19:9200/'  #dev

client = Elasticsearch(hosts=[elasticServer])

#q = Q('bool', must=[Q('match', index='propertypriceregister'), Q('match', Type='propertypriceregister')])
q = Q('match', id='_search')  # built but never applied below

# .query() with no arguments defaults to a match_all query
s = Search(using=client,
           index="propertypriceregister",
           doc_type="propertypriceregister").query()

s2 = Search(using=client, index="daft", doc_type="daftproperty").query()
s3 = Search(using=client, index="myhome", doc_type="myhomeproperty").query()
s4 = Search(using=client, index="daftdrop",
            doc_type="daftdropproperty").query()
'''
count = s.count()
for i in range(0, (count // 1000) + 1):
    response = s[(i * 1000):((i + 1) * 1000)].execute()
    #response = s.execute()
    print('Total %d hits found.' % response.hits.total)
'''
Example #57
from connectors.elasticsearch_connector import ElasticsearchConnector
from elasticsearch_dsl import Search
from elasticsearch_dsl.query import MultiMatch

es_man = ElasticsearchConnector(host="localhost", port="9220")
es_man.connect()

index = "fdg-article"
search_key = "publisher"
uuid = "7457b5f27a46e69e3f891767d01d2c6f6c5829132a4c03e78f4858828d539fc983a70a3c43ff8a082a8650c0972349eafb50bda7a44062428e2b405249a387f3"

a = "7457b5f27a46e69e3f891767d01d2c6f6c5829132a4c03e78f4858828d539fc983a70a3c43ff8a082a8650c0972349eafb50bda7a44062428e2b405249a387f3"
s = Search(using=es_man.es, index=index).query("match", publisher=uuid)
s = s.query("match", publisher=uuid)
s = s.execute()

multi_match = MultiMatch(query=uuid, fields=['publisher'])

s2 = Search(using=es_man.es, index=index).query(multi_match)
s2 = s2.execute()

art_index = "fdg-textscore"
art_id1 = "8796aff2a14a1ea1539265f76b044f1faf00304d6d9e237aaa21da6c4bab2166f0bed8a2eb99a05d18bc945f811f250da1f4c5a4acbfff1a213bc773894edcd1"
art_id2 = "8796aff2a14a1ea1539265f76b044f1faf00304d6d9e237aaa21da6c4bab2166be47575aa0278fdaaccf0d0aac645381b6db09383e58519fc3589398b63777b3"

sss = Search(using=es_man.es, index=art_index) \
    .filter("terms", _id=[art_id1, art_id2])

response = sss.execute()

# ------------------------------------------
Example #58
 def __len__(self):
     """Returns the total number of entries in the collection."""
     return Search(using=self.client, index=self.name).execute().hits.total
Example #59
 def _Search(self, indexname):
     """
 it returns the object which can be used for reatriving ceratin value from the DB
 """
     return Search(using=self.__client, index=indexname)
Example #60
    def _build_query(self):
        query = Q()

        source = ['id']
        sort = []

        aggregations = {}
        query_string = None
        as_list = as_dict = False

        for action, value in self.steps:
            if action == 'order_by':
                for key in value:
                    if key.startswith('-'):
                        sort.append({key[1:]: 'desc'})
                    else:
                        sort.append(key)
            elif action == 'values':
                source.extend(value)
                as_list, as_dict = True, False
            elif action == 'values_dict':
                if value:
                    source.extend(value)
                as_list, as_dict = False, True
            elif action == 'query':
                query &= self._process_queries(value)
            elif action == 'filter':
                query &= self._process_filters(value)
            elif action == 'source':
                source.extend(value)
            elif action == 'aggregate':
                aggregations.update(value)
            elif action == 'filter_query_string':
                query_string = value
            else:
                raise NotImplementedError(action)

        # If we have a raw query string we are going to apply all sorts
        # of boosts and filters to improve relevance scoring.
        #
        # We are using the same rules that `search.filters:SearchQueryFilter`
        # implements to have a single-source of truth for how our
        # scoring works.
        from olympia.search.filters import SearchQueryFilter

        search = Search().query(query)

        if query_string:
            search = SearchQueryFilter().apply_search_query(
                query_string, search)

        if sort:
            search = search.sort(*sort)

        if source:
            search = search.source(source)

        body = search.to_dict()

        # These are manually added for now to simplify a partial port to
        # elasticsearch-dsl
        if self.start:
            body['from'] = self.start
        if self.stop is not None:
            body['size'] = self.stop - self.start
        if aggregations:
            body['aggs'] = aggregations

        self.source, self.as_list, self.as_dict = source, as_list, as_dict
        return body
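
# A stand-alone sketch of the manual pagination mapping done at the end of
# _build_query() above: slice bounds become "from"/"size" keys in the raw
# body. The start/stop values are illustrative stand-ins for self.start and
# self.stop.
from elasticsearch_dsl import Search

search = Search().query('match_all').source(['id'])
body = search.to_dict()

start, stop = 20, 40
if start:
    body['from'] = start
if stop is not None:
    body['size'] = stop - start

print(body)
# {'query': {'match_all': {}}, '_source': ['id'], 'from': 20, 'size': 20}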