def test_scan_iterates_through_all_docs(data_client):
    s = Search(index='git').filter('term', _type='commits')

    commits = list(s.scan())

    assert 52 == len(commits)
    assert set(d['_id'] for d in DATA if d['_type'] == 'commits') == set(c.meta.id for c in commits)
	def GetAuditData(self, case, child_id, data_type, start=None, length=None, str_query=None, sort=None, order=None):
		q = ['w32registryraw', 'filedownloadhistory', 'urlhistory', 'timeline', 'w32apifiles', 'w32rawfiles', 'w32eventlogs']

		if data_type in q:
			query = search_queries.GetGeneratorQuery(data_type, str_query, case, child_id, start, length, sort, order)
		else:
			s = Search()
			s = s[0:1000]
			t = Q('query_string', default_field="ComputerName.raw", query=child_id) & Q('query_string', default_field="CaseInfo.case_name", query=case)
			query = s.query(t).filter('term', AuditType__Generator=data_type)

		try:
			r = requests.post(self.es_host + ":" + self.es_port + self.index + self.type_audit_type + '/_search', data=json.dumps(query.to_dict()), auth=(self.elastic_user, self.elastic_pass), verify=False)
		except ConnectionError as e:
			ret = {"connection_error": e.args[0]}
			return ret

		data = []

		try:
			for x in r.json()['hits']['hits']:
				data.append(x)
		except KeyError:
			return data

		return data
	def BuildRootTree(self):
		s = Search()
		t = Q('query_string', query="*")
		aggs_casenum = A('terms', field="CaseInfo.case_name", size=0)

		s.aggs.bucket('casenum', aggs_casenum)
		query = s.query(t)

		try:
			r = requests.post(self.es_host + ":" + self.es_port + self.index + self.type_audit_type + '/_search', data=json.dumps(query.to_dict()), auth=(self.elastic_user, self.elastic_pass), verify=False)
		except ConnectionError as e:
			ret = {"connection_error": e.args[0]}
			return ret

		data = [{
			"id": "current_inv", "parent": "#", "text": "Current Investigations", "type": "root"
		}, {
			"id": "comp_inv", "parent": "#", "text": "Completed Investigations", "type": "root"
		}]

		for x in r.json()['aggregations']['casenum']['buckets']:
			data.append({
				"id": x['key'], "parent": "current_inv", "text": x['key'], "children": True, "type": "case"
			})

		return data
    def get_files_in_path(self, dir_path):
        ''' gets all es file names from es in a given path '''
        dir_hash = FileResource.get_hash(dir_path)
        # Search() does not accept the raw client's index/doc_type/body kwargs;
        # scope the Search itself and express the term query through the DSL.
        s = Search(index=self.index, doc_type=self.type).query(
            "term", file_dir_hash=dir_hash)

        response = s.execute()

        files = []

        for hit in response:
            files.append(hit.file_uri)

        return files
	def BuildAuditAggs(self, child_id, parent_id):
		s = Search()
		s = s[0:0]  # aggregations only; no hits needed
		t = Q('query_string', default_field="CaseInfo.case_name", query=parent_id) & Q('match', ComputerName=child_id)
		aggs_generator = A('terms', field='AuditType.Generator', size=0)

		s.aggs.bucket('datatypes', aggs_generator)
		query = s.query(t)

		try:
			r = requests.post(self.es_host + ":" + self.es_port + self.index + self.type_audit_type + '/_search', data=json.dumps(query.to_dict()), auth=(self.elastic_user, self.elastic_pass), verify=False)
		except ConnectionError as e:
			ret = {"connection_error": e.args[0]}
			return ret

		data = []
		exclude = ['w32processes-memory', 'stateagentinspector', 'w32disks']

		for y in r.json()['aggregations']['datatypes']['buckets']:
			if not y['key'] in exclude:
				data.append({
						"id": y['key'], "parent": child_id, "text": y['key'], "type": "audit", "a_attr": {"href": "#" + y['key'] + '/' + parent_id + "/" + child_id }
					})

		return data
Example #6
    def get(self, request, *args, **kwargs):
        q = request.GET.get('q')

        # Make search.
        queries = [
            query.Q('match', slug=self._phrase(q)),  # Slug.
            query.Q('match', type=self._phrase(q)),  # Type.
            query.Q('match', search_names=self._phrase(q)),  # Name.
            query.Q('prefix', carrier=q),  # Shelf carrier.
            query.Q('term', region=q)  # Shelf region.
        ]
        sq = query.Bool(should=queries)

        # Search.
        res = {'apps': [], 'brands': [], 'collections': [], 'shelves': []}
        es = Search(using=FeedItemIndexer.get_es(),
                    index=self.get_feed_element_index())
        feed_elements = es.query(sq).execute().hits
        if not feed_elements:
            return response.Response(res, status=status.HTTP_404_NOT_FOUND)

        # Deserialize.
        ctx = {'app_map': self.get_apps(request,
                                        self.get_app_ids_all(feed_elements)),
               'request': request}
        for feed_element in feed_elements:
            item_type = feed_element.item_type
            serializer = self.SERIALIZERS[item_type]
            data = serializer(feed_element, context=ctx).data
            res[self.PLURAL_TYPES[item_type]].append(data)

        # Return.
        return response.Response(res, status=status.HTTP_200_OK)
	def GetAuditDataMain(self, data):
		s = Search()
		s = s[0:1000]
		s = s.highlight('*')
		s = s.highlight_options(require_field_match=False)
		t = Q('query_string', query=data) & ~Q('query_string', default_field="AuditType.Generator", query="stateagentinspector") & ~Q('query_string', default_field="AuditType.Generator", query="w32processes-tree")

		query = s.query(t)

		try:
			r = requests.post(self.es_host + ":" + self.es_port + self.index + self.type_audit_type + '/_search', data=json.dumps(query.to_dict()), auth=(self.elastic_user, self.elastic_pass), verify=False)
		except ConnectionError as e:
			ret = {"connection_error": e.args[0]}
			return ret

		data = []

		try:
			for x in r.json()['hits']['hits']:
				for y, v in x['highlight'].items():
					data.append({
							"doc_id": x['_id'],
							"endpoint": x['_parent'],
							"audittype": x['_source']['AuditType']['Generator'],
							"field": y,
							"response": v
						})
		except KeyError:
			pass

		return data
    def search(self):
        self.reindex(Addon)

        qs = Search(using=amo.search.get_es(),
                    index=AddonIndexer.get_index_alias(),
                    doc_type=AddonIndexer.get_doctype_name())
        return qs.filter('term', id=self.addon.pk).execute()[0]
    def query(self, client):
        """Method that actually queries elasticsearch"""
        # Set up our search parameters
        voq = self.config.get("query", "{}_voname".format(self.vo.lower()))
        productioncheck = '*Role=Production*'

        start_date = self.datesplit_pattern.split(self.start_time)
        starttimeq = datetime(*[int(elt) for elt in start_date]).isoformat()

        end_date = self.datesplit_pattern.split(self.end_time)
        endtimeq = datetime(*[int(elt) for elt in end_date]).isoformat()

        # Generate the index pattern based on the start and end dates
        indexpattern = indexpattern_generate(start_date, end_date)

        if self.verbose:
            print(indexpattern, file=sys.stdout)
            sleep(3)

        # Elasticsearch query
        resultset = Search(using=client, index=indexpattern) \
            .query("wildcard", VOName=productioncheck) \
            .filter(Q({"term": {"VOName": voq}})) \
            .filter("range", EndTime={"gte": starttimeq, "lt": endtimeq}) \
            .filter(Q({"term": {"ResourceType": "Payload"}}))

        if self.verbose:
            print(resultset.to_dict())

        return resultset
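The date handling above splits a timestamp string on its separators and rebuilds a datetime for the range filter; a small illustration (this standalone datesplit_pattern is an assumption, consistent with how the attribute is used here):

import re
from datetime import datetime

datesplit_pattern = re.compile(r'[-/ :]')
parts = datesplit_pattern.split('2016-06-01 00:00:00')
print(datetime(*[int(p) for p in parts]).isoformat())  # -> 2016-06-01T00:00:00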
Example #10
 def _filter(self, req=None, data=None):
     req = req or RequestFactory().get('/', data=data or {})
     queryset = Search()
     for filter_class in self.filter_classes:
         queryset = filter_class().filter_queryset(req, queryset,
                                                   self.view_class)
     return queryset.to_dict()
  def searchTweets(keyword, latlondist):
    #Variables that contains the user credentials to access Twitter API 
    if TwitterHelper.AWS_ACCESS_KEY == None:
      raise KeyError("Please set the AWS_ACCESS_KEY env. variable")
    
    if TwitterHelper.AWS_SECRET_KEY == None:
      raise KeyError("Please set the AWS_SECRET_KEY env. variable")

    s = Search()
    if latlondist != None:
      locJson = json.loads(latlondist)
      s = s.query({"filtered" : {"query" : {"match_all" : {}}, "filter" : {"geo_distance" : {"distance" : locJson['dist'], "location" : {"lat" : locJson['lat'], "lon" : locJson['lon']}}}}})

    if keyword != None:
      q = Q("match_phrase", text = keyword)
      s = s.query(q)
    
    scanResp = None
    scanResp = helpers.scan(client = TwitterHelper.ES, query = s.to_dict(), scroll = "1m", index = "tweets", timeout = "1m")

    arr = []
    for resp in scanResp:
      hit = resp['_source']
      d = {}
      d['name'] = hit['name']
      d['text'] = hit['text']
      d['sentiment'] = hit['sentiment']
      d['lat'] = hit['location']['lat']
      d['lon'] = hit['location']['lon']
      arr.append(d)
    allD = {}
    allD['tweets'] = arr
    mapInput = json.dumps(allD)
    return mapInput
def test_scan_iterates_through_all_docs(data_client):
    s = Search(index='flat-git')

    commits = list(s.scan())

    assert 52 == len(commits)
    assert set(d['_id'] for d in FLAT_DATA) == set(c.meta.id for c in commits)
    def handle(self, *args, **options):
        min_id = FailureLine.objects.order_by('id').values_list("id", flat=True)[0] - 1
        chunk_size = options['chunk_size']

        if options["recreate"]:
            connection.indices.delete(TestFailureLine._doc_type.index, ignore=404)
            TestFailureLine.init()
        else:
            if connection.indices.exists(TestFailureLine._doc_type.index):
                self.stderr.write("Index already exists; can't perform import")
                return

        while True:
            rows = (FailureLine.objects
                    .filter(id__gt=min_id)
                    .order_by('id')
                    .values("id", "job_guid", "action", "test", "subtest",
                            "status", "expected", "message", "best_classification_id",
                            "best_is_verified"))[:chunk_size]
            if not rows:
                break
            es_lines = []
            for item in rows:
                es_line = failure_line_from_value(item)
                if es_line:
                    es_lines.append(es_line)
            self.stdout.write("Inserting %i rows" % len(es_lines))
            bulk_insert(es_lines)
            min_id = rows[len(rows) - 1]["id"]
            time.sleep(options['sleep'])
        s = Search(doc_type=TestFailureLine).params(search_type="count")
        self.stdout.write("Index contains %i documents" % s.execute().hits.total)
Example #14
 def authors(self, num_columns=0):
     """
     @param num_columns: int If non-zero, break up list into columns
     """
     s = Search(
         using=docstore._get_connection(settings.DOCSTORE_HOSTS),
         index=settings.DOCSTORE_INDEX,
         doc_type='authors'
     ).fields([
         'url_title', 'title', 'title_sort', 'lastmod'
     ])[0:docstore.MAX_SIZE]
     response = s.execute()
     authors = []
     for hit in response:
         url_title = hit.url_title[0]
         title = hit.title[0]
         title_sort = hit.title_sort[0]
         lastmod = hit.lastmod[0]
         if title and title_sort:
             author = Author()
             author.url_title = url_title
             author.title = title
             author.title_sort = title_sort
             author.lastmod = datetime.strptime(lastmod, mediawiki.TS_FORMAT)
             authors.append(author)
     authors = sorted(authors, key=lambda a: a.title_sort)
     if num_columns:
         return _columnizer(authors, num_columns)
     return authors
def test_inner_hits_are_wrapped_in_response(data_client):
    s = Search(index='git')[0:1].query('has_parent', parent_type='repo', inner_hits={}, query=Q('match_all'))
    response = s.execute()

    commit = response.hits[0]
    assert isinstance(commit.meta.inner_hits.repo, response.__class__)
    assert repr(commit.meta.inner_hits.repo[0]).startswith("<Hit(git/doc/elasticsearch-dsl-py): ")
Example #16
    def from_es_id(cls,es,es_id,access_token,instance,version=None):

        index_exists = es.indices.exists(index=cls.ES_INDEX)
        type_exists =  es.indices.exists_type(index=cls.ES_INDEX,
                                              doc_type=cls.ES_TYPE)

        if not all([index_exists,type_exists]):
            raise Exception('Elastic index or type does not exist. '
                            'Cannot find {c} in Elasticsearch '
                            'to create an instance'.format(c=cls.__name__))

        find_instance = Search(using=es,index=cls.ES_INDEX) \
                        .query(Q("match",_id=es_id))

        r = find_instance.execute()
        if not r:
            raise Exception('Cannot find elasticsearch {t}' \
                            ' instance from elasticsearch ' \
                            'id:{id}'.format(t=cls.__name__,
                                            id=es_id))


        sf_id = r[0]._d_.pop('Id',None)
        if sf_id is None:
            raise Exception('Missing a valid SF Id in '
                            'Elasticsearch document id:{i}'.format(i=es_id))

        sf_data = r[0]._d_

        return cls(es=es,
                   sf_id=sf_id,
                   sf_data=sf_data,
                   access_token=access_token,
                   instance=instance)
Example #17
 def categories(self):
     s = Search(
         using=docstore._get_connection(settings.DOCSTORE_HOSTS),
         index=settings.DOCSTORE_INDEX,
         doc_type='articles'
     ).fields([
         'title', 'title_sort', 'categories',
     ])[0:docstore.MAX_SIZE]
     if not settings.MEDIAWIKI_SHOW_UNPUBLISHED:
         s = s.query('match', published=True)
     response = s.execute()
     pages = []
     for hit in response:
         page = Page()
         page.url_title = hit.title[0]
         page.title = hit.title[0]
         page.title_sort = hit.title_sort[0]
         page.categories = hit.get('categories', [])
         pages.append(page)
     articles = sorted(pages, key=lambda page: page.title_sort)
     categories = {}
     for page in articles:
         for category in page.categories:
             # exclude internal editorial categories
             if category not in settings.MEDIAWIKI_HIDDEN_CATEGORIES:
                 if category not in categories.keys():
                     categories[category] = []
                 # pages already sorted so category lists will be sorted
                 if page not in categories[category]:
                     categories[category].append(page)
     return categories
Example #18
def index_single(es, network, channel, date, lines):
    # Delete existing
    delete_existing = Search(
        using=es,
        index='moffle',
    ).query(
        "term", network=network,
    ).query(
        "term", channel=channel,
    ).query(
        "term", date=date,
    )

    es.delete_by_query(
        index='moffle',
        body=delete_existing.to_dict(),
    )

    actions = [x for x in (line_to_index_action(network, channel, date, i, line) for i, line in lines) if x]
    while actions:
        retries = 0
        try:
            success_count, _ = bulk(es, actions)
            log("{}/{}/{}: indexed {} lines".format(network, channel, date, success_count))
            return success_count
        except Exception as e:
            retries += 1
            log("{}/{}/{}: Attempt {}/3: {}".format(network, channel, date, retries, e))
            if retries > 3:
                raise
Example #19
    def exists(self):

        find_instance = Search(using=self.es,index=self.index) \
                        .query(Q("match",Id=self.sf_id))

        response = find_instance.execute()
        return response
Example #20
	def BuildRootTree(self):
		s = Search()
		t = Q('has_parent', type='hostname', query=Q('query_string', query="*"))
		aggs = A('terms', field='AuditType.Generator', size=16)

		s.aggs.bucket('datatypes', aggs)
		query = s.query(t)

		try:
			r = requests.post(self.es_host + ":" + self.es_port + self.index + self.type_audit_type + '/_search', data=json.dumps(query.to_dict()))
		except ConnectionError as e:
			ret = {"connection_error": e.args[0]}
			return ret

		data = [{
			"id": "stackable", "parent": "#", "text": "Stackable Data"
		}]

		i = ['w32services', 'w32tasks', 'w32scripting-persistence', 'w32prefetch', 'w32network-dns', 'urlhistory']

		for x in r.json()['aggregations']['datatypes']['buckets']:
			if x['key'] in i:
				data.append({
					"id" : x['key'], "parent": "stackable", "text": x['key'], "children": True
				})

		return data
    def es_read(self, log_id, offset):
        """
        Returns the logs matching log_id in Elasticsearch and next offset.
        Returns '' if no log is found or there was an error.
        :param log_id: the log_id of the log to read.
        :type log_id: str
        :param offset: the offset start to read log from.
        :type offset: str
        """

        # Offset is the unique key for sorting logs given log_id.
        s = Search(using=self.client) \
            .query('match', log_id=log_id) \
            .sort('offset')

        s = s.filter('range', offset={'gt': offset})

        logs = []
        if s.count() != 0:
            try:

                logs = s[self.MAX_LINE_PER_PAGE * self.PAGE:self.MAX_LINE_PER_PAGE] \
                    .execute()
            except Exception as e:
                msg = 'Could not read log with log_id: {}, ' \
                      'error: {}'.format(log_id, str(e))
                self.log.exception(msg)

        return logs
Example #22
def search():
    q = request.args.get('q')
    #resp = es.search(index='hoe', doc_type='record', q=q, body=aggs)
    #logging.info(q)

    s = Search(using=es, index='hoe', doc_type='record')
    s.aggs.bucket('library_place', 'terms', field='library-place')
    s.aggs.bucket('type', 'terms', field='type')
    s.aggs.bucket('genre', 'terms', field='genre')
    s.aggs.bucket('keywords', 'terms', field='keywords.label')
    s.aggs.bucket('author', 'terms', field='author.literal')
    s = s.query(Q('multi_match', query=q, fields=['_all']))
    filters = []
    if 'filter' in request.args:
        filters = request.args.getlist('filter')
        logging.info(filters)
        for filter in filters:
            cat, val = filter.split(':')
            cat = cat.replace('_', '-')
            filter_dict = {}
            filter_dict.setdefault(cat, val)
            logging.info(cat)
            s = s.filter(F('term', **filter_dict))
    #if request.args
    resp = s.execute()
    #logging.info(resp)
    #logging.info(resp.aggregations.per_category.buckets)
    return render_template('resultlist.html', records=resp.to_dict().get('hits'), facets=resp.aggregations.to_dict(), header=q, query=q, filters=filters)
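The filter strings are expected in 'category:value' form, with underscores standing in for hyphens in the field name; the parsing step in the loop above boils down to this (the value is hypothetical):

cat, val = 'library_place:Hannover'.split(':')
print(cat.replace('_', '-'), val)  # -> library-place Hannover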
Example #23
 def pages():
     """Returns list of published light Page objects.
     
     @returns: list
     """
     KEY = 'encyc-front:pages'
     TIMEOUT = 60*5
     data = cache.get(KEY)
     if not data:
         s = Search(doc_type='articles').filter('term', published_encyc=True)[0:MAX_SIZE]
         s = s.sort('title_sort')
         s = s.fields([
             'url_title',
             'title',
             'title_sort',
             'published',
             'modified',
             'categories',
         ])
         response = s.execute()
         data = [
             Page(
                 url_title  = hitvalue(hit, 'url_title'),
                 title      = hitvalue(hit, 'title'),
                 title_sort = hitvalue(hit, 'title_sort'),
                 published  = hitvalue(hit, 'published'),
                 modified   = hitvalue(hit, 'modified'),
                 categories = hit.get('categories',[]),
                )
             for hit in response
             if hitvalue(hit, 'published')
         ]
         cache.set(KEY, data, TIMEOUT)
     return data
Example #24
        def get_highlights():
            wiki_field = 'wiki_content'
            qb_field = 'qb_content'
            text = request.form['text']
            s = Search(index='qb')[0:10].query(
                'multi_match', query=text, fields=[wiki_field, qb_field])
            s = s.highlight(wiki_field).highlight(qb_field)
            results = list(s.execute())

            if len(results) == 0:
                highlights = {'wiki': [''],
                              'qb': [''],
                              'guess': ''}
            else:
                guess = results[0] # take the best answer
                _highlights = guess.meta.highlight
                try:
                    wiki_content = list(_highlights.wiki_content)
                except AttributeError:
                    wiki_content = ['']

                try:
                    qb_content = list(_highlights.qb_content)
                except AttributeError:
                    qb_content = ['']

                highlights = {'wiki': wiki_content,
                              'qb': qb_content,
                              'guess': guess.page}
            return jsonify(highlights)
Example #25
 def authors(num_columns=None):
     """Returns list of published light Author objects.
     
     @returns: list
     """
     KEY = 'encyc-front:authors'
     TIMEOUT = 60*5
     data = cache.get(KEY)
     if not data:
         s = Search(doc_type='authors')[0:MAX_SIZE]
         s = s.sort('title_sort')
         s = s.fields([
             'url_title',
             'title',
             'title_sort',
             'published',
             'modified',
         ])
         response = s.execute()
         data = [
             Author(
                 url_title  = hitvalue(hit, 'url_title'),
                 title      = hitvalue(hit, 'title'),
                 title_sort = hitvalue(hit, 'title_sort'),
                 published  = hitvalue(hit, 'published'),
                 modified   = hitvalue(hit, 'modified'),
             )
             for hit in response
             if hitvalue(hit, 'published')
         ]
         cache.set(KEY, data, TIMEOUT)
     if num_columns:
         return _columnizer(data, num_columns)
     return data
Example #26
def get_journals_by_collection_institution(collection_acronym, page_from=0, page_size=1000):

    search = Search(index=INDEX).query(
             "nested", path="collections", query=Q("match", collections__acronym=collection_acronym))

    search = search.filter("exists", field="sponsors")

    search = search[page_from:page_from + page_size]
    search_response = search.execute()

    meta = {
        'total': search_response.hits.total,
    }

    sponsors = {}
    for journal in search_response:

        j = {'jid': journal.jid,
             'title': journal.title,
             'current_status': journal.current_status,
             'last_issue': journal.last_issue,
             'issue_count': journal.issue_count
             }

        for sponsor in journal['sponsors']:
            sponsors.setdefault(sponsor, []).append(j)

    result = {
        'meta': meta,
        'objects': sponsors
    }

    return result
Example #27
    def search(self, text: str, max_n_guesses: int,
               normalize_score_by_length=False,
               wiki_boost=1, qb_boost=1):
        if not self.exists():
            raise ValueError('The index does not exist, you must create it before searching')

        if wiki_boost != 1:
            wiki_field = 'wiki_content^{}'.format(wiki_boost)
        else:
            wiki_field = 'wiki_content'

        if qb_boost != 1:
            qb_field = 'qb_content^{}'.format(qb_boost)
        else:
            qb_field = 'qb_content'

        s = Search(index=self.name)[0:max_n_guesses].query(
            'multi_match', query=text, fields=[wiki_field, qb_field]
        )
        results = s.execute()
        guess_set = set()
        guesses = []
        if normalize_score_by_length:
            query_length = len(text.split())
        else:
            query_length = 1

        for r in results:
            if r.page in guess_set:
                continue
            else:
                guess_set.add(r.page)
                guesses.append((r.page, r.meta.score / query_length))
        return guesses
Example #28
 def sources():
     """Returns list of published light Source objects.
     
     @returns: list
     """
     KEY = 'encyc-front:sources'
     TIMEOUT = 60*5
     data = cache.get(KEY)
     if not data:
         s = Search(doc_type='sources')[0:MAX_SIZE]
         s = s.sort('encyclopedia_id')
         s = s.fields([
             'encyclopedia_id',
             'published',
             'modified',
             'headword',
             'media_format',
             'img_path',
         ])
         response = s.execute()
         data = [
             Source(
                 encyclopedia_id = hitvalue(hit, 'encyclopedia_id'),
                 published = hitvalue(hit, 'published'),
                 modified = hitvalue(hit, 'modified'),
                 headword = hitvalue(hit, 'headword'),
                 media_format = hitvalue(hit, 'media_format'),
                 img_path = hitvalue(hit, 'img_path'),
                )
             for hit in response
             if hitvalue(hit, 'published')
         ]
         cache.set(KEY, data, TIMEOUT)
     return data
def session_times():
    # {"@fields": {}, "@timestamp": "2015-02-23T17:03:41.738412Z", "@source_host": "newair.brainbot.com", "@message": "scenario.p2p_connect.started"}

    start_message = 'scenario.p2p_connect.starting.clients.sequentially'
    stop_message = 'scenario.p2p_connect.stopping.clients'
    s = Search(client)
    s = s.filter('bool',
                 should=[F('term', message=start_message),
                         F('term', message=stop_message)])
    s = s.fields(['message', '@timestamp'])
    s = s[0:100000]
    s = s.sort('-@timestamp')  # desc,  we want the latest events
    response = s.execute()

    events = []  # youngest to oldest; the first (most recent) event should be a stop message
    for h in response:
        msg = 'start' if h['message'][0] == start_message else 'stop'
        ts = h['@timestamp'][0]
        events.append((msg, ts))
    assert not events or events[0][0] == 'stop'
    sessions = []
    while len(events) >= 2:
        stop = events.pop()
        start = events.pop()
        sessions.append(dict([start, stop]))
    return list(reversed(sessions))
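The pairing loop above consumes the newest-first event list from its oldest end, two events at a time; a small worked illustration with made-up timestamps:

# Hypothetical input, newest first, as produced by the '-@timestamp' sort.
events = [('stop', 't3'), ('start', 't2'), ('stop', 't1'), ('start', 't0')]
sessions = []
while len(events) >= 2:
    stop = events.pop()   # oldest remaining event
    start = events.pop()  # next-oldest event
    sessions.append(dict([start, stop]))
assert list(reversed(sessions)) == [{'start': 't2', 'stop': 't3'},
                                    {'start': 't0', 'stop': 't1'}]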
Example #30
    def build_query(self, start_date, end_date, **kwargs):
        """Build the elasticsearch query."""
        agg_query = Search(using=self.client,
                           index=self.index,
                           doc_type=self.doc_type)[0:0]
        if start_date is not None or end_date is not None:
            time_range = {}
            if start_date is not None:
                time_range['gte'] = start_date.isoformat()
            if end_date is not None:
                time_range['lte'] = end_date.isoformat()
            agg_query = agg_query.filter(
                'range',
                **{self.time_field: time_range})

        term_agg = agg_query.aggs
        for term in self.aggregated_fields:
            term_agg = term_agg.bucket(term, 'terms', field=term, size=0)
        term_agg.metric('total', 'sum', field='count')

        if self.copy_fields:
            term_agg.metric(
                'top_hit', 'top_hits', size=1, sort={'timestamp': 'desc'}
            )

        for query_param, filtered_field in self.required_filters.items():
            if query_param in kwargs:
                agg_query = agg_query.filter(
                    'term', **{filtered_field: kwargs[query_param]}
                )

        return agg_query
Example #31
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
from elasticsearch_dsl.connections import connections

BASE_URL = "https://hackzurich-api.migros.ch/hack/recipe/recipes_de/_search"

connections.create_connection(hosts=[BASE_URL],
                              http_auth=('hackzurich2020', 'uhSyJ08KexKn4ZFS'))

s = Search().query("match", title="Suppe")

response = s.execute()

for hit in response:
    print(hit.meta.score, hit.title)
Example #32
    def test_no_querystring(self):
        es = Search()

        query = queries.get_es_query(es, "", "", [])

        assert query == es.query()
Example #33
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

client = Elasticsearch()

s = Search(using=client, index="my-index") \
    .filter("term", category="search") \
    .query("match", title="python")   \
    .exclude("match", description="beta")

s.aggs.bucket('per_tag', 'terms', field='tags') \
    .metric('max_lines', 'max', field='lines')

response = s.execute()

for hit in response:
    print(hit.meta.score, hit.title)

for tag in response.aggregations.per_tag.buckets:
    print(tag.key, tag.max_lines.value)
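Nothing is sent to the cluster until execute() runs, so the assembled request body can be inspected first; the chained filter/query/exclude calls above collapse into a single bool query:

# Roughly: {'query': {'bool': {'filter': [{'term': ...}], 'must': [{'match': ...}],
#                              'must_not': [{'match': ...}]}}, 'aggs': {'per_tag': ...}}
print(s.to_dict())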
Example #34
    parser = argparse.ArgumentParser()
    parser.add_argument('--index',
                        required=True,
                        help='Index to search')
    parser.add_argument('--query',
                        default=None,
                        nargs=argparse.REMAINDER,
                        help='Lucene query')

    args = parser.parse_args()

    index = args.index
    if args.query:
        query = ' '.join(args.query)

    try:
        client = Elasticsearch()
        s = Search(using=client, index=index)

        q = Q('query_string', query=query)
        s = s.query(q)
        response = s[0:10].execute()

        for r in response:
            print('DATE= %s URL=%s' % (r['date'], r['url']))
            print('AUTHOR= %s' % r['author'])
            print('TITLE= %s' % r['title'])
            print('KEYWORDS= %s' % r['keywords'])
            print('----------------------------------------')

        print('%d Documents' % response.hits.total)
    except NotFoundError:
        print('Index %s does not exist' % index)
Example #35
def core_search(query):
    es = Elasticsearch(['http://elasticsearch613:9200/'])
    q = Q("multi_match", query=query, fields=['title', 'text', 'cat', 'tags' ])

    ss = Search(using=es, index='blog').query(q)
    return ss
Example #36
 def search(cls):
     return Search(using=get_es(),
                   index=get_index_name(),
                   doc_type=cls.get_doctype())
Example #37
def search(author):
    s = Search().filter('term', author=author)
    response = s.execute()
    return response
Example #38
 def test_get_es_search(self, es_data_client):
     view = self.create_view(es_data_client)
     expected = Search(using=es_data_client,
                       index='test',
                       doc_type=DataDocType)
     assert view.get_es_search().to_dict() == expected.to_dict()
Example #39
import time
from datetime import datetime
from subprocess import call

import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

start = time.perf_counter()  # time.clock() was removed in Python 3.8

raw_index = "mordred_raw"  #raw index name
enriched_index = "mordred_enriched"  #enriched index name
repo_url = "https://github.com/chaoss/grimoirelab-mordred.git"  #github repository url

es = Elasticsearch('http://localhost:9200', verify_certs=False)

call("p2o.py --enrich --index " + raw_index + " --index-enrich " +
     enriched_index + " -e http://localhost:9200 --no_inc --debug git  " +
     repo_url,
     shell=True)

s = Search(using=es, index=enriched_index)
s.aggs.bucket('by_authors', 'terms', field='author_name', size=10000).metric(
    'first_commit', 'min', field='author_date'
)  #aggregate on the basis of author name and find oldest commit date for each of them
s = s.sort("author_date")

result = s.execute()

buckets_result = result['aggregations']['by_authors']['buckets']
buckets = []
for bucket in buckets_result:
    first_commit = bucket['first_commit']['value'] / 1000
    buckets.append({
        'first_commit': datetime.utcfromtimestamp(first_commit),
        'author': bucket['key'],
        'commit_count': bucket['doc_count']
    })
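The unused pandas import above suggests the buckets were meant to feed a DataFrame next; a minimal sketch of that step (the sorting and column handling are assumptions, not part of the original):

authors_df = pd.DataFrame(buckets)  # columns: first_commit, author, commit_count
print(authors_df.sort_values('first_commit').head())  # earliest contributors first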
Example #40
    def __init__(self,
                 data=None,
                 files=None,
                 auto_id='id_%s',
                 prefix=None,
                 initial=None,
                 error_class=ErrorList,
                 label_suffix=None,
                 empty_permitted=False,
                 field_order=None,
                 use_required_attribute=None,
                 renderer=None,
                 user=None,
                 has_combo=False):
        super().__init__(data, files, auto_id, prefix, initial, error_class,
                         label_suffix, empty_permitted, field_order,
                         use_required_attribute, renderer)

        # Get topic_modellings
        s = Search(using=ES_CLIENT, index=ES_INDEX_TOPIC_MODELLING).filter('term', is_ready=True) \
                .source(['name', 'algorithm', 'number_of_topics', 'number_of_documents',
                         'source', 'datetime_from', 'datetime_to'
                         # 'perplexity', 'purity', 'contrast', 'coherence',
                         # 'tau_smooth_sparse_theta', 'tau_smooth_sparse_phi',
                         # 'tau_decorrelator_phi', 'tau_coherence_phi',
                         ])[:500]
        group = None
        if not user.is_superuser:
            group = get_user_group(user)
        topic_modellings = s.execute()
        topic_modellings = sorted(topic_modellings,
                                  key=lambda x: x.number_of_documents,
                                  reverse=True)
        topic_modellings = ((
            tm.name.lower(),
            f"{tm.name.replace('bigartm', 'tm')} - {tm.number_of_topics} топиков - {tm.number_of_documents} текстов - "
            + (f"{tm.source} - "
               if hasattr(tm, 'source') and tm.source else f"Все СМИ ") +
            (f"С {tm.datetime_from[:10]} - "
             if hasattr(tm, 'datetime_from') and tm.datetime_from else f"") +
            (f"По {tm.datetime_to[:10]} - "
             if hasattr(tm, 'datetime_to') and tm.datetime_to else f""))
                            for tm in topic_modellings
                            if user.is_superuser or (group and tm.name.lower(
                            ) in group.topic_modelling_names.split(",")))
        if has_combo:
            combo_indices = ES_CLIENT.indices.get_alias(
                f"{ES_INDEX_TOPIC_COMBOS}_*").keys()
            tms_with_combo = [
                ind.replace(f"{ES_INDEX_TOPIC_COMBOS}_", "").lower()
                for ind in combo_indices
            ]
            topic_modellings = filter(lambda x: x[0] in tms_with_combo,
                                      topic_modellings)
            self.fields['topic_weight_threshold'].required = False
        self.fields['topic_modelling'].choices = topic_modellings

        # Get topic_weight_thresholds
        self.fields[
            'topic_weight_threshold'].choices = get_topic_weight_threshold_options(
                user.is_superuser or hasattr(user, "expert"))
 def get_data_dsl(self):
     # The 'using' argument selects the Elasticsearch client instance and
     # 'index' narrows the scope. 'index' also accepts a list of indices,
     # e.g. index=["bank", "banner", "country"], or a wildcard pattern such
     # as index=["b*"], which covers every index whose name starts with "b";
     # a Search can likewise pin down a specific doc_type.
     s = Search(using=self.es, index=self.index_name)
     res = s.query("match", serialNo="368400630043389952").query(
         "match", is_result="1").highlight("is_result").execute()
     print(type(res))
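A minimal sketch of the index-scoping options described in the comment above (the index names are placeholders, and a reachable cluster is assumed):

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

es = Elasticsearch()
s_multi = Search(using=es, index=["bank", "banner", "country"])  # several explicit indices
s_glob = Search(using=es, index="b*")  # every index whose name starts with "b"
print(s_multi.count(), s_glob.count())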
Example #42
 def build_brand_es(self, args, category_search_condition):
     keyword_es = Search() \
         .query(category_search_condition)
     keyword_es = keyword_es.sort(*self.sort_condition(args))
     keyword_es = self.add_page_limit_to_brand_es(args, keyword_es)
     return keyword_es
Example #43
    def test_upercase_and_lowercase_search_give_same_results(self):
        """Pretty self-explanatory function name, isn't it ?"""

        if not self.manager.connected_to_es:
            return

        # 1. Index lowercase stuffs
        text_lc = 'test'

        topic_1_lc = TopicFactory(forum=self.forum,
                                  author=self.user,
                                  title=text_lc)

        tag_lc = TagFactory(title=text_lc)
        topic_1_lc.tags.add(tag_lc)
        topic_1_lc.subtitle = text_lc
        topic_1_lc.save()

        post_1_lc = PostFactory(topic=topic_1_lc, author=self.user, position=1)
        post_1_lc.text = post_1_lc.text_html = text_lc
        post_1_lc.save()

        tuto_lc = PublishableContentFactory(type='TUTORIAL')
        tuto_draft_lc = tuto_lc.load_version()

        tuto_lc.title = text_lc
        tuto_lc.authors.add(self.user)
        subcategory_lc = SubCategoryFactory(title=text_lc)
        tuto_lc.subcategory.add(subcategory_lc)
        tuto_lc.tags.add(tag_lc)
        tuto_lc.save()

        tuto_draft_lc.description = text_lc
        tuto_draft_lc.repo_update_top_container(text_lc, tuto_lc.slug, text_lc,
                                                text_lc)

        chapter1_lc = ContainerFactory(parent=tuto_draft_lc, db_object=tuto_lc)
        extract_lc = ExtractFactory(container=chapter1_lc, db_object=tuto_lc)
        extract_lc.repo_update(text_lc, text_lc)

        published_lc = publish_content(tuto_lc,
                                       tuto_draft_lc,
                                       is_major_update=True)

        tuto_lc.sha_public = tuto_draft_lc.current_version
        tuto_lc.sha_draft = tuto_draft_lc.current_version
        tuto_lc.public_version = published_lc
        tuto_lc.save()

        # 2. Index uppercase stuffs
        text_uc = 'TEST'

        topic_1_uc = TopicFactory(forum=self.forum,
                                  author=self.user,
                                  title=text_uc)

        topic_1_uc.tags.add(
            tag_lc)  # Note: a constraint forces tags title to be unique
        topic_1_uc.subtitle = text_uc
        topic_1_uc.save()

        post_1_uc = PostFactory(topic=topic_1_uc, author=self.user, position=1)
        post_1_uc.text = post_1_uc.text_html = text_uc
        post_1_uc.save()

        tuto_uc = PublishableContentFactory(type='TUTORIAL')
        tuto_draft_uc = tuto_uc.load_version()

        tuto_uc.title = text_uc
        tuto_uc.authors.add(self.user)
        tuto_uc.subcategory.add(subcategory_lc)
        tuto_uc.tags.add(tag_lc)
        tuto_uc.save()

        tuto_draft_uc.description = text_uc
        tuto_draft_uc.repo_update_top_container(text_uc, tuto_uc.slug, text_uc,
                                                text_uc)

        chapter1_uc = ContainerFactory(parent=tuto_draft_uc, db_object=tuto_uc)
        extract_uc = ExtractFactory(container=chapter1_uc, db_object=tuto_uc)
        extract_uc.repo_update(text_uc, text_uc)

        published_uc = publish_content(tuto_uc,
                                       tuto_draft_uc,
                                       is_major_update=True)

        tuto_uc.sha_public = tuto_draft_uc.current_version
        tuto_uc.sha_draft = tuto_draft_uc.current_version
        tuto_uc.public_version = published_uc
        tuto_uc.save()

        # 3. Index and search:
        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        result = self.client.get(reverse('search:query') + '?q=' + text_lc,
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response_lc = result.context['object_list'].execute()
        self.assertEqual(response_lc.hits.total, 8)

        result = self.client.get(reverse('search:query') + '?q=' + text_uc,
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response_uc = result.context['object_list'].execute()
        self.assertEqual(response_uc.hits.total, 8)

        for responses in zip(
                response_lc,
                response_uc):  # we should get results in the same order!
            self.assertEqual(responses[0].meta.id, responses[1].meta.id)
Example #44
 def get_queryset(self):
     return Search(using=amo.search.get_es(),
                   index=AddonIndexer.get_index_alias(),
                   doc_type=AddonIndexer.get_doctype_name())
Example #45
    def test_change_topic_impacts_posts(self):

        if not self.manager.connected_to_es:
            return

        # 1. Create a hidden forum belonging to a hidden group and add staff in it.
        text = 'test'

        group = Group.objects.create(name='Les illuminatis anonymes de ZdS')
        _, hidden_forum = create_category_and_forum(group)

        self.staff.groups.add(group)
        self.staff.save()

        # 2. Create a normal topic and index it
        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 2)  # indexing ok

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 1)  # ok
        self.assertEqual(response[0].meta.doc_type,
                         Post.get_es_document_type())
        self.assertEqual(response[0].forum_pk, self.forum.pk)
        self.assertEqual(response[0].topic_pk, topic_1.pk)
        self.assertEqual(response[0].topic_title, topic_1.title)

        # 3. Change topic title and reindex
        topic_1.title = 'new title'
        topic_1.save()

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 1)  # ok

        self.assertEqual(response[0].topic_title,
                         topic_1.title)  # title was changed

        # 4. connect with staff and move topic
        self.assertTrue(
            self.client.login(username=self.staff.username,
                              password='******'))

        data = {'move': '', 'forum': hidden_forum.pk, 'topic': topic_1.pk}
        response = self.client.post(reverse('topic-edit'), data, follow=False)

        self.assertEqual(302, response.status_code)

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(
            response.hits.total,
            1)  # Note: without staff, would not get any results (see below)

        self.assertEqual(response[0].forum_pk,
                         hidden_forum.pk)  # post was updated with new forum

        # 5. Topic is now hidden
        self.client.logout()

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 0)  # ok
Example #46
"""
Configuration is global so no client needs to be passed around.
"""
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, connections

"""
Default connection used where no other connection specified. Any configuration
methods just pass all parameters to the underlying elasticsearch-py client.
"""
connections.create_connection(hosts=["localhost"])

"""
Optionally specify an alias for the connection in case of multiple connections.
"""
connections.create_connection("prod", hosts=["localhost"])
s = Search(using="prod")
s.count()

"""
You can always just pass in your own client instance
"""
s = Search(using=Elasticsearch())
s.count()

"""
Any method on Search returns a clone so you need to always assign it back to
the same variable.
"""
s = Search()
s = s.params(q="fix")
Example #47
    def test_boosts(self):
        """Check if boosts are doing their job"""

        if not self.manager.connected_to_es:
            return

        # 1. Create topics (with identical titles), posts (with identical texts), an article and a tuto
        text = 'test'

        topic_1_solved_sticky = TopicFactory(forum=self.forum,
                                             author=self.user)
        topic_1_solved_sticky.title = text
        topic_1_solved_sticky.subtitle = ''
        topic_1_solved_sticky.solved_by = self.user
        topic_1_solved_sticky.is_sticky = True
        topic_1_solved_sticky.save()

        post_1 = PostFactory(topic=topic_1_solved_sticky,
                             author=self.user,
                             position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        post_2_useful = PostFactory(topic=topic_1_solved_sticky,
                                    author=self.user,
                                    position=2)
        post_2_useful.text = post_2_useful.text_html = text
        post_2_useful.is_useful = True
        post_2_useful.like = 5
        post_2_useful.dislike = 2  # l/d ratio above 1
        post_2_useful.save()

        topic_2_locked = TopicFactory(forum=self.forum,
                                      author=self.user,
                                      title=text)
        topic_2_locked.title = text
        topic_2_locked.subtitle = ''
        topic_2_locked.is_locked = True
        topic_2_locked.save()

        post_3_ld_below_1 = PostFactory(topic=topic_2_locked,
                                        author=self.user,
                                        position=1)
        post_3_ld_below_1.text = post_3_ld_below_1.text_html = text
        post_3_ld_below_1.like = 2
        post_3_ld_below_1.dislike = 5  # l/d ratio below 1
        post_3_ld_below_1.save()

        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, 'Who cares ?', 'Same here')
        ExtractFactory(container=chapter1, db_object=tuto)

        published_tuto = publish_content(tuto,
                                         tuto_draft,
                                         is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published_tuto
        tuto.save()

        article = PublishedContentFactory(type='ARTICLE', title=text)
        published_article = PublishedContent.objects.get(content_pk=article.pk)

        opinion_not_picked = PublishedContentFactory(type='OPINION',
                                                     title=text)
        published_opinion_not_picked = PublishedContent.objects.get(
            content_pk=opinion_not_picked.pk)

        opinion_picked = PublishedContentFactory(type='OPINION', title=text)
        opinion_picked.sha_picked = opinion_picked.sha_draft
        opinion_picked.date_picked = datetime.datetime.now()
        opinion_picked.save()

        published_opinion_picked = PublishedContent.objects.get(
            content_pk=opinion_picked.pk)

        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 10)

        # 2. Reset all boosts to 1
        for doc_type in settings.ZDS_APP['search']['boosts']:
            for key in settings.ZDS_APP['search']['boosts'][doc_type]:
                settings.ZDS_APP['search']['boosts'][doc_type][key] = 1.0

        # 3. Test posts
        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        # scores are equal without boosts:
        self.assertTrue(response[0].meta.score == response[1].meta.score ==
                        response[2].meta.score)

        settings.ZDS_APP['search']['boosts']['post']['if_first'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score == response[1].meta.score >
                        response[2].meta.score)
        self.assertEqual(response[2].meta.id, str(
            post_2_useful.pk))  # post 2 is the only one not first

        settings.ZDS_APP['search']['boosts']['post']['if_first'] = 1.0
        settings.ZDS_APP['search']['boosts']['post']['if_useful'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(post_2_useful.pk))  # post 2 is useful

        settings.ZDS_APP['search']['boosts']['post']['if_useful'] = 1.0
        settings.ZDS_APP['search']['boosts']['post']['ld_ratio_above_1'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(
            post_2_useful.pk))  # post 2 has an l/d ratio of 5/2

        settings.ZDS_APP['search']['boosts']['post']['ld_ratio_above_1'] = 1.0
        settings.ZDS_APP['search']['boosts']['post'][
            'ld_ratio_below_1'] = 2.0  # no one would do that in real life

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(
            post_3_ld_below_1.pk))  # post 3 has an l/d ratio of 2/5

        settings.ZDS_APP['search']['boosts']['post']['ld_ratio_below_1'] = 1.0

        # 4. Test topics
        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        # scores are equal without boosts:
        self.assertTrue(response[0].meta.score == response[1].meta.score)

        settings.ZDS_APP['search']['boosts']['topic']['if_sticky'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(topic_1_solved_sticky.pk))  # topic 1 is sticky

        settings.ZDS_APP['search']['boosts']['topic']['if_sticky'] = 1.0
        settings.ZDS_APP['search']['boosts']['topic']['if_solved'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(topic_1_solved_sticky.pk))  # topic 1 is solved

        settings.ZDS_APP['search']['boosts']['topic']['if_solved'] = 1.0
        settings.ZDS_APP['search']['boosts']['topic'][
            'if_locked'] = 2.0  # no one would do that in real life

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(topic_2_locked.pk))  # topic 2 is locked

        settings.ZDS_APP['search']['boosts']['topic'][
            'if_locked'] = 1.0  # no one would do that in real life

        # 5. Test published contents
        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        # scores are equal without boosts:
        self.assertTrue(
            response[0].meta.score == response[1].meta.score == response[2].
            meta.score == response[3].meta.score == response[4].meta.score)

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_article'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(published_article.pk))  # obvious

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_article'] = 1.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_tutorial'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(published_tuto.pk))  # obvious

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_tutorial'] = 1.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_opinion'] = 2.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_opinion_not_picked'] = 4.0
        # Note: in "real life", unpicked opinion would get a boost < 1.

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score >
                        response[2].meta.score)
        self.assertEqual(
            response[0].meta.id,
            str(published_opinion_not_picked.pk))  # unpicked opinion comes first
        self.assertEqual(response[1].meta.id, str(published_opinion_picked.pk))

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_opinion'] = 1.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_opinion_not_picked'] = 1.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_medium_or_big_tutorial'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(published_tuto.pk))  # obvious

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_medium_or_big_tutorial'] = 1.0

        # 6. Test global boosts
        # NOTE: scores are NOT the same for all documents; however hard one tries, small differences exist

        for model in self.indexable:

            # set a huge number to overcome the small differences:
            settings.ZDS_APP['search']['boosts'][
                model.get_es_document_type()]['global'] = 10.0

            result = self.client.get(reverse('search:query') + '?q=' + text,
                                     follow=False)

            self.assertEqual(result.status_code, 200)
            response = result.context['object_list'].execute()
            self.assertEqual(response.hits.total, 10)

            self.assertEqual(response[0].meta.doc_type,
                             model.get_es_document_type())  # obvious

            settings.ZDS_APP['search']['boosts'][
                model.get_es_document_type()]['global'] = 1.0
Example #48
    def test_change_publishedcontents_impacts_chapter(self):

        if not self.manager.connected_to_es:
            return

        # 1. Create middle-size content and index it
        text = 'test'

        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(
            text, tuto.slug, text,
            text)  # change title to be sure it will match

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract = ExtractFactory(container=chapter1, db_object=tuto)
        extract.repo_update(text, text)

        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 2)  # indexing ok

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()

        self.assertEqual(response.hits.total, 2)

        chapters = [r for r in response if r.meta.doc_type == 'chapter']
        self.assertEqual(chapters[0].meta.doc_type,
                         FakeChapter.get_es_document_type())
        self.assertEqual(chapters[0].meta.id,
                         published.content_public_slug + '__' + chapter1.slug)

        # 2. Change tuto: delete chapter and insert new one !
        tuto = PublishableContent.objects.get(pk=tuto.pk)
        tuto_draft = tuto.load_version()

        tuto_draft.children[0].repo_delete()  # chapter 1 is gone!

        another_text = 'another thing'
        self.assertTrue(
            text not in another_text
        )  # to prevent a future modification from breaking this test

        chapter2 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter2.repo_update(another_text, another_text, another_text)
        extract2 = ExtractFactory(container=chapter2, db_object=tuto)
        extract2.repo_update(another_text, another_text)

        published = publish_content(tuto, tuto_draft, is_major_update=False)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 2)  # 2 objects, not 3!

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()

        contents = [r for r in response if r.meta.doc_type != 'chapter']
        self.assertEqual(response.hits.total,
                         len(contents))  # no chapter found anymore

        result = self.client.get(reverse('search:query') + '?q=' +
                                 another_text + '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()
        chapters = [r for r in response if r.meta.doc_type == 'chapter']
        self.assertEqual(response.hits.total, 1)
        self.assertEqual(chapters[0].meta.doc_type,
                         FakeChapter.get_es_document_type())
        self.assertEqual(chapters[0].meta.id, published.content_public_slug +
                         '__' + chapter2.slug)  # got new chapter
Example #49
    def test_category_and_subcategory_impact_search(self):
        """If two contents do not belong to the same (sub)category"""

        if not self.manager.connected_to_es:
            return

        text = 'Did you ever hear the tragedy of Darth Plagueis The Wise?'

        # 1. Create two contents with different subcategories
        category_1 = 'category 1'
        subcategory_1 = SubCategoryFactory(title=category_1)
        category_2 = 'category 2'
        subcategory_2 = SubCategoryFactory(title=category_2)

        tuto_1 = PublishableContentFactory(type='TUTORIAL')
        tuto_1_draft = tuto_1.load_version()

        tuto_1.title = text
        tuto_1.authors.add(self.user)
        tuto_1.subcategory.add(subcategory_1)
        tuto_1.save()

        tuto_1_draft.description = text
        tuto_1_draft.repo_update_top_container(text, tuto_1.slug, text, text)

        chapter_1 = ContainerFactory(parent=tuto_1_draft, db_object=tuto_1)
        extract_1 = ExtractFactory(container=chapter_1, db_object=tuto_1)
        extract_1.repo_update(text, text)

        published_1 = publish_content(tuto_1,
                                      tuto_1_draft,
                                      is_major_update=True)

        tuto_1.sha_public = tuto_1_draft.current_version
        tuto_1.sha_draft = tuto_1_draft.current_version
        tuto_1.public_version = published_1
        tuto_1.save()

        tuto_2 = PublishableContentFactory(type='TUTORIAL')
        tuto_2_draft = tuto_2.load_version()

        tuto_2.title = text
        tuto_2.authors.add(self.user)
        tuto_2.subcategory.add(subcategory_2)
        tuto_2.save()

        tuto_2_draft.description = text
        tuto_2_draft.repo_update_top_container(text, tuto_2.slug, text, text)

        chapter_2 = ContainerFactory(parent=tuto_2_draft, db_object=tuto_2)
        extract_2 = ExtractFactory(container=chapter_2, db_object=tuto_2)
        extract_2.repo_update(text, text)

        published_2 = publish_content(tuto_2,
                                      tuto_2_draft,
                                      is_major_update=True)

        tuto_2.sha_public = tuto_2_draft.current_version
        tuto_2.sha_draft = tuto_2_draft.current_version
        tuto_2.public_version = published_2
        tuto_2.save()

        # 2. Index:
        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        result = self.client.get(reverse('search:query') + '?q=' + text,
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 4)  # Ok

        # 3. Test
        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&model=content&subcategory=' +
                                 subcategory_1.slug,
                                 follow=False)

        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertEqual([
            int(r.meta.id) for r in response
            if r.meta.doc_type == 'publishedcontent'
        ][0], published_1.pk)
        self.assertEqual([
            r.meta.id for r in response if r.meta.doc_type == 'chapter'
        ][0], tuto_1.slug + '__' + chapter_1.slug)

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&model=content&subcategory=' +
                                 subcategory_2.slug,
                                 follow=False)

        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertEqual([
            int(r.meta.id) for r in response
            if r.meta.doc_type == 'publishedcontent'
        ][0], published_2.pk)
        self.assertEqual([
            r.meta.id for r in response if r.meta.doc_type == 'chapter'
        ][0], tuto_2.slug + '__' + chapter_2.slug)
Example #50
    def test_basic_search(self):
        """Basic search and filtering"""

        if not self.manager.connected_to_es:
            return

        # 1. Index and test search:
        text = 'test'

        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        # create a middle-size content and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(
            text, tuto.slug, text,
            text)  # change title to be sure it will match

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        extract = ExtractFactory(container=chapter1, db_object=tuto)
        extract.repo_update(text, text)

        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        # nothing has been indexed yet:
        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        result = self.client.get(reverse('search:query') + '?q=' + text,
                                 follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()

        self.assertEqual(response.hits.total, 4)  # get 4 results

        # 2. Test filtering:
        topic_1 = Topic.objects.get(pk=topic_1.pk)
        post_1 = Post.objects.get(pk=post_1.pk)
        published = PublishedContent.objects.get(pk=published.pk)

        ids = {
            'topic': [topic_1.es_id],
            'post': [post_1.es_id],
            'content': [
                published.es_id,
                published.content_public_slug + '__' + chapter1.slug
            ],
        }

        search_groups = list(settings.ZDS_APP['search']['search_groups'])
        group_to_model = {
            k: v[1]
            for k, v in settings.ZDS_APP['search']['search_groups'].items()
        }

        for doc_type in search_groups:
            result = self.client.get(reverse('search:query') + '?q=' + text +
                                     '&models=' + doc_type,
                                     follow=False)
            self.assertEqual(result.status_code, 200)

            response = result.context['object_list'].execute()

            self.assertEqual(response.hits.total,
                             len(ids[doc_type]))  # get 1 result of each …
            for i, r in enumerate(response):
                self.assertIn(
                    r.meta.doc_type,
                    group_to_model[doc_type])  # … and only of the right type …
                self.assertEqual(r.meta.id,
                                 ids[doc_type][i])  # … with the right id!
Example #51
    def search(self, query_text, locale=None, fields=None):
        """
        Return relevant articles given search text.

        Finding the query term in the title of an article is given twice
        as much weight as finding the text in the body.

        After the most relevant articles are obtained, they are ranked by the
        ranking module (uses view counts here, but can be easily extended).
        
        Args:
            query_text(str): Text to be searched.
            locale(str): If given, restrict results to this locale.
            fields(list(str)): If specified, restrict the fields returned to
                this list.

        Returns:
            list[dict]: A ranked list of dictionaries representing articles:
                [
                    {
                        'id': str,
                        'title': str,
                        'body': str,
                        'locale': str,
                    },
                    .
                    .
                ]
                
        """
        # Create Search object to "match" query text against the title and body
        # of articles stored in the Knowledge base.
        s = Search(
            using=self.client,
            index=self.INDEX,
            doc_type=self.TYPE
        ).query(
            'multi_match',
            query=query_text,
            fields=['title^2', 'body']
        )
        
        # If locale is provided, use it to filter the set of documents that are
        # queried for.
        if locale:
            s = s.filter('term', locale=locale)

        # Restrict the returned fields if specified; source(None) leaves the
        # full document source in place.
        s = s.source(fields)
   
        response = s.execute()
        results, result_dict = [], {}
        for hit in response:
            article_id = hit.meta['id']
            result_dict[article_id] = hit.to_dict()  # hit's source as a plain dict
            result_dict[article_id]['id'] = article_id

            # Retrieve view count for each relevant article.
            results.append((article_id, self.redis.get(article_id)))

        # Rank results using Ranking function. Currently sorts relevant results by
        # view counts.
        ranked_results = Ranker.rank(results)
        ranked_articles = [result_dict[article_id] for article_id in ranked_results]

        return ranked_articles
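
# A minimal, self-contained sketch of the technique above, not the original
# project's API: boost "title" twice as much as "body" with a multi_match
# query, then re-rank the hits by an external popularity counter. The index
# name, query text and the view_counts dict are illustrative stand-ins (the
# method above reads its counters from Redis).
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

client = Elasticsearch()
view_counts = {'a1': 10, 'a2': 3}  # article_id -> view count (illustrative)

s = Search(using=client, index='knowledge-base').query(
    'multi_match', query='reset password', fields=['title^2', 'body'])
response = s.execute()

# Most-viewed articles first, mirroring the Ranker.rank() step above.
ranked = sorted(response, key=lambda hit: view_counts.get(hit.meta.id, 0),
                reverse=True)
for hit in ranked:
    print(hit.meta.id, hit.title)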
Example #52
    def test_hidden_forums_give_no_results_if_user_not_allowed(self):
        """Long name, isn't ?"""

        if not self.manager.connected_to_es:
            return

        # 1. Create a hidden forum belonging to a hidden staff group.
        text = 'test'

        group = Group.objects.create(name='Les illuminatis anonymes de ZdS')
        _, hidden_forum = create_category_and_forum(group)

        self.staff.groups.add(group)
        self.staff.save()

        topic_1 = TopicFactory(forum=hidden_forum,
                               author=self.staff,
                               title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 2)  # indexing ok

        # 2. Search while not logged in and get no result
        result = self.client.get(reverse('search:query') + '?q=' + text,
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 0)

        # 3. Connect with user (not a member of the group), search, and get no result
        self.assertTrue(
            self.client.login(username=self.user.username,
                              password='******'))

        result = self.client.get(reverse('search:query') + '?q=' + text,
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 0)

        # 4. Connect with staff, search, and get the topic and the post
        self.client.logout()
        self.assertTrue(
            self.client.login(username=self.staff.username,
                              password='******'))

        result = self.client.get(reverse('search:query') + '?q=' + text,
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)  # ok!
Example #53
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

client = Elasticsearch()

s = Search().using(client).query("match", title="use")
for hit in s:
    print(hit.title)

print(100 * "*")

## do a term query: exact match on the indexed term, unlike the
## analyzed "match" query above
s = Search().using(client).query("term", title="snowball")
response = s.execute()
if len(response) == 0:
    print("query %s is empty" % s.to_dict())
for hit in response:
    print(hit.title)
print(100 * "*")

## do a terms query (matches documents whose tags contain any of the exact terms)
s = Search().using(client).query("terms", tags=["test"])
for hit in s:
    print(hit.title)

print(100 * "*")
Example #54
def build_query_body(item_uri):
    """Build query dict ready to pass to Elasticsearch search instance for retrieving a single item by URI."""
    search = Search(index='pips').query('term', _id=item_uri)

    return search.to_dict()
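
# For reference, a sketch of what this helper emits: to_dict() renders the
# single term clause (the URI value here is an illustrative stand-in).
from elasticsearch_dsl import Search

body = Search(index='pips').query('term', _id='b0000001').to_dict()
print(body)  # {'query': {'term': {'_id': 'b0000001'}}}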
Example #55
 def get_count(self):
     s = Search(using=self.es, index=self.index_pattern)
     if self.query:
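         # update_from_dict() mutates the Search in place, so the return
         # value does not need to be reassigned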
         s.update_from_dict({"query": self.query})
     return s.count()
Example #56
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q
import numpy as np
from matplotlib_venn import venn3
from matplotlib import pyplot as plt

# Define a default Elasticsearch client
#connections.create_connection(hosts=['http://172.20.30.70:9200/'])

#elasticServer = 'http://172.20.30.70:9200/'	#prod
elasticServer = 'http://172.20.31.19:9200/'  #dev

client = Elasticsearch(hosts=[elasticServer])

#q = Q('bool', must=[Q('match', index='propertypriceregister'), Q('match', Type='propertypriceregister')])
q = Q('match', id='_search')  # built but never applied below

# .query() with no arguments defaults to a match_all query
s = Search(using=client,
           index="propertypriceregister",
           doc_type="propertypriceregister").query()

s2 = Search(using=client, index="daft", doc_type="daftproperty").query()
s3 = Search(using=client, index="myhome", doc_type="myhomeproperty").query()
s4 = Search(using=client, index="daftdrop",
            doc_type="daftdropproperty").query()
'''
count = s.count()
for i in range(0, (count // 1000) + 1):
    response = s[(i * 1000):((i + 1) * 1000)].execute()
    #response = s.execute()
    print('Total %d hits found.' % response.hits.total)
'''
Example #57
from connectors.elasticsearch_connector import ElasticsearchConnector
from elasticsearch_dsl import Search
from elasticsearch_dsl.query import MultiMatch

es_man = ElasticsearchConnector(host="localhost", port="9220")
es_man.connect()

index = "fdg-article"
search_key = "publisher"
uuid = "7457b5f27a46e69e3f891767d01d2c6f6c5829132a4c03e78f4858828d539fc983a70a3c43ff8a082a8650c0972349eafb50bda7a44062428e2b405249a387f3"

a = "7457b5f27a46e69e3f891767d01d2c6f6c5829132a4c03e78f4858828d539fc983a70a3c43ff8a082a8650c0972349eafb50bda7a44062428e2b405249a387f3"
s = Search(using=es_man.es, index=index).query("match", publisher=uuid)
s = s.query("match", publisher=uuid)
s = s.execute()

multi_match = MultiMatch(query=uuid, fields=['publisher'])

s2 = Search(using=es_man.es, index=index).query(multi_match)
s2 = s2.execute()

art_index = "fdg-textscore"
art_id1 = "8796aff2a14a1ea1539265f76b044f1faf00304d6d9e237aaa21da6c4bab2166f0bed8a2eb99a05d18bc945f811f250da1f4c5a4acbfff1a213bc773894edcd1"
art_id2 = "8796aff2a14a1ea1539265f76b044f1faf00304d6d9e237aaa21da6c4bab2166be47575aa0278fdaaccf0d0aac645381b6db09383e58519fc3589398b63777b3"

sss = Search(using=es_man.es, index=art_index) \
    .filter("terms", _id=[art_id1, art_id2])

response = sss.execute()

# ------------------------------------------
Example #58
 def __len__(self):
     """Returns the total number of entries in the collection."""
     return Search(using=self.client, index=self.name).execute().hits.total
Example #59
 def _Search(self, indexname):
     """
 it returns the object which can be used for reatriving ceratin value from the DB
 """
     return Search(using=self.__client, index=indexname)
Example #60
    def _build_query(self):
        query = Q()

        source = ['id']
        sort = []

        aggregations = {}
        query_string = None
        as_list = as_dict = False

        for action, value in self.steps:
            if action == 'order_by':
                for key in value:
                    if key.startswith('-'):
                        sort.append({key[1:]: 'desc'})
                    else:
                        sort.append(key)
            elif action == 'values':
                source.extend(value)
                as_list, as_dict = True, False
            elif action == 'values_dict':
                if value:
                    source.extend(value)
                as_list, as_dict = False, True
            elif action == 'query':
                query &= self._process_queries(value)
            elif action == 'filter':
                query &= self._process_filters(value)
            elif action == 'source':
                source.extend(value)
            elif action == 'aggregate':
                aggregations.update(value)
            elif action == 'filter_query_string':
                query_string = value
            else:
                raise NotImplementedError(action)

        # If we have a raw query string we are going to apply all sorts
        # of boosts and filters to improve relevance scoring.
        #
        # We are using the same rules that `search.filters:SearchQueryFilter`
        # implements to have a single-source of truth for how our
        # scoring works.
        from olympia.search.filters import SearchQueryFilter

        search = Search().query(query)

        if query_string:
            search = SearchQueryFilter().apply_search_query(
                query_string, search)

        if sort:
            search = search.sort(*sort)

        if source:
            search = search.source(source)

        body = search.to_dict()

        # These are manually added for now to simplify a partial port to
        # elasticsearch-dsl
        if self.start:
            body['from'] = self.start
        if self.stop is not None:
            body['size'] = self.stop - self.start
        if aggregations:
            body['aggs'] = aggregations

        self.source, self.as_list, self.as_dict = source, as_list, as_dict
        return body
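
# A stand-alone sketch of the manual pagination mapping done at the end of
# _build_query() above: slice bounds become "from"/"size" keys in the raw
# body. The start/stop values are illustrative stand-ins for self.start and
# self.stop.
from elasticsearch_dsl import Search

search = Search().query('match_all').source(['id'])
body = search.to_dict()

start, stop = 20, 40
if start:
    body['from'] = start
if stop is not None:
    body['size'] = stop - start

print(body)
# {'query': {'match_all': {}}, '_source': ['id'], 'from': 20, 'size': 20}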