Example 1
 def query_articles(self, query, prefs):
     client = connections.get_connection()
     search = Search(using=client, index='articles')
     q = Q('bool', must=[Q('exists', field='watson_analyzed'),
                         Q('match', watson_success=True),
                         Q('match', body=query)])
     search = search.query(q)
     response = search[:100].execute()
     documents = []
     for hit in response:
         if '#' not in hit.url and '?' not in hit.url:
             documents.append({
                 'id': hit.meta.id,
                 'title': hit.title,
                 'body': hit.body,
                 'url': hit.url,
                 'score': hit.meta.score,
                 'tone': dict(
                     joy=hit.tone.joy,
                     fear=hit.tone.fear,
                     sadness=hit.tone.sadness,
                     disgust=hit.tone.disgust,
                     anger=hit.tone.anger
                 ),
                 'top_image': hit.top_image
             })
     if len(documents) < 10:
         return documents
     else:
         return select_k_and_sort(documents, prefs)
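The final call hands ranking off to select_k_and_sort, which is not part of this snippet. A minimal sketch, assuming prefs maps tone names to numeric weights and that k mirrors the threshold used above (both the signature's meaning and the weighting scheme are assumptions, not the project's code):

def select_k_and_sort(documents, prefs, k=10):
    # Hypothetical helper: blend the Elasticsearch score with the reader's
    # tone preferences, then keep the top k documents.
    def preference_score(doc):
        tone_bonus = sum(prefs.get(tone, 0) * value
                         for tone, value in doc['tone'].items())
        return doc['score'] + tone_bonus
    return sorted(documents, key=preference_score, reverse=True)[:k]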
Example 2
def consensus(offset=60):
    """
    check for 'eth.chain.new_head' messages
    and return the max number of clients that had the same head
    during the last `offset` seconds.
    """
    s = Search(client)
    # s = s.query(Q('match', message='eth.chain.new_head'))
    s = s.filter('exists', field='json_message.eth.chain.new_head.block_number')
    s = s.sort({'json_message.eth.chain.new_head.ts': {'order': 'desc', 'ignore_unmapped': 'true'}})
    response = s.execute()

    # Get latest block number
    x = max(hit['_source']['json_message']['eth.chain.new_head']['block_number'] for hit in response.hits.hits)

    # By default, the buckets are ordered by their doc_count descending
    # s.aggs.bucket('by_block_hash', 'terms', field='json_message.eth.chain.new_head.block_hash', size=3)

    # Reach consensus around latest block number
    s = Search(client)
    s = s.filter(time_range_filter(field="json_message.eth.chain.new_head.ts", offset=offset))
    s.aggs.bucket('latest', 'range',
                  field='json_message.eth.chain.new_head.block_number',
                  ranges=[{"from": x - 1, "to": x + 1}]).bucket(
                      'by_block_hash', 'terms',
                      field='json_message.eth.chain.new_head.block_hash',
                      size=3)
    # s = s[10:10]
    response = s.execute()
    # pprint(response)

    if response:
        return max(tag.doc_count for tag in response.aggregations.latest.buckets[0].by_block_hash.buckets)
    else:
        return 0
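time_range_filter (also used in Example 36) is defined elsewhere in that project. A minimal sketch, assuming the timestamp field holds ISO-formatted times (both the field format and the helper body are assumptions):

from datetime import datetime, timedelta

from elasticsearch_dsl import Q


def time_range_filter(field, offset=60):
    # Hypothetical helper: range query covering the last `offset` seconds.
    start = (datetime.utcnow() - timedelta(seconds=offset)).isoformat()
    return Q('range', **{field: {'gte': start}})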
Example 3
 def query_articles(self, query):
     client = connections.get_connection()
     search = Search(using=client)
     q = Q('match', body=query)
     search = search.query(q)
     response = search.execute()
     for hit in response:
         yield {
             'title': hit.title,
             'body': hit.body,
             'top_image': hit.top_image
         }
Example 4
        def interface_get_highlights():
            wiki_field = 'wiki_content'
            qb_field = 'qb_content'
            text = request.form['text']
            s = Search(index='qb')[0:20].query(
                'multi_match', query=text, fields=[wiki_field, qb_field])
            s = s.highlight(wiki_field).highlight(qb_field)
            results = list(s.execute())

            if len(results) == 0:
                highlights = {'wiki': [''],
                              'qb': [''],
                              'guess': ''}
            else:
                guessForEvidence = request.form['guessForEvidence']
                guessForEvidence = guessForEvidence.split("style=\"color:blue\">")[1].split("</a>")[0].lower()

                guess = None
                for index, item in enumerate(results):
                    if item.page.lower().replace("_", " ")[0:25]  == guessForEvidence:
                        guess = results[index]
                        break
                if guess is None:
                    print("expanding search")
                    s = Search(index='qb')[0:80].query(
                        'multi_match', query=text, fields=[wiki_field, qb_field])
                    s = s.highlight(wiki_field).highlight(qb_field)
                    results = list(s.execute()) 
                    for index, item in enumerate(results):
                        if item.page.lower().replace("_", " ")[0:25]  == guessForEvidence:
                            guess = results[index]
                            break
                    if guess is None:
                        highlights = {'wiki': [''],
                                  'qb': [''],
                                  'guess': ''}
                        return jsonify(highlights)
 
                _highlights = guess.meta.highlight 
                try:
                    wiki_content = list(_highlights.wiki_content)
                except AttributeError:
                    wiki_content = ['']

                try:
                    qb_content = list(_highlights.qb_content)
                except AttributeError:
                    qb_content = ['']

                highlights = {'wiki': wiki_content,
                              'qb': qb_content,
                              'guess': guess.page}
            return jsonify(highlights)
Example 5
File: main.py Project: hmoco/sir
def autocomplete_institutions_titles():
    name = request.args.get('name')
    if not name:
        return Response('Autocomplete query requires a "?name={}" parameter', status=500)
    page = int(request.args.get('page') or 1)
    size = int(request.args.get('size') or SIZE)
    start = (page - 1) * size
    s_title = Search(index=ELASTIC_INDEX).query('match_phrase_prefix', name={'query': name, 'slop': 5})[start:start + size]
    res_title = s_title.execute()
    s_alias =  Search(index=ELASTIC_INDEX).query('match_phrase_prefix', other_names={'query': name, 'slop': 5})[start:start + size]
    res_alias = s_alias.execute()
    comb = list(set(get_names(res_alias) + get_names(res_title)))
    return Response(json.dumps(comb), status=200)
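get_names is not shown here; given that the two hit lists are merged with set(), it presumably returns plain name strings. A minimal sketch under that assumption (the field access is hypothetical):

def get_names(results):
    # Hypothetical helper: collect the institution name from each hit.
    return [hit.name for hit in results if hasattr(hit, 'name')]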
Example 6
File: app.py Project: urykhy/stuff
    def search(self, **params):
        limit_cat = params.get('cat', "").strip()
        limit_forum = params.get('forum', "").strip()
        limit_count = int(params.get('count', 100))
        limit_size_min = human2bytes(params.get('min', "0b"))
        limit_size_max = human2bytes(params.get('max', "0b"))
        limit_wild = int(params.get('wild', 0))
        arg = params.get('query', '').strip()
        if not arg:
            arg = "hobbit"

        s = Search(using=es, index=ela_index)
        if limit_size_min:
            s = s.filter("range", size = {'gte' : limit_size_min })
        if limit_size_max:
            s = s.filter("range", size = {'lte' : limit_size_max })

        arg = arg.split(' ')
        if limit_wild:
            q = Q("wildcard", name="*"+arg.pop(0)+"*")
            for a in arg:
                q = q & Q("wildcard", name="*"+a+"*")
        else:
            q = Q("match", name=arg.pop(0))
            for a in arg:
                q = q & Q("match", name=a)

        if len(limit_cat):
            for a in limit_cat.split(' '):
                q = q & Q("match", category=a)
        if len(limit_forum):
            for a in limit_forum.split(' '):
                q = q & Q("match", forum=a)

        s = s.query(q)
        #cherrypy.log("query is "+str(s.to_dict()))
        r = s.execute()
        size = r.hits.total
        #cherrypy.log("query have "+str(size)+" elements")
        if size > limit_count:
            size = limit_count
        s = s.sort('-size')
        s = s.extra(size=size)
        r = s.execute()

        data = []
        for b in r:
            a = [b.id, b.size, b.name, b.category, b.forum, b.date[0] if b.date else '', b.hash]
            data.append(a)

        return {'data': data}
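human2bytes parses human-readable sizes such as "700M" into a byte count; it is a common recipe rather than part of elasticsearch-dsl. A simplified sketch (not the project's exact implementation):

def human2bytes(text):
    # Simplified sketch: '0b' -> 0, '700M' -> 734003200, '1.5G' -> 1610612736.
    units = {'b': 1, 'k': 1024, 'm': 1024 ** 2, 'g': 1024 ** 3, 't': 1024 ** 4}
    text = text.strip().lower()
    for suffix, factor in units.items():
        if text.endswith(suffix):
            return int(float(text[:-1] or 0) * factor)
    return int(float(text or 0))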
Example 7
File: app.py Project: urykhy/stuff
    def search(self, **params):
        limit_author = params.get('author', "").strip()
        limit_title = params.get('title', "").strip()
        limit_count = int(params.get('count', 10))
        limit_wild = int(params.get('wild', 0))
        q = None

        if not limit_author and not limit_title:
            limit_title = "hobbit"

        s = Search(using=es, index=ela_index)
        arg = limit_title.split(' ')
        arg = [x for x in arg if x]
        if len(arg):
            if limit_wild:
                q = Q("wildcard", title="*"+arg.pop(0)+"*")
                for a in arg:
                    q = q & Q("wildcard", title="*"+a+"*")
            else:
                q = Q("match", title=arg.pop(0))
                for a in arg:
                    q = q & Q("match", title=a)

        arg = limit_author.split(' ')
        arg = [x for x in arg if x]
        if len(arg):
            for a in arg:
                if q:
                    q = q & Q("match", author=a)
                else:
                    q = Q("match", author=a)

        s = s.query(q)
        #cherrypy.log("query is "+str(s.to_dict()))
        r = s.execute()
        size = r.hits.total
        if size > limit_count:
            size = limit_count
        s = s.sort('-date')
        s = s.extra(size=size)
        r = s.execute()
        #cherrypy.log("result is "+str(r))

        data = []
        for b in r:
            a = [b.id, b.author, b.title, b.size, b.date]
            data.append(a)

        return {'data': data}
Example 8
def reverse():
    try:
        lon = float(request.args.get('lon'))
        lat = float(request.args.get('lat'))
    except (TypeError, ValueError):
        lon = lat = None

    if lat is None or lon is None:
        abort(400, "missing 'lon' or 'lat': /?lon=2.0984&lat=48.0938")

    s = Search(es).index(INDEX).query(MatchAll()).extra(size=1).sort({
        "_geo_distance": {
            "coordinate": {
                "lat": lat,
                "lon": lon
            },
            "order": "asc"
        }})
    _type = request.args.get('type', None)
    if _type:
        s = s.query({'match': {'type': _type}})
    results = s.execute()
    if len(results.hits) < 1:
        notfound.debug('reverse: lat: {}, lon: {}, type: {}'.format(
            lat, lon, _type))

    debug = 'debug' in request.args
    data = to_geo_json(results, debug=debug)
    data = json.dumps(data, indent=4 if debug else None)
    response = Response(data, mimetype='application/json')
    cors(response)
    return response
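to_geo_json belongs to the surrounding project; a rough sketch of what it plausibly builds from the hits, assuming each document stores its location in a coordinate object with lat/lon (the field layout and debug behaviour are assumptions):

def to_geo_json(results, debug=False):
    # Hypothetical helper: wrap hits in a GeoJSON FeatureCollection.
    features = []
    for hit in results:
        properties = hit.to_dict()
        if debug:
            properties['score'] = hit.meta.score
        features.append({
            'type': 'Feature',
            'geometry': {'type': 'Point',
                         'coordinates': [hit.coordinate.lon, hit.coordinate.lat]},
            'properties': properties,
        })
    return {'type': 'FeatureCollection', 'features': features}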
def test_inner_hits_are_wrapped_in_response(data_client):
    s = Search(index='git')[0:1].query('has_parent', parent_type='repo', inner_hits={}, query=Q('match_all'))
    response = s.execute()

    commit = response.hits[0]
    assert isinstance(commit.meta.inner_hits.repo, response.__class__)
    assert repr(commit.meta.inner_hits.repo[0]).startswith("<Hit(git/doc/elasticsearch-dsl-py): ")
    def search(self, doc_type, query=""):
        """
        Execute search query and retrieve results

        :param doc_type: Type in ElasticSearch
        :param query: search query
        :return: list with results
        """
        results = []
        if type(query) in [str, unicode] and type(doc_type) == DocTypeMeta:
            q = Q("multi_match",
                  query=query.lower(),
                  fields=["title"])

            s = Search()
            s = s.using(self.client)
            s = s.index(self.index_name)
            s = s.doc_type(doc_type)
            s = s.query(q)
            print "search query: " + str(s.to_dict())

            response = s.execute()

            for resp in response:
                results.append(resp)
        return results
Example 11
 def categories(self):
     s = Search(
         using=docstore._get_connection(settings.DOCSTORE_HOSTS),
         index=settings.DOCSTORE_INDEX,
         doc_type='articles'
     ).fields([
         'title', 'title_sort', 'categories',
     ])[0:docstore.MAX_SIZE]
     if not settings.MEDIAWIKI_SHOW_UNPUBLISHED:
         s = s.query('match', published=True)
     response = s.execute()
     pages = []
     for hit in response:
         page = Page()
         page.url_title = hit.title[0]
         page.title = hit.title[0]
         page.title_sort = hit.title_sort[0]
         page.categories = hit.get('categories', [])
         pages.append(page)
     articles = sorted(pages, key=lambda page: page.title_sort)
     categories = {}
     for page in articles:
         for category in page.categories:
             # exclude internal editorial categories
             if category not in settings.MEDIAWIKI_HIDDEN_CATEGORIES:
                 if category not in categories.keys():
                     categories[category] = []
                 # pages already sorted so category lists will be sorted
                 if page not in categories[category]:
                     categories[category].append(page)
     return categories
Example 12
def gracc_query_apel(year, month):
    index = osg_summary_index
    starttime = datetime.datetime(year, month, 1)
    onemonth = dateutil.relativedelta.relativedelta(months=1)
    endtime = starttime + onemonth
    s = Search(using=es, index=index)
    s = s.query('bool',
        filter=[
            Q('range', EndTime={'gte': starttime, 'lt': endtime })
          & Q('terms', VOName=vo_list)
          & ( Q('term', ResourceType='Batch')
            | ( Q('term', ResourceType='Payload')
              & Q('term', Grid='Local') )
            )
        ]
    )

    bkt = s.aggs
    bkt = bkt.bucket('Cores', 'terms', size=MAXSZ, field='Processors')
    bkt = bkt.bucket('VO',    'terms', size=MAXSZ, field='VOName')
    bkt = bkt.bucket('DN',    'terms', size=MAXSZ, field='DN')
    bkt = bkt.bucket('Site',  'terms', size=MAXSZ, missing=MISSING, field='OIM_ResourceGroup')
    #bkt = bkt.bucket('Site', 'terms', size=MAXSZ, field='SiteName')
    #bkt = bkt.bucket('Site', 'terms', size=MAXSZ, field='WLCGAccountingName')
    add_bkt_metrics(bkt)

    bkt = bkt.bucket('SiteName',  'terms', size=MAXSZ, field='SiteName')

    add_bkt_metrics(bkt)

    response = s.execute()
    return response
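add_bkt_metrics is defined elsewhere in the GRACC reporting script (it is also used in Example 59). A sketch of the kind of metrics it likely attaches; the metric names and fields (WallDuration, CoreHours, Njobs) are assumptions:

def add_bkt_metrics(bkt):
    # Hypothetical helper: attach summed usage metrics to a terms bucket.
    bkt.metric('WallDuration', 'sum', field='WallDuration')
    bkt.metric('CoreHours', 'sum', field='CoreHours')
    bkt.metric('Njobs', 'sum', field='Njobs')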
    def get_files_in_path(self, dir_path):
        ''' gets all es file names from es in a given path '''
        dir_hash = FileResource.get_hash(dir_path)
        #s = FileResource.search().query("match", path["hashdir"] = dir_hash)
        #s = FileResource.search().query("multi_match", query=dir_hash, fields=['path.hashdir'])
        # [{"query": {"match_all": {"index": "content_crawler", "body": {"query": {"term": {"path.hashdir": "b5844a9aba1536cc74682d8bfa28553b5dfd8a8a"}}}, "doc_type": "file_resource"}}
        # Search.query() does not accept index/doc_type/body keyword arguments;
        # select the index up front and express the term query directly.
        s = Search(index=self.index, doc_type=self.type).query(
            "term", file_dir_hash=dir_hash)

        response = s.execute()

        files = []

        for hit in response:
            files.append(hit.file_uri)

        return files
Example 14
    def search(self, **params):
        index = params.get('index', self.index)
        search = Search(using=self.client, index=index)

        page = params.get('page', None)
        per_page = params.get('per_page', None)
        if page and per_page:
            # Slice to the requested page: 'from' is a document offset, not a page number.
            start = (page - 1) * per_page
            search = search[start:start + per_page]

        sort = params.get('sort', None)
        if sort and sort.replace('-', '') in ['created_at', 'level']:
            search = search.sort(sort)

        date_filter = self._filter_by_date_interval(params)
        if date_filter:
            search = search.filter(date_filter)

        level = params.get('group_by', None)
        if level:
            search = search.query('match', level=level)

        hits = search.execute()

        format = params.get('format', 'object')
        if format == 'dict':
            return self._to_dict(hits)
        else:
            return self._to_logs(hits)
Example 15
 def authors(self, num_columns=0):
     """
     @param num_columns: int If non-zero, break up list into columns
     """
     s = Search(
         using=docstore._get_connection(settings.DOCSTORE_HOSTS),
         index=settings.DOCSTORE_INDEX,
         doc_type='authors'
     ).fields([
         'url_title', 'title', 'title_sort', 'lastmod'
     ])[0:docstore.MAX_SIZE]
     response = s.execute()
     authors = []
     for hit in response:
         url_title = hit.url_title[0]
         title = hit.title[0]
         title_sort = hit.title_sort[0]
         lastmod = hit.lastmod[0]
         if title and title_sort:
             author = Author()
             author.url_title = url_title
             author.title = title
             author.title_sort = title_sort
             author.lastmod = datetime.strptime(lastmod, mediawiki.TS_FORMAT)
             authors.append(author)
     authors = sorted(authors, key=lambda a: a.title_sort)
     if num_columns:
         return _columnizer(authors, num_columns)
     return authors
Example 16
    def exists(self):

        find_instance = Search(using=self.es,index=self.index) \
                        .query(Q("match",Id=self.sf_id))

        response = find_instance.execute()
        return response
Example 17
def session_times():
    # {"@fields": {}, "@timestamp": "2015-02-23T17:03:41.738412Z", "@source_host": "newair.brainbot.com", "@message": "scenario.p2p_connect.started"}

    start_message = 'scenario.p2p_connect.starting.clients.sequentially'
    stop_message = 'scenario.p2p_connect.stopping.clients'
    s = Search(client)
    s = s.filter('bool',
                 should=[F('term', message=start_message),
                         F('term', message=stop_message)])
    s = s.fields(['message', '@timestamp'])
    s = s[0:100000]
    s = s.sort('-@timestamp')  # desc,  we want the latest events
    response = s.execute()

    events = []  # youngest to oldest, last should be a stop message
    for h in response:
        msg = 'start' if h['message'][0] == start_message else 'stop'
        ts = h['@timestamp'][0]
        events.append((msg, ts))
    assert not events or events[0][0] == 'stop'
    sessions = []
    while len(events) >= 2:
        stop = events.pop()
        start = events.pop()
        sessions.append(dict([start, stop]))
    return list(reversed(sessions))
Example 18
def dates():
    """Return maximum and minimum date from dataset."""
    q = Search(using=client, index=TRENDS_INDEX)[0:0]
    q.aggs.bucket('min_date', 'min', field=TRENDS_DATE_FIELD)
    q.aggs.bucket('max_date', 'max', field=TRENDS_DATE_FIELD)
    res = q.execute().aggregations
    return jsonify({'maximum': res.max_date.value_as_string, 'minimum': res.min_date.value_as_string})
    def handle(self, *args, **options):
        min_id = FailureLine.objects.order_by('id').values_list("id", flat=True)[0] - 1
        chunk_size = options['chunk_size']

        if options["recreate"]:
            connection.indices.delete(TestFailureLine._doc_type.index, ignore=404)
            TestFailureLine.init()
        else:
            if connection.indices.exists(TestFailureLine._doc_type.index):
                self.stderr.write("Index already exists; can't perform import")
                return

        while True:
            rows = (FailureLine.objects
                    .filter(id__gt=min_id)
                    .order_by('id')
                    .values("id", "job_guid", "action", "test", "subtest",
                            "status", "expected", "message", "best_classification_id",
                            "best_is_verified"))[:chunk_size]
            if not rows:
                break
            es_lines = []
            for item in rows:
                es_line = failure_line_from_value(item)
                if es_line:
                    es_lines.append(es_line)
            self.stdout.write("Inserting %i rows" % len(es_lines))
            bulk_insert(es_lines)
            min_id = rows[len(rows) - 1]["id"]
            time.sleep(options['sleep'])
        s = Search(doc_type=TestFailureLine).params(search_type="count")
        self.stdout.write("Index contains %i documents" % s.execute().hits.total)
Example 20
def get_journals_by_collection_institution(collection_acronym, page_from=0, page_size=1000):

    search = Search(index=INDEX).query(
             "nested", path="collections", query=Q("match", collections__acronym=collection_acronym))

    search = search.filter("exists", field="sponsors")

    search = search[page_from:page_from + page_size]
    search_response = search.execute()

    meta = {
        'total': search_response.hits.total,
    }

    sponsors = {}
    for journal in search_response:

        j = {'jid': journal.jid,
             'title': journal.title,
             'current_status': journal.current_status,
             'last_issue': journal.last_issue,
             'issue_count': journal.issue_count
             }

        for sponsor in journal['sponsors']:
            sponsors.setdefault(sponsor, []).append(j)

    result = {
        'meta': meta,
        'objects': sponsors
    }

    return result
Example 21
        def get_highlights():
            wiki_field = 'wiki_content'
            qb_field = 'qb_content'
            text = request.form['text']
            s = Search(index='qb')[0:10].query(
                'multi_match', query=text, fields=[wiki_field, qb_field])
            s = s.highlight(wiki_field).highlight(qb_field)
            results = list(s.execute())

            if len(results) == 0:
                highlights = {'wiki': [''],
                              'qb': [''],
                              'guess': ''}
            else:
                guess = results[0] # take the best answer
                _highlights = guess.meta.highlight
                try:
                    wiki_content = list(_highlights.wiki_content)
                except AttributeError:
                    wiki_content = ['']

                try:
                    qb_content = list(_highlights.qb_content)
                except AttributeError:
                    qb_content = ['']

                highlights = {'wiki': wiki_content,
                              'qb': qb_content,
                              'guess': guess.page}
            return jsonify(highlights)
Example 22
    def search(self, text: str, max_n_guesses: int,
               normalize_score_by_length=False,
               wiki_boost=1, qb_boost=1):
        if not self.exists():
            raise ValueError('The index does not exist, you must create it before searching')

        if wiki_boost != 1:
            wiki_field = 'wiki_content^{}'.format(wiki_boost)
        else:
            wiki_field = 'wiki_content'

        if qb_boost != 1:
            qb_field = 'qb_content^{}'.format(qb_boost)
        else:
            qb_field = 'qb_content'

        s = Search(index=self.name)[0:max_n_guesses].query(
            'multi_match', query=text, fields=[wiki_field, qb_field]
        )
        results = s.execute()
        guess_set = set()
        guesses = []
        if normalize_score_by_length:
            query_length = len(text.split())
        else:
            query_length = 1

        for r in results:
            if r.page in guess_set:
                continue
            else:
                guess_set.add(r.page)
                guesses.append((r.page, r.meta.score / query_length))
        return guesses
Example 23
    def process(self, start_time:datetime, end_time:datetime, input:DataFrame):
        logger.debug('Start: %s  End: %s  Log: index=%s fields=%s' % (start_time.isoformat(), end_time.isoformat(), str(self.indices), str(self.fields)))

        search = Search(using=self.client, index=self.indices[0])
        search = search.filter(Range(** {'@timestamp': {'gte': start_time.isoformat(), 'lte': end_time.isoformat()}}))

        for k,v in self.fields.items():
            if isinstance(v, list):
                for sv in v:
                    search = search.query("match", **{k:sv})

            else:
                search = search.query("match", **{k:v})

        logger.debug('ES Query: %s' % str(search.to_dict()))
        response = search.execute()

        logger.debug('Results: success:%d failed:%d hits:%d' % (response._shards.successful, response._shards.failed, len(response.hits)))

        for hit in response:
            # filter out the meta key and flatten the values
            row = {k: str(hit[k]) for k in hit if k != 'meta'}

            logger.debug(row)
            input = input.append(row, ignore_index=True)

        return input
Example 24
 def pages():
     """Returns list of published light Page objects.
     
     @returns: list
     """
     KEY = 'encyc-front:pages'
     TIMEOUT = 60*5
     data = cache.get(KEY)
     if not data:
         s = Search(doc_type='articles').filter('term', published_encyc=True)[0:MAX_SIZE]
         s = s.sort('title_sort')
         s = s.fields([
             'url_title',
             'title',
             'title_sort',
             'published',
             'modified',
             'categories',
         ])
         response = s.execute()
         data = [
             Page(
                 url_title  = hitvalue(hit, 'url_title'),
                 title      = hitvalue(hit, 'title'),
                 title_sort = hitvalue(hit, 'title_sort'),
                 published  = hitvalue(hit, 'published'),
                 modified   = hitvalue(hit, 'modified'),
                 categories = hit.get('categories',[]),
                )
             for hit in response
             if hitvalue(hit, 'published')
         ]
         cache.set(KEY, data, TIMEOUT)
     return data
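Examples 24 through 26 rely on hitvalue. With the legacy .fields() API every stored field comes back as a list, so the helper most likely unwraps the first value; a minimal sketch under that assumption:

def hitvalue(hit, fieldname):
    # Hypothetical helper: return the first element of a stored-fields list, or None.
    values = hit.get(fieldname, None)
    if values:
        return values[0]
    return None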
Example 25
 def authors(num_columns=None):
     """Returns list of published light Author objects.
     
     @returns: list
     """
     KEY = 'encyc-front:authors'
     TIMEOUT = 60*5
     data = cache.get(KEY)
     if not data:
         s = Search(doc_type='authors')[0:MAX_SIZE]
         s = s.sort('title_sort')
         s = s.fields([
             'url_title',
             'title',
             'title_sort',
             'published',
             'modified',
         ])
         response = s.execute()
         data = [
             Author(
                 url_title  = hitvalue(hit, 'url_title'),
                 title      = hitvalue(hit, 'title'),
                 title_sort = hitvalue(hit, 'title_sort'),
                 published  = hitvalue(hit, 'published'),
                 modified   = hitvalue(hit, 'modified'),
             )
             for hit in response
             if hitvalue(hit, 'published')
         ]
         cache.set(KEY, data, TIMEOUT)
     if num_columns:
         return _columnizer(data, num_columns)
     return data
Example 26
 def sources():
     """Returns list of published light Source objects.
     
     @returns: list
     """
     KEY = 'encyc-front:sources'
     TIMEOUT = 60*5
     data = cache.get(KEY)
     if not data:
         s = Search(doc_type='sources')[0:MAX_SIZE]
         s = s.sort('encyclopedia_id')
         s = s.fields([
             'encyclopedia_id',
             'published',
             'modified',
             'headword',
             'media_format',
             'img_path',
         ])
         response = s.execute()
         data = [
             Source(
                 encyclopedia_id = hitvalue(hit, 'encyclopedia_id'),
                 published = hitvalue(hit, 'published'),
                 modified = hitvalue(hit, 'modified'),
                 headword = hitvalue(hit, 'headword'),
                 media_format = hitvalue(hit, 'media_format'),
                 img_path = hitvalue(hit, 'img_path'),
                )
             for hit in response
             if hitvalue(hit, 'published')
         ]
         cache.set(KEY, data, TIMEOUT)
     return data
Example 27
def get_highlights(text):
    # query top 10 guesses
    s = Search(index='qb_ir_instance_of')[0:10].query('multi_match', query=text,
            fields=['wiki_content', 'qb_content', 'source_content'])
    s = s.highlight('qb_content').highlight('wiki_content')
    results = list(s.execute())
    if len(results) == 0:
        highlights = {'wiki': [''],
                      'qb': [''],
                      'guess': ''}
        return highlights

    guess = results[0] # take the best answer
    _highlights = guess.meta.highlight 

    try:
        wiki_content = list(_highlights.wiki_content)
    except AttributeError:
        wiki_content = ['']

    try:
        qb_content = list(_highlights.qb_content)
    except AttributeError:
        qb_content = ['']

    highlights = {'wiki': wiki_content,
                  'qb': qb_content,
                  'guess': guess.page}
    return highlights
Example 28
def search():
    q = request.args.get('q')
    #resp = es.search(index='hoe', doc_type='record', q=q, body=aggs)
    #logging.info(q)

    s = Search(using=es, index='hoe', doc_type='record')
    s.aggs.bucket('library_place', 'terms', field='library-place')
    s.aggs.bucket('type', 'terms', field='type')
    s.aggs.bucket('genre', 'terms', field='genre')
    s.aggs.bucket('keywords', 'terms', field='keywords.label')
    s.aggs.bucket('author', 'terms', field='author.literal')
    s.query = Q('multi_match', query=q, fields=['_all'])
    filters = []
    if 'filter' in request.args:
        filters = request.args.getlist('filter')
        logging.info(filters)
        for filter in filters:
            cat, val = filter.split(':')
            cat = cat.replace('_', '-')
            filter_dict = {}
            filter_dict.setdefault(cat, val)
            logging.info(cat)
            s = s.filter('term', **filter_dict)
    #if request.args
    resp = s.execute()
    #logging.info(resp)
    #logging.info(resp.aggregations.per_category.buckets)
    return render_template('resultlist.html', records=resp.to_dict().get('hits'), facets=resp.aggregations.to_dict(), header=q, query=q, filters=filters)
Example 29
    def from_es_id(cls,es,es_id,access_token,instance,version=None):

        index_exists = es.indices.exists(index=cls.ES_INDEX)
        type_exists =  es.indices.exists_type(index=cls.ES_INDEX,
                                              doc_type=cls.ES_TYPE)

        if not all([index_exists,type_exists]):
            raise Exception('Elastic index or type does not exist. ' \
                            'Cannot find {c} in Elasticsearch '\
                            ' to create an instance'.format(c=cls.__name__))

        find_instance = Search(using=es,index=cls.ES_INDEX) \
                        .query(Q("match",_id=es_id))

        r = find_instance.execute()
        if not r:
            raise Exception('Cannot find elasticsearch {t}' \
                            ' instance from elasticsearch ' \
                            'id:{id}'.format(t=cls.__name__,
                                            id=es_id))


        sf_id = r[0]._d_.pop('Id',None)
        if sf_id is None:
            raise Exception('Missing a valid SF Id in ' \
                            ' Elasticsearch document id:{i}'.format(i=es_id))

        sf_data = r[0]._d_

        return cls(es=es,
                   sf_id=sf_id,
                   sf_data=sf_data,
                   access_token=access_token,
                   instance=instance)
Example 30
    def search(self, args, es_client=client):
        search = Search(using=es_client, index=SearchableEvent.meta.index)

        if args.get('name'):
            search = search.query('fuzzy', name=args['name'])
            search = search.highlight('name')

        if args.get('description'):
            search = search.query('match', description=args['description'])
            search = search.highlight('description')

        if args.get('location-name'):
            search = search.query('fuzzy', location_name=args['location-name'])
            search = search.highlight('location_name')

        if args.get('organizer-name'):
            search = search.query(
                'fuzzy', organizer_name=args['organizer-name'])
            search = search.highlight('organizer_name')

        if args.get('organizer-description'):
            search = search.query(
                'fuzzy', organizer_description=args['organizer-description'])
            search = search.highlight('organizer_description')

        return [to_dict(r) for r in search.execute()]
Example 31
def is_alive():
    find_string = datetime.utcnow().strftime("%Y-%m")
    from_date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")

    s = Search(using=es, index="bitshares-" + find_string)
    s.query = Q("range", block_data__block_time={'gte': from_date, 'lte': "now"})
    s.aggs.metric("max_block_time", "max", field="block_data.block_time")

    json_response = {
        "server_time": datetime.utcnow(),
        "head_block_timestamp": None,
        "head_block_time": None
    }

    try:
        response = s.execute()
        if response.aggregations.max_block_time.value is not None:
            json_response["head_block_time"] = str(response.aggregations.max_block_time.value_as_string)
            json_response["head_block_timestamp"] = response.aggregations.max_block_time.value
            json_response["deltatime"] = abs((datetime.utcfromtimestamp(json_response["head_block_timestamp"] / 1000) - json_response["server_time"]).total_seconds())
            if json_response["deltatime"] < 30:
                json_response["status"] = "ok"
            else:
                json_response["status"] = "out_of_sync"
                json_response["error"] = "last_block_too_old"
        else:
            json_response["status"] = "out_of_sync"
            json_response["deltatime"] = "Infinite"
            json_response["query_index"] = find_string
            json_response["query_from_date"] = from_date
            json_response["error"] = "no_blocks_last_24_hours"
    except NotFoundError:
        json_response["status"] = "out_of_sync"
        json_response["deltatime"] = "Infinite"
        json_response["error"] = "index_not_found"
        json_response["query_index"] = find_string

    return json_response
Example 32
def get_aggregated_filtered_statistics(filters):
    s = Search(using=es, doc_type='associations')
    s = filter_association_search(s, filters)
    agg_chr = A("terms", field="snp.chr")
    agg_type = A("terms", field="snp.coding")
    agg_annotation = A({
        "nested": {
            "path": "snp.annotations"
        },
        "aggs": {
            "annotations": {
                "terms": {
                    "field": "snp.annotations.effect"
                }
            }
        }
    })
    agg_maf = A("range",
                field="maf",
                ranges=[{
                    "to": 0.01
                }, {
                    "from": 0.01,
                    "to": 0.05001
                }, {
                    "from": 0.05001,
                    "to": 0.1001
                }, {
                    "from": 0.1001
                }])
    agg_mac = A("range", field="mac", ranges=[{"to": 6}, {"from": 6}])
    s.aggs.bucket('maf_count', agg_maf)
    s.aggs.bucket('mac_count', agg_mac)
    s.aggs.bucket('chr_count', agg_chr)
    s.aggs.bucket('type_count', agg_type)
    s.aggs.bucket('annotation_count', agg_annotation)
    agg_results = s.execute().aggregations
    return agg_results.chr_count.buckets, agg_results.maf_count.buckets, agg_results.mac_count.buckets, agg_results.type_count.buckets, agg_results.annotation_count.annotations.buckets
Example 33
def search(request):
    if not request.GET.get('q'):
        return bad_request('no search term')

    term = request.GET.get('q')
    hits = []
    s = Search(index=KORPUS_INDEX)
    s = s.source(includes=['pk', 'rec', 'vrsta', 'podvrsta'])
    s.query = MultiMatch(type='bool_prefix',
                         query=remove_punctuation(term),
                         fields=['oblici'])
    try:
        response = s.execute()
        for hit in response.hits.hits:
            hits.append({
                'vrsta': hit['_source']['vrsta'],
                'vrsta_text': VRSTE_RECI[hit['_source']['vrsta']],
                'rec': hit['_source']['rec'],
                'pk': hit['_source']['pk']
            })
        return Response(hits, status=HTTP_200_OK, content_type=JSON)
    except ElasticsearchException as error:
        return server_error(error.args)
Example 34
def get_all_data_sender_mobile_numbers(dbm):
    all_data_senders_count = get_all_data_senders_count(dbm)
    search_parameters = {
        "response_fields": ['mobile_number'],
        "number_of_results": all_data_senders_count,
        "start_result_number": 0
    }
    es = Elasticsearch(hosts=[{
        "host": ELASTIC_SEARCH_HOST,
        "port": ELASTIC_SEARCH_PORT
    }])
    search = Search(using=es,
                    index=dbm.database_name,
                    doc_type=REPORTER_DOC_TYPE)
    search = _add_non_contact_filter(search)
    search = _add_non_deleted_ds_filter(search)
    search = _restrict_test_ds_filter(search)
    search = _add_pagination_criteria(search_parameters, search)
    search = _add_response_fields(search_parameters, search)
    search_results = search.execute()
    return [
        safe_getattr(item, 'mobile_number')[0] for item in search_results.hits
    ]
Example 35
def get_trx():

    trx = request.args.get('trx', "738be2bd22e2da31d587d281ea7ee9bd02b9dbf0")
    from_ = request.args.get('from_', 0)
    size = request.args.get('size', 10)

    s = Search(using=es,
               index="graphene-*",
               extra={
                   "size": size,
                   "from": from_
               })

    q = Q("match", block_data__trx_id=trx)

    s.query = q
    response = s.execute()
    results = []
    for hit in response:
        # print hit.to_dict()
        results.append(hit.to_dict())

    return jsonify(results)
Example 36
def tx_propagation(client_count, offset=10):
    """
    check for 'eth.tx.received' messages
    and return the max number of clients that had the same tx
    during the last `offset` seconds.
    """
    s = Search(client)
    # s = s.query(Q("match", message='eth.tx.received'))
    s = s.filter('exists', field='json_message.eth.tx.received.tx_hash')
    s = s.filter(
        time_range_filter(field="json_message.eth.tx.received.ts",
                          offset=offset))
    s.aggs.bucket('by_tx',
                  'terms',
                  field='json_message.eth.tx.received.tx_hash',
                  size=client_count)
    # s = s[0:1000]
    response = s.execute()
    if response:
        return max(tag.doc_count
                   for tag in response.aggregations.by_tx.buckets)
    else:
        return 0
    def search_substrings_and(self, substrings, _index):
        """
        Search for documents containing all substrings from the input list.
        (search for a conjunction of all substrings from 'substrings')
        :param substrings: substring list
        :param _index: ES index
        """
        start = clock()
        # create the first query object, then add other queries to it using the 'and' operation
        q = Q("match", content=substrings[0])
        for substring in substrings[1:]:
            q = q & Q("match", content=substring)

        # create search object from 'q' query object
        s = Search(index=_index).using(self.client).query(q)
        response = s.execute()
        end = clock() - start
        self.time = end
        self.total = response.hits.total
        for substring in substrings[:-1]:
            self.query += "'{}' & ".format(substring)
        self.query += substrings[-1]
        self.__save__()
Example 38
def get_topics_aggregations(topic_modelling, topic_weight_threshold,
                            is_multi_corpus):
    s = Search(using=ES_CLIENT, index=f"{ES_INDEX_TOPIC_DOCUMENT}_{topic_modelling}") \
        .filter("range", topic_weight={"gte": topic_weight_threshold})
    s.aggs.bucket(name='topics', agg_type="terms", field='topic_id', size=10000) \
        .metric("topic_weight", agg_type="sum", field="topic_weight")
    if is_multi_corpus:
        s.aggs['topics'].bucket(name="corpus", agg_type="terms", field="document_corpus", size=10000) \
            .metric("topic_weight", agg_type="sum", field="topic_weight")
    result = s.execute()
    topic_info_dict = dict((bucket.key, {
        "count":
        bucket.doc_count,
        "weight_sum":
        bucket.topic_weight.value,
        "corpus_weights":
        dict(((bucket_corpus.key, {
            "count": bucket_corpus.doc_count,
            "weight_sum": bucket_corpus.topic_weight.value,
        }) for bucket_corpus in bucket.corpus.buckets
              )) if is_multi_corpus else None
    }) for bucket in result.aggregations.topics.buckets)
    return topic_info_dict
def search_pokemon(es: Elasticsearch, search_query: str, page: int):
    s = Search(using=es)
    q = Q({
        'function_score': {
            'query': {
                'multi_match': {
                    'query':
                    search_query,
                    'fields': [
                        'doc.name', 'doc.abilities.ability.name',
                        'doc.forms.name', 'doc.moves.move.name'
                    ],
                    'fuzziness':
                    'AUTO',
                    'prefix_length':
                    2
                }
            }
        }
    })
    s = s.query(q)[(page - 1) * 10:page * 10]
    res = s.execute()
    return res
Example 40
 def get_entity(self, sport, element):
     search = Search(using=self.es)
     if sport == Sport.SOCCER:
         search = search.index('soccer-entity')
     if sport == Sport.BASKETBALL:
         search = search.index('basketball-entity')
     search = search.query(Match(_id=element[0]))
     response = search.execute()
     if len(response) > 0:
         entity = {'name': response[0]['name']}
         if 'abstract' in response[0]:
             entity['abstract'] = response[0]['abstract']
         else:
             entity['abstract'] = 'None'
         if 'type' in response[0]:
             entity['type'] = response[0]['type']
         else:
             entity['type'] = 'None'
     else:
         entity = {'name': element[0], 'abstract': 'None', 'type': 'None'}
     entity['similarity'] = round(element[1], 2)
     entity['sport'] = sport.value
     return entity
Example 41
def search(query, filter=None):
    s = Search(index='policy-index').query("multi_match", query=query,
                                           fields=["title", "school", "department", "administrator", "author", "state",
                                                   "city", "latitude", "longitude", "link", "tags", "abstract", "text"],
                                           fuzziness="AUTO").extra(from_=0, size=100)
    if filter is not None and len(filter) > 0:
        years = []
        schools = []
        for f in filter:
            try:
                f = int(f)
                years.append(Q('range', published_date={'gte': date(f, 1, 1), 'lt': date(f, 12, 31)}))
            except ValueError:
                schools.append(Q('match_phrase', school=f))
        if len(years) > 0 and len(schools) == 0:
            s = s.query("bool", filter=functools.reduce(operator.or_, years))
        if len(schools) > 0 and len(years) == 0:
            s = s.query("bool", filter=functools.reduce(operator.or_, schools))
        if len(schools) > 0 and len(years) > 0:
            combined = functools.reduce(operator.or_, years) & functools.reduce(operator.or_, schools)
            s = s.query("bool", filter=combined)
    response = s.execute()
    return response
Example 42
    def get_metrics_data(self, query):
        """
        Get the metrics data from Elasticsearch given a DSL query

        :param query: query to be sent to Elasticsearch
        :return: a dict with the results of executing the query
        """
        if self.es_url.startswith("http"):
            url = self.es_url
        else:
            url = 'http://' + self.es_url
        es = Elasticsearch(url)
        s = Search(using=es, index=self.es_index)
        s = s.update_from_dict(query)
        try:
            response = s.execute()
            return response.to_dict()
        except Exception as e:
            print()
            print(
                "In get_metrics_data: Failed to fetch data.\n Query: {}, \n Error Info: {}"
                .format(query, e.info))
            raise
Example 43
    def get_performed_users(self):
        """
        Returns the users that performed actions within the search filters
        """
        search = Search(using=self.es, index=self.index)
        for query in self.searchfilter.values():
            search = search.query(query)

        search.aggs.bucket(
            "user_names",
            "terms",
            field=self.get_field_name("userIdentity.userName"),
            size=5000,
        )
        response = search.execute()

        user_names = {}
        for user in response.aggregations.user_names.buckets:
            if user.key == "HIDDEN_DUE_TO_SECURITY_REASONS":
                # This happens when a user logs in with the wrong username
                continue
            user_names[user.key] = True
        return user_names
Example 44
def getHostBytes(client, starttime, endtime):
    s = Search(using=client, index="htcondor-xfer-stats2-*")
    s = s.filter('range', **{'@timestamp': {'gte': starttime, 'lt': endtime}})
    # Remove records with more than 1 TB of data transferred, bug:
    # https://htcondor-wiki.cs.wisc.edu/index.cgi/tktview?tn=7575,0
    s = s.filter('range', bytes={'from': 0, 'to': 1024**4})
    bkt = s.aggs
    bkt = bkt.bucket('hosts', 'terms', size=MAXSZ, field='host.name.keyword')
    bkt = bkt.metric('Bytes', 'sum', field='bytes')
    bkt = bkt.metric('loss', 'avg', field='lost')

    print(s.to_dict())

    response = s.execute()
    hosts = {}
    for tag in response.aggregations.hosts:
        hosts[tag.key] = {
            'bytes': tag.Bytes.value,
            'bytes_str': convert_gb(tag.Bytes.value),
            'loss': tag.loss.value
        }

    return hosts
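convert_gb is only used for display. A minimal sketch, assuming it simply renders the byte count in gigabytes:

def convert_gb(num_bytes):
    # Hypothetical helper: human-readable GB string for a byte count.
    return '{:.2f} GB'.format(num_bytes / 1024 ** 3)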
Example 45
def test_time_field_query(es):
    """Test executing query of fetch time field.

    Notes:
        if is_fetch is ticked, this function checks if the entered TIME_FIELD returns results.

    Args:
        es(Elasticsearch): an Elasticsearch object to which we run the test.

    Returns:
        (dict).The results of the query if they are returned.
    """
    query = QueryString(query=TIME_FIELD + ':*')
    search = Search(using=es, index=FETCH_INDEX).query(query)[0:1]
    response = search.execute().to_dict()
    _, total_results = get_total_results(response)

    if total_results == 0:
        # failed in getting the TIME_FIELD
        return_error("Fetch incidents test failed.\nDate field value incorrect [{}].".format(TIME_FIELD))

    else:
        return response
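get_total_results comes from the integration's helper code; judging from the unpacking above, it returns the hits alongside the total count. A sketch that tolerates both the pre-7.0 integer and the newer object form of hits.total (the exact return shape is an assumption):

def get_total_results(response_dict):
    # Hypothetical helper: (hits list, total hit count) from a raw response dict.
    hits_section = response_dict.get('hits', {})
    total = hits_section.get('total', 0)
    if isinstance(total, dict):  # Elasticsearch 7+ reports {'value': N, 'relation': 'eq'}
        total = total.get('value', 0)
    return hits_section.get('hits', []), total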
def companySearch(search):
    s = Search(using=es)
    search['offset'] = int(search['offset'])
    s = s.index('job_index')
    s = s.query('match_phrase', company=search['company'])
    s = s[search['offset']:search['offset'] + 10]
    response = s.execute()

    resultlist = []
    print response.hits.total
    for hit in response.hits:
        result = {}
        result['id'] = hit.meta.id
        result['score'] = hit.meta.score
        result['title'] = hit['title']
        result['summary'] = hit['summary'][:180]
        result['url'] = 'www.indeed.com' + hit['url']
        result['company'] = hit['company']
        result['location'] = hit['location']
        result['postingdate'] = str(datetime.datetime.fromordinal(hit['date']))
        resultlist.append(result)

    return resultlist
Example 47
def get_need_content_from_id(content_id, index='rastarockets_needs'):
    """
    Return need content from unique ID

    :param content_id: Need content unique ID
    :type content_id: str

    :param index: Index name (optional)
    :type index: str

    :return: NeedContent if exist
    :rtype: NeedContent|None
    """
    search = Search(using=current_app.els_client,
                    index=index,
                    doc_type='content').query('term', _id=content_id)

    response = search.execute()
    if response.hits.total > 0:
        return NeedContent(response.hits[0])

    else:
        return None
Example 48
def test_checking_of_missing_user(es, data):
    user_count = 0
    hit_count = 0
    if data['index'] == 'users':
        if data['action'] == 'add':

            # Loads user data
            test_data = load_csv_to_dict(data['datafile'])

            for user in test_data:
                user_count += 1
                query = query_construct(test_data.fieldnames, user)
                q = Q('bool', must=query)
                s = Search(using=es, index='users')
                s = s.query(q)
                response = s.execute()
                print(response)
                for hit in response:
                    hit_count += 1

    # Check if the expected user count is equal to the actual user count
    assert user_count == hit_count, \
        'Expected {} users but ES returned {} users'.format(user_count, hit_count)
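query_construct builds the per-row query clauses from the CSV columns; a minimal sketch returning the list of match queries that Q('bool', must=...) expects (the helper body is an assumption):

from elasticsearch_dsl import Q


def query_construct(fieldnames, row):
    # Hypothetical helper: one match clause per non-empty CSV column.
    return [Q('match', **{field: row[field]}) for field in fieldnames if row.get(field)]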
    def query_distinct_event_ids(self):
        es_query = []
        es_query.append({
            'match': {
                'winlog.provider_name':
                MICROSOFT_WINDOWS_DNSCLIENT_PROVIDER_NAME
            }
        })
        query = Q({'bool': {'must': es_query}})
        s = Search(using=self.Client, index="winlogbeat-*").query(query)
        s = s.source(includes=['winlog.event_id', 'winlog.event_data.LogString'])
        s.aggs.bucket('distinct_event_ids',
                      'terms',
                      field='winlog.event_id',
                      size=1000)
        response = s.execute()

        sorted_distinct_distinct_event_ids = sorted(
            response.aggregations.distinct_event_ids,
            key=lambda kv: (kv.doc_count, kv.key),
            reverse=True)
        for e in sorted_distinct_distinct_event_ids:
            print("{0:50} {1}".format(e.key, e.doc_count))
Example 50
    def search(self, query, get_count=False, includes=None, size=1000):
        if self.DebugQuery:
            pprint.pprint(query)

        s = Search(using=self.Client, index=WINLOGBEAT_INDEX).query(query)
        if self.DTRange is not None:
            s = s.filter('range', **self.DTRange)

        if includes is None:
            includes = ['winlog.provider_name', 'winlog.event_id']

        # source() returns a new Search object, so keep the result.
        s = s.source(includes=includes)

        if get_count:
            return s.count()

        if self.Scan:
            return s.scan()
        else:
            s = s[0:size]
            return s.execute().hits

        return None
Example 51
def getAllDocs1(es, indices):

    hitCount = 0
    index = "netflow-v5-2017.10.11"
    #qDict = {'size': 1000000, 'query': {'match_all': {} }}
    qDict = {'size': 1000, 'sort': ['_doc']}  # or just {'sort': [ '_doc']}
    qDict = {'size': 500, 'query': {'match_all': {}}}
    qDict = {'query': {'terms': {'_id': ['AV8LhQqZyn_BE1UV4cVe']}}}

    s = Search(using=es, index=index)
    s.update_from_dict(qDict)
    total = s.count()
    s = s[0:total]
    results = s.execute()
    print("results.hits.total={}  s.count()={}  results.hits.hits={}".format(
        results.hits.total, s.count(), len(results.hits.hits)))
    #s = s[0:s.count()-1]
    #results = s.execute()
    print("Results: {}".format(len(results)))
    for result in s.scan():
        print(str(hitCount) + ":  " + result.to_dict()["@timestamp"])
        hitCount += 1
    exit()
Example 52
    def dump_event_counts(self):
        s = Search(using=self.Client, index=WINLOGBEAT_INDEX)
        s = s.source(includes=['winlog.provider_name', 'winlog.event_id'])
        s.aggs.bucket('distinct_provider_names',
                      'terms',
                      field='winlog.provider_name',
                      size=100000)
        response = s.execute()

        sorted_distinct_provider_names = sorted(
            response.aggregations.distinct_provider_names,
            key=lambda kv: (kv.doc_count, kv.key),
            reverse=True)

        max_provider_name_len = 0
        for e in sorted_distinct_provider_names:
            str_len = len(e.key)
            if max_provider_name_len < str_len:
                max_provider_name_len = str_len

        fmt_str = "{0:%d} {1}" % max_provider_name_len
        for e in sorted_distinct_provider_names:
            print(fmt_str.format(e.key, e.doc_count))
Example 53
    def get(self, request, **kwargs):
        """
        search for an acronym
        :param request: the HTTP GET request
        :return: JSON
        """

        if "search_query" in request.GET:
            query = str(request.GET.get("search_query"))

            s = Search(index="acronyms").query("match", acronym=query)
            response = s.execute()

            logging.debug(response)

            hits_list = []
            for hit in response.to_dict()["hits"]["hits"]:
                hit = hit['_source']
                hits_list.append(hit)

            return JsonResponse({'data': hits_list})

        return JsonResponse({'status': "error", 'detail': "please include a query"}, status=400)
Example 54
def execute_elastic_query(args):
    logger.debug(args)

    query = args.query
    host = args.host
    port = args.port
    from_time = "now-{seconds}s".format(seconds=args.seconds)
    aggregate = need_aggregate(args)

    index = build_indices(indices_count=args.indices_count,
                          index_pattern=args.index_pattern,
                          index_prefix=args.index_prefix)

    client = Elasticsearch(hosts=["{}:{}".format(host, port)])

    s = Search(using=client, index=index) \
        .query("query_string", query=query, analyze_wildcard=True) \
        .query("range", **{"@timestamp": {"gte": "{}".format(from_time)}})

    if aggregate:
        s.aggs.bucket(args.aggregation_name, A(args.aggregation_type, field=args.aggregation_field))

    return s.execute()
Example 55
def search(query, page, sort):
    q = Q("match_all")
    word = query
    if word[0] == '"' and word[-1] == '"':
        q = q & Q("match_phrase", words="{}".format(word))
    else:
        words = word.split(' ')
        for w in words:
            q = q & Q("wildcard", words="{}".format(w))
    if ":" in sort:
        sort_arr = sort.split(":")
        search = Search(using=client, index="file").query(q).sort(
            {sort_arr[0]: {
                 "order": sort_arr[1]
             }})
    else:
        search = Search(using=client, index="file").query(q)
    total = search.count()
    max_pages = total // per_page
    search = search[(page - 1) * per_page:page * per_page]
    response = search.execute()
    return [file_result_from_hit(hit)
            for hit in response], max_pages + 1, total
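file_result_from_hit just reshapes a hit for the caller; a generic sketch (any fields beyond the hit metadata are assumptions):

def file_result_from_hit(hit):
    # Hypothetical helper: flatten a hit into a plain dict plus its metadata.
    result = hit.to_dict()
    result['id'] = hit.meta.id
    result['score'] = hit.meta.score
    return result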
Example 56
    def get_summary(self):
        s = Search(index=self.index)
        # Filter by date to approximately 20 years ago, to ensure there aren't more
        # than 10000 buckets
        date_20_years_ago = (datetime.utcnow() - timedelta(days=int(20*365.25))).date()
        s = s.filter('range', **{'last_updated': {'gte': str(date_20_years_ago)}})
        s.aggs.bucket('daily_workflows', 'date_histogram',
                      field='last_updated',
                      format="yyyy-MM-dd", interval='day') \
            .bucket('recid', 'terms', field='recid')
        result = s.execute().aggregations.to_dict()

        # flatten summary
        processed_result = []
        _daily_workflows = result['daily_workflows']
        for day in _daily_workflows['buckets']:
            for recid in day['recid']['buckets']:
                record_search = self.search(term=recid['key'], fields=['recid'])
                record = record_search[0] if len(record_search) == 1 else record_search[1]

                processed_result.append(record.as_custom_dict(exclude=[]))

        return processed_result
Example 57
def get_trade_history(size=10, from_date='2015-10-10', to_date='now', sort_by='-operation_id_num',
                      search_after=None, base="1.3.0", quote="1.3.121"):

    s = Search(using=es, index="bitshares-*")

    s = s.extra(size=size)
    if search_after and search_after != '':
        s = s.extra(search_after=search_after.split(','))

    q = Q()
    q = q & Q("match", operation_type=4)
    q = q & Q("match", operation_history__op_object__is_maker=True)

    q = q & Q("match", operation_history__op_object__fill_price__base__asset_id=base)
    q = q & Q("match", operation_history__op_object__fill_price__quote__asset_id=quote)

    range_query = Q("range", block_data__block_time={'gte': from_date, 'lte': to_date})
    s.query = q & range_query

    s = s.sort(*sort_by.split(','))
    response = s.execute()

    return [hit.to_dict() for hit in response]
Example 58
def autocomplete_view(request):
    query = request.GET.get('term', '')
#    resp = models.client.suggest(
#        index='review',
#        body={                                                                                                                                          
#            'perfume': {
#               "text": query,
#               "completion": {
#                   "field": 'perfume',
#               }
#            }
#        }
#    )
    s = Search(using=models.client, index = "review")
    s = s.filter("term", perfume=query)
    resp = s.execute()

    perfumes = []
    for hit in resp:
        perfumes.append(hit.perfume)
    data = json.dumps(perfumes)
    mimetype = 'application/json'
    return HttpResponse(data, mimetype)
Example 59
def gracc_query_apel(year, month):
    index = osg_summary_index
    starttime = datetime.datetime(year, month, 1)
    onemonth = dateutil.relativedelta.relativedelta(months=1)
    endtime = starttime + onemonth
    s = Search(using=es, index=index)
    s = s.query('bool',
                filter=[
                    Q('range', EndTime={
                        'gte': starttime,
                        'lt': endtime
                    })
                    & Q('terms', VOName=vo_list)
                    & (Q('term', ResourceType='Batch')
                       | (Q('term', ResourceType='Payload')
                          & Q('term', Grid='Local')))
                ])

    bkt = s.aggs
    bkt = bkt.bucket('Cores', 'terms', size=MAXSZ, field='Processors')
    bkt = bkt.bucket('VO', 'terms', size=MAXSZ, field='VOName')
    bkt = bkt.bucket('DN', 'terms', size=MAXSZ, field='DN')
    bkt = bkt.bucket('Site',
                     'terms',
                     size=MAXSZ,
                     missing=MISSING,
                     field='OIM_ResourceGroup')
    #bkt = bkt.bucket('Site', 'terms', size=MAXSZ, field='SiteName')
    #bkt = bkt.bucket('Site', 'terms', size=MAXSZ, field='WLCGAccountingName')
    add_bkt_metrics(bkt)

    bkt = bkt.bucket('SiteName', 'terms', size=MAXSZ, field='SiteName')

    add_bkt_metrics(bkt)

    response = s.execute()
    return response
Example 60
    def search(self,
               text: str,
               max_n_guesses: int,
               normalize_score_by_length=False,
               wiki_boost=1,
               qb_boost=1):
        if not self.exists():
            raise ValueError(
                'The index does not exist, you must create it before searching'
            )

        if wiki_boost != 1:
            wiki_field = 'wiki_content^{}'.format(wiki_boost)
        else:
            wiki_field = 'wiki_content'

        if qb_boost != 1:
            qb_field = 'qb_content^{}'.format(qb_boost)
        else:
            qb_field = 'qb_content'

        s = Search(index=self.name)[0:max_n_guesses].query(
            'multi_match', query=text, fields=[wiki_field, qb_field])
        results = s.execute()
        guess_set = set()
        guesses = []
        if normalize_score_by_length:
            query_length = len(text.split())
        else:
            query_length = 1

        for r in results:
            if r.page in guess_set:
                continue
            else:
                guess_set.add(r.page)
                guesses.append((r.page, r.meta.score / query_length))
        return guesses