Example #1
def consensus(offset=60):
    """
    check for 'eth.chain.new_head' messages
    and return the max number of clients, that had the same head
    during the last `offset` seconds.
    """
    s = Search(client)
    # s = s.query(Q('match', message='eth.chain.new_head'))
    s = s.filter('exists', field='json_message.eth.chain.new_head.block_number')
    s = s.sort({'json_message.eth.chain.new_head.ts': {'order': 'desc', 'ignore_unmapped': 'true'}})
    response = s.execute()

    # Get latest block number
    x = max(hit['_source']['json_message']['eth.chain.new_head']['block_number'] for hit in response.hits.hits)

    # By default, the buckets are ordered by their doc_count descending
    # s.aggs.bucket('by_block_hash', 'terms', field='json_message.eth.chain.new_head.block_hash', size=3)

    # Reach consensus around latest block number
    s = Search(client)
    s = s.filter(time_range_filter(field="json_message.eth.chain.new_head.ts", offset=offset))
    s.aggs.bucket('latest', 'range',
                  field='json_message.eth.chain.new_head.block_number',
                  ranges=[{"from": x - 1, "to": x + 1}]).bucket(
                      'by_block_hash', 'terms',
                      field='json_message.eth.chain.new_head.block_hash',
                      size=3)
    # s = s[10:10]
    response = s.execute()
    # pprint(response)

    if response:
        return max(tag.doc_count for tag in response.aggregations.latest.buckets[0].by_block_hash.buckets)
    else:
        return 0
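
Note: time_range_filter is used by this and several later examples but is not part of this listing. A minimal sketch of what such a helper might look like (an assumption, not the original implementation):

from elasticsearch_dsl import Q

def time_range_filter(field="@timestamp", offset=60):
    # hypothetical helper: limit hits to the last `offset` seconds on `field`
    return Q('range', **{field: {'gte': 'now-%ds' % offset, 'lte': 'now'}})
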
Example #2
    def build_query(self, start_date, end_date, **kwargs):
        """Build the elasticsearch query."""
        agg_query = Search(using=self.client,
                           index=self.index,
                           doc_type=self.doc_type)[0:0]
        if start_date is not None or end_date is not None:
            time_range = {}
            if start_date is not None:
                time_range['gte'] = start_date.isoformat()
            if end_date is not None:
                time_range['lte'] = end_date.isoformat()
            agg_query = agg_query.filter(
                'range',
                **{self.time_field: time_range})

        term_agg = agg_query.aggs
        for term in self.aggregated_fields:
            term_agg = term_agg.bucket(term, 'terms', field=term, size=0)
        term_agg.metric('total', 'sum', field='count')

        if self.copy_fields:
            term_agg.metric(
                'top_hit', 'top_hits', size=1, sort={'timestamp': 'desc'}
            )

        for query_param, filtered_field in self.required_filters.items():
            if query_param in kwargs:
                agg_query = agg_query.filter(
                    'term', **{filtered_field: kwargs[query_param]}
                )

        return agg_query
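
A hypothetical usage sketch for build_query() above, assuming a stats instance configured with a single entry in aggregated_fields (names and dates are illustrative):

from datetime import datetime

query = stats.build_query(datetime(2021, 1, 1), datetime(2021, 2, 1))
result = query.execute()
field = stats.aggregated_fields[0]
for bucket in result.aggregations[field].buckets:
    print(bucket.key, bucket.total.value)  # summed 'count' per term
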
Example #3
 def hotspots(self, family, name):
     query = Search(using=self.client, index=family)
     if name:
         query = query.filter('term', name=name)
     query = query.filter('range', timestamp={'gte':self.lookback})
     query.aggs.bucket('hotspot', 'geohash_grid', field='location', precision=7)
     hashes = query[0].execute().aggregations['hotspot']['buckets'][:3]
     return [Geohash.decode_exactly(hash['key'])[:2] for hash in hashes]
Example #4
def search_jobs():
    s = Search(using=client)
    s = s.filter('term', _index='can_tenant_chouun')
    s = s.filter('term', _type='job')
    s = s.filter('term', id=12)

    resp = s.execute()
    for hit in resp:
        analysis = hit['analysis']
        print(type(analysis))
        print(dir(analysis))
Example #5
 def get_rev_links(self, model, rel, *item_types):
     search = Search(using=self.es)
     search = search.extra(size=SEARCH_MAX)
     # rel links use '~' instead of '.' due to ES field restraints
     proc_rel = rel.replace('.', '~')
     # had to use ** kw notation because of variable in field name
     search = search.filter('term', **{'links.' + proc_rel: str(model.uuid)})
     if item_types:
         search = search.filter('terms', item_type=item_types)
     hits = search.execute()
     return [hit.to_dict().get('uuid', hit.to_dict().get('_id')) for hit in hits]
Example #6
def fetch(session):
    s = Search(client)
    s = s.filter('bool',
                 should=[F('term', message='p2p.disconnected'),
                         F('term', message='p2p.connected')])
    s = s.filter('range', **{'@timestamp': dict(gte=session['start'], lte=session['stop'])})
    s = s.fields(['json_message.p2p.connected.remote_id', 'guid', 'message', '@timestamp'])
    s = s[0:100000]
    # s = s[0:10]
    s = s.sort('@timestamp')
    response = s.execute()
    return response
Example #7
def test_connections(clients):
    assert_connected(minconnected=len(clients), minpeers=len(clients)-2)
    
    guids = [nodeid_tool.topub(ext_id.encode('utf-8')) for ext_id in clients]
    for guid in guids:
        s = Search(client)
        s = s.filter(F('term', at_message='p2p.connected'))
        s = s.filter(F('term', guid=guid))
        s = s.filter(F('term', remote_id=guid))
        response = s.execute()
        # pprint (response)
        assert response.hits.total == 0, 'a client is connected to itself'
    print 'PASS: no client is connected to itself'
Example #8
    def search(self, **params):
        limit_cat = params.get('cat', "").strip()
        limit_forum = params.get('forum', "").strip()
        limit_count = int(params.get('count', 100))
        limit_size_min = human2bytes(params.get('min', "0b"))
        limit_size_max = human2bytes(params.get('max', "0b"))
        limit_wild = int(params.get('wild', 0))
        arg = params.get('query', '').strip()
        if not arg:
            arg = "hobbit"

        s = Search(using=es, index=ela_index)
        if limit_size_min:
            s = s.filter("range", size = {'gte' : limit_size_min })
        if limit_size_max:
            s = s.filter("range", size = {'lte' : limit_size_max })

        arg = arg.split(' ')
        if limit_wild:
            q = Q("wildcard", name="*"+arg.pop(0)+"*")
            for a in arg:
                q = q & Q("wildcard", name="*"+a+"*")
        else:
            q = Q("match", name=arg.pop(0))
            for a in arg:
                q = q & Q("match", name=a)

        if len(limit_cat):
            for a in limit_cat.split(' '):
                q = q & Q("match", category=a)
        if len(limit_forum):
            for a in limit_forum.split(' '):
                q = q & Q("match", forum=a)

        s = s.query(q)
        #cherrypy.log("query is "+str(s.to_dict()))
        r = s.execute()
        size = r.hits.total
        #cherrypy.log("query have "+str(size)+" elements")
        if size > limit_count:
            size = limit_count
        s = s.sort('-size')
        s = s.extra(size=size)
        r = s.execute()

        data = []
        for b in r:
            a = [b.id, b.size, b.name, b.category, b.forum, b.date[0] if b.date else '', b.hash]
            data.append(a)

        return {'data': data}
Example #9
def get_all_cans(index, estype=Types.candidate,
                 fields=['id'],
                 status=1, at_most=10000):
    s = Search(using=client)
    s = s.filter('term', _index=index)
    s = s.filter('term', _type=estype)
    s = s.filter('term', status=status)

    s = s.source(include=fields)
    s = s[:at_most]
    resp = s.execute()
    # print(resp.took)
    # print(resp.hits.total)
    return [hit['id'] for hit in resp]
Example #10
def test_connections(clients):
    len_clients = len(clients)
    min_peers = len_clients if len_clients <= 3 else 3
    assert_connected(minconnected=len_clients, minpeers=min_peers, offset=offset)

    guids = [nodeid_tool.topub(ext_id.encode('utf-8')) for ext_id in clients]
    for guid in guids:
        s = Search(client)
        s = s.filter('exists', field='json_message.p2p.connected.ts')
        s = s.filter(F('term', guid=guid))
        s = s.filter(F('term', remote_id=guid))
        response = s.execute()
        # pprint (response)
        assert response.hits.total == 0, 'a client is connected to itself'
    print 'PASS: no client is connected to itself'
Example #11
    def es_read(self, log_id, offset):
        """
        Returns the logs matching log_id in Elasticsearch and next offset.
        Returns '' if no log is found or there was an error.
        :param log_id: the log_id of the log to read.
        :type log_id: str
        :param offset: the offset start to read log from.
        :type offset: str
        """

        # Offset is the unique key for sorting logs given log_id.
        s = Search(using=self.client) \
            .query('match', log_id=log_id) \
            .sort('offset')

        s = s.filter('range', offset={'gt': offset})

        logs = []
        if s.count() != 0:
            try:

                logs = s[self.MAX_LINE_PER_PAGE * self.PAGE:self.MAX_LINE_PER_PAGE] \
                    .execute()
            except Exception as e:
                msg = 'Could not read log with log_id: {}, ' \
                      'error: {}'.format(log_id, str(e))
                self.log.exception(msg)

        return logs
Example #12
 def get_by_unique_key(self, unique_key, name):
     term = 'unique_keys.' + unique_key
     # had to use ** kw notation because of variable in field name
     search = Search(using=self.es)
     search = search.filter('term', **{term: name})
     search = search.extra(version=True)
     return self._one(search)
Example #13
    def search(self):
        self.reindex(Addon)

        qs = Search(using=amo.search.get_es(),
                    index=AddonIndexer.get_index_alias(),
                    doc_type=AddonIndexer.get_doctype_name())
        return qs.filter('term', id=self.addon.pk).execute()[0]
Example #14
    def find(self, region, account, start, end):
        s = Search(using=self.es, index=app.config['ELASTICSEARCH_INDEX'], doc_type=app.config['ELASTICSEARCH_TYPE'])
        s = s.filter('term', region=region)
        if account is not None:
            s = s.filter('term', account=account)
        s = s.filter('range', date={
            'gte': parse(start).date().isoformat(),
            'lte': parse(end).date().isoformat()
        })[0:0]

        s.aggs.bucket('by_project', 'terms', field='projectid.raw', size=0) \
            .bucket('by_type', 'terms', field='usagetype.raw') \
            .bucket('by_offering', 'terms', field='offeringid.raw') \
            .metric('rawusage_sum', 'sum', field='rawusage')

        return s.execute().aggregations.to_dict()
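
A hypothetical sketch of walking the nested aggregation dict returned by find() (instance and arguments are illustrative):

aggs = usage.find('region-1', None, '2021-01-01', '2021-01-31')
for project in aggs['by_project']['buckets']:
    for usage_type in project['by_type']['buckets']:
        for offering in usage_type['by_offering']['buckets']:
            print(project['key'], usage_type['key'], offering['key'],
                  offering['rawusage_sum']['value'])
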
Example #15
def assert_started(minstarted, offset=90):
    """Asserts that at least `minstarted` clients logged 'starting' event."""
    """
        "starting": {
            "comment": "one of the first log events, before any operation is started",
            "client_impl": "Impl/OS/version, e.g. Go/Linux/0.8.2",
            "eth_version": "int, e.g. 52",
            "ts": "YYYY-MM-DDTHH:MM:SS.SSSSSSZ"
        }
    """
    s = Search(client)
    s = s.filter(time_range_filter(field="json_message.starting.ts", offset=offset))
    s.aggs.bucket('by_host', 'terms', field='syslog_hostname.raw', size=0)
    response = s.execute()
    # pprint(response)

    print "passed for:"
    for tag in response.aggregations.by_host.buckets:
        print '  %s' % tag.key  # ip_from_guid(tag.key)

    num_started = len(response.aggregations.by_host.buckets)

    assert num_started >= minstarted, 'only %d (of at least %d) clients started' % (num_started, minstarted)
    for tag in response.aggregations.by_host.buckets:
        assert tag.doc_count == 1, 'client %s started more than once' % tag.key  # ip_from_guid(tag.key)
Example #16
    def search(self, **params):
        index = params.get('index', self.index)
        search = Search(using=self.client, index=index)

        page = params.get('page', None)
        per_page = params.get('per_page', None)
        if page and per_page:
            page = page - 1
            search._extra = {'from': page, 'size': per_page}

        sort = params.get('sort', None)
        if sort and sort.replace('-', '') in ['created_at', 'level']:
            search = search.sort(sort)

        date_filter = self._filter_by_date_interval(params)
        if date_filter:
            search = search.filter(date_filter)

        level = params.get('group_by', None)
        if level:
            search = search.query('match', level=level)

        hits = search.execute()

        format = params.get('format', 'object')
        if format == 'dict':
            return self._to_dict(hits)
        else:
            return self._to_logs(hits)
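
For reference, the from/size paging set via search._extra above can also be written with elasticsearch_dsl's slicing syntax; an equivalent one-liner (not part of the original):

search = search[page:page + per_page]  # same effect as _extra = {'from': page, 'size': per_page}
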
Example #17
    def process(self, start_time:datetime, end_time:datetime, input:DataFrame):
        logger.debug('Start: %s  End: %s  Log: index=%s fields=%s' % (start_time.isoformat(), end_time.isoformat(), str(self.indices), str(self.fields)))

        search = Search(using=self.client, index=self.indices[0])
        search = search.filter(Range(** {'@timestamp': {'gte': start_time.isoformat(), 'lte': end_time.isoformat()}}))

        for k,v in self.fields.items():
            if isinstance(v, list):
                for sv in v:
                    search = search.query("match", **{k:sv})

            else:
                search = search.query("match", **{k:v})

        logger.debug('ES Query: %s' % str(search.to_dict()))
        response = search.execute()

        logger.debug('Results: success:%d failed:%d hits:%d' % (response._shards.successful, response._shards.failed, len(response.hits)))

        for hit in response:
            # filter out the meta key and flatten the values
            row = {k: str(hit[k]) for k in hit if k != 'meta'}

            logger.debug(row)
            input = input.append(row, ignore_index=True)

        return input
Example #18
 def get_by_json(self, key, value, item_type, default=None):
     # find the term with the specific type
     term = 'embedded.' + key + '.raw'
     search = Search(using=self.es)
     search = search.filter('term', **{term: value})
     search = search.filter('type', value=item_type)
     return self._one(search)
Example #19
def search(q=None, tags=None, events_only=None, accounts=None,
           location=None, latitude=None, longitude=None, distance=None,
           distance_switch=False, **kwargs):
    es = get_es()
    queryset = Search(using=es).index(ELASTICSEARCH_INDEX_NAME)

    # Exclude events from the past
    queryset = queryset.filter(
        F("bool", should=[F({"range": {"event_end": {"lte" : "now"}}}),
        F({"missing": {"field": "event_end"}})])
    )

    queryset = filter_by_query(queryset, q=q)

    queryset = filter_by_tags(queryset, tags=tags)

    queryset = filter_by_events_only(queryset, events_only=events_only)

    queryset = filter_by_accounts(queryset, accounts=accounts)

    queryset = filter_by_location(queryset, location=location, latitude=latitude,
                                  longitude=longitude, distance=distance,
                                  distance_switch=distance_switch)

    return queryset
Example #20
def session_times():
    # {"@fields": {}, "@timestamp": "2015-02-23T17:03:41.738412Z", "@source_host": "newair.brainbot.com", "@message": "scenario.p2p_connect.started"}

    start_message = 'scenario.p2p_connect.starting.clients.sequentially'
    stop_message = 'scenario.p2p_connect.stopping.clients'
    s = Search(client)
    s = s.filter('bool',
                 should=[F('term', message=start_message),
                         F('term', message=stop_message)])
    s = s.fields(['message', '@timestamp'])
    s = s[0:100000]
    s = s.sort('-@timestamp')  # desc,  we want the latest events
    response = s.execute()

    events = []  # youngest to oldest, last should be a stop message
    for h in response:
        msg = 'start' if h['message'][0] == start_message else 'stop'
        ts = h['@timestamp'][0]
        events.append((msg, ts))
    assert not events or events[0][0] == 'stop'
    sessions = []
    while len(events) >= 2:
        stop = events.pop()
        start = events.pop()
        sessions.append(dict([start, stop]))
    return list(reversed(sessions))
Example #21
def get_journals_by_collection_institution(collection_acronym, page_from=0, page_size=1000):

    search = Search(index=INDEX).query(
             "nested", path="collections", query=Q("match", collections__acronym=COLLECTION))

    search = search.filter("exists", field="sponsors")

    search = search[page_from:page_size]
    search_response = search.execute()

    meta = {
        'total': search_response.hits.total,
    }

    sponsors = {}
    for journal in search_response:

        j = {'jid': journal.jid,
             'title': journal.title,
             'current_status': journal.current_status,
             'last_issue': journal.last_issue,
             'issue_count': journal.issue_count
             }

        for sponsor in journal['sponsors']:
            sponsors.setdefault(sponsor, []).append(j)

    result = {
        'meta': meta,
        'objects': sponsors
    }

    return result
Example #22
def search():
    q = request.args.get('q')
    #resp = es.search(index='hoe', doc_type='record', q=q, body=aggs)
    #logging.info(q)

    s = Search(using=es, index='hoe', doc_type='record')
    s.aggs.bucket('library_place', 'terms', field='library-place')
    s.aggs.bucket('type', 'terms', field='type')
    s.aggs.bucket('genre', 'terms', field='genre')
    s.aggs.bucket('keywords', 'terms', field='keywords.label')
    s.aggs.bucket('author', 'terms', field='author.literal')
    s.query = Q('multi_match', query=q, fields=['_all'])
    filters = []
    if 'filter' in request.args:
        filters = request.args.getlist('filter')
        logging.info(filters)
        for filter in filters:
            cat, val = filter.split(':')
            cat = cat.replace('_', '-')
            filter_dict = {}
            filter_dict.setdefault(cat, val)
            logging.info(cat)
            s.filter = F('term', **filter_dict)
    #if request.args
    resp = s.execute()
    #logging.info(resp)
    #logging.info(resp.aggregations.per_category.buckets)
    return render_template('resultlist.html', records=resp.to_dict().get('hits'), facets=resp.aggregations.to_dict(), header=q, query=q, filters=filters)
Example #23
def get_all_job_cans(index, estype=Types.job_candidate,
                     fields=['id', 'job', 'candidate'],
                     status=None, at_most=10000):
    s = Search(using=client)
    s = s.filter('term', _index=index)
    s = s.filter('term', _type=estype)
    if status:
        s = s.filter('term', status=status)

    s = s.source(include=fields)
    s = s[:at_most]
    resp = s.execute()
    # print(resp.took)
    # print(resp.hits.total)
    return [{'id': hit['id'],
             'job_id': hit['job'],
             'can_id': hit['candidate']} for hit in resp]
Example #24
def assert_mining(minmining, offset=300):
    """
    assert that at least `minmining` clients have started mining and mined a block
    """
    s = Search(client)
    s = s.filter(F('term', message='eth.miner.new_block'))
    s = s.filter(time_range_filter(offset=offset))
    s.aggs.bucket('by_host', 'terms', field='syslog_hostname.raw', size=0)
    response = s.execute()
    # pprint(response)

    print "passed for: "
    for tag in response.aggregations.by_host.buckets:
        print '  %s, blocks mined: %d' % (tag.key, tag.doc_count)  # ip_from_guid(tag.key)

    num_mining = len(response.aggregations.by_host.buckets)
    assert num_mining >= minmining, 'only %d clients mining, expected at least %d' % (num_mining, minmining)
Example #25
def tx_propagation(client_count, offset=10):
    """
    check for 'eth.tx.tx_new' messages
    and return the max number of clients, that had the same tx
    during the last `offset` seconds.
    """
    s = Search(client)
    # s = s.query(Q("match", message='eth.tx.received'))
    s = s.filter('exists', field='json_message.eth.tx.received.tx_hash')
    s = s.filter(time_range_filter(field="json_message.eth.tx.received.ts", offset=offset))
    s.aggs.bucket('by_tx', 'terms', field='json_message.eth.tx.received.tx_hash', size=client_count)
    # s = s[0:1000]
    response = s.execute()
    if response:
        return max(tag.doc_count for tag in response.aggregations.by_tx.buckets)
    else:
        return 0
Example #26
def get_last_day_top(top_len=50):
    OFFSET = '+8h'  # TODO: didn't figure out why

    s = Search().using(client)
    time_range = {'gte': 'now-1d' + OFFSET, 'lte': 'now' + OFFSET}
    s_q = s.filter('range', timestamp=time_range).sort('-likes')[:100]
    r = s_q.execute()
    return r.hits.hits
Example #27
    def search(self, query: str, filters: dict=None, only_this_type: bool=True, **kwargs: dict) -> list:
        """performs a search against elasticsearch and then pulls the corresponding data from the db

        :param query: query terms to search by
        :param filters: named (attribute, value) filters to limit the query results
        :param kwargs: additional search keyword arguments
        :return: a list of models with an additional `__score` value added
        """
        # build base search object
        s = Search(using=self.indexer.es).index(self.indexer.index_name)
        if only_this_type:
            s = s.doc_type(self.indexer.doc_type_name)

        # build query
        s = s.query('match', _all=query)

        # add filter
        if filters is not None:
            for attr, value in filters.items():
                s = s.filter(F({'term': {attr: value}}))

        # execute query
        res = s.execute()

        # build up django query
        results = {}
        for hit in res:
            # get the model
            dj_type = hit._meta.doc_type
            model = get_model(dj_type)

            # get the pk
            pk_name = model._meta.pk.name
            pk = getattr(hit, pk_name)

            # get the score
            score = hit._meta.score

            # add to mapping
            results.setdefault(model, {})
            results[model][pk] = score

        # get queryset
        querysets = []
        for model, pk_score in results.items():
            qs = model.objects.filter(pk__in=pk_score.keys())
            querysets += list(qs)

        # attach scores to instances
        for instance in querysets:
            score = results[type(instance)][instance.pk]
            instance._meta.es_score = score

        # order by score
        querysets = sorted(querysets, key=lambda i: i._meta.es_score, reverse=True)

        # return
        return querysets
Example #28
def create_search_obj(user, search_param_dict=None, filter_on_email_optin=False):
    """
    Creates a search object and prepares it with metadata and query parameters that
    we want to apply for all ES requests

    Args:
        user (User): User object
        search_param_dict (dict): A dict representing the body of an ES query
        filter_on_email_optin (bool): If true, filter out profiles where email_optin != True

    Returns:
        Search: elasticsearch_dsl Search object
    """
    staff_program_ids = get_advance_searchable_program_ids(user)
    is_advance_search_capable = bool(staff_program_ids)
    index_type = PRIVATE_ENROLLMENT_INDEX_TYPE if is_advance_search_capable else PUBLIC_ENROLLMENT_INDEX_TYPE
    index = get_default_alias(index_type)
    search_obj = Search(index=index)
    # Update from search params first so our server-side filtering will overwrite it if necessary
    if search_param_dict is not None:
        search_obj.update_from_dict(search_param_dict)

    if not is_advance_search_capable:
        # Learners can't search for other learners with privacy set to private
        search_obj = search_obj.filter(
            ~Q('term', **{'profile.account_privacy': Profile.PRIVATE})  # pylint: disable=invalid-unary-operand-type
        )

    # Limit results to one of the programs the user is staff on
    search_obj = search_obj.filter(create_program_limit_query(
        user,
        staff_program_ids,
        filter_on_email_optin=filter_on_email_optin
    ))
    # Filter so that only filled_out profiles are seen
    search_obj = search_obj.filter(
        Q('term', **{'profile.filled_out': True})
    )
    # Force size to be the one we set on the server
    update_dict = {'size': settings.ELASTICSEARCH_DEFAULT_PAGE_SIZE}
    if search_param_dict is not None and search_param_dict.get('from') is not None:
        update_dict['from'] = search_param_dict['from']
    search_obj.update_from_dict(update_dict)

    return search_obj
Example #29
    def query(self):
        search_obj = Search()
        for f in self.filters:
            search_obj = search_obj.filter(f)

        for q in self.queries:
            search_obj = search_obj.query(q)

        return search_obj.to_dict()
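
A small hypothetical illustration of query() above, with one entry in each list (the resulting dict is approximate):

from elasticsearch_dsl import Q, Search

search_obj = Search()
search_obj = search_obj.filter(Q('term', status='published'))   # one of self.filters
search_obj = search_obj.query(Q('match', title='search'))       # one of self.queries
print(search_obj.to_dict())
# roughly: {'query': {'bool': {'filter': [{'term': {'status': 'published'}}],
#                              'must': [{'match': {'title': 'search'}}]}}}
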
Example #30
    def aggregated_search(self, search_query, indices, aggregations, size, request_timeout):
        search_obj = Search(using=self.es_connection, index=indices).params(size=size, request_timeout=request_timeout)
        query_obj = search_obj.filter(search_query)
        for aggregation in aggregations:
            query_obj.aggs.bucket(name=aggregation.to_dict()['terms']['field'], agg_type=aggregation)
        results = query_obj.execute()

        result_set = AggregatedResults(results)
        return result_set
Example #31
def page_detail(id):
    try:
        # search the document based on its metaid
        s = Search(using=es)
        s = s.index('job_index')
        s = s.filter('term', _id=id)
        ret = s.execute()
        job=get_job_detail(ret.hits[0].to_dict(),id)

        return render_template('detail.html', job=job)
    except KeyError:
        return "Problem"
Example #32
 def _queryElasticsearch(self, from_date, to_date, query):
     logging.debug("Connecting to ES")
     client = Elasticsearch([self._config['ElasticSearch']['uri']])
     
     logging.debug("Beginning search")
     s = Search(using=client, index=self._config['ElasticSearch']['raw_index'])
     s = s.filter('range', **{'EndTime': {'from': from_date, 'to': to_date }})
     
     logging.debug("About to execute query:\n%s" % str(s.to_dict()))
     
     for hit in s.scan():
         yield hit
Example #33
    def serialize(self):
        self.reindex(Addon)

        qs = Search(using=amo.search.get_es(),
                    index=AddonIndexer.get_index_alias(),
                    doc_type=AddonIndexer.get_doctype_name())
        obj = qs.filter('term', id=self.addon.pk).execute()[0]

        with self.assertNumQueries(0):
            serializer = ESAddonSerializer(context={'request': self.request})
            result = serializer.to_representation(obj)
        return result
Example #34
    def get_sensors(self, **kwargs):
        sensors = []
        s = Search(index="sensors", using=self.es)

        # Build filters from kwargs
        for k, v in kwargs.items():
            s = s.filter('wildcard', **{k: v})

        response = s.execute()
        for hit in response:
            sensors.append(hit.to_dict())
        return sensors
Example #35
    def aggregated_search(self, search_query, indices, aggregations, size,
                          request_timeout):
        search_obj = Search(using=self.es_connection, index=indices).params(
            size=size, request_timeout=request_timeout)
        query_obj = search_obj.filter(search_query)
        for aggregation in aggregations:
            query_obj.aggs.bucket(name=aggregation.to_dict()['terms']['field'],
                                  agg_type=aggregation)
        results = query_obj.execute()

        result_set = AggregatedResults(results)
        return result_set
Example #36
def get_tm_index(topic_modelling_name):
    from elasticsearch_dsl import Search, Q
    from nlpmonitor.settings import ES_CLIENT, ES_INDEX_TOPIC_MODELLING

    print("!!!", "Get topic model")
    ss = Search(using=ES_CLIENT, index=ES_INDEX_TOPIC_MODELLING)
    ss = ss.query(
        Q("term", name=topic_modelling_name)
        | Q("term", **{"name.keyword": topic_modelling_name}))
    ss = ss.filter("term", is_ready=True)
    tm_index = ss.source(['number_of_topics', 'name']).execute()[0]
    return tm_index
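
Hypothetical usage of get_tm_index(); the topic modelling name is illustrative:

tm_index = get_tm_index("example_topic_modelling")
print(tm_index.name, tm_index.number_of_topics)
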
Example #37
def get_session_id(esindex, estype, name, gender):
    esconn = es_object.connection
    s = Search(using=esconn, index=esindex, doc_type=estype)
    s = s.filter("term", gender=gender.lower()).query("match_phrase",
                                                      name=stripSpaces(
                                                          str(name)).title())
    response = s.execute()
    try:
        hit = response.hits[0]
        return hit.meta.id
    except IndexError:
        return None
Example #38
    def test_filter_org(self):
        """Tests add organization name inclusion filter.
        """

        s = Search()
        s.filter = MagicMock(return_value='test')

        result = esc.filter_org(s, esc.UNKNOWN_ORG_NAME)

        s.filter.assert_called_with('term',
                                    author_org_name=esc.UNKNOWN_ORG_NAME)
        self.assertEqual(result, 'test')
Example #39
def _filter_licenses(s: Search, licenses):
    """
    Filter out all licenses except for those provided in the `licenses`
    parameter.
    """
    if not licenses:
        return s
    license_filters = []
    for _license in licenses.split(','):
        license_filters.append(Q('term', license__keyword=_license))
    s = s.filter('bool', should=license_filters, minimum_should_match=1)
    return s
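
A hypothetical usage sketch of _filter_licenses() (index name and license codes are illustrative):

from elasticsearch_dsl import Search

s = Search(index='image')
s = _filter_licenses(s, 'cc0,by')
# adds a bool filter with should=[term on license.keyword for 'cc0' and 'by']
# and minimum_should_match=1, i.e. at least one of the licenses must match
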
Example #40
    def get_queryset(self):
        s = Search(index=ELASTIC_INDEX)
        title_param = self.request.query_params.get('q', None)
        genre_param = self.request.query_params.get('g', None)

        title_query = _field_query(title_param, 'title')
        genre_query = _field_query(genre_param, 'genre')

        return [
            i.__dict__['_d_']
            for i in s.filter(title_query & genre_query).execute()
        ]
Example #41
    def delete(self, start_date=None, end_date=None):
        """Delete aggregation documents."""
        aggs_query = Search(
            using=self.client,
            index=self.aggregation_alias,
            doc_type=self.aggregation_doc_type).extra(_source=False)

        range_args = {}
        if start_date:
            range_args['gte'] = format_range_dt(
                start_date.replace(microsecond=0), self.aggregation_interval)
        if end_date:
            range_args['lte'] = format_range_dt(
                end_date.replace(microsecond=0), self.aggregation_interval)
        if range_args:
            aggs_query = aggs_query.filter('range', timestamp=range_args)

        bookmarks_query = Search(
            using=self.client,
            index=self.bookmark_api.bookmark_index,
        ).sort({'date': {
            'order': 'desc'
        }})

        if range_args:
            bookmarks_query = bookmarks_query.filter('range', date=range_args)

        def _delete_actions():
            for query in (aggs_query, bookmarks_query):
                affected_indices = set()
                for doc in query.scan():
                    affected_indices.add(doc.meta.index)
                    yield dict(_index=doc.meta.index,
                               _op_type='delete',
                               _id=doc.meta.id,
                               _type=doc.meta.doc_type)
                current_search_client.indices.flush(
                    index=','.join(affected_indices), wait_if_ongoing=True)

        bulk(self.client, _delete_actions(), refresh=True)
Example #42
def main():
    parser = argparse.ArgumentParser(description='Download items from ES index')
    arg = parser.add_argument
    arg('output', help='output in .jl.gz format')
    arg('index', help='ES index name')
    arg('--domain', help='url.domain to filter')
    arg('--id', help='record id')
    arg('--host', default='localhost', help='ES host in host[:port] format')
    arg('--user', help='HTTP Basic Auth user')
    arg('--password', help='HTTP Basic Auth password')
    arg('--chunk-size', type=int, default=100, help='download chunk size')

    args = parser.parse_args()
    kwargs = {}
    if args.user or args.password:
        kwargs['http_auth'] = (args.user, args.password)

    client = elasticsearch.Elasticsearch(
        [args.host],
        connection_class=elasticsearch.RequestsHttpConnection,
        timeout=600,
        **kwargs)
    print(client.info())

    search = Search(using=client, index=args.index)
    if args.domain:
        search = search.filter('term', **{'url.domain': args.domain})
    if args.id:
        search = search.filter('term', **{'_id': args.id})

    total = 0
    with tqdm.tqdm(total=search.count()) as pbar:
        with gzip.open(args.output, 'wt') as f:
            for x in search.params(size=args.chunk_size).scan():
                total += 1
                pbar.update(1)
                f.write(json.dumps(x.to_dict()))
                f.write('\n')

    print('{:,} items downloaded to {}'.format(total, args.output))
Example #43
    def _search(self, query):
        s = Search(using=self.Client, index="winlogbeat-*").query(query)

        if self.DTRange != None:
            s = s.filter('range', **self.DTRange)

        s = s.source(includes=['winlog.*'])
        s = s.sort('-winlog.event_data.UtcTime')

        if self.Scan:
            return s.scan()
        else:
            return s.execute().hits
Example #44
 def search(self,
            cls: AbstractText,
            query: str = None,
            start_date_str: str = None,
            end_date_str: str = None):
     s = Search(using=self.client, index=cls.index)
     if query:
         s = s.query('multi_match',
                     query=query,
                     fields=cls.get_all_fields())
     if start_date_str:
         s = s.filter('range', **{cls.Field.DATE: {'gte': start_date_str}})
     if end_date_str:
         s = s.filter('range', **{cls.Field.DATE: {'lt': end_date_str}})
     try:
         res = s.execute()
         return list(
             map(lambda hit: cls(hit['_source']), res['hits']['hits']))
     except Exception as e:
         raise ElasticsearchException(
             f'failed to search {cls.__class__.__name__} for {s.to_dict()}'
         ) from e
Example #45
 def search_my_data(self, username, q, offset, limit):
     search = Search(index='des-files')
     search = search.filter("nested",
                            path="permissions",
                            query=Q("term", permissions__username=username))
     search = search.query("simple_query_string",
                           query=q,
                           fields=["name", "name._exact", "keywords"])
     search = search.query(
         Q('bool', must=[Q({'prefix': {
             'path._exact': username
         }})]))
     search = search.filter("term", system='designsafe.storage.default')
     search = search.query(
         Q('bool',
           must_not=[
               Q({'prefix': {
                   'path._exact': '{}/.Trash'.format(username)
               }})
           ]))
     logger.info(search.to_dict())
     return search
Example #46
    def build_query(self, interval, start_date, end_date, **kwargs):
        """Build the elasticsearch query."""
        agg_query = Search(using=self.client, index=self.index)[0:0]

        if start_date is not None or end_date is not None:
            time_range = {}
            if start_date is not None:
                time_range['gte'] = start_date.isoformat()
            if end_date is not None:
                time_range['lte'] = end_date.isoformat()
            agg_query = agg_query.filter(
                'range',
                **{self.time_field: time_range})

        for modifier in self.query_modifiers:
            agg_query = modifier(agg_query, **kwargs)

        base_agg = agg_query.aggs.bucket(
            'histogram',
            'date_histogram',
            field=self.time_field,
            interval=interval
        )

        for destination, (metric, field, opts) in self.metric_fields.items():
            base_agg.metric(destination, metric, field=field, **opts)

        if self.copy_fields:
            base_agg.metric(
                'top_hit', 'top_hits', size=1, sort={'timestamp': 'desc'}
            )

        for query_param, filtered_field in self.required_filters.items():
            if query_param in kwargs:
                agg_query = agg_query.filter(
                    'term', **{filtered_field: kwargs[query_param]}
                )

        return agg_query
Example #47
    def default_string_query(self, q, options):

        match = self._parse_interval_query(q)
        if match:  # interval query
            search = Search()
            if match['query'] != '':
                search = search.query("query_string", query=match['query'])
            search = search.filter('match', chrom=match['chr'])
            assembly = 'hg38' if options.assembly == 'hg38' else 'hg19'
            search = search.filter(
                'range', **{assembly + ".start": {
                    "lte": match['gend']
                }})
            search = search.filter(
                'range', **{assembly + ".end": {
                    "gte": match['gstart']
                }})

        else:  # default query
            search = super().default_string_query(q, options)

        return search
Example #48
def get_access_search(
    client: Elasticsearch,
    index: str,
    ts_range: TimestampRange = None,
    prefixes: typing.Sequence[str] = None,
    timestamp_field: str = 'timestamp',
) -> Search:
    search = Search(using=client, index=index)
    search = filter_url_by_prefixes(search, prefixes)
    range_filter = get_range_filter(ts_range, timestamp_field)
    if range_filter:
        search = search.filter(range_filter)
    return search
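
get_range_filter() is not shown in this listing; a minimal sketch of what it might do, assuming TimestampRange exposes optional gte/lte datetimes (an assumption based on how it is used above):

from elasticsearch_dsl import Q

def get_range_filter(ts_range, timestamp_field='timestamp'):
    # hypothetical helper: build a range filter from an optional TimestampRange
    if ts_range is None:
        return None
    bounds = {}
    if ts_range.gte is not None:
        bounds['gte'] = ts_range.gte.isoformat()
    if ts_range.lte is not None:
        bounds['lte'] = ts_range.lte.isoformat()
    return Q('range', **{timestamp_field: bounds}) if bounds else None
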
Example #49
    def infoTopApp(self):
        s = Search(index='ossim-osdepym*')
        s = s.query('match_all')
        s = s.filter('range',
                     log_date={
                         "gte": 1554087600000,
                         "lte": 1556679599999
                     })
        s.aggs.bucket(
            'users',
            A('terms', field='app.keyword', size=5, order={"_count": "desc"}))

        return s.execute().aggregations.users.buckets
Example #50
    def test_add_date_filter_min_date(self):
        """Test add filter calls with `start_date`.
        """

        s = Search()
        s.filter = MagicMock(return_value='test')

        min_date = '2018-01-01'
        result = esc.add_date_filter(s, start_date=min_date)

        s.filter.assert_called_with('range',
                                    grimoire_creation_date={'gt': min_date})
        self.assertEqual(result, 'test')
Example #51
    def post(self):
        json = request.get_json()
        s = Search(using=es,index='twitter',doc_type='items')
        username = json.pop('username') if 'username' in json else None
        following = True
        if 'following' in json:
            following = json.pop('following')
        else:
            if username:
                following = True
        timestamp = json.pop('timestamp') if 'timestamp' in json else time()
        search = 'q' in json
        limit = json.pop('limit') if 'limit' in json and json['limit'] <= 100 else 50
        following_list = db.user.find_one({'username':session['username']})['following'] # do we only need this if following=true?
        #query = {'timestamp':{'$lte':timestamp}}
        s = s.filter('range', timestamp={'lte':timestamp})
        if search:
            #query['$text'] = {'$search':json['q']}
            s = s.query('match', content=json['q'])
        if username:
            if following:
                #query['username'] = username if username in following_list else ''
                s = s.filter('term', username=username if username in following_list else '')
            else:
                #query['username'] = username
                s = s.filter('term', username=username)
        else:
            if following:
                #query['username'] = {'$in': following_list}
                s = s.filter('terms',username=following_list)
        # my code        
        if 'parent' in json:
            #query['parent'] = json['parent']
            s = s.filter('term', parent=json['parent'])
        if 'replies' not in json:
            json['replies'] = True
        if not json['replies']:
            #query['parent'] = None
            s = s.filter('term', parent=None)
        # endmy code        
        if 'rank' not in json:
            json['rank'] = 'interest'
        s = s[0:limit]

        if json['rank'] == 'time':
            sort_key = 'timestamp'
            s = s.sort('-timestamp')
        else:
            sort_key = 'interest_score'
            s = s.sort('-interest_score')
        #sort_dir = -1

        #results = db.items.find(query).sort(sort_key, sort_dir).limit(limit)
        #results = db.items.find(filter=query, limit=limit, sort=sort_by)
        #results = db.items.aggregate([{'$match':query}, {'$limit': limit}, {'$sort': sort_by}])
        results = s.execute()
        l = [x['_source'].to_dict() for x in results['hits']['hits']]
        return Response(response = dumps({'status':'OK','items':l}),mimetype='application/json')
Example #52
def get_search_by_entities_query(
    entities,
    term=None,
    filter_data=None,
    composite_field_mapping=None,
    permission_filters=None,
    ordering=None,
    fields_to_include=None,
    fields_to_exclude=None,
):
    """
    Performs filtered search for the given term across given entities.
    """
    filter_data = filter_data or {}
    query = []
    if term != '':
        for entity in entities:
            query.append(_build_term_query(term, fields=entity.SEARCH_FIELDS))

    filters, ranges = _split_range_fields(filter_data)

    # document must match all filters in the list (and)
    must_filter = _build_must_queries(filters, ranges, composite_field_mapping)

    s = Search(index=[entity.get_read_alias()
                      for entity in entities], ).query(Bool(must=query), )

    permission_query = _build_entity_permission_query(permission_filters)
    if permission_query:
        s = s.filter(permission_query)

    s = s.filter(Bool(must=must_filter))
    s = _apply_sorting_to_query(s, ordering)
    return _apply_source_filtering_to_query(
        s,
        fields_to_include=fields_to_include,
        fields_to_exclude=fields_to_exclude,
    )
Example #53
def tx_list(offset=10):
    """
    check for 'eth.tx.tx_new' messages
    and return the max number of clients, that had the same tx
    during the last `offset` seconds.
    """
    s = Search(client)
    s = s.query(Q("match", message='eth.tx.received'))
    s = s.filter(time_range_filter(offset=offset))
    s = s[0:100]
    response = s.execute()
    for hit in response.hits:
        print hit.to_dict()
    return response
Example #54
    def __build_search(self, date_range, project_name=None, org_name=None):
        s = Search(using=self._es_conn, index=self._es_index)
        s = s.filter('range', **date_range)
        if project_name:
            s = s.filter('term', project=project_name)
        if org_name:
            s = s.filter('term', author_org_name=org_name)

        # from:to parameters (=> from: 0, size: 0)
        s = s[0:0]

        # Get author_name and most recent metadata__timestamp for quarter (should be enough per quarter,
        # computing it by user probably is not needed as we are going to recalculate the whole quarter)

        # We are not keeping all metadata__* fields because we are grouping commits by author, so we can only
        # store one value per author.
        s.aggs.bucket(self.TIMEFRAME, 'date_histogram', field=self._timeframe_field, interval='quarter') \
            .metric(self.LATEST_TS, 'max', field=self._sort_on_field)\
            .bucket(self.AUTHOR_UUID, 'terms', field=self.AUTHOR_UUID, size=1000) \
            .metric(self.CONTRIBUTIONS, 'cardinality', field=self.contribs_field, precision_threshold=40000)\
            .bucket(self.AUTHOR_NAME, 'terms', field=self.AUTHOR_NAME, size=1)

        return s
Example #55
def get_top_genes_aggregated_filtered_statistics(filters):
    s = Search(using=es, doc_type='genes')
    if 'chr' in filters and len(filters['chr']) > 0 and len(
            filters['chr']) < 5:
        s = s.filter(
            Q('bool',
              should=[
                  Q('term', chr=chrom if len(chrom) > 3 else 'chr%s' % chrom)
                  for chrom in filters['chr']
              ]))
    agg_chr = A("terms", field="chr")
    s.aggs.bucket('chr_count', agg_chr)
    agg_results = s.execute().aggregations
    return agg_results.chr_count.buckets
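
Hypothetical usage of the function above, restricting the statistics to two chromosomes:

buckets = get_top_genes_aggregated_filtered_statistics({'chr': ['1', '2']})
for bucket in buckets:
    print(bucket.key, bucket.doc_count)
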
Example #56
def tx_propagation(client_count, offset=10):
    """
    check for 'eth.tx.tx_new' messages
    and return the max number of clients, that had the same tx
    during the last `offset` seconds.
    """
    s = Search(client)
    # s = s.query(Q("match", message='eth.tx.received'))
    s = s.filter('exists', field='json_message.eth.tx.received.tx_hash')
    s = s.filter(
        time_range_filter(field="json_message.eth.tx.received.ts",
                          offset=offset))
    s.aggs.bucket('by_tx',
                  'terms',
                  field='json_message.eth.tx.received.tx_hash',
                  size=client_count)
    # s = s[0:1000]
    response = s.execute()
    if response:
        return max(tag.doc_count
                   for tag in response.aggregations.by_tx.buckets)
    else:
        return 0
Example #57
def getHostBytes(client, starttime, endtime):
    s = Search(using=client, index="htcondor-xfer-stats2-*")
    s = s.filter('range', **{'@timestamp': {'gte': starttime, 'lt': endtime}})
    # Remove records with more than 1 TB of data transferred, bug:
    # https://htcondor-wiki.cs.wisc.edu/index.cgi/tktview?tn=7575,0
    s = s.filter('range', bytes={'from': 0, 'to': 1024**4})
    bkt = s.aggs
    bkt = bkt.bucket('hosts', 'terms', size=MAXSZ, field='host.name.keyword')
    bkt = bkt.metric('Bytes', 'sum', field='bytes')
    bkt = bkt.metric('loss', 'avg', field='lost')

    print(s.to_dict())

    response = s.execute()
    hosts = {}
    for tag in response.aggregations.hosts:
        hosts[tag.key] = {
            'bytes': tag.Bytes.value,
            'bytes_str': convert_gb(tag.Bytes.value),
            'loss': tag.loss.value
        }

    return hosts
Example #58
def test_filters():
    s = Search()
    s = s.filter('terms', tags=['search', 'python'])
    print(s.to_dict())
    # {'query': {'bool': {'filter': [{'terms': {'tags': ['search', 'python']}}]}}}

    s = s.query('bool', filter=[Q('terms', tags=['search', 'python'])])
    print(s.to_dict())
    # {'query': {'bool': {'filter': [{'terms': {'tags': ['search', 'python']}}]}}}

    s = s.exclude('terms', tags=['search', 'python'])
    # or, equivalently:
    # s = s.query('bool', filter=[~Q('terms', tags=['search', 'python'])])
    print(s.to_dict())
Example #59
def filter_url_by_prefixes(
    search: Search,
    prefixes: typing.Sequence[str] = None,
    url_field: str = 'url__original',
) -> Search:
    if prefixes:
        prefix, *tail = prefixes
        lookup = {url_field: prefix}
        query = Q('match_bool_prefix', **lookup)
        for prefix in tail:
            lookup[url_field] = prefix
            query = query | Q('match_bool_prefix', **lookup)
        search = search.filter(query)
    return search
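
A hypothetical usage sketch of filter_url_by_prefixes() (index name and prefixes are illustrative):

from elasticsearch_dsl import Search

search = Search(index='access-logs')
search = filter_url_by_prefixes(
    search, prefixes=['https://example.org/api/', 'https://example.org/static/'])
# the resulting filter is an OR of match_bool_prefix queries on url.original
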
Example #60
def es_issue_count(
    es_client: Any, container_id: str, year: int, volume: str, issue: str
) -> int:
    search = Search(using=es_client, index="fatcat_release")
    search = (
        search.filter("term", container_id=container_id)
        .filter("term", year=year)
        .filter("term", volume=volume)
        .filter("term", issue=issue)
        .extra(request_cache=True)
    )
    search = search.params()

    return search.count()