def search_term(self, key, indices=None):
    """Look up a single document by its ``_id`` term.

    Args:
        key: document id to match against the ``_id`` field.
        indices: list of index names to search; defaults to ``["default"]``.

    Returns:
        The first matching row, or ``None`` when nothing matched.
    """
    # Fix: the original used a mutable default argument (indices=["default"]),
    # which is shared across all calls; use a None sentinel instead.
    if indices is None:
        indices = ["default"]
    result = None
    params = {"term": {"_id": key}}
    query = pyes.Search(params)
    row = self.conn.search(query, indices=indices)
    if row.total > 0:
        result = row[0]
    return result
def test_delete_warmer(self):
    """Deleting a stored warmer makes a subsequent lookup raise."""
    # Register a match-all warmer, remove it, then confirm it is gone.
    match_all_warmer = pyes.Search(pyes.MatchAllQuery())
    self.conn.put_warmer(indices=[self.index_name], name='w1',
                         warmer=match_all_warmer)
    self.conn.delete_warmer(indices=[self.index_name], name='w1')
    self.assertRaises(pyes.exceptions.ElasticSearchException,
                      self.conn.get_warmer,
                      indices=[self.index_name],
                      name='w1')
def query(self, sort='timestamp', start=0, size=20, severity=None,
          timestamp_from=None, timestamp_till=None):
    """Run a filtered match-all search against the configured index.

    ``datetime`` bounds are converted to ISO-8601 strings before being
    used in range filters; string bounds pass through unchanged.
    Returns the raw pyes search result.
    """
    filters = []
    if severity is not None:
        filters.append(pyes.TermFilter(field='severity', value=severity))
    # Normalise each timestamp bound and add the matching range filter.
    if timestamp_from is not None:
        if isinstance(timestamp_from, datetime.datetime):
            timestamp_from = timestamp_from.isoformat()
        filters.append(
            pyes.RangeFilter(
                pyes.ESRangeOp('timestamp', 'gte', timestamp_from)))
    if timestamp_till is not None:
        if isinstance(timestamp_till, datetime.datetime):
            timestamp_till = timestamp_till.isoformat()
        filters.append(
            pyes.RangeFilter(
                pyes.ESRangeOp('timestamp', 'lte', timestamp_till)))
    combined_filter = pyes.ANDFilter(filters) if filters else None
    search = pyes.Search(query=pyes.MatchAllQuery(),
                         filter=combined_filter,
                         start=start,
                         size=size)
    return self.es.search(search, indices=[self.index],
                          doc_types=[self.document_type])
def all_docs(self, keys_only=False, indices=None, size=10000):
    """Return every document (up to ``size``) from the given indices.

    Args:
        keys_only: when True, return only each document's id
            (``row['meta']['id']``) instead of the full row.
        indices: list of index names to query; defaults to ``["default"]``.
        size: maximum number of documents to fetch.

    Returns:
        A list of rows, or of id values when ``keys_only`` is True.
    """
    # Fix: the original used a mutable default argument (indices=["default"]),
    # which is shared across all calls; use a None sentinel instead.
    if indices is None:
        indices = ["default"]
    query = pyes.Search({'match_all': {}})
    rows = self.conn.search(query, indices=indices, size=size)
    if keys_only:
        return [row['meta']['id'] for row in rows]
    return list(rows)
def preservation_planning_fpr_search(request, current_page_number = None):
    """Full-text search over FPR data, indexing it into ES on first use.

    The final ``render`` call passes ``locals()``, so local variable names
    in this function are part of the template contract — do not rename them.
    """
    if current_page_number is None:  # fix: compare to None with `is`
        current_page_number = 1
    query = request.GET.get('query', '')
    if query == '':
        # No query in the URL parameters list, try to see if we've got an
        # existing query going from a previous page...
        # Fix: use .get() so a missing session key can't raise KeyError.
        query = request.session.get('fpr_query', '')
    # No query from a previous page either
    if query == '':
        query = '*'
        return HttpResponse('No query.')
    request.session['fpr_query'] = query  # Save this for pagination...
    conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())
    indexes = conn.get_indices()
    if 'fpr_file' not in indexes:
        # Grab relevant FPR data from the DB
        results = get_fpr_table()
        request.session['fpr_results'] = results
        # Setup indexing for some Elastic Search action.
        for row in results:
            conn.index(row, 'fpr_file', 'fpr_files')
    else:
        results = request.session['fpr_results']
    # do fulltext search
    q = pyes.StringQuery(query)
    s = pyes.Search(q)
    try:
        results = conn.search_raw(s, size=len(results), indices='fpr_file')
    except Exception:
        # Fix: narrowed from a bare ``except:`` (which also swallowed
        # SystemExit / KeyboardInterrupt); still best-effort by design.
        return HttpResponse('Error accessing index.')
    form = FPRSearchForm()
    search_hits = []
    for row in results.hits.hits:
        search_hits.append(row['_source'].copy())
    page = helpers.pager(search_hits, results_per_page, current_page_number)
    hit_count = len(search_hits)
    return render(request, 'main/preservation_planning_fpr.html', locals())
def index():
    """Render the home page, running a lyric search when one is posted."""
    conn = es.ES(app.config['ELASTICSEARCH_HOST'])
    searching = False
    # Default demo query; replaced by the submitted form value when present.
    q = 'Lettertape'
    if request.method == 'POST' and request.form.get('q'):
        q = request.form['q']
    highlighter = es.HighLighter(['<span class="lyric-summary-highlight">'],
                                 ['</span>'])
    terms_query = es.TermsQuery()
    terms_query.add('lyrics', q.lower().split())
    search = es.Search(terms_query, highlight=highlighter, size=MAX_RESULTS)
    search.add_highlight('lyrics')
    raw_hits = conn.search(search)
    total = raw_hits.total
    hits = prepare_hits(raw_hits)
    return render_template('index.html', hits=hits, searching=searching,
                           q=q, total=total)
def kibanaDashboards():
    """Return a JSON list of Kibana dashboards (``name``/``url`` pairs).

    Queries the 'kibana-int' index for stored dashboard documents.
    On success returns a JSON string; when no dashboards are found or the
    search server is unreachable, a message is written to stderr and the
    function implicitly returns None.
    """
    try:
        resultsList = []
        # Idiom fix: build the server list with a list comprehension
        # instead of wrapping a generator expression in list().
        es = pyes.ES(['{0}'.format(s) for s in options.esservers])
        r = es.search(pyes.Search(pyes.MatchAllQuery(), size=100),
                      'kibana-int', 'dashboard')
        if r:
            for dashboard in r:
                # Each hit stores its definition as a JSON string.
                dashboardJson = json.loads(dashboard.dashboard)
                resultsList.append({
                    'name': dashboardJson['title'],
                    'url': "%s/%s/%s" % (options.kibanaurl,
                                         "index.html#/dashboard/elasticsearch",
                                         dashboardJson['title'])
                })
            return json.dumps(resultsList)
        else:
            sys.stderr.write('No Kibana dashboard found\n')
    except pyes.exceptions.NoServerAvailable:
        sys.stderr.write('Elastic Search server could not be reached, check network connectivity\n')
def query(cls, text, max_subscribers=5):
    """Search feeds whose address, link or title loosely match *text*.

    Returns up to *max_subscribers* results sorted by subscriber count,
    or an empty list when the search server is unavailable.
    """
    try:
        cls.ES().default_indices = cls.index_name()
        cls.ES().indices.refresh()
    except pyes.exceptions.NoServerAvailable:
        logging.debug(" ***> ~FRNo search server available.")
        return []
    if settings.DEBUG:
        max_subscribers = 1
    logging.info("~FGSearch ~FCfeeds~FG: ~SB%s" % text)
    bool_query = pyes.query.BoolQuery()
    # A match on any one of the three fields is enough to score the feed.
    for field in ('address', 'link', 'title'):
        bool_query.add_should(
            pyes.query.MatchQuery(field, text, analyzer="simple",
                                  cutoff_frequency=0.0005,
                                  minimum_should_match="75%"))
    search = pyes.Search(bool_query, min_score=1)
    return cls.ES().search(query=search, size=max_subscribers,
                           doc_types=[cls.type_name()],
                           sort="num_subscribers:desc")
def es_query(self, query):
    """Make an ElasticSearch query and return the full result set.

    Issues a size=0 probe to learn the total hit count, then repeats the
    query asking for that many hits.

    Raises:
        Exception: when either response lacks the expected 'hits' shape.
    """
    search = pyes.Search(query)
    # The first query is just used to determine the size of the set.
    results = self.es.search(query=search, indexes=[self.es_index],
                             doc_types=[self.doc_type], size=0)
    # Bug fix: the original combined the checks with `and` (and repeated
    # the 'hits' key), so a malformed response could never trigger the
    # sanity error — and the nested lookup could raise KeyError instead.
    if 'hits' not in results or 'total' not in results['hits']:
        raise Exception("bad ES response %s" % json.dumps(results))
    total = results['hits']['total']
    # Repeat the query to retrieve the entire set.
    results = self.es.search(query=search, indexes=[self.es_index],
                             doc_types=[self.doc_type], size=total)
    if 'hits' not in results or 'hits' not in results['hits']:
        raise Exception("bad ES response %s" % json.dumps(results))
    return results
def test_put_get_warmer(self):
    """A stored warmer should round-trip through put/get unchanged."""
    match_all_warmer = pyes.Search(pyes.MatchAllQuery())
    # ES fails if the index is empty, so seed one document first.
    self.conn.index({'a': 1}, self.index_name, self.document_type)
    self.conn.refresh(self.index_name)
    self.conn.put_warmer(indices=[self.index_name], name='w1',
                         warmer=match_all_warmer)
    result = self.conn.get_warmer(indices=[self.index_name], name='w1')
    expected = {
        self.index_name: {
            'warmers': {
                'w1': {
                    'source': {'query': {'match_all': {}}},
                    'types': [],
                },
            },
        },
    }
    self.assertEqual(result, expected)
def list_display(request):
    """List stored AIPs with sorting, pagination and a total-size facet.

    The ``render`` calls pass ``locals()``, so local variable names in this
    function are part of the template contract — do not rename them.
    """
    current_page_number = request.GET.get('page', 1)
    form = forms.StorageSearchForm()
    # get ElasticSearch stats
    aip_indexed_file_count = advanced_search.indexed_count('aips')
    # get AIPs
    order_by = request.GET.get('order_by', 'name')
    sort_by = request.GET.get('sort_by', 'up')
    if sort_by == 'down':
        sort_direction = 'desc'
    else:
        sort_direction = 'asc'
    sort_specification = order_by + ':' + sort_direction
    conn = elasticSearchFunctions.connect_and_create_index('aips')
    items_per_page = 10
    start = (int(current_page_number) - 1) * items_per_page
    aipResults = conn.search(
        pyes.Search(pyes.MatchAllQuery(), start=start, size=items_per_page),
        doc_types=['aip'],
        fields='origin,uuid,filePath,created,name,size',
        sort=sort_specification)
    try:
        len(aipResults)
    except pyes.exceptions.ElasticSearchException:
        # there will be an error if no mapping exists for AIPs due to no
        # AIPs having been created
        return render(request, 'archival_storage/archival_storage.html',
                      locals())
    # handle pagination
    page = helpers.pager(aipResults, items_per_page, current_page_number)
    if not page:
        raise Http404
    # augment data
    sips = []
    for aip in page['objects']:
        sip = {}
        sip['href'] = aip.filePath.replace(AIPSTOREPATH + '/', "AIPsStore/")
        sip['name'] = aip.name
        sip['uuid'] = aip.uuid
        sip['date'] = aip.created
        try:
            size = float(aip.size)
            sip['size'] = '{0:.2f} MB'.format(size)
        except (AttributeError, TypeError, ValueError):
            # Fix: narrowed from a bare ``except:``; only a missing or
            # non-numeric size should mark the AIP as removed.
            sip['size'] = 'Removed'
        sips.append(sip)
    # get total size of all AIPS from ElasticSearch
    q = pyes.MatchAllQuery().search()
    q.facet.add(pyes.facets.StatisticalFacet('total', field='size'))
    aipResults = conn.search(q, doc_types=['aip'])
    total_size = aipResults.facets.total.total
    total_size = '{0:.2f}'.format(total_size)
    return render(request, 'archival_storage/archival_storage.html', locals())
def search(query=None, abbr=None, chamber=None, subjects=None, bill_id=None,
           bill_id__in=None, search_window=None, updated_since=None,
           sponsor_id=None, bill_fields=None, status=None, type_=None,
           session=None):
    """Build a mongo filter from the given criteria and query the bills
    collection, delegating full-text search to ElasticSearch when enabled.

    Returns a pymongo cursor over matching bills.

    Raises:
        ValueError: for an invalid ``search_window`` or ``updated_since``.
    """
    _filter = {}
    # Simple equality criteria map straight into the mongo filter.
    for key, value in [(settings.LEVEL_FIELD, abbr),
                       ('chamber', chamber),
                       ('subjects', subjects),
                       ('bill_id', bill_id),
                       ]:
        if value is not None:
            _filter[key] = value
    if search_window:
        if search_window == 'session':
            _filter['_current_session'] = True
        elif search_window == 'term':
            _filter['_current_term'] = True
        elif search_window.startswith('session:'):
            _filter['session'] = search_window.split('session:')[1]
        elif search_window.startswith('term:'):
            _filter['_term'] = search_window.split('term:')[1]
        elif search_window == 'all':
            pass
        else:
            raise ValueError('invalid search_window. valid choices are '
                             ' "term", "session", "all"')
    if updated_since:
        try:
            _filter['updated_at'] = {'$gte': parse_param_dt(updated_since)}
        except ValueError:
            raise ValueError('invalid updated_since parameter. '
                             'please supply date in YYYY-MM-DD format')
    if sponsor_id:
        _filter['sponsors.leg_id'] = sponsor_id
    if status:
        # Status is slightly different: it's a dict like--
        # {'action_dates.signed': {'$ne': None}}
        _filter.update(**status)
    if type_:
        _filter['type'] = type_
    if session:
        _filter['session'] = session
    # process full-text query
    if query and settings.ENABLE_ELASTICSEARCH:
        # block spammers, possibly move to a BANNED_SEARCH_LIST setting
        if '<a href' in query:
            return db.bills.find({settings.LEVEL_FIELD: None})
        # Fix: regex pattern is now a raw string ('\d' is an invalid
        # escape sequence and warns on modern Python).
        if re.findall(r'\d+', query):
            # Queries containing digits may be a bill id; try that first.
            _id_filter = dict(_filter)
            _id_filter['bill_id'] = fix_bill_id(query).upper()
            result = db.bills.find(_id_filter)
            # NOTE(review): a pymongo cursor is always truthy, so this
            # branch presumably always returns here — verify whether a
            # count/emptiness check was intended.
            if result:
                return result
        query = {"query_string": {"fields": ["text", "title"],
                                  "default_operator": "AND",
                                  "query": query}}
        search = pyes.Search(query, fields=[])
        # take terms from mongo query
        es_terms = []
        if settings.LEVEL_FIELD in _filter:
            es_terms.append(
                pyes.TermFilter(settings.LEVEL_FIELD,
                                _filter.pop(settings.LEVEL_FIELD)))
        if 'session' in _filter:
            es_terms.append(
                pyes.TermFilter('session', _filter.pop('session')))
        if 'chamber' in _filter:
            es_terms.append(
                pyes.TermFilter('chamber', _filter.pop('chamber')))
        if 'subjects' in _filter:
            es_terms.append(
                pyes.TermFilter('subjects', _filter.pop('subjects')['$all']))
        if 'sponsors.leg_id' in _filter:
            es_terms.append(
                pyes.TermFilter('sponsors', _filter.pop('sponsors.leg_id')))
        # add terms
        if es_terms:
            search.filter = pyes.ANDFilter(es_terms)
        # page size is a guess, could use tweaks
        es_result = elasticsearch.search(search, search_type='scan',
                                         scroll='3m', size=250)
        doc_ids = [r.get_id() for r in es_result]
        _filter['versions.doc_id'] = {'$in': doc_ids}
    elif query:
        _filter['title'] = {'$regex': query, '$options': 'i'}
    # return query
    return db.bills.find(_filter, bill_fields)
def read(self, request):
    """Handle a GET of the bill collection: filter, full-text search,
    paginate and sort, returning the results as a list.

    Builds a mongo ``_filter`` from the request parameters, optionally
    delegating free-text 'q' queries to ElasticSearch, then pages/sorts
    the mongo cursor.  Returns an ``rc.BAD_REQUEST`` response object for
    invalid parameters or oversized unpaginated requests.
    """
    # Default projection for returned bill documents.
    bill_fields = {'title': 1, 'created_at': 1, 'updated_at': 1,
                   'bill_id': 1, 'type': 1, 'state': 1, 'level': 1,
                   'country': 1, 'session': 1, 'chamber': 1,
                   'subjects': 1, '_type': 1, 'id': 1}
    # replace with request's fields if they exist
    bill_fields = _build_field_list(request, bill_fields)
    # normal mongo search logic
    _filter = _build_mongo_filter(
        request,
        ('state', 'chamber', 'subjects', 'bill_id', 'bill_id__in'))
    # process search_window
    search_window = request.GET.get('search_window', '')
    if search_window:
        if search_window == 'session':
            _filter['_current_session'] = True
        elif search_window == 'term':
            _filter['_current_term'] = True
        elif search_window.startswith('session:'):
            _filter['session'] = search_window.split('session:')[1]
        elif search_window.startswith('term:'):
            _filter['_term'] = search_window.split('term:')[1]
        elif search_window == 'all':
            pass
        else:
            resp = rc.BAD_REQUEST
            resp.write(": invalid search_window. Valid choices are "
                       "'term', 'session' or 'all'")
            return resp
    # process updated_since
    since = request.GET.get('updated_since')
    if since:
        try:
            _filter['updated_at'] = {'$gte': parse_param_dt(since)}
        except ValueError:
            resp = rc.BAD_REQUEST
            resp.write(": invalid updated_since parameter."
                       " Please supply a date in YYYY-MM-DD format.")
            return resp
    # process sponsor_id
    sponsor_id = request.GET.get('sponsor_id')
    if sponsor_id:
        _filter['sponsors.leg_id'] = sponsor_id
    # process full-text query
    query = request.GET.get('q')
    if query:
        query = {"query_string": {"fields": ["text", "title"],
                                  "default_operator": "AND",
                                  "query": query}}
        search = pyes.Search(query, fields=[])
        # take terms from mongo query
        # (each matching mongo criterion is popped and re-expressed as an
        # ES term filter so it is not applied twice)
        es_terms = []
        if 'state' in _filter:
            es_terms.append(pyes.TermFilter('state', _filter.pop('state')))
        if 'session' in _filter:
            es_terms.append(
                pyes.TermFilter('session', _filter.pop('session')))
        if 'chamber' in _filter:
            es_terms.append(
                pyes.TermFilter('chamber', _filter.pop('chamber')))
        if 'subjects' in _filter:
            es_terms.append(
                pyes.TermFilter('subjects', _filter.pop('subjects')['$all']))
        if 'sponsors.leg_id' in _filter:
            es_terms.append(
                pyes.TermFilter('sponsors', _filter.pop('sponsors.leg_id')))
        # add terms
        if es_terms:
            search.filter = pyes.ANDFilter(es_terms)
        # page size is a guess, could use tweaks
        es_result = elasticsearch.search(search, search_type='scan',
                                         scroll='3m', size=250)
        # Constrain the mongo query to the documents ES matched.
        doc_ids = [r.get_id() for r in es_result]
        _filter['versions.doc_id'] = {'$in': doc_ids}
    # start with base query
    query = db.bills.find(_filter, bill_fields)
    # pagination
    page = request.GET.get('page')
    per_page = request.GET.get('per_page')
    if page and not per_page:
        per_page = 50
    if per_page and not page:
        page = 1
    if page:
        page = int(page)
        per_page = int(per_page)
        query = query.limit(per_page).skip(per_page * (page - 1))
    else:
        # limit response size
        count = db.bills.find(_filter, bill_fields).count()
        if count > 5000:
            resp = rc.BAD_REQUEST
            resp.write(': request too large, try narrowing your search by '
                       'adding more filters.')
            return resp
    # sorting
    sort = request.GET.get('sort')
    if sort == 'updated_at':
        query = query.sort([('updated_at', -1)])
    elif sort == 'created_at':
        query = query.sort([('created_at', -1)])
    return list(query)