Code Example #1
def search_term(self, key, indices=["default"]):
    result = None
    params = {"term": {"_id": key}}
    query = pyes.Search(params)
    row = self.conn.search(query, indices=indices)
    if row.total > 0:
        result = row[0]
    return result
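
A minimal usage sketch for the method above (hypothetical: `store` stands in for an instance of the surrounding class, whose `conn` attribute is assumed to be a connected pyes.ES client, and the document id is made up):

# Illustrative only; the id "doc-1" and the `store` object are hypothetical.
doc = store.search_term("doc-1", indices=["default"])
if doc is not None:
    print(doc)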
Code Example #2
def test_delete_warmer(self):
    warmer1 = pyes.Search(pyes.MatchAllQuery())
    self.conn.put_warmer(indices=[self.index_name],
                         name='w1',
                         warmer=warmer1)
    self.conn.delete_warmer(indices=[self.index_name], name='w1')
    self.assertRaises(pyes.exceptions.ElasticSearchException,
                      self.conn.get_warmer,
                      indices=[self.index_name],
                      name='w1')
Code Example #3
    def query(self,
              sort='timestamp',
              start=0,
              size=20,
              severity=None,
              timestamp_from=None,
              timestamp_till=None):
        fltr = []

        if severity is not None:
            fltr.append(
                pyes.TermFilter(field='severity', value=severity)
            )

        if timestamp_from is not None:
            if isinstance(timestamp_from, datetime.datetime):
                timestamp_from = timestamp_from.isoformat()

            fltr.append(
                pyes.RangeFilter(
                    pyes.ESRangeOp(
                        'timestamp', 'gte', timestamp_from
                    )
                )
            )

        if timestamp_till is not None:
            if isinstance(timestamp_till, datetime.datetime):
                timestamp_till = timestamp_till.isoformat()

            fltr.append(
                pyes.RangeFilter(
                    pyes.ESRangeOp(
                        'timestamp', 'lte', timestamp_till
                    )
                )
            )

        f = None
        if fltr:
            f = pyes.ANDFilter(fltr)
        q = pyes.MatchAllQuery()

        s = pyes.Search(
            query=q,
            filter=f,
            start=start,
            size=size)

        return self.es.search(
            s,
            indices=[self.index],
            doc_types=[self.document_type])
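
A hedged usage sketch of the method above (the `log_store` object and the date bounds are hypothetical; the method itself assumes `self.es` is a pyes.ES connection and that `self.index` and `self.document_type` are configured elsewhere):

import datetime

# Illustrative call: fetch up to 50 'error' entries from January 2014.
results = log_store.query(severity='error',
                          timestamp_from=datetime.datetime(2014, 1, 1),
                          timestamp_till=datetime.datetime(2014, 2, 1),
                          size=50)
for hit in results:  # pyes returns an iterable result set
    print(hit)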
Code Example #4
    def all_docs(self, keys_only=False, indices=["default"], size=10000):

        query = pyes.Search({'match_all': {}})
        rows = self.conn.search(query, indices=indices, size=size)
        docs = []

        for row in rows:
            if keys_only:
                row = row['meta']['id']
            docs.append(row)

        return docs
Code Example #5
def preservation_planning_fpr_search(request, current_page_number=None):
    if current_page_number is None:
        current_page_number = 1

    query = request.GET.get('query', '')

    if query == '':
        # No query in the URL parameters list, try to see if we've got an existing query going from a previous page...
        query = request.session['fpr_query']
  
        # No query from a previous page either
        if query == '':
            query = '*'
            return HttpResponse('No query.')


    request.session['fpr_query'] = query # Save this for pagination...
    conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())

    indexes = conn.get_indices()

    if 'fpr_file' not in indexes:
        # Grab relevant FPR data from the DB
        results = get_fpr_table()
        request.session['fpr_results'] = results

        # Set up indexing for some Elastic Search action.
        for row in results:
            conn.index(row, 'fpr_file', 'fpr_files')
    else:
        results = request.session['fpr_results']
    
    # do fulltext search
    q = pyes.StringQuery(query)
    s = pyes.Search(q)

    try:
        results = conn.search_raw(s, size=len(results), indices='fpr_file')
    except:
        return HttpResponse('Error accessing index.')
    
    form = FPRSearchForm()

    search_hits = []

    for row in results.hits.hits:
        search_hits.append(row['_source'].copy())

    page = helpers.pager(search_hits, results_per_page, current_page_number)
    hit_count = len(search_hits) 
  
    return render(request, 'main/preservation_planning_fpr.html', locals())
Code Example #6
def index():
    """The home page"""
    conn = es.ES(app.config['ELASTICSEARCH_HOST'])
    hits = []
    total = 0
    searching = False
    q = 'Lettertape'
    if request.method == 'POST' and request.form.get('q'):
        q = request.form['q']
        h = es.HighLighter(['<span class="lyric-summary-highlight">'], ['</span>'])
        query = es.TermsQuery()
        query.add('lyrics', q.lower().split())
        s = es.Search(query, highlight=h, size=MAX_RESULTS)
        s.add_highlight('lyrics')
        hits = conn.search(s)
        total = hits.total
        hits = prepare_hits(hits)
    return render_template('index.html', hits=hits,
                           searching=searching, q=q, total=total)
Code Example #7
def kibanaDashboards():
    try:
        resultsList = []
        es = pyes.ES((list('{0}'.format(s) for s in options.esservers)))
        r = es.search(pyes.Search(pyes.MatchAllQuery(), size=100),
            'kibana-int', 'dashboard')
        if r:
            for dashboard in r:
                dashboardJson = json.loads(dashboard.dashboard)
                resultsList.append({
                    'name': dashboardJson['title'],
                    'url': "%s/%s/%s" % (options.kibanaurl,
                        "index.html#/dashboard/elasticsearch",
                        dashboardJson['title'])
                })
            return json.dumps(resultsList)
        else:
            sys.stderr.write('No Kibana dashboard found\n')
    except pyes.exceptions.NoServerAvailable:
        sys.stderr.write('Elastic Search server could not be reached, check network connectivity\n')
Code Example #8
File: models.py Project: venkat0708/NewsBlur
    def query(cls, text, max_subscribers=5):
        try:
            cls.ES().default_indices = cls.index_name()
            cls.ES().indices.refresh()
        except pyes.exceptions.NoServerAvailable:
            logging.debug(" ***> ~FRNo search server available.")
            return []
        
        if settings.DEBUG:
            max_subscribers = 1
        
        logging.info("~FGSearch ~FCfeeds~FG: ~SB%s" % text)
        q = pyes.query.BoolQuery()
        q.add_should(pyes.query.MatchQuery('address', text, analyzer="simple", cutoff_frequency=0.0005, minimum_should_match="75%"))
        q.add_should(pyes.query.MatchQuery('link', text, analyzer="simple", cutoff_frequency=0.0005, minimum_should_match="75%"))
        q.add_should(pyes.query.MatchQuery('title', text, analyzer="simple", cutoff_frequency=0.0005, minimum_should_match="75%"))
        q = pyes.Search(q, min_score=1)
        results = cls.ES().search(query=q, size=max_subscribers, doc_types=[cls.type_name()], sort="num_subscribers:desc")

        return results
Code Example #9
File: model.py Project: mozilla/toolbox
    def es_query(self, query):
        """make an ElasticSearch query and return the results"""
        search = pyes.Search(query)
        results = self.es.search(query=search,
                                 indexes=[self.es_index],
                                 doc_types=[self.doc_type],
                                 size=0)

        # the first query is just used to determine the size of the set
        if 'hits' not in results or 'total' not in results['hits']:
            raise Exception("bad ES response %s" % json.dumps(results))
        total = results['hits']['total']

        # repeat the query to retrieve the entire set
        results = self.es.search(query=search,
                                 indexes=[self.es_index],
                                 doc_types=[self.doc_type],
                                 size=total)

        if 'hits' not in results or 'hits' not in results['hits']:
            raise Exception("bad ES response %s" % json.dumps(results))

        return results
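
A minimal usage sketch (hypothetical names): given an object whose `es`, `es_index`, and `doc_type` attributes are configured as the method expects, and assuming, as the method itself does, that `self.es.search` returns the raw response dict:

# Illustrative only; `model` stands in for an instance of the surrounding class.
raw = model.es_query(pyes.MatchAllQuery())
total = raw['hits']['total']
docs = [hit['_source'] for hit in raw['hits']['hits']]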
Code Example #10
def test_put_get_warmer(self):
    warmer1 = pyes.Search(pyes.MatchAllQuery())
    # ES fails if the index is empty
    self.conn.index({'a': 1}, self.index_name, self.document_type)
    self.conn.refresh(self.index_name)
    self.conn.put_warmer(indices=[self.index_name],
                         name='w1',
                         warmer=warmer1)
    result = self.conn.get_warmer(indices=[self.index_name], name='w1')
    expected = {
        self.index_name: {
            'warmers': {
                'w1': {
                    'source': {
                        'query': {
                            'match_all': {}
                        }
                    },
                    'types': []
                }
            }
        }
    }
    self.assertEqual(result, expected)
Code Example #11
def list_display(request):
    current_page_number = request.GET.get('page', 1)

    form = forms.StorageSearchForm()

    # get ElasticSearch stats
    aip_indexed_file_count = advanced_search.indexed_count('aips')

    # get AIPs
    order_by = request.GET.get('order_by', 'name')
    sort_by = request.GET.get('sort_by', 'up')

    if sort_by == 'down':
        sort_direction = 'desc'
    else:
        sort_direction = 'asc'

    sort_specification = order_by + ':' + sort_direction

    conn = elasticSearchFunctions.connect_and_create_index('aips')

    items_per_page = 10
    start = (int(current_page_number) - 1) * items_per_page

    aipResults = conn.search(pyes.Search(pyes.MatchAllQuery(),
                                         start=start,
                                         size=items_per_page),
                             doc_types=['aip'],
                             fields='origin,uuid,filePath,created,name,size',
                             sort=sort_specification)

    try:
        len(aipResults)
    except pyes.exceptions.ElasticSearchException:
        # there will be an error if no mapping exists for AIPs due to no AIPs
        # having been created
        return render(request, 'archival_storage/archival_storage.html',
                      locals())

    # handle pagination
    page = helpers.pager(aipResults, items_per_page, current_page_number)

    if not page:
        raise Http404

    # augment data
    sips = []
    for aip in page['objects']:
        sip = {}
        sip['href'] = aip.filePath.replace(AIPSTOREPATH + '/', "AIPsStore/")
        sip['name'] = aip.name
        sip['uuid'] = aip.uuid

        sip['date'] = aip.created

        try:
            size = float(aip.size)
            sip['size'] = '{0:.2f} MB'.format(size)
        except:
            sip['size'] = 'Removed'

        sips.append(sip)

    # get total size of all AIPS from ElasticSearch
    q = pyes.MatchAllQuery().search()
    q.facet.add(pyes.facets.StatisticalFacet('total', field='size'))
    aipResults = conn.search(q, doc_types=['aip'])
    total_size = aipResults.facets.total.total
    total_size = '{0:.2f}'.format(total_size)

    return render(request, 'archival_storage/archival_storage.html', locals())
Code Example #12
    def search(query=None,
               abbr=None,
               chamber=None,
               subjects=None,
               bill_id=None,
               bill_id__in=None,
               search_window=None,
               updated_since=None,
               sponsor_id=None,
               bill_fields=None,
               status=None,
               type_=None,
               session=None):
        _filter = {}
        for key, value in [
            (settings.LEVEL_FIELD, abbr),
            ('chamber', chamber),
            ('subjects', subjects),
            ('bill_id', bill_id),
        ]:
            if value is not None:
                _filter[key] = value

        if search_window:
            if search_window == 'session':
                _filter['_current_session'] = True
            elif search_window == 'term':
                _filter['_current_term'] = True
            elif search_window.startswith('session:'):
                _filter['session'] = search_window.split('session:')[1]
            elif search_window.startswith('term:'):
                _filter['_term'] = search_window.split('term:')[1]
            elif search_window == 'all':
                pass
            else:
                raise ValueError('invalid search_window. valid choices are '
                                 '"term", "session", "all"')
        if updated_since:
            try:
                _filter['updated_at'] = {'$gte': parse_param_dt(updated_since)}
            except ValueError:
                raise ValueError('invalid updated_since parameter. '
                                 'please supply date in YYYY-MM-DD format')
        if sponsor_id:
            _filter['sponsors.leg_id'] = sponsor_id

        if status:
            # Status is slightly different: it's a dict like--
            # {'action_dates.signed': {'$ne': None}}
            _filter.update(**status)

        if type_:
            _filter['type'] = type_

        if session:
            _filter['session'] = session

        # process full-text query
        if query and settings.ENABLE_ELASTICSEARCH:
            # block spammers, possibly move to a BANNED_SEARCH_LIST setting
            if '<a href' in query:
                return db.bills.find({settings.LEVEL_FIELD: None})

            if re.findall(r'\d+', query):
                _id_filter = dict(_filter)
                _id_filter['bill_id'] = fix_bill_id(query).upper()
                result = db.bills.find(_id_filter)
                if result:
                    return result

            query = {
                "query_string": {
                    "fields": ["text", "title"],
                    "default_operator": "AND",
                    "query": query
                }
            }
            search = pyes.Search(query, fields=[])

            # take terms from mongo query
            es_terms = []
            if settings.LEVEL_FIELD in _filter:
                es_terms.append(
                    pyes.TermFilter(settings.LEVEL_FIELD,
                                    _filter.pop(settings.LEVEL_FIELD)))
            if 'session' in _filter:
                es_terms.append(
                    pyes.TermFilter('session', _filter.pop('session')))
            if 'chamber' in _filter:
                es_terms.append(
                    pyes.TermFilter('chamber', _filter.pop('chamber')))
            if 'subjects' in _filter:
                es_terms.append(
                    pyes.TermFilter('subjects',
                                    _filter.pop('subjects')['$all']))
            if 'sponsors.leg_id' in _filter:
                es_terms.append(
                    pyes.TermFilter('sponsors',
                                    _filter.pop('sponsors.leg_id')))

            # add terms
            if es_terms:
                search.filter = pyes.ANDFilter(es_terms)

            # page size is a guess, could use tweaks
            es_result = elasticsearch.search(search,
                                             search_type='scan',
                                             scroll='3m',
                                             size=250)
            doc_ids = [r.get_id() for r in es_result]
            _filter['versions.doc_id'] = {'$in': doc_ids}
        elif query:
            _filter['title'] = {'$regex': query, '$options': 'i'}

        # return query
        return db.bills.find(_filter, bill_fields)
Code Example #13
    def read(self, request):

        bill_fields = {
            'title': 1,
            'created_at': 1,
            'updated_at': 1,
            'bill_id': 1,
            'type': 1,
            'state': 1,
            'level': 1,
            'country': 1,
            'session': 1,
            'chamber': 1,
            'subjects': 1,
            '_type': 1,
            'id': 1
        }
        # replace with request's fields if they exist
        bill_fields = _build_field_list(request, bill_fields)

        # normal mongo search logic
        _filter = _build_mongo_filter(
            request,
            ('state', 'chamber', 'subjects', 'bill_id', 'bill_id__in'))

        # process search_window
        search_window = request.GET.get('search_window', '')
        if search_window:
            if search_window == 'session':
                _filter['_current_session'] = True
            elif search_window == 'term':
                _filter['_current_term'] = True
            elif search_window.startswith('session:'):
                _filter['session'] = search_window.split('session:')[1]
            elif search_window.startswith('term:'):
                _filter['_term'] = search_window.split('term:')[1]
            elif search_window == 'all':
                pass
            else:
                resp = rc.BAD_REQUEST
                resp.write(": invalid search_window. Valid choices are "
                           "'term', 'session' or 'all'")
                return resp

        # process updated_since
        since = request.GET.get('updated_since')
        if since:
            try:
                _filter['updated_at'] = {'$gte': parse_param_dt(since)}
            except ValueError:
                resp = rc.BAD_REQUEST
                resp.write(": invalid updated_since parameter."
                           " Please supply a date in YYYY-MM-DD format.")
                return resp

        # process sponsor_id
        sponsor_id = request.GET.get('sponsor_id')
        if sponsor_id:
            _filter['sponsors.leg_id'] = sponsor_id

        # process full-text query
        query = request.GET.get('q')
        if query:
            query = {
                "query_string": {
                    "fields": ["text", "title"],
                    "default_operator": "AND",
                    "query": query
                }
            }
            search = pyes.Search(query, fields=[])

            # take terms from mongo query
            es_terms = []
            if 'state' in _filter:
                es_terms.append(pyes.TermFilter('state', _filter.pop('state')))
            if 'session' in _filter:
                es_terms.append(
                    pyes.TermFilter('session', _filter.pop('session')))
            if 'chamber' in _filter:
                es_terms.append(
                    pyes.TermFilter('chamber', _filter.pop('chamber')))
            if 'subjects' in _filter:
                es_terms.append(
                    pyes.TermFilter('subjects',
                                    _filter.pop('subjects')['$all']))
            if 'sponsors.leg_id' in _filter:
                es_terms.append(
                    pyes.TermFilter('sponsors',
                                    _filter.pop('sponsors.leg_id')))

            # add terms
            if es_terms:
                search.filter = pyes.ANDFilter(es_terms)

            # page size is a guess, could use tweaks
            es_result = elasticsearch.search(search,
                                             search_type='scan',
                                             scroll='3m',
                                             size=250)
            doc_ids = [r.get_id() for r in es_result]
            _filter['versions.doc_id'] = {'$in': doc_ids}

        # start with base query
        query = db.bills.find(_filter, bill_fields)

        # pagination
        page = request.GET.get('page')
        per_page = request.GET.get('per_page')
        if page and not per_page:
            per_page = 50
        if per_page and not page:
            page = 1

        if page:
            page = int(page)
            per_page = int(per_page)
            query = query.limit(per_page).skip(per_page * (page - 1))
        else:
            # limit response size
            count = db.bills.find(_filter, bill_fields).count()
            if count > 5000:
                resp = rc.BAD_REQUEST
                resp.write(': request too large, try narrowing your search by '
                           'adding more filters.')
                return resp

        # sorting
        sort = request.GET.get('sort')
        if sort == 'updated_at':
            query = query.sort([('updated_at', -1)])
        elif sort == 'created_at':
            query = query.sort([('created_at', -1)])

        return list(query)