Example #1
0
    def test_GeoDistanceFilter(self):
        """The indexed pin should match a 200km geo-distance filter when
        the center is given either as a lat/lon dict or a [lon, lat] list."""
        for center in ({"lat": 40, "lon": 70}, [70, 40]):
            geo_filter = GeoDistanceFilter("pin.location", center, "200km")
            filtered = FilteredQuery(MatchAllQuery(), geo_filter)
            resultset = self.conn.search(query=filtered,
                                         indices=["test-mindex"])
            self.assertEquals(resultset.total, 1)
 def _get_results(self):
     """
     @returns: elasticsearch iterator over results
     defined by self.query
     """
     if self.db_query.is_empty():
         query = MatchAllQuery()
     else:
         query = self.db_query
     if self._ordering:
         query.sort = self._ordering
     return self._connection.search(
         query,
         indices=[self.connection.db_name],
         doc_types=self.query.model._meta.db_table)
Example #3
0
 def _get_ids_worker(args):
     """Worker: return one page of document _ids, sorted by gene fields.

     ``args`` is an (es_kwargs, start, step) tuple so the function can be
     mapped over by a process pool.
     """
     from utils.es import ESIndexer
     from pyes import MatchAllQuery
     es_kwargs, start, step = args
     search = MatchAllQuery().search()
     search.sort = [{'entrezgene': 'asc'}, {'ensembl.gene': 'asc'}]
     search.fields = []          # _ids only, no stored fields
     search.start = start
     search.size = step
     indexer = ESIndexer(**es_kwargs)
     total = indexer.count()['count']
     raw = indexer.conn.search_raw(search)
     # sanity check: the index must not have changed size mid-scan
     assert raw['hits']['total'] == total
     return [hit['_id'] for hit in raw['hits']['hits']]
Example #4
0
    def test_remove(self):
        """Ensure we can properly delete from ElasticSearch via DocManager.
        """
        document = {'_id': '1', 'name': 'John', 'ns': 'test.test'}

        # Insert and verify exactly one document is searchable.
        self.elastic_doc.upsert(document)
        self.elastic_doc.commit()
        hits = self.elastic_conn.search(MatchAllQuery())
        self.assertTrue(len(hits) == 1)

        # Remove it and verify the index is empty again.
        self.elastic_doc.remove(document)
        self.elastic_doc.commit()
        hits = self.elastic_conn.search(MatchAllQuery())
        self.assertTrue(len(hits) == 0)
Example #5
0
 def _get_results(self):
     """
     @returns: elasticsearch iterator over results
     defined by self.query
     """
     query = MatchAllQuery() if self.db_query.is_empty() else self.db_query
     if self._ordering:
         query.sort = self._ordering
     index_names = [self.connection.db_name]
     table = self.query.model._meta.db_table
     return self._connection.search(query,
                                    indices=index_names,
                                    doc_types=table)
Example #6
0
    def test_remove(self):
        """Ensure we can properly delete from ElasticSearch via DocManager.
        """
        document = {'_id': '1', 'name': 'John', 'ns': 'test.test'}

        # One searchable doc after the upsert...
        ElasticDoc.upsert(document)
        ElasticDoc.commit()
        self.assertTrue(len(elastic.search(MatchAllQuery())) == 1)

        # ...and none after the remove.
        ElasticDoc.remove(document)
        ElasticDoc.commit()
        self.assertTrue(len(elastic.search(MatchAllQuery())) == 0)
        print("PASSED REMOVE")
Example #7
0
    def get_last_doc(self):
        """Returns the last document stored in the Elastic engine.
        """
        # sort descending on _ts and take the single newest hit;
        # None when the index is empty.
        hits = self.elastic.search(MatchAllQuery(), size=1, sort='_ts:desc')
        return next(iter(hits), None)
Example #8
0
 def __call__(self, dquery):
     """Translate a catalog-style dict query into a pyes query.

     Each key of ``dquery`` that names a catalog index is converted via
     that index's ``get_query``.  ``get_query`` may return either a bare
     filter, or a ``(query, is_query)`` tuple; the (at most one) real
     query replaces the match-all base, while plain filters are ANDed
     around it.

     @param dquery: mapping of index name -> query value
     @returns: a pyes query object (FilteredQuery when filters apply)
     """
     filters = []
     catalog = self.catalogtool._catalog
     idxs = catalog.indexes.keys()
     query = MatchAllQuery()
     for key, value in dquery.items():
         if key not in idxs:
             continue
         index = getIndex(catalog, key)
         if index is None:
             continue
         qq = index.get_query(key, value)
         if qq is None:
             continue
         # idiomatic isinstance instead of type(...) == tuple
         if isinstance(qq, tuple):
             qq, is_query = qq
         else:
             is_query = False
         if is_query:
             query = qq
         else:
             filters.append(qq)
     if not filters:
         return query
     return FilteredQuery(query, ANDFilter(filters))
Example #9
0
    def count(self, limit=None):
        """Return the number of documents matching the current db query
        (match-all when the query is empty)."""
        query = MatchAllQuery() if self.db_query.is_empty() else self.db_query
        response = self._connection.count(
            query, doc_types=self.query.model._meta.db_table)
        return response["count"]
Example #10
0
    def test_upsert(self):
        """Ensure we can properly insert into ElasticSearch via DocManager.
        """
        # Insert, then overwrite the same _id; verify each state in turn.
        for expected_name in ('John', 'Paul'):
            document = {'_id': '1', 'name': expected_name, 'ns': 'test.test'}
            ElasticDoc.upsert(document)
            ElasticDoc.commit()
            for hit in elastic.search(MatchAllQuery()):
                self.assertTrue(hit['_id'] == '1'
                                and hit['name'] == expected_name)
        print("PASSED UPSERT")
Example #11
0
    def test_upsert(self):
        """Ensure we can properly insert into ElasticSearch via DocManager.
        """
        document = {'_id': '1', 'name': 'John', 'ns': 'test.test'}
        self.elastic_doc.upsert(document)
        self.elastic_doc.commit()
        for hit in self.elastic_conn.search(MatchAllQuery()):
            self.assertTrue(hit['_id'] == '1' and hit['name'] == 'John')
Example #12
0
    def test_GeoBoundingBoxFilter(self):
        """Bounding-box filter should hit the single indexed pin whether
        the corners are lat/lon dicts or [lon, lat] lists."""
        box = GeoBoundingBoxFilter(
            "pin.location",
            location_tl={"lat": 40.717, "lon": 70.99},
            location_br={"lat": 40.03, "lon": 72.0})
        filtered = FilteredQuery(MatchAllQuery(), box)
        resultset = self.conn.search(query=filtered, indices=["test-mindex"])
        self.assertEquals(resultset.total, 1)

        box = GeoBoundingBoxFilter("pin.location", [70.99, 40.717],
                                   [74.1, 40.03])
        filtered = FilteredQuery(MatchAllQuery(), box)
        result2 = self.conn.search(query=filtered, indices=["test-mindex"])
        self.assertEquals(result2.total, 1)
Example #13
0
    def test_ReconvertDoubles(self):
        """Regression test for issue#6.

        Pyes used to fail when a query response contained a document
        with a list of doubles.
        """
        result = self.conn.search(query=MatchAllQuery(),
                                  indexes=["test-pindex"])
        self.assertEquals(result['hits']['total'], 2)
Example #14
0
    def test_GeoPolygonFilter(self):
        """Polygon filter should match the indexed pin with both the
        lat/lon-dict and [lon, lat]-list vertex encodings."""
        vertices = [(50, -30), (30, -80), (80, -90)]  # (lat, lon) pairs

        poly = GeoPolygonFilter(
            "pin.location",
            [{"lat": lat, "lon": lon} for lat, lon in vertices])
        q = FilteredQuery(MatchAllQuery(), poly)
        resultset = self.conn.search(query=q, indices=["test-mindex"])
        self.assertEquals(resultset.total, 1)

        poly = GeoPolygonFilter(
            "pin.location", [[lon, lat] for lat, lon in vertices])
        q = FilteredQuery(MatchAllQuery(), poly)
        resultset = self.conn.search(query=q, indices=["test-mindex"])
        self.assertEquals(resultset.total, 1)
    def get_last_doc(self):
        """Returns the last document stored in the Elastic engine.

        @returns: the newest document by _ts, or None when the index is
                  empty.  (Previously an empty result raised NameError
                  because ``r`` was only bound inside the loop.)
        """
        r = None
        q = MatchAllQuery()
        # NOTE(review): the sibling implementation passes sort='_ts:desc'
        # (a string); confirm the set form here is actually accepted.
        result = self.elastic.search(q, size=1, sort={'_ts:desc'})
        for it in result:
            r = it
            break
        return r
Example #16
0
    def test_full_search(self):
        """Query ElasticSearch for all docs via API and via DocManager's
            _search(), compare.
        """
        for doc_id, name in (('1', 'John'), ('2', 'Paul')):
            self.elastic_doc.upsert(
                {'_id': doc_id, 'name': name, 'ns': 'test.test'})
        self.elastic_doc.commit()
        via_manager = self.elastic_doc._search()
        via_api = self.elastic_conn.search(MatchAllQuery())
        self.assertTrue(len(via_manager) == len(via_api))
        self.assertTrue(len(via_manager) != 0)
        for left, right in zip(list(via_manager), list(via_api)):
            self.assertTrue(left == right)
Example #17
0
    def afterCompletion(self, transaction):
        """Transaction hook: finalize the per-transaction ES journal.

        On a committed transaction the journal entries are simply
        discarded; on an aborted one each journaled action is undone
        against the catalog index (adds are deleted, modifies/deletes are
        restored from the journaled copy).
        """
        tdata = get()
        if not tdata.registered:
            # nothing was indexed during this transaction
            return
        es = tdata.es
        if es.mode == DISABLE_MODE:
            tdata.reset()
            return

        success = transaction.status == Status.COMMITTED
        # all journal docs written under this transaction id
        query = FilteredQuery(MatchAllQuery(),
            TermFilter('transaction_id', tdata.tid))

        conn = es.conn
        # NEED to refresh here otherwise we'll have inconsistencies
        conn.refresh()
        try:
            # newest-first so later actions are undone before earlier ones
            docs = conn.search(query, es.catalogsid, es.trns_catalogtype,
                               sort='order:desc')
            docs.count() # force executing
        except ElasticSearchException:
            # XXX uh oh, nasty, we have a problem. Let's log it.
            warn("Error trying to abort transaction: %s" %(
                traceback.format_exc()))
            tdata.reset()
            return

        for doc in docs:
            # the journal entry itself is always removed
            conn.delete(es.catalogsid, es.trns_catalogtype, doc.get_id())
            if not success:
                if doc.action == Actions.add:
                    # if it was an add action, remove delete
                    conn.delete(es.catalogsid, es.catalogtype, doc.uid)
                elif doc.action in (Actions.modify, Actions.delete):
                    # if it was a modify or delete, restore the doc
                    restored_doc = loads(doc.data)
                    conn.index(restored_doc, es.catalogsid, es.catalogtype, doc.uid)
        # NEED to refresh here otherwise we'll have inconsistencies
        conn.refresh()
        tdata.reset()
Example #18
0
    def _get_results(self, index=0, count=None):
        """
        @returns: elasticsearch iterator over results
        defined by self.query
        @param index: offset of the first hit to return
        @param count: number of hits to return (None = no explicit page)
        """
        query = self.db_query
        if self.db_query.is_empty():
            query = MatchAllQuery()
        if self._ordering:
            query.sort = self._ordering
        query = Search(query)
        query.bulk_read = 5000
        # NOTE(review): paging is applied only when index > 0, so a call
        # with index=0 and an explicit count ignores count — confirm this
        # is intended rather than an off-by-one in the guard.
        if index > 0 and count is not None:
            query.start = index
            query.size = count

        # else:
        #     query = Search(query, start=0, size=5000)

        #print "query", self.query.tables, query
        return self._connection.search(query, indices=[self.connection.db_name], doc_types=self.query.model._meta.db_table)
Example #19
0
    def query(self, q=None, fields=None, start=0, size=10, sort=None, only_in=None,
              h=['_all'], facets=None, returnquery=False, explain=False,
              filter_by=None, custom_filter=None):
        '''Perform a query on ES and return SearchResult object.
           @param q: if q is a string, it will be wrapped as a StringQuery,
                      otherwise, q must be a pre-built Query instance.
           @param fields: a list of fields to be returned in the query result.
                          to return all field, use ['_source'].
           @param start:  hits to skip, for pagination.
           @param size:   # of hits to be returned, for pagination.
           @param sort:  fields used to sort return hits, e.g.,
                            ['species', 'symbol']
                            ['-_id']  # descending on _id field
           @param only_in: a list of "index_types" to search against. Any types
                            not in self.ES_AVAILABLE_TYPES will be ignored;
                           or a string of one specific index_type;
                           or if empty (None or []), all available index_type
                                 will be searched against.
           @param h: fields for highlighting
           @param facets: fields for faceting, using default facets if None.
           @param returnquery: if True, return query JSON string for debugging.
           @param explain: if True, enables explanation for each hit on how its
                             score was computed.
           @param filter_by: A dictionary of {<field>: <value>} or
                                a list of (<field>, <value>) tuple, e.g.,
                      {'tag': 'chart', 'species': 'human'}
                      [('tag', 'chart'), ('species', ['human', 'mouse'])]
                      Note that <value> can be a list for multiple values.
           @param custom_filter: if provided, apply this filter instead.

        '''
        # Parse out the possible types to search across
        # doc_types = []
        # if only_in:
        #     if isinstance(only_in, basestring):
        #         only_in = [only_in]
        #     doc_types = list(set(only_in) & set(self.ES_AVAILABLE_TYPES))
        # doc_types = doc_types or self.ES_AVAILABLE_TYPES.keys()
        if only_in:
            if isinstance(only_in, basestring):
                only_in = [only_in]
            doc_types = only_in
        else:
            doc_types = self.ES_AVAILABLE_TYPES.keys()

        # Initialize q if it was not specified
        if not q:
            q = MatchAllQuery()
        # Setup q as a Query object if it was passed in as a string
        if type(q) in types.StringTypes:
            # Check for max query length
            if len(q) > self.ES_MAX_QUERY_LENGTH:
                return BiogpsSearchResult({'error': 'Query string too long.'})
            q = StringQuery(q, default_operator='AND')

        # Apply custom_filter if provided
        if custom_filter:
            q = FilteredQuery(q, custom_filter)
        # Otherwise, call the default filter build chain
        else:
            filter = self._build_filter(doc_types, filter_by)
            if filter:
                q = FilteredQuery(q, filter)

        # Wrap the query into a Search with pagination/sort/explain applied
        q = q.search(fields=fields, start=start, size=size, sort=sort , explain=explain) # , index_boost={'gene': 1})

        # Add highlighting
        for _h in h:
            q.add_highlight(_h, fragment_size=300, number_of_fragments=0)

        # Add faceting
        _facets = facets or self._get_default_facets(doc_types)
        if _facets:
            for _f in _facets:
                q.facet.add_term_facet(_f)

        # Only for debugging
        if returnquery:
            return json.dumps(q.q, indent=2)

        # Run the final query and return the results
        return self._query(q, doc_types)
Example #20
0
def get_pubs(filter=MissingFilter('types')):
  """Return all "immo" docs matching *filter* (by default: docs that are
  missing a 'types' field), wrapped around a match-all query."""
  filtered = FilteredQuery(MatchAllQuery(), filter)
  return conn.search(query=filtered, indices=e_index, doc_types="immo")
Example #21
0
 def _search(self):
     """For test purposes only. Performs search on Elastic with empty query.
     Does not have to be implemented.
     """
     return self.elastic.search(MatchAllQuery())
Example #22
0
 def getAllElasticsTransactions(self):
     """Fetch every document from the transactions journal type."""
     es = self.es
     return es.conn.search(MatchAllQuery(), es.catalogsid,
                           es.trns_catalogtype)
Example #23
0
# test
from mediaresearchapp.tasks import MediaAggregateSQLTask

if __name__ == '__main__':
    es = ES("127.0.0.1:9200", default_indices='mediaaggregate')

    # Filters
    filters = [GeoDistanceFilter('location', [40.0, 9.00], 20, 'arc', 'km')]

    #     filters = [TermFilter('message', 'elastic'),
    #                GeoDistanceFilter('locations',
    #                                  {"lat": 40.0, "lon": 9.00},
    #                                  20, 'arc', 'km')
    #                ]
    filter = ANDFilter(filters)
    q = FilteredQuery(MatchAllQuery(), filter)
    results = es.search(q)
    for r in results:
        print r
        break

    q4 = RegexTermQuery('city', 'bang.*')
    print q4
    resultset = es.search(q4)
    for r in resultset:
        print r

    query_str = {
        "query": {
            "termquery": [{
                "fieldname1": "value"
Example #24
0
# Demo (Python 2): index a JSON dataset into ES and run a terms aggregation.
ftool = FileTools()
ftrans = FormatTranslator()

# 1. Create Connection
conn = ES()

# 2. Index Data
dataset_json = open("../dataset.json")
dataset = json.load(dataset_json)['data']
for data in dataset:
    # NOTE(review): dataset.index(data) is an O(n) scan per item and maps
    # duplicate entries to the same id — an enumerate counter would be safer.
    conn.index(data, "example_index", "example_type",
               "example_id_" + str(dataset.index(data)))

# 3. Create Simple Query
query = MatchAllQuery()

# 4. Create Simple Aggregation
agg = TermsAgg('agg1', field="name", sub_aggs=[], size=100)

# 5. Get Result (top 5 hits plus the aggregation buckets)
search = Search(query, size=5)
search.agg.add(agg)
print search.serialize()

result = conn.search(search, "example_index", "example_type")

for i in result:
    print json.dumps(i, indent=2)
print json.dumps(result.aggs, indent=2)