def delete_dataset(dataset, bucket, ratio):
    q = Query()
    q.mapkey_single = dataset
    v = View(bucket, "views", "keys", query=q)
    keys = (x.value for x in v if x.key == dataset)

    def pack_in_groups(keys, n, ratio):
        # Randomly sample keys with probability `ratio` and yield them
        # in batches of at most `n`.
        group = []
        for k in keys:
            if ratio > random.random():
                if len(group) >= n:
                    yield group
                    group = []
                group.append(k)
        if group:
            yield group

    nremoved = 0
    for kg in pack_in_groups(keys, 500, ratio):
        try:
            if kg:
                bucket.remove_multi(kg, quiet=True)
        except NotFoundError as error:
            print(error)
        else:
            nremoved += len(kg)
            if nremoved and nremoved % 10000 == 0:
                print(nremoved)
    return nremoved
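# For reference, a minimal driver sketch for delete_dataset. The connection
# string, bucket name, dataset name, and sampling ratio below are
# illustrative assumptions, not taken from the original module; the imports
# mirror the couchbase 2.x API the function above relies on.
import random

from couchbase.bucket import Bucket
from couchbase.exceptions import NotFoundError
from couchbase.views.iterator import View
from couchbase.views.params import Query

bucket = Bucket('couchbase://localhost/example_bucket')  # assumed endpoint
# Delete roughly half of the keys recorded for "my_dataset".
removed = delete_dataset('my_dataset', bucket, ratio=0.5)
print('removed %d keys' % removed)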
def show_customer(customer):
    doc = db.get(customer, quiet=True)
    if not doc.success:
        return "No such customer {0}".format(customer), 404

    obj = CustomerRow(name=doc.value['name'], value=None, id=customer,
                      doc=doc.value)

    rp = Node_SnapshotListRowProcessor()
    q = Query()
    q.mapkey_single = customer
    q.limit = ENTRIES_PER_PAGE

    log_rows = db.query("node_snapshot", "by_customer",
                        row_processor=rp, query=q, include_docs=True)
    logs = []
    for log in log_rows:
        logs.append({'id': log.id, 'name': log.name})

    rp = RowProcessor(rowclass=CustomerRow)
    cluster_rows = db.query("cluster", "by_customer",
                            row_processor=rp, query=q, include_docs=True)
    clusters = []
    for cluster in cluster_rows:
        clusters.append({'id': cluster.id, 'name': cluster.name})

    return render_template('/customer/show.html', customer=obj,
                           logs=logs, clusters=clusters)
def _poll_vq_single(self, dname, use_devmode, ddresp):
    """
    Initiate a view query for a view located in a design document

    :param dname: The name of the design document
    :param use_devmode: Whether to use the development-mode view
    :param ddresp: The design document to poll (as JSON)
    :return: True if successful, False if no views.
    """
    vname = None
    query = None
    v_mr = ddresp.get('views', {})
    v_spatial = ddresp.get('spatial', {})
    if v_mr:
        vname = single_dict_key(v_mr)
        query = Query()
    elif v_spatial:
        vname = single_dict_key(v_spatial)
        query = SpatialQuery()

    if not vname:
        return False

    query.stale = STALE_OK
    query.limit = 1

    for r in self._cb.query(dname, vname, use_devmode=use_devmode,
                            query=query):
        pass
    return True
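# single_dict_key is a helper defined elsewhere in that module; a minimal
# sketch of what it plausibly does (only the name comes from the function
# above, the body is an assumption):
def single_dict_key(d):
    # Return the sole key of a one-entry dict, e.g. the only view name
    # inside a design document's 'views' map.
    assert len(d) == 1
    return list(d.keys())[0]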
def test_mixed_query(self):
    self.assertRaises(ArgumentError,
                      self.cb.query,
                      "d", "v", query=Query(), limit=10)

    self.cb.query("d", "v", query=Query(limit=5).update(skip=15))
def getAllFactors():
    q = Query()
    q.limit = 1
    rows = c.query("dev_factor", "all", query=q)
    result = list()
    print(rows)
    for row in rows:
        result.append(row.key)
    return json.dumps(result)
def search(self, start_ts, end_ts):
    """Searches in Couchbase and finds all documents that were modified
    or deleted within the timestamp range.
    """
    q = Query(inclusive_end=False)
    q.mapkey_range = [start_ts, end_ts + q.STRING_RANGE_END]
    view = View(self.couchbase, "mongo_connect", "by_timestamp",
                query=q, include_docs=True)
    for row in view:
        print(row)
        yield row.doc.value
def test_key_query(self):
    q = Query()
    q.mapkey_single = ["abbaye_de_maredsous"]
    ret = self.cb.query("beer", "brewery_beers", query=q)
    rows = list(ret)
    self.assertEqual(len(rows), 1)

    q.mapkey_single = UNSPEC
    q.mapkey_multi = [["abbaye_de_maredsous"],
                      ["abbaye_de_maredsous", "abbaye_de_maredsous-8"]]
    ret = self.cb.query("beer", "brewery_beers", query=q)
    rows = list(ret)
    self.assertEqual(len(rows), 2)
def getByView(self, parameter):
    bucket = Bucket(self._bucketUrl)
    options = Query()
    options.mapkey_range = (str(parameter), str(parameter))
    options.stale = False
    rows = bucket.query(self.designDocument, self._viewName, query=options)
    # Each resulting row is a (key, value, docid, doc) tuple; since we
    # want doc IDs, select the element at index 2.
    docids = [row[2] for row in rows]
    if len(docids) == 0:
        return []
    results = bucket.get_multi(docids).values()
    return [result.value for result in results]
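# Aside: rows returned by the couchbase client are named tuples exposing
# key/value/docid/doc as attributes, so row.docid is equivalent to the
# row[2] indexing above. The design document and view names here are
# illustrative assumptions.
rows = bucket.query("my_design", "my_view", query=Query(limit=5))
docids = [row.docid for row in rows]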
def test_row_processor(self):
    rp = BreweryBeerRowProcessor()
    q = Query(limit=20)

    ret = self.cb.query("beer", "brewery_beers",
                        query=q, row_processor=rp, include_docs=True)
    beers = list(ret)
    for b in beers:
        self.assertIsInstance(b, Beer)
        self.assertIsInstance(b.brewery, Brewery)

    ret = self.cb.query("beer", "brewery_beers",
                        query=q, row_processor=rp, include_docs=False)
    list(ret)

    ret = self.cb.query("beer", "brewery_beers",
                        row_processor=rp, include_docs=False, limit=40)
    self.assertRaises(ValueError, list, ret)
def brewery_search():
    value = request.args.get('value')
    q = Query()
    q.mapkey_range = [value, value + Query.STRING_RANGE_END]
    q.limit = ENTRIES_PER_PAGE

    ret = []
    rp = BreweryRowProcessor()
    res = db.query("brewery", "by_name",
                   row_processor=rp, query=q, include_docs=True)
    for brewery in res:
        ret.append({'id': brewery.id, 'name': brewery.name})

    return return_search_json(ret)
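# return_search_json is a helper shared by the search endpoints here and
# below; its definition is not shown, so this is a plausible minimal
# sketch, assuming a Flask app (only the name comes from the snippets):
from flask import jsonify

def return_search_json(results):
    # Hypothetical implementation: wrap the result list in the JSON
    # envelope the autocomplete frontend expects.
    return jsonify({'results': results})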
def beer_search():
    value = request.args.get('value')
    q = Query()
    q.mapkey_range = [value, value + Query.STRING_RANGE_END]
    q.limit = ENTRIES_PER_PAGE

    ret = []
    res = db.query("beer", "by_name",
                   row_processor=BeerRowProcessor(),
                   query=q, include_docs=True)
    for beer in res:
        ret.append({'id': beer.id,
                    'name': beer.name,
                    'brewery': beer.brewery_id})

    return return_search_json(ret)
def lookup_all_option_groups(self, menuitem_id):
    q = Query(stale=False, inclusive_end=True, mapkey_single=menuitem_id)
    resultset = []
    for result in View(self.bucket, 'dev_dsx', 'menu_item_options',
                       query=q):
        resultset.append(result.value)
    return resultset
def customer_search():
    value = request.args.get('value')
    q = Query()
    q.mapkey_range = [value, value + Query.STRING_RANGE_END]
    q.limit = ENTRIES_PER_PAGE

    ret = []
    rp = RowProcessor(rowclass=CustomerRow)
    res = db.query("customer", "by_name",
                   row_processor=rp, query=q, include_docs=True)
    for customer in res:
        print(customer)
        ret.append({'id': customer.id, 'name': customer.name})

    return return_search_json(ret)
def test_long_uri(self):
    qobj = Query()
    qobj.mapkey_multi = [str(x) for x in xrange(MAX_URI_LENGTH)]
    ret = self.cb.query("beer", "brewery_beers", query=qobj)
    # No assertions, just make sure it didn't break
    for row in ret:
        raise Exception("...")

    # Apparently only the "keys" parameter is supposed to be in POST.
    # Let's fetch 100 items now
    keys = [r.key for r in
            self.cb.query("beer", "brewery_beers", limit=100)]
    self.assertEqual(100, len(keys))

    kslice = keys[90:]
    self.assertEqual(10, len(kslice))
    rows = [x for x in
            self.cb.query("beer", "brewery_beers",
                          mapkey_multi=kslice, limit=5)]
    self.assertEqual(5, len(rows))
    for row in rows:
        self.assertTrue(row.key in kslice)
def test_long_uri(self):
    qobj = Query()
    qobj.mapkey_multi = [str(x) for x in range(500)]
    ret = self.cb.query("beer", "brewery_beers", query=qobj)
    # No assertions, just make sure it didn't break
    for row in ret:
        raise Exception("...")

    # Apparently only the "keys" parameter is supposed to be in POST.
    # Let's fetch 100 items now
    keys = [r.key for r in
            self.cb.query("beer", "brewery_beers", limit=100)]
    self.assertEqual(100, len(keys))

    kslice = keys[90:]
    self.assertEqual(10, len(kslice))
    rows = [x for x in
            self.cb.query("beer", "brewery_beers",
                          mapkey_multi=kslice, limit=5)]
    self.assertEqual(5, len(rows))
    for row in rows:
        self.assertTrue(row.key in kslice)
def test_range_query(self):
    q = Query()

    q.mapkey_range = [["abbaye_de_maredsous"],
                      ["abbaye_de_maredsous", Query.STRING_RANGE_END]]
    q.inclusive_end = True
    ret = self.cb.query("beer", "brewery_beers", query=q)
    rows = list(ret)
    self.assertEqual(len(rows), 4)

    q.mapkey_range = [["u"], ["v"]]
    ret = self.cb.query("beer", "brewery_beers", query=q)
    self.assertEqual(len(list(ret)), 88)

    q.mapkey_range = [["u"], ["uppper" + Query.STRING_RANGE_END]]
    ret = self.cb.query("beer", "brewery_beers", query=q)
    rows = list(ret)
    self.assertEqual(len(rows), 56)
def log_search():
    value = request.args.get('value')
    q = Query()
    q.mapkey_range = [value, value + Query.STRING_RANGE_END]
    q.limit = ENTRIES_PER_PAGE

    ret = []
    rp = Node_SnapshotListRowProcessor()
    res = db.query("node_snapshot", "by_name",
                   row_processor=rp, query=q, include_docs=True)
    for log in res:
        ret.append({'id': log.id,
                    'name': log.name,
                    'customer': log.customer_id})

    return return_search_json(ret)
def _assert_vopteq_multi(self, d, key, value):
    q = Query(**{key: value})
    enc = q.encoded
    res = {}
    for kvp in enc.split("&"):
        k, v = kvp.split("=")
        res[k] = v

    d = d.copy()
    for k in d:
        d[k] = ulp.quote(d[k])

    self.assertEqual(res, d)
def get_last_doc(self):
    """Searches in Couchbase to find the document that was modified or
    deleted most recently."""
    q = Query(descending=True, limit=1)
    view = View(self.couchbase, "mongo_connect", "by_timestamp",
                query=q, include_docs=True)
    for row in view:
        print(row)
        return row.doc.value
def search_tag(query, page):
    (qcount, acount, tcount, ucount, tag_list) = common_data()
    tag = query[4:]
    q = pyes.MatchQuery('tag', tag)
    question_results = kunjika.es_conn.search(query=q)
    results = []
    for r in question_results:
        results.append(r['tag'])
        print(r['tag'])

    questions_list = []
    for t in results:
        q = Query(key=t)
        for result in View(kunjika.qb, "dev_qa", "get_questions_by_tag",
                           include_docs=True, query=q):
            questions_list.append(result.doc.value)

    for i in questions_list:
        i['tstamp'] = strftime("%a, %d %b %Y %H:%M",
                               localtime(i['content']['ts']))
        user = kunjika.cb.get(i['content']['op']).value
        i['opname'] = user['name']

    pagination = Pagination(page, kunjika.QUESTIONS_PER_PAGE,
                            len(questions_list))

    # The same template is rendered whether or not the user is logged in.
    return render_template('search.html',
                           title='Search results for ' + query,
                           qpage=True,
                           questions=questions_list[
                               (page - 1) * kunjika.QUESTIONS_PER_PAGE:],
                           pagination=pagination,
                           qcount=qcount, ucount=ucount,
                           tcount=tcount, acount=acount,
                           tag_list=tag_list, query=query)
def __init__(self, parent, design, view, row_processor=None,
             streaming=0, include_docs=False, query=None, **params):
    """
    Construct an iterable which can be used to iterate over view query
    results.

    :param parent: The parent Connection object
    :type parent: :class:`~couchbase.connection.Connection`
    :param string design: The design document
    :param string view: The name of the view within the design document
    :param callable row_processor: See :attr:`row_processor` for more
        details.

    :param boolean include_docs: If set, the document itself will be
        retrieved for each row in the result. The default algorithm uses
        :meth:`~couchbase.connection.Connection.get_multi` for each page
        (i.e. every :attr:`streaming` results).

        The :attr:`~couchbase.views.params.Query.reduce` family of
        attributes must not be active, as results from ``reduce`` views
        do not have corresponding doc IDs (as these are aggregation
        functions).

    :param bool streaming: Whether a streaming chunked request should be
        used. This is helpful for handling the view results in small
        chunks rather than loading the entire resultset into memory at
        once. By default, a single request is made and the response is
        decoded at once. With streaming enabled, rows are decoded
        incrementally.

    :param query: If set, is a :class:`~couchbase.views.params.Query`
        object. It is illegal to use this in conjunction with additional
        ``params``

    :param params: Extra view options. This may be used to pass view
        arguments (as defined in :class:`~couchbase.views.params.Query`)
        without explicitly constructing a
        :class:`~couchbase.views.params.Query` object. It is illegal to
        use this together with the ``query`` argument. If you wish to
        'inline' additional arguments to the provided ``query`` object,
        use the query's :meth:`~couchbase.views.params.Query.update`
        method instead.

    This object is an iterator - it does not send out the request until
    the first item from the iterator is requested. See :meth:`__iter__`
    for more details on what this object returns.

    Simple view query, with no extra options::

        # c is the Connection object.

        for result in View(c, "beer", "brewery_beers"):
            print("emitted key: {0}, doc_id: {1}"
                  .format(result.key, result.docid))

    Execute a view with extra query options::

        # Implicitly creates a Query object
        view = View(c, "beer", "by_location",
                    limit=4, reduce=True, group_level=2)

    Pass a Query object::

        q = Query(
            stale=False, inclusive_end=True,
            mapkey_range=[
                ["21st_ammendment_brewery_cafe"],
                ["21st_ammendment_brewery_cafe", Query.STRING_RANGE_END]
            ]
        )
        view = View(c, "beer", "brewery_beer", query=q)

    Add extra parameters to query object for single call::

        view = View(c, "beer", "brewery_beer",
                    query=q.update(debug=True, copy=True))

    Include documents with query::

        view = View(c, "beer", "brewery_beer",
                    query=q, include_docs=True)

        for result in view:
            print("Emitted key: {0}, Document: {1}".format(
                result.key, result.doc.value))
    """

    self._parent = parent
    self.design = design
    self.view = view
    self.errors = []
    self.raw = None
    self.rows_returned = 0

    self.include_docs = include_docs
    self.indexed_rows = 0

    if not row_processor:
        row_processor = RowProcessor()

    self.row_processor = row_processor
    self._rp_iter = None

    if query and params:
        raise ArgumentError.pyexc(
            "Extra parameters are mutually exclusive with the "
            "'query' argument. Use query.update() to add extra arguments")

    if query:
        self._query = deepcopy(query)
    else:
        self._query = Query.from_any(params)

    if include_docs:
        if (self._query.reduce or self._query.group or
                self._query.group_level):
            raise ArgumentError.pyexc(
                "include_docs is only applicable for map-only views, "
                "but 'reduce', 'group', or 'group_level' was specified",
                self._query)

    # The original 'limit' parameter, passed to the query.
    self._streaming = streaming
    self._do_iter = True
def listDataset(argv):
    from couchbase.views.iterator import View
    from couchbase.views.params import Query
    from striped.client import CouchBaseBackend
    from couchbase.exceptions import KeyExistsError, TemporaryFailError, \
        TimeoutError, NotFoundError

    Usage = """
    python listDataset.py -c <CouchBase config file> [-f|-l] <bucket name> <dataset name>
    """

    config_file = None
    opts, args = getopt.getopt(argv, "c:lfn")
    opts = dict(opts)
    config_file = opts.get("-c")
    files_only = "-f" in opts
    long_print = "-l" in opts
    counter = "-n" in opts

    if len(args) < 2:
        print(Usage)
        sys.exit(1)

    bucket_name, dataset_name = args
    backend = CouchBaseBackend(bucket_name, config=config_file)
    bucket = backend.bucket

    if False:
        # Disabled alternative: query the view directly instead of going
        # through the backend helper.
        q = Query()
        q.mapkey_single = dataset_name
        v = View(bucket, "views", "RGInfos", query=q)
        infos = [x.value for x in v if x.key == dataset_name]

    infos = backend.RGInfos(dataset_name)
    infos = sorted(infos, key=lambda info: info["RGID"])

    if long_print:
        print("RGID    NEvents    File(s)")
        print("------- ---------- -------")
        nevents = 0
        files = {}
        rgids = set()
        for info in infos:
            fn = info["Segments"][0]["FileName"]
            print("%7d %10d %s" % (info["RGID"], info["NEvents"], fn))
            rgids.add(info["RGID"])
            files[fn] = 1
            for s in info["Segments"][1:]:
                print("%19s %s" % (" ", s["FileName"]))
                files[s["FileName"]] = 1
            nevents += info["NEvents"]
        print("------- ---------- -------")
        print("%7d %10d %d" % (len(infos), nevents, len(files)))
        maxrgid = max(rgids)
        if len(rgids) != maxrgid + 1:
            print("Missing RGIDs (%d):" % (maxrgid + 1 - len(rgids),))
            for rgid in range(maxrgid):
                if not rgid in rgids:
                    print(rgid, end=' ')
            print()
    elif files_only:
        files = {}  # filename -> nevents
        for info in infos:
            for s in info["Segments"]:
                fn = s["FileName"]
                files[fn] = files.get(fn, 0) + s["NEvents"]
        for fn in sorted(files.keys()):
            print(fn)
    else:
        files = set()
        rgids = set()
        nevents = 0
        try:
            counter = backend.counter("%s:@@nextRGID" % (dataset_name,),
                                      delta=0).value
        except NotFoundError:
            counter = None
        for info in infos:
            rgids.add(info["RGID"])
            for s in info["Segments"]:
                files.add(s["FileName"])
            nevents += info["NEvents"]
        print("Next FrameID: ", counter)
        print("Files:        ", len(files))
        print("Frames:       ", len(rgids))
        print("Events:       ", nevents)
        if len(rgids):
            print("Max frame id: ", max(rgids))
            print("Events/frame: ",
                  int(float(nevents) / float(len(rgids)) + 0.5))
            maxrgid = max(rgids)
            if len(rgids) < maxrgid + 1:
                print("Missing RGIDs (%d):" % (maxrgid + 1 - len(rgids),))
                for rgid in range(maxrgid):
                    if not rgid in rgids:
                        print(rgid, end=' ')
                print()
def next(self, doc):
    view_name = self.view_sequence.next()
    params = self.generate_params(**doc)[view_name]
    params = dict(self.params, **params)
    return self.DDOC_NAME, view_name, Query(**params)
def query_view(view_name, query_key, query=None):
    design, v = parse_view_name(view_name)
    query = query or Query(key=query_key, stale=get_stale())
    result = View(connection(), design, v, query=query)
    result_keys = [x.docid for x in result]
    return result_keys
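# Hypothetical usage of query_view; the 'design/view' name format is an
# assumption based on parse_view_name (defined elsewhere), and the view
# name and key below are illustrative.
docids = query_view('users/by_email', 'alice@example.com')

# Or supply a fully built Query to override the default key lookup:
docids = query_view('users/by_email', None,
                    query=Query(limit=10, stale=get_stale()))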
def next(self, doc):
    ddoc_name, view_name = next(self.view_sequence)
    params = self.generate_params(**doc)[view_name]
    params = dict(self.params, **params)
    return ddoc_name, view_name, Query(**params)
def get_uids(self, date_string):
    query = Query(endkey=date_string, stale=get_stale())
    return query_view('deleted_documents', None, query=query)
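# get_stale and connection are module-level helpers whose definitions are
# not shown; a minimal self-contained sketch of plausible implementations
# (everything except the two names is an assumption):
from couchbase.bucket import Bucket
from couchbase.views.params import STALE_OK

_BUCKET_URL = 'couchbase://localhost/default'  # assumed endpoint

def get_stale():
    # Hypothetical: these views tolerate slightly stale indexes.
    return STALE_OK

def connection():
    # Hypothetical: construct the shared Bucket handle on demand.
    return Bucket(_BUCKET_URL)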
def test_http_data_streaming(self):
    q = Query(limit=30, debug=True)
    self._verify_data(
        self.cb.query("beer", "brewery_beers", streaming=True, query=q))
def construct_query(self):
    try:
        from couchbase.views.params import Query
    except ImportError:
        print("Unable to import Couchbase Python Client. Please see "
              "http://www.couchbase.com/communities/python/getting-started.")
        sys.exit(0)

    q = Query()
    query_params = self.query_conf["query_params"]

    # Copy over any supported view parameters that were configured.
    for name in ("stale", "startkey", "endkey", "mapkey_range",
                 "group", "group_level", "reduce", "limit"):
        if name in query_params:
            q.update(**{name: query_params[name]})

    q.update(connection_timeout=300000)
    return q
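# For context, a sketch of the query_conf dict that might feed
# construct_query; only the "query_params" key and the parameter names are
# taken from the function above, the values are illustrative assumptions.
query_conf = {
    "query_params": {
        "stale": "update_after",
        "startkey": "2014-01-01",
        "endkey": "2014-12-31",
        "reduce": False,
        "limit": 1000,
    }
}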