def get_index(self, bucket, index, startkey, endkey=None,
              return_terms=None, max_results=None, continuation=None,
              timeout=None, term_regex=None):
    """
    Run a secondary index query over HTTP.

    :param bucket: the bucket to query; its ``name`` and ``bucket_type``
        are used to build the request path
    :param index: the name of the index
    :param startkey: passed through to ``index_path``
    :param endkey: passed through to ``index_path``
    :param return_terms: when truthy and the response carries a
        ``results`` entry, ``(term, key)`` pairs are returned instead of
        bare keys
    :param max_results: sent as a query parameter; when truthy, the
        continuation found in the response (if any) is returned
    :param continuation: sent as a query parameter
    :param timeout: sent as a query parameter; ``'infinity'`` is sent
        as ``0``
    :param term_regex: sent as a query parameter
    :raises NotImplementedError: if ``term_regex`` is given but
        ``index_term_regex()`` reports it as unsupported
    :rtype: tuple of (results, continuation or None)
    """
    if term_regex and not self.index_term_regex():
        raise NotImplementedError(
            "Secondary index term_regex is not supported on %s"
            % self.server_version.vstring)

    query_timeout = 0 if timeout == 'infinity' else timeout

    bucket_type = self._get_bucket_type(bucket.bucket_type)
    url = self.index_path(bucket.name, index, startkey, endkey,
                          bucket_type=bucket_type,
                          return_terms=return_terms,
                          max_results=max_results,
                          continuation=continuation,
                          timeout=query_timeout,
                          term_regex=term_regex)
    status, _, raw = self._request('GET', url)
    self.check_http_code(status, [200])
    payload = json.loads(bytes_to_str(raw))

    if return_terms and u'results' in payload:
        # Each entry is a one-item mapping of term -> key.
        pairs = [list(entry.items())[0] for entry in payload[u'results']]
        matches = [(decode_index_value(index, term), key)
                   for term, key in pairs]
    else:
        matches = payload[u'keys'][:]

    next_page = None
    if max_results and u'continuation' in payload:
        next_page = payload[u'continuation']
    return (matches, next_page)
def decode_timeseries(self, resp, tsobj, convert_timestamp=False):
    """
    Populate a TsObject from a TsGetResp / TsQueryResp message.

    Sets ``tsobj.columns`` (when the response has columns) and always
    resets ``tsobj.rows`` before appending the decoded rows.

    :param resp: the protobuf message from which to process data
    :type resp: kvhosting.pb.riak_ts_pb2.TsQueryRsp or
                kvhosting.pb.riak_ts_pb2.TsGetResp
    :param tsobj: a TsObject
    :type tsobj: TsObject
    :param convert_timestamp: Convert timestamps to datetime objects
    :type convert_timestamp: boolean
    """
    columns = resp.columns
    if columns is not None:
        # Decode name and type together, one column at a time.
        decoded = [(bytes_to_str(column.name),
                    self.decode_timeseries_col_type(column.type))
                   for column in columns]
        names = [name for name, _ in decoded]
        types = [col_type for _, col_type in decoded]
        tsobj.columns = TsColumns(names, types)

    tsobj.rows = []
    if resp.rows is None:
        return
    for raw_row in resp.rows:
        tsobj.rows.append(
            self.decode_timeseries_row(raw_row, resp.columns,
                                       convert_timestamp))
def get_buckets(self, transport, bucket_type=None, timeout=None):
    """
    get_buckets(bucket_type=None, timeout=None)

    Get the list of buckets as :class:`RiakBucket
    <riak.bucket.RiakBucket>` instances.

    .. warning:: Do not use this in production, as it requires
       traversing through all keys stored in a cluster.

    .. note:: This request is automatically retried :attr:`retries`
       times if it fails due to network error.

    :param bucket_type: the optional containing bucket type
    :type bucket_type: :class:`~riak.bucket.BucketType`
    :param timeout: a timeout value in milliseconds
    :type timeout: int
    :rtype: list of :class:`RiakBucket <riak.bucket.RiakBucket>`
            instances
    """
    _validate_timeout(timeout)

    # Pick the builder that matches whether a bucket type was given.
    build_bucket = (self._bucket_type_bucket_builder if bucket_type
                    else self._default_type_bucket_builder)

    raw_names = transport.get_buckets(bucket_type=bucket_type,
                                      timeout=timeout)
    buckets = []
    for raw_name in raw_names:
        buckets.append(build_bucket(bytes_to_str(raw_name), bucket_type))
    return buckets
def maybe_riak_error(self, msg_code, data=None):
    """
    Raise a RiakError when ``msg_code`` is the error-response code.

    Does nothing for any other message code.

    :param msg_code: the protobuf message code that was received
    :param data: the raw message payload, if any
    :raises RiakError: with the decoded server message, or with a
        placeholder message when no payload was supplied
    """
    if msg_code != kvhosting.pb.messages.MSG_CODE_ERROR_RESP:
        return
    if data is None:
        raise RiakError('no error provided!')
    error = parse_pbuf_msg(msg_code, data)
    raise RiakError(bytes_to_str(error.errmsg))
def mkpath(*segments, **query):
    """
    Constructs the path & query portion of a URI from path segments
    and a dict.

    Segments that are ``None`` are skipped, the rest are joined with
    ``/`` and runs of slashes are collapsed. Query values that are
    ``None`` are omitted; real booleans are rendered as ``true`` /
    ``false``; every other value is handed to ``urlencode`` unchanged
    (unicode values are UTF-8 encoded first on Python 2).

    :param segments: the path segments, in order
    :param query: the query parameters
    :rtype: str, always starting with ``/``
    """
    # Remove empty segments (e.g. no key specified)
    parts = [bytes_to_str(s) for s in segments if s is not None]

    # Join the segments into a path and remove extra slashes
    pathstring = re.sub('/+', '/', '/'.join(parts))

    # Add the query string if it exists
    _query = {}
    for key, value in query.items():
        if value is None:
            continue
        # Only genuine booleans become 'true'/'false'. A membership test
        # against [False, True] compares by equality and would also catch
        # the numbers 0 and 1, turning e.g. timeout=0 into timeout=false.
        if isinstance(value, bool):
            _query[key] = str(value).lower()
        elif PY2 and isinstance(value, unicode):  # noqa
            _query[key] = value.encode('utf-8')
        else:
            _query[key] = value

    if _query:
        pathstring += "?" + urlencode(_query)

    if not pathstring.startswith('/'):
        pathstring = '/' + pathstring

    return pathstring
def decode_bucket_props(self, msg):
    """
    Turn a protobuf bucket-properties message into a plain dict.

    Only properties that are present on the message end up in the
    result.

    :param msg: the protobuf message to decode
    :type msg: kvhosting.pb.riak_pb2.RpbBucketProps
    :rtype dict
    """
    decoded = {}

    for name in NORMAL_PROPS:
        if not msg.HasField(name):
            continue
        raw = getattr(msg, name)
        decoded[name] = bytes_to_str(raw) if isinstance(raw, bytes) else raw

    # Commit hooks are signalled by a separate ``has_<name>`` attribute.
    for name in COMMIT_HOOK_PROPS:
        if getattr(msg, 'has_' + name):
            decoded[name] = self.decode_hooklist(getattr(msg, name))

    for name in MODFUN_PROPS:
        if msg.HasField(name):
            decoded[name] = self.decode_modfun(getattr(msg, name))

    for name in QUORUM_PROPS:
        if msg.HasField(name):
            decoded[name] = self.decode_quorum(getattr(msg, name))

    if msg.HasField('repl'):
        decoded['repl'] = REPL_TO_PY[msg.repl]

    return decoded
def test_encode_data_for_listkeys(self):
    """
    An encoded list-keys request round-trips the table name and timeout.
    """
    codec = PbufCodec(client_timeouts=True)
    encoded = codec.encode_timeseries_listkeysreq(self.table, 1234)

    # Parse the encoded bytes back into a fresh request message.
    parsed = riak.pb.riak_ts_pb2.TsListKeysReq()
    parsed.ParseFromString(encoded.data)

    self.assertEqual(self.table.name, bytes_to_str(parsed.table))
    self.assertEqual(1234, parsed.timeout)
def next(self):
    """
    Return the next ``(phase, decoded JSON result)`` pair of the stream.

    :raises StopIteration: when the message is marked done and carries
        no ``response`` field
    """
    message = super(PbufMapredStream, self).next()

    if message.done and not message.HasField('response'):
        raise StopIteration

    phase = message.phase
    decoded = json.loads(bytes_to_str(message.response))
    return phase, decoded
def next(self):
    """
    Return the next chunk of the index stream.

    Depending on the message this is a list of ``(term, key)`` pairs,
    a list of keys, or a ``CONTINUATION``. A message with none of these
    that is not marked done yields ``None``.

    :raises StopIteration: when the message is marked done and carries
        no keys, results or continuation
    """
    message = super(PbufIndexStream, self).next()

    if message.done and not (message.keys or
                             message.results or
                             message.continuation):
        raise StopIteration

    if self.return_terms and message.results:
        return [(decode_index_value(self.index, pair.key),
                 bytes_to_str(pair.value))
                for pair in message.results]

    if message.keys:
        if PY2:
            return message.keys[:]
        return [bytes_to_str(key) for key in message.keys]

    if message.continuation:
        return CONTINUATION(bytes_to_str(message.continuation))

    return None
def decode_index_req(self, resp, index, return_terms=None,
                     max_results=None):
    """
    Decode a secondary index response.

    :param resp: the protobuf index response
    :param index: the name of the index that was queried
    :param return_terms: when truthy and the response has results,
        ``(term, key)`` pairs are returned instead of bare keys
    :param max_results: when not None, the response's continuation
        (if present) is returned alongside the results
    :rtype: tuple of (results, continuation or None)
    """
    if return_terms and resp.results:
        found = [(decode_index_value(index, pair.key),
                  bytes_to_str(pair.value))
                 for pair in resp.results]
    elif six.PY3:
        found = [bytes_to_str(key) for key in resp.keys]
    else:
        found = resp.keys[:]

    continuation = None
    if max_results is not None and resp.HasField('continuation'):
        continuation = bytes_to_str(resp.continuation)
    return (found, continuation)
def stream_buckets(self, bucket_type=None, timeout=None):
    """
    Streams the list of buckets. This is a generator method that
    should be iterated over.

    .. warning:: Do not use this in production, as it requires
       traversing through all keys stored in a cluster.

    The caller should explicitly close the returned iterator,
    either using :func:`contextlib.closing` or calling ``close()``
    explicitly. Consuming the entire iterator will also close the
    stream. If it does not, the associated connection might not be
    returned to the pool. Example::

        from contextlib import closing

        # Using contextlib.closing
        with closing(client.stream_buckets()) as buckets:
            for bucket_list in buckets:
                do_something(bucket_list)

        # Explicit close()
        stream = client.stream_buckets()
        for bucket_list in stream:
            do_something(bucket_list)
        stream.close()

    Empty batches coming from the transport are not yielded.

    :param bucket_type: the optional containing bucket type
    :type bucket_type: :class:`~riak.bucket.BucketType`
    :param timeout: a timeout value in milliseconds
    :type timeout: int
    :rtype: iterator that yields lists of :class:`RiakBucket
         <riak.bucket.RiakBucket>` instances
    """
    _validate_timeout(timeout)

    build_bucket = (self._bucket_type_bucket_builder if bucket_type
                    else self._default_type_bucket_builder)

    resource = self._acquire()
    stream = resource.object.stream_buckets(bucket_type=bucket_type,
                                            timeout=timeout)
    stream.attach(resource)
    try:
        for raw_names in stream:
            buckets = [build_bucket(bytes_to_str(raw_name), bucket_type)
                       for raw_name in raw_names]
            if buckets:
                yield buckets
    finally:
        stream.close()
def stats(self):
    """
    Fetch performance statistics and server information.

    :rtype: the decoded JSON body, or None when the server does not
        answer with status 200
    """
    request_headers = {'Accept': 'application/json'}
    status, _, raw = self._request('GET', self.stats_path(),
                                   request_headers)
    if status != 200:
        return None
    return json.loads(bytes_to_str(raw))
def decode_map_value(self, entries):
    """
    Decode the entries of a map datatype into a dict.

    Nested maps are decoded recursively.

    :param entries: the map entries to decode
    :rtype: dict keyed by ``(field name, datatype name)`` tuples
    :raises ValueError: for a field whose datatype is not one of
        counter, set, register, flag or map
    """
    decoded = {}
    for entry in entries:
        field = entry.field
        name = bytes_to_str(field.name[:])
        dtype = MAP_FIELD_TYPES[field.type]

        if dtype == 'map':
            value = self.decode_map_value(entry.map_value)
        elif dtype == 'set':
            value = self.decode_set_value(entry.set_value)
        elif dtype == 'register':
            value = bytes_to_str(entry.register_value[:])
        elif dtype == 'counter':
            value = entry.counter_value
        elif dtype == 'flag':
            value = entry.flag_value
        else:
            raise ValueError(
                'Map may not contain datatype: {}'.format(dtype))

        decoded[(name, dtype)] = value
    return decoded
def decode_put(self, robj, resp):
    """
    Apply a put response to the object that was stored.

    :param robj: the object that was stored; updated in place
    :param resp: the put response, or None when there was none
    :raises RiakError: when there is no response and the object has
        no key
    :rtype: the same ``robj`` that was passed in
    """
    if resp is None:
        if not robj.key:
            raise RiakError("missing response object")
        return robj

    if resp.HasField('key'):
        robj.key = bytes_to_str(resp.key)
    if resp.HasField("vclock"):
        robj.vclock = VClock(resp.vclock, 'binary')
    if resp.content:
        self.decode_contents(resp.content, robj)
    return robj
def get_bucket_type_props(self, bucket_type):
    """
    Fetch the properties of a bucket-type.

    :param bucket_type: the bucket-type; its ``name`` is used to build
        the request path
    :raises RiakError: when the server does not answer with status 200
    :rtype: the ``props`` entry of the decoded JSON body
    """
    self._check_bucket_types(bucket_type)
    url = self.bucket_type_properties_path(bucket_type.name)
    status, _, raw = self._request('GET', url)

    if status != 200:
        raise RiakError('Error getting bucket-type properties.')
    return json.loads(bytes_to_str(raw))['props']
def decode_index_value(self, index, value):
    """
    Decodes a secondary index value into the correct Python type.

    Values of indexes whose name ends in ``_int`` are converted with
    ``int()``; all other values are passed through ``bytes_to_str``.

    :param index: the name of the index
    :type index: str or bytes
    :param value: the value of the index entry
    :type value: str or bytes
    :rtype str or int
    """
    # bytes.endswith() rejects a text suffix on Python 3, so pick the
    # suffix that matches the type of the index name.
    suffix = b"_int" if isinstance(index, bytes) else "_int"
    if index.endswith(suffix):
        return int(value)
    return bytes_to_str(value)
def decode_hook(self, hook):
    """
    Turn a protobuf commit hook message into a dict. Used in bucket
    properties.

    A hook carrying a ``modfun`` is decoded as that modfun; otherwise
    the result holds only the hook's name.

    :param hook: the hook to decode
    :type hook: kvhosting.pb.riak_pb2.RpbCommitHook
    :rtype dict
    """
    if not hook.HasField('modfun'):
        return {'name': bytes_to_str(hook.name)}
    return self.decode_modfun(hook.modfun)
def get_buckets(self, bucket_type=None, timeout=None):
    """
    Fetch the list of all buckets over HTTP.

    :param bucket_type: the optional containing bucket type
    :param timeout: sent along with the request
    :raises RiakError: when the server does not answer with status 200
    :rtype: the ``buckets`` entry of the decoded JSON body
    """
    resolved_type = self._get_bucket_type(bucket_type)
    url = self.bucket_list_path(bucket_type=resolved_type,
                                timeout=timeout)
    status, _, raw = self._request('GET', url)

    if status != 200:
        raise RiakError('Error getting buckets.')
    return json.loads(bytes_to_str(raw))['buckets']
def decode_link(self, link):
    """
    Turn an RpbLink message into a ``(bucket, key, tag)`` tuple.

    Any field that is not set on the message is reported as None.

    :param link: an RpbLink message
    :type link: kvhosting.pb.riak_pb2.RpbLink
    :rtype tuple
    """
    bucket = bytes_to_str(link.bucket) if link.HasField("bucket") else None
    key = bytes_to_str(link.key) if link.HasField("key") else None
    tag = bytes_to_str(link.tag) if link.HasField("tag") else None
    return (bucket, key, tag)
def get_keys(self, bucket, timeout=None):
    """
    Fetch the list of keys of a bucket over HTTP.

    :param bucket: the bucket; its ``name`` and ``bucket_type`` are
        used to build the request path
    :param timeout: sent along with the request
    :raises RiakError: when the server does not answer with status 200
    :rtype: the ``keys`` entry of the decoded JSON body
    """
    resolved_type = self._get_bucket_type(bucket.bucket_type)
    url = self.key_list_path(bucket.name, bucket_type=resolved_type,
                             timeout=timeout)
    status, _, raw = self._request('GET', url)

    if status != 200:
        raise RiakError('Error listing keys.')
    return json.loads(bytes_to_str(raw))['keys']
def decode_preflist(self, item):
    """
    Turn a single preflist item into a dict.

    :param item: one entry of a bucket/key preflist
    :type item: kvhosting.pb.riak_kv_pb2.RpbBucketKeyPreflistItem
    :rtype dict with the keys ``partition``, ``node`` and ``primary``
    """
    return {
        'partition': item.partition,
        'node': bytes_to_str(item.node),
        'primary': item.primary,
    }
def stream_keys(self, bucket, timeout=None):
    """
    Lists all keys in a bucket via a stream. This is a generator
    method which should be iterated over.

    .. warning:: Do not use this in production, as it requires
       traversing through all keys stored in a cluster.

    The caller should explicitly close the returned iterator,
    either using :func:`contextlib.closing` or calling ``close()``
    explicitly. Consuming the entire iterator will also close the
    stream. If it does not, the associated connection might
    not be returned to the pool. Example::

        from contextlib import closing

        # Using contextlib.closing
        with closing(client.stream_keys(mybucket)) as keys:
            for key_list in keys:
                do_something(key_list)

        # Explicit close()
        stream = client.stream_keys(mybucket)
        for key_list in stream:
            do_something(key_list)
        stream.close()

    Empty batches coming from the transport are not yielded.

    :param bucket: the bucket whose keys will be streamed
    :type bucket: RiakBucket
    :param timeout: a timeout value in milliseconds
    :type timeout: int
    :rtype: iterator
    """
    _validate_timeout(timeout)

    resource = self._acquire()
    stream = resource.object.stream_keys(bucket, timeout=timeout)
    stream.attach(resource)
    try:
        for batch in stream:
            if len(batch) == 0:
                continue
            if six.PY2:
                yield batch
            else:
                yield [bytes_to_str(key) for key in batch]
    finally:
        stream.close()
def update_datatype(self, datatype, **options):
    """
    Send the pending operation of a datatype to the server over HTTP.

    On a 201 answer the datatype's key is taken from the ``location``
    header; on any answer other than 204 the datatype's context and
    value are refreshed from the response body.

    :param datatype: the datatype whose operation is sent
    :param options: passed through to ``datatypes_path``; a
        ``return_body`` option is additionally copied to ``returnbody``
    :raises NotImplementedError: when datatypes are unsupported or the
        datatype lives in the default bucket-type
    :raises ValueError: when the datatype has no operation to send
    :raises TypeError: when the datatype's type name is not one of
        counter, set, hll or map
    :rtype: True
    """
    if not self.datatypes():
        raise NotImplementedError('Datatypes are not supported.')

    bucket = datatype.bucket
    if bucket.bucket_type.is_default():
        raise NotImplementedError(
            'Datatypes cannot be used in the default bucket-type.')

    op = datatype.to_op()
    context = datatype.context
    type_name = datatype.type_name

    if not op:
        raise ValueError(
            "No operation to send on datatype {!r}".format(datatype))

    if type_name not in ('counter', 'set', 'hll', 'map'):
        raise TypeError(
            "Cannot send operation on datatype {!r}".format(type_name))

    if 'return_body' in options:
        options['returnbody'] = options['return_body']

    url = self.datatypes_path(bucket.bucket_type.name, bucket.name,
                              datatype.key, **options)
    request_headers = {'Content-Type': 'application/json'}
    opdict = self._encode_dt_op(type_name, op)
    if context:
        opdict['context'] = context

    status, response_headers, raw = self._request(
        'POST', url, request_headers, json.dumps(opdict))
    self.check_http_code(status, [200, 201, 204])

    if status == 201:
        # The last path segment of the location header is the new key.
        location = response_headers['location']
        datatype.key = location.strip().split('/')[-1]

    if status != 204:
        decoded = json.loads(bytes_to_str(raw))
        datatype._context = decoded.get('context')
        datatype._set_value(
            self._decode_datatype(type_name, decoded['value']))

    return True
def get_resources(self):
    """
    Fetch a JSON mapping of server-side resource names to paths.

    :rtype dict, empty when the server does not answer with status 200
    """
    status, _, raw = self._request('GET', '/',
                                   {'Accept': 'application/json'})
    if status != 200:
        return {}

    listing = json.loads(bytes_to_str(raw))
    # The keys and values returned by json.loads() are unicode,
    # which will cause problems when passed into httplib later
    # (expecting bytes both in Python 2.x and 3.x).
    # We just encode the resource paths into bytes, with an
    # encoding consistent with what the resources module expects.
    encoded = {}
    for name in listing:
        encoded[name] = listing[name].encode('utf-8')
    return encoded
def fetch_datatype(self, bucket, key, **options):
    """
    Fetch a datatype over HTTP.

    :param bucket: the bucket holding the datatype
    :param key: the key of the datatype
    :param options: passed through to ``datatypes_path``
    :raises NotImplementedError: when datatypes are unsupported or the
        bucket lives in the default bucket-type
    :rtype: tuple of (type name, value, context); value and context are
        None when the server answers with status 404
    """
    if not self.datatypes():
        raise NotImplementedError("Datatypes are not supported.")

    bucket_type = bucket.bucket_type
    if bucket_type.is_default():
        raise NotImplementedError(
            'Datatypes cannot be used in the default bucket-type.')

    url = self.datatypes_path(bucket_type.name, bucket.name, key,
                              **options)
    status, _, raw = self._request('GET', url)
    self.check_http_code(status, [200, 404])

    decoded = json.loads(bytes_to_str(raw))
    dtype = decoded['type']
    if status == 404:
        return (dtype, None, None)

    value = self._decode_datatype(dtype, decoded['value'])
    return (dtype, value, decoded.get('context'))
def mapred(self, inputs, query, timeout=None):
    """
    Run a MapReduce query over HTTP.

    :param inputs: the inputs of the job
    :param query: the phases of the job
    :param timeout: optional timeout, handed to the job construction
    :raises RiakError: when the server does not answer with status 200;
        the message includes the response headers and body
    :rtype: the decoded JSON body
    """
    # Build the job description first, then post it.
    job = self._construct_mapred_json(inputs, query, timeout)
    request_headers = {'Content-Type': 'application/json'}
    status, response_headers, raw = self._request(
        'POST', self.mapred_path(), request_headers, job)

    if status != 200:
        raise RiakError(
            'Error running MapReduce operation. Headers: %s Body: %s' %
            (repr(response_headers), repr(raw)))

    return json.loads(bytes_to_str(raw))
def get_preflist(self, bucket, key):
    """
    Fetch the preflist for a bucket/key over HTTP.

    :param bucket: Riak Bucket
    :type bucket: :class:`~kvhosting.bucket.RiakBucket`
    :param key: Riak Key
    :type key: string
    :raises NotImplementedError: when preflists are unsupported
    :raises RiakError: when the server does not answer with status 200
    :rtype: list of dicts
    """
    if not self.preflists():
        raise NotImplementedError("fetching preflists is not supported.")

    resolved_type = self._get_bucket_type(bucket.bucket_type)
    url = self.preflist_path(bucket.name, key, bucket_type=resolved_type)
    status, _, raw = self._request('GET', url)

    if status != 200:
        raise RiakError('Error getting bucket/key preflist.')
    return json.loads(bytes_to_str(raw))['preflist']
def list_search_indexes(self):
    """
    Return a list of Solr search indexes from Yokozuna.

    :raises NotImplementedError: when ``yz_wm_index`` is falsy
    :raises RiakError: when the server does not answer with status 200
    :rtype list of dicts
    """
    if not self.yz_wm_index:
        raise NotImplementedError(
            "Search 2.0 administration is not supported for this version")

    status, _, raw = self._request('GET', self.search_index_path())
    if status != 200:
        raise RiakError('Error getting Search 2.0 index.')

    # The decoded body is returned as-is.
    return json.loads(bytes_to_str(raw))
def get_search_index(self, index):
    """
    Fetch the specified Solr search index for Yokozuna.

    :param index: a name of a yz index
    :type index: string
    :raises NotImplementedError: when ``yz_wm_index`` is falsy
    :raises RiakError: when the server does not answer with status 200
    :rtype: the decoded JSON body
    """
    if not self.yz_wm_index:
        raise NotImplementedError(
            "Search 2.0 administration is not supported for this version")

    status, _, raw = self._request('GET', self.search_index_path(index))
    if status != 200:
        raise RiakError('Error getting Search 2.0 index.')
    return json.loads(bytes_to_str(raw))
def decode_content(self, rpb_content, sibling):
    """
    Copy a single sibling from its protobuf representation onto a
    RiakContent container.

    Optional fields that are not set on the message leave the
    corresponding sibling attribute untouched.

    :param rpb_content: a single RpbContent message
    :type rpb_content: kvhosting.pb.riak_pb2.RpbContent
    :param sibling: a RiakContent sibling container
    :type sibling: RiakContent
    :rtype: RiakContent
    """
    has = rpb_content.HasField

    # A sibling exists unless the message is explicitly marked deleted.
    sibling.exists = not (has("deleted") and rpb_content.deleted)

    # Optional text fields: (message field, sibling attribute).
    text_fields = (
        ("content_type", "content_type"),
        ("charset", "charset"),
        ("content_encoding", "content_encoding"),
        ("vtag", "etag"),
    )
    for field, attribute in text_fields:
        if has(field):
            setattr(sibling, attribute,
                    bytes_to_str(getattr(rpb_content, field)))

    sibling.links = [self.decode_link(link) for link in rpb_content.links]

    if has("last_mod"):
        modified = float(rpb_content.last_mod)
        if has("last_mod_usecs"):
            modified += rpb_content.last_mod_usecs / 1000000.0
        sibling.last_modified = modified

    sibling.usermeta = dict(
        (bytes_to_str(meta.key), bytes_to_str(meta.value))
        for meta in rpb_content.usermeta)
    sibling.indexes = set(
        (bytes_to_str(entry.key),
         decode_index_value(entry.key, entry.value))
        for entry in rpb_content.indexes)

    sibling.encoded_data = rpb_content.value
    return sibling