class RiakStorage(AbstractStorage):
    """Riak-backed key/value store implementing the AbstractStorage API."""

    # Maps logical bucket names to the Riak bucket type they live under.
    # 'egara-lww' appears to be last-write-wins, 'egara-unique' to hold
    # "current" pointers -- TODO confirm against the cluster's type config.
    bucket_types = {
        'users': 'egara-lww',
        'users-current': 'egara-unique',
        'imap-events': 'egara-lww',
        'imap-folders': 'egara-lww',
        'imap-folders-current': 'egara-unique',
        'imap-message-timeline': 'egara-lww'
    }

    def __init__(self, *args, **kw):
        # Defaults used when the STORAGE config section omits host/port.
        riak_host = 'localhost'
        riak_port = 8098

        self.client = RiakClient(
            protocol='http',
            host=conf['STORAGE'].get('riak_host', riak_host),
            http_port=conf['STORAGE'].get('riak_port', riak_port)
        )
        # Decode raw octet-stream payloads through our own helper.
        self.client.set_decoder('application/octet-stream', self._decode_binary)
        # Short-lived cache for user lookups (10 second TTL).
        self.users_cache = CachedDict(ttl=10)

    def _decode_binary(self, data):
        # Registered as the 'application/octet-stream' content decoder.
        return str(data).encode("utf-8")

    def _get_bucket(self, bucketname):
        """Return the typed Riak bucket for *bucketname*, or None if unknown."""
        _type = self.bucket_types.get(bucketname, None)
        if _type:
            return self.client.bucket_type(_type).bucket(bucketname)
        return None

    def get(self, key, index, doctype=None, fields=None, **kw):
        """
            Standard API for accessing key/value storage
        """
        result = None
        log.debug("Riak get key %r from %r", key, index)

        try:
            bucket = self._get_bucket(index)
            res = bucket.get(key)
            if res and res.data:
                result = res.data
        except Exception as e:
            # Fixed Py2-only handler syntax (`except Exception, e`), which is
            # a SyntaxError on Python 3. Any fetch failure is logged and
            # treated as a miss.
            log.warning("Riak exception: %s", str(e))
            result = None

        return result
class RiakTSFinder(object):
    """Graphite-style finder resolving metric path queries against Riak TS."""

    def __init__(self, config):
        # Expects config['riak_ts']['host'] and config['riak_ts']['port'].
        self.config = config
        self.riak = RiakClient(host=config['riak_ts']['host'],
                               port=config['riak_ts']['port'])

    def find_nodes(self, query):
        """Yield BranchNode/LeafNode objects matching *query.pattern*.

        Tries an exact node key first, then falls back to Solr regex
        searches over branch and node fields in the 'metric_nodes' index.
        """
        bucket = self.riak.bucket_type('default').bucket('metric_nodes')

        # Fast path: the pattern names one stored node exactly.
        exact = bucket.get("node-%s" % query.pattern)
        if exact.exists:
            yield LeafNode(query.pattern,
                           RiakTSReader(query.pattern, self.riak, self.config))
            return

        # Raw strings for regexes; `print "..."` statements converted to
        # parenthesized single-argument calls (identical output, and
        # consistent with the print() calls used elsewhere in this file).
        pattern = re.sub(r'\.select metric', '', query.pattern)

        # If there's no star in the pattern: strip any trailing .'s and
        # match exactly one more path component.
        if re.match(r'^[^*]*$', pattern):
            pattern = re.sub(r'\.*$', '.*', pattern)

        # Replace embedded *'s with a Solr-regex wildcard scoped between .'s.
        pattern = re.sub(r'\*', '[^.]*', pattern)
        print("Solr pattern: %s" % pattern)

        results = bucket.search("branch_s:/%s/" % pattern,
                                index='metric_nodes', rows=1000000)
        print(results)
        if results['num_found'] > 0:
            print("Branch search results")
            for doc in results['docs']:
                branch = bucket.get(doc['_yz_rk'])
                branch_node = BranchNode(branch.data['branch_s'])
                print("BranchNode: name: %s, path: %s" % (branch_node.name,
                                                          branch_node.path))
                yield branch_node
        else:
            node_results = bucket.search("node_s:/%s/" % pattern,
                                         index='metric_nodes', rows=1000000)
            print("Node search results")
            print(node_results['docs'])
            for doc in node_results['docs']:
                node = bucket.get(doc['_yz_rk'])
                node_name = node.data['node_s']
                print("Node: %s" % node_name)
                yield LeafNode(node_name,
                               RiakTSReader(node_name, self.riak, self.config))
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from riak import RiakClient client = RiakClient(protocol='pbc', nodes=[{ 'host': '172.17.0.2', 'http_port': 8098, 'pb_port': 8087 }]) #client.retries = 3 bucket = client.bucket_type('mfn_counter_trigger').bucket('counter_triggers') # Use the code below when mapper wants to create a k-of-n parallel execution # topic, key, value are separated by ";", and are used for trigger to publish <key, value> to kafka's topic. # topic and key must not contain the character ";". value can contain the character ";". counter_name = 'topic;key;value' k = 3 counter = bucket.new(counter_name) counter.increment(k) counter.store() # Use the code below when a finishing parallel execution needs to decrease the counter by 1 # topic, key, value are separated by ";", and are used for trigger to publish <key, value> to kafka's topic. # topic and key must not contain the character ";". value can contain the character ";". counter_name = 'topic;key;value' counter = bucket.get(counter_name)
from riak import RiakClient, RiakNode

# NOTE(review): protocol='pbc' combined with http_port looks inconsistent --
# confirm which port/protocol the client actually uses to connect.
client = RiakClient(protocol='pbc', host='127.0.0.1', http_port=8098)
print(client.ping())  # connectivity check

tweetBucket = client.bucket("tweets")
print(tweetBucket.get_properties())
print(client.get_buckets())
print(client.bucket_type("tweets"))

# Listing every key is expensive on a real cluster; acceptable for a demo.
allKeysInTweets = client.get_keys(tweetBucket)
print("Number of keys... ", len(allKeysInTweets))

# Stream keys in batches instead of materializing them all at once.
stream = client.stream_keys(tweetBucket)
for key_list in stream:
    print("key_list;", key_list)
stream.close()
)
# ^ closes a call that begins before this chunk (truncated view).

print("df is: {0}".format(df))
print("test")
print("df is: {0}".format(df.to_dict()))

# client = RiakClient(pb_port=8087, protocol='pbc')
# Single Node
# client = RiakClient(protocol='http', host='127.0.0.1', http_port=8098)
# A Cluster of Nodes
client = RiakClient(nodes=[{'host': '127.0.0.1', 'http_port': 8098}])

# bucket = client.bucket_type('news').bucket('hscicNews')
bucket = client.bucket_type('news').bucket('hscicNews')

# Pipe-separated alternation patterns to search the fetched documents for.
custom_search = [
    'Care|Quality|Commission',
    'September|2004',
    'general|population|generally',
    'Care Quality Commission|admission',
    'general population|Alzheimer'
]

val = []
# NOTE(review): x is the index and i is the pattern string -- names are
# swapped relative to convention; my_query/get_directory defined elsewhere.
for x, i in enumerate(custom_search, 0):
    regex = re.compile(i)
    val.append(my_query(regex, get_directory()))

# Store the DataFrame (as a dict) under the single key 'RESULT'.
keys = bucket.new('RESULT', data=df.to_dict())
class DbEngine(object):
    """Riak-backed persistence for user queries, keywords and URLs.

    All data is stored as Set CRDTs under the 'set' bucket type; the
    repeated bucket/Set construction is factored into small helpers.
    """

    def __init__(self):
        self.client = RiakClient(pb_port=8087, protocol='pbc')

    def _get_set(self, bucket_name, key):
        """Return the Set CRDT stored under *key* in *bucket_name*."""
        bucket = self.client.bucket_type('set').bucket(bucket_name)
        return Set(bucket, str(key))

    def _fetch_values(self, bucket_name, key):
        """Reload and return the current members of a stored Set."""
        stored = self._get_set(bucket_name, key)
        stored.reload()
        return stored.value

    def add_query(self, user_id, query, bucket_name='user_queries'):
        """
        Adds query to database.

        :param int user_id: Id of user associated with the query.
        :param str query: Query to be saved into database.
        """
        # Track the query per-user...
        queries_bucket = self._get_set(bucket_name, user_id)
        queries_bucket.add(str(query))
        queries_bucket.store()

        # ...and in the global set of all queries ever issued.
        queries_bucket = self._get_set('all_queries', 'queries')
        queries_bucket.add(str(query))
        queries_bucket.store()

    def get_user_queries(self, user_id, bucket_name='user_queries'):
        """
        Retrieves user queries from database.

        :param int user_id: Id of user associated with the query.
        :return: set of user queries.
        """
        return self._fetch_values(bucket_name, user_id)

    def get_all_queries(self, bucket_name='all_queries'):
        """
        Retrieves all queries from database.

        :return: set of all queries.
        """
        return self._fetch_values(bucket_name, 'queries')

    def add_keywords(self, query, keywords, bucket_name='keywords'):
        """
        Adds keywords for given query to database.

        :param str query: Query associated with keywords.
        :param list keywords: List of keywords produced from the query.
        """
        keywords_bucket = self._get_set(bucket_name, query)
        for keyword in keywords:
            keywords_bucket.add(str(keyword))
        # Single store after all adds -- one round trip instead of N.
        keywords_bucket.store()

    def get_keywords(self, query, bucket_name='keywords'):
        """
        Retrieves all keywords associated with given query from database.

        :return: set of keywords.
        """
        return self._fetch_values(bucket_name, query)

    def add_url(self, query, url, bucket_name='urls'):
        """
        Adds url for given query to database.

        :param str query: Query associated with url.
        :param str url: URL of page satisfying search requirements.
        """
        urls_bucket = self._get_set(bucket_name, query)
        urls_bucket.add(str(url))
        urls_bucket.store()

    def get_urls(self, query, bucket_name='urls'):
        """
        Retrieves all URLs associated with given query from database.

        :return: set of URLs.
        """
        # Copy into a fresh plain set (as the original did via set() | value)
        # so callers may mutate the result freely.
        return set(self._fetch_values(bucket_name, query))
from riak import RiakClient
import riak

# NOTE(review): protocol='pbc' combined with http_port looks inconsistent --
# confirm which port the client actually connects on.
client = RiakClient(protocol='pbc', host='127.0.0.1', http_port=8098)

bucket = client.bucket_type('tweetsSchema').bucket('tweets')
print("bucket content", bucket)

# Match-all Solr query against the 'tweets' search index.
print(client.fulltext_search("tweets", "*:*"))