from riak import RiakClient

# AbstractStorage, conf, log and CachedDict are assumed to be provided by the
# surrounding module; they are application-level objects, not part of riak.
class RiakStorage(AbstractStorage):

    bucket_types = {
        'users':         'egara-lww',
        'users-current': 'egara-unique',
        'imap-events':   'egara-lww',
        'imap-folders':  'egara-lww',
        'imap-folders-current':  'egara-unique',
        'imap-message-timeline': 'egara-lww'
    }

    def __init__(self, *args, **kw):
        riak_host = 'localhost'
        riak_port = 8098

        self.client = RiakClient(
            protocol='http',
            host=conf['STORAGE'].get('riak_host', riak_host),
            http_port=conf['STORAGE'].get('riak_port', riak_port)
        )
        self.client.set_decoder('application/octet-stream', self._decode_binary)
        self.users_cache = CachedDict(ttl=10)

    def _decode_binary(self, data):
        # Normalize raw octet-stream payloads to UTF-8 bytes (str(data) on
        # bytes would yield the repr under Python 3).
        return data if isinstance(data, bytes) else data.encode("utf-8")

    def _get_bucket(self, bucketname):
        _type = self.bucket_types.get(bucketname)
        if _type:
            return self.client.bucket_type(_type).bucket(bucketname)

        return None

    def get(self, key, index, doctype=None, fields=None, **kw):
        """
            Standard API for accessing key/value storage
        """
        result = None
        log.debug("Riak get key %r from %r", key, index)

        try:
            bucket = self._get_bucket(index)
            res = bucket.get(key)
            if res and res.data:
                result = res.data

        except Exception as e:
            log.warning("Riak exception: %s", str(e))
            result = None

        return result
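
A minimal usage sketch for the class above; the key is an illustrative assumption, while 'users' is one of the bucket names declared in bucket_types:

storage = RiakStorage()
user = storage.get('john.doe@example.org', 'users')  # returns None on miss or error
print(user)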
Example 2
import re

from riak import RiakClient
# LeafNode and BranchNode are assumed to come from Graphite's finder API
# (graphite.node); RiakTSReader is this project's own reader class.
from graphite.node import LeafNode, BranchNode


class RiakTSFinder(object):
    def __init__(self, config):
        self.config = config
        self.riak = RiakClient(host=config['riak_ts']['host'],
                               port=config['riak_ts']['port'])

    def find_nodes(self, query):
        bucket = self.riak.bucket_type('default').bucket('metric_nodes')

        exact = bucket.get("node-%s" % query.pattern)
        if exact.exists:
            yield LeafNode(query.pattern, RiakTSReader(query.pattern, self.riak, self.config))
        else:
            pattern = query.pattern
            pattern = re.sub(r'\.select metric', '', pattern)
            # If there's no star in the pattern:
            if re.match(r'^[^*]*$', pattern):
                # remove any trailing dots and add a .* at the end
                pattern = re.sub(r'\.*$', '.*', pattern)
            # Replace each embedded * with a Solr-regex-compatible wildcard,
            # scoped between dots: [^.]*
            pattern = re.sub(r'\*', '[^.]*', pattern)
            print("Solr pattern: %s" % pattern)
            results = bucket.search("branch_s:/%s/" % pattern, index='metric_nodes', rows=1000000)

            print(results)
            if results['num_found'] > 0:
                print "Branch search results"
                for doc in results['docs']:
                    branch = bucket.get(doc['_yz_rk'])
                    branch_node = BranchNode(branch.data['branch_s'])
                    print "BranchNode: name: %s, path: %s" % (branch_node.name, branch_node.path)
                    yield branch_node
            else:
                node_results = bucket.search("node_s:/%s/" % pattern, index='metric_nodes', rows=1000000)
                print "Node search results"
                print(node_results['docs'])
                for doc in node_results['docs']:
                    node = bucket.get(doc['_yz_rk'])
                    node_name = node.data['node_s']
                    print "Node: %s" % node_name
                    yield LeafNode(node_name, RiakTSReader(node_name, self.riak, self.config))
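
To make the pattern rewriting above concrete, a small self-contained trace (the sample query strings are assumptions for illustration):

# Illustrative trace of the Solr-pattern rewriting performed in find_nodes().
import re

for sample in ('servers.web1', 'servers.web1.*'):
    pattern = sample
    if re.match(r'^[^*]*$', pattern):             # no star in the pattern:
        pattern = re.sub(r'\.*$', '.*', pattern)  # append a trailing .*
    pattern = re.sub(r'\*', '[^.]*', pattern)     # scope each * to one path segment
    print("%s -> %s" % (sample, pattern))
# Both samples end up as 'servers.web1.[^.]*': the [^.]* wildcard matches
# exactly one dot-separated path component in the Solr regex query.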
Example 3
from riak import RiakClient

client = RiakClient(protocol='pbc',
                    nodes=[{
                        'host': '172.17.0.2',
                        'http_port': 8098,
                        'pb_port': 8087
                    }])
#client.retries = 3

bucket = client.bucket_type('mfn_counter_trigger').bucket('counter_triggers')

# Use the code below when the mapper wants to create a k-of-n parallel execution.
# topic, key and value are separated by ";" and are used by the trigger to
# publish <key, value> to Kafka's topic. topic and key must not contain the
# character ";"; value may contain it.
counter_name = 'topic;key;value'
k = 3
counter = bucket.new(counter_name)
counter.increment(k)
counter.store()

# Use the code below when a finishing parallel execution needs to decrease the
# counter by 1 (same ";"-separated naming convention as above).
counter_name = 'topic;key;value'
counter = bucket.get(counter_name)
counter.decrement(1)
counter.store()
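
A hedged sketch of the trigger side implied by the comments above: once the counter drains to zero, unpack the ";"-separated name and publish (the Kafka publish itself is out of scope here, so a print stands in for it):

# The split follows the convention stated above: split at most twice so the
# value part may itself contain ";".
topic, key, value = counter_name.split(';', 2)

counter = bucket.get(counter_name)  # re-fetch the current counter value
if counter.value <= 0:
    # all k parallel branches have finished
    print("publish <%s, %s> to Kafka topic %s" % (key, value, topic))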
Example 4
from riak import RiakClient

# protocol='pbc' talks to the default pb_port (8087); the http_port given
# here is therefore unused.
client = RiakClient(protocol='pbc', host='127.0.0.1', http_port=8098)
print(client.ping())
tweetBucket = client.bucket("tweets")
print(tweetBucket.get_properties())
print(client.get_buckets())
print(client.bucket_type("tweets"))
allKeysInTweets = client.get_keys(tweetBucket)
print("Number of keys... ",len(allKeysInTweets))


stream = client.stream_keys(tweetBucket)
for key_list in stream:
     print("key_list;" , key_list)
stream.close()
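
A slightly more defensive variant of the same loop: a try/finally block guarantees the stream is closed even if iteration raises:

stream = client.stream_keys(tweetBucket)
try:
    for key_list in stream:
        print("key_list:", key_list)
finally:
    stream.close()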
Example 5
import re

from riak import RiakClient

# `df` (a pandas DataFrame), `my_query` and `get_directory` are assumed to be
# defined earlier in the original script; only this excerpt of it survived.
print("df is: {0}".format(df))
print("df is: {0}".format(df.to_dict()))

# client = RiakClient(pb_port=8087, protocol='pbc')

# Single Node
# client = RiakClient(protocol='http', host='127.0.0.1', http_port=8098)

# A Cluster of Nodes
client = RiakClient(nodes=[{'host': '127.0.0.1', 'http_port': 8098}])

bucket = client.bucket_type('news').bucket('hscicNews')

custom_search = [
    'Care|Quality|Commission', 'September|2004',
    'general|population|generally', 'Care Quality Commission|admission',
    'general population|Alzheimer'
]

val = []

for expr in custom_search:
    regex = re.compile(expr)
    val.append(my_query(regex, get_directory()))

result = bucket.new('RESULT', data=df.to_dict())
result.store()  # without store() the object is never written to Riak
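
To verify the write, a hedged read-back of the object stored above (the 'RESULT' key is taken from the code):

fetched = bucket.get('RESULT')
if fetched.exists:
    print("stored result: {0}".format(fetched.data))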
Example 6
from riak import RiakClient
from riak.datatypes import Set


class DbEngine(object):
    def __init__(self):
        self.client = RiakClient(pb_port=8087, protocol='pbc')

    def add_query(self, user_id, query, bucket_name='user_queries'):
        """
        Adds a query to the database.

        :param int user_id: Id of the user associated with the query.
        :param str query: Query to be saved in the database.
        """

        bucket = self.client.bucket_type('set').bucket(bucket_name)
        queries_bucket = Set(bucket, str(user_id))
        queries_bucket.add(str(query))
        queries_bucket.store()
        bucket = self.client.bucket_type('set').bucket('all_queries')
        queries_bucket = Set(bucket, 'queries')
        queries_bucket.add(str(query))
        queries_bucket.store()

    def get_user_queries(self, user_id, bucket_name='user_queries'):
        """
        Retrieves user queries from the database.

        :param int user_id: Id of the user associated with the query.
        :return: set of user queries.
        """

        bucket = self.client.bucket_type('set').bucket(bucket_name)
        queries_bucket = Set(bucket, str(user_id))
        queries_bucket.reload()
        return queries_bucket.value

    def get_all_queries(self, bucket_name='all_queries'):
        """
        Retrieves all queries from the database.

        :return: set of all queries.
        """

        bucket = self.client.bucket_type('set').bucket(bucket_name)
        queries_bucket = Set(bucket, 'queries')
        queries_bucket.reload()
        return queries_bucket.value

    def add_keywords(self, query, keywords, bucket_name='keywords'):
        """
        Adds keywords for the given query to the database.

        :param str query: Query associated with the keywords.
        :param list keywords: List of keywords produced from the query.
        """

        bucket = self.client.bucket_type('set').bucket(bucket_name)
        keywords_bucket = Set(bucket, str(query))
        for keyword in keywords:
            keywords_bucket.add(str(keyword))
        keywords_bucket.store()

    def get_keywords(self, query, bucket_name='keywords'):
        """
        Retrieves all keywords associated with the given query from the database.

        :return: set of keywords.
        """

        bucket = self.client.bucket_type('set').bucket(bucket_name)
        keywords_bucket = Set(bucket, str(query))
        keywords_bucket.reload()
        return keywords_bucket.value

    def add_url(self, query, url, bucket_name='urls'):
        """
        Adds a URL for the given query to the database.

        :param str query: Query associated with the URL.
        :param str url: URL of a page satisfying the search requirements.
        """

        bucket = self.client.bucket_type('set').bucket(bucket_name)
        urls_bucket = Set(bucket, str(query))
        urls_bucket.add(str(url))
        urls_bucket.store()

    def get_urls(self, query, bucket_name='urls'):
        """
        Retrieves all URLs associated with the given query from the database.

        :return: set of URLs.
        """

        bucket = self.client.bucket_type('set').bucket(bucket_name)
        urls_bucket = Set(bucket, str(query))
        urls_bucket.reload()
        return set(urls_bucket.value)
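
A short usage sketch for the class above; the user id, query string and URL are made-up illustrations:

engine = DbEngine()
engine.add_query(42, 'riak python client')
engine.add_keywords('riak python client', ['riak', 'python', 'client'])
engine.add_url('riak python client', 'https://example.org/riak-docs')
print(engine.get_user_queries(42))   # set of this user's queries
print(engine.get_urls('riak python client'))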
Example 7
from riak import RiakClient

client = RiakClient(protocol='pbc', host='127.0.0.1', http_port=8098)


bucket = client.bucket_type('tweetsSchema').bucket('tweets')
print("bucket content" ,bucket)

print(client.fulltext_search("tweets", "*:*"))
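
fulltext_search returns a dict shaped like the search results used in Example 2 above; a hedged sketch of walking the matches:

results = client.fulltext_search("tweets", "*:*")
print("matches: %d" % results['num_found'])
for doc in results['docs']:
    # '_yz_rk' holds the Riak key of the matching object (see Example 2)
    print(doc['_yz_rk'])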