예제 #1
0
def process(status=lambda x: None):
    """Process all of the outstanding raw messages.

    Calls ``process_one_element`` in a loop until it returns a falsy
    value.  The keywords returned for each element are collected and
    written to the index in batches, together with the batched
    "polished" mutator.

    Args:
      status: Function that accepts a string to report status.  Right
        now "status" is just "." for each processed item, "!" for
        each batched save and a final "done" line once the loop ends
        normally.
    Todo:
      We should be using multigets to reduce the load on cassandra.
    """
    q = common.Que()
    pool, raw, polished, index = common.open_cassandra_connections()
    polished = polished.batch()
    batched_keywords = []

    def flush():
        # Write the pending index rows, then send the polished batch.
        if batched_keywords:
            # Both flush paths use batch_insert so they agree with each
            # other; it takes the single merged mapping.
            # NOTE(review): assumes `index` is a pycassa ColumnFamily and
            # dict_merge returns {row_key: {column: value}} -- confirm.
            index.batch_insert(dict_merge(*batched_keywords))
            # Clear in place so the same rows are not re-sent next time.
            del batched_keywords[:]
        polished.send()

    counter = 0
    try:
        while True:
            keywords = process_one_element(q, pool, raw, polished)
            # NOTE(review): a falsy result is treated as "nothing left to
            # process"; if an element can legitimately yield no keywords
            # the loop stops early -- confirm against process_one_element.
            if not keywords:
                flush()
                break
            # Queue this element's keywords for the next index write;
            # without this the index is never updated.
            batched_keywords.append(keywords)
            counter += 1
            status('.')
            if counter > BATCH_SIZE:
                status('!')
                flush()
                counter = 0
        status('done\n')
    except KeyboardInterrupt:
        # Best effort on Ctrl-C: persist whatever has been batched so the
        # index does not lag behind the polished data already queued.
        flush()
예제 #2
0
#!/usr/bin/env python
 
import common 

# Reset stored state: delete the 'pending' key via the Redis connection,
# then truncate each of the named stores through the Cassandra pool.
common.open_redis_connection().delete('pending')
pool, _raw, _polished, _index = common.open_cassandra_connections()
for cf in ('RawData', 'PolishedData', 'index'):
    # Single-argument print(...) parses on both Python 2 and Python 3.
    # The two spaces reproduce the output of the former
    # `print "Truncating ", cf` statement exactly.
    print("Truncating  %s" % cf)
    pool.truncate(cf)
예제 #3
0
 def __init__(self):
     """Create the batch queue, open the Cassandra handles, reset the counter.

     Only the pool and the raw handle from
     ``common.open_cassandra_connections()`` are kept; the raw handle is
     stored in its batched form (the result of ``.batch()``).
     """
     self.que = common.BatchQue()
     # The last two handles returned are not needed here.
     pool, raw_cf, _, _ = common.open_cassandra_connections()
     self.pool = pool
     self.raw = raw_cf.batch()
     # The counter starts at 1, not 0.
     self.counter = 1