def test_watcher(redis_connection):
    redis_connection.time = RedisTime()
    rds = redis_connection
    queue_name = get_random_id()
    out_queue = NamedQueue(queue_name, rds)
    try:
        # Create a server and hijack its running flag and the current time in 'redis'
        client = WatcherClient(rds)
        server = WatcherServer(rds, rds)
        server.running = ToggleTrue()
        rds.time.current = 0
        assert out_queue.length() == 0

        # Send a simple event to occur soon
        client.touch(10, 'one-second', queue_name, {'first': 'one'})
        server.try_run()
        assert out_queue.length() == 0  # Nothing yet

        rds.time.current = 12  # Jump forward 12 seconds
        server.try_run()
        assert out_queue.length() == 1
        assert out_queue.pop() == {'first': 'one'}

        # Send a simple event to occur soon, then change our mind
        client.touch(10, 'one-second', queue_name, {'first': 'one'})
        client.touch(20, 'one-second', queue_name, {'first': 'one'})
        server.try_run()
        assert out_queue.length() == 0  # Nothing yet

        # Set events to occur, in inverse order, reuse a key, overwrite content and timeout
        client.touch(200, 'one-second', queue_name, {'first': 'last'})
        client.touch(100, '100-second', queue_name, {'first': '100'})
        client.touch(50, '50-second', queue_name, {'first': '50'})
        server.try_run()
        assert out_queue.length() == 0  # Nothing yet

        # Step through time until every pending event has fired
        for _ in range(15):
            rds.time.current += 20
            server.try_run()

        assert out_queue.length() == 3
        assert out_queue.pop() == {'first': '50'}
        assert out_queue.pop() == {'first': '100'}
        assert out_queue.pop() == {'first': 'last'}

        # Send a simple event to occur soon, then stop it
        rds.time.current = 0
        client.touch(10, 'one-second', queue_name, {'first': 'one'})
        server.try_run()
        assert out_queue.length() == 0  # Nothing yet

        client.clear('one-second')
        rds.time.current = 12  # Jump forward 12 seconds
        server.try_run()
        assert out_queue.length() == 0  # Still nothing because it was cleared
    finally:
        out_queue.delete()
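
# NOTE: the test above leans on two small test doubles from the harness. What
# follows is a minimal sketch of the behaviour it assumes, not the shipped
# fixtures: `RedisTime` stands in for the redis-py `time()` command (which
# returns a `(seconds, microseconds)` tuple) so the test can move the clock by
# hand, and `ToggleTrue` alternates truthiness so a `while self.running:` loop
# inside the server executes exactly one pass per try_run() call.
class RedisTime:
    def __init__(self):
        self.current = 0

    def __call__(self):
        # Mirror the shape of the redis TIME reply: (seconds, microseconds)
        return self.current, 0


class ToggleTrue:
    def __init__(self):
        self.value = False

    def __bool__(self):
        # True on the first check, False on the next, and so on
        self.value = not self.value
        return self.value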
def test_get_message(datastore, client):
    notification_queue = get_random_id()
    queue = NamedQueue("nq-%s" % notification_queue,
                       host=config.core.redis.persistent.host,
                       port=config.core.redis.persistent.port)
    queue.delete()
    msg = random_model_obj(Submission).as_primitives()
    queue.push(msg)

    res = client.ingest.get_message(notification_queue)
    assert isinstance(res, dict)
    assert 'sid' in res
    assert 'results' in res
    assert res == msg
def test_get_message_list(datastore, client):
    notification_queue = get_random_id()
    queue = NamedQueue("nq-%s" % notification_queue,
                       host=config.core.redis.persistent.host,
                       port=config.core.redis.persistent.port)
    queue.delete()
    msg_0 = random_model_obj(Submission).as_primitives()
    queue.push(msg_0)
    msg_1 = random_model_obj(Submission).as_primitives()
    queue.push(msg_1)

    res = client.ingest.get_message_list(notification_queue)
    assert len(res) == 2
    assert res[0] == msg_0
    assert res[1] == msg_1
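
# For context, the two client calls exercised above are assumed to drain the
# persistent "nq-<name>" NamedQueue that the tests seed. A hypothetical sketch
# of that contract (illustrative only; the real implementation lives in the
# assemblyline_client package and may differ):
def _get_message_sketch(notification_queue):
    queue = NamedQueue("nq-%s" % notification_queue,
                       host=config.core.redis.persistent.host,
                       port=config.core.redis.persistent.port)
    # Pop a single message without blocking; None when the queue is empty
    return queue.pop(blocking=False)


def _get_message_list_sketch(notification_queue):
    queue = NamedQueue("nq-%s" % notification_queue,
                       host=config.core.redis.persistent.host,
                       port=config.core.redis.persistent.port)
    messages = []
    while True:
        msg = queue.pop(blocking=False)
        if msg is None:
            break
        messages.append(msg)
    return messages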
class DistributedBackup(object):
    def __init__(self, working_dir, worker_count=50, spawn_workers=True,
                 use_threading=False, logger=None):
        self.working_dir = working_dir
        self.datastore = forge.get_datastore(archive_access=True)
        self.logger = logger
        self.plist = []
        self.use_threading = use_threading
        self.instance_id = get_random_id()
        self.worker_queue = NamedQueue(f"r-worker-{self.instance_id}", ttl=1800)
        self.done_queue = NamedQueue(f"r-done-{self.instance_id}", ttl=1800)
        self.hash_queue = Hash(f"r-hash-{self.instance_id}")
        self.bucket_error = []
        self.VALID_BUCKETS = sorted(list(self.datastore.ds.get_models().keys()))
        self.worker_count = worker_count
        self.spawn_workers = spawn_workers
        self.total_count = 0
        self.error_map_count = {}
        self.missing_map_count = {}
        self.map_count = {}
        self.last_time = 0
        self.last_count = 0
        self.error_count = 0

    def cleanup(self):
        self.worker_queue.delete()
        self.done_queue.delete()
        self.hash_queue.delete()
        for p in self.plist:
            p.terminate()

    def done_thread(self, title):
        t0 = time.time()
        self.last_time = t0

        running_threads = self.worker_count

        while running_threads > 0:
            msg = self.done_queue.pop(timeout=1)

            if msg is None:
                continue

            if "stopped" in msg:
                running_threads -= 1
                continue

            bucket_name = msg.get('bucket_name', 'unknown')

            if msg.get('success', False):
                self.total_count += 1

                if msg.get("missing", False):
                    if bucket_name not in self.missing_map_count:
                        self.missing_map_count[bucket_name] = 0
                    self.missing_map_count[bucket_name] += 1
                else:
                    if bucket_name not in self.map_count:
                        self.map_count[bucket_name] = 0
                    self.map_count[bucket_name] += 1

                new_t = time.time()
                if (new_t - self.last_time) > 5:
                    if self.logger:
                        self.logger.info("%s (%s at %s keys/sec) ==> %s" %
                                         (self.total_count, new_t - self.last_time,
                                          int((self.total_count - self.last_count) / (new_t - self.last_time)),
                                          self.map_count))
                    self.last_count = self.total_count
                    self.last_time = new_t
            else:
                self.error_count += 1

                if bucket_name not in self.error_map_count:
                    self.error_map_count[bucket_name] = 0
                self.error_map_count[bucket_name] += 1

        # Cleanup
        self.cleanup()

        summary = ""
        summary += "\n########################\n"
        summary += "####### SUMMARY #######\n"
        summary += "########################\n"
        summary += "%s items - %s errors - %s secs\n\n" % \
                   (self.total_count, self.error_count, time.time() - t0)

        for k, v in self.map_count.items():
            summary += "\t%15s: %s\n" % (k.upper(), v)

        if len(self.missing_map_count.keys()) > 0:
            summary += "\n\nMissing data:\n\n"
            for k, v in self.missing_map_count.items():
                summary += "\t%15s: %s\n" % (k.upper(), v)

        if len(self.error_map_count.keys()) > 0:
            summary += "\n\nErrors:\n\n"
            for k, v in self.error_map_count.items():
                summary += "\t%15s: %s\n" % (k.upper(), v)

        if len(self.bucket_error) > 0:
            summary += f"\nThese buckets failed to {title.lower()} completely: {self.bucket_error}\n"

        if self.logger:
            self.logger.info(summary)

    # noinspection PyBroadException,PyProtectedMember
    def backup(self, bucket_list, follow_keys=False, query=None):
        if query is None:
            query = 'id:*'

        for bucket in bucket_list:
            if bucket not in self.VALID_BUCKETS:
                if self.logger:
                    self.logger.warning("\n%s is not a valid bucket.\n\n"
                                        "The list of valid buckets is the following:\n\n\t%s\n" %
                                        (bucket.upper(), "\n\t".join(self.VALID_BUCKETS)))
                return

        targets = ', '.join(bucket_list)

        try:
            if self.logger:
                self.logger.info("\n-----------------------")
                self.logger.info("----- Data Backup -----")
                self.logger.info("-----------------------")
                self.logger.info(f"    Deep: {follow_keys}")
                self.logger.info(f"    Buckets: {targets}")
                self.logger.info(f"    Workers: {self.worker_count}")
                self.logger.info(f"    Target directory: {self.working_dir}")
                self.logger.info(f"    Filtering query: {query}")

            # Start the workers
            for x in range(self.worker_count):
                if self.use_threading:
                    t = threading.Thread(target=backup_worker,
                                         args=(x, self.instance_id, self.working_dir))
                    t.daemon = True
                    t.start()
                else:
                    p = Process(target=backup_worker, args=(x, self.instance_id, self.working_dir))
                    p.start()
                    self.plist.append(p)

            # Start done thread
            dt = threading.Thread(target=self.done_thread, args=('Backup', ), name="Done thread")
            dt.daemon = True
            dt.start()

            # Process data buckets
            for bucket_name in bucket_list:
                try:
                    collection = self.datastore.get_collection(bucket_name)
                    for item in collection.stream_search(query, fl="id", item_buffer_size=500, as_obj=False):
                        self.worker_queue.push({
                            "bucket_name": bucket_name,
                            "key": item['id'],
                            "follow_keys": follow_keys
                        })

                except Exception as e:
                    self.cleanup()
                    if self.logger:
                        self.logger.exception(e)
                        self.logger.error("Error occurred while processing bucket %s." % bucket_name)
                    self.bucket_error.append(bucket_name)

            # Tell every worker it can stop, then wait for the done thread
            for _ in range(self.worker_count):
                self.worker_queue.push({"stop": True})

            dt.join()
        except Exception as e:
            if self.logger:
                self.logger.exception(e)

    def restore(self):
        try:
            if self.logger:
                self.logger.info("\n------------------------")
                self.logger.info("----- Data Restore -----")
                self.logger.info("------------------------")
                self.logger.info(f"    Workers: {self.worker_count}")
                self.logger.info(f"    Target directory: {self.working_dir}")

            # Start the workers
            for x in range(self.worker_count):
                if self.use_threading:
                    t = threading.Thread(target=restore_worker,
                                         args=(x, self.instance_id, self.working_dir))
                    t.daemon = True
                    t.start()
                else:
                    p = Process(target=restore_worker, args=(x, self.instance_id, self.working_dir))
                    p.start()
                    self.plist.append(p)

            # Start done thread
            dt = threading.Thread(target=self.done_thread, args=('Restore', ), name="Done thread")
            dt.daemon = True
            dt.start()

            # Wait for workers to finish
            dt.join()
        except Exception as e:
            if self.logger:
                self.logger.exception(e)