Example #1
# Imports assumed from the Assemblyline project layout; RedisTime and
# ToggleTrue are test doubles (a settable fake clock and a run-once flag)
# defined alongside this test.
from assemblyline.common.uid import get_random_id
from assemblyline.remote.datatypes.queues.named import NamedQueue
from assemblyline_core.watcher.client import WatcherClient
from assemblyline_core.watcher.server import WatcherServer


def test_watcher(redis_connection):
    redis_connection.time = RedisTime()
    rds = redis_connection
    queue_name = get_random_id()
    out_queue = NamedQueue(queue_name, rds)
    try:
        # Create a server and hijack its running flag and the current time in 'redis'
        client = WatcherClient(rds)
        server = WatcherServer(rds, rds)
        server.running = ToggleTrue()
        rds.time.current = 0
        assert out_queue.length() == 0

        # Send a simple event to occur soon
        client.touch(10, 'one-second', queue_name, {'first': 'one'})
        server.try_run()
        assert out_queue.length() == 0  # Nothing yet
        rds.time.current = 12  # Jump forward 12 seconds
        server.try_run()
        assert out_queue.length() == 1
        assert out_queue.pop() == {'first': 'one'}

        # Send a simple event to occur soon, then change our mind
        client.touch(10, 'one-second', queue_name, {'first': 'one'})
        client.touch(20, 'one-second', queue_name, {'first': 'one'})
        server.try_run()
        assert out_queue.length() == 0  # Nothing yet

        # Queue events in reverse order, reusing the 'one-second' key to
        # overwrite its content and timeout
        client.touch(200, 'one-second', queue_name, {'first': 'last'})
        client.touch(100, '100-second', queue_name, {'first': '100'})
        client.touch(50, '50-second', queue_name, {'first': '50'})
        server.try_run()
        assert out_queue.length() == 0  # Nothing yet

        for _ in range(15):
            rds.time.current += 20
            server.try_run()

        assert out_queue.length() == 3
        assert out_queue.pop() == {'first': '50'}
        assert out_queue.pop() == {'first': '100'}
        assert out_queue.pop() == {'first': 'last'}

        # Send a simple event to occur soon, then stop it
        rds.time.current = 0
        client.touch(10, 'one-second', queue_name, {'first': 'one'})
        server.try_run()
        assert out_queue.length() == 0  # Nothing yet
        client.clear('one-second')
        rds.time.current = 12  # Jump forward 12 seconds
        server.try_run()
        assert out_queue.length() == 0  # still nothing because it was cleared

    finally:
        out_queue.delete()
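
Everything the watcher test needs from NamedQueue is the small surface it
exercises: push(), length(), pop(), and delete(). A minimal sketch of that
surface in isolation (the queue name is made up, and constructing NamedQueue
without explicit connection arguments is assumed to fall back to the
configured Redis):

from assemblyline.remote.datatypes.queues.named import NamedQueue

q = NamedQueue('example-queue')       # hypothetical name; the test uses get_random_id()
q.push({'first': 'one'})              # enqueue a JSON-serializable payload
assert q.length() == 1                # number of waiting items
assert q.pop() == {'first': 'one'}    # FIFO pop; pop(timeout=1) returns None when empty
q.delete()                            # remove the backing Redis key when done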
Example #2

# The datastore and client fixtures, the config object, and the
# random_model_obj/Submission helpers are assumed to be provided by the
# surrounding test suite.
def test_get_message(datastore, client):
    notification_queue = get_random_id()
    queue = NamedQueue("nq-%s" % notification_queue,
                       host=config.core.redis.persistent.host,
                       port=config.core.redis.persistent.port)
    queue.delete()
    msg = random_model_obj(Submission).as_primitives()
    queue.push(msg)

    res = client.ingest.get_message(notification_queue)
    assert isinstance(res, dict)
    assert 'sid' in res
    assert 'results' in res
    assert res == msg
Example #3

def test_get_message_list(datastore, client):
    notification_queue = get_random_id()
    queue = NamedQueue("nq-%s" % notification_queue,
                       host=config.core.redis.persistent.host,
                       port=config.core.redis.persistent.port)
    queue.delete()
    msg_0 = random_model_obj(Submission).as_primitives()
    queue.push(msg_0)
    msg_1 = random_model_obj(Submission).as_primitives()
    queue.push(msg_1)

    res = client.ingest.get_message_list(notification_queue)
    assert len(res) == 2
    assert res[0] == msg_0
    assert res[1] == msg_1
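
Both tests rely on the convention that the ingest notification queue for a
given id lives at "nq-<id>" on the persistent Redis host, and that
get_message_list returns the queued messages in FIFO order. A condensed
sketch of that producer/consumer pairing (fixtures as in the tests above;
the payload is illustrative):

notification_queue = get_random_id()
producer = NamedQueue("nq-%s" % notification_queue,
                      host=config.core.redis.persistent.host,
                      port=config.core.redis.persistent.port)
producer.push({'sid': 'example-sid'})                      # illustrative payload
first = client.ingest.get_message(notification_queue)      # pops a single message
rest = client.ingest.get_message_list(notification_queue)  # returns what is left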
Example #4
# Imports assumed from the Assemblyline project layout; backup_worker and
# restore_worker are expected to be module-level functions defined next to
# this class.
import threading
import time

from multiprocessing import Process

from assemblyline.common import forge
from assemblyline.common.uid import get_random_id
from assemblyline.remote.datatypes.hash import Hash
from assemblyline.remote.datatypes.queues.named import NamedQueue


class DistributedBackup(object):
    def __init__(self,
                 working_dir,
                 worker_count=50,
                 spawn_workers=True,
                 use_threading=False,
                 logger=None):
        self.working_dir = working_dir
        self.datastore = forge.get_datastore(archive_access=True)
        self.logger = logger
        self.plist = []
        self.use_threading = use_threading
        self.instance_id = get_random_id()
        self.worker_queue = NamedQueue(f"r-worker-{self.instance_id}",
                                       ttl=1800)
        self.done_queue = NamedQueue(f"r-done-{self.instance_id}", ttl=1800)
        self.hash_queue = Hash(f"r-hash-{self.instance_id}")
        self.bucket_error = []
        self.VALID_BUCKETS = sorted(self.datastore.ds.get_models().keys())
        self.worker_count = worker_count
        self.spawn_workers = spawn_workers
        self.total_count = 0
        self.error_map_count = {}
        self.missing_map_count = {}
        self.map_count = {}
        self.last_time = 0
        self.last_count = 0
        self.error_count = 0

    def cleanup(self):
        self.worker_queue.delete()
        self.done_queue.delete()
        self.hash_queue.delete()
        for p in self.plist:
            p.terminate()

    def done_thread(self, title):
        t0 = time.time()
        self.last_time = t0

        running_threads = self.worker_count

        while running_threads > 0:
            msg = self.done_queue.pop(timeout=1)

            if msg is None:
                continue

            if "stopped" in msg:
                running_threads -= 1
                continue

            bucket_name = msg.get('bucket_name', 'unknown')

            if msg.get('success', False):
                self.total_count += 1

                if msg.get("missing", False):
                    if bucket_name not in self.missing_map_count:
                        self.missing_map_count[bucket_name] = 0

                    self.missing_map_count[bucket_name] += 1
                else:
                    if bucket_name not in self.map_count:
                        self.map_count[bucket_name] = 0

                    self.map_count[bucket_name] += 1

                new_t = time.time()
                if (new_t - self.last_time) > 5:
                    if self.logger:
                        self.logger.info(
                            "%s (%s at %s keys/sec) ==> %s" %
                            (self.total_count, new_t - self.last_time,
                             int((self.total_count - self.last_count) /
                                 (new_t - self.last_time)), self.map_count))
                    self.last_count = self.total_count
                    self.last_time = new_t
            else:
                self.error_count += 1

                if bucket_name not in self.error_map_count:
                    self.error_map_count[bucket_name] = 0

                self.error_map_count[bucket_name] += 1

        # Cleanup
        self.cleanup()

        summary = ""
        summary += "\n########################\n"
        summary += "####### SUMMARY  #######\n"
        summary += "########################\n"
        summary += "%s items - %s errors - %s secs\n\n" % \
                   (self.total_count, self.error_count, time.time() - t0)

        for k, v in self.map_count.items():
            summary += "\t%15s: %s\n" % (k.upper(), v)

        if len(self.missing_map_count.keys()) > 0:
            summary += "\n\nMissing data:\n\n"
            for k, v in self.missing_map_count.items():
                summary += "\t%15s: %s\n" % (k.upper(), v)

        if len(self.error_map_count.keys()) > 0:
            summary += "\n\nErrors:\n\n"
            for k, v in self.error_map_count.items():
                summary += "\t%15s: %s\n" % (k.upper(), v)

        if len(self.bucket_error) > 0:
            summary += f"\nThese buckets failed to {title.lower()} completely: {self.bucket_error}\n"
        if self.logger:
            self.logger.info(summary)

    # noinspection PyBroadException,PyProtectedMember
    def backup(self, bucket_list, follow_keys=False, query=None):
        if query is None:
            query = 'id:*'

        for bucket in bucket_list:
            if bucket not in self.VALID_BUCKETS:
                if self.logger:
                    self.logger.warning(
                        "\n%s is not a valid bucket.\n\n"
                        "The list of valid buckets is the following:\n\n\t%s\n"
                        % (bucket.upper(), "\n\t".join(self.VALID_BUCKETS)))
                return

        targets = ', '.join(bucket_list)
        try:
            if self.logger:
                self.logger.info("\n-----------------------")
                self.logger.info("----- Data Backup -----")
                self.logger.info("-----------------------")
                self.logger.info(f"    Deep: {follow_keys}")
                self.logger.info(f"    Buckets: {targets}")
                self.logger.info(f"    Workers: {self.worker_count}")
                self.logger.info(f"    Target directory: {self.working_dir}")
                self.logger.info(f"    Filtering query: {query}")

            # Start the workers
            for x in range(self.worker_count):
                if self.use_threading:
                    t = threading.Thread(target=backup_worker,
                                         args=(x, self.instance_id,
                                               self.working_dir))
                    t.daemon = True
                    t.start()
                else:
                    p = Process(target=backup_worker,
                                args=(x, self.instance_id, self.working_dir))
                    p.start()
                    self.plist.append(p)

            # Start done thread
            dt = threading.Thread(target=self.done_thread,
                                  args=('Backup', ),
                                  name="Done thread")
            dt.daemon = True
            dt.start()

            # Process data buckets
            for bucket_name in bucket_list:
                try:
                    collection = self.datastore.get_collection(bucket_name)
                    for item in collection.stream_search(query,
                                                         fl="id",
                                                         item_buffer_size=500,
                                                         as_obj=False):
                        self.worker_queue.push({
                            "bucket_name": bucket_name,
                            "key": item['id'],
                            "follow_keys": follow_keys
                        })

                except Exception as e:
                    self.cleanup()
                    if self.logger:
                        self.logger.exception(e)
                        self.logger.error(
                            "Error occurred while processing bucket %s." %
                            bucket_name)
                    self.bucket_error.append(bucket_name)

            for _ in range(self.worker_count):
                self.worker_queue.push({"stop": True})

            dt.join()
        except Exception as e:
            if self.logger:
                self.logger.exception(e)

    def restore(self):
        try:
            if self.logger:
                self.logger.info("\n------------------------")
                self.logger.info("----- Data Restore -----")
                self.logger.info("------------------------")
                self.logger.info(f"    Workers: {self.worker_count}")
                self.logger.info(f"    Target directory: {self.working_dir}")

            for x in range(self.worker_count):
                if self.use_threading:
                    t = threading.Thread(target=restore_worker,
                                         args=(x, self.instance_id,
                                               self.working_dir))
                    t.daemon = True
                    t.start()
                else:
                    p = Process(target=restore_worker,
                                args=(x, self.instance_id, self.working_dir))
                    p.start()
                    self.plist.append(p)

            # Start done thread
            dt = threading.Thread(target=self.done_thread,
                                  args=('Restore', ),
                                  name="Done thread")
            dt.daemon = True
            dt.start()

            # Wait for workers to finish
            dt.join()
        except Exception as e:
            if self.logger:
                self.logger.exception(e)
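
A minimal driver for the class above, assuming it runs on a host with
datastore and Redis access (the working directory and bucket name are
illustrative):

import logging

logging.basicConfig(level=logging.INFO)

backup = DistributedBackup("/tmp/al-backup",      # illustrative working directory
                           worker_count=8,
                           use_threading=True,
                           logger=logging.getLogger("backup"))
backup.backup(["submission"])                     # must be one of VALID_BUCKETS

# Restoring reads the same working directory back through the restore workers:
restore = DistributedBackup("/tmp/al-backup", worker_count=8, use_threading=True,
                            logger=logging.getLogger("restore"))
restore.restore()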