class Distributor():
    """Pushes the tasks of a job onto the Mongo-backed task queue."""

    def __init__(self):
        """Connect to the local MongoDB and bind the ``taskqueue`` collection."""
        self.mongo = MongoClient('mongodb://localhost:27017/')
        queue_collection = self.mongo.crooshdb.taskqueue
        self.taskqueue = MongoQueue(
            queue_collection,
            consumer_id="distributor",
            timeout=300,
            max_attempts=3,
        )

    def addJob(self, job):
        """Enqueue every task of ``job``.

        Each element of ``job.tasks`` is put on the queue as whatever its
        ``toJSON()`` method returns.
        """
        enqueue = self.taskqueue.put
        for task in job.tasks:
            enqueue(task.toJSON())
def __init__(self):
    """Connect to the local MongoDB and bind the ``taskqueue`` collection."""
    client = MongoClient('mongodb://localhost:27017/')
    self.mongo = client
    # The queue lives in the ``taskqueue`` collection of the ``crooshdb`` database.
    queue_collection = client.crooshdb.taskqueue
    self.taskqueue = MongoQueue(
        queue_collection,
        consumer_id="distributor",
        timeout=300,
        max_attempts=3,
    )
class MultiThreadingCrawler(object):
    """Producer/consumer demo that drains a queue with a bounded set of threads.

    NOTE(review): ``MongoQueue`` is constructed without arguments and is used
    through ``put``/``get``/``empty``/``task_done``/``join``; confirm that the
    ``MongoQueue`` in scope really offers that interface.
    """

    def __init__(self):
        self.queue = MongoQueue()
        # Pause, in seconds (passed to time.sleep), between scheduling rounds.
        self.gap = 1

    def producer(self):
        """Put the integers 0..99 on the queue."""
        for i in range(100):
            self.queue.put(i)

    def consumer(self):
        """Print and acknowledge items until the queue reports empty."""
        # NOTE(review): empty() followed by get() is not atomic; with several
        # consumers a thread may wait in get() after another one took the last
        # item. Left unchanged because the queue's non-blocking API is not
        # visible here.
        while not self.queue.empty():
            item = self.queue.get()
            print(item)
            self.queue.task_done()

    def run(self, max_threads, *args, **kwargs):
        """Start the producer, then keep up to ``max_threads`` consumers busy.

        Args:
            max_threads: upper bound on simultaneously tracked consumer threads.
            *args, **kwargs: accepted for call compatibility; unused.
        """
        producer = threading.Thread(target=self.producer)
        producer.start()
        # Let the producer run for a while
        time.sleep(self.gap)

        threads = []
        while not self.queue.empty():
            # Rebuild the list instead of calling remove() while iterating it:
            # removing during iteration skips the element after each removal.
            threads = [t for t in threads if t.is_alive()]
            while len(threads) < max_threads and not self.queue.empty():
                worker = threading.Thread(target=self.consumer)
                # Attribute form replaces the deprecated setDaemon().
                worker.daemon = True
                worker.start()
                threads.append(worker)
            # sleep temporarily so CPU can focus execution on other threads
            time.sleep(self.gap)

        # Waiting for all elements to be processed
        self.queue.join()

    def __call__(self, *args, **kwargs):
        """Forward the call to :meth:`run`."""
        self.run(*args, **kwargs)
def __init__(self, name):
    """Initialise the worker thread, its Mongo handles and its task state.

    Args:
        name: thread name; also used as the queue ``consumer_id``.
    """
    threading.Thread.__init__(self, name=name)
    self.exitFlag = 0
    # Attribute form replaces the deprecated Thread.setDaemon().
    self.daemon = True

    # Mongo connection
    mongo = MongoClient('mongodb://localhost:27017/')
    self.db = mongo.crooshdb
    self.taskqueue = MongoQueue(
        self.db.taskqueue,
        consumer_id=name,
        timeout=300,
        max_attempts=3,
    )

    # Active tasks
    self.activeTask = None
    self.taskRunner = None
    self.acceptsTasks = True

    self.register()
def dequeue(n):
    """Take the next job from ``test_queue.queue_1`` as ``consumer_1``.

    Args:
        n: unused by the body.

    Returns:
        The job payload's ``"context_id"`` value, or ``None`` when ``next()``
        yields nothing truthy.
    """
    collection = pymongo.MongoClient().test_queue.queue_1
    job = MongoQueue(collection, "consumer_1").next()
    if not job:
        return None
    return job.payload["context_id"]
class MongoQueueTest(TestCase):
    """Tests for MongoQueue; they need a MongoDB reachable with default settings."""

    def setUp(self):
        self.client = pymongo.MongoClient()
        self.db = self.client.test_queue
        self.queue = MongoQueue(self.db.queue_1, "consumer_1")

    def tearDown(self):
        # Drop the whole database so every test starts from an empty queue.
        self.client.drop_database("test_queue")

    def assert_job_equal(self, job, data):
        """Assert that every key of ``data`` has the same value in ``job.payload``."""
        for key, expected in data.items():
            self.assertEqual(job.payload[key], expected)

    def test_put_next(self):
        data = {"context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": time.time()}
        self.queue.put(dict(data))
        job = self.queue.next()
        self.assert_job_equal(job, data)
        self.assertIsNone(self.queue.next())

    def test_atomic_next(self):
        data = {"context_id": "alpha321",
                "data": [1, 2, 3],
                "more-data": time.time()}
        self.queue.put(dict(data))
        pool = mp.Pool()
        try:
            jobs = pool.map(dequeue, [1, 2])
        finally:
            # Release the worker processes; the pool was previously leaked.
            pool.close()
            pool.join()
        # Only one worker may obtain the single queued job.
        self.assertNotEqual(jobs[0], jobs[1])

    def test_get_empty_queue(self):
        self.assertIsNone(self.queue.next())

    def test_priority(self):
        self.queue.put({"name": "alice"}, priority=1)
        self.queue.put({"name": "bob"}, priority=2)
        self.queue.put({"name": "mike"}, priority=0)
        names = [self.queue.next().payload['name'] for _ in range(3)]
        self.assertEqual(["bob", "alice", "mike"], names)
        self.assertIsNone(self.queue.next())

    def test_complete(self):
        data = {"context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": datetime.now()}
        self.queue.put(data)
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        job.complete()
        self.assertEqual(self.queue.size(), 0)
        self.assertIsNone(self.queue.next())

    def test_release(self):
        data = {"context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": time.time()}
        self.queue.put(data)
        job = self.queue.next()
        job.release()
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        self.assert_job_equal(job, data)
        self.assertIsNone(self.queue.next())

    def test_max_attempts(self):
        data = {"context_id": "alpha", "ts": time.time()}
        self.queue.put(dict(data))
        attempts = 0
        # range() instead of the Python-2-only xrange().
        for _ in range(0, self.queue.max_attempts):
            job = self.queue.next()
            if not job:
                break
            # test_context_manager_error shows that an exception raised inside
            # ``with job`` reaches the caller, so it is caught here to let the
            # loop go on to the next attempt.
            try:
                with job:
                    attempts += 1
                    raise Exception()
            except Exception:
                pass
        self.assertEqual(attempts, self.queue.max_attempts)

    def test_error(self):
        """Placeholder; no assertions yet."""
        pass

    def test_progress(self):
        """Placeholder; no assertions yet."""
        pass

    def test_stats(self):
        for _ in range(5):
            data = {"context_id": "alpha",
                    "data": [1, 2, 3],
                    "more-data": time.time()}
            self.queue.put(data)
        job = self.queue.next()
        job.error("problem")

        stats = self.queue.stats()
        self.assertEqual({'available': 5,
                          'total': 5,
                          'locked': 0,
                          'errors': 0}, stats)

    def test_context_manager_error(self):
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        try:
            with job as data:
                self.assertEqual(data['payload']["foobar"], 1)
                # Item is returned to the queue on error
                raise SyntaxError
        except SyntaxError:
            pass

        job = self.queue.next()
        self.assertEqual(job.data['attempts'], 1)

    def test_context_manager_complete(self):
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        with job as data:
            self.assertEqual(data['payload']["foobar"], 1)
        self.assertIsNone(self.queue.next())

    def test_next_by_payload(self):
        self.queue.put({"type": "first_type", "param": "param1"})
        self.queue.put({"type": "second_type", "param": "param2"})
        self.queue.put({"type": "third_type", "param": "param3"})

        job = self.queue.next({"payload.type": "second_type"})
        with job as data:
            self.assertEqual(data["payload"]["param"], "param2")

        job = self.queue.next({"payload.type": "third_type"})
        with job as data:
            self.assertEqual(data["payload"]["param"], "param3")

        self.assertIsNone(self.queue.next({"payload.type": "fourth_type"}))

        job = self.queue.next()
        with job as data:
            self.assertEqual(data["payload"]["param"], "param1")

        self.assertIsNone(self.queue.next())
def setUp(self):
    """Open a default MongoClient and build the queue with ``retry_after=2``."""
    client = pymongo.MongoClient()
    database = client.test_queue
    self.client = client
    self.db = database
    self.queue = MongoQueue(database.queue_1, "consumer_1", retry_after=2)
class MongoQueueRetryTimeTests(TestCase):
    """Timing tests for a queue built with ``retry_after=2``.

    They need a MongoDB reachable with default settings and use real sleeps.
    """

    def setUp(self):
        self.client = pymongo.MongoClient()
        self.db = self.client.test_queue
        self.queue = MongoQueue(self.db.queue_1, "consumer_1", retry_after=2)

    def tearDown(self):
        # Drop the whole database so every test starts from an empty queue.
        self.client.drop_database("test_queue")

    def test_complete_scenario(self):
        self.queue.put({"message": "hello"})
        job = self.queue.next()
        # Catch the deliberate failure so the rest of the scenario still runs
        # if the job's context manager lets the exception through.
        try:
            with job:
                raise Exception
        except Exception:
            pass

        time.sleep(1)
        # Still inside the retry window: nothing should be handed out.
        job2 = self.queue.next()
        self.assertIsNone(job2)

        time.sleep(1.1)
        job3 = self.queue.next()
        # Use the freshly fetched job3; the stale ``job`` was entered here before.
        # NOTE(review): other tests read data["payload"][...] inside
        # ``with job as data``; confirm data["message"] matches this queue
        # implementation.
        with job3 as data:
            self.assertEqual(data["message"], "hello")

        job4 = self.queue.next()
        self.assertIsNone(job4)

    def test_error_with_increased_retry(self):
        self.queue.put({"message": "hello"})
        job = self.queue.next()
        job.error(custom_retry_after=3)

        time.sleep(1)
        job = self.queue.next()
        self.assertIsNone(job)

        time.sleep(2.1)
        job2 = self.queue.next()
        with job2 as data:
            self.assertEqual(data["message"], "hello")

        job3 = self.queue.next()
        self.assertIsNone(job3)

    def test_release_with_increased_retry(self):
        self.queue.put({"message": "hello"})
        job = self.queue.next()
        job.release(custom_retry_after=3)

        time.sleep(1)
        job = self.queue.next()
        self.assertIsNone(job)

        time.sleep(2.1)
        job2 = self.queue.next()
        with job2 as data:
            self.assertEqual(data["message"], "hello")

        job3 = self.queue.next()
        self.assertIsNone(job3)
class MongoQueueTest(TestCase):
    """Tests for MongoQueue; they need a MongoDB reachable with default settings."""

    def setUp(self):
        self.client = pymongo.MongoClient()
        self.db = self.client.test_queue
        self.queue = MongoQueue(self.db.queue_1, "consumer_1")

    def tearDown(self):
        # Drop the whole database so every test starts from an empty queue.
        self.client.drop_database("test_queue")

    def assert_job_equal(self, job, data):
        """Assert that every key of ``data`` has the same value in ``job.payload``."""
        for key, expected in data.items():
            self.assertEqual(job.payload[key], expected)

    def test_put_next(self):
        data = {
            "context_id": "alpha",
            "data": [1, 2, 3],
            "more-data": time.time()
        }
        self.queue.put(dict(data))
        job = self.queue.next()
        self.assert_job_equal(job, data)
        self.assertIsNone(self.queue.next())

    def test_atomic_next(self):
        data = {
            "context_id": "alpha321",
            "data": [1, 2, 3],
            "more-data": time.time()
        }
        self.queue.put(dict(data))
        pool = mp.Pool()
        try:
            jobs = pool.map(dequeue, [1, 2])
        finally:
            # Release the worker processes; the pool was previously leaked.
            pool.close()
            pool.join()
        # Only one worker may obtain the single queued job.
        self.assertNotEqual(jobs[0], jobs[1])

    def test_get_empty_queue(self):
        self.assertIsNone(self.queue.next())

    def test_priority(self):
        self.queue.put({"name": "alice"}, priority=1)
        self.queue.put({"name": "bob"}, priority=2)
        self.queue.put({"name": "mike"}, priority=0)
        names = [self.queue.next().payload['name'] for _ in range(3)]
        self.assertEqual(["bob", "alice", "mike"], names)
        self.assertIsNone(self.queue.next())

    def test_complete(self):
        data = {
            "context_id": "alpha",
            "data": [1, 2, 3],
            "more-data": datetime.now()
        }
        self.queue.put(data)
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        job.complete()
        self.assertEqual(self.queue.size(), 0)
        self.assertIsNone(self.queue.next())

    def test_release(self):
        data = {
            "context_id": "alpha",
            "data": [1, 2, 3],
            "more-data": time.time()
        }
        self.queue.put(data)
        job = self.queue.next()
        job.release()
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        self.assert_job_equal(job, data)
        self.assertIsNone(self.queue.next())

    def test_max_attempts(self):
        data = {"context_id": "alpha", "ts": time.time()}
        self.queue.put(dict(data))
        attempts = 0
        # range() instead of the Python-2-only xrange().
        for _ in range(0, self.queue.max_attempts):
            job = self.queue.next()
            if not job:
                break
            # test_context_manager_error shows that an exception raised inside
            # ``with job`` reaches the caller, so it is caught here to let the
            # loop go on to the next attempt.
            try:
                with job:
                    attempts += 1
                    raise Exception()
            except Exception:
                pass
        self.assertEqual(attempts, self.queue.max_attempts)

    def test_error(self):
        """Placeholder; no assertions yet."""
        pass

    def test_progress(self):
        """Placeholder; no assertions yet."""
        pass

    def test_stats(self):
        for _ in range(5):
            data = {
                "context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": time.time()
            }
            self.queue.put(data)
        job = self.queue.next()
        job.error("problem")

        stats = self.queue.stats()
        self.assertEqual({
            'available': 5,
            'total': 5,
            'locked': 0,
            'errors': 0
        }, stats)

    def test_context_manager_error(self):
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        try:
            with job as data:
                self.assertEqual(data['payload']["foobar"], 1)
                # Item is returned to the queue on error
                raise SyntaxError
        except SyntaxError:
            pass

        job = self.queue.next()
        self.assertEqual(job.data['attempts'], 1)

    def test_context_manager_complete(self):
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        with job as data:
            self.assertEqual(data['payload']["foobar"], 1)
        self.assertIsNone(self.queue.next())

    def test_next_by_payload(self):
        self.queue.put({"type": "first_type", "param": "param1"})
        self.queue.put({"type": "second_type", "param": "param2"})
        self.queue.put({"type": "third_type", "param": "param3"})

        job = self.queue.next({"payload.type": "second_type"})
        with job as data:
            self.assertEqual(data["payload"]["param"], "param2")

        job = self.queue.next({"payload.type": "third_type"})
        with job as data:
            self.assertEqual(data["payload"]["param"], "param3")

        self.assertIsNone(self.queue.next({"payload.type": "fourth_type"}))

        job = self.queue.next()
        with job as data:
            self.assertEqual(data["payload"]["param"], "param1")

        self.assertIsNone(self.queue.next())
def setUp(self):
    """Connect to the MongoDB named by ``TEST_MONGODB`` and build the test queue.

    ``os.environ.get`` returns ``None`` when the variable is unset, in which
    case the client falls back to its default host.
    """
    # MongoClient replaces pymongo.Connection, which PyMongo no longer ships.
    self.client = pymongo.MongoClient(os.environ.get("TEST_MONGODB"))
    self.db = self.client.test_queue
    self.queue = MongoQueue(self.db.queue_1, "consumer_1")
class MongoQueueTest(TestCase):
    """Tests for MongoQueue against the MongoDB named by ``TEST_MONGODB``."""

    def setUp(self):
        # MongoClient replaces pymongo.Connection, which PyMongo no longer ships.
        self.client = pymongo.MongoClient(os.environ.get("TEST_MONGODB"))
        self.db = self.client.test_queue
        self.queue = MongoQueue(self.db.queue_1, "consumer_1")

    def tearDown(self):
        # Drop the whole database so every test starts from an empty queue.
        self.client.drop_database("test_queue")

    def assert_job_equal(self, job, data):
        """Assert that every key of ``data`` has the same value in ``job.payload``."""
        for key, expected in data.items():
            self.assertEqual(job.payload[key], expected)

    def test_put_next(self):
        data = {"context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": time.time()}
        self.queue.put(dict(data))
        job = self.queue.next()
        self.assert_job_equal(job, data)

    def test_get_empty_queue(self):
        self.assertIsNone(self.queue.next())

    def test_priority(self):
        # Here the priority travels inside the payload itself.
        self.queue.put({"priority": 1, "name": "alice"})
        self.queue.put({"priority": 2, "name": "bob"})
        self.queue.put({"priority": 0, "name": "mike"})
        names = [self.queue.next().payload['name'] for _ in range(3)]
        self.assertEqual(["bob", "alice", "mike"], names)

    def test_complete(self):
        data = {"context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": datetime.now()}
        self.queue.put(data)
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        job.complete()
        self.assertEqual(self.queue.size(), 0)

    def test_release(self):
        data = {"context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": time.time()}
        self.queue.put(data)
        job = self.queue.next()
        job.release()
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        self.assert_job_equal(job, data)

    def test_error(self):
        """Placeholder; no assertions yet."""
        pass

    def test_progress(self):
        """Placeholder; no assertions yet."""
        pass

    def test_stats(self):
        for _ in range(5):
            data = {"context_id": "alpha",
                    "data": [1, 2, 3],
                    "more-data": time.time()}
            self.queue.put(data)
        job = self.queue.next()
        job.error("problem")

        stats = self.queue.stats()
        self.assertEqual({'available': 5,
                          'total': 5,
                          'locked': 0,
                          'errors': 0}, stats)

    def test_context_manager_error(self):
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        try:
            with job as data:
                self.assertEqual(data['payload']["foobar"], 1)
                # Item is returned to the queue on error
                raise SyntaxError
        except SyntaxError:
            pass

        job = self.queue.next()
        self.assertEqual(job.data['attempts'], 1)

    def test_context_manager_complete(self):
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        with job as data:
            self.assertEqual(data['payload']["foobar"], 1)
        self.assertIsNone(self.queue.next())
def __init__(self):
    """Create the work queue and set the pause used between scheduling rounds."""
    work_queue = MongoQueue()
    self.queue = work_queue
    # Stored for later use; this constructor itself never sleeps.
    self.gap = 1
class MongoQueueTest(TestCase):
    """Tests for MongoQueue against the MongoDB named by ``TEST_MONGODB``."""

    def setUp(self):
        # MongoClient replaces pymongo.Connection, which PyMongo no longer ships.
        self.client = pymongo.MongoClient(os.environ.get("TEST_MONGODB"))
        self.db = self.client.test_queue
        self.queue = MongoQueue(self.db.queue_1, "consumer_1")

    def tearDown(self):
        # Drop the whole database so every test starts from an empty queue.
        self.client.drop_database("test_queue")

    def assert_job_equal(self, job, data):
        """Assert that every key of ``data`` has the same value in ``job.payload``."""
        for key, expected in data.items():
            self.assertEqual(job.payload[key], expected)

    def test_put_next(self):
        data = {
            "context_id": "alpha",
            "data": [1, 2, 3],
            "more-data": time.time()
        }
        self.queue.put(dict(data))
        job = self.queue.next()
        self.assert_job_equal(job, data)

    def test_get_empty_queue(self):
        self.assertIsNone(self.queue.next())

    def test_priority(self):
        self.queue.put({"name": "alice"}, priority=1)
        self.queue.put({"name": "bob"}, priority=2)
        self.queue.put({"name": "mike"}, priority=0)
        names = [self.queue.next().payload['name'] for _ in range(3)]
        self.assertEqual(["bob", "alice", "mike"], names)

    def test_complete(self):
        data = {
            "context_id": "alpha",
            "data": [1, 2, 3],
            "more-data": datetime.now()
        }
        self.queue.put(data)
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        job.complete()
        self.assertEqual(self.queue.size(), 0)

    def test_release(self):
        data = {
            "context_id": "alpha",
            "data": [1, 2, 3],
            "more-data": time.time()
        }
        self.queue.put(data)
        job = self.queue.next()
        job.release()
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        self.assert_job_equal(job, data)

    def test_error(self):
        """Placeholder; no assertions yet."""
        pass

    def test_progress(self):
        """Placeholder; no assertions yet."""
        pass

    def test_stats(self):
        for _ in range(5):
            data = {
                "context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": time.time()
            }
            self.queue.put(data)
        job = self.queue.next()
        job.error("problem")

        stats = self.queue.stats()
        self.assertEqual({
            'available': 5,
            'total': 5,
            'locked': 0,
            'errors': 0
        }, stats)

    def test_context_manager_error(self):
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        try:
            with job as data:
                self.assertEqual(data['payload']["foobar"], 1)
                # Item is returned to the queue on error
                raise SyntaxError
        except SyntaxError:
            pass

        job = self.queue.next()
        self.assertEqual(job.data['attempts'], 1)

    def test_context_manager_complete(self):
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        with job as data:
            self.assertEqual(data['payload']["foobar"], 1)
        self.assertIsNone(self.queue.next())
class Machine(threading.Thread):
    '''A machine represents one worker on the network.

    Depending on the incoming task, will spawn an appropriate task processor.
    It WILL NOT add tasks to the queue.
    '''

    def __init__(self, name):
        '''Set up the daemon thread, the Mongo handles and the task state.

        name -- thread name; also used as the queue consumer_id and as the
                _id of this machine's document in the machines collection.
        '''
        threading.Thread.__init__(self, name=name)
        self.exitFlag = 0
        # Attribute form replaces the deprecated Thread.setDaemon().
        self.daemon = True

        # Mongo connection
        mongo = MongoClient('mongodb://localhost:27017/')
        self.db = mongo.crooshdb
        self.taskqueue = MongoQueue(
            self.db.taskqueue,
            consumer_id=name,
            timeout=300,
            max_attempts=3)

        # Active tasks
        self.activeTask = None
        self.taskRunner = None
        self.acceptsTasks = True

        self.register()

    def register(self):
        '''Adds machine to list of active machines on DB'''
        # $setOnInsert with upsert only writes the defaults when no document
        # for this machine exists yet.
        self.db.machines.find_and_modify(
            query={"_id": self.name},
            update={"$setOnInsert": {
                "_id": self.name,
                "activeTask": None,
                "acceptsTasks": True
            }},
            upsert=True,
            new=True
        )

    def run(self):
        '''Thread entry point: poll for tasks until close() is called.'''
        self.listen()

    def close(self):
        '''Ask the polling loop to stop after its current iteration.'''
        self.exitFlag = 1
        print("Cleanup complete")

    def listen(self):
        '''Poll the task queue every two seconds until exitFlag is set.'''
        print("Machine listening for tasks...")
        while not self.exitFlag:
            self.pollTasks()
            time.sleep(2)
        # No self.join() here: listen() runs on this very thread (via run()),
        # and joining the current thread raises RuntimeError. Returning ends
        # the thread.

    def pollTasks(self):
        '''Fetch the next queued task, if any, and hand it to a task runner.'''
        if self.taskRunner:
            return
        task = self.taskqueue.next()
        if not task:
            return

        # Set active task for machine
        taskWrapper = MayaRenderTask.buildFromQueue(task)
        print("Running task {0}".format(taskWrapper.jobID))
        self.updateMachineStatus(task)
        # NOTE(review): the status is cleared right after the runner is
        # constructed, and the queue job is never completed or released in
        # this class; confirm MayaRenderTaskRunner does its work inside the
        # constructor and settles the job itself.
        self.taskRunner = MayaRenderTaskRunner(taskWrapper)

        # Clear task for machine on completion and reregister to queue
        self.updateMachineStatus(None)
        self.taskRunner = None

    def updateMachineStatus(self, task=None):
        '''Record on the DB which task, if any, this machine is running.

        task -- the queue job being started, or None when the machine is idle.
        '''
        self.acceptsTasks = not task
        # $set touches only these two fields; the earlier replacement-style
        # update would have dropped any other field on the machine document.
        self.db.machines.find_and_modify(
            query={"_id": self.name},
            update={"$set": {
                "activeTask": task.job_id if task else None,
                "acceptsTasks": self.acceptsTasks
            }}
        )