Example #1
0
class Distributor():
    """Feeds the tasks of a job into the ``crooshdb.taskqueue`` queue."""

    def __init__(self):
        """Create the MongoDB client and the queue wrapper around ``taskqueue``."""
        self.mongo = MongoClient('mongodb://localhost:27017/')
        queue_collection = self.mongo.crooshdb.taskqueue
        self.taskqueue = MongoQueue(
            queue_collection,
            consumer_id="distributor",
            timeout=300,
            max_attempts=3)

    def addJob(self, job):
        """Put the ``toJSON()`` form of every entry in ``job.tasks`` on the queue."""
        for entry in job.tasks:
            payload = entry.toJSON()
            self.taskqueue.put(payload)
Example #2
0
 def __init__(self):
     """Create the MongoDB client and wrap ``crooshdb.taskqueue`` in a MongoQueue."""
     client = MongoClient('mongodb://localhost:27017/')
     self.mongo = client
     self.taskqueue = MongoQueue(
         client.crooshdb.taskqueue,
         consumer_id="distributor",
         timeout=300,
         max_attempts=3)
Example #3
0
class MultiThreadingCrawler(object):
    """Fill a queue from one producer thread and drain it with consumer threads.

    The queue object is used through the ``put`` / ``get`` / ``empty`` /
    ``task_done`` / ``join`` methods only.
    """

    def __init__(self):
        self.queue = MongoQueue()
        # Seconds slept before the first scheduling pass and between passes.
        self.gap = 1

    def producer(self):
        """Put the integers 0..99 on the queue."""
        for i in range(100):
            self.queue.put(i)

    def consumer(self):
        """Print and acknowledge queue items until the queue reports empty."""
        # NOTE(review): empty() followed by get() is not atomic; with several
        # consumers another thread may take the last item in between --
        # confirm how this queue's get() behaves on an empty queue.
        while not self.queue.empty():
            item = self.queue.get()
            print(item)
            self.queue.task_done()

    def run(self, max_threads, *args, **kwargs):
        """Start the producer, then keep up to ``max_threads`` consumers running.

        Returns once the queue is empty and ``queue.join()`` has returned.
        Extra positional/keyword arguments are accepted and ignored.
        """
        producer = threading.Thread(target=self.producer)
        producer.start()

        # Let the producer run for a while
        time.sleep(self.gap)

        threads = []
        while not self.queue.empty():
            # Rebuild the list rather than calling remove() while iterating it,
            # which skips the element following each removed thread.
            threads = [t for t in threads if t.is_alive()]
            while len(threads) < max_threads and not self.queue.empty():
                thread = threading.Thread(target=self.consumer)
                # Thread.setDaemon() is deprecated; set the attribute instead.
                thread.daemon = True
                thread.start()
                threads.append(thread)
            # sleep temporarily so CPU can focus execution on other threads
            time.sleep(self.gap)
        # Waiting for all elements to be processed
        self.queue.join()

    def __call__(self, *args, **kwargs):
        """Calling the instance is the same as calling ``run``."""
        self.run(*args, **kwargs)
Example #4
0
    def __init__(self, name):
        """Set up a daemon thread named ``name`` with its own task-queue handle.

        ``name`` is used both as the thread name and as the queue's
        ``consumer_id``. Finishes by calling ``self.register()``.
        """
        threading.Thread.__init__(self, name=name)
        self.exitFlag = 0
        # Thread.setDaemon() is deprecated; the ``daemon`` attribute is the
        # supported way to mark the thread as a daemon.
        self.daemon = True

        #  Mongo connection
        mongo = MongoClient('mongodb://localhost:27017/')
        self.db = mongo.crooshdb
        self.taskqueue = MongoQueue(
            self.db.taskqueue,
            consumer_id=name,
            timeout=300,
            max_attempts=3)

        # Active tasks
        self.activeTask = None
        self.taskRunner = None
        self.acceptsTasks = True
        self.register()
Example #5
0
def dequeue(n):
    """Take the next job from ``test_queue.queue_1`` and return its ``context_id``.

    ``n`` is not used. Returns ``None`` when ``next()`` yields no job.
    """
    collection = pymongo.MongoClient().test_queue.queue_1
    job = MongoQueue(collection, "consumer_1").next()
    if not job:
        return None
    return job.payload["context_id"]
Example #6
0
class MongoQueueTest(TestCase):
    """Tests for ``MongoQueue`` run against the ``test_queue`` database."""

    def setUp(self):
        self.client = pymongo.MongoClient()
        self.db = self.client.test_queue
        self.queue = MongoQueue(self.db.queue_1, "consumer_1")

    def tearDown(self):
        # Drop the whole database so each test starts from an empty queue.
        self.client.drop_database("test_queue")

    def assert_job_equal(self, job, data):
        """Assert that every key/value pair of ``data`` is in ``job.payload``."""
        for k, v in data.items():
            self.assertEqual(job.payload[k], v)

    def test_put_next(self):
        """A queued payload comes back from next(); a second next() gives None."""
        data = {"context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": time.time()}
        self.queue.put(dict(data))
        job = self.queue.next()
        self.assert_job_equal(job, data)

        job = self.queue.next()
        self.assertEqual(job, None)

    def test_atomic_next(self):
        """Two worker processes calling ``dequeue`` must return different results."""
        data = {"context_id": "alpha321",
                "data": [1, 2, 3],
                "more-data": time.time()}
        self.queue.put(dict(data))

        p = mp.Pool()
        try:
            jobs = p.map(dequeue, [1, 2])
        finally:
            # Shut the worker processes down even when map() raises, so the
            # test does not leak them.
            p.close()
            p.join()
        self.assertNotEqual(jobs[0], jobs[1])

    def test_get_empty_queue(self):
        """next() on an empty queue returns None."""
        job = self.queue.next()
        self.assertEqual(job, None)

    def test_priority(self):
        """Jobs are returned in descending ``priority`` order."""
        self.queue.put({"name": "alice"}, priority=1)
        self.queue.put({"name": "bob"}, priority=2)
        self.queue.put({"name": "mike"}, priority=0)

        self.assertEqual(
            ["bob", "alice", "mike"],
            [self.queue.next().payload['name'],
             self.queue.next().payload['name'],
             self.queue.next().payload['name']])

        job = self.queue.next()
        self.assertEqual(job, None)

    def test_complete(self):
        """complete() takes the job out of the queue (size goes from 1 to 0)."""
        data = {"context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": datetime.now()}

        self.queue.put(data)
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        job.complete()
        self.assertEqual(self.queue.size(), 0)

        job = self.queue.next()
        self.assertEqual(job, None)

    def test_release(self):
        """release() makes the job available to next() again."""
        data = {"context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": time.time()}

        self.queue.put(data)
        job = self.queue.next()
        job.release()
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        self.assert_job_equal(job, data)

        job = self.queue.next()
        self.assertEqual(job, None)

    def test_max_attempts(self):
        """A repeatedly failing job is handed out ``max_attempts`` times."""
        data = {"context_id": "alpha",
                "ts": time.time()}
        self.queue.put(dict(data))
        attempts = 0
        # range() instead of xrange(): xrange does not exist on Python 3.
        for _ in range(0, self.queue.max_attempts):
            job = self.queue.next()
            if not job:
                break
            # NOTE(review): this loop only continues if the job's context
            # manager keeps the exception from escaping -- confirm.
            with job:
                attempts += 1
                raise Exception()
        self.assertEqual(attempts, self.queue.max_attempts)

    def test_error(self):
        pass

    def test_progress(self):
        pass

    def test_stats(self):
        """stats() after five puts and one errored job."""

        for i in range(5):
            data = {"context_id": "alpha",
                    "data": [1, 2, 3],
                    "more-data": time.time()}
            self.queue.put(data)
        job = self.queue.next()
        job.error("problem")

        stats = self.queue.stats()
        self.assertEqual({'available': 5,
                          'total': 5,
                          'locked': 0,
                          'errors': 0}, stats)

    def test_context_manager_error(self):
        """An exception inside ``with job`` leaves the job queued with attempts == 1."""
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        try:
            with job as data:
                self.assertEqual(data['payload']["foobar"], 1)
                # Item is returned to the queue on error
                raise SyntaxError
        except SyntaxError:
            pass

        job = self.queue.next()
        self.assertEqual(job.data['attempts'], 1)

    def test_context_manager_complete(self):
        """Leaving ``with job`` without an error removes the job from the queue."""
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        with job as data:
            self.assertEqual(data['payload']["foobar"], 1)
        job = self.queue.next()
        self.assertEqual(job, None)

    def test_next_by_payload(self):
        """next() accepts a filter on payload fields and only returns matching jobs."""
        self.queue.put({"type": "first_type", "param":"param1"})
        self.queue.put({"type": "second_type", "param":"param2"})
        self.queue.put({"type": "third_type", "param":"param3"})

        job = self.queue.next({"payload.type": "second_type"})
        with job as data:
            self.assertEqual(data["payload"]["param"], "param2")

        job = self.queue.next({"payload.type": "third_type"})
        with job as data:
            self.assertEqual(data["payload"]["param"], "param3")

        job = self.queue.next({"payload.type": "fourth_type"})
        self.assertEqual(job, None)

        job = self.queue.next()
        with job as data:
            self.assertEqual(data["payload"]["param"], "param1")

        job = self.queue.next()
        self.assertEqual(job, None)
Example #7
0
 def setUp(self):
     """Create a default client and a queue on ``test_queue.queue_1`` with ``retry_after=2``."""
     client = pymongo.MongoClient()
     database = client.test_queue
     self.client = client
     self.db = database
     self.queue = MongoQueue(database.queue_1, "consumer_1", retry_after=2)
Example #8
0
class MongoQueueRetryTimeTests(TestCase):
    """Tests for retry delays, using a queue created with ``retry_after=2``."""

    def setUp(self):
        self.client = pymongo.MongoClient()
        self.db = self.client.test_queue
        self.queue = MongoQueue(self.db.queue_1, "consumer_1", retry_after=2)

    def tearDown(self):
        # Drop the whole database so each test starts from an empty queue.
        self.client.drop_database("test_queue")

    def test_complete_scenario(self):
        """A failed job is unavailable after 1s, available again after 2.1s."""
        self.queue.put({"message": "hello"})

        job = self.queue.next()
        # NOTE(review): the test only proceeds past this block if the job's
        # context manager keeps the exception from escaping -- confirm.
        with job as data:
            raise Exception

        time.sleep(1)
        job2 = self.queue.next()
        self.assertEqual(job2, None)
        time.sleep(1.1)

        job3 = self.queue.next()
        # Check the job that was just dequeued; the original re-entered the
        # first handle (``job``) and never looked at ``job3``.
        with job3 as data:
            self.assertEqual(data["message"], "hello")
        job4 = self.queue.next()
        self.assertEqual(job4, None)

    def test_error_with_increased_retry(self):
        """error(custom_retry_after=3) hides the job at 1s; it is back after 3.1s."""
        self.queue.put({"message": "hello"})
        job = self.queue.next()
        job.error(custom_retry_after=3)

        time.sleep(1)
        job = self.queue.next()
        self.assertEqual(job, None)

        time.sleep(2.1)

        job2 = self.queue.next()
        with job2 as data:
            self.assertEqual(data["message"], "hello")

        job3 = self.queue.next()
        self.assertEqual(job3, None)

    def test_release_with_increased_retry(self):
        """release(custom_retry_after=3) hides the job at 1s; it is back after 3.1s."""
        self.queue.put({"message": "hello"})
        job = self.queue.next()
        job.release(custom_retry_after=3)

        time.sleep(1)
        job = self.queue.next()
        self.assertEqual(job, None)

        time.sleep(2.1)

        job2 = self.queue.next()
        with job2 as data:
            self.assertEqual(data["message"], "hello")

        job3 = self.queue.next()
        self.assertEqual(job3, None)
Example #9
0
def dequeue(n):
    """Fetch one job from ``test_queue.queue_1`` and return its ``context_id``.

    The argument ``n`` is not used. When ``next()`` yields no job the
    function returns ``None``.
    """
    client = pymongo.MongoClient()
    queue = MongoQueue(client.test_queue.queue_1, "consumer_1")
    job = queue.next()
    return job.payload["context_id"] if job else None
Example #10
0
class MongoQueueTest(TestCase):
    """Tests for ``MongoQueue`` run against the ``test_queue`` database."""

    def setUp(self):
        self.client = pymongo.MongoClient()
        self.db = self.client.test_queue
        self.queue = MongoQueue(self.db.queue_1, "consumer_1")

    def tearDown(self):
        # Drop the whole database so each test starts from an empty queue.
        self.client.drop_database("test_queue")

    def assert_job_equal(self, job, data):
        """Assert that every key/value pair of ``data`` is in ``job.payload``."""
        for k, v in data.items():
            self.assertEqual(job.payload[k], v)

    def test_put_next(self):
        """A queued payload comes back from next(); a second next() gives None."""
        data = {
            "context_id": "alpha",
            "data": [1, 2, 3],
            "more-data": time.time()
        }
        self.queue.put(dict(data))
        job = self.queue.next()
        self.assert_job_equal(job, data)

        job = self.queue.next()
        self.assertEqual(job, None)

    def test_atomic_next(self):
        """Two worker processes calling ``dequeue`` must return different results."""
        data = {
            "context_id": "alpha321",
            "data": [1, 2, 3],
            "more-data": time.time()
        }
        self.queue.put(dict(data))

        p = mp.Pool()
        try:
            jobs = p.map(dequeue, [1, 2])
        finally:
            # Shut the worker processes down even when map() raises, so the
            # test does not leak them.
            p.close()
            p.join()
        self.assertNotEqual(jobs[0], jobs[1])

    def test_get_empty_queue(self):
        """next() on an empty queue returns None."""
        job = self.queue.next()
        self.assertEqual(job, None)

    def test_priority(self):
        """Jobs are returned in descending ``priority`` order."""
        self.queue.put({"name": "alice"}, priority=1)
        self.queue.put({"name": "bob"}, priority=2)
        self.queue.put({"name": "mike"}, priority=0)

        self.assertEqual(["bob", "alice", "mike"], [
            self.queue.next().payload['name'],
            self.queue.next().payload['name'],
            self.queue.next().payload['name']
        ])

        job = self.queue.next()
        self.assertEqual(job, None)

    def test_complete(self):
        """complete() takes the job out of the queue (size goes from 1 to 0)."""
        data = {
            "context_id": "alpha",
            "data": [1, 2, 3],
            "more-data": datetime.now()
        }

        self.queue.put(data)
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        job.complete()
        self.assertEqual(self.queue.size(), 0)

        job = self.queue.next()
        self.assertEqual(job, None)

    def test_release(self):
        """release() makes the job available to next() again."""
        data = {
            "context_id": "alpha",
            "data": [1, 2, 3],
            "more-data": time.time()
        }

        self.queue.put(data)
        job = self.queue.next()
        job.release()
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        self.assert_job_equal(job, data)

        job = self.queue.next()
        self.assertEqual(job, None)

    def test_max_attempts(self):
        """A repeatedly failing job is handed out ``max_attempts`` times."""
        data = {"context_id": "alpha", "ts": time.time()}
        self.queue.put(dict(data))
        attempts = 0
        # range() instead of xrange(): xrange does not exist on Python 3.
        for _ in range(0, self.queue.max_attempts):
            job = self.queue.next()
            if not job:
                break
            # NOTE(review): this loop only continues if the job's context
            # manager keeps the exception from escaping -- confirm.
            with job:
                attempts += 1
                raise Exception()
        self.assertEqual(attempts, self.queue.max_attempts)

    def test_error(self):
        pass

    def test_progress(self):
        pass

    def test_stats(self):
        """stats() after five puts and one errored job."""

        for i in range(5):
            data = {
                "context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": time.time()
            }
            self.queue.put(data)
        job = self.queue.next()
        job.error("problem")

        stats = self.queue.stats()
        self.assertEqual({
            'available': 5,
            'total': 5,
            'locked': 0,
            'errors': 0
        }, stats)

    def test_context_manager_error(self):
        """An exception inside ``with job`` leaves the job queued with attempts == 1."""
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        try:
            with job as data:
                self.assertEqual(data['payload']["foobar"], 1)
                # Item is returned to the queue on error
                raise SyntaxError
        except SyntaxError:
            pass

        job = self.queue.next()
        self.assertEqual(job.data['attempts'], 1)

    def test_context_manager_complete(self):
        """Leaving ``with job`` without an error removes the job from the queue."""
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        with job as data:
            self.assertEqual(data['payload']["foobar"], 1)
        job = self.queue.next()
        self.assertEqual(job, None)

    def test_next_by_payload(self):
        """next() accepts a filter on payload fields and only returns matching jobs."""
        self.queue.put({"type": "first_type", "param": "param1"})
        self.queue.put({"type": "second_type", "param": "param2"})
        self.queue.put({"type": "third_type", "param": "param3"})

        job = self.queue.next({"payload.type": "second_type"})
        with job as data:
            self.assertEqual(data["payload"]["param"], "param2")

        job = self.queue.next({"payload.type": "third_type"})
        with job as data:
            self.assertEqual(data["payload"]["param"], "param3")

        job = self.queue.next({"payload.type": "fourth_type"})
        self.assertEqual(job, None)

        job = self.queue.next()
        with job as data:
            self.assertEqual(data["payload"]["param"], "param1")

        job = self.queue.next()
        self.assertEqual(job, None)
Example #11
0
 def setUp(self):
     """Connect to the server named by ``TEST_MONGODB`` and build the test queue."""
     # pymongo.Connection was removed from PyMongo; MongoClient is its
     # replacement and takes the same host argument.
     self.client = pymongo.MongoClient(os.environ.get("TEST_MONGODB"))
     self.db = self.client.test_queue
     self.queue = MongoQueue(self.db.queue_1, "consumer_1")
Example #12
0
class MongoQueueTest(TestCase):
    """Tests for ``MongoQueue`` against the server named by ``TEST_MONGODB``."""

    def setUp(self):
        # pymongo.Connection was removed from PyMongo; MongoClient is its
        # replacement and takes the same host argument.
        self.client = pymongo.MongoClient(os.environ.get("TEST_MONGODB"))
        self.db = self.client.test_queue
        self.queue = MongoQueue(self.db.queue_1, "consumer_1")

    def tearDown(self):
        # Drop the whole database so each test starts from an empty queue.
        self.client.drop_database("test_queue")

    def assert_job_equal(self, job, data):
        """Assert that every key/value pair of ``data`` is in ``job.payload``."""
        for k, v in data.items():
            self.assertEqual(job.payload[k], v)

    def test_put_next(self):
        """A queued payload comes back unchanged from next()."""
        data = {"context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": time.time()}
        self.queue.put(dict(data))
        job = self.queue.next()
        self.assert_job_equal(job, data)

    def test_get_empty_queue(self):
        """next() on an empty queue returns None."""
        job = self.queue.next()
        self.assertEqual(job, None)

    def test_priority(self):
        """Jobs are returned in descending order of their ``priority`` value."""
        self.queue.put({
            "priority": 1, "name": "alice"})
        self.queue.put({
            "priority": 2, "name": "bob"})
        self.queue.put({
            "priority": 0, "name": "mike"})
        self.assertEqual(
            ["bob", "alice", "mike"],
            [self.queue.next().payload['name'],
             self.queue.next().payload['name'],
             self.queue.next().payload['name']])

    def test_complete(self):
        """complete() takes the job out of the queue (size goes from 1 to 0)."""
        data = {"context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": datetime.now()}

        self.queue.put(data)
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        job.complete()
        self.assertEqual(self.queue.size(), 0)

    def test_release(self):
        """release() makes the job available to next() again."""
        data = {"context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": time.time()}

        self.queue.put(data)
        job = self.queue.next()
        job.release()
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        self.assert_job_equal(job, data)

    def test_error(self):
        pass

    def test_progress(self):
        pass

    def test_stats(self):
        """stats() after five puts and one errored job."""

        for i in range(5):
            data = {"context_id": "alpha",
                    "data": [1, 2, 3],
                    "more-data": time.time()}
            self.queue.put(data)
        job = self.queue.next()
        job.error("problem")

        stats = self.queue.stats()
        self.assertEqual({'available': 5,
                          'total': 5,
                          'locked': 0,
                          'errors': 0}, stats)

    def test_context_manager_error(self):
        """An exception inside ``with job`` leaves the job queued with attempts == 1."""
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        try:
            with job as data:
                self.assertEqual(data['payload']["foobar"], 1)
                # Item is returned to the queue on error
                raise SyntaxError
        except SyntaxError:
            pass

        job = self.queue.next()
        self.assertEqual(job.data['attempts'], 1)

    def test_context_manager_complete(self):
        """Leaving ``with job`` without an error removes the job from the queue."""
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        with job as data:
            self.assertEqual(data['payload']["foobar"], 1)
        job = self.queue.next()
        self.assertEqual(job, None)
Example #13
0
 def __init__(self):
     """Create the queue with default arguments and set ``gap`` to 1."""
     self.gap = 1
     self.queue = MongoQueue()
Example #14
0
 def setUp(self):
     """Connect to the server named by ``TEST_MONGODB`` and build the test queue."""
     # pymongo.Connection was removed from PyMongo; MongoClient is its
     # replacement and takes the same host argument.
     self.client = pymongo.MongoClient(os.environ.get("TEST_MONGODB"))
     self.db = self.client.test_queue
     self.queue = MongoQueue(self.db.queue_1, "consumer_1")
Example #15
0
class MongoQueueTest(TestCase):
    """Tests for ``MongoQueue`` against the server named by ``TEST_MONGODB``."""

    def setUp(self):
        # pymongo.Connection was removed from PyMongo; MongoClient is its
        # replacement and takes the same host argument.
        self.client = pymongo.MongoClient(os.environ.get("TEST_MONGODB"))
        self.db = self.client.test_queue
        self.queue = MongoQueue(self.db.queue_1, "consumer_1")

    def tearDown(self):
        # Drop the whole database so each test starts from an empty queue.
        self.client.drop_database("test_queue")

    def assert_job_equal(self, job, data):
        """Assert that every key/value pair of ``data`` is in ``job.payload``."""
        for k, v in data.items():
            self.assertEqual(job.payload[k], v)

    def test_put_next(self):
        """A queued payload comes back unchanged from next()."""
        data = {
            "context_id": "alpha",
            "data": [1, 2, 3],
            "more-data": time.time()
        }
        self.queue.put(dict(data))
        job = self.queue.next()
        self.assert_job_equal(job, data)

    def test_get_empty_queue(self):
        """next() on an empty queue returns None."""
        job = self.queue.next()
        self.assertEqual(job, None)

    def test_priority(self):
        """Jobs are returned in descending ``priority`` order."""
        self.queue.put({"name": "alice"}, priority=1)
        self.queue.put({"name": "bob"}, priority=2)
        self.queue.put({"name": "mike"}, priority=0)

        self.assertEqual(["bob", "alice", "mike"], [
            self.queue.next().payload['name'],
            self.queue.next().payload['name'],
            self.queue.next().payload['name']
        ])

    def test_complete(self):
        """complete() takes the job out of the queue (size goes from 1 to 0)."""
        data = {
            "context_id": "alpha",
            "data": [1, 2, 3],
            "more-data": datetime.now()
        }

        self.queue.put(data)
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        job.complete()
        self.assertEqual(self.queue.size(), 0)

    def test_release(self):
        """release() makes the job available to next() again."""
        data = {
            "context_id": "alpha",
            "data": [1, 2, 3],
            "more-data": time.time()
        }

        self.queue.put(data)
        job = self.queue.next()
        job.release()
        self.assertEqual(self.queue.size(), 1)
        job = self.queue.next()
        self.assert_job_equal(job, data)

    def test_error(self):
        pass

    def test_progress(self):
        pass

    def test_stats(self):
        """stats() after five puts and one errored job."""

        for i in range(5):
            data = {
                "context_id": "alpha",
                "data": [1, 2, 3],
                "more-data": time.time()
            }
            self.queue.put(data)
        job = self.queue.next()
        job.error("problem")

        stats = self.queue.stats()
        self.assertEqual({
            'available': 5,
            'total': 5,
            'locked': 0,
            'errors': 0
        }, stats)

    def test_context_manager_error(self):
        """An exception inside ``with job`` leaves the job queued with attempts == 1."""
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        try:
            with job as data:
                self.assertEqual(data['payload']["foobar"], 1)
                # Item is returned to the queue on error
                raise SyntaxError
        except SyntaxError:
            pass

        job = self.queue.next()
        self.assertEqual(job.data['attempts'], 1)

    def test_context_manager_complete(self):
        """Leaving ``with job`` without an error removes the job from the queue."""
        self.queue.put({"foobar": 1})
        job = self.queue.next()
        with job as data:
            self.assertEqual(data['payload']["foobar"], 1)
        job = self.queue.next()
        self.assertEqual(job, None)
Example #16
0
class MongoQueueRetryTimeTests(TestCase):
    """Tests for retry delays, using a queue created with ``retry_after=2``."""

    def setUp(self):
        self.client = pymongo.MongoClient()
        self.db = self.client.test_queue
        self.queue = MongoQueue(self.db.queue_1, "consumer_1", retry_after=2)

    def tearDown(self):
        # Drop the whole database so each test starts from an empty queue.
        self.client.drop_database("test_queue")

    def test_complete_scenario(self):
        """A failed job is unavailable after 1s, available again after 2.1s."""
        self.queue.put({"message": "hello"})

        job = self.queue.next()
        # NOTE(review): the test only proceeds past this block if the job's
        # context manager keeps the exception from escaping -- confirm.
        with job as data:
            raise Exception

        time.sleep(1)
        job2 = self.queue.next()
        self.assertEqual(job2, None)
        time.sleep(1.1)

        job3 = self.queue.next()
        # Check the job that was just dequeued; the original re-entered the
        # first handle (``job``) and never looked at ``job3``.
        with job3 as data:
            self.assertEqual(data["message"], "hello")
        job4 = self.queue.next()
        self.assertEqual(job4, None)

    def test_error_with_increased_retry(self):
        """error(custom_retry_after=3) hides the job at 1s; it is back after 3.1s."""
        self.queue.put({"message": "hello"})
        job = self.queue.next()
        job.error(custom_retry_after=3)

        time.sleep(1)
        job = self.queue.next()
        self.assertEqual(job, None)

        time.sleep(2.1)

        job2 = self.queue.next()
        with job2 as data:
            self.assertEqual(data["message"], "hello")

        job3 = self.queue.next()
        self.assertEqual(job3, None)

    def test_release_with_increased_retry(self):
        """release(custom_retry_after=3) hides the job at 1s; it is back after 3.1s."""
        self.queue.put({"message": "hello"})
        job = self.queue.next()
        job.release(custom_retry_after=3)

        time.sleep(1)
        job = self.queue.next()
        self.assertEqual(job, None)

        time.sleep(2.1)

        job2 = self.queue.next()
        with job2 as data:
            self.assertEqual(data["message"], "hello")

        job3 = self.queue.next()
        self.assertEqual(job3, None)
Example #17
0
 def setUp(self):
     """Build the fixtures: default client, ``test_queue`` db, queue with ``retry_after=2``."""
     self.client = pymongo.MongoClient()
     test_db = self.client.test_queue
     self.db = test_db
     self.queue = MongoQueue(test_db.queue_1, "consumer_1", retry_after=2)
Example #18
0
class Machine(threading.Thread):
    '''A machine represents one worker on the network.
    Depending on the incoming task, will spawn an appropriate task processor.
    It WILL NOT add tasks to the queue.'''

    def __init__(self, name):
        '''Set up a daemon thread named ``name`` with its own task-queue handle.

        ``name`` is used as the thread name, the queue ``consumer_id`` and
        (via ``register``) the ``_id`` of the machine document.'''
        threading.Thread.__init__(self, name=name)
        self.exitFlag = 0
        # Thread.setDaemon() is deprecated; the ``daemon`` attribute is the
        # supported way to mark the thread as a daemon.
        self.daemon = True

        #  Mongo connection
        mongo = MongoClient('mongodb://localhost:27017/')
        self.db = mongo.crooshdb
        self.taskqueue = MongoQueue(
            self.db.taskqueue,
            consumer_id=name,
            timeout=300,
            max_attempts=3)

        # Active tasks
        self.activeTask = None
        self.taskRunner = None
        self.acceptsTasks = True
        self.register()

    def register(self):
        '''Adds machine to list of active machines on DB'''
        # Upsert with $setOnInsert: the fields are only written when the
        # document is newly inserted.
        self.db.machines.find_and_modify(
            query={"_id": self.name},
            update={"$setOnInsert": {
                "_id": self.name,
                "activeTask": None,
                "acceptsTasks": True
            }},
            upsert=True,
            new=True
        )

    def run(self):
        '''Thread entry point: poll for tasks until ``close`` is called.'''
        self.listen()

    def close(self):
        '''Ask the polling loop in ``listen`` to stop after its current pass.'''
        self.exitFlag = 1
        print("Cleanup complete")

    def listen(self):
        '''Poll the task queue every 2 seconds until ``exitFlag`` is set.'''
        print("Machine listening for tasks...")
        while not self.exitFlag:
            self.pollTasks()
            time.sleep(2)
        # No self.join() here: listen() runs inside this thread (via run()),
        # and a thread that joins itself raises RuntimeError.

    def pollTasks(self):
        '''Fetch the next queued task, if any, and run it.

        Does nothing while ``taskRunner`` is set or when the queue has no task.'''
        if not self.taskRunner:
            task = self.taskqueue.next()
            if task:
                # Set active task for machine
                taskWrapper = MayaRenderTask.buildFromQueue(task)
                print("Running task {0}".format(taskWrapper.jobID))

                self.updateMachineStatus(task)
                self.taskRunner = MayaRenderTaskRunner(taskWrapper)

                # Clear task for machine on completion and reregister to queue
                self.updateMachineStatus(None)
                self.taskRunner = None

    def updateMachineStatus(self, task=None):
        '''Record the machine's current task (or ``None``) on the instance and in the DB.

        ``acceptsTasks`` becomes False while ``task`` is truthy, True otherwise.'''
        self.acceptsTasks = not task
        # NOTE(review): the update document has no operator such as $set, so
        # it is presumably applied as a whole-document replacement -- confirm.
        self.db.machines.find_and_modify(
            query={"_id": self.name},
            update={
                "activeTask": task.job_id if task else None,
                "acceptsTasks": self.acceptsTasks
            }
        )