예제 #1
0
    def __init__(self, monque, task, args, kwargs, config):
        """
        Capture everything needed to post one invocation of `task`.

        monque -- the queue instance the task is posted through
        task   -- the task object; supplies the name, logger and parent config
        args   -- positional arguments stored for the task
        kwargs -- keyword arguments stored for the task
        config -- dict of per-post settings, wrapped in a Configuration whose
                  parent is set to the task's own config
        """
        settings = Configuration(**config)
        settings.parent = task.config
        self.config = settings

        self.monque, self.task = monque, task
        self.name = task.get_name()
        self.args, self.kwargs = args, kwargs

        # Not known until the task has been stored.
        self.collection = self.id = self.doc = None

        self.logger = task.logger

        # Routing and scheduling settings.
        lookup = settings.get
        self.queue = lookup("queue", "default")
        self.priority = lookup("priority", None)
        self.start_time = self.get_start_time()
        self.result = None

        # Constraint settings; 0 / False / None leave a constraint unset.
        self.max_in_queue = int(lookup("max_in_queue", 0))
        self.max_running = int(lookup("max_running", 0))
        self.must_be_unique = lookup("must_be_unique", False)
        self.unique_kwargs = lookup("unique_kwargs", None)
예제 #2
0
    def connect(self, init=False):
        """
        Open the MongoDB connection (if there is none yet), select the
        database, load the global configuration and set up the collections.

        init -- passed through unchanged to get_collections()
        """
        self.logger.debug("Monque.connect()")

        if not self.connection:
            # "mongo.host" may be either "host" or "host:port"; an explicit
            # port in the host string takes precedence over "mongo.port".
            address = self.config.get("mongo.host", "localhost")
            host, colon, port_text = address.partition(":")
            if colon:
                port = int(port_text)
            else:
                port = int(self.config.get("mongo.port", 27017))
            self.connection = pymongo.MongoClient(host, port)

        self.db = self.connection[self.config.get("mongo.db", "monque")]

        # The global config (read from the database) becomes the parent of
        # this instance's config.
        config_name = self.config.get("mongo.config", "config")
        self.global_config = Configuration.get_global(self.db, config_name)
        self.config.parent = self.global_config

        self.get_collections(init=init)
예제 #3
0
    def init_config(self, kwargs):
        """
        Create a config object that encapsulates all the config settings
        for this task, including those inherited from base classes, from the
        queue, etc

        kwargs -- dict of settings used to construct the Configuration; the
                  queue's config (self.monque.config) is set as its parent.
        """

        self.config = Configuration(**kwargs)
        self.config.parent = self.monque.config

        # Walk the class hierarchy from the most basic class to the most
        # derived one (skipping `object`), so that -- assuming set() overwrites
        # earlier values -- the lowest class in the hierarchy 'sticks'.
        # Built as a real list and read with items() so the code runs on both
        # Python 2 and Python 3 (filter() returns an iterator on Python 3 and
        # dict.iteritems() no longer exists there).
        lineage = [cls for cls in self.__class__.__mro__ if cls is not object]
        for cls in reversed(lineage):
            for key, value in cls.__dict__.items():
                if key.startswith('__'):
                    continue
                self.config.set(key, value)
예제 #4
0
class Task(object):
    """
    A Task object can be executed remotely via the queue, or can be executed directly
    as a callable or just invoking the run() method.

    A Task object is actually more of an 'actor', in that the remote workers will only
    instantiate the Task class once, and reuse that instance every time it is processing
    a Task of that type. So implementation should be careful not to store state in the
    instance (except perhaps cached data to be reused for future invocations of the task)
    """

    def __init__(self, **kwargs):
        """
        Bind the task to a queue instance and a logger, then build its config.

        Two keyword arguments are consumed here; everything else is handed to
        init_config():
          monque -- queue instance to use; when missing or falsy,
                    monque.instance.current_instance is used instead
          logger -- logger to use; when missing or falsy, the queue's
                    get_logger() is used instead

        Raises Task.NoQueue when no queue instance is available.
        """
        self.monque = kwargs.pop('monque', None)
        if not self.monque:
            self.monque = monque.instance.current_instance
        if not self.monque:
            raise Task.NoQueue("No Monque queue instance")

        self.logger = kwargs.pop('logger', None)
        if not self.logger:
            # self.monque is guaranteed to be set at this point (NoQueue is
            # raised above otherwise), so no other fallback is needed.
            self.logger = self.monque.get_logger()

        self.init_config(kwargs)

    def init_config(self, kwargs):
        """
        Create a config object that encapsulates all the config settings
        for this task, including those inherited from base classes, from the
        queue, etc

        kwargs -- dict of settings used to construct the Configuration; the
                  queue's config (self.monque.config) is set as its parent.
        """

        self.config = Configuration(**kwargs)
        self.config.parent = self.monque.config

        # Walk the class hierarchy from the most basic class to the most
        # derived one (skipping `object`), so that -- assuming set() overwrites
        # earlier values -- the lowest class in the hierarchy 'sticks'.
        # Built as a real list and read with items() so the code runs on both
        # Python 2 and Python 3.
        lineage = [cls for cls in self.__class__.__mro__ if cls is not object]
        for cls in reversed(lineage):
            for key, value in cls.__dict__.items():
                if key.startswith('__'):
                    continue
                self.config.set(key, value)

    def get_name(self):
        """ Return the task name, which is the name of the task's class """
        return self.__class__.__name__

    def __call__(self, *args, **kwargs):
        """ Calling the task object runs it directly, same as run() """
        return self.run(*args, **kwargs)

    def run(self, *args, **kwargs):
        """ This is to be implemented by subclasses """
        raise NotImplementedError()

    def is_run_by_worker(self):
        """ True when the bound queue instance is a monque.worker.Worker """
        # Imported here rather than at module level, as in the original code
        # (presumably to avoid a circular import -- TODO confirm).
        from monque.worker import Worker
        return isinstance(self.monque, Worker)

    def post(self, args=None, kwargs=None, **config):
        """
        Submit this task to the queue to be executed by a (remote) worker.
        Result is a TaskRemote instance that can be used to monitor progress and
        get results back.

        args   -- positional arguments for the task (default: empty list)
        kwargs -- keyword arguments for the task (default: empty dict)
        config -- any further keyword arguments are passed on as per-post config
        """
        # Fresh containers per call: mutable default arguments would be shared
        # between every call that relies on the default.
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}
        return self.monque.post(self, args, kwargs, config)

    @classmethod
    def find_task_class(klass, class_name):
        """
        Look up a task class by name among all known subclasses.

        A match on the full dotted name (module + '.' + class name) is
        preferred; otherwise the last dotted component of `class_name` is
        matched against the bare class names.

        Raises Task.ClassNotFound when nothing matches.
        """
        all_subclasses = klass.find_all_task_classes()

        # Find fullname match if possible:
        for sub in all_subclasses:
            full_name = sub.__module__ + '.' + sub.__name__
            if full_name == class_name:
                return sub

        # Find short name match:
        short = class_name.split('.')[-1]
        for sub in all_subclasses:
            if sub.__name__ == short:
                return sub

        raise Task.ClassNotFound(class_name)

    @classmethod
    def find_all_task_classes(klass):
        """
        Return every direct and indirect subclass of `klass` that does not
        define `__obsolete__` in its own class body, as a list sorted by
        (module name, class name).
        """
        found = []
        seen = set()
        pending = list(klass.__subclasses__())
        while pending:
            sub = pending.pop()
            if sub in seen:
                # Reachable through more than one parent; list it only once.
                continue
            seen.add(sub)
            found.append(sub)
            pending.extend(sub.__subclasses__())

        # Only the class's own __dict__ is checked, so the marker is not
        # inherited by subclasses of an obsolete class.
        not_obsolete = [sub for sub in found if '__obsolete__' not in sub.__dict__]

        # Classes cannot be ordered directly on Python 3, so sort on a
        # stable, explicit key.
        return sorted(not_obsolete, key=lambda sub: (sub.__module__, sub.__name__))

    class NoQueue(Exception):
        """ Raised when a task cannot find a queue instance to bind to """
        pass

    class ClassNotFound(Exception):
        """ Raised by find_task_class() when no class matches the name """
        pass
예제 #5
0
class PostedTask(object):
    """
    In-memory representation of a task posted (or to be posted) to a queue.
    """

    def __init__(self, monque, task, args, kwargs, config):
        """
        monque -- the queue instance the task is posted through
        task   -- the task object; supplies the name, logger and parent config
        args   -- positional arguments stored in the task payload
        kwargs -- keyword arguments stored in the task payload
        config -- dict of per-post settings, wrapped in a Configuration whose
                  parent is set to the task's own config
        """
        self.config = Configuration(**config)
        self.config.parent = task.config

        self.monque = monque
        self.task = task
        self.name = task.get_name()
        self.args = args
        self.kwargs = kwargs

        self.collection = None
        self.id = None
        self.doc = None

        self.logger = self.task.logger

        self.queue = self.config.get("queue", "default")

        self.priority = self.config.get("priority", None)

        self.start_time = self.get_start_time()
        self.result = None

        self.max_in_queue = int(self.config.get("max_in_queue", 0))
        self.max_running = int(self.config.get("max_running", 0))
        self.must_be_unique = self.config.get("must_be_unique", False)
        self.unique_kwargs = self.config.get("unique_kwargs", None)

    @staticmethod
    def _is_seconds(value):
        """ True for a plain int/float number of seconds (bool is rejected) """
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    def get_start_time(self):
        """
        Work out the earliest time the task may start, from the config.

        "at"    -- absolute start time: a datetime (returned as is) or a Unix
                   timestamp in seconds, converted to a naive UTC datetime
        "delay" -- relative start time: a timedelta or a number of seconds,
                   added to the current UTC time

        "at" takes precedence over "delay". Falsy values (None, 0) count as
        not set. Returns None when neither is set; raises ValueError for a
        value of any other type.
        """
        absolute = self.config.get("at")
        if absolute:
            if isinstance(absolute, datetime.datetime):
                return absolute
            if self._is_seconds(absolute):
                # Every other timestamp in this class is naive UTC (utcnow()),
                # and get_next() compares start times against utcnow(), so the
                # Unix timestamp is converted to naive UTC, not local time.
                return datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=absolute)
            raise ValueError("Unrecognized format of 'at': %s" % (absolute,))

        delay = self.config.get("delay")
        if delay:
            if isinstance(delay, datetime.timedelta):
                return datetime.datetime.utcnow() + delay
            if self._is_seconds(delay):
                return datetime.datetime.utcnow() + datetime.timedelta(seconds=delay)
            raise ValueError("Unrecognized format of 'delay': %s" % (delay,))

        return None

    def save_into(self, collection):
        """
        This is where the task actually gets inserted into the collection.
        The serialized doc is built on first use and its "_id" is kept as
        self.id afterwards.
        TODO: options for write concern, etc?
        """
        if not self.doc:
            self.doc = self.serialize()
        collection.save(self.doc)
        self.id = self.doc["_id"]

    def mark_running(self):
        """
        Flag the task as running, in the in-memory doc (when there is one)
        and in the collection (when the task is bound to a collection and id).
        """
        started_at = datetime.datetime.utcnow()

        if self.doc:
            self.doc["status"] = "running"
            self.doc["started_at"] = started_at

        # The values are taken from locals rather than from self.doc, which
        # may be None here.
        if self.collection is not None and self.id is not None:
            self.collection.find_and_modify(
                query={"_id": self.id},
                update={"$set": {"status": "running", "started_at": started_at}},
            )

    def remove(self):
        """ Delete the task from its collection; no-op when not bound to one """
        if self.collection is not None and self.id is not None:
            self.collection.find_and_modify(query={"_id": self.id}, remove=True)

    def serialize(self):
        """
        Return a serialized version (dict) of the task, as it is to be stored
        in the collection
        """
        now = datetime.datetime.utcnow()
        doc = {
            "name": self.name,
            "class": self.task.__module__ + "." + self.task.__class__.__name__,
            "queue": self.queue,
            "payload": {"args": self.args, "kwargs": self.kwargs},
            "constraints": {},
            "created_at": now,
            "submitted_at": now,
            "status": "pending",
        }

        # Add constraints (only the ones that are actually set):
        constraints = doc["constraints"]
        if self.priority is not None:
            constraints["priority"] = self.priority
        if self.start_time:
            constraints["start_time"] = self.start_time
        if self.max_in_queue:
            constraints["max_in_queue"] = self.max_in_queue
        if self.max_running:
            constraints["max_running"] = self.max_running
        if self.must_be_unique:
            constraints["must_be_unique"] = True
            # unique_kwargs is only stored together with must_be_unique.
            if self.unique_kwargs:
                constraints["unique_kwargs"] = self.unique_kwargs

        return doc

    def notify_workers(self, collection):
        """
        Add a doc to the collection (the activity log) that indicates new tasks in the queue,
        so workers that are tailing the collection can immediately pick it up
        """
        collection.insert({"task": self.id, "queue": self.queue})

    def notify_results(self, collection):
        """
        Add a doc to the collection (the activity log) that indicates task results are available,
        so clients waiting for the results can immediately pick it up
        """
        collection.insert({"result": self.id})

    @classmethod
    def get_next(klass, **kwargs):
        """
        Atomically take the next runnable task document from a collection.

        Keyword arguments:
          collection -- (required) the collection to take the task from
          queue      -- queue name, or list/tuple of queue names, to restrict
                        the search to; a falsy value means no restriction
          worker     -- value recorded in the "worker" field of the taken doc

        Only "pending" docs with no start time, or a start time that has
        passed, are considered; highest priority first, then lowest _id.
        Returns the updated document, as returned by find_and_modify().
        """
        collection = kwargs.pop("collection")
        queue = kwargs.pop("queue", None)
        worker = kwargs.pop("worker", None)

        # Set up the query filters:
        query = {"status": "pending"}

        if queue:
            if isinstance(queue, (list, tuple)):
                names = list(queue)
                query["queue"] = names[0] if len(names) == 1 else {"$in": names}
            else:
                # Any single name, including Python 2 unicode strings, which
                # an exact `type(queue) == str` check would silently ignore
                # (leaving the search unrestricted).
                query["queue"] = queue

        now = datetime.datetime.utcnow()
        query["$or"] = [{"constraints.start_time": {"$exists": False}}, {"constraints.start_time": {"$lte": now}}]

        # As soon as it is picked up, mark it as 'taken',
        # which is the pre-cursor state to 'running',
        # in which pre-run conditions are checked, etc
        update = {"$set": {"status": "taken", "taken_at": now, "worker": worker}}

        found = collection.find_and_modify(
            query=query,
            update=update,
            new=True,
            sort=[("constraints.priority", pymongo.DESCENDING), ("_id", pymongo.ASCENDING)],
        )

        return found

    def unget(self):
        """
        Put a task back into the queue that was 'incorrectly' taken.
        Usually this is for a task that is taken, then one or more pre-execution tasks
        fails (e.g. too many running tasks of a given type)
        """
        if self.collection is None or self.id is None:
            return

        self.logger.debug("Task unget() id=%s" % (self.id))

        self.collection.find_and_modify(query={"_id": self.id}, update={"$set": {"status": "pending"}})

    def wait(self, timeout=None):
        """
        Wait for the results of the task to be posted to the result queue.
        If timeout (given in seconds) is not None, then wait at most roughly
        that long for the result; a timeout of 0 checks once without waiting.
        If no result is available within that time, returns None.
        If the result is received, it is passed to handle_result(), which
        returns it or raises PostedTask.RuntimeException for a failed task.
        """

        query = {"result": self.id}

        expire_at = None
        if timeout is not None:
            expire_at = time.time() + timeout

        while expire_at is None or time.time() < expire_at:
            tail = self.monque.activity_log.find(query, tailable=True, await_data=False)
            # Any matching activity-log entry means the result was announced.
            if any(True for _ in tail):
                break

            time.sleep(0.1)

        # Looked up even after a timeout, in case the result was stored anyway.
        result = self.monque.results_collection.find_one(self.id)
        if result:
            return self.handle_result(result)

        return None

    def handle_result(self, result):
        """
        Remember the result document and unpack it.

        Returns result["result"] for status "completed", raises
        PostedTask.RuntimeException(result["exception"]) for status "failed",
        and returns None for any other status.
        """
        self.result = result

        status = self.result.get("status", None)

        if status == "completed":
            return self.result["result"]

        if status == "failed":
            exception = self.result["exception"]
            raise PostedTask.RuntimeException(exception)

        return None

    class RuntimeException(Exception):
        """ Raised by handle_result() when the task reported a failure """
        pass