Esempio n. 1
0
 def _timed(*args, **kw):
     """Call ``func`` with the given arguments and log how long it took.

     ``func`` is taken from the enclosing scope.  The elapsed wall-clock
     time is logged at debug level whether ``func`` returns or raises; its
     return value (or exception) is passed through unchanged.
     """
     from powerhose import logger
     began = time.time()
     try:
         result = func(*args, **kw)
     finally:
         # reached on the success path and on the error path alike
         elapsed = time.time() - began
         logger.debug('%.4f' % elapsed)
     return result
Esempio n. 2
0
    def run(self):
        """Register, start the pinger and serve incoming messages.

        The loop runs until ``self.running`` is cleared, the pinger flags
        itself as unresponsive, or polling raises ``zmq.ZMQError``.

        Each received message is unserialized; when its first item is
        ``'JOB'`` the remaining items are handed to ``self.target`` and the
        outcome is sent back as a ``'JOBRES'`` message.  Any other message is
        answered with ``'ERROR'``.
        """
        self.running = True
        self.register()
        self.pinger.start()

        while self.running and not self.pinger.unresponsive:
            try:
                events = dict(self.poller.poll(self.timeout))
            except zmq.ZMQError:
                # the poll itself failed: leave the loop
                break

            for socket in events:
                msg = unserialize(socket.recv())

                if msg[0] == 'JOB':
                    # do the job and send the result
                    start = time.time()
                    try:
                        res = self.target(msg[1:])
                    except Exception as e:
                        # a failing job must not end the loop: the error
                        # text is sent back in place of the result
                        res = str(e)
                        logger.debug('job failed: ' + res)
                    # time spent in the target, failures included
                    logger.debug('%.6f' % (time.time() - start))
                    socket.send(serialize("JOBRES", msg[1], res))
                else:
                    socket.send('ERROR')
Esempio n. 3
0
    def acquire(self, timeout=None):
        """Acquire a worker from the queue and remove it.

        Should be used with :func:`release`.

        Options:

        - **timeout**: time in second before raising a TimeoutError
          exception. Defaults to the value provided in the class
          initialization.

        A worker taken from the queue whose identity is no longer in
        ``self._workers`` is passed to ``self.delete`` and skipped.  The
        timeout bounds the whole call, not each individual attempt.
        """
        import time

        logger.debug('Trying to acquire a worker')
        if timeout is None:
            timeout = self.timeout

        # a timeout that is still None means "wait without limit"
        deadline = None
        if timeout is not None:
            deadline = time.time() + timeout

        worker = None

        try:
            while worker is None:
                if deadline is None:
                    remaining = None
                else:
                    # skipping stale workers must not restart the clock;
                    # clamp at zero so get() never sees a negative timeout
                    remaining = max(0., deadline - time.time())

                worker = self._available.get(timeout=remaining)
                if worker.identity not in self._workers:
                    # it has been removed
                    self.delete(worker.identity)
                    worker = None

        except Empty:
            raise TimeoutError("Could not get a worker")

        # backslash escaped explicitly: '\o' is not a valid escape sequence
        logger.debug('we got one \\o/')
        return worker
Esempio n. 4
0
 def failed(self):
     """Handle a failed ping: send a removal message, then stop.

     A ``RegisterError`` raised while sending the ``'REMOVE'`` message is
     ignored, so ``self.stop()`` is still called in that case.
     """
     logger.debug("ping failed let's die")
     try:
         self._msg('REMOVE', 'REMOVED')
     except RegisterError:
         # swallowed so that stop() below still runs for this error type
         pass
     self.stop()
Esempio n. 5
0
    def run(self):
        """Register, start the pinger and serve incoming messages.

        The loop runs until ``self.running`` is cleared, the pinger flags
        itself as unresponsive, or polling raises ``zmq.ZMQError``.

        Each received message is unserialized and logged; when its first
        item is ``'JOB'`` the remaining items are handed to ``self.target``
        and the outcome is sent back as a ``'JOBRES'`` message.  Any other
        message is answered with ``'ERROR'``.
        """
        self.running = True
        self.register()
        self.pinger.start()

        while self.running and not self.pinger.unresponsive:
            try:
                events = dict(self.poller.poll(self.timeout))
            except zmq.ZMQError:
                # the poll itself failed: leave the loop
                break

            for socket in events:
                msg = unserialize(socket.recv())

                logger.debug(msg)
                if msg[0] == 'JOB':
                    # do the job and send the result
                    start = time.time()
                    try:
                        res = self.target(msg[1:])
                    except Exception as e:
                        # a failing job must not end the loop: the error
                        # text is sent back in place of the result
                        res = str(e)
                        logger.debug('job failed: ' + res)
                    # time spent in the target, failures included
                    logger.debug('%.6f' % (time.time() - start))
                    socket.send(serialize("JOBRES", msg[1], res))
                else:
                    socket.send('ERROR')
Esempio n. 6
0
 def _timed(*args, **kw):
     """Call ``func`` with the given arguments and log how long it took.

     ``func`` is taken from the enclosing scope.  The elapsed wall-clock
     time is logged at debug level whether ``func`` returns or raises; its
     return value (or exception) is passed through unchanged.
     """
     from powerhose import logger
     began = time.time()
     try:
         result = func(*args, **kw)
     finally:
         # reached on the success path and on the error path alike
         elapsed = time.time() - began
         logger.debug('%.4f' % elapsed)
     return result
Esempio n. 7
0
    def execute(self, job, timeout=1.):
        """Execute a job and return the result.

        Options:

        - **job**: a :class:`Job` instance.
        - **timeout**: the maximum allowed time in seconds. (default: 1)

        The job is attempted up to ``self.retries`` times.  If every attempt
        fails, the error of the last attempt is raised:

        - :class:`TimeoutError`: timed out.
        - :class:`ExecutionError`: the worker has failed.

        In case of an execution error, the exception usually holds
        more details on the failure.

        When ``self.retries`` is 0 nothing is attempted and None is returned.
        """
        from powerhose import logger
        last_error = None

        for i in range(self.retries):
            try:
                return self._execute(job, timeout)
            except (TimeoutError, ExecutionError) as e:
                # keep our own reference: on Python 3 the name bound by
                # ``as`` is removed when the handler exits
                last_error = e
                logger.debug(str(e))
                logger.debug('retrying - %d' % (i + 1))

        # every attempt failed: report the last failure to the caller
        # instead of silently returning None
        if last_error is not None:
            raise last_error
Esempio n. 8
0
 def failed(self):
     """Handle a failed ping: send a removal message, then stop.

     A ``RegisterError`` raised while sending the ``'REMOVE'`` message is
     ignored, so ``self.stop()`` is still called in that case.
     """
     logger.debug("ping failed let's die")
     try:
         self._msg('REMOVE', 'REMOVED')
     except RegisterError:
         # swallowed so that stop() below still runs for this error type
         pass
     self.stop()
Esempio n. 9
0
 def stop(self):
     """Stops the registration loop.

     Nothing happens when the loop is not marked as started.
     """
     if self.started:
         logger.debug('Stopping registration at ' + self.endpoint)
         self.registration.stop()
         self.started = False
Esempio n. 10
0
 def start(self):
     """Starts the registration loop.

     Nothing happens when the loop is already marked as started.
     """
     if not self.started:
         logger.debug('Starting registration at ' + self.endpoint)
         self.registration.start()
         self.started = True
Esempio n. 11
0
 def stop(self):
     """Stop the pinger if it is running.

     Clears ``self.running`` and then calls ``self.join()``; a
     ``RuntimeError`` raised by ``join()`` is ignored.
     """
     if self.running:
         logger.debug('stopping pinger')
         self.running = False
         try:
             self.join()
         except RuntimeError:
             # NOTE(review): presumably join() is threading.Thread.join,
             # which refuses to join an unstarted or the current thread
             pass
Esempio n. 12
0
    def release(self, worker):
        """Put back the worker in the queue.

        Options:

        - **worker**: the worker to put back.

        Should be used with :func:`acquire`.

        The worker is handed to ``self._available.put`` as is; no check is
        made here on whether it is still registered.
        """
        logger.debug('releasing the worker')
        self._available.put(worker)
Esempio n. 13
0
    def execute(self, job_id, job_data, timeout=1.):
        """Execute a job and return its result, retrying on failure.

        - **job_id**, **job_data**: passed unchanged to ``self._execute``.
        - **timeout**: passed unchanged to ``self._execute``. (default: 1)

        The job is attempted up to ``self.retries`` times.  If every attempt
        fails with :class:`TimeoutError` or :class:`ExecutionError`, the
        error of the last attempt is raised.  When ``self.retries`` is 0
        nothing is attempted and None is returned.
        """
        from powerhose import logger
        last_error = None

        for i in range(self.retries):
            try:
                return self._execute(job_id, job_data, timeout)
            except (TimeoutError, ExecutionError) as e:
                # keep our own reference: on Python 3 the name bound by
                # ``as`` is removed when the handler exits
                last_error = e
                logger.debug(str(e))
                logger.debug('retrying - %d' % (i + 1))

        # every attempt failed: report the last failure to the caller
        # instead of silently returning None
        if last_error is not None:
            raise last_error
Esempio n. 14
0
    def execute(self, job_id, job_data, timeout=1.):
        """Execute a job and return its result, retrying on failure.

        - **job_id**, **job_data**: passed unchanged to ``self._execute``.
        - **timeout**: passed unchanged to ``self._execute``. (default: 1)

        The job is attempted up to ``self.retries`` times.  If every attempt
        fails with :class:`TimeoutError` or :class:`ExecutionError`, the
        error of the last attempt is raised.  When ``self.retries`` is 0
        nothing is attempted and None is returned.
        """
        from powerhose import logger
        last_error = None

        for i in range(self.retries):
            try:
                return self._execute(job_id, job_data, timeout)
            except (TimeoutError, ExecutionError) as e:
                # keep our own reference: on Python 3 the name bound by
                # ``as`` is removed when the handler exits
                last_error = e
                logger.debug(str(e))
                logger.debug('retrying - %d' % (i + 1))

        # every attempt failed: report the last failure to the caller
        # instead of silently returning None
        if last_error is not None:
            raise last_error
Esempio n. 15
0
 def __init__(self, identity, socket, locker, fail_callable,
              duration=5., max_fails=10.):
     """Set up the pinger thread; the thread is not started here.

     All arguments are stored on the instance under their own names.
     A ``zmq.Poller`` is created and ``socket`` is registered on it for
     ``zmq.POLLIN``.  The ``running``, ``disabled`` and ``unresponsive``
     flags all start out False.
     """
     threading.Thread.__init__(self)

     # who we are and how often / how long we insist
     self.duration = duration
     self.identity = identity
     logger.debug('starting pinger from %s' % self.identity)
     self.max_fails = max_fails

     # collaborators handed in by the caller
     self.socket = socket
     self.locker = locker
     self.fail_callable = fail_callable

     # poller watching the ping socket for incoming data
     poller = zmq.Poller()
     poller.register(self.socket, zmq.POLLIN)
     self.poller = poller

     # state flags
     self.running = False
     self.disabled = False
     self.unresponsive = False
Esempio n. 16
0
    def run(self):
        """Send pings until stopped or until too many of them have failed.

        Failures are counted in a local counter; once it reaches
        ``self.max_fails`` the pinger sets ``self.unresponsive``, clears
        ``self.running`` and leaves the loop.  While ``self.disabled`` is set
        the loop only sleeps.

        Each round, under ``self.locker``, a serialized ``'PING'`` carrying
        ``self.identity`` is sent without blocking and the poller is given
        ``self.duration * 1000`` to report an answer.  A missing answer or an
        answer other than ``'PONG'`` calls ``self.fail_callable()`` and counts
        as a failure; a non-``'PONG'`` answer also clears ``self.running``.
        """
        self.running = True
        num_failed = 0

        while self.running:
            if num_failed >= self.max_fails:
                self.unresponsive = True
                self.running = False
                break

            if self.disabled:
                time.sleep(1.)
                continue

            with self.locker:
                try:
                    data = serialize('PING', self.identity)
                    logger.debug('[pinger] Pinging with ' + data)
                    self.socket.send(data, zmq.NOBLOCK)
                except zmq.ZMQError as e:
                    num_failed += 1
                    logger.debug('[pinger] ' + str(e))
                    continue

                try:
                    events = dict(self.poller.poll(self.duration * 1000))
                except zmq.ZMQError as e:
                    # counted in the same local counter as every other
                    # failure; there is no ``self.num_failed`` attribute
                    num_failed += 1
                    logger.debug('[pinger] ' + str(e))
                    continue

                if not events:
                    logger.debug('[pinger] ' + 'no pong!')
                    self.fail_callable()
                    num_failed += 1
                else:
                    for socket in events:
                        res = socket.recv()
                        logger.debug('[pinger] ' + 'got ' + res)
                        if res != 'PONG':
                            self.running = False
                            self.fail_callable()
                            num_failed += 1
Esempio n. 17
0
    def __init__(self, endpoint, workers_cmd, num_workers=5, working_dir=None,
                 circus_controller='tcp://127.0.0.1:555',
                 circus_pubsub_endpoint='tcp://127.0.0.1:5556', env=None):
        """Start the runner and the workers attached to ``endpoint``.

        - **endpoint**, **workers_cmd**, **circus_controller**,
          **circus_pubsub_endpoint**: strings; every ``$PID`` they contain
          is replaced by the current thread identifier.
        - **num_workers**, **working_dir**: passed on to ``CryptoWorkers``.
        - **env**: None, a dict, or a ``'KEY=VALUE;KEY2=VALUE2'`` string.

        The runner and the workers are created only once per endpoint and
        kept in the module-level ``_runners`` / ``_workers`` registries.
        """
        # initialisation
        ident = str(thread.get_ident())
        self.endpoint = endpoint.replace('$PID', ident)
        self.workers_cmd = workers_cmd.replace('$PID', ident)
        circus_controller = circus_controller.replace('$PID', ident)
        circus_pubsub_endpoint = circus_pubsub_endpoint.replace('$PID', ident)

        # normalise the environment to a dict
        if env is None:
            envdict = {}
        elif isinstance(env, dict):
            envdict = env
        else:
            envdict = {}
            for item in env.split(';'):
                name, val = item.split('=', 1)
                envdict[name] = val

        # register the runner and the workers in the global vars.
        key = self.endpoint
        if key not in _runners:
            _runners[key] = JobRunner(key)
            _workers[key] = CryptoWorkers(
                self.workers_cmd,
                num_workers=num_workers,
                working_dir=working_dir,
                controller=circus_controller,
                pubsub_endpoint=circus_pubsub_endpoint,
                env=envdict)

        self.runner = _runners[key]
        logger.debug('Starting powerhose master')

        # start the runner ...
        self.runner.start()
        time.sleep(.5)

        # ... and the workers
        self.workers = _workers[key]
        self.workers.start()

        # wait a bit
        time.sleep(1.)
Esempio n. 18
0
def stop_runners():
    """Stop every registered workers object, then every registered runner.

    Both registries (``_workers`` and ``_runners``) are only read; their
    entries are left in place.
    """
    logger.debug("stop_runner starts")

    for pool in _workers.values():
        pool.stop()

    logger.debug("workers killed")

    for master in _runners.values():
        logger.debug('Stopping powerhose master')
        master.stop()

    logger.debug("stop_runner ends")
Esempio n. 19
0
    def __init__(self, endpoint, workers_cmd, num_workers=5, working_dir=None,
                 env=None):
        """Start the runner and the workers attached to ``endpoint``.

        - **endpoint**: key under which the runner and the workers are kept
          in the module-level ``_runners`` / ``_workers`` registries.
        - **workers_cmd**, **num_workers**, **working_dir**: passed on to
          ``CryptoWorkers``.
        - **env**: None, a dict, or a ``'KEY=VALUE;KEY2=VALUE2'`` string.

        The runner and the workers are created only once per endpoint.
        """
        self.endpoint = endpoint
        self.workers_cmd = workers_cmd

        # always bound, so that env=None hands an empty environment to the
        # workers instead of failing on an undefined name
        envdict = {}
        if env is not None:
            if isinstance(env, dict):
                envdict = env
            else:
                for pair in env.split(';'):
                    key, value = pair.split('=', 1)
                    envdict[key] = value

        if self.endpoint not in _runners:
            _runners[self.endpoint] = JobRunner(self.endpoint)
            _workers[self.endpoint] = CryptoWorkers(self.workers_cmd,
                                                    num_workers=num_workers,
                                                    working_dir=working_dir,
                                                    env=envdict)
        self.runner = _runners[self.endpoint]
        logger.debug('Starting powerhose master')
        self.runner.start()
        # give the runner a moment before the workers are launched
        time.sleep(.5)
        self.workers = _workers[self.endpoint]
        self.workers.run()
Esempio n. 20
0
    def _execute(self, job, timeout=1.):
        """Send ``job`` to a worker and return the worker's answer.

        - **job**: object providing a ``serialize()`` method.
        - **timeout**: how long to wait for the answer, in seconds; it is
          converted to milliseconds before being given to the poller.

        Returns the last item of a ``'JOBRES'`` reply.

        NOTE(review): the handlers of the outer ``try`` are not visible in
        this excerpt, so which exceptions finally reach the caller cannot
        be confirmed from here.
        """
        worker = None
        timeout *= 1000.   # timeout is in ms
        data = serialize("JOB", job.serialize())
        logger.debug('Lets run that job')
        try:
            logger.debug('getting a worker')

            # the worker is obtained through a context manager
            with self.workers.get_context() as worker:
                try:
                    # non-blocking send: fails right away instead of waiting
                    worker.send(data, zmq.NOBLOCK)
                except zmq.ZMQError, e:
                    raise ExecutionError(str(e))

                # a fresh poller watching only this worker
                poller = zmq.Poller()
                poller.register(worker, zmq.POLLIN)

                try:
                    events = dict(poller.poll(timeout))
                except zmq.ZMQError, e:
                    raise ExecutionError(str(e))

                # nothing readable within the timeout
                if events == {}:
                    raise TimeoutError()

                for socket in events:
                    try:
                        msg = unserialize(socket.recv())
                    except zmq.ZMQError, e:
                        raise ExecutionError(str(e))

                    if msg[0] == 'JOBRES':
                        # we got a result
                        return msg[-1]
                    else:
                        # any other reply type is not handled
                        raise NotImplementedError(str(msg))
Esempio n. 21
0
 def stop(self):
     """Stop the thread -- thus the registration

     Clears ``self.alive`` and then waits on ``self.join()``.
     """
     logger.debug('Stopping [workermgr]')
     # NOTE(review): presumably the run loop polls this flag -- confirm
     self.alive = False
     self.join()
Esempio n. 22
0
 def stop(self):
     """Stop the powerhose workers, then join this object.

     Calls ``self.workers.stop()`` followed by ``self.join()``.
     """
     logger.debug('Stopping powerhose workers')
     self.workers.stop()
     self.join()
Esempio n. 23
0
 def run(self):
     """Run the powerhose workers by calling ``self.workers.run()``."""
     logger.debug('Starting powerhose workers')
     self.workers.run()
Esempio n. 24
0
 def stop(self):
     """Stop the workers by calling ``self.trainer.stop()``.

     A debug message is logged before and after the call.
     """
     logger.debug('stopping workers')
     self.trainer.stop()
     logger.debug('stopping workers done')
Esempio n. 25
0
 def run(self):
     """Start the workers by calling ``self.trainer.start()``."""
     logger.debug('starting workers')
     self.trainer.start()
Esempio n. 26
0
 def stop(self):
     """Stop the workers by calling ``self.arbiter.stop()``.

     A debug message is logged before and after the call.
     """
     logger.debug('stopping workers')
     self.arbiter.stop()
     logger.debug('stopping workers done')
Esempio n. 27
0
                    events = dict(poller.poll(timeout))
                except zmq.ZMQError, e:
                    raise ExecutionError(str(e))

                if events == {}:
                    raise TimeoutError()

                for socket in events:
                    try:
                        msg = unserialize(socket.recv())
                    except zmq.ZMQError, e:
                        raise ExecutionError(str(e))

                    if msg[0] == 'JOBRES':
                        # we got a result
                        return msg[-1]
                    else:
                        raise NotImplementedError(str(msg))

        except Exception, e:
            logger.debug('something went wrong')

            if worker is not None:
                # killing this worker - it can come back on the next ping
                self.workers.delete(worker.identity)

            exc_type, exc_value, exc_traceback = sys.exc_info()
            exc = traceback.format_tb(exc_traceback)
            exc.insert(0, str(e))
            raise ExecutionError('\n'.join(exc))
Esempio n. 28
0
 def stop(self):
     """Stop the worker manager.

     Clears ``self.alive`` and then waits on ``self.join()``.
     """
     logger.debug('Stopping [workermgr]')
     # NOTE(review): presumably the run loop polls this flag -- confirm
     self.alive = False
     self.join()
Esempio n. 29
0
    def run(self):
        """Answer worker registration messages until ``self.alive`` is cleared.

        A ``zmq.REP`` socket is bound to ``self.endpoint`` and polled once per
        second.  The last two items of every unserialized message are read as
        a command and a worker name:

        - ``'PING'``: a worker name not yet in ``self.workers`` gets a
          ``zmq.REQ`` socket connected to it and added to ``self.workers``;
          the reply is always ``'PONG'``.
        - ``'REMOVE'``: the worker is deleted from ``self.workers`` when it
          is known; the reply is always ``'REMOVED'``.
        - anything else, or a message with fewer than two items, is answered
          with ``'ERROR'``.

        The loop also ends when polling raises ``zmq.ZMQError``.
        """
        self.alive = True

        # channel to communicate with the workers
        logger.debug('Starting [workermgr]')
        client = self.context.socket(zmq.REP)
        client.identity = 'master'
        client.bind(self.endpoint)
        poller = zmq.Poller()
        poller.register(client, zmq.POLLIN)
        poll_timeout = 1000

        while self.alive:
            try:
                events = dict(poller.poll(poll_timeout))
            except zmq.ZMQError as e:
                logger.debug("The poll failed")
                logger.debug(str(e))
                break

            for socket in events:
                msg = unserialize(socket.recv())

                if len(msg) < 2:
                    # malformed message: reply exactly once and move on.
                    # Falling through would index past the message and
                    # send a second reply for the same request.
                    logger.debug('[workermgr] malformed message: ' + str(msg))
                    socket.send('ERROR')
                    continue

                command, name = msg[-2], msg[-1]

                if command == 'PING':
                    logger.debug("[workermgr] Got a PING")
                    if name not in self.workers:
                        logger.debug("Registered " + name)
                        # keep track of that worker
                        work = self.context.socket(zmq.REQ)
                        work.connect(name)
                        work.identity = name
                        self.workers.add(work)

                    # in any case we pong back
                    logger.debug("[workermgr] sent a PONG")
                    socket.send('PONG')
                elif command == 'REMOVE':
                    if name in self.workers:
                        logger.debug("[workermgr] Removing` " + name)
                        self.workers.delete(name)
                    socket.send('REMOVED')
                else:
                    logger.debug('Error')
                    socket.send('ERROR')

            time.sleep(.1)
Esempio n. 30
0
 def run(self):
     """Start the workers by calling ``self.trainer.start()``."""
     logger.debug('starting workers')
     self.trainer.start()
Esempio n. 31
0
 def start(self):
     """Start the registration unless it is already marked as started."""
     if not self.started:
         logger.debug('Starting registration at ' + self.endpoint)
         self.registration.start()
         self.started = True
Esempio n. 32
0
 def stop(self):
     """Stop the registration unless it is not marked as started."""
     if self.started:
         logger.debug('Stopping registration at ' + self.endpoint)
         self.registration.stop()
         self.started = False
Esempio n. 33
0
 def stop(self):
     """Stop the workers by calling ``self.trainer.stop()``.

     A debug message is logged before and after the call.
     """
     logger.debug('stopping workers')
     self.trainer.stop()
     logger.debug('stopping workers done')
Esempio n. 34
0
 def run(self):
     """Start the workers by calling ``self.arbiter.start()``.

     A debug message is logged before the call and once it has returned.
     """
     logger.debug('starting workers')
     # NOTE(review): the message below suggests start() blocks until the
     # workers are done -- confirm
     self.arbiter.start()
     logger.debug('workers Stopped')