Exemple #1
0
 def reap_workers(self):
     """\
     Collect exited child processes so that none is left as a zombie.

     Polls ``os.waitpid`` in non-blocking mode until no exited child
     remains. A child whose pid equals ``self.reexec_pid`` only resets
     that attribute; any other child is removed from ``self.WORKERS``
     and has its ``tmp`` file closed.

     Raises ``HaltServer`` when a child exits with
     ``WORKER_BOOT_ERROR`` or ``APP_LOAD_ERROR``.
     """
     try:
         while True:
             pid, raw_status = os.waitpid(-1, os.WNOHANG)
             if not pid:
                 # Nothing has exited since the last poll.
                 break

             if pid == self.reexec_pid:
                 # The child tracked in reexec_pid is gone; clear the marker.
                 self.reexec_pid = 0
                 continue

             # The exit code sits in the high byte of the wait status.
             # A worker that reports it cannot boot stops the server,
             # which avoids infinite start/stop cycles.
             exit_code = raw_status >> 8
             if exit_code == self.WORKER_BOOT_ERROR:
                 raise HaltServer("Worker failed to boot.",
                                  self.WORKER_BOOT_ERROR)
             if exit_code == self.APP_LOAD_ERROR:
                 raise HaltServer("App failed to load.",
                                  self.APP_LOAD_ERROR)

             dead_worker = self.WORKERS.pop(pid, None)
             if dead_worker:
                 dead_worker.tmp.close()
     except OSError as err:
         # ECHILD only means there is no child left to wait for.
         if err.errno == errno.ECHILD:
             return
         raise
Exemple #2
0
    def reap_workers(self):
        """\
        Collect exited child processes so that none is left as a zombie.

        Polls ``os.waitpid`` in non-blocking mode until no exited child
        remains. A child whose pid equals ``self.reexec_pid`` only resets
        that attribute; any other child is removed from ``self.WORKERS``,
        has its ``tmp`` file closed and is passed to
        ``self.cfg.child_exit``.

        Raises ``HaltServer`` when a child exits with
        ``WORKER_BOOT_ERROR`` or ``APP_LOAD_ERROR``.
        """
        try:
            while True:
                pid, raw_status = os.waitpid(-1, os.WNOHANG)
                if not pid:
                    # Nothing has exited since the last poll.
                    break

                if pid == self.reexec_pid:
                    # The child tracked in reexec_pid is gone; clear the
                    # marker.
                    self.reexec_pid = 0
                    continue

                # A worker was terminated. The exit code sits in the high
                # byte of the wait status; if it says the worker could not
                # boot, halt the server to avoid infinite start/stop cycles.
                exit_code = raw_status >> 8
                if exit_code == self.WORKER_BOOT_ERROR:
                    raise HaltServer("Worker failed to boot.",
                                     self.WORKER_BOOT_ERROR)
                if exit_code == self.APP_LOAD_ERROR:
                    raise HaltServer("app failed to load.",
                                     self.APP_LOAD_ERROR)

                dead_worker = self.WORKERS.pop(pid, None)
                if dead_worker:
                    dead_worker.tmp.close()
                    self.cfg.child_exit(self, dead_worker)
        except OSError as err:
            # ECHILD only means there is no child left to wait for.
            if err.errno == errno.ECHILD:
                return
            raise
Exemple #3
0
def loadModel():
    """Create the global TensorFlow session and load the Keras model.

    The model file is looked up in this order:

    1. every command-line argument containing ``.h5`` is loaded in turn
       (when several match, the last one loaded is kept);
    2. otherwise the path in the ``FF_MODEL`` environment variable.

    Side effects:
        Rebinds the module-level ``SESSION`` to a new ``tf.Session`` and
        registers it with ``K.set_session``.

    Returns:
        The loaded model, after ``_make_predict_function()`` has been
        called on it.

    Raises:
        HaltServer: if no ``.h5`` argument was given and ``FF_MODEL`` is
            not set.
    """
    global SESSION
    SESSION = tf.Session()
    K.set_session(SESSION)

    model = None
    with SESSION.as_default():
        for f in sys.argv[1:]:
            if '.h5' in f:
                logger.info('loading keras model from %s' % f)
                model = load_model(f)

        # Only consult the environment -- and only fail -- when argv did
        # not yield a model. The previous ``if ... else`` raised HaltServer
        # even after a model had been loaded successfully from argv.
        if model is None:
            env_path = os.environ.get('FF_MODEL', None)
            if env_path is None:
                msg = 'cannot find model file in sys.argv or in environment var "FF_MODEL"'
                logger.error(msg)
                raise HaltServer(msg)
            logger.info('loading keras model from %s' % env_path)
            model = load_model(env_path)

    # necessary for multi-threads, for some reason
    model._make_predict_function()
    #GRAPH.finalize() # avoid modifications
    return model
Exemple #4
0
    def reap_workers(self, abc=''):
        """Collect exited child processes so that none is left as a zombie.

        ``abc`` is a prefix put in front of every message sent to
        ``self._log``.

        Polls ``os.waitpid`` in non-blocking mode until no exited child
        remains. A child whose pid equals ``self.reexec_pid`` only resets
        that attribute; any other child is removed from ``self.WORKERS``,
        has its ``tmp`` file closed and is passed to
        ``self.cfg.child_exit``.

        Raises ``HaltServer`` when a child exits with
        ``WORKER_BOOT_ERROR`` or ``APP_LOAD_ERROR``.
        """
        self._log('%s reap_workers' % abc)
        try:
            while True:
                # -1 means "wait for any child process": not necessarily a
                # worker, it may also be the child forked during reexec.
                pid, raw_status = os.waitpid(-1, os.WNOHANG)
                if not pid:
                    # Nothing has exited since the last poll.
                    break

                log_fields = (abc, pid, self.reexec_pid, self.master_pid)
                self._log('%s reap_workers wpid=%s reexec_pid=%s master_pid=%s'
                          % log_fields)

                if pid == self.reexec_pid:
                    # This is the child that was forked during reexec.
                    self.reexec_pid = 0
                    continue

                # A worker was terminated. The exit code sits in the high
                # byte of the wait status; if it says the worker could not
                # boot, halt the server to avoid infinite start/stop cycles.
                exit_code = raw_status >> 8
                if exit_code == self.WORKER_BOOT_ERROR:
                    raise HaltServer("Worker failed to boot.",
                                     self.WORKER_BOOT_ERROR)
                if exit_code == self.APP_LOAD_ERROR:
                    raise HaltServer("App failed to load.",
                                     self.APP_LOAD_ERROR)

                dead_worker = self.WORKERS.pop(pid, None)
                if dead_worker:
                    dead_worker.tmp.close()
                    self.cfg.child_exit(self, dead_worker)
        except OSError as err:
            # ECHILD only means there is no child left to wait for.
            if err.errno == errno.ECHILD:
                return
            raise