Example #1
import pickle
import queue
import sys
import time

# util, LocalSlave, get_memory_available, MPQueueFixed, logger and
# ppg_exceptions are provided by the enclosing module; they are not
# defined in this excerpt.


class LocalSystem:
    """A ResourceCoordinator that uses the current machine,
    up to max_cores_to_use cores of it.

    It uses multiprocessing and the LocalSlave.
    """

    def __init__(self, max_cores_to_use=util.CPUs(), profile=False):
        self.max_cores_to_use = max_cores_to_use  # todo: update to local cpu count...
        self.slave = LocalSlave(self)
        self.cores_available = max_cores_to_use
        self.physical_memory, self.swap_memory = get_memory_available()
        self.timeout = 5
        self.profile = profile

    def spawn_slaves(self):
        return {'LocalSlave': self.slave}

    def get_resources(self):
        res = {
            'LocalSlave': {  # always the maximum available - the graph handles the bookkeeping of running jobs
                'cores': self.cores_available,
                'physical_memory': self.physical_memory,
                'swap_memory': self.swap_memory,
            }
        }
        logger.info('get_resources, result %s - %s' % (id(res), res))
        return res

    def enter_loop(self):
        self.spawn_slaves()
        self.que = MPQueueFixed()
        logger.info("Starting first batch of jobs")
        self.pipegraph.start_jobs()
        while True:
            self.slave.check_for_dead_jobs()  # whether timed out or the job was done, let's check this...
            self.see_if_output_is_requested()
            try:
                logger.info("Listening to que")
                # was there a job done?
                (slave_id, was_ok, job_id_done, stdout, stderr, exception,
                 trace, new_jobs) = self.que.get(block=True, timeout=self.timeout)
                logger.info("Job returned: %s, was_ok: %s" % (job_id_done, was_ok))
                logger.info("Remaining in que (approx): %i" % self.que.qsize())
                job = self.pipegraph.jobs[job_id_done]
                job.was_done_on.add(slave_id)
                job.stdout = stdout
                job.stderr = stderr
                job.exception = exception
                job.trace = trace
                job.failed = not was_ok
                job.stop_time = time.time()
                if job.start_time:
                    logger.info("%s runtime: %is" % (job_id_done, job.stop_time - job.start_time))
                if job.failed:
                    try:
                        if job.exception.startswith(b'STR'):
                            job.exception = job.exception[3:]
                            raise pickle.UnpicklingError("String Transmission")  # what an ugly control flow...
                        logger.info("Before depickle %s" % type(exception))
                        job.exception = pickle.loads(exception)
                        logger.info("After depickle %s" % type(job.exception))
                        logger.info("exception stored at %s" % job)
                    except (pickle.UnpicklingError, EOFError):
                        pass  # some exceptions can't be pickled, so we send a string instead
                    if job.exception:
                        logger.info("Exception: %s" % repr(exception))
                        logger.info("Trace: %s" % trace)
                    logger.info("stdout: %s" % stdout)
                    logger.info("stderr: %s" % stderr)
                if new_jobs is not False:
                    if not job.modifies_jobgraph():
                        job.exception = ppg_exceptions.JobContractError(
                            "%s created jobs, but was not a job with modifies_jobgraph() returning True" % job)
                        job.failed = True
                    else:
                        new_jobs = pickle.loads(new_jobs)
                        logger.info("We retrieved %i new jobs from %s" % (len(new_jobs), job))
                        self.pipegraph.new_jobs_generated_during_runtime(new_jobs)

                more_jobs = self.pipegraph.job_executed(job)
                # if job.cores_needed == -1:
                #     self.cores_available = self.max_cores_to_use
                # else:
                #     self.cores_available += job.cores_needed
                if not more_jobs:  # all jobs are done and none are still running
                    break
                self.pipegraph.start_jobs()

            except (queue.Empty, IOError):  # either timeout, or the que failed
                pass
        self.que.close()
        self.que.join_thread()  # wait for the que to close
        logger.info("Leaving loop")

    def see_if_output_is_requested(self):
        import select
        if select.select([sys.stdin], [], [], 0)[0]:
            sys.stdin.read(1)  # enter pressed...
            self.pipegraph.print_running_jobs()
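For orientation, a minimal, hypothetical usage sketch: it only exercises the two inspection methods shown above; wiring the coordinator into an actual pipegraph is not shown here.

# Hypothetical sketch (not from the original source): instantiate the
# coordinator and inspect what it would advertise to the graph.
coordinator = LocalSystem(max_cores_to_use=4)
print(coordinator.spawn_slaves())   # {'LocalSlave': <LocalSlave instance>}
print(coordinator.get_resources())  # per-slave cores and memory figures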
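The failure branch above implies a sender-side convention: exceptions are pickled when possible, and otherwise sent as a string prefixed with b'STR' so the receiver's startswith() check routes them through the UnpicklingError path. A hypothetical encoder matching that convention (encode_exception is an illustration, not a function from the source):

import pickle

def encode_exception(exc):
    # Hypothetical helper mirroring the receiver logic in enter_loop:
    # pickle if possible, otherwise fall back to b'STR' + the string form.
    try:
        return pickle.dumps(exc)
    except Exception:
        return b'STR' + str(exc).encode('utf-8')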
Example #3
    def enter_loop(self):
        self.spawn_slaves()
        self.que = MPQueueFixed()
        logger.info("Starting first batch of jobs")
        self.pipegraph.start_jobs()
        while True:
            self.slave.check_for_dead_jobs()  # whether timed out or the job was done, let's check this...
            self.see_if_output_is_requested()
            try:
                logger.info("Listening to que")
                # was there a job done?
                (slave_id, was_ok, job_id_done, stdout, stderr, exception,
                 trace, new_jobs) = self.que.get(block=True, timeout=self.timeout)
                logger.info("Job returned: %s, was_ok: %s" % (job_id_done, was_ok))
                logger.info("Remaining in que (approx): %i" % self.que.qsize())
                job = self.pipegraph.jobs[job_id_done]
                job.was_done_on.add(slave_id)
                job.stdout = stdout
                job.stderr = stderr
                job.exception = exception
                job.trace = trace
                job.failed = not was_ok
                job.stop_time = time.time()
                if job.start_time:
                    logger.info("%s runtime: %is" % (job_id_done, job.stop_time - job.start_time))
                if job.failed:
                    try:
                        if job.exception.startswith(b'STR'):
                            job.exception = job.exception[3:]
                            raise pickle.UnpicklingError("String Transmission")  # what an ugly control flow...
                        logger.info("Before depickle %s" % type(exception))
                        job.exception = pickle.loads(exception)
                        logger.info("After depickle %s" % type(job.exception))
                        logger.info("exception stored at %s" % job)
                    except (pickle.UnpicklingError, EOFError):
                        pass  # some exceptions can't be pickled, so we send a string instead
                    if job.exception:
                        logger.info("Exception: %s" % repr(exception))
                        logger.info("Trace: %s" % trace)
                    logger.info("stdout: %s" % stdout)
                    logger.info("stderr: %s" % stderr)
                if new_jobs is not False:
                    if not job.modifies_jobgraph():
                        job.exception = ppg_exceptions.JobContractError(
                            "%s created jobs, but was not a job with modifies_jobgraph() returning True" % job)
                        job.failed = True
                    else:
                        new_jobs = pickle.loads(new_jobs)
                        logger.info("We retrieved %i new jobs from %s" % (len(new_jobs), job))
                        self.pipegraph.new_jobs_generated_during_runtime(new_jobs)

                more_jobs = self.pipegraph.job_executed(job)
                # if job.cores_needed == -1:
                #     self.cores_available = self.max_cores_to_use
                # else:
                #     self.cores_available += job.cores_needed
                if not more_jobs:  # all jobs are done and none are still running
                    break
                self.pipegraph.start_jobs()

            except (queue.Empty, IOError):  # either timeout, or the que failed
                pass
        self.que.close()
        self.que.join_thread()  # wait for the que to close
        logger.info("Leaving loop")