Beispiel #1
0
    def get(self, timeout=0):  # @UnusedVariable
        """Collect the result of the finished job and delete its output files.

        May be called only once, and only after ``self.told_you_ready`` has
        been set.

        :param timeout: accepted but not used by this implementation.
        :return: the result dictionary loaded from ``self.out_results``.
        :raises CompmakeBug: if called before ``told_you_ready`` is set, if
            called a second time, or if ``self.out_results`` does not exist.
        :raises HostFailed: if the content of ``self.retcode`` is not an
            integer.

        Any error encoded in the loaded dictionary is raised by
        ``result_dict_raise_if_error``.
        """
        if not self.told_you_ready:
            raise CompmakeBug("I didnt tell you it was ready.")
        if self.already_read:
            msg = 'Compmake BUG: should not call twice.'
            raise CompmakeBug(msg)
        self.already_read = True

        assert os.path.exists(self.retcode)
        with open(self.retcode, 'r') as f:
            ret_str = f.read()
        try:
            # The numeric value is not used below; parsing it only checks
            # that the return-code file is well formed.
            int(ret_str)
        except ValueError:
            msg = 'Could not interpret file %r: %r.' % (self.retcode, ret_str)
            raise HostFailed(host='localhost',
                             job_id=self.job_id,
                             reason=msg,
                             bt='')

        try:
            with open(self.stderr, 'r') as f:
                stderr = f.read()
            with open(self.stdout, 'r') as f:
                stdout = f.read()

            stderr = 'Contents of %s:\n' % self.stderr + stderr
            stdout = 'Contents of %s:\n' % self.stdout + stdout

            if not os.path.exists(self.out_results):
                msg = 'job succeeded but no %r found' % self.out_results
                msg += '\n' + indent(stderr, 'stderr')
                msg += '\n' + indent(stdout, 'stdout')
                raise CompmakeBug(msg)

            res = safe_pickle_load(self.out_results)
            result_dict_raise_if_error(res)
            return res
        finally:
            # Whatever happened above, remove the job's files that exist.
            fs = [self.stderr, self.stdout, self.out_results, self.retcode]
            for filename in fs:
                if os.path.exists(filename):
                    os.unlink(filename)
Beispiel #2
0
def result_dict_raise_if_error(res):
    """Raise the exception encoded in a result dictionary, if there is one.

    ``res`` is first validated with ``result_dict_check``.  The keys below
    are then probed in order, and the first one found selects the exception
    class that is rebuilt with ``from_dict(res)`` and raised:

    * ``'fail'`` -> ``JobFailed``
    * ``'abort'`` -> ``HostFailed``
    * ``'bug'`` -> ``CompmakeBug``
    * ``'interrupted'`` -> ``JobInterrupted``

    Returns ``None`` when none of these keys is present.
    """
    from compmake.exceptions import (JobFailed, HostFailed, CompmakeBug,
                                     JobInterrupted)

    result_dict_check(res)

    # Order matters: the first matching key wins.
    error_kinds = (
        ('fail', JobFailed),
        ('abort', HostFailed),
        ('bug', CompmakeBug),
        ('interrupted', JobInterrupted),
    )
    for key, exception_class in error_kinds:
        if key in res:
            raise exception_class.from_dict(res)
Beispiel #3
0
def result_dict_raise_if_error(res):
    """Turn an error-carrying result dictionary back into an exception.

    After ``result_dict_check(res)`` has validated the dictionary, the
    marker keys ``'fail'``, ``'abort'``, ``'bug'`` and ``'interrupted'`` are
    tested in that order.  The first key present picks, respectively,
    ``JobFailed``, ``HostFailed``, ``CompmakeBug`` or ``JobInterrupted``;
    that class's ``from_dict(res)`` builds the exception, which is raised.

    If no marker key is present the function returns ``None``.
    """
    from compmake.exceptions import CompmakeBug, HostFailed
    from compmake.exceptions import JobFailed, JobInterrupted

    result_dict_check(res)

    if 'fail' in res:
        failure_type = JobFailed
    elif 'abort' in res:
        failure_type = HostFailed
    elif 'bug' in res:
        failure_type = CompmakeBug
    elif 'interrupted' in res:
        failure_type = JobInterrupted
    else:
        # No error marker: nothing to raise.
        return

    raise failure_type.from_dict(res)
Beispiel #4
0
    def ready(self):
        """Poll once and report whether the job's return-code file exists.

        On the polls where ``self.npolls % 20 == 1`` the accounting record
        for ``self.sge_id`` is fetched with ``get_qacct`` and inspected for
        a scheduling failure.  ``self.npolls`` is incremented on every call
        that gets past those checks.

        :return: ``True`` if ``self.retcode`` exists (``told_you_ready`` is
            set as a side effect), ``False`` otherwise.
        :raises CompmakeBug: if called again after ``told_you_ready`` was
            set.
        :raises HostFailed: if the accounting record has a ``'failed'``
            entry different from ``'0'``.
        """
        if self.told_you_ready:
            raise CompmakeBug('should not call ready() twice')

        if self.npolls % 20 == 1:
            try:
                qacct = get_qacct(self.sge_id)
                if 'failed' in qacct and qacct['failed'] != '0':
                    reason = 'Job schedule failed: %s\n%s' % (qacct['failed'],
                                                              qacct)
                    raise HostFailed(host="xxx",
                                     job_id=self.job_id,
                                     reason=reason,
                                     bt="")  # XXX
            except JobNotRunYet:
                # No accounting record to inspect on this poll.
                pass

        self.npolls += 1

        if os.path.exists(self.retcode):
            self.told_you_ready = True
            return True

        # An accounting record may already exist while the return-code file
        # is still missing: per the original author this is simply NFS that
        # is not updated yet, so it is not treated as an error.
        return False
Beispiel #5
0
    def ready(self):
        """Poll the asynchronous result once and report whether it is ready.

        Every call increments ``self.count`` and queries
        ``self.async_result.ready()``.

        :return: the value returned by ``self.async_result.ready()``.
        :raises HostFailed: with reason ``'Timeout'`` once ``self.count``
            exceeds 100000; this check runs after the result has been
            queried and regardless of its answer.
        """
        self.count += 1
        is_ready = self.async_result.ready()

        # timeout
        if self.count > 100000:
            raise HostFailed(host='localhost',
                             job_id=self.job_id,
                             reason='Timeout',
                             bt='')

        return is_ready
Beispiel #6
0
def pmake_worker(name, job_queue, result_queue, signal_queue, signal_token,
                 write_log=None):
    """Worker loop: run jobs taken from ``job_queue`` and report results.

    Each job is a ``(function, arguments)`` pair; ``function(arguments)`` is
    called and its return value is put in ``result_queue``.  ``JobFailed``
    and ``CompmakeBug`` are reported through their ``get_result_dict()``,
    ``JobInterrupted`` as ``dict(abort=str(e))``.  After every result,
    ``signal_token`` is put in ``signal_queue`` when that queue is not
    ``None``.  The loop ends when ``PmakeSub.EXIT_TOKEN`` is received.  Any
    other exception also ends the loop and is reported as the result
    dictionary of a ``HostFailed``.  SIGINT is ignored in this process.

    :param name: label prefixed to every log message.
    :param job_queue: queue the jobs are read from (polled with a 5 second
        timeout).
    :param result_queue: queue the results are put in; closed on exit.
    :param signal_queue: optional queue notified after each result; closed
        on exit.
    :param signal_token: value put in ``signal_queue``.
    :param write_log: if given, path of a file that receives the log
        messages; otherwise they are printed.
    """
    log_file = open(write_log, 'w') if write_log else None

    if log_file is not None:
        def log(s):
            log_file.write('%s: ' % name)
            log_file.write(s)
            log_file.write('\n')
            log_file.flush()
    else:
        def log(s):
            print('%s: %s' % (name, s))

    def put_result(x):
        log('putting result in result_queue..')
        result_queue.put(x, block=True)
        if signal_queue is not None:
            log('putting result in signal_queue..')
            signal_queue.put(signal_token, block=True)
        log('(done)')

    try:
        log('started pmake_worker()')
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        try:
            while True:
                log('Listening for job')
                try:
                    job = job_queue.get(block=True, timeout=5)
                except Empty:
                    log('Could not receive anything.')
                    continue
                if job == PmakeSub.EXIT_TOKEN:
                    log('Received EXIT_TOKEN.')
                    break

                log('got job: %s' % str(job))
                function, arguments = job
                try:
                    result = function(arguments)
                except JobFailed as e:
                    log('Job failed, putting notice.')
                    log('result: %s' % str(e))  # debug
                    put_result(e.get_result_dict())
                except JobInterrupted as e:
                    log('Job interrupted, putting notice.')
                    put_result(dict(abort=str(e)))  # XXX
                except CompmakeBug as e:  # XXX :to finish
                    log('CompmakeBug')
                    put_result(e.get_result_dict())
                else:
                    log('result: %s' % str(result))
                    put_result(result)

                log('...done.')

        except BaseException:
            # Anything not handled per-job above terminates the worker and
            # is reported to the parent as a host failure.
            reason = 'aborted because of uncaptured:\n' + indent(
                    traceback.format_exc(), '| ')
            mye = HostFailed(host="???", job_id="???",
                             reason=reason, bt=traceback.format_exc())
            log(str(mye))
            put_result(mye.get_result_dict())
        except:
            mye = HostFailed(host="???", job_id="???",
                             reason='Uknown exception (not BaseException)',
                             bt="not available")
            log(str(mye))
            put_result(mye.get_result_dict())
            log('(put)')

        if signal_queue is not None:
            signal_queue.close()
        result_queue.close()
        log('clean exit.')
    finally:
        # Release the log file handle, which was previously never closed.
        if log_file is not None:
            log_file.close()
Beispiel #7
0
def pmake_worker(name,
                 job_queue,
                 result_queue,
                 signal_queue,
                 signal_token,
                 write_log=None):
    """Process jobs from ``job_queue`` until the exit token arrives.

    A job is a ``(function, arguments)`` pair.  The value returned by
    ``function(arguments)`` is put in ``result_queue``; ``JobFailed`` and
    ``CompmakeBug`` are converted with ``get_result_dict()`` and
    ``JobInterrupted`` is sent as ``dict(abort=str(e))``.  When
    ``signal_queue`` is not ``None``, ``signal_token`` is put in it after
    each result.  Receiving ``PmakeSub.EXIT_TOKEN`` ends the loop; so does
    any other exception, which is sent as the result dictionary of a
    ``HostFailed``.  The process ignores SIGINT while it runs.

    :param name: prefix for the log messages.
    :param job_queue: source of jobs, polled with a 5 second timeout.
    :param result_queue: destination of results; closed before returning.
    :param signal_queue: optional notification queue; closed before
        returning.
    :param signal_token: value placed in ``signal_queue``.
    :param write_log: optional path of a log file; without it the messages
        are printed.
    """
    log_file = open(write_log, 'w') if write_log else None

    if log_file is not None:

        def log(s):
            log_file.write('%s: ' % name)
            log_file.write(s)
            log_file.write('\n')
            log_file.flush()
    else:

        def log(s):
            print('%s: %s' % (name, s))

    def put_result(x):
        log('putting result in result_queue..')
        result_queue.put(x, block=True)
        if signal_queue is not None:
            log('putting result in signal_queue..')
            signal_queue.put(signal_token, block=True)
        log('(done)')

    try:
        log('started pmake_worker()')
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        try:
            while True:
                log('Listening for job')
                try:
                    job = job_queue.get(block=True, timeout=5)
                except Empty:
                    log('Could not receive anything.')
                    continue
                if job == PmakeSub.EXIT_TOKEN:
                    log('Received EXIT_TOKEN.')
                    break

                log('got job: %s' % str(job))
                function, arguments = job
                try:
                    result = function(arguments)
                except JobFailed as e:
                    log('Job failed, putting notice.')
                    log('result: %s' % str(e))  # debug
                    put_result(e.get_result_dict())
                except JobInterrupted as e:
                    log('Job interrupted, putting notice.')
                    put_result(dict(abort=str(e)))  # XXX
                except CompmakeBug as e:  # XXX :to finish
                    log('CompmakeBug')
                    put_result(e.get_result_dict())
                else:
                    log('result: %s' % str(result))
                    put_result(result)

                log('...done.')

        except BaseException:
            # Exceptions that escape the per-job handlers stop the worker
            # and are forwarded as a host failure.
            reason = 'aborted because of uncaptured:\n' + indent(
                traceback.format_exc(), '| ')
            mye = HostFailed(host="???",
                             job_id="???",
                             reason=reason,
                             bt=traceback.format_exc())
            log(str(mye))
            put_result(mye.get_result_dict())
        except:
            mye = HostFailed(host="???",
                             job_id="???",
                             reason='Uknown exception (not BaseException)',
                             bt="not available")
            log(str(mye))
            put_result(mye.get_result_dict())
            log('(put)')

        if signal_queue is not None:
            signal_queue.close()
        result_queue.close()
        log('clean exit.')
    finally:
        # The log file used to stay open for the life of the process.
        if log_file is not None:
            log_file.close()
Beispiel #8
0
def pmake_worker(name, job_queue, result_queue, signal_queue, signal_token, write_log=None):
    """Worker loop: execute jobs from ``job_queue`` and publish the results.

    Each job is a ``(function, arguments)`` pair.  The return value of
    ``function(arguments)`` is put in ``result_queue``.  ``JobFailed`` and
    ``CompmakeBug`` are reported via ``get_result_dict()``, and
    ``JobInterrupted`` as ``dict(abort=str(e))``.  After each result,
    ``signal_token`` is put in ``signal_queue`` if that queue is not
    ``None``.  ``PmakeSub.EXIT_TOKEN`` ends the loop.  Any other exception
    also ends it and is reported as the result dictionary of a
    ``HostFailed`` carrying the formatted traceback.  SIGINT is ignored.

    :param name: prefix for log messages.
    :param job_queue: queue of jobs; read with a blocking ``get``.
    :param result_queue: queue receiving the results; closed on exit.
    :param signal_queue: optional notification queue; closed on exit.
    :param signal_token: value put in ``signal_queue``.
    :param write_log: optional path of a log file; without it nothing is
        logged.
    """
    log_file = open(write_log, "w") if write_log else None

    if log_file is not None:

        def log(s):
            log_file.write("%s: " % name)
            log_file.write(s)
            log_file.write("\n")
            log_file.flush()

    else:

        def log(s):
            pass

    def put_result(x):
        log("putting result in result_queue..")
        result_queue.put(x, block=True)
        if signal_queue is not None:
            log("putting result in signal_queue..")
            signal_queue.put(signal_token, block=True)
        log("(done)")

    try:
        log("started pmake_worker()")
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        try:
            while True:
                log("Listening for job")
                job = job_queue.get(block=True)
                log("got job: %s" % str(job))
                if job == PmakeSub.EXIT_TOKEN:
                    break
                function, arguments = job
                try:
                    result = function(arguments)
                except JobFailed as e:
                    log("Job failed, putting notice.")
                    log("result: %s" % str(e))  # debug
                    put_result(e.get_result_dict())
                except JobInterrupted as e:
                    log("Job interrupted, putting notice.")
                    put_result(dict(abort=str(e)))  # XXX
                except CompmakeBug as e:  # XXX :to finish
                    log("CompmakeBug")
                    put_result(e.get_result_dict())
                else:
                    log("result: %s" % str(result))
                    put_result(result)

                log("...done.")

        except BaseException:
            # format_exc() formats the exception currently being handled;
            # its first positional parameter is a frame limit, not the
            # exception, so nothing must be passed here.
            bt = traceback.format_exc()
            reason = "aborted because of uncaptured:\n" + indent(bt, "| ")
            mye = HostFailed(host="???", job_id="???", reason=reason, bt=bt)
            log(str(mye))
            put_result(mye.get_result_dict())
        except:
            mye = HostFailed(host="???", job_id="???", reason="Uknown exception (not BaseException)", bt="not available")
            log(str(mye))
            put_result(mye.get_result_dict())
            log("(put)")

        if signal_queue is not None:
            signal_queue.close()
        result_queue.close()
        log("clean exit.")
    finally:
        # Close the log file, which was previously left open.
        if log_file is not None:
            log_file.close()