Example #1
0
    def get_job_table(self, state_filter, limit = 50, offset = 0,
                      cmd_filter = None):
        """Return one page of rows from the job table, highest job_id first.

        state_filter is a '|'-separated string of job states. Entries that
        are neither 'all' nor contained in self.all_state are ignored; if
        nothing is left the filter falls back to 'fail', 'pending' and
        'running', and 'all' expands to every state in self.all_state.
        cmd_filter, when true, additionally restricts rows to that
        job_jobname.

        Returns a (rows, has_next) tuple: rows holds at most `limit` rows
        and has_next tells whether at least one more row follows them.
        """
        # Fetch one row more than requested to detect a following page.
        fetch_count = limit + 1
        data = []

        states = tuple(s for s in state_filter.split('|')
                       if s == 'all' or s in self.all_state)
        if not states:
            states = ('fail', 'pending', 'running')
        if 'all' in states:
            states = tuple(self.all_state)

        # One placeholder per state; values are quoted by execute().
        fmt_strs = ', '.join(['%s'] * len(states))
        q = 'SELECT * FROM job WHERE job_state IN (' + fmt_strs + ')'
        args = states
        if cmd_filter:
            # Leading space keeps the clause separated from the IN (...) list.
            q += ' AND job_jobname = %s'
            args += (cmd_filter,)
        q += ' ORDER BY job_id DESC LIMIT %s OFFSET %s'
        args += (fetch_count, offset)

        with db.connection(self):
            self.cursor.execute(q, args)
            data = self.cursor.fetchall()

        has_next = len(data) == fetch_count
        return data[:limit], has_next
Example #2
0
 def add_request(self, jobname, run_cmd, args,  max_vmem,
                 cpu_bound = True, force = False):
     """Call self._add_request() inside a db.connection(self) context.

     All arguments are forwarded unchanged. Returns the value produced by
     _add_request(); the result is preset to 0 before the call.
     """
     new_job_id = 0
     with db.connection(self):
         new_job_id = self._add_request(jobname, run_cmd, args, max_vmem,
                                        cpu_bound, force)
     return new_job_id
Example #3
0
    def get_job_table(self, state_filter, limit = 50, offset = 0,
                      cmd_filter = None):
        """Return one page of rows from the job table, highest job_id first.

        state_filter is a '|'-separated string of job states. Entries that
        are neither 'all' nor contained in self.all_state are ignored; if
        nothing is left the filter falls back to 'fail', 'pending' and
        'running', and 'all' expands to every state in self.all_state.
        cmd_filter, when true, additionally restricts rows to that
        job_jobname.

        Returns a (rows, has_next) tuple: rows holds at most `limit` rows
        and has_next tells whether at least one more row follows them.
        """
        # Fetch one row more than requested to detect a following page.
        fetch_count = limit + 1
        data = []

        states = tuple(s for s in state_filter.split('|')
                       if s == 'all' or s in self.all_state)
        if not states:
            states = ('fail', 'pending', 'running')
        if 'all' in states:
            states = tuple(self.all_state)

        # One placeholder per state; values are quoted by execute().
        fmt_strs = ', '.join(['%s'] * len(states))
        q = 'SELECT * FROM job WHERE job_state IN (' + fmt_strs + ')'
        args = states
        if cmd_filter:
            # Leading space keeps the clause separated from the IN (...) list.
            q += ' AND job_jobname = %s'
            args += (cmd_filter,)
        q += ' ORDER BY job_id DESC LIMIT %s OFFSET %s'
        args += (fetch_count, offset)

        with db.connection(self):
            self.cursor.execute(q, args)
            data = self.cursor.fetchall()

        has_next = len(data) == fetch_count
        return data[:limit], has_next
Example #4
0
 def add_request(self, jobname, run_cmd, args,  max_vmem,
                 cpu_bound = True, force = False):
     """Call self._add_request() inside a db.connection(self) context.

     All arguments are forwarded unchanged. Returns the value produced by
     _add_request(); the result is preset to 0 before the call.
     """
     new_job_id = 0
     with db.connection(self):
         new_job_id = self._add_request(jobname, run_cmd, args, max_vmem,
                                        cpu_bound, force)
     return new_job_id
Example #5
0
    def update_accounting(self):
        """Complete accounting rows whose sge_hostname is still empty.

        The rows are collected in a dict keyed by sge_jobnumber, each value
        being a one-element list holding the row, and handed to
        self.search_accounting(). Every entry whose list then holds a second
        element (presumably appended by search_accounting() -- confirm) is
        written back: the sge_* columns of the accounting row are updated
        from that element and the job state becomes 'fail' when either
        'failed' or 'exit_status' is non-zero, 'success' otherwise.
        """
        jobs = {}
        with db.connection(self):
            q = 'SELECT job_id, sge_jobnumber, sge_hostname FROM accounting WHERE sge_hostname=""'
            self.cursor.execute(q)
            for row in self.cursor.fetchall():
                jobs[row['sge_jobnumber']] = [ row ]

        if not jobs:
            return

        self.search_accounting(jobs)

        fields = [ 'hostname', 'qsub_time', 'start_time', 'end_time',
                   'failed', 'exit_status', 'ru_utime', 'ru_stime',
                   'ru_wallclock', 'used_maxvmem' ]
        # Named placeholders, e.g. "sge_hostname=%(hostname)s"; the clause is
        # the same for every job so build it once.
        set_str = ', '.join('sge_%s=%%(%s)s' % (f, f) for f in fields)

        with db.connection(self):
            for key, entries in jobs.items():
                # Look the entry up with the original key and use the int
                # form only for the SQL text, so a non-int key type cannot
                # cause a KeyError.
                sge_jobnumber = int(key)

                # Accounting not found, it will be found in the next run.
                if len(entries) <= 1:
                    continue

                # We can't let execute() do the quoting for jobnumber, but
                # sge_jobnumber is forced to int so this code is SQL
                # injection safe.
                q = "UPDATE accounting SET " + set_str
                q += ' WHERE sge_jobnumber=%d' % sge_jobnumber
                # execute() accepts neither a namedtuple nor an OrderedDict,
                # so convert explicitly to a plain dict.
                d = dict(entries[1]._asdict())
                self.cursor.execute(q, d)

                job = entries[0]

                new_state = 'success'
                if int(d['failed']) or int(d['exit_status']):
                    new_state = 'fail'
                q = 'UPDATE job SET job_state=%s WHERE job_id=%s'
                self.cursor.execute(q, [ new_state, job['job_id'] ])
Example #6
0
    def pending_request(self, limit = 16, offset = 0):
        """Return up to `limit` rows of the job table in state 'pending'.

        `offset` is passed to the query's OFFSET clause. The query has no
        ORDER BY, so the row order is whatever the database returns.
        """
        rows = []
        query = "SELECT * FROM job WHERE job_state='pending' LIMIT %s OFFSET %s"
        with db.connection(self):
            self.cursor.execute(query, [ limit, offset ])
            rows = self.cursor.fetchall()
        return rows
Example #7
0
    def pending_request(self, limit = 16, offset = 0):
        """Return up to `limit` rows of the job table in state 'pending'.

        `offset` is passed to the query's OFFSET clause. The query has no
        ORDER BY, so the row order is whatever the database returns.
        """
        rows = []
        query = "SELECT * FROM job WHERE job_state='pending' LIMIT %s OFFSET %s"
        with db.connection(self):
            self.cursor.execute(query, [ limit, offset ])
            rows = self.cursor.fetchall()
        return rows
Example #8
0
    def update_accounting(self):
        """Complete accounting rows whose sge_hostname is still empty.

        The rows are collected in a dict keyed by sge_jobnumber, each value
        being a one-element list holding the row, and handed to
        self.search_accounting(). Every entry whose list then holds a second
        element (presumably appended by search_accounting() -- confirm) is
        written back: the sge_* columns of the accounting row are updated
        from that element and the job state becomes 'fail' when either
        'failed' or 'exit_status' is non-zero, 'success' otherwise.
        """
        jobs = {}
        with db.connection(self):
            q = 'SELECT job_id, sge_jobnumber, sge_hostname FROM accounting WHERE sge_hostname=""'
            self.cursor.execute(q)
            for row in self.cursor.fetchall():
                jobs[row['sge_jobnumber']] = [ row ]

        if not jobs:
            return

        self.search_accounting(jobs)

        fields = [ 'hostname', 'qsub_time', 'start_time', 'end_time',
                   'failed', 'exit_status', 'ru_utime', 'ru_stime',
                   'ru_wallclock', 'used_maxvmem' ]
        # Named placeholders, e.g. "sge_hostname=%(hostname)s"; the clause is
        # the same for every job so build it once.
        set_str = ', '.join('sge_%s=%%(%s)s' % (f, f) for f in fields)

        with db.connection(self):
            for key, entries in jobs.items():
                # Look the entry up with the original key and use the int
                # form only for the SQL text, so a non-int key type cannot
                # cause a KeyError.
                sge_jobnumber = int(key)

                # Accounting not found, it will be found in the next run.
                if len(entries) <= 1:
                    continue

                # We can't let execute() do the quoting for jobnumber, but
                # sge_jobnumber is forced to int so this code is SQL
                # injection safe.
                q = "UPDATE accounting SET " + set_str
                q += ' WHERE sge_jobnumber=%d' % sge_jobnumber
                # execute() accepts neither a namedtuple nor an OrderedDict,
                # so convert explicitly to a plain dict.
                d = dict(entries[1]._asdict())
                self.cursor.execute(q, d)

                job = entries[0]

                new_state = 'success'
                if int(d['failed']) or int(d['exit_status']):
                    new_state = 'fail'
                q = 'UPDATE job SET job_state=%s WHERE job_id=%s'
                self.cursor.execute(q, [ new_state, job['job_id'] ])
Example #9
0
def update_db(lang, bookname):
    """Record the cached sha1 of a book in the hocr database.

    When the file 'sha1.sum' exists under cache_path(bookname, lang), its
    sha1 is read with read_sha1() and stored through
    DbHocr.add_update_row(); otherwise a message is written to stderr.
    """
    import hocr_request

    hocr_db = hocr_request.DbHocr()
    with db.connection(hocr_db):
        book_dir = cache_path(bookname, lang)
        if not os.path.exists(book_dir + 'sha1.sum'):
            print >> sys.stderr, "Can't locate sha1.sum", book_dir
        else:
            checksum = read_sha1(book_dir)
            hocr_db.add_update_row(bookname, lang, checksum)
Example #10
0
 def check_running(self):
     """Check jobs marked running in the database against qstat.

     Every row of the job table in state "running" whose sge_jobnumber is
     not in the result of qstat.running_jobs('') is passed to
     self._exec_check(). Returns the number of jobs reported by qstat, or
     None when qstat reports nothing.
     """
     active = qstat.running_jobs('')
     if not active:
         return None

     with db.connection(self):
         query = 'SELECT job_id, sge_jobnumber, job_args FROM job WHERE job_state="running"'
         self.cursor.execute(query)
         for row in self.cursor.fetchall():
             if row['sge_jobnumber'] not in active:
                 self._exec_check(row)
     return len(active)
Example #11
0
 def check_running(self):
     """Check jobs marked running in the database against qstat.

     Every row of the job table in state "running" whose sge_jobnumber is
     not in the result of qstat.running_jobs('') is passed to
     self._exec_check(). Returns the number of jobs reported by qstat, or
     None when qstat reports nothing.
     """
     active = qstat.running_jobs('')
     if not active:
         return None

     with db.connection(self):
         query = 'SELECT job_id, sge_jobnumber, job_args FROM job WHERE job_state="running"'
         self.cursor.execute(query)
         for row in self.cursor.fetchall():
             if row['sge_jobnumber'] not in active:
                 self._exec_check(row)
     return len(active)
Example #12
0
def update_db(lang, bookname):
    """Record the cached sha1 of a book in the hocr database.

    When the file 'sha1.sum' exists under cache_path(bookname, lang), its
    sha1 is read with read_sha1() and stored through
    DbHocr.add_update_row(); otherwise a message is written to stderr.
    """
    import hocr_request

    hocr_db = hocr_request.DbHocr()
    with db.connection(hocr_db):
        book_dir = cache_path(bookname, lang)
        if not os.path.exists(book_dir + 'sha1.sum'):
            print >> sys.stderr, "Can't locate sha1.sum", book_dir
        else:
            checksum = read_sha1(book_dir)
            hocr_db.add_update_row(bookname, lang, checksum)
Example #13
0
    def exec_request(self, r):
        """Submit the job described by row r and record the outcome.

        The job is first claimed by switching it from "pending" to 'running'
        with sge_jobnumber 0; when execute() returns a false value for that
        update (presumably the affected-row count -- confirm) the request is
        reported on stderr and skipped. Otherwise the command line built
        from sge_cmdline_arg(r) and job_cmdline_arg(r, 'job_run_cmd') is
        run, and the job row is updated with the job number parsed from its
        output, or with state 'sge_fail' and job number 0 when no job
        number could be parsed.
        """
        sge_job_nr = 0

        # This is a bit convoluted but we need it to avoid a race condition:
        # we set the job as running before starting it, so if this script
        # runs twice in parallel we don't try to start the same job twice.
        # Then, when the job really started or failed to start, we update its
        # state again. As we don't know the sge job number yet, we set it to
        # zero. Note this could be done in pending_request() but I prefer to
        # protect it locally.
        really_pending = False
        with db.connection(self):
            q = 'UPDATE job SET job_state=%s, sge_jobnumber=%s WHERE job_id=%s AND job_state="pending"'
            if self.cursor.execute(q, [ 'running', 0, r['job_id'] ]):
                really_pending = True

        if not really_pending:
            print >> sys.stderr, "run request for job_id %s cancelled, as it's no longer pending" % r['job_id']
            return

        cmdline_arg = job_cmdline_arg(r, 'job_run_cmd')
        sge_cmdline = sge_cmdline_arg(r)
        ls = subprocess.Popen(sge_cmdline + cmdline_arg,
                              stdin=None, stdout=subprocess.PIPE,
                              close_fds = True)
        # communicate() reads all of stdout, waits for the process to exit
        # and closes the pipe.
        text = ls.communicate()[0]
        try:
            sge_job_nr = int(re.search(r'Your job (\d+) ', text).group(1))
            new_state = 'running'
        except Exception:
            # Any parsing failure marks the job as failed at submission;
            # KeyboardInterrupt and SystemExit are left to propagate.
            utils.print_traceback("sge failure to exec job: %d" % r['job_id'], text)
            new_state = 'sge_fail'

        # Now we can really update the job state, see comment above.
        with db.connection(self):
            q = 'UPDATE job SET job_state=%s, sge_jobnumber=%s WHERE job_id=%s'
            self.cursor.execute(q, [ new_state, sge_job_nr, r['job_id'] ])
Example #14
0
    def exec_request(self, r):
        """Submit the job described by row r and record the outcome.

        The job is first claimed by switching it from "pending" to 'running'
        with sge_jobnumber 0; when execute() returns a false value for that
        update (presumably the affected-row count -- confirm) the request is
        reported on stderr and skipped. Otherwise the command line built
        from sge_cmdline_arg(r) and job_cmdline_arg(r, 'job_run_cmd') is
        run, and the job row is updated with the job number parsed from its
        output, or with state 'sge_fail' and job number 0 when no job
        number could be parsed.
        """
        sge_job_nr = 0

        # This is a bit convoluted but we need it to avoid a race condition:
        # we set the job as running before starting it, so if this script
        # runs twice in parallel we don't try to start the same job twice.
        # Then, when the job really started or failed to start, we update its
        # state again. As we don't know the sge job number yet, we set it to
        # zero. Note this could be done in pending_request() but I prefer to
        # protect it locally.
        really_pending = False
        with db.connection(self):
            q = 'UPDATE job SET job_state=%s, sge_jobnumber=%s WHERE job_id=%s AND job_state="pending"'
            if self.cursor.execute(q, [ 'running', 0, r['job_id'] ]):
                really_pending = True

        if not really_pending:
            print >> sys.stderr, "run request for job_id %s cancelled, as it's no longer pending" % r['job_id']
            return

        cmdline_arg = job_cmdline_arg(r, 'job_run_cmd')
        sge_cmdline = sge_cmdline_arg(r)
        ls = subprocess.Popen(sge_cmdline + cmdline_arg,
                              stdin=None, stdout=subprocess.PIPE,
                              close_fds = True)
        # communicate() reads all of stdout, waits for the process to exit
        # and closes the pipe.
        text = ls.communicate()[0]
        try:
            sge_job_nr = int(re.search(r'Your job (\d+) ', text).group(1))
            new_state = 'running'
        except Exception:
            # Any parsing failure marks the job as failed at submission;
            # KeyboardInterrupt and SystemExit are left to propagate.
            utils.print_traceback("sge failure to exec job: %d" % r['job_id'], text)
            new_state = 'sge_fail'

        # Now we can really update the job state, see comment above.
        with db.connection(self):
            q = 'UPDATE job SET job_state=%s, sge_jobnumber=%s WHERE job_id=%s'
            self.cursor.execute(q, [ new_state, sge_job_nr, r['job_id'] ])
Example #15
0
def delete(bookname, lang):
    if type(bookname) == type(u''):
        bookname = bookname.encode('utf-8')
    bookname = bookname.replace(' ', '_')
    path = hocr.cache_path(bookname, lang)
    sha1 = hocr.read_sha1(path)

    db_hocr = hocr_request.DbHocr()
    with db.connection(db_hocr):
        q = 'delete from hocr where sha1=%s and lang=%s and title=%s'
        db_hocr.cursor.execute(q, [sha1, lang, bookname])
        print db_hocr.cursor.fetchall()
    if os.path.exists(path + 'sha1.sum'):
        os.remove(path + 'sha1.sum')
Example #16
0
def delete(bookname, lang):
    if type(bookname) == type(u''):
        bookname = bookname.encode('utf-8')
    bookname = bookname.replace(' ', '_')
    path = hocr.cache_path(bookname, lang)
    sha1 = hocr.read_sha1(path)

    db_hocr = hocr_request.DbHocr()
    with db.connection(db_hocr):
        q = 'delete from hocr where sha1=%s and lang=%s and title=%s'
        db_hocr.cursor.execute(q, [sha1, lang, bookname])
        print db_hocr.cursor.fetchall()
    if os.path.exists(path + 'sha1.sum'):
        os.remove(path + 'sha1.sum')
Example #17
0
    def get_accounting_table(self, limit = 50, offset = 0, job_ids = None):
        """Return one page of rows from the accounting table.

        Rows are ordered by job_id descending, then sge_jobnumber
        descending, then sge_hostname. job_ids may be a single job id or a
        list/tuple of them; a false value (None, 0, empty sequence) means
        no filtering on job_id.

        Returns a (rows, has_next) tuple: rows holds at most `limit` rows
        and has_next tells whether at least one more row follows them.
        """
        # Fetch one row more than requested to detect a following page.
        fetch_count = limit + 1
        data = []

        if not job_ids:
            ids = ()
        elif isinstance(job_ids, (list, tuple)):
            ids = tuple(job_ids)
        else:
            # A single id: treat it as a one-element sequence.
            ids = (job_ids,)

        q = 'SELECT * from accounting '
        if ids:
            # One placeholder per id; values are quoted by execute().
            fmt_strs = ', '.join(['%s'] * len(ids))
            q += 'WHERE job_id in (' + fmt_strs + ') '
        q += 'ORDER BY job_id DESC, sge_jobnumber DESC, sge_hostname LIMIT %s OFFSET %s'

        with db.connection(self):
            self.cursor.execute(q, ids + (fetch_count, offset))
            data = self.cursor.fetchall()

        has_next = len(data) == fetch_count
        return data[:limit], has_next
Example #18
0
    def get_accounting_table(self, limit = 50, offset = 0, job_ids = None):
        """Return one page of rows from the accounting table.

        Rows are ordered by job_id descending, then sge_jobnumber
        descending, then sge_hostname. job_ids may be a single job id or a
        list/tuple of them; a false value (None, 0, empty sequence) means
        no filtering on job_id.

        Returns a (rows, has_next) tuple: rows holds at most `limit` rows
        and has_next tells whether at least one more row follows them.
        """
        # Fetch one row more than requested to detect a following page.
        fetch_count = limit + 1
        data = []

        if not job_ids:
            ids = ()
        elif isinstance(job_ids, (list, tuple)):
            ids = tuple(job_ids)
        else:
            # A single id: treat it as a one-element sequence.
            ids = (job_ids,)

        q = 'SELECT * from accounting '
        if ids:
            # One placeholder per id; values are quoted by execute().
            fmt_strs = ', '.join(['%s'] * len(ids))
            q += 'WHERE job_id in (' + fmt_strs + ') '
        q += 'ORDER BY job_id DESC, sge_jobnumber DESC, sge_hostname LIMIT %s OFFSET %s'

        with db.connection(self):
            self.cursor.execute(q, ids + (fetch_count, offset))
            data = self.cursor.fetchall()

        has_next = len(data) == fetch_count
        return data[:limit], has_next