def terminate_process(pid, jobchainid, log_id, writeconnectioninfo):
    """Send SIGINT to a running job chain process and wait for it to stop.

    The process is only signalled when the ``jobchain_lst_run`` row selected
    by ``jobchainid`` and ``log_id`` has status ``'RUNNING...'`` and stores
    the same pid the caller passed in.

    Args:
        pid: Process id the caller wants to interrupt.
        jobchainid: Value matched against ``jobchain_id`` of the run row.
        log_id: Value matched against ``id`` of the run row.
        writeconnectioninfo: Connection info sequence handed to
            ``mycon.db_read``; element 4 is used as the schema name.

    Raises:
        RuntimeError: If the lookup does not return exactly one row, the
            stored status is not ``'RUNNING...'``, the stored pid differs
            from ``pid``, the kill command fails or times out, or
            ``kill_check`` keeps returning ``False`` for the whole polling
            window.
    """
    querystr = (
        "select pid, status from {}.jobchain_lst_run "
        "where jobchain_id={} and id = {}"
    ).format(writeconnectioninfo[4], jobchainid, log_id)
    res = mycon.db_read(writeconnectioninfo, querystr)
    if len(res) != 1:
        raise RuntimeError('No result returned')

    log_pid, status = res[0][0], res[0][1]
    if status != 'RUNNING...':
        raise RuntimeError('The job is not running')
    if log_pid != pid:
        raise RuntimeError('Mismatched pid')

    # Build argv directly instead of splitting a formatted string, so the
    # pid always ends up as exactly one argument of the kill command.
    argv = ['sudo', 'kill', '-sigint', str(pid)]
    try:
        a = sp.run(argv, stdout=sp.PIPE, stderr=sp.PIPE, timeout=5)
    except sp.TimeoutExpired as exc:
        # Keep the original timeout as the cause for easier debugging.
        raise RuntimeError('Time out, Please try again') from exc

    time.sleep(1)
    if a.returncode != 0:
        raise RuntimeError(a.stderr.decode('ascii', 'ignore'))

    # Poll the stored status; give up after five unsuccessful checks.
    counter = 5
    while not kill_check(log_id, writeconnectioninfo):
        counter -= 1
        time.sleep(1)
        if counter == 0:
            raise RuntimeError('Cannot kill this job')
Exemple #2
0
 def fetchjob(self, conninfo):  # dbread
     """Load the jobs of this chain from ``schedulerDB.jobtable``.

     Every row whose ``job_chain_id`` equals ``self.id`` is wrapped in a
     ``Myjob`` and appended to ``self.joblst``; afterwards the list is
     sorted in place by each element's ``step`` attribute.
     """
     sql = "SELECT id, step_lvl, env, command, ottime FROM schedulerDB.jobtable WHERE job_chain_id={};".format(self.id)
     for job_id, step_lvl, env, command, ottime in mycon.db_read(conninfo, sql):
         self.joblst.append(Myjob(job_id, step_lvl, env, command, ottime))
     self.joblst.sort(key=lambda job: job.step)
def register(EXTERNAL_IP):
    """Insert or refresh this node's row in ``registered_scheduler``.

    Connection details are taken from ``DATABASES['scheduler_control']``.
    When at least one row already exists for ``EXTERNAL_IP`` its port and
    role are updated from ``SOCKET_SERVER_PORT`` and ``NODE_TYPE``;
    otherwise a new row is inserted.

    Args:
        EXTERNAL_IP: IP address string identifying this machine; it is
            interpolated into the SQL text as a quoted literal.
    """
    db_settings = DATABASES['scheduler_control']
    dbconnectioninfo = [db_settings['HOST'],
                        int(db_settings['PORT']),
                        db_settings['USER'],
                        db_settings['PASSWORD'],
                        db_settings['NAME']]
    schema = dbconnectioninfo[4]

    # NOTE(review): values are formatted straight into the SQL text; if
    # EXTERNAL_IP can ever come from an untrusted source this needs a
    # parameterized query.
    querystr = (
        "select count(*) from {}.registered_scheduler "
        "where machine_ip = '{}'"
    ).format(schema, EXTERNAL_IP)
    res = mycon.db_read(dbconnectioninfo, querystr)[0][0]

    if res > 0:
        # Any existing row counts as "registered". Testing for exactly one
        # row would insert yet another duplicate when duplicates exist.
        querystr = (
            "UPDATE {}.registered_scheduler SET "
            "machine_port={}, machine_role='{}' "
            "where machine_ip = '{}'"
        ).format(schema, SOCKET_SERVER_PORT, NODE_TYPE, EXTERNAL_IP)
    else:
        querystr = (
            "INSERT INTO {}.registered_scheduler "
            "(machine_ip, machine_port, machine_role) VALUES ('{}',{},'{}')"
        ).format(schema, EXTERNAL_IP, SOCKET_SERVER_PORT, NODE_TYPE)
    mycon.db_write(dbconnectioninfo, querystr)
def kill_check(log_id, writeconnectioninfo):
    """Report whether the run row ``log_id`` is no longer marked as running.

    Args:
        log_id: Value matched against ``id`` in ``jobchain_lst_run``.
        writeconnectioninfo: Connection info sequence handed to
            ``mycon.db_read``; element 4 is used as the schema name.

    Returns:
        bool: ``True`` when the stored status does not contain "running"
        (compared case-insensitively), ``False`` while it still does.
    """
    querystr = (
        "select status from {}.jobchain_lst_run "
        "where id = {}"
    ).format(writeconnectioninfo[4], log_id)
    res = mycon.db_read(writeconnectioninfo, querystr)
    status = res[0][0]
    # terminate_process treats 'RUNNING...' (upper-case) as the running
    # status, so a case-sensitive test for 'running' can never match and
    # would always report the job as stopped.
    return 'running' not in status.lower()
Exemple #5
0
    def scheduler_status_check(self):
        """Try to claim the ``realtime_status`` row for this scheduler role.

        The role name is ``'bkp'`` when ``self.envinfo[0] == 1`` and
        ``'main'`` otherwise.

        Returns:
            bool: ``True`` when no row existed for the role (in which case
            ``scheduler_status_init`` is called with the current pid) or
            when the stored ``pid_index`` was 0 and has been overwritten
            with the current pid; ``False`` when another non-zero pid is
            already stored.
        """
        role = 'bkp' if self.envinfo[0] == 1 else 'main'
        own_pid = os.getpid()

        select_sql = "SELECT pid_index FROM schedulerDB.realtime_status WHERE name = '{}';".format(role)
        rows = myconn.db_read(self.writeconnectioninfo, select_sql)

        # No row yet for this role: create it and report success.
        if len(rows) == 0:
            self.scheduler_status_init(own_pid)
            return True

        # A non-zero pid is already stored for this role; leave it alone.
        if not rows[0][0] == 0:
            return False

        update_sql = "UPDATE schedulerDB.realtime_status SET pid_index={} WHERE name = '{}';".format(
            own_pid, role)
        myconn.db_write(self.writeconnectioninfo, update_sql)
        return True
def insert_job(jobchainid, replyqueue, readconnectioninfo, writeconnectioninfo,
               envinfo, tracklck, runningjobdict):
    """Start a worker process for a single job chain on request.

    The chain is looked up in ``jobchain_table`` with the flag column for
    the current role (``BKP_flag`` when ``envinfo[0] == 1``, otherwise
    ``main_flag``) set to 1. If the chain is not already marked as running
    in ``runningjobdict`` it is marked and a ``worker.worker`` process is
    started for it.

    Args:
        jobchainid: Id of the job chain to run.
        replyqueue: Passed through to ``worker.worker``.
        readconnectioninfo: Connection info used for the lookup; element 4
            is used as the schema name. Also passed to the worker.
        writeconnectioninfo: Passed through to ``worker.worker``.
        envinfo: Environment info sequence; element 0 selects the role.
        tracklck: Lock guarding ``runningjobdict``.
        runningjobdict: Mapping of ``str(job chain id)`` to 1 for chains
            that are currently running.

    Raises:
        RuntimeError: If the lookup does not return exactly one row, or the
            chain is already marked as running.
    """
    if envinfo[0] == 1:
        typestr, flagstr = 'bkp', 'BKP_flag'
    else:
        typestr, flagstr = 'main', 'main_flag'

    query = (
        "SELECT id,timestr, dep_ot ,job_desc "
        "FROM {}.jobchain_table "
        "WHERE id={} and {}=1; "
    ).format(readconnectioninfo[4], jobchainid, flagstr)
    # Execute on the same connection whose schema name the query was built
    # with; sqlfetcher also reads jobchain_table through the read connection.
    res = mycon.db_read(readconnectioninfo, query)
    if len(res) != 1:
        raise RuntimeError('{} does not have jobchain {}'.format(
            typestr, jobchainid))
    row = res[0]
    eachjc = mj.Myjobchain(row[0], row[1], None, row[2], row[3])

    # Check-and-mark must happen atomically under the tracking lock.
    with tracklck:
        already_running = runningjobdict.get(str(jobchainid)) == 1
        if not already_running:
            runningjobdict[str(eachjc.id)] = 1
            p = mp.Process(target=worker.worker,
                           args=(eachjc, replyqueue, readconnectioninfo,
                                 writeconnectioninfo, envinfo))
            p.start()

    # Raise only after the lock has been released.
    if already_running:
        raise RuntimeError('Job Id: {} not finished'.format(eachjc.id))
def sqlfetcher(timestampqueue: mp.Queue, replyqueue, readconnectioninfo, writeconnectioninfo, envinfo,
               tracklck, runningjobdict
               ):
    """Start worker processes for job chains, once per queued timestamp.

    The function registers its own pid via ``pidregister`` and then loops
    forever: it blocks on ``timestampqueue.get()``, loads every job chain
    that is switched ``ON`` for the current role (``BKP_flag`` when
    ``envinfo[0] == 1``, otherwise ``main_flag``), attaches the latest
    ``last_run_start`` found in ``jobchain_lst_run`` and then, per chain:

    * if ``jobchain_timestrcheck`` returns ``False`` the chain is switched
      ``OFF`` and an "invalid timestr" message is logged;
    * if ``jobcheck(chain, timestamp)`` returns ``False`` the chain is
      skipped;
    * if the chain is already marked in ``runningjobdict`` a "not finished"
      message is logged;
    * otherwise the chain is marked as running and a ``worker.worker``
      process is started for it.

    When the loop exits through an exception, ``pid_sqlfetcher`` in
    ``realtime_status`` is reset to 0.

    Args:
        timestampqueue: Queue delivering the timestamps to evaluate.
        replyqueue: Passed through to ``worker.worker``.
        readconnectioninfo: Connection info used to read ``jobchain_table``.
        writeconnectioninfo: Connection info used for ``jobchain_lst_run``,
            status updates and logging.
        envinfo: Environment info sequence; element 0 selects the role.
        tracklck: Lock guarding ``runningjobdict``.
        runningjobdict: Mapping of ``str(job chain id)`` to 1 for chains
            that are currently running.
    """
    thispid = os.getpid()
    pidregister(thispid, writeconnectioninfo)
    bkpflag = envinfo[0]
    # Most recently started Process per job chain id (as str).
    runningprocessdict = {}
    flagstr = 'BKP_flag' if bkpflag == 1 else 'main_flag'

    try:
        while True:
            exetime = timestampqueue.get()  # blocks until a timestamp arrives
            query = \
                "SELECT id, timestr, dep_ot ,job_desc " \
                "FROM schedulerDB.jobchain_table " \
                "WHERE switch='ON' and {}=1; ".format(flagstr)
            read_cur = mycon.db_read(readconnectioninfo, query)

            query = \
                "SELECT jobchain_id, MAX(last_run_start) FROM schedulerDB.jobchain_lst_run " \
                "GROUP BY jobchain_id ORDER BY jobchain_id;"

            write_cur = mycon.db_read(writeconnectioninfo, query)
            if len(write_cur) == 0:
                # Placeholder row so the frame below always has two columns.
                tempnp = np.array([[-1, -1]])
            else:
                tempnp = np.array(write_cur)
            # Frame indexed by jobchain_id for the last-run lookup below.
            pdframe = pd.DataFrame(tempnp, index=tempnp[:, 0])

            jobChainlst = []
            for row in read_cur:
                lstrun = None
                if row[0] in pdframe.index:
                    lstrun = pdframe.loc[[row[0]]].values[0, 1]
                jobChainlst.append(mj.Myjobchain(row[0], row[1], lstrun, row[2], row[3]))

            # Entries are (code, jobchain): 0 = still running, 1 = bad timestr.
            errorlst = []

            for eachjc in jobChainlst:
                if jobchain_timestrcheck(eachjc) is False:
                    errorlst.append((1, eachjc))
                    # Turn off that jobchain. The table is schema-qualified
                    # like every other statement in this function.
                    querystr = '''update schedulerDB.jobchain_table set switch = 'OFF' where id = {};'''.format(eachjc.id)
                    mycon.db_write(writeconnectioninfo, querystr)
                    continue
                if jobcheck(eachjc, exetime) is False:
                    continue
                # Check-and-mark must happen atomically under the lock.
                with tracklck:
                    if str(eachjc.id) in runningjobdict and runningjobdict[str(eachjc.id)] == 1:
                        errorlst.append((0, eachjc))
                    else:
                        runningjobdict[str(eachjc.id)] = 1
                        p = mp.Process(target=worker.worker,
                                       args=(eachjc, replyqueue,
                                             readconnectioninfo,
                                             writeconnectioninfo,
                                             envinfo))
                        runningprocessdict[str(eachjc.id)] = p
                        runningprocessdict[str(eachjc.id)].start()

            for eacherrjc in errorlst:
                if eacherrjc[0] == 0:
                    util.sendlog(writeconnectioninfo, 'Job Id: {} not finished'.format(eacherrjc[1].id))
                elif eacherrjc[0] == 1:
                    util.sendlog(writeconnectioninfo, 'Job Id: {} invalid timestr'.format(eacherrjc[1].id))
    finally:
        # NOTE(review): there is no WHERE clause, so every realtime_status
        # row is reset, not only the one for this role - confirm intended.
        query = "UPDATE schedulerDB.realtime_status SET pid_sqlfetcher={};".format(0)
        mycon.db_write(writeconnectioninfo, query)
Exemple #8
0
 def fetchdep(self, conninfo):  # dbread
     """Load the dependencies of this chain from ``schedulerDB.deptable``.

     Every row whose ``jobchainid`` equals ``self.id`` is wrapped in a
     ``Mydep`` and appended to ``self.deplst``.
     """
     sql = "SELECT id,env, command, ottime FROM schedulerDB.deptable WHERE jobchainid={};".format(self.id)
     for dep_id, env, command, ottime in mycon.db_read(conninfo, sql):
         self.deplst.append(Mydep(dep_id, env, command, ottime))