def loadJobsFromList(self, idList):
        """
        _loadJobsFromList_

        Load jobs in bulk.

        Builds one {"jobid": <id>} bind per entry of idList, runs the
        "Jobs.LoadForErrorHandler" DAO with those binds and returns a list
        of Job objects, one per result entry, each updated with its entry.
        """

        loadAction = self.daoFactory(classname = "Jobs.LoadForErrorHandler")

        binds = [{"jobid": jobID} for jobID in idList]

        results = loadAction.execute(jobID = binds)

        # You have to have a list: a lone dict result is wrapped.
        # isinstance (instead of an exact type comparison) also accepts
        # dict subclasses.
        if isinstance(results, dict):
            results = [results]

        listOfJobs = []
        for entry in results:
            # One job per entry
            tmpJob = Job(id = entry['id'])
            tmpJob.update(entry)
            listOfJobs.append(tmpJob)

        return listOfJobs
Exemple #2
0
    def loadJobsFromListFull(self, idList):
        """
        _loadJobsFromListFull_

        Bulk-load the jobs whose IDs are listed in idList through
        self.loadAction (described upstream as including the full
        metadata) and return them as a list of Job objects.
        """
        bindList = [{"jobid": jobID} for jobID in idList]
        rows = self.loadAction.execute(jobID=bindList)

        # Wrap a lone dict so the iteration below is uniform
        if isinstance(rows, dict):
            rows = [rows]

        def _toJob(row):
            # Build a Job from one result row
            job = Job(id=row['id'])
            job.update(row)
            return job

        return [_toJob(row) for row in rows]
Exemple #3
0
    def loadJobsFromList(self, idList):
        """
        _loadJobsFromList_

        Load jobs in bulk.

        Builds one {"jobid": <id>} bind per entry of idList, runs
        self.idLoad with those binds and returns a list of Job objects,
        one per result entry, each updated with its entry.
        """

        binds = [{"jobid": jobID} for jobID in idList]

        results = self.idLoad.execute(jobID = binds)

        # You have to have a list: a lone dict result is wrapped.
        # isinstance (instead of an exact type comparison) also accepts
        # dict subclasses.
        if isinstance(results, dict):
            results = [results]

        listOfJobs = []
        for entry in results:
            # One job per entry
            tmpJob = Job(id = entry['id'])
            tmpJob.update(entry)
            listOfJobs.append(tmpJob)

        return listOfJobs
    def loadJobsFromListFull(self, idList):
        """
        _loadJobsFromListFull_

        Load jobs in bulk.
        Include the full metadata.

        Builds one {"jobid": <id>} bind per entry of idList, runs
        self.loadAction with those binds and returns a list of Job
        objects, one per result entry, each updated with its entry.
        """

        binds = [{"jobid": jobID} for jobID in idList]

        results = self.loadAction.execute(jobID = binds)

        # You have to have a list: a lone dict result is wrapped.
        # isinstance (instead of an exact type comparison) also accepts
        # dict subclasses.
        if isinstance(results, dict):
            results = [results]

        listOfJobs = []
        for entry in results:
            # One job per entry
            tmpJob = Job(id = entry['id'])
            tmpJob.update(entry)
            listOfJobs.append(tmpJob)

        return listOfJobs
    def loadJobsFromList(self, idList):
        """
        _loadJobsFromList_

        Bulk-load the jobs named in idList and tag each one with its type.

        Rows come from the "Jobs.LoadFromID" DAO and types from the
        "Jobs.GetType" DAO; every returned Job stores the type matching
        its id under the 'jobType' key.
        """
        jobLoader = self.daoFactory(classname="Jobs.LoadFromID")
        typeLoader = self.daoFactory(classname="Jobs.GetType")

        bindList = [{"jobid": jobID} for jobID in idList]

        rows = jobLoader.execute(jobID=bindList)
        typeRows = typeLoader.execute(jobID=idList)

        # Job id -> job type, used for the lookup while building the jobs
        typeById = dict((typeRow['id'], typeRow['type'])
                        for typeRow in typeRows)

        # Wrap a lone dict so the iteration below is uniform
        if isinstance(rows, dict):
            rows = [rows]

        def _buildJob(row):
            # Build a Job from one result row and attach its type
            job = Job(id=row['id'])
            job.update(row)
            job['jobType'] = typeById[row['id']]
            return job

        return [_buildJob(row) for row in rows]
Exemple #6
0
    def loadJobsFromList(self, idList):
        """
        _loadJobsFromList_

        Load, in bulk, the jobs whose IDs are in idList.

        The "Jobs.LoadFromID" DAO supplies the job rows and the
        "Jobs.GetType" DAO supplies the types; each returned Job gets the
        type found for its id stored under 'jobType'.
        """
        fetchJobs = self.daoFactory(classname="Jobs.LoadFromID")
        fetchTypes = self.daoFactory(classname="Jobs.GetType")

        jobBinds = [{"jobid": jobID} for jobID in idList]

        loaded = fetchJobs.execute(jobID=jobBinds)
        loadedTypes = fetchTypes.execute(jobID=idList)

        # Index the types by job id
        jobTypes = {}
        for item in loadedTypes:
            jobTypes[item['id']] = item['type']

        # A single dict result is promoted to a one-element list
        loaded = [loaded] if isinstance(loaded, dict) else loaded

        jobs = []
        for record in loaded:
            job = Job(id=record['id'])
            job.update(record)
            job['jobType'] = jobTypes[record['id']]
            jobs.append(job)
        return jobs
Exemple #7
0
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig=None):
    """
    _killWorkflow_

    Kill a workflow that is already executing inside the agent.  This will
    mark all incomplete jobs as failed and files that belong to all
    non-cleanup and non-logcollect subscriptions as failed.  The name of the
    JSM couch database and the URL to the database must be passed in as well
    so the state transitions are logged.

    workflowName:  name handed to the Subscriptions.KillWorkflow and
                   Jobs.KillWorkflow DAOs
    jobCouchConfig: configuration used to build the ChangeState object
    bossAirConfig: optional configuration for BossAirAPI; when it is not
                   given, nothing is killed on the batch system
    """
    myThread = threading.current_thread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)
    killFilesAction = daoFactory(classname="Subscriptions.KillWorkflow")
    killJobsAction = daoFactory(classname="Jobs.KillWorkflow")

    # Reuse a transaction that is already open, otherwise start one.
    # NOTE(review): no commit is visible in this part of the function;
    # existingTransaction is presumably consumed further down - confirm.
    existingTransaction = False
    if myThread.transaction.conn:
        existingTransaction = True
    else:
        myThread.transaction.begin()

    killFilesAction.execute(workflowName=workflowName,
                            conn=myThread.transaction.conn,
                            transaction=True)

    liveJobs = killJobsAction.execute(workflowName=workflowName,
                                      conn=myThread.transaction.conn,
                                      transaction=True)

    changeState = ChangeState(jobCouchConfig)

    # Deal with any jobs that are running in the batch system
    # only works if we can start the API
    if bossAirConfig:
        bossAir = BossAirAPI(config=bossAirConfig, noSetup=True)
        killableJobs = []
        for liveJob in liveJobs:
            if liveJob["state"].lower() == 'executing':
                # Then we need to kill this on the batch system
                liveWMBSJob = Job(id=liveJob["id"])
                liveWMBSJob.update(liveJob)
                changeState.propagate(liveWMBSJob, "killed", liveJob["state"])
                killableJobs.append(liveJob)
        # Now kill them
        try:
            bossAir.kill(jobs=killableJobs)
        except BossAirException as ex:
            # Something's gone wrong
            # Jobs not killed!
            logging.error(
                "Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master
            # the batch system will have to take care of itself.
Exemple #8
0
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig = None):
    """
    _killWorkflow_

    Kill a workflow that is already executing inside the agent.  This will
    mark all incomplete jobs as failed and files that belong to all
    non-cleanup and non-logcollect subscriptions as failed.  The name of the
    JSM couch database and the URL to the database must be passed in as well
    so the state transitions are logged.

    workflowName:  name handed to the Subscriptions.KillWorkflow and
                   Jobs.KillWorkflow DAOs
    jobCouchConfig: configuration used to build the ChangeState object
    bossAirConfig: optional configuration for BossAirAPI; when it is not
                   given, nothing is killed on the batch system
    """
    myThread = threading.current_thread()
    daoFactory = DAOFactory(package = "WMCore.WMBS",
                            logger = myThread.logger,
                            dbinterface = myThread.dbi)
    killFilesAction = daoFactory(classname = "Subscriptions.KillWorkflow")
    killJobsAction = daoFactory(classname = "Jobs.KillWorkflow")

    # Reuse a transaction that is already open, otherwise start one.
    # NOTE(review): no commit is visible in this part of the function;
    # existingTransaction is presumably consumed further down - confirm.
    existingTransaction = False
    if myThread.transaction.conn:
        existingTransaction = True
    else:
        myThread.transaction.begin()

    killFilesAction.execute(workflowName = workflowName,
                            conn = myThread.transaction.conn,
                            transaction = True)

    liveJobs = killJobsAction.execute(workflowName = workflowName,
                                      conn = myThread.transaction.conn,
                                      transaction = True)

    changeState = ChangeState(jobCouchConfig)

    # Deal with any jobs that are running in the batch system
    # only works if we can start the API
    if bossAirConfig:
        bossAir = BossAirAPI(config = bossAirConfig, noSetup = True)
        killableJobs = []
        for liveJob in liveJobs:
            if liveJob["state"].lower() == 'executing':
                # Then we need to kill this on the batch system
                liveWMBSJob = Job(id = liveJob["id"])
                liveWMBSJob.update(liveJob)
                changeState.propagate(liveWMBSJob, "killed", liveJob["state"])
                killableJobs.append(liveJob)
        # Now kill them
        try:
            bossAir.kill(jobs = killableJobs)
        except BossAirException as ex:
            # Something's gone wrong
            # Jobs not killed!
            logging.error("Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master
            # the batch system will have to take care of itself.
    def findFinishedJobs(self):
        """
        _findFinishedJobs_

        Gather the jobs reported by the "Jobs.GetAllJobs" DAO for the
        "success", "exhausted" and "killed" states (each query is given
        self.numberOfJobsToArchive as limitRows), load them through
        self.loadAction and return them as a list of Job objects.
        An empty list is returned when no job is found.
        """
        finder = self.daoFactory(classname="Jobs.GetAllJobs")

        # Run the three state queries first, then merge them in order
        perState = [finder.execute(state=stateName,
                                   limitRows=self.numberOfJobsToArchive)
                    for stateName in ("success", "exhausted", "killed")]
        finishedIDs = []
        for found in perState:
            finishedIDs.extend(found)

        if not finishedIDs:
            # Then nothing is ready
            return []

        # One bind per job ID
        bindList = [{"jobid": jobID} for jobID in finishedIDs]
        rows = self.loadAction.execute(jobID=bindList)

        # Anything that is not already a list is wrapped in one
        if not isinstance(rows, list):
            rows = [rows]

        def _toJob(row):
            # Build a Job from one result row
            job = Job(id=row['id'])
            job.update(row)
            return job

        return [_toJob(row) for row in rows]
Exemple #10
0
    def findFinishedJobs(self):
        """
        _findFinishedJobs_

        Will actually, surprisingly, find finished jobs: the jobs the
        "Jobs.GetAllJobs" DAO reports in the "success", "exhausted" or
        "killed" state.

        The IDs found are loaded through self.loadAction and returned as a
        list of Job objects; an empty list is returned when none is found.
        """
        jobList = []

        jobListAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        jobList1 = jobListAction.execute(state = "success")
        jobList2 = jobListAction.execute(state = "exhausted")
        jobList3 = jobListAction.execute(state = "killed")

        jobList.extend(jobList1)
        jobList.extend(jobList2)
        jobList.extend(jobList3)

        if not jobList:
            # Then nothing is ready
            return []

        # Put together a list of job IDs
        binds = [{"jobid": jobID} for jobID in jobList]

        results = self.loadAction.execute(jobID = binds)

        # Anything that is not a list (list subclasses count as lists) is
        # wrapped so the loop below always iterates over entries.
        if not isinstance(results, list):
            results = [results]

        doneList = []

        for entry in results:
            # One job per entry
            tmpJob = Job(id = entry['id'])
            tmpJob.update(entry)
            doneList.append(tmpJob)

        return doneList
Exemple #11
0
    def loadJobsFromList(self, idList):
        """
        _loadJobsFromList_

        Bulk-load the jobs whose IDs are listed in idList through
        self.idLoad and return them as a list of Job objects.
        """
        bindList = [{"jobid": jobID} for jobID in idList]
        rows = self.idLoad.execute(jobID=bindList)

        # A single dict result is promoted to a one-element list
        rows = [rows] if isinstance(rows, dict) else rows

        def _toJob(row):
            # Build a Job from one result row
            job = Job(id=row['id'])
            job.update(row)
            return job

        return [_toJob(row) for row in rows]
Exemple #12
0
            # Something's gone wrong
            # Jobs not killed!
            logging.error("Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master
            # the batch system will have to take care of itself.
            pass

    for liveJob in liveJobs:
        if liveJob["state"] == "killed":
            # Then we've killed it already
            continue
        liveWMBSJob = Job(id = liveJob["id"])
        liveWMBSJob.update(liveJob)
        changeState.propagate(liveWMBSJob, "killed", liveJob["state"])

    if not existingTransaction:
        myThread.transaction.commit()
    return

def freeSlots(multiplier = 1.0, minusRunning = False, allowedStates = ['Normal'], knownCmsSites = None):
    """
    Get free resources from wmbs.

    Specify multiplier to apply a ratio to the actual numbers.
    minusRunning control if running jobs should be counted
    """
    from WMCore.ResourceControl.ResourceControl import ResourceControl
    rc_sites = ResourceControl().listThresholdsForCreate()
Exemple #13
0
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig=None):
    """
    _killWorkflow_

    Kill a workflow that is already executing inside the agent.  This will
    mark all incomplete jobs as failed and files that belong to all
    non-cleanup and non-logcollect subscriptions as failed.  The name of the
    JSM couch database and the URL to the database must be passed in as well
    so the state transitions are logged.

    workflowName:  name handed to the Subscriptions.KillWorkflow and
                   Jobs.KillWorkflow DAOs and to the batch-system kill
    jobCouchConfig: configuration used to build the ChangeState object
    bossAirConfig: optional configuration for BossAirAPI; when it is not
                   given, nothing is killed on the batch system
    """
    myThread = threading.current_thread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)
    killFilesAction = daoFactory(classname="Subscriptions.KillWorkflow")
    killJobsAction = daoFactory(classname="Jobs.KillWorkflow")

    killFilesAction.execute(workflowName=workflowName,
                            conn=myThread.transaction.conn)

    liveJobs = killJobsAction.execute(workflowName=workflowName,
                                      conn=myThread.transaction.conn)

    changeState = ChangeState(jobCouchConfig)

    # Deal with any jobs that are running in the batch system
    # only works if we can start the API
    if bossAirConfig:
        bossAir = BossAirAPI(config=bossAirConfig, noSetup=True)
        # Only jobs in the 'executing' state need a batch-system kill.
        # The raw entries are handed to BossAir, so no Job object is built
        # here (the one built previously was never used).
        killableJobs = [liveJob for liveJob in liveJobs
                        if liveJob["state"].lower() == 'executing']
        # Now kill them
        try:
            logging.info("Killing %d jobs for workflow: %s", len(killableJobs),
                         workflowName)
            bossAir.kill(jobs=killableJobs, workflowName=workflowName)
        except BossAirException as ex:
            # Something's gone wrong. Jobs not killed!
            logging.error(
                "Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master
            # the batch system will have to take care of itself.

    # Group the jobs that still need the transition by their current state
    liveWMBSJobs = defaultdict(list)
    for liveJob in liveJobs:
        if liveJob["state"] == "killed":
            # Then we've killed it already
            continue
        liveWMBSJob = Job(id=liveJob["id"])
        liveWMBSJob.update(liveJob)
        liveWMBSJobs[liveJob["state"]].append(liveWMBSJob)

    for state, jobsByState in liveWMBSJobs.items():
        if len(jobsByState) > 100 and state != "executing":
            # if there are too many jobs skip the couch and dashboard update
            # TODO: couch and dashboard need to be updated or parallel.
            changeState.check("killed", state)
            changeState.persist(jobsByState, "killed", state)
        else:
            changeState.propagate(jobsByState, "killed", state)
    return
Exemple #14
0
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig=None):
    """
    _killWorkflow_

    Kill a workflow that is already executing inside the agent.  This will
    mark all incomplete jobs as failed and files that belong to all
    non-cleanup and non-logcollect subscriptions as failed.  The name of the
    JSM couch database and the URL to the database must be passed in as well
    so the state transitions are logged.

    workflowName:  name handed to the Subscriptions.KillWorkflow and
                   Jobs.KillWorkflow DAOs and to the batch-system kill
    jobCouchConfig: configuration used to build the ChangeState object
    bossAirConfig: optional configuration for BossAirAPI; when it is not
                   given, nothing is killed on the batch system
    """
    myThread = threading.current_thread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)
    killFilesAction = daoFactory(classname="Subscriptions.KillWorkflow")
    killJobsAction = daoFactory(classname="Jobs.KillWorkflow")

    killFilesAction.execute(workflowName=workflowName,
                            conn=myThread.transaction.conn)

    liveJobs = killJobsAction.execute(workflowName=workflowName,
                                      conn=myThread.transaction.conn)

    changeState = ChangeState(jobCouchConfig)

    # Deal with any jobs that are running in the batch system
    # only works if we can start the API
    if bossAirConfig:
        bossAir = BossAirAPI(config=bossAirConfig, noSetup=True)
        # Only jobs in the 'executing' state need a batch-system kill.
        # The raw entries are handed to BossAir, so no Job object is built
        # here (the one built previously was never used).
        killableJobs = [liveJob for liveJob in liveJobs
                        if liveJob["state"].lower() == 'executing']
        # Now kill them
        try:
            logging.info("Killing %d jobs for workflow: %s", len(killableJobs), workflowName)
            bossAir.kill(jobs=killableJobs, workflowName=workflowName)
        except BossAirException as ex:
            # Something's gone wrong. Jobs not killed!
            logging.error("Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master
            # the batch system will have to take care of itself.

    # Group the jobs that still need the transition by their current state
    liveWMBSJobs = defaultdict(list)
    for liveJob in liveJobs:
        if liveJob["state"] == "killed":
            # Then we've killed it already
            continue
        liveWMBSJob = Job(id=liveJob["id"])
        liveWMBSJob.update(liveJob)
        liveWMBSJobs[liveJob["state"]].append(liveWMBSJob)

    for state, jobsByState in liveWMBSJobs.items():
        if len(jobsByState) > 100 and state != "executing":
            # if there are too many jobs skip the couch and dashboard update
            # TODO: couch and dashboard need to be updated or parallel.
            changeState.check("killed", state)
            changeState.persist(jobsByState, "killed", state)
        else:
            changeState.propagate(jobsByState, "killed", state)
    return
Exemple #15
0
            # Jobs not killed!
            logging.error(
                "Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master
            # the batch system will have to take care of itself.
            pass

    for liveJob in liveJobs:
        if liveJob["state"] == "killed":
            # Then we've killed it already
            continue
        liveWMBSJob = Job(id=liveJob["id"])
        liveWMBSJob.update(liveJob)
        changeState.propagate(liveWMBSJob, "killed", liveJob["state"])

    if not existingTransaction:
        myThread.transaction.commit()
    return


def freeSlots(multiplier=1.0,
              minusRunning=False,
              allowedStates=['Normal'],
              knownCmsSites=None):
    """
    Get free resources from wmbs.

    Specify multiplier to apply a ratio to the actual numbers.