コード例 #1
0
def cache_users_from_file():
    """Populate the user cache from the on-disk cache file.

    The path is obtained from get_cache_filename() and the opened file is
    handed to cache_users_from_fd().  Any Exception raised while opening or
    processing the file is logged through htcondor.log at LogLevel.Always
    and is not propagated to the caller.
    """
    cache_file = get_cache_filename()
    try:
        # The context manager closes the file even when processing fails,
        # so repeated calls do not leak file descriptors.
        with open(cache_file, "r") as cache_fd:
            cache_users_from_fd(cache_fd)
    except Exception as e:
        htcondor.log(
            htcondor.LogLevel.Always,
            "Failed to cache users from file %s: %s" % (cache_file, str(e)))
コード例 #2
0
def cache_users():
    """Refresh the user cache from the configured URL, else from the file.

    The URL is read from the CMSLPC_USER_URL configuration parameter.  When
    it is unset or empty, the cache is loaded via cache_users_from_file().
    When fetching or processing the URL raises an Exception, the failure is
    logged at LogLevel.Always and cache_users_from_file() is used as the
    fallback.
    """
    url = htcondor.param.get("CMSLPC_USER_URL")
    if not url:
        cache_users_from_file()
        return
    try:
        urlfd = urllib.urlopen(url)
        try:
            cache_users_from_fd(urlfd)
        finally:
            # Always release the connection, including on processing errors.
            urlfd.close()
    except Exception as e:
        htcondor.log(htcondor.LogLevel.Always, "Failed to cache users from URL %s: %s" % (url, str(e)))
        cache_users_from_file()
        return
コード例 #3
0
def cache_users():
    """Refresh the user cache from the configured URL, else from the file.

    The URL is read from the CMSLPC_USER_URL configuration parameter.  When
    it is unset or empty, the cache is loaded via cache_users_from_file().
    When fetching or processing the URL raises an Exception, the failure is
    logged at LogLevel.Always and cache_users_from_file() is used as the
    fallback.
    """
    url = htcondor.param.get("CMSLPC_USER_URL")
    if not url:
        cache_users_from_file()
        return
    try:
        urlfd = urllib.urlopen(url)
        try:
            cache_users_from_fd(urlfd)
        finally:
            # Always release the connection, including on processing errors.
            urlfd.close()
    except Exception as e:
        htcondor.log(htcondor.LogLevel.Always,
                     "Failed to cache users from URL %s: %s" % (url, str(e)))
        cache_users_from_file()
        return
コード例 #4
0
def write_cache_file():
    """Write every entry of g_cache to the cache file, one per line.

    The data is first written to a temporary file created in the same
    directory as the final file (using the final basename as prefix) and
    then renamed over the final path.  On any Exception the failure is
    logged at LogLevel.Always and the temporary file is removed on a
    best-effort basis; nothing is propagated to the caller.
    """
    final_fname = get_cache_filename()
    dirname, prefix = os.path.split(final_fname)
    fd, name = tempfile.mkstemp(dir=dirname, prefix=prefix)
    try:
        # Wrapping the descriptor guarantees it is closed even when a write
        # fails part-way through; previously it leaked on that path.
        with os.fdopen(fd, "w") as tmp:
            for dn in g_cache:
                tmp.write(dn + "\n")
        os.rename(name, final_fname)
    except Exception as e:
        htcondor.log(htcondor.LogLevel.Always, "Failed to write out cache file: %s" % str(e))
        try:
            os.unlink(name)
        except OSError:
            # Best-effort cleanup: the temp file may already be gone
            # (for example if the rename itself succeeded).
            pass
コード例 #5
0
ファイル: audit_payloads.py プロジェクト: xzhao87/htcondor-ce
def stopjob(info):
    """Record the stop of one or more tracked jobs described by ``info``.

    ``info`` is a mapping that must contain 'Name' and 'SlotID' (otherwise
    the call is a no-op) and may contain 'GLIDEIN_MASTER_NAME'.  The master
    entry is looked up in ``runningmasters`` under the key
    (master name or job name, SlotID).

    Which jobs are stopped:
      * no GLIDEIN_MASTER_NAME, or a name not of the form "slotN@...":
        only the job called ``Name``;
      * GLIDEIN_MASTER_NAME equal to ``Name``: every job under the master;
      * a name of the form "slotN@...": every job whose name starts with
        "slotN@" or "slotN_".

    Each stopped job is logged at LogLevel.Audit and removed; when the
    master has no jobs left, its entry is removed from ``runningmasters``.
    """
    global runningmasters
    if 'Name' not in info or 'SlotID' not in info:
        return
    name = info['Name']
    matchre = ""
    if 'GLIDEIN_MASTER_NAME' in info:
        idxname = info['GLIDEIN_MASTER_NAME']
        if idxname == name:
            # stop all jobs under this master
            matchre = '.*'
        else:
            # names of form "slotN@" stop that name and all "slotN_M@" names
            slotn = re.sub('^(slot[0-9]*)@.*', r'\1', name)
            if slotn != name:
                # match any name starting with slotN@ or slotN_
                matchre = '^' + slotn + '[@_]'
            # else take the default of matching only one name
    else:
        idxname = name
    idx = (idxname, info['SlotID'])
    if idx not in runningmasters:
        return

    runningjobs = runningmasters[idx][1]
    if matchre == "":
        # no match expression, just stop one
        if name not in runningjobs:
            return
        stopjobnames = [name]
    else:
        # Snapshot the job names: entries are deleted from runningjobs in
        # the loop below, and iterating a live keys view (or a lazy filter
        # over it) while deleting raises RuntimeError on Python 3.
        stopjobnames = list(runningjobs)
        if matchre != '.*':
            # restrict to the matching regular expression
            regex = re.compile(matchre)
            stopjobnames = [
                jobname for jobname in stopjobnames if regex.search(jobname)
            ]

    for stopjobname in stopjobnames:
        loginfo = {}
        loginfo['Name'] = stopjobname
        loginfo['SlotID'] = info['SlotID']
        loginfo['GlobalJobId'] = runningjobs[stopjobname]
        htcondor.log(htcondor.LogLevel.Audit, "Job stop: %s" % loginfo)
        del runningjobs[stopjobname]

    if not runningjobs:
        # last job under this master is gone, drop the master as well
        del runningmasters[idx]
コード例 #6
0
def write_cache_file():
    """Write every entry of g_cache to the cache file, one per line.

    The data is first written to a temporary file created in the same
    directory as the final file (using the final basename as prefix) and
    then renamed over the final path.  On any Exception the failure is
    logged at LogLevel.Always and the temporary file is removed on a
    best-effort basis; nothing is propagated to the caller.
    """
    final_fname = get_cache_filename()
    dirname, prefix = os.path.split(final_fname)
    fd, name = tempfile.mkstemp(dir=dirname, prefix=prefix)
    try:
        # Wrapping the descriptor guarantees it is closed even when a write
        # fails part-way through; previously it leaked on that path.
        with os.fdopen(fd, "w") as tmp:
            for dn in g_cache:
                tmp.write(dn + "\n")
        os.rename(name, final_fname)
    except Exception as e:
        htcondor.log(htcondor.LogLevel.Always,
                     "Failed to write out cache file: %s" % str(e))
        try:
            os.unlink(name)
        except OSError:
            # Best-effort cleanup: the temp file may already be gone
            # (for example if the rename itself succeeded).
            pass
コード例 #7
0
def cache_users_from_file():
    """Populate the user cache from the on-disk cache file.

    The path is obtained from get_cache_filename() and the opened file is
    handed to cache_users_from_fd().  Any Exception raised while opening or
    processing the file is logged through htcondor.log at LogLevel.Always
    and is not propagated to the caller.
    """
    cache_file = get_cache_filename()
    try:
        # The context manager closes the file even when processing fails,
        # so repeated calls do not leak file descriptors.
        with open(cache_file, "r") as cache_fd:
            cache_users_from_fd(cache_fd)
    except Exception as e:
        htcondor.log(htcondor.LogLevel.Always, "Failed to cache users from file %s: %s" % (cache_file, str(e)))
コード例 #8
0
ファイル: audit_payloads.py プロジェクト: xzhao87/htcondor-ce
def startjob(info):
    """Record the start of a job described by ``info`` and audit-log it.

    ``info`` is a mapping that must contain 'Name', 'SlotID' and
    'GlobalJobId'; otherwise the call is a no-op.  Jobs are grouped in
    ``runningmasters`` under the key (GLIDEIN_MASTER_NAME or Name, SlotID),
    whose value is a tuple (start time, {job name: GlobalJobId}).

    If the same name is already tracked with the same GlobalJobId the call
    is treated as an update and ignored.  If it is tracked with a different
    GlobalJobId the old job is first stopped via stopjob().

    When the call creates a new master entry, entries at the front of
    ``runningmasters`` older than ``maxjobsecs`` are logged as expired and
    removed.
    """
    global maxjobsecs
    global runningmasters

    if 'Name' not in info or 'SlotID' not in info or 'GlobalJobId' not in info:
        return

    name = info['Name']
    if 'GLIDEIN_MASTER_NAME' in info:
        # Glidein may be partitioned and sometimes tear down all contained
        #  slots at once, so need to track those slots together
        idxname = info['GLIDEIN_MASTER_NAME']
    else:
        idxname = name
    idx = (idxname, info['SlotID'])
    globaljobid = info['GlobalJobId']
    # 'now' stays 0 unless a new master is created below; it doubles as
    # the flag that triggers the expiry scan at the end.
    now = 0
    if idx in runningmasters:
        thismaster = runningmasters[idx]
        runningjobs = thismaster[1]
        if name in runningjobs:
            if globaljobid == runningjobs[name]:
                # just an update to a running job, ignore
                return
            # first stop the existing job, the slot is being reused
            stopjob(info)
            # this may have removed the last job in thismaster, check again
    if idx not in runningmasters:
        # new master
        now = time.time()
        thismaster = (now, {})
        runningmasters[idx] = thismaster
    # add job to this master
    thismaster[1][name] = globaljobid

    printinfo = {}
    keys = [
        'Name', 'SlotID', 'GlobalJobId', 'RemoteOwner', 'ClientMachine',
        'ProjectName', 'Group', 'x509UserProxyVOName', 'x509userproxysubject',
        'x509UserProxyEmail'
    ]
    for key in keys:
        if key in info:
            printinfo[key] = info[key]
    htcondor.log(htcondor.LogLevel.Audit, "Job start: %s" % printinfo)

    if now == 0:
        return

    # also look for expired jobs at the beginning of the list and stop them
    # Iterate over a snapshot of the keys: entries are deleted inside the
    # loop, and mutating an OrderedDict while iterating it raises
    # RuntimeError on Python 3.
    for oldidx in list(runningmasters):
        oldmaster = runningmasters[oldidx]
        deltasecs = int(now - oldmaster[0])
        if deltasecs <= maxjobsecs:
            # entries are in insertion order, so the rest are newer
            break
        loginfo = {}
        loginfo['SlotID'] = oldidx[1]
        oldjobs = oldmaster[1]
        for jobname in oldjobs:
            loginfo['Name'] = jobname
            loginfo['GlobalJobId'] = oldjobs[jobname]
            htcondor.log(
                htcondor.LogLevel.Audit,
                "Cleaning up %d-second expired job: %s" % (deltasecs, loginfo))
        del runningmasters[oldidx]
コード例 #9
0
ファイル: audit_payloads.py プロジェクト: xzhao87/htcondor-ce
from collections import OrderedDict

# Registry of every running job currently being tracked.
# Jobs are grouped by 'master': either a pilot job/glidein or a single
#  stand-alone job.
# Key:   tuple (mastername, slotid).
# Value: tuple (starttime, jobs) where jobs maps each job name running
#  under that master to its GlobalJobId.
runningmasters = OrderedDict()

# Maximum tracked job age in hours: taken from the AUDIT_PAYLOAD_MAX_HOURS
#  configuration parameter when present, otherwise three days.
maxjobhours = (int(htcondor.param['AUDIT_PAYLOAD_MAX_HOURS'])
               if 'AUDIT_PAYLOAD_MAX_HOURS' in htcondor.param
               else 3 * 24)
htcondor.log(
    htcondor.LogLevel.Audit,
    "Audit payload maximum job hours: %d" % maxjobhours,
)
# Same limit expressed in seconds.
maxjobsecs = maxjobhours * 60 * 60


# a job may be being stopped
def stopjob(info):
    global runningmasters
    if 'Name' not in info or 'SlotID' not in info:
        return
    name = info['Name']
    matchre = ""
    if 'GLIDEIN_MASTER_NAME' in info:
        idxname = info['GLIDEIN_MASTER_NAME']
        if idxname == name:
            # stop all jobs under this master
            matchre = '.*'