Exemple #1
0
def findLocalFiles(book, dataset):
    """Return the set of ROOT files found locally (Tier-3) for a dataset.

    The directory /cms/store/user/paus/<book>/<dataset> is listed with
    'hdfs dfs -ls'.  When that produces fewer than two lines of output (for
    example because hdfs is not installed) the same directory is listed
    through the /mnt/hadoop mount point with 'ls -1' instead.

    Args:
        book: book part of the storage path.
        dataset: dataset directory name below the book.

    Returns:
        set of str: for rows that contain a path, the last two path
        components joined with '/'; for rows that are bare file names,
        '<dataset>/<row>'.
    """
    # print(...) with a single argument behaves identically in python 2 and 3
    print(" INFO - loading local physical files (Tier-3).")

    cmd = "hdfs dfs -ls /cms/store/user/paus/%s/%s " % (book,
                                                        dataset) + "|grep root"
    myRx = rex.Rex()
    (rc, out, err) = myRx.executeLocalAction(cmd)

    # in case hdfs is not installed
    if len(out.split("\n")) < 2:
        cmd = "ls -1 /mnt/hadoop/cms/store/user/paus/%s/%s " % (
            book, dataset) + "|grep root"
        myRx = rex.Rex()
        (rc, out, err) = myRx.executeLocalAction(cmd)

    # find list
    lFiles = set()
    for row in out.split("\n"):
        # empty lines
        if len(row) < 2:
            continue
        # make sure to adapt to format: hdfs rows carry the full path while
        # plain ls rows are just the file name
        f = row.split("/")
        if len(f) > 2:
            filename = "/".join(f[-2:])
        else:
            filename = "%s/%s" % (dataset, row)
        lFiles.add(filename)

    return lFiles
Exemple #2
0
def findAllT3Files(dir):
    """Return a dictionary of all ROOT files below a T3 directory and their sizes.

    Runs 'list <dir>/*' (filtered with grep root) with T2TOOLS_SERVER and
    T2TOOLS_USER exported for the T3 server, and copies every non-empty output
    row into the file '.sizes-t3' in the current directory.

    Each row is parsed as space separated fields: the first field is expected
    to look like '<tag>:<size>' and the last field is the file path.

    Args:
        dir: directory to list (the name shadows the builtin but is kept
            because it is part of the existing interface).

    Returns:
        dict mapping '<parent directory>/<file name>' to the size as int.

    Raises:
        IndexError, ValueError: when a row does not have the expected format.
    """
    # print(...) with a single argument behaves identically in python 2 and 3
    print(" INFO - loading all physical files on T3.")

    cmd = "export T2TOOLS_SERVER=t3serv015.mit.edu; export T2TOOLS_USER=cmsprod;"
    cmd += "list %s/* " % (dir) + "|grep root"
    myRx = rex.Rex()
    (rc, out, err) = myRx.executeLocalAction(cmd)

    # find list
    files = {}
    with open(".sizes-t3", "w") as fH:
        for row in out.split("\n"):
            if len(row) < 2:
                continue

            fH.write(row + '\n')
            fields = row.split(" ")
            size = int(fields[0].split(":")[1])
            filename = "/".join(fields[-1].split('/')[-2:])

            files[filename] = size

            # anything below 10 (bytes, presumably) is reported as empty
            if size < 10:
                print(" ERROR - zero size file found: %s/%s" % (dir, filename))

    return files
Exemple #3
0
def findAllFiles(dir):
    """Return a dictionary of all ROOT files below a T2 directory and their sizes.

    Runs 'list <dir>/*' (filtered with grep root) and copies every non-empty
    output row into the file '.sizes' in the current directory.

    Each row is parsed as space separated fields: the first field is expected
    to look like '<tag>:<size>' and the last field is the file path.

    Args:
        dir: directory to list (the name shadows the builtin but is kept
            because it is part of the existing interface).

    Returns:
        dict mapping '<parent directory>/<file name>' to the size as int.

    Raises:
        IndexError, ValueError: when a row does not have the expected format.
    """
    # print(...) with a single argument behaves identically in python 2 and 3
    print(" INFO - loading all physical files on T2.")

    cmd = "list %s/* " % (dir) + "|grep root"
    myRx = rex.Rex()
    (rc, out, err) = myRx.executeLocalAction(cmd)

    # find list
    files = {}
    with open(".sizes", "w") as fH:
        for row in out.split("\n"):
            if len(row) < 2:
                continue

            fH.write(row + '\n')
            fields = row.split(" ")
            size = int(fields[0].split(":")[1])
            filename = "/".join(fields[-1].split('/')[-2:])

            files[filename] = size

            # anything below 10 (bytes, presumably) is reported as empty
            if size < 10:
                print(" ERROR - zero size file found: " + filename)

    return files
Exemple #4
0
    def executeCondorCmd(self, cmd='condor_q', output=False):
        """Execute a condor command and hand back its (rc, out, err).

        A Rex is created for self.host / self.user.  When self.isLocal() is
        true the command runs through executeLocalAction, otherwise through
        executeAction which additionally delivers an internal return code.

        Args:
            cmd: command line to execute.
            output: when true the command, its output and any error text are
                printed.

        Returns:
            tuple (rc, out, err) of the executed command.
        """
        if output:
            print(' execute condor command: %s' % (cmd))

        runner = rex.Rex(self.host, self.user)

        runsLocally = self.isLocal()
        if runsLocally:
            # local execution has no internal return code
            internalRc = 0
            (rc, out, err) = runner.executeLocalAction(cmd)
        else:
            (internalRc, rc, out, err) = runner.executeAction(cmd)

        failed = internalRc != 0 or rc != 0
        if failed:
            # the internal code is only reported for remote execution
            if not runsLocally:
                print(' ERROR -- IRC: %d' % (internalRc))
            print(' ERROR -- RC: %d' % (rc))
            print(' ERROR -- ERR:\n%s' % (err))

        if output:
            print(' OUT:\n%s' % (out))
            if err != '':
                print('\n ERR:\n%s' % (err))

        return (rc, out, err)
Exemple #5
0
def findFiles(book, dataset):
    """Collect the names of the .err files that are newer than the counts file.

    The directory TRUNC/<book>/<dataset> is listed newest first ('ls -1t').
    Names (file name up to the first '.') are collected until one containing
    'ncounts' shows up; everything from there on was analyzed already.

    Returns:
        set of the collected names.
    """
    print(" INFO - analyzing book:%s  dataset:%s." % (book, dataset))

    listing = "ls -1t %s/%s/%s/*.err " % (TRUNC, book, dataset)
    (rc, out, err) = rex.Rex().executeLocalAction(listing)

    found = set()
    for path in out.split("\n"):
        if len(path) >= 2:
            stem = path.split('/')[-1].split('.')[0]
            # the counts file marks the border to the already analyzed files
            if 'ncounts' in stem:
                print(" Found the counts file: %s (%s) --> BREAK" %
                      (path, stem))
                break
            print(" Adding file: %s" % (stem))
            found.add(stem)

    return found
Exemple #6
0
    def findX509Proxy(self):
        """Return the file name (last path component) of the X509 proxy.

        The proxy path is obtained by running 'voms-proxy-info -path' locally;
        the full path is printed.
        """
        cmd = "voms-proxy-info -path"
        myRex = rex.Rex()
        (rc, out, err) = myRex.executeLocalAction(cmd)
        # strip the line terminator only: cutting the last character blindly
        # removes a real character when the output has no trailing newline
        x509Proxy = out.rstrip("\n")
        # print(...) with a single argument behaves identically in python 2 and 3
        print(" X509Proxy: " + x509Proxy)

        return x509Proxy.split("/")[-1]
Exemple #7
0
def clearLocalCache(datasetId):
    """Delete the cached files of a dataset below WORK_DIR.

    Files matching WORK_DIR/????/<datasetId>.???? are removed with 'rm -f'.

    Returns:
        the return code of the remove command.
    """
    removeCmd = 'rm -f %s/????/%s.????' % (WORK_DIR, datasetId)
    (status, _out, _err) = rex.Rex().executeLocalAction(removeCmd)
    return status
Exemple #8
0
def numberOfFiles(config, version, dataset):
    """Return the number of ROOT files listed for a dataset.

    Runs 'list BASE/<config>/<version>/<dataset>/*.root' (stderr discarded)
    and counts the lines of the output.

    Args:
        config: configuration part of the path.
        version: version part of the path.
        dataset: dataset directory name.

    Returns:
        int: number of non-empty output lines, 0 when nothing is listed.
    """
    myRx = rex.Rex()
    (rc, out,
     err) = myRx.executeLocalAction("list %s/%s/%s/%s/*.root 2> /dev/null" %
                                    (BASE, config, version, dataset))
    # count only non-empty lines: splitting on "\n" always yields at least one
    # (possibly empty) element, which made empty output count as one file and
    # newline terminated output count one file too many
    return len([line for line in out.split("\n") if line.strip()])
Exemple #9
0
    def __init__(self, task):
        """Store the task and set up empty scripts and the scheduler's Rex.

        Args:
            task: object providing scheduler.host and scheduler.user.
        """
        self.task = task
        self.localUser = os.getenv('USER')

        # all scripts start out empty (no '#!/bin/bash\n' header)
        self.logRemoveScript = self.webRemoveScript = self.logSaveScript = ''

        scheduler = self.task.scheduler
        self.rex = rex.Rex(scheduler.host, scheduler.user)
Exemple #10
0
 def findCmsswVersion(self):
     """Return the CMSSW version found for the request, without 'CMSSW_'.

     Lists the directory $KRAKEN_CMSSW/<self.request.version>/ and picks the
     last output line that contains 'CMSSW_'.  An empty string is returned
     when no such line exists.  The command and the line found are printed.
     """
     cmd = "ls -1rt %s/%s/"%(os.getenv('KRAKEN_CMSSW'),self.request.version)
     # print(...) with a single argument behaves identically in python 2 and 3
     print(" CMD: " + cmd)
     myRex = rex.Rex()
     (rc,out,err) = myRex.executeLocalAction(cmd)
     cmsswVersion = ""
     # 'ls -rt' puts the most recently modified entry last: last match wins
     for line in out.split("\n"):
         if 'CMSSW_' in line:
             cmsswVersion = line
     print(" CMSSW: " + cmsswVersion)
 
     return (cmsswVersion.replace('CMSSW_',''))
Exemple #11
0
    def __init__(self, task):
        """Remember the task, read the environment and create the scheduler's Rex.

        Args:
            task: object providing scheduler.host and scheduler.user.
        """
        self.task = task

        # settings taken from the environment
        self.localUser = os.getenv('USER')
        self.activity = os.getenv('KRAKEN_ACTIVITY')

        # scripts are filled later, they start out empty
        self.logRemoveScript = self.webRemoveScript = self.logSaveScript = ''

        sched = self.task.scheduler
        self.rex = rex.Rex(sched.host, sched.user)
Exemple #12
0
def find_files(dir):
    """Return the list of ROOT files below a directory.

    Runs 'list <dir>/*' filtered with grep root.  For every non-empty output
    row the last space separated field is taken as path and reduced to its
    last two components.

    Returns:
        list of '<parent directory>/<file name>' strings in output order.
    """
    listing = "list %s/* " % (dir) + "|grep root"
    (rc, out, err) = rex.Rex().executeLocalAction(listing)

    return ["/".join(row.split(" ")[-1].split('/')[-2:])
            for row in out.split("\n")
            if len(row) >= 2]
Exemple #13
0
    def findOsVersion(self):
        """Return the OS version of the software release, 'slc7' if none is found.

        The version is the part before the first '_' of the last entry in
        $KRAKEN_SW/<self.request.version>/SW_<self.swVersion>/lib.  The result
        is printed.
        """
        libDir = "%s/%s/SW_%s/lib" % (os.getenv('KRAKEN_SW'),
                                      self.request.version, self.swVersion)
        (rc, out, err) = rex.Rex().executeLocalAction(
            "ls -1 " + libDir + "|cut -d_ -f1|tail -1")

        # drop the last character (the newline); fall back when nothing is left
        release = out.decode()[:-1] or 'slc7'
        print(" OS: " + release)

        return release
Exemple #14
0
def testTier2Disk(debug=0):
    """Check that the Tier-2 disks can be seen.

    Runs 'list /cms/store/user/paus' (stderr discarded) and returns its return
    code unchanged.  The original note says failure is reported as -1; that
    value is produced by the command execution, not by this function.

    Args:
        debug: when larger than 0 the command and its results are printed.

    Returns:
        return code of the list command.
    """
    cmd = "list /cms/store/user/paus 2> /dev/null"
    if debug > 0:
        # print(...) with a single argument behaves identically in python 2 and 3
        print(" CMD: %s"%(cmd))

    myRx = rex.Rex()
    # execute exactly the command that was announced above
    (rc,out,err) = myRx.executeLocalAction(cmd)

    if debug > 0:
        print(" RC: %d\n OUT:\n%s\n ERR:\n%s\n"%(rc,out,err))

    return rc
Exemple #15
0
def makeDatasetList(config, version):
    """Return the list of dataset names found below BASE/<config>/<version>.

    Runs 'list BASE/<config>/<version>' keeping only lines starting with 'D:'
    and takes the last path component of each line as dataset name.  Names of
    four characters or less are dropped.

    Args:
        config: configuration part of the path.
        version: version part of the path.

    Returns:
        list of dataset names in output order.
    """
    myRx = rex.Rex()
    (rc, out, err) = myRx.executeLocalAction(
        "list %s/%s/%s 2> /dev/null | grep ^D:" % (BASE, config, version))
    datasetList = []
    for line in out.split("\n"):
        dataset = line.split("/")[-1]
        if len(dataset) > 4:
            datasetList.append(dataset)

    # NOTE(review): 'debug' is not a parameter of this function; presumably a
    # module level variable - confirm it is always defined
    if debug > 0:
        # print(...) with a single argument behaves identically in python 2 and 3
        print(" RC: %d\n OUT:\n%s\n ERR:\n%s\n" % (rc, out, err))

    return datasetList
Exemple #16
0
    def findSwVersion(self):
        """Return the software version found for the request.

        Lists $KRAKEN_SW/<self.request.version>/ and uses the last output line
        containing 'SW_'; everything up to and including 'SW_' is removed.  An
        empty string is returned when there is no such line.  The command and
        the result are printed.
        """
        cmd = "ls -1rt %s/%s/ |grep ^.*SW_" % (os.getenv('KRAKEN_SW'),
                                               self.request.version)
        print(" CMD: " + cmd)
        (rc, out, err) = rex.Rex().executeLocalAction(cmd)

        # only the last matching line determines the result
        matches = [entry for entry in out.decode().split("\n") if 'SW_' in entry]
        if matches:
            version = re.sub(r'^.*SW_', '', matches[-1])
        else:
            version = ""

        print(" SW: " + version)

        return version
Exemple #17
0
def findAllFiles(book,dataset):
    """Return the set of ROOT files of a dataset as seen through gridftp.

    Lists /cms/store/user/paus/<book>/<dataset> on se01.cmsaf.mit.edu with
    gfal-ls (filtered with grep root).

    Args:
        book: book part of the storage path.
        dataset: dataset directory name below the book.

    Returns:
        set of '<dataset>/<output row>' strings, one per non-empty row.
    """
    # print(...) with a single argument behaves identically in python 2 and 3
    print(" INFO - loading all physical files.")

    cmd = "gfal-ls gsiftp://se01.cmsaf.mit.edu:2811/cms/store/user/paus/%s/%s "%(book,dataset) + "|grep root"
    myRx = rex.Rex()
    (rc,out,err) = myRx.executeLocalAction(cmd)

    # find list, rows shorter than two characters are considered empty
    files = set("%s/%s"%(dataset,row) for row in out.split("\n") if len(row) >= 2)

    return files
Exemple #18
0
def findDatasetProperties(dataset, dbsInst, debug=0):
    """Determine size, number of files and the lfns of a dataset.

    Three kinds of datasets are distinguished:
      - the name contains '=' (dataset produced with Kraken): the files are
        read from the 'Filesets' file in CATALOG_INPUT,
      - dbsInst is 'private': the files are read from 'RawFiles.00' in
        CATALOG_INPUT,
      - otherwise: DBS is queried with curl using the proxy from getProxy().

    Args:
        dataset: dataset name.
        dbsInst: DBS instance, 'private' selects the private catalog.
        debug: verbosity; values above 1 print the curl command and all lfns.

    Returns:
        tuple (sizeGb, nFiles, lfns).  sizeGb is the fixed value 10 for the
        two catalog based cases; lfns maps fId.getName() to the lfn object.

    The process is terminated with sys.exit(1) when the curl command fails or
    the total size cannot be converted.
    """
    # NOTE(review): 'lfns' is never initialised in this function; presumably it
    # is a module level dictionary - confirm, otherwise a NameError is raised

    if "=" in dataset:  # this is a dataset produced with Kraken
        # find config, version and original dataset name
        f = dataset.split("=")
        conf = (f[0])[1:]
        vers = f[1]
        dset = f[2].replace("/", "+")

        sizeGb = 10  # does not matter
        nFiles = 0

        cmd = 'cat %s/%s/%s/%s/Filesets' % (CATALOG_INPUT, conf, vers, dset)
        myRex = rex.Rex()
        (rc, out, err) = myRex.executeLocalAction(cmd)

        for line in out.split("\n"):
            # whitespace separated fields: id, path, number of events
            f = line.split()
            # NOTE(review): f[2] is read below although only two fields are required
            if len(f) > 1:
                nFiles += 1
                baseName = f[0]
                path = re.sub(r'root://.*/(/store/.*)', r'\1', f[1])
                nEvents = int(f[2])

                fId = fileIds.fileId(baseName + ".root", nEvents)
                lfn = fileIds.lfn(fId, baseName, path)
                lfns[fId.getName()] = lfn
                if debug > -1:
                    # three values for three placeholders (used to be two,
                    # which raised a TypeError on every line)
                    print(" Adding: %s, %s, %s" %
                          (baseName, lfn, fId.getName()))

        return (sizeGb, nFiles, lfns)

    # dealing with a private dataset
    if dbsInst == 'private':
        print(" Private dataset detected.")
        sizeGb = 10  # does not matter
        nFiles = 0
        f = dataset.split("/")
        trunc = f[1]
        conf = f[2]
        vers = f[3]
        dset = f[4]
        cmd = 'cat %s/%s/%s/%s/%s/RawFiles.00' % (CATALOG_INPUT, trunc, conf,
                                                  vers, dset)
        print(" CMD: %s" % cmd)
        myRex = rex.Rex()
        (rc, out, err) = myRex.executeLocalAction(cmd)

        for line in out.split("\n"):
            # whitespace separated fields, first is the full file path and the
            # third the number of events
            f = line.split()
            # NOTE(review): f[2] is read below although only two fields are required
            if len(f) > 1:
                nFiles += 1
                baseName = (f[0].split('/')[-1]).replace('.root', '')
                # the first 20 characters of the file name serve as block name
                block = baseName[0:20]
                path = "/".join(f[0].split('/')[0:-1])
                path = re.sub(r'root://.*/(/store/.*)', r'\1', path)

                nEvents = int(f[2])

                fId = fileIds.fileId(baseName + ".root", nEvents)
                lfn = fileIds.lfn(fId, block, path)
                lfns[fId.getName()] = lfn
                if debug > -1:
                    # function call form, the statement form is a syntax error
                    # in python 3
                    print(" Adding: %s, %s" % (baseName, path))

        return (sizeGb, nFiles, lfns)

    # dealing with a standard dataset first test
    if not isDatasetValid(dataset, dbsInst, debug):
        print(' WARNING - dataset was not found to be valid.')
        print('         - continue and see whether it is in production.')
        print('         - to get all data this call has to be repeated')
        print('         - once the dataset is completed.')
    else:
        print(' INFO - dataset is valid.')

    proxy = getProxy()
    url = 'curl -s --cert %s -k -H "Accept: application/json"'%proxy \
        + ' "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/'  \
        + 'files?dataset=%s&detail=true"'%(dataset)

    if debug > 1:
        print(' CURL: ' + url)

    myRex = rex.Rex()
    (rc, out, err) = myRex.executeLocalAction(url)

    if rc != 0:
        print(' ERROR ocurred in %s' % (url))
        sys.exit(1)

    data = json.loads(out)

    units = 'GB'
    nFiles = 0
    totalSize = 0
    for entry in data:
        valid = int(entry["is_file_valid"])
        fileName = entry["logical_file_name"]
        path = "/".join(fileName.split("/")[:-1])
        size = int(entry["file_size"])
        # block names look like '<dataset>#<block>', keep the part after '#'
        block = entry["block_name"].split("#")[1]
        nEvents = int(entry["event_count"])
        # only valid files are counted and registered
        if valid == 1:
            nFiles += 1
            totalSize += size
            fId = fileIds.fileId(fileName, nEvents)
            lfn = fileIds.lfn(fId, block, path)
            lfns[fId.getName()] = lfn

    try:
        sizeGb = convertSizeToGb(str(totalSize))
    except Exception:
        print('\n Error - could not convert size and number of files (%s %s / %s).'\
            %(totalSize,units,nFiles))
        sys.exit(1)

    if debug > 1:
        for lfn in lfns:
            lfns[lfn].show()

    print('\n DBS - %s --> %.1f %s (nFiles: %d)\n' %
          (dataset, sizeGb, units, nFiles))

    return (sizeGb, nFiles, lfns)
Exemple #19
0
# --------------------------------------------------------------------------------------------------
# Read new values from the command line
for opt, arg in opts:
    if opt == "--help":
        # print(...) with a single argument behaves identically in python 2 and 3
        print(usage)
        sys.exit(0)

# Deal with obvious problems
if not os.getenv('KRAKEN_AGENTS_WWW'):
    print("\n Kraken agent environment is not initialized (KRAKEN_AGENTS_WWW).\n")
    sys.exit(1)
# the log directory is used in both commands below: without it the string
# concatenation fails with a TypeError
if not os.getenv('KRAKEN_AGENTS_LOG'):
    print("\n Kraken agent environment is not initialized (KRAKEN_AGENTS_LOG).\n")
    sys.exit(1)

# --------------------------------------------------------------------------------------------------
# Here is where the real action starts -------------------------------------------------------------
# --------------------------------------------------------------------------------------------------
myRx = rex.Rex()

# make sure to touch the heartbeat file first
cmd = "date >& " + os.getenv('KRAKEN_AGENTS_LOG') + '/heartbeat'
(rc, out, err) = myRx.executeLocalAction(cmd)

if rc != 0:
    print('\n ==== ERROR -- DATE (%s) ====\n\n%s' % (cmd, err))
    print('\n ==== OUTPUT -- DATE (%s) ====\n\n%s' % (cmd, out))
else:
    print(' ==== DATE (%s) ====' % (cmd))

# issue full rsync on the log directory (target is the parent of the www directory)
cmd = "rsync -Cavz --delete " + os.getenv('KRAKEN_AGENTS_LOG') + ' ' \
    +                           os.getenv('KRAKEN_AGENTS_WWW') + '/../'
(rc, out, err) = myRx.executeLocalAction(cmd)
Exemple #20
0

#===================================================================================================
#  M A I N
#===================================================================================================
# make sure command line is complete
if len(sys.argv) < 2:
    # print(...) with a single argument behaves identically in python 2 and 3
    print("\n ERROR -- " + usage)
    sys.exit(1)

# command line variables
directory = sys.argv[1]
print("\n INFO - checkDirectory.py %s" % (directory))
cmd = "t2tools.py --action ls --source " + directory + " | grep root"
# make sure we can work remotely/locally
remoteX = rex.Rex('none', 'none')
(rc, out, err) = remoteX.executeLocalAction(cmd)

# one entry per output line of the listing
content = out.split("\n")

# get Ids in the database
(requestId, datasetId) = getRequestId(directory)
nEvents = findAllFilesInDb(requestId)

# get disk resident Ids
path = directory
fileIds = []
for line in content:
    f = line.split(" ")
    if len(f) > 1:
        file = f[1]
Exemple #21
0
#!/usr/bin/env python

import os
import sys
import rex
import requests
import json
import time
import pprint

# Rex instance shared by the functions of this module
myRex = rex.Rex()
# base path on the storage element, KRAKEN_SE_BASE overrides the default
base = os.getenv('KRAKEN_SE_BASE', '/cms/store/user/paus')


def findFileSizes(config, version, dataset):
    """Return the sizes of the ROOT files of a dataset.

    Lists base/<config>/<version>/<dataset> with t2tools.py (filtered with
    grep root).  Lines with more than one space separated field are parsed:
    the first field is expected to look like '<tag>:<size>', the file name up
    to its first '.' is used as key.

    Returns:
        dict mapping the file name (without extension) to the size as int.
    """
    listCmd = "t2tools.py --action ls --source %s/%s/%s/%s |grep root" % (
        base, config, version, dataset)
    (rc, out, err) = myRex.executeLocalAction(listCmd)

    sizes = {}
    for entry in out.split('\n'):
        columns = entry.split(' ')
        if len(columns) <= 1:
            continue
        stem = entry.split('/')[-1].split('.')[0]
        sizes[stem] = int(columns[0].split(':')[1])

    return sizes


def getRequestId(cursor, config, version, dataset):
    # extract the unique request id this file is part of
Exemple #22
0
def findDatasetProperties(dataset, dbsInst, debug=0):
    """Determine size, number of files and the lfns of a dataset.

    A dataset whose name contains '=' is read from the 'Filesets' file in
    CATALOG_INPUT; any other dataset is looked up in DBS with curl using the
    proxy from getProxy().

    Args:
        dataset: dataset name.
        dbsInst: DBS instance (not used by the code of this function).
        debug: verbosity; values above 1 print the curl command and the lfns.

    Returns:
        tuple (sizeGb, nFiles, lfns).  sizeGb is the fixed value 10 for the
        catalog based case; lfns maps fId.getName() to the lfn object.

    The process is terminated with sys.exit(1) when the curl command fails or
    the total size cannot be converted.
    """
    if "=" in dataset:
        # find config, version and original dataset name
        f = dataset.split("=")
        conf = (f[0])[1:]
        vers = f[1]
        dset = f[2].replace("/", "+")

        sizeGb = 10  # does not matter
        nFiles = 0
        lfns = {}

        cmd = 'cat %s/%s/%s/%s/Filesets' % (CATALOG_INPUT, conf, vers, dset)
        myRex = rex.Rex()
        (rc, out, err) = myRex.executeLocalAction(cmd)

        for line in out.split("\n"):
            # whitespace separated fields: id, path, number of events
            f = line.split()
            # NOTE(review): f[2] is read below although only two fields are required
            if len(f) > 1:
                nFiles += 1
                baseName = f[0]
                path = re.sub(r'root://.*/(/store/.*)', r'\1', f[1])
                nEvents = int(f[2])

                fId = fileIds.fileId(baseName + ".root", nEvents)
                lfn = fileIds.lfn(fId, baseName, path)
                lfns[fId.getName()] = lfn
                if debug > 1:
                    # print(...) with a single argument behaves identically
                    # in python 2 and 3
                    print(" Adding: %s, %s" % (baseName, lfn))

        return (sizeGb, nFiles, lfns)

    proxy = getProxy()
    url = 'curl -s --cert %s -k -H "Accept: application/json"'%proxy \
        + ' "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/'  \
        + 'files?dataset=%s&detail=true"'%(dataset)

    if debug > 1:
        print(' CURL: ' + url)

    myRex = rex.Rex()
    (rc, out, err) = myRex.executeLocalAction(url)

    if rc != 0:
        print(' ERROR ocurred in %s' % (url))
        sys.exit(1)

    data = json.loads(out)

    units = 'GB'
    nFiles = 0
    totalSize = 0
    lfns = {}
    for entry in data:
        valid = int(entry["is_file_valid"])
        fileName = entry["logical_file_name"]
        path = "/".join(fileName.split("/")[:-1])
        size = int(entry["file_size"])
        # block names look like '<dataset>#<block>', keep the part after '#'
        block = entry["block_name"].split("#")[1]
        nEvents = int(entry["event_count"])
        # only valid files are counted and registered
        if valid == 1:
            nFiles += 1
            totalSize += size
            fId = fileIds.fileId(fileName, nEvents)
            lfn = fileIds.lfn(fId, block, path)
            lfns[fId.getName()] = lfn

    try:
        sizeGb = convertSizeToGb(str(totalSize))
    except Exception:
        print('\n Error - could not convert size and number of files (%s %s / %s).'\
            %(totalSize,units,nFiles))
        sys.exit(1)

    if debug > 1:
        for lfn in lfns:
            lfns[lfn].show()

    print('\n DBS - %s --> %.1f %s (nFiles: %d)\n' % (dataset, sizeGb, units,
                                                      nFiles))

    return (sizeGb, nFiles, lfns)