Exemple #1
0
def loadCatalog(catalog, book, dataset):
    """Load the file ids recorded in an existing catalog.

    Runs ``grep root`` on ``<catalog>/<book>/<dataset>/RawFiles.00`` and turns
    every output line that has more than two blank-separated fields into a
    ``fileIds.fileId`` (first field: file name, second field: number of
    events).

    Args:
        catalog: base directory of the catalog.
        book: book path below the catalog directory.
        dataset: dataset name below the book.

    Returns:
        A ``fileIds.fileIds`` collection with one entry per parsed line. The
        collection is empty when the command produced no usable output.
    """
    catalogedIds = fileIds.fileIds()

    # Hand the arguments over as a ready-made list: building one command
    # string and splitting it on blanks tears apart any path with a space.
    rawFile = catalog + '/' + book + '/' + dataset + '/RawFiles.00'
    p = subprocess.Popen(["grep", "root", rawFile],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (out, err) = p.communicate()
    rc = p.returncode

    if rc != 0:
        # best effort: report the failure and continue with what came back
        print(" ERROR -- %d" % (rc))
        print(out)
        print(err)

    for line in out.split("\n"):
        f = line.split(" ")
        if len(f) > 2:
            catalogedIds.addFileId(fileIds.fileId(f[0], int(f[1])))

    return catalogedIds
Exemple #2
0
def loadFilesFromDisk(book, dataset):
    """Collect the file ids of the root files found on disk for a dataset.

    Runs the external ``list`` command on ``DIR/<book>/<dataset>``. Every
    output line that contains ``.root`` and has at least two blank-separated
    fields contributes its second field as the file name. The number of
    events is not known at this point and is recorded as -1.

    Args:
        book: book path below ``DIR``.
        dataset: dataset name below the book.

    Returns:
        A ``fileIds.fileIds`` collection with one entry per file found.
    """
    fileOnDiskIds = fileIds.fileIds()

    # list all files from the given directory
    target = DIR + "/" + book + "/" + dataset
    if DEBUG > 0:
        print(" CMD (loadFilesFromDisk): " + 'list ' + target)

    # Hand the arguments over as a ready-made list: splitting a command
    # string on blanks tears apart any path with a space.
    p = subprocess.Popen(["list", target],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (out, err) = p.communicate()
    rc = p.returncode

    if rc != 0:
        # best effort: report the failure and continue with what came back
        print(" ERROR -- %d" % (rc))
        print(out)
        print(err)

    for line in out.split("\n"):
        if '.root' not in line:
            continue
        f = line.split(" ")
        if len(f) > 1:
            fileOnDiskIds.addFileId(fileIds.fileId(f[1], -1))

    return fileOnDiskIds
Exemple #3
0
def loadLfns(dataset):
    """Load the logical file names recorded for a dataset.

    Reads ``/home/cmsprod/cms/jobs/lfns/<dataset>.lfns`` through ``cat``.
    Every line with more than two blank-separated fields yields a
    ``fileIds.fileId`` (second field: file name, third field: number of
    events).

    Args:
        dataset: dataset name; selects the ``.lfns`` file to read.

    Returns:
        A ``fileIds.fileIds`` collection with one entry per parsed line.
    """
    lfnIds = fileIds.fileIds()

    # find the correct file
    lfnFile = "/home/cmsprod/cms/jobs/lfns/" + dataset + ".lfns"
    if DEBUG > 0:
        print(" LFN file: " + lfnFile)

    # Hand the arguments over as a ready-made list: splitting a command
    # string on blanks tears apart any file name with a space.
    p = subprocess.Popen(["cat", lfnFile],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (out, err) = p.communicate()
    rc = p.returncode

    if rc != 0:
        # best effort: report the failure and continue with what came back
        print(" ERROR -- %d" % (rc))
        print(out)
        print(err)

    for line in out.split("\n"):
        f = line.split(" ")
        if len(f) > 2:
            lfnIds.addFileId(fileIds.fileId(f[1], int(f[2])))

    return lfnIds
Exemple #4
0
def makeCatalog(dir, nFilesPerSet):
    """Group the raw file records of a catalog directory into filesets.

    Concatenates ``<dir>/RawFiles.??``, keeps the lines containing ``root``,
    sorts them uniquely and walks through the result. Each record must have
    exactly seven whitespace-separated fields: the full file path followed by
    six integers. Records are collected into a ``fileIds.fileIdSet``; once a
    set holds ``nFilesPerSet`` files it is passed to ``showShort`` /
    ``showShortFiles`` together with the open ``<dir>/Filesets`` and
    ``<dir>/Files`` outputs, and a new set named with the next four-digit
    index is started.

    Args:
        dir: catalog directory holding the ``RawFiles.??`` inputs; the
            ``Filesets`` and ``Files`` outputs are (re)created in it.
        nFilesPerSet: maximum number of files per fileset.

    Raises:
        SystemExit: when a record does not have exactly seven fields.
    """
    cmd = 'cat ' + dir + '/RawFiles.?? | grep root | sort -u'

    iFileset = 0
    fileset = None  # created from the first record

    # One 'with' for both outputs, so they are closed even when the
    # sys.exit() below fires half-way through.
    with open(dir + '/Filesets', 'w') as filesetsOut, \
            open(dir + '/Files', 'w') as filesOut:
        for line in os.popen(cmd).readlines():  # run command
            # collapse every run of whitespace (also drops the newline)
            f = " ".join(line.split()).split(' ')
            g = f[0].split('/')
            dataDir = '/'.join(g[:-1])

            if fileset is None:
                fileset = fileIds.fileIdSet('0000', dataDir)

            if len(f) != 7:
                print(' Length is not seven: %d' % len(f))
                sys.exit(1)

            entry = fileIds.fileId(g[-1], *[int(v) for v in f[1:7]])

            if fileset.nFiles() >= nFilesPerSet:
                # the current set is full: emit it and start the next one
                fileset.showShort(filesetsOut, DEBUG)
                fileset.showShortFiles(filesOut, DEBUG)
                iFileset += 1
                fileset.reset('%04d' % iFileset, dataDir)
            fileset.addFile(entry)

        # complete what was started (a created set holds at least one file)
        if fileset is not None:
            fileset.showShort(filesetsOut, DEBUG)
            fileset.showShortFiles(filesOut, DEBUG)
Exemple #5
0
def getFiles(book, dataset):
    """Fetch the files registered in the database for a book and dataset.

    The book is split at ``/`` into request configuration and version, the
    dataset at ``+`` into process, setup and tier. These five values select
    rows of ``Files`` joined with ``Requests`` and ``Datasets``; each row
    (FileName, NEvents) becomes a ``fileIds.fileId``.

    Args:
        book: ``<config>/<version>``.
        dataset: ``<process>+<setup>+<tier>``.

    Returns:
        A ``fileIds.fileIds`` collection with one entry per row. It is empty
        when the query failed or matched nothing.

    Raises:
        IndexError: when ``book`` or ``dataset`` has too few components.
    """
    parts = book.split('/')
    mitcfg = parts[0]
    version = parts[1]

    # decode the dataset
    f = dataset.split('+')
    process = f[0]
    setup = f[1]
    tier = f[2]

    # NOTE(review): the values are interpolated straight into the statement.
    # If they can ever come from an untrusted source this should become a
    # parameterized query -- the placeholder style depends on the driver
    # behind Cursor, which is not visible here.
    sql = "select FileName, NEvents from Files inner join Requests on " \
        + " Files.RequestId = Requests.RequestId inner join Datasets on " \
        + " Requests.DatasetId = Datasets.DatasetId where " \
        + " DatasetProcess = '%s' and DatasetSetup='%s' and DatasetTier='%s'"%(process,setup,tier) \
        + " and RequestConfig = '%s' and RequestVersion = '%s'"%(mitcfg,version)

    results = []
    try:
        # Execute the SQL command
        if DEBUG > 0:
            print(' SQL: %s' % (sql))
        Cursor.execute(sql)
        if DEBUG > 0:
            print(' SQL: fetch results')
        results = Cursor.fetchall()
        if DEBUG > 0:
            print(' SQL: DONE')
    except Exception:
        # Best effort: report and return an empty collection. Catching
        # Exception (not everything) lets Ctrl-C and sys.exit() through.
        print('ERROR(%s) - could not find request id.' % (sql))

    catalogedIds = fileIds.fileIds()
    for row in results:
        catalogedIds.addFileId(fileIds.fileId(row[0], int(row[1])))

    return catalogedIds
Exemple #6
0
def findDatasetProperties(dataset, dbsInst, debug=0):
    """Determine size, number of files and logical file names of a dataset.

    Two kinds of dataset names are handled:

    * Names containing ``=`` (``<x><config>=<version>=<dataset>``) are read
      from the local catalog file
      ``CATALOG_INPUT/<config>/<version>/<dataset>/Filesets``; every line
      with at least three fields (id, path, number of events) yields one
      entry. The size is a fixed placeholder in this case.
    * All other names are looked up through the DBS reader REST interface
      with ``curl``; only entries flagged as valid files are counted.

    Args:
        dataset: dataset name in one of the two forms above.
        dbsInst: DBS instance; not used by this implementation.
        debug: verbosity; values above 1 print additional details.

    Returns:
        Tuple ``(sizeGb, nFiles, lfns)`` where ``lfns`` maps the name of each
        file id to its ``fileIds.lfn`` object.

    Raises:
        SystemExit: when the curl call fails or the total size cannot be
            converted.
    """
    if "=" in dataset:
        # find config, version and original dataset name
        f = dataset.split("=")
        conf = (f[0])[1:]
        vers = f[1]
        dset = f[2].replace("/", "+")

        sizeGb = 10  # does not matter
        nFiles = 0
        lfns = {}

        cmd = 'cat %s/%s/%s/%s/Filesets' % (CATALOG_INPUT, conf, vers, dset)
        myRex = rex.Rex()
        (rc, out, err) = myRex.executeLocalAction(cmd)

        for line in out.split("\n"):
            f = line.split()
            # id, path and number of events are all needed; shorter lines
            # used to slip through the check and fail on f[2]
            if len(f) > 2:
                nFiles += 1
                setId = f[0]
                path = re.sub(r'root://.*/(/store/.*)', r'\1', f[1])
                nEvents = int(f[2])

                fId = fileIds.fileId(setId + ".root", nEvents)
                lfn = fileIds.lfn(fId, setId, path)
                lfns[fId.getName()] = lfn
                if debug > 1:
                    print(" Adding: %s, %s" % (setId, lfn))

        return (sizeGb, nFiles, lfns)

    # dealing with a standard dataset: ask DBS for the list of files
    proxy = getProxy()
    url = 'curl -s --cert %s -k -H "Accept: application/json"'%proxy \
        + ' "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/'  \
        + 'files?dataset=%s&detail=true"'%(dataset)

    if debug > 1:
        print(' CURL: ' + url)

    myRex = rex.Rex()
    (rc, out, err) = myRex.executeLocalAction(url)

    if rc != 0:
        print(' ERROR ocurred in %s' % (url))
        sys.exit(1)

    data = json.loads(out)

    units = 'GB'
    nFiles = 0
    totalSize = 0
    lfns = {}
    for entry in data:
        valid = int(entry["is_file_valid"])
        fileName = entry["logical_file_name"]
        path = "/".join(fileName.split("/")[:-1])
        size = int(entry["file_size"])
        block = entry["block_name"].split("#")[1]
        nEvents = int(entry["event_count"])
        if valid != 1:
            continue
        nFiles += 1
        totalSize += size
        fId = fileIds.fileId(fileName, nEvents)
        lfns[fId.getName()] = fileIds.lfn(fId, block, path)

    try:
        sizeGb = convertSizeToGb(str(totalSize))
    except Exception:
        print('\n Error - could not convert size and number of files (%s %s / %s).'
              % (totalSize, units, nFiles))
        sys.exit(1)

    if debug > 1:
        for name in lfns:
            lfns[name].show()

    print('\n DBS - %s --> %.1f %s (nFiles: %d)\n' % (dataset, sizeGb, units,
                                                      nFiles))

    return (sizeGb, nFiles, lfns)
Exemple #7
0
def findDatasetProperties(dataset, dbsInst, debug=0):
    """Determine size, number of files and logical file names of a dataset.

    Three sources are supported:

    * Kraken-produced datasets, recognised by ``=`` in the name
      (``<x><config>=<version>=<dataset>``): read from
      ``CATALOG_INPUT/<config>/<version>/<dataset>/Filesets``.
    * Private datasets (``dbsInst == 'private'``), named
      ``/<trunc>/<config>/<version>/<dataset>``: read from
      ``CATALOG_INPUT/<trunc>/<config>/<version>/<dataset>/RawFiles.00``.
    * Everything else: looked up through the DBS reader REST interface with
      ``curl``; only entries flagged as valid files are counted. A failed
      validity check only produces a warning, the lookup is still done.

    For the two catalog-based sources the size is a fixed placeholder and a
    line needs at least three fields (the third is the number of events).

    Args:
        dataset: dataset name in one of the forms above.
        dbsInst: DBS instance; ``'private'`` selects the private catalog,
            otherwise it is passed on to ``isDatasetValid``.
        debug: verbosity; the catalog branches print each added file for any
            value above -1, the DBS branch prints details above 1.

    Returns:
        Tuple ``(sizeGb, nFiles, lfns)`` where ``lfns`` maps the name of each
        file id to its ``fileIds.lfn`` object.

    Raises:
        SystemExit: when the curl call fails or the total size cannot be
            converted.
    """
    # Every branch fills and returns this mapping; it was never initialised
    # before, which raised a NameError on the first file added.
    lfns = {}

    if "=" in dataset:  # this is a dataset produced with Kraken
        # find config, version and original dataset name
        f = dataset.split("=")
        conf = (f[0])[1:]
        vers = f[1]
        dset = f[2].replace("/", "+")

        sizeGb = 10  # does not matter
        nFiles = 0

        cmd = 'cat %s/%s/%s/%s/Filesets' % (CATALOG_INPUT, conf, vers, dset)
        myRex = rex.Rex()
        (rc, out, err) = myRex.executeLocalAction(cmd)

        for line in out.split("\n"):
            f = line.split()
            # id, path and number of events are all needed
            if len(f) > 2:
                nFiles += 1
                setId = f[0]
                path = re.sub(r'root://.*/(/store/.*)', r'\1', f[1])
                nEvents = int(f[2])

                fId = fileIds.fileId(setId + ".root", nEvents)
                lfns[fId.getName()] = fileIds.lfn(fId, setId, path)
                if debug > -1:
                    # placeholders now match the number of arguments
                    print(" Adding: %s, %s" % (setId, fId.getName()))

        return (sizeGb, nFiles, lfns)

    if dbsInst == 'private':
        print(" Private dataset detected.")
        sizeGb = 10  # does not matter
        nFiles = 0
        f = dataset.split("/")
        trunc = f[1]
        conf = f[2]
        vers = f[3]
        dset = f[4]
        cmd = 'cat %s/%s/%s/%s/%s/RawFiles.00' % (CATALOG_INPUT, trunc, conf,
                                                  vers, dset)
        print(" CMD: %s" % cmd)
        myRex = rex.Rex()
        (rc, out, err) = myRex.executeLocalAction(cmd)

        for line in out.split("\n"):
            f = line.split()
            # full file path plus at least two more fields; the third field
            # is the number of events
            if len(f) > 2:
                nFiles += 1
                pathParts = f[0].split('/')
                fileStem = pathParts[-1].replace('.root', '')
                block = fileStem[0:20]
                path = re.sub(r'root://.*/(/store/.*)', r'\1',
                              "/".join(pathParts[0:-1]))
                nEvents = int(f[2])

                fId = fileIds.fileId(fileStem + ".root", nEvents)
                lfns[fId.getName()] = fileIds.lfn(fId, block, path)
                if debug > -1:
                    print(" Adding: %s, %s" % (fileStem, path))

        return (sizeGb, nFiles, lfns)

    # dealing with a standard dataset: first test, but do not give up on it
    if not isDatasetValid(dataset, dbsInst, debug):
        print(' WARNING - dataset was not found to be valid.')
        print('         - continue and see whether it is in production.')
        print('         - to get all data this call has to be repeated')
        print('         - once the dataset is completed.')
    else:
        print(' INFO - dataset is valid.')

    proxy = getProxy()
    url = 'curl -s --cert %s -k -H "Accept: application/json"'%proxy \
        + ' "https://cmsweb.cern.ch/dbs/prod/global/DBSReader/'  \
        + 'files?dataset=%s&detail=true"'%(dataset)

    if debug > 1:
        print(' CURL: ' + url)

    myRex = rex.Rex()
    (rc, out, err) = myRex.executeLocalAction(url)

    if rc != 0:
        print(' ERROR ocurred in %s' % (url))
        sys.exit(1)

    data = json.loads(out)

    units = 'GB'
    nFiles = 0
    totalSize = 0
    for entry in data:
        valid = int(entry["is_file_valid"])
        fileName = entry["logical_file_name"]
        path = "/".join(fileName.split("/")[:-1])
        size = int(entry["file_size"])
        block = entry["block_name"].split("#")[1]
        nEvents = int(entry["event_count"])
        if valid != 1:
            continue
        nFiles += 1
        totalSize += size
        fId = fileIds.fileId(fileName, nEvents)
        lfns[fId.getName()] = fileIds.lfn(fId, block, path)

    try:
        sizeGb = convertSizeToGb(str(totalSize))
    except Exception:
        print('\n Error - could not convert size and number of files (%s %s / %s).'
              % (totalSize, units, nFiles))
        sys.exit(1)

    if debug > 1:
        for name in lfns:
            lfns[name].show()

    print('\n DBS - %s --> %.1f %s (nFiles: %d)\n' %
          (dataset, sizeGb, units, nFiles))

    return (sizeGb, nFiles, lfns)