Example #1
# NOTE: DatasetConventions, DBSReader and WorkflowMakerError are assumed to be
# imported from the surrounding CMS ProdCommon/ProdAgent code.
def validateDataset(datasetPath, dbsUrl):
    """
    _validateDataset_

    Utility method to check that the provided datasetPath
    exists in the DBS instance at dbsUrl.

    """
    datasetDetails = DatasetConventions.parseDatasetPath(datasetPath)
    for key in ['Primary', 'DataTier', 'Processed']:
        if datasetDetails[key] is None:
            msg = "Invalid Dataset Name:\n ==> %s\n" % datasetPath
            msg += "Does not contain %s information" % key
            raise WorkflowMakerError(msg)

    datasets = []
    try:
        reader = DBSReader(dbsUrl)
        datasets = reader.matchProcessedDatasets(
            datasetDetails['Primary'],
            datasetDetails['DataTier'],
            datasetDetails['Processed'])
    except Exception, ex:
        msg = "Error calling DBS to validate dataset:\n%s\n" % datasetPath
        msg += str(ex)
        raise WorkflowMakerError(msg)

    # If DBS returned no matching dataset, the path does not exist in that instance.
    if len(datasets) == 0:
        msg = "Dataset not found in DBS %s:\n ==> %s\n" % (dbsUrl, datasetPath)
        raise WorkflowMakerError(msg)
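
A minimal usage sketch for the helper above (an illustration, not part of the original module); the dataset path and DBS URL are placeholders, and validateDataset/WorkflowMakerError are assumed to be importable where this runs.

try:
    # Sketch only: placeholder dataset path and DBS endpoint.
    validateDataset('/SomePrimary/SomeProcessed/GEN-SIM',
                    'http://example.cern.ch/DBSServlet')
    print "Dataset path is valid and known to DBS"
except WorkflowMakerError, ex:
    print "Validation failed: %s" % str(ex)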
Example #2
    def checkPublication(self):
        """
           check dataset publication in a dbs  
        """

        common.logger.info('--->>> Check data publication: dataset '+self.dataset_to_check+' in DBS url '+ self.DBSURL+'\n')
        #  //
        # // Get API to DBS
        #//
        dbsreader = DBSReader(self.DBSURL)
        #  //
        # // Get list of datasets
        #//
        if len(self.dataset_to_check.split('/')) < 4:
            msg = "the provided dataset name is not correct"
            raise CrabException(msg)
        else:   
            primds=self.dataset_to_check.split('/')[1]
            procds=self.dataset_to_check.split('/')[2]
            tier=self.dataset_to_check.split('/')[3]
            datasets=dbsreader.matchProcessedDatasets(primds,tier,procds)
            if common.debugLevel:
                print "PrimaryDataset = ", primds
                print "ProcessedDataset = ", procds
                print "DataTier = ", tier
                print "datasets matching your requirements= ", datasets

        for dataset in datasets:
            #  //
            # // Get list of blocks for the dataset and their location
            #//
            if len(dataset.get('PathList'))==0:
                print "===== Empty dataset yet /%s/%s with tiers %s"%(dataset.get('PrimaryDataset')['Name'],dataset.get('Name'),dataset.get('TierList'))
            else:
                for datasetpath in dataset.get('PathList'):
                    nevttot=0
                    print "=== dataset %s"%datasetpath
                    # print the dataset description when one is available
                    if dataset['Description'] is not None:
                        print "=== dataset description = ", dataset['Description']
                    blocks=dbsreader.getFileBlocksInfo(datasetpath)
                    for block in blocks:
                        SEList=dbsreader.listFileBlockLocation(block['Name'])  # replace that with DLS query
                        print "===== File block name: %s" %block['Name']
                        print "      File block located at: ", SEList
                        print "      File block status: %s" %block['OpenForWriting']
                        print "      Number of files: %s"%block['NumberOfFiles']
                        print "      Number of Bytes: %s"%block['BlockSize']
                        print "      Number of Events: %s"%block['NumberOfEvents']
                        if common.debugLevel:
                            print "--------- info about files --------"
                            print " Size \t Events \t LFN \t FileStatus "
                            files=dbsreader.listFilesInBlock(block['Name'])
                            for file in files:
                                print "%s %s %s %s"%(file['FileSize'],file['NumberOfEvents'],file['LogicalFileName'],file['Status'])
                        nevttot = nevttot + block['NumberOfEvents']
                    print "\n total events: %s in dataset: %s\n"%(nevttot,datasetpath)
        if not common.debugLevel:
            common.logger.info('You can obtain more info about files of the dataset using: crab -checkPublication -USER.dataset_to_check='+self.dataset_to_check+' -USER.dbs_url_for_publication='+self.DBSURL+' -debug')
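
For reference, a small sketch (illustrative only, with a placeholder path) of how the method above decomposes a '/PrimaryDataset/ProcessedDataset/DataTier' name before querying DBS:

# Illustrative only: same split as used by checkPublication above.
dataset_to_check = '/SomePrimary/SomeProcessed/USER'   # placeholder
parts = dataset_to_check.split('/')                    # ['', 'SomePrimary', 'SomeProcessed', 'USER']
if len(parts) < 4:
    print "The dataset name provided is not in the expected form"
else:
    primds, procds, tier = parts[1], parts[2], parts[3]
    print "PrimaryDataset = ", primds
    print "ProcessedDataset = ", procds
    print "DataTier = ", tier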
Example #3
    def publishDataset(self,file):
        """
        """
        try:
            jobReport = readJobReport(file)[0]
            self.exit_status = '0'
        except IndexError:
            self.exit_status = '1'
            msg = "Error: Problem with "+file+" file"
            common.logger.info(msg)
            return self.exit_status

        if (len(self.dataset_to_import) != 0):
            for dataset in self.dataset_to_import:
                common.logger.info("--->>> Importing parent dataset in the dbs: " + dataset)
                status_import = self.importParentDataset(self.globalDBS, dataset)
                if (status_import == 1):
                    common.logger.info('Problem with parent ' + dataset +
                                       ' import from the global DBS ' + self.globalDBS +
                                       ' to the local one ' + self.DBSURL)
                    self.exit_status = '1'
                    return self.exit_status
                else:
                    common.logger.info('Import ok of dataset ' + dataset)

        
        if (len(jobReport.files) <= 0) :
            self.exit_status = '1'
            msg = "Error: No EDM file to publish in xml file"+file+" file"
            common.logger.info(msg)
            return self.exit_status
        else:
            msg = "fjr contains some files to publish" 
            common.logger.debug(msg)

        #### dataset creation in DBS
        #// reader and writer handles for the same DBS instance
        dbsReader = DBSReader(self.DBSURL,level='ERROR')
        dbswriter = DBSWriter(self.DBSURL)
        #####

        self.published_datasets = [] 
        for fileinfo in jobReport.files:
            datasets_info=fileinfo.dataset
            if len(datasets_info)<=0:
                self.exit_status = '1'
                msg = "Error: No info about dataset in the xml file "+file
                common.logger.info(msg)
                return self.exit_status
            else:
                for dataset in datasets_info:
                    #### for production data
                    self.processedData = dataset['ProcessedDataset']
                    if (dataset['PrimaryDataset'] == 'null'):
                        dataset['PrimaryDataset'] = self.userprocessedData
                    elif self.datasetpath.upper() != 'NONE':
                        dataset['ParentDataset']= self.datasetpath

                    dataset['PSetContent']=self.content
                    cfgMeta = {'name' : self.pset , 'Type' : 'user' , 'annotation': 'user cfg', 'version' : 'private version'} # add real name of user cfg
                    common.logger.info("PrimaryDataset = %s"%dataset['PrimaryDataset'])
                    common.logger.info("ProcessedDataset = %s"%dataset['ProcessedDataset'])
                    common.logger.info("<User Dataset Name> = /"+dataset['PrimaryDataset']+"/"+dataset['ProcessedDataset']+"/USER")
                    
                    self.dataset_to_check="/"+dataset['PrimaryDataset']+"/"+dataset['ProcessedDataset']+"/USER"


                    self.published_datasets.append(self.dataset_to_check)

                    common.logger.log(10-1,"--->>> Inserting primary: %s processed : %s"%(dataset['PrimaryDataset'],dataset['ProcessedDataset']))
                    
                    #### check if dataset already exists in the DBS
                    result = dbsReader.matchProcessedDatasets(dataset['PrimaryDataset'], 'USER', dataset['ProcessedDataset'])
                    if (len(result) != 0):
                       result = dbsReader.listDatasetFiles(self.dataset_to_check)

                    primary = DBSWriterObjects.createPrimaryDataset( dataset, dbswriter.dbs)
                    common.logger.log(10-1,"Primary:  %s "%primary)
                    print "primary = ", primary 

                    algo = DBSWriterObjects.createAlgorithm(dataset, cfgMeta, dbswriter.dbs)
                    common.logger.log(10-1,"Algo:  %s "%algo)

                    processed = DBSWriterObjects.createProcessedDataset(primary, algo, dataset, dbswriter.dbs)
                    common.logger.log(10-1,"Processed:  %s "%processed)
                    print "processed = ", processed 

                    common.logger.log(10-1,"Inserted primary %s processed %s"%(primary,processed))
                    #######################################################################################
                
        common.logger.log(10-1,"exit_status = %s "%self.exit_status)
        return self.exit_status
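
A hedged sketch of how publishDataset might be driven (illustrative, not from the original source): `publisher` stands for an already configured instance of the class that owns this method, and the job report filename is a placeholder.

# Sketch only: 'publisher' and the report filename are illustrative assumptions.
status = publisher.publishDataset('crab_fjr_1.xml')
if status == '0':
    print "published datasets: ", publisher.published_datasets
else:
    print "publication failed, exit status ", status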
Example #4
    def checkPublication(self):
        """
           check dataset publication in a dbs  
        """

        common.logger.info('--->>> Check data publication: dataset ' +
                           self.dataset_to_check + ' in DBS url ' +
                           self.DBSURL + '\n')
        #  //
        # // Get API to DBS
        #//
        dbsreader = DBSReader(self.DBSURL)
        #  //
        # // Get list of datasets
        #//
        if len(self.dataset_to_check.split('/')) < 4:
            msg = "the provided dataset name is not correct"
            raise CrabException(msg)
        else:
            primds = self.dataset_to_check.split('/')[1]
            procds = self.dataset_to_check.split('/')[2]
            tier = self.dataset_to_check.split('/')[3]
            datasets = dbsreader.matchProcessedDatasets(primds, tier, procds)
            if common.debugLevel:
                print "PrimaryDataset = ", primds
                print "ProcessedDataset = ", procds
                print "DataTier = ", tier
                print "datasets matching your requirements= ", datasets

        for dataset in datasets:
            #  //
            # // Get list of blocks for the dataset and their location
            #//
            if len(dataset.get('PathList')) == 0:
                print "===== Empty dataset yet /%s/%s with tiers %s" % (
                    dataset.get('PrimaryDataset')['Name'], dataset.get('Name'),
                    dataset.get('TierList'))
            else:
                for datasetpath in dataset.get('PathList'):
                    nevttot = 0
                    print "=== dataset %s" % datasetpath
                    # print the dataset description when one is available
                    if dataset['Description'] is not None:
                        print "=== dataset description = ", dataset['Description']
                    blocks = dbsreader.getFileBlocksInfo(datasetpath)
                    for block in blocks:
                        SEList = dbsreader.listFileBlockLocation(
                            block['Name'])  # replace that with DLS query
                        print "===== File block name: %s" % block['Name']
                        print "      File block located at: ", SEList
                        print "      File block status: %s" % block[
                            'OpenForWriting']
                        print "      Number of files: %s" % block[
                            'NumberOfFiles']
                        print "      Number of Bytes: %s" % block['BlockSize']
                        print "      Number of Events: %s" % block[
                            'NumberOfEvents']
                        if common.debugLevel:
                            print "--------- info about files --------"
                            print " Size \t Events \t LFN \t FileStatus "
                            files = dbsreader.listFilesInBlock(block['Name'])
                            for file in files:
                                print "%s %s %s %s" % (
                                    file['FileSize'], file['NumberOfEvents'],
                                    file['LogicalFileName'], file['Status'])
                        nevttot = nevttot + block['NumberOfEvents']
                    print "\n total events: %s in dataset: %s\n" % (
                        nevttot, datasetpath)
        if not common.debugLevel:
            common.logger.info(
                'You can obtain more info about files of the dataset using: crab -checkPublication -USER.dataset_to_check='
                + self.dataset_to_check + ' -USER.dbs_url_for_publication=' +
                self.DBSURL + ' -debug')
Example #5
import logging
logging.disable(logging.INFO)

# NOTE: DBSReader is assumed to be imported from the surrounding CMS
# ProdCommon/CRAB code, and `url` (the DBS endpoint) and `datasetPath`
# (an optional '/Primary/Processed/Tier' path) are assumed to be defined
# earlier in the script; see the sketch after this example for one way
# to supply them.

#  //
# // Get API to DBS
#//
dbsreader = DBSReader(url)
#  //
# // Get list of datasets
#//
if datasetPath:
    primds = datasetPath.split('/')[1]
    procds = datasetPath.split('/')[2]
    tier = datasetPath.split('/')[3]
    # print " matchProcessedDatasets(%s,%s,%s)" % (primds, tier, procds)
    datasets = dbsreader.matchProcessedDatasets(primds, tier, procds)
else:
    datasets = dbsreader.matchProcessedDatasets("*", "*", "*")


for dataset in datasets:
    #  //
    # // Get list of blocks for the dataset and their location
    #//
    for datasetpath in dataset.get('PathList'):
        nevttot = 0
        print "===== dataset %s" % datasetpath
        blocks = dbsreader.getFileBlocksInfo(datasetpath)
        for block in blocks:
            SEList = dbsreader.listFileBlockLocation(block['Name'])  # replace that with a DLS query
            print "== File block %s is located at: %s" % (block['Name'], SEList)
Example #6
    def publishDataset(self, file):
        """
        """
        try:
            jobReport = readJobReport(file)[0]
            self.exit_status = '0'
        except IndexError:
            self.exit_status = '1'
            msg = "Error: Problem with " + file + " file"
            common.logger.info(msg)
            return self.exit_status

        if (len(self.dataset_to_import) != 0):
            for dataset in self.dataset_to_import:
                common.logger.info(
                    "--->>> Importing parent dataset in the dbs: " + dataset)
                status_import = self.importParentDataset(
                    self.globalDBS, dataset)
                if (status_import == 1):
                    common.logger.info('Problem with parent ' + dataset +
                                       ' import from the global DBS ' +
                                       self.globalDBS + ' to the local one ' +
                                       self.DBSURL)
                    self.exit_status = '1'
                    return self.exit_status
                else:
                    common.logger.info('Import ok of dataset ' + dataset)

        if (len(jobReport.files) <= 0):
            self.exit_status = '1'
            msg = "Error: No EDM file to publish in xml file" + file + " file"
            common.logger.info(msg)
            return self.exit_status
        else:
            msg = "fjr contains some files to publish"
            common.logger.debug(msg)

        #### dataset creation in DBS
        #// reader and writer handles for the same DBS instance
        dbsReader = DBSReader(self.DBSURL, level='ERROR')
        dbswriter = DBSWriter(self.DBSURL)
        #####

        self.published_datasets = []
        for fileinfo in jobReport.files:
            datasets_info = fileinfo.dataset
            if len(datasets_info) <= 0:
                self.exit_status = '1'
                msg = "Error: No info about dataset in the xml file " + file
                common.logger.info(msg)
                return self.exit_status
            else:
                for dataset in datasets_info:
                    #### for production data
                    self.processedData = dataset['ProcessedDataset']
                    if (dataset['PrimaryDataset'] == 'null'):
                        dataset['PrimaryDataset'] = self.userprocessedData
                    elif self.datasetpath.upper() != 'NONE':
                        dataset['ParentDataset'] = self.datasetpath

                    dataset['PSetContent'] = self.content
                    cfgMeta = {
                        'name': self.pset,
                        'Type': 'user',
                        'annotation': 'user cfg',
                        'version': 'private version'
                    }  # add real name of user cfg
                    common.logger.info("PrimaryDataset = %s" %
                                       dataset['PrimaryDataset'])
                    common.logger.info("ProcessedDataset = %s" %
                                       dataset['ProcessedDataset'])
                    common.logger.info("<User Dataset Name> = /" +
                                       dataset['PrimaryDataset'] + "/" +
                                       dataset['ProcessedDataset'] + "/USER")

                    self.dataset_to_check = "/" + dataset[
                        'PrimaryDataset'] + "/" + dataset[
                            'ProcessedDataset'] + "/USER"

                    self.published_datasets.append(self.dataset_to_check)

                    common.logger.log(
                        10 - 1, "--->>> Inserting primary: %s processed : %s" %
                        (dataset['PrimaryDataset'],
                         dataset['ProcessedDataset']))

                    #### check if dataset already exists in the DBS
                    result = dbsReader.matchProcessedDatasets(
                        dataset['PrimaryDataset'], 'USER',
                        dataset['ProcessedDataset'])
                    if (len(result) != 0):
                        result = dbsReader.listDatasetFiles(
                            self.dataset_to_check)

                    primary = DBSWriterObjects.createPrimaryDataset(
                        dataset, dbswriter.dbs)
                    common.logger.log(10 - 1, "Primary:  %s " % primary)
                    print "primary = ", primary

                    algo = DBSWriterObjects.createAlgorithm(
                        dataset, cfgMeta, dbswriter.dbs)
                    common.logger.log(10 - 1, "Algo:  %s " % algo)

                    processed = DBSWriterObjects.createProcessedDataset(
                        primary, algo, dataset, dbswriter.dbs)
                    common.logger.log(10 - 1, "Processed:  %s " % processed)
                    print "processed = ", processed

                    common.logger.log(
                        10 - 1, "Inserted primary %s processed %s" %
                        (primary, processed))
                    #######################################################################################

        common.logger.log(10 - 1, "exit_status = %s " % self.exit_status)
        return self.exit_status