Example #1
# NOTE: DatasetConventions, DBSReader and WorkflowMakerError are assumed to be
# imported from the surrounding CMS ProdCommon/ProdAgent code.
def validateDataset(datasetPath, dbsUrl):
    """
    _validateDataset_

    Utility method to check that the provided datasetPath
    exists in the DBS instance at dbsUrl.

    """
    datasetDetails = DatasetConventions.parseDatasetPath(datasetPath)
    for key in ['Primary', 'DataTier', 'Processed']:
        if datasetDetails[key] is None:
            msg = "Invalid Dataset Name:\n ==> %s\n" % datasetPath
            msg += "Does not contain %s information" % key
            raise WorkflowMakerError(msg)

    datasets = []
    try:
        reader = DBSReader(dbsUrl)
        datasets = reader.matchProcessedDatasets(
            datasetDetails['Primary'],
            datasetDetails['DataTier'],
            datasetDetails['Processed'])
    except Exception, ex:
        msg = "Error calling DBS to validate dataset:\n%s\n" % datasetPath
        msg += str(ex)
        raise WorkflowMakerError(msg)

    # If DBS returned no matching dataset, the path does not exist in that instance.
    if len(datasets) == 0:
        msg = "Dataset not found in DBS %s:\n ==> %s\n" % (dbsUrl, datasetPath)
        raise WorkflowMakerError(msg)
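
A minimal usage sketch for the helper above (an illustration, not part of the original module); the dataset path and DBS URL are placeholders, and validateDataset/WorkflowMakerError are assumed to be importable where this runs.

try:
    # Sketch only: placeholder dataset path and DBS endpoint.
    validateDataset('/SomePrimary/SomeProcessed/GEN-SIM',
                    'http://example.cern.ch/DBSServlet')
    print "Dataset path is valid and known to DBS"
except WorkflowMakerError, ex:
    print "Validation failed: %s" % str(ex)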
Example #2
    def checkPublication(self):
        """
           check dataset publication in a dbs  
        """

        common.logger.info('--->>> Check data publication: dataset '+self.dataset_to_check+' in DBS url '+ self.DBSURL+'\n')
        #  //
        # // Get API to DBS
        #//
        dbsreader = DBSReader(self.DBSURL)
        #  //
        # // Get list of datasets
        #//
        if len(self.dataset_to_check.split('/')) < 4:
            msg = "the provided dataset name is not correct"
            raise CrabException(msg)
        else:   
            primds=self.dataset_to_check.split('/')[1]
            procds=self.dataset_to_check.split('/')[2]
            tier=self.dataset_to_check.split('/')[3]
            datasets=dbsreader.matchProcessedDatasets(primds,tier,procds)
            if common.debugLevel:
                print "PrimaryDataset = ", primds
                print "ProcessedDataset = ", procds
                print "DataTier = ", tier
                print "datasets matching your requirements= ", datasets

        for dataset in datasets:
            #  //
            # // Get list of blocks for the dataset and their location
            #//
            if len(dataset.get('PathList'))==0:
                print "===== Empty dataset yet /%s/%s with tiers %s"%(dataset.get('PrimaryDataset')['Name'],dataset.get('Name'),dataset.get('TierList'))
            else:
                for datasetpath in dataset.get('PathList'):
                    nevttot=0
                    print "=== dataset %s"%datasetpath
                    # print the dataset description when one is available
                    if dataset['Description'] is not None:
                        print "=== dataset description = ", dataset['Description']
                    blocks=dbsreader.getFileBlocksInfo(datasetpath)
                    for block in blocks:
                        SEList=dbsreader.listFileBlockLocation(block['Name'])  # replace that with DLS query
                        print "===== File block name: %s" %block['Name']
                        print "      File block located at: ", SEList
                        print "      File block status: %s" %block['OpenForWriting']
                        print "      Number of files: %s"%block['NumberOfFiles']
                        print "      Number of Bytes: %s"%block['BlockSize']
                        print "      Number of Events: %s"%block['NumberOfEvents']
                        if common.debugLevel:
                            print "--------- info about files --------"
                            print " Size \t Events \t LFN \t FileStatus "
                            files=dbsreader.listFilesInBlock(block['Name'])
                            for file in files:
                                print "%s %s %s %s"%(file['FileSize'],file['NumberOfEvents'],file['LogicalFileName'],file['Status'])
                        nevttot = nevttot + block['NumberOfEvents']
                    print "\n total events: %s in dataset: %s\n"%(nevttot,datasetpath)
        if not common.debugLevel:
            common.logger.info('You can obtain more info about files of the dataset using: crab -checkPublication -USER.dataset_to_check='+self.dataset_to_check+' -USER.dbs_url_for_publication='+self.DBSURL+' -debug')
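
For reference, a small sketch (illustrative only, with a placeholder path) of how the method above decomposes a '/PrimaryDataset/ProcessedDataset/DataTier' name before querying DBS:

# Illustrative only: same split as used by checkPublication above.
dataset_to_check = '/SomePrimary/SomeProcessed/USER'   # placeholder
parts = dataset_to_check.split('/')                    # ['', 'SomePrimary', 'SomeProcessed', 'USER']
if len(parts) < 4:
    print "The dataset name provided is not in the expected form"
else:
    primds, procds, tier = parts[1], parts[2], parts[3]
    print "PrimaryDataset = ", primds
    print "ProcessedDataset = ", procds
    print "DataTier = ", tier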
Example #3
    def publishDataset(self,file):
        """
        """
        try:
            jobReport = readJobReport(file)[0]
            self.exit_status = '0'
        except IndexError:
            self.exit_status = '1'
            msg = "Error: Problem with "+file+" file"
            common.logger.info(msg)
            return self.exit_status

        if (len(self.dataset_to_import) != 0):
            for dataset in self.dataset_to_import:
                common.logger.info("--->>> Importing parent dataset in the dbs: " + dataset)
                status_import = self.importParentDataset(self.globalDBS, dataset)
                if (status_import == 1):
                    common.logger.info('Problem with parent ' + dataset +
                                       ' import from the global DBS ' + self.globalDBS +
                                       ' to the local one ' + self.DBSURL)
                    self.exit_status = '1'
                    return self.exit_status
                else:
                    common.logger.info('Import ok of dataset ' + dataset)

        
        if (len(jobReport.files) <= 0) :
            self.exit_status = '1'
            msg = "Error: No EDM file to publish in xml file"+file+" file"
            common.logger.info(msg)
            return self.exit_status
        else:
            msg = "fjr contains some files to publish" 
            common.logger.debug(msg)

        #### dataset creation in DBS
        #// reader and writer handles for the same DBS instance
        dbsReader = DBSReader(self.DBSURL,level='ERROR')
        dbswriter = DBSWriter(self.DBSURL)
        #####

        self.published_datasets = [] 
        for fileinfo in jobReport.files:
            datasets_info=fileinfo.dataset
            if len(datasets_info)<=0:
                self.exit_status = '1'
                msg = "Error: No info about dataset in the xml file "+file
                common.logger.info(msg)
                return self.exit_status
            else:
                for dataset in datasets_info:
                    #### for production data
                    self.processedData = dataset['ProcessedDataset']
                    if (dataset['PrimaryDataset'] == 'null'):
                        dataset['PrimaryDataset'] = self.userprocessedData
                    elif self.datasetpath.upper() != 'NONE':
                        dataset['ParentDataset']= self.datasetpath

                    dataset['PSetContent']=self.content
                    cfgMeta = {'name' : self.pset , 'Type' : 'user' , 'annotation': 'user cfg', 'version' : 'private version'} # add real name of user cfg
                    common.logger.info("PrimaryDataset = %s"%dataset['PrimaryDataset'])
                    common.logger.info("ProcessedDataset = %s"%dataset['ProcessedDataset'])
                    common.logger.info("<User Dataset Name> = /"+dataset['PrimaryDataset']+"/"+dataset['ProcessedDataset']+"/USER")
                    
                    self.dataset_to_check="/"+dataset['PrimaryDataset']+"/"+dataset['ProcessedDataset']+"/USER"


                    self.published_datasets.append(self.dataset_to_check)

                    common.logger.log(10-1,"--->>> Inserting primary: %s processed : %s"%(dataset['PrimaryDataset'],dataset['ProcessedDataset']))
                    
                    #### check if dataset already exists in the DBS
                    result = dbsReader.matchProcessedDatasets(dataset['PrimaryDataset'], 'USER', dataset['ProcessedDataset'])
                    if (len(result) != 0):
                       result = dbsReader.listDatasetFiles(self.dataset_to_check)

                    primary = DBSWriterObjects.createPrimaryDataset( dataset, dbswriter.dbs)
                    common.logger.log(10-1,"Primary:  %s "%primary)
                    print "primary = ", primary 

                    algo = DBSWriterObjects.createAlgorithm(dataset, cfgMeta, dbswriter.dbs)
                    common.logger.log(10-1,"Algo:  %s "%algo)

                    processed = DBSWriterObjects.createProcessedDataset(primary, algo, dataset, dbswriter.dbs)
                    common.logger.log(10-1,"Processed:  %s "%processed)
                    print "processed = ", processed 

                    common.logger.log(10-1,"Inserted primary %s processed %s"%(primary,processed))
                    #######################################################################################
                
        common.logger.log(10-1,"exit_status = %s "%self.exit_status)
        return self.exit_status
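
A hedged sketch of how publishDataset might be driven (illustrative, not from the original source): `publisher` stands for an already configured instance of the class that owns this method, and the job report filename is a placeholder.

# Sketch only: 'publisher' and the report filename are illustrative assumptions.
status = publisher.publishDataset('crab_fjr_1.xml')
if status == '0':
    print "published datasets: ", publisher.published_datasets
else:
    print "publication failed, exit status ", status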
Example #4
    def checkPublication(self):
        """
           check dataset publication in a dbs  
        """

        common.logger.info('--->>> Check data publication: dataset ' +
                           self.dataset_to_check + ' in DBS url ' +
                           self.DBSURL + '\n')
        #  //
        # // Get API to DBS
        #//
        dbsreader = DBSReader(self.DBSURL)
        #  //
        # // Get list of datasets
        #//
        if len(self.dataset_to_check.split('/')) < 4:
            msg = "the provided dataset name is not correct"
            raise CrabException(msg)
        else:
            primds = self.dataset_to_check.split('/')[1]
            procds = self.dataset_to_check.split('/')[2]
            tier = self.dataset_to_check.split('/')[3]
            datasets = dbsreader.matchProcessedDatasets(primds, tier, procds)
            if common.debugLevel:
                print "PrimaryDataset = ", primds
                print "ProcessedDataset = ", procds
                print "DataTier = ", tier
                print "datasets matching your requirements= ", datasets

        for dataset in datasets:
            #  //
            # // Get list of blocks for the dataset and their location
            #//
            if len(dataset.get('PathList')) == 0:
                print "===== Empty dataset yet /%s/%s with tiers %s" % (
                    dataset.get('PrimaryDataset')['Name'], dataset.get('Name'),
                    dataset.get('TierList'))
            else:
                for datasetpath in dataset.get('PathList'):
                    nevttot = 0
                    print "=== dataset %s" % datasetpath
                    # print the dataset description when one is available
                    if dataset['Description'] is not None:
                        print "=== dataset description = ", dataset['Description']
                    blocks = dbsreader.getFileBlocksInfo(datasetpath)
                    for block in blocks:
                        SEList = dbsreader.listFileBlockLocation(
                            block['Name'])  # replace that with DLS query
                        print "===== File block name: %s" % block['Name']
                        print "      File block located at: ", SEList
                        print "      File block status: %s" % block[
                            'OpenForWriting']
                        print "      Number of files: %s" % block[
                            'NumberOfFiles']
                        print "      Number of Bytes: %s" % block['BlockSize']
                        print "      Number of Events: %s" % block[
                            'NumberOfEvents']
                        if common.debugLevel:
                            print "--------- info about files --------"
                            print " Size \t Events \t LFN \t FileStatus "
                            files = dbsreader.listFilesInBlock(block['Name'])
                            for file in files:
                                print "%s %s %s %s" % (
                                    file['FileSize'], file['NumberOfEvents'],
                                    file['LogicalFileName'], file['Status'])
                        nevttot = nevttot + block['NumberOfEvents']
                    print "\n total events: %s in dataset: %s\n" % (
                        nevttot, datasetpath)
        if not common.debugLevel:
            common.logger.info(
                'You can obtain more info about files of the dataset using: crab -checkPublication -USER.dataset_to_check='
                + self.dataset_to_check + ' -USER.dbs_url_for_publication=' +
                self.DBSURL + ' -debug')
Example #5
import logging
logging.disable(logging.INFO)

# NOTE: DBSReader is assumed to be imported from the surrounding CMS
# ProdCommon/CRAB code, and `url` (the DBS endpoint) and `datasetPath`
# (an optional '/Primary/Processed/Tier' path) are assumed to be defined
# earlier in the script; see the sketch after this example for one way
# to supply them.

#  //
# // Get API to DBS
#//
dbsreader = DBSReader(url)
#  //
# // Get list of datasets
#//
if datasetPath:
    primds = datasetPath.split('/')[1]
    procds = datasetPath.split('/')[2]
    tier = datasetPath.split('/')[3]
    # print " matchProcessedDatasets(%s,%s,%s)" % (primds, tier, procds)
    datasets = dbsreader.matchProcessedDatasets(primds, tier, procds)
else:
    datasets = dbsreader.matchProcessedDatasets("*", "*", "*")


for dataset in datasets:
    #  //
    # // Get list of blocks for the dataset and their location
    #//
    for datasetpath in dataset.get('PathList'):
        nevttot = 0
        print "===== dataset %s" % datasetpath
        blocks = dbsreader.getFileBlocksInfo(datasetpath)
        for block in blocks:
            SEList = dbsreader.listFileBlockLocation(block['Name'])  # replace that with a DLS query
            print "== File block %s is located at: %s" % (block['Name'], SEList)
Example #6
    def publishDataset(self, file):
        """
        """
        try:
            jobReport = readJobReport(file)[0]
            self.exit_status = '0'
        except IndexError:
            self.exit_status = '1'
            msg = "Error: Problem with " + file + " file"
            common.logger.info(msg)
            return self.exit_status

        if (len(self.dataset_to_import) != 0):
            for dataset in self.dataset_to_import:
                common.logger.info(
                    "--->>> Importing parent dataset in the dbs: " + dataset)
                status_import = self.importParentDataset(
                    self.globalDBS, dataset)
                if (status_import == 1):
                    common.logger.info('Problem with parent ' + dataset +
                                       ' import from the global DBS ' +
                                       self.globalDBS + ' to the local one ' +
                                       self.DBSURL)
                    self.exit_status = '1'
                    return self.exit_status
                else:
                    common.logger.info('Import ok of dataset ' + dataset)

        if (len(jobReport.files) <= 0):
            self.exit_status = '1'
            msg = "Error: No EDM file to publish in xml file" + file + " file"
            common.logger.info(msg)
            return self.exit_status
        else:
            msg = "fjr contains some files to publish"
            common.logger.debug(msg)

        #### dataset creation in DBS
        #// reader and writer handles for the same DBS instance
        dbsReader = DBSReader(self.DBSURL, level='ERROR')
        dbswriter = DBSWriter(self.DBSURL)
        #####

        self.published_datasets = []
        for fileinfo in jobReport.files:
            datasets_info = fileinfo.dataset
            if len(datasets_info) <= 0:
                self.exit_status = '1'
                msg = "Error: No info about dataset in the xml file " + file
                common.logger.info(msg)
                return self.exit_status
            else:
                for dataset in datasets_info:
                    #### for production data
                    self.processedData = dataset['ProcessedDataset']
                    if (dataset['PrimaryDataset'] == 'null'):
                        dataset['PrimaryDataset'] = self.userprocessedData
                    elif self.datasetpath.upper() != 'NONE':
                        dataset['ParentDataset'] = self.datasetpath

                    dataset['PSetContent'] = self.content
                    cfgMeta = {
                        'name': self.pset,
                        'Type': 'user',
                        'annotation': 'user cfg',
                        'version': 'private version'
                    }  # add real name of user cfg
                    common.logger.info("PrimaryDataset = %s" %
                                       dataset['PrimaryDataset'])
                    common.logger.info("ProcessedDataset = %s" %
                                       dataset['ProcessedDataset'])
                    common.logger.info("<User Dataset Name> = /" +
                                       dataset['PrimaryDataset'] + "/" +
                                       dataset['ProcessedDataset'] + "/USER")

                    self.dataset_to_check = "/" + dataset[
                        'PrimaryDataset'] + "/" + dataset[
                            'ProcessedDataset'] + "/USER"

                    self.published_datasets.append(self.dataset_to_check)

                    common.logger.log(
                        10 - 1, "--->>> Inserting primary: %s processed : %s" %
                        (dataset['PrimaryDataset'],
                         dataset['ProcessedDataset']))

                    #### check if dataset already exists in the DBS
                    result = dbsReader.matchProcessedDatasets(
                        dataset['PrimaryDataset'], 'USER',
                        dataset['ProcessedDataset'])
                    if (len(result) != 0):
                        result = dbsReader.listDatasetFiles(
                            self.dataset_to_check)

                    primary = DBSWriterObjects.createPrimaryDataset(
                        dataset, dbswriter.dbs)
                    common.logger.log(10 - 1, "Primary:  %s " % primary)
                    print "primary = ", primary

                    algo = DBSWriterObjects.createAlgorithm(
                        dataset, cfgMeta, dbswriter.dbs)
                    common.logger.log(10 - 1, "Algo:  %s " % algo)

                    processed = DBSWriterObjects.createProcessedDataset(
                        primary, algo, dataset, dbswriter.dbs)
                    common.logger.log(10 - 1, "Processed:  %s " % processed)
                    print "processed = ", processed

                    common.logger.log(
                        10 - 1, "Inserted primary %s processed %s" %
                        (primary, processed))
                    #######################################################################################

        common.logger.log(10 - 1, "exit_status = %s " % self.exit_status)
        return self.exit_status