def checkPublication(self):
    """
    Check dataset publication in a DBS instance.
    """
    common.logger.info('--->>> Check data publication: dataset ' + self.dataset_to_check +
                       ' in DBS url ' + self.DBSURL + '\n')
    #  //
    # // Get API to DBS
    #//
    dbsreader = DBSReader(self.DBSURL)
    #  //
    # // Get list of datasets
    #//
    if len(self.dataset_to_check.split('/')) < 4:
        msg = "the provided dataset name is not correct"
        raise CrabException(msg)
    else:
        primds = self.dataset_to_check.split('/')[1]
        procds = self.dataset_to_check.split('/')[2]
        tier = self.dataset_to_check.split('/')[3]
        datasets = dbsreader.matchProcessedDatasets(primds, tier, procds)
        if common.debugLevel:
            print "PrimaryDataset = ", primds
            print "ProcessedDataset = ", procds
            print "DataTier = ", tier
            print "datasets matching your requirements = ", datasets
        for dataset in datasets:
            #  //
            # // Get list of blocks for the dataset and their location
            #//
            if len(dataset.get('PathList')) == 0:
                print "===== Dataset /%s/%s with tiers %s is still empty" % (
                    dataset.get('PrimaryDataset')['Name'],
                    dataset.get('Name'), dataset.get('TierList'))
            else:
                for datasetpath in dataset.get('PathList'):
                    nevttot = 0
                    print "=== dataset %s" % datasetpath
                    ### FEDE #######
                    if dataset['Description'] is not None:
                        print "=== dataset description = ", dataset['Description']
                    ################
                    blocks = dbsreader.getFileBlocksInfo(datasetpath)
                    for block in blocks:
                        SEList = dbsreader.listFileBlockLocation(block['Name'])  # replace that with DLS query
                        print "===== File block name: %s" % block['Name']
                        print "      File block located at: ", SEList
                        print "      File block status: %s" % block['OpenForWriting']
                        print "      Number of files: %s" % block['NumberOfFiles']
                        print "      Number of Bytes: %s" % block['BlockSize']
                        print "      Number of Events: %s" % block['NumberOfEvents']
                        if common.debugLevel:
                            print "--------- info about files --------"
                            print " Size \t Events \t LFN \t FileStatus "
                            files = dbsreader.listFilesInBlock(block['Name'])
                            for file in files:
                                print "%s %s %s %s" % (file['FileSize'], file['NumberOfEvents'],
                                                       file['LogicalFileName'], file['Status'])
                        nevttot = nevttot + block['NumberOfEvents']
                    print "\n total events: %s in dataset: %s\n" % (nevttot, datasetpath)
    if not common.debugLevel:
        common.logger.info('You can obtain more info about files of the dataset using: '
                           'crab -checkPublication -USER.dataset_to_check=' + self.dataset_to_check +
                           ' -USER.dbs_url_for_publication=' + self.DBSURL + ' -debug')
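# Worked sketch (not part of CRAB): how checkPublication decomposes a dataset
# path. A valid path has the form /<PrimaryDataset>/<ProcessedDataset>/<Tier>,
# so split('/') yields ['', primds, procds, tier]; anything shorter than four
# elements is rejected, exactly as in the method above.
def _splitDatasetPath(datasetPath):
    parts = datasetPath.split('/')
    if len(parts) < 4:
        raise ValueError("the provided dataset name is not correct: %s" % datasetPath)
    # parts[0] is the empty string produced by the leading '/'
    return parts[1], parts[2], parts[3]

# Example: _splitDatasetPath('/MyPrimary/MyProcessed-v1/USER')
#          -> ('MyPrimary', 'MyProcessed-v1', 'USER')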
def index(self, dataset):
    html = """<html><body><h2>Local DBS Dataset Listing</h2>\n"""
    html += "<h4>Dataset: %s</h4>\n" % dataset
    reader = DBSReader(self.localDBS)
    html += "<h4>Block Details</h4>\n"
    html += "<table>\n"
    html += "<tr><th>Block</th><th>SEName</th><th>Files</th>"
    html += "<th>Events</th></tr>\n"
    try:
        blocks = reader.getFileBlocksInfo(dataset)
    except Exception, ex:
        html += "</table>\n"
        html += "<p> Error accessing dataset information: %s</p>" % str(ex)
        html += """</body></html>"""
        return html
    # Success path: render one table row per block, matching the header
    # columns above (Block, SEName, Files, Events).
    for block in blocks:
        seList = reader.listFileBlockLocation(block['Name'])
        html += "<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n" % (
            block['Name'], seList, block['NumberOfFiles'], block['NumberOfEvents'])
    html += "</table>\n"
    html += """</body></html>"""
    return html
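# Minimal usage sketch for the handler above (illustrative, not part of the
# module): index only needs an object carrying `localDBS`, so a tiny stand-in
# is enough to exercise it from a script or a test. The URL is a placeholder.
class _FakeListing:
    localDBS = "http://localhost:8080/DBS/servlet/DBSServlet"  # assumed endpoint
    index = index

# page = _FakeListing().index('/MyPrimary/MyProcessed-v1/USER')
# print page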
def importDataset(self, sourceDBS, sourceDatasetPath, targetDBS,
                  onlyClosed=True, skipNoSiteError=False):
    """
    _importDataset_

    Import a dataset into the local scope DBS with its full parentage
    hierarchy (at least not slow, since branch info is dropped). Parents
    are also imported. This method imports block by block; each time a
    block is imported, its parent blocks are imported first.

    - *sourceDBS* : URL for the input DBS instance
    - *sourceDatasetPath* : dataset path to be imported
    - *targetDBS* : URL for the DBS the dataset is imported into
    - *onlyClosed* : only closed blocks will be imported if set to True
    - *skipNoSiteError* : if True, this method won't raise an exception
      when a block has no site information in sourceDBS

    """
    reader = DBSReader(sourceDBS)
    inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed)
    blkCounter = 0
    for inputBlock in inputBlocks:
        block = inputBlock['Name']
        #  //
        # // Test block does not exist in target
        #//
        blkCounter = blkCounter + 1
        msg = "Importing block %s of %s: %s " % (blkCounter, len(inputBlocks), block)
        logging.debug(msg)
        if self.reader.blockExists(block):
            #  //
            # // block exists
            #//  If block is closed dont attempt transfer
            if str(inputBlock['OpenForWriting']) != '1':
                msg = "Block already exists in target DBS and is closed:\n"
                msg += " ==> %s\n" % block
                msg += "Skipping Import of that block"
                logging.warning(msg)
                locations = reader.listFileBlockLocation(block)
                # only empty file blocks can have no location
                if not locations and str(inputBlock['NumberOfFiles']) != "0":
                    # we don't skip the error raising
                    if not skipNoSiteError:
                        msg = "Error in DBSWriter.importDataset\n"
                        msg += "Block has no locations defined: %s" % block
                        raise DBSWriterError(msg)
                    msg = "Block has no locations defined: %s" % block
                    logging.info(msg)
                logging.info("Update block locations to:")
                for sename in locations:
                    self.dbs.addReplicaToBlock(block, sename)
                    logging.info(sename)
                continue

        try:
            self.dbs.dbsMigrateBlock(sourceDBS, targetDBS, block_name=block)
        except DbsException, ex:
            msg = "Error in DBSWriter.importDataset\n"
            msg += "Could not write content of dataset:\n ==> %s\n" % (sourceDatasetPath,)
            msg += "Block name:\n ==> %s\n" % block
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)

        locations = reader.listFileBlockLocation(block)
        # only empty file blocks can have no location
        if not locations and str(inputBlock['NumberOfFiles']) != "0":
            # we don't skip the error raising
            if not skipNoSiteError:
                msg = "Error in DBSWriter.importDataset\n"
                msg += "Block has no locations defined: %s" % block
                raise DBSWriterError(msg)
            msg = "Block has no locations defined: %s" % block
            logging.info(msg)
        for sename in locations:
            self.dbs.addReplicaToBlock(block, sename)
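# Hypothetical usage sketch for importDataset. DBSWriter is assumed to be
# constructed from the target DBS URL (it provides the self.dbs/self.reader
# handles used above); all URLs and the dataset path are placeholders.
def _importDatasetExample():
    writer = DBSWriter('https://target-dbs.example.org/servlet/DBSServlet')
    writer.importDataset(
        sourceDBS='https://source-dbs.example.org/servlet/DBSServlet',
        sourceDatasetPath='/MyPrimary/MyProcessed-v1/USER',
        targetDBS='https://target-dbs.example.org/servlet/DBSServlet',
        onlyClosed=True,          # transfer only closed blocks
        skipNoSiteError=False)    # raise if a non-empty block has no location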
def importDatasetWithExistingParents(self, sourceDBS, sourceDatasetPath,
                                     targetDBS, onlyClosed=True):
    """
    _importDatasetWithExistingParents_

    Import a dataset into the local scope DBS. It complains if the
    parent datasets are not there!

    - *sourceDBS* : URL for the input DBS instance
    - *sourceDatasetPath* : dataset path to be imported
    - *targetDBS* : URL for the DBS the dataset is imported into

    """
    reader = DBSReader(sourceDBS)
    inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed)
    for inputBlock in inputBlocks:
        block = inputBlock['Name']
        #  //
        # // Test block does not exist in target
        #//
        if self.reader.blockExists(block):
            #  //
            # // block exists
            #//  If block is closed dont attempt transfer
            if str(inputBlock['OpenForWriting']) != '1':
                msg = "Block already exists in target DBS and is closed:\n"
                msg += " ==> %s\n" % block
                msg += "Skipping Import of that block"
                logging.warning(msg)
                locations = reader.listFileBlockLocation(block)
                # only empty file blocks can have no location
                if not locations and str(inputBlock['NumberOfFiles']) != "0":
                    msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                    msg += "Block has no locations defined: %s" % block
                    raise DBSWriterError(msg)
                logging.info("Update block locations to:")
                for sename in locations:
                    self.dbs.addReplicaToBlock(block, sename)
                    logging.info(sename)
                continue

        try:
            xferData = reader.dbs.listDatasetContents(sourceDatasetPath, block)
        except DbsException, ex:
            msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
            msg += "Could not read content of dataset:\n ==> %s\n" % (sourceDatasetPath,)
            msg += "Block name:\n ==> %s\n" % block
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
        try:
            self.dbs.insertDatasetContents(xferData)
        except DbsException, ex:
            msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
            msg += "Could not write content of dataset:\n ==> %s\n" % (sourceDatasetPath,)
            msg += "Block name:\n ==> %s\n" % block
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
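# Hypothetical sketch contrasting the two import paths: this variant assumes
# the parent datasets already exist in the target DBS, copying block contents
# via listDatasetContents/insertDatasetContents instead of migrating parents
# first. As above, DBSWriter construction and all URLs are placeholders.
def _importWithParentsExample():
    writer = DBSWriter('https://target-dbs.example.org/servlet/DBSServlet')
    writer.importDatasetWithExistingParents(
        sourceDBS='https://source-dbs.example.org/servlet/DBSServlet',
        sourceDatasetPath='/MyPrimary/MyProcessed-v1/USER',
        targetDBS='https://target-dbs.example.org/servlet/DBSServlet',
        onlyClosed=True)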
        procds = datasetPath.split('/')[2]
        tier = datasetPath.split('/')[3]
        # print " matchProcessedDatasets(%s,%s,%s)" % (primds, tier, procds)
        datasets = dbsreader.matchProcessedDatasets(primds, tier, procds)
    else:
        datasets = dbsreader.matchProcessedDatasets("*", "*", "*")

    for dataset in datasets:
        #  //
        # // Get list of blocks for the dataset and their location
        #//
        for datasetpath in dataset.get('PathList'):
            nevttot = 0
            print "===== dataset %s" % datasetpath
            blocks = dbsreader.getFileBlocksInfo(datasetpath)
            for block in blocks:
                SEList = dbsreader.listFileBlockLocation(block['Name'])  # replace that with DLS query
                print "== File block %s is located at: %s" % (block['Name'], SEList)
                print "File block name: %s" % block['Name']
                print "File block status: %s" % block['OpenForWriting']
                print "Number of files: %s" % block['NumberOfFiles']
                print "Number of Bytes: %s" % block['BlockSize']
                print "Number of Events: %s" % block['NumberOfEvents']
                if full:
                    print "--------- info about files --------"
                    print " Size \t Events \t LFN \t FileStatus "
                    files = dbsreader.listFilesInBlock(block['Name'])
                    for file in files:
                        print "%s %s %s %s" % (file['FileSize'], file['NumberOfEvents'],
                                               file['LogicalFileName'], file['Status'])
                nevttot = nevttot + block['NumberOfEvents']
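# Worked sketch of the accumulation in the loop above (illustrative data
# only): the dataset total is the sum of each block's NumberOfEvents.
def _totalEvents(blocks):
    nevttot = 0
    for block in blocks:
        nevttot = nevttot + block['NumberOfEvents']
    return nevttot

# Example: _totalEvents([{'NumberOfEvents': 1200}, {'NumberOfEvents': 800}]) -> 2000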