Example 1
def browse(self, gui=True):
    """Launch the bookkeeping GUI and return the selected files as a dataset."""
    f = self._createTmpFile()
    if gui:
        cmd = 'bookkeepingGUI("%s")' % f
        execute(cmd)
        # The GUI writes the selection to the temp file; read it back as a list.
        file_list = self._fileToList(f)
        ds = LHCbDataset()
        ds.extend(file_list)
        return ds
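
A usage sketch inside a Ganga session: it assumes browse() is the BKQuery method shown above and that the GUI selection succeeds (the bookkeeping path is the one used in the tests further down).

# Sketch only: assumes a Ganga GPI session with BKQuery available.
q = BKQuery('/LHCb/Collision12/Beam4000GeV-VeloClosed-MagUp/Real Data/Reco14/Stripping20/90000000/DIMUON.DST')
ds = q.browse()
if ds is not None:  # browse() returns None when gui=False
    print("%d files selected" % len(ds.files))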
Example 2
def getDataset(self):
    """Return the dataset, releasing filesToRelease more files per call."""
    if self.fulldataset is None:
        # Cache the full result of the underlying bookkeeping query once.
        self.fulldataset = LHCbDataset(
            super(BKTestQuery, self).getDataset().files)
    if self.dataset is None:
        # First call: hand out the first filesToRelease files.
        self.dataset = LHCbDataset(
            self.fulldataset.files[:self.filesToRelease])
        self.fulldatasetptr = self.filesToRelease
    else:
        # Later calls: append the next slice and advance the pointer.
        self.dataset.files += self.fulldataset.files[
            self.fulldatasetptr:self.fulldatasetptr + self.filesToRelease]
        self.fulldatasetptr += self.filesToRelease
    return self.dataset
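
The same incremental-release pattern in plain Python, stripped of the Ganga types so the pointer arithmetic is easy to follow; the class and names are illustrative, not part of the Ganga API.

class IncrementalQuery:
    """Illustrative stand-in: releases `chunk` more items on each call."""

    def __init__(self, all_items, chunk):
        self.all_items = list(all_items)
        self.chunk = chunk
        self.released = None
        self.ptr = 0

    def get_dataset(self):
        if self.released is None:
            self.released = self.all_items[:self.chunk]
            self.ptr = self.chunk
        else:
            self.released += self.all_items[self.ptr:self.ptr + self.chunk]
            self.ptr += self.chunk
        return self.released

q = IncrementalQuery(range(10), 3)
assert q.get_dataset() == [0, 1, 2]
assert q.get_dataset() == [0, 1, 2, 3, 4, 5]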
Example 3
def genDataFiles(job):
    """
    Generate a data.py file containing the data we want gaudirun to use.
    Args:
        job (Job): The job object, which carries everything needed to generate the options
    """
    logger.debug("Doing XML Catalog stuff")

    inputsandbox = []

    data = job.inputdata
    if data:
        logger.debug("Returning options String")
        data_str = data.optionsString()
        if data.hasLFNs():
            logger.info("Generating Data catalog for job: %s" % job.fqid)
            logger.debug("Returning Catalogue")
            inputsandbox.append(FileBuffer('catalog.xml', data.getCatalog()))
            cat_opts = '\nfrom Gaudi.Configuration import FileCatalog\nFileCatalog().Catalogs = ["xmlcatalog_file:catalog.xml"]\n'
            data_str += cat_opts

        inputsandbox.append(
            FileBuffer(GaudiExecDiracRTHandler.data_file, data_str))
    else:
        inputsandbox.append(
            FileBuffer(GaudiExecDiracRTHandler.data_file,
                       '#dummy_data_file\n' + LHCbDataset().optionsString()))

    return inputsandbox
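
A condensed, dependency-free sketch of the same flow; FileBuffer here is a stand-in namedtuple for Ganga's in-memory file class, and the options string is a placeholder.

from collections import namedtuple

# Stand-in for Ganga's FileBuffer: an in-memory (filename, contents) pair.
FileBuffer = namedtuple('FileBuffer', ['name', 'contents'])

CATALOG_OPTS = ('\nfrom Gaudi.Configuration import FileCatalog\n'
                'FileCatalog().Catalogs = ["xmlcatalog_file:catalog.xml"]\n')

def gen_data_files(options_str, has_lfns):
    """Condensed sketch of genDataFiles(): build the sandbox file list."""
    sandbox = []
    data_str = options_str
    if has_lfns:
        # The real code also appends FileBuffer('catalog.xml', ...) here.
        data_str += CATALOG_OPTS
    sandbox.append(FileBuffer('data.py', data_str))
    return sandbox

print(gen_data_files('# EventSelector options placeholder', True)[0].contents)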
Example 4
    def testSplit(self):
        j = Job(backend=Dirac())
        # Take the first five LFNs from a bookkeeping query as input data.
        j.inputdata = BKQuery(
            '/LHCb/Collision12/Beam4000GeV-VeloClosed-MagUp/Real Data/Reco14/Stripping20/90000000/DIMUON.DST',
            dqflag=['OK']).getDataset()[0:5]

        # Split 5 files at 2 per job: expect ceil(5/2) = 3 subjobs.
        ds = SplitByFiles()
        ds.bulksubmit = False
        ds.filesPerJob = 2
        result = ds.split(j)
        print("Got %s subjobs" % len(result))
        assert len(result) >= 3, 'Unexpected number of subjobs'
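
The lower bound in the assert follows from simple chunking: 5 files at 2 per job gives ceil(5/2) = 3 subjobs, assuming SplitByFiles partitions the input into groups of filesPerJob.

import math

n_files, files_per_job = 5, 2
n_subjobs = math.ceil(n_files / files_per_job)
assert n_subjobs == 3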
Example 5
def getOutputDataLFNs(self):
    """Get a list of outputdata that has been uploaded by Dirac. Excludes
    the outputsandbox if it is there."""
    lfns = super(Dirac, self).getOutputDataLFNs()
    ds = LHCbDataset()
    for f in lfns:
        ds.files.append(DiracFile(lfn=f))
    return GPIProxyObjectFactory(ds)
Example 6
def getOutputData(self, outputDir=None, names=None, force=False):
    """Retrieve data stored on the SE to outputDir (default: the job's output
    workspace). If names is None, all outputdata is downloaded; otherwise
    names should be a list of files to download. If force is True, the
    download is performed even if the data already exists locally."""
    downloaded_files = super(Dirac, self).getOutputData(outputDir, names, force)
    ds = LHCbDataset()
    for f in downloaded_files:
        ds.files.append(DiracFile(lfn=f))
    return GPIProxyObjectFactory(ds)
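
Both methods wrap raw LFN strings in DiracFile objects before returning a proxied dataset. Inside a Ganga session the same wrapping can be done in one line via the constructor, as Example 11 below does:

ds = LHCbDataset(files=[DiracFile(lfn=f) for f in lfns])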
Example 7
    def _create_subjob(self, job, dataset):
        logger.debug("_create_subjob")
        datatmp = []

        logger.debug("dataset size: %s" % str(len(dataset)))
        #logger.debug( "dataset: %s" % str(dataset) )

        from GangaLHCb.Lib.LHCbDataset.LHCbDataset import LHCbDataset

        if isinstance(dataset, LHCbDataset):
            for i in dataset:
                if isType(i, DiracFile):
                    datatmp.append(i)
                else:
                    logger.error("Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i)))
                    from Ganga.Core.exceptions import GangaException
                    raise GangaException("Unkown file-type %s, cannot perform split with file %s" % (type(i), str(i)))
        elif isinstance(dataset, (list, GangaList)):
            for this_file in dataset:
                if type(this_file) is str:
                    datatmp.append(allComponentFilters['gangafiles'](this_file, None))
                elif isType(this_file, IGangaFile):
                    datatmp.append(this_file)
                else:
                    logger.error("Unexpected type: %s" % str(type(this_file)))
                    logger.error("Wanted object to inherit from type: %s: %s" % (str(type(IGangaFile()))))
                    from Ganga.Core.exceptions import GangaException
                    x = GangaException("Unknown(unexpected) file object: %s" % this_file)
                    raise x
        elif type(dataset) is str:
            datatmp.append(DiracFile(lfn=dataset))
        else:
            logger.error("Unkown dataset type, cannot perform split here")
            from Ganga.Core.exceptions import GangaException
            logger.error("Dataset found: " + str(dataset))
            raise GangaException("Unkown dataset type, cannot perform split here")

        logger.debug("Creating new Job in Splitter")
        j = Job()
        logger.debug("Copying From Job")
        j.copyFrom(stripProxy(job), ['splitter', 'subjobs', 'inputdata', 'inputsandbox', 'inputfiles'])
        logger.debug("Unsetting Splitter")
        j.splitter = None
        #logger.debug("Unsetting Merger")
        #j.merger = None
        #j.inputsandbox = [] ## master added automatically
        #j.inputfiles = []
        logger.debug("Setting InputData")
        j.inputdata = LHCbDataset(files=datatmp[:],
                                  persistency=self.persistency,
                                  depth=self.depth)
        #j.inputdata.XMLCatalogueSlice = self.XMLCatalogueSlice
        logger.debug("Returning new subjob")
        return j
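
The dispatch above normalises three possible inputs (an LHCbDataset, a list, or a single string) into a flat list of file objects. A dependency-free sketch of the same pattern; the File class is a stand-in for DiracFile.

class File:
    """Stand-in for DiracFile: wraps a logical file name."""
    def __init__(self, lfn):
        self.lfn = lfn

def normalise(dataset):
    """Coerce a dataset, list, or single string into a list of File objects."""
    if isinstance(dataset, str):
        return [File(dataset)]
    out = []
    for item in dataset:  # covers both dataset-like and list inputs
        if isinstance(item, str):
            out.append(File(item))
        elif isinstance(item, File):
            out.append(item)
        else:
            raise TypeError("Unknown file object: %r" % (item,))
    return out

assert [f.lfn for f in normalise(['a.dst', File('b.dst')])] == ['a.dst', 'b.dst']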
Example 8
    def _create_subjob(self, job, dataset):
        # If any entry is a plain string, wrap every entry as a LogicalFile.
        if any(isinstance(i, str) for i in dataset):
            dataset = [LogicalFile(f) for f in dataset]
        j = Job()
        j.copyFrom(stripProxy(job))
        j.splitter = None
        j.merger = None
        j.inputsandbox = []  # master added automatically
        j.inputdata = LHCbDataset(files=dataset[:],
                                  persistency=self.persistency,
                                  depth=self.depth)
        j.inputdata.XMLCatalogueSlice = self.XMLCatalogueSlice

        return j
Example 9
    def _create_subjob(self, job, dataset):
        logger.debug("_create_subjob")

        datatmp = []
        if isinstance(dataset, LHCbDataset):
            for i in dataset:
                if isinstance(i, DiracFile):
                    datatmp.append(i)
                else:
                    logger.error(
                        "Unknown file-type %s, cannot perform split with file %s"
                        % (type(i), str(i)))
                    from Ganga.Core.exceptions import GangaException
                    raise GangaException(
                        "Unknown file-type %s, cannot perform split with file %s"
                        % (type(i), str(i)))
        elif isinstance(dataset, list):
            from Ganga.GPIDev.Base.Proxy import isType
            from Ganga.Core.exceptions import GangaException
            for i in dataset:
                if type(i) is str:
                    datatmp.append(DiracFile(lfn=i))
                elif isType(i, DiracFile):
                    datatmp.append(i)
                else:
                    raise GangaException(
                        "Unknown (unexpected) file object: %s" % i)
        else:
            logger.error("Unknown dataset type, cannot perform split here")
            from Ganga.Core.exceptions import GangaException
            raise GangaException(
                "Unknown dataset type, cannot perform split here")

        logger.debug("Creating new Job in Splitter")
        j = Job()
        j.copyFrom(stripProxy(job))
        j.splitter = None
        j.merger = None
        j.inputsandbox = []  # master added automatically
        j.inputfiles = []
        j.inputdata = LHCbDataset(files=datatmp[:],
                                  persistency=self.persistency,
                                  depth=self.depth)
        j.inputdata.XMLCatalogueSlice = self.XMLCatalogueSlice

        return j
Example 10
    def testIgnoreMissing(self):
        j = Job(backend=Dirac())

        # Five good LFNs from the bookkeeping, plus one that does not exist.
        myLFNs = BKQuery(
            '/LHCb/Collision12/Beam4000GeV-VeloClosed-MagUp/Real Data/Reco14/Stripping20/90000000/DIMUON.DST',
            dqflag=['OK']).getDataset()[0:5]
        myLFNs.append('LFN:/not/a/file.dst')

        print(myLFNs)

        j.inputdata = myLFNs

        ds = SplitByFiles()
        ds.bulksubmit = False
        ds.ignoremissing = True
        # shouldn't throw an exception
        result = ds.split(j)
        print('result = ', result)
        ds.ignoremissing = False
        # should throw an exception
        threw = False
        try:
            result = ds.split(j)
            print('result = %s' % str(result))
        except Exception:
            threw = True
        assert threw, 'should have thrown exception'
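
The manual threw-flag pattern above predates pytest; where pytest is available, the same negative check is shorter (a sketch, assuming the suite can depend on pytest):

import pytest

ds.ignoremissing = False
with pytest.raises(Exception):
    ds.split(j)  # must raise once missing files are no longer ignored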
Example 11
def _setup_subjob_dataset(self, dataset):
    """Wrap each LFN produced by the splitter in a DiracFile inside a new dataset."""
    return LHCbDataset(files=[DiracFile(lfn=f) for f in dataset])
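
A quick call sketch on a splitter instance; the splitter variable and LFN strings are hypothetical placeholders.

lfns = ['LFN:/placeholder/file_1.dst', 'LFN:/placeholder/file_2.dst']  # hypothetical
ds = splitter._setup_subjob_dataset(lfns)
assert len(ds.files) == 2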