def getOutputData(self, outputDir=None, names=None, force=False):
    """Retrieve data stored on SE to dir (default=job output workspace).

    If names=None, then all outputdata is downloaded, otherwise names should
    be a list of files to download. If force=True then data will be
    redownloaded even if the file already exists.

    Note that if called on a master job then the output of all subjobs will
    be downloaded. If outputDir is None then each subjob's output goes into
    its own output workspace. If outputDir is given it is treated as a top
    directory and a subdirectory named after each subjob's fqid is created
    below it, which avoids overwriting files with the same name coming from
    different subjobs.

    Args:
        outputDir (str): Existing directory to download into; None selects
            the output workspace of the owning job
        names (list): namePatterns of the output files to download; None
            selects every DiracFile that has an LFN
        force (bool): Download even if a file with the same basename is
            already present in the target directory

    Returns:
        list: LFNs of the files whose download call completed without a
            caught error. Files that were skipped because they already
            exist locally, and files whose download raised, are left out.

    Raises:
        GangaDiracError: If outputDir is given but is not an existing
            directory.
    """
    j = self.getJobObject()
    if outputDir is not None and not os.path.isdir(outputDir):
        raise GangaDiracError("Designated outupt path '%s' must exist and be a directory" % outputDir)

    def download(dirac_file, job, is_subjob=False):
        # Default target is the owning job's workspace; an explicit
        # outputDir (plus a per-subjob subdirectory) overrides it below.
        dirac_file.localDir = job.getOutputWorkspace().getPath()
        if outputDir is not None:
            output_dir = outputDir
            if is_subjob:
                output_dir = os.path.join(outputDir, job.fqid)
                if not os.path.isdir(output_dir):
                    os.mkdir(output_dir)
            dirac_file.localDir = output_dir
        local_copy = os.path.join(dirac_file.localDir, os.path.basename(dirac_file.lfn))
        if os.path.exists(local_copy) and not force:
            # Already present locally: skipped, and not reported as downloaded.
            return None
        try:
            dirac_file.get()
            # TODO: get() should really raise when the download does not succeed.
            return dirac_file.lfn
        except (GangaDiracError, GangaFileError) as e:
            logger.warning(e)
        return None

    def wanted(dirac_file):
        # Only files that have an LFN and, when names is given, a matching namePattern.
        return dirac_file.lfn != '' and (names is None or dirac_file.namePattern in names)

    def download_all(job, is_subjob):
        attempts = (download(f, job, is_subjob) for f in outputfiles_iterator(job, DiracFile) if wanted(f))
        return [lfn for lfn in attempts if lfn is not None]

    # Build a real list rather than returning filter(...): on Python 3 that
    # is a lazy, single-use iterator that callers cannot len() or index.
    succeeded = []
    if j.subjobs:
        for sj in j.subjobs:
            succeeded.extend(download_all(sj, True))
    else:
        succeeded.extend(download_all(j, False))
    return succeeded
def getOutputDataLFNs(self):
    """Retrieve the list of LFNs assigned to outputdata.

    When the job has subjobs, the LFNs are gathered from every subjob in
    order; otherwise they are gathered from the job itself. DiracFile
    entries whose lfn is the empty string are left out.
    """
    job = self.getJobObject()
    collected = []
    for owner in (job.subjobs if job.subjobs else [job]):
        for output_file in outputfiles_iterator(owner, DiracFile):
            if output_file.lfn != '':
                collected.append(output_file.lfn)
    return collected
def test_outputfiles_iterator(self):
    """Exercise outputfiles_iterator against a hand-built fake job.

    The expectations below pin the type filter, the include_subfiles flag
    and the selection_pred filter, using lightweight stand-in classes.
    """
    ########################################################
    class testfile(object):

        def __init__(this, name, subfiles=None):
            this.name = name
            # None sentinel instead of a mutable [] default, so instances
            # never share one list object.
            this.subfiles = [] if subfiles is None else subfiles

    # The subclasses only need to be distinct types for the type filter.
    class testfileA(testfile):
        pass

    class testfileB(testfile):
        pass

    class testJob(object):

        def __init__(this, outputfiles=None, nc_outputfiles=None):
            this.outputfiles = [] if outputfiles is None else outputfiles
            this.non_copyable_outputfiles = [] if nc_outputfiles is None else nc_outputfiles

    def predA(f):
        return f.name == 'A2'

    def predB(f):
        return f.name == 'BS2'
    ########################################################

    test_job = testJob(
        outputfiles=[
            testfileA('A1', subfiles=[testfileA('AS1')]),
            testfileA('A2'),
            testfileB('B1', subfiles=[testfileB('BS1')]),
            testfileA('A3'),
        ],
        nc_outputfiles=[
            testfileB('B2'),
            testfileA('A4'),
            testfileB('B3', subfiles=[testfileB('BS2'), testfileB('BS3')]),
            testfileB('B4'),
        ])

    def names(*args, **kwargs):
        # Names yielded by the iterator for test_job with the given options.
        return [f.name for f in outputfiles_iterator(test_job, *args, **kwargs)]

    # Default behaviour: files with subfiles are expected to be replaced by those subfiles.
    self.assertEqual(names(testfile),
                     ['AS1', 'A2', 'BS1', 'A3', 'B2', 'A4', 'BS2', 'BS3', 'B4'])
    self.assertEqual(names(testfileA), ['AS1', 'A2', 'A3', 'A4'])
    self.assertEqual(names(testfileB), ['BS1', 'B2', 'BS2', 'BS3', 'B4'])

    # include_subfiles=False: only the top-level entries are expected.
    self.assertEqual(names(testfile, include_subfiles=False),
                     ['A1', 'A2', 'B1', 'A3', 'B2', 'A4', 'B3', 'B4'])
    self.assertEqual(names(testfileA, include_subfiles=False), ['A1', 'A2', 'A3', 'A4'])
    self.assertEqual(names(testfileB, include_subfiles=False), ['B1', 'B2', 'B3', 'B4'])

    # selection_pred narrows the result; BS2 is a subfile, so it is not
    # expected once subfiles are excluded.
    self.assertEqual(names(testfile, selection_pred=predA), ['A2'])
    self.assertEqual(names(testfile, selection_pred=predB), ['BS2'])
    self.assertEqual(names(testfile, selection_pred=predB, include_subfiles=False), [])
def getOutputData(self, outputDir=None, names=None, force=False):
    """Retrieve data stored on SE to dir (default=job output workspace).

    If names=None, then all outputdata is downloaded, otherwise names should
    be a list of files to download. If force=True then data will be
    redownloaded even if the file already exists.

    Note that if called on a master job then the output of all subjobs will
    be downloaded. If outputDir is None then each subjob's output goes into
    its own output workspace. If outputDir is given it is treated as a top
    directory and a subdirectory named after each subjob's fqid is created
    below it, which avoids overwriting files with the same name coming from
    different subjobs.

    Args:
        outputDir (str): Existing directory to download into; None selects
            the output workspace of the owning job
        names (list): namePatterns of the output files to download; None
            selects every DiracFile that has an LFN
        force (bool): Download even if a file with the same basename is
            already present in the target directory

    Returns:
        list: LFNs of the files whose download call completed without a
            caught error. Files that were skipped because they already
            exist locally, and files whose download raised, are left out.

    Raises:
        GangaException: If outputDir is given but is not an existing
            directory.
    """
    j = self.getJobObject()
    if outputDir is not None and not os.path.isdir(outputDir):
        raise GangaException("Designated outupt path '%s' must exist and be a directory" % outputDir)

    def download(dirac_file, job, is_subjob=False):
        # Default target is the owning job's workspace; an explicit
        # outputDir (plus a per-subjob subdirectory) overrides it below.
        dirac_file.localDir = job.getOutputWorkspace().getPath()
        if outputDir is not None:
            output_dir = outputDir
            if is_subjob:
                output_dir = os.path.join(outputDir, job.fqid)
                if not os.path.isdir(output_dir):
                    os.mkdir(output_dir)
            dirac_file.localDir = output_dir
        local_copy = os.path.join(dirac_file.localDir, os.path.basename(dirac_file.lfn))
        if os.path.exists(local_copy) and not force:
            # Already present locally: skipped, and not reported as downloaded.
            return None
        try:
            if isType(dirac_file, DiracFile):
                dirac_file.get(localPath=dirac_file.localDir)
            else:
                dirac_file.get()
            # TODO: get() should really raise when the download does not succeed.
            return dirac_file.lfn
        except GangaException as e:
            logger.warning(e)
        return None

    def wanted(dirac_file):
        # Only files that have an LFN and, when names is given, a matching namePattern.
        return dirac_file.lfn != '' and (names is None or dirac_file.namePattern in names)

    def download_all(job, is_subjob):
        attempts = (download(f, job, is_subjob) for f in outputfiles_iterator(job, DiracFile) if wanted(f))
        return [lfn for lfn in attempts if lfn is not None]

    # Build a real list rather than returning filter(...): on Python 3 that
    # is a lazy, single-use iterator that callers cannot len() or index.
    succeeded = []
    if j.subjobs:
        for sj in j.subjobs:
            succeeded.extend(download_all(sj, True))
    else:
        succeeded.extend(download_all(j, False))
    return succeeded
def test_outputfiles_iterator():
    """Exercise outputfiles_iterator against a hand-built fake job.

    The assertions pin the type filter, the include_subfiles flag and the
    selection_pred filter, using lightweight stand-in classes.
    """
    from GangaDirac.Lib.Backends.DiracUtils import outputfiles_iterator

    ########################################################
    class TestFile(object):

        def __init__(self, name, subfiles=None):
            self.name = name
            # None sentinel instead of a mutable [] default, so instances
            # never share one list object.
            self.subfiles = [] if subfiles is None else subfiles

    # The subclasses only need to be distinct types for the type filter.
    class TestFileA(TestFile):
        pass

    class TestFileB(TestFile):
        pass

    class TestJob(object):

        def __init__(self, outputfiles=None, nc_outputfiles=None):
            self.outputfiles = [] if outputfiles is None else outputfiles
            self.non_copyable_outputfiles = [] if nc_outputfiles is None else nc_outputfiles

    def pred_a(f):
        return f.name == 'A2'

    def pred_b(f):
        return f.name == 'BS2'
    ########################################################

    test_job = TestJob(
        outputfiles=[
            TestFileA('A1', subfiles=[TestFileA('AS1')]),
            TestFileA('A2'),
            TestFileB('B1', subfiles=[TestFileB('BS1')]),
            TestFileA('A3'),
        ],
        nc_outputfiles=[
            TestFileB('B2'),
            TestFileA('A4'),
            TestFileB('B3', subfiles=[TestFileB('BS2'), TestFileB('BS3')]),
            TestFileB('B4'),
        ])

    def names(*args, **kwargs):
        # Names yielded by the iterator for test_job with the given options.
        return [f.name for f in outputfiles_iterator(test_job, *args, **kwargs)]

    # Default behaviour: files with subfiles are expected to be replaced by those subfiles.
    assert names(TestFile) == ['AS1', 'A2', 'BS1', 'A3', 'B2', 'A4', 'BS2', 'BS3', 'B4']
    assert names(TestFileA) == ['AS1', 'A2', 'A3', 'A4']
    assert names(TestFileB) == ['BS1', 'B2', 'BS2', 'BS3', 'B4']

    # include_subfiles=False: only the top-level entries are expected.
    assert names(TestFile, include_subfiles=False) == ['A1', 'A2', 'B1', 'A3', 'B2', 'A4', 'B3', 'B4']
    assert names(TestFileA, include_subfiles=False) == ['A1', 'A2', 'A3', 'A4']
    assert names(TestFileB, include_subfiles=False) == ['B1', 'B2', 'B3', 'B4']

    # selection_pred narrows the result; BS2 is a subfile, so it is not
    # expected once subfiles are excluded.
    assert names(TestFile, selection_pred=pred_a) == ['A2']
    assert names(TestFile, selection_pred=pred_b) == ['BS2']
    assert names(TestFile, selection_pred=pred_b, include_subfiles=False) == []
def test_outputfiles_iterator(self):
    """Check outputfiles_iterator on a fake job built from stand-in classes.

    Covers three things through the expected name lists: filtering by file
    type, the include_subfiles switch, and filtering via selection_pred.
    """
    ########################################################
    class testfile(object):

        def __init__(this, name, subfiles=None):
            this.name = name
            # Avoid a mutable default argument: every instance gets its
            # own list unless the caller supplies one.
            this.subfiles = subfiles if subfiles is not None else []

    # Distinct subclasses so the iterator's type filter has something to select on.
    class testfileA(testfile):
        pass

    class testfileB(testfile):
        pass

    class testJob(object):

        def __init__(this, outputfiles=None, nc_outputfiles=None):
            this.outputfiles = outputfiles if outputfiles is not None else []
            this.non_copyable_outputfiles = nc_outputfiles if nc_outputfiles is not None else []

    def predA(f):
        return f.name == 'A2'

    def predB(f):
        return f.name == 'BS2'
    ########################################################

    test_job = testJob(
        outputfiles=[
            testfileA('A1', subfiles=[testfileA('AS1')]),
            testfileA('A2'),
            testfileB('B1', subfiles=[testfileB('BS1')]),
            testfileA('A3'),
        ],
        nc_outputfiles=[
            testfileB('B2'),
            testfileA('A4'),
            testfileB('B3', subfiles=[testfileB('BS2'), testfileB('BS3')]),
            testfileB('B4'),
        ])

    def yielded(file_type, **options):
        # Names produced by the iterator for test_job with the given options.
        return [f.name for f in outputfiles_iterator(test_job, file_type, **options)]

    # Subfiles included (the default).
    self.assertEqual(yielded(testfile),
                     ['AS1', 'A2', 'BS1', 'A3', 'B2', 'A4', 'BS2', 'BS3', 'B4'])
    self.assertEqual(yielded(testfileA), ['AS1', 'A2', 'A3', 'A4'])
    self.assertEqual(yielded(testfileB), ['BS1', 'B2', 'BS2', 'BS3', 'B4'])

    # Subfiles excluded: top-level entries only.
    self.assertEqual(yielded(testfile, include_subfiles=False),
                     ['A1', 'A2', 'B1', 'A3', 'B2', 'A4', 'B3', 'B4'])
    self.assertEqual(yielded(testfileA, include_subfiles=False),
                     ['A1', 'A2', 'A3', 'A4'])
    self.assertEqual(yielded(testfileB, include_subfiles=False),
                     ['B1', 'B2', 'B3', 'B4'])

    # Predicate selection; 'BS2' only exists as a subfile, so excluding
    # subfiles is expected to leave nothing.
    self.assertEqual(yielded(testfile, selection_pred=predA), ['A2'])
    self.assertEqual(yielded(testfile, selection_pred=predB), ['BS2'])
    self.assertEqual(yielded(testfile, selection_pred=predB, include_subfiles=False), [])