Beispiel #1
0
    def getOutputData(self, outputDir=None, names=None, force=False):
        """Retrieve data stored on SE to dir (default=job output workspace).

        If names=None, then all outputdata is downloaded otherwise names should
        be a list of files to download. If force=True then data will be redownloaded
        even if the file already exists.

        Note that if called on a master job then all subjobs output will be downloaded.
        If outputDir is None then the subjobs output goes into their individual
        outputworkspaces as expected. If however one specifies a dir then this is
        treated as a top dir and a subdir for each job will be created below it. This
        will avoid overwriting files with the same name from each subjob.

        Args:
            outputDir (str): This string represents the output dir where the sandbox is to be placed
            names (list): list of names which match namePatterns in the outputfiles
            force (bool): Force the download of data potentially overwriting existing objects

        Returns:
            list: the LFNs of the files whose ``get()`` call completed without
            raising. Files skipped because a local copy already exists (and
            force is False), and files whose download raised, are not listed.

        Raises:
            GangaDiracError: if outputDir is given but is not an existing directory.
        """
        j = self.getJobObject()
        if outputDir is not None and not os.path.isdir(outputDir):
            raise GangaDiracError(
                "Designated outupt path '%s' must exist and be a directory" %
                outputDir)

        def download(dirac_file, job, is_subjob=False):
            """Fetch one file; return its LFN, or None when skipped or failed."""
            dirac_file.localDir = job.getOutputWorkspace().getPath()
            if outputDir is not None:
                output_dir = outputDir
                if is_subjob:
                    # One sub-directory per subjob so that equally named
                    # files from different subjobs do not overwrite each other.
                    output_dir = os.path.join(outputDir, job.fqid)
                    if not os.path.isdir(output_dir):
                        os.mkdir(output_dir)
                dirac_file.localDir = output_dir

            local_copy = os.path.join(dirac_file.localDir,
                                      os.path.basename(dirac_file.lfn))
            if os.path.exists(local_copy) and not force:
                return None
            try:
                dirac_file.get()
                return dirac_file.lfn
            # should really make the get method throw if doesn't succeed. todo
            except (GangaDiracError, GangaFileError) as e:
                logger.warning(e)
            return None

        def fetch_all(job, is_subjob):
            """Download every selected DiracFile of one job; return the LFNs fetched."""
            fetched = []
            for dirac_file in outputfiles_iterator(job, DiracFile):
                # Files without an LFN were never uploaded; nothing to fetch.
                if dirac_file.lfn == '':
                    continue
                if names is not None and dirac_file.namePattern not in names:
                    continue
                lfn = download(dirac_file, job, is_subjob)
                if lfn is not None:
                    fetched.append(lfn)
            return fetched

        # Build a real list: a bare filter() is a lazy, one-shot iterator on
        # Python 3, which breaks callers that index, len() or re-iterate it.
        succeeded = []
        if j.subjobs:
            for sj in j.subjobs:
                succeeded.extend(fetch_all(sj, True))
        else:
            succeeded.extend(fetch_all(j, False))

        return succeeded
Beispiel #2
0
    def getOutputDataLFNs(self):
        """Return the LFNs recorded on this job's DiracFile output files.

        For a job with subjobs the LFNs are gathered from every subjob, in
        subjob order; otherwise they are taken from the job itself. Files
        whose lfn is the empty string are left out.
        """
        job = self.getJobObject()

        def assigned_lfns(owner):
            # Only files that carry a non-empty LFN count.
            return [df.lfn for df in outputfiles_iterator(owner, DiracFile) if df.lfn != '']

        if not job.subjobs:
            return assigned_lfns(job)

        collected = []
        for subjob in job.subjobs:
            collected += assigned_lfns(subjob)
        return collected
    def test_outputfiles_iterator(self):
        """Exercise outputfiles_iterator with lightweight stand-in objects.

        The expectations below cover three things: filtering by file class,
        replacing a file by its subfiles unless include_subfiles=False, and
        narrowing the result further with selection_pred.
        """

        ########################################################
        class testfile(object):
            """Stand-in for an output file: a name plus optional subfiles."""

            def __init__(this, name, subfiles=None):
                this.name = name
                # Fresh list per instance; a `[]` default would be one list
                # shared by every file created without subfiles.
                this.subfiles = [] if subfiles is None else subfiles

        class testfileA(testfile):
            """Distinct subclass used to test filtering by file type."""

        class testfileB(testfile):
            """Second distinct subclass used to test filtering by file type."""

        class testJob(object):
            """Stand-in for a job exposing the two file lists that are iterated."""

            def __init__(this, outputfiles=None, nc_outputfiles=None):
                this.outputfiles = [] if outputfiles is None else outputfiles
                this.non_copyable_outputfiles = (
                    [] if nc_outputfiles is None else nc_outputfiles)

        def predA(f):
            # Matches a top-level file.
            return f.name == 'A2'

        def predB(f):
            # Matches a file that only exists as a subfile.
            return f.name == 'BS2'
        ########################################################

        test_job = testJob(outputfiles=[testfileA('A1', subfiles=[testfileA('AS1')]), testfileA('A2'),
                                        testfileB('B1', subfiles=[testfileB('BS1')]), testfileA('A3')],
                           nc_outputfiles=[testfileB('B2'), testfileA('A4'),
                                           testfileB('B3', subfiles=[testfileB('BS2'), testfileB('BS3')]), testfileB('B4')])

        # Default behaviour: files that have subfiles are replaced by them.
        self.assertEqual([f.name for f in outputfiles_iterator(test_job, testfile)],
                         ['AS1', 'A2', 'BS1', 'A3', 'B2', 'A4', 'BS2', 'BS3', 'B4'])
        self.assertEqual([f.name for f in outputfiles_iterator(test_job, testfileA)],
                         ['AS1', 'A2', 'A3', 'A4'])
        self.assertEqual([f.name for f in outputfiles_iterator(test_job, testfileB)],
                         ['BS1', 'B2', 'BS2', 'BS3', 'B4'])

        # include_subfiles=False: only the top-level files are reported.
        self.assertEqual([f.name for f in outputfiles_iterator(test_job, testfile, include_subfiles=False)],
                         ['A1', 'A2', 'B1', 'A3', 'B2', 'A4', 'B3', 'B4'])
        self.assertEqual([f.name for f in outputfiles_iterator(test_job, testfileA, include_subfiles=False)],
                         ['A1', 'A2', 'A3', 'A4'])
        self.assertEqual([f.name for f in outputfiles_iterator(test_job, testfileB, include_subfiles=False)],
                         ['B1', 'B2', 'B3', 'B4'])

        # selection_pred narrows the result; BS2 is unreachable without subfiles.
        self.assertEqual([f.name for f in outputfiles_iterator(test_job, testfile, selection_pred=predA)],
                         ['A2'])
        self.assertEqual([f.name for f in outputfiles_iterator(test_job, testfile, selection_pred=predB)],
                         ['BS2'])
        self.assertEqual([f.name for f in outputfiles_iterator(test_job, testfile, selection_pred=predB, include_subfiles=False)],
                         [])
Beispiel #4
0
    def getOutputData(self, outputDir=None, names=None, force=False):
        """Retrieve data stored on SE to dir (default=job output workspace).

        If names=None, then all outputdata is downloaded otherwise names should
        be a list of files to download. If force=True then data will be redownloaded
        even if the file already exists.

        Note that if called on a master job then all subjobs output will be downloaded.
        If outputDir is None then the subjobs output goes into their individual
        outputworkspaces as expected. If however one specifies a dir then this is
        treated as a top dir and a subdir for each job will be created below it. This
        will avoid overwriting files with the same name from each subjob.

        Args:
            outputDir (str): This string represents the output dir where the sandbox is to be placed
            names (list): list of names which match namePatterns in the outputfiles
            force (bool): Force the download of data potentially overwriting existing objects

        Returns:
            list: the LFNs of the files whose ``get()`` call completed without
            raising. Files skipped because a local copy already exists (and
            force is False), and files whose download raised, are not listed.

        Raises:
            GangaException: if outputDir is given but is not an existing directory.
        """
        j = self.getJobObject()
        if outputDir is not None and not os.path.isdir(outputDir):
            raise GangaException("Designated outupt path '%s' must exist and be a directory" % outputDir)

        def download(dirac_file, job, is_subjob=False):
            """Fetch one file; return its LFN, or None when skipped or failed."""
            dirac_file.localDir = job.getOutputWorkspace().getPath()
            if outputDir is not None:
                output_dir = outputDir
                if is_subjob:
                    # One sub-directory per subjob so that equally named
                    # files from different subjobs do not overwrite each other.
                    output_dir = os.path.join(outputDir, job.fqid)
                    if not os.path.isdir(output_dir):
                        os.mkdir(output_dir)
                dirac_file.localDir = output_dir

            local_copy = os.path.join(dirac_file.localDir, os.path.basename(dirac_file.lfn))
            if os.path.exists(local_copy) and not force:
                return None
            try:
                if isType(dirac_file, DiracFile):
                    # Pass the target directory explicitly for DiracFile objects.
                    dirac_file.get(localPath=dirac_file.localDir)
                else:
                    dirac_file.get()
                return dirac_file.lfn
            # should really make the get method throw if doesn't succeed. todo
            except GangaException as e:
                logger.warning(e)
            return None

        def fetch_all(job, is_subjob):
            """Download every selected DiracFile of one job; return the LFNs fetched."""
            fetched = []
            for dirac_file in outputfiles_iterator(job, DiracFile):
                # Files without an LFN were never uploaded; nothing to fetch.
                if dirac_file.lfn == '':
                    continue
                if names is not None and dirac_file.namePattern not in names:
                    continue
                lfn = download(dirac_file, job, is_subjob)
                if lfn is not None:
                    fetched.append(lfn)
            return fetched

        # Build a real list: a bare filter() is a lazy, one-shot iterator on
        # Python 3, which breaks callers that index, len() or re-iterate it.
        succeeded = []
        if j.subjobs:
            for sj in j.subjobs:
                succeeded.extend(fetch_all(sj, True))
        else:
            succeeded.extend(fetch_all(j, False))

        return succeeded
Beispiel #5
0
def test_outputfiles_iterator():
    """Exercise outputfiles_iterator with lightweight stand-in objects.

    The expectations below cover three things: filtering by file class,
    replacing a file by its subfiles unless include_subfiles=False, and
    narrowing the result further with selection_pred.
    """
    from GangaDirac.Lib.Backends.DiracUtils import outputfiles_iterator

    ########################################################
    class TestFile(object):
        """Stand-in for an output file: a name plus optional subfiles."""

        def __init__(self, name, subfiles=None):
            self.name = name
            # Fresh list per instance; a `[]` default would be one list
            # shared by every file created without subfiles.
            self.subfiles = [] if subfiles is None else subfiles

    class TestFileA(TestFile):
        """Distinct subclass used to test filtering by file type."""

    class TestFileB(TestFile):
        """Second distinct subclass used to test filtering by file type."""

    class TestJob(object):
        """Stand-in for a job exposing the two file lists that are iterated."""

        def __init__(self, outputfiles=None, nc_outputfiles=None):
            self.outputfiles = [] if outputfiles is None else outputfiles
            self.non_copyable_outputfiles = [] if nc_outputfiles is None else nc_outputfiles

    def pred_a(f):
        # Matches a top-level file.
        return f.name == 'A2'

    def pred_b(f):
        # Matches a file that only exists as a subfile.
        return f.name == 'BS2'
    ########################################################

    test_job = TestJob(outputfiles=[TestFileA('A1', subfiles=[TestFileA('AS1')]), TestFileA('A2'),
                                    TestFileB('B1', subfiles=[TestFileB('BS1')]), TestFileA('A3')],
                       nc_outputfiles=[TestFileB('B2'), TestFileA('A4'),
                                       TestFileB('B3', subfiles=[TestFileB('BS2'), TestFileB('BS3')]), TestFileB('B4')])

    # Default behaviour: files that have subfiles are replaced by them.
    assert [f.name for f in outputfiles_iterator(test_job, TestFile)] == ['AS1', 'A2', 'BS1', 'A3', 'B2', 'A4', 'BS2', 'BS3', 'B4']
    assert [f.name for f in outputfiles_iterator(test_job, TestFileA)] == ['AS1', 'A2', 'A3', 'A4']
    assert [f.name for f in outputfiles_iterator(test_job, TestFileB)] == ['BS1', 'B2', 'BS2', 'BS3', 'B4']

    # include_subfiles=False: only the top-level files are reported.
    assert [f.name for f in outputfiles_iterator(test_job, TestFile, include_subfiles=False)] == ['A1', 'A2', 'B1', 'A3', 'B2', 'A4', 'B3', 'B4']
    assert [f.name for f in outputfiles_iterator(test_job, TestFileA, include_subfiles=False)] == ['A1', 'A2', 'A3', 'A4']
    assert [f.name for f in outputfiles_iterator(test_job, TestFileB, include_subfiles=False)] == ['B1', 'B2', 'B3', 'B4']

    # selection_pred narrows the result; BS2 is unreachable without subfiles.
    assert [f.name for f in outputfiles_iterator(test_job, TestFile, selection_pred=pred_a)] == ['A2']
    assert [f.name for f in outputfiles_iterator(test_job, TestFile, selection_pred=pred_b)] == ['BS2']
    assert [f.name for f in outputfiles_iterator(test_job, TestFile, selection_pred=pred_b, include_subfiles=False)] == []
Beispiel #6
0
    def test_outputfiles_iterator(self):
        """Exercise outputfiles_iterator with lightweight stand-in objects.

        The expectations below cover three things: filtering by file class,
        replacing a file by its subfiles unless include_subfiles=False, and
        narrowing the result further with selection_pred.
        """

        ########################################################
        class testfile(object):
            """Stand-in for an output file: a name plus optional subfiles."""

            def __init__(this, name, subfiles=None):
                this.name = name
                # Fresh list per instance; a `[]` default would be one list
                # shared by every file created without subfiles.
                this.subfiles = [] if subfiles is None else subfiles

        class testfileA(testfile):
            """Distinct subclass used to test filtering by file type."""

        class testfileB(testfile):
            """Second distinct subclass used to test filtering by file type."""

        class testJob(object):
            """Stand-in for a job exposing the two file lists that are iterated."""

            def __init__(this, outputfiles=None, nc_outputfiles=None):
                this.outputfiles = [] if outputfiles is None else outputfiles
                this.non_copyable_outputfiles = (
                    [] if nc_outputfiles is None else nc_outputfiles)

        def predA(f):
            # Matches a top-level file.
            return f.name == 'A2'

        def predB(f):
            # Matches a file that only exists as a subfile.
            return f.name == 'BS2'

        ########################################################

        test_job = testJob(
            outputfiles=[
                testfileA('A1', subfiles=[testfileA('AS1')]),
                testfileA('A2'),
                testfileB('B1', subfiles=[testfileB('BS1')]),
                testfileA('A3')
            ],
            nc_outputfiles=[
                testfileB('B2'),
                testfileA('A4'),
                testfileB('B3', subfiles=[testfileB('BS2'),
                                          testfileB('BS3')]),
                testfileB('B4')
            ])

        # Default behaviour: files that have subfiles are replaced by them.
        self.assertEqual(
            [f.name for f in outputfiles_iterator(test_job, testfile)],
            ['AS1', 'A2', 'BS1', 'A3', 'B2', 'A4', 'BS2', 'BS3', 'B4'])
        self.assertEqual(
            [f.name for f in outputfiles_iterator(test_job, testfileA)],
            ['AS1', 'A2', 'A3', 'A4'])
        self.assertEqual(
            [f.name for f in outputfiles_iterator(test_job, testfileB)],
            ['BS1', 'B2', 'BS2', 'BS3', 'B4'])

        # include_subfiles=False: only the top-level files are reported.
        self.assertEqual([
            f.name for f in outputfiles_iterator(
                test_job, testfile, include_subfiles=False)
        ], ['A1', 'A2', 'B1', 'A3', 'B2', 'A4', 'B3', 'B4'])
        self.assertEqual([
            f.name for f in outputfiles_iterator(
                test_job, testfileA, include_subfiles=False)
        ], ['A1', 'A2', 'A3', 'A4'])
        self.assertEqual([
            f.name for f in outputfiles_iterator(
                test_job, testfileB, include_subfiles=False)
        ], ['B1', 'B2', 'B3', 'B4'])

        # selection_pred narrows the result; BS2 is unreachable without subfiles.
        self.assertEqual([
            f.name for f in outputfiles_iterator(
                test_job, testfile, selection_pred=predA)
        ], ['A2'])
        self.assertEqual([
            f.name for f in outputfiles_iterator(
                test_job, testfile, selection_pred=predB)
        ], ['BS2'])
        self.assertEqual([
            f.name for f in outputfiles_iterator(test_job,
                                                 testfile,
                                                 selection_pred=predB,
                                                 include_subfiles=False)
        ], [])