Beispiel #1
0
    def createChainUnit(self, parent_units, use_copy_output=True):
        """Create a unit whose input data is the Dirac output of its parents.

        Args:
            parent_units: units of the parent transform(s). Every one of them
                must have at least one active job and the status "completed".
            use_copy_output: accepted for interface compatibility; it is not
                used by this method.

        Returns:
            A new LHCbUnit whose ``inputdata`` is an LHCbDataset built from
            the selected output LFNs of all parents, or None if any parent
            has no active job or is not yet completed.
        """
        # we need parent jobs that have completed to get the output files;
        # bail out before doing any other work if one of them is not ready
        for parent in parent_units:
            if len(parent.active_job_ids) == 0 or parent.status != "completed":
                return None

        # imported lazily, inside the method, as the original code did
        import re
        from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput

        # gather the include/exclude masks of every chain input that points
        # at the transform owning one of the parent units
        incl_pat_list = []
        excl_pat_list = []
        for parent in parent_units:
            for inds in self.inputdata:
                if isType(
                        inds, TaskChainInput
                ) and inds.input_trf_id == parent._getParent().getID():
                    incl_pat_list += inds.include_file_mask
                    excl_pat_list += inds.exclude_file_mask

        # go over the output files and copy the appropriate ones over as
        # input files
        flist = []
        for parent in parent_units:
            job = getJobByID(parent.active_job_ids[0])
            job_list = job.subjobs if job.subjobs else [job]

            for sj in job_list:
                for f in sj.outputfiles:
                    # only dirac files can be chained
                    if not isType(f, DiracFile):
                        continue

                    # an empty include mask means "take everything"
                    if incl_pat_list and not any(
                            re.search(pat, f.lfn) for pat in incl_pat_list):
                        continue

                    # exclusion always wins over inclusion
                    if any(re.search(pat, f.lfn) for pat in excl_pat_list):
                        continue

                    # each file is decided exactly once, so an LFN matching
                    # several include patterns is no longer added repeatedly
                    flist.append("LFN:" + f.lfn)

        # just do one unit that uses all data
        unit = LHCbUnit()
        unit.name = "Unit %d" % len(self.units)
        unit.inputdata = LHCbDataset(files=[DiracFile(lfn=f) for f in flist])

        return unit
Beispiel #2
0
    def createChainUnit(self, parent_units, use_copy_output=True):
        """Build one unit fed by the Dirac output files of the parent units.

        Args:
            parent_units: units of the parent transform(s); each needs at
                least one active job and the status "completed".
            use_copy_output: kept for interface compatibility; this method
                does not read it.

        Returns:
            A new LHCbUnit whose ``inputdata`` is an LHCbDataset of the
            selected output LFNs, or None when any parent has no active job
            or has not completed.
        """
        # we need parent jobs that have completed to get the output files
        for parent in parent_units:
            if len(parent.active_job_ids) == 0 or parent.status != "completed":
                return None

        # imported lazily, inside the method, as the original code did
        import re
        from Ganga.GPI import TaskChainInput

        # collect the file masks from the chain inputs that reference the
        # transform each parent unit belongs to
        incl_pat_list = []
        excl_pat_list = []
        for parent in parent_units:
            for inds in self.inputdata:
                if isType(inds, TaskChainInput) and inds.input_trf_id == parent._getParent().getID():
                    incl_pat_list += inds.include_file_mask
                    excl_pat_list += inds.exclude_file_mask

        def _selected(lfn):
            """Return True if lfn passes the include mask and no exclude mask."""
            # an empty include mask accepts every file
            if incl_pat_list and not any(re.search(pat, lfn) for pat in incl_pat_list):
                return False
            return not any(re.search(pat, lfn) for pat in excl_pat_list)

        # go over the output files and copy the appropriate ones over as
        # input files
        flist = []
        for parent in parent_units:
            job = GPI.jobs(parent.active_job_ids[0])
            job_list = job.subjobs if job.subjobs else [job]

            for sj in job_list:
                for f in sj.outputfiles:
                    # only dirac files are considered; each one is judged
                    # once, so multiple matching include patterns cannot
                    # produce duplicate entries that survive exclusion
                    if isType(f, DiracFile) and _selected(f.lfn):
                        flist.append("LFN:" + f.lfn)

        # just do one unit that uses all data
        unit = LHCbUnit()
        unit.name = "Unit %d" % len(self.units)
        unit.inputdata = LHCbDataset(files=[DiracFile(lfn=f) for f in flist])

        return unit
Beispiel #3
0
    def createUnits(self):
        """Create new units if required given the inputdata.

        After delegating to the parent class (which handles chaining), one of
        three things happens:

        * If ``inputdata`` is non-empty, every LHCbDataset in it is compared
          with the files already held by units created for that dataset
          index. Files not yet assigned are put into new units, in chunks of
          ``files_per_unit`` (or all in one unit when that is not positive).
        * Otherwise, if ``mc_num_units`` is positive and no units exist yet,
          that many empty units are created for MC generation.
        * Otherwise an error is logged.
        """
        # call parent for chaining
        super(LHCbTransform, self).createUnits()

        if len(self.inputdata) > 0:

            # check for conflicting input
            if self.mc_num_units > 0:
                logger.warning("Inputdata specified - MC Event info ignored")

            # loop over input data and see if we need to create any more units
            import copy
            for ds_index, inds in enumerate(self.inputdata):

                if not isType(inds, LHCbDataset):
                    continue

                # go over the units and see what files have been assigned
                assigned_data = LHCbDataset()
                for unit in self.units:
                    if unit.input_datset_index == ds_index:
                        assigned_data.files += unit.inputdata.files

                # any new files
                new_data = LHCbDataset(
                    files=self.inputdata[ds_index].difference(assigned_data).files)

                if len(new_data.files) == 0:
                    continue

                # a non-positive files_per_unit means "everything in one unit"
                step = self.files_per_unit
                if step <= 0:
                    step = len(new_data.files)

                # create units for these files; the dataset index is recorded
                # on every unit (previously it was skipped for the single-unit
                # case) so that the next call sees these files as assigned
                for start in range(0, len(new_data.files), step):
                    unit = LHCbUnit()
                    unit.name = "Unit %d" % len(self.units)
                    unit.input_datset_index = ds_index
                    self.addUnitToTRF(unit)
                    unit.inputdata = copy.deepcopy(self.inputdata[ds_index])
                    unit.inputdata.files = []
                    unit.inputdata.files += new_data.files[start:start + step]

        elif self.mc_num_units > 0:

            if len(self.units) == 0:
                # check for appropriate splitter: warn when there is none or
                # when it is not a GaussSplitter (the test used to be inverted
                # and warned precisely when a GaussSplitter was configured)
                from GPI import GaussSplitter
                if not self.splitter or not isType(self.splitter, GaussSplitter):
                    logger.warning("No GaussSplitter specified - first event info ignored")

                # create units for MC generation
                for _ in range(self.mc_num_units):
                    unit = LHCbUnit()
                    unit.name = "Unit %d" % len(self.units)
                    self.addUnitToTRF(unit)
        else:
            logger.error(
                "Please specify either inputdata or MC info for unit generation")
Beispiel #4
0
    def createUnits(self):
        """Create new units if required given the inputdata.

        The parent implementation is called first (for chaining). Then:

        * With non-empty ``inputdata``, each LHCbDataset entry is checked
          against the files already held by units carrying the same
          ``input_datset_index``; unassigned files are distributed over new
          units in chunks of ``files_per_unit`` (a non-positive value puts
          them all into a single unit).
        * Without inputdata but with ``mc_num_units`` > 0, that many empty
          units are created, provided no units exist yet.
        * Otherwise the call stack is printed and an error is logged.
        """
        # call parent for chaining
        super(LHCbTransform, self).createUnits()

        if len(self.inputdata) > 0:

            # check for conflicting input
            if self.mc_num_units > 0:
                logger.warning("Inputdata specified - MC Event info ignored")

            # loop over input data and see if we need to create any more units
            import copy
            for ds_index, inds in enumerate(self.inputdata):

                if not isType(inds, LHCbDataset):
                    continue

                # go over the units and see what files have been assigned
                assigned_data = LHCbDataset()
                for unit in self.units:
                    if unit.input_datset_index == ds_index:
                        assigned_data.files += unit.inputdata.files

                # any new files
                new_data = LHCbDataset(
                    files=self.inputdata[ds_index].difference(assigned_data).files)

                if len(new_data.files) == 0:
                    continue

                # Create units for these files; a non-positive files_per_unit
                # collapses to one unit holding every new file
                step = self.files_per_unit
                if step <= 0:
                    step = len(new_data.files)

                for start in range(0, len(new_data.files), step):
                    unit = LHCbUnit()
                    unit.name = "Unit %d" % len(self.units)
                    unit.input_datset_index = ds_index
                    self.addUnitToTRF(unit)
                    unit.inputdata = copy.deepcopy(self.inputdata[ds_index])
                    unit.inputdata.files = []
                    unit.inputdata.files += new_data.files[start:start + step]

        elif self.mc_num_units > 0:
            if len(self.units) == 0:
                # check for appropriate splitter: warn when none is set or
                # when it is not a GaussSplitter (the previous test was
                # inverted and warned when a GaussSplitter WAS configured)
                from GangaLHCb.Lib.Splitters.GaussSplitter import GaussSplitter
                if not self.splitter or not isType(self.splitter, GaussSplitter):
                    logger.warning(
                        "No GaussSplitter specified - first event info ignored"
                    )

                # create units for MC generation
                for _ in range(self.mc_num_units):
                    unit = LHCbUnit()
                    unit.name = "Unit %d" % len(self.units)
                    self.addUnitToTRF(unit)
        else:
            # NOTE(review): print_stack() writes the current call stack to
            # stderr; this looks like a debugging aid - confirm whether it
            # should stay before removing it
            import traceback
            traceback.print_stack()
            logger.error(
                "Please specify either inputdata or MC info for unit generation"
            )