Code example #1
File: LHCbTask.py  Project: VladimirRomanovsky/ganga
    def updateQuery(self, resubmit=False):
        """Update the dataset information of all attached transforms. This will
        include any new data in the processing or re-run jobs that have data which
        has been removed."""
        # Tried multithreading; better to check the tasks registry class.
        # Also tried multiprocessing, but it bottlenecks at the server.
        for t in self.transforms:
            try:
                t.updateQuery(resubmit)
            except GangaException as e:
                logger.warning(str(e))
                continue

        # update the status of the Task in case we've started running again
        self.updateStatus()
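
A minimal usage sketch (the task index is hypothetical; "tasks" is the Ganga GPI registry accessor, and resubmit=True asks each transform to resubmit jobs whose input data has since been removed):

    t = tasks(0)                  # hypothetical: first task in the registry
    t.updateQuery(resubmit=True)  # refresh queries on every attached transform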
Code example #2
File: LHCbUnit.py  Project: VladimirRomanovsky/ganga
    def updateStatus(self, status):
        """Update status hook"""

        # check for input data deletion of chain data
        if status == "completed" and self._getParent().delete_chain_input and len(self.req_units) > 0:

            # the inputdata field *must* be filled from the parent task
            # NOTE: When changing to inputfiles, will probably need to check
            # for any specified in trf.inputfiles

            # check that the parent replicas have been copied by checking
            # backend status == Done
            job_list = []
            for req_unit in self.req_units:
                trf = self._getParent()._getParent().transforms[
                    int(req_unit.split(":")[0])]
                req_unit_id = req_unit.split(":")[1]

                if req_unit_id != "ALL":
                    unit = trf.units[int(req_unit_id)]
                    job_list.append(GPI.jobs(unit.active_job_ids[0]))
                else:
                    for unit in trf.units:
                        job_list.append(GPI.jobs(unit.active_job_ids[0]))

            for j in job_list:
                if j.subjobs:
                    for sj in j.subjobs:
                        if sj.backend.status != "Done":
                            return
                else:
                    if j.backend.status != "Done":
                        return

            job = GPI.jobs(self.active_job_ids[0])
            for f in job.inputdata.files:
                logger.warning(
                    "Removing chain inputdata file '%s'..." % f.name)
                f.remove()

        super(LHCbUnit, self).updateStatus(status)
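
Each entry in req_units is a string of the form "<transform_index>:<unit_index>", where the unit part may be the literal "ALL", as the split calls above show. The same parsing in isolation (a sketch; the helper name is hypothetical):

    def parse_req_unit(req_unit):
        """Split a 'trf:unit' requirement string; 'ALL' means every unit."""
        trf_part, unit_part = req_unit.split(":", 1)
        return int(trf_part), None if unit_part == "ALL" else int(unit_part)

    assert parse_req_unit("2:ALL") == (2, None)
    assert parse_req_unit("0:3") == (0, 3)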
Code example #3
File: LHCbTransform.py  Project: chrisburr/ganga
    def removeUnusedData(self):
        """Remove any output data from orphaned jobs"""
        for unit in self.units:
            for jid in unit.prev_job_ids:
                try:
                    logger.warning("Removing data from job '%d'..." % jid)
                    job = GPI.jobs(jid)

                    jlist = []
                    if len(job.subjobs) > 0:
                        jlist = job.subjobs
                    else:
                        jlist = [job]

                    for sj in jlist:
                        for f in sj.outputfiles:
                            if isType(f, DiracFile) and f.lfn:
                                f.remove()
                except Exception:
                    logger.error("Problem deleting data for job '%d'" % jid)
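
A minimal usage sketch (the task and transform indices are hypothetical; assumes a task with at least one transform exists in the registry):

    trf = tasks(0).transforms[0]  # hypothetical indices
    trf.removeUnusedData()        # drops DiracFile output left by superseded jobs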
Code example #4
File: LHCbTransform.py  Project: chrisburr/ganga
    def createUnits(self):
        """Create new units if required given the inputdata"""

        # call parent for chaining
        super(LHCbTransform, self).createUnits()

        if len(self.inputdata) > 0:

            # check for conflicting input
            if self.mc_num_units > 0:
                logger.warning("Inputdata specified - MC Event info ignored")

            # loop over input data and see if we need to create any more units
            import copy
            for id, inds in enumerate(self.inputdata):

                if not isType(inds, LHCbDataset):
                    continue

                # go over the units and see what files have been assigned
                assigned_data = LHCbDataset()
                for unit in self.units:

                    if unit.input_datset_index != id:
                        continue

                    assigned_data.files += unit.inputdata.files

                # any new files
                new_data = LHCbDataset(files=self.inputdata[id].difference(assigned_data).files)

                if len(new_data.files) == 0:
                    continue

                # create units for these files
                if self.files_per_unit > 0:

                    # loop over the file array and create units for each set
                    num = 0
                    while num < len(new_data.files):
                        unit = LHCbUnit()
                        unit.name = "Unit %d" % len(self.units)
                        unit.input_datset_index = id
                        self.addUnitToTRF(unit)
                        unit.inputdata = copy.deepcopy(self.inputdata[id])
                        unit.inputdata.files = []
                        unit.inputdata.files += new_data.files[
                            num:num + self.files_per_unit]
                        num += self.files_per_unit

                else:
                    # new unit required for this dataset
                    unit = LHCbUnit()
                    unit.name = "Unit %d" % len(self.units)
                    unit.input_datset_index = id
                    self.addUnitToTRF(unit)
                    unit.inputdata = copy.deepcopy(self.inputdata[id])
                    unit.inputdata.files = []
                    unit.inputdata.files += new_data.files

        elif self.mc_num_units > 0:

            if len(self.units) == 0:
                # check for appropriate splitter
                from GPI import GaussSplitter
                if not self.splitter or not isType(self.splitter, GaussSplitter):
                    logger.warning("No GaussSplitter specified - first event info ignored")

                # create units for MC generation
                for i in range(0, self.mc_num_units):
                    unit = LHCbUnit()
                    unit.name = "Unit %d" % len(self.units)
                    self.addUnitToTRF(unit)
        else:
            logger.error(
                "Please specify either inputdata or MC info for unit generation")