Example #1
    def addQuery(self, transform, bkQuery, associate=True):
        """Allows the user to add multiple transforms corresponding to the list of
        BKQuery type objects given in the second argument. The first argument
        is a transform object to use as the basis for the creation of further
        transforms."""
        if not isinstance(transform, LHCbTransform):
            raise GangaException(
                None, 'First argument must be an LHCbTransform object to use as the basis for establishing the new transforms')

        # Check if the template transform is associated with the Task
        try:
            self.transforms.index(transform)
        except ValueError:
            if associate:
                logger.info(
                    'The transform is not associated with this Task, doing so now.')
                self.appendTransform(transform)

        # Check if the BKQuery input is correct and append/update
        if not isinstance(bkQuery, list):
            bkQuery = [bkQuery]
        for bk in bkQuery:
            if not isType(bk, BKQuery):
                raise GangaAttributeError(
                    None, 'LHCbTransform expects a BKQuery object or list of BKQuery objects passed to the addQuery method')
            if len(transform.queries) == 0:  # template has no query itself
                logger.info('Attaching query to transform')
                transform.addQuery(stripProxy(bk))
            else:  # Duplicate from template
                logger.info('Duplicating transform to add new query.')
                tr = deepcopy(transform)
                tr.addQuery(stripProxy(bk))
                self.appendTransform(tr)
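
A minimal usage sketch (an assumption, not from the source): inside an LHCb Ganga session where LHCbTask, LHCbTransform and BKQuery are exposed through the GPI, the method above is driven like this. The bookkeeping paths are placeholders, not real datasets.

task = LHCbTask()
template = LHCbTransform()

queries = [BKQuery(path='/LHCb/Collision11/placeholder/EXAMPLE.DST'),  # placeholder path
           BKQuery(path='/LHCb/Collision12/placeholder/EXAMPLE.DST')]  # placeholder path

# The first query attaches to the template itself (it has none yet);
# every further query produces a deepcopy of the template, so the task
# ends up with one transform per query.
task.addQuery(template, queries)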
Example #2
    def updateQuery(self, resubmit=False):
        """Update the dataset information of the transforms. This will
        include any new data in the processing or re-run jobs that have data which
        has been removed."""
        if len(self.queries) == 0:
            raise GangaException(
                None, 'Cannot call updateQuery() on an LHCbTransform without any queries')

        if self._getParent() is not None:
            logger.info('Retrieving latest bookkeeping information for transform %i:%i, please wait...' % (
                self._getParent().id, self.getID()))
        else:
            logger.info(
                'Retrieving latest bookkeeping information for transform, please wait...')

        # check we have an input DS per BK Query
        while len(self.queries) > len(self.inputdata):
            self.inputdata.append(LHCbDataset())

        # loop over the queries and fill the file lists
        for i, query in enumerate(self.queries):

            # Get the latest dataset
            latest_dataset = query.getDataset()

            # Compare to previous inputdata, get new and removed
            logger.info(
                'Checking for new and removed data for query %d, please wait...' % i)
            dead_data = LHCbDataset()
            new_data = LHCbDataset()  # note: not used further in this excerpt

            # loop over the old data and compare
            new_data.files += latest_dataset.difference(self.inputdata[i]).files
            dead_data.files += self.inputdata[i].difference(latest_dataset).files

            # for dead data, find then kill/remove any associated jobs
            # loop over units and check any associated with this DS
            # TODO: Follow through chained tasks
            for unit in self.units:
                # skip units not associated with this dataset
                if unit.input_datset_index != i:
                    continue

                # skip units with no active job
                if len(unit.active_job_ids) == 0:
                    continue

                # check the data
                for f in dead_data.files:
                    if f in unit.inputdata.files:

                        # kill the job
                        job = GPI.jobs(unit.active_job_ids[0])
                        if job.status in ['submitted', 'running']:
                            job.kill()

                        # forget the job
                        unit.prev_job_ids.append(unit.active_job_ids[0])
                        unit.active_job_ids = []
                        break

            # in any case, now just set the DS files to the new set
            self.inputdata[i].files = latest_dataset.files
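
The new/dead split above is ordinary set difference over the file lists; an illustrative sketch with plain Python sets of logical file names (standing in for the LHCbDataset API):

old_files = {'lfn:/a.dst', 'lfn:/b.dst'}     # previous inputdata[i]
latest_files = {'lfn:/b.dst', 'lfn:/c.dst'}  # latest bookkeeping result

new_files = latest_files - old_files   # {'lfn:/c.dst'}  -> picked up for processing
dead_files = old_files - latest_files  # {'lfn:/a.dst'}  -> their jobs get killed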
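
A hedged driving sketch (an assumption, not from the source): with a task set up as in Example #1, each transform's bookkeeping can be refreshed by hand. Iterating the task's transforms list this way is assumed from the snippet, not confirmed GPI usage.

for tf in task.transforms:
    tf.updateQuery()  # refreshes inputdata; kills jobs whose files disappeared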