Ejemplo n.º 1
0
   def initializeFromDatasets(self,dataset_list):
      """Create one unit per dataset in dataset_list.

      For each dataset (or container) in the list the matching tid
      datasets are resolved via DQ2 and a unit is added to every primary
      transform (i.e. each transform without required_trfs). The output
      dataset names are set using the run numbers and tags of the input
      datasets appended to the current
      t.analysis.outputdata.datasetname field.
      """
      if not type(dataset_list) is list:
         logger.error("dataset_list must be a python list: ['ds1','ds2',...]")
         return

      # Primary transforms are those with no upstream (required) transforms.
      primary_tfs = []
      for tf in self.transforms:
         if len(tf.required_trfs) == 0:
            primary_tfs.append( tf )

      if len(primary_tfs) == 0:
         logger.error("No primary transforms specified. You need at least one before the Task can be initialised.")
         return

      unit_num = 0

      for dset in dataset_list:
         dset = dset.strip()
         try:
            if "*" in dset:
               # A wildcard is allowed but may match more than intended,
               # so emit a warning (was logged at error level before).
               logger.warning("Wildcards may include unexpected datasets in your processing! Please list your datasets before specifying them here!")
            try:
               # Treat the name as a container; append '/' if missing.
               if dset[-1] == "/":
                  tid_datasets = dq2.listDatasetsInContainer(dset)
               else:
                  tid_datasets = dq2.listDatasetsInContainer(dset+"/")
            except DQUnknownDatasetException:
               # Not a container - fall back to a plain dataset listing.
               dslist = dq2.listDatasets(dset).keys()
               if len(dslist) == 0:
                  logger.error("Dataset %s not found!" % dset)
                  return
               # Prefer tid datasets, excluding _sub datasets.
               tid_datasets = [ds for ds in dslist if "_tid" in ds and not "_sub" in ds]
               if len(tid_datasets) == 0:
                  if len(dslist) > 1:
                     logger.error("Found no tid dataset but multiple datasets match %s*!" % dset)
                     return
                  tid_datasets = [dslist[0]]
         except Exception as e:
            logger.error('DQ2 Error while listing dataset %s*! %s' % (dset, e))
            return
         logger.info("Found %i datasets matching %s..." % (len(tid_datasets), dset))

         if len(tid_datasets) == 0:
            logger.error("No tid datasets found from dataset list. Maybe the container '%s' is empty?" % dset)
            return

         # Every primary transform gets the same unit numbering for this
         # dataset, so rewind the counter per transform.
         prev_num = unit_num
         for tf in primary_tfs:
            unit_num = prev_num
            for ds in tid_datasets:
               tf.addUnit("Unit_%d" % unit_num, ds)
               unit_num += 1
Ejemplo n.º 2
0
    def checkOutputContainers(self):
        """Go through all transforms and check all datasets are registered.

        First empties the overall Task container of any stale dataset
        entries, then asks every transform to verify its own containers.
        """
        logger.info("Cleaning out overall Task container...")

        dslist = []
        # Acquire before the try so a failed acquire doesn't trigger a
        # release of a lock we never held.
        dq2_lock.acquire()
        try:
            try:
                dslist = dq2.listDatasetsInContainer(self.getContainerName())
            except Exception:
                # Best effort: treat an unreadable container as empty.
                dslist = []

            try:
                dq2.deleteDatasetsFromContainer(self.getContainerName(), dslist )

            except DQContainerDoesNotHaveDataset:
                pass
            except DQException as x:
                # DQException must precede the generic Exception handler,
                # otherwise this branch is unreachable.
                logger.error('DQ2 Problem cleaning out Task container: %s %s' %( x.__class__, x))
            except Exception as x:
                logger.error("Problem cleaning out Task container: %s %s", x.__class__, x)
        finally:
            dq2_lock.release()

        logger.info("Checking output data has been registered. This can take a few minutes...")
        for trf in self.transforms:
            logger.info("Checking containers in Transform %d..." % trf.getID() )
            trf.checkOutputContainers()
Ejemplo n.º 3
0
    def initializeFromDatasets(self,
                               dset_list,
                               template=None,
                               using_jedi=True):
        """Initialise the trf with the given dataset list, creating a unit for each DS.

        Containers (names ending in '/') are expanded into their tid
        datasets via DQ2 when using_jedi is False, otherwise added as a
        single unit. Plain datasets always become one unit each.
        """

        for ds in dset_list:

            if ds[-1] == "/":
                if not using_jedi:
                    try:
                        tid_datasets = dq2.listDatasetsInContainer(ds)
                    except DQUnknownDatasetException:
                        logger.error("dataset container %s not found" % ds)
                        # Skip this container: tid_datasets is unbound here
                        # and would raise a NameError below otherwise.
                        continue

                    logger.info("Found %i datasets matching %s..." %
                                (len(tid_datasets), ds))

                    for ds2 in tid_datasets:
                        self.addUnit('.'.join(ds.split(".")[1:-1]), ds2,
                                     template)
                else:
                    self.addUnit('.'.join(ds[:-1].split(".")[1:]), ds,
                                 template)
            else:
                self.addUnit('.'.join(ds.split(".")[1:-1]), ds, template)
Ejemplo n.º 4
0
    def checkOutputContainers(self):
        """Go through all transforms and check all datasets are registered.

        First empties the overall Task container of any stale dataset
        entries, then asks every transform to verify its own containers.
        """
        logger.info("Cleaning out overall Task container...")

        dslist = []
        # Acquire before the try so a failed acquire doesn't trigger a
        # release of a lock we never held.
        dq2_lock.acquire()
        try:
            try:
                dslist = dq2.listDatasetsInContainer(self.getContainerName())
            except Exception:
                # Best effort: treat an unreadable container as empty.
                dslist = []

            try:
                dq2.deleteDatasetsFromContainer(self.getContainerName(),
                                                dslist)

            except DQContainerDoesNotHaveDataset:
                pass
            except DQException as x:
                # DQException must precede the generic Exception handler,
                # otherwise this branch is unreachable.
                logger.error('DQ2 Problem cleaning out Task container: %s %s' %
                             (x.__class__, x))
            except Exception as x:
                logger.error("Problem cleaning out Task container: %s %s",
                             x.__class__, x)
        finally:
            dq2_lock.release()

        logger.info(
            "Checking output data has been registered. This can take a few minutes..."
        )
        for trf in self.transforms:
            logger.info("Checking containers in Transform %d..." % trf.getID())
            trf.checkOutputContainers()
Ejemplo n.º 5
0
   def checkOutputContainers(self):
      """Go through all completed units and make sure datasets are registered as required.

      First empties this transform's container of stale dataset entries,
      then re-registers datasets for every completed unit whose job used a
      DQ2OutputDataset.
      """
      logger.info("Cleaning out transform %d container..." % self.getID())

      dslist = []
      # Acquire before the try so a failed acquire doesn't trigger a
      # release of a lock we never held.
      dq2_lock.acquire()
      try:
         try:
            dslist = dq2.listDatasetsInContainer(self.getContainerName())
         except Exception:
            # Best effort: treat an unreadable container as empty.
            dslist = []

         try:
            dq2.deleteDatasetsFromContainer(self.getContainerName(), dslist )

         except DQContainerDoesNotHaveDataset:
            pass
         except DQException as x:
            # DQException must precede the generic Exception handler,
            # otherwise this branch is unreachable.
            logger.error('DQ2 Problem cleaning out Transform container: %s %s' %( x.__class__, x))
         except Exception as x:
            logger.error("Problem cleaning out Transform container: %s %s", x.__class__, x)
      finally:
         dq2_lock.release()

      logger.info("Checking output data has been registered for Transform %d..." % self.getID())
      for unit in self.units:

         if len(unit.active_job_ids) == 0:
            continue

         # Only completed units whose job wrote a DQ2OutputDataset need a check.
         if unit.status == "completed" and GPI.jobs(unit.active_job_ids[0]).outputdata and GPI.jobs(unit.active_job_ids[0]).outputdata._impl._name == "DQ2OutputDataset":
            logger.info("Checking containers in Unit %d..." % unit.getID() )
            unit.registerDataset()
Ejemplo n.º 6
0
   def getOutputDatasetList(self):
      """Return a list of the output datasets associated with this unit"""

      # Collect the datasets of every container owned by this unit.
      datasets = []
      for container in self.getContainerList():
         datasets.extend(dq2.listDatasetsInContainer(container))

      return datasets
Ejemplo n.º 7
0
   def getOutputDatasetList(self):
      """Return a list of the output datasets associated with this unit"""

      # Flatten the datasets from every container owned by this unit.
      return [ds
              for cont in self.getContainerList()
              for ds in dq2.listDatasetsInContainer(cont)]
Ejemplo n.º 8
0
    def checkForSubmission(self):
        """Additional checks for unit submission.

        Returns False if the base-class checks fail, if parent units are
        incomplete, or if any chained DQ2 input dataset container holds a
        dataset that is not yet frozen; True otherwise.
        """

        # call the base class
        if not super(AtlasUnit, self).checkForSubmission():
            return False

        # check that parent units are complete because otherwise, when we check for submission to do submissions first (ITransform.update)
        # datasets may not have been created yet
        if not self.checkParentUnitsAreComplete():
            return False

        # Add a check for chain units to have frozen their input DS
        if len(
                self.req_units
        ) > 0 and self.inputdata._name == "DQ2Dataset" and not self.inputdata.tag_info:

            # check datasets are frozen
            for uds in self.inputdata.dataset:
                # Bound up front so it exists even if the DQ2 calls fail.
                cont_ok = True
                # Acquire before the try so a failed acquire doesn't
                # trigger a release of a lock we never held.
                dq2_lock.acquire()
                try:
                    try:
                        # list datasets in container
                        ds_list = dq2.listDatasetsInContainer(uds)

                        for ds in ds_list:
                            # find locations and check if frozen
                            loc_dict = dq2.listDatasetReplicas(ds)
                            locations = []
                            for loc in loc_dict[loc_dict.keys()[0]]:
                                locations += loc_dict[loc_dict.keys()[0]][loc]

                            ds_ok = False
                            for loc in locations:
                                if loc == "":
                                    continue
                                datasetsiteinfo = dq2.listFileReplicas(loc, ds)
                                if datasetsiteinfo[0]['found'] is not None:
                                    ds_ok = True
                                    break

                            if not ds_ok:
                                cont_ok = False
                                break
                    except Exception:
                        logger.warning(
                            "Unable to check if datasets are frozen")
                        cont_ok = False
                finally:
                    dq2_lock.release()

                # Fail as soon as any container has an unfrozen dataset;
                # previously only the last container's status was examined.
                if not cont_ok:
                    return False

        return True
Ejemplo n.º 9
0
   def checkForSubmission(self):
      """Additional checks for unit submission.

      Returns False if the base-class checks fail, if parent units are
      incomplete, or if any chained DQ2 input dataset container holds a
      dataset that is not yet frozen; True otherwise.
      """

      # call the base class
      if not super(AtlasUnit,self).checkForSubmission():
         return False

      # check that parent units are complete because otherwise, when we check for submission to do submissions first (ITransform.update)
      # datasets may not have been created yet
      if not self.checkParentUnitsAreComplete():
         return False

      # Add a check for chain units to have frozen their input DS
      if len(self.req_units) > 0 and self.inputdata._name == "DQ2Dataset" and not self.inputdata.tag_info:

         # check datasets are frozen
         for uds in self.inputdata.dataset:
            # Bound up front so it exists even if the DQ2 calls fail.
            cont_ok = True
            # Acquire before the try so a failed acquire doesn't trigger
            # a release of a lock we never held.
            dq2_lock.acquire()
            try:
               try:
                  # list datasets in container
                  ds_list = dq2.listDatasetsInContainer(uds)

                  for ds in ds_list:
                     # find locations and check if frozen
                     loc_dict = dq2.listDatasetReplicas(ds)
                     locations = []
                     for loc in loc_dict[ loc_dict.keys()[0] ]:
                        locations += loc_dict[ loc_dict.keys()[0] ][loc]

                     ds_ok = False
                     for loc in locations:
                        if loc == "":
                           continue
                        datasetsiteinfo = dq2.listFileReplicas(loc, ds)
                        if datasetsiteinfo[0]['found'] is not None:
                           ds_ok = True
                           break

                     if not ds_ok:
                        cont_ok = False
                        break
               except Exception:
                  logger.warning("Unable to check if datasets are frozen")
                  cont_ok = False
            finally:
               dq2_lock.release()

            # Fail as soon as any container has an unfrozen dataset;
            # previously only the last container's status was examined.
            if not cont_ok:
               return False

      return True
Ejemplo n.º 10
0
 def listAllDatasets(self):
    """List all datasets in container of this transform."""
    ds_list = []
    # Acquire before the try so a failed acquire doesn't trigger a
    # release of a lock we never held.
    dq2_lock.acquire()
    try:
       try:
          ds_list = dq2.listDatasetsInContainer(self.getContainerName())
       except DQContainerDoesNotHaveDataset:
          pass
       except DQException as x:
          # DQException must precede the generic Exception handler,
          # otherwise this branch is unreachable.
          logger.error('DQ2 Problem finding datasets associated with TRF container %s: %s %s' %( self.getContainerName(), x.__class__, x))
       except Exception as x:
          logger.error('Problem finding datasets associated with TRF container %s: %s %s' %( self.getContainerName(), x.__class__, x))
    finally:
       dq2_lock.release()

    return ds_list
Ejemplo n.º 11
0
 def initializeFromContainer(self, dset, template = None, using_jedi = True):
    """Initialise the trf with given container, creating a unit for each DS"""
    if dset[-1] != "/":
       logger.error("Please supply a container!")
       return

    if using_jedi:
       # One unit for the whole container; drop the scope and trailing '/'.
       self.addUnit('.'.join( dset[:-1].split(".")[1:] ), dset, template)
       return

    # Expand the container into its constituent tid datasets via DQ2.
    try:
       tid_datasets = dq2.listDatasetsInContainer(dset)
    except DQUnknownDatasetException:
       logger.error("dataset container %s not found" % dset)
       return

    logger.info("Found %i datasets matching %s..." % (len(tid_datasets), dset))

    for tid_ds in tid_datasets:
       self.addUnit('.'.join( tid_ds.split(".")[1:-1] ), tid_ds, template)
Ejemplo n.º 12
0
    def checkOutputContainers(self):
        """Go through all completed units and make sure datasets are registered as required.

        First empties this transform's container of stale dataset entries,
        then re-registers datasets for every completed unit whose job used
        a DQ2OutputDataset.
        """
        logger.info("Cleaning out transform %d container..." % self.getID())

        dslist = []
        # Acquire before the try so a failed acquire doesn't trigger a
        # release of a lock we never held.
        dq2_lock.acquire()
        try:
            try:
                dslist = dq2.listDatasetsInContainer(self.getContainerName())
            except Exception:
                # Best effort: treat an unreadable container as empty.
                dslist = []

            try:
                dq2.deleteDatasetsFromContainer(self.getContainerName(),
                                                dslist)

            except DQContainerDoesNotHaveDataset:
                pass
            except DQException as x:
                # DQException must precede the generic Exception handler,
                # otherwise this branch is unreachable.
                logger.error(
                    'DQ2 Problem cleaning out Transform container: %s %s' %
                    (x.__class__, x))
            except Exception as x:
                logger.error("Problem cleaning out Transform container: %s %s",
                             x.__class__, x)
        finally:
            dq2_lock.release()

        logger.info(
            "Checking output data has been registered for Transform %d..." %
            self.getID())
        for unit in self.units:

            if len(unit.active_job_ids) == 0:
                continue

            # Only completed units whose job wrote a DQ2OutputDataset need
            # a check.
            if unit.status == "completed" and GPI.jobs(
                    unit.active_job_ids[0]).outputdata and GPI.jobs(
                        unit.active_job_ids[0]
                    ).outputdata._impl._name == "DQ2OutputDataset":
                logger.info("Checking containers in Unit %d..." % unit.getID())
                unit.registerDataset()
Ejemplo n.º 13
0
    def initializeFromContainer(self, dset, template=None, using_jedi=True):
        """Initialise the trf with given container, creating a unit for each DS"""
        if dset[-1] != "/":
            logger.error("Please supply a container!")
            return

        if using_jedi:
            # A single unit covers the whole container.
            unit_name = '.'.join(dset[:-1].split(".")[1:])
            self.addUnit(unit_name, dset, template)
            return

        # Expand the container into its constituent tid datasets via DQ2.
        try:
            tid_datasets = dq2.listDatasetsInContainer(dset)
        except DQUnknownDatasetException:
            logger.error("dataset container %s not found" % dset)
            return

        logger.info("Found %i datasets matching %s..." %
                    (len(tid_datasets), dset))

        for tid_ds in tid_datasets:
            self.addUnit('.'.join(tid_ds.split(".")[1:-1]), tid_ds, template)
Ejemplo n.º 14
0
   def initializeFromDatasets(self, dset_list, template = None, using_jedi = True):
      """Initialise the trf with the given dataset list, creating a unit for each DS.

      Containers (names ending in '/') are expanded into their tid datasets
      via DQ2 when using_jedi is False, otherwise added as a single unit.
      Plain datasets always become one unit each.
      """

      for ds in dset_list:

         if ds[-1] == "/":
            if not using_jedi:
               try:
                  tid_datasets = dq2.listDatasetsInContainer(ds)
               except DQUnknownDatasetException:
                  logger.error("dataset container %s not found" % ds)
                  # Skip this container: tid_datasets is unbound here and
                  # would raise a NameError below otherwise.
                  continue

               logger.info("Found %i datasets matching %s..." % (len(tid_datasets), ds))

               for ds2 in tid_datasets:
                  self.addUnit('.'.join( ds.split(".")[1:-1] ), ds2, template)
            else:
               self.addUnit('.'.join( ds[:-1].split(".")[1:] ), ds, template)
         else:
            self.addUnit('.'.join( ds.split(".")[1:-1] ), ds, template)
Ejemplo n.º 15
0
    def listAllDatasets(self):
        """List all datasets in container of this transform."""
        ds_list = []
        # Acquire before the try so a failed acquire doesn't trigger a
        # release of a lock we never held.
        dq2_lock.acquire()
        try:
            try:
                ds_list = dq2.listDatasetsInContainer(self.getContainerName())
            except DQContainerDoesNotHaveDataset:
                pass
            except DQException as x:
                # DQException must precede the generic Exception handler,
                # otherwise this branch is unreachable.
                logger.error(
                    'DQ2 Problem finding datasets associated with TRF container %s: %s %s'
                    % (self.getContainerName(), x.__class__, x))
            except Exception as x:
                logger.error(
                    'Problem finding datasets associated with TRF container %s: %s %s'
                    % (self.getContainerName(), x.__class__, x))
        finally:
            dq2_lock.release()

        return ds_list
Ejemplo n.º 16
0
    def initializeFromDatasets(self, dataset_list):
        """Create one unit per dataset in dataset_list.

        For each dataset (or container) in the list the matching tid
        datasets are resolved via DQ2 and a unit is added to every primary
        transform (i.e. each transform without required_trfs). The output
        dataset names are set using the run numbers and tags of the input
        datasets appended to the current
        t.analysis.outputdata.datasetname field.
        """
        if not type(dataset_list) is list:
            logger.error(
                "dataset_list must be a python list: ['ds1','ds2',...]")
            return

        # Primary transforms are those with no upstream (required)
        # transforms.
        primary_tfs = []
        for tf in self.transforms:
            if len(tf.required_trfs) == 0:
                primary_tfs.append(tf)

        if len(primary_tfs) == 0:
            logger.error(
                "No primary transforms specified. You need at least one before the Task can be initialised."
            )
            return

        unit_num = 0

        for dset in dataset_list:
            dset = dset.strip()
            try:
                if "*" in dset:
                    # A wildcard is allowed but may match more than intended,
                    # so emit a warning (was logged at error level before).
                    logger.warning(
                        "Wildcards may include unexpected datasets in your processing! Please list your datasets before specifying them here!"
                    )
                try:
                    # Treat the name as a container; append '/' if missing.
                    if dset[-1] == "/":
                        tid_datasets = dq2.listDatasetsInContainer(dset)
                    else:
                        tid_datasets = dq2.listDatasetsInContainer(dset + "/")
                except DQUnknownDatasetException:
                    # Not a container - fall back to a plain dataset listing.
                    dslist = dq2.listDatasets(dset).keys()
                    if len(dslist) == 0:
                        logger.error("Dataset %s not found!" % dset)
                        return
                    # Prefer tid datasets, excluding _sub datasets.
                    tid_datasets = [
                        ds for ds in dslist
                        if "_tid" in ds and not "_sub" in ds
                    ]
                    if len(tid_datasets) == 0:
                        if len(dslist) > 1:
                            logger.error(
                                "Found no tid dataset but multiple datasets match %s*!"
                                % dset)
                            return
                        tid_datasets = [dslist[0]]
            except Exception as e:
                logger.error('DQ2 Error while listing dataset %s*! %s' %
                             (dset, e))
                return
            logger.info("Found %i datasets matching %s..." %
                        (len(tid_datasets), dset))

            if len(tid_datasets) == 0:
                logger.error(
                    "No tid datasets found from dataset list. Maybe the container '%s' is empty?"
                    % dset)
                return

            # Every primary transform gets the same unit numbering for this
            # dataset, so rewind the counter per transform.
            prev_num = unit_num
            for tf in primary_tfs:
                unit_num = prev_num
                for ds in tid_datasets:
                    tf.addUnit("Unit_%d" % unit_num, ds)
                    unit_num += 1