Example #1
   def initializeFromDatasets(self,dataset_list):
      """ For each dataset in the dataset_list a unit is created. 
          The output dataset names are set using the run numbers and tags of the input datasets appended to the current t.analysis.outputdata.datasetname field."""
      if not isinstance(dataset_list, list):
         logger.error("dataset_list must be a python list: ['ds1','ds2',...]")
         return

      # check for primary transforms
      primary_tfs = []
      for tf in self.transforms:
         if len(tf.required_trfs) == 0:
            primary_tfs.append( tf )

      if len(primary_tfs) == 0:
         logger.error("No primary transforms specified. Yout need at least one before the Task can be initialised.")
         return
      
      unit_num = 0
         
      for dset in dataset_list:
         dset = dset.strip()
         try:
            if "*" in dset:
               logger.error("WARNING: Wildcards may include unexpected datasets in your processing! Please list your datasets before specifying them here!")
            try:
               if dset[-1] == "/":
                  tid_datasets = dq2.listDatasetsInContainer(dset)
               else:
                  tid_datasets = dq2.listDatasetsInContainer(dset+"/")
            except DQUnknownDatasetException:
               dslist = list(dq2.listDatasets(dset).keys())
               if len(dslist) == 0:
                  logger.error("Dataset %s not found!" % dset)
                  return
               tid_datasets = [ds for ds in dslist if "_tid" in ds and "_sub" not in ds]
               if len(tid_datasets) == 0:
                  if len(dslist) > 1:
                     logger.error("Found no tid dataset but multiple datasets match %s*!" % dset)
                     return
                  tid_datasets = [dslist[0]]
         except Exception as e:
            logger.error('DQ2 Error while listing dataset %s*! %s' % (dset, e))
            return
         logger.info("Found %i datasets matching %s..." % (len(tid_datasets), dset))

         if len(tid_datasets) == 0:
            logger.error("No tid datasets found from dataset list. Maybe the container '%s' is empty?" % dset)
            return
         
         prev_num = unit_num
         for tf in primary_tfs:
            unit_num = prev_num
            for ds in tid_datasets:
               tf.addUnit("Unit_%d" % unit_num, ds)
               unit_num += 1
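
A minimal, self-contained sketch of the tid-dataset selection used above when the container lookup fails; it is runnable on its own, and the sample dataset names are invented for illustration:

   def select_tid_datasets(dslist):
      """Keep only _tid datasets and drop _sub ones, mirroring the filter in initializeFromDatasets."""
      return [ds for ds in dslist if "_tid" in ds and "_sub" not in ds]

   sample = ["data12_8TeV.00200000.physics_Muons.AOD.f431_m1111_tid01234567_00",
             "data12_8TeV.00200000.physics_Muons.AOD.f431_m1111_tid01234567_00_sub0",
             "data12_8TeV.00200000.physics_Muons.AOD.f431_m1111"]
   print(select_tid_datasets(sample))  # prints only the first, tid-only name
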
Example #2
    def initializeFromDatasets(self, dataset_list):
        """ For each dataset in the dataset_list a unit is created. 
          The output dataset names are set using the run numbers and tags of the input datasets appended to the current t.analysis.outputdata.datasetname field."""
        if not isinstance(dataset_list, list):
            logger.error(
                "dataset_list must be a python list: ['ds1','ds2',...]")
            return

        # check for primary transforms
        primary_tfs = []
        for tf in self.transforms:
            if len(tf.required_trfs) == 0:
                primary_tfs.append(tf)

        if len(primary_tfs) == 0:
            logger.error(
                "No primary transforms specified. Yout need at least one before the Task can be initialised."
            )
            return

        unit_num = 0

        for dset in dataset_list:
            dset = dset.strip()
            try:
                if "*" in dset:
                    logger.warning(
                        "Wildcards may include unexpected datasets in your processing! Please list your datasets before specifying them here!"
                    )
                try:
                    if dset[-1] == "/":
                        tid_datasets = dq2.listDatasetsInContainer(dset)
                    else:
                        tid_datasets = dq2.listDatasetsInContainer(dset + "/")
                except DQUnknownDatasetException:
                    dslist = list(dq2.listDatasets(dset).keys())
                    if len(dslist) == 0:
                        logger.error("Dataset %s not found!" % dset)
                        return
                    tid_datasets = [
                        ds for ds in dslist
                        if "_tid" in ds and not "_sub" in ds
                    ]
                    if len(tid_datasets) == 0:
                        if len(dslist) > 1:
                            logger.error(
                                "Found no tid dataset but multiple datasets match %s*!"
                                % dset)
                            return
                        tid_datasets = [dslist[0]]
            except Exception as e:
                logger.error('DQ2 Error while listing dataset %s*! %s' %
                             (dset, e))
                return
            logger.info("Found %i datasets matching %s..." %
                        (len(tid_datasets), dset))

            if len(tid_datasets) == 0:
                logger.error(
                    "No tid datasets found from dataset list. Maybe the container '%s' is empty?"
                    % dset)
                return

            prev_num = unit_num
            for tf in primary_tfs:
                unit_num = prev_num
                for ds in tid_datasets:
                    tf.addUnit("Unit_%d" % unit_num, ds)
                    unit_num += 1
Example #3
    def checkCompletedApp(self, app):
        task = self._getParent()
        j = app._getParent()
        for odat in j.outputdata.outputdata:
            # Look out: if this is changed, there is another one like it below!
            if 0 == len([
                    f for f in j.outputdata.output
                    if ".".join(odat.split(".")[:-1]) in f
            ]):
                logger.error("Job %s has not produced %s file, only: %s" %
                             (j.id, odat, j.outputdata.output))
                return False
        # if this is the first app to complete the partition...
        if self.getPartitionStatus(self._app_partition[app.id]) != "completed":
            task_container, subtask_dsname = task.container_name, self.dataset_name

            infos = {}
            for oinfo in j.outputdata.output:
                dq2_lock.acquire()
                try:
                    info = oinfo.split(",")
                    # get the master replica for the dataset - Panda sets the location field to ANALY_XYZ rather than to the SE
                    master_replica = dq2.getMasterReplicaLocation(info[0])
                    if master_replica:
                        info[5] = master_replica
                    else:
                        replicas = list(dq2.listDatasetReplicas(info[0]).values())
                        if len(replicas) == 0:
                            try:
                                info[5] = getPandaClient().PandaSites[
                                    info[5]]["ddm"]
                            except KeyError:
                                pass
                        else:
                            complete, incomplete = replicas[0].values()
                            info[5] = (complete + incomplete)[0]
                    if info[4][:3] == "ad:":
                        info[4] = info[4][3:]

                finally:
                    dq2_lock.release()

                datasetname = subtask_dsname + '.' + info[5]
                info[0] = datasetname
                infos.setdefault(datasetname, []).append(",".join(info))

            for ds in infos.keys():
                outputdata = DQ2OutputDataset()
                try:
                    outputdata.create_dataset(ds)
                except DQDatasetExistsException:
                    pass
                try:
                    outputdata.register_datasets_details(None, infos[ds])
                except DQFileExistsInDatasetException:
                    pass

            # Register Container
            containerinfo = {}
            dq2_lock.acquire()
            try:
                try:
                    containerinfo = dq2.listDatasets(task_container)
                except Exception:
                    containerinfo = {}
                if containerinfo == {}:
                    try:
                        dq2.registerContainer(task_container)
                        logger.debug('Registered container for Task %i: %s' %
                                     (task.id, task_container))
                    except Exception as x:
                        logger.error(
                            'Problem registering container for Task %i, %s : %s %s'
                            % (task.id, task_container, x.__class__, x))
                for ds in infos.keys():
                    try:
                        dq2.registerDatasetsInContainer(task_container, [ds])
                    except DQContainerAlreadyHasDataset:
                        pass
                    except Exception as x:
                        logger.error(
                            'Problem registering dataset %s in container %s: %s %s'
                            % (ds, task_container, x.__class__, x))
            finally:
                dq2_lock.release()
        return True
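
The bookkeeping in the loop above is easier to follow in isolation. A standalone sketch of the regrouping step, with the replica lookup left out; each record is a comma-separated string in which field 0 is the dataset name, field 4 carries an optional "ad:" prefix and field 5 the location (the meaning of the remaining fields is not specified in this code):

    def group_output_records(records, subtask_dsname):
        """Rewrite each record to a per-location dataset name and group them."""
        infos = {}
        for oinfo in records:
            info = oinfo.split(",")
            if info[4][:3] == "ad:":  # strip the "ad:" prefix if present
                info[4] = info[4][3:]
            datasetname = subtask_dsname + '.' + info[5]
            info[0] = datasetname  # the record now carries the per-location name
            infos.setdefault(datasetname, []).append(",".join(info))
        return infos
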
Example #4
   def registerDataset(self):
      """Register in the transform container"""
      trf = self._getParent()
      trf_container = trf.getContainerName()

      fail = False
      containerinfo = {}
      dq2_lock.acquire()
      try:
         try:
            containerinfo = dq2.listDatasets(trf_container)
         except Exception:
            containerinfo = {}
            
         if containerinfo == {}:
            try:
               dq2.registerContainer(trf_container)
               logger.info('Registered container for Unit %i of Transform %i: %s' % (self.getID(), trf.getID(), trf_container))
               
            except DQException as x:
               logger.error('DQ2 Problem registering container for Unit %i of Transform %i, %s : %s %s' % (self.getID(), trf.getID(), trf_container, x.__class__, x))
               fail = True
            except Exception as x:
               logger.error('Problem registering container for Unit %i of Transform %i, %s : %s %s' % (self.getID(), trf.getID(), trf_container, x.__class__, x))
               fail = True
               
         job = GPI.jobs(self.active_job_ids[0])
         ds_list = self.getOutputDatasetList()

         for ds in ds_list:
            try:
               dq2.registerDatasetsInContainer(trf_container, [ ds ] )
            except DQContainerAlreadyHasDataset:
               pass
            except DQException as x:
               logger.error('DQ2 Problem registering dataset %s in container %s: %s %s' % (ds, trf_container, x.__class__, x))
               fail = True
            except Exception as x:
               logger.error('Problem registering dataset %s in container %s: %s %s' % (ds, trf_container, x.__class__, x))
               fail = True
      finally:
         dq2_lock.release()
         
      if fail:
         return False
      
      # add dataset to the task container
      task = trf._getParent()
      task_container = task.getContainerName()
      
      containerinfo = {}
      dq2_lock.acquire()
      try:
         try:
            containerinfo = dq2.listDatasets(task_container)
         except Exception:
            containerinfo = {}
         if containerinfo == {}:
            try:
               dq2.registerContainer(task_container)
               logger.info('Registered container for Unit %i of Transform %i: %s' % (self.getID(), trf.getID(), task_container))
                  
            except DQException as x:
               logger.error('DQ2 Problem registering container for Unit %i of Transform %i in Task %i, %s : %s %s' %
                            (self.getID(), trf.getID(), task.getID(), task_container, x.__class__, x))
               fail = True
            except Exception as x:
               logger.error('Problem registering container for Unit %i of Transform %i in Task %i, %s : %s %s' %
                            (self.getID(), trf.getID(), task.getID(), task_container, x.__class__, x))
               fail = True

         ds_list = self.getOutputDatasetList()

         for ds in ds_list:
            try:
               dq2.registerDatasetsInContainer(task_container, [ ds ] )
            except DQContainerAlreadyHasDataset:
               pass
            except DQException as x:
               logger.error('DQ2 Problem registering dataset %s in container %s: %s %s' % (ds, task_container, x.__class__, x))
               fail = True
            except Exception as x:
               logger.error('Problem registering dataset %s in container %s: %s %s' % (ds, task_container, x.__class__, x))
               fail = True
      finally:
         dq2_lock.release()

      return not fail
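
This method and the unregisterDataset that follows repeat the same lock-guarded "check container, then add or remove datasets" block four times between them. A hedged refactoring sketch, not taken from the source: it assumes the dq2, dq2_lock and logger objects and the DQ2 exception classes used in these examples, and the helper name and its parameters are hypothetical:

   def _apply_to_container(container, ds_list, op, skip_exc, create=True):
      """Apply op (dq2.registerDatasetsInContainer or
      dq2.deleteDatasetsFromContainer) to each dataset in ds_list, ignoring
      the benign exception class skip_exc; optionally create the container."""
      fail = False
      dq2_lock.acquire()
      try:
         try:
            containerinfo = dq2.listDatasets(container)
         except Exception:
            containerinfo = {}
         if containerinfo == {}:
            if not create:
               return True  # nothing to remove from
            try:
               dq2.registerContainer(container)
            except Exception as x:
               logger.error('Problem registering container %s: %s %s' % (container, x.__class__, x))
               fail = True
         for ds in ds_list:
            try:
               op(container, [ds])
            except skip_exc:
               pass
            except Exception as x:
               logger.error('Problem with dataset %s in container %s: %s %s' % (ds, container, x.__class__, x))
               fail = True
      finally:
         dq2_lock.release()
      return not fail
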
Example #5
   def unregisterDataset(self):
      """Register in the transform container"""
      trf = self._getParent()
      trf_container = trf.getContainerName()
      fail = False
      containerinfo = {}
      dq2_lock.acquire()
      try:
         try:
            containerinfo = dq2.listDatasets(trf_container)
         except Exception:
            containerinfo = {}
            
         if containerinfo != {}:
            job = GPI.jobs(self.active_job_ids[0])
            ds_list = self.getOutputDatasetList()
            for ds in ds_list:
               
               try:
                  dq2.deleteDatasetsFromContainer(trf_container, [ ds ] )
               except DQContainerDoesNotHaveDataset:
                  pass
                except DQException as x:
                   logger.error('DQ2 Problem removing dataset %s from container %s: %s %s' % (ds, trf_container, x.__class__, x))
                   fail = True
                except Exception as x:
                   logger.error('Problem removing dataset %s from container %s: %s %s' % (ds, trf_container, x.__class__, x))
                   fail = True
      finally:
         dq2_lock.release()

      if fail:
         return False
      
      # remove the datasets from the task container
      task = trf._getParent()
      task_container = task.getContainerName()

      containerinfo = {}
      dq2_lock.acquire()
      try:
         try:
            containerinfo = dq2.listDatasets(task_container)
         except Exception:
            containerinfo = {}
            
         if containerinfo != {}:
            job = GPI.jobs(self.active_job_ids[0])
            ds_list = self.getOutputDatasetList()
            for ds in ds_list:
               
               try:
                  dq2.deleteDatasetsFromContainer(task_container, [ ds ] )
               except DQContainerDoesNotHaveDataset:
                  pass
                except DQException as x:
                   logger.error('DQ2 Problem removing dataset %s from container %s: %s %s' % (ds, task_container, x.__class__, x))
                   fail = True
                except Exception as x:
                   logger.error('Problem removing dataset %s from container %s: %s %s' % (ds, task_container, x.__class__, x))
                   fail = True
      finally:
         dq2_lock.release()

      return not fail
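
With the hypothetical helper sketched after the previous example, the body of unregisterDataset would reduce to two calls, e.g.:

   # hypothetical usage of _apply_to_container inside unregisterDataset
   ds_list = self.getOutputDatasetList()
   ok_trf = _apply_to_container(trf_container, ds_list,
                                dq2.deleteDatasetsFromContainer,
                                DQContainerDoesNotHaveDataset, create=False)
   ok_task = _apply_to_container(task_container, ds_list,
                                 dq2.deleteDatasetsFromContainer,
                                 DQContainerDoesNotHaveDataset, create=False)
   return ok_trf and ok_task
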
Example #6
   def checkCompletedApp(self, app):
      task = self._getParent()
      j = app._getParent()
      for odat in j.outputdata.outputdata:
          # Look out: if this is changed, there is another one like it below!
          if 0==len([f for f in j.outputdata.output if ".".join(odat.split(".")[:-1]) in f]):
              logger.error("Job %s has not produced %s file, only: %s" % (j.id, odat, j.outputdata.output))
              return False
      # if this is the first app to complete the partition...
      if self.getPartitionStatus(self._app_partition[app.id]) != "completed":
          task_container, subtask_dsname = task.container_name, self.dataset_name

          infos = {}
          for oinfo in j.outputdata.output:
              dq2_lock.acquire()
              try:
                  info = oinfo.split(",")
                  # get the master replica for the dataset - Panda sets the location field to ANALY_XYZ rather than to the SE
                  master_replica = dq2.getMasterReplicaLocation(info[0])
                  if master_replica:
                      info[5] = master_replica
                  else:
                      replicas = list(dq2.listDatasetReplicas(info[0]).values())
                      if len(replicas) == 0:
                          try:
                              info[5] = getPandaClient().PandaSites[info[5]]["ddm"]
                          except KeyError:
                              pass
                      else:
                          complete, incomplete = replicas[0].values()
                          info[5] = (complete + incomplete)[0]
                  if info[4][:3] == "ad:":
                      info[4] = info[4][3:]

              finally:
                  dq2_lock.release()
                
              datasetname = subtask_dsname + '.' + info[5]
              info[0] = datasetname
              infos.setdefault(datasetname, []).append(",".join(info))

          for ds in infos.keys():
              outputdata = DQ2OutputDataset()
              try:
                  outputdata.create_dataset(ds)
              except DQDatasetExistsException:
                  pass
              try:
                  outputdata.register_datasets_details(None, infos[ds])
              except DQFileExistsInDatasetException:
                  pass

          # Register Container
          containerinfo = {}
          dq2_lock.acquire()
          try:
              try:
                  containerinfo = dq2.listDatasets(task_container)
              except Exception:
                  containerinfo = {}
              if containerinfo == {}:
                  try:
                      dq2.registerContainer(task_container)
                      logger.debug('Registered container for Task %i: %s' % (task.id, task_container))
                  except Exception as x:
                      logger.error('Problem registering container for Task %i, %s : %s %s' % (task.id, task_container,x.__class__, x))
              for ds in infos.keys():
                  try:
                      dq2.registerDatasetsInContainer(task_container, [ ds ] )
                  except DQContainerAlreadyHasDataset:
                      pass
                  except Exception as x:
                      logger.error('Problem registering dataset %s in container %s: %s %s' % (ds, task_container, x.__class__, x))
          finally:
              dq2_lock.release()
      return True