Example #1
    def addUnit(self, outname, dsname, template=None):
        """Create a new unit from the given dataset name and output name"""
        unit = AtlasUnit()
        if not template:
            # no template supplied: start from a fresh DQ2 dataset
            unit.inputdata = DQ2Dataset()
        else:
            # strip the GPI proxy from the supplied template dataset
            unit.inputdata = stripProxy(template)
        unit.inputdata.dataset = dsname
        unit.name = outname
        self.addUnitToTRF(unit)
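
A minimal usage sketch, assuming a Ganga session where an AtlasTransform instance named trf already exists and addUnit can be called on it directly; the unit and dataset names below are purely illustrative:

    # illustrative only: add a unit that runs over a hypothetical DQ2 dataset
    trf.addUnit("Test unit", "user.someone.test.dataset/")

    # or seed the unit's inputdata from an existing dataset object
    template = DQ2Dataset()
    template.dataset = "user.someone.test.dataset/"
    trf.addUnit("Templated unit", template.dataset, template=template)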
Example #2
    def createChainUnit(self, parent_units, use_copy_output=True):
        """Create an output unit given this output data"""

        # we need valid parent jobs
        for parent in parent_units:
            # need datasetname filled for Panda jobs
            if len(parent.active_job_ids) == 0 or \
                   (GPI.jobs(parent.active_job_ids[0]).application._impl._name != "TagPrepare" and \
                    GPI.jobs(parent.active_job_ids[0]).outputdata and \
                    GPI.jobs(parent.active_job_ids[0]).backend._impl._name == "Panda" and \
                    GPI.jobs(parent.active_job_ids[0]).outputdata.datasetname == ""):
                return None

            # need datasetList filled for Jedi jobs
            if len(parent.active_job_ids) == 0 or \
                   (GPI.jobs(parent.active_job_ids[0]).application._impl._name != "TagPrepare" and \
                    GPI.jobs(parent.active_job_ids[0]).outputdata and \
                    GPI.jobs(parent.active_job_ids[0]).backend._impl._name == "Jedi" and \
                    len(GPI.jobs(parent.active_job_ids[0]).outputdata.datasetList) == 0):
                return None

            # for local jobs, make sure units are complete
            if GPI.jobs(parent_units[0].active_job_ids[0]).outputdata._impl._name == "ATLASOutputDataset" and \
                   parent.status != "completed":
                return None

        # Are we doing Local -> Local? i.e. are we going from ATLASOutputDataset?
        # Problem: Doesn't take into account merger locations...
        if GPI.jobs(parent_units[0].active_job_ids[0]
                    ).outputdata._impl._name == "ATLASOutputDataset":
            unit = AtlasUnit()
            unit.inputdata = ATLASLocalDataset()

            # collect the output file names from every parent job
            for parent in parent_units:
                for l in GPI.jobs(parent.active_job_ids[0]).outputdata.output:
                    unit.inputdata.names += l

        # decide whether to use copy_output (i.e. local output); TagPrepare is a special case handled first
        elif GPI.jobs(parent_units[0].active_job_ids[0]
                      ).application._impl._name == "TagPrepare":

            # make sure all have completed before taking the tag-info
            if parent_units[0].status != "completed":
                return None

            unit = AtlasUnit()
            unit.inputdata = DQ2Dataset()
            unit.inputdata.tag_info = GPI.jobs(
                parent_units[0].active_job_ids[0]).application.tag_info

        # no local copy requested or available: point the new unit at the grid datasets.
        # Note: 'parent' here is the last parent visited in the validation loop above.
        elif not use_copy_output or not parent.copy_output:
            unit = AtlasUnit()
            unit.inputdata = DQ2Dataset()
            for parent in parent_units:

                # Don't just use the main datasetname as Jedi introduces separate containers for logs and output files
                if GPI.jobs(parent.active_job_ids[0]
                            ).backend._impl._name == "Jedi":
                    for ds in GPI.jobs(
                            parent.active_job_ids[0]).outputdata.datasetList:
                        if not ds.endswith(".log/"):
                            unit.inputdata.dataset.append(ds)
                else:
                    unit.inputdata.dataset.append(
                        GPI.jobs(
                            parent.active_job_ids[0]).outputdata.datasetname)

        else:

            unit = AtlasUnit()
            unit.inputdata = ATLASLocalDataset()

            for parent in parent_units:
                # unit needs to have completed and downloaded
                if parent.status != "completed":
                    return None

                # we should be OK so copy all output to an ATLASLocalDataset
                for f in parent.copy_output.files:
                    unit.inputdata.names.append(
                        os.path.join(parent.copy_output.local_location, f))

        return unit
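
A minimal sketch of how a caller might use the return value, assuming a transform object trf and a list of parent units are already in hand; both names, and the unit name below, are illustrative:

    # illustrative only: wire up a chained unit once the parents are ready
    unit = trf.createChainUnit(parent_units, use_copy_output=True)
    if unit is None:
        # parents incomplete or their output datasets not yet filled;
        # try again on a later update cycle
        pass
    else:
        unit.name = "Chained unit"
        trf.addUnitToTRF(unit)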
Example #3
    def createUnits(self):
        """Create new units if required given the inputdata"""

        # call parent for chaining
        super(AtlasTransform, self).createUnits()

        # if there is no input data, just create a single unit given the application
        if len(self.inputdata) == 0 and len(self.units) == 0:
            unit = AtlasUnit()
            unit.name = "Unit %d" % len(self.units)
            self.addUnitToTRF(unit)

        # loop over input data and see if we need to create any more units
        for inds in self.inputdata:

            ok = True

            if inds._name == "DQ2Dataset":
                # check if this data is being run over
                ok = False
                for unit in self.units:
                    if unit.inputdata.dataset == inds.dataset:
                        ok = True

                if not ok:
                    # new unit required for this dataset
                    unit = AtlasUnit()
                    unit.name = "Unit %d" % len(self.units)
                    self.addUnitToTRF(unit)
                    unit.inputdata = inds

            elif inds._name == "ATLASLocalDataset":

                # different behaviour depending on files_per_unit
                if self.files_per_unit < 0:
                    # check if this data is being run over
                    ok = False
                    for unit in self.units:
                        if set(unit.inputdata.names) == set(inds.names):
                            ok = True

                    if not ok:
                        # new unit required for this dataset
                        unit = AtlasUnit()
                        unit.name = "Unit %d" % len(self.units)
                        self.addUnitToTRF(unit)
                        unit.inputdata = inds

                else:
                    # files_per_unit is set: if this data isn't already covered,
                    # split it into units of at most files_per_unit files each
                    # (assumes files_per_unit > 0)
                    ok = False
                    curr_data = []
                    for unit in self.units:
                        curr_data.extend(unit.inputdata.names)

                    # already covered if every file in this dataset belongs to an existing unit
                    if set(inds.names).issubset(set(curr_data)):
                        ok = True

                    if not ok:
                        # new unit(s) required for this dataset
                        num = 0
                        while num < len(inds.names):
                            unit = AtlasUnit()
                            unit.name = "Unit %d" % len(self.units)
                            self.addUnitToTRF(unit)
                            unit.inputdata = inds.clone()
                            unit.inputdata.names = inds.names[num:num + self.files_per_unit]
                            num += self.files_per_unit
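
A minimal sketch, assuming a Ganga session with an AtlasTransform named trf, of how the files_per_unit splitting above plays out; the file paths and the list-like behaviour of inputdata are assumptions for illustration:

    # illustrative only: five local files split with files_per_unit = 2
    ds = ATLASLocalDataset()
    ds.names = ['/data/f1.root', '/data/f2.root', '/data/f3.root',
                '/data/f4.root', '/data/f5.root']

    trf.files_per_unit = 2
    trf.inputdata.append(ds)   # assumes inputdata behaves like a list
    trf.createUnits()          # roughly: units over [f1, f2], [f3, f4] and [f5]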