Example no. 1
0
   def createNewJob(self):
      """Create and return a new Ganga job (GPI proxy) for this unit.

      The job is assembled from the parent transform: backend, application
      and sandboxes are taken from the transform, input data from this unit.
      For DQ2 output, the dataset name is built (or truncated) to respect a
      backend-dependent maximum length before being assigned.  A splitter is
      attached unless the application/backend handles splitting itself.
      """
      j = GPI.Job()
      # Clone backend/application so per-job modifications don't mutate the
      # transform's template objects.
      j._impl.backend = self._getParent().backend.clone()
      j._impl.application = self._getParent().application.clone()
      if self.inputdata:
         j.inputdata = self.inputdata.clone()

      trf = self._getParent()    # parent transform
      task = trf._getParent()    # enclosing task
      # Choose the output dataset type: an explicit transform setting wins;
      # otherwise infer from the input dataset type.  TagPrepare jobs get
      # no outputdata at all.
      if trf.outputdata:
         j.outputdata = trf.outputdata.clone()
      elif j.inputdata and j.inputdata._impl._name == "ATLASLocalDataset" and j.application._impl._name != "TagPrepare":
         j.outputdata = GPI.ATLASOutputDataset()
      elif j.application._impl._name != "TagPrepare":
         j.outputdata = GPI.DQ2OutputDataset()

      # check for ds name specified and length
      if j.outputdata and j.outputdata._impl._name == "DQ2OutputDataset":
         # Base budget: configured DQ2 limit minus 11 characters reserved
         # (presumably for backend-appended decoration — confirm).
         max_length = configDQ2['OUTPUTDATASET_NAMELENGTH'] - 11

         # merge names need to be shorter
         if (j.backend._impl._name == "Panda" or j.backend._impl._name == "Jedi"):
            if j.backend.requirements.enableMerge:
               max_length -= 12

            if j.backend._impl._name == "Jedi":
               # go over the outputdata and check for output names that Jedi appends to the outDS name
               tmp_len_chg = 8   # minimum reserve; grown below if any output name is longer
               for o in j.outputdata.outputdata:
                  if (len(o)+1) > tmp_len_chg:
                     tmp_len_chg = len(o)+1   # +1 for the joining separator

               max_length -= tmp_len_chg

            elif j.backend.individualOutDS:
               # Panda individual output datasets also lengthen the name.
               max_length -= 8

         if j.outputdata.datasetname != "":
            # User supplied a dataset name: append a unique
            # job/task/transform/unit suffix.
            dsn = [j.outputdata.datasetname, "j%i.t%i.trf%i.u%i" %
                   (j.id, task.id, trf.getID(), self.getID())]

            if len(".".join(dsn)) > max_length:
               # Over budget: trim the user-supplied part by the overshoot;
               # the unique suffix is kept intact.
               dsn = [j.outputdata.datasetname[: - (len(".".join(dsn)) - max_length)], "j%i.t%i.trf%i.u%i" %
                      (j.id, task.id, trf.getID(), self.getID())]
         else:
            # No user name: build container-name + unit-name + unique suffix.
            # getContainerName() apparently ends with a separator, hence [:-1].
            dsn = [trf.getContainerName()[:-1], self.name, "j%i.t%i.trf%i.u%i" %
                   (j.id, task.id, trf.getID(), self.getID())]

            if len(".".join(dsn)) > max_length:
               # Over budget: cap the container part at ~2/3 of the budget,
               # then trim the unit name by the remaining overshoot.  dsn2 is
               # a trial name with the unit name blanked, used only to size
               # that trim.
               # NOTE(review): `2 * max_length / 3` is float division on
               # Python 3, which would break string slicing inside
               # getContainerName — presumably this targets Python 2; confirm.
               dsn2 = [trf.getContainerName(2 * max_length / 3)[:-1], "", "j%i.t%i.trf%i.u%i" % (j.id, task.id, trf.getID(), self.getID())]
               dsn = [trf.getContainerName(2 * max_length / 3)[:-1], self.name[: - (len(".".join(dsn2)) - max_length)], "j%i.t%i.trf%i.u%i" %
                      (j.id, task.id, trf.getID(), self.getID())]
            
         # Strip/replace characters not allowed in DQ2 dataset names.
         j.outputdata.datasetname = '.'.join(dsn).replace(":", "_").replace(" ", "").replace(",","_")
                           
      j.inputsandbox = self._getParent().inputsandbox
      j.outputsandbox = self._getParent().outputsandbox

      # check for splitter - TagPrepare and Jedi don't use splitters
      if j.application._impl._name == "TagPrepare":
         return j
      
      if j.backend._impl._name == "Jedi":
         # Jedi performs its own splitting; just forward the sizing hints.
         if trf.files_per_job > 0:
            j.backend.requirements.nFilesPerJob = trf.files_per_job
         elif trf.MB_per_job > 0:
            # NOTE(review): on Python 2 this is integer division, so values
            # below 1000 MB become 0 GB — confirm that is intended.
            j.backend.requirements.nGBPerJob = trf.MB_per_job / 1000

         return j

      if not trf.splitter:
         # provide a default number of files if there's nothing else given
         nfiles = trf.files_per_job
         if nfiles < 1:
            nfiles = 5

         if j.inputdata._impl._name == "ATLASLocalDataset":
            # Local datasets: split by subjob count (explicit, or derived
            # from the number of input files per job).
            j.splitter = AthenaSplitterJob()
            if trf.subjobs_per_unit > 0:
               j.splitter.numsubjobs = trf.subjobs_per_unit
            else:
               import math 
               j.splitter.numsubjobs = int( math.ceil( len(j.inputdata.names) / float(nfiles) ) )
         else:
            # Grid datasets: prefer size-based, then count-based splitting.
            j.splitter = DQ2JobSplitter()
            if trf.MB_per_job > 0:
               j.splitter.filesize = trf.MB_per_job
            elif trf.subjobs_per_unit > 0:
               j.splitter.numsubjobs = trf.subjobs_per_unit
            else:
               j.splitter.numfiles = nfiles
      else:
         j.splitter = trf.splitter.clone()

      # postprocessors
      if len(self._getParent().postprocessors.process_objects) > 0:
         import copy
         # Deep-copy via the proxy so the job gets independent postprocessor
         # instances instead of sharing the transform's.
         j.postprocessors = copy.deepcopy( addProxy(self._getParent()).postprocessors )
         
      return j