Python listFilesInDataset Examples, GangaAtlas.Lib.ATLASDataset.DQ2Dataset.dq2.listFilesInDataset Python Examples

Example #1

0

Show file

File: AtlasUnit.py Project: MannyMoo/ganga

   def checkCompleted(self, job):
      """Check if this unit is complete"""
      if job.status == "completed":
         if job.outputdata and job.outputdata._impl._name == "DQ2OutputDataset" and job.backend.__class__.__name__ != "Jedi":

            # make sure all datasets are complete
            if job.backend.requirements.enableMerge:

               # get container list
               cont_list = self.getContainerList()

               for mj in job.backend.mergejobs:
                  if mj.status != "finished":
                     # merge jobs failed - reset the unit for the moment
                     logger.error("Merge jobs failed. Resetting unit...")
                     self._getParent().resetUnit(self.getID())
                     return False

               for cont in cont_list:
                  dq2_list = dq2.listFilesInDataset(cont)
                  for guid in dq2_list[0].keys():                  
                     if dq2_list[0][guid]['lfn'].find("merge") == -1:
                        logger.warning("Merged files not transferred to out DS by Panda yet. Waiting...")
                        return False

         return True
      else:
         return False

Example #2

0

Show file

   def checkCompleted(self, job):
      """Check if this unit is complete"""
      if job.status == "completed":
         if job.outputdata and job.outputdata._impl._name == "DQ2OutputDataset" and job.backend.__class__.__name__ != "Jedi":

            # make sure all datasets are complete
            if job.backend.requirements.enableMerge:

               # get container list
               cont_list = self.getContainerList()

               for mj in job.backend.mergejobs:
                  if mj.status != "finished":
                     # merge jobs failed - reset the unit for the moment
                     logger.error("Merge jobs failed. Resetting unit...")
                     self._getParent().resetUnit(self.getID())
                     return False

               for cont in cont_list:
                  dq2_list = dq2.listFilesInDataset(cont)
                  for guid in dq2_list[0].keys():                  
                     if dq2_list[0][guid]['lfn'].find("merge") == -1:
                        logger.warning("Merged files not transferred to out DS by Panda yet. Waiting...")
                        return False

         return True
      else:
         return False

Example #3

0

Show file

File: AtlasUnit.py Project: MannyMoo/ganga

   def copyOutput(self):
      """Copy the output data to local storage"""

      job = GPI.jobs(self.active_job_ids[0])
      
      if self.copy_output._name != "TaskLocalCopy" or job.outputdata._impl._name != "DQ2OutputDataset":
         logger.error("Cannot transfer from DS type '%s' to '%s'. Please contact plugin developer." % (job.outputdata._name, self.copy_output._name))
         return False

      # get list of output files
      self._acquireDownloadLock()
      dq2_list = []
      if len(self.output_file_list) == 0:
         for ds in self.getOutputDatasetList():
            dq2_list = dq2.listFilesInDataset(ds)
            
            # merge job DSs leave empty non-merged DSs around
            if job.backend.__class__.__name__ == "Panda" and job.backend.requirements.enableMerge and not ds.endswith("merge") and len(dq2_list) == 0:
               continue

            for guid in dq2_list[0].keys():
               self.output_file_list[ dq2_list[0][guid]['lfn'] ] = ds
         
      # check which ones still need downloading
      to_download = {}
      for f in self.output_file_list.keys():
         
         # check for REs
         if self.copy_output.isValid(f) and not self.copy_output.isDownloaded(f):            
            to_download[ f ] = self.output_file_list[f]

      # store download location in case it's changed while downloading
      download_loc = self.copy_output.local_location
      self._releaseDownloadLock()

      # is everything downloaded?
      if len(to_download.keys()) == 0:
         return True

      # nope, so pick the requested number and off we go
      thread_array = []
      for fname in to_download.keys()[:self._getParent().num_dq2_threads]:
         dsname = to_download[fname]
         exe = 'dq2-get -L ROAMING -a -d -H %s -f %s %s' % (download_loc, fname, dsname)
         logger.info("Downloading '%s' to %s..." % (fname, download_loc))

         thread = Download.download_dq2(exe)
         thread.start()
         thread_array.append(thread)

      for t in thread_array:
         t.join()

      self._acquireDownloadLock()
      
      # check for valid download - SHOULD REALLY BE A HASH CHECK
      for fname in to_download.keys()[:self._getParent().num_dq2_threads]:
         full_path = os.path.join(self.copy_output.local_location, fname)
         if not os.path.exists(full_path):
            logger.error("Error downloading '%s'. File doesn't exist after download." % full_path)
         elif os.path.getsize( full_path ) < 4:
            logger.error("Error downloading '%s'. File size smaller than 4 bytes (%d)" % (full_path, os.path.getsize( full_path ) ))
         else:
            self.copy_output.files.append(fname)
            logger.info("File '%s' downloaded successfully" % full_path)

      self._releaseDownloadLock()

      return False

Example #4

0

Show file

   def copyOutput(self):
      """Copy the output data to local storage"""

      job = GPI.jobs(self.active_job_ids[0])
      
      if self.copy_output._name != "TaskLocalCopy" or job.outputdata._impl._name != "DQ2OutputDataset":
         logger.error("Cannot transfer from DS type '%s' to '%s'. Please contact plugin developer." % (job.outputdata._name, self.copy_output._name))
         return False

      # get list of output files
      self._acquireDownloadLock()
      dq2_list = []
      if len(self.output_file_list) == 0:
         for ds in self.getOutputDatasetList():
            dq2_list = dq2.listFilesInDataset(ds)
            
            # merge job DSs leave empty non-merged DSs around
            if job.backend.__class__.__name__ == "Panda" and job.backend.requirements.enableMerge and not ds.endswith("merge") and len(dq2_list) == 0:
               continue

            for guid in dq2_list[0].keys():
               self.output_file_list[ dq2_list[0][guid]['lfn'] ] = ds
         
      # check which ones still need downloading
      to_download = {}
      for f in self.output_file_list.keys():
         
         # check for REs
         if self.copy_output.isValid(f) and not self.copy_output.isDownloaded(f):            
            to_download[ f ] = self.output_file_list[f]

      # store download location in case it's changed while downloading
      download_loc = self.copy_output.local_location
      self._releaseDownloadLock()

      # is everything downloaded?
      if len(to_download.keys()) == 0:
         return True

      # nope, so pick the requested number and off we go
      thread_array = []
      for fname in to_download.keys()[:self._getParent().num_dq2_threads]:
         dsname = to_download[fname]
         exe = 'dq2-get -L ROAMING -a -d -H %s -f %s %s' % (download_loc, fname, dsname)
         logger.info("Downloading '%s' to %s..." % (fname, download_loc))

         thread = Download.download_dq2(exe)
         thread.start()
         thread_array.append(thread)

      for t in thread_array:
         t.join()

      self._acquireDownloadLock()
      
      # check for valid download - SHOULD REALLY BE A HASH CHECK
      for fname in to_download.keys()[:self._getParent().num_dq2_threads]:
         full_path = os.path.join(self.copy_output.local_location, fname)
         if not os.path.exists(full_path):
            logger.error("Error downloading '%s'. File doesn't exist after download." % full_path)
         elif os.path.getsize( full_path ) < 4:
            logger.error("Error downloading '%s'. File size smaller than 4 bytes (%d)" % (full_path, os.path.getsize( full_path ) ))
         else:
            self.copy_output.files.append(fname)
            logger.info("File '%s' downloaded successfully" % full_path)

      self._releaseDownloadLock()

      return False