def downloadSmallFiles(self, jobs):
    '''
    Download heartbeat.json for jobs which should be killed and resubmitted.

    An attempt is made to download the heartbeat json but it is fine to
    fail as the job may still be running.

    :param jobs: iterable of job dicts; entries without a usable 'JobID'
        are skipped
    '''
    for job in jobs:
        # Skip jobs with a missing or empty JobID
        if 'JobID' not in job or not job['JobID']:
            continue
        jobid = job['JobID']
        # Session id is the last path component of the job URL
        sessionid = jobid[jobid.rfind('/'):]
        localdir = self.tmpdir + sessionid
        try:
            os.makedirs(localdir, 0o755)
        except OSError:
            # Directory most likely exists already; any other problem will
            # surface when the transfer below tries to write the destination
            pass

        source = aCTUtils.DataPoint(str(jobid + '/heartbeat.json'), self.uc)
        dest = aCTUtils.DataPoint(str(localdir + '/heartbeat.json'), self.uc)
        dm = arc.DataMover()
        status = dm.Transfer(source.h, dest.h, arc.FileCache(), arc.URLMap())
        if not status:
            # Non-fatal: the job may still be running, so the heartbeat
            # file may not exist yet
            self.log.debug('%s: Failed to download %s: %s' %
                           (job['pandaid'], source.h.GetURL().str(), str(status)))
def __arc_copy(self, src, dest, space_token=None):
    # TODO set proxy path

    # Wrap both endpoints in DataPoint objects; a None handle means the
    # URL scheme cannot be handled.
    src_dp = DataPoint(str(src), self.cfg)
    if src_dp.h is None:
        raise ServiceUnavailable("Can't handle source %s" % src)

    dst_dp = DataPoint(str(dest), self.cfg)
    if dst_dp.h is None:
        raise ServiceUnavailable("Can't handle destination %s" % dest)

    # Request a specific space token on the destination when one is given
    if space_token:
        dst_dp.h.GetURL().AddOption('spacetoken', space_token)

    # DataMover performs the transfer: single attempt, passive and
    # insecure gridftp.
    mover = arc.DataMover()
    mover.retry(False)
    mover.passive(True)
    mover.secure(False)

    result = mover.Transfer(src_dp.h, dst_dp.h, arc.FileCache(), arc.URLMap())
    if not result:
        # Map the ARC errno onto the service's exception hierarchy
        if result.GetErrno() == errno.ENOENT:
            raise SourceNotFound()
        if result.GetErrno() == errno.EEXIST:
            raise FileAlreadyExists()
        raise ServiceUnavailable(str(result))
def _copy_file(self, source, destination, usercfg, log):
    '''Copy a file from source to destination and return the transfer status.'''
    log.info('Copying {0} to {1}'.format(source, destination))

    src_dp = arc_utils.DataPoint(str(source), usercfg)
    dst_dp = arc_utils.DataPoint(str(destination), usercfg)

    # Single attempt, passive and insecure gridftp
    mover = arc.DataMover()
    mover.retry(False)
    mover.passive(True)
    mover.secure(False)

    return mover.Transfer(src_dp.h, dst_dp.h, arc.FileCache(), arc.URLMap())
def _download_outputs(self, files, logdir, jobid, pandaid, userconfig, log):
    '''Download the output files specified in downloadfiles.

    :param files: semicolon-separated list of files relative to the session
        dir, possibly containing wildcards
    :param logdir: local directory for log files
    :param jobid: full job session URL
    :param pandaid: panda id used to name the downloaded log files
    :param userconfig: ARC user configuration
    :param log: logger
    :return: tuple of jobid lists (fetched, notfetched, notfetchedretry)
    '''
    # construct datapoint object, initialising connection. Use the same
    # object until base URL changes. TODO group by base URL.
    datapoint = arc_utils.DataPoint(str(jobid), userconfig)
    dp = datapoint.h
    dm = arc.DataMover()
    dm.retry(False)
    dm.passive(True)
    dm.secure(False)
    fetched = []
    notfetched = []
    notfetchedretry = []

    # create required local log dirs
    # (0o755 replaces the py2-only octal literal 0755, matching the
    # portable form already used elsewhere in this file)
    try:
        os.makedirs(logdir, 0o755)
    except OSError as e:
        # An existing directory is fine; anything else is fatal for this job
        if e.errno != errno.EEXIST or not os.path.isdir(logdir):
            log.warning('Failed to create directory {0}: {1}'.format(
                logdir, os.strerror(e.errno)))
            notfetched.append(jobid)
            return (fetched, notfetched, notfetchedretry)

    tmpdldir = os.path.join(self.tmpdir, pandaid)
    try:
        os.makedirs(tmpdldir, 0o755)
    except OSError as e:
        if e.errno != errno.EEXIST or not os.path.isdir(tmpdldir):
            log.warning('Failed to create directory {0}: {1}'.format(
                tmpdldir, os.strerror(e.errno)))
            notfetched.append(jobid)
            return (fetched, notfetched, notfetchedretry)

    filelist = files.split(';')
    if re.search(r'[\*\[\]\?]', files):
        # found wildcard, need to get sessiondir list
        remotefiles = self.listUrlRecursive(jobid, log)
        expandedfiles = []
        for wcf in filelist:
            if re.search(r'[\*\[\]\?]', wcf):
                # only match wildcards in matching dirs
                expandedfiles += [
                    rf for rf in remotefiles
                    if fnmatch.fnmatch(rf, wcf)
                    and os.path.dirname(rf) == os.path.dirname(wcf)
                ]
            else:
                expandedfiles.append(wcf)
        # remove duplicates from wildcard matching through set
        filelist = list(set(expandedfiles))

    for f in filelist:
        if f == 'gmlog/errors':
            localfile = os.path.join(logdir, '%s.log' % pandaid)
        elif f.find('.log') != -1:
            localfile = os.path.join(logdir, '%s.out' % pandaid)
        else:
            # NOTE(review): if f contains a subdirectory (possible via
            # wildcard expansion) the parent dir under tmpdldir is not
            # created here — confirm expected file layout
            localfile = os.path.join(tmpdldir, f)
        remotefile = arc.URL(str(jobid + '/' + f))
        dp.SetURL(remotefile)
        localdp = arc_utils.DataPoint(str(localfile), userconfig)
        # do the copy
        status = dm.Transfer(dp, localdp.h, arc.FileCache(), arc.URLMap())
        if not status and str(status).find(
                'File unavailable'
        ) == -1:  # tmp fix for globus error which is always retried
            if status.Retryable():
                log.warning(
                    'Failed to download but will retry {0}: {1}'.format(
                        dp.GetURL().str(), str(status)))
                notfetchedretry.append(jobid)
            else:
                log.error(
                    'Failed to download with permanent failure {0}: {1}'.
                    format(dp.GetURL().str(), str(status)))
                notfetched.append(jobid)
        else:
            # NOTE(review): the 'File unavailable' failure also reaches
            # this branch, so chmod may raise if the file was never
            # written — confirm intended
            os.chmod(localfile, 0o644)
            log.info('Downloaded {0}'.format(dp.GetURL().str()))

    if jobid not in notfetched and jobid not in notfetchedretry:
        fetched.append(jobid)
    return (fetched, notfetched, notfetchedretry)
def fetchSome(self, jobs, downloadfiles):
    '''Get specified files for the jobs in downloadfiles.

    :param jobs: dict of id: Job object
    :param downloadfiles: dict of id: semicolon-separated list of files
        relative to session dir, with wildcards
    :return: tuple of jobid lists (fetched, notfetched, notfetchedretry)
    '''
    if not jobs or not downloadfiles:
        return ([], [], [])

    # construct datapoint object, initialising connection. Use the same
    # object until base URL changes. TODO group by base URL.
    # next(iter(...)) works on both py2 lists and py3 dict views, unlike
    # the py2-only jobs.values()[0]
    datapoint = aCTUtils.DataPoint(next(iter(jobs.values())).JobID, self.uc)
    dp = datapoint.h
    dm = arc.DataMover()
    dm.retry(False)
    dm.passive(True)
    dm.secure(False)
    fetched = []
    notfetched = []
    notfetchedretry = []

    # 'jobkey' instead of 'id' to avoid shadowing the builtin
    for (jobkey, job) in jobs.items():
        if jobkey not in downloadfiles:
            continue
        jobid = job.JobID
        # If connection URL is different reconnect
        # NOTE(review): this compares a URL string with the DataPoint
        # handle itself, so it is likely always true — confirm whether
        # dp.GetURL().ConnectionURL() was intended
        if arc.URL(jobid).ConnectionURL() != dp:
            datapoint = aCTUtils.DataPoint(jobid, self.uc)
            dp = datapoint.h
        localdir = str(self.conf.get(['tmp', 'dir'
                                      ])) + jobid[jobid.rfind('/'):] + '/'

        files = downloadfiles[jobkey].split(';')
        # raw strings: '\*' etc. are invalid escapes in py3 string literals
        if re.search(r'[\*\[\]\?]', downloadfiles[jobkey]):
            # found wildcard, need to get sessiondir list
            remotefiles = self.listUrlRecursive(jobid)
            expandedfiles = []
            for wcf in files:
                if re.search(r'[\*\[\]\?]', wcf):
                    # only match wildcards in matching dirs
                    expandedfiles += [
                        rf for rf in remotefiles
                        if fnmatch.fnmatch(rf, wcf)
                        and os.path.dirname(rf) == os.path.dirname(wcf)
                    ]
                else:
                    expandedfiles.append(wcf)
            # remove duplicates from wildcard matching through set
            files = list(set(expandedfiles))

        for f in files:
            localfile = str(localdir + f)
            localfiledir = localfile[:localfile.rfind('/')]
            # create required local dirs (0o755 replaces py2-only 0755)
            try:
                os.makedirs(localfiledir, 0o755)
            except OSError as e:
                if e.errno != errno.EEXIST or not os.path.isdir(
                        localfiledir):
                    self.log.warning('Failed to create directory %s: %s',
                                     localfiledir, os.strerror(e.errno))
                    notfetched.append(jobid)
                    break
            remotefile = arc.URL(str(jobid + '/' + f))
            dp.SetURL(remotefile)
            localdp = aCTUtils.DataPoint(localfile, self.uc)
            # do the copy
            status = dm.Transfer(dp, localdp.h, arc.FileCache(),
                                 arc.URLMap())
            if not status and str(status).find(
                    'File unavailable'
            ) == -1:  # tmp fix for globus error which is always retried
                if status.Retryable():
                    self.log.warning(
                        'Failed to download but will retry %s: %s',
                        dp.GetURL().str(), str(status))
                    notfetchedretry.append(jobid)
                else:
                    self.log.error(
                        'Failed to download with permanent failure %s: %s',
                        dp.GetURL().str(), str(status))
                    notfetched.append(jobid)
                break
            self.log.info('Downloaded %s', dp.GetURL().str())

        if jobid not in notfetched and jobid not in notfetchedretry:
            fetched.append(jobid)

    return (fetched, notfetched, notfetchedretry)
root_logger.addDestination(stream)
# Set threshold to VERBOSE or DEBUG for more information
root_logger.setThreshold(arc.ERROR)

# User configuration - paths to proxy certificates etc can be set here
# With no arguments default values are used
cfg = arc.UserConfig()

# Convert the arguments to DataPoint objects, bailing out when a URL
# scheme cannot be handled
endpoints = []
for arg, label in ((sys.argv[1], "source "), (sys.argv[2], "destination ")):
    dp = arc.datapoint_from_url(arg, cfg)
    if dp is None:
        root_logger.msg(arc.ERROR, "Can't handle " + label + arg)
        sys.exit(1)
    endpoints.append(dp)
source, destination = endpoints

# DataMover does the transfer
mover = arc.DataMover()
# Show transfer progress
mover.verbose(True)
# Don't attempt to retry on error
mover.retry(False)

# Do the transfer
status = mover.Transfer(source, destination, arc.FileCache(), arc.URLMap())

# Print the exit status of the transfer
sys.stdout.write("%s\n" % str(status))