def download_from_remote(self):
    """Fetch this file from its remote storage location.

    Raises:
        RemoteFileException: if the file is not remote, or does not
            exist on the remote storage.
    """
    # Guard clause: refuse early unless the file is remote and present there.
    if not (self.is_remote and self.remote_object.exists()):
        raise RemoteFileException(
            "The file to be downloaded does not seem to exist remotely.")
    logger.info("Downloading from remote: {}".format(self.file))
    self.remote_object.download()
def handle_remote(self, job, upload=True):
    """ Remove local files if they are no longer needed, and upload to S3. """
    if upload:
        # handle output files
        for f in job.expanded_output:
            if f.is_remote:
                f.upload_to_remote()
                remote_mtime = f.mtime
                # immediately force local mtime to match remote,
                # since conversions from S3 headers are not 100% reliable
                # without this, newness comparisons may fail down the line
                f.touch(times=(remote_mtime, remote_mtime))
                if not f.exists_remote:
                    raise RemoteFileException(
                        "The file upload was attempted, but it does not "
                        "exist on remote. Check that your credentials have "
                        "read AND write permissions.")

    if not self.keep_remote_local:
        # handle input files

        def needed(job_, f):
            # A file is still needed if any unfinished job that will run
            # (other than the current one) depends on it.
            return any(
                f in files
                for j, files in self.depending[job_].items()
                if not self.finished(j) and self.needrun(j) and j != job)

        def unneeded_files():
            # Candidates for local removal: remote, not protected, and not
            # explicitly marked to be kept locally.
            def putative(f):
                return (f.is_remote and not f.protected
                        and not f.should_keep_local)

            generated_input = set()
            for job_, files in self.dependencies[job].items():
                generated_input |= files
                for f in filter(putative, files):
                    if not needed(job_, f):
                        yield f
            for f in filter(putative, job.output):
                if not needed(job, f) and f not in self.targetfiles:
                    # BUG FIX: yield each expanded dynamic file (f_), not the
                    # unexpanded pattern f — the original yielded f once per
                    # expansion, so the expanded files were never removed.
                    for f_ in job.expand_dynamic(f):
                        yield f_
            for f in filter(putative, job.input):
                # TODO what about remote inputs that are used by
                # multiple jobs?
                if f not in generated_input:
                    yield f

        for f in unneeded_files():
            logger.info("Removing local output file: {}".format(f))
            f.remove()

        job.rmdir_empty_remote_dirs()