def _retrieve_singlefiles(
    job: CalcJobNode,
    transport: Transport,
    folder: SandboxFolder,
    retrieve_file_list: List[Tuple[str, str, str]],
    logger_extra: Optional[dict] = None,
):
    """Fetch the files of the singlefile list and attach each one to the job as a stored data node.

    The work happens in separate phases, in this order: every file is requested from the remote,
    the entries whose local copy does not exist are dropped, a node is built for each remaining
    entry, and finally all nodes are stored.

    :param job: the calculation job node; it becomes the source of a ``CREATE`` link to each new node.
    :param transport: the transport used to copy the remote files.
    :param folder: the folder whose ``abspath`` receives the files, each under its base name.
    :param retrieve_file_list: tuples of ``(linkname, subclassname, filename)``: the link label, the
        name passed to ``DataFactory`` to obtain the node class, and the remote file path.
    :param logger_extra: passed as ``extra`` to every debug log call.
    """
    # Phase 1: request every file; remote files that do not exist are silently ignored by the transport.
    fetched = []
    for link_label, class_name, remote_path in retrieve_file_list:
        EXEC_LOGGER.debug(
            f"[retrieval of calc {job.pk}] Trying to retrieve remote singlefile '{remote_path}'",
            extra=logger_extra,
        )
        local_path = os.path.join(folder.abspath, os.path.basename(remote_path))
        transport.get(remote_path, local_path, ignore_nonexisting=True)
        fetched.append((link_label, class_name, local_path))

    # Phase 2: keep only the entries whose local copy is actually present.
    available = [entry for entry in fetched if os.path.exists(entry[2])]

    # Phase 3: build one node per retrieved file and link it to the job.
    nodes = []
    for link_label, class_name, local_path in available:
        node_class = DataFactory(class_name)
        node = node_class(file=local_path)
        node.add_incoming(job, link_type=LinkType.CREATE, link_label=link_label)
        nodes.append(node)

    # Phase 4: store the nodes only after all of them have been created.
    for node in nodes:
        EXEC_LOGGER.debug(
            f'[retrieval of calc {job.pk}] Storing retrieved_singlefile={node.pk}',
            extra=logger_extra,
        )
        node.store()
def retrieve_files_from_list(
    calculation: CalcJobNode,
    transport: Transport,
    folder: str,
    retrieve_list: List[Union[str, Tuple[str, str, int], list]],
) -> None:
    """
    Retrieve all the files in the retrieve_list from the remote into the local folder through the transport.

    The entries in the retrieve_list can be of two types:

        * a string
        * a list or tuple

    If it is a string, it is the remote filepath (possibly containing wildcards) and is handed to the
    transport as is; every match is copied directly into ``folder`` under its base name.

    If the item is a list or tuple, it must unpack into exactly three elements:

        * remotepath
        * localpath
        * depth

    The local target of each remote file is ``localpath`` followed by the last ``depth`` components of the
    remote path; with a ``depth`` of zero or less the target is ``localpath`` itself. This holds whether or
    not the remotepath contains wildcards. Missing local parent directories are created only when ``depth``
    is greater than one.

    Remote files that do not exist are skipped by the transport (``ignore_nonexisting=True``).

    :param calculation: the calculation job node; only its ``pk`` is read, for the debug log messages.
    :param transport: the Transport instance.
    :param folder: an absolute path to the local folder into which the files are copied.
    :param retrieve_list: the list of files to retrieve.
    """
    for item in retrieve_list:
        if isinstance(item, (list, tuple)):
            remote_path, local_path, depth = item

            # A pattern expands to all its matches, a plain path stands for itself.
            if transport.has_magic(remote_path):
                remote_names = transport.glob(remote_path)
            else:
                remote_names = [remote_path]

            # Keep the last `depth` components of the remote path below the local path.
            local_names = []
            for rem in remote_names:
                tail = rem.split(os.path.sep)[-depth:] if depth > 0 else []
                local_names.append(os.path.sep.join([local_path] + tail))

            if depth > 1:
                # Nested targets need their parent directories; `exist_ok` avoids the race between
                # checking for the directory and creating it.
                for local_name in local_names:
                    os.makedirs(os.path.join(folder, os.path.dirname(local_name)), exist_ok=True)
        else:
            # A plain string: every match lands flat in the folder under its base name.
            if transport.has_magic(item):
                remote_names = transport.glob(item)
            else:
                remote_names = [item]
            local_names = [os.path.basename(rem) for rem in remote_names]

        for rem, loc in zip(remote_names, local_names):
            transport.logger.debug(
                f"[retrieval of calc {calculation.pk}] Trying to retrieve remote item '{rem}'"
            )
            transport.get(rem, os.path.join(folder, loc), ignore_nonexisting=True)