def _gen_helper_dict(filtered_inputs):
    '''
    Build a flat dict of helper values for the given inputs, similar to the
    variables created when running a bash app.

    :param filtered_inputs: Iterable of input variable names (iterating a
        dict yields its keys) for which helper values should be produced.
    :returns: dict with, for every input that has a file description, the
        keys ``<input>_path``, ``<input>_name`` and ``<input>_prefix``.

    Inputs without an entry in the file descriptions are skipped. When a key
    is already present in the result, the first value stored is kept.
    '''
    # Maps each helper-variable suffix to the description field it exposes.
    # The order matters only for which key is inserted first.
    suffix_to_field = (
        ('_path', "path"),
        ('_name', "basename"),
        ('_prefix', "prefix"),
    )

    job_input_file = file_load_utils.get_input_json_file()
    descriptions, _unused = file_load_utils.analyze_bash_vars(job_input_file, None)

    helper_vars = {}
    for name in filtered_inputs:
        if name not in descriptions:
            continue
        desc = descriptions[name]
        for suffix, field in suffix_to_field:
            # setdefault never overwrites, so an earlier key wins a collision.
            helper_vars.setdefault(name + suffix, desc[field])
    return helper_vars
def download_all_inputs(exclude=None, parallel=False, max_threads=None):
    '''
    :param exclude: List of input variables that should not be downloaded.
    :type exclude: Array of strings
    :param parallel: Should we download multiple files in parallel? (default: False)
    :type parallel: boolean
    :param max_threads: If parallel is True, how many threads should be used
        to download files? (default: number of cores)
    :type max_threads: int
    :returns: dict of helper variables as built by :func:`_gen_helper_dict`
        for the inputs that were not excluded: for each such input that has a
        file description, the keys ``<input>_path``, ``<input>_name`` and
        ``<input>_prefix``.
    :raises IOError: if the job input json file cannot be read; a hint is
        printed before the exception is re-raised.

    This function downloads all files that were supplied as inputs to the app.
    By convention, if an input parameter "FOO" has value

        {"$dnanexus_link": "file-xxxx"}

    and filename INPUT.TXT, then the linked file will be downloaded into the
    path:

        $HOME/in/FOO/INPUT.TXT

    If an input is an array of files, then all files will be placed into
    numbered subdirectories under a parent directory named for the
    input. For example, if the input key is FOO, and the inputs are {A, B,
    C}.vcf then, the directory structure will be:

        $HOME/in/FOO/0/A.vcf
                     1/B.vcf
                     2/C.vcf

    Zero padding is used to ensure argument order. For example, if there are
    12 input files {A, B, C, D, E, F, G, H, I, J, K, L}.vcf, the directory
    structure will be:

        $HOME/in/FOO/00/A.vcf
                     ...
                     11/L.vcf

    This allows using shell globbing (FOO/*/*.vcf) to get all the files in the
    input order and prevents issues with files which have the same filename.
    '''
    # Input directory, where all inputs are downloaded
    idir = file_load_utils.get_input_dir()

    # Resolve the path outside the try block: the error handler below formats
    # it, so it must be bound even when reading the file is what fails.
    job_input_file = file_load_utils.get_input_json_file()
    try:
        dirs, inputs, _rest = file_load_utils.get_job_input_filenames(job_input_file)
    except IOError:
        msg = 'Error: Could not find the input json file: {0}.\n'.format(job_input_file)
        msg += ' This function should only be called from within a running job.'
        print(msg)
        raise

    # Exclude directories
    dirs_to_create = [d for d in dirs if exclude is None or d not in exclude]

    # Create the directory structure, in preparation for download.
    # Allows performing the download in parallel.
    _create_dirs(idir, dirs_to_create)

    # Remove excluded inputs
    if exclude:
        inputs = file_load_utils.filter_dict(inputs, exclude)

    # Convert to a flat list of elements to download
    to_download = [elem for ival_list in inputs.values() for elem in ival_list]

    # Download the files
    if parallel:
        # A falsy max_threads (None or 0) falls back to the number of cores.
        max_num_parallel_downloads = max_threads or multiprocessing.cpu_count()
        _parallel_file_download(to_download, idir, max_num_parallel_downloads)
    else:
        _sequential_file_download(to_download, idir)

    return _gen_helper_dict(inputs)
def mount_all_inputs(exclude=None, verbose=False):
    '''
    :param exclude: List of input variables that should not be mounted.
    :type exclude: Array of strings
    :param verbose: Start dxfuse with '-verbose 2' logging
    :type verbose: boolean
    :returns: dict of helper variables as built by :func:`_gen_helper_dict`
        for the inputs that were not excluded: for each such input that has a
        file description, the keys ``<input>_path``, ``<input>_name`` and
        ``<input>_prefix``.
    :raises IOError: if the job input json file cannot be read; a hint is
        printed before the exception is re-raised.
    :raises KeyError: if the HOME environment variable is not set.
    :raises subprocess.CalledProcessError: if ``mkdir``, ``id`` or ``dxfuse``
        exits with a non-zero status (``mkdir`` is run without ``-p``, so an
        already existing ``$HOME/in`` directory is such a failure).

    This function mounts all files that were supplied as inputs to the app.
    By convention, if an input parameter "FOO" has value

        {"$dnanexus_link": "file-xxxx"}

    and filename INPUT.TXT, then the linked file will be mounted into the
    path:

        $HOME/in/FOO/INPUT.TXT

    If an input is an array of files, then all files will be placed into
    numbered subdirectories under a parent directory named for the
    input. For example, if the input key is FOO, and the inputs are {A, B,
    C}.vcf then, the directory structure will be:

        $HOME/in/FOO/0/A.vcf
                     1/B.vcf
                     2/C.vcf

    Zero padding is used to ensure argument order. For example, if there are
    12 input files {A, B, C, D, E, F, G, H, I, J, K, L}.vcf, the directory
    structure will be:

        $HOME/in/FOO/00/A.vcf
                     ...
                     11/L.vcf

    This allows using shell globbing (FOO/*/*.vcf) to get all the files in the
    input order and prevents issues with files which have the same filename.

    The mount manifest is written to $HOME/mount-manifest.json before dxfuse
    is started.
    '''
    def _as_text(raw):
        # On Python 3 check_output returns bytes, which would otherwise be
        # printed as a b'...' repr. On Python 2 bytes is str, so the value is
        # passed through untouched.
        if isinstance(raw, bytes) and not isinstance(raw, str):
            return raw.decode("utf-8", "replace")
        return raw

    print("Mounting inputs...")

    home_dir = os.environ["HOME"]
    mount_dir = os.path.join(home_dir, "in")
    mount_manifest_file = os.path.join(home_dir, "mount-manifest.json")

    dxfuse_cmd = _which("dxfuse")
    if dxfuse_cmd is None:
        # NOTE(review): presumably err_exit terminates the process - confirm.
        err_exit("dxfuse is not installed on this system")

    subprocess.check_output(["mkdir", mount_dir])

    # Resolve the path outside the try block: the error handler below formats
    # it, so it must be bound even when reading the file is what fails.
    job_input_file = file_load_utils.get_input_json_file()
    try:
        _dirs, inputs, _rest = file_load_utils.get_job_input_filenames(job_input_file)
    except IOError:
        msg = 'Error: Could not find the input json file: {0}.\n'.format(job_input_file)
        msg += ' This function should only be called from within a running job.'
        print(msg)
        raise

    # Remove excluded inputs
    if exclude:
        inputs = file_load_utils.filter_dict(inputs, exclude)

    # Convert to a flat list of elements to mount
    to_mount = [elem for ival_list in inputs.values() for elem in ival_list]

    files_manifest = _build_mount_manifest(to_mount)
    with open(mount_manifest_file, 'w') as mfile:
        json.dump(files_manifest, mfile)

    dxfuse_version = subprocess.check_output([dxfuse_cmd, "-version"])
    print("Using dxfuse version " + _as_text(dxfuse_version))

    # Mount with the uid/gid of the current user.
    uid = str(int(subprocess.check_output(["id", "-u"])))
    gid = str(int(subprocess.check_output(["id", "-g"])))
    cmd = [dxfuse_cmd, "-uid", uid, "-gid", gid, mount_dir, mount_manifest_file]
    if verbose:
        # Insert the logging flags directly after the executable name.
        cmd[1:1] = ["-verbose", "2"]
    print(_as_text(subprocess.check_output(cmd)))

    print("Done mounting inputs.")
    # List everything that was mounted; the exit status is deliberately ignored.
    subprocess.call(["find", mount_dir, "-name", "*"])

    return _gen_helper_dict(inputs)