コード例 #1
0
def _gen_helper_dict(filtered_inputs):
    '''
    Build a flat dict of helper values describing the job's input files,
    mirroring the variables that are made available to a bash app.

    For every name in *filtered_inputs* that has an entry in the bash-variable
    analysis of the job input file, three keys are produced: ``<name>_path``,
    ``<name>_name`` and ``<name>_prefix``. If a key is already present, the
    earlier value is kept.

    :param filtered_inputs: iterable of input variable names to describe
    :returns: dict mapping the generated helper keys to their values
    '''
    input_json = file_load_utils.get_input_json_file()
    descriptions, _unused = file_load_utils.analyze_bash_vars(input_json, None)

    # (suffix appended to the input name, field read from its description)
    suffix_and_field = (
        ('_path', "path"),
        ('_name', "basename"),
        ('_prefix', "prefix"),
    )

    helper_vars = {}
    for name in filtered_inputs:
        if name in descriptions:
            description = descriptions[name]
            for suffix, field in suffix_and_field:
                # setdefault keeps the first value stored under a key.
                helper_vars.setdefault(name + suffix, description[field])

    return helper_vars
コード例 #2
0
def _gen_helper_dict(filtered_inputs):
    '''
    Produce the helper-variable dict for the job's input files. The result
    resembles the set of variables created when a bash app runs.

    Each name in *filtered_inputs* that is known to the bash-variable analysis
    of the job input file contributes the keys ``<name>_path``,
    ``<name>_name`` and ``<name>_prefix``. A key that already exists is never
    overwritten.

    :param filtered_inputs: iterable of input variable names to describe
    :returns: dict mapping the generated helper keys to their values
    '''
    json_path = file_load_utils.get_input_json_file()
    known_vars, _unused = file_load_utils.analyze_bash_vars(json_path, None)

    result = {}
    for var_name in filtered_inputs:
        if var_name not in known_vars:
            # Nothing is known about this input; it contributes no keys.
            continue

        details = known_vars[var_name]
        candidates = [
            (var_name + '_path', details["path"]),
            (var_name + '_name', details["basename"]),
            (var_name + '_prefix', details["prefix"]),
        ]
        for key, value in candidates:
            # First writer wins: never replace an existing entry.
            if key not in result:
                result[key] = value

    return result
コード例 #3
0
def download_all_inputs(exclude=None, parallel=False, max_threads=None):
    '''
    Download all files that were supplied as inputs to the app.

    :param exclude: List of input variables that should not be downloaded.
    :type exclude: Array of strings
    :param parallel: Should we download multiple files in parallel? (default: False)
    :type parallel: boolean
    :param max_threads: If parallel is True, how many threads should be used
        to download files? (default: number of cores)
    :type max_threads: int
    :returns: flat dict of helper variables as built by ``_gen_helper_dict``.
                For each downloaded input variable FOO that the helper knows
                about, it holds the keys ``FOO_path``, ``FOO_name`` and
                ``FOO_prefix``.
    :raises IOError: if the job input json file cannot be read; a hint is
                printed before the exception is re-raised.

    By convention, if an input parameter "FOO" has value

        {"$dnanexus_link": "file-xxxx"}

    and filename INPUT.TXT, then the linked file will be downloaded into the
    path:

        $HOME/in/FOO/INPUT.TXT

    If an input is an array of files, then all files will be placed into
    numbered subdirectories under a parent directory named for the
    input. For example, if the input key is FOO, and the inputs are {A, B,
    C}.vcf then, the directory structure will be:

        $HOME/in/FOO/0/A.vcf
                     1/B.vcf
                     2/C.vcf

    Zero padding is used to ensure argument order. For example, if there are
    12 input files {A, B, C, D, E, F, G, H, I, J, K, L}.vcf, the directory
    structure will be:

        $HOME/in/FOO/00/A.vcf
                     ...
                     11/L.vcf

    This allows using shell globbing (FOO/*/*.vcf) to get all the files in the input
    order and prevents issues with files which have the same filename.'''

    # Input directory, where all inputs are downloaded
    idir = file_load_utils.get_input_dir()

    # Resolve the path before entering the try block so that the error
    # handler below can always refer to it.
    job_input_file = file_load_utils.get_input_json_file()
    try:
        dirs, inputs, _rest = file_load_utils.get_job_input_filenames(job_input_file)
    except IOError:
        msg = 'Error: Could not find the input json file: {0}.\n'.format(job_input_file)
        msg += '       This function should only be called from within a running job.'
        print(msg)
        raise

    # Exclude directories
    dirs_to_create = [d for d in dirs
                      if (exclude is None) or (d not in exclude)]

    # Create the directory structure, in preparation for download.
    # Allows performing the download in parallel.
    _create_dirs(idir, dirs_to_create)

    # Remove excluded inputs
    if exclude:
        inputs = file_load_utils.filter_dict(inputs, exclude)

    # Convert to a flat list of elements to download
    to_download = [item for ival_list in inputs.values() for item in ival_list]

    # Download the files
    if parallel:
        # A falsy max_threads (None or 0) falls back to the number of cores.
        max_num_parallel_downloads = max_threads or multiprocessing.cpu_count()
        _parallel_file_download(to_download, idir, max_num_parallel_downloads)
    else:
        _sequential_file_download(to_download, idir)

    return _gen_helper_dict(inputs)
コード例 #4
0
def mount_all_inputs(exclude=None, verbose=False):
    '''
    Mount all files that were supplied as inputs to the app, using dxfuse.

    :param exclude: List of input variables that should not be mounted.
    :type exclude: Array of strings
    :param verbose: Start dxfuse with '-verbose 2' logging
    :type verbose: boolean
    :returns: flat dict of helper variables as built by ``_gen_helper_dict``.
                For each mounted input variable FOO that the helper knows
                about, it holds the keys ``FOO_path``, ``FOO_name`` and
                ``FOO_prefix``.
    :raises IOError: if the job input json file cannot be read; a hint is
                printed before the exception is re-raised.
    :raises KeyError: if the HOME environment variable is not set.
    :raises subprocess.CalledProcessError: if creating the mount directory
                (including when it already exists) or any dxfuse invocation
                exits with a non-zero status.

    By convention, if an input parameter "FOO" has value

        {"$dnanexus_link": "file-xxxx"}

    and filename INPUT.TXT, then the linked file will be mounted into the
    path:

        $HOME/in/FOO/INPUT.TXT

    If an input is an array of files, then all files will be placed into
    numbered subdirectories under a parent directory named for the
    input. For example, if the input key is FOO, and the inputs are {A, B,
    C}.vcf then, the directory structure will be:

        $HOME/in/FOO/0/A.vcf
                     1/B.vcf
                     2/C.vcf

    Zero padding is used to ensure argument order. For example, if there are
    12 input files {A, B, C, D, E, F, G, H, I, J, K, L}.vcf, the directory
    structure will be:

        $HOME/in/FOO/00/A.vcf
                     ...
                     11/L.vcf

    This allows using shell globbing (FOO/*/*.vcf) to get all the files in the input
    order and prevents issues with files which have the same filename.'''

    print("Mounting inputs...")

    home_dir = os.environ["HOME"]
    mount_dir = os.path.join(home_dir, "in")
    mount_manifest_file = os.path.join(home_dir, "mount-manifest.json")
    dxfuse_cmd = _which("dxfuse")
    if dxfuse_cmd is None:
        err_exit("dxfuse is not installed on this system")

    # Plain mkdir (no -p): this fails if the mount directory already exists.
    subprocess.check_output(["mkdir", mount_dir])

    # Resolve the path before entering the try block so that the error
    # handler below can always refer to it.
    job_input_file = file_load_utils.get_input_json_file()
    try:
        _dirs, inputs, _rest = file_load_utils.get_job_input_filenames(job_input_file)
    except IOError:
        msg = 'Error: Could not find the input json file: {0}.\n'.format(job_input_file)
        msg += '       This function should only be called from within a running job.'
        print(msg)
        raise

    # Remove excluded inputs
    if exclude:
        inputs = file_load_utils.filter_dict(inputs, exclude)

    # Convert to a flat list of elements to mount
    to_mount = [item for ival_list in inputs.values() for item in ival_list]

    files_manifest = _build_mount_manifest(to_mount)
    with open(mount_manifest_file, 'w') as mfile:
        json.dump(files_manifest, mfile)

    # universal_newlines=True makes check_output return text rather than
    # bytes, so the messages below do not print as b'...' on Python 3.
    dxfuse_version = subprocess.check_output([dxfuse_cmd, "-version"],
                                             universal_newlines=True)
    print("Using dxfuse version " + dxfuse_version.strip())

    # Effective user/group ids of this process (what `id -u` / `id -g` print).
    uid = str(os.geteuid())
    gid = str(os.getegid())
    cmd = [dxfuse_cmd, "-uid", uid, "-gid", gid, mount_dir, mount_manifest_file]
    if verbose:
        # Options go right after the executable, ahead of the other arguments.
        cmd[1:1] = ["-verbose", "2"]
    print(subprocess.check_output(cmd, universal_newlines=True))

    print("Done mounting inputs.")

    # List the mounted tree; the output goes straight to stdout and the exit
    # status is deliberately ignored.
    subprocess.call(["find", mount_dir, "-name", "*"])

    return _gen_helper_dict(inputs)