Esempio n. 1
0
    def test_find_database_index_index(self):
        """
        Check find_database_index when the input is already a bowtie2 index:
        the value returned should equal the index that was passed in
        """

        # the configured index is both the input and the expected result
        expected_index = cfg.bowtie2_db_index

        found_index = utilities.find_database_index(expected_index, "bowtie2")

        self.assertEqual(found_index, expected_index)
Esempio n. 2
0
def update_configuration(args):
    """ Update the run settings based on the arguments provided

    The ``args`` namespace is modified in place and also returned. This
    function resolves the output directory and input files to absolute
    paths, validates the input files, records the input paths in the
    module-level ``original_input_files`` list, creates the output directory,
    builds the bowtie2 and trimmomatic options, looks up the dependencies
    that are enabled, sets the default output prefix, and replaces the
    reference databases with the indexes found for them.

    Args:
        args: Namespace of parsed command line options.

    Returns:
        The same ``args`` object, after being updated.

    Raises:
        SystemExit: If no input files are provided, or if more than two
            input files are provided. The ``utilities`` helpers are called
            with ``exit_on_error=True`` and presumably also exit on failure
            (not visible from this function).
    """

    # get the full path for the output directory
    args.output_dir = os.path.abspath(args.output_dir)

    # set if temp output should be removed
    args.remove_temp_output = not args.store_temp_output

    # if intermediate output should be removed, then also remove temp output
    if args.remove_intermediate_output:
        args.remove_temp_output = True

    # gather the input files, paired files are only used if both are provided
    args.input = []
    if args.input1 and args.input2:
        args.input.append(os.path.abspath(args.input1))
        args.input.append(os.path.abspath(args.input2))
    if args.unpaired:
        args.input.append(os.path.abspath(args.unpaired))

    # check the number of input files before looking at any of them, so that
    # missing input gives a usage error instead of an IndexError
    if not args.input:
        sys.exit(
            "ERROR: Please provide --input1/--input2 or --unpaired (input) files."
        )
    if len(args.input) > 2:
        sys.exit("ERROR: Please provide at most 2 input files.")

    # check the input files are non-empty and readable
    for input_file in args.input:
        utilities.is_file_readable(input_file, exit_on_error=True)

    #Store original file paths for FASTQC
    original_input_files.extend(args.input)

    # create the output directory if needed
    utilities.create_directory(args.output_dir)

    # set bowtie2 options
    if args.bowtie2_options:
        # parse the options from the user into any array of options
        args.bowtie2_options = utilities.format_options_to_list(
            args.bowtie2_options)
    else:
        # if not set by user, then set to a copy of the default options
        # (a copy so the append below does not modify the list in config)
        args.bowtie2_options = list(config.bowtie2_options)

    # add the quality scores to the bowtie2 options
    # (this must use the quality scores before they are turned into the
    # trimmomatic flag below)
    args.bowtie2_options += [
        config.bowtie2_flag_start + args.trimmomatic_quality_scores
    ]

    # set the mode for single end input file
    if len(args.input) == 1:
        args.decontaminate_pairs = "unpaired"

    # set the bowtie2 mode based on the pairs input
    args.discordant = args.decontaminate_pairs != "lenient"

    # update the quality score option into a flag for trimmomatic
    args.trimmomatic_quality_scores = (config.trimmomatic_flag_start +
                                       args.trimmomatic_quality_scores)

    # find the location of trimmomatic, trimmomatic does not need to be executable
    if not args.bypass_trim:
        args.trimmomatic_path = utilities.find_dependency(
            args.trimmomatic_path,
            config.trimmomatic_jar,
            "trimmomatic",
            "--trimmomatic",
            bypass_permissions_check=True)

    # find the location of bmtagger, if set to run
    if args.reference_db:
        if args.bmtagger:
            args.bmtagger_path = utilities.find_dependency(
                args.bmtagger_path,
                config.bmtagger_exe,
                "bmtagger",
                "--bmtagger",
                bypass_permissions_check=False)
            # add this folder to path, so as to be able to find other dependencies like bmfilter
            utilities.add_exe_to_path(os.path.dirname(args.bmtagger_path))
        else:
            # find the location of bowtie2, if not running with bmtagger
            args.bowtie2_path = utilities.find_dependency(
                args.bowtie2_path,
                config.bowtie2_exe,
                "bowtie2",
                "--bowtie2",
                bypass_permissions_check=False)

    # find the location of trf, if set to run
    if not args.bypass_trf:
        args.trf_path = utilities.find_dependency(
            args.trf_path,
            config.trf_exe,
            "trf",
            "--trf",
            bypass_permissions_check=False)

    # if fastqc is set to be run, check if the executable can be found
    if args.fastqc_start or args.fastqc_end:
        args.fastqc_path = utilities.find_dependency(
            args.fastqc_path,
            config.fastqc_exe,
            "fastqc",
            "--fastqc",
            bypass_permissions_check=False)

    # set the default output prefix, based on the name of the first input file
    if args.output_prefix is None:
        infile_name = os.path.basename(args.input[0])
        if infile_name.endswith((".gz", ".bz2")):
            # remove compression extension if present
            infile_name = os.path.splitext(infile_name)[0]
        infile_base = os.path.splitext(infile_name)[0]
        args.output_prefix = infile_base + "_kneaddata"

    # find the bowtie2 indexes for each of the reference databases
    # reference database inputs can be directories, indexes, or index files
    if args.reference_db:
        database_type = "bmtagger" if args.bmtagger else "bowtie2"
        args.reference_db = [
            utilities.find_database_index(os.path.abspath(directory),
                                          database_type)
            for directory in args.reference_db
        ]

    return args