Beispiel #1
0
    def test_creat_commands_slf_exception(self):
        """ An IOError is expected when using 'foo-boo-loo-bloop' as output """
        fastq_fps = ['sample1_r1.fastq', 'sample2_r1.fastq']

        # presumably the error is raised because this output directory does
        # not exist -- TODO confirm against create_commands_slf
        with self.assertRaises(IOError):
            # The result is indexed but deliberately discarded; only the
            # exception matters here.
            create_commands_slf(fastq_fps, 'sampleid_by_file',
                                'foo-boo-loo-bloop')[0][0][1]
Beispiel #2
0
    def test_create_commands_slf_by_sampleID(self):
        """ The 'sampleid_by_file' method yields the expected command """
        fastq_fps = ['sample1_r1.fastq', 'sample2_r1.fastq']
        expected = "split_libraries_fastq.py  -i sample1_r1.fastq,sample2_r1.fastq --sample_ids sample1,sample2 -o sl_out  --barcode_type 'not-barcoded'"

        commands = create_commands_slf(fastq_fps, 'sampleid_by_file',
                                       "sl_out")
        # The string under test sits at index [0][0][1] of the returned value.
        observed = commands[0][0][1]

        self.assertEqual(observed, expected)
Beispiel #3
0
    def test_create_commands_slf_by_sampleID(self):
        """ The 'sampleid_by_file' method yields the expected command """
        fastq_fps = ['sample1_r1.fastq', 'sample2_r1.fastq']
        expected = "split_libraries_fastq.py  -i sample1_r1.fastq,sample2_r1.fastq --sample_ids sample1,sample2 -o sl_out  --barcode_type 'not-barcoded'"

        commands = create_commands_slf(fastq_fps, 'sampleid_by_file',
                                       "sl_out")
        # The string under test sits at index [0][0][1] of the returned value.
        observed = commands[0][0][1]

        self.assertEqual(observed, expected)
Beispiel #4
0
    def test_create_commands_slf_mapping_barcodes(self):
        """ The 'mapping_barcode_files' method yields the expected command """
        # Each read file name is keyed to a pair of companion file names.
        fastq_to_companions = {
            'sample1_r1.fastq': ('sample1_mapping.txt', 'sample1_bc.fastq'),
            'sample2_r1.fastq': ('sample2_mapping.txt', 'sample2_bc.fastq')}
        expected = "split_libraries_fastq.py  -i sample1_r1.fastq,sample2_r1.fastq --barcode_read_fps sample1_mapping.txt,sample2_mapping.txt --mapping_fps sample1_bc.fastq,sample2_bc.fastq -o sl_out "

        commands = create_commands_slf(fastq_to_companions,
                                       'mapping_barcode_files', "sl_out")
        # The string under test sits at index [0][0][1] of the returned value.
        observed = commands[0][0][1]

        self.assertEqual(observed, expected)
Beispiel #5
0
    def test_create_commands_slf_mapping_barcodes(self):
        """ The 'mapping_barcode_files' method yields the expected command """
        # Each read file name is keyed to a pair of companion file names.
        fastq_to_companions = {
            'sample1_r1.fastq': ('sample1_mapping.txt', 'sample1_bc.fastq'),
            'sample2_r1.fastq': ('sample2_mapping.txt', 'sample2_bc.fastq')}
        expected = "split_libraries_fastq.py  -i sample1_r1.fastq,sample2_r1.fastq --barcode_read_fps sample1_mapping.txt,sample2_mapping.txt --mapping_fps sample1_bc.fastq,sample2_bc.fastq -o sl_out "

        commands = create_commands_slf(fastq_to_companions,
                                       'mapping_barcode_files', "sl_out")
        # The string under test sits at index [0][0][1] of the returned value.
        observed = commands[0][0][1]

        self.assertEqual(observed, expected)
Beispiel #6
0
    def test_create_commands_slf_by_sampleID(self):
        """ 'sampleid_by_file' command is built around files in temp_dir """
        fastq_fps = ['sample1_r1.fastq', 'sample2_r1.fastq']
        # an existing directory is required as the output location
        out_dir = self.temp_dir

        template = ("split_libraries_fastq.py  -i {output_dir}/seqs.txt -o "
                    "{output_dir} --read_arguments_from_file --sample_ids "
                    "{output_dir}/sample_ids.txt --barcode_type "
                    "'not-barcoded' ")
        expected = template.format(output_dir=out_dir)

        commands = create_commands_slf(fastq_fps, 'sampleid_by_file', out_dir)
        # The string under test sits at index [0][0][1] of the returned value.
        observed = commands[0][0][1]

        self.assertEqual(observed, expected)
Beispiel #7
0
    def test_create_commands_slf_added_options(self):
        """ Extra parameters and wrapping text show up in the command """
        fastq_fps = ['sample1/sample_r1.fastq', 'sample2/sample_r1.fastq']
        # Optional positional arguments, in the order they are passed below.
        extra_args = (
            "--max_bad_run_length 15",   # params
            "echo",                      # leading_text
            " | qsub -N BigErn -k oe",   # trailing_text
            True,                        # include_input_dir_path
            True,                        # remove_filepath_in_name
        )
        expected = "echo split_libraries_fastq.py --max_bad_run_length 15 -i sample1/sample_r1.fastq,sample2/sample_r1.fastq --sample_ids sample1,sample2 -o sl_out  | qsub -N BigErn -k oe --barcode_type 'not-barcoded'"

        commands = create_commands_slf(fastq_fps, 'sampleid_by_file',
                                       "sl_out", *extra_args)
        # The string under test sits at index [0][0][1] of the returned value.
        observed = commands[0][0][1]

        self.assertEqual(observed, expected)
Beispiel #8
0
    def test_create_commands_slf_added_options(self):
        """ Extra parameters and wrapping text show up in the command """
        fastq_fps = ['sample1/sample_r1.fastq', 'sample2/sample_r1.fastq']
        # Optional positional arguments, in the order they are passed below.
        extra_args = (
            "--max_bad_run_length 15",   # params
            "echo",                      # leading_text
            " | qsub -N BigErn -k oe",   # trailing_text
            True,                        # include_input_dir_path
            True,                        # remove_filepath_in_name
        )
        expected = "echo split_libraries_fastq.py --max_bad_run_length 15 -i sample1/sample_r1.fastq,sample2/sample_r1.fastq --sample_ids sample1,sample2 -o sl_out  | qsub -N BigErn -k oe --barcode_type 'not-barcoded'"

        commands = create_commands_slf(fastq_fps, 'sampleid_by_file',
                                       "sl_out", *extra_args)
        # The string under test sits at index [0][0][1] of the returned value.
        observed = commands[0][0][1]

        self.assertEqual(observed, expected)
Beispiel #9
0
    def test_create_commands_slf_mapping_barcodes(self):
        """ 'mapping_barcode_files' command is built around temp_dir files """
        # Each read file name is keyed to a pair of companion file names.
        fastq_to_companions = {
            'sample1_r1.fastq': ('sample1_mapping.txt', 'sample1_bc.fastq'),
            'sample2_r1.fastq': ('sample2_mapping.txt', 'sample2_bc.fastq')}
        # an existing directory is required as the output location
        out_dir = self.temp_dir

        template = ("split_libraries_fastq.py  -i {output_dir}/seqs.txt -o "
                    "{output_dir} --read_arguments_from_file "
                    "--barcode_read_fps {output_dir}/barcodes.txt "
                    "--mapping_fps {output_dir}/maps.txt ")
        expected = template.format(output_dir=out_dir)

        commands = create_commands_slf(fastq_to_companions,
                                       'mapping_barcode_files', out_dir)
        # The string under test sits at index [0][0][1] of the returned value.
        observed = commands[0][0][1]

        self.assertEqual(observed, expected)
Beispiel #10
0
    def test_create_commands_slf_added_options(self):
        """ Extra parameters and wrapping text show up around temp_dir files """
        fastq_fps = ['sample1/sample_r1.fastq', 'sample2/sample_r1.fastq']
        # an existing directory is required as the output location
        out_dir = self.temp_dir
        # Optional positional arguments, in the order they are passed below.
        extra_args = (
            "--max_bad_run_length 15",   # params
            "echo",                      # leading_text
            " | qsub -N BigErn -k oe",   # trailing_text
            True,                        # include_input_dir_path
            True,                        # remove_filepath_in_name
        )

        template = ("echo split_libraries_fastq.py --max_bad_run_length 15 -i "
                    "{output_dir}/seqs.txt -o {output_dir} "
                    "--read_arguments_from_file --sample_ids "
                    "{output_dir}/sample_ids.txt --barcode_type 'not-barcoded'"
                    "  | qsub -N BigErn -k oe")
        expected = template.format(output_dir=out_dir)

        commands = create_commands_slf(fastq_fps, 'sampleid_by_file',
                                       out_dir, *extra_args)
        # The string under test sits at index [0][0][1] of the returned value.
        observed = commands[0][0][1]

        self.assertEqual(observed, expected)
def main():
    """Build split_libraries_fastq commands for a directory and handle them.

    Steps, in order:

    1. Parse the command-line options and reject
       --remove_filepath_in_name without --include_input_dir_path.
    2. If a parameter file was given, parse it and turn its
       'split_libraries_fastq' section into a parameter string.
    3. Create the output directory.
    4. Walk ``input_dir`` collecting fastq files and, for the
       'mapping_barcode_files' method, mapping files that are then paired
       with the reads via get_matching_files.
    5. Build the commands with create_commands_slf and hand them either to
       print_commands (when ``opts.print_only`` is set) or to
       call_commands_serially, together with a WorkflowLogger.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    demultiplexing_method = opts.demultiplexing_method
    parameter_fp = opts.parameter_fp
    read_indicator = opts.read_indicator
    barcode_indicator = opts.barcode_indicator
    mapping_indicator = opts.mapping_indicator
    # Kept as a tuple because str.endswith accepts a tuple of suffixes.
    mapping_extensions = tuple(opts.mapping_extensions.split(','))
    sampleid_indicator = opts.sampleid_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name enabled, "
                            "--include_input_dir_path must be enabled.")

    if parameter_fp:
        # NOTE(review): the 'U' open mode is accepted by Python 2 but was
        # removed in Python 3.11 -- revisit if this script is ported.
        with open(parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['split_libraries_fastq'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    fastq_extensions = ('.fastq.gz', '.fastq', '.fq.gz', '.fq')

    # A single endswith() call per file means a path is recorded at most
    # once, even when several suffixes match the same file name.
    all_fastq = [abspath(join(root, fp))
                 for root, _, fps in walk(input_dir)
                 for fp in fps
                 if fp.endswith(fastq_extensions)]

    if demultiplexing_method == 'mapping_barcode_files':
        all_mapping = [abspath(join(root, fp))
                       for root, _, fps in walk(input_dir)
                       for fp in fps
                       if fp.endswith(mapping_extensions)]

        all_files = get_matching_files(all_fastq, all_mapping, read_indicator,
                                       barcode_indicator, mapping_indicator)
    else:
        all_files = all_fastq

    commands = create_commands_slf(all_files, demultiplexing_method,
                                   output_dir, params_str, leading_text,
                                   trailing_text, include_input_dir_path,
                                   remove_filepath_in_name, sampleid_indicator)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
def main():
    """Build split_libraries_fastq commands for a directory and handle them.

    Steps, in order:

    1. Parse the command-line options and reject
       --remove_filepath_in_name without --include_input_dir_path.
    2. If a parameter file was given, parse it and turn its
       'split_libraries_fastq' section into a parameter string.
    3. Create the output directory.
    4. Walk ``input_dir`` collecting fastq files and, for the
       'mapping_barcode_files' method, mapping files that are then paired
       with the reads via get_matching_files.
    5. Build the commands with create_commands_slf and hand them either to
       print_commands (when ``opts.print_only`` is set) or to
       call_commands_serially, together with a WorkflowLogger.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    demultiplexing_method = opts.demultiplexing_method
    parameter_fp = opts.parameter_fp
    read_indicator = opts.read_indicator
    barcode_indicator = opts.barcode_indicator
    mapping_indicator = opts.mapping_indicator
    # Kept as a tuple because str.endswith accepts a tuple of suffixes.
    mapping_extensions = tuple(opts.mapping_extensions.split(','))
    sampleid_indicator = opts.sampleid_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name enabled, "
            "--include_input_dir_path must be enabled.")

    if parameter_fp:
        # NOTE(review): the 'U' open mode is accepted by Python 2 but was
        # removed in Python 3.11 -- revisit if this script is ported.
        with open(parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['split_libraries_fastq'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    fastq_extensions = ('.fastq.gz', '.fastq', '.fq.gz', '.fq')

    # A single endswith() call per file means a path is recorded at most
    # once, even when several suffixes match the same file name.
    all_fastq = [abspath(join(root, fp))
                 for root, _, fps in walk(input_dir)
                 for fp in fps
                 if fp.endswith(fastq_extensions)]

    if demultiplexing_method == 'mapping_barcode_files':
        all_mapping = [abspath(join(root, fp))
                       for root, _, fps in walk(input_dir)
                       for fp in fps
                       if fp.endswith(mapping_extensions)]

        all_files = get_matching_files(all_fastq, all_mapping,
            read_indicator, barcode_indicator, mapping_indicator)
    else:
        all_files = all_fastq

    commands = create_commands_slf(all_files, demultiplexing_method, output_dir,
        params_str, leading_text, trailing_text, include_input_dir_path,
        remove_filepath_in_name, sampleid_indicator)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)