def test_get_matching_files(self):
    """ get_matching_files pairs each reads file with its barcode and
    mapping files """
    fastq_fps = [
        'sample1_r1_000.fastq',
        'sample2_r1_000.fastq',
        'sample1_bc_000.fastq',
        'sample2_bc_000.fastq',
    ]
    mapping_fps = [
        'sample1_mapping_000.txt',
        'sample2_mapping_000.txt',
    ]

    # Indicators: reads -> 'r1', barcodes -> 'bc', mapping -> 'mapping'.
    matches = get_matching_files(fastq_fps, mapping_fps,
                                 'r1', 'bc', 'mapping')

    # Keys of the result are the reads files.
    self.assertEqual(
        set(matches.keys()),
        {'sample1_r1_000.fastq', 'sample2_r1_000.fastq'})

    # Values are (barcode file, mapping file) tuples; order is not checked.
    self.assertEqual(
        set(matches.values()),
        {
            ('sample1_bc_000.fastq', 'sample1_mapping_000.txt'),
            ('sample2_bc_000.fastq', 'sample2_mapping_000.txt'),
        })
def test_get_matching_files(self):
    """ Reads files map to their (barcode, mapping) file pairs """
    reads_fps = ['sample1_r1_000.fastq', 'sample2_r1_000.fastq']
    barcode_fps = ['sample1_bc_000.fastq', 'sample2_bc_000.fastq']
    mapping_fps = ['sample1_mapping_000.txt', 'sample2_mapping_000.txt']

    # The function receives reads and barcode fastq files in a single list
    # and is expected to tell them apart using the indicator substrings.
    result = get_matching_files(reads_fps + barcode_fps,
                                mapping_fps,
                                'r1',
                                'bc',
                                'mapping')

    observed_reads = set(result.keys())
    observed_pairs = set(result.values())

    expected_reads = set(reads_fps)
    # sample1 barcodes go with sample1 mapping, sample2 with sample2.
    expected_pairs = set(zip(barcode_fps, mapping_fps))

    self.assertEqual(observed_reads, expected_reads)
    self.assertEqual(observed_pairs, expected_pairs)
def main():
    """Collect input files under the input directory and run (or print) the
    commands built by create_commands_slf.

    Steps:
      1. Parse the command line and validate option combinations.
      2. Optionally read a parameters file and extract the
         'split_libraries_fastq' section as a parameter string.
      3. Walk the input directory, collecting fastq files and, for the
         'mapping_barcode_files' demultiplexing method, mapping files that
         are then matched up via get_matching_files.
      4. Build the commands and hand them to the command handler
         (print_commands when --print_only is set, otherwise
         call_commands_serially).

    Exits through option_parser.error if --remove_filepath_in_name is given
    without --include_input_dir_path.
    """
    option_parser, opts, args = parse_command_line_parameters(
        suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    demultiplexing_method = opts.demultiplexing_method
    parameter_fp = opts.parameter_fp
    read_indicator = opts.read_indicator
    barcode_indicator = opts.barcode_indicator
    mapping_indicator = opts.mapping_indicator
    # str.endswith accepts a tuple of suffixes, so each file is tested once
    # against all of them and can never be collected more than once, even if
    # the user-supplied extensions overlap (e.g. "txt,.txt").
    mapping_extensions = tuple(opts.mapping_extensions.split(','))
    sampleid_indicator = opts.sampleid_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name enabled, "
                            "--include_input_dir_path must be enabled.")

    if parameter_fp:
        # NOTE: 'U' requests universal-newline mode; this mode string was
        # removed in Python 3.11, where plain 'r' gives the same behavior.
        with open(parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['split_libraries_fastq'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    fastq_extensions = ('.fastq.gz', '.fastq', '.fq.gz', '.fq')
    # Mapping files are only needed (and only collected) for this method.
    use_mapping_files = demultiplexing_method == 'mapping_barcode_files'

    all_fastq = []
    all_mapping = []
    # Single pass over the directory tree gathers both kinds of file.
    for root, _subdirs, fps in walk(input_dir):
        for fp in fps:
            if fp.endswith(fastq_extensions):
                all_fastq.append(abspath(join(root, fp)))
            if use_mapping_files and fp.endswith(mapping_extensions):
                all_mapping.append(abspath(join(root, fp)))

    if use_mapping_files:
        all_files = get_matching_files(all_fastq, all_mapping,
                                       read_indicator, barcode_indicator,
                                       mapping_indicator)
    else:
        all_files = all_fastq

    commands = create_commands_slf(all_files, demultiplexing_method,
                                   output_dir, params_str, leading_text,
                                   trailing_text, include_input_dir_path,
                                   remove_filepath_in_name,
                                   sampleid_indicator)

    qiime_config = load_qiime_config()

    command_handler = print_commands if print_only else call_commands_serially

    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)