def test_get_preprocess_fastq_cmd_per_sample_FASTQ_failure(self):
        metadata_dict = {
            'SKB8.640193': {'run_prefix': "sample1_failure", 'primer': 'A',
                            'barcode': 'A', 'center_name': 'ANL',
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'A',
                            'experiment_design_description': 'A'}}
        md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # This part should fail
        fp1 = self.path_builder('sample1_failure.fastq')
        with open(fp1, 'w') as f:
            f.write('\n')
        self.files_to_remove.append(fp1)
        fp2 = self.path_builder('sample1_failure.barcodes.fastq.gz')
        with open(fp2, 'w') as f:
            f.write('\n')
        self.files_to_remove.append(fp2)
        forward_filepath_id = convert_to_id('raw_forward_seqs',
                                            'filepath_type')
        barcode_filepath_id = convert_to_id('raw_barcodes', 'filepath_type')

        fps = [(fp1, forward_filepath_id), (fp2, barcode_filepath_id)]

        filetype_id = get_filetypes()['per_sample_FASTQ']
        raw_data = RawData.create(filetype_id, [prep_template], fps)
        params = [p for p in list(PreprocessedIlluminaParams.iter())
                  if p.name == 'per sample FASTQ defaults'][0]

        with self.assertRaises(ValueError):
            _get_preprocess_fastq_cmd(raw_data, prep_template, params)
Example #2
0
    def test_get_preprocess_fastq_cmd(self):
        raw_data = RawData(1)
        params = PreprocessedIlluminaParams(1)
        prep_template = PrepTemplate(1)
        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
            raw_data, prep_template, params)

        get_raw_path = partial(join, self.db_dir, 'raw_data')
        seqs_fp = get_raw_path('1_s_G1_L001_sequences.fastq.gz')
        bc_fp = get_raw_path('1_s_G1_L001_sequences_barcodes.fastq.gz')

        exp_cmd_1 = ("split_libraries_fastq.py --store_demultiplexed_fastq -i "
                     "{} -b {} "
                     "-m ".format(seqs_fp, bc_fp))
        exp_cmd_2 = ("-o {0} --barcode_type golay_12 --max_bad_run_length 3 "
                     "--max_barcode_errors 1.5 "
                     "--min_per_read_length_fraction 0.75 "
                     "--phred_quality_threshold 3 "
                     "--sequence_max_n 0".format(obs_output_dir))

        # We are splitting the command into two parts because there is no way
        # that we can know the filepath of the mapping file. We thus split the
        # command on the mapping file path and we check that the two parts
        # of the commands is correct
        obs_cmd_1 = obs_cmd[:len(exp_cmd_1)]
        obs_cmd_2 = obs_cmd[len(exp_cmd_1):].split(" ", 1)[1]

        self.assertEqual(obs_cmd_1, exp_cmd_1)
        self.assertEqual(obs_cmd_2, exp_cmd_2)
    def test_get_preprocess_fastq_cmd(self):
        raw_data = RawData(1)
        params = PreprocessedIlluminaParams(1)
        prep_template = PrepTemplate(1)
        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
            raw_data, prep_template, params)

        get_raw_path = partial(join, self.db_dir, 'raw_data')
        seqs_fp = get_raw_path('1_s_G1_L001_sequences.fastq.gz')
        bc_fp = get_raw_path('1_s_G1_L001_sequences_barcodes.fastq.gz')

        exp_cmd_1 = ("split_libraries_fastq.py --store_demultiplexed_fastq -i "
                     "{} -b {} "
                     "-m ".format(seqs_fp, bc_fp))
        exp_cmd_2 = ("-o {0} --barcode_type golay_12 --max_bad_run_length 3 "
                     "--max_barcode_errors 1.5 "
                     "--min_per_read_length_fraction 0.75 "
                     "--phred_quality_threshold 3 "
                     "--sequence_max_n 0".format(obs_output_dir))

        # We are splitting the command into two parts because there is no way
        # that we can know the filepath of the mapping file. We thus split the
        # command on the mapping file path and we check that the two parts
        # of the commands is correct
        obs_cmd_1 = obs_cmd[:len(exp_cmd_1)]
        obs_cmd_2 = obs_cmd[len(exp_cmd_1):].split(" ", 1)[1]

        self.assertEqual(obs_cmd_1, exp_cmd_1)
        self.assertEqual(obs_cmd_2, exp_cmd_2)
Example #4
0
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ(self):
        metadata_dict = {
            'SKB8.640193': {'run_prefix': "sample1", 'primer': 'A',
                            'barcode': 'A', 'center_name': 'ANL',
                            'platform': 'ILLUMINA',
                            'instrument_model': 'Illumina MiSeq',
                            'library_construction_protocol': 'A',
                            'experiment_design_description': 'A'},
            'SKD8.640184': {'run_prefix': "sample2", 'primer': 'A',
                            'barcode': 'A', 'center_name': 'ANL',
                            'platform': 'ILLUMINA',
                            'instrument_model': 'Illumina MiSeq',
                            'library_construction_protocol': 'A',
                            'experiment_design_description': 'A'}}
        md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        fp1 = self.path_builder('sample1.fastq')
        with open(fp1, 'w') as f:
            f.write('\n')
        self.files_to_remove.append(fp1)
        fp2 = self.path_builder('sample2.fastq.gz')
        with open(fp2, 'w') as f:
            f.write('\n')
        self.files_to_remove.append(fp2)
        filepath_id = convert_to_id('raw_forward_seqs', 'filepath_type')

        fps = [(fp1, filepath_id), (fp2, filepath_id)]

        filetype_id = get_filetypes()['per_sample_FASTQ']
        raw_data = RawData.create(filetype_id, [prep_template], fps)
        params = [p for p in list(PreprocessedIlluminaParams.iter())
                  if p.name == 'per sample FASTQ defaults'][0]

        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(raw_data,
                                                            prep_template,
                                                            params)

        raw_fps = ','.join([fp for _, fp, _ in
                            sorted(raw_data.get_filepaths())])
        exp_cmd = (
            "split_libraries_fastq.py --store_demultiplexed_fastq -i "
            "{} --sample_ids 1.SKB8.640193,1.SKD8.640184 -o {} --barcode_type "
            "not-barcoded --max_bad_run_length 3 --max_barcode_errors 1.5 "
            "--min_per_read_length_fraction 0.75 --phred_quality_threshold 3 "
            "--sequence_max_n 0").format(raw_fps, obs_output_dir)
        self.assertEqual(obs_cmd, exp_cmd)