def test_insert_preprocessed_data(self):
    # Stage dummy preprocessing outputs in a temporary directory, hand them
    # to _insert_preprocessed_data, then verify that the expected files
    # exist in the DB directory and that the database row is present.
    study = Study(1)
    params = PreprocessedIlluminaParams(1)
    prep_template = PrepTemplate(1)

    out_dir = mkdtemp()
    self.dirs_to_remove.append(out_dir)
    db_base = join(self.db_dir, "preprocessed_data")

    suffixes = ('seqs.fna', 'seqs.fastq', 'seqs.demux',
                'split_library_log.txt')

    # Each staged file only holds a newline; its content is never checked.
    for suffix in suffixes:
        staged_fp = join(out_dir, suffix)
        with open(staged_fp, 'w') as handle:
            handle.write("\n")
        self.files_to_remove.append(staged_fp)

    # Expected destinations carry a "3_" prefix, the same id that is
    # queried below (presumably the next free id in the test DB -- confirm).
    expected_db_fps = [join(db_base, "3_%s" % suffix)
                       for suffix in suffixes]
    self.files_to_remove.extend(expected_db_fps)

    _insert_preprocessed_data(study, params, prep_template, out_dir)

    # Check that the files have been copied
    for db_fp in expected_db_fps:
        self.assertTrue(exists(db_fp))

    # Check that a new preprocessed data has been created
    sql = ("SELECT EXISTS(SELECT * FROM qiita.preprocessed_data WHERE "
           "preprocessed_data_id=%s)")
    row = self.conn_handler.execute_fetchone(sql, (3, ))
    self.assertTrue(row[0])
def test_get_preprocess_fastq_cmd(self):
    # Verify the split_libraries_fastq.py command line (and output dir)
    # returned by _get_preprocess_fastq_cmd for the fixture objects.
    raw_data = RawData(1)
    params = PreprocessedIlluminaParams(1)
    prep_template = PrepTemplate(1)

    obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
        raw_data, prep_template, params)

    raw_dir = join(self.db_dir, 'raw_data')
    seqs_fp = join(raw_dir, '1_s_G1_L001_sequences.fastq.gz')
    bc_fp = join(raw_dir, '1_s_G1_L001_sequences_barcodes.fastq.gz')

    exp_head = ("split_libraries_fastq.py --store_demultiplexed_fastq -i "
                "{} -b {} "
                "-m ".format(seqs_fp, bc_fp))
    exp_tail = ("-o {0} --barcode_type golay_12 --max_bad_run_length 3 "
                "--max_barcode_errors 1.5 "
                "--min_per_read_length_fraction 0.75 "
                "--phred_quality_threshold 3 "
                "--sequence_max_n 0".format(obs_output_dir))

    # The mapping file path that follows "-m " cannot be known in advance,
    # so the command is checked in two parts: everything up to and
    # including "-m ", and everything after the first space that follows
    # the mapping file path.
    head_len = len(exp_head)
    obs_head = obs_cmd[:head_len]
    obs_tail = obs_cmd[head_len:].split(" ", 1)[1]

    self.assertEqual(obs_head, exp_head)
    self.assertEqual(obs_tail, exp_tail)
def test_to_str(self):
    # to_str() on parameter set 1 must match this exact option string.
    expected = (
        "--barcode_type golay_12 --max_bad_run_length 3 "
        "--max_barcode_errors 1.5 --min_per_read_length_fraction 0.75 "
        "--phred_quality_threshold 3 --sequence_max_n 0")
    observed = PreprocessedIlluminaParams(1).to_str()
    self.assertEqual(observed, expected)
def test_values(self):
    # The values property of parameter set 1 must equal this mapping.
    expected = {
        'barcode_type': 'golay_12',
        'max_bad_run_length': 3,
        'max_barcode_errors': 1.5,
        'min_per_read_length_fraction': 0.75,
        'phred_quality_threshold': 3,
        'rev_comp': False,
        'rev_comp_barcode': False,
        'rev_comp_mapping_barcodes': False,
        'sequence_max_n': 0,
    }
    observed = PreprocessedIlluminaParams(1).values
    self.assertEqual(observed, expected)
def test_name(self):
    # Parameter set 1 is expected to be named "Defaults".
    self.assertEqual(PreprocessedIlluminaParams(1).name, "Defaults")
def test_iter(self):
    # iter() is expected to yield exactly the parameter sets listed in
    # exp; objects are compared through their id attribute.
    obs = list(PreprocessedIlluminaParams.iter())
    exp = [PreprocessedIlluminaParams(1)]

    # Compare the complete id lists. Pairing with zip() alone stops at the
    # shorter list, so an empty (or otherwise too short/long) result from
    # iter() would let the test pass without checking anything.
    self.assertEqual([o.id for o in obs], [e.id for e in exp])