def _filtered_fastq_generator(fw_fastq_handler, rv_fastq_handler, min_len, max_len, retain_len, counters):
    """Yield (forward, reverse) record pairs in which both reads pass the length check.

    Every pair drawn from the two handlers increments ``counters["num_pairs"]``
    and is reported via ``_report_progress``.  A pair passes when
    ``_check_and_trim_seq`` returns a non-None value for the forward read and
    then for the reverse read; the reverse read is only examined once the
    forward read has passed.  Passing pairs increment
    ``counters["num_pairs_passing"]``.

    Passing records are modified in place before being yielded: ``sequence``
    is replaced by the checked-and-trimmed sequence, and ``quality`` is
    trimmed with the same ``retain_len`` and end flag (``False`` for the
    forward read, ``True`` for the reverse read).
    """
    # NOTE(review): the meaning of the third positional argument (True) is
    # defined by paired_fastq_generator -- confirm there.
    for record_pair in paired_fastq_generator(fw_fastq_handler, rv_fastq_handler, True):
        counters["num_pairs"] += 1
        _report_progress(counters["num_pairs"])

        fw_record = record_pair[0]
        fw_trimmed = _check_and_trim_seq(
            _get_upper_seq(fw_record), min_len, max_len, retain_len, False)
        if fw_trimmed is None:
            continue  # forward read rejected; reverse read is not inspected

        rv_record = record_pair[1]
        rv_trimmed = _check_and_trim_seq(
            _get_upper_seq(rv_record), min_len, max_len, retain_len, True)
        if rv_trimmed is None:
            continue

        counters["num_pairs_passing"] += 1

        # Trim the quality strings with the same settings as the sequences so
        # the two stay aligned.
        fw_record.sequence = fw_trimmed
        fw_record.quality = trim_seq(fw_record.quality, retain_len, False)
        rv_record.sequence = rv_trimmed
        rv_record.quality = trim_seq(rv_record.quality, retain_len, True)

        yield fw_record, rv_record
def test_trim_seq_short(self):
    # Asking to retain more characters than the sequence holds must raise
    # ValueError for either value of the end flag.
    sequence = "ACGT"
    too_long = 5

    # False: trim from 5p end; True: trim from 3p end
    for end_flag in (False, True):
        with self.assertRaises(ValueError):
            trim_seq(sequence, too_long, end_flag)
def test_trim_seq_exact(self):
    # Retaining exactly the full length must hand back the sequence
    # unchanged for either value of the end flag.
    sequence = "ACGT"
    full_len = 4

    # False: trim from 5p end; True: trim from 3p end
    for end_flag in (False, True):
        trimmed = trim_seq(sequence, full_len, end_flag)
        self.assertEqual(sequence, trimmed)
def test_trim_seq_long(self):
    # Retaining fewer characters than the sequence holds drops one end,
    # and which end depends on the flag.
    sequence = "ACGT"
    keep = 3

    expectations = (
        (False, "CGT"),  # trim from 5p end
        (True, "ACG"),   # trim from 3p end
    )
    for end_flag, expected in expectations:
        trimmed = trim_seq(sequence, keep, end_flag)
        self.assertEqual(expected, trimmed)
def _check_and_trim_seq(input_seq, min_len, max_len, retain_len, retain_5p_end):
    """Trim ``input_seq`` if its length is acceptable, otherwise return None.

    The length is acceptable when it lies in the inclusive range
    ``[min_len, max_len]``.  In that case the result of
    ``trim_seq(input_seq, retain_len, retain_5p_end)`` is returned; any
    exception raised by ``trim_seq`` propagates to the caller.
    """
    if not (min_len <= len(input_seq) <= max_len):
        return None
    return trim_seq(input_seq, retain_len, retain_5p_end)
def trim_grnas(grnas_name_and_seq_list, retain_len):
    """Return a new list of ``(name, trimmed_sequence)`` tuples.

    Each entry of ``grnas_name_and_seq_list`` is read by index: item 0 is
    taken as the gRNA name and item 1 as its full sequence.  The sequence is
    passed through ``trim_seq`` with ``retain_len``; the name is carried over
    unchanged.  The input is not modified.
    """
    # False = do not retain from 5p end but from 3p end
    return [
        (entry[0], trim_seq(entry[1], retain_len, False))
        for entry in grnas_name_and_seq_list
    ]
def trim_grnas(grnas_name_and_seq_list, retain_len):
    """Trim every gRNA sequence to ``retain_len`` and keep its name alongside.

    ``grnas_name_and_seq_list`` is iterated once; for each item, index 0 is
    used as the name and index 1 as the sequence handed to ``trim_seq``.
    Returns a freshly built list of ``(name, trimmed_sequence)`` tuples in
    input order.
    """
    # False = do not retain from 5p end but from 3p end
    return [
        (name_and_seq[0], trim_seq(name_and_seq[1], retain_len, False))
        for name_and_seq in grnas_name_and_seq_list
    ]