Example #1
0
def _filtered_fastq_generator(fw_fastq_handler, rv_fastq_handler, min_len,
                              max_len, retain_len, counters):
    """Yield (forward, reverse) record pairs in which both reads pass.

    Every pair produced by ``paired_fastq_generator`` is counted in
    ``counters["num_pairs"]``.  A pair is yielded only when both the forward
    and the reverse sequence come back non-None from ``_check_and_trim_seq``;
    such pairs are also counted in ``counters["num_pairs_passing"]``.

    The yielded records are the same objects produced upstream, mutated in
    place: ``sequence`` is replaced by the trimmed sequence and ``quality``
    is trimmed with the same ``retain_len`` and direction flag.  Forward
    reads use a direction flag of False, reverse reads use True.

    Args:
        fw_fastq_handler: passed through to ``paired_fastq_generator``.
        rv_fastq_handler: passed through to ``paired_fastq_generator``.
        min_len: smallest accepted untrimmed sequence length.
        max_len: largest accepted untrimmed sequence length.
        retain_len: length handed to ``trim_seq`` for sequence and quality.
        counters: mapping that must already hold the "num_pairs" and
            "num_pairs_passing" keys; both are incremented in place.
    """
    # NOTE(review): the meaning of the third positional argument (True) is
    # defined by paired_fastq_generator, which is not visible here.
    for record_pair in paired_fastq_generator(fw_fastq_handler,
                                              rv_fastq_handler, True):
        counters["num_pairs"] += 1
        _report_progress(counters["num_pairs"])

        fw_record = record_pair[0]
        fw_trimmed = _check_and_trim_seq(_get_upper_seq(fw_record), min_len,
                                         max_len, retain_len, False)
        if fw_trimmed is None:
            # Forward read rejected: the reverse read is never examined.
            continue

        rv_record = record_pair[1]
        rv_trimmed = _check_and_trim_seq(_get_upper_seq(rv_record), min_len,
                                         max_len, retain_len, True)
        if rv_trimmed is None:
            continue

        counters["num_pairs_passing"] += 1
        fw_record.sequence = fw_trimmed
        fw_record.quality = trim_seq(fw_record.quality, retain_len, False)
        rv_record.sequence = rv_trimmed
        rv_record.quality = trim_seq(rv_record.quality, retain_len, True)
        yield fw_record, rv_record
    def test_trim_seq_short(self):
        """Expect ValueError when retain_len exceeds the sequence length."""
        short_seq = "ACGT"
        requested_len = 5

        # False trims from the 5p end, True trims from the 3p end; both
        # directions must reject a sequence shorter than the requested length.
        for retain_5p_end in (False, True):
            with self.assertRaises(ValueError):
                trim_seq(short_seq, requested_len, retain_5p_end)
    def test_trim_seq_exact(self):
        """A retain_len equal to the sequence length leaves it unchanged."""
        whole_seq = "ACGT"
        whole_len = 4

        # trim from 5p end
        self.assertEqual(whole_seq, trim_seq(whole_seq, whole_len, False))

        # trim from 3p end
        self.assertEqual(whole_seq, trim_seq(whole_seq, whole_len, True))
    def test_trim_seq_long(self):
        """A sequence longer than retain_len is cut down from the chosen end."""
        long_seq = "ACGT"
        keep_len = 3

        # (direction flag, expected result): False trims from the 5p end,
        # True trims from the 3p end.
        expectations = (
            (False, "CGT"),
            (True, "ACG"),
        )
        for retain_5p_end, expected_seq in expectations:
            trimmed = trim_seq(long_seq, keep_len, retain_5p_end)
            self.assertEqual(expected_seq, trimmed)
Example #5
0
def _check_and_trim_seq(input_seq, min_len, max_len, retain_len,
                        retain_5p_end):
    """Trim ``input_seq`` if its length is within bounds, else return None.

    Args:
        input_seq: sequence whose length is checked.
        min_len: smallest accepted length (inclusive).
        max_len: largest accepted length (inclusive).
        retain_len: length forwarded to ``trim_seq``.
        retain_5p_end: direction flag forwarded to ``trim_seq``.

    Returns:
        The result of ``trim_seq`` when ``min_len <= len(input_seq) <=
        max_len``; otherwise None.
    """
    observed_len = len(input_seq)
    return (trim_seq(input_seq, retain_len, retain_5p_end)
            if observed_len >= min_len and observed_len <= max_len
            else None)
def trim_grnas(grnas_name_and_seq_list, retain_len):
    """Return a new list of (name, trimmed sequence) pairs.

    Args:
        grnas_name_and_seq_list: iterable of indexable entries whose item 0
            is the gRNA name and item 1 is its full sequence.
        retain_len: length forwarded to ``trim_seq`` for every sequence.

    Returns:
        A list of ``(name, trimmed_seq)`` tuples in input order.
    """
    # False = do not retain from 5p end but from 3p end
    return [(entry[0], trim_seq(entry[1], retain_len, False))
            for entry in grnas_name_and_seq_list]
def _filtered_fastq_generator(fw_fastq_handler, rv_fastq_handler, min_len, max_len, retain_len, counters):
    """Generate trimmed forward/reverse record pairs that pass the length filter.

    Each pair read from ``paired_fastq_generator`` increments
    ``counters["num_pairs"]``.  When both reads of a pair return a non-None
    sequence from ``_check_and_trim_seq``, ``counters["num_pairs_passing"]`` is
    incremented, the records' ``sequence`` and ``quality`` attributes are
    overwritten with their trimmed versions, and the pair is yielded as a
    ``(fw_record, rv_record)`` tuple.  Forward reads are trimmed with a
    direction flag of False and reverse reads with True.

    ``counters`` must already contain both keys; it is updated in place.
    """
    pair_stream = paired_fastq_generator(fw_fastq_handler, rv_fastq_handler, True)
    for pair in pair_stream:
        counters["num_pairs"] += 1
        _report_progress(counters["num_pairs"])

        fw_record = pair[0]
        fw_seq = _check_and_trim_seq(_get_upper_seq(fw_record), min_len, max_len, retain_len, False)
        if fw_seq is None:
            # A failing forward read rejects the pair before the reverse read is checked.
            continue

        rv_record = pair[1]
        rv_seq = _check_and_trim_seq(_get_upper_seq(rv_record), min_len, max_len, retain_len, True)
        if rv_seq is None:
            continue

        counters["num_pairs_passing"] += 1
        # Qualities are trimmed with the same length and direction as the sequences.
        fw_record.sequence = fw_seq
        fw_record.quality = trim_seq(fw_record.quality, retain_len, False)
        rv_record.sequence = rv_seq
        rv_record.quality = trim_seq(rv_record.quality, retain_len, True)
        yield fw_record, rv_record
def trim_grnas(grnas_name_and_seq_list, retain_len):
    """Trim every gRNA sequence and return the (name, trimmed sequence) pairs.

    Each input entry is indexed: item 0 is taken as the name and item 1 as the
    full sequence.  The output list preserves the input order.
    """

    def _trim_entry(entry):
        # Build one output tuple; the name is read before the sequence.
        name = entry[0]
        sequence = entry[1]
        # False = do not retain from 5p end but from 3p end
        return name, trim_seq(sequence, retain_len, False)

    return [_trim_entry(entry) for entry in grnas_name_and_seq_list]
def _check_and_trim_seq(input_seq, min_len, max_len, retain_len, retain_5p_end):
    """Return the trimmed sequence, or None when the length is out of range.

    The length of ``input_seq`` must lie in the inclusive range
    ``[min_len, max_len]``; only then is it passed to ``trim_seq`` together
    with ``retain_len`` and ``retain_5p_end``.
    """
    length = len(input_seq)
    within_bounds = length >= min_len and length <= max_len
    if not within_bounds:
        return None
    return trim_seq(input_seq, retain_len, retain_5p_end)