Exemple #1
0
    def filter_pairs_max_internal_softclip_prop(self, max_internal_softclip_prop):
        """Drop pairs whose inward-facing soft clip exceeds a proportion limit.

        A pair is removed from ``self.pairs`` when either condition holds:

        * ``sctools.is_right_softclipped_strict`` is true for the forward
          read, the forward read's ``reference_end`` is smaller than the
          reverse read's ``reference_end``, and
          ``sctools.right_softclip_proportion`` of the forward read is
          greater than ``max_internal_softclip_prop``;
        * ``sctools.is_left_softclipped_strict`` is true for the reverse
          read, the reverse read's ``reference_start`` is greater than the
          forward read's ``reference_start``, and
          ``sctools.left_softclip_proportion`` of the reverse read is
          greater than ``max_internal_softclip_prop``.

        Args:
            max_internal_softclip_prop: Upper bound (exclusive comparison,
                ``>``) on the soft-clip proportion of a retained pair.
                Presumably a fraction between 0 and 1 -- confirm against
                ``sctools``.

        Returns:
            None. ``self.pairs`` is rebound to a new list holding the
            surviving pairs in their original order.
        """

        def exceeds_internal_softclip(pair):
            # True when either read of the pair trips its soft-clip test.
            fwd = pair.forward_read
            rev = pair.reverse_read

            if (sctools.is_right_softclipped_strict(fwd)
                    and fwd.reference_end < rev.reference_end
                    and sctools.right_softclip_proportion(fwd) > max_internal_softclip_prop):
                return True

            if (sctools.is_left_softclipped_strict(rev)
                    and rev.reference_start > fwd.reference_start
                    and sctools.left_softclip_proportion(rev) > max_internal_softclip_prop):
                return True

            return False

        # Build the replacement list completely before rebinding, so an
        # exception part-way through leaves self.pairs untouched.
        self.pairs = [pair for pair in self.pairs
                      if not exceeds_internal_softclip(pair)]
def prefilter_reads(bam, database_dict, min_perc_identity,
                    max_internal_softclip_prop, max_edge_distance):
    """Group the reads of *bam* that survive identity, soft-clip and edge filters.

    Every read is first required to have
    ``pysamtools.get_perc_identity(read) >= min_perc_identity``. After that
    the tests depend on ``read.is_reverse``; the first test that matches
    rejects the read.

    Reads with ``is_reverse`` false:

    1. ``sctools.is_right_softclipped_strict`` is true,
       ``sctools.right_softclipped_position`` is smaller than the length of
       the read's reference entry in ``database_dict``, and
       ``sctools.right_softclip_proportion`` is greater than
       ``max_internal_softclip_prop``.
    2. ``read.reference_start`` is greater than ``max_edge_distance``.
    3. ``sctools.is_left_softclipped_strict`` is true and the absolute value
       of ``sctools.left_softclip_reference_start`` is greater than
       ``max_edge_distance``.

    Reads with ``is_reverse`` true:

    1. ``sctools.is_left_softclipped_strict`` is true,
       ``sctools.left_softclipped_position`` is ``>= 0``, and
       ``sctools.left_softclip_proportion`` is greater than
       ``max_internal_softclip_prop``.
    2. The reference length minus ``read.reference_end`` is greater than
       ``max_edge_distance``.
    3. ``sctools.is_right_softclipped_strict`` is true and the absolute
       difference between the reference length and
       ``sctools.right_softclip_reference_end`` is greater than
       ``max_edge_distance``.

    Args:
        bam: Iterable of read objects exposing ``is_reverse``,
            ``reference_name``, ``reference_start``, ``reference_end`` and
            ``query_name`` (presumably pysam aligned segments -- confirm
            against the caller).
        database_dict: Mapping from reference name to an object supporting
            ``len()``; only its length is used here.
        min_perc_identity: Minimum accepted value of
            ``pysamtools.get_perc_identity``.
        max_internal_softclip_prop: Largest accepted soft-clip proportion
            for the first test of each orientation.
        max_edge_distance: Largest accepted distance used by the second and
            third tests of each orientation.

    Returns:
        A three-level ``defaultdict`` indexed as
        ``result[pair_id][reference_name][terminus_id]`` whose leaves are
        lists of the retained reads in iteration order. ``pair_id`` and
        ``terminus_id`` are the two fields of ``read.query_name`` split
        on ``'_'``.

    Raises:
        ValueError: If a retained read's ``query_name`` does not split into
            exactly two fields on ``'_'``.
    """

    def reference_length(read):
        # Looked up on demand: reads rejected by an earlier test never
        # touch database_dict.
        return len(database_dict[read.reference_name])

    def forward_read_rejected(read):
        # Tests applied to reads with is_reverse false, in order.
        if (sctools.is_right_softclipped_strict(read)
                and sctools.right_softclipped_position(read) < reference_length(read)
                and sctools.right_softclip_proportion(read) > max_internal_softclip_prop):
            return True

        if read.reference_start > max_edge_distance:
            return True

        if (sctools.is_left_softclipped_strict(read)
                and abs(sctools.left_softclip_reference_start(read)) > max_edge_distance):
            return True

        return False

    def reverse_read_rejected(read):
        # Tests applied to reads with is_reverse true, in order.
        if (sctools.is_left_softclipped_strict(read)
                and sctools.left_softclipped_position(read) >= 0
                and sctools.left_softclip_proportion(read) > max_internal_softclip_prop):
            return True

        if reference_length(read) - read.reference_end > max_edge_distance:
            return True

        if (sctools.is_right_softclipped_strict(read)
                and abs(reference_length(read)
                        - sctools.right_softclip_reference_end(read)) > max_edge_distance):
            return True

        return False

    retained = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    for read in bam:
        if pysamtools.get_perc_identity(read) < min_perc_identity:
            continue

        rejected = (reverse_read_rejected(read) if read.is_reverse
                    else forward_read_rejected(read))
        if rejected:
            continue

        pair_id, terminus_id = read.query_name.split('_')
        retained[pair_id][read.reference_name][terminus_id].append(read)

    return retained