def filter_pairs_max_internal_softclip_prop(self, max_internal_softclip_prop):
    """Remove pairs whose softclip proportion exceeds the given maximum.

    A pair is discarded when either of the following holds:

    * the forward read is right-softclipped (per
      ``sctools.is_right_softclipped_strict``), its ``reference_end`` lies
      before the reverse read's ``reference_end``, and its right softclip
      proportion is greater than ``max_internal_softclip_prop``;
    * the reverse read is left-softclipped (per
      ``sctools.is_left_softclipped_strict``), its ``reference_start`` lies
      after the forward read's ``reference_start``, and its left softclip
      proportion is greater than ``max_internal_softclip_prop``.

    ``self.pairs`` is replaced by a new list holding the surviving pairs in
    their original order. Nothing is returned.

    Args:
        max_internal_softclip_prop: Threshold the softclip proportions are
            compared against (strictly greater than rejects the pair).
    """
    limit = max_internal_softclip_prop

    def exceeds_limit(pair):
        # Each condition short-circuits left to right, so the proportion
        # helpers are only consulted once the cheaper checks have passed.
        fwd = pair.forward_read
        rev = pair.reverse_read

        forward_fails = (
            sctools.is_right_softclipped_strict(fwd)
            and fwd.reference_end < rev.reference_end
            and sctools.right_softclip_proportion(fwd) > limit
        )
        if forward_fails:
            return True

        return (
            sctools.is_left_softclipped_strict(rev)
            and rev.reference_start > fwd.reference_start
            and sctools.left_softclip_proportion(rev) > limit
        )

    self.pairs = [pair for pair in self.pairs if not exceeds_limit(pair)]
def prefilter_reads(bam, database_dict, min_perc_identity, max_internal_softclip_prop, max_edge_distance):
    """Collect the reads from ``bam`` that pass identity, softclip and edge filters.

    A read is skipped when its percent identity (``pysamtools.get_perc_identity``)
    is below ``min_perc_identity``. Surviving reads are then checked according
    to their strand.

    Forward reads (``read.is_reverse`` is false) are skipped if any of:

    * right-softclipped, with the right softclipped position before the end of
      the reference and a right softclip proportion above
      ``max_internal_softclip_prop``;
    * ``read.reference_start`` is greater than ``max_edge_distance``;
    * left-softclipped, with the absolute left softclip reference start
      greater than ``max_edge_distance``.

    Reverse reads are skipped if any of:

    * left-softclipped, with the left softclipped position >= 0 and a left
      softclip proportion above ``max_internal_softclip_prop``;
    * the distance from ``read.reference_end`` to the end of the reference is
      greater than ``max_edge_distance``;
    * right-softclipped, with the absolute distance from the right softclip
      reference end to the end of the reference greater than
      ``max_edge_distance``.

    Args:
        bam: Iterable of reads. Presumably pysam aligned segments; the code
            uses ``is_reverse``, ``reference_name``, ``reference_start``,
            ``reference_end`` and ``query_name``.
        database_dict: Mapping from reference name to an object whose ``len``
            is used as the reference length (presumably the sequence).
        min_perc_identity: Minimum percent identity a read must have.
        max_internal_softclip_prop: Maximum allowed softclip proportion.
        max_edge_distance: Maximum allowed distance from the reference edge.

    Returns:
        A nested ``defaultdict`` keyed as
        ``[pair_id][reference_name][terminus_id]`` whose leaves are lists of
        reads. ``pair_id`` and ``terminus_id`` are the two parts of
        ``read.query_name`` split on ``'_'``.

    Raises:
        ValueError: If a kept read's ``query_name`` does not split into
            exactly two parts on ``'_'``.
    """
    kept = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    def reference_length(aligned):
        # Looked up lazily: only the checks that need the length trigger it.
        return len(database_dict[aligned.reference_name])

    for read in bam:
        if pysamtools.get_perc_identity(read) < min_perc_identity:
            continue

        if not read.is_reverse:
            rejected = (
                (
                    sctools.is_right_softclipped_strict(read)
                    and sctools.right_softclipped_position(read) < reference_length(read)
                    and sctools.right_softclip_proportion(read) > max_internal_softclip_prop
                )
                or read.reference_start > max_edge_distance
                or (
                    sctools.is_left_softclipped_strict(read)
                    and abs(sctools.left_softclip_reference_start(read)) > max_edge_distance
                )
            )
        else:
            rejected = (
                (
                    sctools.is_left_softclipped_strict(read)
                    and sctools.left_softclipped_position(read) >= 0
                    and sctools.left_softclip_proportion(read) > max_internal_softclip_prop
                )
                or (reference_length(read) - read.reference_end) > max_edge_distance
                or (
                    sctools.is_right_softclipped_strict(read)
                    and abs(reference_length(read) - sctools.right_softclip_reference_end(read))
                    > max_edge_distance
                )
            )

        if rejected:
            continue

        # The query name must have exactly two '_'-separated parts.
        name_parts = read.query_name.split('_')
        pair_id, terminus_id = name_parts
        kept[pair_id][read.reference_name][terminus_id].append(read)

    return kept