def getInsertionSequences(self, fasta, nucleotides = ["C","G","A","T"], up = 10, down = 10):
    """Accumulate value-weighted sequence content around every track position.

    For each index ``i`` in ``range(self.length())`` a window of
    ``up + down + 1`` columns is cut from the encoded sequence, starting
    ``up`` columns before the position and ending ``down`` columns after it,
    scaled by ``self.vals[i]`` and added to the running total.

    Args:
        fasta: handed unchanged to ``get_sequence``.
        nucleotides: labels handed to ``seq_to_mat``; the result has one row
            per entry.
        up: number of columns included before each position.
        down: number of columns included after each position.

    Returns:
        Array of shape ``(len(nucleotides), up + down + 1)``. It is all zeros
        when ``self.vals`` sums to zero (no sequence is fetched in that case).
    """
    window = up + down + 1
    totals = np.zeros((len(nucleotides), window))

    # Nothing to weight by: skip the sequence lookup entirely.
    if np.sum(self.vals) == 0:
        return totals

    # Extend the region by the larger flank on both sides so that every
    # window below starts at a non-negative column.
    flank = max(up, down)
    padded = Chunk(self.chrom, self.start - flank, self.end + flank)
    # NOTE(review): assumes seq_to_mat yields one column per base of the
    # padded sequence -- confirm against its definition.
    encoded = seq_to_mat(get_sequence(padded, fasta), nucleotides)

    for i in range(self.length()):
        left = flank + i - up
        totals += self.vals[i] * encoded[:, left:left + window]
    return totals
def getStrandedInsertionSequences(self, fasta, nucleotides = ["C","G","A","T"], up = 10, down = 10):
    """Accumulate strand-aware sequence content around every track position.

    For each index ``i`` in ``range(self.length())`` two contributions are
    added to the running total:

    * ``self.plus[i]`` times the window of the encoded sequence running from
      ``up`` columns before the position to ``down`` columns after it;
    * ``self.minus[i]`` times the window of the encoded *complemented*
      sequence running from ``down`` columns before to ``up`` columns after
      the position, flipped left-to-right.

    Args:
        fasta: handed unchanged to ``get_sequence``.
        nucleotides: labels handed to ``seq_to_mat``; the result has one row
            per entry.
        up: number of columns included upstream of each position.
        down: number of columns included downstream of each position.

    Returns:
        Array of shape ``(len(nucleotides), up + down + 1)``. It is all zeros
        when ``self.vals`` sums to zero (no sequence is fetched in that case).
    """
    window = up + down + 1
    totals = np.zeros((len(nucleotides), window))

    # NOTE(review): the early exit checks self.vals while the loop reads
    # self.plus / self.minus -- presumably vals aggregates both; confirm.
    if np.sum(self.vals) == 0:
        return totals

    # Extend the region by the larger flank on both sides so that both the
    # plus-strand and the mirrored minus-strand windows stay in range.
    flank = max(up, down)
    padded = Chunk(self.chrom, self.start - flank, self.end + flank)
    forward_seq = get_sequence(padded, fasta)
    complemented_seq = complement(forward_seq)
    forward_mat = seq_to_mat(forward_seq, nucleotides)
    complemented_mat = seq_to_mat(complemented_seq, nucleotides)

    for i in range(self.length()):
        center = flank + i
        totals += self.plus[i] * forward_mat[:, center - up:center + down + 1]
        # Swap up/down and reverse the columns so the complemented window is
        # read in the opposite direction.
        mirrored = np.fliplr(complemented_mat[:, center - down:center + up + 1])
        totals += self.minus[i] * mirrored
    return totals