Exemple #1
0
    def get_seq(self, chrom, start, end, strand):
        """Fetch the sequence of ``chrom`` from ``start`` through ``end``.

        ``chrom`` is first resolved with ``misc.match_chrom_format`` against
        the names available in ``self.fasta``. The slice taken is
        ``[start:end + 1]``, i.e. ``end`` is included. When ``strand`` is
        ``"-"`` the slice is passed through ``misc.reverse_comp`` before
        being returned; any other strand value returns the slice unchanged.
        """
        known_names = list(self.fasta.keys())
        resolved = misc.match_chrom_format(chrom, known_names)

        region = self.fasta[resolved][start:end + 1]
        return misc.reverse_comp(region) if strand == "-" else region
Exemple #2
0
    def align(self, seq):
        """Align ``seq`` and its reverse complement against every aligner.

        For each entry of ``self.names_to_aligners`` both orientations are
        aligned and the higher-scoring one is kept (the forward orientation
        wins only when its score is strictly greater). One
        ``pysam.AlignedSegment`` is produced per aligner, with
        ``reference_id`` set to the aligner's position in iteration order,
        the alignment score stored in the ``AS`` tag, and ``is_reverse`` set
        when the reverse-complement orientation was chosen.

        Returns the list of segments, in aligner iteration order.
        """
        revseq = reverse_comp(seq)
        segments = []

        for ref_id, aligner in enumerate(self.names_to_aligners.values()):
            forward = aligner.align(seq)
            backward = aligner.align(revseq)

            # Ties go to the reverse orientation, as only a strictly better
            # forward score selects the forward alignment.
            use_forward = forward.score > backward.score
            chosen = forward if use_forward else backward

            segment = pysam.AlignedSegment()
            segment.reference_id = ref_id
            segment.query_sequence = seq if use_forward else revseq
            segment.reference_start = chosen.ref_begin
            segment.set_tag("AS", chosen.score)
            segment.cigarstring = chosen.cigar_string
            if not use_forward:
                segment.is_reverse = True

            segments.append(segment)

        return segments
Exemple #3
0
def simulate_read_pair(sequence, start, length=150, isize=400, flip=False):
    """Build a ``ReadPair`` from two reads cut out of ``sequence``.

    The first read is ``sequence[start:start + length]``. The second read is
    the ``length`` characters ending at ``start + isize``, passed through
    ``reverse_comp``. With ``flip`` set, the two reads swap places in the
    resulting pair. ``read_stats`` is taken from the enclosing module scope.
    """
    fragment_end = start + isize

    first = pysam.AlignedSegment()
    first.query_sequence = sequence[start:start + length]

    second = pysam.AlignedSegment()
    second.query_sequence = reverse_comp(
        sequence[fragment_end - length:fragment_end])

    mates = (second, first) if flip else (first, second)
    return ReadPair(Alignment(mates[0]), Alignment(mates[1]), read_stats)
Exemple #4
0
def simple_dotplot(s1, s2, wordsize=8, scale=650):
    """Compute a binned k-mer match matrix (dot plot) between two sequences.

    Every ``wordsize``-long word of ``s2``, and the result of
    ``misc.reverse_comp`` applied to that word, is looked up among the words
    of ``s1``. Each hit adds one to the matrix cell whose column is the
    binned ``s1`` position and whose row is the binned ``s2`` position. Both
    axes share the same bin size.

    Args:
        s1: sequence plotted along the columns.
        s2: sequence plotted along the rows.
        wordsize: length of the words (k-mers) being matched.
        scale: size, in cells, of the longer axis of the output matrix.

    Returns:
        A 2-D numpy float array of hit counts, flipped vertically so that
        the start of ``s2`` is in the last row.
    """
    # scale is the final size of the output matrix for visualization

    # NOTE: range(l1) / range(l2) below stop one short of the final word
    # (the one starting at len - wordsize); this is kept as-is so that the
    # matrix geometry stays exactly what callers already get.
    l1 = int(len(s1) - wordsize)
    l2 = int(len(s2) - wordsize)

    longest = max([l1, l2])
    width = int(numpy.ceil(l1 / longest * scale))
    height = int(numpy.ceil(l2 / longest * scale))

    mat = numpy.zeros((height, width))
    binsize = l1 / (width - 1)

    # get positions of kmers in s1
    kmertopos1 = collections.defaultdict(list)
    for i in range(l1):
        kmertopos1[s1[i:i + wordsize]].append(i)

    # find all forward and rev-comp kmer matches from s2
    for i in range(l2):
        kmer = s2[i:i + wordsize]
        y = int(i / binsize)

        for query in (kmer, misc.reverse_comp(kmer)):
            # .get() avoids inserting an empty list into the defaultdict
            # for every word of s2 that never occurs in s1.
            positions = kmertopos1.get(query)
            if not positions:
                continue

            columns = (numpy.array(positions) / binsize).astype(int)
            # Several s1 positions can fall into the same column bin;
            # ``mat[y, columns] += 1`` would count such a bin only once,
            # whereas add.at accumulates every hit.
            numpy.add.at(mat, (y, columns), 1)

    return mat[::-1,]
Exemple #5
0
 def get_seq(self, chrom, start, end, strand):
     """Return ``start`` through ``end`` (inclusive) of the contig ``chrom``.

     The contig is looked up in ``self.names_to_contigs``. When ``strand``
     is ``"-"`` the slice is passed through ``misc.reverse_comp`` first;
     any other strand value returns the slice unchanged.
     """
     contig = self.names_to_contigs[chrom]
     region = contig[start:end + 1]
     return misc.reverse_comp(region) if strand == "-" else region
Exemple #6
0
 def original_sequence(self):
     """Return the query sequence in its original orientation.

     When ``self.is_reverse`` is falsy, ``self.query_sequence`` is returned
     unchanged; otherwise it is passed through ``misc.reverse_comp``.
     """
     if not self.is_reverse:
         return self.query_sequence
     return misc.reverse_comp(self.query_sequence)