コード例 #1
0
ファイル: test_all.py プロジェクト: jliptrap/pyfasta
def check_kmer_overlap(f):
    """Check start offsets and shared bases of overlapping k-mers of ``chr2``.

    With an overlap of 2, every 10-mer except the final one must be exactly
    10 long and must start at a multiple of 8 (that is, 10 - 2).  With an
    overlap of 4, the last four characters of each piece must equal the first
    four characters of the following piece; a pair is not compared when
    either piece is shorter than 4.
    """
    record = f['chr2']

    # The final k-mer is excluded from this check (presumably because it can
    # be shorter than 10 -- confirm against Fasta.as_kmers).
    windows = list(Fasta.as_kmers(record, 10, overlap=2))
    stride = 10 - 2
    for idx, (offset, piece) in enumerate(windows[:-1]):
        assert len(piece) == 10
        assert offset == idx * stride

    pieces = [pair[1] for pair in Fasta.as_kmers(record, 10, overlap=4)]
    for left, right in zip(pieces[:-1], pieces[1:]):
        # Only neighbours that both hold at least 4 characters are compared.
        if len(left) >= 4 and len(right) >= 4:
            assert left[-4:] == right[:4]
コード例 #2
0
    def segments(self):
        '''
        Generate Segment objects covering the sequences of ``self.fasta``.

        Sequence names are visited in the order given by sorting the entries
        of ``self.fasta.index`` on the first element of each entry's value.
        When ``self.start_chromosome`` is set, every name before the first
        match is skipped.  Each remaining sequence is cut into pieces with
        ``Fasta.as_kmers(..., self.segment_size)`` and one
        ``Segment(start, end, piece, name)`` is yielded per piece.

        The start coordinate begins at ``self.start_location`` and grows by
        ``self.segment_size`` per yielded segment.
        NOTE(review): the coordinate is not reset between sequences and the
        offset reported by ``as_kmers`` is ignored -- confirm this is intended.

        Progress messages are printed when ``self.verbose`` is true.
        '''
        pending_chr = self.start_chromosome
        position = self.start_location
        ordered = sorted(self.fasta.index.items(), key=lambda item: item[1][0])
        names = [entry[0] for entry in ordered]
        for chrom in names:
            count = 0
            if self.verbose:
                print("Reading chr %s" % chrom)
            # Skip forward until the requested starting sequence is reached;
            # once it has been seen, nothing further is skipped.
            if pending_chr is not None:
                if pending_chr != chrom:
                    continue
                pending_chr = None

            for _, bases in Fasta.as_kmers(self.fasta[chrom], self.segment_size):
                stop = position + self.segment_size
                segment = Segment(position, stop, bases, chrom)
                count += 1
                if self.verbose and count % 1000 == 0:
                    print("Read %d segments" % count)
                yield segment
                # The next segment starts where this one ended.
                position = stop
コード例 #3
0
 def segments(self):
     '''
     Yield one Segment per fixed-size piece of every sequence in self.fasta.

     Sequence names come from ``self.fasta.index``, ordered by the first
     element of each entry's value.  If ``self.start_chromosome`` is set,
     names are skipped until it is found.  Every piece produced by
     ``Fasta.as_kmers(sequence, self.segment_size)`` becomes
     ``Segment(start, end, piece, name)`` with ``end = start + segment_size``.

     ``start`` is initialised from ``self.start_location`` and carried from
     one segment to the next.
     NOTE(review): it is never reset when a new sequence begins -- confirm
     that a running coordinate across sequences is what callers expect.

     When ``self.verbose`` is true, progress is printed per sequence and
     after every 1000 segments of a sequence.
     '''
     wanted = self.start_chromosome
     cursor = self.start_location
     by_position = sorted(self.fasta.index.items(), key=lambda kv: kv[1][0])
     seq_names = [kv[0] for kv in by_position]
     for seq_name in seq_names:
         produced = 0
         if self.verbose:
             print("Reading chr %s" % seq_name)
         # Skip forward if a starting chr was defined and not yet reached
         if wanted is not None:
             if wanted != seq_name:
                 continue
             wanted = None

         for _, piece in Fasta.as_kmers(self.fasta[seq_name], self.segment_size):
             boundary = cursor + self.segment_size
             item = Segment(cursor, boundary, piece, seq_name)
             produced += 1
             if self.verbose and produced % 1000 == 0:
                 print("Read %d segments" % produced)
             yield item
             cursor = boundary
コード例 #4
0
ファイル: test_all.py プロジェクト: jliptrap/pyfasta
def check_kmers(f):
    """Check non-overlapping k-mer splitting of ``chr2`` and ``chr3``.

    For ``chr2`` split into 10-mers: the number of pieces must equal the
    sequence length divided by 10, the first piece must be ``(0, seq[:10])``,
    the pieces joined together must reproduce the sequence, and the last
    character of the last piece must be ``'T'``.

    For ``chr3`` split into 1-mers: the third piece must start at offset 2
    and the pieces joined together must reproduce the sequence.
    """
    seq = str(f['chr2'])

    kmers = list(Fasta.as_kmers(f['chr2'], 10))
    # Explicit floor division: the count is an integer regardless of how the
    # interpreter treats "/" on ints.
    # NOTE(review): this presumably relies on len(seq) being a multiple of 10
    # in the test fixture -- confirm.
    assert len(kmers) == len(seq) // 10
    assert kmers[0] == (0, seq[:10])

    seqs = [k[1] for k in kmers]
    assert "".join(seqs) == seq
    assert seqs[-1][-1] == 'T'

    seq = str(f['chr3'])
    kmers = list(Fasta.as_kmers(f['chr3'], 1))
    assert kmers[2][0] == 2
    seqs = [k[1] for k in kmers]
    assert "".join(seqs) == seq
コード例 #5
0
def with_kmers(f, names, k, overlap):
    """
    Split the sequences in Fasta object `f` into pieces of length `k`
    with the given `overlap` and write them to the files named in `names`.

    Each piece is written as two lines: a ``>`` header built by
    ``format_kmer(seqid, start0)`` followed by the piece itself.  Pieces are
    distributed over the output files in round-robin order.

    Every file that was opened is closed before returning, including when an
    error interrupts the writing.
    """
    fhs = []
    try:
        # Open inside the try block so that a failure part-way through the
        # list still closes the files already opened.
        for name in names:
            fhs.append(open(name, 'wb'))

        i = 0
        for seqid in f.keys():
            seq = f[seqid]
            for (start0, subseq) in Fasta.as_kmers(seq, k, overlap=overlap):
                fh = fhs[i % len(fhs)]
                fh.write(">%s\n" % format_kmer(seqid, start0))
                fh.write("%s\n" % (subseq,))
                i += 1
    finally:
        for fh in fhs:
            fh.close()
コード例 #6
0
ファイル: split_fasta.py プロジェクト: jliptrap/pyfasta
def with_kmers(f, names, k, overlap):
    """
    Split the sequences in Fasta object `f` into pieces of length `k`
    with the given `overlap`; the results are written to the files whose
    paths are listed in `names`.

    Output alternates between the files in round-robin order.  For every
    piece a header line ``>`` + ``format_kmer(seqid, start0)`` is written,
    followed by a line holding the piece.

    All opened files are closed on exit, whether or not an exception was
    raised while writing.
    """
    fhs = []
    try:
        # Opening happens inside the try block so a failure on a later name
        # does not leave the earlier files open.
        for name in names:
            fhs.append(open(name, 'wb'))

        i = 0
        for seqid in f.keys():
            seq = f[seqid]
            for (start0, subseq) in Fasta.as_kmers(seq, k, overlap=overlap):
                fh = fhs[i % len(fhs)]
                fh.write(">%s\n" % format_kmer(seqid, start0))
                fh.write("%s\n" % (subseq,))
                i += 1
    finally:
        for fh in fhs:
            fh.close()