Beispiel #1
0
 def testSequenceScoringWithStream(self):
     """Check that scoring a streamed FASTA matches scoring the loaded FASTA.

     A sequence model is trained on the Kahn2014 regions (positives) against
     a generated set of equally many sequences (negatives). A generated
     background set is then written to disk and scored twice: once read via
     ``nc.streamFASTA`` and once via ``nc.loadFASTA``. Both runs must yield
     the same number of scores and identical score values.
     """
     Kahn2014Rgn = nc.loadGFF('tutorial/Kahn2014.GFF')
     Kahn2014Seq = Kahn2014Rgn \
              .recenter(3000) \
              .extract(genome)
     # Negative set: as many generated sequences as there are positives.
     tneg = MC.generateSet(n=len(Kahn2014Seq), length=3000)
     trainingSet = Kahn2014Seq.label(nc.positive) + tneg.label(nc.negative)
     PyPREdictor = nc.motifs.Ringrose2003GTGT() \
              .pairFreq(distCut = 219) \
              .model(nc.logOdds(
                     labelPositive = nc.positive,
                     labelNegative = nc.negative)
              ) \
              .sequenceModel(name = 'PyPREdictor (M2003+GTGT)',
                             windowSize = 500, windowStep = 250) \
              .train(trainingSet)
     PyPREdictor.batchsize = 100
     # Write the background set once so both readers see the same file.
     MC.generateSet(n=1000,
                    length=3000).saveFASTA('./temp/test.Background.fasta')
     seq1 = nc.streamFASTA('./temp/test.Background.fasta')
     seq2 = nc.loadFASTA('./temp/test.Background.fasta')
     score1 = PyPREdictor.getSequenceScores(seq1, nStreamFetch=1000)
     score2 = PyPREdictor.getSequenceScores(seq2, nStreamFetch=1000)
     # Compare lengths first: zip() below stops at the shorter input, so a
     # length mismatch would be invisible to the element-wise comparison.
     self.assertEqual(len(score1), len(score2))
     diff = sum(abs(b - a) for a, b in zip(score1, score2))
     # unittest assertions (unlike bare ``assert``) survive ``python -O`` and
     # report both operands on failure.
     self.assertEqual(diff, 0.0)
Beispiel #2
0
 def testExtractRegionSequences(self):
     """Check region extraction against direct slicing of the test sequences.

     The test sequences are saved to FASTA, then ``rsA.extractSequences`` is
     run on that file read three ways: fully loaded, streamed, and streamed
     with ``wantBlockSize=50``. Each result must equal the slices
     ``[r.start:r.end + 1]`` taken from the in-memory sequences.
     """
     fastaPath = 'temp/test.fasta'
     testSeqs.saveFASTA(fastaPath)

     seqByName = {}
     for entry in testSeqs:
         seqByName[entry.name] = entry.seq

     def expectedSlices():
         # Region end is used as an inclusive coordinate, hence the +1.
         return [seqByName[r.seq][r.start:r.end + 1] for r in rsA]

     # Factories are invoked lazily so each source is opened only right
     # before it is consumed, in the same order as listed.
     sourceFactories = (
         # Saving and re-loading sequences must yield identical sequences.
         lambda: nc.loadFASTA(fastaPath),
         lambda: nc.streamFASTA(fastaPath),
         # Streaming in short blocks must yield identical final sequences.
         lambda: nc.streamFASTA(fastaPath, wantBlockSize=50),
     )
     for openSource in sourceFactories:
         extracted = rsA.extractSequences(openSource())
         self.assertEqual([s.seq for s in extracted], expectedSlices())
Beispiel #3
0
 def testSaveLoadFASTA(self):
     """Check that sequences saved to FASTA and loaded back are unchanged.

     Names and sequences are rendered as ``name: seq`` lines for both the
     original and the re-loaded set. Anything from ' from FASTA file' onward
     is dropped from loaded names before comparing.
     """
     def render(name, sequence):
         return '%s: %s' % (name, sequence)

     testSeqs.saveFASTA('temp/test.fasta')

     originalLines = [render(s.name, s.seq) for s in testSeqs]
     reloadedLines = [
         render(s.name.partition(' from FASTA file')[0], s.seq)
         for s in nc.loadFASTA('temp/test.fasta')
     ]

     sA = '\n'.join(originalLines)
     sB = '\n'.join(reloadedLines)
     self.assertEqual(sA, sB)