Beispiel #1
0
 def testKSpectrum(self):
     """Check 5-mer spectrum values for 'GAGAGAGAGT' and its reverse complement.

     Asserts the value reported for each of six expected k-mers, that all
     other k-mers sum to zero, and that the set of k-mers with a positive
     value is the same for the sequence and its reverse complement.
     """
     spectrum = nc.features.kSpectrum(5)
     probe = nc.sequence('', 'GAGAGAGAGT')

     def scored(seq):
         # Pair every feature's k-mer with the value getAll() reports for seq.
         return zip([feature.kmer for feature in spectrum],
                    spectrum.getAll(seq))

     kmerFreq = dict(scored(probe))

     # Expected value per k-mer, checked in this order.
     expected = (
         ('AGAGT', 100.),
         ('ACTCT', 100.),
         ('AGAGA', 200.),
         ('TCTCT', 200.),
         ('GAGAG', 300.),
         ('CTCTC', 300.),
     )
     for kmer, value in expected:
         self.assertEqual(kmerFreq[kmer], value)

     # Everything that is not listed above must contribute nothing.
     listed = [kmer for kmer, _ in expected]
     remainder = sum(value for kmer, value in kmerFreq.items()
                     if kmer not in listed)
     self.assertEqual(remainder, 0.)

     # Ensure that application to the reverse complement yields an identical spectrum
     specmotifs = {kmer for kmer, value in scored(probe) if value > 0}
     flipped = nc.sequence(
         '', nc.getReverseComplementaryDNASequence(probe.seq))
     specmotifsRC = {kmer for kmer, value in scored(flipped) if value > 0}
     self.assertEqual(specmotifs, specmotifsRC)
Beispiel #2
0
	def testKSpectrumMM(self):
		"""Check which 5-mers kSpectrumMM registers for 'GAGAGAGAGT'.

		The k-mers with a positive value must be exactly the 5-mers found in
		the sequence or its reverse complement, plus every string that differs
		from one of those at a single position. The same set must be obtained
		when the spectrum is applied to the reverse complement.
		"""
		spectrum = nc.features.kSpectrumMM(5)
		probe = nc.sequence('', 'GAGAGAGAGT')

		def withOneSubstitution(kmer):
			# Every string obtained by placing A, C, G or T at one position of
			# kmer; this includes kmer itself.
			variants = set()
			for pos in range(len(kmer)):
				for base in ['A', 'C', 'G', 'T']:
					variants.add(kmer[:pos] + base + kmer[pos+1:])
			return variants

		def registered(seq):
			# K-mers of the spectrum whose getAll() value for seq is positive.
			scored = zip(
				[feature.kmer for feature in spectrum],
				spectrum.getAll(seq)
			)
			return {kmer for kmer, value in scored if value > 0}

		# 5-mers read directly off the sequence, then their reverse complements
		expected = {probe.seq[start:start+5] for start in range(len(probe)-4)}
		expected = expected.union(
			nc.getReverseComplementaryDNASequence(kmer) for kmer in expected
		)
		# ... and everything one substitution away from any of those
		nearby = set()
		for kmer in expected:
			nearby.update(withOneSubstitution(kmer))
		expected = expected | nearby

		# Ensure that only the main motif and mismatches are registered
		specmotifs = registered(probe)
		self.assertEqual(specmotifs, expected)

		# Ensure that application to the reverse complement yields an identical spectrum
		flipped = nc.sequence('',
			nc.getReverseComplementaryDNASequence(probe.seq))
		specmotifsRC = registered(flipped)
		self.assertEqual(specmotifs, specmotifsRC)
Beispiel #3
0
def prep():
	"""Reset ./temp and build the module-level fixtures shared by the tests.

	Assigns the globals genome, rsA, rsB, PcG, gwWin, PcGTargets, PRESeq, MC
	and testSeqs. Passes paths under tutorial/ to the gnocis loaders and draws
	from the global `random` generator (one shuffle, then 800 + 1000 choices),
	so results depend on the generator's state at call time.

	Raises:
		OSError: if ./temp cannot be created.
	"""
	# Function-scope imports keep this block self-contained.
	import os
	import shutil

	global genome
	global rsA, rsB
	global PcG
	global gwWin
	global PcGTargets
	global PRESeq
	global MC
	global testSeqs

	# Start from an empty scratch directory. The standard library is used
	# instead of shelling out to `rm`/`mkdir`: no POSIX shell is required, and
	# a failure to create the directory raises instead of being ignored.
	# Removal stays best-effort, as it was with `rm -rf`.
	shutil.rmtree('./temp', ignore_errors=True)
	os.makedirs('./temp', exist_ok=True)

	def randomDNA(length):
		# String of `length` characters drawn independently from A, C, G, T.
		return ''.join(random.choice(['A', 'C', 'G', 'T'])
			for _ in range(length))

	genome = nc.streamFASTAGZ('tutorial/DmelR5.fasta.gz',
			restrictToSequences = [ '2L', '2R', '3L', '3R', '4', 'X' ])

	# Two small hand-written region sets on sequences 'X' and 'Y'
	rsA = nc.regions( 'A', [
		nc.region('X', 20, 50), nc.region('X', 80, 100), nc.region('X', 150, 300), nc.region('X', 305, 400), nc.region('X', 500, 600),
		nc.region('Y', 40, 100), nc.region('Y', 120, 200)
		] )

	rsB = nc.regions( 'B', [
		nc.region('X', 30, 40), nc.region('X', 90, 120), nc.region('X', 140, 150), nc.region('X', 300, 310),
		nc.region('Y', 40, 100), nc.region('Y', 130, 300), nc.region('Y', 600, 700)
		] )

	# Prepare data set: four marker tracks loaded from GFF files. The first
	# three go through deltaResize(1000); H3K27me3 is only renamed.
	PcG = nc.biomarkers('PcG', [
		nc.loadGFFGZ('tutorial/Pc.gff3.gz').deltaResize(1000).rename('Pc'),
		nc.loadGFFGZ('tutorial/Psc.gff3.gz').deltaResize(1000).rename('Psc'),
		nc.loadGFFGZ('tutorial/dRING.gff3.gz').deltaResize(1000).rename('dRING'),
		nc.loadGFFGZ('tutorial/H3K27me3.gff3.gz').rename('H3K27me3'),
	])

	# Window regions over the genome (size 1000, step 100)
	gwWin = nc.getSequenceWindowRegions(
		genome,
		windowSize = 1000, windowStep = 100)

	# NOTE(review): threshold = 4 presumably requires all four markers above
	# to be present in a window - confirm against the gnocis documentation.
	PcGTargets = PcG.HBMEs(gwWin, threshold = 4)

	# Sequences for the targets after recenter(3000), in shuffled order
	PRESeq = PcGTargets.recenter(3000).extract(genome)
	random.shuffle(PRESeq.sequences)

	MC = nc.MarkovChain(trainingSequences = genome, degree = 4, pseudoCounts = 1, addReverseComplements = True)

	# Random sequences named 'X' and 'Y', the same names used by rsA and rsB
	testSeqs = nc.sequences('Test', [
		nc.sequence('X', randomDNA(800)),
		nc.sequence('Y', randomDNA(1000)),
	])
Beispiel #4
0
import random
import gnocis as nc

# Genome stream, restricted to the listed sequence names.
genome = nc.streamFASTAGZ(
    'tutorial/DmelR5.fasta.gz',
    restrictToSequences=['2L', '2R', '3L', '3R', '4', 'X'],
)

# Two random sequences: 'X' with 800 characters, then 'Y' with 1000, each
# character drawn from A, C, G, T with the global `random` generator.
testSeqs = nc.sequences(
    'Test',
    [
        nc.sequence(
            label,
            ''.join(random.choice(['A', 'C', 'G', 'T']) for _ in range(size)),
        )
        for label, size in (('X', 800), ('Y', 1000))
    ],
)