Beispiel #1
0
	def test_comp_counts(self):
		"""Per-class residue counts for a generated motif sequence.

		Builds a sequence with genMotif and checks that
		ProteinProperties.counts returns one count per amino-acid class,
		in the same order as aa_classes.
		"""
		pp = protprop.ProteinProperties()
		aa_classes = ['FY','P','NQ']
		# Presumably (class index, run length) pairs; under that reading the
		# classes contribute 2+2, 1+1 and 2+2 residues respectively -- TODO confirm
		# against genMotif.
		motif_spec = [(0,2),(1,1),(2,2),(0,2),(1,1),(2,2)]
		# Repeated several times; presumably genMotif picks residues at random
		# within each class, so each pass exercises a different sequence.
		for _ in range(5):
			seq = genMotif(aa_classes, motif_spec)
			counts = pp.counts(seq, aa_classes)
			# assertEqual reports both values on failure, unlike assertTrue(a==b).
			self.assertEqual(counts, [4,2,4])
Beispiel #2
0
	def test_run_skip(self):
		"""Composition and motifs with skips.

		Checks the count of a single amino-acid class and the motif string
		produced when only a subset of the classes ('FY' and 'NQ') is given
		to ProteinProperties.motif.
		"""
		pp = protprop.ProteinProperties()
		aa_classes = ['FY','P','NQ']
		# Presumably (class index, run length) pairs -- TODO confirm against genMotif.
		motif_spec = [(0,2),(1,1),(2,2),(0,2),(1,1),(2,2)]
		# Repeated several times; presumably genMotif is randomized.
		for _ in range(5):
			seq = genMotif(aa_classes, motif_spec)
			self.assertEqual(pp.count(seq, 'FY'), 4)
			# 'P' is not among the requested classes; the expected string has
			# one letter per 'FY'/'NQ' residue only.
			mot = pp.motif(seq, ['FY','NQ'])
			self.assertEqual(mot, 'aabbaabb')
Beispiel #3
0
	def test_alldistcount(self):
		"""Number of distances for all distances.

		For a homopolymer of length n, allDistances must return one distance
		per unordered pair of positions, i.e. stats.Choose(n, 2) values.
		"""
		pp = protprop.ProteinProperties()
		aa_classes = ['A']
		aa = aa_classes[0]
		for xi in range(2,10):
			# aa is a single-character string, so repetition already yields
			# the full sequence; no join is needed.
			seq = aa * xi
			dists = pp.allDistances(seq, aa)
			# assertEqual reports both values on failure, unlike assertTrue(a==b).
			self.assertEqual(len(dists[aa]), stats.Choose(xi,2))
Beispiel #4
0
	def test_run(self):
		"""Composition read from a tab-delimited file.

		Writes a two-column table (header "aa", "proportion") with one row
		per entry of translate.AAs(), reads it back through
		Composition.read, and checks the stored proportion for 'A'.
		"""
		comp = protprop.Composition()
		fname = "tmp_composition.txt"
		# Uniform proportion; assumes translate.AAs() yields the 20 standard
		# amino acids -- TODO confirm.
		proportion = 1.0/20
		try:
			# Context managers close the handles even if a write, the read,
			# or the assertion raises.
			with open(fname, 'w') as outf:
				outf.write("aa\tproportion\n")
				for aa in translate.AAs():
					outf.write("{}\t{}\n".format(aa, proportion))
			with open(fname, 'r') as inf:
				comp.read(inf)
				# Checked while the file is still open, matching the original order.
				self.assertAlmostEqual(comp['A'], proportion)
		finally:
			# Always remove the scratch file so a failing test leaves nothing behind.
			if os.path.exists(fname):
				os.remove(fname)
Beispiel #5
0
	def test_neardist(self):
		"""Nearest distances.

		Histograms the 'FY' distances returned by
		ProteinProperties.nearestDistances for a generated motif sequence
		and checks the counts in selected bins.
		"""
		pp = protprop.ProteinProperties()
		aa_classes = ['FY','P','NQ']
		# Presumably (class index, run length) pairs -- TODO confirm against genMotif.
		motif_spec = [(0,2),(1,1),(2,2),(0,2),(1,1),(2,2)]
		# Repeated several times; presumably genMotif is randomized.
		for _ in range(5):
			seq = genMotif(aa_classes, motif_spec)
			dists = pp.nearestDistances(seq, aa_classes)
			# 7 bins over [-0.5, 6.5] give unit-width bins; presumably hist[k]
			# is the bin centred on integer distance k -- TODO confirm.
			hist = stats.Histogram(vals=dists['FY'], n_bins=7, min_val=-0.5,max_val=6.5)
			self.assertEqual(hist[1].count, 2)
			self.assertEqual(hist[4].count, 1)
			self.assertEqual(hist[2].count, 0)
Beispiel #6
0
	def test_alldist(self):
		"""All distances.

		Checks both the histogram of the 'FY' distances returned by
		ProteinProperties.allDistances and the exact sequence of distances,
		including their number.
		"""
		pp = protprop.ProteinProperties()
		aa_classes = ['FY','P','NQ']
		# Presumably (class index, run length) pairs -- TODO confirm against genMotif.
		motif_spec = [(0,2),(1,1),(2,2),(0,2),(1,1),(2,2)]
		answer = [1, 5, 6, 4, 5, 1]
		# Repeated several times; presumably genMotif is randomized.
		for _ in range(5):
			seq = genMotif(aa_classes, motif_spec)
			dists = pp.allDistances(seq, aa_classes)
			fy_dists = dists['FY']
			# 7 bins over [-0.5, 6.5] give unit-width bins; presumably hist[k]
			# is the bin centred on integer distance k -- TODO confirm.
			hist = stats.Histogram(vals=fy_dists, n_bins=7, min_val=-0.5,max_val=6.5)
			self.assertEqual(hist[1].count, 2)
			self.assertEqual(hist[4].count, 1)
			self.assertEqual(hist[2].count, 0)
			# zip() stops at the shorter input, so check the length explicitly;
			# otherwise a truncated result would pass the element-wise loop.
			self.assertEqual(len(fy_dists), len(answer))
			for (a,b) in zip(fy_dists, answer):
				self.assertEqual(a, b)
Beispiel #7
0
        outf = file(options.out_fname, 'w')
        data_outs.addStream(outf)
    else:
        # By default, write to stdout
        data_outs.addStream(sys.stdout)

    # Write out parameters
    data_outs.write("# Run started {}\n".format(util.timestamp()))
    data_outs.write("# Command: {}\n".format(' '.join(sys.argv)))
    data_outs.write("# Parameters:\n")
    optdict = vars(options)
    for (k, v) in optdict.items():
        data_outs.write("#\t{k}: {v}\n".format(k=k, v=v))

    # Composition to search for
    composition = protprop.Composition()
    if not options.motif is None:
        composition.initFromSequence(options.motif)
    else:
        fname = os.path.expanduser(options.composition_fname)
        if not os.path.isfile(fname):
            raise IOError("# Error: file {} does not exist".format(fname))
        with file(fname, 'r') as inf:
            composition.read(inf)

    # Read input
    if not os.path.isfile(options.in_fname):
        raise IOError("# Error: file {} does not exist".format(
            options.in_fname))
    (headers, seqs) = biofile.readFASTA(file(options.in_fname, 'r'))
    if options.translate_sequences: