def test_comp_counts(self):
    """Per-class residue counts of a generated motif sequence.

    ProteinProperties.counts() is called with all three residue classes
    and must return [4, 2, 4] for every generated sequence.
    """
    pp = protprop.ProteinProperties()
    aa_classes = ['FY', 'P', 'NQ']
    # NOTE(review): genMotif is presumably randomized, hence several
    # draws per test -- confirm against its definition.
    for _ in range(5):
        # NOTE(review): each pair looks like (index into aa_classes,
        # run length), i.e. 4, 2 and 4 residues in total -- confirm.
        seq = genMotif(aa_classes, [(0,2),(1,1),(2,2),(0,2),(1,1),(2,2)])
        # assertEqual reports both values on failure, unlike
        # assertTrue(a == b).
        self.assertEqual(pp.counts(seq, aa_classes), [4, 2, 4])
def test_run_skip(self):
    """Composition and motifs with skips.

    Checks count() for the 'FY' class and motif() restricted to the
    'FY' and 'NQ' classes on a generated sequence that also contains 'P'.
    """
    pp = protprop.ProteinProperties()
    aa_classes = ['FY', 'P', 'NQ']
    # NOTE(review): genMotif is presumably randomized, hence several
    # draws per test -- confirm against its definition.
    for _ in range(5):
        seq = genMotif(aa_classes, [(0,2),(1,1),(2,2),(0,2),(1,1),(2,2)])
        self.assertEqual(pp.count(seq, 'FY'), 4)
        # NOTE(review): the expected string suggests residues of the
        # first class map to 'a', the second to 'b', and everything
        # else ('P' here) is skipped -- confirm against motif().
        mot = pp.motif(seq, ['FY', 'NQ'])
        self.assertEqual(mot, 'aabbaabb')
def test_alldistcount(self):
    """Number of distances returned by allDistances().

    For a sequence made of xi copies of a single residue, the number of
    distances reported for that residue must equal stats.Choose(xi, 2).
    """
    pp = protprop.ProteinProperties()
    aa = 'A'
    for xi in range(2, 10):
        # String repetition already yields the sequence; no join needed.
        seq = aa * xi
        dists = pp.allDistances(seq, aa)
        self.assertEqual(len(dists[aa]), stats.Choose(xi, 2))
def test_run(self):
    """Composition read from a tab-delimited file.

    Writes a temporary table giving every amino acid from
    translate.AAs() a proportion of 1/20, reads it back through
    Composition.read() and checks the value stored for 'A'.
    """
    comp = protprop.Composition()
    fname = "tmp_composition.txt"
    try:
        with open(fname, 'w') as outf:
            outf.write("aa\tproportion\n")
            for aa in translate.AAs():
                outf.write("{}\t{}\n".format(aa, 1.0/20))
        with open(fname, 'r') as inf:
            comp.read(inf)
            self.assertAlmostEqual(comp['A'], 1.0/20)
    finally:
        # Remove the scratch file even when reading or the assertion
        # fails, so a failing run does not leave it behind.
        if os.path.exists(fname):
            os.remove(fname)
def test_neardist(self):
    """Nearest distances.

    Bins the nearestDistances() values for the 'FY' class into a
    histogram and checks the counts in bins 1, 4 and 2.
    """
    pp = protprop.ProteinProperties()
    aa_classes = ['FY', 'P', 'NQ']
    # NOTE(review): genMotif is presumably randomized, hence several
    # draws per test -- confirm against its definition.
    for _ in range(5):
        seq = genMotif(aa_classes, [(0,2),(1,1),(2,2),(0,2),(1,1),(2,2)])
        dists = pp.nearestDistances(seq, aa_classes)
        # NOTE(review): 7 bins over [-0.5, 6.5] presumably centre one
        # bin on each integer 0..6, so hist[k] counts distance k --
        # confirm against stats.Histogram.
        hist = stats.Histogram(vals=dists['FY'], n_bins=7, min_val=-0.5, max_val=6.5)
        self.assertEqual(hist[1].count, 2)
        self.assertEqual(hist[4].count, 1)
        self.assertEqual(hist[2].count, 0)
def test_alldist(self):
    """All distances.

    Checks the allDistances() values for the 'FY' class both through a
    histogram (bins 1, 4 and 2) and against the full expected list.
    """
    pp = protprop.ProteinProperties()
    aa_classes = ['FY', 'P', 'NQ']
    expected = [1, 5, 6, 4, 5, 1]
    # NOTE(review): genMotif is presumably randomized, hence several
    # draws per test -- confirm against its definition.
    for _ in range(5):
        seq = genMotif(aa_classes, [(0,2),(1,1),(2,2),(0,2),(1,1),(2,2)])
        dists = pp.allDistances(seq, aa_classes)
        # NOTE(review): 7 bins over [-0.5, 6.5] presumably centre one
        # bin on each integer 0..6, so hist[k] counts distance k --
        # confirm against stats.Histogram.
        hist = stats.Histogram(vals=dists['FY'], n_bins=7, min_val=-0.5, max_val=6.5)
        self.assertEqual(hist[1].count, 2)
        self.assertEqual(hist[4].count, 1)
        self.assertEqual(hist[2].count, 0)
        # Compare whole lists: a zip()-based element check stops at the
        # shorter input and would pass on a truncated or empty result.
        self.assertEqual(list(dists['FY']), expected)
outf = file(options.out_fname, 'w') data_outs.addStream(outf) else: # By default, write to stdout data_outs.addStream(sys.stdout) # Write out parameters data_outs.write("# Run started {}\n".format(util.timestamp())) data_outs.write("# Command: {}\n".format(' '.join(sys.argv))) data_outs.write("# Parameters:\n") optdict = vars(options) for (k, v) in optdict.items(): data_outs.write("#\t{k}: {v}\n".format(k=k, v=v)) # Composition to search for composition = protprop.Composition() if not options.motif is None: composition.initFromSequence(options.motif) else: fname = os.path.expanduser(options.composition_fname) if not os.path.isfile(fname): raise IOError("# Error: file {} does not exist".format(fname)) with file(fname, 'r') as inf: composition.read(inf) # Read input if not os.path.isfile(options.in_fname): raise IOError("# Error: file {} does not exist".format( options.in_fname)) (headers, seqs) = biofile.readFASTA(file(options.in_fname, 'r')) if options.translate_sequences: