Exemple #1
0
	def setUp(self):
		"""Prepare the BLAST / CD-HIT fixtures used by the cd-hit tests.

		In order: run blast through AstralBlastFilter, filter the records,
		pass the filtered records to writeFasta, open two Mygo connections
		(evidence codes TAS/IDA/IMP and IEA), then run CD-HIT on
		self.my_fasta_file and print its result.
		"""
		print("in cdhit test setup")

		#kdrew: blast settings (an alternative executable is "/usr/bin/blastall")
		self.my_blast_exe = "/Users/patrick/.local/share/blast-2.2.18/bin/blastall"
		self.my_blast_db = "/Users/kdrew/astral/1.75/astral95.1.75"
		self.my_blast_file = "./createdbs/data/test_seq_cdHit.fasta"
		self.my_blast_outfile = "./createdbs/data/test_seq_cdHit.blast.xml"
		self.e_value_threshold, self.length_threshold = 1e-8, .85
		self.processors = 3

		#kdrew: cd-hit settings
		self.my_cd_hit_exe = "/Users/kdrew/programs/cd-hit/cd-hit"
		self.my_fasta_file = "./createdbs/data/test_seq_cdHit_filtered.fasta"
		self.identity_cutoff, self.length_cutoff = .95, .5

		#kdrew: credentials for the mygo database
		db_user = "******"
		db_password = "******"
		db_host = "localhost"

		#kdrew: compare to command line
		#blastall -p "blastp" -d /home/kdrew/astral/1.75/astral95.1.75 -i /home/kdrew/scripts/function_prediction_python/createdbs/data/test_seq.fasta -m 7 -e 1e-8
		self.ba = AstralBlastFilter(
			self.my_blast_exe,
			self.my_blast_db,
			self.my_blast_file,
			self.e_value_threshold,
			self.length_threshold,
			blast_processors=self.processors,
		)

		self.records = self.ba.runBlast()
		print("after blast")

		self.filtered = self.ba.filterBlast(self.records)
		print("after filter")

		self.ba.writeFasta(self.filtered)

		#kdrew: one connection per set of evidence codes
		self.mg = Mygo(db_user, db_password, db_host, e_codes=['TAS','IDA','IMP'], db_name="mygo")
		self.mg.connect()
		print("conneted to mygo")

		self.mgIEA = Mygo(db_user, db_password, db_host, e_codes=['IEA'], db_name="mygo")
		self.mgIEA.connect()

		#kdrew: compare to command line
		#~/cd-hit/cd-hit/cd-hit -i test_seq_filtered.fasta -o test_seq_filtered.cd_hit.out -c .95 -s .5
		self.cd = CDHitGO(
			self.my_cd_hit_exe,
			self.my_fasta_file,
			id_c=self.identity_cutoff,
			len_c=self.length_cutoff,
		)
		self.cd.runCDHit()
		self.cd.printCDHit()
Exemple #2
0
	def setUp(self):
		"""Build the frequency, probability and log-ratio metrics for the tests.

		In order: blast + filter through AstralBlastFilter, CD-HIT clustering,
		cluster lookup through a Mygo connection, metric computation, and
		upload of the log-ratio metric to the "test_log_ratio" table.
		"""
		metric_module = hpf.function.metric
		self.freq_metric = metric_module.Frequency()
		self.prob_metric = metric_module.Probability()
		self.log_ratio_metric = metric_module.LogRatios()

		#kdrew: run blast filter
		self.my_blast_exe = "/Users/patrick/.local/share/blast-2.2.18/bin/blastall"
		self.my_blast_db = "/Users/kdrew/astral/1.75/astral95.1.75"
		self.my_blast_file = "./createdbs/data/test_seq.fasta"
		self.e_value_threshold, self.length_threshold = 1e-8, .85
		self.ba = AstralBlastFilter(
			self.my_blast_exe,
			self.my_blast_db,
			self.my_blast_file,
			self.e_value_threshold,
			self.length_threshold,
		)
		self.records = self.ba.runBlast()
		self.filtered = self.ba.filterBlast(self.records)

		#kdrew: run cluster
		self.my_cd_hit_exe = "/Users/kdrew/programs/cd-hit/cd-hit"
		self.my_fasta_file = "./createdbs/data/test_seq_filtered.fasta"
		self.identity_cutoff, self.length_cutoff = .95, .5
		self.cd = CDHitGO(
			self.my_cd_hit_exe,
			self.my_fasta_file,
			id_c=self.identity_cutoff,
			len_c=self.length_cutoff,
		)
		self.cd.runCDHit()

		#kdrew: look up the clusters through mygo
		db_user = "******"
		db_password = "******"
		db_host = "mcpeepants.bio.nyu.edu"
		self.mg = Mygo(db_user, db_password, db_host, e_codes=['TAS','IDA','IMP','IEA'], db_name="mygo")
		self.mg.connect()
		self.clstr_terms = self.cd.getClusters(self.filtered, self.mg)

		#kdrew: frequency -> probability -> log ratio; the cluster terms are dropped once counted
		self.freq_metric.compute_metric(self.clstr_terms)
		del self.clstr_terms
		self.prob_metric.compute_metric(self.freq_metric)
		self.log_ratio_metric.compute_metric(self.prob_metric)

		#kdrew: store the result in the hpf database
		self.conn = MySQLdb.connect(host=db_host, user=db_user, passwd=db_password, db="hpf")
		self.log_ratio_metric.upload_metric(self.conn, "test_log_ratio", delete_table=True)
class SimpleMIRunTestCase(unittest.TestCase):
	"""Tests for hpf.function.metric.MutualInformation on the test_seq data.

	setUp drives the external blast and cd-hit executables and the MySQL
	servers named there, so these tests only run where those are available.
	"""

	def setUp(self):
		"""Compute the mutual information metric and upload it to "test_mutual_info"."""
		metric_module = hpf.function.metric
		self.freq_metric = metric_module.Frequency()
		self.prob_metric = metric_module.Probability()
		self.mi_metric = metric_module.MutualInformation()

		#kdrew: run blast filter
		self.my_blast_exe = "/Users/patrick/.local/share/blast-2.2.18/bin/blastall"
		self.my_blast_db = "/Users/kdrew/astral/1.75/astral95.1.75"
		self.my_blast_file = "./createdbs/data/test_seq.fasta"
		self.e_value_threshold, self.length_threshold = 1e-8, .85
		self.ba = AstralBlastFilter(
			self.my_blast_exe,
			self.my_blast_db,
			self.my_blast_file,
			self.e_value_threshold,
			self.length_threshold,
		)
		self.records = self.ba.runBlast()
		self.filtered = self.ba.filterBlast(self.records)

		#kdrew: run cluster
		self.my_cd_hit_exe = "/Users/kdrew/programs/cd-hit/cd-hit"
		self.my_fasta_file = "./createdbs/data/test_seq_filtered.fasta"
		self.identity_cutoff, self.length_cutoff = .95, .5
		self.cd = CDHitGO(
			self.my_cd_hit_exe,
			self.my_fasta_file,
			id_c=self.identity_cutoff,
			len_c=self.length_cutoff,
		)
		self.cd.runCDHit()

		#kdrew: look up the clusters through mygo
		db_user = "******"
		db_password = "******"
		db_host = "mcpeepants.bio.nyu.edu"
		self.mg = Mygo(db_user, db_password, db_host, e_codes=['TAS','IDA','IMP','IEA'], db_name="mygo")
		self.mg.connect()
		self.clstr_terms = self.cd.getClusters(self.filtered, self.mg)

		#kdrew: frequency -> probability -> mutual information
		self.freq_metric.compute_metric(self.clstr_terms)
		self.prob_metric.compute_metric(self.freq_metric)
		self.mi_metric.compute_metric(self.prob_metric)

		#kdrew: store the result in the hpf database
		self.conn = MySQLdb.connect(host=db_host, user=db_user, passwd=db_password, db="hpf")
		self.mi_metric.upload_metric(self.conn, "test_mutual_info", delete_table=True)

	def testMI(self):
		"""Recompute two mutual information values by hand and compare with the metric."""

		def mi_term(joint, p_first, p_second):
			#kdrew: one summand of the mutual information sum, TINY_NUM padded as in the original formula
			return joint * math.log(joint / (p_first * p_second + TINY_NUM) + TINY_NUM)

		#kdrew: compute probs ("a.51.1", "GO:0043227") from scratch to calculate mutual information
		p_a = 3 / (4 + TINY_NUM)
		p_b = 2 / (4 + TINY_NUM)
		p_not_a = 1 / (4 + TINY_NUM)
		p_not_b = 2 / (4 + TINY_NUM)

		joint_a_b = (2 / (3 + TINY_NUM)) * p_a
		joint_not_a_b = (0 / (1 + TINY_NUM)) * p_not_a
		#kdrew: only needed by the third summand, which is switched off below
		joint_a_not_b = (1 / (2 + TINY_NUM)) * p_not_b

		#kdrew: mutual information calculation
		expected = mi_term(joint_a_b, p_a, p_b)
		expected += mi_term(joint_not_a_b, p_not_a, p_b)
		#expected += (joint_a_not_b * math.log(joint_a_not_b/(p_a*p_not_b+TINY_NUM)+TINY_NUM))

		print("MI(a.51.1,GO:0043227) test:  %s  compute:  %s" % (expected, self.mi_metric.get_metric("a.51.1", "GO:0043227")))

		assert round(expected, 3) == round(self.mi_metric.get_metric("a.51.1", "GO:0043227"), 3), "wrong mutual information"

		#kdrew: compute probs ("all", "all") from scratch to calculate mutual information
		p_a = 4 / (4 + TINY_NUM)
		p_b = 4 / (4 + TINY_NUM)
		p_not_a = 0 / (4 + TINY_NUM)
		p_not_b = 0 / (4 + TINY_NUM)

		joint_a_b = (4 / (4 + TINY_NUM)) * (4 / (4 + TINY_NUM))
		joint_not_a_b = (0 / (4 + TINY_NUM)) * p_a
		#kdrew: only needed by the third summand, which is switched off below
		joint_a_not_b = (0 / (4 + TINY_NUM)) * p_a

		expected = mi_term(joint_a_b, p_a, p_b)
		expected += mi_term(joint_not_a_b, p_not_a, p_b)
		#expected += (joint_a_not_b * math.log(joint_a_not_b/(p_a*p_not_b)))

		print("MI(all,all) test:  %s  compute:  %s" % (expected, self.mi_metric.get_metric("all", "all")))

		assert round(expected, 3) == round(self.mi_metric.get_metric(Term(a="all"), Term(a="all")), 3), "wrong mutual information"

	def testTheoryMITerm(self):
		"""Compare term/term mutual information with fixed reference values."""
		mutual_info = self.mi_metric.get_metric
		assert round(0.0, 3) == round(mutual_info("all", "all"), 3), "wrong mutual information"
		assert round(0.0, 3) == round(mutual_info("GO:0000133", "GO:0000135"), 3), "wrong mutual information"

	def testTheoryMISF(self):
		"""Compare mutual information involving "a.51.1" with fixed reference values."""
		mutual_info = self.mi_metric.get_metric
		assert round(0.14384, 3) == round(mutual_info("a.51.1", "GO:0043227"), 3), "wrong mutual information"
		assert round(0.04247, 3) == round(mutual_info("GO:0043227", "a.51.1"), 3), "wrong mutual information"
		assert round(0.21576, 3) == round(mutual_info("a.51.1", "a.51.1"), 3), "wrong mutual information"
Exemple #4
0
class SimpleCDHitRunTestCase(unittest.TestCase):
	"""Tests for the CDHitGO wrapper on the test_seq_cdHit data.

	setUp drives the external blast and cd-hit executables and a local
	MySQL server, so these tests only run where those are available.
	"""

	def setUp(self):
		"""Run blast, filter and write the records, connect to mygo and run CD-HIT."""
		print("in cdhit test setup")

		#kdrew: blast settings (an alternative executable is "/usr/bin/blastall")
		self.my_blast_exe = "/Users/patrick/.local/share/blast-2.2.18/bin/blastall"
		self.my_blast_db = "/Users/kdrew/astral/1.75/astral95.1.75"
		self.my_blast_file = "./createdbs/data/test_seq_cdHit.fasta"
		self.my_blast_outfile = "./createdbs/data/test_seq_cdHit.blast.xml"
		self.e_value_threshold, self.length_threshold = 1e-8, .85
		self.processors = 3

		#kdrew: cd-hit settings
		self.my_cd_hit_exe = "/Users/kdrew/programs/cd-hit/cd-hit"
		self.my_fasta_file = "./createdbs/data/test_seq_cdHit_filtered.fasta"
		self.identity_cutoff, self.length_cutoff = .95, .5

		#kdrew: credentials for the mygo database
		db_user = "******"
		db_password = "******"
		db_host = "localhost"

		#kdrew: compare to command line
		#blastall -p "blastp" -d /home/kdrew/astral/1.75/astral95.1.75 -i /home/kdrew/scripts/function_prediction_python/createdbs/data/test_seq.fasta -m 7 -e 1e-8
		self.ba = AstralBlastFilter(
			self.my_blast_exe,
			self.my_blast_db,
			self.my_blast_file,
			self.e_value_threshold,
			self.length_threshold,
			blast_processors=self.processors,
		)

		self.records = self.ba.runBlast()
		print("after blast")

		self.filtered = self.ba.filterBlast(self.records)
		print("after filter")

		self.ba.writeFasta(self.filtered)

		#kdrew: one connection per set of evidence codes
		self.mg = Mygo(db_user, db_password, db_host, e_codes=['TAS','IDA','IMP'], db_name="mygo")
		self.mg.connect()
		print("conneted to mygo")

		self.mgIEA = Mygo(db_user, db_password, db_host, e_codes=['IEA'], db_name="mygo")
		self.mgIEA.connect()

		#kdrew: compare to command line
		#~/cd-hit/cd-hit/cd-hit -i test_seq_filtered.fasta -o test_seq_filtered.cd_hit.out -c .95 -s .5
		self.cd = CDHitGO(
			self.my_cd_hit_exe,
			self.my_fasta_file,
			id_c=self.identity_cutoff,
			len_c=self.length_cutoff,
		)
		self.cd.runCDHit()
		self.cd.printCDHit()

	def testFastaOut(self):
		"""Check the size in bytes of the ".cd_hit.out" file next to the fasta file."""
		#kdrew: same path as the fasta file with its last extension replaced
		stem = self.my_fasta_file.rpartition('.')[0]
		assert 1395 == os.path.getsize(stem + ".cd_hit.out"), 'wrong file size'

	#kdrew: function deprecated
	#def testGetGoTerms(self):
	#	clstr_terms = self.cd.getClusterGoTerms(self.mg)
	#	assert 0 == len(clstr_terms['196690']), 'incorrect number of terms returned'
	#	assert 21 == len(clstr_terms['1605014']), 'incorrect number of terms returned'
	#
	#	clstr_terms = self.cd.getClusterGoTerms(self.mgIEA)
	#	assert 33 == len(clstr_terms['196690']), 'incorrect number of terms returned'
	#	assert 0 == len(clstr_terms['1605014']), 'incorrect number of terms returned'

	def testGetClusters(self):
		"""Check the number of entries per cluster key for both Mygo connections."""
		by_cluster = self.cd.getClusters(self.filtered, self.mg, mustHaveGO=False)
		assert 1 == len(by_cluster['196690']), 'incorrect number of terms returned'
		assert 22 == len(by_cluster['1605014']), 'incorrect number of terms returned'

		#kdrew: key '3' must be gone, so the lookup has to raise KeyError
		try:
			by_cluster['3']
		except KeyError:
			pass
		else:
			assert False, 'sequence was not removed'

		by_cluster = self.cd.getClusters(self.filtered, self.mgIEA, mustHaveGO=False)
		assert 34 == len(by_cluster['196690']), 'incorrect number of terms returned'
		assert 1 == len(by_cluster['1605014']), 'incorrect number of terms returned'
Exemple #5
0
class SimpleProbabilityRunTestCase(unittest.TestCase):

	def setUp(self):
		self.freq_metric = hpf.function.metric.Frequency()
		self.prob_metric = hpf.function.metric.Probability()

		#kdrew: run blast filter
		self.my_blast_db = "/Users/kdrew/astral/1.75/astral95.1.75"
		self.my_blast_file = "./createdbs/data/test_seq.fasta"
		self.my_blast_exe = "/Users/patrick/.local/share/blast-2.2.18/bin/blastall"
		self.e_value_threshold = 1e-8
		self.length_threshold = .85
		self.ba = AstralBlastFilter(self.my_blast_exe, self.my_blast_db, self.my_blast_file, self.e_value_threshold, self.length_threshold)
		self.records = self.ba.runBlast()
		self.filtered = self.ba.filterBlast(self.records)

		#kdrew: run cluster
		self.my_fasta_file = "./createdbs/data/test_seq_filtered.fasta"
		self.my_cd_hit_exe = "/Users/kdrew/programs/cd-hit/cd-hit"
		self.identity_cutoff = .95
		self.length_cutoff = .5
		self.cd = CDHitGO(self.my_cd_hit_exe, self.my_fasta_file, id_c=self.identity_cutoff, len_c=self.length_cutoff)
		self.cd.runCDHit()

		sql_user="******"
		sql_password="******"
		sql_host="mcpeepants.bio.nyu.edu"
		self.mg = Mygo(sql_user,sql_password, sql_host, e_codes =['TAS','IDA','IMP','IEA'], db_name="mygo")
		self.mg.connect()
		self.clstr_terms = self.cd.getClusters(self.filtered, self.mg)

		self.freq_metric.compute_metric(self.clstr_terms)
		self.prob_metric.compute_metric(self.freq_metric)

		probability_table_name = "test_probability"

		self.conn = MySQLdb.connect(host=sql_host, user=sql_user, passwd=sql_password, db="hpf")
		self.prob_metric.upload_metric(self.conn, probability_table_name, delete_table=True)

		#print "\n\nfreq_metric:"
		#self.freq_metric.printTables()
		#print "\n\nprob_metric:"
		#self.prob_metric.printTables()
		#print "\n\nbgprob_metric:"
		#print "\n\n"
	

	def testProbTerm(self):
		print "testing probability of terms"
		print "P(all|all) ", self.prob_metric.get_metric("all", "all")
		#kdrew: P( all | all)
		#assert (4+TINY_NUM)/(4+TINY_NUM) == self.prob_metric.get_metric("all", "all"), "wrong probability"
		assert (4)/(4+TINY_NUM) == self.prob_metric.get_metric("all", "all"), "wrong probability"

		#kdrew: P( GO:0043170 | all )
		assert (1)/(4+TINY_NUM) == self.prob_metric.get_metric("all", "GO:0043170" ), "wrong probability"

		#kdrew: P( GO:0043170 | GO:0008324)
		assert (1)/(3+TINY_NUM) == self.prob_metric.get_metric(Term(a="GO:0008324"), Term(a="GO:0043170") ), "wrong probability"

		#kdrew: P( GO:0000135 | GO:0000133 ) testing something not there
		assert TINY_NUM == self.prob_metric.get_metric(Term(a="GO:0000133"), Term(a="GO:0000135")), "wrong probability"

	def testProbSF(self):
		#kdrew: P( GO:0043227 | a.51.1 )
		#assert (2+TINY_NUM)/(3+TINY_NUM) == self.prob_metric.get_metric(SuperfamilyEntry(sf_id = "a.51.1"), Term(a="GO:0043227")), "wrong probability"
		assert (2)/(3+TINY_NUM) == self.prob_metric.get_metric(SuperfamilyEntry(sf_id = "a.51.1"), Term(a="GO:0043227")), "wrong probability"

		#kdrew: P( GO:0001530 | a.51.1 )
		assert TINY_NUM == self.prob_metric.get_metric(SuperfamilyEntry(sf_id = "a.51.1"), Term(a="GO:0001530")), "wrong probability"

		#kdrew: P( all | not a.51.1 )
		print "P(all|not a.51.1): ",self.prob_metric.get_metric(SuperfamilyEntry(SuperfamilyEntry(sf_id = "a.51.1").get_not_id()), Term(a="all"))
		assert round(1.0,3) == round(self.prob_metric.get_metric(SuperfamilyEntry(SuperfamilyEntry(sf_id = "a.51.1").get_not_id()), Term(a="all")),3), "wrong probability"
		#assert (1)/(1+TINY_NUM) == self.prob_metric.get_metric(SuperfamilyEntry(SuperfamilyEntry(sf_id = "a.51.1").get_not_id()), Term(a="all")), "wrong probability"


		#kdrew: P( not a.51.1 | all )
		print "P(nota.51.1|all): ", self.prob_metric.get_metric(Term(a="all"), SuperfamilyEntry(SuperfamilyEntry(sf_id = "a.51.1").get_not_id()))
		assert (1.0 - (3.0/(4+TINY_NUM))) == self.prob_metric.get_metric(Term(a="all"), SuperfamilyEntry(SuperfamilyEntry(sf_id = "a.51.1").get_not_id())), "wrong probability"

		#kdrew: P( a.51.1 | a.51.1 )
		assert (3)/(3+TINY_NUM) == self.prob_metric.get_metric("a.51.1","a.51.1"), "wrong probability"

	def testBGProbSF(self):
		#kdrew: P( a.51.1 )
		print "P(a.51.1): ", self.prob_metric.get_metric("a.51.1",None)
		assert (3)/(4+TINY_NUM) == self.prob_metric.get_metric("a.51.1",None), "wrong probability"
		#kdrew: P( not a.51.1 )
		print "not a.51.1: ", self.prob_metric.get_metric(get_not_id("a.51.1"),None)
		assert 1.0 - (3)/(4+TINY_NUM) == self.prob_metric.get_metric(get_not_id("a.51.1"),None), "wrong probability"

		#kdrew: P( a.51.1 )
		assert (3)/(4+TINY_NUM) == self.prob_metric.get_metric(SuperfamilyEntry(sf_id = "a.51.1"),None), "wrong probability"
		#kdrew: P( not a.51.1 )
		assert 1.0 - (3)/(4+TINY_NUM) == self.prob_metric.get_metric(SuperfamilyEntry(SuperfamilyEntry(sf_id = "a.51.1").get_not_id()),None), "wrong probability"

	def testBGProbMF(self):
		#kdrew: P( GO:0043227 )
		assert (2)/(4+TINY_NUM) == self.prob_metric.get_metric( Term(a="GO:0043227"),None), "wrong probability"
		#kdrew: P( not GO:0043227 )
		assert 1.0 - (2)/(4+TINY_NUM) == self.prob_metric.get_metric( Term(Term(a="GO:0043227").get_not_id()),None), "wrong probability"
		#kdrew: P( GO:0001530 )
		assert TINY_NUM == self.prob_metric.get_metric( Term(a="GO:0001530"),None), "wrong probability"

	def testPseudoCountProb(self):
		pseudo_count_test = 2
		self.prob_metric2 = hpf.function.metric.Probability(pc = pseudo_count_test)
		self.prob_metric2.compute_metric(self.freq_metric, pseudo_count=True)

		x = (((2)/(3+TINY_NUM)) * 2) + (((2)/(4+TINY_NUM)) * pseudo_count_test)
		x = x/(2+pseudo_count_test+TINY_NUM)
		assert x == self.prob_metric2.get_metric(SuperfamilyEntry(sf_id = "a.51.1"), Term(a="GO:0043227")), "wrong probability"
		assert x == self.prob_metric2.get_metric("a.51.1", "GO:0043227"), "wrong probability"

		x = (((4)/(4+TINY_NUM)) * 4) + (((4)/(4+TINY_NUM)) * pseudo_count_test)
		x = x/(4+pseudo_count_test+TINY_NUM)
		assert x == self.prob_metric2.get_metric("all","all"), "wrong probability"

		#kdrew: P( GO:0001530 | a.51.1 )
		assert TINY_NUM == self.prob_metric2.get_metric(SuperfamilyEntry(sf_id = "a.51.1"), Term(a="GO:0001530")), "wrong probability"

	def testPseudoCountProb(self):
		pseudo_count_test = 2
		self.prob_metric2 = hpf.function.metric.Probability(pc = pseudo_count_test)
		self.prob_metric2.compute_metric(self.freq_metric, pseudo_count=True)

		keys = self.prob_metric2.get_all_ids()
		for key in keys:
			assert 0.99 < self.prob_metric2.get_metric(key[0],"all")	
		

	def testTheoryProbTerm(self):
		assert round(1.0,3) == round(self.prob_metric.get_metric("all", "all"),3), "wrong probability"
		assert round(0.0,3) == round(self.prob_metric.get_metric("all", get_not_id("all")),3), "wrong probability"
		print "all: ", self.prob_metric.get_metric("all",None)
		print "all|all: ", self.prob_metric.get_metric("all","all")
		print "all|notall: ", self.prob_metric.get_metric(get_not_id("all"),"all")
		assert round(0.0,3) == round(self.prob_metric.get_metric(get_not_id("all"),"all"),3), "wrong probability"
		assert round(0.25,3) == round(self.prob_metric.get_metric("all", "GO:0043170"),3) , "wrong probability"
		assert round(0.3333,3) == round(self.prob_metric.get_metric("GO:0008324", "GO:0043170"),3), "wrong probability"
		assert round(0.0,3) == round(self.prob_metric.get_metric("GO:0000133", "GO:0000135"),3), "wrong probability"

	def testTheoryProbSF(self):
		assert round(0.66666,3) == round(self.prob_metric.get_metric("a.51.1", "GO:0043227"),3), "wrong probability"
		assert round(0.5,3) == round(self.prob_metric.get_metric(get_not_id("GO:0043227"),"a.51.1"),3), "wrong probability"
		assert round(0.0,3) == round(self.prob_metric.get_metric("a.51.1", "GO:0001530"),3), "wrong probability"
		assert round(1.0,3) == round(self.prob_metric.get_metric(get_not_id("a.51.1"), "all"),3), "wrong probability"
		assert round(0.25,3) == round(self.prob_metric.get_metric("all", get_not_id("a.51.1")),3), "wrong probability"
		assert round(1.0,3) == round(self.prob_metric.get_metric("a.51.1","a.51.1"),3), "wrong probability"

	def testTheoryPseudoCountProb(self):
		pseudo_count_test = 2
		self.prob_metric2 = hpf.function.metric.Probability(pc = pseudo_count_test)
		self.prob_metric2.compute_metric(self.freq_metric, pseudo_count=True)
		#self.freq_metric.printTables()
		probability_table_name = "test_pseudo_probability"
		self.prob_metric2.upload_metric(self.conn, probability_table_name, delete_table=True)

		print "psuedo_count P(GO:0043227|a.51.1): ", self.prob_metric2.get_metric("a.51.1", "GO:0043227")
		assert round(0.5833,3) == round(self.prob_metric2.get_metric("a.51.1", "GO:0043227"),3), "wrong probability"

		assert round(1.0,3) == round(self.prob_metric2.get_metric("all", "all"),3), "wrong probability"

		#kdrew: P( GO:0001530 | a.51.1 )
		assert round(0.0,3) == round(self.prob_metric2.get_metric("a.51.1", "GO:0001530"),3), "wrong probability"


	def testSanity(self):
		pseudo_count_test = 2
		self.prob_metric2 = hpf.function.metric.Probability(pc = pseudo_count_test)
		self.prob_metric2.compute_metric(self.freq_metric, pseudo_count=True)


		for key in self.prob_metric.get_all_ids():
			print key,": ",self.prob_metric2.get_metric(key[0],key[1])
			if 1 <= self.prob_metric2.get_metric(key[0],key[1]):
				print "over one ", key,": ",self.prob_metric2.get_metric(key[0],key[1])
			if 0 >= self.prob_metric2.get_metric(key[0],key[1]):
				print "under zero ", key,": ",self.prob_metric2.get_metric(key[0],key[1])
				print "key0: ", self.prob_metric2.get_metric(key[0])
				print "key1: ", self.prob_metric2.get_metric(key[1])
				print "P(1|not0): ", self.prob_metric.get_metric(key[1])
			assert 1 >= self.prob_metric2.get_metric(key[0],key[1]), "metric larger than 1: "
			assert 0 <= self.prob_metric2.get_metric(key[0],key[1]), "metric smaller than 0: "
Exemple #6
0
class SimpleLogRatioRunTestCase(unittest.TestCase):
	"""Tests for hpf.function.metric.LogRatios on the test_seq data.

	setUp drives the external blast and cd-hit executables and the MySQL
	servers named there, so these tests only run where those are available.
	"""

	def setUp(self):
		"""Compute the log-ratio metric and upload it to "test_log_ratio"."""
		self.freq_metric = hpf.function.metric.Frequency()
		self.prob_metric = hpf.function.metric.Probability()
		self.log_ratio_metric = hpf.function.metric.LogRatios()

		#kdrew: run blast filter
		self.my_blast_db = "/Users/kdrew/astral/1.75/astral95.1.75"
		self.my_blast_file = "./createdbs/data/test_seq.fasta"
		self.my_blast_exe = "/Users/patrick/.local/share/blast-2.2.18/bin/blastall"
		self.e_value_threshold = 1e-8
		self.length_threshold = .85
		self.ba = AstralBlastFilter(self.my_blast_exe, self.my_blast_db, self.my_blast_file, self.e_value_threshold, self.length_threshold)
		self.records = self.ba.runBlast()
		self.filtered = self.ba.filterBlast(self.records)

		#kdrew: run cluster
		self.my_fasta_file = "./createdbs/data/test_seq_filtered.fasta"
		self.my_cd_hit_exe = "/Users/kdrew/programs/cd-hit/cd-hit"
		self.identity_cutoff = .95
		self.length_cutoff = .5
		self.cd = CDHitGO(self.my_cd_hit_exe, self.my_fasta_file, id_c=self.identity_cutoff, len_c=self.length_cutoff)
		self.cd.runCDHit()

		sql_user="******"
		sql_password="******"
		sql_host="mcpeepants.bio.nyu.edu"
		self.mg = Mygo(sql_user,sql_password, sql_host, e_codes =['TAS','IDA','IMP','IEA'], db_name="mygo")
		self.mg.connect()
		self.clstr_terms = self.cd.getClusters(self.filtered, self.mg)

		#kdrew: the cluster terms are dropped as soon as the frequencies are counted
		self.freq_metric.compute_metric(self.clstr_terms)
		del self.clstr_terms
		self.prob_metric.compute_metric(self.freq_metric)
		self.log_ratio_metric.compute_metric(self.prob_metric)

		log_ratio_table_name = "test_log_ratio"

		self.conn = MySQLdb.connect(host=sql_host, user=sql_user, passwd=sql_password, db="hpf")
		self.log_ratio_metric.upload_metric(self.conn, log_ratio_table_name, delete_table=True)

	def testLogRatioTerm(self):
		"""Compare term/term log ratios with hand-computed values."""
		#kdrew: first TINY_NUM is so don't divide by 0 when computing probability
		#kdrew: second TINY_NUM is so don't divide by 0 when computing log ratio
		#kdrew: third TINY_NUM is so don't take log(0)
		expected = math.log(((4)/(4+TINY_NUM))/(TINY_NUM+TINY_NUM)+TINY_NUM)
		actual = self.log_ratio_metric.get_metric(Term(a="all"), Term(a="all"))
		print("lr(all|all):  %s" % (actual,))
		print("lr(all|all) compute:  %s" % (expected,))
		assert expected == actual, "wrong log ratio"
		assert math.log((TINY_NUM)) == self.log_ratio_metric.get_metric(Term(a="GO:0000133"), Term(a="GO:0000135")), "wrong log ratio"

	def testLogRatioSF(self):
		"""Compare log ratios involving "a.51.1" with hand-computed values."""
		sf = SuperfamilyEntry(sf_id = "a.51.1")

		#kdrew: the value printed as "compute" is the same value the assertion checks
		expected = math.log((((2)/(3+TINY_NUM))/(TINY_NUM+TINY_NUM))+TINY_NUM)
		actual = self.log_ratio_metric.get_metric(sf, Term(a="GO:0043227"))
		print("log_ratio (GO:0043227 | a.51.1):  %s" % (actual,))
		print("compute log_ratio (GO:0043227 | a.51.1):  %s" % (expected,))
		assert expected == actual, "wrong log ratio "

		assert math.log(TINY_NUM) == self.log_ratio_metric.get_metric(sf, Term(a="GO:0001530")), "wrong log_ratio count"
		assert math.log(TINY_NUM) == self.log_ratio_metric.get_metric(Term(a="GO:0001530"), sf), "wrong log_ratio count"

	def testTheoryLRTerm(self):
		"""Compare term/term log ratios with fixed reference values."""
		log_ratio = self.log_ratio_metric.get_metric
		assert round(15.4249,3) == round(log_ratio("all", "all"),3), "wrong log ratio"
		assert round(14.0386,3) == round(log_ratio("all", "GO:0043170"),3) , "wrong log ratio"
		assert round(0.0,3) == round(log_ratio("GO:0043170","all"),3) , "wrong log ratio"
		assert round(0.4054,3) == round(log_ratio("GO:0043170", "GO:0008324"),3), "wrong log ratio"
		assert round(-16.11809,3) == round(log_ratio("GO:0000133", "GO:0000135"),3), "wrong log ratio"

	def testTheoryLRSF(self):
		"""Compare log ratios involving "a.51.1" with fixed reference values."""
		log_ratio = self.log_ratio_metric.get_metric
		assert round(15.0194,3) == round(log_ratio("a.51.1", "GO:0043227"),3), "wrong log ratio"
		assert round(0.6931,3) == round(log_ratio("GO:0043227", "a.51.1"),3), "wrong log ratio"
		assert round(15.4249,3) == round(log_ratio("a.51.1","a.51.1"),3), "wrong log ratio"