コード例 #1
0
ファイル: cdhit_goTest.py プロジェクト: bsmithers/hpf
	def setUp(self):
		"""Build the fixture: BLAST + filter, two Mygo handles, then a CDHitGO run.

		runBlast() feeds filterBlast(), whose result is handed to writeFasta();
		CDHitGO is then pointed at self.my_fasta_file (presumably the file that
		writeFasta() just produced -- confirm against AstralBlastFilter).
		"""
		print("in cdhit test setup")

		# blast inputs and thresholds
		#self.my_blast_exe = "/usr/bin/blastall"
		self.my_blast_exe = "/Users/patrick/.local/share/blast-2.2.18/bin/blastall"
		self.my_blast_db = "/Users/kdrew/astral/1.75/astral95.1.75"
		self.my_blast_file = "./createdbs/data/test_seq_cdHit.fasta"
		self.my_blast_outfile = "./createdbs/data/test_seq_cdHit.blast.xml"
		self.e_value_threshold, self.length_threshold = 1e-8, .85
		self.processors = 3

		# cd-hit inputs and cutoffs
		self.my_cd_hit_exe = "/Users/kdrew/programs/cd-hit/cd-hit"
		self.my_fasta_file = "./createdbs/data/test_seq_cdHit_filtered.fasta"
		self.identity_cutoff, self.length_cutoff = .95, .5

		# (user, password, host) shared by both Mygo handles below
		mygo_login = ("******", "******", "localhost")

		#kdrew: compare to command line
		#blastall -p "blastp" -d /home/kdrew/astral/1.75/astral95.1.75 -i /home/kdrew/scripts/function_prediction_python/createdbs/data/test_seq.fasta -m 7 -e 1e-8
		self.ba = AstralBlastFilter(
			self.my_blast_exe,
			self.my_blast_db,
			self.my_blast_file,
			self.e_value_threshold,
			self.length_threshold,
			blast_processors=self.processors
		)

		self.records = self.ba.runBlast()
		print("after blast")

		self.filtered = self.ba.filterBlast(self.records)
		print("after filter")

		self.ba.writeFasta(self.filtered)

		# handle restricted to the TAS / IDA / IMP evidence codes
		self.mg = Mygo(*mygo_login, e_codes=['TAS','IDA','IMP'], db_name="mygo")
		self.mg.connect()
		print("conneted to mygo")

		# second handle restricted to the IEA evidence code
		self.mgIEA = Mygo(*mygo_login, e_codes=['IEA'], db_name="mygo")
		self.mgIEA.connect()

		#kdrew: compare to command line
		#~/cd-hit/cd-hit/cd-hit -i test_seq_filtered.fasta -o test_seq_filtered.cd_hit.out -c .95 -s .5
		self.cd = CDHitGO(
			self.my_cd_hit_exe,
			self.my_fasta_file,
			id_c=self.identity_cutoff,
			len_c=self.length_cutoff
		)
		self.cd.runCDHit()
		self.cd.printCDHit()
コード例 #2
0
ファイル: mygoTest.py プロジェクト: dpenfoldbrown/hpf
	def setUp(self):
		"""Create self.mg, a Mygo handle on the "mygo" database, and connect it."""
		db_user = "******"
		db_password = "******"
		db_host = "mcpeepants.bio.nyu.edu"

		# evidence codes handed to Mygo as e_codes
		wanted_codes = ('TAS','IDA','IMP')

		self.mg = Mygo(db_user, db_password, db_host, e_codes=wanted_codes, db_name="mygo")
		self.mg.connect()
コード例 #3
0
ファイル: funcPred_analysis.py プロジェクト: bsmithers/hpf
	def __init__(self, config_file=None, section=None):
		"""Load settings from ``section`` of ``config_file`` and open the database handles.

		config_file -- path handed to RawConfigParser.read(); when None the
		               instance is set up by self._default_init() instead and
		               nothing below is executed.
		section     -- name of the config section every option is read from.

		Raises ConfigParser.NoSectionError / NoOptionError when the section or
		an option is missing.  read() silently skips files it cannot open, so a
		wrong path surfaces the same way.  getfloat()/getboolean() raise
		ValueError on malformed values; connection errors from MySQLdb.connect()
		and Mygo.connect() are not caught here.
		"""
		# The None check has to come before read(): read(None) tries to iterate
		# None and raises TypeError, which made the default path unreachable.
		if config_file is None:
			self._default_init()
			return

		config = ConfigParser.RawConfigParser()
		config.read(config_file)

		#kdrew: database parameters
		self.sql_user = config.get(section, 'sql_user')
		self.sql_password = config.get(section, 'sql_password')
		self.new_mygo_host = config.get(section, 'new_mygo_host')
		self.new_mygo_db = config.get(section, 'new_mygo_db')
		self.new_annotations_table = config.get(section, 'new_annotations_table')
		self.old_mygo_host = config.get(section, 'old_mygo_host')
		self.old_mygo_db = config.get(section, 'old_mygo_db')
		self.old_annotations_table = config.get(section, 'old_annotations_table')

		self.sequence_host = config.get(section, 'sequence_host')
		self.sequence_db = config.get(section, 'sequence_db')
		self.sequence_table = config.get(section, 'sequence_table')
		self.domain_table = config.get(section, 'domain_table')

		self.prediction_host = config.get(section, 'prediction_host')
		self.prediction_db = config.get(section, 'prediction_db')
		self.prediction_table = config.get(section, 'prediction_table')

		#kdrew: mygo parameters
		# Both options are comma separated; items are NOT stripped, so
		# "TAS, IDA" yields ' IDA' with its leading blank.
		self.evidence_codes_str = config.get(section,'evidence_codes')
		self.evidence_codes = self.evidence_codes_str.split(',')
		self.term_types_str = config.get(section,'term_types')
		self.term_types = self.term_types_str.split(',')

		self.pls_min = config.getfloat(section,'pls_min')
		self.base_max = config.getfloat(section,'base_max')
		self.single_domain = config.getboolean(section,'single_domain')

		# One MySQLdb connection each for the sequence and prediction databases,
		# plus one connected Mygo handle each for the new and old mygo databases.
		self.conn = MySQLdb.connect(host=self.sequence_host, user=self.sql_user, passwd=self.sql_password, db=self.sequence_db)
		self.pred_conn = MySQLdb.connect(host=self.prediction_host, user=self.sql_user, passwd=self.sql_password, db=self.prediction_db)
		self.new_mg = Mygo(self.sql_user,self.sql_password, self.new_mygo_host, e_codes = self.evidence_codes, db_name=self.new_mygo_db)
		self.new_mg.connect()
		self.old_mg = Mygo(self.sql_user,self.sql_password, self.old_mygo_host, e_codes = self.evidence_codes, db_name=self.old_mygo_db)
		self.old_mg.connect()
コード例 #4
0
ファイル: mygoTest.py プロジェクト: dpenfoldbrown/hpf
class SimpleMygoRunTestCase(unittest.TestCase):
	"""Checks run against the "mygo" database through a connected Mygo handle."""

	def setUp(self):
		"""Create self.mg with the TAS/IDA/IMP evidence codes and connect it."""
		db_user = "******"
		db_password = "******"
		db_host = "mcpeepants.bio.nyu.edu"
		wanted_codes = ('TAS','IDA','IMP')

		self.mg = Mygo(db_user, db_password, db_host, e_codes=wanted_codes, db_name="mygo")
		self.mg.connect()


	def testDBConnect(self):
		"""The handle must report itself as connected after setUp()."""
		connected = self.mg.isConnected()
		assert connected, 'did not connect to db'

	def testNumTerms(self):
		"""getGoTerms([1605014]) is expected to yield 21 terms."""
		self.terms = self.mg.getGoTerms([1605014])
		self.terms.print_terms()
		print("number of terms:  %s" % (len(self.terms),))
		term_count = len(self.terms)
		assert term_count == 21, 'wrong number of terms'

	def testAllNumTerms(self):
		"""get_all_terms() is expected to yield 20608 terms."""
		self.all_terms = self.mg.get_all_terms()
		total = len(self.all_terms)
		assert total == 20608, 'wrong number of terms'
コード例 #5
0
ファイル: diff_annotations.py プロジェクト: bsmithers/hpf
	def __init__(self, config_file=None, section=None):
		"""Load settings from ``section`` of ``config_file`` and open the database handles.

		config_file -- path handed to RawConfigParser.read(); when None the
		               instance is set up by self._default_init() instead and
		               nothing below is executed.
		section     -- name of the config section every option is read from.

		Raises ConfigParser.NoSectionError / NoOptionError when the section or
		an option is missing.  read() silently skips files it cannot open, so a
		wrong path surfaces the same way.  Connection errors from
		MySQLdb.connect() and Mygo.connect() are not caught here.
		"""
		# The None check has to come before read(): read(None) tries to iterate
		# None and raises TypeError, which made the default path unreachable.
		if config_file is None:
			self._default_init()
			return

		config = ConfigParser.RawConfigParser()
		config.read(config_file)

		#kdrew: database parameters
		self.sql_user = config.get(section, 'sql_user')
		self.sql_password = config.get(section, 'sql_password')
		self.mygo_host = config.get(section, 'mygo_host')
		self.mygo_previous_db = config.get(section, 'mygo_previous_db')
		self.mygo_recent_db = config.get(section, 'mygo_recent_db')

		#kdrew: store table parameters
		self.store_host = config.get(section, 'store_host')
		self.store_db = config.get(section, 'store_db')
		self.store_table_name = config.get(section, 'store_table_name')
		self.store_source = config.get(section, 'store_source')
		self.store_evidence_code = config.get(section, 'store_evidence_code')

		# Note the option names carry a "_name_" infix that the attributes drop.
		self.annotation_host = config.get(section, 'annotation_host')
		self.annotation_db = config.get(section, 'annotation_db')
		self.annotation_table_recent = config.get(section, 'annotation_table_name_recent')
		self.annotation_table_previous = config.get(section, 'annotation_table_name_previous')

		self.previous_source = config.get(section, 'previous_source')
		self.recent_source = config.get(section, 'recent_source')

		#kdrew: mygo parameters
		# All three options are comma separated; items are NOT stripped, so
		# "TAS, IDA" yields ' IDA' with its leading blank.
		self.recent_evidence_codes_str = config.get(section,'recent_evidence_codes')
		self.recent_evidence_codes = self.recent_evidence_codes_str.split(',')
		self.previous_evidence_codes_str = config.get(section,'previous_evidence_codes')
		self.previous_evidence_codes = self.previous_evidence_codes_str.split(',')
		self.term_types_str = config.get(section,'term_types')
		self.term_types = self.term_types_str.split(',')

		# Connections to the annotation and store databases; the store
		# connection is handed to GO_IEA_Just_Store.
		self.conn = MySQLdb.connect(host=self.annotation_host, user=self.sql_user, passwd=self.sql_password, db=self.annotation_db)
		self.store_conn = MySQLdb.connect(host=self.store_host, user=self.sql_user, passwd=self.sql_password, db=self.store_db)
		self.justStore = GO_IEA_Just_Store(self.store_conn, self.store_table_name, self.store_evidence_code, self.store_source)
		# Only the "recent" mygo database / evidence codes are connected here;
		# mygo_previous_db and previous_evidence_codes are stored but not used
		# in this method.
		self.mg = Mygo(self.sql_user,self.sql_password, self.mygo_host, e_codes = self.recent_evidence_codes, db_name=self.mygo_recent_db)
		self.mg.connect()
コード例 #6
0
ファイル: hddb_goIEA.py プロジェクト: dpenfoldbrown/hpf
	def __init__(self, config_file=None, section=None):
		"""Load settings from ``section`` of ``config_file`` and open the database handles.

		config_file -- path handed to RawConfigParser.read(); when None the
		               instance is set up by self._default_init() instead and
		               nothing below is executed.
		section     -- name of the config section every option is read from.

		Raises ConfigParser.NoSectionError / NoOptionError when the section or
		an option is missing.  read() silently skips files it cannot open, so a
		wrong path surfaces the same way.  getfloat()/getint()/getboolean()
		raise ValueError on malformed values; connection errors from
		MySQLdb.connect() and Mygo.connect() are not caught here.
		"""
		# The None check has to come before read(): read(None) tries to iterate
		# None and raises TypeError, which made the default path unreachable.
		if config_file is None:
			self._default_init()
			return

		config = ConfigParser.RawConfigParser()
		config.read(config_file)

		#kdrew: blast parameters
		self.my_blast_db = config.get(section, 'blast_db')
		self.my_blast_file = config.get(section, 'blast_file')
		self.my_blast_outfile = config.get(section, 'blast_outfile')
		self.my_blast_exe = config.get(section, 'blast_exe')
		self.e_value_threshold = config.getfloat(section, 'blast_e_value_threshold')
		self.length_threshold = config.getfloat(section, 'blast_length_threshold')
		self.processors = config.getint(section, 'blast_processors')
		self.multi_hits = config.getboolean(section, 'multi_hits')

		self.blast_checkpoint = config.getboolean(section,'blast_checkpoint')
		self.filter_checkpoint = config.getboolean(section,'filter_checkpoint')

		#kdrew: database parameters
		self.sql_user = config.get(section, 'sql_user')
		self.sql_password = config.get(section, 'sql_password')
		self.mygo_host = config.get(section, 'mygo_host')
		self.mygo_db = config.get(section, 'mygo_db')
		self.store_host = config.get(section, 'store_host')
		self.store_db = config.get(section, 'store_db')

		#kdrew: store table parameters
		self.store_table = config.get(section, 'store_table_name')

		self.store_evidence_code = config.get(section, 'store_evidence_code')
		self.store_source = config.get(section, 'store_source')

		#kdrew: mygo parameters
		# Comma separated; items are NOT stripped, so "TAS, IDA" yields ' IDA'.
		self.evidence_codes_str = config.get(section,'evidence_codes')
		self.evidence_codes = self.evidence_codes_str.split(',')

		# self.conn talks to the store database, self.mg to the mygo database.
		self.conn = MySQLdb.connect(host=self.store_host, user=self.sql_user, passwd=self.sql_password, db=self.store_db)
		self.mg = Mygo(self.sql_user,self.sql_password, self.mygo_host, e_codes = self.evidence_codes, db_name=self.mygo_db)
		self.mg.connect()
コード例 #7
0
ファイル: log_ratiosTest.py プロジェクト: bsmithers/hpf
	def setUp(self):
		"""Compute frequency -> probability -> log-ratio metrics and upload the last one.

		The pipeline is: AstralBlastFilter (runBlast, filterBlast), CDHitGO
		(runCDHit, getClusters against a Mygo handle), the three metrics chained
		through compute_metric(), then upload_metric() into "test_log_ratio".
		"""
		metric = hpf.function.metric
		self.freq_metric = metric.Frequency()
		self.prob_metric = metric.Probability()
		self.log_ratio_metric = metric.LogRatios()

		#kdrew: run blast filter
		self.my_blast_exe = "/Users/patrick/.local/share/blast-2.2.18/bin/blastall"
		self.my_blast_db = "/Users/kdrew/astral/1.75/astral95.1.75"
		self.my_blast_file = "./createdbs/data/test_seq.fasta"
		self.e_value_threshold, self.length_threshold = 1e-8, .85
		self.ba = AstralBlastFilter(
			self.my_blast_exe,
			self.my_blast_db,
			self.my_blast_file,
			self.e_value_threshold,
			self.length_threshold
		)
		self.records = self.ba.runBlast()
		self.filtered = self.ba.filterBlast(self.records)

		#kdrew: run cluster
		self.my_cd_hit_exe = "/Users/kdrew/programs/cd-hit/cd-hit"
		self.my_fasta_file = "./createdbs/data/test_seq_filtered.fasta"
		self.identity_cutoff, self.length_cutoff = .95, .5
		self.cd = CDHitGO(
			self.my_cd_hit_exe,
			self.my_fasta_file,
			id_c=self.identity_cutoff,
			len_c=self.length_cutoff
		)
		self.cd.runCDHit()

		# the same credentials serve the Mygo handle and the upload connection
		db_user = "******"
		db_password = "******"
		db_host = "mcpeepants.bio.nyu.edu"
		self.mg = Mygo(db_user, db_password, db_host, e_codes=['TAS','IDA','IMP','IEA'], db_name="mygo")
		self.mg.connect()
		self.clstr_terms = self.cd.getClusters(self.filtered, self.mg)

		self.freq_metric.compute_metric(self.clstr_terms)
		# the cluster terms are dropped as soon as the frequencies are computed
		del self.clstr_terms
		self.prob_metric.compute_metric(self.freq_metric)
		self.log_ratio_metric.compute_metric(self.prob_metric)

		upload_table = "test_log_ratio"
		self.conn = MySQLdb.connect(host=db_host, user=db_user, passwd=db_password, db="hpf")
		self.log_ratio_metric.upload_metric(self.conn, upload_table, delete_table=True)
コード例 #8
0
class SimpleMIRunTestCase(unittest.TestCase):
	"""Tests for hpf.function.metric.MutualInformation built from a blast/cd-hit run."""

	def setUp(self):
		"""Compute frequency -> probability -> mutual-information metrics and upload the last one."""
		metric = hpf.function.metric
		self.freq_metric = metric.Frequency()
		self.prob_metric = metric.Probability()
		self.mi_metric = metric.MutualInformation()

		#kdrew: run blast filter
		self.my_blast_exe = "/Users/patrick/.local/share/blast-2.2.18/bin/blastall"
		self.my_blast_db = "/Users/kdrew/astral/1.75/astral95.1.75"
		self.my_blast_file = "./createdbs/data/test_seq.fasta"
		self.e_value_threshold, self.length_threshold = 1e-8, .85
		self.ba = AstralBlastFilter(
			self.my_blast_exe,
			self.my_blast_db,
			self.my_blast_file,
			self.e_value_threshold,
			self.length_threshold
		)
		self.records = self.ba.runBlast()
		self.filtered = self.ba.filterBlast(self.records)

		#kdrew: run cluster
		self.my_cd_hit_exe = "/Users/kdrew/programs/cd-hit/cd-hit"
		self.my_fasta_file = "./createdbs/data/test_seq_filtered.fasta"
		self.identity_cutoff, self.length_cutoff = .95, .5
		self.cd = CDHitGO(
			self.my_cd_hit_exe,
			self.my_fasta_file,
			id_c=self.identity_cutoff,
			len_c=self.length_cutoff
		)
		self.cd.runCDHit()

		# the same credentials serve the Mygo handle and the upload connection
		db_user = "******"
		db_password = "******"
		db_host = "mcpeepants.bio.nyu.edu"
		self.mg = Mygo(db_user, db_password, db_host, e_codes=['TAS','IDA','IMP','IEA'], db_name="mygo")
		self.mg.connect()
		self.clstr_terms = self.cd.getClusters(self.filtered, self.mg)

		self.freq_metric.compute_metric(self.clstr_terms)
		self.prob_metric.compute_metric(self.freq_metric)
		self.mi_metric.compute_metric(self.prob_metric)

		upload_table = "test_mutual_info"
		self.conn = MySQLdb.connect(host=db_host, user=db_user, passwd=db_password, db="hpf")
		self.mi_metric.upload_metric(self.conn, upload_table, delete_table=True)


	def testMI(self):
		"""Rebuild two mutual-information values by hand and compare them at 3 decimals."""
		def mi_term(joint, marginal_product):
			# one summand of the hand calculation; TINY_NUM is added both to
			# the denominator and to the argument of the log
			return joint * math.log(joint/(marginal_product+TINY_NUM)+TINY_NUM)

		#kdrew: compute probs ("a.51.1", "GO:0043227") from scratch to calculate mutual information
		p_first = 3/(4+TINY_NUM)
		p_second = 2/(4+TINY_NUM)
		p_not_first = 1/(4+TINY_NUM)
		p_not_second = 2/(4+TINY_NUM)

		joint_both = (2/(3+TINY_NUM)) * p_first
		joint_not_first = (0/(1+TINY_NUM)) * p_not_first
		# only needed by the third summand, which is disabled below
		joint_not_second = (1/(2+TINY_NUM)) * p_not_second

		#kdrew: mutual information calculation
		x = mi_term(joint_both, p_first*p_second) + mi_term(joint_not_first, p_not_first*p_second)
		#x += mi_term(joint_not_second, p_first*p_not_second)

		print("MI(a.51.1,GO:0043227) test:  %s  compute:  %s" % (x, self.mi_metric.get_metric("a.51.1", "GO:0043227")))

		assert round(x,3) == round(self.mi_metric.get_metric("a.51.1", "GO:0043227"),3), "wrong mutual information"

		#kdrew: compute probs ("all", "all") from scratch to calculate mutual information
		p_first = 4/(4+TINY_NUM)
		p_second = 4/(4+TINY_NUM)
		p_not_first = 0/(4+TINY_NUM)
		p_not_second = 0/(4+TINY_NUM)

		joint_both = (4/(4+TINY_NUM)) * (4/(4+TINY_NUM))
		joint_not_first = (0/(4+TINY_NUM)) * p_first
		# only needed by the third summand, which is disabled below
		joint_not_second = (0/(4+TINY_NUM)) * p_first

		x = mi_term(joint_both, p_first*p_second) + mi_term(joint_not_first, p_not_first*p_second)
		#x += (joint_not_second * math.log(joint_not_second/(p_first*p_not_second)))

		print("MI(all,all) test:  %s  compute:  %s" % (x, self.mi_metric.get_metric("all", "all")))

		# the printed lookup uses plain strings, the asserted one Term objects
		assert round(x,3) == round(self.mi_metric.get_metric(Term(a="all"), Term(a="all")),3), "wrong mutual information"

	def testTheoryMITerm(self):
		"""Term/term pairs expected to have zero mutual information (3 decimals)."""
		for first, second in (("all", "all"), ("GO:0000133", "GO:0000135")):
			assert round(0.0,3) == round(self.mi_metric.get_metric(first, second),3), "wrong mutual information"

	def testTheoryMISF(self):
		"""Pairs involving superfamily a.51.1 against precomputed values (3 decimals)."""
		expectations = (
			(0.14384, "a.51.1", "GO:0043227"),
			(0.04247, "GO:0043227", "a.51.1"),
			(0.21576, "a.51.1", "a.51.1"),
		)
		for expected, first, second in expectations:
			assert round(expected,3) == round(self.mi_metric.get_metric(first, second),3), "wrong mutual information"
コード例 #9
0
ファイル: cdhit_goTest.py プロジェクト: bsmithers/hpf
class SimpleCDHitRunTestCase(unittest.TestCase):
	"""Tests for CDHitGO run on the output of an AstralBlastFilter pass."""

	def setUp(self):
		"""Build the fixture: BLAST + filter, two Mygo handles, then a CDHitGO run.

		runBlast() feeds filterBlast(), whose result is handed to writeFasta();
		CDHitGO is then pointed at self.my_fasta_file (presumably the file that
		writeFasta() just produced -- confirm against AstralBlastFilter).
		"""
		print("in cdhit test setup")

		# blast inputs and thresholds
		#self.my_blast_exe = "/usr/bin/blastall"
		self.my_blast_exe = "/Users/patrick/.local/share/blast-2.2.18/bin/blastall"
		self.my_blast_db = "/Users/kdrew/astral/1.75/astral95.1.75"
		self.my_blast_file = "./createdbs/data/test_seq_cdHit.fasta"
		self.my_blast_outfile = "./createdbs/data/test_seq_cdHit.blast.xml"
		self.e_value_threshold, self.length_threshold = 1e-8, .85
		self.processors = 3

		# cd-hit inputs and cutoffs
		self.my_cd_hit_exe = "/Users/kdrew/programs/cd-hit/cd-hit"
		self.my_fasta_file = "./createdbs/data/test_seq_cdHit_filtered.fasta"
		self.identity_cutoff, self.length_cutoff = .95, .5

		# (user, password, host) shared by both Mygo handles below
		mygo_login = ("******", "******", "localhost")

		#kdrew: compare to command line
		#blastall -p "blastp" -d /home/kdrew/astral/1.75/astral95.1.75 -i /home/kdrew/scripts/function_prediction_python/createdbs/data/test_seq.fasta -m 7 -e 1e-8
		self.ba = AstralBlastFilter(
			self.my_blast_exe,
			self.my_blast_db,
			self.my_blast_file,
			self.e_value_threshold,
			self.length_threshold,
			blast_processors=self.processors
		)

		self.records = self.ba.runBlast()
		print("after blast")

		self.filtered = self.ba.filterBlast(self.records)
		print("after filter")

		self.ba.writeFasta(self.filtered)

		# handle restricted to the TAS / IDA / IMP evidence codes
		self.mg = Mygo(*mygo_login, e_codes=['TAS','IDA','IMP'], db_name="mygo")
		self.mg.connect()
		print("conneted to mygo")

		# second handle restricted to the IEA evidence code
		self.mgIEA = Mygo(*mygo_login, e_codes=['IEA'], db_name="mygo")
		self.mgIEA.connect()

		#kdrew: compare to command line
		#~/cd-hit/cd-hit/cd-hit -i test_seq_filtered.fasta -o test_seq_filtered.cd_hit.out -c .95 -s .5
		self.cd = CDHitGO(
			self.my_cd_hit_exe,
			self.my_fasta_file,
			id_c=self.identity_cutoff,
			len_c=self.length_cutoff
		)
		self.cd.runCDHit()
		self.cd.printCDHit()

	def testFastaOut(self):
		"""The ".cd_hit.out" file next to the fasta input must be 1395 bytes."""
		# everything before the last '.' of the fasta path, plus the new suffix
		stem = self.my_fasta_file.rpartition('.')[0]
		clustered_path = stem+".cd_hit.out"
		assert 1395 == os.path.getsize(clustered_path), 'wrong file size'

	#kdrew: function deprecated
	#def testGetGoTerms(self):

		#clstr_terms = self.cd.getClusterGoTerms(self.mg)
		#assert 0 == len(clstr_terms['196690']), 'incorrect number of terms returned'
		#assert 21 == len(clstr_terms['1605014']), 'incorrect number of terms returned'

	#	clstr_terms = self.cd.getClusterGoTerms(self.mgIEA)
	#	assert 33 == len(clstr_terms['196690']), 'incorrect number of terms returned'
	#	assert 0 == len(clstr_terms['1605014']), 'incorrect number of terms returned'

	def testGetClusters(self):
		"""Check per-key term counts from getClusters() for both Mygo handles."""
		by_key = self.cd.getClusters(self.filtered, self.mg, mustHaveGO=False)
		assert 1 == len(by_key['196690']), 'incorrect number of terms returned'
		assert 22 == len(by_key['1605014']), 'incorrect number of terms returned'

		# key '3' must be absent: only a KeyError counts as success here
		try:
			by_key['3']
		except KeyError:
			pass
		else:
			assert False, 'sequence was not removed'

		by_key = self.cd.getClusters(self.filtered, self.mgIEA, mustHaveGO=False)
		assert 34 == len(by_key['196690']), 'incorrect number of terms returned'
		assert 1 == len(by_key['1605014']), 'incorrect number of terms returned'
コード例 #10
0
ファイル: probabilityTest.py プロジェクト: bsmithers/hpf
class SimpleProbabilityRunTestCase(unittest.TestCase):
	"""Tests for hpf.function.metric.Probability built from a blast/cd-hit run.

	Each test gets a fresh fixture from setUp(), which runs blast and cd-hit,
	queries Mygo and uploads the probability table.
	"""

	def setUp(self):
		"""Compute the frequency and probability metrics and upload the latter."""
		self.freq_metric = hpf.function.metric.Frequency()
		self.prob_metric = hpf.function.metric.Probability()

		#kdrew: run blast filter
		self.my_blast_db = "/Users/kdrew/astral/1.75/astral95.1.75"
		self.my_blast_file = "./createdbs/data/test_seq.fasta"
		self.my_blast_exe = "/Users/patrick/.local/share/blast-2.2.18/bin/blastall"
		self.e_value_threshold = 1e-8
		self.length_threshold = .85
		self.ba = AstralBlastFilter(self.my_blast_exe, self.my_blast_db, self.my_blast_file, self.e_value_threshold, self.length_threshold)
		self.records = self.ba.runBlast()
		self.filtered = self.ba.filterBlast(self.records)

		#kdrew: run cluster
		self.my_fasta_file = "./createdbs/data/test_seq_filtered.fasta"
		self.my_cd_hit_exe = "/Users/kdrew/programs/cd-hit/cd-hit"
		self.identity_cutoff = .95
		self.length_cutoff = .5
		self.cd = CDHitGO(self.my_cd_hit_exe, self.my_fasta_file, id_c=self.identity_cutoff, len_c=self.length_cutoff)
		self.cd.runCDHit()

		sql_user="******"
		sql_password="******"
		sql_host="mcpeepants.bio.nyu.edu"
		self.mg = Mygo(sql_user,sql_password, sql_host, e_codes =['TAS','IDA','IMP','IEA'], db_name="mygo")
		self.mg.connect()
		self.clstr_terms = self.cd.getClusters(self.filtered, self.mg)

		self.freq_metric.compute_metric(self.clstr_terms)
		self.prob_metric.compute_metric(self.freq_metric)

		probability_table_name = "test_probability"

		# self.conn is reused by testTheoryPseudoCountProb for a second upload
		self.conn = MySQLdb.connect(host=sql_host, user=sql_user, passwd=sql_password, db="hpf")
		self.prob_metric.upload_metric(self.conn, probability_table_name, delete_table=True)

		#print "\n\nfreq_metric:"
		#self.freq_metric.printTables()
		#print "\n\nprob_metric:"
		#self.prob_metric.printTables()
		#print "\n\nbgprob_metric:"
		#print "\n\n"


	def testProbTerm(self):
		"""Exact (==) checks of term/term probabilities against hand-built fractions."""
		print("testing probability of terms")
		print("P(all|all)  %s" % (self.prob_metric.get_metric("all", "all"),))
		#kdrew: P( all | all)
		#assert (4+TINY_NUM)/(4+TINY_NUM) == self.prob_metric.get_metric("all", "all"), "wrong probability"
		assert (4)/(4+TINY_NUM) == self.prob_metric.get_metric("all", "all"), "wrong probability"

		#kdrew: P( GO:0043170 | all )
		assert (1)/(4+TINY_NUM) == self.prob_metric.get_metric("all", "GO:0043170" ), "wrong probability"

		#kdrew: P( GO:0043170 | GO:0008324)
		assert (1)/(3+TINY_NUM) == self.prob_metric.get_metric(Term(a="GO:0008324"), Term(a="GO:0043170") ), "wrong probability"

		#kdrew: P( GO:0000135 | GO:0000133 ) testing something not there
		assert TINY_NUM == self.prob_metric.get_metric(Term(a="GO:0000133"), Term(a="GO:0000135")), "wrong probability"

	def testProbSF(self):
		"""Checks of probabilities involving superfamily a.51.1 and its "not" id."""
		#kdrew: P( GO:0043227 | a.51.1 )
		#assert (2+TINY_NUM)/(3+TINY_NUM) == self.prob_metric.get_metric(SuperfamilyEntry(sf_id = "a.51.1"), Term(a="GO:0043227")), "wrong probability"
		assert (2)/(3+TINY_NUM) == self.prob_metric.get_metric(SuperfamilyEntry(sf_id = "a.51.1"), Term(a="GO:0043227")), "wrong probability"

		#kdrew: P( GO:0001530 | a.51.1 )
		assert TINY_NUM == self.prob_metric.get_metric(SuperfamilyEntry(sf_id = "a.51.1"), Term(a="GO:0001530")), "wrong probability"

		#kdrew: P( all | not a.51.1 )
		print("P(all|not a.51.1):  %s" % (self.prob_metric.get_metric(SuperfamilyEntry(SuperfamilyEntry(sf_id = "a.51.1").get_not_id()), Term(a="all")),))
		assert round(1.0,3) == round(self.prob_metric.get_metric(SuperfamilyEntry(SuperfamilyEntry(sf_id = "a.51.1").get_not_id()), Term(a="all")),3), "wrong probability"
		#assert (1)/(1+TINY_NUM) == self.prob_metric.get_metric(SuperfamilyEntry(SuperfamilyEntry(sf_id = "a.51.1").get_not_id()), Term(a="all")), "wrong probability"


		#kdrew: P( not a.51.1 | all )
		print("P(nota.51.1|all):  %s" % (self.prob_metric.get_metric(Term(a="all"), SuperfamilyEntry(SuperfamilyEntry(sf_id = "a.51.1").get_not_id())),))
		assert (1.0 - (3.0/(4+TINY_NUM))) == self.prob_metric.get_metric(Term(a="all"), SuperfamilyEntry(SuperfamilyEntry(sf_id = "a.51.1").get_not_id())), "wrong probability"

		#kdrew: P( a.51.1 | a.51.1 )
		assert (3)/(3+TINY_NUM) == self.prob_metric.get_metric("a.51.1","a.51.1"), "wrong probability"

	def testBGProbSF(self):
		"""Single-id probabilities (second argument None) for a.51.1, by string id and by SuperfamilyEntry."""
		#kdrew: P( a.51.1 )
		print("P(a.51.1):  %s" % (self.prob_metric.get_metric("a.51.1",None),))
		assert (3)/(4+TINY_NUM) == self.prob_metric.get_metric("a.51.1",None), "wrong probability"
		#kdrew: P( not a.51.1 )
		print("not a.51.1:  %s" % (self.prob_metric.get_metric(get_not_id("a.51.1"),None),))
		assert 1.0 - (3)/(4+TINY_NUM) == self.prob_metric.get_metric(get_not_id("a.51.1"),None), "wrong probability"

		#kdrew: P( a.51.1 )
		assert (3)/(4+TINY_NUM) == self.prob_metric.get_metric(SuperfamilyEntry(sf_id = "a.51.1"),None), "wrong probability"
		#kdrew: P( not a.51.1 )
		assert 1.0 - (3)/(4+TINY_NUM) == self.prob_metric.get_metric(SuperfamilyEntry(SuperfamilyEntry(sf_id = "a.51.1").get_not_id()),None), "wrong probability"

	def testBGProbMF(self):
		"""Single-id probabilities (second argument None) for GO terms."""
		#kdrew: P( GO:0043227 )
		assert (2)/(4+TINY_NUM) == self.prob_metric.get_metric( Term(a="GO:0043227"),None), "wrong probability"
		#kdrew: P( not GO:0043227 )
		assert 1.0 - (2)/(4+TINY_NUM) == self.prob_metric.get_metric( Term(Term(a="GO:0043227").get_not_id()),None), "wrong probability"
		#kdrew: P( GO:0001530 )
		assert TINY_NUM == self.prob_metric.get_metric( Term(a="GO:0001530"),None), "wrong probability"

	def testPseudoCountProbExact(self):
		"""Exact (==) checks of pseudo-count probabilities against hand-built values.

		This body used to be named testPseudoCountProb as well; the later
		definition of that name replaced it in the class namespace, so it was
		never collected or run.  It now has its own name.
		NOTE(review): because it never ran, the exact float comparisons below
		are unverified -- testTheoryPseudoCountProb checks the same quantities
		with rounding.
		"""
		pseudo_count_test = 2
		self.prob_metric2 = hpf.function.metric.Probability(pc = pseudo_count_test)
		self.prob_metric2.compute_metric(self.freq_metric, pseudo_count=True)

		x = (((2)/(3+TINY_NUM)) * 2) + (((2)/(4+TINY_NUM)) * pseudo_count_test)
		x = x/(2+pseudo_count_test+TINY_NUM)
		assert x == self.prob_metric2.get_metric(SuperfamilyEntry(sf_id = "a.51.1"), Term(a="GO:0043227")), "wrong probability"
		assert x == self.prob_metric2.get_metric("a.51.1", "GO:0043227"), "wrong probability"

		x = (((4)/(4+TINY_NUM)) * 4) + (((4)/(4+TINY_NUM)) * pseudo_count_test)
		x = x/(4+pseudo_count_test+TINY_NUM)
		assert x == self.prob_metric2.get_metric("all","all"), "wrong probability"

		#kdrew: P( GO:0001530 | a.51.1 )
		assert TINY_NUM == self.prob_metric2.get_metric(SuperfamilyEntry(sf_id = "a.51.1"), Term(a="GO:0001530")), "wrong probability"

	def testPseudoCountProb(self):
		"""With pseudo counts, get_metric(id, "all") must exceed 0.99 for the first element of every id pair."""
		pseudo_count_test = 2
		self.prob_metric2 = hpf.function.metric.Probability(pc = pseudo_count_test)
		self.prob_metric2.compute_metric(self.freq_metric, pseudo_count=True)

		keys = self.prob_metric2.get_all_ids()
		for key in keys:
			assert 0.99 < self.prob_metric2.get_metric(key[0],"all")


	def testTheoryProbTerm(self):
		"""Term/term probabilities against precomputed values, compared at 3 decimals."""
		assert round(1.0,3) == round(self.prob_metric.get_metric("all", "all"),3), "wrong probability"
		assert round(0.0,3) == round(self.prob_metric.get_metric("all", get_not_id("all")),3), "wrong probability"
		print("all:  %s" % (self.prob_metric.get_metric("all",None),))
		print("all|all:  %s" % (self.prob_metric.get_metric("all","all"),))
		print("all|notall:  %s" % (self.prob_metric.get_metric(get_not_id("all"),"all"),))
		assert round(0.0,3) == round(self.prob_metric.get_metric(get_not_id("all"),"all"),3), "wrong probability"
		assert round(0.25,3) == round(self.prob_metric.get_metric("all", "GO:0043170"),3) , "wrong probability"
		assert round(0.3333,3) == round(self.prob_metric.get_metric("GO:0008324", "GO:0043170"),3), "wrong probability"
		assert round(0.0,3) == round(self.prob_metric.get_metric("GO:0000133", "GO:0000135"),3), "wrong probability"

	def testTheoryProbSF(self):
		"""Superfamily probabilities against precomputed values, compared at 3 decimals."""
		assert round(0.66666,3) == round(self.prob_metric.get_metric("a.51.1", "GO:0043227"),3), "wrong probability"
		assert round(0.5,3) == round(self.prob_metric.get_metric(get_not_id("GO:0043227"),"a.51.1"),3), "wrong probability"
		assert round(0.0,3) == round(self.prob_metric.get_metric("a.51.1", "GO:0001530"),3), "wrong probability"
		assert round(1.0,3) == round(self.prob_metric.get_metric(get_not_id("a.51.1"), "all"),3), "wrong probability"
		assert round(0.25,3) == round(self.prob_metric.get_metric("all", get_not_id("a.51.1")),3), "wrong probability"
		assert round(1.0,3) == round(self.prob_metric.get_metric("a.51.1","a.51.1"),3), "wrong probability"

	def testTheoryPseudoCountProb(self):
		"""Pseudo-count probabilities against precomputed values (3 decimals); also uploads them."""
		pseudo_count_test = 2
		self.prob_metric2 = hpf.function.metric.Probability(pc = pseudo_count_test)
		self.prob_metric2.compute_metric(self.freq_metric, pseudo_count=True)
		#self.freq_metric.printTables()
		probability_table_name = "test_pseudo_probability"
		self.prob_metric2.upload_metric(self.conn, probability_table_name, delete_table=True)

		print("psuedo_count P(GO:0043227|a.51.1):  %s" % (self.prob_metric2.get_metric("a.51.1", "GO:0043227"),))
		assert round(0.5833,3) == round(self.prob_metric2.get_metric("a.51.1", "GO:0043227"),3), "wrong probability"

		assert round(1.0,3) == round(self.prob_metric2.get_metric("all", "all"),3), "wrong probability"

		#kdrew: P( GO:0001530 | a.51.1 )
		assert round(0.0,3) == round(self.prob_metric2.get_metric("a.51.1", "GO:0001530"),3), "wrong probability"


	def testSanity(self):
		"""Every pseudo-count probability must lie within [0, 1]; offenders are printed first."""
		pseudo_count_test = 2
		self.prob_metric2 = hpf.function.metric.Probability(pc = pseudo_count_test)
		self.prob_metric2.compute_metric(self.freq_metric, pseudo_count=True)


		# NOTE(review): the ids come from self.prob_metric but the values are
		# read from self.prob_metric2 -- confirm that this is intended.
		for key in self.prob_metric.get_all_ids():
			print("%s :  %s" % (key, self.prob_metric2.get_metric(key[0],key[1])))
			if 1 <= self.prob_metric2.get_metric(key[0],key[1]):
				print("over one  %s :  %s" % (key, self.prob_metric2.get_metric(key[0],key[1])))
			if 0 >= self.prob_metric2.get_metric(key[0],key[1]):
				print("under zero  %s :  %s" % (key, self.prob_metric2.get_metric(key[0],key[1])))
				print("key0:  %s" % (self.prob_metric2.get_metric(key[0]),))
				print("key1:  %s" % (self.prob_metric2.get_metric(key[1]),))
				print("P(1|not0):  %s" % (self.prob_metric.get_metric(key[1]),))
			assert 1 >= self.prob_metric2.get_metric(key[0],key[1]), "metric larger than 1: "
			assert 0 <= self.prob_metric2.get_metric(key[0],key[1]), "metric smaller than 0: "
コード例 #11
0
ファイル: hddb_goIEA.py プロジェクト: dpenfoldbrown/hpf
class GO_IEA():
	"""Blast sequences, look up GO terms for the filtered hits and store them in MySQL."""

	def __init__(self, config_file=None, section=None):
		"""Load settings from ``section`` of ``config_file`` and open the database handles.

		config_file -- path handed to RawConfigParser.read(); when None the
		               instance is set up by self._default_init() instead and
		               nothing below is executed.
		section     -- name of the config section every option is read from.

		Raises ConfigParser.NoSectionError / NoOptionError when the section or
		an option is missing.  read() silently skips files it cannot open, so a
		wrong path surfaces the same way.  getfloat()/getint()/getboolean()
		raise ValueError on malformed values; connection errors from
		MySQLdb.connect() and Mygo.connect() are not caught here.
		"""
		# The None check has to come before read(): read(None) tries to iterate
		# None and raises TypeError, which made the default path unreachable.
		if config_file is None:
			self._default_init()
			return

		config = ConfigParser.RawConfigParser()
		config.read(config_file)

		#kdrew: blast parameters
		self.my_blast_db = config.get(section, 'blast_db')
		self.my_blast_file = config.get(section, 'blast_file')
		self.my_blast_outfile = config.get(section, 'blast_outfile')
		self.my_blast_exe = config.get(section, 'blast_exe')
		self.e_value_threshold = config.getfloat(section, 'blast_e_value_threshold')
		self.length_threshold = config.getfloat(section, 'blast_length_threshold')
		self.processors = config.getint(section, 'blast_processors')
		self.multi_hits = config.getboolean(section, 'multi_hits')

		self.blast_checkpoint = config.getboolean(section,'blast_checkpoint')
		self.filter_checkpoint = config.getboolean(section,'filter_checkpoint')

		#kdrew: database parameters
		self.sql_user = config.get(section, 'sql_user')
		self.sql_password = config.get(section, 'sql_password')
		self.mygo_host = config.get(section, 'mygo_host')
		self.mygo_db = config.get(section, 'mygo_db')
		self.store_host = config.get(section, 'store_host')
		self.store_db = config.get(section, 'store_db')

		#kdrew: store table parameters
		self.store_table = config.get(section, 'store_table_name')

		self.store_evidence_code = config.get(section, 'store_evidence_code')
		self.store_source = config.get(section, 'store_source')

		#kdrew: mygo parameters
		# Comma separated; items are NOT stripped, so "TAS, IDA" yields ' IDA'.
		self.evidence_codes_str = config.get(section,'evidence_codes')
		self.evidence_codes = self.evidence_codes_str.split(',')

		# self.conn talks to the store database, self.mg to the mygo database.
		self.conn = MySQLdb.connect(host=self.store_host, user=self.sql_user, passwd=self.sql_password, db=self.store_db)
		self.mg = Mygo(self.sql_user,self.sql_password, self.mygo_host, e_codes = self.evidence_codes, db_name=self.mygo_db)
		self.mg.connect()

	def goIEA(self):
		"""Filter every blast record and store the GO terms of the surviving hits.

		With blast_checkpoint set, blast is run; otherwise, with
		filter_checkpoint set, the records are read from self.my_blast_outfile.
		Each record goes through filterBlastOne(); records that come back as
		None are skipped.
		"""
		self.ba = GOBlastFilter(self.my_blast_exe, self.my_blast_db, self.my_blast_file, self.e_value_threshold, self.length_threshold, blast_processors=self.processors, multi_hits=self.multi_hits)

		outfile_handle = None
		try:
			if self.blast_checkpoint:
				self.records = self.ba.runBlast()
			#kdrew: a little kludgey, if not running blast but are going to filter from blast file, read in the blast out file
			elif self.filter_checkpoint:
				outfile_handle = open(self.my_blast_outfile)
				self.records = self.ba.runBlast(result_handle=outfile_handle)
			# NOTE(review): when neither flag is set, self.records is not
			# assigned here, so the loop below relies on an earlier assignment
			# (or fails with AttributeError).

			# The handle stays open during the loop because the records may be
			# parsed lazily from it; it is closed in the finally clause.
			for blast_record in self.records:
				filtered_record = self.ba.filterBlastOne(blast_record)
				if filtered_record is None:
					continue
				if self.multi_hits:
					# here the filtered record is iterable and yields one entry per hit
					seqs = [f_rec.hit_id for f_rec in filtered_record]
					terms = self.mg.getGoTerms(seqs, full=True, ancestors=False)
					self.store_terms(filtered_record[0].query_id, terms, seqs)
				else:
					print(filtered_record.hit_id)
					terms = self.mg.getGoTerms(filtered_record.hit_id, full=True, ancestors=False)
					self.store_terms(filtered_record.query_id, terms, [filtered_record.hit_id])
		finally:
			if outfile_handle is not None:
				outfile_handle.close()


	def store_terms(self, query_id, terms, hit_seqs):
		"""Insert one row per term into self.store_table, creating the table if needed.

		query_id -- value stored in the sequence_key column.
		terms    -- iterable of objects exposing .acc, .name and .type.
		hit_seqs -- hit ids; stored comma-joined in the hit_seqs column.

		Rows are sent with executemany() in slices of EXECUTEMANY_SIZE.
		NOTE(review): no commit is issued here -- confirm the connection
		autocommits or that the caller commits.
		"""
		create_query = """CREATE TABLE IF NOT EXISTS """+self.store_table+""" (
		  id int(11) NOT NULL auto_increment,
		  sequence_key int(11) NOT NULL default '0',
		  domain_sequence_key int(11) NOT NULL,
		  acc varchar(20) NOT NULL default '',
		  name varchar(255) NOT NULL,
		  term_type enum('molecular_function','biological_process','cellular_component') NOT NULL,
		  evidence_code varchar(50) NOT NULL default '',
		  xref_dbname varchar(50) NOT NULL,
		  xref_key varchar(50) NOT NULL,
		  probability double NOT NULL,
		  llr double NOT NULL,
		  level int(11) NOT NULL,
		  source varchar(50) NOT NULL default 'unknown',
		  hit_seqs mediumtext NOT NULL default '',
		  insert_date date default NULL,
		  timestamp timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
		  PRIMARY KEY  (id),
		  UNIQUE KEY sequence_key (sequence_key,domain_sequence_key,acc,evidence_code,source),
		  KEY evidence_code (evidence_code)
		)"""
		# The table name comes from the config file and cannot be a bound
		# parameter; all row values below are passed as parameters.
		query = "INSERT INTO "+self.store_table+" (sequence_key, acc, name, term_type, evidence_code, source,hit_seqs, insert_date) VALUES (%s, %s,%s,%s,%s,%s,%s,%s)"

		# identical for every row, so build them once
		joined_hits = ",".join(map(str,hit_seqs))
		today = datetime.date.today()
		rows = [
			(query_id, term.acc, term.name, term.type, self.store_evidence_code, self.store_source, joined_hits, today)
			for term in terms
		]

		cursor = self.conn.cursor(MySQLdb.cursors.DictCursor)
		try:
			cursor.execute(create_query)
			# slice by index instead of re-copying the remaining list each pass
			for start in range(0, len(rows), EXECUTEMANY_SIZE):
				cursor.executemany(query, rows[start:start+EXECUTEMANY_SIZE])
		finally:
			cursor.close()
コード例 #12
0
ファイル: funcPred_analysis.py プロジェクト: bsmithers/hpf
class FuncPred_Analysis():
	"""Report predictions that match annotations found only in the newer annotation table.

	Hosts, databases, table names, evidence codes, term types and the
	pls_min / base_max thresholds are read from one section of a config file.
	"""

	def __init__(self, config_file=None, section=None):
		"""Read `section` of `config_file` and open the database connections.

		config_file -- path handed to RawConfigParser.read(); when None,
		               self._default_init() is called instead and nothing is read
		section     -- name of the config section that holds every option below
		"""
		# Handle the default case before touching the parser: read(None) raises
		# TypeError, so testing for None afterwards left this branch unreachable.
		if config_file is None:
			self._default_init()
			return

		config = ConfigParser.RawConfigParser()
		config.read(config_file)

		#kdrew: database parameters
		self.sql_user= config.get(section, 'sql_user')
		self.sql_password= config.get(section, 'sql_password')
		self.new_mygo_host= config.get(section, 'new_mygo_host')
		self.new_mygo_db = config.get(section, 'new_mygo_db')
		self.new_annotations_table = config.get(section, 'new_annotations_table')
		self.old_mygo_host= config.get(section, 'old_mygo_host')
		self.old_mygo_db = config.get(section, 'old_mygo_db')
		self.old_annotations_table = config.get(section, 'old_annotations_table')

		self.sequence_host = config.get(section, 'sequence_host')
		self.sequence_db = config.get(section, 'sequence_db')
		self.sequence_table = config.get(section, 'sequence_table')
		self.domain_table = config.get(section, 'domain_table')

		self.prediction_host = config.get(section, 'prediction_host')
		self.prediction_db = config.get(section, 'prediction_db')
		self.prediction_table = config.get(section, 'prediction_table')

		#kdrew: mygo parameters
		# comma-separated option values; str.split already returns a list
		self.evidence_codes_str = config.get(section,'evidence_codes')
		self.evidence_codes = self.evidence_codes_str.split(',')
		self.term_types_str = config.get(section,'term_types')
		self.term_types = self.term_types_str.split(',')

		self.pls_min = config.getfloat(section,'pls_min')
		self.base_max = config.getfloat(section,'base_max')
		self.single_domain = config.getboolean(section,'single_domain')

		self.conn = MySQLdb.connect(host=self.sequence_host, user=self.sql_user, passwd=self.sql_password, db=self.sequence_db)
		self.pred_conn = MySQLdb.connect(host=self.prediction_host, user=self.sql_user, passwd=self.sql_password, db=self.prediction_db)
		self.new_mg = Mygo(self.sql_user,self.sql_password, self.new_mygo_host, e_codes = self.evidence_codes, db_name=self.new_mygo_db)
		self.new_mg.connect()
		self.old_mg = Mygo(self.sql_user,self.sql_password, self.old_mygo_host, e_codes = self.evidence_codes, db_name=self.old_mygo_db)
		self.old_mg.connect()

	def funcpred_analysis(self):
		"""Print, per domain, the predictions backed by annotations that are new.

		For every domain yielded by NewAnnotatedDomains:
		  1. load the old and the new annotations of the parent sequence
		  2. load the predictions for the (parent, domain) key pair
		  3. drop predictions whose function is already in the old annotations
		  4. keep the remaining predictions whose function is in (new - old)
		  5. if any are left, print them filtered by pls_min / base_max
		"""
		#kdrew: get domains from diff tables (eg. hddb_IEA_mygoLite_062005_062009)
		domains = NewAnnotatedDomains(self.conn, self.sequence_table, self.domain_table, self.evidence_codes)
		domains.load_domains()

		for domain in domains:
			parent_key = domain.get_parent_sequence_key()
			domain_key = domain.get_domain_sequence_key()

			# With the single_domain flag set, only domains whose parent and
			# domain sequence keys are equal are analysed; all others are skipped.
			if self.single_domain and parent_key != domain_key:
				continue

			# single pre-formatted argument: same output as "print a, b" on Python 2
			print("%s %s" % (parent_key, domain_key))

			#kdrew: get_old_annotations
			old_anns = Terms(ecode=self.evidence_codes)
			old_anns.load_terms(conn=self.old_mg.get_conn(), psk=parent_key, go_type=self.term_types, term_table=self.old_annotations_table)

			#kdrew: get_new_annotations
			new_anns = Terms(ecode=self.evidence_codes)
			new_anns.load_terms(self.new_mg.get_conn(), parent_key, self.term_types, term_table=self.new_annotations_table)

			#kdrew: get_predictions
			preds = Predictions(conn=self.pred_conn, table=self.prediction_table)
			preds.load_predictions(parent_key, domain_key)

			#kdrew: predictions_trim = predictions - old_annotations
			#kdrew: produces a list of every prediction that is not in the old annotations
			predictions_trim = Predictions()
			predictions_trim.extend([x for x in preds if x.function not in old_anns])

			#kdrew: new_annotations_trim = new_annotations - old_annotations
			#kdrew: produces a list of only the new annotations that are truly new, not been seen before
			new_annotations_trim = list(set(new_anns) - set(old_anns))

			#kdrew: predictions_trim & new_annotations_trim
			# NOTE(review): new_ann_trim_tmp is never read afterwards; kept only in
			# case getByAcc has side effects -- confirm and remove.
			new_ann_trim_tmp = [x for x in new_annotations_trim if predictions_trim.getByAcc(x.acc)]
			prediction_new_annotation_intersection = Predictions()
			prediction_new_annotation_intersection.extend([x for x in predictions_trim if x.function in new_annotations_trim])

			if len(prediction_new_annotation_intersection) > 0:
				confirmed = prediction_new_annotation_intersection.filter(pls_llr=self.pls_min, base_llr=self.base_max)
				# two spaces on purpose: the label's own trailing space plus the
				# separator the Python 2 print statement inserted between items
				print("pred & new:  %s" % (confirmed,))
コード例 #13
0
ファイル: diff_annotations.py プロジェクト: bsmithers/hpf
class DiffAnnotations():
	"""Store, per sequence key, the terms present in the recent annotations but not in the previous ones.

	Hosts, databases, table names, evidence codes and term types are read
	from one section of a config file.
	"""

	def __init__(self, config_file=None, section=None):
		"""Read `section` of `config_file` and open the database connections.

		config_file -- path handed to RawConfigParser.read(); when None,
		               self._default_init() is called instead and nothing is read
		section     -- name of the config section that holds every option below
		"""
		# Handle the default case before touching the parser: read(None) raises
		# TypeError, so testing for None afterwards left this branch unreachable.
		if config_file is None:
			self._default_init()
			return

		config = ConfigParser.RawConfigParser()
		config.read(config_file)

		#kdrew: database parameters
		self.sql_user= config.get(section, 'sql_user')
		self.sql_password= config.get(section, 'sql_password')
		self.mygo_host= config.get(section, 'mygo_host')
		self.mygo_previous_db = config.get(section, 'mygo_previous_db')
		self.mygo_recent_db = config.get(section, 'mygo_recent_db')

		#kdrew: store table parameters
		self.store_host = config.get(section, 'store_host')
		self.store_db = config.get(section, 'store_db')
		self.store_table_name = config.get(section, 'store_table_name')
		self.store_source = config.get(section, 'store_source')
		self.store_evidence_code = config.get(section, 'store_evidence_code')

		self.annotation_host= config.get(section, 'annotation_host')
		self.annotation_db = config.get(section, 'annotation_db')
		self.annotation_table_recent = config.get(section, 'annotation_table_name_recent')
		self.annotation_table_previous = config.get(section, 'annotation_table_name_previous')

		self.previous_source = config.get(section, 'previous_source')
		self.recent_source = config.get(section, 'recent_source')

		#kdrew: mygo parameters
		# comma-separated option values; str.split already returns a list
		self.recent_evidence_codes_str = config.get(section,'recent_evidence_codes')
		self.recent_evidence_codes = self.recent_evidence_codes_str.split(',')
		self.previous_evidence_codes_str = config.get(section,'previous_evidence_codes')
		self.previous_evidence_codes = self.previous_evidence_codes_str.split(',')
		self.term_types_str = config.get(section,'term_types')
		self.term_types = self.term_types_str.split(',')

		self.conn = MySQLdb.connect(host=self.annotation_host, user=self.sql_user, passwd=self.sql_password, db=self.annotation_db)
		self.store_conn = MySQLdb.connect(host=self.store_host, user=self.sql_user, passwd=self.sql_password, db=self.store_db)
		self.justStore = GO_IEA_Just_Store(self.store_conn, self.store_table_name, self.store_evidence_code, self.store_source)
		self.mg = Mygo(self.sql_user,self.sql_password, self.mygo_host, e_codes = self.recent_evidence_codes, db_name=self.mygo_recent_db)
		self.mg.connect()

	#kdrew: find difference between terms using set theory
	def diff_terms(self, recent_terms, previous_terms):
		"""Return a list of the items in recent_terms that are not in previous_terms.

		Both arguments are turned into sets, so duplicates collapse and the
		order of the result is not defined.
		"""
		recent_set = set(recent_terms)
		previous_set = set(previous_terms)
		return list(recent_set - previous_set)

	def _quoted_csv(self, values):
		# Render values as 'a','b','c' for direct use inside an SQL "in (...)" list.
		return "'"+"','".join(values)+"'"

	def _graph_path_term_rows(self, cursor, annotation_table, mygo_db, type_fmt, codes_fmt, sequence_key):
		# Return the distinct (acc, name, term_type) rows of every term t2 that
		# graph_path links (as term1_id) to a term annotated on sequence_key
		# (as term2_id) -- presumably the annotated terms plus their ancestors; confirm.
		# NOTE(review): table names, db names and list values are interpolated
		# straight into the SQL text; they come from the config file.
		query = """select distinct t2.acc, t2.name, t2.term_type
				from """+annotation_table+""" as hig, """+mygo_db+""".term as t, """+mygo_db+""".term as t2, """+mygo_db+""".graph_path as gp 
				where gp.term2_id = t.id and t.acc = hig.acc and t2.id = gp.term1_id and hig.term_type in (%s) and t2.term_type in (%s) and hig.evidence_code in (%s) and hig.sequence_key = %s""" % (type_fmt,type_fmt,codes_fmt,str(sequence_key))
		cursor.execute(query)
		return cursor.fetchall()

	def diff(self):
		"""For every sequence key in the recent table, store the terms that are new.

		Sequence keys are selected from the recent annotation table by term type
		and recent evidence codes.  For each key the terms are loaded from the
		previous and the recent databases, the difference (recent - previous) is
		printed and handed to self.justStore.store_terms().
		"""
		previous_codes_fmt = self._quoted_csv(self.previous_evidence_codes)
		print(self.previous_evidence_codes)
		print(previous_codes_fmt)
		recent_codes_fmt = self._quoted_csv(self.recent_evidence_codes)
		print(self.recent_evidence_codes)
		print(recent_codes_fmt)
		type_fmt = self._quoted_csv(self.term_types)
		print(self.term_types)
		print(type_fmt)

		cursor = self.conn.cursor(MySQLdb.cursors.DictCursor)
		try:
			#kdrew: get all sequence keys with a specific term_type (ie. molecular_function) and a given evidence code
			query_seqs = """select distinct hig.sequence_key from """+self.annotation_table_recent+""" as hig where hig.term_type in (%s) and hig.evidence_code in (%s)""" % (type_fmt, recent_codes_fmt)
			print(query_seqs)
			cursor.execute(query_seqs)
			seqs = cursor.fetchall()
			print(len(seqs))

			#kdrew: for every sequence key
			for seq in seqs:
				sequence_key = seq["sequence_key"]

				#kdrew: get all annotations for the sequence key from the earlier database
				previous_term_rows = self._graph_path_term_rows(cursor, self.annotation_table_previous, self.mygo_previous_db, type_fmt, previous_codes_fmt, sequence_key)
				previous_terms = rows2terms(previous_term_rows, self.previous_evidence_codes,full=True)

				#kdrew: get all annotations for the sequence key from the recent database
				recent_term_rows = self._graph_path_term_rows(cursor, self.annotation_table_recent, self.mygo_recent_db, type_fmt, recent_codes_fmt, sequence_key)
				recent_terms = rows2terms(recent_term_rows, self.recent_evidence_codes,full=True)

				new_terms = self.diff_terms(recent_terms, previous_terms)
				# doubled spaces reproduce what the Python 2 print statement
				# emitted between its comma-separated items
				print("sequence_key:  %s  new terms:  %s" % (sequence_key, new_terms))
				self.justStore.store_terms(sequence_key, new_terms)
		finally:
			# Release the cursor even when a query fails.
			cursor.close()
コード例 #14
0
ファイル: log_ratiosTest.py プロジェクト: bsmithers/hpf
class SimpleLogRatioRunTestCase(unittest.TestCase):
		
	def setUp(self):
		# Build the whole fixture: blast filter -> cd-hit clustering -> GO cluster
		# terms -> frequency / probability / log-ratio metrics -> upload to MySQL.
		self.freq_metric = hpf.function.metric.Frequency()
		self.prob_metric = hpf.function.metric.Probability()
		self.log_ratio_metric = hpf.function.metric.LogRatios()

		# blast filter: executable, database, query file and thresholds
		self.my_blast_exe = "/Users/patrick/.local/share/blast-2.2.18/bin/blastall"
		self.my_blast_db = "/Users/kdrew/astral/1.75/astral95.1.75"
		self.my_blast_file = "./createdbs/data/test_seq.fasta"
		self.e_value_threshold = 1e-8
		self.length_threshold = .85

		# cd-hit: executable, input fasta and cutoffs
		self.my_cd_hit_exe = "/Users/kdrew/programs/cd-hit/cd-hit"
		self.my_fasta_file = "./createdbs/data/test_seq_filtered.fasta"
		self.identity_cutoff = .95
		self.length_cutoff = .5

		# run blast and keep the filtered records
		self.ba = AstralBlastFilter(
			self.my_blast_exe,
			self.my_blast_db,
			self.my_blast_file,
			self.e_value_threshold,
			self.length_threshold)
		self.records = self.ba.runBlast()
		self.filtered = self.ba.filterBlast(self.records)

		# run the clustering
		self.cd = CDHitGO(
			self.my_cd_hit_exe,
			self.my_fasta_file,
			id_c=self.identity_cutoff,
			len_c=self.length_cutoff)
		self.cd.runCDHit()

		# database credentials shared by the mygo lookup and the upload connection
		db_user = "******"
		db_password = "******"
		db_host = "mcpeepants.bio.nyu.edu"

		self.mg = Mygo(db_user, db_password, db_host, e_codes=['TAS','IDA','IMP','IEA'], db_name="mygo")
		self.mg.connect()
		self.clstr_terms = self.cd.getClusters(self.filtered, self.mg)

		# each metric is computed from the previous one; the cluster terms are
		# dropped as soon as the frequency metric has consumed them
		self.freq_metric.compute_metric(self.clstr_terms)
		del self.clstr_terms
		self.prob_metric.compute_metric(self.freq_metric)
		self.log_ratio_metric.compute_metric(self.prob_metric)

		# upload the log ratios into the test table
		self.conn = MySQLdb.connect(host=db_host, user=db_user, passwd=db_password, db="hpf")
		upload_table = "test_log_ratio"
		self.log_ratio_metric.upload_metric(self.conn, upload_table, delete_table=True)

	

	def testLogRatioTerm(self):
		# Check term-vs-term log ratios against values recomputed from TINY_NUM.
		# Uses TestCase assertions: bare assert statements are stripped under
		# "python -O", which would let these checks pass without running.
		#kdrew: first TINY_NUM is so don't divide by 0 when computing probability
		#kdrew: second TINY_NUM is so don't divide by 0 when computing log ratio
		#kdrew: third TINY_NUM is so don't take log(0)
		expected_all = math.log(((4)/(4+TINY_NUM))/(TINY_NUM+TINY_NUM)+TINY_NUM)
		actual_all = self.log_ratio_metric.get_metric(Term(a="all"), Term(a="all"))
		# doubled space matches what the Python 2 print statement emitted
		print("lr(all|all):  %s" % (actual_all,))
		print("lr(all|all) compute:  %s" % (expected_all,))
		self.assertEqual(expected_all, actual_all, "wrong log ratio")

		actual_unrelated = self.log_ratio_metric.get_metric(Term(a="GO:0000133"), Term(a="GO:0000135"))
		self.assertEqual(math.log((TINY_NUM)), actual_unrelated, "wrong log ratio")

	def testLogRatioSF(self):
		# Check superfamily/term log ratios against values recomputed from TINY_NUM.
		# The "compute" line now prints the same expected value that is asserted;
		# it used to print a different formula, which was misleading when a check failed.
		# Uses TestCase assertions because bare asserts are stripped under "python -O".
		expected = math.log((((2)/(3+TINY_NUM))/(TINY_NUM+TINY_NUM))+TINY_NUM)
		actual = self.log_ratio_metric.get_metric(SuperfamilyEntry(sf_id = "a.51.1"), Term(a="GO:0043227"))
		# doubled space matches what the Python 2 print statement emitted
		print("log_ratio (GO:0043227 | a.51.1):  %s" % (actual,))
		print("compute log_ratio (GO:0043227 | a.51.1):  %s" % (expected,))
		self.assertEqual(expected, actual, "wrong log ratio ")

		# a pair with no recorded relation is expected to sit at log(TINY_NUM), in either argument order
		floor = math.log(TINY_NUM)
		self.assertEqual(floor, self.log_ratio_metric.get_metric(SuperfamilyEntry(sf_id = "a.51.1"), Term(a="GO:0001530")), "wrong log_ratio count")
		self.assertEqual(floor, self.log_ratio_metric.get_metric(Term(a="GO:0001530"), SuperfamilyEntry(sf_id = "a.51.1")), "wrong log_ratio count")

	def testTheoryLRTerm(self):
		# Compare term-vs-term log ratios with hard-coded reference values.
		# Both sides are rounded to 3 decimals before comparison, as before.
		# Uses TestCase assertions because bare asserts are stripped under "python -O".
		reference = (
			(15.4249, "all", "all"),
			(14.0386, "all", "GO:0043170"),
			(0.0, "GO:0043170", "all"),
			(0.4054, "GO:0043170", "GO:0008324"),
			(-16.11809, "GO:0000133", "GO:0000135"),
		)
		for expected, first, second in reference:
			actual = self.log_ratio_metric.get_metric(first, second)
			self.assertEqual(round(expected,3), round(actual,3), "wrong log ratio")

	def testTheoryLRSF(self):
		# Compare log ratios involving superfamily "a.51.1" with hard-coded
		# reference values; both sides are rounded to 3 decimals before comparing.
		# NOTE(review): get_metric is called with plain strings here, whereas
		# testLogRatioSF passes SuperfamilyEntry / Term objects -- confirm that
		# both call forms are supported.
		assert round(15.0194,3) == round(self.log_ratio_metric.get_metric("a.51.1", "GO:0043227"),3), "wrong log ratio"
		assert round(0.6931,3) == round(self.log_ratio_metric.get_metric("GO:0043227", "a.51.1"),3), "wrong log ratio"
		assert round(15.4249,3) == round(self.log_ratio_metric.get_metric("a.51.1","a.51.1"),3), "wrong log ratio"