Example #1
0
    def runSSCheck(self):
        cursor = self.since_solved_conn.cursor(MySQLdb.cursors.DictCursor)

        query = """

				SELECT distinct mc.cluster_id AS cluster_id, mc.sequence_key, s.sequence, ss.ascession_date
				FROM since_solved.mcm_cluster mc, since_solved.since_solved ss, hpf.sequence as s
				WHERE mc.cluster_id = ss.cluster_id AND mc.sequence_key = s.id
					AND ss.ascession_date >= '2005-01-01'
			"""

        print query
        cursor.execute(query)
        rows = cursor.fetchall()

        for row in rows:
            if len(row["sequence"]) > 0:
                pdb_dates = get_pdb_date_Blast(
                    row["sequence"], self.e_value_threshold, self.length_threshold, self.identity_cutoff
                )
                min_date = get_min_date(pdb_dates)
                if min_date != None and self.date_cutoff > min_date[0]:
                    print row["sequence_key"], min_date, row["sequence"]
Example #2
0
	def runDateCheck(self):
		"""Label each filtered BLAST result key as contaminated or clean by date.

		Opens the file at ``self.my_blast_outfile``, feeds it through
		``NCBIXML.parse`` (presumably Biopython's BLAST XML parser -- confirm
		against the file's imports) and filters the records with a
		``PDBBlastFilter`` built from ``self.e_value_threshold``,
		``self.length_threshold`` and ``self.identity_cutoff``.

		For every key of the filtered result, ``get_pdb_date`` and
		``get_min_date`` are applied and one line is printed:

		* ``contaminated`` when a minimum date exists and ``self.date_cutoff``
		  is greater than its first element,
		* ``clean`` when a minimum date exists but does not precede the cutoff,
		* ``clean (mindate is None)`` when no minimum date is available.

		Returns:
			None.  The classification is written to stdout.
		"""
		ba = PDBBlastFilter(
			eval_cutoff=self.e_value_threshold,
			length_cutoff=self.length_threshold,
			identity_cutoff=self.identity_cutoff,
			multi_hits=True,
		)

		# Keep the handle open for the whole loop in case the parser or the
		# filter reads lazily; the with-block guarantees it is closed afterwards.
		with open(self.my_blast_outfile) as outfile_handle:
			blast_records = NCBIXML.parse(outfile_handle)
			filtered = ba.filterBlast(blast_records)

			for key in filtered.keys():
				pdb_dates = get_pdb_date(filtered[key], parse_hit_id=True)
				min_date = get_min_date(pdb_dates)

				# Single-argument parenthesised prints emit the same text with
				# the Python 2 print statement and the Python 3 function.
				if min_date is None:
					print("%s %s" % ("clean (mindate is None): ", key))
				elif self.date_cutoff > min_date[0]:
					print("%s %s %s" % ("contaminated: ", key, min_date))
				else:
					print("%s %s %s" % ("clean: ", key, min_date))