def runSSCheck(self):
    """Print since-solved cluster sequences whose earliest PDB date precedes the cutoff.

    Selects every distinct (cluster, sequence) pair from the since_solved
    tables whose ``ascession_date`` is on or after 2005-01-01, looks up PDB
    dates for each non-empty sequence via ``get_pdb_date_Blast`` (using
    ``self.e_value_threshold``, ``self.length_threshold`` and
    ``self.identity_cutoff``), and prints the sequence key, the minimum date
    and the sequence whenever ``self.date_cutoff`` is later than that
    minimum date.

    Returns:
        None. Results are written to standard output.
    """
    query = """
        SELECT distinct mc.cluster_id AS cluster_id, mc.sequence_key, s.sequence, ss.ascession_date
        FROM since_solved.mcm_cluster mc, since_solved.since_solved ss, hpf.sequence as s
        WHERE mc.cluster_id = ss.cluster_id AND mc.sequence_key = s.id
        AND ss.ascession_date >= '2005-01-01'
    """

    cursor = self.since_solved_conn.cursor(MySQLdb.cursors.DictCursor)
    try:
        # Single-argument print(...) behaves identically as a Python 2
        # statement and as a Python 3 function call.
        print(query)
        cursor.execute(query)
        rows = cursor.fetchall()
    finally:
        # All rows are already in memory, so the cursor can be released
        # before the (potentially slow) per-sequence lookups start.
        cursor.close()

    for row in rows:
        sequence = row["sequence"]
        # Skip empty sequences; a NULL (None) sequence is skipped as well
        # instead of failing on len(None).
        if not sequence:
            continue

        pdb_dates = get_pdb_date_Blast(
            sequence,
            self.e_value_threshold,
            self.length_threshold,
            self.identity_cutoff,
        )
        min_date = get_min_date(pdb_dates)
        if min_date is None:
            continue

        # min_date is indexable; its first element is what gets compared
        # against the cutoff (presumably the date itself -- confirm against
        # get_min_date).
        if self.date_cutoff > min_date[0]:
            print("%s %s %s" % (row["sequence_key"], min_date, sequence))
def runDateCheck(self):
    """Classify each filtered BLAST query as contaminated or clean by PDB date.

    Parses the BLAST XML output at ``self.my_blast_outfile``, filters the
    records with a ``PDBBlastFilter`` built from ``self.e_value_threshold``,
    ``self.length_threshold`` and ``self.identity_cutoff``, and for every key
    in the filtered result prints one line:

    * ``contaminated:`` when ``self.date_cutoff`` is later than the minimum
      PDB date of the key's hits,
    * ``clean:`` when a minimum date exists but is not before the cutoff,
    * ``clean (mindate is None):`` when no minimum date could be determined.

    Returns:
        None. Results are written to standard output.
    """
    # NOTE(review): an earlier, commented-out variant selected domain
    # sequences for self.experiment_key from the database and looked up dates
    # per sequence with get_pdb_date_Blast. This version reads only the
    # pre-computed BLAST output file, so no database cursor is opened.
    blast_filter = PDBBlastFilter(
        eval_cutoff=self.e_value_threshold,
        length_cutoff=self.length_threshold,
        identity_cutoff=self.identity_cutoff,
        multi_hits=True,
    )

    # NCBIXML.parse yields records lazily, so the file stays open until all
    # processing is finished and is then closed even if an error occurs.
    with open(self.my_blast_outfile) as outfile_handle:
        blast_records = NCBIXML.parse(outfile_handle)
        filtered = blast_filter.filterBlast(blast_records)

        for key in filtered.keys():
            pdb_dates = get_pdb_date(filtered[key], parse_hit_id=True)
            min_date = get_min_date(pdb_dates)

            # Single-argument print(...) with %-formatting reproduces the
            # space-separated output of the Python 2 print statement and is
            # also valid Python 3.
            if min_date is None:
                print("%s %s" % ("clean (mindate is None): ", key))
            elif self.date_cutoff > min_date[0]:
                print("%s %s %s" % ("contaminated: ", key, min_date))
            else:
                print("%s %s %s" % ("clean: ", key, min_date))