def match_ancestors_across_models(con): """This method fills data in the table AncestorsAcrossModels""" cur = con.cursor() modelids = get_phylo_modelids(con) msaids = get_alignment_method_ids(con) ancid_childrenids = {} # key = Ancestor ID, value = list of Taxa IDs """Pull the map of taxon names to IDs from the database. We'll access this information a lot, so let's save it in a separate hashtable rather than repeatedly querying the databse.""" taxonname_id = {} sql = "select id, shortname from Taxa" cur.execute(sql) for ii in cur.fetchall(): id = ii[0] name = ii[1] taxonname_id[name] = id for modelid in modelids: for msaid in msaids: sql = "select newick from AncestralCladogram where unsupportedmltreeid in (select id from UnsupportedMlPhylogenies where almethod=" + msaid.__str__( ) + " and phylomodelid=" + modelid.__str__() + ")" cur.execute(sql) xx = cur.fetchone() if xx is None: write_error(con, "I cannot find the ancestral Newick cladogram for almethod=" + msaid.__str__() + " and phylomodelid=" + modelid.__str__()) cladonewick = xx[0].__str__() t = Tree() t.read_from_string(cladonewick, "newick") for node in t.nodes(): if node.is_leaf() == False and node.level() > 0: sql = "select id from Ancestors where name='Node" + node.label + \ "' and almethod=" + msaid.__str__() + " and phylomodel=" + modelid.__str__() cur.execute(sql) ancid = cur.fetchone()[0] ancid_childrenids[ancid] = [] for l in node.leaf_iter(): # print "978:", l taxonname = l.as_newick_string() # print "980:", taxonname taxonname = re.sub("'", "", taxonname) ancid_childrenids[ancid].append( taxonname_id[taxonname]) # key = Ancestor ID, value = list of other ancestor IDs with the same # children. ancid_matches = {} for anc1 in ancid_childrenids: ancid_matches[anc1] = [] mychildren = ancid_childrenids[anc1] mychildren.sort() for anc2 in ancid_childrenids: if anc1 == anc2: """Skip the self comparison.""" continue theirchildren = ancid_childrenids[anc2] theirchildren.sort() if mychildren == theirchildren: ancid_matches[anc1].append(anc2) sql = "delete from AncestorsAcrossModels" cur.execute(sql) con.commit() for anc1 in ancid_matches: for anc2 in ancid_matches[anc1]: sql = "insert into AncestorsAcrossModels (ancid, same_ancid) values(" + anc1.__str__( ) + "," + anc2.__str__() + ")" cur.execute(sql) con.commit()