def get_dataset_from_treebase(study_id, phylesystem_loc='api'):
    """Fetch the TreeBASE dataset referenced by a study's nexson.

    The study's nexson is obtained with ``get_nexson``; its
    ``^ot:dataDeposit`` link is expected to point at TreeBASE, and the
    part of that link following ``':S'`` is used as the TreeBASE study id
    in the download URL.

    Parameters
    ----------
    study_id:
        Study identifier, passed through to ``get_nexson``.
    phylesystem_loc:
        Phylesystem location, passed through to ``get_nexson``
        (defaults to ``'api'``).

    Returns
    -------
    The object returned by ``DataSet.get`` for the TreeBASE nexml download.

    Notes
    -----
    Exits the process with status -2 (via ``sys.exit``) when the data
    deposit link does not mention TreeBASE. The nexson is indexed
    directly, so a study without a ``^ot:dataDeposit`` entry surfaces as
    a lookup error (presumably ``KeyError`` -- confirm against what
    ``get_nexson`` returns).
    """
    nexson = get_nexson(study_id, phylesystem_loc)
    treebase_url = nexson['nexml'][u'^ot:dataDeposit'][u'@href']
    if 'treebase' not in treebase_url:
        # sys.stderr is a stream object, not a callable: use .write().
        sys.stderr.write(
            "No treebase record associated with study {}\n".format(study_id))
        # Non-zero status so callers/shells can tell this was a failure.
        sys.exit(-2)
    # The deposit link is expected to contain ':S<id>'; the text after the
    # marker is the TreeBASE study id.
    tb_id = treebase_url.split(':S')[1]
    url = "https://treebase.org/treebase-web/search/downloadAStudy.html?id={}&format=nexml".format(tb_id)
    dna = DataSet.get(url=url, schema="nexml")
    return dna
def get_dataset_from_treebase(study_id, phylesystem_loc="api"):
    """Get the alignment from TreeBASE for a study whose tree OpenTree has mapped.

    The study's nexson is obtained with ``get_nexson``; its
    ``^ot:dataDeposit`` link is expected to point at TreeBASE, and the
    part of that link following ``':S'`` is used as the TreeBASE study id
    in the download URL.

    Parameters
    ----------
    study_id:
        Study identifier, passed through to ``get_nexson``.
    phylesystem_loc:
        Phylesystem location, passed through to ``get_nexson``
        (defaults to ``"api"``).

    Returns
    -------
    The object returned by ``DataSet.get`` for the TreeBASE nexml download.

    Raises
    ------
    HTTPError
        Re-raised, after a diagnostic is written to stderr, when
        ``get_nexson`` fails with it.

    Notes
    -----
    Exits the process with status -2 (via ``sys.exit``) when the data
    deposit link does not mention TreeBASE.
    """
    try:
        nexson = get_nexson(study_id, phylesystem_loc)
    except HTTPError as err:
        # Stream.write() only accepts text, so format the exception first.
        sys.stderr.write("{}\n".format(err))
        sys.stderr.write(
            "couldn't find study id {} in phylesystem location {}\n".format(
                study_id, phylesystem_loc))
        # There is no nexson to continue with; propagate the failure rather
        # than falling through to an unbound `nexson` below.
        raise
    treebase_url = nexson['nexml'][u'^ot:dataDeposit'][u'@href']
    if 'treebase' not in treebase_url:
        sys.stderr.write(
            "No treebase record associated with study {}\n".format(study_id))
        sys.exit(-2)
    # The deposit link is expected to contain ':S<id>'; the text after the
    # marker is the TreeBASE study id.
    tb_id = treebase_url.split(':S')[1]
    url = "https://treebase.org/treebase-web/search/downloadAStudy.html?id={}&format=nexml".format(
        tb_id)
    if _DEBUG:
        sys.stderr.write(url + "\n")
    dna = DataSet.get(url=url, schema="nexml")
    return dna
def parse_nexus(nexus_loc):
    """
    Read a nexus file and collect its contents for the template engine.

    Parameters
    ----------
    nexus_loc: str
        location of nexus file

    Returns
    -------
    dict
        ``"tree"`` holds the first tree list rendered as newick;
        ``"taxa"`` maps each label of the first character matrix to a dict
        with its ``"id"``, ``"date"`` (from ``parse_date``) and
        ``"sequence"``; ``"site_count"``, ``"taxon_count"``, ``"cutoff"``
        and ``"grid_points"`` are summary values derived from them.
    """
    print("Parsing nexus file... ", end="")
    dataset = DataSet.get(path=nexus_loc, schema="nexus")

    taxa = {}
    result = {"taxa": taxa}
    result["tree"] = dataset.tree_lists[0].as_string("newick")

    matrix = dataset.char_matrices[0]
    # The latest date never drops below this floor, even if every taxon is
    # older (presumably a decimal year -- confirm against parse_date).
    latest = 2020.1
    seen = 0
    for seen, (taxon, sequence) in enumerate(matrix.items(), start=1):
        label = taxon.label
        date = parse_date(label)
        latest = max(latest, date)
        taxa[label] = {"id": label, "date": date, "sequence": str(sequence)}
    print("Done. {} taxa loaded".format(seen))

    result["site_count"] = matrix.max_sequence_size
    # Counts distinct labels, which can be fewer than the rows visited above
    # if labels repeat.
    result["taxon_count"] = len(taxa)
    cutoff = latest - 2019.75
    result["cutoff"] = cutoff
    # 52 grid points per unit of cutoff (presumably weeks per year).
    result["grid_points"] = round(cutoff * 52)
    return result