Exemple #1
0
def get_dataset_from_treebase(study_id,
                                phylesystem_loc='api'):
    """Download the TreeBASE dataset referenced by a study's nexson record.

    The nexson for ``study_id`` is obtained through ``get_nexson``; its
    ``nexml -> ^ot:dataDeposit -> @href`` entry is read as the data-deposit
    URL. When that URL mentions ``treebase``, the text following ``':S'`` is
    used as the TreeBASE study id and the study is downloaded in nexml format.

    Parameters
    ----------
    study_id
        Study identifier, passed unchanged to ``get_nexson``.
    phylesystem_loc
        Phylesystem location, passed unchanged to ``get_nexson``
        (defaults to ``'api'``).

    Returns
    -------
    The object returned by ``DataSet.get`` for the TreeBASE nexml download.

    Raises
    ------
    SystemExit
        With status ``-2`` when the data-deposit URL does not mention
        treebase.
    KeyError
        When the nexson lacks the ``^ot:dataDeposit`` / ``@href`` entries.
    IndexError
        When the data-deposit URL contains no ``':S'`` marker.
    """
    nexson = get_nexson(study_id, phylesystem_loc)
    treebase_url = nexson['nexml'][u'^ot:dataDeposit'][u'@href']
    if 'treebase' not in treebase_url:
        # sys.stderr is a stream object, not a callable: the message has to
        # go through write(), otherwise this path dies with a TypeError.
        sys.stderr.write("No treebase record associated with study ")
        # Exit with a non-zero status so calling scripts can detect failure.
        sys.exit(-2)
    # The TreeBASE study id is whatever follows ':S' in the deposit URL.
    tb_id = treebase_url.split(':S')[1]
    dna = DataSet.get(url="https://treebase.org/treebase-web/search/downloadAStudy.html?id={}&format=nexml".format(tb_id),
                      schema="nexml")
    return dna
def get_dataset_from_treebase(study_id, phylesystem_loc="api"):
    """Get the alignment data from TreeBASE for a study known to OpenTree.

    The nexson for ``study_id`` is obtained through ``get_nexson``; its
    ``nexml -> ^ot:dataDeposit -> @href`` entry is read as the data-deposit
    URL. When that URL mentions ``treebase``, the text following ``':S'`` is
    used as the TreeBASE study id and the study is downloaded in nexml format.

    Parameters
    ----------
    study_id
        Study identifier, passed unchanged to ``get_nexson``.
    phylesystem_loc
        Phylesystem location, passed unchanged to ``get_nexson``
        (defaults to ``"api"``).

    Returns
    -------
    The object returned by ``DataSet.get`` for the TreeBASE nexml download.

    Raises
    ------
    HTTPError
        Re-raised, after logging to stderr, when ``get_nexson`` fails.
    SystemExit
        With status ``-2`` when the data-deposit URL does not mention
        treebase.
    KeyError
        When the nexson lacks the ``^ot:dataDeposit`` / ``@href`` entries.
    IndexError
        When the data-deposit URL contains no ``':S'`` marker.
    """
    try:
        nexson = get_nexson(study_id, phylesystem_loc)
    except HTTPError as err:
        # write() only accepts text, so the exception must be stringified.
        sys.stderr.write(str(err) + "\n")
        sys.stderr.write(
            "couldn't find study id {} in phylesystem location {}\n".format(
                study_id, phylesystem_loc))
        # Without a nexson there is nothing to look up; propagate the real
        # error instead of falling through to an unbound-variable failure.
        raise
    treebase_url = nexson['nexml'][u'^ot:dataDeposit'][u'@href']
    if 'treebase' not in treebase_url:
        sys.stderr.write("No treebase record associated with study ")
        sys.exit(-2)
    # The TreeBASE study id is whatever follows ':S' in the deposit URL.
    tb_id = treebase_url.split(':S')[1]
    url = "https://treebase.org/treebase-web/search/downloadAStudy.html?id={}&format=nexml".format(
        tb_id)
    if _DEBUG:
        sys.stderr.write(url + "\n")
    dna = DataSet.get(url=url, schema="nexml")
    return dna
Exemple #3
0
def parse_nexus(nexus_loc):
    """Read a nexus file and summarise it as a dictionary for the template engine.

    Parameters
    ----------
    nexus_loc: str
        location of the nexus file

    Returns
    -------
    dict
        Dictionary with the keys:

        * ``"taxa"`` - maps each taxon label to a dict holding its ``"id"``
          (the label), ``"date"`` (``parse_date`` applied to the label) and
          ``"sequence"`` (the sequence rendered with ``str``)
        * ``"tree"`` - first tree list of the file as a newick string
        * ``"site_count"`` - ``max_sequence_size`` of the first character matrix
        * ``"taxon_count"`` - number of entries in ``"taxa"``
        * ``"cutoff"`` - latest date seen (never below 2020.1) minus 2019.75
        * ``"grid_points"`` - ``cutoff * 52`` rounded; presumably one point
          per week, to be confirmed against the template
    """
    print("Parsing nexus file... ", end="")
    dataset = DataSet.get(path=nexus_loc, schema="nexus")

    parsed = {
        "taxa": {},
        "tree": dataset.tree_lists[0].as_string("newick"),
    }
    taxa = parsed["taxa"]
    matrix = dataset.char_matrices[0]

    # Track how many rows were visited and the most recent date among them;
    # the date floor of 2020.1 applies even when every taxon is older.
    loaded = 0
    latest = 2020.1
    for loaded, (taxon, sequence) in enumerate(matrix.items(), 1):
        label = taxon.label
        sampled = parse_date(label)
        latest = max(latest, sampled)
        taxa[label] = {"id": label, "date": sampled, "sequence": str(sequence)}
    print("Done. {} taxa loaded".format(loaded))

    parsed["site_count"] = matrix.max_sequence_size
    parsed["taxon_count"] = len(taxa)
    cutoff = latest - 2019.75
    parsed["cutoff"] = cutoff
    parsed["grid_points"] = round(cutoff * 52)

    return parsed