Example #1
0
 def testCreated(self):
     # An empty NexSON template, built with default arguments and then with
     # include_cc0=True, is validated; in both cases the first element of
     # the validation result must report no error.
     for template_kwargs in ({}, {'include_cc0': True}):
         blob = get_empty_nexson(**template_kwargs)
         annotation = validate_nexson(blob)[0]
         self.assertFalse(annotation.has_error())
Example #2
0
 def testCreated(self):
     # Validate a fresh empty NexSON twice: once created with the default
     # arguments and once with include_cc0=True. The first item returned
     # by validate_nexson must not flag an error for either template.
     for creation_args in ({}, {'include_cc0': True}):
         empty_blob = get_empty_nexson(**creation_args)
         validation_result = validate_nexson(empty_blob)
         first_annotation = validation_result[0]
         self.assertFalse(first_annotation.has_error())
Example #3
0
    def _new_nexson_with_crossref_metadata(doi, ref_string, include_cc0=False):
        """Return a fresh NexSON blob seeded with publication metadata from CrossRef.

        The CrossRef search service is queried with ``doi`` when it is
        truthy, otherwise with ``ref_string``. The first record returned
        supplies the reference text, the publication URL and the year.

        Args:
            doi: DOI (or DOI URL) of the publication; may be empty.
            ref_string: free-text reference used when ``doi`` is empty.
            include_cc0: forwarded to ``get_empty_nexson``.

        Returns:
            The NexSON dict from ``get_empty_nexson``. Its
            ``^ot:studyPublicationReference`` is always set: to the matched
            citation, or to a "No matching publication found ..." message.
            ``^ot:studyPublication`` and ``^ot:studyYear`` are set only when
            the lookup yielded non-empty values.
        """
        # Prefer the DOI; fall back to the free-text reference string.
        search_term = doi or ref_string

        if search_term:
            # look for matching studies via CrossRef.org API
            doi_lookup_response = fetch(
                'http://search.crossref.org/dois?%s' %
                urlencode({'q': search_term})
            )
            doi_lookup_response = unicode(doi_lookup_response, 'utf-8')   # make sure it's Unicode!
            matching_records = anyjson.loads(doi_lookup_response)
        else:
            # Neither a DOI nor a reference string was supplied. Previously
            # this left `search_term` unbound and crashed with
            # UnboundLocalError; now the lookup is skipped and the result is
            # reported as "no match" below.
            matching_records = []

        # if we got a match, grab the first (probably only) record
        if len(matching_records) > 0:
            match = matching_records[0]

            # Convert HTML reference string to plain text
            raw_publication_reference = match.get('fullCitation', '')
            ref_element_tree = web2pyHTMLParser(raw_publication_reference).tree
            # root of this tree is the complete mini-DOM
            ref_root = ref_element_tree.elements()[0]

            # reduce this root to plain text (strip any tags)
            meta_publication_reference = ref_root.flatten().decode('utf-8')
            meta_publication_url = match.get('doi', u'')  # already in URL form
            meta_year = match.get('year', u'')

        else:
            # Add a bogus reference string to signal the lack of results
            if doi:
                meta_publication_reference = u'No matching publication found for this DOI!'
            else:
                meta_publication_reference = u'No matching publication found for this reference string'
            meta_publication_url = u''
            meta_year = u''

        # add any found values to a fresh NexSON template
        nexson = get_empty_nexson(BY_ID_HONEY_BADGERFISH, include_cc0=include_cc0)
        nexml_el = nexson['nexml']
        nexml_el[u'^ot:studyPublicationReference'] = meta_publication_reference
        if meta_publication_url:
            nexml_el[u'^ot:studyPublication'] = {'@href': meta_publication_url}
        if meta_year:
            nexml_el[u'^ot:studyYear'] = meta_year
        return nexson
def _main():
    import argparse
    _HELP_MESSAGE = '''Takes a filepath to Newick tree file with propinquity-style
leaf labels - unique numeric suffixes which identify the taxon.
Writes a NexSON representation of the tree to
'''

    parser = argparse.ArgumentParser(description=_HELP_MESSAGE,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("-i", "--ids",
                        required=True,
                        help="comma separated list of tree IDs to be assigned to the trees in the newick file.")
    parser.add_argument('newick', help='filepath of the newick tree')
    args = parser.parse_args()
    if not os.path.exists(args.newick):
        sys.exit('The file "{}" does not exist'.format(args.newick))
    tree_id_list = args.ids.split(',')
    if not tree_id_list:
        sys.exit('At least one tree ID must be provided')
    tree_id_it = iter(tree_id_list)
    out = codecs.getwriter('utf-8')(sys.stdout)
    pyid2int = {}
    curr_nd_counter = 1
    with codecs.open(args.newick, 'r', encoding='utf8') as inp:
        tree = parse_newick(stream=inp)
        tree_id = tree_id_it.next()
        nexson = get_empty_nexson()
        body = nexson['nexml']
        all_otus_groups = body['otusById'].values()
        assert len(all_otus_groups) == 1
        first_otus_group = all_otus_groups[0]
        all_trees_groups = body['treesById'].values()
        assert len(all_trees_groups) == 1
        first_trees_group = all_trees_groups[0]
        first_trees_group['^ot:treeElementOrder'].append(tree_id)
        otus = first_otus_group['otuById']
        all_trees_dict = first_trees_group['treeById']
        ntree = all_trees_dict.setdefault(tree_id, {})
        ebsi, nbi = {}, {}
        ntree['edgeBySourceId'] = ebsi
        ntree['nodeById'] = nbi
        root_node_id = None
        for node in tree._root.preorder_iter():
            nid = id(node)
            i = pyid2int.get(nid)
            if i is None:
                i = curr_nd_counter
                curr_nd_counter += 1
                pyid2int[nid] = i
            node_id_s = 'node{}'.format(i)
            otu_id_s = 'otu{}'.format(i)
            n_obj = nbi.setdefault(node_id_s, {})
            if node is tree._root:
                n_obj['@root'] = True
                root_node_id = node_id_s
            else:
                edge_id_s = 'edge{}'.format(i)
                pid = id(node.parent)
                pni = 'node{}'.format(pyid2int[pid])
                ed = ebsi.setdefault(pni, {})
                ed[edge_id_s] = {'@source': pni, '@target': node_id_s}
            if not node.children:
                n_obj['@otu'] = otu_id_s
                orig = node._id
                ott_id = ott_id_from_label(orig)
                otus[otu_id_s] = {"^ot:originalLabel": orig, "^ot:ottId": ott_id, "^ot:ottTaxonName": orig}
        assert root_node_id is not None
        ntree['^ot:rootNodeId'] = root_node_id
        write_as_json(nexson, out)
Example #5
0
                new_study_nexson = import_nexson_from_treebase(treebase_id, nexson_syntax_version=BY_ID_HONEY_BADGERFISH)
            # elif importing_from_nexml_fetch:
            #     if not (nexml_fetch_url.startswith('http://') or nexml_fetch_url.startswith('https://')):
            #         raise HTTP(400, json.dumps({
            #             "error": 1,
            #             "description": 'Expecting: "nexml_fetch_url" to startwith http:// or https://',
            #         }))
            #     new_study_nexson = get_ot_study_info_from_treebase_nexml(src=nexml_fetch_url,
            #                                                     nexson_syntax_version=BY_ID_HONEY_BADGERFISH)
            # elif importing_from_nexml_string:
            #     new_study_nexson = get_ot_study_info_from_treebase_nexml(nexml_content=nexml_pasted_string,
            #                                                    nexson_syntax_version=BY_ID_HONEY_BADGERFISH)
            elif importing_from_crossref_API:
                new_study_nexson = _new_nexson_with_crossref_metadata(doi=publication_doi_for_crossref, ref_string=publication_ref, include_cc0=cc0_agreement)
            else:   # assumes 'import-method-MANUAL_ENTRY', or insufficient args above
                new_study_nexson = get_empty_nexson(BY_ID_HONEY_BADGERFISH, include_cc0=cc0_agreement)
                if publication_doi:
                    # submitter entered an invalid DOI (or other URL); add it now
                    new_study_nexson['nexml'][u'^ot:studyPublication'] = {'@href': publication_doi}

            nexml = new_study_nexson['nexml']

            # If submitter requested the CC0 waiver or other waiver/license, make sure it's here
            if importing_from_treebase_id or cc0_agreement:
                nexml['^xhtml:license'] = {'@href': 'http://creativecommons.org/publicdomain/zero/1.0/'}
            elif using_existing_license:
                existing_license = kwargs.get('alternate_license', '')
                if existing_license == 'CC-0':
                    nexml['^xhtml:license'] = {'@name': 'CC0', '@href': 'http://creativecommons.org/publicdomain/zero/1.0/'}
                    pass
                elif existing_license == 'CC-BY-2.0':