def testCreated(self):
    """Check that a freshly created empty NexSON validates without errors.

    The template is generated twice, once with default arguments and once
    with ``include_cc0=True``. For each, the first item returned by
    ``validate_nexson`` must report no error.
    """
    for template_kwargs in ({}, {'include_cc0': True}):
        blob = get_empty_nexson(**template_kwargs)
        validation_result = validate_nexson(blob)
        annotation = validation_result[0]
        self.assertFalse(annotation.has_error())
def _new_nexson_with_crossref_metadata(doi, ref_string, include_cc0=False):
    """Build a fresh NexSON template seeded with publication metadata from CrossRef.

    The CrossRef search service is queried with ``doi`` when it is truthy,
    otherwise with ``ref_string``. The first record returned (if any) supplies
    the study's publication reference, publication URL and year. When nothing
    matches, a placeholder reference string is stored instead so that the lack
    of results is visible in the new study.

    Args:
        doi: DOI to search for; takes precedence over ``ref_string``.
        ref_string: reference text to search for when no DOI is given.
        include_cc0: passed through to ``get_empty_nexson``.

    Returns:
        The NexSON dict created by
        ``get_empty_nexson(BY_ID_HONEY_BADGERFISH, include_cc0=include_cc0)``
        with ``^ot:studyPublicationReference`` always set, and
        ``^ot:studyPublication`` / ``^ot:studyYear`` set when values were found.

    Raises:
        ValueError: if both ``doi`` and ``ref_string`` are empty, since there
            is then nothing to search for.
    """
    # Prefer the DOI; fall back to the free-text reference.
    search_term = doi or ref_string
    if not search_term:
        # Previously this case fell through with `search_term` unbound and
        # failed with an opaque UnboundLocalError while building the URL.
        raise ValueError('Either a DOI or a reference string is required to search CrossRef')

    # look for matching studies via CrossRef.org API
    doi_lookup_response = fetch(
        'http://search.crossref.org/dois?%s' % urlencode({'q': search_term})
    )
    doi_lookup_response = unicode(doi_lookup_response, 'utf-8')  # make sure it's Unicode!
    matching_records = anyjson.loads(doi_lookup_response)

    if matching_records:
        # if we got a match, grab the first (probably only) record
        match = matching_records[0]
        # Convert HTML reference string to plain text
        raw_publication_reference = match.get('fullCitation', '')
        ref_element_tree = web2pyHTMLParser(raw_publication_reference).tree
        # root of this tree is the complete mini-DOM
        ref_root = ref_element_tree.elements()[0]
        # reduce this root to plain text (strip any tags)
        meta_publication_reference = ref_root.flatten().decode('utf-8')
        meta_publication_url = match.get('doi', u'')  # already in URL form
        meta_year = match.get('year', u'')
    else:
        # Add a bogus reference string to signal the lack of results
        if doi:
            meta_publication_reference = u'No matching publication found for this DOI!'
        else:
            meta_publication_reference = u'No matching publication found for this reference string'
        meta_publication_url = u''
        meta_year = u''

    # add any found values to a fresh NexSON template
    nexson = get_empty_nexson(BY_ID_HONEY_BADGERFISH, include_cc0=include_cc0)
    nexml_el = nexson['nexml']
    nexml_el[u'^ot:studyPublicationReference'] = meta_publication_reference
    if meta_publication_url:
        nexml_el[u'^ot:studyPublication'] = {'@href': meta_publication_url}
    if meta_year:
        nexml_el[u'^ot:studyYear'] = meta_year
    return nexson
def _newick_tree_to_nexson(tree, tree_id):
    """Return a new NexSON dict holding `tree` under the ID `tree_id`.

    Nodes are numbered in preorder starting at 1; node, edge and OTU IDs are
    derived from that number ('node1', 'edge2', 'otu2', ...). Every leaf gets
    an OTU whose label is the node's `_id` and whose OTT ID comes from
    `ott_id_from_label`.
    """
    nexson = get_empty_nexson()
    body = nexson['nexml']
    # list(...) keeps the indexing below independent of dict-view semantics.
    all_otus_groups = list(body['otusById'].values())
    assert len(all_otus_groups) == 1
    first_otus_group = all_otus_groups[0]
    all_trees_groups = list(body['treesById'].values())
    assert len(all_trees_groups) == 1
    first_trees_group = all_trees_groups[0]
    first_trees_group['^ot:treeElementOrder'].append(tree_id)
    otus = first_otus_group['otuById']
    all_trees_dict = first_trees_group['treeById']
    ntree = all_trees_dict.setdefault(tree_id, {})
    ebsi, nbi = {}, {}
    ntree['edgeBySourceId'] = ebsi
    ntree['nodeById'] = nbi
    # Maps id(node) -> sequential integer, so a child can find its parent's number.
    pyid2int = {}
    curr_nd_counter = 1
    root_node_id = None
    for node in tree._root.preorder_iter():
        nid = id(node)
        i = pyid2int.get(nid)
        if i is None:
            i = curr_nd_counter
            curr_nd_counter += 1
            pyid2int[nid] = i
        node_id_s = 'node{}'.format(i)
        otu_id_s = 'otu{}'.format(i)
        n_obj = nbi.setdefault(node_id_s, {})
        if node is tree._root:
            n_obj['@root'] = True
            root_node_id = node_id_s
        else:
            # Preorder guarantees the parent was numbered before this node.
            edge_id_s = 'edge{}'.format(i)
            pid = id(node.parent)
            pni = 'node{}'.format(pyid2int[pid])
            ed = ebsi.setdefault(pni, {})
            ed[edge_id_s] = {'@source': pni, '@target': node_id_s}
        if not node.children:
            n_obj['@otu'] = otu_id_s
            orig = node._id
            ott_id = ott_id_from_label(orig)
            otus[otu_id_s] = {"^ot:originalLabel": orig,
                              "^ot:ottId": ott_id,
                              "^ot:ottTaxonName": orig}
    assert root_node_id is not None
    ntree['^ot:rootNodeId'] = root_node_id
    return nexson


def _main():
    """Convert a propinquity-style Newick tree file to NexSON on standard output.

    Command-line arguments (parsed from ``sys.argv`` with argparse):
        -i/--ids  required comma separated list of tree IDs; the first
                  non-empty one is assigned to the tree parsed from the file.
        newick    filepath of the Newick tree.

    Exits through ``sys.exit`` with a message when the file does not exist or
    when no non-empty tree ID was supplied.
    """
    import argparse
    _HELP_MESSAGE = '''Takes a filepath to Newick tree file with propinquity-style
leaf labels - unique numeric suffixes which identify the taxon.
Writes a NexSON representation of the tree to standard output.
'''
    parser = argparse.ArgumentParser(description=_HELP_MESSAGE,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("-i", "--ids",
                        required=True,
                        help="comma separated list of tree IDs to be assigned to the trees in the newick file.")
    parser.add_argument('newick', help='filepath of the newick tree')
    args = parser.parse_args()
    if not os.path.exists(args.newick):
        sys.exit('The file "{}" does not exist'.format(args.newick))
    # ''.split(',') yields [''], which is truthy, so blank entries must be
    # dropped before the emptiness check can ever trigger.
    tree_id_list = [piece.strip() for piece in args.ids.split(',') if piece.strip()]
    if not tree_id_list:
        sys.exit('At least one tree ID must be provided')
    # Only one tree is parsed from the file, so only the first ID is consumed.
    tree_id = tree_id_list[0]
    out = codecs.getwriter('utf-8')(sys.stdout)
    with codecs.open(args.newick, 'r', encoding='utf8') as inp:
        tree = parse_newick(stream=inp)
        nexson = _newick_tree_to_nexson(tree, tree_id)
    write_as_json(nexson, out)
new_study_nexson = import_nexson_from_treebase(treebase_id, nexson_syntax_version=BY_ID_HONEY_BADGERFISH) # elif importing_from_nexml_fetch: # if not (nexml_fetch_url.startswith('http://') or nexml_fetch_url.startswith('https://')): # raise HTTP(400, json.dumps({ # "error": 1, # "description": 'Expecting: "nexml_fetch_url" to startwith http:// or https://', # })) # new_study_nexson = get_ot_study_info_from_treebase_nexml(src=nexml_fetch_url, # nexson_syntax_version=BY_ID_HONEY_BADGERFISH) # elif importing_from_nexml_string: # new_study_nexson = get_ot_study_info_from_treebase_nexml(nexml_content=nexml_pasted_string, # nexson_syntax_version=BY_ID_HONEY_BADGERFISH) elif importing_from_crossref_API: new_study_nexson = _new_nexson_with_crossref_metadata(doi=publication_doi_for_crossref, ref_string=publication_ref, include_cc0=cc0_agreement) else: # assumes 'import-method-MANUAL_ENTRY', or insufficient args above new_study_nexson = get_empty_nexson(BY_ID_HONEY_BADGERFISH, include_cc0=cc0_agreement) if publication_doi: # submitter entered an invalid DOI (or other URL); add it now new_study_nexson['nexml'][u'^ot:studyPublication'] = {'@href': publication_doi} nexml = new_study_nexson['nexml'] # If submitter requested the CC0 waiver or other waiver/license, make sure it's here if importing_from_treebase_id or cc0_agreement: nexml['^xhtml:license'] = {'@href': 'http://creativecommons.org/publicdomain/zero/1.0/'} elif using_existing_license: existing_license = kwargs.get('alternate_license', '') if existing_license == 'CC-0': nexml['^xhtml:license'] = {'@name': 'CC0', '@href': 'http://creativecommons.org/publicdomain/zero/1.0/'} pass elif existing_license == 'CC-BY-2.0':