def _new_nexson_with_crossref_metadata(doi, ref_string, include_cc0=False):
    """Build a fresh NexSON document seeded with CrossRef publication metadata.

    The CrossRef search endpoint is queried with ``doi`` when it is truthy,
    otherwise with ``ref_string``. The first record of the response (if any)
    supplies the publication reference, publication URL and year.

    Args:
        doi: DOI to search for; takes precedence over ``ref_string``.
        ref_string: free-text reference, used only when ``doi`` is falsy.
        include_cc0: passed through unchanged to ``get_empty_nexson``.

    Returns:
        The NexSON dict produced by ``get_empty_nexson``. Its ``nexml``
        element always gets ``^ot:studyPublicationReference`` (a placeholder
        message when nothing matched); ``^ot:studyPublication`` and
        ``^ot:studyYear`` are added only when non-empty values were found.

    Raises:
        ValueError: if both ``doi`` and ``ref_string`` are falsy. (Previously
            this case failed with an UnboundLocalError on ``search_term``.)
    """
    # Prefer the DOI; fall back to the free-text reference.
    search_term = doi or ref_string
    if not search_term:
        raise ValueError(
            'Either doi or ref_string is required to look up publication metadata'
        )

    # look for matching studies via CrossRef.org API
    lookup_url = 'http://search.crossref.org/dois?%s' % urlencode({'q': search_term})
    doi_lookup_response = fetch(lookup_url)
    doi_lookup_response = unicode(doi_lookup_response, 'utf-8')  # make sure it's Unicode!
    matching_records = anyjson.loads(doi_lookup_response)

    # Anything other than a non-empty list (e.g. a JSON object or null) is
    # treated as "no match" rather than crashing on len() or [0].
    if isinstance(matching_records, list) and matching_records:
        # grab the first (probably only) record
        match = matching_records[0]
        # Convert HTML reference string to plain text
        raw_publication_reference = match.get('fullCitation', '')
        ref_element_tree = web2pyHTMLParser(raw_publication_reference).tree
        # root of this tree is the complete mini-DOM
        ref_root = ref_element_tree.elements()[0]
        # reduce this root to plain text (strip any tags)
        meta_publication_reference = ref_root.flatten().decode('utf-8')
        meta_publication_url = match.get('doi', u'')  # already in URL form
        meta_year = match.get('year', u'')
    else:
        # Add a bogus reference string to signal the lack of results
        if doi:
            meta_publication_reference = u'No matching publication found for this DOI!'
        else:
            meta_publication_reference = u'No matching publication found for this reference string'
        meta_publication_url = u''
        meta_year = u''

    # add any found values to a fresh NexSON template
    nexson = get_empty_nexson(BY_ID_HONEY_BADGERFISH, include_cc0=include_cc0)
    nexml_el = nexson['nexml']
    nexml_el[u'^ot:studyPublicationReference'] = meta_publication_reference
    if meta_publication_url:
        nexml_el[u'^ot:studyPublication'] = {'@href': meta_publication_url}
    if meta_year:
        nexml_el[u'^ot:studyYear'] = meta_year
    return nexson
def test_web2pyHTMLParser(self):
    """Exercise web2pyHTMLParser: tag type, tag auto-closing, attribute
    editing, self-closed tags, and numeric character references."""
    # tag should not be a byte
    self.assertEqual(web2pyHTMLParser("<div></div>").tree.components[0].tag, 'div')

    # an unclosed <span> comes back closed when the tree is serialized
    a = str(web2pyHTMLParser('<div>a<span>b</div>c').tree)
    self.assertEqual(a, "<div>a<span>b</span></div>c")

    # an attribute set on a parsed element shows up in the serialized output
    tree = web2pyHTMLParser('hello<div a="b">world</div>').tree
    tree.element(_a='b')['_c'] = 5
    self.assertEqual(str(tree), 'hello<div a="b" c="5">world</div>')

    # tags listed in `closed` are serialized in self-closing form
    a = str(web2pyHTMLParser('<div><img class="img"/></div>', closed=['img']).tree)
    self.assertEqual(a, '<div><img class="img" /></div>')

    # greater-than sign ( > ) --> decimal &#62; --> hexadecimal &#x3E;
    # less-than sign    ( < ) --> decimal &#60; --> hexadecimal &#x3C;
    # NOTE(review): the two cases below had lost their character references
    # (both fed raw "< >" and were identical). The numeric references were
    # restored from the comment above; the escaped expected output is
    # presumed -- confirm against the parser before relying on it.
    # test decimal
    a = str(web2pyHTMLParser('<div>&#60; &#62;</div>').tree)
    self.assertEqual(a, '<div>&lt; &gt;</div>')
    # test hexadecimal
    a = str(web2pyHTMLParser('<div>&#x3C; &#x3E;</div>').tree)
    self.assertEqual(a, '<div>&lt; &gt;</div>')