# NOTE(review): this excerpt opens partway through the script. The exit call
# below presumably closes a guard whose header sits above the visible text, so
# its original nesting cannot be recovered here -- confirm against the full file.
sys.exit(0)

# Remove every occurrence of the "doi:" marker from the identifier.
doi = doi.replace("doi:", "")

# The record is written to stdout as tab-separated lines between the
# begin_tsv / end_tsv markers. Single-argument print(...) behaves the same
# under the Python 2 print statement.
print("begin_tsv")
print("linkout\tDOI\t\t%s\t\t" % doi)
print("type\tJOUR")
print("doi\t" + doi)

# key_map pairs each output field name with the header it is read from;
# headers with no value are left out of the record.
for f, k in key_map.items():
    v = metaheaders.get_item(k)
    if v:
        v = v.strip()
        print("%s\t%s" % (f, v))

# Authors come from dc.creator, falling back to citation_author.
authors = (metaheaders.get_multi_item("dc.creator")
           or metaheaders.get_multi_item("citation_author"))
for a in authors or ():
    print("author\t%s" % a)

# Print the first date header that has a value, preferring citation_date.
for date_header in ("citation_date", "dc.date"):
    if metaheaders.get_item(date_header):
        metaheaders.print_date(date_header)
        break

print("end_tsv")
print("status\tok")
# Remove every occurrence of the "doi:" marker from the identifier.
doi = doi.replace("doi:", "")

# Header lines of the tab-separated record. Single-argument print(...) behaves
# the same under the Python 2 print statement.
print("begin_tsv")
print("linkout\tDOI\t\t%s\t\t" % doi)
print("type\tJOUR")
print("doi\t" + doi)

# One line per key_map entry whose header has a value; the value is stripped
# of surrounding whitespace before it is written.
for f, k in key_map.items():
    v = metaheaders.get_item(k)
    if v:
        v = v.strip()
        print("%s\t%s" % (f, v))

# Use dc.creator when it yields anything, otherwise citation_author.
authors = (metaheaders.get_multi_item("dc.creator")
           or metaheaders.get_multi_item("citation_author"))
for a in authors or ():
    print("author\t%s" % a)

# citation_date takes precedence over dc.date; at most one date is printed.
for date_header in ("citation_date", "dc.date"):
    if metaheaders.get_item(date_header):
        metaheaders.print_date(date_header)
        break

print("end_tsv")
print("status\tok")
# NOTE(review): this excerpt opens partway through the script. The bail call
# below presumably belongs to a missing-DOI guard whose header sits above the
# visible text -- confirm its nesting against the full file.
# Message wording corrected ("a DOI") to match the other scrapers.
bail("Couldn't find a DOI")

# Only pages whose og:type is "article" are accepted. The second check is an
# elif so that an absent type is never concatenated into the message.
docType = metapropsheaders.get_item("og:type")
if not docType:
    bail("Cannot determine the publication type")
elif docType != "article":
    bail("Only supports journal papers ('article', 'JOUR') at this moment, "
         "but found " + docType)

if not metaheaders.get_item("citation_title"):
    bail("Cannot find a title in that article")

# NOTE(review): the message above speaks of journal papers ('JOUR'), yet the
# record is emitted with type REP and the "journal" field is filled from the
# technical-report institution header -- confirm which is intended.
print("type\tREP")
metaheaders.print_item("title", "citation_title")

authors = metaheaders.get_multi_item("citation_author")
if authors:
    for a in authors:
        print("author\t%s" % a)

# Each optional field is written only when its header has a value.
journal = metaheaders.get_item("citation_technical_report_institution")
if journal:
    print("journal\t%s" % journal)

issn = metaheaders.get_item("citation_issn")
if issn:
    print("issn\t%s" % issn)

abstract = metaheaders.get_item("description")
if abstract:
    print("abstract\t%s" % abstract)
# NOTE(review): this excerpt opens partway through the script. The two calls
# below presumably form the body of a missing-DOI guard whose header sits
# above the visible text -- confirm their nesting against the full file.
bail('Unable to find a DOI')
sys.exit(0)

# Header lines of the tab-separated record. Single-argument print(...) behaves
# the same under the Python 2 print statement.
print("begin_tsv")
print("linkout\tDOI\t\t%s\t\t" % doi)
print("type\tJOUR")
print("doi\t" + doi)

# key_map pairs each output field name with the header it is read from;
# headers with no value are skipped.
for f, k in key_map.items():
    v = metaheaders.get_item(k)
    if v:
        v = v.strip()
        print("%s\t%s" % (f, v))

authors = metaheaders.get_multi_item("DC.Contributor")
for a in authors or ():
    print("author\t%s" % a)

metaheaders.print_date("DC.Date")

# Hmmm. there are sometimes 2 issns, one empty
# Only the first non-empty ISSN is written.
issn = metaheaders.get_multi_item("citation_issn")
for i in issn or ():
    if i == "":
        continue
    print("issn\t%s" % i)
    break

root = metaheaders.root
# NOTE(review): this excerpt opens partway through the script. The two calls
# below presumably form the body of a missing-DOI guard whose header sits
# above the visible text -- confirm their nesting against the full file.
bail('Unable to find a DOI')
sys.exit(0)

# Header lines of the tab-separated record. Single-argument print(...) behaves
# the same under the Python 2 print statement.
print("begin_tsv")
print("linkout\tDOI\t\t%s\t\t" % doi)
print("type\tJOUR")
print("doi\t" + doi)

# One line per key_map entry whose header has a value, stripped of
# surrounding whitespace.
for f, k in key_map.items():
    v = metaheaders.get_item(k)
    if v:
        v = v.strip()
        print("%s\t%s" % (f, v))

authors = metaheaders.get_multi_item("citation_author")
for a in authors or ():
    print("author\t%s" % a)

metaheaders.print_date("citation_publication_date")

# Hmmm. there are sometimes 2 issns, one empty
# Only the first non-empty ISSN is written.
issn = metaheaders.get_multi_item("citation_issn")
for i in issn or ():
    if i == "":
        continue
    print("issn\t%s" % i)
    break

print("end_tsv")
# NOTE(review): this excerpt opens partway through the script. The two calls
# below presumably form the body of a missing-DOI guard whose header sits
# above the visible text -- confirm their nesting against the full file.
bail('Unable to find a DOI')
sys.exit(0)

# Start of the tab-separated record. Single-argument print(...) behaves the
# same under the Python 2 print statement.
print("begin_tsv")
print("linkout\tDOI\t\t%s\t\t" % doi)
print("type\tJOUR")
print("doi\t" + doi)

# Walk the field-name -> header-name pairs and write every header that has a
# value, trimmed of surrounding whitespace.
for f, k in key_map.items():
    v = metaheaders.get_item(k)
    if v:
        v = v.strip()
        print("%s\t%s" % (f,v))

authors = metaheaders.get_multi_item("DC.Contributor")
for a in authors or ():
    print("author\t%s" % a)

metaheaders.print_date("DC.Date")

# Hmmm. there are sometimes 2 issns, one empty
# Stop after the first ISSN that is not the empty string.
issn = metaheaders.get_multi_item("citation_issn")
for i in issn or ():
    if i == "":
        continue
    print("issn\t%s" % i)
    break

root = metaheaders.root
#
# Read the URL to process from the first line of standard input
#
url = sys.stdin.readline().strip()

#
# Fetch the page
#
try:
    page = urllib2.urlopen(url).read().strip()
except Exception:
    # Narrowed from a bare "except:", which would also trap SystemExit and
    # KeyboardInterrupt and misreport them as a fetch failure.
    bail("Couldn't fetch page (" + url + ")")

#
# DOI is in the page
#
# From here on the name "metaheaders" refers to the MetaHeaders instance
# built from the fetched page.
metaheaders = metaheaders.MetaHeaders(page=page)

dois = metaheaders.get_multi_item("DC.identifier")
doi = None
if dois:
    # Every DC.identifier value is scanned and there is no break, so when
    # several of them carry a DOI the last match wins.
    for doi_str in dois:
        doi_match = re.search(r'doi:(10\.[^/]+/[^\s]+)', doi_str, re.IGNORECASE)
        if doi_match:
            doi = doi_match.group(1)

if not doi:
    bail("Couldn't find a DOI")

if not metaheaders.get_item("DC.title"):
    bail("Unable to find the article title")

# Start of the tab-separated record. Single-argument print(...) behaves the
# same under the Python 2 print statement.
print("begin_tsv")
print("publisher\tDryad Digital Repository")
print("type\tGEN")
if doi: print "linkout\tIUCR\t\t%s\t\t" % key print "linkout\tDOI\t\t%s\t\t" % doi print "url\thttp://dx.doi.org/" + doi print "doi\t" + doi else: bail("Couldn't find a DOI") if not metaheaders.get_item("DC.title"): bail("Cannot find a title in that article") title = metaheaders.get_item("DC.title") if title: print "title\t%s" % title.encode('utf-8') authors = metaheaders.get_multi_item("DC.creator") firstAuthorSurname = None if authors: for a in authors: if not firstAuthorSurname: firstAuthorSurname = "" match = re.search(r'^([^\s|^,]+)', a.encode('utf-8'), re.IGNORECASE) if match: firstAuthorSurname = match.group(1) print "author\t%s" % a.encode('utf-8') shortCode = None if metaheaders.get_item("DC.link"): link = metaheaders.get_item("DC.link") match = re.search(r'^http://scripts.iucr.org/cgi-bin/paper\?(.+)', link, re.IGNORECASE) if match: