# NOTE(review): this fragment opens with the bail() call on its own; it
# presumably sat inside a title-presence guard that begins before the visible
# text -- confirm against the full script before applying.
bail("Cannot find a title in that article")

# Emit the record as tab-separated "field<TAB>value" lines.
# print(...) with a single parenthesised argument is equivalent to the print
# statement on Python 2.
print("type\tREP")
metaheaders.print_item("title", "citation_title")

# One "author" line per value; nothing is written when the lookup is empty.
authors = metaheaders.get_multi_item("citation_author")
for author in authors or []:
    print("author\t%s" % author)

# Optional single-valued fields as (output field, meta tag) pairs, in output
# order.  A field is written only when its tag yields a truthy value.
for field, meta_name in (
    ("journal", "citation_technical_report_institution"),
    ("issn", "citation_issn"),
    ("abstract", "description"),
    ("start_page", "citation_firstpage"),
):
    value = metaheaders.get_item(meta_name)
    if value:
        print("%s\t%s" % (field, value))

metaheaders.print_date("citation_date")

print("end_tsv")
print("status\tok")
# NOTE(review): the leading sys.exit(0) presumably closes a branch that begins
# before the visible text -- confirm against the full script before applying.
sys.exit(0)

# Drop every literal "doi:" from the identifier before it is reported.
doi = doi.replace("doi:", "")

# Emit the record as tab-separated "field<TAB>value" lines.
# print(...) with a single parenthesised argument is equivalent to the print
# statement on Python 2.
print("begin_tsv")
print("linkout\tDOI\t\t%s\t\t" % doi)
print("type\tJOUR")
print("doi\t" + doi)

# key_map pairs each output field name with the meta tag that supplies it.
# Tags without a value are skipped; values are stripped before output.
for field, meta_name in key_map.items():
    value = metaheaders.get_item(meta_name)
    if value:
        print("%s\t%s" % (field, value.strip()))

# Prefer "dc.creator"; fall back to "citation_author" when it yields nothing.
authors = (metaheaders.get_multi_item("dc.creator")
           or metaheaders.get_multi_item("citation_author"))
for author in authors or []:
    print("author\t%s" % author)

# Report the date from the first tag that has a value; none is reported when
# both are missing.
for date_tag in ("citation_date", "dc.date"):
    if metaheaders.get_item(date_tag):
        metaheaders.print_date(date_tag)
        break

print("end_tsv")
print("status\tok")
# Emit the journal-article record as tab-separated "field<TAB>value" lines.
# print(...) with a single parenthesised argument is equivalent to the print
# statement on Python 2.
print("type\tJOUR")
print("doi\t" + doi)

# key_map pairs each output field name with the meta tag that supplies it.
# Tags without a value are skipped; values are stripped before output.
for field, meta_name in key_map.items():
    value = metaheaders.get_item(meta_name)
    if value:
        print("%s\t%s" % (field, value.strip()))

# One "author" line per value; nothing is written when the lookup is empty.
authors = metaheaders.get_multi_item("DC.Contributor")
for author in authors or []:
    print("author\t%s" % author)

metaheaders.print_date("DC.Date")

# Hmmm. there are sometimes 2 issns, one empty: report only the first
# non-empty one and stop.
issn = metaheaders.get_multi_item("citation_issn")
for candidate in issn or []:
    if candidate != "":
        print("issn\t%s" % candidate)
        break

# Gather the text content of every <p> under #abstract and join the pieces
# with single spaces.  `abstract` is only (re)bound when something was found.
# NOTE(review): `abs` shadows the builtin; the name is kept because code after
# this fragment may still refer to it.
root = metaheaders.root
abs = [p.xpath("string()") for p in root.cssselect("#abstract p")]
if abs:
    abstract = ' '.join(abs)
# Emit the journal-article record as tab-separated "field<TAB>value" lines,
# bracketed by "begin_tsv" / "end_tsv" and followed by the status line.
# print(...) with a single parenthesised argument is equivalent to the print
# statement on Python 2.
print("begin_tsv")
print("linkout\tDOI\t\t%s\t\t" % doi)
print("type\tJOUR")
print("doi\t" + doi)

# key_map pairs each output field name with the meta tag that supplies it.
# Tags without a value are skipped; values are stripped before output.
for field, meta_name in key_map.items():
    value = metaheaders.get_item(meta_name)
    if value:
        print("%s\t%s" % (field, value.strip()))

# One "author" line per value; nothing is written when the lookup is empty.
authors = metaheaders.get_multi_item("citation_author")
for author in authors or []:
    print("author\t%s" % author)

metaheaders.print_date("citation_publication_date")

# Hmmm. there are sometimes 2 issns, one empty: report only the first
# non-empty one and stop.
issn = metaheaders.get_multi_item("citation_issn")
for candidate in issn or []:
    if candidate != "":
        print("issn\t%s" % candidate)
        break

print("end_tsv")
print("status\tok")
# Emit the journal-article record as tab-separated "field<TAB>value" lines.
# print(...) with a single parenthesised argument is equivalent to the print
# statement on Python 2.
print("type\tJOUR")
print("doi\t" + doi)

# key_map pairs each output field name with the meta tag that supplies it.
# Tags without a value are skipped; values are stripped before output.
for field, meta_name in key_map.items():
    value = metaheaders.get_item(meta_name)
    if value:
        print("%s\t%s" % (field, value.strip()))

# One "author" line per value; nothing is written when the lookup is empty.
authors = metaheaders.get_multi_item("DC.Contributor")
for author in authors or []:
    print("author\t%s" % author)

metaheaders.print_date("DC.Date")

# Hmmm. there are sometimes 2 issns, one empty: report only the first
# non-empty one and stop.
issn = metaheaders.get_multi_item("citation_issn")
for candidate in issn or []:
    if candidate != "":
        print("issn\t%s" % candidate)
        break

# Gather the text content of every <p> under #abstract and join the pieces
# with single spaces.  `abstract` is only (re)bound when something was found.
# NOTE(review): `abs` shadows the builtin; the name is kept because code after
# this fragment may still refer to it.
root = metaheaders.root
abs = [p.xpath("string()") for p in root.cssselect("#abstract p")]
if abs:
    abstract = ' '.join(abs)
# Extract the DOI (the part after a case-insensitive "doi:" prefix) from
# doi_str.
# NOTE(review): `doi` must already be bound before this point -- when the
# search fails it is read without having been assigned here.  Confirm it is
# initialised earlier in the script.
doi_match = re.search(r'doi:(10\.[^/]+/[^\s]+)', doi_str, re.IGNORECASE)
if doi_match:
    doi = doi_match.group(1)

# Both a DOI and a title are required; bail() is called when either is missing.
if not doi:
    bail("Couldn't find a DOI")

if not metaheaders.get_item("DC.title"):
    bail("Unable to find the article title")

# Emit the record as tab-separated "field<TAB>value" lines.
# print(...) with a single parenthesised argument is equivalent to the print
# statement on Python 2.
print("begin_tsv")
print("publisher\tDryad Digital Repository")
print("type\tGEN")
metaheaders.print_item("title", "DC.title")

# One "author" line per value; nothing is written when the lookup is empty.
authors = metaheaders.get_multi_item("DC.creator")
for author in authors or []:
    print("author\t%s" % author)

metaheaders.print_date("DCTERMS.issued")

abstract = metaheaders.get_item("DC.description")
if abstract:
    print("abstract\t%s" % abstract)

# The same DOI value feeds both linkout lines.
print("linkout\tDRYAD\t\t%s\t\t" % doi)
print("linkout\tDOI\t\t%s\t\t" % doi)
print("end_tsv")
print("status\tok")
# Finish the record: abstract, optional bibliographic fields, date, then the
# closing markers.  `abstract` is produced before this fragment.
# print(...) with a single parenthesised argument is equivalent to the print
# statement on Python 2.
if abstract:
    print("abstract\t%s" % abstract.encode('utf-8'))

# Optional single-valued fields as (output field, meta tag) pairs, in output
# order.  A field is written only when its tag yields a truthy value.
for field, meta_name in (
    ("volume", "prism.volume"),
    ("issue", "prism.number"),
    ("start_page", "prism.startingPage"),
    ("end_page", "prism.endingPage"),
    ("publisher", "DC.publisher"),
):
    value = metaheaders.get_item(meta_name)
    if value:
        print("%s\t%s" % (field, value))

# Use "citation_date" when it has a value, otherwise "prism.coverDate".
date_tag = ("citation_date" if metaheaders.get_item("citation_date")
            else "prism.coverDate")
metaheaders.print_date(date_tag)

print("end_tsv")
print("status\tok")