def resolve(self, citations, document = None):
    """Add a PubMed Central PDF link to the citation when a PMC ID is known.

    Returns a dict that is empty when a PDF link with whence 'pmc' is
    already present or when no PMC ID can be determined. Otherwise it
    holds a 'links' entry pointing at the PMC PDF and the PMC identifier.
    The ``document`` argument is not used.
    """
    result = {}

    # A PMC-sourced PDF link already exists, so there is nothing to add.
    if utopia.citation.has_link(citations, {'mime': 'application/pdf'}, {'whence': 'pmc'}):
        return result

    pmc_id = utopia.citation.pick_from(citations, 'identifiers/pmc', default=None)
    if pmc_id is None:
        # Try to resolve the PMC ID from either the DOI or the PubMed ID
        doi = utopia.citation.pick_from(citations, 'identifiers/doi', default=None, record_in=result)
        pubmed_id = utopia.citation.pick_from(citations, 'identifiers/pubmed', default=None, record_in=result)
        # The DOI is tried first; the PubMed ID is only used if that failed.
        for known_id, scheme in ((doi, 'doi'), (pubmed_id, 'pmid')):
            if known_id is not None and pmc_id is None:
                pmc_id = utopia.tools.pmc.identify(known_id, scheme)

    if pmc_id is None:
        return result

    # Generate PMC link to PDF
    link = {
        'url': 'http://www.ncbi.nlm.nih.gov/pmc/articles/{0}/pdf/'.format(pmc_id),
        'mime': 'application/pdf',
        'type': 'article',
        'title': 'Download article from PubMed Central',
    }
    result.update({
        'links': [link],
        'identifiers': {'pmc': pmc_id},
    })
    return result
def resolve(self, citations, document = None):
    """Look the citation up via arXiv when an arXiv identifier is present.

    Returns a dict updated with whatever ``utopia.tools.arxiv.resolve``
    yields for the identifier. When no arXiv ID is found, the lookup is
    skipped. The ``document`` argument is not used.
    """
    result = {}
    identifier = utopia.citation.pick_from(citations, 'identifiers[arxiv]', None, record_in=result)
    if identifier is None:
        return result

    result.update(utopia.tools.arxiv.resolve(identifier))
    return result
def resolve(self, citations, document=None):
    """Look the citation up via CrossRef when a DOI is present.

    Returns a dict updated with whatever ``utopia.tools.crossref.resolve``
    yields for the DOI. When no DOI is found, the lookup is skipped.
    The ``document`` argument is not used.
    """
    result = {}
    identifier = utopia.citation.pick_from(citations, 'identifiers[doi]', default=None, record_in=result)
    if identifier is None:
        return result

    result.update(utopia.tools.crossref.resolve(identifier))
    return result
def resolve(self, citations, document=None):
    """Look the citation up via PubMed when only its PubMed ID is known.

    The lookup runs only when a PubMed ID is present and the
    'title:pubmed' pick yields nothing. Returns a dict updated with the
    result of ``utopia.tools.pubmed.resolve`` in that case. The
    ``document`` argument is not used.
    """
    result = {}
    pubmed_id = utopia.citation.pick_from(citations, 'identifiers[pubmed]', default=None, record_in=result)
    known_title = utopia.citation.pick_from(citations, 'title:pubmed', default=None)

    needs_lookup = known_title is None and pubmed_id is not None
    if needs_lookup:
        result.update(utopia.tools.pubmed.resolve(pubmed_id))
    return result
def resolve(self, citations, document=None):
    """Find a PubMed ID for the citation, via its DOI or a title search.

    Steps, each attempted only if the previous one produced no PubMed ID:

    1. Pick an existing PubMed ID from ``citations``.
    2. Pick a DOI and ask ``utopia.tools.pubmed.identify`` for the ID;
       on success the ID is stored under ``'identifiers'``.
    3. Pick a title and run ``utopia.tools.pubmed.search`` on it. The
       search result is merged into the returned dict only if its title
       matches the scraped title (ignoring case and non-word characters)
       or, when ``document`` is given, if the PubMed title can be found
       in the document.

    Returns the (possibly empty) dict of newly resolved citation data.
    """
    def _normalise(text):
        # Collapse every run of non-word characters to a single space so
        # that punctuation and spacing differences do not prevent a match.
        return re.sub(r'[^\w]+', ' ', text).strip().lower()

    citation = {}
    pubmed_id = utopia.citation.pick_from(citations, 'identifiers[pubmed]', None, record_in=citation)

    if pubmed_id is None:
        doi = utopia.citation.pick_from(citations, 'identifiers[doi]', None, record_in=citation)
        if doi is not None:
            pubmed_id = utopia.tools.pubmed.identify(doi, 'doi')
            if pubmed_id is not None:
                citation['identifiers'] = {'pubmed': pubmed_id}

    if pubmed_id is not None:
        return citation

    # Fall back to searching PubMed by title.
    title = utopia.citation.pick_from(citations, 'title', None, record_in=citation)
    if title is None:
        return citation

    title = title.strip(' .')
    pubmed_results = utopia.tools.pubmed.search(title)
    pubmed_title = pubmed_results.get('title', '').strip(' .')
    if len(pubmed_title) == 0:
        return citation

    matched = False
    if _normalise(title) == _normalise(pubmed_title):  # Fuzzy match
        matched = True
    elif document is not None:
        # Accept the pubmed title over the scraped title, if present in the document
        matches = document.findInContext('', pubmed_title, '')  # Fuzzy match
        if len(matches) > 0:
            matched = True
            # Use the text as it appears in the document.
            pubmed_title = matches[0].text()

    if matched:
        citation.update(pubmed_results)
        citation['title'] = pubmed_title
    return citation
def resolve(self, citations, document=None):
    """Parse an unstructured citation string into structured fields.

    Returns ``None`` when any incoming citation already has a
    'provenance/whence' of 'cermine'. Otherwise returns a dict, updated
    with the first result of ``utopia.tools.cited.parse`` when an
    unstructured citation is present and carries none of the keys
    'title', 'authors' or 'year'. The ``document`` argument is not used.
    """
    # Multiple responses leads to a no-op: bail if cermine results are
    # already present.
    already_parsed = any(
        utopia.citation.pick(entry, 'provenance/whence', default=None) == 'cermine'
        for entry in citations
    )
    if already_parsed:
        return None

    # Get all the citations that don't look structured
    structured_fields = {'title', 'authors', 'year'}
    result = {}
    raw = utopia.citation.pick_from(citations, 'unstructured', default=None, record_in=result)
    if raw is None:
        return result

    # NOTE(review): this reads a `.citation` attribute from the picked
    # value; presumably pick_from returns an object exposing its source
    # citation -- confirm against utopia.citation.
    if not structured_fields.isdisjoint(raw.citation.keys()):
        return result

    parsed = utopia.tools.cited.parse(raw)
    if len(parsed) > 0:
        result.update(parsed[0])
    return result
def resolve(self, citations, document=None):
    """Resolve a citation against CrossRef, by title search and/or by DOI.

    Two phases:

    1. When no DOI is known but a title is, search CrossRef for the title.
       A result is accepted only if the search returns exactly one hit and
       its title either occurs in ``document`` (regular-expression search
       that tolerates any dash-like character) or, when no document or
       ``spineapi`` is available, equals the scraped title ignoring case
       and leading/trailing spaces and full stops. An accepted result is
       merged into the returned dict, and a leading 'http://dx.doi.org/'
       is removed from its DOI.
    2. When a DOI is known (picked or just found) and both ``document``
       and a title are available, resolve the DOI at CrossRef and merge
       the result if its title matches the scraped title or can be found
       in the document. Failures in this phase are printed and otherwise
       ignored.

    Returns the (possibly empty) dict of newly resolved citation data.
    """
    citation = {}
    doi = utopia.citation.pick_from(citations, 'identifiers[doi]', default=None, record_in=citation)
    title = utopia.citation.pick_from(citations, 'title', default=None, record_in=citation)

    if doi is None and title is not None:
        xref_results = utopia.tools.crossref.search(title)
        # Only an unambiguous (single) search result is considered.
        if len(xref_results) == 1:
            best = xref_results[0]
            xref_title = best.get('title', '').strip(' .')
            if len(xref_title) > 0:
                if document is not None and spineapi is not None:
                    # Accept the crossref title if present in the document
                    # (do magic dash pattern thing).
                    #
                    # The ASCII hyphen is deliberately the LAST member of
                    # the class. The \u escapes are expanded before `re`
                    # sees the pattern, so a hyphen placed before '~'
                    # would form the range '-'..'~' and swallow most
                    # printable ASCII, leaving regex metacharacters
                    # unescaped.
                    dashes = (u'\u007E\u00AD\u058A\u05BE\u1400\u1806'
                              u'\u2010-\u2015\u2053\u207B\u208B\u2212'
                              u'\u2E17\u2E3A\u2E3B\u301C\u3030\u30A0'
                              u'\uFE31\uFE32\uFE58\uFE63\uFF0D-')
                    # Escape everything that is not a dash ...
                    pattern = re.sub(
                        u'[^' + dashes + u']+',
                        lambda m: re.escape(m.group(0)),
                        xref_title)
                    # ... and let each run of dashes match the same number
                    # of characters from the dash punctuation class.
                    pattern = re.sub(
                        u'[' + dashes + u']+',
                        lambda m: r'\p{{Pd}}{{{0}}}'.format(len(m.group(0))),
                        pattern)
                    matches = document.search(
                        pattern, spineapi.RegExp + spineapi.IgnoreCase)
                    matched = len(matches) > 0
                else:
                    # Case-insensitive comparison; both sides get the same
                    # trimming and lowercasing.
                    matched = (xref_title.lower()
                               == title.strip(' .').lower())

                if matched:
                    citation.update(best)
                    doi = citation.get('identifiers', {}).get('doi')
                    if doi is not None and doi.startswith('http://dx.doi.org/'):
                        # Keep the bare DOI (18 == len('http://dx.doi.org/')).
                        doi = doi[18:]
                        citation['identifiers']['doi'] = doi

    if doi is not None and document is not None and title is not None:
        # What is this DOI's article's title according to crossref?
        try:
            xref_results = utopia.tools.crossref.resolve(doi)
            xref_title = xref_results.get('title', '')
            if len(xref_title) > 0:
                # Single-argument print calls give the same output as the
                # Python 2 print statement.
                print('%s %s' % ('crossref: resolved title:',
                                 xref_title.encode('utf8')))
                scraped = re.sub(r'[^\w]+', ' ', title).strip()
                resolved = re.sub(r'[^\w]+', ' ', xref_title).strip()
                if scraped == resolved:  # Fuzzy match
                    print('crossref: titles match precisely')
                    citation.update(xref_results)
                else:
                    # Accept the crossref title over the scraped title, if
                    # present in the document
                    matches = document.findInContext('', xref_title, '')  # Fuzzy match
                    if len(matches) > 0:
                        citation.update(xref_results)
                        print('crossref: overriding scraped title with crossref title')
                    else:
                        print('crossref: ignoring resolved citations')
                        # FIXME should we discard the DOI at this point?
        except Exception:
            # Best effort: report the failure and return what we have.
            import traceback
            traceback.print_exc()

    return citation