def resolve(self, citations, document = None):
        """Build a citation carrying a PubMed Central PDF link.

        Returns None when ``utopia.citation.has_link`` already reports a
        match for the 'application/pdf' / 'pmc' criteria, or when no PMC ID
        can be determined. Otherwise returns a dict holding the generated
        link and the PMC identifier. ``document`` is not used here.
        """
        resolved = {}

        # Nothing to add when a matching link is already known
        if utopia.citation.has_link(citations, {'mime': 'application/pdf'}, {'whence': 'pmc'}):
            return None

        pmc_id = utopia.citation.pick_from(citations, 'identifiers/pmc', default=None)
        if pmc_id is None:
            # Both identifiers are picked up front, then tried in order
            # (DOI first, PubMed ID second) until one yields a PMC ID
            doi = utopia.citation.pick_from(citations, 'identifiers/doi', default=None, record_in=resolved)
            pmid = utopia.citation.pick_from(citations, 'identifiers/pubmed', default=None, record_in=resolved)
            for value, scheme in ((doi, 'doi'), (pmid, 'pmid')):
                if pmc_id is None and value is not None:
                    pmc_id = utopia.tools.pmc.identify(value, scheme)

        if pmc_id is None:
            # No PMC ID could be found, so there is no link to generate
            return None

        # Generate PMC link to PDF
        resolved.update({
            'links': [{
                'url': 'http://www.ncbi.nlm.nih.gov/pmc/articles/{0}/pdf/'.format(pmc_id),
                'mime': 'application/pdf',
                'type': 'article',
                'title': 'Download article from PubMed Central',
            }],
            'identifiers': {'pmc': pmc_id}
        })
        return resolved
 def resolve(self, citations, document = None):
     """Look up arXiv metadata for the arXiv ID found in ``citations``.

     Always returns a dict; it is extended with the result of
     ``utopia.tools.arxiv.resolve`` only when an arXiv ID was picked.
     ``document`` is not used here.
     """
     resolved = {}
     arxiv_id = utopia.citation.pick_from(
         citations, 'identifiers[arxiv]', None, record_in=resolved)
     if arxiv_id is None:
         # No arXiv ID available, so there is nothing to look up
         return resolved
     metadata = utopia.tools.arxiv.resolve(arxiv_id)
     resolved.update(metadata)
     return resolved
 def resolve(self, citations, document=None):
     """Look up CrossRef metadata for the DOI found in ``citations``.

     Always returns a dict; it is extended with the result of
     ``utopia.tools.crossref.resolve`` only when a DOI was picked.
     ``document`` is not used here.
     """
     resolved = {}
     doi = utopia.citation.pick_from(
         citations, 'identifiers[doi]', default=None, record_in=resolved)
     if doi is None:
         # No DOI available, so there is nothing to look up
         return resolved
     metadata = utopia.tools.crossref.resolve(doi)
     resolved.update(metadata)
     return resolved
Beispiel #4
0
 def resolve(self, citations, document=None):
     """Look up PubMed metadata for the PubMed ID found in ``citations``.

     The lookup is skipped when the 'title:pubmed' pick already yields a
     value, or when no PubMed ID is present. Always returns a dict.
     ``document`` is not used here.
     """
     resolved = {}
     pmid = utopia.citation.pick_from(
         citations, 'identifiers[pubmed]', default=None, record_in=resolved)
     known_title = utopia.citation.pick_from(
         citations, 'title:pubmed', default=None)

     # Only resolve when there is an ID and no 'title:pubmed' value yet
     if known_title is not None or pmid is None:
         return resolved

     metadata = utopia.tools.pubmed.resolve(pmid)
     resolved.update(metadata)
     return resolved
Beispiel #5
0
    def resolve(self, citations, document=None):
        """Try to identify the PubMed record for the given citations.

        Strategy, in order:

        1. If a PubMed ID is already present, nothing more is done.
        2. If a DOI is present, ask ``utopia.tools.pubmed.identify`` for the
           matching PubMed ID and store it under ``'identifiers'``.
        3. Otherwise search PubMed by title and accept the result when the
           titles match after normalisation, or (when ``document`` is given)
           when the PubMed title can be found in the document.

        :param citations: citation data handed to ``utopia.citation.pick_from``.
        :param document: optional document object; only its
            ``findInContext`` method is used here.
        :returns: a dict with whatever was picked or resolved (possibly empty).
        """
        citation = {}

        pubmed_id = utopia.citation.pick_from(citations,
                                              'identifiers[pubmed]',
                                              None,
                                              record_in=citation)
        if pubmed_id is not None:
            # Already identified
            return citation

        doi = utopia.citation.pick_from(citations,
                                        'identifiers[doi]',
                                        None,
                                        record_in=citation)
        if doi is not None:
            pubmed_id = utopia.tools.pubmed.identify(doi, 'doi')
            if pubmed_id is not None:
                # Replaces any 'identifiers' entry already in the dict
                citation['identifiers'] = {'pubmed': pubmed_id}
                return citation

        # Fall back to a title search
        title = utopia.citation.pick_from(citations,
                                          'title',
                                          None,
                                          record_in=citation)
        if title is None:
            return citation

        title = title.strip(' .')
        pubmed_results = utopia.tools.pubmed.search(title)
        pubmed_title = pubmed_results.get('title', '').strip(' .')
        if not pubmed_title:
            return citation

        def normalise(text):
            # Collapse every run of non-word characters to a single space
            # and ignore case, so punctuation differences do not matter
            return re.sub(r'[^\w]+', ' ', text).strip().lower()

        matched = normalise(title) == normalise(pubmed_title)  # Fuzzy match
        if not matched and document is not None:
            # Accept the pubmed title over the scraped title, if present in
            # the document
            matches = document.findInContext('', pubmed_title, '')  # Fuzzy match
            if len(matches) > 0:
                matched = True
                # Use the text as it appears in the document
                pubmed_title = matches[0].text()

        if matched:
            citation.update(pubmed_results)
            citation['title'] = pubmed_title

        return citation
    def resolve(self, citations, document=None):
        """Parse an unstructured citation into structured fields.

        Returns None (a no-op) when any incoming citation already has a
        'provenance/whence' of 'cermine', when there is no 'unstructured'
        value, when the picked value's ``citation`` mapping already has a
        'title', 'authors' or 'year' key, or when parsing yields nothing.
        Otherwise returns a dict updated with the first parsed result.
        ``document`` is not used here.
        """
        # Bail if cermine results are already present
        already_parsed = any(
            utopia.citation.pick(entry, 'provenance/whence', default=None) == 'cermine'
            for entry in citations)
        if already_parsed:
            return None

        resolved = {}
        raw = utopia.citation.pick_from(
            citations, 'unstructured', default=None, record_in=resolved)
        if raw is None:
            return None

        # Only handle citations that don't look structured already.
        # NOTE(review): relies on the picked value exposing a ``citation``
        # mapping -- confirm against utopia.citation.pick_from.
        structure_keys = set(['title', 'authors', 'year'])
        if structure_keys & set(raw.citation.keys()):
            return None

        parsed = utopia.tools.cited.parse(raw)
        if len(parsed) > 0:
            # Only the first parsed entry is used
            resolved.update(parsed[0])
            return resolved
        return None
    def resolve(self, citations, document=None):
        """Resolve citation metadata through CrossRef, by DOI or by title.

        Two phases:

        1. When no DOI is known, search CrossRef by title. A result is only
           considered when the search returns exactly one hit, and only
           accepted when its title is found in the document (if ``document``
           and ``spineapi`` are available) or equals the scraped title.
        2. When a DOI is known (picked or just found) and both ``document``
           and ``title`` are available, resolve the DOI through CrossRef and
           accept the result when its title matches the scraped title or
           appears in the document.

        Returns the ``citation`` dict, which is returned unchanged by this
        method's own logic when neither a DOI nor a title was picked.
        """
        citation = {}
        doi = utopia.citation.pick_from(citations,
                                        'identifiers[doi]',
                                        default=None,
                                        record_in=citation)
        title = utopia.citation.pick_from(citations,
                                          'title',
                                          default=None,
                                          record_in=citation)
        if doi is not None or title is not None:
            if doi is None:
                # Phase 1: no DOI, so search CrossRef using the title
                xref_results = utopia.tools.crossref.search(title)
                # Anything other than exactly one hit is ignored
                if len(xref_results) == 1:
                    best = xref_results[0]
                    xref_title = best.get('title', '').strip(' .')
                    if len(xref_title) > 0:
                        matched = False
                        if document is not None and spineapi is not None:
                            # Accept the crossref title if present in the document (do magic dash pattern thing)
                            # Step 1: regex-escape every run of characters that is NOT in the dash class
                            xref_title = re.sub(
                                ur'[^-\u002D\u007E\u00AD\u058A\u05BE\u1400\u1806\u2010-\u2015\u2053\u207B\u208B\u2212\u2E17\u2E3A\u2E3B\u301C\u3030\u30A0\uFE31\uFE32\uFE58\uFE63\uFF0D]+',
                                lambda x: re.escape(x.group(0)), xref_title)
                            # Step 2: replace every run of N dash-class characters with \p{Pd}{N},
                            # so any dash punctuation in the document matches
                            xref_title = re.sub(
                                ur'[\u002D\u007E\u00AD\u058A\u05BE\u1400\u1806\u2010-\u2015\u2053\u207B\u208B\u2212\u2E17\u2E3A\u2E3B\u301C\u3030\u30A0\uFE31\uFE32\uFE58\uFE63\uFF0D-]+',
                                lambda x: r'\p{{Pd}}{{{0}}}'.format(
                                    len(x.group(0))), xref_title)
                            # Case-insensitive regex search of the document for the title pattern
                            matches = document.search(
                                xref_title,
                                spineapi.RegExp + spineapi.IgnoreCase)
                            matched = (len(matches) > 0)
                        else:
                            # NOTE(review): only xref_title is lowercased here; `title` is compared
                            # as picked, so mixed-case scraped titles will not match -- confirm intent
                            matched = (xref_title.lower() == title)
                        if matched:
                            citation.update(best)
                            # Re-read the DOI from the accepted result so phase 2 can run
                            doi = citation.get('identifiers', {}).get('doi')
                            if doi is not None and doi.startswith(
                                    'http://dx.doi.org/'):
                                # Strip the 18-character 'http://dx.doi.org/' prefix
                                doi = doi[18:]
                                citation['identifiers']['doi'] = doi
            if doi is not None:
                # Phase 2: verification needs both a document and a scraped title
                if None not in (document, title):
                    # What is this DOI's article's title according to crossref?
                    try:
                        xref_results = utopia.tools.crossref.resolve(doi)
                        xref_title = xref_results.get('title', '')
                        if len(xref_title) > 0:
                            print 'crossref: resolved title:', xref_title.encode(
                                'utf8')

                            # Compare with runs of non-word characters collapsed to spaces
                            # (case is NOT ignored in this comparison)
                            if re.sub(r'[^\w]+', ' ', title).strip() == re.sub(
                                    r'[^\w]+', ' ',
                                    xref_title).strip():  # Fuzzy match
                                print 'crossref: titles match precisely'
                                citation.update(xref_results)
                            else:
                                # Accept the crossref title over the scraped title, if present in the document
                                matches = document.findInContext(
                                    '', xref_title, '')  # Fuzzy match
                                if len(matches) > 0:
                                    citation.update(xref_results)
                                    print 'crossref: overriding scraped title with crossref title'
                                else:
                                    print 'crossref: ignoring resolved citations'
                                    # FIXME should we discard the DOI at this point?
                    except Exception as e:
                        # Best effort: any failure is printed and otherwise ignored,
                        # leaving the citation as gathered so far
                        import traceback
                        traceback.print_exc()

        return citation