예제 #1
0
def main():
    """Resolve the DOI given on the command line and print a citation.

    Relies on module-level ``args`` (argparse namespace) and ``cn``
    (habanero content negotiation).  With ``--bibtex`` the entry is
    re-keyed as ``<lastname>_<year>_<shortdoi>``; otherwise the requested
    format is printed verbatim.
    """
    doi = _extract_doi(args.identifier[0])

    if doi is None:
        # NOTE(review): 'item' is not defined in this scope -- this looks
        # like it should echo the unresolved identifier; confirm.
        print(item)
    elif args.bibtex:
        result = cn.content_negotiation(doi, format="bibtex")
        bibtex = parse_string(result, "bibtex")
        try:
            # BUG FIX: dict views are not subscriptable in Python 3, so
            # the original .values()[0] / .keys()[0] raised TypeError.
            # Materialize the first entry once and reuse it.
            entry = list(bibtex.entries.values())[0]
            name = "".join(
                list(entry.persons.values())[0][0].last_names)
            # Transliterate German umlauts explicitly, then strip any
            # remaining non-ASCII characters.
            name = name.replace("ä", "ae").replace("ö",
                                                   "oe").replace("ü", "ue")
            name = unidecode(name)
            shortdoi = _short_doi(doi)[3:]
            year = entry.fields["year"]
            key = "{}_{}_{}".format(name, year, shortdoi)
            new = BibliographyData()
            new.add_entry(key, entry)
            print(new.to_string("bibtex"))
        except KeyError:
            # Entry lacks an expected field (e.g. "year"); fall back to
            # the raw content-negotiation result.
            print(result)
    else:
        try:
            result = cn.content_negotiation(doi, format=args.format)
            print(result)
        except requests.exceptions.HTTPError:
            print(doi)
    print()
def test_content_negotiation_style():
    "content negotiation - style"
    # Fetch the same DOI in two citation styles; the rendered text must
    # differ between APA and IEEE.
    doi = u"10.1126/science.169.3946.635"
    res_apa = cn.content_negotiation(ids=doi, format="text", style="apa")
    res_ieee = cn.content_negotiation(ids=doi, format="text", style="ieee")
    assert res_apa != res_ieee
예제 #3
0
def references_to_bib(refs):
    """Convert reference strings ("url:..." / "doi:...") to bibtex entries.

    Results are memoized in the module-level ``_REFERENCE_CACHE``, which is
    persisted to ``_REFERENCE_CACHE_PATH`` whenever a new entry is added.

    Args:
        refs ([str]): reference strings, each starting with "url:" or "doi:"

    Returns:
        list: bibtex-formatted strings, one per input reference

    Raises:
        ValueError: if a reference uses an unknown prefix
    """
    parsed_refs = []
    for ref in refs:

        if ref in _REFERENCE_CACHE:
            parsed_ref = _REFERENCE_CACHE[ref]
        elif ref.startswith('url:'):
            url = ref.split('url:')[1]
            # NOTE(review): the "\{" escape emits a literal backslash in the
            # output ("@article\{...") -- likely unintended; confirm.
            parsed_ref = "@article\{{{0},\n\turl = {{{1}}}\n\}}".format(
                url.__hash__(), url)
        elif ref.startswith('doi:'):
            doi = ref.split('doi:')[1]
            parsed_ref = content_negotiation(doi, format='bibentry')
        else:
            raise ValueError('Unknown reference style for'
                             'reference: {}'.format(ref))

        if ref not in _REFERENCE_CACHE:
            _REFERENCE_CACHE[ref] = parsed_ref
            dumpfn(_REFERENCE_CACHE, _REFERENCE_CACHE_PATH)

        # BUG FIX: the original appended the raw input ref and returned
        # refs, silently discarding every converted bibtex entry.
        parsed_refs.append(parsed_ref)

    return parsed_refs
예제 #4
0
    def load_records(self, DOIs=None):
        """Load all crossref items as valid records"""

        # Ask CrossRef for citeproc JSON; habanero returns a single string
        # for one DOI and a list of strings for several.
        records = cn.content_negotiation(ids=DOIs, format='citeproc-json')
        # Records might be a str or unicode (python 2)
        if not isinstance(records, list):
            records = [records, ]
        self.records = []
        for r in records:
            data = json.loads(r)
            try:
                record = self.to_record(data)
            except Exception:
                # Capture (type, value, traceback) so the log line can show
                # both the exception class and its message.
                e, v, tb = sys.exc_info()
                msg = _(
                    "An error occured while loading the following DOI: {}. "
                    "Check logs for details."
                ).format(
                    data.get('DOI')
                )
                logger.error(
                    '{}, error: {} [{}], data: {}'.format(msg, e, v, data)
                )
                # Re-raise as a domain error; the original traceback is only
                # available via the log line above (no exception chaining).
                raise DOILoaderError(msg)
            self.records.append(record)
예제 #5
0
def query_arXiv(title, author):
    """Look up a paper on arXiv by title (and first author, when known).

    Args:
        title (str): the title of the paper
        author (List(str)): the paper's authors; only the first is used

    Raises:
        ExtractionError: no usable search criteria, or no close title match

    Returns:
        str: a BibTeX entry -- fetched from Crossref when the arXiv hit
            carries a DOI, otherwise generated from the arXiv metadata
    """
    # Build the search string from whatever criteria are available.
    if title and author:
        results = arxiv.query(title + " " + author[0], max_results=5)
    elif title:
        results = arxiv.query(title, max_results=5)
    else:
        raise ExtractionError("No suitable search criteria extracted")

    for candidate in results:
        similarity = SequenceMatcher(None, candidate["title"].upper(),
                                     title.upper()).ratio()
        if similarity <= 0.9:
            continue
        if candidate["doi"]:
            # A DOI lets Crossref produce the authoritative entry.
            return cn.content_negotiation(ids=candidate["doi"],
                                          format="bibentry")
        return generate_bibtex_from_arXiv(candidate)

    raise ExtractionError("No matches found")
예제 #6
0
파일: utils.py 프로젝트: shyshy903/propnet
def references_to_bib(refs, check_if_valid_citation=True):
    """
    Convert reference strings into bibtex entries.

    Args:
        refs ([str]): references given as raw bibtex entries (starting
            with "@"), digital object identifiers ("doi:DOI_GOES_HERE"),
            urls ("url:URL_GOES_HERE") or ISBNs ("isbn:ISBN_GOES_HERE")
        check_if_valid_citation (bool): when True, verify that each entry
            renders as a markdown-style citation; raises ValueError if the
            conversion fails

    Returns:
        (list): list of bibtex formatted strings

    """
    collected = []
    for ref in refs:
        if ref in _REFERENCE_CACHE:
            entry = _REFERENCE_CACHE[ref]
        elif ref.startswith('@'):
            # Already bibtex; pass through untouched.
            entry = ref
        elif ref.startswith('url:'):
            # uses arbitrary key
            target = ref.split('url:')[1]
            entry = """@misc{{url:{0},
                       url = {{{1}}}
                       }}""".format(str(abs(target.__hash__()))[0:6], target)
        elif ref.startswith('doi:'):
            entry = content_negotiation(ref.split('doi:')[1],
                                        format='bibentry')
        elif ref.startswith('isbn:'):
            entry = bibformatters['bibtex'](meta(ref.split('isbn:')[1]))
        else:
            raise ValueError(
                'Unknown reference style for '
                'reference: {} (please either '
                'supply a BibTeX string, or a string '
                'starting with url: followed by a URL or '
                'starting with doi: followed by a DOI)'.format(ref))

        if check_if_valid_citation:
            try:
                _ = references_to_markdown(entry)
            except Exception as ex:
                raise ValueError(
                    "Reference '{}' returned the following error.\n"
                    "You may need to manually generate a bibtex string:\n"
                    "{}".format(ref, ex))

        if ref not in _REFERENCE_CACHE:
            # Persist the cache every time a new entry is learned.
            _REFERENCE_CACHE[ref] = entry
            dumpfn(_REFERENCE_CACHE, _REFERENCE_CACHE_PATH)
        collected.append(entry)
    return collected
예제 #7
0
def get_crossref_metadata(title, path):
    """
    Gets Crossref metadata, given an article's title. Then puts the metadata on the clipboard
    :param title: Title to search for
    :param path: PDF-Path, not necessary
    """

    # NOTE: Python 2 code (print statements, unicode builtin); clp is a
    # win32 clipboard module and citation_format / src_format are custom
    # clipboard format ids defined elsewhere in this module.
    print "getting crossref"

    # Searches the Crossref API for the given title, gets best result
    cr = Crossref()
    query = cr.works(query=title, limit=1)

    doi = ''

    # Extract DOI out of Crossref answer
    # (limit=1 means at most one item; the loop keeps the last DOI seen)
    for item in query['message']['items']:
        doi = item['DOI']

    # Not used, but useful. Gets metadata from isbnlib, given DOI
    # print isbnlib.doi2tex(doi)

    # Gets APA citation, given DOI
    apa_citation = cn.content_negotiation(ids=doi, format="text", style="apa")

    # We could get more formats this way, but this is not used at the moment, better performance without getting these formats
    # rdf_citation = cn.content_negotiation(ids=doi, format="rdf-xml")
    # json_citation = cn.content_negotiation(ids=doi, format="citeproc-json")
    # bib_entry = cn.content_negotiation(ids=doi, format="bibentry")

    # Prettify APA citation
    apa_citation = prettify_UTF8_Strings(apa_citation).strip('\n')
    print apa_citation

    clp.OpenClipboard(None)
    citations = {}
    citations['APA'] = apa_citation
    try:
        # Snapshot any existing clipboard text to store next to the citation.
        citations['content'] = unicode(clp.GetClipboardData(clp.CF_TEXT),
                                       errors='replace')
    except:
        # NOTE(review): bare except also hides programming errors; presumably
        # only meant to cover "clipboard has no text content" -- confirm.
        citations['content'] = 'no text content available'
    # Puts the citations on the clipboard
    clp.SetClipboardData(citation_format, json.dumps(citations))

    sources = {}
    sources['source'] = path
    try:
        sources['content'] = unicode(clp.GetClipboardData(clp.CF_TEXT),
                                     errors='replace')
    except:
        sources['content'] = 'no text content available'
    # Puts the sources on the clipboard
    clp.SetClipboardData(src_format, json.dumps(sources))
    clp.CloseClipboard()
예제 #8
0
def load_bibtex_cached(doi, reload_cache=False):
    """Return the bibtex entry for ``doi``, using the on-disk DOI cache.

    Downloads from CrossRef only on a cache miss or when ``reload_cache``
    is True, then persists the updated cache.
    """
    cache = load_doi_cache()
    if reload_cache or doi not in cache:
        try:
            entry = cn.content_negotiation(ids=doi)
        except Exception as e:
            error("There was a problem contacting crossref:\n", str(e))
            # BUG FIX: if error() returns instead of exiting, 'entry' was
            # unbound and the next line raised a confusing NameError while
            # also polluting the cache.  Propagate the real failure instead.
            raise

        cache[doi] = entry
        store_doi_cache(cache)
    return cache[doi]
예제 #9
0
def make_references(publications, output_dir):
    """
    Create reference bib file
    Args:
        publications: the list of publications
        output_dir: the output directory

    Returns:
        A list of reference identifiers
    """
    log = Logger()
    cr = Crossref()
    bib_entries = []
    identifiers = []

    for index, publication in enumerate(publications):
        log.notice(
            f"Querying and formatting {index + 1} out of {len(publications)} publications"
        )
        link = publication[LINK]
        title = publication[TITLE]

        if link and "doi.org" in link:
            # The DOI can be read straight off the url path.
            doi = urlparse(link).path.strip("/")
        else:
            # Fall back to a bibliographic title search on Crossref and
            # require an exact (case-insensitive) title match on the top hit.
            found = cr.works(query_bibliographic=title, limit=1)
            message = found["message"]
            if (message["total-results"] == 0
                    or message["items"][0]["title"][0].lower() != title.lower()):
                log.warn(f'Could not find the doi for "{title}"')

                continue

            doi = message["items"][0]["DOI"]

        try:
            entry = cn.content_negotiation(doi)
            bib_entries.append(entry)
            # Strip "@type{" and the trailing comma off the first line to
            # recover the citation key.
            first_line = entry.split("\n")[0]
            identifiers.append(re.sub("^@.*{", "", first_line).strip(","))
        except HTTPError:
            log.warn(f'Could not Create reference for "{title}"')

    with open(os.path.join(output_dir, "references.bib"), "w") as f:
        f.write("\n\n".join(bib_entries))

    return identifiers
예제 #10
0
def crossref_publications(doi_missed):
    """Resolve missing DOIs via CrossRef content negotiation.

    Appends [source, title, year, DOI] rows to the module-level
    ``final_result``; DOIs CrossRef cannot resolve are collected in
    ``crossref_doi``.
    """
    global final_result
    for doi in set(doi_missed):
        try:
            payload = cn.content_negotiation(ids=doi, format='citeproc-json')
            record = json.loads(payload)
            # Prefer the print publication date; fall back to the online one.
            if 'published-print' in record:
                year = record['published-print']['date-parts'][0][0]
            else:
                year = record['published-online']['date-parts'][0][0]
            final_result.append(['crossref', record['title'], year, record['DOI']])
        except requests.exceptions.HTTPError as error:
            print('DOI not found ', error)
            crossref_doi.append(doi)
예제 #11
0
def make_references(publications, output_dir):
    """
    Create reference bib file
    Args:
        publications: the list of publications
        output_dir: the output directory

    Returns:
        A list of reference identifiers
    """
    log = Logger()
    cr = Crossref()
    lines = []  # bibtex entries, joined into references.bib at the end
    references = []  # bibtex citation keys extracted from each entry

    for i, publication in enumerate(publications):
        log.notice(f'Querying and formatting {i + 1} out of {len(publications)} publications')
        link = publication[LINK]
        title = publication[TITLE]

        # Check if it is a DOI url
        if link and 'doi.org' in link:
            doi = urlparse(link).path.strip('/')

        # Extract the DOI using the title
        else:
            results = cr.works(query_title=title, limit=1)
            # Require an exact (case-insensitive) title match on the top hit
            if results['message']['total-results'] == 0 or \
                    results['message']['items'][0]['title'][0].lower() != title.lower():
                log.warn(f'Could not find the doi for "{title}"')

                continue

            doi = results['message']['items'][0]['DOI']

        try:
            # Uses the library's default content-negotiation format
            reference = cn.content_negotiation(doi)
            lines.append(reference)
            # Strip "@type{" and the trailing comma to get the citation key
            references.append(re.sub('^@.*{', '', reference.split('\n')[0]).strip(','))
        except HTTPError:
            log.warn(f'Could not Create reference for "{title}"')

    with open(os.path.join(output_dir, 'references.bib'), 'w') as f:
        f.write('\n\n'.join(lines))

    return references
예제 #12
0
파일: bot.py 프로젝트: bailliem/trial2rev
def batch_doi2pmid(dois):
    """
    resolve article PMID from DOI by feeding article citation to PubMed advanced search
    @param dois: list of DOIs to resolve
    @return: list of corresponding PMIDs
    """
    # NOTE: Python 2 code ("print e" statements).
    citations = []
    for doi in dois:
        # Strip a trailing period, which would break DOI resolution
        if doi[-1] == '.':
            doi = doi[:-1]
        try:
            # what if one fails?!
            cit = cn.content_negotiation(ids=doi, format="citeproc-json")
            # A list of ids yields a list of citation strings
            if isinstance(cit, list):
                for c in cit:
                    citations.append(c)
            else:
                citations.append(cit)
        except Exception as e:
            # Skip DOIs that fail to resolve; they simply produce no PMID
            print e
            continue
    parsed_citations = []
    for x in citations:
        try:
            cit = json.loads(x)
        except TypeError as e:
            # presumably guards against non-string results from
            # content_negotiation -- confirm
            print e
            continue
        # Build the minimal citation fields ecitmatch needs
        parsed_cit = {}
        if 'page' in cit:
            parsed_cit['first_page'] = cit['page'].split('-')[0]
        if 'volume' in cit:
            parsed_cit['volume'] = cit['volume']
        if 'container-title' in cit:
            parsed_cit['journal'] = cit['container-title']
        if 'issued' in cit:
            parsed_cit['year'] = cit['issued']['date-parts'][0][0]
        if 'author' in cit:
            if 'family' in cit['author'][0]:
                parsed_cit['aulast'] = cit['author'][0]['family']
        parsed_citations.append(parsed_cit)
    pmids = ecitmatch_tools.batch_pmids_for_citation(parsed_citations, debug=False)
    return pmids
예제 #13
0
def get_citation(self, doi, localpath=None, verbose=False):
    """Retrieve a citation for a DOI: loaded citations first, then local
    files, then the remote database, and finally a CrossRef download."""

    # Try loaded values first
    if self.citations_df is not None:

        # NOTE(review): filters self.citations with a mask built from
        # self.citations_df -- assumes the two stay index-aligned; confirm
        # whether this should be self.citations_df instead.
        matches = self.citations[(self.citations_df.doi == doi)
                                 | (self.citations_df.note == doi)]

        if len(matches) == 1:
            if verbose:
                print('Citation retrieved from loaded citations')
            return matches[0]
        elif len(matches) > 1:
            raise ValueError('Multiple loaded records found for the given doi')

    # DOIs are encoded into filesystem/record-safe names
    doifname = doi.lower().replace('/', '_')

    # Try localpath next
    if localpath is None:
        localpath = self.localpath
    if localpath is not None:
        # Any extension is accepted; the first glob hit wins
        for fname in Path(localpath, 'Citation').glob(doifname + '.*'):
            if verbose:
                print(f'Citation retrieved from local file {fname.name}')
            with open(fname, encoding='UTF-8') as f:
                return Citation(f.read())

    # Try remote next
    try:
        record = self.cdcs.query(template='Citation', title=doifname)
        assert len(record) == 1
    except:
        # Any failure (including zero or multiple matches) falls through
        # to the CrossRef download below.
        pass
    else:
        if verbose:
            print(f'Citation retrieved from remote database')
        return Citation(record.iloc[0].xml_content)

    # Lastly, download from CrossRef
    bibtex = cn.content_negotiation(ids=doi, format="bibtex")
    if verbose:
        print(f'Citation retrieved from CrossRef')
    return Citation(bibtex)
예제 #14
0
    def fetch(self, doi, localdir=None, verbose=True):
        """
        Fetches bibtex for published content.  First checks localdir, then
        potentials github, then CrossRef.

        Parameters
        ----------
        doi : str or list
            The reference doi to fetch content for.
        localdir : Path, optional
            The local directory for the .bib files.  If not given, will use
            the default path in potentials/data/bibtex directory.
        verbose : bool, optional
            If True (default), prints a message stating where the bibtex
            was retrieved from.
        """
        localfile = self.localfilepath(doi=doi, localdir=localdir)
        if localfile.is_file():
            # Load bibtex from file
            with open(localfile, encoding='UTF-8') as f:
                entry = f.read()
            if verbose:
                print(f'bibtex loaded {doi} from localdir')
        else:
            try:
                r = requests.get(f'https://github.com/usnistgov/potentials/raw/master/data/bibtex/{localfile.name}')
                r.raise_for_status()
                entry = r.text
                if verbose:
                    print(f'bibtex downloaded {doi} from github')
            except Exception:
                # BUG FIX: was a bare "except:", which also swallowed
                # SystemExit/KeyboardInterrupt.  Any request failure falls
                # back to CrossRef content negotiation.
                entry = cn.content_negotiation(ids=doi, format="bibtex")
                if verbose:
                    print(f'bibtex downloaded {doi} from CrossRef')

        # Parse the bibtex, converting LaTeX escapes to unicode
        parser = BibTexParser()
        parser.customization = convert_to_unicode
        bibdatabase = bibtexparser.loads(entry, parser=parser)

        # Set object attributes
        self.__doi = doi
        self.__bibdatabase = bibdatabase
        self.__content = self.__bibdatabase.entries[0]
예제 #15
0
def references_to_bib(refs):
    """
    Turn reference strings into bibtex entries.

    Accepts raw bibtex entries (starting with "@"), digital object
    identifiers ("doi:DOI_GOES_HERE") and urls ("url:URL_GOES_HERE").
    New lookups are stored in the module-level _REFERENCE_CACHE and the
    cache file is rewritten whenever an entry is added.

    Args:
        refs ([str]): list of string references

    Returns:
        (list): list of bibtex formatted strings

    """
    def parse_single(ref):
        # Convert one uncached reference string to a bibtex entry.
        if ref.startswith('@'):
            return ref
        if ref.startswith('url:'):
            # uses arbitrary key
            url = ref.split('url:')[1]
            return """@misc{{url:{0},
                       url = {{{1}}}
                       }}""".format(str(abs(url.__hash__()))[0:6], url)
        if ref.startswith('doi:'):
            return content_negotiation(ref.split('doi:')[1],
                                       format='bibentry')
        raise ValueError(
            'Unknown reference style for '
            'reference: {} (please either '
            'supply a BibTeX string, or a string '
            'starting with url: followed by a URL or '
            'starting with doi: followed by a DOI)'.format(ref))

    converted = []
    for ref in refs:
        if ref in _REFERENCE_CACHE:
            entry = _REFERENCE_CACHE[ref]
        else:
            entry = parse_single(ref)
            _REFERENCE_CACHE[ref] = entry
            dumpfn(_REFERENCE_CACHE, _REFERENCE_CACHE_PATH)
        converted.append(entry)
    return converted
예제 #16
0
def fetch_citation(self, doi, local=None, remote=None, verbose=False):
    """
    Retrieves a single citation based on its DOI.  First, the database is checked
    for matches with the DOI, then with the record name.  If no matches are found
    in the database, then the corresponding citation is downloaded from CrossRef.

    Parameters
    ----------
    doi : str
        The citation's DOI.  If the citation has no DOI, then the citation's
        record name should be given instead.
    local : bool, optional
        Indicates if the local location is to be searched.  Default value
        matches the value set when the database was initialized.
    remote : bool, optional
        Indicates if the remote location is to be searched.  Default value
        matches the value set when the database was initialized.
    verbose : bool, optional
        If True, info messages will be printed during operations.  Default
        value is False.
    """
    if local is not False or remote is not False:
        # Try fetching based on doi
        try:
            return self.get_citation(doi=doi, local=local, remote=remote, verbose=verbose)
        except Exception:
            # Fall through to the name-based lookup.
            pass

        # Try fetching based on name
        try:
            # BUG FIX: verbose was hard-coded to True here, ignoring the
            # caller's setting (the doi-based attempt above respects it).
            return self.get_citation(name=doi, local=local, remote=remote, verbose=verbose)
        except Exception:
            # Fall through to the CrossRef download.
            pass

    # Fetch from CrossRef if database search failed/skipped
    bibtex = cn.content_negotiation(ids=doi, format="bibtex")
    if verbose:
        print('Citation retrieved from CrossRef')

    return load_record('Citation', bibtex)
예제 #17
0
def query_crossref(title, author):
    """Query Crossref for extracted data

    Args:
        title (str): The title of the paper
        author (List(str)): A list of the authors of the paper

    Raises:
        ExtractionError: No suitable search criteria extracted
        ExtractionError: No suitable Crossref candidates
        ExtractionError: Crossref returned an error

    Returns:
        str: A BibTeX entry for the queried data
    """
    # Search for the paper on Crossref
    cr = Crossref(mailto="*****@*****.**")
    if author and title:
        r = cr.works(query=title + " " + author[0])
    elif title:
        r = cr.works(query=title)
    else:
        raise ExtractionError("No suitable search criteria extracted")
    # BUG FIX: the debug dump previously used file=open(...), leaking an
    # open file handle; use a context manager so it is always closed.
    with open("cn.json", "w") as dump_file:
        print(json.dumps(r), file=dump_file)
    if r["status"] == "ok":
        for result in r["message"]["items"]:
            # If the titles are similar enough
            if "title" in result:
                if SequenceMatcher(None, result["title"][0].upper(),
                                   title.upper()).ratio() > 0.9:
                    # If the title is similar enough, perform content negotiaiton
                    return cn.content_negotiation(ids=result["DOI"],
                                                  format="bibentry")
        else:
            # for-else with no break: reached when no candidate matched
            raise ExtractionError("No suitable Crossref candidates")
    else:
        raise ExtractionError("Crossref returned an error")
예제 #18
0
 def get_content(self):
     """Render the jinja template with per-package version/citation info."""
     context = {}
     for hint in self.software_requirement_hints:
         for package in hint['packages']:
             pkg_name = package['package']
             versions = package['version']
             citation = package[SCHEMA_ORG_CITATION]
             if citation.startswith(HTTPS_DOI_URL):
                 # DOI citations are resolved to APA text via CrossRef
                 # content negotiation.
                 doi_name = citation.replace(HTTPS_DOI_URL, '')
                 rendered_citation = cn.content_negotiation(ids=doi_name,
                                                            format="text",
                                                            style="apa")
             else:
                 rendered_citation = citation
             # Latest listed version only.
             context[pkg_name] = {
                 'version': versions[-1],
                 'citation': rendered_citation
             }
     context['description'] = self.workflow_version_description
     response = requests.get(self.jinja_template_url)
     response.raise_for_status()
     template = Template(response.text)
     return template.render(**context)
예제 #19
0
def fetch_bibtex(dois, biblibrary):
    """
    Fetches bibtex for published content.  If there is locally saved content,
    it will load it.  Otherwise, will download from CrossRef.

    Parameters
    ----------
    dois : list
        The reference dois to fetch content for.
    biblibrary : str
        Path to the directory containing bibtex files.

    Returns
    -------
    list of bibtexparser.bibdatabase.BibDatabase
    """

    bib_databases = []

    for doi in dois:
        fname = Path(biblibrary, bibfname(doi))
        try:
            # Load bibtex from file
            with open(fname, encoding='UTF-8') as bibtex_file:
                bibtex = bibtex_file.read()
        except Exception:
            # BUG FIX: was a bare "except:", which also swallowed
            # SystemExit/KeyboardInterrupt.  Any read failure triggers a
            # CrossRef download, which is then cached to disk.
            bibtex = cn.content_negotiation(ids=doi, format="bibtex")
            with open(fname, 'w', encoding='UTF-8') as bibtex_file:
                bibtex_file.write(bibtex)

        # Parse the bibtex, converting LaTeX escapes to unicode
        parser = BibTexParser()
        parser.customization = convert_to_unicode
        bib_databases.append(bibtexparser.loads(bibtex, parser=parser))

    return bib_databases
예제 #20
0
def get_doi_citation_crossref(doi):
    """
    Fetch citeproc-json publication metadata for a DOI from CrossRef.

    Parameters
    ----------
    doi : str
        DOI in the format "https://doi.org/10.1109/5.771073"
                    or    "doi:10.5066/F70R9MFW"
                    or    "http://dx.doi.org/10.1109/5.771073"

    Returns
    -------
        dict with publication information pulled from crossref site
    """
    raw = cn.content_negotiation(ids=doi, format="citeproc-json")
    cite_data = json.loads(raw)
    cite_data['geoform'] = 'publication'
    # Default the publication place when CrossRef does not supply one.
    cite_data['pubplace'] = cite_data.get('publisher-location', 'n/a')
    return cite_data
예제 #21
0
def test_content_negotiation():
    "content negotiation - default - bibtex"
    res = cn.content_negotiation(ids='10.1126/science.169.3946.635')
    # BUG FIX: the old check compared str against str(res).__class__, which
    # is always str whatever type res has, so the assertion could never fail.
    assert isinstance(res, str)
예제 #22
0
    def update_contents(self, new_store_contents):
        """
        Structure -> mpid -> BibTeX references from MP -> (optional doi lookup
        via Crossref) -> formatting.
        Formatting is very messy right now.
        DOI lookup and (possibly) formatting should be cached in a builder.
        """

        struct = self.from_data(new_store_contents)

        if not isinstance(struct, Structure):
            raise PreventUpdate(
                "Literature mentions can only be retrieved for crystallographic "
                "structures at present and not molecules. Please make a feature "
                "request if this would be useful for you, and it will be "
                "prioritized."
            )

        # Map the structure to Materials Project ids, then gather the
        # BibTeX reference strings MP stores for each matching material.
        with MPRester() as mpr:
            mpids = mpr.find_structure(struct)

            if len(mpids) == 0:
                raise PreventUpdate(
                    "No structures in the Materials Project database match this "
                    "crystal structure, so literature mentions cannot be retrieved. "
                    "Please submit this structure to Materials Project if you'd "
                    "like it to be added to the Materials Project database."
                )

            all_references = []
            for mpid in mpids:
                all_references.append(mpr.get_materials_id_references(mpid))
                self.logger.debug(f"Retrieved references for {mpid}.")

        if self.use_crossref:

            cr = Crossref(mailto=CROSSREF_MAILTO)
            # Each MP reference blob holds several BibTeX entries separated
            # by blank lines; dedupe across materials with a set.
            individual_references = set()
            for references in all_references:
                individual_references.update(set(references.split("\n\n")))

            # exclude Materials Project references (these are intended to be
            # references for the structure specifically)
            refs_to_remove = set()
            for ref in individual_references:
                if "Jain2013" in ref:
                    refs_to_remove.add(ref)
            individual_references -= refs_to_remove

            works = [cr.works(query=ref, limit=1) for ref in individual_references]
            self.logger.debug(f"Retrieved {len(works)} works from Crossref.")

            items = [
                work["message"]["items"][0]
                for work in works
                if len(work["message"]["items"]) > 0
            ]

            # Keep only confident matches (Crossref relevance score > 40)
            # and normalize the fields used for display below.
            dois_to_item = {
                item["DOI"]: {
                    "cited-by": item.get("is-referenced-by-count", 0),
                    "score": item["score"],
                    "title": item.get("title", None),
                    "authors": item.get("author", []),
                    "journal": item.get("container-title", [None])[0],
                    "issue": item.get("issue", None),
                    "volume": item.get("volume", None),
                    "pages": item.get("page", None),
                    "date-parts": item.get("issued", {}).get("date-parts", [[None]]),
                }
                for item in items
                if item["score"] > 40
            }

            num_refs = len(dois_to_item)
            # Most-cited first
            sorted_dois = sorted(
                list(dois_to_item.keys()),
                key=lambda doi: -dois_to_item[doi]["cited-by"],
            )

            if self.use_crossref_formatting:
                # use Crossref to retrieve pre-formatted text

                # remove leading "1. " from Science CSL style
                refs = {
                    doi: content_negotiation(ids=doi, format="text", style="science")[
                        3:
                    ]
                    for doi in dois_to_item.keys()
                }
                self.logger.debug(
                    f"Retrieved {len(refs)} formatted references from Crossref."
                )
                md = "  \n\n".join(
                    f"> [{refs[doi]}](https://dx.doi.org/{doi}) "
                    f"Cited by {dois_to_item[doi]['cited-by']}."
                    for doi in sorted_dois
                )
                formatted_references = dcc.Markdown(
                    md, className="mpc-markdown"
                )

            else:
                # else retrieve BibTeX entries to extract a nice author list
                # and perform our own formatting

                entries = {
                    doi: content_negotiation(ids=doi, format="bibtex")
                    for doi in sorted_dois
                }

                formatted_entries = []
                for doi, entry in entries.items():
                    author_string = self._bibtex_entry_to_author_text(entry)
                    journal_div = self._item_to_journal_div(dois_to_item[doi])

                    # One clickable blockquote per reference: title (italic),
                    # authors, journal info and citation count.
                    formatted_entries.append(
                        html.Blockquote(
                            [
                                html.A(
                                    [
                                        html.Div(
                                            [
                                                html.I(
                                                    # necessary since titles can contain HTML for superscripts etc.
                                                    dcc.Markdown(
                                                        dois_to_item[doi]["title"],
                                                        dangerously_allow_html=True
                                                    )
                                                )
                                            ]
                                        ),
                                        html.Div([author_string]),
                                        html.Div(
                                            [
                                                journal_div,
                                                html.Span(
                                                    f" Cited by {dois_to_item[doi]['cited-by']}."
                                                ),
                                            ]
                                        ),
                                    ],
                                    href=f"https://dx.doi.org/{doi}",
                                )
                            ],
                            className="mpc",
                            style={"padding-left": "1rem", "margin-bottom": "1rem"}
                        )
                    )

                formatted_references = html.Div(formatted_entries)
        else:
            # this uses pybtex directly on stored BibTeX entries from MP
            # most-accurate references and faster since no Crossref lookup
            # is required but no dois/hyperlinks available
            all_entries = {}
            for references in all_references:
                all_entries.update(Parser().parse_string(references).entries)
            md = self._pybtex_entries_to_markdown(all_entries)
            formatted_references = dcc.Markdown(md, className="mpc-markdown")
            num_refs = len(all_entries)

        return html.Div(
            [
                Label(f"{num_refs} references found{':' if num_refs>0 else '.'}"),
                formatted_references,
            ],
            style={"max-height": "20rem", "overflow-y": "scroll"},
        )
예제 #23
0
def batch_doi2pmid(dois):
    """
    Resolve article PMIDs from DOIs.

    Fetches citeproc-json metadata for each DOI via Crossref content
    negotiation, extracts the citation fields PubMed's ecitmatch service
    understands, and performs a batch PMID lookup.

    @param dois: list of DOIs to resolve
    @return: list of corresponding PMIDs
    """
    citations = []
    for doi in dois:
        # Strip a single trailing period (common copy/paste artifact).
        # endswith() is safe on an empty string, unlike doi[-1] which
        # raised IndexError when a blank DOI slipped through.
        if doi.endswith('.'):
            doi = doi[:-1]

        while True:
            try:
                # what if one fails?!
                print('bp7', doi)
                cit = cn.content_negotiation(ids=doi,
                                             format="citeproc-json",
                                             timeout=300)
                print('bp7 end')
                # content_negotiation may return a list of results
                if isinstance(cit, list):
                    citations.extend(cit)
                else:
                    citations.append(cit)
                break
            except requests.exceptions.HTTPError as e:
                if e.response.status_code == 503:
                    # service unavailable: back off and retry indefinitely
                    print('retrying...', e)
                    time.sleep(5)
                    continue
                elif e.response.status_code == 500:
                    # server-side failure: give up on this DOI
                    print('500 error', e.response.json())
                    break
                else:
                    print('UNHANDLED HTTP ERROR', e)
                    break
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout) as e:
                print('timeout or connection error, retrying', e)
                time.sleep(5)
                continue

    parsed_citations = []
    for x in citations:
        print('bp8')
        try:
            cit = json.loads(x)
        except TypeError as e:
            # x can be a non-string (e.g. None) when the lookup failed
            print(e)
            continue
        parsed_cit = {}
        if 'page' in cit:
            # ecitmatch wants only the first page of a page range
            parsed_cit['first_page'] = cit['page'].split('-')[0]
        if 'volume' in cit:
            parsed_cit['volume'] = cit['volume']
        if 'container-title' in cit:
            parsed_cit['journal'] = cit['container-title']
        if 'issued' in cit:
            # citeproc date-parts layout: [[year, month, day]]
            parsed_cit['year'] = cit['issued']['date-parts'][0][0]
        if 'author' in cit:
            # first author's family name, when present
            if 'family' in cit['author'][0]:
                parsed_cit['aulast'] = cit['author'][0]['family']
        parsed_citations.append(parsed_cit)
    print('bp9')
    pmids = ecitmatch_tools.batch_pmids_for_citation(parsed_citations,
                                                     debug=True)
    print('bp10')
    return pmids
예제 #24
0
def test_content_negotiation_citeproc_json():
    "content negotiation - citeproc-json"
    res = cn.content_negotiation(ids='10.1126/science.169.3946.635', format="citeproc-json")
    # The previous assertion `str == str(res).__class__` was vacuous:
    # str(res) is always a str, so it could never fail. Check the
    # response type directly instead.
    assert isinstance(res, str)
예제 #25
0
File: providers.py  Project: jdumas/autobib
def crossref_query(authors, title):
    """
    Query Crossref database.

    Args:
        authors (list): a list of strings for up the first authors last names.
        title (str): the title of the article.

    Returns:
        A tuple (bibtex, json, score) where the first element is the data in
        bibtex format (returned as a record/dict), the second element is the
        data returned in json format, and the third element is the score of the
        match given by Crossref.  Returns (None, [], 0) when no result is
        found and (None, json, 0) when the best match lacks author or title.
    """
    cr = Crossref()
    # Example of the request this builds:
    # works?query.title=An+Improved+Adaptive+Constraint+Aggregation+for+Integrated+Layout+and+Topology+Optimization&query.author=Gao+Zhu+Zhang+Zhou&sort=score&rows=1
    # query = ['+' + name + '' for name in authors]
    # query = 'query.title=' + urllib.parse.quote_plus(title) + '&query.author=' + urllib.parse.quote_plus(' '.join(authors)) + '&sort=score&rows=1'
    # print(query)
    # Field queries (title + author) when author names are available,
    # otherwise a plain bibliographic query on the title only.
    # NOTE(review): quote_plus() here may double-encode, since the HTTP
    # client typically encodes query parameters itself -- confirm.
    if ''.join(authors):
        args = dict(
            query_title=urllib.parse.quote_plus(title),
            query_author=urllib.parse.quote_plus(' '.join(authors))
        )
    else:
        args = dict(
            query=urllib.parse.quote_plus(title),
        )
    x = cr.works(sort='score', limit=1, **args)
    # x = cr.works(query=query)
    assert x['status'] == "ok"

    # No result found
    if not x['message']['items']:
        print_score(0)
        return (None, [], 0)

    # Among items tied at the top score, keep the one whose title matches
    # best; stop as soon as the score drops.
    best_item = x['message']['items'][0]
    # print(json.dumps(best_item, indent=4))
    for item in x['message']['items']:
        if item['score'] < best_item['score']:
            break
        else:
            best_item = pick_best(title, best_item, item)

    # Retrieve DOI and json item
    doi = best_item['DOI']
    res_json = best_item

    # If the entry is invalid, return a score of 0
    if 'author' not in res_json or not res_json['title']:
        print_score(0)
        return (None, res_json, 0)

    # Retrieve metadata as bibtex entry
    res_bib = cn.content_negotiation(ids=doi, format="bibentry")
    # Repair common mojibake (mis-decoded accented characters) observed in
    # BibTeX returned by content negotiation before parsing it.
    res_bib = re.sub('ä', 'ä', res_bib)
    res_bib = re.sub('Ö', 'Ö', res_bib)
    res_bib = re.sub('รถ', 'ö', res_bib)
    res_bib = re.sub('Ăź', 'ü', res_bib)
    res_bib = re.sub('̈o', 'ö', res_bib)
    res_bib = re.sub('ďż˝', 'ø', res_bib)
    res_bib = re.sub('ĂŤ', 'ë', res_bib)
    db = bibtexparser.loads(res_bib)
    # A single DOI must yield exactly one BibTeX entry.
    assert len(db.entries) == 1
    res_bib = db.entries[0]

    # If article has subtitle(s), fix bibtex entry
    subtitles = None
    if 'subtitle' in res_json:
        # Discard subtitles that are entirely uppercase (usually noise).
        subtitles = [x for x in res_json['subtitle'] if not str.isupper(x)]

    if subtitles:
        # Discard subtitle that are all uppercase
        title = ' '.join(res_json['title'])
        subtitle = ' '.join(subtitles)
        if title.lower().startswith(subtitle.lower()) or utils.simratio(title, subtitle) > 0.95:
            # Don't repeat title if the subtitle is too similar to the title
            new_title = title
        else:
            new_title = title + ": " + subtitle
        res_bib['title'] = new_title
    else:
        new_title = ' '.join(res_json['title'])
        res_bib['title'] = new_title

    # Post-process title: strip trailing asterisks, leading numbering
    # ("12. "), and trailing periods.
    res_bib['title'] = re.sub('\\*$', '', res_bib['title'])
    res_bib['title'] = re.sub('^[0-9]*\\. ', '', res_bib['title'])
    res_bib['title'] = re.sub('\\.*$', '', res_bib['title'])

    # If bibtex entry has a 'journal' field, then use the longest alias from the json
    if 'journal' in res_bib:
        best = ""
        for container in res_json['container-title']:
            if len(container) > len(best):
                best = container
        res_bib['journal'] = best

    # If entry is missing the year, set score to 0
    score = res_json['score']
    if 'year' not in res_bib:
        score = 0

    # Fix incorrect year in crossref entry: prefer the print publication
    # date when it disagrees with the BibTeX year.
    if 'published-print' in res_json:
        item = res_json['published-print']
        if 'date-parts' in item and len(item['date-parts']) == 1:
            date = item['date-parts'][0]
            year = date[0]
            month = date[1] if len(date) > 1 else None
            if str(year) != res_bib['year']:
                res_bib['year'] = str(year)
                if month is None and 'month' in res_bib:
                    # No month known for the corrected year: drop stale month
                    del res_bib['month']
                elif month is not None:
                    assert month >= 1 and month <= 12
                    month_str = utils.MONTHS[month - 1]
                    res_bib['month'] = month_str

    # Fix potential ambiguous author entries
    msg = utils.fix_author_field(res_bib, res_json)

    print('C: ' + nomenclature.gen_filename(res_bib))
    print_score(score)

    # If score is above threshold, display msg from fix_author_field
    if score >= config.crossref_accept_threshold and msg:
        print(msg)

    # Return database entry
    return (res_bib, res_json, score)
예제 #26
0
def crossref_query(authors, title):
    """
    Query Crossref database.

    Args:
        authors (list): a list of strings for up the first authors last names.
        title (str): the title of the article.

    Returns:
        A tuple (bibtex, json, score) where the first element is the data in
        bibtex format (returned as a record/dict), the second element is the
        data returned in json format, and the third element is the score of the
        match given by Crossref.  Returns (None, [], 0) when no result is
        found and (None, json, 0) when the best match lacks author or title.
    """
    cr = Crossref()
    # Build a single free-text query: each author name and the title are
    # wrapped in +"..." to require their presence in the match.
    query = ['+"' + name + '"' for name in authors]
    query = ' '.join(query) + ' +"' + title + '"'
    x = cr.works(query=query)
    assert x['status'] == "ok"

    # No result found
    if not x['message']['items']:
        print_score(0)
        return (None, [], 0)

    # Among items tied at the top score, keep the one whose title matches
    # best; stop as soon as the score drops.
    best_item = x['message']['items'][0]
    for item in x['message']['items']:
        if item['score'] < best_item['score']:
            break
        else:
            best_item = pick_best(title, best_item, item)

    # Retrieve DOI and json item
    doi = best_item['DOI']
    res_json = best_item

    # If the entry is invalid, return a score of 0
    if 'author' not in res_json or not res_json['title']:
        print_score(0)
        return (None, res_json, 0)

    # Retrieve metadata as bibtex entry
    res_bib = cn.content_negotiation(ids=doi, format="bibentry")
    # Repair common mojibake (mis-decoded accented characters) observed in
    # BibTeX returned by content negotiation before parsing it.
    res_bib = re.sub('ä', 'ä', res_bib)
    res_bib = re.sub('Ö', 'Ö', res_bib)
    res_bib = re.sub('รถ', 'ö', res_bib)
    res_bib = re.sub('Ăź', 'ü', res_bib)
    res_bib = re.sub('̈o', 'ö', res_bib)
    res_bib = re.sub('ďż˝', 'ø', res_bib)
    res_bib = re.sub('ĂŤ', 'ë', res_bib)
    db = bibtexparser.loads(res_bib)
    # A single DOI must yield exactly one BibTeX entry.
    assert len(db.entries) == 1
    res_bib = db.entries[0]

    # If article has subtitle(s), fix bibtex entry
    if 'subtitle' in res_json:
        # Keep only subtitles that are not entirely uppercase (noise filter).
        subtitles = [x for x in res_json['subtitle'] if not str.isupper(x)]
    else:
        subtitles = []
    if len(subtitles) > 0:
        # Discard subtitle that are all uppercase
        title = ' '.join(res_json['title'])
        subtitle = ' '.join(subtitles)
        if title.lower().startswith(
                subtitle.lower()) or utils.simratio(title, subtitle) > 0.95:
            # Don't repeat title if the subtitle is too similar to the title
            new_title = title
        else:
            new_title = title + ": " + subtitle
        res_bib['title'] = new_title
    else:
        new_title = ' '.join(res_json['title'])
        res_bib['title'] = new_title

    # Post-process title: strip trailing asterisks, leading numbering
    # ("12. "), and trailing periods.
    res_bib['title'] = re.sub('\\*$', '', res_bib['title'])
    res_bib['title'] = re.sub('^[0-9]*\\. ', '', res_bib['title'])
    res_bib['title'] = re.sub('\\.*$', '', res_bib['title'])

    # If bibtex entry has a 'journal' field, then use the longest alias from the json
    if 'journal' in res_bib:
        best = ""
        for container in res_json['container-title']:
            if len(container) > len(best):
                best = container
        res_bib['journal'] = best

    # If entry is missing the year, set score to 0
    score = res_json['score']
    if 'year' not in res_bib:
        score = 0

    # Fix incorrect year in crossref entry: prefer the print publication
    # date when it disagrees with the BibTeX year.
    if 'published-print' in res_json:
        item = res_json['published-print']
        if 'date-parts' in item and len(item['date-parts']) == 1:
            date = item['date-parts'][0]
            year = date[0]
            month = date[1] if len(date) > 1 else None
            if str(year) != res_bib['year']:
                res_bib['year'] = str(year)
                if month is None and 'month' in res_bib:
                    # No month known for the corrected year: drop stale month
                    del res_bib['month']
                elif month is not None:
                    assert month >= 1 and month <= 12
                    month_str = utils.MONTHS[month - 1]
                    res_bib['month'] = month_str

    # Fix potential ambiguous author entries
    msg = utils.fix_author_field(res_bib, res_json)

    print('C: ' + nomenclature.gen_filename(res_bib))
    print_score(score)

    # If score is above threshold, display msg from fix_author_field
    if score >= config.crossref_accept_threshold and msg:
        print(msg)

    # Return database entry
    return (res_bib, res_json, score)
예제 #27
0
def test_content_negotiation_with_unicode_doi():
    "content negotiation - unicode"
    res = cn.content_negotiation(ids=u"10.1126/science.169.3946.635")
    # The previous assertion `str == str(res).__class__` was vacuous:
    # str(res) is always a str, so it could never fail. Check the
    # response type directly instead.
    assert isinstance(res, str)
예제 #28
0
def test_content_negotiation_alt_url():
    "content negotiation - alternative url"
    res = cn.content_negotiation(
        ids="10.1126/science.169.3946.635", url="http://doi.org"
    )
    # The previous assertion `str == str(res).__class__` was vacuous:
    # str(res) is always a str, so it could never fail. Check the
    # response type directly instead.
    assert isinstance(res, str)
예제 #29
0
def test_content_negotiation_ids_missing():
    "content negotiation - omitting the required ids argument raises TypeError"
    # Legacy callable form of pytest.raises: invokes the callable and
    # asserts it raises the given exception type.
    pytest.raises(TypeError, cn.content_negotiation)
예제 #30
0
def test_content_negotiation_raises_an_http_error_with_bad_requests():
    "content negotiation - a nonexistent DOI raises HTTPError"
    with pytest.raises(HTTPError):
        # Removed the unused `res = ...` binding: the call is made only
        # for its exception.
        cn.content_negotiation(ids="10.1126/foo")
예제 #31
0
def test_content_negotiation_ids_none():
    "content negotiation - passing ids=None raises TypeError"
    # Legacy callable form of pytest.raises: invokes the callable with the
    # supplied keyword argument and asserts it raises the exception type.
    pytest.raises(TypeError, cn.content_negotiation, ids=None)