コード例 #1
0
ファイル: isbn.py プロジェクト: tinloaf/bibchex
    def _query_blocking(self, entry, provider):
        isbn = entry.data.get('isbn')

        # Okay, we're actually going to make a HTTP request
        self._ratelimit.get()

        if not isbn:
            self._ui.finish_subtask('ISBNQuery')
            return None

        if notisbn(isbn):
            self._ui.finish_subtask('ISBNQuery')
            return (None, "{} is not a valid ISBN.".format(isbn))

        try:
            bibtex_data = self._formatter(meta(isbn, service=provider))
        except ISBNLibException as e:
            self._ui.finish_subtask('ISBNQuery')
            return (None, e)
        except socket.timeout:
            self._ui.finish_subtask('ISBNQuery')
            raise RetrievalProblem("Socket timeout during"
                                   " ISBN metadata retrieval")

        try:
            parsed_data = bibtexparser.loads(bibtex_data)
        except:
            self._ui.finish_subtask('ISBNQuery')
            raise RetrievalProblem("Data from ISBN source could not be parsed")

        if len(parsed_data.entries) != 1:
            self._ui.finish_subtask('ISBNQuery')
            raise RetrievalProblem(
                "ISBN search did not return exactly one result.")

        retrieved = Entry(parsed_data.entries[0], self._ui)
        s = Suggestion("isbn_{}".format(provider), entry)
        for (k, v) in retrieved.data.items():
            if k.lower() == 'id':
                continue
            s.add_field(k, v)

        for (first, last) in s.authors:
            s.add_author(first, last)

        for (first, last) in s.editors:
            s.add_editor(first, last)

        return (s, None)
コード例 #2
0
    def _query_blocking(self, entry):
        doi = entry.get_probable_doi()
        if not doi:
            self._ui.finish_subtask('CrossrefQuery')
            return None

        try:
            data = crossref_commons.retrieval.get_publication_as_json(doi)
        except ValueError as e:
            self._ui.finish_subtask('CrossrefQuery')
            if str(e) == f"DOI {doi} does not exist":
                # This isn't really an error, CrossRef just does not know
                # about them
                pass
            else:
                LOGGER.error((f"Error retrieving data for {entry.get_id()}. "
                              f"{e}"))
            return None
        except ConnectionError as e:
            # TODO retry?
            self._ui.finish_subtask('CrossrefQuery')
            LOGGER.error(
                (f"Connection error retrieving data for {entry.get_id()}. "
                 f"{e}"))
            return None

        s = Suggestion("crossref", entry)

        # Special handling for type
        btype = TYPE_MAPPING.get(data['type'])
        if not btype:
            LOGGER.warn(
                "Type {} not found in crossref source. (Entry {})".format(
                    data['type'], entry.get_id()))
        else:
            s.add_field('entrytype', btype)

        # Special handling for authors
        for author_data in data.get('author', []):
            s.add_author(
                author_data.get('given', "").strip(),
                author_data.get('family', "").strip())

        # Special handling for editors
        for editor_data in data.get('editor', []):
            s.add_editor(
                editor_data.get('given', "").strip(),
                editor_data.get('family', "").strip())

        # Special handling for journal / book title
        if btype in ['journal-article', 'book-chapter']:
            journal = flexistrip(data.get('container-title'))
            if journal:
                s.add_field('journal', journal)

        # Special handling for URL. Only take it if it's not a DOI-Url
        url = flexistrip(data.get('URL'))
        if url and (CrossrefSource.DOI_URL_RE.match(url) is None):
            s.add_field('url', url)

        # All other fields
        for field_from, field_to in FIELD_MAPPING.items():
            if isinstance(field_to, dict):
                if entry.data['entrytype'] in field_to:
                    field_to = field_to[entry.data['entrytype']]
                else:
                    field_to = field_to.get('default')

            if not field_to:
                continue

            if field_from in data:
                s.add_field(field_to, flexistrip(data[field_from]))

        self._ui.finish_subtask('CrossrefQuery')
        return s
コード例 #3
0
    def _query_blocking(self, entry):
        doi = entry.get_probable_doi()

        if not doi:
            self._ui.finish_subtask('DataCiteQuery')
            return None

        # Okay, we're actually going to make a HTTP request
        self._ratelimit.get()

        url = "https://api.datacite.org/dois/{}".format(
            urllib.parse.quote(doi))
        response = requests.get(url)

        if response.status_code != 200:
            self._ui.finish_subtask('DataCiteQuery')
            return None

        try:
            data = response.json()
        except ValueError:
            LOGGER.warn("Response did not contain JSON")
            self._ui.finish_subtask('DataCiteQuery')
            return None

        if 'errors' in data:
            self._ui.finish_subtask('DataCiteQuery')
            return None

        attrs = data['data']['attributes']

        s = Suggestion('datacite', entry)

        # Authors
        for i in range(0, len(attrs['creators'])):
            adata = attrs['creators'][i]
            if 'givenName' in adata and 'familyName' in adata:
                s.add_author(adata['givenName'], adata['familyName'])

        # Editors
        for i in range(0, len(attrs['contributors'])):
            adata = attrs['contributors'][i]
            if adata.get('contributorType') == 'Editor':
                if 'givenName' in adata and 'familyName' in adata:
                    s.add_editor(adata['givenName'], adata['familyName'])

        # Title…s?
        # TODO what happens if there are multiple titles?
        if path_exists(attrs, ('titles', 0, 'title')):
            s.add_field('title', attrs['titles'][0]['title'])

        if 'publisher' in attrs:
            s.add_field('publisher', attrs['publisher'])

        if 'publicationYear' in attrs:
            s.add_field('year', attrs['publicationYear'])

        if 'url' in attrs:
            s.add_field('url', attrs['url'])

        ctype = None
        if path_exists(attrs, ('container', 'type')):
            ctype = attrs['container']['type']
            cdata = attrs['container']

        if ctype == 'Journal':
            if 'title' in cdata:
                s.add_field('journal', cdata['title'])
        elif ctype == 'Book Series':
            if 'title' in cdata:
                s.add_field('booktitle', cdata['title'])

        if ctype in ('Journal', 'Book Series'):
            if 'volume' in cdata:
                s.add_field('volume', cdata['volume'])
            if 'issue' in cdata:
                s.add_field('issue', cdata['issue'])
            if cdata.get('identifierType') == 'ISSN':
                s.add_field('issn', cdata['identifier'])
            if 'firstPage' in cdata and 'lastPage' in cdata:
                s.add_field(
                    'pages', '{}--{}'.format(cdata['firstPage'],
                                             cdata['lastPage']))

        if path_exists(attrs, ('type', 'bibtex')):
            s.add_field('ENTRYTYPE', attrs['type']['bibtex'])

        self._ui.finish_subtask('DataCiteQuery')
        return s