def _query_blocking(self, entry, provider):
    """Look up the ISBN of ``entry`` at ``provider`` and build a suggestion.

    This call blocks while the metadata is fetched. The 'ISBNQuery'
    subtask is reported as finished exactly once, no matter how the
    method is left (return or exception).

    Args:
        entry: The entry to look up. Its ISBN is read from
            ``entry.data['isbn']``.
        provider: Name of the metadata service. It is passed to ``meta``
            as ``service`` and becomes part of the suggestion's source
            name (``isbn_<provider>``).

    Returns:
        ``None`` if the entry has no ISBN. Otherwise a tuple
        ``(suggestion, error)`` in which exactly one element is ``None``:
        ``(None, message_or_exception)`` for an invalid ISBN or an
        ``ISBNLibException``, ``(suggestion, None)`` on success.

    Raises:
        RetrievalProblem: On a socket timeout, if the retrieved data
            cannot be parsed, or if it does not contain exactly one
            entry.
    """
    try:
        isbn = entry.data.get('isbn')
        if not isbn:
            return None

        if notisbn(isbn):
            return (None, "{} is not a valid ISBN.".format(isbn))

        # Only now are we actually going to make a HTTP request, so only
        # now take a slot from the rate limiter. Entries without a usable
        # ISBN must not eat into the request budget.
        self._ratelimit.get()

        try:
            bibtex_data = self._formatter(meta(isbn, service=provider))
        except ISBNLibException as e:
            return (None, e)
        except socket.timeout as e:
            raise RetrievalProblem("Socket timeout during"
                                   " ISBN metadata retrieval") from e

        try:
            parsed_data = bibtexparser.loads(bibtex_data)
        except Exception as e:
            # Deliberately broad (any parser failure counts), but no
            # longer a bare ``except`` that would also swallow
            # KeyboardInterrupt / SystemExit.
            raise RetrievalProblem(
                "Data from ISBN source could not be parsed") from e

        if len(parsed_data.entries) != 1:
            raise RetrievalProblem(
                "ISBN search did not return exactly one result.")

        retrieved = Entry(parsed_data.entries[0], self._ui)
        s = Suggestion("isbn_{}".format(provider), entry)
        for (k, v) in retrieved.data.items():
            # The retrieved entry's ID is not a suggestion for this entry.
            if k.lower() == 'id':
                continue
            s.add_field(k, v)

        # Copy people from the *retrieved* entry. Iterating the freshly
        # created suggestion's own lists (as before) never added anyone.
        # NOTE(review): assumes Entry exposes `authors` / `editors` as
        # (first, last) pairs, like Suggestion does - confirm.
        for (first, last) in retrieved.authors:
            s.add_author(first, last)

        for (first, last) in retrieved.editors:
            s.add_editor(first, last)

        return (s, None)
    finally:
        # Single exit point for progress reporting; this also covers the
        # success path, which previously never finished the subtask.
        self._ui.finish_subtask('ISBNQuery')
def _query_blocking(self, entry):
    """Fetch Crossref metadata for ``entry``'s DOI and build a suggestion.

    This call blocks while the record is fetched. The 'CrossrefQuery'
    subtask is reported as finished exactly once, no matter how the
    method is left.

    Args:
        entry: The entry to look up. Its DOI comes from
            ``entry.get_probable_doi()``.

    Returns:
        A ``Suggestion`` with source name "crossref", or ``None`` if the
        entry has no DOI or the record could not be retrieved
        (``ValueError`` / ``ConnectionError`` from the lookup).
    """
    try:
        doi = entry.get_probable_doi()
        if not doi:
            return None

        try:
            data = crossref_commons.retrieval.get_publication_as_json(doi)
        except ValueError as e:
            # A "does not exist" error isn't really an error, CrossRef
            # just does not know about this DOI. Anything else is logged.
            if str(e) != f"DOI {doi} does not exist":
                LOGGER.error((f"Error retrieving data for {entry.get_id()}. "
                              f"{e}"))
            return None
        except ConnectionError as e:
            # TODO retry?
            LOGGER.error(
                (f"Connection error retrieving data for {entry.get_id()}. "
                 f"{e}"))
            return None

        s = Suggestion("crossref", entry)

        # Special handling for type. A record without a 'type' key is
        # treated like an unknown type instead of raising KeyError.
        crossref_type = data.get('type')
        btype = TYPE_MAPPING.get(crossref_type)
        if not btype:
            LOGGER.warning(
                "Type {} not found in crossref source. (Entry {})".format(
                    crossref_type, entry.get_id()))
        else:
            s.add_field('entrytype', btype)

        # Special handling for authors. `or ""` guards against explicit
        # nulls in the record, which have no .strip().
        for author_data in data.get('author', []):
            s.add_author(
                (author_data.get('given') or "").strip(),
                (author_data.get('family') or "").strip())

        # Special handling for editors
        for editor_data in data.get('editor', []):
            s.add_editor(
                (editor_data.get('given') or "").strip(),
                (editor_data.get('family') or "").strip())

        # Special handling for journal / book title.
        # 'journal-article' and 'book-chapter' are Crossref type names
        # (the keys looked up in TYPE_MAPPING above), so the raw Crossref
        # type is compared here rather than the mapped `btype`.
        # NOTE(review): presumes TYPE_MAPPING values are BibTeX types, so
        # the previous comparison against `btype` could never match.
        if crossref_type in ('journal-article', 'book-chapter'):
            journal = flexistrip(data.get('container-title'))
            if journal:
                s.add_field('journal', journal)

        # Special handling for URL. Only take it if it's not a DOI-Url
        url = flexistrip(data.get('URL'))
        if url and (CrossrefSource.DOI_URL_RE.match(url) is None):
            s.add_field('url', url)

        # All other fields. A mapping target may be a dict keyed by the
        # entry's own type, with an optional 'default' fallback.
        own_type = entry.data.get('entrytype')
        for field_from, field_to in FIELD_MAPPING.items():
            target = field_to
            if isinstance(target, dict):
                if own_type in target:
                    target = target[own_type]
                else:
                    target = target.get('default')

            if not target:
                continue

            if field_from in data:
                s.add_field(target, flexistrip(data[field_from]))

        return s
    finally:
        self._ui.finish_subtask('CrossrefQuery')
def _query_blocking(self, entry):
    """Fetch DataCite metadata for ``entry``'s DOI and build a suggestion.

    This call blocks while the record is fetched from
    ``https://api.datacite.org/dois/<doi>``. The 'DataCiteQuery' subtask
    is reported as finished exactly once, no matter how the method is
    left.

    Args:
        entry: The entry to look up. Its DOI comes from
            ``entry.get_probable_doi()``.

    Returns:
        A ``Suggestion`` with source name 'datacite', or ``None`` if the
        entry has no DOI, the response status is not 200, the body is not
        JSON, the body reports errors, or it carries no attributes.

    Raises:
        Whatever ``requests.get`` raises (connection errors, and now
        timeouts); these are not caught here.
    """
    # Upper bound in seconds for the HTTP call; without one a stalled
    # connection would block this worker indefinitely.
    request_timeout = 30

    try:
        doi = entry.get_probable_doi()
        if not doi:
            return None

        # Okay, we're actually going to make a HTTP request
        self._ratelimit.get()

        url = "https://api.datacite.org/dois/{}".format(
            urllib.parse.quote(doi))
        response = requests.get(url, timeout=request_timeout)

        if response.status_code != 200:
            return None

        try:
            data = response.json()
        except ValueError:
            LOGGER.warning("Response did not contain JSON")
            return None

        if 'errors' in data:
            return None

        # Treat a structurally unexpected body like "nothing found"
        # instead of failing with KeyError / TypeError.
        payload = data.get('data') if isinstance(data, dict) else None
        attrs = payload.get('attributes') if isinstance(payload, dict) \
            else None
        if not isinstance(attrs, dict):
            return None

        s = Suggestion('datacite', entry)

        # Authors; only creators with both name parts are usable.
        for adata in attrs.get('creators') or []:
            if 'givenName' in adata and 'familyName' in adata:
                s.add_author(adata['givenName'], adata['familyName'])

        # Editors are contributors flagged with contributorType 'Editor'.
        for adata in attrs.get('contributors') or []:
            if adata.get('contributorType') != 'Editor':
                continue
            if 'givenName' in adata and 'familyName' in adata:
                s.add_editor(adata['givenName'], adata['familyName'])

        # Title…s?
        # TODO what happens if there are multiple titles?
        if path_exists(attrs, ('titles', 0, 'title')):
            s.add_field('title', attrs['titles'][0]['title'])

        if 'publisher' in attrs:
            s.add_field('publisher', attrs['publisher'])

        if 'publicationYear' in attrs:
            s.add_field('year', attrs['publicationYear'])

        if 'url' in attrs:
            s.add_field('url', attrs['url'])

        # Container (journal / book series) details
        if path_exists(attrs, ('container', 'type')):
            cdata = attrs['container']
            ctype = cdata['type']

            if ctype == 'Journal':
                if 'title' in cdata:
                    s.add_field('journal', cdata['title'])
            elif ctype == 'Book Series':
                if 'title' in cdata:
                    s.add_field('booktitle', cdata['title'])

            if ctype in ('Journal', 'Book Series'):
                if 'volume' in cdata:
                    s.add_field('volume', cdata['volume'])
                if 'issue' in cdata:
                    s.add_field('issue', cdata['issue'])
                # Only read the identifier if it is actually present.
                if (cdata.get('identifierType') == 'ISSN'
                        and 'identifier' in cdata):
                    s.add_field('issn', cdata['identifier'])
                if 'firstPage' in cdata and 'lastPage' in cdata:
                    s.add_field(
                        'pages',
                        '{}--{}'.format(cdata['firstPage'],
                                        cdata['lastPage']))

        if path_exists(attrs, ('type', 'bibtex')):
            s.add_field('ENTRYTYPE', attrs['type']['bibtex'])

        return s
    finally:
        self._ui.finish_subtask('DataCiteQuery')