def consulta(lista_isbns, servico):
    """Look up every ISBN in ``lista_isbns`` using the service named ``servico``.

    Supported values of ``servico``:

    * ``'gbooks'`` -- queries isbnlib with ``service='goob'``.
    * ``'openl'``  -- queries isbnlib with ``service='openl'``.
    * ``'crref'``  -- filters Crossref works by ISBN.

    For the two isbnlib services, each record found is formatted with the
    ``'json'`` entry of ``bibformatters`` and stored in a local dict keyed by
    ``str(isbn)``; a message is printed for ISBNs the service does not know.
    Any other ``servico`` value does nothing.

    NOTE(review): the code shown never returns the collected dict, and the
    Crossref matches are gathered but not used -- confirm whether the function
    is meant to continue past this point.
    """
    nao_encontrada = "Entrada com ISBN %s não foi encontrada no serviço %s."
    formatador_json = bibformatters['json']
    isbn_dict = {}

    if servico == 'gbooks':
        for isbn in lista_isbns:
            try:
                registro = isbnlib.meta(isbn, service='goob')
                isbn_dict[str(isbn)] = formatador_json(registro)
            except isbnlib.dev.DataNotFoundAtServiceError:
                print(nao_encontrada % (isbn, servico))
            except isbnlib.dev._exceptions.NoDataForSelectorError:
                print(nao_encontrada % (isbn, servico))

    elif servico == 'openl':
        for isbn in lista_isbns:
            try:
                # NOTE(review): the stripped value is computed but the
                # unstripped ``isbn`` is what gets sent to isbnlib.
                isbn.strip()
                registro = isbnlib.meta(isbn, service='openl')
                isbn_dict[str(isbn)] = formatador_json(registro)
            except isbnlib.dev.DataNotFoundAtServiceError:
                print(nao_encontrada % (isbn, servico))

    elif servico == 'crref':
        for isbn in lista_isbns:
            isbn_limpo = isbn.strip()
            resultados = Works().filter(isbn=str(isbn_limpo))
            # Drain the query into a list, one entry per matching work.
            encontrados = list(resultados)
def all_articles(self):
    """Return an ``Article`` for every Crossref work under ``self.doi_prefix``.

    Crossref is queried for all works whose DOI prefix equals
    ``self.doi_prefix``.  Works without a ``published-online`` entry holding
    ``date-parts`` are skipped.  Every remaining work becomes an ``Article``
    whose status is ``'complete'`` only when its PDF link, DOI, title and
    date are all non-empty, and ``'incomplete'`` otherwise.

    Returns:
        A list of ``Article`` objects (possibly empty).

    Raises:
        ServerError: if anything fails while querying Crossref or while
            processing its results; the original exception is chained as
            the cause.
    """
    articles = []
    try:
        works = Works(etiquette=Prompt.etiquette)
        if __debug__: log(f'asking Crossref for all works by {self.doi_prefix}')
        for item in works.filter(prefix=self.doi_prefix):
            doi = item.get('DOI', '')
            # A present-but-empty 'title' list must count as a missing title
            # (-> status 'incomplete'), not abort the whole harvest with an
            # IndexError.
            titles = item.get('title') or ['']
            title = titles[0]
            online = item.get('published-online', None)
            if not online or 'date-parts' not in online:
                if __debug__: log(f'skipping {doi} lacking published-online')
                continue
            # Join the first date-parts entry as zero-padded fields,
            # e.g. [2020, 3, 7] -> '2020-03-07'.
            date = '-'.join(format(x, '02') for x in online['date-parts'][0])
            if __debug__: log(f'keeping publication {doi} dated {date}')
            pdf = pdf_link(item.get('link', []))
            jats = ''
            image = ''
            basename = tail_of_doi(doi)
            status = 'complete' if all([pdf, doi, title, date]) else 'incomplete'
            articles.append(Article(self.issn, doi, date, title, basename,
                                    pdf, jats, image, status))
    except Exception as ex:
        if __debug__: log(f'crossref API exception: {str(ex)}')
        # Chain the cause so the original traceback is not lost.
        raise ServerError(f'Failed to get data from Crossref: {str(ex)}') from ex
    return articles
def main(list_):
    """Enrich the books listed in the file ``list_`` with Crossref data.

    Each line of ``list_`` (trailing whitespace removed) is turned into a
    book record by ``create_book_metadata_obejct``.  For every book, up to
    100 sampled Crossref works matching its ISBN are inspected; when a work
    lists the book's ISBN and its exact title, the work's author names are
    appended to ``book['authors']`` and ``book['access_link']`` is set to the
    URL of the last link listed.  The records are then flattened and written
    to ``results_<list_>.xlsx`` on a sheet named ``books``.

    Args:
        list_: path of the text file with one book entry per line; also
            used verbatim inside the output file name.
    """
    crossref_works = Works()

    with open(list_) as f:
        lines = [line.rstrip() for line in f]
    books = [create_book_metadata_obejct(line) for line in lines]

    for book in books:
        candidates = (crossref_works.filter(isbn=book['isbn'])
                      .sample(100)
                      .select('title', 'ISBN', 'link', 'author'))
        for result in candidates:
            try:
                if (book['isbn'] in result['ISBN']
                        and book['title'] in result['title']):
                    for author in result.get('author') or []:
                        name = f"{author['given']} {author['family']}"
                        book['authors'].append(name)
                    for link in result.get('link') or []:
                        # Later links overwrite earlier ones: last one wins.
                        book['access_link'] = link['URL']
            except KeyError as err:
                # A result lacking an expected field is reported and skipped.
                print(err)
        # Pause between Crossref queries.
        # NOTE(review): placement assumed to be once per book -- confirm.
        sleep(0.5)

    normalised_json = json_normalize(books, sep=',')
    # `verbose` and `encoding` are no longer accepted by to_excel in
    # pandas >= 2.0; omitting them works on old and new versions alike.
    normalised_json.to_excel(f'results_{list_}.xlsx', sheet_name='books')
class Metadata:
    """
    Retrieve and organise the book and chapter metadata associated
    with the user-supplied ISBN.
    """

    def __init__(self, isbn):
        """Query book-level, then chapter-level, metadata for ``isbn``."""
        self.works = Works()
        self.isbn = isbn

        # Get book metadata
        self.book_metadata = self.get_book_metadata()
        self.chapters_data = self.get_chapters_data()

    def get_book_metadata(self):
        """
        Return the query for works matching the supplied ISBN, restricted
        to the 'title', 'DOI' and 'type' fields.
        """
        return self.works.filter(isbn=self.isbn).select('title', 'DOI', 'type')

    def get_chapters_data(self):
        """
        Return a python list of dictionaries with the book chapter data.

        Chapters are the works of type 'book-chapter' whose container title
        equals the title of the first work found for the ISBN.

        Raises AssertionError when no work is found for the ISBN or when no
        chapter is found for the book title.
        """
        book_data = list(self.book_metadata)
        if not book_data:
            raise AssertionError('Couldn\'t find any book metadata'
                                 + ' for the supplied --isbn value')
        book_title = book_data[0]['title'][0]

        # Materialise the results: the emptiness check below only means
        # something on a real list (a lazy query object is presumably
        # always truthy -- confirm), and the docstring promises a list.
        chapters_data = list(
            self.works.filter(container_title=book_title,
                              type='book-chapter')
            .select('DOI', 'license', 'author', 'title', 'type', 'page',
                    'publisher', 'container-title', 'abstract')
        )

        # Assert that at least one DOI has been discovered
        if not chapters_data:
            raise AssertionError('Couldn\'t find any chapter-level DOIs'
                                 + ' for the supplied --isbn value')
        return chapters_data

    def get_doi_suffix(self):
        """
        Return the book DOI suffix (string).

        The first work typed 'monograph', 'edited-book' or 'book' supplies
        the DOI; the segment after its first '/' is returned.

        Raises AssertionError when no book-level work is found.
        """
        book_types = ['monograph', 'edited-book', 'book']
        book_doi = [
            item['DOI']
            for item in self.book_metadata
            if item['type'] in book_types
        ]
        if not book_doi:
            raise AssertionError('Couldn\'t find book DOI')
        return book_doi[0].split('/')[1]

    @staticmethod
    def get_author_name(data, position):
        """
        Return the author name at ``position`` in ``data['author']``, or an
        empty string when there is no author at that position.

        Missing name parts are left out instead of raising KeyError.
        """
        authors = data.get('author') or []
        if len(authors) <= position:
            return ''
        author = authors[position]
        parts = [author.get('given', ''), author.get('family', '')]
        name = ' '.join(part for part in parts if part)
        # Presumably organisational contributors carry a single 'name'
        # field instead of given/family -- confirm against Crossref data.
        return name or author.get('name', '')

    @staticmethod
    def write_metadata(chapter_data, output_file_path):
        """
        Write the chapter metadata to ``output_file_path`` by running
        ``exiftool`` in quiet mode, then print a confirmation line.
        """
        now = datetime.now()
        arguments = [
            '-Title={}'.format(chapter_data['title'][0]),
            '-Author={}'.format(Metadata.join_author_names(chapter_data)),
            # Add publisher to the dc:publisher field
            '-Publisher={}'.format(chapter_data['publisher']),
            # '%H:%M:%S' spelled out: '%T' is not available on every platform
            '-ModDate={}'.format(now.strftime('%Y:%m:%d %H:%M:%S')),
            # Add Abstract in the dc:description field
            '-Description={}'.format(chapter_data.get('abstract', '')),
            # Add a copyright notice in the dc:rights field
            '-Copyright={}'.format(Metadata.get_rights(chapter_data)),
            # Add DOI to the dc:identifier field
            '-Identifier={}'.format(chapter_data['DOI']),
            # Add format to the dc:format field
            '-Format={}'.format('application/pdf'),
            # Add date to the dc:date field
            '-Date={}'.format(now.strftime('%Y:%m:%d')),
            # Add language to the dc:language field
            '-Language={}'.format('en'),
        ]

        cmd = ['exiftool', '-q'] + arguments + [output_file_path]
        run(cmd)

        print('{}: Metadata written'.format(path.split(output_file_path)[1]))

    @staticmethod
    def get_rights(chapter_data):
        """
        Compose a simple copyright statement, just like:
        '© John Doe https://creativecommons.org/licenses/by/2.0/'

        Author names and licence link are pulled out from chapter_data;
        'n.d.' stands in for the link when no licence URL is available.
        """
        licenses = chapter_data.get('license') or []
        # Indexing the 'n.d.' placeholder itself would raise TypeError, so
        # only look inside the first licence entry when there is one.
        copyright_url = licenses[0].get('URL', 'n.d.') if licenses else 'n.d.'
        data = {
            'authors_names': Metadata.join_author_names(chapter_data),
            'copyright_url': copyright_url,
        }
        return '© {authors_names} {copyright_url}'.format(**data)

    @staticmethod
    def join_author_names(chapter_data):
        """
        Return a string with the names of up to the first three authors,
        separated by semicolons.
        """
        # Make a list with author names, i.e. ['John Doe', '', '']
        authors = [
            Metadata.get_author_name(chapter_data, position)
            for position in range(3)
        ]
        # Return a string with the names, filtering empty fields
        return '; '.join(filter(None, authors))