def get_document(doc_id):
    """
    Fetch the abstract record of a Scopus document and enrich it.

    The record returned by ``ElsAbstract`` is modified in place:

    * ``_id`` is set to the second ``:``-separated field of
      ``coredata['dc:identifier']``;
    * ``affiliation`` is wrapped in a list when it is a single dict;
    * ``references`` is set to a list built from :func:`get_doc_refs`
      (only when reference data is present as a list or a dict);
    * ``last_modified`` is set to the current local time.

    :param doc_id: Scopus ID of the document to retrieve
    :return: the enriched document data
    """
    logger.info(f'Retrieving document {doc_id}')
    client = elsclient.ElsClient(key_manager.get_key('abstract'))
    abstract = ElsAbstract(scopus_id=doc_id)
    # NOTE(review): the result of read() is not checked; everything below
    # assumes the data was loaded.
    abstract.read(client)

    record = abstract.data
    record['_id'] = record['coredata']['dc:identifier'].split(':')[1]
    ref_payload = get_doc_refs(record['_id'])

    # Normalise 'affiliation' so that a dict value becomes a one-element list.
    if isinstance(record.get('affiliation'), dict):
        record['affiliation'] = [record['affiliation']]

    if ref_payload:
        ref_entries = ref_payload.get('references', {}).get('reference', [])
        # Always store references as a list; any other type is ignored.
        if isinstance(ref_entries, dict):
            record['references'] = [ref_entries]
        elif isinstance(ref_entries, list):
            record['references'] = ref_entries

    record['last_modified'] = datetime.now()
    return record
def get_affiliation(affl_id):
    """
    Retrieve an affiliation profile and yield it as a single item.

    On success the affiliation data gets an ``_id`` key holding the second
    ``:``-separated field of ``coredata['dc:identifier']`` and is yielded.
    When the read fails, the failure is logged and nothing is yielded.

    :param affl_id: ID of the affiliation to retrieve
    :return: generator producing at most one affiliation data dict
    """
    logger.info(f'Retrieving affiliation {affl_id}')
    api_key = key_manager.get_key('affiliation')
    client = elsclient.ElsClient(api_key)
    affl = ElsAffil(affil_id=affl_id)
    if not affl.read(client):
        logger.error("[!]Read affiliation failed: %s", affl_id)
        # Stop here: previously execution fell through and subscripted
        # affl.data even though the read had just been reported as failed.
        return
    affl.data['_id'] = affl.data['coredata']['dc:identifier'].split(':')[1]
    yield affl.data
def get_doc_refs(doc_id):
    """
    Fetch the references of a document.

    Reads the abstract identified by ``doc_id`` with the request parameter
    ``view=REF`` and hands back whatever data the read produced.

    :param doc_id: Scopus ID of the document
    :return: the ``data`` attribute of the ``ElsAbstract`` after reading
    """
    logger.info(f'Retrieving document references {doc_id}')
    client = elsclient.ElsClient(key_manager.get_key('abstract'))
    ref_view = ElsAbstract(scopus_id=doc_id, params={'view': 'REF'})
    # NOTE(review): the result of read() is not checked here.
    ref_view.read(client)
    return ref_view.data
def get_docs_by_author(author_id):
    """
    Search Scopus for documents by an author and yield each result.

    Runs the query ``au-id(<author_id>)`` against the ``scopus`` index.

    :param author_id: Scopus author ID to search for
    :return: generator over the search results
    """
    logger.info(f'Searching docs by author {author_id}')
    client = elsclient.ElsClient(key_manager.get_key('scopus_search'))
    author_search = ElsSearch(f'au-id({author_id})', 'scopus')
    # Second argument is presumably the "fetch all pages" flag (the same
    # slot get_docs_by_year fills with get_all) -- TODO confirm.
    author_search.execute(client, True)
    yield from author_search.results
def get_doc_authors(doc_id):
    """
    Yield one document-author relationship per author of a document.

    The abstract is read with the request parameter
    ``field=author,affiliation``. Each yielded dict has the keys ``doc``
    (the given ``doc_id``), ``author`` (the author's ``@auid``) and ``seq``
    (the author's ``@seq``); missing values come through as ``None``.

    :param doc_id: Scopus ID of the document
    :return: generator of ``{'doc', 'author', 'seq'}`` dicts
    """
    logger.info(f'Retrieving author affiliations of document {doc_id}')
    client = elsclient.ElsClient(key_manager.get_key('abstract'))
    record = ElsAbstract(scopus_id=doc_id, params={'field': 'author,affiliation'})
    # NOTE(review): the result of read() is not checked here.
    record.read(client)
    yield from (
        {'doc': doc_id, 'author': entry.get('@auid'), 'seq': entry.get('@seq')}
        for entry in record.authors
    )
def get_author_by_name(last_name, first_name, affiliation_id='60005248'):
    """
    Search the author index by last name, first name and affiliation.

    Yields exactly one dict that echoes the requested names and carries the
    search results under ``results``.

    :param last_name: author's last name, placed in ``authlast(...)``
    :param first_name: author's first name, placed in ``authfirst(...)``
    :param affiliation_id: affiliation ID, placed in ``af-id(...)``
    :return: generator producing a single
        ``{'first_name', 'last_name', 'results'}`` dict
    """
    logger.info('searching authors by name and affiliation')
    client_key = key_manager.get_key('scopus_search')
    author_query = (
        f'authlast({last_name}) and authfirst({first_name}) and af-id({affiliation_id})'
    )
    name_search = ElsSearch(author_query, 'author')
    name_search.execute(elsclient.ElsClient(client_key))
    match = {
        'first_name': first_name,
        'last_name': last_name,
        'results': name_search.results,
    }
    yield match
def get_serial_by_title(title):
    """
    Search serials by title and yield each hit.

    Every result gets an ``_id`` key copied from its ``source-id`` before it
    is yielded.

    :param title: serial title to search for
    :return: generator over the matching serial entries
    """
    logger.info('Searching serial by title: ' + title)
    client = elsclient.ElsClient(key_manager.get_key('serial_title'))
    title_search = ElsSerialTitleSearch(title)
    title_search.execute(client)
    for entry in title_search.results:
        entry['_id'] = entry['source-id']
        yield entry
def get_author(author_id):
    """
    Retrieve an author profile.

    On success the author data gets an ``_id`` key holding the second
    ``:``-separated field of ``coredata['dc:identifier']`` and a
    ``last_modified`` timestamp (current local time). When the read fails,
    the failure is logged and ``None`` is returned.

    :param author_id: Scopus author ID
    :return: the author data dict, or ``None`` if the read failed
    """
    logger.info(f'Retrieving author {author_id}')
    api_key = key_manager.get_key('author')
    client = elsclient.ElsClient(api_key)
    author = ElsAuthor(author_id=author_id)
    if not author.read(client):
        logger.error("[!]Read author failed: %s", author_id)
        # Stop here: previously execution fell through and subscripted
        # author.data even though the read had just been reported as failed.
        return None
    author.data['_id'] = author.data['coredata']['dc:identifier'].split(':')[1]
    author.data['last_modified'] = datetime.now()
    return author.data
def get_serial(serial_id):
    """
    Retrieve a serial by its Scopus ID.

    The serial is read with the request parameter ``view=STANDARD``. On
    success the first element of ``data['entry']`` is returned with an
    ``_id`` key copied from its ``source-id``.

    :param serial_id: Scopus ID of the serial
    :return: the first serial entry, or ``None`` when no data was loaded or
        the data contains an ``error`` key
    """
    logger.info(f'Retrieving serial {serial_id}')
    api_key = key_manager.get_key('serial')
    client = elsclient.ElsClient(api_key)
    serial = ElsSerial(scopus_id=serial_id, params={'view': 'STANDARD'})
    if not serial.read(client):
        logger.error("[!]Read serial failed: %s", serial_id)

    payload = serial.data
    if not payload:
        # Without this guard the membership test below raised on missing
        # data instead of following the function's return-None error path.
        return None
    if 'error' in payload:
        logger.error("[!]Read serial with error: %s, %s", payload['error'], serial_id)
        return None

    entry = payload['entry'][0]
    entry['_id'] = entry['source-id']
    return entry
def get_docs_by_year(year, affl_id='60005248', get_all=False):
    """
    Search Scopus for an affiliation's documents in a given year.

    Runs the query ``af-id(<affl_id>)`` against the ``scopus`` index with the
    extra request parameter ``date=<year>``.

    :param year: publication year, passed as the ``date`` parameter
    :param affl_id: affiliation ID to search for
    :param get_all: forwarded as the second argument of ``execute``
    :return: the results of the search
    """
    logger.info(f'Searching docs for year {year} and affiliation {affl_id}')
    client = elsclient.ElsClient(key_manager.get_key('scopus_search'))
    # NOTE: only a single search is executed. The original author noted that
    # JHU exceeds 5,000 publications per year in recent years and sketched a
    # second, split query (``AND NOT subjarea(MEDI)``), but it is not
    # implemented.
    year_search = ElsSearch(f'af-id({affl_id})', 'scopus', {'date': year})
    year_search.execute(client, get_all)
    return year_search.results