def get_articles(ath):
    print('Getting documents for: "' + str(ath.id) + '"')
    payload = {
        'query': 'AU-ID({0})'.format(ath.id),
        'count': '100',
        'cursor': '*',
        'field': 'author-count,author,dc:identifier,prism:coverDate,citedby-count,authkeywords,message',
        'date': '2007-2019'
    }
    p = Request('GET', 'https://api.elsevier.com/content/search/scopus',
                params=payload).prepare()
    search = ElsSearch(p.url)
    search.execute(client)
    print('Author has written: ' + str(search.tot_num_res) + ' documents')

    # Save collaborations
    if search.tot_num_res > 0:
        save_colls(search.results)
    print(str(search.num_res) + " Documents saved")

    total_saved = 0
    total_saved += search.num_res
    while total_saved < search.tot_num_res:
        payload = {
            'query': 'AU-ID({0})'.format(ath.id),
            'count': '100',
            'cursor': search.cursor['@next'],
            'field': 'author-count,author,dc:identifier,prism:coverDate,citedby-count,authkeywords,message',
            'date': '2007-2019'
        }
        p = Request('GET', 'https://api.elsevier.com/content/search/scopus',
                    params=payload).prepare()
        search = ElsSearch(p.url)
        search.execute(client)

        # Save collaborations
        save_colls(search.results)

        # Store latest fetched page index
        last_page = Author.update(last_page=total_saved).where(
            Author.id == ath.id)
        last_page.execute()

        # Increment saved
        total_saved += search.num_res
        print(str(total_saved) + " Documents saved")

    author_fetched = Author.update(
        docs_fetched=True, last_page=total_saved).where(Author.id == ath.id)
    author_fetched.execute()
    print(str(ath.id) + " - DONE")
for subject in cats:
    db.connect()
    payload = {
        'query': 'AFFIL({0}) AND SUBJAREA({1})'.format(country, subject),
        'count': 200
    }
    print('Getting authors in: "{0} {1}" '.format(country, subject).rjust(10), end="")
    p = Request('GET', 'https://api.elsevier.com/content/search/author',
                params=payload).prepare()
    search = ElsSearch(p.url)
    search.execute(client)
    print(Colour.BOLD + str(search.num_res) + " Authors saved" + Colour.END)


def save_authors(authors):
    # Save each author in db
    for author in authors:
        a_id = str(author['dc:identifier']).split('AUTHOR_ID:')
        a_id = int(a_id[-1])
        db_author = None
        # Check if author was already saved in db
        try:
            db_author = Author.get(Author.id == a_id)
            db_author.country = list(db_author.country)
            # Assumed continuation: record the query country on the existing
            # row, following the save pattern used in save_author below
            if country not in db_author.country:
                db_author.country.append(country)
            db_author.save()
        except Author.DoesNotExist:
            # Assumed continuation: author not yet in db, create it with the
            # current country
            db_author = Author(id=a_id, country=[country])
            db_author.save(force_insert=True)
def get_coauthors(ath):
    def save_coauthors(coauthors):
        colist = []
        for coauthor in coauthors:
            db_author = save_author(coauthor)
            colist.append(db_author.id)
        return colist

    # Check for author row in coauthors table
    db_coauthor, created = Coauthors.get_or_create(id=ath.id)

    coauthors = []
    print(Style.BRIGHT + 'Getting coauthors of: "' + str(ath.id) + '"' + Style.RESET_ALL)
    payload = {
        'co-author': '%s' % ath.id,
        'count': 50,
        'start': db_coauthor.last_page or 0,
    }
    p = Request('GET', 'https://api.elsevier.com/content/search/author',
                params=payload).prepare()
    search = ElsSearch(p.url)
    search.execute(client)
    print(Colour.OKGREEN + 'Author has ' + Colour.BOLD + str(search.tot_num_res) +
          ' coauthors' + Colour.END)

    coauthors = coauthors + save_coauthors(search.results)
    total_saved = 0
    total_saved += search.num_res
    while total_saved < search.tot_num_res:
        # Store latest fetched page index
        last_page = Coauthors.update(last_page=total_saved).where(
            Coauthors.id == ath.id)
        last_page.execute()
        search = None

        # Build next url
        payload = {
            'co-author': '%s' % ath.id,
            'count': 25,
            'start': total_saved,
        }
        p = Request('GET', 'https://api.elsevier.com/content/search/author',
                    params=payload).prepare()
        search = ElsSearch(p.url)
        search.execute(client)

        # Save authors
        coauthors = coauthors + save_coauthors(search.results)

        # Increment saved
        total_saved += search.num_res

    print(Colour.BOLD + str(total_saved) + " Coauthors saved for '" +
          str(ath.id) + "'" + Colour.END)
    return coauthors
def get_coauthors(ath):
    def save_coauthors(coauthors):
        colist = []
        for coauthor in coauthors:
            a_id = str(coauthor['dc:identifier']).split('AUTHOR_ID:')
            a_id = int(a_id[-1])
            colist.append(a_id)
            db_author = None
            # Check if author was already saved in db
            try:
                db_author = Author.get(Author.id == a_id)
            except Author.DoesNotExist:
                subject_areas = coauthor.get('subject-area') or []
                if isinstance(coauthor.get('subject-area'), list) is False:
                    subject_areas = [coauthor.get('subject-area')]
                db_author = Author(id=a_id)
                db_author.full_name = coauthor['preferred-name']
                if coauthor.get('subject-area') is not None:
                    db_author.subject_areas = [dict(frequency=s['@frequency'], name=s['$'])
                                               for s in subject_areas]
                db_author.document_count = coauthor.get('document-count')
                db_author.affiliation_current = coauthor.get('affiliation-current')
                # Write into db
                db_author.save(force_insert=True)
        return colist

    # Check for author row in coauthors table
    db_coauthor = Coauthors.get(Coauthors.id == ath.id)

    coauthors = []
    print(Style.BRIGHT + 'Getting coauthors of: "' + str(ath.id) + '"' + Style.RESET_ALL)
    payload = {
        'co-author': '%s' % ath.id,
        'count': 25,
        'start': ath.last_page,
    }
    p = Request('GET', 'https://api.elsevier.com/content/search/author',
                params=payload).prepare()
    search = ElsSearch(p.url)
    search.execute(client)
    print(Colour.OKGREEN + 'Author has ' + Colour.BOLD + str(search.tot_num_res) +
          ' coauthors' + Colour.END)

    coauthors = coauthors + save_coauthors(search.results)
    total_saved = 0
    total_saved += search.num_res
    while total_saved < search.tot_num_res:
        # Store latest fetched page index
        last_page = Coauthors.update(last_page=total_saved).where(
            Coauthors.id == ath.id)
        last_page.execute()
        search = None

        # Build next url
        payload = {
            'co-author': '%s' % ath.id,
            'count': 25,
            'start': total_saved,
        }
        p = Request('GET', 'https://api.elsevier.com/content/search/author',
                    params=payload).prepare()
        search = ElsSearch(p.url)
        search.execute(client)

        # Save authors
        coauthors = coauthors + save_coauthors(search.results)

        # Increment saved
        total_saved += search.num_res

    return coauthors
def get_articles(ath):
    def save_colls(articles):
        for article in articles:
            if article.get('author') is not None and len(article['author']) > 1:
                id = re_abs_id.findall(article.get('dc:identifier'))
                id = id[0]
                save_collaboration(article, id)

    print(Style.BRIGHT + 'Getting documents for: "' + str(ath.id) + '"' + Style.RESET_ALL)
    payload = {
        'query': 'AU-ID({0})'.format(ath.id),
        'count': 25,
        'start': ath.last_page,
        'field': 'author-count,author,dc:identifier,prism:coverDate,citedby-count,authkeywords,message,affiliation',
        'date': '2007-2019'
    }
    p = Request('GET', 'https://api.elsevier.com/content/search/scopus',
                params=payload).prepare()
    search = ElsSearch(p.url)
    search.execute(client)
    print(Colour.OKGREEN + 'Author has written: ' + Colour.BOLD +
          str(search.tot_num_res) + ' documents' + Colour.END)
    print(Colour.UNDERLINE + str(search.num_res) + " Documents gathered" + Colour.END)

    # Save collaborations
    save_colls(search.results)

    total_saved = 0
    total_saved += search.num_res
    while total_saved < search.tot_num_res:
        # Store latest fetched page index
        last_page = Author.update(last_page=total_saved).where(Author.id == ath.id)
        last_page.execute()
        search = None

        payload = {
            'query': 'AU-ID({0})'.format(ath.id),
            'count': 25,
            'start': total_saved,
            'field': 'author-count,author,dc:identifier,prism:coverDate,citedby-count,authkeywords,message,affiliation',
            'date': '2007-2019'
        }
        p = Request('GET', 'https://api.elsevier.com/content/search/scopus',
                    params=payload).prepare()
        search = ElsSearch(p.url)
        search.execute(client)

        # Save collaborations
        save_colls(search.results)

        # Increment saved
        total_saved += search.num_res
        print(Colour.UNDERLINE + str(total_saved) + " Abstracts gathered" + Colour.END)

    fetched = Author.update(docs_fetched=True, last_page=total_saved).where(Author.id == ath.id)
    fetched.execute()
    print(Colour.OKGREEN + "DONE" + Colour.END)
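# Illustrative driver for get_articles, not part of the original script: it
# assumes the peewee Author model used throughout these listings, with the
# docs_fetched flag marking authors whose documents are already stored.
def fetch_pending_articles():
    # `== False` builds a peewee SQL expression; `is False` would not work here
    pending = Author.select().where(Author.docs_fetched == False)
    for ath in pending:
        get_articles(ath)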
def main():
    # Return Author peewee model
    def save_author(author, save_metrics=False):
        a_id = str(author['dc:identifier']).split('AUTHOR_ID:')
        a_id = int(a_id[-1])
        if isinstance(author.get('subject-area'), list) is False:
            author['subject-area'] = [author.get('subject-area')]

        # Check if author was already saved in db
        db_author, created = Author.get_or_create(id=a_id)
        if created:
            db_author.full_name = author.get('preferred-name')
            db_author.subject_areas = [
                dict(frequency=s['@frequency'], name=s['$'])
                for s in author['subject-area'] if s is not None
            ]
            db_author.affiliation_current = author.get('affiliation-current')
            if save_metrics:
                # Request metrics for author profile
                metrics = ElsAuthor(author_id=a_id)
                metrics.read_metrics(client)
                db_author.document_count = metrics.data['coredata'].get('document-count')
                db_author.cited_by_count = metrics.data['coredata'].get('cited-by-count')
                db_author.h_index = metrics.data.get('h_index')
                db_author.coauthors_count = metrics.data.get('coauthors_count')
                db_author.is_sample = True
            db_author.save()
            print("+1 ", end="", flush=True)
        else:
            print(".", end="", flush=True)
        return db_author

    def get_coauthors(ath):
        def save_coauthors(coauthors):
            colist = []
            for coauthor in coauthors:
                db_author = save_author(coauthor)
                colist.append(db_author.id)
            return colist

        # Check for author row in coauthors table
        db_coauthor, created = Coauthors.get_or_create(id=ath.id)

        # Check if coauthors were already saved
        if (created is False and db_coauthor.co_list) and len(db_coauthor.co_list) > 0:
            return db_coauthor.co_list

        coauthors = []
        print(Style.BRIGHT + 'Getting coauthors of: "' + str(ath.id) + '"' + Style.RESET_ALL)
        payload = {
            'co-author': '%s' % ath.id,
            'count': 25,
            'start': db_coauthor.last_page or 0,
        }
        p = Request('GET', 'https://api.elsevier.com/content/search/author',
                    params=payload).prepare()
        search = ElsSearch(p.url)
        search.execute(client)
        print(Colour.OKGREEN + 'Author has ' + Colour.BOLD + str(search.tot_num_res) +
              ' coauthors' + Colour.END)

        coauthors = coauthors + save_coauthors(search.results)
        total_saved = 0
        total_saved += search.num_res
        while total_saved < search.tot_num_res:
            # Store latest fetched page index
            last_page = Coauthors.update(last_page=total_saved).where(
                Coauthors.id == ath.id)
            last_page.execute()
            search = None

            # Build next url
            payload = {
                'co-author': '%s' % ath.id,
                'count': 25,
                'start': total_saved,
            }
            p = Request('GET', 'https://api.elsevier.com/content/search/author',
                        params=payload).prepare()
            search = ElsSearch(p.url)
            search.execute(client)

            # Save authors
            coauthors = coauthors + save_coauthors(search.results)

            # Increment saved
            total_saved += search.num_res

        print(Colour.BOLD + str(total_saved) + " Coauthors saved for '" +
              str(ath.id) + "'" + Colour.END)
        return coauthors

    def save_authors(authors):
        # Save each author, fetch their coauthors, then store the coauthor ids
        for author in authors:
            db_author = save_author(author, save_metrics=True)

            # Save categories only for authors and not for coauthors.
            # Earlier runs stored `cat` as a tuple, so normalise it to a flat list.
            cat_tuple_to_list = db_author.cat
            if isinstance(cat_tuple_to_list, tuple):
                cat_tuple_to_list = cat_tuple_to_list[0]
            else:
                cat_tuple_to_list = list(db_author.cat or [])
            cat_tuple_to_list = [
                i for i in cat_tuple_to_list if not isinstance(i, list)
            ]
            db_author.cat = list(set(cat_tuple_to_list + [cat]))
            db_author.save()

            coauthors = get_coauthors(db_author)
            # Store only unique coauthors ids
            coauthors = list(set(coauthors))
            fetched = Coauthors.update(co_list=coauthors).where(
                Coauthors.id == db_author.id)
            fetched.execute()

    def save_latest(new_country, offset=0, total=0, is_final=False):
        with open("config.json", "w") as io:
            new_config = config
            if is_final:
                new_config[new_country] = 1
                new_config['latest_offset'] = 0
                new_config['total'] = 0
            else:
                new_config['latest_offset'] = offset
                new_config['total'] = total
            json.dump(new_config, io)

    for country in countries:
        saved = config.get(country)
        if saved != 1 and config['latest_offset'] <= config['total']:
            payload = {
                'query': 'SUBJAREA({0}) AND AFFIL({1})'.format(cat, country),
                'count': 50,
                'start': config['latest_offset'],
                'sort': '-document-count',
            }
            print('Getting most cited authors from ' + country +
                  ' starting from latest ' + str(config['latest_offset']))
            p = Request('GET', 'https://api.elsevier.com/content/search/author',
                        params=payload).prepare()
            search = ElsSearch(p.url)
            search.execute(client)
            print(Colour.BOLD + "Going to save " + str(search.num_res) + " authors " + Colour.END)

            # Save
            save_authors(search.results)
            total_saved = config['latest_offset']
            total_saved += search.num_res
            save_latest(country, total_saved, limit)

            next_url = None
            while total_saved < limit:
                for e in search.links:
                    if e['@ref'] == 'next':
                        next_url = e['@href']
                search = None
                search = ElsSearch(next_url)
                search.execute(client)

                # Save
                save_authors(search.results)

                # Increment saved
                total_saved += search.num_res
                save_latest(country, total_saved, limit)
                print(Colour.BOLD + str(total_saved) + " Authors saved" + Colour.END, end="")

            print(Colour.OKGREEN + "DONE" + Colour.END)
            save_latest(country, 0, 0, is_final=True)
        elif config['latest_offset'] == config['total']:
            save_latest(country, 0, 0, is_final=True)
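# Sketch of the module-level setup the listings above assume. Everything here
# is reconstructed from how the code uses these names: the field types, the
# database backend and the file names are guesses, not the original project.
import json

from requests import Request                      # only used to build prepared URLs
from elsapy.elsclient import ElsClient
from elsapy.elssearch import ElsSearch
from elsapy.elsprofile import ElsAuthor
from peewee import BooleanField, IntegerField, Model
from playhouse.sqlite_ext import JSONField, SqliteExtDatabase

client = ElsClient('YOUR_SCOPUS_API_KEY')         # placeholder API key
db = SqliteExtDatabase('scopus.db')               # assumed local database


class Author(Model):
    id = IntegerField(primary_key=True)
    full_name = JSONField(null=True)
    subject_areas = JSONField(null=True)
    affiliation_current = JSONField(null=True)
    country = JSONField(null=True)
    cat = JSONField(null=True)
    document_count = IntegerField(null=True)
    cited_by_count = IntegerField(null=True)
    h_index = IntegerField(null=True)
    coauthors_count = IntegerField(null=True)
    is_sample = BooleanField(default=False)
    docs_fetched = BooleanField(default=False)
    last_page = IntegerField(default=0)

    class Meta:
        database = db


class Coauthors(Model):
    id = IntegerField(primary_key=True)
    co_list = JSONField(null=True)
    last_page = IntegerField(default=0)

    class Meta:
        database = db


# config.json tracks crawl progress: a 1 per finished country plus the current
# offset, e.g. {"Italy": 1, "latest_offset": 150, "total": 1000} (assumed shape,
# inferred from save_latest above).
with open('config.json') as fp:
    config = json.load(fp)

# The listings also expect: Colour / Style (console colour codes), re_abs_id
# (compiled regex for Scopus abstract ids), save_collaboration, plus the
# countries, cats, cat and limit settings. They are omitted here.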