def query_scholarly(self, author=None, keyword=None, pub=None):
    """Query Google Scholar through the scholarly package.

    :param author: author name to search for, or None
    :param keyword: research keyword to search for, or None
    :param pub: publication title/query to search for, or None
    :return: a dict keyed by 'author'/'keyword'/'pub' with one result
        iterator per criterion when two or more criteria are given,
        the bare result iterator when exactly one criterion is given,
        or None when no criterion is supplied.

    BUG FIX: the original only returned the combined dict when ALL THREE
    criteria were present; with exactly two, it silently dropped the
    second one and returned a single iterator.
    """
    import scholarly

    results = {}
    if author is not None:
        results['author'] = scholarly.search_author(author)
    if keyword is not None:
        results['keyword'] = scholarly.search_keyword(keyword)
    if pub is not None:
        results['pub'] = scholarly.search_pubs_query(pub)
    if len(results) == 1:
        # Preserve the original single-criterion contract: return the
        # bare iterator rather than a one-entry dict.
        return next(iter(results.values()))
    return results or None
def extract_store_papers():
    """Write one CSV row (author, title) per Google Scholar publication
    for every author in the module-level ``authors`` list.

    Relies on module-level names: ``outputfile`` (destination path),
    ``authors`` (dicts with 'Name' and 'Institution' keys), ``csv`` and
    ``scholarly``.
    """
    with open(outputfile, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=['author', 'title'])
        writer.writeheader()
        for entry in authors:
            print(entry)
            # Guard clause instead of the original nested ifs.
            if entry['Name'] is None:
                continue
            if entry['Institution'] is not None:
                query = scholarly.search_author(
                    entry['Name'] + ', ' + entry['Institution'])
            else:
                query = scholarly.search_author(entry['Name'])
            details = next(query).fill()
            for publication in details.publications:
                writer.writerow({'author': entry['Name'],
                                 'title': publication.bib['title']})
def search(
    author,
    affiliation,
):
    """Search Google Scholar for an author, optionally qualified by
    affiliation, with a one-day cache.

    Returns the cached JSON string on a cache hit, the string
    'No record found' when the search is empty, otherwise a jsonify'd
    list of author dicts (with the internal '_filled' flag stripped).
    """
    ordered_author = utils.order_author(author)
    # Compute the cache key once instead of three times.
    cache_key = '{}+{}'.format(ordered_author, affiliation)
    if cache:
        cached = cache.get(cache_key)
        if cached:
            return cached
    terms = (ordered_author, affiliation)
    query = sch.search_author(', '.join(t for t in terms if t))
    results = [hit.__dict__ for hit in query]
    if not results:
        return 'No record found'
    for record in results:
        del record['_filled']
    if cache:
        cache.set(cache_key, json.dumps(results))
        cache.pexpire(cache_key, datetime.timedelta(days=1))
    return jsonify(results)
def getGscholarLinks(authorName, driver=None):
    """Collect up to five Google Scholar author matches for *authorName*.

    Tries the scholarly API first; on any failure, falls back to scraping
    the Scholar author-search page with the provided selenium *driver*.

    :return: (names, profile_links, image_urls, affiliations) lists.
    """
    pageNames = []
    pageImages = []
    pageLinks = []
    pageAffs = []
    try:
        searchAuthors = scholarly.search_author(authorName)
        while len(pageNames) < 5:
            try:
                cntAuthor = next(searchAuthors)
            except StopIteration:
                # Fewer than five matches; stop cleanly. (Was a bare
                # except, which also hid real errors.)
                break
            pageNames.append(cntAuthor.name)
            # NOTE(review): this URL expression was redacted ('******') in
            # the original source; reconstructed from the author id.
            pageLinks.append('https://scholar.google.com/citations?user='
                             + cntAuthor.id + '&hl=en')
            pageImages.append('https://scholar.google.com' + cntAuthor.url_picture)
            if isinstance(cntAuthor.affiliation, list):
                pageAffs.append(cntAuthor.affiliation[0])
            else:
                pageAffs.append(cntAuthor.affiliation)
    except Exception:
        # Fallback: scrape the search-results page directly.
        driver.get("https://scholar.google.com.eg/citations?hl=en&view_op=search_authors&mauthors=" + authorName)
        authors = driver.find_elements_by_xpath('//*[@class="gsc_1usr gs_scl"]')
        for author in authors:
            link = author.find_element_by_xpath('.//h3[@class="gsc_oai_name"]/a').get_attribute('href')
            name = author.find_element_by_xpath('.//h3[@class="gsc_oai_name"]/a').get_attribute('textContent')
            image = author.find_element_by_xpath('//*[@id="gsc_sa_ccl"]/div/span/img').get_attribute('src')
            affliation = author.find_element_by_xpath('.//*[@class="gsc_oai_aff"]').get_attribute('innerHTML')
            pageLinks.append(link)
            pageNames.append(name)
            pageImages.append(image)
            pageAffs.append(affliation)
    return pageNames, pageLinks, pageImages, pageAffs
def get(self):
    """HTTP GET handler: look up the Scholar profile named in the ?name=
    query arg and return its metrics plus per-article data as JSON.

    Always responds 200 with a CORS header; errors are reported in the
    body under 'notFoundError' / 'forbiddenError'.
    """
    try:
        author_name = request.args.get("name")
        search_query = search_author(author_name)
        author = next(search_query).fill()
        articles = []
        for pub in author.publications:
            article = {
                'title': pub.bib['title'],
                'year': pub.bib['year'] if 'year' in pub.bib else '',
                'citationCount': pub.citedby if hasattr(pub, 'citedby') else ''
            }
            articles.append(article)
        result = {
            'name': author.name,
            'hIndex': author.hindex,
            'citationCount': author.citedby,
            'citesPerYear': author.cites_per_year,
            'articles': articles
        }
        return result, 200, {'Access-Control-Allow-Origin': '*'}
    except StopIteration:
        return {
            'notFoundError': 'No Google Scholar profile found'
        }, 200, {
            'Access-Control-Allow-Origin': '*'
        }
    except Exception:
        # BUG FIX: was a bare except, which also swallowed
        # SystemExit/KeyboardInterrupt.
        return {
            'forbiddenError': 'Access to Google Scholar forbidden'
        }, 200, {
            'Access-Control-Allow-Origin': '*'
        }
def fetch_citations(author, filesave="citations.json"):
    """Fetch every publication for *author* from Google Scholar and dump
    the raw publication dicts to *filesave* as JSON.

    Publications lacking a year both before and after .fill() are
    skipped (they tend to be search-robot noise).
    """
    print("Looking up " + author)
    search = scholarly.search_author(author)
    author = next(search).fill()
    publications = []
    for i, pub in enumerate(author.publications):
        if "year" in pub.bib:
            pubyear = pub.bib["year"]
            # often this gets messed up upon .fill(); restore it after
            pub = pub.fill()
            pub.bib["year"] = pubyear
        else:
            pub = pub.fill()
            if "year" not in pub.bib:
                # skip publications that really don't have a year,
                # they probably are crap that was picked up by the search robot
                continue
        print("Fetching: " + str(i) + "/" + str(len(author.publications)) +
              ": " + pub.bib["title"] + " (" + str(pub.bib["year"]) + ")")
        pub.bib.pop("abstract", None)
        publications.append(pub.__dict__)
    # BUG FIX: use a context manager so the file is closed even if
    # serialization fails (the original leaked the handle on error).
    with open(filesave, "w") as f:
        f.write(json.dumps(publications))
def scholar_view(request, nodeType, nodeLabel):
    """Render Google Scholar details for an Author or Publication node.

    On lookup failure, the result becomes an apology string and nodeType
    is replaced with "Bad query".
    """
    mostCited = None
    if nodeType == "Author":
        try:
            result = next(scholarly.search_author(nodeLabel)).fill()
            mostCited = result.publications[0].fill()
        except Exception:
            result = "Sorry, we could not find this author's profile."
            nodeType = "Bad query"
            mostCited = None
    elif nodeType == "Publication":
        try:
            result = next(scholarly.search_pubs_query(nodeLabel))
            print(result)
        except Exception:
            result = "Sorry, we could not find this paper's profile."
            nodeType = "Bad query"
    return render(request, "external/google_search_results.html", {
        "result": result,
        "nodeType": nodeType,
        "mostCited": mostCited
    })
def getAuthorCitations(author):
    """Return the Google Scholar citation count for *author*.

    Only a uniquely-matched name yields a count. If no match is found and
    the name looks like it contains middle initials, retry once with just
    the first and last tokens (e.g. "John Q. Smith" -> "John Smith").
    Ambiguous (multi-match) or unmatched names return 0.
    """
    # first + last token; equal to the input when there are no initials
    parts = author.split(' ')
    author_name_without_initials = parts[0] + ' ' + parts[-1]
    flag_authorHasInitialsInName = author != author_name_without_initials

    search_query = scholarly.search_author(author)
    try:
        author_stats = next(search_query)
        try:
            # A second hit means the name is ambiguous: skip it.
            next(search_query)
            author_count = 2
        except StopIteration:
            author_count = 1
    except StopIteration:
        # BUG FIX: both excepts were bare; narrowed to StopIteration so
        # real errors (e.g. network failures) are no longer silenced.
        author_count = 0

    if author_count == 1:
        # getattr replaces the original bare try/except around .citedby
        author_citation_count = getattr(author_stats, 'citedby', 0)
    elif author_count == 0 and flag_authorHasInitialsInName:
        author_citation_count = getAuthorCitations(author_name_without_initials)
    else:
        author_citation_count = 0
    return author_citation_count
def example(author_name="Iris Howley"):
    """
    Example function from the scholarly website.

    'Here's a quick example demonstrating how to retrieve an author's
    profile then retrieve the titles of the papers that cite her most
    popular (cited) paper.'
    https://pypi.org/project/scholarly/

    :param author_name: Name of author to print data for
    :return: None
    """
    # Retrieve and fill the author's profile, then print it.
    profile = next(scholarly.search_author(author_name)).fill()
    print(profile)
    # Titles of every publication on the profile.
    print([p.bib['title'] for p in profile.publications])
    # Take a closer look at the first publication.
    first_pub = profile.publications[0].fill()
    print(first_pub)
    # Which papers cited that publication?
    print([citing.bib['title'] for citing in first_pub.get_citedby()])
def getGoogleInfo(name):
    """Fetch the first Scholar profile for "<name>,Peking" and return
    (profile string representation, list of publication titles)."""
    query = scholarly.search_author(name + ',Peking')
    profile = next(query)
    profile = profile.fill()
    titles = [p.bib['title'] for p in profile.publications]
    return str(profile), titles
def main():
    """Compare a Google Scholar profile against a Scopus CSV export and
    report publication/citation/h-index discrepancies to the console."""
    author_name = input("Enter the search term to identify your Scholar (e.g Ludo Waltman Leiden University): ")
    search = scholarly.search_author(author_name)
    try:
        author = next(search).fill()
        # NOTE(review): this print was redacted ('******') in the original
        # source; reconstructed from the author id.
        print("Working with the following Scholar profile: "
              "https://scholar.google.com/citations?user=" + author.id)
    except StopIteration:
        print("No author found with search term.")
        exit()
    check_hindex = calculate_hindex(author.publications)
    scopus_count, citation_comparisons, problematic_publications, missing_publications = comparisons("scopus.csv", author)
    write_citation_counts(author_name, citation_comparisons)
    write_missing_publications(author_name, missing_publications)
    # Output findings to console
    if check_hindex != author.hindex:
        print("The h-index we have calculated from the Google Scholar publications is not equal to the h-index publicly listed on their Scholar profile. Calculated - " + str(check_hindex) + ", Shown - " + str(author.hindex))
    print(str(len(author.publications)) + " publications were found for this Scholar on Google.")
    print(str(scopus_count) + " publications were found for this Scholar from the Scopus export.")
    print(str(scopus_count - len(missing_publications)) + " publications are common across both databases.")
    print(str(len(missing_publications)) + " publications on Scopus were not found on Scholar.")
    # Scholar-only count = total Scholar publications minus the common
    # publications (total Scopus count - not found on Scholar).
    print(str(len(author.publications) - (scopus_count - len(missing_publications))) + " publications on Scholar were not found on Scopus.")
    print(str(len(problematic_publications)) + " publications on Scopus were not found on Scholar which may affect the h-index.")
    # problematic_publications is stored as [[Title, Cite_Count], ...].
    # BUG FIX: the original loop body was `pass` with the print after the
    # loop, so only the last entry was ever printed; print each one.
    for miss in problematic_publications:
        print("\"" + str(miss[0]) + "\" is a missing publication from scholar with a citation count that may affect the total h-index of Google Scholar. The citation count of this article is " + str(miss[1]))
def getScholarID(name):
    # NOTE: Python 2 code (print statements, str.decode).
    """Resolve *name* to a Google Scholar author id, with caching.

    Relies on module-level state: scholarLinks (name -> id cache),
    checked (name -> timestamp of a previous failed lookup), now, and
    expirationDate. Returns the id string, or None when the lookup
    fails or a recent failure is still within the expiry window.
    """
    print "Checking " + name
    if name in scholarLinks:
        # Already there.
        print "Found"
        return scholarLinks[name]
    if name in checked:
        # A recent failed lookup: don't hammer Scholar again yet.
        if now - float(checked[name]) < expirationDate:
            return None
    origname = name
    # Trim off any trailing numerical suffixes.
    # NOTE(review): assumes the suffix is exactly " NNNN" (5 chars
    # dropped by name[:-5]) — matches the regex below, but verify.
    r = re.match(".*\s\d\d\d\d$", name)
    if r != None:
        name = name[:-5]
    if (name in scholarLinks):
        return scholarLinks[name]
    actualID = "FIXME"
    try:
        search_query = scholarly.search_author(name)
        # NOTE(review): decoding happens AFTER the search call uses the
        # raw bytes — possibly an ordering bug; confirm intent.
        name = name.decode('utf8')
        author = next(search_query).fill()
        # print author
        # Pull the 'id' attribute out of the author object.
        for (key, value) in author.__dict__.items():
            if (key == "id"):
                actualID = value
        # Cache under the ORIGINAL (untrimmed) name.
        scholarLinks[origname] = actualID
        return actualID
    except:
        # Bare except: any failure (no match, network error) yields None.
        return None
    return None
def search(kental):
    """Return the first Google Scholar author match for *kental*."""
    matches = scholarly.search_author(kental)
    return next(matches)
def publication_autheur(nom):
    """Print a 1-based numbered list of the author's publication titles."""
    profile = next(scholarly.search_author(nom)).fill()
    for rank, publication in enumerate(profile.publications, start=1):
        print(rank, publication.bib["title"])

#publication_autheur("ahmed guessoum")
def get_citated_by_list(df):
    """For each row of *df*, look up the row's 'dc:creator' on Google
    Scholar and collect the filled publication whose title matches the
    row's 'dc:title'.

    BUG FIX: the original indexed ``author.publications`` with the result
    of comparing the whole publications list to a string — always False,
    i.e. index 0 — so it returned each author's FIRST publication
    regardless of title. Now a real title match is performed.
    """
    pubs = []
    for i in range(len(df)):
        search_query = scholarly.search_author(df['dc:creator'][i])
        author = next(search_query).fill()
        wanted_title = df['dc:title'][i]
        for publication in author.publications:
            if publication.bib.get('title') == wanted_title:
                pubs.append(publication.fill())
                break
    return pubs
def schol_looper(df):
    '''For each author, search Google Scholar/scholarly entry, enter into
    columns hindex, interests, affilation/email?

    Currently just prints each author name and the first Scholar match.
    '''
    for i in df.index:
        # BUG FIX: DataFrame.get_value() was deprecated and then removed
        # from pandas; .at is the supported scalar accessor with the
        # same semantics.
        auth = df.at[i, 'Author']
        print(auth)
        print(next(scholarly.search_author(auth)))
    return
def test(self):
    """Smoke test: render the queried author's first publication as markdown."""
    profile = next(scholarly.search_author(self.author_query)).fill()
    for publication in profile.publications[:1]:
        rendered = self.make_publication_markdown_from_scholar(
            publication.fill().__dict__)
        print(rendered[1])
def getAuthor_nobmbre_de_publication_scholar(author_name):
    """Return (publication_count, h_index, citation_count) for the first
    Scholar match of *author_name*.

    BUG FIXES: the original counted publications with a pass-loop whose
    index variable raised NameError when the author had zero
    publications, and used a bare except around the .citedby access.
    """
    search_query = scholarly.search_author(author_name)
    author = next(search_query).fill()
    # Authors without a public citation count simply lack the attribute.
    citations = getattr(author, 'citedby', 0)
    return len(author.publications), author.hindex, citations
def is_pressed(self):
    """Open a Tk window for this entry: one clickable label per author
    (blue + linked to their Scholar profile when found) plus arXiv and
    PDF link buttons."""
    self.box = Tk(className=self.entry['title'])
    authors_links = list()
    for author in self.entry['authors']:
        # BUG FIX: search_author returns a generator, which is ALWAYS
        # truthy — the original `if data:` never distinguished found vs.
        # not-found. Pull the first hit (or None) instead.
        match = next(scholarly.search_author(author), None)
        label = Label(self.box,
                      text=author,
                      fg="blue" if match else "black",
                      cursor="hand2")
        authors_links.append(label)
        if match:
            scholar_link = "https://scholar.google.com/citations?user={}".format(
                match.id)
            # BUG FIX: bind the URL as a default argument — a plain
            # closure over scholar_link is late-binding, so every label
            # would open the LAST author's page.
            authors_links[-1].bind(
                "<Button-1>",
                lambda e, url=scholar_link: webbrowser.open_new_tab(url))
    photo_img_w = 40
    photo_img_h = 40
    #TODO: fix this getcwd
    file = os.path.join(os.getcwd(), "data/gui_data/arxiv.png")
    command = lambda: webbrowser.open_new_tab(self.entry['arxiv_url'])
    pdf_button = ImageButton(path_to_img=file,
                             width=photo_img_w,
                             height=photo_img_h,
                             command=command,
                             master=self.box)
    pdf_button.button.pack(side=LEFT)
    #TODO: fix this getcwd
    file = os.path.join(os.getcwd(), "data/gui_data/pdf.png")
    command = lambda: webbrowser.open_new_tab(self.entry['pdf_url'])
    pdf_button = ImageButton(path_to_img=file,
                             width=photo_img_w,
                             height=photo_img_h,
                             command=command,
                             master=self.box)
    pdf_button.button.pack(side=LEFT)
def publication_information_1(pub):
    """Print and return the titles of the author's publications that have
    both a title and a year in their bib entry."""
    titles = []
    profile = next(scholarly.search_author(pub)).fill()
    for rank, publication in enumerate(profile.publications, start=1):
        bib = publication.bib
        if "year" in bib and "title" in bib:
            print(rank, bib["title"])
            titles.append(bib["title"])
    return titles
def search_authors_by_name(name):
    """Return up to five [name, affiliation] pairs for a Scholar author
    search, wrapped as {"author_search_result": [...]}.

    FIX: the original bound its return value to a local named ``json``,
    shadowing the json module; renamed to ``payload``.
    """
    search_query = scholarly.search_author(name)
    authors_summary = []
    for _ in range(5):
        result = next(search_query, None)
        if result is None:
            break
        authors_summary.append([result.name, result.affiliation])
    payload = {"author_search_result": authors_summary}
    return payload
def getGoogleScholarInfo(self, findThisAuthor):
    """Return the affiliation of the first Scholar match for
    *findThisAuthor*, or "not found".

    Misses are also recorded in the module-level ``autho_notfound`` list.
    """
    match = next(scholarly.search_author(findThisAuthor), None)
    if match is None:
        autho_notfound.append(findThisAuthor)
        return "not found"
    return match.affiliation
def publication_autheur(nom):
    """Return (titles, count) for the author's dated publications, with
    titles upper-cased. Only bib entries having both 'year' and 'title'
    are included."""
    profile = next(scholarly.search_author(nom)).fill()
    titles = [p.bib["title"].upper()
              for p in profile.publications
              if "year" in p.bib and "title" in p.bib]
    return titles, len(titles)
def generate_fixtures():
    """Build Django fixture dicts (model "website.publication") for every
    author in the module-level ``author_name_list``, from their Google
    Scholar publication records.

    :return: list of fixture dicts with sequential pk values.
    """
    pk = 0
    all_fixtures = []
    for author_name in author_name_list:
        # Search author on Google scholar
        search_query = scholarly.search_author(author_name)
        author = next(search_query).fill()
        if not author:
            continue
        # Look for author publications
        for pub_obj in author.publications:
            current_pub = pub_obj.fill()
            if not hasattr(current_pub, 'bib'):
                continue
            pub = current_pub.bib
            if not pub:
                continue
            fixture = {"model": "website.publication", "pk": pk, "fields": {}}
            fields = fixture['fields']
            # dict.get with a default replaces the repeated
            # "pub['x'] if 'x' in pub else ''" pattern.
            fields['title'] = pub.get('title', "")
            fields['url'] = pub.get('url', "")
            fields['author'] = pub.get('author', author_name)
            fields['doi'] = ""
            fields['entry_type'] = ""
            fields['publisher'] = pub.get('publisher', "")
            fields['published_in'] = ""
            fields['year_of_publication'] = ""
            fields['month_of_publication'] = ""
            fields['bibtex'] = ""
            fields['project_url'] = ""
            fields['pdf'] = ""
            if 'abstract' in pub:
                content = pub['abstract'] if isinstance(
                    pub['abstract'], str) else str(pub['abstract'])
                # Strip HTML markup from the abstract.
                fields['abstract'] = html.fromstring(content).text_content()
            else:
                fields['abstract'] = ""
            # One timestamp for both fields (the original called now()
            # twice, so created/modified could differ by microseconds).
            timestamp = datetime.datetime.now(tz=pytz.utc).isoformat()
            fields['created'] = timestamp
            fields['modified'] = timestamp
            pk += 1
            all_fixtures.append(fixture)
    return all_fixtures
def SinglePublication(request, authorName, pubind):
    """Render the detail page for the *pubind*-th publication of the first
    Scholar match for *authorName*.

    BUG FIXES vs. the original:
    - the originalUrl branch tested key 'originalUrl' but read
      bib['url']; the check now matches the key actually read;
    - most context assignments ended with a trailing comma, storing
      1-tuples instead of scalar values in the template context.
    Missing bib keys render as None, as before.
    """
    search_query = scholarly.search_author(authorName)
    scholar = next(search_query).fill()
    myPublication = scholar.publications[pubind].fill()
    bib = myPublication.bib

    context = {
        'authorName': authorName,
        'author': bib['author'],
        'pubind': pubind,
        'title': bib['title'],
        # Strip any HTML tags from the abstract.
        'abstract': re.sub('<[^>]+>', '', str(bib['abstract'])),
        'pages': bib.get('pages'),
        'eprint': bib.get('eprint'),
        'journal': bib.get('journal'),
        'originalUrl': bib.get('url'),
        'volume': bib.get('volume'),
        'year': bib.get('year'),
        'citedby': bib.get('citedby'),
        'idCitations': bib.get('id_citations'),
        # Preserved quirk: the value comes from the publication
        # attribute, gated on the bib key being present.
        'idScholarcitedby': (myPublication.id_scholarcitedby
                             if 'id_scholarcitedby' in bib else None),
    }
    return render(request, 'profiles/publication.html', context)
class MaClasse():
    """Reads researcher names from xx.csv, looks each one up on Google
    Scholar, and inserts name / interests / affiliation into the MySQL
    ``prsl`` table.

    NOTE: the whole body is plain executable code that runs at
    class-definition time (no methods) — preserved from the original.
    """
    df = pd.read_csv("xx.csv", header=0)
    liste_chercheur = df['Nom et prénom'].values.tolist()
    print("le nombre de chercheurs est de : " + str(len(liste_chercheur)))
    b = iter(liste_chercheur)
    print(liste_chercheur)
    conn = mysql.connector.connect(host="localhost",
                                   user="******",
                                   password="******",
                                   database="wikicomp20")
    cursor = conn.cursor()
    while True:
        try:
            print("je suis dans 1")
            Prenom_nom = next(b)
            search_query = scholarly.search_author(Prenom_nom)
            author = next(search_query).fill()
            print("je suis dans 2")
            nom = author.name  # nom du chercheur
            laboratoire = author.affiliation  # Laboratoires
            Mail = author.email  # Mail
            nom_competence = author.interests  # compétences
            nombre_de_citations_par_ans = author.cites_per_year
            competence = ' '.join(nom_competence)
            print(competence)
            Ma_recherche = {
                'Nom': nom,
                'comp': competence,
                'laboratoire': laboratoire
            }
            print(Ma_recherche)
            print(nom_competence)
        except StopIteration:
            break
        else:
            # BUG FIX: the INSERT used to sit in a `finally:` block, so it
            # re-ran after StopIteration (duplicating the last row) and
            # raised NameError if the very first lookup failed. Insert
            # only after a successful lookup.
            cursor.execute(
                """insert into prsl (Nom, competencess, laboratoire )values (%(Nom)s,%(comp)s,%(laboratoire)s) """,
                Ma_recherche)
    conn.commit()
    conn.close()
def main(author_name):
    # NOTE: Python 2 code (print statements).
    """Print the first Scholar profile matching *author_name*, then the
    titles of all its publications. Returns None."""
    # Initialize api instance
    print author_name
    search_query = scholarly.search_author(author_name)
    author = next(search_query)
    print author
    # fill() fetches the full profile, including the publication list.
    author_fill = author.fill()
    pub = [pub.bib['title'] for pub in author_fill.publications]
    print pub
    return
def get_author(name):
    """Return (and memoize in the module-level ``authors`` dict) the
    filled scholarly author object for *name*.

    On lookup failure the name string itself is cached and returned as a
    sentinel — preserved from the original, so callers can detect misses
    by type.
    """
    if name not in authors:
        try:
            search_query = scholarly.search_author(name)
            authors[name] = next(search_query).fill()
        except Exception:
            # FIX: narrowed from a bare except (which also swallowed
            # KeyboardInterrupt/SystemExit).
            print('Cannot find author: ' + name)
            authors[name] = name
    return authors[name]
def save_prof_citations():
    """Look up the citation count for every professor in the module-level
    ``dict_name_links``, persist the mapping via save_obj, and return it.

    Failed lookups are recorded as '' (best-effort behaviour preserved).
    """
    import scholarly
    dict_prof_cite = {}
    for prof in dict_name_links:
        try:
            dict_prof_cite[prof] = next(scholarly.search_author(prof)).fill().citedby
        except Exception:
            # FIX: narrowed from a bare except; keep the best-effort ''.
            dict_prof_cite[prof] = ''
            continue
        print(prof)
    save_obj(dict_prof_cite, path_data + 'dict_prof_cite')
    return dict_prof_cite
def SingleAuthor(request, authorName):
    """Render the profile page for the first Scholar match of *authorName*,
    with an enumerated list of publication titles."""
    scholar = next(scholarly.search_author(authorName)).fill()
    titles = [p.bib['title'] for p in scholar.publications]
    context = {
        'author': scholar,
        'publicationlist': enumerate(titles),
    }
    return render(request, 'profiles/author.html', context)
def query_scholar_for_author_profile(author):
    """Return a plaintext summary (name, affiliation, interests, profile
    URL) for the first Scholar match of *author*, or an error string.

    FIXES: bare except narrowed to Exception; `== None` -> `is None`.
    """
    try:
        _author = next(scholarly.search_author(author))
    except Exception:
        return "Ooopsie. Maybe we ran over the request limit?"
    if _author is None:
        return "Did not find a profile for %s" % author
    resp_str = ""
    resp_str += (_author.name + "\n")
    resp_str += (_author.affiliation + "\n")
    for interest in _author.interests:
        resp_str += (interest + ' - ')
    resp_str += "\n"
    resp_str += ("https://scholar.google.ch/citations?user=" + _author.id)
    return resp_str
def test_multiple_authors(self):
    ''' As of February 12, 2016, there are 26 'Zucker's, 3 pages worth '''
    names = [hit.name for hit in scholarly.search_author('Zucker')]
    self.assertEqual(len(names), 26)
    self.assertIn(u'Steven W Zucker', names)
def test_single_author(self):
    """A known author resolves to the expected name and Scholar id."""
    first_hit = next(scholarly.search_author('Steven A. Cholewiak'))
    author = first_hit.fill()
    self.assertEqual(author.name, u'Steven A. Cholewiak')
    self.assertEqual(author.id, u'4bahYMkAAAAJ')
def test_empty_author(self):
    """Searching for an empty string yields no authors."""
    authors = [a for a in scholarly.search_author('')]
    # FIX: assertEqual, not assertIs — identity comparison of ints only
    # passes by accident via CPython's small-integer cache.
    self.assertEqual(len(authors), 0)
def test_multiple_authors(self):
    ''' As of July 24, 2015, there are 25 'Zucker's, 3 pages worth '''
    names = [hit.name for hit in scholarly.search_author('Zucker')]
    self.assertEqual(len(names), 25)
    self.assertIn(u'Steven W Zucker', names)
def remove_spaces():
    """Strip whitespace from every entry in the module-level
    ``publications`` list, appending results to module-level ``cleaned``."""
    for item in publications:
        cleaned.append(item.strip())

# --- module-level scraping script (view may be truncated at the end) ---
# Read one author name per line from names.txt.
names = []
namefile = open("names.txt", "r")
f = namefile.read().splitlines()
for name in f:
    names.append(name)
namefile.close()
for name in names:
    # NOTE: .next() is the Python 2 generator protocol.
    info = scholarly.search_author(name).next()
    url = "https://scholar.google.co.in/citations?user=$$$$$$$$$$$$&hl=en&cstart=0&pagesize=100".replace(
        "$$$$$$$$$$$$", info.id
    )
    # NOTE(review): the URL contains "cstart=0", so replacing "start=0"
    # actually produces "cstart=101"/"cstart=201" — presumably intended
    # as result-page offsets; verify against gsc_scraper.
    next_url = url.replace("start=0", "start=101")
    next2_url = url.replace("start=0", "start=201")
    gsc_scraper(url)
    gsc_scraper(next_url)
    gsc_scraper(next2_url)
    remove_spaces()
    writetofile(cleaned, "a")
    # Reset the shared accumulators for the next author.
    cleaned = []
    publications = []
    urls = []
# NOTE(review): handle is opened but neither read nor closed within the
# visible source — the file may continue beyond this chunk.
urlfile = open("URLS.txt", "r")