def getYears(self, file, label):
    """Search DBLP for every paper title in a CSV file and checkpoint the hits.

    Each row of ``file`` is read with ``csv.reader``. ``row[1]`` is the title
    sent to ``dblp.search`` and ``row[0]`` is the key under which a non-empty
    result is stored. After a stored row, whenever the number of rows read
    so far is a multiple of 200, the accumulated mapping is pickled to
    ``'notmatchedpaperyear_' + label + '.dat'``.

    Args:
        file: Path of the CSV file to read.
        label: Suffix used to build the checkpoint file name.

    Returns:
        None. Results are only persisted through the periodic checkpoint.
    """
    # NOTE(review): rows processed after the last multiple of 200 are never
    # written and the mapping is not returned -- confirm this is intended.
    paperYear = dict()
    counter = 0
    with open(file) as f:
        reader = csv.reader(f)
        for row in reader:
            print(counter)
            counter += 1
            papername = row[1]
            try:
                content = dblp.search(papername)
                # Identity test: the result is used as a DataFrame-like object
                # below (``.empty``), for which ``== None`` is not a plain
                # boolean and cannot be used in an ``if``.
                if content is None:
                    time.sleep(20)
                    content = dblp.search(papername)
            except Exception:
                # The search itself failed: back off for a long while, then
                # retry once. A failure of this retry propagates.
                time.sleep(180)
                content = dblp.search(papername)
                if content is None:
                    time.sleep(20)
                    content = dblp.search(papername)
            # Still nothing usable after the retries: skip this row.
            if content is None or content.empty:
                continue
            paperYear[row[0]] = content
            if counter % 200 == 0:
                # Close the checkpoint file deterministically.
                with open('notmatchedpaperyear_' + label + '.dat', "wb") as out:
                    pickle.dump(paperYear, out, True)
def search_dblp(title):
    """Given a paper title, attempt to search for it on DBLP.

    If @title does not match anything, we iteratively loosen our search
    constraints by dropping the last word of the title until results are
    found.

    Args:
        title: Paper title; curly braces are stripped before searching.

    Returns:
        The first non-empty result of ``dblp.search``. If every prefix of the
        title has been tried without a match, the last (empty) result is
        returned instead of retrying forever.
    """
    title = title.replace("{", "").replace("}", "")
    words = title.split(" ")
    results = dblp.search([title])
    # Stop once a single word is left: dropping it would yield an empty
    # query, and searching for "" over and over never terminates.
    while results.empty and len(words) > 1:
        words = words[:-1]
        results = dblp.search([" ".join(words)])
    return results
print(". \tNumber of Publications: ", end="") print(len(clusterList[i])) sortedList = sorted(clusterList[i], key = getKey) for j in range(0, len(clusterList[i])): #clusterList[i][j].print_val() sortedList[j].print_val() print("--------------------------------------------------------------------------------------------------") pubList = [] search_input = input("Please enter the name of the researcher (case - insensitive): ") authors = dblp.search(search_input) if len(authors) == 0: print("No matching results found") exit() print("Please select one of the below researchers: (Wait till input is not asked for)") for i in range(0, len(authors)): author = authors[i] print(str(i+1) + ". " + author.name) print("number of publications = " + str(len(author.publications))) print("-----------------------------------------") opt_intput = input("Type 1 for affiliation timeline. Type 2 for clusterization.\n") if opt_intput == "1": auth_index = input('Please select one of the Author Index: ') i = int(auth_index) - 1
import sys

import dblp
import requests

# Interactive helper: search DBLP, list the hits, then download the page
# behind the record the user picks and print it.
search_string = input('Enter author name for search: ')
search_results = dblp.search(search_string)
if search_results.empty:
    print("No results...\nExitting...")
    sys.exit(1)

# The header names every column that is printed per row (Title was missing).
print('Index\tType\tAuthor Names\tTitle\tYear')
for index, val in search_results.iterrows():
    print(f'{index}\t{val.Type}\t{val.Authors}\t{val.Title}\t{val.Year}')
print('\n\t\t---------------------------------\n')

try:
    option = int(input('Enter index of the record to get data: '))
    record = search_results.loc[option]
except (ValueError, KeyError):
    # Non-numeric input, or a number that is not one of the listed indices.
    print("Invalid record index.")
    sys.exit(1)

# A timeout keeps the script from hanging forever on an unresponsive server.
request = requests.get(record.Link, timeout=30)
print(request.status_code)
print(request.text)
def _main():
    """Draw the weighted graph of a fixed set of professors.

    Every query in the roster is sent to ``dblp.search`` and the first hit is
    taken as that person. The name reported by DBLP (``.name``) becomes the
    node label. Edges and their weights are hard-coded. The graph is drawn on
    a circular layout, saved to ``output.png`` and shown.

    Raises:
        IndexError: if a search returns no hit (``hits[0]``).
    """
    # (short key, DBLP query), in the original lookup order.
    roster = [
        ('cosmin', 'Cosmin Bonchis'),
        ('stefanb', 'Stefan Balint'),
        ('adrian', 'Adrian Craciun'),
        ('roxana', 'Roxana Dogaru'),
        ('teodor', 'Teodor-Florin Fortis'),
        ('marc', 'Marc Frincu'),
        ('victoria', 'Victoria Iordan'),
        ('gabriel', 'Gabriel Istrate'),
        ('ciprianj', 'Ciprian Jichici'),
        ('iuhasz', 'Gabriel Iuhasz'),
        ('eva', 'Eva Kaslik'),
        ('mircea', 'Mircea Marin'),
        ('flavia', 'Flavia Micota'),
        ('cristina', 'Cristina Mindruta'),
        ('stefanm', 'Stefan Maruster'),
        ('marian', 'Marian Neagul'),
        ('viorel', 'Viorel Negru'),
        ('dana', 'Dana Petcu'),
        ('pop', 'Daniel Pop'),
        ('horia', 'Horia Popa'),
        ('ciprianp', 'Ciprian Pungila'),
        ('monica', 'Monica Tirea'),
        ('daniela', 'Daniela Zaharie'),
        ('calin', 'Calin Sandru'),
    ]

    # (key, key, weight). The order matters: nodes are created in order of
    # first appearance, which fixes their position on the circular layout.
    weighted_links = [
        ('stefanb', 'eva', 6),
        ('stefanb', 'teodor', 4),
        ('cosmin', 'gabriel', 6),
        ('cosmin', 'dana', 1),
        ('adrian', 'gabriel', 4),
        ('adrian', 'mircea', 2),
        ('roxana', 'flavia', 1),
        ('roxana', 'daniela', 1),
        ('teodor', 'viorel', 14),
        ('teodor', 'pop', 2),
        ('teodor', 'cristina', 3),
        ('marc', 'dana', 16),
        ('marc', 'marian', 4),
        ('marc', 'daniela', 3),
        ('victoria', 'dana', 2),
        ('gabriel', 'mircea', 3),
        ('ciprianj', 'pop', 1),
        ('ciprianj', 'viorel', 1),
        ('iuhasz', 'dana', 1),
        ('iuhasz', 'viorel', 6),
        ('iuhasz', 'daniela', 1),
        ('iuhasz', 'monica', 1),
        ('eva', 'dana', 1),
        ('flavia', 'daniela', 1),
        ('cristina', 'viorel', 2),
        ('cristina', 'calin', 2),
        ('cristina', 'dana', 1),
        ('stefanm', 'viorel', 1),
        ('marian', 'dana', 11),
        ('marian', 'calin', 2),
        ('marian', 'daniela', 2),
        ('marian', 'pop', 1),
        ('viorel', 'ciprianp', 5),
        ('viorel', 'daniela', 17),
        ('viorel', 'calin', 6),
        ('viorel', 'pop', 9),
        ('viorel', 'monica', 9),
        ('viorel', 'dana', 10),
        ('viorel', 'horia', 8),
        ('dana', 'calin', 4),
        ('dana', 'daniela', 15),
        ('dana', 'pop', 1),
        ('dana', 'horia', 1),
        ('pop', 'daniela', 1),
        ('pop', 'calin', 2),
        ('pop', 'horia', 2),
        ('horia', 'daniela', 1),
    ]

    people = {}
    for key, query in roster:
        hits = dblp.search(query)
        if key == 'pop':
            # Kept from the original: the helper is called on the hit list
            # and its return value is ignored.
            # NOTE(review): presumably it reorders `hits` in place -- confirm.
            get_right_name(hits, query)
        people[key] = hits[0]

    labels = {key: person.name for key, person in people.items()}

    g = nx.Graph()
    for left, right, weight in weighted_links:
        g.add_edge(labels[left], labels[right], weight=weight)

    pos = nx.circular_layout(g)
    edge_labels = {(u, v): d['weight'] for u, v, d in g.edges(data=True)}

    # NOTE(review): the original also passed node_alpha=0.2, node_text_size=6,
    # edge_color='blue', edge_alpha=0.2, edge_tickness=1, edge_text_pos=0.2
    # and text_font='sans-serif' here. None of them is a parameter of
    # draw_networkx_nodes: depending on the networkx release they are ignored
    # or rejected with TypeError. They are dropped so the picture stays as it
    # was. If that styling is wanted, the matching arguments are `alpha` on
    # the node/edge calls, `edge_color`/`width` on draw_networkx_edges,
    # `font_size`/`font_family` on draw_networkx_labels and `label_pos` on
    # draw_networkx_edge_labels.
    nx.draw_networkx_nodes(g, pos, node_size=1600, node_color='white')
    nx.draw_networkx_edges(g, pos)
    nx.draw_networkx_labels(g, pos)
    nx.draw_networkx_edge_labels(g, pos, edge_labels=edge_labels)

    plt.title("Professors Graph")
    plt.axis('off')
    plt.savefig('output.png')
    plt.show()
# http://www.informatik.uni-trier.de/~ley/db/
import dblp

# Print the name of every DBLP author matched by the full name first, then by
# the surname alone.
for query in ('Gregor von Laszewski', 'Laszewski'):
    authors = dblp.search(query)
    for author in authors:
        print(author.name)
no_matched_author = [] no_pub_list = [] notes_count = 0 for eachLine in fp.readlines(): emailId, firstName, lastName = eachLine.split(',') name = firstName + " " + lastName.strip() file_name = firstName + "_" + lastName.strip() if name == '' or name == ' ': print emailId no_name += 1 no_name_list.append(emailId) else: try: authors = dblp.search(name) except Exception as e: sleep(2) authors = dblp.search(name) if authors: if len(authors) > 1: print "More than one authors found! " more_than_one += 1 author_disambiguation[name] = [] author = None for ath in authors: author_disambiguation[name].append(ath.name) if name.lower() == ath.name.lower(): author = ath if not author: no_matched_author.append(name)
# -*- coding: utf-8 -*-
"""
Created on Fri Jun  7 23:57:13 2019

@author: Markus Borg
"""

import dblp

# The first hit of the search is taken as the author of interest.
authors = dblp.search('markus borg')
markus = authors[0]

print("Publications: ", len(markus.publications))

# One line per publication, showing its `citations` attribute.
for p in markus.publications:
    citations = p.citations
    print(citations)
# for inside use only: while True: try: for node_in_file in nodes_by_lines: name_to_find = node_in_file.split(',')[1].replace('_', ' ') id_of_searched = int(node_in_file.split(',')[0]) print(name_to_find) print(id_of_searched) print(list_of_nodes_to_add) print(list_of_edges_to_add) # skip on passed ids if id_of_searched < position: continue options = dblp.search(name_to_find) chosen_publisher_index = choose_name(name_to_find, options) print(chosen_publisher_index) if chosen_publisher_index != -1: publisher = dblp.search( name_to_find)[chosen_publisher_index] # run on every publications and adding it to "nodes_by_lines" for article in publisher.publications: authors_of_article = article.authors print(authors_of_article) for author in authors_of_article: author = unidecode(author.lower()) if author != unidecode(publisher.name.lower()): author_splitted = author.split(' ') # normalizing names normalized_author = ""
# coding: utf-8

# In[38]:

import dblp

authors = dblp.search('alessandro bozzon')
# Row 23 of the search result is the record whose link gets downloaded.
paper23 = authors.iloc[23]


# In[2]:

url = paper23.Link


# In[3]:

import urllib
import os
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

# The target file is named after the last path component of the link.
# It is opened in binary mode so the downloaded bytes are written unchanged,
# and both handles are closed even if the transfer fails.
with closing(urlopen(url)) as webFile, \
        open(url.split('/')[-1], 'wb') as pdfFile:
    print(pdfFile)
    pdfFile.write(webFile.read())
def _main():
    """Draw the weighted graph of a fixed set of professors.

    Every query in the roster is sent to ``dblp.search`` and the first hit is
    taken as that person. The name reported by DBLP (``.name``) becomes the
    node label. Edges and their weights are hard-coded. The graph is drawn on
    a circular layout, saved to ``output.png`` and shown.

    Raises:
        IndexError: if a search returns no hit (``hits[0]``).
    """
    # (short key, DBLP query), in the original lookup order.
    roster = [
        ('cosmin', 'Cosmin Bonchis'),
        ('stefanb', 'Stefan Balint'),
        ('adrian', 'Adrian Craciun'),
        ('roxana', 'Roxana Dogaru'),
        ('teodor', 'Teodor-Florin Fortis'),
        ('marc', 'Marc Frincu'),
        ('victoria', 'Victoria Iordan'),
        ('gabriel', 'Gabriel Istrate'),
        ('ciprianj', 'Ciprian Jichici'),
        ('iuhasz', 'Gabriel Iuhasz'),
        ('eva', 'Eva Kaslik'),
        ('mircea', 'Mircea Marin'),
        ('flavia', 'Flavia Micota'),
        ('cristina', 'Cristina Mindruta'),
        ('stefanm', 'Stefan Maruster'),
        ('marian', 'Marian Neagul'),
        ('viorel', 'Viorel Negru'),
        ('dana', 'Dana Petcu'),
        ('pop', 'Daniel Pop'),
        ('horia', 'Horia Popa'),
        ('ciprianp', 'Ciprian Pungila'),
        ('monica', 'Monica Tirea'),
        ('daniela', 'Daniela Zaharie'),
        ('calin', 'Calin Sandru'),
    ]

    # (key, key, weight). The order matters: nodes are created in order of
    # first appearance, which fixes their position on the circular layout.
    weighted_links = [
        ('stefanb', 'eva', 6),
        ('stefanb', 'teodor', 4),
        ('cosmin', 'gabriel', 6),
        ('cosmin', 'dana', 1),
        ('adrian', 'gabriel', 4),
        ('adrian', 'mircea', 2),
        ('roxana', 'flavia', 1),
        ('roxana', 'daniela', 1),
        ('teodor', 'viorel', 14),
        ('teodor', 'pop', 2),
        ('teodor', 'cristina', 3),
        ('marc', 'dana', 16),
        ('marc', 'marian', 4),
        ('marc', 'daniela', 3),
        ('victoria', 'dana', 2),
        ('gabriel', 'mircea', 3),
        ('ciprianj', 'pop', 1),
        ('ciprianj', 'viorel', 1),
        ('iuhasz', 'dana', 1),
        ('iuhasz', 'viorel', 6),
        ('iuhasz', 'daniela', 1),
        ('iuhasz', 'monica', 1),
        ('eva', 'dana', 1),
        ('flavia', 'daniela', 1),
        ('cristina', 'viorel', 2),
        ('cristina', 'calin', 2),
        ('cristina', 'dana', 1),
        ('stefanm', 'viorel', 1),
        ('marian', 'dana', 11),
        ('marian', 'calin', 2),
        ('marian', 'daniela', 2),
        ('marian', 'pop', 1),
        ('viorel', 'ciprianp', 5),
        ('viorel', 'daniela', 17),
        ('viorel', 'calin', 6),
        ('viorel', 'pop', 9),
        ('viorel', 'monica', 9),
        ('viorel', 'dana', 10),
        ('viorel', 'horia', 8),
        ('dana', 'calin', 4),
        ('dana', 'daniela', 15),
        ('dana', 'pop', 1),
        ('dana', 'horia', 1),
        ('pop', 'daniela', 1),
        ('pop', 'calin', 2),
        ('pop', 'horia', 2),
        ('horia', 'daniela', 1),
    ]

    people = {}
    for key, query in roster:
        hits = dblp.search(query)
        if key == 'pop':
            # Kept from the original: the helper is called on the hit list
            # and its return value is ignored.
            # NOTE(review): presumably it reorders `hits` in place -- confirm.
            get_right_name(hits, query)
        people[key] = hits[0]

    labels = {key: person.name for key, person in people.items()}

    g = nx.Graph()
    for left, right, weight in weighted_links:
        g.add_edge(labels[left], labels[right], weight=weight)

    pos = nx.circular_layout(g)
    edge_labels = {(u, v): d['weight'] for u, v, d in g.edges(data=True)}

    # NOTE(review): the original also passed node_alpha=0.2, node_text_size=6,
    # edge_color='blue', edge_alpha=0.2, edge_tickness=1, edge_text_pos=0.2
    # and text_font='sans-serif' here. None of them is a parameter of
    # draw_networkx_nodes: depending on the networkx release they are ignored
    # or rejected with TypeError. They are dropped so the picture stays as it
    # was. If that styling is wanted, the matching arguments are `alpha` on
    # the node/edge calls, `edge_color`/`width` on draw_networkx_edges,
    # `font_size`/`font_family` on draw_networkx_labels and `label_pos` on
    # draw_networkx_edge_labels.
    nx.draw_networkx_nodes(g, pos, node_size=1600, node_color='white')
    nx.draw_networkx_edges(g, pos)
    nx.draw_networkx_labels(g, pos)
    nx.draw_networkx_edge_labels(g, pos, edge_labels=edge_labels)

    plt.title("Professors Graph")
    plt.axis('off')
    plt.savefig('output.png')
    plt.show()
import dblp

autores = dblp.search('D. Aguirre-Guerrero')

# Size
print(autores.size)
# Object type
print(autores.__class__)
# Contents of the object
print(autores)
# Column names and their types
print(autores.dtypes)

# Show each column on its own, in the original order.
for columna in ('Authors', 'Link', 'Title', 'Type', 'Where', 'Year'):
    print(autores[columna])

# Collect the distinct names found across all 'Authors' entries.
coautores = autores['Authors']
nombres = set()
nombres.update(*coautores)
print(nombres)
import dblp

authors = dblp.search('Fugang Wang')
visitedAuth = []   # co-author names in order of first appearance
authCount = 0

# Set mirror of visitedAuth so the membership test does not rescan the list.
# NOTE(review): assumes the entries of pub.authors are hashable (they are
# compared with auth.name, so presumably plain strings) -- confirm.
seenAuth = set()

# The context manager closes the report even if a lookup fails midway.
with open("author_graph_sample.txt", "w") as fout:
    for auth in authors:
        fout.write('%s\n' % (auth.name,))
        fout.write(70 * '=' + '\n')
        for pub in auth.publications:
            for a in pub.authors:
                # Record every co-author once, skipping the author themself.
                if a not in seenAuth and a != auth.name:
                    seenAuth.add(a)
                    visitedAuth.append(a)
                    authCount += 1
    fout.write('%s\n' % (visitedAuth,))
    fout.write('Author count:' + str(authCount))
import dblp
import csv
import lxml
import time

# pubs[year][title] -> {'year': ..., 'authors': ...}
pubs = {}
auth = {}

# Each row of people.csv: name to search, then two optional year bounds.
# NOTE(review): 'rb' is the Python 2 way of opening a file for csv.reader;
# Python 3 expects text mode with newline='' -- confirm the target version.
with open('people.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in reader:
        if row[0] not in auth:
            authors = dblp.search(row[0])
            # Only the first hit is used. With no hit at all there is nothing
            # to index, so report zero publications instead of failing on
            # authors[0].
            publications = authors[0].publications if len(authors) else []
            print(row[0] + ": " + str(len(authors)) + " author(s) found; "
                  + str(len(publications)) + " publications")
            for publication in publications:
                year = publication.year
                title = publication.title
                # NOTE(review): row[1] / row[2] come from the CSV as strings;
                # whether `year` is comparable with them depends on the type
                # dblp returns -- confirm. `break` also stops at the first
                # out-of-range publication, which presumably relies on the
                # list being ordered by year.
                if row[1] and year < row[1]:
                    break
                if row[2] and year > row[2]:
                    break
                entry = pubs.setdefault(year, {}).setdefault(title, {})
                entry['year'] = publication.year
                entry['authors'] = publication.authors
import dblp

# Print the title of every publication of every author matching the surname.
authors = dblp.search('Laszewski')
for author in authors:
    titles = (paper.title for paper in author.publications)
    for title in titles:
        print(title)