Esempio n. 1
0
 def getYears(self,file,label):
     """Look up every paper title listed in a CSV file on DBLP.

     Each row must hold a paper id in column 0 and the paper title in
     column 1.  Titles are searched with ``dblp.search``; rows whose
     search yields nothing are skipped.  The collected mapping is
     pickled to ``notmatchedpaperyear_<label>.dat`` after every 200th
     row and once more when the whole file has been processed, so the
     final partial batch is not lost.

     Args:
         file: Path of the CSV file to read.
         label: Suffix used to build the checkpoint file name.

     Returns:
         dict mapping ``row[0]`` to the non-empty search result obtained
         for ``row[1]``.

     Raises:
         Exception: whatever ``dblp.search`` raises when the retry after
             the long back-off fails as well.
     """
     def lookup(title):
         # A None result is treated as a transient failure: wait briefly
         # and ask once more.
         content = dblp.search(title)
         if content is None:
             time.sleep(20)
             content = dblp.search(title)
         return content

     def checkpoint(data):
         # Use a context manager so the handle is closed after each dump.
         with open('notmatchedpaperyear_' + label + '.dat', "wb") as out:
             pickle.dump(data, out, True)

     paperYear = dict()
     with open(file) as f:
         for counter, row in enumerate(csv.reader(f)):
             # Progress output: zero-based index of the row being handled.
             print(counter)
             papername = row[1]
             try:
                 content = lookup(papername)
             except Exception:
                 # Back off for a long time, then retry once; a second
                 # failure propagates to the caller.
                 time.sleep(180)
                 content = lookup(papername)
             # Skip rows with no usable result (still None, or empty).
             if content is not None and not content.empty:
                 paperYear[row[0]] = content
             if (counter + 1) % 200 == 0:
                 checkpoint(paperYear)
     checkpoint(paperYear)
     return paperYear
Esempio n. 2
0
def search_dblp(title):
    """Given a paper title, attempt to search for it on DBLP.

    If @title does not match anything, we iteratively loosen our search
    constraints by dropping the last word of the title until results are
    found or only a single word is left.

    Args:
        title: Paper title; curly braces are stripped before searching.

    Returns:
        The first non-empty result of ``dblp.search``, or the (empty)
        result of the last attempt when nothing matched at all.
    """
    title = title.replace("{", "").replace("}", "")
    words = title.split(" ")
    results = dblp.search([title])
    # Stop once a single word remains: the original recursion kept
    # searching for the empty string forever when nothing matched.
    while results.empty and len(words) > 1:
        words.pop()
        results = dblp.search([" ".join(words)])
    return results
Esempio n. 3
0
        # Fragment of a loop body whose header is not visible here: report
        # the size of cluster i, then print its members ordered by getKey.
        print(". \tNumber of Publications: ", end="")
        print(len(clusterList[i]))
        sortedList = sorted(clusterList[i], key = getKey)
        for j in range(0, len(clusterList[i])):
            #clusterList[i][j].print_val()
            sortedList[j].print_val()
        # Visual separator between clusters.
        print("--------------------------------------------------------------------------------------------------")





pubList = []

# Ask for a researcher name and look it up on DBLP.
search_input = input("Please enter the name of the researcher (case - insensitive): ")
authors = dblp.search(search_input)
if not len(authors):
    print("No matching results found")
    exit()
print("Please select one of the below researchers: (Wait till input is not asked for)")

# List every match with a 1-based index and its publication count.
for i in range(len(authors)):
    author = authors[i]
    print(f"{i + 1}. " + author.name)
    print(f"number of publications = {len(author.publications)}")
    print("-----------------------------------------")
opt_intput = input("Type 1 for affiliation timeline. Type 2 for clusterization.\n")

if opt_intput == "1":
    # Convert the 1-based index shown to the user into a list index.
    auth_index = input('Please select one of the Author Index: ')
    i = int(auth_index) - 1
Esempio n. 4
0
import dblp, requests

# Interactive lookup: search DBLP, list the matches, then fetch the page
# linked from the record the user picks.
search_string = input('Enter author name for search: ')
search_results = dblp.search(search_string)
if search_results.empty:
    print("No results...\nExitting...")
    exit(1)
# The header names the same five fields that are printed for every row.
print('Index\tType\tAuthor Names\tTitle\tYear')
for index, val in search_results.iterrows():
    print(f'{index}\t{val.Type}\t{val.Authors}\t{val.Title}\t{val.Year}')
print('\n\t\t---------------------------------\n')
try:
    option = int(input('Enter index of the record to get data: '))
    record = search_results.loc[option]
except (ValueError, KeyError):
    # Non-numeric input or an index that is not in the result table.
    print("Invalid index...\nExitting...")
    exit(1)

# A timeout keeps the script from hanging forever on an unresponsive host.
request = requests.get(record.Link, timeout=30)
print(request.status_code)
print(request.text)
Esempio n. 5
0
def _main():
    """Draw the co-authorship graph of a fixed set of professors.

    Every professor is looked up on DBLP and the ``name`` of the first
    match becomes the node label.  Edges and their weights are the
    hard-coded tables below.  The graph is drawn on a circular layout
    with the weights as edge labels, saved to ``output.png`` and shown.

    Raises:
        IndexError: if a DBLP search returns no match for a professor.
    """
    # (short key used in the edge table, DBLP search string)
    professors = [
        ('cosmin', 'Cosmin Bonchis'),
        ('stefanb', 'Stefan Balint'),
        ('adrian', 'Adrian Craciun'),
        ('roxana', 'Roxana Dogaru'),
        ('teodor', 'Teodor-Florin Fortis'),
        ('marc', 'Marc Frincu'),
        ('victoria', 'Victoria Iordan'),
        ('gabriel', 'Gabriel Istrate'),
        ('ciprianj', 'Ciprian Jichici'),
        ('iuhasz', 'Gabriel Iuhasz'),
        ('eva', 'Eva Kaslik'),
        ('mircea', 'Mircea Marin'),
        ('flavia', 'Flavia Micota'),
        ('cristina', 'Cristina Mindruta'),
        ('stefanm', 'Stefan Maruster'),
        ('marian', 'Marian Neagul'),
        ('viorel', 'Viorel Negru'),
        ('dana', 'Dana Petcu'),
        ('pop', 'Daniel Pop'),
        ('horia', 'Horia Popa'),
        ('ciprianp', 'Ciprian Pungila'),
        ('monica', 'Monica Tirea'),
        ('daniela', 'Daniela Zaharie'),
        ('calin', 'Calin Sandru'),
    ]

    # (professor key, professor key, edge weight); the order is kept from
    # the original calls because it determines node order in the layout.
    weighted_edges = [
        ('stefanb', 'eva', 6),
        ('stefanb', 'teodor', 4),
        ('cosmin', 'gabriel', 6),
        ('cosmin', 'dana', 1),
        ('adrian', 'gabriel', 4),
        ('adrian', 'mircea', 2),
        ('roxana', 'flavia', 1),
        ('roxana', 'daniela', 1),
        ('teodor', 'viorel', 14),
        ('teodor', 'pop', 2),
        ('teodor', 'cristina', 3),
        ('marc', 'dana', 16),
        ('marc', 'marian', 4),
        ('marc', 'daniela', 3),
        ('victoria', 'dana', 2),
        ('gabriel', 'mircea', 3),
        ('ciprianj', 'pop', 1),
        ('ciprianj', 'viorel', 1),
        ('iuhasz', 'dana', 1),
        ('iuhasz', 'viorel', 6),
        ('iuhasz', 'daniela', 1),
        ('iuhasz', 'monica', 1),
        ('eva', 'dana', 1),
        ('flavia', 'daniela', 1),
        ('cristina', 'viorel', 2),
        ('cristina', 'calin', 2),
        ('cristina', 'dana', 1),
        ('stefanm', 'viorel', 1),
        ('marian', 'dana', 11),
        ('marian', 'calin', 2),
        ('marian', 'daniela', 2),
        ('marian', 'pop', 1),
        ('viorel', 'ciprianp', 5),
        ('viorel', 'daniela', 17),
        ('viorel', 'calin', 6),
        ('viorel', 'pop', 9),
        ('viorel', 'monica', 9),
        ('viorel', 'dana', 10),
        ('viorel', 'horia', 8),
        ('dana', 'calin', 4),
        ('dana', 'daniela', 15),
        ('dana', 'pop', 1),
        ('dana', 'horia', 1),
        ('pop', 'daniela', 1),
        ('pop', 'calin', 2),
        ('pop', 'horia', 2),
        ('horia', 'daniela', 1),
    ]

    # Resolve every professor to the name DBLP reports for the first match.
    names = {}
    for key, query in professors:
        matches = dblp.search(query)
        if key == 'pop':
            # NOTE(review): the return value is ignored, exactly as before;
            # presumably get_right_name reorders `matches` in place - confirm.
            get_right_name(matches, query)
        try:
            first_match = matches[0]
        except IndexError:
            raise IndexError('DBLP search returned no author for %r' % query)
        names[key] = first_match.name

    g = nx.Graph()
    for first, second, weight in weighted_edges:
        g.add_edge(names[first], names[second], weight=weight)

    pos = nx.circular_layout(g)

    edge_labels = {(u, v): d['weight'] for u, v, d in g.edges(data=True)}

    # Only node styling is passed here.  The former extra keywords
    # (node_alpha, node_text_size, edge_color, edge_alpha, edge_tickness,
    # edge_text_pos, text_font) are not parameters of draw_networkx_nodes
    # and make current networkx releases raise TypeError.
    nx.draw_networkx_nodes(g, pos, node_size=1600, node_color='white')

    nx.draw_networkx_edges(g, pos)
    nx.draw_networkx_labels(g, pos)
    nx.draw_networkx_edge_labels(g, pos, edge_labels=edge_labels)

    plt.title("Professors Graph")
    plt.axis('off')

    # Save before show(): the original kept this order as well.
    plt.savefig('output.png')
    plt.show()
Esempio n. 6
0
# http://www.informatik.uni-trier.de/~ley/db/

import dblp

# Print the name of every DBLP author matching each query.  print() with
# a single argument behaves the same on Python 2 and Python 3.
for query in ('Gregor von Laszewski', 'Laszewski'):
    authors = dblp.search(query)

    for author in authors:
        print(author.name)
    
# Fragment: relies on names defined outside this view (fp, no_name,
# no_name_list, more_than_one, author_disambiguation, sleep).
no_matched_author = []  # names with several candidates but no exact match
no_pub_list = []
notes_count = 0

# Every input line must consist of exactly three comma-separated fields.
for eachLine in fp.readlines():
    emailId, firstName, lastName = eachLine.split(',')
    name = firstName + " " + lastName.strip()
    file_name = firstName + "_" + lastName.strip()

    # Entries without a usable name are recorded by their e-mail id.
    if name == '' or name == ' ':
        print emailId
        no_name += 1
        no_name_list.append(emailId)
    else:
        # One retry after a short pause; a second failure propagates.
        try:
            authors = dblp.search(name)
        except Exception as e:
            sleep(2)
            authors = dblp.search(name)
        if authors:
            if len(authors) > 1:
                print "More than one authors found! "
                more_than_one += 1
                # Remember every candidate name and keep the one whose name
                # equals the searched name ignoring case (the last such one
                # wins if several match).
                author_disambiguation[name] = []
                author = None
                for ath in authors:
                    author_disambiguation[name].append(ath.name)
                    if name.lower() == ath.name.lower():
                        author = ath
                if not author:
                    no_matched_author.append(name)
Esempio n. 8
0
# -*- coding: utf-8 -*-
"""
Created on Fri Jun  7 23:57:13 2019

@author: Markus Borg
"""

import dblp

# Take the first DBLP match for the query and report on its publications.
authors = dblp.search('markus borg')
markus = authors[0]
publication_total = len(markus.publications)
print("Publications: ", publication_total)
for publication in markus.publications:
    print(publication.citations)
Esempio n. 9
0
    # for inside use only:
    # Fragment: the enclosing function header and the handler(s) of the
    # `try` below are outside this view.
    while True:
        try:
            for node_in_file in nodes_by_lines:
                # Each entry is "<id>,<name>" with underscores standing in
                # for the spaces of the name.
                name_to_find = node_in_file.split(',')[1].replace('_', ' ')
                id_of_searched = int(node_in_file.split(',')[0])
                # Debug output of the current state.
                print(name_to_find)
                print(id_of_searched)
                print(list_of_nodes_to_add)
                print(list_of_edges_to_add)

                # skip on passed ids
                if id_of_searched < position:
                    continue

                options = dblp.search(name_to_find)
                chosen_publisher_index = choose_name(name_to_find, options)
                print(chosen_publisher_index)
                # -1 is treated as "no candidate chosen".
                if chosen_publisher_index != -1:
                    # NOTE(review): this repeats the search made just above;
                    # options[chosen_publisher_index] presumably yields the
                    # same author - confirm before changing.
                    publisher = dblp.search(
                        name_to_find)[chosen_publisher_index]
                    # run on every publications and adding it to "nodes_by_lines"
                    for article in publisher.publications:
                        authors_of_article = article.authors
                        print(authors_of_article)
                        for author in authors_of_article:
                            # Compare lower-cased, unidecode-transliterated
                            # names so the publisher itself is skipped.
                            author = unidecode(author.lower())
                            if author != unidecode(publisher.name.lower()):
                                author_splitted = author.split(' ')
                                # normalizing names
                                normalized_author = ""
Esempio n. 10
0
# coding: utf-8

# In[38]:

import dblp
# Search DBLP and pick the record at position 23 of the result table.
# NOTE(review): the position is hard-coded; fewer than 24 results raises
# IndexError.
authors = dblp.search('alessandro bozzon')
paper23=authors.iloc[23]


# In[2]:

url=paper23.Link


# In[3]:

import urllib
import os
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# Download the linked document into a local file named after the last
# path component of the URL.  The file is opened in binary mode because
# the payload is raw bytes; text mode can corrupt it.  Both handles are
# closed even when the transfer fails.
with closing(urlopen(url)) as webFile:
    with open(url.split('/')[-1], 'wb') as pdfFile:
        print(pdfFile)
        pdfFile.write(webFile.read())




Esempio n. 11
0
def _main():
    """Draw the co-authorship graph of a fixed set of professors.

    Every professor is looked up on DBLP and the ``name`` of the first
    match becomes the node label.  Edges and their weights are the
    hard-coded tables below.  The graph is drawn on a circular layout
    with the weights as edge labels, saved to ``output.png`` and shown.

    Raises:
        IndexError: if a DBLP search returns no match for a professor.
    """
    # (short key used in the edge table, DBLP search string)
    professors = [
        ('cosmin', 'Cosmin Bonchis'),
        ('stefanb', 'Stefan Balint'),
        ('adrian', 'Adrian Craciun'),
        ('roxana', 'Roxana Dogaru'),
        ('teodor', 'Teodor-Florin Fortis'),
        ('marc', 'Marc Frincu'),
        ('victoria', 'Victoria Iordan'),
        ('gabriel', 'Gabriel Istrate'),
        ('ciprianj', 'Ciprian Jichici'),
        ('iuhasz', 'Gabriel Iuhasz'),
        ('eva', 'Eva Kaslik'),
        ('mircea', 'Mircea Marin'),
        ('flavia', 'Flavia Micota'),
        ('cristina', 'Cristina Mindruta'),
        ('stefanm', 'Stefan Maruster'),
        ('marian', 'Marian Neagul'),
        ('viorel', 'Viorel Negru'),
        ('dana', 'Dana Petcu'),
        ('pop', 'Daniel Pop'),
        ('horia', 'Horia Popa'),
        ('ciprianp', 'Ciprian Pungila'),
        ('monica', 'Monica Tirea'),
        ('daniela', 'Daniela Zaharie'),
        ('calin', 'Calin Sandru'),
    ]

    # (professor key, professor key, edge weight); the order is kept from
    # the original calls because it determines node order in the layout.
    weighted_edges = [
        ('stefanb', 'eva', 6),
        ('stefanb', 'teodor', 4),
        ('cosmin', 'gabriel', 6),
        ('cosmin', 'dana', 1),
        ('adrian', 'gabriel', 4),
        ('adrian', 'mircea', 2),
        ('roxana', 'flavia', 1),
        ('roxana', 'daniela', 1),
        ('teodor', 'viorel', 14),
        ('teodor', 'pop', 2),
        ('teodor', 'cristina', 3),
        ('marc', 'dana', 16),
        ('marc', 'marian', 4),
        ('marc', 'daniela', 3),
        ('victoria', 'dana', 2),
        ('gabriel', 'mircea', 3),
        ('ciprianj', 'pop', 1),
        ('ciprianj', 'viorel', 1),
        ('iuhasz', 'dana', 1),
        ('iuhasz', 'viorel', 6),
        ('iuhasz', 'daniela', 1),
        ('iuhasz', 'monica', 1),
        ('eva', 'dana', 1),
        ('flavia', 'daniela', 1),
        ('cristina', 'viorel', 2),
        ('cristina', 'calin', 2),
        ('cristina', 'dana', 1),
        ('stefanm', 'viorel', 1),
        ('marian', 'dana', 11),
        ('marian', 'calin', 2),
        ('marian', 'daniela', 2),
        ('marian', 'pop', 1),
        ('viorel', 'ciprianp', 5),
        ('viorel', 'daniela', 17),
        ('viorel', 'calin', 6),
        ('viorel', 'pop', 9),
        ('viorel', 'monica', 9),
        ('viorel', 'dana', 10),
        ('viorel', 'horia', 8),
        ('dana', 'calin', 4),
        ('dana', 'daniela', 15),
        ('dana', 'pop', 1),
        ('dana', 'horia', 1),
        ('pop', 'daniela', 1),
        ('pop', 'calin', 2),
        ('pop', 'horia', 2),
        ('horia', 'daniela', 1),
    ]

    # Resolve every professor to the name DBLP reports for the first match.
    names = {}
    for key, query in professors:
        matches = dblp.search(query)
        if key == 'pop':
            # NOTE(review): the return value is ignored, exactly as before;
            # presumably get_right_name reorders `matches` in place - confirm.
            get_right_name(matches, query)
        try:
            first_match = matches[0]
        except IndexError:
            raise IndexError('DBLP search returned no author for %r' % query)
        names[key] = first_match.name

    g = nx.Graph()
    for first, second, weight in weighted_edges:
        g.add_edge(names[first], names[second], weight=weight)

    pos = nx.circular_layout(g)

    edge_labels = {(u, v): d['weight'] for u, v, d in g.edges(data=True)}

    # Only node styling is passed here.  The former extra keywords
    # (node_alpha, node_text_size, edge_color, edge_alpha, edge_tickness,
    # edge_text_pos, text_font) are not parameters of draw_networkx_nodes
    # and make current networkx releases raise TypeError.
    nx.draw_networkx_nodes(g, pos, node_size=1600, node_color='white')

    nx.draw_networkx_edges(g, pos)
    nx.draw_networkx_labels(g, pos)
    nx.draw_networkx_edge_labels(g, pos, edge_labels=edge_labels)

    plt.title("Professors Graph")
    plt.axis('off')

    # Save before show(): the original kept this order as well.
    plt.savefig('output.png')
    plt.show()
Esempio n. 12
0
import dblp
# Search DBLP and inspect the returned table.  print() with a single
# argument behaves the same on Python 2 and Python 3.
autores = dblp.search('D. Aguirre-Guerrero')
# Size
print(autores.size)
# Object type
print(autores.__class__)
# Object contents
print(autores)
# Column names and their types
print(autores.dtypes)
# Each column in turn
for columna in ('Authors', 'Link', 'Title', 'Type', 'Where', 'Year'):
    print(autores[columna])

# Accumulate the distinct co-author names, printing the running set
# after every record.
# NOTE(review): assumes each 'Authors' cell is a collection of names; a
# plain string would be split into single characters - confirm.
coautores = autores['Authors']
nombres = set()
for coautor in coautores:
    nombres.update(coautor)
    print(nombres)
Esempio n. 13
0
import dblp

# Collect the distinct co-authors of every DBLP author matching the query
# and write a small report to author_graph_sample.txt.
visitedAuth = []   # co-authors in order of first appearance
authCount = 0
seenAuth = set()   # same names as visitedAuth, for O(1) membership tests

# The context manager closes (and flushes) the report even on errors.
with open("author_graph_sample.txt", "w") as fout:
    authors = dblp.search('Fugang Wang')

    for auth in authors:
        fout.write("%s\n" % (auth.name,))
        fout.write(70 * '=' + "\n")
        for pub in auth.publications:
            for a in pub.authors:
                # Skip the author themselves and names already recorded.
                if a != auth.name and a not in seenAuth:
                    seenAuth.add(a)
                    visitedAuth.append(a)
                    authCount += 1

    fout.write(str(visitedAuth) + "\n")
    fout.write('Author count:' + str(authCount))
Esempio n. 14
0
import dblp

# Collect the distinct co-authors of every DBLP author matching the query
# and write a small report to author_graph_sample.txt.
visitedAuth = []   # co-authors in order of first appearance
authCount = 0
seenAuth = set()   # same names as visitedAuth, for O(1) membership tests

# The context manager closes (and flushes) the report even on errors.
with open("author_graph_sample.txt", "w") as fout:
    authors = dblp.search('Fugang Wang')

    for auth in authors:
        fout.write("%s\n" % (auth.name,))
        fout.write(70 * '=' + "\n")
        for pub in auth.publications:
            for a in pub.authors:
                # Skip the author themselves and names already recorded.
                if a != auth.name and a not in seenAuth:
                    seenAuth.add(a)
                    visitedAuth.append(a)
                    authCount += 1

    fout.write(str(visitedAuth) + "\n")
    fout.write('Author count:' + str(authCount))
        
import dblp
import csv
import lxml
import time

# pubs maps year -> title -> {'year': ..., 'authors': ...}.
pubs = {}
# Names in `auth` are skipped below; nothing in the visible part of the
# script ever adds to it.
auth = {}

# Each row of people.csv: row[0] is the name searched on DBLP, row[1] and
# row[2] are optional lower and upper year bounds.
with open('people.csv', 'rb') as csvfile:
	reader = csv.reader(csvfile, delimiter=',', quotechar='|')
	for row in reader:
		if row[0] not in auth:
			authors = dblp.search(row[0])
			# Only the first match (authors[0]) is used from here on.
			print row[0] + ": " + str(len(authors)) + " author(s) found; " + str(len(authors[0].publications)) + " publications"
			
			for i in range(0, len(authors[0].publications)):
				year = authors[0].publications[i].year
				title = authors[0].publications[i].title
				
				# A publication outside the bounds stops the scan of this
				# author's remaining publications (break, not continue).
				# NOTE(review): `year` is compared with the raw CSV strings;
				# confirm both sides have the same type.
				if row[1] and year < row[1]:
					break
				if row[2] and year > row[2]:
					break
				
				if year not in pubs:
					pubs[year] = {}
					
				# Record each title once per year.
				if title not in pubs[year]:
					pubs[year][title] = {}
					pubs[year][title]['year'] = authors[0].publications[i].year
					pubs[year][title]['authors'] = authors[0].publications[i].authors
# Fragment: relies on names defined outside this view (fp, no_name,
# no_name_list, more_than_one, author_disambiguation, sleep).
no_matched_author = []  # names with several candidates but no exact match
no_pub_list = []
notes_count = 0

# Every input line must consist of exactly three comma-separated fields.
for eachLine in fp.readlines():
    emailId, firstName, lastName = eachLine.split(',')
    name = firstName + " " + lastName.strip()
    file_name = firstName + "_" + lastName.strip()

    # Entries without a usable name are recorded by their e-mail id.
    if name == '' or name == ' ':
        print emailId
        no_name += 1
        no_name_list.append(emailId)
    else:
        # One retry after a short pause; a second failure propagates.
        try:
            authors = dblp.search(name)
        except Exception as e:
            sleep(2)
            authors = dblp.search(name)
        if authors:
            if len(authors) > 1:
                print "More than one authors found! "
                more_than_one += 1
                # Remember every candidate name and keep the one whose name
                # equals the searched name ignoring case (the last such one
                # wins if several match).
                author_disambiguation[name] = []
                author = None
                for ath in authors:
                    author_disambiguation[name].append(ath.name)
                    if name.lower() == ath.name.lower():
                        author = ath
                if not author:
                    no_matched_author.append(name)
Esempio n. 17
0
import dblp

# Print the title of every publication of every DBLP author matching the
# query.  print() with a single argument behaves the same on Python 2 and
# Python 3.
authors = dblp.search('Laszewski')

for author in authors:
    for p in author.publications:
        print(p.title)