def _comm_ar_shorten(text, limit):
    """Return *text* cut to at most *limit* characters followed by "...".

    Text that already fits within *limit* is returned unchanged.  The cut is
    made at the last space found within the first *limit* characters; when
    there is no usable space the text is cut hard at *limit*.
    """
    if len(text) <= limit:
        return text
    cut = text[:limit].rfind(' ')
    if cut <= 0:
        # rfind() returns -1 when no space exists; slicing with -1 would keep
        # the whole text minus its last character instead of shortening it.
        cut = limit
    return text[:cut] + "..."


def _comm_ar_top(entries, score, limit):
    """Sort *entries* in place by descending *score*; return {rank: entry}.

    Ranks start at 0 and at most *limit* entries are returned.  The sort is
    stable, so ties keep the order the entries had before the call.
    """
    entries.sort(key=score, reverse=True)
    return dict(enumerate(entries[:limit]))


def comm_AR(in_dir, parti, thr, dgcl_id, verbose):
    """Rank the most cited and most representative papers/authors per cluster.

    Reads "authors.dat" and "articles.dat" from *in_dir* through
    ``Utils.Author`` and ``Utils.Article`` and, for every cluster holding
    strictly more than *thr* articles, builds top-20 rankings.

    Parameters
    ----------
    in_dir
        Directory containing "authors.dat" and "articles.dat".
    parti
        Dict mapping an article id to a cluster id, or to a list of cluster
        ids when an article belongs to several clusters.  Not modified.
    thr
        Size threshold; only clusters with more than *thr* articles are kept.
    dgcl_id
        Dict mapping an article id to its numeric degree.  Pass ``'NaN'``
        (or ``None``) to give every article of *parti* a degree of 1.
    verbose
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(CR_papers, CR_authors)``; both are dicts keyed by cluster id.

        ``CR_papers[com]`` has the keys 'MC', 'MC_K90', 'MC_K95', 'MR',
        'MR_TC90', 'MR_TC95' and 'MR_TCsup5'; ``CR_authors[com]`` has 'MC',
        'MC_K50', 'MC_K80', 'MR', 'MR_TC50' and 'MR_TC80'.  Only 'MC' (most
        cited) and 'MR' (highest degree) are filled, as ``{rank: entry}``
        with ranks starting at 0; the other keys map to empty dicts.

        A paper entry is ``[firstAU, year, title, journal, volume, doctype,
        times_cited, degree, authors]`` where ``authors`` is a ", "-joined
        string.  An author entry is ``(name, [total_citations, n_papers,
        summed_degree])``.

    Raises
    ------
    KeyError
        If a reported article has no entry in *dgcl_id*.
    """
    # Local import so the block does not depend on the file's import header.
    from collections import Counter

    LL = 20  # length of output lists
    limtitle = 120  # titles longer than this are shortened

    # Store every membership as a list so that single-cluster and
    # multi-cluster partitions go through the same code path.  Building a new
    # dict leaves the caller's mapping untouched and copes with an empty one.
    partition = {
        node: coms if isinstance(coms, list) else [coms]
        for node, coms in parti.items()
    }

    if dgcl_id is None or dgcl_id == 'NaN':
        dgcl_id = dict.fromkeys(partition, 1)

    # .. cluster sizes: one pass over the memberships; set() makes sure an
    # article listed twice in the same cluster is only counted once.
    cluster_size = Counter(
        com for coms in partition.values() for com in set(coms)
    )

    # .. only clusters above the size threshold are reported
    stuff_papers = {}
    stuff_authors = {}
    for com, size in cluster_size.items():
        if size > thr:
            stuff_papers[com] = []
            stuff_authors[com] = {}

    ## INPUT DATA
    # all authors, kept as a list per article id.  Joining them into a string
    # and splitting it again later would break names that contain ", ".
    my_auth = {}
    pl = Utils.Author()
    pl.read_file(os.path.join(in_dir, "authors.dat"))
    for rec in pl.authors:
        my_auth.setdefault(rec.id, []).append(rec.author)

    # articles
    pl = Utils.Article()
    pl.read_file(os.path.join(in_dir, "articles.dat"))
    for art in pl.articles:
        if art.id not in partition:
            continue
        # an article may belong to several clusters; keep the reported ones
        reported = [com for com in partition[art.id] if com in stuff_papers]
        if not reported:
            continue

        # '&' is escaped as backslash-ampersand (presumably for LaTeX output
        # -- confirm against the code consuming these records).
        title = _comm_ar_shorten(art.title.replace('&', '\\&'), limtitle)
        journal = art.journal.replace('&', '\\&')
        cited = int(art.times_cited)
        degree = dgcl_id[art.id]
        names = my_auth.get(art.id, [])
        authors = ', '.join(names)

        for com in reported:
            tally = stuff_authors[com]
            for name in names:
                stats = tally.setdefault(name, [0, 0, 0])
                stats[0] += cited
                stats[1] += 1
                stats[2] += degree
            stuff_papers[com].append([
                art.firstAU, art.year, title, journal, art.volume,
                art.doctype, cited, degree, authors,
            ])

    ## TREAT DATA
    # NOTE: the percentile/threshold-filtered rankings ('MC_K*', 'MR_TC*')
    # are not computed; their keys are still created, empty, so the shape of
    # the returned dicts is unchanged.
    paper_slots = ('MC', 'MC_K90', 'MC_K95', 'MR',
                   'MR_TC90', 'MR_TC95', 'MR_TCsup5')
    author_slots = ('MC', 'MC_K50', 'MC_K80', 'MR', 'MR_TC50', 'MR_TC80')

    CR_papers = {}
    CR_authors = {}
    for com, papers in stuff_papers.items():
        CR_papers[com] = {slot: {} for slot in paper_slots}
        CR_authors[com] = {slot: {} for slot in author_slots}

        # PAPERS: most cited first, then most representative (by degree).
        # The second sort runs on the citation-sorted list, so papers with
        # equal degree stay ordered by citations.
        CR_papers[com]['MC'] = _comm_ar_top(papers, lambda e: e[6], LL)
        CR_papers[com]['MR'] = _comm_ar_top(papers, lambda e: e[7], LL)

        # AUTHORS: same two rankings on (name, [citations, papers, degree])
        people = list(stuff_authors[com].items())
        CR_authors[com]['MC'] = _comm_ar_top(people, lambda e: e[1][0], LL)
        CR_authors[com]['MR'] = _comm_ar_top(people, lambda e: e[1][2], LL)

    return (CR_papers, CR_authors)