예제 #1
0
def _shorten_title(title, limit):
  # Escape ampersands as "\&", then cut titles longer than `limit` characters
  # at the last space found before the limit and append "...".
  escaped = title.replace('&', '\\&')
  if len(escaped) <= limit:
    return escaped
  cut = escaped.rfind(' ', 0, limit)
  if cut == -1:
    # No space before the limit: rfind() returned -1, which used as a slice
    # end would keep nearly the whole title. Hard-cut at the limit instead,
    # without splitting an escaped "\&" pair.
    cut = limit
    if escaped[cut - 1:cut + 1] == '\\&':
      cut -= 1
  return escaped[:cut] + "..."


def _rank_top(items, score, limit):
  # Sort `items` in place by descending `score` (stable, so ties keep their
  # previous relative order) and return {rank: item} for the first `limit`.
  items.sort(key=lambda item: -score(item))
  return dict(enumerate(items[:limit]))


def comm_AR(in_dir,parti,thr,dgcl_id,verbose):
  """Rank the papers and authors of each sufficiently large cluster.

  Reads "authors.dat" and "articles.dat" from `in_dir` through
  `Utils.Author` / `Utils.Article`, attaches every article found in the
  partition to its cluster(s), and builds "most cited" (MC) and "most
  representative" (MR) top lists per cluster.

  Parameters:
    in_dir   -- directory containing "authors.dat" and "articles.dat".
    parti    -- dict mapping an article id to a cluster id, or to a list of
                cluster ids when an article belongs to several clusters.
                It is not modified (a shallow copy is normalised instead).
    thr      -- only clusters with strictly more than `thr` articles are kept.
    dgcl_id  -- dict mapping an article id to a numeric score (called a
                degree in this file's comments), or the string 'NaN' to give
                every article a score of 1.
    verbose  -- accepted for interface compatibility; not used here.

  Returns:
    (CR_papers, CR_authors), both keyed by cluster id.
    CR_papers[com]['MC'] / ['MR'] map a rank (0-based) to a paper record
      [firstAU, year, title, journal, volume, doctype, times_cited,
       score, authors], ranked by times_cited and by score respectively.
    CR_authors[com]['MC'] / ['MR'] map a rank to a tuple
      (author, [sum of times_cited, number of papers, sum of scores]),
      ranked by summed citations and by summed score respectively.
    The other keys ('MC_K90', 'MR_TC90', ...) are created as empty dicts;
    the filtered rankings they were meant for are not computed here.
    At most 20 entries are stored per ranking.
  """
  ## INI
  LL = 20  # length of output lists
  limtitle = 120  # maximum title length before truncation
  paper_keys = ['MC','MC_K90','MC_K95','MR','MR_TC90','MR_TC95', 'MR_TCsup5']
  author_keys = ['MC','MC_K50','MC_K80','MR','MR_TC50','MR_TC80']

  # Normalise the partition so every value is a list of cluster ids. The
  # first value decides the format; an empty partition needs no conversion.
  partition = parti.copy()
  first_value = next(iter(partition.values()), [])
  if not isinstance(first_value, list):
    for elt in partition:
      partition[elt] = [partition[elt]]

  if dgcl_id == 'NaN':
    dgcl_id = {elt: 1 for elt in partition}

  #.. cluster sizes: number of articles listing each cluster, in one pass
  all_coms = []
  node_counts = {}
  for coms in partition.values():
    all_coms.extend(coms)
    for com in set(coms):  # an article counts once per cluster
      node_counts[com] = node_counts.get(com, 0) + 1

  #.. keep only the clusters above the size threshold
  stuff_papers = {}
  stuff_authors = {}
  for com in set(all_coms):
    if node_counts[com] > thr:
      stuff_papers[com] = []
      stuff_authors[com] = {}

  ## INPUT DATA
  # all authors: article id -> "author1, author2, ..."
  my_auth = {}
  pl = Utils.Author()
  pl.read_file(os.path.join(in_dir, "authors.dat"))
  for l in pl.authors:
    if l.id not in my_auth:
      my_auth[l.id] = l.author
    else:
      my_auth[l.id] += ', ' + l.author

  # articles
  pl = Utils.Article()
  pl.read_file(os.path.join(in_dir, "articles.dat"))
  for l in pl.articles:
    if l.id not in partition:
      continue
    # an article may belong to several clusters; skip it if none is kept
    kept_coms = [com for com in partition[l.id] if com in stuff_papers]
    if not kept_coms:
      continue

    # values shared by every cluster the article belongs to
    footitle = _shorten_title(l.title, limtitle)
    journal = l.journal.replace('&', '\\&')
    times_cited = int(l.times_cited)
    score = dgcl_id[l.id]
    has_authors = l.id in my_auth
    authors = my_auth[l.id] if has_authors else ''

    for com in kept_coms:
      if has_authors:
        com_authors = stuff_authors[com]
        for auth in authors.split(', '):
          stats = com_authors.setdefault(auth, [0, 0, 0])
          stats[0] += times_cited
          stats[1] += 1
          stats[2] += score
      # a fresh record per cluster, so clusters never share a list object
      stuff_papers[com].append([l.firstAU, l.year, footitle, journal, l.volume, l.doctype, times_cited, score, authors])

  # TREAT DATA
  CR_papers = {}
  CR_authors = {}
  for com in stuff_papers:
    CR_papers[com] = {KK: dict() for KK in paper_keys}
    CR_authors[com] = {KK: dict() for KK in author_keys}

    # PAPERS: the MR sort runs on the MC-sorted list, so papers with equal
    # score stay ordered by citation count.
    papers = stuff_papers[com]
    CR_papers[com]['MC'] = _rank_top(papers, lambda rec: rec[6], LL)
    CR_papers[com]['MR'] = _rank_top(papers, lambda rec: rec[7], LL)

    # AUTHORS: same two-step ranking on (author, [TC, count, score]) pairs
    auth_items = list(stuff_authors[com].items())
    CR_authors[com]['MC'] = _rank_top(auth_items, lambda item: item[1][0], LL)
    CR_authors[com]['MR'] = _rank_top(auth_items, lambda item: item[1][2], LL)

  return (CR_papers, CR_authors)