def internal_directed_authorcite_matrix(w):
    """Return a directed author-to-author citation count matrix.

    Only authors of the corpus are involved.  Cell (row, col) means
    "author `row` cites author `col`": it is incremented once for every
    combination of (paper, author of that paper, DOI cited by that paper
    which is itself a key of authors_by_doi(w), author of the cited paper).

    Parameters:
        w: corpus object providing set_authors() and reader(); each paper
           yielded by reader() is indexed with 'AU' here and passed to
           cited_dois().

    Returns:
        (result, authors, authors) where `result` is a square int32 array
        and `authors` labels both its rows and its columns.

    Raises:
        ValueError: if a citing or cited author is not in w.set_authors().
    """
    authordict = authors_by_doi(w)
    authors = list(w.set_authors())

    # Map every author to its first position once, instead of running
    # authors.index() (a linear scan) for every single citation.
    position = {}
    for i, name in enumerate(authors):
        position.setdefault(name, i)

    def index_of(name):
        # Keep list.index()'s ValueError for names missing from `authors`.
        try:
            return position[name]
        except KeyError:
            raise ValueError("%r is not in list" % (name,))

    result = zeros((len(authors), len(authors)), dtype=int32)
    for paper in w.reader():
        for node_author in authorlist_from_authorfield(paper['AU']):
            for doi in cited_dois(paper):
                # `in` replaces dict.has_key(), which no longer exists in
                # Python 3; cited DOIs outside the corpus are ignored.
                if doi in authordict:
                    for cited_author in authordict[doi]:
                        result[index_of(node_author),
                               index_of(cited_author)] += 1
    return (result, authors, authors)
def authors_in_papers_matrix(w):
    """Return a paper-by-author occurrence matrix.

    Rows represent the papers of the corpus, in the order of w.dois();
    columns represent the authors, in the order of list(w.set_authors()).
    Cell (row, col) is 1 when the author of that column appears in the
    'AU' field of the paper of that row, and 0 otherwise.

    Papers whose 'AU' or 'DI' field is the empty string are skipped, as
    are empty author names.

    Parameters:
        w: corpus object providing set_authors(), dois() and reader();
           each paper yielded by reader() is indexed with 'AU' and 'DI'.

    Returns:
        (result, row_dois, authors) where `result` is an int32 array of
        shape (len(row_dois), len(authors)).

    Raises:
        ValueError: if a paper's DOI is not in w.dois() or one of its
            authors is not in w.set_authors().
    """
    authors = list(w.set_authors())
    row_dois = w.dois()

    # Precompute first-occurrence positions so each cell costs a dict
    # lookup rather than a linear .index() scan over the label lists.
    author_col = {}
    for col, name in enumerate(authors):
        author_col.setdefault(name, col)
    doi_row = {}
    for row, doi in enumerate(row_dois):
        doi_row.setdefault(doi, row)

    def lookup(table, key):
        # Keep list.index()'s ValueError for labels that are missing.
        try:
            return table[key]
        except KeyError:
            raise ValueError("%r is not in list" % (key,))

    result = zeros((len(row_dois), len(authors)), dtype=int32)
    for paper in w.reader():
        if paper['AU'] != '' and paper['DI'] != '':
            for author in authorlist_from_authorfield(paper['AU']):
                if author != '':
                    # Row is resolved before column, and only once a
                    # non-empty author is found.
                    result[lookup(doi_row, paper['DI'])][
                        lookup(author_col, author)] = 1
    return (result, row_dois, authors)
def authors_by_doi(w):
    """Build a dictionary mapping each paper's DOI to its author list.

    For every paper yielded by w.reader(), the 'AU' field is parsed with
    authorlist_from_authorfield() and stored under the paper's 'DI' value.
    Papers that share the same 'DI' value overwrite one another, so the
    last one read is the one kept.
    """
    doi_to_authors = {}
    for record in w.reader():
        # Parse the authors first, then look up the key.
        parsed_authors = authorlist_from_authorfield(record['AU'])
        doi_to_authors[record['DI']] = parsed_authors
    return doi_to_authors