class AcademicLevel():
    """docstring for AcademicLevel"""

    def __init__(self):
        self.mRedis = RedisHelper()
        self.authors = self.mRedis.getAllAuthors()
        self.auCoauNums = dict()
        for author in self.authors:
            self.auCoauNums[author] = len(self.mRedis.getAuCoauthors(author))
        self.coauNumCoauLevel = dict()

    def getCoauNumLevel(self):
        # For each author, average the coauthor counts of his/her coauthors and
        # group the averages by the author's own coauthor count.
        index = 0
        for author in self.authors:
            index += 1
            if index % 100000 == 0:
                logging.info(index)
            coaus = self.mRedis.getAuCoauthors(author)
            coauNum = len(coaus)
            coauAvgLevel = sum([float(self.auCoauNums.get(coau)) for coau in coaus]) / coauNum
            CoauLevels = self.coauNumCoauLevel.setdefault(coauNum, [])
            CoauLevels.append(coauAvgLevel)

    def saveCoauNumLevel(self):
        # One output line per coauthor count: coauNum, coauNum, mean coauthor level.
        with open(OUTPUT_COAUNUM_LEVEL_CONUM, 'w') as fileWriter:
            for coauNum, levels in self.coauNumCoauLevel.items():
                cn = str(coauNum)
                coAuLevel = str(sum(levels) / len(levels))
                fileWriter.write(cn + '\t' + cn + '\t' + coAuLevel + '\n')
        self.coauNumCoauLevel = {}
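# Usage sketch (illustrative, not part of the original module): AcademicLevel
# assumes RedisHelper and OUTPUT_COAUNUM_LEVEL_CONUM are provided elsewhere in
# this repository. A typical run would look like this.
if __name__ == '__main__':
    academicLevel = AcademicLevel()     # loads authors and coauthor counts from Redis
    academicLevel.getCoauNumLevel()     # group average coauthor levels by coauthor count
    academicLevel.saveCoauNumLevel()    # write the per-coauthor-count averages to disk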
# Detached fragment: an __init__ that caches each author's total collaboration
# count (authorsPN) across all coauthors.
def __init__(self):
    self.mRedis = RedisHelper()
    self.authors = self.mRedis.getAllAuthors()
    self.authorsPN = dict()
    for author in self.authors:
        self.authorsPN[author] = sum([
            len(self.mRedis.getAuCoauTimes(author, coau))
            for coau in self.mRedis.getAuCoauthors(author)
        ])
    self.coauNumAuLevel = dict()
    self.coauNumCoauLevel = dict()
def getDiGraph(self):
    mRedis = RedisHelper()
    authors = mRedis.getAllAuthors()
    count = 0
    for author in authors:
        count += 1
        if count % 1000 == 0:
            logging.info(count)
        for coau in mRedis.getAuCoauthors(author):
            self.graph.add_edge(author, coau)
    logging.info('load graph done!')
    logging.info('nodes:' + str(self.graph.number_of_nodes()))
    logging.info('edges:' + str(self.graph.number_of_edges()))
    return self.graph
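# Sketch of the enclosing class (an assumption, not the repository's code): the
# __main__ block of the PageRank script below calls Graph().getDiGraph(), so
# getDiGraph() above is presumably a method of a small wrapper like this one,
# with self.graph initialised to a networkx graph (the original may well use
# nx.DiGraph() instead of nx.Graph()).
import networkx as nx

class Graph(object):
    def __init__(self):
        self.graph = nx.Graph()
    # getDiGraph() as defined above would be a method of this class.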
def extracStarsAndTargets():
    mRedis = RedisHelper()
    stars = dict()
    targets = dict()
    authors = mRedis.getAllAuthors()
    CoAuthorNumbers = dict()
    AuthorPRs = dict()
    index = 0
    for author in authors:
        index += 1
        if index % 1000 == 0:
            logging.info(index)
        coausNum = len(mRedis.getAuCoauthors(author))
        tmp = CoAuthorNumbers.setdefault(coausNum, [])
        tmp.append(author)
        AuthorPRs[author] = mRedis.getAuthorPR(author)
    logging.info('Extracting target authors ...')
    for i in range(1, 251):
        logging.info(i)
        coaus = CoAuthorNumbers.get(i, [])  # guard against empty coauthor-count buckets
        if not coaus:
            continue
        if len(coaus) <= 100:
            for au in coaus:
                targets[au] = i
        for j in range(100):
            au = random.choice(coaus)
            if au not in targets.keys():
                targets[au] = i
    # Pick 100 distinct star authors from the 400 highest-PageRank candidates.
    candidateStars = sorted(AuthorPRs.iteritems(), key=lambda d: d[1], reverse=True)[0:400]
    count = 0
    while count < 100:
        star, PR = random.choice(candidateStars)
        if star not in stars:
            stars[star] = PR
            count += 1
    logging.info(count)
    authors = []
    CoAuthorNumbers = {}
    AuthorPRs = {}
    candidateStars = {}
    with open(OUTPUT_STAR_AUTHORS, 'w') as fileWriter:
        for star, PR in stars.items():
            fileWriter.write(star + '\t' + str(PR) + '\n')
    with open(OUTPUT_TARGET_AUTHORS, 'w') as fileWriter:
        for author, CoauNum in targets.items():
            fileWriter.write(author + '\t' + str(CoauNum) + '\n')
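# Usage sketch (assumption about pipeline order, inferred from the code itself):
# extracStarsAndTargets() reads PageRank scores back via mRedis.getAuthorPR(),
# so it must run after the PageRank script has stored them, and it writes
# OUTPUT_STAR_AUTHORS / OUTPUT_TARGET_AUTHORS, which BaconNumber later reads.
if __name__ == '__main__':
    extracStarsAndTargets()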
# Detached fragment: an __init__ that caches each author's PageRank score (authorsPR).
def __init__(self):
    self.mRedis = RedisHelper()
    self.authors = self.mRedis.getAllAuthors()
    self.authorsPR = dict()
    for author in self.authors:
        self.authorsPR[author] = self.mRedis.getAuthorPR(author)
    self.coauNumAuLevel = dict()
    self.coauNumCoauLevel = dict()
class Collaboration():
    """docstring for Collaboration"""

    def __init__(self):
        logging.info("loading Redis data base...")
        self.mRedis = RedisHelper()
        logging.info("loading authors...")
        self.authors = self.mRedis.getAllAuthors()
        logging.info("loading authors' coauthors...")
        self.AuCoaus = self.loadAuCoauthors()
        logging.info("loading coauthor times...")
        self.CoauTimes = self.loadCoauTimes()
        logging.info("load data done!")

    def loadAuCoauthors(self):
        auCoaus = dict()
        for author in self.authors:
            auCoaus[author] = list(self.mRedis.getAuCoauthors(author))
        return auCoaus

    def loadCoauTimes(self):
        aucoauTimes = dict()
        for author in self.authors:
            for coau in self.getAuCoaus(author):
                aucoauTimes[author + ':' + coau] = self.mRedis.getAuCoauTimes(author, coau)
        return aucoauTimes

    def getCoauTimes(self, A, B):
        return self.CoauTimes.get(A + ':' + B)

    def getAuCoaus(self, A):
        return self.AuCoaus.get(A)

    def clearCache(self):
        self.authors = []
        self.AuCoaus = {}
        self.CoauTimes = {}

    def isCollabLeadByCoAu(self, A, B):
        # True if A and B share a coauthor C whom both collaborated with before
        # their first joint paper, i.e. the A-B collaboration was plausibly
        # introduced by C.
        minCoauTimeAB = min(self.getCoauTimes(A, B))
        commonCoauthors = set(self.getAuCoaus(A)) & set(self.getAuCoaus(B))
        if len(commonCoauthors) < 1:
            return False
        for C in commonCoauthors:
            minCoauTimeAC = min(self.getCoauTimes(A, C))
            minCoauTimeBC = min(self.getCoauTimes(C, B))
            if minCoauTimeAB > max(minCoauTimeAC, minCoauTimeBC):
                return True
        return False

    def isCollabLeadNewCoAu(self, A, B):
        # True if A's first paper with some common coauthor C came after both the
        # A-B and B-C collaborations, i.e. collaborating with B plausibly led A to
        # the new coauthor C.
        minCoauTimeAB = min(self.getCoauTimes(A, B))
        commonCoauthors = set(self.getAuCoaus(A)) & set(self.getAuCoaus(B))
        if len(commonCoauthors) < 1:
            return False
        for C in commonCoauthors:
            minCoauTimeAC = min(self.getCoauTimes(A, C))
            minCoauTimeBC = min(self.getCoauTimes(C, B))
            if minCoauTimeAC > max(minCoauTimeAB, minCoauTimeBC):
                return True
        return False

    def getCollabLeadNewCoAuProb(self):
        logging.info("getCollabLeadNewCoAuProb...")
        coauNumCLCProb = dict()
        index = 0
        for author in self.authors:
            index += 1
            if index % 1000 == 0:
                logging.info(index)
            coaus = self.getAuCoaus(author)
            coausNum = len(coaus)
            prob = [self.isCollabLeadNewCoAu(author, coau)
                    for coau in coaus].count(True) * 1.0 / coausNum
            probs = coauNumCLCProb.setdefault(coausNum, [])
            probs.append(prob)
        with open(OUTPUT_COAUNUM_COLLAB_LEAD_COAU_PROB, 'w') as fileWriter:
            for coauNum, probs in coauNumCLCProb.items():
                coauNumStr = str(coauNum)
                probStr = str(sum(probs) / len(probs))
                fileWriter.write(coauNumStr + '\t' + probStr + '\n')
        coauNumCLCProb = {}

    def getCollabLeadNewCoaus(self):
        logging.info("getCoausLeadByCollab...")
        coauNumLeadNewCoauNums = dict()
        index = 0
        for author in self.authors:
            index += 1
            if index % 1000 == 0:
                logging.info(index)
            coaus = self.getAuCoaus(author)
            coausNum = len(coaus)
            newCoausNum = [self.isCollabLeadByCoAu(author, coau)
                           for coau in coaus].count(True)
            newCoausnums = coauNumLeadNewCoauNums.setdefault(coausNum, [])
            newCoausnums.append(newCoausNum)
        with open(OUTPUT_COAUNUM_COLLAB_LEAD_NEW_COAU_NUM, 'w') as fileWriter:
            for coauNum, newCoausnums in coauNumLeadNewCoauNums.items():
                coauNumStr = str(coauNum)
                newCoausNumStr = str(sum(newCoausnums) * 1.0 / len(newCoausnums))
                fileWriter.write(coauNumStr + '\t' + newCoausNumStr + '\n')
        coauNumLeadNewCoauNums = {}

    def getCollabLeadPotentialCoaus(self):
        logging.info("getCollabLeadPotentialCoaus...")
        coauNumLeadPotCoaus = dict()
        index = 0
        for author in self.authors:
            index += 1
            if index % 1000 == 0:
                logging.info(index)
            coaus = self.getAuCoaus(author)
            coausNum = len(coaus)
            potCoausNum = sum([len(self.getAuCoaus(coau)) for coau in coaus])
            potCoausNums = coauNumLeadPotCoaus.setdefault(coausNum, [])
            potCoausNums.append(potCoausNum)
        with open(OUTPUT_COAUNUM_COLLAB_LEAD_POT_COAU_NUM, 'w') as fileWriter:
            for coauNum, potCoausNums in coauNumLeadPotCoaus.items():
                coauNumStr = str(coauNum)
                potCoausNumStr = str(sum(potCoausNums) * 1.0 / len(potCoausNums))
                fileWriter.write(coauNumStr + '\t' + potCoausNumStr + '\n')
        coauNumLeadPotCoaus = {}
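# Usage sketch (illustrative): Collaboration caches all coauthor lists and
# collaboration times in memory on construction, then each get* method writes
# one coauthor-count vs. statistic file.
if __name__ == '__main__':
    collab = Collaboration()
    collab.getCollabLeadNewCoAuProb()     # probability a collaboration leads to a new coauthor
    collab.getCollabLeadNewCoaus()        # average number of coauthor-introduced collaborations
    collab.getCollabLeadPotentialCoaus()  # average number of second-hop (potential) coauthors
    collab.clearCache()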
class BaconNumber(object):
    """docstring for BaconNumber"""

    def __init__(self):
        self.G = nx.Graph()
        self.stars = dict()
        self.targets = dict()
        self.loadStarsAndTargets()
        logging.info('loadStarsAndTargets done---------------')
        self.shortestPathLength = dict()
        self.mRedis = RedisHelper()
        self.buildGraph()
        logging.info('---------------')

    def buildGraph(self):
        authors = self.mRedis.getAllAuthors()
        index = 0
        for author in authors:
            index += 1
            if index % 1000 == 0:
                logging.info(index)
            coaus = self.mRedis.getAuCoauthors(author)
            for coau in coaus:
                self.G.add_edge(author, coau)

    def getGraphNodeCount(self):
        return len(self.G.nodes())

    def getGraphEdgeCount(self):
        return len(self.G.edges())

    def shortestPath(self, s, t):
        return nx.shortest_path_length(self.G, s, t)

    def getShortestPathLength(self):
        # For every target author, compute the shortest-path length to each star
        # author and write the average (ignoring unreachable pairs) to disk.
        self.targets = dict(sorted(self.targets.iteritems(), key=lambda d: d[1]))
        index = 0
        for author, coausNum in self.targets.items():
            for star in self.stars.keys():
                try:
                    length = self.shortestPath(author, star)
                except Exception:  # star unreachable or node missing from the graph
                    length = -1
                tmp = self.shortestPathLength.setdefault(author, [])
                tmp.append(length)
            index += 1
            logging.info(str(index))
        with open(OUTPUT_AUTHORS_BACON_NUM, 'w') as fileWriter:
            nodeCount = self.getGraphNodeCount()
            edgesCount = self.getGraphEdgeCount()
            fileWriter.write('nodes:' + str(nodeCount) + '\t' + 'edges:' + str(edgesCount) + '\n')
            logging.info('nodes:' + str(nodeCount) + '\t' + 'edges:' + str(edgesCount) + '\n')
            for author, bacons in self.shortestPathLength.items():
                baconStr = ''
                count, sumB, avg = 0, 0.0, 0.0
                for bacon in bacons:
                    baconStr += str(bacon) + '\t'
                    if bacon > 0 and bacon < 10000:
                        sumB += bacon
                        count += 1
                avg = 0 if count == 0 else sumB * 1.0 / count
                sb = author + '\t' + str(self.targets[author].strip('\n')) + '\t' + str(avg) + '\t' + baconStr + '\n'
                fileWriter.write(sb)
        self.shortestPathLength = {}
        self.G = None

    def loadStarsAndTargets(self):
        with open(OUTPUT_STAR_AUTHORS) as fileReader:
            for line in fileReader:
                star = line.split('\t')[0]
                coauNum = line.split('\t')[1]
                self.stars[star] = coauNum
        with open(OUTPUT_TARGET_AUTHORS) as fileReader:
            for line in fileReader:
                target = line.split('\t')[0]
                coauNum = line.split('\t')[1]
                self.targets[target] = coauNum
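# Usage sketch (illustrative): BaconNumber expects the star/target files written
# by extracStarsAndTargets() above to exist before it is constructed.
if __name__ == '__main__':
    baconNumber = BaconNumber()          # loads stars/targets and builds the coauthorship graph
    baconNumber.getShortestPathLength()  # writes average distances to the star authors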
# Fragment: body of the power-iteration loop inside a pagerank() implementation
# (the function header and setup are not part of this excerpt).
        # Part 2: total PR mass held by dangling nodes (nodes with no out-links).
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        # Part 1: distribute node n's PR mass to each of its neighbours.
        for n in x:
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
        # Part 3: add each node's share of the dangling mass and the teleport term.
        for n in x:
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # Convergence check.
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    return x
    # Note: unreachable after the unconditional return above; kept from the original.
    raise NetworkXError('pagerank: power iteration failed to converge '
                        'in %d iterations.' % max_iter)


if __name__ == '__main__':
    mRedis = RedisHelper()
    graph = Graph()
    G = graph.getDiGraph()
    pagerank = pagerank(G, max_iter=30, tol=0)
    logging.info('pagerank length:' + str(len(pagerank)))
    count = 0
    for k, v in pagerank.items():
        count += 1
        if count % 1000 == 0:
            logging.info(count)
        mRedis.addAuthorPR(k, v)
    graph = None
    pagerank = None
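# Self-contained sketch (an assumption, modelled on networkx's own pagerank) of
# the full power-iteration function the fragment above belongs to; the variable
# names (W, x, xlast, p, danglesum, dangling_weights) follow the fragment.
import networkx as nx

def pagerank_sketch(G, alpha=0.85, max_iter=30, tol=0, weight='weight'):
    W = nx.stochastic_graph(nx.DiGraph(G), weight=weight)  # row-normalised transition graph
    N = W.number_of_nodes()
    x = dict.fromkeys(W, 1.0 / N)   # current PR vector
    p = dict.fromkeys(W, 1.0 / N)   # uniform teleport distribution
    dangling_weights = p            # dangling mass is spread uniformly
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0.0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
        for n in x:
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < N * tol:
            break
    return x

# e.g. prs = pagerank_sketch(Graph().getDiGraph(), max_iter=30, tol=0)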
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)


class CoauNumColTimes():
    """docstring for CoauNumColTimes"""

    def __init__(self):
        self.mRedis = RedisHelper()
        self.authors = self.mRedis.getAllAuthors()
        self.CoauNumColTimes = dict()
        for author in self.authors:
            coaus = self.mRedis.getAuCoauthors(author)
            coauNum = len(coaus)
            # Total number of collaboration records across all of the author's coauthors.
            colTime = sum([len(self.mRedis.getAuCoauTimes(author, coau)) for coau in coaus])
            colTimes = self.CoauNumColTimes.setdefault(coauNum, [])
            colTimes.append(colTime)

    def saveCoauNumColTimes(self):
        # One output line per coauthor count: coauNum, coauNum, mean collaboration count.
        with open(OUTPUT_COAUNUM_COLTIME, 'w') as fileWriter:
            for coauNum, cts in self.CoauNumColTimes.items():
                cn = str(coauNum)
                ct = str(sum(cts) / len(cts))
                fileWriter.write(cn + '\t' + cn + '\t' + ct + '\n')
        self.CoauNumColTimes = {}


if __name__ == '__main__':
    # coauNumColTimes = CoauNumColTimes()
    # coauNumColTimes.saveCoauNumColTimes()
    mRedis = RedisHelper()
    print sum([len(mRedis.getAuCoauthors(au)) for au in mRedis.getAllAuthors()])
"""docstring for CoauNumColTimes""" def __init__(self): self.mRedis = RedisHelper() self.authors = self.mRedis.getAllAuthors() self.CoauNumColTimes = dict() for author in self.authors: coaus = self.mRedis.getAuCoauthors(author) coauNum = len(coaus) colTime = sum([ len(self.mRedis.getAuCoauTimes(author, coau)) for coau in coaus ]) colTimes = self.CoauNumColTimes.setdefault(coauNum, []) colTimes.append(colTime) def saveCoauNumColTimes(self): with open(OUTPUT_COAUNUM_COLTIME, 'w') as fileWriter: for coauNum, cts in self.CoauNumColTimes.items(): cn = str(coauNum) ct = str(sum(cts) / len(cts)) fileWriter.write(cn + '\t' + cn + '\t' + ct + '\n') fileWriter.close() self.CoauNumColTimes = {} if __name__ == '__main__': # coauNumColTimes = CoauNumColTimes() # coauNumColTimes.saveCoauNumColTimes() mRedis = RedisHelper() print sum( [len(mRedis.getAuCoauthors(au)) for au in mRedis.getAllAuthors()])