Beispiel #1
0
class AcademicLevel():
    """Relate an author's coauthor count to the coauthor counts of their coauthors.

    On construction every author is read through ``RedisHelper`` and the size
    of each author's coauthor collection is cached in ``auCoauNums``.
    """
    def __init__(self):
        self.mRedis = RedisHelper()
        self.authors = self.mRedis.getAllAuthors()
        # author -> number of coauthors
        self.auCoauNums = dict()
        for author in self.authors:
            self.auCoauNums[author] = len(self.mRedis.getAuCoauthors(author))
        # coauthor count -> list of per-author average coauthor counts
        self.coauNumCoauLevel = dict()

    def getCoauNumLevel(self):
        """Populate ``coauNumCoauLevel``.

        For each author, the coauthor counts of all of that author's
        coauthors are averaged and the result is appended to the list stored
        under the author's own coauthor count.  Authors without any coauthor
        are skipped because their average is undefined.
        """
        for index, author in enumerate(self.authors, 1):
            if index % 100000 == 0:
                logging.info(index)
            coaus = self.mRedis.getAuCoauthors(author)
            coauNum = len(coaus)
            if coauNum == 0:
                # Dividing by the coauthor count below would raise
                # ZeroDivisionError for an isolated author.
                continue
            total = sum(float(self.auCoauNums.get(coau)) for coau in coaus)
            self.coauNumCoauLevel.setdefault(coauNum, []).append(
                total / coauNum)

    def saveCoauNumLevel(self):
        """Write one line per coauthor count, then reset ``coauNumCoauLevel``.

        Each line is ``<count>\\t<count>\\t<mean level>``; the count is
        written twice, exactly as the original output format does.
        """
        with open(OUTPUT_COAUNUM_LEVEL_CONUM, 'w') as fileWriter:
            for coauNum, levels in self.coauNumCoauLevel.items():
                cn = str(coauNum)
                coAuLevel = str(sum(levels) / len(levels))
                fileWriter.write(cn + '\t' + cn + '\t' + coAuLevel + '\n')
        # The ``with`` block has already closed the file.
        self.coauNumCoauLevel = {}
Beispiel #2
0
 def __init__(self):
     """Open the Redis helper and cache the coauthor count of every author."""
     redis = RedisHelper()
     self.mRedis = redis
     self.authors = redis.getAllAuthors()
     # author -> size of that author's coauthor collection
     self.auCoauNums = dict(
         (name, len(redis.getAuCoauthors(name))) for name in self.authors)
     # Starts empty; nothing in this constructor fills it.
     self.coauNumCoauLevel = {}
Beispiel #3
0
 def __init__(self):
     """Open the Redis helper and eagerly load authors, coauthors and coauthor times.

     Each step is announced through ``logging.info`` before it runs.
     """
     logging.info("loading Redis data base...")
     self.mRedis = RedisHelper()
     logging.info("loading authors...")
     self.authors = self.mRedis.getAllAuthors()
     logging.info("loading authors' coauthors...")
     # loadAuCoauthors is defined outside this fragment; presumably it builds
     # an author -> coauthors cache from Redis -- TODO confirm.
     self.AuCoaus = self.loadAuCoauthors()
     logging.info("loading coauthor times...")
     # loadCoauTimes is also defined outside this fragment; NOTE(review): it
     # may depend on self.AuCoaus already being set -- confirm before
     # reordering these steps.
     self.CoauTimes = self.loadCoauTimes()
     logging.info("load data done!")
Beispiel #4
0
 def __init__(self):
     """Create an empty graph, load stars/targets, then build the graph.

     The attributes are created before the loader/builder methods that are
     called here, so keep the statement order when editing.
     """
     self.G = nx.Graph()
     self.stars = dict()
     self.targets = dict()
     # loadStarsAndTargets is defined outside this fragment; presumably it
     # fills self.stars and self.targets -- TODO confirm.
     self.loadStarsAndTargets()
     logging.info('loadStarsAndTargets done---------------')
     self.shortestPathLength = dict()
     self.mRedis = RedisHelper()
     # buildGraph is defined outside this fragment; presumably it reads from
     # self.mRedis and adds edges to self.G -- TODO confirm.
     self.buildGraph()
     logging.info('---------------')
Beispiel #5
0
 def __init__(self):
     """Open the Redis helper and total the coauthor-time entries per author."""
     redis = RedisHelper()
     self.mRedis = redis
     self.authors = redis.getAllAuthors()
     # author -> sum, over all coauthors, of len(getAuCoauTimes(author, coauthor))
     self.authorsPN = dict()
     for name in self.authors:
         total = 0
         for partner in redis.getAuCoauthors(name):
             total += len(redis.getAuCoauTimes(name, partner))
         self.authorsPN[name] = total
     # Both result tables start empty.
     self.coauNumAuLevel = dict()
     self.coauNumCoauLevel = dict()
Beispiel #6
0
 def __init__(self):
     """Group every author's total coauthor-time entries by coauthor count."""
     redis = RedisHelper()
     self.mRedis = redis
     self.authors = redis.getAllAuthors()
     # coauthor count -> list of per-author totals
     self.CoauNumColTimes = dict()
     for name in self.authors:
         partners = redis.getAuCoauthors(name)
         total = 0
         for partner in partners:
             total += len(redis.getAuCoauTimes(name, partner))
         self.CoauNumColTimes.setdefault(len(partners), []).append(total)
Beispiel #7
0
 def getDiGraph(self):
     """Add an edge for every (author, coauthor) pair in Redis and return the graph.

     Progress is logged every 1000 authors; node and edge totals are logged
     once loading has finished.
     """
     redis = RedisHelper()
     for seen, name in enumerate(redis.getAllAuthors(), 1):
         if seen % 1000 == 0:
             logging.info(seen)
         for partner in redis.getAuCoauthors(name):
             self.graph.add_edge(name, partner)
     logging.info('load graph done!')
     graph = self.graph
     nodeTotal = str(graph.number_of_nodes())
     logging.info('nodes:' + nodeTotal)
     edgeTotal = str(graph.number_of_edges())
     logging.info('edges:' + edgeTotal)
     return graph
Beispiel #8
0
 def getDiGraph(self):
     """Populate ``self.graph`` from the Redis coauthor data and return it.

     One edge is added per (author, coauthor) pair.  A progress counter is
     logged every 1000 authors, followed by the final node and edge counts.
     """
     helper = RedisHelper()
     processed = 0
     for writer in helper.getAllAuthors():
         processed += 1
         if not processed % 1000:
             logging.info(processed)
         for colleague in helper.getAuCoauthors(writer):
             self.graph.add_edge(writer, colleague)
     logging.info('load graph done!')
     for label, total in (('nodes:', self.graph.number_of_nodes),
                          ('edges:', self.graph.number_of_edges)):
         logging.info(label + str(total()))
     return self.graph
Beispiel #9
0
def extracStarsAndTargets():
    """Select "target" and "star" authors from Redis and write both lists to disk.

    Targets: for every coauthor count from 1 to 250, all authors with that
    count are taken when there are at most 100 of them, otherwise 100
    distinct authors are drawn at random.  Each target maps to its coauthor
    count.

    Stars: up to 100 distinct authors drawn at random from the 400 authors
    with the highest ``getAuthorPR`` value.  Each star maps to that value.

    Output is tab separated: ``OUTPUT_STAR_AUTHORS`` holds ``star\\tPR`` and
    ``OUTPUT_TARGET_AUTHORS`` holds ``author\\tcoauthor count``.
    """
    mRedis = RedisHelper()
    CoAuthorNumbers = dict()  # coauthor count -> authors with that count
    AuthorPRs = dict()        # author -> value returned by getAuthorPR
    for index, author in enumerate(mRedis.getAllAuthors(), 1):
        if index % 1000 == 0:
            logging.info(index)
        coausNum = len(mRedis.getAuCoauthors(author))
        CoAuthorNumbers.setdefault(coausNum, []).append(author)
        AuthorPRs[author] = mRedis.getAuthorPR(author)

    logging.info('Extracting target authors ...')
    targets = dict()
    for i in range(1, 251):
        logging.info(i)
        # A coauthor count that no author has is simply skipped instead of
        # raising KeyError.
        coaus = CoAuthorNumbers.get(i, [])
        if len(coaus) > 100:
            # sample() draws without replacement, so exactly 100 distinct
            # authors are kept (repeated choice() could pick duplicates).
            coaus = random.sample(coaus, 100)
        for au in coaus:
            targets[au] = i

    candidateStars = sorted(AuthorPRs.items(),
                            key=lambda d: d[1],
                            reverse=True)[0:400]
    # Cap at the number of candidates: the previous retry loop never
    # terminated when fewer than 100 authors were available.
    chosen = random.sample(candidateStars, min(100, len(candidateStars)))
    stars = dict()
    for count, (star, PR) in enumerate(chosen, 1):
        stars[star] = PR
        logging.info(count)

    # Release the large intermediates before writing the results.
    CoAuthorNumbers = {}
    AuthorPRs = {}
    candidateStars = []

    with open(OUTPUT_STAR_AUTHORS, 'w') as fileWriter:
        for star, PR in stars.items():
            fileWriter.write(star + '\t' + str(PR) + '\n')
    with open(OUTPUT_TARGET_AUTHORS, 'w') as fileWriter:
        for author, CoauNum in targets.items():
            fileWriter.write(author + '\t' + str(CoauNum) + '\n')
 def __init__(self):
     """Open the Redis helper and total the coauthor-time entries per author."""
     helper = RedisHelper()
     self.mRedis = helper
     self.authors = helper.getAllAuthors()
     # author -> sum, over all coauthors, of len(getAuCoauTimes(author, coauthor))
     self.authorsPN = dict()
     for writer in self.authors:
         entryCounts = [
             len(helper.getAuCoauTimes(writer, colleague))
             for colleague in helper.getAuCoauthors(writer)
         ]
         self.authorsPN[writer] = sum(entryCounts)
     # Both result tables start empty.
     self.coauNumAuLevel = {}
     self.coauNumCoauLevel = {}
Beispiel #11
0
 def __init__(self):
     """Open the Redis helper and cache ``getAuthorPR`` for every author."""
     helper = RedisHelper()
     self.mRedis = helper
     self.authors = helper.getAllAuthors()
     # author -> value returned by getAuthorPR(author)
     self.authorsPR = dict(
         (writer, helper.getAuthorPR(writer)) for writer in self.authors)
     # Both result tables start empty.
     self.coauNumAuLevel = {}
     self.coauNumCoauLevel = {}
Beispiel #12
0
 def __init__(self):
     """Group every author's total coauthor-time entries by coauthor count."""
     helper = RedisHelper()
     self.mRedis = helper
     self.authors = helper.getAllAuthors()
     # coauthor count -> list of per-author totals
     grouped = dict()
     self.CoauNumColTimes = grouped
     for writer in self.authors:
         colleagues = helper.getAuCoauthors(writer)
         entryCounts = [
             len(helper.getAuCoauTimes(writer, colleague))
             for colleague in colleagues
         ]
         bucket = grouped.setdefault(len(colleagues), [])
         bucket.append(sum(entryCounts))
Beispiel #13
0
class CoauNumColTimes():
    """Average number of coauthor-time entries per author, grouped by coauthor count.

    The constructor reads every author through ``RedisHelper`` and fills
    ``self.CoauNumColTimes`` (coauthor count -> list of per-author totals).
    """
    def __init__(self):
        self.mRedis = RedisHelper()
        self.authors = self.mRedis.getAllAuthors()
        self.CoauNumColTimes = dict()
        for author in self.authors:
            coaus = self.mRedis.getAuCoauthors(author)
            # Total number of entries returned by getAuCoauTimes over all of
            # this author's coauthors.
            colTime = sum(
                len(self.mRedis.getAuCoauTimes(author, coau))
                for coau in coaus)
            self.CoauNumColTimes.setdefault(len(coaus), []).append(colTime)

    def saveCoauNumColTimes(self):
        """Write ``<count>\\t<count>\\t<mean total>`` per coauthor count, then reset.

        The count is written twice, exactly as the original output format
        does.
        """
        with open(OUTPUT_COAUNUM_COLTIME, 'w') as fileWriter:
            for coauNum, cts in self.CoauNumColTimes.items():
                cn = str(coauNum)
                # The totals are ints; force float division so the mean is
                # not truncated under Python 2.
                ct = str(float(sum(cts)) / len(cts))
                fileWriter.write(cn + '\t' + cn + '\t' + ct + '\n')
        # The ``with`` block has already closed the file.
        self.CoauNumColTimes = {}
Beispiel #14
0
class CoauNumColTimes():
    """Average number of coauthor-time entries per author, grouped by coauthor count.

    The constructor reads every author through ``RedisHelper`` and fills
    ``self.CoauNumColTimes`` (coauthor count -> list of per-author totals).
    """
    def __init__(self):
        self.mRedis = RedisHelper()
        self.authors = self.mRedis.getAllAuthors()
        self.CoauNumColTimes = dict()
        for author in self.authors:
            coaus = self.mRedis.getAuCoauthors(author)
            coauNum = len(coaus)
            # Total number of entries returned by getAuCoauTimes over all of
            # this author's coauthors.
            colTime = sum(
                len(self.mRedis.getAuCoauTimes(author, coau))
                for coau in coaus)
            colTimes = self.CoauNumColTimes.setdefault(coauNum, [])
            colTimes.append(colTime)

    def saveCoauNumColTimes(self):
        """Write ``<count>\\t<count>\\t<mean total>`` per coauthor count, then reset.

        The count is written twice, exactly as the original output format
        does.
        """
        with open(OUTPUT_COAUNUM_COLTIME, 'w') as fileWriter:
            for coauNum, cts in self.CoauNumColTimes.items():
                cn = str(coauNum)
                # int / int truncates under Python 2; use a float numerator
                # so the written mean keeps its fractional part.
                ct = str(float(sum(cts)) / len(cts))
                fileWriter.write(cn + '\t' + cn + '\t' + ct + '\n')
        # No explicit close(): leaving the ``with`` block closes the file.
        self.CoauNumColTimes = {}
Beispiel #15
0
class Collaboration():
    """In-memory copy of the coauthor data with collaboration statistics.

    The constructor copies every author's coauthor list and every
    (author, coauthor) coauthor-time record out of Redis.  The ``get...``
    report methods work from those caches and write tab-separated files.
    """
    def __init__(self):
        logging.info("loading Redis data base...")
        self.mRedis = RedisHelper()
        logging.info("loading authors...")
        self.authors = self.mRedis.getAllAuthors()
        logging.info("loading authors' coauthors...")
        self.AuCoaus = self.loadAuCoauthors()
        logging.info("loading coauthor times...")
        # Must run after AuCoaus is set: loadCoauTimes reads it via getAuCoaus.
        self.CoauTimes = self.loadCoauTimes()
        logging.info("load data done!")

    def loadAuCoauthors(self):
        """Return a dict mapping each author to a list of their coauthors."""
        auCoaus = dict()
        for author in self.authors:
            auCoaus[author] = list(self.mRedis.getAuCoauthors(author))
        return auCoaus

    def loadCoauTimes(self):
        """Return a dict mapping ``'author:coauthor'`` to ``getAuCoauTimes``' result."""
        aucoauTimes = dict()
        for author in self.authors:
            for coau in self.getAuCoaus(author):
                aucoauTimes[author + ':' + coau] = self.mRedis.getAuCoauTimes(
                    author, coau)
        return aucoauTimes

    def getCoauTimes(self, A, B):
        """Return the cached record for the ordered pair (A, B), or None."""
        return self.CoauTimes.get(A + ':' + B)

    def getAuCoaus(self, A):
        """Return the cached coauthor list of A, or None if A is unknown."""
        return self.AuCoaus.get(A)

    def clearCache(self):
        """Drop the cached authors, coauthor lists and coauthor times."""
        self.authors = []
        # Previously assigned to ``self.auCoaus`` (lower-case ``a``), which
        # left the real ``AuCoaus`` cache in memory.
        self.AuCoaus = {}
        self.CoauTimes = {}

    def isCollabLeadByCoAu(self, A, B):
        """Return True if some common coauthor C had worked with both A and B
        before the earliest A-B entry.

        "Earliest" is the minimum of the cached record for each pair.
        """
        minCoauTimeAB = min(self.getCoauTimes(A, B))
        commonCoauthors = set(self.getAuCoaus(A)) & set(self.getAuCoaus(B))
        for C in commonCoauthors:
            minCoauTimeAC = min(self.getCoauTimes(A, C))
            minCoauTimeBC = min(self.getCoauTimes(C, B))
            if minCoauTimeAB > max(minCoauTimeAC, minCoauTimeBC):
                return True
        return False

    def isCollabLeadNewCoAu(self, A, B):
        """Return True if, for some common coauthor C, the earliest A-C entry
        is later than both the earliest A-B and the earliest C-B entry.
        """
        minCoauTimeAB = min(self.getCoauTimes(A, B))
        commonCoauthors = set(self.getAuCoaus(A)) & set(self.getAuCoaus(B))
        for C in commonCoauthors:
            minCoauTimeAC = min(self.getCoauTimes(A, C))
            minCoauTimeBC = min(self.getCoauTimes(C, B))
            if minCoauTimeAC > max(minCoauTimeAB, minCoauTimeBC):
                return True
        return False

    def _authorsWithProgress(self):
        """Yield every author, logging the running count every 1000 authors."""
        for index, author in enumerate(self.authors, 1):
            if index % 1000 == 0:
                logging.info(index)
            yield author

    @staticmethod
    def _saveAverages(path, groups):
        """Write ``<key>\\t<mean of values>`` for every entry of ``groups``."""
        with open(path, 'w') as fileWriter:
            for coauNum, values in groups.items():
                average = sum(values) * 1.0 / len(values)
                fileWriter.write(str(coauNum) + '\t' + str(average) + '\n')

    def getCollabLeadNewCoAuProb(self):
        """Per coauthor count, write the mean fraction of an author's
        coauthors for which ``isCollabLeadNewCoAu`` is True.

        Authors without coauthors are skipped; their fraction is undefined.
        """
        logging.info("getCollabLeadNewCoAuProb...")
        coauNumCLCProb = dict()
        for author in self._authorsWithProgress():
            coaus = self.getAuCoaus(author)
            coausNum = len(coaus)
            if coausNum == 0:
                # Avoid ZeroDivisionError for isolated authors.
                continue
            hits = [self.isCollabLeadNewCoAu(author, coau)
                    for coau in coaus].count(True)
            coauNumCLCProb.setdefault(coausNum, []).append(
                hits * 1.0 / coausNum)
        self._saveAverages(OUTPUT_COAUNUM_COLLAB_LEAD_COAU_PROB,
                           coauNumCLCProb)

    def getCollabLeadNewCoaus(self):
        """Per coauthor count, write the mean number of an author's coauthors
        for which ``isCollabLeadByCoAu`` is True.
        """
        logging.info("getCoausLeadByCollab...")
        coauNumLeadNewCoauNums = dict()
        for author in self._authorsWithProgress():
            coaus = self.getAuCoaus(author)
            newCoausNum = [
                self.isCollabLeadByCoAu(author, coau) for coau in coaus
            ].count(True)
            coauNumLeadNewCoauNums.setdefault(len(coaus), []).append(
                newCoausNum)
        self._saveAverages(OUTPUT_COAUNUM_COLLAB_LEAD_NEW_COAU_NUM,
                           coauNumLeadNewCoauNums)

    def getCollabLeadPotentialCoaus(self):
        """Per coauthor count, write the mean of the summed coauthor-list
        lengths of an author's coauthors.
        """
        logging.info("getCollabLeadPotentialCoaus...")
        coauNumLeadPotCoaus = dict()
        for author in self._authorsWithProgress():
            coaus = self.getAuCoaus(author)
            potCoausNum = sum(len(self.getAuCoaus(coau)) for coau in coaus)
            coauNumLeadPotCoaus.setdefault(len(coaus), []).append(potCoausNum)
        self._saveAverages(OUTPUT_COAUNUM_COLLAB_LEAD_POT_COAU_NUM,
                           coauNumLeadPotCoaus)
Beispiel #16
0
class BaconNumber(object):
    """Shortest-path ("Bacon number") distances from target authors to star authors.

    The constructor loads the star and target lists from
    ``OUTPUT_STAR_AUTHORS`` / ``OUTPUT_TARGET_AUTHORS`` and builds an
    undirected coauthor graph from Redis.
    """
    def __init__(self):
        self.G = nx.Graph()
        self.stars = dict()
        self.targets = dict()
        self.loadStarsAndTargets()
        logging.info('loadStarsAndTargets done---------------')
        # target author -> list of path lengths, one per star
        self.shortestPathLength = dict()
        self.mRedis = RedisHelper()
        self.buildGraph()
        logging.info('---------------')

    def buildGraph(self):
        """Add one edge per (author, coauthor) pair; log progress every 1000 authors."""
        for index, author in enumerate(self.mRedis.getAllAuthors(), 1):
            if index % 1000 == 0:
                logging.info(index)
            for coau in self.mRedis.getAuCoauthors(author):
                self.G.add_edge(author, coau)

    def getGraphNodeCount(self):
        """Return the number of nodes in the graph."""
        return len(self.G.nodes())

    def getGraphEdgeCount(self):
        """Return the number of edges in the graph."""
        return len(self.G.edges())

    def shortestPath(self, s, t):
        """Return the shortest-path length between s and t in the graph."""
        return nx.shortest_path_length(self.G, s, t)

    def getShortestPathLength(self):
        """Compute target-to-star path lengths and write them out.

        For every target the distance to every star is recorded (``-1`` when
        no path can be computed).  The output file starts with a
        ``nodes:/edges:`` summary line, followed by one line per target:
        ``author\\t<target value>\\t<mean of lengths in (0, 10000)>\\t<lengths...>``.
        Afterwards the results and the graph are released.
        """
        # Iterate the sorted pairs directly; rebuilding a dict from them (as
        # before) discards the order on Python 2.
        orderedTargets = sorted(self.targets.items(), key=lambda d: d[1])
        for index, (author, coausNum) in enumerate(orderedTargets, 1):
            for star in self.stars:
                try:
                    length = self.shortestPath(author, star)
                except (nx.NetworkXException, KeyError):
                    # No path, or a node missing from the graph.  Anything
                    # else (e.g. KeyboardInterrupt) now propagates.
                    length = -1
                self.shortestPathLength.setdefault(author, []).append(length)
            logging.info(str(index))
        with open(OUTPUT_AUTHORS_BACON_NUM, 'w') as fileWriter:
            summary = ('nodes:' + str(self.getGraphNodeCount()) + '\t' +
                       'edges:' + str(self.getGraphEdgeCount()) + '\n')
            fileWriter.write(summary)
            logging.info(summary)
            for author, bacons in self.shortestPathLength.items():
                # Only positive lengths below 10000 count toward the mean.
                valid = [bacon for bacon in bacons if 0 < bacon < 10000]
                avg = sum(valid) * 1.0 / len(valid) if valid else 0
                baconStr = ''.join(str(bacon) + '\t' for bacon in bacons)
                sb = author + '\t' + str(self.targets[author].strip(
                    '\n')) + '\t' + str(avg) + '\t' + baconStr + '\n'
                fileWriter.write(sb)
        # Release the large structures; the file is closed by ``with``.
        self.shortestPathLength = {}
        self.G = None

    def loadStarsAndTargets(self):
        """Fill ``self.stars`` and ``self.targets`` from their tab-separated files."""
        self._loadPairs(OUTPUT_STAR_AUTHORS, self.stars)
        self._loadPairs(OUTPUT_TARGET_AUTHORS, self.targets)

    @staticmethod
    def _loadPairs(path, into):
        """Store ``first column -> second column`` of each line of ``path`` in ``into``.

        The trailing newline is removed from the value; lines with fewer
        than two tab-separated fields are skipped.
        """
        with open(path) as fileReader:
            for line in fileReader:
                fields = line.rstrip('\n').split('\t')
                if len(fields) < 2:
                    continue
                into[fields[0]] = fields[1]
Beispiel #17
0
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes) #第2部分:计算dangling_nodes的PR总值
        for n in x:
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]    #第1部分:将节点n的PR资源分配给各个节点,循环之
        for n in x:
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]   #第3部分:节点n加上dangling nodes和均分的值

        # 迭代检查
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N*tol:
            return x
    return x
    raise NetworkXError('pagerank: power iteration failed to converge '
                        'in %d iterations.' % max_iter)

if __name__ == '__main__':
    # Run pagerank over the graph returned by Graph.getDiGraph() and store
    # every node's value in Redis through addAuthorPR.
    mRedis = RedisHelper()
    graph = Graph()
    G = graph.getDiGraph()
    # Keep the result under its own name: rebinding ``pagerank`` would shadow
    # the function that produced it.
    scores = pagerank(G, max_iter=30, tol=0)
    logging.info('pagerank lentgh:' + str(len(scores)))
    for count, (k, v) in enumerate(scores.items(), 1):
        if count % 1000 == 0:
            logging.info(count)
        mRedis.addAuthorPR(k, v)
    # Drop the references to the large objects.
    graph = None
    scores = None

Beispiel #18
0
# Root logger: INFO and above, formatted as "time : level : message".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s',
)

class CoauNumColTimes():
    """Average number of coauthor-time entries per author, grouped by coauthor count.

    The constructor reads every author through ``RedisHelper`` and fills
    ``self.CoauNumColTimes`` (coauthor count -> list of per-author totals).
    """
    def __init__(self):
        self.mRedis = RedisHelper()
        self.authors = self.mRedis.getAllAuthors()
        self.CoauNumColTimes = dict()
        for author in self.authors:
            coaus = self.mRedis.getAuCoauthors(author)
            # Total number of entries returned by getAuCoauTimes over all of
            # this author's coauthors.
            colTime = sum(
                len(self.mRedis.getAuCoauTimes(author, coau))
                for coau in coaus)
            self.CoauNumColTimes.setdefault(len(coaus), []).append(colTime)

    def saveCoauNumColTimes(self):
        """Write ``<count>\\t<count>\\t<mean total>`` per coauthor count, then reset.

        The count is written twice, exactly as the original output format
        does.
        """
        with open(OUTPUT_COAUNUM_COLTIME, 'w') as fileWriter:
            for coauNum, cts in self.CoauNumColTimes.items():
                cn = str(coauNum)
                # The totals are ints; force float division so the mean is
                # not truncated under Python 2.
                ct = str(float(sum(cts)) / len(cts))
                fileWriter.write(cn + '\t' + cn + '\t' + ct + '\n')
        # The ``with`` block has already closed the file.
        self.CoauNumColTimes = {}

if __name__ == '__main__':
    # coauNumColTimes = CoauNumColTimes()
    # coauNumColTimes.saveCoauNumColTimes()
    # Print the summed length of every author's coauthor collection.
    mRedis = RedisHelper()
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(sum(len(mRedis.getAuCoauthors(au)) for au in mRedis.getAllAuthors()))
Beispiel #19
0
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][
                    weight]  #第1部分:将节点n的PR资源分配给各个节点,循环之
        for n in x:
            x[n] += danglesum * dangling_weights[n] + (
                1.0 - alpha) * p[n]  #第3部分:节点n加上dangling nodes和均分的值

        # 迭代检查
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    return x
    raise NetworkXError('pagerank: power iteration failed to converge '
                        'in %d iterations.' % max_iter)


if __name__ == '__main__':
    # Run pagerank over the graph returned by Graph.getDiGraph() and store
    # every node's value in Redis through addAuthorPR.
    mRedis = RedisHelper()
    graph = Graph()
    G = graph.getDiGraph()
    # Use a separate name for the result so the ``pagerank`` function is not
    # shadowed by its own return value.
    scores = pagerank(G, max_iter=30, tol=0)
    logging.info('pagerank lentgh:' + str(len(scores)))
    for count, (k, v) in enumerate(scores.items(), 1):
        if count % 1000 == 0:
            logging.info(count)
        mRedis.addAuthorPR(k, v)
    # Drop the references to the large objects.
    graph = None
    scores = None
Beispiel #20
0
    """docstring for CoauNumColTimes"""
    def __init__(self):
        """Group every author's total coauthor-time entries by coauthor count."""
        helper = RedisHelper()
        self.mRedis = helper
        self.authors = helper.getAllAuthors()
        # coauthor count -> list of per-author totals
        self.CoauNumColTimes = dict()
        for writer in self.authors:
            colleagues = helper.getAuCoauthors(writer)
            total = 0
            for colleague in colleagues:
                total += len(helper.getAuCoauTimes(writer, colleague))
            self.CoauNumColTimes.setdefault(len(colleagues), []).append(total)

    def saveCoauNumColTimes(self):
        """Write ``<count>\\t<count>\\t<mean total>`` per coauthor count, then reset.

        The count is written twice, exactly as the original output format
        does.  ``self.CoauNumColTimes`` is emptied afterwards.
        """
        with open(OUTPUT_COAUNUM_COLTIME, 'w') as fileWriter:
            for coauNum, cts in self.CoauNumColTimes.items():
                cn = str(coauNum)
                # int / int truncates under Python 2; use a float numerator
                # so the written mean keeps its fractional part.
                ct = str(float(sum(cts)) / len(cts))
                fileWriter.write(cn + '\t' + cn + '\t' + ct + '\n')
        # No explicit close(): leaving the ``with`` block closes the file.
        self.CoauNumColTimes = {}


if __name__ == '__main__':
    # coauNumColTimes = CoauNumColTimes()
    # coauNumColTimes.saveCoauNumColTimes()
    # Print the summed length of every author's coauthor collection.
    mRedis = RedisHelper()
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(sum(len(mRedis.getAuCoauthors(au)) for au in mRedis.getAllAuthors()))