Example #1
def getDDData(G, maxk, p):
    data = dict()
    for i in range(1, maxk + 1):
        S = degreeDiscountIC(G, i, p)
        size = avgSize(G, S, p, 200)
        data[i] = size
    return data
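Every example in this listing scores a seed set S with avgSize(G, S, p, iterations), a Monte-Carlo estimate of the expected spread under the Independent Cascade model. That helper is not shown on this page; a minimal sketch of such an estimator (hypothetical code: the name avgSize_sketch and the 1 - (1 - p)**weight activation rule are assumptions, the latter mirroring the (1 - p)**weight blocking probability used in the later examples) could look like:

import random

def avgSize_sketch(G, S, p, iterations):
    # Average Independent Cascade spread of seed set S over `iterations` simulations.
    total = 0.0
    for _ in range(iterations):
        active = set(S)
        frontier = list(S)
        while frontier:
            u = frontier.pop()
            for v in G[u]:
                # each edge gets one activation attempt; weight w raises the chance to 1 - (1 - p)**w
                if v not in active and random.random() < 1 - (1 - p) ** G[u][v]['weight']:
                    active.add(v)
                    frontier.append(v)
        total += len(active)
    return total / iterations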
Example #2
def getGreedyData(G, maxk, p):
    data = dict()
    for i in range(1, maxk + 1):
        S = generalGreedy(G, i, p)
        size = avgSize(G, S, p, 200)
        data[i] = size
    return data
Example #3
def getHarvesterData(G, maxk, Ep):
    data = dict()
    for i in range(1, maxk + 1):
        S = Harvester(G, i, Ep, 100)
        size = avgSize(G, S, .03, 200)
        data[i] = size
    return data
Example #4
def getDegreeHeu(G, maxk, p):
    data = dict()
    for i in range(1, maxk + 1):
        S = degreeHeuristic2(G, i, p)
        size = avgSize(G, S, p, 200)
        data[i] = size
    return data
Example #5
def getNEIMData(G, maxk, p, result):
    data = dict()
    for i in range(1, maxk + 1):
        S = getSeedSet(i, result)
        size = avgSize(G, S, p, 200)
        data[i] = size
    return data
Example #6
def getNewGreedyIC(G, maxk, p):
    data = dict()
    for i in range(1, maxk + 1):
        S = newGreedyIC(G, i, p)
        size = avgSize(G, S, p, 1)
        data[i] = size
    return data
Example #7
def getDataTvsR(G, maxR, stepR, algo, k=50, p=.01):
    data = dict()
    for R in range(1, maxR + 2, stepR):
        S = algo(G, k, p, R)
        size = avgSize(G, S, p, R)
        data[R] = size
        print R
    return data
Example #9
def stopDegreeDiscount(G, tsize, ic_step=1, p=.01, iterations=200):
    ''' Finds an initial set of nodes to propagate from in the Independent Cascade model (using a priority queue).
    Input: G -- networkx graph object
    tsize -- number of nodes necessary to reach
    ic_step -- step of change in k between 2 iterations of IC
    p -- propagation probability
    iterations -- number of IC runs used to estimate the spread of the current seed set
    Output:
    S -- seed set
    Tspread -- spread values for different sizes of seed set
    '''
    S = []
    dd = PQ()  # degree discount
    t = dict()  # number of adjacent vertices that are in S
    d = dict()  # degree of each vertex

    # initialize degree discount
    for u in G.nodes():
        d[u] = sum([G[u][v]['weight'] for v in G[u]])  # each edge adds degree 1
        # d[u] = len(G[u]) # each neighbor adds degree 1
        dd.add_task(u, -d[u])  # add degree of each node
        t[u] = 0

    # add vertices to S greedily
    # until necessary number of nodes can be reached
    Tspread = dict()  # spread for different k
    k = 0
    Tspread[k] = 0
    stepk = 1
    while Tspread[k] < tsize:
        u, priority = dd.pop_item()  # extract node with maximal degree discount
        S.append(u)
        for v in G[u]:
            if v not in S:
                t[v] += G[u][v]['weight']  # increase number of selected neighbors
                priority = d[v] - 2 * t[v] - (d[v] - t[v]) * t[v] * p  # discount of degree
                dd.add_task(v, -priority)
        # calculate IC spread with ic_step
        if stepk == ic_step:
            k = len(S)
            Tspread[k] = avgSize(G, S, p, iterations)
            print k, Tspread[k]
            stepk = 0
        stepk += 1

    # search precise boundary
    if abs(int(math.ceil(float(ic_step) / 2))) == 1:
        return S, Tspread
    else:
        return binarySearchBoundary(G, k, Tspread, tsize, ic_step, p, iterations)
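stopDegreeDiscount drives the greedy selection with a priority queue PQ that exposes add_task(task, priority) and pop_item(); priorities are negated so that the node with the largest degree discount (the d[v] - 2*t[v] - (d[v] - t[v])*t[v]*p update) comes out first. The PQ implementation is not shown on this page; a minimal stand-in with the same two methods, following the standard heapq recipe for a queue with updatable priorities, might be:

import heapq
import itertools

class PQ(object):
    ''' Min-heap priority queue with updatable priorities: re-adding a task
    lazily invalidates its old entry; pop_item returns (task, priority). '''
    _REMOVED = '<removed-task>'

    def __init__(self):
        self._heap = []
        self._entry_finder = {}
        self._counter = itertools.count()

    def add_task(self, task, priority=0):
        if task in self._entry_finder:
            self._entry_finder.pop(task)[-1] = PQ._REMOVED  # mark old entry invalid
        entry = [priority, next(self._counter), task]
        self._entry_finder[task] = entry
        heapq.heappush(self._heap, entry)

    def pop_item(self):
        while self._heap:
            priority, _, task = heapq.heappop(self._heap)
            if task is not PQ._REMOVED:
                del self._entry_finder[task]
                return task, priority
        raise KeyError('pop from an empty priority queue')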
Example #10
def getCCData(G, maxk, p):
    data = dict()
    for i in range(1, maxk + 1):
        S = CC_heuristic(G, i, p)
        print(S)
        M = []
        for j in S:  # CC_heuristic returns (node, score)-like pairs; keep only the node ids
            M.append(j[0])  # ordering of the seed set does not matter for avgSize
        size = avgSize(G, M, p, 200)
        data[i] = size
    return data
Example #11
def getData (G, maxk, algo, p, axis):
    data = dict()
    for k in range(1,maxk+1):
        if axis == "size":
            S = algo(G, k, p)
            size = avgSize(G, S, p, 200)
            data[k] = size
        elif axis == "time":
            start = time.time()
            S = algo(G, k, p)
            finish = time.time()
            data[k] = finish - start
    return data
Example #12
def getData(G, maxk, algo, p, axis):
    data = dict()
    for k in range(1, maxk + 1):
        if axis == "size":
            S = algo(G, k, p)
            size = avgSize(G, S, p, 200)
            data[k] = size
        elif axis == "time":
            start = time.time()
            S = algo(G, k, p)
            finish = time.time()
            data[k] = finish - start
    return data
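All of the get*Data helpers above return a dict keyed by the x-axis value (seed set size k, or R) with the average spread or running time as the value. A hypothetical usage sketch (it assumes matplotlib is installed and reuses degreeDiscountIC from Example #1):

import matplotlib.pyplot as plt

data = getData(G, 50, degreeDiscountIC, .01, "size")  # any helper above works the same way
ks = sorted(data)
plt.plot(ks, [data[k] for k in ks], marker='o')
plt.xlabel('seed set size k')
plt.ylabel('average IC spread')
plt.show()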

Example #17
def reverseCCWP(G, tsize, p, R, iterations):
    '''
     Input:
     G -- undirected graph (nx.Graph)
     tsize -- coverage size (int)
     p -- propagation probability among all edges (float)
     R -- number of iterations to discover CCs (int)
     iterations -- number of iterations to run IC to calculate influence spread
     Output:
     S -- seed set
    '''
    scores = dict(zip(G.nodes(), [0] * len(G)))  # initialize scores
    start = time.time()
    minL = float("Inf")  # number of nodes we start with (tbd later)
    maxL = 1
    avgL = -1
    for it in range(R):
        # remove blocked edges from graph G
        E = deepcopy(G)
        edge_rem = [
            e for e in E.edges()
            if random.random() < (1 - p)**(E[e[0]][e[1]]['weight'])
        ]
        E.remove_edges_from(edge_rem)

        # initialize CC
        CC = dict()  # maps the index of a component to the list of its member nodes
        explored = dict(zip(E.nodes(), [False] * len(E)))
        c = 0
        # perform BFS to discover CC
        for node in E:
            if not explored[node]:
                c += 1
                explored[node] = True
                CC[c] = [node]
                component = list(E[node].keys())  # copy as a list so the BFS below can extend it
                for neighbor in component:
                    if not explored[neighbor]:
                        explored[neighbor] = True
                        CC[c].append(neighbor)
                        component.extend(E[neighbor].keys())

        # find top components that can reach tsize activated nodes
        sortedCC = sorted([(len(dv), dk) for (dk, dv) in CC.items()],
                          reverse=True)
        cumsum = 0  # sum of top components
        curL = 0  # current number of CC that achieve tsize
        # find curL first
        for length, numberCC in sortedCC:
            curL += 1
            cumsum += length
            if cumsum >= tsize:
                break
        # assign scores to L components
        for length, numberCC in sortedCC[:int(2.3 * curL)]:
            weighted_score = 1.0 / (length * curL)
            for node in CC[numberCC]:
                scores[node] += weighted_score
        if curL < minL:
            minL = curL
        if curL > maxL:
            maxL = curL
        print('curL', curL)

        avgL += curL

        print(it + 1, R, time.time() - start)
    print('maxL', maxL)
    print('minL', minL)
    print('avgL', avgL / R)

    # find nodes that achieve tsize coverage starting from top-maxL scores nodes
    orderedScores = sorted(scores.items(),
                           key=operator.itemgetter(1),
                           reverse=True)
    topScores = orderedScores[:1]
    S = [node for (node, _) in topScores]
    coverage = avgSize(G, S, p, iterations)
    print('|S| = %s --> %s' % (len(S), coverage))
    # Penalization phase
    scores_copied = deepcopy(scores)
    # remove all nodes that are already in S
    for node in S:
        scores_copied.pop(node)
    # add new node by one penalizing scores at the same time
    while coverage < tsize:
        maxk, maxv = max(scores_copied.items(), key=operator.itemgetter(1))
        print(maxv)
        S.append(maxk)
        scores_copied.pop(maxk)
        coverage = avgSize(G, S, p, iterations)
        print('|S| = %s --> %s' % (len(S), coverage))
        for v in G[maxk]:
            if v not in S:
                penalty = (1 - p)**G[maxk][v]['weight']
                scores_copied[v] *= penalty
    return S
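The inner loop above is a plain BFS that enumerates the connected components of the edge-sampled graph E. As a sketch (not the author's code), the same CC dictionary could be built directly with networkx:

import networkx as nx

# CC maps a component index (1, 2, ...) to the list of its member nodes,
# matching what the manual BFS constructs.
CC = {c: list(component)
      for c, component in enumerate(nx.connected_components(E), start=1)}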
Example #18
            u, v = map(int, line.split())
            try:
                G[u][v]['weight'] += 1
            except:
                G.add_edge(u,v, weight=1)
            # G.add_edge(u, v, weight=1)
    print 'Built graph G'
    print time.time() - start

    seed_size = 5
    p = .01
    nodes = G.nodes()
    C = combinations(nodes, seed_size)

    spread = dict()
    for candidate in C:
        print candidate,
        time2spread = time.time()
        spread[candidate] = avgSize(G, list(candidate), p, 1000)
        print spread[candidate], time.time() - time2spread

    S, val = max(spread.iteritems(), key = lambda (dk, dv): dv)

    print 'S (by brute-force):', S, ' -->', val

    S2 = degreeDiscountIC(G, seed_size, p)
    print 'S (by degree discount):', tuple(S2), ' -->', avgSize(G, S2, p, 1000)
    print 'S (by degree discount) spreads to %s nodes (according to brute-force)' %(spread[tuple(sorted(S2))])
    print 'Total time:', time.time() - start

    console = []
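This brute-force search is only feasible on tiny graphs: combinations(nodes, seed_size) enumerates C(n, k) candidate seed sets, and each one is scored with 1000 IC simulations. For a sense of scale:

from math import comb  # Python 3.8+

print(comb(1000, 5))  # 8250291250200 candidate seed sets for a 1000-node graph with seed_size = 5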
Example #19
            try:
                G[u][v]['weight'] += 1
            except:
                G.add_edge(u, v, weight=1)
            # G.add_edge(u, v, weight=1)
    print 'Built graph G'
    print time.time() - start

    seed_size = 5
    p = .01
    nodes = G.nodes()
    C = combinations(nodes, seed_size)

    spread = dict()
    for candidate in C:
        print candidate,
        time2spread = time.time()
        spread[candidate] = avgSize(G, list(candidate), p, 1000)
        print spread[candidate], time.time() - time2spread

    S, val = max(spread.iteritems(), key=lambda (dk, dv): dv)

    print 'S (by brute-force):', S, ' -->', val

    S2 = degreeDiscountIC(G, seed_size, p)
    print 'S (by degree discount):', tuple(S2), ' -->', avgSize(G, S2, p, 1000)
    print 'S (by degree discount) spreads to %s nodes (according to brute-force)' % (
        spread[tuple(sorted(S2))])
    print 'Total time:', time.time() - start

    console = []
Example #20
    return S


if __name__ == '__main__':
    import time
    start = time.time()

    # read in graph
    G = nx.Graph()
    with open('../graphdata/hep.txt') as f:
        n, m = f.readline().split()
        for line in f:
            u, v = map(int, line.split())
            try:
                G[u][v]['weight'] += 1
            except:
                G.add_edge(u, v, weight=1)
    print 'Built graph G'
    print time.time() - start

    tsize = 150
    p = .01
    R = 400
    iterations = 300
    S = reverseCCWP(G, tsize, p, R, iterations)
    print S
    print 'Necessary %s initial nodes to target %s nodes in graph G' % (
        len(S), avgSize(G, S, p, iterations))
    print time.time() - start
    console = []
    seed_size = 50

    print 'Start mapping...'
    time2map = time.time()
    R = 200
    # define map function
    def map_CC(it):
        print it
        return CC_parallel(G, seed_size, .01)
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count()*2)
    Scores = pool.map(map_CC, range(R))
    print 'Finished mapping in', time.time() - time2map
    time2reduce = time.time()
    print 'Reducing scores...'
    scores = {v: sum([s[v] for s in Scores]) for v in G}
    topScores = nlargest(seed_size, scores.iteritems(), key = lambda (dk,dv): dv)
    S = [v for (v,_) in topScores]
    print 'Time to reduce', time.time() - time2reduce

    print 'Average size is', avgSize(G,S,.01,200)
    print 'Average size of 10 nodes is', avgSize(G,S[:10],.01,200)

    print 'Total time:', time.time() - start

    # # write results S to file
    # with open('visualisation.txt', 'w') as f:
    #     for node in S:
    #         f.write(str(node) + os.linesep)

    console = []
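Note that these __main__ snippets are Python 2 code: print statements, dict.iteritems(), and tuple-unpacking lambdas such as lambda (dk, dv): dv (a syntax error in Python 3). Under Python 3 the reduce step above would read roughly like this (Scores and seed_size come from the snippet):

from heapq import nlargest

scores = {v: sum(s[v] for s in Scores) for v in G}
topScores = nlargest(seed_size, scores.items(), key=lambda kv: kv[1])
S = [v for v, _ in topScores]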
Example #22
def mapAvgSize(S):
    return avgSize(G, S, p, I)
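This one-liner is the kind of map function handed to a process pool (compare map_CC in Example #20), so that several candidate seed sets are evaluated in parallel. A hypothetical usage sketch, assuming G, p, I and avgSize are defined at module level and candidate_seed_sets is a made-up list of seed sets:

import multiprocessing

if __name__ == '__main__':
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    spreads = pool.map(mapAvgSize, candidate_seed_sets)  # one avgSize estimate per candidate seed set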