def binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations):
    ''' Refines the seed-set size k by a halving search around targeted_size.

    Starting from k, steps back by ceil(step / 2) and then keeps halving the
    step (rounding up): the search moves down while Tsize[k] >= targeted_size
    and up otherwise, and stops as soon as the step becomes +1.

    Input: G -- graph object handed to newGreedyIC and runIC
    k -- seed-set size to start from
    Tsize -- dict mapping seed-set size to averaged spread; used as a cache
             and updated in place
    targeted_size -- spread the search is centred on
    step -- positive step that was used to reach k
    p -- propagation probability
    iterations -- number of runIC runs averaged per spread estimate
    Output:
    S -- seed set computed by newGreedyIC for the final k
    Tsize -- the (updated) size -> averaged spread dict
    Raises: ValueError if step is not positive (the search step would be 0
    and the loop below could never terminate).
    '''
    if step <= 0:
        raise ValueError("step must be positive, got %r" % (step,))

    R = iterations

    def _seed_and_score(size):
        # Select `size` seeds and cache their spread averaged over R runs.
        seeds = newGreedyIC(G, size, p)
        avg = 0
        for _ in range(R):
            avg += float(len(runIC(G, seeds, p))) / R
        Tsize[size] = avg
        return seeds

    S = None
    S_size = None  # seed-set size that S was computed for

    # initialization for binary search
    stepk = -int(math.ceil(float(step) / 2))
    k += stepk
    if k not in Tsize:
        S, S_size = _seed_and_score(k), k

    # check values of Tsize in between last 2 calculated steps
    while stepk != 1:
        # single-argument print(...) behaves the same as a statement or a function
        print('%s %s %s' % (k, stepk, Tsize[k]))
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tsize[k] >= targeted_size else half
        k += stepk
        if k not in Tsize:
            S, S_size = _seed_and_score(k), k

    # When the spread for the final k was already cached, no seed set was
    # computed for it in this call (S is missing or belongs to another size),
    # so select one now to keep the returned S consistent with k.
    if S_size != k:
        S = newGreedyIC(G, k, p)
    return S, Tsize
def binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations):
    ''' Refines the seed-set size k by a halving search around targeted_size.

    Starting from k, steps back by ceil(step / 2) and then keeps halving the
    step (rounding up): the search moves down while Tsize[k] >= targeted_size
    and up otherwise, and stops as soon as the step becomes +1.

    Input: G -- graph object handed to degreeDiscountIC and runIC
    k -- seed-set size to start from
    Tsize -- dict mapping seed-set size to averaged spread; used as a cache
             and updated in place
    targeted_size -- spread the search is centred on
    step -- positive step that was used to reach k
    p -- propagation probability
    iterations -- number of runIC runs averaged per spread estimate
    Output:
    S -- seed set computed by degreeDiscountIC for the final k
    Tsize -- the (updated) size -> averaged spread dict
    Raises: ValueError if step is not positive (the search step would be 0
    and the loop below could never terminate).
    '''
    if step <= 0:
        raise ValueError("step must be positive, got %r" % (step,))

    R = iterations
    S = None
    computed_for = None  # the k for which S was actually selected

    # initialization for binary search
    stepk = -int(math.ceil(float(step) / 2))
    k += stepk

    while True:
        if k not in Tsize:
            # spread for this size is unknown: pick seeds and average R cascades
            S = degreeDiscountIC(G, k, p)
            computed_for = k
            avg = 0
            for _ in range(R):
                avg += float(len(runIC(G, S, p))) / R
            Tsize[k] = avg

        if stepk == 1:
            break

        # check values of Tsize in between last 2 calculated steps
        # single-argument print(...) behaves the same as a statement or a function
        print('%s %s %s' % (k, stepk, Tsize[k]))
        magnitude = int(math.ceil(float(abs(stepk)) / 2))
        if Tsize[k] >= targeted_size:
            stepk = -magnitude
        else:
            stepk = magnitude
        k += stepk

    # With a cached Tsize[k] (always the case when step == 1, because the
    # caller has already evaluated every size) no seed set exists for the
    # final k yet; previously this ended in an unbound or stale S.
    if computed_for != k:
        S = degreeDiscountIC(G, k, p)
    return S, Tsize
def spreadDegreeDiscount(G, targeted_size, step=1, p=.01, iterations=200):
    ''' Grows a degreeDiscountIC seed set in increments of `step` until its averaged
    Independent Cascade spread exceeds targeted_size, then hands over to
    binarySearchBoundary to refine the size.
    Input: G -- networkx graph object
    targeted_size -- desired size of targeted set
    step -- increment of the seed-set size between two spread estimates
    p -- propagation probability
    iterations -- number of runIC runs averaged per spread estimate
    Output (as returned by binarySearchBoundary):
    S -- seed set
    Tsize -- dict mapping seed-set size to averaged spread
    '''
    runs = iterations
    Tsize = {0: 0}  # an empty seed set spreads to nothing
    k = 0

    while Tsize[k] <= targeted_size:
        k += step
        seeds = degreeDiscountIC(G, k, p)
        mean_spread = 0
        for _ in range(runs):
            activated = runIC(G, seeds, p)
            mean_spread += float(len(activated)) / runs
        Tsize[k] = mean_spread

        # progress: current seed-set size and its averaged spread
        print('%s %s' % (k, Tsize[k]))

    # binary search for optimal solution
    return binarySearchBoundary(
        G, k, Tsize, targeted_size, step, p, iterations)
Example #4
0
def spreadDegreeDiscount(G, targeted_size, step=1, p=.01, iterations=200):
    ''' Coarse search for a degreeDiscountIC seed set whose averaged spread
    exceeds targeted_size, followed by a refinement in binarySearchBoundary.
    Input: G -- networkx graph object
    targeted_size -- desired size of targeted set
    step -- how much the seed-set size grows between spread estimates
    p -- propagation probability
    iterations -- number of runIC runs averaged per spread estimate
    Output (as returned by binarySearchBoundary):
    S -- seed set
    Tsize -- dict mapping seed-set size to averaged spread
    '''

    def _mean_spread(seed_set):
        # average number of activated nodes over `iterations` cascades
        total = 0
        for _ in range(iterations):
            total += float(len(runIC(G, seed_set, p))) / iterations
        return total

    k = 0
    Tsize = dict()
    Tsize[k] = 0

    # enlarge the seed set until the estimated spread passes the target
    while Tsize[k] <= targeted_size:
        k += step
        Tsize[k] = _mean_spread(degreeDiscountIC(G, k, p))
        print('%s %s' % (k, Tsize[k]))

    # binary search for optimal solution
    refined = binarySearchBoundary(G, k, Tsize, targeted_size, step, p,
                                   iterations)
    return refined
def generalGreedy(G, k, p=0.01):
    """ Builds a seed set of k nodes with the general greedy heuristic.

    In each of the k rounds every node outside the current seed set is scored
    by the spread of (seed set + node), averaged over 20 runIC runs; the node
    popped first from the priority queue is added to the seed set.

    Input: G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output: S -- initial set of k nodes to propagate
    """
    import time

    t0 = time.time()
    runs = 20  # number of cascades averaged per candidate
    S = []  # selected seed nodes, in order of selection
    for round_no in range(k):
        queue = PQ()
        for node in G.nodes():
            if node in S:
                continue
            queue.add_task(node, 0)  # start every candidate at zero spread
            for _ in range(runs):
                # priorities are stored negated: each run subtracts its
                # normalized spread from the candidate's current priority
                current, _count, _task = queue.entry_finder[node]
                gain = float(len(runIC(G, S + [node], p))) / runs
                queue.add_task(node, current - gain)
        best, _priority = queue.pop_item()
        S.append(best)
        # progress: round index, requested size, elapsed seconds
        print('%s %s %s' % (round_no, k, time.time() - t0))
    return S
def generalGreedy(G, k, p=.01):
    ''' Selects k seed nodes one at a time with the general greedy heuristic.
    Input: G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output: S -- initial set of k nodes to propagate
    '''
    import time
    began = time.time()
    trials = 20  # number of times to run Random Cascade per candidate
    S = []
    picked = 0
    while picked < k:
        candidates = PQ()
        # score every node that has not been selected yet
        for v in G.nodes():
            if v not in S:
                candidates.add_task(v, 0)
                trial = 0
                while trial < trials:
                    [score, _cnt, _tsk] = candidates.entry_finder[v]
                    spread = float(len(runIC(G, S + [v], p)))
                    # lower priority == larger accumulated average spread
                    candidates.add_task(v, score - spread / trials)
                    trial += 1
        winner, _score = candidates.pop_item()
        S.append(winner)
        print('%s %s %s' % (picked, k, time.time() - began))
        picked += 1
    return S
    # NOTE(review): the header of the definition these statements belong to is
    # not visible here, and the fragment ends right after `console = []`.
    start = time.time()  # reference point for the elapsed-time print below

    # read in graph
    G = nx.Graph()
    with open('graphdata/../graphdata/hep.txt') as f:
        # the first line carries two whitespace-separated fields; n and m are
        # not used again in the visible code
        n, m = f.readline().split()
        # every remaining line is parsed as one integer pair "u v"
        for line in f:
            u, v = map(int, line.split())
            # a pair that is already an edge gets its weight incremented;
            # otherwise the lookup fails and the edge is created with weight 1
            try:
                G[u][v]['weight'] += 1
            # NOTE(review): the bare except also hides unrelated errors;
            # presumably only the missing-edge lookup failure is meant -- confirm
            except:
                G.add_edge(u,v, weight=1)
    print 'Built graph G'
    print time.time() - start

    # # read in T
    # with open('lemma1.txt') as f:
    #     T = []
    #     k = int(f.readline())
    #     for line in f:
    #         T.append(int(line))
    # print 'Read %s activated nodes' %k
    # print time.time() - start
    # hard-coded seed nodes used in place of the file-based input above
    S = [131, 639, 287, 267, 608, 100, 559, 124, 359, 66]
    k = len(S)
    # runIC is called without an explicit p here
    T = runIC(G,S)

    highdegreeS = highdegreeSet(G,T,k)

    # not used within the visible part of this fragment
    console = []
def spreadNewGreedyIC(G, targeted_size, step=1, p=.01, S0=None, iterations=200):
    ''' Greedily grows a seed set until its averaged Independent Cascade
    spread exceeds targeted_size.

    Input: G -- networkx graph object; each edge is read via G[u][v]['weight']
    targeted_size -- spread to exceed; nodes are added while the averaged
                     spread is <= targeted_size
    step -- currently unused (TODO: add step functionality)
    p -- propagation probability
    S0 -- list of initial seed nodes (default: no seeds); it is copied and
          never modified
    iterations -- number of runIC runs averaged per spread estimate, and
                  number of sampled graphs per greedy round
    Output: S -- list of seed nodes: S0 followed by the greedily added nodes

    NOTE(review): if the averaged spread can never exceed targeted_size the
    loop keeps adding nodes until no candidate is left; what happens then
    depends on PQ.pop_item.
    '''

    import time
    start = time.time()

    if S0 is None:
        S0 = []
    assert isinstance(
        S0, list), "S0 must be a list. %s provided instead" % type(S0)
    # Work on a copy: appending to S0 itself would leak seeds into the
    # caller's list (and, with a list default, into later calls).
    S = list(S0)
    R = iterations

    tsize = 0
    for _ in range(R):
        T = runIC(G, S, p)
        tsize += float(len(T)) / R

    round_idx = 0  # number of greedy rounds completed so far
    while tsize <= targeted_size:
        # accumulated (negated) number of additional nodes each remaining
        # node would bring to the set S, averaged over R sampled graphs
        s = PQ()
        for v in G.nodes():
            if v not in S:
                s.add_task(v, 0)

        # calculate potential additional spread for each vertex not in S
        prg_idx = 1
        prcnt = .1  # progress is reported in steps of 10%
        for idx in range(1, R + 1):
            # create new pruned graph E
            E = deepcopy(G)
            edge_rem = []  # edges to remove
            for (u, v) in E.edges():
                w = G[u][v]['weight']
                # NOTE(review): this removes an edge with probability
                # 1 - (1 - p)**w, which looks like the probability of the
                # edge being live, not blocked -- confirm the intended
                # direction. Left unchanged.
                if random() < 1 - (1 - p)**w:
                    edge_rem.append((u, v))
            E.remove_edges_from(edge_rem)

            # find reachable vertices from S
            Rs = bfs(E, S)
            # one set per sampled graph instead of rebuilding S + Rs per node
            covered = set(S)
            covered.update(Rs)

            # find additional nodes each vertex would bring to the set S
            for v in G.nodes():
                if v not in covered:  # neither a seed nor reached from the seeds
                    [priority, c, task] = s.entry_finder[v]
                    s.add_task(v, priority - float(len(bfs(E, [v]))) / R)

            if idx == int(prg_idx * prcnt * R):
                print('%s%%...' % (int(prg_idx * prcnt * 100)))
                prg_idx += 1

        # add vertex with maximum potential spread
        task, priority = s.pop_item()
        S.append(task)
        # progress: round index, seed count, chosen node, its score, seconds
        print('%s %s %s %s %s' % (round_idx, len(S), task, -priority,
                                  time.time() - start))
        round_idx += 1

        tsize = 0
        for _ in range(R):
            T = runIC(G, S, p)
            tsize += float(len(T)) / R
    return S
Example #9
0
    # NOTE(review): the header of the definition these statements belong to is
    # not visible here, and the fragment ends right after `console = []`.
    start = time.time()  # reference point for the elapsed-time print below

    # read in graph
    G = nx.Graph()
    with open('graphdata/../graphdata/hep.txt') as f:
        # the first line carries two whitespace-separated fields; n and m are
        # not used again in the visible code
        n, m = f.readline().split()
        # every remaining line is parsed as one integer pair "u v"
        for line in f:
            u, v = map(int, line.split())
            # a pair that is already an edge gets its weight incremented;
            # otherwise the lookup fails and the edge is created with weight 1
            try:
                G[u][v]['weight'] += 1
            # NOTE(review): the bare except also hides unrelated errors;
            # presumably only the missing-edge lookup failure is meant -- confirm
            except:
                G.add_edge(u, v, weight=1)
    print 'Built graph G'
    print time.time() - start

    # # read in T
    # with open('lemma1.txt') as f:
    #     T = []
    #     k = int(f.readline())
    #     for line in f:
    #         T.append(int(line))
    # print 'Read %s activated nodes' %k
    # print time.time() - start
    # hard-coded seed nodes used in place of the file-based input above
    S = [131, 639, 287, 267, 608, 100, 559, 124, 359, 66]
    k = len(S)
    # runIC is called without an explicit p here
    T = runIC(G, S)

    highdegreeS = highdegreeSet(G, T, k)

    # not used within the visible part of this fragment
    console = []
def spreadNewGreedyIC(G, targeted_size, step=1, p=.01, S0=None, iterations = 200):
    ''' Adds seed nodes one by one until the averaged Independent Cascade
    spread of the seed set exceeds targeted_size.

    Input: G -- networkx graph object; each edge is read via G[u][v]['weight']
    targeted_size -- spread to exceed; nodes are added while the averaged
                     spread is <= targeted_size
    step -- currently unused (TODO: add step functionality)
    p -- propagation probability
    S0 -- list of initial seed nodes (default: no seeds); it is copied and
          never modified
    iterations -- number of runIC runs averaged per spread estimate, and
                  number of sampled graphs per greedy round
    Output: S -- list of seed nodes: S0 followed by the greedily added nodes

    NOTE(review): if the averaged spread can never exceed targeted_size the
    loop keeps adding nodes until no candidate is left; what happens then
    depends on PQ.pop_item.
    '''

    import time
    start = time.time()

    if S0 is None:
        S0 = []
    assert isinstance(S0, list), "S0 must be a list. %s provided instead" % type(S0)
    # Copy so that neither the caller's list nor a shared default is mutated
    # by the appends below.
    S = list(S0) # set of selected nodes
    R = iterations

    def _estimate_spread():
        # averaged number of nodes activated from the current S over R cascades
        total = 0
        for _ in range(R):
            total += float(len(runIC(G, S, p)))/R
        return total

    tsize = _estimate_spread()

    rounds_done = 0 # greedy rounds completed so far
    while tsize <= targeted_size:
        s = PQ() # negated average number of additional nodes per candidate
        # initialize values of s
        for v in G.nodes():
            if v not in S:
                s.add_task(v, 0)
        # calculate potential additional spread for each vertex not in S
        prg_idx = 1
        prcnt = .1 # for progress to print
        for idx in range(1, R + 1):
            # create new pruned graph E
            E = deepcopy(G)
            edge_rem = [] # edges to remove
            for (u,v) in E.edges():
                w = G[u][v]['weight']
                # NOTE(review): an edge is removed with probability
                # 1 - (1 - p)**w, which looks like the probability of the
                # edge being live, not blocked -- confirm the intended
                # direction. Left unchanged.
                if random() < 1 - (1 - p)**w:
                    edge_rem.append((u,v))
            E.remove_edges_from(edge_rem)
            # find reachable vertices from S
            Rs = bfs(E, S)
            # build the lookup once per sampled graph; the former S + Rs
            # concatenation was redone for every node
            already_reached = set(S)
            already_reached.update(Rs)
            # find additional nodes each vertex would bring to the set S
            for v in G.nodes():
                if v not in already_reached:
                    [priority, c, task] = s.entry_finder[v]
                    s.add_task(v, priority - float(len(bfs(E, [v])))/R)

            if idx == int(prg_idx*prcnt*R):
                print('%s%%...' %(int(prg_idx*prcnt*100)))
                prg_idx += 1
        # add vertex with maximum potential spread
        task, priority = s.pop_item()
        S.append(task)
        # progress: round index, seed count, chosen node, its score, seconds
        print('%s %s %s %s %s' % (rounds_done, len(S), task, -priority, time.time() - start))
        rounds_done += 1

        tsize = _estimate_spread()
    return S