def binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations):
    ''' Refines the seed-set size whose averaged spread crosses targeted_size.

    Starts half a step (rounded up) below k, then keeps halving the step:
    k moves down while the averaged spread at k is at least targeted_size and
    up otherwise. The search stops as soon as the step becomes +1.

    Input: G -- graph handed to degreeDiscountIC and runIC
    k -- seed-set size to start from
    Tsize -- dict {seed-set size: averaged spread}; used as a cache and extended in place
    targeted_size -- spread that has to be reached
    step -- distance between the sizes the caller has already evaluated
    p -- propagation probability
    iterations -- number of runIC runs averaged per seed-set size
    Output:
    S -- seed set returned by degreeDiscountIC for the final k
    Tsize -- the dict passed in, including every size evaluated here
    '''
    R = iterations

    def averaged_spread(seeds):
        # mean size of the activated set over R cascades
        avg = 0
        for _ in range(R):
            avg += float(len(runIC(G, seeds, p))) / R
        return avg

    # S is only built when a size is missing from the cache, so remember which
    # size it belongs to; previously S could be unbound or stale at return time
    S = None
    S_size = None

    # initialization for binary search
    stepk = -int(math.ceil(float(step) / 2))
    k += stepk
    if k not in Tsize:
        S = degreeDiscountIC(G, k, p)
        S_size = k
        Tsize[k] = averaged_spread(S)

    # check values of Tsize in between last 2 calculated steps
    while stepk != 1:
        # a single pre-formatted string prints the same text whether print is
        # the Python 2 statement or the Python 3 function
        print('%s %s %s' % (k, stepk, Tsize[k]))
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tsize[k] >= targeted_size else half
        k += stepk

        if k not in Tsize:
            S = degreeDiscountIC(G, k, p)
            S_size = k
            Tsize[k] = averaged_spread(S)

    # every probed size may have been cached by the caller; make sure the
    # returned seed set really is the one for the final k
    if S_size != k:
        S = degreeDiscountIC(G, k, p)
    return S, Tsize
Exemple #2
0
def binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations):
    ''' Refines the seed-set size whose averaged spread crosses targeted_size.

    Starts half a step (rounded up) below k, then keeps halving the step:
    k moves down while the averaged spread at k is at least targeted_size and
    up otherwise. The search stops as soon as the step becomes +1.

    Input: G -- graph handed to degreeDiscountIC and runIC
    k -- seed-set size to start from
    Tsize -- dict {seed-set size: averaged spread}; used as a cache and extended in place
    targeted_size -- spread that has to be reached
    step -- distance between the sizes the caller has already evaluated
    p -- propagation probability
    iterations -- number of runIC runs averaged per seed-set size
    Output:
    S -- seed set returned by degreeDiscountIC for the final k
    Tsize -- the dict passed in, including every size evaluated here
    '''
    R = iterations

    def averaged_spread(seeds):
        # mean size of the activated set over R cascades
        avg = 0
        for _ in range(R):
            avg += float(len(runIC(G, seeds, p))) / R
        return avg

    # S is only built when a size is missing from the cache, so remember which
    # size it belongs to; previously S could be unbound or stale at return time
    S = None
    S_size = None

    # initialization for binary search
    stepk = -int(math.ceil(float(step) / 2))
    k += stepk
    if k not in Tsize:
        S = degreeDiscountIC(G, k, p)
        S_size = k
        Tsize[k] = averaged_spread(S)

    # check values of Tsize in between last 2 calculated steps
    while stepk != 1:
        # a single pre-formatted string prints the same text whether print is
        # the Python 2 statement or the Python 3 function
        print('%s %s %s' % (k, stepk, Tsize[k]))
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tsize[k] >= targeted_size else half
        k += stepk

        if k not in Tsize:
            S = degreeDiscountIC(G, k, p)
            S_size = k
            Tsize[k] = averaged_spread(S)

    # every probed size may have been cached by the caller; make sure the
    # returned seed set really is the one for the final k
    if S_size != k:
        S = degreeDiscountIC(G, k, p)
    return S, Tsize
Exemple #3
0
def getDDData(G, maxk, p):
    ''' Tabulates the averaged spread of degreeDiscountIC seed sets of size 1..maxk.
    Input: G -- graph handed to degreeDiscountIC and avgSize
    maxk -- largest seed-set size to evaluate (inclusive)
    p -- propagation probability
    Output:
    dict {seed-set size: value returned by avgSize(G, S, p, 200)}
    '''
    return {
        size: avgSize(G, degreeDiscountIC(G, size, p), p, 200)
        for size in range(1, maxk + 1)
    }
def spreadDegreeDiscount(G, targeted_size, step=1, p=.01, iterations=200):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (with priority queue)
    Input: G -- networkx graph object
    targeted_size -- desired size of targeted set
    step -- step after each to calculate spread
    p -- propagation probability
    R -- number of iterations to average influence spread
    Output:
    S -- seed set that achieves targeted_size
    Tsize -- averaged targeted size for different sizes of seed set
    '''

    Tsize = dict()
    k = 0
    Tsize[k] = 0
    R = iterations

    while Tsize[k] <= targeted_size:
        k += step
        S = degreeDiscountIC(G, k, p)
        avg = 0
        for i in range(R):
            T = runIC(G, S, p)
            avg += float(len(T))/R
        Tsize[k] = avg

        print k, Tsize[k]

    # binary search for optimal solution
    return binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations)
Exemple #5
0
def spreadDegreeDiscount(G, targeted_size, step=1, p=.01, iterations=200):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (with priority queue)
    Input: G -- networkx graph object
    targeted_size -- desired size of targeted set
    step -- step after each to calculate spread
    p -- propagation probability
    R -- number of iterations to average influence spread
    Output:
    S -- seed set that achieves targeted_size
    Tsize -- averaged targeted size for different sizes of seed set
    '''

    Tsize = dict()
    k = 0
    Tsize[k] = 0
    R = iterations

    while Tsize[k] <= targeted_size:
        k += step
        S = degreeDiscountIC(G, k, p)
        avg = 0
        for i in range(R):
            T = runIC(G, S, p)
            avg += float(len(T)) / R
        Tsize[k] = avg

        print k, Tsize[k]

    # binary search for optimal solution
    return binarySearchBoundary(G, k, Tsize, targeted_size, step, p,
                                iterations)
def spreadDegreeDiscount(G, targeted_size, step=1, p=.01, iterations=200, degreeDiscount_Heuristic='degreeDiscount'):
    ''' Finds initial set of nodes to propagate in Independent Cascade model
    Input: G -- networkx graph object
    targeted_size -- desired size of targeted set
    step -- step after each to calculate spread
    p -- propagation probability
    iterations -- number of iterations to average influence spread
    degreeDiscount_Heuristic -- 'degreeDiscount' selects seeds with degreeDiscountIC,
    any other value selects them with degreeHeuristic
    Output:
    the value returned by binarySearchBoundary, called with the time spent in
    the coarse search as initial_time
    (NOTE(review): for the variant accepting initial_time this looks like
    (S, influence_spread, timer_each_node) -- confirm against the definition in use)
    '''

    # calculate the time  of selecting the first initial nodes (afterwards we will select some of them)
    start_time = time.time()

    # averaged spread per seed-set size; size 0 is recorded as spread 0
    Tsize = dict()
    k = 0
    Tsize[k] = 0
    R = iterations

    # coarse search: enlarge the seed set by `step` until the averaged spread
    # exceeds targeted_size
    while Tsize[k] <= targeted_size:
        k += step

        if (degreeDiscount_Heuristic == 'degreeDiscount'):
            S = degreeDiscountIC(G, k, p)
        else:
            S = degreeHeuristic(G, k, p)

        avg = 0
        for i in range(R):
            T = runIC(G, S, p)
            avg += float(len(T))/R
        Tsize[k] = avg

        print(k, Tsize[k])

    # binary search for optimal solution; the heuristic choice is forwarded so
    # the refinement uses the same seed selector as the coarse search (it was
    # previously dropped, which made the refinement always use degreeDiscountIC)
    return binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations,
                                degreeDiscount_Heuristic=degreeDiscount_Heuristic,
                                initial_time=time.time()-start_time)
Exemple #7
0
    # Excerpt of a larger routine whose header is not visible here.
    # Builds an undirected graph from an edge-list file; an edge seen again
    # increments its 'weight' attribute instead of being added a second time.
    G = nx.Graph()
    with open('graphdata/../graphdata/hep.txt') as f:
        # first line holds two fields (n, m); neither is used in the visible code
        n, m = f.readline().split()
        for line in f:
            u, v = map(int, line.split())
            try:
                G[u][v]['weight'] += 1
            except:
                # NOTE(review): presumably meant for the failed lookup of an edge
                # that is not in G yet; a bare except also hides any other error
                G.add_edge(u, v, weight=1)
            # G.add_edge(u, v, weight=1)
    print 'Built graph G'
    # `start` is assigned outside this excerpt -- presumably a time.time() stamp
    print time.time() - start

    #calculate initial set
    seed_size = 10
    S = degreeDiscountIC(G, seed_size)
    print 'Initial set of', seed_size, 'nodes chosen'
    print time.time() - start

    # write results S to file, one node per line
    # NOTE(review): the file is opened in text mode, so os.linesep may be
    # translated a second time on platforms whose separator is not '\n' -- confirm
    with open('visualisation.txt', 'w') as f:
        for node in S:
            f.write(str(node) + os.linesep)

    # calculate average activated set size
    iterations = 200  # number of iterations
    avg = 0
    for i in range(iterations):
        T = runIC(G, S)
        # each run contributes len(T)/iterations, so avg ends up as the mean
        avg += float(len(T)) / iterations
        # print i, 'iteration of IC'
def binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations, degreeDiscount_Heuristic='degreeDiscount', initial_time=0):
    ''' Refines the seed-set size whose averaged spread crosses targeted_size
    and records how long each newly evaluated size took.

    Starts half a step (rounded up) below k, then keeps halving the step:
    k moves down while the averaged spread at k is at least targeted_size and
    up otherwise. The search stops as soon as the step becomes +1.

    Input: G -- graph handed to the seed selector and runIC
    k -- seed-set size to start from
    Tsize -- dict {seed-set size: averaged spread}; used as a cache and extended in place
    targeted_size -- spread that has to be reached
    step -- distance between the sizes the caller has already evaluated
    p -- propagation probability
    iterations -- number of runIC runs averaged per seed-set size
    degreeDiscount_Heuristic -- 'degreeDiscount' selects seeds with degreeDiscountIC,
    any other value selects them with degreeHeuristic
    initial_time -- time already spent by the caller; first timer entry and
    offset of all later ones
    Output:
    S -- seed set for the final k
    influence_spread -- running sums of len(T)/iterations over the cascades run
    for S; the last entry is the averaged spread
    timer_each_node -- [initial_time] followed by one elapsed-time entry per
    size newly evaluated during the search
    '''
    # Calculate the time it takes to select each node, initial_time is added in order keep track of the time needed for
    # the selection of initial nodes
    start_time = time.time()
    # keep a list for time needed to select each nodes
    timer_each_node = [initial_time]

    R = iterations

    def select_seeds(size):
        # pick the seed selector requested by the caller
        if degreeDiscount_Heuristic == 'degreeDiscount':
            return degreeDiscountIC(G, size, p)
        return degreeHeuristic(G, size, p)

    def running_spread(seeds):
        # returns (averaged spread, list of partial sums after each cascade)
        partial = []
        avg = 0
        for _ in range(R):
            avg += float(len(runIC(G, seeds, p))) / R
            partial.append(avg)
        return avg, partial

    # S / influence_spread are only produced when a size is missing from the
    # cache, so remember which size they belong to; previously both could be
    # unbound (or stale) at return time when every probed size was cached
    S = None
    S_size = None
    influence_spread = []

    # initialization for binary search
    stepk = -int(math.ceil(float(step)/2))
    k += stepk
    if k not in Tsize:
        S = select_seeds(k)
        S_size = k
        Tsize[k], influence_spread = running_spread(S)
        timer_each_node.append(time.time() + initial_time - start_time)

    # check values of Tsize in between last 2 calculated steps
    while stepk != 1:
        print(k, stepk, Tsize[k])
        half = int(math.ceil(float(abs(stepk))/2))
        stepk = -half if Tsize[k] >= targeted_size else half
        k += stepk

        if k not in Tsize:
            S = select_seeds(k)
            S_size = k
            Tsize[k], influence_spread = running_spread(S)

            # keep time for each NEW node selected
            timer_each_node.append(time.time() + initial_time - start_time)

            print("datafaq: ", Tsize)
            print("datafaq[k]: ", Tsize[k])

    # the final k may have come from the cache: build its seed set and spread
    # trace now; Tsize and the timer list are left untouched by this step
    if S_size != k:
        S = select_seeds(k)
        _, influence_spread = running_spread(S)

    print("leeeen: ", len(timer_each_node))
    return S, influence_spread, timer_each_node
def binaryDegreeDiscount(G, tsize, p=.01, a=0.38, step=5, iterations=200):
    ''' Finds minimal number of nodes necessary to reach tsize number of nodes
    using degreeDiscount algorithms and binary search.
    Input: G -- networkx graph object
    tsize -- number of nodes necessary to reach
    p -- propagation probability
    a -- fraction of tsize to use as initial seed set size
    step -- step between iterations of binary search
    iterations -- number of iterations to average independent cascade
    Output:
    S -- seed set chosen by degreeDiscountIC for the size the search ends on
    Tspread -- spread values for different sizes of seed set
    '''
    Tspread = dict()
    # seed set chosen for every evaluated size, so the set returned at the end
    # always matches the final k (the last computed set may belong to another size)
    seed_sets = dict()

    def evaluate(size):
        # choose seeds for `size`, record them with their averaged spread
        seed_sets[size] = degreeDiscountIC(G, size, p)
        Tspread[size] = avgSize(G, seed_sets[size], p, iterations)
        return Tspread[size]

    def report(size, delta):
        # a single pre-formatted string prints the same text whether print is
        # the Python 2 statement or the Python 3 function
        print('%s %s %s' % (size, delta, Tspread[size]))

    # find initial total spread
    k0 = int(a * tsize)
    k = k0
    t = evaluate(k)
    # find bound (lower or upper) of total spread
    report(k, step)
    if t >= tsize:
        # find the value of k that doesn't spread influence up to tsize nodes
        step *= -1
        while t >= tsize:
            # reduce step if necessary
            while k + step < 0:
                step = int(math.ceil(float(step) / 2))
            k += step
            t = evaluate(k)
            report(k, step)
    else:
        # find the value of k that spreads influence up to tsize nodes
        while t < tsize:
            k += step
            t = evaluate(k)
            report(k, step)

    # continue from whichever of the last two evaluated sizes spreads further
    if Tspread[k] < Tspread[k - step]:
        k -= step
        step = abs(step)

    # search precise boundary
    stepk = step
    while abs(stepk) != 1:
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tspread[k] >= tsize else half
        k += stepk

        if k not in Tspread:
            evaluate(k)
        report(k, stepk)

    return seed_sets[k], Tspread
    # Excerpt of a larger routine whose header is not visible here: G, S, I,
    # pi, pa, c1, seed_size and start are all assigned outside this excerpt.
    # It evaluates two seed sets and prints one tab-separated result line.
    #S = randomHeuristic(G, seed_size, p=.05)
    #S = newGreedyICRev(G, seed_size, I, pi, pa, p=.05)
    #S = newGreedyIC(G, seed_size, p=.05)
    # NOTE(review): time.clock() is not available in newer Python 3 releases -- confirm target interpreter
    time1 = time.clock() - start
    iterations = 200  # number of iterations
    # average, over `iterations` runs, the sizes of the two collections runIC returns
    avg1 = 0
    avg2 = 0
    for i in range(iterations):
        T, In = runIC(G, S, I)
        avg1 += float(len(T)) / iterations
        avg2 += float(len(In)) / iterations
    pr_res = str(seed_size)
    l1 = len(S)
    # first score: rounded averages combined with weight pi, minus c1 per seed node
    rev1 = int(round(avg1)) + pi * int(round(avg2)) -  c1 * l1

    c2 = 1.2 #seed node cost=1.2

    # second candidate: degree-discount seed set, scored the same way with cost c2
    S = degreeDiscountIC(G, seed_size, p=.05)
    #S = Rev(G, seed_size, I, pi, pa, c2)
    avg1 = 0
    avg2 = 0
    for i in range(iterations):
        T, In = runIC(G, S, I)
        avg1 += float(len(T)) / iterations
        avg2 += float(len(In)) / iterations
    l2 = len(S)
    rev2 = int(round(avg1)) + pi * int(round(avg2)) - c2 * l2
    # columns: seed_size, rev1, rev2, len of first S, len of second S, time1
    pr_res += '\t' + str(round(rev1,3)) + '\t' + str(round(rev2,3)) + '\t' + str(l1) + '\t' + str(l2)+ '\t' + str(time1) + '\n'

    print(pr_res)
def binaryDegreeDiscount(G, tsize, p=0.01, a=0.38, step=5, iterations=200):
    """ Finds minimal number of nodes necessary to reach tsize number of nodes
    using degreeDiscount algorithms and binary search.
    Input: G -- networkx graph object
    tsize -- number of nodes necessary to reach
    p -- propagation probability
    a -- fraction of tsize to use as initial seed set size
    step -- step between iterations of binary search
    iterations -- number of iterations to average independent cascade
    Output:
    S -- seed set chosen by degreeDiscountIC for the size the search ends on
    Tspread -- spread values for different sizes of seed set
    """
    Tspread = dict()
    # seed set chosen for every evaluated size, so the set returned at the end
    # always matches the final k (the last computed set may belong to another size)
    seed_sets = dict()

    def evaluate(size):
        # choose seeds for `size`, record them with their averaged spread
        seed_sets[size] = degreeDiscountIC(G, size, p)
        Tspread[size] = avgSize(G, seed_sets[size], p, iterations)
        return Tspread[size]

    def report(size, delta):
        # a single pre-formatted string prints the same text whether print is
        # the Python 2 statement or the Python 3 function
        print('%s %s %s' % (size, delta, Tspread[size]))

    # find initial total spread
    k0 = int(a * tsize)
    k = k0
    t = evaluate(k)
    # find bound (lower or upper) of total spread
    report(k, step)
    if t >= tsize:
        # find the value of k that doesn't spread influence up to tsize nodes
        step *= -1
        while t >= tsize:
            # reduce step if necessary
            while k + step < 0:
                step = int(math.ceil(float(step) / 2))
            k += step
            t = evaluate(k)
            report(k, step)
    else:
        # find the value of k that spreads influence up to tsize nodes
        while t < tsize:
            k += step
            t = evaluate(k)
            report(k, step)

    # continue from whichever of the last two evaluated sizes spreads further
    if Tspread[k] < Tspread[k - step]:
        k -= step
        step = abs(step)

    # search precise boundary
    stepk = step
    while abs(stepk) != 1:
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tspread[k] >= tsize else half
        k += stepk

        if k not in Tspread:
            evaluate(k)
        report(k, stepk)

    return seed_sets[k], Tspread
    # Excerpt of a larger routine whose header is not visible here.
    # Builds an undirected graph from an edge-list file; an edge seen again
    # increments its 'weight' attribute instead of being added a second time.
    G = nx.Graph()
    with open('graphdata/../graphdata/hep.txt') as f:
        # first line holds two fields (n, m); neither is used in the visible code
        n, m = f.readline().split()
        for line in f:
            u, v = map(int, line.split())
            try:
                G[u][v]['weight'] += 1
            except:
                # NOTE(review): presumably meant for the failed lookup of an edge
                # that is not in G yet; a bare except also hides any other error
                G.add_edge(u,v, weight=1)
            # G.add_edge(u, v, weight=1)
    print 'Built graph G'
    # `start` is assigned outside this excerpt -- presumably a time.time() stamp
    print time.time() - start

    #calculate initial set
    seed_size = 10
    S = degreeDiscountIC(G, seed_size)
    print 'Initial set of', seed_size, 'nodes chosen'
    print time.time() - start

    # write results S to file, one node per line
    # NOTE(review): the file is opened in text mode, so os.linesep may be
    # translated a second time on platforms whose separator is not '\n' -- confirm
    with open('visualisation.txt', 'w') as f:
        for node in S:
            f.write(str(node) + os.linesep)

    # calculate average activated set size
    iterations = 200 # number of iterations
    avg = 0
    for i in range(iterations):
        T = runIC(G, S)
        # each run contributes len(T)/iterations, so avg ends up as the mean
        avg += float(len(T))/iterations
        # print i, 'iteration of IC'
def binaryDegreeDiscount(G,
                         tsize,
                         p=.01,
                         a=0.38,
                         step=5,
                         iterations=200,
                         degreeDiscount_Heuristic='degreeDiscount'):
    ''' Finds minimal number of nodes necessary to reach tsize number of nodes
    using degreeDiscount algorithms and binary search.
    Input: G -- networkx graph object
    tsize -- number of nodes necessary to reach
    p -- propagation probability
    a -- fraction of tsize to use as initial seed set size
    step -- step between iterations of binary search
    iterations -- number of iterations to average independent cascade
    degreeDiscount_Heuristic -- whether to select degree Discount or degree Heuristic algorithm
    Output:
    S -- seed set for the size the search ends on
    influence_spread -- running sums of len(T)/iterations over the cascades run
    for S; the last entry is the averaged spread
    timer_each_node -- elapsed time since the call started, one entry per
    evaluated seed-set size
    '''

    # Calculate the time it takes to select each node
    start_time = time.time()
    # keep a list for time needed to select each nodes
    timer_each_node = []

    Tspread = dict()
    # seed set chosen for every evaluated size, so the set returned at the end
    # always matches the final k (the last computed set may belong to another size)
    seed_sets = dict()

    def evaluate(size):
        # choose seeds with the requested selector, record them and their spread
        if degreeDiscount_Heuristic == 'degreeDiscount':
            seeds = degreeDiscountIC(G, size, p)
        else:
            seeds = degreeHeuristic(G, size, p)
        seed_sets[size] = seeds
        Tspread[size] = avgSize(G, seeds, p, iterations)
        return Tspread[size]

    # find initial total spread
    k0 = int(a * tsize)
    k = k0
    t = evaluate(k)
    # find bound (lower or upper) of total spread
    print(k, step, Tspread[k])

    # keep time for each NEW node selected
    timer_each_node.append(time.time() - start_time)

    if t >= tsize:
        # find the value of k that doesn't spread influence up to tsize nodes
        step *= -1
        while t >= tsize:
            # reduce step if necessary
            while k + step < 0:
                step = int(math.ceil(float(step) / 2))
            k += step
            t = evaluate(k)
            print(k, step, Tspread[k])

            # keep time for each NEW node selected
            timer_each_node.append(time.time() - start_time)
    else:
        # find the value of k that spreads influence up to tsize nodes
        while t < tsize:
            k += step
            t = evaluate(k)
            print(k, step, Tspread[k])

            # keep time for each NEW node selected
            timer_each_node.append(time.time() - start_time)

    # continue from whichever of the last two evaluated sizes spreads further
    if Tspread[k] < Tspread[k - step]:
        k -= step
        step = abs(step)

    # search precise boundary
    stepk = step
    while abs(stepk) != 1:
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tspread[k] >= tsize else half
        k += stepk

        if k not in Tspread:
            evaluate(k)

            # keep time for each NEW node selected
            timer_each_node.append(time.time() - start_time)
        print(k, stepk, Tspread[k])

        print("(number of nodes) - (spread) : ", Tspread)

    # The spread trace is computed once, after the search, for the final seed
    # set. It used to live inside the loop above, which left influence_spread
    # unbound whenever the loop body never ran (e.g. abs(step) == 1) and re-ran
    # all cascades on every iteration.
    S = seed_sets[k]
    influence_spread = []
    avg = 0
    for _ in range(iterations):
        avg += float(len(runIC(G, S, p))) / iterations
        influence_spread.append(avg)

    print("(spread) : ", influence_spread)

    return S, influence_spread, timer_each_node
Exemple #14
0
            # Truncated excerpt: the enclosing definition and the loop that
            # assigns u and v start outside the visible code.
            # An edge seen again increments its 'weight'; otherwise it is added.
            try:
                G[u][v]['weight'] += 1
            except:
                # NOTE(review): presumably meant for the failed lookup of an edge
                # that is not in G yet; a bare except also hides any other error
                G.add_edge(u, v, weight=1)
            # G.add_edge(u, v, weight=1)
    print 'Built graph G'
    # `start` is assigned outside this excerpt -- presumably a time.time() stamp
    print time.time() - start

    seed_size = 5
    p = .01
    nodes = G.nodes()
    # every seed_size-element combination of nodes is a candidate seed set
    C = combinations(nodes, seed_size)

    # spread[candidate] = avgSize result for that candidate (1000 passed as last argument)
    spread = dict()
    for candidate in C:
        print candidate,
        time2spread = time.time()
        spread[candidate] = avgSize(G, list(candidate), p, 1000)
        print spread[candidate], time.time() - time2spread

    # candidate with the largest recorded spread
    S, val = max(spread.iteritems(), key=lambda (dk, dv): dv)

    print 'S (by brute-force):', S, ' -->', val

    # compare with the degree-discount choice of the same size
    S2 = degreeDiscountIC(G, seed_size, p)
    print 'S (by degree discount):', tuple(S2), ' -->', avgSize(G, S2, p, 1000)
    # NOTE(review): the lookup key is the sorted tuple of S2; this assumes the
    # candidate tuples were generated in sorted node order -- confirm
    print 'S (by degree discount) spreads to %s nodes (according to brute-force)' % (
        spread[tuple(sorted(S2))])
    print 'Total time:', time.time() - start

    console = []
            # Truncated excerpt: the enclosing definition and the loop that
            # yields `line` start outside the visible code.
            # Each line is parsed as two integer node ids; an edge seen again
            # increments its 'weight', otherwise it is added.
            u, v = map(int, line.split())
            try:
                G[u][v]['weight'] += 1
            except:
                # NOTE(review): presumably meant for the failed lookup of an edge
                # that is not in G yet; a bare except also hides any other error
                G.add_edge(u,v, weight=1)
            # G.add_edge(u, v, weight=1)
    print 'Built graph G'
    # `start` is assigned outside this excerpt -- presumably a time.time() stamp
    print time.time() - start

    seed_size = 5
    p = .01
    nodes = G.nodes()
    # every seed_size-element combination of nodes is a candidate seed set
    C = combinations(nodes, seed_size)

    # spread[candidate] = avgSize result for that candidate (1000 passed as last argument)
    spread = dict()
    for candidate in C:
        print candidate,
        time2spread = time.time()
        spread[candidate] = avgSize(G, list(candidate), p, 1000)
        print spread[candidate], time.time() - time2spread

    # candidate with the largest recorded spread
    S, val = max(spread.iteritems(), key = lambda (dk, dv): dv)

    print 'S (by brute-force):', S, ' -->', val

    # compare with the degree-discount choice of the same size
    S2 = degreeDiscountIC(G, seed_size, p)
    print 'S (by degree discount):', tuple(S2), ' -->', avgSize(G, S2, p, 1000)
    # NOTE(review): the lookup key is the sorted tuple of S2; this assumes the
    # candidate tuples were generated in sorted node order -- confirm
    print 'S (by degree discount) spreads to %s nodes (according to brute-force)' %(spread[tuple(sorted(S2))])
    print 'Total time:', time.time() - start

    console = []