Code Example #1
def main():
    """Parse arguments, call the diameter_approximation_motwani."""

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.description = "Compute an approximation of the diameter and output its value"
    parser.add_argument("graph", help="graph file")
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="increase verbosity (use multiple times for more verbosity)")
    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Read graph from file
    G = util.read_graph(args.graph)

    # Compute the 2/3 approximation
    (elapsed_time, diam) = diameter_approximation_motwani(G)

    # Print info
    print("{}, diameter={}, time={}".format(args.graph, diam, elapsed_time))
Code Example #2
def convert_graph(graphfile, clusmethod, filt=None):
    # Avoid a shared mutable default argument; an empty filter means "keep everything"
    filt = filt if filt is not None else []
    assert clusmethod in ['filteronly', 'mtg', 'mtf', 'nocyc', 'dmst']

    G = util.dct2nx(util.read_graph(graphfile))

    # Step 0: If just filtering, do that
    if clusmethod == 'filteronly':
        G = filter_nodes(G, filt)
        return G

    # Step 1: Find equivalent terms based on method
    if clusmethod in ['mtg', 'mtf']:
        G = entmax2hyp(G)
        G = scc2equiv(G)

    if clusmethod == 'nocyc':
        # Find nodes with same direct hypernyms and hyponyms
        G = samehh2equiv(G)

    # Step 2: Collapse equivalent nodes
    G = util.consolidate_equiv(G)

    # Step 3: Find transitive closure
    G = hypernym_transitive_closure(G)

    # Step 4: Explode equiv nodes again
    G = util.expand_equiv_nodes(G)

    # Step 5: Remove nodes not in filter
    G = filter_nodes(G, filt)

    return G
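
filter_nodes is not shown on this page. Judging from how it is called above (a networkx graph produced by util.dct2nx plus a list filt), a plausible sketch is:

def filter_nodes(G, filt):
    """Sketch under assumptions: keep only nodes listed in filt;
    an empty filter keeps the graph unchanged."""
    if not filt:
        return G
    keep = set(filt)
    return G.subgraph(n for n in G.nodes if n in keep).copy()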
Code Example #3
def main(args):
    '''
    Pipeline for representational learning for all nodes in a graph.
    '''
    nx_G = read_graph(args)
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    learn_embeddings(walks)
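
The read_graph used here appears to follow the node2vec reference implementation, which reads a (possibly weighted) edge list into networkx; a sketch, assuming args carries input, weighted, and directed:

import networkx as nx

def read_graph(args):
    """Sketch of the node2vec-style edge-list reader."""
    if args.weighted:
        G = nx.read_edgelist(args.input, nodetype=int,
                             data=(('weight', float),), create_using=nx.DiGraph())
    else:
        G = nx.read_edgelist(args.input, nodetype=int, create_using=nx.DiGraph())
        for edge in G.edges():
            G[edge[0]][edge[1]]['weight'] = 1  # default unit weight
    if not args.directed:
        G = G.to_undirected()
    return G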
Code Example #4
def main():
    """Parse arguments and perform the computation."""

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.description = "Compute approximate betweenness centrality of all vertices in a graph using the algorihm by Brandes and Pich, and the time to compute them, and write them to file"
    parser.add_argument("epsilon", type=util.valid_interval_float,
            help="accuracy parameter")
    parser.add_argument("delta", type=util.valid_interval_float,
            help="confidence parameter")
    parser.add_argument("graph", help="graph file")
    parser.add_argument("output", help="output file")
    parser.add_argument("-m", "--maxconn", action="store_true", default=False,
            help="if the graph is not weakly connected, only save the largest connected component")
    parser.add_argument("-p", "--pickle", action="store_true", default=False,
            help="use pickle reader for input file")
    parser.add_argument("-s", "--samplesize", type=util.positive_int,
            default=0, help="use specified sample size. Overrides epsilon, delta, and diameter computation")
    parser.add_argument("-t", "--timeout", type=util.positive_int, default=3600,
            help="Timeout computation after specified number of seconds (default 3600 = 1h, 0 = no timeout)")
    parser.add_argument("-u", "--undirected", action="store_true", default=False,
            help="consider the graph as undirected ")
    parser.add_argument("-v", "--verbose", action="count", default=0, help="increase verbosity (use multiple times for more verbosity)")
    parser.add_argument("-w", "--write", nargs="?", default=False, const="auto",
            help="write graph (and computed attributes) to file.")

    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Read graph
    if args.pickle:
        G = util.read_graph(args.graph)
    else:
        G = converter.convert(args.graph, not args.undirected, args.maxconn)

    # Compute betweenness
    if args.samplesize:
        (stats, betw) = betweenness_sample_size(G, args.samplesize, args.write,
                args.timeout)
    else:
        (stats, betw) = betweenness(G, args.epsilon, args.delta, args.write,
                args.timeout)

    # If specified, write betweenness as vertex attributes, and time as graph
    # attribute back to file
    if args.write:
        logging.info("Writing betweenness as vertex attributes and stats as graph attribute")
        if args.write == "auto":
            filename = os.path.splitext(args.graph)[0] + ("-undir" if args.undirected else "-dir") + ".picklez"
            G.write(filename)
        else:
            G.write(args.write)

    # Write stats and betweenness to output
    util.write_to_output(stats, betw, args.output)
Code Example #5
File: diameter_approx.py Project: rionda/centrsampl
def main():
    """Parse arguments, call the approximation, write it to file."""

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.description = "Compute an approximation of the diameter of a graph and the time needed to compute it, and (if specified) write these info as a graph attributes"
    parser.add_argument("graph", help="graph file")
    parser.add_argument("-i", "--implementation", choices=["homegrown",
        "igraph"], default="homegrown", 
        help="use specified implementation of betweenness computation")
    parser.add_argument("-m", "--maxconn", action="store_true", default=False,
            help="if the graph is not weakly connected, only save the largest connected component")
    parser.add_argument("-p", "--pickle", action="store_true", default=False,
            help="use pickle reader for input file")
    parser.add_argument("-u", "--undirected", action="store_true", default=False,
            help="consider the graph as undirected ")
    parser.add_argument("-v", "--verbose", action="count", default=0, 
            help="increase verbosity (use multiple times for more verbosity)")
    parser.add_argument("-w", "--write", action="store_true", default=False,
    help="write the approximation of diameter of the graph as the 'approx_diameter' graph attribute and the time taken to compute it as the 'approx_diam_time' attribute")
    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Seed the random number generator
    random.seed()

    # Read graph from file
    if args.pickle:
        G = util.read_graph(args.graph)
    else:
        G = converter.convert(args.graph, not args.undirected, args.maxconn)

    # Compute the diameter
    (elapsed_time, diam) = diameter(G, args.implementation)

    # Print info
    print("{}, diameter={}, time={}".format(args.graph, diam, elapsed_time))

    # If requested, add graph attributes and write graph back to original file
    if args.write:
        logging.info("Writing diameter approximation and time to graph")
        G["approx_diam"] = diam
        G["approx_diam_time"] = elapsed_time
        # We use format auto-detection, which should work given that it worked
        # when we read the file
        G.write(args.graph) 
Code Example #6
def test_client():
    print('\nTest For Graph Search:\n')
    g = util.read_graph("./data/tinyCG.txt", False)
    source = 0
    bfs = False  # assign search strategy here (note: the flag is negated when passed to GraphSearch)
    search = GraphSearch(g, source, not bfs)
    print('BFS' if not bfs else 'DFS', 'search strategy')
    for v in range(g.v()):
        print(source, 'to', v, ':  ', end='')
        if search.has_path_to(v):
            for i in search.path_to(v):
                if i != v:
                    print(i, '-> ', end='')
                else:
                    print(i)
    print(search.count(), 'vertices connected to the source', search._source)
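
tinyCG.txt is the Sedgewick/Wayne sample file: a vertex count, an edge count, then one "v w" pair per line. The util.read_graph above returns a graph object exposing v() and adjacency; a hypothetical plain-dict reader for the same format:

def read_graph_sketch(path, directed=False):
    """Hypothetical reader for the V/E/edge-pairs format; returns {vertex: [neighbors]}."""
    with open(path) as f:
        v_count = int(f.readline())
        e_count = int(f.readline())
        adj = {v: [] for v in range(v_count)}
        for _ in range(e_count):
            v, w = map(int, f.readline().split())
            adj[v].append(w)
            if not directed:
                adj[w].append(v)
    return adj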
Code Example #7
def generate_random_weights(path, pickle, undirected):
    """Read number of nodes from the file path. Return int."""
    if pickle:
        G = util.read_graph(path)
    else:
        G = converter.convert(path, not undirected, False)

    print(G.ecount())
    txt_dir = path[0:len(path) - 4] + "_weights.txt"  # assumes a 4-character extension such as ".txt"
    with open(txt_dir, 'w') as weight_file:
        # one random weight in [0, 1) per edge, one per line
        for _ in range(0, G.ecount()):
            weight_file.write(repr(random.random()) + '\n')

    return 0
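
The file written above holds one float per line, which is exactly how the --weightFile option in Code Example #11 reads it back; a hypothetical helper:

def read_weights(weight_path):
    """Read one float weight per line, as written by generate_random_weights."""
    with open(weight_path) as weight_file:
        return [float(line.strip()) for line in weight_file]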
Code Example #8
ファイル: bfs.py プロジェクト: vgalilei/getpython
def main():
    graph = read_graph('aero.yml')
    discovered = dfs(graph)
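
Neither read_graph nor dfs is shown for this project. Assuming read_graph returns a dict mapping each node to a list of neighbors, a minimal dfs returning the set of discovered nodes could look like:

def dfs(graph, start=None):
    """Sketch: iterative depth-first search over an adjacency dict."""
    if start is None:
        start = next(iter(graph))  # arbitrary start node
    discovered, stack = set(), [start]
    while stack:
        node = stack.pop()
        if node in discovered:
            continue
        discovered.add(node)
        stack.extend(graph.get(node, []))
    return discovered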
Code Example #9
def compute_centralities(n,
                         l0,
                         d,
                         prop_mispl,
                         prop_neg,
                         network_no,
                         network_desc,
                         centralities,
                         force=False):
    """This method computes all the implemented centralities for a set of input 
    parameters.
       
    :param n: graph size (number of nodes)
    :type n: int
    :param l0: number of modules from which the underlying graph is created
    :type l0: int
    :param d: density
    :type d: float
    :param prop_mispl: proportion of misplaced links
    :type prop_mispl: float
    :param prop_neg: proportion of negative links 
    :type prop_neg: float
    :param network_no: network no
    :type network_no: int
    :param network_desc: network description, i.e. whether network is weighted or unweighted
    :type network_desc: str. One of them: SIGNED_UNWEIGHTED, SIGNED_WEIGHTED
    :param centralities: centralities, e.g. consts.CENTR_DEGREE_NEG, consts.CENTR_DEGREE_POS, etc. 
    :type centralities: str list
    """
    network_folder = path.get_input_network_folder_path(
        n, l0, d, prop_mispl, prop_neg, network_no)
    network_path = os.path.join(network_folder,
                                consts.SIGNED_UNWEIGHTED + ".graphml")

    # we continue if the corresponding input network exists
    if os.path.exists(network_path):
        g = util.read_graph(network_path, consts.FILE_FORMAT_GRAPHML)

        for centr_name in centralities:
            centr_folder_path = path.get_centrality_folder_path(
                n, l0, d, prop_mispl, prop_neg, network_no, network_desc)
            print("computing centrality: " + centr_name + " in " +
                  centr_folder_path)
            os.makedirs(centr_folder_path, exist_ok=True)

            result_filename = centr_name + ".csv"
            result_filepath = os.path.join(centr_folder_path, result_filename)
            if not os.path.exists(result_filepath) or force:
                result = None
                if centr_name == consts.CENTR_DEGREE_NEG:
                    result = centrality.degree_centrality.NegativeCentrality.undirected(
                        g, False).tolist()
                elif centr_name == consts.CENTR_DEGREE_POS:
                    result = centrality.degree_centrality.PositiveCentrality.undirected(
                        g, False).tolist()
                elif centr_name == consts.CENTR_DEGREE_PN:
                    result = centrality.degree_centrality.PNCentrality.undirected(
                        g, False).tolist()
                elif centr_name == consts.CENTR_EIGEN:
                    result = centrality.eigenvector_centrality.compute_eigenvector_centrality(
                        g)

                # result stays None for an unrecognized centrality name; skip it in that case
                if result is None:
                    continue

                # write the centrality values into file (one value per line)
                result_formatted = [util.format_4digits(e) for e in result]
                df = pd.DataFrame({consts.CENT_COL_NAME: result_formatted})
                df.to_csv(result_filepath, sep=",", quoting=1, index=False)

                # write the mean of the centrality values
                desc = consts.PREFIX_MEAN + centr_name
                result_filepath = os.path.join(
                    centr_folder_path, consts.PREFIX_MEAN + result_filename)
                result_formatted = util.format_4digits(mean(result))
                df = pd.DataFrame({desc: [result_formatted]})
                df.to_csv(result_filepath, sep=",", quoting=1, index=False)

                # write the standard deviation of the centrality values
                desc = consts.PREFIX_STD + centr_name
                result_filepath = os.path.join(
                    centr_folder_path, consts.PREFIX_STD + result_filename)
                result_formatted = util.format_4digits(stdev(result))
                df = pd.DataFrame({desc: [result_formatted]})
                df.to_csv(result_filepath, sep=",", quoting=1, index=False)
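
The snippet relies on util.format_4digits and on mean and stdev (presumably from the statistics module). A plausible sketch of the formatter:

from statistics import mean, stdev  # the aggregation functions used above

def format_4digits(x):
    """Sketch: render a number with four digits after the decimal point."""
    return "{:.4f}".format(float(x))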
Code Example #10
'''
Created on Sep 23, 2020

@author: nejat
'''

import util
import consts
import centrality.degree_centrality
import path

if __name__ == '__main__':
    network_path = "../../in/n=20_l0=3_dens=1.0000/propMispl=0.2000/propNeg=0.7000/network=1/signed-unweighted.graphml"
    g = util.read_graph(network_path, consts.FILE_FORMAT_GRAPHML)
    print(g.ecount())  # get the number of edges
    print(g.vcount())  # get the number of vertices
    result = centrality.degree_centrality.NegativeCentrality.undirected(
        g, False)
    result_list = result.tolist()
    print(result_list)
    print(len(result_list))
Code Example #11
File: vc_sample.py Project: rionda/centrsampl
def main():
    """Parse arguments, call betweenness(), write to file."""

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.description = "Compute approximate betweenness centrality of all vertices in a graph using sampling and VC-dimension, and the time to compute them, and write them to file"
    parser.add_argument("epsilon", type=util.valid_interval_float,
            help="accuracy parameter")
    parser.add_argument("delta", type=util.valid_interval_float,
            help="confidence parameter")
    parser.add_argument("graph", help="graph file")
    parser.add_argument("output", help="output file")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-a", "--approximate", action="store_true",
            default=True, help="use approximate diameter (default)")
    group.add_argument("-d", "--diameter", type=util.positive_int, default=0,
            help="value to use for the diameter")
    group.add_argument("-e", "--exact", action="store_true", default=False,
            help="use exact diameter")
    parser.add_argument("-m", "--maxconn", action="store_true", default=False,
            help="if the graph is not weakly connected, only save the largest connected component")
    parser.add_argument("-p", "--pickle", action="store_true", default=False,
            help="use pickle reader for input file")
    parser.add_argument("-s", "--samplesize", type=util.positive_int,
            default=0, help="use specified sample size. Overrides epsilon, delta, and diameter computation")
    parser.add_argument("-t", "--timeout", type=util.positive_int, default=3600,
            help="Timeout computation after specified number of seconds (default 3600 = 1h, 0 = no timeout)")
    parser.add_argument("-u", "--undirected", action="store_true", default=False,
            help="consider the graph as undirected ")
    parser.add_argument("-v", "--verbose", action="count", default=0,
            help="increase verbosity (use multiple times for more verbosity)")
    parser.add_argument("-w", "--write", nargs="?", default=False, const="auto",
            help="write graph (and computed attributes) to file.")
    parser.add_argument("-l", "--weightFile", default="-",
            help="random weights within the interval 0 to 1, must have as many entries as the number of edges")

    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Seed the random number generator
    random.seed()

    # Read graph
    if args.pickle:
        G = util.read_graph(args.graph)
    else:
        G = converter.convert(args.graph, not args.undirected, args.maxconn)

    if args.exact:
        args.approximate = False

    # Read the weights
    weights_list = []
    if args.weightFile != "-":
        with open(args.weightFile, 'r') as weight_file:
            for line in weight_file:
                weights_list.append(float(line.strip()))

    # Compute betweenness
    if args.samplesize:
        (stats, betw) = betweenness_sample_size(G, args.samplesize, args.write)
    else:
        if args.diameter > 0:
            (stats, betw) = betweenness(G, args.epsilon, args.delta,
                    weights_list, args.diameter, args.write)
        else:
            (stats, betw) = betweenness(G, args.epsilon, args.delta,
                    weights_list, args.approximate, args.write)

    # If specified, write betweenness as vertex attributes, and time as graph
    # attribute back to file
    if args.write:
        logging.info("Writing betweenness as vertex attributes and stats as graph attribute")
        if args.write == "auto":
            filename = os.path.splitext(args.graph)[0] + ("-undir" if args.undirected else "-dir") + ".picklez"
            G.write(filename)
        else:
            G.write(args.write)

    # Write stats and betweenness to output
    util.write_to_output(stats, betw, args.output)
Code Example #12
def main():
    """Parse arguments, run experiments, collect results and stats, write to file."""
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.description = "TODO"
    parser.add_argument("epsilon", type=util.valid_interval_float,
            help="accuracy parameter")
    parser.add_argument("delta", type=util.valid_interval_float,
            help="confidence parameter")
    parser.add_argument("runs", type=util.positive_int, default=20, help="number of runs")
    parser.add_argument("graph", help="graph file")
    parser.add_argument("output", help="output file")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-a", "--approximate", action="store_true",
            default=True, help="use approximate diameter (default)")
    group.add_argument("-d", "--diameter", type=util.positive_int, default=0,
            help="value to use for the diameter")
    group.add_argument("-e", "--exact", action="store_true", default=False,
            help="use exact diameter")
    parser.add_argument("-m", "--maxconn", action="store_true", default=False,
            help="if the graph is not weakly connected, only save the largest connected component")
    parser.add_argument("-p", "--pickle", action="store_true", default=False,
            help="use pickle reader for input file")
    parser.add_argument("-s", "--samplesize", type=util.positive_int,
            default=0, help="use specified sample size. Overrides epsilon, delta, and diameter computation")
    parser.add_argument("-t", "--timeout", type=util.positive_int, default=3600,
            help="Timeout computation after specified number of seconds (default 3600 = 1h, 0 = no timeout)")
    parser.add_argument("-u", "--undirected", action="store_true", default=False,
            help="consider the graph as undirected ")
    parser.add_argument("-v", "--verbose", action="count", default=0,
            help="increase verbosity (use multiple times for more verbosity)")
    parser.add_argument("-w", "--weightFile", default="-",
            help="random weights within the interval 0 to 1, must have as many entries as the number of edges")

    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Read graph
    if args.pickle:
        G = util.read_graph(args.graph)
    else:
        G = converter.convert(args.graph, not args.undirected, args.maxconn)

    if args.exact:
        args.approximate = False

    # Read the weights
    weights_list = []
    if args.weightFile != "-":
        with open(args.weightFile, 'r') as weight_file:
            for line in weight_file:
                weights_list.append(float(line.strip()))

    # Perform experiment multiple times
    results = []
    for i in range(args.runs):
        logging.info("Run #%d", i)
        # Compute betweenness
        if args.samplesize:
            results.append(vc_sample.betweenness_sample_size(G,
                args.samplesize, False, args.timeout))
        else:
            if args.diameter > 0:
                results.append(vc_sample.betweenness(G, args.epsilon, args.delta,
                        weights_list, args.diameter, False, args.timeout))
            else:
                results.append(vc_sample.betweenness(G, args.epsilon, args.delta,
                        weights_list, args.approximate, False, args.timeout))

    # Compute aggregate statistics about the experiments
    stats = dict()
    stats["graph"]= os.path.basename(args.graph)
    stats["vertices"] = G.vcount()
    stats["edges"] = G.ecount()
    stats["runs"] = args.runs
    if args.samplesize:
        stats["sample_size"] = args.samplesize
    else:
        stats["delta"] = args.delta
        stats["epsilon"] = args.epsilon
        stats["sample_size"] = results[0][0]["sample_size"]

    stats_names = ["time", "forward_touched_edges", "backward_touched_edges"]
    if not args.samplesize:
        stats_names.append("diameter")
        stats_names.append("diameter_touched_edges")
    for stat_name in stats_names:
        values = sorted([x[0][stat_name] for x in results])
        stats[stat_name + "_max"] = values[-1]
        stats[stat_name + "_min"] = values[0]
        stats[stat_name + "_avg"] = sum(values) / args.runs
        if args.runs > 1:
            stats[stat_name + "_stddev"] = math.sqrt(sum([math.pow(value -
                stats[stat_name + "_avg"], 2) for value in values]) / (args.runs - 1))
        else:
            stats[stat_name + "_stddev"] = 0.0

    stats["betw_min"] = [0.0] * G.vcount()
    stats["betw_max"] = [0.0] * G.vcount()
    stats["betw_avg"] = [0.0] * G.vcount()
    for i in range(G.vcount()):
        betws = sorted([x[1][i] for x in results])
        stats["betw_min"][i]= betws[0]
        stats["betw_max"][i] = betws[-1]
        stats["betw_avg"][i] = sum(betws) / args.runs

    csvkeys="graph, runs, epsilon, delta, sample_size"
    csvkeys_names= ["{0}_avg, {0}_min, {0}_stddev, {0}_max, {0}_min".format(stat_name) 
            for stat_name in stats_names]
    csvkeys_list = [csvkeys] + csvkeys_names
    csvkeys = ",".join(csvkeys_list)
   # print(stats["betw_min"])   
    print(csvkeys)
    print(util.dict_to_csv(stats, csvkeys))
    # Write stats and results to output file
    try:
        with open(args.output, "wb") as output:
            logging.info("Writing stats and results to %s", args.output)
            pickle.dump((stats, results), output)
    except OSError as E:
        logging.critical("Cannot write stats and results to %s: %s",
                args.output, E.strerror)
        sys.exit(2)
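
To consume the pickled output later, unpack the (stats, results) tuple that was dumped; for example:

import pickle

with open("vc_out.picklez", "rb") as pkl_file:  # file name is just an example
    stats, results = pickle.load(pkl_file)
print(stats["time_avg"], stats["sample_size"])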
Code Example #13
File: compare_exp.py Project: rionda/centrsampl
def main():
    """Parse arguments, do the comparison, write to output."""
    parser = argparse.ArgumentParser()
    parser.description = "compare estimation of betweenness centralities to exact values"
    parser.add_argument("epsilon", type=util.valid_interval_float, help="accuracy parameter")
    parser.add_argument("delta", type=util.valid_interval_float, help="confidence parameter")
    parser.add_argument("graph", help="graph file")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-a", "--approximate", action="store_true",
            default=True, help="use approximate diameter when computing approximation of betweenness using VC-Dimension (default)")
    group.add_argument("-d", "--diameter", type=util.positive_int, default=0,
            help="value to use for the diameter")
    group.add_argument("-e", "--exact", action="store_true", default=False,
            help="use exact diameter when computing approximation of betweenness using VC-Dimension")
    parser.add_argument("-m", "--maxconn", action="store_true", default=False,
            help="if the graph is not weakly connected, only save the largest connected component")
    parser.add_argument("-p", "--pickle", action="store_true", default=False,
            help="use pickle reader for input file")
    parser.add_argument("-r", "--resultfiles", nargs=4, 
    help="Use results files rather than recomputing betweenness. Files should be specified as 'exact_res vc_res bp_res gss_res'")
    parser.add_argument("-s", "--samplesize", type=util.positive_int,
            default=0, help="use specified sample size. Overrides epsilon, delta, and diameter computation")
    parser.add_argument("-t", "--timeout", type=util.positive_int, default=3600,
            help="Timeout computation after specified number of seconds (default 3600 = 1h, 0 = no timeout)")
    parser.add_argument("-u", "--undirected", action="store_true", default=False,
            help="consider the graph as undirected ")
    parser.add_argument("-v", "--verbose", action="count", default=0,
            help="increase verbosity (use multiple times for more verbosity)")
    parser.add_argument("-w", "--write", nargs="?", default=False, const="auto",
            help="write graph (and computed attributes) to file.")
    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Seed the random number generator
    random.seed()

    # Read graph
    if args.pickle:
        G = util.read_graph(args.graph)
    else:
        G = converter.convert(args.graph, not args.undirected, args.maxconn)

    if args.exact:
        args.approximate = False

    if not args.resultfiles:
        (exact_stats, exact_betw) = brandes_exact.betweenness(G, args.write,
                args.timeout)
        if args.samplesize: 
            (vc_stats, vc_betw) = vc_sample.betweenness_sample_size(G,
                    args.samplesize, args.write, args.timeout)
            (bp_stats, bp_betw) = brandespich_sample.betweenness_sample_size(G,
                    args.samplesize, args.write, args.timeout)
            (gss_stats, gss_betw) = geisbergerss_sample.betweenness_sample_size(G,
                    args.samplesize, args.write, args.timeout)
        else:
            if args.diameter > 0:
                (vc_stats, vc_betw) = vc_sample.betweenness(G, args.epsilon, args.delta,
                        args.diameter, args.write, args.timeout)
            else:
                (vc_stats, vc_betw) = vc_sample.betweenness(G, args.epsilon, args.delta,
                        args.approximate, args.write, args.timeout)

            (bp_stats, bp_betw) = brandespich_sample.betweenness(G,
                    args.epsilon, args.delta, args.write, args.timeout)
            (gss_stats, gss_betw) = geisbergerss_sample.betweenness(G,
                    args.epsilon, args.delta, args.write, args.timeout)
    else:
        (exact_stats, exact_betw) = util.read_stats_betw(args.resultfiles[0])
        (vc_stats, vc_betw) = util.read_stats_betw(args.resultfiles[1])
        (bp_stats, bp_betw) = util.read_stats_betw(args.resultfiles[2])
        (gss_stats, gss_betw) = util.read_stats_betw(args.resultfiles[3])

    # Compute useful graph statistics (mainly the diameter)
    if "diam" not in G.attributes():
        diameter.diameter(G)

    # If specified, write betweenness as vertex attributes, and time and
    # diameter as graph attributes back to file

    if args.write:
        logging.info("Writing betweenness as vertex attributes and stats as graph attribute")
        if args.write == "auto":
            filename = os.path.splitext(args.graph)[0] + ("-undir" if args.undirected else "-dir") + ".picklez"
            G.write(filename)
        else:
            G.write(args.write)

    # Compute error statistics
    # It is not a problem to sort the error by value because we only compute
    # aggregates.
    
    # Normalize
    #normalizer = math.pow(G.vcount(),2)-G.vcount() 
    #norm_exact_betw = [a/normalizer for a in exact_betw]
    #norm_vc_betw = [a/normalizer for a in vc_betw]
    #norm_bp_betw = [a/normalizer for a in bp_betw]
    #norm_gss_betw = [a/normalizer for a in gss_betw]

    #VC-STATISTICS
    logging.info("Computing error statistics")
    max_err = args.epsilon * G.vcount() * (G.vcount() - 1) / 2
    vc_errs = sorted([abs(a - b) for a,b in zip(exact_betw,vc_betw)])
    vc_stats["err_avg"] = sum(vc_errs) / G.vcount()
    vc_stats["err_max"] = vc_errs[-1]
    vc_stats["err_min"] = list(itertools.filterfalse(lambda x: x == 0, vc_errs))[0]
    vc_stats["err_stddev"] = math.sqrt(sum([math.pow(err - vc_stats["err_avg"], 2) for err in vc_errs]) / (G.vcount() -1))
    vc_stats["euc_dist"] = math.sqrt(sum([math.pow(a - b, 2) for a,b in zip(exact_betw,vc_betw)]))
    vc_stats["wrong_eps"] = 0;
    for i in range(G.vcount()):
        err = abs(exact_betw[i] - vc_betw[i])
        #if err > max_err:
            #vc_stats["wrong_eps"] += 1
            #if vc_stats["wrong_eps"] == 1:
                #print("## VC wrong epsilon ##")
            #print("{} {} {} {} {} {} {}".format(i, G.vs[i].degree(),
                #exact_betw[i], vc_betw[i], bp_betw[i],
                #err, err / (G.vcount() * (G.vcount() -1) / 2)))
    #BP-STATISTICS
    bp_errs = sorted([abs(a - b) for a,b in zip(exact_betw,bp_betw)])
    bp_stats["err_avg"] = sum(bp_errs) / G.vcount()
    bp_stats["err_max"] = max(bp_errs)
    bp_stats["err_min"] = list(itertools.filterfalse(lambda x: x == 0, bp_errs))[0]
    bp_stats["err_stddev"] = math.sqrt(sum([math.pow(err - bp_stats["err_avg"], 2) for err in bp_errs]) / (G.vcount() -1))
    bp_stats["euc_dist"] = math.sqrt(sum([math.pow(a - b, 2) for a,b in zip(exact_betw,bp_betw)]))
    bp_stats["wrong_eps"] = 0
    for i in range(G.vcount()):
        err = abs(exact_betw[i] - bp_betw[i])
        #if err > max_err:
            #bp_stats["wrong_eps"] += 1
            #if bp_stats["wrong_eps"] == 1:
                #print("## BP wrong epsilon ##")
            #print("{} {} {} {} {} {} {}".format(i, G.vs[i].degree(),
                 #exact_betw[i], bp_betw[i], vc_betw[i], err, err / (G.vcount() * (G.vcount() -1) / 2)))
    #GSS-STATISTICS
    gss_errs = sorted([abs(a - b) for a,b in zip(exact_betw,gss_betw)])
    gss_stats["err_avg"] = sum(gss_errs) / G.vcount()
    gss_stats["err_max"] = max(gss_errs)
    gss_stats["err_min"] = list(itertools.filterfalse(lambda x: x == 0, gss_errs))[0]
    gss_stats["err_stddev"] = math.sqrt(sum([math.pow(err - gss_stats["err_avg"], 2) for err in gss_errs]) / (G.vcount() -1))
    gss_stats["euc_dist"] = math.sqrt(sum([math.pow(a - b, 2) for a,b in zip(exact_betw,gss_betw)]))
    gss_stats["wrong_eps"] = 0
    for i in range(G.vcount()):
        err = abs(exact_betw[i] - gss_betw[i])
        #if err > max_err:
            #gss_stats["wrong_eps"] += 1
            #if gss_stats["wrong_eps"] == 1:
                #print("## GSS wrong epsilon ##")
            #print("{} {} {} {} {} {} {}".format(i, G.vs[i].degree(),
                 #exact_betw[i], gss_betw[i], vc_betw[i], err, err / (G.vcount() * (G.vcount() -1) / 2)))

    # Print statistics to output as CSV
    logging.info("Printing statistics")
    print("graph,nodes,edges,diam,directed,epsilon,delta,sample_size")
    print("{},{},{},{},{},{},{},{}".format(G["filename"], G.vcount(),
        G.ecount(), G["diam"], G.is_directed(), args.epsilon, args.delta,
        args.samplesize))
        
    #csvkeys="epsilon, delta, sample_size, time, wrong_eps, err_avg, err_max, err_min, err_stddev, forward_touched_edges, backward_touched_edges, diameter_touched_edges, euc_dist, diameter, diam_type"
    csvkeys="epsilon,delta,sample_size,time,wrong_eps,err_avg,err_stddev,forward_touched_edges,backward_touched_edges,diameter_touched_edges,euc_dist,diameter,diam_type"
    print("type,", csvkeys)
    print("vc,", util.dict_to_csv(vc_stats,csvkeys))
    print("bp,", util.dict_to_csv(bp_stats,csvkeys))
    print("gss,", util.dict_to_csv(gss_stats,csvkeys))
    print("exact,", util.dict_to_csv(exact_stats,csvkeys))
Code Example #14
def compute_stats(n, l0, d, prop_mispl, prop_neg, network_no, network_desc,
                         mystats, force=False):
    """This method computes all the implemented stats for the given signed network.
       
    :param n: graph size (number of nodes)
    :type n: int
    :param l0: number of modules from which the underlying graph is created
    :type l0: int
    :param d: density
    :type d: float
    :param prop_mispl: proportion of misplaced links
    :type prop_mispl: float
    :param prop_neg: proportion of negative links 
    :type prop_neg: float
    :param network_no: network no
    :type network_no: int
    :param network_desc: network description, i.e. whether network is weighted or unweighted
    :type network_desc: str. One of them: SIGNED_UNWEIGHTED, SIGNED_WEIGHTED
    :param mystats: graph-related statistics, e.g. consts.STATS_SIGNED_TRIANGLES, consts.STATS_POS_NEG_RATIO
    :type mystats: str list
    """
    network_folder = path.get_input_network_folder_path(n, l0, d, prop_mispl, prop_neg, network_no)
    network_path = os.path.join(network_folder, consts.SIGNED_UNWEIGHTED+".graphml")
    
    # we continue if the corresponding input network exists
    if os.path.exists(network_path):
        g = util.read_graph(network_path, consts.FILE_FORMAT_GRAPHML)
    
        for stat_name in mystats:
            stat_folder_path = path.get_stat_folder_path(n, l0, d, prop_mispl, prop_neg,
                                                                 network_no, network_desc)
            print("computing stats: "+stat_name+" in "+stat_folder_path)
            os.makedirs(stat_folder_path, exist_ok=True)
        
            result_filename = stat_name+".csv"
            result_filepath = os.path.join(stat_folder_path,result_filename)
            if not os.path.exists(result_filepath) or force:
                result = None
                colnames = None 
                
                if stat_name == consts.STATS_NB_NODES:
                    result = [g.vcount()]
                    colnames = [consts.COL_NAMES[stat_name]]
                elif stat_name == consts.STATS_SIGNED_TRIANGLES:
                    result = stats.str_balance.compute_signed_triangle_ratios(g)
                    result = [util.format_4digits(e) for e in result]
                    colnames = consts.COL_NAMES[stat_name]
                elif stat_name == consts.STATS_LARGEST_EIGENVALUE:
                    result = stats.spectral.retreive_largest_eigenvalue(g)
                    result = [util.format_4digits(result)]
                    colnames = [consts.COL_NAMES[stat_name]]
                elif stat_name == consts.STATS_POS_NEG_RATIO:
                    result = stats.structural.retreive_pos_neg_ratio(g)
                    result = [util.format_4digits(result)]
                    colnames = [consts.COL_NAMES[stat_name]]
                elif stat_name == consts.STATS_POS_PROP:
                    result = stats.structural.retreive_pos_prop(g)
                    result = [util.format_4digits(result)]
                    colnames = [consts.COL_NAMES[stat_name]]
                elif stat_name == consts.STATS_NEG_PROP:
                    result = stats.structural.retreive_neg_prop(g)
                    result = [util.format_4digits(result)]
                    colnames = [consts.COL_NAMES[stat_name]]
                    
                # write the result into the file with its column name(s)
                df = pd.DataFrame(data=result, index=colnames).transpose()  # row vector
                df.to_csv(result_filepath, sep=",", quoting=1, index=False)
                
            else:
                print("already exists")        
Code Example #15
File: visualize.py Project: Lesian/6808_lab4
from util import read_graph, visualize
actual = read_graph('actual1.graph')
a = read_graph('inferred1.graph')
b = read_graph('actual2.graph')
c = read_graph('inferred2.graph')
visualize('graph.svg', [actual, a, b, c], [], [], 4)
Code Example #16
File: line.py Project: TIXhjq/Self-Learning
        # (excerpt: tail of Line.get_embedding, mapping each node id back to its vector)
        for i, embedding in enumerate(embeddings):
            self.embeddings[idx2node[i]] = embedding

        return self.embeddings

    def transform(self):
        self.train()
        self.get_embedding()
        return self.embeddings


if __name__ == '__main__':
    from util import read_graph
    import os
    print(os.getcwd())
    Graph = read_graph('../wiki/Wiki_edgelist.txt')
    line = Line(
        Graph=Graph,
        dimension_size=128,
        per_vertex=100,
        walk_length=10,
        window_size=5,
        work=1,
        negative_ratio=1,
        batch_size=128,
        log_dir='logs/0/',
        epoch=100,
    )
    embeddings = line.transform()
    from evaluate import evaluate_tools
    tool = evaluate_tools(embeddings)
Code Example #17
##    print "LNG ERROR DISTRIBUTION"
##    for lng_e in sorted(n_by_lng_e.keys()):
##        print "%d\t%d" % (lng_e, n_by_lng_e[lng_e])

def output_result(filename, ans_loc_by_id, test_id_list):
    # Write predictions as CSV: one "Id,Lat,Lon" row per test post id
    with open(filename, "w") as output:
        output.write("Id,Lat,Lon\n")
        for pid in sorted(test_id_list):
            lat, lng = ans_loc_by_id[pid]
            output.write("%d,%f,%f\n" % (pid, lat, lng))

if __name__ == "__main__":
    loc_by_id, info_by_id = read_loc_by_id("./data/posts-train.txt")
    graph = util.read_graph("./data/graph.txt")

    test_info_by_id = util.read_test_set("./data/posts-test-x.txt")
    info_by_id.update(test_info_by_id)
    test_id_list = test_info_by_id.keys()
    
    import time
    start = time.time()
    s = 0.7
    k = 50
#### exactly avg_avg
##    s = 0
##    k = 40000
        
    df = calculate_df(graph, loc_by_id, info_by_id)
    paras = make_invidx(graph, loc_by_id, df, info_by_id)
Code Example #18
File: exact_experiment.py Project: rionda/centrsampl
def main():
    """Parse arguments, run experiments, collect results and stats, write to file."""
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.description = "Perform experiment to compute exact betweenness centrality of all vertices in a graph using Brandes' algorithm"
    parser.add_argument("runs", type=util.positive_int, default=20, help="number of runs")
    parser.add_argument("graph", help="graph file")
    parser.add_argument("output", help="output file")
    parser.add_argument("-m", "--maxconn", action="store_true", default=False,
            help="if the graph is not weakly connected, only save the largest connected component")
    parser.add_argument("-p", "--pickle", action="store_true", default=False,
            help="use pickle reader for input file")
    parser.add_argument("-t", "--timeout", type=util.positive_int, default=3600,
            help="Timeout computation after specified number of seconds (default 3600 = 1h, 0 = no timeout)")
    parser.add_argument("-u", "--undirected", action="store_true", default=False,
            help="consider the graph as undirected ")
    parser.add_argument("-v", "--verbose", action="count", default=0, 
            help="increase verbosity (use multiple times for more verbosity)")
    args = parser.parse_args()

    # Set the desired level of logging
    util.set_verbosity(args.verbose)

    # Read graph
    if args.pickle:
        G = util.read_graph(args.graph)
    else:
        G = converter.convert(args.graph, not args.undirected, args.maxconn)

    # Perform experiment multiple times
    results = []
    for i in range(args.runs):
        logging.info("Run #%d", i)
        results.append(brandes_exact.betweenness(G, False, args.timeout))

    # Compute aggregate statistics about the experiments
    stats = dict(results[0][0])
    stats["graph"]= os.path.basename(args.graph)
    stats["vertices"] = G.vcount()
    stats["edges"] = G.ecount()
    stats["runs"] = args.runs
    del stats["time"]
    times = sorted([x[0]["time"] for x in results])
    stats["time_max"] = times[-1]
    stats["time_min"] = times[0]
    stats["time_avg"] = sum(times) / args.runs
    if args.runs > 1:
        stats["time_stddev"] = math.sqrt(sum([math.pow(time -
            stats["time_avg"], 2) for time in times]) / (args.runs - 1))
    else:
        stats["time_stddev"] = 0.0

    csvkeys="graph, runs, time_avg, time_stddev, time_max, time_min, forward_touched_edges, backward_touched_edges"
    print(csvkeys)
    print(util.dict_to_csv(stats, csvkeys))
    # Write stats and results to output file
    try:
        with open(args.output, "wb") as output:
            logging.info("Writing stats and results to %s", args.output)
            pickle.dump((stats, results), output)
    except OSError as E:
        logging.critical("Cannot write stats and results to %s: %s",
                args.output, E.strerror)
        sys.exit(2)
Code Example #19
    # (excerpt: tail of a scoring helper; R appears to be a pandas Series of gene scores)
    # print (geneScores)
    R = R.sort_values(ascending=False)

    print (R)

    for idx, value in enumerate(R):
        R[idx] = abs(p/2 - idx + 1)

    return R


# export PYTHONPATH="/Users/csx/GitProject/sciMallNetworkScore:$PYTHONPATH"
if __name__ == "__main__":
    embPath = '/Users/csx/GitProject/sciMallNetworkScore/data/emb/test.emb'
    args = parse_args()
    G = read_graph(args)
    nodes = G.nodes
    pathWays = [set([32, 34, 3])]
    R = generateKSScore(embPath, list(nodes), p = len(nodes), n = 10, pathWays = pathWays, geneSets=set(nodes))
    for pathway in pathWays:
        Es = getEnrichStatisc(R, pathway, set(nodes), p = len(nodes))
        print ('pathway: 32 34 3 scores: ', Es)
    # print (G.nodes)
    # geneSetsScore()
Code Example #20
def collect_features(n, l0, d, prop_mispl, prop_neg, network_no, network_desc,
                     centralities, stats):
    """This method collects all the indicated features, which are centrality measures
    and graph-related statistics (number of nodes, etc.).
       
    :param n: graph size (number of nodes)
    :type n: int
    :param l0: number of modules from which the underlying graph is created
    :type l0: int
    :param d: density
    :type d: float
    :param prop_mispl: proportion of misplaced links
    :type prop_mispl: float
    :param prop_neg: proportion of negative links 
    :type prop_neg: float
    :param network_no: network no
    :type network_no: int
    :param network_desc: network description, i.e. whether network is weighted or unweighted
    :type network_desc: str. One of them: SIGNED_UNWEIGHTED, SIGNED_WEIGHTED
    :param centralities: centralities, e.g. consts.CENTR_DEGREE_NEG, consts.CENTR_DEGREE_POS, etc. 
    :type centralities: str list
    :param stats: graph related statistics, e.g. consts.STATS_SIGNED_TRIANGLES, consts.STATS_POS_NEG_RATIO
    :type stats: str list
    """
    features = pd.DataFrame([])

    network_folder = path.get_input_network_folder_path(
        n, l0, d, prop_mispl, prop_neg, network_no)
    network_path = os.path.join(network_folder,
                                consts.SIGNED_UNWEIGHTED + ".graphml")

    # we continue if the corresponding input network exists
    if os.path.exists(network_path):
        g = util.read_graph(network_path, consts.FILE_FORMAT_GRAPHML)

        stats_folder_path = path.get_stat_folder_path(n, l0, d, prop_mispl,
                                                      prop_neg, network_no,
                                                      network_desc)
        #print("..... collecting features in "+stats_folder_path)
        for stat_name in stats:
            result_filepath = os.path.join(stats_folder_path,
                                           stat_name + ".csv")
            if os.path.exists(result_filepath):
                df = pd.read_csv(result_filepath,
                                 usecols=consts.COL_NAMES[stat_name])
                features = pd.concat([features, df], axis=1)

        # ===============================================================

        cent_folder_path = path.get_centrality_folder_path(
            n, l0, d, prop_mispl, prop_neg, network_no, network_desc)
        #print("..... collecting features in "+cent_folder_path)
        for centr_name in centralities:
            desc = consts.PREFIX_MEAN + centr_name
            result_filepath = os.path.join(cent_folder_path, desc + ".csv")
            if os.path.exists(result_filepath):
                df = pd.read_csv(result_filepath, usecols=[desc])
                features = pd.concat([features, df], axis=1)

            desc = consts.PREFIX_STD + centr_name
            result_filepath = os.path.join(cent_folder_path, desc + ".csv")
            if os.path.exists(result_filepath):
                df = pd.read_csv(result_filepath, usecols=[desc])
                features = pd.concat([features, df], axis=1)

    return features
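
A hypothetical invocation, with parameter values mirroring the input path used in Code Example #10 (n=20, l0=3, dens=1.0, propMispl=0.2, propNeg=0.7, network=1):

features = collect_features(
    n=20, l0=3, d=1.0, prop_mispl=0.2, prop_neg=0.7, network_no=1,
    network_desc=consts.SIGNED_UNWEIGHTED,
    centralities=[consts.CENTR_DEGREE_NEG, consts.CENTR_DEGREE_POS],
    stats=[consts.STATS_NB_NODES, consts.STATS_POS_NEG_RATIO],
)
print(features.head())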
Code Example #21
File: visualize.py Project: Lesian/6808_lab4
from util import read_graph, visualize
actual = read_graph('section3_graphs/inferred2.graph')
a = read_graph('section3_graphs/inferred1.graph')
visualize('inferred2.svg', [actual], [], [], 1)