Example #1
def subgraphs_exploding_trees(orig, sub_graph_lst):
    # edl -> dimacs -> treeX -> CliqueTreeX
    # union the prod rules?
    prs_paths_lst = []
    for sbg_edl_fname in sub_graph_lst:
        dimacsFname = edgelist_to_dimacs(sbg_edl_fname)
        varElimLst = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']

        ##
        # dict where values are the file path of the written trees
        dimacsTrees_d = tree_decomposition_with_varelims(
            dimacsFname, varElimLst)
        trees_lst = []
        for x in dimacsTrees_d.itervalues():
            trees_lst.extend(f[0] for f in x)

        ##
        # to HRG Clique Tree, in stacked pd df form / returns indiv. filenames
        prs_paths_lst.append(
            convert_dimacs_tree_objs_to_hrg_clique_trees(orig, trees_lst))

    ##
    # stack production rules // returns a list of k stacked DataFrames
    prs_stacked_dfs = [
        get_hrg_prod_rules(multi_paths_lst)
        for multi_paths_lst in prs_paths_lst
    ]

    # pd.concat handles any number of stacked frames (guards against an
    # undefined prs_stacked_df when the sample count differs from two)
    prs_stacked_df = pd.concat(prs_stacked_dfs)
    gb = prs_stacked_df.groupby(['cate']).groups.keys()

    ##
    # Jaccard Similarity
    get_isom_overlap_in_stacked_prod_rules(gb, prs_stacked_df)

    ##
    # isomorph_intersection_2dfstacked
    iso_union, iso_interx = isoint.isomorph_intersection_2dfstacked(
        prs_stacked_df)
    gname = graph_name(orig)
    iso_interx[[1, 2, 3, 4]].to_csv(
        'Results/{}_isom_interxn.tsv'.format(gname),
        sep="\t", header=False, index=False)
    if os.path.exists('Results/{}_isom_interxn.tsv'.format(gname)):
        print("\t", 'Written:', 'Results/{}_isom_interxn.tsv'.format(gname))
        print(
            "\t",
            'Next step is to generate graphs using this subset of production rules.'
        )
    else:
        print("!!Unable to savefile")

    print("Done")
    print(gb, "\n---------------<>---------------<>---------------")
    exit()
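A minimal invocation sketch for the function above, assuming the project's helpers are importable; the edgelist path and the sampled-subgraph filenames are hypothetical stand-ins:

# Hypothetical driver: the sampled .edl files are the kind written by
# ref_graph_largest_conn_componet (Example #4).
orig_edgelist = '../datasets/out.karate'
sampled_subgraphs = ['.karate_lcc_0.edl', '.karate_lcc_1.edl']
subgraphs_exploding_trees(orig_edgelist, sampled_subgraphs)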
Example #2
def transform_edgelist_to_dimacs(files, f_str=None):
    """
	edgelist to dimacs graph format
	:param files: list of file path(s)
	:return:
	"""
    Info("edgelist to dimacs graph format\n" + "-" * 40)
    rslt = []
    for f in files:
        g = nx.read_gpickle(f)
        g.name = graph_name(f)
        rslt.append(convert_nx_gObjs_to_dimacs_gObjs([g]))

    return rslt
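A usage sketch for transform_edgelist_to_dimacs, assuming gpickle files already sit under ../datasets/ (the glob pattern is illustrative):

from glob import glob

files = glob('../datasets/karate*.p')  # assumed gpickle paths
dimacs_objs = transform_edgelist_to_dimacs(files)
for r in dimacs_objs:
    print r  # one dimacs graph-object list per input file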
Example #3
def synth_checks_network_metrics(orig_graph):
    gname = graph_name(orig_graph)
    files = glob("./FakeGraphs/" + gname + "*")
    shl_db = shelve.open(files[0])  # open for read
    origG = load_edgelist(orig_graph)
    print("%%")
    print("%%", gname)
    print("%%")

    for k in shl_db.keys():
        synthGs = shl_db[k]
        # print synthGs[0].number_of_edges(), synthGs[0].number_of_nodes()

        metricx = ['degree']
        metrics.network_properties([origG],
                                   metricx,
                                   synthGs,
                                   name="hstars_" + origG.name,
                                   out_tsv=False)

    shl_db.close()
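The shelve-reading pattern above, as a standalone sketch; the shelf path is hypothetical and each key is assumed to map to a list of synthetic graphs, as in the function:

import shelve

db = shelve.open('./FakeGraphs/karate_0')  # assumed shelf from an earlier run
try:
    for k in db.keys():
        print k, len(db[k])  # each value: a list of synthetic graphs
finally:
    db.close()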
Example #4
def ref_graph_largest_conn_componet(fname):
    df = Pandas_DataFrame_From_Edgelist([fname])[0]
    G = nx.from_pandas_dataframe(df, source='src', target='trg')
    Gc = max(nx.connected_component_subgraphs(G), key=len)
    gname = graph_name(fname)
    num_nodes = Gc.number_of_nodes()
    subg_fnm_lst = []

    ## sample the graph when it has 500 or more nodes
    if num_nodes >= 500:
        cnt = 0
        for Gprime in gs.rwr_sample(Gc, 2, 300):  # sample the largest component
            subg_fnm_lst.append('.{}_lcc_{}.edl'.format(gname, cnt))
            try:
                nx.write_edgelist(Gprime,
                                  '.{}_lcc_{}.edl'.format(gname, cnt),
                                  data=False)
                cnt += 1
            except Exception, e:
                print(str(e), '\n!!Error writing to disk')
                return ""
    return subg_fnm_lst
Example #5
def new_main(args):
    if not (args['base'] is None):
        Info("<- converts to dimacs")
        gn = graph_name(args['base'][0])
        f = "../datasets/" + gn + "*.p"
        files = glob(f)
        dimacs_lst = transform_edgelist_to_dimacs(files)
        results = []
        trees = explode_to_trees(dimacs_lst, results)

        pp.pprint(files)
        pp.pprint(dimacs_lst)
        pp.pprint(trees)
        print
        pp.pprint(results)
        exit(0)
    elif not (args['orig'] is None):
        Info("<- converts edgelist gpickle")
        f = args['orig'][0]
        g = load_edgelist(f)  # full graph
        Info("# of conn comp: %d" %
             len(list(nx.connected_component_subgraphs(g))))
        g = largest_conn_comp(f)  # largest conn comp
        if isinstance(g, list):
            for k, Gprime in enumerate(g):
                subg_out_fname = max(graph_name(f).split("."), key=len)
                subg_out_fname = "../datasets/" + subg_out_fname
                subg_out_fname += "_{}.p".format(k)
                nx.write_gpickle(
                    Gprime, subg_out_fname)  # write subgraph as gpickle
                if os.path.exists(subg_out_fname):
                    Info("Wrote %s" % subg_out_fname)
        else:
            subg_out_fname = max(graph_name(f).split("."), key=len)
            subg_out_fname = "../datasets/" + subg_out_fname
            subg_out_fname += ".p"
            nx.write_gpickle(g, subg_out_fname)
            if os.path.exists(subg_out_fname):
                Info("Wrote %s" % subg_out_fname)
        print("done")
        exit()
    elif not (args['edgelist2dimacs'] is None):
        f = args['edgelist2dimacs'][0]
        pfname = graph_name(f)
        pfname = "../datasets/{}.p".format(pfname)
        if not os.path.exists(pfname):
            Info("File not found, please run:")
            Info("  python explodingTree.py --orig path/to/edgelist")
        G = load_edgelist(f)
        subgraph = max(nx.connected_component_subgraphs(G), key=len)
        gprime_lst = []
        if subgraph.number_of_nodes() > 500:
            for j, Gprime in enumerate(gs.rwr_sample(subgraph, 2, 300)):
                Gprime.name = G.name + "_%d" % j
                gprime_lst.append(convert_graph_obj_2dimacs([Gprime]))
            print gprime_lst

    elif not (args['prules'] is None):
        gn = graph_name(args['prules'][0])
        print gn
        f = "../datasets/" + gn + "*.tree"
        files = glob(f)
        f = "../datasets/" + gn + "*.p"
        graphs = glob(f)
        for g in graphs:
            for f in files:
                dimacs_td_ct_fast(g, f)  # dimacs to tree (decomposition)
        exit(0)
    elif not (args['td'] is None):
        origG = args['td'][0]
        dimacs_f = glob("../datasets/" + graph_name(args['td'][0]) +
                        "*.dimacs")
        ''' "Explode to trees" '''  # ToDo
        var_els = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
        p = mp.Pool(processes=2)  # one pool shared across all files
        for j, f in enumerate(dimacs_f):
            print f
            gn = graph_name(f)
            dimacs_file = "../datasets/{}.dimacs".format(gn)
            for vael in var_els:
                p.apply_async(dimacs_nddgo_tree_simple,
                              args=(dimacs_file, vael),
                              callback=collect_results_trees)
            # xt.dimacs_nddgo_tree_simple(f, vael)
        p.close()
        p.join()

        # dimacs_td_ct_fast(oriG, tdfname) # dimacs to tree (decomposition)
    else:
        sys.exit(0)

    # NOTE: the block below is unreachable in the original (it follows
    # sys.exit(0)) and references an undefined `f`; kept only for reference.
    #     dimacs_convert_orig_graph(args['orig'])
    #     pickle_fname = "../datasets/" + f + ".p"
    #     g = nx.read_gpickle(pickle_fname)
    #     subgraph = max(nx.connected_component_subgraphs(g), key=len)
    #     if subgraph.number_of_nodes() > 500:
    #         for Gprime in gs.rwr_sample(subgraph, 2, 300):
    #             edgelist_in_dimacs_out(Gprime)
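new_main dispatches on whichever key of the args dict is non-None; an illustrative driver (the dict shape is inferred from the branches above, and the path is hypothetical):

args = {'base': None, 'orig': ['../datasets/out.karate'],
        'edgelist2dimacs': None, 'prules': None, 'td': None}
new_main(args)  # takes the --orig branch: gpickles the largest conn comp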
Example #6
def xplodingTree(argsd):
    """
	Run a full set of tests.

	explodingTree.py Flaship function to run functions for a complete test

	Parameters
	----------
	arg1 : dict
	Passing the whole se of args needed

	Returns
	-------
	None

	"""
    sub_graphs_fnames_lst = ref_graph_largest_conn_componet(
        argsd['orig'][0])  # largest connected component
    if len(sub_graphs_fnames_lst) > 1:
        print('process subgraphs from sampling')
        print(sub_graphs_fnames_lst)
        subgraphs_exploding_trees(argsd['orig'][0], sub_graphs_fnames_lst)
        exit()

    dimacsFname = edgelist_to_dimacs(sub_graphs_fnames_lst[0])
    varElimLst = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']

    ##
    # dict where values are the file path of the written trees
    dimacsTrees_d = tree_decomposition_with_varelims(dimacsFname, varElimLst)
    trees_lst = []
    for x in dimacsTrees_d.itervalues():
        trees_lst.extend(f[0] for f in x)

    ##
    # to HRG Clique Tree, in stacked pd df form / returns indiv. filenames
    prs_paths_lst = convert_dimacs_tree_objs_to_hrg_clique_trees(
        argsd['orig'][0], trees_lst)

    ##
    # stack production rules
    prs_stacked_df = get_hrg_prod_rules(prs_paths_lst)
    gb = prs_stacked_df.groupby(['cate']).groups.keys()

    ##
    # Jaccard Similarity
    get_isom_overlap_in_stacked_prod_rules(gb, prs_stacked_df)

    ##
    # isomorph_intersection_2dfstacked
    iso_union, iso_interx = isoint.isomorph_intersection_2dfstacked(
        prs_stacked_df)
    gname = graph_name(argsd['orig'][0])
    iso_interx[[1, 2, 3, 4]].to_csv(
        'Results/{}_isom_interxn.tsv'.format(gname),
        sep="\t", header=False, index=False)
    if os.path.exists('Results/{}_isom_interxn.tsv'.format(gname)):
        print("\t", 'Written:', 'Results/{}_isom_interxn.tsv'.format(gname))
        print(
            "\t",
            'Next step is to generate graphs using this subset of production rules.'
        )
    else:
        print("!!Unable to savefile")

    print("Done")
    exit()

if __name__ == '__main__':
	if len (sys.argv) < 2:
		Info ("Usage:")
		Info ("python xplotree_subgraphs_prs.py path/to/orig_net_edgelist")
		sys.exit (1)
	elif sys.argv[1] == "-ut":
		fname = "/Users/sal.aguinaga/KynKon/datasets/out.as20000102"
	else:
		fname = sys.argv[1]

	if not os.path.exists (fname):
		Info ("Path to edgelist does not exist.")
		sys.exit (1)
	gn = graph_name (fname)
	prsfname = '../ProdRules/{}.tsv.phrg.prs'.format(gn)
	if os.path.exists(prsfname):
		Info('{} already exists'.format(prsfname))
		sys.exit(0)
	og = load_edgelist (fname)
	og.name = gn
	# sgp = glob("../datasets/"+ gn + "*.p" )

	print ("--")
	print ("-- derive subgraphs")
	print ("--")

	Info ("sample 2 subg of 300 nodes and derive the set of production rules")

	prod_rules = get_sampled_gpickled_graphs(og)
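xplodingTree takes the same argparse-style dict used elsewhere; an illustrative call reusing the unit-test dataset path from the __main__ block above:

argsd = {'orig': ['/Users/sal.aguinaga/KynKon/datasets/out.as20000102']}
xplodingTree(argsd)  # full pipeline; writes Results/<gname>_isom_interxn.tsv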
Example #8
# 	df = recompute_probabilities(staked_prs_df) # from core.baseball
# 	# test if stacked prs can fire
# 	stck_fired = probe_stacked_prs_likelihood_tofire(df, graph_name(f), el_base_info_d[graph_name(f)])
# 	print (stck_fired)
# 	#
# 	break

if __name__ == '__main__':
    import sys
    from core.utils import graph_name

    if len(sys.argv) < 2:
        Info("add an out.* dataset with its full path")
        exit()
    f = sys.argv[1]
    f = "../datasets/" + graph_name(f) + "*.tree"
    ftrees = glob(f)

    orig = sys.argv[1]  # e.g. "/Users/sal.aguinaga/KynKon/datasets/out.karate_club_graph"
    import networkx as nx  # graph_name is already imported above

    gn = graph_name(orig)
    f = "../datasets/" + gn + "*.p"
    results = []
    for p in glob(f):
        pp.pprint(p)
        g = nx.read_gpickle(p)
        for tf in ftrees:
            print("\t"), tf