Example 1
def hrg_clique_tree(G):
  if G is None: return

  #  ------------------ ##
  #  tree decomposition
  #  ------------------ ##
  num_nodes = G.number_of_nodes()

  prod_rules = {}
  if num_nodes >= 500:
    for Gprime in gs.rwr_sample(G, 2, 300):
      T = td.quickbb(Gprime)
      root = list(T)[0]
      T = td.make_rooted(T, root)
      T = phrg.binarize(T)
      root = list(T)[0]
      root, children = T
      td.new_visit(T, G, prod_rules)
  else:
    T = td.quickbb(G)
    root = list(T)[0]
    T = td.make_rooted(T, root)
    T = phrg.binarize(T)
    root = list(T)[0]
    root, children = T
    td.new_visit(T, G, prod_rules)

  # pprint.pprint (children)
  return root, children
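A minimal driver sketch for the function above, assuming this repo's td/phrg/gs modules are importable and networkx is available; the call pattern is taken from the code itself, nothing beyond that is confirmed:

import networkx as nx

# hrg_clique_tree handles the >= 500-node sampling branch internally
G = nx.karate_club_graph()       # any small undirected graph
result = hrg_clique_tree(G)      # (root, children) of the binarized rooted tree, or None
if result is not None:
    root, children = result
    print 'root bag:', root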
Example 2
def Hstar_Graphs_Ignore_Time(df, graph_name, tslices, axs):
    if len(df.columns) == 3:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr='ts')
    else:
        G = nx.from_pandas_dataframe(df, 'src', 'trg')
    # force to undirected with unrepeated edges
    if 0: print nx.info(G)
    G = G.to_undirected()
    if 0: print nx.info(G)
    # Derive the prod rules in a naive way (ignoring time)
    prod_rules = PHRG.probabilistic_hrg_learning(G)
    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    num_nodes = G.number_of_nodes()

    print "Starting max size"
    g.set_max_size(num_nodes)

    print "Done with max size"

    Hstars = []

    num_samples = 20
    print '*' * 40
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        Hstars.append(hstar)

    # if 0:
    #   g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
    #   draw_degree_whole_graph(g,axs)
    #   draw_degree(Hstars, axs=axs, col='r')
    #   #axs.set_title('Rules derived by ignoring time')
    #   axs.set_ylabel('Frequency')
    #   axs.set_xlabel('degree')

    if 1:
        # metricx = [ 'degree','hops', 'clust', 'assort', 'kcore','eigen','gcd']
        metricx = ['eigen']
        g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
        # graph_name = os.path.basename(f_path).rstrip('.tel')
        print ">", graph_name
        metrics.network_properties([g],
                                   metricx,
                                   Hstars,
                                   name=graph_name,
                                   out_tsv=True)
Example 3
def synth_plots():
    num_nodes = 100
    samples = 5

    chunglu_M = []
    kron_M = []
    HRG_M = []
    pHRG_M = []
    G_M = []

    for i in range(0, samples):
        ## ER reference graph
        G = nx.erdos_renyi_graph(num_nodes, .1)
        G_M.append(G)

        for j in range(0, samples):
            chunglu_M.append(nx.expected_degree_graph(G.degree().values()))

        HRG_M_s, degree = HRG.stochastic_hrg(G, samples)
        HRG_M = HRG_M + HRG_M_s
        pHRG_M_s = PHRG.probabilistic_hrg(G, samples)
        pHRG_M = pHRG_M + pHRG_M_s
        for j in range(0, samples):
            P = kronfit(G)
            k = math.log(num_nodes, 2)
            kron_M.append(
                product.kronecker_random_graph(int(math.floor(k)),
                                               P,
                                               directed=False))

    metrics.draw_network_value(G_M, chunglu_M, HRG_M, pHRG_M, kron_M)
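One detail worth flagging in the Kronecker branch above: k = math.log(num_nodes, 2) is floored, and assuming product.kronecker_random_graph performs k Kronecker products of the fitted initiator P (the usual construction), the synthetic graph has 2**floor(log2(n)) nodes and can undershoot the target order. A purely illustrative check:

import math

num_nodes = 100
k = int(math.floor(math.log(num_nodes, 2)))
print k, 2 ** k    # 6 64 -- a 100-node target yields a 64-node Kronecker graph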
Example 4
def derive_prules_from(list_of_graphs):
    lst_prod_rules = []
    for g in list_of_graphs:
        if g.number_of_nodes() > 0:
            pr = PHRG.probabilistic_hrg_deriving_prod_rules(g)
            lst_prod_rules.append(pr)
    return lst_prod_rules
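A short usage sketch, with stand-in graphs; it assumes PHRG is importable exactly as in the other examples:

import networkx as nx

graphs = [nx.erdos_renyi_graph(50, 0.1), nx.barabasi_albert_graph(50, 2)]
rule_sets = derive_prules_from(graphs)   # one rule set per non-empty graph
print len(rule_sets)                     # 2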
Example 5
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
  """
  Args:
    rules: production rules (model)
    gname: graph name
    n:     target graph order (number of nodes)
    runs:  how many graphs to generate
  Returns: list of synthetic graphs

  """
  if n <= 0: sys.exit(1)

  print runs
  print n
  print gname
  for i,x in enumerate(prod_rules):
    print i,'  ', x[:1]
  
  g = pcfg.Grammar('S')
  for (id, lhs, rhs, prob) in prod_rules:
    g.add_rule(pcfg.Rule(id, lhs, rhs, prob))
  print '... pcfg.Grammar'

  g.set_max_size(n)
  print "Done with max size"

  if DBG: print '*' * 40
  hstars_lst = []
  for i in range(0, runs):
    rule_list = g.sample(n)
    print 'g.sample'
    hstar = PHRG.grow(rule_list, g)[0]
    hstars_lst.append(hstar)

  return hstars_lst
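The body above is the grammar-to-graph pipeline that recurs through most of these examples. Distilled as a sketch (the pcfg and PHRG call signatures are copied from the surrounding code, not from any library docs):

# 1. load (id, lhs, rhs, prob) tuples into a PCFG with start symbol 'S'
# 2. cap derivations at the target order n
# 3. sample a rule list and grow one synthetic graph from it
g = pcfg.Grammar('S')
for (id, lhs, rhs, prob) in prod_rules:
    g.add_rule(pcfg.Rule(id, lhs, rhs, prob))
g.set_max_size(n)
rule_list = g.sample(n)
hstar = PHRG.grow(rule_list, g)[0]   # element [0] is the generated graph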
Example 6
def Hstar_Graphs_Control(G, graph_name, axs):
    print '-', 'Hstar_Graphs_Control', '-'
    # Derive the prod rules in a naive way
    prod_rules = PHRG.probabilistic_hrg_learning(G)
    print prod_rules
    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    num_nodes = G.number_of_nodes()

    print "Starting max size", 'n=', num_nodes
    g.set_max_size(num_nodes)

    print "Done with max size"

    Hstars = []

    num_samples = 20
    print '*' * 40
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        Hstars.append(hstar)

    # if 0:
    #   g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts'])
    #   draw_degree_whole_graph(g,axs)
    #   draw_degree(Hstars, axs=axs, col='r')
    #   #axs.set_title('Rules derived by ignoring time')
    #   axs.set_ylabel('Frequency')
    #   axs.set_xlabel('degree')

    if 1:
        # metricx = [ 'degree','hops', 'clust', 'assort', 'kcore','eigen','gcd']
        metricx = ['degree', 'gcd']
        # g = nx.from_pandas_dataframe(df, 'src', 'trg',edge_attr=['ts'])
        # graph_name = os.path.basename(f_path).rstrip('.tel')
        if DBG: print ">", graph_name
        metrics.network_properties([G],
                                   metricx,
                                   Hstars,
                                   name=graph_name,
                                   out_tsv=True)
Example 7
def main(argsD):
    runs = argsD['runs']
    print
    print 'dataset: {}\nruns: {},'.format(argsD['orig'][0], runs),
    G = read_load_graph(argsD['orig'][0])
    print "(V,E): {},{}".format(G.number_of_nodes(), G.number_of_edges())
    ## if netstats
    if argsD['netstats']:
        compute_netstats(G, G.name)
        exit(0)

    if argsD['peek']:
        compute_netstats_peek(G, G.name, piikshl=True)
        exit(0)

    ofname = "Results/" + G.name + ".shl"
    # if argsD['rods']: ofname = ofname.split(".")[0] + "_rods.shl"
    database = shelve.open(ofname)

    if argsD['rods']:
        print '% --> Control Rods'
        start_time = time.time()
        HRG_M, degree = HRG.stochastic_hrg(G, runs)
        print("  %d, %s seconds ---" %
              (G.number_of_nodes(), time.time() - start_time))
        database['rods_hstars'] = HRG_M
    else:
        print '% --> PHRG'
        start_time = time.time()
        A = PHRG.probabilistic_hrg(G, runs)  # returns a list of Hstar graphs
        # print("  --- Total %s seconds ---" % (time.time() - start_time))
        print("  %d, %s seconds ---" %
              (G.number_of_nodes(), time.time() - start_time))
        database['prob_hstars'] = A

    print
    start_time = time.time()
    print '% --> CHLU'
    clgs = []
    z = G.degree().values()
    for i in range(runs):
        clgs.append(nx.expected_degree_graph(z))
    database['clgs'] = clgs
    print("  %d, %s seconds ---" %
          (G.number_of_nodes(), time.time() - start_time))
    # -- Kron Prod Graphs
    print '% --> Kron'
    start_time = time.time()
    database['kpgs'] = grow_graphs_using_krongen(G, gn=G.name, nbr_runs=runs)
    print("  %d, %s seconds ---" %
          (G.number_of_nodes(), time.time() - start_time))

    database.close()

    return
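Because main persists everything via shelve, the synthetic graphs can be read back later; a sketch assuming the same Results/<name>.shl naming convention (the key names are the ones written above):

import shelve

db = shelve.open("Results/mygraph.shl")   # substitute the graph's name
hstars = db.get('prob_hstars', [])        # or 'rods_hstars' when the rods option was set
clgs = db['clgs']                         # Chung-Lu graphs
kpgs = db['kpgs']                         # Kronecker product graphs
db.close()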
Example 8
def get_clique_tree(g):
  g.remove_edges_from(g.selfloop_edges())
  giant_nodes = max(nx.connected_component_subgraphs(g), key=len)
  g = nx.subgraph(g, giant_nodes)
  prod_rules = {}
  T = td.quickbb(g)

  root = list(T)[0]
  T = td.make_rooted(T, root)

  T = phrg.binarize(T)
  unfold_2wide_tuple(T)
  return
Example 9
def synthetic_graph_generator(ref, graph_model):
    G = ref
    synth_graph = None
    n = ref.number_of_nodes()

    if 'hrg' in graph_model:
        prod_rules = PHRG.probabilistic_hrg_deriving_prod_rules(
            G)  # derive rules

        g = pcfg.Grammar('S')
        for (id, lhs, rhs, prob) in prod_rules:
            g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

        num_nodes = G.number_of_nodes()
        # print "Starting max size",'n=',num_nodes
        g.set_max_size(num_nodes)
        # print "Done with max size"
        rule_list = g.sample(num_nodes)
        synth_graph = PHRG.grow(rule_list, g)[0]

    return synth_graph
Example 10
def Growing_Network_Using_Final_State_ProdRules(pddf, prod_rules, nSlices,
                                                kSlice, axs):
    '''
    Grow a synthetic graph up to the end of block kSlice using HRG rules
    from the final (whole) state of the graph.
          pddf: pandas df
    prod_rules: production rules learned on the entire graph
       nSlices: total number of blocks (pseudo-states of the graph)
        kSlice: the current slice
           axs: axes to plot to
    '''

    span = (pddf['ts'].max() - pddf['ts'].min()) / nSlices

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    # mask = (pddf['ts'] >= pddf['ts'].min()+ span*kSlice) & (pddf['ts'] < pddf['ts'].min()+ span*(kSlice +1))
    mask = (pddf['ts'] >= pddf['ts'].min()) & (
        pddf['ts'] < pddf['ts'].min() + span * (kSlice + 1))
    ldf = pddf.loc[mask]

    G = nx.from_pandas_dataframe(ldf, 'src', 'trg', ['ts'])

    num_nodes = G.number_of_nodes()
    print "Starting max size"
    g.set_max_size(num_nodes)
    print "Done with max size"

    num_samples = 20
    print '*' * 40
    tdf = pd.DataFrame()
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        df = pd.DataFrame.from_dict(hstar.degree().items())
        # accumulate one degree-histogram column per sampled graph
        tdf = pd.concat([tdf, df.groupby([1]).count()], axis=1)

    tdf = tdf.mean(axis=1)
    tdf.plot(ax=axs, color='r', label='Synth')
    # Orig Graph
    tdf = pd.DataFrame.from_dict(G.degree().items())
    gb = tdf.groupby([1]).count()
    gb[0].plot(ax=axs, color='b', label='Orig')
    axs.set_xscale('log')
Example 11
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
    """
  Args:
    rules: production rules (model)
    gname: graph name
    n:     target graph order (number of nodes)
    runs:  how many graphs to generate
  Returns: list of synthetic graphs

  """
    if n <= 0: sys.exit(1)

    # print runs
    # for i,x in enumerate(prod_rules):
    #   print i,'  ', x

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        rhs = [f[1:-1] for f in re.findall("'.+?'", rhs)]
        prob = float(prob)
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    # # mask = (pddf['ts'] >= pddf['ts'].min()+ span*kSlice) & (pddf['ts'] < pddf['ts'].min()+ span*(kSlice +1))
    # mask = (pddf['ts'] >= pddf['ts'].min()) & (pddf['ts'] < pddf['ts'].min() + span * (kSlice + 1))
    # ldf = pddf.loc[mask]
    #
    # G = nx.from_pandas_dataframe(ldf, 'src', 'trg', ['ts'])
    #
    num_nodes = n
    if DBG: print "Starting max size"
    g.set_max_size(num_nodes)
    if DBG: print "Done with max size"
    #
    # num_samples = 20
    if DBG: print '*' * 40
    hstars_lst = []
    for i in range(0, runs):
        rule_list = g.sample(num_nodes)
        hstar = PHRG.grow(rule_list, g)[0]
        hstars_lst.append(hstar)

    # print rule_list

    return hstars_lst
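The re.findall("'.+?'", rhs) step exists because this variant receives rules that were round-tripped through text (e.g. a saved TSV), so rhs arrives as the string form of a list. A worked example of what that parse recovers (the token format here is illustrative):

import re

rhs = "['0,1:T', '1:N']"   # rhs serialized as text
parsed = [f[1:-1] for f in re.findall("'.+?'", rhs)]
print parsed               # ['0,1:T', '1:N']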
Example 12
def hstar_fixed_graph_gen(args):
    import networkx as nx

    orig_fname = args['grow'][0]
    gn = graph_name(orig_fname)
    if os.path.exists("../datasets/{}.p".format(gn)):
        origG = nx.read_gpickle("../datasets/{}.p".format(gn))
    else:
        print("loading edgelist into an nx graph object")
        origG = load_edgelist(orig_fname)  # load_edgelist, as used in the later examples

    prs_files = glob("../ProdRules/{}*prs".format(gn))
    for f in prs_files:
        prod_rules = get_prod_rules(f)
        g = pcfg.Grammar('S')
        for (id, lhs, rhs, prob) in prod_rules:
            # print (id, lhs, rhs, prob)
            g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

        # exit()  # Take this out
        # ToDo: We need to get these rules in the right format

        num_nodes = origG.number_of_nodes()

        print "Starting max size"
        g.set_max_size(num_nodes)

        print "Done with max size"

        Hstars = []

        num_samples = 20
        print '*' * 40
        for i in range(0, num_samples):
            rule_list = g.sample(num_nodes)
            hstar = PHRG.grow(rule_list, g)[0]
            Hstars.append(hstar)
    import pickle
    pickle.dump({
        'origG': origG,
        'hstars': Hstars
    }, open('../Results/{}_hstars.p'.format(gn), "wb"))
    if os.path.exists('../Results/{}_hstars.p'.format(gn)):
        print("Pickle written")
Example 13
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1):
    """
    Args:
        rules: production rules (model)
        gname: graph name
        n:		 target graph order (number of nodes)
        runs:	how many graphs to generate

    Returns: list of synthetic graphs

    """
    DBG = True
    if n <= 0: sys.exit(1)

    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in prod_rules:
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    print
    print "Added HRG rules (pr: %d, n: %d)" % (len(prod_rules), n)

    num_nodes = n
    if DBG: print "Starting max size ..."
    t_start = time.time()
    g.set_max_size(num_nodes)
    print "Done with max size, took %s seconds" % (time.time() - t_start)

    hstars_lst = []
    print "	",
    for i in range(0, runs):
        print '>',
        rule_list = g.sample(num_nodes)
        hstar = phrg.grow(rule_list, g)[0]
        hstars_lst.append(hstar)

    return hstars_lst
Example 14
def get_hrg_production_rules(edgelist_data_frame,
                             graph_name,
                             tw=False,
                             n_subg=2,
                             n_nodes=300,
                             nstats=False):
    from growing import derive_prules_from

    t_start = time.time()
    df = edgelist_data_frame
    if df.shape[1] == 4:
        G = nx.from_pandas_dataframe(df, 'src', 'trg',
                                     edge_attr=True)  # whole graph
    elif df.shape[1] == 3:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])  # whole graph
    else:
        G = nx.from_pandas_dataframe(df, 'src', 'trg')
    G.name = graph_name
    print "==> read in graph took: {} seconds".format(time.time() - t_start)

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()

    phrg.graph_checks(G)

    if DBG: print
    if DBG: print "--------------------"
    if DBG: print "-Tree Decomposition-"
    if DBG: print "--------------------"

    prod_rules = {}
    K = n_subg
    n = n_nodes
    if num_nodes >= 500:
        print 'Grande'
        t_start = time.time()
        for Gprime in gs.rwr_sample(G, K, n):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = phrg.binarize(T)
            root = list(T)[0]
            root, children = T
            # td.new_visit(T, G, prod_rules, TD)
            td.new_visit(T, G, prod_rules)
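            # note: a plain dict handed to Process is copied into the child,
            # so updates made by the subprocess do not propagate back here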
            Process(target=td.new_visit, args=(
                T,
                G,
                prod_rules,
            )).start()
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = phrg.binarize(T)
        root = list(T)[0]
        root, children = T
        # td.new_visit(T, G, prod_rules, TD)
        td.new_visit(T, G, prod_rules)

        print_treewidth(T)

    if DBG: print
    if DBG: print "--------------------"
    if DBG: print "- Production Rules -"
    if DBG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DBG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probs not counts.
            if DBG: print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if DBG:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1

    df = pd.DataFrame(rules)
    '''print "++++++++++"
    df.to_csv('ProdRules/{}_prs.tsv'.format(G.name), header=False, index=False, sep="\t")
    if os.path.exists('ProdRules/{}_prs.tsv'.format(G.name)): 
        print 'Saved', 'ProdRules/{}_prs.tsv'.format(G.name)
    else:
        print "Trouble saving"
    print "-----------"
    print [type(x) for x in rules[0]] '''
    '''
    Graph Generation of Synthetic Graphs:
    grow graphs using the union of rules from sampled subgraphs to predict the
    target order of the original graph.
    '''
    hStars = grow_exact_size_hrg_graphs_from_prod_rules(
        rules, graph_name, G.number_of_nodes(), 10)
    print '... hStar graphs:', len(hStars)
    d = {graph_name + "_hstars": hStars}
    with open(r"Results/{}_hstars.pickle".format(graph_name),
              "wb") as output_file:
        cPickle.dump(d, output_file)
    if os.path.exists(r"Results/{}_hstars.pickle".format(graph_name)):
        print "File saved"
Example 15
G = nx.subgraph(G, giant_nodes)
num_nodes = G.number_of_nodes()

graph_checks(G)

prod_rules = {}   # rules accumulate here, as in the other examples

print
print "--------------------"
print "-Tree Decomposition-"
print "--------------------"

if num_nodes >= 500:
    for Gprime in gs.rwr_sample(G, 2, 100):
        T = td.quickbb(Gprime)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = hrg.binarize(T)
        root = list(T)[0]
        root, children = T
        td.new_visit(T, G, prod_rules)
else:
    T = td.quickbb(G)
    root = list(T)[0]
    T = td.make_rooted(T, root)
    T = hrg.binarize(T)
    root = list(T)[0]
    root, children = T
    td.new_visit(T, G, prod_rules)


def flatten(tup):
    if type(tup) == frozenset:
Example 16
def sampled_subgraphs_cliquetree(orig, tree_path):
    files = glob(tree_path + "*.dimacs.tree")
    prod_rules = {}
    graph_name = orig

    for fname in files:
        print '... input file:', fname

        df = Pandas_DataFrame_From_Edgelist([orig])[0]
        if df.shape[1] == 3:
            G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])
        else:
            G = nx.from_pandas_dataframe(df, 'src', 'trg')
        print nx.info(G)

        with open(fname, 'r') as f:  # read tree decomp from inddgo
            lines = f.readlines()
            lines = [x.rstrip('\r\n') for x in lines]

        cbags = {}
        bags = [x.split() for x in lines if x.startswith('B')]

        for b in bags:
            cbags[int(b[1])] = [int(x)
                                for x in b[3:]]  # what to do with bag size?

        edges = [x.split()[1:] for x in lines if x.startswith('e')]
        edges = [[int(k) for k in x] for x in edges]

        tree = defaultdict(set)
        for s, t in edges:
            tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
            if DEBUG: print '.. # of keys in `tree`:', len(tree.keys())
        if DEBUG: print tree.keys()
        # root = list(tree)[0]
        root = frozenset(cbags[1])
        if DEBUG: print '.. Root:', root
        T = td.make_rooted(tree, root)
        if DEBUG: print '.. T rooted:', len(T)
        # nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets

        T = phrg.binarize(T)
        td.new_visit(
            T, G,
            prod_rules)  # ToDo: here is where something funny is going on.

        if DEBUG: print "--------------------"
        if DEBUG: print "- Production Rules -"
        if DEBUG: print "--------------------"

        for k in prod_rules.iterkeys():
            if DEBUG: print k
            s = 0
            for d in prod_rules[k]:
                s += prod_rules[k][d]
            for d in prod_rules[k]:
                prod_rules[k][d] = float(prod_rules[k][d]) / float(
                    s)  # normalization step to create probs not counts.
                if DEBUG: print '\t -> ', d, prod_rules[k][d]
        print '... prod_rules size', len(prod_rules.keys())

    #  - production rules number -
    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if DEBUG:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])

            sid += 1
        id += 1

    df = pd.DataFrame(rules)

    print graph_name
    graph_name = os.path.basename(graph_name)
    print graph_name
    outdf_fname = "./ProdRules/" + graph_name + ".prules"
    if not os.path.isfile(outdf_fname + ".bz2"):
        print '...', outdf_fname, "written"
        df.to_csv(outdf_fname + ".bz2", compression="bz2")
    else:
        print '...', outdf_fname, "file exists"

    return
Example 17
def dimacs_td_ct(tdfname):
    """ tree decomp to clique-tree """

    print '... input file:', tdfname
    fname = tdfname
    graph_name = os.path.basename(fname)
    gname = graph_name.split('.')[0]
    gfname = "datasets/out." + gname
    tdh = os.path.basename(fname).split('.')[1]  # tree decomp heuristic
    tfname = gname + "." + tdh

    G = load_edgelist(gfname)

    if DEBUG: print nx.info(G)
    print
    with open(fname, 'r') as f:  # read tree decomp from inddgo
        lines = f.readlines()
        lines = [x.rstrip('\r\n') for x in lines]

    cbags = {}
    bags = [x.split() for x in lines if x.startswith('B')]

    for b in bags:
        cbags[int(b[1])] = [int(x) for x in b[3:]]  # what to do with bag size?

    edges = [x.split()[1:] for x in lines if x.startswith('e')]
    edges = [[int(k) for k in x] for x in edges]

    tree = defaultdict(set)
    for s, t in edges:
        tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
        if DEBUG: print '.. # of keys in `tree`:', len(tree.keys())
    if DEBUG: print tree.keys()
    root = list(tree)[0]
    if DEBUG: print '.. Root:', root
    root = frozenset(cbags[1])
    if DEBUG: print '.. Root:', root
    T = td.make_rooted(tree, root)
    if DEBUG: print '.. T rooted:', len(T)
    # nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets

    T = phrg.binarize(T)

    prod_rules = {}
    td.new_visit(T, G, prod_rules)

    if DEBUG: print "--------------------"
    if DEBUG: print "- Production Rules -"
    if DEBUG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DEBUG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probs not counts.
            if DEBUG: print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if DEBUG:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])

            sid += 1
        id += 1

    df = pd.DataFrame(rules)

    outdf_fname = "./ProdRules/" + tfname + ".prules"
    if not os.path.isfile(outdf_fname + ".bz2"):
        print '...', outdf_fname, "written"
        df.to_csv(outdf_fname + ".bz2", compression="bz2")
    else:
        print '...', outdf_fname, "file exists"
    return
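To make the parsing block above concrete: judging from how the split lines are indexed, the INDDGO tree files carry one "B <bag-id> <bag-size> <members...>" line per bag and "e <s> <t>" lines for tree edges (a format inferred from this code, not from the INDDGO docs). The same parse on a toy decomposition:

from collections import defaultdict

lines = ["B 1 3 1 2 3", "B 2 2 3 4", "e 1 2"]   # two bags joined by one tree edge

cbags = {}
for b in (x.split() for x in lines if x.startswith('B')):
    cbags[int(b[1])] = [int(x) for x in b[3:]]   # b[2], the bag size, is unused

edges = [[int(k) for k in x.split()[1:]] for x in lines if x.startswith('e')]

tree = defaultdict(set)
for s, t in edges:
    tree[frozenset(cbags[s])].add(frozenset(cbags[t]))

print dict(tree)   # {frozenset([1, 2, 3]): set([frozenset([3, 4])])}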
Example 18
def isomorphic_test_from_dimacs_tree(orig, tdfname, gname=""):
	""""
	orig: path to original/refernce input graph
	tdfname: path fragment for a set of td pro rules
	gname: graph name (str)
	returns:
    """

	# if whole tree path
	# else, assume a path fragment
	print '... input graph  :', os.path.basename(orig)
	print '... td path frag :', tdfname

	G = load_edgelist(orig)  # load edgelist into a graph obj
	N = G.number_of_nodes()
	M = G.number_of_edges()
	# +++ Graph Checks
	if G is None: sys.exit(1)
	G.remove_edges_from(G.selfloop_edges())
	giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
	G = nx.subgraph(G, giant_nodes)
	graph_checks(G)
	# --- graph checks

	G.name = gname

	files = glob(tdfname + "*.dimacs.tree")
	prod_rules = {}
	stacked_df = pd.DataFrame()

	mat_dict = {}
	for i, x in enumerate(sorted(files)):
		mat_dict[os.path.basename(x).split(".")[0].split("_")[-1]] = i
		if DBG: print os.path.basename(x).split(".")[0].split("_")[-1]

	for tfname in sorted(files):
		tname = os.path.basename(tfname).split(".")
		tname = "_".join(tname[:2])

		with open(tfname, 'r') as f:  # read tree decomp from inddgo
			lines = f.readlines()
			lines = [x.rstrip('\r\n') for x in lines]

		cbags = {}
		bags = [x.split() for x in lines if x.startswith('B')]

		for b in bags:
			cbags[int(b[1])] = [int(x) for x in b[3:]]  # what to do with bag size?

		edges = [x.split()[1:] for x in lines if x.startswith('e')]
		edges = [[int(k) for k in x] for x in edges]

		tree = defaultdict(set)
		for s, t in edges:
			tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
			if DBG: print '.. # of keys in `tree`:', len(tree.keys())

		root = list(tree)[0]
		root = frozenset(cbags[1])
		T = td.make_rooted(tree, root)
		# nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets

		T = phrg.binarize(T)
		# root = list(T)[0]
		# root, children = T
		# td.new_visit(T, G, prod_rules, TD)
		# print ">>",len(T)

		td.new_visit(T, G, prod_rules)
		from json import dumps
		# print dumps(prod_rules, indent=4, sort_keys=True)

		for k in prod_rules.iterkeys():
			if DBG: print k
			s = 0
			for d in prod_rules[k]:
				s += prod_rules[k][d]
			for d in prod_rules[k]:
				prod_rules[k][d] = float(prod_rules[k][d]) / float(s)  # normalization step to create probs not counts.
				if DBG: print '\t -> ', d, prod_rules[k][d]

		if DBG: print "--------------------"
		if DBG: print '- Prod. Rules'
		if DBG: print "--------------------"
		rules = []
		# print dumps(prod_rules, indent=4, sort_keys=True)

		id = 0
		for k, v in prod_rules.iteritems():
			sid = 0
			for x in prod_rules[k]:
				rhs = re.findall("[^()]+", x)
				rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]))
				if DBG: print "r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]
				sid += 1
			id += 1

		df = pd.DataFrame(rules)
		df['cate'] = tname
		stacked_df = pd.concat([df, stacked_df])
		# print df.shape
	print "\nStacked prod rules\n", "~" * 20
	print "  ", stacked_df.shape
	if args['verb']: print stacked_df.to_string()
	stacked_df.to_csv("../Results/{}_stacked_df.tsv".format(gname), sep="\t")
	if os.path.exists(
		"../Results/{}_stacked_df.tsv".format(gname)): print 'Wrote:', "../Results/{}_stacked_df.tsv".format(gname)

	print "\nisomorphic union of the rules (_mod probs)\n", "~" * 20
	stacked_df.columns = ['rnbr', 'lhs', 'rhs', 'pr', df['cate'].name]
	iso_union, iso_interx = isomorph_intersection_2dfstacked(stacked_df)
	print "  ", iso_union.shape
	if args['verb']: print iso_union.to_string()

	print "\nIsomorphic intersection of the prod rules\n", "~" * 20
	print "  ", iso_interx.shape
	iso_interx.to_csv('../Results/{}_isom_interxn.tsv'.format(gname))
	if os.path.exists(
		'../Results/{}_isom_interxn.tsv'.format(gname)): print 'Wrote:', '../Results/{}_isom_interxn.tsv'.format(gname)
Example 19
def dimacs_td_ct_fast(oriG, tdfname):
    """ tree decomp to clique-tree
	parameters:
	   orig:	filepath to orig (input) graph in edgelist
	tdfname:	filepath to tree decomposition from INDDGO
	 synthg:	when the input graph is a syth (orig) graph
	Todo:
		currently not handling sythg in this version of dimacs_td_ct
	"""
    G = oriG
    if G is None:
        return (1)

    prod_rules = {}

    t_basename = os.path.basename(tdfname)
    out_tdfname = os.path.basename(t_basename) + ".prs"
    if os.path.exists("../ProdRules/" + out_tdfname):
        # print "==> exists:", out_tdfname
        return out_tdfname
        # else:
        #   print ("create folder ../ProdRules")

    print "../ProdRules/" + out_tdfname, tdfname

    with open(tdfname, 'r') as f:  # read tree decomp from inddgo
        lines = f.readlines()
        lines = [x.rstrip('\r\n') for x in lines]

    cbags = {}
    bags = [x.split() for x in lines if x.startswith('B')]

    for b in bags:
        cbags[int(b[1])] = [int(x) for x in b[3:]]  # what to do with bag size?

    edges = [x.split()[1:] for x in lines if x.startswith('e')]
    edges = [[int(k) for k in x] for x in edges]

    tree = defaultdict(set)
    for s, t in edges:
        tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
        if DEBUG: print '.. # of keys in `tree`:', len(tree.keys())

    root = list(tree)[0]
    root = frozenset(cbags[1])
    T = td.make_rooted(tree, root)
    # nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets

    T = phrg.binarize(T)
    root = list(T)[0]
    root, children = T
    # td.new_visit(T, G, prod_rules, TD)
    # print ">>",len(T)

    td.new_visit(T, G, prod_rules)

    if 0: print "--------------------"
    if 0: print "- Production Rules -"
    if 0: print "--------------------"

    for k in prod_rules.iterkeys():
        if DEBUG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probs not counts.
            if DEBUG: print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if 0:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1

    # print rules
    if 0: print "--------------------"
    if 0: print '- P. Rules', len(rules)
    if 0: print "--------------------"

    # ToDo.
    # Let's save these rules to file or print proper
    # write_prod_rules_to_tsv(rules, out_tdfname)

    # g = pcfg.Grammar('S')
    # for (id, lhs, rhs, prob) in rules:
    #	g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    # Synthetic Graphs
    #	hStars = grow_exact_size_hrg_graphs_from_prod_rules(rules, graph_name, G.number_of_nodes(), 20)
    #	# metricx = ['degree', 'hops', 'clust', 'assort', 'kcore', 'gcd'] # 'eigen'
    #	metricx = ['gcd','avgdeg']
    #	metrics.network_properties([G], metricx, hStars, name=graph_name, out_tsv=True)

    return out_tdfname
Example 20
def gcd():
    num_nodes = 1000

    ba_G = nx.barabasi_albert_graph(num_nodes, 3)
    er_G = nx.erdos_renyi_graph(num_nodes, .1)
    ws_G = nx.watts_strogatz_graph(num_nodes, 8, .1)
    nws_G = nx.newman_watts_strogatz_graph(num_nodes, 8, .1)

    graphs = [ba_G, er_G, ws_G, nws_G]

    samples = 50

    for G in graphs:
        chunglu_M = []
        for i in range(0, samples):
            chunglu_M.append(nx.expected_degree_graph(G.degree().values()))

        HRG_M, degree = HRG.stochastic_hrg(G, samples)
        pHRG_M = PHRG.probabilistic_hrg(G, samples)
        kron_M = []
        rmat_M = []
        for i in range(0, samples):
            P = kronfit(G)
            k = math.log(num_nodes, 2)
            kron_M.append(
                product.kronecker_random_graph(int(math.floor(k)),
                                               P,
                                               directed=False))

        df_g = metrics.external_rage(G)
        gcd_chunglu = []
        gcd_phrg = []
        gcd_hrg = []
        gcd_kron = []
        for chunglu_M_s in chunglu_M:
            df_chunglu = metrics.external_rage(chunglu_M_s)
            rgfd = metrics.tijana_eval_rgfd(df_g, df_chunglu)
            gcm_g = metrics.tijana_eval_compute_gcm(df_g)
            gcm_h = metrics.tijana_eval_compute_gcm(df_chunglu)
            gcd_chunglu.append(metrics.tijana_eval_compute_gcd(gcm_g, gcm_h))
        for HRG_M_s in HRG_M:
            df_hrg = metrics.external_rage(HRG_M_s)
            rgfd = metrics.tijana_eval_rgfd(df_g, df_hrg)
            gcm_g = metrics.tijana_eval_compute_gcm(df_g)
            gcm_h = metrics.tijana_eval_compute_gcm(df_hrg)
            gcd_hrg.append(metrics.tijana_eval_compute_gcd(gcm_g, gcm_h))
        for pHRG_M_s in pHRG_M:
            df_phrg = metrics.external_rage(pHRG_M_s)
            rgfd = metrics.tijana_eval_rgfd(df_g, df_phrg)
            gcm_g = metrics.tijana_eval_compute_gcm(df_g)
            gcm_h = metrics.tijana_eval_compute_gcm(df_phrg)
            gcd_phrg.append(metrics.tijana_eval_compute_gcd(gcm_g, gcm_h))
        for kron_M_s in kron_M:
            df_kron = metrics.external_rage(kron_M_s)
            rgfd = metrics.tijana_eval_rgfd(df_g, df_kron)
            gcm_g = metrics.tijana_eval_compute_gcm(df_g)
            gcm_h = metrics.tijana_eval_compute_gcm(df_kron)
            gcd_kron.append(metrics.tijana_eval_compute_gcd(gcm_g, gcm_h))

        print gcd_chunglu
        print gcd_hrg
        print gcd_phrg
        print gcd_kron
        print
        print
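The four per-model loops above repeat one pattern; if refactoring, it factors into a single helper that reuses the same metrics calls (note tijana_eval_rgfd is computed but never used in the original, so it is dropped here):

def gcd_scores(df_g, synthetic_graphs):
    """GCD of each synthetic graph against the reference RAGE table df_g."""
    gcm_g = metrics.tijana_eval_compute_gcm(df_g)   # reference GCM, computed once
    scores = []
    for H in synthetic_graphs:
        gcm_h = metrics.tijana_eval_compute_gcm(metrics.external_rage(H))
        scores.append(metrics.tijana_eval_compute_gcd(gcm_g, gcm_h))
    return scores

# e.g. gcd_chunglu = gcd_scores(df_g, chunglu_M)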