import argparse

from graph_helpers import load_graph_by_name
# reverse_edge_weights, normalize_globally and preprocess are assumed to be
# defined (or imported) elsewhere in this module.


def main():
    parser = argparse.ArgumentParser(
        description='preprocess a graph: reverse and/or normalize its edge weights')
    parser.add_argument('-g', '--graph', help='graph name')
    parser.add_argument('-s', '--graph_suffix', default='',
                        help='suffix of the graph file name')
    parser.add_argument('-w', '--weighted', action='store_true',
                        help='load the weighted version of the graph')
    parser.add_argument('-r', '--only_reverse', action='store_true',
                        help='only reverse the edge weights')
    parser.add_argument('-n', '--only_normalize', action='store_true',
                        help='only normalize the edge weights globally')
    parser.add_argument('-o', '--output_path',
                        help='path to save the processed graph')
    
    args = parser.parse_args()
    
    g = load_graph_by_name(args.graph,
                           weighted=args.weighted,
                           suffix=args.graph_suffix)
    if args.only_reverse:
        print('only reverse edge weights')
        new_g = reverse_edge_weights(g)
    elif args.only_normalize:
        print('only normalize')
        new_g = normalize_globally(g)
    else:
        new_g = preprocess(g)

    new_g.save(args.output_path)
    print('saved to {}'.format(args.output_path))
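
# Illustrative addition (not part of the original snippet): a standard
# entry-point guard, assuming the module is meant to be run as a script.
if __name__ == '__main__':
    main()
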
import argparse

import numpy as np

from graph_helpers import load_graph_by_name
from graph_tool.stats import remove_self_loops  # import path assumed


def main():
    parser = argparse.ArgumentParser(
        description='assign uniformly random edge weights in [p_min, p_max] to a graph')
    parser.add_argument('-g', '--graph', help='graph name')
    parser.add_argument('-d',
                        '--to_directed',
                        action='store_true',
                        help='if make directed or not')
    parser.add_argument('--p_min',
                        default=0.0,
                        type=float,
                        help='lower bound for edge weight')
    parser.add_argument('--p_max',
                        default=1.0,
                        type=float,
                        help='upper bound for edge weight')
    parser.add_argument('-o', '--output', help='output path for the weighted graph')

    args = parser.parse_args()
    g = load_graph_by_name(args.graph)
    remove_self_loops(g)

    if args.to_directed:
        g.set_directed(True)
        edges_iter = list(g.edges())
        for e in edges_iter:
            g.add_edge(e.target(), e.source())

    weights = g.new_edge_property('float')
    weights.a = (np.random.random(g.num_edges())
                 * (args.p_max - args.p_min) + args.p_min)

    g.edge_properties['weights'] = weights
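
    # Illustrative sanity check (an addition, not in the original script):
    # every sampled weight must lie within [p_min, p_max].
    assert args.p_min <= weights.a.min() and weights.a.max() <= args.p_max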

    g.graph_properties['p_min'] = g.new_graph_property('float', args.p_min)
    g.graph_properties['p_max'] = g.new_graph_property('float', args.p_max)
    print(g.graph_properties['p_min'], args.p_min)
    print(g.graph_properties['p_max'], args.p_max)
    print('g.num_edges()', g.num_edges())

    output_path = args.output  # 'data/{}/graph_weighted.gt'.format(args.graph)
    g.save(output_path)

    print('dumped to {}'.format(output_path))
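
    # Example invocation (illustrative; the script name is a placeholder,
    # the flags are those defined above):
    #   python this_script.py -g grqc -d --p_min 0.1 --p_max 0.9 \
    #       -o data/grqc/graph_weighted.gt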
from matplotlib import pyplot as plt

from viz_helpers import lattice_node_pos
from minimum_steiner_tree import min_steiner_tree
from cascade_generator import si, observe_cascade
from eval_helpers import infection_precision_recall
from graph_helpers import remove_filters, load_graph_by_name

from inference import infer_infected_nodes
from query_selection import RandomQueryGenerator, OurQueryGenerator, PRQueryGenerator
from experiment import gen_input, one_round_experiment

# In[33]:

graph_name = 'karate'
g = load_graph_by_name(graph_name)

# In[9]:

if False:  # flip to True to visualize the graph
    from graph_tool.draw import sfdp_layout, graph_draw
    pos = sfdp_layout(g)
    vertex_text = g.new_vertex_property('string')
    for v in g.vertices():
        vertex_text[v] = str(v)
    graph_draw(g, pos=pos, vertex_text=vertex_text)

# In[26]:


def one_combined_round(g, n_queries, obs, c):
    ...
# coding: utf-8

import sys
import numpy as np
from graph_helpers import load_graph_by_name
from preprocess_graph import reverse_edge_weights

graph_name = sys.argv[1]
g = load_graph_by_name(graph_name, weighted=True)

w = g.new_edge_property('float')
in_deg = g.degree_property_map('in', weight=None)
for u in g.vertices():
    for v in g.vertex(u).in_neighbours():  # v -> u
        w[g.edge(v, u)] = 1 / in_deg[u]

in_deg_weighted = g.degree_property_map('in', weight=w)
assert np.all(np.isclose(in_deg_weighted.a, 1)), \
    'maybe self-loops are not removed'

g.edge_properties['weights'] = w

g.save('data/{}/graph_weighted_sto.gt'.format(graph_name))

rev_g = reverse_edge_weights(g)

out_deg_weighted = rev_g.degree_property_map(
    'out', weight=rev_g.edge_properties['weights'])
assert np.all(np.isclose(out_deg_weighted.a, 1))

rev_g.save('data/{}/graph_weighted_sto_rev.gt'.format(graph_name))
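
# Minimal illustration (hypothetical toy graph, not part of the original
# script) of the 1 / in-degree weighting used above: each vertex's incoming
# weights sum to 1.
import graph_tool.all as gt

toy = gt.Graph(directed=True)
toy.add_vertex(3)
toy.add_edge_list([(0, 2), (1, 2)])  # vertex 2 has in-degree 2
tw = toy.new_edge_property('float')
for u in toy.vertices():
    for v in u.in_neighbours():  # v -> u
        tw[toy.edge(v, u)] = 1 / u.in_degree()
print([tw[e] for e in toy.edges()])  # both edges get weight 0.5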
    print('-' * 10)
    for k, v in args._get_kwargs():
        print("{}={}".format(k, v))

    inf_result_dirname = 'outputs/{}/{}/{}'.format(args.inf_dirname,
                                                   args.data_id,
                                                   args.sampling_method)
    query_dirname = 'outputs/{}/{}/{}'.format(args.query_dirname, args.data_id,
                                              args.sampling_method)

    print('summarizing ', inf_result_dirname)
    # if n_queries is too large, e.g., 100,
    # there may be no hidden infected nodes left, and the average precision
    # score is undefined
    n_queries = args.n_queries

    g = load_graph_by_name(args.graph_name)

    query_dir_ids = [s.strip() for s in args.query_dir_ids.split(',')]
    if args.legend_labels is not None:
        labels = [s.strip() for s in args.legend_labels.split(',')]
    else:
        labels = query_dir_ids
    print('query_dir_ids:', query_dir_ids)

    if args.eval_with_mask:
        pkl_dir = 'eval_result/{}'.format(args.eval_method)
    else:
        pkl_dir = 'eval_result/{}-no-mask'.format(args.eval_method)

    print('pkl dir', pkl_dir)
    args = parser.parse_args()

    print("Args:")
    print('-' * 10)
    for k, v in args._get_kwargs():
        print("{}={}".format(k, v))

    graph_name = args.graph
    suffix = args.graph_suffix
    n_runs = args.n_runs
    q = args.obs_fraction
    observation_method = args.observation_method
    min_size = args.min_size
    max_size = args.max_size

    g = load_graph_by_name(graph_name, weighted=True)
    norm_g = load_graph_by_name(graph_name, weighted=True, suffix=suffix)

    print('g.num_edges()', g.num_edges())
    print('norm_g.num_edges()', norm_g.num_edges())

    result = {}
    for eps in [0.0, 0.5]:
        rows = Parallel(n_jobs=-1)(
            delayed(one_run)(g,
                             norm_g,
                             q,
                             eps,
                             'pagerank',
                             min_size,
Example #7
METHODS_WANT_TREE = {'leaves', 'bfs-head', 'bfs-tail'}

args = parser.parse_args()

print("Args:")
print('-' * 10)
for k, v in args._get_kwargs():
    print("{}={}".format(k, v))

graph_name = args.graph

if not args.use_edge_weights:
    print('uniform edge weight')
    g = load_graph_by_name(graph_name,
                           weighted=False,
                           suffix=args.graph_suffix)
    p = args.infection_proba
else:
    print('non-uniform edge weight')
    g = load_graph_by_name(graph_name, weighted=True, suffix=args.graph_suffix)
    p = g.edge_properties['weights']

print('p=', p)
print('p.a=', p.a)

# root_sampler = build_out_degree_root_sampler(g)
root_sampler = lambda: None
# root_sampler = lambda: 45

d = args.output_dir
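
# Hypothetical sketch of the commented-out build_out_degree_root_sampler
# above (an assumption; its real implementation is not shown in this
# excerpt): sample a root with probability proportional to its out-degree.
import numpy as np

def build_out_degree_root_sampler(g):
    deg = g.degree_property_map('out').a.astype(float)
    probs = deg / deg.sum()
    nodes = np.arange(g.num_vertices())
    return lambda: np.random.choice(nodes, p=probs)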
Example #8
from matplotlib import pyplot as plt

from graph_helpers import load_graph_by_name

graph_name = 'grqc'
suffix = 's0.03'
aspects = ['roc', 'ap', 'precision', 'recall', 'f1']

qs = ['0.1', '0.25', '0.5', '0.75']

eval_metric = 'mean'

for aspect in aspects:
    root_sampling_method = 'random_root'

    g = load_graph_by_name(graph_name, weighted=True, suffix='_' + suffix)

    methods = [
        'pagerank-eps0.0', 'pagerank-eps0.5', 'pagerank-eps1.0', 'random_root',
        'true root'
    ]

    columns_to_plot = []
    for q in qs:
        result_path = 'eval_result/{}-{}-q{}-by_root_sampling_methods.pkl'.format(
            graph_name, suffix, q)
        row = pkl.load(open(result_path, 'rb'))
        print('q={}'.format(q))
        print('-' * 10)
        print(row[aspect][root_sampling_method])
        columns_to_plot.append(
import pickle as pkl
from glob import glob
from collections import Counter

from graph_helpers import load_graph_by_name
# import path assumed; these two cascade helpers live elsewhere in this code base
from helpers import infected_nodes, cascade_source

graph = 'grqc'
model = 'ic'
# suffix = '_tmp'
# cascade_fraction = 0
suffix = ''
cascade_fraction = 0.25
obs_frac = "0.5"
cascade_dir = 'cascade'

dirname = '{}/{}-m{}-s{}-o{}-omuniform/*'.format(cascade_dir, graph, model,
                                                 cascade_fraction, obs_frac)

print(dirname)
g = load_graph_by_name(graph, weighted=True, suffix=suffix)

gprop = g.graph_properties
if 'p_min' in gprop:
    p_min, p_max = gprop['p_min'], gprop['p_max']
    print('p_min={}, p_max={}'.format(p_min, p_max))
else:
    print('external weight initialization')

pairs = [pkl.load(open(p, 'rb')) for p in glob(dirname)]
os = [o for o, _ in pairs]  # observed node sets
cs = [c for _, c in pairs]  # full cascades
obs_sizes = [len(o) for o in os]
c_sizes = [len(infected_nodes(c)) for c in cs]
roots = list(map(cascade_source, cs))
print('roots freq:')
print(Counter(roots).most_common(10))
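
# Hedged sketch of the two cascade helpers used above, assuming a cascade is
# an array of infection times with -1 marking uninfected nodes and the
# earliest time at the source (an assumption; the real implementations are
# not shown in this excerpt):
import numpy as np

def infected_nodes_sketch(c):
    return np.nonzero(np.asarray(c) != -1)[0]

def cascade_source_sketch(c):
    times = np.array(c, dtype=float)  # copy, so the caller's array is untouched
    times[times == -1] = np.inf  # ignore uninfected nodes
    return int(np.argmin(times))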
Example #10
def g():
    return load_graph_by_name('grqc', weighted=True)
                        help='number of samples')
    parser.add_argument('-j',
                        '--n_jobs',
                        default=-1,
                        type=int,
                        help='number of parallel jobs')

    args = parser.parse_args()

    openmp_set_num_threads(1)

    graph_name = args.graph
    sampling_method = args.sampling_method
    n_samples = args.n_samples

    g_rev = load_graph_by_name(graph_name, weighted=True, suffix='_reversed')

    cs = load_cascades(
        'cascade-weighted/{}-mic-s0.02-oleaves/'.format(graph_name))

    tuples_of_records = Parallel(n_jobs=args.n_jobs)(
        delayed(run_with_or_without_resampling)(g_rev, cid, c, X, n_samples,
                                                sampling_method)
        for cid, (X, c) in tqdm(cs, total=96))

    ap_records, p_records = zip(*tuples_of_records)
    ap_df = pd.DataFrame.from_records(ap_records)
    print('ap score:')
    print(ap_df.describe())

    pk_df = pd.DataFrame.from_records(p_records)