def fig_4c_4d(dest='synthetic-ml-on-2cl-big-n-small-s-truncated-feature.csv'):
    """Run the ``experiment_2cl`` sweep and write one CSV row per run to *dest*.

    One run is scheduled for every combination of negative-sample count ``s``
    (16 ... 1024), sampling scheme (``'uniform'`` / ``'stratified'``) and
    replicate index ``i`` (0-19), always with ``n=80000`` and
    ``edge_sampling='first-n'``.  Each run gets its own random
    ``feature_seed``.  Runs are dispatched through a 48-worker process pool.

    Args:
        dest: Path of the CSV file to (over)write.
    """
    # The graph seed does not depend on the sweep variables, so read it once
    # instead of re-parsing the info for every configuration.
    graph_id = parse_info(PATH)['graph_seed']

    kwargs_list = [
        {
            'n': 80000,
            's': s,
            'sampling': sampling,
            'i': i,
            'graph_id': graph_id,
            'feature_seed': random.randint(0, 2**31 - 1),
            'edge_sampling': 'first-n',
        }
        for s, sampling, i in product(
            [16, 32, 64, 128, 256, 512, 1024],
            ['uniform', 'stratified'],
            range(20),
        )
    ]

    with Pool(48) as p:
        result = p.map(experiment_2cl, kwargs_list)

    header = ['num_nodes', 'graph_id', 'sampling', 'data_size', 'num_neg', 'i']
    header.extend(['w_local_{}'.format(k) for k in range(8)])
    header.extend(['se_local_{}'.format(k) for k in range(8)])
    header.extend(['w_non_local_{}'.format(k) for k in range(2)])
    header.extend(['se_non_local_{}'.format(k) for k in range(2)])

    with open(dest, 'w') as f:
        f.write(','.join(header) + '\n')
        # p.map preserves input order, so results line up with kwargs_list.
        for info, kwargs in zip(result, kwargs_list):
            # NOTE(review): the num_nodes column is hard-coded to 5000 rather
            # than taken from the graph info -- confirm this matches the graph.
            row = [
                5000,
                kwargs['graph_id'],
                kwargs['sampling'],
                kwargs['n'],
                kwargs['s'],
                kwargs['i'],
            ]
            # The header assumes 8 entries in weights_1/se_1 and 2 entries in
            # weights_2/se_2; the lengths are not checked here.
            row.extend(info['weights_1'])
            row.extend(info['se_1'])
            row.extend(info['weights_2'])
            row.extend(info['se_2'])
            f.write(','.join(map(str, row)) + '\n')
def fig_3c(dest='synthetic-fix-n-10k.csv'):
    """Run the ``experiment`` sweep at fixed ``n=10000`` and save it via ``write_csv``.

    One run is scheduled for every combination of negative-sample count ``s``
    (3 ... 768), sampling scheme (``'uniform'`` / ``'stratified'`` /
    ``'importance'``) and replicate index ``i`` (0-49), always with
    ``edge_sampling='random-uniform'``.  Each run gets its own random
    ``feature_seed``.  Runs are dispatched through a 4-worker process pool.

    Args:
        dest: Output path handed to ``write_csv``.
    """
    # The graph seed does not depend on the sweep variables, so read it once
    # instead of re-parsing the info for every configuration.
    graph_id = parse_info(PATH)['graph_seed']

    kwargs_list = [
        {
            'n': 10000,
            's': s,
            'sampling': sampling,
            'i': i,
            'graph_id': graph_id,
            'feature_seed': random.randint(0, 2**31 - 1),
            'edge_sampling': 'random-uniform',
        }
        for s, sampling, i in product(
            [3, 6, 12, 24, 48, 96, 192, 384, 768],
            ['uniform', 'stratified', 'importance'],
            range(50),
        )
    ]

    with Pool(4) as p:
        result = p.map(experiment, kwargs_list)

    write_csv(dest, kwargs_list, result)
def fig_3a_3b(dest='synthetic-vary-n-and-s-very-small.csv'):
    """Run the ``experiment`` sweep over data size and save it via ``write_csv``.

    One run is scheduled for every combination of data size ``n``
    (500, 1000, ..., 5000), negative-sample count ``s`` (24 or 96), sampling
    scheme (``'uniform'`` / ``'stratified'`` / ``'importance'``) and replicate
    index ``i`` (0-49), always with ``edge_sampling='random-uniform'``.  Each
    run gets its own random ``feature_seed``.  Runs are dispatched through a
    4-worker process pool.

    Args:
        dest: Output path handed to ``write_csv``.
    """
    # The graph seed does not depend on the sweep variables, so read it once
    # instead of re-parsing the info for every configuration.
    graph_id = parse_info(PATH)['graph_seed']

    kwargs_list = [
        {
            'n': n,
            's': s,
            'sampling': sampling,
            'i': i,
            'graph_id': graph_id,
            'feature_seed': random.randint(0, 2**31 - 1),
            'edge_sampling': 'random-uniform',
        }
        for n, s, sampling, i in product(
            range(500, 5001, 500),
            [24, 96],
            ['uniform', 'stratified', 'importance'],
            range(50),
        )
    ]

    with Pool(4) as p:
        result = p.map(experiment, kwargs_list)

    write_csv(dest, kwargs_list, result)
def fig_4a_4b(dest='synthetic-ml-on-1cl.csv'):
    """Run the ``experiment_1cl`` sweep and save it via ``write_csv``.

    One run is scheduled for every combination of negative-sample count ``s``
    (16 ... 1024), sampling scheme (``'uniform'`` / ``'stratified'``) and
    replicate index ``i`` (0-19), always with ``n=80000`` and
    ``edge_sampling='first-n'``.  Each run gets its own random
    ``feature_seed``.  Runs are dispatched through a 48-worker process pool.

    Args:
        dest: Output path handed to ``write_csv``.
    """
    # The graph seed does not depend on the sweep variables, so read it once
    # instead of re-parsing the info for every configuration.
    graph_id = parse_info(PATH)['graph_seed']

    kwargs_list = [
        {
            'n': 80000,
            's': s,
            'sampling': sampling,
            'i': i,
            'graph_id': graph_id,
            'feature_seed': random.randint(0, 2**31 - 1),
            'edge_sampling': 'first-n',
        }
        for s, sampling, i in product(
            [16, 32, 64, 128, 256, 512, 1024],
            ['uniform', 'stratified'],
            range(20),
        )
    ]

    with Pool(48) as p:
        result = p.map(experiment_1cl, kwargs_list)

    write_csv(dest, kwargs_list, result)
def extract_feature(**kwargs):
    """Replay the stored edge sequence and extract features for sampled edges.

    A ``DirectedMultiGraph`` is first populated with the stored ER edges.  The
    stored choice edges are then replayed in order: for every sampled edge a
    candidate set is drawn, features are extracted against the graph as it
    stands *before* that edge is inserted, and then the edge is added.

    Keyword Args:
        n: Number of choice edges to extract features for.
        s: Number of negative samples per choice edge.
        feature_seed: Seed passed to ``np.random.seed``.
        sampling: ``'stratified'``, ``'importance'``, or anything else for
            uniform negative sampling.
        edge_sampling: ``'first-n'`` to use the first ``n`` choice edges;
            any other value draws ``n`` edge indices without replacement.

    Returns:
        A pair ``(features, lnsws)`` of numpy arrays with one entry per
        sampled edge.  ``lnsws`` holds the second value returned by
        ``neg_samp_by_locality`` (all zeros for uniform sampling) --
        presumably log negative-sampling weights; confirm against the graph
        class.
    """
    n = kwargs['n']
    s = kwargs['s']
    seed = kwargs['feature_seed']
    sampling = kwargs['sampling']
    edge_sampling = kwargs['edge_sampling']

    np.random.seed(seed)

    info = parse_info(PATH)
    er_edges = np.load(os.path.join(PATH, info['er_edges_path']), mmap_mode='r')
    choice_edges = np.load(os.path.join(PATH, info['choice_edges_path']), mmap_mode='r')

    G = DirectedMultiGraph(info['num_nodes'])
    for actor, target in er_edges:
        G.add_edge(actor, target)

    features = []
    lnsws = []

    if edge_sampling == 'first-n':
        to_sample = set(range(n))
    else:
        # NOTE(review): this draws from the `random` module, which the
        # np.random.seed call above does not seed, so the choice of edges is
        # not controlled by feature_seed -- confirm this is intended.
        to_sample = set(random.sample(range(len(choice_edges)), n))

    # G is local and discarded on return, so replaying edges beyond the last
    # sampled index cannot affect the result; stop there.
    last_sampled = max(to_sample, default=-1)

    # Running counts, started at 1, of how many sampled edges fell into each
    # of the three feature-based classes below; they drive the 'importance'
    # allocation.
    n1 = n2 = n3 = 1

    for i, (actor, target) in enumerate(choice_edges):
        if i > last_sampled:
            break

        if i in to_sample:
            if sampling == 'stratified':
                candidates, lnsw = G.neg_samp_by_locality(
                    actor, target, num_neg=s,
                    max_num_local_sample=[s // 3, s // 3])
            elif sampling == 'importance':
                # Split s - 3 slots in proportion to the running counts and
                # add one, so each local cap is at least 1.
                total = n1 + n2 + n3
                s1 = int(np.floor((s - 3) * n1 / total) + 1)
                s2 = int(np.floor((s - 3) * n2 / total) + 1)
                candidates, lnsw = G.neg_samp_by_locality(
                    actor, target, num_neg=s,
                    max_num_local_sample=[s1, s2])
            else:
                # Uniform: the true target first, then s nodes drawn while
                # excluding actor and target, all with zero weight.
                candidates = [target]
                candidates.extend(
                    randint_excluding(0, G.num_nodes, [actor, target], size=s))
                lnsw = [0.0] * len(candidates)

            feature = G.extract_feature(actor, candidates)

            # Classify by column 0 of the feature matrix (the target in the
            # uniform branch; presumably also in the others -- confirm).
            if feature[5][0] + feature[6][0] > 0.5:
                n1 += 1
            elif feature[8][0] > 0.5:
                n2 += 1
            else:
                n3 += 1

            # Drop the last feature row and transpose before storing.
            features.append(feature[:-1].T)
            lnsws.append(lnsw)

        # Every replayed edge is inserted, sampled or not.
        G.add_edge(actor, target)

    return np.array(features), np.array(lnsws)