def fig_4c_4d(dest='synthetic-ml-on-2cl-big-n-small-s-truncated-feature.csv'):
    """Run `experiment_2cl` over a parameter grid and write one CSV row per task.

    A task is created for every combination of negative-sample count `s`
    (16, 32, ..., 1024), sampling scheme ('uniform' or 'stratified') and
    repetition index `i` (0..19), i.e. 280 tasks. Every task uses
    n=80000, 'first-n' edge sampling and its own `feature_seed` drawn from
    the module-level `random` generator. Tasks are mapped over a pool of 48
    worker processes.

    The CSV columns are: num_nodes, graph_id, sampling, data_size (the task's
    `n`), num_neg (the task's `s`), i, followed by the values of
    `weights_1`, `se_1`, `weights_2` and `se_2` from each experiment result.

    Args:
        dest: path of the CSV file to write; opened in 'w' mode, so an
            existing file is overwritten.
    """
    # The graph seed does not depend on the task, so read the metadata once
    # rather than re-parsing it for every task dict.
    graph_id = parse_info(PATH)['graph_seed']

    kwargs_list = [
        {
            'n': 80000,
            's': s,
            'sampling': sampling,
            'i': i,
            'graph_id': graph_id,
            'feature_seed': random.randint(0, 2**31-1),
            'edge_sampling': 'first-n',
        }
        for s, sampling, i in product(
            [16, 32, 64, 128, 256, 512, 1024], ['uniform', 'stratified'], range(20))
    ]

    with Pool(48) as p:
        result = p.map(experiment_2cl, kwargs_list)

    # The header assumes weights_1/se_1 have 8 entries and weights_2/se_2 have
    # 2 entries each; the rows below write whatever length the result holds.
    header = ['num_nodes', 'graph_id', 'sampling', 'data_size', 'num_neg', 'i']
    header.extend('w_local_{}'.format(k) for k in range(8))
    header.extend('se_local_{}'.format(k) for k in range(8))
    header.extend('w_non_local_{}'.format(k) for k in range(2))
    header.extend('se_non_local_{}'.format(k) for k in range(2))

    with open(dest, 'w') as f:
        f.write(','.join(header) + '\n')
        # p.map preserves input order, so results line up with kwargs_list.
        for info, kwargs in zip(result, kwargs_list):
            # NOTE(review): num_nodes is written as the literal 5000 rather
            # than taken from the graph metadata -- confirm it matches PATH.
            row = [
                5000, kwargs['graph_id'], kwargs['sampling'], kwargs['n'],
                kwargs['s'], kwargs['i']
            ]
            row.extend(info['weights_1'])
            row.extend(info['se_1'])
            row.extend(info['weights_2'])
            row.extend(info['se_2'])
            f.write(','.join(map(str, row)) + '\n')
def fig_3c(dest='synthetic-fix-n-10k.csv'):
  """Run `experiment` with n fixed at 10000 over a grid of `s` and sampling schemes.

  A task is created for every combination of negative-sample count `s`
  (3, 6, ..., 768), sampling scheme ('uniform', 'stratified', 'importance')
  and repetition index `i` (0..49), i.e. 1350 tasks. Every task uses
  'random-uniform' edge sampling and its own `feature_seed` drawn from the
  module-level `random` generator. Tasks are mapped over a pool of 4 worker
  processes and the task list plus results are handed to `write_csv`.

  Args:
      dest: forwarded to `write_csv` as its first argument.
  """
  # The graph seed is identical for every task, so read the metadata once
  # rather than re-parsing it for every task dict.
  graph_id = parse_info(PATH)['graph_seed']

  kwargs_list = [
      {
          'n': 10000,
          's': s,
          'sampling': sampling,
          'i': i,
          'graph_id': graph_id,
          'feature_seed': random.randint(0, 2**31-1),
          'edge_sampling': 'random-uniform',
      }
      for s, sampling, i in product(
          [3, 6, 12, 24, 48, 96, 192, 384, 768],
          ['uniform', 'stratified', 'importance'],
          range(50))
  ]

  with Pool(4) as p:
    result = p.map(experiment, kwargs_list)
  write_csv(dest, kwargs_list, result)
def fig_3a_3b(dest='synthetic-vary-n-and-s-very-small.csv'):
  """Run `experiment` while varying both the data size `n` and `s`.

  A task is created for every combination of `n` (500, 1000, ..., 5000),
  negative-sample count `s` (24 or 96), sampling scheme ('uniform',
  'stratified', 'importance') and repetition index `i` (0..49), i.e. 3000
  tasks. Every task uses 'random-uniform' edge sampling and its own
  `feature_seed` drawn from the module-level `random` generator. Tasks are
  mapped over a pool of 4 worker processes and the task list plus results are
  handed to `write_csv`.

  Args:
      dest: forwarded to `write_csv` as its first argument.
  """
  # The graph seed is identical for every task, so read the metadata once
  # rather than re-parsing it for every task dict.
  graph_id = parse_info(PATH)['graph_seed']

  kwargs_list = [
      {
          'n': n,
          's': s,
          'sampling': sampling,
          'i': i,
          'graph_id': graph_id,
          'feature_seed': random.randint(0, 2**31-1),
          'edge_sampling': 'random-uniform',
      }
      for n, s, sampling, i in product(
          range(500, 5001, 500),
          [24, 96],
          ['uniform', 'stratified', 'importance'],
          range(50))
  ]

  with Pool(4) as p:
    result = p.map(experiment, kwargs_list)
  write_csv(dest, kwargs_list, result)
def fig_4a_4b(dest='synthetic-ml-on-1cl.csv'):
    """Run `experiment_1cl` over a grid of `s` and sampling schemes.

    A task is created for every combination of negative-sample count `s`
    (16, 32, ..., 1024), sampling scheme ('uniform' or 'stratified') and
    repetition index `i` (0..19), i.e. 280 tasks. Every task uses n=80000,
    'first-n' edge sampling and its own `feature_seed` drawn from the
    module-level `random` generator. Tasks are mapped over a pool of 48
    worker processes and the task list plus results are handed to
    `write_csv`.

    Args:
        dest: forwarded to `write_csv` as its first argument.
    """
    # The graph seed is identical for every task, so read the metadata once
    # rather than re-parsing it for every task dict.
    graph_id = parse_info(PATH)['graph_seed']

    kwargs_list = [
        {
            'n': 80000,
            's': s,
            'sampling': sampling,
            'i': i,
            'graph_id': graph_id,
            'feature_seed': random.randint(0, 2**31-1),
            'edge_sampling': 'first-n',
        }
        for s, sampling, i in product(
            [16, 32, 64, 128, 256, 512, 1024], ['uniform', 'stratified'], range(20))
    ]

    with Pool(48) as p:
        result = p.map(experiment_1cl, kwargs_list)
    write_csv(dest, kwargs_list, result)
def extract_feature(**kwargs):
  """Extract candidate-set features and log sampling weights for selected choice edges.

  The graph is first populated with every edge from the `er_edges` array.
  The `choice_edges` array is then replayed in order: for each selected edge
  a candidate set is built, `G.extract_feature` is evaluated on it, and the
  edge is added to the graph afterwards (unselected edges are added too), so
  features for an edge only see the edges that precede it.

  Keyword Args:
      n: number of choice edges to select.
      s: number of negative samples requested per selected edge.
      feature_seed: seed for NumPy's global RNG and for the edge-subset draw.
      sampling: 'stratified', 'importance', or anything else for uniform
          negative sampling.
      edge_sampling: 'first-n' selects indices 0..n-1; any other value draws
          `n` distinct indices at random from all choice edges.

  Returns:
      A pair `(features, lnsws)` of NumPy arrays built from the per-edge
      feature matrices (`feature[:-1].T`) and the per-edge log sampling
      weights, in choice-edge order.

  Raises:
      KeyError: if one of the keyword arguments above is missing.
      ValueError: from `random.sample` if `n` exceeds the number of choice
          edges when random edge sampling is used.
  """
  n = kwargs['n']
  s = kwargs['s']
  seed = kwargs['feature_seed']
  sampling = kwargs['sampling']
  edge_sampling = kwargs['edge_sampling']

  np.random.seed(seed)

  info = parse_info(PATH)
  er_edges     = np.load(os.path.join(PATH, info['er_edges_path']),     mmap_mode='r')
  choice_edges = np.load(os.path.join(PATH, info['choice_edges_path']), mmap_mode='r')

  G = DirectedMultiGraph(info['num_nodes'])
  for actor, target in er_edges:
    G.add_edge(actor, target)

  features = []
  lnsws = []

  if edge_sampling == 'first-n':
    to_sample = set(range(n))
  else:
    # np.random.seed() does not affect the stdlib `random` module, so draw the
    # subset from a dedicated generator seeded with the same value. This makes
    # the selection reproducible from `feature_seed` without touching either
    # the NumPy stream or the global `random` state.
    edge_rng = random.Random(None if seed is None else int(seed))
    to_sample = set(edge_rng.sample(range(len(choice_edges)), n))

  # Running counts (each starting at 1) of how many processed edges fell into
  # each of the three categories below; they set the per-stratum quotas for
  # 'importance' sampling.
  n1 = n2 = n3 = 1

  for i, (actor, target) in enumerate(choice_edges):
    if i in to_sample:
      if sampling == 'stratified':
        candidates, lnsw = G.neg_samp_by_locality(
            actor, target, num_neg=s, max_num_local_sample=[s//3, s//3])
      elif sampling == 'importance':
        # Split s-3 slots proportionally to the counts seen so far, with at
        # least one slot for each of the first two strata.
        total = n1 + n2 + n3
        s1 = int(np.floor((s-3) * n1 / total) + 1)
        s2 = int(np.floor((s-3) * n2 / total) + 1)
        candidates, lnsw = G.neg_samp_by_locality(
            actor, target, num_neg=s, max_num_local_sample=[s1, s2])
      else:
        # Uniform: the true target first, then s random nodes other than the
        # actor and the target, all with log-weight 0.
        candidates = [target]
        candidates.extend(randint_excluding(0, G.num_nodes, [actor, target], size=s))
        lnsw = [0.0] * len(candidates)

      feature = G.extract_feature(actor, candidates)
      # NOTE(review): column 0 is presumably the true target (it is placed
      # first in the uniform branch); the meaning of rows 5, 6 and 8 is
      # defined by G.extract_feature -- confirm there.
      if feature[5][0] + feature[6][0] > 0.5:
        n1 += 1
      elif feature[8][0] > 0.5:
        n2 += 1
      else:
        n3 += 1

      # Drop the last feature row and store one row per candidate.
      features.append(feature[:-1].T)
      lnsws.append(lnsw)

    # Every choice edge joins the graph only after it has been processed.
    G.add_edge(actor, target)

  return np.array(features), np.array(lnsws)