Example #1
import random
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count

# count_lines, _write_walks_to_disk, and the graph module are helpers from the
# surrounding walks module (sketched after this example).
def write_walks_to_disk(G, filebase, num_paths, path_length, alpha=0, rand=random.Random(0), num_workers=cpu_count(),
                        always_rebuild=True):
  global __current_graph
  global __vertex2str
  __current_graph = G
  __vertex2str = {v: str(v) for v in G.nodes()}
  files_list = ["{}.{}".format(filebase, str(x)) for x in range(num_paths)]
  expected_size = len(G)
  args_list = []
  files = []

  # Distribute num_paths walk passes over the workers: one pass per file when
  # there are at least as many workers as passes, otherwise roughly equal chunks.
  if num_paths <= num_workers:
    paths_per_worker = [1 for x in range(num_paths)]
  else:
    paths_per_worker = [len([y for y in x if y is not None])
                        for x in graph.grouper(int(num_paths / num_workers) + 1, range(1, num_paths + 1))]

  # Reuse any file that already holds the expected number of walks,
  # unless always_rebuild forces regeneration.
  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for size, file_, ppw in zip(executor.map(count_lines, files_list), files_list, paths_per_worker):
      if always_rebuild or size != (ppw * expected_size):
        args_list.append((ppw, path_length, alpha, random.Random(rand.randint(0, 2**31)), file_))
      else:
        files.append(file_)

  # Write the remaining walk files in parallel.
  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for file_ in executor.map(_write_walks_to_disk, args_list):
      files.append(file_)

  return files
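
All three examples lean on helpers that are not shown: count_lines, _write_walks_to_disk, and graph.grouper. The names come from the examples themselves; the bodies below are a minimal sketch of what they are assumed to do, not the original implementations. In particular, G.random_walk is an assumed method on the graph object.

import os
import random
from itertools import zip_longest

def count_lines(f):
  # Assumed helper: number of lines in f, or 0 if the file does not exist yet.
  if os.path.isfile(f):
    with open(f) as fin:
      return sum(1 for _ in fin)
  return 0

def grouper(n, iterable, padvalue=None):
  # Assumed helper (referenced as graph.grouper): chunk an iterable into
  # tuples of length n, padding the last chunk with padvalue.
  # grouper(3, 'abcdefg') -> ('a','b','c'), ('d','e','f'), ('g',None,None)
  return zip_longest(*[iter(iterable)] * n, fillvalue=padvalue)

def _write_walks_to_disk(args):
  # Assumed worker: unpack one task tuple, generate num_paths passes of random
  # walks over the shared graph (one walk per node per pass, so the file ends
  # up with num_paths * len(G) lines, matching the expected_size check above),
  # and return the path of the file it wrote.
  num_paths, path_length, alpha, rand, f = args
  G = __current_graph
  with open(f, 'w') as fout:
    for _ in range(num_paths):
      for node in G.nodes():
        walk = G.random_walk(path_length, rand=rand, alpha=alpha, start=node)
        fout.write(" ".join(__vertex2str[v] for v in walk) + "\n")
  return f

The graph is stashed in a module-level global rather than passed as an argument so that forked worker processes inherit it instead of pickling it once per task.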
Example #2
# Imports and module-level helpers as in Example #1; this variant skips the
# __vertex2str mapping.
def write_walks_to_disk(G, filebase, num_paths, path_length, alpha=0, rand=random.Random(0), num_workers=cpu_count(),
                        always_rebuild=True):
  global __current_graph
  __current_graph = G
  files_list = ["{}.{}".format(filebase, str(x)) for x in range(num_paths)]
  expected_size = len(G)
  args_list = []
  files = []

  if num_paths <= num_workers:
    paths_per_worker = [1 for x in range(num_paths)]
  else:
    paths_per_worker = [len([y for y in x if y is not None])
                        for x in graph.grouper(int(num_paths / num_workers) + 1, range(1, num_paths + 1))]

  # Reuse walk files that already have the expected line count unless a
  # rebuild is forced.
  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for size, file_, ppw in zip(executor.map(count_lines, files_list), files_list, paths_per_worker):
      if always_rebuild or size != (ppw * expected_size):
        args_list.append((ppw, path_length, alpha, random.Random(rand.randint(0, 2**31)), file_))
      else:
        files.append(file_)

  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for file_ in executor.map(_write_walks_to_disk, args_list):
      files.append(file_)

  return files
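
For context, a hedged usage sketch. The loader name load_adjacencylist and the file names are assumptions; substitute whatever produces a graph object with .nodes() and len().

import random
import graph  # the module the examples reference

G = graph.load_adjacencylist("example.adjlist")  # assumed loader
walk_files = write_walks_to_disk(G, "walks/out", num_paths=10, path_length=40,
                                 alpha=0, rand=random.Random(0), num_workers=4)
print(walk_files)  # e.g. ['walks/out.0', 'walks/out.1', ...]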
Example #3
# Imports and module-level helpers as in Example #1.
def write_walks_to_disk(G, filebase, num_paths, path_length, alpha=0, rand=random.Random(0), num_workers=cpu_count(),
                        always_rebuild=True):
  global __current_graph
  global __vertex2str
  __current_graph = G
  # Unlike Example #1, node ids are shifted to be 1-based in the output.
  __vertex2str = {v: str(v + 1) for v in G.nodes()}
  files_list = ["{}.{}".format(filebase, str(x)) for x in range(num_paths)]
  expected_size = len(G)
  args_list = []
  files = []
  # files = filebase
  res = []  # only used by the commented-out .mat export below

  if num_paths <= num_workers:
    paths_per_worker = [1 for x in range(num_paths)]
  else:
    paths_per_worker = [len([y for y in x if y is not None])
                        for x in graph.grouper(int(num_paths / num_workers) + 1, range(1, num_paths + 1))]

  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for size, file_, ppw in zip(executor.map(count_lines, files_list), files_list, paths_per_worker):
      if always_rebuild or size != (ppw * expected_size):
        args_list.append((ppw, path_length, alpha, random.Random(rand.randint(0, 2**31)), file_))
      else:
        files.append(file_)

  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for outbuf in executor.map(_write_walks_to_disk, args_list):
      # The worker returns the path of the file it wrote.
      files.append(outbuf)
      # res.extend(outbuf)

  # res = np.array(res)
  # savemat(filebase, mdict={"walk": res})

  # args = [num_paths, path_length, alpha, rand, files]
  # _write_mat_walks_to_disk(args)

  return files
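
The commented-out lines in Example #3 hint at an alternative that collects the walks in memory and saves them as a MATLAB matrix instead of text files. A sketch of that path, assuming the workers return the walks themselves (via res.extend(outbuf)) and all walks share one length; save_walks_as_mat is a hypothetical helper name:

import numpy as np
from scipy.io import savemat

def save_walks_as_mat(walks, filebase):
  # walks: a list of equal-length walks (lists of node ids), e.g. collected
  # in res in the second executor loop above.
  savemat(filebase, mdict={"walk": np.array(walks)})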