import random
from multiprocessing import cpu_count
from concurrent.futures import ProcessPoolExecutor

# `graph`, `count_lines`, and `_write_walks_to_disk` are module-level helpers
# from the surrounding package; see the sketch below this function.

def write_walks_to_disk(G, filebase, num_paths, path_length, alpha=0,
                        rand=random.Random(0), num_workers=cpu_count(),
                        always_rebuild=True):
    global __current_graph
    global __vertex2str
    # Share the graph and the vertex-to-string table with the worker processes.
    __current_graph = G
    __vertex2str = {v: str(v) for v in G.nodes()}
    files_list = ["{}.{}".format(filebase, str(x)) for x in range(num_paths)]
    expected_size = len(G)
    args_list = []
    files = []

    # Split num_paths walk files across the workers: one file per worker when
    # there are fewer paths than workers, otherwise near-equal chunks.
    if num_paths <= num_workers:
        paths_per_worker = [1 for x in range(num_paths)]
    else:
        paths_per_worker = [len([y for y in x if y is not None])
                            for x in graph.grouper(int(num_paths / num_workers) + 1,
                                                   range(1, num_paths + 1))]

    # Reuse files that already hold the expected number of walks, unless a
    # rebuild is forced.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        for size, file_, ppw in zip(executor.map(count_lines, files_list),
                                    files_list, paths_per_worker):
            if always_rebuild or size != (ppw * expected_size):
                args_list.append((ppw, path_length, alpha,
                                  random.Random(rand.randint(0, 2**31)), file_))
            else:
                files.append(file_)

    # Generate the missing walk files in parallel.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        for file_ in executor.map(_write_walks_to_disk, args_list):
            files.append(file_)

    return files
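# The two helpers called above, count_lines and _write_walks_to_disk, live at
# module level alongside write_walks_to_disk. The versions below are a minimal
# sketch inferred from the call sites, not the canonical implementations:
# count_lines(f) must return a line count (0 for a missing file), and
# _write_walks_to_disk(args) must unpack one tuple from args_list and return
# the path it wrote. graph.build_deepwalk_corpus_iter is assumed to yield one
# walk (a list of vertices) at a time.
import logging
from os import path
from time import time

logger = logging.getLogger(__name__)

def count_lines(f):
    """Number of lines in f; 0 if the file does not exist yet."""
    if path.isfile(f):
        with open(f) as fin:
            return sum(1 for _ in fin)
    return 0

def _write_walks_to_disk(args):
    """Worker body: write num_paths walks over the shared graph to file f."""
    num_paths, path_length, alpha, rand, f = args
    G = __current_graph  # set by the parent before the pool starts
    t_0 = time()
    with open(f, 'w') as fout:
        for walk in graph.build_deepwalk_corpus_iter(
                G=G, num_paths=num_paths, path_length=path_length,
                alpha=alpha, rand=rand):
            fout.write(u"{}\n".format(u" ".join(__vertex2str[v] for v in walk)))
    logger.debug("Generated new file {}, it took {} seconds".format(f, time() - t_0))
    return f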
# Python 3 variant: no vertex-to-string relabeling (__vertex2str is not built
# here); the control flow is otherwise identical to the function above.
def write_walks_to_disk(G, filebase, num_paths, path_length, alpha=0,
                        rand=random.Random(0), num_workers=cpu_count(),
                        always_rebuild=True):
    global __current_graph
    __current_graph = G
    files_list = ["{}.{}".format(filebase, str(x)) for x in range(num_paths)]
    expected_size = len(G)
    args_list = []
    files = []

    if num_paths <= num_workers:
        paths_per_worker = [1 for x in range(num_paths)]
    else:
        paths_per_worker = [len(list(filter(lambda z: z is not None, [y for y in x])))
                            for x in graph.grouper(int(num_paths / num_workers) + 1,
                                                   range(1, num_paths + 1))]

    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        for size, file_, ppw in zip(executor.map(count_lines, files_list),
                                    files_list, paths_per_worker):
            if always_rebuild or size != (ppw * expected_size):
                args_list.append((ppw, path_length, alpha,
                                  random.Random(rand.randint(0, 2**31)), file_))
            else:
                files.append(file_)

    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        for file_ in executor.map(_write_walks_to_disk, args_list):
            files.append(file_)

    return files
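# Worked example of the paths_per_worker split used above. The grouper()
# replica is an assumption about what graph.grouper does (the standard
# zip_longest-based chunker); it is included only so the example runs on
# its own.
from itertools import zip_longest

def _grouper_example(n, iterable, padvalue=None):
    """Chunk `iterable` into n-tuples, padding the last chunk with padvalue."""
    return zip_longest(*[iter(iterable)] * n, fillvalue=padvalue)

# With num_paths=10 and num_workers=4, the chunk size is int(10 / 4) + 1 == 3:
_chunks = list(_grouper_example(3, range(1, 11)))
# _chunks == [(1, 2, 3), (4, 5, 6), (7, 8, 9), (10, None, None)]
# Counting the non-None entries gives each worker's share of the paths:
# [3, 3, 3, 1]  (sums to num_paths == 10)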
# Variant that relabels vertices to be 1-indexed (str(v + 1)), matching the
# disabled MATLAB-style .mat export at the bottom. The second executor loop
# originally appended the stale `file_` from the previous loop instead of the
# worker's return value; fixed to append `outbuf`.
def write_walks_to_disk(G, filebase, num_paths, path_length, alpha=0,
                        rand=random.Random(0), num_workers=cpu_count(),
                        always_rebuild=True):
    global __current_graph
    global __vertex2str
    __current_graph = G
    __vertex2str = {v: str(v + 1) for v in G.nodes()}
    files_list = ["{}.{}".format(filebase, str(x)) for x in range(num_paths)]
    expected_size = len(G)
    args_list = []
    files = []
    # res = []

    if num_paths <= num_workers:
        paths_per_worker = [1 for x in range(num_paths)]
    else:
        paths_per_worker = [len([y for y in x if y is not None])
                            for x in graph.grouper(int(num_paths / num_workers) + 1,
                                                   range(1, num_paths + 1))]

    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        for size, file_, ppw in zip(executor.map(count_lines, files_list),
                                    files_list, paths_per_worker):
            if always_rebuild or size != (ppw * expected_size):
                args_list.append((ppw, path_length, alpha,
                                  random.Random(rand.randint(0, 2**31)), file_))
            else:
                files.append(file_)

    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        # The worker returns the path of the file it wrote.
        for outbuf in executor.map(_write_walks_to_disk, args_list):
            files.append(outbuf)
            # res.extend(outbuf)

    # Disabled .mat export kept for reference:
    # res = np.array(res)
    # savemat(filebase, mdict={"walk": res})
    # args = [num_paths, path_length, alpha, rand, files]
    # _write_mat_walks_to_disk(args)
    return files
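# Hypothetical end-to-end usage. graph.load_edgelist is an assumption about
# the surrounding graph module's loader; any function returning a graph that
# supports .nodes() and len() would do.
if __name__ == "__main__":
    G = graph.load_edgelist("example.edgelist", undirected=True)  # assumed API
    walk_files = write_walks_to_disk(G, "example.walks",
                                     num_paths=10, path_length=40,
                                     alpha=0, rand=random.Random(42),
                                     num_workers=4, always_rebuild=True)
    # -> ['example.walks.0', 'example.walks.1', ..., 'example.walks.9']
    print(walk_files)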