Example #1
import collections
import itertools


class RayMapReduce(object):
    def __init__(self,
                 map_func,
                 reduce_func,
                 num_workers=None,
                 host_address=None):
        """
        map_func

          Map Function. 
        
        reduce_func

          Reducer function. 
         
        num_workers

          The number of workers to create in the pool. If None, then defaults to the
          number of CPUs available on the current host.
        
        host_address

          The IP address of master node. If None, then defaults to localhost.

        """
        from ray.util.multiprocessing.pool import Pool  # import within __init__()
        self.pool = Pool(processes=num_workers, ray_address=host_address)
        self.map_func = map_func
        self.reduce_func = reduce_func

    def partition(self, mapped_values):
        """
        Organize the mapped values by their key.
        Returns an unsorted sequence of tuples with a key and a sequence of values.
        """
        partitioned_data = collections.defaultdict(list)
        for key, value in mapped_values:
            partitioned_data[key].append(value)
        return partitioned_data.items()

    def __call__(self, inputs, chunksize=1):
        """
        Process the inputs through the map and reduce functions given.
        
        inputs
          An iterable containing the input data to be processed.
        
        chunksize=1
          The number of input items handed to each worker at a time. This
          can be used to tune performance during the mapping phase.
        """
        map_responses = self.pool.map(self.map_func,
                                      inputs,
                                      chunksize=chunksize)
        partitioned_data = self.partition(itertools.chain(*map_responses))
        reduced_values = self.pool.map(self.reduce_func, partitioned_data)
        return reduced_values
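
A minimal usage sketch (the count_words and sum_counts helpers are
hypothetical, written only to match the (key, value) protocol the class
expects: map_func returns an iterable of pairs, reduce_func consumes one
(key, values) tuple from partition()):

def count_words(line):
    # Map: emit a (word, 1) pair for every word in the line.
    return [(word, 1) for word in line.split()]

def sum_counts(item):
    # Reduce: item is a (key, values) tuple produced by partition().
    word, counts = item
    return (word, sum(counts))

word_counter = RayMapReduce(count_words, sum_counts)
print(word_counter(["to be or not to be", "not to be"]))
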
Example #2
File: core.py Project: mrpep/paips
    def __parallel_run_ray(self, run_async=False):
        """
        Initializes a ray pool. Asynchronous pools are still not implemented.
        """
        from ray.util.multiprocessing.pool import Pool

        def set_niceness(niceness):  # pool initializer
            os.nice(niceness)

        def worker_wrapper(x):
            os.nice(self.parameters.get('niceness', 20))  # lower priority inside the worker too
            # Bind this tuple of 'parallel' parameter values, then run the task.
            for k, v in zip(self.parameters['parallel'], x):
                self.parameters[k] = v
            out = self.process()
            return out

        iterable_vars = list(
            zip(*[self.parameters[k] for k in self.parameters['parallel']]))
        n_cores = self.parameters.get('n_cores', 4)
        pool = Pool(processes=n_cores,
                    initializer=set_niceness,
                    initargs=(self.parameters.get('niceness', 20), ),
                    ray_address='auto')  # 'auto' connects to the Ray cluster already running on this host
        outs = pool.map(worker_wrapper, iterable_vars)

        return self.__process_outputs(outs)
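
To make the zipping step concrete, here is a small sketch of what
iterable_vars ends up holding (the parameter names and values are invented
for illustration):

parameters = {'parallel': ['lr', 'seed'],
              'lr': [0.1, 0.01],
              'seed': [1, 2]}
iterable_vars = list(zip(*[parameters[k] for k in parameters['parallel']]))
print(iterable_vars)  # [(0.1, 1), (0.01, 2)] -- one tuple per worker call
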
Example #3
import time


def approximate_pi_distributed(num_samples):
    from ray.util.multiprocessing.pool import Pool # NOTE: Only the import statement is changed.
    pool = Pool()
        
    start = time.time()
    num_inside = 0
    sample_batch_size = 100000
    for result in pool.map(sample, [sample_batch_size for _ in range(num_samples//sample_batch_size)]):
        num_inside += result
        
    print("pi ~= {}".format((4*num_inside)/num_samples))
    print("Finished in: {:.2f}s".format(time.time()-start))
Example #4
    for i in book_tets:
        movie_key = 'b' + str(func_get_movie(i))
        tet_dict[movie_key] = i
    list_movies = hope.index.tolist()

    "##################################################"

    "################## COMPUTE SIMILARITY BETWEEN MOVIE TETS ####################"

    num_movies = len(tet_dict)
    print(num_movies)
    pool = Pool(mp.cpu_count() - 2)  # leave two CPUs free for the rest of the system
    results = pool.map(partial(f,
                               tet_dict=tet_dict,
                               list_movies=list_movies,
                               spec=spec_book),
                       list_movies,
                       chunksize=2000)

    data = [x[1] for x in results]    # one row of similarity scores per movie
    movies = [x[0] for x in results]  # movie labels for the rows/columns

    df = pd.DataFrame(data=data, index=movies, columns=movies).fillna(0)
    cols = df.columns.values.tolist()
    rows = list(df.index)

    X = df.to_numpy()
    # Symmetrize: mirror across the diagonal without doubling the diagonal itself.
    X = X + X.T - np.diag(np.diag(X))
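
The last line mirrors the upper triangle of X into the lower one, which
suggests f fills each movie pair only once. A tiny sketch with invented
values shows why the diagonal is subtracted once:

import numpy as np

X = np.array([[1.0, 0.8],
              [0.0, 1.0]])         # only the upper triangle is filled
X = X + X.T - np.diag(np.diag(X))  # X + X.T alone would double the diagonal
print(X)                           # [[1.  0.8]
                                   #  [0.8 1. ]]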