def test_invalid_func_type(self):
    """Check that ``parallel_compute`` rejects a non-callable ``func``.

    A string and a dictionary are each passed in place of the function and
    every call is expected to raise a ``TypeError``.
    """
    for not_a_function in ('not callable', {'a': 1}):
        with self.assertRaises(TypeError):
            _ = comp_utils.parallel_compute(np.random.rand(2, 5),
                                            not_a_function)
def _unit_computation(self, *args, **kwargs):
    """
    Runs the Bayesian inference on both halves of every row of the chunk

    The chunk in ``self.data`` is rolled by ``self.roll_pts`` along axis 1 and
    split at half the size of ``self.single_ao``. The first half (with both
    the current and the bias multiplied by -1) is stored in
    ``self.reverse_results`` and the second half in ``self.forward_results``.

    Parameters
    ----------
    args : list
        Not used
    kwargs : dictionary
        Not used
    """
    split_at = self.single_ao.size // 2

    # Roll the data first so that it can be split into two halves
    rolled = np.roll(self.data, self.roll_pts, axis=1)

    # Options that are identical for both parallel_compute calls
    shared_opts = dict(cores=self._cores, func_kwargs=self._bayes_parms,
                       lengthy_computation=False, verbose=self.verbose)

    # NOTE(review): the status messages call the first half "Forward" while
    # its results are stored in ``reverse_results`` (and the other way around
    # for the second half) - confirm which of the two labels is intended.
    if self.verbose:
        print('Rank {} beginning parallel compute for Forward'.format(self.mpi_rank))

    # Ensure that the bias has a positive slope: the bias of the first half is
    # multiplied by -1 and the current is multiplied by -1 accordingly
    first_current = rolled[:, :split_at] * -1
    first_bias = self.rolled_bias[:split_at] * -1
    self.reverse_results = parallel_compute(first_current,
                                            do_bayesian_inference,
                                            func_args=[first_bias, self.ex_freq],
                                            **shared_opts)

    if self.verbose:
        print('Rank {} finished processing forward sections. Now working on reverse sections'.format(self.mpi_rank))

    # The second half is used as is
    second_current = rolled[:, split_at:]
    second_bias = self.rolled_bias[split_at:]
    self.forward_results = parallel_compute(second_current,
                                            do_bayesian_inference,
                                            func_args=[second_bias, self.ex_freq],
                                            **shared_opts)

    if self.verbose:
        print('Rank {} Finished processing reverse loops (and this chunk)'.format(self.mpi_rank))
def base_parallel_compute(self, num_job_scaler=50, cores=None,
                          lengthy_computation=False, func_args=None,
                          expected_cores=None, func_kwargs=None):
    """Run ``parallel_compute`` with ``np.mean`` and validate result and log.

    Random data with ``MAX_CPU_CORES * num_job_scaler`` rows and 5 columns is
    generated and handed to ``comp_utils.parallel_compute`` while stdout is
    captured. The helper then asserts that a list with one entry per row was
    returned, that the entries match the row-wise mean, and that the captured
    output contains the line reporting the expected and the requested number
    of cores.

    Parameters
    ----------
    num_job_scaler : int, optional
        Number of rows to generate per available CPU core
    cores : int, optional
        Number of cores requested from ``parallel_compute``
    lengthy_computation : bool, optional
        Forwarded to ``parallel_compute``
    func_args : list, optional
        Forwarded to ``parallel_compute``
    expected_cores : int, optional
        Number of cores expected in the log line. When not provided, it is
        ``MAX_CPU_CORES - 1`` (``- 2`` when more than 4 cores are available)
        but never less than 1. When provided, it is clamped to the range
        ``[1, MAX_CPU_CORES]``
    func_kwargs : dict, optional
        Forwarded to ``parallel_compute``
    """
    num_rows = MAX_CPU_CORES * num_job_scaler
    data = np.random.rand(num_rows, 5)
    expected = np.mean(data, axis=1)

    if expected_cores is not None:
        # Handles single core machines
        expected_cores = max(1, min(MAX_CPU_CORES, expected_cores))
    else:
        default_cores = MAX_CPU_CORES - 1 - int(MAX_CPU_CORES > 4)
        expected_cores = max(1, default_cores)

    exp_log = 'Rank 0 starting computing on {} cores (requested {} cores)'.format(
        expected_cores, cores)

    with data_utils.capture_stdout() as get_stdout:
        result = comp_utils.parallel_compute(
            data, np.mean,
            cores=cores,
            lengthy_computation=lengthy_computation,
            func_args=func_args,
            func_kwargs=func_kwargs,
            verbose=True)
        logs = get_stdout()

    self.assertIsInstance(result, list)
    self.assertEqual(data.shape[0], len(result))
    values_match = np.allclose(expected, np.array(result))
    self.assertTrue(values_match)
    self.assertTrue(exp_log in logs)
def base_with_custom_func(self, use_args=False, use_kwargs=False,
                          cores=None, expected_cores=None):
    """Run ``parallel_compute`` with a custom function and validate it.

    One of ``func_w_args_only``, ``func_w_kwargs_only`` or
    ``func_w_args_and_kwargs`` is chosen based on the two flags and is given
    the values ``0.5`` and ``3`` through positional and / or keyword
    arguments. The helper asserts that a list with one entry per row of the
    random input is returned, that the entries equal ``(data - 0.5) * 3`` and
    that the captured output contains the line reporting the expected and the
    requested number of cores.

    Parameters
    ----------
    use_args : bool, optional
        Whether to pass values through ``func_args``. When neither flag is
        set, positional arguments only are used
    use_kwargs : bool, optional
        Whether to pass values through ``func_kwargs``
    cores : int, optional
        Number of cores requested from ``parallel_compute``
    expected_cores : int, optional
        Number of cores expected in the log line. When not provided, it is
        ``MAX_CPU_CORES - 1`` (``- 2`` when more than 4 cores are available)
        but never less than 1. When provided, it is clamped to the range
        ``[1, MAX_CPU_CORES]``
    """
    if not use_args and not use_kwargs:
        use_args = True

    func_args = None
    func_kwargs = None
    if use_args and use_kwargs:
        func = func_w_args_and_kwargs
        func_args = [0.5]
        func_kwargs = {'arg_2': 3}
    elif use_args:
        func = func_w_args_only
        func_args = [0.5, 3]
    else:
        func = func_w_kwargs_only
        func_kwargs = {'arg_1': 0.5, 'arg_2': 3}

    print('func: {}, args: {}, kwargs: {}'.format(func, func_args,
                                                  func_kwargs))

    data = np.random.rand(MAX_CPU_CORES * 50, 5)
    expected = (data - 0.5) * 3

    if expected_cores is None:
        # At least one core is always expected, even on machines with a
        # single core where the conservative default would evaluate to 0
        expected_cores = max(1, MAX_CPU_CORES - 1 - int(MAX_CPU_CORES > 4))
    else:
        # Handles single core machines: cannot expect more cores than exist
        expected_cores = max(1, min(MAX_CPU_CORES, expected_cores))

    exp_log = 'Rank 0 starting computing on {} cores (requested {} cores' \
              ')'.format(expected_cores, cores)

    with data_utils.capture_stdout() as get_stdout:
        result = comp_utils.parallel_compute(data, func, cores=cores,
                                             lengthy_computation=False,
                                             func_args=func_args,
                                             func_kwargs=func_kwargs,
                                             verbose=True)
        logs = get_stdout()

    self.assertIsInstance(result, list)
    self.assertEqual(data.shape[0], len(result))
    self.assertTrue(np.allclose(expected, np.array(result)))
    print(logs)
    self.assertTrue(exp_log in logs)
def _unit_computation(self, *args, **kwargs):
    """
    Default computation applied to a single chunk of data.

    ``self._map_function`` is applied to ``self.data`` via a single call to
    ``parallel_compute`` and the outcome is stored in ``self._results``.
    Child classes can override this function to perform pre / post-processing
    or to call ``parallel_compute`` multiple times.

    Parameters
    ----------
    args : list
        Positional arguments forwarded to the map function as ``func_args``
    kwargs : dict
        Keyword arguments forwarded to the map function as ``func_kwargs``
    """
    # TODO: Try to use the functools.partials to preconfigure the map function
    # cores = number of processes / rank here
    is_chatty_root = self.verbose and self.mpi_rank == 0
    if is_chatty_root:
        print("Rank {} at Process class' default _unit_computation() that "
              "will call parallel_compute()".format(self.mpi_rank))

    compute_opts = {'cores': self._cores,
                    'lengthy_computation': False,
                    'func_args': args,
                    'func_kwargs': kwargs,
                    'verbose': self.verbose}
    self._results = parallel_compute(self.data, self._map_function,
                                     **compute_opts)
def _unit_computation(self, *args, **kwargs):
    """
    Filters the current chunk of data in the frequency domain

    ``self.data`` is replaced by its shifted FFT (along axis 1). Depending on
    the configuration, noise floors are computed per row, the composite
    filter is multiplied in, values below the noise floor are set to 1E-16,
    and the condensed (``self.condensed_data``) and / or the filtered
    (``self.filtered_data``) outputs are populated.

    Parameters
    ----------
    args : list
        Not used
    kwargs : dictionary
        Not used
    """
    # Move the entire chunk to the (shifted) frequency domain
    spectrum = np.fft.fft(self.data, axis=1)
    self.data = np.fft.fftshift(spectrum, axes=1)

    # Noise floors are calculated before the composite filter is applied
    if self.noise_threshold is not None:
        self.noise_floors = parallel_compute(self.data, get_noise_floor,
                                             cores=self._cores,
                                             func_args=[self.noise_threshold],
                                             verbose=self.verbose)

    if isinstance(self.composite_filter, np.ndarray):
        # multiply the fft of the data with the composite filter
        self.data *= self.composite_filter

    if self.noise_threshold is not None:
        # apply thresholding
        # presumably there is one noise floor per row, which is repeated
        # along the second axis to match the data - TODO confirm
        floors_2d = np.atleast_2d(self.noise_floors)
        floor_grid = np.tile(floors_2d, self.data.shape[1])
        below_floor = np.abs(self.data) < floor_grid
        self.data[below_floor] = 1E-16

    if self.write_condensed:
        # keep only the bins listed in hot_inds
        self.condensed_data = self.data[:, self.hot_inds]

    if self.write_filtered:
        # undo the shift, take the inverse FFT and keep the real component
        unshifted = np.fft.ifftshift(self.data, axes=1)
        self.filtered_data = np.real(np.fft.ifft(unshifted, axis=1))

    if self.phase_rad > 0:
        # TODO: implement phase compensation
        # do np.roll on data
        # self.data = np.roll(self.data, 0, axis=1)
        pass
def _get_mean_response(self, labels):
    """
    Computes the mean response of each cluster

    Parameters
    -------------
    labels : 1D unsigned int array
        Array of cluster labels as obtained from the fit

    Returns
    ---------
    mean_resp : 2D numpy array
        Array of the mean response for each cluster arranged as
        [cluster number, response]
    """
    print('Calculated the Mean Response of each cluster.')

    # NOTE(review): clusters are addressed as 0 .. (number of unique labels
    # - 1), which presumably assumes contiguous labels starting at 0 - confirm
    cluster_inds = np.arange(len(np.unique(labels)))

    def _mean_resp_for_cluster(clust_ind, h5_raw, labels_vec, data_slice,
                               xform_func):
        # positions that carry this label
        members = np.argwhere(labels_vec == clust_ind)
        # responses of all these positions, at least 2D
        sliced = h5_raw[:, data_slice[1]]
        member_resp = np.atleast_2d(sliced[members, :])
        # average in the transformed (real-valued) representation
        as_real = xform_func(member_resp)
        averaged = np.mean(as_real, axis=0, keepdims=True)
        # transform back to the data type of the source
        restored = stack_real_to_target_dtype(averaged, h5_raw.dtype)
        return np.squeeze(restored)

    shared_args = [self.h5_main, labels, self.data_slice,
                   self.data_transform_func]

    # TODO: Force usage of multiple threads. This should not take 3 cores
    per_cluster = parallel_compute(cluster_inds, _mean_resp_for_cluster,
                                   func_args=shared_args,
                                   lengthy_computation=False,
                                   verbose=self.verbose)
    mean_resp = np.array(per_cluster)
    return mean_resp
def _estimate_compute_time_per_pixel(self, *args, **kwargs):
    """
    Estimates how long it takes to compute an average pixel's worth of data.
    This information should be used by the user to limit the number of pixels
    that will be processed per batch to make best use of check-pointing. This
    function is exposed to the developer of the child classes. An approximate
    can be derived if it is simpler

    Parameters
    ----------
    args : list
        Positional arguments forwarded to ``self._map_function``
    kwargs : dict
        Keyword arguments forwarded to ``self._map_function``

    Returns
    -------
    float
        Time in seconds taken by a single-core ``parallel_compute`` call over
        up to 5 randomly chosen rows of ``self.h5_main``, divided by the
        number of rows that were used

    Raises
    ------
    ValueError
        If ``self.h5_main`` does not have any rows
    """
    num_pos = self.h5_main.shape[0]
    if num_pos < 1:
        raise ValueError('Cannot estimate the compute time per pixel since '
                         'the main dataset does not have any positions')

    # The upper bound of randint is exclusive, so the number of rows itself
    # is passed as ``high``. This keeps the last row eligible and also works
    # for a dataset that has a single row.
    # np.unique sorts the indices and removes duplicates - presumably h5_main
    # is a HDF5 dataset, whose fancy indexing only accepts index lists that
    # are in increasing order (TODO confirm). Fewer than 5 rows may be timed
    # as a consequence, which the division below accounts for.
    chosen_pos = np.unique(np.random.randint(0, high=num_pos, size=5))

    t0 = tm.time()
    _ = parallel_compute(self.h5_main[chosen_pos, :], self._map_function,
                         cores=1, lengthy_computation=False,
                         func_args=args, func_kwargs=kwargs, verbose=False)
    return (tm.time() - t0) / len(chosen_pos)
def _unit_computation(self, *args, **kwargs):
    """
    Computation applied to a single chunk of data.

    ``self._map_function`` is applied to ``self.data`` via a single call to
    ``parallel_compute`` and the outcome is stored in ``self._results``.

    Parameters
    ----------
    args : list
        Ignored. The positional arguments given to the map function are
        always ``self.parm_dict``, ``self.TF_norm`` and ``self.exc_wfm``
    kwargs : dict
        Keyword arguments forwarded to the map function as ``func_kwargs``
    """
    # The incoming positional arguments are deliberately replaced
    args = [self.parm_dict, self.TF_norm, self.exc_wfm]

    is_chatty_root = self.verbose and self.mpi_rank == 0
    if is_chatty_root:
        print("Rank {} at Process class' default _unit_computation() that "
              "will call parallel_compute()".format(self.mpi_rank))

    compute_opts = {'cores': self._cores,
                    'lengthy_computation': False,
                    'func_args': args,
                    'func_kwargs': kwargs,
                    'verbose': self.verbose}
    self._results = parallel_compute(self.data, self._map_function,
                                     **compute_opts)
def test_invalid_func_kwargs_type(self):
    """Check that ``parallel_compute`` rejects a list as ``func_kwargs``."""
    not_a_dict = ['not', 'a', 'dict']
    with self.assertRaises(TypeError):
        data = np.random.rand(2, 5)
        _ = comp_utils.parallel_compute(data, np.mean,
                                        func_kwargs=not_a_dict)
def test_invalid_func_args_type(self):
    """Check that ``parallel_compute`` rejects a dict as ``func_args``."""
    not_a_list = {'should be': 'a list'}
    with self.assertRaises(TypeError):
        data = np.random.rand(2, 5)
        _ = comp_utils.parallel_compute(data, np.mean,
                                        func_args=not_a_list)
def test_invalid_data_type(self):
    """Check that ``parallel_compute`` rejects a nested list as the data."""
    nested_list = [[1, 2, 3], [4, 5, 6]]
    with self.assertRaises(TypeError):
        _ = comp_utils.parallel_compute(nested_list, np.mean)
# For instance, it scales down the number of cores for small datasets if each computation is short. It also ensures that # 1-2 cores fewer than all available cores are used by default so that the user can continue using their computer for # other purposes while the computation runs. # # Lets apply this ``parallel_compute`` to this problem: cpu_cores = 2 args = [[20, 60]] kwargs = {'num_steps': 30} t_0 = time.time() # Execute the parallel computation parallel_results = parallel_compute(raw_data, find_all_peaks, cores=cpu_cores, func_args=args, func_kwargs=kwargs, joblib_backend='multiprocessing') cores_vec.append(cpu_cores) times_vec.append(time.time() - t_0) print('Parallel computation with {} cores took {} seconds'.format( cpu_cores, np.round(times_vec[-1], 2))) ######################################################################################################################## # Compare the results # ------------------- # By comparing the run-times for the two approaches, we see that the parallel computation is substantially faster than # the serial computation. Note that the numbers will differ between computers. Also, the computation was performed on # a relatively small dataset for illustrative purposes. The benefits of using such parallel computation will be far # more apparent for much larger datasets.