def test_invalid_num_jobs(self):
    """Non-integer job counts must raise TypeError; a negative count must raise ValueError."""
    rejected_inputs = (
        (14035.67, TypeError),        # float
        (-14035, ValueError),         # negative integer
        ('not a number', TypeError),  # string
    )
    for bad_num_jobs, expected_error in rejected_inputs:
        with self.assertRaises(expected_error):
            comp_utils.recommend_cpu_cores(bad_num_jobs)
def test_few_small_jobs(self):
    """A handful of quick jobs should be assigned a single core.

    Checked both without a requested core count and when every core is requested.
    """
    few_jobs = 13
    optional_kwargs = (
        {},                                  # nothing requested
        {'requested_cores': MAX_CPU_CORES},  # every core requested
    )
    for extra in optional_kwargs:
        recommended = comp_utils.recommend_cpu_cores(few_jobs,
                                                     lengthy_computation=False,
                                                     **extra)
        self.assertEqual(recommended, 1)
def test_few_large_jobs(self):
    """A handful of lengthy jobs should still be spread over multiple cores."""
    few_jobs = 13
    # Number of cores the test expects to be left free when nothing is requested
    default_free = 2 if MAX_CPU_CORES > 4 else 1

    # No specific core count requested
    recommended = comp_utils.recommend_cpu_cores(few_jobs,
                                                 lengthy_computation=True)
    self.assertEqual(recommended, max(1, MAX_CPU_CORES - default_free))

    # All but one core requested: the request should be returned unchanged
    all_but_one = max(1, MAX_CPU_CORES - 1)
    recommended = comp_utils.recommend_cpu_cores(few_jobs,
                                                 requested_cores=all_but_one,
                                                 lengthy_computation=True)
    self.assertEqual(recommended, all_but_one)
def test_changing_min_cores(self):
    """Every legal ``min_free_cores`` value should be subtracted from the available cores."""
    many_jobs = 14035
    for free_cores in range(1, MAX_CPU_CORES):
        expected = max(1, MAX_CPU_CORES - free_cores)
        recommended = comp_utils.recommend_cpu_cores(many_jobs,
                                                     lengthy_computation=False,
                                                     min_free_cores=free_cores)
        self.assertEqual(recommended, expected)
def test_illegal_min_free_cores(self):
    """Asking to keep every core free must raise ValueError."""
    self.assertRaises(ValueError,
                      comp_utils.recommend_cpu_cores,
                      14035,
                      lengthy_computation=False,
                      min_free_cores=MAX_CPU_CORES)
def test_many_small_jobs(self):
    """Many quick jobs: check the default recommendation and several explicit requests."""
    many_jobs = 14035
    # Number of cores the test expects to be left free when nothing is requested
    default_free = 2 if MAX_CPU_CORES > 4 else 1

    recommended = comp_utils.recommend_cpu_cores(many_jobs,
                                                 lengthy_computation=False)
    self.assertEqual(recommended, max(1, MAX_CPU_CORES - default_free))

    # (requested cores, expected recommendation)
    requests = (
        (1, 1),
        (MAX_CPU_CORES, MAX_CPU_CORES),
        (5000, MAX_CPU_CORES),  # far more than available: expect the maximum
    )
    for requested, expected in requests:
        recommended = comp_utils.recommend_cpu_cores(many_jobs,
                                                     requested_cores=requested,
                                                     lengthy_computation=False)
        self.assertEqual(recommended, expected)
def fit_atom_positions_parallel(parm_dict, fitting_parms, num_cores=None):
    """
    Fits the positions of N atoms in parallel

    Parameters
    ----------
    parm_dict : dictionary
        Dictionary containing the guess positions, nearest neighbors and original image.
        Its 'verbose' entry is overwritten with False (the dictionary is modified in place).
    fitting_parms : dictionary
        Parameters used for atom position fitting
    num_cores : unsigned int (Optional. Default = available logical cores - 2)
        Number of cores to compute with. The value is validated by ``recommend_cpu_cores``.

    Returns
    -------
    results : list of tuples
        Guess and fit coefficients
    """
    parm_dict['verbose'] = False
    all_atom_guesses = parm_dict['atom_pos_guess']
    num_atoms = all_atom_guesses.shape[0]
    t_start = tm.time()
    num_cores = recommend_cpu_cores(num_atoms, requested_cores=num_cores, lengthy_computation=False)

    # Built-in zip works on both Python 2 and 3 (itertools.izip only exists on Python 2).
    # It stops at the shortest input, so the endless repeat() iterators are safe here.
    parm_list = zip(range(num_atoms), itt.repeat(parm_dict), itt.repeat(fitting_parms))

    if num_cores > 1:
        pool = mp.Pool(processes=num_cores)
        try:
            # imap rejects a chunksize below 1, which int(num_atoms / num_cores)
            # would produce whenever there are fewer atoms than cores
            chunk = max(1, num_atoms // num_cores)
            results = list(pool.imap(fit_atom_pos, parm_list, chunksize=chunk))
        finally:
            # Release the worker processes even if a job raised
            pool.close()
            pool.join()
    else:
        results = [fit_atom_pos(parm) for parm in parm_list]

    tot_time = np.round(tm.time() - t_start)
    print('Took {} sec to find {} atoms with {} cores'.format(tot_time, len(results), num_cores))
    return results
def test_invalid_requested_cores(self):
    """A list passed as ``requested_cores`` must raise TypeError."""
    self.assertRaises(TypeError,
                      comp_utils.recommend_cpu_cores,
                      14035,
                      requested_cores=[4])
def test_invalid_min_cores(self):
    """A list passed as ``min_free_cores`` must raise TypeError."""
    self.assertRaises(TypeError,
                      comp_utils.recommend_cpu_cores,
                      14035,
                      min_free_cores=[4])
# parallel computations. ``recommend_cpu_cores()`` is a popular function that looks at the number of parallel operations,
# the available CPU cores, and the duration of each computation to recommend the number of cores that should be used for
# any computation. If the developer / user requests the use of N CPU cores, this function will validate this number
# against the number of available cores and the nature (lengthy / quick) of each computation. Unless a suggested number
# of cores is specified, ``recommend_cpu_cores()`` will typically recommend the usage of N-2 CPU cores (N-1 on CPUs with
# four or fewer cores), where N is the total number of logical cores (Intel uses hyper-threading) on the CPU. Leaving
# some cores free avoids using up all computational resources and making the computer otherwise unusable until the
# computation is complete.
# Here, we demonstrate this function being used in a few use cases:

print('This CPU has {} cores available'.format(cpu_count()))

########################################################################################################################
# **Case 1**: Several independent computations or jobs, each taking far less than 1 second. The number of desired cores
# is not specified. The function will return 2 fewer than the total number of cores on the CPU (1 fewer on CPUs with
# four or fewer cores).
num_jobs = 14035
recommeded_cores = comp_utils.recommend_cpu_cores(num_jobs, lengthy_computation=False)
print('Recommended number of CPU cores for {} independent, FAST, and parallel '
      'computations is {}\n'.format(num_jobs, recommeded_cores))

########################################################################################################################
# **Case 2**: Several independent and fast computations, and the function is asked if 3 cores is OK. In this case, the
# function will allow the usage of the 3 cores so long as the CPU actually has 3 or more cores.
requested_cores = 3
recommeded_cores = comp_utils.recommend_cpu_cores(num_jobs, requested_cores=requested_cores, lengthy_computation=False)
print('Recommended number of CPU cores for {} independent, FAST, and parallel '
      'computations using the requested {} CPU cores is {}\n'.format(num_jobs, requested_cores, recommeded_cores))

########################################################################################################################
# **Case 3**: Far fewer independent and fast computations, and the function is asked if 3 cores is OK. In this case,
# configuring multiple cores for parallel computations will probably be slower than serial computation with a single
# core. Hence, the function will recommend the use of only one core in this case.
def fit_atom_positions_parallel(self, plot_results=True, num_cores=None):
    """
    Fits the positions of N atoms in parallel

    Creates guess_dataset and fit_dataset with the results.

    Parameters
    ----------
    plot_results : optional boolean (default is True)
        Specifies whether to output a visualization of the fitting results
    num_cores : unsigned int (Optional. Default = available logical cores - 2)
        Number of cores to compute with. The value, whether given or not, is validated by
        ``recommend_cpu_cores`` and may therefore be reduced.

    Returns
    -------
    fit_dataset: NxM numpy array of tuples where N is the number of atoms fit and M is the number of nearest
        neighbors considered. Each tuple contains the converged values for each gaussian.
        The value names are stored in the dtypes.
    """
    t_start = tm.time()

    # Always validate the core count. Previously only ``None`` was routed through
    # recommend_cpu_cores, so an explicit request was handed to mp.Pool unchecked.
    num_cores = recommend_cpu_cores(self.num_atoms, requested_cores=num_cores, lengthy_computation=False)

    print('Setting up guesses')
    self.guess_parms = []
    for i in range(self.num_atoms):
        self.guess_parms.append(self.do_guess(i))

    print('Fitting...')
    # Built-in zip works on both Python 2 and 3 (itertools.izip only exists on Python 2).
    # It stops once guess_parms is exhausted, so the endless repeat() iterator is safe here.
    parm_list = zip(self.guess_parms, itt.repeat(self.fitting_parms))
    if num_cores > 1:
        pool = mp.Pool(processes=num_cores)
        try:
            # imap rejects a chunksize below 1, which int(num_atoms / num_cores)
            # would produce whenever there are fewer atoms than cores
            chunk = max(1, self.num_atoms // num_cores)
            self.fitting_results = list(pool.imap(do_fit, parm_list, chunksize=chunk))
        finally:
            # Release the worker processes even if a job raised
            pool.close()
            pool.join()
    else:
        self.fitting_results = [do_fit(parm) for parm in parm_list]

    print('Finalizing datasets...')
    self.guess_dataset = np.zeros(shape=(self.num_atoms, self.num_nearest_neighbors + 1),
                                  dtype=self.atom_coeff_dtype)
    self.fit_dataset = np.zeros(shape=self.guess_dataset.shape, dtype=self.guess_dataset.dtype)

    for atom_ind, single_atom_results in enumerate(self.fitting_results):
        # Type of the central atom followed by the types of its nearest neighbors
        types = np.hstack((self.h5_guess['type'][atom_ind],
                           [self.h5_guess['type'][neighbor] for neighbor in self.closest_neighbors_mat[atom_ind]]))

        # Prepend the type column to the fit results and store one tuple per row
        atom_data = np.hstack((np.vstack(types), single_atom_results))
        atom_data = [tuple(element) for element in atom_data]
        self.fit_dataset[atom_ind] = atom_data

        # Same layout for the guess; element 1 of each guess entry is used as the coefficients
        single_atom_guess = self.guess_parms[atom_ind]
        atom_guess_data = np.hstack((np.vstack(types), single_atom_guess[1]))
        atom_guess_data = [tuple(element) for element in atom_guess_data]
        self.guess_dataset[atom_ind] = atom_guess_data

    tot_time = np.round(tm.time() - t_start)
    print('Took {} sec to find {} atoms with {} cores'.format(tot_time, len(self.fitting_results), num_cores))

    # if plotting is desired
    if plot_results:
        fig, axis = plt.subplots(figsize=(14, 14))
        # No axis.hold(True): Axes.hold was removed from Matplotlib, and successive
        # artists are drawn on the same axes without it.
        axis.imshow(self.cropped_clean_image, interpolation='none', cmap="gray")
        axis.scatter(self.guess_dataset[:, 0]['y'], self.guess_dataset[:, 0]['x'], color='yellow', label='Guess')
        axis.scatter(self.fit_dataset[:, 0]['y'], self.fit_dataset[:, 0]['x'], color='red', label='Fit')
        axis.legend()
        fig.tight_layout()
        fig.show()

    return self.fit_dataset
def _unit_compute_fit(self, obj_func, obj_func_args=[], solver_options={'jac': 'cs'}):
    """
    Runs a least-squares fit for every (data, guess) pair of the current chunk.

    The guess chunk is read first. ``self.data`` and ``self._guess`` are then walked in lockstep and each pair
    is handed to ``least_squares``. The solver outputs are stored, in order, in ``self._results``.

    Parameters
    ----------
    obj_func : callable
        Objective function to minimize. The guess parameters should be its first argument.
    obj_func_args : list
        Arguments required by obj_func after the guess parameters. The data entry being fitted is
        placed in front of these. This list is not modified here.
    solver_options : dict, optional
        Keyword arguments passed onto scipy.optimize.least_squares. This dict is not modified here.
    """
    # The data chunk has already been read in; fetch the matching guesses too
    self._read_guess_chunk()

    if self.verbose and self.mpi_rank == 0:
        received = '_unit_compute_fit got:\nobj_func: {}\nobj_func_args: {}\nsolver_options: {}'
        print(received.format(obj_func, obj_func_args, solver_options))

    # TODO: Generalize this bit. Use Parallel compute instead!
    if self.mpi_size > 1:
        # mpi_size > 1: fit the pairs one at a time in this process
        if self.verbose:
            print('Rank {}: About to start serial computation.'.format(self.mpi_rank))

        self._results = list()
        for response, guess in zip(self.data, self._guess):
            single_fit = least_squares(obj_func, guess,
                                       args=[response] + obj_func_args,
                                       **solver_options)
            self._results.append(single_fit)
    else:
        # Otherwise distribute the fits over local cores via joblib
        num_cores = recommend_cpu_cores(self.data.shape[0], verbose=self.verbose)
        if self.verbose:
            print('Starting parallel fitting with {} cores'.format(num_cores))

        delayed_fit = joblib.delayed(least_squares)
        tasks = [delayed_fit(obj_func, guess,
                             args=[response] + obj_func_args,
                             **solver_options)
                 for response, guess in zip(self.data, self._guess)]
        self._results = joblib.Parallel(n_jobs=num_cores)(tasks)

    if self.verbose and self.mpi_rank == 0:
        done = 'Finished computing fits on {} objects. Results of length: {}.'
        print(done.format(self.data.shape[0], len(self._results)))