def test_add_model(self):
    """Adding a model registers it with the compute class's model group."""
    compute_class = ComputeClass("name", "resource", 1, 100)
    model = Model(id=1)
    compute_class.add_model(model)
    assert len(compute_class.model_group.models.keys()) == 1
def test_remove_model(self):
    """Removing a model by id leaves the model group empty."""
    compute_class = ComputeClass("name", "resource", 1, 100)
    compute_class.model_group = ModelGroup(Model(id=1))
    # Sanity-check the setup before exercising the removal.
    assert len(compute_class.model_group.models.keys()) == 1
    compute_class.remove_model(1)
    assert len(compute_class.model_group.models.keys()) == 0
def test_init(self):
    """A fresh ComputeClass stores its constructor arguments and has no models."""
    cc = ComputeClass("name", "resource", 1, 100)
    observed = (cc.name, cc.resource, cc.value, cc.max_tasks)
    assert observed == ("name", "resource", 1, 100)
    assert not cc.model_group
def test_clear(self):
    """clear() empties model groups of any size, including already-empty ones."""
    models = [Model(id=i) for i in range(100)]
    cc = ComputeClass('name', 'resource', 1, 100)

    # Clearing an empty compute class is a no-op.
    assert len(cc.model_group) == 0
    cc.clear()
    assert len(cc.model_group) == 0

    # Clearing single- and multi-model groups always leaves zero models.
    for count in range(1, len(models)):
        cc.model_group = ModelGroup(models=models[:count])
        assert len(cc.model_group) == count
        cc.clear()
        assert len(cc.model_group) == 0
def add_compute_class(self, name, resource, value, max_queued_tasks=100):
    """Add a compute class representing a set of consistent resources.

    Parameters
    ----------
    name : str
        The name of this set of compute resources.
    resource : str
        The resource to match, e.g. gpu_name, cores, etc.
    value
        The value of the resource that should be matched, e.g.
        "TITAN X (Pascal)", 8, etc.
    max_queued_tasks : int, optional
        The maximum number of tasks to queue for this compute class,
        default 100.
    """
    # Never queue more tasks than the search-wide cap allows.
    queue_limit = min(self.max_tasks, max_queued_tasks)
    cc = ComputeClass(name, resource, value, queue_limit)
    self.ccs[cc.id] = cc
def test_generate(self):
    """generate() draws hyperparameter values from each model's domains."""
    # Single model with a single DiscreteDomain.
    model = RandomSearchModel(id=1)
    model.add_domain(DiscreteDomain(self.__discrete_domain__))
    cc = ComputeClass("name", "resource", 2, 100)
    cc.model_group = ModelGroup(model)
    result = cc.generate(model.id)
    print(result)
    assert result[0] == model.id
    assert result[1] == model.results[0].id
    assert isinstance(result[-1][''], int)
    assert result[2][''] in self.__discrete_domain__

    # Single model with a single ContinuousDomain.
    model = RandomSearchModel(id=1)
    model.add_domain(ContinuousDomain(uniform, loc=1.0, scale=5.0))
    cc = ComputeClass("name", "resource", 2, 100)
    cc.model_group = ModelGroup(model)
    result = cc.generate(1)
    assert isinstance(result[-1][''], float)
    assert 1.0 <= result[-1][''] <= 6.0

    # Single model with multiple domains keyed by path.
    model = RandomSearchModel(id=1)
    model.add_domain(DiscreteDomain(self.__discrete_domain__, path='a'))
    model.add_domain(ContinuousDomain(uniform, path='b', loc=1.0, scale=5.0))
    cc = ComputeClass("name", "resource", 2, 100)
    cc.model_group = ModelGroup(model)
    result = cc.generate(1)
    assert isinstance(result[-1]['a'], int)
    assert result[-1]['a'] in self.__discrete_domain__
    assert isinstance(result[-1]['b'], float)
    assert 1.0 <= result[-1]['b'] <= 6.0

    # Two models registered with the same compute class.
    first = RandomSearchModel(id=1)
    first.add_domain(DiscreteDomain(self.__discrete_domain__))
    second = GPBayesModel(id=2)
    second.add_domain(DiscreteDomain(self.__discrete_domain__))
    cc = ComputeClass("name", "resource", 2, 100)
    cc.model_group = ModelGroup(first)
    cc.model_group.add_model(second)
    for model_id in (1, 2):
        result = cc.generate(model_id)
        assert isinstance(result[-1][''], int)
        assert result[-1][''] in self.__discrete_domain__
def assign_to_ccs(self):
    """Assign search spaces to compute classes.

    Each independent model in the search (model being one of a disjoint
    set of search domains) is assigned to at least two compute classes
    based on its rank relative to other models. In this way, only a
    subset of models are evaluated on each set of hardware.

    Notes
    -----
    This method accounts for differing counts of models and compute
    classes, adjusting for a greater number of models, a greater number
    of compute classes, or equal counts of models and compute classes.

    See Also
    --------
    `shadho.ComputeClass`
    `pyrameter.ModelGroup`
    """
    # If only one compute class exists, reassign everything to it. If
    # multiple compute classes exist, heuristically assign search spaces
    # to CCs. If no compute classes exist, create a dummy to wrap the
    # search.
    if len(self.ccs) > 1:
        # Sort models in the search by complexity, priority, or both and
        # get the updated order.
        self.sort_spaces(use_complexity=self.use_complexity,
                         use_uncertainty=self.use_uncertainty)

        # Clear the current assignments.
        # BUGFIX: iterate the ComputeClass objects, not the dict keys.
        for cc in self.ccs.values():
            cc.clear()

        # Determine whether the search spaces or the compute classes are
        # the larger collection.
        # BUGFIX: the original compared a list to an int
        # (`larger == len(self.searchspaces)`), which is always False
        # and made `smaller` always the search spaces.
        ccids = list(self.ccs.keys())
        if len(self.searchspaces) >= len(ccids):
            larger, smaller = self.searchspaces, ccids
        else:
            larger, smaller = ccids, self.searchspaces

        # Assign models to CCs such that each model is assigned to at
        # least two CCs.
        x = float(len(larger)) / float(len(smaller))  # Steps between `smaller` increments
        y = x - 1          # Current step threshold (offset by 1 for 0-indexing)
        j = 0              # Current index of `smaller`
        m = len(smaller) / 2  # Halfway point for second assignment
        n = len(larger) / 2   # Halfway point for second assignment
        for i in range(len(larger)):
            # If at a step point for `smaller`, increment the index.
            if i > np.ceil(y):
                j += 1
                y += x
            # Add the model to the current CC. In the first half, also
            # add it to the next CC; in the second half, to the previous
            # CC, so every model lands on at least two CCs.
            if smaller[j] in self.ccs:
                # `smaller` holds CC ids; `larger` holds search spaces.
                # BUGFIX: `larger[i]` is already a search space — do not
                # re-index `self.searchspaces` with it.
                self.ccs[smaller[j]].add_searchspace(larger[i])
                if j < m:
                    self.ccs[smaller[j + 1]].add_searchspace(larger[i])
                else:
                    self.ccs[smaller[j - 1]].add_searchspace(larger[i])
            else:
                # `larger` holds CC ids; `smaller` holds search spaces.
                self.ccs[larger[i]].add_searchspace(smaller[j])
                if i < n:
                    self.ccs[larger[i + 1]].add_searchspace(smaller[j])
                else:
                    self.ccs[larger[i - 1]].add_searchspace(smaller[j])
    elif len(self.ccs) == 0:
        # No hardware constraints: wrap the whole search in a dummy CC.
        cc = ComputeClass('all', None, None,
                          min(self.max_tasks, self.max_queued_tasks))
        self.ccs[cc.id] = cc
        cc.add_searchspace(self.searchspaces)
    else:
        # Exactly one CC: it owns every search space.
        cc = list(self.ccs.values())[0]
        cc.clear()
        cc.add_searchspace(self.searchspaces)
def run(self):
    """Search hyperparameter values on remote workers.

    Generate and evaluate hyperparameters using the selected task
    manager and search strategy. Hyperparameters will be evaluated until
    timeout, and the optimal set will be printed to screen.

    Returns
    -------
    The result of ``self.to_dataframes()``.

    Notes
    -----
    If `self.await_pending` is True, Shadho will continue to evaluate
    hyperparameters in the queue without generating new hyperparameter
    values. This will continue until the queue is empty and all tasks
    have returned.
    """
    # Set up the task manager as defined in `shadho.managers` (only once;
    # a pre-existing manager attribute is reused).
    if not hasattr(self, 'manager'):
        self.manager = create_manager(
            manager_type=self.config.manager,
            config=self.config,
            tmpdir=self.__tmpdir)

    # If no ComputeClass was created, create a dummy class covering the
    # whole search; otherwise give each CC a copy of this optimizer and
    # split the task-queue budget evenly across CCs.
    if len(self.ccs) == 0:
        cc = ComputeClass('all', None, None,
                          min(self.max_tasks, self.max_queued_tasks))
        self.ccs[cc.id] = cc
    else:
        for cc in self.ccs.values():
            cc.optimizer = self.copy()
            # NOTE(review): `/` yields a float here — presumably integer
            # division (`//`) was intended for a task count; confirm.
            cc.max_queued_tasks = max(cc.max_queued_tasks / len(self.ccs), 1)

    # Set up initial model/compute class assignments.
    self.assign_to_ccs()

    self.start = time.time()
    completed_tasks = 0
    try:
        # Run the search until timeout or until all tasks complete.
        while not self.done:
            # Generate hyperparameters while the manager has spare
            # capacity, topping the queue up to twice the worker count.
            if self.manager.hungry(pending_tasks=self.ready_trials,
                                   leeway=len(self.ccs)):
                for x in range(2 * self.manager.num_workers()
                               - self.ready_trials):
                    self.generate()

            # Run another task and await results.
            result = self.manager.run_task()
            if result is not None:
                # If a task returned, post-process as a success or fail.
                # A 3-tuple result signals success; anything else is a
                # failure (resubmitted if asked).
                if len(result) == 3:
                    self.success(*result)  # Store and move on
                    completed_tasks += 1
                else:
                    self.failure(*result)  # Resubmit if asked

            # Checkpoint the results to file or DB at some frequency.
            if self.trial_count % self.save_frequency == 0:
                self.save()
        self.save()

        # If requested, continue the loop until all tasks return without
        # generating new hyperparameter values.
        if self.await_pending:
            while not self.manager.empty():
                result = self.manager.run_task()
                if result is not None:
                    if len(result) == 3:
                        self.success(*result)
                    else:
                        self.failure(*result)
            self.save()

    # On keyboard interrupt, save any results and clean up.
    except KeyboardInterrupt:
        # NOTE(review): `hasattr(self, '__tmpdir')` checks the literal
        # name '__tmpdir', but attribute access `self.__tmpdir` inside a
        # class is name-mangled to `_<ClassName>__tmpdir` — this check
        # likely always fails. Also `os.rmdir` only removes empty
        # directories. Confirm both against the class definition.
        if hasattr(self, '__tmpdir') and self.__tmpdir is not None:
            os.rmdir(self.__tmpdir)

    self.end = time.time()

    # Save the results and print the optimal set of parameters to screen.
    self.save()
    self.summary()
    return self.to_dataframes()
def run(self):
    """Search hyperparameter values on remote workers.

    Generate and evaluate hyperparameters using the selected task
    manager and search strategy. Hyperparameters will be evaluated until
    timeout, and the optimal set will be printed to screen.

    Returns
    -------
    The result of ``self.to_dataframes()``.

    Notes
    -----
    If `self.await_pending` is True, Shadho will continue to evaluate
    hyperparameters in the queue without generating new hyperparameter
    values. This will continue until the queue is empty and all tasks
    have returned.
    """
    # Set up the task manager as defined in `shadho.managers` (only once;
    # a pre-existing manager attribute is reused).
    if not hasattr(self, 'manager'):
        self.manager = create_manager(
            manager_type=self.config.manager,
            config=self.config,
            tmpdir=self.__tmpdir)

    # If no ComputeClass was created, create a dummy class covering the
    # whole search; otherwise give each CC a copy of this optimizer.
    if len(self.ccs) == 0:
        # NOTE(review): `super()` is passed as a positional constructor
        # argument here — unusual; verify against ComputeClass.__init__.
        cc = ComputeClass('all', None, None, self.max_queued_tasks, super())
        self.ccs[cc.id] = cc
    else:
        for cc in self.ccs.values():
            cc.optimizer = self.copy()

    # Set up initial model/compute class assignments.
    self.assign_to_ccs()

    start = time.time()
    elapsed = 0
    exhausted = False
    try:
        # Run the search until timeout, until every search space reports
        # done, or until the task queue drains after the first pass.
        while elapsed < self.timeout and not exhausted and (
                elapsed == 0 or not self.manager.empty()):
            # Generate hyperparameters; `stop` flags that the search
            # should halt instead of continuing.
            stop = self.generate()
            if not stop:
                # Run another task and await results.
                result = self.manager.run_task()
                if result is not None:
                    # If a task returned, post-process as a success or
                    # fail: a 3-tuple result signals success.
                    if len(result) == 3:
                        self.success(*result)  # Store and move on
                    else:
                        self.failure(*result)  # Resubmit if asked

                # Checkpoint the results to file or DB at some frequency.
                if self.trial_count % self.save_frequency == 0:
                    self.save()

                # Update the time for the timeout check and recompute
                # whether every search space is exhausted.
                elapsed = time.time() - start
                exhausted = all([ss.done for ss in self.searchspaces])
            else:
                break
        self.save()

        # If requested, continue the loop until all tasks return without
        # generating new hyperparameter values.
        if self.await_pending:
            while not self.manager.empty():
                result = self.manager.run_task()
                if result is not None:
                    if len(result) == 3:
                        self.success(*result)
                    else:
                        self.failure(*result)
            self.save()

    # On keyboard interrupt, save any results and clean up.
    except KeyboardInterrupt:
        # NOTE(review): `hasattr(self, '__tmpdir')` checks the literal
        # name '__tmpdir', but `self.__tmpdir` is name-mangled to
        # `_<ClassName>__tmpdir` inside the class — this check likely
        # always fails. Also `os.rmdir` only removes empty directories.
        if hasattr(self, '__tmpdir') and self.__tmpdir is not None:
            os.rmdir(self.__tmpdir)

    # Save the results and print the optimal set of parameters to screen.
    self.save()
    self.summary()
    return self.to_dataframes()