def gridsearch(self, param_grid, **params):
    """
    DEPRECATED. USE OPTIMISE. TO BE TESTED AFTER LATEST CHANGES

    sklearn gridsearch and report

    param_grid is passed to GridSearchCV as param_grid
    params are extra keyword arguments for GridSearchCV. scoring defaults
    to self.scorer and cv to None when not supplied

    Currently disabled: logs a warning and exits the interpreter before
    any search is run. The code after the guard is kept for when the
    method has been retested.
    """
    log.warning("this needs to be tested")
    sys.exit()

    # ---- not reached while the guard above is in place ----
    params.setdefault("scoring", self.scorer)
    params.setdefault("cv", None)

    gs = GridSearchCV(self.clf, param_grid=param_grid, **params)
    gs.fit(self.xtrain, self.ytrain)

    # NOTE(review): grid_scores_ was removed from scikit-learn (0.20) in
    # favour of cv_results_ - confirm the installed version and migrate
    # before re-enabling this method.
    runsdf = pd.DataFrame([score.parameters for score in gs.grid_scores_])
    try:
        # strip pipeline step name
        runsdf.columns = [col.split("__")[-1] for col in runsdf.columns]
    except AttributeError:
        # column labels that are not strings have no split; keep them as is
        pass
    runsdf["score"] = [score.mean_validation_score
                       for score in gs.grid_scores_]

    # create runs object so we can report it
    runs = Runs("runs_gridsearch")
    # iterrows replaces the removed DataFrame.ix indexer
    for _, row in runsdf.iterrows():
        runs.append(row.to_dict())
    runs.plot()

    log.info("best model: %s"%gs.best_params_)
    log.info(gs.best_score_)
def __init__(self, folder="optimise"):
    """
    start with empty parameter bookkeeping and a fresh Runs object

    folder is handed to Runs together with "w"
    """
    # iteration counter starts at 1
    self.count = 1
    # parameters that keep the same value on every iteration
    self.fixedparams = {}
    # names of parameters to round to int / to treat as logspace
    self.intparams, self.logparams = [], []
    self.runs = Runs(folder, "w")
class Optuner:
    """
    wrapper for optunity

    cleanparams converts a search space to optunity format, make_target
    wraps the scoring function and maximise runs the search and reports it
    """

    def __init__(self, folder="optimise"):
        """
        folder is passed to Runs, which collects the result of each iteration
        """
        # keys whose values are rounded to int before scoring
        self.intparams = []
        # keys searched as exponents and converted with 10**value
        self.logparams = []
        # key=>value for parameters that are the same on every iteration
        self.fixedparams = dict()
        # iteration counter used in the log output
        self.count = 1
        self.runs = Runs(folder, "w")

    def cleanparams(self, params):
        """
        recursively convert search_space to optunity format. params is
        modified in place

            convert bools to int
            list of strings to sub-keys
            single values to fixed parameters
            single keys to fixed parameters

        create for later processing
            self.fixedparams=dict of fixed parameters
            self.logparams=list of logspace params (10**value)
            self.intparams=list of keys to be constrained to int

        remove logparams (special key listing logspace params)

        fixed values are stored exactly as given in the search space. The
        logspace conversion is applied once, by the target function.
        """
        # logspace params e.g. -3, 3 => 10**-3, 10**3
        if not self.logparams:
            self.logparams = params.pop("logparams", [])

        # iterate over a copy of the keys as entries are deleted on the way
        for k in list(params):
            v = params[k]

            if isinstance(v, dict):
                if len(v) == 1:
                    k2, v2 = next(iter(v.items()))
                    # a single sub-key that is not a range is a fixed choice
                    if not isinstance(v2, list):
                        self.fixedparams[k] = k2
                self.cleanparams(v)
                continue

            # None marks a sub-key without parameters of its own
            if v is None:
                continue

            try:
                # a single value (including 0 and False) is a fixed param
                if not isinstance(v, list):
                    v = [v]

                # fixed param
                if len(set(v)) == 1:
                    self.fixedparams[k] = v[0]
                    del params[k]
                # convert list of strings to sub-entries
                elif isinstance(v[0], str):
                    params[k] = {name: None for name in v}
                # convert bool to int. checked before int as bool is an int
                elif all(isinstance(item, bool) for item in v):
                    params[k] = sorted(int(item) for item in v)
                    self.intparams.append(k)
                # compile list of integer parameters
                elif all(isinstance(item, int) for item in v):
                    self.intparams.append(k)
            except (TypeError, KeyError):
                # best effort. entries that cannot be interpreted
                # (e.g. unhashable values) are left untouched
                pass

    def make_target(self, func, **constants):
        """
        make target function to pass to the optimiser

        func has signature score=func(**params)
        constants fixed for all iterations e.g. x and y

        returns target with signature score=target(**params)
        """
        def target(**params):
            """
            target function for each optunity iteration
                enables fixed params in search space
                removes nulls
                converts intparams to integer
                converts logparams to logspace
                updates params with constants
                logs params, scores

            params provided by optimiser for each iteration
            """
            starttime = time()

            # enable fixed parameters that are always the same value
            params.update(self.fixedparams)

            # remove k=None (bug in optunity) and v=None (na for this run)
            # only None is removed so that 0 and False reach func
            params = {k: v for k, v in params.items()
                      if k is not None and v is not None}

            # convert intparams to integer
            for k in set(params) & set(self.intparams):
                params[k] = int(round(params[k]))

            # convert logparams to logspace i.e. -3, 3 searches .001 to 1000
            for k in set(params) & set(self.logparams):
                params[k] = 10**params[k]

            # add in constants such as x, y
            params.update(constants)

            # report params
            if self.verbose >= 20:
                paramsout = ', '.join("{!s}={!r}".format(k,v)
                                      for (k,v) in params.items())
                log.info("[%s] %s"%(self.count, paramsout))

            #########################################
            score = func(**params)
            #########################################

            # log results. fixed params are the same for every run so they
            # are left out of the run record
            params.update(score=score, elapsed=time()-starttime)
            for fixed in self.fixedparams:
                params.pop(fixed, None)
            self.runs.append(params)

            # report score
            if self.verbose >= 20:
                log.info("****** %s"%score)

            self.count += 1

            return score
        return target

    def maximise(self, target, num_evals, search=None, verbose=20,
                 **constants):
        """
        maximise target within defined search space

        target has signature score=target(**params)
        num_evals is passed to optunity as the number of evaluations
        search is the search space. If None it is read from search.yaml in
        the current directory. It is modified in place by cleanparams
        constants are fixed for every iteration e.g. x and y
        verbose=20 report every iteration
        """
        self.verbose = verbose

        if search is None:
            with open("search.yaml") as f:
                # safe_load as the search space is plain data. yaml.load
                # without a Loader can construct arbitrary objects and is
                # rejected by recent PyYAML
                search = yaml.safe_load(f)

        self.cleanparams(search)

        # note ignore return values as using runs instead
        # runs has functions such as plot, report and correlations
        optunity.maximize_structured(self.make_target(target, **constants),
                                     search_space=search,
                                     num_evals=num_evals)
        self.runs.report()