def objects_safe(cls, *args, **kwargs):
    """Run ``cls.objects(...)`` with retries on transient Mongo reconnects.

    Makes up to 5 attempts, backing off exponentially (0.5, 1, 2, 4 s)
    between them. If every attempt raises
    ``pymongo.errors.AutoReconnect``, the last exception is re-raised so
    callers see the failure (the previous version fell off the loop and
    silently returned ``None``).

    Args:
        cls: document class exposing an ``objects`` query manager.
        *args, **kwargs: forwarded verbatim to ``cls.objects``.

    Returns:
        Whatever ``cls.objects(*args, **kwargs)`` returns.

    Raises:
        pymongo.errors.AutoReconnect: if all retry attempts fail.
    """
    max_attempts = 5
    for attempt in range(max_attempts):
        try:
            return cls.objects(*args, **kwargs)
        except pymongo.errors.AutoReconnect as e:
            if attempt == max_attempts - 1:
                # Out of retries -- surface the error instead of
                # silently returning None.
                raise
            wait_t = 0.5 * pow(2, attempt)  # exponential back off
            l.warning(
                "PyMongo auto-reconnecting... %s. Waiting %.1f seconds.",
                str(e), wait_t)
            time.sleep(wait_t)
def __call__(self, result):
    """Optimizer per-iteration callback.

    ``result`` is the optimizer's result object (``func_vals`` holds all
    objective values so far, ``x`` the best point, ``x_iters`` every point
    tried -- matches the skopt ``OptimizeResult`` shape; confirm against the
    optimizer actually used). Evaluates the current model on TEST, pulls the
    cached VAL results off ``self.ep``, stores a ``Point`` document for this
    step, and updates the experiment's "best" fields when this step is the
    best (minimal) objective value seen so far.
    """
    curr_value = int(result.func_vals[-1])
    best_value = int(np.min(result.func_vals))
    result_test, result_list_test = self.ep.model.evaluate(
        self.ep.datasets, tags='TEST')
    # VAL results were cached by the experiment runner during this step.
    result_val, result_list_val = self.ep.last_eval_result, self.ep.last_eval_result_list
    if curr_value == best_value:
        # New best point: best == min of func_vals (minimization assumed).
        self.mongo_experiment.best_evaluation_on_test = result_test.mongo_object
        self.mongo_experiment.best_evaluation_on_val = result_val.mongo_object
        self.mongo_experiment.best_point = dict(
            zip([d.name for d in self.space], [float(x) for x in result.x]))
    point = Point(
        step = self.step,
        evaluation_on_test = result_test.mongo_object_with_deals,
        evaluation_on_val = result_val.mongo_object,
        detailed_evaluation_on_val = [r.mongo_object for r in result_list_val],
        detailed_evaluation_on_test = [r.mongo_object for r in result_list_test],
        # Coordinates of the point evaluated on THIS iteration
        # (x_iters[-1]), not the best point.
        coordinates = dict(zip([d.name for d in self.space],
                               [float(x) for x in result.x_iters[-1]])),
        experiment = self.mongo_experiment,
        test_days=result_test.days,
        test_mean=result_test.mean,
        test_std=result_test.std,
        test_deals_per_day=result_test.deals_per_day,
        test_diff=result_test.diff,
        test_min=result_test.min,
        test_max=result_test.max,
        test_total=result_test.total,
        val_days=result_val.days,
        val_mean=result_val.mean,
        val_std=result_val.std,
        val_deals_per_day=result_val.deals_per_day,
        val_diff=result_val.diff,
        val_min=result_val.min,
        val_max=result_val.max,
        val_total=result_val.total,
        # best_params_ exists only when the model ran a hyper-param search.
        clf_params=getattr(self.ep.model.clf, 'best_params_', None),
        fine_tuned=False
    )
    point.save()
    self.mongo_experiment.points.append(point)
    self.mongo_experiment.updated_at = datetime.now(timezone.utc)
    try:
        self.mongo_experiment.save()
    except DocumentTooLarge:
        # If the doc is too large, decrease its size by dropping the
        # bulky per-point detailed VAL evaluations, then retry the save.
        # NOTE(review): this mutates the referenced points in memory only --
        # whether that shrinks the experiment document depends on how
        # `points` is embedded/referenced; confirm against the schema.
        l.warning('Got document too large. Trying to decrease size of the doc')
        for p in self.mongo_experiment.points:
            p.detailed_evaluation_on_val = None
        self.mongo_experiment.save()
    self.step += 1
def _load_self_from_mongo_object(self, obj: ModelCollection):
    """Populate this ensemble instance from a persisted ModelCollection.

    Args:
        obj: Mongo document describing the ensemble; must carry a
            non-empty ``model_ensemble`` field.

    Raises:
        BadLogic: if this instance has already been loaded.
        MalformedModel: if ``model_ensemble`` is missing or empty.
    """
    if self.obj is not None:  # idiomatic form of `not ... is None`
        raise BadLogic('Cannot load model instance twice')
    self.obj = obj
    model_ensemble = getattr(obj, 'model_ensemble', None)
    # Covers both a missing/None attribute and an empty list.
    if not model_ensemble:
        raise MalformedModel('Model has to have "model_ensemble" field')
    if len(model_ensemble) == 1:
        l.warning(
            f'Strange ensemble, that has only one model. Obj.id={obj.id}')
    # Comprehension replaces the manual append loop with counter `i`.
    self.models = [
        ModelInterface.load_from_mongo_object(model_obj)
        for model_obj in model_ensemble
    ]
    l.debug(
        f'Successfully loaded ensemble model, that has {len(self.models)} members. obj.id={obj.id}'
    )
def fine_tune_experiment(exp_obj_id,
                         model_space,
                         top_mean=0,
                         top_diff=0,
                         remove_prev_results=False,
                         minimal_deals_per_day=0.2,
                         search_iterations=80):
    """Fine-tune the most promising points of an experiment.

    Selects up to ``top_mean`` points ranked by ``test_mean`` and up to
    ``top_diff`` points ranked by ``test_diff`` (excluding points already
    fine-tuned unless ``remove_prev_results``), re-trains each with a
    randomized hyper-parameter search over ``model_space``, evaluates on
    TEST and VAL, and stores one new fine-tuned Point per selected step.

    Args:
        exp_obj_id: id of the experiment document to fine-tune.
        model_space: mapping ``{param: {'type': ..., 'bounds': (lo, hi)}}``;
            only entries with type 'int' are turned into distributions.
        top_mean: number of best-by-mean points to fine-tune.
        top_diff: number of best-by-diff points to fine-tune.
        remove_prev_results: delete previously fine-tuned points first.
        minimal_deals_per_day: skip points trading less often than this.
        search_iterations: iteration budget for the randomized search.
    """
    l.debug(f'Fine tuning experiment. Space is {model_space}')
    if top_mean == 0 and top_diff == 0:
        # Guard hoisted above the distribution build; message used to name
        # "top_profit", which is not a parameter of this function.
        l.warning('No finetuning is done. Provide top_mean or top_diff')
        return
    # Build scipy distributions for the randomized search (int params only).
    model_param_distributions = {}
    for k, v in model_space.items():
        if v['type'] == 'int':
            model_param_distributions[k] = sp_randint(v['bounds'][0],
                                                      v['bounds'][1])
    if remove_prev_results:
        for p in mongo.Point.objects_safe(experiment=exp_obj_id,
                                          fine_tuned=True):
            p.delete()
        already_fine_tuned = []
    else:
        already_fine_tuned = [
            p.step for p in mongo.Point.objects_safe(
                experiment=exp_obj_id, fine_tuned=True).only('step')
        ]
    top_mean_points = mongo.Point.objects_safe(
        experiment=exp_obj_id,
        fine_tuned__in=[None, False],
        test_deals_per_day__gt=minimal_deals_per_day,
        step__nin=already_fine_tuned).order_by('-test_mean').limit(top_mean)
    top_diff_points = mongo.Point.objects_safe(
        experiment=exp_obj_id,
        fine_tuned__in=[None, False],
        test_deals_per_day__gt=minimal_deals_per_day,
        step__nin=already_fine_tuned).order_by('-test_diff').limit(top_diff)
    ec = classes.ExperimentConfiguration()
    # Load the initial config (step 1); it is adjusted per point below.
    ec.load_from_experiment_step(exp_obj_id, 1, fine_tuned=False)
    datasets = None
    processed_points = set()  # set membership is O(1) vs. the old list scan
    for points_set in [top_mean_points, top_diff_points]:
        for p in points_set:
            if p.id in processed_points:
                continue  # point ranked in both top lists -- do it once
            processed_points.add(p.id)
            ec.adjust_parameters(**p.coordinates)
            # Assume the same datasets are used within the entire experiment.
            if datasets is None:  # was `== None`
                datasets = classes.SKModel.get_datasets_from_exp_config(ec)
            else:
                for d in datasets:
                    d.update(update_source=False)
            model = classes.SKModel(ec)
            model.train(datasets=datasets,
                        tags='TRAIN',
                        model_param_distributions=model_param_distributions,
                        random_search_iterations=search_iterations)
            result_test, result_list_test = model.evaluate(datasets,
                                                           tags='TEST')
            result_val, result_list_val = model.evaluate(datasets, tags='VAL')
            point = classes.Point(
                step=p.step,
                evaluation_on_test=result_test.mongo_object_with_deals,
                evaluation_on_val=result_val.mongo_object,
                detailed_evaluation_on_val=[
                    r.mongo_object for r in result_list_val
                ],
                detailed_evaluation_on_test=[
                    r.mongo_object for r in result_list_test
                ],
                coordinates=p.coordinates,
                experiment=exp_obj_id,
                test_days=result_test.days,
                test_mean=result_test.mean,
                test_std=result_test.std,
                test_deals_per_day=result_test.deals_per_day,
                test_diff=result_test.diff,
                test_min=result_test.min,
                test_max=result_test.max,
                test_total=result_test.total,
                val_days=result_val.days,
                val_mean=result_val.mean,
                val_std=result_val.std,
                val_deals_per_day=result_val.deals_per_day,
                val_diff=result_val.diff,
                val_min=result_val.min,
                val_max=result_val.max,
                val_total=result_val.total,
                clf_params=getattr(model.clf, 'best_params_', None),
                fine_tuned=True)
            point.save_safe()