Esempio n. 1
0
 def objects_safe(cls, *args, **kwargs):
     """Run ``cls.objects(...)`` retrying on transient Mongo disconnects.

     Retries up to 5 times with exponential back-off (0.5, 1, 2, 4 s)
     whenever PyMongo raises ``AutoReconnect``.  On the last failed
     attempt the exception is re-raised instead of silently falling off
     the loop and returning ``None`` (which would surface later as an
     unrelated ``AttributeError`` at the caller).
     """
     max_attempts = 5
     for attempt in range(max_attempts):
         try:
             return cls.objects(*args, **kwargs)
         except pymongo.errors.AutoReconnect as e:
             if attempt == max_attempts - 1:
                 raise  # retries exhausted -- propagate instead of returning None
             wait_t = 0.5 * pow(2, attempt)  # exponential back off
             l.warning(
                 "PyMongo auto-reconnecting... %s. Waiting %.1f seconds.",
                 str(e), wait_t)
             time.sleep(wait_t)
Esempio n. 2
0
    def __call__(self, result):
        """Per-iteration optimizer callback: persist this step's results.

        ``result`` appears to be a scikit-optimize ``OptimizeResult``
        (it exposes ``func_vals``, ``x``, ``x_iters``) -- TODO confirm
        against the caller.  Evaluates the current model on the TEST
        datasets, reuses the cached VAL evaluation from ``self.ep``,
        stores a ``Point`` document for the step, updates the
        experiment's best-so-far snapshot when this step matches the
        running minimum, and advances ``self.step``.
        """
        # Latest objective value vs. running minimum; equality (after int
        # truncation) marks this step as the best one seen so far.
        curr_value = int(result.func_vals[-1])
        best_value = int(np.min(result.func_vals))
        
        # Fresh evaluation on TEST; the VAL evaluation was already produced
        # during the objective call, so the cached copies are reused.
        result_test, result_list_test = self.ep.model.evaluate(
            self.ep.datasets, tags='TEST')
        result_val, result_list_val = self.ep.last_eval_result, self.ep.last_eval_result_list

        if curr_value == best_value:
            # Snapshot the best evaluations and the coordinates (result.x)
            # that produced them onto the experiment document.
            self.mongo_experiment.best_evaluation_on_test = result_test.mongo_object
            self.mongo_experiment.best_evaluation_on_val = result_val.mongo_object
            self.mongo_experiment.best_point = dict(
                zip([d.name for d in self.space], [float(x) for x in result.x]))

        # One Point document per optimization step, keyed by the coordinates
        # actually probed at this iteration (result.x_iters[-1]).
        point = Point(
                step = self.step, 
                evaluation_on_test = result_test.mongo_object_with_deals,
                evaluation_on_val = result_val.mongo_object,
                detailed_evaluation_on_val = [r.mongo_object for r in result_list_val],
                detailed_evaluation_on_test = [r.mongo_object for r in result_list_test],
                coordinates = dict(zip([d.name for d in self.space], [float(x) for x in result.x_iters[-1]])) ,
                experiment = self.mongo_experiment,
                test_days=result_test.days,
                test_mean=result_test.mean,
                test_std=result_test.std,
                test_deals_per_day=result_test.deals_per_day,
                test_diff=result_test.diff,
                test_min=result_test.min,
                test_max=result_test.max,
                test_total=result_test.total,
                val_days=result_val.days,
                val_mean=result_val.mean,
                val_std=result_val.std,
                val_deals_per_day=result_val.deals_per_day,
                val_diff=result_val.diff,
                val_min=result_val.min,
                val_max=result_val.max,
                val_total=result_val.total,
                clf_params=getattr(self.ep.model.clf, 'best_params_', None),
                fine_tuned=False
                )
        point.save()

        self.mongo_experiment.points.append(point)
        self.mongo_experiment.updated_at = datetime.now(timezone.utc)
        try: 
            self.mongo_experiment.save()
        except DocumentTooLarge: # if doc is too large then let's decrease it's size
            # Presumably the BSON 16 MB document cap was hit: drop the bulky
            # per-dataset VAL details from every point and retry once.
            # NOTE(review): a second DocumentTooLarge here would propagate.
            l.warning('Got document too large. Trying to decrease size of the doc')
            for p in self.mongo_experiment.points:
                p.detailed_evaluation_on_val = None
            self.mongo_experiment.save()
            
        self.step += 1
Esempio n. 3
0
    def _load_self_from_mongo_object(self, obj: ModelCollection):
        """Populate this ensemble from a persisted ``ModelCollection``.

        Args:
            obj: Mongo document expected to carry a non-empty
                ``model_ensemble`` list of member-model documents.

        Raises:
            BadLogic: if this instance was already loaded once.
            MalformedModel: if ``model_ensemble`` is missing or empty.
        """
        if self.obj is not None:
            raise BadLogic('Cannot load model instance twice')
        self.obj = obj

        model_ensemble = getattr(obj, 'model_ensemble', None)
        # Missing attribute and empty list are both malformed.
        if not model_ensemble:
            raise MalformedModel('Model has to have "model_ensemble" field')
        if len(model_ensemble) == 1:
            l.warning(
                f'Strange ensemble, that has only one model. Obj.id={obj.id}')

        # Materialize every member model; the list length replaces the old
        # hand-rolled counter.
        self.models = [
            ModelInterface.load_from_mongo_object(model_obj)
            for model_obj in model_ensemble
        ]

        l.debug(
            f'Successfully loaded ensemble model, that has {len(self.models)} members. obj.id={obj.id}'
        )
def fine_tune_experiment(exp_obj_id,
                         model_space,
                         top_mean=0,
                         top_diff=0,
                         remove_prev_results=False,
                         minimal_deals_per_day=0.2,
                         search_iterations=80):
    """Fine-tune the most promising points of an experiment via random search.

    Selects up to ``top_mean`` points ordered by test mean and up to
    ``top_diff`` points ordered by test diff (deduplicated, skipping steps
    already fine-tuned unless ``remove_prev_results`` is set), re-trains a
    model at each point's coordinates with a random parameter search over
    ``model_space``, and saves the outcome as new ``fine_tuned`` Points.

    Args:
        exp_obj_id: Mongo id of the experiment to fine-tune.
        model_space: mapping of parameter name -> spec dict with 'type' and
            'bounds'; only 'int' parameters become search distributions here.
        top_mean: how many points to take by descending test_mean (0 = none).
        top_diff: how many points to take by descending test_diff (0 = none).
        remove_prev_results: delete previously fine-tuned points first.
        minimal_deals_per_day: skip points trading less than this per day.
        search_iterations: iteration budget for the random search.
    """

    l.debug(f'Fine tuning experiment. Space is {model_space}')
    # Turn the declarative space into scipy sampling distributions;
    # non-'int' parameter types are silently ignored here.
    model_param_distributions = {}
    for k, v in model_space.items():
        if v['type'] == 'int':
            model_param_distributions[k] = sp_randint(v['bounds'][0],
                                                      v['bounds'][1])

    # NOTE(review): message says 'top_profit' but the parameter is named
    # 'top_mean' -- probably stale wording; confirm before changing the text.
    if top_mean == 0 and top_diff == 0:
        l.warning('No finetuning is done. Provide top_profit or top_diff')
        return

    if remove_prev_results:
        # Start from scratch: drop any earlier fine-tuned points.
        for p in mongo.Point.objects_safe(experiment=exp_obj_id,
                                          fine_tuned=True):
            p.delete()
        already_fine_tuned = []
    else:
        # Remember which steps were already fine-tuned so they get skipped.
        already_fine_tuned = [
            p.step
            for p in mongo.Point.objects_safe(experiment=exp_obj_id,
                                              fine_tuned=True).only('step')
        ]

    # Candidates: not yet fine-tuned, active enough, best by test mean...
    top_mean_points = mongo.Point.objects_safe(
        experiment=exp_obj_id,
        fine_tuned__in=[None, False],
        test_deals_per_day__gt=minimal_deals_per_day,
        step__nin=already_fine_tuned).order_by('-test_mean').limit(top_mean)

    # ...and best by test diff; the two sets may overlap (deduped below).
    top_diff_points = mongo.Point.objects_safe(
        experiment=exp_obj_id,
        fine_tuned__in=[None, False],
        test_deals_per_day__gt=minimal_deals_per_day,
        step__nin=already_fine_tuned).order_by('-test_diff').limit(top_diff)

    ec = classes.ExperimentConfiguration()
    ec.load_from_experiment_step(
        exp_obj_id, 1,
        fine_tuned=False)  # load initial config that we will update later on
    datasets = None
    processed_points = []  # NOTE(review): list membership is O(n); a set would do
    for points_set in [top_mean_points, top_diff_points]:
        for p in points_set:
            # Skip points already handled via the other ranking.
            if p.id in processed_points: continue
            processed_points.append(p.id)

            ec.adjust_parameters(**p.coordinates)

            # assume that there are same datasets used within entire experiment
            if datasets == None:  # NOTE(review): prefer 'is None' here
                datasets = classes.SKModel.get_datasets_from_exp_config(ec)
            else:
                # Refresh cached datasets in place between points.
                for d in datasets:
                    d.update(update_source=False)

            model = classes.SKModel(ec)
            model.train(datasets=datasets,
                        tags='TRAIN',
                        model_param_distributions=model_param_distributions,
                        random_search_iterations=search_iterations)

            result_test, result_list_test = model.evaluate(datasets,
                                                           tags='TEST')
            result_val, result_list_val = model.evaluate(datasets, tags='VAL')

            # Persist the fine-tuned outcome under the same step number as
            # the original point, flagged fine_tuned=True.
            point = classes.Point(
                step=p.step,
                evaluation_on_test=result_test.mongo_object_with_deals,
                evaluation_on_val=result_val.mongo_object,
                detailed_evaluation_on_val=[
                    r.mongo_object for r in result_list_val
                ],
                detailed_evaluation_on_test=[
                    r.mongo_object for r in result_list_test
                ],
                coordinates=p.coordinates,
                experiment=exp_obj_id,
                test_days=result_test.days,
                test_mean=result_test.mean,
                test_std=result_test.std,
                test_deals_per_day=result_test.deals_per_day,
                test_diff=result_test.diff,
                test_min=result_test.min,
                test_max=result_test.max,
                test_total=result_test.total,
                val_days=result_val.days,
                val_mean=result_val.mean,
                val_std=result_val.std,
                val_deals_per_day=result_val.deals_per_day,
                val_diff=result_val.diff,
                val_min=result_val.min,
                val_max=result_val.max,
                val_total=result_val.total,
                clf_params=getattr(model.clf, 'best_params_', None),
                fine_tuned=True)
            point.save_safe()