def __init__(
            self, data, target, algos,
            callback=None, test_base_id=None, test_data_id=None,
            evaluator=None, features=None, feature_types=None,
            max_run_time=None, max_model=None, tolerance=None, nfold=None,
            ts_split_split_by=None, ts_split_cv=None, ts_split_train=None,
            ts_split_test=None, seed=None, balance_class=None,
            max_after_balance=None, sampling_factors=None,
            validation_percentage=None, holdout_percentage=None, apu=None,
            preprocessing=None, version=None):

        evaluator = check_is_enum(Evaluator, evaluator)
        algos = [check_is_enum(Algo, algo) for algo in algos]
        self.data = data
        # Build the optional time series split settings (train/test windows and CV folds).
        if ts_split_train is None:
            train = None
        else:
            train = CoreBody.CVTrain.create(
                start=ts_split_train['start'], end=ts_split_train['end'])

        if ts_split_test is None:
            test = None
        else:
            test = CoreBody.CVTrain.create(
                start=ts_split_test['start'], end=ts_split_test['end'])

        cv = CoreBody.cv_obj_array(ts_split_cv)

        time_series_split = CoreBody.TimeSeriesSplit.create(
            split_by=ts_split_split_by,
            cv=cv,
            train=train,
            test=test)

        # Assemble the train request body; 'tmp_data_id' appears to be a placeholder data id.
        self.train_body = CoreBody.TrainBody.create(
            target=target,
            train_data_id='tmp_data_id',
            algos=algos,
            callback=callback,
            test_base_id=test_base_id,
            test_data_id=test_data_id,
            evaluator=evaluator,
            features=features,
            feature_types=feature_types,
            max_run_time=max_run_time,
            max_model=max_model,
            tolerance=tolerance,
            nfold=nfold,
            time_series_split=time_series_split,
            seed=seed,
            balance_class=balance_class,
            max_after_balance=max_after_balance,
            sampling_factors=sampling_factors,
            validation_percentage=validation_percentage,
            holdout_percentage=holdout_percentage,
            apu=apu,
            preprocessing=preprocessing,
            version=version)
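
# --- Hypothetical usage sketch (not from the source): per the docstring later in
# --- this section, this constructor appears to belong to TrainInput. The data
# --- object, target name, and enum members below are placeholders/assumptions.
train_input = TrainInput(
    data=train_data,              # an uploaded data object (placeholder)
    target='label',               # target column name (placeholder)
    algos=[Algo.GBM],             # Algo.GBM is an assumed enum member; converted by check_is_enum
    evaluator=Evaluator.auto,     # Evaluator.auto appears elsewhere in this file
    max_model=5,
    holdout_percentage=10)
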
    def __init__(
            self, data, target, datetime_column, forecast_horizon, gap, algorithms=None, feature_types=None,
            callback=None, version='v2', max_iteration=None, generation_size=None,
            mutation_rate=None, crossover_rate=None, tolerance=None, validation_percentage=None,
            holdout_percentage=None, max_model=None, seed=None, evaluator=None,
            max_run_time=None, nfold=None, time_unit=None, numerical_groupby_method=None,
            categorical_groupby_method=None, endogenous_features=None, exogenous_features=None,
            time_groups=None, max_window_for_feature_derivation=None):

        evaluator = check_is_enum(Evaluator, evaluator)
        time_unit = check_is_enum(TimeUnit, time_unit)
        numerical_groupby_method = check_is_enum(NumericalGroupByMethod, numerical_groupby_method)
        categorical_groupby_method = check_is_enum(CategoricalGroupByMethod, categorical_groupby_method)
        self.data = data

        # Genetic-algorithm search settings for the time series pipeline.
        genetic_algorithm = CoreBody.GeneticAlgorithmParams.create(
            max_iteration=max_iteration,
            generation_size=generation_size,
            mutation_rate=mutation_rate,
            crossover_rate=crossover_rate
        )
        build_spec = CoreBody.BuildSpec.create(
            tolerance=tolerance,
            validation_percentage=validation_percentage,
            max_model=max_model,
            seed=seed,
            evaluator=evaluator,
            max_run_time=max_run_time,
            genetic_algorithm=genetic_algorithm,
            nfold=nfold,
            algos=algorithms  # user-specified algorithms for the time series analysis
        )
        group_by = CoreBody.TSGroupBy.create(
            time_unit=time_unit,
            numerical_groupby_method=numerical_groupby_method,
            categorical_groupby_method=categorical_groupby_method
        )
        input_spec = CoreBody.InputSpec.create(
            train_data_id='tmp_data_id',
            target=target,
            endogenous_features=endogenous_features,
            exogenous_features=exogenous_features,
            datetime_column=datetime_column,
            forecast_horizon=forecast_horizon,
            gap=gap,
            feature_types=feature_types,
            time_groups=time_groups,
            max_window_for_feature_derivation=max_window_for_feature_derivation,
            group_by=group_by,
            holdout_percentage=holdout_percentage
        )

        self.train_auto_ts_body = CoreBody.TrainAutoTSBody.create(
            callback=callback,
            version=version,
            build_spec=build_spec,
            input_spec=input_spec
        )
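
# --- Hypothetical usage sketch (not from the source): per the docstring later in
# --- this section, this constructor appears to belong to TrainTSInput. The data
# --- object, column names, and values below are placeholders.
train_ts_input = TrainTSInput(
    data=train_data,                 # an uploaded data object (placeholder)
    target='sales',                  # target column (placeholder)
    datetime_column='date',          # timestamp column (placeholder)
    forecast_horizon=7,
    gap=0,
    evaluator=Evaluator.mse,         # Evaluator.mse appears elsewhere in this file
    max_model=3)
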
    def getPredictParams(self):
        """Using pred_body to create the JSON request body for prediction.

        Returns:
            :obj:`dict`
        """
        if self.select_model == 'best':
            select_model_id = self.experiment.best_model.id
        elif self.select_model == 'model_id':
            if self.select_opt in self.experiment.models:
                select_model_id = self.select_opt
            else:
                logger.error('[%s] Invalid input model ID: %s',
                             self.__class__.__name__, self.select_opt)
                raise ValueError('Invalid input model ID: %s' %
                                 self.select_opt)
        elif self.select_model == 'recommendation':
            self.select_opt = check_is_enum(Evaluator, self.select_opt)
            select_model_id = None
            for rec in self.experiment.recommendations:
                if self.select_opt == rec['evaluator']:
                    select_model_id = rec['model_id']
            if select_model_id is None:
                logger.error('[%s] Invalid input metric: %s',
                             self.__class__.__name__, self.select_opt)
                raise ValueError('Invalid input metric: %s' % self.select_opt)
        setattr(self.pred_body, 'data_id', self.data.id)
        setattr(self.pred_body, 'model_id', select_model_id)

        # Round-trip through json to turn pred_body into a plain, serializable dict.
        params = json.dumps(self.pred_body.jsonable(),
                            cls=CoreBody.ComplexEncoder)
        params = json.loads(params)
        return params
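
# --- Hypothetical usage sketch (not from the source): `pred_input` stands for an
# --- instance of the class defining getPredictParams() above. Only data_id and
# --- model_id are set in this method; the other keys come from how pred_body was built.
params = pred_input.getPredictParams()
# params is a plain dict (jsonable() output round-tripped through json);
# params['data_id'] and params['model_id'] identify the data and the selected model.
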
Example #4
    def train_ts(train_input, select_model_by=Evaluator.mse, name=None):
        """Train time series model with data.

        Create a Time Series Experiment Job and schedule its execution
        in the CORO_TASKS list. Record the Job in the JOBS list.

        Args:
            train_input
                (:class:`~decanter.core.core_api.train_input.TrainTSInput`):
                Settings for training.
            name (:obj:`str`, optional): Name for the time series training action.

        Returns:
            :class:`~decanter.core.jobs.experiment.ExperimentTS` object

        Raises:
            AttributeError: If the function is called without
                :class:`~decanter.core.context.Context` created.
        """
        select_model_by = check_is_enum(Evaluator, select_model_by)
        logger.debug('[Core] Create Train Job')
        exp_ts = ExperimentTS(train_input=train_input,
                              select_model_by=select_model_by,
                              name=name)
        try:
            if Context.LOOP is None:
                raise AttributeError('[Core] event loop is \'NoneType\'')
            task = Context.LOOP.create_task(exp_ts.wait())
            Context.CORO_TASKS.append(task)
            Context.JOBS.append(exp_ts)
        except AttributeError:
            logger.error('[Core] Context not created')
            raise
        return exp_ts
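
# --- Hypothetical usage sketch (not from the source): per the docstring, train_ts
# --- expects a TrainTSInput and a created Context. `client` and `train_ts_input`
# --- are placeholder names for the caller object and the prepared input.
exp_ts = client.train_ts(
    train_input=train_ts_input,
    select_model_by=Evaluator.mse,
    name='demo-ts-run')
# The returned ExperimentTS is appended to Context.JOBS and awaited through the
# task created on Context.LOOP.
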
Example #5
    def get_attr(metric, score_types, exp_attributes):
        """Get all models metric of score_type in exp_attributes"""
        metric = check_is_enum(Evaluator, metric)
        model_names = []
        model_attr_lists = [[] for _ in score_types]
        for key, val in exp_attributes.items():
            model_names.append(key)
            for i, score_type in enumerate(score_types):
                model_attr_lists[i].append(val[score_type][metric])

        return model_names, model_attr_lists
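
# --- Hypothetical usage sketch (not from the source): the loop above expects
# --- exp_attributes to map model name -> {score_type: {metric: value}}. The
# --- score-type keys below are placeholders, and it is assumed check_is_enum
# --- resolves 'mse' to the same key used inside the nested dicts.
exp_attributes = {
    'model_a': {'validation': {'mse': 0.12}, 'cv': {'mse': 0.15}},
    'model_b': {'validation': {'mse': 0.10}, 'cv': {'mse': 0.14}},
}
names, per_score_values = get_attr('mse', ['validation', 'cv'], exp_attributes)
# names            -> ['model_a', 'model_b']
# per_score_values -> one list per score type, e.g. [[0.12, 0.10], [0.15, 0.14]]
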
Example #6
    def __init__(self, train_input, select_model_by=Evaluator.auto, name=None):
        super().__init__(jobs=[train_input.data],
                         task=TrainTask(train_input, name=name),
                         name=gen_id(self.__class__.__name__, name))

        select_model_by = check_is_enum(Evaluator, select_model_by)
        self.train_input = train_input
        self.best_model = Model()
        self.select_model_by = select_model_by
        self.features = None
        self.train_data_id = None
        self.target = None
        self.test_base_id = None
        self.models = None
        self.hyperparameters = None
        self.attributes = None
        self.recommendations = None
        self.options = None
        self.created_at = None
        self.updated_at = None
        self.completed_at = None
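
# --- Note (not from the source): the attributes above start as None; judging from
# --- getPredictParams() earlier in this section, they are filled from the finished
# --- experiment, e.g. exp.best_model.id, exp.models, and exp.recommendations.
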
    def train(train_input, select_model_by=Evaluator.auto, name=None):
        """Train model with data.

        Create an Experiment Job and schedule its execution in the CORO_TASKS
        list. Record the Job in the JOBS list.

        Args:
            train_input
                (:class:`~decanter.core.core_api.train_input.TrainInput`):
                stores the settings for training.
            select_model_by
                (:class:`~decanter.core.enums.evaluators.Evaluator`):
                Metric used to pick the best model when predicting with the
                trained experiment.
            name (:obj:`str`, optional): Name for the training action.

        Returns:
            :class:`~decanter.core.jobs.experiment.Experiment` object

        Raises:
            AttributeError: If the function is called without
                :class:`~decanter.core.context.Context` created.
        """
        select_model_by = check_is_enum(Evaluator, select_model_by)
        logger.debug('[Core] Create Train Job')
        exp = Experiment(train_input=train_input,
                         select_model_by=select_model_by,
                         name=name)
        try:
            if Context.LOOP is None:
                raise AttributeError('[Core] event loop is \'NoneType\'')
            task = Context.LOOP.create_task(exp.wait())
            Context.CORO_TASKS.append(task)
            Context.JOBS.append(exp)
        except AttributeError:
            logger.error('[Core] Context not created')
            raise
        return exp
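
# --- Hypothetical usage sketch (not from the source): per the docstring, train
# --- expects a TrainInput and a created Context. `client` and `train_input` are
# --- placeholder names for the caller object and the prepared input.
exp = client.train(
    train_input=train_input,
    select_model_by=Evaluator.auto,
    name='demo-run')
# The returned Experiment is appended to Context.JOBS and awaited through the
# task created on Context.LOOP.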