def __init__(self, data, target, algos, callback=None, test_base_id=None,
             test_data_id=None, evaluator=None, features=None,
             feature_types=None, max_run_time=None, max_model=None,
             tolerance=None, nfold=None, ts_split_split_by=None,
             ts_split_cv=None, ts_split_train=None, ts_split_test=None,
             seed=None, balance_class=None, max_after_balance=None,
             sampling_factors=None, validation_percentage=None,
             holdout_percentage=None, apu=None, preprocessing=None,
             version=None):
    """Assemble the train request body for a training job.

    Args:
        data: Data job/object whose result will be trained on.
        target (str): Name of the target column.
        algos: Iterable of algorithms (Algo enum members or their string
            values); each entry is normalized via ``check_is_enum``.
        ts_split_train / ts_split_test (dict, optional): ``{'start': ...,
            'end': ...}`` windows for a time-series split; ``None`` skips
            the window.
        ts_split_cv: CV fold spec, converted by ``CoreBody.cv_obj_array``.

    All remaining keyword arguments are passed through unchanged to
    ``CoreBody.TrainBody.create``.
    """
    evaluator = check_is_enum(Evaluator, evaluator)
    algos = [check_is_enum(Algo, algo) for algo in algos]
    self.data = data

    def _cv_window(window):
        # Map a {'start': ..., 'end': ...} dict to a CVTrain; None passes
        # through untouched. (Replaces two copy-pasted if/else branches.)
        if window is None:
            return None
        return CoreBody.CVTrain.create(
            start=window['start'], end=window['end'])

    # Time-series split spec: explicit train/test windows plus CV folds.
    time_series_split = CoreBody.TimeSeriesSplit.create(
        split_by=ts_split_split_by,
        cv=CoreBody.cv_obj_array(ts_split_cv),
        train=_cv_window(ts_split_train),
        test=_cv_window(ts_split_test))

    # 'tmp_data_id' is a placeholder — presumably swapped for the real
    # uploaded data id before the request is sent; TODO confirm at caller.
    self.train_body = CoreBody.TrainBody.create(
        target=target,
        train_data_id='tmp_data_id',
        algos=algos,
        callback=callback,
        test_base_id=test_base_id,
        test_data_id=test_data_id,
        evaluator=evaluator,
        features=features,
        feature_types=feature_types,
        max_run_time=max_run_time,
        max_model=max_model,
        tolerance=tolerance,
        nfold=nfold,
        time_series_split=time_series_split,
        seed=seed,
        balance_class=balance_class,
        max_after_balance=max_after_balance,
        sampling_factors=sampling_factors,
        validation_percentage=validation_percentage,
        holdout_percentage=holdout_percentage,
        apu=apu,
        preprocessing=preprocessing,
        version=version)
def __init__(self, data, target, datetime_column, forecast_horizon, gap,
             algorithms=None, feature_types=None, callback=None,
             version='v2', max_iteration=None, generation_size=None,
             mutation_rate=None, crossover_rate=None, tolerance=None,
             validation_percentage=None, holdout_percentage=None,
             max_model=None, seed=None, evaluator=None, max_run_time=None,
             nfold=None, time_unit=None, numerical_groupby_method=None,
             categorical_groupby_method=None, endogenous_features=None,
             exogenous_features=None, time_groups=None,
             max_window_for_feature_derivation=None):
    """Assemble the request body for an auto time-series training job.

    Normalizes enum-like arguments, then composes the nested CoreBody
    spec objects (genetic-algorithm params, build spec, group-by spec,
    input spec) into ``self.train_auto_ts_body``.
    """
    # Normalize string/enum inputs to their canonical enum values.
    evaluator = check_is_enum(Evaluator, evaluator)
    time_unit = check_is_enum(TimeUnit, time_unit)
    numerical_groupby_method = check_is_enum(
        NumericalGroupByMethod, numerical_groupby_method)
    categorical_groupby_method = check_is_enum(
        CategoricalGroupByMethod, categorical_groupby_method)

    self.data = data

    ga_params = CoreBody.GeneticAlgorithmParams.create(
        max_iteration=max_iteration,
        generation_size=generation_size,
        mutation_rate=mutation_rate,
        crossover_rate=crossover_rate)

    spec_build = CoreBody.BuildSpec.create(
        tolerance=tolerance,
        validation_percentage=validation_percentage,
        max_model=max_model,
        seed=seed,
        evaluator=evaluator,
        max_run_time=max_run_time,
        genetic_algorithm=ga_params,
        nfold=nfold,
        algos=algorithms)  # user specifies algorithm used in TS analysis

    grouping = CoreBody.TSGroupBy.create(
        time_unit=time_unit,
        numerical_groupby_method=numerical_groupby_method,
        categorical_groupby_method=categorical_groupby_method)

    # 'tmp_data_id' is a placeholder — presumably replaced with the real
    # uploaded data id before sending; TODO confirm at caller.
    spec_input = CoreBody.InputSpec.create(
        train_data_id='tmp_data_id',
        target=target,
        endogenous_features=endogenous_features,
        exogenous_features=exogenous_features,
        datetime_column=datetime_column,
        forecast_horizon=forecast_horizon,
        gap=gap,
        feature_types=feature_types,
        time_groups=time_groups,
        max_window_for_feature_derivation=max_window_for_feature_derivation,
        group_by=grouping,
        holdout_percentage=holdout_percentage)

    self.train_auto_ts_body = CoreBody.TrainAutoTSBody.create(
        callback=callback,
        version=version,
        build_spec=spec_build,
        input_spec=spec_input)
def getPredictParams(self):
    """Using pred_body to create the JSON request body for prediction.

    Resolves the model id according to ``self.select_model`` ('best',
    'model_id', or 'recommendation'), stamps it and the data id onto
    ``self.pred_body``, and serializes the body to a plain dict.

    Returns:
        :obj:`dict`

    Raises:
        ValueError: If the model id / metric / select_model value is invalid.
    """
    # Sentinel replaces the original fragile `'select_model_id' not in
    # locals().keys()` probe.
    select_model_id = None
    if self.select_model == 'best':
        select_model_id = self.experiment.best_model.id
    elif self.select_model == 'model_id':
        if self.select_opt in self.experiment.models:
            select_model_id = self.select_opt
        else:
            logger.error('[%s] Invalid input model ID: %s',
                         self.__class__.__name__, self.select_opt)
            raise ValueError('Invalid input model ID: %s' % self.select_opt)
    elif self.select_model == 'recommendation':
        self.select_opt = check_is_enum(Evaluator, self.select_opt)
        # Last matching recommendation wins, mirroring the original loop.
        for rec in self.experiment.recommendations:
            if self.select_opt == rec['evaluator']:
                select_model_id = rec['model_id']
        if select_model_id is None:
            logger.error('[%s] Invalid input metric: %s',
                         self.__class__.__name__, self.select_opt)
            raise ValueError('Invalid input metric: %s' % self.select_opt)
    if select_model_id is None:
        # Previously an unrecognized select_model fell through to a
        # NameError; fail with an explicit, descriptive error instead.
        raise ValueError('Invalid select_model: %s' % self.select_model)
    self.pred_body.data_id = self.data.id
    self.pred_body.model_id = select_model_id
    # Round-trip through json to flatten CoreBody objects into plain dicts.
    params = json.dumps(self.pred_body.jsonable(), cls=CoreBody.ComplexEncoder)
    return json.loads(params)
def train_ts(train_input, select_model_by=Evaluator.mse, name=None):
    """Train time series model with data.

    Creates a Time Series Experiment Job, schedules its execution on the
    CORO_TASKS list, and records the Job in the JOBS list.

    Args:
        train_input (:class:`~decanter.core.core_api.train_input.TrainTSInput`):
            Settings for training.
        select_model_by (:class:`~decanter.core.enums.evaluators.Evaluator`):
            Metric used to pick the best model when predicting from this
            experiment.
        name (:obj:`str`, optional): name for train time series action.

    Returns:
        :class:`~decanter.core.jobs.experiment.ExperimentTS` object

    Raises:
        AttributeError: If the function is called without
            :class:`~decanter.core.context.Context` created.
    """
    select_model_by = check_is_enum(Evaluator, select_model_by)
    logger.debug('[Core] Create Train Job')
    job = ExperimentTS(train_input=train_input,
                       select_model_by=select_model_by, name=name)
    # Accessing Context.LOOP may itself raise AttributeError when no
    # Context exists, so the guard stays inside the try block.
    try:
        if Context.LOOP is None:
            raise AttributeError("[Core] event loop is 'NoneType'")
        Context.CORO_TASKS.append(Context.LOOP.create_task(job.wait()))
        Context.JOBS.append(job)
    except AttributeError:
        logger.error('[Core] Context not created')
        raise
    return job
def get_attr(metric, score_types, exp_attributes):
    """Get all models metric of score_type in exp_attributes"""
    metric = check_is_enum(Evaluator, metric)
    model_names = []
    model_attr_lists = [[] for _ in score_types]
    for name, attrs in exp_attributes.items():
        model_names.append(name)
        # Walk the output columns and their score types in lockstep.
        for column, score_type in zip(model_attr_lists, score_types):
            column.append(attrs[score_type][metric])
    return model_names, model_attr_lists
def __init__(self, train_input, select_model_by=Evaluator.auto, name=None):
    """Set up an experiment job that waits on its input data and runs a
    TrainTask, then initialize the result fields to be filled later."""
    super().__init__(jobs=[train_input.data],
                     task=TrainTask(train_input, name=name),
                     name=gen_id(self.__class__.__name__, name))
    self.train_input = train_input
    self.best_model = Model()
    self.select_model_by = check_is_enum(Evaluator, select_model_by)
    # Result attributes — populated once the experiment response arrives.
    for attr in ('features', 'train_data_id', 'target', 'test_base_id',
                 'models', 'hyperparameters', 'attributes',
                 'recommendations', 'options', 'created_at', 'updated_at',
                 'completed_at'):
        setattr(self, attr, None)
def train(train_input, select_model_by=Evaluator.auto, name=None):
    """Train model with data.

    Creates an Experiment Job, schedules its execution on the CORO_TASKS
    list, and records the Job in the JOBS list.

    Args:
        train_input (:class:`~decanter.core.core_api.train_input.TrainInput`):
            stores the settings for training.
        select_model_by (:class:`~decanter.core.enums.evaluators.Evaluator`):
            if predict by trained experiment, how should we select best model
        name (:obj:`str`, optional): name for train action.

    Returns:
        :class:`~decanter.core.jobs.experiment.Experiment` object

    Raises:
        AttributeError: If the function is called without
            :class:`~decanter.core.context.Context` created.
    """
    select_model_by = check_is_enum(Evaluator, select_model_by)
    logger.debug('[Core] Create Train Job')
    job = Experiment(train_input=train_input,
                     select_model_by=select_model_by, name=name)
    # Accessing Context.LOOP may itself raise AttributeError when no
    # Context exists, so the guard stays inside the try block.
    try:
        if Context.LOOP is None:
            raise AttributeError("[Core] event loop is 'NoneType'")
        Context.CORO_TASKS.append(Context.LOOP.create_task(job.wait()))
        Context.JOBS.append(job)
    except AttributeError:
        logger.error('[Core] Context not created')
        raise
    return job