def frozen(
        ctx,
        credential,  # shared options
        config_path):
    config = ctx.obj['config']
    cmd = 'frozen'

    if not config.is_valid(cmds=[cmd]):
        raise Exception('configuration file error')

    project = Project(**config.get_params(cmd='environment'))
    if not hasattr(project, 'dr_project') or project.dr_project is None:
        raise NotSpecifiedValidParam(
            'project id is not supplied. run the build or apply command first')

    model_id = config.get_param(cmd, param='model_id')

    with section('Freezing model'):
        # auto-select the best model when no model_id is configured
        if model_id is None:
            sort_key = Utils.get_appropriate_dataset(project.cv_method,
                                                     project.validation_type,
                                                     project.validation_params)
            model = project.search_models(sort_key=sort_key)[0]
            model_id = model.id
            config.set_params(cmd, params={'model_id': model_id})

        new_model_id = project.frozen(**config.get_params(cmd))
        ctx.obj['model_id'] = new_model_id

    click.echo('frozen command succeeded.')
    click.echo(f"'{model_id}' is frozen -> new model_id: {new_model_id}")

def fit(
        ctx,
        credential,  # shared options
        config_path):
    config = ctx.obj['config']
    cmd = 'fit'

    if not config.is_valid(cmds=[cmd]):
        raise Exception('configuration file error')

    project = Project(**config.get_params(cmd='environment'))
    if not hasattr(project, 'dr_project') or project.dr_project is None:
        raise NotSpecifiedValidParam(
            'project id is not supplied. run the build or apply command first')

    with section('Training'):
        featurelist_name, model_type = project.fit(**config.get_params(cmd))

    # set params for successor processes
    ctx.obj['featurelist_name'] = featurelist_name
    if model_type == 'autopilot':
        # autopilot trains many models; surface the best one downstream
        sort_key = Utils.get_appropriate_dataset(project.cv_method,
                                                 project.validation_type,
                                                 project.validation_params)
        ctx.obj['model_id'] = project.search_models(
            featurelist_name=featurelist_name, sort_key=sort_key)[0].id
    else:
        ctx.obj['model_id'] = config.get_param(cmd, param='model_id')

    click.echo('fit command succeeded.')
    click.echo(f"'{model_type}' with featurelist: '{featurelist_name}'")
    click.echo(f"model_id: {ctx.obj['model_id']}")

def output(self, df):
    # NOTE: assumes the plugin type is stored on self at construction time;
    # the original referenced an undefined bare name `plugin_type`.
    if not hasattr(self.io_instance, 'output'):
        raise TypeError(
            f"{self.plugin_type} plugin does not have an output method")
    try:
        return self.io_instance.output(df)
    except Exception as e:
        raise NotSpecifiedValidParam('confirm output setting') from e

def to_df(self):
    if not hasattr(self.io_instance, 'to_df'):
        raise TypeError(
            f"{self.plugin_type} plugin does not have a to_df method")
    try:
        return self.io_instance.to_df()
    except Exception as e:
        raise NotSpecifiedValidParam('confirm input setting') from e

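# A minimal sketch of a plugin satisfying the duck-typed contract checked
# above: input plugins expose to_df() returning a DataFrame, output plugins
# expose output(df). The module path, class name, and `path` parameter are
# illustrative (hypothetical, following the repo's naming convention), not
# part of drctrl itself.
#
# drctrl/plugins/input_csv.py
import pandas as pd


class InputCsv:
    def __init__(self, path, **kwargs):
        # hypothetical: a CSV input plugin configured with a file path
        self.path = path

    def to_df(self):
        # read the configured file into the DataFrame drctrl passes around
        return pd.read_csv(self.path)
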
def get_plugin_class(io_type, plugin_type):
    # plugin modules are named '{io_type}_{plugin_type}' and live next to
    # this file; scan the package directory for a matching module
    modules = pkgutil.iter_modules(path=[str(Path(__file__).parent)])
    target_module = None
    for loader, mod_name, ispkg in modules:
        if plugin_type in mod_name and io_type in mod_name:
            target_module = mod_name

    # fail explicitly instead of attempting to import 'drctrl.plugins.None'
    if target_module is None:
        raise NotSpecifiedValidParam(
            f"'{io_type}_{plugin_type}' module not found")

    try:
        module = importlib.import_module(f"drctrl.plugins.{target_module}")
    except ModuleNotFoundError:
        raise NotSpecifiedValidParam(
            f"'{io_type}_{plugin_type}' module not found")

    return getattr(module, f"{io_type.capitalize()}{plugin_type.capitalize()}")

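# Usage sketch: with a plugin module laid out as in the InputCsv example
# above, resolution follows the '{io_type}_{plugin_type}' naming convention
# (plugin and file names are illustrative):
#
#     plugin_cls = get_plugin_class(io_type='input', plugin_type='csv')
#     # imports drctrl.plugins.input_csv and returns its InputCsv class
#     df = plugin_cls(path='data.csv').to_df()
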
def set_target_and_run_autopilot(self, target_feature, metric):
    metrics_list = self.dr_project.get_metrics(
        feature_name=target_feature)['available_metrics']
    if metric not in metrics_list:
        raise NotSpecifiedValidParam(
            f"metric must be one of {metrics_list}")

    spec = self.get_partition_spec(method=self.cv_method,
                                   _type=self.validation_type,
                                   params=self.validation_params)
    # setting the target kicks off autopilot in the configured mode
    self.dr_project.set_target(target=target_feature,
                               metric=metric,
                               mode=self.autopilot,
                               partitioning_method=spec)
    return True

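# Usage sketch (a hypothetical call, reusing the wrapper construction shown
# in the CLI commands; 'churn' and 'LogLoss' are illustrative values):
#
#     project = Project(**config.get_params(cmd='environment'))
#     project.set_target_and_run_autopilot(target_feature='churn',
#                                          metric='LogLoss')
#
# The metric must appear in the project's available_metrics for the target
# feature, otherwise NotSpecifiedValidParam is raised before set_target is
# called.
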
def predict(
        ctx,
        credential,  # shared options
        config_path):
    cmd = 'predict'
    config = ctx.obj['config']

    if not config.is_valid(cmds=[cmd]):
        raise Exception('configuration file error')

    project = Project(**config.get_params(cmd='environment'))
    if not hasattr(project, 'dr_project') or project.dr_project is None:
        raise NotSpecifiedValidParam(
            'project id is not supplied. run the build or apply command first')

    prediction_column = Utils.get_appropriate_prediction_column(
        project.dr_project.target_type)
    model_id = config.get_param(cmd, param='model_id')

    with section('Upload dataset'):
        input_df = IOManager(
            io_type='input',
            io_params=config.get_param(cmd, param='input')).to_df()
        dataset = project.upload_dataset(input_df)
        click.echo(f"#rows: {dataset.num_rows}, #columns: {dataset.num_columns}")

    with section('Prediction'):
        # auto-select the best model when no model_id is configured
        if model_id is None:
            sort_key = Utils.get_appropriate_dataset(project.cv_method,
                                                     project.validation_type,
                                                     project.validation_params)
            model = project.search_models(sort_key=sort_key)[0]
            model_id = model.id

        predictions = project.predict(model_id=model_id, dataset_id=dataset.id)

        if config.get_param(cmd, param='feature_impact') or config.get_param(
                cmd, param='reasoncode'):
            feature_impacts = project.get_feature_impact(model_id)
            feature_impacts = pd.DataFrame(feature_impacts)
            # feature_impacts.to_csv(f"feature_impact_{model_id}.csv", index=False)

        if config.get_param(cmd, param='reasoncode'):
            max_codes = config.get_param(cmd, param='max_codes') or 3
            rc_job = project.get_reasoncode_job(model_id,
                                                dataset.id,
                                                max_codes=max_codes)

            # TODO: parameterize in config
            time_to_wait_for_reasoncode = 20 * 60  # sec
            reasoncode = rc_job.get_result_when_complete(
                time_to_wait_for_reasoncode)
            reasoncodes = pd.DataFrame(reasoncode.get_all_as_dataframe())
            # drop the duplicated prediction column before joining on row_id
            reasoncodes.drop(columns=[prediction_column], inplace=True)
            predictions = pd.merge(left=predictions,
                                   right=reasoncodes,
                                   on='row_id')

        if config.get_param(cmd, param='merge_origin'):
            predictions = merge_dataset(predictions, input_df)

    with section('Output result'):
        IOManager(
            io_type='output',
            io_params=config.get_param(cmd, param='output')).output(predictions)

    with section('Cleaning'):
        if config.get_param(cmd, param='del_dataset') is not False:
            project.delete_dataset(dataset.id)

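# For reference, the predict command reads these keys from the 'predict'
# section of the config (names taken from the get_param calls above; the
# values are illustrative placeholders, not documented defaults):
#
#     {
#         'model_id': None,         # auto-select the best model when None
#         'input': {...},           # IOManager input plugin settings
#         'output': {...},          # IOManager output plugin settings
#         'feature_impact': False,
#         'reasoncode': False,
#         'max_codes': 3,           # falls back to 3 when unset
#         'merge_origin': False,
#         'del_dataset': True,      # dataset deleted unless explicitly False
#     }
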