コード例 #1
0
ファイル: cli.py プロジェクト: recruit-tech/drctrl
def frozen(
        ctx,
        credential,  # shared options
        config_path):
    """Freeze a model of the configured project and report the new model id.

    The model to freeze is the ``model_id`` entry of the ``frozen``
    configuration. When that entry is ``None`` the first model returned by
    ``project.search_models`` (sorted by the key obtained from
    ``Utils.get_appropriate_dataset``) is used instead. The selected id is
    written back into the ``frozen`` configuration before ``project.frozen``
    is called, and the id it returns is stored in ``ctx.obj['model_id']``.

    Args:
        ctx: Context object whose ``obj`` mapping holds the ``'config'`` entry.
        credential: Shared option; not read in this function body.
        config_path: Shared option; not read in this function body.

    Raises:
        Exception: The configuration is not valid for the ``frozen`` command.
        NotSpecifiedValidParam: The project has no usable ``dr_project``.
    """
    cmd = 'frozen'
    config = ctx.obj['config']

    if not config.is_valid(cmds=[cmd]):
        raise Exception('configuration file error')

    project = Project(**config.get_params(cmd='environment'))
    # A missing attribute and an explicit None are treated the same way.
    if getattr(project, 'dr_project', None) is None:
        raise NotSpecifiedValidParam(
            'project id is not supplied. you should run build command befor, or apply command'
        )

    source_model_id = config.get_param(cmd, param='model_id')

    with section('Freezing model'):
        if source_model_id is None:
            # No explicit model: fall back to the first search result.
            ranking_key = Utils.get_appropriate_dataset(
                project.cv_method,
                project.validation_type,
                project.validation_params)
            source_model_id = project.search_models(sort_key=ranking_key)[0].id
        config.set_params(cmd, params={'model_id': source_model_id})

        frozen_model_id = project.frozen(**config.get_params(cmd))
        # Expose the new id to whatever runs after this command.
        ctx.obj['model_id'] = frozen_model_id

    click.echo('frozen command is succeed.')
    click.echo(
        f"'{source_model_id}' is frozen -> new model_id: {frozen_model_id}")
コード例 #2
0
ファイル: cli.py プロジェクト: recruit-tech/drctrl
def fit(
        ctx,
        credential,  # shared options
        config_path):
    """Train on the configured project and publish the resulting model id.

    Calls ``project.fit`` with the ``fit`` configuration, which returns the
    feature list name and the model type. Both the feature list name and the
    model id are stored on ``ctx.obj`` for later steps. For the
    ``'autopilot'`` model type the model id is the first result of
    ``project.search_models`` for that feature list; otherwise it is the
    ``model_id`` entry of the ``fit`` configuration.

    Args:
        ctx: Context object whose ``obj`` mapping holds the ``'config'`` entry.
        credential: Shared option; not read in this function body.
        config_path: Shared option; not read in this function body.

    Raises:
        Exception: The configuration is not valid for the ``fit`` command.
        NotSpecifiedValidParam: The project has no usable ``dr_project``.
    """
    cmd = 'fit'
    config = ctx.obj['config']

    if not config.is_valid(cmds=[cmd]):
        raise Exception('configuration file error')

    project = Project(**config.get_params(cmd='environment'))
    # A missing attribute and an explicit None are treated the same way.
    if getattr(project, 'dr_project', None) is None:
        raise NotSpecifiedValidParam(
            'project id is not supplied. you should run build command befor, or apply command'
        )

    with section("Training"):
        featurelist_name, model_type = project.fit(**config.get_params(cmd))

        # Values below are handed over to the successor process via ctx.obj.
        ctx.obj['featurelist_name'] = featurelist_name
        if model_type != 'autopilot':
            selected_model_id = config.get_param(cmd, param='model_id')
        else:
            ranking_key = Utils.get_appropriate_dataset(
                project.cv_method,
                project.validation_type,
                project.validation_params)
            candidates = project.search_models(
                featurelist_name=featurelist_name, sort_key=ranking_key)
            selected_model_id = candidates[0].id
        ctx.obj['model_id'] = selected_model_id

    click.echo("fit command is succeed.")
    click.echo(f"'{model_type}' with featurelist: '{featurelist_name}'")
    click.echo(f"model_id: {ctx.obj['model_id']}")
コード例 #3
0
 def output(self, df):
     """Hand ``df`` to the wrapped plugin's ``output`` method.

     Args:
         df: Object forwarded unchanged to ``self.io_instance.output``.

     Returns:
         Whatever ``self.io_instance.output(df)`` returns.

     Raises:
         TypeError: The wrapped plugin has no ``output`` attribute.
         NotSpecifiedValidParam: The plugin's ``output`` raised; the original
             exception is attached as ``__cause__``.
     """
     if not hasattr(self.io_instance, 'output'):
         # Name the plugin by its class; no ``plugin_type`` variable is
         # defined in this method's scope.
         plugin_name = type(self.io_instance).__name__
         raise TypeError(
             f"{plugin_name} plugin does not have output method")
     try:
         return self.io_instance.output(df)
     except Exception as e:
         # Chain the cause so the underlying failure stays visible.
         raise NotSpecifiedValidParam('confirm output setting') from e
コード例 #4
0
    def to_df(self):
        """Return the result of the wrapped plugin's ``to_df`` method.

        Returns:
            Whatever ``self.io_instance.to_df()`` returns.

        Raises:
            TypeError: The wrapped plugin has no ``to_df`` attribute.
            NotSpecifiedValidParam: The plugin's ``to_df`` raised; the
                original exception is attached as ``__cause__``.
        """
        if not hasattr(self.io_instance, 'to_df'):
            # Name the plugin by its class; no ``plugin_type`` variable is
            # defined in this method's scope.
            plugin_name = type(self.io_instance).__name__
            raise TypeError(f"{plugin_name} plugin does not have to_df method")

        try:
            return self.io_instance.to_df()
        except Exception as e:
            # Chain the cause so the underlying failure stays visible.
            raise NotSpecifiedValidParam('confirm input setting') from e
コード例 #5
0
def get_plugin_class(io_type, plugin_type):
    """Find and return the plugin class for an I/O direction and plugin kind.

    Scans the modules located next to this file for one whose name contains
    both ``plugin_type`` and ``io_type``, imports it from ``drctrl.plugins``
    and returns its attribute named ``<Io_type><Plugin_type>`` (each part
    passed through ``str.capitalize``). When several module names match, the
    last one yielded by ``pkgutil.iter_modules`` is used.

    Args:
        io_type: String that must occur in the module name; also the first
            half of the class name.
        plugin_type: String that must occur in the module name; also the
            second half of the class name.

    Returns:
        The attribute looked up on the imported module.

    Raises:
        NotSpecifiedValidParam: No module name matches, or importing the
            matching module raises ``ModuleNotFoundError`` (attached as
            ``__cause__``).
    """
    not_found_message = f"Not found '{io_type}_ {plugin_type}' module"

    target_module = None
    for _, mod_name, _ in pkgutil.iter_modules(path=[Path(__file__).parent]):
        if plugin_type in mod_name and io_type in mod_name:
            target_module = mod_name

    # Fail explicitly instead of attempting to import "drctrl.plugins.None".
    if target_module is None:
        raise NotSpecifiedValidParam(not_found_message)

    try:
        module = importlib.import_module(f"drctrl.plugins.{target_module}")
    except ModuleNotFoundError as err:
        # The missing module may be a dependency of the plugin itself, so
        # keep the original error reachable through the exception chain.
        raise NotSpecifiedValidParam(not_found_message) from err

    return getattr(module, f"{io_type.capitalize()}{plugin_type.capitalize()}")
コード例 #6
0
    def set_target_and_run_autopilot(self, target_feature, metric):
        """Check ``metric`` for the target, then call ``set_target``.

        The metrics accepted for ``target_feature`` are read from
        ``self.dr_project.get_metrics``. The partitioning specification is
        built by ``self.get_partition_spec`` from the instance's
        ``cv_method``, ``validation_type`` and ``validation_params``, and the
        mode passed to ``set_target`` is ``self.autopilot``.

        Args:
            target_feature: Feature name used as the target.
            metric: Metric name; must be one of the available metrics.

        Returns:
            ``True`` once ``set_target`` has returned.

        Raises:
            NotSpecifiedValidParam: ``metric`` is not in the available
                metrics for ``target_feature``.
        """
        metrics_info = self.dr_project.get_metrics(feature_name=target_feature)
        allowed_metrics = metrics_info['available_metrics']
        if metric not in allowed_metrics:
            raise NotSpecifiedValidParam(
                f"metric should be specified from {allowed_metrics}")

        partitioning = self.get_partition_spec(method=self.cv_method,
                                               _type=self.validation_type,
                                               params=self.validation_params)

        target_options = {
            'target': target_feature,
            'metric': metric,
            'mode': self.autopilot,
            'partitioning_method': partitioning,
        }
        self.dr_project.set_target(**target_options)

        return True
コード例 #7
0
ファイル: cli.py プロジェクト: recruit-tech/drctrl
def predict(
        ctx,
        credential,  # shared options
        config_path):
    """Upload the configured input, predict with a model and write the result.

    Steps, in order:

    1. Read the input through the ``input`` IO plugin and upload it to the
       project.
    2. Predict with the ``model_id`` from the ``predict`` configuration, or
       with the first model returned by ``project.search_models`` when that
       entry is ``None``.
    3. When ``reasoncode`` is enabled, wait for the reason code job and merge
       its result into the predictions on ``row_id``.
    4. When ``merge_origin`` is enabled, merge the predictions with the input
       through ``merge_dataset``.
    5. Write the predictions through the ``output`` IO plugin.
    6. Delete the uploaded dataset unless ``del_dataset`` is exactly
       ``False``. This runs even when steps 2-5 fail, so a failed run does
       not leave the uploaded dataset behind.

    Args:
        ctx: Context object whose ``obj`` mapping holds the ``'config'`` entry.
        credential: Shared option; not read in this function body.
        config_path: Shared option; not read in this function body.

    Raises:
        Exception: The configuration is not valid for the ``predict`` command.
        NotSpecifiedValidParam: The project has no usable ``dr_project``.
    """
    cmd = 'predict'
    config = ctx.obj['config']

    if not config.is_valid(cmds=[cmd]):
        raise Exception('configuration file error')

    project = Project(**config.get_params(cmd='environment'))
    if not hasattr(project, 'dr_project') or project.dr_project is None:
        raise NotSpecifiedValidParam(
            'project id is not supplied. you should run build command befor, or apply command'
        )

    prediction_column = Utils.get_appropriate_prediction_column(
        project.dr_project.target_type)

    model_id = config.get_param(cmd, param='model_id')

    with section('Upload dataset'):
        input_df = IOManager(io_type='input',
                             io_params=config.get_param(
                                 cmd, param='input')).to_df()
        dataset = project.upload_dataset(input_df)
        click.echo(
            f"#rows: {dataset.num_rows}, #columns: {dataset.num_columns}")

    # From here on an uploaded dataset exists; the finally clause below makes
    # sure it is cleaned up on failure as well as on success.
    try:
        with section('Prediction'):
            # auto select best result
            if model_id is None:
                sort_key = Utils.get_appropriate_dataset(
                    project.cv_method,
                    project.validation_type,
                    project.validation_params)
                model = project.search_models(sort_key=sort_key)[0]
                model_id = model.id

            predictions = project.predict(model_id=model_id,
                                          dataset_id=dataset.id)

            use_feature_impact = config.get_param(cmd, param='feature_impact')
            use_reasoncode = config.get_param(cmd, param='reasoncode')

            if use_feature_impact or use_reasoncode:
                # The returned value is not consumed anywhere in this
                # function; the call itself is kept.
                # NOTE(review): presumably the reason code job needs feature
                # impact to be requested first - confirm before removing.
                project.get_feature_impact(model_id)

            if use_reasoncode:
                max_codes = config.get_param(cmd, param='max_codes') or 3

                rc_job = project.get_reasoncode_job(model_id,
                                                    dataset.id,
                                                    max_codes=max_codes)

                # TODO: parameterize in config
                time_to_wait_for_reasoncode = 20 * 60  # sec
                reasoncode = rc_job.get_result_when_complete(
                    time_to_wait_for_reasoncode)
                reasoncodes = pd.DataFrame(reasoncode.get_all_as_dataframe())
                # Drop the prediction column from the reason codes before
                # merging them into the predictions.
                reasoncodes.drop(columns=[prediction_column], inplace=True)
                predictions = pd.merge(left=predictions,
                                       right=reasoncodes,
                                       on='row_id')

        if config.get_param(cmd, param='merge_origin'):
            predictions = merge_dataset(predictions, input_df)

        with section('Output result'):
            IOManager(io_type='output',
                      io_params=config.get_param(
                          cmd, param='output')).output(predictions)
    finally:
        with section('clearning'):
            # Only an explicit False keeps the dataset; None/missing deletes.
            if config.get_param(cmd, param='del_dataset') is not False:
                project.delete_dataset(dataset.id)