예제 #1
0
def cli(recommendations, set_size, journey_id):
    """Show demographics and intervention recommendations for one journey.

    Loads the best model and its test set (paths come from
    ``postgres.get_best_model_paths``), prints the journey's active
    demographic flags and un-normalized age, the model's base probability
    with every intervention feature zeroed, and the top recommendations.

    Args:
        recommendations: Number of recommendations to request; forwarded
            as ``n`` to ``get_top_recommendations``.
        set_size: Forwarded as ``set_size`` to
            ``get_top_recommendations``.
        journey_id: Index label of the journey in the test set. When it is
            falsy or absent from the index, a hint with example ids is
            printed and the function returns without further output.
    """
    model_path, _, test_path = postgres.get_best_model_paths()
    df_test = s3.read_parquet(test_path)
    model = s3.read_pickle(model_path)

    id_missing = not journey_id
    # Test membership on the index itself instead of a full list copy.
    if id_missing or journey_id not in df_test.index:
        # Cap the sample size so a test set with fewer than five rows
        # still yields a hint instead of making sample() raise.
        examples = df_test.sample(min(5, len(df_test))).index.tolist()
        if id_missing:
            click.echo("No Journey id specified. Try for example {}".format(
                examples))
        else:
            click.echo("Journey ID {} not found. Try for example {}".format(
                journey_id, examples))
        return

    observation = df_test.loc[journey_id, :].copy()
    # Remove the two "ttj" columns so only model features remain.
    observation = observation.drop(["ttj_sub_12", "ttj"])
    # Feature groups are identified by their column-name prefix.
    interv_cols = [col for col in observation.index if col.startswith("i_")]
    dem_cols = [col for col in observation.index if col.startswith("d_")]

    separator = "---------------"
    click.echo("Journey {} --- Demographics".format(journey_id))
    click.echo(separator)
    output = observation[dem_cols]
    # Only list the demographic entries whose value equals 1.
    click.echo(output[output == 1])
    # NOTE: Un-normalize age here.
    # Get maximum age from journey data instead
    age_scale = 78
    click.echo("Age: {}".format(round(output["d_age"] * age_scale)))
    click.echo(separator)
    click.echo("Use Model: {}".format(model.__class__.__name__))
    click.echo(separator)

    # Zero all interventions to obtain the no-intervention baseline.
    observation[interv_cols] = 0
    base_probability = model.predict_proba(
        observation.to_numpy().reshape(1, -1))
    # Column 1 of the single returned row is reported
    # (presumably the positive class -- confirm against the model).
    click.echo("Base employment probability {:.4f}".format(
        base_probability[0][1]))
    click.echo(separator)
    click.echo("Intervention Recommendations")
    click.echo(separator)
    df_recs = get_top_recommendations(model,
                                      observation,
                                      set_size=set_size,
                                      n=recommendations)
    click.echo(df_recs)
    def run(self):
        """Compute the aggregate recommendation error and store it.

        Reads the ``evaluation_params`` section of
        ``./conf/base/parameters.yml``, loads the pickled model found at
        the first input's path, looks up the model id and its train/test
        data paths via ``get_model_info_by_path``, evaluates with
        ``get_aggregate_recommendation_error`` and writes the result with
        ``write_recommendation_eval``. Finally marks the task complete.
        """
        # Open the parameters file in a context manager so the handle is
        # closed deterministically instead of being leaked.
        with open("./conf/base/parameters.yml") as params_file:
            params = yaml.load(params_file,
                               Loader=yaml.FullLoader)["evaluation_params"]

        model_path = self.input()[0].path
        model = s3.read_pickle(model_path)
        model_id, test_path, train_path = get_model_info_by_path(model_path)

        df_train = s3.read_parquet(train_path)
        df_test = s3.read_parquet(test_path)

        rec_error = get_aggregate_recommendation_error(
            df_train,
            df_test,
            model,
            params["set_size"],
            params["num_recs"],
            params["percent_sample"],
        )
        write_recommendation_eval(get_db_engine(), rec_error, model_id, params)
        # Completion is tracked through this flag, set manually.
        self.task_complete = True
    def run(self):
        """Evaluate the pickled model on the test set and record it.

        Reads the test frame from ``self.input()[0][1]``, separates the
        ``ttj_sub_12`` label from the features, scores the model loaded
        from ``self.input()[1]`` with ``evaluate`` and hands the model,
        its metrics and the related paths to ``model_info_to_db``.
        """
        label_column = "ttj_sub_12"
        test_frame = s3.read_parquet(self.input()[0][1].path)
        labels = test_frame.loc[:, label_column]
        # Both "ttj" columns are excluded from the feature matrix.
        features = test_frame.drop(columns=["ttj", label_column])

        classifier = s3.read_pickle(self.input()[1].path)
        scores = evaluate(classifier, features, labels)

        # NOTE(review): the train/test paths stored here come from input
        # positions [0][2] and [0][3], while the frame scored above is
        # read from [0][1] -- confirm this matches the upstream outputs.
        db_record = dict(
            engine=get_db_engine(),
            model=classifier,
            metrics=scores,
            features=features.columns.tolist(),
            date=self.date,
            model_path=self.input()[1].path,
            train_data_path=self.input()[0][2].path,
            test_data_path=self.input()[0][3].path,
        )
        model_info_to_db(**db_record)
        # NOTE: The task is marked as completed manually. Using the built-in
        # luigi.contrib.postgres.CopyToTable task would be the right way.
        self.task_complete = True