Example #1
0
def make_rf(**kwargs):
    """Train a small grid of random forest regressors and log each to MLflow.

    The run id of the upstream ``process_data`` task is pulled from XCom via
    ``kwargs["ti"]`` and used to download the ``processed_data`` artifacts.
    ``germany.csv`` is read from there and split 80/20 into train and test.
    One MLflow run named ``"rf"`` is created for every combination of
    ``n_estimators`` in (4, 25) and ``max_depth`` in (4, 10); each run logs
    its two hyper-parameters, the three values returned by ``eval_metrics``
    (rmse, mae, r2) and the fitted model.

    The list of created run ids is pushed back to XCom under ``"run_id"``.
    """
    task_instance = kwargs["ti"]  # presumably the Airflow TaskInstance -- confirm
    upstream_run_id = task_instance.xcom_pull(task_ids="process_data", key="run_id")

    # Going through the tracking client is overkill when everything lives on
    # one machine, but it keeps this task independent of where the data
    # processing step actually ran.
    tracking_client = MlflowClient()
    artifact_dir = tracking_client.download_artifacts(upstream_run_id, "processed_data")

    frame = pd.read_csv(artifact_dir + "/germany.csv", parse_dates=[0], index_col=0)

    feature_columns = ["windspeed", "temperature", "rad_horizontal", "rad_diffuse"]
    target_columns = ["solar_GW", "wind_GW"]
    X_train, X_test, y_train, y_test = train_test_split(
        frame[feature_columns],
        frame[target_columns],
        test_size=0.2,
        random_state=42,
    )

    # Flattened hyper-parameter grid, in the same order as a nested loop
    # (n_estimators outer, max_depth inner).
    grid = [(trees, depth) for trees in [4, 25] for depth in [4, 10]]

    run_ids = []
    for n_estimators, max_depth in grid:
        with mlflow.start_run(run_name="rf") as active_run:
            forest = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth)
            forest.fit(X_train, y_train)

            predictions = forest.predict(X_test)
            rmse, mae, r2 = eval_metrics(y_test, predictions)

            for param_name, param_value in (
                ("n_estimators", n_estimators),
                ("max_depth", max_depth),
            ):
                mlflow.log_param(param_name, param_value)

            for metric_name, metric_value in (("rmse", rmse), ("mae", mae), ("r2", r2)):
                mlflow.log_metric(metric_name, metric_value)

            mlflow.sklearn.log_model(forest, "model")
            run_ids.append(active_run.info.run_id)

    task_instance.xcom_push(key="run_id", value=run_ids)
Example #2
0
def download_artifacts(run_id: str,
                       remote_path: Union[str, Path],
                       local_path: Union[str, Path],
                       client: "Union[MlflowClient, None]" = None,
                       no_cached=False) -> PathLike:
    """Download an artifact of an MLflow run into a local directory.

    Args:
        run_id: Id of the MLflow run that owns the artifact.
        remote_path: Artifact path, relative to the run's artifact root.
        local_path: Local destination directory; created if it is missing.
        client: Client used for the download. When omitted, a new
            ``MlflowClient()`` is created on each call (instead of a single
            instance being built once when the module is imported).
        no_cached: When true, a previously downloaded copy located at
            ``local_path / remote_path`` is deleted before downloading.
            NOTE(review): this assumes the client stores the artifact at that
            location, as the original implementation did -- confirm.

    Returns:
        Whatever ``client.download_artifacts`` returns (the local location of
        the downloaded artifact).
    """
    import shutil  # local import: only needed to drop a cached directory

    if client is None:
        client = MlflowClient()

    if no_cached:
        cached = Path(local_path, remote_path)
        if cached.is_dir() and not cached.is_symlink():
            # Artifacts may be whole directories; os.remove cannot delete those.
            shutil.rmtree(cached)
        elif cached.exists() or cached.is_symlink():
            cached.unlink()
        # Nothing cached yet: there is nothing to invalidate, so do not fail.

    os.makedirs(local_path, exist_ok=True)
    return client.download_artifacts(run_id=run_id,
                                     path=remote_path,
                                     dst_path=local_path)
Example #3
0
def make_lr(**kwargs):
    """Fit a linear regression on the processed data and log it to MLflow.

    The run id of the upstream ``process_data`` task is pulled from XCom via
    ``kwargs["ti"]`` and used to download the ``processed_data`` artifacts.
    ``germany.csv`` is read from there and split 80/20 into train and test.
    A single MLflow run named ``"lr"`` logs the three values returned by
    ``eval_metrics`` (rmse, mae, r2) together with the fitted model.

    The run id is pushed to XCom under ``"run_id"`` as a one-element list
    while the MLflow run is still active.
    """
    task_instance = kwargs["ti"]  # presumably the Airflow TaskInstance -- confirm
    upstream_run_id = task_instance.xcom_pull(task_ids="process_data", key="run_id")

    # Going through the tracking client is overkill when everything lives on
    # one machine, but it keeps this task independent of where the data
    # processing step actually ran.
    tracking_client = MlflowClient()
    artifact_dir = tracking_client.download_artifacts(upstream_run_id, "processed_data")

    frame = pd.read_csv(artifact_dir + "/germany.csv", parse_dates=[0], index_col=0)

    feature_columns = ["windspeed", "temperature", "rad_horizontal", "rad_diffuse"]
    target_columns = ["solar_GW", "wind_GW"]
    X_train, X_test, y_train, y_test = train_test_split(
        frame[feature_columns],
        frame[target_columns],
        test_size=0.2,
        random_state=42,
    )

    with mlflow.start_run(run_name="lr") as active_run:
        regressor = LinearRegression()
        regressor.fit(X_train, y_train)

        predictions = regressor.predict(X_test)
        rmse, mae, r2 = eval_metrics(y_test, predictions)

        for metric_name, metric_value in (("rmse", rmse), ("mae", mae), ("r2", r2)):
            mlflow.log_metric(metric_name, metric_value)
        mlflow.sklearn.log_model(regressor, "model")

        # Published as a list so consumers can treat it like the multi-run case.
        task_instance.xcom_push(key="run_id", value=[active_run.info.run_id])