Code example #1
File: nodes.py Project: krprls/kedro-mlflow-example
def train_model(train_x: pd.DataFrame, train_y: pd.DataFrame,
                parameters: Dict[str, Any]) -> np.ndarray:
    """Node for training a simple multi-class logistic regression model. The
    number of training iterations as well as the learning rate are taken from
    conf/project/parameters.yml. All of the data as well as the parameters
    will be provided to this function at the time of execution.
    """
    num_iter = parameters["example_num_train_iter"]
    lr = parameters["example_learning_rate"]
    X = train_x.values
    Y = train_y.values

    # Add bias to the features
    bias = np.ones((X.shape[0], 1))
    X = np.concatenate((bias, X), axis=1)

    weights = []
    # Train one model for each class in Y
    for k in range(Y.shape[1]):
        # Initialise weights
        theta = np.zeros(X.shape[1])
        y = Y[:, k]
        for _ in range(num_iter):
            z = np.dot(X, theta)
            h = _sigmoid(z)
            gradient = np.dot(X.T, (h - y)) / y.size
            theta -= lr * gradient
        # Save the weights for each model
        weights.append(theta)

    # Return a joint multi-class model with weights for all classes
    model = np.vstack(weights).transpose()
    # Log the learned weights to MLflow; `sklearn` here refers to the
    # mlflow.sklearn module. Note that log_model normally expects a fitted
    # scikit-learn estimator rather than a raw ndarray.
    sklearn.log_model(sk_model=model, artifact_path="model")
    return model
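
The snippet above assumes `mlflow.sklearn` is available under the name `sklearn` and calls a `_sigmoid` helper defined elsewhere in the project. A minimal sketch of the assumed imports and helper, where the helper body is simply the standard logistic function rather than the project's exact code:

from typing import Any, Dict

import numpy as np
import pandas as pd

import mlflow.sklearn as sklearn


def _sigmoid(z: np.ndarray) -> np.ndarray:
    # Element-wise logistic function used by the gradient-descent loop above
    return 1.0 / (1.0 + np.exp(-z))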
Code example #2
 def test_model_log(self):
     old_uri = mlflow.get_tracking_uri()
     # should_start_run tests whether or not calling log_model() automatically starts a run.
     for should_start_run in [False, True]:
         with TempDir(chdr=True, remove_on_exit=True) as tmp:
             try:
                 mlflow.set_tracking_uri("test")
                 if should_start_run:
                     mlflow.start_run()
                 artifact_path = "linear"
                 conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                 _mlflow_conda_env(conda_env, additional_pip_deps=["sklearn"])
                 sklearn.log_model(sk_model=self._linear_lr,
                                   artifact_path=artifact_path,
                                   conda_env=conda_env)
                 x = sklearn.load_model(artifact_path, run_id=mlflow.active_run().info.run_uuid)
                 model_path = _get_model_log_dir(
                         artifact_path, mlflow.active_run().info.run_uuid)
                 model_config = Model.load(os.path.join(model_path, "MLmodel"))
                 assert pyfunc.FLAVOR_NAME in model_config.flavors
                 assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
                 env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                 assert os.path.exists(os.path.join(model_path, env_path))
                 xpred = x.predict(self._X)
                 np.testing.assert_array_equal(self._linear_lr_predict, xpred)
             finally:
                 mlflow.end_run()
                 mlflow.set_tracking_uri(old_uri)
Code example #3
File: train.py Project: zp672087110/ml-engine
def train_and_validate(train, valid, model='lm'):
    """train a model and evaluate it with train and valid data
    """

    for i in (train, valid):
        assert isinstance(i, pd.DataFrame), \
            'Error: data must be pandas.DataFrame.'

    train = train.values.astype('f8')
    valid = valid.values.astype('f8')

    x_train, y_train = train[:, :-1], train[:, -1]
    x_valid, y_valid = valid[:, :-1], valid[:, -1]

    pipeline = train_pipeline(x_train, y_train, model)

    r2_train, rmsle_train = calculate_metrics(x_train, y_train, pipeline)
    r2_valid, rmsle_valid = calculate_metrics(x_valid, y_valid, pipeline)

    set_tracking_uri(DEFAULT_URI)
    log_metric('r2_train', r2_train)
    log_metric('r2_valid', r2_valid)
    log_metric('rmsle_train', rmsle_train)
    log_metric('rmsle_valid', rmsle_valid)
    log_model(pipeline, 'pipeline_' + model)

    return pipeline, r2_train, rmsle_train, r2_valid, rmsle_valid
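
This function calls MLflow helpers unqualified, so it presumably relies on imports along these lines (`DEFAULT_URI`, `train_pipeline`, and `calculate_metrics` are project-specific names that are not shown here):

import pandas as pd

from mlflow import log_metric, set_tracking_uri
from mlflow.sklearn import log_model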
Code example #4
File: nodes.py Project: Minyus/kedro-mlflow-demo
def train_model(train_x: pd.DataFrame, train_y: pd.DataFrame,
                parameters: Dict[str, Any]) -> sklearn_Pipeline:
    """Node for training a simple multi-class logistic regression model. The
    number of training iterations as well as the learning rate are taken from
    conf/project/parameters.yml. All of the data as well as the parameters
    will be provided to this function at the time of execution.
    """
    # Build a multi-class logistic regression model
    model_params = parameters['model_params']
    model = LogisticRegression(**model_params)

    if parameters['model_standard_scaler']:
        # Prepare column transformer to do scaling
        col_transformer = ColumnTransformer(
            [
                (
                    'standard_scaler_sl',
                    StandardScaler(),
                    ["sepal_length"],
                ),
                (
                    'standard_scaler_sw',
                    StandardScaler(),
                    ["sepal_width"],
                ),
                (
                    'standard_scaler_pl',
                    StandardScaler(),
                    ["petal_length"],
                ),
                (
                    'standard_scaler_pw',
                    StandardScaler(),
                    ["petal_width"],
                ),
            ],
            remainder='passthrough',
        )

        # Make pipeline w/ scaler
        model_pipeline = sklearn_Pipeline(steps=[
            ('col_transformer', col_transformer),
            ('model', model),
        ])
    else:
        # Make pipeline w/o scaler
        model_pipeline = sklearn_Pipeline(steps=[
            ('model', model),
        ])

    # Fit
    model_pipeline.fit(train_x, train_y)

    mlflow_sklearn.log_model(sk_model=model_pipeline, artifact_path="model")
    mlflow.log_params(model_params)

    return model_pipeline
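
A sketch of the imports this node appears to assume; the aliases `sklearn_Pipeline` and `mlflow_sklearn` follow from how the names are used above:

from typing import Any, Dict

import pandas as pd

import mlflow
import mlflow.sklearn as mlflow_sklearn
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline as sklearn_Pipeline
from sklearn.preprocessing import StandardScaler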
Code example #5
 def test_model_log(self):
     with TempDir(chdr=True, remove_on_exit=True):
         tracking.start_run()
         try:
             sklearn.log_model(sk_model=self._linear_lr,
                               artifact_path="linear")
             x = sklearn.load_model(
                 "linear", run_id=tracking.active_run().info.run_uuid)
             xpred = x.predict(self._X)
             np.testing.assert_array_equal(self._linear_lr_predict, xpred)
         finally:
             tracking.end_run()
Code example #6
 def test_model_log(self):
     old_uri = tracking.get_tracking_uri()
     # should_start_run tests whether or not calling log_model() automatically starts a run.
     for should_start_run in [False, True]:
         with TempDir(chdr=True, remove_on_exit=True) as tmp:
             try:
                 tracking.set_tracking_uri("test")
                 if should_start_run:
                     tracking.start_run()
                 sklearn.log_model(sk_model=self._linear_lr, artifact_path="linear")
                 x = sklearn.load_model("linear", run_id=tracking.active_run().info.run_uuid)
                 xpred = x.predict(self._X)
                 np.testing.assert_array_equal(self._linear_lr_predict, xpred)
             finally:
                 tracking.end_run()
                 tracking.set_tracking_uri(old_uri)
Code example #7
def train(training_pandasData, test_pandasData, label_col, feat_cols, n_trees,
          m_depth, learning_rate, loss, training_data_path, test_data_path):

    print("train:                 " + training_data_path)
    print("test:                  " + test_data_path)
    print("n_trees:              ", n_trees)
    print("m-depth:              ", m_depth)
    print("learning-rate:        ", learning_rate)
    print("loss:                  " + loss)
    print("label-col:             " + label_col)
    for feat in feat_cols:
        print("feat-cols:             " + feat)

    # Split data into training labels and testing labels.
    trainingLabels = training_pandasData[label_col]
    trainingFeatures = training_pandasData[feat_cols]

    testLabels = test_pandasData[label_col]
    testFeatures = test_pandasData[feat_cols]

    # We will use a GBT regressor model.
    xgbr = xgb.XGBRegressor(max_depth=m_depth,
                            learning_rate=learning_rate,
                            n_estimators=n_trees)

    # Here we train the model
    xgbr.fit(trainingFeatures, trainingLabels, eval_metric=loss)

    # Calculating the scores of the model.
    test_rmse = mean_squared_error(testLabels, xgbr.predict(testFeatures))**0.5
    r2_score_training = xgbr.score(trainingFeatures, trainingLabels)
    r2_score_test = xgbr.score(testFeatures, testLabels)

    print("Test RMSE:", test_rmse)
    print("Training set score:", r2_score_training)
    print("Test set score:", r2_score_test)

    # Logging the RMSE and r2 scores.
    mlflow.log_metric("Test RMSE", test_rmse)
    mlflow.log_metric("Train R2", r2_score_training)
    mlflow.log_metric("Test R2", r2_score_test)

    # Saving the model as an artifact.
    sklearn.log_model(xgbr, "model")

    run_id = mlflow.active_run().info.run_uuid
    print("Run with id %s finished" % run_id)
Code example #8
def train(training_pandas_data, test_pandas_data, label_col, feat_cols, alpha,
          l1_ratio, training_data_path, test_data_path):

    print("train:         " + training_data_path)
    print("test:          " + test_data_path)
    print("alpha:        ", alpha)
    print("l1-ratio:     ", l1_ratio)
    print("label-col:     " + label_col)
    for col in feat_cols:
        print("feat-cols:     " + col)

    # Split data into training labels and testing labels.
    trainingLabels = training_pandas_data[label_col].values
    trainingFeatures = training_pandas_data[feat_cols].values

    testLabels = test_pandas_data[label_col].values
    testFeatures = test_pandas_data[feat_cols].values

    # We will use a linear Elastic Net model.
    en = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)

    # Here we train the model.
    en.fit(trainingFeatures, trainingLabels)

    # Calculating the scores of the model.
    test_rmse = mean_squared_error(testLabels, en.predict(testFeatures))**0.5
    r2_score_training = en.score(trainingFeatures, trainingLabels)
    r2_score_test = en.score(testFeatures, testLabels)
    print("Test RMSE:", test_rmse)
    print("Training set score:", r2_score_training)
    print("Test set score:", r2_score_test)

    # Logging the RMSE and R2 scores.
    mlflow.log_metric("Test RMSE", test_rmse)
    mlflow.log_metric("Train R2", r2_score_training)
    mlflow.log_metric("Test R2", r2_score_test)

    # Saving the model as an artifact.
    sklearn.log_model(en, "model")

    run_id = mlflow.active_run().info.run_uuid
    print("Run with id %s finished" % run_id)
Code example #9
File: train_linear.py Project: tsandh/mlflow-apps
def train(training_pandas_data, test_pandas_data, label_col, 
          feat_cols, alpha, l1_ratio, max_iter, tol, training_data_path, test_data_path):

    print("train:         " + training_data_path)
    print("test:          " + test_data_path)
    print("alpha:        ", alpha)
    print("l1-ratio:     ", l1_ratio)
    print("max_iter:     ", max_iter)
    print("tol:     ", tol)
    print("label-col:     " + label_col)
    for col in feat_cols:
        print("feat-cols:     " + col)

    # Split data into training labels and testing labels.
    trainingLabels = training_pandas_data[label_col].values
    trainingFeatures = training_pandas_data[feat_cols].values

    testLabels = test_pandas_data[label_col].values
    testFeatures = test_pandas_data[feat_cols].values

    # We will use an SGD model.
    en = SGDRegressor(alpha=alpha, l1_ratio=l1_ratio, warm_start=True, max_iter=max_iter, tol=tol)

    # Here we train the model.
    en.fit(trainingFeatures, trainingLabels)

    # Calculating the scores of the model.
    test_rmse = mean_squared_error(testLabels, en.predict(testFeatures))**0.5
    r2_score_training = en.score(trainingFeatures, trainingLabels)
    r2_score_test = en.score(testFeatures, testLabels)
    print("Test RMSE:", test_rmse)
    print("Training set score:", r2_score_training)
    print("Test set score:", r2_score_test)

    # Logging the RMSE and R2 scores.
    mlflow.log_metric("Test RMSE", test_rmse)
    mlflow.log_metric("Train R2", r2_score_training)
    mlflow.log_metric("Test R2", r2_score_test)

    # Saving the model as an artifact.
    sklearn.log_model(en, "model")
Code example #10
File: train_linear.py Project: smurching/mlflow-apps
def train(training_pandas_data, test_pandas_data, label_col, feat_cols, alpha,
          l1_ratio, training_data_path, test_data_path):

    print("training-data-path:    " + training_data_path)
    print("test-data-path:        " + test_data_path)
    print("alpha:        ", alpha)
    print("l1-ratio:     ", l1_ratio)
    print("label-col:     " + label_col)
    for col in feat_cols:
        print("feat-cols:     " + col)

    # Split data into training labels and testing labels.
    trainingLabels = training_pandas_data[label_col].values
    trainingFeatures = training_pandas_data[feat_cols].values

    testLabels = test_pandas_data[label_col].values
    testFeatures = test_pandas_data[feat_cols].values

    # We will use a linear Elastic Net model.
    en = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)

    # Here we train the model.
    en.fit(trainingFeatures, trainingLabels)

    # Calculating the score of the model.
    r2_score_training = en.score(trainingFeatures, trainingLabels)
    r2_score_test = en.score(testFeatures, testLabels)
    print("Training set score:", r2_score_training)
    print("Test set score:", r2_score_test)

    # Logging the R2 score for both sets.
    mlflow.log_metric("R2 score for training set", r2_score_training)
    mlflow.log_metric("R2 score for test set", r2_score_test)

    # Saving the model as an artifact.
    sklearn.log_model(en, "model")

    run_id = mlflow.tracking.active_run().info.run_uuid
    print("Run with id %s finished" % run_id)
Code example #11
    tracking_uri = get_tracking_uri()
    if tracking_uri.startswith("http://"):
        store = RestStore({'hostname': tracking_uri})
        metric_obj = store.get_metric(run.info.run_uuid, "my_metric_name")
        metric_history = store.get_metric_history(run.info.run_uuid,
                                                  "my_metric_name")
        param_obj = store.get_param(run.info.run_uuid, "my_param")
        print("Got metric %s, %s" % (metric_obj.key, metric_obj.value))
        print("Got param %s, %s" % (param_obj.key, param_obj.value))
        print("Got metric history %s" % metric_history)
    local_dir = tempfile.mkdtemp()
    message = "test artifact written during run %s within artifact URI %s\n" \
              % (active_run().info.run_uuid, get_artifact_uri())
    try:
        file_path = os.path.join(local_dir, "some_output_file.txt")
        with open(file_path, "w") as handle:
            handle.write(message)
        log_artifacts(local_dir, "some_subdir")
        log_artifact(file_path, "another_dir")
    finally:
        shutil.rmtree(local_dir)

    X = np.array([-2, -1, 0, 1, 2, 1]).reshape(-1, 1)
    y = np.array([0, 0, 1, 1, 1, 0])
    lr = LogisticRegression()
    lr.fit(X, y)
    score = lr.score(X, y)
    log_metric("logistic_regression_score", score)
    log_model(lr, "model")
    print("Model saved in run %s" % active_run().info.run_uuid)
Code example #12
import mlflow
from mlflow import log_metric
from mlflow.sklearn import log_model
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    remote_server_uri = 'http://127.0.0.1:1234'  # set to your server URI

    # Make sure to also set the environment variable MLFLOW_TRACKING_URI to the same server URI
    # see: https://github.com/mlflow/mlflow/issues/608#issuecomment-454316004
    mlflow.set_tracking_uri(remote_server_uri)
    conda_env = 'log_reg.yaml'

    iris = datasets.load_iris()
    x = iris.data
    y = iris.target

    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42)

    with mlflow.start_run():
        logreg = LogisticRegression(C=1e5)
        logreg.fit(x_train, y_train)
        predictions = logreg.predict(x_test)

        log_metric('acc', accuracy_score(y_test, predictions))
        log_model(sk_model=logreg,
                  registered_model_name='LogisticReg-Iris-Model',
                  artifact_path='model_artifact',
                  conda_env=conda_env)
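
Because `registered_model_name` is set, the logged model is also registered under `LogisticReg-Iris-Model` and can later be loaded back through the registry. A minimal sketch; the version number 1 is an assumption, use whichever version the registry assigned:

import mlflow.pyfunc

model = mlflow.pyfunc.load_model("models:/LogisticReg-Iris-Model/1")
print(model.predict(x_test))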
Code example #13
import os

import mlflow
from mlflow.sklearn import log_model, save_model
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

mlflow.set_tracking_uri(os.environ["MLFLOW_HOST"])
mlflow.set_experiment("iris-exp")

# Ingest data
iris = load_iris()

# Prepare training data
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

with mlflow.start_run():
    # Train model
    n_estimators = 150
    mlflow.log_param("n_estimators", n_estimators)
    clf = RandomForestClassifier(n_estimators=n_estimators)
    clf.fit(X_train, y_train)

    # Evaluate
    y_pred = clf.predict(X_test)

    score = metrics.accuracy_score(y_test, y_pred)
    print(f"Accuracy: {score}")
    mlflow.log_metric(key="accuracy", value=score)

    # Save model
    # save_model(clf, 'iris_model')  # uncomment if you want to save the model locally
    log_model(clf, 'iris_model')
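
`save_model` is imported above but only appears in the commented-out line; unlike `log_model`, it writes the MLmodel directory to a local path instead of logging it to the tracking server. A minimal sketch:

from mlflow.sklearn import save_model

# Writes an MLmodel directory (pickled model, environment files, metadata)
# to ./iris_model instead of the tracking server
save_model(clf, 'iris_model')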
Code example #14
File: train.py Project: petra-rglv/mlflow_demo
import pandas as pd

import mlflow
from mlflow import log_metric, log_param
from mlflow.sklearn import log_model

from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    # Load the Data
    data = pd.read_csv('Iris.csv')
    X = data.drop(['Id', 'Species'], axis=1)
    y = data['Species']

    # Train Test split
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        random_state=5)
    # Train model
    rfc = RandomForestClassifier(n_estimators=50)
    rfc.fit(X_train, y_train)

    # Accuracy score
    y_pred = rfc.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy score: %.2f" % accuracy)
    # Track the run
    log_param("n_estimators", rfc.n_estimators)
    log_metric("accuracy", accuracy)
    log_model(rfc, "rfc_model")

    print("Model saved in run %s" % mlflow.active_run().info.run_uuid)
Code example #15
mlflow.log_metric("r_mape_xgb", r_mape_xgb)
mlflow.log_metric("r_mae_xgb",r_mae_xgb)
mlflow.log_metric("r_mse_xgb",r_mse_xgb)
mlflow.log_metric("r_msle_xgb",r_msle_xgb)
mlflow.log_metric("r_r2_xgb",r_r2_xgb)

print("Building the final model...")
#result for csv
result = pd.concat([train_csv,test_csv])
#result for txt
#result_txt = np.concatenate((X_txt_train, X_txt_test))
y = result['price']
#csv
regr.fit(result[features], y)
svm.fit(result[features], y)
xgboost.fit(result[features], y)
#txt
#regr.fit(result_txt, y)
#svm.fit(result_txt, y)
#xgboost.fit(result_txt, y)

joblib.dump(regr,city+"_rf.pkl")
log_model(regr, "model_rf")

joblib.dump(svm,city+"_svm.pkl")
log_model(svm, "model_svm")

joblib.dump(xgboost,city+"_xgb.pkl")
log_model(xgboost, "model_xgb")

Code example #16
x = df[['sepal_length']]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=2)


MAX_ITER = 2

REMOTE_MLFLOW_SERVER = os.environ['REMOTE_TRACKING_SERVER']
mlflow.set_tracking_uri(REMOTE_MLFLOW_SERVER)

try:
    mlflow.create_experiment("iris_lr")
except Exception:
    # create_experiment raises if the experiment already exists
    print('The experiment may already exist.')

mlflow.set_experiment("iris_lr")

with mlflow.start_run(nested=True):

    log_param("MAX_ITER", MAX_ITER)
    
    clf = LogisticRegression(max_iter=MAX_ITER)
    clf.fit(x_train, y_train)

    y_pred = clf.predict(x_test)
    acc = accuracy_score(y_test, y_pred)

    log_metric("Accuracy", acc)
    log_model(clf, "Model")

print(classification_report(y_test, y_pred, target_names=le.classes_))
Code example #17
        'classification',
        'classification',
        'classification',
    ]
})

helper_features = ['ew_'+str(x) for x in range(2,14)] + \
                  ['lw_distinct_series', 'lw_distinct_episodes'] + \
                  ['genre_share_drama', 'genre_share_comedy', 'genre_share_sport', 'genre_share_music']

# Create the missing value imputer to learn sensible substitute values
mvi = missing_values.missing_value_imputer(impute_strategies=impute_strategies,
                                           helper_features=helper_features)

sklearn.save_model(mvi, f"missing_value_imputer-{date.today()}")
sklearn.log_model(mvi, f"missing_value_imputer-{date.today()}")

# Train the imputer on df (the training set)
df_impute = mvi.train(df)

# Replacing missing value columns in fresh data with imputed columns
non_imputed = [
    c for c in df.columns if c not in mvi.impute_strategies.colname.values
]
df = pd.concat([df[non_imputed], df_impute], axis=1)

# Pickle the imputer for use with model scoring
utils.pickler(mvi, pickle_dir + '/prep/missing_value_imputer')

print("Missing value imputation completed\n")
Code example #18
def evaluate_model(config_path: Text):

    pipeline_config = yaml.load(open(config_path), Loader=yaml.FullLoader)
    config = pipeline_config.get('evaluate')

    logger = get_logger(name='EVALUATE MODEL', loglevel=pipeline_config.get('base').get('loglevel'))
    logger.debug('Start evaluation...')

    EXPERIMENT_NAME = pipeline_config['base']['experiments']['name']
    MLFLOW_TRACKING_URI = pipeline_config['base']['MLFLOW_TRACKING_URI']
    target_column = pipeline_config['dataset_build']['target_column']
    test_dataset = get_dataset(pipeline_config['split_train_test']['test_csv'])
    model_name = pipeline_config['base']['experiments']['model_name']
    models_folder = pipeline_config['base']['experiments']['models_folder']

    model = joblib.load(os.path.join(models_folder, model_name))
    logger.debug(f'Model {model}')

    # Get X and Y
    y_test = test_dataset.loc[:, target_column].values.astype("float32")
    X_test = test_dataset.drop(target_column, axis=1).values
    X_test = X_test.astype("float32")

    scores = model.predict(X_test)

    f1 = f1_score(y_true=y_test, y_pred=scores, average='macro')
    cm = confusion_matrix(y_test, scores)  # confusion_matrix expects (y_true, y_pred)

    test_report = {
        'f1_score': f1,
        'confusion_matrix': cm.tolist()
    }
    test_report_filepath = os.path.join(pipeline_config['base']['experiments']['experiments_folder'],
                            config['metrics_report'])
    json.dump(obj=test_report, fp=open(test_report_filepath, 'w'), indent=2)
    logger.debug(f'Test report: {test_report}')

    species = test_dataset['species'].unique().tolist()
    plt = plot_confusion_matrix(cm, species, normalize=False)
    confusion_matrix_filepath = os.path.join(pipeline_config['base']['experiments']['experiments_folder'],
                            f'{EXPERIMENT_NAME}_confusion_matrix.svg')
    plt.savefig(confusion_matrix_filepath)

    # Set tracking URI

    mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
    client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

    # Create the experiment if it does not exist yet
    if client.get_experiment_by_name(EXPERIMENT_NAME) is None:
        client.create_experiment(EXPERIMENT_NAME)

    experiment_id = client.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

    with mlflow.start_run(experiment_id=experiment_id) as run:

        logger.debug(f'Start logging for Experiment run: {run}')
        logger.debug(run.info)
        logger.debug(run.info.run_uuid)

        log_param(key='estimator', value=pipeline_config['train']['estimator_name'])
        log_param(key='cv', value=pipeline_config['train']['cv'])

        log_metric(key='f1_score', value=f1)
        log_artifact(local_path=confusion_matrix_filepath)
        log_artifact(local_path=test_report_filepath)
        log_model(model, artifact_path=model_name)

    logger.debug('Metrics and artefacts logged to MLPanel')
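
A sketch of the library imports this function appears to rely on; `get_logger`, `get_dataset`, and `plot_confusion_matrix` are project helpers rather than parts of these libraries:

import json
import os
from typing import Text

import joblib
import yaml

import mlflow
from mlflow import log_artifact, log_metric, log_param
from mlflow.sklearn import log_model
from mlflow.tracking import MlflowClient
from sklearn.metrics import confusion_matrix, f1_score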
Code example #19
File: example3.py Project: mlrepa/mlpanel
            out_stats.write(statistics.SerializeToString())

        mlflow.log_artifact(DATASET)
        mlflow.log_artifact(TARGET_LABELED_DATASET)
        mlflow.log_artifact(IRIS_STATISTICS)

        train, test = train_test_split(dataset, test_size=0.2, random_state=42)

        X_train = train.drop(TARGET_COLUMN, axis=1).astype('float32')
        y_train = train[TARGET_COLUMN].astype('int32')

        X_test = test.drop(TARGET_COLUMN, axis=1).astype('float32')
        y_test = test[TARGET_COLUMN].astype('int32')

        svc_model = SVC(C=args.C, kernel=args.kernel)

        # Fit the SVC classifier on the training data.
        svc_model.fit(X_train, y_train)

        mlflow.log_params(svc_model.get_params())

        prediction = svc_model.predict(X_test)

        f1 = f1_score(y_test, prediction, average='macro')

        mlflow.log_metric('f1', f1)

        log_model(svc_model, 'model')

        mlflow.register_model(f'runs:/{run.info.run_uuid}/model', f'{experiment_name}Model')
Code example #20
File: nodes.py Project: mbloem/kedro-mlflow-demo
def train_model(train_x: pd.DataFrame, train_y: pd.DataFrame,
                parameters: Dict[str, Any]) -> sklearn_Pipeline:
    """Node for training a simple multi-class logistic regression model. The
    number of training iterations as well as the learning rate are taken from
    conf/project/parameters.yml. All of the data as well as the parameters
    will be provided to this function at the time of execution.
    """
    # Build a multi-class logistic regression model
    model_params = parameters['model_params']
    model = LogisticRegression(**model_params)

    if parameters['model_standard_scaler']:
        # Prepare column transformer to do scaling
        col_transformer = ColumnTransformer(
            [
                (
                    'standard_scaler',
                    StandardScaler(copy=False),
                    [
                        "sepal_length",
                        "sepal_width",
                        "petal_length",
                        "petal_width",
                    ],
                ),
            ],
            remainder='drop',
        )

        # Make pipeline w/ scaler
        model_pipeline = sklearn_Pipeline(steps=[
            ('col_transformer', col_transformer),
            ('model', model),
        ])
    else:
        # Make pipeline w/o scaler
        model_pipeline = sklearn_Pipeline(steps=[
            ('model', model),
        ])

    # Fit
    model_pipeline.fit(train_x, train_y)

    mlflow.set_experiment('iris-example')
    mlflow_sklearn.log_model(sk_model=model_pipeline, artifact_path="model")
    mlflow.log_params(model_params)

    # Print out the model pipeline
    # See: http://www.xavierdupre.fr/app/mlinsights/helpsphinx/notebooks/visualize_pipeline.html
    dot = pipeline2dot(model_pipeline, train_x)
    dot_filename = 'pipeline_dot.dot'
    with open(dot_filename, 'w', encoding='utf-8') as f:
        f.write(dot)
    if sys.platform.startswith("win") and "Graphviz" not in os.environ["PATH"]:
        os.environ['PATH'] = os.environ[
            'PATH'] + r';C:\Program Files (x86)\Graphviz2.38\bin'
    cmd = "dot -G=300 -Tpng {0} -o{0}.png".format(dot_filename)
    run_cmd(cmd, wait=True, fLOG=print)
    mlflow.log_artifact('{0}.png'.format(dot_filename), 'model')

    return model_pipeline
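
Besides the kedro/sklearn/mlflow imports (see the sketch after code example #4), this variant also draws the pipeline with `pipeline2dot` and shells out to Graphviz via `run_cmd`. A sketch of the extra imports it appears to assume; the module paths are from mlinsights and pyquickhelper and may vary by version:

import os
import sys

from mlinsights.plotting import pipeline2dot
from pyquickhelper.loghelper import run_cmd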
Code example #21
import mlflow.pyfunc
import mlflow
from mlflow.sklearn import log_model, save_model
from sklearn.svm import LinearSVC

model = LinearSVC()

path = 'linear_svc'

log_model(model, artifact_path=path)
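
As written, this logs an unfitted LinearSVC: `log_model` will happily serialize it, but the loaded model cannot predict until it has been trained. A minimal sketch that fits the model on placeholder data first (the toy arrays below are not part of the original example):

import numpy as np

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])

model.fit(X, y)
log_model(model, artifact_path=path)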
Code example #22
        mlflow.log_artifact(DATASET)
        mlflow.log_artifact(TARGET_LABELED_DATASET)
        mlflow.log_artifact(IRIS_STATISTICS)

        train, test = train_test_split(dataset, test_size=0.2, random_state=42)

        X_train = train.drop(TARGET_COLUMN, axis=1).astype('float32')
        y_train = train[TARGET_COLUMN].astype('int32')

        X_test = test.drop(TARGET_COLUMN, axis=1).astype('float32')
        y_test = test[TARGET_COLUMN].astype('int32')

        logreg = LogisticRegression(C=args.C,
                                    solver=args.solver,
                                    multi_class='multinomial')
        # Create an instance of Logistic Regression Classifier and fit the data.
        logreg.fit(X_train, y_train)

        mlflow.log_params(logreg.get_params())

        prediction = logreg.predict(X_test)
        f1 = f1_score(y_test, prediction, average='macro')

        mlflow.log_metric('f1', f1)

        log_model(logreg, 'model')

        mlflow.register_model(f'runs:/{run.info.run_uuid}/model',
                              f'{experiment_name}Model')