def test_model_prediction_differentials(client):
    """Differential test: primary and secondary models should roughly agree."""
    inputs_df = load_dataset(file_name="test.csv")
    secondary_inputs_df = inputs_df.rename(columns=SECONDARY_VARIABLES_TO_RENAME)

    primary_response = client.post(
        "v1/predictions/primary", json=inputs_df.to_dict(orient="records")
    )
    primary_predictions = json.loads(primary_response.data)["predictions"]

    secondary_response = client.post(
        "v1/predictions/secondary",
        json=secondary_inputs_df.to_dict(orient="records"),
    )
    secondary_predictions = json.loads(secondary_response.data)["predictions"]

    # Only the first 10 rows are compared: the two models validate inputs
    # differently and therefore filter out slightly different numbers of
    # rows, which would mis-align predictions and fail the differential
    # test spuriously.
    compare_differences(
        expected_predictions=secondary_predictions[:10],
        actual_predictions=primary_predictions[:10],
        # you would adjust the rel_tol level parameter on your model.
        # right now this is extremely permissive of variation.
        rel_tol=0.2,
    )
def pipeline_inputs():
    """Return (X_train, X_test, y_train, y_test) from the training dataset."""
    # For larger datasets, here we would use a testing sub-sample.
    dataset = load_dataset(file_name=config.app_config.training_data_file)

    predictors = dataset[config.model_config.features]
    target = dataset[config.model_config.target]

    # Fixed random seed keeps the split reproducible across test runs.
    return train_test_split(
        predictors,
        target,
        test_size=config.model_config.test_size,
        random_state=config.model_config.random_state,
    )
def populate_database(n_predictions: int = 500, anomaly: bool = False) -> None:
    """
    Manipulate the test data to generate random predictions and save them
    to the database.

    Posts one prediction request per row of the cleaned test dataset, up to
    ``n_predictions`` rows. When ``anomaly`` is True, a single row with
    extremely low feature values is posted to create an outlier.

    Before running this script, ensure that the API and Database docker
    containers are running.
    """
    print(f"Preparing to generate: {n_predictions} predictions.")

    # Load the gradient boosting test dataset which
    # is included in the model package
    test_inputs_df = load_dataset(file_name="test.csv")
    clean_inputs_df = _prepare_inputs(dataframe=test_inputs_df)
    if len(clean_inputs_df) < n_predictions:
        # Bug fix: trailing space added so the two implicitly-concatenated
        # string literals no longer render as "need toextend".
        print(
            f"If you want {n_predictions} predictions, you need to "
            "extend the script to handle more predictions."
        )

    if anomaly:
        # set extremely low values to generate an outlier
        n_predictions = 1
        clean_inputs_df.loc[:, "FirstFlrSF"] = 1
        clean_inputs_df.loc[:, "LotArea"] = 1
        clean_inputs_df.loc[:, "OverallQual"] = 1
        clean_inputs_df.loc[:, "GrLivArea"] = 1

    for index, data in clean_inputs_df.iterrows():
        # Bug fix: was `index > n_predictions`, which posted one extra row
        # (n_predictions + 1 requests total) and, in anomaly mode, posted
        # 2 rows while claiming "Created 1 anomaly". `>=` stops after
        # exactly n_predictions rows (assuming a 0-based RangeIndex).
        if index >= n_predictions:
            if anomaly:
                print('Created 1 anomaly')
            break
        response = requests.post(
            f"{LOCAL_URL}/v1/predictions/regression",
            headers=HEADERS,
            json=[data.to_dict()],
        )
        response.raise_for_status()

        if index % 50 == 0:
            print(f"{index} predictions complete")

        # prevent overloading the server
        time.sleep(0.5)

    print("Prediction generation complete.")
def test_prediction_endpoint(api_endpoint, expected_no_predictions, client):
    """Posting the packaged test set returns the expected number of predictions."""
    # Given
    # Load the test dataset which is included in the model package
    inputs_df = load_dataset(file_name="test.csv")  # dataframe
    if api_endpoint == "v1/predictions/secondary":
        # the secondary model expects different column names
        inputs_df = inputs_df.rename(columns=SECONDARY_VARIABLES_TO_RENAME)

    # When
    payload = inputs_df.to_dict(orient="records")
    response = client.post(api_endpoint, json=payload)

    # Then
    assert response.status_code == 200
    body = json.loads(response.data)
    assert body["errors"] is None
    assert len(body["predictions"]) == expected_no_predictions
def test_prediction_validation(field, field_value, index, expected_error, client):
    """A deliberately invalid field value yields a 400 with the expected error."""
    # Given
    # Load the test dataset which is included in the model package.
    # See gradient_boosting_model.processing.validation HouseDataInputSchema
    # for the expected values for the inputs to the house price prediction
    # model; here one input is corrupted to exercise the validation.
    inputs_df = load_dataset(file_name="test.csv")  # dataframe
    inputs_df.loc[index, field] = field_value

    # When
    response = client.post(
        "/v1/predictions/primary", json=inputs_df.to_dict(orient="records")
    )

    # Then
    assert response.status_code == 400
    assert json.loads(response.data) == expected_error
def run_training() -> None:
    """Train the model."""
    # read training data
    training_data = load_dataset(file_name=config.app_config.training_data_file)

    # divide train and test; the fixed random seed makes the split reproducible
    X_train, X_test, y_train, y_test = train_test_split(
        training_data[config.model_config.features],  # predictors
        training_data[config.model_config.target],
        test_size=config.model_config.test_size,
        random_state=config.model_config.random_state,
    )

    pipeline.price_pipe.fit(X_train, y_train)
    _logger.warning(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def sample_input_data():
    """easy access to the test data, which is referenced in yaml"""
    test_file = config.app_config.test_data_file
    return load_dataset(file_name=test_file)
def raw_training_data():
    """Full training dataset.

    For larger datasets, here we would use a testing sub-sample.
    """
    training_file = config.app_config.training_data_file
    return load_dataset(file_name=training_file)
def sample_input_data():
    """Load the configured test dataset for use as sample model input."""
    test_file = config.app_config.test_data_file
    return load_dataset(file_name=test_file)
def test_inputs_df():
    """Deep copy of the gradient boosting test dataset shipped with the package."""
    # A deep copy isolates each test from mutations made by other tests.
    raw_df = load_dataset(file_name="test.csv")
    return raw_df.copy(deep=True)