Python balance_data Beispiele

Programmiersprache: Python

Namespace / Paketname: ludwig.data.preprocessing

Methode / Funktion: balance_data

Beispiele auf hotexamples.com: 3

Python balance_data – 3 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Funktion ludwig.data.preprocessing.balance_data, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um uns zu helfen, deren Qualität zu verbessern.

Beispiel #1

Datei anzeigen

def test_multiple_class_failure():
    """Expect ``balance_data`` to raise ``ValueError`` for two output features.

    The frame holds 200 rows; both label columns contain 180 zeros followed
    by 20 ones. The preprocessing section is left empty.
    """
    n_rows = 200
    n_minority = 20

    def feature(column, kind):
        # Every feature spec in this test uses the column name for both keys.
        return {"name": column, "proc_column": column, "type": kind}

    def imbalanced_labels():
        # Fresh array per column: majority zeros first, then minority ones.
        return np.concatenate((np.zeros(n_rows - n_minority), np.ones(n_minority)))

    config = {
        "input_features": [feature(col, "number") for col in ("Index", "random_1", "random_2")],
        "output_features": [feature(col, "binary") for col in ("Label", "Label2")],
        "preprocessing": {},
    }

    # Dict values are evaluated top to bottom, so the random draws happen in
    # the same order as the columns are listed.
    columns = {
        "Index": np.arange(0, n_rows, 1),
        "random_1": np.random.randint(0, 50, n_rows),
        "random_2": np.random.choice(["Type A", "Type B", "Type C", "Type D"], n_rows),
        "Label": imbalanced_labels(),
        "Label2": imbalanced_labels(),
        "split": np.zeros(n_rows),
    }
    input_df = pd.DataFrame(columns)

    # Built outside the ``raises`` block so only balance_data itself can
    # satisfy the expectation.
    backend = LocalBackend()

    output_features = config["output_features"]
    preprocessing = config["preprocessing"]
    with pytest.raises(ValueError):
        balance_data(input_df, output_features, preprocessing, backend)

Beispiel #2

Datei anzeigen

def test_balance(method, balance):
    """Check that ``balance_data`` moves the class ratio close to ``balance``.

    Args:
        method: Key written into the preprocessing config. The config is
            pre-populated with ``"oversample_minority"`` and
            ``"undersample_majority"``, so presumably one of those; the
            parametrization is not visible here.
        balance: Value stored under ``method`` and compared against the
            minority/majority count ratio measured on the balanced frame.
    """
    config = {
        "input_features": [
            {"name": "Index", "proc_column": "Index", "type": "number"},
            {"name": "random_1", "proc_column": "random_1", "type": "number"},
            {"name": "random_2", "proc_column": "random_2", "type": "number"},
        ],
        "output_features": [{"name": "Label", "proc_column": "Label", "type": "binary"}],
        "preprocessing": {"oversample_minority": None, "undersample_majority": None},
    }
    input_df = pd.DataFrame(
        {
            "Index": np.arange(0, 200, 1),
            "random_1": np.random.randint(0, 50, 200),
            "random_2": np.random.choice(["Type A", "Type B", "Type C", "Type D"], 200),
            # Imbalanced target: 180 zeros followed by 20 ones.
            "Label": np.concatenate((np.zeros(180), np.ones(20))),
            "split": np.zeros(200),
        }
    )

    config["preprocessing"][method] = balance
    backend = LocalBackend()

    test_df = balance_data(input_df, config["output_features"], config["preprocessing"], backend)
    target = config["output_features"][0][NAME]

    # Count each class once; the largest and smallest counts are the majority
    # and minority class sizes respectively.
    class_counts = test_df[target].value_counts()
    majority_class = class_counts.max()
    minority_class = class_counts.min()
    new_class_balance = round(minority_class / majority_class, 2)

    assert abs(balance - new_class_balance) < BALANCE_PERCENTAGE_TOLERANCE

Beispiel #3

Datei anzeigen

Datei: test_ray.py Projekt: skanjila/ludwig

def test_balance_ray(method, balance):
    """Check ``balance_data`` on a Ray backend against the requested ratio.

    Args:
        method: Key written into the preprocessing config. The config is
            pre-populated with ``"oversample_minority"`` and
            ``"undersample_majority"``, so presumably one of those; the
            parametrization is not visible here.
        balance: Value stored under ``method``; the measured
            minority/majority count ratio must lie within
            ``BALANCE_PERCENTAGE_TOLERANCE`` of it (bounds inclusive).
    """
    config = {
        "input_features": [
            {"name": "Index", "proc_column": "Index", "type": "number"},
            {"name": "random_1", "proc_column": "random_1", "type": "number"},
            {"name": "random_2", "proc_column": "random_2", "type": "number"},
        ],
        "output_features": [{"name": "Label", "proc_column": "Label", "type": "binary"}],
        "preprocessing": {"oversample_minority": None, "undersample_majority": None},
    }
    input_df = pd.DataFrame(
        {
            "Index": np.arange(0, 200, 1),
            "random_1": np.random.randint(0, 50, 200),
            "random_2": np.random.choice(["Type A", "Type B", "Type C", "Type D"], 200),
            # Imbalanced target: 180 zeros followed by 20 ones.
            "Label": np.concatenate((np.zeros(180), np.ones(20))),
            "split": np.zeros(200),
        }
    )
    config["preprocessing"][method] = balance
    target = config["output_features"][0][NAME]

    with ray_start(num_cpus=2, num_gpus=None):
        backend = create_ray_backend()
        # Convert the pandas frame into the backend's dataframe type.
        input_df = backend.df_engine.from_pandas(input_df)
        test_df = balance_data(input_df, config["output_features"], config["preprocessing"], backend)

        # Materialize the class counts a single time; every extra
        # ``compute()`` would re-run the lazy computation.
        class_counts = test_df[target].value_counts().compute()
        majority_class = class_counts.max()
        minority_class = class_counts.min()
        new_class_balance = round(minority_class / majority_class, 2)

        assert (balance - BALANCE_PERCENTAGE_TOLERANCE) <= new_class_balance
        assert (balance + BALANCE_PERCENTAGE_TOLERANCE) >= new_class_balance