Python train_test_splitの例

プログラミング言語: Python

名前空間/パッケージ名: ray.air

メソッド/関数: train_test_split

hotexamples.comのコード掲載数: 3

Python train_test_split - 3件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのray.air.train_test_splitの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

def get_datasets(a=5, b=10, size=1000, split=0.8) -> Tuple[Dataset, Dataset]:
    """Build a synthetic linear dataset and split it into two datasets.

    Generates ``size`` rows of the form ``{"x": x, "y": a * x + b}`` where
    ``x`` takes the values ``i / size`` for ``i`` in ``range(size)``, then
    divides the rows with ``train_test_split`` with shuffling enabled.

    Args:
        a: Slope used to compute ``y`` from ``x``.
        b: Intercept used to compute ``y`` from ``x``.
        size: Number of rows to generate.
        split: Value forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        A ``(train_dataset, validation_dataset)`` pair, in the order
        returned by ``train_test_split``.
    """
    rows = [{"x": i / size, "y": a * (i / size) + b} for i in range(size)]
    dataset = ray.data.from_items(rows)

    # NOTE(review): `split` lands in the `test_size` slot, so with the default
    # of 0.8 the second (validation) dataset presumably receives 80% of the
    # rows and the training dataset only 20% -- confirm this is intended.
    train_dataset, validation_dataset = train_test_split(
        dataset, test_size=split, shuffle=True
    )
    return train_dataset, validation_dataset

コード例 #2

ファイルを表示

ファイル: xgboost_batch_prediction.py プロジェクト: parasj/ray

import ray
from ray.data.preprocessors import StandardScaler
from ray.air import train_test_split
from ray.train.batch_predictor import BatchPredictor
from ray.train.xgboost import XGBoostTrainer, XGBoostPredictor
from ray.air.config import ScalingConfig

# Split data into train and validation.
# The CSV is read from a remote bucket; test_size=0.3 asks train_test_split
# to place 30% of the rows in the second (validation) dataset.
dataset = ray.data.read_csv(
    "s3://anonymous@air-example-data/breast_cancer.csv")
train_dataset, valid_dataset = train_test_split(dataset, test_size=0.3)
# Copy of the validation split without the "target" column (the column used
# as the label below). It is not referenced again in this excerpt.
test_dataset = valid_dataset.drop_columns(["target"])

# Preprocessor configured for just these two columns.
columns_to_scale = ["mean radius", "mean texture"]
preprocessor = StandardScaler(columns=columns_to_scale)

# Only the training split is handed to the trainer; valid_dataset is not
# passed in as an evaluation dataset here.
trainer = XGBoostTrainer(
    label_column="target",
    num_boost_round=20,
    scaling_config=ScalingConfig(num_workers=2),
    params={
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
    },
    datasets={"train": train_dataset},
    preprocessor=preprocessor,
)
# Run training and keep the returned result object.
result = trainer.fit()

# You can also create a checkpoint from a trained model using
# `XGBoostCheckpoint.from_model`.

コード例 #3

ファイルを表示

def test_train_test_split(ray_start_4_cpus):
    """Check ``train_test_split`` on an 8-row range dataset.

    Covers a fractional and an absolute ``test_size``, seeded shuffling,
    and rejection of invalid ``test_size`` values.
    """
    ds = ray.data.range(8)

    # Without shuffling, a float size (0.25) and an int size (2) must both
    # leave the first six rows for training and the last two for testing.
    for size in (0.25, 2):
        train, test = train_test_split(ds, test_size=size)
        assert train.take() == [0, 1, 2, 3, 4, 5]
        assert test.take() == [6, 7]

    # Shuffling with a fixed seed must give this exact ordering.
    train, test = train_test_split(ds, test_size=0.25, shuffle=True, seed=1)
    assert train.take() == [5, 7, 6, 3, 0, 4]
    assert test.take() == [2, 1]

    # Error handling: a list, a negative number, zero, a fraction above 1,
    # and a count above the dataset's 8 rows must each raise.
    rejected = (
        (TypeError, [1]),
        (ValueError, -1),
        (ValueError, 0),
        (ValueError, 1.1),
        (ValueError, 9),
    )
    for expected_error, bad_size in rejected:
        with pytest.raises(expected_error):
            train_test_split(ds, test_size=bad_size)