コード例 #1
0
ファイル: dataset_test.py プロジェクト: bcebere/ite-api
def test_dataset_twins_load(train_ratio: float,
                            downsample: Optional[int]) -> None:
    """Verify the array shapes returned by ``ds.load("twins", ...)``.

    Args:
        train_ratio: Ratio forwarded to ``ds.load``; the expected number of
            training rows is ``int(11400 * train_ratio)``.
        downsample: Optional cap forwarded to ``ds.load``. When truthy, the
            expected train and test row counts are each limited to it.
    """
    # Twins data: 11400 patients, 30 features, 2 potential outcomes.
    n_patients, n_features, n_outcomes = 11400, 30, 2

    Train_X, Train_T, Train_Y, Opt_Train_Y, Test_X, Test_Y = ds.load(
        "twins", train_ratio, downsample=downsample)

    # Expected split sizes, capped independently when downsampling is on.
    n_train = int(n_patients * train_ratio)
    n_test = n_patients - n_train
    if downsample:
        n_train, n_test = min(downsample, n_train), min(downsample, n_test)

    # Training split.
    assert Train_X.shape == (n_train, n_features)
    assert Train_T.shape == (n_train, )
    assert Train_Y.shape == (n_train, )
    assert Opt_Train_Y.shape == (n_train, n_outcomes)

    # Test split.
    assert Test_X.shape == (n_test, n_features)
    assert Test_Y.shape == (n_test, n_outcomes)
コード例 #2
0
def test_unified_api_cmgp() -> None:
    """Train, predict and test a "CMGP" model through the unified ``Model`` API.

    Uses the Twins dataset with a 0.8 train ratio, downsampled to 1000, and
    checks that predictions have one row per test sample and two columns.
    """
    dataset = ds.load("twins", 0.8, downsample=1000)
    Train_X, Train_T, Train_Y, Opt_Train_Y, Test_X, Test_Y = dataset

    # Feature count comes from the first training row; outcome count from
    # the second axis of the test outcomes.
    n_features = len(Train_X[0])
    n_outcomes = Test_Y.shape[1]

    model = Model(
        "CMGP",
        dim=n_features,
        dim_outcome=n_outcomes,
        max_gp_iterations=50,
    )
    assert model is not None

    train_metrics = model.train(*dataset)
    train_metrics.print()

    predictions = model.predict(Test_X)
    expected_shape = (Test_X.shape[0], 2)
    assert predictions.shape == expected_shape

    eval_metrics = model.test(Test_X, Test_Y)
    eval_metrics.print()
コード例 #3
0
def search(algorithm: str, iterations: int = 2000) -> List[Any]:
    """Search GANITE hyperparameters on the Twins dataset with ``gp_minimize``.

    Each candidate configuration is trained on an 80% train split of the
    Twins dataset and scored with ``sqrt_PEHE`` on the test split; the
    optimizer minimizes that score.

    Args:
        algorithm: Implementation to tune, either ``"GANITE"`` or
            ``"GANITE_TORCH"``.
        iterations: Value forwarded as ``num_iterations`` to every candidate
            model.

    Returns:
        ``result.x`` as produced by ``gp_minimize`` (presumably the best
        value found for each search-space dimension, in the order the
        dimensions are declared below; see the scikit-optimize docs).

    Raises:
        AssertionError: If ``algorithm`` is not one of the supported names.
        ValueError: Same condition, when assertions are stripped (``-O``).
    """
    assert algorithm in ["GANITE", "GANITE_TORCH"]

    # Resolve the model class once, up front: it does not depend on the
    # sampled hyperparameters, so there is no need to redo it per evaluation.
    model_class: Any
    if algorithm == "GANITE":
        model_class = Ganite
    elif algorithm == "GANITE_TORCH":
        model_class = GaniteTorch
    else:
        # Only reachable when the assert above is stripped. The message must
        # report `algorithm`; `model_class` is still unbound on this path.
        raise ValueError(f"model not supported {algorithm}")

    # load dataset
    dataset = ds.load("twins", 0.8)
    Train_X, _, _, _, Test_X, Test_Y = dataset

    dim = len(Train_X[0])
    dim_outcome = Test_Y.shape[1]

    # define the space of hyperparameters to search
    search_space = [
        Integer(3, 10, name="num_discr_iterations"),
        Categorical([32, 64, 128, 256], name="minibatch_size"),
        # Hidden size candidates: dim, dim/2, ..., dim/5 (rounded down).
        Categorical(
            [dim // divisor for divisor in range(1, 6)],
            name="dim_hidden",
        ),
        Categorical([0, 0.1, 0.5, 1, 2, 5, 10], name="alpha"),
        Categorical([0, 0.1, 0.5, 1, 2, 5, 10], name="beta"),
        Integer(1, 9, name="depth"),
    ]

    # define the function used to evaluate a given configuration
    @use_named_args(search_space)
    def evaluate_model(**params: Any) -> float:
        # configure the model with specific hyperparameters
        model = model_class(
            dim,
            dim_outcome,
            num_iterations=iterations,
            **params,
        )

        model.train(*dataset)
        return model.test(Test_X, Test_Y).sqrt_PEHE()

    # perform optimization
    result = gp_minimize(evaluate_model, search_space)

    return result.x
コード例 #4
0
def test_ganite_torch_short_training(
    plt: Any,
    iterations: int,
    num_discr_iterations: int,
    alpha: float,
    beta: float,
    batch_size: int,
    depth: int,
    dim_hidden: int,
) -> None:
    """Train ``alg.GaniteTorch`` briefly on Twins and check its test error.

    Args:
        plt: Plotting handle passed to ``metrics.plot``.
        iterations: Forwarded as ``num_iterations``.
        num_discr_iterations: Forwarded unchanged to the model.
        alpha: Forwarded unchanged to the model.
        beta: Forwarded unchanged to the model.
        batch_size: Forwarded as ``minibatch_size``.
        depth: Forwarded unchanged to the model.
        dim_hidden: Hidden size; ``0`` means "use the input feature count".
    """
    dataset = ds.load("twins", 0.8)
    Train_X, Train_T, Train_Y, Opt_Train_Y, Test_X, Test_Y = dataset

    n_features = len(Train_X[0])
    n_outcomes = Test_Y.shape[1]
    if dim_hidden == 0:
        # A zero hidden size falls back to the number of input features.
        dim_hidden = n_features

    model = alg.GaniteTorch(
        n_features,
        n_outcomes,
        dim_hidden=dim_hidden,
        num_iterations=iterations,
        alpha=alpha,
        beta=beta,
        minibatch_size=batch_size,
        depth=depth,
        num_discr_iterations=num_discr_iterations,
    )
    assert model is not None

    train_metrics = model.train(*dataset)
    train_metrics.print()

    # Plotting is best-effort: any failure is reported and the test goes on.
    try:
        train_metrics.plot(plt, thresholds=[0.2, 0.25, 0.3, 0.35])
    except BaseException as err:
        print("failed to plot(maybe rerun with --plots):", err)

    predictions = model.predict(Test_X)
    assert predictions.shape == (Test_X.shape[0], 2)

    eval_metrics = model.test(Test_X, Test_Y)
    eval_metrics.print()

    worst_rows = Test_X[eval_metrics.worst_mistakes()]
    print("Top 5 worst errors ", worst_rows)

    # The score must fall strictly inside (0.2, 0.4).
    assert 0.2 < eval_metrics.sqrt_PEHE()
    assert eval_metrics.sqrt_PEHE() < 0.4
コード例 #5
0
def test_cmgp_short_training(plt: Any, ) -> None:
    """Retrain one ``alg.CMGP`` model four times and check its test error.

    The evaluation split is loaded once up front; each of the four rounds
    loads the (downsampled) Twins dataset again, trains on it, and requires
    ``sqrt_PEHE`` on the fixed evaluation split to lie strictly in (0.2, 0.4).

    Args:
        plt: Plotting handle passed to ``metrics.plot``.
    """
    split_ratio = 0.8
    sample_cap = 1000

    # Only the feature matrix and the test split of this first load are used.
    Train_X, _, _, _, Test_X, Test_Y = ds.load(
        "twins", split_ratio, downsample=sample_cap)

    model = alg.CMGP(
        dim=len(Train_X[0]),
        dim_outcome=Test_Y.shape[1],
        max_gp_iterations=50,
    )
    assert model is not None

    for _ in range(4):
        round_data = ds.load("twins", split_ratio, downsample=sample_cap)
        metrics = model.train(*round_data)

        eval_metrics = model.test(Test_X, Test_Y)
        assert 0.2 < eval_metrics.sqrt_PEHE()
        assert eval_metrics.sqrt_PEHE() < 0.4

    # Report the metrics of the last training round only.
    metrics.print()
    try:
        metrics.plot(plt, with_ci=True, thresholds=[0.2, 0.25, 0.3, 0.35])
    except BaseException as err:
        print("failed to plot(maybe rerun with --plots):", err)
コード例 #6
0
def test_unified_api_ganite(ganite_ver: str) -> None:
    """Exercise a GANITE variant through the unified ``Model`` API.

    Checks that the hyperparameters given to ``Model`` are exposed on
    ``model.core``, then trains, predicts and tests on the Twins dataset.

    Args:
        ganite_ver: Algorithm name passed as the first ``Model`` argument.
    """
    dataset = ds.load("twins", 0.8)
    Train_X, Train_T, Train_Y, Opt_Train_Y, Test_X, Test_Y = dataset

    n_features = len(Train_X[0])
    n_outcomes = Test_Y.shape[1]

    hyperparams = {
        "num_iterations": 10,
        "alpha": 2,
        "beta": 2,
        "minibatch_size": 128,
        "depth": 2,
        "num_discr_iterations": 4,
    }
    model = Model(
        ganite_ver,
        n_features,
        n_outcomes,
        dim_hidden=n_features,
        **hyperparams,
    )

    # Every hyperparameter must be readable back from the wrapped core model.
    checked_attrs = (
        "minibatch_size",
        "alpha",
        "beta",
        "depth",
        "num_iterations",
        "num_discr_iterations",
    )
    for attr in checked_attrs:
        assert getattr(model.core, attr) == hyperparams[attr]

    train_metrics = model.train(*dataset)
    train_metrics.print()

    predictions = model.predict(Test_X)
    assert predictions.shape == (Test_X.shape[0], 2)

    eval_metrics = model.test(Test_X, Test_Y)
    eval_metrics.print()
コード例 #7
0
# Import depends
import ite.algs.ganite_torch.model as alg
import ite.datasets as ds
import ite.utils.numpy as utils

# Double check that we are using the correct interpreter.
print(sys.executable)

# ## Load the Dataset
#
# Next, we load the Twins dataset, process the data, and sample a training set and a test set.
#

# Ratio passed to `ds.load` as its second argument.
train_ratio = 0.8

# `ds.load` yields six values. The whole result is kept as `dataset` and is
# also unpacked into the individual train/test variables.
dataset = ds.load("twins", train_ratio)
[Train_X, Train_T, Train_Y, Opt_Train_Y, Test_X, Test_Y] = dataset

# Preview the first five rows of Train_X. The DataFrame is not assigned, so
# it is discarded when run as a plain script.
# NOTE(review): presumably exported from a notebook cell, where it would be displayed - confirm.
pd.DataFrame(data=Train_X[:5])

# ## Load the model
#
# Next, we define the model.
#
#
# The constructor supports the following parameters:
#  - `dim`: The number of features in X.
#  - `dim_outcome`: The number of potential outcomes.
#  - `dim_hidden`: hyperparameter for tuning the size of the hidden layer.
#  - `depth`: hyperparameter for the number of hidden layers in the generator and inference blocks.
#  - `num_iterations`: hyperparameter for the number of training epochs.
コード例 #8
0
ファイル: dataset_test.py プロジェクト: bcebere/ite-api
def test_sanity() -> None:
    """``ds.load("test")`` is expected to raise."""
    dataset_name = "test"
    with pytest.raises(BaseException):
        ds.load(dataset_name)
コード例 #9
0
import ite.datasets as ds

# Double check that we are using the correct interpreter.
print(sys.executable)

# ## Load the Dataset
#
# The example is done using the Twins dataset.
#
# Next, we load the dataset, process the data, and sample a training set and a test set.
#
# For CMGP, we have to downsample to 1000 training items. For the rest, we load without downsampling.

# Ratio passed to `ds.load` as its second argument.
train_ratio = 0.8

# Two loads of the same dataset: one without downsampling, and one with
# `downsample=1000` for CMGP.
full_dataloader = ds.load("twins", train_ratio)
cmgp_dataloader = ds.load("twins", train_ratio, downsample=1000)


# ## Load and train GANITE(Tensorflow version)
#
# The constructor requires the name of the chosen algorithm for the first parameter - `GANITE`.
#
# The constructor supports the same parameters as the "native" version:
#  - `dim`: The number of features in X.
#  - `dim_outcome`: The number of potential outcomes.
#  - `dim_hidden`: hyperparameter for tuning the size of the hidden layer.
#  - `depth`: hyperparameter for the number of hidden layers in the generator and inference blocks.
#  - `num_iterations`: hyperparameter for the number of training epochs.
#  - `alpha`: hyperparameter used for the Generator block loss.
#  - `beta`: hyperparameter used for the ITE block loss.
コード例 #10
0
# Double check that we are using the correct interpreter.
print(sys.executable)

# Disable TF logging by setting TF_CPP_MIN_LOG_LEVEL for this process.
# NOTE(review): presumably this must run before TensorFlow is imported - confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# ## Load the Dataset
#
# The example is done using the Twins dataset.
#
# Next, we load the dataset, process the data, and sample a training set and a test set.

# Ratio passed to `ds.load` as its second argument.
train_ratio = 0.8

# `ds.load` yields six values. The whole result is kept as `dataloader` and
# is also unpacked into the individual train/test variables.
dataloader = ds.load("twins", train_ratio)
[Train_X, Train_T, Train_Y, Opt_Train_Y, Test_X, Test_Y] = dataloader


# Preview the first five rows of Train_X. The DataFrame is not assigned, so
# it is discarded when run as a plain script.
pd.DataFrame(data=Train_X[:5])


# ## Load the model
#
# Next, we define the model.
#
#
# The constructor supports the following parameters:
#  - `dim`: The number of features in X.
#  - `dim_outcome`: The number of potential outcomes.
#  - `dim_hidden`: hyperparameter for tuning the size of the hidden layer.
コード例 #11
0
import ite.algs.causal_multitask_gaussian_processes.model as alg
import ite.datasets as ds
import ite.utils.numpy as utils

# Double check that we are using the correct interpreter.
print(sys.executable)

# ## Load the Dataset
#
# The example is done using the Twins dataset.
#
# __Important__: For CMGP, we have to downsample the dataset to 1000 training items.

# Ratio passed to `ds.load` as its second argument.
train_ratio = 0.8

# `ds.load` yields six values. The whole result is kept as `dataset` and is
# also unpacked into the individual train/test variables.
dataset = ds.load("twins", train_ratio, downsample=1000)
[Train_X, Train_T, Train_Y, Opt_Train_Y, Test_X, Test_Y] = dataset


# ## Load the model
#
# Next, we define the model.
#
#
# The constructor supports the following parameters:
#  - `dim`: The number of features in X.
#  - `dim_outcome`: The number of potential outcomes.
#  - `max_gp_iterations`: Maximum number of GP iterations before stopping the training.

# Feature count, taken as the length of the first training row.
dim = len(Train_X[0])
# Outcome count, taken from the second axis of the test outcomes.
dim_outcome = Test_Y.shape[1]