Example #1
0
def test_model(caplog):
    """Unit test of model.

    Builds three small two-layer tasks and checks the task bookkeeping of
    ``EmmentalModel``: construction from a single task, ``update_task``,
    ``add_task``, ``remove_task``, a save followed by a load, and
    ``add_tasks``.

    The log directory is removed in a ``finally`` clause so that a failing
    assertion does not leave ``temp_test_model`` behind on disk.

    Args:
        caplog: pytest log-capturing fixture; its level is set to INFO.
    """
    caplog.set_level(logging.INFO)

    dirpath = "temp_test_model"

    def ce_loss(module_name, immediate_output_dict, Y, active):
        # Cross entropy restricted to the entries selected by ``active``.
        logits = immediate_output_dict[module_name][0][active]
        return F.cross_entropy(logits, (Y.view(-1))[active])

    def output(module_name, immediate_output_dict):
        return F.softmax(immediate_output_dict[module_name][0], dim=1)

    def make_task(name, hidden_size):
        # All tasks share the same m1 -> m2 flow; only the task name and the
        # width of the hidden layer differ between them.
        return EmmentalTask(
            name=name,
            module_pool=nn.ModuleDict({
                "m1": nn.Linear(10, hidden_size, bias=False),
                "m2": nn.Linear(hidden_size, 2, bias=False),
            }),
            task_flow=[
                {
                    "name": "m1",
                    "module": "m1",
                    "inputs": [("_input_", "data")],
                },
                {
                    "name": "m2",
                    "module": "m2",
                    "inputs": [("m1", 0)],
                },
            ],
            loss_func=partial(ce_loss, "m2"),
            output_func=partial(output, "m2"),
            scorer=Scorer(metrics=["accuracy"]),
        )

    Meta.reset()
    init(dirpath)

    try:
        task1 = make_task("task_1", 10)
        # Same name as task1 but a narrower hidden layer, used to verify that
        # update_task swaps the modules in the model's module pool.
        new_task1 = make_task("task_1", 5)
        task2 = make_task("task_2", 5)

        Meta.update_config({"model_config": {"dataparallel": False}})

        model = EmmentalModel(name="test", tasks=task1)

        assert repr(model) == "EmmentalModel(name=test)"
        assert model.name == "test"
        assert model.task_names == {"task_1"}
        # nn.Linear stores its weight as (out_features, in_features).
        assert model.module_pool["m1"].weight.data.size() == (10, 10)
        assert model.module_pool["m2"].weight.data.size() == (2, 10)

        model.update_task(new_task1)

        assert model.module_pool["m1"].weight.data.size() == (5, 10)
        assert model.module_pool["m2"].weight.data.size() == (2, 5)

        # task_2 has not been added yet, so updating it must not register it.
        model.update_task(task2)
        assert model.task_names == {"task_1"}

        model.add_task(task2)
        assert model.task_names == {"task_1", "task_2"}

        model.remove_task("task_1")
        assert model.task_names == {"task_2"}

        # Removing a task that is no longer present must leave the set as is.
        model.remove_task("task_1")
        assert model.task_names == {"task_2"}

        model.save(f"{dirpath}/saved_model.pth")

        model.load(f"{dirpath}/saved_model.pth")

        # Test add_tasks
        model = EmmentalModel(name="test")

        model.add_tasks([task1, task2])
        assert model.task_names == {"task_1", "task_2"}
    finally:
        # Always clean up the log directory, including after a failed assert.
        shutil.rmtree(dirpath)
Example #2
0
def test_e2e_skip_trained_epoch(caplog):
    """Run an end-to-end test of resuming training with learned data skipped.

    The test runs two learning phases on the same synthetic binary
    classification data (label is ``x0 > x1 + 0.25``):

    1. Train for one epoch with checkpointing enabled, tracking
       ``model/all/train/loss`` (``min``), and assert the test loss is
       still above 0.3.
    2. Reset ``Meta``, re-initialize in the same exact log path, load the
       model from the ``best_model_model_all_train_loss`` files (presumably
       written by the checkpointer in phase 1), then continue with
       ``n_epochs=5``, ``epochs_learned=1`` and ``skip_learned_data=True``
       and assert the test loss is at most 0.4.

    The log directory is removed in a ``finally`` clause so that a failing
    run does not leave checkpoints behind in the fixed log path.

    Args:
        caplog: pytest log-capturing fixture; its level is set to INFO.
    """
    caplog.set_level(logging.INFO)

    dirpath = "temp_test_e2e_skip_trained"
    use_exact_log_path = True
    # Common prefix of the model/optimizer/scheduler files read in phase 2.
    checkpoint_prefix = f"{dirpath}/best_model_model_all_train_loss"

    def ce_loss(task_name, immediate_output_dict, Y):
        module_name = f"{task_name}_pred_head"
        return F.cross_entropy(immediate_output_dict[module_name], Y)

    def output(task_name, immediate_output_dict):
        module_name = f"{task_name}_pred_head"
        return F.softmax(immediate_output_dict[module_name], dim=1)

    class IdentityModule(nn.Module):
        """Pass the input through unchanged, wrapped in a dict under "out"."""

        def __init__(self):
            """Initialize IdentityModule."""
            super().__init__()

        def forward(self, input):
            return {"out": input}

    def make_logging_config():
        # Both phases use the same logging/checkpointing settings. A fresh
        # dict is built on every call so the two configs never share state.
        return {
            "counter_unit": "batch",
            "evaluation_freq": 5,
            "writer_config": {
                "writer": "json",
                "write_loss_per_step": True,
                "verbose": True,
            },
            "checkpointing": True,
            "checkpointer_config": {
                "checkpoint_path": None,
                "checkpoint_freq": 1,
                "checkpoint_metric": {
                    "model/all/train/loss": "min"
                },
                "checkpoint_task_metrics": None,
                "checkpoint_runway": 1,
                "checkpoint_all": False,
                "clear_intermediate_checkpoints": True,
                "clear_all_checkpoints": False,
            },
        }

    Meta.reset()
    init(dirpath, use_exact_log_path=use_exact_log_path)

    try:
        # Generate synthetic data
        N = 500
        X = np.random.random((N, 2)) * 2 - 1
        Y = (X[:, 0] > X[:, 1] + 0.25).astype(int)

        X = [torch.Tensor(X[i]) for i in range(N)]

        # 80% / 10% / 10% split into train / valid / test.
        train_end = int(0.8 * N)
        dev_end = int(0.9 * N)
        split_spans = {
            "train": slice(None, train_end),
            "valid": slice(train_end, dev_end),
            "test": slice(dev_end, None),
        }

        # Create dataset and dataloader
        datasets = {
            split: EmmentalDataset(
                name="synthetic",
                X_dict={"data": X[span]},
                Y_dict={"label1": torch.tensor(Y[span])},
            )
            for split, span in split_spans.items()
        }

        task_to_label_dict = {"task1": "label1"}

        dataloaders = {
            split: EmmentalDataLoader(
                task_to_label_dict=task_to_label_dict,
                dataset=dataset,
                split=split,
                batch_size=10,
            )
            for split, dataset in datasets.items()
        }
        train_dataloader = dataloaders["train"]
        dev_dataloader = dataloaders["valid"]
        test_dataloader = dataloaders["test"]

        # Create task
        task_metrics = {"task1": ["accuracy"]}

        tasks = [
            EmmentalTask(
                name=task_name,
                module_pool=nn.ModuleDict({
                    "input_module0": IdentityModule(),
                    "input_module1": nn.Linear(2, 8),
                    f"{task_name}_pred_head": nn.Linear(8, 2),
                }),
                task_flow=[
                    Action(name="input",
                           module="input_module0",
                           inputs=[("_input_", "data")]),
                    Action(name="input1",
                           module="input_module1",
                           inputs=[("input", "out")]),
                    Action(
                        name=f"{task_name}_pred_head",
                        module=f"{task_name}_pred_head",
                        inputs=[("input1", 0)],
                    ),
                ],
                module_device={"input_module0": -1},
                loss_func=partial(ce_loss, task_name),
                output_func=partial(output, task_name),
                action_outputs=None,
                scorer=Scorer(metrics=task_metrics[task_name]),
                require_prob_for_eval=False,
                require_pred_for_eval=True,
            ) for task_name in ["task1"]
        ]

        # Build model
        model = EmmentalModel(name="all", tasks=tasks)

        # Create learner
        emmental_learner = EmmentalLearner()

        # Phase 1: a single epoch from scratch, with checkpointing enabled.
        config = {
            "meta_config": {
                "seed": 0,
                "verbose": True
            },
            "learner_config": {
                "n_epochs": 1,
                "epochs_learned": 0,
                "steps_learned": 0,
                "skip_learned_data": False,
                "online_eval": True,
                "optimizer_config": {
                    "lr": 0.01,
                    "grad_clip": 100
                },
            },
            "logging_config": make_logging_config(),
        }
        Meta.update_config(config)

        # Learning
        emmental_learner.learn(
            model,
            [train_dataloader, dev_dataloader],
        )

        test_score = model.score(test_dataloader)

        assert test_score["task1/synthetic/test/loss"] > 0.3

        # Phase 2: start from a clean Meta in the same log path and resume
        # from the files saved under checkpoint_prefix, declaring one epoch
        # as already learned so that its data is skipped.
        Meta.reset()
        init(dirpath, use_exact_log_path=use_exact_log_path)

        config = {
            "meta_config": {
                "seed": 0,
                "verbose": False
            },
            "learner_config": {
                "n_epochs": 5,
                "epochs_learned": 1,
                "steps_learned": 0,
                "skip_learned_data": True,
                "online_eval": False,
                "optimizer_config": {
                    "lr": 0.01,
                    "grad_clip": 100
                },
                "optimizer_path": f"{checkpoint_prefix}.optimizer.pth",
                "scheduler_path": f"{checkpoint_prefix}.scheduler.pth",
            },
            "model_config": {
                "model_path": f"{checkpoint_prefix}.model.pth"
            },
            "logging_config": make_logging_config(),
        }
        Meta.update_config(config)

        if Meta.config["model_config"]["model_path"]:
            model.load(Meta.config["model_config"]["model_path"])

        # Learning
        emmental_learner.learn(
            model,
            [train_dataloader, dev_dataloader],
        )

        test_score = model.score(test_dataloader)

        assert test_score["task1/synthetic/test/loss"] <= 0.4
    finally:
        # Always clean up: with an exact log path, checkpoints left behind by
        # a failed run would otherwise remain in the directory reused next time.
        shutil.rmtree(dirpath)