Example #1
def test_tensorboard_writer(caplog):
    """Unit test of log_writer."""
    caplog.set_level(logging.INFO)

    emmental.Meta.reset()

    emmental.init()

    log_writer = TensorBoardWriter()

    log_writer.add_scalar(name="step 1", value=0.1, step=1)
    log_writer.add_scalar(name="step 2", value=0.2, step=2)

    config_filename = "config.yaml"
    log_writer.write_config(config_filename)

    # Test config
    with open(os.path.join(emmental.Meta.log_path, config_filename), "r") as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    assert config["meta_config"]["verbose"] is True
    assert config["logging_config"]["counter_unit"] == "epoch"
    assert config["logging_config"]["checkpointing"] is False

    log_writer.write_log()

    log_writer.close()
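
A minimal sketch of the same writer used outside a test; the emmental.logging import path is an assumption, since the examples above omit their imports. The event files land under emmental.Meta.log_path and can be inspected with tensorboard --logdir <log_path>.

import emmental
from emmental.logging import TensorBoardWriter

# Sketch only: initialize Emmental, log a few scalars, dump the config.
emmental.init()  # creates a fresh log directory with the default config
writer = TensorBoardWriter()
for step, loss in enumerate([0.9, 0.5, 0.3]):
    writer.add_scalar(name="train/loss", value=loss, step=step)
writer.write_config("config.yaml")  # writes emmental.Meta.config as YAML
writer.close()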
Example #2
def test_lbfgs_optimizer(caplog):
    """Unit test of LBFGS optimizer"""

    caplog.set_level(logging.INFO)

    optimizer = "lbfgs"
    dirpath = "temp_test_optimizer"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    # Test default LBFGS setting
    config = {"learner_config": {"optimizer_config": {"optimizer": optimizer}}}
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert emmental_learner.optimizer.defaults == {
        "lr": 0.001,
        "max_iter": 20,
        "max_eval": 25,
        "tolerance_grad": 1e-07,
        "tolerance_change": 1e-09,
        "history_size": 100,
        "line_search_fn": None,
    }

    # Test new LBFGS setting
    config = {
        "learner_config": {
            "optimizer_config": {
                "optimizer": optimizer,
                "lr": 0.02,
                "l2": 0.05,
                f"{optimizer}_config": {
                    "max_iter": 30,
                    "max_eval": 40,
                    "tolerance_grad": 1e-04,
                    "tolerance_change": 1e-05,
                    "history_size": 10,
                    "line_search_fn": "strong_wolfe",
                },
            }
        }
    }
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert emmental_learner.optimizer.defaults == {
        "lr": 0.02,
        "max_iter": 30,
        "max_eval": 40,
        "tolerance_grad": 1e-04,
        "tolerance_change": 1e-05,
        "history_size": 10,
        "line_search_fn": "strong_wolfe",
    }

    shutil.rmtree(dirpath)
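
This and the other optimizer examples in this collection (ASGD, Adagrad, RMSprop, Adadelta, BertAdam, AdamW, SGD) all follow one pattern: name the optimizer under learner_config.optimizer_config, put shared options (lr, l2) next to it and optimizer-specific options under "<optimizer>_config", then let the learner build the torch optimizer. A condensed sketch of that pattern; the import paths are assumptions, since the examples omit their imports:

import shutil

import emmental
import torch.nn as nn
from emmental import Meta
from emmental.learner import EmmentalLearner

Meta.reset()
emmental.init("temp_optimizer_sketch")
emmental.Meta.update_config(
    {
        "learner_config": {
            "optimizer_config": {
                "optimizer": "sgd",               # any supported optimizer name
                "lr": 0.01,                       # shared options
                "l2": 0.0,                        # becomes weight_decay
                "sgd_config": {"momentum": 0.9},  # optimizer-specific options
            }
        }
    }
)

learner = EmmentalLearner()
learner._set_optimizer(nn.Linear(1, 1))  # same private helper the tests call
print(learner.optimizer.defaults)        # reflects the merged configuration

shutil.rmtree("temp_optimizer_sketch")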
Example #3
def test_asgd_optimizer(caplog):
    """Unit test of ASGD optimizer"""

    caplog.set_level(logging.INFO)

    optimizer = "asgd"
    dirpath = "temp_test_optimizer"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    # Test default ASGD setting
    config = {"learner_config": {"optimizer_config": {"optimizer": optimizer}}}
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert isequal(
        emmental_learner.optimizer.defaults,
        {
            "lr": 0.001,
            "lambd": 0.0001,
            "alpha": 0.75,
            "t0": 1_000_000.0,
            "weight_decay": 0,
        },
    )
Example #4
def test_mixed_scheduler_no_y_dict(caplog):
    """Unit test of mixed scheduler with no y_dict."""
    caplog.set_level(logging.INFO)

    init()

    task1 = "task1"
    x1 = np.random.rand(20, 2)

    task2 = "task2"
    x2 = np.random.rand(30, 3)

    dataloaders = [
        EmmentalDataLoader(
            task_to_label_dict={task_name: None},
            dataset=EmmentalDataset(name=task_name, X_dict={"feature": x}),
            split="train",
            batch_size=10,
            shuffle=True,
        ) for task_name, x in [(task1, x1), (task2, x2)]
    ]

    scheduler = MixedScheduler()

    assert scheduler.get_num_batches(dataloaders) == 2

    batch_y_dict_1 = [
        batch[0].Y_dict for batch in scheduler.get_batches(dataloaders)
    ]
    batch_y_dict_2 = [
        batch[1].Y_dict for batch in scheduler.get_batches(dataloaders)
    ]

    assert batch_y_dict_1 == [None] * 2
    assert batch_y_dict_2 == [None] * 2
Example #5
def test_logging_manager_tensorboard(caplog):
    """Unit test of logging_manager (tensorboard)."""
    caplog.set_level(logging.INFO)

    emmental.init()
    Meta.update_config(
        config={
            "logging_config": {
                "counter_unit": "epoch",
                "evaluation_freq": 1,
                "checkpointing": False,
                "checkpointer_config": {
                    "checkpoint_freq": 2
                },
                "writer_config": {
                    "writer": "tensorboard"
                },
            }
        })

    logging_manager = LoggingManager(n_batches_per_epoch=2)

    logging_manager.update(5)

    assert type(logging_manager.writer) == TensorBoardWriter
Example #6
def test_checkpointer_wrong_metric_mode(caplog):
    """Unit test of checkpointer (wrong metric mode)"""

    caplog.set_level(logging.INFO)

    emmental.Meta.reset()

    emmental.init()
    emmental.Meta.update_config(
        config={
            "logging_config": {
                "counter_unit": "sample",
                "evaluation_freq": 10,
                "checkpointing": True,
                "checkpointer_config": {
                    "checkpoint_metric": {
                        "model/all/train/loss": "min1"
                    }
                },
            }
        })

    with pytest.raises(ValueError):
        checkpointer = Checkpointer()
        checkpointer.clear()
Example #7
def test_checkpointer_metric(caplog):
    """Unit test of checkpointer (metric)"""

    caplog.set_level(logging.INFO)

    checkpoint_path = "temp_test_checkpointer"

    emmental.Meta.reset()

    emmental.init()
    emmental.Meta.update_config(
        config={
            "logging_config": {
                "counter_unit": "sample",
                "evaluation_freq": 10,
                "checkpointing": True,
                "checkpointer_config": {
                    "checkpoint_metric": None,
                    "checkpoint_task_metrics": {
                        "model/all/train/loss": "min",
                        "model/all/train/accuracy": "max",
                    },
                    "checkpoint_freq": 2,
                    "checkpoint_path": checkpoint_path,
                },
            }
        })

    checkpointer = Checkpointer()
    checkpointer.clear()

    assert os.path.exists(checkpoint_path) is True
    shutil.rmtree(checkpoint_path)
Example #8
def test_checkpointer_specific_path(caplog):
    """Unit test of checkpointer"""

    caplog.set_level(logging.INFO)

    checkpoint_path = "temp_test_checkpointer"

    emmental.Meta.reset()

    emmental.init()
    emmental.Meta.update_config(
        config={
            "logging_config": {
                "counter_unit": "sample",
                "evaluation_freq": 10,
                "checkpointing": True,
                "checkpointer_config": {
                    "checkpoint_freq": 2,
                    "checkpoint_path": checkpoint_path,
                },
            }
        })

    checkpointer = Checkpointer()
    checkpointer.clear()

    assert os.path.exists(checkpoint_path) is True
    shutil.rmtree(checkpoint_path)
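
Taken together, the three checkpointer examples above pin down the contract: every entry in checkpoint_metric or checkpoint_task_metrics must map to "min" or "max" (anything else raises ValueError), and checkpoints are written under checkpoint_path. A condensed sketch under those assumptions; the emmental.logging import path is assumed, since the examples omit their imports:

import os
import shutil

import emmental
from emmental.logging import Checkpointer

emmental.Meta.reset()
emmental.init()
emmental.Meta.update_config(
    config={
        "logging_config": {
            "counter_unit": "sample",
            "evaluation_freq": 10,
            "checkpointing": True,
            "checkpointer_config": {
                "checkpoint_metric": {"model/all/train/loss": "min"},  # valid mode
                "checkpoint_path": "temp_checkpointer_sketch",
            },
        }
    }
)

checkpointer = Checkpointer()  # creates checkpoint_path
checkpointer.clear()
assert os.path.exists("temp_checkpointer_sketch")
shutil.rmtree("temp_checkpointer_sketch")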
Example #9
 def setUp(self) -> None:
     self.args = parser_utils.parse_boot_and_emm_args(
         "test/run_args/test_end2end.json")
     # This _MUST_ get passed the args so it gets a random seed set
     emmental.init(log_dir="test/temp_log", config=self.args)
     if not os.path.exists(emmental.Meta.log_path):
         os.makedirs(emmental.Meta.log_path)
Example #10
def test_logging_manager_no_writer(caplog):
    """Unit test of logging_manager (no writer)."""
    caplog.set_level(logging.INFO)

    emmental.init()
    Meta.update_config(
        config={
            "logging_config": {
                "counter_unit": "epoch",
                "evaluation_freq": 1,
                "checkpointing": False,
                "checkpointer_config": {
                    "checkpoint_freq": 2
                },
                "writer_config": {
                    "writer": None
                },
            }
        })

    logging_manager = LoggingManager(n_batches_per_epoch=2)

    logging_manager.update(5)

    assert logging_manager.writer is None
Example #11
def setup(config, run_config_path=None):
    """
    Setup distributed backend and save configuration files.
    Args:
        config: config
        run_config_path: path for original run config

    Returns:
    """
    # torch.multiprocessing.set_sharing_strategy("file_system")
    # The multiprocessing start method must be "fork" to work with Meta.config
    torch.multiprocessing.set_start_method("fork", force=True)
    # Typical distributed launch:
    #   ulimit -n 500000
    #   python3 -m torch.distributed.launch --nproc_per_node=2 bootleg/run.py --config_script ...
    log_level = logging.getLevelName(config.run_config.log_level.upper())
    emmental.init(
        log_dir=config["meta_config"]["log_path"],
        config=config,
        use_exact_log_path=config["meta_config"]["use_exact_log_path"],
        local_rank=config.learner_config.local_rank,
        level=log_level,
    )
    log = logging.getLogger()
    # Remove streaming handlers and use rich
    log.handlers = [
        h for h in log.handlers if not type(h) is logging.StreamHandler
    ]
    log.addHandler(RichHandler())
    # Set up distributed backend
    emmental.Meta.init_distributed_backend()

    cmd_msg = " ".join(sys.argv)
    # Log configuration into files
    if config.learner_config.local_rank in [0, -1]:
        write_to_file(f"{emmental.Meta.log_path}/cmd.txt", cmd_msg)
        dump_yaml_file(f"{emmental.Meta.log_path}/parsed_config.yaml",
                       emmental.Meta.config)
        # Dump the run config (does not contain defaults)
        if run_config_path is not None:
            dump_yaml_file(
                f"{emmental.Meta.log_path}/run_config.yaml",
                load_yaml_file(run_config_path),
            )

    log_rank_0_info(logger, f"COMMAND: {cmd_msg}")
    log_rank_0_info(
        logger,
        f"Saving config to {emmental.Meta.log_path}/parsed_config.yaml")

    git_hash = "Not able to retrieve git hash"
    try:
        git_hash = subprocess.check_output([
            "git", "log", "-n", "1", "--pretty=tformat:%h-%ad", "--date=short"
        ]).strip()
    except subprocess.CalledProcessError:
        pass
    log_rank_0_info(logger, f"Git Hash: {git_hash}")
Example #12
def test_adagrad_optimizer(caplog):
    """Unit test of Adagrad optimizer"""

    caplog.set_level(logging.INFO)

    optimizer = "adagrad"
    dirpath = "temp_test_optimizer"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    # Test default Adagrad setting
    config = {"learner_config": {"optimizer_config": {"optimizer": optimizer}}}
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert isequal(
        emmental_learner.optimizer.defaults,
        {
            "lr": 0.001,
            "lr_decay": 0,
            "initial_accumulator_value": 0,
            "eps": 1e-10,
            "weight_decay": 0,
        },
    )

    # Test new Adagrad setting
    config = {
        "learner_config": {
            "optimizer_config": {
                "optimizer": optimizer,
                "lr": 0.02,
                "l2": 0.05,
                f"{optimizer}_config": {
                    "lr_decay": 0.1,
                    "initial_accumulator_value": 0.2,
                    "eps": 1e-5,
                },
            }
        }
    }
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert isequal(
        emmental_learner.optimizer.defaults,
        {
            "lr": 0.02,
            "lr_decay": 0.1,
            "initial_accumulator_value": 0.2,
            "eps": 1e-5,
            "weight_decay": 0.05,
        },
    )

    shutil.rmtree(dirpath)
Example #13
def test_config_check_in_meta(caplog):
    """Unit test of config check in meta."""
    caplog.set_level(logging.INFO)

    dirpath = "temp_test_meta_log_folder"

    Meta.reset()
    init(dirpath)

    config = {
        "logging_config": {
            "evaluation_freq": 5.0,
        },
    }
    Meta.update_config(config)

    assert type(Meta.config["logging_config"]["evaluation_freq"]) == int
    assert Meta.config["logging_config"]["evaluation_freq"] == 5

    config = {
        "logging_config": {
            "counter_unit": "batch",
            "evaluation_freq": 2.3,
        },
    }
    Meta.update_config(config)

    assert type(Meta.config["logging_config"]["evaluation_freq"]) == int
    assert Meta.config["logging_config"]["evaluation_freq"] == 3

    config = {
        "logging_config": {
            "counter_unit": "sample",
            "evaluation_freq": 0.2,
        },
    }
    Meta.update_config(config)

    assert type(Meta.config["logging_config"]["evaluation_freq"]) == int
    assert Meta.config["logging_config"]["evaluation_freq"] == 1

    config = {
        "logging_config": {
            "counter_unit": "epoch",
            "evaluation_freq": 1,
            "writer_config": {
                "write_loss_per_step": True
            },
        },
    }
    Meta.update_config(config)

    assert (
        Meta.config["logging_config"]["writer_config"]["write_loss_per_step"]
        is False)

    # Remove the temp folder
    shutil.rmtree(dirpath)
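
The expected values above are consistent with Emmental rounding a fractional evaluation_freq up to a whole number of counter units (an inference from this test, not a documented rule):

import math

# 5.0 -> 5, 2.3 -> 3, 0.2 -> 1, matching the assertions above.
for raw in (5.0, 2.3, 0.2):
    print(raw, "->", max(1, math.ceil(raw)))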
Example #14
def main(args):
    # Initialize Emmental
    config = parse_args_to_config(args)
    emmental.init(log_dir=config["meta_config"]["log_path"], config=config)

    # Log configuration into files
    cmd_msg = " ".join(sys.argv)
    logger.info(f"COMMAND: {cmd_msg}")
    write_to_file(f"{emmental.Meta.log_path}/cmd.txt", cmd_msg)

    logger.info(f"Config: {emmental.Meta.config}")
    write_to_file(f"{emmental.Meta.log_path}/config.txt", emmental.Meta.config)

    # Create dataloaders
    dataloaders = get_dataloaders(args)

    # Assign transforms to dataloaders
    aug_dataloaders = []
    if args.augment_policy:
        for idx in range(len(dataloaders)):
            if dataloaders[idx].split in args.train_split:
                dataloaders[idx].dataset.transform_cls = Augmentation(
                    args=args)

    config["learner_config"]["task_scheduler_config"][
        "task_scheduler"] = AugScheduler(augment_k=args.augment_k,
                                         enlarge=args.augment_enlarge)
    emmental.Meta.config["learner_config"]["task_scheduler_config"][
        "task_scheduler"] = config["learner_config"]["task_scheduler_config"][
            "task_scheduler"]

    # Create tasks
    model = EmmentalModel(name=f"{args.task}_task")
    model.add_task(create_task(args))

    # Set cudnn benchmark
    cudnn.benchmark = True

    # Load the best model from the pretrained model
    if config["model_config"]["model_path"] is not None:
        model.load(config["model_config"]["model_path"])

    if args.train:
        emmental_learner = EmmentalLearner()
        emmental_learner.learn(model, dataloaders + aug_dataloaders)

    # Remove all extra augmentation policy
    for idx in range(len(dataloaders)):
        dataloaders[idx].dataset.transform_cls = None

    scores = model.score(dataloaders)

    # Save metrics and models
    logger.info(f"Metrics: {scores}")
    scores["log_path"] = emmental.Meta.log_path
    write_to_json_file(f"{emmental.Meta.log_path}/metrics.txt", scores)
    model.save(f"{emmental.Meta.log_path}/last_model.pth")
Example #15
 def setUp(self) -> None:
     emmental.init(log_dir="test/temp_log")
     if not os.path.exists(emmental.Meta.log_path):
         os.makedirs(emmental.Meta.log_path)
     self.args = parser_utils.parse_boot_and_emm_args(
         "test/run_args/test_embeddings.json"
     )
     self.entity_symbols = EntitySymbolsSubclass()
     self.tokenizer = load_tokenizer()
Example #16
 def _init_meta(self, _seed, exp_dir, meta_config, learner_config, logging_config):
     emmental.init(path.join(exp_dir, '_emmental_logs'))
     Meta.update_config(
         config={
             'meta_config': {**meta_config, 'seed': _seed},
             'learner_config': learner_config,
             'logging_config': logging_config
         }
     )
Example #17
def test_plateau_scheduler(caplog):
    """Unit test of plateau scheduler."""
    caplog.set_level(logging.INFO)

    lr_scheduler = "plateau"
    dirpath = "temp_test_scheduler"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    config = {
        "learner_config": {
            "n_epochs": 4,
            "optimizer_config": {"optimizer": "sgd", "lr": 10},
            "lr_scheduler_config": {
                "lr_scheduler": lr_scheduler,
                "plateau_config": {
                    "metric": "model/train/all/loss",
                    "mode": "min",
                    "factor": 0.1,
                    "patience": 1,
                    "threshold": 0.0001,
                    "threshold_mode": "rel",
                    "cooldown": 0,
                    "eps": 1e-08,
                },
            },
        }
    }
    emmental.Meta.update_config(config)
    emmental_learner.n_batches_per_epoch = 1
    emmental_learner._set_learning_counter()
    emmental_learner._set_optimizer(model)
    emmental_learner._set_lr_scheduler(model)

    assert emmental_learner.optimizer.param_groups[0]["lr"] == 10

    emmental_learner.optimizer.step()
    emmental_learner._update_lr_scheduler(model, 0, {"model/train/all/loss": 1})
    assert abs(emmental_learner.optimizer.param_groups[0]["lr"] - 10) < 1e-5

    emmental_learner.optimizer.step()
    emmental_learner._update_lr_scheduler(model, 1, {"model/train/all/loss": 1})
    assert abs(emmental_learner.optimizer.param_groups[0]["lr"] - 10) < 1e-5

    emmental_learner.optimizer.step()
    emmental_learner._update_lr_scheduler(model, 2, {"model/train/all/loss": 1})
    assert abs(emmental_learner.optimizer.param_groups[0]["lr"] - 1) < 1e-5

    emmental_learner.optimizer.step()
    emmental_learner._update_lr_scheduler(model, 3, {"model/train/all/loss": 0.1})
    assert abs(emmental_learner.optimizer.param_groups[0]["lr"] - 1) < 1e-5

    shutil.rmtree(dirpath)
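
The asserted learning rates (10, 10, 1, 1) match the standard ReduceLROnPlateau behaviour for factor=0.1 and patience=1. A standalone PyTorch sketch of the same schedule, assuming Emmental's "plateau" scheduler is a thin wrapper around it:

import torch
import torch.nn as nn

model = nn.Linear(1, 1)
opt = torch.optim.SGD(model.parameters(), lr=10)
sched = torch.optim.lr_scheduler.ReduceLROnPlateau(
    opt, mode="min", factor=0.1, patience=1, threshold=0.0001
)

for step in range(4):
    opt.step()
    sched.step(1.0)  # a loss that never improves
    print(step, opt.param_groups[0]["lr"])  # 10, 10, 1, 1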
Example #18
def test_emmental_task(caplog):
    """Unit test of emmental task."""
    caplog.set_level(logging.INFO)

    emmental.init()

    def ce_loss(module_name, output_dict, Y):
        return F.cross_entropy(output_dict[module_name][0], Y.view(-1))

    def output(module_name, output_dict):
        return F.softmax(output_dict[module_name][0], dim=1)

    task_name = "task1"
    task_metrics = {task_name: ["accuracy"]}
    scorer = Scorer(metrics=task_metrics[task_name])

    task = EmmentalTask(
        name=task_name,
        module_pool=nn.ModuleDict({
            "input_module0": IdentityModule(),
            "input_module1": IdentityModule(),
            f"{task_name}_pred_head": IdentityModule(),
        }),
        task_flow=[
            Action("input1", "input_module0", [("_input_", "data")]),
            Action("input2", "input_module1", [("input1", 0)]),
            Action(f"{task_name}_pred_head", f"{task_name}_pred_head",
                   [("input2", 0)]),
        ],
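        # module_device: -1 pins a module to CPU, a non-negative index selects
        # that CUDA device, and keys that do not name a module in module_pool
        # ("input_module") are dropped, as the assertions below verify.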
        module_device={
            "input_module0": -1,
            "input_module1": 0,
            "input_module": -1
        },
        loss_func=partial(ce_loss, f"{task_name}_pred_head"),
        output_func=partial(output, f"{task_name}_pred_head"),
        action_outputs=None,
        scorer=scorer,
        require_prob_for_eval=False,
        require_pred_for_eval=True,
        weight=2.0,
    )

    assert task.name == task_name
    assert set(list(task.module_pool.keys())) == set(
        ["input_module0", "input_module1", f"{task_name}_pred_head"])
    assert task.action_outputs is None
    assert task.scorer == scorer
    assert len(task.task_flow) == 3
    assert task.module_device == {
        "input_module0": torch.device("cpu"),
        "input_module1": torch.device(0),
    }
    assert task.require_prob_for_eval is False
    assert task.require_pred_for_eval is True
    assert task.weight == 2.0
Example #19
def test_rms_prop_optimizer(caplog):
    """Unit test of RMSprop optimizer"""

    caplog.set_level(logging.INFO)

    optimizer = "rms_prop"
    dirpath = "temp_test_optimizer"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    # Test default RMSprop setting
    config = {"learner_config": {"optimizer_config": {"optimizer": optimizer}}}
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert emmental_learner.optimizer.defaults == {
        "lr": 0.001,
        "alpha": 0.99,
        "eps": 1e-08,
        "momentum": 0,
        "centered": False,
        "weight_decay": 0,
    }

    # Test new RMSprop setting
    config = {
        "learner_config": {
            "optimizer_config": {
                "optimizer": optimizer,
                "lr": 0.02,
                "l2": 0.05,
                f"{optimizer}_config": {
                    "alpha": 0.9,
                    "eps": 1e-05,
                    "momentum": 0.1,
                    "centered": True,
                },
            }
        }
    }
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert emmental_learner.optimizer.defaults == {
        "lr": 0.02,
        "alpha": 0.9,
        "eps": 1e-05,
        "momentum": 0.1,
        "centered": True,
        "weight_decay": 0.05,
    }

    shutil.rmtree(dirpath)
Example #20
def test_adadelta_optimizer(caplog):
    """Unit test of Adadelta optimizer"""

    caplog.set_level(logging.INFO)

    optimizer = "adadelta"
    dirpath = "temp_test_optimizer"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    # Test default Adadelta setting
    config = {"learner_config": {"optimizer_config": {"optimizer": optimizer}}}
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert isequal(
        emmental_learner.optimizer.defaults,
        {
            "lr": 0.001,
            "rho": 0.9,
            "eps": 1e-06,
            "weight_decay": 0
        },
    )

    # Test new Adadelta setting
    config = {
        "learner_config": {
            "optimizer_config": {
                "optimizer": optimizer,
                "lr": 0.02,
                "l2": 0.05,
                f"{optimizer}_config": {
                    "rho": 0.6,
                    "eps": 1e-05
                },
            }
        }
    }
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert isequal(
        emmental_learner.optimizer.defaults,
        {
            "lr": 0.02,
            "rho": 0.6,
            "eps": 1e-05,
            "weight_decay": 0.05
        },
    )

    shutil.rmtree(dirpath)
Example #21
def test_bert_adam_optimizer(caplog):
    """Unit test of BertAdam optimizer"""

    caplog.set_level(logging.INFO)

    optimizer = "bert_adam"
    dirpath = "temp_test_optimizer"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    # Test default BertAdam setting
    config = {"learner_config": {"optimizer_config": {"optimizer": optimizer}}}
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert emmental_learner.optimizer.defaults == {
        "lr": 0.001,
        "betas": (0.9, 0.999),
        "eps": 1e-08,
        "weight_decay": 0.0,
    }

    # Test new BertAdam setting
    config = {
        "learner_config": {
            "optimizer_config": {
                "optimizer": optimizer,
                "lr": 0.02,
                "l2": 0.05,
                f"{optimizer}_config": {
                    "betas": (0.8, 0.9),
                    "eps": 1e-05
                },
            }
        }
    }
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert emmental_learner.optimizer.defaults == {
        "lr": 0.02,
        "betas": (0.8, 0.9),
        "eps": 1e-05,
        "weight_decay": 0.05,
    }

    # Test BertAdam step
    emmental_learner.optimizer.zero_grad()
    torch.Tensor(1)
    F.mse_loss(model(torch.randn(1, 1)), torch.randn(1, 1)).backward()
    emmental_learner.optimizer.step()

    shutil.rmtree(dirpath)
Example #22
 def setUp(self) -> None:
     emmental.init(log_dir="test/temp_log")
     if not os.path.exists(emmental.Meta.log_path):
         os.makedirs(emmental.Meta.log_path)
     self.args = parser_utils.parse_boot_and_emm_args(
         "test/run_args/test_embeddings.json"
     )
     self.entity_symbols = EntitySymbolsSubclass()
     self.kg_adj = os.path.join(self.args.data_config.emb_dir, "kg_adj.txt")
     self.kg_adj_json = os.path.join(self.args.data_config.emb_dir, "kg_adj.json")
Example #23
def test_step_scheduler(caplog):
    """Unit test of step scheduler."""
    caplog.set_level(logging.INFO)

    lr_scheduler = "step"
    dirpath = "temp_test_scheduler"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    config = {
        "learner_config": {
            "n_epochs": 4,
            "optimizer_config": {
                "optimizer": "sgd",
                "lr": 10
            },
            "lr_scheduler_config": {
                "lr_scheduler": lr_scheduler,
                "step_config": {
                    "step_size": 2,
                    "gamma": 0.1,
                    "last_epoch": -1
                },
            },
        }
    }
    emmental.Meta.update_config(config)
    emmental_learner.n_batches_per_epoch = 1
    emmental_learner._set_learning_counter()
    emmental_learner._set_optimizer(model)
    emmental_learner._set_lr_scheduler(model)

    assert emmental_learner.optimizer.param_groups[0]["lr"] == 10

    emmental_learner.optimizer.step()
    emmental_learner._update_lr_scheduler(model, 0, {})
    assert abs(emmental_learner.optimizer.param_groups[0]["lr"] - 10) < 1e-5

    emmental_learner.optimizer.step()
    emmental_learner._update_lr_scheduler(model, 1, {})
    assert abs(emmental_learner.optimizer.param_groups[0]["lr"] - 1) < 1e-5

    emmental_learner.optimizer.step()
    emmental_learner._update_lr_scheduler(model, 2, {})
    assert abs(emmental_learner.optimizer.param_groups[0]["lr"] - 1) < 1e-5

    emmental_learner.optimizer.step()
    emmental_learner._update_lr_scheduler(model, 3, {})
    assert abs(emmental_learner.optimizer.param_groups[0]["lr"] - 0.1) < 1e-5

    shutil.rmtree(dirpath)
Example #24
def test_mixed_scheduler(caplog):
    """Unit test of mixed scheduler."""
    caplog.set_level(logging.INFO)

    init()

    task1 = "task1"
    x1 = np.random.rand(20, 2)
    y1 = torch.from_numpy(np.random.rand(20))

    task2 = "task2"
    x2 = np.random.rand(30, 3)
    y2 = torch.from_numpy(np.random.rand(30))

    dataloaders = [
        EmmentalDataLoader(
            task_to_label_dict={task_name: "label"},
            dataset=EmmentalDataset(
                name=task_name, X_dict={"feature": x}, Y_dict={"label": y}
            ),
            split="train",
            batch_size=10,
            shuffle=True,
        )
        for task_name, x, y in [(task1, x1, y1), (task2, x2, y2)]
    ]

    scheduler = MixedScheduler()

    assert scheduler.get_num_batches(dataloaders) == 2

    batch_task_names_1 = [
        batch_data[0][-2] for batch_data in scheduler.get_batches(dataloaders)
    ]
    batch_task_names_2 = [
        batch_data[1][-2] for batch_data in scheduler.get_batches(dataloaders)
    ]

    assert batch_task_names_1 == [task1, task1]
    assert batch_task_names_2 == [task2, task2]

    scheduler = MixedScheduler(fillup=True)

    assert scheduler.get_num_batches(dataloaders) == 3

    batch_task_names_1 = [
        batch_data[0][-2] for batch_data in scheduler.get_batches(dataloaders)
    ]
    batch_task_names_2 = [
        batch_data[1][-2] for batch_data in scheduler.get_batches(dataloaders)
    ]

    assert batch_task_names_1 == [task1, task1, task1]
    assert batch_task_names_2 == [task2, task2, task2]
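
A quick sanity check of the two batch counts asserted above (datasets of 20 and 30 examples, batch_size=10): the mixed scheduler yields one batch per task per step, stopping at the shortest dataloader by default and recycling shorter ones when fillup=True (a reading of this test, not library documentation).

sizes, batch_size = [20, 30], 10
print(min(n // batch_size for n in sizes))  # 2 batches by default
print(max(n // batch_size for n in sizes))  # 3 batches with fillup=True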
Example #25
def test_adamw_optimizer(caplog):
    """Unit test of AdamW optimizer"""

    caplog.set_level(logging.INFO)

    optimizer = "adamw"
    dirpath = "temp_test_optimizer"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    # Test default AdamW setting
    config = {"learner_config": {"optimizer_config": {"optimizer": optimizer}}}
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert emmental_learner.optimizer.defaults == {
        "lr": 0.001,
        "betas": (0.9, 0.999),
        "eps": 1e-08,
        "amsgrad": False,
        "weight_decay": 0,
    }

    # Test new AdamW setting
    config = {
        "learner_config": {
            "optimizer_config": {
                "optimizer": optimizer,
                "lr": 0.02,
                "l2": 0.05,
                f"{optimizer}_config": {
                    "betas": (0.9, 0.99),
                    "eps": 1e-05,
                    "amsgrad": True,
                },
            }
        }
    }
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert emmental_learner.optimizer.defaults == {
        "lr": 0.02,
        "betas": (0.9, 0.99),
        "eps": 1e-05,
        "amsgrad": True,
        "weight_decay": 0.05,
    }

    shutil.rmtree(dirpath)
Example #26
 def _init_meta(self, seed, exp_dir, meta_config, learner_config, logging_config):
     emmental.init(path.join(exp_dir, '_emmental_logs'))
     print(seed)
     Meta.update_config(
         config={
             'meta_config': {**meta_config, 'seed': seed},
             'model_config': {'device': meta_config['device']},
             'learner_config': learner_config,
             'logging_config': logging_config
         }
     )
     print(Meta.config)
Example #27
def test_sgd_optimizer(caplog):
    """Unit test of SGD optimizer."""
    caplog.set_level(logging.INFO)

    optimizer = "sgd"
    dirpath = "temp_test_optimizer"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    # Test default SGD setting
    config = {"learner_config": {"optimizer_config": {"optimizer": optimizer}}}
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert emmental_learner.optimizer.defaults == {
        "lr": 0.001,
        "momentum": 0,
        "dampening": 0,
        "nesterov": False,
        "weight_decay": 0.0,
    }

    # Test new SGD setting
    config = {
        "learner_config": {
            "optimizer_config": {
                "optimizer": optimizer,
                "lr": 0.02,
                "l2": 0.05,
                f"{optimizer}_config": {
                    "momentum": 0.1,
                    "dampening": 0,
                    "nesterov": True,
                },
            }
        }
    }
    emmental.Meta.update_config(config)
    emmental_learner._set_optimizer(model)

    assert emmental_learner.optimizer.defaults == {
        "lr": 0.02,
        "momentum": 0.1,
        "dampening": 0,
        "nesterov": True,
        "weight_decay": 0.05,
    }

    shutil.rmtree(dirpath)
Example #28
 def setUp(self) -> None:
     self.args = parser_utils.parse_boot_and_emm_args(
         "test/run_args/test_embeddings.json"
     )
     emmental.init(log_dir="test/temp_log", config=self.args)
     if not os.path.exists(emmental.Meta.log_path):
         os.makedirs(emmental.Meta.log_path)
     self.args.data_config.ent_embeddings = [
         DottedDict(
             {
                 "key": "learned1",
                 "load_class": "LearnedEntityEmb",
                 "dropout1d": 0.5,
                 "args": {"learned_embedding_size": 5, "tail_init": False},
             }
         ),
         DottedDict(
             {
                 "key": "learned2",
                 "dropout2d": 0.5,
                 "load_class": "LearnedEntityEmb",
                 "args": {"learned_embedding_size": 5, "tail_init": False},
             }
         ),
         DottedDict(
             {
                 "key": "learned3",
                 "load_class": "LearnedEntityEmb",
                 "freeze": True,
                 "args": {"learned_embedding_size": 5, "tail_init": False},
             }
         ),
         DottedDict(
             {
                 "key": "learned4",
                 "load_class": "LearnedEntityEmb",
                 "normalize": False,
                 "args": {"learned_embedding_size": 5, "tail_init": False},
             }
         ),
         DottedDict(
             {
                 "key": "learned5",
                 "load_class": "LearnedEntityEmb",
                 "cpu": True,
                 "args": {"learned_embedding_size": 5, "tail_init": False},
             }
         ),
     ]
     self.tokenizer = load_tokenizer()
     self.entity_symbols = EntitySymbolsSubclass()
Example #29
def test_logging_manager_no_checkpointing(caplog):
    """Unit test of logging_manager (no checkpointing)"""

    caplog.set_level(logging.INFO)

    emmental.init()
    Meta.update_config(
        config={
            "meta_config": {
                "verbose": False
            },
            "logging_config": {
                "counter_unit": "epoch",
                "evaluation_freq": 1,
                "checkpointing": False,
                "checkpointer_config": {
                    "checkpoint_freq": 2
                },
                "writer_config": {
                    "writer": "json"
                },
            },
        })

    logging_manager = LoggingManager(n_batches_per_epoch=2)

    logging_manager.update(5)
    assert logging_manager.trigger_evaluation() is False
    assert logging_manager.trigger_checkpointing() is False

    logging_manager.update(5)
    assert logging_manager.trigger_evaluation() is True
    assert logging_manager.trigger_checkpointing() is False

    logging_manager.update(10)
    assert logging_manager.trigger_evaluation() is False
    assert logging_manager.trigger_checkpointing() is False

    logging_manager.update(5)
    assert logging_manager.trigger_evaluation() is True
    assert logging_manager.trigger_checkpointing() is False

    assert logging_manager.epoch_count == 0

    assert logging_manager.sample_total == 25
    assert logging_manager.batch_total == 4
    assert logging_manager.epoch_total == 2

    model = EmmentalModel()

    logging_manager.close(model)
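
The totals asserted above follow directly from the per-batch sample counts passed to LoggingManager.update() (a reading of this test, not library documentation):

batch_sizes, n_batches_per_epoch = [5, 5, 10, 5], 2
print(sum(batch_sizes))                        # sample_total == 25
print(len(batch_sizes))                        # batch_total == 4
print(len(batch_sizes) / n_batches_per_epoch)  # epoch_total == 2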
Example #30
def test_cosine_annealing_scheduler(caplog):
    """Unit test of cosine annealing scheduler"""

    caplog.set_level(logging.INFO)

    lr_scheduler = "cosine_annealing"
    dirpath = "temp_test_scheduler"
    model = nn.Linear(1, 1)
    emmental_learner = EmmentalLearner()

    Meta.reset()
    emmental.init(dirpath)

    config = {
        "learner_config": {
            "n_epochs": 4,
            "optimizer_config": {
                "optimizer": "sgd",
                "lr": 10
            },
            "lr_scheduler_config": {
                "lr_scheduler": lr_scheduler
            },
        }
    }
    emmental.Meta.update_config(config)
    emmental_learner.n_batches_per_epoch = 1
    emmental_learner._set_optimizer(model)
    emmental_learner._set_lr_scheduler(model)

    assert emmental_learner.optimizer.param_groups[0]["lr"] == 10

    emmental_learner.optimizer.step()
    emmental_learner._update_lr_scheduler(model, 0, {})
    assert (abs(emmental_learner.optimizer.param_groups[0]["lr"] -
                8.535533905932738) < 1e-5)

    emmental_learner.optimizer.step()
    emmental_learner._update_lr_scheduler(model, 1, {})
    assert abs(emmental_learner.optimizer.param_groups[0]["lr"] - 5) < 1e-5

    emmental_learner.optimizer.step()
    emmental_learner._update_lr_scheduler(model, 2, {})
    assert (abs(emmental_learner.optimizer.param_groups[0]["lr"] -
                1.4644660940672627) < 1e-5)

    emmental_learner.optimizer.step()
    emmental_learner._update_lr_scheduler(model, 3, {})
    assert abs(emmental_learner.optimizer.param_groups[0]["lr"]) < 1e-5

    shutil.rmtree(dirpath)
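
The expected learning rates above follow the standard cosine annealing curve lr_t = lr_0 / 2 * (1 + cos(pi * t / T)), with lr_0 = 10 and T = 4 total steps (n_epochs=4, one batch per epoch); a quick numeric check:

import math

lr0, total_steps = 10, 4
for t in range(1, total_steps + 1):
    print(t, lr0 / 2 * (1 + math.cos(math.pi * t / total_steps)))
# prints 8.5355..., 5.0, 1.4644..., ~0.0, matching the assertions above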