Code example #1
import json
import tempfile

from allennlp.common import Params
from allennlp.data import DataLoader, DatasetReader, Vocabulary
from allennlp.models import Model
from allennlp.training import Trainer


def run_config(config):
    params = Params(json.loads(config))
    params_copy = params.duplicate()

    if 'dataset_reader' in params:
        reader = DatasetReader.from_params(params.pop('dataset_reader'))
    else:
        raise RuntimeError('`dataset_reader` section is required')

    all_instances = []
    if 'train_data_path' in params:
        print('Reading the training data...')
        train_data = reader.read(params.pop('train_data_path'))
        all_instances.extend(train_data)
    else:
        raise RuntimeError('`train_data_path` section is required')

    validation_data = None
    if 'validation_data_path' in params:
        print('Reading the validation data...')
        validation_data = reader.read(params.pop('validation_data_path'))
        all_instances.extend(validation_data)

    print('Building the vocabulary...')
    vocab = Vocabulary.from_instances(all_instances)

    model = None
    iterator = None
    if 'model' not in params:
        # 'dataset' mode — just preview the (first 10) instances
        print('Showing the first 10 instances:')
        for inst in all_instances[:10]:
            print(inst)
    else:
        model = Model.from_params(vocab=vocab, params=params.pop('model'))

        # `from_params` consumes the params it's given, so give each data
        # loader its own copy.
        loader_params = params.pop("data_loader")
        train_data.index_with(vocab)  # instances must be indexed before batching
        train_data_loader = DataLoader.from_params(dataset=train_data,
                                                   params=loader_params.duplicate())
        dev_data_loader = None
        if validation_data is not None:
            validation_data.index_with(vocab)
            dev_data_loader = DataLoader.from_params(dataset=validation_data,
                                                     params=loader_params)

        # set up a temporary, empty directory for serialization
        with tempfile.TemporaryDirectory() as serialization_dir:
            trainer = Trainer.from_params(
                model=model,
                serialization_dir=serialization_dir,
                data_loader=train_data_loader,
                validation_data_loader=dev_data_loader,
                params=params.pop('trainer'))
            trainer.train()

    return {
        'params': params_copy,
        'dataset_reader': reader,
        'vocab': vocab,
        'iterator': iterator,
        'model': model
    }
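
A minimal usage sketch for run_config (everything here is hypothetical: "sequence_tagging" is assumed to be a registered reader type and the paths are placeholders). Leaving out the "model" section exercises the preview-only branch:

config = json.dumps({
    "dataset_reader": {"type": "sequence_tagging"},  # assumed registered type
    "train_data_path": "data/train.tsv",             # placeholder path
    "validation_data_path": "data/dev.tsv",          # placeholder path
    # no "model" section: preview the first 10 instances instead of training
})
components = run_config(config)
print(components["vocab"])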
Code example #2
File: nmt_model.py  Project: omardroubi/CS224n-2019
    def __init__(self, config: Params, vocab: Vocabulary):
        super().__init__()
        # `config` supplies hidden_size, dropout_rate (e.g. 0.2) and device,
        # plus the embedder settings; `vocab` provides the char/token namespaces.
        self.source_embedder = NMTEmbedder(vocab.get_vocab_size("char_src"),
                                           config.duplicate())
        self.target_embedder = NMTEmbedder(vocab.get_vocab_size("char_trg"),
                                           config.duplicate())

        self.hidden_size = config.pop("hidden_size")
        self.dropout_rate = config.pop("dropout_rate")
        self.vocab = vocab
        self.target_vocab_size = self.vocab.get_vocab_size("token_trg")
        self.device = config.pop("device")

        self.encoder = nn.LSTM(
            input_size=self.source_embedder.char_emb_size,
            hidden_size=self.hidden_size,
            bidirectional=True,
            bias=True,
            batch_first=True,
        )
        self.decoder = nn.LSTMCell(
            input_size=self.source_embedder.char_emb_size + self.hidden_size,
            hidden_size=self.hidden_size,
            bias=True,
        )
        self.h_projection = nn.Linear(in_features=self.hidden_size * 2,
                                      out_features=self.hidden_size,
                                      bias=False)
        self.c_projection = nn.Linear(in_features=self.hidden_size * 2,
                                      out_features=self.hidden_size,
                                      bias=False)
        self.att_projection = nn.Linear(in_features=self.hidden_size * 2,
                                        out_features=self.hidden_size,
                                        bias=False)
        self.combined_output_projection = nn.Linear(
            in_features=self.hidden_size * 3,
            out_features=self.hidden_size,
            bias=False)
        self.target_vocab_projection = nn.Linear(
            in_features=self.hidden_size,
            out_features=self.target_vocab_size,
            bias=False,
        )
        self.dropout = nn.Dropout(p=self.dropout_rate)
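
A hedged construction sketch for this module (the class name NMTModel and the embed_size key consumed by NMTEmbedder are assumptions; the remaining keys are exactly the ones __init__ pops):

vocab = Vocabulary()  # in practice built from instances carrying the
                      # "char_src", "char_trg" and "token_trg" namespaces
config = Params({
    "embed_size": 50,     # assumed key, consumed inside NMTEmbedder
    "hidden_size": 256,
    "dropout_rate": 0.2,
    "device": "cpu",
})
model = NMTModel(config, vocab)  # hypothetical name of the enclosing class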
Code example #3
class TestTrainerUtil(AllenNlpTestCase):
    def setUp(self):
        super().setUp()
        self.snli_file = str(self.FIXTURES_ROOT / "data" / "snli.jsonl")
        self.params = Params({
            "dataset_reader": {
                "type": "snli"
            },
            "train_data_path": self.snli_file
        })
        self.cache_directory = str(self.FIXTURES_ROOT / "data_cache")

    def tearDown(self):
        super().tearDown()
        shutil.rmtree(self.cache_directory)

    def test_datasets_from_params_uses_caching_correctly_in_simplest_case(
            self):
        # We'll rely on the dataset reader tests to be sure of the functionality of this caching;
        # we're just checking here that things get hooked up correctly to the right spots.
        cache_prefix = "prefix"
        _ = util.datasets_from_params(self.params.duplicate(),
                                      self.cache_directory, cache_prefix)

        expected_cache_file = (
            f"{self.cache_directory}/{cache_prefix}/{flatten_filename(self.snli_file)}"
        )
        expected_param_file = f"{self.cache_directory}/{cache_prefix}/params.json"
        assert os.path.exists(expected_cache_file)
        assert os.path.exists(expected_param_file)
        with open(expected_param_file, "r") as param_file:
            saved_params = json.load(param_file)
            assert saved_params == self.params.pop("dataset_reader").as_dict(
                quiet=True)

    def test_datasets_from_params_uses_caching_correctly_with_hashed_params(
            self):
        # We'll rely on the dataset reader tests to be sure of the functionality of this caching;
        # we're just checking here that things get hooked up correctly to the right spots.
        _ = util.datasets_from_params(self.params, self.cache_directory)

        cache_prefix = util._dataset_reader_param_hash(Params({"type": "snli"}))
        expected_cache_file = (
            f"{self.cache_directory}/{cache_prefix}/{flatten_filename(self.snli_file)}"
        )
        assert os.path.exists(expected_cache_file)
Code example #4
def train_model(
    params: Params,
    serialization_dir: str,
    file_friendly_logging: bool = False,
    recover: bool = False,
    force: bool = False,
    node_rank: int = 0,
    include_package: List[str] = None,
    batch_weight_key: str = "",
) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    # Parameters

    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see ``Model.from_archive``.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    node_rank : ``int``, optional
        Rank of the current node in distributed training
    include_package : ``List[str]``, optional
        In distributed mode, extra packages mentioned will be imported in trainer workers.
    batch_weight_key : ``str``, optional (default="")
        If non-empty, name of metric used to weight the loss on a per-batch basis.

    # Returns

    best_model : ``Model``
        The model with the best epoch weights.
    """
    training_util.create_serialization_dir(params, serialization_dir, recover,
                                           force)
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    distributed_params = params.params.pop("distributed", None)
    # If distributed isn't in the config and the config contains strictly
    # one cuda device, we just run a single training process.
    if distributed_params is None:
        model = _train_worker(
            process_rank=0,
            params=params,
            serialization_dir=serialization_dir,
            file_friendly_logging=file_friendly_logging,
            include_package=include_package,
            batch_weight_key=batch_weight_key,
        )
        archive_model(serialization_dir)
        return model

    # Otherwise, we are running multiple processes for training.
    else:
        # We are careful here so that we can raise a good error if someone
        # passed the wrong thing - cuda_devices are required.
        device_ids = distributed_params.pop("cuda_devices", None)
        multi_device = isinstance(device_ids, list) and len(device_ids) > 1
        num_nodes = distributed_params.pop("num_nodes", 1)

        if not (multi_device or num_nodes > 1):
            raise ConfigurationError(
                "Multiple cuda devices/nodes need to be configured to run distributed training."
            )
        check_for_gpu(device_ids)

        master_addr = distributed_params.pop("master_address", "127.0.0.1")
        master_port = distributed_params.pop("master_port", 29500)
        num_procs = len(device_ids)
        world_size = num_nodes * num_procs

        logging.info(
            "Switching to distributed training mode since multiple GPUs are configured | "
            f"Master is at: {master_addr}:{master_port} | Rank of this node: {node_rank} | "
            f"Number of workers in this node: {num_procs} | Number of nodes: {num_nodes} | "
            f"World size: {world_size}")

        # Creating `Vocabulary` objects from workers could be problematic since
        # the data iterators in each worker will yield only `rank` specific
        # instances. Hence it is safe to construct the vocabulary and write it
        # to disk before initializing the distributed context. The workers will
        # load the vocabulary from the path specified.
        if params.get("vocabulary", Params({})).get("type", "") != "from_files":
            vocab = training_util.make_vocab_from_params(
                params.duplicate(), serialization_dir)
            params["vocabulary"] = {
                "type": "from_files",
                "directory": os.path.join(serialization_dir, "vocabulary"),
                "padding_token": vocab._padding_token,
                "oov_token": vocab._oov_token,
            }

        mp.spawn(
            _train_worker,
            args=(
                params.duplicate(),
                serialization_dir,
                file_friendly_logging,
                include_package,
                batch_weight_key,
                node_rank,
                master_addr,
                master_port,
                world_size,
                device_ids,
            ),
            nprocs=num_procs,
        )
        archive_model(serialization_dir)
        model = Model.load(params, serialization_dir)
        return model
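
A hedged sketch of the config fragment the distributed branch above consumes; the key names are exactly the ones popped from distributed_params, and the device ids are placeholders:

params = Params({
    # ... dataset_reader / model / data_loader / trainer sections as usual ...
    "distributed": {
        "cuda_devices": [0, 1],         # placeholder device ids
        "num_nodes": 1,
        "master_address": "127.0.0.1",  # defaults shown explicitly
        "master_port": 29500,
    },
})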
Code example #5
def train_model(
    params: Params,
    serialization_dir: Union[str, PathLike],
    recover: bool = False,
    force: bool = False,
    node_rank: int = 0,
    include_package: List[str] = None,
    dry_run: bool = False,
    file_friendly_logging: bool = False,
) -> Optional[Model]:
    """
    Trains the model specified in the given [`Params`](../common/params.md#params) object, using the data
    and training parameters also specified in that object, and saves the results in `serialization_dir`.

    # Parameters

    params : `Params`
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : `str`
        The directory in which to save results and logs.
    recover : `bool`, optional (default=`False`)
        If `True`, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see `Model.from_archive`.
    force : `bool`, optional (default=`False`)
        If `True`, we will overwrite the serialization directory if it already exists.
    node_rank : `int`, optional
        Rank of the current node in distributed training
    include_package : `List[str]`, optional
        In distributed mode, extra packages mentioned will be imported in trainer workers.
    dry_run : `bool`, optional (default=`False`)
        Do not train a model, but create a vocabulary, show dataset statistics and other training
        information.
    file_friendly_logging : `bool`, optional (default=`False`)
        If `True`, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.

    # Returns

    best_model : `Optional[Model]`
        The model with the best epoch weights or `None` if in dry run.
    """
    common_logging.FILE_FRIENDLY_LOGGING = file_friendly_logging

    training_util.create_serialization_dir(params, serialization_dir, recover,
                                           force)
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    distributed_params = params.params.pop("distributed", None)
    # If distributed isn't in the config and the config contains strictly
    # one cuda device, we just run a single training process.
    if distributed_params is None:
        model = _train_worker(
            process_rank=0,
            params=params,
            serialization_dir=serialization_dir,
            include_package=include_package,
            dry_run=dry_run,
            file_friendly_logging=file_friendly_logging,
        )

        if not dry_run:
            archive_model(serialization_dir)
        return model

    # Otherwise, we are running multiple processes for training.
    else:
        # We are careful here so that we can raise a good error if someone
        # passed the wrong thing - cuda_devices are required.
        device_ids = distributed_params.pop("cuda_devices", None)
        multi_device = isinstance(device_ids, list) and len(device_ids) > 1
        num_nodes = distributed_params.pop("num_nodes", 1)

        if not (multi_device or num_nodes > 1):
            raise ConfigurationError(
                "Multiple cuda devices/nodes need to be configured to run distributed training."
            )
        check_for_gpu(device_ids)

        master_addr = distributed_params.pop("master_address", "127.0.0.1")
        master_port = distributed_params.pop("master_port", 29500)
        num_procs = len(device_ids)
        world_size = num_nodes * num_procs

        # Creating `Vocabulary` objects from workers could be problematic since
        # the data loaders in each worker will yield only `rank` specific
        # instances. Hence it is safe to construct the vocabulary and write it
        # to disk before initializing the distributed context. The workers will
        # load the vocabulary from the path specified.
        vocab_dir = os.path.join(serialization_dir, "vocabulary")
        if recover:
            vocab = Vocabulary.from_files(vocab_dir)
        else:
            vocab = training_util.make_vocab_from_params(
                params.duplicate(),
                serialization_dir,
                print_statistics=dry_run)
        params["vocabulary"] = {
            "type": "from_files",
            "directory": vocab_dir,
            "padding_token": vocab._padding_token,
            "oov_token": vocab._oov_token,
        }

        logging.info(
            "Switching to distributed training mode since multiple GPUs are configured | "
            f"Master is at: {master_addr}:{master_port} | Rank of this node: {node_rank} | "
            f"Number of workers in this node: {num_procs} | Number of nodes: {num_nodes} | "
            f"World size: {world_size}")

        mp.spawn(
            _train_worker,
            args=(
                params.duplicate(),
                serialization_dir,
                include_package,
                dry_run,
                node_rank,
                master_addr,
                master_port,
                world_size,
                device_ids,
                file_friendly_logging,
            ),
            nprocs=num_procs,
        )
        if dry_run:
            return None
        else:
            archive_model(serialization_dir)
            model = Model.load(params, serialization_dir)
            return model
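
A short usage sketch of the dry-run path (the serialization directory is a placeholder): the vocabulary is built and dataset statistics are printed, but nothing is trained or archived, and the call returns None:

result = train_model(params, serialization_dir="/tmp/my_experiment", dry_run=True)
assert result is None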
Code example #6
File: train_test.py  Project: zheng5yu9/allennlp
    def test_train_number_of_steps(self):
        number_of_epochs = 2

        last_num_steps_per_epoch: Optional[int] = None

        @LearningRateScheduler.register("mock")
        class MockLRScheduler(ExponentialLearningRateScheduler):
            def __init__(self, optimizer: torch.optim.Optimizer, num_steps_per_epoch: int):
                super().__init__(optimizer)
                nonlocal last_num_steps_per_epoch
                last_num_steps_per_epoch = num_steps_per_epoch

        batch_callback_counter = 0

        @BatchCallback.register("counter")
        class CounterBatchCallback(BatchCallback):
            def __call__(
                self,
                trainer: GradientDescentTrainer,
                batch_inputs: List[List[TensorDict]],
                batch_outputs: List[Dict[str, Any]],
                epoch: int,
                batch_number: int,
                is_training: bool,
                is_master: bool,
            ) -> None:
                nonlocal batch_callback_counter
                if is_training:
                    batch_callback_counter += 1

        params = Params(
            {
                "model": {
                    "type": "simple_tagger",
                    "text_field_embedder": {
                        "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}}
                    },
                    "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
                },
                "dataset_reader": {"type": "sequence_tagging"},
                "train_data_path": SEQUENCE_TAGGING_DATA_PATH,
                "test_data_path": SEQUENCE_TAGGING_DATA_PATH,
                "validation_data_path": SEQUENCE_TAGGING_DATA_PATH,
                "evaluate_on_test": True,
                "data_loader": {"batch_size": 2},
                "trainer": {
                    "num_epochs": number_of_epochs,
                    "optimizer": "adam",
                    "learning_rate_scheduler": {"type": "mock"},
                    "batch_callbacks": ["counter"],
                },
            }
        )
        train_model(
            params.duplicate(), serialization_dir=os.path.join(self.TEST_DIR, "train_normal")
        )
        assert batch_callback_counter == last_num_steps_per_epoch * number_of_epochs
        batch_callback_counter = 0
        normal_steps_per_epoch = last_num_steps_per_epoch

        original_batch_size = params["data_loader"]["batch_size"]
        params["data_loader"]["batch_size"] = 1
        train_model(
            params.duplicate(), serialization_dir=os.path.join(self.TEST_DIR, "train_with_bs1")
        )
        assert batch_callback_counter == last_num_steps_per_epoch * number_of_epochs
        batch_callback_counter = 0
        assert normal_steps_per_epoch == math.ceil(last_num_steps_per_epoch / original_batch_size)

        params["data_loader"]["batch_size"] = original_batch_size
        params["trainer"]["num_gradient_accumulation_steps"] = 3
        train_model(params, serialization_dir=os.path.join(self.TEST_DIR, "train_with_ga"))
        assert batch_callback_counter == last_num_steps_per_epoch * number_of_epochs
        batch_callback_counter = 0
        assert math.ceil(normal_steps_per_epoch / 3) == last_num_steps_per_epoch
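
A worked instance of the final assertion, with a hypothetical fixture size: 8 training instances at batch_size=2 give normal_steps_per_epoch == 4, and with num_gradient_accumulation_steps=3 the scheduler instead sees math.ceil(4 / 3) == 2 steps per epoch.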
Code example #7
File: train.py  Project: loopylangur/allennlp
def train_model(
    params: Params,
    serialization_dir: str,
    file_friendly_logging: bool = False,
    recover: bool = False,
    force: bool = False,
    cache_directory: str = None,
    cache_prefix: str = None,
    node_rank: int = 0,
    include_package: List[str] = None,
) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    # Parameters

    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    cache_directory : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.
    cache_prefix : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.
    node_rank : ``int``, optional
        Rank of the current node in distributed training
    include_package : ``List[str]``, optional
        In distributed mode, extra packages mentioned will be imported in trainer workers.

    # Returns

    best_model : ``Model``
        The model with the best epoch weights.
    """
    create_serialization_dir(params, serialization_dir, recover, force)
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    distributed_params = params.params.pop("distributed", None)
    # If distributed isn't in the config and the config contains strictly
    # one cuda device, we just run a single training process.
    if distributed_params is None:
        model = _train_worker(
            process_rank=0,
            params=params,
            serialization_dir=serialization_dir,
            file_friendly_logging=file_friendly_logging,
            recover=recover,
            cache_directory=cache_directory,
            cache_prefix=cache_prefix,
            include_package=include_package,
        )
        archive_model(serialization_dir,
                      files_to_archive=params.files_to_archive)
        return model

    # Otherwise, we are running multiple processes for training.
    else:
        # We are careful here so that we can raise a good error if someone
        # passed the wrong thing - cuda_devices are required.
        device_ids = distributed_params.pop("cuda_devices", None)
        multi_device = isinstance(device_ids, list) and len(device_ids) > 1
        num_nodes = distributed_params.pop("num_nodes", 1)

        if not (multi_device or num_nodes > 1):
            raise ConfigurationError(
                "Multiple cuda devices/nodes need to be configured to run distributed training."
            )
        check_for_gpu(device_ids)

        master_addr = distributed_params.pop("master_address", "127.0.0.1")
        master_port = distributed_params.pop("master_port", 29500)
        num_procs = len(device_ids)
        world_size = num_nodes * num_procs

        os.environ["MASTER_ADDR"] = master_addr
        os.environ["MASTER_PORT"] = str(master_port)
        os.environ["WORLD_SIZE"] = str(world_size)

        logging.info(
            "Switching to distributed training mode since multiple GPUs are configured | "
            f"Master is at: {master_addr}:{master_port} | Rank of this node: {node_rank} | "
            f"Number of workers in this node: {num_procs} | Number of nodes: {num_nodes} | "
            f"World size: {world_size}")

        # Creating `Vocabulary` objects from workers could be problematic since the data iterators
        # in each worker will yield only `rank` specific instances. Hence it is safe to construct
        # the vocabulary and write it to disk before initializing the distributed context. The workers
        # will load the vocabulary from the path specified.
        make_vocab_from_params(params.duplicate(), serialization_dir)
        params["vocabulary"] = {
            "directory_path": os.path.join(serialization_dir, "vocabulary"),
            "extend": False,  # vocab extension would have been done above
        }

        mp.spawn(
            _train_worker,
            args=(
                params.duplicate(),
                serialization_dir,
                file_friendly_logging,
                recover,
                cache_directory,
                cache_prefix,
                include_package,
                node_rank,
                master_addr,
                master_port,
                world_size,
                device_ids,
            ),
            nprocs=num_procs,
        )
        archive_model(serialization_dir,
                      files_to_archive=params.files_to_archive)
        model = Model.load(params, serialization_dir)
        return model
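
A hedged call sketch for the caching arguments this version adds (paths and prefix are placeholders; per code example #3, the prefix is hashed from the dataset reader params when omitted):

best = train_model(
    params,
    serialization_dir="runs/exp1",          # placeholder
    cache_directory="/tmp/allennlp_cache",  # placeholder; see datasets_from_params
    cache_prefix="my_reader_v1",            # optional
)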
Code example #8
File: train.py  Project: text-machine-lab/allennlp
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False,
                cache_directory: str = None,
                cache_prefix: str = None) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    cache_directory : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.
    cache_prefix : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.

    Returns
    -------
    best_model: ``Model``
        The model with the best epoch weights.
    """
    prepare_environment(params)
    create_serialization_dir(params, serialization_dir, recover, force)
    stdout_handler = prepare_global_logging(serialization_dir,
                                            file_friendly_logging)

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    check_for_gpu(cuda_device)

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    trainer_type = params.get("trainer", {}).get("type", "default")

    if trainer_type == "default":
        # Special logic to instantiate backward-compatible trainer.
        pieces = TrainerPieces.from_params(
            params,  # pylint: disable=no-member
            serialization_dir,
            recover,
            cache_directory,
            cache_prefix)
        trainer = Trainer.from_params(
            model=pieces.model,
            serialization_dir=serialization_dir,
            iterator=pieces.iterator,
            train_data=pieces.train_dataset,
            validation_data=pieces.validation_dataset,
            params=pieces.params,
            validation_iterator=pieces.validation_iterator)
    else:
        # Workaround to obtain the evaluation parts.
        pieces = TrainerPieces.from_params(
            params.duplicate(),  # pylint: disable=no-member
            serialization_dir,
            recover,
            cache_directory,
            cache_prefix)

        trainer = TrainerBase.from_params(params, serialization_dir, recover)

    evaluation_iterator = pieces.validation_iterator or pieces.iterator
    evaluation_dataset = pieces.test_dataset

    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info(
                "Training interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir,
                          files_to_archive=params.files_to_archive)
        raise

    # Evaluate
    if evaluation_dataset and evaluate_on_test:
        logger.info(
            "The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            trainer.model,
            evaluation_dataset,
            evaluation_iterator,
            cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
            # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
            batch_weight_key="")

        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif evaluation_dataset:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    cleanup_global_logging(stdout_handler)

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)
    dump_metrics(os.path.join(serialization_dir, "metrics.json"),
                 metrics,
                 log=True)

    # We count on the trainer to have the model with best weights
    return trainer.model
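
A hedged call sketch mirroring the keys this version reads from params ("evaluate_on_test" via pop_bool and the trainer "type"); the remaining sections are elided:

params = Params({
    # ... dataset_reader / data paths / model / iterator sections ...
    "evaluate_on_test": True,
    "trainer": {"type": "default", "num_epochs": 2, "optimizer": "adam"},
})
best = train_model(params, serialization_dir="runs/exp1")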
Code example #9
File: archival_test.py  Project: himkt/allennlp
class ArchivalTest(AllenNlpTestCase):
    def setup_method(self):
        super().setup_method()

        self.params = Params(
            {
                "model": {
                    "type": "simple_tagger",
                    "text_field_embedder": {
                        "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}}
                    },
                    "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
                },
                "dataset_reader": {"type": "sequence_tagging"},
                "train_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
                "validation_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
                "data_loader": {"batch_size": 2},
                "trainer": {"num_epochs": 2, "optimizer": "adam", "cuda_device": -1},
            }
        )

    def test_archiving(self):
        # copy params, since they'll get consumed during training
        params_copy = self.params.duplicate()
        params_dict_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / "archive_test"
        model = train_model(self.params, serialization_dir=serialization_dir)

        archive_path = serialization_dir / "model.tar.gz"

        # load from the archive
        archive = load_archive(archive_path)
        model2 = archive.model

        assert_models_equal(model, model2)

        assert isinstance(
            archive.dataset_reader,
            type(DatasetReader.from_params(params_copy["dataset_reader"].duplicate())),
        )
        assert isinstance(
            archive.validation_dataset_reader,
            type(DatasetReader.from_params(params_copy["dataset_reader"].duplicate())),
        )  # validation_dataset_reader is not in the config, so fall back to dataset_reader

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_dict_copy

    def test_archive_model_uses_archive_path(self):

        serialization_dir = self.TEST_DIR / "serialization"
        # Train a model
        train_model(self.params, serialization_dir=serialization_dir)
        # Use a new path.
        archive_model(
            serialization_dir=serialization_dir, archive_path=serialization_dir / "new_path.tar.gz"
        )
        archive = load_archive(serialization_dir / "new_path.tar.gz")
        assert archive

    def test_loading_serialization_directory(self):
        # copy params, since they'll get consumed during training
        params_dict_copy = copy.deepcopy(self.params.as_dict())

        # `train_model` should create an archive
        serialization_dir = self.TEST_DIR / "serialization"
        model = train_model(self.params, serialization_dir=serialization_dir)

        # load from the serialization directory itself
        archive = load_archive(serialization_dir)
        model2 = archive.model

        assert_models_equal(model, model2)

        # check that params are the same
        params2 = archive.config
        assert params2.as_dict() == params_dict_copy

    def test_can_load_from_archive_model(self):
        serialization_dir = self.FIXTURES_ROOT / "basic_classifier" / "from_archive_serialization"
        archive_path = serialization_dir / "model.tar.gz"
        model = load_archive(archive_path).model

        # We want to be sure that we don't just not crash, but also be sure that we loaded the right
        # weights for the model.  We'll do that by making sure that we didn't just load the model
        # that's in the `archive_path` of the config file, which is this one.
        base_model_path = self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
        base_model = load_archive(base_model_path).model
        base_model_params = dict(base_model.named_parameters())
        for name, parameters in model.named_parameters():
            if parameters.size() == base_model_params[name].size():
                assert not (parameters == base_model_params[name]).all()
            else:
                # In this case, the parameters are definitely different, no need for the above
                # check.
                pass

    def test_include_in_archive(self):
        self.params["include_in_archive"] = ["metrics_epoch_*.json"]

        serialization_dir = self.TEST_DIR / "serialization"
        # Train a model
        train_model(self.params, serialization_dir=serialization_dir)

        # Assert that the additional targets were archived
        with tempfile.TemporaryDirectory() as tempdir:
            with tarfile.open(serialization_dir / "model.tar.gz", "r:gz") as archive:
                archive.extractall(tempdir)
            assert os.path.isfile(os.path.join(tempdir, "metrics_epoch_0.json"))
            assert os.path.isfile(os.path.join(tempdir, "metrics_epoch_1.json"))
            assert not os.path.isfile(os.path.join(tempdir, "metrics.json"))

    def test_invalid_include_in_archive(self):
        self.params["include_in_archive"] = [CONFIG_NAME]

        serialization_dir = self.TEST_DIR / "serialization"

        with pytest.raises(ConfigurationError) as exc:
            train_model(self.params, serialization_dir=serialization_dir)
        # the assertion must sit outside the `with` block; inside it, it would
        # never execute because train_model raises first
        assert "are saved names and cannot be used" in str(exc.value)
Code example #10
def train_single(config: Params,
                 instances: List[Instance],
                 partially_labeled_instances: List[Instance],
                 reader: DatasetReader,
                 index: int,
                 cuda_device: int) -> Tuple[List[str], Model]:
    instances = deepcopy(instances)
    partially_labeled_instances = deepcopy(partially_labeled_instances)
    config = deepcopy(config)
    test_instance = instances[index]
    train_val_instances = instances[:index] + instances[index + 1:]

    random.shuffle(train_val_instances)
    num_train_instances = int(0.9 * len(train_val_instances))
    train_instances = train_val_instances[:num_train_instances]
    val_instances = train_val_instances[num_train_instances:]
    trainer = get_trainer_from_config(config.duplicate(),
                                      train_instances=train_instances,
                                      val_instances=val_instances,
                                      device=cuda_device)
    trainer.train()

    metric, should_decrease = trainer._validation_metric, trainer._metric_tracker._should_decrease
    patience = trainer._metric_tracker._patience
    bad_epochs = -1
    num_epochs = 5
    model = trainer.model
    metrics = get_metrics(trainer)
    originalModel = ModelData(metric=metrics[metric], weights=model.state_dict())
    bestModel = ModelData(metric=metrics[metric], model=model)

    for _ in range(num_epochs):
        best_model = bestModel.model
        train_instances = get_training_data(partially_labeled_instances, best_model, reader)
        trainer = get_trainer_from_config(
            config.duplicate(),
            train_instances=train_instances,
            val_instances=val_instances,
            vocab=best_model.vocab,
            device=cuda_device)
        trainer.train()
        # update the parameters
        with torch.no_grad():
            for name, value in trainer.model.named_parameters():
                if name in originalModel.weights and value.requires_grad:
                    value.mul_(0.1).add_(0.9 * originalModel.weights[name])
        metrics = get_metrics(trainer)
        if should_decrease:
            if metrics[metric] < bestModel.metric:
                bestModel = ModelData(metric=metrics[metric], model=trainer.model)
                bad_epochs = 0
            else:
                bad_epochs += 1
        else:
            if metrics[metric] > bestModel.metric:
                bestModel = ModelData(metric=metrics[metric], model=trainer.model)
                bad_epochs = 0
            else:
                bad_epochs += 1
        if bad_epochs == patience:
            break
    model = bestModel.model
    model.eval()
    prediction = sanitize(model.forward_on_instance(test_instance))
    return prediction, model
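
A hedged driver sketch for train_single: each call holds out the instance at index for prediction and trains on the remainder, i.e. a leave-one-out loop over the dataset:

predictions = []
for i in range(len(instances)):
    prediction, _model = train_single(config, instances,
                                      partially_labeled_instances,
                                      reader, index=i, cuda_device=0)
    predictions.append(prediction)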