    # Test-fixture helper: builds the default callback list for an AllenNLP
    # 0.9-style CallbackTrainer. Relies on self.TEST_DIR, self.vocab and
    # self.instances being set up by the surrounding test class.
    def default_callbacks(
        self,
        validation_metric: str = "-loss",
        patience: int = None,
        max_checkpoints: int = 20,
        checkpoint_every: int = None,
        model_save_interval: float = None,
        serialization_dir: str = "__DEFAULT__",
        validation_data: Iterable[Instance] = None,
        validation_iterator: DataIterator = None,
        batch_size: int = 2,
    ):
        if serialization_dir == "__DEFAULT__":
            serialization_dir = self.TEST_DIR
        checkpointer = Checkpointer(serialization_dir, checkpoint_every,
                                    max_checkpoints)
        tensorboard = TensorboardWriter(get_batch_num_total=lambda: None)

        if validation_iterator is None:
            validation_iterator = BasicIterator(batch_size=batch_size)
            validation_iterator.index_with(self.vocab)

        return [
            # stream training loss/metrics (and periodic batch sizes) to TensorBoard
            LogToTensorboard(log_batch_size_period=10,
                             tensorboard=tensorboard),
            # save model/training state; model_save_interval enables mid-epoch saves
            Checkpoint(checkpointer, model_save_interval),
            # run validation after each epoch
            Validate(
                validation_data=self.instances
                if validation_data is None else validation_data,
                validation_iterator=validation_iterator,
            ),
            # track the validation metric and handle patience-based early stopping
            TrackMetrics(patience, validation_metric),
            # rescale/clip gradients before each optimizer step
            GradientNormAndClip(),
        ]
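
A minimal usage sketch (not part of the test class above): handing a callback list like the one built by default_callbacks() to the 0.9-era AllenNLP CallbackTrainer. The lowercase names (my_model, train_instances, training_iterator, my_optimizer, callbacks, serialization_dir) are placeholders, and the exact constructor arguments can differ between AllenNLP versions.

from allennlp.training.callback_trainer import CallbackTrainer

trainer = CallbackTrainer(
    model=my_model,                    # an allennlp Model
    training_data=train_instances,     # Iterable[Instance]
    iterator=training_iterator,        # DataIterator indexed with the vocabulary
    optimizer=my_optimizer,
    num_epochs=5,
    serialization_dir=serialization_dir,
    callbacks=callbacks,               # e.g. the list returned by default_callbacks()
)
metrics = trainer.train()              # callbacks fire on the training-loop events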

    # Classmethod factory: the model and optimizer arrive as Lazy objects and
    # are only constructed here once the vocabulary has been built.
    @classmethod
    def from_partial_objects(
        cls,
        serialization_dir: str,
        train_dataset_readers: Dict[str, DatasetReader],
        train_file_paths: Dict[str, str],
        model: Lazy[Model],
        iterator: DataIterator,
        mingler: DatasetMingler,
        optimizer: Lazy[Optimizer],
        num_epochs: int = 10,
    ) -> "MultiTaskTrainer":

        # read each task's training data with its corresponding reader
        datasets = {
            name: reader.read(train_file_paths[name])
            for name, reader in train_dataset_readers.items()
        }

        # build a single vocabulary over all instances from every dataset,
        # then construct the lazily-specified model and index the iterator
        instances = (instance for dataset in datasets.values()
                     for instance in dataset)
        vocab = Vocabulary.from_instances(instances=instances)
        model = model.construct(vocab=vocab)
        iterator.index_with(vocab)

        # the optimizer is also Lazy: it needs the (name, parameter) pairs of
        # the freshly constructed model's trainable parameters
        parameters = [[n, p] for n, p in model.named_parameters()
                      if p.requires_grad]
        optimizer_ = optimizer.construct(model_parameters=parameters)

        return MultiTaskTrainer(model, serialization_dir, iterator, mingler,
                                optimizer_, datasets, num_epochs)
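
A minimal sketch of calling this factory directly rather than through a config file, assuming Lazy(...) simply wraps a callable whose remaining keyword arguments are filled in later by .construct(). MyModel, my_readers, my_paths, my_iterator and my_mingler are placeholders, not names defined above.

import torch

trainer = MultiTaskTrainer.from_partial_objects(
    serialization_dir="/tmp/multitask_trainer",   # placeholder directory
    train_dataset_readers=my_readers,             # Dict[str, DatasetReader]
    train_file_paths=my_paths,                    # Dict[str, str], same keys as the readers
    model=Lazy(MyModel),                          # constructed inside with vocab=...
    iterator=my_iterator,
    mingler=my_mingler,
    optimizer=Lazy(lambda model_parameters: torch.optim.Adam(
        [param for _, param in model_parameters], lr=1e-3)),
    num_epochs=3,
)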
Example #3
    # Variant of default_callbacks in which training-batch generation
    # (GenerateTrainingBatches) and the supervised forward/backward step
    # (TrainSupervised) are themselves handled by callbacks.
    def default_callbacks(self,
                          validation_metric: str = "-loss",
                          patience: int = None,
                          max_checkpoints: int = 20,
                          checkpoint_every: int = None,
                          serialization_dir: str = "__DEFAULT__",
                          iterator: DataIterator = None,
                          validation_data: Iterable[Instance] = None,
                          validation_iterator: DataIterator = None,
                          batch_size: int = 2):
        if serialization_dir == "__DEFAULT__":
            serialization_dir = self.TEST_DIR
        checkpointer = Checkpointer(serialization_dir,
                                    checkpoint_every,
                                    max_checkpoints)
        tensorboard = TensorboardWriter(get_batch_num_total=lambda: None)

        if iterator is None:
            iterator = BasicIterator(batch_size=batch_size)
            iterator.index_with(self.vocab)

        return [
            # stream training loss/metrics (and periodic batch sizes) to TensorBoard
            LogToTensorboard(log_batch_size_period=10,
                             tensorboard=tensorboard),
            Checkpoint(checkpointer),
            # run validation after each epoch, falling back to the fixture data/iterator
            Validate(validation_data=self.instances
                     if validation_data is None else validation_data,
                     validation_iterator=iterator
                     if validation_iterator is None else validation_iterator),
            # track the validation metric and handle patience-based early stopping
            TrackMetrics(patience, validation_metric),
            # run the supervised forward/backward pass as a callback
            TrainSupervised(),
            # produce the training batches from the fixture instances
            GenerateTrainingBatches(self.instances, iterator, True),
        ]