def __init__(
        self,
        vocab: Vocabulary,
        generator: Params,
        encoder: Params,
        samples: int,
        lambda_init: float,
        desired_length: float,
        rationale_extractor: Model = None,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):
        """
        Build the generator/encoder sub-models and the Lagrangian-relaxation state.

        The lagrangian relaxation code has been mostly copied from code
        accompanying original paper.

        Parameters
        ----------
        vocab : vocabulary handed to the base class and to both sub-models;
            the size of its "labels" namespace is stored as ``_num_labels``.
        generator, encoder : configurations, each wrapped in ``Params`` and
            passed to ``Model.from_params``.
        samples : stored unchanged in ``_samples``.
        lambda_init : initial value of the one-element ``lambda0`` buffer.
        desired_length : clamped to ``[0.0, 1.0]`` before being stored.
        rationale_extractor : stored unchanged.
        initializer : given to the base class and both sub-models, and
            finally applied to this module.
        regularizer : given to the base class and both sub-models.
        """
        super(KumaraswamyGenEncClassifier, self).__init__(
            vocab, initializer, regularizer)
        self._vocabulary = vocab
        self._num_labels = self._vocabulary.get_vocab_size("labels")

        # Both sub-models share the same vocab / regularizer / initializer.
        shared_kwargs = dict(vocab=vocab,
                             regularizer=regularizer,
                             initializer=initializer)
        self._generator = Model.from_params(params=Params(generator),
                                            **shared_kwargs)
        self._encoder = Model.from_params(params=Params(encoder),
                                          **shared_kwargs)

        self._samples = samples
        non_negative_length = max(0.0, desired_length)
        self._desired_length = min(1.0, non_negative_length)
        self._rationale_extractor = rationale_extractor

        # One running average per tracked quantity.
        tracked_names = ("_lasso_loss", "_base_loss", "_rat_length",
                         "_lambda0", "_c0_ma", "_c0")
        self._loss_tracks = {name: Average() for name in tracked_names}

        # Lower / upper bound pair kept as plain (non-buffer) tensors.
        self.support = [torch.Tensor([-0.1]), torch.Tensor([1.1])]

        # Hyper-parameters of the Lagrange multiplier.
        self.lagrange_alpha = 0.9
        self.lagrange_lr = 0.01
        self.lambda_min = 1e-12
        self.lambda_max = 5.0

        # Multiplier and its moving average live in buffers.
        self.register_buffer("lambda0", torch.full((1, ), lambda_init))
        self.register_buffer("c0_ma", torch.full((1, ), 0.0))

        initializer(self)
    def __init__(
        self,
        vocab: Vocabulary,
        generator: Params,
        encoder: Params,
        samples: int,
        reg_loss_lambda: float,
        desired_length: float,
        reg_loss_mu: float,
        rationale_extractor: Model = None,
        supervise_rationale: bool = False,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):
        """
        Build the generator/encoder sub-models and store the loss weights.

        Parameters
        ----------
        vocab : vocabulary handed to the base class and to both sub-models;
            the size of its "labels" namespace is stored as ``_num_labels``.
        generator, encoder : configurations, each wrapped in ``Params`` and
            passed to ``Model.from_params``.
        samples, reg_loss_lambda, reg_loss_mu : stored unchanged.
        desired_length : forwarded as-is to the generator as
            ``max_length_ratio`` and stored clamped to ``[0.0, 1.0]``.
        rationale_extractor : stored unchanged.
        supervise_rationale : forwarded to the generator and stored.
        initializer : given to the base class and both sub-models, and
            finally applied to this module.
        regularizer : given to the base class and both sub-models.
        """
        super(BernoulliGenEncClassifier, self).__init__(
            vocab, initializer, regularizer)
        self._vocabulary = vocab
        self._num_labels = self._vocabulary.get_vocab_size("labels")

        # Arguments common to both sub-model constructions.
        shared_kwargs = dict(vocab=vocab,
                             regularizer=regularizer,
                             initializer=initializer)
        # The generator additionally receives the supervision flag and the
        # raw (unclamped) desired length.
        self._generator = Model.from_params(
            params=Params(generator),
            supervise_rationale=supervise_rationale,
            max_length_ratio=desired_length,
            **shared_kwargs,
        )
        self._encoder = Model.from_params(params=Params(encoder),
                                          **shared_kwargs)

        self._samples = samples
        self._reg_loss_lambda = reg_loss_lambda
        self._reg_loss_mu = reg_loss_mu
        non_negative_length = max(0.0, desired_length)
        self._desired_length = min(1.0, non_negative_length)
        self._rationale_extractor = rationale_extractor

        # One running average per tracked quantity.
        tracked_names = (
            "_lasso_loss",
            "_base_loss",
            "_rat_length",
            "_fused_lasso_loss",
            "_censored_lasso_loss",
            "_generator_loss",
        )
        self._loss_tracks = {name: Average() for name in tracked_names}

        self._supervise_rationale = supervise_rationale

        initializer(self)
Esempio n. 3
0
    def from_params(cls, params: Params, vocab: Vocabulary,
                    **extras) -> 'QaSrlParser':
        """
        Assemble a ``QaSrlParser`` from its two sub-model configurations.

        The "span_detector" and "question_predictor" entries are popped from
        ``params`` and each is built with ``Model.from_params`` on the shared
        ``vocab``. ``extras`` is accepted but not used.
        """
        detector_config = params.pop("span_detector")
        predictor_config = params.pop("question_predictor")

        # Built in the same order as the configs were popped.
        sub_models = {
            "span_detector":
            Model.from_params(vocab=vocab, params=detector_config),
            "question_predictor":
            Model.from_params(vocab=vocab, params=predictor_config),
        }
        return QaSrlParser(vocab, **sub_models)
    def __init__(
        self,
        vocab: Vocabulary,
        generator: Params,
        encoder: Params,
        samples: int,
        reg_loss_lambda: float,
        desired_length: float,
        reg_loss_mu: float = 2,
        rationale_extractor: Model = None,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):
        """
        Build the generator/encoder sub-models and store the loss weights.

        Parameters
        ----------
        vocab : vocabulary handed to the base class and to both sub-models;
            the size of its "labels" namespace is stored as ``_num_labels``.
        generator, encoder : configurations, each wrapped in ``Params`` and
            passed to ``Model.from_params``.
        samples, reg_loss_lambda, reg_loss_mu : stored unchanged.
        desired_length : clamped to ``[0.0, 1.0]`` before being stored.
        rationale_extractor : stored unchanged.
        initializer : given to the base class and both sub-models, and
            finally applied to this module.
        regularizer : given to the base class and both sub-models.
        """
        super(EncoderGeneratorModel, self).__init__(
            vocab, initializer, regularizer)
        self._vocabulary = vocab
        self._num_labels = self._vocabulary.get_vocab_size("labels")

        # Arguments common to both sub-model constructions.
        shared_kwargs = dict(vocab=vocab,
                             regularizer=regularizer,
                             initializer=initializer)
        self._generator = Model.from_params(params=Params(generator),
                                            **shared_kwargs)
        self._encoder = Model.from_params(params=Params(encoder),
                                          **shared_kwargs)

        self._samples = samples
        self._reg_loss_lambda = reg_loss_lambda
        self._reg_loss_mu = reg_loss_mu
        non_negative_length = max(0.0, desired_length)
        self._desired_length = min(1.0, non_negative_length)
        self._rationale_extractor = rationale_extractor

        # One running average per tracked quantity.
        tracked_names = ("_lasso_loss", "_base_loss", "_rat_length",
                         "_fused_lasso_loss", "_average_span_length")
        self._loss_tracks = {name: Average() for name in tracked_names}

        # Lower / upper bound pair kept as plain (non-buffer) tensors.
        self.support = [torch.Tensor([-0.1]), torch.Tensor([1.1])]

        # NOTE: a Lagrangian-relaxation variant (lagrange_alpha / lagrange_lr
        # attributes plus "lambda0" and "sparsity_ma" buffers) used to be
        # sketched here as commented-out code; it is not active.

        initializer(self)
    def __init__(
        self,
        vocab: Vocabulary,
        generator: Params,
        encoder: Params,
        samples: int,
        reg_loss_lambda: float,
        desired_length: float,
        reg_loss_mu: float,
        rationale_extractor: Model = None,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):
        """
        Build the generator/encoder sub-models and store the loss weights.

        Parameters
        ----------
        vocab : vocabulary handed to the base class and to both sub-models;
            the size of its "labels" namespace is stored as ``_num_labels``.
        generator, encoder : configurations, each wrapped in ``Params`` and
            passed to ``Model.from_params``.
        samples, reg_loss_lambda, reg_loss_mu : stored unchanged.
        desired_length : clamped to ``[0.0, 1.0]`` before being stored.
        rationale_extractor : stored unchanged.
        initializer : given to the base class and both sub-models, and
            finally applied to this module.
        regularizer : given to the base class and both sub-models.
        """
        super(KumaraswamyGenEncClassifier, self).__init__(
            vocab, initializer, regularizer)
        self._vocabulary = vocab
        self._num_labels = self._vocabulary.get_vocab_size("labels")

        # Arguments common to both sub-model constructions.
        shared_kwargs = dict(vocab=vocab,
                             regularizer=regularizer,
                             initializer=initializer)
        self._generator = Model.from_params(params=Params(generator),
                                            **shared_kwargs)
        self._encoder = Model.from_params(params=Params(encoder),
                                          **shared_kwargs)

        self._samples = samples
        self._reg_loss_lambda = reg_loss_lambda
        self._reg_loss_mu = reg_loss_mu
        non_negative_length = max(0.0, desired_length)
        self._desired_length = min(1.0, non_negative_length)
        self._rationale_extractor = rationale_extractor

        # One running average per tracked quantity.
        tracked_names = ("_lasso_loss", "_base_loss", "_rat_length",
                         "_fused_lasso_loss", "_censored_lasso_loss",
                         "_generator_loss")
        self._loss_tracks = {name: Average() for name in tracked_names}

        # Lower / upper bound pair kept as plain (non-buffer) tensors.
        self.support = [torch.Tensor([-0.1]), torch.Tensor([1.1])]

        initializer(self)
Esempio n. 6
0
def load_teacher_model(teacher_path=None, device=-1):
    """
    Load one frozen teacher model per directory listed in ``teacher_path``.

    Parameters
    ----------
    teacher_path : comma-separated list of teacher directories, e.g.
        ``"Models/HotpotQA,Models/SQuAD"``, or ``None``. Each directory is
        expected to hold the config file (``CONFIG_NAME``), a ``vocabulary``
        sub-directory and a ``best.th`` weights file. Empty entries (for
        example from a trailing comma) and surrounding whitespace are ignored.
    device : device index used both for mapping the stored weights and for
        placing the model; a negative value leaves the model where
        ``Model.from_params`` created it.

    Returns
    -------
    dict mapping the last path component of each teacher directory to its
    loaded model, with ``requires_grad`` disabled on every parameter. Empty
    when ``teacher_path`` is ``None`` or names no directory.
    """
    models = {}
    if teacher_path is None:
        return models

    for raw_path in teacher_path.split(","):
        tea_path = raw_path.strip()
        if not tea_path:
            # Nothing to load for an empty entry.
            continue

        # "Models/HotpotQA" -> "HotpotQA"; a trailing "/" must not yield "".
        tea_name = tea_path.rstrip("/").split("/")[-1]

        config = Params.from_file(os.path.join(tea_path, CONFIG_NAME))
        vocab_tea = Vocabulary.from_files(
            os.path.join(tea_path, "vocabulary"))
        tea_model = Model.from_params(vocab=vocab_tea,
                                      params=config.get("model"))

        # Map the stored tensors using the ``device`` argument of this
        # function (the previous code referenced an undefined name here).
        model_state = torch.load(
            os.path.join(tea_path, "best.th"),
            map_location=nn_util.device_mapping(device))
        tea_model.load_state_dict(model_state)
        logger.info("Load teacher model from %s", tea_path)

        # freeze the parameters of teacher model
        for parameter in tea_model.parameters():
            parameter.requires_grad = False

        if device >= 0:
            tea_model.to(device=device)
        models[tea_name] = tea_model

    return models
Esempio n. 7
0
    def _load(cls,
              config: Params,
              serialization_dir: str,
              weights_file: str = None,
              cuda_device: int = -1) -> 'Model':
        """
        Build the model straight from the "model" section of ``config``.

        Ensembles don't have vocabularies or weights of their own, so they
        override _load: the model is created with ``vocab=None`` and neither
        ``serialization_dir`` nor ``weights_file`` is read.
        """
        ensemble_config = config.get('model')

        # The experiment config describes how to _train_ a model, including
        # where pre-trained embeddings come from. When _loading_, those
        # embeddings are already part of the stored weights, so the related
        # entries are stripped to stop the code from looking for the file.
        remove_pretrained_embedding_params(ensemble_config)
        ensemble = Model.from_params(vocab=None, params=ensemble_config)

        # Force the model onto cpu or gpu so that the embeddings are in sync
        # with the weights.
        if cuda_device < 0:
            ensemble.cpu()
        else:
            ensemble.cuda(cuda_device)

        return ensemble
Esempio n. 8
0
def train(model_path, train_path, val_path, seed, vocabulary_path=None, config_path=None):
    """
    Train a model whose configuration and vocabulary live under ``model_path``.

    ``config_path`` defaults to ``<model_path>/config.json`` and
    ``vocabulary_path`` to ``<model_path>/vocabulary``. The reader, model,
    iterator and trainer are all built from the loaded configuration; the
    validation set is only read when ``val_path`` is truthy. The model and its
    trainable parameter count are printed before training starts.

    Raises ``AssertionError`` when the model directory, the config file or the
    vocabulary path does not exist.
    """
    assert os.path.isdir(model_path), "Model directory does not exist"
    set_seed(seed)

    if not config_path:
        config_path = os.path.join(model_path, "config.json")
    assert os.path.isfile(config_path), "Config file does not exist"
    params = Params.from_file(config_path)

    if not vocabulary_path:
        vocabulary_path = os.path.join(model_path, "vocabulary")
    assert os.path.exists(vocabulary_path), "Vocabulary is not ready, do not forget to run preprocess.py first"
    vocabulary = Vocabulary.from_files(vocabulary_path)

    # The reader section is popped from a copy, so ``params`` keeps it.
    reader_params = params.duplicate().pop("reader", default=Params({}))
    reader = DatasetReader.from_params(reader_params)
    train_dataset = reader.read(train_path)
    if val_path:
        val_dataset = reader.read(val_path)
    else:
        val_dataset = None

    model = Model.from_params(params.pop("model"), vocab=vocabulary)
    print(model)
    trainable_count = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Trainable params count: ", trainable_count)

    iterator = DataIterator.from_params(params.pop('iterator'))
    iterator.index_with(vocabulary)

    trainer_params = params.pop('trainer')
    trainer = Trainer.from_params(model, model_path, iterator,
                                  train_dataset, val_dataset, trainer_params)
    trainer.train()
    def from_params(params: Params, iterator, val_iterator, vocab, all_datasets,
                    serialization_dir: str, recover: bool = False) -> \
            'TrainerPieces':
        """
        Build the model and bundle it with already-prepared training pieces.

        The model comes from the "model" section of ``params``; the vocabulary
        is then written to ``<serialization_dir>/vocabulary``. Parameters whose
        name matches any regex in the trainer's "no_grad" list have gradients
        disabled. ``recover`` is accepted but not used here.
        """
        model = Model.from_params(vocab=vocab, params=params.pop('model'))

        # Initializing the model can have side effect of expanding the vocabulary
        vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

        train_data = all_datasets['train']
        validation_data = all_datasets.get('validation')
        test_data = all_datasets.get('test')

        trainer_params = params.pop("trainer")
        no_grad_regexes = trainer_params.pop("no_grad", ())
        for param_name, tensor in model.named_parameters():
            # Freeze on the first matching pattern.
            for pattern in no_grad_regexes:
                if re.search(pattern, param_name):
                    tensor.requires_grad_(False)
                    break

        frozen_names, tunable_names = \
            get_frozen_and_tunable_parameter_names(model)
        report = (
            ("Following parameters are Frozen  (without gradient):",
             frozen_names),
            ("Following parameters are Tunable (with gradient):",
             tunable_names),
        )
        for header, names in report:
            logger.info(header)
            for param_name in names:
                logger.info(param_name)

        return TrainerPieces(model, iterator, train_data, validation_data,
                             test_data, val_iterator, trainer_params)
Esempio n. 10
0
    def _load(
        cls,
        config: Params,
        serialization_dir: str,
        weights_file: Optional[str] = None,
        cuda_device: int = -1,
        opt_level: Optional[str] = None,
    ) -> Model:
        """
        Build the model straight from the "model" section of ``config``.

        Ensembles don't have vocabularies or weights of their own, so they
        override _load: the model is created with ``vocab=None`` and neither
        ``serialization_dir`` nor ``weights_file`` is read.

        Raises ``NotImplementedError`` when ``opt_level`` is given.
        """
        amp_requested = opt_level is not None
        if amp_requested:
            raise NotImplementedError(f"{cls.__name__} does not support AMP yet.")

        ensemble_config = config.get("model")

        # The experiment config describes how to _train_ a model, including
        # where pre-trained embeddings come from. When _loading_, those
        # embeddings are already part of the stored weights, so the related
        # entries are stripped to stop the code from looking for the file.
        remove_weights_related_keys_from_params(ensemble_config)
        ensemble = Model.from_params(vocab=None, params=ensemble_config)

        # Force the model onto cpu or gpu so that the embeddings are in sync
        # with the weights.
        if cuda_device < 0:
            ensemble.cpu()
        else:
            ensemble.cuda(cuda_device)

        return ensemble
Esempio n. 11
0
    def _load(cls,
              config: Params,
              serialization_dir: str,
              weights_file: str = None,
              cuda_device: int = -1) -> 'Model':
        """
        Create the ensemble from the "model" entry of ``config`` and place it.

        Ensembles don't have vocabularies or weights of their own, so they
        override _load; ``serialization_dir`` and ``weights_file`` are part of
        the signature but are not consulted.
        """
        params_for_model = config.get('model')

        # Training-time pointers to pre-trained embedding files are useless
        # when loading (the embeddings are already stored in the weights), so
        # drop them before the model is constructed.
        remove_pretrained_embedding_params(params_for_model)
        loaded = Model.from_params(vocab=None, params=params_for_model)

        # Keep embeddings and weights on the same device.
        use_gpu = cuda_device >= 0
        if use_gpu:
            loaded.cuda(cuda_device)
        else:
            loaded.cpu()

        return loaded
Esempio n. 12
0
    def from_params(params: Params, serialization_dir: str, recover: bool = False) -> 'TrainerPieces':
        """
        Build datasets, vocabulary, model and iterators from ``params``.

        The vocabulary is reloaded from ``<serialization_dir>/vocabulary`` when
        ``recover`` is set and that path exists; otherwise it is created from
        the instances of the datasets named in "datasets_for_vocab_creation"
        (all datasets by default). After the model is built the vocabulary is
        saved back to that directory. Parameters whose name matches any regex
        in the trainer's "no_grad" list have gradients disabled.

        Raises ``ConfigurationError`` when "datasets_for_vocab_creation" names
        a dataset that was not loaded.
        """
        all_datasets = training_util.datasets_from_params(params)
        datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

        for dataset_name in datasets_for_vocab_creation:
            if dataset_name not in all_datasets:
                raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset_name}")

        selected_names = ", ".join(datasets_for_vocab_creation)
        logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                    selected_names)

        vocab_dir = os.path.join(serialization_dir, "vocabulary")
        if recover and os.path.exists(vocab_dir):
            vocab = Vocabulary.from_files(vocab_dir)
            # Still consume the section so it does not linger in ``params``.
            params.pop("vocabulary", {})
        else:
            vocab_params = params.pop("vocabulary", {})
            vocab_instances = (
                instance
                for key, dataset in all_datasets.items()
                for instance in dataset
                if key in datasets_for_vocab_creation
            )
            vocab = Vocabulary.from_params(vocab_params, vocab_instances)

        model = Model.from_params(vocab=vocab, params=params.pop('model'))

        # Initializing the model can have side effect of expanding the vocabulary
        vocab.save_to_files(vocab_dir)

        iterator = DataIterator.from_params(params.pop("iterator"))
        iterator.index_with(model.vocab)

        # A dedicated validation iterator is optional.
        validation_iterator = None
        validation_iterator_params = params.pop("validation_iterator", None)
        if validation_iterator_params:
            validation_iterator = DataIterator.from_params(validation_iterator_params)
            validation_iterator.index_with(model.vocab)

        train_data = all_datasets['train']
        validation_data = all_datasets.get('validation')
        test_data = all_datasets.get('test')

        trainer_params = params.pop("trainer")
        no_grad_regexes = trainer_params.pop("no_grad", ())
        for param_name, tensor in model.named_parameters():
            # Freeze on the first matching pattern.
            for pattern in no_grad_regexes:
                if re.search(pattern, param_name):
                    tensor.requires_grad_(False)
                    break

        frozen_names, tunable_names = \
            get_frozen_and_tunable_parameter_names(model)
        logger.info("Following parameters are Frozen  (without gradient):")
        for param_name in frozen_names:
            logger.info(param_name)
        logger.info("Following parameters are Tunable (with gradient):")
        for param_name in tunable_names:
            logger.info(param_name)

        return TrainerPieces(model, iterator,
                             train_data, validation_data, test_data,
                             validation_iterator, trainer_params)
Esempio n. 13
0
    def __init__(
        self,
        vocab: Vocabulary,
        generator: Params,
        encoder: Params,
        samples: int,
        reg_loss_lambda: float,
        desired_length: float,
        reg_loss_mu: float = 2,
        rationale_extractor: Model = None,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):
        """
        Build the generator/encoder sub-models and store the loss weights.

        Parameters
        ----------
        vocab : vocabulary handed to the base class and to both sub-models;
            the size of its "labels" namespace is stored as ``_num_labels``.
        generator, encoder : configurations, each wrapped in ``Params`` and
            passed to ``Model.from_params``.
        samples, reg_loss_lambda, reg_loss_mu : stored unchanged.
        desired_length : clamped to ``[0.0, 1.0]`` before being stored.
        rationale_extractor : stored unchanged.
        initializer : given to the base class and both sub-models, and
            finally applied to this module.
        regularizer : given to the base class and both sub-models.
        """
        super(EncoderGeneratorModel, self).__init__(
            vocab, initializer, regularizer)
        self._vocabulary = vocab
        self._num_labels = self._vocabulary.get_vocab_size("labels")

        # Arguments common to both sub-model constructions.
        shared_kwargs = dict(vocab=vocab,
                             regularizer=regularizer,
                             initializer=initializer)
        self._generator = Model.from_params(params=Params(generator),
                                            **shared_kwargs)
        self._encoder = Model.from_params(params=Params(encoder),
                                          **shared_kwargs)

        self._samples = samples
        self._reg_loss_lambda = reg_loss_lambda
        self._reg_loss_mu = reg_loss_mu
        non_negative_length = max(0.0, desired_length)
        self._desired_length = min(1.0, non_negative_length)
        self._rationale_extractor = rationale_extractor

        # One running average per tracked quantity.
        tracked_names = ("_lasso_loss", "_base_loss", "_rat_length",
                         "_fused_lasso_loss", "_average_span_length")
        self._loss_tracks = {name: Average() for name in tracked_names}

        initializer(self)
    def from_params(cls, vocab: Vocabulary, params: Params) -> 'GraphAndTextModel':
        """
        Construct the combined model from its sub-model configurations.

        Pops, in order, "text_model", "graph_model", "classify_feed_forward",
        "initializer" and "regularizer" from ``params`` and asserts nothing
        else is left over before instantiating ``cls``.
        """
        # NOTE(review): "text_model" falls back to None, and that value is
        # handed directly to Model.from_params - confirm this is supported.
        text_model_config = params.pop("text_model", None)
        text_model = Model.from_params(vocab, text_model_config)

        graph_model_config = params.pop("graph_model")
        graph_model = Model.from_params(vocab, graph_model_config)

        feed_forward_config = params.pop('classify_feed_forward')
        classify_feed_forward = FeedForward.from_params(feed_forward_config)

        initializer_config = params.pop('initializer', [])
        initializer = InitializerApplicator.from_params(initializer_config)
        regularizer_config = params.pop('regularizer', [])
        regularizer = RegularizerApplicator.from_params(regularizer_config)

        # Any key still present at this point is unexpected.
        params.assert_empty(cls.__name__)

        constructor_kwargs = dict(
            vocab=vocab,
            classify_feed_forward=classify_feed_forward,
            text_model=text_model,
            graph_model=graph_model,
            initializer=initializer,
            regularizer=regularizer,
        )
        return cls(**constructor_kwargs)
Esempio n. 15
0
def openie_model(serialization_dir, weights_file=None, cuda_device=-1):
    """
    Instantiates an already-trained model, based on the experiment
    configuration and some optional overrides.

    The configuration is read from ``CONFIG_NAME`` inside
    ``serialization_dir``; weights default to ``_WEIGHTS_NAME`` in the same
    directory when ``weights_file`` is not given.

    Returns a ``(model, config)`` tuple, with the model placed on
    ``cuda_device`` (or on the cpu when it is negative).
    """
    # Load config, flag it as coming from an archive, then work on a copy.
    config_file = os.path.join(serialization_dir, CONFIG_NAME)
    config = Params.from_file(config_file, "")
    config.loading_from_archive = True
    config = config.duplicate()

    if not weights_file:
        weights_file = os.path.join(serialization_dir, _WEIGHTS_NAME)

    # Load vocabulary from file.
    # If the config specifies a vocabulary subclass, we need to use it.
    vocab_dir = os.path.join(serialization_dir, "vocabulary")
    vocab_params = config.get("vocabulary", Params({}))
    vocab_choice = vocab_params.pop_choice(
        "type", Vocabulary.list_available(), True)
    vocab_class = Vocabulary.by_name(vocab_choice)
    vocab = vocab_class.from_files(vocab_dir)

    model_params = config.get("model")

    # The experiment config describes how to _train_ a model, including where
    # pre-trained embeddings come from. When _loading_, those embeddings are
    # already part of the stored weights, so the related entries are removed
    # to stop the code from looking for the file.
    remove_pretrained_embedding_params(model_params)
    model = Model.from_params(vocab=vocab, params=model_params)

    # If vocab+embedding extension was done, the freshly built model and the
    # state dict in weights_file might disagree on embedding shapes (e.g. the
    # initialized embedding weight would be smaller than the stored one), so
    # the embedder must be extended before load_state_dict. When vocab and
    # embeddings are already in sync this is a no-op.
    model.extend_embedder_vocab()

    device_map = util.device_mapping(cuda_device)
    model_state = torch.load(weights_file, map_location=device_map)
    model.load_state_dict(model_state)

    # Force the model onto cpu or gpu so that the embeddings are in sync with
    # the weights.
    if cuda_device < 0:
        model.cpu()
    else:
        model.cuda(cuda_device)

    return model, config
Esempio n. 16
0
    def __init__(
        self,
        vocab: Vocabulary,
        generator: Params,
        encoder: Params,
        reg_loss_lambda: float,
        reg_loss_mu: float = 2,
        reinforce_loss_weight: float = 1.0,
        rationale_supervision_loss_weight: float = 1.0,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ):
        """
        Build the generator/encoder sub-models and store the loss weights.

        Parameters
        ----------
        vocab : vocabulary handed to the base class and to both sub-models;
            the size of its "labels" namespace is stored as ``_num_labels``.
        generator, encoder : configurations, each wrapped in ``Params`` and
            passed to ``Model.from_params``.
        reg_loss_lambda, reg_loss_mu, reinforce_loss_weight,
        rationale_supervision_loss_weight : stored unchanged.
        initializer : given to the base class and both sub-models, and
            finally applied to this module.
        regularizer : given to the base class and both sub-models.
        """
        super(EncoderGeneratorModel, self).__init__(
            vocab, initializer, regularizer)
        self._vocabulary = vocab
        self._num_labels = self._vocabulary.get_vocab_size("labels")

        # Arguments common to both sub-model constructions.
        shared_kwargs = dict(vocab=vocab,
                             regularizer=regularizer,
                             initializer=initializer)
        self._generator = Model.from_params(params=Params(generator),
                                            **shared_kwargs)
        self._encoder = Model.from_params(params=Params(encoder),
                                          **shared_kwargs)

        self._reg_loss_lambda = reg_loss_lambda
        self._reg_loss_mu = reg_loss_mu
        self._reinforce_loss_weight = reinforce_loss_weight
        self._rationale_supervision_loss_weight = rationale_supervision_loss_weight

        # One running average per tracked quantity.
        tracked_names = ("lasso_loss", "base_loss", "fused_lasso_loss")
        self._loss_tracks = {name: Average() for name in tracked_names}

        initializer(self)
Esempio n. 17
0
    def from_params(cls, params: Params,
                    vocab: Vocabulary) -> 'CMVMultiChannelPredictor':
        """
        Construct the predictor from its component configurations.

        Pops, in order, "response_only_predictor", "op_response_predictor",
        "output_feedforward", "dropout" (default 0), "initializer" and
        "regularizer" from ``params``, then asserts that no key is left.
        """
        response_only_config = params.pop('response_only_predictor')
        response_only_predictor = Model.from_params(
            params=response_only_config, vocab=vocab)

        op_response_config = params.pop('op_response_predictor')
        op_response_predictor = Model.from_params(
            params=op_response_config, vocab=vocab)

        feedforward_config = params.pop('output_feedforward')
        output_feedforward = FeedForward.from_params(
            params=feedforward_config)

        dropout = params.pop("dropout", 0)

        initializer_config = params.pop('initializer', [])
        initializer = InitializerApplicator.from_params(
            params=initializer_config)
        regularizer_config = params.pop('regularizer', [])
        regularizer = RegularizerApplicator.from_params(
            params=regularizer_config)

        # Any key still present at this point is unexpected.
        params.assert_empty(cls.__name__)

        constructor_args = (vocab, response_only_predictor,
                            op_response_predictor, output_feedforward,
                            dropout, initializer, regularizer)
        return cls(*constructor_args)
Esempio n. 18
0
def _get_predictor(args: argparse.Namespace) -> Predictor:
    """
    Build a predictor around a model created from the extractor config.

    The model is instantiated (with ``vocab=None``) from the "model" section
    of ``args.extractor_config_file`` and wrapped, together with the remaining
    config, in an ``Archive`` that ``Predictor.from_archive`` consumes.
    """
    check_for_gpu(args.cuda_device)
    params = Params.from_file(args.extractor_config_file)

    model = Model.from_params(vocab=None, params=params.pop('model'))

    # A non-negative index selects that device; otherwise ``None`` is passed.
    target_device = args.cuda_device if args.cuda_device >= 0 else None
    model.to(target_device)

    archive = Archive(model=model, config=params)
    predictor = Predictor.from_archive(
        archive,
        args.predictor,
        dataset_reader_to_load=args.dataset_reader_choice)
    return predictor
    def _test_model(self, file_name):
        """
        Train the model configured under ``file_name`` on the example data and
        compare its decoded tokens with the tokenized references.

        Only the "cnn_dailymail" and "ria" reader types are supported; any
        other type fails the test immediately. Out-of-vocabulary tokens are
        removed from the prediction (and from the reference at the same
        position) before comparing, and fewer than 5 of them are tolerated.
        """
        params = self.params[file_name].duplicate()
        # Popped from a copy, so ``params`` keeps its reader section.
        reader_params = params.duplicate().pop("reader", default=Params({}))
        reader_type = reader_params["type"]
        if reader_type == "cnn_dailymail":
            reader_params["cnn_tokenized_dir"] = TEST_STORIES_DIR
            dataset_file = TEST_URLS_FILE
        elif reader_type == "ria":
            dataset_file = RIA_EXAMPLE_FILE
        else:
            assert False

        reader = DatasetReader.from_params(reader_params)
        tokenizer = reader._tokenizer
        dataset = reader.read(dataset_file)
        vocabulary_params = params.pop("vocabulary", default=Params({}))
        vocabulary = Vocabulary.from_params(vocabulary_params,
                                            instances=dataset)

        model = Model.from_params(params.pop("model"), vocab=vocabulary)
        print(model)
        trainable_count = sum(
            p.numel() for p in model.parameters() if p.requires_grad)
        print("Trainable params count: ", trainable_count)

        iterator = DataIterator.from_params(params.pop('iterator'))
        iterator.index_with(vocabulary)
        trainer_params = params.pop('trainer')
        trainer = Trainer.from_params(model, None, iterator, dataset, None,
                                      trainer_params)
        trainer.train()

        model.eval()
        predictor = Seq2SeqPredictor(model, reader)
        for article, reference_sents in reader.parse_set(dataset_file):
            reference_tokens = tokenizer.tokenize(reference_sents)
            ref_words = [token.text for token in reference_tokens]
            prediction = predictor.predict(article)
            decoded_words = prediction["predicted_tokens"]
            self.assertGreaterEqual(len(decoded_words), len(ref_words))

            # Drop every OOV token from the prediction, together with the
            # reference word at the same index when there is one.
            unk_count = 0
            while DEFAULT_OOV_TOKEN in decoded_words:
                unk_index = decoded_words.index(DEFAULT_OOV_TOKEN)
                del decoded_words[unk_index]
                unk_count += 1
                if unk_index < len(ref_words):
                    del ref_words[unk_index]
            self.assertLess(unk_count, 5)

            decoded_prefix = decoded_words[:len(ref_words)]
            self.assertListEqual(decoded_prefix, ref_words)
Esempio n. 20
0
def _get_predictor(args: argparse.Namespace) -> Predictor:
    """
    Build a predictor whose model wraps the model of a loaded archive.

    The archive is loaded from ``args.archive_file``; a new model is then
    created from the scorer config with the archived model passed in as
    ``model``, moved to ``args.cuda_device`` and packaged with the archive's
    own config.
    """
    check_for_gpu(args.cuda_device)
    params = Params.from_file(args.scorer_config_file)

    load_options = dict(
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    loaded_archive = load_archive(args.archive_file, **load_options)

    model = Model.from_params(vocab=None,
                              model=loaded_archive.model,
                              params=params)
    model.to(args.cuda_device)

    # Re-wrap so the predictor sees the new model with the original config.
    wrapped_archive = Archive(model=model, config=loaded_archive.config)

    predictor = Predictor.from_archive(
        wrapped_archive,
        args.predictor,
        dataset_reader_to_load=args.dataset_reader_choice)
    return predictor
Esempio n. 21
0
def instantiate_model_from_config(config_file_path: str,
                                  cuda_device: int = -1,
                                  overrides: str = None,
                                  include_package: str = "models") -> Model:
    """
    Create a model from a config file, without loading any weights.

    ``include_package`` is imported first (with its submodules). The
    vocabulary is read from the "directory_path" entry of the config's
    "vocabulary" section and the model is built from the "model" section.

    Note that this call disables logging at level INFO and below and does not
    re-enable it.
    """
    logging.disable(logging.INFO)
    import_module_and_submodules(include_package)

    params = Params.from_file(config_file_path, overrides)

    vocabulary_section = params.pop("vocabulary")
    vocab_dir = vocabulary_section.pop("directory_path")
    vocab = Vocabulary.from_files(vocab_dir)

    model_section = params.pop("model")
    model = Model.from_params(vocab=vocab, params=model_section)

    # Place the model on the requested gpu, or on the cpu for a negative index.
    if cuda_device < 0:
        model.cpu()
    else:
        model.cuda(cuda_device)

    return model
Esempio n. 22
0
def load_model(model_name="conll_full_elmo"):
    """
    Load both vocabulary and model and create an instance of
    HMTL full model.

    Parameters
    ----------
    model_name : one of "conll_small_elmo", "conll_medium_elmo" or
        "conll_full_elmo"; selects the sub-directory of ``model_dumps`` that
        holds ``config.json``, ``vocabulary`` and ``weights.th``.

    Returns
    -------
    ``(model, vocab, token_indexers)`` where ``token_indexers`` maps each
    indexer name to the ``TokenIndexer`` built from the dataset reader
    configuration of the last ``task_*`` section in the config. Weights are
    loaded onto the cpu.

    Raises
    ------
    ValueError
        If ``model_name`` is not a known model, or if the configuration
        contains no ``task_*`` section to take the token indexers from.
    """
    valid_names = ("conll_small_elmo", "conll_medium_elmo", "conll_full_elmo")
    if model_name not in valid_names:
        raise ValueError(f"{model_name} is not a valid name of model.")

    serialization_dir = os.path.join("model_dumps", model_name)
    config_path = os.path.join(serialization_dir, "config.json")
    params = Params.from_file(params_file=config_path)

    # Load TokenIndexer
    task_keys = [key for key in params.keys() if re.search("^task_", key)]
    if not task_keys:
        # Fail with a clear message instead of an IndexError below.
        raise ValueError(
            f"No 'task_*' section found in {config_path}; "
            "cannot determine the token indexers.")
    token_indexer_params = params.pop(task_keys[-1]).pop("data_params").pop(
        "dataset_reader").pop("token_indexers")
    # see https://github.com/allenai/allennlp/issues/181 for better syntax
    token_indexers = {
        name: TokenIndexer.from_params(indexer_params)
        for name, indexer_params in token_indexer_params.items()
    }

    # Load the vocabulary
    vocab_dir = os.path.join(serialization_dir, "vocabulary")
    logger.info("Loading Vocabulary from %s", vocab_dir)
    vocab = Vocabulary.from_files(vocab_dir)
    logger.info("Vocabulary loaded")

    # Create model and load weights
    model_params = params.pop("model")
    model = Model.from_params(vocab=vocab,
                              params=model_params,
                              regularizer=None)
    model_state_path = os.path.join(serialization_dir, "weights.th")
    model_state = torch.load(model_state_path, map_location="cpu")
    model.load_state_dict(state_dict=model_state)

    return model, vocab, token_indexers
    def from_params(
        cls,  # type: ignore
        params: Params,
        serialization_dir: str,
        recover: bool = False,
    ) -> "MultiTaskTrainer":
        """
        Build a ``MultiTaskTrainer`` and everything it needs from ``params``.

        One dataset reader is created per entry of "train_dataset_readers" and
        used to read the matching path from "train_file_paths". The vocabulary
        is built from the instances of all resulting datasets. An optional
        "trainer" section is popped and discarded; afterwards ``params`` must
        be empty. ``recover`` is accepted but not used here.
        """
        reader_configs = params.pop("train_dataset_readers")
        readers = {}
        for name, reader_params in reader_configs.items():
            readers[name] = DatasetReader.from_params(reader_params)

        train_file_paths = params.pop("train_file_paths").as_dict()

        datasets = {}
        for name, reader in readers.items():
            datasets[name] = reader.read(train_file_paths[name])

        # Lazily chain the instances of every dataset for vocabulary creation.
        instances = (instance for dataset in datasets.values()
                     for instance in dataset)
        vocab = Vocabulary.from_params(Params({}), instances)

        model = Model.from_params(params.pop("model"), vocab=vocab)
        iterator = DataIterator.from_params(params.pop("iterator"))
        iterator.index_with(vocab)
        mingler = DatasetMingler.from_params(params.pop("mingler"))

        # Only parameters that require gradients are handed to the optimizer.
        parameters = [[param_name, tensor]
                      for param_name, tensor in model.named_parameters()
                      if tensor.requires_grad]
        optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

        num_epochs = params.pop_int("num_epochs", 10)

        # Consume the section so that the emptiness check below passes.
        params.pop("trainer", Params({}))

        params.assert_empty(__name__)

        return MultiTaskTrainer(model, serialization_dir, iterator, mingler,
                                optimizer, datasets, num_epochs)
def modified_model_load(config: Params,
                        serialization_dir: str,
                        weights_file: str = None,
                        cuda_device: int = -1) -> Model:
    """
    Rebuild a trained model from its experiment configuration and load its weights.

    Parameters
    ----------
    config : ``Params``
        Experiment configuration; only its ``model`` section is read here.
    serialization_dir : ``str``
        Directory containing the ``vocabulary`` sub-directory and, unless
        ``weights_file`` is given, the default weights file.
    weights_file : ``str``, optional (default=None)
        Path of the weights to load. When falsy, ``_DEFAULT_WEIGHTS`` inside
        ``serialization_dir`` is used.
    cuda_device : ``int``, optional (default=-1)
        Device to place the model on; a negative value selects the CPU.

    Returns
    -------
    ``Model``
        The model with the stored weights loaded and moved to the requested device.
    """
    if not weights_file:
        weights_file = os.path.join(serialization_dir, _DEFAULT_WEIGHTS)

    # The vocabulary is always read with the base ``Vocabulary`` class.
    vocab = Vocabulary.from_files(
        os.path.join(serialization_dir, 'vocabulary'))

    model_params = config.get('model')

    # The configuration describes how to *train* the model, which may include a
    # pointer to pre-trained embeddings.  Those embeddings are already part of the
    # saved weights, so the pointer is stripped to stop the code looking for the file.
    remove_pretrained_embedding_params(model_params)
    model = Model.from_params(vocab=vocab, params=model_params)

    # ``strict=False``: the state-dict keys are not required to match the model exactly.
    state_dict = torch.load(weights_file,
                            map_location=util.device_mapping(cuda_device))
    model.load_state_dict(state_dict, strict=False)

    # Move the whole model explicitly so the embeddings end up on the same device
    # as the rest of the weights.
    if cuda_device < 0:
        model.cpu()
    else:
        model.cuda(cuda_device)

    return model
Esempio n. 25
0
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False) -> Model:
    """
    Train the model described by ``params`` and write the configuration snapshot,
    vocabulary, model archive and ``metrics.json`` into ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        Forwarded to ``prepare_global_logging``; intended to make progress output
        friendlier to log files.
    recover : ``bool``, optional (default=False)
        Forwarded to ``create_serialization_dir``; intended for resuming a crashed
        run from an existing serialization directory.

    Returns
    -------
    model : ``Model``
        The model object that was handed to the trainer.
    """
    prepare_environment(params)
    create_serialization_dir(params, serialization_dir, recover)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # Snapshot the configuration before the pops below start consuming it.
    config_snapshot = deepcopy(params).as_dict(quiet=True)
    config_path = os.path.join(serialization_dir, CONFIG_NAME)
    with open(config_path, "w") as config_file:
        json.dump(config_snapshot, config_file, indent=4)

    all_datasets = datasets_from_params(params)

    # By default every loaded dataset contributes to the vocabulary.
    vocab_sources = set(params.pop("datasets_for_vocab_creation", all_datasets))
    for dataset in vocab_sources:
        if dataset in all_datasets:
            continue
        raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("Creating a vocabulary using %s data.", ", ".join(vocab_sources))
    vocab_instances = (example
                       for split_name, split in all_datasets.items()
                       for example in split
                       if split_name in vocab_sources)
    vocab = Vocabulary.from_params(params.pop("vocabulary", {}), vocab_instances)
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    model = Model.from_params(vocab, params.pop('model'))
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    # Only the training split is mandatory.
    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer = Trainer.from_params(model,
                                  serialization_dir,
                                  iterator,
                                  train_data,
                                  validation_data,
                                  params.pop("trainer"))

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    # Any configuration key nobody consumed is treated as an error.
    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # If weights from a finished epoch exist, still try to produce an archive
        # before propagating the interrupt.
        best_weights_path = os.path.join(serialization_dir, _DEFAULT_WEIGHTS)
        if os.path.exists(best_weights_path):
            logging.info("Training interrupted by the user. Attempting to create "
                         "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Training finished normally: package the results.
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    if test_data and evaluate_on_test:
        test_metrics = evaluate(model,
                                test_data,
                                iterator,
                                cuda_device=trainer._cuda_devices[0])  # pylint: disable=protected-access
        # Test metrics are stored alongside the training metrics with a "test_" prefix.
        metrics.update({"test_" + key: value for key, value in test_metrics.items()})
    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    metrics_json = json.dumps(metrics, indent=2)
    metrics_path = os.path.join(serialization_dir, "metrics.json")
    with open(metrics_path, "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return model
Esempio n. 26
0
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        Forwarded to ``prepare_global_logging``; intended to make progress output
        friendlier to log files.
    recover : ``bool``, optional (default=False)
        Forwarded to ``create_serialization_dir``; intended for resuming a crashed
        run from an existing serialization directory.

    Returns
    -------
    best_model: ``Model``
        The trained model object, reloaded with the weights stored in ``best.th``.
    """
    prepare_environment(params)

    create_serialization_dir(params, serialization_dir, recover)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # ``cuda_device`` may be a single id or a list of ids (the trainer exposes a list
    # through ``_cuda_devices``), so every requested device is validated on its own
    # instead of handing a list to ``check_for_gpu``.
    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    if isinstance(cuda_device, list):
        for device in cuda_device:
            check_for_gpu(device)
    else:
        check_for_gpu(cuda_device)

    # Snapshot the configuration before the pops below start consuming it.
    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    all_datasets = datasets_from_params(params)
    # By default every loaded dataset contributes to the vocabulary.
    datasets_for_vocab_creation = set(
        params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(
                f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("Creating a vocabulary using %s data.",
                ", ".join(datasets_for_vocab_creation))
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        (instance for key, dataset in all_datasets.items()
         for instance in dataset if key in datasets_for_vocab_creation))

    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)
    # A dedicated validation iterator is optional; ``None`` is passed otherwise.
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(
            validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    # Only the training split is mandatory.
    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    # Freeze every parameter whose name matches one of the "no_grad" regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
                   get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer = Trainer.from_params(model,
                                  serialization_dir,
                                  iterator,
                                  train_data,
                                  validation_data,
                                  trainer_params,
                                  validation_iterator=validation_iterator)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    # Any configuration key nobody consumed is treated as an error.
    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info(
                "Training interrupted by the user. Attempting to create "
                "a model archive using the current best epoch weights.")
            archive_model(serialization_dir,
                          files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    logger.info("Loading the best epoch weights.")
    best_model_state_path = os.path.join(serialization_dir, 'best.th')
    best_model_state = torch.load(best_model_state_path)
    # ``best_model`` is the same object as ``model``; its weights are overwritten in place.
    best_model = model
    best_model.load_state_dict(best_model_state)

    if test_data and evaluate_on_test:
        logger.info(
            "The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            best_model,
            test_data,
            validation_iterator or iterator,
            cuda_device=trainer._cuda_devices[0]  # pylint: disable=protected-access
        )
        # Test metrics are stored alongside the training metrics with a "test_" prefix.
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    metrics_json = json.dumps(metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"),
              "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return best_model
Esempio n. 27
0
    def from_params(cls,
                    params: Params,
                    serialization_dir: str,
                    recover: bool = False,
                    cache_directory: str = None,
                    cache_prefix: str = None) -> 'PtDistTrainer':
        """
        Build a trainer instance from an experiment configuration.

        Parameters
        ----------
        params : ``Params``
            Full experiment configuration.  ``vocabulary.directory_path`` and
            ``iterator.batch_size`` are read from it, the ``model`` and ``trainer``
            sections are consumed, and every other top-level key is discarded.
        serialization_dir : ``str``
            Directory receiving the vocabulary (written only when ``is_master_rank()``
            is true) and handed to the ``Checkpointer``.
        recover : ``bool``, optional (default=False)
            Accepted but not used by this method.
        cache_directory : ``str``, optional (default=None)
            Forwarded to ``training_util.datasets_from_params``.
        cache_prefix : ``str``, optional (default=None)
            Forwarded to ``training_util.datasets_from_params``.

        Returns
        -------
        ``PtDistTrainer``
            An instance of ``cls`` configured with the model, optimizer, datasets and
            the trainer options read from the ``trainer`` section.
        """
        all_datasets = training_util.datasets_from_params(
            params, cache_directory, cache_prefix)

        # The vocabulary is loaded from disk rather than built from the datasets.
        vocab = Vocabulary.from_files(params.vocabulary.directory_path)

        model = Model.from_params(vocab=vocab, params=params.pop('model'))
        model.extend_embedder_vocab()
        if is_master_rank():
            vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

        train_data = all_datasets['train']
        validation_data = all_datasets.get('validation')

        # Read before the remaining top-level keys are dropped below.
        batch_size = params.iterator.batch_size

        trainer_params = params.pop("trainer")
        # Discard whatever is left at the top level; from here on only the trainer
        # section is consulted.
        for leftover_key in list(params):
            params.pop(leftover_key)

        validation_metric = trainer_params.pop("validation_metric", "-loss")
        shuffle = trainer_params.pop_bool("shuffle", True)
        num_epochs = trainer_params.pop_int("num_epochs", 20)
        cuda_device = parse_cuda_device(trainer_params.pop("cuda_device", -1))
        grad_clipping = trainer_params.pop_float("grad_clipping", None)
        lr_scheduler_params = trainer_params.pop("learning_rate_scheduler", None)
        pretrain_file = trainer_params.pop("pretrain_file", None)

        # Freeze every parameter whose name matches one of the "no_grad" regexes.
        no_grad_regexes = trainer_params.pop("no_grad", ())
        for param_name, tensor in model.named_parameters():
            for pattern in no_grad_regexes:
                if re.search(pattern, param_name):
                    tensor.requires_grad_(False)
                    break

        frozen_names, tunable_names = get_frozen_and_tunable_parameter_names(model)
        logger.info("Following parameters are Frozen  (without gradient):")
        for frozen_name in frozen_names:
            logger.info(frozen_name)
        logger.info("Following parameters are Tunable (with gradient):")
        for tunable_name in tunable_names:
            logger.info(tunable_name)

        # The distributed rank is used as the CUDA device index of this process.
        rank = dist.get_rank()
        model = model.cuda(rank)
        if pretrain_file:
            pretrained_state = torch.load(
                pretrain_file, map_location=nn_util.device_mapping(rank))
            model.load_state_dict(pretrained_state)

        # Only parameters that still require gradients are given to the optimizer.
        trainable = []
        for param_name, tensor in model.named_parameters():
            if tensor.requires_grad:
                trainable.append([param_name, tensor])
        optimizer = Optimizer.from_params(trainable,
                                          trainer_params.pop("optimizer"))

        lr_scheduler = (LearningRateScheduler.from_params(optimizer,
                                                          lr_scheduler_params)
                        if lr_scheduler_params else None)

        checkpointer = Checkpointer(
            serialization_dir=serialization_dir,
            num_serialized_models_to_keep=trainer_params.pop_int(
                "num_serialized_models_to_keep", 20),
            keep_serialized_model_every_num_seconds=None)

        return cls(model,
                   optimizer,
                   train_data,
                   validation_data,
                   batch_size=batch_size,
                   validation_metric=validation_metric,
                   shuffle=shuffle,
                   num_epochs=num_epochs,
                   serialization_dir=serialization_dir,
                   cuda_device=cuda_device,
                   grad_clipping=grad_clipping,
                   learning_rate_scheduler=lr_scheduler,
                   checkpointer=checkpointer)
Esempio n. 28
0
        }})
    params = Params.from_file(model_config, overrides)
    model_file = 'checkpoint/%s%s/' % (model_name, attn)
    iterator = DataIterator.from_params(params.pop("iterator"))

    torch.manual_seed(0)
    numpy.random.seed(0)

    if write_file:
        wf = Write_outfile(Wfile_name)

    print("Loading vocabulary")
    vocab = Vocabulary.from_files(model_file + 'vocabulary')

    print('Initialing model')
    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    print("Loading Model file from %s" % (model_file + 'best.th'))
    with open(model_file + 'best.th', 'rb') as f:
        model.load_state_dict(torch.load(f, encoding='utf-8'))

    iterator.index_with(vocab)
    dataset_reader_params = params.pop('dataset_reader')
    datareader = DatasetReader.from_params(dataset_reader_params)
    model.eval()

    #读取文件数据
    for file in files:
        dom = xml.dom.minidom.parse(file)
        root = dom.documentElement
        OrgQ_list = root.getElementsByTagName('OrgQuestion')
        q1_last = None
Esempio n. 29
0
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False) -> Model:
    """
    Train the model described by ``params`` and write the logs, configuration
    snapshot, vocabulary, model archive and ``metrics.json`` into
    ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        Forwarded to ``Tqdm.set_slower_interval`` and to the ``TeeLogger`` objects
        that replace ``sys.stdout`` / ``sys.stderr``.
    recover : ``bool``, optional (default=False)
        Forwarded to ``create_serialization_dir``; intended for resuming a crashed
        run from an existing serialization directory.

    Returns
    -------
    model : ``Model``
        The model object that was handed to the trainer.
    """
    prepare_environment(params)
    create_serialization_dir(params, serialization_dir, recover)

    # TODO(mattg): pull this block out into a separate function (maybe just add this to
    # `prepare_environment`?)
    Tqdm.set_slower_interval(file_friendly_logging)

    # Mirror stdout and stderr into files inside the serialization directory.
    stdout_log = os.path.join(serialization_dir, "stdout.log")
    stderr_log = os.path.join(serialization_dir, "stderr.log")
    sys.stdout = TeeLogger(stdout_log, sys.stdout, file_friendly_logging)  # type: ignore
    sys.stderr = TeeLogger(stderr_log, sys.stderr, file_friendly_logging)  # type: ignore

    # Additionally route INFO-and-above records from the root logger to a file.
    log_format = logging.Formatter(
        '%(asctime)s - %(levelname)s - %(name)s - %(message)s')
    file_handler = logging.FileHandler(
        os.path.join(serialization_dir, "python_logging.log"))
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(log_format)
    logging.getLogger().addHandler(file_handler)

    # Snapshot the configuration before the pops below start consuming it.
    config_snapshot = deepcopy(params).as_dict(quiet=True)
    config_path = os.path.join(serialization_dir, CONFIG_NAME)
    with open(config_path, "w") as config_file:
        json.dump(config_snapshot, config_file, indent=4)

    all_datasets = datasets_from_params(params)

    # By default every loaded dataset contributes to the vocabulary.
    vocab_sources = set(params.pop("datasets_for_vocab_creation", all_datasets))
    for dataset in vocab_sources:
        if dataset in all_datasets:
            continue
        raise ConfigurationError(
            f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("Creating a vocabulary using %s data.",
                ", ".join(vocab_sources))
    vocab_instances = (example
                       for split_name, split in all_datasets.items()
                       for example in split
                       if split_name in vocab_sources)
    vocab = Vocabulary.from_params(params.pop("vocabulary", {}),
                                   vocab_instances)
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    model = Model.from_params(vocab, params.pop('model'))
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    # Only the training split is mandatory.
    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer = Trainer.from_params(model, serialization_dir, iterator,
                                  train_data, validation_data,
                                  params.pop("trainer"))

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    # Any configuration key nobody consumed is treated as an error.
    params.assert_empty('base train command')
    metrics = trainer.train()

    # Package the results once training has finished.
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    if test_data and evaluate_on_test:
        test_metrics = evaluate(model,
                                test_data,
                                iterator,
                                cuda_device=trainer._cuda_devices[0])  # pylint: disable=protected-access
        # Test metrics are stored alongside the training metrics with a "test_" prefix.
        metrics.update(
            {"test_" + key: value for key, value in test_metrics.items()})
    elif test_data:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    metrics_json = json.dumps(metrics, indent=2)
    metrics_path = os.path.join(serialization_dir, "metrics.json")
    with open(metrics_path, "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return model
Esempio n. 30
0
def train_model(params: Params,
                serialization_dir: str,
                cuda_device: int,
                train_data_path: str,
                validation_data_path: str,
                test_data_path: str,
                file_friendly_logging: bool = False) -> Model:
    """
    Train the model described by ``params`` on explicitly supplied data files and
    write the logs, configuration snapshot, vocabulary, model archive and
    ``metrics.json`` into ``serialization_dir``.

    Parameters
    ----------
    params: Params, required.
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir: str, required
        The directory in which to save results and logs; created if missing.
    cuda_device: int, required
        When non-negative the model is moved to this CUDA device; the value is also
        forwarded to ``Trainer.from_params``.
    train_data_path: str, required
        Forwarded to ``datasets_from_args``.
    validation_data_path: str, required
        Forwarded to ``datasets_from_args``.
    test_data_path: str, required
        Forwarded to ``datasets_from_args``.
    file_friendly_logging: bool, optional (default=False)
        Forwarded to the ``TeeLogger`` objects that replace ``sys.stdout`` /
        ``sys.stderr``.

    Returns
    -------
    model: Model
        The model object that was handed to the trainer.
    """
    prepare_environment(params)

    os.makedirs(serialization_dir, exist_ok=True)

    # Mirror stdout and stderr into files inside the serialization directory.
    stdout_log = os.path.join(serialization_dir, "stdout.log")
    stderr_log = os.path.join(serialization_dir, "stderr.log")
    sys.stdout = TeeLogger(stdout_log, sys.stdout, file_friendly_logging)  # type: ignore
    sys.stderr = TeeLogger(stderr_log, sys.stderr, file_friendly_logging)  # type: ignore

    # Additionally route INFO-and-above records from the root logger to a file.
    log_format = logging.Formatter(
        '%(asctime)s - %(levelname)s - %(name)s - %(message)s')
    file_handler = logging.FileHandler(
        os.path.join(serialization_dir, "python_logging.log"))
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(log_format)
    logging.getLogger().addHandler(file_handler)

    # Snapshot the configuration before the pops below start consuming it.
    config_snapshot = deepcopy(params).as_dict(quiet=True)
    config_path = os.path.join(serialization_dir, "model_params.json")
    with open(config_path, "w") as config_file:
        json.dump(config_snapshot, config_file, indent=4)

    # Data locations come from the function arguments rather than from ``params``.
    all_datasets = datasets_from_args(params, train_data_path,
                                      validation_data_path, test_data_path)

    # By default every loaded dataset contributes to the vocabulary.
    vocab_sources = set(params.pop("datasets_for_vocab_creation", all_datasets))
    for dataset in vocab_sources:
        if dataset in all_datasets:
            continue
        raise ConfigurationError(
            f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("Creating a vocabulary using %s data.",
                ", ".join(vocab_sources))
    vocab_instances = (example
                       for split_name, split in all_datasets.items()
                       for example in split
                       if split_name in vocab_sources)
    vocab = Vocabulary.from_params(params.pop("vocabulary", {}),
                                   vocab_instances)
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    model = Model.from_params(vocab, params.pop('model'))
    if cuda_device >= 0:
        model = model.cuda(cuda_device)

    # Training and validation use separately configured iterators.
    train_iterator = DataIterator.from_params(params.pop("train_iterator"))
    val_iterator = DataIterator.from_params(params.pop("val_iterator"))
    train_iterator.index_with(vocab)
    val_iterator.index_with(vocab)

    # Only the training split is mandatory.
    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer = Trainer.from_params(model, serialization_dir, train_iterator,
                                  val_iterator, cuda_device, train_data,
                                  validation_data, params.pop("trainer"))

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    # NOTE: leftover configuration keys are not validated in this variant (there is
    # no ``params.assert_empty`` call).
    metrics = trainer.train()

    # Package the results once training has finished.
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    if test_data and evaluate_on_test:
        test_metrics = evaluate(model,
                                test_data,
                                val_iterator,
                                cuda_device=trainer._cuda_devices[0])  # pylint: disable=protected-access
        # Test metrics are stored alongside the training metrics with a "test_" prefix.
        metrics.update(
            {"test_" + key: value for key, value in test_metrics.items()})
    elif test_data:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    metrics_json = json.dumps(metrics, indent=2)
    metrics_path = os.path.join(serialization_dir, "metrics.json")
    with open(metrics_path, "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return model
Esempio n. 31
0
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False) -> Model:
    """
    Train the model described by ``params``, write the configuration, vocabulary,
    model archive and ``metrics.json`` into ``serialization_dir``, and return the
    model reloaded with the weights stored in ``best.th``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        Forwarded to ``prepare_global_logging``; intended to make progress output
        friendlier to log files.
    recover : ``bool``, optional (default=False)
        Forwarded to ``create_serialization_dir``; intended for resuming a crashed
        run from an existing serialization directory.
    force : ``bool``, optional (default=False)
        Forwarded to ``create_serialization_dir``.

    Returns
    -------
    best_model: ``Model``
        The trained model object, reloaded with the weights stored in ``best.th``.
    """
    prepare_environment(params)
    create_serialization_dir(params, serialization_dir, recover, force)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # ``cuda_device`` may be a single id or a list of ids; validate each one.
    requested_devices = params.params.get('trainer').get('cuda_device', -1)
    if not isinstance(requested_devices, list):
        check_for_gpu(requested_devices)
    else:
        for device in requested_devices:
            check_for_gpu(device)

    # Persist the configuration before the pops below start consuming it.
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    all_datasets = datasets_from_params(params)

    # By default every loaded dataset contributes to the vocabulary.
    vocab_sources = set(params.pop("datasets_for_vocab_creation", all_datasets))
    for dataset in vocab_sources:
        if dataset in all_datasets:
            continue
        raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(vocab_sources))
    vocab_instances = (example
                       for split_name, split in all_datasets.items()
                       for example in split
                       if split_name in vocab_sources)
    vocab = Vocabulary.from_params(params.pop("vocabulary", {}), vocab_instances)

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # The vocabulary is saved only after the model exists, because building the
    # model can have the side effect of expanding the vocabulary.
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    # A dedicated validation iterator is optional; ``None`` is passed otherwise.
    validation_iterator = None
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(validation_iterator_params)
        validation_iterator.index_with(vocab)

    # Only the training split is mandatory.
    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")

    # Freeze every parameter whose name matches one of the "no_grad" regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for param_name, tensor in model.named_parameters():
        for pattern in no_grad_regexes:
            if re.search(pattern, param_name):
                tensor.requires_grad_(False)
                break

    frozen_names, tunable_names = get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for frozen_name in frozen_names:
        logger.info(frozen_name)
    logger.info("Following parameters are Tunable (with gradient):")
    for tunable_name in tunable_names:
        logger.info(tunable_name)

    # The trainer class is looked up by its "type" key, falling back to the first
    # available choice when the key is absent.
    trainer_choice = trainer_params.pop_choice("type",
                                               Trainer.list_available(),
                                               default_to_first_choice=True)
    trainer_class = Trainer.by_name(trainer_choice)
    trainer = trainer_class.from_params(model=model,
                                        serialization_dir=serialization_dir,
                                        iterator=iterator,
                                        train_data=train_data,
                                        validation_data=validation_data,
                                        params=trainer_params,
                                        validation_iterator=validation_iterator)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    # Any configuration key nobody consumed is treated as an error.
    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # If weights from a finished epoch exist, still try to produce an archive
        # before propagating the interrupt.
        best_weights_path = os.path.join(serialization_dir, _DEFAULT_WEIGHTS)
        if os.path.exists(best_weights_path):
            logging.info("Training interrupted by the user. Attempting to create "
                         "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Training finished normally: package the results.
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    logger.info("Loading the best epoch weights.")
    best_model_state = torch.load(os.path.join(serialization_dir, 'best.th'))
    # ``best_model`` is the same object as ``model``; its weights are overwritten in place.
    best_model = model
    best_model.load_state_dict(best_model_state)

    if test_data and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            best_model,
            test_data,
            validation_iterator or iterator,
            cuda_device=trainer._cuda_devices[0]  # pylint: disable=protected-access
        )
        # Test metrics are stored alongside the training metrics with a "test_" prefix.
        metrics.update({"test_" + key: value for key, value in test_metrics.items()})
    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    return best_model
Esempio n. 32
0
    vocab = Vocabulary.from_files(
        os.path.join(args.serialization_dir, "vocabulary"))
    logger.info("Vocabulary loaded")

    ### Load the data iterators ###
    task_list = create_and_set_iterators(params=params,
                                         task_list=task_list,
                                         vocab=vocab)

    ### Regularization	###
    regularizer = None

    ### Create model ###
    model_params = params.pop("model")
    model = Model.from_params(vocab=vocab,
                              params=model_params,
                              regularizer=regularizer)

    ### Real evaluation ###
    cuda_device = params.pop("multi_task_trainer").pop_int("cuda_device", -1)

    metrics = {task._name: {} for task in task_list}
    for task in task_list:
        if not task._evaluate_on_test:
            continue

        logger.info("Task %s will be evaluated using the best epoch weights.",
                    task._name)
        assert (
            task._test_data is not None
        ), "Task {} wants to be evaluated on test dataset but no there is no test data loaded.".format(
Esempio n. 33
0
def train_model(params: Params, serialization_dir: str) -> Model:
    """
    Train the model described by ``params`` and write every artifact to
    ``serialization_dir``.

    The function reads the training data (plus optional validation and test
    data), builds a vocabulary from all datasets that were read, constructs
    the model, iterator and trainer from their respective parameter blocks,
    runs training, archives the result and, when ``evaluate_on_test`` is set
    and test data is available, evaluates the trained model on the test set
    and logs the resulting metrics.

    Note that if you care about reproducibility, you should avoid running
    code using Pytorch or numpy which affect the reproducibility of your
    experiment before you import and use this function; these libraries rely
    on random seeds which can be set in this function via the parameter
    specification (see ``prepare_environment``).

    Side effects: ``sys.stdout`` and ``sys.stderr`` are replaced by
    ``TeeLogger`` instances and a ``logging.FileHandler`` is attached to the
    root logger. None of these is undone when the function returns.

    Parameters
    ----------
    params: Params, required.
        A parameter object specifying an AllenNLP Experiment. It is consumed
        (keys are popped) and must be empty once all components are built.
    serialization_dir: str, required
        The directory in which to save results and logs. Created if missing.

    Returns
    -------
    Model
        The model instance that was handed to the trainer.
        NOTE(review): whether it carries final-epoch or best-epoch weights
        depends on ``Trainer`` -- confirm before relying on either.
    """
    prepare_environment(params)

    os.makedirs(serialization_dir, exist_ok=True)

    # Mirror everything written to stdout / stderr into files next to the model.
    sys.stdout = TeeLogger(os.path.join(serialization_dir, "stdout.log"), sys.stdout)  # type: ignore
    sys.stderr = TeeLogger(os.path.join(serialization_dir, "stderr.log"), sys.stderr)  # type: ignore

    # Also persist INFO-and-above records of the root logger.
    handler = logging.FileHandler(os.path.join(serialization_dir, "python_logging.log"))
    handler.setLevel(logging.INFO)
    handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(name)s - %(message)s'))
    logging.getLogger().addHandler(handler)

    # Snapshot the full configuration *before* any key is popped below.
    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, "model_params.json"), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    # Now we begin assembling the required parts for the Trainer.
    dataset_reader = DatasetReader.from_params(params.pop('dataset_reader'))

    train_data_path = params.pop('train_data_path')
    logger.info("Reading training data from %s", train_data_path)
    train_data = dataset_reader.read(train_data_path)

    # Every dataset that gets read contributes its instances to the vocabulary.
    all_datasets: List[Dataset] = [train_data]
    datasets_in_vocab = ["train"]

    validation_data_path = params.pop('validation_data_path', None)
    if validation_data_path is not None:
        logger.info("Reading validation data from %s", validation_data_path)
        validation_data = dataset_reader.read(validation_data_path)
        all_datasets.append(validation_data)
        datasets_in_vocab.append("validation")
    else:
        validation_data = None

    test_data_path = params.pop("test_data_path", None)
    if test_data_path is not None:
        logger.info("Reading test data from %s", test_data_path)
        test_data = dataset_reader.read(test_data_path)
        all_datasets.append(test_data)
        datasets_in_vocab.append("test")
    else:
        test_data = None

    logger.info("Creating a vocabulary using %s data.", ", ".join(datasets_in_vocab))
    vocab = Vocabulary.from_params(params.pop("vocabulary", {}),
                                   Dataset([instance for dataset in all_datasets
                                            for instance in dataset.instances]))
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    model = Model.from_params(vocab, params.pop('model'))
    iterator = DataIterator.from_params(params.pop("iterator"))

    train_data.index_instances(vocab)
    if validation_data:
        validation_data.index_instances(vocab)

    trainer_params = params.pop("trainer")
    trainer = Trainer.from_params(model,
                                  serialization_dir,
                                  iterator,
                                  train_data,
                                  validation_data,
                                  trainer_params)

    evaluate_on_test = params.pop("evaluate_on_test", False)
    # Fail before the (expensive) training run if the config has unused keys.
    params.assert_empty('base train command')
    trainer.train()

    # Now tar up results
    archive_model(serialization_dir)

    if test_data and evaluate_on_test:
        # Test instances are only indexed when they are actually needed.
        test_data.index_instances(vocab)
        test_metrics = evaluate(model, test_data, iterator,
                                cuda_device=trainer._cuda_device)  # pylint: disable=protected-access
        # Record the outcome; previously the return value was discarded, so
        # the test results never reached the log files.
        logger.info("Test metrics: %s", test_metrics)

    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    return model