Code Example #1
    def check_model_computes_gradients_correctly(model: Model,
                                                 model_batch: Dict[str, Union[Any, Dict[str, Any]]],
                                                 params_to_ignore: Optional[Set[str]] = None):
        print("Checking gradients")
        model.zero_grad()
        result = model(**model_batch)
        result["loss"].backward()
        has_zero_or_none_grads = {}
        for name, parameter in model.named_parameters():
            zeros = torch.zeros(parameter.size())
            if params_to_ignore and name in params_to_ignore:
                continue
            if parameter.requires_grad:

                if parameter.grad is None:
                    has_zero_or_none_grads[name] = "No gradient computed (i.e parameter.grad is None)"

                elif parameter.grad.is_sparse or parameter.grad.data.is_sparse:
                    pass

                # Some parameters will only be partially updated,
                # like embeddings, so we just check that any gradient is non-zero.
                elif (parameter.grad.cpu() == zeros).all():
                    has_zero_or_none_grads[name] = f"zeros with shape ({tuple(parameter.grad.size())})"
            else:
                assert parameter.grad is None

        if has_zero_or_none_grads:
            for name, grad in has_zero_or_none_grads.items():
                print(f"Parameter: {name} had incorrect gradient: {grad}")
            raise Exception("Incorrect gradients found. See stdout for more info.")
Code Example #2
 def test_mismatching_dimensions_throws_configuration_error(self):
     params = Params.from_file(self.param_file)
     # Make the encoder wrong - it should be 150 to match
     # the embedding + binary feature dimensions.
     params["model"]["encoder"]["input_size"] = 10
     with pytest.raises(ConfigurationError):
         Model.from_params(vocab=self.vocab, params=params.pop("model"))
Code Example #3
 def test_mismatching_dimensions_throws_configuration_error(self):
     params = Params.from_file(self.param_file)
     # Make the encoder wrong - it should be 2 to match
     # the embedding dimension from the text_field_embedder.
     params["model"]["encoder"]["input_size"] = 10
     with pytest.raises(ConfigurationError):
         Model.from_params(vocab=self.vocab, params=params.pop("model"))
Code Example #4
 def test_mismatching_contextualizer_unidirectionality_throws_configuration_error(self):
     params = Params.from_file(self.param_file)
     # Make the contextualizer unidirectionality wrong - it should be
     # False to match the language model.
     params["model"]["contextualizer"]["bidirectional"] = (not self.bidirectional)
     with pytest.raises(ConfigurationError):
         Model.from_params(vocab=self.vocab, params=params.get("model"))
Code Example #5
 def test_elmo_but_no_set_flags_throws_configuration_error(self):
     # pylint: disable=line-too-long
     params = Params.from_file(self.FIXTURES_ROOT / 'biattentive_classification_network' / 'elmo_experiment.json')
     # Elmo is specified in the model, but set both flags to false.
     params["model"]["use_input_elmo"] = False
     params["model"]["use_integrator_output_elmo"] = False
     with pytest.raises(ConfigurationError):
         Model.from_params(vocab=self.vocab, params=params.get("model"))
Code Example #6
    def test_mismatched_dimensions_raise_configuration_errors(self):
        params = Params.from_file(self.param_file)
        # Make the input_dim to the first feedforward_layer wrong - it should be 2.
        params["model"]["attend_feedforward"]["input_dim"] = 10
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=params.pop("model"))

        params = Params.from_file(self.param_file)
        # Make the projection output_dim of the last layer wrong - it should be
        # 3, equal to the number of classes.
        params["model"]["aggregate_feedforward"]["output_dim"] = 10
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=params.pop("model"))
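Examples #2 through #6 all follow the same pattern: load a known-good config, corrupt one dimension, and expect a ConfigurationError from Model.from_params. A sketch of how that pattern can be parametrized with pytest; PARAM_FILE, VOCAB, and the override list are hypothetical fixtures, not part of the examples above:

    import pytest
    from allennlp.common import Params
    from allennlp.common.checks import ConfigurationError
    from allennlp.models import Model

    BAD_OVERRIDES = [
        (["model", "encoder", "input_size"], 10),
        (["model", "attend_feedforward", "input_dim"], 10),
    ]

    @pytest.mark.parametrize("key_path, bad_value", BAD_OVERRIDES)
    def test_bad_dimension_raises(key_path, bad_value):
        params = Params.from_file(PARAM_FILE)
        node = params
        for key in key_path[:-1]:   # walk to the parent of the key to corrupt
            node = node[key]
        node[key_path[-1]] = bad_value
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=VOCAB, params=params.pop("model"))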
Code Example #7
 def test_simple_tagger_constraint_type_deprecated(self):
     params = Params({"model": {
             "type": "crf_tagger",
             "constraint_type": "IOB1",
             "text_field_embedder": {
                     "token_embedders": {
                             "tokens": {
                                     "type": "embedding",
                                     "embedding_dim": 50
                             },
                     }
             },
             "encoder": {
                     "type": "gru",
                     "input_size": 50,
                     "hidden_size": 10,
                     "num_layers": 2,
                     "dropout": 0.5,
                     "bidirectional": True
             }}})
     with pytest.warns(DeprecationWarning):
         model = Model.from_params(vocab=self.vocab,
                                   params=params.pop("model"))
     assert model._f1_metric is not None
     assert model._f1_metric._label_encoding == "IOB1"
     assert model.label_encoding == "IOB1"
     assert model.crf._constraint_mask.sum().item() != (model.num_tags + 2)**2
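The pytest.warns context used in example #7 can be illustrated on its own; legacy_api below is a hypothetical function emitting the kind of DeprecationWarning the crf_tagger constructor raises for constraint_type:

    import warnings

    import pytest

    def legacy_api():
        warnings.warn("'constraint_type' is deprecated, use 'label_encoding'", DeprecationWarning)
        return "IOB1"

    with pytest.warns(DeprecationWarning):
        assert legacy_api() == "IOB1"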
Code Example #8
File: bidaf_test.py Project: Jordan-Sauchuk/allennlp
    def test_batch_predictions_are_consistent(self):
        # The CNN encoder has problems with this kind of test - it's not properly masked yet, so
        # changing the amount of padding in the batch will result in small differences in the
        # output of the encoder.  Because BiDAF is so deep, these differences get magnified through
        # the network and make this test impossible.  So, we'll remove the CNN encoder entirely
        # from the model for this test.  If/when we fix the CNN encoder to work correctly with
        # masking, we can change this back to how the other models run this test, with just a
        # single line.
        # pylint: disable=protected-access,attribute-defined-outside-init

        # Save some state.
        saved_model = self.model
        saved_instances = self.instances

        # Modify the state, run the test with modified state.
        params = Params.from_file(self.param_file)
        reader = DatasetReader.from_params(params['dataset_reader'])
        reader._token_indexers = {'tokens': reader._token_indexers['tokens']}
        self.instances = reader.read('tests/fixtures/data/squad.json')
        vocab = Vocabulary.from_instances(self.instances)
        for instance in self.instances:
            instance.index_fields(vocab)
        del params['model']['text_field_embedder']['token_characters']
        params['model']['phrase_layer']['input_size'] = 2
        self.model = Model.from_params(vocab, params['model'])

        self.ensure_batch_predictions_are_consistent()

        # Restore the state.
        self.model = saved_model
        self.instances = saved_instances
Code Example #9
    def setUp(self):
        super().setUp()
        params = Params({
                "model": {
                    "type": "simple_tagger",
                    "text_field_embedder": {
                        "token_embedders": {
                            "tokens": {
                                "type": "embedding",
                                "embedding_dim": 5
                            }
                        }
                    },
                    "encoder": {
                        "type": "lstm",
                        "input_size": 5,
                        "hidden_size": 7,
                        "num_layers": 2
                    }
                },
                "dataset_reader": {"type": "sequence_tagging"},
                "train_data_path": str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv'),
                "validation_data_path": str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv'),
                "iterator": {"type": "basic", "batch_size": 2},
                "trainer": {
                    "cuda_device": -1,
                    "num_epochs": 2,
                    "optimizer": "adam"
                }
            })
        all_datasets = datasets_from_params(params)
        vocab = Vocabulary.from_params(
            params.pop("vocabulary", {}),
            (instance for dataset in all_datasets.values()
             for instance in dataset)
        )
        model = Model.from_params(vocab=vocab, params=params.pop('model'))
        iterator = DataIterator.from_params(params.pop("iterator"))
        iterator.index_with(vocab)
        train_data = all_datasets['train']
        trainer_params = params.pop("trainer")
        serialization_dir = os.path.join(self.TEST_DIR, 'test_search_learning_rate')

        self.trainer = Trainer.from_params(model,
                                           serialization_dir,
                                           iterator,
                                           train_data,
                                           params=trainer_params,
                                           validation_data=None,
                                           validation_iterator=None)
Code Example #10
 def test_forward_with_epoch_num_changes_cost_weight(self):
     # Redefining model. We do not want this to change the state of ``self.model``.
     params = Params.from_file(self.param_file)
     model = Model.from_params(vocab=self.vocab, params=params['model'])
     # Initial cost weight, before forward is called.
     assert model._checklist_cost_weight == 0.8
     iterator = EpochTrackingBucketIterator(sorting_keys=[['sentence', 'num_tokens']])
     cost_weights = []
     for epoch_data in iterator(self.dataset, num_epochs=4):
         model.forward(**epoch_data)
         cost_weights.append(model._checklist_cost_weight)
     # The config file has ``wait_num_epochs`` set to 0, so the model starts decreasing the cost
     # weight at epoch 0 itself.
     assert_almost_equal(cost_weights, [0.72, 0.648, 0.5832, 0.52488])
Code Example #11
File: dry_run.py Project: apmoore1/allennlp
def dry_run_from_params(params: Params, serialization_dir: str) -> None:
    prepare_environment(params)

    vocab_params = params.pop("vocabulary", {})
    os.makedirs(serialization_dir, exist_ok=True)
    vocab_dir = os.path.join(serialization_dir, "vocabulary")

    if os.path.isdir(vocab_dir) and os.listdir(vocab_dir):
        raise ConfigurationError("The 'vocabulary' directory in the provided "
                                 "serialization directory is non-empty")

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))

    instances = [instance for key, dataset in all_datasets.items()
                 for instance in dataset
                 if key in datasets_for_vocab_creation]

    vocab = Vocabulary.from_params(vocab_params, instances)
    dataset = Batch(instances)
    dataset.index_instances(vocab)
    dataset.print_statistics()
    vocab.print_statistics()

    logger.info(f"writing the vocabulary to {vocab_dir}.")
    vocab.save_to_files(vocab_dir)

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
                   get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)
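The no_grad freezing loop near the end of example #11 works on any torch module; a runnable sketch with a hypothetical two-layer model and regex list:

    import re

    import torch

    model = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.Linear(4, 2))
    no_grad_regexes = [r"0\."]  # freeze every parameter of the first layer

    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen = [n for n, p in model.named_parameters() if not p.requires_grad]
    tunable = [n for n, p in model.named_parameters() if p.requires_grad]
    print("Frozen:", frozen)    # ['0.weight', '0.bias']
    print("Tunable:", tunable)  # ['1.weight', '1.bias']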
Code Example #12
    def set_up_model(self, param_file, dataset_file):
        # pylint: disable=attribute-defined-outside-init
        self.param_file = param_file
        params = Params.from_file(self.param_file)

        reader = DatasetReader.from_params(params['dataset_reader'])
        instances = reader.read(dataset_file)
        vocab = Vocabulary.from_instances(instances)
        self.vocab = vocab
        self.instances = instances
        self.model = Model.from_params(self.vocab, params['model'])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        self.dataset = Batch(self.instances)
        self.dataset.index_instances(self.vocab)
Code Example #13
def main(serialization_directory, device):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    """

    config = Params.from_file(os.path.join(serialization_directory, "config.json"))
    dataset_reader = DatasetReader.from_params(config['dataset_reader'])
    evaluation_data_path = config['validation_data_path']

    model = Model.load(config, serialization_dir=serialization_directory, cuda_device=device)

    prediction_file_path = os.path.join(serialization_directory, "predictions.txt")
    gold_file_path = os.path.join(serialization_directory, "gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("Reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)
    iterator = BasicIterator(batch_size=32)
    iterator.index_with(model.vocab)

    model_predictions = []
    batches = iterator(instances, num_epochs=1, shuffle=False, cuda_device=device, for_training=False)
    for batch in Tqdm.tqdm(batches):
        result = model(**batch)
        predictions = model.decode(result)
        model_predictions.extend(predictions["tags"])

    for instance, prediction in zip(instances, model_predictions):
        fields = instance.fields
        try:
            # Most sentences have a verbal predicate, but not all.
            verb_index = fields["verb_indicator"].labels.index(1)
        except ValueError:
            verb_index = None

        gold_tags = fields["tags"].labels
        sentence = fields["tokens"].tokens

        write_to_conll_eval_file(prediction_file, gold_file,
                                 verb_index, sentence, prediction, gold_tags)
    prediction_file.close()
    gold_file.close()
Code Example #14
File: model_test_case.py Project: pyknife/allennlp
    def set_up_model(self, param_file, dataset_file):
        # pylint: disable=attribute-defined-outside-init
        self.param_file = param_file
        params = Params.from_file(self.param_file)

        reader = DatasetReader.from_params(params['dataset_reader'])
        instances = reader.read(dataset_file)
        # Use parameters for vocabulary if they are present in the config file, so that choices like
        # "non_padded_namespaces", "min_count" etc. can be set if needed.
        if 'vocabulary' in params:
            vocab_params = params['vocabulary']
            vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
        else:
            vocab = Vocabulary.from_instances(instances)
        self.vocab = vocab
        self.instances = instances
        self.model = Model.from_params(vocab=self.vocab, params=params['model'])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        self.dataset = Batch(self.instances)
        self.dataset.index_instances(self.vocab)
Code Example #15
File: bidaf_test.py Project: Jordan-Sauchuk/allennlp
    def test_mismatching_dimensions_throws_configuration_error(self):
        params = Params.from_file(self.param_file)
        # Make the phrase layer wrong - it should be 10 to match
        # the embedding + char cnn dimensions.
        params["model"]["phrase_layer"]["input_size"] = 12
        with pytest.raises(ConfigurationError):
            Model.from_params(self.vocab, params.pop("model"))

        params = Params.from_file(self.param_file)
        # Make the modeling layer input_dimension wrong - it should be 40 to match
        # 4 * output_dim of the phrase_layer.
        params["model"]["phrase_layer"]["input_size"] = 30
        with pytest.raises(ConfigurationError):
            Model.from_params(self.vocab, params.pop("model"))

        params = Params.from_file(self.param_file)
        # Make the span end encoder input_dimension wrong - it should be 70 to match
        # 4 * phrase_layer.output_dim + 3 * modeling_layer.output_dim.
        params["model"]["span_end_encoder"]["input_size"] = 50
        with pytest.raises(ConfigurationError):
            Model.from_params(self.vocab, params.pop("model"))
Code Example #16
    def test_elmo_num_repr_set_flags_mismatch_throws_configuration_error(self):
        # pylint: disable=line-too-long
        params = Params.from_file(self.FIXTURES_ROOT / 'biattentive_classification_network' / 'elmo_experiment.json')
        # Elmo is specified in the model, with num_output_representations=2. Set
        # only one flag to true.
        tmp_params = deepcopy(params)
        tmp_params["model"]["use_input_elmo"] = False
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=tmp_params.get("model"))

        tmp_params = deepcopy(params)
        tmp_params["model"]["use_input_elmo"] = True
        tmp_params["model"]["use_integrator_output_elmo"] = False
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=tmp_params.get("model"))

        # set num_output_representations to 1, and set both flags to True.
        tmp_params = deepcopy(params)
        tmp_params["model"]["elmo"]["num_output_representations"] = 1
        tmp_params["model"]["use_input_elmo"] = True
        tmp_params["model"]["use_integrator_output_elmo"] = True
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=tmp_params.get("model"))
Code Example #17
    def test_no_elmo_but_set_flags_throws_configuration_error(self):
        params = Params.from_file(self.param_file)
        # There is no elmo specified in self.param_file, but set
        # use_input_elmo and use_integrator_output_elmo to True.
        # use_input_elmo set to True
        tmp_params = deepcopy(params)
        tmp_params["model"]["use_input_elmo"] = True
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=tmp_params.get("model"))

        # use_integrator_output_elmo set to True
        tmp_params = deepcopy(params)
        tmp_params["model"]["use_input_elmo"] = False
        tmp_params["model"]["use_integrator_output_elmo"] = True
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=tmp_params.get("model"))

        # both use_input_elmo and use_integrator_output_elmo set to True
        tmp_params = deepcopy(params)
        tmp_params["model"]["use_input_elmo"] = True
        tmp_params["model"]["use_integrator_output_elmo"] = True
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=tmp_params.get("model"))
Code Example #18
 def optimiser(model: Model) -> torch.optim.Optimizer:
     return AdamW(model.parameters(), lr=1e-3, weight_decay=1e-3)
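A short usage sketch for the factory in example #18, assuming AdamW here is torch.optim.AdamW (the transformers implementation takes the same arguments); the linear model is a hypothetical stand-in for a real Model:

    import torch
    from torch.optim import AdamW

    model = torch.nn.Linear(4, 2)
    optimizer = AdamW(model.parameters(), lr=1e-3, weight_decay=1e-3)

    loss = model(torch.randn(8, 4)).sum()
    loss.backward()
    optimizer.step()       # one Adam update with decoupled weight decay
    optimizer.zero_grad()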
Code Example #19
 def __init__(self, model: Model, dataset_reader: DatasetReader, frozen: bool = True) -> None:
     if frozen:
         model.eval()
     self._model = model
     self._dataset_reader = dataset_reader
     self.cuda_device = next(self._model.named_parameters())[1].get_device()
Code Example #20
def find_learning_rate_model(params: Params, serialization_dir: str,
                             start_lr: float = 1e-5,
                             end_lr: float = 10,
                             num_batches: int = 100,
                             linear_steps: bool = False,
                             stopping_factor: float = None,
                             force: bool = False) -> None:
    """
    Runs learning rate search for given `num_batches` and saves the results in ``serialization_dir``

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results.
    start_lr : ``float``
        The learning rate at which to start the search.
    end_lr : ``float``
        The learning rate up to which the search is run.
    num_batches : ``int``
        The number of mini-batches over which to run the learning rate finder.
    linear_steps : ``bool``
        If ``True``, increase the learning rate linearly; otherwise exponentially.
    stopping_factor : ``float``
        Stop the search when the current loss exceeds the best loss recorded so far
        by this factor. If ``None``, the search proceeds all the way to ``end_lr``.
    force: ``bool``
        If True and the serialization directory already exists, everything in it will
        be removed prior to finding the learning rate.
    """
    if os.path.exists(serialization_dir) and force:
        shutil.rmtree(serialization_dir)

    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        raise ConfigurationError(f'Serialization directory {serialization_dir} already exists and is '
                                 f'not empty.')
    else:
        os.makedirs(serialization_dir, exist_ok=True)

    prepare_environment(params)

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    check_for_gpu(cuda_device)

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))
    vocab = Vocabulary.from_params(
            params.pop("vocabulary", {}),
            (instance for key, dataset in all_datasets.items()
             for instance in dataset
             if key in datasets_for_vocab_creation)
    )

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    train_data = all_datasets['train']

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)


    trainer_choice = trainer_params.pop("type", "default")
    if trainer_choice != "default":
        raise ConfigurationError("currently find-learning-rate only works with the default Trainer")
    trainer = Trainer.from_params(model=model,
                                  serialization_dir=serialization_dir,
                                  iterator=iterator,
                                  train_data=train_data,
                                  validation_data=None,
                                  params=trainer_params,
                                  validation_iterator=None)

    logger.info(f'Starting learning rate search from {start_lr} to {end_lr} in {num_batches} iterations.')
    learning_rates, losses = search_learning_rate(trainer,
                                                  start_lr=start_lr,
                                                  end_lr=end_lr,
                                                  num_batches=num_batches,
                                                  linear_steps=linear_steps,
                                                  stopping_factor=stopping_factor)
    logger.info(f'Finished learning rate search.')
    losses = _smooth(losses, 0.98)

    _save_plot(learning_rates, losses, os.path.join(serialization_dir, 'lr-losses.png'))
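For intuition, a sketch of the two stepping schemes the linear_steps flag selects between, plus the exponential smoothing applied to the losses. This assumes search_learning_rate interpolates from start_lr to end_lr over num_batches steps and that _smooth is a bias-corrected exponential moving average; both match the names above but are assumptions about the implementation:

    start_lr, end_lr, num_batches = 1e-5, 10.0, 100

    linear = [start_lr + i * (end_lr - start_lr) / (num_batches - 1)
              for i in range(num_batches)]
    exponential = [start_lr * (end_lr / start_lr) ** (i / (num_batches - 1))
                   for i in range(num_batches)]

    def smooth(values, beta=0.98):
        # bias-corrected exponential moving average of the loss curve
        smoothed, avg = [], 0.0
        for i, value in enumerate(values):
            avg = beta * avg + (1 - beta) * value
            smoothed.append(avg / (1 - beta ** (i + 1)))
        return smoothed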
Code Example #21
def main(model_dir,
         model_type,
         compression_rate,
         max_sentences,
         model_index=None):
    print(compression_rate, max_sentences)

    # Treat model_index=0 as a valid starting index.
    i = model_index if model_index is not None else 0

    params = Params.from_file(os.path.join(model_dir, 'model_params.json'))
    ds_params = params.pop('dataset_reader', {})
    data_params = ds_params.pop('data', {})
    dataset_reader = CMVReader.from_params(ds_params)

    vocab = Vocabulary.from_params(
        Params({"directory_path": os.path.join(model_dir, 'vocabulary')}))

    val_iterator = DataIterator.from_params(params.pop('generator_iterator'))

    cmv_predictor = Model.from_params(params=params.pop('cmv_predictor'),
                                      vocab=vocab)
    document_embedder = Model.from_params(
        params=params.pop('document_embedder'), vocab=vocab)
    cmv_extractor = Model.from_params(params=params.pop('cmv_extractor'))

    cmv_actor_critic_params = params.pop('cmv_actor_critic', None)
    cmv_actor_critic = None
    if cmv_actor_critic_params is not None:
        cmv_actor_critic = Model.from_params(params=cmv_actor_critic_params)

    cmv_discriminator_params = params.pop('cmv_discriminator', None)
    cmv_discriminator = None
    if cmv_discriminator_params is not None:
        cmv_discriminator = Model.from_params(params=cmv_discriminator_params)

    params = dict(document_embedder=document_embedder,
                  cmv_predictor=cmv_predictor,
                  cmv_extractor=cmv_extractor,
                  cmv_actor_critic=cmv_actor_critic)
    if model_type == 'generator':
        params.update(dict(cmv_discriminator=cmv_discriminator))

    model = model_types[model_type](**params)

    data = dataset_reader.read('val', **data_params)
    data.index_instances(vocab)

    while True:
        model_filename = 'model_state_epoch_{}.th'.format(i)
        model_filename = os.path.join(os.path.join(model_dir, model_type),
                                      model_filename)

        print(model_filename)
        if not os.path.exists(model_filename):
            break

        #load file then do forward_on_instance
        model_state = torch.load(model_filename,
                                 map_location=util.device_mapping(-1))
        model.load_state_dict(model_state)
        model.eval()

        val_generator = val_iterator(data, num_epochs=1, shuffle=False)

        model._cmv_extractor._compression_rate = compression_rate
        for batch in val_generator:
            #batch is a tensor dict
            document, mask = model._document_embedder(batch['original_post'])
            idxs, probs, gold_loss = model._cmv_extractor(
                document,
                mask,
                batch['label'],
                gold_evidence=batch['weakpoints'],
                n_abs=max_sentences)

            #extracted_sentences = extract(batch['original_post'], idxs)
            #fake_output = model._cmv_predictor(batch['response'], batch['label'], extracted_sentences)
            for bidx, e in enumerate(batch['weakpoints']):
                if int(e.ne(-1).sum()) == 0:
                    continue
                print(e.numpy().tolist())
                print(idxs[bidx].numpy().tolist())
                for idx, sentence in enumerate(
                        batch['original_post']['tokens'][bidx]):
                    o = [
                        model._cmv_predictor.vocab.get_token_from_index(
                            int(index),
                            'tokens').replace('@@end@@', '').replace(
                                '@@UNKNOWN@@', 'UNK') for index in sentence
                        if int(index)
                    ]
                    if len(o):
                        print(idx, ' '.join(o))
                print()

        #print(model._cmv_predictor.get_metrics(reset=True))
        print(model._cmv_extractor.get_metrics(reset=True))

        i += 1
        if model_index is not None:
            break
Code Example #22
def train_model(params: Union[Params, Dict[str, Any]], cuda_device: int,
                serialization_dir: str, filtering: str) -> Model:
    """
    This function can be used as an entry point to running models in AllenNLP
    directly from a JSON specification using a :class:`Driver`. Note that if
    you care about reproducibility, you should avoid running any PyTorch or
    numpy code that affects your experiment's random state before you import
    and use this function: these libraries rely on random seeds, which this
    function can set from the JSON specification file. Note that this function
    performs training and will also evaluate the trained model on development
    and test sets if provided in the parameter json.

    Parameters
    ----------
    params: Params, required.
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir: str, required
        The directory in which to save results and logs.
    """

    SimpleRandom.set_seeds()

    os.makedirs(serialization_dir, exist_ok=True)
    try:
        sys.stdout = TeeLogger(os.path.join(serialization_dir, "stdout.log"),
                               sys.stdout, True)  # type: ignore
        sys.stderr = TeeLogger(os.path.join(serialization_dir, "stderr.log"),
                               sys.stderr, True)  # type: ignore
    except TypeError:
        sys.stdout = TeeLogger(os.path.join(serialization_dir, "stdout.log"),
                               sys.stdout)  # type: ignore
        sys.stderr = TeeLogger(os.path.join(serialization_dir, "stderr.log"),
                               sys.stderr)  # type: ignore
    handler = logging.FileHandler(
        os.path.join(serialization_dir, "python_logging.log"))
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter(
            '%(asctime)s - %(levelname)s - %(name)s - %(message)s'))
    logging.getLogger().addHandler(handler)
    serialization_params = deepcopy(params).as_dict(quiet=True)

    with open(os.path.join(serialization_dir, "model_params.json"),
              "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    # Now we begin assembling the required parts for the Trainer.
    ds_params = params.pop('dataset_reader', {})
    read_settings = ds_params.pop('read_settings', {})
    dataset_reader = FEVERReader.from_params(ds_params)

    train_data_path = params.pop('train_data_path')
    logger.info("Reading training data from %s", train_data_path)
    train_data = dataset_reader.read(
        train_data_path,
        include_metadata=True,
        replace_with_gold=read_settings.pop('replace_gold', False),
        pad_with_nearest=read_settings.pop('pad_with_nearest', 0))

    validation_data_path = params.pop('validation_data_path', None)
    if validation_data_path is not None:
        logger.info("Reading validation data from %s", validation_data_path)
        validation_data = dataset_reader.read(validation_data_path,
                                              include_metadata=True)
    else:
        validation_data = None

    vocab_params = params.pop("vocabulary", {})
    dataset = None
    print(dict(vocab_params), 'directory_path' not in vocab_params)
    assert ('directory_path' in vocab_params)
    vocab = Vocabulary.from_params(vocab_params, dataset)
    print(vocab)
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    trainer_params = params.pop("trainer")
    if cuda_device is not None:
        trainer_params["cuda_device"] = cuda_device
    trainer = Trainer.from_params(model, serialization_dir, iterator,
                                  train_data, validation_data, trainer_params)

    trainer.train()

    # Now tar up results
    archive_model(serialization_dir)

    return model
Code Example #23
 def test_encoder_feedforward_dim_match(self):
     params = Params.from_file(self.feedforward_config)
     params['model']['context_encoder']['hidden_size'] = 5
     with pytest.raises(ConfigurationError):
         Model.from_params(vocab=self.vocab, params=params.get('model'))
Code Example #24
            params.pop(key, None)
        #Pdb().set_trace()
        pieces = gan_trainer_hm.TrainerPiecesForSemi.from_params(
            params, serialization_dir, args.recover, semi_supervision)  # pylint: disable=no-member

        trainer = Trainer.from_params(
            model=pieces.model,
            serialization_dir=serialization_dir,
            iterator=pieces.iterator,
            train_data=pieces.train_dataset,
            validation_data=pieces.validation_dataset,
            params=pieces.params,
            validation_iterator=pieces.validation_iterator)

        # pieces for constrained learning
        constraints_model = Model.from_params(
            vocab=pieces.model.vocab, params=params.pop('dd_constraints'))
        dd_params = [[n, p] for n, p in constraints_model.named_parameters()
                     if p.requires_grad]
        dd_optimizer = None
        if len(dd_params) > 0:
            dd_optimizer = Optimizer.from_params(dd_params,
                                                 params.pop("dd_optimizer"))
        else:
            _ = params.pop('dd_optimizer')
        params.assert_empty('base train command')

        try:
            semi_trainer = gan_trainer_hm.SemiSupervisedTrainer(
                trainer, constraints_model, dd_optimizer,
                pieces.validation_iterator, pieces.unlabelled_dataset,
                semi_supervision, which_mixer, dd_warmup_iters, dd_update_freq,
Code Example #25
File: evaluation.py Project: ydwisroad/competitions
    def run(  # type: ignore
        self,
        model: Model,
        dataset: DatasetDict,
        split: str = "validation",
        data_loader: Optional[Lazy[TangoDataLoader]] = None,
    ) -> EvaluationResult:
        """
        Runs an evaluation on a dataset.

        * `model` is the model we want to evaluate.
        * `dataset` is the dataset we want to evaluate on.
        * `split` is the name of the split we want to evaluate on.
        * `data_loader` gives you the chance to choose a custom dataloader for the evaluation.
          By default this step evaluates on batches of 32 instances each.
        """

        concrete_data_loader: TangoDataLoader
        if data_loader is None:
            concrete_data_loader = BatchSizeDataLoader(dataset.splits[split],
                                                       batch_size=32,
                                                       shuffle=False)
        else:
            concrete_data_loader = data_loader.construct(
                instances=dataset.splits[split])

        if torch.cuda.device_count() > 0:
            cuda_device = torch.device(0)
        else:
            cuda_device = torch.device("cpu")
        check_for_gpu(cuda_device)

        generator_tqdm = Tqdm.tqdm(iter(concrete_data_loader))

        predictions: List[Dict[str, Any]] = []
        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative loss
        total_loss = 0.0

        with torch.inference_mode():
            model.eval()

            for batch in generator_tqdm:
                batch_count += 1
                batch = move_to_device(batch, cuda_device)
                output_dict = model(**batch)

                metrics = model.get_metrics()

                loss = output_dict.pop("loss", None)
                if loss is not None:
                    loss_count += 1
                    total_loss += loss.item()
                    metrics["loss"] = total_loss / loss_count

                    if any(
                            metric_name.startswith("_")
                            for metric_name in metrics):
                        self.logger.warning_once(
                            'Metrics with names beginning with "_" will '
                            "not be logged to the tqdm progress bar.")

                    description = (", ".join([
                        "%s: %.2f" % (name, value)
                        for name, value in metrics.items()
                        if not name.startswith("_")
                    ]) + " ||")
                    generator_tqdm.set_description(description, refresh=False)

                output_dict = sanitize(output_dict)

                # This is write-only code, but it's quite fast.
                predictions.extend(
                    dict(zip(output_dict.keys(), x))
                    for x in zip(*output_dict.values()))

            final_metrics = model.get_metrics(reset=True)

        if loss_count > 0:
            # Sanity check
            if loss_count != batch_count:
                raise RuntimeError(
                    "The model you are trying to evaluate only sometimes produced a loss!"
                )
            final_metrics["loss"] = total_loss / loss_count

        return self.EvaluationResult(final_metrics, predictions)
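The "write-only" comprehension near the end of example #25 transposes a dict of batched outputs into one dict per instance; the idiom in isolation, with toy data:

    output_dict = {"tags": [[1, 2], [3, 4]], "score": [0.9, 0.8]}
    predictions = [dict(zip(output_dict.keys(), x))
                   for x in zip(*output_dict.values())]
    assert predictions == [{"tags": [1, 2], "score": 0.9},
                           {"tags": [3, 4], "score": 0.8}]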
Code Example #26
    def test_model_load(self):
        params = Params.from_file('tests/fixtures/srl/experiment.json')
        model = Model.load(params)

        assert isinstance(model, SemanticRoleLabeler)
Code Example #27
 def test_embedding_encoder_dim_match(self):
     params = Params.from_file(self.in_context_config)
     params['model']['context_field_embedder']["token_embedders"]['tokens']["embedding_dim"] = 5
     with pytest.raises(ConfigurationError):
         Model.from_params(vocab=self.vocab, params=params.get('model'))
Code Example #28
    def test_model_load(self):
        params = Params.from_file(self.FIXTURES_ROOT / 'decomposable_attention' / 'experiment.json')
        model = Model.load(params, serialization_dir=self.FIXTURES_ROOT /
                           'decomposable_attention' / 'serialization')

        assert isinstance(model, DecomposableAttention)
Code Example #29
    def from_params(params: Params, serialization_dir: str, recover: bool = False) -> 'TrainerPieces':
        all_datasets = multitask_datasets_from_params(params)
        datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

        for dataset in datasets_for_vocab_creation:
            if dataset not in all_datasets:
                raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

        logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                    ", ".join(datasets_for_vocab_creation))

        if recover and os.path.exists(os.path.join(serialization_dir, "vocabulary")):
            vocab = Vocabulary.from_files(os.path.join(serialization_dir, "vocabulary"))
            params.pop("vocabulary", {})
        else:
            vocab = Vocabulary.from_params(
                    params.pop("vocabulary", {}),
                    (instance for key, dataset in all_datasets.items()
                     for instance in dataset
                     if key in datasets_for_vocab_creation)
            )

        model = Model.from_params(vocab=vocab, params=params.pop('model'))

        # If vocab extension is ON for training, embedding extension should also be
        # done. If vocab and embeddings are already in sync, it would be a no-op.
        model.extend_embedder_vocab()

        # Initializing the model can have side effect of expanding the vocabulary
        vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

        iterator = DataIterator.from_params(params.pop("iterator"))
        iterator.index_with(model.vocab)
        validation_iterator_params = params.pop("validation_iterator", None)
        if validation_iterator_params:
            validation_iterator = DataIterator.from_params(validation_iterator_params)
            validation_iterator.index_with(model.vocab)
        else:
            validation_iterator = None

        train_data = all_datasets['train']
        validation_data = all_datasets.get('validation')
        test_data = all_datasets.get('test')

        trainer_params = params.pop("trainer")
        no_grad_regexes = trainer_params.pop("no_grad", ())
        for name, parameter in model.named_parameters():
            if any(re.search(regex, name) for regex in no_grad_regexes):
                parameter.requires_grad_(False)

        frozen_parameter_names, tunable_parameter_names = \
                    get_frozen_and_tunable_parameter_names(model)
        logger.info("Following parameters are Frozen  (without gradient):")
        for name in frozen_parameter_names:
            logger.info(name)
        logger.info("Following parameters are Tunable (with gradient):")
        for name in tunable_parameter_names:
            logger.info(name)

        return MultiTaskTrainerPieces(model, iterator,
                                      train_data, validation_data, test_data,
                                      validation_iterator, trainer_params)
Code Example #30
    def __init__(self,
                 vocab: Vocabulary,
                 kg_model: Model = None,
                 entity_embedding: Embedding = None,
                 concat_entity_embedder: EntityEmbedder = None,
                 contextual_embedding_dim: int = None,
                 span_encoder_config: Dict[str, int] = None,
                 margin: float = 0.2,
                 decode_threshold: float = 0.0,
                 loss_type: str = 'margin',
                 max_sequence_length: int = 512,
                 dropout: float = 0.1,
                 output_feed_forward_hidden_dim: int = 100,
                 initializer_range: float = 0.02,
                 include_null_embedding_in_dot_attention: bool = False,
                 namespace: str = 'entity',
                 regularizer: RegularizerApplicator = None):

        super().__init__(vocab,
                         margin=margin,
                         decode_threshold=decode_threshold,
                         loss_type=loss_type,
                         namespace=namespace,
                         regularizer=regularizer)

        num_embeddings_passed = sum(
            [kg_model is not None, entity_embedding is not None, concat_entity_embedder is not None]
        )
        if num_embeddings_passed != 1:
            raise ValueError("Linking model needs either a kg factorisation model or an entity embedding.")

        elif kg_model is not None:
            entity_embedding = kg_model.get_entity_embedding()
            entity_embedding_dim = entity_embedding.embedding_dim

        elif entity_embedding is not None:
            entity_embedding_dim = entity_embedding.get_output_dim()

        elif concat_entity_embedder is not None:
            entity_embedding_dim = concat_entity_embedder.get_output_dim()
            set_requires_grad(concat_entity_embedder, False)
            entity_embedding = concat_entity_embedder

        if loss_type == 'margin':
            weighted_entity_threshold = decode_threshold
        else:
            weighted_entity_threshold = None

        null_entity_id = self.vocab.get_token_index('@@NULL@@', namespace)
        assert null_entity_id != self.vocab.get_token_index('@@UNKNOWN@@', namespace)

        self.disambiguator = EntityDisambiguator(
            contextual_embedding_dim,
            entity_embedding_dim=entity_embedding_dim,
            entity_embeddings=entity_embedding,
            max_sequence_length=max_sequence_length,
            span_encoder_config=span_encoder_config,
            dropout=dropout,
            output_feed_forward_hidden_dim=output_feed_forward_hidden_dim,
            initializer_range=initializer_range,
            weighted_entity_threshold=weighted_entity_threshold,
            include_null_embedding_in_dot_attention=include_null_embedding_in_dot_attention,
            null_entity_id=null_entity_id)
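The counting check at the top of example #30 enforces that exactly one of three optional embedding sources was passed; the same guard extracted into a hypothetical helper:

    def pick_exactly_one(**options):
        provided = {name: value for name, value in options.items() if value is not None}
        if len(provided) != 1:
            raise ValueError(f"expected exactly one of {sorted(options)}, got {len(provided)}")
        return provided.popitem()

    name, value = pick_exactly_one(kg_model=None, entity_embedding="emb", concat_entity_embedder=None)
    assert (name, value) == ("entity_embedding", "emb")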
Code Example #31
def dry_run_from_params(params: Params, serialization_dir: str,
                force: bool = False,
                cache_directory: str = None,
                cache_prefix: str = None) -> None:

    """
    :param params:
    :param serialization_dir:
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    cache_directory : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.
    cache_prefix : ``str``, optional
        For caching data pre-processing.  See :func:`allennlp.training.util.datasets_from_params`.
    :return:
    """

    if os.path.exists(serialization_dir) and force:
        shutil.rmtree(serialization_dir)

    prepare_environment(params)

    vocab_params = params.pop("vocabulary", {})
    os.makedirs(serialization_dir, exist_ok=True)
    vocab_dir = os.path.join(serialization_dir, "vocabulary")

    if os.path.isdir(vocab_dir) and os.listdir(vocab_dir):
        raise ConfigurationError("The 'vocabulary' directory in the provided "
                                 "serialization directory is non-empty")

    all_datasets = datasets_from_params(params, cache_directory, cache_prefix)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))

    instances = [instance for key, dataset in all_datasets.items()
                 for instance in dataset
                 if key in datasets_for_vocab_creation]

    vocab = Vocabulary.from_params(vocab_params, instances)
    dataset = Batch(instances)
    dataset.index_instances(vocab)
    dataset.print_statistics()
    vocab.print_statistics()

    logger.info(f"writing the vocabulary to {vocab_dir}.")
    vocab.save_to_files(vocab_dir)

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
                   get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen  (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)
Code Example #32
loader_params = {'batch_size': 96 // NUM_GPUS, 'num_gpus': NUM_GPUS, 'num_workers': num_workers}

only_use_relevant_dets = False
vcr_modes = VCR.eval_splits(embs_to_load=params['dataset_reader'].get('embs', 'bert_da'),
                            only_use_relevant_dets=params['dataset_reader'].get('only_use_relevant_dets', only_use_relevant_dets))
probs_grp = []
ids_grp = []
for (vcr_dataset, mode_long) in zip(vcr_modes, ['answer'] + [f'rationale_{i}' for i in range(4)]):
    mode = mode_long.split('_')[0]

    test_loader = VCRLoader.from_dataset(vcr_dataset, **loader_params)

    # Load the params again because allennlp will delete them... ugh.
    params = Params.from_file(args.params)
    print("Loading {} for {}".format(params['model'].get('type', 'WTF?'), mode), flush=True)
    model = Model.from_params(vocab=vcr_dataset.vocab, params=params['model'])
    for submodule in model.detector.backbone.modules():
        if isinstance(submodule, BatchNorm2d):
            submodule.track_running_stats = False

    model_state = torch.load(getattr(args, f'{mode}_ckpt'), map_location=device_mapping(-1))
    model.load_state_dict(model_state)

    model = DataParallel(model).cuda() if NUM_GPUS > 1 else model.cuda()
    model.eval()

    test_probs = []
    test_ids = []
    for b, (time_per_batch, batch) in enumerate(time_batch(test_loader)):
        with torch.no_grad():
            batch = _to_gpu(batch)
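The module walk in example #32 flips track_running_stats on every BatchNorm2d in the detector backbone; a standalone version with a hypothetical backbone (note that what the flag does to eval-time normalization depends on the PyTorch version):

    import torch
    from torch.nn import BatchNorm2d

    backbone = torch.nn.Sequential(
        torch.nn.Conv2d(3, 8, 3),
        BatchNorm2d(8),
        torch.nn.ReLU(),
    )
    for submodule in backbone.modules():
        if isinstance(submodule, BatchNorm2d):
            submodule.track_running_stats = False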
Code Example #33
File: run_eval.py Project: viswanathgs/r2c
        probs_grp.append(probs)
        ids_grp.append(ids)

    # Double check the IDs are in the same order for everything
    assert all(x == ids_grp[0] for x in ids_grp)
    probs_grp = np.stack(probs_grp, 1).reshape((-1, 20))
    to_leaderboard_csv(probs_grp, ids_grp[0], args.outfile)


if __name__ == '__main__':
    args = parse_args()

    params = Params.from_file(args.params)
    multitask = 'MultiTask' in params['model']['type']

    model = Model.from_params(params=params['model'])
    LOG.info('Loaded model {} from {}'.format(params['model'].get('type', ''),
                                              args.params))

    num_gpus = torch.cuda.device_count()
    assert num_gpus >= 1, "No CUDA devices found"
    LOG.info('Found {} GPUs'.format(num_gpus))
    model = DataParallel(model).cuda() if num_gpus > 1 else model.cuda()

    if args.answer_model or args.rationale_model:
        assert args.split == 'val', "Not yet supported"
        compute_baseline(model, params, args)

    if args.ar_model and not multitask:
        if args.split == 'val':
            joint_eval(model, params, args)
Code Example #34
        mask = util.get_text_field_mask(text)
        # Shape: (batch_size, encoding_dim)
        encoded_text = self.encoder(embedded_text, mask)
        # Shape: (batch_size, num_labels)
        logits = self.classifier(encoded_text)
        # Shape: (batch_size, num_labels)
        probs = torch.nn.functional.softmax(logits, dim=-1)
        # Shape: (1,)
        loss = torch.nn.functional.cross_entropy(logits, label)
        return {'loss': loss, 'probs': probs}


iterator = BasicIterator(batch_size=2)
iterator.index_with(vocab)

model_params = """
{
  "type": "simple_classifier",
  "embedder": {"token_embedders": {
    "tokens": {"type": "embedding", "embedding_dim": 10}
  }},
  "encoder": {"type": "bag_of_embeddings"}
}
"""

model = Model.from_params(vocab, Params(json.loads(model_params)))

for batch in iterator(instances, num_epochs=1):
    outputs = model(**batch)
    print(f"Model outputs: {outputs}")
Code Example #35
    def test_model_load(self):
        params = Params.from_file('tests/fixtures/decomposable_attention/experiment.json')
        model = Model.load(params, serialization_dir='tests/fixtures/decomposable_attention/serialization')

        assert isinstance(model, DecomposableAttention)
Code Example #36
def _from_params(
        cls,  # type: ignore
        model: Model,
        serialization_dir: str,
        iterator: DataIterator,
        train_data: Iterable[Instance],
        validation_data: Optional[Iterable[Instance]],
        params: Params,
        validation_iterator: DataIterator = None) -> DecompTrainer:
    # pylint: disable=arguments-differ
    patience = params.pop_int("patience", None)
    validation_metric = params.pop("validation_metric", "-loss")
    shuffle = params.pop_bool("shuffle", True)

    num_epochs = params.pop_int("num_epochs", 20)

    cuda_device = parse_cuda_device(params.pop("cuda_device", -1))
    grad_norm = params.pop_float("grad_norm", None)
    grad_clipping = params.pop_float("grad_clipping", None)
    lr_scheduler_params = params.pop("learning_rate_scheduler", None)
    momentum_scheduler_params = params.pop("momentum_scheduler", None)

    validation_data_path = params.pop("validation_data_path", None)
    validation_prediction_path = params.pop("validation_prediction_path", None)

    semantics_only = params.pop("semantics_only", False)
    drop_syntax = params.pop("drop_syntax", True)
    include_attribute_scores = params.pop("include_attribute_scores", False)

    warmup_epochs = params.pop("warmup_epochs", 0)

    if isinstance(cuda_device, list):
        model_device = cuda_device[0]
    else:
        model_device = cuda_device
    if model_device >= 0:
        # Moving model to GPU here so that the optimizer state gets constructed on
        # the right device.
        model = model.cuda(model_device)

    bert_optim_params = params.pop("bert_optimizer", None)
    bert_name = "_bert_encoder"

    if bert_optim_params is not None:
        tune_after_layer_num = params.pop("bert_tune_layer", 12)

        frozen_regex_str = [
            r"(_bert_encoder\.bert_model\.embeddings.*)",
            r"(_bert_encoder\.bert_model\.pooler.*)"
        ]
        tune_regex_str = []
        for i in range(0, 12):
            # match all numbers greater than layer num via disjunction
            tune_regex_one = f"({bert_name}\.bert_model\.encoder\.layer\.{i}\..*)"
            if i >= tune_after_layer_num:
                tune_regex_str.append(tune_regex_one)
            else:
                frozen_regex_str.append(tune_regex_one)
        tune_regex = re.compile("|".join(tune_regex_str))
        frozen_regex = re.compile("|".join(frozen_regex_str))
        # decide which params require grad for which optimizer
        all_names = [n for n, p in model.named_parameters()]
        tune_bert_names = [
            n for n in all_names if tune_regex.match(n) is not None
        ]
        frozen_names = [
            n for n in all_names if frozen_regex.match(n) is not None
        ]
        # assert that they're disjoint
        assert (len(set(frozen_names) & set(tune_bert_names)) == 0)
        # set tunable params to require gradient, frozen ones to not require
        for i, (n, p) in enumerate(model.named_parameters()):
            if n in frozen_names:
                p.requires_grad = False
            else:
                p.requires_grad = True

        # extract BERT
        bert_params = [[n, p] for n, p in model.named_parameters()
                       if p.requires_grad and n in tune_bert_names]
        # make sure this matches the tuneable bert params
        assert ([x[0] for x in bert_params] == tune_bert_names)
        bert_optimizer = Optimizer.from_params(bert_params, bert_optim_params)
    else:
        # freeze all BERT params
        tune_bert_names = []
        bert_optimizer = None
        for i, (n, p) in enumerate(model.named_parameters()):
            if "_bert_encoder" in n:
                p.requires_grad = False

    # model params
    parameters = [[n, p] for n, p in model.named_parameters()
                  if p.requires_grad and n not in tune_bert_names]
    optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))
    if "moving_average" in params:
        moving_average = MovingAverage.from_params(
            params.pop("moving_average"), parameters=parameters)
    else:
        moving_average = None

    if lr_scheduler_params:
        lr_scheduler = LearningRateScheduler.from_params(
            optimizer, lr_scheduler_params)
    else:
        lr_scheduler = None
    if momentum_scheduler_params:
        momentum_scheduler = MomentumScheduler.from_params(
            optimizer, momentum_scheduler_params)
    else:
        momentum_scheduler = None

    if 'checkpointer' in params:
        if 'keep_serialized_model_every_num_seconds' in params or \
                'num_serialized_models_to_keep' in params:
            raise ConfigurationError(
                "Checkpointer may be initialized either from the 'checkpointer' key or from the "
                "keys 'num_serialized_models_to_keep' and 'keep_serialized_model_every_num_seconds'"
                " but the passed config uses both methods.")
        checkpointer = Checkpointer.from_params(params.pop("checkpointer"))
    else:
        num_serialized_models_to_keep = params.pop_int(
            "num_serialized_models_to_keep", 20)
        keep_serialized_model_every_num_seconds = params.pop_int(
            "keep_serialized_model_every_num_seconds", None)
        checkpointer = Checkpointer(
            serialization_dir=serialization_dir,
            num_serialized_models_to_keep=num_serialized_models_to_keep,
            keep_serialized_model_every_num_seconds=
            keep_serialized_model_every_num_seconds)
    model_save_interval = params.pop_float("model_save_interval", None)
    summary_interval = params.pop_int("summary_interval", 100)
    histogram_interval = params.pop_int("histogram_interval", None)
    should_log_parameter_statistics = params.pop_bool(
        "should_log_parameter_statistics", True)
    should_log_learning_rate = params.pop_bool("should_log_learning_rate",
                                               False)
    log_batch_size_period = params.pop_int("log_batch_size_period", None)
    syntactic_method = params.pop("syntactic_method", None)
    accumulate_batches = params.pop("accumulate_batches", 1)

    params.assert_empty(cls.__name__)
    return cls(model=model,
               optimizer=optimizer,
               bert_optimizer=bert_optimizer,
               iterator=iterator,
               train_dataset=train_data,
               validation_dataset=validation_data,
               validation_data_path=validation_data_path,
               validation_prediction_path=validation_prediction_path,
               semantics_only=semantics_only,
               warmup_epochs=warmup_epochs,
               syntactic_method=syntactic_method,
               drop_syntax=drop_syntax,
               include_attribute_scores=include_attribute_scores,
               patience=patience,
               validation_metric=validation_metric,
               validation_iterator=validation_iterator,
               shuffle=shuffle,
               num_epochs=num_epochs,
               serialization_dir=serialization_dir,
               cuda_device=cuda_device,
               grad_norm=grad_norm,
               grad_clipping=grad_clipping,
               learning_rate_scheduler=lr_scheduler,
               momentum_scheduler=momentum_scheduler,
               checkpointer=checkpointer,
               model_save_interval=model_save_interval,
               summary_interval=summary_interval,
               histogram_interval=histogram_interval,
               should_log_parameter_statistics=should_log_parameter_statistics,
               should_log_learning_rate=should_log_learning_rate,
               log_batch_size_period=log_batch_size_period,
               moving_average=moving_average,
               accumulate_batches=accumulate_batches)
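
A minimal, self-contained sketch of the freeze/tune split used above, assuming the same "encoder.layer.{i}" style of module naming; unlike the snippet above it also guards the edge case where one side of the split is empty (re.compile("") would match every parameter name):

import re

def split_bert_params(model, tune_after_layer_num, num_layers=12,
                      prefix=r"bert\.encoder\.layer"):
    # Build one regex alternative per encoder layer index.
    tune_alts, frozen_alts = [], []
    for i in range(num_layers):
        alt = rf"({prefix}\.{i}\..*)"
        (tune_alts if i >= tune_after_layer_num else frozen_alts).append(alt)
    tune_regex = re.compile("|".join(tune_alts)) if tune_alts else None
    frozen_regex = re.compile("|".join(frozen_alts)) if frozen_alts else None

    tune_names, frozen_names = [], []
    for name, param in model.named_parameters():
        if frozen_regex is not None and frozen_regex.match(name):
            param.requires_grad = False
            frozen_names.append(name)
        elif tune_regex is not None and tune_regex.match(name):
            param.requires_grad = True
            tune_names.append(name)
    return tune_names, frozen_names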
Code example #37
File: train_esim.py Project: rmattam/fever-baselines
def train_model(db: FeverDocDB, params: Union[Params, Dict[str, Any]],
                cuda_device: int, serialization_dir: str,
                filtering: str) -> Model:
    """
    This function can be used as an entry point to running models in AllenNLP
    directly from a JSON specification using a :class:`Driver`. Note that if
    you care about reproducibility, you should avoid running code using Pytorch
    or numpy which affect the reproducibility of your experiment before you
    import and use this function, these libraries rely on random seeds which
    can be set in this function via a JSON specification file. Note that this
    function performs training and will also evaluate the trained model on
    development and test sets if provided in the parameter json.

    Parameters
    ----------
    params: Params, required.
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir: str, required
        The directory in which to save results and logs.
    """

    SimpleRandom.set_seeds()

    os.makedirs(serialization_dir, exist_ok=True)
    sys.stdout = TeeLogger(os.path.join(serialization_dir, "stdout.log"),
                           sys.stdout, True)  # type: ignore
    sys.stderr = TeeLogger(os.path.join(serialization_dir, "stderr.log"),
                           sys.stderr, True)  # type: ignore
    handler = logging.FileHandler(
        os.path.join(serialization_dir, "python_logging.log"))
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter(
            '%(asctime)s - %(levelname)s - %(name)s - %(message)s'))
    logging.getLogger().addHandler(handler)
    serialization_params = deepcopy(params).as_dict(quiet=True)

    with open(os.path.join(serialization_dir, "config.json"),
              "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    # Now we begin assembling the required parts for the Trainer.
    ds_params = params.pop('dataset_reader', {})
    dataset_reader = FEVERReader(
        db,
        sentence_level=ds_params.pop("sentence_level", False),
        wiki_tokenizer=Tokenizer.from_params(
            ds_params.pop('wiki_tokenizer', {})),
        claim_tokenizer=Tokenizer.from_params(
            ds_params.pop('claim_tokenizer', {})),
        token_indexers=FEVERReader.custom_dict_from_params(
            ds_params.pop('token_indexers', {})),
        filtering=filtering)

    train_data_path = params.pop('train_data_path')
    logger.info("Reading training data from %s", train_data_path)
    train_data = dataset_reader.read(train_data_path)

    all_datasets = [train_data]
    datasets_in_vocab = ["train"]

    validation_data_path = params.pop('validation_data_path', None)
    if validation_data_path is not None:
        logger.info("Reading validation data from %s", validation_data_path)
        validation_data = dataset_reader.read(validation_data_path)
        all_datasets.append(validation_data)
        datasets_in_vocab.append("validation")
    else:
        validation_data = None

    logger.info("Creating a vocabulary using %s data.",
                ", ".join(datasets_in_vocab))

    # Build the vocabulary from every dataset we read.
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        [instance for dataset in all_datasets for instance in dataset])

    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    try:
        model = Model.from_params(params.pop('model'), vocab=vocab)
        iterator = DataIterator.from_params(params.pop("iterator"))
    except Exception as e:
        # Re-raise: otherwise `model` and `iterator` would be undefined below.
        logger.error("Crashed with error: " + str(e))
        raise

    iterator.index_with(vocab)

    trainer_params = params.pop("trainer")
    if cuda_device is not None:
        trainer_params["cuda_device"] = cuda_device
    trainer = Trainer.from_params(model, serialization_dir, iterator,
                                  train_data, validation_data, trainer_params)

    trainer.train()

    # Now tar up results
    archive_model(serialization_dir)

    return model
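
A minimal invocation sketch for the function above; the database path and config file are hypothetical, and the filtering string is whatever strategy the FEVERReader expects:

db = FeverDocDB("data/fever/fever.db")            # hypothetical path
params = Params.from_file("config/esim.json")     # hypothetical config
model = train_model(db, params, cuda_device=0,
                    serialization_dir="logs/esim", filtering="")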
Code example #38
def find_learning_rate_model(params: Params, serialization_dir: str,
                             start_lr: float = 1e-5,
                             end_lr: float = 10,
                             num_batches: int = 100,
                             linear_steps: bool = False,
                             stopping_factor: float = None,
                             force: bool = False) -> None:
    """
    Runs learning rate search for given `num_batches` and saves the results in ``serialization_dir``

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results.
    start_lr : ``float``
        Learning rate at which to start the search.
    end_lr : ``float``
        Learning rate up to which the search is done.
    num_batches : ``int``
        Number of mini-batches to run the learning rate finder for.
    linear_steps : ``bool``
        If ``True``, increase the learning rate linearly; otherwise exponentially.
    stopping_factor : ``float``
        Stop the search when the current loss exceeds the best recorded loss by
        this multiple. If ``None``, the search proceeds until ``end_lr``.
    force : ``bool``
        If ``True`` and the serialization directory already exists, everything
        in it will be removed prior to finding the learning rate.
    """
    if os.path.exists(serialization_dir) and force:
        shutil.rmtree(serialization_dir)

    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        raise ConfigurationError(f'Serialization directory {serialization_dir} already exists and is '
                                 f'not empty.')
    else:
        os.makedirs(serialization_dir, exist_ok=True)

    prepare_environment(params)

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    if isinstance(cuda_device, list):
        for device in cuda_device:
            check_for_gpu(device)
    else:
        check_for_gpu(cuda_device)

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))
    vocab = Vocabulary.from_params(
            params.pop("vocabulary", {}),
            (instance for key, dataset in all_datasets.items()
             for instance in dataset
             if key in datasets_for_vocab_creation)
    )

    model = Model.from_params(vocab=vocab, params=params.pop('model'))
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    train_data = all_datasets['train']

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    trainer = Trainer.from_params(model,
                                  serialization_dir,
                                  iterator,
                                  train_data,
                                  params=trainer_params,
                                  validation_data=None,
                                  validation_iterator=None)

    logger.info(f'Starting learning rate search from {start_lr} to {end_lr} in {num_batches} iterations.')
    learning_rates, losses = search_learning_rate(trainer,
                                                  start_lr=start_lr,
                                                  end_lr=end_lr,
                                                  num_batches=num_batches,
                                                  linear_steps=linear_steps,
                                                  stopping_factor=stopping_factor)
    logger.info('Finished learning rate search.')
    losses = _smooth(losses, 0.98)

    _save_plot(learning_rates, losses, os.path.join(serialization_dir, 'lr-losses.png'))
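
The search itself is delegated to search_learning_rate, which steps the learning rate once per batch. Under the usual LR-finder schedules (an assumption about its internals, consistent with the linear_steps flag above), the rate after i of num_batches steps would be:

def lr_at_step(i, start_lr, end_lr, num_batches, linear_steps=False):
    # Linear: interpolate directly; exponential: interpolate in log space.
    frac = i / num_batches
    if linear_steps:
        return start_lr + frac * (end_lr - start_lr)
    return start_lr * (end_lr / start_lr) ** frac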
Code example #39
def find_learning_rate_model(
    params: Params,
    serialization_dir: str,
    start_lr: float = 1e-5,
    end_lr: float = 10,
    num_batches: int = 100,
    linear_steps: bool = False,
    stopping_factor: float = None,
    force: bool = False,
) -> None:
    """
    Runs learning rate search for given `num_batches` and saves the results in ``serialization_dir``

    # Parameters

    params : `Params`
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : `str`
        The directory in which to save results.
    start_lr : `float`
        Learning rate at which to start the search.
    end_lr : `float`
        Learning rate up to which the search is done.
    num_batches : `int`
        Number of mini-batches to run the learning rate finder for.
    linear_steps : `bool`
        If `True`, increase the learning rate linearly; otherwise exponentially.
    stopping_factor : `float`
        Stop the search when the current loss exceeds the best recorded loss by
        this multiple. If `None`, the search proceeds until `end_lr`.
    force : `bool`
        If `True` and the serialization directory already exists, everything
        in it will be removed prior to finding the learning rate.
    """
    create_serialization_dir(params,
                             serialization_dir,
                             recover=False,
                             force=force)

    prepare_environment(params)

    cuda_device = params.params.get("trainer").get("cuda_device", -1)
    check_for_gpu(cuda_device)
    distributed_params = params.params.get("distributed")
    # See https://github.com/allenai/allennlp/issues/3658
    assert not distributed_params, "find-lr is not compatible with DistributedDataParallel."

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(
        params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(
                f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info(
        "From dataset instances, %s will be considered for vocabulary creation.",
        ", ".join(datasets_for_vocab_creation),
    )
    vocab = Vocabulary.from_params(
        params.pop("vocabulary", {}),
        instances=(instance for key, dataset in all_datasets.items()
                   for instance in dataset
                   if key in datasets_for_vocab_creation),
    )

    train_data = all_datasets["train"]
    train_data.index_with(vocab)
    model = Model.from_params(vocab=vocab, params=params.pop("model"))
    data_loader = DataLoader.from_params(dataset=train_data,
                                         params=params.pop("data_loader"))

    trainer_params = params.pop("trainer")

    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    trainer_choice = trainer_params.pop("type", "gradient_descent")
    if trainer_choice != "gradient_descent":
        raise ConfigurationError(
            "currently find-learning-rate only works with the GradientDescentTrainer"
        )
    trainer: GradientDescentTrainer = Trainer.from_params(  # type: ignore
        model=model,
        serialization_dir=serialization_dir,
        data_loader=data_loader,
        params=trainer_params,
    )

    logger.info(
        f"Starting learning rate search from {start_lr} to {end_lr} in {num_batches} iterations."
    )
    learning_rates, losses = search_learning_rate(
        trainer,
        start_lr=start_lr,
        end_lr=end_lr,
        num_batches=num_batches,
        linear_steps=linear_steps,
        stopping_factor=stopping_factor,
    )
    logger.info("Finished learning rate search.")
    losses = _smooth(losses, 0.98)

    _save_plot(learning_rates, losses,
               os.path.join(serialization_dir, "lr-losses.png"))
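
_smooth is not shown in this listing; a plausible implementation, assuming it is the exponentially weighted moving average (with bias correction) that LR finders conventionally apply to the raw losses:

def _smooth(values, beta):
    # EWMA with bias correction so early entries are not dragged toward zero.
    avg = 0.0
    smoothed = []
    for i, value in enumerate(values):
        avg = beta * avg + (1 - beta) * value
        smoothed.append(avg / (1 - beta ** (i + 1)))
    return smoothed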
Code example #40
File: test_knowbert.py Project: zxlzr/kb
def get_knowbert(vocab, mode, include_wiki=False):
    params = {
        "type": "knowbert",
        "mode": mode,
        "soldered_kgs": {
            "wordnet": {
                "type": "soldered_kg",
                "entity_linker": {
                    "type": "entity_linking_with_candidate_mentions",
                    "kg_model": {
                        "type": "from_archive",
                        "archive_file": ARCHIVE_FILE,
                    },
                    "contextual_embedding_dim": 12,
                    "max_sequence_length": 64,
                    "span_encoder_config": {
                        "hidden_size": 24,
                        "num_hidden_layers": 1,
                        "num_attention_heads": 3,
                        "intermediate_size": 37
                    },
                },
                "span_attention_config": {
                    "hidden_size": 24,
                    "num_hidden_layers": 2,
                    "num_attention_heads": 4,
                    "intermediate_size": 55
                }
            },
        },
        "soldered_layers": {
            "wordnet": 1
        },
        "bert_model_name": "tests/fixtures/bert/bert_test_fixture.tar.gz",
    }

    if include_wiki:
        params["soldered_kgs"]["wiki"] = {
            "type": "soldered_kg",
            "entity_linker": {
                "type": "entity_linking_with_candidate_mentions",
                "namespace": "entity_wiki",
                "entity_embedding": {
                    "num_embeddings": 14,
                    "embedding_dim": 24,
                },
                "contextual_embedding_dim": 12,
                "max_sequence_length": 64,
                "span_encoder_config": {
                    "hidden_size": 24,
                    "num_hidden_layers": 1,
                    "num_attention_heads": 3,
                    "intermediate_size": 37
                },
            },
            "span_attention_config": {
                "hidden_size": 24,
                "num_hidden_layers": 1,
                "num_attention_heads": 4,
                "intermediate_size": 55
            }
        }
        params["soldered_layers"]["wiki"] = 0
        params["soldered_kgs"]["wordnet"]["entity_linker"][
            "namespace"] = "entity_wordnet"

    model = Model.from_params(Params(params), vocab=vocab)
    return model
Code example #41
def evaluate_perplexity(model: Model, sampler: Model, num_samples: int,
                        instances: Iterator[Instance],
                        data_iterator: DataIterator,
                        cuda_device: int) -> Dict[str, Any]:

    check_for_gpu(cuda_device)

    logger.info('Iterating over dataset')

    with torch.no_grad():

        summands = []
        penalized_summands = []

        for i in range(num_samples):
            iterator = data_iterator(instances, num_epochs=1, shuffle=False)
            generator_tqdm = Tqdm.tqdm(iterator, total=0)

            model.eval()
            sampler.eval()

            summand = 0.0
            penalized_summand = 0.0
            denom = 0
            for batch, _ in generator_tqdm:

                batch = util.move_to_device(batch, cuda_device)

                # We need sequence length to help compute perplexity
                n_tokens = util.get_text_field_mask(
                    batch['source']).float().sum().item()
                denom += n_tokens

                # Draw a sample
                sampler_output = sampler.sample(**batch)
                sample_logp = sampler_output['logp']
                sample = sampler_output['sample']

                # Evaluate on sample
                model_output = model(**sample)
                model_logp = model_output['logp']
                model_penalized_logp = model_output['penalized_logp']
                summand += (model_logp - sample_logp).item()
                penalized_summand += (model_penalized_logp -
                                      sample_logp).item()

            summands.append(summand)
            penalized_summands.append(penalized_summand)
            # Importance-sampling estimate: average the p/q weights of the
            # i + 1 samples drawn so far, in log space for stability.
            t = torch.tensor(summands)
            p = torch.tensor(penalized_summands)
            t_sum = torch.logsumexp(t, dim=0)
            p_sum = torch.logsumexp(p, dim=0)
            sum_logp = (t_sum - math.log(i + 1)).item()
            sum_logp_penalized = (p_sum - math.log(i + 1)).item()
            ppl = math.exp(-sum_logp / denom)
            upp = math.exp(-sum_logp_penalized / denom)

            print('PPL: %f' % ppl)
            print('UPP: %f' % upp)

    metrics = {'ppl': ppl, 'upp': upp}
    return metrics
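
Reading the loop above back as a standalone estimator (an interpretation of the code, not a function from the source): given per-sample model log-probabilities and sampler log-probabilities, the perplexity works out as below.

import math
import torch

def importance_sampled_ppl(model_logps, sample_logps, n_tokens):
    # Average the importance weights p(x_k)/q(x_k) in log space, then
    # convert the estimated corpus log-likelihood to a perplexity.
    w = torch.tensor(model_logps) - torch.tensor(sample_logps)
    log_p_hat = torch.logsumexp(w, dim=0) - math.log(len(model_logps))
    return math.exp(-log_p_hat.item() / n_tokens)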
Code example #42
File: allen.py Project: phillswope/charade
 def _load_model(self, basedir):
     config_path = os.path.join(basedir, 'config.json')
     config = Params.from_file(config_path)
     model = Model.load(config=config, serialization_dir=basedir)
     sentiment_map = self._vocab_to_sentiment_map(model.vocab)
     return model, sentiment_map
Code example #43
def load_weights(model: Model, path: str, location: str = 'cpu') -> None:
    with open(path, 'rb') as f:
        model.load_state_dict(torch.load(f, map_location=location))
Code example #44
File: allen.py Project: phillswope/charade
 def _load_model(basedir):
     config_path = os.path.join(basedir, 'config.json')
     config = Params.from_file(config_path)
     model = Model.load(config=config, serialization_dir=basedir)
     return model
Code example #45
def _load(config: Params,
          adapters_dir: str,
          serialization_dir: str,
          weights_file: str = None,
          cuda_device: int = -1) -> 'Model':
    """
    Instantiates an already-trained model, based on the experiment
    configuration and some optional overrides.
    """
    weights_file = weights_file or os.path.join(serialization_dir, "best.th")

    # Load vocabulary from file
    vocab_dir = os.path.join(serialization_dir, 'vocabulary')
    # If the config specifies a vocabulary subclass, we need to use it.
    vocab_params = config.get("vocabulary", Params({}))
    vocab_choice = vocab_params.pop_choice("type", Vocabulary.list_available(), True)
    vocab = Vocabulary.by_name(vocab_choice).from_files(vocab_dir)

    model_params = config.get('model')

    # The experiment config tells us how to _train_ a model, including where to get pre-trained
    # embeddings from.  We're now _loading_ the model, so those embeddings will already be
    # stored in our weights.  We don't need any pretrained weight file anymore, and we don't
    # want the code to look for it, so we remove it from the parameters here.
    remove_pretrained_embedding_params(model_params)
    model = Model.from_params(vocab=vocab, params=model_params)

    # If vocab+embedding extension was done, the model initialized from from_params
    # and one defined by state dict in weights_file might not have same embedding shapes.
    # Eg. when model embedder module was transferred along with vocab extension, the
    # initialized embedding weight shape would be smaller than one in the state_dict.
    # So calling model embedding extension is required before load_state_dict.
    # If vocab and model embeddings are in sync, following would be just a no-op.
    model.extend_embedder_vocab()

    # model_state = torch.load(weights_file, map_location=util.device_mapping(cuda_device))
    # model.load_state_dict(model_state, strict=False)

    for file in os.listdir(adapters_dir):
        logger.info(f"{file} is loading..")

    # Loop over the encoder layers, pairing each adapter file with that
    # layer's adapter modules and loading its saved weights.
    for i, layer in enumerate(model.text_field_embedder.token_embedder_bert.bert_model.encoder.layer):
        try:
            for j, (file, attention_adapter, output_attention) in enumerate(zip(os.listdir(adapters_dir), layer.attention.output.adapter, layer.output.adapter)):
                adapter_state = torch.load(os.path.join(adapters_dir, file))
                attention_adapter.load_state_dict(adapter_state['attention_adapter_' + str(i)])
                output_attention.load_state_dict(adapter_state['output_adapter_' + str(i)])
        except AttributeError:
            logger.warning(f"Could not find the adapter model inside the archive {adapters_dir}")
            traceback.print_exc()
            return

    # Force model to cpu or gpu, as appropriate, to make sure that the embeddings are
    # in sync with the weights
    if cuda_device >= 0:
        model.cuda(cuda_device)
    else:
        model.cpu()

    return model
Code example #46
File: evaluate.py Project: shlomota/newscaptioning
def evaluate(model: Model, instances: Iterable[Instance],
             data_iterator: DataIterator, cuda_device: int,
             serialization_dir: str, eval_suffix: str,
             batch_weight_key: str) -> Dict[str, Any]:
    check_for_gpu(cuda_device)
    nlp = spacy.load("en_core_web_lg")
    assert not os.path.exists(
        os.path.join(serialization_dir, f'generations{eval_suffix}.jsonl'))

    # Caching saves us an extra 30 minutes.
    if 'goodnews' in serialization_dir:
        cache_path = 'data/goodnews/evaluation_cache.pkl'
    elif 'nytimes' in serialization_dir:
        cache_path = 'data/nytimes/evaluation_cache.pkl'
    else:
        # Without this branch, cache_path would be unbound below.
        raise ValueError(f'Cannot infer a cache path from {serialization_dir}')
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as f:
            cache = pickle.load(f)
    else:
        cache = {}

    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            write_to_json(output_dict, serialization_dir, nlp, eval_suffix,
                          cache)

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not HasBeenWarned.tqdm_ignores_underscores and any(
                    metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                HasBeenWarned.tqdm_ignores_underscores = True
            description = ', '.join([
                "%s: %.2f" % (name, value)
                for name, value in metrics.items() if not name.startswith("_")
            ]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            # if loss_count != batch_count:
            #     raise RuntimeError("The model you are trying to evaluate only sometimes " +
            #                        "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

    if not os.path.exists(cache_path):
        with open(cache_path, 'wb') as f:
            pickle.dump(cache, f)

    return final_metrics
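
The loss bookkeeping above is a weighted mean over batches: final loss = sum_b(w_b * loss_b) / sum_b(w_b), where w_b is output_dict[batch_weight_key] when that key is configured and 1.0 otherwise, so unweighted evaluation reduces to a plain average over the batches that produced a loss.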
Code example #47
File: train.py Project: ihaeyong/r2c
num_workers = (4 * NUM_GPUS if NUM_CPUS == 32 else 2 * NUM_GPUS) - 1
print(f"Using {num_workers} workers out of {NUM_CPUS} possible", flush=True)
loader_params = {
    'batch_size': 96 // NUM_GPUS,
    'num_gpus': NUM_GPUS,
    'num_workers': num_workers
}
train_loader = VCRLoader.from_dataset(train, **loader_params)
val_loader = VCRLoader.from_dataset(val, **loader_params)
test_loader = VCRLoader.from_dataset(test, **loader_params)

ARGS_RESET_EVERY = 100
print("Loading {} for {}".format(params['model'].get('type', 'WTF?'),
                                 'rationales' if args.rationale else 'answer'),
      flush=True)
model = Model.from_params(vocab=train.vocab, params=params['model'])
for submodule in model.detector.backbone.modules():
    if isinstance(submodule, BatchNorm2d):
        submodule.track_running_stats = False
    for p in submodule.parameters():
        p.requires_grad = False

model = DataParallel(model).cuda() if NUM_GPUS > 1 else model.cuda()
optimizer = Optimizer.from_params(
    [x for x in model.named_parameters() if x[1].requires_grad],
    params['trainer']['optimizer'])

lr_scheduler_params = params['trainer'].pop("learning_rate_scheduler", None)
scheduler = LearningRateScheduler.from_params(
    optimizer, lr_scheduler_params) if lr_scheduler_params else None
Code example #48
    def __init__(
        self,
        id: str,
        registered_model_name: Optional[str] = None,
        model_class: Optional[type] = None,
        registered_predictor_name: Optional[str] = None,
        display_name: Optional[str] = None,
        archive_file: Optional[str] = None,
        overrides: Optional[Dict] = None,
        model_details: Optional[Union[str, ModelDetails]] = None,
        intended_use: Optional[Union[str, IntendedUse]] = None,
        factors: Optional[Union[str, Factors]] = None,
        metrics: Optional[Union[str, Metrics]] = None,
        evaluation_data: Optional[Union[str, EvaluationData]] = None,
        training_data: Optional[Union[str, TrainingData]] = None,
        quantitative_analyses: Optional[Union[str,
                                              QuantitativeAnalyses]] = None,
        ethical_considerations: Optional[Union[str,
                                               EthicalConsiderations]] = None,
        caveats_and_recommendations: Optional[Union[
            str, CaveatsAndRecommendations]] = None,
    ):

        assert id
        if not model_class and registered_model_name:
            try:
                model_class = Model.by_name(registered_model_name)
            except ConfigurationError:
                logger.warning("{} is not a registered model.".format(
                    registered_model_name))

        if model_class:
            display_name = display_name or model_class.__name__
            model_details = model_details or get_description(model_class)
            if not registered_predictor_name:
                registered_predictor_name = model_class.default_predictor  # type: ignore

        if archive_file and not archive_file.startswith("https:"):
            archive_file = os.path.join(self._storage_location, archive_file)

        if isinstance(model_details, str):
            model_details = ModelDetails(description=model_details)
        if isinstance(intended_use, str):
            intended_use = IntendedUse(primary_uses=intended_use)
        if isinstance(factors, str):
            factors = Factors(relevant_factors=factors)
        if isinstance(metrics, str):
            metrics = Metrics(model_performance_measures=metrics)
        if isinstance(evaluation_data, str):
            evaluation_data = EvaluationData(dataset=evaluation_data)
        if isinstance(training_data, str):
            training_data = TrainingData(dataset=training_data)
        if isinstance(quantitative_analyses, str):
            quantitative_analyses = QuantitativeAnalyses(
                unitary_results=quantitative_analyses)
        if isinstance(ethical_considerations, str):
            ethical_considerations = EthicalConsiderations(
                ethical_considerations)
        if isinstance(caveats_and_recommendations, str):
            caveats_and_recommendations = CaveatsAndRecommendations(
                caveats_and_recommendations)

        self.id = id
        self.registered_model_name = registered_model_name
        self.registered_predictor_name = registered_predictor_name
        self.display_name = display_name
        self.archive_file = archive_file
        self.model_details = model_details
        self.intended_use = intended_use
        self.factors = factors
        self.metrics = metrics
        self.evaluation_data = evaluation_data
        self.training_data = training_data
        self.quantitative_analyses = quantitative_analyses
        self.ethical_considerations = ethical_considerations
        self.caveats_and_recommendations = caveats_and_recommendations
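
A minimal construction sketch; the class name ModelCard and all field values here are illustrative assumptions, since the listing only shows the __init__:

card = ModelCard(                                # hypothetical class name
    id="rc-bidaf",                               # hypothetical model id
    registered_model_name="bidaf",               # resolved via Model.by_name
    model_details="A BiDAF reading-comprehension baseline.",
    intended_use="Question answering on SQuAD-style data.",
)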
Code example #49
    def test_model_load(self):
        params = Params.from_file(
            'tests/fixtures/decomposable_attention/experiment.json')
        model = Model.load(params)

        assert isinstance(model, DecomposableAttention)
Code example #50
File: model_wrapper.py Project: parita/visualbert
 def initialize_model(self, args):
     model = Model.from_params(vocab=None, params=Params(args.model))
     if args.get("fp16", False):
         model.half()
         print("Using FP 16, Model Halfed")
     self.model = DataParallel(model).cuda()