Code example #1
    def __init__(self, configuration, args, pre_embed=None):
        configuration = deepcopy(configuration)
        self.configuration = deepcopy(configuration)

        configuration['model']['encoder']['pre_embed'] = pre_embed
        self.encoder = Encoder.from_params(
            Params(configuration['model']['encoder'])).to(device)

        self.frozen_attn = args.frozen_attn
        self.adversarial = args.adversarial
        self.pre_loaded_attn = args.pre_loaded_attn

        configuration['model']['decoder'][
            'hidden_size'] = self.encoder.output_size
        if self.frozen_attn:
            self.decoder = FrozenAttnDecoder.from_params(
                Params(configuration['model']['decoder'])).to(device)
        elif self.pre_loaded_attn:
            self.decoder = PretrainedWeightsDecoder.from_params(
                Params(configuration['model']['decoder'])).to(device)
        else:
            self.decoder = AttnDecoder.from_params(
                Params(configuration['model']['decoder'])).to(device)

        self.encoder_params = list(self.encoder.parameters())
        if not self.frozen_attn:
            self.attn_params = list([
                v for k, v in self.decoder.named_parameters()
                if 'attention' in k
            ])
        self.decoder_params = list([
            v for k, v in self.decoder.named_parameters()
            if 'attention' not in k
        ])

        self.bsize = configuration['training']['bsize']

        weight_decay = configuration['training'].get('weight_decay', 1e-5)
        self.encoder_optim = torch.optim.Adam(self.encoder_params,
                                              lr=0.001,
                                              weight_decay=weight_decay,
                                              amsgrad=True)
        if not self.frozen_attn:
            self.attn_optim = torch.optim.Adam(self.attn_params,
                                               lr=0.001,
                                               weight_decay=0,
                                               amsgrad=True)
        self.decoder_optim = torch.optim.Adam(self.decoder_params,
                                              lr=0.001,
                                              weight_decay=weight_decay,
                                              amsgrad=True)

        pos_weight = configuration['training'].get('pos_weight', [1.0] *
                                                   self.decoder.output_size)
        self.pos_weight = torch.Tensor(pos_weight).to(device)

        # setup either adversarial or std binary cross-entropy loss
        if self.adversarial:
            self.criterion = nn.KLDivLoss(size_average=None,
                                          reduce=None,
                                          reduction='sum').to(device)
            self.lmbda = args.lmbda
        else:
            self.criterion = nn.BCEWithLogitsLoss(reduction='none').to(device)

        dirname = configuration['training']['exp_dirname']
        basepath = configuration['training'].get('basepath', 'outputs')
        self.time_str = time.ctime().replace(' ', '_')
        self.dirname = os.path.join(basepath, dirname, self.time_str)
Code example #2
File: optimizers.py Project: wyxingyuX/allennlp
    def from_params(cls, model_parameters: List,
                    params: Params):  # type: ignore
        # pylint: disable=arguments-differ
        if isinstance(params, str):
            optimizer = params
            params = Params({})
        else:
            optimizer = params.pop_choice("type", Optimizer.list_available())

        # make the parameter groups if needed
        groups = params.pop("parameter_groups", None)
        if groups:
            # The input to the optimizer is a list of dicts.
            # Each dict contains a "parameter group" and group-specific options,
            # e.g., {'params': [list of parameters], 'lr': 1e-3, ...}
            # Any config option not specified in the group-specific options
            # (e.g. for the default group) is inherited from the top level config.
            # see: http://pytorch.org/docs/0.3.0/optim.html?#per-parameter-options
            #
            # groups contains a list of [regex list, options dict] pairs, e.g.:
            # "parameter_groups": [
            #     [["regex1", "regex2"], {"lr": 1e-3}],
            #     [["regex3"], {"lr": 1e-4}]
            # ]
            # (note that the allennlp config files require double quotes ", and will
            # fail (sometimes silently) with single quotes ').

            # This is typed as Any since the dict values other than
            # the params key are passed to the Optimizer constructor and
            # can be any type it accepts.
            # In addition to any parameters that match group specific regex,
            # we also need a group for the remaining "default" group.
            # Those will be included in the last entry of parameter_groups.
            parameter_groups: Any = [{
                'params': []
            } for _ in range(len(groups) + 1)]
            # add the group specific kwargs
            for k in range(len(groups)):  # pylint: disable=consider-using-enumerate
                parameter_groups[k].update(groups[k][1].as_dict())

            regex_use_counts: Dict[str, int] = {}
            parameter_group_names: List[set] = [
                set() for _ in range(len(groups) + 1)
            ]
            for name, param in model_parameters:
                # Determine the group for this parameter.
                group_index = None
                for k, group_regexes in enumerate(groups):
                    for regex in group_regexes[0]:
                        if regex not in regex_use_counts:
                            regex_use_counts[regex] = 0
                        if re.search(regex, name):
                            if group_index is not None and group_index != k:
                                raise ValueError(
                                    "{} was specified in two separate parameter groups"
                                    .format(name))
                            group_index = k
                            regex_use_counts[regex] += 1

                if group_index is not None:
                    parameter_groups[group_index]['params'].append(param)
                    parameter_group_names[group_index].add(name)
                else:
                    # the default group
                    parameter_groups[-1]['params'].append(param)
                    parameter_group_names[-1].add(name)

            # log the parameter groups
            logger.info("Done constructing parameter groups.")
            for k in range(len(groups) + 1):
                group_options = {
                    key: val
                    for key, val in parameter_groups[k].items()
                    if key != 'params'
                }
                logger.info("Group %s: %s, %s", k,
                            list(parameter_group_names[k]), group_options)
            # check for unused regex
            for regex, count in regex_use_counts.items():
                if count == 0:
                    logger.warning(
                        "When constructing parameter groups, "
                        " %s not match any parameter name", regex)

        else:
            parameter_groups = [param for name, param in model_parameters]

        return Optimizer.by_name(optimizer)(parameter_groups,
                                            **params.as_dict())  # type: ignore
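For reference, a minimal sketch of how the "parameter_groups" option documented above might be supplied; the regex and learning rates are illustrative assumptions, for a model whose encoder parameters are all named with an "encoder" prefix:

    # Illustrative config only: one group for encoder parameters with a lower
    # learning rate; everything else falls into the implicit default group.
    optimizer_params = Params({
        "type": "adam",
        "lr": 1e-3,
        "parameter_groups": [
            [["^encoder"], {"lr": 1e-4}],
        ],
    })
    optimizer = Optimizer.from_params(model.named_parameters(), optimizer_params)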
Code example #3
    def test_from_params(self):

        params = Params({"use_subtrees": True, "granularity": "5-class"})
        reader = StanfordSentimentTreeBankDatasetReader.from_params(params)
        assert reader._use_subtrees is True
        assert reader._granularity == "5-class"
Code example #4
File: model_test_case.py Project: nadgeri14/allennlp
    def ensure_model_can_train_save_and_load(
        self,
        param_file: str,
        tolerance: float = 1e-4,
        cuda_device: int = -1,
        gradients_to_ignore: Set[str] = None,
        overrides: str = "",
        disable_dropout: bool = True,
    ):
        """
        # Parameters

        param_file : ``str``
            Path to a training configuration file that we will use to train the model for this
            test.
        tolerance : ``float``, optional (default=1e-4)
            When comparing model predictions between the originally-trained model and the model
            after saving and loading, we will use this tolerance value (passed as ``rtol`` to
            ``numpy.testing.assert_allclose``).
        cuda_device : ``int``, optional (default=-1)
            The device to run the test on.
        gradients_to_ignore : ``Set[str]``, optional (default=None)
            This test runs a gradient check to make sure that we're actually computing gradients
            for all of the parameters in the model.  If you really want to ignore certain
            parameters when doing that check, you can pass their names here.  This is not
            recommended unless you're `really` sure you don't need to have non-zero gradients for
            those parameters (e.g., some of the beam search / state machine models have
            infrequently-used parameters that are hard to force the model to use in a small test).
        overrides : ``str``, optional (default = "")
            A JSON string that we will use to override values in the input parameter file.
        disable_dropout : ``bool``, optional (default = True)
            If True we will set all dropout to 0 before checking gradients. (Otherwise, with small
            datasets, you may get zero gradients because of unlucky dropout.)
        """
        save_dir = self.TEST_DIR / "save_and_load_test"
        archive_file = save_dir / "model.tar.gz"
        model = train_model_from_file(param_file,
                                      save_dir,
                                      overrides=overrides)
        loaded_model = load_archive(archive_file,
                                    cuda_device=cuda_device).model
        state_keys = model.state_dict().keys()
        loaded_state_keys = loaded_model.state_dict().keys()
        assert state_keys == loaded_state_keys
        # First we make sure that the state dict (the parameters) are the same for both models.
        for key in state_keys:
            assert_allclose(
                model.state_dict()[key].cpu().numpy(),
                loaded_model.state_dict()[key].cpu().numpy(),
                err_msg=key,
            )
        params = Params.from_file(param_file, params_overrides=overrides)
        reader = DatasetReader.from_params(params["dataset_reader"])

        # Need to duplicate params because Iterator.from_params will consume.
        iterator_params = params["iterator"]
        iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))

        iterator = DataIterator.from_params(iterator_params)
        iterator2 = DataIterator.from_params(iterator_params2)

        # We'll check that even if we index the dataset with each model separately, we still get
        # the same result out.
        print("Reading with original model")
        model_dataset = reader.read(params["validation_data_path"])
        iterator.index_with(model.vocab)
        model_batch = next(iterator(model_dataset, shuffle=False))

        print("Reading with loaded model")
        loaded_dataset = reader.read(params["validation_data_path"])
        iterator2.index_with(loaded_model.vocab)
        loaded_batch = next(iterator2(loaded_dataset, shuffle=False))

        # Check gradients are None for non-trainable parameters and check that
        # trainable parameters receive some gradient if they are trainable.
        self.check_model_computes_gradients_correctly(model, model_batch,
                                                      gradients_to_ignore,
                                                      disable_dropout)

        # The datasets themselves should be identical.
        assert model_batch.keys() == loaded_batch.keys()
        for key in model_batch.keys():
            self.assert_fields_equal(model_batch[key], loaded_batch[key], key,
                                     1e-6)

        # Set eval mode, to turn off things like dropout, then get predictions.
        model.eval()
        loaded_model.eval()
        # Models with stateful RNNs need their states reset to have consistent
        # behavior after loading.
        for model_ in [model, loaded_model]:
            for module in model_.modules():
                if hasattr(module, "stateful") and module.stateful:
                    module.reset_states()
        print("Predicting with original model")
        model_predictions = model(**model_batch)
        print("Predicting with loaded model")
        loaded_model_predictions = loaded_model(**loaded_batch)

        # Check loaded model's loss exists and we can compute gradients, for continuing training.
        loaded_model_loss = loaded_model_predictions["loss"]
        assert loaded_model_loss is not None
        loaded_model_loss.backward()

        # Both outputs should have the same keys and the values for these keys should be close.
        for key in model_predictions.keys():
            self.assert_fields_equal(model_predictions[key],
                                     loaded_model_predictions[key],
                                     name=key,
                                     tolerance=tolerance)

        return model, loaded_model
Code example #5
 def test_can_init_dot(self):
     legacy_attention = Attention.from_params(
         Params({u"type": u"dot_product"}))
     assert isinstance(legacy_attention, DotProductAttention)
Code example #6
    def test_can_build_from_params(self):
        reader = SquadReader.from_params(Params({}))

        assert reader._tokenizer.__class__.__name__ == "SpacyTokenizer"
        assert reader._token_indexers[
            "tokens"].__class__.__name__ == "SingleIdTokenIndexer"
Code example #7
# Try to use the validation dataset reader if there is one - otherwise fall back
# to the default dataset_reader used for both training and validation.
validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
if validation_dataset_reader_params is not None:
    dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
else:
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
evaluation_data_path = args.input_file

embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping)
                                     if args.embedding_sources_mapping else {})
if args.extend_vocab:
    logger.info("Vocabulary is being extended with test instances.")
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)
    model.vocab.extend_from_instances(Params({}), instances=instances)
    model.extend_embedder_vocab(embedding_sources)


predictor = AMconllPredictor(dataset_reader, args.k, args.give_up, args.threads, model=model)

requires_art_root = {"DM" : True, "PAS": True, "PSD": True, "EDS" : False, "AMR-2015": False, "AMR-2017": False}

sentences = []
with open(args.input_file) as f:
    for sentence in f:
        words = sentence.rstrip("\n").split(" ")
        sentences.append(from_raw_text(words, requires_art_root[args.formalism], dict()))
with TemporaryDirectory() as direc:
    temp_path = direc + "/sentences.amconll"
    with open(temp_path, "w") as f:
Code example #8
 def test_ignore_oov_should_fail_on_non_padded_vocab(self):
     params = Params({"ignore_oov": True})
     self.assertRaises(ConfigurationError,
                       BagOfWordCountsTokenEmbedder.from_params,
                       self.non_padded_vocab, params)
Code example #9
    def test_create_kwargs(self):
        kwargs = create_kwargs(MyClass, MyClass, Params({"my_int": 5}), my_bool=True, my_float=4.4)

        # my_float should not be included because it's not a param of the MyClass constructor
        assert kwargs == {"my_int": 5, "my_bool": True}
Code example #10
 def _get_optimizer(self, lr: float = 1.0):
     optimizer_params = Params({"type": "sgd", "lr": lr})
     optimizer_params["parameter_groups"] = [[[f"^{m}"], {}]
                                             for m in self.model._modules]
     return Optimizer.from_params(self.model.named_parameters(),
                                  optimizer_params)
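Note the group construction here: each top-level module name in self.model._modules becomes its own single-regex group (anchored with ^ so it matches from the start of the parameter name), with an empty options dict, while the implicit final group collects any remaining parameters.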
Code example #11
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(
        logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides,
                           args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader',
                                                  None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(
            validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(
            config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources: Dict[str,
                            str] = (json.loads(args.embedding_sources_mapping)
                                    if args.embedding_sources_mapping else {})
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(Params({}), instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    thrs = args.thresholds.replace("_", ",").split(",")

    for thr in thrs:
        model._temperature_threshold = float(thr)
        metrics = evaluate(model, instances, iterator, args.cuda_device,
                           args.batch_weight_key)

        logger.info("Finished evaluating.")
        logger.info("Metrics:")
        for key, metric in metrics.items():
            logger.info("%s: %s: %s", thr, key, metric)

        output_file = args.output_file
        if output_file:
            with open(output_file + "_" + thr, "w") as file:
                json.dump(metrics, file, indent=4)
    return metrics
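One detail worth calling out: args.thresholds is parsed with .replace("_", ",").split(","), so thresholds may be delimited by underscores or commas (e.g. "0.5_0.7" or "0.5,0.7"). The loop runs a full evaluation per threshold and writes each metrics dict to output_file + "_" + thr, but the function returns only the metrics of the last threshold.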
Code example #12
def create_model(vocab: Vocabulary, embedding_dim: int,
                 hidden_dim: int, TaskModel: Model = BaseTextClassifier,
                 wemb: str = None, encoder_type: str = "lstm",
                 pretrained_model: BaseTextClassifier = None,
                 fix_pretrained_weights: bool = False, **kwargs) -> Model:
    """
    :param vocab: input / output vocabulary of the dataset
    :param embedding_dim:
    :param hidden_dim:
    :param TaskModel:  the model to apply
    :param encoder_type: GRU, LSTM
    :param wemb: type of word embeddings being used None, ELMO, Glove
    :param dropout:
    :param n_layers:
    :param pretrained_model: use a pretrained model as an input to copy the encoder layers from
    e.g. for building a domain classifier
    :param fix_pretrained_weights: whether to fix embeddings of the encoding layer or not
    (only if a pretrained model is provided)
    :return:
    """

    if wemb is None: wemb = "random"

    if wemb.lower() == "elmo":

        word_embeddings_params = Params({
                "embedding_dim": 100,
                "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.100d.txt.gz",
                "trainable": False
              })

        elmo_params = Params({
                "options_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json",
                "weight_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5",
                "do_layer_norm": False,
                "dropout": 0.5,
                "requires_grad": False
              })

        token_embeddings = Embedding.from_params(vocab, word_embeddings_params)
        elmo_embeddings = ElmoTokenEmbedder.from_params(vocab, elmo_params)
        word_embeddings = BasicTextFieldEmbedder({"tokens": token_embeddings, "elmo": elmo_embeddings})

    elif wemb.lower() == "glove" or "http" in wemb or os.path.exists(wemb):

        if wemb.lower() == "glove":
            pretrained_file = "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.100d.txt.gz"
            embedding_dim = 100
        else:
            pretrained_file = wemb

        word_embeddings_params = Params({
                "embedding_dim": embedding_dim,
                "pretrained_file": pretrained_file,
                "trainable": False
        })

        token_embeddings = Embedding.from_params(vocab=vocab, params=word_embeddings_params)
        word_embeddings = BasicTextFieldEmbedder({"tokens": token_embeddings})

    else:
        token_embeddings = Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                                     embedding_dim=embedding_dim)
        word_embeddings = BasicTextFieldEmbedder({"tokens": token_embeddings})

    embedding_dim = word_embeddings.get_output_dim()
    rnn_params = Params({"type": encoder_type,
                         "input_size": embedding_dim,
                         "hidden_size": hidden_dim,
                         "num_layers": kwargs["num_layers"],
                         "bidirectional": kwargs["bidirectional"]})

    del kwargs["num_layers"]
    del kwargs["bidirectional"]

    if TaskModel is BaseSequenceTagger or (TaskModel is SequenceDomainClassifier and kwargs["num_extra_rnn_layers"] > 0):
        rnn = Seq2SeqEncoder.from_params(rnn_params)
    else:
        rnn = Seq2VecEncoder.from_params(rnn_params)

    model = TaskModel(word_embeddings, rnn, vocab, **kwargs)

    # if a Pretrained model is provided
    # in the case of copying encoding representations from the task classifier to the domain classifier
    if pretrained_model is not None:

        # freezing embeddings of the encoder and the word embeddings
        model.encoder.load_state_dict(pretrained_model.encoder.state_dict())
        model.word_embeddings.load_state_dict(pretrained_model.word_embeddings.state_dict())

        if fix_pretrained_weights:

            for p in model.encoder.parameters():
                p.requires_grad = False

            for p in model.word_embeddings.parameters():
                p.requires_grad = False

    return model
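A hypothetical invocation, for illustration only (the dimensions are assumptions; num_layers and bidirectional must be present in **kwargs because create_model pops them before constructing the task model):

    # Illustrative only: default BaseTextClassifier over GloVe vectors.
    model = create_model(vocab, embedding_dim=100, hidden_dim=64,
                         wemb="glove", num_layers=1, bidirectional=True)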
Code example #13
    def ensure_model_can_train_save_and_load(self,
                                             param_file: str,
                                             tolerance: float = 1e-4,
                                             cuda_device: int = -1):
        save_dir = self.TEST_DIR / "save_and_load_test"
        archive_file = save_dir / "model.tar.gz"
        model = train_model_from_file(param_file, save_dir)
        loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
        state_keys = model.state_dict().keys()
        loaded_state_keys = loaded_model.state_dict().keys()
        assert state_keys == loaded_state_keys
        # First we make sure that the state dict (the parameters) are the same for both models.
        for key in state_keys:
            assert_allclose(model.state_dict()[key].cpu().numpy(),
                            loaded_model.state_dict()[key].cpu().numpy(),
                            err_msg=key)
        params = Params.from_file(param_file)
        reader = DatasetReader.from_params(params['dataset_reader'])

        # Need to duplicate params because Iterator.from_params will consume.
        iterator_params = params['iterator']
        iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))

        iterator = DataIterator.from_params(iterator_params)
        iterator2 = DataIterator.from_params(iterator_params2)

        # We'll check that even if we index the dataset with each model separately, we still get
        # the same result out.
        model_dataset = reader.read(params['validation_data_path'])
        iterator.index_with(model.vocab)
        model_batch = next(iterator(model_dataset, shuffle=False, cuda_device=cuda_device))

        loaded_dataset = reader.read(params['validation_data_path'])
        iterator2.index_with(loaded_model.vocab)
        loaded_batch = next(iterator2(loaded_dataset, shuffle=False, cuda_device=cuda_device))

        # Check gradients are None for non-trainable parameters and check that
        # trainable parameters receive some gradient if they are trainable.
        self.check_model_computes_gradients_correctly(model, model_batch)

        # The datasets themselves should be identical.
        assert model_batch.keys() == loaded_batch.keys()
        for key in model_batch.keys():
            self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

        # Set eval mode, to turn off things like dropout, then get predictions.
        model.eval()
        loaded_model.eval()
        # Models with stateful RNNs need their states reset to have consistent
        # behavior after loading.
        for model_ in [model, loaded_model]:
            for module in model_.modules():
                if hasattr(module, 'stateful') and module.stateful:
                    module.reset_states()
        model_predictions = model(**model_batch)
        loaded_model_predictions = loaded_model(**loaded_batch)

        # Check loaded model's loss exists and we can compute gradients, for continuing training.
        loaded_model_loss = loaded_model_predictions["loss"]
        assert loaded_model_loss is not None
        loaded_model_loss.backward()

        # Both outputs should have the same keys and the values for these keys should be close.
        for key in model_predictions.keys():
            self.assert_fields_equal(model_predictions[key],
                                     loaded_model_predictions[key],
                                     name=key,
                                     tolerance=tolerance)

        return model, loaded_model
Code example #14
    def setUp(self):
        super(BidirectionalAttentionFlowTest, self).setUp()

        constants.GLOVE_PATH = 'tests/fixtures/glove.6B.100d.sample.txt.gz'
        reader_params = Params({
            'token_indexers': {
                'tokens': {
                    'type': 'single_id'
                },
                'token_characters': {
                    'type': 'characters'
                }
            }
        })
        dataset = SquadReader.from_params(reader_params).read(
            'tests/fixtures/data/squad.json')
        vocab = Vocabulary.from_dataset(dataset)
        self.vocab = vocab
        dataset.index_instances(vocab)
        self.dataset = dataset
        self.token_indexers = {
            'tokens': SingleIdTokenIndexer(),
            'token_characters': TokenCharactersIndexer()
        }

        self.model = BidirectionalAttentionFlow.from_params(
            self.vocab, Params({}))

        small_params = Params({
            'text_field_embedder': {
                'tokens': {
                    'type': 'embedding',
                    'pretrained_file': constants.GLOVE_PATH,
                    'trainable': False,
                    'projection_dim': 4
                },
                'token_characters': {
                    'type': 'character_encoding',
                    'embedding': {
                        'embedding_dim': 8
                    },
                    'encoder': {
                        'type': 'cnn',
                        'embedding_dim': 8,
                        'num_filters': 4,
                        'ngram_filter_sizes': [5]
                    }
                }
            },
            'phrase_layer': {
                'type': 'lstm',
                'bidirectional': True,
                'input_size': 8,
                'hidden_size': 4,
                'num_layers': 1,
            },
            'similarity_function': {
                'type': 'linear',
                'combination': 'x,y,x*y',
                'tensor_1_dim': 8,
                'tensor_2_dim': 8
            },
            'modeling_layer': {
                'type': 'lstm',
                'bidirectional': True,
                'input_size': 32,
                'hidden_size': 4,
                'num_layers': 1,
            },
            'span_end_encoder': {
                'type': 'lstm',
                'bidirectional': True,
                'input_size': 56,
                'hidden_size': 4,
                'num_layers': 1,
            },
        })
        self.small_model = BidirectionalAttentionFlow.from_params(
            self.vocab, small_params)
Code example #15
    def test_train_model(self):
        params = lambda: Params({
            "model": {
                "type": "simple_tagger",
                "text_field_embedder": {
                    "token_embedders": {
                        "tokens": {
                            "type": "embedding",
                            "embedding_dim": 5
                        }
                    }
                },
                "encoder": {
                    "type": "lstm",
                    "input_size": 5,
                    "hidden_size": 7,
                    "num_layers": 2
                },
            },
            "dataset_reader": {
                "type": "sequence_tagging"
            },
            "train_data_path": SEQUENCE_TAGGING_DATA_PATH,
            "validation_data_path": SEQUENCE_TAGGING_DATA_PATH,
            "iterator": {
                "type": "basic",
                "batch_size": 2
            },
            "trainer": {
                "num_epochs": 2,
                "optimizer": "adam"
            },
        })

        train_model(params(),
                    serialization_dir=os.path.join(self.TEST_DIR,
                                                   "test_train_model"))

        # It's OK if serialization dir exists but is empty:
        serialization_dir2 = os.path.join(self.TEST_DIR, "empty_directory")
        assert not os.path.exists(serialization_dir2)
        os.makedirs(serialization_dir2)
        train_model(params(), serialization_dir=serialization_dir2)

        # It's not OK if serialization dir exists and has junk in it:
        serialization_dir3 = os.path.join(self.TEST_DIR, "non_empty_directory")
        assert not os.path.exists(serialization_dir3)
        os.makedirs(serialization_dir3)
        with open(os.path.join(serialization_dir3, "README.md"), "w") as f:
            f.write("TEST")

        with pytest.raises(ConfigurationError):
            train_model(params(), serialization_dir=serialization_dir3)

        # It's also not OK if serialization dir is a real serialization dir:
        with pytest.raises(ConfigurationError):
            train_model(params(),
                        serialization_dir=os.path.join(self.TEST_DIR,
                                                       "test_train_model"))

        # But it's OK if serialization dir exists and --recover is specified:
        train_model(
            params(),
            serialization_dir=os.path.join(self.TEST_DIR, "test_train_model"),
            recover=True,
        )

        # It's OK if serialization dir exists and --force is specified (it will be deleted):
        train_model(params(),
                    serialization_dir=os.path.join(self.TEST_DIR,
                                                   "test_train_model"),
                    force=True)

        # But --force and --recover cannot both be specified
        with pytest.raises(ConfigurationError):
            train_model(
                params(),
                serialization_dir=os.path.join(self.TEST_DIR,
                                               "test_train_model"),
                force=True,
                recover=True,
            )
Code example #16
def train_model(
    params: Params,
    serialization_dir: str,
    file_friendly_logging: bool = False,
    recover: bool = False,
    force: bool = False,
    node_rank: int = 0,
    include_package: List[str] = None,
    batch_weight_key: str = "",
) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    # Parameters

    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see ``Model.from_archive``.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    node_rank : ``int``, optional
        Rank of the current node in distributed training
    include_package : ``List[str]``, optional
        In distributed mode, extra packages mentioned will be imported in trainer workers.
    batch_weight_key : ``str``, optional (default="")
        If non-empty, name of metric used to weight the loss on a per-batch basis.

    # Returns

    best_model : ``Model``
        The model with the best epoch weights.
    """
    training_util.create_serialization_dir(params, serialization_dir, recover, force)
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    distributed_params = params.params.pop("distributed", None)
    # If distributed isn't in the config and the config contains strictly
    # one cuda device, we just run a single training process.
    if distributed_params is None:
        model = _train_worker(
            process_rank=0,
            params=params,
            serialization_dir=serialization_dir,
            file_friendly_logging=file_friendly_logging,
            recover=recover,
            include_package=include_package,
            batch_weight_key=batch_weight_key,
        )
        archive_model(serialization_dir)
        return model

    # Otherwise, we are running multiple processes for training.
    else:
        # We are careful here so that we can raise a good error if someone
        # passed the wrong thing - cuda_devices are required.
        device_ids = distributed_params.pop("cuda_devices", None)
        multi_device = isinstance(device_ids, list) and len(device_ids) > 1
        num_nodes = distributed_params.pop("num_nodes", 1)

        if not (multi_device or num_nodes > 1):
            raise ConfigurationError(
                "Multiple cuda devices/nodes need to be configured to run distributed training."
            )
        check_for_gpu(device_ids)

        master_addr = distributed_params.pop("master_address", "127.0.0.1")
        master_port = distributed_params.pop("master_port", 29500)
        num_procs = len(device_ids)
        world_size = num_nodes * num_procs

        logging.info(
            f"Switching to distributed training mode since multiple GPUs are configured"
            f"Master is at: {master_addr}:{master_port} | Rank of this node: {node_rank} | "
            f"Number of workers in this node: {num_procs} | Number of nodes: {num_nodes} | "
            f"World size: {world_size}"
        )

        # Creating `Vocabulary` objects from workers could be problematic since
        # the data iterators in each worker will yield only `rank` specific
        # instances. Hence it is safe to construct the vocabulary and write it
        # to disk before initializing the distributed context. The workers will
        # load the vocabulary from the path specified.
        if params.get("vocabulary", Params({})).get("type", "") != "from_files":
            vocab = training_util.make_vocab_from_params(params.duplicate(), serialization_dir)
            params["vocabulary"] = {
                "type": "from_files",
                "directory": os.path.join(serialization_dir, "vocabulary"),
                "padding_token": vocab._padding_token,
                "oov_token": vocab._oov_token,
            }

        mp.spawn(
            _train_worker,
            args=(
                params.duplicate(),
                serialization_dir,
                file_friendly_logging,
                recover,
                include_package,
                batch_weight_key,
                node_rank,
                master_addr,
                master_port,
                world_size,
                device_ids,
            ),
            nprocs=num_procs,
        )
        archive_model(serialization_dir)
        model = Model.load(params, serialization_dir)
        return model
Code example #17
 def test_from_params_requires_batch_first(self):
     params = Params({"type": "lstm", "batch_first": False})
     with pytest.raises(ConfigurationError):
         Seq2VecEncoder.from_params(params)
Code example #18
File: train.py Project: kshre/udify-xlmr
    file_name = args.config[0] if args.config else args.base_config
    log_dir_name = os.path.basename(file_name).split(".")[0]

configs = []

if not args.resume:
    serialization_dir = os.path.join(
        "logs", log_dir_name,
        datetime.datetime.now().strftime("%Y.%m.%d_%H.%M.%S"))

    overrides = {}
    if args.device is not None:
        overrides["trainer"] = {"cuda_device": args.device}
    if args.lazy is not None:
        overrides["dataset_reader"] = {"lazy": args.lazy}
    configs.append(Params(overrides))
    for config_file in args.config:
        configs.append(Params.from_file(config_file))
    configs.append(Params.from_file(args.base_config))
else:
    serialization_dir = args.resume
    configs.append(
        Params.from_file(os.path.join(serialization_dir, "config.json")))

train_params = util.merge_configs(configs)
if "vocabulary" in train_params:
    # Remove this key to make AllenNLP happy
    train_params["vocabulary"].pop("non_padded_namespaces", None)

predict_params = train_params.duplicate()
params = train_params.duplicate()
Code example #19
import json
from allennlp.common import FromParams, Params


class BaseGaussian(FromParams):
    def __init__(self, mean: float, variance: float):
        self.mean = mean
        self.variance = variance


class MyGaussian(BaseGaussian):
    def __init__(self, name: str, **kwargs):
        super().__init__(**kwargs)
        self.name = name


param_str = """{"mean": 0.0, "variance": 1.0, "name": "My Gaussian"}"""
params = Params(json.loads(param_str))
gaussian = MyGaussian.from_params(params)
print(f"Mean: {gaussian.mean}")
print(f"Variance: {gaussian.variance}")
print(f"Name: {gaussian.name}")
Code example #20
 def test_can_build_from_params(self):
     reader = QangarooReader.from_params(Params({}))
     # pylint: disable=protected-access
     assert reader._token_indexers[
         'tokens'].__class__.__name__ == 'SingleIdTokenIndexer'
Code example #21
def train(train_dataset, val_dataset, cfg):
    # Build the Vocabulary
    VOCAB_SIZE = cfg.w2v.vocab_size
    vocab = Vocabulary.from_instances(train_dataset + val_dataset,
                                      max_vocab_size=VOCAB_SIZE)

    BATCH_SIZE = cfg.training.batch_size

    # Iterator that produces padded mini-batches
    iterator = BucketIterator(batch_size=BATCH_SIZE,
                              sorting_keys=[("tokens", "num_tokens")])
    iterator.index_with(vocab)

    # Use the pretrained Japanese Wikipedia entity vectors released by Tohoku University
    # http://www.cl.ecei.tohoku.ac.jp/~m-suzuki/jawiki_vector/
    model_name = cfg.w2v.model_name
    norm = cfg.w2v.norm
    cwd = hydra.utils.get_original_cwd()
    params = Params({
        'embedding_dim': 200,
        'padding_index': 0,
        'pretrained_file': os.path.join(
            cwd, f'embs/jawiki.{model_name}_vectors.200d.txt'),
        'norm_type': norm
    })

    token_embedding = Embedding.from_params(vocab=vocab, params=params)
    HIDDEN_SIZE = cfg.model.hidden_size
    dropout = cfg.model.dropout

    word_embeddings: TextFieldEmbedder = BasicTextFieldEmbedder(
        {"tokens": token_embedding})
    encoder: Seq2SeqEncoder = PytorchSeq2SeqWrapper(
        nn.LSTM(word_embeddings.get_output_dim(),
                HIDDEN_SIZE,
                bidirectional=True,
                batch_first=True))
    model = ClassifierWithAttn(word_embeddings, encoder, vocab, dropout)
    model.train()

    USE_GPU = True

    if USE_GPU and torch.cuda.is_available():
        model = model.cuda(0)

    LR = cfg.training.learning_rate
    EPOCHS = cfg.training.epoch
    patience = cfg.training.patience if cfg.training.patience > 0 else None

    optimizer = optim.Adam(model.parameters(), lr=LR)
    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=train_dataset,
                      validation_dataset=val_dataset,
                      patience=patience,
                      cuda_device=0 if USE_GPU else -1,
                      num_epochs=EPOCHS)
    metrics = trainer.train()
    logger.info(metrics)

    return model, metrics
Code example #22
    def test_extras_for_custom_classes(self):

        from allennlp.common.registrable import Registrable

        class BaseClass(Registrable):
            pass

        class BaseClass2(Registrable):
            pass

        @BaseClass.register("A")
        class A(BaseClass):
            def __init__(self, a: int, b: int, val: str) -> None:
                self.a = a
                self.b = b
                self.val = val

            def __hash__(self):
                return self.b

            def __eq__(self, other):
                return self.b == other.b

            @classmethod
            def from_params(cls, params: Params, a: int,
                            **extras) -> "A":  # type: ignore
                # A custom from params
                b = params.pop_int("b")
                val = params.pop("val", "C")
                params.assert_empty(cls.__name__)
                return cls(a=a, b=b, val=val)

        @BaseClass2.register("B")
        class B(BaseClass2):
            def __init__(self, c: int, b: int) -> None:
                self.c = c
                self.b = b

            @classmethod
            def from_params(cls, params: Params, c: int,
                            **extras) -> "B":  # type: ignore
                b = params.pop_int("b")
                params.assert_empty(cls.__name__)
                return cls(c=c, b=b)

        @BaseClass.register("E")
        class E(BaseClass):
            def __init__(self, m: int, n: int) -> None:
                self.m = m
                self.n = n

            @classmethod
            def from_params(cls, params: Params,
                            **extras2) -> "E":  # type: ignore
                m = params.pop_int("m")
                params.assert_empty(cls.__name__)
                n = extras2["n"]
                return cls(m=m, n=n)

        class C:
            pass

        @BaseClass.register("D")
        class D(BaseClass):
            def __init__(
                self,
                arg1: List[BaseClass],
                arg2: Tuple[BaseClass, BaseClass2],
                arg3: Dict[str, BaseClass],
                arg4: Set[BaseClass],
                arg5: List[BaseClass],
            ) -> None:
                self.arg1 = arg1
                self.arg2 = arg2
                self.arg3 = arg3
                self.arg4 = arg4
                self.arg5 = arg5

        vals = [1, 2, 3]
        params = Params({
            "type": "D",
            "arg1": [
                {"type": "A", "b": vals[0]},
                {"type": "A", "b": vals[1]},
                {"type": "A", "b": vals[2]},
            ],
            "arg2": [{"type": "A", "b": vals[0]}, {"type": "B", "b": vals[0]}],
            "arg3": {
                "class_1": {"type": "A", "b": vals[0]},
                "class_2": {"type": "A", "b": vals[1]},
            },
            "arg4": [
                {"type": "A", "b": vals[0], "val": "M"},
                {"type": "A", "b": vals[1], "val": "N"},
                {"type": "A", "b": vals[1], "val": "N"},
            ],
            "arg5": [{"type": "E", "m": 9}],
        })
        extra = C()
        tval1 = 5
        tval2 = 6
        d = BaseClass.from_params(params=params,
                                  extra=extra,
                                  a=tval1,
                                  c=tval2,
                                  n=10)

        # Tests for List
        assert len(d.arg1) == len(vals)
        assert isinstance(d.arg1, list)
        assert isinstance(d.arg1[0], A)
        assert all(x.b == y for x, y in zip(d.arg1, vals))
        assert all(x.a == tval1 for x in d.arg1)

        # Tests for Tuple
        assert isinstance(d.arg2, tuple)
        assert isinstance(d.arg2[0], A)
        assert isinstance(d.arg2[1], B)
        assert d.arg2[0].a == tval1
        assert d.arg2[1].c == tval2
        assert d.arg2[0].b == d.arg2[1].b == vals[0]

        # Tests for Dict
        assert isinstance(d.arg3, dict)
        assert isinstance(d.arg3["class_1"], A)
        assert d.arg3["class_1"].a == d.arg3["class_2"].a == tval1
        assert d.arg3["class_1"].b == vals[0]
        assert d.arg3["class_2"].b == vals[1]

        # Tests for Set
        assert isinstance(d.arg4, set)
        assert len(d.arg4) == 2
        assert any(x.val == "M" for x in d.arg4)
        assert any(x.val == "N" for x in d.arg4)

        # Tests for custom extras parameters
        assert isinstance(d.arg5, list)
        assert isinstance(d.arg5[0], E)
        assert d.arg5[0].m == 9
        assert d.arg5[0].n == 10
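Taken together, the assertions show how from_params routes extras by constructor annotation: elements annotated as BaseClass (arg1 through arg4) receive a=tval1, the BaseClass2 element receives c=tval2, E pulls n=10 out of **extras2 itself, and the unused extra object is simply ignored. The Set test also relies on A's custom __eq__/__hash__: the two entries with b == vals[1] collapse into one, leaving two elements.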
Code example #23
File: feedforward_pair.py Project: ranglang/nlp
                   dropout=dropout)


if __name__ == "__main__":
    from allennlp.common import Params
    torch.manual_seed(999)

    batch = 16
    input_dim = 200
    hidden1 = 100
    hidden2 = 80
    test_input_1 = torch.autograd.Variable(torch.randn(batch, input_dim))
    test_input_2 = torch.autograd.Variable(torch.randn(batch, input_dim))

    ff_pair = FeedForwardPair.from_params(
        Params({
            "input_dim": input_dim,
            "num_layers": 2,
            "hidden_dims": [hidden1, hidden2],
            "activations": ["tanh", "linear"],
            "dropout": [0.0, 0.0]
        }))

    r1, r2 = ff_pair(test_input_1, test_input_2)

    assert r1.size() == r2.size() == torch.Size([batch, hidden2])

    test_input_2 = test_input_1.clone()
    r3, r4 = ff_pair(test_input_1, test_input_2)
    assert (r3 == r4).all()
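The last assertion is the interesting one: cloning test_input_1 into test_input_2 and requiring r3 == r4 elementwise only holds if both inputs are processed by the same shared-weight feedforward stack, and because dropout is configured as [0.0, 0.0] the forward pass is deterministic.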
Code example #24
    def test_no_constructor(self):
        params = Params({"type": "just_spaces"})

        Tokenizer.from_params(params)
Code example #25
def params(params_dict):
    return Params(params_dict)
Code example #26
    def test_from_params(self):
        my_class = MyClass.from_params(Params({"my_int": 10}), my_bool=True)

        assert isinstance(my_class, MyClass)
        assert my_class.my_int == 10
        assert my_class.my_bool
Code example #27
File: __init__.py Project: yakazimir/allennlp
 def __call__(self, **kwargs) -> PytorchSeq2VecWrapper:
     return self.from_params(Params(kwargs))
Code example #28
 def test_from_params_works_correctly(self):        
     tokenizer = NgramTokenizer.from_params(Params({'max_ngram_degree': 2}))
     assert tokenizer._max_ngram_degree == 2
Code example #29
 def test_can_construct_from_params(self):
     assert CosineSimilarity.from_params(Params(
         {})).__class__.__name__ == 'CosineSimilarity'
Code example #30
 def test_can_build_from_params(self):
     reader = SquadReader.from_params(Params({}))
     # pylint: disable=protected-access
     assert reader._tokenizer.__class__.__name__ == 'WordTokenizer'
     assert reader._token_indexers["tokens"].__class__.__name__ == 'SingleIdTokenIndexer'