Code Example #1
    def test_from_params(self):
        # pylint: disable=protected-access
        params = Params({})

        with pytest.raises(ConfigurationError):
            iterator = BucketIterator.from_params(params)

        sorting_keys = [("s1", "nt"), ("s2", "nt2")]
        params['sorting_keys'] = sorting_keys
        iterator = BucketIterator.from_params(params)

        assert iterator._sorting_keys == sorting_keys
        assert iterator._padding_noise == 0.1
        assert not iterator._biggest_batch_first
        assert iterator._batch_size == 32

        params = Params({
            "sorting_keys": sorting_keys,
            "padding_noise": 0.5,
            "biggest_batch_first": True,
            "batch_size": 100
        })

        iterator = BucketIterator.from_params(params)
        assert iterator._sorting_keys == sorting_keys
        assert iterator._padding_noise == 0.5
        assert iterator._biggest_batch_first
        assert iterator._batch_size == 100
Code Example #2
File: __init__.py Project: sanyu12/Bert_Attempt
 def from_params(self, params: Params) -> PytorchSeq2VecWrapper:
     if not params.pop('batch_first', True):
         raise ConfigurationError("Our encoder semantics assumes batch is always first!")
     if self._module_class in self.PYTORCH_MODELS:
         params['batch_first'] = True
     module = self._module_class(**params.as_dict())
     return PytorchSeq2VecWrapper(module)
Code Example #3
File: archival_test.py Project: sanyu12/Bert_Attempt
    def setUp(self):
        super().setUp()

        self.params = Params({
            "model": {
                "type": "simple_tagger",
                "text_field_embedder": {
                    "tokens": {
                        "type": "embedding",
                        "embedding_dim": 5
                    }
                },
                "encoder": {
                    "type": "lstm",
                    "input_size": 5,
                    "hidden_size": 7,
                    "num_layers": 2
                }
            },
            "dataset_reader": {
                "type": "sequence_tagging"
            },
            "train_data_path":
            str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv'),
            "validation_data_path":
            str(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv'),
            "iterator": {
                "type": "basic",
                "batch_size": 2
            },
            "trainer": {
                "num_epochs": 2,
                "optimizer": "adam",
            }
        })
Code Example #4
    def test_forward(self):
        batch = 16
        len1, len2 = 21, 24
        seq_len1 = torch.randint(low=len1 - 10, high=len1 + 1, size=(batch,)).long()
        seq_len2 = torch.randint(low=len2 - 10, high=len2 + 1, size=(batch,)).long()

        mask1 = []
        for w in seq_len1:
            mask1.append([1] * w.item() + [0] * (len1 - w.item()))
        mask1 = torch.FloatTensor(mask1)
        mask2 = []
        for w in seq_len2:
            mask2.append([1] * w.item() + [0] * (len2 - w.item()))
        mask2 = torch.FloatTensor(mask2)

        d = 200  # hidden dimension
        l = 20  # number of perspectives
        test1 = torch.randn(batch, len1, d)
        test2 = torch.randn(batch, len2, d)
        test1 = test1 * mask1.view(-1, len1, 1).expand(-1, len1, d)
        test2 = test2 * mask2.view(-1, len2, 1).expand(-1, len2, d)

        test1_fw, test1_bw = torch.split(test1, d // 2, dim=-1)
        test2_fw, test2_bw = torch.split(test2, d // 2, dim=-1)

        ml_fw = BiMpmMatching.from_params(Params({"is_forward": True, "num_perspectives": l}))
        ml_bw = BiMpmMatching.from_params(Params({"is_forward": False, "num_perspectives": l}))

        vecs_p_fw, vecs_h_fw = ml_fw(test1_fw, mask1, test2_fw, mask2)
        vecs_p_bw, vecs_h_bw = ml_bw(test1_bw, mask1, test2_bw, mask2)
        vecs_p, vecs_h = torch.cat(vecs_p_fw + vecs_p_bw, dim=2), torch.cat(vecs_h_fw + vecs_h_bw, dim=2)

        assert vecs_p.size() == torch.Size([batch, len1, 10 + 10 * l])
        assert vecs_h.size() == torch.Size([batch, len2, 10 + 10 * l])
        assert ml_fw.get_output_dim() == ml_bw.get_output_dim() == vecs_p.size(2) // 2 == vecs_h.size(2) // 2
Code Example #5
    def test_read_embedding_file_inside_archive(self):
        token2vec = {
            "think": torch.Tensor([0.143, 0.189, 0.555, 0.361, 0.472]),
            "make": torch.Tensor([0.878, 0.651, 0.044, 0.264, 0.872]),
            "difference": torch.Tensor([0.053, 0.162, 0.671, 0.110, 0.259]),
            "àèìòù": torch.Tensor([1.0, 2.0, 3.0, 4.0, 5.0])
        }
        vocab = Vocabulary()
        for token in token2vec:
            vocab.add_token_to_namespace(token)

        params = Params({
            'pretrained_file': str(self.FIXTURES_ROOT / 'embeddings/multi-file-archive.zip'),
            'embedding_dim': 5
        })
        with pytest.raises(
                ValueError,
                message="No ValueError when pretrained_file is a multi-file archive"):
            Embedding.from_params(vocab, params)

        for ext in ['.zip', '.tar.gz']:
            archive_path = str(
                self.FIXTURES_ROOT / 'embeddings/multi-file-archive') + ext
            file_uri = format_embeddings_file_uri(
                archive_path, 'folder/fake_embeddings.5d.txt')
            params = Params({'pretrained_file': file_uri, 'embedding_dim': 5})
            embeddings = Embedding.from_params(vocab, params).weight.data
            for tok, vec in token2vec.items():
                i = vocab.get_token_index(tok)
                assert torch.equal(embeddings[i],
                                   vec), 'Problem with format ' + archive_path
Code Example #6
    def test_from_params(self):
        # pylint: disable=protected-access
        params = Params({})
        iterator = BasicIterator.from_params(params)
        assert iterator._batch_size == 32  # default value

        params = Params({"batch_size": 10})
        iterator = BasicIterator.from_params(params)
        assert iterator._batch_size == 10
Code Example #7
 @classmethod
 def from_params(cls, vocab: Vocabulary, params: Params) -> 'TokenCharactersEncoder':  # type: ignore
     # pylint: disable=arguments-differ
     embedding_params: Params = params.pop("embedding")
     # Embedding.from_params() uses "tokens" as the default namespace, but we need to change
     # that to be "token_characters" by default.
     embedding_params.setdefault("vocab_namespace", "token_characters")
     embedding = Embedding.from_params(vocab, embedding_params)
     encoder_params: Params = params.pop("encoder")
     encoder = Seq2VecEncoder.from_params(encoder_params)
     dropout = params.pop_float("dropout", 0.0)
     params.assert_empty(cls.__name__)
     return cls(embedding, encoder, dropout)
Code Example #8
    def test_extras(self):
        # pylint: disable=unused-variable,arguments-differ
        from srl_model.common.registrable import Registrable

        class A(Registrable):
            pass

        @A.register("b")
        class B(A):
            def __init__(self, size: int, name: str) -> None:
                self.size = size
                self.name = name

        @A.register("c")
        class C(A):
            def __init__(self, size: int, name: str) -> None:
                self.size = size
                self.name = name

            # custom from params
            @classmethod
            def from_params(cls, params: Params, size: int) -> 'C':  # type: ignore
                name = params.pop('name')
                return cls(size=size, name=name)


        # Check that extras get passed, even though A doesn't need them.
        params = Params({"type": "b", "size": 10})
        b = A.from_params(params, name="extra")

        assert b.name == "extra"
        assert b.size == 10

        # Check that extra extras don't get passed.
        params = Params({"type": "b", "size": 10})
        b = A.from_params(params, name="extra", unwanted=True)

        assert b.name == "extra"
        assert b.size == 10

        # Now the same with a custom from_params.
        params = Params({"type": "c", "name": "extra_c"})
        c = A.from_params(params, size=20)
        assert c.name == "extra_c"
        assert c.size == 20

        # Check that extra extras don't get passed.
        params = Params({"type": "c", "name": "extra_c"})
        c = A.from_params(params, size=20, unwanted=True)

        assert c.name == "extra_c"
        assert c.size == 20
Code Example #9
    def test_mismatched_dimensions_raise_configuration_errors(self):
        params = Params.from_file(self.param_file)
        # Make the input_dim to the first feedforward_layer wrong - it should be 2.
        params["model"]["attend_feedforward"]["input_dim"] = 10
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=params.pop("model"))

        params = Params.from_file(self.param_file)
        # Make the projection output_dim of the last layer wrong - it should be
        # 3, equal to the number of classes.
        params["model"]["aggregate_feedforward"]["output_dim"] = 10
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=params.pop("model"))
Code Example #10
 def test_can_construct_from_params(self):
     params = Params({
             'embedding_dim': 5,
             })
     encoder = BagOfEmbeddingsEncoder.from_params(params)
     assert encoder.get_input_dim() == 5
     assert encoder.get_output_dim() == 5
     params = Params({
             'embedding_dim': 12,
             'averaged': True
             })
     encoder = BagOfEmbeddingsEncoder.from_params(params)
     assert encoder.get_input_dim() == 12
     assert encoder.get_output_dim() == 12
Code Example #11
    def test_search(self):
        beam_search = BeamSearch.from_params(Params({'beam_size': 4}))
        initial_state = SimpleDecoderState([0, 1, 2, 3], [[], [], [], []], [
            torch.Tensor([0.0]),
            torch.Tensor([0.0]),
            torch.Tensor([0.0]),
            torch.Tensor([0.0])
        ], [-3, 1, -20, 5])
        decoder_step = SimpleDecoderStep(include_value_in_score=True)
        best_states = beam_search.search(5,
                                         initial_state,
                                         decoder_step,
                                         keep_final_unfinished_states=False)

        # Instance with batch index 2 needed too many steps to finish, and batch index 3 had no
        # path to get to a finished state.  (See the simple transition system definition; goal is
        # to end up at 4, actions are either add one or two to starting value.)
        assert len(best_states) == 2
        assert best_states[0][0].action_history[0] == [-1, 1, 3, 4]
        assert best_states[1][0].action_history[0] == [3, 4]

        best_states = beam_search.search(5,
                                         initial_state,
                                         decoder_step,
                                         keep_final_unfinished_states=True)

        # Now we're keeping final unfinished states, which allows a "best state" for the instances
        # that didn't have one before.  Our previous best states for the instances that finish
        # doesn't change, because the score for taking another step is always negative at these
        # values.
        assert len(best_states) == 4
        assert best_states[0][0].action_history[0] == [-1, 1, 3, 4]
        assert best_states[1][0].action_history[0] == [3, 4]
        assert best_states[2][0].action_history[0] == [-18, -16, -14, -12, -10]
        assert best_states[3][0].action_history[0] == [7, 9, 11, 13, 15]
Code Example #12
    def test_train_with_test_set(self):
        params = Params({
                "model": {
                        "type": "simple_tagger",
                        "text_field_embedder": {
                                "tokens": {
                                        "type": "embedding",
                                        "embedding_dim": 5
                                }
                        },
                        "encoder": {
                                "type": "lstm",
                                "input_size": 5,
                                "hidden_size": 7,
                                "num_layers": 2
                        }
                },
                "dataset_reader": {"type": "lazy-test"},
                "train_data_path": SEQUENCE_TAGGING_DATA_PATH,
                "test_data_path": SEQUENCE_TAGGING_DATA_PATH,
                "validation_data_path": SEQUENCE_TAGGING_DATA_PATH,
                "evaluate_on_test": True,
                "iterator": {"type": "basic", "batch_size": 2},
                "trainer": {
                        "num_epochs": 2,
                        "optimizer": "adam"
                }
        })

        train_model(params, serialization_dir=os.path.join(self.TEST_DIR, 'lazy_test_set'))
Code Example #13
    def test_forward_gives_correct_output(self):
        params = Params({
            'input_dim': 2,
            'output_dims': 3,
            'pool_sizes': 4,
            'dropout': 0.0,
            'num_layers': 2
        })
        maxout = Maxout.from_params(params)

        constant_init = lambda tensor: torch.nn.init.constant_(tensor, 1.)
        initializer = InitializerApplicator([(".*", constant_init)])
        initializer(maxout)

        input_tensor = torch.FloatTensor([[-3, 1]])
        output = maxout(input_tensor).data.numpy()
        assert output.shape == (1, 3)
        # This output was checked by hand
        # The output of the first maxout layer is [-1, -1, -1], since the
        # matrix multiply gives us [-2]*12. Reshaping and maxing
        # produces [-2, -2, -2] and the bias increments these values.
        # The second layer output is [-2, -2, -2], since the matrix
        # multiply gives us [-3]*12. Reshaping and maxing
        # produces [-3, -3, -3] and the bias increments these values.
        assert_almost_equal(output, [[-2, -2, -2]])
Code Example #14
File: fine_tune.py Project: sanyu12/Bert_Attempt
def fine_tune_model_from_file_paths(model_archive_path: str,
                                    config_file: str,
                                    serialization_dir: str,
                                    overrides: str = "",
                                    extend_vocab: bool = False,
                                    file_friendly_logging: bool = False) -> Model:
    """
    A wrapper around :func:`fine_tune_model` which loads the model archive from a file.

    Parameters
    ----------
    model_archive_path : ``str``
        Path to a saved model archive that is the result of running the ``train`` command.
    config_file : ``str``
        A configuration file specifying how to continue training.  The format is identical to the
        configuration file for the ``train`` command, but any contents in the ``model`` section is
        ignored (as we are using the provided model archive instead).
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`fine_tune_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
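    extend_vocab : ``bool``, optional (default=False)
        Whether to extend the loaded model's vocabulary before fine-tuning.  We just pass this
        along to :func:`fine_tune_model`.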
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`fine_tune_model`.
    """
    # We don't need to pass in `cuda_device` here, because the trainer will call `model.cuda()` if
    # necessary.
    archive = load_archive(model_archive_path)
    params = Params.from_file(config_file, overrides)
    return fine_tune_model(model=archive.model,
                           params=params,
                           serialization_dir=serialization_dir,
                           extend_vocab=extend_vocab,
                           file_friendly_logging=file_friendly_logging)
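
A minimal usage sketch for the wrapper above; the import path and the file names below are hypothetical placeholders, not files from the project shown:

# Hypothetical sketch: the module path and the archive/config/output paths are placeholders.
from srl_model.commands.fine_tune import fine_tune_model_from_file_paths

model = fine_tune_model_from_file_paths(model_archive_path="model.tar.gz",
                                        config_file="fine_tune_config.json",
                                        serialization_dir="fine_tuned_output")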
Code Example #15
    def test_error_is_throw_when_cuda_device_is_not_available(self):
        params = Params({
                "model": {
                        "type": "simple_tagger",
                        "text_field_embedder": {
                                "tokens": {
                                        "type": "embedding",
                                        "embedding_dim": 5
                                }
                        },
                        "encoder": {
                                "type": "lstm",
                                "input_size": 5,
                                "hidden_size": 7,
                                "num_layers": 2
                        }
                },
                "dataset_reader": {"type": "sequence_tagging"},
                "train_data_path": 'tests/fixtures/data/sequence_tagging.tsv',
                "validation_data_path": 'tests/fixtures/data/sequence_tagging.tsv',
                "iterator": {"type": "basic", "batch_size": 2},
                "trainer": {
                        "num_epochs": 2,
                        "cuda_device": torch.cuda.device_count(),
                        "optimizer": "adam"
                }
        })

        with pytest.raises(ConfigurationError,
                           message="Experiment specified a GPU but none is available;"
                                   " if you want to run on CPU use the override"
                                   " 'trainer.cuda_device=-1' in the json config file."):
            train_model(params, serialization_dir=os.path.join(self.TEST_DIR, 'test_train_model'))
Code Example #16
    def test_forward_works_with_projection_layer(self):
        params = Params({
            'options_file': self.FIXTURES_ROOT / 'elmo' / 'options.json',
            'weight_file': self.FIXTURES_ROOT / 'elmo' / 'lm_weights.hdf5',
            'projection_dim': 20
        })
        word1 = [0] * 50
        word2 = [0] * 50
        word1[0] = 6
        word1[1] = 5
        word1[2] = 4
        word1[3] = 3
        word2[0] = 3
        word2[1] = 2
        word2[2] = 1
        word2[3] = 0
        embedding_layer = ElmoTokenEmbedder.from_params(vocab=None,
                                                        params=params)
        input_tensor = torch.LongTensor([[word1, word2]])
        embedded = embedding_layer(input_tensor).data.numpy()
        assert embedded.shape == (1, 2, 20)

        input_tensor = torch.LongTensor([[[word1]]])
        embedded = embedding_layer(input_tensor).data.numpy()
        assert embedded.shape == (1, 1, 1, 20)
Code Example #17
 def test_embedding_layer_actually_initializes_word_vectors_correctly(self):
     vocab = Vocabulary()
     vocab.add_token_to_namespace("word")
     vocab.add_token_to_namespace("word2")
     unicode_space = "\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"
     vocab.add_token_to_namespace(unicode_space)
     embeddings_filename = str(self.TEST_DIR / "embeddings.gz")
     with gzip.open(embeddings_filename, 'wb') as embeddings_file:
         embeddings_file.write("word 1.0 2.3 -1.0\n".encode('utf-8'))
         embeddings_file.write(
             f"{unicode_space} 3.4 3.3 5.0\n".encode('utf-8'))
     params = Params({
         'pretrained_file': embeddings_filename,
         'embedding_dim': 3,
     })
     embedding_layer = Embedding.from_params(vocab, params)
     word_vector = embedding_layer.weight.data[vocab.get_token_index(
         "word")]
     assert numpy.allclose(word_vector.numpy(),
                           numpy.array([1.0, 2.3, -1.0]))
     word_vector = embedding_layer.weight.data[vocab.get_token_index(
         unicode_space)]
     assert numpy.allclose(word_vector.numpy(), numpy.array([3.4, 3.3,
                                                             5.0]))
     word_vector = embedding_layer.weight.data[vocab.get_token_index(
         "word2")]
     assert not numpy.allclose(word_vector.numpy(),
                               numpy.array([1.0, 2.3, -1.0]))
Code Example #18
 def test_can_construct_from_params(self):
     params = Params({
         'embedding_dim': 5,
         'num_filters': 4,
         'ngram_filter_sizes': [3, 5]
     })
     encoder = CnnEncoder.from_params(params)
     assert encoder.get_output_dim() == 8
     params = Params({
         'embedding_dim': 5,
         'num_filters': 4,
         'ngram_filter_sizes': [3, 5],
         'output_dim': 7
     })
     encoder = CnnEncoder.from_params(params)
     assert encoder.get_output_dim() == 7
Code Example #19
File: bidaf_test.py Project: sanyu12/Bert_Attempt
    def test_batch_predictions_are_consistent(self):
        # The CNN encoder has problems with this kind of test - it's not properly masked yet, so
        # changing the amount of padding in the batch will result in small differences in the
        # output of the encoder.  Because BiDAF is so deep, these differences get magnified through
        # the network and make this test impossible.  So, we'll remove the CNN encoder entirely
        # from the model for this test.  If/when we fix the CNN encoder to work correctly with
        # masking, we can change this back to how the other models run this test, with just a
        # single line.
        # pylint: disable=protected-access,attribute-defined-outside-init

        # Save some state.
        saved_model = self.model
        saved_instances = self.instances

        # Modify the state, run the test with modified state.
        params = Params.from_file(self.param_file)
        reader = DatasetReader.from_params(params['dataset_reader'])
        reader._token_indexers = {'tokens': reader._token_indexers['tokens']}
        self.instances = reader.read(self.FIXTURES_ROOT / 'data' /
                                     'squad.json')
        vocab = Vocabulary.from_instances(self.instances)
        for instance in self.instances:
            instance.index_fields(vocab)
        del params['model']['text_field_embedder']['token_embedders'][
            'token_characters']
        params['model']['phrase_layer']['input_size'] = 2
        self.model = Model.from_params(vocab=vocab, params=params['model'])

        self.ensure_batch_predictions_are_consistent()

        # Restore the state.
        self.model = saved_model
        self.instances = saved_instances
Code Example #20
 def test_forward_runs_with_non_bijective_mapping(self):
     elmo_fixtures_path = self.FIXTURES_ROOT / 'elmo'
     options_file = str(elmo_fixtures_path / 'options.json')
     weight_file = str(elmo_fixtures_path / 'lm_weights.hdf5')
     params = Params({
         "words": {
             "type": "embedding",
             "num_embeddings": 20,
             "embedding_dim": 2,
         },
         "elmo": {
             "type": "elmo_token_embedder",
             "options_file": options_file,
             "weight_file": weight_file
         },
         "embedder_to_indexer_map": {
             "words": ["words"],
             "elmo": ["elmo", "words"]
         }
     })
     token_embedder = BasicTextFieldEmbedder.from_params(self.vocab, params)
     inputs = {
         'words': (torch.rand(3, 6) * 20).long(),
         'elmo': (torch.rand(3, 6, 50) * 15).long(),
     }
     token_embedder(inputs)
Code Example #21
 def setUp(self):
     super(TestBasicTextFieldEmbedder, self).setUp()
     self.vocab = Vocabulary()
     self.vocab.add_token_to_namespace("1")
     self.vocab.add_token_to_namespace("2")
     self.vocab.add_token_to_namespace("3")
     self.vocab.add_token_to_namespace("4")
     params = Params({
         "words1": {
             "type": "embedding",
             "embedding_dim": 2
         },
         "words2": {
             "type": "embedding",
             "embedding_dim": 5
         },
         "words3": {
             "type": "embedding",
             "embedding_dim": 3
         }
     })
     self.token_embedder = BasicTextFieldEmbedder.from_params(
         vocab=self.vocab, params=params)
     self.inputs = {
         "words1": torch.LongTensor([[0, 2, 3, 5]]),
         "words2": torch.LongTensor([[1, 4, 3, 2]]),
         "words3": torch.LongTensor([[1, 5, 1, 2]])
     }
Code Example #22
 def test_forward_works_on_higher_order_input(self):
     params = Params({
         "words": {
             "type": "embedding",
             "num_embeddings": 20,
             "embedding_dim": 2,
         },
         "characters": {
             "type": "character_encoding",
             "embedding": {
                 "embedding_dim": 4,
                 "num_embeddings": 15,
             },
             "encoder": {
                 "type": "cnn",
                 "embedding_dim": 4,
                 "num_filters": 10,
                 "ngram_filter_sizes": [3],
             },
         }
     })
     token_embedder = BasicTextFieldEmbedder.from_params(vocab=self.vocab,
                                                         params=params)
     inputs = {
         'words': (torch.rand(3, 4, 5, 6) * 20).long(),
         'characters': (torch.rand(3, 4, 5, 6, 7) * 15).long(),
     }
     assert token_embedder(inputs,
                           num_wrapping_dims=2).size() == (3, 4, 5, 6, 12)
Code Example #23
 def setUp(self):
     super(TestTokenCharactersEncoder, self).setUp()
     self.vocab = Vocabulary()
     self.vocab.add_token_to_namespace("1", "token_characters")
     self.vocab.add_token_to_namespace("2", "token_characters")
     self.vocab.add_token_to_namespace("3", "token_characters")
     self.vocab.add_token_to_namespace("4", "token_characters")
     params = Params({
         "embedding": {
             "embedding_dim": 2,
             "vocab_namespace": "token_characters"
         },
         "encoder": {
             "type": "cnn",
             "embedding_dim": 2,
             "num_filters": 4,
             "ngram_filter_sizes": [1, 2],
             "output_dim": 3
         }
     })
     self.encoder = TokenCharactersEncoder.from_params(
         vocab=self.vocab, params=deepcopy(params))
     self.embedding = Embedding.from_params(vocab=self.vocab,
                                            params=params["embedding"])
     self.inner_encoder = Seq2VecEncoder.from_params(params["encoder"])
     constant_init = lambda tensor: torch.nn.init.constant_(tensor, 1.)
     initializer = InitializerApplicator([(".*", constant_init)])
     initializer(self.encoder)
     initializer(self.embedding)
     initializer(self.inner_encoder)
Code Example #24
File: train.py Project: sanyu12/Bert_Attempt
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging,
                       recover)
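
A similar sketch for calling this wrapper, here also passing the JSON overrides string described in the docstring; the import path and file names are hypothetical placeholders:

# Hypothetical sketch: the module path, config path, and output directory are placeholders.
from srl_model.commands.train import train_model_from_file

model = train_model_from_file(parameter_filename="experiment.json",
                              serialization_dir="output_dir",
                              overrides='{"trainer": {"num_epochs": 1}}')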
Code Example #25
File: linear_test.py Project: sanyu12/Bert_Attempt
 def test_can_construct_from_params(self):
     params = Params({
         'tensor_1_dim': 4,
         'tensor_2_dim': 4,
         'combination': 'x,y,x*y,y-x'
     })
     linear = LinearSimilarity.from_params(params)
     assert list(linear._weight_vector.size()) == [16]
Code Example #26
 def test_can_init_dot(self):
     legacy_attention = MatrixAttention.from_params(
         Params({
             "type": "linear",
             "tensor_1_dim": 3,
             "tensor_2_dim": 3
         }))
     assert isinstance(legacy_attention, LinearMatrixAttention)
Code Example #27
 def test_from_params_requires_batch_first(self):
     params = Params({
         "type": "lstm",
         "batch_first": False,
     })
     with pytest.raises(ConfigurationError):
         # pylint: disable=unused-variable
         encoder = Seq2VecEncoder.from_params(params)
Code Example #28
    def test_model_load(self):
        params = Params.from_file(self.FIXTURES_ROOT /
                                  'decomposable_attention' / 'experiment.json')
        model = Model.load(params,
                           serialization_dir=self.FIXTURES_ROOT /
                           'decomposable_attention' / 'serialization')

        assert isinstance(model, DecomposableAttention)
Code Example #29
 def test_can_build_from_params(self):
     params = Params({
         "type": "legacy",
         'similarity_function': {
             'type': 'cosine'
         }
     })
     attention = MatrixAttention.from_params(params)
     # pylint: disable=protected-access
     assert attention._similarity_function.__class__.__name__ == 'CosineSimilarity'
Code Example #30
 def test_can_build_from_params(self):
     params = Params({
         'similarity_function': {
             'type': 'cosine'
         },
         'normalize': False
     })
     attention = LegacyAttention.from_params(params)
     # pylint: disable=protected-access
     assert attention._similarity_function.__class__.__name__ == 'CosineSimilarity'
     assert attention._normalize is False