def __init__(self, configuration, args, pre_embed=None):
    self.configuration = deepcopy(configuration)  # keep a pristine copy for later reference
    configuration = deepcopy(configuration)       # working copy, mutated below
    configuration['model']['encoder']['pre_embed'] = pre_embed

    self.encoder = Encoder.from_params(Params(configuration['model']['encoder'])).to(device)

    self.frozen_attn = args.frozen_attn
    self.adversarial = args.adversarial
    self.pre_loaded_attn = args.pre_loaded_attn

    configuration['model']['decoder']['hidden_size'] = self.encoder.output_size
    if self.frozen_attn:
        self.decoder = FrozenAttnDecoder.from_params(
            Params(configuration['model']['decoder'])).to(device)
    elif self.pre_loaded_attn:
        self.decoder = PretrainedWeightsDecoder.from_params(
            Params(configuration['model']['decoder'])).to(device)
    else:
        self.decoder = AttnDecoder.from_params(
            Params(configuration['model']['decoder'])).to(device)

    self.encoder_params = list(self.encoder.parameters())
    if not self.frozen_attn:
        self.attn_params = [v for k, v in self.decoder.named_parameters()
                            if 'attention' in k]
    self.decoder_params = [v for k, v in self.decoder.named_parameters()
                           if 'attention' not in k]

    self.bsize = configuration['training']['bsize']
    weight_decay = configuration['training'].get('weight_decay', 1e-5)

    self.encoder_optim = torch.optim.Adam(self.encoder_params, lr=0.001,
                                          weight_decay=weight_decay, amsgrad=True)
    if not self.frozen_attn:
        self.attn_optim = torch.optim.Adam(self.attn_params, lr=0.001,
                                           weight_decay=0, amsgrad=True)
    self.decoder_optim = torch.optim.Adam(self.decoder_params, lr=0.001,
                                          weight_decay=weight_decay, amsgrad=True)

    pos_weight = configuration['training'].get('pos_weight',
                                               [1.0] * self.decoder.output_size)
    self.pos_weight = torch.Tensor(pos_weight).to(device)

    # Set up either the adversarial (KL) or the standard binary cross-entropy loss.
    if self.adversarial:
        self.criterion = nn.KLDivLoss(reduction='sum').to(device)
        self.lmbda = args.lmbda
    else:
        self.criterion = nn.BCEWithLogitsLoss(reduction='none').to(device)

    dirname = configuration['training']['exp_dirname']
    basepath = configuration['training'].get('basepath', 'outputs')
    self.time_str = time.ctime().replace(' ', '_')
    self.dirname = os.path.join(basepath, dirname, self.time_str)
def from_params(cls, model_parameters: List, params: Params):  # type: ignore
    # pylint: disable=arguments-differ
    if isinstance(params, str):
        optimizer = params
        params = Params({})
    else:
        optimizer = params.pop_choice("type", Optimizer.list_available())

    # Make the parameter groups, if needed.
    groups = params.pop("parameter_groups", None)
    if groups:
        # The input to the optimizer is a list of dicts.
        # Each dict contains a "parameter group" and group-specific options,
        # e.g., {'params': [list of parameters], 'lr': 1e-3, ...}.
        # Any config option not specified in the additional options (e.g.
        # for the default group) is inherited from the top-level config.
        # See: http://pytorch.org/docs/0.3.0/optim.html?#per-parameter-options
        #
        # groups contains something like:
        # "parameter_groups": [
        #     [["regex1", "regex2"], {"lr": 1e-3}],
        #     [["regex3"], {"lr": 1e-4}]
        # ]
        # (Note that the allennlp config files require double quotes ("), and
        # will fail (sometimes silently) with single quotes (').)

        # This is typed as Any since the dict values other than the params key
        # are passed to the Optimizer constructor and can be any type it accepts.
        #
        # In addition to any parameters that match a group-specific regex, we
        # also need a group for the remaining "default" parameters. Those are
        # included in the last entry of parameter_groups.
        parameter_groups: Any = [{'params': []} for _ in range(len(groups) + 1)]
        # Add the group-specific kwargs.
        for k in range(len(groups)):  # pylint: disable=consider-using-enumerate
            parameter_groups[k].update(groups[k][1].as_dict())

        regex_use_counts: Dict[str, int] = {}
        parameter_group_names: List[set] = [set() for _ in range(len(groups) + 1)]
        for name, param in model_parameters:
            # Determine the group for this parameter.
            group_index = None
            for k, group_regexes in enumerate(groups):
                for regex in group_regexes[0]:
                    if regex not in regex_use_counts:
                        regex_use_counts[regex] = 0
                    if re.search(regex, name):
                        if group_index is not None and group_index != k:
                            raise ValueError(
                                "{} was specified in two separate parameter groups"
                                .format(name))
                        group_index = k
                        regex_use_counts[regex] += 1

            if group_index is not None:
                parameter_groups[group_index]['params'].append(param)
                parameter_group_names[group_index].add(name)
            else:
                # The default group.
                parameter_groups[-1]['params'].append(param)
                parameter_group_names[-1].add(name)

        # Log the parameter groups.
        logger.info("Done constructing parameter groups.")
        for k in range(len(groups) + 1):
            group_options = {key: val for key, val in parameter_groups[k].items()
                             if key != 'params'}
            logger.info("Group %s: %s, %s", k,
                        list(parameter_group_names[k]), group_options)
        # Check for unused regexes.
        for regex, count in regex_use_counts.items():
            if count == 0:
                logger.warning("When constructing parameter groups, "
                               "%s does not match any parameter name", regex)
    else:
        parameter_groups = [param for name, param in model_parameters]

    return Optimizer.by_name(optimizer)(parameter_groups, **params.as_dict())  # type: ignore
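# A minimal, self-contained sketch (the model, regex, and values are assumptions,
# not from the source) of the "parameter_groups" format consumed by from_params
# above. Parameters whose names match "^0\." (the first Linear layer) get a
# smaller learning rate; everything else falls into the default group at the
# top-level lr:
import torch
from allennlp.common import Params
from allennlp.training.optimizers import Optimizer

model = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.Linear(4, 2))
params = Params({
    "type": "adam",
    "lr": 1e-3,
    "parameter_groups": [
        [["^0\\."], {"lr": 1e-4}],
    ],
})
optimizer = Optimizer.from_params(model.named_parameters(), params)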
def test_from_params(self):
    params = Params({"use_subtrees": True, "granularity": "5-class"})
    reader = StanfordSentimentTreeBankDatasetReader.from_params(params)
    assert reader._use_subtrees is True
    assert reader._granularity == "5-class"
def ensure_model_can_train_save_and_load(
    self,
    param_file: str,
    tolerance: float = 1e-4,
    cuda_device: int = -1,
    gradients_to_ignore: Set[str] = None,
    overrides: str = "",
    disable_dropout: bool = True,
):
    """
    # Parameters

    param_file : ``str``
        Path to a training configuration file that we will use to train the model for this
        test.
    tolerance : ``float``, optional (default=1e-4)
        When comparing model predictions between the originally-trained model and the model
        after saving and loading, we will use this tolerance value (passed as ``rtol`` to
        ``numpy.testing.assert_allclose``).
    cuda_device : ``int``, optional (default=-1)
        The device to run the test on.
    gradients_to_ignore : ``Set[str]``, optional (default=None)
        This test runs a gradient check to make sure that we're actually computing gradients
        for all of the parameters in the model.  If you really want to ignore certain
        parameters when doing that check, you can pass their names here.  This is not
        recommended unless you're `really` sure you don't need to have non-zero gradients for
        those parameters (e.g., some of the beam search / state machine models have
        infrequently-used parameters that are hard to force the model to use in a small test).
    overrides : ``str``, optional (default = "")
        A JSON string that we will use to override values in the input parameter file.
    disable_dropout : ``bool``, optional (default = True)
        If True we will set all dropout to 0 before checking gradients.  (Otherwise, with
        small datasets, you may get zero gradients because of unlucky dropout.)
    """
    save_dir = self.TEST_DIR / "save_and_load_test"
    archive_file = save_dir / "model.tar.gz"
    model = train_model_from_file(param_file, save_dir, overrides=overrides)
    loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
    state_keys = model.state_dict().keys()
    loaded_state_keys = loaded_model.state_dict().keys()
    assert state_keys == loaded_state_keys
    # First we make sure that the state dict (the parameters) are the same for both models.
    for key in state_keys:
        assert_allclose(
            model.state_dict()[key].cpu().numpy(),
            loaded_model.state_dict()[key].cpu().numpy(),
            err_msg=key,
        )
    params = Params.from_file(param_file, params_overrides=overrides)
    reader = DatasetReader.from_params(params["dataset_reader"])

    # Need to duplicate params because DataIterator.from_params will consume them.
    iterator_params = params["iterator"]
    iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))

    iterator = DataIterator.from_params(iterator_params)
    iterator2 = DataIterator.from_params(iterator_params2)

    # We'll check that even if we index the dataset with each model separately, we still get
    # the same result out.
    print("Reading with original model")
    model_dataset = reader.read(params["validation_data_path"])
    iterator.index_with(model.vocab)
    model_batch = next(iterator(model_dataset, shuffle=False))

    print("Reading with loaded model")
    loaded_dataset = reader.read(params["validation_data_path"])
    iterator2.index_with(loaded_model.vocab)
    loaded_batch = next(iterator2(loaded_dataset, shuffle=False))

    # Check gradients are None for non-trainable parameters and check that
    # trainable parameters receive some gradient if they are trainable.
    self.check_model_computes_gradients_correctly(
        model, model_batch, gradients_to_ignore, disable_dropout
    )

    # The datasets themselves should be identical.
    assert model_batch.keys() == loaded_batch.keys()
    for key in model_batch.keys():
        self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

    # Set eval mode, to turn off things like dropout, then get predictions.
    model.eval()
    loaded_model.eval()

    # Models with stateful RNNs need their states reset to have consistent
    # behavior after loading.
    for model_ in [model, loaded_model]:
        for module in model_.modules():
            if hasattr(module, "stateful") and module.stateful:
                module.reset_states()
    print("Predicting with original model")
    model_predictions = model(**model_batch)
    print("Predicting with loaded model")
    loaded_model_predictions = loaded_model(**loaded_batch)

    # Check loaded model's loss exists and we can compute gradients, for continuing training.
    loaded_model_loss = loaded_model_predictions["loss"]
    assert loaded_model_loss is not None
    loaded_model_loss.backward()

    # Both outputs should have the same keys and the values for these keys should be close.
    for key in model_predictions.keys():
        self.assert_fields_equal(
            model_predictions[key], loaded_model_predictions[key], name=key, tolerance=tolerance
        )

    return model, loaded_model
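# Hypothetical usage of the helper above, following the standard AllenNLP
# ModelTestCase pattern (the fixture paths are assumptions):
#
#     class SimpleTaggerTest(ModelTestCase):
#         def setUp(self):
#             super().setUp()
#             self.set_up_model(self.FIXTURES_ROOT / "simple_tagger" / "experiment.json",
#                               self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv")
#
#         def test_model_can_train_save_and_load(self):
#             self.ensure_model_can_train_save_and_load(self.param_file)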
def test_can_init_dot(self):
    legacy_attention = Attention.from_params(Params({u"type": u"dot_product"}))
    assert isinstance(legacy_attention, DotProductAttention)
def test_can_build_from_params(self):
    reader = SquadReader.from_params(Params({}))
    assert reader._tokenizer.__class__.__name__ == "SpacyTokenizer"
    assert reader._token_indexers["tokens"].__class__.__name__ == "SingleIdTokenIndexer"
# Try to use the validation dataset reader if there is one - otherwise fall back
# to the default dataset_reader used for both training and validation.
validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
if validation_dataset_reader_params is not None:
    dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
else:
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))

evaluation_data_path = args.input_file

embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping)
                                     if args.embedding_sources_mapping else {})
if args.extend_vocab:
    logger.info("Vocabulary is being extended with test instances.")
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)
    model.vocab.extend_from_instances(Params({}), instances=instances)
    model.extend_embedder_vocab(embedding_sources)

predictor = AMconllPredictor(dataset_reader, args.k, args.give_up, args.threads, model=model)

requires_art_root = {"DM": True, "PAS": True, "PSD": True,
                     "EDS": False, "AMR-2015": False, "AMR-2017": False}

sentences = []
with open(args.input_file) as f:
    for sentence in f:
        words = sentence.rstrip("\n").split(" ")
        sentences.append(from_raw_text(words, requires_art_root[args.formalism], dict()))

with TemporaryDirectory() as direc:
    temp_path = direc + "/sentences.amconll"
    with open(temp_path, "w") as f:
def test_ignore_oov_should_fail_on_non_padded_vocab(self):
    params = Params({"ignore_oov": True})
    self.assertRaises(ConfigurationError,
                      BagOfWordCountsTokenEmbedder.from_params,
                      self.non_padded_vocab,
                      params)
def test_create_kwargs(self):
    kwargs = create_kwargs(MyClass, MyClass, Params({"my_int": 5}),
                           my_bool=True, my_float=4.4)
    # my_float should not be included because it's not a param of the MyClass constructor.
    assert kwargs == {"my_int": 5, "my_bool": True}
def _get_optimizer(self, lr: float = 1.0):
    optimizer_params = Params({"type": "sgd", "lr": lr})
    optimizer_params["parameter_groups"] = [[[f"^{m}"], {}] for m in self.model._modules]
    return Optimizer.from_params(self.model.named_parameters(), optimizer_params)
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements.
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive.
    archive = load_archive(args.archive_file, args.cuda_device,
                           args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data.
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping)
                                         if args.embedding_sources_mapping else {})
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(Params({}), instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    thrs = args.thresholds.replace("_", ",").split(",")
    for thr in thrs:
        model._temperature_threshold = float(thr)
        metrics = evaluate(model, instances, iterator,
                           args.cuda_device, args.batch_weight_key)

        logger.info("Finished evaluating.")
        logger.info("Metrics:")
        for key, metric in metrics.items():
            logger.info("%s: %s: %s", thr, key, metric)

        output_file = args.output_file
        if output_file:
            with open(output_file + "_" + thr, "w") as file:
                json.dump(metrics, file, indent=4)

    return metrics
def create_model(vocab: Vocabulary,
                 embedding_dim: int,
                 hidden_dim: int,
                 TaskModel: Model = BaseTextClassifier,
                 wemb: str = None,
                 encoder_type: str = "lstm",
                 pretrained_model: BaseTextClassifier = None,
                 fix_pretrained_weights: bool = False,
                 **kwargs) -> Model:
    """
    :param vocab: input / output vocabulary of the dataset
    :param embedding_dim: dimensionality of the word embeddings
    :param hidden_dim: hidden size of the RNN encoder
    :param TaskModel: the model class to instantiate
    :param wemb: type of word embeddings to use: None (random), "elmo", "glove",
                 a URL, or a path to a pretrained embedding file
    :param encoder_type: RNN type, e.g. "gru" or "lstm"
    :param pretrained_model: a pretrained model to copy the encoder layers from,
                             e.g. for building a domain classifier
    :param fix_pretrained_weights: whether to freeze the copied encoder and word
                                   embedding weights (only if a pretrained model is provided)
    :param kwargs: forwarded to the TaskModel constructor; must include
                   "num_layers" and "bidirectional" for the RNN
    :return: the constructed model
    """
    if wemb is None:
        wemb = "random"

    if wemb.lower() == "elmo":
        word_embeddings_params = Params({
            "embedding_dim": 100,
            "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.100d.txt.gz",
            "trainable": False
        })
        elmo_params = Params({
            "options_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json",
            "weight_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5",
            "do_layer_norm": False,
            "dropout": 0.5,
            "requires_grad": False
        })
        token_embeddings = Embedding.from_params(vocab, word_embeddings_params)
        elmo_embeddings = ElmoTokenEmbedder.from_params(vocab, elmo_params)
        word_embeddings = BasicTextFieldEmbedder({"tokens": token_embeddings,
                                                  "elmo": elmo_embeddings})
    elif wemb.lower() == "glove" or "http" in wemb or os.path.exists(wemb):
        if wemb.lower() == "glove":
            pretrained_file = "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.100d.txt.gz"
            embedding_dim = 100
        else:
            pretrained_file = wemb
        word_embeddings_params = Params({
            "embedding_dim": embedding_dim,
            "pretrained_file": pretrained_file,
            "trainable": False
        })
        token_embeddings = Embedding.from_params(vocab=vocab, params=word_embeddings_params)
        word_embeddings = BasicTextFieldEmbedder({"tokens": token_embeddings})
    else:
        token_embeddings = Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                                     embedding_dim=embedding_dim)
        word_embeddings = BasicTextFieldEmbedder({"tokens": token_embeddings})

    embedding_dim = word_embeddings.get_output_dim()
    rnn_params = Params({"type": encoder_type,
                         "input_size": embedding_dim,
                         "hidden_size": hidden_dim,
                         "num_layers": kwargs["num_layers"],
                         "bidirectional": kwargs["bidirectional"]})
    del kwargs["num_layers"]
    del kwargs["bidirectional"]

    if TaskModel is BaseSequenceTagger or (TaskModel is SequenceDomainClassifier
                                           and kwargs["num_extra_rnn_layers"] > 0):
        rnn = Seq2SeqEncoder.from_params(rnn_params)
    else:
        rnn = Seq2VecEncoder.from_params(rnn_params)

    model = TaskModel(word_embeddings, rnn, vocab, **kwargs)

    # If a pretrained model is provided, copy its encoder and word-embedding weights,
    # e.g. when building a domain classifier from a task classifier.
    if pretrained_model is not None:
        model.encoder.load_state_dict(pretrained_model.encoder.state_dict())
        model.word_embeddings.load_state_dict(pretrained_model.word_embeddings.state_dict())
        if fix_pretrained_weights:
            for p in model.encoder.parameters():
                p.requires_grad = False
            for p in model.word_embeddings.parameters():
                p.requires_grad = False

    return model
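# A hypothetical call to create_model (argument values are assumptions).
# Note that "num_layers" and "bidirectional" must be supplied through kwargs,
# since the function pops them to build rnn_params before passing the rest to
# the TaskModel constructor:
#
#     model = create_model(vocab,
#                          embedding_dim=100,
#                          hidden_dim=128,
#                          wemb="glove",
#                          encoder_type="lstm",
#                          num_layers=1,
#                          bidirectional=True)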
def ensure_model_can_train_save_and_load(self,
                                         param_file: str,
                                         tolerance: float = 1e-4,
                                         cuda_device: int = -1):
    save_dir = self.TEST_DIR / "save_and_load_test"
    archive_file = save_dir / "model.tar.gz"
    model = train_model_from_file(param_file, save_dir)
    loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
    state_keys = model.state_dict().keys()
    loaded_state_keys = loaded_model.state_dict().keys()
    assert state_keys == loaded_state_keys
    # First we make sure that the state dict (the parameters) are the same for both models.
    for key in state_keys:
        assert_allclose(model.state_dict()[key].cpu().numpy(),
                        loaded_model.state_dict()[key].cpu().numpy(),
                        err_msg=key)
    params = Params.from_file(param_file)
    reader = DatasetReader.from_params(params['dataset_reader'])

    # Need to duplicate params because DataIterator.from_params will consume them.
    iterator_params = params['iterator']
    iterator_params2 = Params(copy.deepcopy(iterator_params.as_dict()))

    iterator = DataIterator.from_params(iterator_params)
    iterator2 = DataIterator.from_params(iterator_params2)

    # We'll check that even if we index the dataset with each model separately, we still get
    # the same result out.
    model_dataset = reader.read(params['validation_data_path'])
    iterator.index_with(model.vocab)
    model_batch = next(iterator(model_dataset, shuffle=False, cuda_device=cuda_device))

    loaded_dataset = reader.read(params['validation_data_path'])
    iterator2.index_with(loaded_model.vocab)
    loaded_batch = next(iterator2(loaded_dataset, shuffle=False, cuda_device=cuda_device))

    # Check gradients are None for non-trainable parameters and check that
    # trainable parameters receive some gradient if they are trainable.
    self.check_model_computes_gradients_correctly(model, model_batch)

    # The datasets themselves should be identical.
    assert model_batch.keys() == loaded_batch.keys()
    for key in model_batch.keys():
        self.assert_fields_equal(model_batch[key], loaded_batch[key], key, 1e-6)

    # Set eval mode, to turn off things like dropout, then get predictions.
    model.eval()
    loaded_model.eval()

    # Models with stateful RNNs need their states reset to have consistent
    # behavior after loading.
    for model_ in [model, loaded_model]:
        for module in model_.modules():
            if hasattr(module, 'stateful') and module.stateful:
                module.reset_states()
    model_predictions = model(**model_batch)
    loaded_model_predictions = loaded_model(**loaded_batch)

    # Check loaded model's loss exists and we can compute gradients, for continuing training.
    loaded_model_loss = loaded_model_predictions["loss"]
    assert loaded_model_loss is not None
    loaded_model_loss.backward()

    # Both outputs should have the same keys and the values for these keys should be close.
    for key in model_predictions.keys():
        self.assert_fields_equal(model_predictions[key],
                                 loaded_model_predictions[key],
                                 name=key,
                                 tolerance=tolerance)

    return model, loaded_model
def setUp(self):
    super(BidirectionalAttentionFlowTest, self).setUp()
    constants.GLOVE_PATH = 'tests/fixtures/glove.6B.100d.sample.txt.gz'
    reader_params = Params({
        'token_indexers': {
            'tokens': {'type': 'single_id'},
            'token_characters': {'type': 'characters'}
        }
    })
    dataset = SquadReader.from_params(reader_params).read('tests/fixtures/data/squad.json')
    vocab = Vocabulary.from_dataset(dataset)
    self.vocab = vocab
    dataset.index_instances(vocab)
    self.dataset = dataset
    self.token_indexers = {
        'tokens': SingleIdTokenIndexer(),
        'token_characters': TokenCharactersIndexer()
    }

    self.model = BidirectionalAttentionFlow.from_params(self.vocab, Params({}))

    small_params = Params({
        'text_field_embedder': {
            'tokens': {
                'type': 'embedding',
                'pretrained_file': constants.GLOVE_PATH,
                'trainable': False,
                'projection_dim': 4
            },
            'token_characters': {
                'type': 'character_encoding',
                'embedding': {'embedding_dim': 8},
                'encoder': {
                    'type': 'cnn',
                    'embedding_dim': 8,
                    'num_filters': 4,
                    'ngram_filter_sizes': [5]
                }
            }
        },
        'phrase_layer': {
            'type': 'lstm',
            'bidirectional': True,
            'input_size': 8,
            'hidden_size': 4,
            'num_layers': 1,
        },
        'similarity_function': {
            'type': 'linear',
            'combination': 'x,y,x*y',
            'tensor_1_dim': 8,
            'tensor_2_dim': 8
        },
        'modeling_layer': {
            'type': 'lstm',
            'bidirectional': True,
            'input_size': 32,
            'hidden_size': 4,
            'num_layers': 1,
        },
        'span_end_encoder': {
            'type': 'lstm',
            'bidirectional': True,
            'input_size': 56,
            'hidden_size': 4,
            'num_layers': 1,
        },
    })
    self.small_model = BidirectionalAttentionFlow.from_params(self.vocab, small_params)
def test_train_model(self):
    params = lambda: Params({
        "model": {
            "type": "simple_tagger",
            "text_field_embedder": {
                "token_embedders": {
                    "tokens": {"type": "embedding", "embedding_dim": 5}
                }
            },
            "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
        },
        "dataset_reader": {"type": "sequence_tagging"},
        "train_data_path": SEQUENCE_TAGGING_DATA_PATH,
        "validation_data_path": SEQUENCE_TAGGING_DATA_PATH,
        "iterator": {"type": "basic", "batch_size": 2},
        "trainer": {"num_epochs": 2, "optimizer": "adam"},
    })

    train_model(params(), serialization_dir=os.path.join(self.TEST_DIR, "test_train_model"))

    # It's OK if the serialization dir exists but is empty:
    serialization_dir2 = os.path.join(self.TEST_DIR, "empty_directory")
    assert not os.path.exists(serialization_dir2)
    os.makedirs(serialization_dir2)
    train_model(params(), serialization_dir=serialization_dir2)

    # It's not OK if the serialization dir exists and is non-empty:
    serialization_dir3 = os.path.join(self.TEST_DIR, "non_empty_directory")
    assert not os.path.exists(serialization_dir3)
    os.makedirs(serialization_dir3)
    with open(os.path.join(serialization_dir3, "README.md"), "w") as f:
        f.write("TEST")
    with pytest.raises(ConfigurationError):
        train_model(params(), serialization_dir=serialization_dir3)

    # It's also not OK if the serialization dir is a real serialization dir:
    with pytest.raises(ConfigurationError):
        train_model(params(), serialization_dir=os.path.join(self.TEST_DIR, "test_train_model"))

    # But it's OK if the serialization dir exists and --recover is specified:
    train_model(
        params(),
        serialization_dir=os.path.join(self.TEST_DIR, "test_train_model"),
        recover=True,
    )

    # It's also OK if the serialization dir exists and --force is specified (it will be deleted):
    train_model(params(),
                serialization_dir=os.path.join(self.TEST_DIR, "test_train_model"),
                force=True)

    # But --force and --recover cannot both be specified:
    with pytest.raises(ConfigurationError):
        train_model(
            params(),
            serialization_dir=os.path.join(self.TEST_DIR, "test_train_model"),
            force=True,
            recover=True,
        )
def train_model(
    params: Params,
    serialization_dir: str,
    file_friendly_logging: bool = False,
    recover: bool = False,
    force: bool = False,
    node_rank: int = 0,
    include_package: List[str] = None,
    batch_weight_key: str = "",
) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data
    and training parameters also specified in that object, and saves the results in
    ``serialization_dir``.

    # Parameters

    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we
        slow down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during
        the middle of a run.  For continuing training a model on new data, see
        ``Model.from_archive``.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    node_rank : ``int``, optional
        Rank of the current node in distributed training.
    include_package : ``List[str]``, optional
        In distributed mode, extra packages mentioned will be imported in trainer workers.
    batch_weight_key : ``str``, optional (default="")
        If non-empty, name of metric used to weight the loss on a per-batch basis.

    # Returns

    best_model : ``Model``
        The model with the best epoch weights.
    """
    training_util.create_serialization_dir(params, serialization_dir, recover, force)
    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    distributed_params = params.params.pop("distributed", None)
    # If distributed isn't in the config and the config contains strictly
    # one cuda device, we just run a single training process.
    if distributed_params is None:
        model = _train_worker(
            process_rank=0,
            params=params,
            serialization_dir=serialization_dir,
            file_friendly_logging=file_friendly_logging,
            recover=recover,
            include_package=include_package,
            batch_weight_key=batch_weight_key,
        )
        archive_model(serialization_dir)
        return model

    # Otherwise, we are running multiple processes for training.
    else:
        # We are careful here so that we can raise a good error if someone
        # passed the wrong thing - cuda_devices are required.
        device_ids = distributed_params.pop("cuda_devices", None)
        multi_device = isinstance(device_ids, list) and len(device_ids) > 1
        num_nodes = distributed_params.pop("num_nodes", 1)

        if not (multi_device or num_nodes > 1):
            raise ConfigurationError(
                "Multiple cuda devices/nodes need to be configured to run distributed training."
            )
        check_for_gpu(device_ids)

        master_addr = distributed_params.pop("master_address", "127.0.0.1")
        master_port = distributed_params.pop("master_port", 29500)
        num_procs = len(device_ids)
        world_size = num_nodes * num_procs

        logging.info(
            f"Switching to distributed training mode since multiple GPUs are configured. "
            f"Master is at: {master_addr}:{master_port} | Rank of this node: {node_rank} | "
            f"Number of workers in this node: {num_procs} | Number of nodes: {num_nodes} | "
            f"World size: {world_size}"
        )

        # Creating `Vocabulary` objects from workers could be problematic since
        # the data iterators in each worker will yield only `rank` specific
        # instances. Hence it is safe to construct the vocabulary and write it
        # to disk before initializing the distributed context. The workers will
        # load the vocabulary from the path specified.
        if params.get("vocabulary", Params({})).get("type", "") != "from_files":
            vocab = training_util.make_vocab_from_params(params.duplicate(), serialization_dir)
            params["vocabulary"] = {
                "type": "from_files",
                "directory": os.path.join(serialization_dir, "vocabulary"),
                "padding_token": vocab._padding_token,
                "oov_token": vocab._oov_token,
            }

        mp.spawn(
            _train_worker,
            args=(
                params.duplicate(),
                serialization_dir,
                file_friendly_logging,
                recover,
                include_package,
                batch_weight_key,
                node_rank,
                master_addr,
                master_port,
                world_size,
                device_ids,
            ),
            nprocs=num_procs,
        )
        archive_model(serialization_dir)
        model = Model.load(params, serialization_dir)
        return model
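# A minimal sketch of the config block that triggers the distributed branch of
# train_model above. The key names follow the pops in the function; the values
# here are assumptions:
#
#     "distributed": {
#         "cuda_devices": [0, 1],         # required; >1 device enables multi-process training
#         "num_nodes": 1,                 # optional, defaults to 1
#         "master_address": "127.0.0.1",  # optional, default shown
#         "master_port": 29500            # optional, default shown
#     }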
def test_from_params_requires_batch_first(self):
    params = Params({"type": "lstm", "batch_first": False})
    with pytest.raises(ConfigurationError):
        Seq2VecEncoder.from_params(params)
file_name = args.config[0] if args.config else args.base_config
log_dir_name = os.path.basename(file_name).split(".")[0]

configs = []
if not args.resume:
    serialization_dir = os.path.join(
        "logs", log_dir_name,
        datetime.datetime.now().strftime("%Y.%m.%d_%H.%M.%S"))

    overrides = {}
    if args.device is not None:
        overrides["trainer"] = {"cuda_device": args.device}
    if args.lazy is not None:
        overrides["dataset_reader"] = {"lazy": args.lazy}
    configs.append(Params(overrides))
    for config_file in args.config:
        configs.append(Params.from_file(config_file))
    configs.append(Params.from_file(args.base_config))
else:
    serialization_dir = args.resume
    configs.append(Params.from_file(os.path.join(serialization_dir, "config.json")))
train_params = util.merge_configs(configs)
if "vocabulary" in train_params:
    # Remove this key to make AllenNLP happy.
    train_params["vocabulary"].pop("non_padded_namespaces", None)
predict_params = train_params.duplicate()
params = train_params.duplicate()
import json

from allennlp.common import FromParams, Params


class BaseGaussian(FromParams):
    def __init__(self, mean: float, variance: float):
        self.mean = mean
        self.variance = variance


class MyGaussian(BaseGaussian):
    def __init__(self, name: str, **kwargs):
        super().__init__(**kwargs)
        self.name = name


param_str = """{"mean": 0.0, "variance": 1.0, "name": "My Gaussian"}"""
params = Params(json.loads(param_str))
gaussian = MyGaussian.from_params(params)
print(f"Mean: {gaussian.mean}")
print(f"Variance: {gaussian.variance}")
print(f"Name: {gaussian.name}")
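# Running the example above prints (the values follow directly from param_str):
#
#     Mean: 0.0
#     Variance: 1.0
#     Name: My Gaussian
#
# This works because from_params inspects constructor signatures: "name" is
# consumed by MyGaussian, and the remaining keys are forwarded to BaseGaussian
# through **kwargs.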
def test_can_build_from_params(self):
    reader = QangarooReader.from_params(Params({}))
    # pylint: disable=protected-access
    assert reader._token_indexers['tokens'].__class__.__name__ == 'SingleIdTokenIndexer'
def train(train_dataset, val_dataset, cfg):
    # Build the Vocabulary.
    VOCAB_SIZE = cfg.w2v.vocab_size
    vocab = Vocabulary.from_instances(train_dataset + val_dataset,
                                      max_vocab_size=VOCAB_SIZE)

    BATCH_SIZE = cfg.training.batch_size

    # Iterator that produces padded mini-batches.
    iterator = BucketIterator(batch_size=BATCH_SIZE,
                              sorting_keys=[("tokens", "num_tokens")])
    iterator.index_with(vocab)

    # Use the pretrained Japanese Wikipedia entity vectors provided by Tohoku University.
    # http://www.cl.ecei.tohoku.ac.jp/~m-suzuki/jawiki_vector/
    model_name = cfg.w2v.model_name
    norm = cfg.w2v.norm
    cwd = hydra.utils.get_original_cwd()
    params = Params({
        'embedding_dim': 200,
        'padding_index': 0,
        'pretrained_file': os.path.join(cwd, f'embs/jawiki.{model_name}_vectors.200d.txt'),
        'norm_type': norm
    })
    token_embedding = Embedding.from_params(vocab=vocab, params=params)

    HIDDEN_SIZE = cfg.model.hidden_size
    dropout = cfg.model.dropout
    word_embeddings: TextFieldEmbedder = BasicTextFieldEmbedder({"tokens": token_embedding})
    encoder: Seq2SeqEncoder = PytorchSeq2SeqWrapper(
        nn.LSTM(word_embeddings.get_output_dim(), HIDDEN_SIZE,
                bidirectional=True, batch_first=True))
    model = ClassifierWithAttn(word_embeddings, encoder, vocab, dropout)
    model.train()

    # Only use the GPU if one is actually available.
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        model = model.cuda(0)

    LR = cfg.training.learning_rate
    EPOCHS = cfg.training.epoch
    patience = cfg.training.patience if cfg.training.patience > 0 else None

    optimizer = optim.Adam(model.parameters(), lr=LR)
    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=train_dataset,
                      validation_dataset=val_dataset,
                      patience=patience,
                      cuda_device=0 if use_gpu else -1,
                      num_epochs=EPOCHS)
    metrics = trainer.train()
    logger.info(metrics)
    return model, metrics
def test_extras_for_custom_classes(self):
    from allennlp.common.registrable import Registrable

    class BaseClass(Registrable):
        pass

    class BaseClass2(Registrable):
        pass

    @BaseClass.register("A")
    class A(BaseClass):
        def __init__(self, a: int, b: int, val: str) -> None:
            self.a = a
            self.b = b
            self.val = val

        def __hash__(self):
            return self.b

        def __eq__(self, other):
            return self.b == other.b

        @classmethod
        def from_params(cls, params: Params, a: int, **extras) -> "A":  # type: ignore
            # A custom from_params method.
            b = params.pop_int("b")
            val = params.pop("val", "C")
            params.assert_empty(cls.__name__)
            return cls(a=a, b=b, val=val)

    @BaseClass2.register("B")
    class B(BaseClass2):
        def __init__(self, c: int, b: int) -> None:
            self.c = c
            self.b = b

        @classmethod
        def from_params(cls, params: Params, c: int, **extras) -> "B":  # type: ignore
            b = params.pop_int("b")
            params.assert_empty(cls.__name__)
            return cls(c=c, b=b)

    @BaseClass.register("E")
    class E(BaseClass):
        def __init__(self, m: int, n: int) -> None:
            self.m = m
            self.n = n

        @classmethod
        def from_params(cls, params: Params, **extras2) -> "E":  # type: ignore
            m = params.pop_int("m")
            params.assert_empty(cls.__name__)
            n = extras2["n"]
            return cls(m=m, n=n)

    class C:
        pass

    @BaseClass.register("D")
    class D(BaseClass):
        def __init__(
            self,
            arg1: List[BaseClass],
            arg2: Tuple[BaseClass, BaseClass2],
            arg3: Dict[str, BaseClass],
            arg4: Set[BaseClass],
            arg5: List[BaseClass],
        ) -> None:
            self.arg1 = arg1
            self.arg2 = arg2
            self.arg3 = arg3
            self.arg4 = arg4
            self.arg5 = arg5

    vals = [1, 2, 3]
    params = Params({
        "type": "D",
        "arg1": [
            {"type": "A", "b": vals[0]},
            {"type": "A", "b": vals[1]},
            {"type": "A", "b": vals[2]},
        ],
        "arg2": [{"type": "A", "b": vals[0]}, {"type": "B", "b": vals[0]}],
        "arg3": {
            "class_1": {"type": "A", "b": vals[0]},
            "class_2": {"type": "A", "b": vals[1]},
        },
        "arg4": [
            {"type": "A", "b": vals[0], "val": "M"},
            {"type": "A", "b": vals[1], "val": "N"},
            {"type": "A", "b": vals[1], "val": "N"},
        ],
        "arg5": [{"type": "E", "m": 9}],
    })
    extra = C()
    tval1 = 5
    tval2 = 6
    d = BaseClass.from_params(params=params, extra=extra, a=tval1, c=tval2, n=10)

    # Tests for List
    assert len(d.arg1) == len(vals)
    assert isinstance(d.arg1, list)
    assert isinstance(d.arg1[0], A)
    assert all(x.b == y for x, y in zip(d.arg1, vals))
    assert all(x.a == tval1 for x in d.arg1)

    # Tests for Tuple
    assert isinstance(d.arg2, tuple)
    assert isinstance(d.arg2[0], A)
    assert isinstance(d.arg2[1], B)
    assert d.arg2[0].a == tval1
    assert d.arg2[1].c == tval2
    assert d.arg2[0].b == d.arg2[1].b == vals[0]

    # Tests for Dict
    assert isinstance(d.arg3, dict)
    assert isinstance(d.arg3["class_1"], A)
    assert d.arg3["class_1"].a == d.arg3["class_2"].a == tval1
    assert d.arg3["class_1"].b == vals[0]
    assert d.arg3["class_2"].b == vals[1]

    # Tests for Set
    assert isinstance(d.arg4, set)
    assert len(d.arg4) == 2
    assert any(x.val == "M" for x in d.arg4)
    assert any(x.val == "N" for x in d.arg4)

    # Tests for custom extras parameters
    assert isinstance(d.arg5, list)
    assert isinstance(d.arg5[0], E)
    assert d.arg5[0].m == 9
    assert d.arg5[0].n == 10
        dropout=dropout)


if __name__ == "__main__":
    from allennlp.common import Params

    torch.manual_seed(999)
    batch = 16
    input_dim = 200
    hidden1 = 100
    hidden2 = 80
    test_input_1 = torch.autograd.Variable(torch.randn(batch, input_dim))
    test_input_2 = torch.autograd.Variable(torch.randn(batch, input_dim))
    ff_pair = FeedForwardPair.from_params(
        Params({
            "input_dim": input_dim,
            "num_layers": 2,
            "hidden_dims": [hidden1, hidden2],
            "activations": ["tanh", "linear"],
            "dropout": [0.0, 0.0]
        }))
    r1, r2 = ff_pair(test_input_1, test_input_2)
    assert r1.size() == r2.size() == torch.Size([batch, hidden2])

    test_input_2 = test_input_1.clone()
    r3, r4 = ff_pair(test_input_1, test_input_2)
    assert (r3 == r4).all()
def test_no_constructor(self):
    params = Params({"type": "just_spaces"})
    Tokenizer.from_params(params)
def params(params_dict):
    return Params(params_dict)
def test_from_params(self):
    my_class = MyClass.from_params(Params({"my_int": 10}), my_bool=True)
    assert isinstance(my_class, MyClass)
    assert my_class.my_int == 10
    assert my_class.my_bool
def __call__(self, **kwargs) -> PytorchSeq2VecWrapper:
    return self.from_params(Params(kwargs))
def test_from_params_works_correctly(self):
    tokenizer = NgramTokenizer.from_params(Params({'max_ngram_degree': 2}))
    assert tokenizer._max_ngram_degree == 2
def test_can_construct_from_params(self):
    assert CosineSimilarity.from_params(Params({})).__class__.__name__ == 'CosineSimilarity'
def test_can_build_from_params(self):
    reader = SquadReader.from_params(Params({}))
    # pylint: disable=protected-access
    assert reader._tokenizer.__class__.__name__ == 'WordTokenizer'
    assert reader._token_indexers["tokens"].__class__.__name__ == 'SingleIdTokenIndexer'