def from_params(cls, params: Params) -> "Elmo":
    """Construct an ``Elmo`` instance from a ``Params`` configuration.

    ``options_file`` and ``weight_file`` are registered with the archive and
    then popped; together with ``num_output_representations`` they have no
    default.  ``requires_grad``, ``do_layer_norm``, ``keep_sentence_boundaries``,
    ``dropout`` and ``scalar_mix_parameters`` are optional.  Once every key has
    been read, ``params.assert_empty`` is called with the class name.
    """
    # Register both model files with the archive before consuming their keys.
    for file_key in ("options_file", "weight_file"):
        params.add_file_to_archive(file_key)

    # Dict literals evaluate top to bottom, so the pop order is deterministic.
    kwargs = {
        "options_file": params.pop("options_file"),
        "weight_file": params.pop("weight_file"),
        "requires_grad": params.pop("requires_grad", False),
        "num_output_representations": params.pop("num_output_representations"),
        "do_layer_norm": params.pop_bool("do_layer_norm", False),
        "keep_sentence_boundaries": params.pop_bool("keep_sentence_boundaries", False),
        "dropout": params.pop_float("dropout", 0.5),
        "scalar_mix_parameters": params.pop("scalar_mix_parameters", None),
    }
    params.assert_empty(cls.__name__)
    return cls(**kwargs)
def from_params(cls, params: Params) -> 'WordSplitter':
    """Create the word splitter described by ``params``.

    Reads ``language`` (default ``'en_core_web_sm'``) and the boolean flags
    ``pos_tags``, ``parse`` and ``ner`` (each defaulting to ``False``), checks
    that nothing else is left in ``params``, and forwards the four values to
    the constructor positionally in that order.
    """
    language = params.pop('language', 'en_core_web_sm')
    # Flags are popped in the same order the constructor receives them.
    flags = [params.pop_bool(flag, False) for flag in ('pos_tags', 'parse', 'ner')]
    params.assert_empty(cls.__name__)
    return cls(language, *flags)
def from_params(cls, params: Params) -> 'WordSplitter':
    """Create the word splitter described by ``params``.

    Four boolean flags are read, all defaulting to ``False``: ``cut_all``,
    ``hmm``, ``cut_for_search`` and ``pos_tags``.  After checking that no
    other keys remain, they are passed to the constructor positionally in
    that order.
    """
    flag_names = ('cut_all', 'hmm', 'cut_for_search', 'pos_tags')
    flags = tuple(params.pop_bool(name, False) for name in flag_names)
    params.assert_empty(cls.__name__)
    return cls(*flags)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SelectiveRegressor':
    """Build a ``SelectiveRegressor`` from ``vocab`` and a ``Params`` object.

    ``token_representation_dim`` is the only required key.  ``encoder`` and
    ``contextualizer`` are turned into objects via their ``from_params`` when
    supplied; ``decoder`` is built with ``FeedForward.from_params`` unless it
    is absent or given as a plain string, in which case it is forwarded as-is.
    ``params.assert_empty`` is called once every key has been consumed.
    """
    token_representation_dim = params.pop_int("token_representation_dim")

    encoder_spec = params.pop("encoder", None)
    encoder = None if encoder_spec is None else Seq2SeqEncoder.from_params(encoder_spec)

    decoder_spec = params.pop("decoder", None)
    if decoder_spec is None or isinstance(decoder_spec, str):
        # Missing or string-valued decoders are handed to the constructor untouched.
        decoder = decoder_spec
    else:
        decoder = FeedForward.from_params(decoder_spec)

    contextualizer_spec = params.pop('contextualizer', None)
    if contextualizer_spec:
        contextualizer = Contextualizer.from_params(contextualizer_spec)
    else:
        # Any falsy value (not only None) is forwarded unchanged.
        contextualizer = contextualizer_spec

    pretrained_file = params.pop("pretrained_file", None)
    transfer_flags = {
        name: params.pop_bool(name, False)
        for name in ("transfer_contextualizer_from_pretrained_file",
                     "transfer_encoder_from_pretrained_file",
                     "freeze_encoder")
    }
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    params.assert_empty(cls.__name__)

    return cls(vocab=vocab,
               token_representation_dim=token_representation_dim,
               encoder=encoder,
               decoder=decoder,
               contextualizer=contextualizer,
               pretrained_file=pretrained_file,
               initializer=initializer,
               regularizer=regularizer,
               **transfer_flags)
def from_params(cls, params: Params) -> 'Elmo':
    """Construct an ``Elmo`` instance (with optional ``char_map_file``) from ``params``.

    ``options_file`` and ``weight_file`` are added to the archive and popped;
    ``num_output_representations`` is also required.  ``requires_grad``,
    ``char_map_file``, ``do_layer_norm``, ``keep_sentence_boundaries``,
    ``dropout`` and ``scalar_mix_parameters`` fall back to defaults.  Any key
    left afterwards is reported through ``params.assert_empty``.
    """
    # Archive registration happens before the keys are removed from params.
    params.add_file_to_archive('options_file')
    params.add_file_to_archive('weight_file')

    settings = {}
    settings['options_file'] = params.pop('options_file')
    settings['weight_file'] = params.pop('weight_file')
    settings['requires_grad'] = params.pop('requires_grad', False)
    settings['char_map_file'] = params.pop('char_map_file', None)
    settings['num_output_representations'] = params.pop('num_output_representations')
    for flag in ('do_layer_norm', 'keep_sentence_boundaries'):
        settings[flag] = params.pop_bool(flag, False)
    settings['dropout'] = params.pop_float('dropout', 0.5)
    settings['scalar_mix_parameters'] = params.pop('scalar_mix_parameters', None)

    params.assert_empty(cls.__name__)
    return cls(**settings)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'Embedding':
    """Build an ``Embedding`` from ``vocab`` and ``params``.

    When ``num_embeddings`` is not given, it is taken from
    ``vocab.get_vocab_size`` for ``vocab_namespace`` (default ``"tokens"``).
    ``embedding_dim`` is required.  If ``pretrained_file`` is set, the initial
    weight is loaded through ``_read_pretrained_embeddings_file``; otherwise
    ``weight`` is ``None``.  All remaining keys are optional and
    ``params.assert_empty`` is called before any file is read.
    """
    num_embeddings = params.pop_int('num_embeddings', None)
    vocab_namespace = params.pop("vocab_namespace", "tokens")
    if num_embeddings is None:
        # Fall back to the size of the chosen vocabulary namespace.
        num_embeddings = vocab.get_vocab_size(vocab_namespace)

    embedding_dim = params.pop_int('embedding_dim')
    pretrained_file = params.pop("pretrained_file", None)

    options = {
        "projection_dim": params.pop_int("projection_dim", None),
        "trainable": params.pop_bool("trainable", True),
        "padding_index": params.pop_int('padding_index', None),
        "max_norm": params.pop_float('max_norm', None),
        "norm_type": params.pop_float('norm_type', 2.0),
        "scale_grad_by_freq": params.pop_bool('scale_grad_by_freq', False),
        "sparse": params.pop_bool('sparse', False),
        "dropout": params.pop_float('dropout', None),
    }
    params.assert_empty(cls.__name__)

    if pretrained_file:
        weight = _read_pretrained_embeddings_file(pretrained_file,
                                                  embedding_dim,
                                                  vocab,
                                                  vocab_namespace)
    else:
        weight = None

    return cls(num_embeddings=num_embeddings,
               embedding_dim=embedding_dim,
               weight=weight,
               **options)
def from_params(self, params: Params) -> PytorchSeq2SeqWrapper:
    """Instantiate the wrapped module class and wrap it in ``PytorchSeq2SeqWrapper``.

    ``batch_first`` (default ``True``) must not be false, otherwise a
    ``ConfigurationError`` is raised.  When the module class is listed in
    ``self.PYTORCH_MODELS``, ``batch_first=True`` is written back into
    ``params`` so that it reaches the module constructor.  ``stateful``
    (default ``False``) is passed to the wrapper; every remaining key is
    forwarded to the module class as a keyword argument.
    """
    batch_first = params.pop_bool('batch_first', True)
    if not batch_first:
        raise ConfigurationError("Our encoder semantics assumes batch is always first!")

    if self._module_class in self.PYTORCH_MODELS:
        # Re-insert the key: it was popped above but the module still needs it.
        params['batch_first'] = True

    stateful = params.pop_bool('stateful', False)
    module_kwargs = params.as_dict()
    return PytorchSeq2SeqWrapper(self._module_class(**module_kwargs), stateful=stateful)
def from_params(self, params: Params) -> PytorchSeq2SeqWrapper:
    """Instantiate the wrapped module class and wrap it in ``PytorchSeq2SeqWrapper``.

    A false ``batch_first`` (default ``True``) raises ``ConfigurationError``.
    For module classes in ``self.PYTORCH_MODELS`` the key is put back as
    ``True`` so the module constructor receives it.  ``stateful`` (default
    ``False``) goes to the wrapper; the remaining keys are converted with
    ``params.as_dict(infer_type_and_cast=True)`` and passed to the module
    class as keyword arguments.
    """
    batch_first = params.pop_bool("batch_first", True)
    if not batch_first:
        raise ConfigurationError("Our encoder semantics assumes batch is always first!")

    if self._module_class in self.PYTORCH_MODELS:
        # Re-insert the key: it was popped above but the module still needs it.
        params["batch_first"] = True

    stateful = params.pop_bool("stateful", False)
    module_kwargs = params.as_dict(infer_type_and_cast=True)
    return PytorchSeq2SeqWrapper(self._module_class(**module_kwargs), stateful=stateful)
def from_params(cls, params: Params) -> 'AclarcDatasetReader':
    """Build an ``AclarcDatasetReader`` from ``params``.

    Reads ``lazy`` (default ``False``), builds the tokenizer with
    ``Tokenizer.from_params`` from the ``tokenizer`` sub-config (default
    ``{}``), and reads the boolean flags ``use_lexicon_features``,
    ``use_sparse_lexicon_features`` and ``with_elmo`` (default ``False``).
    ``params.assert_empty`` is called once all keys have been read.
    """
    reader_kwargs = {
        "lazy": params.pop('lazy', False),
        "tokenizer": Tokenizer.from_params(params.pop('tokenizer', {})),
    }
    for flag in ("use_lexicon_features", "use_sparse_lexicon_features", "with_elmo"):
        reader_kwargs[flag] = params.pop_bool(flag, False)

    params.assert_empty(cls.__name__)
    return cls(**reader_kwargs)
def from_params(cls, params: Params) -> "Task":
    """Build a ``Task`` from ``params``.

    Keys and defaults: ``task_name`` (``"pos"``, passed to the constructor as
    ``name``), ``validation_metric_name`` (``"accuracy"``),
    ``validation_metric_decreases`` (``False``) and ``evaluate_on_test``
    (``False``).  ``params.assert_empty`` is called after the four keys are read.
    """
    task_kwargs = {
        "name": params.pop("task_name", "pos"),
        "validation_metric_name": params.pop("validation_metric_name", "accuracy"),
        "validation_metric_decreases": params.pop_bool("validation_metric_decreases", False),
        "evaluate_on_test": params.pop_bool("evaluate_on_test", False),
    }
    params.assert_empty(cls.__name__)
    return cls(**task_kwargs)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'Tagger':
    """Build a ``Tagger`` from ``vocab`` and a ``Params`` object.

    ``token_representation_dim`` is the only required key.  ``encoder`` and
    ``contextualizer`` are built via their ``from_params`` when supplied;
    ``decoder`` is built with ``FeedForward.from_params`` unless it is absent
    or a plain string.  ``label_encoding`` must be one of ``None``, ``"BIO"``,
    ``"BIOUL"`` or ``"IOB1"`` and defaults to the first choice.  Everything
    else is an optional flag or setting; ``params.assert_empty`` is called
    once every key has been read.
    """
    token_representation_dim = params.pop_int("token_representation_dim")

    encoder_spec = params.pop("encoder", None)
    encoder = None if encoder_spec is None else Seq2SeqEncoder.from_params(encoder_spec)

    decoder_spec = params.pop("decoder", None)
    if decoder_spec is None or isinstance(decoder_spec, str):
        # Missing or string-valued decoders are handed to the constructor untouched.
        decoder = decoder_spec
    else:
        decoder = FeedForward.from_params(decoder_spec)

    crf_options = {
        "use_crf": params.pop_bool("use_crf", False),
        "constrain_crf_decoding": params.pop_bool("constrain_crf_decoding", False),
        "include_start_end_transitions": params.pop_bool("include_start_end_transitions", True),
    }

    contextualizer_spec = params.pop('contextualizer', None)
    if contextualizer_spec:
        contextualizer = Contextualizer.from_params(contextualizer_spec)
    else:
        # Any falsy value (not only None) is forwarded unchanged.
        contextualizer = contextualizer_spec

    metric_options = {
        name: params.pop_bool(name, False)
        for name in ("calculate_per_label_f1", "calculate_span_f1", "calculate_perplexity")
    }
    loss_average = params.pop("loss_average", "batch")
    label_encoding = params.pop_choice("label_encoding",
                                       [None, "BIO", "BIOUL", "IOB1"],
                                       default_to_first_choice=True)
    pretrained_file = params.pop("pretrained_file", None)
    transfer_options = {
        name: params.pop_bool(name, False)
        for name in ("transfer_contextualizer_from_pretrained_file",
                     "transfer_encoder_from_pretrained_file",
                     "freeze_encoder")
    }
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    params.assert_empty(cls.__name__)

    return cls(vocab=vocab,
               token_representation_dim=token_representation_dim,
               encoder=encoder,
               decoder=decoder,
               label_encoding=label_encoding,
               contextualizer=contextualizer,
               loss_average=loss_average,
               pretrained_file=pretrained_file,
               initializer=initializer,
               regularizer=regularizer,
               **crf_options,
               **metric_options,
               **transfer_options)
def from_params(  # type: ignore
        cls, vocab: Vocabulary, params: Params) -> "BiattentiveClassificationNetwork":
    """Build a ``BiattentiveClassificationNetwork`` from ``vocab`` and ``params``.

    Required keys: ``text_field_embedder``, ``embedding_dropout``,
    ``pre_encode_feedforward``, ``encoder``, ``integrator``,
    ``integrator_dropout`` and ``output_layer``.  The output layer is a
    ``FeedForward`` when its config contains an ``"activations"`` key and a
    ``Maxout`` otherwise.  ``elmo`` is optional and built with
    ``Elmo.from_params`` when present.  ``params.assert_empty`` is called once
    every key has been read.
    """
    text_field_embedder = TextFieldEmbedder.from_params(
        vocab=vocab, params=params.pop("text_field_embedder"))
    embedding_dropout = params.pop("embedding_dropout")
    pre_encode_feedforward = FeedForward.from_params(params.pop("pre_encode_feedforward"))
    encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
    integrator = Seq2SeqEncoder.from_params(params.pop("integrator"))
    integrator_dropout = params.pop("integrator_dropout")

    # The presence of "activations" decides which output-layer class is used.
    output_layer_params = params.pop("output_layer")
    output_layer_cls = FeedForward if "activations" in output_layer_params else Maxout
    output_layer = output_layer_cls.from_params(output_layer_params)

    elmo_params = params.pop("elmo", None)
    elmo = None if elmo_params is None else Elmo.from_params(elmo_params)
    use_input_elmo = params.pop_bool("use_input_elmo", False)
    use_integrator_output_elmo = params.pop_bool("use_integrator_output_elmo", False)

    initializer = InitializerApplicator.from_params(params.pop("initializer", []))
    regularizer = RegularizerApplicator.from_params(params.pop("regularizer", []))
    params.assert_empty(cls.__name__)

    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               embedding_dropout=embedding_dropout,
               pre_encode_feedforward=pre_encode_feedforward,
               encoder=encoder,
               integrator=integrator,
               integrator_dropout=integrator_dropout,
               output_layer=output_layer,
               elmo=elmo,
               use_input_elmo=use_input_elmo,
               use_integrator_output_elmo=use_integrator_output_elmo,
               initializer=initializer,
               regularizer=regularizer)
def from_params(cls, params: Params) -> 'ProParaDatasetReader':
    """Build a ``ProParaDatasetReader`` from a ``Params`` configuration.

    Keys read:

    * ``token_indexers`` (default ``{}``): converted with
      ``TokenIndexer.dict_from_params``.
    * ``multiple_annotations`` (bool, default ``False``).

    After both keys are consumed, ``params.assert_empty`` is called so that an
    unrecognised or misspelled key is reported instead of being silently
    discarded.
    """
    token_indexers = TokenIndexer.dict_from_params(params.pop("token_indexers", {}))
    multiple_annotations = params.pop_bool("multiple_annotations", False)
    # Fail loudly on leftover keys; without this check a typo in the config
    # would be ignored and the default value used without any warning.
    params.assert_empty(cls.__name__)
    return ProParaDatasetReader(token_indexers=token_indexers,
                                multiple_annotations=multiple_annotations)
def from_params(cls,
                model: Model,
                serialization_dir: str,
                iterator: DataIterator,
                train_data: Iterable[Instance],
                validation_data: Optional[Iterable[Instance]],
                params: Params,
                validation_iterator: DataIterator = None) -> 'GANTrainer':
    """Build a ``GANTrainer`` from already-constructed objects plus ``params``.

    The model is moved with ``model.cuda(cuda_device)`` when ``cuda_device``
    is non-negative.  The optimizer is created from the required ``optimizer``
    sub-config over the model's named parameters that have ``requires_grad``
    set; a learning-rate scheduler is created only when
    ``learning_rate_scheduler`` is given.  All other keys are optional
    training settings.  ``params.assert_empty`` is called once every key has
    been read.
    """
    patience = params.pop_int("patience", None)
    validation_metric = params.pop("validation_metric", "-loss")
    shuffle = params.pop_bool("shuffle", True)
    num_epochs = params.pop_int("num_epochs", 20)
    cuda_device = params.pop_int("cuda_device", -1)
    grad_norm = params.pop_float("grad_norm", None)
    grad_clipping = params.pop_float("grad_clipping", None)
    scheduler_params = params.pop("learning_rate_scheduler", None)

    if cuda_device >= 0:
        model = model.cuda(cuda_device)

    # Only parameters that require gradients are handed to the optimizer.
    trainable = []
    for name, parameter in model.named_parameters():
        if parameter.requires_grad:
            trainable.append([name, parameter])
    optimizer = Optimizer.from_params(trainable, params.pop("optimizer"))

    scheduler = None
    if scheduler_params:
        scheduler = LearningRateScheduler.from_params(optimizer, scheduler_params)

    checkpoint_and_logging = {
        "num_serialized_models_to_keep": params.pop_int("num_serialized_models_to_keep", 20),
        "keep_serialized_model_every_num_seconds": params.pop_int(
            "keep_serialized_model_every_num_seconds", None),
        "model_save_interval": params.pop_float("model_save_interval", None),
        "summary_interval": params.pop_int("summary_interval", 100),
        "histogram_interval": params.pop_int("histogram_interval", None),
    }
    params.assert_empty(cls.__name__)

    return cls(model,
               optimizer,
               iterator,
               train_data,
               validation_data,
               patience=patience,
               validation_metric=validation_metric,
               validation_iterator=validation_iterator,
               shuffle=shuffle,
               num_epochs=num_epochs,
               serialization_dir=serialization_dir,
               cuda_device=cuda_device,
               grad_norm=grad_norm,
               grad_clipping=grad_clipping,
               learning_rate_scheduler=scheduler,
               **checkpoint_and_logging)
def from_params(cls, vocab, params: Params) -> 'PointerGenerator':
    """Build a ``PointerGenerator`` from ``vocab`` and ``params``.

    Required keys: ``source_embedder``, ``encoder`` and ``max_decoding_steps``.
    ``attention_function`` is optional; when it is missing, ``None`` is passed
    so that no attention is used (rather than attention with a default
    similarity function).  ``target_namespace``, ``scheduled_sampling_ratio``,
    ``pointer_gen`` and ``max_oovs`` have defaults.  ``params.assert_empty``
    is called once every key has been read.
    """
    source_embedder = TextFieldEmbedder.from_params(vocab, params.pop("source_embedder"))
    encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
    max_decoding_steps = params.pop("max_decoding_steps")
    target_namespace = params.pop("target_namespace", "tokens")

    # No attention config means no attention at all.
    attention_spec = params.pop("attention_function", None)
    if attention_spec is None:
        attention_function = None
    else:
        attention_function = SimilarityFunction.from_params(attention_spec)

    scheduled_sampling_ratio = params.pop_float("scheduled_sampling_ratio", 0.0)
    pointer_gen = params.pop_bool("pointer_gen", True)
    max_oovs = params.pop("max_oovs", None)
    params.assert_empty(cls.__name__)

    return cls(vocab,
               source_embedder=source_embedder,
               encoder=encoder,
               max_decoding_steps=max_decoding_steps,
               target_namespace=target_namespace,
               attention_function=attention_function,
               scheduled_sampling_ratio=scheduled_sampling_ratio,
               pointer_gen=pointer_gen,
               max_oovs=max_oovs)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'BidirectionalAttentionFlow':
    """Build a ``BidirectionalAttentionFlow`` model from ``vocab`` and ``params``.

    Required keys: ``text_field_embedder``, ``num_highway_layers``,
    ``phrase_layer``, ``similarity_function``, ``modeling_layer`` and
    ``span_end_encoder``.  Optional keys: ``dropout`` (0.2), ``initializer``,
    ``regularizer`` and ``mask_lstms`` (``True``).  The similarity function is
    passed to the constructor as ``attention_similarity_function``.
    ``params.assert_empty`` is called once every key has been read.
    """
    model_kwargs = {
        "text_field_embedder": TextFieldEmbedder.from_params(
            vocab, params.pop("text_field_embedder")),
        "num_highway_layers": params.pop_int("num_highway_layers"),
        "phrase_layer": Seq2SeqEncoder.from_params(params.pop("phrase_layer")),
        "attention_similarity_function": SimilarityFunction.from_params(
            params.pop("similarity_function")),
        "modeling_layer": Seq2SeqEncoder.from_params(params.pop("modeling_layer")),
        "span_end_encoder": Seq2SeqEncoder.from_params(params.pop("span_end_encoder")),
        "dropout": params.pop_float('dropout', 0.2),
        "initializer": InitializerApplicator.from_params(params.pop('initializer', [])),
        "regularizer": RegularizerApplicator.from_params(params.pop('regularizer', [])),
        "mask_lstms": params.pop_bool('mask_lstms', True),
    }
    params.assert_empty(cls.__name__)
    return cls(vocab=vocab, **model_kwargs)
def from_params(self, params: Params) -> PytorchSeq2SeqWrapper:
    """Instantiate the wrapped module class and wrap it in ``PytorchSeq2SeqWrapper``.

    A false ``batch_first`` (default ``True``) raises ``ConfigurationError``.
    For module classes in ``self.PYTORCH_MODELS`` the key is put back as
    ``True`` so the module constructor receives it.  Every remaining key is
    passed to the module class as a keyword argument.
    """
    batch_first = params.pop_bool('batch_first', True)
    if not batch_first:
        raise ConfigurationError("Our encoder semantics assumes batch is always first!")

    if self._module_class in self.PYTORCH_MODELS:
        # Re-insert the key: it was popped above but the module still needs it.
        params['batch_first'] = True

    module_kwargs = params.as_dict()
    return PytorchSeq2SeqWrapper(self._module_class(**module_kwargs))
def from_params(cls, vocab: Vocabulary, params: Params) -> 'ElmoTokenEmbedder':  # type: ignore
    # pylint: disable=arguments-differ
    """Build an ``ElmoTokenEmbedder`` from ``vocab`` and ``params``.

    ``options_file`` and ``weight_file`` are registered with the archive and
    then popped (both required).  When ``namespace_to_cache`` is given, the
    tokens of that vocabulary namespace are collected into ``vocab_to_cache``;
    otherwise ``vocab_to_cache`` is ``None``.  ``requires_grad``,
    ``do_layer_norm``, ``dropout``, ``projection_dim`` and
    ``scalar_mix_parameters`` are optional.  ``params.assert_empty`` is called
    once every key has been read.
    """
    for file_key in ('options_file', 'weight_file'):
        params.add_file_to_archive(file_key)

    options_file = params.pop('options_file')
    weight_file = params.pop('weight_file')
    requires_grad = params.pop('requires_grad', False)
    do_layer_norm = params.pop_bool('do_layer_norm', False)
    dropout = params.pop_float("dropout", 0.5)

    namespace_to_cache = params.pop("namespace_to_cache", None)
    vocab_to_cache = None
    if namespace_to_cache is not None:
        token_to_index = vocab.get_token_to_index_vocabulary(namespace_to_cache)
        vocab_to_cache = list(token_to_index.keys())

    projection_dim = params.pop_int("projection_dim", None)
    scalar_mix_parameters = params.pop('scalar_mix_parameters', None)
    params.assert_empty(cls.__name__)

    return cls(options_file=options_file,
               weight_file=weight_file,
               do_layer_norm=do_layer_norm,
               dropout=dropout,
               requires_grad=requires_grad,
               projection_dim=projection_dim,
               vocab_to_cache=vocab_to_cache,
               scalar_mix_parameters=scalar_mix_parameters)
def from_params(cls, vocab, params: Params) -> 'WikiTablesMmlSemanticParser':
    """Build a ``WikiTablesMmlSemanticParser`` from ``vocab`` and ``params``.

    Required keys: ``question_embedder``, ``action_embedding_dim``,
    ``encoder``, ``entity_encoder``, ``max_decoding_steps``,
    ``decoder_beam_search`` and ``attention``.  ``mixture_feedforward`` is
    optional and built with ``FeedForward.from_params`` when present.  The
    remaining keys have defaults.  ``params.assert_empty`` is called once
    every key has been read.
    """
    question_embedder = TextFieldEmbedder.from_params(vocab, params.pop("question_embedder"))
    action_embedding_dim = params.pop_int("action_embedding_dim")
    encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
    entity_encoder = Seq2VecEncoder.from_params(params.pop('entity_encoder'))
    max_decoding_steps = params.pop_int("max_decoding_steps")

    mixture_spec = params.pop('mixture_feedforward', None)
    mixture_feedforward = None if mixture_spec is None else FeedForward.from_params(mixture_spec)

    decoder_beam_search = BeamSearch.from_params(params.pop("decoder_beam_search"))
    input_attention = Attention.from_params(params.pop("attention"))

    optional_settings = {
        "training_beam_size": params.pop_int('training_beam_size', None),
        "use_neighbor_similarity_for_linking": params.pop_bool(
            'use_neighbor_similarity_for_linking', False),
        "dropout": params.pop_float('dropout', 0.0),
        "num_linking_features": params.pop_int('num_linking_features', 10),
        "tables_directory": params.pop('tables_directory', '/wikitables/'),
        "rule_namespace": params.pop('rule_namespace', 'rule_labels'),
    }
    params.assert_empty(cls.__name__)

    return cls(vocab,
               question_embedder=question_embedder,
               action_embedding_dim=action_embedding_dim,
               encoder=encoder,
               entity_encoder=entity_encoder,
               mixture_feedforward=mixture_feedforward,
               decoder_beam_search=decoder_beam_search,
               max_decoding_steps=max_decoding_steps,
               input_attention=input_attention,
               **optional_settings)
def from_params(  # type: ignore
        cls, vocab: Vocabulary, params: Params, **extras) -> "ElmoTokenEmbedder":
    """Build an ``ElmoTokenEmbedder`` from ``vocab`` and ``params``.

    ``options_file`` and ``weight_file`` are required.  When
    ``namespace_to_cache`` is given, the tokens of that vocabulary namespace
    are collected into ``vocab_to_cache``; otherwise it is ``None``.
    ``requires_grad``, ``do_layer_norm``, ``dropout``, ``projection_dim`` and
    ``scalar_mix_parameters`` are optional.  ``extras`` is accepted but not
    used.  ``params.assert_empty`` is called once every key has been read.
    """
    options_file = params.pop("options_file")
    weight_file = params.pop("weight_file")
    requires_grad = params.pop("requires_grad", False)
    do_layer_norm = params.pop_bool("do_layer_norm", False)
    dropout = params.pop_float("dropout", 0.5)

    namespace_to_cache = params.pop("namespace_to_cache", None)
    vocab_to_cache = None
    if namespace_to_cache is not None:
        token_to_index = vocab.get_token_to_index_vocabulary(namespace_to_cache)
        vocab_to_cache = list(token_to_index.keys())

    projection_dim = params.pop_int("projection_dim", None)
    scalar_mix_parameters = params.pop("scalar_mix_parameters", None)
    params.assert_empty(cls.__name__)

    return cls(options_file=options_file,
               weight_file=weight_file,
               do_layer_norm=do_layer_norm,
               dropout=dropout,
               requires_grad=requires_grad,
               projection_dim=projection_dim,
               vocab_to_cache=vocab_to_cache,
               scalar_mix_parameters=scalar_mix_parameters)
def from_params(cls, params: Params) -> 'Seq2SeqDatasetReader':
    """Build a ``Seq2SeqDatasetReader`` from ``params``.

    ``source_tokenizer``, ``target_tokenizer``, ``source_token_indexers`` and
    ``target_token_indexers`` are each optional: a missing key yields
    ``None``, a present one is built with ``Tokenizer.from_params`` or
    ``TokenIndexer.dict_from_params`` respectively.
    ``source_add_start_token`` defaults to ``True`` and ``lazy`` to ``False``.
    ``params.assert_empty`` is called once every key has been read.
    """
    def build_or_none(builder, spec):
        # A missing sub-config is passed to the reader as None.
        return None if spec is None else builder(spec)

    source_tokenizer = build_or_none(Tokenizer.from_params,
                                     params.pop('source_tokenizer', None))
    target_tokenizer = build_or_none(Tokenizer.from_params,
                                     params.pop('target_tokenizer', None))

    # The start-token flag is read between popping and building the source indexers.
    source_indexers_spec = params.pop('source_token_indexers', None)
    source_add_start_token = params.pop_bool('source_add_start_token', True)
    source_token_indexers = build_or_none(TokenIndexer.dict_from_params, source_indexers_spec)

    target_token_indexers = build_or_none(TokenIndexer.dict_from_params,
                                          params.pop('target_token_indexers', None))
    lazy = params.pop('lazy', False)
    params.assert_empty(cls.__name__)

    return Seq2SeqDatasetReader(source_tokenizer=source_tokenizer,
                                target_tokenizer=target_tokenizer,
                                source_token_indexers=source_token_indexers,
                                target_token_indexers=target_token_indexers,
                                source_add_start_token=source_add_start_token,
                                lazy=lazy)
def extend_from_instances(self,
                          params: Params,
                          instances: Iterable['adi.Instance'] = ()) -> None:
    """
    Extends an already generated vocabulary using a collection of instances.

    The extension settings are read from ``params`` (``min_count``, the
    maximum vocabulary size via ``pop_max_vocab_size``,
    ``non_padded_namespaces``, ``pretrained_files``,
    ``min_pretrained_embeddings``, ``only_include_pretrained_words`` and
    ``tokens_to_add``); any other key is reported by ``params.assert_empty``.
    Each instance then contributes to a per-namespace token counter, and the
    counter plus the settings are handed to ``self._extend``.
    """
    extend_kwargs = {
        "min_count": params.pop("min_count", None),
        "max_vocab_size": pop_max_vocab_size(params),
        "non_padded_namespaces": params.pop("non_padded_namespaces",
                                            DEFAULT_NON_PADDED_NAMESPACES),
        "pretrained_files": params.pop("pretrained_files", {}),
        "min_pretrained_embeddings": params.pop("min_pretrained_embeddings", None),
        "only_include_pretrained_words": params.pop_bool("only_include_pretrained_words", False),
        "tokens_to_add": params.pop("tokens_to_add", None),
    }
    params.assert_empty("Vocabulary - from dataset")

    logger.info("Fitting token dictionary from dataset.")
    # namespace -> token -> count; both levels are created on first access.
    counts: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
    for instance in Tqdm.tqdm(instances):
        instance.count_vocab_items(counts)

    self._extend(counter=counts, **extend_kwargs)
def from_params(cls,
                model: Model,
                task_list: List[Task],
                serialization_dir: str,
                params: Params) -> "GanMtlTrainer":
    """Build a ``GanMtlTrainer`` from a model, a task list and ``params``.

    ``optimizer`` and ``scheduler`` are required and forwarded unmodified as
    ``optimizer_params`` and ``lr_scheduler_params``.  All other keys are
    optional training settings with defaults.  ``params.assert_empty`` is
    called once every key has been read.
    """
    trainer_kwargs = {
        "optimizer_params": params.pop("optimizer"),
        "lr_scheduler_params": params.pop("scheduler"),
        "patience": params.pop_int("patience", 2),
        "num_epochs": params.pop_int("num_epochs", 20),
        "cuda_device": params.pop_int("cuda_device", -1),
        "gradient_accumulation_steps": params.pop_int("gradient_accumulation_steps", 1),
        "grad_norm": params.pop_float("grad_norm", None),
        "grad_clipping": params.pop_float("grad_clipping", None),
        "min_lr": params.pop_float("min_lr", 0.00001),
        "no_tqdm": params.pop_bool("no_tqdm", False),
        "summary_interval": params.pop("summary_interval", 30),
        "histogram_interval": params.pop("histogram_interval", 30),
        "sampling_method": params.pop("sampling_method", "proportional"),
    }
    params.assert_empty(cls.__name__)

    return GanMtlTrainer(model=model,
                         task_list=task_list,
                         serialization_dir=serialization_dir,
                         **trainer_kwargs)
def from_params(  # type: ignore
        cls, vocab: Vocabulary, params: Params) -> "ElmoTokenEmbedderMultiLang":
    """Build an ``ElmoTokenEmbedderMultiLang`` from ``vocab`` and ``params``.

    ``options_files`` and ``weight_files`` are required; every key of each is
    registered with the archive through ``add_file_to_archive`` on the
    respective container.  When ``namespace_to_cache`` is given, the tokens of
    that vocabulary namespace are collected into ``vocab_to_cache``.
    ``aligning_files`` defaults to ``{}``.  ``params.assert_empty`` is called
    once every key has been read.
    """
    options_files = params.pop("options_files")
    weight_files = params.pop("weight_files")
    # Archive every per-language file: all options files first, then all weights.
    for per_language_files in (options_files, weight_files):
        for lang in per_language_files.keys():
            per_language_files.add_file_to_archive(lang)

    requires_grad = params.pop("requires_grad", False)
    do_layer_norm = params.pop_bool("do_layer_norm", False)
    dropout = params.pop_float("dropout", 0.5)

    namespace_to_cache = params.pop("namespace_to_cache", None)
    vocab_to_cache = None
    if namespace_to_cache is not None:
        token_to_index = vocab.get_token_to_index_vocabulary(namespace_to_cache)
        vocab_to_cache = list(token_to_index.keys())

    projection_dim = params.pop_int("projection_dim", None)
    scalar_mix_parameters = params.pop("scalar_mix_parameters", None)
    aligning_files = params.pop("aligning_files", {})
    params.assert_empty(cls.__name__)

    return cls(options_files=options_files,
               weight_files=weight_files,
               do_layer_norm=do_layer_norm,
               dropout=dropout,
               requires_grad=requires_grad,
               projection_dim=projection_dim,
               vocab_to_cache=vocab_to_cache,
               scalar_mix_parameters=scalar_mix_parameters,
               aligning_files=aligning_files)
def from_params(cls, params: Params):
    """Build an instance of ``cls`` from ``params``.

    Required keys: ``input_dim``, ``hidden_dim`` and
    ``feedforward_hidden_dim``.  Optional keys and defaults:
    ``projection_dim`` (``None``), ``num_layers`` (2),
    ``num_attention_heads`` (3), ``use_positional_encoding`` (``True``),
    ``dropout_prob`` (0.1), ``residual_dropout_prob`` (0.2) and
    ``attention_dropout_prob`` (0.1).  ``params.assert_empty`` is called once
    every key has been read.
    """
    encoder_kwargs = {
        "input_dim": params.pop_int('input_dim'),
        "hidden_dim": params.pop_int('hidden_dim'),
        "projection_dim": params.pop_int('projection_dim', None),
        "feedforward_hidden_dim": params.pop_int("feedforward_hidden_dim"),
        "num_layers": params.pop_int("num_layers", 2),
        "num_attention_heads": params.pop_int('num_attention_heads', 3),
        "use_positional_encoding": params.pop_bool('use_positional_encoding', True),
        "dropout_prob": params.pop_float("dropout_prob", 0.1),
        "residual_dropout_prob": params.pop_float("residual_dropout_prob", 0.2),
        "attention_dropout_prob": params.pop_float("attention_dropout_prob", 0.1),
    }
    params.assert_empty(cls.__name__)
    return cls(**encoder_kwargs)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'ElmoTokenEmbedder':  # type: ignore
    # pylint: disable=arguments-differ
    """Build an ``ElmoTokenEmbedder`` from ``vocab`` and ``params``.

    ``options_file`` and ``weight_file`` are registered with the archive and
    then popped (both required).  When ``namespace_to_cache`` is given, the
    tokens of that vocabulary namespace are collected into ``vocab_to_cache``;
    otherwise it is ``None``.  ``requires_grad``, ``do_layer_norm``,
    ``dropout`` and ``projection_dim`` are optional.  ``params.assert_empty``
    is called once every key has been read.
    """
    for file_key in ('options_file', 'weight_file'):
        params.add_file_to_archive(file_key)

    options_file = params.pop('options_file')
    weight_file = params.pop('weight_file')
    requires_grad = params.pop('requires_grad', False)
    do_layer_norm = params.pop_bool('do_layer_norm', False)
    dropout = params.pop_float("dropout", 0.5)

    namespace_to_cache = params.pop("namespace_to_cache", None)
    vocab_to_cache = None
    if namespace_to_cache is not None:
        token_to_index = vocab.get_token_to_index_vocabulary(namespace_to_cache)
        vocab_to_cache = list(token_to_index.keys())

    projection_dim = params.pop_int("projection_dim", None)
    params.assert_empty(cls.__name__)

    return cls(options_file=options_file,
               weight_file=weight_file,
               do_layer_norm=do_layer_norm,
               dropout=dropout,
               requires_grad=requires_grad,
               projection_dim=projection_dim,
               vocab_to_cache=vocab_to_cache)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SarcasmClassifier':
    """Build a BERT-based ``SarcasmClassifier`` from ``vocab`` and ``params``.

    Required keys: ``bert_model_name``, ``quote_response_encoder``,
    ``classifier_feedforward`` and ``classifier_feedforward_2``.  Optional
    keys: ``initializer``, ``regularizer`` and ``report_auxiliary_metrics``
    (default ``False``).

    NOTE(review): unlike most sibling factories, this one does not call
    ``params.assert_empty``, and an earlier ``predict_mode`` pop has been
    disabled, so leftover keys (possibly including ``predict_mode``) are
    ignored.  Confirm whether that leniency is intentional.
    """
    model_kwargs = {
        "bert_model_name": params.pop("bert_model_name"),
        "quote_response_encoder": Seq2VecEncoder.from_params(
            params.pop("quote_response_encoder")),
        "classifier_feedforward": FeedForward.from_params(
            params.pop("classifier_feedforward")),
        "classifier_feedforward_2": FeedForward.from_params(
            params.pop("classifier_feedforward_2")),
        "initializer": InitializerApplicator.from_params(params.pop('initializer', [])),
        "regularizer": RegularizerApplicator.from_params(params.pop('regularizer', [])),
        "report_auxiliary_metrics": params.pop_bool("report_auxiliary_metrics", False),
    }
    return cls(vocab=vocab, **model_kwargs)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SarcasmClassifier':
    """Build a ``SarcasmClassifier`` from ``vocab`` and a ``Params`` configuration.

    Keys read:

    * ``text_field_embedder`` (required): built with
      ``TextFieldEmbedder.from_params``.
    * ``quote_response_encoder`` (required): built with
      ``Seq2VecEncoder.from_params``.
    * ``classifier_feedforward`` (required): built with
      ``FeedForward.from_params``.
    * ``initializer`` / ``regularizer`` (default ``[]``).
    * ``predict_mode`` (bool, default ``False``).

    After all keys are consumed, ``params.assert_empty`` is called so that an
    unrecognised or misspelled key is reported instead of being silently
    ignored.
    """
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(embedder_params, vocab=vocab)
    quote_response_encoder = Seq2VecEncoder.from_params(params.pop("quote_response_encoder"))
    classifier_feedforward = FeedForward.from_params(params.pop("classifier_feedforward"))
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    predict_mode = params.pop_bool("predict_mode", False)
    # Every supported key has been popped by now; anything left is a
    # configuration mistake that would otherwise go unnoticed.
    params.assert_empty(cls.__name__)

    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               quote_response_encoder=quote_response_encoder,
               classifier_feedforward=classifier_feedforward,
               initializer=initializer,
               regularizer=regularizer,
               predict_mode=predict_mode)
def from_params(cls, vocab: Vocabulary, params: Params  # type: ignore
                ) -> 'ElmoTokenEmbedderMultiLang':
    # pylint: disable=arguments-differ
    """Build an ``ElmoTokenEmbedderMultiLang`` from ``vocab`` and ``params``.

    ``options_files`` and ``weight_files`` are required; every key of each is
    registered with the archive through ``add_file_to_archive`` on the
    respective container.  When ``namespace_to_cache`` is given, the tokens of
    that vocabulary namespace are collected into ``vocab_to_cache``.
    ``aligning_files`` defaults to ``{}``.  ``params.assert_empty`` is called
    once every key has been read.
    """
    options_files = params.pop('options_files')
    weight_files = params.pop('weight_files')
    # Archive every per-language file: all options files first, then all weights.
    for per_language_files in (options_files, weight_files):
        for lang in per_language_files.keys():
            per_language_files.add_file_to_archive(lang)

    requires_grad = params.pop('requires_grad', False)
    do_layer_norm = params.pop_bool('do_layer_norm', False)
    dropout = params.pop_float("dropout", 0.5)

    namespace_to_cache = params.pop("namespace_to_cache", None)
    vocab_to_cache = None
    if namespace_to_cache is not None:
        token_to_index = vocab.get_token_to_index_vocabulary(namespace_to_cache)
        vocab_to_cache = list(token_to_index.keys())

    projection_dim = params.pop_int("projection_dim", None)
    scalar_mix_parameters = params.pop('scalar_mix_parameters', None)
    aligning_files = params.pop('aligning_files', {})
    params.assert_empty(cls.__name__)

    return cls(options_files=options_files,
               weight_files=weight_files,
               do_layer_norm=do_layer_norm,
               dropout=dropout,
               requires_grad=requires_grad,
               projection_dim=projection_dim,
               vocab_to_cache=vocab_to_cache,
               scalar_mix_parameters=scalar_mix_parameters,
               aligning_files=aligning_files)
def from_params(cls, params: Params) -> 'Attention':
    """Build an ``Attention`` module from ``params``.

    The similarity function is created with ``SimilarityFunction.from_params``
    from the ``similarity_function`` sub-config (default ``{}``);
    ``normalize`` defaults to ``True``.  ``params.assert_empty`` is called
    after both keys are read.
    """
    similarity_spec = params.pop('similarity_function', {})
    attention_kwargs = {
        "similarity_function": SimilarityFunction.from_params(similarity_spec),
        "normalize": params.pop_bool('normalize', True),
    }
    params.assert_empty(cls.__name__)
    return cls(**attention_kwargs)
def from_params(cls, params: Params) -> 'Embedding':  # pylint: disable=arguments-differ
    """Build an entity/predicate ``Embedding`` from ``params``.

    ``embedding_dim`` is the only required key.  The id mappings are always
    loaded by calling ``_read_element2id_file`` with ``entity2id_file`` and
    ``predicate2id_file`` (each may be ``None`` when not configured).
    Pretrained weights are read with ``_read_pretrained_embeddings_file``
    only for the pretrained files that are set.  ``params.assert_empty`` is
    called before any file is read.
    """
    num_entities = params.pop_int('num_entities', None)
    num_predicates = params.pop_int('num_predicates', None)
    embedding_dim = params.pop_int('embedding_dim')
    entity_pretrained_file = params.pop("entity_pretrained_file", None)
    predicate_pretrained_file = params.pop("predicate_pretrained_file", None)
    entity2id_file = params.pop('entity2id_file', None)
    predicate2id_file = params.pop('predicate2id_file', None)

    options = {
        "projection_dim": params.pop_int("projection_dim", None),
        "trainable": params.pop_bool("trainable", True),
        "padding_index": params.pop_int('padding_index', None),
        "max_norm": params.pop_float('max_norm', None),
        "norm_type": params.pop_float('norm_type', 2.0),
        "scale_grad_by_freq": params.pop_bool('scale_grad_by_freq', False),
        "sparse": params.pop_bool('sparse', False),
        "cuda_device": params.pop_int('cuda_device', -1),
    }
    params.assert_empty(cls.__name__)

    entity2id = _read_element2id_file(entity2id_file)
    predicate2id = _read_element2id_file(predicate2id_file)

    # Weights stay None unless the corresponding pretrained file is configured.
    entity_weight = (_read_pretrained_embeddings_file(entity_pretrained_file)
                     if entity_pretrained_file else None)
    predicate_weight = (_read_pretrained_embeddings_file(predicate_pretrained_file)
                        if predicate_pretrained_file else None)

    return cls(num_entities=num_entities,
               num_predicates=num_predicates,
               embedding_dim=embedding_dim,
               entity2id=entity2id,
               predicate2id=predicate2id,
               entity_weight=entity_weight,
               predicate_weight=predicate_weight,
               **options)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'BasicTextFieldEmbedder':  # type: ignore
    # pylint: disable=arguments-differ,bad-super-call
    """
    Build a ``BasicTextFieldEmbedder`` from a ``Params`` object.

    The constructor takes a ``token_embedders`` dict, so the preferred
    configuration nests the embedders under a ``token_embedders`` key.  The
    older layout, where each embedder is a top-level key, is still accepted
    but emits a ``DeprecationWarning``.

    Other keys read: ``embedder_to_indexer_map`` (default None, converted with
    ``as_dict(quiet=True)`` when present), ``allow_unmatched_keys`` (default
    False) and ``use_fp16`` (default False).
    """
    indexer_map = params.pop("embedder_to_indexer_map", None)
    if indexer_map is not None:
        indexer_map = indexer_map.as_dict(quiet=True)
    unmatched_ok = params.pop_bool("allow_unmatched_keys", False)
    fp16 = params.pop_bool("use_fp16", False)

    nested_embedders = params.pop('token_embedders', None)
    if nested_embedders is None:
        # Legacy layout: every key still left in ``params`` names an embedder.
        warnings.warn(DeprecationWarning("the token embedders for BasicTextFieldEmbedder should now "
                                         "be specified as a dict under the 'token_embedders' key, "
                                         "not as top-level key-value pairs"))
        embedders = {
            key: TokenEmbedder.from_params(vocab=vocab, params=params.pop(key))
            for key in list(params.keys())
        }
    else:
        # Preferred layout: embedders were given explicitly.
        embedders = {}
        for name, subparams in nested_embedders.items():
            embedders[name] = TokenEmbedder.from_params(subparams, vocab=vocab)

    params.assert_empty(cls.__name__)
    return cls(fp16, embedders, indexer_map, unmatched_ok)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'Embedding':
    """
    Build an ``Embedding`` from a ``Params`` object.

    The number of embeddings is taken from the ``num_embeddings`` key when it
    is given; otherwise it is the size of the vocabulary namespace named by
    ``vocab_namespace`` (default ``"tokens"``).  ``embedding_dim`` is required.

    Optional keys (with defaults): ``pretrained_file`` (None),
    ``projection_dim`` (None), ``trainable`` (True), ``padding_index`` (None),
    ``max_norm`` (None), ``norm_type`` (2.), ``scale_grad_by_freq`` (False)
    and ``sparse`` (False).
    """
    size = params.pop_int('num_embeddings', None)
    namespace = params.pop("vocab_namespace", "tokens")
    if size is None:
        size = vocab.get_vocab_size(namespace)
    dim = params.pop_int('embedding_dim')
    vectors_path = params.pop("pretrained_file", None)
    module_options = {
        'projection_dim': params.pop_int("projection_dim", None),
        'trainable': params.pop_bool("trainable", True),
        'padding_index': params.pop_int('padding_index', None),
        'max_norm': params.pop_float('max_norm', None),
        'norm_type': params.pop_float('norm_type', 2.),
        'scale_grad_by_freq': params.pop_bool('scale_grad_by_freq', False),
        'sparse': params.pop_bool('sparse', False),
    }
    params.assert_empty(cls.__name__)

    # NOTE: the original author's remark here was that a saved model already
    # carries its embeddings and the pretrained file may no longer exist;
    # nevertheless the file is read whenever ``pretrained_file`` is set.
    weight = (_read_pretrained_embedding_file(vectors_path, dim, vocab, namespace)
              if vectors_path else None)

    return cls(num_embeddings=size,
               embedding_dim=dim,
               weight=weight,
               **module_options)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'ElmoTokenEmbedder':
    """
    Build an ``ElmoTokenEmbedder`` from a ``Params`` object.

    ``options_file`` and ``weight_file`` are required and are registered with
    ``params.add_file_to_archive`` before being read.  Optional keys:
    ``requires_grad`` (default False), ``do_layer_norm`` (default False) and
    ``dropout`` (default 0.5).  ``vocab`` is accepted but not used here.
    """
    for archived_key in ('options_file', 'weight_file'):
        params.add_file_to_archive(archived_key)
    options_path = params.pop('options_file')
    weights_path = params.pop('weight_file')
    trainable = params.pop('requires_grad', False)
    layer_norm = params.pop_bool('do_layer_norm', False)
    dropout_rate = params.pop_float("dropout", 0.5)
    params.assert_empty(cls.__name__)
    return cls(options_path, weights_path, layer_norm, dropout_rate, requires_grad=trainable)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'BiattentiveClassificationNetwork':  # type: ignore
    # pylint: disable=arguments-differ
    """
    Build a ``BiattentiveClassificationNetwork`` from a ``Params`` object.

    Required keys: ``text_field_embedder``, ``embedding_dropout``,
    ``pre_encode_feedforward``, ``encoder``, ``integrator``,
    ``integrator_dropout`` and ``output_layer``.  The output layer is built
    as a ``FeedForward`` when its params contain an ``"activations"`` key and
    as a ``Maxout`` otherwise.

    Optional keys: ``elmo`` (default None), ``use_input_elmo`` (default
    False), ``use_integrator_output_elmo`` (default False), ``initializer``
    (default ``[]``) and ``regularizer`` (default ``[]``).
    """
    kwargs = {'vocab': vocab}
    kwargs['text_field_embedder'] = TextFieldEmbedder.from_params(
            vocab=vocab, params=params.pop("text_field_embedder"))
    kwargs['embedding_dropout'] = params.pop("embedding_dropout")
    kwargs['pre_encode_feedforward'] = FeedForward.from_params(params.pop("pre_encode_feedforward"))
    kwargs['encoder'] = Seq2SeqEncoder.from_params(params.pop("encoder"))
    kwargs['integrator'] = Seq2SeqEncoder.from_params(params.pop("integrator"))
    kwargs['integrator_dropout'] = params.pop("integrator_dropout")

    # The presence of "activations" decides which module type the output layer is.
    output_spec = params.pop("output_layer")
    output_layer_type = FeedForward if "activations" in output_spec else Maxout
    kwargs['output_layer'] = output_layer_type.from_params(output_spec)

    elmo_spec = params.pop("elmo", None)
    kwargs['elmo'] = None if elmo_spec is None else Elmo.from_params(elmo_spec)
    kwargs['use_input_elmo'] = params.pop_bool("use_input_elmo", False)
    kwargs['use_integrator_output_elmo'] = params.pop_bool("use_integrator_output_elmo", False)

    kwargs['initializer'] = InitializerApplicator.from_params(params.pop('initializer', []))
    kwargs['regularizer'] = RegularizerApplicator.from_params(params.pop('regularizer', []))
    params.assert_empty(cls.__name__)

    return cls(**kwargs)
def from_params(cls, params: Params) -> 'Elmo':
    """
    Build an ``Elmo`` module from a ``Params`` object.

    Required keys: ``options_file``, ``weight_file`` (both registered with
    ``params.add_file_to_archive`` first) and ``num_output_representations``.
    Optional keys: ``requires_grad`` (default False) and ``do_layer_norm``
    (default False).
    """
    # Register both files for archiving before consuming their keys.
    for archived_key in ('options_file', 'weight_file'):
        params.add_file_to_archive(archived_key)
    options_path = params.pop('options_file')
    weights_path = params.pop('weight_file')
    trainable = params.pop('requires_grad', False)
    representation_count = params.pop('num_output_representations')
    layer_norm = params.pop_bool('do_layer_norm', False)
    params.assert_empty(cls.__name__)
    return cls(options_path,
               weights_path,
               representation_count,
               requires_grad=trainable,
               do_layer_norm=layer_norm)
def from_params(cls, params: Params) -> 'Elmo':
    """
    Build an ``Elmo`` module from a ``Params`` object.

    Required keys: ``options_file``, ``weight_file`` (both registered with
    ``params.add_file_to_archive`` first) and ``num_output_representations``.
    Optional keys (with defaults): ``requires_grad`` (False),
    ``do_layer_norm`` (False), ``keep_sentence_boundaries`` (False),
    ``dropout`` (0.5) and ``scalar_mix_parameters`` (None).
    """
    # Add files to archive
    for archived_key in ('options_file', 'weight_file'):
        params.add_file_to_archive(archived_key)

    # Values are popped in the same order as the dict entries are written.
    constructor_kwargs = {
        'options_file': params.pop('options_file'),
        'weight_file': params.pop('weight_file'),
        'requires_grad': params.pop('requires_grad', False),
        'num_output_representations': params.pop('num_output_representations'),
        'do_layer_norm': params.pop_bool('do_layer_norm', False),
        'keep_sentence_boundaries': params.pop_bool('keep_sentence_boundaries', False),
        'dropout': params.pop_float('dropout', 0.5),
        'scalar_mix_parameters': params.pop('scalar_mix_parameters', None),
    }
    params.assert_empty(cls.__name__)
    return cls(**constructor_kwargs)
def from_params(cls, params: Params):
    """
    Build an instance of ``cls`` from a ``Params`` object.

    Required integer keys: ``input_dim``, ``hidden_dim`` and
    ``feedforward_hidden_dim``.  Optional keys (with defaults):
    ``projection_dim`` (None), ``num_layers`` (2), ``num_attention_heads``
    (3), ``use_positional_encoding`` (True) and ``dropout_prob`` (0.2).
    """
    # Values are popped in the same order as the dict entries are written.
    constructor_kwargs = {
        'input_dim': params.pop_int('input_dim'),
        'hidden_dim': params.pop_int('hidden_dim'),
        'projection_dim': params.pop_int('projection_dim', None),
        'feedforward_hidden_dim': params.pop_int("feedforward_hidden_dim"),
        'num_layers': params.pop_int("num_layers", 2),
        'num_attention_heads': params.pop_int('num_attention_heads', 3),
        'use_positional_encoding': params.pop_bool('use_positional_encoding', True),
        'dropout_prob': params.pop_float("dropout_prob", 0.2),
    }
    params.assert_empty(cls.__name__)
    return cls(**constructor_kwargs)
def from_params(cls, params: Params) -> 'AdaptiveIterator':
    """
    Build an ``AdaptiveIterator`` from a ``Params`` object.

    Required keys: ``adaptive_memory_usage_constant`` (int) and
    ``padding_memory_scaling``.  Optional keys (with defaults):
    ``maximum_batch_size`` (10000), ``biggest_batch_first`` (False),
    ``batch_size`` (None), ``sorting_keys`` (None), ``padding_noise`` (0.2),
    ``instances_per_epoch`` (None) and ``max_instances_in_memory`` (None).

    The constructor argument is called ``padding_noise`` but this method
    historically read it from the key ``sorting_noise``.  Both spellings are
    accepted: ``padding_noise`` wins when both are present, and
    ``sorting_noise`` keeps working for existing configurations.
    """
    adaptive_memory_usage_constant = params.pop_int('adaptive_memory_usage_constant')
    padding_memory_scaling = params.pop('padding_memory_scaling')
    maximum_batch_size = params.pop_int('maximum_batch_size', 10000)
    biggest_batch_first = params.pop_bool('biggest_batch_first', False)
    batch_size = params.pop_int('batch_size', None)
    sorting_keys = params.pop('sorting_keys', None)
    # Legacy key first, so old configs behave exactly as before; the key that
    # matches the constructor argument overrides it when supplied.
    legacy_padding_noise = params.pop_float('sorting_noise', 0.2)
    padding_noise = params.pop_float('padding_noise', legacy_padding_noise)
    instances_per_epoch = params.pop_int('instances_per_epoch', None)
    max_instances_in_memory = params.pop_int('max_instances_in_memory', None)
    params.assert_empty(cls.__name__)

    return cls(adaptive_memory_usage_constant=adaptive_memory_usage_constant,
               padding_memory_scaling=padding_memory_scaling,
               maximum_batch_size=maximum_batch_size,
               biggest_batch_first=biggest_batch_first,
               batch_size=batch_size,
               sorting_keys=sorting_keys,
               padding_noise=padding_noise,
               instances_per_epoch=instances_per_epoch,
               max_instances_in_memory=max_instances_in_memory)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'BasicTextFieldEmbedder':  # type: ignore
    # pylint: disable=arguments-differ,bad-super-call
    """
    Build a ``BasicTextFieldEmbedder`` from a ``Params`` object.

    The constructor expects a ``token_embedders`` dict, so configurations
    should nest the embedders under a ``token_embedders`` key.  The earlier
    layout (one top-level key per embedder) is still supported but produces a
    ``DeprecationWarning``.

    Other keys read: ``embedder_to_indexer_map`` (default None, converted with
    ``as_dict(quiet=True)`` when present) and ``allow_unmatched_keys``
    (default False).
    """
    indexer_map = params.pop("embedder_to_indexer_map", None)
    if indexer_map is not None:
        indexer_map = indexer_map.as_dict(quiet=True)
    unmatched_ok = params.pop_bool("allow_unmatched_keys", False)

    nested_embedders = params.pop('token_embedders', None)
    embedders = {}
    if nested_embedders is None:
        # Deprecated layout: treat every remaining key as an embedder name.
        warnings.warn(DeprecationWarning("the token embedders for BasicTextFieldEmbedder should now "
                                         "be specified as a dict under the 'token_embedders' key, "
                                         "not as top-level key-value pairs"))
        for key in list(params.keys()):
            embedders[key] = TokenEmbedder.from_params(vocab=vocab, params=params.pop(key))
    else:
        # Preferred layout: explicit mapping of name -> embedder params.
        for name, subparams in nested_embedders.items():
            embedders[name] = TokenEmbedder.from_params(subparams, vocab=vocab)

    params.assert_empty(cls.__name__)
    return cls(embedders, indexer_map, unmatched_ok)
def from_params(cls, params: Params) -> 'Seq2SeqDatasetReader':
    """
    Build a ``Seq2SeqDatasetReader`` (or the subclass this is called on) from
    a ``Params`` object.

    Optional keys (with defaults): ``source_tokenizer`` (None),
    ``target_tokenizer`` (None), ``source_token_indexers`` (None),
    ``source_add_start_token`` (True), ``target_token_indexers`` (None) and
    ``lazy`` (False).  Tokenizer params are built with
    ``Tokenizer.from_params`` and indexer params with
    ``TokenIndexer.dict_from_params``; a missing key is passed on as ``None``.

    The instance is created through ``cls`` rather than the hard-coded class
    name, so a subclass inheriting this method gets an instance of itself.
    """
    source_tokenizer_type = params.pop('source_tokenizer', None)
    source_tokenizer = (None if source_tokenizer_type is None
                        else Tokenizer.from_params(source_tokenizer_type))
    target_tokenizer_type = params.pop('target_tokenizer', None)
    target_tokenizer = (None if target_tokenizer_type is None
                        else Tokenizer.from_params(target_tokenizer_type))

    source_indexers_type = params.pop('source_token_indexers', None)
    source_add_start_token = params.pop_bool('source_add_start_token', True)
    source_token_indexers = (None if source_indexers_type is None
                             else TokenIndexer.dict_from_params(source_indexers_type))
    target_indexers_type = params.pop('target_token_indexers', None)
    target_token_indexers = (None if target_indexers_type is None
                             else TokenIndexer.dict_from_params(target_indexers_type))

    lazy = params.pop('lazy', False)
    params.assert_empty(cls.__name__)
    return cls(source_tokenizer=source_tokenizer,
               target_tokenizer=target_tokenizer,
               source_token_indexers=source_token_indexers,
               target_token_indexers=target_token_indexers,
               source_add_start_token=source_add_start_token,
               lazy=lazy)
def from_params(cls, params: Params) -> 'BagOfEmbeddingsEncoder':
    """
    Build a ``BagOfEmbeddingsEncoder`` from a ``Params`` object.

    Reads the required integer ``embedding_dim`` and the optional boolean
    ``averaged`` (``None`` when absent).
    """
    constructor_kwargs = {
        'embedding_dim': params.pop_int('embedding_dim'),
        'averaged': params.pop_bool('averaged', default=None),
    }
    params.assert_empty(cls.__name__)
    return cls(**constructor_kwargs)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'Embedding':  # type: ignore
    """
    Build an ``Embedding`` from a ``Params`` object.

    The vocabulary tells us how many items to embed: the ``vocab_namespace``
    key (default ``"tokens"``) selects the namespace whose size is used.  If
    the number of embeddings is known up front, or no vocabulary mapping is
    involved, pass ``num_embeddings`` directly and the vocabulary size is not
    consulted.

    A file of pretrained embeddings may be given under ``"pretrained_file"``,
    either as a local path or as the URL of a (cached) remote file.  Two
    formats are supported:

        * hdf5 file - containing an embedding matrix in the form of a torch.Tensor;

        * text file - an utf-8 encoded text file with space separated fields::

                [word] [dim 1] [dim 2] ...

          The text file can eventually be compressed with gzip, bz2, lzma or zip.
          A single file inside a multi-file archive can be selected with the URI::

                "(archive_uri)#file_path_inside_the_archive"

          where ``archive_uri`` can be a file system path or a URL. For example::

                "(http://nlp.stanford.edu/data/glove.twitter.27B.zip)#glove.twitter.27B.200d.txt"

    Other keys (with defaults): ``embedding_dim`` (required),
    ``projection_dim`` (None), ``trainable`` (True), ``padding_index`` (None),
    ``max_norm`` (None), ``norm_type`` (2.), ``scale_grad_by_freq`` (False)
    and ``sparse`` (False).
    """
    # pylint: disable=arguments-differ
    size = params.pop_int('num_embeddings', None)
    namespace = params.pop("vocab_namespace", "tokens")
    if size is None:
        size = vocab.get_vocab_size(namespace)
    dim = params.pop_int('embedding_dim')
    vectors_path = params.pop("pretrained_file", None)
    module_options = {
        'projection_dim': params.pop_int("projection_dim", None),
        'trainable': params.pop_bool("trainable", True),
        'padding_index': params.pop_int('padding_index', None),
        'max_norm': params.pop_float('max_norm', None),
        'norm_type': params.pop_float('norm_type', 2.),
        'scale_grad_by_freq': params.pop_bool('scale_grad_by_freq', False),
        'sparse': params.pop_bool('sparse', False),
    }
    params.assert_empty(cls.__name__)

    # NOTE: the original author's remark here was that a saved model already
    # carries its embeddings and the pretrained file may no longer exist;
    # nevertheless the file is read whenever ``pretrained_file`` is set.
    weight = (_read_pretrained_embeddings_file(vectors_path, dim, vocab, namespace)
              if vectors_path else None)

    return cls(num_embeddings=size,
               embedding_dim=dim,
               weight=weight,
               **module_options)
def fine_tune_model(model: Model,
                    params: Params,
                    serialization_dir: str,
                    file_friendly_logging: bool = False) -> Model:
    """
    Fine tunes the given model, using a set of parameters that is largely identical to those used
    for :func:`~allennlp.commands.train.train_model`, except that the ``model`` section is ignored,
    if it is present (as we are already given a ``Model`` here).

    The main difference between the logic done here and the logic done in ``train_model`` is that
    here we do not worry about vocabulary construction or creating the model object.  The model's
    existing vocabulary is extended with the new data instead.

    Parameters
    ----------
    model : ``Model``
        The model to fine-tune.  Its ``vocab`` is extended in place and parameters whose names
        match a ``trainer.no_grad`` regex are frozen.
    params : ``Params``
        The experiment configuration.  Keys consumed here: ``model`` (ignored with a warning),
        ``vocabulary``, ``datasets_for_vocab_creation``, ``iterator``, ``trainer`` and
        ``evaluate_on_test``, plus whatever ``datasets_from_params`` reads.
    serialization_dir : ``str``
        The directory in which to save results and logs.  It is created with ``os.makedirs``,
        so it must not exist yet.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.

    Returns
    -------
    The same ``model`` object that was passed in, after training.
    """
    prepare_environment(params)
    os.makedirs(serialization_dir)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # Save a copy of the configuration before any key is popped from it.
    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    if params.pop('model', None):
        logger.warning("You passed parameters for the model in your configuration file, but we "
                       "are ignoring them, using instead the model parameters in the archive.")

    vocabulary_params = params.pop('vocabulary', {})
    if vocabulary_params.get('directory_path', None):
        logger.warning("You passed `directory_path` in parameters for the vocabulary in "
                       "your configuration file, but it will be ignored. "
                       "Vocabulary from the saved model will be extended with current data.")

    all_datasets = datasets_from_params(params)

    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("Extending model vocabulary using %s data.", ", ".join(datasets_for_vocab_creation))
    vocab = model.vocab
    vocab.extend_from_instances(vocabulary_params,
                                (instance for key, dataset in all_datasets.items()
                                 for instance in dataset
                                 if key in datasets_for_vocab_creation))
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
                   get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer = Trainer.from_params(model,
                                  serialization_dir,
                                  iterator,
                                  train_data,
                                  validation_data,
                                  trainer_params)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')
    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            # Use the module logger like every other message in this function,
            # not the root-level ``logging.info``.
            logger.info("Fine-tuning interrupted by the user. Attempting to create "
                        "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    if test_data and evaluate_on_test:
        test_metrics = evaluate(model, test_data, iterator, cuda_device=trainer._cuda_devices[0])  # pylint: disable=protected-access
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    metrics_json = json.dumps(metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"), "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return model
def from_params(cls, params: Params) -> 'PosTagIndexer':
    """
    Build a ``PosTagIndexer`` from a ``Params`` object.

    Reads ``namespace`` (default ``'pos_tags'``) and the boolean
    ``coarse_tags`` (default ``False``).
    """
    constructor_kwargs = {
        'namespace': params.pop('namespace', 'pos_tags'),
        'coarse_tags': params.pop_bool('coarse_tags', False),
    }
    params.assert_empty(cls.__name__)
    return cls(**constructor_kwargs)
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        Forwarded unchanged to ``create_serialization_dir``, which defines its effect
        (presumably it allows an existing directory to be overwritten - confirm there).

    Returns
    -------
    best_model: ``Model``
        The model with the best epoch weights.
    """
    prepare_environment(params)
    create_serialization_dir(params, serialization_dir, recover, force)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # ``cuda_device`` may be a single device or a list of devices; check each one.
    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    if isinstance(cuda_device, list):
        for device in cuda_device:
            check_for_gpu(device)
    else:
        check_for_gpu(cuda_device)

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                ", ".join(datasets_for_vocab_creation))
    vocab = Vocabulary.from_params(
            params.pop("vocabulary", {}),
            (instance for key, dataset in all_datasets.items()
             for instance in dataset
             if key in datasets_for_vocab_creation)
    )

    model = Model.from_params(vocab=vocab, params=params.pop('model'))

    # Initializing the model can have side effect of expanding the vocabulary
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    # Freeze every parameter whose name matches one of the ``no_grad`` regexes.
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
                   get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer_choice = trainer_params.pop_choice("type",
                                               Trainer.list_available(),
                                               default_to_first_choice=True)
    trainer = Trainer.by_name(trainer_choice).from_params(model=model,
                                                          serialization_dir=serialization_dir,
                                                          iterator=iterator,
                                                          train_data=train_data,
                                                          validation_data=validation_data,
                                                          params=trainer_params,
                                                          validation_iterator=validation_iterator)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            # Use the module logger like every other message in this function,
            # not the root-level ``logging.info``.
            logger.info("Training interrupted by the user. Attempting to create "
                        "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    logger.info("Loading the best epoch weights.")
    best_model_state_path = os.path.join(serialization_dir, 'best.th')
    best_model_state = torch.load(best_model_state_path)
    # ``best_model`` is the same object as ``model``; the weights are loaded in place.
    best_model = model
    best_model.load_state_dict(best_model_state)

    if test_data and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
                best_model, test_data, validation_iterator or iterator,
                cuda_device=trainer._cuda_devices[0]  # pylint: disable=protected-access
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    return best_model
def from_params(cls, params: Params, instances: Iterable['adi.Instance'] = None):  # type: ignore
    """
    There are two possible ways to build a vocabulary; from a
    collection of instances, using :func:`Vocabulary.from_instances`, or
    from a pre-saved vocabulary, using :func:`Vocabulary.from_files`.
    You can also extend pre-saved vocabulary with collection of instances
    using this method. This method wraps these options, allowing their
    specification from a ``Params`` object, generated from a JSON
    configuration file.

    Parameters
    ----------
    params: Params, required.
    instances: Iterable['adi.Instance'], optional
        If ``params`` doesn't contain a ``directory_path`` key,
        the ``Vocabulary`` can be built directly from a collection of
        instances (i.e. a dataset). If ``extend`` key is set False,
        dataset instances will be ignored and final vocabulary will be
        one loaded from ``directory_path``. If ``extend`` key is set True,
        dataset instances will be used to extend the vocabulary loaded
        from ``directory_path`` and that will be final vocabulary used.

    Returns
    -------
    A ``Vocabulary``.

    Raises
    ------
    ConfigurationError
        If neither ``directory_path`` nor ``instances`` is given, or if
        ``extend`` is true while either of them is missing.
    """
    # pylint: disable=arguments-differ
    # Vocabulary is ``Registrable`` so that you can configure a custom subclass,
    # but (unlike most of our registrables) almost everyone will want to use the
    # base implementation. So instead of having an abstract ``VocabularyBase`` or
    # such, we just add the logic for instantiating a registered subclass here,
    # so that most users can continue doing what they were doing.
    vocab_type = params.pop("type", None)
    if vocab_type is not None:
        return cls.by_name(vocab_type).from_params(params=params, instances=instances)

    extend = params.pop("extend", False)
    vocabulary_directory = params.pop("directory_path", None)
    if not vocabulary_directory and not instances:
        # The message names the key that is actually read above.
        raise ConfigurationError("You must provide either a Params object containing a "
                                 "directory_path key or a Dataset to build a vocabulary from.")
    if extend and not instances:
        raise ConfigurationError("'extend' is true but there are not instances passed to extend.")
    if extend and not vocabulary_directory:
        raise ConfigurationError("'extend' is true but there is not 'directory_path' to extend from.")

    if vocabulary_directory and instances:
        if extend:
            logger.info("Loading Vocab from files and extending it with dataset.")
        else:
            logger.info("Loading Vocab from files instead of dataset.")

    if vocabulary_directory:
        # The checks above guarantee that ``extend`` implies a directory, so
        # both the "load only" and the "load and extend" cases end here.
        vocab = Vocabulary.from_files(vocabulary_directory)
        if extend:
            vocab.extend_from_instances(params, instances=instances)
        else:
            params.assert_empty("Vocabulary - from files")
        return vocab

    # No directory given: build the vocabulary from the instances alone.
    min_count = params.pop("min_count", None)
    max_vocab_size = pop_max_vocab_size(params)
    non_padded_namespaces = params.pop("non_padded_namespaces", DEFAULT_NON_PADDED_NAMESPACES)
    pretrained_files = params.pop("pretrained_files", {})
    min_pretrained_embeddings = params.pop("min_pretrained_embeddings", None)
    only_include_pretrained_words = params.pop_bool("only_include_pretrained_words", False)
    tokens_to_add = params.pop("tokens_to_add", None)
    params.assert_empty("Vocabulary - from dataset")
    return Vocabulary.from_instances(instances=instances,
                                     min_count=min_count,
                                     max_vocab_size=max_vocab_size,
                                     non_padded_namespaces=non_padded_namespaces,
                                     pretrained_files=pretrained_files,
                                     only_include_pretrained_words=only_include_pretrained_words,
                                     tokens_to_add=tokens_to_add,
                                     min_pretrained_embeddings=min_pretrained_embeddings)
def from_params(cls, params: Params) -> 'Attention':
    """
    Build an ``Attention`` from a ``Params`` object.

    ``similarity_function`` (default ``{}``) is passed to
    ``SimilarityFunction.from_params``; ``normalize`` is a boolean defaulting
    to ``True``.  ``params.assert_empty`` is called after both are read.
    """
    similarity_params = params.pop('similarity_function', {})
    similarity = SimilarityFunction.from_params(similarity_params)
    should_normalize = params.pop_bool('normalize', True)
    params.assert_empty(cls.__name__)
    return cls(normalize=should_normalize, similarity_function=similarity)
def from_params(cls, params: Params) -> 'DotProductSimilarity':
    """
    Build a ``DotProductSimilarity`` from a ``Params`` object.

    The only key read is the boolean ``scale_output`` (default ``False``).
    """
    should_scale = params.pop_bool('scale_output', False)
    params.assert_empty(cls.__name__)
    return cls(scale_output=should_scale)
def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.

    Returns
    -------
    The ``Model`` that was built from ``params`` and trained.
    """
    prepare_environment(params)

    create_serialization_dir(params, serialization_dir, recover)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    # Save a copy of the configuration before any key is popped from it.
    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    all_datasets = datasets_from_params(params)
    datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

    for dataset in datasets_for_vocab_creation:
        if dataset not in all_datasets:
            raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

    logger.info("Creating a vocabulary using %s data.", ", ".join(datasets_for_vocab_creation))
    vocab = Vocabulary.from_params(params.pop("vocabulary", {}),
                                   (instance for key, dataset in all_datasets.items()
                                    for instance in dataset
                                    if key in datasets_for_vocab_creation))
    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    model = Model.from_params(vocab, params.pop('model'))
    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(vocab)

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    trainer = Trainer.from_params(model,
                                  serialization_dir,
                                  iterator,
                                  train_data,
                                  validation_data,
                                  trainer_params)

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)
    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            # Use the module logger like every other message in this function,
            # not the root-level ``logging.info``.
            logger.info("Training interrupted by the user. Attempting to create "
                        "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    if test_data and evaluate_on_test:
        test_metrics = evaluate(model, test_data, iterator, cuda_device=trainer._cuda_devices[0])  # pylint: disable=protected-access
        for key, value in test_metrics.items():
            metrics["test_" + key] = value

    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    metrics_json = json.dumps(metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"), "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return model
def from_params(cls, model_parameters: List, params: Params):  # type: ignore
    """
    Build an optimizer for ``model_parameters`` from a configuration.

    Parameters
    ----------
    model_parameters : ``List``
        An iterable of ``(name, parameter)`` pairs.
    params : ``Params`` or ``str``
        Either the name of a registered optimizer, or a ``Params`` object with a
        ``"type"`` key, an optional ``"parameter_groups"`` list of
        ``[[regex, ...], {option: value, ...}]`` entries, and an optional
        ``"infer_type_and_cast"`` flag.  All remaining keys are passed to the
        optimizer constructor as keyword arguments.

    Returns
    -------
    The object produced by calling ``Optimizer.by_name(type)`` with the (possibly
    grouped) parameters and the remaining configuration options.

    Raises
    ------
    ValueError
        If a parameter name matches regexes from two different parameter groups.
    """
    # pylint: disable=arguments-differ
    if isinstance(params, str):
        optimizer = params
        params = Params({})
    else:
        optimizer = params.pop_choice("type", Optimizer.list_available())

    # make the parameter groups if need
    groups = params.pop("parameter_groups", None)
    if groups:
        # The input to the optimizer is list of dict.
        # Each dict contains a "parameter group" and groups specific options,
        # e.g., {'params': [list of parameters], 'lr': 1e-3, ...}
        # Any config option not specified in the additional options (e.g.
        # for the default group) is inherited from the top level config.
        # see: http://pytorch.org/docs/0.3.0/optim.html?#per-parameter-options
        #
        # groups contains something like:
        #"parameter_groups": [
        #       [["regex1", "regex2"], {"lr": 1e-3}],
        #       [["regex3"], {"lr": 1e-4}]
        #]
        #(note that the allennlp config files require double quotes ", and will
        # fail (sometimes silently) with single quotes ').

        # This is typed as Any since the dict values other than
        # the params key are passed to the Optimizer constructor and
        # can be any type it accepts.
        # In addition to any parameters that match group specific regex,
        # we also need a group for the remaining "default" group.
        # Those will be included in the last entry of parameter_groups.
        parameter_groups: Any = [{'params': []} for _ in range(len(groups) + 1)]
        # add the group specific kwargs
        for k, group in enumerate(groups):
            parameter_groups[k].update(group[1].as_dict())

        # Register every regex up front so that a regex which never matches is
        # reported even when there are no model parameters to iterate over.
        regex_use_counts: Dict[str, int] = {}
        for group in groups:
            for regex in group[0]:
                regex_use_counts[regex] = 0

        parameter_group_names: List[set] = [set() for _ in range(len(groups) + 1)]
        for name, param in model_parameters:
            # Determine the group for this parameter.
            group_index = None
            for k, group_regexes in enumerate(groups):
                for regex in group_regexes[0]:
                    if re.search(regex, name):
                        if group_index is not None and group_index != k:
                            raise ValueError("{} was specified in two separate parameter groups".format(name))
                        group_index = k
                        regex_use_counts[regex] += 1

            if group_index is None:
                # the default group is the last entry
                group_index = -1
            parameter_groups[group_index]['params'].append(param)
            parameter_group_names[group_index].add(name)

        # log the parameter groups
        logger.info("Done constructing parameter groups.")
        for k, (group_kwargs, group_names) in enumerate(zip(parameter_groups, parameter_group_names)):
            group_options = {key: val for key, val in group_kwargs.items()
                             if key != 'params'}
            logger.info("Group %s: %s, %s", k, list(group_names), group_options)
        # check for unused regex
        for regex, count in regex_use_counts.items():
            if count == 0:
                logger.warning("When constructing parameter groups, "
                               "%s does not match any parameter name", regex)

    else:
        parameter_groups = [param for name, param in model_parameters]

    # Log the number of parameters to optimize
    num_parameters = 0
    for parameter_group in parameter_groups:
        if isinstance(parameter_group, dict):
            num_parameters += sum(parameter.numel() for parameter in parameter_group["params"])
        else:
            num_parameters += parameter_group.numel()
    logger.info("Number of trainable parameters: %s", num_parameters)

    # By default we cast things that e.g. look like floats to floats before handing them
    # to the Optimizer constructor, but if you want to disable that behavior you could add a
    #       "infer_type_and_cast": false
    # key to your "trainer.optimizer" config.
    infer_type_and_cast = params.pop_bool("infer_type_and_cast", True)
    params_as_dict = params.as_dict(infer_type_and_cast=infer_type_and_cast)
    return Optimizer.by_name(optimizer)(parameter_groups, **params_as_dict)  # type: ignore
def from_params(cls, params: Params) -> 'SingleIdTokenIndexer':
    """
    Construct the indexer from ``params``.

    Reads the optional ``namespace`` (default ``'tokens'``) and ``lowercase_tokens``
    (default ``False``) keys, then checks that no other keys remain before
    instantiating ``cls`` with them.
    """
    # Dict literals evaluate in order, so the keys are popped in the same
    # sequence as before.
    indexer_kwargs = {
        'namespace': params.pop('namespace', 'tokens'),
        'lowercase_tokens': params.pop_bool('lowercase_tokens', False),
    }
    params.assert_empty(cls.__name__)
    return cls(**indexer_kwargs)