def test_pass_through_encoder_passes_through(self):
    encoder = PassThroughEncoder(input_dim=9)
    tensor = torch.randn([2, 3, 9])
    output = encoder(tensor)
    numpy.testing.assert_array_almost_equal(
        tensor.detach().cpu().numpy(), output.detach().cpu().numpy()
    )
def init_model(self) -> Model:
    """Build the BERT-based tagger.

    Returns:
        Model: the final model
    """
    bert_text_field_embedder = PretrainedTransformerEmbedder(model_name=self.config.model_name)
    tagger = SimpleTagger(
        vocab=self.vocab,
        text_field_embedder=BasicTextFieldEmbedder(
            token_embedders={'tokens': bert_text_field_embedder}
        ),
        encoder=PassThroughEncoder(bert_text_field_embedder.get_output_dim()),
        verbose_metrics=True,
        calculate_span_f1=True,
        label_encoding="BMES",
    )
    tagger.to(device=self.config.device)
    return tagger
def test_saturated_dropout_trivial_mask(self):
    encoder = PassThroughEncoder(input_dim=1)
    pruner = PercentSaturatedDropout(encoder, percent=0.25)
    mask = torch.ones(1, 1)
    dropped = pruner(INPUTS, mask)
    exp_dropped = torch.tensor([[[0.0, 0.0, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]]])
    torch.testing.assert_allclose(dropped, exp_dropped)
def get_encoder(input_dim, output_dim, encoder_type, args):
    if encoder_type == "pass":
        return PassThroughEncoder(input_dim)
    if encoder_type == "bilstm":
        return PytorchSeq2SeqWrapper(
            AllenNLPSequential(
                torch.nn.ModuleList(
                    [get_encoder(input_dim, output_dim, "bilstm-unwrapped", args)]
                ),
                input_dim,
                output_dim,
                bidirectional=True,
                residual_connection=args.residual_connection,
                dropout=args.dropout,
            )
        )
    if encoder_type == "bilstm-unwrapped":
        return torch.nn.LSTM(
            input_dim,
            output_dim,
            batch_first=True,
            bidirectional=True,
            dropout=args.dropout,
        )
    if encoder_type == "self_attention":
        return IntraSentenceAttentionEncoder(input_dim=input_dim, projection_dim=output_dim)
    if encoder_type == "stacked_self_attention":
        return StackedSelfAttentionEncoder(
            input_dim=input_dim,
            hidden_dim=output_dim,
            projection_dim=output_dim,
            feedforward_hidden_dim=output_dim,
            num_attention_heads=5,
            num_layers=3,
            dropout_prob=args.dropout,
        )
    raise RuntimeError(f"Unknown encoder type={encoder_type}")
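A minimal usage sketch of the factory above, assuming an argparse-style args object that carries the dropout and residual_connection attributes the factory reads; the dimensions are illustrative only.

from argparse import Namespace

args = Namespace(dropout=0.1, residual_connection=False)
# The "pass" branch returns a PassThroughEncoder, so the output dimension equals input_dim.
encoder = get_encoder(input_dim=256, output_dim=128, encoder_type="pass", args=args)
assert encoder.get_output_dim() == 256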
def __init__(
    self,
    embedder: TokenEmbedder,
    encoder: Seq2SeqEncoder = None,
    feature_type: str = "entity_start",
):
    super().__init__()
    self.embedder = embedder
    self.encoder = encoder or PassThroughEncoder(input_dim=self.embedder.get_output_dim())
    self.feature_type = feature_type
def test_pass_through_encoder_with_mask(self):
    encoder = PassThroughEncoder(input_dim=9)
    tensor = torch.randn([2, 3, 9])
    mask = torch.BoolTensor([[True, True, True], [True, False, False]])
    output = encoder(tensor, mask)
    target = tensor * mask.unsqueeze(dim=-1).float()
    numpy.testing.assert_array_almost_equal(
        output.detach().cpu().numpy(), target.detach().cpu().numpy()
    )
def __init__(self,
             my_device=torch.device('cuda:2'),
             model_name='roberta.hdf5',
             model_path=current_directory_path + '/external_pretrained_models/'):
    self.answ = "UNKNOWN ERROR"
    self.model_name = model_name
    self.model_path = model_path
    self.first_object = ''
    self.second_object = ''
    self.predicates = ''
    self.aspects = ''
    cuda_device = my_device
    # We can't use a set because a span is a dict and dicts are unhashable;
    # add_span() keeps the list free of duplicates instead.
    self.spans = []
    try:
        print(self.model_path + self.model_name)
        print(model_path + "vocab_dir")
        vocab = Vocabulary.from_files(model_path + "vocab_dir")
        BERT_MODEL = 'google/electra-base-discriminator'
        embedder = PretrainedTransformerMismatchedEmbedder(model_name=BERT_MODEL)
        text_field_embedder = BasicTextFieldEmbedder({'tokens': embedder})
        seq2seq_encoder = PassThroughEncoder(input_dim=embedder.get_output_dim())
        print("encoder loaded")
        self.indexer = PretrainedTransformerMismatchedIndexer(model_name=BERT_MODEL)
        print("indexer loaded")
        self.model = SimpleTagger(
            text_field_embedder=text_field_embedder,
            vocab=vocab,
            encoder=seq2seq_encoder,
            calculate_span_f1=True,
            label_encoding='IOB1').cuda(device=cuda_device)
        self.model.load_state_dict(torch.load(self.model_path + self.model_name))
        print("model loaded")
        self.reader = Conll2003DatasetReader(token_indexers={'tokens': self.indexer})
        print("reader loaded")
    except Exception:
        e = sys.exc_info()[0]
        print("exception while mapping to gpu in extractor ", e)
        raise RuntimeError("Init extractor: can't map to gpu. Maybe it is OOM")
    try:
        self.predictor = SentenceTaggerPredictor(self.model, self.reader)
    except Exception:
        e = sys.exc_info()[0]
        print("exception in creating predictor ", e)
        raise RuntimeError("Init extractor: can't create the predictor")
def _build_model(config, vocab, lemmatize_helper, morpho_vectorizer, bert_max_length=None):
    embedder = _load_embedder(config, vocab, bert_max_length)
    input_dim = embedder.get_output_dim()
    if config.embedder.use_pymorphy:
        input_dim += morpho_vectorizer.morpho_vector_dim

    pos_tag_embedding = None
    if config.task.task_type == 'single' and config.task.params['use_pos_tag']:
        pos_tag_embedding = Embedding(
            num_embeddings=vocab.get_vocab_size('grammar_value_tags'),
            embedding_dim=config.task.params['pos_embedding_dim'])
        input_dim += config.task.params['pos_embedding_dim']

    encoder = None
    if config.encoder.encoder_type != 'lstm':
        encoder = PassThroughEncoder(input_dim=input_dim)
    elif config.encoder.use_weight_drop:
        encoder = LstmWeightDropSeq2SeqEncoder(
            input_dim,
            config.encoder.hidden_dim,
            num_layers=config.encoder.num_layers,
            bidirectional=True,
            dropout=config.encoder.dropout,
            variational_dropout=config.encoder.variational_dropout)
    else:
        encoder = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(input_dim,
                          config.encoder.hidden_dim,
                          num_layers=config.encoder.num_layers,
                          dropout=config.encoder.dropout,
                          bidirectional=True,
                          batch_first=True))

    return DependencyParser(
        vocab=vocab,
        text_field_embedder=embedder,
        encoder=encoder,
        lemmatize_helper=lemmatize_helper,
        task_config=config.task,
        pos_tag_embedding=pos_tag_embedding,
        morpho_vector_dim=morpho_vectorizer.morpho_vector_dim if config.embedder.use_pymorphy else 0,
        tag_representation_dim=config.parser.tag_representation_dim,
        arc_representation_dim=config.parser.arc_representation_dim,
        dropout=config.parser.dropout,
        input_dropout=config.embedder.dropout,
        gram_val_representation_dim=config.parser.gram_val_representation_dim,
        lemma_representation_dim=config.parser.lemma_representation_dim)
def __init__(
    self,
    backbone: ModelBackbone,
    labels: List[str],
    tokens_pooler: Optional[Seq2VecEncoderConfiguration] = None,
    sentences_encoder: Optional[Seq2SeqEncoderConfiguration] = None,
    sentences_pooler: Seq2VecEncoderConfiguration = None,
    feedforward: Optional[FeedForwardConfiguration] = None,
    multilabel: bool = False,
) -> None:
    super(DocumentClassification, self).__init__(
        backbone, labels=labels, multilabel=multilabel
    )

    self.backbone.encoder = TimeDistributedEncoder(backbone.encoder)

    # layers
    self.tokens_pooler = TimeDistributedEncoder(
        BagOfEmbeddingsEncoder(embedding_dim=self.backbone.encoder.get_output_dim())
        if not tokens_pooler
        else tokens_pooler.input_dim(self.backbone.encoder.get_output_dim()).compile()
    )
    self.sentences_encoder = (
        PassThroughEncoder(self.tokens_pooler.get_output_dim())
        if not sentences_encoder
        else sentences_encoder.input_dim(self.tokens_pooler.get_output_dim()).compile()
    )
    self.sentences_pooler = (
        BagOfEmbeddingsEncoder(self.sentences_encoder.get_output_dim())
        if not sentences_pooler
        else sentences_pooler.input_dim(self.sentences_encoder.get_output_dim()).compile()
    )
    self.feedforward = (
        None
        if not feedforward
        else feedforward.input_dim(self.sentences_pooler.get_output_dim()).compile()
    )
    self._classification_layer = torch.nn.Linear(
        (self.feedforward or self.sentences_pooler).get_output_dim(),
        self.num_labels,
    )
def __init__(
    self,
    vocab: Vocabulary,
    featurizer: InputFeaturizer,
    embedder: TextFieldEmbedder,
    encoder: Optional[Encoder] = None,
):
    super(ModelBackbone, self).__init__()

    self.vocab = vocab
    self.featurizer = featurizer
    self.embedder = embedder
    self.encoder = (
        encoder.input_dim(self.embedder.get_output_dim()).compile()
        if encoder
        else PassThroughEncoder(self.embedder.get_output_dim())
    )
def __init__(self, pooler: Seq2VecEncoder, knowledge_encoder: Seq2SeqEncoder = None):
    super().__init__()
    self.pooler = pooler
    pass_thru = PassThroughEncoder(pooler.get_input_dim())
    self.knowledge_encoder = TimeDistributed(knowledge_encoder or pass_thru)
    self.knowledge_attn = DotProductMatrixAttention()  # CosineMatrixAttention() is an alternative
    self.input_dim = pooler.get_input_dim()
    self.output_dim = pooler.get_output_dim()
def __init__(
    self,
    backbone: ModelBackbone,
    labels: List[str],
    token_pooler: Optional[Seq2VecEncoderConfiguration] = None,
    sentence_encoder: Optional[Seq2SeqEncoderConfiguration] = None,
    sentence_pooler: Seq2VecEncoderConfiguration = None,
    feedforward: Optional[FeedForwardConfiguration] = None,
    dropout: float = 0.0,
    multilabel: bool = False,
    label_weights: Optional[Union[List[float], Dict[str, float]]] = None,
) -> None:
    super().__init__(
        backbone,
        labels=labels,
        multilabel=multilabel,
        label_weights=label_weights,
    )

    self._empty_prediction = DocumentClassificationPrediction(labels=[], probabilities=[])

    self.backbone.encoder = TimeDistributedEncoder(backbone.encoder)

    # layers
    self.token_pooler = TimeDistributedEncoder(
        BagOfEmbeddingsEncoder(embedding_dim=self.backbone.encoder.get_output_dim())
        if not token_pooler
        else token_pooler.input_dim(self.backbone.encoder.get_output_dim()).compile()
    )
    self.sentence_encoder = (
        PassThroughEncoder(self.token_pooler.get_output_dim())
        if not sentence_encoder
        else sentence_encoder.input_dim(self.token_pooler.get_output_dim()).compile()
    )
    self.sentence_pooler = (
        BagOfEmbeddingsEncoder(self.sentence_encoder.get_output_dim())
        if not sentence_pooler
        else sentence_pooler.input_dim(self.sentence_encoder.get_output_dim()).compile()
    )
    self.feedforward = (
        None
        if not feedforward
        else feedforward.input_dim(self.sentence_pooler.get_output_dim()).compile()
    )
    self.dropout = torch.nn.Dropout(dropout)
    self._classification_layer = torch.nn.Linear(
        (self.feedforward or self.sentence_pooler).get_output_dim(),
        self.num_labels,
    )
def _build_model(config, vocab, lemmatize_helper, morpho_vectorizer, bert_max_length=None):
    embedder = _load_embedder(config, bert_max_length)
    input_dim = embedder.get_output_dim()
    if config.embedder.use_pymorphy:
        input_dim += morpho_vectorizer.morpho_vector_dim

    encoder = None
    if config.encoder.encoder_type != 'lstm':
        encoder = PassThroughEncoder(input_dim=input_dim)
    elif config.encoder.use_weight_drop:
        encoder = LstmWeightDropSeq2SeqEncoder(
            input_dim,
            config.encoder.hidden_dim,
            num_layers=config.encoder.num_layers,
            bidirectional=True,
            dropout=config.encoder.dropout,
            variational_dropout=config.encoder.variational_dropout)
    else:
        encoder = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(input_dim,
                          config.encoder.hidden_dim,
                          num_layers=config.encoder.num_layers,
                          dropout=config.encoder.dropout,
                          bidirectional=True,
                          batch_first=True))

    return DependencyParser(
        vocab=vocab,
        text_field_embedder=embedder,
        encoder=encoder,
        lemmatize_helper=lemmatize_helper,
        morpho_vector_dim=morpho_vectorizer.morpho_vector_dim if config.embedder.use_pymorphy else 0,
        tag_representation_dim=config.parser.tag_representation_dim,
        arc_representation_dim=config.parser.arc_representation_dim,
        dropout=config.parser.dropout,
        input_dropout=config.embedder.dropout,
        gram_val_representation_dim=config.parser.gram_val_representation_dim,
        lemma_representation_dim=config.parser.lemma_representation_dim)
def __init__(self,
             pooler: Seq2VecEncoder,
             context_encoder: Seq2SeqEncoder = None,
             kb_path: str = None,
             kb_shape: Tuple[int, int] = None,
             trainable_kb: bool = False,
             projection_dim: int = None):
    super().__init__()
    kb = (torch.load(kb_path) if kb_path else torch.ones(kb_shape)).float()
    self.knowledge = nn.Parameter(kb, requires_grad=trainable_kb).float()
    self.projection_dim = projection_dim
    if projection_dim:
        self.kb_proj = nn.Linear(self.knowledge.size(0), self.projection_dim)
    self.context_encoder = context_encoder or PassThroughEncoder(pooler.get_input_dim())
    self.pooler = pooler
    self.output_dim = pooler.get_output_dim()
def test_sequence_tagging_reader():
    model_name = 'bert-base-chinese'
    bert_token_indexers = PretrainedTransformerIndexer(model_name=model_name)
    reader = SequenceTaggingDatasetReader(token_indexers={"tokens": bert_token_indexers})

    train_file = './data/weibo/train.corpus'
    dev_file = './data/weibo/dev.corpus'
    test_file = './data/weibo/dev.corpus'

    train_instances = list(reader.read(train_file))
    dev_instances = list(reader.read(dev_file))
    test_instances = list(reader.read(test_file))

    vocab: Vocabulary = Vocabulary.from_instances(train_instances)
    assert vocab.get_namespaces() is not None

    bert_text_field_embedder = PretrainedTransformerEmbedder(model_name=model_name)
    tagger = SimpleTagger(
        vocab=vocab,
        text_field_embedder=BasicTextFieldEmbedder(
            token_embedders={'tokens': bert_text_field_embedder}),
        encoder=PassThroughEncoder(bert_text_field_embedder.get_output_dim()),
        calculate_span_f1=True,
        label_encoding="BMES",
        # verbose_metrics=True
    )

    train_data_loader, dev_data_loader = build_data_loaders(train_instances, dev_instances)
    train_data_loader.index_with(vocab)
    dev_data_loader.index_with(vocab)

    trainer = build_trainer(model=tagger,
                            serialization_dir='./output',
                            train_loader=train_data_loader,
                            dev_loader=dev_data_loader)
    print("Starting training")
    trainer.train()
    print("Finished training")
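The test above relies on two helpers that are not shown. The sketch below is one plausible implementation, assuming AllenNLP's SimpleDataLoader and GradientDescentTrainer; the real build_data_loaders and build_trainer may differ.

from allennlp.data.data_loaders import SimpleDataLoader
from allennlp.training import GradientDescentTrainer
from allennlp.training.optimizers import AdamOptimizer


def build_data_loaders(train_instances, dev_instances, batch_size=8):
    # Shuffle only the training data; both loaders still need index_with(vocab) before use.
    return (
        SimpleDataLoader(train_instances, batch_size, shuffle=True),
        SimpleDataLoader(dev_instances, batch_size, shuffle=False),
    )


def build_trainer(model, serialization_dir, train_loader, dev_loader):
    # AllenNLP optimizers take a list of (name, parameter) pairs.
    optimizer = AdamOptimizer(
        [(n, p) for n, p in model.named_parameters() if p.requires_grad], lr=3e-5
    )
    return GradientDescentTrainer(
        model=model,
        optimizer=optimizer,
        data_loader=train_loader,
        validation_data_loader=dev_loader,
        serialization_dir=serialization_dir,
        num_epochs=3,
    )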
def init_crf_model(self) -> Model:
    """Initialize the CRF tagger model."""
    # 1. import related modules from allennlp
    bert_text_field_embedder = PretrainedTransformerEmbedder(model_name=self.config.model_name)
    tagger = SimpleTagger(
        vocab=self.vocab,
        text_field_embedder=BasicTextFieldEmbedder(
            token_embedders={'tokens': bert_text_field_embedder}
        ),
        encoder=PassThroughEncoder(bert_text_field_embedder.get_output_dim()),
        verbose_metrics=True,
        calculate_span_f1=True,
        label_encoding="BMES",
    )
    tagger.to(device=self.config.device)
    return tagger
def model_ctor():
    # model = BertForTokenClassificationCustom.from_pretrained(self._bert_model_type,
    #                                                          cache_dir=self._cache_dir,
    #                                                          num_labels=len(self._tag2idx)).cuda()
    # seq_tagger = SequenceTaggerBert(model, self._bert_tokenizer, idx2tag=self._idx2tag,
    #                                 tag2idx=self._tag2idx, pred_batch_size=self._ebs)
    embedder = PretrainedTransformerMismatchedEmbedder(model_name=self._bert_model_type)
    text_field_embedder = BasicTextFieldEmbedder({'tokens': embedder})
    seq2seq_encoder = PassThroughEncoder(input_dim=embedder.get_output_dim())

    tagger = SimpleTagger(text_field_embedder=text_field_embedder,
                          vocab=self.vocab,
                          encoder=seq2seq_encoder,
                          calculate_span_f1=True,
                          label_encoding='IOB1').cuda()
    return tagger
def test_saturated_dropout_zero(self):
    encoder = PassThroughEncoder(input_dim=1)
    pruner = PercentSaturatedDropout(encoder, percent=0.0)
    dropped = pruner(INPUTS)
    exp_dropped = torch.tensor([[[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]]])
    torch.testing.assert_allclose(dropped, exp_dropped)
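Both PercentSaturatedDropout tests above read a module-level INPUTS tensor that is defined elsewhere. Judging from the zero-percent case, which must return its input unchanged, it is presumably a single length-one sequence over a 10-dimensional ascending vector; the definition below is an assumption, not the original fixture.

# Presumed fixture for the tests above (shape [1, 1, 10]); inferred from the expected outputs.
INPUTS = torch.tensor([[[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]]])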
def __init__(self,
             vocab: Vocabulary,
             token_representation_dim: int,
             encoder: Optional[Seq2SeqEncoder] = None,
             decoder: Optional[Union[FeedForward, str]] = None,
             contextualizer: Optional[Contextualizer] = None,
             calculate_per_label_f1: bool = False,
             loss_average: str = "batch",
             pretrained_file: Optional[str] = None,
             transfer_contextualizer_from_pretrained_file: bool = False,
             transfer_encoder_from_pretrained_file: bool = False,
             freeze_encoder: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SelectiveTagger, self).__init__(vocab, regularizer)

    self._num_classes = self.vocab.get_vocab_size("labels")
    self._token_representation_dim = token_representation_dim
    self._contextualizer = contextualizer
    if encoder is None:
        encoder = PassThroughEncoder(input_dim=self._token_representation_dim)
    self._encoder = encoder

    # Load the contextualizer and encoder weights from the
    # pretrained_file if applicable
    if pretrained_file:
        archive = None
        if self._contextualizer and transfer_contextualizer_from_pretrained_file:
            logger.info("Attempting to load contextualizer weights from "
                        "pretrained_file at {}".format(pretrained_file))
            archive = load_archive(cached_path(pretrained_file))
            contextualizer_state = archive.model._contextualizer.state_dict()
            contextualizer_layer_num = self._contextualizer._layer_num
            self._contextualizer.load_state_dict(contextualizer_state)
            if contextualizer_layer_num is not None:
                logger.info("Setting layer num to {}".format(contextualizer_layer_num))
                self._contextualizer.set_layer_num(contextualizer_layer_num)
            else:
                self._contextualizer.reset_layer_num()
            logger.info("Successfully loaded contextualizer weights!")
        if transfer_encoder_from_pretrained_file:
            logger.info("Attempting to load encoder weights from "
                        "pretrained_file at {}".format(pretrained_file))
            if archive is None:
                archive = load_archive(cached_path(pretrained_file))
            encoder_state = archive.model._encoder.state_dict()
            self._encoder.load_state_dict(encoder_state)
            logger.info("Successfully loaded encoder weights!")

    self._freeze_encoder = freeze_encoder
    for parameter in self._encoder.parameters():
        # If freeze is true, requires_grad should be false and vice versa.
        parameter.requires_grad_(not self._freeze_encoder)

    if decoder is None or decoder == "linear":
        # Create the default decoder (logistic regression) if it is not provided.
        decoder = FeedForward.from_params(Params({
            "input_dim": self._encoder.get_output_dim(),
            "num_layers": 1,
            "hidden_dims": self._num_classes,
            "activations": "linear"}))
        logger.info("No decoder provided to model, using default "
                    "decoder: {}".format(decoder))
    elif decoder == "mlp":
        # Create the MLP decoder
        decoder = FeedForward.from_params(Params({
            "input_dim": self._encoder.get_output_dim(),
            "num_layers": 2,
            "hidden_dims": [1024, self._num_classes],
            "activations": ["relu", "linear"]}))
        logger.info("Using MLP decoder: {}".format(decoder))
    self._decoder = decoder

    check_dimensions_match(self._token_representation_dim,
                           self._encoder.get_input_dim(),
                           "token representation dim", "encoder input dim")
    check_dimensions_match(self._encoder.get_output_dim(),
                           self._decoder.get_input_dim(),
                           "encoder output dim", "decoder input dim")
    check_dimensions_match(self._decoder.get_output_dim(),
                           self._num_classes,
                           "decoder output dim", "number of classes")

    if loss_average not in {"batch", "token"}:
        raise ConfigurationError("loss_average is {}, expected one of batch "
                                 "or token".format(loss_average))
    self.loss_average = loss_average

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }

    self.calculate_per_label_f1 = calculate_per_label_f1
    label_metric_name = "label_{}" if self.calculate_per_label_f1 else "_label_{}"
    for label_name, label_index in self.vocab._token_to_index["labels"].items():
        self.metrics[label_metric_name.format(label_name)] = F1Measure(
            positive_label=label_index)

    # Whether to run in error analysis mode or not, see commands.error_analysis
    self.error_analysis = False
    logger.info("Applying initializer...")
    initializer(self)
def test_get_dimension_is_correct(self):
    encoder = PassThroughEncoder(input_dim=9)
    assert encoder.get_input_dim() == 9
    assert encoder.get_output_dim() == 9
def __init__(self,
             vocab: Vocabulary,
             token_representation_dim: int,
             encoder: Optional[Seq2SeqEncoder] = None,
             decoder: Optional[Union[FeedForward, str]] = None,
             use_crf: bool = False,
             constrain_crf_decoding: bool = False,
             include_start_end_transitions: bool = True,
             label_encoding: Optional[str] = None,
             contextualizer: Optional[Contextualizer] = None,
             calculate_per_label_f1: bool = False,
             calculate_span_f1: bool = False,
             calculate_perplexity: bool = False,
             loss_average: str = "batch",
             pretrained_file: Optional[str] = None,
             transfer_contextualizer_from_pretrained_file: bool = False,
             transfer_encoder_from_pretrained_file: bool = False,
             freeze_encoder: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(Tagger, self).__init__(vocab, regularizer)

    self._num_classes = self.vocab.get_vocab_size("labels")
    self._token_representation_dim = token_representation_dim
    self._contextualizer = contextualizer
    if encoder is None:
        encoder = PassThroughEncoder(input_dim=token_representation_dim)
    self._encoder = encoder

    # Load the contextualizer and encoder weights from the
    # pretrained_file if applicable
    if pretrained_file:
        archive = None
        if self._contextualizer and transfer_contextualizer_from_pretrained_file:
            logger.info("Attempting to load contextualizer weights from "
                        "pretrained_file at {}".format(pretrained_file))
            archive = load_archive(cached_path(pretrained_file))
            contextualizer_state = archive.model._contextualizer.state_dict()
            contextualizer_layer_num = self._contextualizer._layer_num
            logger.info("contextualizer_layer_num {}".format(contextualizer_layer_num))
            self._contextualizer.load_state_dict(contextualizer_state)
            if contextualizer_layer_num is not None:
                logger.info("Setting layer num to {}".format(contextualizer_layer_num))
                self._contextualizer.set_layer_num(contextualizer_layer_num)
            else:
                self._contextualizer.reset_layer_num()
            logger.info("Successfully loaded contextualizer weights!")
        if transfer_encoder_from_pretrained_file:
            logger.info("Attempting to load encoder weights from "
                        "pretrained_file at {}".format(pretrained_file))
            if archive is None:
                archive = load_archive(cached_path(pretrained_file))
            encoder_state = archive.model._encoder.state_dict()
            self._encoder.load_state_dict(encoder_state)
            logger.info("Successfully loaded encoder weights!")

    self._freeze_encoder = freeze_encoder
    for parameter in self._encoder.parameters():
        # If freeze is true, requires_grad should be false and vice versa.
        parameter.requires_grad_(not self._freeze_encoder)

    if decoder is None or decoder == "linear":
        # Create the default decoder (logistic regression) if it is not provided.
        decoder = FeedForward.from_params(Params(
            {"input_dim": self._encoder.get_output_dim(),
             "num_layers": 1,
             "hidden_dims": self._num_classes,
             "activations": "linear"}))
        logger.info("No decoder provided to model, using default "
                    "decoder: {}".format(decoder))
    elif decoder == "mlp":
        # Create the MLP decoder
        decoder = FeedForward.from_params(Params(
            {"input_dim": self._encoder.get_output_dim(),
             "num_layers": 2,
             "hidden_dims": [1024, self._num_classes],
             "activations": ["relu", "linear"]}))
        logger.info("Using MLP decoder: {}".format(decoder))
    self._decoder = TimeDistributed(decoder)

    self._use_crf = use_crf
    self._constrain_crf_decoding = constrain_crf_decoding
    self._crf = None
    if use_crf:
        logger.info("Using CRF on top of decoder outputs")
        if constrain_crf_decoding:
            if label_encoding is None:
                raise ConfigurationError(
                    "constrain_crf_decoding is True, but "
                    "label_encoding was not provided. label_encoding "
                    "must be provided.")
            logger.info("Constraining CRF decoding with label "
                        "encoding {}".format(label_encoding))
            labels = self.vocab.get_index_to_token_vocabulary("labels")
            constraints = allowed_transitions(label_encoding, labels)
        else:
            constraints = None
        self._crf = ConditionalRandomField(
            self._num_classes, constraints,
            include_start_end_transitions=include_start_end_transitions)

    check_dimensions_match(self._token_representation_dim,
                           self._encoder.get_input_dim(),
                           "dimensionality of token representation",
                           "encoder input dim")
    check_dimensions_match(self._encoder.get_output_dim(),
                           self._decoder._module.get_input_dim(),
                           "encoder output dim", "decoder input dim")
    check_dimensions_match(self._decoder._module.get_output_dim(),
                           self._num_classes,
                           "decoder output dim", "number of classes")

    if loss_average not in {"batch", "token"}:
        raise ConfigurationError("loss_average is {}, expected one of batch "
                                 "or token".format(loss_average))
    self.loss_average = loss_average

    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }

    self.calculate_perplexity = calculate_perplexity
    if calculate_perplexity:
        self.metrics["perplexity"] = Perplexity()

    self.calculate_per_label_f1 = calculate_per_label_f1
    self.calculate_span_f1 = calculate_span_f1
    if label_encoding and label_encoding not in ["BIO", "BIOUL", "IOB1"]:
        raise ConfigurationError("If not None, label encoding must be one of BIO, BIOUL, "
                                 "or IOB1. Got {}".format(label_encoding))
    self.label_encoding = label_encoding

    label_metric_name = "label_{}" if self.calculate_per_label_f1 else "_label_{}"
    for label_name, label_index in self.vocab._token_to_index["labels"].items():
        self.metrics[label_metric_name.format(label_name)] = F1Measure(
            positive_label=label_index)

    if self.calculate_span_f1:
        if not self.label_encoding:
            raise ConfigurationError("label_encoding must be provided when "
                                     "calculating_span_f1 is true.")
        else:
            # Set up span-based F1 measure
            self.metrics["span_based_f1"] = SpanBasedF1Measure(
                self.vocab,
                tag_namespace="labels",
                label_encoding=self.label_encoding)

    # Whether to run in error analysis mode or not, see commands.error_analysis
    self.error_analysis = False
    logger.info("Applying initializer...")
    initializer(self)
def __init__(self, args, input_dim, hidden_dim, word_embedder):
    super(RelationAttendedDefinitionSentenceEncoder, self).__init__()
    self.config = args
    self.args = args
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.projection_dim = input_dim
    self.feedforward_hidden_dim = input_dim
    self.num_layers = self.args.num_layers_for_stackatt
    self.num_attention_heads = self.args.num_atthead_for_stackatt

    self.word_embedder = word_embedder
    self.word_embedding_dropout = nn.Dropout(self.args.word_embedding_dropout)

    if self.args.definition_seq2seq == 'passthrough':
        self.seq2seq = PassThroughEncoder(input_dim=input_dim)
    elif self.args.definition_seq2seq == 'multiheadstackatt':
        self.seq2seq = StackedSelfAttentionEncoder(
            input_dim=input_dim,
            hidden_dim=input_dim,
            projection_dim=input_dim,
            feedforward_hidden_dim=input_dim,
            num_layers=2,
            num_attention_heads=2)
    elif self.args.definition_seq2seq == 'qanet':
        self.seq2seq = QaNetEncoder(
            input_dim=input_dim,
            hidden_dim=input_dim,
            attention_projection_dim=input_dim,
            feedforward_hidden_dim=input_dim,
            num_blocks=2,
            num_convs_per_block=2,
            conv_kernel_size=3,
            num_attention_heads=2)
    elif self.args.definition_seq2seq == 'intrasentenceatt':
        self.seq2seq = IntraSentenceAttentionEncoder(
            input_dim=input_dim, projection_dim=input_dim, output_dim=input_dim)
    elif self.args.definition_seq2seq == 'gatedcnn':
        self.seq2seq = GatedCnnEncoder(
            input_dim=512,
            layers=[[[4, 512]],
                    [[4, 512], [4, 512]],
                    [[4, 512], [4, 512]],
                    [[4, 512], [4, 512]]],
            dropout=0.05)
    elif self.args.definition_seq2seq == 'bilmtransformer':
        self.seq2seq = BidirectionalLanguageModelTransformer(
            input_dim=input_dim, hidden_dim=input_dim, num_layers=2)
    # elif self.args.definition_seq2seq == 'feedforward':
    #     feedforward = FeedForward(input_dim=input_dim, num_layers=1, hidden_dims=input_dim,
    #                               activations=self.args.activation_for_sentence_ff)
    #     self.seq2seq = FeedForwardEncoder(feedforward)
    elif self.args.definition_seq2seq == 'multiheadselfatt':
        self.seq2seq = MultiHeadSelfAttention(
            num_heads=2,
            input_dim=input_dim,
            output_projection_dim=input_dim,
            attention_dim=input_dim,
            values_dim=input_dim)
    else:
        print('Encoder not defined:', self.args.definition_seq2seq)
        exit()
train_dataset = reader.read("conll2003/eng.train")
validation_dataset = reader.read("conll2003/eng.testa")
test_dataset = reader.read("conll2003/eng.testb")

all_insts = train_dataset + validation_dataset + test_dataset
vocab = Vocabulary.from_instances(all_insts)
dataset = Batch(all_insts)
dataset.index_instances(vocab)

embedder = PretrainedTransformerMismatchedEmbedder(model_name, last_layer_only=True)
token_embedder = BasicTextFieldEmbedder({"bert": embedder})
embedding_dim = 768
encoder = PassThroughEncoder(input_dim=embedding_dim)

model = SimpleTagger(vocab=vocab,
                     text_field_embedder=token_embedder,
                     encoder=encoder,
                     calculate_span_f1=True,
                     label_encoding="IOB1")

optimizer = optim.Adam(model.parameters(), lr=3e-05)

if torch.cuda.is_available():
    print("Using GPU")
    cuda_device = 0
    model = model.cuda(cuda_device)
else:
    cuda_device = -1
def __init__(
    self,
    embedder: TokenEmbedder,
    encoder: Seq2SeqEncoder = None,
):
    super().__init__()
    self.embedder = embedder
    self.encoder = encoder or PassThroughEncoder(input_dim=self.embedder.get_output_dim())
def __init__(self,
             vocab: Vocabulary,
             token_representation_dim: int,
             encoder: Optional[Seq2SeqEncoder] = None,
             decoder: Optional[Union[FeedForward, str]] = None,
             contextualizer: Optional[Contextualizer] = None,
             pretrained_file: Optional[str] = None,
             transfer_contextualizer_from_pretrained_file: bool = False,
             transfer_encoder_from_pretrained_file: bool = False,
             freeze_encoder: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SelectiveRegressor, self).__init__(vocab, regularizer)

    self._token_representation_dim = token_representation_dim
    self._contextualizer = contextualizer
    if encoder is None:
        encoder = PassThroughEncoder(input_dim=self._token_representation_dim)
    self._encoder = encoder

    # Load the contextualizer and encoder weights from the
    # pretrained_file if applicable
    if pretrained_file:
        archive = None
        if self._contextualizer and transfer_contextualizer_from_pretrained_file:
            logger.info("Attempting to load contextualizer weights from "
                        "pretrained_file at {}".format(pretrained_file))
            archive = load_archive(cached_path(pretrained_file))
            contextualizer_state = archive.model._contextualizer.state_dict()
            contextualizer_layer_num = self._contextualizer._layer_num
            self._contextualizer.load_state_dict(contextualizer_state)
            if contextualizer_layer_num is not None:
                logger.info("Setting layer num to {}".format(contextualizer_layer_num))
                self._contextualizer.set_layer_num(contextualizer_layer_num)
            else:
                self._contextualizer.reset_layer_num()
            logger.info("Successfully loaded contextualizer weights!")
        if transfer_encoder_from_pretrained_file:
            logger.info("Attempting to load encoder weights from "
                        "pretrained_file at {}".format(pretrained_file))
            if archive is None:
                archive = load_archive(cached_path(pretrained_file))
            encoder_state = archive.model._encoder.state_dict()
            self._encoder.load_state_dict(encoder_state)
            logger.info("Successfully loaded encoder weights!")

    self._freeze_encoder = freeze_encoder
    for parameter in self._encoder.parameters():
        # If freeze is true, requires_grad should be false and vice versa.
        parameter.requires_grad_(not self._freeze_encoder)

    if decoder is None or decoder == "linear":
        # Create the default decoder (linear regression) if it is not provided.
        decoder = FeedForward.from_params(Params({
            "input_dim": self._encoder.get_output_dim(),
            "num_layers": 1,
            "hidden_dims": 1,
            "activations": "linear"}))
        logger.info("No decoder provided to model, using default "
                    "decoder: {}".format(decoder))
    elif decoder == "mlp":
        # Create the MLP decoder
        decoder = FeedForward.from_params(Params({
            "input_dim": self._encoder.get_output_dim(),
            "num_layers": 2,
            "hidden_dims": [1024, 1],
            "activations": ["relu", "linear"]}))
        logger.info("Using MLP decoder: {}".format(decoder))
    self._decoder = decoder

    check_dimensions_match(self._token_representation_dim,
                           self._encoder.get_input_dim(),
                           "token representation dim", "encoder input dim")
    check_dimensions_match(self._encoder.get_output_dim(),
                           self._decoder.get_input_dim(),
                           "encoder output dim", "decoder input dim")
    check_dimensions_match(self._decoder.get_output_dim(), 1,
                           "decoder output dim",
                           "1, since we're predicting a real value")

    # SmoothL1Loss as described in "Neural Models of Factuality" (NAACL 2018)
    self.loss = torch.nn.SmoothL1Loss(reduction="none")

    self.metrics = {
        "mae": MeanAbsoluteError(),
        "pearson_r": PearsonCorrelation()
    }

    # Whether to run in error analysis mode or not, see commands.error_analysis
    self.error_analysis = False
    logger.info("Applying initializer...")
    initializer(self)