def __init__(self, vocab: Vocabulary, model_text_field_embedder: TextFieldEmbedder, internal_text_encoder: Seq2VecEncoder, output_layer: FeedForward, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(Seq2VecClassifier, self).__init__(vocab, regularizer) self.model_text_field_embedder = model_text_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") self.internal_text_encoder = internal_text_encoder self.output_layer = output_layer if model_text_field_embedder.get_output_dim( ) != internal_text_encoder.get_input_dim(): raise ConfigurationError( "The output dimension of the model_text_field_embedder must match the " "input dimension of the internal_text_encoder. Found {} and {}, " "respectively.".format( model_text_field_embedder.get_output_dim(), internal_text_encoder.get_input_dim())) self.loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, inner_encoder: Seq2VecEncoder, outer_encoder: Seq2SeqEncoder, label_namespace: str = "labels", dropout: float = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(HierarchicalRNN, self).__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.inner_encoder = inner_encoder self.outer_encoder = outer_encoder if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self.label_projection_layer = TimeDistributed( Linear(outer_encoder.get_output_dim(), self.num_tags)) self.metrics = {"accuracy": CategoricalAccuracy()} self._loss = torch.nn.CrossEntropyLoss() check_dimensions_match(text_field_embedder.get_output_dim(), inner_encoder.get_input_dim(), 'text field embedding dim', 'inner encoder input dim') check_dimensions_match(inner_encoder.get_output_dim(), outer_encoder.get_input_dim(), 'inner encoder output dim', 'outer encoder input dim') initializer(self)
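# The constructor above chains an inner Seq2VecEncoder into an outer Seq2SeqEncoder and guards
# the wiring with check_dimensions_match. A minimal sketch of components that satisfy those
# checks (AllenNLP 0.x-style wrappers assumed; all sizes here are illustrative, not the
# original model's configuration):
import torch
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.seq2vec_encoders import PytorchSeq2VecWrapper

embedding_dim = 100  # must equal text_field_embedder.get_output_dim()
inner_encoder = PytorchSeq2VecWrapper(
    torch.nn.LSTM(embedding_dim, 200, batch_first=True))
outer_encoder = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(inner_encoder.get_output_dim(), 200, batch_first=True))
# Mirrors the second check_dimensions_match call in the constructor above.
assert inner_encoder.get_output_dim() == outer_encoder.get_input_dim()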
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, text_encoder: Seq2VecEncoder, classifier_feedforward: FeedForward, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(PriorityCrisisClassifierWithLossWeight, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder # self.num_classes = self.vocab.get_vocab_size("labels") self.text_encoder = text_encoder self.classifier_feedforward = classifier_feedforward if text_field_embedder.get_output_dim() != text_encoder.get_input_dim( ): raise ConfigurationError( "The output dimension of the text_field_embedder must match the " "input dimension of the text_encoder. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), text_encoder.get_input_dim())) self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3) } # corresponding to low, medium, high and critical weights = torch.tensor([1.0000, 6.1094, 8.3922, 16.8462], dtype=torch.float32) self.loss = torch.nn.CrossEntropyLoss(weight=weights) initializer(self)
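# The weight vector above gives rarer priority classes a larger share of the loss. A hedged
# sketch of one common recipe for such weights (inverse class frequency, normalized so the most
# frequent class gets 1.0); the label counts below are made up for illustration only:
import torch
label_counts = torch.tensor([8400., 1375., 1001., 499.])  # hypothetical counts for low, medium, high, critical
weights = label_counts.max() / label_counts                # e.g. roughly [1.0, 6.1, 8.4, 16.8]
loss = torch.nn.CrossEntropyLoss(weight=weights)
# logits of shape (batch, 4) and integer class labels of shape (batch,)
example_loss = loss(torch.randn(3, 4), torch.tensor([0, 2, 3]))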
def from_params(cls, vocab: Vocabulary, params: Params) -> 'DeIsTe': embedder_params = params.pop("text_field_embedder") text_field_embedder = TextFieldEmbedder.from_params( vocab, embedder_params) inter_attention = MatrixAttention.from_params( params.pop("inter_attention")) param_dyn_encoder = Seq2VecEncoder.from_params( params.pop("param_dyn_encoder")) pos_embedder = TokenEmbedder.from_params( vocab=None, params=params.pop("pos_embedder")) pos_attn_encoder = Seq2VecEncoder.from_params( params.pop("pos_attn_encoder")) output_feedforward_params = params.pop('output_feedforward', None) output_feedforward = FeedForward.from_params( output_feedforward_params) if output_feedforward_params else None initializer = InitializerApplicator.from_params( params.pop('initializer', [])) regularizer = RegularizerApplicator.from_params( params.pop('regularizer', [])) params.assert_empty(cls.__name__) return cls(vocab=vocab, text_field_embedder=text_field_embedder, inter_attention=inter_attention, param_dyn_encoder=param_dyn_encoder, pos_embedder=pos_embedder, pos_attn_encoder=pos_attn_encoder, output_feedforward=output_feedforward, initializer=initializer, regularizer=regularizer)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, title_encoder: Seq2VecEncoder, abstract_encoder: Seq2VecEncoder, classifier_feedforward: FeedForward, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(AcademicPaperClassifier, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") self.title_encoder = title_encoder self.abstract_encoder = abstract_encoder self.classifier_feedforward = classifier_feedforward if text_field_embedder.get_output_dim() != title_encoder.get_input_dim(): raise ConfigurationError("The output dimension of the text_field_embedder must match the " "input dimension of the title_encoder. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), title_encoder.get_input_dim())) if text_field_embedder.get_output_dim() != abstract_encoder.get_input_dim(): raise ConfigurationError("The output dimension of the text_field_embedder must match the " "input dimension of the abstract_encoder. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), abstract_encoder.get_input_dim())) self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3) } self.loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2VecEncoder, classifier_feedforward: FeedForward, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) # raise ValueError(self.vocab.get_vocab_size("tokens")) # raise ValueError(text_field_embedder.get_output_dim()) if text_field_embedder.get_output_dim() != encoder.get_input_dim(): raise ConfigurationError("The output dimension of the text_field_embedder must match the " "input dimension of the encoder. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), encoder.get_input_dim())) self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") self.encoder = encoder self.classifier_feedforward = classifier_feedforward self.metrics = { "multilabel-f1": MultiLabelF1Measure(), 'accuracy': BooleanAccuracy() } self.pearson_r = PearsonCorrelation() self.loss = nn.MultiLabelSoftMarginLoss() #BCEWithLogitsLoss() self._threshold = 0.5 initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, sentence_encoder: Seq2VecEncoder, classifier_feedforward: FeedForward, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, pretrained_archive=None) -> None: super(SentenceClassifier, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") self.sentence_encoder = sentence_encoder self.classifier_feedforward = classifier_feedforward if text_field_embedder.get_output_dim( ) != sentence_encoder.get_input_dim(): raise ConfigurationError( "The output dimension of the text_field_embedder must match the " "input dimension of the sentence_encoder. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), sentence_encoder.get_input_dim())) self.metrics = { "accuracy": CategoricalAccuracy(), } self.loss = torch.nn.CrossEntropyLoss() initializer(self) # if provided, load a pre-trained model if pretrained_archive: archive = load_archive(pretrained_archive) self._initialize_weights_from_archive(archive)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'OntoEmmaNN': name_embedder = TextFieldEmbedder.from_params( vocab, params.pop("name_embedder")) definition_embedder = TextFieldEmbedder.from_params( vocab, params.pop("definition_embedder")) name_encoder = Seq2VecEncoder.from_params(params.pop("name_encoder")) definition_encoder = Seq2VecEncoder.from_params( params.pop("definition_encoder")) siamese_feedforward = FeedForward.from_params( params.pop("siamese_feedforward")) decision_feedforward = FeedForward.from_params( params.pop("decision_feedforward")) init_params = params.pop('initializer', None) reg_params = params.pop('regularizer', None) initializer = (InitializerApplicator.from_params(init_params) if init_params is not None else InitializerApplicator()) regularizer = RegularizerApplicator.from_params( reg_params) if reg_params is not None else None return cls(vocab=vocab, name_embedder=name_embedder, definition_embedder=definition_embedder, name_encoder=name_encoder, definition_encoder=definition_encoder, siamese_feedforward=siamese_feedforward, decision_feedforward=decision_feedforward, initializer=initializer, regularizer=regularizer)
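# A from_params method like the one above is driven by a JSON/Jsonnet config whose keys mirror
# the params.pop(...) calls. Hedged sketch of such a config (AllenNLP 0.x-style; every dimension
# and encoder choice below is an illustrative assumption, not the original experiment's values):
from allennlp.common import Params
params = Params({
    "name_embedder": {"tokens": {"type": "embedding", "embedding_dim": 50}},
    "definition_embedder": {"tokens": {"type": "embedding", "embedding_dim": 50}},
    "name_encoder": {"type": "boe", "embedding_dim": 50},
    "definition_encoder": {"type": "boe", "embedding_dim": 50},
    "siamese_feedforward": {"input_dim": 50, "num_layers": 1, "hidden_dims": 50, "activations": "relu"},
    "decision_feedforward": {"input_dim": 100, "num_layers": 1, "hidden_dims": 1, "activations": "sigmoid"},
})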
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, text_encoder: Seq2VecEncoder, classifier_feedforward: FeedForward, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(PriorityCrisisClassifier, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder # self.num_classes = self.vocab.get_vocab_size("labels") self.text_encoder = text_encoder self.classifier_feedforward = classifier_feedforward if text_field_embedder.get_output_dim() != text_encoder.get_input_dim( ): raise ConfigurationError( "The output dimension of the text_field_embedder must match the " "input dimension of the text_encoder. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), text_encoder.get_input_dim())) self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3) } # the vector [1.0000, 4.6600, 6.0852, 83.3817] was computed from the training set by the get_weights helper in ./tests/models/priority_crisis_classifier_test.py weights = torch.tensor([1.0000, 4.0000, 8.0000, 16.0000], dtype=torch.float32) self.loss = torch.nn.CrossEntropyLoss() # self.loss = torch.nn.CrossEntropyLoss(weight=weights) initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, text_encoder: Seq2VecEncoder, classifier_feedforward: FeedForward, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(EFClassifier, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") logger.info("------------------------------------") logger.info("num class {}".format(self.num_classes)) logger.info("------------------------------------") self.text_encoder = text_encoder self.classifier_feedforward = classifier_feedforward if text_field_embedder.get_output_dim() != text_encoder.get_input_dim( ): raise ConfigurationError( "The output dimension of the text_field_embedder must match the " "input dimension of the text_encoder. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), text_encoder.get_input_dim())) self.f1 = F1Measure(positive_label=1) self.loss = torch.nn.CrossEntropyLoss() initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, SH_field_embedder: TextFieldEmbedder, abstract_text_encoder: Seq2VecEncoder, classifier_feedforward: FeedForward, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(EtdClassifier, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.SH_field_embedder = SH_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") self.label_dict = self.vocab.get_index_to_token_vocabulary('labels') self.abstract_text_encoder = abstract_text_encoder self.classifier_feedforward = classifier_feedforward if text_field_embedder.get_output_dim() != abstract_text_encoder.get_input_dim(): raise ConfigurationError("The output dimension of the text_field_embedder must match the " "input dimension of the abstract_text_encoder. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), abstract_text_encoder.get_input_dim())) self.metrics = { "roc_auc_score": RocAucScore() } self.loss = torch.nn.BCEWithLogitsLoss() initializer(self)
def from_params( cls, vocab: Vocabulary, params: Params ) -> 'DialogueContextHierarchicalCoherenceClassifier': embedder_params = params.pop("text_field_embedder") text_field_embedder = TextFieldEmbedder.from_params( vocab, embedder_params) utterance_encoder = Seq2VecEncoder.from_params( params.pop("utterance_encoder")) context_encoder = Seq2VecEncoder.from_params( params.pop("context_encoder")) response_encoder = Seq2VecEncoder.from_params( params.pop("response_encoder")) classifier_feedforward = FeedForward.from_params( params.pop("classifier_feedforward")) initializer = InitializerApplicator.from_params( params.pop("initializer", [])) regularizer = RegularizerApplicator.from_params( params.pop("regularizer", [])) return cls(vocab=vocab, text_field_embedder=text_field_embedder, utterance_encoder=utterance_encoder, context_encoder=context_encoder, response_encoder=response_encoder, classifier_feedforward=classifier_feedforward, initializer=initializer, regularizer=regularizer)
def from_params(cls, params: Params, vocab: Vocabulary) -> 'CMVPredictor': response_embedder_params = params.pop("response_embedder") response_embedder = TextFieldEmbedder.from_params( vocab=vocab, params=response_embedder_params) response_word_attention_params = params.pop("response_word_attention") response_word_attention = Seq2VecEncoder.from_params( params=response_word_attention_params) response_encoder_params = params.pop("response_encoder") response_encoder = Seq2SeqEncoder.from_params( params=response_encoder_params) response_sentence_attention_params = params.pop( "response_sentence_attention") response_sentence_attention = Seq2VecEncoder.from_params( params=response_sentence_attention_params) op_embedder_params = params.pop("op_embedder", None) op_embedder = None if op_embedder_params is not None: op_embedder = TextFieldEmbedder.from_params( vocab=vocab, params=op_embedder_params) op_word_attention_params = params.pop("op_word_attention", None) op_word_attention = None if op_word_attention_params is not None: op_word_attention = Seq2VecEncoder.from_params( params=op_word_attention_params) op_encoder_params = params.pop("op_encoder", None) op_encoder = None if op_encoder_params is not None: op_encoder = Seq2SeqEncoder.from_params(params=op_encoder_params) output_feedforward = FeedForward.from_params( params=params.pop('output_feedforward')) feature_feedforward = None feature_feedforward_params = params.pop('feature_feedforward', None) if feature_feedforward_params is not None: feature_feedforward = FeedForward.from_params( params=feature_feedforward_params) dropout = params.pop("dropout", 0) initializer = InitializerApplicator.from_params( params=params.pop('initializer', [])) regularizer = RegularizerApplicator.from_params( params=params.pop('regularizer', [])) params.assert_empty(cls.__name__) return cls(vocab, response_embedder, response_word_attention, response_encoder, response_sentence_attention, output_feedforward, op_embedder, op_word_attention, op_encoder, dropout, initializer, regularizer, feature_feedforward)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, share_encoder: Seq2VecEncoder = None, private_encoder: Seq2VecEncoder = None, dropout: float = None, input_dropout: float = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: RegularizerApplicator = None) -> None: super(JointSentimentClassifier, self).__init__(vocab=vocab, regularizer=regularizer) self._text_field_embedder = text_field_embedder if share_encoder is None and private_encoder is None: share_rnn = nn.LSTM( input_size=self._text_field_embedder.get_output_dim(), hidden_size=150, batch_first=True, dropout=dropout, bidirectional=True) share_encoder = PytorchSeq2SeqWrapper(share_rnn) private_rnn = nn.LSTM( input_size=self._text_field_embedder.get_output_dim(), hidden_size=150, batch_first=True, dropout=dropout, bidirectional=True) private_encoder = PytorchSeq2SeqWrapper(private_rnn) logger.info("Using LSTM as encoder") self._domain_embeddings = Embedding( len(TASKS_NAME), self._text_field_embedder.get_output_dim()) self._share_encoder = share_encoder self._s_domain_discriminator = Discriminator( share_encoder.get_output_dim(), len(TASKS_NAME)) self._p_domain_discriminator = Discriminator( private_encoder.get_output_dim(), len(TASKS_NAME)) # TODO individual valid discriminator self._valid_discriminator = Discriminator( self._domain_embeddings.get_output_dim(), 2) for task in TASKS_NAME: tagger = SentimentClassifier( vocab=vocab, text_field_embedder=self._text_field_embedder, share_encoder=self._share_encoder, private_encoder=copy.deepcopy(private_encoder), s_domain_discriminator=self._s_domain_discriminator, p_domain_discriminator=self._p_domain_discriminator, valid_discriminator=self._valid_discriminator, dropout=dropout, input_dropout=input_dropout, label_smoothing=0.1, initializer=initializer) self.add_module("_tagger_{}".format(task), tagger) logger.info("Multi-Task Learning Model has been instantiated.")
def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder, encoder: Seq2VecEncoder, feedforward: Optional[FeedForward] = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, dropout: float = 0.0, label_name: str = 'target-sentiment-labels') -> None: super().__init__(vocab, regularizer) ''' :param vocab: A Vocabulary, required in order to compute sizes for input/output projections. :param embedder: Used to embed the text. :param encoder: Encodes the sentence/text. E.g. LSTM :param feedforward: An optional feed forward layer to apply after the encoder :param initializer: Used to initialize the model parameters. :param regularizer: If provided, will be used to calculate the regularization penalty during training. :param dropout: To apply dropout after each layer apart from the last layer. All dropout that is applied to timebased data will be `variational dropout`_ all else will be standard dropout. :param label_name: Name of the label name space. This is based on the LSTM model by `Tang et al. 2016 <https://www.aclweb.org/anthology/C16-1311.pdf>`_ ''' self.label_name = label_name self.embedder = embedder self.encoder = encoder self.num_classes = self.vocab.get_vocab_size(self.label_name) self.feedforward = feedforward if feedforward is not None: output_dim = self.feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.label_projection = Linear(output_dim, self.num_classes) self.metrics = {"accuracy": CategoricalAccuracy()} self.f1_metrics = {} # F1 Scores label_index_name = self.vocab.get_index_to_token_vocabulary( self.label_name) for label_index, _label_name in label_index_name.items(): _label_name = f'F1_{_label_name.capitalize()}' self.f1_metrics[_label_name] = F1Measure(label_index) self._variational_dropout = InputVariationalDropout(dropout) self._naive_dropout = Dropout(dropout) check_dimensions_match(embedder.get_output_dim(), encoder.get_input_dim(), 'Embedding', 'Encoder') if self.feedforward is not None: check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(), 'Encoder', 'FeedForward') initializer(self)
def __init__( self, vocab: Vocabulary, input_unit: Seq2VecEncoder, text_field_embedder: TextFieldEmbedder, # embedding_projection_dim: int = None, classifier_feedforward: FeedForward = None, max_step: int = 12, n_memories: int = 3, self_attention: bool = False, memory_gate: bool = False, dropout: float = 0.15, loss_weights=None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.num_classes = max(self.vocab.get_vocab_size("labels"), 2) self.text_field_embedder = text_field_embedder self.proj = nn.Linear(text_field_embedder.get_output_dim(), input_unit.get_input_dim()) self.input_unit = input_unit self.mac = MACCell( text_field_embedder.get_output_dim( ), # input_unit.get_output_dim(), max_step=max_step, n_memories=n_memories, self_attention=self_attention, memory_gate=memory_gate, dropout=dropout, save_attns=False, ) hidden_size = 2 * input_unit.get_output_dim() n_layers = 3 self.classifier = classifier_feedforward or FeedForward( input_dim=hidden_size, num_layers=n_layers, hidden_dims=(n_layers - 1) * [hidden_size] + [self.num_classes], activations=[ Activation.by_name("relu")(), Activation.by_name("relu")(), Activation.by_name("linear")() ], dropout=[dropout, dropout, 0.0]) self.metrics = { "accuracy": CategoricalAccuracy(), "f1": F1Measure(positive_label=1), "weighted_f1": WeightedF1Measure(), "fbeta": FBetaMeasure(average='micro') } weights = loss_weights and torch.FloatTensor(loss_weights) self.loss = nn.CrossEntropyLoss(weight=weights) initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2VecEncoder, label_namespace: str = "logical_form", feedforward: Optional[FeedForward] = None, dropout: Optional[float] = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.num_tags = self.vocab.get_vocab_size("logical_form") self.encoder = encoder self.text_field_embedder = text_field_embedder self.BOW_embedder_question = BagOfWordCountsTokenEmbedder( vocab, "tokens", projection_dim=self.encoder.get_output_dim()) self.BOW_embedder_description = BagOfWordCountsTokenEmbedder( vocab, "tokens", projection_dim=self.encoder.get_output_dim()) self.BOW_embedder_detail = BagOfWordCountsTokenEmbedder( vocab, "tokens", projection_dim=self.encoder.get_output_dim()) # using crf as the estimator for sequential tags self.crf = ConditionalRandomField( self.num_tags, include_start_end_transitions=False ) self.softmax_layer = Softmax() self.ce_loss = CrossEntropyLoss() self.matched = 0 self.all_pred = 0 if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self._feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.question_pred_layer = Linear(4*output_dim, 3*self.num_tags) # if constrain_crf_decoding and calculate_span_f1 are not # provided, (i.e., they're None), set them to True # if label_encoding is provided and False if it isn't. self.metrics = {} check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") if feedforward is not None: check_dimensions_match(4*encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim") initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, abstract_encoder: Seq2VecEncoder, classifier_feedforward: FeedForward, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super(SentimentClassifier, self).__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size("labels") self.abstract_encoder = abstract_encoder self.classifier_feedforward = classifier_feedforward if text_field_embedder.get_output_dim( ) != abstract_encoder.get_input_dim(): raise ConfigurationError( "The output dimension of the text_field_embedder must match the " "input dimension of the abstract_encoder. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), abstract_encoder.get_input_dim())) self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3) } self.loss = torch.nn.CrossEntropyLoss() initializer(self) model = Sequential() model.add( Conv2D(64, kernel_size=(3, 3), input_shape=(530, 700, 3), padding='VALID')) model.add(Conv2D(64, kernel_size=(3, 3), padding='VALID')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add( Conv2D(128, kernel_size=(3, 3), strides=1, activation='relu', padding='VALID')) model.add( Conv2D(128, kernel_size=(3, 3), strides=1, activation='relu', padding='VALID')) model.add(AveragePooling2D(pool_size=(19, 19))) model.add(Flatten()) model.summary() self.image_model = model
def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder, encoder: Seq2VecEncoder, feedforward: FeedForward): super().__init__(vocab) self.embedder = embedder self.encoder = encoder num_labels = vocab.get_vocab_size("labels") print("==> encoded input shape: {}, output shape: {}\n".format( encoder.get_input_dim(), encoder.get_output_dim())) # logger.info("==> encoded input shape: {}, output shape: {}\n".format(encoder.get_input_dim(),encoder.get_output_dim())) self.feedforward = feedforward self.classifier = torch.nn.Linear(self.feedforward.get_output_dim(), num_labels) self.accuracy = CategoricalAccuracy()
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, clauses_encoder: Seq2VecEncoder, outer_encoder: Seq2SeqEncoder, label_namespace: str = "labels", constraint_type: str = None, include_start_end_transitions: bool = True, dropout: float = None, loss_weights: Optional[List] = [], initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, ) -> None: super(JCC, self).__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.clauses_encoder = clauses_encoder self.outer_encoder = outer_encoder if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self.label_projection_layer = TimeDistributed( Linear(outer_encoder.get_output_dim(), self.num_tags)) labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(constraint_type, labels) self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions, ) self.metrics = {"accuracy": Accuracy()} check_dimensions_match( text_field_embedder.get_output_dim(), clauses_encoder.get_input_dim(), "text field embedding dim", "clauses encoder input dim", ) check_dimensions_match( clauses_encoder.get_output_dim(), outer_encoder.get_input_dim(), "clauses encoder output dim", "outer encoder input dim", ) initializer(self)
def setUp(self): super(TestTokenCharactersEncoder, self).setUp() self.vocab = Vocabulary() self.vocab.add_token_to_namespace("1", "token_characters") self.vocab.add_token_to_namespace("2", "token_characters") self.vocab.add_token_to_namespace("3", "token_characters") self.vocab.add_token_to_namespace("4", "token_characters") params = Params({ "embedding": { "embedding_dim": 2, "vocab_namespace": "token_characters" }, "encoder": { "type": "cnn", "embedding_dim": 2, "num_filters": 4, "ngram_filter_sizes": [1, 2], "output_dim": 3 } }) self.encoder = TokenCharactersEncoder.from_params(vocab=self.vocab, params=deepcopy(params)) self.embedding = Embedding.from_params(vocab=self.vocab, params=params["embedding"]) self.inner_encoder = Seq2VecEncoder.from_params(params["encoder"]) constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.})) initializer = InitializerApplicator([(".*", constant_init)]) initializer(self.encoder) initializer(self.embedding) initializer(self.inner_encoder)
def setUp(self): super(TestTokenCharactersEncoder, self).setUp() self.vocab = Vocabulary() self.vocab.add_token_to_namespace("1", "token_characters") self.vocab.add_token_to_namespace("2", "token_characters") self.vocab.add_token_to_namespace("3", "token_characters") self.vocab.add_token_to_namespace("4", "token_characters") params = Params({ "embedding": { "embedding_dim": 2, "vocab_namespace": "token_characters" }, "encoder": { "type": "cnn", "embedding_dim": 2, "num_filters": 4, "ngram_filter_sizes": [1, 2], "output_dim": 3 } }) self.encoder = TokenCharactersEncoder.from_params( self.vocab, deepcopy(params)) self.embedding = Embedding.from_params(self.vocab, params["embedding"]) self.inner_encoder = Seq2VecEncoder.from_params(params["encoder"]) constant_init = lambda tensor: torch.nn.init.constant(tensor, 1.) initializer = InitializerApplicator(default_initializer=constant_init) initializer(self.encoder) initializer(self.embedding) initializer(self.inner_encoder)
def __init__(self, vocab: Vocabulary, source_text_embedder: TextFieldEmbedder, encoder: Seq2VecEncoder, tied_source_embedder_key: Optional[str] = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, positive_label: str = "algebra", target_namespace: str = "tokens") -> None: super(TextClassifier, self).__init__(vocab, regularizer) self._source_text_embedder = source_text_embedder self._target_namespace = target_namespace self._encoder = encoder self._linear = torch.nn.Linear(in_features=encoder.get_output_dim(), out_features=vocab.get_vocab_size('labels')) self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace) self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace) self.accuracy = CategoricalAccuracy() positive_label = vocab.get_token_index(positive_label, namespace='labels') # for computing precision, recall and f1 self.f1_measure = F1Measure(positive_label) # the loss function combines log-softmax and NLL loss; its input is raw logits self.loss_function = torch.nn.CrossEntropyLoss() initializer(self)
def __init__(self, vocab: Vocabulary, question_embedder: TextFieldEmbedder, action_embedding_dim: int, encoder: Seq2SeqEncoder, entity_encoder: Seq2VecEncoder, max_decoding_steps: int, use_neighbor_similarity_for_linking: bool = False, dropout: float = 0.0, num_linking_features: int = 10, rule_namespace: str = 'rule_labels', tables_directory: str = '/wikitables/') -> None: super(WikiTablesSemanticParser, self).__init__(vocab) self._question_embedder = question_embedder self._encoder = encoder self._entity_encoder = TimeDistributed(entity_encoder) self._max_decoding_steps = max_decoding_steps self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking if dropout > 0: self._dropout = torch.nn.Dropout(p=dropout) else: self._dropout = lambda x: x self._rule_namespace = rule_namespace self._denotation_accuracy = WikiTablesAccuracy(tables_directory) self._action_sequence_accuracy = Average() self._has_logical_form = Average() self._action_padding_index = -1 # the padding value used by IndexField num_actions = vocab.get_vocab_size(self._rule_namespace) self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim) self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1) # This is what we pass as input in the first step of decoding, when we don't have a # previous action, or a previous question attention. self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim)) self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim())) torch.nn.init.normal_(self._first_action_embedding) torch.nn.init.normal_(self._first_attended_question) check_dimensions_match(entity_encoder.get_output_dim(), question_embedder.get_output_dim(), "entity word average embedding dim", "question embedding dim") self._num_entity_types = 4 # TODO(mattg): get this in a more principled way somehow? self._num_start_types = 5 # TODO(mattg): get this in a more principled way somehow? self._embedding_dim = question_embedder.get_output_dim() self._type_params = torch.nn.Linear(self._num_entity_types, self._embedding_dim) self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim) if num_linking_features > 0: self._linking_params = torch.nn.Linear(num_linking_features, 1) else: self._linking_params = None if self._use_neighbor_similarity_for_linking: self._question_entity_params = torch.nn.Linear(1, 1) self._question_neighbor_params = torch.nn.Linear(1, 1) else: self._question_entity_params = None self._question_neighbor_params = None
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, seq2vec_encoder: Seq2VecEncoder, initializer: InitializerApplicator) -> None: super(BertModel, self).__init__(vocab) self.text_field_embedder = text_field_embedder self.seq2vec_encoder = seq2vec_encoder self.num_types = self.vocab.get_vocab_size("state_change_type_labels") self.aggregate_feedforward = Linear(seq2vec_encoder.get_output_dim(), self.num_types) self._type_accuracy = CategoricalAccuracy() self.type_f1_metrics = {} self.type_labels_vocab = self.vocab.get_index_to_token_vocabulary( "state_change_type_labels") for type_label in self.type_labels_vocab.values(): self.type_f1_metrics["type_" + type_label] = F1Measure( self.vocab.get_token_index(type_label, "state_change_type_labels")) self._loss = torch.nn.CrossEntropyLoss() initializer(self)
def from_params(cls, vocab: Vocabulary, params: Params, constructor_to_call=None, constructor_to_inspect=None) -> 'BertModel': #initialize the class using JSON params embedder_params = params.pop("text_field_embedder") token_params = embedder_params.pop("tokens") embedding = PretrainedTransformerEmbedder.from_params( vocab=vocab, params=token_params) text_field_embedder = BasicTextFieldEmbedder( token_embedders={'tokens': embedding}) # text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params) seq2vec_encoder_params = params.pop("seq2vec_encoder") seq2vec_encoder = Seq2VecEncoder.from_params(seq2vec_encoder_params) initializer = InitializerApplicator( ) #.from_params(params.pop("initializer", [])) params.assert_empty(cls.__name__) # print(cls) return cls(vocab=vocab, text_field_embedder=text_field_embedder, seq2vec_encoder=seq2vec_encoder, initializer=initializer)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SarcasmClassifier': bert_model_name = params.pop("bert_model_name") quote_response_encoder = Seq2VecEncoder.from_params( params.pop("quote_response_encoder")) classifier_feedforward = FeedForward.from_params( params.pop("classifier_feedforward")) classifier_feedforward_2 = FeedForward.from_params( params.pop("classifier_feedforward_2")) initializer = InitializerApplicator.from_params( params.pop('initializer', [])) regularizer = RegularizerApplicator.from_params( params.pop('regularizer', [])) report_auxiliary_metrics = params.pop_bool("report_auxiliary_metrics", False) # predict_mode = params.pop_bool("predict_mode", False) # print(f"pred mode: {predict_mode}") return cls(vocab=vocab, bert_model_name=bert_model_name, quote_response_encoder=quote_response_encoder, classifier_feedforward=classifier_feedforward, classifier_feedforward_2=classifier_feedforward_2, initializer=initializer, regularizer=regularizer, report_auxiliary_metrics=report_auxiliary_metrics)
def setUp(self): super().setUp() self.vocab = Vocabulary() self.vocab.add_token_to_namespace("1", "token_characters") self.vocab.add_token_to_namespace("2", "token_characters") self.vocab.add_token_to_namespace("3", "token_characters") self.vocab.add_token_to_namespace("4", "token_characters") params = Params({ "embedding": { "embedding_dim": 2, "vocab_namespace": "token_characters" }, "encoder": { "type": "cnn", "embedding_dim": 2, "num_filters": 4, "ngram_filter_sizes": [1, 2], "output_dim": 3 } }) self.encoder = TokenCharactersEncoder.from_params( vocab=self.vocab, params=deepcopy(params)) self.embedding = Embedding.from_params(vocab=self.vocab, params=params["embedding"]) self.inner_encoder = Seq2VecEncoder.from_params(params["encoder"]) constant_init = Initializer.from_params( Params({ "type": "constant", "val": 1. })) initializer = InitializerApplicator([(".*", constant_init)]) initializer(self.encoder) initializer(self.embedding) initializer(self.inner_encoder)
def __init__(self, word_embeddings: TextFieldEmbedder, encoder: Seq2VecEncoder, vocab: Vocabulary) -> None: super().__init__(vocab) # We need the embeddings to convert word IDs to their vector representations self.word_embeddings = word_embeddings self.encoder = encoder # After converting a sequence of vectors to a single vector, we feed it into # a fully-connected linear layer to reduce the dimension to the total number of labels. self.linear = torch.nn.Linear( in_features=encoder.get_output_dim(), out_features=vocab.get_vocab_size('labels')) # Monitor the metrics - we use accuracy, as well as precision, recall and F1 per label self.accuracy = CategoricalAccuracy() self.f1_measure_positive = F1Measure( vocab.get_token_index("positive", "labels")) self.f1_measure_negative = F1Measure( vocab.get_token_index("negative", "labels")) self.f1_measure_neutral = F1Measure( vocab.get_token_index("neutral", "labels")) # We use the cross entropy loss because this is a classification task. # Note that PyTorch's CrossEntropyLoss combines softmax and log likelihood loss, # which makes it unnecessary to add a separate softmax layer. self.loss_function = torch.nn.CrossEntropyLoss()
def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder, encoder: Seq2VecEncoder): super().__init__(vocab) self.embedder = embedder self.encoder = encoder num_labels = vocab.get_vocab_size("labels") self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
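# For a model wired like the constructor above, the (unshown) forward pass typically embeds,
# masks, encodes, and projects. This is a hedged, hypothetical sketch only: the input field
# names and the loss choice are assumptions, not taken from the original model.
import torch
from allennlp.nn import util

def forward(self, tokens, label=None):
    embedded = self.embedder(tokens)          # (batch, num_tokens, embedding_dim)
    mask = util.get_text_field_mask(tokens)   # (batch, num_tokens)
    encoded = self.encoder(embedded, mask)    # (batch, encoder_output_dim)
    logits = self.classifier(encoded)         # (batch, num_labels)
    output = {"logits": logits}
    if label is not None:
        output["loss"] = torch.nn.functional.cross_entropy(logits, label)
    return output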
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, question_encoder: Seq2VecEncoder, answers_encoder: Seq2VecEncoder, captions_encoder: Seq2VecEncoder, classifier_feedforward: FeedForward, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size('labels') self.question_encoder = question_encoder self.answers_encoder = TimeDistributed(answers_encoder) self.captions_encoder = TimeDistributed(captions_encoder) self.classifier_feedforward = classifier_feedforward # self.classifier_feedforward = TimeDistributed(classifier_feedforward) self._encoding_dim = captions_encoder.get_output_dim() self.ques_cap_att = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y') self.loss = torch.nn.CrossEntropyLoss() initializer(self)
def from_params(cls, vocab: Vocabulary, params: Params) -> 'SarcasmClassifier': embedder_params1 = params.pop("text_field_embedder") text_field_embedder = TextFieldEmbedder.from_params(embedder_params1, vocab=vocab) quote_response_encoder = Seq2VecEncoder.from_params( params.pop("quote_response_encoder")) classifier_feedforward = FeedForward.from_params( params.pop("classifier_feedforward")) classifier_feedforward_2 = FeedForward.from_params( params.pop("classifier_feedforward_2")) initializer = InitializerApplicator.from_params( params.pop('initializer', [])) regularizer = RegularizerApplicator.from_params( params.pop('regularizer', [])) report_auxiliary_metrics = params.pop_bool("report_auxiliary_metrics", False) predict_mode = params.pop_bool("predict_mode", False) # print(f"pred mode: {predict_mode}") return cls(vocab=vocab, text_field_embedder=text_field_embedder, quote_response_encoder=quote_response_encoder, classifier_feedforward=classifier_feedforward, classifier_feedforward_2=classifier_feedforward_2, initializer=initializer, regularizer=regularizer, report_auxiliary_metrics=report_auxiliary_metrics, predict_mode=predict_mode)
def test_from_params_requires_batch_first(self): params = Params({ "type": "lstm", "batch_first": False, }) with pytest.raises(ConfigurationError): # pylint: disable=unused-variable encoder = Seq2VecEncoder.from_params(params)
def test_from_params_builders_encoder_correctly(self): # We're just making sure parameters get passed through correctly here, and that the basic # API works. params = Params({ "type": "lstm", "bidirectional": True, "num_layers": 3, "input_size": 5, "hidden_size": 7 }) encoder = Seq2VecEncoder.from_params(params) # pylint: disable=protected-access assert encoder.__class__.__name__ == 'PytorchSeq2VecWrapper' assert encoder._module.__class__.__name__ == 'LSTM' assert encoder._module.num_layers == 3 assert encoder._module.input_size == 5 assert encoder._module.hidden_size == 7 assert encoder._module.bidirectional is True assert encoder._module.batch_first is True
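# The Params-based construction exercised by the test above is equivalent to wrapping a torch
# LSTM by hand. A small sketch with the same settings as the test (batch_first=True is what the
# AllenNLP wrapper expects):
import torch
from allennlp.modules.seq2vec_encoders import PytorchSeq2VecWrapper
encoder = PytorchSeq2VecWrapper(
    torch.nn.LSTM(input_size=5, hidden_size=7, num_layers=3, bidirectional=True, batch_first=True))
# A bidirectional LSTM's Seq2Vec output concatenates the final state of both directions: 2 * hidden_size.
assert encoder.get_output_dim() == 14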