Example #1
    def __init__(self,
                 vocab: Vocabulary,
                 model_text_field_embedder: TextFieldEmbedder,
                 internal_text_encoder: Seq2VecEncoder,
                 output_layer: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(Seq2VecClassifier, self).__init__(vocab, regularizer)

        self.model_text_field_embedder = model_text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.internal_text_encoder = internal_text_encoder
        self.output_layer = output_layer

        if model_text_field_embedder.get_output_dim(
        ) != internal_text_encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the model_text_field_embedder must match the "
                "input dimension of the title_encoder. Found {} and {}, "
                "respectively.".format(
                    model_text_field_embedder.get_output_dim(),
                    internal_text_encoder.get_input_dim()))
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
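
For orientation, a forward pass for a constructor like this one usually embeds the tokens, masks padding, pools with the Seq2VecEncoder, and projects to class logits. The sketch below is not part of the original example; it assumes input field names `tokens` and `label`, and that `typing.Dict`, `torch`, and `allennlp.nn.util` (as `util`) are imported at module level.

    def forward(self,
                tokens: Dict[str, torch.LongTensor],
                label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # Embed token ids and build a padding mask over the sequence.
        embedded_text = self.model_text_field_embedder(tokens)
        mask = util.get_text_field_mask(tokens)
        # Pool the sequence into a single vector, then project to class logits.
        encoded_text = self.internal_text_encoder(embedded_text, mask)
        logits = self.output_layer(encoded_text)
        output_dict = {"logits": logits}
        if label is not None:
            output_dict["loss"] = self.loss(logits, label)
        return output_dict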
Example #2
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 inner_encoder: Seq2VecEncoder,
                 outer_encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 dropout: float = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(HierarchicalRNN, self).__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.inner_encoder = inner_encoder
        self.outer_encoder = outer_encoder
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self.label_projection_layer = TimeDistributed(
            Linear(outer_encoder.get_output_dim(), self.num_tags))
        self.metrics = {"accuracy": CategoricalAccuracy()}
        self._loss = torch.nn.CrossEntropyLoss()

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               inner_encoder.get_input_dim(),
                               'text field embedding dim',
                               'inner encoder input dim')
        check_dimensions_match(inner_encoder.get_output_dim(),
                               outer_encoder.get_input_dim(),
                               'inner encoder output dim',
                               'outer encoder input dim')
        initializer(self)
Example #3
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(PriorityCrisisClassifierWithLossWeight,
              self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        # self.num_classes = self.vocab.get_vocab_size("labels")
        self.text_encoder = text_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != text_encoder.get_input_dim(
        ):
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the title_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       text_encoder.get_input_dim()))
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }

        # corresponding to low, medium, high and critical
        weights = torch.tensor([1.0000, 6.1094, 8.3922, 16.8462],
                               dtype=torch.float32)

        self.loss = torch.nn.CrossEntropyLoss(weight=weights)
        initializer(self)
Example #4
    def from_params(cls, vocab: Vocabulary, params: Params) -> 'DeIsTe':
        embedder_params = params.pop("text_field_embedder")
        text_field_embedder = TextFieldEmbedder.from_params(
            vocab, embedder_params)

        inter_attention = MatrixAttention.from_params(
            params.pop("inter_attention"))
        param_dyn_encoder = Seq2VecEncoder.from_params(
            params.pop("param_dyn_encoder"))

        pos_embedder = TokenEmbedder.from_params(
            vocab=None, params=params.pop("pos_embedder"))
        pos_attn_encoder = Seq2VecEncoder.from_params(
            params.pop("pos_attn_encoder"))

        output_feedforward_params = params.pop('output_feedforward', None)
        output_feedforward = FeedForward.from_params(
            output_feedforward_params) if output_feedforward_params else None

        initializer = InitializerApplicator.from_params(
            params.pop('initializer', []))
        regularizer = RegularizerApplicator.from_params(
            params.pop('regularizer', []))

        params.assert_empty(cls.__name__)
        return cls(vocab=vocab,
                   text_field_embedder=text_field_embedder,
                   inter_attention=inter_attention,
                   param_dyn_encoder=param_dyn_encoder,
                   pos_embedder=pos_embedder,
                   pos_attn_encoder=pos_attn_encoder,
                   output_feedforward=output_feedforward,
                   initializer=initializer,
                   regularizer=regularizer)
Example #5
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 title_encoder: Seq2VecEncoder,
                 abstract_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(AcademicPaperClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.title_encoder = title_encoder
        self.abstract_encoder = abstract_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != title_encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the title_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            title_encoder.get_input_dim()))
        if text_field_embedder.get_output_dim() != abstract_encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the abstract_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            abstract_encoder.get_input_dim()))
        self.metrics = {
                "accuracy": CategoricalAccuracy(),
                "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example #6
    def __init__(self, 
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        # raise ValueError(self.vocab.get_vocab_size("tokens"))
        # raise ValueError(text_field_embedder.get_output_dim())

        if text_field_embedder.get_output_dim() != encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the title_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            encoder.get_input_dim()))

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.encoder = encoder
        self.classifier_feedforward = classifier_feedforward

        self.metrics = {
                "multilabel-f1": MultiLabelF1Measure(),
                'accuracy': BooleanAccuracy()
        }
        self.pearson_r = PearsonCorrelation()
        self.loss = nn.MultiLabelSoftMarginLoss() #BCEWithLogitsLoss() 
        
        self._threshold = 0.5

        initializer(self)
Example #7
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 sentence_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 pretrained_archive=None) -> None:
        super(SentenceClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("tokens")
        self.sentence_encoder = sentence_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim(
        ) != sentence_encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the sentence_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       sentence_encoder.get_input_dim()))
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
        }
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)

        # if a pretrained archive is given, load its weights
        if pretrained_archive:
            archive = load_archive(pretrained_archive)
            self._initialize_weights_from_archive(archive)
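
The `_initialize_weights_from_archive` helper called above is not shown in this example. One plausible implementation, sketched here as an assumption rather than the original code, copies every parameter from the archived model whose name and shape match a parameter of this model:

    def _initialize_weights_from_archive(self, archive) -> None:
        # Hypothetical sketch: archive.model is the model restored by load_archive.
        archived_parameters = dict(archive.model.named_parameters())
        for name, parameter in self.named_parameters():
            if name in archived_parameters and \
                    archived_parameters[name].size() == parameter.size():
                parameter.data.copy_(archived_parameters[name].data)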
Example #8
    def from_params(cls, vocab: Vocabulary, params: Params) -> 'OntoEmmaNN':
        name_embedder = TextFieldEmbedder.from_params(
            vocab, params.pop("name_embedder"))
        definition_embedder = TextFieldEmbedder.from_params(
            vocab, params.pop("definition_embedder"))
        name_encoder = Seq2VecEncoder.from_params(params.pop("name_encoder"))
        definition_encoder = Seq2VecEncoder.from_params(
            params.pop("definition_encoder"))
        siamese_feedforward = FeedForward.from_params(
            params.pop("siamese_feedforward"))
        decision_feedforward = FeedForward.from_params(
            params.pop("decision_feedforward"))

        init_params = params.pop('initializer', None)
        reg_params = params.pop('regularizer', None)
        initializer = (InitializerApplicator.from_params(init_params)
                       if init_params is not None else InitializerApplicator())
        regularizer = RegularizerApplicator.from_params(
            reg_params) if reg_params is not None else None

        return cls(vocab=vocab,
                   name_embedder=name_embedder,
                   definition_embedder=definition_embedder,
                   name_encoder=name_encoder,
                   definition_encoder=definition_encoder,
                   siamese_feedforward=siamese_feedforward,
                   decision_feedforward=decision_feedforward,
                   initializer=initializer,
                   regularizer=regularizer)
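
As a rough illustration of the configuration this `from_params` consumes, the snippet below builds a `Params` object whose keys mirror the pops above. The sub-configurations (embedding sizes, encoder types, feedforward dimensions) are invented for the sketch, and `vocab` is assumed to be an already-built `Vocabulary`; none of these values come from the original project.

# Illustrative only: key names match the pops above, values are invented.
example_params = Params({
    "name_embedder": {"tokens": {"type": "embedding", "embedding_dim": 100}},
    "definition_embedder": {"tokens": {"type": "embedding", "embedding_dim": 100}},
    "name_encoder": {"type": "lstm", "input_size": 100, "hidden_size": 100},
    "definition_encoder": {"type": "lstm", "input_size": 100, "hidden_size": 100},
    "siamese_feedforward": {"input_dim": 100, "num_layers": 1,
                            "hidden_dims": 100, "activations": "relu"},
    "decision_feedforward": {"input_dim": 200, "num_layers": 1,
                             "hidden_dims": 1, "activations": "sigmoid"},
})
model = OntoEmmaNN.from_params(vocab=vocab, params=example_params)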
Example #9
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(PriorityCrisisClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        # self.num_classes = self.vocab.get_vocab_size("labels")
        self.text_encoder = text_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != text_encoder.get_input_dim(
        ):
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the title_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       text_encoder.get_input_dim()))
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }

        # the weight vector [1.0000, 4.6600, 6.0852, 83.3817] was obtained from
        # ./tests/models/priority_crisis_classifier_test.py and was learned from the training set;
        # a similar helper, get_weights, is declared in that test script.
        weights = torch.tensor([1.0000, 4.0000, 8.0000, 16.0000],
                               dtype=torch.float32)
        self.loss = torch.nn.CrossEntropyLoss()
        # self.loss = torch.nn.CrossEntropyLoss(weight=weights)
        initializer(self)
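
The `get_weights` helper referenced in the comment is not shown here. A hypothetical version, assuming it derives inverse-frequency class weights normalised so the most frequent class gets weight 1.0 (which would be consistent with the vector quoted above), might look like this:

def get_weights(label_counts):
    # Hypothetical sketch, not the original helper: label_counts holds the
    # training-set counts for [low, medium, high, critical].
    counts = torch.tensor(label_counts, dtype=torch.float32)
    return counts.max() / counts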
Example #10
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 text_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(EFClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        logger.info("------------------------------------")
        logger.info("num class {}".format(self.num_classes))
        logger.info("------------------------------------")
        self.text_encoder = text_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != text_encoder.get_input_dim(
        ):
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the text_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       text_encoder.get_input_dim()))
        self.f1 = F1Measure(positive_label=1)
        self.loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #11
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 SH_field_embedder: TextFieldEmbedder,
                 abstract_text_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(EtdClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.SH_field_embedder = SH_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.label_dict = self.vocab.get_index_to_token_vocabulary('labels')
        self.abstract_text_encoder = abstract_text_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != abstract_text_encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the abstract_text_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            abstract_text_encoder.get_input_dim()))

        self.metrics = {
                "roc_auc_score": RocAucScore()
        }
        self.loss = torch.nn.BCEWithLogitsLoss()

        initializer(self)
Example #12
    def from_params(
            cls, vocab: Vocabulary, params: Params
    ) -> 'DialogueContextHierarchicalCoherenceClassifier':
        embedder_params = params.pop("text_field_embedder")
        text_field_embedder = TextFieldEmbedder.from_params(
            vocab, embedder_params)
        utterance_encoder = Seq2VecEncoder.from_params(
            params.pop("utterance_encoder"))
        context_encoder = Seq2VecEncoder.from_params(
            params.pop("context_encoder"))
        response_encoder = Seq2VecEncoder.from_params(
            params.pop("response_encoder"))
        classifier_feedforward = FeedForward.from_params(
            params.pop("classifier_feedforward"))

        initializer = InitializerApplicator.from_params(
            params.pop("initializer", []))
        regularizer = RegularizerApplicator.from_params(
            params.pop("regularizer", []))

        return cls(vocab=vocab,
                   text_field_embedder=text_field_embedder,
                   utterance_encoder=utterance_encoder,
                   context_encoder=context_encoder,
                   response_encoder=response_encoder,
                   classifier_feedforward=classifier_feedforward,
                   initializer=initializer,
                   regularizer=regularizer)
Example #13
    def from_params(cls, params: Params, vocab: Vocabulary) -> 'CMVPredictor':

        response_embedder_params = params.pop("response_embedder")
        response_embedder = TextFieldEmbedder.from_params(
            vocab=vocab, params=response_embedder_params)

        response_word_attention_params = params.pop("response_word_attention")
        response_word_attention = Seq2VecEncoder.from_params(
            params=response_word_attention_params)

        response_encoder_params = params.pop("response_encoder")
        response_encoder = Seq2SeqEncoder.from_params(
            params=response_encoder_params)

        response_sentence_attention_params = params.pop(
            "response_sentence_attention")
        response_sentence_attention = Seq2VecEncoder.from_params(
            params=response_sentence_attention_params)

        op_embedder_params = params.pop("op_embedder", None)
        op_embedder = None
        if op_embedder_params is not None:
            op_embedder = TextFieldEmbedder.from_params(
                vocab=vocab, params=op_embedder_params)

        op_word_attention_params = params.pop("op_word_attention", None)
        op_word_attention = None
        if op_word_attention_params is not None:
            op_word_attention = Seq2VecEncoder.from_params(
                params=op_word_attention_params)

        op_encoder_params = params.pop("op_encoder", None)
        op_encoder = None
        if op_encoder_params is not None:
            op_encoder = Seq2SeqEncoder.from_params(params=op_encoder_params)

        output_feedforward = FeedForward.from_params(
            params=params.pop('output_feedforward'))

        feature_feedforward = None
        feature_feedforward_params = params.pop('feature_feedforward', None)
        if feature_feedforward_params is not None:
            feature_feedforward = FeedForward.from_params(
                params=feature_feedforward_params)

        dropout = params.pop("dropout", 0)

        initializer = InitializerApplicator.from_params(
            params=params.pop('initializer', []))
        regularizer = RegularizerApplicator.from_params(
            params=params.pop('regularizer', []))

        params.assert_empty(cls.__name__)

        return cls(vocab, response_embedder, response_word_attention,
                   response_encoder, response_sentence_attention,
                   output_feedforward, op_embedder, op_word_attention,
                   op_encoder, dropout, initializer, regularizer,
                   feature_feedforward)
Example #14
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 share_encoder: Seq2VecEncoder = None,
                 private_encoder: Seq2VecEncoder = None,
                 dropout: float = None,
                 input_dropout: float = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: RegularizerApplicator = None) -> None:
        super(JointSentimentClassifier, self).__init__(vocab=vocab,
                                                       regularizer=regularizer)

        self._text_field_embedder = text_field_embedder
        if share_encoder is None and private_encoder is None:
            share_rnn = nn.LSTM(
                input_size=self._text_field_embedder.get_output_dim(),
                hidden_size=150,
                batch_first=True,
                dropout=dropout,
                bidirectional=True)
            share_encoder = PytorchSeq2SeqWrapper(share_rnn)
            private_rnn = nn.LSTM(
                input_size=self._text_field_embedder.get_output_dim(),
                hidden_size=150,
                batch_first=True,
                dropout=dropout,
                bidirectional=True)
            private_encoder = PytorchSeq2SeqWrapper(private_rnn)
            logger.info("Using LSTM as encoder")
            self._domain_embeddings = Embedding(
                len(TASKS_NAME), self._text_field_embedder.get_output_dim())
        self._share_encoder = share_encoder

        self._s_domain_discriminator = Discriminator(
            share_encoder.get_output_dim(), len(TASKS_NAME))

        self._p_domain_discriminator = Discriminator(
            private_encoder.get_output_dim(), len(TASKS_NAME))

        # TODO individual valid discriminator
        self._valid_discriminator = Discriminator(
            self._domain_embeddings.get_output_dim(), 2)

        for task in TASKS_NAME:
            tagger = SentimentClassifier(
                vocab=vocab,
                text_field_embedder=self._text_field_embedder,
                share_encoder=self._share_encoder,
                private_encoder=copy.deepcopy(private_encoder),
                s_domain_discriminator=self._s_domain_discriminator,
                p_domain_discriminator=self._p_domain_discriminator,
                valid_discriminator=self._valid_discriminator,
                dropout=dropout,
                input_dropout=input_dropout,
                label_smoothing=0.1,
                initializer=initializer)
            self.add_module("_tagger_{}".format(task), tagger)

        logger.info("Multi-Task Learning Model has been instantiated.")
Example #15
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 feedforward: Optional[FeedForward] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 dropout: float = 0.0,
                 label_name: str = 'target-sentiment-labels') -> None:
        super().__init__(vocab, regularizer)
        '''
        :param vocab: A Vocabulary, required in order to compute sizes 
                      for input/output projections.
        :param embedder: Used to embed the text.
        :param encoder: Encodes the sentence/text. E.g. LSTM
        :param feedforward: An optional feed forward layer to apply after the 
                            encoder
        :param initializer: Used to initialize the model parameters.
        :param regularizer: If provided, will be used to calculate the 
                            regularization penalty during training.
        :param dropout: Dropout to apply after each layer apart from the last
                        layer. All dropout that is applied to time-based data
                        will be `variational dropout`_; all else will be
                        standard dropout.
        :param label_name: Name of the label name space.
        
        This is based on the LSTM model by 
        `Tang et al. 2016 <https://www.aclweb.org/anthology/C16-1311.pdf>`_
        
        '''
        self.label_name = label_name
        self.embedder = embedder
        self.encoder = encoder
        self.num_classes = self.vocab.get_vocab_size(self.label_name)
        self.feedforward = feedforward

        if feedforward is not None:
            output_dim = self.feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
        self.label_projection = Linear(output_dim, self.num_classes)

        self.metrics = {"accuracy": CategoricalAccuracy()}
        self.f1_metrics = {}
        # F1 Scores
        label_index_name = self.vocab.get_index_to_token_vocabulary(
            self.label_name)
        for label_index, _label_name in label_index_name.items():
            _label_name = f'F1_{_label_name.capitalize()}'
            self.f1_metrics[_label_name] = F1Measure(label_index)
        self._variational_dropout = InputVariationalDropout(dropout)
        self._naive_dropout = Dropout(dropout)
        check_dimensions_match(embedder.get_output_dim(),
                               encoder.get_input_dim(), 'Embedding', 'Encoder')
        if self.feedforward is not None:
            check_dimensions_match(encoder.get_output_dim(),
                                   feedforward.get_input_dim(), 'Encoder',
                                   'FeedForward')
        initializer(self)
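
The docstring above distinguishes variational dropout (applied to time-distributed tensors) from standard dropout (applied to pooled vectors). The fragment below is an illustrative sketch of how that split typically plays out inside `forward`; the field name `tokens` and the use of `allennlp.nn.util` (as `util`) are assumptions, not part of the original model.

        # Hypothetical forward-pass fragment: variational dropout on the
        # (batch, time, dim) embeddings, naive dropout on the pooled vector
        # and on the optional feedforward output.
        embedded_text = self._variational_dropout(self.embedder(tokens))
        mask = util.get_text_field_mask(tokens)
        encoded_text = self._naive_dropout(self.encoder(embedded_text, mask))
        if self.feedforward is not None:
            encoded_text = self._naive_dropout(self.feedforward(encoded_text))
        logits = self.label_projection(encoded_text)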
Example #16
    def __init__(
            self,
            vocab: Vocabulary,
            input_unit: Seq2VecEncoder,
            text_field_embedder: TextFieldEmbedder,
            # embedding_projection_dim: int = None,
            classifier_feedforward: FeedForward = None,
            max_step: int = 12,
            n_memories: int = 3,
            self_attention: bool = False,
            memory_gate: bool = False,
            dropout: float = 0.15,
            loss_weights=None,
            initializer: InitializerApplicator = InitializerApplicator(),
            regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.num_classes = max(self.vocab.get_vocab_size("labels"), 2)

        self.text_field_embedder = text_field_embedder

        self.proj = nn.Linear(text_field_embedder.get_output_dim(),
                              input_unit.get_input_dim())
        self.input_unit = input_unit
        self.mac = MACCell(
            text_field_embedder.get_output_dim(
            ),  # input_unit.get_output_dim(),
            max_step=max_step,
            n_memories=n_memories,
            self_attention=self_attention,
            memory_gate=memory_gate,
            dropout=dropout,
            save_attns=False,
        )

        hidden_size = 2 * input_unit.get_output_dim()
        n_layers = 3
        self.classifier = classifier_feedforward or FeedForward(
            input_dim=hidden_size,
            num_layers=n_layers,
            hidden_dims=(n_layers - 1) * [hidden_size] + [self.num_classes],
            activations=[
                Activation.by_name("relu")(),
                Activation.by_name("relu")(),
                Activation.by_name("linear")()
            ],
            dropout=[dropout, dropout, 0.0])

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "f1": F1Measure(positive_label=1),
            "weighted_f1": WeightedF1Measure(),
            "fbeta": FBetaMeasure(average='micro')
        }

        weights = loss_weights and torch.FloatTensor(loss_weights)
        self.loss = nn.CrossEntropyLoss(weight=weights)

        initializer(self)
Example #17
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 label_namespace: str = "logical_form",
                 feedforward: Optional[FeedForward] = None,
                 dropout: Optional[float] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.num_tags = self.vocab.get_vocab_size("logical_form")
        self.encoder = encoder
        
        self.text_field_embedder = text_field_embedder
        self.BOW_embedder_question = BagOfWordCountsTokenEmbedder(
            vocab, "tokens", projection_dim=self.encoder.get_output_dim())
        self.BOW_embedder_description = BagOfWordCountsTokenEmbedder(
            vocab, "tokens", projection_dim=self.encoder.get_output_dim())
        self.BOW_embedder_detail = BagOfWordCountsTokenEmbedder(
            vocab, "tokens", projection_dim=self.encoder.get_output_dim())
        
        
        # using crf as the estimator for sequential tags
        self.crf = ConditionalRandomField(
            self.num_tags, 
            include_start_end_transitions=False
        )
        
        self.softmax_layer = Softmax()
        self.ce_loss = CrossEntropyLoss()
        
        self.matched = 0
        self.all_pred = 0

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self._feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
            
        self.question_pred_layer = Linear(4*output_dim, 3*self.num_tags)
        # if  constrain_crf_decoding and calculate_span_f1 are not
        # provided, (i.e., they're None), set them to True
        # if label_encoding is provided and False if it isn't.

        self.metrics = {}
        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        if feedforward is not None:
            check_dimensions_match(4*encoder.get_output_dim(), feedforward.get_input_dim(),
                                   "encoder output dim", "feedforward input dim")
        initializer(self)
Example #18
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 abstract_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(SentimentClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.abstract_encoder = abstract_encoder
        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim(
        ) != abstract_encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the abstract_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       abstract_encoder.get_input_dim()))
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)

        model = Sequential()
        model.add(
            Conv2D(64,
                   kernel_size=(3, 3),
                   input_shape=(530, 700, 3),
                   padding='VALID'))
        model.add(Conv2D(64, kernel_size=(3, 3), padding='VALID'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

        model.add(
            Conv2D(128,
                   kernel_size=(3, 3),
                   strides=1,
                   activation='relu',
                   padding='VALID'))
        model.add(
            Conv2D(128,
                   kernel_size=(3, 3),
                   strides=1,
                   activation='relu',
                   padding='VALID'))
        model.add(AveragePooling2D(pool_size=(19, 19)))

        model.add(Flatten())

        model.summary()

        self.image_model = model
Example #19
 def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder,
              encoder: Seq2VecEncoder, feedforward: FeedForward):
     super().__init__(vocab)
     self.embedder = embedder
     self.encoder = encoder
     num_labels = vocab.get_vocab_size("labels")
     print("==> encoded input shape: {}, output shape: {}\n".format(
         encoder.get_input_dim(), encoder.get_output_dim()))
     #  logger.info("==> encoded input shape: {}, output shape: {}\n".format(encoder.get_input_dim(),encoder.get_output_dim()))
     self.feedforward = feedforward
     self.classifier = torch.nn.Linear(self.feedforward.get_output_dim(),
                                       num_labels)
     self.accuracy = CategoricalAccuracy()
Example #20
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        clauses_encoder: Seq2VecEncoder,
        outer_encoder: Seq2SeqEncoder,
        label_namespace: str = "labels",
        constraint_type: str = None,
        include_start_end_transitions: bool = True,
        dropout: float = None,
        loss_weights: Optional[List] = [],
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super(JCC, self).__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.clauses_encoder = clauses_encoder
        self.outer_encoder = outer_encoder
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self.label_projection_layer = TimeDistributed(
            Linear(outer_encoder.get_output_dim(), self.num_tags))

        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(constraint_type, labels)
        self.crf = ConditionalRandomField(
            self.num_tags,
            constraints,
            include_start_end_transitions=include_start_end_transitions,
        )
        self.metrics = {"accuracy": Accuracy()}

        check_dimensions_match(
            text_field_embedder.get_output_dim(),
            clauses_encoder.get_input_dim(),
            "text field embedding dim",
            "clauses encoder input dim",
        )
        check_dimensions_match(
            clauses_encoder.get_output_dim(),
            outer_encoder.get_input_dim(),
            "clauses encoder output dim",
            "outer encoder input dim",
        )
        initializer(self)
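
A constructor like this one is usually paired with a forward pass that pools each clause, runs the outer encoder over the clause sequence, projects to tag logits, and lets the CRF score or decode them. The method below is a sketch under those assumptions; the field names, the mask handling, and the inline `TimeDistributed` wrapping (which would normally be built once in `__init__`) are not from the original.

    def forward(self,
                clauses: Dict[str, torch.LongTensor],
                tags: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # Illustrative only: clauses holds (batch, num_clauses, num_tokens) token ids.
        embedded = self.text_field_embedder(clauses, num_wrapping_dims=1)
        word_mask = util.get_text_field_mask(clauses, num_wrapping_dims=1)
        clause_mask = (word_mask.sum(-1) > 0).long()
        # Pool each clause, then run the outer encoder over the clause sequence.
        clause_vectors = TimeDistributed(self.clauses_encoder)(embedded, word_mask)
        encoded_clauses = self.outer_encoder(clause_vectors, clause_mask)
        logits = self.label_projection_layer(encoded_clauses)
        output = {"logits": logits,
                  "viterbi_tags": self.crf.viterbi_tags(logits, clause_mask)}
        if tags is not None:
            output["loss"] = -self.crf(logits, tags, clause_mask)
        return output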
Example #21
 def setUp(self):
     super(TestTokenCharactersEncoder, self).setUp()
     self.vocab = Vocabulary()
     self.vocab.add_token_to_namespace("1", "token_characters")
     self.vocab.add_token_to_namespace("2", "token_characters")
     self.vocab.add_token_to_namespace("3", "token_characters")
     self.vocab.add_token_to_namespace("4", "token_characters")
     params = Params({
             "embedding": {
                     "embedding_dim": 2,
                     "vocab_namespace": "token_characters"
                     },
             "encoder": {
                     "type": "cnn",
                     "embedding_dim": 2,
                     "num_filters": 4,
                     "ngram_filter_sizes": [1, 2],
                     "output_dim": 3
                     }
             })
     self.encoder = TokenCharactersEncoder.from_params(vocab=self.vocab, params=deepcopy(params))
     self.embedding = Embedding.from_params(vocab=self.vocab, params=params["embedding"])
     self.inner_encoder = Seq2VecEncoder.from_params(params["encoder"])
     constant_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
     initializer = InitializerApplicator([(".*", constant_init)])
     initializer(self.encoder)
     initializer(self.embedding)
     initializer(self.inner_encoder)
Example #22
 def setUp(self):
     super(TestTokenCharactersEncoder, self).setUp()
     self.vocab = Vocabulary()
     self.vocab.add_token_to_namespace("1", "token_characters")
     self.vocab.add_token_to_namespace("2", "token_characters")
     self.vocab.add_token_to_namespace("3", "token_characters")
     self.vocab.add_token_to_namespace("4", "token_characters")
     params = Params({
         "embedding": {
             "embedding_dim": 2,
             "vocab_namespace": "token_characters"
         },
         "encoder": {
             "type": "cnn",
             "embedding_dim": 2,
             "num_filters": 4,
             "ngram_filter_sizes": [1, 2],
             "output_dim": 3
         }
     })
     self.encoder = TokenCharactersEncoder.from_params(
         self.vocab, deepcopy(params))
     self.embedding = Embedding.from_params(self.vocab, params["embedding"])
     self.inner_encoder = Seq2VecEncoder.from_params(params["encoder"])
     constant_init = lambda tensor: torch.nn.init.constant(tensor, 1.)
     initializer = InitializerApplicator(default_initializer=constant_init)
     initializer(self.encoder)
     initializer(self.embedding)
     initializer(self.inner_encoder)
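
With either setUp above, a follow-up test might simply check the pooled output shape of the composed encoder. The test below is a sketch, not part of the original suite, and assumes `torch` is imported at module level:

 def test_forward_produces_expected_shape(self):
     # Sketch: (batch=3, num_tokens=4, num_characters=7) of random character ids;
     # the configured output_dim is 3, so the encoded output should be (3, 4, 3).
     character_ids = torch.randint(1, self.vocab.get_vocab_size("token_characters"),
                                   (3, 4, 7))
     output = self.encoder(character_ids)
     assert tuple(output.size()) == (3, 4, 3)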
Example #23
    def __init__(self,
                 vocab: Vocabulary,
                 source_text_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 tied_source_embedder_key: Optional[str] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 positive_label: str = "algebra",  
                 target_namespace: str = "tokens")-> None:

        super(TextClassifier, self).__init__(vocab, regularizer)

        self._source_text_embedder = source_text_embedder
        self._target_namespace = target_namespace
        self._encoder = encoder
        self._linear = torch.nn.Linear(in_features=encoder.get_output_dim(), 
                                        out_features=vocab.get_vocab_size('labels'))
        self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
        self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
        
        self.accuracy = CategoricalAccuracy()
        positive_label = vocab.get_token_index(positive_label, namespace='labels')
        # for computing precision, recall and f1
        self.f1_measure = F1Measure(positive_label)

        # the loss function combines log-softmax and NLL loss; the input to this function is logits
        self.loss_function = torch.nn.CrossEntropyLoss()  

        
        initializer(self)
Example #24
    def __init__(self,
                 vocab: Vocabulary,
                 question_embedder: TextFieldEmbedder,
                 action_embedding_dim: int,
                 encoder: Seq2SeqEncoder,
                 entity_encoder: Seq2VecEncoder,
                 max_decoding_steps: int,
                 use_neighbor_similarity_for_linking: bool = False,
                 dropout: float = 0.0,
                 num_linking_features: int = 10,
                 rule_namespace: str = 'rule_labels',
                 tables_directory: str = '/wikitables/') -> None:
        super(WikiTablesSemanticParser, self).__init__(vocab)
        self._question_embedder = question_embedder
        self._encoder = encoder
        self._entity_encoder = TimeDistributed(entity_encoder)
        self._max_decoding_steps = max_decoding_steps
        self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._rule_namespace = rule_namespace
        self._denotation_accuracy = WikiTablesAccuracy(tables_directory)
        self._action_sequence_accuracy = Average()
        self._has_logical_form = Average()

        self._action_padding_index = -1  # the padding value used by IndexField
        num_actions = vocab.get_vocab_size(self._rule_namespace)
        self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
        self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
        self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1)

        # This is what we pass as input in the first step of decoding, when we don't have a
        # previous action, or a previous question attention.
        self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
        self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
        torch.nn.init.normal_(self._first_action_embedding)
        torch.nn.init.normal_(self._first_attended_question)

        check_dimensions_match(entity_encoder.get_output_dim(), question_embedder.get_output_dim(),
                               "entity word average embedding dim", "question embedding dim")

        self._num_entity_types = 4  # TODO(mattg): get this in a more principled way somehow?
        self._num_start_types = 5  # TODO(mattg): get this in a more principled way somehow?
        self._embedding_dim = question_embedder.get_output_dim()
        self._type_params = torch.nn.Linear(self._num_entity_types, self._embedding_dim)
        self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim)

        if num_linking_features > 0:
            self._linking_params = torch.nn.Linear(num_linking_features, 1)
        else:
            self._linking_params = None

        if self._use_neighbor_similarity_for_linking:
            self._question_entity_params = torch.nn.Linear(1, 1)
            self._question_neighbor_params = torch.nn.Linear(1, 1)
        else:
            self._question_entity_params = None
            self._question_neighbor_params = None
Example #25
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 seq2vec_encoder: Seq2VecEncoder,
                 initializer: InitializerApplicator) -> None:
        super(BertModel, self).__init__(vocab)

        self.text_field_embedder = text_field_embedder
        self.seq2vec_encoder = seq2vec_encoder

        self.num_types = self.vocab.get_vocab_size("state_change_type_labels")
        self.aggregate_feedforward = Linear(seq2vec_encoder.get_output_dim(),
                                            self.num_types)

        self._type_accuracy = CategoricalAccuracy()

        self.type_f1_metrics = {}
        self.type_labels_vocab = self.vocab.get_index_to_token_vocabulary(
            "state_change_type_labels")
        for type_label in self.type_labels_vocab.values():
            self.type_f1_metrics["type_" + type_label] = F1Measure(
                self.vocab.get_token_index(type_label,
                                           "state_change_type_labels"))

        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #26
    def from_params(cls,
                    vocab: Vocabulary,
                    params: Params,
                    constructor_to_call=None,
                    constructor_to_inspect=None) -> 'BertModel':
        #initialize the class using JSON params
        embedder_params = params.pop("text_field_embedder")
        token_params = embedder_params.pop("tokens")
        embedding = PretrainedTransformerEmbedder.from_params(
            vocab=vocab, params=token_params)
        text_field_embedder = BasicTextFieldEmbedder(
            token_embedders={'tokens': embedding})
        #         text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)

        seq2vec_encoder_params = params.pop("seq2vec_encoder")
        seq2vec_encoder = Seq2VecEncoder.from_params(seq2vec_encoder_params)

        initializer = InitializerApplicator(
        )  #.from_params(params.pop("initializer", []))

        params.assert_empty(cls.__name__)
        #         print(cls)
        return cls(vocab=vocab,
                   text_field_embedder=text_field_embedder,
                   seq2vec_encoder=seq2vec_encoder,
                   initializer=initializer)
Example #27
    def from_params(cls, vocab: Vocabulary,
                    params: Params) -> 'SarcasmClassifier':
        bert_model_name = params.pop("bert_model_name")
        quote_response_encoder = Seq2VecEncoder.from_params(
            params.pop("quote_response_encoder"))
        classifier_feedforward = FeedForward.from_params(
            params.pop("classifier_feedforward"))
        classifier_feedforward_2 = FeedForward.from_params(
            params.pop("classifier_feedforward_2"))

        initializer = InitializerApplicator.from_params(
            params.pop('initializer', []))
        regularizer = RegularizerApplicator.from_params(
            params.pop('regularizer', []))
        report_auxiliary_metrics = params.pop_bool("report_auxiliary_metrics",
                                                   False)

        # predict_mode = params.pop_bool("predict_mode", False)
        # print(f"pred mode: {predict_mode}")

        return cls(vocab=vocab,
                   bert_model_name=bert_model_name,
                   quote_response_encoder=quote_response_encoder,
                   classifier_feedforward=classifier_feedforward,
                   classifier_feedforward_2=classifier_feedforward_2,
                   initializer=initializer,
                   regularizer=regularizer,
                   report_auxiliary_metrics=report_auxiliary_metrics)
Example #28
 def setUp(self):
     super().setUp()
     self.vocab = Vocabulary()
     self.vocab.add_token_to_namespace("1", "token_characters")
     self.vocab.add_token_to_namespace("2", "token_characters")
     self.vocab.add_token_to_namespace("3", "token_characters")
     self.vocab.add_token_to_namespace("4", "token_characters")
     params = Params({
         "embedding": {
             "embedding_dim": 2,
             "vocab_namespace": "token_characters"
         },
         "encoder": {
             "type": "cnn",
             "embedding_dim": 2,
             "num_filters": 4,
             "ngram_filter_sizes": [1, 2],
             "output_dim": 3
         }
     })
     self.encoder = TokenCharactersEncoder.from_params(
         vocab=self.vocab, params=deepcopy(params))
     self.embedding = Embedding.from_params(vocab=self.vocab,
                                            params=params["embedding"])
     self.inner_encoder = Seq2VecEncoder.from_params(params["encoder"])
     constant_init = Initializer.from_params(
         Params({
             "type": "constant",
             "val": 1.
         }))
     initializer = InitializerApplicator([(".*", constant_init)])
     initializer(self.encoder)
     initializer(self.embedding)
     initializer(self.inner_encoder)
Example #29
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 encoder: Seq2VecEncoder, vocab: Vocabulary) -> None:
        super().__init__(vocab)
        # We need the embeddings to convert word IDs to their vector representations
        self.word_embeddings = word_embeddings

        self.encoder = encoder

        # After converting a sequence of vectors to a single vector, we feed it into
        # a fully-connected linear layer to reduce the dimension to the total number of labels.
        self.linear = torch.nn.Linear(
            in_features=encoder.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))

        # Monitor the metrics - we use accuracy, as well as precision, recall and F1 for each label
        self.accuracy = CategoricalAccuracy()
        self.f1_measure_positive = F1Measure(
            vocab.get_token_index("positive", "labels"))
        self.f1_measure_negative = F1Measure(
            vocab.get_token_index("negative", "labels"))
        self.f1_measure_neutral = F1Measure(
            vocab.get_token_index("neutral", "labels"))

        # We use the cross entropy loss because this is a classification task.
        # Note that PyTorch's CrossEntropyLoss combines log-softmax and negative log-likelihood loss,
        # which makes it unnecessary to add a separate softmax layer.
        self.loss_function = torch.nn.CrossEntropyLoss()
Example #30
 def __init__(self, vocab: Vocabulary, embedder: TextFieldEmbedder,
              encoder: Seq2VecEncoder):
     super().__init__(vocab)
     self.embedder = embedder
     self.encoder = encoder
     num_labels = vocab.get_vocab_size("labels")
     self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
Example #31
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 question_encoder: Seq2VecEncoder,
                 answers_encoder: Seq2VecEncoder,
                 captions_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size('labels')

        self.question_encoder = question_encoder
        self.answers_encoder = TimeDistributed(answers_encoder)
        self.captions_encoder = TimeDistributed(captions_encoder)
        self.classifier_feedforward = classifier_feedforward
        # self.classifier_feedforward = TimeDistributed(classifier_feedforward)

        self._encoding_dim = captions_encoder.get_output_dim()
        self.ques_cap_att = LinearMatrixAttention(self._encoding_dim,
                                                  self._encoding_dim,
                                                  'x,y,x*y')

        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example #32
    def from_params(cls, vocab: Vocabulary,
                    params: Params) -> 'SarcasmClassifier':
        embedder_params1 = params.pop("text_field_embedder")
        text_field_embedder = TextFieldEmbedder.from_params(embedder_params1,
                                                            vocab=vocab)
        quote_response_encoder = Seq2VecEncoder.from_params(
            params.pop("quote_response_encoder"))
        classifier_feedforward = FeedForward.from_params(
            params.pop("classifier_feedforward"))
        classifier_feedforward_2 = FeedForward.from_params(
            params.pop("classifier_feedforward_2"))

        initializer = InitializerApplicator.from_params(
            params.pop('initializer', []))
        regularizer = RegularizerApplicator.from_params(
            params.pop('regularizer', []))
        report_auxiliary_metrics = params.pop_bool("report_auxiliary_metrics",
                                                   False)

        predict_mode = params.pop_bool("predict_mode", False)
        # print(f"pred mode: {predict_mode}")

        return cls(vocab=vocab,
                   text_field_embedder=text_field_embedder,
                   quote_response_encoder=quote_response_encoder,
                   classifier_feedforward=classifier_feedforward,
                   classifier_feedforward_2=classifier_feedforward_2,
                   initializer=initializer,
                   regularizer=regularizer,
                   report_auxiliary_metrics=report_auxiliary_metrics,
                   predict_mode=predict_mode)
Example #33
 def test_from_params_requires_batch_first(self):
     params = Params({
             "type": "lstm",
             "batch_first": False,
             })
     with pytest.raises(ConfigurationError):
         # pylint: disable=unused-variable
         encoder = Seq2VecEncoder.from_params(params)
Example #34
 def test_from_params_builders_encoder_correctly(self):
     # We're just making sure parameters get passed through correctly here, and that the basic
     # API works.
     params = Params({
             "type": "lstm",
             "bidirectional": True,
             "num_layers": 3,
             "input_size": 5,
             "hidden_size": 7
             })
     encoder = Seq2VecEncoder.from_params(params)
     # pylint: disable=protected-access
     assert encoder.__class__.__name__ == 'PytorchSeq2VecWrapper'
     assert encoder._module.__class__.__name__ == 'LSTM'
     assert encoder._module.num_layers == 3
     assert encoder._module.input_size == 5
     assert encoder._module.hidden_size == 7
     assert encoder._module.bidirectional is True
     assert encoder._module.batch_first is True
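
For completeness, an encoder built this way can be applied directly to a padded batch; the pooled output of a bidirectional LSTM with hidden size 7 has dimension 2 * 7 = 14. The test below is a sketch, not part of the original test class, and assumes `torch` is imported at module level:

 def test_forward_pools_sequence_to_vector(self):
     # Sketch: build the same encoder and check the pooled output shape.
     params = Params({
             "type": "lstm",
             "bidirectional": True,
             "num_layers": 3,
             "input_size": 5,
             "hidden_size": 7
             })
     encoder = Seq2VecEncoder.from_params(params)
     inputs = torch.rand(2, 6, 5)    # (batch, sequence_length, input_size)
     mask = torch.ones(2, 6).long()  # no padding in this toy batch
     output = encoder(inputs, mask)
     assert tuple(output.size()) == (2, 14)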