def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             share_encoder: Seq2VecEncoder = None,
             private_encoder: Seq2VecEncoder = None,
             dropout: float = None,
             input_dropout: float = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: RegularizerApplicator = None) -> None:
    super(JointSentimentClassifier, self).__init__(vocab=vocab, regularizer=regularizer)
    self._text_field_embedder = text_field_embedder
    if share_encoder is None and private_encoder is None:
        # Fall back to bidirectional LSTMs when no encoders are supplied.
        # `dropout or 0.0` guards against dropout=None, which nn.LSTM rejects.
        share_rnn = nn.LSTM(input_size=self._text_field_embedder.get_output_dim(),
                            hidden_size=150,
                            batch_first=True,
                            dropout=dropout or 0.0,
                            bidirectional=True)
        share_encoder = PytorchSeq2SeqWrapper(share_rnn)
        private_rnn = nn.LSTM(input_size=self._text_field_embedder.get_output_dim(),
                              hidden_size=150,
                              batch_first=True,
                              dropout=dropout or 0.0,
                              bidirectional=True)
        private_encoder = PytorchSeq2SeqWrapper(private_rnn)
        logger.info("Using LSTM as encoder")
    self._domain_embeddings = Embedding(len(TASKS_NAME), self._text_field_embedder.get_output_dim())
    self._share_encoder = share_encoder
    self._s_domain_discriminator = Discriminator(share_encoder.get_output_dim(), len(TASKS_NAME))
    self._p_domain_discriminator = Discriminator(private_encoder.get_output_dim(), len(TASKS_NAME))
    # TODO: individual valid discriminator
    self._valid_discriminator = Discriminator(self._domain_embeddings.get_output_dim(), 2)
    for task in TASKS_NAME:
        tagger = SentimentClassifier(
            vocab=vocab,
            text_field_embedder=self._text_field_embedder,
            share_encoder=self._share_encoder,
            private_encoder=copy.deepcopy(private_encoder),
            s_domain_discriminator=self._s_domain_discriminator,
            p_domain_discriminator=self._p_domain_discriminator,
            valid_discriminator=self._valid_discriminator,
            dropout=dropout,
            input_dropout=input_dropout,
            label_smoothing=0.1,
            initializer=initializer)
        self.add_module("_tagger_{}".format(task), tagger)
    logger.info("Multi-Task Learning Model has been instantiated.")
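# The per-task taggers above are registered with add_module rather than stored in a plain
# dict, so their parameters are visible to the optimizer and to state_dict(). A minimal
# sketch of how a registered tagger would typically be fetched later (helper name is
# hypothetical, not part of the original code):
def _get_tagger(self, task: str):
    return getattr(self, "_tagger_{}".format(task))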
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder):
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    num_labels = vocab.get_vocab_size("labels")
    logger.info("==> encoder input dim: {}, output dim: {}".format(
        encoder.get_input_dim(), encoder.get_output_dim()))
    self.classifier = torch.nn.Linear(self.encoder.get_output_dim(), num_labels)
    self.accuracy = CategoricalAccuracy()
def __init__(self,
             vocab: Vocabulary,
             question_embedder: TextFieldEmbedder,
             action_embedding_dim: int,
             encoder: Seq2SeqEncoder,
             entity_encoder: Seq2VecEncoder,
             max_decoding_steps: int,
             use_neighbor_similarity_for_linking: bool = False,
             dropout: float = 0.0,
             num_linking_features: int = 10,
             rule_namespace: str = 'rule_labels',
             tables_directory: str = '/wikitables/') -> None:
    super(WikiTablesSemanticParser, self).__init__(vocab)
    self._question_embedder = question_embedder
    self._encoder = encoder
    self._entity_encoder = TimeDistributed(entity_encoder)
    self._max_decoding_steps = max_decoding_steps
    self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace
    self._denotation_accuracy = WikiTablesAccuracy(tables_directory)
    self._action_sequence_accuracy = Average()
    self._has_logical_form = Average()

    self._action_padding_index = -1  # the padding value used by IndexField
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1)

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous question attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_question)

    check_dimensions_match(entity_encoder.get_output_dim(), question_embedder.get_output_dim(),
                           "entity word average embedding dim", "question embedding dim")

    self._num_entity_types = 4  # TODO(mattg): get this in a more principled way somehow?
    self._num_start_types = 5  # TODO(mattg): get this in a more principled way somehow?
    self._embedding_dim = question_embedder.get_output_dim()
    self._type_params = torch.nn.Linear(self._num_entity_types, self._embedding_dim)
    self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim)

    if num_linking_features > 0:
        self._linking_params = torch.nn.Linear(num_linking_features, 1)
    else:
        self._linking_params = None

    if self._use_neighbor_similarity_for_linking:
        self._question_entity_params = torch.nn.Linear(1, 1)
        self._question_neighbor_params = torch.nn.Linear(1, 1)
    else:
        self._question_entity_params = None
        self._question_neighbor_params = None
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             question_encoder: Seq2VecEncoder,
             answers_encoder: Seq2VecEncoder,
             captions_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size('labels')
    self.question_encoder = question_encoder
    # Answers and captions arrive as lists per instance, so their encoders are time-distributed.
    self.answers_encoder = TimeDistributed(answers_encoder)
    self.captions_encoder = TimeDistributed(captions_encoder)
    self.classifier_feedforward = classifier_feedforward
    # self.classifier_feedforward = TimeDistributed(classifier_feedforward)
    self._encoding_dim = captions_encoder.get_output_dim()
    self.ques_cap_att = LinearMatrixAttention(self._encoding_dim, self._encoding_dim, 'x,y,x*y')
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             embedding_dropout: float,
             encoder: Seq2VecEncoder,
             encoder_dropout: float,
             out_dim: int,
             vocab: Vocabulary,
             verbose=False) -> None:
    super().__init__(vocab)
    # Maps word ids to vector representations.
    self._word_embeddings = word_embeddings
    self._embedding_dropout = torch.nn.Dropout(embedding_dropout)
    self._encoder = encoder
    self._encoder_dropout = torch.nn.Dropout(encoder_dropout)
    # A fully-connected layer projects the encoder output to the number of output classes.
    self._linear = torch.nn.Linear(in_features=encoder.get_output_dim(), out_features=out_dim)
    # Evaluation metrics: classification accuracy and F1 score.
    # self.accuracy = CategoricalAccuracy()
    # self.f1_measure = F1Measure(positive_label)
    # For classification, cross entropy is the usual loss; PyTorch's CrossEntropyLoss fuses
    # softmax with the log likelihood loss, so no explicit softmax layer is needed.
    # self.loss_function = torch.nn.CrossEntropyLoss()
    # Here we instead use BCEWithLogitsLoss (binary/multi-label setup), which likewise
    # fuses the sigmoid with the loss.
    self.loss_function = torch.nn.BCEWithLogitsLoss()
    self._verbose = verbose
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             seq2vec_encoder: Seq2VecEncoder,
             initializer: InitializerApplicator) -> None:
    super(BertModel, self).__init__(vocab)
    self.text_field_embedder = text_field_embedder
    self.seq2vec_encoder = seq2vec_encoder
    self.num_types = self.vocab.get_vocab_size("state_change_type_labels")
    self.aggregate_feedforward = Linear(seq2vec_encoder.get_output_dim(), self.num_types)
    self._type_accuracy = CategoricalAccuracy()
    self.type_f1_metrics = {}
    self.type_labels_vocab = self.vocab.get_index_to_token_vocabulary("state_change_type_labels")
    for type_label in self.type_labels_vocab.values():
        self.type_f1_metrics["type_" + type_label] = F1Measure(
            self.vocab.get_token_index(type_label, "state_change_type_labels"))
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
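# A constructor like the one above pairs naturally with a get_metrics that flattens the
# per-type F1 scores into one dict. A minimal sketch, assuming the older AllenNLP
# F1Measure API whose get_metric returns a (precision, recall, f1) tuple:
def get_metrics(self, reset: bool = False):
    metrics = {"type_accuracy": self._type_accuracy.get_metric(reset)}
    for name, metric in self.type_f1_metrics.items():
        precision, recall, f1 = metric.get_metric(reset)
        metrics[name + "_f1"] = f1
    return metrics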
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    seq2vec_encoder: Seq2VecEncoder,
    dropout: float = 0,
    label_namespace: str = 'label',
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    feedforward: Optional[FeedForward] = None,
) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._seq2vec_encoder = seq2vec_encoder
    self._feedforward = feedforward
    if feedforward is not None:
        self._classifier_input_dim = feedforward.get_output_dim()
    else:
        self._classifier_input_dim = seq2vec_encoder.get_output_dim()
    self.bn = nn.BatchNorm1d(num_features=self._classifier_input_dim)
    if dropout:
        self._dropout = nn.Dropout(dropout)
    else:
        self._dropout = None
    self._num_labels = vocab.get_vocab_size(namespace=label_namespace)
    self._classification_layer = nn.Linear(self._classifier_input_dim, self._num_labels)
    self._accuracy = CategoricalAccuracy()
    self._loss = nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    # We need the embeddings to convert word IDs to their vector representations.
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    # After converting a sequence of vectors to a single vector, we feed it into
    # a fully-connected linear layer to reduce the dimension to the total number of labels.
    self.linear = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                  out_features=vocab.get_vocab_size('labels'))
    # Monitor the metrics - overall accuracy, plus precision, recall and F1 per label.
    self.accuracy = CategoricalAccuracy()
    self.f1_measure_positive = F1Measure(vocab.get_token_index("positive", "labels"))
    self.f1_measure_negative = F1Measure(vocab.get_token_index("negative", "labels"))
    self.f1_measure_neutral = F1Measure(vocab.get_token_index("neutral", "labels"))
    # We use the cross entropy loss because this is a classification task.
    # Note that PyTorch's CrossEntropyLoss combines softmax and log likelihood loss,
    # which makes it unnecessary to add a separate softmax layer.
    self.loss_function = torch.nn.CrossEntropyLoss()
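# For reference, a constructor with the signature above would typically be wired up as
# below. A minimal sketch assuming standard AllenNLP components; LstmClassifier stands in
# for whatever this model class is actually called, and the dimensions are illustrative.
import torch
from allennlp.data.vocabulary import Vocabulary
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2vec_encoders import PytorchSeq2VecWrapper

EMBEDDING_DIM = HIDDEN_DIM = 128
vocab = Vocabulary()  # in practice: Vocabulary.from_instances(train_instances)
token_embedding = Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                            embedding_dim=EMBEDDING_DIM)
word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
encoder = PytorchSeq2VecWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
model = LstmClassifier(word_embeddings, encoder, vocab)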
def __init__(self,
             vocab: Vocabulary,
             source_text_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             tied_source_embedder_key: Optional[str] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             positive_label: str = "algebra",
             target_namespace: str = "tokens") -> None:
    super(TextClassifier, self).__init__(vocab, regularizer)
    self._source_text_embedder = source_text_embedder
    self._target_namespace = target_namespace
    self._encoder = encoder
    self._linear = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                   out_features=vocab.get_vocab_size('labels'))
    self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
    self.accuracy = CategoricalAccuracy()
    positive_label = vocab.get_token_index(positive_label, namespace='labels')
    # For computing precision, recall and F1 on the positive class.
    self.f1_measure = F1Measure(positive_label)
    # CrossEntropyLoss combines log-softmax and NLLLoss, so it expects raw logits as input.
    self.loss_function = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder):
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    num_labels = vocab.get_vocab_size("labels")
    self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             inner_encoder: Seq2VecEncoder,
             outer_encoder: Seq2SeqEncoder,
             label_namespace: str = "labels",
             dropout: float = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(HierarchicalRNN, self).__init__(vocab, regularizer)
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.inner_encoder = inner_encoder
    self.outer_encoder = outer_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self.label_projection_layer = TimeDistributed(
        Linear(outer_encoder.get_output_dim(), self.num_tags))
    self.metrics = {"accuracy": CategoricalAccuracy()}
    self._loss = torch.nn.CrossEntropyLoss()
    check_dimensions_match(text_field_embedder.get_output_dim(), inner_encoder.get_input_dim(),
                           'text field embedding dim', 'inner encoder input dim')
    check_dimensions_match(inner_encoder.get_output_dim(), outer_encoder.get_input_dim(),
                           'inner encoder output dim', 'outer encoder input dim')
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             feedforward: Optional[FeedForward] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             dropout: float = 0.0,
             label_name: str = 'target-sentiment-labels') -> None:
    '''
    :param vocab: A Vocabulary, required in order to compute sizes for input/output projections.
    :param embedder: Used to embed the text.
    :param encoder: Encodes the sentence/text, e.g. an LSTM.
    :param feedforward: An optional feed-forward layer applied after the encoder.
    :param initializer: Used to initialize the model parameters.
    :param regularizer: If provided, will be used to calculate the regularization penalty
                        during training.
    :param dropout: Dropout applied after each layer apart from the last. Dropout applied
                    to time-based data is `variational dropout`_; everything else uses
                    standard dropout.
    :param label_name: Name of the label namespace.

    This is based on the LSTM model by `Tang et al. 2016
    <https://www.aclweb.org/anthology/C16-1311.pdf>`_.
    '''
    super().__init__(vocab, regularizer)
    self.label_name = label_name
    self.embedder = embedder
    self.encoder = encoder
    self.num_classes = self.vocab.get_vocab_size(self.label_name)
    self.feedforward = feedforward
    if feedforward is not None:
        output_dim = self.feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.label_projection = Linear(output_dim, self.num_classes)
    self.metrics = {"accuracy": CategoricalAccuracy()}
    # One F1 score per label.
    self.f1_metrics = {}
    label_index_name = self.vocab.get_index_to_token_vocabulary(self.label_name)
    for label_index, _label_name in label_index_name.items():
        _label_name = f'F1_{_label_name.capitalize()}'
        self.f1_metrics[_label_name] = F1Measure(label_index)
    self._variational_dropout = InputVariationalDropout(dropout)
    self._naive_dropout = Dropout(dropout)
    check_dimensions_match(embedder.get_output_dim(), encoder.get_input_dim(),
                           'Embedding', 'Encoder')
    if self.feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                               'Encoder', 'FeedForward')
    initializer(self)
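# The two dropout modules above serve different tensor shapes: InputVariationalDropout
# drops the same feature channels at every timestep of a (batch, time, dim) sequence,
# while the plain Dropout acts on the pooled (batch, dim) vector. A minimal sketch of how
# a forward pass would typically route tensors through them; names are illustrative, not
# the repository's actual forward.
def forward(self, tokens, mask):  # hypothetical
    embedded_text = self.embedder(tokens)                     # (batch, time, embed_dim)
    embedded_text = self._variational_dropout(embedded_text)  # same mask at every timestep
    encoded_text = self.encoder(embedded_text, mask)          # (batch, encoder_dim)
    encoded_text = self._naive_dropout(encoded_text)          # elementwise dropout
    logits = self.label_projection(encoded_text)
    return {"logits": logits}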
def __init__(
        self,
        vocab: Vocabulary,
        input_unit: Seq2VecEncoder,
        text_field_embedder: TextFieldEmbedder,
        # embedding_projection_dim: int = None,
        classifier_feedforward: FeedForward = None,
        max_step: int = 12,
        n_memories: int = 3,
        self_attention: bool = False,
        memory_gate: bool = False,
        dropout: float = 0.15,
        loss_weights=None,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.num_classes = max(self.vocab.get_vocab_size("labels"), 2)
    self.text_field_embedder = text_field_embedder
    self.proj = nn.Linear(text_field_embedder.get_output_dim(), input_unit.get_input_dim())
    self.input_unit = input_unit
    self.mac = MACCell(
        text_field_embedder.get_output_dim(),  # input_unit.get_output_dim(),
        max_step=max_step,
        n_memories=n_memories,
        self_attention=self_attention,
        memory_gate=memory_gate,
        dropout=dropout,
        save_attns=False,
    )
    hidden_size = 2 * input_unit.get_output_dim()
    n_layers = 3
    self.classifier = classifier_feedforward or FeedForward(
        input_dim=hidden_size,
        num_layers=n_layers,
        hidden_dims=(n_layers - 1) * [hidden_size] + [self.num_classes],
        activations=[
            Activation.by_name("relu")(),
            Activation.by_name("relu")(),
            Activation.by_name("linear")()
        ],
        dropout=[dropout, dropout, 0.0])
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "f1": F1Measure(positive_label=1),
        "weighted_f1": WeightedF1Measure(),
        "fbeta": FBetaMeasure(average='micro')
    }
    # `loss_weights and ...` yields None when no weights are given, which
    # CrossEntropyLoss interprets as unweighted.
    weights = loss_weights and torch.FloatTensor(loss_weights)
    self.loss = nn.CrossEntropyLoss(weight=weights)
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             label_namespace: str = "logical_form",
             feedforward: Optional[FeedForward] = None,
             dropout: Optional[float] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.num_tags = self.vocab.get_vocab_size("logical_form")
    self.encoder = encoder
    self.text_field_embedder = text_field_embedder
    # Separate bag-of-words embedders for the question, description and detail inputs,
    # each projected to the encoder's output dimension.
    self.BOW_embedder_question = BagOfWordCountsTokenEmbedder(
        vocab, "tokens", projection_dim=self.encoder.get_output_dim())
    self.BOW_embedder_description = BagOfWordCountsTokenEmbedder(
        vocab, "tokens", projection_dim=self.encoder.get_output_dim())
    self.BOW_embedder_detail = BagOfWordCountsTokenEmbedder(
        vocab, "tokens", projection_dim=self.encoder.get_output_dim())

    # Use a CRF as the estimator for sequential tags.
    self.crf = ConditionalRandomField(self.num_tags, include_start_end_transitions=False)

    self.softmax_layer = Softmax()
    self.ce_loss = CrossEntropyLoss()
    self.matched = 0
    self.all_pred = 0

    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.question_pred_layer = Linear(4 * output_dim, 3 * self.num_tags)
    self.metrics = {}
    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    if feedforward is not None:
        check_dimensions_match(4 * encoder.get_output_dim(), feedforward.get_input_dim(),
                               "encoder output dim", "feedforward input dim")
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             embedder: TokenEmbedder,
             seq2vec_encoder: Seq2VecEncoder,
             wbrun: Any):
    super().__init__(vocab)
    self.embedder = embedder
    self.seq2vec_encoder = seq2vec_encoder
    num_labels = vocab.get_vocab_size("labels")
    log.debug(f"Labels: {num_labels}.")
    self.classifier = torch.nn.Linear(seq2vec_encoder.get_output_dim(), num_labels)
    self.accuracy = CategoricalAccuracy()
    # Ask Weights & Biases to watch gradients and parameters of the classifier head;
    # wandb expects the string "all" here, not the builtin all().
    wbrun.watch(self.classifier, log="all")
    log.debug("Model init complete.")
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             regularizer_applicator: RegularizerApplicator = None):
    super().__init__(vocab, regularizer_applicator)
    self.embedder = embedder
    self.encoder = encoder
    num_labels = vocab.get_vocab_size("labels")
    self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
    self.accuracy = CategoricalAccuracy()
    self.auc = Auc()
    self.reg_app = regularizer_applicator
def __init__(self,
             vocab: Vocabulary,
             encoder: Seq2VecEncoder,
             embedding: Embedding = None) -> None:
    super().__init__(vocab)
    # Dense embedding of source vocab tokens.
    self._embedding = embedding
    # Encodes the sequence of source embeddings into a single hidden vector.
    self._encoder = encoder
    self._sigmoid = Sigmoid()
    # Projects the encoded vector down to a single scalar score.
    self._projection_layer = Linear(encoder.get_output_dim(), 1)
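# With a single-unit projection and a sigmoid, the module above scores each sequence with
# a probability in [0, 1]. A minimal sketch of a matching forward pass, assuming
# AllenNLP's get_text_field_mask utility; field and variable names are illustrative.
from allennlp.nn.util import get_text_field_mask

def forward(self, source_tokens):  # hypothetical
    mask = get_text_field_mask(source_tokens)
    embedded = self._embedding(source_tokens["tokens"])     # (batch, time, embed_dim)
    encoded = self._encoder(embedded, mask)                 # (batch, encoder_dim)
    score = self._sigmoid(self._projection_layer(encoded))  # (batch, 1), in [0, 1]
    return {"score": score}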
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    clauses_encoder: Seq2VecEncoder,
    outer_encoder: Seq2SeqEncoder,
    label_namespace: str = "labels",
    constraint_type: str = None,
    include_start_end_transitions: bool = True,
    dropout: float = None,
    loss_weights: Optional[List] = None,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super(JCC, self).__init__(vocab, regularizer)
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.clauses_encoder = clauses_encoder
    self.outer_encoder = outer_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self.label_projection_layer = TimeDistributed(
        Linear(outer_encoder.get_output_dim(), self.num_tags))
    labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
    constraints = allowed_transitions(constraint_type, labels)
    self.crf = ConditionalRandomField(
        self.num_tags,
        constraints,
        include_start_end_transitions=include_start_end_transitions,
    )
    self.metrics = {"accuracy": Accuracy()}
    check_dimensions_match(
        text_field_embedder.get_output_dim(),
        clauses_encoder.get_input_dim(),
        "text field embedding dim",
        "clauses encoder input dim",
    )
    check_dimensions_match(
        clauses_encoder.get_output_dim(),
        outer_encoder.get_input_dim(),
        "clauses encoder output dim",
        "outer encoder input dim",
    )
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             startphrase_encoder: Seq2VecEncoder,
             ending_encoder: Seq2VecEncoder,
             similarity: SimilarityFunction,
             initializer: InitializerApplicator,
             regularizer: RegularizerApplicator = None) -> None:
    super().__init__(vocab, regularizer)
    # validate the configuration
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           startphrase_encoder.get_input_dim(),
                           "text field embedding dim", "startphrase encoder input dim")
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           ending_encoder.get_input_dim(),
                           "text field embedding dim", "ending encoder input dim")
    check_dimensions_match(startphrase_encoder.get_output_dim(),
                           ending_encoder.get_output_dim(),
                           "startphrase embedding dim", "ending embedding dim")
    # bind all attributes to the instance
    self.text_field_embedder = text_field_embedder
    self.startphrase_encoder = startphrase_encoder
    self.ending_encoder = ending_encoder
    self.similarity = similarity
    # set the training and validation losses
    self.xentropy = torch.nn.CrossEntropyLoss()
    self.accuracy = CategoricalAccuracy()
    # initialize all variables
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder):
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    num_labels = vocab.get_vocab_size("labels")
    # A small weight-normalised hidden layer between the encoder and the output projection.
    self.hidden_layer = torch.nn.Sequential(
        torch.nn.Dropout(p=0.5),
        torch.nn.utils.weight_norm(
            torch.nn.Linear(encoder.get_output_dim(), 128)),
        torch.nn.LeakyReLU(inplace=True),
    )
    self.output_layer = torch.nn.Linear(128, num_labels)
    self.accuracy = CategoricalAccuracy()
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder):
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    # Note: the size of the "labels" namespace can be surprising here, likely because the
    # data was built for a seq2seq setup rather than plain classification.
    num_labels = vocab.get_vocab_size("labels")
    self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
    self.accuracy = CategoricalAccuracy()
    self.auc = Auc()
def __init__(self,
             pooler: Seq2VecEncoder,
             knowledge_encoder: Seq2SeqEncoder = None):
    super().__init__()
    self.pooler = pooler
    # Default to an identity encoder over the knowledge sequence if none is given.
    pass_thru = PassThroughEncoder(pooler.get_input_dim())
    self.knowledge_encoder = TimeDistributed(knowledge_encoder or pass_thru)
    self.knowledge_attn = DotProductMatrixAttention()  # CosineMatrixAttention()
    # self.attn = DotProductMatrixAttention()
    self.input_dim = pooler.get_input_dim()
    self.output_dim = pooler.get_output_dim()
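# DotProductMatrixAttention compares every row of one matrix with every row of another in
# a single call. A runnable shape sketch (tensor contents are random, dims illustrative):
import torch
from allennlp.modules.matrix_attention import DotProductMatrixAttention

attn = DotProductMatrixAttention()
context = torch.randn(2, 5, 64)          # (batch, num_context, dim)
knowledge = torch.randn(2, 7, 64)        # (batch, num_knowledge, dim)
similarities = attn(context, knowledge)  # (batch, 5, 7) pairwise dot products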
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             regularizer_applicator: RegularizerApplicator = None):
    super().__init__(vocab, regularizer_applicator)
    self.embedder = embedder
    self.encoder = encoder
    # The "labels" namespace is built automatically from the data, so the vocabulary
    # already knows how many labels there are without any explicit setup.
    num_labels = vocab.get_vocab_size("labels")
    logger.info("num labels: {}".format(num_labels))
    self.classifier = torch.nn.Linear(encoder.get_output_dim(), num_labels)
    self.accuracy = CategoricalAccuracy()
    self.auc = Auc()
    self.reg_app = regularizer_applicator
def __init__(self,
             vocabulary: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder) -> None:
    super(TextClassifier, self).__init__(vocab=vocabulary)
    self.vocabulary = vocabulary
    self.embedder = embedder
    self.encoder = encoder
    self.num_classes = self.vocabulary.get_vocab_size("labels")
    self.feedforward = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                       out_features=self.num_classes)
    self.metrics = {"accuracy": CategoricalAccuracy()}
    self.loss = torch.nn.CrossEntropyLoss()
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward = None,
             loss_weights=None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder.train()
    hidden_size = encoder.get_output_dim()
    self.classifier_feedforward = classifier_feedforward or FeedForward(
        input_dim=hidden_size,
        num_layers=3,
        hidden_dims=[hidden_size, hidden_size, self.num_classes],
        activations=[
            Activation.by_name("relu")(),
            Activation.by_name("relu")(),
            Activation.by_name("linear")()
        ],
        dropout=[0.2, 0.2, 0.0])
    if text_field_embedder.get_output_dim() != encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   encoder.get_input_dim()))
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "f1": F1Measure(positive_label=1),
        "weighted_f1": WeightedF1Measure(),
    }
    # CrossEntropyLoss accepts an optional per-class "weight" tensor.
    args = {"weight": torch.FloatTensor(loss_weights)} if loss_weights else {}
    self.loss = nn.CrossEntropyLoss(**args)
    initializer(self)
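# loss_weights above feeds CrossEntropyLoss's optional per-class `weight` tensor, which
# scales each class's contribution to the loss (useful for imbalanced labels). A minimal,
# self-contained sketch with illustrative numbers:
import torch
import torch.nn as nn

weighted_loss = nn.CrossEntropyLoss(weight=torch.FloatTensor([1.0, 5.0]))  # up-weight class 1
logits = torch.randn(4, 2)           # (batch, num_classes)
labels = torch.tensor([0, 1, 1, 0])  # gold class indices
print(weighted_loss(logits, labels))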
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             question_encoder: Seq2VecEncoder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.nb_classes = self.vocab.get_vocab_size("labels")
    self.question_encoder = question_encoder
    self.enc_dropout = torch.nn.Dropout(0.5)
    self.classifier_feedforward = Linear(question_encoder.get_output_dim(), self.nb_classes)
    self.ff_dropout = torch.nn.Dropout(0.5)
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
    }
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             model_text_field_embedder: TextFieldEmbedder,
             internal_text_encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             use_batch_norm: bool = False,
             embedding_token_dropout: Optional[float] = None,
             embedding_dropout: Optional[float] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._model_text_field_embedder = model_text_field_embedder
    self._num_classes = self.vocab.get_vocab_size("labels")
    self._internal_text_encoder = internal_text_encoder
    self._classifier_feedforward = classifier_feedforward
    self._embedding_token_dropout = nn.Dropout(embedding_token_dropout) if embedding_token_dropout else None
    self._embedding_dropout = nn.Dropout(embedding_dropout) if embedding_dropout else None
    self._batch_norm = nn.modules.BatchNorm1d(
        num_features=internal_text_encoder.get_output_dim()) if use_batch_norm else None
    if model_text_field_embedder.get_output_dim() != internal_text_encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the model_text_field_embedder must match the "
            "input dimension of the title_encoder. Found {} and {}, "
            "respectively.".format(model_text_field_embedder.get_output_dim(),
                                   internal_text_encoder.get_input_dim()))
    self._metrics = {
        "accuracy": CategoricalAccuracy(),
        # Binary classification; label index 1 is the positive "suggestion" class
        # targeted by the SemEval task.
        "f1": F1Measure(1)
    }
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             pooler: Seq2VecEncoder,
             context_encoder: Seq2SeqEncoder = None,
             kb_path: str = None,
             kb_shape: Tuple[int, int] = None,
             trainable_kb: bool = False,
             projection_dim: int = None):
    super().__init__()
    # Load the knowledge-base matrix from disk, or fall back to a matrix of ones.
    kb = (torch.load(kb_path) if kb_path else torch.ones(kb_shape)).float()
    # Convert to float before wrapping: calling .float() on the Parameter itself would
    # return a plain Tensor and silently drop the parameter registration.
    self.knowledge = nn.Parameter(kb, requires_grad=trainable_kb)
    self.projection_dim = projection_dim
    if projection_dim:
        self.kb_proj = nn.Linear(self.knowledge.size(0), self.projection_dim)
    self.context_encoder = context_encoder or PassThroughEncoder(pooler.get_input_dim())
    self.pooler = pooler
    self.output_dim = pooler.get_output_dim()
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    # The output size is dominated by LabelField's default namespace
    # (see label_field.py, where label_namespace: str = 'labels').
    self.linear = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                  out_features=vocab.get_vocab_size('labels'))
    self.accuracy = CategoricalAccuracy()
    self.loss_function = torch.nn.CrossEntropyLoss()
    self.f1_measure = F1Measure(2)
    # Softmax over the last dimension; output is a Tensor of shape (batch_size, label_size).
    self.softmax = torch.nn.Softmax(dim=1)
def __init__(self,
             vocab: Vocabulary,
             pivot_phrase_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             negative_sampling: bool = True,
             num_negative_examples: int = 10) -> None:
    super().__init__(vocab, regularizer)
    self.negative_sampling = negative_sampling
    self.num_negative_examples = num_negative_examples
    self.pivot_phrase_embedder = pivot_phrase_embedder
    self.vocab_size = self.vocab.get_vocab_size("words")
    self.encoder = encoder
    # Full-vocabulary projection, used when negative sampling is disabled.
    self._output_projection_layer = Linear(encoder.get_output_dim(), self.vocab_size)
    # Embeddings for context words, used as the "output" side of negative sampling.
    self._context_words_embedder = Embedding(self.vocab_size, pivot_phrase_embedder.get_output_dim())
    initializer(self)
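# With negative sampling enabled, the full-vocabulary projection is bypassed: the true
# context word is scored against num_negative_examples randomly sampled words, turning
# the softmax into a few binary decisions. A skip-gram-with-negative-sampling style
# sketch; all names are illustrative, not this repository's actual loss code.
import torch
import torch.nn.functional as F

def negative_sampling_loss(pivot_vec, context_ids, context_embedder, num_negatives, vocab_size):
    # pivot_vec: (batch, dim) encoded pivot phrase; context_ids: (batch,) gold context words.
    pos_emb = context_embedder(context_ids)                              # (batch, dim)
    pos_score = (pivot_vec * pos_emb).sum(-1)                            # (batch,)
    neg_ids = torch.randint(vocab_size, (context_ids.size(0), num_negatives))
    neg_emb = context_embedder(neg_ids)                                  # (batch, k, dim)
    neg_score = torch.bmm(neg_emb, pivot_vec.unsqueeze(-1)).squeeze(-1)  # (batch, k)
    # True pairs should score high (label 1); sampled pairs low (label 0).
    return (F.binary_cross_entropy_with_logits(pos_score, torch.ones_like(pos_score)) +
            F.binary_cross_entropy_with_logits(neg_score, torch.zeros_like(neg_score)))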