def __init__(self, *args, **kwargs) -> None:
    super(DocumentClassifier, self).__init__(*args, **kwargs)
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "f1": F1Measure(positive_label=1),
        "confusion_matrix": ConfusionMatrix(positive_label=1),
    }
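# A minimal get_metrics() sketch (not from the original source) showing how a
# model defining this dict would typically report its scores through the
# standard AllenNLP Model API. It assumes the older F1Measure.get_metric()
# that returns a (precision, recall, f1) tuple, as in several of the test
# snippets below; the ConfusionMatrix entry is omitted since it is not a
# scalar metric.
def get_metrics(self, reset: bool = False) -> Dict[str, float]:
    precision, recall, f1 = self.metrics["f1"].get_metric(reset)
    return {
        "accuracy": self.metrics["accuracy"].get_metric(reset),
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }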
def test_f1_measure_other_positive_label(self, device: str):
    f1_measure = F1Measure(positive_label=1)
    predictions = torch.tensor(
        [
            [0.35, 0.25, 0.1, 0.1, 0.2],
            [0.1, 0.6, 0.1, 0.2, 0.0],
            [0.1, 0.6, 0.1, 0.2, 0.0],
            [0.1, 0.5, 0.1, 0.2, 0.0],
            [0.1, 0.2, 0.1, 0.7, 0.0],
            [0.1, 0.6, 0.1, 0.2, 0.0],
        ],
        device=device,
    )
    # [True Negative, False Positive, True Positive,
    #  False Positive, True Negative, False Positive]
    targets = torch.tensor([0, 4, 1, 0, 3, 0], device=device)
    f1_measure(predictions, targets)
    metrics = f1_measure.get_metric()
    precision = metrics["precision"]
    recall = metrics["recall"]
    f1 = metrics["f1"]
    assert f1_measure._true_positives == 1.0
    assert f1_measure._true_negatives == 2.0
    assert f1_measure._false_positives == 3.0
    assert f1_measure._false_negatives == 0.0
    f1_measure.reset()
    # check value
    assert_allclose(precision, 0.25)
    assert_allclose(recall, 1.0)
    assert_allclose(f1, 0.4)
    # check type
    assert isinstance(precision, float)
    assert isinstance(recall, float)
    assert isinstance(f1, float)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             seq2vec_encoder: Seq2VecEncoder,
             initializer: InitializerApplicator) -> None:
    super(BertModel, self).__init__(vocab)
    self.text_field_embedder = text_field_embedder
    self.seq2vec_encoder = seq2vec_encoder
    self.num_types = self.vocab.get_vocab_size("state_change_type_labels")
    self.aggregate_feedforward = Linear(seq2vec_encoder.get_output_dim(),
                                        self.num_types)
    self._type_accuracy = CategoricalAccuracy()
    self.type_f1_metrics = {}
    self.type_labels_vocab = self.vocab.get_index_to_token_vocabulary(
        "state_change_type_labels")
    for type_label in self.type_labels_vocab.values():
        self.type_f1_metrics["type_" + type_label] = F1Measure(
            self.vocab.get_token_index(type_label,
                                       "state_change_type_labels"))
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def test_f1_measure_works_for_sequences(self):
    f1_measure = F1Measure(positive_label=0)
    predictions = torch.Tensor([[[0.35, 0.25, 0.1, 0.1, 0.2],
                                 [0.1, 0.6, 0.1, 0.2, 0.0],
                                 [0.1, 0.6, 0.1, 0.2, 0.0]],
                                [[0.35, 0.25, 0.1, 0.1, 0.2],
                                 [0.1, 0.6, 0.1, 0.2, 0.0],
                                 [0.1, 0.6, 0.1, 0.2, 0.0]]])
    # [[True Positive, True Negative, True Negative],
    #  [True Positive, True Negative, False Negative]]
    targets = torch.Tensor([[0, 3, 4],
                            [0, 1, 0]])
    f1_measure(predictions, targets)
    precision, recall, f1 = f1_measure.get_metric()
    assert f1_measure._true_positives == 2.0
    assert f1_measure._true_negatives == 3.0
    assert f1_measure._false_positives == 0.0
    assert f1_measure._false_negatives == 1.0
    f1_measure.reset()
    numpy.testing.assert_almost_equal(precision, 1.0)
    numpy.testing.assert_almost_equal(recall, 0.666666666)
    numpy.testing.assert_almost_equal(f1, 0.8)

    # Test the same thing with a mask:
    mask = torch.Tensor([[0, 1, 0],
                         [1, 1, 1]])
    f1_measure(predictions, targets, mask)
    precision, recall, f1 = f1_measure.get_metric()
    assert f1_measure._true_positives == 1.0
    assert f1_measure._true_negatives == 2.0
    assert f1_measure._false_positives == 0.0
    assert f1_measure._false_negatives == 1.0
    numpy.testing.assert_almost_equal(precision, 1.0)
    numpy.testing.assert_almost_equal(recall, 0.5)
    numpy.testing.assert_almost_equal(f1, 0.66666666666)
def __init__(
        self,
        num_entities: int,
        num_relations: int,
        embedding_dim: int,
        box_type: str = 'SigmoidBoxTensor',
        single_box: bool = False,
        softbox_temp: float = 10.,
        margin: float = 0.0,
        number_of_negative_samples: int = 0,
        debug: bool = False,
        regularization_weight: float = 0,
        init_interval_center: float = 0.25,
        init_interval_delta: float = 0.1,
        # adversarial_negative: bool = False,
        # adv_neg_softmax_temp: float = 0.8
) -> None:
    super().__init__(num_entities, num_relations, embedding_dim, box_type,
                     single_box, softbox_temp, margin,
                     number_of_negative_samples, debug,
                     regularization_weight, init_interval_center,
                     init_interval_delta)
    self.train_f1 = FBetaMeasure(average='micro')
    # self.valid_f1 = FBetaMeasure(average='micro')
    self.threshold_with_f1 = F1WithThreshold(flip_sign=True)
    self.istest = False
    self.test_threshold = None
    self.test_f1 = F1Measure(positive_label=1)
def __init__(self,
             vocab: Vocabulary,
             calculate_per_label_f1: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(WordConditionalMajoritySelectiveTagger, self).__init__(
        vocab, regularizer)
    self._num_classes = self.vocab.get_vocab_size("labels")
    self._total_label_counts: typing.Counter[str] = Counter()
    self._token_label_counts: Dict[str, typing.Counter[str]] = {}
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self.calculate_per_label_f1 = calculate_per_label_f1
    label_metric_name = ("label_{}" if self.calculate_per_label_f1
                         else "_label_{}")
    for label_name, label_index in self.vocab._token_to_index["labels"].items():
        self.metrics[label_metric_name.format(label_name)] = F1Measure(
            positive_label=label_index)
    # Whether to run in error analysis mode or not, see commands.error_analysis
    self.error_analysis = False
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    verbose_metrics: bool = False,
    dropout: float = 0.2,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super(TextClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.dropout = torch.nn.Dropout(dropout)
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.classifier_feedforward = torch.nn.Linear(
        self.text_field_embedder.get_output_dim(), self.num_classes)
    self.label_accuracy = CategoricalAccuracy()
    self.label_f1_metrics = {}
    self.verbose_metrics = verbose_metrics
    for i in range(self.num_classes):
        self.label_f1_metrics[vocab.get_token_from_index(
            index=i, namespace="labels")] = F1Measure(positive_label=i)
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
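# A hypothetical get_metrics() companion for the per-label setup above (a
# sketch, not the original code): emit per-label precision/recall/F1 only when
# verbose_metrics is set, plus an averaged F1, again assuming the
# tuple-returning F1Measure.get_metric().
def get_metrics(self, reset: bool = False) -> Dict[str, float]:
    metric_dict = {"accuracy": self.label_accuracy.get_metric(reset)}
    f1_scores = []
    for name, metric in self.label_f1_metrics.items():
        precision, recall, f1 = metric.get_metric(reset)
        f1_scores.append(f1)
        if self.verbose_metrics:
            metric_dict[name + "_P"] = precision
            metric_dict[name + "_R"] = recall
            metric_dict[name + "_F1"] = f1
    metric_dict["average_F1"] = sum(f1_scores) / len(f1_scores)
    return metric_dict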
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             context_encoder: Seq2SeqEncoder,
             qa_attention_module: MatrixAttention,
             text_encoder_qa_matching: Seq2VecEncoder,
             qa_matching_layer: FeedForward,
             qr_attention_module: Attention,
             text_encoder_ra_entailment: Seq2VecEncoder,
             ra_matching_layer: FeedForward,
             predict_layer: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None):
    super(AnswerHelpfulPredictionModel, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.context_encoder = context_encoder
    self.qa_attention_module = qa_attention_module
    self.text_encoder_qa_matching = text_encoder_qa_matching
    self.qa_matching_layer = qa_matching_layer
    self.qr_attention_module = qr_attention_module
    self.text_encoder_ra_entailment = text_encoder_ra_entailment
    self.ra_matching_layer = ra_matching_layer
    self.predict_layer = predict_layer
    # performance scores are running values; reset them every epoch
    self.f1_measure = F1Measure(positive_label=1)
    self.auc_score = Auc(positive_label=1)
    self.accuracy = CategoricalAccuracy()
    self.criterion = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             embedding_dropout: float,
             encoder: Seq2VecEncoder,
             encoder_dropout: float,
             out_dim: int,
             vocab: Vocabulary,
             positive_label: int = 4,
             verbose=True) -> None:
    super().__init__(vocab)
    # Convert word IDs to their vector representations
    self._word_embeddings = word_embeddings
    self._embedding_dropout = torch.nn.Dropout(embedding_dropout)
    self._encoder = encoder
    self._encoder_dropout = torch.nn.Dropout(encoder_dropout)
    # The fully-connected layer maps the previous layer's dimension to the
    # number of output classes
    self._linear = torch.nn.Linear(in_features=encoder.get_output_dim(),
                                   out_features=out_dim)
    # Evaluation metrics: classification accuracy and F1 score
    self.accuracy = CategoricalAccuracy()
    self.f1_measure = F1Measure(positive_label)
    # Cross entropy is the loss function for this classification task.
    # PyTorch's CrossEntropyLoss combines softmax and log-likelihood loss
    # internally, so there is no need to define an explicit softmax layer.
    self.loss_function = torch.nn.CrossEntropyLoss()
    # self.loss_function = torch.nn.BCEWithLogitsLoss()
    self._verbose = verbose
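# A minimal forward() sketch for the classifier above, under assumed input
# field names ("tokens", "label" -- hypothetical, not from the original):
# embed, encode, classify, and feed the logits to the metrics defined in
# __init__. get_text_field_mask comes from allennlp.nn.util.
def forward(self, tokens: Dict[str, torch.Tensor],
            label: torch.Tensor = None) -> Dict[str, torch.Tensor]:
    mask = get_text_field_mask(tokens)
    embeddings = self._embedding_dropout(self._word_embeddings(tokens))
    encoding = self._encoder_dropout(self._encoder(embeddings, mask))
    logits = self._linear(encoding)
    output = {"logits": logits}
    if label is not None:
        self.accuracy(logits, label)
        self.f1_measure(logits, label)
        output["loss"] = self.loss_function(logits, label)
    return output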
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             node_embedder: TokenEmbedder,
             verbose_metrics: bool,
             classifier_feedforward: FeedForward,
             use_node_vector: bool = True,
             use_abstract: bool = True,
             dropout: float = 0.2,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(AclClassifier, self).__init__(vocab, regularizer)
    self.node_embedder = node_embedder
    self.text_field_embedder = text_field_embedder
    self.use_node_vector = use_node_vector
    self.use_abstract = use_abstract
    self.dropout = torch.nn.Dropout(dropout)
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.classifier_feedforward = classifier_feedforward
    self.label_accuracy = CategoricalAccuracy()
    self.label_f1_metrics = {}
    self.verbose_metrics = verbose_metrics
    for i in range(self.num_classes):
        label_name = vocab.get_token_from_index(index=i, namespace="labels")
        self.label_f1_metrics[label_name] = F1Measure(positive_label=i)
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             seq2seq_encoder: Seq2SeqEncoder,
             initializer: InitializerApplicator) -> None:
    super(ProLocalModel, self).__init__(vocab)
    self.text_field_embedder = text_field_embedder
    self.seq2seq_encoder = seq2seq_encoder
    self.attention_layer = Attention(
        similarity_function=BilinearSimilarity(
            2 * seq2seq_encoder.get_output_dim(),
            seq2seq_encoder.get_output_dim()),
        normalize=True)
    self.num_types = self.vocab.get_vocab_size("state_change_type_labels")
    self.aggregate_feedforward = Linear(seq2seq_encoder.get_output_dim(),
                                        self.num_types)
    # by default "O" is ignored in metric computation
    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace="state_change_tags")
    self.num_tags = self.vocab.get_vocab_size("state_change_tags")
    self.tag_projection_layer = TimeDistributed(
        Linear(self.seq2seq_encoder.get_output_dim() + 2, self.num_tags))
    self._type_accuracy = CategoricalAccuracy()
    self.type_f1_metrics = {}
    self.type_labels_vocab = self.vocab.get_index_to_token_vocabulary(
        "state_change_type_labels")
    for type_label in self.type_labels_vocab.values():
        self.type_f1_metrics["type_" + type_label] = F1Measure(
            self.vocab.get_token_index(type_label,
                                       "state_change_type_labels"))
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    seq2seq_encoder: Seq2SeqEncoder,
    feedforward_encoder: Seq2SeqEncoder,
    dropout: float = 0.0,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
):
    super(SimpleGeneratorModel, self).__init__(vocab, regularizer)
    self._vocabulary = vocab
    self._text_field_embedder = text_field_embedder
    self._seq2seq_encoder = seq2seq_encoder
    self._dropout = torch.nn.Dropout(p=dropout)
    self._feedforward_encoder = feedforward_encoder
    self._classifier_input_dim = feedforward_encoder.get_output_dim()
    self._classification_layer = torch.nn.Linear(self._classifier_input_dim, 1)
    self._rationale_f1_metric = F1Measure(positive_label=1)
    self._rationale_length = Average()
    self._rationale_supervision_loss = Average()
    initializer(self)
def __init__(self, args, word_embeddings: TextFieldEmbedder,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    # parameters
    self.args = args
    self.word_embeddings = word_embeddings
    # gate
    self.W_z = nn.Linear(self.args.embedding_size, 1, bias=False)
    self.U_z = nn.Linear(self.args.embedding_size, 1, bias=False)
    self.W_r = nn.Linear(self.args.embedding_size, 1, bias=False)
    self.U_r = nn.Linear(self.args.embedding_size, 1, bias=False)
    self.W = nn.Linear(self.args.embedding_size, 1, bias=False)
    self.U = nn.Linear(self.args.embedding_size, 1, bias=False)
    # layers
    self.event_embedding = EventEmbedding(args, self.word_embeddings)
    self.attention = Attention(self.args.embedding_size,
                               score_function='mlp')
    self.sigmoid = Sigmoid()
    self.tanh = Tanh()
    self.score = Score(self.args.embedding_size, self.args.embedding_size,
                       threshold=self.args.threshold)
    # metrics
    self.accuracy = BooleanAccuracy()
    self.f1_score = F1Measure(positive_label=1)
    self.loss_function = BCELoss()
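# The W_z/U_z, W_r/U_r, W/U parameter names above suggest GRU-style gating.
# A sketch of that standard computation (an assumption -- the original
# forward pass is not shown here), for an input x and a state h:
#     z = self.sigmoid(self.W_z(x) + self.U_z(h))      # update gate
#     r = self.sigmoid(self.W_r(x) + self.U_r(h))      # reset gate
#     h_tilde = self.tanh(self.W(x) + self.U(r * h))   # candidate state
#     h_new = (1 - z) * h + z * h_tilde                # gated combination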
def __init__(
        self,
        num_entities: int,
        num_relations: int,
        embedding_dim: int,
        box_type: str = 'SigmoidBoxTensor',
        softbox_temp: float = 10.,
        single_box: bool = False,
        margin: float = 0.0,
        # we don't need vocab, but some APIs rely on its presence as an argument
        vocab: Optional[None] = None,
        debug: bool = False) -> None:
    super().__init__()
    self.debug = debug
    self.num_entities = num_entities
    self.num_relations = num_relations
    self.embedding_dim = embedding_dim
    self.box_type = box_type
    self.create_embeddings_layer(num_entities, num_relations, embedding_dim,
                                 single_box)
    self.loss_f = torch.nn.MarginRankingLoss(  # type: ignore
        margin=margin, reduction='mean')
    self.softbox_temp = softbox_temp
    self.margin = margin
    self.f1 = F1Measure(1)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             classifier_feedforward: FeedForward,
             class_loss_weights: List[float],
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.classifier_feedforward = classifier_feedforward
    class_loss_weights = torch.Tensor(class_loss_weights)
    class_loss_weights = class_loss_weights / class_loss_weights.sum()
    self.loss = torch.nn.CrossEntropyLoss(weight=class_loss_weights)
    self.metric_overall_accuracy = CategoricalAccuracy()
    self.metric_class_accuracies = {
        c: F1Measure(positive_label=i)
        for i, c in enumerate(['unfunny', 'somewhat_funny', 'funny'])
    }
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             output_feedforward: FeedForward,
             output_logit: FeedForward,
             dropout: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._encoder = encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
        self.rnn_input_dropout = InputVariationalDropout(dropout)
    else:
        self.dropout = None
        self.rnn_input_dropout = None
    self._output_feedforward = output_feedforward
    self._output_logit = output_logit
    self._num_labels = vocab.get_vocab_size(namespace="labels")
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    self._f1 = F1Measure(positive_label=vocab._token_to_index["labels"]["1"])
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
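# Note: vocab._token_to_index["labels"]["1"] above reaches into a private
# attribute of Vocabulary; the public equivalent is:
#     positive_label = vocab.get_token_index("1", namespace="labels")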
def __init__(self,
             num_entities: int,
             num_relations: int,
             embedding_dim: int,
             box_type: str = 'SigmoidBoxTensor',
             single_box: bool = False,
             softbox_temp: float = 10.,
             number_of_negative_samples: int = 0,
             debug: bool = False,
             regularization_weight: float = 0,
             init_interval_center: float = 0.25,
             init_interval_delta: float = 0.1,
             neg_samples_in_dataset_reader: int = 0) -> None:
    super().__init__(
        num_entities,
        num_relations,
        embedding_dim,
        box_type=box_type,
        single_box=single_box,
        softbox_temp=softbox_temp,
        number_of_negative_samples=number_of_negative_samples,
        debug=debug,
        regularization_weight=regularization_weight,
        init_interval_center=init_interval_center,
        init_interval_delta=init_interval_delta,
        neg_samples_in_dataset_reader=neg_samples_in_dataset_reader)
    self.train_f1 = FBetaMeasure(average='micro')
    # self.valid_f1 = FBetaMeasure(average='micro')
    self.threshold_with_f1 = F1WithThreshold(flip_sign=True)
    self.istest = False
    self.test_threshold = None
    # self.test_f1 = FBetaMeasure(average='macro')
    self.test_f1 = F1Measure(positive_label=1)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             classifier_feedforward: FeedForward,
             elmo: Elmo = None,
             use_input_elmo: bool = False):
    super().__init__(vocab)
    self.elmo = elmo
    self.use_elmo = use_input_elmo
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.classifier_feed_forward = classifier_feedforward
    self.label_accuracy = CategoricalAccuracy()
    self.label_f1_metrics = {}
    # create F1 Measures for each class
    for i in range(self.num_classes):
        self.label_f1_metrics[vocab.get_token_from_index(
            index=i, namespace="labels")] = F1Measure(positive_label=i)
    self.loss = torch.nn.CrossEntropyLoss()
    self.attention = Attention(encoder.get_output_dim())
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    quote_response_encoder: Seq2VecEncoder,
    classifier_feedforward: FeedForward,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    predict_mode: bool = False,
) -> None:
    super(SarcasmClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.quote_response_encoder = quote_response_encoder
    self.classifier_feedforward = classifier_feedforward
    self.label_acc_metrics = {"accuracy": CategoricalAccuracy()}
    self.label_f1_metrics = {}
    # for i in range(self.num_classes):
    #     self.label_f1_metrics[vocab.get_token_from_index(index=i, namespace="label")] = \
    #         F1Measure(positive_label=i)
    for i in range(self.num_classes):
        self.label_f1_metrics[vocab.get_token_from_index(
            index=i, namespace="labels")] = F1Measure(positive_label=i)
    self.loss = torch.nn.CrossEntropyLoss()
    # self.attention_seq2seq = Attention(quote_response_encoder.get_output_dim())
    self.predict_mode = predict_mode
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    embedding_dropout: float,
    seq2seq_encoder: Seq2SeqEncoder,
    classifier_feedforward: FeedForward,
    initializer: InitializerApplicator = InitializerApplicator(),
    loss_weights: Optional[List] = [],
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super(ICC, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self._embedding_dropout = nn.Dropout(embedding_dropout)
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.seq2seq_encoder = seq2seq_encoder
    self.self_attentive_pooling_projection = nn.Linear(
        seq2seq_encoder.get_output_dim(), 1)
    self.classifier_feedforward = classifier_feedforward
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "f1": F1Measure(positive_label=1),
    }
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def test_f1_measure_accumulates_and_resets_correctly(self):
    f1_measure = F1Measure(positive_label=0)
    predictions = torch.Tensor([[0.35, 0.25, 0.1, 0.1, 0.2],
                                [0.1, 0.6, 0.1, 0.2, 0.0],
                                [0.1, 0.6, 0.1, 0.2, 0.0],
                                [0.1, 0.5, 0.1, 0.2, 0.0],
                                [0.1, 0.2, 0.1, 0.7, 0.0],
                                [0.1, 0.6, 0.1, 0.2, 0.0]])
    # [True Positive, True Negative, True Negative,
    #  False Negative, True Negative, False Negative]
    targets = torch.Tensor([0, 4, 1, 0, 3, 0])
    f1_measure(predictions, targets)
    f1_measure(predictions, targets)
    precision, recall, f1 = f1_measure.get_metric()
    assert f1_measure._true_positives == 2.0
    assert f1_measure._true_negatives == 6.0
    assert f1_measure._false_positives == 0.0
    assert f1_measure._false_negatives == 4.0
    f1_measure.reset()
    numpy.testing.assert_almost_equal(precision, 1.0)
    numpy.testing.assert_almost_equal(recall, 0.333333333)
    numpy.testing.assert_almost_equal(f1, 0.499999999)
    assert f1_measure._true_positives == 0.0
    assert f1_measure._true_negatives == 0.0
    assert f1_measure._false_positives == 0.0
    assert f1_measure._false_negatives == 0.0
def test_f1_measure_other_positive_label(self):
    f1_measure = F1Measure(positive_label=1)
    predictions = torch.Tensor([
        [0.35, 0.25, 0.1, 0.1, 0.2],
        [0.1, 0.6, 0.1, 0.2, 0.0],
        [0.1, 0.6, 0.1, 0.2, 0.0],
        [0.1, 0.5, 0.1, 0.2, 0.0],
        [0.1, 0.2, 0.1, 0.7, 0.0],
        [0.1, 0.6, 0.1, 0.2, 0.0],
    ])
    # [True Negative, False Positive, True Positive,
    #  False Positive, True Negative, False Positive]
    targets = torch.Tensor([0, 4, 1, 0, 3, 0])
    f1_measure(predictions, targets)
    precision, recall, f1 = f1_measure.get_metric()
    assert f1_measure._true_positives == 1.0
    assert f1_measure._true_negatives == 2.0
    assert f1_measure._false_positives == 3.0
    assert f1_measure._false_negatives == 0.0
    f1_measure.reset()
    # check value
    numpy.testing.assert_almost_equal(precision, 0.25)
    numpy.testing.assert_almost_equal(recall, 1.0)
    numpy.testing.assert_almost_equal(f1, 0.4)
    # check type
    assert isinstance(precision, float)
    assert isinstance(recall, float)
    assert isinstance(f1, float)
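# The two variants of this test in this collection reflect an AllenNLP API
# change: here get_metric() returns a (precision, recall, f1) tuple, while the
# device-parameterized variant above reads "precision"/"recall"/"f1" keys from
# a dict. A version-tolerant caller (a sketch) can normalize both shapes:
def unpack_f1(metric_output):
    if isinstance(metric_output, dict):
        return (metric_output["precision"], metric_output["recall"],
                metric_output["f1"])
    # older releases already return a (precision, recall, f1) tuple
    return metric_output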
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    use_sep: bool = True,
    with_crf: bool = False,
    self_attn: Seq2SeqEncoder = None,
    bert_dropout: float = 0.1,
    sci_sum: bool = False,
    additional_feature_size: int = 0,
) -> None:
    super(SeqClassificationModel, self).__init__(vocab)
    self.text_field_embedder = text_field_embedder
    self.vocab = vocab
    self.use_sep = use_sep
    self.with_crf = with_crf
    self.sci_sum = sci_sum
    self.self_attn = self_attn
    self.additional_feature_size = additional_feature_size
    self.dropout = torch.nn.Dropout(p=bert_dropout)

    # define loss
    if self.sci_sum:
        # labels are ROUGE scores
        self.loss = torch.nn.MSELoss(reduction='none')
        self.labels_are_scores = True
        self.num_labels = 1
    else:
        self.loss = torch.nn.CrossEntropyLoss(ignore_index=-1,
                                              reduction='none')
        self.labels_are_scores = False
        self.num_labels = self.vocab.get_vocab_size(namespace='labels')
        # define accuracy metrics
        self.label_accuracy = CategoricalAccuracy()
        self.all_f1_metrics = FBetaMeasure(beta=1.0, average='micro')
        self.label_f1_metrics = {}
        # define F1 metrics per label
        for label_index in range(self.num_labels):
            label_name = self.vocab.get_token_from_index(
                namespace='labels', index=label_index)
            self.label_f1_metrics[label_name] = F1Measure(label_index)

    encoded_sentence_dim = text_field_embedder._token_embedders['bert'].output_dim
    ff_in_dim = encoded_sentence_dim if self.use_sep else self_attn.get_output_dim()
    ff_in_dim += self.additional_feature_size
    self.time_distributed_aggregate_feedforward = TimeDistributed(
        Linear(ff_in_dim, self.num_labels))

    if self.with_crf:
        self.crf = ConditionalRandomField(
            self.num_labels,
            constraints=None,
            include_start_end_transitions=True)
def __init__(self, args, word_embeddings: TextFieldEmbedder,
             vocab: Vocabulary, domain_info: bool = True) -> None:
    super().__init__(vocab)
    # parameters
    self.args = args
    self.word_embeddings = word_embeddings
    self.domain = domain_info
    # layers
    self.event_embedding = EventEmbedding(args, self.word_embeddings)
    self.event_type_embedding = EventTypeEmbedding(args, self.word_embeddings)
    self.lstm = LSTM(input_size=self.args.embedding_size,
                     hidden_size=self.args.hidden_size)
    self.linear = Linear(self.args.hidden_size, self.args.embedding_size)
    self.W_c = Linear(self.args.embedding_size, self.args.hidden_size,
                      bias=False)
    self.W_e = Linear(self.args.hidden_size, self.args.hidden_size,
                      bias=False)
    self.relu = ReLU()
    self.score = Score(self.args.embedding_size, self.args.embedding_size,
                       threshold=self.args.threshold)
    # metrics
    self.accuracy = BooleanAccuracy()
    self.f1_score = F1Measure(positive_label=1)
    self.loss_function = BCELoss()
def test_f1_measure_accumulates_and_resets_correctly(self, device: str):
    f1_measure = F1Measure(positive_label=0)
    predictions = torch.tensor(
        [
            [0.35, 0.25, 0.1, 0.1, 0.2],
            [0.1, 0.6, 0.1, 0.2, 0.0],
            [0.1, 0.6, 0.1, 0.2, 0.0],
            [0.1, 0.5, 0.1, 0.2, 0.0],
            [0.1, 0.2, 0.1, 0.7, 0.0],
            [0.1, 0.6, 0.1, 0.2, 0.0],
        ],
        device=device,
    )
    # [True Positive, True Negative, True Negative,
    #  False Negative, True Negative, False Negative]
    targets = torch.tensor([0, 4, 1, 0, 3, 0], device=device)
    f1_measure(predictions, targets)
    f1_measure(predictions, targets)
    metrics = f1_measure.get_metric()
    precision = metrics["precision"]
    recall = metrics["recall"]
    f1 = metrics["f1"]
    assert f1_measure._true_positives == 2.0
    assert f1_measure._true_negatives == 6.0
    assert f1_measure._false_positives == 0.0
    assert f1_measure._false_negatives == 4.0
    f1_measure.reset()
    assert_allclose(precision, 1.0)
    assert_allclose(recall, 0.333333333)
    assert_allclose(f1, 0.499999999)
    assert f1_measure._true_positives == 0.0
    assert f1_measure._true_negatives == 0.0
    assert f1_measure._false_positives == 0.0
    assert f1_measure._false_negatives == 0.0
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             feedforward: Optional[FeedForward] = None,
             include_start_end_transitions: bool = True,
             dropout: Optional[float] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.label_namespace = 'labels'
    self.num_tags = self.vocab.get_vocab_size(self.label_namespace)

    # encode text
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.dropout = torch.nn.Dropout(dropout) if dropout else None
    self.feedforward = feedforward

    # crf
    output_dim = (self.encoder.get_output_dim() if feedforward is None
                  else feedforward.get_output_dim())
    self.tag_projection_layer = TimeDistributed(
        Linear(output_dim, self.num_tags))
    self.crf = ConditionalRandomField(
        self.num_tags,
        constraints=None,
        include_start_end_transitions=include_start_end_transitions)

    initializer(self)

    # Add F1 score for individual labels to metrics
    self.metrics = {}
    for index, label in self.vocab.get_index_to_token_vocabulary(
            self.label_namespace).items():
        self.metrics[label] = F1Measure(positive_label=index)
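# A hypothetical get_metrics() for the per-label dict above (a sketch, not
# part of the original): flatten each F1Measure into prefixed scalar entries
# so the trainer can log them, assuming the tuple-returning get_metric().
def get_metrics(self, reset: bool = False) -> Dict[str, float]:
    metrics_to_return = {}
    for label, metric in self.metrics.items():
        precision, recall, f1 = metric.get_metric(reset)
        metrics_to_return[label + "_precision"] = precision
        metrics_to_return[label + "_recall"] = recall
        metrics_to_return[label + "_f1"] = f1
    return metrics_to_return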
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             class_weights: List[float] = (1.0, 1.0),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self.classifier_feedforward = classifier_feedforward
    self.num_classes = self.vocab.get_vocab_size('labels')
    assert self.num_classes == classifier_feedforward.get_output_dim()
    # if classifier_feedforward.get_input_dim() != 768:
    #     raise ConfigurationError(f"The input dimension of the classifier_feedforward, "
    #                              f"found {classifier_feedforward.get_input_dim()}, must match "
    #                              f"the output dimension of the bert embedder, {768}")
    index = 0
    if self.num_classes == 2:
        # "正类" is the vocabulary token for the positive class
        index = self.vocab.get_token_index("正类", "labels")
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "f1": F1Measure(index)
    }
    # weights = torch.Tensor(class_weights)
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, args, word_embeddings: TextFieldEmbedder,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    # parameters
    self.args = args
    self.word_embeddings = word_embeddings
    # layers
    self.event_embedding = EventEmbedding(self.args, self.word_embeddings)
    self.lstm = DynamicLSTM(self.args.embedding_size * 2,
                            self.args.hidden_size,
                            num_layers=1,
                            batch_first=True)
    self.attention = NoQueryAttention(
        self.args.hidden_size + self.args.embedding_size * 2,
        score_function='bi_linear')
    self.score = Score(self.args.hidden_size, self.args.embedding_size,
                       threshold=self.args.threshold)
    # metrics
    self.accuracy = BooleanAccuracy()
    self.f1_score = F1Measure(positive_label=1)
    self.loss_function = BCELoss()
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             vocab: Vocabulary,
             positive_label: int = 4) -> None:
    super().__init__(vocab)
    # We need the embeddings to convert word IDs to their vector representations
    self.word_embeddings = word_embeddings
    self.encoder = encoder
    # After converting a sequence of vectors to a single vector, we feed it into
    # a fully-connected linear layer to reduce the dimension to the total number
    # of labels.
    self.linear = torch.nn.Linear(
        in_features=encoder.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    # Monitor the metrics - we use accuracy, as well as prec, rec, f1 for
    # label 4 (very positive)
    self.accuracy = CategoricalAccuracy()
    self.f1_measure = F1Measure(positive_label)
    # We use the cross entropy loss because this is a classification task.
    # Note that PyTorch's CrossEntropyLoss combines softmax and log likelihood
    # loss, which makes it unnecessary to add a separate softmax layer.
    self.loss_function = torch.nn.CrossEntropyLoss()
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    text_encoder: Seq2SeqEncoder,
    classifier_feedforward: FeedForward,
    verbose_metrics: bool = False,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super(TextClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.text_encoder = text_encoder
    self.classifier_feedforward = classifier_feedforward
    self.prediction_layer = torch.nn.Linear(
        self.classifier_feedforward.get_output_dim(), self.num_classes)
    self.label_accuracy = CategoricalAccuracy()
    self.label_f1_metrics = {}
    self.verbose_metrics = verbose_metrics
    for i in range(self.num_classes):
        self.label_f1_metrics[vocab.get_token_from_index(
            index=i, namespace="labels")] = F1Measure(positive_label=i)
    self.loss = torch.nn.CrossEntropyLoss()
    self.pool = lambda text, mask: util.get_final_encoder_states(
        text, mask, bidirectional=True)
    initializer(self)