Example 1
    def __init__(self, hparams):
        super(MaskRefineModel, self).__init__()
        self.save_hyperparameters()

        # Hyperparameters
        self.hparams = hparams
        self.lr = hparams.lr

        # Modules
        self.net = load_network(hparams)
        self.criterion = TverskyLoss(hparams.tversky_alpha,
                                     hparams.tversky_beta)

        # Metrics
        self.train_metrics = MetricCollection([Precision(), Recall(), F1(2)])
        self.val_metrics = MetricCollection([Precision(), Recall(), F1(2)])
Example 2
    def __init__(self, MODEL, TRAIN_DATA, TRAIN_CODES, DEV_DATA, DEV_CODES,
                 TEST_DATA, TEST_CODES, HIDDEN_UNIT1, BATCH_SIZE, LR, EPS,
                 EPOCHS, FREEZE_BERT=False):

        super(CorefClassifier, self).__init__()  
        #self.save_hyperparameters()
        
        self.BEST_THRESHOLD = 0

        self.train_data  = TRAIN_DATA
        self.train_codes = TRAIN_CODES

        self.dev_data   = DEV_DATA
        self.dev_codes  = DEV_CODES

        self.test_data  = TEST_DATA
        self.test_codes = TEST_CODES

        self.model = AutoModel.from_pretrained(MODEL)
        self.hidden_unit1 = HIDDEN_UNIT1
        
        if self.hidden_unit1:
            self.hidden_layer1 = nn.Linear(768, self.hidden_unit1)
            self.hidden_layer2 = nn.Linear(self.hidden_unit1, 1)
        else:
            self.hidden_layer1 = nn.Linear(768, 1)

        self.lossfn = nn.BCELoss()
        self.batch_size = BATCH_SIZE
        self.lr  = LR
        self.eps = EPS
        self.epochs = EPOCHS
        
        if FREEZE_BERT:
            for param in self.model.parameters():
                param.requires_grad = False
        
        #Metrics
        self.valid_metrics = MetricCollection([Accuracy(),
                                               Precision(num_classes=1, average='macro'),
                                               Recall(num_classes=1, average='macro'),
                                               F1(num_classes=1, average='macro')
                                              ])
        
        self.test_metrics = MetricCollection([Accuracy(),
                                               Precision(num_classes=1, average='macro'),
                                               Recall(num_classes=1, average='macro'),
                                               F1(num_classes=1, average='macro')
                                              ])
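The excerpt above stops at the constructor, so it does not show how `valid_metrics` is consumed. Below is a minimal, hypothetical `validation_step` that would be compatible with this setup; the batch layout, the [CLS] pooling and the sigmoid (needed because `nn.BCELoss` expects probabilities) are assumptions, not code from the original project.

    def validation_step(self, batch, batch_idx):
        input_ids, attention_mask, labels = batch            # assumed batch layout
        cls = self.model(input_ids=input_ids,
                         attention_mask=attention_mask).last_hidden_state[:, 0]
        if self.hidden_unit1:
            logits = self.hidden_layer2(torch.relu(self.hidden_layer1(cls)))
        else:
            logits = self.hidden_layer1(cls)
        probs = torch.sigmoid(logits).squeeze(-1)            # BCELoss wants probabilities
        loss = self.lossfn(probs, labels.float())
        # MetricCollection fans the same (preds, target) pair out to every metric.
        self.log_dict(self.valid_metrics(probs, labels.int()))
        self.log('val_loss', loss)
        return loss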
Example 3
def test_v1_5_metrics_collection():
    target = torch.tensor([0, 2, 0, 2, 0, 1, 0, 2])
    preds = torch.tensor([2, 1, 2, 0, 1, 2, 2, 2])

    MetricCollection.__init__._warned = False
    with pytest.deprecated_call(match="It will be removed in v1.5.0."):
        metrics = MetricCollection([Accuracy()])
    assert metrics(preds, target) == {'Accuracy': torch.tensor(0.1250)}
Example 4
    def __init__(self, hparams):
        super(EdgeCompleteModel, self).__init__()
        self.save_hyperparameters()

        # Hyperparameters
        self.hparams = hparams
        self.lr = hparams.lr

        # Modules
        self.mask_refine_net = MaskRefineNet(hparams.mask_refine_weights)
        self.mask_refine_net.freeze()
        self.net = load_network(hparams)
        self.tv_loss = TverskyLoss(hparams.tversky_alpha, hparams.tversky_beta)

        # Metrics
        self.train_metrics = MetricCollection([Precision(), Recall(), F1(2)])
        self.val_metrics = MetricCollection([Precision(), Recall(), F1(2)])
Example 5
def test_v1_5_0_metrics_collection():
    target = torch.tensor([0, 2, 0, 2, 0, 1, 0, 2])
    preds = torch.tensor([2, 1, 2, 0, 1, 2, 2, 2])
    with pytest.deprecated_call(
            match="The `MetricCollection` was deprecated since v1.3.0 in favor"
            " of `torchmetrics.collections.MetricCollection`. It will be removed in v1.5.0"
    ):
        metrics = MetricCollection([Accuracy()])
    assert metrics(preds, target) == {'Accuracy': torch.Tensor([0.1250])[0]}
Example 6
def make_metric(metric):
    if metric is None:
        return None
    if isinstance(metric, list):
        m = list(map(instantiate, metric))
    elif isinstance(metric, dict):
        m = {name: instantiate(value) for name, value in metric.items()}
    else:
        m = [instantiate(metric)]
    return MetricCollection(metrics=m)
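For reference, a short usage sketch of the helper above. It assumes `instantiate` is Hydra-style (e.g. hydra.utils.instantiate) and resolves a `_target_` entry to a torchmetrics object; the concrete config format used by the original project may differ.

acc_cfg = {"_target_": "torchmetrics.Accuracy"}    # assumed Hydra-style config entries
prec_cfg = {"_target_": "torchmetrics.Precision"}

make_metric(acc_cfg)                           # single entry -> one-element MetricCollection
make_metric([acc_cfg, prec_cfg])               # list -> collection keyed by class name
make_metric({"acc": acc_cfg, "p": prec_cfg})   # dict -> collection keyed by the given names
make_metric(None)                              # nothing configured -> returns None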
def test_v1_5_metrics_collection():
    target = torch.tensor([0, 2, 0, 2, 0, 1, 0, 2])
    preds = torch.tensor([2, 1, 2, 0, 1, 2, 2, 2])

    MetricCollection.__init__.warned = False
    with pytest.deprecated_call(
        match="`pytorch_lightning.metrics.metric.MetricCollection` was deprecated since v1.3.0 in favor"
        " of `torchmetrics.collections.MetricCollection`. It will be removed in v1.5.0."
    ):
        metrics = MetricCollection([Accuracy()])
    assert metrics(preds, target) == {'Accuracy': torch.tensor(0.1250)}
    class TestModel(BoringModel):
        def __init__(self):
            super().__init__()
            self.metric = MetricCollection([SumMetric(), DiffMetric()])
            self.sum = 0.0
            self.diff = 0.0

        def training_step(self, batch, batch_idx):
            x = batch
            metric_vals = self.metric(x.sum())
            self.sum += x.sum()
            self.diff -= x.sum()
            self.log_dict({f"{k}_step": v for k, v in metric_vals.items()})
            return self.step(x)

        def training_epoch_end(self, outputs):
            metric_vals = self.metric.compute()
            self.log_dict({f"{k}_epoch": v for k, v in metric_vals.items()})
Example 9
    def __init__(self, dm, cfg):
        """
        A model that predicts the name of a variable using its usages.

        :param dm: datamodule which contains train, val, test datasets,
        :param cfg: config that contains all needed parameters.
        """
        super(IdTransformerModel, self).__init__()

        self.dm = dm

        self.max_sequence_length = cfg.max_sequence_length
        self.max_num_usages = cfg.max_num_usages
        self.max_target_length = cfg.max_target_length
        self.embedding_dim = cfg.embedding_dim
        self.usage_embedding_dim = cfg.usage_embedding_dim
        self.target_embedding_dim = cfg.target_embedding_dim
        self.num_heads = cfg.num_heads
        self.num_encoder_layers = cfg.num_encoder_layers
        self.num_decoder_layers = cfg.num_decoder_layers
        self.num_usage_layers = cfg.num_usage_layers
        self.dropout = cfg.dropout
        self.sequence_reducer = cfg.sequence_reducer
        self.sequence_encoder_type = cfg.sequence_encoder_type

        self.batch_size = cfg.batch_size
        self.max_lr = cfg.max_lr

        self.encoder = IdEncoder(
            self.max_sequence_length, self.max_num_usages,
            self.dm.usage_vocab_size, self.dm.usage_pad_idx,
            self.embedding_dim, self.num_heads, self.usage_embedding_dim,
            self.num_encoder_layers, self.num_usage_layers, self.dropout,
            self.sequence_encoder_type, self.sequence_reducer)
        self.decoder = IdDecoder(self.max_num_usages, self.max_target_length,
                                 self.dm.target_vocab_size,
                                 self.dm.target_pad_idx,
                                 self.usage_embedding_dim, self.num_heads,
                                 self.target_embedding_dim,
                                 self.num_decoder_layers, self.dropout)

        self.tusk = "generation" if cfg.target_tokenizer_type is not None else "classification"
        self.train_metrics = MetricCollection(self.metrics_dict("train"))
        self.val_metrics = MetricCollection(self.metrics_dict("val"))
        self.test_metrics = MetricCollection(self.metrics_dict("test"))

        self.loss = nn.CrossEntropyLoss()
Example 10
    def __init__(self, hparams):
        super(TextRemovalModel, self).__init__()
        self.save_hyperparameters()

        # Hyperparameters
        self.hparams = hparams
        self.lr = hparams.lr

        # Networks
        self.mask_refine_net = MaskRefineNet(hparams.mask_refine_weights)
        self.mask_refine_net.freeze()
        self.edge_complete_net = EdgeCompleteNet(hparams.edge_complete_weights)
        self.edge_complete_net.freeze()
        self.net = load_network(hparams)

        # Losses
        self.l1_loss = nn.L1Loss()
        self.gradient_loss = GradientLoss(type=hparams.gradient_loss_type)
        self.perceptual_loss = PerceptualLoss()

        # Metrics
        self.train_psnr = PSNR()
        self.val_metrics = MetricCollection([PSNR(), MeanAbsoluteError()])
Example 11
    def __init__(self):
        super().__init__()
        self.metric = MetricCollection([SumMetric(), DiffMetric()])
        self.sum = 0.0
        self.diff = 0.0
Example 12
class IdTransformerModel(pl.LightningModule):
    def __init__(self, dm, cfg):
        """
        A model that predicts the name of a variable using its usages.

        :param dm: datamodule which contains train, val, test datasets,
        :param cfg: config that contains all needed parameters.
        """
        super(IdTransformerModel, self).__init__()

        self.dm = dm

        self.max_sequence_length = cfg.max_sequence_length
        self.max_num_usages = cfg.max_num_usages
        self.max_target_length = cfg.max_target_length
        self.embedding_dim = cfg.embedding_dim
        self.usage_embedding_dim = cfg.usage_embedding_dim
        self.target_embedding_dim = cfg.target_embedding_dim
        self.num_heads = cfg.num_heads
        self.num_encoder_layers = cfg.num_encoder_layers
        self.num_decoder_layers = cfg.num_decoder_layers
        self.num_usage_layers = cfg.num_usage_layers
        self.dropout = cfg.dropout
        self.sequence_reducer = cfg.sequence_reducer
        self.sequence_encoder_type = cfg.sequence_encoder_type

        self.batch_size = cfg.batch_size
        self.max_lr = cfg.max_lr

        self.encoder = IdEncoder(
            self.max_sequence_length, self.max_num_usages,
            self.dm.usage_vocab_size, self.dm.usage_pad_idx,
            self.embedding_dim, self.num_heads, self.usage_embedding_dim,
            self.num_encoder_layers, self.num_usage_layers, self.dropout,
            self.sequence_encoder_type, self.sequence_reducer)
        self.decoder = IdDecoder(self.max_num_usages, self.max_target_length,
                                 self.dm.target_vocab_size,
                                 self.dm.target_pad_idx,
                                 self.usage_embedding_dim, self.num_heads,
                                 self.target_embedding_dim,
                                 self.num_decoder_layers, self.dropout)

        self.tusk = "generation" if cfg.target_tokenizer_type is not None else "classification"
        self.train_metrics = MetricCollection(self.metrics_dict("train"))
        self.val_metrics = MetricCollection(self.metrics_dict("val"))
        self.test_metrics = MetricCollection(self.metrics_dict("test"))

        self.loss = nn.CrossEntropyLoss()

    def metrics_dict(self, prefix="train"):
        if self.tusk == "classification":
            return {
                f"{prefix}_top1": Top(n=1),
                f"{prefix}_top5": Top(n=5),
                f"{prefix}_MRR": MRR()
            }
        elif self.tusk == "generation":
            ignore_idxs = (self.dm.target_eos_idx, self.dm.target_pad_idx)
            return {
                f"{prefix}_accuracy": Accuracy(),
                f"{prefix}_precision": Precision(ignore_idxs),
                f"{prefix}_recall": Recall(ignore_idxs),
                f"{prefix}_F1": F1(ignore_idxs)
            }
        else:
            raise ValueError(f"{self.tusk} task is not supported")

    def forward(self,
                usages,
                num_usages=None,
                beam_search=True,
                limit_num_usages=10,
                **kwargs):
        inp = usages[:limit_num_usages]
        with torch.no_grad():
            memory = self.encoder(inp.to(self.device), num_usages)
            if beam_search:
                out = self.decoder.beam_search(memory, self.max_target_length,
                                               self.dm.target_init_idx,
                                               self.dm.target_eos_idx,
                                               self.dm.target_pad_idx,
                                               **kwargs)
            else:
                out = self.decoder.greedy_search(memory,
                                                 self.max_target_length,
                                                 self.dm.target_init_idx,
                                                 self.dm.target_eos_idx)
        return out

    def training_step(self, batch, batch_idx):
        loss, predictions, targets = self.step(batch, batch_idx)
        self.log('train_loss',
                 loss,
                 prog_bar=True,
                 on_step=True,
                 on_epoch=True)
        self.train_metrics.update(predictions, targets)
        self.log_dict(self.train_metrics,
                      on_step=False,
                      on_epoch=True,
                      prog_bar=False)
        return loss

    def validation_step(self, batch, batch_idx):
        loss, predictions, targets = self.step(batch, batch_idx)
        self.log('val_loss', loss, prog_bar=True, on_step=True, on_epoch=True)
        self.val_metrics.update(predictions, targets)
        self.log_dict(self.val_metrics,
                      on_step=False,
                      on_epoch=True,
                      prog_bar=False)
        return loss

    def test_step(self, batch, batch_idx):
        loss, predictions, targets = self.step(batch, batch_idx)
        self.log('test_loss', loss, prog_bar=True, on_step=True, on_epoch=True)
        self.test_metrics.update(predictions, targets)
        self.log_dict(self.test_metrics,
                      on_step=False,
                      on_epoch=True,
                      prog_bar=False)

    def step(self, batch, batch_idx):
        src, tgt = batch
        x, num_usages, _ = src  # BxUxL, B
        tgt, tgt_length = tgt  # TxB, B
        memory = self.encoder(x, num_usages)
        out = self.decoder(memory, tgt, num_usages, tgt_length)
        tgt_to_loss = torch.constant_pad_nd(
            tgt, (0, 0, 0, 1), self.dm.target_pad_idx)[1:, ...]  # TxB
        # [[<s>], [<token>], [</s>], [<pad>]] -> [[<token>], [</s>], [<pad>], [<pad>]]
        out_to_loss = out.transpose(1, 2)  # TxBxV -> TxVxB
        return self.loss(out_to_loss, tgt_to_loss), out, tgt  # 1, TxBxV, TxB

    def configure_optimizers(self):
        def schedule(step, warmup_steps=len(self.dm.trainloader)):
            x = (step + 1) / warmup_steps
            return min(1, x**(-0.5))

        optimizer = torch.optim.Adam(self.parameters(), lr=self.max_lr)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, schedule)
        return {
            "optimizer": optimizer,
            "lr_scheduler": {"scheduler": scheduler, "interval": "step"}
        }
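
To make the LambdaLR warm-up above concrete, here is a small standalone check with a hypothetical warm-up length of 4 steps (the model itself uses len(self.dm.trainloader)): the multiplier stays at 1.0 through warm-up and then decays as the inverse square root of the relative step.

import torch

warmup_steps = 4  # hypothetical value for illustration

def schedule(step, warmup_steps=warmup_steps):
    x = (step + 1) / warmup_steps
    return min(1, x ** (-0.5))

optimizer = torch.optim.Adam([torch.nn.Parameter(torch.zeros(1))], lr=1e-3)
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, schedule)

for step in range(8):
    # Effective lr at this step: 1e-3 * min(1, ((step + 1) / warmup_steps) ** -0.5)
    print(step, optimizer.param_groups[0]["lr"])
    optimizer.step()
    scheduler.step()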