def test_wrong_params(average, mdmc_average, num_classes, inputs, ignore_index, top_k, threshold):
    preds, target = inputs.preds, inputs.target

    with pytest.raises(ValueError):
        acc = Accuracy(
            average=average,
            mdmc_average=mdmc_average,
            num_classes=num_classes,
            ignore_index=ignore_index,
            threshold=threshold,
            top_k=top_k,
        )
        acc(preds[0], target[0])
        acc.compute()

    with pytest.raises(ValueError):
        accuracy(
            preds[0],
            target[0],
            average=average,
            mdmc_average=mdmc_average,
            num_classes=num_classes,
            ignore_index=ignore_index,
            threshold=threshold,
            top_k=top_k,
        )
class IrisClassification(pl.LightningModule):
    def __init__(self, **kwargs):
        super().__init__()

        self.train_acc = Accuracy()
        self.val_acc = Accuracy()
        self.test_acc = Accuracy()
        self.args = kwargs

        self.fc1 = nn.Linear(4, 10)
        self.fc2 = nn.Linear(10, 10)
        self.fc3 = nn.Linear(10, 3)
        self.cross_entropy_loss = nn.CrossEntropyLoss()

        self.lr = kwargs.get("lr", 0.01)
        self.momentum = kwargs.get("momentum", 0.9)
        self.weight_decay = kwargs.get("weight_decay", 0.1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return x

    def configure_optimizers(self):
        return torch.optim.SGD(
            self.parameters(), lr=self.lr, momentum=self.momentum, weight_decay=self.weight_decay
        )

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        self.train_acc(torch.argmax(logits, dim=1), y)
        self.log("train_acc", self.train_acc.compute(), on_step=False, on_epoch=True)
        self.log("loss", loss)
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = F.cross_entropy(logits, y)
        self.val_acc(torch.argmax(logits, dim=1), y)
        self.log("val_acc", self.val_acc.compute())
        self.log("val_loss", loss, sync_dist=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = F.cross_entropy(logits, y)
        self.test_acc(torch.argmax(logits, dim=1), y)
        self.log("test_loss", loss)
        self.log("test_acc", self.test_acc.compute())
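# --- Usage sketch (not part of the original snippet) ---
# A minimal example of training the IrisClassification module above.
# Assumptions: pytorch_lightning and scikit-learn are installed and the
# Accuracy metric used by the class is importable in the same file; the
# dataloader here is illustrative only.
import pytorch_lightning as pl
import torch
from sklearn.datasets import load_iris
from torch.utils.data import DataLoader, TensorDataset

iris = load_iris()
features = torch.tensor(iris.data, dtype=torch.float32)
labels = torch.tensor(iris.target, dtype=torch.long)
train_loader = DataLoader(TensorDataset(features, labels), batch_size=16, shuffle=True)

model = IrisClassification(lr=0.01, momentum=0.9, weight_decay=0.1)
trainer = pl.Trainer(max_epochs=5)
trainer.fit(model, train_loader)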
def test_wrong_params(top_k, threshold):
    preds, target = _input_mcls_prob.preds, _input_mcls_prob.target

    with pytest.raises(ValueError):
        acc = Accuracy(threshold=threshold, top_k=top_k)
        acc(preds, target)
        acc.compute()

    with pytest.raises(ValueError):
        accuracy(preds, target, threshold=threshold, top_k=top_k)
class ModelParallelClassificationModel(LightningModule):
    def __init__(self, lr: float = 0.01, num_blocks: int = 5):
        super().__init__()
        self.lr = lr
        self.num_blocks = num_blocks
        self.prepare_data_per_node = True

        self.train_acc = Accuracy()
        self.valid_acc = Accuracy()
        self.test_acc = Accuracy()

    def make_block(self):
        return nn.Sequential(nn.Linear(32, 32, bias=False), nn.ReLU())

    def configure_sharded_model(self) -> None:
        self.model = nn.Sequential(*(self.make_block() for x in range(self.num_blocks)), nn.Linear(32, 3))

    def forward(self, x):
        x = self.model(x)
        # Ensure output is in float32 for softmax operation
        x = x.float()
        logits = F.softmax(x, dim=1)
        return logits

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = F.cross_entropy(logits, y)
        self.log("train_loss", loss, prog_bar=True)
        self.log("train_acc", self.train_acc(logits, y), prog_bar=True, sync_dist=True)
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        self.log("val_loss", F.cross_entropy(logits, y), prog_bar=False, sync_dist=True)
        self.log("val_acc", self.valid_acc(logits, y), prog_bar=True, sync_dist=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        self.log("test_loss", F.cross_entropy(logits, y), prog_bar=False, sync_dist=True)
        self.log("test_acc", self.test_acc(logits, y), prog_bar=True, sync_dist=True)

    def predict_step(self, batch, batch_idx, dataloader_idx=None):
        x, y = batch
        logits = self.forward(x)
        self.test_acc(logits, y)
        return self.test_acc.compute()

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
        return [optimizer], [{"scheduler": lr_scheduler, "interval": "step"}]

    def on_load_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
        if not hasattr(self, "model"):
            self.configure_sharded_model()
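# --- Usage sketch (not part of the original snippet) ---
# configure_sharded_model is only invoked when a sharded training strategy is
# active, so a hedged sketch of how this model might be run under DeepSpeed.
# Assumptions: pytorch_lightning with the "deepspeed_stage_3" strategy string,
# multiple GPUs, and an existing DataLoader named train_loader that yields
# (x, y) batches with x of shape (batch, 32).
import pytorch_lightning as pl

model = ModelParallelClassificationModel(lr=0.01, num_blocks=5)
trainer = pl.Trainer(
    accelerator="gpu",
    devices=2,
    strategy="deepspeed_stage_3",
    max_epochs=1,
)
trainer.fit(model, train_loader)  # train_loader is assumed to be defined elsewhere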
def test_ignore_index(preds, target, ignore_index, exp_result, subset_accuracy):
    ignoreindex = Accuracy(ignore_index=ignore_index, subset_accuracy=subset_accuracy)

    for batch in range(preds.shape[0]):
        ignoreindex(preds[batch], target[batch])

    assert ignoreindex.compute() == exp_result

    assert accuracy(preds, target, ignore_index=ignore_index, subset_accuracy=subset_accuracy) == exp_result
def test_topk_accuracy(preds, target, exp_result, k, subset_accuracy):
    topk = Accuracy(top_k=k, subset_accuracy=subset_accuracy)

    for batch in range(preds.shape[0]):
        topk(preds[batch], target[batch])

    assert topk.compute() == exp_result

    # Test functional
    total_samples = target.shape[0] * target.shape[1]

    preds = preds.view(total_samples, 4, -1)
    target = target.view(total_samples, -1)

    assert accuracy(preds, target, top_k=k, subset_accuracy=subset_accuracy) == exp_result
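# --- Illustration (not part of the original test) ---
# A small, self-contained example of the top-k behaviour exercised above,
# written against the pre-0.11 torchmetrics functional API that these tests
# assume (no `task` argument); the toy tensors are illustrative only.
import torch
from torchmetrics.functional import accuracy

probs = torch.tensor([[0.10, 0.60, 0.30],
                      [0.70, 0.20, 0.10]])
target = torch.tensor([2, 0])

# top-1: only the second sample's argmax matches its target -> 0.5
# top-2: the target class is among the two highest scores for both samples -> 1.0
assert accuracy(probs, target, top_k=1) == torch.tensor(0.5)
assert accuracy(probs, target, top_k=2) == torch.tensor(1.0)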
def test_average_accuracy_bin(preds, target, num_classes, exp_result, average, multiclass):
    acc = Accuracy(num_classes=num_classes, average=average, multiclass=multiclass)

    for batch in range(preds.shape[0]):
        acc(preds[batch], target[batch])

    assert (acc.compute() == tensor(exp_result)).all()

    # Test functional
    total_samples = target.shape[0] * target.shape[1]

    preds = preds.view(total_samples, -1)
    target = target.view(total_samples, -1)

    acc_score = accuracy(preds, target, num_classes=num_classes, average=average, multiclass=multiclass)
    assert (acc_score == tensor(exp_result)).all()
def test_same_input(average):
    preds = _input_miss_class.preds
    target = _input_miss_class.target
    preds_flat = torch.cat(list(preds), dim=0)
    target_flat = torch.cat(list(target), dim=0)

    mc = Accuracy(num_classes=NUM_CLASSES, average=average)
    for i in range(NUM_BATCHES):
        mc.update(preds[i], target[i])
    class_res = mc.compute()
    func_res = accuracy(preds_flat, target_flat, num_classes=NUM_CLASSES, average=average)
    sk_res = sk_accuracy(target_flat, preds_flat)

    assert torch.allclose(class_res, torch.tensor(sk_res).float())
    assert torch.allclose(func_res, torch.tensor(sk_res).float())
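# --- Illustration (not part of the original test) ---
# The test above checks that the class-based and functional metric APIs agree
# with scikit-learn; a minimal sketch of the same equivalence on toy data,
# again assuming the pre-0.11 torchmetrics API.
import torch
from torchmetrics import Accuracy
from torchmetrics.functional import accuracy
from sklearn.metrics import accuracy_score

preds = torch.tensor([0, 2, 1, 1])
target = torch.tensor([0, 1, 1, 1])

metric = Accuracy()
metric.update(preds, target)

assert torch.allclose(metric.compute(), accuracy(preds, target))
assert torch.allclose(metric.compute(), torch.tensor(accuracy_score(target, preds)).float())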
class LightningMNISTClassifier(pl.LightningModule):
    def __init__(self, **kwargs):
        """Initializes the network."""
        super(LightningMNISTClassifier, self).__init__()

        self.train_acc = Accuracy()
        self.val_acc = Accuracy()
        self.test_acc = Accuracy()

        # mnist images are (1, 28, 28) (channels, width, height)
        self.layer_1 = torch.nn.Linear(28 * 28, 128)
        self.layer_2 = torch.nn.Linear(128, 256)
        self.layer_3 = torch.nn.Linear(256, 10)
        self.args = kwargs

    @staticmethod
    def add_model_specific_args(parent_parser):
        parser = ArgumentParser(parents=[parent_parser], add_help=False)
        parser.add_argument(
            "--lr",
            type=float,
            default=0.001,
            metavar="LR",
            help="learning rate (default: 0.001)",
        )
        return parser

    def forward(self, x):
        """
        :param x: Input data

        :return: output - mnist digit label for the input image
        """
        batch_size = x.size()[0]

        # (b, 1, 28, 28) -> (b, 1*28*28)
        x = x.view(batch_size, -1)

        # layer 1 (b, 1*28*28) -> (b, 128)
        x = self.layer_1(x)
        x = torch.relu(x)

        # layer 2 (b, 128) -> (b, 256)
        x = self.layer_2(x)
        x = torch.relu(x)

        # layer 3 (b, 256) -> (b, 10)
        x = self.layer_3(x)

        # probability distribution over labels
        x = torch.log_softmax(x, dim=1)

        return x

    def cross_entropy_loss(self, logits, labels):
        """
        Initializes the loss function

        :return: output - Initialized cross entropy loss function
        """
        return F.nll_loss(logits, labels)

    def training_step(self, train_batch, batch_idx):
        """
        Training the data as batches and returns training loss on each batch

        :param train_batch: Batch data
        :param batch_idx: Batch indices

        :return: output - Training loss
        """
        x, y = train_batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        _, y_hat = torch.max(logits, dim=1)
        self.train_acc(y_hat, y)
        self.log("train_acc", self.train_acc.compute())
        self.log("train_loss", loss)
        return {"loss": loss}

    def validation_step(self, val_batch, batch_idx):
        """
        Performs validation of data in batches

        :param val_batch: Batch data
        :param batch_idx: Batch indices

        :return: output - valid step loss
        """
        x, y = val_batch
        logits = self.forward(x)
        loss = self.cross_entropy_loss(logits, y)
        _, y_hat = torch.max(logits, dim=1)
        self.val_acc(y_hat, y)
        self.log("val_acc", self.val_acc.compute())
        self.log("val_loss", loss, sync_dist=True)

    def test_step(self, test_batch, batch_idx):
        """
        Performs test and computes the accuracy of the model

        :param test_batch: Batch data
        :param batch_idx: Batch indices

        :return: output - Testing accuracy
        """
        x, y = test_batch
        output = self.forward(x)
        _, y_hat = torch.max(output, dim=1)
        self.test_acc(y_hat, y)
        self.log("test_acc", self.test_acc.compute())

    def prepare_data(self):
        """
        Prepares the data for training and prediction
        """
        return {}

    def configure_optimizers(self):
        """
        Initializes the optimizer and learning rate scheduler

        :return: output - Initialized optimizer and scheduler
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=self.args["lr"])
        scheduler = {
            "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                mode="min",
                factor=0.2,
                patience=2,
                min_lr=1e-6,
                verbose=True,
            ),
            "monitor": "val_loss",
        }
        return [optimizer], [scheduler]
class Experiment(pl.LightningModule):
    def __init__(self, num_enc=6, optim_type="Adam", lr=1e-3, weight_decay=0.0):
        super().__init__()
        self.save_hyperparameters()
        self.lr = lr
        fmten_model = Fmten.load_from_checkpoint("pretrain/fmten/epoch=585-step=659835.ckpt")
        self.atom_embedding = fmten_model.atom_embedding
        self.atomic_number_embedding = fmten_model.atomic_number_embedding
        self.mendeleev_number_embedding = fmten_model.mendeleev_number_embedding
        self.position_embedding = fmten_model.position_embedding
        self.lattice_embedding = fmten_model.lattice_embedding
        self.encoder = fmten_model.encoder
        self.readout = ff_output(input_dim=256, output_dim=230)
        self.accuracy = Accuracy()

    @staticmethod
    def Gassian_expand(value_list, min_value, max_value, intervals, expand_width):
        value_list = value_list.expand(-1, -1, intervals)
        centers = torch.linspace(min_value, max_value, intervals).type_as(value_list)
        result = torch.exp(-(value_list - centers) ** 2 / expand_width ** 2)
        return result

    def shared_procedure(self, batch):
        encoded_graph, _ = batch
        # atoms: (batch_size, max_atoms, 59)
        atoms = encoded_graph["atoms"]
        # padding_mask: (batch_size, max_atoms)
        padding_mask = encoded_graph["padding_mask"]
        # each of the following: (batch_size, max_atoms, 1)
        elecneg = encoded_graph["elecneg"]
        covrad = encoded_graph["covrad"]
        FIE = encoded_graph["FIE"]
        elecaffi = encoded_graph["elecaffi"]
        atmwht = encoded_graph["AM"]
        # positions: (batch_size, max_atoms, 3)
        positions = encoded_graph["positions"]

        # each expanded to (batch_size, max_atoms, 80)
        elecneg = self.Gassian_expand(elecneg, 0.5, 4.0, 80, 0.04)
        covrad = self.Gassian_expand(covrad, 50, 250, 80, 2.5)
        FIE = self.Gassian_expand(FIE, 3, 25, 80, 0.28)
        elecaffi = self.Gassian_expand(elecaffi, -3, 3.7, 80, 0.08)
        atmwht = self.Gassian_expand(atmwht, 0, 210, 80, 2.63)

        # (batch_size, max_atoms, 459)
        atoms = torch.cat((atoms, elecneg, covrad, FIE, elecaffi, atmwht), dim=2)
        # (batch_size, max_atoms, atoms_info)
        atoms = self.atom_embedding(atoms)

        positions = positions.unsqueeze(dim=3).expand(-1, -1, 3, 80)
        centers = torch.linspace(-15, 18, 80).type_as(positions)
        # (batch_size, max_atoms, 3, 80)
        positions = torch.exp(-(positions - centers) ** 2 / 0.41 ** 2)
        # (batch_size, max_atoms, 240)
        positions = torch.flatten(positions, start_dim=2)
        # (batch_size, max_atoms, positions_info)
        positions = self.position_embedding(positions)

        # (batch_size, max_atoms)
        atmnb = encoded_graph["AN"]
        atomic_numbers = self.atomic_number_embedding(atmnb)
        # (batch_size, max_atoms)
        mennb = encoded_graph["MN"]
        # (batch_size, max_atoms, atoms_info)
        mendeleev_numbers = self.mendeleev_number_embedding(mennb)

        # (batch_size, max_atoms, atoms_info)
        atoms = atoms + atomic_numbers + mendeleev_numbers + positions

        # lattice: (batch_size, 9, 1)
        lattice = encoded_graph["lattice"]
        # (batch_size, 9, 80)
        lattice = self.Gassian_expand(lattice, -15, 18, 80, 0.41)
        # (batch_size, 720)
        lattice = torch.flatten(lattice, start_dim=1)

        # cell volume: (batch_size, 1, 1)
        cell_volume = torch.log(encoded_graph["CV"])
        # (batch_size, 1, 80)
        cell_volume = self.Gassian_expand(cell_volume, 3, 8, 80, 0.06)
        # (batch_size, 80)
        cell_volume = torch.flatten(cell_volume, start_dim=1)

        # (batch_size, 800)
        lattice = torch.cat((lattice, cell_volume), dim=1)
        # (batch_size, lattice_info)
        lattice = self.lattice_embedding(lattice)
        # (batch_size, 1, lattice_info)
        lattice = torch.unsqueeze(lattice, dim=1)

        # (batch_size, 1+max_atoms, atoms_info)
        atoms = torch.cat((lattice, atoms), dim=1)
        # (1+max_atoms, batch_size, atoms_info)
        atoms = torch.transpose(atoms, dim0=0, dim1=1)

        batch_size = padding_mask.shape[0]
        # (batch_size, 1)
        cls_padding = torch.zeros((batch_size, 1)).bool().type_as(padding_mask)
        # (batch_size, 1+max_atoms)
        padding_mask = torch.cat((cls_padding, padding_mask), dim=1)

        # (1+max_atoms, batch_size, atoms_info)
        atoms = self.encoder(src=atoms, src_key_padding_mask=padding_mask)

        # (batch_size, atoms_info)
        system_out = atoms[0]
        # (batch_size, raman_info)
        output_spectrum = self.readout(system_out)
        return output_spectrum

    def forward(self, batch):
        predicted_spectrum = self.shared_procedure(batch)
        return predicted_spectrum

    def training_step(self, batch, batch_idx):
        _, ramans = batch
        ramans = ramans.squeeze(dim=1)
        predicted_spectrum = self.shared_procedure(batch)
        loss = F.cross_entropy(predicted_spectrum, ramans)
        self.log("train_loss", loss, on_epoch=True, on_step=False)
        probability = F.softmax(predicted_spectrum, dim=1)
        self.accuracy(probability, ramans)
        return loss

    def on_train_epoch_end(self, outputs) -> None:
        self.log("train_acc", self.accuracy.compute())

    def validation_step(self, batch, batch_idx):
        _, ramans = batch
        ramans = ramans.squeeze(dim=1)
        predicted_spectrum = self.shared_procedure(batch)
        loss = F.cross_entropy(predicted_spectrum, ramans)
        self.log("val_loss", loss, on_epoch=True, on_step=False, sync_dist=True)
        probability = F.softmax(predicted_spectrum, dim=1)
        self.accuracy(probability, ramans)
        return loss

    def on_validation_epoch_end(self) -> None:
        self.log("val_acc", self.accuracy.compute())

    def configure_optimizers(self):
        if self.hparams.optim_type == "AdamW":
            optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.hparams.weight_decay)
        else:
            optimizer = torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.hparams.weight_decay)
        scheduler = CosineAnnealingWarmupRestarts(
            optimizer=optimizer,
            first_cycle_steps=200,
            max_lr=self.hparams.lr,
            min_lr=0,
            warmup_steps=30,
        )
        return [optimizer], [scheduler]
class BertNewsClassifier(pl.LightningModule):  # pylint: disable=too-many-ancestors,too-many-instance-attributes
    """Bert Model Class."""

    def __init__(self, **kwargs):
        """Initializes the network, optimizer and scheduler."""
        super(BertNewsClassifier, self).__init__()  # pylint: disable=super-with-arguments
        self.pre_trained_model_name = "bert-base-uncased"  # pylint: disable=invalid-name
        self.bert_model = BertModel.from_pretrained(self.pre_trained_model_name)
        for param in self.bert_model.parameters():
            param.requires_grad = False
        self.drop = nn.Dropout(p=0.2)
        # assigning labels
        self.class_names = ["World", "Sports", "Business", "Sci/Tech"]
        n_classes = len(self.class_names)

        self.fc1 = nn.Linear(self.bert_model.config.hidden_size, 512)
        self.out = nn.Linear(512, n_classes)

        self.scheduler = None
        self.optimizer = None
        self.args = kwargs

        self.train_acc = Accuracy()
        self.val_acc = Accuracy()
        self.test_acc = Accuracy()

        self.preds = []
        self.target = []

    def compute_bert_outputs(  # pylint: disable=no-self-use
        self, model_bert, embedding_input, attention_mask=None, head_mask=None
    ):
        """Computes Bert Outputs.

        Args:
            model_bert : the bert model
            embedding_input : input for bert embeddings.
            attention_mask : attention mask
            head_mask : head mask
        Returns:
            output : the bert output
        """
        if attention_mask is None:
            attention_mask = torch.ones(  # pylint: disable=no-member
                embedding_input.shape[0], embedding_input.shape[1]
            ).to(embedding_input)

        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
        extended_attention_mask = extended_attention_mask.to(
            dtype=next(model_bert.parameters()).dtype
        )  # fp16 compatibility
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

        if head_mask is not None:
            if head_mask.dim() == 1:
                head_mask = head_mask.unsqueeze(0).unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
                head_mask = head_mask.expand(model_bert.config.num_hidden_layers, -1, -1, -1, -1)
            elif head_mask.dim() == 2:
                head_mask = (
                    head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(-1)
                )  # We can specify head_mask for each layer
            head_mask = head_mask.to(
                dtype=next(model_bert.parameters()).dtype
            )  # switch to float if needed + fp16 compatibility
        else:
            head_mask = [None] * model_bert.config.num_hidden_layers

        encoder_outputs = model_bert.encoder(embedding_input, extended_attention_mask, head_mask=head_mask)
        sequence_output = encoder_outputs[0]
        pooled_output = model_bert.pooler(sequence_output)
        outputs = (
            sequence_output,
            pooled_output,
        ) + encoder_outputs[1:]
        return outputs

    def forward(self, input_ids, attention_mask=None):
        """Forward function.

        Args:
            input_ids: Input data
            attention_mask: Attention mask value

        Returns:
            output - Type of news for the given news snippet
        """
        embedding_input = self.bert_model.embeddings(input_ids)
        outputs = self.compute_bert_outputs(self.bert_model, embedding_input, attention_mask)
        pooled_output = outputs[1]
        output = torch.tanh(self.fc1(pooled_output))
        output = self.drop(output)
        output = self.out(output)
        return output

    def training_step(self, train_batch, batch_idx):
        """Training the data as batches and returns training loss on each batch.

        Args:
            train_batch: Batch data
            batch_idx: Batch indices

        Returns:
            output - Training loss
        """
        input_ids = train_batch["input_ids"].to(self.device)
        attention_mask = train_batch["attention_mask"].to(self.device)
        targets = train_batch["targets"].to(self.device)
        output = self.forward(input_ids, attention_mask)
        _, y_hat = torch.max(output, dim=1)  # pylint: disable=no-member
        loss = F.cross_entropy(output, targets)
        self.train_acc(y_hat, targets)
        self.log("train_acc", self.train_acc.compute())
        self.log("train_loss", loss)
        return {"loss": loss, "acc": self.train_acc.compute()}

    def test_step(self, test_batch, batch_idx):
        """Performs test and computes the accuracy of the model.

        Args:
            test_batch: Batch data
            batch_idx: Batch indices

        Returns:
            output - Testing accuracy
        """
        input_ids = test_batch["input_ids"].to(self.device)
        attention_mask = test_batch["attention_mask"].to(self.device)
        targets = test_batch["targets"].to(self.device)
        output = self.forward(input_ids, attention_mask)
        _, y_hat = torch.max(output, dim=1)  # pylint: disable=no-member
        test_acc = accuracy_score(y_hat.cpu(), targets.cpu())
        self.test_acc(y_hat, targets)
        self.preds += y_hat.tolist()
        self.target += targets.tolist()
        self.log("test_acc", self.test_acc.compute())
        return {"test_acc": torch.tensor(test_acc)}  # pylint: disable=no-member

    def validation_step(self, val_batch, batch_idx):
        """Performs validation of data in batches.

        Args:
            val_batch: Batch data
            batch_idx: Batch indices

        Returns:
            output - valid step loss
        """
        input_ids = val_batch["input_ids"].to(self.device)
        attention_mask = val_batch["attention_mask"].to(self.device)
        targets = val_batch["targets"].to(self.device)
        output = self.forward(input_ids, attention_mask)
        _, y_hat = torch.max(output, dim=1)  # pylint: disable=no-member
        loss = F.cross_entropy(output, targets)
        self.val_acc(y_hat, targets)
        self.log("val_acc", self.val_acc.compute())
        self.log("val_loss", loss, sync_dist=True)
        return {"val_step_loss": loss, "acc": self.val_acc.compute()}

    def configure_optimizers(self):
        """Initializes the optimizer and learning rate scheduler.

        Returns:
            output - Initialized optimizer and scheduler
        """
        self.optimizer = AdamW(self.parameters(), lr=self.args.get("lr", 0.001))
        self.scheduler = {
            "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer,
                mode="min",
                factor=0.2,
                patience=2,
                min_lr=1e-6,
                verbose=True,
            ),
            "monitor": "val_loss",
        }
        return [self.optimizer], [self.scheduler]
class IrisClassification(pl.LightningModule):
    def __init__(self, **kwargs):
        super(IrisClassification, self).__init__()

        self.train_acc = Accuracy()
        self.val_acc = Accuracy()
        self.test_acc = Accuracy()
        self.args = kwargs

        self.fc1 = nn.Linear(4, 10)
        self.fc2 = nn.Linear(10, 10)
        self.fc3 = nn.Linear(10, 3)
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return x

    @staticmethod
    def add_model_specific_args(parent_parser):
        """
        Add model specific arguments like learning rate

        :param parent_parser: Application specific parser

        :return: Returns the augmented argument parser
        """
        parser = ArgumentParser(parents=[parent_parser], add_help=False)
        parser.add_argument(
            "--lr",
            type=float,
            default=0.01,
            metavar="LR",
            help="learning rate (default: 0.01)",
        )
        return parser

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), self.args["lr"])

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        _, y_hat = torch.max(logits, dim=1)
        loss = self.cross_entropy_loss(logits, y)
        self.train_acc(y_hat, y)
        self.log(
            "train_acc",
            self.train_acc.compute(),
            on_step=False,
            on_epoch=True,
        )
        self.log("train_loss", loss)
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        _, y_hat = torch.max(logits, dim=1)
        loss = F.cross_entropy(logits, y)
        self.val_acc(y_hat, y)
        self.log("val_acc", self.val_acc.compute())
        self.log("val_loss", loss, sync_dist=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        _, y_hat = torch.max(logits, dim=1)
        self.test_acc(y_hat, y)
        self.log("test_acc", self.test_acc.compute())
class BertNewsClassifier(pl.LightningModule):
    def __init__(self, **kwargs):
        """
        Initializes the network, optimizer and scheduler
        """
        super(BertNewsClassifier, self).__init__()
        self.train_acc = Accuracy()
        self.val_acc = Accuracy()
        self.test_acc = Accuracy()
        self.PRE_TRAINED_MODEL_NAME = "bert-base-uncased"
        self.bert_model = BertModel.from_pretrained(self.PRE_TRAINED_MODEL_NAME)
        for param in self.bert_model.parameters():
            param.requires_grad = False
        self.drop = nn.Dropout(p=0.2)
        # assigning labels
        self.class_names = ["world", "Sports", "Business", "Sci/Tech"]
        n_classes = len(self.class_names)

        self.fc1 = nn.Linear(self.bert_model.config.hidden_size, 512)
        self.out = nn.Linear(512, n_classes)

        self.args = kwargs

    def compute_bert_outputs(self, model_bert, embedding_input, attention_mask=None, head_mask=None):
        if attention_mask is None:
            attention_mask = torch.ones(embedding_input.shape[0], embedding_input.shape[1]).to(embedding_input)

        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
        extended_attention_mask = extended_attention_mask.to(
            dtype=next(model_bert.parameters()).dtype
        )  # fp16 compatibility
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

        if head_mask is not None:
            if head_mask.dim() == 1:
                head_mask = head_mask.unsqueeze(0).unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
                head_mask = head_mask.expand(model_bert.config.num_hidden_layers, -1, -1, -1, -1)
            elif head_mask.dim() == 2:
                head_mask = (
                    head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(-1)
                )  # We can specify head_mask for each layer
            head_mask = head_mask.to(
                dtype=next(model_bert.parameters()).dtype
            )  # switch to float if needed + fp16 compatibility
        else:
            head_mask = [None] * model_bert.config.num_hidden_layers

        encoder_outputs = model_bert.encoder(embedding_input, extended_attention_mask, head_mask=head_mask)
        sequence_output = encoder_outputs[0]
        pooled_output = model_bert.pooler(sequence_output)
        outputs = (
            sequence_output,
            pooled_output,
        ) + encoder_outputs[1:]
        return outputs

    def forward(self, input_ids, attention_mask=None):
        """
        :param input_ids: Input data
        :param attention_mask: Attention mask value

        :return: output - Type of news for the given news snippet
        """
        embedding_input = self.bert_model.embeddings(input_ids)
        outputs = self.compute_bert_outputs(self.bert_model, embedding_input, attention_mask)
        pooled_output = outputs[1]
        output = F.relu(self.fc1(pooled_output))
        output = self.drop(output)
        output = self.out(output)
        return output

    @staticmethod
    def add_model_specific_args(parent_parser):
        """
        Adds model specific arguments like learning rate

        :param parent_parser: Application specific parser

        :return: Returns the augmented argument parser
        """
        parser = ArgumentParser(parents=[parent_parser], add_help=False)
        parser.add_argument(
            "--lr",
            type=float,
            default=0.001,
            metavar="LR",
            help="learning rate (default: 0.001)",
        )
        return parser

    def training_step(self, train_batch, batch_idx):
        """
        Training the data as batches and returns training loss on each batch

        :param train_batch: Batch data
        :param batch_idx: Batch indices

        :return: output - Training loss
        """
        input_ids = train_batch["input_ids"]
        attention_mask = train_batch["attention_mask"]
        targets = train_batch["targets"]
        output = self.forward(input_ids, attention_mask)
        _, y_hat = torch.max(output, dim=1)
        loss = F.cross_entropy(output, targets)
        self.train_acc(y_hat, targets)
        self.log("train_acc", self.train_acc.compute().cpu())
        self.log("train_loss", loss.cpu())
        return {"loss": loss}

    def test_step(self, test_batch, batch_idx):
        """
        Performs test and computes the accuracy of the model

        :param test_batch: Batch data
        :param batch_idx: Batch indices

        :return: output - Testing accuracy
        """
        input_ids = test_batch["input_ids"]
        targets = test_batch["targets"]
        attention_mask = test_batch["attention_mask"]
        output = self.forward(input_ids, attention_mask)
        _, y_hat = torch.max(output, dim=1)
        self.test_acc(y_hat, targets)
        self.log("test_acc", self.test_acc.compute().cpu())

    def validation_step(self, val_batch, batch_idx):
        """
        Performs validation of data in batches

        :param val_batch: Batch data
        :param batch_idx: Batch indices

        :return: output - valid step loss
        """
        input_ids = val_batch["input_ids"]
        targets = val_batch["targets"]
        attention_mask = val_batch["attention_mask"]
        output = self.forward(input_ids, attention_mask)
        _, y_hat = torch.max(output, dim=1)
        loss = F.cross_entropy(output, targets)
        self.val_acc(y_hat, targets)
        self.log("val_acc", self.val_acc.compute().cpu())
        self.log("val_loss", loss, sync_dist=True)

    def configure_optimizers(self):
        """
        Initializes the optimizer and learning rate scheduler

        :return: output - Initialized optimizer and scheduler
        """
        optimizer = AdamW(self.parameters(), lr=self.args["lr"])
        scheduler = {
            "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                mode="min",
                factor=0.2,
                patience=2,
                min_lr=1e-6,
                verbose=True,
            ),
            "monitor": "val_loss",
        }
        return [optimizer], [scheduler]
class CIFAR10Classifier(pl.LightningModule):  # pylint: disable=too-many-ancestors,too-many-instance-attributes
    """Cifar10 model class."""

    def __init__(self, **kwargs):
        """Initializes the network, optimizer and scheduler."""
        super(CIFAR10Classifier, self).__init__()  # pylint: disable=super-with-arguments
        self.model_conv = models.resnet50(pretrained=True)
        for param in self.model_conv.parameters():
            param.requires_grad = False
        num_ftrs = self.model_conv.fc.in_features
        num_classes = 10
        self.model_conv.fc = nn.Linear(num_ftrs, num_classes)

        self.scheduler = None
        self.optimizer = None
        self.args = kwargs

        self.train_acc = Accuracy()
        self.val_acc = Accuracy()
        self.test_acc = Accuracy()

        self.preds = []
        self.target = []

    def forward(self, x_var):
        """Forward function."""
        out = self.model_conv(x_var)
        return out

    def training_step(self, train_batch, batch_idx):
        """Training step.

        Args:
            train_batch : training batch
            batch_idx : batch id number

        Returns:
            train accuracy
        """
        if batch_idx == 0:
            self.reference_image = (train_batch[0][0]).unsqueeze(0)  # pylint: disable=attribute-defined-outside-init
            # self.reference_image.resize((1, 1, 28, 28))
            print("\n\nREFERENCE IMAGE!!!")
            print(self.reference_image.shape)
        x_var, y_var = train_batch
        output = self.forward(x_var)
        _, y_hat = torch.max(output, dim=1)
        loss = F.cross_entropy(output, y_var)
        self.log("train_loss", loss)
        self.train_acc(y_hat, y_var)
        self.log("train_acc", self.train_acc.compute())
        return {"loss": loss}

    def test_step(self, test_batch, batch_idx):
        """Testing step.

        Args:
            test_batch : test batch data
            batch_idx : test batch id

        Returns:
            test accuracy
        """
        x_var, y_var = test_batch
        output = self.forward(x_var)
        _, y_hat = torch.max(output, dim=1)
        loss = F.cross_entropy(output, y_var)
        accelerator = self.args.get("accelerator", None)
        if accelerator is not None:
            self.log("test_loss", loss, sync_dist=True)
        else:
            self.log("test_loss", loss)
        self.test_acc(y_hat, y_var)
        self.preds += y_hat.tolist()
        self.target += y_var.tolist()
        self.log("test_acc", self.test_acc.compute())
        return {"test_acc": self.test_acc.compute()}

    def validation_step(self, val_batch, batch_idx):
        """Validation step.

        Args:
            val_batch : val batch data
            batch_idx : val batch id

        Returns:
            validation accuracy
        """
        x_var, y_var = val_batch
        output = self.forward(x_var)
        _, y_hat = torch.max(output, dim=1)
        loss = F.cross_entropy(output, y_var)
        accelerator = self.args.get("accelerator", None)
        if accelerator is not None:
            self.log("val_loss", loss, sync_dist=True)
        else:
            self.log("val_loss", loss)
        self.val_acc(y_hat, y_var)
        self.log("val_acc", self.val_acc.compute())
        return {"val_step_loss": loss, "val_loss": loss}

    def configure_optimizers(self):
        """Initializes the optimizer and learning rate scheduler.

        Returns:
            output - Initialized optimizer and scheduler
        """
        self.optimizer = torch.optim.Adam(
            self.parameters(),
            lr=self.args.get("lr", 0.001),
            weight_decay=self.args.get("weight_decay", 0),
            eps=self.args.get("eps", 1e-8),
        )
        self.scheduler = {
            "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer,
                mode="min",
                factor=0.2,
                patience=3,
                min_lr=1e-6,
                verbose=True,
            ),
            "monitor": "val_loss",
        }
        return [self.optimizer], [self.scheduler]

    def makegrid(self, output, numrows):  # pylint: disable=no-self-use
        """Makes grids.

        Args:
            output : Tensor output
            numrows : num of rows.

        Returns:
            c_array : grid array
        """
        outer = torch.Tensor.cpu(output).detach()
        plt.figure(figsize=(20, 5))
        b_array = np.array([]).reshape(0, outer.shape[2])
        c_array = np.array([]).reshape(numrows * outer.shape[2], 0)
        i = 0
        j = 0
        while i < outer.shape[1]:
            img = outer[0][i]
            b_array = np.concatenate((img, b_array), axis=0)
            j += 1
            if j == numrows:
                c_array = np.concatenate((c_array, b_array), axis=1)
                b_array = np.array([]).reshape(0, outer.shape[2])
                j = 0
            i += 1
        return c_array

    def show_activations(self, x_var):
        """Shows activations.

        Args:
            x_var: x variable
        """
        # logging reference image
        self.logger.experiment.add_image(
            "input", torch.Tensor.cpu(x_var[0][0]), self.current_epoch, dataformats="HW"
        )

        # logging layer 1 activations
        out = self.model_conv.conv1(x_var)
        c_grid = self.makegrid(out, 4)
        self.logger.experiment.add_image("layer 1", c_grid, self.current_epoch, dataformats="HW")

    def training_epoch_end(self, outputs):
        """Training epoch end.

        Args:
            outputs: outputs of train end
        """
        self.show_activations(self.reference_image)

        # Logging graph
        if self.current_epoch == 0:
            sample_img = torch.rand((1, 3, 64, 64))
            self.logger.experiment.add_graph(CIFAR10Classifier(), sample_img)