Example #1
def trainer():
    return train.Trainer(dataset=DummyDataset(),
                         train_sampler=BaseSampler(),
                         val_sampler=BaseSampler(),
                         model=DummyModel(),
                         loss_fn=NLLLoss(),
                         metric_fn=NLLLoss(),
                         optimizer=Adam,
                         extra_validation_metrics=[NLLLoss()] * 3)
Example #2
    def __init__(self, name: Optional[str] = None):
        """
        Constructor.

        Args:
            name: Name of the module (DEFAULT: None)
        """
        # Call the base constructors.
        # Serialization.__init__(self, name=name)
        torch_NLLLoss.__init__(self)
Example #3
    def fit_model(self, train_csv, valid_csv, test_csv, save_path=None, save_model='dialog_retrieval_model.pth',
                  save_final_model=False):
        train_loader, valid_loader, test_loader = loading_retrieval_data(train_csv, valid_csv, test_csv,
                                                                         max_len=self.seq_len,
                                                                         word_sequence=self.word_sequence,
                                                                         batch_size=self.batch_size)

        model = SANNetwork(vocab_size=len(self.word_sequence.word_dict),
                           embed_size=self.embed_size,
                           rnn_hidden_size=self.rnn_hidden_size,
                           rnn_model=self.rnn_model,
                           output_size=self.output_size,
                           use_bidirectional=self.use_bidirectional,
                           dropout=self.drop_out)

        summary(model)

        self.model = model

        optimizer = Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        criterion = NLLLoss()

        self.model.fit(train_loader=train_loader,
                       valid_loader=valid_loader,
                       optimizer=optimizer,
                       criterion=criterion,
                       device=self.device,
                       epochs=self.epochs,
                       save_dir=save_path,
                       model_file=save_model,
                       save_final_model=save_final_model)

        self.model.test(test_loader=test_loader, criterion=criterion, device=self.device)
Example #4
    def __init__(self, args, encoder, decoder, vocab):
        self.encoder = encoder
        self.decoder = decoder
        self.enc_optimizer = self.add_optimizers(self.encoder, args)
        self.dec_optimizer = self.add_optimizers(self.decoder, args)

        self.vocab = vocab
        self.create_embeddings()
        self.start_token = vocab.word_to_ind['<s>']
        self.end_token = vocab.word_to_ind['</s>']
        # self.evaluator = evaluator

        self.train_data = DialogueBatcher(vocab, "train")
        self.val_data = DialogueBatcher(vocab, "valid")
        # self.test_data = DialogueBatcher(vocab, "test")

        self.summary_dir = args.summary_dir
        self.verbose = args.verbose
        self.criterion = NLLLoss()
        self.teach_ratio = args.teacher_forcing_ratio
        self.grad_clip = args.grad_clip

        self.train_iterations = self.train_data.num_per_epoch * args.min_epochs
        self.val_iterations = self.train_data.num_per_epoch * args.min_epochs
        self.print_every = args.print_every
        self.val_every = args.val_every
Example #5
    def __init__(self, in_dim, channels, kernel_size, layers, filters,
                 dist_size, masked_conv_class):
        super().__init__()
        self.in_dim = in_dim
        self.channels = channels
        self.kernel_size = kernel_size
        self.filters = filters
        self.layers = layers
        self.dist_size = dist_size
        self.mconv = masked_conv_class
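        # Half padding so each masked convolution preserves the spatial size (assuming an odd kernel size and stride 1)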
        p = int((self.kernel_size - 1) / 2)

        self.net = ModuleList()
        self.net.append(
            self.mconv('A', self.channels, self.filters, self.kernel_size, p))
        self.net.append(ReLU())
        for _ in range(self.layers - 1):
            self.net.append(
                ResBlock('B', self.filters, self.filters, self.kernel_size,
                         self.mconv))
        self.net.append(self.mconv('B', self.filters, self.filters, 1, 0))
        self.net.append(ReLU())
        self.net.append(
            self.mconv('B', self.filters, self.dist_size * self.channels, 1,
                       0))

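        # NLLLoss expects log-probabilities, so the network output is passed through LogSoftmax before the loss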
        self.log_softmax = LogSoftmax(dim=2)
        self.loss = NLLLoss(reduction='sum')
        print(self)
Example #6
def hyper_parameters_grid_search(base_folder: str, file_base_name: str,
                                 num_to_class: Dict):
    batches = [32, 64, 128, 256]
    lrs = [0.01, 0.001, 0.0001]
    epochs = [10, 15, 20]
    loss_funcs = [NLLLoss(), CrossEntropyLoss()]
    best_values = {'loss': 0, 'accuracy': 0, 'predictions': None, 'index': 0}
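    # Exhaustive grid search over every (batch size, lr, epochs, loss) combination; the most accurate run is kept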
    for i, (batch_size, lr, n_epochs, loss_func) in enumerate(
            itertools.product(batches, lrs, epochs, loss_funcs)):
        print(
            f'Run #{i + 1} --> batch: {batch_size}, lr: {lr}, epochs: {n_epochs}, loss_func: {loss_func}'
        )
        loss, accuracy, preds = main(batch_size, lr, n_epochs, loss_func,
                                     base_folder)
        write_predictions_to_file(preds, f'{file_base_name}_{i + 1}',
                                  num_to_class)
        if accuracy > best_values['accuracy']:
            best_values['index'] = i + 1  # match the 1-based run numbering printed above
            best_values['loss'] = loss
            best_values['accuracy'] = accuracy
            best_values['predictions'] = preds
    print(
        f"Best: run #{best_values['index']} --> loss: {best_values['loss']}, accuracy: {best_values['accuracy']}"
    )
    return best_values
Example #7
def show_dialogues(val_data, encoder, decoder, task):
  encoder.eval()
  decoder.eval()
  dialogues = data_io.select_consecutive_pairs(val_data, 5)

  for i, dialog in enumerate(dialogues):
    print("Dialogue Sample {} ------------".format(i))
    for j, turn in enumerate(dialog):
      input_variable, output_variable = turn
      _, predictions, _ = run_inference(encoder, decoder, input_variable, \
                      output_variable, criterion=NLLLoss(), teach_ratio=0)
      sources = input_variable.data.tolist()
      targets = output_variable.data.tolist()

      source_tokens = [vocab.index_to_word(s[0], task) for s in sources]
      target_tokens = [vocab.index_to_word(t[0], task) for t in targets]
      pred_tokens = [vocab.index_to_word(p, task) for p in predictions]

      source = " ".join(source_tokens[:-1]) # Remove the <EOS>
      target = " ".join(target_tokens[:-1])
      pred = " ".join(pred_tokens[:-1])
      print("User Query: {0}".format(source))
      print("Target Response: {0}".format(target))
      print("Predicted Response: {0}".format(pred))
    print('')
Example #8
def PN_train(train_loader,model,
    optimizer,writer,iter_counter,alpha):

    test_shot = model.shots[-1]
    way = model.way

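    # Episode targets: class k labels the k-th block of test_shot consecutive query samples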
    target = torch.LongTensor([i//test_shot for i in range(test_shot*way)]).cuda()
    criterion = NLLLoss().cuda()
    criterion_part = BCEWithLogitsLoss().cuda()

    lr = optimizer.param_groups[0]['lr']

    writer.add_scalar('lr',lr,iter_counter)

    avg_proto_loss = 0
    avg_heatmap_loss = 0
    avg_total_loss = 0
    avg_acc = 0

    for i, ((inp,mask),_) in enumerate(train_loader):

        iter_counter += 1      
        inp = inp.cuda()
        mask = mask.cuda()

        if iter_counter%1000==0:
            model.eval()
            util.visualize(model,writer,iter_counter,inp[:9],mask[:9])
            model.train()
        
        log_prediction,heatmap_logits = model(inp,mask)

        loss_heatmap = criterion_part(heatmap_logits,mask)
        loss_proto = criterion(log_prediction,target)
        loss = alpha*loss_heatmap+loss_proto
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _,max_index = torch.max(log_prediction,1)
        acc = 100*torch.sum(torch.eq(max_index,target)).item()/test_shot/way

        avg_acc += acc
        avg_total_loss += loss.item()
        avg_proto_loss += loss_proto.item()
        avg_heatmap_loss += loss_heatmap.item()

    avg_total_loss = avg_total_loss/(i+1)
    avg_proto_loss = avg_proto_loss/(i+1)
    avg_heatmap_loss = avg_heatmap_loss/(i+1)
    avg_acc = avg_acc/(i+1)

    writer.add_scalar('total_loss',avg_total_loss,iter_counter)
    writer.add_scalar('proto_loss',avg_proto_loss,iter_counter)
    writer.add_scalar('heatmap_loss',avg_heatmap_loss,iter_counter)

    writer.add_scalar('train_acc',avg_acc,iter_counter)

    return iter_counter,avg_acc
Example #9
    def __init__(self, input_size, hidden_size, n_layers, batch_size,
                 learning_rate):
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.learning_rate = learning_rate
        self.device = 'cuda:0' if torch.cuda.is_available() else "cpu"

        dataset = TranslateDataset()
        self.dataloader = DataLoader(dataset=dataset,
                                     batch_size=self.batch_size,
                                     shuffle=True)
        self.char2index, self.index2char = dataset.char2index, dataset.index2char
        self.vocab_size = dataset.len

        self.encoder = EncoderRNN(self.input_size, self.hidden_size,
                                  self.vocab_size, self.n_layers,
                                  self.batch_size)
        self.decoder = DecoderRNN(self.input_size, self.hidden_size,
                                  self.vocab_size, self.n_layers,
                                  self.batch_size)

        self.encoder.to(self.device)
        self.decoder.to(self.device)

        self.loss = NLLLoss()

        self.encoder_optim = torch.optim.SGD(self.encoder.parameters(),
                                             lr=self.learning_rate)
        self.decoder_optim = torch.optim.SGD(self.decoder.parameters(),
                                             lr=self.learning_rate)
Example #10
    def forward(self, input_ids: torch.tensor, attention_mask: torch.tensor,
                token_type_ids: torch.tensor, intent_label: torch.tensor,
                example_input: torch.tensor, example_mask: torch.tensor,
                example_token_types: torch.tensor,
                example_intents: torch.tensor):
        example_pooled_output = self.encode(input_ids=example_input,
                                            attention_mask=example_mask,
                                            token_type_ids=example_token_types)

        pooled_output = self.encode(input_ids=input_ids,
                                    attention_mask=attention_mask,
                                    token_type_ids=token_type_ids)

        pooled_output = self.dropout(pooled_output)
        probs = torch.softmax(pooled_output.mm(example_pooled_output.t()),
                              dim=-1)

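        # Pool the similarity distribution over the examples into per-intent probabilities; the 1e-6 keeps the log below finite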
        intent_probs = 1e-6 + torch.zeros(
            probs.size(0), self.num_intent_labels).cuda().scatter_add(
                -1,
                example_intents.unsqueeze(0).repeat(probs.size(0), 1), probs)

        # Compute losses if labels provided
        if intent_label is not None:
            loss_fct = NLLLoss()
            intent_lp = torch.log(intent_probs)
            intent_loss = loss_fct(intent_lp.view(-1, self.num_intent_labels),
                                   intent_label.type(torch.long))
        else:
            intent_loss = torch.tensor(0)

        return intent_probs, intent_loss
Example #11
def child_adience_ldl_loss(model_out, gt):
    dist, minor, adience = model_out
    ldl, minor_gt, adience_gt = gt
    lf = NLLLoss(reduction='mean')
    kl = KLDivLoss(reduction='batchmean')

    return kl(dist, ldl) + lf(minor, minor_gt) + lf(adience, adience_gt)
Example #12
def create_model(word_sequence, output_size):
    # Build the LSTM model
    if config.model.lower() == 'lstm':
        model = LSTM_Model(len(word_sequence),
                           embed_size=config.lstm_embed_size,
                           hidden_size=config.lstm_hidden_size,
                           output_size=output_size,
                           num_layers=config.lstm_num_layers,
                           drop_out=config.dropout)

    # Build the FNN model
    elif config.model.lower() == 'fnn':
        model = FNN_Model(len(word_sequence.word_dict),
                          output_size=output_size,
                          hidden_num=config.fnn_hidden,
                          dropout=config.dropout)
    else:
        raise ValueError(f'Unsupported model type: {config.model}')

    print(
        "---------------------------- model summary ----------------------------------"
    )
    print(model)
    print(
        "-----------------------------------------------------------------------------"
    )
    print()

    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    criterion = NLLLoss()

    return (model, optimizer, criterion)
Example #13
 def forward(self, sample: Dict[str,
                                Any]) -> Dict[str, Any]:  # type: ignore
     """Forward pass of an embedder, encoder and decoder."""
     if "forward" in sample:
         raise RuntimeError("Forward already computed.")
     if "loss" in sample:
         raise RuntimeError("Loss already computed.")
     graph, etypes = sample[self.graph_field_name]
     features = [
         sample[field_name] for field_name in self.feature_field_names
     ]
     formatting_indexes = sample[self.indexes_field_name].indexes
     graph = self.graph_embedder(graph=graph, features=features)
     encodings = self.graph_encoder(graph=graph,
                                    feat=graph.ndata["x"],
                                    etypes=etypes)
     label_encodings = self.selector(tensor=encodings,
                                     indexes=formatting_indexes)
     projections = self.class_projection(label_encodings)
     softmaxed = self.softmax(projections)
     labels = sample[self.label_field_name]
     sample["forward"] = softmaxed
     if labels is not None:
         sample["loss"] = NLLLoss(weight=softmaxed.new([
             graph.batch_size,
             formatting_indexes.numel() - graph.batch_size
         ]))(softmaxed, labels)
     return sample
Example #14
    def __init__(self, input_size, hidden_size, batch_size, learning_rate,
                 num_epoch, method):
        dataset = Seq2SeqDataset()

        self.vocab = sorted(set(dataset.full_text))
        self.vocab_size = len(self.vocab)
        self.char2ind, self.ind2char = self.get_vocab()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = self.vocab_size
        self.method = method
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.num_epoch = num_epoch
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

        self.dataloader = DataLoader(dataset=dataset,
                                     batch_size=batch_size,
                                     shuffle=True)

        self.encoder = Encoder(input_size, hidden_size, self.vocab_size)
        self.decoder = Decoder(hidden_size, self.output_size, method)

        self.encoder = self.encoder.to(self.device)
        self.decoder = self.decoder.to(self.device)

        self.loss_function = NLLLoss()

        self.encoder_optim = optim.Adam(self.encoder.parameters(),
                                        lr=self.learning_rate)
        self.decoder_optim = optim.Adam(self.decoder.parameters(),
                                        lr=self.learning_rate)
Example #15
def train_PN_stage_1(train_loader, model, optimizer, writer, iter_counter,
                     alpha):

    lr = optimizer.param_groups[0]['lr']
    writer.add_scalar('lr', lr, iter_counter)
    criterion = NLLLoss().cuda()
    criterion_part = BCEWithLogitsLoss().cuda()

    avg_dynamic_loss = 0
    avg_heatmap_loss = 0
    avg_total_loss = 0
    avg_acc = 0

    for i, ((inp, mask), target) in enumerate(train_loader):

        iter_counter += 1
        batch_size = target.size(0)

        inp = inp.cuda()
        mask = mask.cuda()

        target = target.cuda()

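        # Every 1000 iterations, switch to eval mode to visualise a few samples, then resume training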
        if iter_counter % 1000 == 0:
            model.eval()
            util.visualize(model, writer, iter_counter, inp[:9], mask[:9])
            model.train()

        log_prediction, heatmap_logits = model.forward_stage_1(inp, mask)

        loss_heatmap = criterion_part(heatmap_logits, mask)
        loss_dynamic = criterion(log_prediction, target)
        loss = alpha * loss_heatmap + loss_dynamic

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, max_index = torch.max(log_prediction, 1)
        acc = 100 * (torch.sum(torch.eq(max_index, target)).float() /
                     batch_size).item()

        avg_acc += acc
        avg_total_loss += loss.item()
        avg_dynamic_loss += loss_dynamic.item()
        avg_heatmap_loss += loss_heatmap.item()

    avg_total_loss = avg_total_loss / (i + 1)
    avg_dynamic_loss = avg_dynamic_loss / (i + 1)
    avg_heatmap_loss = avg_heatmap_loss / (i + 1)
    avg_acc = avg_acc / (i + 1)

    writer.add_scalar('total_loss', avg_total_loss, iter_counter)
    writer.add_scalar('dynamic_loss', avg_dynamic_loss, iter_counter)
    writer.add_scalar('heatmap_loss', avg_heatmap_loss, iter_counter)

    writer.add_scalar('train_acc', avg_acc, iter_counter)

    return iter_counter, avg_acc
Example #16
def train(net,
          train_loader,
          test_loader,
          path='models/',
          epochs=10,
          plot_train=False):
    optimizer = optim.Adam(net.parameters(), lr=0.01)
    accumulate_grad_steps = 50
    nllloss = NLLLoss(ignore_index=-1)
    loss_func = partial(nll_loss_func, nllloss=nllloss)
    # klloss = KLDivLoss(reduction='batchmean')
    # loss_func = partial(kl_loss_func, klloss=klloss)
    net.train()
    device = net.device
    if net.use_coda:
        net.cuda()
    print("Training Started")
    test_loss_lst, test_acc_lst, train_loss_lst, train_acc_lst, time_lst = [], [], [], [], []
    best_acc = 0
    for epoch in range(epochs):
        t0 = time.time()
        weights = 0
        for i, sentence in enumerate(train_loader):
            headers = sentence[2].to(device)
            sentence_len = sentence[3][0]
            scores = net(sentence)
            loss = loss_func(scores, headers)
            loss = loss * sentence_len
            weights += sentence_len
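            # Step the optimizer only every accumulate_grad_steps sentences; the other iterations just accumulate gradients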
            if i % accumulate_grad_steps == 0:
                loss = loss / weights
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(net.parameters(), 0.5)
                optimizer.step()
                net.zero_grad()
                weights = 0
            else:
                loss.backward()
        test_acc, test_loss = predict(net, device, test_loader, loss_func)
        test_loss_lst.append(test_loss)
        test_acc_lst.append(test_acc)
        if plot_train:
            train_acc, train_loss = predict(net, device, train_loader,
                                            loss_func)
            train_loss_lst.append(train_loss)
            train_acc_lst.append(train_acc)
        if best_acc < test_acc and epoch > 5 and test_acc > 0.88:
            tmp_path = path + '_epoch_' + str(epoch) + '_acc_' + str(
                np.round(test_acc, 4)).replace('.', '') + '.pt'
            net.save(tmp_path)
            best_acc = test_acc
        ctime = (time.time() - t0) / 60
        time_lst.append(ctime)
        print(
            f"Epoch [{epoch + 1}/{epochs}] Completed \t Test Loss: {test_loss:.3f}"
            f" \t Test Accuracy: {test_acc:.3f} \t Time: {ctime:.2f}")
    plot(test_acc_lst, test_loss_lst, time_lst, path + '_test_plot.png')
    if plot_train:
        plot(train_acc_lst, train_loss_lst, time_lst, path + '_train_plot.png')
Example #17
 def __init__(self, n_features, n_hidden_units, n_classes, lr=0.01, n_hidden_layers=1):
     super(GraphSAGE, self).__init__()
     self.convs = [SAGEConv(n_features, n_hidden_units)] + [SAGEConv(n_hidden_units, n_hidden_units) for _ in range(n_hidden_layers-1)]
     # ModuleList registers the hidden graph convolutions as sub-modules so self.parameters() picks them up
     self.convs = torch.nn.ModuleList(self.convs)
     self.output = SAGEConv(n_hidden_units, n_classes)
     
     self.loss = NLLLoss()
     self.optimizer = Adam(self.parameters(), lr=lr, weight_decay=5e-4)
Example #18
 def __init__(self, config, num_labels=2, num_splices=None):
     super(BertForSplicedSequenceClassification,
           self).__init__(config=config, num_labels=num_labels)
     self.num_splices = num_splices
     self.softmax = nn.Softmax(dim=1)
     self.loc_softmax = nn.Softmax(dim=0)
     #self.loc_weights = nn.Parameter(torch.ones(num_splices))
     self.loss_fct = NLLLoss()
Example #19
    def epoch_train(self):
        model = Seq2Seq()
        optimizer = torch.optim.SGD(model.parameters(), lr=self.learning_rate)
        loss_func = NLLLoss()

        for i in range(self.n_epoch):
            loss = self.train(model, optimizer, loss_func)
            a = self.eval('G')
            print(loss)
Example #20
 def pow_loss(self, logits, labels):
     # Normalise the squared logits into a distribution and take its log, so NLLLoss receives log-probabilities
     result = torch.log(
         torch.pow(logits, 2) /
         torch.sum(torch.pow(logits, 2), dim=-1, keepdim=True))
     # piecewise = MyReLU.apply
     # result = piecewise(result) * -1
     loss_function = NLLLoss()
     result = loss_function(result, labels)
     return result
Example #21
def _create_xor_model():
    mlp = MLP(2, 10, 2, 2, False)
    loss_model = CompareModel(mlp, NLLLoss())
    data_model = DataModel(loss_model, {
        "train": XORDataset(train=True),
        "test": XORDataset(train=False)
    })
    model = ModelWrapper(data_model)
    return model
Example #22
def consistency_loss(model,
                     batch,
                     loss_weight=1e-2,
                     max_loss=5.0,
                     mode="kl",
                     min_prob=0.75,
                     **kwargs):
    min_prob = torch.Tensor([min_prob])[0].to(model.device)
    max_loss = torch.Tensor([max_loss])[0].to(model.device)
    x = batch["x"].reshape((-1, ) + batch["x"].shape[2:]).to(model.device)
    letters = batch["letters"].reshape((-1, ) + batch["letters"].shape[2:]).to(
        model.device)
    output = model(letters, inputs=x)
    output = output.reshape(batch["x"].shape[:2] + output.shape[1:])
    n_paradigms, n_classes = output.shape[1], output.shape[-1]
    indexes = batch["lcs"].unsqueeze(dim=-1).repeat(1, 1, 1,
                                                    n_classes).to(model.device)
    mask = batch["lcs_mask"].unsqueeze(dim=-1).repeat(1, 1, 1, n_classes).to(
        model.device)
    lcs_output = torch.gather(output, 2, indexes)
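    # "majority" mode trains each paradigm towards the majority-vote label; the default mode matches each prediction to the mean distribution instead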
    if mode == "majority":
        _, lcs_labels = torch.max(lcs_output, dim=-1)
        lcs_labels_one_hot = torch.nn.functional.one_hot(
            lcs_labels, n_classes).int()
        _, lcs_votes = torch.max(torch.sum(lcs_labels_one_hot, dim=1),
                                 dim=-1)  # B * L
        target_labels = lcs_votes.unsqueeze(dim=1).repeat(1, n_paradigms,
                                                          1)  # B * Z * L
        loss = NLLLoss(reduction="none")(lcs_output.permute(0, 3, 1, 2),
                                         target_labels)
        loss = loss * batch["lcs_mask"]
        loss = nn.functional.relu(loss + torch.log(min_prob))
    else:
        lcs_output_probs = torch.exp(lcs_output)
        mean_probs = lcs_output_probs.mean(dim=1)
        target_probs = mean_probs.unsqueeze(dim=1).repeat(1, n_paradigms, 1, 1)
        #     print(target_probs.shape)
        loss = torch.min(
            torch.max(
                -max_loss,
                target_probs * (torch.log(target_probs) - lcs_output) * mask),
            max_loss).mean()
    loss = loss_weight * loss.mean()
    return {"consistency_loss": loss}
Example #23
    def __init__(self, parameters):
        self.params = parameters

        # Transform applied to each image
        transform = transforms.Compose(
            [transforms.ToTensor(),
             ImageTransform(self.params)])

        # Initialize datasets
        self.trainset = MNIST(root=self.params.dataset_dir,
                              train=True,
                              download=True,
                              transform=transform)
        self.testset = MNIST(
            root=self.params.dataset_dir,
            train=False,
            download=True,
            transform=transform,
        )

        # Initialize loaders
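        # shuffle stays False because the explicit RandomSampler below already shuffles the training set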
        self.trainloader = DataLoader(
            self.trainset,
            batch_size=self.params.batch_size,
            shuffle=False,
            num_workers=self.params.num_workers,
            sampler=RandomSampler(self.trainset),
        )

        self.testloader = DataLoader(
            self.testset,
            batch_size=self.params.batch_size,
            shuffle=False,
            num_workers=self.params.num_workers,
        )

        # Checking for GPU
        self.use_gpu = self.params.use_gpu and torch.cuda.is_available()
        self.device = torch.device("cuda:0" if self.use_gpu else "cpu")

        # Initialize model
        self.model = MNIST_Network(self.params)
        self.model.to(self.device)

        print(self.model)

        print("Number of parameters = {}".format(self.model.num_parameters()))

        # Setup optimizer
        self.optimizer = self.optimizer_select()

        # Criterion
        self.criterion = NLLLoss()
Example #24
    def forward(self, input_ids: torch.tensor, attention_mask: torch.tensor,
                token_type_ids: torch.tensor, slot_labels: torch.tensor,
                example_word_inds: torch.tensor, example_input: torch.tensor,
                example_mask: torch.tensor, example_token_types: torch.tensor,
                example_slots: torch.tensor):
        example_hidden_states = self.encode(input_ids=example_input,
                                            attention_mask=example_mask,
                                            token_type_ids=example_token_types)

        hidden_states = self.encode(input_ids=input_ids,
                                    attention_mask=attention_mask,
                                    token_type_ids=token_type_ids)

        hidden_states = self.dropout(hidden_states)

        # relevant example states
        example_hidden = example_hidden_states[
            torch.arange(example_hidden_states.size(0)), example_word_inds]
        # Compute probabilities by copying from examples
        probs = torch.softmax(hidden_states.bmm(
            example_hidden.t().unsqueeze(0).repeat(hidden_states.size(0), 1,
                                                   1)),
                              dim=-1)
        example_slots = example_slots.view(1, 1, example_slots.size(0)).repeat(
            probs.size(0), probs.size(1), 1)
        slot_probs = 1e-6 + torch.zeros(
            probs.size(0), probs.size(1),
            self.num_slot_labels).cuda().scatter_add(-1, example_slots, probs)

        # Log-probabilities are needed both for the loss and for the returned logits
        slot_logits = torch.log(slot_probs)

        # Compute losses if labels provided
        if slot_labels is not None:
            loss_fct = NLLLoss()

            # Only keep active parts of the loss
            if attention_mask is not None:
                active_loss = attention_mask.view(-1) == 1
                active_logits = slot_logits.view(
                    -1, self.num_slot_labels)[active_loss]
                active_labels = slot_labels.view(-1)[active_loss]
                slot_loss = loss_fct(active_logits,
                                     active_labels.type(torch.long))
            else:
                slot_loss = loss_fct(
                    slot_logits.view(-1, self.num_slot_labels),
                    slot_labels.view(-1).type(torch.long))
        else:
            slot_loss = torch.tensor(
                0).cuda() if torch.cuda.is_available() else torch.tensor(0)

        return slot_logits, slot_loss
Example #25
    def __init__(self,
                 transformer: OpenaiTransformer,
                 metrics: Dict[str, Any] = None,
                 accuracy_top_k: List = None):
        super(BaseLMHead, self).__init__()
        self.transformer = transformer
        self._metrics = metrics
        self._accuracy_top_k = accuracy_top_k

        self._decoder = self.transformer.decoder

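        # ignore_index=0 makes the loss skip targets equal to 0 (presumably the padding id)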
        self.log_softmax = nn.LogSoftmax(dim=1)
        self.loss = NLLLoss(ignore_index=0)
Example #26
    def __init__(self, input_len, out_length, loss=None):
        super().__init__()
        self.linear = Linear(input_len, input_len)
        self.activation = ReLU()

        self.linear_2 = Linear(input_len, out_length)

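        # Default to a per-sample loss (reduction='none') unless a custom loss is supplied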
        if not loss:
            self.loss = NLLLoss(reduction='none')
        else:
            self.loss = loss

        self.softmax = LogSoftmax()
Example #27
    def __init__(self, model, lr, criterion_num=0, optimizer_num=0):
        self.model = model

        criterions = [CrossEntropyLoss(),
                      NLLLoss()]
        self.criterion = criterions[criterion_num]

        optimizers = [Adam(self.model.parameters(), lr=lr),
                      SGD(self.model.parameters(), lr=lr)]

        self.optimizer = optimizers[optimizer_num]

        cuda = torch.cuda.is_available()
        self.device = torch.device("cuda:0" if cuda else "cpu")
Example #28
 def criterion(self, predict, target, ignore_index=-1, size_average=True):
     """
     criterion for FCN, the predict is (n, c, w, h), c is classes
     :param predict:
     :param target:
     :param ignore_index:
     :param size_average:
     :return:
     """
     log_p = F.log_softmax(predict, dim=1)
     # log_p holds log-probabilities (log_softmax + NLLLoss is equivalent to CrossEntropyLoss on raw logits); targets are class indices in 0..n_classes-1
     nll_loss = NLLLoss(ignore_index=ignore_index)
     loss = nll_loss(log_p, target)
     return loss
Example #29
def init_losses(
    task_dict: dict[str, TaskType],
    loss_dict: dict[str, Literal["L1", "L2", "CSE"]],
    robust: bool = False,
) -> dict[str, tuple[str, type[torch.nn.Module]]]:
    """_summary_

    Args:
        task_dict (dict[str, TaskType]): Map of target names to "regression" or "classification".
        loss_dict (dict[str, "L1" | "L2" | "CSE"]): Map of target names to loss functions.
        robust (bool, optional): Whether to use an uncertainty adjusted loss. Defaults to False.

    Returns:
        dict[str, tuple[str, type[torch.nn.Module]]]: Dictionary of losses for each task
    """
    criterion_dict: dict[str, tuple[str, type[torch.nn.Module]]] = {}
    for name, task in task_dict.items():
        # Select Task and Loss Function
        if task == "classification":
            if loss_dict[name] != "CSE":
                raise NameError(
                    "Only CSE loss allowed for classification tasks")

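            # NLLLoss expects log-probabilities from the robust head, whereas CrossEntropyLoss works directly on logits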
            if robust:
                criterion_dict[name] = (task, NLLLoss())
            else:
                criterion_dict[name] = (task, CrossEntropyLoss())

        elif task == "regression":
            if robust:
                if loss_dict[name] == "L1":
                    criterion_dict[name] = (task, RobustL1Loss)
                elif loss_dict[name] == "L2":
                    criterion_dict[name] = (task, RobustL2Loss)
                else:
                    raise NameError(
                        "Only L1 or L2 losses are allowed for robust regression tasks"
                    )
            else:
                if loss_dict[name] == "L1":
                    criterion_dict[name] = (task, L1Loss())
                elif loss_dict[name] == "L2":
                    criterion_dict[name] = (task, MSELoss())
                else:
                    raise NameError(
                        "Only L1 or L2 losses are allowed for regression tasks"
                    )

    return criterion_dict
Example #30
def default_train(train_loader,model,
    optimizer,writer,iter_counter):

    way = model.way
    test_shot = model.shots[-1]
    target = torch.LongTensor([i//test_shot for i in range(test_shot*way)]).cuda()
    criterion = NLLLoss().cuda()

    lr = optimizer.param_groups[0]['lr']

    writer.add_scalar('lr',lr,iter_counter)

    avg_loss = 0
    avg_acc = 0

    for i, (inp,_) in enumerate(train_loader):

        iter_counter += 1

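        # Batches may arrive as (image, mask) pairs or as a single tensor; handle both layouts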
        if isinstance(inp,list):
            (image_inp,mask) = inp
            image_inp = image_inp.cuda()
            mask = mask.cuda()
            log_prediction = model(image_inp,mask)

        elif isinstance(inp,torch.Tensor):
            inp = inp.cuda()
            log_prediction = model(inp)
        
        loss = criterion(log_prediction,target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        _,max_index = torch.max(log_prediction,1)
        acc = 100*torch.sum(torch.eq(max_index,target)).item()/test_shot/way

        avg_acc += acc
        avg_loss += loss_value

    avg_acc = avg_acc/(i+1)
    avg_loss = avg_loss/(i+1)

    writer.add_scalar('proto_loss',avg_loss,iter_counter)
    writer.add_scalar('train_acc',avg_acc,iter_counter)

    return iter_counter,avg_acc