def __init__(self, args, problem, use_cuda):
    """Initialize model training with arguments and problem.

    :param args: command line arguments.
    :param problem: the problem to train on.
    :param use_cuda: when True, use the GPU.
    """
    self.max_regularization_examples = args.num_shaving if hasattr(args, "num_shaving") else 0
    self.max_validation_examples = args.num_validation if hasattr(args, "num_validation") else 0
    self.max_training_examples = args.num_training if hasattr(args, "num_training") else 0
    max_examples_per_epoch = args.max_examples_per_epoch if hasattr(args, 'max_examples_per_epoch') else None
    self.max_examples_per_epoch = max_examples_per_epoch if max_examples_per_epoch is not None \
        else self.max_regularization_examples
    self.criterion = problem.loss_function()
    self.criterion_multi_label = MultiLabelSoftMarginLoss()
    self.args = args
    self.problem = problem
    self.best_acc = 0
    self.start_epoch = 0
    self.use_cuda = use_cuda
    self.mini_batch_size = problem.mini_batch_size()
    self.net = None
    self.optimizer_training = None
    self.scheduler_train = None
    self.unsuploader = self.problem.reg_loader()
    self.trainloader = self.problem.train_loader()
    self.testloader = self.problem.test_loader()
    self.is_parallel = False
    self.best_performance_metrics = None
    self.failed_to_improve = 0
    self.confusion_matrix = None
    self.best_model_confusion_matrix = None
    self.published_reconstruction_loss = False
import logging

import torch
from torch.nn import MultiLabelSoftMarginLoss
from torch.optim import Adam


def start_train():
    # Train the custom convolutional neural network.
    model = CaptchaModelCNN().cuda()
    model.train()  # training mode
    logging.info('Train start')
    # Loss function.
    criterion = MultiLabelSoftMarginLoss()
    # Adam optimizer.
    optimizer = Adam(model.parameters(), lr=RATE)
    ids = loaders(PATH_TRAIN, BATCH_SIZE)
    logging.info('Iteration is %s' % len(ids))
    for epoch in range(EPOCHS):
        for i, (image, label, order) in enumerate(ids):
            images = image.cuda()
            labels = label.float().cuda()
            predict_labels = model(images)
            loss = criterion(predict_labels, labels)
            # Reset gradients, backpropagate, and update the parameters.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (i + 1) % 100 == 0:
                logging.info("epoch:%s, step:%s, loss:%s" % (epoch, i + 1, loss.item()))
        # Save the trained weights.
        torch.save(model.state_dict(), MODEL_NAME)
    logging.info('Train done')
def forward(self, input_ids, token_type_ids=None, input_ent=None, ent_mask=None,
            attention_mask=None, labels=None):
    input_dag = self.embed_dag(input_ent)
    _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask,
                                 input_dag, ent_mask, output_all_encoded_layers=False)
    prediction_scores = self.classifier(pooled_output)
    if labels is not None:
        loss_fct = MultiLabelSoftMarginLoss()
        loss = loss_fct(prediction_scores.view(-1, self.num_labels), labels)
        return loss
    else:
        return prediction_scores
def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None,
            position_ids=None, head_mask=None, class_weights=None, **kwargs):
    outputs = self.bert(input_ids,
                        position_ids=position_ids,
                        token_type_ids=token_type_ids,
                        attention_mask=attention_mask,
                        head_mask=head_mask)
    pooled_output = outputs[1]
    pooled_output = self.dropout(pooled_output)
    logits = self.classifier(pooled_output)
    # Add hidden states and attentions if they are returned.
    outputs = (logits,) + outputs[2:]
    if not self.loss or self.loss == 'bce':
        loss_fct = BCEWithLogitsLoss(weight=class_weights)
    elif self.loss == 'multilabel-softmargin':
        loss_fct = MultiLabelSoftMarginLoss()
    else:
        raise ValueError(f'Unknown loss function {self.loss}')
    if labels is not None:
        loss = loss_fct(logits, labels.float())
        outputs = (loss,) + outputs
    return outputs
def calculate_loss(self, logits, labels):
    if self.label_weights is not None:
        self.label_weights = self.label_weights.to(logits.device)
    if self.multi_label:
        if self.loss_func == 'cross_entropy' or self.loss_func == 'bce':
            loss_func = BCEWithLogitsLoss(weight=self.label_weights)
        elif self.loss_func == 'soft_margin':
            loss_func = MultiLabelSoftMarginLoss(weight=self.label_weights)
        else:
            self.__raise_invalid_loss_func()
        loss = loss_func(logits.view(-1, self.num_labels), labels.view(-1, self.num_labels))
    elif self.soft_label:
        loss_func = SoftLabelCrossEntropyLoss()
        loss = loss_func(logits.view(-1, self.num_labels), labels.view(-1, self.num_labels))
    elif self.num_labels == 1:
        # We are doing regression.
        loss_func = MSELoss()
        loss = loss_func(logits.view(-1), labels.view(-1))
    else:
        if self.loss_func == 'cross_entropy':
            loss_func = CrossEntropyLoss(weight=self.label_weights)
        elif self.loss_func == 'focal':
            loss_func_params = dict(zip(['gamma'], self.loss_func_params))
            loss_func = FocalLoss(weight=self.label_weights, **loss_func_params)
        else:
            self.__raise_invalid_loss_func()
        loss = loss_func(logits.view(-1, self.num_labels), labels.view(-1))
    return loss
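The `FocalLoss` referenced above is not part of `torch.nn`. A minimal sketch of a multi-class focal loss compatible with that call site, assuming `weight` plays the role of the per-class alpha term and `gamma` is the focusing parameter:

import torch
import torch.nn.functional as F
from torch import nn


class FocalLoss(nn.Module):
    """Minimal focal loss sketch (assumption; not the original implementation)."""

    def __init__(self, weight=None, gamma=2.0):
        super().__init__()
        self.weight = weight  # optional per-class weights (alpha)
        self.gamma = gamma    # focusing parameter; down-weights easy examples

    def forward(self, logits, targets):
        # Per-sample cross-entropy, kept unreduced so it can be modulated.
        ce = F.cross_entropy(logits, targets, weight=self.weight, reduction='none')
        pt = torch.exp(-ce)  # model's probability for the true class
        return ((1.0 - pt) ** self.gamma * ce).mean()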
def forward(self, input_ids, label_hierarchy=None, token_type_ids=None,
            attention_mask=None, labels=None, position_ids=None, head_mask=None,
            class_weights=None, **kwargs):
    if label_hierarchy is None:
        label_hierarchy = self.label_hierarchy
    outputs = self.bert(input_ids,
                        position_ids=position_ids,
                        token_type_ids=token_type_ids,
                        attention_mask=attention_mask,
                        head_mask=head_mask)
    pooled_output = outputs[1]
    # Default criterion; each head below builds its own BCEWithLogitsLoss.
    loss_fct = MultiLabelSoftMarginLoss()
    loss = torch.FloatTensor([0]).to(input_ids.device)
    logits = torch.zeros((input_ids.size()[0], self.num_labels),
                         dtype=torch.float32, device=input_ids.device)
    # Go through the label hierarchy and get predictions from the corresponding heads.
    for k, idx_ in label_hierarchy.items():
        pooled_output_ = self.dropouts[f'{k}'](pooled_output)
        logits_ = self.classifiers[f'{k}'](pooled_output_)
        if labels is not None:
            labels_ = labels[:, idx_]
            loss_fct = BCEWithLogitsLoss(
                weight=class_weights[idx_] if class_weights is not None else None)
            loss += loss_fct(logits_, labels_.float())
        logits[:, idx_] = logits_
    # Add hidden states and attentions if they are returned.
    outputs = (logits,) + outputs[2:]
    if labels is not None:
        outputs = (loss,) + outputs
    return outputs
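For illustration only, a hypothetical `label_hierarchy` matching how the loop above slices `labels` and `logits`: a dict mapping each classifier head's name to the label indices it predicts.

import torch

# Hypothetical example: two heads covering disjoint slices of 7 labels.
label_hierarchy = {
    'coarse': torch.tensor([0, 1, 2]),   # labels handled by the 'coarse' head
    'fine': torch.tensor([3, 4, 5, 6]),  # labels handled by the 'fine' head
}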
def rebuild_criterions(self, output_name, weights=None):
    if output_name == "softmaxGenotype":
        self.criterion_classifier = MultiLabelSoftMarginLoss(weight=weights)
def criterion_MultiLabelSoftMarginLoss():
    return MultiLabelSoftMarginLoss(reduction="mean")
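With the default `reduction="mean"`, `MultiLabelSoftMarginLoss` computes the same value as element-wise binary cross-entropy on logits. A quick sanity check of that relationship (shapes here are illustrative):

import torch
from torch.nn import BCEWithLogitsLoss, MultiLabelSoftMarginLoss

logits = torch.randn(4, 6)                     # batch of 4, 6 labels
targets = torch.randint(0, 2, (4, 6)).float()  # multi-hot targets
soft_margin = MultiLabelSoftMarginLoss()(logits, targets)
bce = BCEWithLogitsLoss()(logits, targets)
assert torch.allclose(soft_margin, bce)  # identical up to floating-point noise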
def rebuild_criterions(self, output_name, weights=None):
    if output_name == "softmaxGenotype":
        self.semisup_loss_criterion = MultiLabelSoftMarginLoss()
def __init__(self, input_size=512, n_dim=500, ngpus=1, dropout_p=0, num_hidden_layers=3,
             num_classes=10, prior_dim=2, epsilon=1E-15, seed=None, mini_batch=7,
             prenormalized_inputs=False, use_selu=False):
    """
    :param input_size: dimensionality of the inputs.
    :param n_dim: width of the hidden layers.
    :param ngpus: number of GPUs to use.
    :param dropout_p: dropout probability.
    :param num_hidden_layers: number of hidden layers.
    :param num_classes: number of output classes.
    :param prior_dim: dimensionality of the latent code/prior.
    :param epsilon: small constant for numerical stability.
    :param seed: random seed.
    :param mini_batch: mini-batch size.
    :param prenormalized_inputs: True when the inputs must be normalized by mean and std
        before using this model.
    :param use_selu: when True, use SELU activations.
    """
    super().__init__(use_selu=use_selu)
    self.epsilon = epsilon
    self.prior_dim = prior_dim
    self.num_classes = num_classes
    self.seed = seed
    self.reconstruction_criterion = MSELoss()
    self.semisup_loss_criterion = MultiLabelSoftMarginLoss()
    self.categorical_distribution = None
    self.prenormalized_inputs = prenormalized_inputs
    self.encoder = _SemiSupAdvEncoder(input_size=input_size, n_dim=n_dim, ngpus=ngpus,
                                      dropout_p=dropout_p,
                                      num_hidden_layers=num_hidden_layers,
                                      num_classes=num_classes, latent_code_dim=prior_dim,
                                      use_selu=use_selu)
    self.decoder = _SemiSupAdvDecoder(input_size=input_size, n_dim=n_dim, ngpus=ngpus,
                                      dropout_p=dropout_p,
                                      num_hidden_layers=num_hidden_layers,
                                      num_classes=num_classes, prior_dim=prior_dim,
                                      use_selu=use_selu)
    self.discriminator_cat = _SemiSupAdvDiscriminatorCat(
        n_dim=n_dim, ngpus=ngpus, dropout_p=dropout_p,
        num_hidden_layers=num_hidden_layers, num_classes=num_classes, use_selu=use_selu)
    self.discriminator_prior = _SemiSupAdvDiscriminatorPrior(
        n_dim=n_dim, ngpus=ngpus, dropout_p=dropout_p,
        num_hidden_layers=num_hidden_layers, prior_dim=prior_dim, use_selu=use_selu)
def rebuild_criterions(self, output_name, weights=None):
    if output_name == "softmaxGenotype":
        # size_average=False is deprecated; reduction='sum' is the modern equivalent.
        self.criterion_classifier = MultiLabelSoftMarginLoss(weight=weights,
                                                             reduction='sum')
        self.criterion_autoencoder = MSELoss()
import numpy as np
import torch
from torch.nn import MultiLabelSoftMarginLoss
from torch.utils.data import DataLoader


def main():
    dataset = VOC(img_transform=image_transform())
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_dataset = VOC(mode='val', img_transform=image_transform())
    val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    model = peak_response_mapping(fc_resnet50(), win_size=3,
                                  sub_pixel_locating_factor=8,
                                  enable_peak_stimulation=True)
    model = model.cuda()
    loss_func = MultiLabelSoftMarginLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9,
                                weight_decay=1.0e-4)
    val_acc = 0
    for epoch in range(EPOCH):
        print('-----------epoch' + str(epoch) + '-----------')
        for step, (b_x, b_y) in enumerate(dataloader):
            b_x = b_x.cuda()
            b_y = b_y.cuda()
            result = model(b_x)
            loss = loss_func(result, b_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print('loss:' + str(loss.cpu().data.numpy()) + ' acc:' +
                  str(cal_acc(result.cpu().data.numpy().reshape(-1),
                              b_y.cpu().data.numpy().reshape(-1))))
            if step % 10 == 0 and step != 0:
                with torch.no_grad():
                    val_loss = []
                    val_results = []
                    val_labels = []
                    # Use a distinct loop variable so the outer `step` is not clobbered.
                    for val_step, (v_x, v_y) in enumerate(val_dataloader):
                        v_x = v_x.cuda()
                        v_y = v_y.cuda()
                        result = model(v_x)
                        single_loss = loss_func(result, v_y)
                        val_loss.append(single_loss.cpu().data.numpy())
                        val_results.extend(result.cpu().data.numpy())
                        val_labels.extend(v_y.cpu().data.numpy())
                    val_results = np.array(val_results).reshape(-1)
                    val_labels = np.array(val_labels).reshape(-1)
                    print('--------------------val_loss:' + str(np.mean(val_loss)) +
                          ' val_acc:' + str(cal_acc(val_results, val_labels)))
                    if cal_acc(val_results, val_labels) > val_acc:
                        val_acc = cal_acc(val_results, val_labels)
                        torch.save(model, '../Save/model/model.pt')
from multiprocessing import cpu_count

import torch
from torch.nn import MultiLabelSoftMarginLoss
from torch.utils.data import DataLoader

from dataset.vqa_dataset import VqaDataset
from net.network import load

if __name__ == '__main__':
    root_path = "/opt/vqa-data"
    model_path = "../models/batch-512-models/epoch-100-checkpoint.pth.tar"
    batch_size = 2048
    dataset = VqaDataset(root_path)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                            num_workers=cpu_count())
    info = dataset.load_info()
    net = load(model_path, info)
    criterion = MultiLabelSoftMarginLoss().cuda()
    total_error = 0
    accuracy = 0
    with torch.no_grad():  # evaluation only; no gradients needed
        for batch, (questions, image_features, answers) in enumerate(dataloader, 0):
            questions = questions.cuda()
            image_features = image_features.cuda()
            answers = answers.cuda()
            outputs = net(questions, image_features)
            _, correct_indices = torch.max(answers, 1)    # ground-truth answer index
            _, predicted_indices = torch.max(outputs, 1)  # predicted answer index
            loss = criterion(outputs, answers)
            total_error += loss.item()
            accuracy += (correct_indices == predicted_indices).sum().item()
    accuracy /= dataset.number_of_questions()
def __init__(self):
    self.loss_fn = MultiLabelSoftMarginLoss()
def multi_label(y_input, y_target):
    loss = MultiLabelSoftMarginLoss()
    return loss(y_input, y_target)
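A hypothetical call to the wrapper above, with logits and multi-hot targets of shape `(batch, n_classes)`:

import torch

y_input = torch.randn(8, 5)                     # raw logits
y_target = torch.randint(0, 2, (8, 5)).float()  # multi-hot labels
print(multi_label(y_input, y_target))           # scalar loss tensor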
                    './datarnn/')
id2token = {v: k for k, v in token2id.items()}

# ### Change into torch vectors
x = torch.from_numpy(x)
y = torch.from_numpy(np.array(y))

# ### Create attention classifier
# You have to pass a multilabel loss function as a parameter.
from torch.nn import MultiLabelSoftMarginLoss

acf = AttentionClassifier(vocab_size=len(token2id) + 1, embed_size=25,
                          gru_hidden=25, n_classes=len(label2id))
loss = acf.fit(x, y, lengths_mask, epochs=2, validation_split=0.5,
               loss=MultiLabelSoftMarginLoss(), multilabel=True)

# ### Predicting
# Prediction needs more than one minibatch at this moment.
acf.predict(x[0:128], lengths_mask[0:128])

# Getting attention
acf.get_attention(x[0:64], lengths_mask[0:64])

# Visualize attention for the first sample in the batch
acf.visualize_attention(x[1:65], lengths_mask[1:65], id2token,
                        './datarnn/visualisation/visual.html')

from IPython.core.display import display, HTML

with open('./datarnn/visualisation/visual.html', 'r') as f:
    text = f.read()
display(HTML(text))