예제 #1
0
파일: manifest.py 프로젝트: lymioumm/Mnist
def fit(epoch, model, data_loader, phase='training', volatile=False):
    """Run one pass over ``data_loader`` and report loss and accuracy.

    Args:
        epoch: Index of the current epoch. Not used by this function; kept
            so existing callers keep working.
        model: Network whose output is passed to ``F.nll_loss``.
        data_loader: Iterable of ``(data, target)`` batches that also exposes
            a ``dataset`` attribute; ``len(data_loader.dataset)`` is used as
            the number of samples.
        phase: ``'training'`` puts the model in train mode and updates the
            weights; ``'validation'`` puts it in eval mode and only measures.
        volatile: Legacy flag, accepted for backward compatibility only.
            It used to be passed positionally to ``Variable``, where it landed
            in the ``requires_grad`` slot and switched gradients *on* for the
            input during validation. Gradient tracking is now decided by
            ``phase`` alone.

    Returns:
        ``(loss, accuracy)``: the summed negative log-likelihood divided by
        the dataset size, and the percentage of correct predictions.
    """
    # Imported locally because only names imported *from* torch (optim, F,
    # Variable) are known to be in scope for this snippet.
    import torch

    is_training = phase == 'training'

    # NOTE: a fresh optimizer is built on every call, so SGD momentum buffers
    # do not carry over between calls.
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

    if is_training:
        model.train()
    if phase == 'validation':
        model.eval()

    running_loss = 0.0
    running_correct = 0
    for data, target in data_loader:
        if is_training:
            optimizer.zero_grad()

        # Only build the autograd graph when a backward pass will follow.
        with torch.set_grad_enabled(is_training):
            output = model(data)
            loss = F.nll_loss(output, target)

        # Summed (not averaged) batch loss, so the final division by the
        # dataset size yields a per-sample mean even with a short last batch.
        running_loss += F.nll_loss(output.detach(), target, reduction='sum')
        preds = output.detach().max(dim=1, keepdim=True)[1]
        running_correct += preds.eq(target.view_as(preds)).cpu().sum()

        if is_training:
            loss.backward()
            optimizer.step()

    loss = running_loss / len(data_loader.dataset)
    accuracy = 100. * running_correct / len(data_loader.dataset)

    print(
        f'{phase} loss is {loss:{5}.{2}} and {phase} accuracy is {running_correct}/{len(data_loader.dataset)}{accuracy:{10}.{4}}'
    )
    return loss, accuracy
예제 #2
0
    def train_step(model,
                   optimizer,
                   positive_sample,
                   negative_sample,
                   subsampling_weight,
                   mode,
                   device="cuda"):
        """Take one optimisation step on a positive/negative sample batch.

        The model is called first on ``(positive_sample, negative_sample)``
        with ``mode`` and then on ``positive_sample`` alone. Both scores are
        turned into log-sigmoid likelihoods, averaged with
        ``subsampling_weight`` as per-row weights, and the mean of the two
        resulting losses is backpropagated.

        Returns:
            The loss of this step as a Python float.
        """
        model.train()
        optimizer.zero_grad()

        pos = positive_sample.to(device)
        neg = negative_sample.to(device)
        weight = subsampling_weight.to(device)

        def _weighted_nll(log_likelihood):
            # Weighted mean of the negated log-likelihoods.
            return -(weight * log_likelihood).sum() / weight.sum()

        # Negatives are scored before positives, as in the original ordering.
        neg_log_lik = F.logsigmoid(-model((pos, neg), mode=mode)).mean(dim=1)
        pos_log_lik = F.logsigmoid(model(pos)).squeeze(dim=1)

        loss = (_weighted_nll(pos_log_lik) + _weighted_nll(neg_log_lik)) / 2
        loss.backward()
        optimizer.step()

        return loss.item()
예제 #3
0
def train(trainloader, model, optimizer, criterion):
    """Train ``model`` for one pass over ``trainloader``.

    Args:
        trainloader: Iterable of ``(features, labels)`` batches.
        model: Network whose output is passed to ``F.nll_loss``.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Not used; the loss is always ``F.nll_loss``. Kept so the
            signature stays compatible with existing callers.

    Returns:
        ``(loss_train, acc)``: the mean of the per-batch losses and the mean
        of the per-batch accuracies (in percent).
    """
    import torch

    model.train()

    # Send batches to wherever the model lives instead of hard-wiring
    # ``.cuda()``; a parameterless model falls back to the old CUDA behavior.
    first_param = next(iter(model.parameters()), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda')

    train_loss = []
    train_acc = []

    for features, labels in trainloader:
        data, target = features.to(device), labels.to(device)
        optimizer.zero_grad()

        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        prediction = output.detach().max(1)[1]
        n_correct = prediction.eq(target).sum().item()
        # Divide by the real size of this batch: the last batch may be
        # shorter than the nominal batch size.
        train_acc.append(100.0 * n_correct / target.size(0))
        # ``loss.data[0]`` fails on 0-dim tensors; ``item()`` is the
        # supported way to read a scalar loss.
        train_loss.append(loss.item())

    loss_train = np.mean(train_loss)
    acc = np.mean(train_acc)
    return loss_train, acc
    def take_optimisation_step(self, optimizer, network, loss, clipping_norm=None, retain_graph=False):
        """Backpropagate ``loss`` and apply a single optimizer update.

        ``network`` may be one module or a list of modules. When
        ``clipping_norm`` is given, the gradient norm of each module is
        clipped to that value before the update. The loss is written to the
        logger and forwarded to ``self.wandb_log`` together with the current
        episode number.
        """
        networks = network if isinstance(network, list) else [network]

        # Clear stale gradients, then compute new ones from the loss.
        optimizer.zero_grad()
        loss.backward(retain_graph=retain_graph)

        self.logger.info("Loss -- {}".format(loss.item()))
        metrics = dict(loss=loss, episode_number=self.episode_number)
        self.wandb_log(metrics, step=self.global_step_number)

        if self.debug_mode:
            self.log_gradient_and_weight_information(networks, optimizer)

        if clipping_norm is not None:
            # Clipping the gradient norm helps keep training stable.
            for module in networks:
                torch.nn.utils.clip_grad_norm_(module.parameters(), clipping_norm)

        # Apply the (possibly clipped) gradients.
        optimizer.step()
    def take_optimisation_step(self,
                               optimizer,
                               network,
                               loss,
                               clipping_norm=None,
                               retain_graph=False):
        """Backpropagate ``loss`` and apply a single optimizer update.

        ``network`` may be one module or a list of modules. The loss value is
        logged only when ``self.config.log_loss`` is truthy. When
        ``clipping_norm`` is given, the gradient norm of each module is
        clipped to that value before the update.
        """
        networks = network if isinstance(network, list) else [network]

        # Clear stale gradients, then compute new ones from the loss.
        optimizer.zero_grad()
        loss.backward(retain_graph=retain_graph)

        if self.config.log_loss:
            loss_message = "Loss -- {}".format(loss.item())
            self.logger.info(loss_message)

        if self.debug_mode:
            self.log_gradient_and_weight_information(networks, optimizer)

        if clipping_norm is not None:
            # Clipping the gradient norm helps keep training stable.
            for module in networks:
                torch.nn.utils.clip_grad_norm_(module.parameters(),
                                               clipping_norm)

        # Apply the (possibly clipped) gradients.
        optimizer.step()
예제 #6
0
def train():
    """Train the module-level ``resnet50`` for ``epochs`` epochs.

    Every epoch iterates ``train_loader`` (batches are dicts with ``'img'``
    and ``'label'`` entries), shows progress with ``tqdm``, and sums the
    per-batch loss values. After the epoch, ``test()`` supplies the
    validation loss that drives ``scheduler`` and both losses are written to
    ``writer``.
    """
    for epoch in range(epochs):
        resnet50.train()
        epoch_loss = 0

        progress = tqdm(total=n_train,
                        desc=f'Epoch {epoch+1}/{epochs}',
                        unit='img')
        with progress as pbar:
            for batch_data in train_loader:
                img = batch_data['img'].to(device=device)
                label = batch_data['label'].to(device=device)

                # Drop singleton dimensions from the prediction before the
                # loss is computed.
                pred = resnet50(img).squeeze()
                loss = criterion(pred, label)
                epoch_loss += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Advance the bar by the number of images in this batch.
                pbar.update(img.shape[0])

        print('training loss is:', epoch_loss / n_train)

        # Only the third value returned by test() is used here.
        _, _, val_loss, _ = test()
        scheduler.step(val_loss)

        writer.add_scalar('train_loss', epoch_loss, epoch)
        writer.add_scalar('val_loss', val_loss, epoch)
예제 #7
0
def train_epoch(model,
                data_loaders,
                optimizer,
                device,
                criterion,
                epoch,
                scheduler=None):
    """Train ``model`` for one epoch over ``data_loaders["train"]``.

    Per-batch loss and accuracy go into ``Loss``/``Acc`` trackers, keyed by a
    global step of ``epoch * len(loader) + batch_index``. After the last
    batch the current learning rate is written to the module-level
    ``writer`` and ``scheduler`` (if given) is stepped, unless it is exactly
    a ``ReduceLROnPlateau``, which is left alone here.

    Returns:
        ``(epoch_acc, epoch_loss)`` as reported by the trackers.
    """
    acc_tracker = Acc("train_batch_acc")
    loss_tracker = Loss("train_batch_loss")
    model.train()

    train_loader = data_loaders["train"]
    data_size = len(train_loader)

    with torch.set_grad_enabled(True):
        for i, (inputs, labels) in enumerate(train_loader):
            global_step = epoch * data_size + i

            optimizer.zero_grad()
            inputs = inputs.cuda(device)
            labels = labels.cuda(device)
            pred = model(inputs)
            c_loss = criterion(pred, labels)
            c_loss.backward()
            optimizer.step()

            loss_tracker.update(c_loss.item(), global_step)
            acc_tracker.update(pred, labels, global_step)

        writer.add_scalar("learningRate", optimizer.param_groups[0]['lr'],
                          (epoch + 1) * data_size)
        epoch_loss = loss_tracker.get()
        epoch_acc = acc_tracker.get()

        # ReduceLROnPlateau is deliberately not stepped in this function.
        plateau_type = torch.optim.lr_scheduler.ReduceLROnPlateau
        if scheduler and type(scheduler) != plateau_type:
            scheduler.step()
    return epoch_acc, epoch_loss
예제 #8
0
def train(trainloader, model, optimizer):
    """Train ``model`` for one pass over ``trainloader``.

    Args:
        trainloader: Iterable of ``(features, labels)`` batches.
        model: Network whose output is passed to ``F.nll_loss``.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        ``(loss_train, acc)``: the mean of the per-batch losses and the mean
        of the per-batch accuracies (in percent).

    Batches are moved to the module-level ``device``.
    """
    model.train()

    train_loss = []
    train_acc = []

    for features, labels in trainloader:
        data = features.to(device)
        target = labels.to(device)
        optimizer.zero_grad()

        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(output.detach(), 1)
        # Compare against ``target`` (on the model's device), not the
        # original ``labels``, which may still live on the CPU.
        correct = (predicted == target).sum().item()
        # Keep accuracy numeric (``format`` produced a string that
        # ``np.mean`` cannot average) and divide by the real batch size so a
        # short last batch is scored correctly.
        accuracy = 100 * correct / target.size(0)

        train_acc.append(accuracy)
        train_loss.append(loss.item())

    loss_train = np.mean(train_loss)
    acc = np.mean(train_acc)
    return loss_train, acc
예제 #9
0
def main():
    """Train ``Net`` on ``trainloader`` for 10 epochs and save its weights.

    Every 2000 mini-batches the running training loss is printed and the
    network is evaluated on ``testloader``. The final ``state_dict`` is
    written to ``./cifar_net.pth``.
    """
    net = Net().cuda()
    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    # Training
    for epoch in range(10):

        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].cuda(), data[1].cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:
                print('[%d,%5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

                # Evaluate on the test samples. Switch to eval mode so that
                # layers such as dropout / batch-norm behave deterministically,
                # and restore train mode afterwards.
                total = 0
                correct = 0
                net.eval()
                with torch.no_grad():
                    # The loop variable no longer reuses ``i`` from the
                    # enclosing training loop.
                    for test_data in testloader:
                        test_inputs = test_data[0].cuda()
                        test_labels = test_data[1].cuda()
                        test_outputs = net(test_inputs)
                        predicts = torch.max(test_outputs, 1)[1]
                        total += test_labels.size(0)
                        # ``.item()`` keeps the counter a Python int, so the
                        # division below is a true division and the printout
                        # is a plain number rather than a tensor repr.
                        correct += (predicts == test_labels).sum().item()
                net.train()
                accuracy = correct / total * 100
                print("Accuracy of the network on the 100 test images: {}%".
                      format(accuracy))

    print('Finished Training')

    PATH = './cifar_net.pth'
    torch.save(net.state_dict(), PATH)
예제 #10
0
파일: train.py 프로젝트: divinit7/Colorizer
def pretrain_generator(net_G, train_dl, optimizer, criterion, epochs):
    """Pre-train the generator ``net_G`` on ``train_dl`` for ``epochs`` epochs.

    Each batch is a dict whose ``'L'`` entry is fed to the generator and
    whose ``'ab'`` entry is the target given to ``criterion``. The
    sample-weighted average loss of the epoch is printed at its end.
    """
    for e in range(epochs):
        loss_meter = AverageMeter()

        for batch in tqdm.tqdm(train_dl):
            L = batch['L'].to(device)
            ab = batch['ab'].to(device)

            loss = criterion(net_G(L), ab)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight the running average by the number of samples.
            batch_size = L.size(0)
            loss_meter.update(loss.item(), batch_size)

        print(f'Epoch {e +1}/{epochs} ')
        print(f'L1 Loss: {loss_meter.avg:.5f}')
예제 #11
0
def train(model, train_loader, optimizer, log_interval):
    """Train ``model`` for one pass over ``train_loader``.

    Progress is printed every ``log_interval`` batches. The loss function
    ``criterion``, the target ``DEVICE`` and the ``epoch`` shown in the
    progress line are read from the enclosing module scope.
    """
    model.train()

    num_batches_fn = len  # evaluated lazily, only when a progress line prints
    for batch_idx, (image, label) in enumerate(train_loader):
        image, label = image.to(DEVICE), label.to(DEVICE)

        optimizer.zero_grad()
        loss = criterion(model(image), label)
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval != 0:
            continue

        samples_seen = batch_idx * len(image)
        percent_done = 100. * batch_idx / num_batches_fn(train_loader)
        print("Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}".format(
            epoch, samples_seen,
            len(train_loader.dataset), percent_done,
            loss.item()))
예제 #12
0
def PPO_Update(policy, memory, optimizer, opts):
    """Run ``opts.epoch`` clipped-surrogate update steps on ``policy``.

    Each step draws ``(state, action, reward, probs, done)`` from
    ``memory.sample()``, forms the ratio ``exp(new_probs - probs)`` between
    the current and the stored values, and minimises the negated mean of the
    element-wise minimum of the unclipped and clipped (``1 +/- opts.eps``)
    ratio-weighted rewards.
    """
    for _ in range(opts.epoch):
        state, action, reward, old_probs, done = memory.sample()

        current_probs = policy.get_prob(state, action)
        ratios = torch.exp(current_probs - old_probs.squeeze())

        unclipped = ratios * reward
        clipped_ratios = torch.clamp(ratios, 1 - opts.eps, 1 + opts.eps)
        clipped = clipped_ratios * reward

        # Negated because the optimizer minimises.
        objective = torch.min(unclipped, clipped).mean().neg()

        optimizer.zero_grad()
        objective.backward()
        optimizer.step()
예제 #13
0
def fit_one_cycle(epochs,
                  max_lr,
                  model,
                  train_loader,
                  val_loader,
                  weight_decay=0,
                  grad_clip=None,
                  opt_func=torch.optim.SGD):
    """Train ``model`` with a one-cycle learning-rate schedule.

    Args:
        epochs: Number of epochs to train for.
        max_lr: Peak learning rate; also used as the optimizer's initial
            learning rate.
        model: Model exposing ``training_step(batch)`` (returns the loss
            tensor) and ``epoch_end(epoch, result)``.
        train_loader: Training batches; its length is the number of scheduler
            steps per epoch.
        val_loader: Validation batches handed to ``evaluate``.
        weight_decay: Weight decay passed to the optimizer.
        grad_clip: If truthy, gradient values are clipped to this magnitude
            before every optimizer step.
        opt_func: Optimizer constructor.

    Returns:
        A list with one result dict per epoch: whatever ``evaluate`` returns,
        extended with ``'train_loss'`` (mean batch loss) and ``'lrs'`` (the
        learning rate used for each batch).
    """
    torch.cuda.empty_cache()
    history = []

    # Set up custom optimizer with weight decay
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # Set up one-cycle learning rate scheduler
    sched = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr, epochs=epochs, steps_per_epoch=len(train_loader))

    for epoch in range(epochs):
        # Training Phase
        model.train()
        train_losses = []
        lrs = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Store a detached copy: keeping the attached loss would hold a
            # reference to each batch's autograd graph until the epoch ends.
            train_losses.append(loss.detach())
            loss.backward()

            # Gradient clipping
            if grad_clip:
                nn.utils.clip_grad_value_(model.parameters(), grad_clip)

            optimizer.step()
            optimizer.zero_grad()

            # Record & update learning rate
            lrs.append(get_lr(optimizer))
            sched.step()

        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['lrs'] = lrs
        model.epoch_end(epoch, result)
        history.append(result)
    return history
 def train_model(train_dl, model, epochs=500, lr=0.01, momentum=0.9):
     """Train ``model`` on ``train_dl`` with SGD and cross-entropy loss.

     Args:
         train_dl: Iterable of ``(inputs, targets)`` mini-batches; it is
             iterated once per epoch.
         model: Network to optimise in place.
         epochs: Number of passes over ``train_dl``. Defaults to the
             previously hard-coded 500.
         lr: SGD learning rate. Defaults to the previously hard-coded 0.01.
         momentum: SGD momentum. Defaults to the previously hard-coded 0.9.
     """
     # define the optimization
     criterion = CrossEntropyLoss()
     optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
     # enumerate epochs
     for _ in range(epochs):
         # enumerate mini batches
         for inputs, targets in train_dl:
             # clear the gradients
             optimizer.zero_grad()
             # compute the model output
             yhat = model(inputs)
             # calculate loss
             loss = criterion(yhat, targets)
             # credit assignment
             loss.backward()
             # update model weights
             optimizer.step()
예제 #15
0
def train(data_loader, model, optimizer, device):
    """Train ``model`` for one pass over ``data_loader`` with BCE loss.

    Each batch is indexed as ``(inputs, targets, labels)``; all three are
    moved to ``device`` as float tensors, although only inputs and targets
    take part in the loss. The mean of the per-batch losses is printed and
    returned.
    """
    model.train()

    # Active loss: binary cross-entropy. (The original snippet also carried
    # disabled MSE, SSIM and MAE alternatives.)
    bce = torch.nn.BCELoss()

    per_batch_losses = []
    for data in tqdm(data_loader):
        # The dataset yields inputs, targets and labels, in that order.
        inputs, targets, labels = data[0], data[1], data[2]

        # Move everything to the cuda/cpu device as float tensors.
        inputs = inputs.to(device, dtype=torch.float)
        targets = targets.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)

        optimizer.zero_grad()
        loss = bce(model(inputs), targets)
        per_batch_losses.append(loss.item())

        # Backpropagate, then update the weights.
        loss.backward()
        optimizer.step()

    epoch_loss = np.mean(np.array(per_batch_losses))
    print(epoch_loss)
    return epoch_loss
예제 #16
0
  def update(engine, batch):
      """Process one training batch and return its loss values.

      The forward pass and the loss terms depend on ``args.with_pure_y`` and
      ``args.with_h``.  Every loss term is divided by
      ``args.gradient_accumulation_steps``; the optimizer is stepped (and the
      gradients cleared) only when ``engine.state.iteration`` is a multiple
      of that value.

      Returns a tuple of Python floats whose first two entries are the total
      loss and ``loss_1``.  The tuple has four entries when both flags are
      set and three entries otherwise.
      """
      model.train()
      y, x_label, y_pure, H = train_dataset.prepare_batch(batch, device=args.device)

      if args.with_pure_y and args.with_h:
        # Model returns three outputs: x, pure-y and H predictions.
        x_pred, y_pure_pred, H_pred = model(y, pure=y_pure, H=H, opp=True)
        loss_1 = criterion(x_pred, x_label) / args.gradient_accumulation_steps
        # Only in this branch is an MSE main loss additionally divided by the
        # size of the first dimension of x_pred.
        if args.loss_type == "MSELoss":
          loss_1 = loss_1 / x_pred.size(0)
        loss_noise = criterion2(y_pure_pred, y_pure) / y.size(0) / args.gradient_accumulation_steps
        loss_noise_h = criterion2(H_pred, H) / H.size(0) / args.gradient_accumulation_steps
        if args.only_l1:
          loss = loss_1
        else:
          # NOTE(review): loss_noise is scaled by noise_lambda but
          # loss_noise_h is not -- confirm this is intended.
          loss = loss_1 + loss_noise * args.noise_lambda + loss_noise_h
        output = (loss.item(), loss_1.item(), loss_noise.item(), loss_noise_h.item())
      elif args.with_pure_y:
        # y_pure is passed to the model only when args.interpolation is set.
        x_pred, y_pure_pred = model(y, pure=y_pure if args.interpolation else None, opp=True)
        loss_1 = criterion(x_pred, x_label) / args.gradient_accumulation_steps
        loss_noise = criterion2(y_pure_pred, y_pure) / y.size(0) / args.gradient_accumulation_steps
        loss = loss_1 + loss_noise * args.noise_lambda
        output = (loss.item(), loss_1.item(), loss_noise.item())
      elif args.with_h:
        x_pred, H_pred = model(y, opp=True)
        loss_1 = criterion(x_pred, x_label) / args.gradient_accumulation_steps
        # Here "loss_noise" is the H-prediction loss.
        loss_noise = criterion2(H_pred, H) / H.size(0) / args.gradient_accumulation_steps
        loss = loss_1 + loss_noise * args.noise_lambda
        output = (loss.item(), loss_1.item(), loss_noise.item())
      else:
        # Plain model: a single prediction and a single loss term.
        x_pred = model(y)
        loss_1 = criterion(x_pred, x_label) / args.gradient_accumulation_steps
        loss = loss_1
        # The third slot is filled with 0.0 so the tuple keeps three entries.
        output = (loss.item(), loss_1.item(), torch.zeros_like(loss_1).item())

      loss.backward()
      # Clipping runs on every call, i.e. on the gradients accumulated so
      # far, not only right before an optimizer step.
      if args.max_norm > 0:
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_norm)
      # Step and reset only at the end of each accumulation window.
      if engine.state.iteration % args.gradient_accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
      return output
예제 #17
0
    def loss_and_acc_on_epoch(self,
                              batches_per_epoch,
                              generator,
                              train=True,
                              num_skipped=20):
        """Run up to ``batches_per_epoch`` batches and average loss/accuracy.

        Batches are dicts with ``'input_seq'``, ``'constraint'`` and
        ``'input_seq_index'`` entries, which are converted to CUDA tensors.
        When ``train`` is true the module-level ``optimizer`` is stepped
        after each batch; otherwise the batch is only scored.

        Returns:
            ``(mean loss, mean accuracy, sum_constraints / num_constraints)``;
            the two means are divided by ``batches_per_epoch``.
        """
        loss_total = 0
        accuracy_total = 0
        sum_constraints = 0
        num_constraints = 0

        limited_batches = islice(generator, batches_per_epoch)
        for _, batch in tqdm(enumerate(limited_batches)):
            raw_seq = batch['input_seq']
            raw_constraint = batch['constraint']
            # input_seq_index is (seq_length, batch_size)
            raw_index = batch['input_seq_index']

            # todo requires_grad?
            input_seq = Variable(torch.FloatTensor(raw_seq).cuda())
            constraint = Variable(torch.FloatTensor(raw_constraint).cuda())
            input_seq_index = Variable(torch.LongTensor(raw_index).cuda())

            optimizer.zero_grad()
            output = self((input_seq, constraint))
            loss = mean_crossentropy_loss(output,
                                          input_seq_index,
                                          num_skipped=num_skipped,
                                          constraint=constraint)
            if train:
                loss.backward()
                optimizer.step()

            # Accumulate the statistics of this batch.
            loss_total += loss.data.mean()
            batch_accuracy, constraint_counts = accuracy(
                output_seq=output,
                targets_seq=input_seq_index,
                num_skipped=num_skipped,
                constraint=constraint)
            batch_sum_constraint, batch_num_constraint = constraint_counts
            accuracy_total += batch_accuracy
            sum_constraints += batch_sum_constraint
            num_constraints += batch_num_constraint

        return (loss_total / batches_per_epoch,
                accuracy_total / batches_per_epoch,
                sum_constraints / num_constraints)
예제 #18
0
def train(dataloader, model, criterion, optimizer, scheduler, epoch):
    """Train ``model`` for one epoch on noise-perturbed inputs.

    Gaussian noise scaled by ``args.noise_sd`` is added to every input batch
    before the forward pass. The scheduler is stepped once after the last
    batch, and the elapsed time plus the per-batch mean loss and accuracy
    are printed.
    """
    model.train()
    print('epoch ' + str(epoch))

    loss_sum = 0
    acc_sum = 0
    num_batches = len(dataloader)
    started_at = time.time()
    # Converter from tensor to PIL image; not used in the active code path.
    to_pil_image = transforms.ToPILImage()

    for batch_num, (x, y) in enumerate(dataloader):
        x = x.to(device)
        y = y.to(device)

        # Perturb the inputs with Gaussian noise.
        noise = torch.randn_like(x, device=device) * args.noise_sd
        x = x + noise

        output = model(x)
        loss = criterion(output, y)
        acc = accuracy(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        acc_sum += acc

    scheduler.step()
    finished_at = time.time()
    print('trainning time:', finished_at - started_at, 'sec, loss: ',
          loss_sum / num_batches, 'acc: ', acc_sum / num_batches)
예제 #19
0
def test(testloader, model):
    """Evaluate ``model`` on ``testloader`` without tracking gradients.

    Args:
        testloader: Iterable of ``(features, labels)`` batches.
        model: Network whose output is passed to ``F.nll_loss``.

    Returns:
        ``(loss_test, acc)``: the mean of the per-batch losses and the mean
        of the per-batch accuracies (in percent).

    Batches are moved to the module-level ``device``.
    """
    model.eval()

    test_loss = []
    test_acc = []

    with torch.no_grad():
        for features, labels in testloader:
            data = features.to(device)
            target = labels.to(device)
            # No optimizer call here: evaluation computes no gradients, so
            # there is nothing to reset.

            output = model(data)
            loss = F.nll_loss(output, target)

            _, predicted = torch.max(output, 1)
            # Compare against ``target`` (on the model's device), not the
            # original ``labels``, which may still live on the CPU.
            correct = (predicted == target).sum().item()
            # Keep accuracy numeric (``format`` produced a string that
            # ``np.mean`` cannot average) and divide by the real batch size
            # so a short last batch is scored correctly.
            accuracy = 100 * correct / target.size(0)
            test_acc.append(accuracy)
            test_loss.append(loss.item())

    loss_test = np.mean(test_loss)
    acc = np.mean(test_acc)
    return loss_test, acc
예제 #20
0
파일: FCNet.py 프로젝트: Fanxiaodon/nn
def train(train_loader):
    """Train the module-level ``net`` for one pass over ``train_loader``.

    Uses the module-level ``optimizer``, ``criterion`` and ``device``, and
    prints the average loss of every block of 100 batches.
    """
    report_every = 100
    # Loss accumulated since the last report.
    running_loss = 0.0

    for i, (inputs, labels) in enumerate(train_loader, 0):
        # Move the batch to the selected device (the GPU when one is used).
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Reset gradients, run the forward pass and compute the loss.
        optimizer.zero_grad()
        loss = criterion(net(inputs), labels)

        # Backpropagate and update the weights.
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report once every 100 batches, then restart the accumulator.
        if (i + 1) % report_every == 0:
            print('%5d loss: %.3f' %
                  (i + 1, running_loss / 100))
            running_loss = 0.0
예제 #21
0
파일: kg_learn.py 프로젝트: vivym/OpenKS
	def train_step(self, model, optimizer, train_iterator, args):
		'''
		Run a single training step: apply back-propagation and return a log.

		One batch ``(positive_sample, negative_sample, subsampling_weight,
		mode)`` is drawn from ``train_iterator``.  ``args`` is read as a
		mapping with the keys ``'gpu'``, ``'negative_adversarial_sampling'``,
		``'adversarial_temperature'``, ``'uni_weight'`` and
		``'regularization'``.

		Returns a dict with ``'positive_sample_loss'``,
		``'negative_sample_loss'`` and ``'loss'`` as floats, plus
		``'regularization'`` when the regularization weight is non-zero.
		'''

		model.train()

		optimizer.zero_grad()

		positive_sample, negative_sample, subsampling_weight, mode = next(train_iterator)

		if args['gpu']:
			positive_sample = positive_sample.cuda()
			negative_sample = negative_sample.cuda()
			subsampling_weight = subsampling_weight.cuda()

		negative_score = self.forward(model, (positive_sample, negative_sample), mode=mode)

		if args['negative_adversarial_sampling']:
			# In self-adversarial sampling, we do not apply back-propagation on the sampling weight
			negative_score = (F.softmax(negative_score * args['adversarial_temperature'], dim=1).detach()
							  * F.logsigmoid(-negative_score)).sum(dim=1)
		else:
			negative_score = F.logsigmoid(-negative_score).mean(dim=1)

		positive_score = self.forward(model, positive_sample)

		positive_score = F.logsigmoid(positive_score).squeeze(dim=1)

		if args['uni_weight']:
			positive_sample_loss = - positive_score.mean()
			negative_sample_loss = - negative_score.mean()
		else:
			positive_sample_loss = - (subsampling_weight * positive_score).sum() / subsampling_weight.sum()
			negative_sample_loss = - (subsampling_weight * negative_score).sum() / subsampling_weight.sum()

		loss = (positive_sample_loss + negative_sample_loss) / 2

		if args['regularization'] != 0.0:
			# Use L3 regularization for ComplEx and DistMult.
			# ``args`` is a mapping everywhere else in this method, so the
			# weight is read by key; attribute access fails on a plain dict.
			# The second ``.norm(p=3)`` on the already-scalar relation norm
			# was a no-op and has been dropped.
			regularization = args['regularization'] * (
					model.entity_embedding.norm(p=3) ** 3 +
					model.relation_embedding.norm(p=3) ** 3
			)
			loss = loss + regularization
			regularization_log = {'regularization': regularization.item()}
		else:
			regularization_log = {}

		loss.backward()

		optimizer.step()

		log = {
			**regularization_log,
			'positive_sample_loss': positive_sample_loss.item(),
			'negative_sample_loss': negative_sample_loss.item(),
			'loss': loss.item()
		}

		return log
예제 #22
0
    # Log the run header, the model itself and the number of parameters that
    # have requires_grad set.
    baselogger.info(f'Run {run}/{args.n_runs}, Total Epochs: {args.epochs}')
    baselogger.info(model)
    baselogger.info(
        f'total_params:{sum(p.numel() for p in model.parameters() if p.requires_grad)}'
    )

    # Start time of the whole run.
    tic_run = time.time()

    best_test_seen, best_test_unseen, test_seen, test_unseen, Z = 0, 0, 0, 0, None
    for epoch in range(args.epochs):
        # train
        tic_epoch = time.time()
        model.train()

        # One full-batch step: the forward pass uses Xseen and the loss is
        # taken over the train_idx rows only.
        optimizer.zero_grad()
        Z = model(Xseen)
        loss = F.nll_loss(Z[train_idx], Y[train_idx])

        loss.backward()
        optimizer.step()

        train_time = time.time() - tic_epoch

        # eval
        # NOTE(review): evaluation runs on X (not Xseen) and is not wrapped
        # in torch.no_grad() -- confirm whether gradients are needed here.
        model.eval()
        Z = model(X)

        train_acc = accuracy(Z[train_idx], Y[train_idx])
        test_seen = accuracy(Z[test_idx_seen], Y[test_idx_seen])
        test_unseen = accuracy(Z[test_idx_unseen], Y[test_idx_unseen])
예제 #23
0
def main():
    """Fine-tune a BERT binary sentence classifier and evaluate it.

    Parses the command-line options, reads the train/dev/test TSV files,
    builds the data loaders and trains with ``BertAdam``.  After every epoch
    the model is evaluated on the dev set; whenever the dev score improves,
    the weights are checkpointed to ``--output_model_path`` and the test set
    is evaluated.  Finally the best checkpoint is reloaded for one last test
    evaluation.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Path
    parser.add_argument("--output_model_path",
                        default="./models/classifier_model.bin",
                        type=str,
                        help="Path of the output model.")
    parser.add_argument("--output_lossfig_path",
                        default="./models/loss.png",
                        type=str,
                        help="Path of the output loss figure.")

    # Model options.
    parser.add_argument("--batch_size",
                        type=int,
                        default=32,
                        help="Batch size.")
    parser.add_argument("--seq_length",
                        type=int,
                        default=128,
                        help="Sequence length.")

    # Optimizer options.
    parser.add_argument("--learning_rate",
                        type=float,
                        default=2e-5,
                        help="Learning rate.")
    parser.add_argument("--warmup",
                        type=float,
                        default=0.1,
                        help="Warm up value.")

    # Training options.
    parser.add_argument("--dropout", type=float, default=0.5, help="Dropout.")
    parser.add_argument("--epochs_num",
                        type=int,
                        default=5,
                        help="Number of epochs.")
    parser.add_argument("--report_steps",
                        type=int,
                        default=100,
                        help="Specific steps to print prompt.")
    parser.add_argument("--seed", type=int, default=7, help="Random seed.")
    parser.add_argument("--device",
                        type=str,
                        default='cpu',
                        help="Device use.")

    args = parser.parse_args()

    def set_seed(seed=7):
        """Seed Python, NumPy and PyTorch and request deterministic cuDNN."""
        random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True

    set_seed(args.seed)

    # Read the data
    train = pd.read_csv('../data5k/train.tsv', encoding='utf-8', sep='\t')
    dev = pd.read_csv('../data5k/dev.tsv', encoding='utf-8', sep='\t')
    test = pd.read_csv('../data5k/test.tsv', encoding='utf-8', sep='\t')

    # Load bert vocabulary and tokenizer
    bert_config = BertConfig('bert_model/bert_config.json')
    BERT_MODEL_PATH = 'bert_model'
    bert_tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH,
                                                   cache_dir=None,
                                                   do_lower_case=False)

    # Build the model inputs
    processor = DataPrecessForSingleSentence(bert_tokenizer=bert_tokenizer)

    # train dataset
    seqs, seq_masks, seq_segments = processor.get_input(
        sentences=train['text_a'].tolist(), max_seq_len=args.seq_length)
    labels = train['label'].tolist()
    t_seqs = torch.tensor(seqs, dtype=torch.long)
    t_seq_masks = torch.tensor(seq_masks, dtype=torch.long)
    t_seq_segments = torch.tensor(seq_segments, dtype=torch.long)
    t_labels = torch.tensor(labels, dtype=torch.long)
    train_data = TensorDataset(t_seqs, t_seq_masks, t_seq_segments, t_labels)
    train_sampler = RandomSampler(train_data)
    train_dataloder = DataLoader(dataset=train_data,
                                 sampler=train_sampler,
                                 batch_size=args.batch_size)

    # dev dataset
    seqs, seq_masks, seq_segments = processor.get_input(
        sentences=dev['text_a'].tolist(), max_seq_len=args.seq_length)
    labels = dev['label'].tolist()
    t_seqs = torch.tensor(seqs, dtype=torch.long)
    t_seq_masks = torch.tensor(seq_masks, dtype=torch.long)
    t_seq_segments = torch.tensor(seq_segments, dtype=torch.long)
    t_labels = torch.tensor(labels, dtype=torch.long)
    dev_data = TensorDataset(t_seqs, t_seq_masks, t_seq_segments, t_labels)
    dev_sampler = RandomSampler(dev_data)
    dev_dataloder = DataLoader(dataset=dev_data,
                               sampler=dev_sampler,
                               batch_size=args.batch_size)

    # test dataset
    seqs, seq_masks, seq_segments = processor.get_input(
        sentences=test['text_a'].tolist(), max_seq_len=args.seq_length)
    labels = test['label'].tolist()
    t_seqs = torch.tensor(seqs, dtype=torch.long)
    t_seq_masks = torch.tensor(seq_masks, dtype=torch.long)
    t_seq_segments = torch.tensor(seq_segments, dtype=torch.long)
    t_labels = torch.tensor(labels, dtype=torch.long)
    test_data = TensorDataset(t_seqs, t_seq_masks, t_seq_segments, t_labels)
    test_sampler = RandomSampler(test_data)
    test_dataloder = DataLoader(dataset=test_data,
                                sampler=test_sampler,
                                batch_size=args.batch_size)

    # build classification model
    model = BertForSequenceClassification(bert_config, 2)

    # For simplicity, we use DataParallel wrapper to use multiple GPUs.
    if args.device == 'cpu':
        device = torch.device("cpu")
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if torch.cuda.device_count() > 1:
            print("{} GPUs are available. Let's use them.".format(
                torch.cuda.device_count()))
            model = nn.DataParallel(model)
    model = model.to(device)

    # evaluation function
    def evaluate(args, is_test, metrics='Acc'):
        """Score the model on the test set (``is_test``) or the dev set.

        Prints per-label precision, recall and F1 plus the overall accuracy.
        Returns the F1 of label 1 when ``metrics == 'f1'`` and the accuracy
        otherwise.
        """
        if is_test:
            dataset = test_dataloder
            instances_num = test.shape[0]
            print("The number of evaluation instances: ", instances_num)
        else:
            dataset = dev_dataloder
            instances_num = dev.shape[0]
            print("The number of evaluation instances: ", instances_num)

        correct = 0
        model.eval()
        # Confusion matrix, indexed as [predicted label, gold label].
        confusion = torch.zeros(2, 2, dtype=torch.long)

        for i, batch_data in enumerate(dataset):
            batch_data = tuple(t.to(device) for t in batch_data)
            batch_seqs, batch_seq_masks, batch_seq_segments, batch_labels = batch_data
            with torch.no_grad():
                logits = model(batch_seqs,
                               batch_seq_masks,
                               batch_seq_segments,
                               labels=None)
            pred = logits.softmax(dim=1).argmax(dim=1)
            gold = batch_labels
            for j in range(pred.size()[0]):
                confusion[pred[j], gold[j]] += 1
            correct += torch.sum(pred == gold).item()

        if is_test:
            print("Confusion matrix:")
            print(confusion)
            print("Report precision, recall, and f1:")

        label_1_f1 = 0.0
        for i in range(confusion.size()[0]):
            hits = confusion[i, i].item()
            predicted_total = confusion[i, :].sum().item()
            gold_total = confusion[:, i].sum().item()
            # A label that is never predicted (or never present) would
            # otherwise divide by zero; report 0.0 for it instead.
            p = hits / predicted_total if predicted_total else 0.0
            r = hits / gold_total if gold_total else 0.0
            f1 = 2 * p * r / (p + r) if (p + r) else 0.0
            if i == 1:
                label_1_f1 = f1
            print("Label {}: {:.3f}, {:.3f}, {:.3f}".format(i, p, r, f1))
        print("Acc. (Correct/Total): {:.4f} ({}/{}) ".format(
            correct / instances_num, correct, instances_num))
        if metrics == 'Acc':
            return correct / instances_num
        elif metrics == 'f1':
            return label_1_f1
        else:
            return correct / instances_num

    # training phase
    print("Start training.")
    instances_num = train.shape[0]
    batch_size = args.batch_size
    train_steps = int(instances_num * args.epochs_num / batch_size) + 1

    print("Batch size: ", batch_size)
    print("The number of training instances:", instances_num)

    # Parameters to optimise: biases and LayerNorm parameters get no weight
    # decay, everything else uses 0.01.
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=args.learning_rate,
                         warmup=args.warmup,
                         t_total=train_steps)

    # Per-epoch summed training loss and dev score.
    all_loss = []
    all_acc = []
    total_loss = 0.0
    result = 0.0
    best_result = 0.0

    for epoch in range(1, args.epochs_num + 1):
        model.train()
        # ``total_loss`` is reset after every 100-step report, so the epoch
        # total is tracked in its own accumulator.
        epoch_loss = 0.0
        for step, batch_data in enumerate(train_dataloder):
            batch_data = tuple(t.to(device) for t in batch_data)
            batch_seqs, batch_seq_masks, batch_seq_segments, batch_labels = batch_data
            # Passing the labels makes the model return the loss directly.
            loss = model(batch_seqs, batch_seq_masks, batch_seq_segments,
                         batch_labels)
            loss.backward()
            batch_loss = loss.item()
            total_loss += batch_loss
            epoch_loss += batch_loss
            if (step + 1) % 100 == 0:
                print("Epoch id: {}, Training steps: {}, Avg loss: {:.3f}".
                      format(epoch, step + 1, total_loss / 100))
                sys.stdout.flush()
                total_loss = 0.
            optimizer.step()
            optimizer.zero_grad()

        all_loss.append(epoch_loss)
        total_loss = 0.
        print("Start evaluation on dev dataset.")
        result = evaluate(args, False)
        all_acc.append(result)
        if result > best_result:
            best_result = result
            # Save the state dict (not the pickled module): the final
            # evaluation restores the checkpoint with ``load_state_dict``.
            # Passing the path lets torch open and close the file itself.
            torch.save(model.state_dict(), args.output_model_path)
        else:
            continue

        print("Start evaluation on test dataset.")
        evaluate(args, True)

    print('all_loss:', all_loss)
    print('all_acc:', all_acc)

    # Evaluation phase.
    print("Final evaluation on the test dataset.")
    model.load_state_dict(
        torch.load(args.output_model_path, map_location=device))
    evaluate(args, True)
    '''
예제 #24
0
def train(dataloader, model, criterion, optimizer, scheduler, epoch):
    """Train ``model`` for one epoch on inputs perturbed by mixed noise.

    Every batch is perturbed by a blend of Gaussian noise and a targeted
    direction: the negated loss gradient (computed through ART's
    ``PyTorchClassifier``) towards randomly drawn target classes. The blend
    is ``x + gauss * (1 - k) + scaled_grad * k`` with ``k = args.k_value``.

    Relies on module-level names: ``device``, ``args`` (``noise_sd``, ``lr``,
    ``momentum``, ``weight_decay``, ``eps_step``, ``k_value``),
    ``min_pixel_value``, ``max_pixel_value``, ``get_num_classes`` and
    ``accuracy``.

    Args:
        dataloader: Iterable of ``(x, y)`` batches; must support ``len()``.
        model: Network being trained.
        criterion: Loss applied to ``model(x_combine)`` and ``y``.
        optimizer: Optimizer that updates ``model``.
        scheduler: LR scheduler, stepped once after all batches.
        epoch: Epoch index, used only for logging.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)``, both averaged over the number
        of batches in ``dataloader``.
    """
    model.train()
    print('epoch ' + str(epoch))

    train_loss = 0.0
    train_acc = 0.0
    total = len(dataloader)
    start = time.time()
    num_classes = get_num_classes()

    # The ART wrapper is only used to obtain input gradients. It keeps a
    # reference to ``model`` (whose weights are updated in place), so it is
    # loop-invariant and built once instead of once per batch.
    tmp_criterion = nn.CrossEntropyLoss()
    tmp_optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    classifier = PyTorchClassifier(
        model=model,
        clip_values=(min_pixel_value, max_pixel_value),
        loss=tmp_criterion,
        optimizer=tmp_optimizer,
        input_shape=(3, 32, 32),
        # Same class count as the one used to draw the random targets below.
        nb_classes=num_classes,
    )

    for x, y in dataloader:
        x = x.to(device)
        y = y.to(device)

        # Gaussian noise component.
        gauss_noise = torch.randn_like(x, device=device) * args.noise_sd

        # Targeted component: random target classes for this batch.
        targets = art.utils.random_targets(y.cpu().numpy(), num_classes)

        # Negated loss gradient w.r.t. the input, scaled by the step size.
        grad = classifier.loss_gradient(x=x.cpu().numpy(), y=targets) * (-1.0)
        scaled_grad = torch.Tensor(grad * args.eps_step).to(device)

        # Combine Gaussian noise and targeted noise.
        x_combine = x + (gauss_noise *
                         (1.0 - args.k_value)) + (scaled_grad * args.k_value)

        # Some ART releases switch the wrapped model to eval mode inside
        # ``loss_gradient``; make sure the update below runs in train mode.
        model.train()
        # Drop parameter gradients left behind by the ART gradient pass.
        model.zero_grad()

        output = model(x_combine)
        loss = criterion(output, y)
        acc = accuracy(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_acc += acc

    scheduler.step()
    end = time.time()
    print('trainning time:', end - start, 'sec, loss: ', train_loss / total,
          'acc: ', train_acc / total)
    return train_loss / total, train_acc / total
예제 #25
0
def train(model, train_dataloder, valid_dataloder, loss_function, optimizer,
          device, num_labels,
          save_path='./job_fine_tuned_bert.pth'):
    """Fine-tune ``model`` with early stopping, save it and plot the losses.

    Runs up to ``EPOCHS`` (module-level) epochs. Each epoch trains on
    ``train_dataloder``, evaluates on ``valid_dataloder`` under
    ``torch.no_grad()``, records the mean loss of both phases and passes the
    validation loss to an ``EarlyStopping`` helper (patience 20). Afterwards
    the whole model object is pickled to ``save_path`` and the loss curves
    are written to ``loss_plot.png``.

    Args:
        model: Callable as ``model(seqs, masks, segments, labels=None)``
            returning logits.
        train_dataloder: Yields 4-tuples of tensors
            ``(seqs, seq_masks, seq_segments, labels)`` for training.
        valid_dataloder: Same layout, used for validation.
        loss_function: Applied to ``log_softmax(logits, dim=1)`` and labels.
        optimizer: Optimizer updating ``model``.
        device: Device every batch tensor is moved to.
        num_labels: Unused; kept for interface compatibility.
        save_path: Destination file for ``torch.save``.

    Returns:
        The trained ``model``.
    """
    # Per-step losses of the current epoch and per-epoch averages.
    train_losses = []
    valid_losses = []
    avg_train_losses = []
    avg_valid_losses = []
    patience = 20
    early_stopping = EarlyStopping(patience=patience, verbose=True)

    for _ in trange(EPOCHS, desc='Epoch'):

        # ---- training phase ----
        model.train()
        for step, batch_data in enumerate(train_dataloder):
            batch_data = tuple(t.to(device) for t in batch_data)
            batch_seqs, batch_seq_masks, batch_seq_segments, batch_labels = batch_data
            logits = model(batch_seqs,
                           batch_seq_masks,
                           batch_seq_segments,
                           labels=None)
            logits = torch.nn.functional.log_softmax(logits, dim=1)
            loss = loss_function(logits, batch_labels)
            loss.backward()
            loss_value = loss.item()
            train_losses.append(loss_value)
            print("\r step: %d / %d, loss: %f" %
                  (step, len(train_dataloder), loss_value),
                  end='')
            optimizer.step()
            optimizer.zero_grad()
            torch.cuda.empty_cache()

        # ---- validation phase ----
        model.eval()
        with torch.no_grad():
            for batch_data in valid_dataloder:
                batch_data = tuple(t.to(device) for t in batch_data)
                batch_seqs, batch_seq_masks, batch_seq_segments, batch_labels = batch_data

                logits = model(batch_seqs,
                               batch_seq_masks,
                               batch_seq_segments,
                               labels=None)
                logits = torch.nn.functional.log_softmax(logits, dim=1)
                loss = loss_function(logits, batch_labels)
                valid_losses.append(loss.item())
        torch.cuda.empty_cache()

        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)
        avg_train_losses.append(train_loss)
        avg_valid_losses.append(valid_loss)
        # Report once per epoch. The previous condition tested the index of
        # the last validation batch, which is the same every epoch, so the
        # summary was printed either always or never.
        print("train_loss:%f, valid_loss:%f" % (train_loss, valid_loss))

        # Reset the per-step losses for the next epoch.
        train_losses = []
        valid_losses = []

        early_stopping(valid_loss, model)
        if early_stopping.early_stop:
            print("Early Stopping")
            break

    # Close the file handle deterministically after pickling the model.
    with open(save_path, "wb") as model_file:
        torch.save(model, model_file)

    # Plot the loss curves.
    fig = plt.figure(figsize=(8, 6))
    plt.plot(range(1,
                   len(avg_train_losses) + 1),
             avg_train_losses,
             label='Training Loss')
    plt.plot(range(1,
                   len(avg_valid_losses) + 1),
             avg_valid_losses,
             label='Validation Loss')
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    # Write the file before showing, so the image does not depend on what
    # the GUI backend does with the figure when the window is closed.
    fig.savefig('loss_plot.png', bbox_inches='tight')
    plt.show()

    return model
예제 #26
0
# Fit a one-dimensional linear regression y ~ w * x + b with plain SGD and
# redraw the fitted line every 100 iterations.
# Expects `x` and `y` to be NumPy arrays defined earlier in the script.
model = nn.Linear(1, 1)

# Loss function
criterion = nn.MSELoss()

# Stochastic gradient descent
optimizer = SGD(model.parameters(), lr=0.001)
x_train = x.reshape(-1, 1).astype('float32')
y_train = y.reshape(-1, 1).astype('float32')

# Number of training iterations
train_times = 30000

# The training data never changes, so convert it to tensors once instead of
# on every iteration. The name `inputs` avoids shadowing the builtin `input`.
inputs = torch.from_numpy(x_train)
labels = torch.from_numpy(y_train)

for i in range(train_times):
    output = model(inputs)
    optimizer.zero_grad()  # clear gradients
    loss = criterion(output, labels)
    loss.backward()  # back-propagation
    optimizer.step()  # update parameters
    if i % 100 == 0:
        plt.clf()
        # Every 100 iterations, redraw the fit and print the loss.
        plt.scatter(x, y)
        plt.plot(x, output.detach().numpy(), color="red")
        plt.pause(0.1)
        print('epoch {}, loss {:1.4f}'.format(i, loss.item()))

plt.ioff()
plt.show()
예제 #27
0
def train_model(classifier, criterion, optimizer, trainLoader, valLoader,
                epochs):
    """Train ``classifier`` and validate it after every epoch.

    Relies on the module-level ``device`` (batches are moved to it) and on
    ``tqdm`` for the training progress bar.

    Args:
        classifier: ``torch.nn.Module`` mapping a batch of inputs to
            per-class scores of shape ``(batch, classes)``.
        criterion: Loss taking ``(outputs, labels)``.
        optimizer: Optimizer updating ``classifier``.
        trainLoader: Iterable of ``(inputs, labels)`` training batches;
            must support ``len()``.
        valLoader: Iterable of ``(inputs, labels)`` validation batches;
            must support ``len()``.
        epochs: Number of passes over ``trainLoader``.

    Returns:
        ``(loss_list, correct_list, val_loss_list, val_correct_list)``, one
        entry per epoch. Losses are Python floats averaged per batch;
        accuracies are 0-d CPU float tensors holding the fraction of
        correctly classified samples.
    """
    loss_list = []
    val_loss_list = []
    correct_list = []
    val_correct_list = []
    for epoch in range(epochs):
        running_loss = 0.0
        val_running_loss = 0.0
        running_correct = 0
        val_running_correct = 0
        train_seen = 0
        val_seen = 0

        # ---- training pass ----
        classifier.train()
        for inputs, labels in tqdm(trainLoader):
            # The model lives on `device`, so the batch must too.
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()  # zero the parameter gradients once
            outputs = classifier(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.detach(), 1)
            running_correct += (predicted == labels).sum().item()
            train_seen += labels.size(0)

        # ---- validation pass ----
        # Eval mode so that layers such as dropout / batch-norm behave
        # deterministically while measuring validation metrics.
        classifier.eval()
        with torch.no_grad():
            for inputs, labels in valLoader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = classifier(inputs)
                loss = criterion(outputs, labels)
                val_running_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                val_running_correct += (predicted == labels).sum().item()
                val_seen += labels.size(0)

        # Loss is averaged per batch; accuracy is a per-sample fraction
        # (it used to be divided by the number of batches). ``max(..., 1)``
        # keeps an empty loader from raising ZeroDivisionError.
        epoch_loss = running_loss / max(len(trainLoader), 1)
        epoch_acc = running_correct / max(train_seen, 1)
        loss_list.append(epoch_loss)
        correct_list.append(torch.tensor(epoch_acc))

        val_epoch_loss = val_running_loss / max(len(valLoader), 1)
        val_epoch_acc = val_running_correct / max(val_seen, 1)
        val_loss_list.append(val_epoch_loss)
        val_correct_list.append(torch.tensor(val_epoch_acc))

        print('Epoch {}/{}'.format(epoch + 1, epochs))
        print('Training Loss: {:.4f}'.format(epoch_loss))
        print('Training Accuracy: {:.4f}'.format(epoch_acc))
        print('Validation Loss: {:.4f}'.format(val_epoch_loss))
        print('Validation Accuracy: {:.4f}'.format(val_epoch_acc))
    return loss_list, correct_list, val_loss_list, val_correct_list
예제 #28
0
def train(dataloader, model, criterion, optimizer, scheduler, epoch):
    """Train ``model`` for one epoch on inputs perturbed by mixed noise.

    Every batch is perturbed by a blend of Gaussian noise and a targeted
    component. The targeted component is the sum, over every class other
    than the true label (visited in a random per-sample order), of the
    negated and ``args.eps_step``-scaled loss gradient towards that class,
    computed through ART's ``PyTorchClassifier``. The blend is
    ``x + gauss * (1 - k) + mix_noise * k`` with ``k = args.k_value``.

    Relies on module-level names: ``device``, ``args`` (``noise_sd``, ``lr``,
    ``momentum``, ``weight_decay``, ``eps_step``, ``k_value``),
    ``min_pixel_value``, ``max_pixel_value``, ``get_num_classes`` and
    ``accuracy``.

    Args:
        dataloader: Iterable of ``(x, y)`` batches; must support ``len()``.
        model: Network being trained.
        criterion: Loss applied to ``model(x_combine)`` and ``y``.
        optimizer: Optimizer that updates ``model``.
        scheduler: LR scheduler, stepped once after all batches.
        epoch: Epoch index, used only for logging.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)``, both averaged over the number
        of batches in ``dataloader``.
    """
    model.train()
    print('epoch ' + str(epoch))

    train_loss = 0.0
    train_acc = 0.0
    total = len(dataloader)
    start = time.time()
    num_classes = get_num_classes()
    class_ids = np.arange(num_classes)

    # The ART wrapper is only used to obtain input gradients. It keeps a
    # reference to ``model`` (whose weights are updated in place), so it is
    # loop-invariant and built once instead of once per batch.
    tmp_criterion = nn.CrossEntropyLoss()
    tmp_optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    classifier = PyTorchClassifier(
        model=model,
        clip_values=(min_pixel_value, max_pixel_value),
        loss=tmp_criterion,
        optimizer=tmp_optimizer,
        input_shape=(3, 32, 32),
        # Same class count as the one used to build the targets below.
        nb_classes=num_classes,
    )

    for x, y in dataloader:
        x = x.to(device)
        y = y.to(device)

        # Gaussian noise component.
        gauss_noise = torch.randn_like(x, device=device) * args.noise_sd

        # Row i holds all classes except the true label of sample i, in a
        # random order; column t is the t-th target class of every sample.
        targets = np.stack([
            np.random.permutation(np.delete(class_ids, label))
            for label in y.cpu().numpy()
        ])

        # The input batch is the same for every target column: convert once.
        x_np = x.cpu().numpy()
        sample_idx = np.arange(targets.shape[0])

        mix_noise = torch.zeros_like(x)
        for target_column in targets.T:
            # One-hot encode this column of target classes.
            y_oh = np.zeros((target_column.size, num_classes))
            y_oh[sample_idx, target_column] = 1

            # Negated loss gradient w.r.t. the input, scaled by the step.
            grad = classifier.loss_gradient(x=x_np, y=y_oh) * (-1.0)
            mix_noise += torch.Tensor(grad * args.eps_step).to(device)

            # Drop parameter gradients left behind by the gradient pass.
            model.zero_grad()
            tmp_optimizer.zero_grad()

        # Combine Gaussian noise and targeted noise.
        x_combine = x + (gauss_noise * (1.0 - args.k_value)) + (mix_noise * args.k_value)

        # Some ART releases switch the wrapped model to eval mode inside
        # ``loss_gradient``; make sure the update below runs in train mode.
        model.train()
        model.zero_grad()

        output = model(x_combine)
        loss = criterion(output, y)
        acc = accuracy(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_acc += acc

    scheduler.step()
    end = time.time()
    print('trainning time:',end - start,'sec, loss: ', train_loss/total, 'acc: ', train_acc/total)
    return train_loss/total, train_acc/total
예제 #29
0
    # For every optimizer class in `optims`: start from a random point drawn
    # around (9, 9), run `max_iter` optimisation steps on `func_to_use`,
    # record the visited points and draw the trajectory on the 3D axes `ax`.
    for i, opt in enumerate(optims):
        # Starting point: N(0, 1) noise per coordinate, shifted by (9, 9).
        x = Variable(torch.normal(
            torch.zeros(2), torch.FloatTensor([1, 1])) + 
            torch.FloatTensor([9, 9]), requires_grad = True
        )
        # `x` is the only parameter being optimised; learning rate is per optimizer.
        optimizer = opt([x, ], lr = lrs[i])
        x_start = x.data.clone()  # not used in the visible part of this loop
        pos = []  # positions visited, one entry per iteration (before the step)
        for j in range(max_iter):
            point = x.data.clone()
            pos.append(point.numpy())
            y = func_to_use(x)
            y.backward(retain_graph = True)
            optimizer.step()            # Without a closure (a function that captures outer variables) step() performs a single update; with a closure it may perform several
            # Without a closure, backward() has already computed the gradient of x, so that gradient can be used for a line search to obtain a step size and move (update) x
            optimizer.zero_grad()       # The usual habit is to call zero_grad first and then start the computation again
            # zero_grad clears the previously computed grad (otherwise gradients accumulate)
            # What step() does: (left unfinished in the original comment)

            print("Optimizer %s, iteration %d / %d, x = "%(opt.__name__, j, max_iter), x.data, "y = ", float(y.data))

        ## Plotting
        pos = np.array(pos)
        
        # Function value at every recorded position (evaluated on NumPy rows).
        curve_vals = np.array([func_to_use(p) for p in pos])
        # print(pos)
        xs = pos[:, 0]
        ys = pos[:, 1]
        # print(xs.shape, ys.shape, curve_vals.shape)
        # Trajectory as a line plus markers, coloured and labelled per optimizer.
        ax.plot3D(xs, ys, curve_vals, c = colors[i], label = opt.__name__)
        ax.scatter3D(xs, ys, curve_vals, c = colors[i], s = 10)
예제 #30
0
def train_model(classifier, criterion, optimizer, trainLoader, valLoader,
                epochs):
    """Train ``classifier``, validate after every epoch and plot the curves.

    For each epoch the function runs one pass over ``trainLoader`` with
    gradient updates, then one pass over ``valLoader`` under
    ``torch.no_grad()``. Losses are averaged per batch; accuracies are the
    number of correct predictions divided by ``len(loader.dataset)``.
    After the last epoch the loss and accuracy curves are plotted and saved
    to hard-coded files under ``data/``.

    Args:
        classifier: Callable mapping a batch of inputs to per-class scores.
        criterion: Loss taking ``(outputs, labels)``.
        optimizer: Optimizer updating ``classifier``.
        trainLoader: Yields ``(inputs, labels)``; needs ``len()`` and a
            ``dataset`` attribute.
        valLoader: Same requirements, used for validation.
        epochs: Number of epochs to run.

    NOTE(review): no ``return`` statement is visible in this part of the
    file; confirm whether callers expect the metric lists back.
    """
    loss_list = []
    val_loss_list = []
    correct_list = []
    val_correct_list = []
    for epoch in range(epochs):
        running_loss = 0.0
        val_running_loss = 0.0
        running_correct = 0.0
        val_running_correct = 0.0
        for i, data in enumerate(tqdm(trainLoader)):
            inputs, labels = data  # Get the inputs and labels from the data loader
            optimizer.zero_grad()  # Zero the parameter gradients

            # Forward pass
            outputs = classifier(inputs)
            loss = criterion(outputs, labels)  # Compute the loss

            # Backward pass
            # NOTE: gradients were already cleared above; this second call is redundant.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()  # Parameter update

            # Calculate the running loss and correct
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            # Adding a tensor turns the float accumulator into a tensor.
            running_correct += torch.sum(predicted == labels.data)
        else:
            # The training loop has no `break`, so this `else` branch runs
            # after every completed training pass.
            with torch.no_grad():
                for j, data in enumerate(valLoader):
                    inputs, labels = data
                    # Forward pass
                    outputs = classifier(inputs)
                    # Calculate loss
                    loss = criterion(outputs, labels)
                    val_running_loss += loss.item()
                    # Compute the running correct
                    _, predicted = torch.max(outputs.data, 1)
                    val_running_correct += torch.sum(predicted == labels.data)
            epoch_loss = running_loss / len(trainLoader)  # loss per epoch
            # `.float()` requires the accumulator to be a tensor, i.e. at
            # least one batch must have been processed.
            epoch_acc = running_correct.float() / len(
                trainLoader.dataset)  # accuracy per epoch
            loss_list.append(epoch_loss)
            correct_list.append(epoch_acc)

            val_epoch_loss = val_running_loss / len(
                valLoader)  # loss per epoch
            val_epoch_acc = val_running_correct.float() / len(
                valLoader.dataset)  # accuracy per epoch
            val_loss_list.append(val_epoch_loss)
            val_correct_list.append(val_epoch_acc)
            print('Epoch {}/{}'.format(epoch + 1, epochs))
            print('Training Loss: {:.4f}'.format(epoch_loss))
            print('Training Accuracy: {:.4f}'.format(epoch_acc))
            print('Validation Loss: {:.4f}'.format(val_epoch_loss))
            print('Validation Accuracy: {:.4f}'.format(val_epoch_acc))
    # Loss curves over epochs, written to a hard-coded file name.
    plt.style.use('ggplot')
    plt.plot(loss_list, label='Training Loss')
    plt.plot(val_loss_list, label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.savefig('data/loss_25_Adam_LeNet2.png')  # MODIFY OUTPUT NAME
    plt.show()

    # Accuracy curves over epochs, written to a hard-coded file name.
    # NOTE(review): no new figure is created explicitly here - confirm the
    # loss figure is gone after `plt.show()` on the backend in use.
    plt.style.use('ggplot')
    plt.plot(correct_list, label='Training Accuracy')
    plt.plot(val_correct_list, label='Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.legend()
    plt.savefig('data/accuracy_25_Adam_LeNet2.png')  # MODIFY OUTPUT NAME
    plt.show()