Example #1
def train(net):
    net.train()
    priorbox = PriorBox()
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.to(device)

    dataloader = DataLoader(VOCDetection(),
                            batch_size=2,
                            collate_fn=detection_collate,
                            num_workers=12)

    for epoch in range(1000):
        loss_ls, loss_cs = [], []
        load_t0 = time.time()
        if epoch > 500:
            adjust_learning_rate(optimizer, 1e-4)

        for images, targets in dataloader:
            images = images.to(device)
            targets = [anno.to(device) for anno in targets]
            out = net(images)
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, priors, targets)

            loss = 2 * loss_l + loss_c
            loss.backward()
            optimizer.step()
            loss_cs.append(loss_c.item())
            loss_ls.append(loss_l.item())
        load_t1 = time.time()

        print(f'{np.mean(loss_cs)}, {np.mean(loss_ls)} time:{load_t1-load_t0}')
        torch.save(net.state_dict(), 'Final_FaceBoxes.pth')
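Most of the examples on this page call a project-level `adjust_learning_rate` helper whose definition is not shown. A minimal sketch of the variant this first pair of examples appears to assume (an absolute rate written onto every parameter group); the actual utility in each project may differ:

def adjust_learning_rate(optimizer, new_lr):
    # Hypothetical helper, not part of the example above: overwrite the
    # learning rate on every parameter group of a torch.optim optimizer.
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr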
Example #2
def train(net):
    net.train()
    priorbox = PriorBox()
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.to(device)

    dataloader = DataLoader(VOCDetection(), batch_size=2, collate_fn=detection_collate, num_workers=12)

    for epoch in range(1000):
        loss_ls, loss_cs = [], []
        load_t0 = time.time()
        if epoch > 500:
            adjust_learning_rate(optimizer, 1e-4)

        for images, targets in dataloader:
            images = images.to(device)
            targets = [anno.to(device) for anno in targets]
            out = net(images)
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, priors, targets)

            loss = 2 * loss_l + loss_c
            loss.backward()
            optimizer.step()
            loss_cs.append(loss_c.item())
            loss_ls.append(loss_l.item())
        load_t1 = time.time()

        print(f'{np.mean(loss_cs)}, {np.mean(loss_ls)} time:{load_t1-load_t0}')
        torch.save(net.state_dict(), 'Final_FaceBoxes.pth')
Example #3
    def train(self, traindataloader, valdataloader, startepoch, endepoch):
        for epoch in range(startepoch, endepoch + 1):

            train = self._epoch(traindataloader, epoch)

            if epoch % self.opts.valInterval == 0:
                with torch.no_grad():
                    test = self._epoch(valdataloader, epoch, 'val')
                Writer = open(self.File, 'a')
                Writer.write(train + ' ' + test + '\n')
                Writer.close()
            else:
                Writer = open(self.File, 'a')
                Writer.write(train + '\n')
                Writer.close()

            if epoch % self.opts.saveInterval == 0:
                state = {
                    'epoch': epoch + 1,
                    'model_state': self.model.state_dict(),
                    'optimizer_state': self.optimizer.state_dict(),
                }
                path = os.path.join(self.opts.saveDir,
                                    'model_{}.pth'.format(epoch))
                torch.save(state, path)
            adjust_learning_rate(self.optimizer, epoch, self.opts.dropLR,
                                 self.opts.dropMag)
        loss_final = self._epoch(valdataloader, -1, 'val')
        return
Example #4
    def train(self, epoch, epochs):
        adjust_learning_rate(self.optimizer, epoch, self.lr, epochs)
        self.model.train()
        total_loss, total_num, train_bar = 0.0, 0, tqdm(self.train_loader)
        for x, target in train_bar:
            batch_size = len(target)
            x1, x2 = x[0].cuda(), x[1].cuda()

            _, online_projection_1 = self.model(x1)
            _, online_projection_2 = self.model(x2)
            online_prediction_1 = self.predictor(online_projection_1)
            online_prediction_2 = self.predictor(online_projection_2)

            with torch.no_grad():
                _, target_projection_1 = self.target_encoder(x1)
                _, target_projection_2 = self.target_encoder(x2)

            loss = self.L(
                online_prediction_1, target_projection_2.detach()) + self.L(
                    online_prediction_2, target_projection_1.detach())
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            self.update_moving_average(self.target_ema_updater,
                                       self.target_encoder, self.model)

            total_num += batch_size
            total_loss += loss.item() * batch_size
            train_bar.set_description(
                'Train Epoch: [{}/{}] Loss: {:.4f}'.format(
                    epoch, epochs, total_loss / total_num))
        return total_loss / total_num
Example #5
def train(train_loader,
          model,
          optimizer,
          criterion,
          regularizer=None,
          lr_schedule=None):
    loss_sum = 0.0
    correct = 0.0

    num_iters = len(train_loader)
    model.train()
    for iter, (input, target) in enumerate(train_loader):
        if lr_schedule is not None:
            lr = lr_schedule(iter / num_iters)
            utils.adjust_learning_rate(optimizer, lr)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        output = model(input)
        loss = criterion(output, target)
        if regularizer is not None:
            loss += regularizer(model)

        optimizer.zero_grad()
        loss.backward()

        loss_sum += loss.item() * input.size(0)
        pred = output.data.argmax(1, keepdim=True)
        correct += pred.eq(target.data.view_as(pred)).sum().item()

    grad = np.concatenate(
        [p.grad.data.cpu().numpy().ravel() for p in model.parameters()])

    return grad
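The loop above treats `lr_schedule` as a callable that maps the fraction of the epoch already processed (`iter / num_iters`) to a learning rate. A small illustrative schedule, offered only as an assumption about the expected interface:

def make_linear_schedule(lr_start, lr_end):
    # Hypothetical factory: returns a schedule that interpolates linearly
    # from lr_start (start of the epoch) towards lr_end (end of the epoch).
    def lr_schedule(t):  # t is the completed fraction, in [0, 1)
        return lr_start + (lr_end - lr_start) * t
    return lr_schedule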
Example #6
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    sys.stdout = Logger(osp.join(args.save_dir, 'log' + '.txt'))

    if use_gpu:
        print("Currently using GPU: {}".format(args.gpu))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU")


    with open(loader_path, 'rb') as f:
        trainloader, testloader = pickle.load(f)


    print("Creating model: {}".format(args.model))
    model = models.create(name=args.model, num_classes=num_classes, feature_dim=feature_dim)

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    criterion_xent = nn.CrossEntropyLoss()
    criterion_cent = CenterLoss(num_classes=num_classes, feat_dim=args.featdim, use_gpu=use_gpu)
    optimizer_model = torch.optim.SGD(model.parameters(), lr=args.lr_model, weight_decay=5e-04, momentum=0.9)
    optimizer_centloss = torch.optim.SGD(criterion_cent.parameters(), lr=args.lr_cent)

    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer_model, step_size=args.stepsize, gamma=args.gamma)

    start_time = time.time()

    total_loss_list = []
    train_acc, test_acc = 0, 0
    for epoch in range(args.max_epoch):
        adjust_learning_rate(optimizer_model, epoch)

        print("==> Epoch {}/{}".format(epoch+1, args.max_epoch))
        loss_list, train_acc = train(model, criterion_xent, criterion_cent,
              optimizer_model, optimizer_centloss,
              trainloader, use_gpu, num_classes, epoch)
        total_loss_list.append(loss_list)

        if args.stepsize > 0: scheduler.step()

        if args.eval_freq > 0 and (epoch+1) % args.eval_freq == 0 or (epoch+1) == args.max_epoch:
            print("==> Test")
            test_acc = test(model, testloader, use_gpu, num_classes, epoch)

    total_loss_list = np.array(total_loss_list).ravel()

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))

    return total_loss_list, train_acc, test_acc
Example #7
def train(x_train,x_train_external,y_train):
    # model
    
    num_class=np.shape(y_train)[1]
    num_external=np.shape(x_train_external)[1]
    
    model = ECGNet(BasicBlock, [3, 4, 6, 3],num_classes= num_class,num_external=num_external)
    model = model.to(device)
    
    # optimizer and loss
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    criterion1 = nn.BCEWithLogitsLoss()
    
    lr = config.lr
    start_epoch = 1
    stage = 1
    best_auc = -1   
       
    # =========> start training <=========
    print("*" * 10, "step into stage %02d lr %.5f" % (stage, lr))
    for epoch in range(start_epoch, config.max_epoch + 1):
        since = time.time()
        train_loss,train_auc= train_epoch(model, optimizer, criterion1,x_train,x_train_external,y_train)
        print('#epoch:%02d stage:%d train_loss:%.4f train_auc:%.4f time:%s'
              % (epoch, stage, train_loss, train_auc, utils.print_time_cost(since)))
                   
        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay
            print("*" * 10, "step into stage %02d lr %.5f" % (stage, lr))
            utils.adjust_learning_rate(optimizer, lr)
            

    return model
Example #8
    def run_train(self):
        start = time.time()

        # epochs
        for epoch in range(self.start_epoch, self.num_epochs):
            # train an epoch
            self.train(epoch=epoch)

            # time per epoch
            epoch_time = time.time() - start
            print('Epoch: [{0}] finished, time consumed: {epoch_time:.3f}'.
                  format(epoch, epoch_time=epoch_time))

            # decay learning rate every epoch
            adjust_learning_rate(self.optimizer, self.lr_decay)

            # save checkpoint
            if self.checkpoint_path is not None:
                save_checkpoint(epoch=epoch,
                                model=self.model,
                                model_name=self.model_name,
                                optimizer=self.optimizer,
                                dataset_name=self.dataset_name,
                                word_map=self.word_map,
                                checkpoint_path=self.checkpoint_path,
                                checkpoint_basename=self.checkpoint_basename)

            start = time.time()
Example #9
def train(mix_trainloader, model, interp, optimizer, args):
    """Create the model and start the training."""
    tot_iter = len(mix_trainloader)
    for i_iter, batch in enumerate(mix_trainloader):
        images, labels, name = batch
        labels = labels.long()

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter, tot_iter, args)

        if args.info_max_loss:
            pred = model(images.to(device), training=True)
            loss = self_training_regularized_infomax(pred, labels.to(device),
                                                     args)
        elif args.unc_noise:
            pred, noise_pred = model(images.to(device), training=True)
            loss = self_training_regularized_infomax_cct(
                pred, labels.to(device), noise_pred, args)
        else:
            pred = model(images.to(device))
            loss = F.cross_entropy(pred, labels.to(device), ignore_index=255)

        loss.backward()
        optimizer.step()

        logger.info('iter = {} of {} completed, loss = {:.4f}'.format(
            i_iter + 1, tot_iter, loss.item()))
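Here `adjust_learning_rate(optimizer, i_iter, tot_iter, args)` is called once per iteration, which usually means the rate is recomputed from the training progress. One plausible shape for such a helper, shown purely as a sketch (a polynomial decay is common in segmentation and self-training code; `args.lr` and `args.power` are assumed names, not taken from the snippet):

def adjust_learning_rate(optimizer, i_iter, tot_iter, args):
    # Hypothetical "poly" decay: anneal from args.lr towards zero over tot_iter steps.
    lr = args.lr * (1 - i_iter / tot_iter) ** args.power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr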
Example #10
def train(x_train, x_train_external, y_train):
    # model

    num_class = np.shape(y_train)[1]

    model = ResNet34(num_classes=num_class)
    model = model.to(device)

    # optimizer and loss
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    wc = y_train.sum(axis=0)
    wc = 1. / (np.log(wc + 1) + 1)

    w = torch.tensor(wc, dtype=torch.float).to(device)
    criterion1 = utils.WeightedMultilabel(w)

    lr = config.lr
    start_epoch = 1
    stage = 1
    best_auc = -1

    # =========> start training <=========
    for epoch in range(start_epoch, config.max_epoch + 1):
        train_loss, train_auc = train_epoch(model, optimizer, criterion1,
                                            x_train, x_train_external, y_train)

        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay

            utils.adjust_learning_rate(optimizer, lr)
    return model
Example #11
def train_imagenet(model, args):
    optimizer = optim.SGD(model.parameters(),
                          weight_decay=args.weight_decay,
                          lr=args.lr,
                          momentum=args.momentum)
    criterion = nn.CrossEntropyLoss()
    train_loader = train_imagenet_loader(args)
    val_loader = val_imagenet_loader(args)

    if os.path.exists(args.model_dir):
        shutil.rmtree(args.model_dir)
    os.makedirs(args.model_dir)

    best_prec = 0.0
    for epoch in range(1, args.epochs + 1):
        adjust_learning_rate(optimizer, epoch, args)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)
        # evaluate on validation set
        cur_prec, _ = validate(val_loader, model, criterion, args)

        # remember best prec@1 and save checkpoint
        is_best = cur_prec > best_prec
        if is_best == True:
            best_prec = cur_prec
            cur_model_name = args.model_name + "-" + str(epoch).zfill(
                2) + "-{:.3f}.pth".format(best_prec)
            torch.save(model.state_dict(),
                       os.path.join(args.model_dir, cur_model_name))
            print('Save weights at {}/{}'.format(args.model_dir,
                                                 cur_model_name))
Example #12
 def update_learning_rate(epoch, ite):
     lr_adapted = args.lr * args.droplr**np.sum(args.adlr < epoch)
     if not lr_current == lr_adapted:
         print('Learning rate is adapted: {} -> {}'.format(
             lr_current, lr_adapted))
         utils.adjust_learning_rate(optimizer, lr_adapted)
     return lr_adapted
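The expression `args.lr * args.droplr**np.sum(args.adlr < epoch)` implements a multi-step drop: each milestone in `args.adlr` that the current epoch has already passed multiplies the base rate by `args.droplr` once more. An illustrative run with made-up values:

import numpy as np

lr, droplr, adlr = 0.1, 0.1, np.array([30, 60])  # hypothetical settings
for epoch in (10, 40, 90):
    # 0.1 up to epoch 30, 0.01 for epochs 31-60, 0.001 afterwards
    print(epoch, lr * droplr ** np.sum(adlr < epoch))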
Example #13
def train(base_datamgr, base_set, aux_iter, val_loader, model, start_epoch,
          stop_epoch, params):

    # for validation
    max_acc = 0
    total_it = 0
    # training
    for epoch in range(start_epoch, stop_epoch):
        if params.adj_lr == True:
            learning_rate_adj = params.LUT_lr
            model_lr = utils.adjust_learning_rate(model.model_optim, epoch,
                                                  learning_rate_adj)
            ft_lr = utils.adjust_learning_rate(model.ft_optim, epoch,
                                               learning_rate_adj)
        # randomly split seen domains to pseudo-seen and pseudo-unseen domains
        random_set = random.sample(base_set, k=2)
        ps_set = random_set[0]
        pu_set = random_set[1:]
        ps_loader = base_datamgr.get_data_loader(os.path.join(
            params.data_dir, ps_set, 'base.json'),
                                                 aug=params.train_aug)
        pu_loader = base_datamgr.get_data_loader([
            os.path.join(params.data_dir, dataset, 'base.json')
            for dataset in pu_set
        ],
                                                 aug=params.train_aug)
        base_loader = base_datamgr.get_data_loader([
            os.path.join(params.data_dir, dataset, 'base.json')
            for dataset in base_set
        ],
                                                   aug=params.train_aug)
        # train loop
        model.train()
        if params.feature_wise_type == 'FT':
            total_it = model.train_loop(epoch, base_loader, total_it)
        else:
            total_it = model.trainall_loop(epoch, ps_loader, pu_loader,
                                           aux_iter, total_it)

        # validate
        model.eval()
        with torch.no_grad():
            acc = model.test_loop(val_loader)

        # save
        if acc > max_acc:
            print(f"best model! accuracy: {acc}, save...")
            max_acc = acc
            outfile = os.path.join(params.checkpoint_dir, 'best_model.tar')
            model.save(outfile, epoch)
        else:
            print('GG!! best accuracy {:f}'.format(max_acc))
        if ((epoch + 1) % params.save_freq == 0) or (epoch == stop_epoch - 1):
            outfile = os.path.join(params.checkpoint_dir,
                                   '{:d}.tar'.format(epoch + 1))
            model.save(outfile, epoch)

    return
Example #14
def train(args, encoder, decoder, loader, decoder_optimizer, encoder_optimizer,
          device, criterion):
    decoder.train()  # train mode (dropout and batchnorm is used)
    encoder.train()
    losses = AverageMeter()  # loss (per word decoded)
    top3accs = AverageMeter()  # top accuracy
    i = 0
    for data in tqdm(loader):
        if i % args.lr_update_freq == 0 and i > 0:
            adjust_learning_rate(decoder_optimizer, args.decay_rate)
            if args.fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, args.decay_rate)
        imgs = data[0]
        caps = data[1]
        caplens = data[2]
        # Forward pass
        imgs = encoder(imgs.to(device)).to(device)
        caps = caps.to(imgs.device)
        caplens = caplens.to(imgs.device)
        scores, caps_sorted, decode_lengths, alphas, sort_ind = decoder(
            imgs, caps, caplens)
        targets = caps_sorted[:, 1:]  #remove <start> and <end> tokens
        scores = pack_padded_sequence(scores,
                                      decode_lengths, batch_first=True).to(
                                          device)  #remove padding tokens
        targets = pack_padded_sequence(targets,
                                       decode_lengths,
                                       batch_first=True).to(device)
        # Calculate loss
        loss = criterion(scores.data, targets.data).to(imgs.device)
        # Add doubly stochastic attention regularization
        loss += args.alphac * ((1. - alphas.sum(dim=1).to(device))**2).mean()
        # Back prop.
        decoder_optimizer.zero_grad()
        if encoder_optimizer is not None:
            encoder_optimizer.zero_grad()
        loss.backward()
        if args.gradient_clip is not None:
            clip_gradient(decoder_optimizer, args.gradient_clip)
            if encoder_optimizer is not None:
                clip_gradient(encoder_optimizer, args.gradient_clip)
        # Update weights
        decoder_optimizer.step()
        if encoder_optimizer is not None:
            encoder_optimizer.step()
        # Keep track of metrics
        top3 = accuracy(scores.data, targets.data, 3)
        losses.update(loss.item(), sum(decode_lengths))
        top3accs.update(top3, sum(decode_lengths))
        # Print status
        if i % args.print_freq == 0:
            print('Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Top-3 Accuracy {top3.val:.3f} ({top3.avg:.3f})'.format(
                      loss=losses, top3=top3accs))
        if i % args.checkpoint_freq == 0 and args.checkpoint_freq > 0:
            save_checkpoint(args.model_path, i, encoder, decoder,
                            encoder_optimizer, decoder_optimizer, 0, False)
        i += 1
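In this example the second argument of `adjust_learning_rate` (`args.decay_rate`) reads as a shrink factor rather than an absolute rate, and the same multiplicative convention appears to be used in Examples #20, #21 and #27. A sketch of that variant, under the assumption that the helper scales the current rate in place:

def adjust_learning_rate(optimizer, shrink_factor):
    # Hypothetical multiplicative variant: scale every group's current rate.
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * shrink_factor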
Example #15
def run(dataset, net_type, train=True):

    # Hyper Parameter settings
    train_ens = cfg.train_ens
    valid_ens = cfg.valid_ens
    test_ens = cfg.test_ens
    n_epochs = cfg.n_epochs
    lr_start = cfg.lr_start
    num_workers = cfg.num_workers
    valid_size = cfg.valid_size
    batch_size = cfg.batch_size

    trainset, testset, inputs, outputs = data.getDataset_regression(dataset)

    train_loader, valid_loader, test_loader = data.getDataloader(
        trainset, testset, valid_size, batch_size, num_workers)
    net = getModel(net_type, inputs, outputs).to(device)

    print(len(train_loader))
    print(len(valid_loader))
    print(len(test_loader))

    ckpt_dir = f'checkpoints/regression/{dataset}/bayesian'
    ckpt_name = f'checkpoints/regression/{dataset}/bayesian/model_{net_type}.pt'

    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir, exist_ok=True)

    criterion = metrics.ELBO_regression_hetero(len(trainset)).to(device)

    if train:
        optimizer = Adam(net.parameters(), lr=lr_start)
        valid_loss_max = np.Inf
        for epoch in range(n_epochs):  # loop over the dataset multiple times
            cfg.curr_epoch_no = epoch
            utils.adjust_learning_rate(optimizer, metrics.lr_linear(epoch, 0, n_epochs, lr_start))

            train_loss, train_mse, train_kl = train_model(net, optimizer, criterion, train_loader, num_ens=train_ens)
            valid_loss, valid_mse = validate_model(net, criterion, valid_loader, num_ens=valid_ens)

            print('Epoch: {} \tTraining Loss: {:.4f} \tTraining MSE: {:.4f} \tValidation Loss: {:.4f} \tValidation MSE: {:.4f} \ttrain_kl_div: {:.4f}'.format(
                epoch, train_loss, train_mse, valid_loss, valid_mse, train_kl))

            # save the model if the validation loss has decreased
            if valid_loss <= valid_loss_max:
                print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                    valid_loss_max, valid_loss))
                torch.save(net.state_dict(), ckpt_name)
                valid_loss_max = valid_loss

    # test saved model
    best_model = getModel(net_type, inputs, outputs).to(device)
    best_model.load_state_dict(torch.load(ckpt_name))
    test_loss, test_mse = test_model(best_model, criterion, test_loader, num_ens=test_ens)
    print('Test Loss: {:.4f} \tTest MSE: {:.4f} '.format(
            test_loss, test_mse))
    test_uncertainty(best_model, testset[:100], data='ccpp')
Example #16
def train():
    cfg = json.load(open(cfg_path))
    enable_cuda = cfg['train']['enable_cuda']
    gpus = cfg['train']['GPUS']
    device = 'cuda:%d' % gpus[0] if enable_cuda else 'cpu'
    enable_multi_gpus = enable_cuda and len(gpus) > 1

    batch_size = cfg['train']['batch_size']
    batch_size = len(gpus) * batch_size if enable_multi_gpus else batch_size

    def transform_fn(image, boxes):
        sel_box_idx = random.randrange(0, len(boxes))
        to_size = random.choice(cfg['network']['anchor_sizes'])
        to_size = random.randint(round(to_size * 0.7), round(to_size * 1.3))
        img, boxes = transform.select_crop_face(image, boxes,
                                                cfg['train']['image_shape'],
                                                sel_box_idx, to_size)
        return img, boxes

    wider_train_dataset = WiderTrain(img_dir=cfg['wider_train']['image_dir'],
                                     anno_path=cfg['wider_train']['txt_path'],
                                     transform=transform_fn)
    dataloader = DataLoader(wider_train_dataset,
                            batch_size=batch_size,
                            collate_fn=wider_train_dataset.collate_fn,
                            num_workers=12,
                            drop_last=True,
                            shuffle=True)

    print('model is being built...')
    detector = Detector(cfg=cfg)
    if enable_multi_gpus:
        detector = th.nn.DataParallel(detector, device_ids=gpus)
    detector.to(device)
    detector.train(True)
    optimizer = th.optim.Adam(detector.parameters())
    print('model is built.')

    train_step = 0
    for epoch in range(50):
        for i, sample in enumerate(dataloader):
            optimizer.zero_grad()
            sample['image'] = sample['image'].to(device)
            sample['boxes'] = sample['boxes'].to(device)
            loss = detector(sample)
            if enable_multi_gpus:
                loss = sum(loss)
            loss.backward()
            optimizer.step()
            lr = _lr_adjust(train_step)
            ut.adjust_learning_rate(optimizer, lr)
            #if i % 100 == 0:
            print("epoch %d, step %d, total_step %d, loss %.3f" %
                  (epoch, i, train_step, loss.item()))
            i += 1
            train_step += 1
Example #17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda',
                        default='True',
                        help='The dataset the class to processed')
    parser.add_argument('--gpu_id',
                        default='0',
                        help='The dataset the class to processed')
    args = parser.parse_args()

    (train_data_loader, val_data_loader, network, optimizer, train_writer,
     test_writer) = setup(bool(args.cuda), int(args.gpu_id))

    # Everything seems fine.
    # make a code log with exp name
    utils.save_exp_information()

    # init values
    train_jter_count, val_jter_count, best_loss = (0, 0, np.inf)
    start_time = time.time()
    for epoch in range(gv.total_epochs):
        train_st_time = time.time()
        utils.adjust_learning_rate(optimizer, epoch, gv.orig_lr)
        train_data_loader.shuffle_index()
        train_jter_count = train(train_data_loader, network, optimizer,
                                 train_writer, train_jter_count)
        print('==========TRAIN Epoch', epoch + 1,
              "COMPLETE ====================")
        val_st_time = time.time()
        loss, val_jter_count = val(val_data_loader, network, test_writer,
                                   train_jter_count)
        print('==========val Epoch', epoch + 1,
              "COMPLETE ====================")
        print('==========TIME FROM START: ',
              time.time() - start_time, ' =============')
        if loss < best_loss:
            print('========== BEST MODEL TILL NOW! =============')
            best_loss_ = True
            best_loss = loss
        else:
            best_loss_ = False
        # add a is best checker
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': 'res18',
                'loss': loss,
                'model_state_dict': network.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            filename='weights/the_real_simple_GRU_' + str(epoch + 1) + '.pth',
            is_best=best_loss_)
        print('==========Total Time for epoch: ',
              time.time() - val_st_time, ' =============')
    train_writer.close()
    test_writer.close()
Example #18
def main():
    global opt, best_prec1

    opt = parser.parse_args()
    opt.logdir = opt.logdir + '/' + opt.name
    logger = 'hi'

    best_prec1 = 0
    print(opt)

    # Initialize the model, criterion and the optimizer
    model = init.load_model(opt)
    model, criterion, optimizer = init.setup(model, opt)
    # Display the model structure
    print(model)

    # Setup trainer and validation
    trainer = train.Trainer(model, criterion, optimizer, opt, logger)
    validator = train.Validator(model, criterion, opt, logger)

    # Load model from a checkpoint if mentioned in opts
    if opt.resume:
        if os.path.isfile(opt.resume):
            model, optimizer, opt, best_prec1 = init.resumer(
                opt, model, optimizer)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    cudnn.benchmark = True

    # Setup the train and validation data loaders
    dataloader = init_data.load_data(opt)
    train_loader = dataloader.train_loader
    val_loader = dataloader.val_loader

    for epoch in range(opt.start_epoch, opt.epochs):
        utils.adjust_learning_rate(opt, optimizer, epoch)
        print("Starting epoch number:", epoch + 1, "Learning rate:",
              optimizer.param_groups[0]["lr"])

        if opt.testOnly == False:
            # Train the network over the training data
            trainer.train(train_loader, epoch, opt)

        #if opt.tensorboard:
        #logger.scalar_summary('learning_rate', opt.lr, epoch)

        # Measure the validation accuracy
        acc = validator.validate(val_loader, epoch, opt)
        best_prec1 = max(acc, best_prec1)
        if best_prec1 == acc:
            # Save the new model if the accuracy is better than the previous saved model
            init.save_checkpoint(opt, model, optimizer, best_prec1, epoch)

        print('Best accuracy: [{0:.3f}]\t'.format(best_prec1))
Example #19
def od_mmd_train(init_lr_da, init_lr_kd, epochs, growth_rate, alpha, gamma, init_beta, distils, source_dataloader, targets_dataloader, targets_testloader,
	optimizer_das, optimizer_kds, criterion, device, batch_norm, is_scheduler_da=True, is_scheduler_kd=True, scheduler_da=None, scheduler_kd=None, is_cst=True):
	
	total_loss_arr = []
	teacher_da_temp_loss_arr = []
	kd_temp_loss_arr = []
	teacher_target_acc_arr = []
	student_target_acc_arr = []

	best_student_acc = 0.
	best_teacher_acc = 0.
	epochs += 1
	for epoch in range(1, epochs):
		beta = init_beta * torch.exp(growth_rate * (epoch - 1))
		beta = beta.to(device)

		if (is_scheduler_da):
			new_lr_da = init_lr_da / np.power((1 + 10 * (epoch - 1) / epochs), 0.75) # 10*
			for optimizer_da in optimizer_das:
				adjust_learning_rate(optimizer_da, new_lr_da)

		if (is_scheduler_kd):
			new_lr_kd = init_lr_kd / np.power((1 + 10 * (epoch - 1) / epochs), 0.75) # 10*
			for optimizer_kd in optimizer_kds:
				adjust_learning_rate(optimizer_kd, new_lr_kd)

		total_loss_1, total_loss_2, teacher_da_temp_loss_1 = od_mmd_one_epoch(epoch, epochs, distils, source_dataloader,
											targets_dataloader, optimizer_das, optimizer_kds,
											criterion, device,alpha, beta, gamma, batch_norm, is_cst)

		students_targets_acc = np.zeros(len(distils))

		for i, d in enumerate(targets_testloader):
			students_targets_acc[i] = eval(distils[i].s_net, device, d, False)

		total_target_acc = students_targets_acc.mean()
		print(f'epoch : {epoch}\tacc : {total_target_acc}')

		if (total_target_acc > best_student_acc):
			best_student_acc = total_target_acc

			torch.save({'student_model': distils[0].s_net.state_dict(), 'acc': best_student_acc, 'epoch': epoch},
		               "./student_model.pth")

		if scheduler_da is not None:
			scheduler_da.step()

		if scheduler_kd is not None:
			scheduler_kd.step()

		if (epoch == 150 or epoch == 250):
			for optimizer_kd in optimizer_kds:
				for param_group in optimizer_kd.param_groups:
					param_group['lr'] = param_group['lr'] * .1
	return best_student_acc
Example #20
def main():
    # Load the vocabulary and the datasets
    voc = Lang('data/WORDMAP.json')
    print("词库数量 " + str(voc.n_words))
    train_data = SaDataset('train', voc)
    val_data = SaDataset('valid', voc)

    # Initialize the model
    encoder = EncoderRNN(voc.n_words, hidden_size, encoder_n_layers, dropout)
    # Run the model on `device`: a GPU uses video memory, a CPU uses system memory
    encoder = encoder.to(device)

    # Initialize the optimizer: it drives gradient descent by adjusting the model
    # parameters; optim is a PyTorch package and Adam is one such optimization algorithm
    print('Building optimizers ...')
    '''
    Parameters to optimize
    Learning rate
    '''
    optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    # Baseline accuracy
    best_acc = 0
    epochs_since_improvement = 0

    # epochs: number of training passes
    for epoch in range(0, epochs):
        # Decay learning rate if there is no improvement for 8 consecutive epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(optimizer, 0.8)

        # Train for one epoch
        train(epoch, train_data, encoder, optimizer)

        # Validate on the validation set to guard against overfitting
        val_acc, val_loss = valid(val_data, encoder)
        print('\n * ACCURACY - {acc:.3f}, LOSS - {loss:.3f}\n'.format(acc=val_acc, loss=val_loss))

        # Check whether there was an improvement
        is_best = val_acc > best_acc
        best_acc = max(best_acc, val_acc)

        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, encoder, optimizer, val_acc, is_best)

        # Reshuffle the training and validation samples
        np.random.shuffle(train_data.samples)
        np.random.shuffle(val_data.samples)
Example #21
def train(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    epochs_since_improvement = 0
    logger = get_logger()
    if checkpoint is None:
        model = CarRecognitionNet()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-6)
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    # Move to GPU, if available
    model = model.to(device)

    # Build the data loaders
    loader = fetch_dataloaders(args.image_folder, [0.8, 0.2], batchsize=args.batch_size)
    train_loader = loader['train']
    valid_loader = loader['valid']

    for epoch in range(start_epoch, args.end_epoch):
        # Stop if the model has not improved for a long time
        if epochs_since_improvement > 50:
            break

        # Adjust the learning rate
        if epochs_since_improvement > 0 and epochs_since_improvement % 15 == 0:
            adjust_learning_rate(optimizer, 0.1)

        train_loss, train_acc = __train(train_loader=train_loader,
                                        model=model,
                                        optimizer=optimizer,
                                        epoch=epoch,
                                        logger=logger
                                        )

        vail_loss, vail_acc = __valid(valid_loader=valid_loader,
                                      model=model,
                                      logger=logger)
        is_best = vail_acc > best_acc
        best_acc = max(vail_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            logger.info("Epochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0
            # Save checkpoint
        save_checkpoint("OURS_RES50", epoch, epochs_since_improvement, model, optimizer, best_acc, is_best)
Example #22
def train_epoch(
    net,
    epoch,
    data_loader,
    optimizer,
    input_file,
    device,
    config,
    epoch_size
):

  net.train()

  total_loss = 0.0
  dataprocess = tqdm(data_loader)
  for iteration, batch_item in enumerate(dataprocess):

    image, mask = batch_item['image'], batch_item['mask']
    image = image.to(device)
    mask = mask.to(device)

    # Adjust the current learning rate
    utils.adjust_learning_rate(
      optimizer,
      config.LR_STRATEGY,
      epoch,
      iteration,
      epoch_size
    )

    optimizer.zero_grad()

    # forward
    out = net(image)

    # loss
    loss = utils.create_loss(out, mask, config.NUM_CLASSES)

    total_loss += loss.item()

    # Backpropagation
    loss.backward()

    # Optimizer step
    optimizer.step()

    # Progress display
    dataprocess.set_description_str("epoch:{}".format(epoch))
    dataprocess.set_postfix_str("loss:{:.4f}".format(loss.item()))

  input_file.write(
    "Epoch:{}, loss is {:.4f} \n".format(epoch, total_loss / len(data_loader))
  )
  input_file.flush()
Example #23
def train(epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    net.train()

    end = time.time()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        # compute output
        outputs = net(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1[0], inputs.size(0))
        top5.update(prec5[0], inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()

        optimizer.step()
        utils.adjust_learning_rate(optimizer, epoch, batch_idx,
                                   len(train_loader), args.ne, args.lr)

        if batch_idx % 200 == 0:
            batch_time.update(time.time() - end)
            end = time.time()
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      batch_idx,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))
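Example #23 recomputes the rate on every batch, passing the epoch, the batch index, the number of batches, `args.ne` and `args.lr` to the helper. One common pattern that matches this signature is cosine annealing over the whole run; the sketch below is an assumption about the interface (reading `args.ne` as the total number of epochs), not the project's actual utility:

import math

def adjust_learning_rate(optimizer, epoch, batch_idx, num_batches, num_epochs, base_lr):
    # Hypothetical per-batch cosine anneal from base_lr down towards 0.
    t = (epoch + batch_idx / num_batches) / num_epochs  # progress in [0, 1)
    lr = 0.5 * base_lr * (1 + math.cos(math.pi * t))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr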
Example #24
  def run(self):
    batch_size = self.config['batch_size']
    learning_rate = self.config['learning_rate']

    # Create Models
    self.itc_attentioner = itc_model.Attentioner().cuda()
    self.mfd_attentioner = mfd_model.attentioner().cuda()

    image_smoother = itc_model.ImageSmoother(kernel_size=self.config['smoother_kernel'])
    image_smoother = image_smoother.cuda()
    self.image_smoother = image_smoother

    variance_pool2d = ops.VariancePool2d(kernel_size=self.config['variance_kernel'], same=True)
    variance_pool2d = variance_pool2d.cuda()
    self.variance_pool2d = variance_pool2d

    # Model Summary
    self.logger.debug('ITC Attentioner Architecture')
    summary(self.itc_attentioner, (3, 224, 224), batch_size=batch_size)

    self.logger.debug('MFD Attentioner Architecture')
    summary(self.mfd_attentioner, (3, 224, 224), batch_size=batch_size)

    # Load Pretrained Models
    self.load_pretrained_models()

    model_params = []
    model_params += self.itc_attentioner.parameters()

    self.optm = torch.optim.Adam(model_params, lr=learning_rate)

    # Restore Model
    if not self.args.restart:
      self.load_checkpoint()

    # Setup Global Train Index
    self.gidx = self.epoch * len(self.dataset_train)

    # Initial Validation
    # self.valid = DataObject()
    # self.run_valid()

    total_epochs = self.config['epochs']
    for _ in range(self.epoch, total_epochs):
      utils.adjust_learning_rate(learning_rate, self.optm, self.epoch)

      self.train = DataObject()
      self.run_train()

      self.valid = DataObject()
      self.run_valid()

      self.epoch += 1
Example #25
def run(dataset, net_type):

    # Hyper Parameter settings
    train_ens = cfg.train_ens
    valid_ens = cfg.valid_ens
    n_epochs = cfg.n_epochs
    lr_start = cfg.lr_start
    num_workers = cfg.num_workers
    valid_size = cfg.valid_size
    batch_size = cfg.batch_size

    trainset, testset, inputs, outputs = data.getDataset(dataset)
    train_loader, valid_loader, test_loader = data.getDataloader(
        trainset, testset, valid_size, batch_size, num_workers)

    net = getModel(net_type, inputs, outputs).to(device)

    ckpt_dir = f'checkpoints/{dataset}/bayesian'
    ckpt_name = f'checkpoints/{dataset}/bayesian/model_{net_type}.pt'

    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir, exist_ok=True)

    criterion = metrics.ELBO(len(trainset)).to(device)
    optimizer = Adam(net.parameters(), lr=lr_start)
    valid_loss_max = np.Inf
    for epoch in range(n_epochs):  # loop over the dataset multiple times
        utils.adjust_learning_rate(
            optimizer, metrics.lr_linear(epoch, 0, n_epochs, lr_start))

        train_loss, train_acc, train_kl = train_model(net,
                                                      optimizer,
                                                      criterion,
                                                      train_loader,
                                                      num_ens=train_ens)
        valid_loss, valid_acc = validate_model(net,
                                               criterion,
                                               valid_loader,
                                               num_ens=valid_ens)

        print(
            'Epoch: {} \tTraining Loss: {:.4f} \tTraining Accuracy: {:.4f} \tValidation Loss: {:.4f} \tValidation Accuracy: {:.4f} \ttrain_kl_div: {:.4f}'
            .format(epoch, train_loss, train_acc, valid_loss, valid_acc,
                    train_kl))

        # save the model if the validation loss has decreased
        if valid_loss <= valid_loss_max:
            print(
                'Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'
                .format(valid_loss_max, valid_loss))
            torch.save(net.state_dict(), ckpt_name)
            valid_loss_max = valid_loss
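`metrics.lr_linear(epoch, 0, n_epochs, lr_start)` suggests a linear anneal from the starting rate down to zero across the run (the same call appears in Example #15). A hypothetical reconstruction, offered only as a sketch:

def lr_linear(epoch, start_epoch, total_epochs, lr_start):
    # Hypothetical linear decay from lr_start at start_epoch to 0 at total_epochs.
    t = (epoch - start_epoch) / max(total_epochs - start_epoch, 1)
    return lr_start * (1.0 - t)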
Example #26
def adjust_boost_lr(dataloader,
                    model,
                    criterion=torch.nn.CrossEntropyLoss(),
                    device=torch.device('cuda'),
                    lr_initialization=[
                        0.01, 0.1, 1., 10., 100., 1000., 10000., 100000.,
                        1000000.
                    ],
                    n_steps=1000,
                    step_size=1000000.):

    model.eval().to(device)  #.detach()
    with torch.no_grad():
        logits = []
        targets = []
        predicts = []
        for iter, (input, target, logit) in enumerate(dataloader):
            input = input.to(device)
            predict = model(input)

            logits.append(logit)
            targets.append(target)
            predicts.append(predict.cpu())

        logits = torch.cat(logits, dim=0).detach()
        targets = torch.cat(targets, dim=0).detach()
        predicts = torch.cat(predicts, dim=0).detach()

        lr_initialization = torch.tensor(lr_initialization)
        results = torch.zeros(lr_initialization.shape, dtype=torch.float)
        for iter, lr in enumerate(lr_initialization):
            results[iter] = criterion(logits + lr * predicts, targets)

    learning_rate = float(lr_initialization[torch.argmin(results)])
    learning_rate = torch.tensor([learning_rate], requires_grad=True)
    optim = torch.optim.SGD([learning_rate], lr=1., momentum=0.5)
    learning_rates = np.arange(1., 0., -1 / n_steps,
                               dtype=float) * float(learning_rate) * step_size

    for iter, loc_lr in enumerate(learning_rates):
        utils.adjust_learning_rate(optim, loc_lr)
        loss = criterion(logits + learning_rate * predicts, targets)

        loss.backward()
        optim.step()
        if iter % (100) == 99:
            print('[', iter, '] lr :', learning_rate, 'grad :',
                  learning_rate.grad)
        optim.zero_grad()

    return float(learning_rate.detach())
Example #27
def train(config, train_iter, val_iter, model, criterion, optimizer, epoch):
    global iteration, n_total, train_loss, n_bad_loss
    global init, best_val_loss

    print("=> EPOCH {}".format(epoch))
    train_iter.init_epoch()
    for batch in train_iter:
        model = model.to('cuda')
        # batch = batch.to('cuda')
        iteration += 1
        model.train()

        output, _, __ = model(batch.grapheme.to('cuda'),
                              batch.phoneme[:-1].to('cuda'))
        target = batch.phoneme[1:].to('cuda')
        loss = criterion(output.view(output.size(0) * output.size(1), -1),
                         target.view(target.size(0) * target.size(1)))

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm(model.parameters(), config.clip, 'inf')
        optimizer.step()

        n_total += batch.batch_size
        train_loss += loss.data * batch.batch_size

        if iteration % config.log_every == 0:
            train_loss /= n_total
            val_loss = validate(val_iter, model, criterion)
            print("   % Time: {:5.0f} | Iteration: {:5} | Batch: {:4}/{}"
                  " | Train loss: {:.4f} | Val loss: {:.4f}".format(
                      time.time() - init, iteration, train_iter.iterations,
                      len(train_iter), train_loss, val_loss))

            # test for val_loss improvement
            n_total = train_loss = 0
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                n_bad_loss = 0
                torch.save(model.state_dict(), config.best_model)
            else:
                n_bad_loss += 1
            if n_bad_loss == config.n_bad_loss:
                best_val_loss = val_loss
                n_bad_loss = 0
                adjust_learning_rate(optimizer, config.lr_decay)
                new_lr = optimizer.param_groups[0]['lr']
                print("=> Adjust learning rate to: {}".format(new_lr))
                if new_lr < config.lr_min:
                    return True
    return False
Example #28
def swa_train(model, swa_model, train_iter, valid_iter, optimizer, criterion, pretrain_epochs, swa_epochs, swa_lr, cycle_length, device, writer, cpt_filename):
    swa_n = 1

    swa_model.load_state_dict(copy.deepcopy(model.state_dict()))

    utils.save_checkpoint(
        cpt_directory,
        1,
        '{}-swa-{:2.4f}-{:03d}-{}'.format(date, swa_lr, cycle_length, cpt_filename),
        state_dict=model.state_dict(),
        swa_state_dict=swa_model.state_dict(),
        swa_n=swa_n,
        optimizer=optimizer.state_dict()
    )

    for e in range(swa_epochs):
        epoch = e + pretrain_epochs
        time_ep = time.time()
        lr = utils.schedule(epoch, cycle_length, lr_init, swa_lr)
        utils.adjust_learning_rate(optimizer, lr)

        train_res = utils.train_epoch(model, train_iter, optimizer, criterion, device)
        valid_res = utils.evaluate(model, valid_iter, criterion, device)

        utils.moving_average(swa_model, model, swa_n)
        swa_n += 1
        utils.bn_update(train_iter, swa_model)
        swa_res = utils.evaluate(swa_model, valid_iter, criterion, device)

        time_ep = time.time() - time_ep
        values = [epoch + 1, lr, swa_lr, cycle_length, train_res['loss'], valid_res['loss'], swa_res['loss'], None, None, time_ep]
        writer.writerow(values)

        table = tabulate.tabulate([values], columns, tablefmt='simple', floatfmt='8.4f')
        if epoch % 20 == 0:
            table = table.split('\n')
            table = '\n'.join([table[1]] + table)
        else:
            table = table.split('\n')[2]
        print(table)

        utils.save_checkpoint(
            cpt_directory,
            epoch + 1,
            '{}-swa-{:2.4f}-{:03d}-{}'.format(date, swa_lr, cycle_length, cpt_filename),
            state_dict=model.state_dict(),
            swa_state_dict=swa_model.state_dict(),
            swa_n=swa_n,
            optimizer=optimizer.state_dict()
        )
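The SWA loop above relies on two helpers that are not shown: `utils.schedule`, which produces the cyclical learning rate fed to `adjust_learning_rate`, and `utils.moving_average`, which folds the current weights into the running average. A sketch of the standard SWA averaging step, under the assumption that `utils.moving_average` follows it:

def moving_average(swa_model, model, swa_n):
    # Hypothetical SWA update: swa_param <- (swa_param * n + param) / (n + 1).
    for swa_p, p in zip(swa_model.parameters(), model.parameters()):
        swa_p.data.mul_(swa_n / (swa_n + 1.0)).add_(p.data / (swa_n + 1.0))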
Example #29
def main(m):
    best_error = 100
    opt = parser_params()

    if opt.dataset == 'cifar10':
        train_loader, test_loader = cifar10_dataloaders(
            batch_size=opt.batch_size, num_workers=opt.num_workers)
        n_cls = 10
    else:
        raise NotImplementedError(opt.dataset)

    print(opt.model[m])
    model = model_dict[opt.model[m]](num_classes=n_cls)

    optimizer = optim.SGD(model.parameters(),
                          lr=opt.learning_rate,
                          momentum=opt.momentum,
                          weight_decay=opt.weight_decay)
    criterion = nn.CrossEntropyLoss()

    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
        cudnn.benchmark = True

    for epoch in range(1, opt.epochs + 1):

        if m == 4 and epoch == 1:
            opt.learning_rate = 0.01
        else:
            opt.learning_rate = 0.1

        adjust_learning_rate(epoch, opt, optimizer)
        print("==> training...")

        train_error, train_loss = train(epoch, train_loader, model, criterion,
                                        optimizer, list_loss_train[m])
        print('epoch {} | train_loss: {}'.format(epoch, train_loss))
        print('epoch {} | train_error: {}'.format(epoch, train_error))

        test_error, test_loss = test(test_loader, model, criterion,
                                     list_loss_test[m])
        print('epoch {} | test_loss: {}'.format(epoch, test_loss))
        print('epoch {} | test_error: {}'.format(epoch, test_error))
        print('iterations: {}'.format(epoch * len(train_loader)))

        if best_error > test_error:
            best_error = test_error

    print('Min error: ', best_error)
Example #30
def main():
    global opt, best_prec1

    opt = parser.parse_args()
    opt.logdir = opt.logdir+'/'+opt.name
    logger = None#Logger(opt.logdir)
    opt.lr = opt.maxlr

    print(opt)
    best_prec1 = 0
    cudnn.benchmark = True
    model = init_model.load_model(opt)
    if opt.model_def.startswith('alexnet') or opt.model_def.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    elif opt.ngpus > 1:
        model = torch.nn.DataParallel(model).cuda()
    print(model)
    model, criterion, optimizer = init_model.setup(model,opt)

    trainer = train.Trainer(model, criterion, optimizer, opt, logger)
    validator = train.Validator(model, criterion, opt, logger)

    if opt.resume:
        if os.path.isfile(opt.resume):
            model, optimizer, opt, best_acc = init_model.resumer(opt, model, optimizer)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    dataloader = init_data.load_data(opt)
    train_loader = dataloader.train_loader
    #print(utils.get_mean_and_std(train_loader))
    val_loader = dataloader.val_loader

    for epoch in range(opt.start_epoch, opt.epochs):
        utils.adjust_learning_rate(opt, optimizer, epoch)
        print("Starting epoch number:",epoch,"Learning rate:", opt.lr)

        if opt.testOnly == False:
            trainer.train(train_loader, epoch, opt)
        if opt.tensorboard:
            logger.scalar_summary('learning_rate', opt.lr, epoch)

        prec1 = validator.validate(val_loader, epoch, opt)
        best_prec1 = max(prec1, best_prec1)
        init_model.save_checkpoint(opt, model, optimizer, best_prec1, epoch)

        print('Best Prec@1: [{0:.3f}]\t'.format(best_prec1))
Example #31
def train(x_train, x_val, x_train_external, x_val_external, y_train, y_val,
          num_class):
    # model
    model = ECGNet(BasicBlock, [3, 4, 6, 3], num_classes=num_class)
    model = model.to(device)

    # optimizer and loss
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    #   optimizer = optim. RMSProp(model.parameters(), lr=config.lr)

    wc = y_train.sum(axis=0)
    wc = 1. / (np.log(wc) + 1)

    # Add a label-weight penalty: the closer a label is to other labels, the easier it is
    # to confuse, so its weight score is larger and it deserves more attention; these
    # weights are already normalized
    #    weight=np.array([0.9608,0.9000,0.8373,0.8373,0.8706,0.6412,0.8373,0.9118,1.0,0.9255,0.9118,
    #                      0.9892,0.9588,0.9118,0.9118,0.8137,0.9608,1.0,0.9118,0.9588,0.9588,0.9863,
    #                      0.8373,0.9892,0.9588,0.9118,0.9863])
    #   wc=weight*wc

    w = torch.tensor(wc, dtype=torch.float).to(device)
    criterion1 = utils.WeightedMultilabel(w)
    criterion2 = nn.BCEWithLogitsLoss()

    lr = config.lr
    start_epoch = 1
    stage = 1
    best_auc = -1

    # =========> start training <=========
    print("*" * 10, "step into stage %02d lr %.5f" % (stage, lr))
    for epoch in range(start_epoch, config.max_epoch + 1):
        since = time.time()
        train_loss, train_auc = train_epoch(model, optimizer, criterion1,
                                            x_train, x_train_external, y_train,
                                            num_class)
        val_loss, val_auc = val_epoch(model, criterion2, x_val, x_val_external,
                                      y_val, num_class)
        print(
            '#epoch:%02d stage:%d train_loss:%.4f train_auc:%.4f  val_loss:%.4f val_auc:%.4f  time:%s'
            % (epoch, stage, train_loss, train_auc, val_loss, val_auc,
               utils.print_time_cost(since)))

        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay
            print("*" * 10, "step into stage %02d lr %.5f" % (stage, lr))
            utils.adjust_learning_rate(optimizer, lr)
    return model
Example #32
def train(train_loader, model, optimizer, start_iter, num_iters):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    total_losses = AverageMeter()
    rpn_losses = AverageMeter()
    odn_losses = AverageMeter()
    rpn_ce_losses = AverageMeter()
    rpn_box_losses = AverageMeter()
    odn_ce_losses = AverageMeter()
    odn_box_losses = AverageMeter()

    # switch to train mode
    end_iter = start_iter + num_iters - 1
    model.train()

    end = time.time()
    # for i in range(start_iter, start_iter + num_iters):
    for i, (inputs, anns) in enumerate(train_loader):
        i += start_iter
        # get minibatch
        # inputs, anns = next(train_loader)
        lr = adjust_learning_rate(optimizer, args.lr, args.decay_rate,
                                  i, args.niters)  # TODO: add custom
        # measure data loading time
        data_time.update(time.time() - end)

        optimizer.zero_grad()
        # forward images one by one (TODO: support batch mode later, or
        # multiprocess)
        for j, input in enumerate(inputs):
            input_anns = anns[j]  # anns of this input
            if len(input_anns) == 0:
                continue
            gt_bbox = np.vstack([ann['bbox'] + [ann['ordered_id']] for ann in input_anns])
            im_info = [[input.size(1), input.size(2),
                        input_anns[0]['scale_ratio']]]
            input_var = torch.autograd.Variable(input.unsqueeze(0).cuda(),
                                                requires_grad=False)

            cls_prob, bbox_pred, rois = model(input_var, im_info, gt_bbox)
            loss = model.loss
            loss.backward()
            # record loss
            total_losses.update(loss.data[0], input_var.size(0))
            rpn_losses.update(model.rpn.loss.data[0], input_var.size(0))
            rpn_ce_losses.update(
                model.rpn.cross_entropy.data[0], input_var.size(0))
            rpn_box_losses.update(
                model.rpn.loss_box.data[0], input_var.size(0))
            odn_losses.update(model.odn.loss.data[0], input_var.size(0))
            odn_ce_losses.update(
                model.odn.cross_entropy.data[0], input_var.size(0))
            odn_box_losses.update(
                model.odn.loss_box.data[0], input_var.size(0))

        # do SGD step
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.print_freq > 0 and (i + 1) % args.print_freq == 0:
            print('iter: [{0}] '
                  'Time {batch_time.val:.3f} '
                  'Data {data_time.val:.3f} '
                  'Loss {total_losses.val:.4f} '
                  'RPN {rpn_losses.val:.4f} '
                  '{rpn_ce_losses.val:.4f} '
                  '{rpn_box_losses.val:.4f} '
                  'ODN {odn_losses.val:.4f} '
                  '{odn_ce_losses.val:.4f} '
                  '{odn_box_losses.val:.4f} '
                  .format(i, batch_time=batch_time,
                          data_time=data_time,
                          total_losses=total_losses,
                          rpn_losses=rpn_losses,
                          rpn_ce_losses=rpn_ce_losses,
                          rpn_box_losses=rpn_box_losses,
                          odn_losses=odn_losses,
                          odn_ce_losses=odn_ce_losses,
                          odn_box_losses=odn_box_losses))

        del inputs
        del anns
        if i == end_iter:
            break

    print('iter: [{0}-{1}] '
          'Time {batch_time.avg:.3f} '
          'Data {data_time.avg:.3f} '
          'Loss {total_losses.avg:.4f} '
          'RPN {rpn_losses.avg:.4f} '
          '{rpn_ce_losses.avg:.4f} '
          '{rpn_box_losses.avg:.4f} '
          'ODN {odn_losses.avg:.4f} '
          '{odn_ce_losses.avg:.4f} '
          '{odn_box_losses.avg:.4f} '
          .format(start_iter, end_iter,
                  batch_time=batch_time,
                  data_time=data_time,
                  total_losses=total_losses,
                  rpn_losses=rpn_losses,
                  rpn_ce_losses=rpn_ce_losses,
                  rpn_box_losses=rpn_box_losses,
                  odn_losses=odn_losses,
                  odn_ce_losses=odn_ce_losses,
                  odn_box_losses=odn_box_losses))

    if args.tensorboard:
        log_value('train_total_loss', total_losses.avg, end_iter)
        log_value('train_rpn_loss', rpn_losses.avg, end_iter)
        log_value('train_rpn_ce_loss', rpn_ce_losses.avg, end_iter)
        log_value('train_rpn_box_loss', rpn_box_losses.avg, end_iter)
        log_value('train_odn_loss', odn_losses.avg, end_iter)
        log_value('train_odn_ce_loss', odn_ce_losses.avg, end_iter)
        log_value('train_odn_box_loss', odn_box_losses.avg, end_iter)
    return total_losses.avg