Example #1
def train():

    model = tv.models.resnet101(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    model.fc = t.nn.Linear(2048, 2048)
    for name, param in model.named_parameters():
        if name == 'layer4.2.conv2.weight':
            param.requires_grad = True
        if name == 'layer4.2.bn2.weight':
            param.requires_grad = True
        if name == 'layer4.2.bn2.bias':
            param.requires_grad = True
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name)
    model.cuda()
    criterion = t.nn.BCEWithLogitsLoss()
    optimizer = t.optim.Adam(model.parameters(), lr=1e-3)
    dataloader = get_dataloader()
    word_att = dataloader.dataset.word_att
    loss_meter = meter.AverageValueMeter()
    num_epochs = 32

    for epoch in range(num_epochs):
        loss_meter.reset()
        for ii, (imgs, caps, indexes) in tqdm.tqdm(enumerate(dataloader)):
            optimizer.zero_grad()
            imgs = imgs.cuda()
            caps = caps.cuda()
            labels = model(imgs)
            loss = criterion(labels, caps)
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())
            if (ii + 1) % 50 == 0:
                print('epoch:', epoch, 'loss:', loss_meter.value()[0])
            if (ii + 1) % 1000 == 0:
                true_words = []
                print('ground-truth attribute words:')
                true_pic_att = [(ix, item) for ix, item in enumerate(caps[6])]
                for item in true_pic_att:
                    if item[1] == 1:
                        true_words.append(word_att[item[0]])
                print(true_words)
                gen_words = []
                print('predicted attribute words:')
                m = t.nn.Sigmoid()
                labels_sigmoid = m(labels)
                result_pic_att = [(ix, item)
                                  for ix, item in enumerate(labels_sigmoid[6])]
                for item in result_pic_att:
                    if item[1] >= 0.5:
                        gen_words.append(word_att[item[0]])
                print(gen_words)

        prefix = 'multi_label_classification'
        path = '{prefix}_{time}'.format(prefix=prefix,
                                        time=time.strftime('%m%d_%H%M'))

        t.save(model, path)
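All of the snippets in this collection revolve around torchnet's AverageValueMeter; a minimal standalone sketch of its API (add() accumulates values, value() returns the running mean and standard deviation, reset() clears the state between epochs):

from torchnet import meter

loss_meter = meter.AverageValueMeter()
loss_meter.add(0.9)
loss_meter.add(0.7)
mean, std = loss_meter.value()  # mean == 0.8
loss_meter.reset()              # start a fresh epoch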
Example #2
        optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.0)

    misfit = nn.CrossEntropyLoss(weight=weights)
    softmax = nn.Softmax2d()

    print(bcolors.BOLD + 'Batches=%d' % (N // batch_size) + bcolors.ENDC)
    best_val_loss = np.Inf
    hist_val_loss = []
    hist_train_loss = []
    train_time = []
    val_time = []
    for epoch in range(num_epochs):

        print(bcolors.BOLD + '\n=> Training Epoch #%d' % (epoch + 1) +
              bcolors.ENDC)
        running_loss = tnt.AverageValueMeter()
        running_acc = tnt.AverageValueMeter()
        start_time = time.time()
        needs_header = True

        # Training Loop
        count = 0
        for batch_idx, (images, labels) in enumerate(train_loader):

            if use_gpu:
                images = images.cuda()
                labels = labels.cuda()

            # Forward Pass
            optimizer.zero_grad()
            if is_unet:
Example #3
                              num_layers=args.num_layers,
                              dim=args.dim,
                              hidden_dim=args.hidden_dim,
                              num_heads=8,
                              dropout_prob=0.1,
                              max_length=args.seq_length)
    model.train()

    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=1e-4,
                          nesterov=True)
    annealer = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epochs)

    loss_meter = meter.AverageValueMeter()
    time_meter = meter.TimeMeter(unit=False)

    train_losses = []

    for epoch in range(args.num_epochs):

        for i, (x, y) in enumerate(train_loader):

            x, y = x.to(args.device), y.to(args.device)
            loss = model.loss(x, y).mean()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.cpu().data.numpy(), n=1)
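The snippet above builds a CosineAnnealingLR scheduler, but the truncated loop never shows it being stepped; a minimal, self-contained sketch (toy model, illustrative epoch count) of the usual pattern:

import torch
from torch import nn, optim

# Sketch: step the cosine annealing schedule once per epoch so the learning
# rate actually follows the cosine curve over the training run.
model = nn.Linear(8, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9,
                      weight_decay=1e-4, nesterov=True)
annealer = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

for epoch in range(10):
    # ... inner loop: forward pass, loss.backward(), optimizer.step() ...
    annealer.step()  # decay the learning rate along the cosine schedule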
def train(train_loader, model, criterion, optimizer, epoch,
          compression_scheduler, loggers, print_freq, log_params_hist):
    """Training loop for one epoch."""
    losses = {'objective_loss'   : tnt.AverageValueMeter(),
              'regularizer_loss' : tnt.AverageValueMeter()}
    if compression_scheduler is None:
        # Initialize the regularizer loss to zero
        losses['regularizer_loss'].add(0)

    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)', total_samples, batch_size)

    # Switch to train mode
    model.train()
    end = time.time()

    for train_step, (inputs, target) in enumerate(train_loader):
        # Measure data loading time
        data_time.add(time.time() - end)

        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(inputs)
        target_var = torch.autograd.Variable(target)

        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step, steps_per_epoch)
        output = model(input_var)
        loss = criterion(output, target_var)

        # Measure accuracy and record loss
        classerr.add(output.data, target)
        losses['objective_loss'].add(loss.item())

        if compression_scheduler:
            # Before running the backward phase, we add any regularization loss computed by the scheduler
            regularizer_loss = compression_scheduler.before_backward_pass(epoch, train_step, steps_per_epoch, loss)
            loss += regularizer_loss
            losses['regularizer_loss'].add(regularizer_loss.item())

        # Compute the gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step, steps_per_epoch)

        # measure elapsed time
        batch_time.add(time.time() - end)
        steps_completed = (train_step+1)

        if steps_completed % print_freq == 0:
            # Log some statistics
            lr = optimizer.param_groups[0]['lr']
            stats = ('Performance/Training/',
                     OrderedDict([
                         ('Loss', losses['objective_loss'].mean),
                         ('Reg Loss', losses['regularizer_loss'].mean),
                         ('Top1', classerr.value(1)),
                         ('Top5', classerr.value(5)),
                         ('LR', lr),
                         ('Time', batch_time.mean)])
                    )

            distiller.log_training_progress(stats,
                                            model.named_parameters() if log_params_hist else None,
                                            epoch, steps_completed,
                                            steps_per_epoch, print_freq,
                                            loggers)
        end = time.time()
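The loop above also uses torchnet's ClassErrorMeter for top-1/top-5 accuracy; a small standalone sketch (random tensors, not the distiller pipeline) of that meter:

import torch
from torchnet import meter as tnt

# ClassErrorMeter with accuracy=True reports top-k accuracy instead of top-k error.
classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
output = torch.randn(16, 10)           # fake logits: 16 samples, 10 classes
target = torch.randint(0, 10, (16,))   # fake labels
classerr.add(output, target)
print(classerr.value(1), classerr.value(5))  # top-1 and top-5 accuracy in percent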
Example #5
def train(**kwargs):
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()
    # data setup
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)
    # loss function and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr)
    # metrics: the smoothed (averaged) loss and a confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # train the model parameters
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()
            # update metrics and visualization
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
        model.save()

        # compute metrics on the validation set and visualize them
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            'epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}'
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))
        # decay the learning rate if the loss stopped decreasing
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
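For reference, a minimal standalone sketch (toy scores, not tied to the DogCat data) of the ConfusionMeter used above: add() takes per-class scores (or hard predictions) plus targets, and value() returns the accumulated k x k count matrix.

import torch
from torchnet import meter

confusion_matrix = meter.ConfusionMeter(2)
scores = torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6]])  # per-class scores
targets = torch.tensor([0, 1, 0])
confusion_matrix.add(scores, targets)
print(confusion_matrix.value())  # 2x2 matrix of prediction counts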
Example #6
def train(args, train_dataset, test_dataset, model, optimizer, writer, device):
    print("start")

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=args.batchsize,
                              collate_fn=batcher,
                              shuffle=args.shuffle,
                              num_workers=args.workers)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=args.batchsize * 2,
                             collate_fn=batcher,
                             shuffle=args.shuffle,
                             num_workers=args.workers)
    print(model)
    print(train_dataset.mean.item(), train_dataset.std.item())
    # if model.name in ["MGCN", "SchNet"]:
    if args.multi_gpu:
        model.module.set_mean_std(train_dataset.mean, train_dataset.std)
    else:
        model.set_mean_std(train_dataset.mean, train_dataset.std)
    model.to(device)
    loss_fn = nn.MSELoss()
    MAE_fn = nn.L1Loss()
    mse_meter = meter.AverageValueMeter()
    mae_meter = meter.AverageValueMeter()
    init_lr = args.lr
    info = {'train_loss': [], 'train_mae': [], 'test_loss': [], 'test_mae': []}
    for epoch in range(args.epochs):
        mse_meter.reset()
        mae_meter.reset()
        model.train()
        for idx, (mols, label) in enumerate(train_loader):
            g = dgl.batch([mol.ful_g for mol in mols])
            g.to(device)
            label = label.to(device)
            res = model(g).squeeze()

            loss = loss_fn(res, label)
            mae = MAE_fn(res, label)
            # if loss>1e3:
            #     print('loss more than 1e3')

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            mae_meter.add(mae.detach().item())
            mse_meter.add(loss.detach().item())
            if idx % 50 == 0 and args.use_tb:
                writer.add_scalar(
                    'training_loss',
                    mse_meter.value()[0],
                    int((idx + 1 + epoch * len(train_loader)) / 50))
                writer.add_scalar(
                    'training_mae',
                    mae_meter.value()[0],
                    int((idx + 1 + epoch * len(train_loader)) / 50))
                print('training loss {} mae {}'.format(mse_meter.value()[0],
                                                       mae_meter.value()[0]))
        loss_test, mae_test = test(args, test_loader, model, device)

        print(
            "Epoch {:2d}, training: loss: {:.7f}, mae: {:.7f} test: loss{:.7f}, mae:{:.7f}"
            .format(epoch,
                    mse_meter.value()[0],
                    mae_meter.value()[0], loss_test, mae_test))
        if (epoch + 1) % 100 == 0:
            init_lr = init_lr / 2
            for param_group in optimizer.param_groups:
                param_group['lr'] = init_lr
            print('current learning rate: {}'.format(init_lr))

        info['train_loss'].append(mse_meter.value()[0])
        info['train_mae'].append(mae_meter.value()[0])
        info['test_loss'].append(loss_test)
        info['test_mae'].append(mae_test)
        if args.use_tb:
            writer.add_scalar('testing_loss', loss_test, epoch)
            writer.add_scalar('testing_mae', mae_test, epoch)
    return info
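The manual halving of the learning rate every 100 epochs above can also be written with a built-in scheduler; a self-contained sketch (toy model, illustrative epoch count) of the equivalent StepLR:

import torch
from torch import nn, optim

# Sketch: StepLR(step_size=100, gamma=0.5) halves the learning rate every
# 100 epochs, replacing the manual loop over optimizer.param_groups.
model = nn.Linear(4, 1)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)

for epoch in range(300):
    # ... training loop: forward pass, loss.backward(), optimizer.step() ...
    scheduler.step()  # lr becomes 5e-4 after epoch 100, 2.5e-4 after epoch 200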
Example #7
def train(**kwargs):

    print("开始训练")
    # 定义一个网络模型对象
    # 通过config文件中模型名称来加载模型
    netWork = getattr(models, opt.model)()
    print('当前使用的模型为' + opt.model)

    # 定义可视化对象
    vis = Visualizer(opt.env + opt.model)

    # 先将模型加载到内存中,即CPU中
    map_location = lambda storage, loc: storage
    if opt.load_model_path:
        netWork.load_state_dict(
            t.load(opt.load_model_path, map_location=map_location))
    if opt.use_gpu:
        netWork.cuda()

    # step2: load the data
    train_data = XueLangDataSet(opt.data_root, train=True)
    # train=False, test=False gives the validation set
    val_data = XueLangDataSet(opt.data_root, train=False)
    # dataset loaders
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.val_batch_size,
                                shuffle=True,
                                num_workers=opt.num_workers)
    # criterion (loss function) and optimizer
    # cross-entropy loss for classification
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    # use Adam as the optimizer
    if opt.fixed_weight:
        # freeze part of the weights
        if opt.model == 'ResNet18_bo' or opt.model == 'ResNet152_bo':
            # ResNet18_bo and ResNet152_bo only update the final fully connected layer
            print(opt.model + ': only updating the final fully connected layer')
            optimizer = t.optim.Adam(netWork.model_bo.fc.parameters(),
                                     lr=opt.lr,
                                     weight_decay=opt.weight_decay)
        if opt.model == 'VGG16_bo' or opt.model == 'VGG19_bo':
            print(opt.model + ': only updating the classifier layers')
            optimizer = t.optim.Adam(netWork.classifier.parameters(),
                                     lr=opt.lr,
                                     weight_decay=opt.weight_decay)
        if opt.model == 'DenseNet_bo':
            print(opt.model + ': only updating the final fully connected layer')
            optimizer = t.optim.Adam(netWork.classifier.parameters(),
                                     lr=opt.lr,
                                     weight_decay=opt.weight_decay)
    else:
        # update all parameters (only VGG19 was modified)
        print(opt.model + ': updating all parameters')
        optimizer = t.optim.Adam(netWork.parameters(),
                                 lr=opt.lr,
                                 weight_decay=opt.weight_decay)

    # metrics: meters used to track and plot the loss
    # AverageValueMeter tracks the mean and std of all added values,
    # giving the average loss over an epoch
    loss_meter = meter.AverageValueMeter()
    # initial (previous) loss
    previous_loss = 1e100
    best_val_auc = 0
    for epoch in range(opt.max_epoch):
        # clear the meter
        loss_meter.reset()
        # iterate over the training data loader
        for ii, (data_origin, label) in enumerate(train_dataloader):
            # train the model
            # input_img is the model input image
            input_img = Variable(data_origin)
            # label_img is the corresponding label
            label_img = Variable(label)
            # move the data to the GPU
            if opt.use_gpu:
                input_img = input_img.cuda()
                label_img = label_img.cuda()
            # zero the optimizer gradients
            optimizer.zero_grad()
            # forward pass: label_output is the network output
            label_output = netWork(input_img)

            # cross-entropy loss
            loss = criterion(label_output, label_img)
            # backward pass: autograd computes the gradients
            loss.backward()
            # update the learnable parameters
            optimizer.step()
            # update the meter and visualize
            loss_meter.add(loss.item())
            # plot the loss every print_freq iterations
            if ii % opt.print_freq == opt.print_freq - 1:
                # plot is a custom method of Visualizer
                vis.plot('train loss', loss_meter.value()[0])
        # save the model after each epoch
        t.save(netWork, opt.checkpoint_root + opt.model + '.pth')
        print("Epoch " + str(epoch) +
              " finished ==============================================")
        # log some information at this point
        vis.log("epoch:{epoch},lr:{lr},loss:{loss}".format(
            epoch=epoch, loss=loss_meter.value()[0], lr=lr))

        # update the learning rate: if the loss starts rising, decay it
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # second way to lower the lr: no loss of momentum state, etc.
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]

        # validate on the validation set and keep the model that does best on it
        # switch the model to evaluation mode
        netWork.eval()
        predict_label = []
        real_label = []
        for ii, (val_data_origin, val_label) in enumerate(val_dataloader):
            # val_input_img is the model input image
            val_input_img = Variable(val_data_origin, volatile=True)
            # val_label_img is the corresponding label
            val_label_img = val_label
            # move the data to the GPU
            if opt.use_gpu:
                val_input_img = val_input_img.cuda()
            # forward pass: val_label_output is the network output
            val_label_output = netWork(val_input_img)
            # collect the results
            val_predict_score = t.nn.functional.softmax(
                val_label_output, dim=1)[:, 1].cpu().data.numpy().flatten()
            val_label_img = val_label_img.numpy().flatten()
            for i in range(len(val_label_img)):
                predict_label.append(val_predict_score[i])
                real_label.append(val_label_img[i])
        # after one pass over the validation set, compute the AUC on it
        validation_auc_sklearn = roc_auc_score(real_label, predict_label)

        # plot the validation AUC (sklearn)
        vis.plot('validation auc', validation_auc_sklearn)
        # switch the model back to training mode
        netWork.train()

        # keep the model with the best validation AUC so far
        if best_val_auc < validation_auc_sklearn:
            best_val_auc = validation_auc_sklearn
            print('Best validation AUC so far: %.5f' % best_val_auc)
            t.save(netWork,
                   opt.checkpoint_root + 'auc' +
                   str(validation_auc_sklearn) + '.pth')
    print("============训练完毕=============")
Example #8
def train(model):
    avgLoss = 0.0
    best_acc = 0.0
    save_path = './weights/captcha'
    os.makedirs(save_path, exist_ok=True)
    if t.cuda.is_available():
        model = model.cuda()
    # data loading
    trainDataset = Captcha("../captcha/train/", train=True)
    testDataset = Captcha("../captcha/test/", train=False)
    trainDataLoader = DataLoader(trainDataset, batch_size=batchSize,
                                 shuffle=True, num_workers=4)
    testDataLoader = DataLoader(testDataset, batch_size=batchSize,
                                shuffle=True, num_workers=4)
    circles_per_epoch = len(trainDataLoader) // batchSize
    # max_iters = circles_per_epoch * circles_per_epoch
    # loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learningRate)

    loss_meter = meter.AverageValueMeter()
    # training procedure
    for epoch in range(totalEpoch):
        for circle, input in tqdm.tqdm(enumerate(trainDataLoader, 0)):
            x, label = input
            if t.cuda.is_available():
                x = x.cuda()
                label = label.cuda()
            label = label.long()
            label1, label2, label3, label4 = label[:, 0], label[:, 1], label[:, 2], label[:, 3]
            # print(label1,label2,label3,label4)
            optimizer.zero_grad()
            y1, y2, y3, y4 = model(x)
            # print(y1.shape, y2.shape, y3.shape, y4.shape)
            loss1, loss2, loss3, loss4 = criterion(y1, label1), criterion(y2, label2) \
                , criterion(y3, label3), criterion(y4, label4)
            loss = loss1 + loss2 + loss3 + loss4
            loss_meter.add(loss.item())
            writer.add_scalar('train/loss', loss.item(), circle + epoch * circles_per_epoch)
            # print(loss)
            avgLoss += loss.item()
            loss.backward()
            optimizer.step()
            # evaluation
            if circle % printCircle == 1:
                print("Epoch %d : after %d circle,the train loss is %.5f" %
                      (epoch, circle, avgLoss / printCircle))
                writeFile("Epoch %d : after %d circle,the train loss is %.5f" %
                          (epoch, circle, avgLoss / printCircle))

                avgLoss = 0
            if circle % testCircle == 1:
                accuracy = test(model, testDataLoader)
                if accuracy > best_acc:
                    best_acc = accuracy
                    model.save(save_path)
                print('current acc is : {}, the best acc is : {}'.format(accuracy, best_acc))
                writeFile("current acc is : %.5f, the best acc is : %.5f" % (accuracy, best_acc))
                writer.add_scalar('test/acc', accuracy, circle + epoch * circles_per_epoch)
            # if circle % saveCircle == 1:
            #     model.save(str(epoch)+"_"+str(saveCircle))
    writer.close()
def main():
    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    if opt.scat > 0:
        model, params, stats = models.__dict__[opt.model](N=opt.N, J=opt.scat)
    else:
        model, params, stats = models.__dict__[opt.model]()

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = %f' % lr)
        return torch.optim.SGD(params.values(),
                               lr,
                               opt.momentum,
                               weight_decay=opt.weightDecay)

    def get_iterator(mode):
        ds = create_dataset(opt, mode)
        return ds.parallel(batch_size=opt.batchSize,
                           shuffle=mode,
                           num_workers=opt.nthread,
                           pin_memory=False)

    optimizer = create_optimizer(opt, opt.lr)

    iter_test = get_iterator(False)
    iter_train = get_iterator(True)

    if opt.scat > 0:
        scat = Scattering(M=opt.N, N=opt.N, J=opt.scat, pre_pad=False).cuda()

    epoch = 0
    if opt.resume != '':
        resumeFile = opt.resume
        if not resumeFile.endswith('pt7'):
            resumeFile = torch.load(opt.resume + '/latest.pt7')['latest_file']
        state_dict = torch.load(resumeFile)
        epoch = state_dict['epoch']
        params_tensors, stats = state_dict['params'], state_dict['stats']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])
        print('model was restored from epoch:', epoch)

    print('\nParameters:')
    print(
        pd.DataFrame([(key, v.size(), torch.typename(v.data))
                      for key, v in params.items()]))
    print('\nAdditional buffers:')
    print(
        pd.DataFrame([(key, v.size(), torch.typename(v))
                      for key, v in stats.items()]))
    n_parameters = sum(
        [p.numel() for p in list(params.values()) + list(stats.values())])
    print('\nTotal number of parameters: %f' % n_parameters)

    meter_loss = meter.AverageValueMeter()
    classacc = meter.ClassErrorMeter(topk=[1, 5], accuracy=False)
    timer_data = meter.TimeMeter('s')
    timer_sample = meter.TimeMeter('s')
    timer_train = meter.TimeMeter('s')
    timer_test = meter.TimeMeter('s')

    def h(sample):
        inputs = sample[0].cuda()
        if opt.scat > 0:
            inputs = scat(inputs)
        inputs = Variable(inputs)
        targets = Variable(sample[1].cuda().long())
        if sample[2]:
            model.train()
        else:
            model.eval()
        y = torch.nn.parallel.data_parallel(model, inputs,
                                            np.arange(opt.ngpu).tolist())
        return F.cross_entropy(y, targets), y

    def log(t, state):
        if (t['epoch'] > 0 and t['epoch'] % opt.frequency_save == 0):
            torch.save(
                dict(params={k: v.data.cpu()
                             for k, v in params.items()},
                     stats=stats,
                     optimizer=state['optimizer'].state_dict(),
                     epoch=t['epoch']),
                open(os.path.join(opt.save, 'epoch_%i_model.pt7' % t['epoch']),
                     'wb'))
            torch.save(
                dict(
                    latest_file=os.path.join(opt.save, 'epoch_%i_model.pt7' %
                                             t['epoch'])),
                open(os.path.join(opt.save, 'latest.pt7'), 'wb'))

        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        global data_time
        data_time = timer_data.value()
        timer_sample.reset()
        state['sample'].append(state['train'])

    def on_forward(state):
        prev_sum5 = classacc.sum[5]
        prev_sum1 = classacc.sum[1]
        classacc.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].item())

        next_sum5 = classacc.sum[5]
        next_sum1 = classacc.sum[1]
        n = state['output'].data.size(0)
        curr_top5 = 100.0 * (next_sum5 - prev_sum5) / n
        curr_top1 = 100.0 * (next_sum1 - prev_sum1) / n
        sample_time = timer_sample.value()
        timer_data.reset()
        if (state['train']):
            txt = 'Train:'
        else:
            txt = 'Test'
        if (state['t'] % opt.frequency_print == 0 and state['t'] > 0):
            print(
                '%s [%i,%i/%i] ; loss: %.3f (%.3f) ; acc5: %.2f (%.2f) ; acc1: %.2f (%.2f) ; data %.3f ; time %.3f'
                % (txt, state['epoch'], state['t'] % len(state['iterator']),
                   len(state['iterator']), state['loss'].item(),
                   meter_loss.value()[0], curr_top5, classacc.value(5),
                   curr_top1, classacc.value(1), data_time, sample_time))

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()

        state['iterator'] = iter_train

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            print('changing LR')
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        if (state['t'] % opt.frequency_test == 0 and state['t'] > 0):
            train_loss = meter_loss.value()
            train_acc = classacc.value()
            train_time = timer_train.value()
            meter_loss.reset()
            classacc.reset()
            timer_test.reset()

            engine.test(h, iter_test)

            log(
                {
                    "train_loss": train_loss[0],
                    "train_acc": 100 - train_acc[0],
                    "test_loss": meter_loss.value()[0],
                    "test_acc": 100 - classacc.value()[0],
                    "epoch": state['epoch'],
                    "n_parameters": n_parameters,
                    "train_time": train_time,
                    "test_time": timer_test.value(),
                }, state)

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, iter_train, opt.epochs, optimizer)
Example #10
import warnings
warnings.filterwarnings("ignore")
from metric import metric_results, printMetricResults
from tensorboardX import SummaryWriter
from torchnet import meter

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# device = 'cpu'
class Config(object):
    tensorboardX_path = './tensorboardXDir/solution1'


config = Config()
loss_train_meter = meter.AverageValueMeter()  # track the mean and std of the loss
loss_valid_meter = meter.AverageValueMeter()


def get_logger(filename='logtest'):
    from logging import getLogger, INFO, StreamHandler, FileHandler, Formatter
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger
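A short, hypothetical usage note for the helper above: each call wires up the named log file, and the logger then writes the same message both to stdout and to <filename>.log.

logger = get_logger('train_run')   # creates train_run.log next to the script
logger.info('epoch %d: train loss %.4f', 0, 0.1234)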
Example #11
  print('Called with args:')
  print(args)

  if args.use_tfboard:
    from model.utils.logger import Logger
    # Set the logger
    logger = Logger('./logs')

  if args.use_visdom:
    # visdom
    from visual_loss import Visualizer
    from torchnet import meter
    featurename = 'fpn'
    visname = featurename + args.net + "_" + args.dataset + "_" + str(args.session)
    vis = Visualizer(env=visname)
    loss_meter = meter.AverageValueMeter()
    loss_rpn_cls_meter = meter.AverageValueMeter()
    loss_rpn_box_meter = meter.AverageValueMeter()
    loss_rcnn_cls_meter = meter.AverageValueMeter()
    loss_rcnn_box_meter = meter.AverageValueMeter()

  logging.basicConfig(filename="logs/"+args.net+"_"+args.dataset+"_"+str(args.session)+".log",
        filemode='w', level=logging.DEBUG)
  logging.info(str(datetime.now()))

  if args.dataset == "pascal_voc":
      args.imdb_name = "voc_2007_trainval"
      args.imdbval_name = "voc_2007_test"
      args.set_cfgs = ['FPN_ANCHOR_SCALES', '[32, 64, 128, 256, 512]', 'FPN_FEAT_STRIDES', '[4, 8, 16, 32, 64]', 'MAX_NUM_GT_BOXES', '20']
  elif args.dataset == "pascal_voc_0712":
      args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
Example #12
def train(**kwargs):
    """根据命令行参数更新配置"""
    opt.parse(kwargs)
    vis = Visualizer(opt.env)
    """(1)step1:加载网络,若有预训练模型也加载"""
    #model = getattr(models,opt.model)()
    model = models.resnet34(pretrained=True)
    model.fc = nn.Linear(512, 2)
    #if opt.load_model_path:
    #	model.load(opt.load_model_path)
    if opt.use_gpu:  #GPU
        model.cuda()
    """(2)step2:处理数据"""
    train_data = DogCat(opt.train_data_root, train=True)  #训练集
    val_data = DogCat(opt.train_data_root, train=False)  #验证集

    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)
    """(3)step3:定义损失函数和优化器"""
    criterion = t.nn.CrossEntropyLoss()  #交叉熵损失
    lr = opt.lr  #学习率
    optimizer = t.optim.SGD(model.parameters(),
                            lr=opt.lr,
                            weight_decay=opt.weight_decay)
    """(4)step4:统计指标,平滑处理之后的损失,还有混淆矩阵"""
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10
    """(5)开始训练"""
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in enumerate(train_dataloader):

            print "ii:", ii
            #训练模型参数
            input = Variable(data)
            target = Variable(label)

            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            # zero the gradients
            optimizer.zero_grad()
            score = model(input)

            loss = criterion(score, target)
            loss.backward()  # backward pass

            # update the parameters
            optimizer.step()

            # update metrics and visualization
            loss_meter.add(loss.item())
            #print score.shape,target.shape
            confusion_matrix.add(score.detach(), target.detach())

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
        #model.save()
        name = time.strftime('model' + '%m%d_%H:%M:%S.pth')
        t.save(model.state_dict(), 'checkpoints/' + name)
        """计算验证集上的指标及可视化"""
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        print "epoch:", epoch, "loss:", loss_meter.value(
        )[0], "accuracy:", val_accuracy
        """如果损失不再下降,则降低学习率"""
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    # setting the parameter in opt as the input argument
    for k, v in kwargs.items():
        setattr(opt, k, v.strip("'"))

    # setting the device
    opt.device = t.device('cuda') if opt.use_gpu else t.device('cpu')
    device = opt.device
    vis = Visualizer(env=opt.env)

    # get the sequence from sequence.npz
    data, word2ix_train, ix2word_train, word2ix_fix, ix2word_fix = load_data(
        opt.parsed_data_path)

    random.shuffle(data)

    # divide the data into train and test sets and convert them to dataloaders
    division = int(len(data) * 8 / 10)
    train_data = data[:division]
    test_data = data[division + 1:]
    train_data = t.from_numpy(train_data)
    test_data = t.from_numpy(test_data)
    dataloader = t.utils.data.DataLoader(train_data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)
    dataloader_fortest = t.utils.data.DataLoader(test_data,
                                                 batch_size=opt.batch_size,
                                                 shuffle=True,
                                                 num_workers=1)

    # define the model
    model = TrainingModel_Vec(len(word2ix_train), len(word2ix_fix), 200, 400)
    optimizer = t.optim.Adam(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=opt.lr)
    criterion = nn.CrossEntropyLoss()

    loss_meter = meter.AverageValueMeter()

    # load the pretrained word vector and convert it to a matrix in the order of index
    pretrained_weight = form_matrix(ix2word_fix, opt.pathforvec)
    pretrained_weight = np.array(pretrained_weight)
    # copy the pretrained vectors to the embeding
    model.embeddingsfix.weight.data.copy_(t.from_numpy(pretrained_weight))

    i = 0

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):

            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]

            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())

            # plot the loss
            if (1 + ii) % opt.plot_every == 0:

                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('lossintrain', loss_meter.value()[0])
                #print("loss_meter.value()[0] : " + str(loss_meter.value()[0]))
                vis.plot('lossintrain', loss.item())
                #print("loss.item() : " + str(loss.item()))

                # for test
        loss_meter.reset()
        model.eval()  # switch to test mode
        test_loss = 0  # initialize the test loss to 0
        correct = 0  # initialize the count of correct predictions to 0
        total = 0
        for iii, datatest in enumerate(dataloader_fortest):
            #if args.cuda:
            #   data, target = data.cuda(), target.cuda()

            datatest = datatest.long().transpose(1, 0).contiguous()
            datatest = datatest.to(device)
            optimizer.zero_grad()
            input_test, target_test = datatest[:-1, :], datatest[
                1:, :]  # the first drops the last row, the second drops the first row
            output_test, _ = model(input_test)
            test_loss += criterion(output_test, target_test.view(-1))
            #print("loss_test: " + str(loss_test))
            #loss_meter.add(loss_test.item())
            #test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
            pred = output_test.data.max(1, keepdim=True)[
                1]  # get the index of the max log-probability ([0] is the value, [1] the index)
            #print(output_test.size())
            #print(target_test.size())
            #print("right: " + str(pred.eq(target_test.data.view_as(pred)).cpu().sum()))
            #print(pred.size()[0])
            #print(target_test)
            target_test = target_test.data.view_as(
                pred)[int(pred.size()[0] / 4 * 2):int(pred.size()[0] / 4 * 3)]
            #print(target_test)
            pred = pred[int(pred.size()[0] / 4 * 2):int(pred.size()[0] / 4 *
                                                        3)]

            #print("original: " + str(len(datatest.data[0])))
            #print(target_test.data.view_as(pred).size()[0])
            #print(target_test.data.view_as(pred).size())
            correct += pred.eq(target_test).cpu().sum()  # accumulate the number of correct predictions
            total += target_test.size()[0]
            #correct += find_in_ten(output_test.data,target_test.data)

        test_loss /= (iii + 1)
        print(epoch)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
              format(test_loss, correct, total, 100. * correct / total))

        model.train()

    t.save(model.state_dict(), '%s_%s.pth' % ("testtestingfix", epoch))
Example #14
def train():
    vis = Visualizer("Kesci")
    train_data = AppData("data/train_23d_1p_ap.json", iflabel=True)
    val_data = AppData("data/val_23d_1p_ap.json", iflabel=True)
    train_dataloader = DataLoader(train_data, 256, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(val_data, 256, shuffle=False, num_workers=2)
    test_data = AppData("data/test_23d_1p_ap.json", iflabel=True)
    test_dataloader = DataLoader(test_data, 256, shuffle=False, num_workers=2)

    criterion = t.nn.CrossEntropyLoss(weight=t.Tensor([1, 1.2])).cuda()
    learning_rate = 0.0005
    weight_decay = 0.0002
    model = Sequence(15, 128, 1).cuda()
    optimizer = t.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=weight_decay)

    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(500):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, property, label) in tqdm(enumerate(train_dataloader)):
            input = Variable(data).cuda()
            input2 = Variable(property).cuda()
            target = Variable(label).cuda().view(-1)
            output = model(input, input2)

            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())
            confusion_matrix.add(output.data, target.data)

            if ii % 100 == 99:
                vis.plot('loss', loss_meter.value()[0])

        if epoch % 3 == 2:
            train_cm, train_f1 = val(model, train_dataloader)
            vis.plot('train_f1', train_f1)
        val_cm, val_f1 = val(model, val_dataloader)

        vis.plot_many({'val_f1': val_f1, 'learning_rate': learning_rate})

        # vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
        #     epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()),
        #     train_cm=str(confusion_matrix.value()), lr=learning_rate))

        if loss_meter.value()[0] > previous_loss:
            learning_rate = learning_rate * 0.95
            # second way to lower the lr: no loss of momentum state, etc.
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        previous_loss = loss_meter.value()[0]

        if epoch % 10 == 9:
            model.save()
            test_cm, test_f1 = val(model, test_dataloader)
            vis.plot('test_f1', test_f1)
            vis.log(
                "{train_f1}, {val_f1}, {test_f1}, model:{model}, {train_cm}, {val_cm}, {test_cm}"
                .format(train_f1=train_f1,
                        val_f1=val_f1,
                        test_f1=test_f1,
                        model=time.strftime('%m%d %H:%M:%S'),
                        train_cm=str(train_cm.value()),
                        val_cm=str(val_cm.value()),
                        test_cm=str(test_cm.value())))
def train(args):
    """
    Implements the training loop for the MultiTaskResnet3dClassifier.
    Args:
        args (Namespace) : Program arguments
    """
    # Get model and loss function
    model = MTClassifier3D(args).to(args.device)

    # Initialize losses for each head
    loss_wrapper = MultiTaskLoss(args)
    loss_fn = nn.BCEWithLogitsLoss()

    # TODO: Get train and validation dataloaders
    train_dataset = ClassifierDataset(args.csv_dir, 'train', args.features, resample=(
        args.num_slices, args.slice_size, args.slice_size))
    train_loader = DataLoader(
        train_dataset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True, pin_memory=True
    )
    
    peds_validation_dataset = ClassifierDataset(args.peds_csv_dir, 'val', args.peds_features, resample=(
        args.num_slices, args.slice_size, args.slice_size))
    peds_validation_loader = DataLoader(
        peds_validation_dataset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=False, pin_memory=True
    )
    
    adult_validation_dataset = ClassifierDataset(args.adult_csv_dir, 'val', args.adult_features, resample=(
        args.num_slices, args.slice_size, args.slice_size))
    adult_validation_loader = DataLoader(
        adult_validation_dataset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=False, pin_memory=True
    )

    # Get optimizer and scheduler
    optimizer = optim.Adam(model.parameters(), args.lr)
    warmup_iters = args.lr_warmup_epochs * len(train_loader)
    lr_milestones = [len(train_loader) * m for m in args.lr_milestones]
    lr_scheduler = WarmupMultiStepLR(
        optimizer, milestones=lr_milestones, gamma=args.lr_gamma,
        warmup_iters=warmup_iters, warmup_factor=1e-5)

    # Get saver, logger, and evaluator
    saver = ModelSaver(args, max_ckpts=args.max_ckpts,
                       metric_name=args.best_ckpt_metric, maximize_metric=args.maximize_metric)
    # evaluator = ModelEvaluator(args, validation_loader, cls_loss_fn)

    # Load model from checkpoint if applicable
    if args.continue_train:
        saver.load_model(model, args.name, ckpt_path=args.load_path,
                         optimizer=optimizer, scheduler=lr_scheduler)
    logger = TrainLogger(args, len(train_loader.dataset))


    # Multi GPU training if applicable
    if len(args.gpu_ids) > 1:
        print("Using", len(args.gpu_ids), "GPUs.")
        model = nn.DataParallel(model)

    loss_meter = meter.AverageValueMeter()

    # Train model
    logger.log_hparams(args)
    while not logger.is_finished_training():
        logger.start_epoch()

        for inputs, targets in tqdm(train_loader):
            logger.start_iter()
            with torch.set_grad_enabled(True):
                inputs = inputs.to(args.device)
                targets = targets.to(args.device)
                head_preds = model(inputs)

                loss = loss_wrapper(head_preds, targets)
                loss_meter.add(loss.item())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Log all train losses
            if logger.iter % args.steps_per_print == 0 and logger.iter != 0:
                logger.log_metrics({'train_loss': loss_meter.value()[0]})
                loss_meter.reset()

            logger.end_iter()

        # Evaluate model and save model ckpt
        if logger.epoch % args.epochs_per_eval == 0:
            peds_metrics = evaluate(args, model, loss_wrapper,
                            peds_validation_loader, "validation", args.device, 'peds')
            logger.log_metrics(peds_metrics)
            adult_metrics = evaluate(args, model, loss_wrapper,
                            adult_validation_loader, "validation", args.device, 'adult')
            logger.log_metrics(adult_metrics)
        
        if logger.epoch % args.epochs_per_save == 0:
            saver.save(logger.epoch, model, optimizer, lr_scheduler, args.device,
                       args.name)
        lr_scheduler.step()
        logger.end_epoch()
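Note the logging pattern above: the meter is reset right after each log, so value() reports the mean loss over just the last steps_per_print iterations rather than over the whole epoch. A standalone sketch of that windowed behaviour with fake loss values:

from torchnet import meter

loss_meter = meter.AverageValueMeter()
steps_per_print = 3
for step, fake_loss in enumerate([1.0, 0.9, 0.8, 0.7, 0.6, 0.5]):
    loss_meter.add(fake_loss)
    if (step + 1) % steps_per_print == 0:
        print('windowed mean loss:', loss_meter.value()[0])
        loss_meter.reset()  # the next window starts from scratch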
Example #16
def train(**kwargs):
    # update the configuration from the command-line arguments
    opt.parse(kwargs)
    # vis = Visualizer(opt.env)
    # step1: model
    model = getattr(mymodels, opt.model)()

    '''
    model_ft = torchvision.models.vgg16_bn(pretrained=True)
    pretrained_dict = model_ft.state_dict()
    model_dict = model.state_dict()
    # drop the keys in pretrained_dict that do not exist in model_dict
    pretrained_dict = {k: v for k, v in pretrained_dict.items()
                       if k in model_dict}
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    '''
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()
        summary(model, (3, 224, 224))
    print(opt)
    # step2: data
    train_data = myData(
        filelists=opt.train_filelists,
        # transform = data_transforms['train'],
        scale=opt.cropscale,
        transform=None,
        test=False,
        data_source='none')

    val_data = myData(
        filelists=opt.test_filelists,
        # transform =data_transforms['val'],
        transform=None,
        scale=opt.cropscale,
        test=False, data_source='none')
    train_loader = DataLoader(dataset=train_data,
                              batch_size=opt.batch_size, shuffle=True)
    print(train_loader)
    val_loader = DataLoader(dataset=val_data,
                            batch_size=opt.batch_size, shuffle=False)

    dataloaders = {'train': train_loader, 'val': val_loader}
    dataset_sizes = {'train': len(train_data), 'val': len(val_data)}

    # step3: loss function and optimizer
    criterion = FocalLoss(2)
    # criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    # optimizer = torch.optim.Adam(model.parameters(),
    #                       lr = lr,
    #                       weight_decay = opt.weight_decay)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=opt.lr,
                                momentum=0.5,
                                weight_decay=opt.weight_decay)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer,
                                           step_size=opt.lr_stepsize, gamma=0.5)
    # decay the learning rate by half every opt.lr_stepsize epochs
    # step4: metrics - the smoothed loss and a confusion matrix

    confusion_matrix = meter.ConfusionMeter(2)
    train_loss = meter.AverageValueMeter()  # added for visualization
    val_loss = meter.AverageValueMeter()
    train_acc = meter.AverageValueMeter()  # added for visualization
    val_acc = meter.AverageValueMeter()
    previous_loss = 1e100
    best_tpr = 0.0
    # training
    for epoch in range(opt.max_epoch):
        print('Epoch {}/{}'.format(epoch, opt.max_epoch - 1))
        print('-' * 10)
        train_loss.reset()
        train_acc.reset()
        running_loss = 0.0
        running_corrects = 0
        exp_lr_scheduler.step()
        for step, batch in enumerate(tqdm(train_loader, desc='Train %s On Anti-spoofing' % (opt.model), unit='batch')):
            inputs, labels = batch
            if opt.use_gpu:
                inputs = Variable(inputs.cuda())
                labels = Variable(labels.cuda())
            else:
                inputs = Variable(inputs)
                labels = Variable(labels)
            optimizer.zero_grad()  # zero the parameter gradients
            with torch.set_grad_enabled(True):
                outputs = model(inputs)
                # print(outputs.shape)
                _, preds = torch.max(outputs, 1)

                loss0 = criterion(outputs, labels)
                loss = loss0
                loss.backward()  # backward of gradient
                optimizer.step()  # strategy to drop
                if step % 20 == 0:
                    pass
                # print('epoch:%d/%d step:%d/%d loss: %.4f loss0: %.4f loss1: %.4f'%(epoch, opt.max_epoch, step, len(train_loader),
                # loss.item(),loss0.item(),loss1.item()))
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            '''
            if step % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', train_loss.value()[0])

                # enter debug mode if needed
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
            '''
        epoch_loss = running_loss / dataset_sizes['train']
        epoch_acc = running_corrects.double() / float(dataset_sizes['train'])
        print('Train Loss: {:.8f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
        train_loss.add(epoch_loss)
        train_acc.add(epoch_acc)

        val_loss.reset()
        val_acc.reset()
        val_cm, v_loss, v_accuracy, metric = val(model, val_loader, dataset_sizes['val'])
        print('Val Loss: {:.8f} Acc: {:.4f}'.format(v_loss, v_accuracy))
        val_loss.add(v_loss)
        val_acc.add(v_accuracy)

        eer = metric[0]
        tprs = metric[1]
        auc = metric[2]
        xy_dic = metric[3]
        tpr1 = tprs['TPR(1.%)']

        # vis.plot_many_stack({'train_loss':train_loss.value()[0],\
        # 				'val_loss':val_loss.value()[0]},win_name ="Loss")
        # vis.plot_many_stack({'train_acc':train_acc.value()[0],\
        # 				'val_acc':val_acc.value()[0]},win_name = 'Acc')
        # vis.log("epoch:{epoch},lr:{lr},\
        # 		train_loss:{train_loss},train_acc:{train_acc},\
        # 		val_loss:{val_loss},val_acc:{val_acc},\
        # 		train_cm:{train_cm},val_cm:{val_cm}"
        # .format(
        # 		   epoch = epoch,
        # 		   train_loss = train_loss.value()[0],
        # 		   train_acc = train_acc.value()[0],
        # 		   val_loss = val_loss.value()[0],
        # 		   val_acc = val_acc.value()[0],
        # 		   train_cm=str(confusion_matrix.value()),
        # 		   val_cm = str(val_cm.value()),
        # 		   lr=lr))
        '''
		if v_loss > previous_loss:          
			lr = lr * opt.lr_decay
			for param_group in optimizer.param_groups:
				param_group['lr'] = lr
		'''
        # vis.plot_many_stack({'lr':lr},win_name ='lr')
        previous_loss = val_loss.value()[0]
        # if tpr1 > best_tpr:
        best_tpr = tpr1
        best_tpr_epoch = epoch
        # best_model_wts = model.state_dict()
        os.system('mkdir -p %s' % (os.path.join('checkpoints', opt.model)))
        model.save(name='checkpoints/' + opt.model + '/' + str(epoch) + '.pth')
        # print('Epoch: {:d} Val Loss: {:.8f} Acc: {:.4f}'.format(epoch,v_loss,v_accuracy),file=open('result/val.txt','a'))
        print(
            'Epoch: {:d} Val Loss: {:.8f} Acc: {:.4f} EER: {:.6f} TPR(1.0%): {:.6f} TPR(.5%): {:.6f} AUC: {:.8f}'.format(
                epoch, v_loss, v_accuracy, eer, tprs["TPR(1.%)"], tprs["TPR(.5%)"], auc),
            file=open('D:\\dingding\\xiazai\\test\\val.txt', 'a'))
        print(
            'Epoch: {:d} Val Loss: {:.8f} Acc: {:.4f} EER: {:.6f} TPR(1.0%): {:.6f} TPR(.5%): {:.6f} AUC: {:.8f}'.format(
                epoch, v_loss, v_accuracy, eer, tprs["TPR(1.%)"], tprs["TPR(.5%)"], auc))
    # model.load_state_dict(best_model_wts)
    print('Best val Epoch: {},Best val TPR: {:4f}'.format(best_tpr_epoch, best_tpr))
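One caveat about the loop above: exp_lr_scheduler.step() is called at the top of each epoch, before any optimizer.step(); since PyTorch 1.1 the recommended order is to step the scheduler after the epoch's optimizer updates. A minimal sketch of that ordering (toy model, illustrative step size):

import torch
from torch import nn, optim
from torch.optim import lr_scheduler

model = nn.Linear(4, 2)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

for epoch in range(30):
    # ... inner loop: forward pass, loss.backward(), optimizer.step() ...
    exp_lr_scheduler.step()  # step the schedule once per epoch, after training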
Example #17
    # torchnet is used to store the loss values; if it is missing, install it with: conda install torchnet
    '''
    Model and loss-function setup before training:
    vis = Visualizer(env='my_wind')  # added for visualization
    loss_meter = meter.AverageValueMeter()  # added for visualization
    for epoch in range(10):
        # before each epoch, clear the stored losses and start recording again
        loss_meter.reset()  # added for visualization
        model.train()
        for ii, (data, label) in enumerate(trainloader):
            ...
            out = model(input)
            loss = ...
            loss_meter.add(loss.item())  # added for visualization

        # visualize the loss
        # loss_meter.value()[0] returns the mean of the stored losses
        vis.plot_many_stack({'train_loss': loss_meter.value()[0]})  # added for visualization
    '''
    # demo
    vis = Visualizer(env='my_wind')  # added for visualization
    loss_meter = meter.AverageValueMeter()  # added for visualization
    for epoch in range(103):
        time.sleep(.1)
        # loss_meter.reset()  # added for visualization
        loss_meter.add(epoch * random.random())  # pretend loss = epoch
        vis.plot_many_stack({'train_loss':
                             loss_meter.value()[0]})  # added for visualization
        # to display a test loss at the same time, do the same and pass it in the dict,
        # as below; train and test accuracy can be shown the same way:
        # vis.plot_many_stack({'train_loss': loss_meter.value()[0], 'test_loss': test_loss_meter.value()[0]})
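As the comment above suggests, a second meter can be tracked the same way and plotted in the same stacked window; a hypothetical continuation of the demo (reusing its vis, loss_meter, random and epoch variables):

    # Hypothetical: maintain a test-loss meter alongside the train-loss meter
    # and plot both curves in one stacked window.
    test_loss_meter = meter.AverageValueMeter()
    test_loss_meter.add(epoch * random.random() * 0.5)
    vis.plot_many_stack({'train_loss': loss_meter.value()[0],
                         'test_loss': test_loss_meter.value()[0]})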
def train(**kwargs):
    opt.parse(kwargs)
    if not os.path.exists(opt.save_folder):
        os.mkdir(opt.save_folder)
    tb_logger = SummaryWriter(opt.save_folder)
    logger = create_logger('global_logger', opt.save_folder + '/log.txt')
    batch_time = AverageMeter(10)
    data_time = AverageMeter(10)
    losses = AverageMeter(10)
    loss_meter = meter.AverageValueMeter()

    train_sets = []
    for data_txt in opt.train_txt:
        data_root, gt_root, list_file = data_txt.split(' ')
        train_sets.append(
            OCRDataset(data_root, gt_root, list_file, opt.input_size, 'train',
                       opt.chars_list, opt.max_seq))
    train_data = ConcatDataset(train_sets)
    train_loader = DataLoader(train_data,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=opt.num_works)

    valid_sets = []
    for valid_txt in opt.valid_txt:
        data_root, gt_root, list_file = valid_txt.split(' ')
        valid_sets.append(
            OCRDataset(data_root, gt_root, list_file, opt.input_size, 'valid',
                       opt.chars_list, opt.max_seq))
    valid_data = ConcatDataset(valid_sets)
    valid_loader = DataLoader(valid_data,
                              batch_size=opt.batch_size,
                              shuffle=False,
                              num_workers=opt.num_works)

    model = getattr(models, opt.model)(opt.basenet,
                                       opt.input_size,
                                       opt.max_seq,
                                       opt.num_classes,
                                       mode='train',
                                       attn=opt.attn)

    if opt.load_model_path is not None:
        load_state(model, opt.load_model_path, 'cuda:%d' % opt.gpus[0])
    if len(opt.gpus) > 1:
        model = torch.nn.DataParallel(model, device_ids=opt.gpus)
    model = gpu(model, opt)

    if len(opt.gpus) > 1:
        optimizer = torch.optim.Adam(model.module.parameters(),
                                     lr=opt.lr,
                                     betas=opt.betas,
                                     weight_decay=opt.weight_decay)
    else:
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=opt.betas,
                                     weight_decay=opt.weight_decay)

    curr_step = 0
    total_step = int(len(train_data) / opt.batch_size * opt.epoches)
    best_val_error = 1e10
    previous_loss = 1e10
    # warmup
    warmup_epoches = opt.epoches // 10
    warmup_rate = math.pow(100, 1 / warmup_epoches)
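    # (multiplying the lr by warmup_rate once per epoch for warmup_epoches epochs
    #  scales the learning rate by a factor of 100 in total)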

    for epoch in range(opt.epoches):
        model.train()
        end = time.time()
        # loss_meter.reset()
        for i, (imgs, gt_chars_seg, gt_order_seg,
                gt_pos_seg) in enumerate(train_loader):
            # measure data loading time
            data_time.update(time.time() - end)
            # zero the parameter gradients
            optimizer.zero_grad()

            imgs = gpu(imgs, opt)
            gt_chars_seg = gpu(gt_chars_seg, opt)
            gt_order_seg = gpu(gt_order_seg, opt)
            gt_pos_seg = gpu(gt_pos_seg, opt)

            chars_seg, ord_seg, pos_seg = model(imgs)
            loss = get_loss(chars_seg, ord_seg, pos_seg, gt_chars_seg,
                            gt_order_seg, gt_pos_seg, opt)

            loss.backward()
            optimizer.step()
            losses.update(loss.item())
            loss_meter.add(loss.item())
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            curr_step += 1
            current_lr = optimizer.param_groups[0]['lr']
            if curr_step % opt.print_freq == 0:
                tb_logger.add_scalar('loss_train', losses.avg, curr_step)
                tb_logger.add_scalar('lr', current_lr, curr_step)
                logger.info(
                    'Iter: [{0}/{1}]\t'
                    'Epoch: {2}\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'LR {lr:.4f}'.format(curr_step,
                                         total_step,
                                         epoch,
                                         batch_time=batch_time,
                                         data_time=data_time,
                                         loss=losses,
                                         lr=current_lr))

        # val
        model.eval()
        val_error = val(model, valid_loader, opt)
        logger.info('Mean error: {0}\t'.format(val_error))
        if tb_logger is not None:
            tb_logger.add_scalar('error_val', val_error, curr_step)
        if val_error < best_val_error:
            best_val_error = val_error
            if len(opt.gpus) > 1:
                torch.save(model.module.state_dict(),
                           os.path.join(opt.save_folder, "best_val_error.pth"))
            else:
                torch.save(model.state_dict(),
                           os.path.join(opt.save_folder, "best_val_error.pth"))
        # warmup
        if epoch < warmup_epoches:
            for param_group in optimizer.param_groups:
                param_group["lr"] *= warmup_rate
        else:
            # decay the lr if the loss no longer decreases
            if opt.lr_immediate_decay and loss_meter.value()[0] > previous_loss:
                for param_group in optimizer.param_groups:
                    param_group["lr"] *= opt.lr_decay

            if epoch == int(opt.epoches * 0.6) or epoch == int(
                    opt.epoches * 0.9):
                for param_group in optimizer.param_groups:
                    param_group["lr"] *= opt.lr_decay
            previous_loss = loss_meter.value()[0]
    # save last pth
    if len(opt.gpus) > 1:
        torch.save(model.module.state_dict(),
                   os.path.join(opt.save_folder, "last.pth"))
    else:
        torch.save(model.state_dict(), os.path.join(opt.save_folder,
                                                    "last.pth"))
Exemple #19
0
def train():
    """
    train function
    :return:
    """

    vis = visualizer.Visualizer(config.visdom_env)

    # step1: configure model
    model = torchvision.models.densenet121(pretrained=False, num_classes=2)
    if config.use_gpu:
        model = torch.nn.DataParallel(model).cuda()

    # step2: data
    train_data = DogCat(config.train_data_root, train=True)
    val_data = DogCat(config.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers)
    val_dataloader = DataLoader(val_data,
                                config.batch_size,
                                shuffle=False,
                                num_workers=config.num_workers)

    # step3: criterion and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    lr = config.lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=config.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    model.train()
    best_acc = -1.0
    start_epoch = -1

    # optionally resume from a checkpoint
    state = dict()
    if config.load_model_path:
        logging.info('Loading checkpoint from {path}'.format(
            path=config.load_model_path))
        state = model_util.load(config.load_model_path)
        start_epoch = state['epoch']
        best_acc = state['accuracy']
        model.load_state_dict(state['state_dic'])
        optimizer.load_state_dict(state['optimizer'])
        logging.info('Loaded checkpoint from {path}'.format(
            path=config.load_model_path))

    for epoch in range(start_epoch + 1, config.max_epoch):

        logging.info('epoch = %d' % epoch)

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_dataloader)):

            # train model
            input_var = Variable(data)
            target_var = Variable(label)
            if config.use_gpu:
                input_var = input_var.cuda()
                target_var = target_var.cuda()

            optimizer.zero_grad()
            score = model(input_var)
            loss = criterion(score, target_var)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target_var.data)

            if ii % config.print_freq == config.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # enter debug mode
                # if os.path.exists(config.debug_file):
                #     import ipdb;
                #     ipdb.set_trace()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        is_best = val_accuracy > best_acc
        best_acc = max(val_accuracy, best_acc)

        logging.info(
            "epoch:{epoch},lr:{lr},loss:{loss},acc:{acc} train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    acc=val_accuracy,
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        state['epoch'] = epoch
        state['model'] = config.model
        state['state_dic'] = model.state_dict()
        state['accuracy'] = val_accuracy
        state['optimizer'] = optimizer.state_dict()
        model_util.save(state, config.checkpoint_dir, is_best)

        # update learning rate
        if loss_meter.value()[0] > previous_loss:
            lr = lr * config.lr_decay
            # the second way to lower the learning rate: no momentum or other optimizer state is lost
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
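The manual rule above (compare loss_meter.value()[0] with previous_loss and scale the learning rate by config.lr_decay) can also be written with PyTorch's built-in ReduceLROnPlateau scheduler. A rough, self-contained sketch of that alternative; the model, the loss values and factor=0.5 are placeholders, not values from the snippet:

import torch

model = torch.nn.Linear(10, 2)                              # stand-in model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# factor plays the role of lr_decay; patience=0 lowers the lr as soon as the
# monitored loss fails to improve, mirroring the manual check above
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=0)

for epoch, epoch_loss in enumerate([1.0, 0.8, 0.9, 0.7]):   # pretend epoch losses
    scheduler.step(epoch_loss)
    print(epoch, optimizer.param_groups[0]['lr'])           # lr drops once the loss stops improving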
Exemple #20
0
def train(train_loader, model, criterion, optimizer, epoch,
          compression_scheduler, loggers, args):
    """Training loop for one epoch."""
    losses = OrderedDict([(OVERALL_LOSS_KEY, tnt.AverageValueMeter()),
                          (OBJECTIVE_LOSS_KEY, tnt.AverageValueMeter())])

    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    # For Early Exit, we define statistics for each exit
    # So exiterrors is analogous to classerr for the non-Early Exit case
    if args.earlyexit_lossweights:
        args.exiterrors = []
        for exitnum in range(args.num_exits):
            args.exiterrors.append(tnt.ClassErrorMeter(accuracy=True, topk=(1, 5)))

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)', total_samples, batch_size)

    # Switch to train mode
    model.train()
    end = time.time()

    for train_step, (inputs, target) in enumerate(train_loader):
        # Measure data loading time
        data_time.add(time.time() - end)
        inputs, target = inputs.to('cuda'), target.to('cuda')

        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step, steps_per_epoch, optimizer)

        if not hasattr(args, 'kd_policy') or args.kd_policy is None:
            output = model(inputs)
        else:
            output = args.kd_policy.forward(inputs)

        if not args.earlyexit_lossweights:
            loss = criterion(output, target)
            # Measure accuracy and record loss
            classerr.add(output.data, target)
        else:
            # Measure accuracy and record loss
            loss = earlyexit_loss(output, target, criterion, args)

        losses[OBJECTIVE_LOSS_KEY].add(loss.item())

        if compression_scheduler:
            # Before running the backward phase, we allow the scheduler to modify the loss
            # (e.g. add regularization loss)
            agg_loss = compression_scheduler.before_backward_pass(epoch, train_step, steps_per_epoch, loss,
                                                                  optimizer=optimizer, return_loss_components=True)
            loss = agg_loss.overall_loss
            losses[OVERALL_LOSS_KEY].add(loss.item())

            for lc in agg_loss.loss_components:
                if lc.name not in losses:
                    losses[lc.name] = tnt.AverageValueMeter()
                losses[lc.name].add(lc.value.item())

        # Compute the gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step, steps_per_epoch, optimizer)

        # measure elapsed time
        batch_time.add(time.time() - end)
        steps_completed = (train_step+1)

        if steps_completed % args.print_freq == 0:
            # Log some statistics
            errs = OrderedDict()
            if not args.earlyexit_lossweights:
                errs['Top1'] = classerr.value(1)
                errs['Top5'] = classerr.value(5)
            else:
                # for Early Exit case, the Top1 and Top5 stats are computed for each exit.
                for exitnum in range(args.num_exits):
                    errs['Top1_exit' + str(exitnum)] = args.exiterrors[exitnum].value(1)
                    errs['Top5_exit' + str(exitnum)] = args.exiterrors[exitnum].value(5)

            stats_dict = OrderedDict()
            for loss_name, meter in losses.items():
                stats_dict[loss_name] = meter.mean
            stats_dict.update(errs)
            stats_dict['LR'] = optimizer.param_groups[0]['lr']
            stats_dict['Time'] = batch_time.mean
            stats = ('Performance/Training/', stats_dict)

            params = model.named_parameters() if args.log_params_histograms else None
            distiller.log_training_progress(stats,
                                            params,
                                            epoch, steps_completed,
                                            steps_per_epoch, args.print_freq,
                                            loggers)
        end = time.time()
Exemple #21
0
def train(args, settings, train_datset, model, optimizer, writer, device):
    print("start")

    train_loader = DataLoader(dataset=train_datset,
                              batch_size=args.batchsize,
                              collate_fn=batcher_g,
                              shuffle=args.shuffle,
                              num_workers=args.workers)
    # test_loader= DataLoader(dataset=test_dataset,batch_size=args.batchsize,collate_fn=batcher_g,shuffle=args.shuffle,num_workers=args.workers)

    print(model)
    model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    # MAE_fn = nn.L1Loss()
    n_loss_meter = meter.AverageValueMeter()
    c_loss_meter = meter.AverageValueMeter()
    n_acc_meter = meter.ConfusionMeter(
        100)  # clustering num might be too big, do not use confusion matrix
    c_acc_meter = AccMeter(settings['cls_num'])
    init_lr = args.lr
    info = {'n_loss': [], 'n_acc': [], 'c_loss': [], 'c_acc': []}
    cls_tags = 0
    K = settings['cls_num']
    N = len(train_datset)
    cls_distr = torch.ones(K) / K
    inst_distr = torch.ones(N) / N

    cls_log_prob = np.ones([N, K]) * np.log(K) / N  # prob tensor (cost function)
    cls_tags = np.ones([N, K]) / (K * N)  # the tag is a prob distribution

    for epoch in range(args.epochs):
        n_loss_meter.reset()
        c_loss_meter.reset()
        n_acc_meter.reset()
        c_acc_meter.reset()
        model.train()

        # prepare pseudo labels (k-means variant commented out; optimal transport is used below)
        if epoch % settings['cls_epochs'] == 1:
            # feats_all = get_preds(args, model, train_datset, device)
            # if epoch == 0:
            #     cls_tags = k_means(feats_all.cpu(), settings['cls_num'], settings['iters'],
            #                        inits=settings['init_method'], show_stats=True)
            # else:
            #     cls_tags = k_means(feats_all.cpu(), settings['cls_num'], settings['iters'], inits='random',     #use random tags
            #                        show_stats=True)

            # perform optimal transport
            time0 = time.time()
            cls_tags = ot.sinkhorn(
                inst_distr, cls_distr, cls_log_prob,
                0.04)  # shape dataset_num*cls_num ...takes 40s~250s on cpu
            print('optimal transport solved: {}'.format(time.time() - time0))

        # model.re_init_head()
        for idx, (mols, n_label, ids) in enumerate(train_loader):
            g = dgl.batch([mol.ful_g for mol in mols])
            g = g.to(device)
            n_label = n_label.to(device)

            # Mask node features
            mask = torch.randint(
                0, g.number_of_nodes(),
                [int(args.mask_n_ratio * g.number_of_nodes())])
            g.ndata['nodes'][mask] = 0

            # make pseudo labels via optimal transport
            cls_labels = N * torch.tensor(
                cls_tags[list(ids)], requires_grad=False).to(device).float()

            atom_preds, cls_preds = model(g)
            cls_logits = torch.log(F.softmax(cls_preds, dim=1))

            n_pred_cls = torch.argmax(atom_preds, dim=1)
            n_loss = loss_fn(atom_preds[mask], n_label[mask])

            # compute c loss
            c_loss = torch.sum(-cls_labels * cls_logits, dim=1).mean()

            loss = c_loss + n_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            cls_log_prob[idx * args.batchsize:idx * args.batchsize +
                         len(mols)] = -cls_logits.detach().cpu().numpy()
            n_loss_meter.add(n_loss.detach().item())
            c_loss_meter.add(c_loss.detach().item())
            n_acc_meter.add(n_pred_cls, n_label)
            # c_acc_meter.add(c_pred_cls, cls_labels)
            if idx % 50 == 0 and args.use_tb:
                acc = 100 * sum(
                    n_acc_meter.value()[i, i]
                    for i in range(10)) / n_acc_meter.value().sum()
                writer.add_scalar(
                    'n_train_loss',
                    n_loss_meter.value()[0],
                    int((idx + 1 + epoch * len(train_loader)) / 50))
                writer.add_scalar(
                    'n_train_acc', acc,
                    int((idx + 1 + epoch * len(train_loader)) / 50))
                print('training loss {} acc {}'.format(n_loss_meter.value()[0],
                                                       acc))

        # n_loss_test, n_acc_test= test(args,test_loader,model,device)

        acc = 100 * sum(n_acc_meter.value()[i, i]
                        for i in range(10)) / n_acc_meter.value().sum()
        print(
            "Epoch {:2d}, training: loss: {:.7f}, acc: {:.7f}  self-clustering: loss: {:.7f}"
            .format(epoch,
                    n_loss_meter.value()[0], acc,
                    c_loss_meter.value()[0]))
        if (epoch + 1) % 100 == 0:
            init_lr = init_lr / 1
            for param_group in optimizer.param_groups:
                param_group['lr'] = init_lr
            print('current learning rate: {}'.format(init_lr))

        info['n_loss'].append(n_loss_meter.value()[0])
        info['n_acc'].append(acc)
        info['c_loss'].append(c_loss_meter.value()[0])
        info['c_acc'].append(100 * c_acc_meter.value())
    return info
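For context on the ot.sinkhorn call above: POT's Sinkhorn solver takes the two marginal distributions and a cost matrix and returns a transport plan whose rows act as soft pseudo-label assignments. A tiny sketch with made-up sizes; only the 0.04 regularisation value is taken from the snippet:

import numpy as np
import ot  # POT (pip install pot)

N, K = 8, 3                                  # toy sizes: samples, clusters
inst_distr = np.ones(N) / N                  # uniform marginal over samples
cls_distr = np.ones(K) / K                   # uniform marginal over clusters
cost = np.random.rand(N, K)                  # stand-in for -log p(cluster | sample)

plan = ot.sinkhorn(inst_distr, cls_distr, cost, reg=0.04)   # shape (N, K), entries sum to 1
pseudo_labels = plan.argmax(axis=1)          # hard assignments, if needed
print(plan.shape, pseudo_labels)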
Exemple #22
0
def _validate(data_loader, model, criterion, loggers, args, epoch=-1):
    """Execute the validation/test loop."""
    losses = {'objective_loss': tnt.AverageValueMeter()}
    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))

    if args.earlyexit_thresholds:
        # for Early Exit, we have a list of errors and losses for each of the exits.
        args.exiterrors = []
        args.losses_exits = []
        for exitnum in range(args.num_exits):
            args.exiterrors.append(tnt.ClassErrorMeter(accuracy=True, topk=(1, 5)))
            args.losses_exits.append(tnt.AverageValueMeter())
        args.exit_taken = [0] * args.num_exits

    batch_time = tnt.AverageValueMeter()
    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    if args.display_confusion:
        confusion = tnt.ConfusionMeter(args.num_classes)
    total_steps = total_samples / batch_size
    msglogger.info('%d samples (%d per mini-batch)', total_samples, batch_size)

    # Switch to evaluation mode
    model.eval()

    end = time.time()
    for validation_step, (inputs, target) in enumerate(data_loader):
        with torch.no_grad():
            inputs, target = inputs.to('cuda'), target.to('cuda')
            # compute output from model
            output = model(inputs)

            if not args.earlyexit_thresholds:
                # compute loss
                loss = criterion(output, target)
                # measure accuracy and record loss
                losses['objective_loss'].add(loss.item())
                classerr.add(output.data, target)
                if args.display_confusion:
                    confusion.add(output.data, target)
            else:
                earlyexit_validate_loss(output, target, criterion, args)

            # measure elapsed time
            batch_time.add(time.time() - end)
            end = time.time()

            steps_completed = (validation_step+1)
            if steps_completed % args.print_freq == 0:
                if not args.earlyexit_thresholds:
                    stats = ('',
                            OrderedDict([('Loss', losses['objective_loss'].mean),
                                         ('Top1', classerr.value(1)),
                                         ('Top5', classerr.value(5))]))
                else:
                    stats_dict = OrderedDict()
                    stats_dict['Test'] = validation_step
                    for exitnum in range(args.num_exits):
                        la_string = 'LossAvg' + str(exitnum)
                        stats_dict[la_string] = args.losses_exits[exitnum].mean
                        # Because of the nature of ClassErrorMeter, if an exit is never taken during the batch,
                        # then accessing the value(k) will cause a divide by zero. So we'll build the OrderedDict
                        # accordingly and we will not print for an exit error when that exit is never taken.
                        if args.exit_taken[exitnum]:
                            t1 = 'Top1_exit' + str(exitnum)
                            t5 = 'Top5_exit' + str(exitnum)
                            stats_dict[t1] = args.exiterrors[exitnum].value(1)
                            stats_dict[t5] = args.exiterrors[exitnum].value(5)
                    stats = ('Performance/Validation/', stats_dict)

                distiller.log_training_progress(stats, None, epoch, steps_completed,
                                                total_steps, args.print_freq, loggers)
    if not args.earlyexit_thresholds:
        msglogger.info('==> Top1: %.3f    Top5: %.3f    Loss: %.3f\n',
                       classerr.value()[0], classerr.value()[1], losses['objective_loss'].mean)

        if args.display_confusion:
            msglogger.info('==> Confusion:\n%s\n', str(confusion.value()))
        return classerr.value(1), classerr.value(5), losses['objective_loss'].mean
    else:
        total_top1, total_top5, losses_exits_stats = earlyexit_validate_stats(args)
        return total_top1, total_top5, losses_exits_stats[args.num_exits-1]
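A short note on the tnt.ClassErrorMeter used above: with accuracy=True it reports top-k accuracy rather than error, and value(k) returns the figure for that k. A minimal sketch with random tensors:

import torch
import torchnet.meter as tnt

classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
output = torch.randn(32, 10)                 # fake logits for 10 classes
target = torch.randint(0, 10, (32,))         # fake labels
classerr.add(output, target)
print('Top1: %.2f  Top5: %.2f' % (classerr.value(1), classerr.value(5)))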
Exemple #23
0
def train(**kwargs):
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)(opt)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data
    train_data = DocumentPair(opt.train_data_root,
                              doc_type='train',
                              suffix='txt',
                              load=lambda x: x.strip().split(','))
    train_data.initialize(vocab_size=opt.vocab_size)
    val_data = DocumentPair(opt.validate_data_root,
                            doc_type='validate',
                            suffix='txt',
                            load=lambda x: x.strip().split(','),
                            vocab=train_data.vocab)
    val_data.initialize()
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=False,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, batch in enumerate(train_dataloader):

            data_left, data_right, label, num_pos = load_data(
                batch, opt, train_data.vocab)

            # train model
            input_data_left, input_data_right = Variable(
                t.from_numpy(data_left)), Variable(t.from_numpy(data_right))
            target = Variable(t.from_numpy(label))
            if opt.use_gpu:
                input_data_left, input_data_right = input_data_left.cuda(
                ), input_data_right.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            scores, predictions = model((input_data_left, input_data_right))
            loss = criterion(scores, target.max(1)[1])
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(predictions.data, target.max(1)[1].data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # enter debug mode
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # update learning rate
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # the second way to lower the learning rate: no momentum or other optimizer state is lost
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
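For the meter.ConfusionMeter(2) used above: value() returns a 2x2 matrix of counts, and a common way to turn it into an accuracy is the trace divided by the total. A minimal sketch with hand-made scores:

import torch
from torchnet import meter

cm = meter.ConfusionMeter(2)
scores = torch.tensor([[2.0, 0.1], [0.2, 1.5], [0.9, 0.8], [0.1, 3.0]])  # fake logits
labels = torch.tensor([0, 1, 1, 1])
cm.add(scores, labels)

cm_value = cm.value()                                           # 2x2 count matrix
accuracy = 100.0 * (cm_value[0][0] + cm_value[1][1]) / cm_value.sum()
print(cm_value, accuracy)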
Exemple #24
0
def train(**kwargs):
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)
    device = t.device('cuda') if opt.use_gpu else t.device('cpu')

    # data loading
    train_data = FLogo(opt.data_root, train=True)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True, num_workers=opt.num_workers)

    '''
    # The following visualizes the data produced by the dataloader, in order to:
    # 1. sanity-check the dataset
    # 2. put together figures for the paper

    dataiter = iter(train_dataloader)
    img1, img2, label = next(dataiter)
    img1 = tv.utils.make_grid((img1 + 1) / 2, nrow=6, padding=2).numpy()
    img2 = tv.utils.make_grid((img2 + 1) / 2, nrow=6, padding=2).numpy()
    plt.figure()
    plt.imshow(np.transpose(img1, (1, 2, 0)))
    plt.figure()
    plt.imshow(np.transpose(img2, (1, 2, 0)))
    plt.figure()
    labels = label.unsqueeze(1)
    mask = tv.utils.make_grid(labels, nrow=6, padding=2).numpy()
    plt.imshow(np.transpose(mask, (1, 2, 0)))
    plt.show()


from torchvision.transforms import ToPILImage
import numpy as np
import matplotlib.pylab as plt
train()
    '''

    # network
    net = Net()
    net.train()

    # load a pretrained model
    if opt.load_model_path:
        net.load_state_dict(t.load(opt.load_model_path, map_location=lambda storage, loc: storage), strict=False)
        print('Pretrained weights loaded.')
    else:
        # initialize the model parameters
        for m in net.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_normal_(m.weight)
                print('Model parameters initialized.')
    net.to(device)

    # loss function and optimizer
    criterion = nn.BCEWithLogitsLoss(pos_weight=opt.pos_weight.to(device))
    optimizer = t.optim.SGD(net.parameters(), lr=opt.lr, momentum=opt.momentum, weight_decay=opt.weight_decay)

    # use the meter module to track the running loss
    loss_meter = meter.AverageValueMeter()

    # learning-rate schedule
    # scheduler = StepLR(optimizer, step_size=1000, gamma=0.5)

    for epoch in range(opt.epoches):
        loss_meter.reset()  # clear the stored losses at the start of each epoch
        for ii, (target_img, query_logo, mask) in tqdm.tqdm(enumerate(train_dataloader)):
            print(target_img.shape)
            # training
            target_img = target_img.to(device)
            query_logo = query_logo.to(device)

            mask = mask.to(device)

            optimizer.zero_grad()

            output = net(query_logo,target_img)
            output = output.squeeze()
            predict = t.sigmoid(output)
            # predict_mask = t.sigmoid(output) # true output should be sigmoid
            # ipdb.set_trace()
            true_mask = mask/255

            # predict = output.view(output.size(0),-1)
            # target = true_mask.view(true_mask.size(0),-1)
            # ipdb.set_trace()
            # print(predict.size(),target.size())


            # loss = criterion(F.softmax(output,dim=2),true_mask)
            loss = criterion(output,true_mask)
            # print(loss.item())

            loss.backward()
            optimizer.step()

            # meter update and visualize
            loss_meter.add(loss.item())
            if (ii+1)%opt.plot_every == 0:

                vis.img('target_img', ((target_img + 1) / 2).data[0])
                vis.img('query_logo', ((query_logo + 1) / 2).data[0])
                vis.img('ground truth', true_mask.data[0])
                vis.img('predict', predict.data[0])
                pre_judgement = predict.data[0]
                pre_judgement[pre_judgement > 0.5] = 1  # how about using 0.7 instead?
                pre_judgement[pre_judgement <= 0.5] = 0
                vis.img('pre_judge(>0.5)', pre_judgement)

                # vis.img('pre_judge', pre_judgement)
                # vis.log({'predicted':output.data[0].cpu().numpy()})
                # vis.log({'truth groud':true_mask.data[0].cpu().numpy()})

        print('finish epoch:', epoch)
        # vis.log({'predicted': output.data[0].cpu().numpy()})
        vis.plot('loss', loss_meter.value()[0])

        if (epoch + 1) % opt.save_model_epoch == 0:
            vis.save([opt.env])
            t.save(net.state_dict(), 'checkpoints/%s_localize_v6.pth' % epoch)
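As in the snippet above, BCEWithLogitsLoss is applied to the raw network output; the sigmoid is only needed when turning that output into a 0/1 mask. A small sketch of the equivalence (pos_weight omitted, shapes made up):

import torch

logits = torch.randn(2, 4, 4)                     # raw network output
target = torch.randint(0, 2, (2, 4, 4)).float()   # binary mask in {0, 1}

loss_a = torch.nn.BCEWithLogitsLoss()(logits, target)
loss_b = torch.nn.BCELoss()(torch.sigmoid(logits), target)
print(torch.allclose(loss_a, loss_b, atol=1e-6))  # same value; the logits version is numerically safer

mask = (torch.sigmoid(logits) > 0.5).float()      # hard prediction, as done for 'pre_judge' above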
Exemple #25
0
def train(**kwargs):
    # opt.parse(kwargs)
    vis = Visualizer(opt.env)

    savingData = []  #
    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    test_data = DogCat(opt.test_data_root, test=True)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)
    test_dataloader = DataLoader(test_data,
                                 opt.batch_size,
                                 shuffle=False,
                                 num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch + 1):

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        test_cm, test_accuracy = val(model, test_dataloader)
        vis.plot('test_accuracy', test_accuracy)
        vis.plot('lr', lr)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm},test_cm:{test_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    test_cm=str(test_cm.value()),
                    lr=lr))
        print("epoch = ", epoch, "   loss = ",
              loss_meter.value()[0], "   lr = ", lr)
        batch_results = [(epoch, loss_meter.value()[0], lr,
                          str(val_cm.value()), str(confusion_matrix.value()),
                          str(test_cm.value()), val_accuracy, test_accuracy)
                         ]  #
        savingData += batch_results  #
        save_training_data(savingData, opt.traingData_file)  #
        # update learning rate
        # if loss_meter.value()[0] > previous_loss:
        lr = lr * opt.lr_decay
        # # the second way to lower the learning rate: no momentum or other optimizer state is lost
        # for param_group in optimizer.param_groups:
        #     param_group['lr'] = lr

        if epoch == opt.max_epoch:
            return

        previous_loss = loss_meter.value()[0]
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_data) / opt.batch_size):

            # train model
            input = data
            target = label
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # enter debug mode
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        prefix = 'checkpoints/'
        name = time.strftime(prefix + '%m%d_%H:%M:%S_' + str(epoch + 1) +
                             '.pth')
        if epoch == 0:
            model.save(name)
        if np.mod(epoch + 1, 10) == 0:
            model.save(name)
Exemple #26
0
def train(opt):
    # create the visualization object
    if opt.use_env:
        # start the visdom server first (e.g. from PyCharm's Terminal): python -m visdom.server
        vis = Visdom(env=opt.env)

    # load the data
    data, ix2word, word2ix = get_data(opt)  #
    # word_embeds = word2vec_train(opt, data, ix2word)  # pre-train word vectors to replace the embedding layer, needed for TextRank keyword extraction
    # print(word_embeds)
    data = t.from_numpy(data)  # convert to a torch tensor
    # print(data.shape, data)
    dataloader = DataLoader(data,
                            batch_size=opt.batch_size,
                            shuffle=True,
                            num_workers=1)

    # model definition
    encoder = Encoder(len(word2ix), opt.hidden_dim)
    decoder = AttentionDecoder(opt.hidden_dim, len(word2ix), opt.dropout_rate,
                               opt.input_len)

    en_optimizer = t.optim.SGD(encoder.parameters(), lr=opt.lr)
    de_optimizer = t.optim.SGD(decoder.parameters(), lr=opt.lr)

    criterion = nn.CrossEntropyLoss()

    # if opt.model_path:
    #     model.load_state_dict(t.load(opt.model_path))

    if opt.use_gpu:
        # model=model.cuda()
        encoder = encoder.cuda()
        decoder = decoder.cuda()
        criterion = criterion.cuda()

    loss_meter = meter.AverageValueMeter()  # previously loss = 0.0 was kept by hand; this tracks the running mean automatically
    # count = 0
    for epoch in range(opt.epoch):
        # print(epoch)
        loss_meter.reset()  # reset to 0
        count = 0
        # tqdm adds a progress bar to long Python loops; just wrap any iterator as tqdm(iterator)
        for i, data_ in enumerate(dataloader):
            # training
            # data_: one batch of shape torch.Size([128, 80]); each batch holds 128 poems,
            # each at most 80 characters long. Later hidden_size = embed_size = 128.
            # print(data_.shape)
            # print(data_)
            # contiguous() lays the tensor out contiguously in memory (a transpose here would
            # swap dims 0 and 1, turning batch_size*seq_len into seq_len*batch_size)
            data_ = data_.long().contiguous()
            if opt.use_gpu:
                data_ = data_.cuda()
            # loss_meter.reset()  # reset to 0
            for poetry in data_:  # poetry is one row of data_, i.e. one poem
                loss = 0
                encoder_hidden = encoder.initHidden(opt.use_gpu)
                en_optimizer.zero_grad()  # zero the gradients
                de_optimizer.zero_grad()
                count += 1
                # print(poetry)
                # print(epoch, ':', count)
                # the input is the first opt.input_len characters of each poem and
                # the target is the poem shifted by one character (mind the shapes)
                input_, target_ = poetry[:opt.input_len].view(-1, 1), poetry[1:].view(-1, 1)
                input_len = input_.size(0)
                output_len = target_.size(0)
                encoder_outputs = t.zeros(
                    opt.input_len,
                    encoder.hidden_size,
                    device='cuda' if opt.use_gpu else 'cpu')

                # loss_meter.reset()  # reset to 0
                # encoder_hidden = encoder.initHidden(opt.use_gpu)
                # en_optimizer.zero_grad()  # zero the gradients
                # de_optimizer.zero_grad()
                #encoder:
                for ei in range(input_len):
                    encoder_output, encoder_hidden = encoder(
                        input_[ei], encoder_hidden)
                    encoder_outputs[ei] = encoder_output[0]
                #decoder:
                #use_teacher_forcing=True if random.random()<opt.teacher_forcing_ratio else False
                use_teacher_forcing = True
                decoder_input = t.tensor(
                    [[word2ix['<START>']]],
                    device='cuda' if opt.use_gpu else 'cpu')
                decoder_hidden = encoder_hidden
                if use_teacher_forcing:
                    for di in range(output_len):
                        decoder_output, decoder_hidden, decoder_attention = decoder(
                            decoder_input, decoder_hidden, encoder_outputs)
                        decoder_input = target_[di]
                        loss += criterion(decoder_output, target_[di])
                        #loss_meter.add(loss.item())
                else:
                    for di in range(output_len):
                        decoder_output, decoder_hidden, decoder_attention = decoder(
                            decoder_input, decoder_hidden, encoder_outputs)
                        topv, topi = decoder_output.topk(1)  # the predicted next character
                        # detach blocks backpropagation through this node
                        decoder_input = topi.squeeze().detach()
                        loss += criterion(decoder_output, target_[di])
                        #loss_meter.add(loss.item())
                        if decoder_input.item() == word2ix['<END>']:
                            break

                loss.backward()
                en_optimizer.step()
                de_optimizer.step()
                # visualization
                if count % opt.plot_every == 0:
                    if os.path.exists(opt.debug_file):
                        ipdb.set_trace()  # set a breakpoint
                    # show the loss value
                    # print(type(loss_meter.value()[0]), loss_meter.value()[0])
                    vis.line(X=np.array([
                        (count // opt.plot_every) / opt.plot_every
                    ]),
                             Y=np.array([loss.item() / output_len]),
                             win='loss',
                             update='None' if count //
                             opt.plot_every == 0 else 'append')
                    # show the original poem
                    # print(type(poetry), poetry.shape, poetry.tolist())
                    p = [ix2word[k] for k in poetry.tolist()]  # the original poem

                    vis.text(' '.join(p), win=u'origin_poem')

                    # # show a generated poem
                    start_words = '床前明月光,疑似地上霜'
                    gen_poetry = ''.join(
                        generate(opt, encoder, decoder, start_words, ix2word,
                                 word2ix))
                    print(i, ':', gen_poetry)
                    vis.text(''.join(gen_poetry), win=u'generate_poem')
                    # # save the models (use separate filenames so the decoder does not overwrite the encoder)
                    t.save(encoder.state_dict(),
                           '%s/seq2seq/encoder_%s.pth' % (opt.model_prefix, epoch))
                    t.save(decoder.state_dict(),
                           '%s/seq2seq/decoder_%s.pth' % (opt.model_prefix, epoch))
Exemple #27
0
def validate(net,
             K,
             L,
             W,
             misfit,
             val_loader,
             use_gpu,
             epoch,
             fig_path,
             save_fig,
             nbatches=1,
             is_unet=False):

    # For now just test on one image from the training set, later loop over val set
    running_loss = tnt.AverageValueMeter()
    running_acc = tnt.AverageValueMeter()

    count = 0
    for batch_idx, (images, labels) in enumerate(val_loader):

        if use_gpu:
            images = images.cuda()
            labels = labels.cuda()

        # Forward Pass
        with torch.no_grad():

            if is_unet:
                outputs = net(images)
            else:
                X = net(images, K, L)
                outputs = conv1x1(X, W)
            probs = softmax(outputs)
            loss = misfit(outputs, labels)
            _, preds = torch.max(outputs, 1)
            acc = getAccuracy(preds, labels)

        running_loss.add(loss.item())
        running_acc.add(acc)
        summary_writer.add_scalar('Val Loss', running_loss.mean,
                                  epoch + (batch_idx / nbatches))
        summary_writer.add_scalar('Val Acc', running_acc.mean,
                                  epoch + (batch_idx / nbatches))

        # Save every val image
        if save_fig and (epoch + 1) % 24 == 0:
            for i in range(images.shape[0]):
                plot_probs(
                    images[i], labels[i], preds[i], probs[i],
                    os.path.join(fig_path,
                                 'final_preds/%06d_%04d.png' % (epoch, count)))
                count += 1

    # Save a single val image
    if save_fig and epoch % 1 == 0:
        plot_probs(images[0], labels[0], preds[0], probs[0],
                   os.path.join(fig_path, 'validating/%06d.png' % epoch))

    print('\n    Validation Loss: %6.4f, Acc: %6.4f' %
          (running_loss.mean, running_acc.mean * 100))

    return running_loss.mean
Exemple #28
0
def train(**kwargs):
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):

            # train model
            input = data.to(opt.device)
            target = label.to(opt.device)

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            # detaching here is a bit safer
            confusion_matrix.add(score.detach(), target.detach())

            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

                # enter debug mode
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # update learning rate
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # the second way to lower the learning rate: no momentum or other optimizer state is lost
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
Exemple #29
0
def train(**kwargs):

    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)

    # load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # model definition
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()

    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))

    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):

            # training
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu: data_ = data_.cuda()
            optimizer.zero_grad()
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())

            # # visualization
            # if (1+ii)%opt.plot_every==0:
            #
            #     if os.path.exists(opt.debug_file):
            #         ipdb.set_trace()
            #
            #     vis.plot('loss',loss_meter.value()[0])
            #
            #     # the original poems
            #     poetrys=[ [ix2word[_word] for _word in data_[:,_iii]]
            #                         for _iii in range(data_.size(1))][:16]
            #     vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),win=u'origin_poem')
            #
            #     gen_poetries = []
            #     # generate 8 poems, one starting with each of these characters
            #     for word in list(u'春江花月夜凉如水'):
            #         gen_poetry =  ''.join(generate(model,word,ix2word,word2ix))
            #         gen_poetries.append(gen_poetry)
            #     vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),win=u'gen_poem')

        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
Exemple #30
0
def train(**kwargs):
    #init
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    if opt.vis:
        vis = Visualizer(opt.env)
        vis_val = Visualizer('valdemoire')

    #dataset
    FiveCrop_transforms = transforms.Compose([
        transforms.FiveCrop(256),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops]))
    ])
    data_transforms = transforms.Compose([
        # transforms.RandomCrop(256),
        transforms.ToTensor()
    ])
    train_data = MoireData(opt.train_path)
    test_data = MoireData(opt.test_path, is_val=True)
    train_dataloader = DataLoader(train_data,
                                  batch_size=opt.train_batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers,
                                  drop_last=True)
    test_dataloader = DataLoader(test_data,
                                 batch_size=opt.val_batch_size,
                                 shuffle=True,
                                 num_workers=opt.num_workers,
                                 drop_last=True)

    last_epoch = 0
    #model_init
    cfg.merge_from_file("config/cfg.yaml")
    model = get_pose_net(cfg, pretrained=opt.model_path)  #initweight
    model = model.to(opt.device)

    if opt.vis:
        val_loss, val_psnr = val(model, test_dataloader, vis_val)
        print(val_loss, val_psnr)
    else:
        val_loss, val_psnr = val(model, test_dataloader)
        print(val_loss, val_psnr)

    criterion_c = L1_Charbonnier_loss()
    criterion_s = L1_Sobel_Loss()
    lr = opt.lr
    optimizer = torch.optim.Adam(
        params=model.parameters(),
        lr=lr,
        weight_decay=0.01  #0.005
    )

    if opt.model_path:
        map_location = lambda storage, loc: storage
        checkpoint = torch.load(opt.model_path, map_location=map_location)
        last_epoch = checkpoint["epoch"]
        optimizer_state = checkpoint["optimizer"]
        optimizer.load_state_dict(optimizer_state)

        lr = checkpoint["lr"]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    loss_meter = meter.AverageValueMeter()
    psnr_meter = meter.AverageValueMeter()
    previous_loss = 1e100
    accumulation_steps = opt.accumulation_steps

    for epoch in range(opt.max_epoch):
        if epoch < last_epoch:
            continue
        loss_meter.reset()
        psnr_meter.reset()
        torch.cuda.empty_cache()
        loss_list = []

        for ii, (moires, clear_list) in tqdm(enumerate(train_dataloader)):
            moires = moires.to(opt.device)
            clears = clear_list[0].to(opt.device)

            output_list, edge_output_list = model(moires)
            outputs, edge_X = output_list[0], edge_output_list[0]

            if epoch < 20:
                pass
            elif epoch >= 20 and epoch < 40:
                opt.loss_alpha = 0.9
            else:
                opt.loss_alpha = 1.0

            c_loss = criterion_c(outputs, clears)
            s_loss = criterion_s(edge_X, clears)
            loss = opt.loss_alpha * c_loss + (1 - opt.loss_alpha) * s_loss

            # gradient accumulation trick
            loss = loss / accumulation_steps
            loss.backward()

            if (ii + 1) % accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()

            loss_meter.add(loss.item() * accumulation_steps)

            moires = tensor2im(moires)
            outputs = tensor2im(outputs)
            clears = tensor2im(clears)

            psnr = colour.utilities.metric_psnr(outputs, clears)
            psnr_meter.add(psnr)

            if opt.vis and (ii + 1) % opt.plot_every == 0:  # plot once every 100 batches
                vis.images(moires, win='moire_image')
                vis.images(outputs, win='output_image')
                vis.text(
                    "current outputs_size:{outputs_size},<br/> outputs:{outputs}<br/>"
                    .format(outputs_size=outputs.shape, outputs=outputs),
                    win="size")
                vis.images(clears, win='clear_image')
                #record the train loss to txt
                vis.plot('train_loss', loss_meter.value()[0])  # meter.value() returns (mean, std)
                vis.log(
                    "epoch:{epoch}, lr:{lr}, train_loss:{loss}, train_psnr:{train_psnr}"
                    .format(epoch=epoch + 1,
                            loss=loss_meter.value()[0],
                            lr=lr,
                            train_psnr=psnr_meter.value()[0]))
                loss_list.append(str(loss_meter.value()[0]))

            torch.cuda.empty_cache()
        if opt.vis:
            val_loss, val_psnr = val(model, test_dataloader, vis_val)
            vis.plot('val_loss', val_loss)
            vis.log(
                "epoch:{epoch}, average val_loss:{val_loss}, average val_psnr:{val_psnr}"
                .format(epoch=epoch + 1, val_loss=val_loss, val_psnr=val_psnr))
        else:
            val_loss, val_psnr = val(model, test_dataloader)

        # write this epoch's losses to a file
        with open(opt.save_prefix + "loss_list.txt", 'a') as f:
            f.write("\nepoch_{}\n".format(epoch + 1))
            f.write('\n'.join(loss_list))

        if (epoch + 1) % opt.save_every == 0 or epoch == 0:  # save once every 5 epochs
            prefix = opt.save_prefix + 'HRnet_epoch{}_'.format(epoch + 1)
            file_name = time.strftime(prefix + '%m%d_%H_%M_%S.pth')
            checkpoint = {
                'epoch': epoch + 1,
                "optimizer": optimizer.state_dict(),
                "model": model.state_dict(),
                "lr": lr
            }
            torch.save(checkpoint, file_name)

        if (loss_meter.value()[0] > previous_loss) or ((epoch + 1) % 10) == 0:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]

    prefix = opt.save_prefix + 'HRnet_final_'
    file_name = time.strftime(prefix + '%m%d_%H_%M_%S.pth')
    checkpoint = {
        'epoch': epoch + 1,
        "optimizer": optimizer.state_dict(),
        "model": model.state_dict(),
        "lr": lr
    }
    torch.save(checkpoint, file_name)
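The last snippet divides the loss by accumulation_steps and only calls optimizer.step() every accumulation_steps batches, which is the usual gradient-accumulation pattern for simulating a larger batch. A stripped-down sketch of just that pattern; the model, data and step count are stand-ins:

import torch

model = torch.nn.Linear(16, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.MSELoss()
accumulation_steps = 4                                   # effective batch = 4 x mini-batch

optimizer.zero_grad()
for ii in range(20):                                     # pretend mini-batches
    x, y = torch.randn(8, 16), torch.randn(8, 1)
    loss = criterion(model(x), y) / accumulation_steps   # scale so accumulated grads average out
    loss.backward()                                      # gradients accumulate in .grad
    if (ii + 1) % accumulation_steps == 0:
        optimizer.step()                                 # update once per accumulation window
        optimizer.zero_grad()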