Code example #1
0
File: test.py  Project: mindfyhh/HRDN-DEMOIRE
def test(**kwargs):
    """Evaluate the HRDN demoire model on the test set.

    Merges ``kwargs`` into the global ``opt`` config, runs the model over
    the test dataloader, saves each demoired output image under
    ``opt.save_prefix``, and prints the average PSNR.
    """
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    # Only create the visualizer when requested; keep vis bound either way
    # so the logging check below can never hit an unbound name.
    vis = Visualizer(opt.env) if opt.vis else None

    test_data = Val_MoireData(opt.test_path)
    test_dataloader = DataLoader(test_data,
                                 batch_size=opt.test_batch_size,
                                 shuffle=False,
                                 num_workers=opt.num_workers,
                                 drop_last=False)

    model = get_model("HRDN")
    prefix = "{0}{1}/".format(opt.save_prefix, "HRDN")
    model.eval()
    torch.cuda.empty_cache()

    psnr_meter = meter.AverageValueMeter()
    # Inference only: disable autograd so activations are not retained.
    with torch.no_grad():
        for ii, (moires, clears, labels) in tqdm(enumerate(test_dataloader)):
            moires = moires.to(opt.device)
            clears = clears.to(opt.device)
            output_list, _ = model(moires)
            outputs = output_list[0]
            moires = tensor2im(moires)
            outputs = tensor2im(outputs)
            clears = tensor2im(clears)

            psnr = colour.utilities.metric_psnr(outputs, clears)
            psnr_meter.add(psnr)

            # Save every demoired image in the batch, named by its label.
            bs = moires.shape[0]
            for jj in range(bs):
                output, clear = outputs[jj], clears[jj]
                label = labels[jj]
                img_path = "{0}{1}_output.png".format(prefix, label)
                save_single_image(output, img_path)

            # Log the batch PSNR every 10 iterations.
            if opt.vis and vis is not None and (ii + 1) % 10 == 0:
                vis.log(">>>>>>>> batch_psnr:{psnr}<<<<<<<<<<".format(psnr=psnr))

            torch.cuda.empty_cache()
    print("average psnr is {}".format(psnr_meter.value()[0]))
Code example #2
0
def train(**kwargs):
    """Train a 17-class flower classifier.

    Command-line overrides in ``kwargs`` are merged into ``opt``. Trains
    for ``opt.max_epoch`` epochs, validates and tests after each epoch,
    keeps the best-validation checkpoint, and decays the learning rate
    whenever the mean epoch loss stops decreasing.
    """
    # Merge command-line overrides into the global config.
    opt.parse(kwargs)
    print(kwargs)

    # visdom visualizer
    vis = Visualizer(opt.env)

    # Log the effective (non-dunder) config values.
    vis.log('user config:')
    for k, v in opt.__class__.__dict__.items():
        if not k.startswith('__'):
            vis.log('{} {}'.format(k, getattr(opt, k)))

    # Build the model class named by opt.model.
    model = getattr(models, opt.model)()

    if opt.use_pretrained_model:
        model = load_pretrained()

    if opt.load_model_path:
        # Resume from an existing checkpoint.
        model.load(opt.load_model_path)
    elif opt.use_weight_init:
        # Fresh model: apply custom weight initialization.
        model.apply(weight_init)
    # Move to GPU if requested.
    if opt.use_gpu:
        model.cuda()

    # Datasets and loaders.
    train_data = Flower(train=True)
    val_data = Flower(train=False)
    test_data = Flower(test=True)

    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=True,
                                num_workers=opt.num_workers)
    test_dataloader = DataLoader(test_data,
                                 opt.batch_size,
                                 shuffle=False,
                                 num_workers=opt.num_workers)

    # Loss and optimizer: SGD with Nesterov momentum for DenseNet-style
    # models, Adam otherwise.
    criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    if 'Dense' in opt.model:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=0.9,
                                    nesterov=True,
                                    weight_decay=opt.weight_decay)
    else:
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=lr,
                                     weight_decay=opt.weight_decay)

    # Meters: running loss average and a 17-class confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(17)
    previous_loss = 1e100

    best_accuracy = 0

    # Training loop.
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for batch_index, (data, label) in tqdm(enumerate(train_dataloader)):

            input = Variable(data)
            target = Variable(label)
            # Move the batch to the GPU when enabled.
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # loss.item() replaces the deprecated loss.data[0]
            # (removed in PyTorch >= 0.5).
            loss_meter.add(loss.item())

            # score: [batch_size, 17], target: [batch_size]
            confusion_matrix.add(score.data, target.data)

            if batch_index % opt.print_freq == opt.print_freq - 1:
                print('loss ', loss_meter.value()[0])
                vis.plot('loss', loss_meter.value()[0])

        # Periodically checkpoint (only when training from scratch).
        if not opt.use_pretrained_model and epoch % opt.save_freq == 0:
            model.save()

        # Validation / test metrics for this epoch.
        val_cm, val_accuracy = val(model, val_dataloader)
        test_cm, test_accuracy = val(model, test_dataloader)

        print('Epoch {}/{}: val_accuracy  {}'.format(epoch, opt.max_epoch,
                                                     val_accuracy))

        vis.plot('val_accuracy', val_accuracy)
        vis.plot('test_accuracy', test_accuracy)
        vis.log('epoch:{epoch}, lr:{lr}, loss:{loss}'.format(
            epoch=epoch, loss=loss_meter.value()[0], lr=lr))

        # Track and save the best validation model.
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            torch.save(model.state_dict(),
                       './checkpoints/best_{}.pth'.format(opt.model))
            if not opt.use_pretrained_model:
                model.save('./checkpoints/best_{}.pth'.format(
                    model.model_name))

        # Decay the learning rate when the loss stops improving; updating
        # param_groups in place preserves optimizer state (e.g. momentum).
        if float(loss_meter.value()[0]) > previous_loss:
            lr = lr * opt.lr_decay

            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
    print('Best model validation accuracy {}'.format(best_accuracy))
Code example #3
0
def train():
    """Train the Kesci DoubleSequence model on the v3 23-d data.

    Runs 400 epochs with BCEWithLogitsLoss, periodically measures F1 on
    the train/val/test splits, plots to visdom, and decays the learning
    rate when the mean epoch loss stops decreasing.
    """
    vis = Visualizer("Kesci" + time.strftime('%m%d%H%M'))
    train_data = AppData("../kesci/data/data_v3_23d/train_ab.json",
                         iflabel=True)
    val_data = AppData("../kesci/data/data_v3_23d/val_ab.json", iflabel=True)
    train_dataloader = DataLoader(train_data, 256, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(val_data, 512, shuffle=False, num_workers=2)
    test_data = AppData("../kesci/data/data_v3_23d/test_ab.json", iflabel=True)
    test_dataloader = DataLoader(test_data, 512, shuffle=False, num_workers=2)

    criterion = t.nn.BCEWithLogitsLoss().cuda()
    learning_rate = 0.002
    weight_decay = 0.0003
    model = DoubleSequence(31, 128, 1).cuda()
    optimizer = t.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=weight_decay)

    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(400):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, property, target) in tqdm(enumerate(train_dataloader)):
            input = Variable(data).cuda()
            input2 = Variable(property).cuda()
            target = Variable(target).cuda()
            output = model(input, input2)

            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            # loss.item() replaces the deprecated loss.data[0]
            # (removed in PyTorch >= 0.5).
            loss_meter.add(loss.item())

            if ii % 100 == 99:
                vis.plot('loss', loss_meter.value()[0])

        # Every third epoch also measure F1 on the training split.
        if epoch % 3 == 2:
            train_cm, train_f1 = val(model, train_dataloader)
            vis.plot('train_f1', train_f1)
        val_cm, val_f1 = val(model, val_dataloader)

        vis.plot_many({'val_f1': val_f1, 'learning_rate': learning_rate})
        # Decay the learning rate when the loss stops decreasing; updating
        # param_groups in place preserves optimizer state (e.g. momentum).
        if loss_meter.value()[0] > previous_loss:
            learning_rate = learning_rate * 0.9
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        previous_loss = loss_meter.value()[0]

        # Every third epoch: checkpoint, evaluate on test, and log the
        # full precision/recall breakdown.
        if epoch % 3 == 2:
            model.save()
            test_cm, test_f1 = val(model, test_dataloader)
            vis.plot('test_f1', test_f1)
            vis.log(
                "训练集:{train_f1:%}, {train_pre:%}, {train_rec:%} | 验证集:{val_f1:%}, {val_pre:%}, {val_rec:%} | \
            测试集:{test_f1:%}, {test_pre:%}, {test_rec:%} | {train_true_num:%}, {val_true_num:%}, {test_true_num:%}"
                .format(
                    train_f1=train_f1,
                    val_f1=val_f1,
                    test_f1=test_f1,
                    train_true_num=train_cm.value()[:, 0].sum() /
                    len(train_data),
                    val_true_num=val_cm.value()[:, 0].sum() / len(val_data),
                    test_true_num=test_cm.value()[:, 0].sum() / len(test_data),
                    train_pre=train_cm.value()[0][0] /
                    train_cm.value()[0].sum(),
                    train_rec=train_cm.value()[0][0] /
                    train_cm.value()[:, 0].sum(),
                    val_pre=val_cm.value()[0][0] / val_cm.value()[0].sum(),
                    val_rec=val_cm.value()[0][0] / val_cm.value()[:, 0].sum(),
                    test_pre=test_cm.value()[0][0] / test_cm.value()[0].sum(),
                    test_rec=test_cm.value()[0][0] /
                    test_cm.value()[:, 0].sum()))
Code example #4
0
File: train.py  Project: yuanyuanzijin/learn-pytorch
def train():
    """Train the Kesci Sequence model on the 16-d target data.

    Runs 500 epochs with cross-entropy loss, tracks F1 on the
    train/val/test splits, and decays the learning rate when the mean
    epoch loss stops decreasing.
    """
    vis = Visualizer("Kesci")
    train_data = AppData("data/data_16d_target/train.json", iflabel=True)
    val_data = AppData("data/data_16d_target/val.json", iflabel=True)
    train_dataloader = DataLoader(train_data, 32, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(val_data, 256, shuffle=False, num_workers=2)
    test_data = AppData("data/data_16d_target/test.json", iflabel=True)
    test_dataloader = DataLoader(test_data, 256, shuffle=False, num_workers=2)

    criterion = t.nn.CrossEntropyLoss().cuda()
    learning_rate = 0.003
    weight_decay = 0.0002
    model = Sequence(15, 128, 1).cuda()
    optimizer = t.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=weight_decay)

    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(500):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, property, label) in tqdm(enumerate(train_dataloader)):
            input = Variable(data).cuda()
            input2 = Variable(property).cuda()
            target = Variable(label).cuda().view(-1)
            output = model(input, input2)

            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # loss.item() replaces the deprecated loss.data[0]
            # (removed in PyTorch >= 0.5).
            loss_meter.add(loss.item())

            confusion_matrix.add(output.data, target.data)

            if ii % 100 == 99:
                vis.plot('loss', loss_meter.value()[0])

        # Every third epoch also measure F1 on the training split.
        # NOTE(review): the log below runs every 10th epoch and reuses
        # train_cm/train_f1 from the most recent "epoch % 3 == 2" pass,
        # so those numbers can be up to two epochs stale - confirm intended.
        if epoch % 3 == 2:
            train_cm, train_f1 = val(model, train_dataloader)
            vis.plot('train_f1', train_f1)
        val_cm, val_f1 = val(model, val_dataloader)

        vis.plot_many({'val_f1': val_f1, 'learning_rate': learning_rate})

        # Decay the learning rate when the loss stops decreasing; updating
        # param_groups in place preserves optimizer state (e.g. momentum).
        if loss_meter.value()[0] > previous_loss:
            learning_rate = learning_rate * 0.95
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        previous_loss = loss_meter.value()[0]

        # Every 10th epoch: checkpoint, evaluate on test, and log the
        # full precision/recall breakdown.
        if epoch % 10 == 9:
            model.save()
            test_cm, test_f1 = val(model, test_dataloader)
            vis.plot('test_f1', test_f1)
            vis.log(
                "model:{model} | {train_f1}, {train_pre}, {train_rec} | {val_f1}, {val_pre}, {val_rec} | {test_f1}, {test_pre}, {test_rec}"
                .format(train_f1=train_f1,
                        val_f1=val_f1,
                        test_f1=test_f1,
                        model=time.strftime('%m%d %H:%M:%S'),
                        train_pre=str(train_cm.value()[0][0] /
                                      train_cm.value()[:, 0].sum()),
                        train_rec=str(train_cm.value()[0][0] /
                                      train_cm.value()[0].sum()),
                        val_pre=str(val_cm.value()[0][0] /
                                    val_cm.value()[:, 0].sum()),
                        val_rec=str(val_cm.value()[0][0] /
                                    val_cm.value()[0].sum()),
                        test_pre=str(test_cm.value()[0][0] /
                                     test_cm.value()[:, 0].sum()),
                        test_rec=str(test_cm.value()[0][0] /
                                     test_cm.value()[0].sum())))
Code example #5
0
def train(**kwargs):
    """Train a multi-class DogCat classifier.

    ``kwargs`` are forwarded to ``opt.parse`` to override the default
    configuration. Trains for ``opt.max_epoch`` epochs, validates after
    each epoch, and decays the learning rate when the mean epoch loss
    stops decreasing.
    """
    opt.parse(**kwargs)
    # step1: configure model
    model = getattr(models, opt.model)(opt.num_class)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_path, transform=opt.train_transform, train=True)
    val_data = DogCat(opt.train_data_path, transform=opt.test_val_transform, train=False, test=False)
    train_dataloader = DataLoader(train_data, batch_size=opt.batch_size, shuffle=opt.shuffle, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=opt.batch_size, shuffle=opt.shuffle, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(params=model.parameters(), lr=lr, weight_decay=opt.weight_decay)

    # step4: meters (mean epoch loss, plus a confusion matrix)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(opt.num_class)
    previous_loss = 1e6
    # step5: train
    vis = Visualizer(opt.env)
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # BUG FIX: add the scalar loss, not the tensor - feeding the
            # raw tensor to AverageValueMeter breaks its mean/std math.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot(win='loss', y=loss_meter.value()[0])

        model.save()

        # step6: validate and visualize
        val_confusion_matrix, val_accuracy = val(model, val_dataloader)
        vis.plot(win='val_accuracy', y=val_accuracy)
        vis.log(win='log_text', info=
                'epoch:{epoch}, lr:{lr}, loss:{loss}, train_cm:{train_cm}, val_cm:{val_cm}'.format(
                    epoch=epoch, lr=lr, loss=loss_meter.value()[0], train_cm=str(confusion_matrix.value()), val_cm=str(val_confusion_matrix)
                )
                )

        # step7: decay the learning rate when the loss stops decreasing;
        # updating param_groups in place preserves optimizer state.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
Code example #6
0
def train(**kwargs):
    '''
    Train a CWRU bearing-fault classifier.

    :param kwargs: tunable parameters; defaults come from the config module
    :return: trained model, checkpointed once per epoch via model.save()
    '''

    # Update the configuration from command-line arguments.
    opt.parse(kwargs)
    # visdom plotting helper
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: build the model
    # Pick the model class named opt.model from the models package.
    model = getattr(models, opt.model)()
    # Optionally restore saved parameters.
    if opt.load_model_path:
        model.load(opt.load_model_path)

    # Select the GPU (hard-coded to device 2 here).
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
    model.to(opt.device)

    # step2: data
    train_data = CWRUDataset2D(opt.train_data_root, train=True)
    # The test split doubles as the validation set; none of it is used
    # for training.
    test_data = CWRUDataset2D(opt.train_data_root, train=False)

    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True)
    test_dataloader = DataLoader(test_data, opt.batch_size, shuffle=False)

    # step3: loss and optimizer
    # Cross-entropy classification loss.
    criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    # Adam optimizer.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=opt.weight_decay)

    # step4: meters - smoothed loss plus a confusion matrix
    # Tracks mean and std of the loss over an epoch.
    loss_meter = meter.AverageValueMeter()
    # Confusion matrix over opt.category classes.
    confusion_matrix = meter.ConfusionMeter(opt.category)
    previous_loss = 1e10

    # training loop
    for epoch in range(opt.max_epoch):

        # Reset the per-epoch statistics.
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):

            # Move the batch to the configured device.
            input = data.to(opt.device)
            target = label.to(opt.device)

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # Update statistics and visualization.
            loss_meter.add(loss.item())
            # detach() so the meter never holds onto the autograd graph.
            confusion_matrix.add(score.detach(), target.detach())

            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

                # Drop into the debugger when the debug marker file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb;
                    ipdb.set_trace()

        # Save the model once per epoch (the original comment said "per
        # batch", but this runs outside the batch loop).
        model.save()

        # Evaluate on the held-out split and visualize.
        val_cm, val_accuracy = val(model, test_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()), train_cm=str(confusion_matrix.value()),
            lr=lr))

        # If the loss has stopped decreasing, lower the learning rate.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # Updating param_groups in place preserves optimizer state
            # (e.g. momentum), unlike rebuilding the optimizer.
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
Code example #7
0
def train(**kwargs):
    """
    Train a scene-classification model.

    ``kwargs`` override attributes of the global ``opt`` config. Each
    epoch trains over the training split, validates, plots loss and
    accuracy to visdom, and decays the learning rate when the mean
    epoch loss stops decreasing.
    """
    # Update the config from the passed-in arguments.
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    cudnn.enabled = True
    cudnn.benchmark = True

    # step1: configure and (optionally) restore the model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: data (training and validation splits)
    train_data = SceneData(opt.train_data_root, opt.labels, train=True)
    val_data = SceneData(opt.train_data_root, opt.labels, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=True,
                                num_workers=opt.num_workers)

    # step3: loss and optimizer
    criterion = t.nn.CrossEntropyLoss()  # cross-entropy classification loss
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)  # Adam
    # To fine-tune only the final fully-connected layer, freeze all other
    # parameters and optimize just model.fc:
    #   for para in list(model.parameters())[:-1]:
    #       para.requires_grad = False
    #   optimizer = t.optim.Adam(params=[model.fc.weight, model.fc.bias],
    #                            lr=opt.lr, weight_decay=opt.weight_decay)

    # step4: meters - smoothed loss plus a confusion matrix
    loss_meter = meter.AverageValueMeter()  # mean/std of the epoch loss
    confusion_matrix = meter.ConfusionMeter(opt.num_labels)
    previous_loss = 1e100

    # training loop
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        # BUG FIX: tqdm's total is the number of batches the dataloader
        # yields (len(dataloader)), not the number of samples.
        for step, (data, label) in tqdm.tqdm(enumerate(train_dataloader),
                                             total=len(train_dataloader)):

            train_input = data.to(opt.device)
            label_input = label.to(opt.device)

            optimizer.zero_grad()  # clear accumulated gradients
            score = model(train_input)  # forward pass
            loss = criterion(score, label_input)  # compute the loss
            loss.backward()  # backpropagate
            optimizer.step()  # update parameters

            # Update statistics and visualization; detach() keeps the
            # meter from holding the autograd graph.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.detach(), label_input.detach())

            if step % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

        model.save()

        # Validation metrics and visualization.
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # Decay the learning rate once the loss stops decreasing; updating
        # param_groups in place preserves optimizer state.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
Code example #8
0
def train(**kwargs):
    """Train a dog-vs-cat classifier.

    ``kwargs`` override the default configuration via ``opt.parse``.
    Each epoch trains over the training split, saves a checkpoint,
    validates, and decays the learning rate when the mean epoch loss
    stops decreasing.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.retrain:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr, weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        # BUG FIX: tqdm's total is the number of batches the dataloader
        # yields (len(dataloader)), not the number of samples.
        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_dataloader)):

            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # Drop into the debugger when the debug marker file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # NOTE(review): this saves to the same path the model was loaded
        # from - confirm overwriting opt.load_model_path is intended.
        model.save(opt.load_model_path)

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
                    epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()), train_cm=str(confusion_matrix.value()), lr=lr))

        # Decay the learning rate when the loss stops decreasing; updating
        # param_groups in place preserves optimizer state (e.g. momentum).
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
Code example #9
0
def train():
    """Train the node-classification network, with optional resume.

    Restores model/optimizer/epoch state from ``opt.load_model_path``
    when set, steps a MultiStepLR schedule each epoch, and after every
    epoch saves a checkpoint and evaluates the validation loss.
    """
    vis = Visualizer(opt.env + opt.model)
    net = getattr(models, opt.model)()
    print('当前使用的模型为' + opt.model)
    # Cross-entropy loss for classification.
    criterion = t.nn.CrossEntropyLoss()
    optimizer = t.optim.Adam(net.parameters(),
                             lr=opt.learning_rate,
                             weight_decay=opt.weight_decay)

    start_epoch = 0
    if opt.load_model_path:
        checkpoint = t.load(opt.load_model_path)

        # Strip the 'module.' prefix so multi-GPU (DataParallel)
        # checkpoints load onto a single device / CPU.
        state_dict = checkpoint['net']
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # remove `module.`
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)  # model weights
        optimizer.load_state_dict(checkpoint['optimizer'])  # optimizer state
        start_epoch = checkpoint['epoch']  # epoch to resume from

    # The LR is multiplied by gamma at every milestone epoch.
    if start_epoch == 0:
        scheduler = t.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=opt.milestones,
                                                     gamma=0.1,
                                                     last_epoch=-1)
        print('从头训练 ,学习率为{}'.format(optimizer.param_groups[0]['lr']))
    else:
        scheduler = t.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=opt.milestones,
                                                     gamma=0.1,
                                                     last_epoch=start_epoch)
        print('加载预训练模型{}并从{}轮开始训练,学习率为{}'.format(
            opt.load_model_path, start_epoch, optimizer.param_groups[0]['lr']))

    # Move the network to the GPU(s).
    if opt.use_gpu:
        net = t.nn.DataParallel(net, device_ids=opt.device_ids)  # data parallel
        net.cuda()
        cudnn.benchmark = True

    train_data = NodeDataSet(train=True)
    val_data = NodeDataSet(val=True)

    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=True,
                                num_workers=opt.num_workers)

    for epoch in range(opt.max_epoch - start_epoch):
        print('开始 epoch {}/{}.'.format(start_epoch + epoch + 1, opt.max_epoch))
        epoch_loss = 0
        num_batches = 0

        # Advance the LR schedule once per epoch.
        # NOTE(review): modern PyTorch expects scheduler.step() after the
        # epoch's optimizer steps; kept at epoch start as in the original -
        # confirm the milestones line up as intended.
        scheduler.step()

        # Iterate the training loader.
        for ii, (block_3d, truth_label) in enumerate(train_dataloader):
            if opt.use_gpu:
                block_3d = block_3d.cuda()
                truth_label = truth_label.cuda()
            predict_label = net(block_3d)

            loss = criterion(predict_label, truth_label)
            epoch_loss += loss.item()
            num_batches = ii + 1

            if ii % 8 == 0:
                vis.plot('训练集loss', loss.item())

            optimizer.zero_grad()  # clear gradients
            loss.backward()  # backpropagate
            optimizer.step()  # update parameters

        # Log end-of-epoch state.
        vis.log("epoch:{epoch},lr:{lr},loss:{loss}".format(
            epoch=epoch, loss=loss.item(), lr=optimizer.param_groups[0]['lr']))
        # BUG FIX: average over the batch count, not the last index ii -
        # the old `epoch_loss / ii` skewed the mean and raised
        # ZeroDivisionError on a single-batch epoch.
        vis.plot('每轮epoch的loss均值', epoch_loss / num_batches)
        # Checkpoint model, optimizer and the ABSOLUTE epoch so a later
        # resume continues from the right place (the old code saved the
        # loop-relative epoch, resetting progress after each resume).
        state = {
            'net': net.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': start_epoch + epoch + 1
        }
        if not os.path.exists(opt.checkpoint_root):
            os.makedirs(opt.checkpoint_root)
        t.save(state, opt.checkpoint_root + '{}_node.pth'.format(epoch))

        # ============ validation ===================
        val_loss = 0
        val_batches = 0
        with t.no_grad():
            for jj, (val_block_3d, val_label) in enumerate(val_dataloader):
                if opt.use_gpu:
                    val_block_3d = val_block_3d.cuda()
                    val_label = val_label.cuda()
                val_predict_label = net(val_block_3d)
                loss = criterion(val_predict_label, val_label)
                val_loss += loss.item()
                val_batches = jj + 1
            # BUG FIX: same off-by-one as above - divide by jj + 1 batches.
            vis.plot('验证集loss均值', val_loss / val_batches)
Code example #10
0
File: main.py  Project: jh0905/DogCatProject
def train(**kwargs):
    """Train a dog-vs-cat classifier.

    ``kwargs`` hold command-line overrides merged into ``opt`` (e.g.
    ``python main.py train --env='env1219'``). Each epoch: train, save a
    checkpoint, validate, and decay the learning rate when the mean
    epoch loss stops decreasing.
    """
    opt.parse(kwargs)  # merge command-line overrides into the config
    vis = Visualizer(opt.env)

    # Step 1: model (architecture, optional saved weights, GPU).
    model = getattr(models, opt.model)()  # e.g. models.AlexNet()
    if opt.load_model_path:
        model.load(opt.load_model_path)  # restore saved parameters
    if opt.use_gpu:
        model.cuda()
    # Step 2: data (train/val splits wrapped in DataLoaders).
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_data_loader = DataLoader(train_data,
                                   opt.batch_size,
                                   shuffle=True,
                                   num_workers=opt.num_workers)
    val_data_loader = DataLoader(val_data,
                                 opt.batch_size,
                                 shuffle=False,
                                 num_workers=opt.num_workers)

    # Step 3: loss and optimizer.
    criterion = nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=opt.weight_decay)

    # Step 4: meters - smoothed loss and a 2x2 confusion matrix.
    # loss_meter.value() returns (mean, std).
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # Step 5: training loop.
    for epoch in range(opt.max_epoch):
        loss_meter.reset()  # meters start at nan
        confusion_matrix.reset()
        # len(dataloader) is already the number of batches
        # (ceil(len(dataset) / batch_size)) - no manual math needed.
        for ii, (data, label) in tqdm(enumerate(train_data_loader),
                                      total=len(train_data_loader)):
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()  # clear gradients from the last step
            pred = model(input)
            # CrossEntropyLoss(pred, target): pred holds (batch, n_classes)
            # logits, target holds (batch,) class indices in {0, 1}.
            loss = criterion(pred, target)
            loss.backward()  # compute gradients
            optimizer.step()  # update network weights
            # Update statistics and visualization.
            loss_meter.add(loss.item())
            confusion_matrix.add(pred.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:  # periodic loss plot
                vis.plot('loss', loss_meter.value()[0])

        # Save this epoch's model under the checkpoints directory.
        model.save()

        # Evaluate on the validation set and visualize accuracy.
        val_cm, val_accuracy = val(model, val_data_loader)
        vis.plot('val_accuracy', val_accuracy)

        vis.log(
            'epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}'
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # Decay the learning rate when the loss stops improving.
        # BUG FIX: decay cumulatively from the current lr; the old
        # `lr = opt.lr * opt.lr_decay` reset from the initial rate each
        # time, so the lr could never decay more than one step.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]  # remember this epoch's mean loss
Code example #11
0
def train(**kwargs):
    """Train a two-class DogCat classifier.

    Keyword arguments are forwarded to ``opt.parse`` to override the
    default configuration (model name, data root, batch size, ...).
    """
    # Update the global config from command-line keyword arguments.
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure the model; optionally resume from a checkpoint.
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    # step2: datasets and loaders (DogCat provides its own train/val split).
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: loss function and optimizer.
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters — smoothed loss average and a 2-class confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100  # sentinel so the first epoch never triggers decay

    # step5: training loop.
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in enumerate(train_dataloader):

            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualization.
            # FIX: loss.data[0] raises IndexError on 0-dim tensors
            # (PyTorch >= 0.4); use loss.item() as the other train()
            # variants in this file already do.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # Drop into the debugger when the debug flag-file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # Validate and visualize after each epoch.
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # Decay the learning rate when the epoch-average loss stopped
        # improving; editing param_groups in place keeps optimizer state
        # (e.g. Adam moments) intact.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
コード例 #12
0
def train():
    """Train the denoising network on grayscale image pairs.

    The network maps a noisy grayscale image to the clean grayscale
    original, optimizing an MSE loss with Adam.
    """
    vis = Visualizer(opt.env)
    # Build the network.
    netWork = NetWork()
    # Load checkpoint weights onto the CPU first; moved to GPU below.
    map_location = lambda storage, loc: storage
    if opt.load_model_path:
        netWork.load_state_dict(t.load(opt.load_model_path, map_location=map_location))
    # NOTE(review): GPU index 1 is hard-coded here and in the batch loop below.
    if opt.use_gpu:
        netWork.cuda(1)
    # step2: datasets — train split and validation split.
    train_data = DataProcessing(opt.data_root, train=True)
    val_data = DataProcessing(opt.data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, 1, shuffle=False, num_workers=opt.num_workers)
    # step3: criterion (mean squared error) and optimizer (Adam).
    criterion = t.nn.MSELoss()
    lr = opt.lr
    # FIX: build the optimizer from the local `lr` so it and the decay
    # logic below always refer to the same value (identical initially).
    optimizer = t.optim.Adam(netWork.parameters(), lr=lr, weight_decay=opt.weight_decay)
    # step4: meter tracking the running mean/std of the per-epoch loss.
    loss_meter = meter.AverageValueMeter()
    # Sentinel so the first epoch never triggers a decay.
    previous_loss = 1e100
    for epoch in range(opt.max_epoch):
        # Reset the meter for this epoch.
        loss_meter.reset()
        for ii, (data_origin, data_grayscale) in enumerate(train_dataloader):
            # input: noisy grayscale image; target: clean grayscale original.
            input_img = Variable(data_grayscale)
            output_real_img = Variable(data_origin)
            if opt.use_gpu:
                input_img = input_img.cuda(1)
                output_real_img = output_real_img.cuda(1)
            # Standard step: zero grads, forward, MSE loss, backward, update.
            optimizer.zero_grad()
            output_img = netWork(input_img)
            loss = criterion(output_img, output_real_img)
            loss.backward()
            optimizer.step()
            # FIX: loss.data[0] raises IndexError on 0-dim tensors
            # (PyTorch >= 0.4); use loss.item() instead.
            loss_meter.add(loss.item())
            # Plot the smoothed loss every print_freq batches.
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
        # Save the model after each epoch.
        netWork.save()
        # Run the lena.jpg probe image through this epoch's model.
        add_every_epoch_lena(netWork, epoch)

        # Log the epoch summary.
        vis.log("epoch:{epoch},lr:{lr},loss:{loss}".format(
            epoch=epoch, loss=loss_meter.value()[0], lr=lr))
        # Decay the learning rate when the epoch-average loss went up;
        # editing param_groups in place preserves optimizer state.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
        # NOTE(review): dict_lena is not defined in this function —
        # presumably a module-level dict filled by add_every_epoch_lena;
        # verify before relying on it.
        vis.img_many(dict_lena)
    print("============训练完毕=============")
コード例 #13
0
ファイル: main.py プロジェクト: hank08tw/Genderclassification
def train(**kwargs):
    """Train a torchvision resnet18 on the DogCat data and validate once
    at the end.

    NOTE(review): several issues in this block are flagged inline rather
    than fixed, because the intended structure is ambiguous.
    """
    #opt.parse(kwargs)
    vis = Visualizer()

    # step1: configure model
    print("come step 1")
    #model = getattr(models, opt.model)()
    # NOTE(review): torchvision's resnet18 has no .load()/.save() methods
    # (used below) and keeps its 1000-way fc head, while the
    # ConfusionMeter(2) below expects 2-class scores — confirm a custom
    # wrapper is in scope or the head is replaced elsewhere.
    model=models.resnet18(pretrained=True)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    # step2: data
    print("come here step 2")
    train_data = DogCat(opt.train_data_root,train=True)
    val_data = DogCat(opt.train_data_root,train=False)
    train_dataloader = DataLoader(train_data,opt.batch_size,
                        shuffle=False,num_workers=opt.num_workers)
    print(train_dataloader)
    val_dataloader = DataLoader(val_data,opt.batch_size,
                        shuffle=False,num_workers=opt.num_workers)
    
    # step3: criterion and optimizer
    print("come step 3")
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),lr = lr,weight_decay = opt.weight_decay)
        
    # step4: meters — smoothed loss and 2-class confusion matrix
    print("come step 4")
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        print("start training")
        loss_meter.reset()
        confusion_matrix.reset()
        # print("original matrix is:{}".format(confusion_matrix.value()))
        for ii,(data,label) in tqdm(enumerate(train_dataloader)):
            print("label is {}".format(label))
            # train model 
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            print("score is ",score)
            print("target is",target)
            loss = criterion(score,target)
            loss.backward()
            optimizer.step()
            #print("Epoch is :{s},Loss is {}".format(epoch,loss))
            
            
            # meters update and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            #if ii%opt.print_freq==opt.print_freq-1:
            #     vis.plot('loss', loss_meter.value()[0])
            #
            #     # enter debug mode
            #     if os.path.exists(opt.debug_file):
            #         import ipdb;
            #         ipdb.set_trace()
        print("Now learning rate is {}".format(lr))
        # Step-style decay every lr_decay_epoch epochs (also fires at epoch 0).
        if (epoch % opt.lr_decay_epoch == 0):
            lr = lr * opt.lr_decay
            # In-place param_group update keeps optimizer state (momentum etc.).
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

    # NOTE(review): everything below is OUTSIDE the epoch loop, so the model
    # is saved and validated only once after all epochs — the sibling train()
    # variants in this file do this per epoch; confirm which is intended.
    model.save()

    # validate and visualize

    val_cm, val_accuracy = val(model, val_dataloader)

    print("end val function")
    print(val_cm.value(),val_accuracy)
    vis.plot('val_accuracy',val_accuracy)
    vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
                    epoch = epoch,loss = loss_meter.value()[0],val_cm = str(val_cm.value()),train_cm=str(confusion_matrix.value()),lr=lr))

    # update learning rate
    # NOTE(review): previous_loss is never updated from its 1e100 sentinel,
    # so this condition can never be true — dead code as written.
    if loss_meter.value()[0] > previous_loss:
        lr = lr * opt.lr_decay
        # In-place param_group update keeps optimizer state (momentum etc.).
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
コード例 #14
0
def train(**kwargs):
    """Train a two-class DogCat classifier driven by the global config.

    Command-line keyword arguments are applied to ``opt`` before training.
    """
    opt.parse(kwargs=kwargs)
    vis = Visualizer(opt.env)

    # Model: instantiate by name, optionally resume, optionally move to GPU.
    net = getattr(models, opt.model)()
    if opt.load_model_path:
        net.load(opt.load_model_path)
    if opt.use_gpu:
        net.cuda()

    # Data: the DogCat dataset provides its own train/val split.
    train_set = DogCat(opt.train_data_root, train=True)
    val_set = DogCat(opt.train_data_root, train=False)
    train_loader = DataLoader(dataset=train_set,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=opt.num_workers)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers)

    # Loss, optimizer and running meters (smoothed loss + 2-class confusion).
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(params=net.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100  # sentinel: no decay on the first epoch

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for batch_idx, (data, label) in enumerate(train_loader):
            inputs = Variable(data)
            targets = Variable(label)
            if opt.use_gpu:
                inputs = inputs.cuda()
                targets = targets.cuda()

            # Zero grads, forward, loss, backward, update.
            optimizer.zero_grad()
            logits = net(inputs)
            batch_loss = criterion(logits, targets)
            batch_loss.backward()
            optimizer.step()

            # Track the smoothed loss and the confusion matrix.
            loss_meter.add(batch_loss.item())
            confusion_matrix.add(logits.data, targets)

            if batch_idx % opt.print_frep == opt.print_frep - 1:
                vis.plot("loss", loss_meter.value()[0])

                # Drop into the debugger when the flag file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
        net.save()
        # Per-epoch validation and visualization.
        val_cm, val_accuracy = val(net, val_loader)
        vis.plot("val_accuracy", val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))
        # Decay the lr in place (keeps optimizer state) when the loss went up.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr
        previous_loss = loss_meter.value()[0]
コード例 #15
0
    def train_torch(self, datasetTypeCls, learning_rate_value=None, learning_rate_decay=None, num_epochs=5000, early_stop_epochs=5):
        """Train ``self.model`` with early stopping on the validation loss.

        :param datasetTypeCls: dataset class used to build the train/valid sets
        :param learning_rate_value: initial lr (defaults to ``self.learning_rate_value``)
        :param learning_rate_decay: lr decay (defaults to ``self.learning_rate_decay``)
        :param num_epochs: maximum number of epochs
        :param early_stop_epochs: stop after this many consecutive epochs
            with no validation-loss improvement
        :return: None
        """
        self.saved_params = []

        if self.pretrained:
            print('Saved model found. Loading...')
            self.load_model()

        if learning_rate_value is None:
            learning_rate_value = self.learning_rate_value
        if learning_rate_decay is None:
            learning_rate_decay = self.learning_rate_decay

        print("Building model and compiling functions...")

        vis = Visualizer('xiao-moddrop-v3.0')

        # step 1: model
        model = self.model

        # step 2: data — some dataset classes take a simplified constructor.
        if datasetTypeCls in [DatasetOfDamagedMultimodal, DatasetLQAudio,  DatasetLQVideoClassifier, DatasetLQVideoFeatureExtractor, DatasetLQSkeleton]:
            train_data = datasetTypeCls(self.input_folder, train_valid_test='train')
            val_data = datasetTypeCls(self.input_folder, train_valid_test='valid')
        else:
            train_data = datasetTypeCls(self.input_folder, self.modality, 'train', self.hand_list,
                                        self.seq_per_class,
                                        self.nclasses, self.input_size, self.step, self.nframes)
            val_data = datasetTypeCls(self.input_folder, self.modality, 'valid', self.hand_list, 200,
                                      self.nclasses, self.input_size, self.step, self.nframes)

        print('Dataset prepared.')

        # num_workers should roughly match the CPU's logical core count.
        train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=12)
        val_loader = DataLoader(val_data, batch_size=32, shuffle=False, num_workers=12)

        print('DataLoader prepared.')

        # step 3: criterion and optimizer
        self.criterion = nn.CrossEntropyLoss()
        self.lr = 0.02 # 0.001
        self.optimizer = torch.optim.SGD(model.parameters(), lr=self.lr, weight_decay=1-0.9998, nesterov=True, momentum=0.8)

        # visdom line windows: per-batch loss and per-epoch loss curves.
        win = vis.line(
            X=numpy.array([0, 1]),
            Y=numpy.array([0, 1]),
            name="loss"
        )
        win1 = vis.line(
            X=numpy.array([0, 1]),
            Y=numpy.array([0, 1]),
            name="loss_epoch"
        )

        # step 4: move the model to the configured device.
        model.to(self.device)

        print('Training begin...')
        # NOTE(review): best_val_loss starts at self.nclasses — presumably an
        # upper bound for the initial cross-entropy; confirm this heuristic.
        best_val_loss = self.nclasses
        epochs_no_better_val_loss = 0

        for epoch in range(num_epochs):

            print(f'CURRENT EPOCH: {epoch}')

            # Per-batch losses for this epoch (plain floats, see below).
            losses = []

            train_loader_len = len(train_loader)
            # FIX: with fewer than 10 batches, len // 10 was 0 and
            # `ii % ten_pct` raised ZeroDivisionError — clamp to 1.
            ten_pct = max(1, train_loader_len // 10)
            for ii, (data, label) in enumerate(train_loader):
                if ii % ten_pct == 0 :
                    print(f'ii = {ii}, percentage: {ii/train_loader_len}')

                input = data
                target = label.to(torch.int64)

                # Multimodal batches arrive as a dict of tensors; only the
                # target moves to the device here — tensors inside the dict
                # are moved within model.forward().
                if not isinstance(input, dict):
                    input, target = input.to(self.device), target.to(self.device)
                else:
                    target = target.to(self.device)

                # Zero grads, forward, cross-entropy loss, backward, update.
                self.optimizer.zero_grad()
                score = model(input)
                loss = self.criterion(score, target)

                loss.backward()
                self.optimizer.step()

                # FIX: store plain floats instead of tensors (was loss.data)
                # so the per-epoch mean below is cheap Python arithmetic and
                # does not retain device tensors.
                losses.append(loss.item())

            print(f'Computation over epoch {epoch} is OK.')

            # Validate; keep the best model, or count toward early stopping.
            val_loss = self.val(model, val_loader)
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                self.save_model()
                epochs_no_better_val_loss = 0
            else:
                epochs_no_better_val_loss += 1
                if epochs_no_better_val_loss >= early_stop_epochs:
                    break
            print(f'Validation over epoch {epoch} is OK.')

            # Guard against an empty loader when computing the epoch mean.
            mean_train_loss = sum(losses) / max(1, len(losses))
            vis.line(X=torch.Tensor([epoch]), Y=torch.Tensor([mean_train_loss]), win=win1, update='append',
                     name='mean_train_loss_per_epoch')
            vis.line(X=torch.Tensor([epoch]), Y=torch.Tensor([val_loss]), win=win1, update='append',
                     name='val_loss')

            # FIX: log the scalar loss (was loss.data, a tensor repr).
            vis.log("[Train Loss] epoch:{epoch},lr:{lr},loss:{loss}".format(
                epoch=epoch, loss=loss.item(),
                lr=self.lr))
            vis.log("[Valid Loss] epoch:{epoch},lr:{lr},loss:{loss}".format(
                epoch=epoch, loss=val_loss,
                lr=self.lr))
コード例 #16
0
def train(**kwargs):
    """Train a two-class text classifier with periodic visdom logging and
    every-other-epoch checkpoints.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: model — resume from a checkpoint when given, else random init.
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load_new(opt.load_model_path)
    else:
        print('Initialize the model!')
        model.apply(weight_init)
    model.to(opt.device)

    # step2: data
    train_set = TextData(opt.data_root, opt.train_txt_path)
    val_set = TextData(opt.data_root, opt.val_txt_path)
    train_loader = DataLoader(train_set,
                              opt.batch_size,
                              shuffle=True,
                              num_workers=opt.num_workers)
    val_loader = DataLoader(val_set,
                            opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers)

    # step3: loss and a model-supplied optimizer.
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)

    # step4: meters — smoothed loss and 2-class confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for it_idx, (data, label) in tqdm(enumerate(train_loader)):
            # Move the batch to the configured device and take one SGD step.
            inputs = data.to(opt.device)
            targets = label.to(opt.device)
            optimizer.zero_grad()
            score = model(inputs)
            loss = criterion(score, targets)
            loss.backward()
            optimizer.step()

            # Update the running meters.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, targets.data)

            if it_idx % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])
                # Drop into the debugger when the flag file exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
            if it_idx % (opt.print_freq * 10) == 0:
                # Show the current input batch and print a progress line.
                vis.images(inputs.cpu().numpy(),
                           opts=dict(title='Label', caption='Label'),
                           win=1)
                print('Epoch: {} Iter: {} Loss: {}'.format(epoch, it_idx, loss))

        # Checkpoint every other epoch.
        if epoch % 2 == 0:
            model.save('./checkpoints/' + opt.env + '_' + str(epoch) + '.pth')

        # Validate, then plot/log both validation and training accuracy.
        val_cm, val_accuracy = val(model, val_loader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))
        train_cm = confusion_matrix.value()
        # Diagonal of the 2x2 confusion matrix over its total = accuracy.
        t_accuracy = 100. * train_cm.trace() / (train_cm.sum())
        vis.plot('train_accuracy', t_accuracy)

        # Decay the lr in place (keeps optimizer state) when the loss went up.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
コード例 #17
0
def train(**kwargs):
    """Train the HRnet demoire model.

    Keyword arguments override attributes on the global ``opt`` config.
    Supports gradient accumulation, resume from ``opt.model_path``, and
    periodic checkpointing every ``opt.save_every`` epochs.
    """
    # Override config from command-line kwargs.
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    if opt.vis:
        vis = Visualizer(opt.env)
        vis_val = Visualizer('valdemoire')

    # Datasets and loaders.
    # FIX: removed the unused FiveCrop/ToTensor transform locals that the
    # original built but never passed to any dataset.
    train_data = MoireData(opt.train_path)
    test_data = MoireData(opt.test_path, is_val=True)
    train_dataloader = DataLoader(train_data,
                                  batch_size=opt.train_batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers,
                                  drop_last=True)
    test_dataloader = DataLoader(test_data,
                                 batch_size=opt.val_batch_size,
                                 shuffle=True,
                                 num_workers=opt.num_workers,
                                 drop_last=True)

    last_epoch = 0
    # Model init — optionally initialized from a pretrained checkpoint.
    cfg.merge_from_file("config/cfg.yaml")
    model = get_pose_net(cfg, pretrained=opt.model_path)
    model = model.to(opt.device)

    # Baseline validation before any training.
    if opt.vis:
        val_loss, val_psnr = val(model, test_dataloader, vis_val)
        print(val_loss, val_psnr)
    else:
        val_loss, val_psnr = val(model, test_dataloader)
        print(val_loss, val_psnr)

    # Charbonnier (pixel) loss plus a Sobel edge loss, blended by loss_alpha.
    criterion_c = L1_Charbonnier_loss()
    criterion_s = L1_Sobel_Loss()
    lr = opt.lr
    optimizer = torch.optim.Adam(
        params=model.parameters(),
        lr=lr,
        weight_decay=0.01  #0.005
    )

    # Resume optimizer state, lr and epoch counter from the checkpoint.
    if opt.model_path:
        map_location = lambda storage, loc: storage
        checkpoint = torch.load(opt.model_path, map_location=map_location)
        last_epoch = checkpoint["epoch"]
        optimizer_state = checkpoint["optimizer"]
        optimizer.load_state_dict(optimizer_state)

        lr = checkpoint["lr"]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    loss_meter = meter.AverageValueMeter()
    psnr_meter = meter.AverageValueMeter()
    previous_loss = 1e100  # sentinel so the first epoch never triggers decay
    accumulation_steps = opt.accumulation_steps

    for epoch in range(opt.max_epoch):
        # Skip epochs already covered by the resumed checkpoint.
        if epoch < last_epoch:
            continue
        loss_meter.reset()
        psnr_meter.reset()
        torch.cuda.empty_cache()
        loss_list = []

        for ii, (moires, clear_list) in tqdm(enumerate(train_dataloader)):
            moires = moires.to(opt.device)
            clears = clear_list[0].to(opt.device)

            output_list, edge_output_list = model(moires)
            outputs, edge_X = output_list[0], edge_output_list[0]

            # Loss-blend schedule: keep the configured alpha for the first
            # 20 epochs, then shift weight toward the Charbonnier term.
            if 20 <= epoch < 40:
                opt.loss_alpha = 0.9
            elif epoch >= 40:
                opt.loss_alpha = 1.0

            c_loss = criterion_c(outputs, clears)
            s_loss = criterion_s(edge_X, clears)
            loss = opt.loss_alpha * c_loss + (1 - opt.loss_alpha) * s_loss

            # Gradient accumulation: scale the loss so the accumulated
            # gradient matches a single large-batch update.
            loss = loss / accumulation_steps
            loss.backward()

            if (ii + 1) % accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()

            # Undo the scaling so the meter records the true batch loss.
            loss_meter.add(loss.item() * accumulation_steps)

            moires = tensor2im(moires)
            outputs = tensor2im(outputs)
            clears = tensor2im(clears)

            psnr = colour.utilities.metric_psnr(outputs, clears)
            psnr_meter.add(psnr)

            # Visualize every opt.plot_every batches.
            if opt.vis and (ii + 1) % opt.plot_every == 0:
                vis.images(moires, win='moire_image')
                vis.images(outputs, win='output_image')
                vis.text(
                    "current outputs_size:{outputs_size},<br/> outputs:{outputs}<br/>"
                    .format(outputs_size=outputs.shape, outputs=outputs),
                    win="size")
                vis.images(clears, win='clear_image')
                # meter.value() returns (mean, std); plot the running mean.
                vis.plot('train_loss', loss_meter.value()[0])
                vis.log(
                    "epoch:{epoch}, lr:{lr}, train_loss:{loss}, train_psnr:{train_psnr}"
                    .format(epoch=epoch + 1,
                            loss=loss_meter.value()[0],
                            lr=lr,
                            train_psnr=psnr_meter.value()[0]))
                loss_list.append(str(loss_meter.value()[0]))

            torch.cuda.empty_cache()
        if opt.vis:
            val_loss, val_psnr = val(model, test_dataloader, vis_val)
            vis.plot('val_loss', val_loss)
            vis.log(
                "epoch:{epoch}, average val_loss:{val_loss}, average val_psnr:{val_psnr}"
                .format(epoch=epoch + 1, val_loss=val_loss, val_psnr=val_psnr))
        else:
            val_loss, val_psnr = val(model, test_dataloader)

        # Append this epoch's recorded losses to the log file.
        with open(opt.save_prefix + "loss_list.txt", 'a') as f:
            f.write("\nepoch_{}\n".format(epoch + 1))
            f.write('\n'.join(loss_list))

        # Checkpoint at epoch 0 and every save_every epochs thereafter.
        if (epoch + 1) % opt.save_every == 0 or epoch == 0:
            prefix = opt.save_prefix + 'HRnet_epoch{}_'.format(epoch + 1)
            file_name = time.strftime(prefix + '%m%d_%H_%M_%S.pth')
            checkpoint = {
                'epoch': epoch + 1,
                "optimizer": optimizer.state_dict(),
                "model": model.state_dict(),
                "lr": lr
            }
            torch.save(checkpoint, file_name)

        # Decay the lr when the loss went up, and also every 10 epochs.
        if (loss_meter.value()[0] > previous_loss) or ((epoch + 1) % 10) == 0:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]

    # Final checkpoint after the last epoch.
    prefix = opt.save_prefix + 'HRnet_final_'
    file_name = time.strftime(prefix + '%m%d_%H_%M_%S.pth')
    checkpoint = {
        'epoch': epoch + 1,
        "optimizer": optimizer.state_dict(),
        "model": model.state_dict(),
        "lr": lr
    }
    torch.save(checkpoint, file_name)
コード例 #18
0
def train():
    """Fine-tune a pretrained backbone as a 10-class classifier.

    Supports resuming from a ``.tar`` checkpoint (``opt.resum_model_dir``)
    and saves a checkpoint every 10 epochs, tracking the best top-1
    validation accuracy.
    """
    vis = Visualizer(opt.env, port=opt.vis_port)
    # step1: load a pretrained backbone and adapt it (feature extraction
    # vs. fine-tuning is decided inside init_extract_model).
    model = getattr(models, opt.model)(pretrained=True)
    model = init_extract_model(model, 10)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # step2: data
    train_data_list = standard_data(opt.train_data_dir, 'train')
    val_data_list = standard_data(opt.train_data_dir, 'val')
    train_dataloader = DataLoader(IcvDataset(train_data_list),
                                  batch_size=opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(IcvDataset(val_data_list, train=False),
                                batch_size=opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion, optimizer and scheduler (lr *= 0.1 every 300 epochs).
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=opt.lr,
                                 weight_decay=opt.weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=300,
                                                gamma=0.1)

    # step4: running meters for train loss / top-1 accuracy.
    train_losses = AverageMeter()
    train_top1 = AverageMeter()

    # step5.1: bookkeeping for resume and best-model tracking.
    start_epoch = 0
    best_top1 = 50

    # step5.2: restore a PyTorch .tar checkpoint, if configured.
    if opt.resum_model_dir is not None:
        checkpoint = torch.load(opt.resum_model_dir)
        start_epoch = checkpoint["epoch"]
        best_top1 = checkpoint["best_top1"]
        optimizer.load_state_dict(checkpoint["optimizer"])
        model.load_state_dict(checkpoint["state_dict"])
        # NOTE(review): scheduler state is not checkpointed, so on resume
        # the StepLR restarts from zero; persist scheduler.state_dict()
        # in the checkpoint for exact resumption.

    # Ensure all layers are in training mode after a potential restore.
    model.train()

    # step6: train
    for epoch in range(start_epoch, opt.max_epoch):
        # Current lr for logging (updated by the scheduler at epoch end).
        lr = get_learning_rate(optimizer)
        train_losses.reset()
        train_top1.reset()
        for batch_idx, (input, target) in enumerate(train_dataloader):
            input = input.to(device)
            target = target.to(device)
            optimizer.zero_grad()
            # forward
            output = model(input)
            loss = criterion(output, target)
            precious = accuracy(output, target, topk=(1, ))
            # loss and acc
            train_losses.update(loss.item(), input.size(0))
            train_top1.update(precious[0].item(), input.size(0))
            # backward
            loss.backward()
            optimizer.step()
        # FIX: since PyTorch 1.1 the scheduler must be stepped AFTER the
        # optimizer updates of the epoch, and the explicit epoch argument
        # is deprecated; the old code called scheduler.step(epoch) at the
        # top of the loop.
        scheduler.step()
        val_loss, val_top1 = val(model, val_dataloader, criterion, device)

        is_best = val_top1.avg > best_top1
        best_top1 = max(val_top1.avg, best_top1)

        print("epoch : {}/{}".format(epoch, opt.max_epoch))
        print("train-->loss:{},acc:{}".format(train_losses.avg,
                                              train_top1.avg))
        print("val-->loss:{},acc:{}".format(val_loss.avg, val_top1.avg))

        vis.plot_many({
            'train_loss': train_losses.avg,
            'val_loss': val_loss.avg
        })

        vis.log(
            "epoch:{epoch},lr:{lr},train_loss:{train_loss},val_loss:{val_loss},train_acc:{train_acc},val_acc:{val_acc}"
            .format(epoch=epoch,
                    train_loss=train_losses.avg,
                    val_loss=str(val_loss.avg),
                    train_acc=str(train_top1.avg),
                    val_acc=str(val_top1.avg),
                    lr=lr))

        # Checkpoint every 10 epochs (save_checkpoint also keeps the best).
        if epoch % 10 == 0:
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "model": opt.model,
                    "state_dict": model.state_dict(),
                    "best_top1": best_top1,
                    "optimizer": optimizer.state_dict(),
                    "val_loss": val_loss.avg,
                }, opt.save_model_dir, is_best, epoch)
コード例 #19
0
def train(**kwargs):
    """Fine-tune a pretrained ResNet34 on the dog-vs-cat dataset.

    Command-line keyword arguments override the corresponding ``opt`` fields.
    Checkpoints are written to ``checkpoints/`` once per epoch; loss and
    validation accuracy are streamed to visdom.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # (1) Model: pretrained ResNet34 with a fresh 2-class head.
    model = models.resnet34(pretrained=True)
    model.fc = nn.Linear(512, 2)
    if opt.use_gpu:
        model.cuda()

    # (2) Data: train split shuffled, validation split not.
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)

    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # (3) Loss and optimizer.
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.SGD(model.parameters(),
                            lr=opt.lr,
                            weight_decay=opt.weight_decay)

    # (4) Running statistics: smoothed loss and a 2-class confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # (5) Training loop.
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in enumerate(train_dataloader):

            # BUG FIX: this (and the epoch summary below) used Python-2
            # print statements — a SyntaxError under Python 3, which the
            # rest of the function (e.g. loss.item()) targets.
            print("ii:", ii)
            input = Variable(data)
            target = Variable(label)

            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)

            loss = criterion(score, target)
            loss.backward()

            optimizer.step()

            # Update meters; detach so they never hold the autograd graph.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.detach(), target.detach())

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # Drop into the debugger when the flag file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # Checkpoint the raw weights once per epoch.
        name = time.strftime('model' + '%m%d_%H:%M:%S.pth')
        t.save(model.state_dict(), 'checkpoints/' + name)

        # Validation metrics and visualisation.
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        print("epoch:", epoch, "loss:", loss_meter.value()[0],
              "accuracy:", val_accuracy)

        # Decay the learning rate in place when the loss stops improving
        # (keeps the optimizer's momentum state intact).
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        previous_loss = loss_meter.value()[0]
コード例 #20
0
def train(**kwargs):
    """Train ``opt.model`` on dog-vs-cat, persisting progress to a dict file.

    Keyword arguments override fields of ``opt``.  Loss curves, checkpoints
    and confusion matrices are appended to ``opt.pars_path`` via
    ``save_dict`` so an interrupted run can resume.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if os.path.exists(opt.load_model_path):
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # Resume bookkeeping.  BUG FIX: previous_loss was undefined whenever the
    # dict file was missing (and on the else-branch), raising NameError at
    # the first lr-decay comparison below.
    previous_loss = 1e100
    if os.path.exists(opt.pars_path):
        dic = load_dict(opt.pars_path)
        if 'loss' in dic:
            previous_loss = dic['loss'][-1]
    else:
        dic = {}

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)

    # train
    # NOTE(review): starts at epoch 5 — presumably resuming a prior run;
    # confirm this matches the checkpoint being loaded.
    for epoch in range(5, opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_dataloader)):
            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.data.item())
            confusion_matrix.add(score.data, target.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                dic = save_dict(opt.pars_path,
                                dic,
                                loss_data=loss_meter.value()[0])
                vis.plot('loss', dic['loss_data'])
                name = model.save()
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()  # BUG FIX: was ipdb.set_trave()

        name = model.save()
        # Reduce the learning rate when the loss no longer decreases;
        # mutating param_groups in place keeps the optimizer's moment state.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
        dic = save_dict(opt.pars_path,
                        dic,
                        name=name,
                        epoch=epoch,
                        lr=lr,
                        loss=loss_meter.value()[0],
                        train_cm=confusion_matrix.value())

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        dic = save_dict(opt.pars_path,
                        dic,
                        val_accuracy=val_accuracy,
                        val_cm=val_cm.value())

        vis.log(dic)
コード例 #21
0
def train(**kwargs):
    """Train a document-pair matching model with cross-entropy loss.

    Keyword arguments override fields of ``opt``.  The validation set shares
    the training vocabulary; batches are converted from one-hot labels to
    class indices before the loss.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)(opt)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data — validation reuses the vocabulary built on the train set.
    train_data = DocumentPair(opt.train_data_root,
                              doc_type='train',
                              suffix='txt',
                              load=lambda x: x.strip().split(','))
    train_data.initialize(vocab_size=opt.vocab_size)
    val_data = DocumentPair(opt.validate_data_root,
                            doc_type='validate',
                            suffix='txt',
                            load=lambda x: x.strip().split(','),
                            vocab=train_data.vocab)
    val_data.initialize()
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=False,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, batch in enumerate(train_dataloader):

            data_left, data_right, label, num_pos = load_data(
                batch, opt, train_data.vocab)

            # train model
            input_data_left = Variable(t.from_numpy(data_left))
            input_data_right = Variable(t.from_numpy(data_right))
            target = Variable(t.from_numpy(label))
            if opt.use_gpu:
                input_data_left = input_data_left.cuda()
                input_data_right = input_data_right.cuda()
                target = target.cuda()

            # Labels arrive one-hot; argmax over dim 1 gives class indices.
            # Hoisted once instead of recomputing it three times per batch.
            target_idx = target.max(1)[1]

            optimizer.zero_grad()
            scores, predictions = model((input_data_left, input_data_right))
            loss = criterion(scores, target_idx)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            # BUG FIX: loss.data[0] fails on PyTorch >= 0.5; use .item().
            loss_meter.add(loss.item())
            confusion_matrix.add(predictions.data, target_idx.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # Drop into the debugger when the flag file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # Decay the learning rate in place when the loss stops improving
        # (keeps the optimizer's moment state intact).
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
コード例 #22
0
def train(**kwargs):
    """Train ``opt.model`` on DogCat with per-batch and per-epoch reporting.

    Keyword arguments override fields of ``opt``.  Raw ``state_dict``
    checkpoints are written every epoch; loss and validation accuracy are
    streamed to visdom.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                        shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                        shuffle=False, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        # Raw accumulators for the epoch-level summary printed below.
        train_loss = 0.
        train_acc = 0.

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):

            # train model
            input = data.to(opt.device)
            target = label.to(opt.device)

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)

            # Per-batch bookkeeping (loss sum, correct-prediction count).
            train_loss += loss.item()
            pred = t.max(score, 1)[1]
            train_correct = (pred == target).sum()
            train_acc += train_correct.item()
            # FIX: dropped the redundant manual counter ``i`` that merely
            # duplicated the enumerate index ``ii``.
            print('epoch ', epoch, ' batch ', ii)
            print('Train Loss: %f, Acc: %f' % (loss.item(), train_correct.item() / float(len(data))))

            loss.backward()
            optimizer.step()

            # meters update and visualize; detach so the confusion matrix
            # never holds onto the autograd graph.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.detach(), target.detach())

            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

                # Drop into the debugger when the flag file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb;
                    ipdb.set_trace()

        print('Train Loss: {:.6f}, Acc: {:.6f}'.format(train_loss / (len(
            train_data)), train_acc / (len(train_data))))

        # Checkpoint the raw weights once per epoch.
        prefix = 'checkpoints/' + opt.model + '_a' + str(epoch) + '.pth'
        t.save(model.state_dict(), prefix)

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader, criterion, val_data)

        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
                    epoch = epoch,loss = loss_meter.value()[0],val_cm = str(val_cm.value()),train_cm=str(confusion_matrix.value()),lr=lr))

        # Decay the learning rate in place when the loss stops improving
        # (keeps the optimizer's moment state intact).
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
コード例 #23
0
ファイル: main.py プロジェクト: eglrp/Gist_code
def train():
    """Train a 1-channel / 1-class UNet for nodule segmentation.

    Resumes from ``opt.load_model_path`` when set (remapping a DataParallel
    checkpoint onto a single model), plots a Dice score on the validation
    set every epoch, and evaluates test-set recall every 10 epochs.
    """
    t.cuda.set_device(1)

    # n_channels=1: grayscale medical images; n_classes=1: binary mask.
    net = UNet(n_channels=1, n_classes=1)
    optimizer = t.optim.SGD(net.parameters(),
                            lr=opt.learning_rate,
                            momentum=0.9,
                            weight_decay=0.0005)
    criterion = t.nn.BCELoss()  # binary cross-entropy on sigmoid outputs

    start_epoch = 0
    if opt.load_model_path:
        checkpoint = t.load(opt.load_model_path)

        # Strip the 'module.' prefix so a multi-GPU (DataParallel)
        # checkpoint loads into a single model.
        state_dict = checkpoint['net']
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # remove `module.`
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']

    # Step the learning rate down by 10x at each milestone epoch.
    # NOTE(review): passing last_epoch on resume assumes the optimizer's
    # param groups carry an 'initial_lr' entry — confirm on the target
    # PyTorch version.
    if start_epoch == 0:
        scheduler = t.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=opt.milestones,
                                                     gamma=0.1,
                                                     last_epoch=-1)  # -1 = fresh run
        print('从头训练 ,学习率为{}'.format(optimizer.param_groups[0]['lr']))
    else:
        scheduler = t.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=opt.milestones,
                                                     gamma=0.1,
                                                     last_epoch=start_epoch)
        print('加载预训练模型{}并从{}轮开始训练,学习率为{}'.format(
            opt.load_model_path, start_epoch, optimizer.param_groups[0]['lr']))

    # Move the network to the GPU(s).
    if opt.use_gpu:
        net = t.nn.DataParallel(net, device_ids=opt.device_ids)
        net.cuda()
        cudnn.benchmark = True

    # Visualisation handle.
    vis = Visualizer(opt.env)

    train_data = NodeDataSet(train=True)
    val_data = NodeDataSet(val=True)
    test_data = NodeDataSet(test=True)

    # Dataset loaders.
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=True,
                                num_workers=opt.num_workers)
    test_dataloader = DataLoader(test_data,
                                 opt.test_batch_size,
                                 shuffle=False,
                                 num_workers=opt.num_workers)
    for epoch in range(opt.max_epoch - start_epoch):
        print('开始 epoch {}/{}.'.format(start_epoch + epoch + 1, opt.max_epoch))
        epoch_loss = 0
        num_batches = 0

        # NOTE(review): stepping at the start of the epoch is the pre-1.1
        # PyTorch convention; kept as-is to preserve the lr schedule.
        scheduler.step()

        for ii, (img, mask) in enumerate(train_dataloader):
            # BUG FIX: true_masks was only assigned on the GPU branch,
            # raising NameError when opt.use_gpu is False.
            true_masks = mask
            if opt.use_gpu:
                img = img.cuda()
                true_masks = true_masks.cuda()
            masks_pred = net(img)

            # Sigmoid to probabilities for BCELoss.
            masks_probs = t.sigmoid(masks_pred)

            # Loss = binary cross-entropy (+ optional Dice term).
            loss = criterion(masks_probs.view(-1), true_masks.view(-1))
            if opt.use_dice_loss:
                loss += dice_loss(masks_probs, true_masks)

            epoch_loss += loss.item()
            num_batches += 1

            if ii % 2 == 0:
                vis.plot('训练集loss', loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Current-state log line (last batch loss, current lr).
        vis.log("epoch:{epoch},lr:{lr},loss:{loss}".format(
            epoch=epoch, loss=loss.item(), lr=optimizer.param_groups[0]['lr']))

        # BUG FIX: dividing by the last enumerate index dropped one batch
        # and divided by zero for a single-batch loader.
        vis.plot('每轮epoch的loss均值', epoch_loss / max(num_batches, 1))

        # Save model, optimizer and current epoch for resuming.
        state = {
            'net': net.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': epoch
        }
        t.save(state, opt.checkpoint_root + '{}_unet.pth'.format(epoch))

        # ============ validation: mean Dice over the val set ============
        net.eval()
        tot = 0
        num_val_batches = 0
        with t.no_grad():  # no autograd bookkeeping during evaluation
            for jj, (img_val, mask_val) in enumerate(val_dataloader):
                true_mask_val = mask_val
                if opt.use_gpu:
                    img_val = img_val.cuda()
                    true_mask_val = true_mask_val.cuda()

                mask_pred = net(img_val)
                mask_pred = (t.sigmoid(mask_pred) > 0.5).float()  # 0.5 threshold
                tot += dice_loss(mask_pred, true_mask_val).item()
                num_val_batches += 1
        # BUG FIX: was tot / jj (off by one; ZeroDivisionError for one batch).
        val_dice = tot / max(num_val_batches, 1)
        vis.plot('验证集 Dice损失', val_dice)

        # ============ test-set recall every 10 epochs ============
        if epoch % 10 == 0:
            result_test = []
            with t.no_grad():
                for kk, (img_test, mask_test) in enumerate(test_dataloader):
                    # Probe raw segmentation quality: the true mask is unused.
                    if opt.use_gpu:
                        img_test = img_test.cuda()
                    mask_pred_test = net(img_test)  # [1,1,512,512]

                    probs = t.sigmoid(mask_pred_test).squeeze().squeeze().cpu(
                    ).detach().numpy()  # [512,512]
                    mask = probs > opt.out_threshold
                    result_test.append(mask)

            # Aggregate predicted masks into a 2-D recall figure.
            vis.plot('测试集二维召回率', getRecall(result_test).getResult())
        net.train()
コード例 #24
0
ファイル: main.py プロジェクト: 672401341/pytorch-book
def train(**kwargs):
    """Train the configured model on DogCat, validating once per epoch.

    Keyword arguments override the corresponding fields of ``opt``.  The
    smoothed loss is plotted every ``opt.print_freq`` batches and the lr is
    decayed whenever the epoch loss fails to improve.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # Model: build by name, optionally warm-start, move to the device.
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # Data: training split shuffled, validation split not.
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data, opt.batch_size,
                                  shuffle=True, num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data, opt.batch_size,
                                shuffle=False, num_workers=opt.num_workers)

    # Loss, optimizer and running meters.
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for batch_idx, (data, label) in tqdm(enumerate(train_dataloader)):
            inputs = data.to(opt.device)
            targets = label.to(opt.device)

            optimizer.zero_grad()
            scores = model(inputs)
            loss = criterion(scores, targets)
            loss.backward()
            optimizer.step()

            # Track the smoothed loss and the confusion matrix; detach so
            # the meters never keep the autograd graph alive.
            loss_meter.add(loss.item())
            confusion_matrix.add(scores.detach(), targets.detach())

            if (batch_idx + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])
                # Drop into the debugger when the debug flag file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # Per-epoch validation and visualisation.
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch,
            loss=loss_meter.value()[0],
            val_cm=str(val_cm.value()),
            train_cm=str(confusion_matrix.value()),
            lr=lr))

        # Anneal the lr in place when the epoch loss fails to improve
        # (preserves the optimizer's moment state).
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
コード例 #25
0
ファイル: main.py プロジェクト: zZhouLi/my_DogVsCat
def train(**kwargs):
    """Train ``opt.model`` on DogCat and validate once per epoch.

    Keyword arguments override fields of ``opt``.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # Data. BUG FIX: the train set was built from opt.load_model_path, and
    # the second loader overwrote train_dataloader from an undefined
    # ``test_data`` — the validation loader was never created.
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # Loss and optimizer. BUG FIX: Adam takes the parameter iterable, not
    # the module itself, and the configured lr/weight_decay were unused.
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # Smoothed loss and 2-class confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # Train on one batch.
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()  # BUG FIX: was optimizer.stop()

            # Meters. BUG FIX: loss.data[0] fails on PyTorch >= 0.5, and a
            # spurious confusion_matrix.add(loss...) call was dropped.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                if os.path.exists(opt.debug_file):  # BUG FIX: os.path.exist
                    import ipdb
                    ipdb.set_trace()

        # BUG FIX: checkpointing, validation and lr decay ran on every
        # batch (mis-indented inside the loop); they now run once per epoch.
        model.save()

        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            'epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}'
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))
        # Decay the lr in place when the loss stops improving; BUG FIX:
        # previous_loss is now updated every epoch, not only on decay.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
コード例 #26
0
ファイル: main.py プロジェクト: longqianh/3d-holography
def train(**kwargs):
    """Train ``opt.model`` (40 output units) on CGH data with MSE loss.

    Keyword arguments override fields of ``opt``.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)
    Model = getattr(models, opt.model)
    # NOTE(review): the output width 40 is hard-coded — confirm against the
    # dataset's label dimensionality.
    model = Model(40)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    train_data = CGHData(opt.train_data_root, train=True)
    val_data = CGHData(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)
    criterion = nn.MSELoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=opt.weight_decay)

    loss_meter = meter.AverageValueMeter()
    previous_loss = 1e100

    for epoch in range(opt.max_epoch):
        loss_meter.reset()

        for k, (data, label) in enumerate(train_dataloader):
            # BUG FIX: input/target were never assigned from the batch, so
            # the loop raised NameError on the first iteration.
            input = data
            target = label
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # BUG FIX: loss.data[0] fails on PyTorch >= 0.5; use .item().
            loss_meter.add(loss.item())

            if k % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

        # BUG FIX: checkpointing, validation and lr decay ran on every
        # batch (mis-indented inside the loop); they now run once per epoch.
        model.save()

        val_cm, val_accuracy = val(model, val_dataloader)  # BUG FIX: was vak_cm
        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss}".format(
            epoch=epoch, loss=loss_meter.value()[0], lr=lr))

        # Decay the lr in place when the loss stops improving.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
コード例 #27
0
ファイル: main.py プロジェクト: LANCE-HXZ/dlo-stack
def train(**kwargs):
    """Train ``opt.model`` on DogCat, evaluating val and test sets first.

    Each epoch begins by evaluating the current weights on the validation
    and test sets (so the final model is also evaluated once more after the
    last training pass), appends the metrics to ``opt.traingData_file``, and
    then trains for one epoch.
    """
    # opt.parse(kwargs)
    vis = Visualizer(opt.env)

    savingData = []  # accumulated per-epoch metric rows
    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    test_data = DogCat(opt.test_data_root, test=True)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)
    test_dataloader = DataLoader(test_data,
                                 opt.batch_size,
                                 shuffle=False,
                                 num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train: max_epoch+1 iterations so the last epoch is evaluated too.
    for epoch in range(opt.max_epoch + 1):

        # Evaluate and visualise first (the meters still hold the previous
        # epoch's stats; their values are NaN on the very first pass).
        val_cm, val_accuracy = val(model, val_dataloader)
        test_cm, test_accuracy = val(model, test_dataloader)
        vis.plot('test_accuracy', test_accuracy)
        vis.plot('lr', lr)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm},test_cm:{test_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    test_cm=str(test_cm.value()),
                    lr=lr))
        print("epoch = ", epoch, "   loss = ",
              loss_meter.value()[0], "   lr = ", lr)
        batch_results = [(epoch, loss_meter.value()[0], lr,
                          str(val_cm.value()), str(confusion_matrix.value()),
                          str(test_cm.value()), val_accuracy, test_accuracy)
                         ]
        savingData += batch_results
        save_training_data(savingData, opt.traingData_file)

        # Decay the logged lr unconditionally each epoch.
        # NOTE(review): the decayed value is plotted/logged but never written
        # back into optimizer.param_groups (that write-back is commented
        # out), so the optimizer keeps the original lr — confirm intent.
        lr = lr * opt.lr_decay

        if epoch == opt.max_epoch:
            return

        previous_loss = loss_meter.value()[0]
        loss_meter.reset()
        confusion_matrix.reset()
        # BUG FIX: tqdm's total was len(train_data)/opt.batch_size — a float
        # that under-counts partial batches; the loader's length is exact.
        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_dataloader)):

            # train model
            input = data
            target = label
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # Drop into the debugger when the flag file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # Checkpoint after the first epoch and every 10th epoch thereafter.
        prefix = 'checkpoints/'
        name = time.strftime(prefix + '%m%d_%H:%M:%S_' + str(epoch + 1) +
                             '.pth')
        if epoch == 0:
            model.save(name)
        if np.mod(epoch + 1, 10) == 0:
            model.save(name)
コード例 #28
0
def train(**kwargs):
    """Train the model named by ``opt.model`` on the Ictal dataset.

    Command-line keyword arguments override the global ``opt`` config.
    Saves a checkpoint every epoch and logs metrics to visdom.
    """
    # Update configuration from command-line keyword arguments.
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # Step 1: build the network (the model class handles any pretrained weights).
    model = getattr(models, opt.model)()

    # Step 2: data -- training and validation splits from the same root.
    train_data = Ictal(opt.train_data_root, opt.model, train=True)
    val_data = Ictal(opt.train_data_root, opt.model, train=False)

    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # Step 3: loss function and optimizer.
    criterion = t.nn.CrossEntropyLoss()  # cross-entropy for classification
    lr = opt.lr
    # Use the local ``lr`` so the optimizer and the manual decay below agree.
    optimizer = t.optim.SGD(model.parameters(),
                            lr=lr,
                            weight_decay=opt.weight_decay)

    # Step 4: meters -- smoothed loss and a 2-class confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    start = time.time()
    # Step 5: training loop.
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label) in enumerate(train_dataloader):
            # ``Variable`` is a no-op since PyTorch 0.4; tensors are used directly.
            input = data
            # CNN_1d expects (batch, channels, length), so swap the last two dims.
            if opt.model == 'CNN_1d':
                input = input.permute(0, 2, 1)

            target = label

            # Zero gradients, forward, loss, backward, step.
            optimizer.zero_grad()
            score = model(input)

            loss = criterion(score, target)
            loss.backward()

            optimizer.step()

            # Update running statistics and visualization.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.detach(), target.detach())

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                # Drop into the debugger when the debug sentinel file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
        model.save(epoch)

        # Compute validation metrics and visualize them.
        val_cm, val_accuracy = val(model, val_dataloader, opt.model)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        tra_cm, tra_accuracy = val(model, train_dataloader, opt.model)

        print("epoch:", epoch, "loss:",
              loss_meter.value()[0], "val_accuracy:", val_accuracy,
              "tra_accuracy:", tra_accuracy)

        # Decay the learning rate when the epoch loss stops improving.
        # Updating param_groups in place keeps optimizer momentum state.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        previous_loss = loss_meter.value()[0]
    end = time.time()

    print(end - start)
コード例 #29
0
def train(args, config):
    """Train ``args.model`` on MNIST with early stopping.

    Checkpoints every epoch to ``args.ckpts_dir``, logs loss/accuracy to
    visdom, and stops early after several epochs without validation-loss
    improvement.
    """
    vis = Visualizer()

    train_set = MNIST(data_path=config.train_data_path,
                      label_path=config.train_label_path,
                      config=config,
                      mode='train')
    valid_set = MNIST(data_path=config.train_data_path,
                      label_path=config.train_label_path,
                      config=config,
                      mode='valid')

    train_dataloader = DataLoader(train_set,
                                  config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers)
    valid_dataloader = DataLoader(valid_set,
                                  config.batch_size,
                                  shuffle=False,
                                  num_workers=config.num_workers)

    model = getattr(network, args.model)().eval()
    if args.load_model_path:
        model.load(args.load_model_path)
    if args.use_gpu:
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.lr)

    # Smoothed-loss meters and 10-class confusion matrices for both splits.
    train_loss_meter, valid_loss_meter = meter.AverageValueMeter(
    ), meter.AverageValueMeter()
    train_confusion_matrix, valid_confusion_matrix = meter.ConfusionMeter(
        10), meter.ConfusionMeter(10)

    best_valid_loss = 1e5
    best_epoch = 0
    # Epochs elapsed since the best validation loss (early-stop counter).
    dist_to_best = 0

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for wall-clock interval timing.
    time_begin = time.perf_counter()

    for epoch in range(config.epoch):

        # ---- train ----
        model.train()
        train_loss_meter.reset()
        train_confusion_matrix.reset()

        for _iter, (train_data, train_target) in enumerate(train_dataloader):

            if args.use_gpu:
                train_data = train_data.cuda()
                train_target = train_target.cuda()

            optimizer.zero_grad()
            train_logits, train_output = model(train_data)
            train_loss = criterion(train_logits, train_target)
            train_loss.backward()
            optimizer.step()

            train_loss_meter.add(train_loss.item())
            train_confusion_matrix.add(train_logits.data, train_target.data)

            if _iter % config.print_freq == 0:
                vis.plot('train_loss', train_loss_meter.value()[0])
        # Checkpoint once per epoch.
        model.save(path=os.path.join(args.ckpts_dir, 'model_{0}.pth'.format(
            str(epoch))))

        # ---- validate ----
        model.eval()
        valid_loss_meter.reset()
        valid_confusion_matrix.reset()

        for _iter, (valid_data, valid_target) in enumerate(valid_dataloader):

            if args.use_gpu:
                valid_data = valid_data.cuda()
                valid_target = valid_target.cuda()

            valid_logits, valid_output = model(valid_data)
            valid_loss = criterion(valid_logits, valid_target)

            valid_loss_meter.add(valid_loss.item())
            valid_confusion_matrix.add(valid_logits.detach().squeeze(),
                                       valid_target.type(t.LongTensor))

        valid_cm = valid_confusion_matrix.value()
        # Accuracy (%) = trace of the confusion matrix over total samples.
        valid_accuracy = 100. * (valid_cm.diagonal().sum()) / (valid_cm.sum())

        vis.plot('valid_accuracy', valid_accuracy)

        vis.log(
            "epoch:{epoch}, train_loss:{train_loss}, train_cm:{train_cm}, valid_loss:{valid_loss}, valid_cm:{valid_cm}, valid_accuracy:{valid_accuracy}"
            .format(epoch=epoch,
                    train_loss=train_loss_meter.value()[0],
                    train_cm=str(train_confusion_matrix.value()),
                    valid_loss=valid_loss_meter.value()[0],
                    valid_cm=str(valid_cm),
                    valid_accuracy=valid_accuracy))
        print(
            "epoch:{epoch}, train_loss:{train_loss}, valid_loss:{valid_loss}, valid_accuracy:{valid_accuracy}"
            .format(epoch=epoch,
                    train_loss=train_loss_meter.value()[0],
                    valid_loss=valid_loss_meter.value()[0],
                    valid_accuracy=valid_accuracy))
        print("train_cm:\n{train_cm}\n\nvalid_cm:\n{valid_cm}".format(
            train_cm=str(train_confusion_matrix.value()),
            valid_cm=str(valid_cm),
        ))

        # early stop: reset the counter on improvement, otherwise let it grow.
        # NOTE: the unconditional increment below means the counter sits at 1
        # right after a best epoch, so the effective patience is 4 epochs
        # (break at dist_to_best == 5). Preserved as-is from the original.
        if valid_loss_meter.value()[0] < best_valid_loss:
            best_epoch = epoch
            best_valid_loss = valid_loss_meter.value()[0]
            dist_to_best = 0

        dist_to_best += 1
        if dist_to_best > 4:
            break

    model.save(path=os.path.join(args.ckpts_dir, 'model.pth'))
    vis.save()
    print("save model successfully")
    print("best epoch: ", best_epoch)
    print("best valid loss: ", best_valid_loss)
    time_end = time.perf_counter()
    print('time cost: %.2f' % (time_end - time_begin))
コード例 #30
0
ファイル: main.py プロジェクト: swordzxz/Alexnet.pytorch
def train(**kwargs):
    """Train the model named by ``opt.model`` on the DogCat dataset.

    Command-line keyword arguments override ``opt``. Saves a checkpoint
    every epoch and logs loss, accuracy, and learning rate to visdom.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: data -- training and validation splits of the same root.
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer (the model supplies its own optimizer).
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)

    # step4: meters -- smoothed loss and a 2-class confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()
        # Fixed typo in the original message ("trian" -> "train").
        print("train epoch: ", epoch)
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):

            # train model
            input = data.to(opt.device)
            target = label.to(opt.device)

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            # detach() so the confusion matrix never holds the autograd graph.
            confusion_matrix.add(score.detach(), target.detach())

            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

        model.save()

        # validate and visualize
        print("start eval:")
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # Decay the learning rate when the epoch loss stops improving.
        # Updating param_groups in place keeps optimizer momentum state.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        vis.plot('lr', lr)
        previous_loss = loss_meter.value()[0]
コード例 #31
0
def train(**kwargs):
    """Train the model named by ``opt.model`` on the DogCat dataset.

    Saves a checkpoint every epoch, validates, logs metrics to visdom,
    and decays the learning rate when the epoch loss stops improving.
    """
    # Configuration updates from the command line would happen here.

    vis = Visualizer(opt.env)

    # step1: configure model -- look up the model class by its config name.
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    # step2: data -- training and validation splits of the same root.
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)

    train_dataloader = DataLoader(
        train_data,
        opt.batch_size,
        # Shuffle only the training data.
        shuffle=True,
        num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer -- cross-entropy for classification, Adam.
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters -- per-epoch smoothed loss and a 2-class confusion matrix
    # (the confusion matrix gives a more detailed picture than accuracy alone).
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):

        # Clear the meters at the start of every epoch.
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in enumerate(train_dataloader):

            # ``Variable`` is a no-op since PyTorch 0.4; tensors are used directly.
            input = data
            target = label
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            # Zero gradients, forward, loss, backward, step.
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # Update running statistics and visualization.
            # BUG FIX: ``loss.data[0]`` raises on PyTorch >= 0.5 (indexing a
            # 0-dim tensor); ``loss.item()`` is the supported scalar accessor.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            # Plot the smoothed loss every ``print_freq`` iterations.
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # Drop into the debugger when the debug sentinel file exists.
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # Checkpoint once per epoch.
        model.save()

        # validate and visualize -- val_cm is the validation confusion matrix.
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        # Log a snapshot of the current training state.
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # Decay the learning rate when the epoch loss stops improving.
        # Updating param_groups in place keeps optimizer momentum state.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]