Example 1
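All five snippets below are excerpts of run() functions from GGCNN2 grasp-network training scripts and rely on module-level context that is not shown. A minimal sketch of the assumed imports and hyperparameters follows (the names are taken from the code below; the values and import paths are placeholders, not the authors' originals):

import os
import sys
import datetime
import logging

import torch
import torch.optim as optim
from torchsummary import summary          # assumed source of summary()
from tensorboardX import SummaryWriter    # assumed source of SummaryWriter

# Project-specific modules; these import paths are guesses:
# from models.ggcnn2 import GGCNN2
# from utils.dataset import Jacquard, Cornell
# from utils.training import train, validate

logger = logging.getLogger(__name__)

# Module-level hyperparameters referenced by the snippets (placeholder values)
batch_size = 8
batches_per_epoch = 1000
epochs = 100
lr = 1e-3
num_workers = 4
val_batches = 250
split = 0.9            # train/validation split point
use_rgb = True         # adds 3 input channels
use_depth = True       # adds 1 input channel
r_rotate = True        # random-rotation augmentation
r_zoom = True          # random-zoom augmentation
dataset = 'ADJ'
pretrain = True
net_path = 'trained_models/'
pretrain_net_path = '...'  # left unspecified in the source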
def run():
    device = torch.device("cuda:0")

    # Instantiate the network
    input_channels = 1 * use_depth + 3 * use_rgb

    net = GGCNN2(input_channels)
    net.load_state_dict(
        torch.load(
            os.path.join(net_path,
                         '210910_1905/model0.915_epoch41_batch_8.pth')))
    net = net.to(device)
    # Save the network architecture and training parameters
    summary(net, (input_channels, 300, 300))
    f = open(os.path.join(save_folder, 'arch.txt'), 'w')
    sys.stdout = f
    summary(net, (input_channels, 300, 300))
    sys.stdout = sys.__stdout__
    f.close()
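    # A safer variant (sketch, using the standard-library contextlib): the
    # manual sys.stdout swap above leaks the redirect if summary() raises,
    # whereas redirect_stdout restores stdout automatically.
    # from contextlib import redirect_stdout
    # with open(os.path.join(save_folder, 'arch.txt'), 'w') as f, redirect_stdout(f):
    #     summary(net, (input_channels, 300, 300))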
    with open(os.path.join(save_folder, 'params.txt'), 'w') as f:
        f.write('batch_size:{}\nbatches_per_epoch:{}\nepochs:{}\nlr:{}'.format(
            batch_size, batches_per_epoch, epochs, lr))
    # Prepare the datasets
    # Training set
    train_data = Jacquard('./jacquard',
                          include_rgb=use_rgb,
                          include_depth=use_depth,
                          start=0.0,
                          end=split,
                          random_rotate=r_rotate,
                          random_zoom=r_zoom,
                          output_size=300)
    train_dataset = torch.utils.data.DataLoader(train_data,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=num_workers)
    # Validation set
    val_data = Jacquard('./jacquard',
                        include_rgb=use_rgb,
                        include_depth=use_depth,
                        start=split,
                        end=1.0,
                        random_rotate=r_rotate,
                        random_zoom=r_zoom,
                        output_size=300)
    val_dataset = torch.utils.data.DataLoader(val_data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=num_workers)

    # Set up the optimizer
    optimizer = optim.Adam(net.parameters())
    # Set up tensorboardX
    tb = SummaryWriter(log_dir=os.path.join(save_folder, net_desc))
    # Start the main loop
    # Add the model graph to TensorBoard
    test_img = torch.randn((batch_size, input_channels, 300, 300))
    tb.add_graph(net, test_img.to(device))

    logger.info('validating...')
    # Run validation a few times before training starts
    for i in range(5):
        validate_results = validate(net,
                                    device,
                                    val_dataset,
                                    batches_per_epoch=val_batches)

    # Train the model for a number of epochs
    for epoch in range(epochs):
        train_results = train(epoch, net, device, train_dataset, optimizer,
                              batches_per_epoch)

        # Log the total loss to TensorBoard
        tb.add_scalar('loss/train_loss', train_results['loss'], epoch)
        # Log each individual loss term to TensorBoard
        for n, l in train_results['losses'].items():
            tb.add_scalar('train_loss/' + n, l, epoch)

        logger.info('validating...')
        validate_results = validate(net,
                                    device,
                                    val_dataset,
                                    batches_per_epoch=val_batches)

        # Log the IoU success rate to TensorBoard
        tb.add_scalar(
            'loss/IOU', validate_results['correct'] /
            (validate_results['correct'] + validate_results['failed']), epoch)
        # Log the validation losses to TensorBoard
        tb.add_scalar('loss/val_loss', validate_results['loss'], epoch)
        for n, l in validate_results['losses'].items():
            tb.add_scalar('val_loss/' + n, l, epoch)

        torch.save(
            net.state_dict(), '{0}/model{1}_epoch{2}_batch_{3}.pth'.format(
                save_folder,
                str(validate_results['acc'])[0:5], epoch, batch_size))
Example 2

def run(num_workers):

    # Set up the output folder
    out_dir = 'trained_models/'
    dt = datetime.datetime.now().strftime('%y%m%d_%H%M')

    save_folder = os.path.join(out_dir, dt)
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    # Select the device
    max_acc = 0.3
    device = torch.device("cuda:0")

    # Instantiate the network (4 input channels: RGB + depth)
    net = GGCNN2(4)
    net = net.to(device)

    # # Save the network architecture and training parameters
    # summary(net,(4,300,300))
    # f = open(os.path.join(save_folder,'arch.txt'),'w')
    # sys.stdout = f
    # summary(net,(4,300,300))
    # sys.stdout = sys.__stdout__
    # f.close()
    # with open(os.path.join(save_folder,'params.txt'),'w') as f:
    #     f.write('batch_size:{}\nbatches_per_epoch:{}\nepochs:{}\nlr:{}'.format(batch_size,batches_per_epoch,epochs,lr))
    # Prepare the datasets
    # Training set
    train_data = Cornell('../cornell',
                         random_rotate=True,
                         random_zoom=True,
                         output_size=300)
    train_dataset = torch.utils.data.DataLoader(train_data,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=num_workers)
    # Validation set
    val_data = Cornell('../cornell',
                       random_rotate=True,
                       random_zoom=True,
                       output_size=300)
    val_dataset = torch.utils.data.DataLoader(val_data,
                                              batch_size=1,
                                              shuffle=True,
                                              num_workers=num_workers)
    # Set up the optimizer
    optimizer = optim.Adam(net.parameters())

    # Start the main loop
    time3 = 0
    # Debug timing accumulators, initialised here so the return at the end of
    # this function is defined even if the epoch loop never runs (assumed lists)
    time2_1s, time3_2s, time1_3s, time2_3s = [], [], [], []
    for epoch in range(epochs):
        train_results, time2_1s, time3_2s, time1_3s, time2_3s, time3 = train(
            epoch,
            net,
            device,
            train_dataset,
            optimizer,
            batches_per_epoch,
            time3=time3)
        #logging.info('validating...')
        #validate_results = validate(net,device,val_dataset,batches_per_epoch/10,vis = True)
        #logging.info('{0}/model{1}_epoch{2}_batch_{3}'.format(save_folder,str(validate_results)[0:5],epoch,batch_size))
        # if validate_results > max_acc:
        #     max_acc = validate_results
        #     torch.save(net,'{0}/model{1}_epoch{2}_batch_{3}'.format(save_folder,str(validate_results)[0:5],epoch,batch_size))
    return time2_1s, time3_2s, time1_3s, time2_3s  # debugging
Example 3

def run():
    # Set up the output folder
    home_dir = '11.add_POTO/trained_models'
    out_dir = '11.add_POTO/trained_models/Patch'
    dt = datetime.datetime.now().strftime('%y%m%d_%H%M')
    net_desc = '{}_tb'.format(dt)

    save_folder = os.path.join(out_dir, dt)
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    logging.basicConfig(filename=os.path.join(save_folder, 'logger.log'),
                        level=logging.INFO)

    logging.info(
        '\nVersion: Train Prob\nModel: GGCNN2 + filter\nValidate: IOU\nQuality map: position img\nPretrain: {}\nInfo: This version starts to train the filter layer by adding a prob loss. You can choose whether or not to use "prob", the return value of the "get ground truth" function, as the validation quality map. This version of the code was transferred from the model trained on the original ggcnn.\nNOTE: prob is supervised by the file **prob.png'
        .format(str(pretrain)))

    logging.info(
        '\nbatch_size:{0}\nlr:{1}\nuse_depth:{2}\nuse_rgb:{3}\nr_rotate:{4}\nr_zoom:{5}\ndataset:{6}\npretrain_net:{7}'
        .format(batch_size, lr, use_depth, use_rgb, r_rotate, r_zoom, dataset,
                pretrain_net_path))

    device = torch.device("cuda:0")

    # Instantiate the network
    input_channels = 1 * use_depth + 3 * use_rgb
    # net = GGCNN2(input_channels)
    # net = net.to(device)

    net = GGCNN2(input_channels)
    # net.load_state_dict(torch.load(os.path.join(out_dir,'210814_1917/model0.959_epoch95_batch_8.pth')))
    #
    # net.load_state_dict(torch.load(os.path.join(out_dir,'210819_0935/model0.955_epoch73_batch_8.pth')))
    # This checkpoint includes the prob loss
    net.load_state_dict(torch.load(os.path.join(home_dir, pretrain_net_path)))
    # This checkpoint includes the POTO loss
    # net.load_state_dict(torch.load(os.path.join(out_dir,'210825_1745/model0.939_epoch1_batch_8.pth')))
    net = net.to(device)
    # Prepare the datasets
    # Training set
    if dataset == 'ADJ':
        train_data = Jacquard('./jacquard',
                              include_rgb=use_rgb,
                              include_depth=use_depth,
                              start=0.0,
                              end=split,
                              random_rotate=r_rotate,
                              random_zoom=r_zoom,
                              output_size=300,
                              load_from_npy=True,
                              npy_path='train_ADJ.npy')
        train_dataset = torch.utils.data.DataLoader(train_data,
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    num_workers=num_workers)
        # Validation set
        val_data = Jacquard('./jacquard',
                            include_rgb=use_rgb,
                            include_depth=use_depth,
                            start=split,
                            end=1.0,
                            random_rotate=r_rotate,
                            random_zoom=r_zoom,
                            output_size=300,
                            load_from_npy=True,
                            npy_path='test_ADJ.npy')
        val_dataset = torch.utils.data.DataLoader(val_data,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=num_workers)
    else:
        train_data = Jacquard('./jacquard',
                              include_rgb=use_rgb,
                              include_depth=use_depth,
                              start=0.0,
                              end=split,
                              random_rotate=r_rotate,
                              random_zoom=r_zoom,
                              output_size=300)
        train_dataset = torch.utils.data.DataLoader(train_data,
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    num_workers=num_workers)
        # Validation set
        val_data = Jacquard('./jacquard',
                            include_rgb=use_rgb,
                            include_depth=use_depth,
                            start=split,
                            end=1.0,
                            random_rotate=r_rotate,
                            random_zoom=r_zoom,
                            output_size=300)
        val_dataset = torch.utils.data.DataLoader(val_data,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=num_workers)

    # Set up the optimizer
    optimizer = optim.Adam(net.parameters())
    logging.info('Start training')
    # First validate 5 times on the pretrained network, before the patch loss is trained
    for i in range(5):
        validate_results = validate(net,
                                    device,
                                    val_dataset,
                                    batches_per_epoch=val_batches)

    # Train for a number of epochs
    for epoch in range(100):
        train_results = train(epoch, net, device, train_dataset, optimizer,
                              batches_per_epoch)

        logging.info('Validating....')
        validate_results = validate(net,
                                    device,
                                    val_dataset,
                                    batches_per_epoch=val_batches)
        # print('true positive rate among correct')
        # print(validate_results['true_positive']/validate_results['correct'])
        # print('true negative rate among failed')
        # print(validate_results['true_negative']/validate_results['failed'])
        # print('overall accuracy')
        # print((validate_results['true_positive']+validate_results['false_negative'])/(validate_results['failed']+validate_results['correct']))
        # if validate_results['acc'] > max_acc:
        #     max_acc = validate_results['acc']
        torch.save(
            net.state_dict(), '{0}/model{1}_epoch{2}_batch_{3}.pth'.format(
                save_folder,
                str(validate_results['acc'])[0:5], epoch, batch_size))
Example 4
def run():
    # Set up the output folder
    out_dir = 'trained_models/'
    dt = datetime.datetime.now().strftime('%y%m%d_%H%M')
    net_desc = '{}_tb'.format(dt)

    save_folder = os.path.join(out_dir, dt)
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    # Select the device
    max_acc = 0.3
    device = torch.device("cuda:0")

    # Instantiate the network
    input_channels = 1 * use_depth + 3 * use_rgb
    net = GGCNN2(input_channels)
    net = net.to(device)

    # Save the network architecture and training parameters
    summary(net, (input_channels, 300, 300))
    # f = open(os.path.join(save_folder,'arch.txt'),'w')
    # sys.stdout = f
    # summary(net,(4,300,300))
    # sys.stdout = sys.__stdout__
    # f.close()
    with open(os.path.join(save_folder, 'params.txt'), 'w') as f:
        f.write('batch_size:{}\nbatches_per_epoch:{}\nepochs:{}\nlr:{}'.format(
            batch_size, batches_per_epoch, epochs, lr))
    # Prepare the datasets
    # Training set
    # logging.info('Started building the dataset: {}'.format(time.ctime()))
    #train_data = Cornell('../cornell',include_rgb = use_rgb, start = 0.0,end = split,random_rotate = r_rotate,random_zoom = r_zoom,output_size=300)
    train_data = Jacquard('../jacquard',
                          include_rgb=use_rgb,
                          start=0.0,
                          end=split,
                          random_rotate=r_rotate,
                          random_zoom=r_zoom,
                          output_size=300)
    train_dataset = torch.utils.data.DataLoader(train_data,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=num_workers)
    # Validation set
    #val_data = Cornell('../cornell',include_rgb = use_rgb, start = split,end = 1.0,random_rotate = r_rotate,random_zoom = r_zoom,output_size = 300)
    val_data = Jacquard('../jacquard',
                        include_rgb=use_rgb,
                        start=split,
                        end=1.0,
                        random_rotate=r_rotate,
                        random_zoom=r_zoom,
                        output_size=300)
    val_dataset = torch.utils.data.DataLoader(val_data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=num_workers)

    # Set up the optimizer
    optimizer = optim.Adam(net.parameters())
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=500, gamma=0.5,verbose = True)
    # Set up tensorboardX
    tb = tensorboardX.SummaryWriter(os.path.join(save_folder, net_desc))
    # Start the main loop
    for epoch in range(epochs):
        train_results = train(epoch, net, device, train_dataset, optimizer,
                              batches_per_epoch)
        #scheduler.step()
        # Log the total loss to TensorBoard
        tb.add_scalar('loss/train_loss', train_results['loss'], epoch)
        # Log each individual loss term to TensorBoard
        for n, l in train_results['losses'].items():
            tb.add_scalar('train_loss/' + n, l, epoch)
        logging.info('validating...')
        validate_results = validate(net,
                                    device,
                                    val_dataset,
                                    batches_per_epoch=val_batches)
        tb.add_scalar(
            'loss/IOU', validate_results['correct'] /
            (validate_results['correct'] + validate_results['failed']), epoch)
        tb.add_scalar('loss/val_loss', validate_results['loss'], epoch)
        for n, l in validate_results['losses'].items():
            tb.add_scalar('val_loss/' + n, l, epoch)
        if validate_results['acc'] > max_acc:
            max_acc = validate_results['acc']
            torch.save(
                net, '{0}/model{1}_epoch{2}_batch_{3}'.format(
                    save_folder,
                    str(validate_results['acc'])[0:5], epoch, batch_size))
    return train_results, validate_results
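Note that the checkpoint formats differ: Example 4 saves the entire module with torch.save(net, ...), while the other examples save net.state_dict(). A minimal reload sketch for both cases (the folder and filename below are copied from a commented-out path in Example 5 and serve only as placeholders):

# Whole-module checkpoint (Example 4): unpickling requires the GGCNN2 class
# to be importable under the same module path used when saving.
net = torch.load('trained_models/210716_1054/model0.943_epoch96_batch_8')

# state_dict checkpoint (Examples 1, 3 and 5): rebuild the network first,
# then load the weights into it.
net = GGCNN2(input_channels)
net.load_state_dict(torch.load('trained_models/210716_1054/model0.943_epoch96_batch_8.pth'))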
Example 5

def run():
    # Set up the output folder
    out_dir = '8.jacquard_code_origin/trained_models/'
    dt = datetime.datetime.now().strftime('%y%m%d_%H%M')
    net_desc = '{}_tb'.format(dt)

    save_folder = os.path.join(out_dir, dt)
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    # Select the device
    device = torch.device("cuda:0")

    # Instantiate the network
    input_channels = 1 * use_depth + 3 * use_rgb
    net = GGCNN2(input_channels)
    # net.load_state_dict(torch.load(os.path.join(net_path,'210716_1054/model0.943_epoch96_batch_8.pth')))
    net = net.to(device)

    # net_c = C_NET()

    # net_c = net_c.to(device)
    # Save the network architecture and training parameters
    summary(net, (input_channels, 300, 300))
    f = open(os.path.join(save_folder, 'arch.txt'), 'w')
    sys.stdout = f
    summary(net, (input_channels, 300, 300))
    sys.stdout = sys.__stdout__
    f.close()
    with open(os.path.join(save_folder, 'params.txt'), 'w') as f:
        f.write('batch_size:{}\nbatches_per_epoch:{}\nepochs:{}\nlr:{}'.format(
            batch_size, batches_per_epoch, epochs, lr))
    # Prepare the datasets
    # Training set
    train_data = Jacquard('./jacquard',
                          include_rgb=use_rgb,
                          include_depth=use_depth,
                          start=0.0,
                          end=split,
                          random_rotate=r_rotate,
                          random_zoom=r_zoom,
                          output_size=300)
    train_dataset = torch.utils.data.DataLoader(train_data,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=num_workers)
    # Validation set
    val_data = Jacquard('./jacquard',
                        include_rgb=use_rgb,
                        include_depth=use_depth,
                        start=split,
                        end=1.0,
                        random_rotate=r_rotate,
                        random_zoom=r_zoom,
                        output_size=300)
    val_dataset = torch.utils.data.DataLoader(val_data,
                                              batch_size=1,
                                              shuffle=True,
                                              num_workers=num_workers)

    # Set up the optimizer
    optimizer = torch.optim.Adam(
        [
            {
                'params': net.parameters()
            },
            # {'params': net_c.parameters()}
        ],
        lr)
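    # Each dict above is a parameter group; Adam applies the trailing lr to
    # every group unless the group overrides it. If the commented-out net_c
    # were enabled, a hypothetical group with its own learning rate would be:
    #   {'params': net_c.parameters(), 'lr': lr * 0.1},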

    # Set up tensorboardX
    tb = SummaryWriter(log_dir=os.path.join(save_folder, net_desc))
    # Start the main loop
    # Add the model graph to TensorBoard
    test_img = torch.randn((batch_size, input_channels, 300, 300))
    tb.add_graph(net, test_img.to(device))

    max_acc = 0.60

    # Train the model for a number of epochs
    for epoch in range(epochs):
        train_results = train(epoch, net, device, train_dataset, optimizer,
                              batches_per_epoch)

        # Log the total loss to TensorBoard
        tb.add_scalar('loss/train_loss', train_results['loss'], epoch)
        # Log each individual loss term to TensorBoard
        for n, l in train_results['losses'].items():
            tb.add_scalar('train_loss/' + n, l, epoch)

        logging.info('validating...')
        validate_results = validate(net,
                                    device,
                                    val_dataset,
                                    batches_per_epoch=val_batches)

        # Log the IoU success rate to TensorBoard
        tb.add_scalar(
            'loss/IOU', validate_results['correct'] /
            (validate_results['correct'] + validate_results['failed']), epoch)
        # Log the validation losses to TensorBoard
        tb.add_scalar('loss/val_loss', validate_results['loss'], epoch)
        for n, l in validate_results['losses'].items():
            tb.add_scalar('val_loss/' + n, l, epoch)

        if validate_results['acc'] > max_acc:
            max_acc = validate_results['acc']
            torch.save(
                net.state_dict(), '{0}/model{1}_epoch{2}_batch_{3}.pth'.format(
                    save_folder,
                    str(validate_results['acc'])[0:5], epoch, batch_size))
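For later evaluation, a state_dict checkpoint written by the loop above can be reloaded in the same way as in Examples 1 and 3. A minimal sketch (the filename is illustrative, following the format string above):

net = GGCNN2(input_channels)
net.load_state_dict(torch.load(os.path.join(save_folder, 'model0.915_epoch41_batch_8.pth'),
                               map_location=device))
net = net.to(device)
net.eval()
validate_results = validate(net, device, val_dataset, batches_per_epoch=val_batches)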