def run():
    """Fine-tune a pretrained GGCNN2 on the Jacquard dataset.

    Loads weights from ``net_path``, dumps the architecture and
    hyper-parameters into ``save_folder``, runs 5 warm-up validations,
    then trains for ``epochs`` epochs, logging losses and IOU to
    TensorBoard and checkpointing the state dict after every epoch.

    Relies on module-level globals: use_depth, use_rgb, net_path,
    save_folder, net_desc, batch_size, batches_per_epoch, epochs, lr,
    split, r_rotate, r_zoom, num_workers, val_batches, logger.
    """
    device = torch.device("cuda:0")
    # Instantiate the network and load the pretrained weights.
    input_channels = 1 * use_depth + 3 * use_rgb
    net = GGCNN2(input_channels)  # same value as the old 1*use_depth + 3*use_rgb expression
    net.load_state_dict(
        torch.load(
            os.path.join(net_path,
                         '210910_1905/model0.915_epoch41_batch_8.pth')))
    net = net.to(device)
    # Print the architecture to the console once...
    summary(net, (input_channels, 300, 300))
    # ...then again with stdout redirected into arch.txt.
    # FIX: restore stdout inside `finally` — previously an exception raised
    # by summary() would leave sys.stdout pointing at the (leaked) file.
    arch_file = open(os.path.join(save_folder, 'arch.txt'), 'w')
    sys.stdout = arch_file
    try:
        summary(net, (input_channels, 300, 300))
    finally:
        sys.stdout = sys.__stdout__
        arch_file.close()
    # Record the training hyper-parameters.
    with open(os.path.join(save_folder, 'params.txt'), 'w') as f:
        f.write('batch_size:{}\nbatches_per_epoch:{}\nepochs:{}\nlr:{}'.format(
            batch_size, batches_per_epoch, epochs, lr))
    # Datasets: training split [0, split) of Jacquard.
    train_data = Jacquard('./jacquard',
                          include_rgb=use_rgb,
                          include_depth=use_depth,
                          start=0.0,
                          end=split,
                          random_rotate=r_rotate,
                          random_zoom=r_zoom,
                          output_size=300)
    train_dataset = torch.utils.data.DataLoader(train_data,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=num_workers)
    # Validation split [split, 1.0).
    val_data = Jacquard('./jacquard',
                        include_rgb=use_rgb,
                        include_depth=use_depth,
                        start=split,
                        end=1.0,
                        random_rotate=r_rotate,
                        random_zoom=r_zoom,
                        output_size=300)
    val_dataset = torch.utils.data.DataLoader(val_data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=num_workers)
    # Optimizer (Adam with its default learning rate).
    optimizer = optim.Adam(net.parameters())
    # TensorBoard writer; also log the model graph once.
    tb = SummaryWriter(log_dir=os.path.join(save_folder, net_desc))
    test_img = torch.randn((batch_size, input_channels, 300, 300))
    tb.add_graph(net, test_img.to(device))
    # Validate the pretrained weights 5 times before training starts.
    logger.info('validating...')
    for i in range(5):
        validate_results = validate(net,
                                    device,
                                    val_dataset,
                                    batches_per_epoch=val_batches)
    # Main training loop.
    for epoch in range(epochs):
        train_results = train(epoch, net, device, train_dataset, optimizer,
                              batches_per_epoch)
        # Overall training loss, then the per-component losses.
        tb.add_scalar('loss/train_loss', train_results['loss'], epoch)
        for n, l in train_results['losses'].items():
            tb.add_scalar('train_loss/' + n, l, epoch)
        logger.info('validating...')
        validate_results = validate(net,
                                    device,
                                    val_dataset,
                                    batches_per_epoch=val_batches)
        # IOU = correct / (correct + failed); then validation losses.
        tb.add_scalar(
            'loss/IOU', validate_results['correct'] /
            (validate_results['correct'] + validate_results['failed']), epoch)
        tb.add_scalar('loss/val_loss', validate_results['loss'], epoch)
        for n, l in validate_results['losses'].items():
            tb.add_scalar('val_loss/' + n, l, epoch)
        # Checkpoint after every epoch; filename embeds accuracy (first 5 chars).
        torch.save(
            net.state_dict(),
            '{0}/model{1}_epoch{2}_batch_{3}.pth'.format(
                save_folder,
                str(validate_results['acc'])[0:5], epoch, batch_size))
def run(num_workers):
    """Train GGCNN2 on the Cornell dataset and return per-stage timing lists.

    Parameters:
        num_workers: DataLoader worker count for both loaders.

    Returns:
        (time2_1s, time3_2s, time1_3s, time2_3s) — timing accumulators from
        the final call to ``train``.
        NOTE(review): these names are only bound inside the epoch loop, so
        this raises NameError if ``epochs`` < 1 — confirm ``epochs`` is
        always positive.
    """
    # Output folder, stamped with the current date/time.
    out_dir = 'trained_models/'
    dt = datetime.datetime.now().strftime('%y%m%d_%H%M')
    save_folder = os.path.join(out_dir, dt)
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)
    # Device selection.
    max_acc = 0.3  # unused — only referenced by the disabled checkpoint logic below
    device = torch.device("cuda:0")
    # Instantiate the network (4 input channels; presumably RGB + depth — confirm).
    net = GGCNN2(4)
    net = net.to(device)
    # (disabled) dump of the architecture and training hyper-parameters:
    # summary(net,(4,300,300))
    # f = open(os.path.join(save_folder,'arch.txt'),'w')
    # sys.stdout = f
    # summary(net,(4,300,300))
    # sys.stdout = sys.__stdout__
    # f.close()
    # with open(os.path.join(save_folder,'params.txt'),'w') as f:
    #     f.write('batch_size:{}\nbatches_per_epoch:{}\nepochs:{}\nlr:{}'.format(batch_size,batches_per_epoch,epochs,lr))
    # Datasets: training set.
    train_data = Cornell('../cornell',
                         random_rotate=True,
                         random_zoom=True,
                         output_size=300)
    train_dataset = torch.utils.data.DataLoader(train_data,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=num_workers)
    # Validation set.
    # NOTE(review): same directory and augmentation as training, with no
    # start/end split — training and validation data overlap; confirm intended.
    val_data = Cornell('../cornell',
                       random_rotate=True,
                       random_zoom=True,
                       output_size=300)
    val_dataset = torch.utils.data.DataLoader(val_data,
                                              batch_size=1,
                                              shuffle=True,
                                              num_workers=num_workers)
    # Optimizer.
    optimizer = optim.Adam(net.parameters())
    # Main loop: `time3` is threaded back into each successive train() call.
    time3 = 0
    for epoch in range(epochs):
        train_results, time2_1s, time3_2s, time1_3s, time2_3s, time3 = train(
            epoch,
            net,
            device,
            train_dataset,
            optimizer,
            batches_per_epoch,
            time3=time3)
        # (disabled) per-epoch validation and best-model checkpointing:
        #logging.info('validating...')
        #validate_results = validate(net,device,val_dataset,batches_per_epoch/10,vis = True)
        #logging.info('{0}/model{1}_epoch{2}_batch_{3}'.format(save_folder,str(validate_results)[0:5],epoch,batch_size))
        # if validate_results > max_acc:
        #     max_acc = validate_results
        #     torch.save(net,'{0}/model{1}_epoch{2}_batch_{3}'.format(save_folder,str(validate_results)[0:5],epoch,batch_size))
    # Timing stats from the final epoch only.
    return time2_1s, time3_2s, time1_3s, time2_3s
def run():
    """Train GGCNN2's filter layer (prob loss / POTO variant) on Jacquard.

    Starts from a pretrained checkpoint (``home_dir``/``pretrain_net_path``),
    picks the dataset index source based on the global ``dataset`` flag
    ('ADJ' loads sample lists from .npy files), validates 5 times on the
    pretrained weights, then trains for a fixed 100 epochs, saving the
    state dict after every epoch.

    Relies on module-level globals: pretrain, pretrain_net_path, dataset,
    use_depth, use_rgb, batch_size, lr, split, r_rotate, r_zoom,
    num_workers, batches_per_epoch, val_batches.
    """
    # Output folders.
    home_dir = '11.add_POTO/trained_models'
    out_dir = '11.add_POTO/trained_models/Patch'
    dt = datetime.datetime.now().strftime('%y%m%d_%H%M')
    net_desc = '{}_tb'.format(dt)  # NOTE(review): unused in this function
    save_folder = os.path.join(out_dir, dt)
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)
    # Log run metadata into the output folder.
    logging.basicConfig(filename=os.path.join(save_folder, 'logger.log'),
                        level=logging.INFO)
    logging.info(
        '\nVersion: Train Prob\nModel: GGCNN2 + filter\nValidate: IOU\nQuality map: position img\nPretrain: {}\nInfo: This version start to train the filter layer by add a prob loss. And you can choose whether to use or not to use the return of "get ground truth" function "prob" as the validate qulity map.And this version of code is tranfered from the model trained on the original ggcnn.\nNOTE:prob is supervised under the file **prob.png'
        .format(str(pretrain)))
    logging.info(
        '\nbatch_size:{0}\nlr:{1}\nuse_depth:{2}\nuse_rgb:{3}\nr_rotate:{4}\nr_zoom:{5}\ndataset:{6}\npretrain_net:{7}'
        .format(batch_size, lr, use_depth, use_rgb, r_rotate, r_zoom, dataset,
                pretrain_net_path))
    device = torch.device("cuda:0")
    # Instantiate the network and load the pretrained (prob-loss) weights.
    input_channels = 1 * use_depth + 3 * use_rgb
    net = GGCNN2(input_channels)
    net.load_state_dict(torch.load(os.path.join(home_dir, pretrain_net_path)))
    net = net.to(device)
    # Datasets. The 'ADJ' variant differs ONLY by loading the sample index
    # from .npy files, so the shared kwargs are factored out here (the two
    # branches previously duplicated the full dataset + loader construction).
    common = dict(include_rgb=use_rgb,
                  include_depth=use_depth,
                  random_rotate=r_rotate,
                  random_zoom=r_zoom,
                  output_size=300)
    if dataset == 'ADJ':
        train_data = Jacquard('./jacquard',
                              start=0.0,
                              end=split,
                              load_from_npy=True,
                              npy_path='train_ADJ.npy',
                              **common)
        val_data = Jacquard('./jacquard',
                            start=split,
                            end=1.0,
                            load_from_npy=True,
                            npy_path='test_ADJ.npy',
                            **common)
    else:
        train_data = Jacquard('./jacquard', start=0.0, end=split, **common)
        val_data = Jacquard('./jacquard', start=split, end=1.0, **common)
    train_dataset = torch.utils.data.DataLoader(train_data,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=num_workers)
    val_dataset = torch.utils.data.DataLoader(val_data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=num_workers)
    # Optimizer.
    optimizer = optim.Adam(net.parameters())
    logging.info('Start training')
    # Validate 5 times on the network before the patch loss is trained in.
    for i in range(5):
        validate_results = validate(net,
                                    device,
                                    val_dataset,
                                    batches_per_epoch=val_batches)
    # NOTE(review): epoch count is hard-coded to 100 here; the global
    # `epochs` used by the sibling run() variants is not consulted.
    for epoch in range(100):
        train_results = train(epoch, net, device, train_dataset, optimizer,
                              batches_per_epoch)
        logging.info('Validating....')
        validate_results = validate(net,
                                    device,
                                    val_dataset,
                                    batches_per_epoch=val_batches)
        # Checkpoint every epoch; filename embeds accuracy (first 5 chars).
        torch.save(
            net.state_dict(),
            '{0}/model{1}_epoch{2}_batch_{3}.pth'.format(
                save_folder,
                str(validate_results['acc'])[0:5], epoch, batch_size))
def run():
    """Train GGCNN2 on Jacquard with tensorboardX logging.

    Checkpoints the whole model whenever validation accuracy beats the
    running best (initially 0.3). Returns the (train_results,
    validate_results) dicts from the final epoch.
    """
    # Time-stamped output folder.
    out_dir = 'trained_models/'
    timestamp = datetime.datetime.now().strftime('%y%m%d_%H%M')
    net_desc = '{}_tb'.format(timestamp)
    save_folder = os.path.join(out_dir, timestamp)
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)
    # Best validation accuracy seen so far; gates checkpointing below.
    max_acc = 0.3
    device = torch.device("cuda:0")
    # Build the network and move it to the GPU.
    input_channels = 1 * use_depth + 3 * use_rgb
    net = GGCNN2(input_channels).to(device)
    # Print the architecture, and persist the hyper-parameters.
    summary(net, (input_channels, 300, 300))
    # (disabled) redirect of the summary into arch.txt:
    # f = open(os.path.join(save_folder,'arch.txt'),'w')
    # sys.stdout = f
    # summary(net,(4,300,300))
    # sys.stdout = sys.__stdout__
    # f.close()
    with open(os.path.join(save_folder, 'params.txt'), 'w') as f:
        f.write('batch_size:{}\nbatches_per_epoch:{}\nepochs:{}\nlr:{}'.format(
            batch_size, batches_per_epoch, epochs, lr))
    # Training loader over the [0, split) slice of Jacquard.
    train_loader = torch.utils.data.DataLoader(
        Jacquard('../jacquard',
                 include_rgb=use_rgb,
                 start=0.0,
                 end=split,
                 random_rotate=r_rotate,
                 random_zoom=r_zoom,
                 output_size=300),
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers)
    # Validation loader over the [split, 1.0) slice.
    val_loader = torch.utils.data.DataLoader(
        Jacquard('../jacquard',
                 include_rgb=use_rgb,
                 start=split,
                 end=1.0,
                 random_rotate=r_rotate,
                 random_zoom=r_zoom,
                 output_size=300),
        batch_size=1,
        shuffle=False,
        num_workers=num_workers)
    # Optimizer and TensorBoard writer.
    optimizer = optim.Adam(net.parameters())
    tb = tensorboardX.SummaryWriter(os.path.join(save_folder, net_desc))
    # Main loop: train, log, validate, log, maybe checkpoint.
    for epoch in range(epochs):
        train_results = train(epoch, net, device, train_loader, optimizer,
                              batches_per_epoch)
        tb.add_scalar('loss/train_loss', train_results['loss'], epoch)
        for tag, value in train_results['losses'].items():
            tb.add_scalar('train_loss/' + tag, value, epoch)
        logging.info('validating...')
        validate_results = validate(net,
                                    device,
                                    val_loader,
                                    batches_per_epoch=val_batches)
        correct = validate_results['correct']
        failed = validate_results['failed']
        tb.add_scalar('loss/IOU', correct / (correct + failed), epoch)
        tb.add_scalar('loss/val_loss', validate_results['loss'], epoch)
        for tag, value in validate_results['losses'].items():
            tb.add_scalar('val_loss/' + tag, value, epoch)
        # Save the full model object (not just the state dict) on improvement.
        acc = validate_results['acc']
        if acc > max_acc:
            max_acc = acc
            torch.save(
                net, '{0}/model{1}_epoch{2}_batch_{3}'.format(
                    save_folder,
                    str(acc)[0:5], epoch, batch_size))
    return train_results, validate_results
def run():
    """Train GGCNN2 on Jacquard (original-code variant) with TensorBoard.

    Dumps the architecture and hyper-parameters into a time-stamped folder,
    logs the model graph and per-epoch losses/IOU to TensorBoard, and saves
    the state dict whenever validation accuracy exceeds the running best
    (initially 0.60).

    Relies on module-level globals: use_depth, use_rgb, batch_size,
    batches_per_epoch, epochs, lr, split, r_rotate, r_zoom, num_workers,
    val_batches.
    """
    # Time-stamped output folder.
    out_dir = '8.jacquard_code_origin/trained_models/'
    dt = datetime.datetime.now().strftime('%y%m%d_%H%M')
    net_desc = '{}_tb'.format(dt)
    save_folder = os.path.join(out_dir, dt)
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)
    # Device.
    device = torch.device("cuda:0")
    # Instantiate the network (training from scratch; the pretrained-load
    # and the auxiliary C_NET remain disabled).
    input_channels = 1 * use_depth + 3 * use_rgb
    net = GGCNN2(input_channels)
    # net.load_state_dict(torch.load(os.path.join(net_path,'210716_1054/model0.943_epoch96_batch_8.pth')))
    net = net.to(device)
    # net_c = C_NET()
    # net_c = net_c.to(device)
    # Print the architecture once to the console...
    summary(net, (input_channels, 300, 300))
    # ...then again with stdout redirected into arch.txt.
    # FIX: restore stdout inside `finally` — previously an exception raised
    # by summary() would leave sys.stdout pointing at the (leaked) file.
    arch_file = open(os.path.join(save_folder, 'arch.txt'), 'w')
    sys.stdout = arch_file
    try:
        summary(net, (input_channels, 300, 300))
    finally:
        sys.stdout = sys.__stdout__
        arch_file.close()
    # Record the training hyper-parameters.
    with open(os.path.join(save_folder, 'params.txt'), 'w') as f:
        f.write('batch_size:{}\nbatches_per_epoch:{}\nepochs:{}\nlr:{}'.format(
            batch_size, batches_per_epoch, epochs, lr))
    # Datasets: training split [0, split).
    train_data = Jacquard('./jacquard',
                          include_rgb=use_rgb,
                          include_depth=use_depth,
                          start=0.0,
                          end=split,
                          random_rotate=r_rotate,
                          random_zoom=r_zoom,
                          output_size=300)
    train_dataset = torch.utils.data.DataLoader(train_data,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=num_workers)
    # Validation split [split, 1.0).
    # NOTE(review): shuffle=True on the validation loader is unusual — with
    # a batches_per_epoch-limited validate() it samples a different subset
    # each epoch (sibling versions use shuffle=False); confirm intended.
    val_data = Jacquard('./jacquard',
                        include_rgb=use_rgb,
                        include_depth=use_depth,
                        start=split,
                        end=1.0,
                        random_rotate=r_rotate,
                        random_zoom=r_zoom,
                        output_size=300)
    val_dataset = torch.utils.data.DataLoader(val_data,
                                              batch_size=1,
                                              shuffle=True,
                                              num_workers=num_workers)
    # Optimizer: param-group form kept so the disabled net_c group can be
    # re-enabled without reshaping the call.
    optimizer = torch.optim.Adam(
        [
            {
                'params': net.parameters()
            },
            # {'params': net_c.parameters()}
        ],
        lr)
    # TensorBoard writer; log the model graph once.
    tb = SummaryWriter(log_dir=os.path.join(save_folder, net_desc))
    test_img = torch.randn((batch_size, input_channels, 300, 300))
    tb.add_graph(net, test_img.to(device))
    # Best validation accuracy gate for checkpointing.
    max_acc = 0.60
    # Main training loop.
    for epoch in range(epochs):
        train_results = train(epoch, net, device, train_dataset, optimizer,
                              batches_per_epoch)
        # Overall training loss, then the per-component losses.
        tb.add_scalar('loss/train_loss', train_results['loss'], epoch)
        for n, l in train_results['losses'].items():
            tb.add_scalar('train_loss/' + n, l, epoch)
        logging.info('validating...')
        validate_results = validate(net,
                                    device,
                                    val_dataset,
                                    batches_per_epoch=val_batches)
        # IOU = correct / (correct + failed); then validation losses.
        tb.add_scalar(
            'loss/IOU', validate_results['correct'] /
            (validate_results['correct'] + validate_results['failed']), epoch)
        tb.add_scalar('loss/val_loss', validate_results['loss'], epoch)
        for n, l in validate_results['losses'].items():
            tb.add_scalar('val_loss/' + n, l, epoch)
        # Checkpoint the state dict only when accuracy improves.
        if validate_results['acc'] > max_acc:
            max_acc = validate_results['acc']
            torch.save(
                net.state_dict(),
                '{0}/model{1}_epoch{2}_batch_{3}.pth'.format(
                    save_folder,
                    str(validate_results['acc'])[0:5], epoch, batch_size))