import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as Data
# PSPNet is assumed to come from the project's model definition.


def main(class_num, pre_trained, train_data, batch_size, momentum, lr, cate_weight, epoch, weights):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Honor the pre_trained argument instead of hard-coding pretrained=True.
    model = PSPNet(num_classes=class_num, downsample_factor=16,
                   pretrained=pre_trained, aux_branch=False).to(device)
    train_loader = Data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    # Class-weighted cross-entropy; keep the weights on the same device as the model.
    class_weights = torch.from_numpy(np.array(cate_weight)).float().to(device)
    loss_func = nn.CrossEntropyLoss(weight=class_weights)

    model.train()
    for i in range(epoch):
        for step, (b_x, b_y) in enumerate(train_loader):
            b_x = b_x.to(device)
            b_y = b_y.to(device).view(-1, 473, 473)  # PSPNet's 473x473 label maps
            output = model(b_x)
            loss = loss_func(output, b_y.long())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Log every step (the original `step % 1 == 0` check was always true).
            print("Epoch:{0} || Step:{1} || Loss:{2:.4f}".format(i, step, loss.item()))
    torch.save(model.state_dict(), weights + "PSPNet_weights.pth")
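# A hypothetical invocation of main() above; the dataset class and the weight
# values are placeholders, not part of the original script.
if __name__ == "__main__":
    train_data = SegDataset("data/train")  # assumed Dataset yielding (image, 473x473 mask) pairs
    main(class_num=2, pre_trained=True, train_data=train_data, batch_size=4,
         momentum=0.9, lr=0.01, cate_weight=[1.0, 1.0], epoch=50, weights="./")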
def main():
    # Step 0: preparation
    # place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard():
        # Step 1: define the training dataloader
        image_folder = "work/dummy_data"
        image_list_file = "work/dummy_data/list.txt"
        transform = TrainAugmentation(224)
        data = BasicDataLoader(image_folder, image_list_file, transform=transform)
        train_dataloader = fluid.io.DataLoader.from_generator(capacity=2, return_list=True)
        train_dataloader.set_sample_generator(data, args.batch_size)
        total_batch = len(data) // args.batch_size

        # Step 2: create the model
        if args.net == "basic":
            model = PSPNet()
        else:
            raise NotImplementedError(f"args.net: {args.net} is not supported!")

        # Step 3: define the criterion and optimizer
        criterion = Basic_SegLoss
        optimizer = AdamOptimizer(learning_rate=args.lr, parameter_list=model.parameters())

        # Step 4: training loop
        for epoch in range(1, args.num_epochs + 1):
            train_loss = train(train_dataloader, model, criterion, optimizer, epoch, total_batch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")

            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                # Save the model and optimizer states.
                model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}")
                fluid.save_dygraph(model.state_dict(), model_path)
                fluid.save_dygraph(optimizer.state_dict(), model_path)
                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')
    return confusion_matrix


if __name__ == "__main__":
    IMG_SIZE = 128
    print("=> the training size is {}".format(IMG_SIZE))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    train_loader = DataLoader(TinySegData(img_size=IMG_SIZE, phase='train'),
                              batch_size=32, shuffle=True, num_workers=8)
    # val_loader = DataLoader(TinySegData(phase='val'), batch_size=1, shuffle=False, num_workers=0)

    model = PSPNet(n_classes=6, pretrained=True).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = torch.nn.CrossEntropyLoss()

    mkdirs = lambda x: os.makedirs(x, exist_ok=True)
    # model.load_state_dict(torch.load("ckpt_seg/epoch_79_iou0.88.pth"))
    ckpt_dir = "ckpt_seg"
    mkdirs(ckpt_dir)

    epoch = 80
    for i in range(0, epoch):
        # train
        model.train()
        epoch_iou = []
        epoch_start = time.time()
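# The function above returns a confusion matrix. A common way to reduce it to
# per-class IoU and mean IoU is sketched below; this helper is an assumption,
# not part of the original script. It expects a (C, C) matrix with ground-truth
# classes on the rows and predicted classes on the columns.
import numpy as np

def miou_from_confusion_matrix(cm):
    tp = np.diag(cm).astype(np.float64)           # true positives per class
    denom = cm.sum(axis=0) + cm.sum(axis=1) - tp  # TP + FP + FN per class
    iou = tp / np.maximum(denom, 1)               # guard against empty classes
    return iou, iou.mean()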
def main():
    global args, best_record
    args = parser.parse_args()

    if args.augment:
        transform_train = joint_transforms.Compose([
            joint_transforms.FreeScale((512, 512)),
            joint_transforms.RandomHorizontallyFlip(),
            joint_transforms.RandomVerticallyFlip(),
            joint_transforms.Rotate(90),
        ])
        transform_val = joint_transforms.Compose([joint_transforms.FreeScale((512, 512))])
    else:
        transform_train = None
        transform_val = None  # fix: was undefined when --augment is off

    dataset_train = dataset.PRCVData('train', args.data_root, args.label_train_list, transform_train)
    dataloader_train = data.DataLoader(dataset_train, batch_size=args.batch_size,
                                       shuffle=True, num_workers=8)
    dataset_val = dataset.PRCVData('val', args.data_root, args.label_val_list, transform_val)
    dataloader_val = data.DataLoader(dataset_val, batch_size=args.batch_size,
                                     shuffle=False, num_workers=8)

    model = PSPNet(num_classes=args.num_class)
    print('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))

    # Initialize from a pretrained checkpoint, skipping the classifier head
    # when the number of classes differs from the pretraining setup.
    saved_state_dict = torch.load(args.restore_from)
    new_params = model.state_dict().copy()
    if args.num_class != 21:
        for i in saved_state_dict:
            # e.g. Scale.layer5.conv2d_list.3.weight
            i_parts = i.split('.')
            if i_parts[0] != 'fc':
                new_params[i] = saved_state_dict[i]
    model.load_state_dict(new_params)

    model = model.cuda()
    model = torch.nn.DataParallel(model)
    cudnn.benchmark = True

    # Optionally resume from a checkpoint.
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Define the loss function (criterion) and optimizer.
    criterion = torch.nn.CrossEntropyLoss(ignore_index=255).cuda()
    optimizer = torch.optim.SGD(
        [{'params': get_1x_lr_params(model), 'lr': args.learning_rate},
         {'params': get_10x_lr_params(model), 'lr': 10 * args.learning_rate}],
        lr=args.learning_rate, momentum=args.momentum,
        weight_decay=args.weight_decay, nesterov=True)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # Train for one epoch, then evaluate on the validation set.
        train(dataloader_train, model, criterion, optimizer, epoch)
        acc, mean_iou, val_loss = validate(dataloader_val, model, criterion, args.result_pth, epoch)

        is_best = mean_iou > best_record['miou']
        if is_best:
            best_record['epoch'] = epoch
            best_record['val_loss'] = val_loss.avg
            best_record['acc'] = acc
            best_record['miou'] = mean_iou
        save_checkpoint({
            'epoch': epoch + 1,
            'val_loss': val_loss.avg,
            'accuracy': acc,
            'miou': mean_iou,
            'state_dict': model.state_dict(),
        }, is_best)

        print('-' * 102)
        print('[epoch: %d], [val_loss: %5f], [acc: %.5f], [miou: %.5f]'
              % (epoch, val_loss.avg, acc, mean_iou))
        print('best record: [epoch: {epoch}], [val_loss: {val_loss:.5f}], '
              '[acc: {acc:.5f}], [miou: {miou:.5f}]'.format(**best_record))
        print('-' * 102)
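# adjust_learning_rate is called above but not shown. A common choice for
# PSPNet-style training is the "poly" schedule, lr = base_lr * (1 - t/T)**0.9,
# applied to the 1x backbone group and scaled 10x for the head. This is a
# hedged sketch of that convention, not the original implementation; the
# base_lr / max_epochs defaults are assumptions.
def adjust_learning_rate(optimizer, epoch, base_lr=0.01, max_epochs=100, power=0.9):
    lr = base_lr * (1 - epoch / max_epochs) ** power
    optimizer.param_groups[0]['lr'] = lr           # backbone (1x) parameters
    if len(optimizer.param_groups) > 1:
        optimizer.param_groups[1]['lr'] = lr * 10  # classifier head (10x) parameters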
def train():
    print(torch.cuda.device_count())
    # os.environ["CUDA_VISIBLE_DEVICES"] = '0, 1'
    net = PSPNet(n_classes=1, sizes=(1, 2, 3, 6), psp_size=512,
                 deep_features_size=256, backend='resnet34', pretrained=False)
    net = nn.DataParallel(net).cuda()

    # Unused leftover from an earlier class-weighted loss:
    # weight = torch.ones(3); weight[0] = 0

    optimizer = Adam(net.parameters(), lr=1e-3)
    # criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()

    loaders = prepared_train_data()
    test_loaders = prepared_test_data()

    for epoch in range(1, 100):
        print('Training................')
        epoch_loss = []
        net.train()
        for step, sample_batch in enumerate(loaders):
            images = sample_batch['image'].cuda()
            masks = sample_batch['mask'].cuda()

            # Variable() wrappers from the original are no-ops in modern PyTorch.
            outputs = net(images)
            outputs = torch.clamp(outputs, 0., 255.)

            optimizer.zero_grad()
            loss = criterion(outputs, masks)
            loss.backward()
            optimizer.step()
            epoch_loss.append(loss.item())  # fix: loss.data[0] was removed in modern PyTorch

        # torch.save(net, 'Models/modol-' + str(epoch) + '.pth')

        print('Testing........................')
        net.eval()
        total_m1 = 0
        disc = 44  # binarization threshold on the 0-255 prediction map
        for iteration, item in enumerate(test_loaders):
            images = item['image'].cuda()
            masks = np.squeeze(item['mask'].numpy(), axis=0)
            name = item['name']

            with torch.no_grad():
                predict_label = torch.clamp(net(images), 0., 255.)
            map = np.squeeze(predict_label.cpu().numpy(), axis=(0, 1))

            # Binarize ground truth and prediction, then compute m1 = 1 - IoU
            # for this image.
            gt = (masks > 200).astype(np.float64)
            prediction = (map > disc).astype(np.float64)
            overlap = gt + prediction
            num_inter = (overlap > 1).sum()
            num_union = (overlap > 0).sum()
            m_1 = 1 - num_inter / num_union

            print('Image name is {}, and m1 is {}'.format(name[0], m_1))
            total_m1 = total_m1 + m_1

            map[map > disc] = 255
            # scipy.misc.imsave was removed in SciPy 1.2; imageio.imwrite is the
            # modern replacement.
            misc.imsave('./test_image/' + name[0], map)
        print('m1 is {}'.format(total_m1 / 200))  # 200 = number of test images
def main():
    global args
    # If more than one GPU is available, train with all of them.
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        args.batch_size = args.batch_size * torch.cuda.device_count()
    else:
        print("Let's use", torch.cuda.current_device())

    if args.resume:
        assert os.path.isfile(args.resume), \
            "=> no checkpoint found at '{}'".format(args.resume)
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(args.resume))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args)
        args.resume = True
    else:
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ")
        model = PSPNet(n_classes=args.max_classes, sizes=(1, 2, 3, 6),
                       psp_size=512, deep_features_size=256, backend='resnet34')
        print("=> model created.")
        start_epoch = 0
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum, weight_decay=args.decay)
        best_result = np.inf
        # Create the results folder if it does not already exist.
        output_directory = os.path.join(args.saved_path)
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

    log_path = os.path.join(args.log_path, "{}".format('vocaug'))
    if os.path.isdir(log_path):
        shutil.rmtree(log_path)
    os.makedirs(log_path)
    logger = SummaryWriter(log_path)

    # For multi-GPU training.
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

    # Define the loss functions; class 0 (background) is masked out.
    weights = torch.ones(args.max_classes)
    weights[0] = 0
    # NLLLoss2d is deprecated; NLLLoss handles 4D inputs and expects
    # log-probabilities from the model.
    seg_criterion = nn.NLLLoss(weight=weights.cuda()).cuda()
    cls_criterion = nn.BCEWithLogitsLoss(weight=weights.cuda()).cuda()
    # criterion = [seg_criterion, cls_criterion]

    is_best = False
    for epoch in range(start_epoch, args.epochs):
        # Train for one epoch and track the best (lowest) loss.
        loss = train(train_loader, model, seg_criterion, optimizer, epoch, logger)
        if loss < best_result:
            best_result = loss
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)
        if (epoch + 1) % 10 == 0:
            validate(val_loader, model, epoch, logger)
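# utils.save_checkpoint is referenced above but not shown. A minimal sketch of
# the usual pattern is below (an assumption, not the original helper): write
# the checkpoint every epoch and copy it to a "best" file when is_best is set.
import os
import shutil
import torch

def save_checkpoint(state, is_best, epoch, output_directory):
    filename = os.path.join(output_directory, 'checkpoint-{}.pth.tar'.format(epoch))
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, os.path.join(output_directory, 'model_best.pth.tar'))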
def train(epo_num=50, show_vgg_params=False):
    # train_dataloader / test_dataloader are module-level globals here.
    vis = visdom.Visdom()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    vgg_model = VGGNet(requires_grad=True, show_params=show_vgg_params)  # created but unused below
    fcn_model = PSPNet(n_classes=2).to(device)
    criterion = nn.BCELoss().to(device)
    optimizer = optim.SGD(fcn_model.parameters(), lr=1e-2, momentum=0.7)

    all_train_iter_loss = []
    all_test_iter_loss = []

    # Start timing.
    prev_time = datetime.now()
    for epo in range(epo_num):
        train_loss = 0
        fcn_model.train()
        for index, (img, lidar, label, color_label) in enumerate(train_dataloader):
            img = img.to(device)
            lidar = lidar.to(device)
            label = label.to(device)

            optimizer.zero_grad()
            output, out_cls = fcn_model(img)
            output = torch.sigmoid(output)  # output.shape is torch.Size([4, 2, 160, 160])
            loss = criterion(output, label)
            loss.backward()
            iter_loss = loss.item()
            all_train_iter_loss.append(iter_loss)
            train_loss += iter_loss
            optimizer.step()

            output_np = np.argmin(output.cpu().detach().numpy(), axis=1)
            bag_msk_np = np.argmin(label.cpu().detach().numpy(), axis=1)

            if np.mod(index, 15) == 0:
                print('epoch {}, {}/{}, train loss is {}'.format(
                    epo, index, len(train_dataloader), iter_loss))
                vis.images(output_np[:, None, :, :], win='train_pred',
                           opts=dict(title='train prediction'))
                vis.images(bag_msk_np[:, None, :, :], win='train_label',
                           opts=dict(title='label'))
                vis.line(all_train_iter_loss, win='train_iter_loss',
                         opts=dict(title='train iter loss'))

        test_loss = 0
        fcn_model.eval()
        for index, (img, lidar, label, color_label) in enumerate(test_dataloader):
            img = img.to(device)
            lidar = lidar.to(device)
            label = label.to(device)

            # No optimizer.zero_grad() here: gradients are not computed in eval.
            with torch.no_grad():
                output, out_cls = fcn_model(img)
                output = torch.sigmoid(output)
                loss = criterion(output, label)
            iter_loss = loss.item()
            all_test_iter_loss.append(iter_loss)
            test_loss += iter_loss

            output_np = np.argmin(output.cpu().numpy(), axis=1)
            bag_msk_np = np.argmin(label.cpu().numpy(), axis=1)

            if np.mod(index, 15) == 0:
                print('Testing... Open http://localhost:8097/ to see test results.')
                vis.images(output_np[:, None, :, :], win='test_pred',
                           opts=dict(title='test prediction'))
                vis.images(bag_msk_np[:, None, :, :], win='test_label',
                           opts=dict(title='label'))
                vis.line(all_test_iter_loss, win='test_iter_loss',
                         opts=dict(title='test iter loss'))

        cur_time = datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        prev_time = cur_time

        print('epoch train loss = %f, epoch test loss = %f, %s'
              % (train_loss / len(train_dataloader),
                 test_loss / len(test_dataloader), time_str))

        if np.mod(epo, 5) == 0:
            torch.save(fcn_model, 'checkpoints/fcn_model_{}.pt'.format(epo))
            print('saving checkpoints/fcn_model_{}.pt'.format(epo))
if __name__ == '__main__':
    path = './keyboard.pth'  # the checkpoint file must sit in the same directory as this script
    # path = './seg_hand.pth'
    checkpoint = torch.load(path)
    if isinstance(checkpoint, dict) and 'state_dict' in checkpoint.keys():
        checkpoint = checkpoint['state_dict']
    # `model` and `device` are assumed to be constructed earlier in the script.
    if 'module' in list(checkpoint.keys())[0] and not isinstance(model, torch.nn.DataParallel):
        model = torch.nn.DataParallel(model)  # the keys were saved from a DataParallel model
    model.load_state_dict(checkpoint)
    model.to(device)
    model.eval()

    parameters = sum(x.numel() for x in model.parameters())  # roughly 51M for this model
    print("model has {} M parameters in total".format(parameters / 1e6))

    img_paths = './mask_imgs'
    img_paths = [os.path.join(img_paths, x) for x in os.listdir(img_paths)
                 if x.endswith('.png')]
    # for img_path in img_paths:
    #     if not os.path.basename(img_path) == '0000.png':
    #         continue
    #     img = cv2.imread('./keyboard_images/0015.jpg')
    #     print(img_path)
    #     test_mask(img, img_path)
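# test_mask is referenced above but not defined in this snippet. A plausible
# minimal version for a binary mask is sketched below; the 473x473 input size,
# ImageNet normalization, single-tensor model output, and the output path are
# all assumptions, not the original helper.
import cv2
import numpy as np
import torch

def test_mask(img, img_path, size=473):
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)            # cv2 loads BGR
    x = cv2.resize(rgb, (size, size)).astype(np.float32) / 255.0
    x = (x - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]  # ImageNet stats
    x = torch.from_numpy(x.transpose(2, 0, 1)).unsqueeze(0).float().to(device)
    with torch.no_grad():
        logits = model(x)                                  # (1, C, H, W) class scores
    mask = logits.argmax(dim=1).squeeze(0).cpu().numpy().astype(np.uint8)
    cv2.imwrite(img_path.replace('.png', '_mask.png'), mask * 255)  # binary mask assumed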