def test(img_path, data_size='single'):
    """Segment one image with a trained SegNet and write a colorized mask.

    Args:
        img_path: path to the input RGB image. The output-name logic below
            assumes the path contains at least one '/'.
        data_size: 'single' writes to outputs_single/, 'all' to outputs_all/.
    """
    # BUG FIX: the original tested `torch.cuda.is_available` (the function
    # object, which is always truthy) instead of calling it, so the code
    # always chose 'cuda' and crashed on CPU-only machines.
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Image input: float32 in [0, 1], HWC -> CHW, add a batch dimension.
    im = Image.open(img_path)
    im = np.array(im, dtype=np.float32) / 255
    image = np.transpose(im, (2, 0, 1))
    data = torch.from_numpy(image).unsqueeze(0)
    data = Variable(data).to(device)

    model = SegNet(opt, data.shape[1])
    if opt.model_path:
        model.load_state_dict(torch.load(opt.model_path))
    model = model.to(device)
    # NOTE(review): train() during inference keeps batch-norm in batch-stats
    # mode; presumably intentional for this unsupervised setup — confirm.
    model.train()

    # Inference only — no autograd graph needed.
    with torch.no_grad():
        feats, output = model(data)
    # Flatten spatial dims: (H*W, nClass) logits and (H*W, nChannel) features.
    output = output[0].permute(1, 2, 0).contiguous().view(-1, opt.nClass)
    feats = feats[0].permute(1, 2, 0).contiguous().view(-1, opt.nChannel)
    _, pred_clusters = torch.max(output, 1)
    pred_clusters = pred_clusters.data.cpu().numpy()

    # Post processing: relabel clusters by frequency so the most common
    # cluster maps to color 0. (Removed unused `labels = np.unique(...)`.)
    counts = {}
    for i in pred_clusters:
        counts[i] = counts.get(i, 0) + 1
    sorts = sorted(counts.items(), key=lambda x: x[1])
    cache = {}
    cache[sorts[-1][0]] = 0
    n = 1
    for (num, _) in sorts[:-1]:
        cache[num] = n
        n += 1

    # Fixed palette — assumes at most 7 distinct clusters; TODO confirm.
    label_colors = [[10, 10, 10], [0, 0, 255], [0, 255, 0], [255, 0, 0],
                    [255, 255, 0], [0, 255, 255], [255, 0, 255]]
    im_target_rgb = np.array([label_colors[cache[c]] for c in pred_clusters])
    im_target_rgb = im_target_rgb.reshape(im.shape).astype(np.uint8)

    # change path: keep stem (and one inner dot) of the second path segment.
    path = ".".join(img_path.split('/')[1].split('.')[:2])
    #path = img_path.split('/')[1].split('.')[0]
    if data_size == 'single':
        cv2.imwrite("outputs_single/{}_out.png".format(path), im_target_rgb)
    elif data_size == 'all':
        cv2.imwrite("outputs_all/{}_out.png".format(path), im_target_rgb)
def test_autoencoder(epoch_plus, text, index):
    """Run the text-removal SegNet autoencoder on one input and save outputs.

    Writes the predicted text map to ./results/text/text_{index}.png and the
    input overlaid with the binarized mask to ./results/masked/masked_{index}.png.

    Args:
        epoch_plus: checkpoint epoch to load; 0 means use fresh weights.
        text: input image tensor — presumably (1, 3, H, W) in [0, 1]; TODO confirm.
        index: integer suffix used in the output file names.
    """
    use_gpu = torch.cuda.is_available()
    ngpu = torch.cuda.device_count()
    device = torch.device("cuda:0" if (use_gpu and ngpu > 0) else "cpu")

    model = SegNet(3)
    if ngpu > 1:
        model = nn.DataParallel(model)
    if use_gpu:
        model = model.to(device, non_blocking=True)
        text = text.to(device, non_blocking=True)

    if epoch_plus > 0:
        model.load_state_dict(torch.load(
            './autoencoder_models_2/autoencoder_{}.pth'.format(epoch_plus)))
    model.eval()

    # BUG FIX: the original repeated `text.to(device, non_blocking=True)` here
    # and discarded the result — Tensor.to is not in-place, so it was a no-op.
    with torch.no_grad():  # inference only; no autograd graph needed
        predicted = model(text)
    predicted[predicted > 1.0] = 1.0

    save_path1 = './results/text'
    save_path2 = './results/masked'
    # BUG FIX: os.mkdir fails when './results' does not exist;
    # makedirs(exist_ok=True) creates parents and is idempotent.
    os.makedirs(save_path1, exist_ok=True)
    os.makedirs(save_path2, exist_ok=True)

    # Binarize the prediction and overlay it on the input, clamped to [0, 1].
    # (The original also built an unused `binary_predicted` clone — removed.)
    binary_mask = predicted.clone()
    binary_mask[binary_mask > 0.1] = 1.0
    masked = text + binary_mask
    masked[masked > 1.0] = 1.0

    trans = torchvision.transforms.ToPILImage()
    predicted = predicted.squeeze().cpu()
    masked = masked.squeeze().cpu()
    image = trans(predicted)
    image2 = trans(masked)
    image.save(os.path.join(save_path1, 'text_{}.png'.format(index)))
    image2.save(os.path.join(save_path2, 'masked_{}.png'.format(index)))

    # Drop references to the large intermediates (frees GPU memory sooner).
    del text
    del predicted
    del masked
def test(args):
    """Run SegNet inference on a single image and display the class map.

    Args:
        args: namespace with `cfg` (config path), `wts` (weights path) and
            `im_path` (input image path). Requires a CUDA device.
    """
    # NOTE(review): cfg is never used below — confirm load_cfg has no
    # required side effects before removing it.
    cfg = load_cfg(args.cfg)
    weight_path = args.wts
    img_path = args.im_path

    segnet = SegNet().float().cuda()
    segnet.load_state_dict(torch.load(weight_path))
    segnet.eval()

    # HWC uint8 -> CHW, add batch dimension, move to GPU as float.
    im = cv2.imread(img_path).transpose(2, 0, 1)
    im = torch.tensor(im[np.newaxis, :], dtype=torch.float).cuda()

    # FIX: run inference without building an autograd graph.
    with torch.no_grad():
        out = segnet(im)
    out = out.detach().cpu().numpy().transpose(0, 2, 3, 1)
    # Per-pixel argmax over classes -> (H, W) label map, then add channel dim.
    out = np.argmax(out, axis=3).astype(np.uint8)[0]
    out = out[:, :, np.newaxis]
    # Scale class ids to visible gray levels — assumes < 13 classes so the
    # uint8 values stay below 256; TODO confirm.
    out = out * 20
    # NOTE(review): unprofessional window title — consider renaming.
    cv2.imshow('f**k', out)
    cv2.waitKey(0)
def predict_image(dir):
    """Predict the text mask for one image and save two PNGs next to it.

    Writes `tmp_text.png` (predicted text map) and `tmp_masked.png` (input
    overlaid with the binarized mask) into the image's parent directory.

    Args:
        dir: path to the input image. (Name kept for backward compatibility
            even though it shadows the builtin `dir`.)
    """
    use_gpu = torch.cuda.is_available()
    ngpu = torch.cuda.device_count()
    device = torch.device("cuda:0" if (use_gpu and ngpu > 0) else "cpu")

    image_to_tensor = torchvision.transforms.ToTensor()
    tensor_to_image = torchvision.transforms.ToPILImage()

    save_path = Path(dir).parent
    image = Image.open(dir).convert('RGB')
    image = image_to_tensor(image)
    # ToTensor yields (C, H, W); add a batch dimension. (The original
    # unpacked the shape as `c, w, h` — mislabeled — and reshaped.)
    image = image.unsqueeze(0)

    model = SegNet(3)
    if use_gpu:
        model = model.to(device, non_blocking=True)
        image = image.to(device, non_blocking=True)
    model.load_state_dict(torch.load('./models/model.pth', map_location=device))
    model.eval()

    # FIX: inference without autograd bookkeeping.
    with torch.no_grad():
        predicted = model(image)
    predicted[predicted > 1.0] = 1.0

    # Binarize the prediction and overlay it on the input, clamped to [0, 1].
    # (Removed the unused `binary_predicted` clone from the original.)
    binary_mask = predicted.clone()
    binary_mask[binary_mask > 0.1] = 1.0
    masked = image + binary_mask
    masked[masked > 1.0] = 1.0

    predicted = predicted.squeeze().cpu()
    masked = masked.squeeze().cpu()
    image = tensor_to_image(predicted)
    image2 = tensor_to_image(masked)
    image.save(os.path.join(save_path, 'tmp_text.png'))
    image2.save(os.path.join(save_path, 'tmp_masked.png'))
batch_size=BATCH_SIZE, # mini-batch size shuffle=True, # Use shuffle num_workers=4) # number of cpu # If using cuda if CUDA: # Initialize SegNet model on gpu memory model = SegNet(input_channels=NUM_INPUT_CHANNELS, output_channels=NUM_OUTPUT_CHANNELS).cuda(GPU_ID) # Set target class values on gpu memory class_weights = 1.0 / val_dataset.get_class_probability().cuda(GPU_ID) # Set loss function on gpu memory criterion = torch.nn.CrossEntropyLoss( weight=class_weights).cuda(GPU_ID) else: # Initialize SegNet model on cpu memory model = SegNet(input_channels=NUM_INPUT_CHANNELS, output_channels=NUM_OUTPUT_CHANNELS) # Set target class values class_weights = 1.0 / val_dataset.get_class_probability() # Set loss function criterion = torch.nn.CrossEntropyLoss(weight=class_weights) # Load saved model model.load_state_dict(torch.load(SAVED_MODEL_PATH)) # Start to validation validate()
                    help="size of the batches")
parser.add_argument("--img_size", type=int, default=512,
                    help="size of each image dimension")
parser.add_argument("--checkpoint_model", type=str,
                    help="path to checkpoint model")
opt = parser.parse_args()
print(opt)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.makedirs("output", exist_ok=True)

# Build the model and load trained weights for evaluation.
model = SegNet().to(device)
model.load_state_dict(torch.load(opt.checkpoint_model))
model.eval()

# Deterministic order (shuffle=False) so outputs line up with image paths.
dataloader = DataLoader(ImageFolder(opt.image_folder, img_size=opt.img_size),
                        batch_size=opt.batch_size,
                        shuffle=False,
                        num_workers=4,
                        )

# Tensor constructor matching the available device (legacy pre-`to(device)` idiom).
Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

print("\nPerforming object detection:")
prev_time = time.time()  # used below to report per-batch timing — confirm
for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
def train_autoencoder(epoch_plus):
    """Train the SegNet text-extraction autoencoder, resuming from `epoch_plus`.

    Relies on module globals: `ngpu`, `use_gpu`, `device`, `data_dir`,
    `batch_size`, plus helpers `get_dataloader` and `AverageMeter`.
    Checkpoints and TensorBoard logs go to ./autoencoder_models_2 and
    ./runs_autoencoder_2 respectively.

    Args:
        epoch_plus: number of epochs already trained; training continues up
            to a total of 400 epochs.
    """
    writer = SummaryWriter(log_dir='./runs_autoencoder_2')
    num_epochs = 400 - epoch_plus
    lr = 0.001
    bta1 = 0.9
    bta2 = 0.999
    weight_decay = 0.001

    # model = autoencoder(nchannels=3, width=172, height=600)
    model = SegNet(3)
    if ngpu > 1:
        model = nn.DataParallel(model)
    if use_gpu:
        model = model.to(device, non_blocking=True)

    # Resume from the matching checkpoint when continuing a previous run.
    if epoch_plus > 0:
        model.load_state_dict(
            torch.load('./autoencoder_models_2/autoencoder_{}.pth'.format(
                epoch_plus)))

    criterion = nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 betas=(bta1, bta2),
                                 weight_decay=weight_decay)

    for epoch in range(num_epochs):
        # One random rotation angle per epoch; RandomRotation((d, d)) applies
        # exactly that angle to every sample in the epoch.
        degree = randint(-180, 180)
        transforms = torchvision.transforms.Compose([
            torchvision.transforms.CenterCrop((172, 200)),
            torchvision.transforms.Resize((172, 200)),
            torchvision.transforms.RandomRotation((degree, degree)),
            torchvision.transforms.ToTensor()
        ])
        dataloader = get_dataloader(data_dir,
                                    train=True,
                                    transform=transforms,
                                    batch_size=batch_size)

        model.train()
        epoch_losses = AverageMeter()

        # Progress bar total assumes a 1000-sample training set — TODO confirm.
        with tqdm(total=(1000 - 1000 % batch_size)) as _tqdm:
            _tqdm.set_description('epoch: {}/{}'.format(
                epoch + 1 + epoch_plus, num_epochs + epoch_plus))
            for data in dataloader:
                gt, text = data
                if use_gpu:
                    gt, text = gt.to(device, non_blocking=True), text.to(
                        device, non_blocking=True)
                predicted = model(text)
                # loss = criterion_bce(predicted, gt) + criterion_dice(predicted, gt)
                loss = criterion(
                    predicted, gt - text
                )  # predicts extracted text in white, all others in black
                epoch_losses.update(loss.item(), len(gt))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                _tqdm.set_postfix(loss='{:.6f}'.format(epoch_losses.avg))
                _tqdm.update(len(gt))

        save_path = './autoencoder_models_2'
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        # Visualization tensors from the LAST batch of the epoch.
        gt_text = gt - text
        predicted_mask = text + predicted
        torch.save(
            model.state_dict(),
            os.path.join(save_path,
                         'autoencoder_{}.pth'.format(epoch + 1 + epoch_plus)))
        writer.add_scalar('Loss', epoch_losses.avg, epoch + 1 + epoch_plus)
        writer.add_image('text/text_image_{}'.format(epoch + 1 + epoch_plus),
                         text[0].squeeze(), epoch + 1 + epoch_plus)
        writer.add_image('gt/gt_image_{}'.format(epoch + 1 + epoch_plus),
                         gt[0].squeeze(), epoch + 1 + epoch_plus)
        writer.add_image('gt_text/gt_image_{}'.format(epoch + 1 + epoch_plus),
                         gt_text[0].squeeze(), epoch + 1 + epoch_plus)
        writer.add_image(
            'predicted/predicted_image_{}'.format(epoch + 1 + epoch_plus),
            predicted_mask[0].squeeze(), epoch + 1 + epoch_plus)
        writer.add_image(
            'predicted_text/predicted_image_{}'.format(epoch + 1 + epoch_plus),
            predicted[0].squeeze(), epoch + 1 + epoch_plus)

    writer.close()
# GPU is selected by passing an id via --gpu; absent flag means CPU.
CUDA = args.gpu is not None
GPU_ID = args.gpu

val_dataset = PascalVOCDataset(list_file=val_path,
                               img_dir=img_dir,
                               mask_dir=mask_dir)
val_dataloader = DataLoader(val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=True,
                            num_workers=4)

if CUDA:
    model = SegNet(input_channels=NUM_INPUT_CHANNELS,
                   output_channels=NUM_OUTPUT_CHANNELS).cuda(GPU_ID)
    # Inverse class frequency -> rarer classes weigh more in the loss.
    class_weights = 1.0 / val_dataset.get_class_probability().cuda(GPU_ID)
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda(GPU_ID)
else:
    model = SegNet(input_channels=NUM_INPUT_CHANNELS,
                   output_channels=NUM_OUTPUT_CHANNELS)
    class_weights = 1.0 / val_dataset.get_class_probability()
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

print('LOADING...')
# map_location='cpu' lets a GPU-saved checkpoint load on any host; the
# state-dict copy then lands on whatever device the model already occupies.
model.load_state_dict(torch.load(SAVED_MODEL_PATH, map_location='cpu'))

# NOTE(review): validate() takes no arguments — presumably it reads `model`,
# `criterion` and `val_dataloader` from module globals; confirm.
validate()
# Command-line setup and single-shot validation run for SegNet on Pascal VOC.
parser.add_argument('--batch-size', type=int, default=8)
parser.add_argument('--data_root', type=str,
                    default='/data/pascal/VOCdevkit/VOC2012')
parser.add_argument('--val_path', type=str,
                    default='ImageSets/Segmentation/val.txt')
parser.add_argument('--img_dir', type=str, default='JPEGImages')
parser.add_argument('--mask_dir', type=str, default='SegmentationClass')
parser.add_argument('--model_path', type=str, required=True)
args = parser.parse_args()
print(args)

data_root = args.data_root
val_dir = os.path.join(data_root, args.val_path)
img_dir = os.path.join(data_root, args.img_dir)
mask_dir = os.path.join(data_root, args.mask_dir)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SegNet().to(device)
# FIX: without map_location a GPU-saved checkpoint fails to load on a
# CPU-only machine; remap the tensors to the selected device.
model.load_state_dict(torch.load(args.model_path, map_location=device))
# NOTE(review): model.eval() is never called here — presumably validate()
# handles train/eval mode itself; confirm.

val_loss, val_score, val_class_iou = validate(
    model=model,
    val_path=val_dir,
    img_path=img_dir,
    mask_path=mask_dir,
    batch_size=args.batch_size
)

for k, v in val_score.items():
    print(k, v)
for k, v in val_class_iou.items():
    print(k, v)
# BUG FIX: the original concatenated str + val_loss, which raises TypeError
# when validate() returns a numeric loss; convert explicitly.
print('\nval_loss:' + str(val_loss))
                             img_size=512,
                             is_transform=True)
train_dataloader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=4)

model = SegNet().to(device)
# Inverse class frequency -> rarer classes get larger loss weight.
class_weights = 1.0 / train_dataset.get_class_probability()
print(class_weights)
criterion = torch.nn.CrossEntropyLoss(weight=class_weights).to(device)

# start from checkpoint
if args.checkpoint:
    model.load_state_dict(torch.load(args.checkpoint))

optimizer = torch.optim.SGD(model.parameters(),
                            lr=LEARNING_RATE,
                            momentum=MOMENTUM)

# training
is_better = True  # presumably tracks loss improvement in the loop below — confirm
prev_loss = float('inf')
epoch_loss = AverageMeter()

logger.info(args)
model.train()
for epoch in range(args.epochs):
    t_start = time.time()
if CUDA:
    model = SegNet(input_channels=NUM_INPUT_CHANNELS,
                   output_channels=NUM_OUTPUT_CHANNELS).cuda(GPU_ID)
    # Inverse class frequency -> rarer classes weigh more in the loss.
    class_weights = 1.0 / train_dataset.get_class_probability().cuda(
        GPU_ID)
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda(GPU_ID)
else:
    print('MODEL')
    model = SegNet(input_channels=NUM_INPUT_CHANNELS,
                   output_channels=NUM_OUTPUT_CHANNELS)
    # NOTE(review): VGG weight init happens only on the CPU path — the CUDA
    # branch never calls init_vgg_weigts() (sic); confirm whether that
    # asymmetry is intended.
    model.init_vgg_weigts()
    print('STATE_DICT')
    class_weights = 1.0 / train_dataset.get_class_probability()
    print('class_weights', len(class_weights))
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights)

if args.checkpoint:
    print('Loading Checkpoint')
    # Remap to CPU when no GPU id was given so GPU-saved checkpoints load.
    if GPU_ID is None:
        model.load_state_dict(
            torch.load(args.checkpoint, map_location='cpu'))
    else:
        model.load_state_dict(torch.load(args.checkpoint))

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# NOTE(review): train() takes no arguments — presumably it reads `model`,
# `criterion` and `optimizer` from module globals; confirm.
train()