def print_caption(model, device, dataset, image_path="child.jpg"):
    """Print the caption that ``model`` generates for one image file.

    The image is resized to 299x299, converted to a tensor, normalised with
    mean 0.5 / std 0.5 on each of the three channels, given a leading batch
    dimension and moved to ``device`` before it is passed to
    ``generate_caption``.

    Args:
        model: Captioning model; it is switched to eval mode as a side effect.
        device: Device the image tensor is moved to before inference.
        dataset: Object whose ``vocab`` attribute is forwarded to
            ``generate_caption``.
        image_path: Path of the image to caption. Defaults to ``"child.jpg"``,
            the path that was previously hard-coded.
    """
    transform = transforms.Compose(
        [
            transforms.Resize((299, 299)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
    )
    model.eval()
    # Open through a context manager so the file handle is released as soon
    # as the pixels have been copied into the tensor.
    with Image.open(image_path) as raw_image:
        test_img = transform(raw_image.convert("RGB")).unsqueeze(0)
    print(generate_caption(model, test_img.to(device), dataset.vocab))
def main():
    """Prepare output directories, build the WESPE model and train or test it.

    Everything is driven by the module-level ``config`` object:

    * ``tensorboard_dir`` / ``checkpoint_dir`` joined with ``phone`` give the
      two output directories, which are created if missing.
    * ``use_cuda`` selects ``cuda:0`` or ``cpu``.
    * a non-zero ``load_iter`` triggers ``load_checkpoints``.
    * ``is_train`` chooses between ``train`` (with a summary writer) and
      ``test``.
    """
    tensorboard_path = os.path.join(config.tensorboard_dir, config.phone)
    checkpoint_path = os.path.join(config.checkpoint_dir, config.phone)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(tensorboard_path, exist_ok=True)
    os.makedirs(checkpoint_path, exist_ok=True)

    device = torch.device('cuda:0' if config.use_cuda else 'cpu')
    models = WESPE(config).to(device)
    if config.load_iter != 0:
        load_checkpoints(models)

    if config.is_train:
        models.train()
        writer = SummaryWriter(logdir=tensorboard_path)
        try:
            train(models, writer, device)
        finally:
            # Flush and release the event file even when training aborts.
            writer.close()
    else:
        models.eval()
        test(models, device)
def generate_caption(model, image, vocabulary, max_length=50):
    """Greedily decode a caption for ``image``.

    The encoder output seeds the decoder LSTM; at every step the token with
    the highest score is chosen, recorded, embedded and fed back as the next
    input. Decoding stops after ``max_length`` steps or as soon as the chosen
    token maps to ``"<EOS>"``.

    Args:
        model: Object exposing ``eval()``, ``encoder`` and a ``decoder`` with
            ``lstm``, ``linear`` and ``embedding`` callables. It is switched
            to eval mode as a side effect.
        image: Input passed to ``model.encoder``. Each step must yield exactly
            one prediction, because the token id is read with ``.item()``.
        vocabulary: Object whose ``itos`` maps token ids to words.
        max_length: Maximum number of decoding steps.

    Returns:
        list: The decoded words in generation order, including ``"<EOS>"``
        when it was produced.
    """
    model.eval()
    token_ids = []
    # Inference only, so gradients are not tracked.
    with torch.no_grad():
        x = model.encoder(image).unsqueeze(0)
        state = None
        for _ in range(max_length):
            hidden, state = model.decoder.lstm(x, state)
            output = model.decoder.linear(hidden.squeeze(0))
            predicted = output.argmax(1)
            # Read the id once as a plain int instead of round-tripping
            # through numpy and a nested list.
            token_id = predicted.item()
            token_ids.append(token_id)
            # The chosen token becomes the input of the next step.
            x = model.decoder.embedding(predicted).unsqueeze(0)
            if vocabulary.itos[token_id] == "<EOS>":
                break
    return [vocabulary.itos[token_id] for token_id in token_ids]
def test(model):
    """Run ``model`` over ``test_loader``, score it and plot the first samples.

    Relies on names defined elsewhere in the module: ``device``,
    ``test_loader``, ``label2image``, ``iou`` and ``pic_pred``. For every
    batch the per-pixel arg-max over dimension 1 of the model output is
    converted with ``label2image``; all inputs, targets and converted
    predictions are concatenated, scored with ``iou`` and the first four of
    each are handed to ``pic_pred``.

    Args:
        model: Network to evaluate; it is moved to ``device`` and switched to
            eval mode as a side effect.

    Returns:
        Whatever ``iou(seg, tag)`` returns (previously computed but dropped).
    """
    img_list = []
    tag_list = []
    seg_list = []
    with torch.no_grad():
        model.to(device)
        model.eval()
        for img, tag in test_loader:
            img = img.to(device)
            # Keep a CPU copy: ``.numpy()`` below raises on CUDA tensors, and
            # holding every batch on the accelerator wastes device memory.
            img_list.append(img.cpu())
            tag_list.append(tag)
            output = model(img)
            label = output.argmax(dim=1).cpu()
            seg_list.append(torch.from_numpy(label2image(label)))
        img = torch.cat(img_list, dim=0)
        tag = torch.cat(tag_list, dim=0)
        seg = torch.cat(seg_list, dim=0)
        score = iou(seg, tag)
        pic_pred(img[0:4].numpy(), tag[0:4].numpy(), seg[0:4].numpy())
    return score
loss = criterion(output_pred, target_output) loss.backward() optimizer.step() topv, topi = output_pred.topk(1) check_target = (topi.reshape(-1) == target_output) ACC += check_target.float().sum() if batchHelper_train._index_in_epoch % batch_size == 0: print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.9f} current ACC {} '\ .format(y, batchHelper_train._index_in_epoch, totalSample_train,\ (batchHelper_train._index_in_epoch * 100.) / totalSample_train, \ loss.item(),(check_target.float().sum()/batch_size)*100)) ############# validation ##################### batchHelper_val.resetIndex() models.eval() ACC = 0 coutBatch = 0 while batchHelper_val._epochs_completed == 0: input_image, labelImage = batchHelper_val.next_batch( batch_size, True) inputImageDataset = torch.from_numpy(input_image) inputImageDataset = inputImageDataset.to(device=device, dtype=torch.float) target_output = torch.from_numpy(labelImage.astype(int)) target_output = target_output.to(device=device, dtype=torch.long) output_pred = models(inputImageDataset) topv, topi = output_pred.topk(1) check_target = (topi.reshape(-1) == target_output) ACC += check_target.float().sum()
def _freeze_bn(self):
    """Put every ``nn.BatchNorm2d`` found in ``self.modules()`` into eval mode."""
    batch_norm_layers = (
        layer for layer in self.modules() if isinstance(layer, nn.BatchNorm2d)
    )
    for layer in batch_norm_layers:
        layer.eval()
def _draw_detections(pic_image, boxes, labels, scores, label_dict, font):
    """Draw each given box and its "<label> <score>" line onto ``pic_image``."""
    import random

    canvas = ImageDraw.Draw(pic_image)
    text_offset = 0
    for box, label, score in zip(boxes, labels, scores):
        # One random, not-too-dark colour per detection, shared by the box
        # outline and its text line.
        rgb = (
            random.randint(20, 255),
            random.randint(20, 255),
            random.randint(20, 255),
        )
        x0, y0, x1, y1 = box
        canvas.text((0, text_offset), f'{label_dict[label]} {score} ', rgb, font=font)
        canvas.rectangle([x0, y0, x1, y1], outline=rgb, width=3)
        # Text lines are stacked down the left edge, 20 px apart.
        text_offset += 20


def test_visualisation(model_path, models, test_images_paths, test_dataset, mode, device):
    '''
    Run the detector on the test images and save copies annotated with the
    predicted bounding boxes, labels and scores.

    Inputs:
    - model_path : string (path to the saved state dict)
    - models : model instance the state dict is loaded into
    - test_images_paths : iterable of directories; every name in
      test_dataset.images is joined onto each of them
    - test_dataset : Dataset (testing dataset); its length sizes the progress
      bar and its ``images`` attribute lists the file names
    - mode : converted with str() and used as the output sub-directory name
    - device : device the checkpoint is mapped to and the model is moved to

    Outputs:
    Annotated images saved under results/sample_bbox/<mode>/ as
    "<name>_samplebbox.png". Images whose mean score is NaN, or that have no
    boxes, are skipped. Only detections with score >= 0.5 are drawn.

    Returns:
    List with the mean detection score of every saved image.
    '''
    label_dict = {1: 'Car', 2: 'Van', 3: 'Truck', 0: 'Background'}
    directory = os.path.join('results', 'sample_bbox', str(mode))

    print(f'Loading model to score images. Scores saved {model_path}')
    mean_test_accuracy = []
    # map_location lets a checkpoint saved on GPU load on a CPU-only host.
    model_file = torch.load(model_path, map_location=device)
    models.load_state_dict(model_file)
    print('Model loaded')
    models.to(device)
    models.eval()

    # Loaded on first use and then reused, instead of once per drawn box.
    font = None
    with torch.no_grad():
        for path in test_images_paths:
            with tqdm(total=len(test_dataset)) as bar:
                for image in test_dataset.images:
                    l_img, pic_image = load_image(os.path.join(path, image))
                    output = models(l_img)
                    labels = output[0]['labels']
                    scores = output[0]['scores']
                    rect = output[0]['boxes']

                    mean_score = scores.mean()
                    # No usable detections: skip the image but still advance
                    # the progress bar.
                    if torch.isnan(mean_score).any() or rect.nelement() == 0:
                        bar.update()
                        continue

                    int_rects = rect.int().cpu().numpy()
                    labels = labels.int().cpu().numpy()
                    scores = scores.float().cpu().numpy()
                    keep = scores >= 0.5
                    if keep.any():
                        if font is None:
                            font = ImageFont.truetype("bevan.ttf", 20)
                        _draw_detections(
                            pic_image,
                            int_rects[keep],
                            labels[keep],
                            scores[keep],
                            label_dict,
                            font,
                        )

                    os.makedirs(directory, exist_ok=True)
                    # splitext keeps the full stem for any extension length.
                    stem = os.path.splitext(image)[0]
                    pic_image.save(os.path.join(directory, f'{stem}_samplebbox.png'))

                    mean_test_accuracy.append(mean_score.float().cpu().numpy())
                    bar.update()
    print('FINISHED TESTING')
    return mean_test_accuracy