# --------------------------------------------------------------------------------------------
# BEFORE TRAINING STARTS
# --------------------------------------------------------------------------------------------

# Tensorboard summary writer for logging losses and metrics.
tensorboard_writer = SummaryWriter(logdir=_A.serialization_dir)

# Checkpoint manager to serialize checkpoints periodically while training and keep track of
# best performing checkpoint.
checkpoint_manager = CheckpointManager(model, optimizer, _A.serialization_dir, mode="max")

# Evaluator submits predictions to EvalAI and retrieves results.
evaluator = NocapsEvaluator(phase="val")

# Load checkpoint to resume training from there if specified.
# Infer iteration number through file name (it's hacky but very simple), so don't rename
# saved checkpoints if you intend to continue training.
if _A.start_from_checkpoint != "":
    training_checkpoint: Dict[str, Any] = torch.load(
        _A.start_from_checkpoint, map_location={"cuda:0": "cpu", "cuda:1": "cpu"}
    )
    for key in training_checkpoint:
        if key == "optimizer":
            # optimizer.load_state_dict(training_checkpoint[key])
            continue
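    # The comment above mentions inferring the resume iteration from the checkpoint file
    # name. A minimal sketch of that parsing, assuming checkpoints are saved as
    # "checkpoint_<iteration>.pth" (neither the naming scheme nor `start_iteration`
    # appears in the excerpt above):
    # e.g. "checkpoints/checkpoint_20000.pth" -> resume from iteration 20001.
    start_iteration = int(_A.start_from_checkpoint.split("_")[-1][:-4]) + 1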
with torch.no_grad():
    # shape: (batch_size, max_caption_length)
    batch_predictions = model(batch["image_features"])["predictions"]

for i, image_id in enumerate(batch["image_id"]):
    instance_predictions = batch_predictions[i, :]

    # De-tokenize caption tokens and trim until first "@@BOUNDARY@@".
    caption = [vocabulary.get_token_from_index(p.item()) for p in instance_predictions]
    eos_occurences = [j for j in range(len(caption)) if caption[j] == "@@BOUNDARY@@"]
    caption = caption[: eos_occurences[0]] if len(eos_occurences) > 0 else caption

    predictions.append({"image_id": image_id.item(), "caption": " ".join(caption)})

# Print first 25 captions with their Image ID.
for k in range(25):
    print(predictions[k]["image_id"], predictions[k]["caption"])

json.dump(predictions, open(_A.output_path, "w"))

if _A.evalai_submit:
    evaluator = NocapsEvaluator("test" if "test" in _C.DATA.TEST_FEATURES else "val")
    evaluation_metrics = evaluator.evaluate(predictions)

    print(f"Evaluation metrics for checkpoint {_A.checkpoint_path}:")
    for metric_name in evaluation_metrics:
        print(f"\t{metric_name}:")
        for domain in evaluation_metrics[metric_name]:
            print(f"\t\t{domain}:", evaluation_metrics[metric_name][domain])
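# For reference, the JSON written to `_A.output_path` (and submitted via NocapsEvaluator
# when `_A.evalai_submit` is set) is a flat list with one entry per image. The captions
# below are made-up placeholders; only the two keys come from the code above:
#
# [
#     {"image_id": 1, "caption": "a person riding a horse on a beach"},
#     {"image_id": 2, "caption": "a plate of food sitting on a table"},
#     ...
# ]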
batch["penultimate_features"], fsm=batch.get("fsm", None), num_constraints=batch.get("num_constraints", None), )["predictions"] for i, image_id in enumerate(batch["image_id"]): instance_predictions = batch_predictions[i, :] # De-tokenize caption tokens and trim until first "@@BOUNDARY@@". caption = [vocabulary.get_token_from_index(p.item()) for p in instance_predictions] eos_occurences = [j for j in range(len(caption)) if caption[j] == "@@BOUNDARY@@"] caption = caption[: eos_occurences[0]] if len(eos_occurences) > 0 else caption predictions.append({"image_id": image_id.item(), "caption": " ".join(caption)}) # Print first 25 captions with their Image ID. for k in range(25): print(predictions[k]["image_id"], predictions[k]["caption"]) json.dump(predictions, open(_A.output_path, "w")) if _A.evalai_submit: evaluator = NocapsEvaluator("val") evaluation_metrics = evaluator.evaluate(predictions) print(f"Evaluation metrics for checkpoint {_A.checkpoint_path}:") for metric_name in evaluation_metrics: print(f"\t{metric_name}:") for domain in evaluation_metrics[metric_name]: print(f"\t\t{domain}:", evaluation_metrics[metric_name][domain])
    # De-tokenize caption tokens and trim until first "@@BOUNDARY@@".
    caption = [vocabulary.get_token_from_index(p.item()) for p in instance_predictions]
    eos_occurences = [j for j in range(len(caption)) if caption[j] == "@@BOUNDARY@@"]
    caption = caption[: eos_occurences[0]] if len(eos_occurences) > 0 else caption

    predictions.append({"image_id": image_id.item(), "caption": " ".join(caption)})

# Print first 25 captions with their Image ID.
for k in range(25):
    print(predictions[k]["image_id"], predictions[k]["caption"])

json.dump(predictions, open(_A.output_path, "w"))

if _A.evalai_submit:
    evaluator = NocapsEvaluator("val" if _A.run_val else "test")
    evaluation_metrics = evaluator.evaluate(predictions)

    print(f"Evaluation metrics for checkpoint {_A.checkpoint_path}:")
    for metric_name in evaluation_metrics:
        print(f"\t{metric_name}:")
        for domain in evaluation_metrics[metric_name]:
            print(f"\t\t{domain}:", evaluation_metrics[metric_name][domain])
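# The de-tokenize-and-trim block above is repeated verbatim in each inference path.
# A small helper along these lines could keep the call sites in sync; the name
# `decode_caption` is hypothetical, the logic is copied from the blocks above.

def decode_caption(instance_predictions, vocabulary, boundary_token="@@BOUNDARY@@"):
    # Map predicted token indices back to string tokens.
    caption = [vocabulary.get_token_from_index(p.item()) for p in instance_predictions]
    # Trim everything from the first boundary token onwards.
    if boundary_token in caption:
        caption = caption[: caption.index(boundary_token)]
    return " ".join(caption)

# Usage inside the per-image loop would then be:
#     predictions.append(
#         {"image_id": image_id.item(), "caption": decode_caption(instance_predictions, vocabulary)}
#     )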