def test_evaluate_2_gt_boxes_incorrect_confidence(self):
    """
    Test case with 2 ground truth boxes and 1 predicted box: the prediction matches one of the
    ground truth boxes but carries a lower confidence score (0.8), so only half the ground truth
    is recovered and the expected mAP is 0.5.
    :return:
    """
    # Arrange
    sut = MAPEvaluator()

    target = [{
        "image_id": torch.tensor(1),
        "boxes": torch.tensor([[1, 2, 3, 4],
                               [11, 12, 13, 14]]).float(),
        "labels": torch.tensor([1, 1]),
        "iscrowd": torch.tensor([0, 0])
    }]

    # Only the first ground truth box [1, 2, 3, 4] is predicted
    predicted = [{
        "image_id": torch.tensor(1),
        "boxes": torch.tensor([[1, 2, 3, 4]]).float(),
        "labels": torch.tensor([1]),
        "scores": torch.tensor([0.8])
    }]

    expected_map = .5

    # Act
    actual = sut(target, predicted)

    # Assert
    self.assertEqual(round(expected_map, 2), round(actual, 2))
def test_evaluate_single_full_match(self):
    """
    Test the simple case of a single prediction that exactly matches the single ground truth box.
    :return:
    """
    # Arrange
    sut = MAPEvaluator()

    target = [{
        "image_id": torch.tensor(1),
        "boxes": torch.tensor([[1, 2, 3, 4]]).float(),
        "labels": torch.tensor([1]),
        "iscrowd": torch.tensor([0])
    }]

    predicted = [{
        "image_id": torch.tensor(1),
        "boxes": torch.tensor([[1, 2, 3, 4]]).float(),
        "labels": torch.tensor([1]),
        "scores": torch.tensor([1.0])
    }]

    expected_map_score = 1.0

    # Act
    actual = sut(target, predicted)

    # Assert
    self.assertEqual(expected_map_score, round(actual, 2))
def get(self, train_dataset):
    accumulation_steps = int(self._get_value(self.additional_args, "accumulation_steps", "1"))
    self.logger.info("Using accumulation steps {}".format(accumulation_steps))

    evaluator = MAPEvaluator(max_detections_per_image=train_dataset.max_detections_per_image)

    trainer = Train(patience_epochs=self.patience_epochs,
                    early_stopping=self.early_stopping,
                    epochs=self.epochs,
                    evaluator=evaluator,
                    accumulation_steps=accumulation_steps)

    # Model
    self.logger.info("Using model {}".format(self.model_factory_name))
    model_factory = ModelFactoryServiceLocator().get_factory(self.model_factory_name)
    model = model_factory.get_model(num_classes=train_dataset.num_classes)

    # If a checkpoint file is available, load the model from the checkpoint
    if self.checkpoint_dir is not None:
        model_files = list(glob.glob("{}/*.pth".format(self.checkpoint_dir)))
        if len(model_files) > 0:
            model_file = model_files[0]
            self.logger.info("Loading checkpoint {}, found {} checkpoint files".format(
                model_file, len(model_files)))
            model = model_factory.load_model(model_file, num_classes=train_dataset.num_classes)

    # TODO: Enable multi-GPU training properly; nn.DataParallel doesn't really work yet
    if torch.cuda.device_count() > 1:
        self.logger.info("Using nn.DataParallel / multi-GPU. Currently not working..")
        model = nn.DataParallel(model)
        # Scale up the batch size so that each GPU still receives the original per-device batch
        self.batch_size = self.batch_size * torch.cuda.device_count()

    self.logger.info("Using model {}".format(type(model)))

    # Define optimiser
    learning_rate = float(self._get_value(self.additional_args, "learning_rate", ".0001"))
    self.logger.info("Using learning_rate {}".format(learning_rate))

    # weight_decay = float(self._get_value(self.additional_args, "weight_decay", "5e-5"))
    # momentum = float(self._get_value(self.additional_args, "momentum", ".9"))
    # optimiser = SGD(lr=learning_rate, params=model.parameters(), momentum=momentum, weight_decay=weight_decay)
    optimiser = Adam(lr=learning_rate, params=model.parameters())
    self.logger.info("Using optimiser {}".format(type(optimiser)))

    # Kick off the training pipeline
    train_pipeline = TrainPipeline(batch_size=self.batch_size,
                                   optimiser=optimiser,
                                   trainer=trainer,
                                   num_workers=self.num_workers,
                                   model=model)

    return train_pipeline
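# A minimal, self-contained sketch (an assumption, not part of this builder) illustrating the
# effective batch size implied by the choices above: gradient accumulation multiplies the samples
# seen per optimiser update, and the DataParallel branch scales the loader batch size by the GPU
# count so each device keeps its original per-device batch.
def effective_batch_size(per_device_batch_size, accumulation_steps=1, num_gpus=1):
    """Samples contributing to a single optimiser update under accumulation and data parallelism."""
    return per_device_batch_size * accumulation_steps * max(num_gpus, 1)


# Example: per-device batch of 2, accumulation_steps=4, 2 GPUs -> 16 samples per optimiser update
# print(effective_batch_size(2, accumulation_steps=4, num_gpus=2))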
def test_evaluate_2_images(self):
    """
    Test case where there are 2 images as input
    :return:
    """
    # Arrange
    sut = MAPEvaluator()

    target = [{
        "image_id": torch.tensor(1),
        "boxes": torch.tensor([[1, 2, 3, 4]]).float(),
        "labels": torch.tensor([1]),
        "area": torch.tensor([1.0]),
        "iscrowd": torch.tensor([0])
    },
        {
            "image_id": torch.tensor(2),
            "boxes": torch.tensor([[1, 2, 3, 4]]).float(),
            "labels": torch.tensor([1]),
            "area": torch.tensor([1.0]),
            "iscrowd": torch.tensor([0])
        }]

    predicted = [{
        "image_id": torch.tensor(1),
        "boxes": torch.tensor([[1, 2, 3, 4],
                               [5, 6, 7, 8]]).float(),
        "labels": torch.tensor([1, 1]),
        "scores": torch.tensor([1.0, 1.0])
    },
        {
            "image_id": torch.tensor(2),
            "boxes": torch.tensor([[7, 8, 9, 10]]).float(),
            "labels": torch.tensor([1]),
            "scores": torch.tensor([1.0])
        }]

    expected_map = .5

    # Act
    actual = sut(target, predicted)

    # Assert
    self.assertEqual(expected_map, round(actual, 2))
def test_evaluate_2_p_boxes_correct_confidence(self):
    """
    Test case where 2 boxes were predicted, only one matches the single ground truth box, and the
    matching box has the higher confidence score, so the expected mAP is 1.0.
    :return:
    """
    # Arrange
    sut = MAPEvaluator()

    target = [{
        "image_id": torch.tensor(1),
        "boxes": torch.tensor([[1, 2, 3, 4]]).float(),
        "labels": torch.tensor([1]),
        "area": torch.tensor([1.0]),
        "iscrowd": torch.tensor([0])
    }]

    # The second predicted box matches the ground truth [1, 2, 3, 4] and has the higher score
    predicted = [{
        "image_id": torch.tensor(1),
        "boxes": torch.tensor([
            [5, 6, 7, 8],
            [1, 2, 3, 4],
        ]).float(),
        "labels": torch.tensor([1, 1]),
        "scores": torch.tensor([0.5, 0.7])
    }]

    expected_map = 1.0

    # Act
    actual = sut(target, predicted)

    # Assert
    self.assertEqual(round(expected_map, 2), round(actual, 2))
def evaluateDataset(model, sess, tokenizer, vectorizer, ds, previous_losses=None):
    embedding_vectorizer = EmbeddingLookup(
        "/home/martin/data/qatarliving/embeddings/qatarliving_qc_size100_win10_mincnt5_rpl_skip1_phrFalse_2016_02_23.word2vec.bin")
    seq2seq_embedding_vectorizer = Seq2SeqEmbeddingLookup(sess)

    additional_vectorizers = [
        {'vectorizer': vectorizer, 'label': 'tfidf-cosine'},
        {'vectorizer': embedding_vectorizer, 'label': 'embeddings'}
    ]

    from evaluators.bleu_single_evaluator import BLEUSingleEvaluator
    evaluators = [BLEUEvaluator(),
                  MAPEvaluator(),
                  PersisterEvaluator(
                      os.path.join(FLAGS.train_dir,
                                   'responseEvolution-%s-%s' % (ds, model.global_step.eval()))),
                  VocabularyEvaluator(),
                  MAPEvaluatorSummed(),
                  LengthEvaluator(),
                  TTREvaluator(),
                  MegaMAPEvaluator(additional_vectorizers),
                  BLEUSingleEvaluator()
                  ]

    visitDatasetParameterized(sess, model, tokenizer, vectorizer, ds, evaluators)

    MAP = evaluators[1].results()
    bleu_results = evaluators[0].results()
    BLEU = bleu_results['BLEU']
    BLEU_ALL = bleu_results['BLEU_ALL']
    MAP_BLEU = evaluators[8].results()['MAP-BLEU']
    meanAvgBLEU = evaluators[8].results()['meanAvgBLEU']

    # The persister evaluator saves to a separate file, so just call results() for its side effect
    evaluators[2].results()

    vocab_eval = evaluators[3].results()
    MAP_SUMMED = evaluators[4].results()
    LENGTH = evaluators[5].results()
    TTR = evaluators[6].results()
    MEGA_MAP_SCORES = evaluators[7].results()

    score_path = os.path.join(FLAGS.train_dir, 'scoreEvolution-%s' % ds)

    # Write the header row once, when the score file is first created
    if not os.path.isfile(score_path):
        with open(score_path, 'w') as out:
            metrics = ["Global step", "Training Perplexity", "MAP", "MAP_SUMMED",
                       "MAP-tfidf", "MAP-tfidf_SUMMED",
                       "MAP-embeddings", "MAP-embeddings_SUMMED",
                       "MAP-bm25", "MAP-bm25_SUMMED",
                       "MAP_AVG", "MAP_BLEU_SUMMED", "MAP-BLEU", "meanAvgBLEU",
                       "BLEU_POS", "BLEU_ALL",
                       "Vocab size", "Target Vocab Size", "Intersection Vocab size",
                       "LENGTH", "TTR"]
            out.write("\t".join(metrics) + "\n")

    # Append one tab-separated row of scores for the current global step
    with open(score_path, 'a') as out:
        out.write("\t".join([
            str(model.global_step.eval()),
            str(previous_losses[-1]) if previous_losses else 'n/a',
            str(MAP),
            str(MAP_SUMMED),
            str(MEGA_MAP_SCORES['tfidf-cosine']),
            str(MEGA_MAP_SCORES['tfidf-cosine_SUMMED']),
            str(MEGA_MAP_SCORES['embeddings']),
            str(MEGA_MAP_SCORES['embeddings_SUMMED']),
            str(MEGA_MAP_SCORES['bm25']),
            str(MEGA_MAP_SCORES['bm25_SUMMED']),
            str(MEGA_MAP_SCORES['MAP_AVG']),
            str(MEGA_MAP_SCORES['bleu_map_SUMMED']),
            str(MAP_BLEU),
            str(meanAvgBLEU),
            str(BLEU),
            str(BLEU_ALL),
            str(vocab_eval[0][1]),
            str(vocab_eval[1][1]),
            str(vocab_eval[2][1]),
            str(LENGTH),
            str(TTR)
        ]) + "\n")

    return MAP, BLEU, MEGA_MAP_SCORES
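# A minimal standalone sketch (an assumption, not part of this module) of reading the
# tab-separated scoreEvolution-<ds> file written above back into dicts, e.g. for plotting
# MAP or BLEU against the global step. The path used in the example is a placeholder.
import csv

def read_score_evolution(score_path):
    """Return one dict per evaluation row, keyed by the header written in evaluateDataset."""
    with open(score_path) as score_file:
        return list(csv.DictReader(score_file, delimiter="\t"))

# rows = read_score_evolution("scoreEvolution-dev")                      # placeholder path
# map_by_step = [(row["Global step"], row["MAP"]) for row in rows]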