def main(args):
    """Evaluate an address parser on country test files and save accuracies as JSON.

    Every file found in ``args.test_directory`` is evaluated with
    ``test_on_country_data``. The resulting ``"test_accuracy"`` is stored per
    country in either the training or the zero-shot results, depending on which
    of ``train_country_file`` / ``zero_shot_eval_country_file`` accepts the file
    name. The files of ``args.incomplete_test_directory`` are then evaluated the
    same way, keeping only those recognised as training-country files.

    Three JSON files are written under
    ``./models_evaluation/results/<args.results_type>/``; each file name is
    suffixed with ``args.model_type``.

    Args:
        args: Parsed arguments. This function reads ``results_type``,
            ``model_type``, ``test_directory`` and ``incomplete_test_directory``;
            the whole object is also forwarded to ``test_on_country_data``.
    """
    saving_dir = os.path.join(".", "models_evaluation", "results", args.results_type)
    os.makedirs(saving_dir, exist_ok=True)

    address_parser = AddressParser(model_type=args.model_type, device=0)

    def save_results(results_by_country, base_string):
        # Single place for the output naming scheme and encoding settings.
        output_path = os.path.join(saving_dir, f"{base_string}_{args.model_type}.json")
        with open(output_path, "w", encoding="utf-8") as file:
            json.dump(results_by_country, file, ensure_ascii=False)

    directory_path = args.test_directory
    test_files = os.listdir(directory_path)
    training_test_results = {}
    zero_shot_test_results = {}
    # Count from 1 so the message reports how many files are actually done.
    for done_count, test_file in enumerate(test_files, start=1):
        results, country = test_on_country_data(address_parser, test_file, directory_path, args)
        print(f"{done_count} file done of {len(test_files)}.")

        if train_country_file(test_file):
            training_test_results[country] = results["test_accuracy"]
        elif zero_shot_eval_country_file(test_file):
            zero_shot_test_results[country] = results["test_accuracy"]
        else:
            print(f"Error with the identification of test file type {test_file}.")

    # Persist the complete-data results before starting the second evaluation
    # pass, so they are on disk even if that pass fails.
    save_results(training_test_results, "training_test_results")
    save_results(zero_shot_test_results, "zero_shot_test_results")

    incomplete_test_directory = args.incomplete_test_directory
    incomplete_test_files = os.listdir(incomplete_test_directory)
    incomplete_training_test_results = {}
    for done_count, incomplete_test_file in enumerate(incomplete_test_files, start=1):
        results, country = test_on_country_data(
            address_parser, incomplete_test_file, incomplete_test_directory, args
        )
        print(f"{done_count} file done of {len(incomplete_test_files)}.")

        if train_country_file(incomplete_test_file):
            incomplete_training_test_results[country] = results["test_accuracy"]
        else:
            print(f"Error with the identification of test file type {incomplete_test_file}.")

    save_results(incomplete_training_test_results, "training_incomplete_test_results")
def test_givenATrainCountryFile_whenIsTrainCountryFile_thenReturnTrue(self):
    # Every file name in the training fixture list must be accepted by
    # train_country_file.
    for file_name in self.some_train_test_files:
        is_train_file = train_country_file(file_name)
        self.assertTrue(is_train_file)
def test_givenANonZeroShotCountryFile_whenIsZeroShotCountryFile_thenReturnFalse(self):
    # None of the zero-shot fixture file names may be accepted by
    # train_country_file.
    # NOTE(review): the test name refers to the zero-shot predicate, but the
    # body exercises train_country_file on the zero-shot fixtures -- confirm
    # which of the two (name or body) reflects the intended check.
    for file_name in self.some_zero_shot_test_files:
        is_train_file = train_country_file(file_name)
        self.assertFalse(is_train_file)
def main(args):
    """Evaluate an address parser on country test files and save accuracies as JSON.

    Every file found in ``args.test_directory`` is evaluated with
    ``test_on_country_data``. The resulting ``'test_accuracy'`` is stored per
    country in either the training or the zero-shot results, depending on which
    of ``train_country_file`` / ``zero_shot_eval_country_file`` accepts the file
    name. The files of ``args.noisy_test_directory`` are then evaluated the same
    way, keeping only those recognised as training-country files.

    Three JSON files are written under ``./models_evaluation/results/``. Their
    base names carry a ``fine_tuned`` marker when ``args.fine_tuning`` is truthy,
    and each file name is suffixed with ``args.model_type``.

    Args:
        args: Parsed arguments. This function reads ``model_type``,
            ``test_directory``, ``noisy_test_directory`` and ``fine_tuning``;
            the whole object is also forwarded to ``test_on_country_data``.
    """
    saving_dir = os.path.join(".", "models_evaluation", "results")
    os.makedirs(saving_dir, exist_ok=True)

    address_parser = AddressParser(model_type=args.model_type, device=0)

    def save_results(results_by_country, base_string):
        # Use a context manager so the handle is flushed and closed right away;
        # passing a bare open(...) to json.dump leaves the file object unclosed.
        output_path = os.path.join(saving_dir, f"{base_string}_{args.model_type}.json")
        with open(output_path, "w", encoding="utf-8") as file:
            json.dump(results_by_country, file, ensure_ascii=False)

    directory_path = args.test_directory
    test_files = os.listdir(directory_path)
    training_test_results = {}
    zero_shot_test_results = {}
    # Count from 1 so the message reports how many files are actually done.
    for done_count, test_file in enumerate(test_files, start=1):
        results, country = test_on_country_data(address_parser, test_file, directory_path, args)
        print(f"{done_count} file done of {len(test_files)}.")

        if train_country_file(test_file):
            training_test_results[country] = results['test_accuracy']
        elif zero_shot_eval_country_file(test_file):
            zero_shot_test_results[country] = results['test_accuracy']
        else:
            print(f"Error with the identification of test file type {test_file}.")

    if args.fine_tuning:
        training_base_string = "training_fine_tuned_test_results"
        training_noisy_base_string = "training_noisy_fine_tuned_test_results"
        zero_shot_base_string = "zero_shot_fine_tuned_test_results"
    else:
        training_base_string = "training_test_results"
        training_noisy_base_string = "training_noisy_test_results"
        zero_shot_base_string = "zero_shot_test_results"

    # Persist the clean-data results before starting the noisy evaluation pass,
    # so they are on disk even if that pass fails.
    save_results(training_test_results, training_base_string)
    save_results(zero_shot_test_results, zero_shot_base_string)

    noisy_test_directory = args.noisy_test_directory
    noisy_test_files = os.listdir(noisy_test_directory)
    noisy_training_test_results = {}
    for done_count, noisy_test_file in enumerate(noisy_test_files, start=1):
        results, country = test_on_country_data(
            address_parser, noisy_test_file, noisy_test_directory, args
        )
        print(f"{done_count} file done of {len(noisy_test_files)}.")

        if train_country_file(noisy_test_file):
            noisy_training_test_results[country] = results['test_accuracy']
        else:
            print(f"Error with the identification of test file type {noisy_test_file}.")

    save_results(noisy_training_test_results, training_noisy_base_string)