Ejemplo n.º 1
0
def main(args):
    """Evaluate an address parser on two test directories and save the accuracies.

    Every file of ``args.test_directory`` is evaluated with
    ``test_on_country_data``; its ``"test_accuracy"`` is stored, keyed by the
    returned country, in either the training or the zero-shot results
    depending on ``train_country_file`` / ``zero_shot_eval_country_file``.
    The files of ``args.incomplete_test_directory`` are evaluated the same
    way, but only files recognised by ``train_country_file`` are kept.

    Three JSON files are written under
    ``./models_evaluation/results/<args.results_type>/``, each named
    ``<base_string>_<args.model_type>.json``.

    Args:
        args: Parsed arguments. The attributes read here are ``results_type``,
            ``model_type``, ``test_directory`` and
            ``incomplete_test_directory``; the whole object is also forwarded
            to ``test_on_country_data``.
    """
    saving_dir = os.path.join(".", "models_evaluation", "results",
                              args.results_type)
    os.makedirs(saving_dir, exist_ok=True)

    def _save_accuracies(base_string, accuracies):
        # One JSON file per result set; ensure_ascii=False keeps non-ASCII
        # characters readable in the output instead of escaping them.
        output_path = os.path.join(saving_dir,
                                   f"{base_string}_{args.model_type}.json")
        with open(output_path, "w", encoding="utf-8") as file:
            json.dump(accuracies, file, ensure_ascii=False)

    address_parser = AddressParser(model_type=args.model_type, device=0)

    # --- Test directory: training and zero-shot countries ------------------
    directory_path = args.test_directory
    test_files = os.listdir(directory_path)
    training_test_results = {}
    zero_shot_test_results = {}
    # Count from 1 so the progress line reports the number of files actually
    # completed (the 0-based index printed "0 file done" after the first one).
    for done_count, test_file in enumerate(test_files, start=1):
        results, country = test_on_country_data(address_parser, test_file,
                                                directory_path, args)
        print(f"{done_count} file done of {len(test_files)}.")

        if train_country_file(test_file):
            training_test_results[country] = results["test_accuracy"]
        elif zero_shot_eval_country_file(test_file):
            zero_shot_test_results[country] = results["test_accuracy"]
        else:
            print(
                f"Error with the identification of test file type {test_file}."
            )

    _save_accuracies("training_test_results", training_test_results)
    _save_accuracies("zero_shot_test_results", zero_shot_test_results)

    # --- Incomplete test directory: training countries only ----------------
    incomplete_test_directory = args.incomplete_test_directory
    incomplete_test_files = os.listdir(incomplete_test_directory)
    incomplete_training_test_results = {}
    for done_count, incomplete_test_file in enumerate(incomplete_test_files,
                                                      start=1):
        results, country = test_on_country_data(address_parser,
                                                incomplete_test_file,
                                                incomplete_test_directory,
                                                args)
        print(f"{done_count} file done of {len(incomplete_test_files)}.")

        if train_country_file(incomplete_test_file):
            incomplete_training_test_results[country] = results[
                "test_accuracy"]
        else:
            print(
                f"Error with the identification of test file type {incomplete_test_file}."
            )

    _save_accuracies("training_incomplete_test_results",
                     incomplete_training_test_results)
 def test_givenATrainCountryFile_whenIsTrainCountryFile_thenReturnTrue(self):
     # Every file name listed in self.some_train_test_files must be accepted
     # by train_country_file.
     for file_name in self.some_train_test_files:
         is_train_file = train_country_file(file_name)
         self.assertTrue(is_train_file)
 def test_givenANonZeroShotCountryFile_whenIsZeroShotCountryFile_thenReturnFalse(self):
     # No file name listed in self.some_zero_shot_test_files may be accepted
     # by train_country_file.
     # NOTE(review): the method name refers to the zero-shot predicate, yet
     # the body exercises train_country_file on zero-shot files -- confirm
     # which of the two is intended.
     for file_name in self.some_zero_shot_test_files:
         is_train_file = train_country_file(file_name)
         self.assertFalse(is_train_file)
Ejemplo n.º 4
0
def main(args):
    """Evaluate an address parser on clean and noisy test data and save the accuracies.

    Every file of ``args.test_directory`` is evaluated with
    ``test_on_country_data``; its ``'test_accuracy'`` is stored, keyed by the
    returned country, in either the training or the zero-shot results
    depending on ``train_country_file`` / ``zero_shot_eval_country_file``.
    The files of ``args.noisy_test_directory`` are evaluated the same way,
    but only files recognised by ``train_country_file`` are kept.

    Three JSON files are written under ``./models_evaluation/results/``, each
    named ``<base_string>_<args.model_type>.json``; the base strings carry a
    ``fine_tuned`` marker when ``args.fine_tuning`` is truthy.

    Args:
        args: Parsed arguments. The attributes read here are ``model_type``,
            ``test_directory``, ``fine_tuning`` and ``noisy_test_directory``;
            the whole object is also forwarded to ``test_on_country_data``.
    """
    saving_dir = os.path.join(".", "models_evaluation", "results")
    os.makedirs(saving_dir, exist_ok=True)

    def _save_accuracies(base_string, accuracies):
        # Use a context manager so the file is flushed and closed
        # deterministically; passing a bare open(...) to json.dump left the
        # handle open until garbage collection.
        output_path = os.path.join(saving_dir,
                                   f"{base_string}_{args.model_type}.json")
        with open(output_path, "w", encoding="utf-8") as file:
            json.dump(accuracies, file, ensure_ascii=False)

    address_parser = AddressParser(model_type=args.model_type, device=0)

    # --- Test directory: training and zero-shot countries ------------------
    directory_path = args.test_directory
    test_files = os.listdir(directory_path)
    training_test_results = {}
    zero_shot_test_results = {}
    # Count from 1 so the progress line reports the number of files actually
    # completed (the 0-based index printed "0 file done" after the first one).
    for done_count, test_file in enumerate(test_files, start=1):
        results, country = test_on_country_data(address_parser, test_file,
                                                directory_path, args)
        print(f"{done_count} file done of {len(test_files)}.")

        if train_country_file(test_file):
            training_test_results[country] = results['test_accuracy']
        elif zero_shot_eval_country_file(test_file):
            zero_shot_test_results[country] = results['test_accuracy']
        else:
            print(
                f"Error with the identification of test file type {test_file}."
            )

    # Output file names distinguish fine-tuned runs from regular ones.
    if args.fine_tuning:
        training_base_string = "training_fine_tuned_test_results"
        training_noisy_base_string = "training_noisy_fine_tuned_test_results"
        zero_shot_base_string = "zero_shot_fine_tuned_test_results"
    else:
        training_base_string = "training_test_results"
        training_noisy_base_string = "training_noisy_test_results"
        zero_shot_base_string = "zero_shot_test_results"

    _save_accuracies(training_base_string, training_test_results)
    _save_accuracies(zero_shot_base_string, zero_shot_test_results)

    # --- Noisy test directory: training countries only ---------------------
    noisy_test_directory = args.noisy_test_directory
    noisy_test_files = os.listdir(noisy_test_directory)
    noisy_training_test_results = {}
    for done_count, noisy_test_file in enumerate(noisy_test_files, start=1):
        results, country = test_on_country_data(address_parser,
                                                noisy_test_file,
                                                noisy_test_directory, args)
        print(f"{done_count} file done of {len(noisy_test_files)}.")

        if train_country_file(noisy_test_file):
            noisy_training_test_results[country] = results['test_accuracy']
        else:
            print(
                f"Error with the identification of test file type {noisy_test_file}."
            )

    _save_accuracies(training_noisy_base_string, noisy_training_test_results)