Beispiel #1
0
  def testSNameStandardiser(self):
    """Test name standardiser routines (surname first)"""

#    return

    ns = standardisation.NameStandardiser(descript = 'Test name standardiser',
                                          input_fields = ['in_sname'],
                                          output_fiel = ['title',
                                                         'gender_guess',
                                                         'given_name',
                                                         'alt_given_name',
                                                         'surname',
                                                         'alt_surname'],
                                          female_t = self.name_female_titles,
                                          male_t = self.name_male_titles,
                                          tag_t=self.name_tag_table,
                                          corr_l=self.name_corr_list,
                                          first_name_c = 'sname',
                                          hmm_train_fi = 'test-hmm-train.txt')

    rs = standardisation.RecordStandardiser(descr = 'Test record standardiser',
                                            input_dataset = self.in_ds,
                                            output_dataset = self.out_ds,
                                            comp_stand_list =[ns])

    for (name_str, name_res) in self.names_snames:

      clean_name_str = ns.clean_component(name_str)
      test_name_res =  ns.standardise(name_str, clean_name_str)

#      assert name_res == test_name_res, \
#             'Wrong surname first standardisation: %s, should be: %s' % \
#             (str(test_name_res), str(name_res))

    print 'Count dict:', ns.count_dict
Beispiel #2
0
    def testGNameStandardiser(self):
        """Exercise the name standardiser on the given-name-first samples.

        A ``NameStandardiser`` reading the ``in_gname`` input field is built
        and placed into a ``RecordStandardiser`` over ``self.in_ds`` and
        ``self.out_ds``.  Each name string in ``self.names_gnames`` is then
        cleaned and standardised.

        NOTE: the expected results stored next to each name string are not
        compared with the computed ones (that assertion is disabled), so the
        test only fails when a call raises.  The standardiser's
        ``count_dict`` is printed at the end.
        """
        # Output fields the name standardiser is configured with.
        output_field_names = [
            "title",
            "gender_guess",
            "given_name",
            "alt_given_name",
            "surname",
            "alt_surname",
        ]

        # Keyword spellings (e.g. 'output_fiel', 'hmm_train_fil') are kept
        # exactly as they were.
        # NOTE(review): presumably matched by prefix in the library -- confirm.
        name_standardiser = standardisation.NameStandardiser(
            descript="Test name standardiser",
            input_fields=["in_gname"],
            output_fiel=output_field_names,
            female_t=self.name_female_titles,
            male_t=self.name_male_titles,
            tag_t=self.name_tag_table,
            corr_l=self.name_corr_list,
            hmm_train_fil="test-hmm-train.txt",
        )

        # Constructed only; its standardise() run is disabled (see below).
        record_standardiser = standardisation.RecordStandardiser(
            descr="Test record standardiser",
            input_dataset=self.in_ds,
            output_dataset=self.out_ds,
            comp_stand_list=[name_standardiser],
        )

        for raw_name, expected_result in self.names_gnames:
            cleaned_name = name_standardiser.clean_component(raw_name)
            actual_result = name_standardiser.standardise(raw_name, cleaned_name)

            # Disabled check of the computed against the expected result:
            #   assert expected_result == actual_result, \
            #       'Wrong given name first standardisation: %s, should be: %s' % \
            #       (str(actual_result), str(expected_result))

        # Disabled: record_standardiser.standardise()
        # (would use the record standardiser and write the output file)

        print("Count dict:", name_standardiser.count_dict)