def testSNameStandardiser(self):
    """Test name standardiser routines (surname first).

    Builds a ``NameStandardiser`` that reads the ``in_sname`` input field
    (configured with ``first_name_c='sname'``), wraps it in a
    ``RecordStandardiser``, then cleans and standardises every name string
    in ``self.names_snames`` and finally prints the standardiser's
    ``count_dict``.

    NOTE: the comparison of each result against its expected value is
    currently commented out, so no result is actually checked here; the
    loop only exercises the calls.
    """
    ns = standardisation.NameStandardiser(
        descript='Test name standardiser',
        input_fields=['in_sname'],
        output_fiel=[
            'title',
            'gender_guess',
            'given_name',
            'alt_given_name',
            'surname',
            'alt_surname',
        ],
        female_t=self.name_female_titles,
        male_t=self.name_male_titles,
        tag_t=self.name_tag_table,
        corr_l=self.name_corr_list,
        first_name_c='sname',
        hmm_train_fi='test-hmm-train.txt',
    )

    # The record standardiser is constructed but not used any further in
    # this test. NOTE(review): presumably kept so that its construction is
    # exercised as well - confirm before removing.
    rs = standardisation.RecordStandardiser(
        descr='Test record standardiser',
        input_dataset=self.in_ds,
        output_dataset=self.out_ds,
        comp_stand_list=[ns],
    )

    for (name_str, name_res) in self.names_snames:
        clean_name_str = ns.clean_component(name_str)
        test_name_res = ns.standardise(name_str, clean_name_str)

        # Disabled check of the standardised result (name_res is the
        # expected value):
        # assert name_res == test_name_res, \
        #     'Wrong surname first standardisation: %s, should be: %s' % \
        #     (str(test_name_res), str(name_res))

    # print() as a function call: the former Python 2 print statement is a
    # syntax error on Python 3 and was inconsistent with the given-name
    # test, which already uses the function form.
    print('Count dict:', ns.count_dict)
def testGNameStandardiser(self):
    """Test name standardiser routines (given name first).

    Sets up a ``NameStandardiser`` on the ``in_gname`` input field and a
    ``RecordStandardiser`` that contains it, runs every name string from
    ``self.names_gnames`` through ``clean_component`` and ``standardise``,
    and prints the resulting ``count_dict``.

    NOTE: the assertion comparing each result with its expected value is
    disabled (commented out), as is the record standardiser run.
    """
    out_fields = [
        "title",
        "gender_guess",
        "given_name",
        "alt_given_name",
        "surname",
        "alt_surname",
    ]

    name_std = standardisation.NameStandardiser(
        descript="Test name standardiser",
        input_fields=["in_gname"],
        output_fiel=out_fields,
        female_t=self.name_female_titles,
        male_t=self.name_male_titles,
        tag_t=self.name_tag_table,
        corr_l=self.name_corr_list,
        hmm_train_fil="test-hmm-train.txt",
    )

    # Only constructed; its standardise() call further down is disabled.
    record_std = standardisation.RecordStandardiser(
        descr="Test record standardiser",
        input_dataset=self.in_ds,
        output_dataset=self.out_ds,
        comp_stand_list=[name_std],
    )

    for raw_name, expected in self.names_gnames:
        cleaned_name = name_std.clean_component(raw_name)
        actual = name_std.standardise(raw_name, cleaned_name)

        # Disabled result check:
        # assert expected == actual, \
        #     'Wrong given name first standardisation: %s, should be: %s' % \
        #     (str(actual), str(expected))

    # Disabled: use record standardiser and write output file
    # record_std.standardise()

    print("Count dict:", name_std.count_dict)