def test_hmm(self, num_classify=10):
    """Smoke-test the HMM training pipeline and dump its learned tables.

    Preprocesses the built-in test data, trains this HMM on the resulting
    instance list, prints every count table / matrix for eyeball inspection,
    then classifies the first few instances.

    Args:
        num_classify: maximum number of instances to run through
            test_classify_instance (default 10, matching the original
            hard-coded loop). Capped at the number of available instances
            so short datasets no longer raise IndexError.
    """
    pp = PreProcessor()
    pp.test_preprocess()
    instance_list = pp.get_instance_list()
    # Adopt the preprocessor's codebooks so feature/label indices line up
    # with the instances we are about to train on.
    self.label_codebook = pp.get_label_codebook()
    self.feature_codebook = pp.get_feature_codebook()
    self.train(instance_list)
    # Dump the learned parameters for manual inspection.
    print("\ntransition_count_table--------------------")
    print(self.transition_count_table)
    print("\ntransition_matrix-------------------------")
    print(self.transition_matrix)
    print("\ninitial_state_count_table------------------")
    print(self.initial_state_count_table)
    print("\ntermination_state_count_table------------------")
    print(self.termination_state_count_table)
    print("\nemission matrix----------------------------")
    print(self.emission_matrix)
    # Bug fix: the original always indexed the first 10 instances, which
    # raises IndexError on datasets with fewer than 10 items.
    for i in range(min(num_classify, len(instance_list))):
        self.test_classify_instance(instance_list[i])
def test_hmm_all(
        train_set_path="C:\\Users\\DIAOSHUO\\Dropbox\\SNLP\\cs134assn2\\np_chunking_wsj_15_18_train",
        test_set_path="C:\\Users\\DIAOSHUO\\Dropbox\\SNLP\\cs134assn2\\np_chunking_wsj_20_test"):
    """Run the entire HMM workflow end to end.

    Uses PreProcessor to build the label codebook, feature codebook, and
    instance lists from the datasets, then evaluates a fresh HMM three
    ways: an 80/20 split, n-fold cross-validation, and a simple
    train/test run.

    Args:
        train_set_path: path to the training dataset. Defaults to the
            original hard-coded location so existing zero-argument
            callers behave identically.
        test_set_path: path to the held-out test dataset (same default
            behavior).
    """
    pp = PreProcessor()
    hmm = HMM()
    # NOTE(review): the codebooks are copied from the preprocessor before
    # the instance lists are built below — presumably PreProcessor
    # populates them lazily/incrementally; confirm, otherwise these could
    # be empty at this point.
    hmm.label_codebook = pp.get_label_codebook()
    hmm.feature_codebook = pp.get_feature_codebook()
    train_instance_list = pp.get_instance_list(train_set_path)
    test_instance_list = pp.get_instance_list(test_set_path)
    # Three evaluation strategies over the same model and data.
    split_train_test(hmm, train_instance_list, [0.8, 0.2])
    n_fold_cross_validation(hmm, train_instance_list)
    simple_train_test(hmm, train_instance_list, test_instance_list)