class TestRecognize(TestCase):
    """Interface checks for ``recognize`` against the ASL test set.

    The tests only verify the shape of what ``recognize`` returns (a pair
    of sequences sized like the test set); they do not judge accuracy.
    """

    def setUp(self):
        """Build the training/test sets and the word models for each test."""
        database = AslDb()
        self.asl = database
        self.training_set = database.build_training(FEATURES)
        self.test_set = database.build_test(FEATURES)
        self.models = train_all_words(self.training_set, SelectorConstant)

    def test_recognize_probabilities_interface(self):
        # First element of the result: one entry per test item, each entry
        # expected to contain the vocabulary words as keys.
        probs, _unused_guesses = recognize(self.models, self.test_set)

        actual_count = len(probs)
        self.assertEqual(
            actual_count,
            self.test_set.num_items,
            "Number of test items in probabilities list incorrect.",
        )

        missing_key_msg = (
            "Dictionary of probabilities does not contain correct keys"
        )
        # Spot-check the first and the last entry only.
        for position, word in ((0, 'FRANK'), (-1, 'CHICKEN')):
            self.assertIn(word, probs[position], missing_key_msg)

    def test_recognize_guesses_interface(self):
        # Second element of the result: one guessed word (a str) per item.
        _unused_probs, guesses = recognize(self.models, self.test_set)

        actual_count = len(guesses)
        self.assertEqual(
            actual_count,
            self.test_set.num_items,
            "Number of test items in guesses list incorrect.",
        )

        not_str_msg = "The guesses are not strings"
        # Spot-check the first and the last guess only.
        for position in (0, -1):
            self.assertIsInstance(guesses[position], str, not_str_msg)
class TestRecognize(TestCase):
    """Smoke tests for the return contract of ``recognize``.

    ``recognize(models, test_set)`` is unpacked as a 2-tuple here:
    ``(probabilities, guesses)``. Both are expected to have one entry per
    item in the test set.
    """

    def setUp(self):
        """Prepare fresh data sets and trained models before every test."""
        self.asl = AslDb()
        asl = self.asl
        self.training_set = asl.build_training(FEATURES)
        self.test_set = asl.build_test(FEATURES)
        self.models = train_all_words(
            self.training_set,
            SelectorConstant,
        )

    def test_recognize_probabilities_interface(self):
        # Only the probabilities half of the result is inspected here.
        result = recognize(self.models, self.test_set)
        probs, _ = result

        self.assertEqual(
            len(probs), self.test_set.num_items,
            msg="Number of test items in probabilities list incorrect.")

        # The first and last entries must know about these vocabulary words.
        first_entry = probs[0]
        self.assertIn(
            'FRANK', first_entry,
            msg="Dictionary of probabilities does not contain correct keys")
        last_entry = probs[-1]
        self.assertIn(
            'CHICKEN', last_entry,
            msg="Dictionary of probabilities does not contain correct keys")

    def test_recognize_guesses_interface(self):
        # Only the guesses half of the result is inspected here.
        result = recognize(self.models, self.test_set)
        _, guesses = result

        self.assertEqual(
            len(guesses), self.test_set.num_items,
            msg="Number of test items in guesses list incorrect.")

        # Every guess should be a plain string; check both ends of the list.
        first_guess = guesses[0]
        self.assertIsInstance(
            first_guess, str, msg="The guesses are not strings")
        last_guess = guesses[-1]
        self.assertIsInstance(
            last_guess, str, msg="The guesses are not strings")
training = asl.build_training( features ) # Experiment here with different feature sets defined in part 1 sequences = training.get_all_sequences() Xlengths = training.get_all_Xlengths() model_dict = {} for word in training.words: model = model_selector(sequences, Xlengths, word, n_constant=3).select() model_dict[word] = model return model_dict # models = train_all_words(features_ground, SelectorConstant) print("Number of word models returned = {}".format(len(models))) test_set = asl.build_test(features_ground) print("Number of test set items: {}".format(test_set.num_items)) print("Number of test set sentences: {}".format( len(test_set.sentences_index))) # TODO implement the recognize method in my_recognizer from my_recognizer import recognize from asl_utils import show_errors # TODO Choose a feature set and model selector features = features_ground # change as needed model_selector = SelectorDIC # change as needed # TODO Recognize the test set and display the result with the show_errors method models = train_all_words(features, model_selector) test_set = asl.build_test(features)
sequences = training.get_all_sequences() Xlengths = training.get_all_Xlengths() model_dict = {} for word in training.words: model = model_selector(sequences, Xlengths, word).select() model_dict[word] = model return model_dict start = timeit.default_timer() features = features_norm_delta # change as needed model_selector = SelectorBIC # change as needed models = train_all_words(features, model_selector) print('words trained, created {} models'.format(len(models))) test_set = asl.build_test(features) print('test set built') probabilities, guesses = recognize(models, test_set) one_gram_guesses = OneGram().guess_words(probabilities) two_gram_guesses = TwoGram().guess_words(test_set, probabilities) three_gram_guesses = ThreeGram().guess_words(test_set, probabilities) print('recognizer results:') show_errors(guesses, test_set) # WER 51.7 / 52.8 print('unigram results:') show_errors(one_gram_guesses, test_set) # WER 56.2 / 58.4 print('bigram results:') show_errors(two_gram_guesses, test_set) # WER 42.1 / 46.6 print('trigram results:') show_errors(three_gram_guesses, test_set) # WER 39.3 / 41.6
best_sentence = s except: continue if best_sentence is not None: sentence_guesses[video_num] = best_sentence errors = 0 for video_num in sentence_guesses: correct_sentence = [ test_set.wordlist[i] for i in test_set.sentences_index[video_num] ] recognised_sentence = sentence_guesses[video_num] for c, r in zip(correct_sentence, list(recognised_sentence)): if c != r: errors += 1 # print('Correct {}'.format(correct_sentence)) # print('Recognised {}'.format(recognised_sentence)) # print() print(float(errors) / float(178)) if __name__ == '__main__': # use n-gram models = train_all_words(features_custom, all_model_selectors['SelectorBIC']) test_set = asl.build_test(features_custom) # load 3-gram language model lm_models = arpa.loadf(os.path.join('data', 'n-grams', 'ukn.3.lm')) lm = lm_models[0] recognize_ngram(lm, models, test_set)
if __name__ == "__main__":
    # Benchmark driver: for every feature set and every selector, train the
    # word models, run recognition on the test set, and report the errors
    # together with wall-clock timing. A failure for one selector is printed
    # and does not stop the remaining combinations.
    asl = AslDb()
    add_features1(asl)

    heavy_rule = "==============================================="
    light_rule = "--------------------------"

    for feats in feature_sets:
        # Banner announcing the feature set under evaluation.
        print(heavy_rule)
        print(" FEATURES")
        print(" {}".format(feats))
        print(heavy_rule)

        for sel in selectors:
            print(" Selector: {}".format(sel))
            start_t = time.time()
            print("Start time: {}".format(start_t))
            print(light_rule)

            try:
                models = train_all_words(feats, sel)
                test_set = asl.build_test(feats)
                probabilities, guesses = recognize(models, test_set)
                print(show_errors(guesses, test_set))

                end_t = time.time()
                total_t = end_t - start_t
                print("End time: {}".format(end_t))
                print("Total time in seconds: {}".format(total_t))
            except Exception as exc:
                # Report the full traceback, then the message, and move on
                # to the next selector.
                failure_report = (
                    traceback.format_exc(),
                    exc,
                    "There was some kind of problem with the specified selector",
                )
                for report_line in failure_report:
                    print(report_line)

            print(light_rule)
# --- Disabled experiments (kept for reference) ------------------------------
#     if model is not None:
#         print("Training complete for {} with {} states with time {} seconds".format(word, model.n_components, end))
#     else:
#         print("Training failed for {}".format(word))
# print()
# print('Building models with DIC selector:')
# for word in words_to_train:
#     start = timeit.default_timer()
#     model = SelectorDIC(sequences, Xlengths, word,
#                         min_n_components=2, max_n_components=15, random_state = 14).select()
#     end = timeit.default_timer()-start
#     if model is not None:
#         print("Training complete for {} with {} states with time {} seconds".format(word, model.n_components, end))
#     else:
#         print("Training failed for {}".format(word))
# print()
# models = train_all_words(features['ground'], SelectorConstant)
# print("Number of word models returned = {}".format(len(models)))
# test_set = asl.build_test(features['ground'])
# probabilities, guesses = recognize(models, test_set)
# print("Number of test set items: {}".format(test_set.num_items))
# print("Number of test set sentences: {}".format(len(test_set.sentences_index)))
# -----------------------------------------------------------------------------

# Active run: train with the CV selector, recognize the test set built from
# the same features, and display the recognition errors.
models = train_all_words(
    features['ground'],
    SelectorCV,
)
test_set = asl.build_test(
    features['ground'],
)
recognition_result = recognize(models, test_set)
probabilities, guesses = recognition_result
show_errors(guesses, test_set)
# Sweep every named feature set against every model selector and print one
# pipe-delimited table row per combination:
#   |<feature set>|<selector>|<CORRECT(...)>|<WER(...)>|

# Concatenation shared by the two combined entries below.
_combined_base = (
    features_ground + features_polar + features_delta + features_norm
)

# Insertion order matters: it is the order in which rows are printed.
features = {
    'features_ground': features_ground,
    'features_polar': features_polar,
    'features_delta': features_delta,
    'features_norm': features_norm,
    'ALL': _combined_base,
    'features_custom': features_custom,
    'ALL_with_custom': _combined_base + features_custom,
}

selectors = {
    'SelectorBIC': SelectorBIC,
    'SelectorDIC': SelectorDIC,
    'SelectorCV': SelectorCV,
}

_row_template = '|{}|{}|{}|{}|'

for set_name in features:
    set_value = features[set_name]
    for sel_name in selectors:
        sel = selectors[sel_name]

        # Train, build the matching test set, then recognize.
        models = train_all_words(set_value, sel)
        test_set = asl.build_test(set_value)
        probabilities, guesses = recognize(models, test_set)

        correct_cell = CORRECT(guesses, test_set)
        wer_cell = WER(guesses, test_set)
        print(_row_template.format(set_name, sel_name, correct_cell, wer_cell))
feature_mean = asl.df['speaker'].map(df_means[feature]) feature_std = asl.df['speaker'].map(df_std[feature]) asl.df[features_custom[i]] = (asl.df[feature] - feature_mean) / feature_std print("Top rows of data:\n\n{}".format(asl.df.head())) all_selectors = [SelectorBIC, SelectorDIC, SelectorCV] feature_sets = [features_ground, features_polar, features_custom] warnings.filterwarnings("ignore") test_SLM = BasicSLM("SLM_data/corpus_sentences.txt", verbose=False) feature_set = features_custom selector = SelectorCV training_set = asl.build_training(feature_set) testing_set = asl.build_test(feature_set) train_words = training_set.words test_words = testing_set.wordlist #train_words = ['FISH', 'BOOK', 'VEGETABLE'] #test_words = ['FISH', 'BOOK', 'VEGETABLE'] sentences = testing_set.sentences_index sentences = [sentences[i] for i in sentences] models_dict = train_all_words(training_set, selector, train_words, verbose=False, features=feature_set) test_probs, test_guesses = recognize_words(models_dict, testing_set,