Example #1
0
class TestRecognize(TestCase):
    """Interface checks for ``recognize`` using models from ``train_all_words``."""

    def setUp(self):
        """Build the FEATURES training/test sets and the SelectorConstant models."""
        database = AslDb()
        self.asl = database
        self.training_set = database.build_training(FEATURES)
        self.test_set = database.build_test(FEATURES)
        self.models = train_all_words(self.training_set, SelectorConstant)

    def test_recognize_probabilities_interface(self):
        # First element of the recognize() result: one entry per test item.
        probabilities, _unused_guesses = recognize(self.models, self.test_set)
        item_count = len(probabilities)
        self.assertEqual(
            item_count, self.test_set.num_items,
            "Number of test items in probabilities list incorrect.")
        # Spot-check one expected word key at each end of the list.
        missing_key = "Dictionary of probabilities does not contain correct keys"
        for word, position in (('FRANK', 0), ('CHICKEN', -1)):
            self.assertIn(word, probabilities[position], missing_key)

    def test_recognize_guesses_interface(self):
        # Second element of the recognize() result: one guessed word per item.
        _unused_probabilities, guessed_words = recognize(self.models,
                                                         self.test_set)
        guess_count = len(guessed_words)
        self.assertEqual(guess_count, self.test_set.num_items,
                         "Number of test items in guesses list incorrect.")
        # Both ends of the list must hold plain strings.
        for position in (0, -1):
            self.assertIsInstance(guessed_words[position], str,
                                  "The guesses are not strings")
class TestRecognize(TestCase):
    """Verify the shape and types of the two values returned by ``recognize``."""

    def setUp(self):
        # Every test starts from a fresh database, data sets and models.
        self.asl = asl_db = AslDb()
        training = asl_db.build_training(FEATURES)
        self.training_set = training
        self.test_set = asl_db.build_test(FEATURES)
        self.models = train_all_words(training, SelectorConstant)

    def test_recognize_probabilities_interface(self):
        probs, _ = recognize(self.models, self.test_set)
        self.assertEqual(
            len(probs),
            self.test_set.num_items,
            msg="Number of test items in probabilities list incorrect.",
        )
        # The first and last entries must each contain a known word key.
        bad_keys = "Dictionary of probabilities does not contain correct keys"
        first_entry = probs[0]
        self.assertIn('FRANK', first_entry, msg=bad_keys)
        last_entry = probs[-1]
        self.assertIn('CHICKEN', last_entry, msg=bad_keys)

    def test_recognize_guesses_interface(self):
        _, guesses = recognize(self.models, self.test_set)
        self.assertEqual(
            len(guesses),
            self.test_set.num_items,
            msg="Number of test items in guesses list incorrect.",
        )
        # The first and last guesses must be plain strings.
        not_strings = "The guesses are not strings"
        first_guess = guesses[0]
        self.assertIsInstance(first_guess, str, msg=not_strings)
        last_guess = guesses[-1]
        self.assertIsInstance(last_guess, str, msg=not_strings)
        training = asl.build_training(
            features
        )  # Experiment here with different feature sets defined in part 1
        sequences = training.get_all_sequences()
        Xlengths = training.get_all_Xlengths()
        model_dict = {}
        for word in training.words:
            model = model_selector(sequences, Xlengths, word,
                                   n_constant=3).select()
            model_dict[word] = model
        return model_dict

    #
    models = train_all_words(features_ground, SelectorConstant)
    print("Number of word models returned = {}".format(len(models)))
    test_set = asl.build_test(features_ground)
    print("Number of test set items: {}".format(test_set.num_items))
    print("Number of test set sentences: {}".format(
        len(test_set.sentences_index)))

    # TODO implement the recognize method in my_recognizer
    from my_recognizer import recognize
    from asl_utils import show_errors

    # TODO Choose a feature set and model selector
    features = features_ground  # change as needed
    model_selector = SelectorDIC  # change as needed

    # TODO Recognize the test set and display the result with the show_errors method
    models = train_all_words(features, model_selector)
    test_set = asl.build_test(features)
Example #4
0
    sequences = training.get_all_sequences()
    Xlengths = training.get_all_Xlengths()
    model_dict = {}
    for word in training.words:
        model = model_selector(sequences, Xlengths, word).select()
        model_dict[word] = model
    return model_dict


# Timer reference point; not read within this span (presumably used further
# down to report elapsed time -- confirm).
start = timeit.default_timer()
features = features_norm_delta  # change as needed
model_selector = SelectorBIC  # change as needed

# Train the word models, then build the test set from the same features.
models = train_all_words(features, model_selector)
model_count = len(models)
print('words trained, created {} models'.format(model_count))
test_set = asl.build_test(features)
print('test set built')
probabilities, guesses = recognize(models, test_set)

# Alternative guesses produced by the 1-, 2- and 3-gram guessers.
one_gram_guesses = OneGram().guess_words(probabilities)
two_gram_guesses = TwoGram().guess_words(test_set, probabilities)
three_gram_guesses = ThreeGram().guess_words(test_set, probabilities)

# Report every set of guesses against the same test set, in a fixed order.
for heading, word_guesses in (
        ('recognizer results:', guesses),  # WER 51.7 / 52.8
        ('unigram results:', one_gram_guesses),  # WER 56.2 / 58.4
        ('bigram results:', two_gram_guesses),  # WER 42.1 / 46.6
        ('trigram results:', three_gram_guesses),  # WER 39.3 / 41.6
):
    print(heading)
    show_errors(word_guesses, test_set)
                    best_sentence = s
            except:
                continue
        if best_sentence is not None:
            sentence_guesses[video_num] = best_sentence

    errors = 0
    for video_num in sentence_guesses:
        correct_sentence = [
            test_set.wordlist[i] for i in test_set.sentences_index[video_num]
        ]
        recognised_sentence = sentence_guesses[video_num]
        for c, r in zip(correct_sentence, list(recognised_sentence)):
            if c != r:
                errors += 1
        # print('Correct {}'.format(correct_sentence))
        # print('Recognised {}'.format(recognised_sentence))
        # print()
    print(float(errors) / float(178))


if __name__ == '__main__':
    # Run the n-gram recogniser on the custom feature set.
    selector = all_model_selectors['SelectorBIC']
    models = train_all_words(features_custom, selector)
    test_set = asl.build_test(features_custom)
    # Load the 3-gram language model; only the first loaded entry is used.
    lm_path = os.path.join('data', 'n-grams', 'ukn.3.lm')
    lm_models = arpa.loadf(lm_path)
    lm = lm_models[0]
    recognize_ngram(lm, models, test_set)
Example #6
0

if __name__ == "__main__":

    # Try every selector on every feature set, reporting errors and timings.
    asl = AslDb()
    add_features1(asl)
    banner = "==============================================="
    divider = "--------------------------"
    for feats in feature_sets:
        print(banner)
        print(" FEATURES")
        print(" {}".format(feats))
        print(banner)
        for sel in selectors:
            print(" Selector: {}".format(sel))
            started = time.time()
            print("Start time: {}".format(started))
            print(divider)
            try:
                models = train_all_words(feats, sel)
                test_set = asl.build_test(feats)
                probabilities, guesses = recognize(models, test_set)
                # NOTE(review): the return value of show_errors is printed;
                # if it returns None this emits "None" -- confirm intent.
                report = show_errors(guesses, test_set)
                print(report)
                finished = time.time()
                elapsed = finished - started
                print("End time: {}".format(finished))
                print("Total time in seconds: {}".format(elapsed))
            except Exception as exc:
                # Best effort: log the failure and carry on with the next
                # selector instead of aborting the whole sweep.
                print(traceback.format_exc())
                print(exc)
                print("There was some kind of problem with the specified selector")
            print(divider)
Example #7
0
    #     if model is not None:
    #         print("Training complete for {} with {} states with time {} seconds".format(word, model.n_components, end))
    #     else:
    #         print("Training failed for {}".format(word))
    # print()

    # print('Building models with DIC selector:')
    # for word in words_to_train:
    #     start = timeit.default_timer()
    #     model = SelectorDIC(sequences, Xlengths, word, 
    #                     min_n_components=2, max_n_components=15, random_state = 14).select()
    #     end = timeit.default_timer()-start
    #     if model is not None:
    #         print("Training complete for {} with {} states with time {} seconds".format(word, model.n_components, end))
    #     else:
    #         print("Training failed for {}".format(word))
    # print()

    # models = train_all_words(features['ground'], SelectorConstant)
    # print("Number of word models returned = {}".format(len(models)))

    # test_set = asl.build_test(features['ground'])
    # probabilities, guesses = recognize(models, test_set)
    # print("Number of test set items: {}".format(test_set.num_items))
    # print("Number of test set sentences: {}".format(len(test_set.sentences_index)))

    models = train_all_words(features['ground'], SelectorCV)
    test_set = asl.build_test(features['ground'])
    probabilities, guesses = recognize(models, test_set)
    show_errors(guesses, test_set)
Example #8
0
    features = {
        'features_ground':
        features_ground,
        'features_polar':
        features_polar,
        'features_delta':
        features_delta,
        'features_norm':
        features_norm,
        'ALL':
        features_ground + features_polar + features_delta + features_norm,
        'features_custom':
        features_custom,
        'ALL_with_custom':
        features_ground + features_polar + features_delta + features_norm +
        features_custom
    }
    selectors = {
        'SelectorBIC': SelectorBIC,
        'SelectorDIC': SelectorDIC,
        'SelectorCV': SelectorCV
    }
    for set_name, set_value in features.items():
        for sel_name, sel in selectors.items():
            models = train_all_words(set_value, sel)
            test_set = asl.build_test(set_value)
            probabilities, guesses = recognize(models, test_set)
            print('|{}|{}|{}|{}|'.format(set_name, sel_name,
                                         CORRECT(guesses, test_set),
                                         WER(guesses, test_set)))
    feature_mean = asl.df['speaker'].map(df_means[feature])
    feature_std = asl.df['speaker'].map(df_std[feature])
    asl.df[features_custom[i]] = (asl.df[feature] - feature_mean) / feature_std

# Show the result of asl.df.head() as a quick look at the feature table.
print("Top rows of data:\n\n{}".format(asl.df.head()))

# Candidate selectors and feature sets; not read within this span
# (presumably iterated further down -- confirm).
all_selectors = [SelectorBIC, SelectorDIC, SelectorCV]
feature_sets = [features_ground, features_polar, features_custom]

# Silence every warning from this point on.
warnings.filterwarnings("ignore")

# Language model built from the sentence corpus file.
test_SLM = BasicSLM("SLM_data/corpus_sentences.txt", verbose=False)
# Feature set and selector used for this run.
feature_set = features_custom
selector = SelectorCV
# Training and test sets are built from the same feature set.
training_set = asl.build_training(feature_set)
testing_set = asl.build_test(feature_set)
train_words = training_set.words
test_words = testing_set.wordlist
# Uncomment to restrict the run to a small vocabulary:
#train_words   = ['FISH', 'BOOK', 'VEGETABLE']
#test_words    = ['FISH', 'BOOK', 'VEGETABLE']
# Replace the sentences_index mapping with a list of its values, in the
# mapping's own iteration order.
sentences = testing_set.sentences_index
sentences = [sentences[i] for i in sentences]

# Train the models for the chosen words with the chosen selector.
models_dict = train_all_words(training_set,
                              selector,
                              train_words,
                              verbose=False,
                              features=feature_set)

test_probs, test_guesses = recognize_words(models_dict,
                                           testing_set,