def test_get_word_pair(self):
    """Check the (previous, current) pairs produced by get_word_pair().

    The parser is fed an in-memory string and drained until it returns a
    pair whose second element is falsy.  The expected pairs are all
    lower-case and none of them spans a '.' or ';' in the input, so the
    one-word sentences ("Hello.", "World.") contribute no pair at all.
    """
    data = "This is a sentence. Hello. World. Good bye; See you later."
    expected_pairs = [("this", "is"), ("is", "a"), ("a", "sentence"),
                      ("good", "bye"), ("see", "you"), ("you", "later")]

    text_stream = TextStream(data=data)
    parser = WordParser(text_stream)

    actual_pairs = []
    while True:
        pair = parser.get_word_pair()
        # Unpacking also enforces that the parser returned exactly two items.
        _prev, curr = pair
        if not curr:
            break
        actual_pairs.append(pair)

    # Check that the sizes match first: zip() below silently stops at the
    # shorter list, so a length mismatch would otherwise go unnoticed.
    actual_size = len(actual_pairs)
    expected_size = len(expected_pairs)
    self.assertEqual(expected_size, actual_size,
                     msg="Expected Size: %s but got %s"
                         % (expected_size, actual_size))

    # Verify contents match, pair by pair.
    for expected, actual in zip(expected_pairs, actual_pairs):
        self.assertEqual(expected, actual,
                         msg="Expected %s but got %s" % (expected, actual))
def test_get_word(self):
    """Compare WordParser.get_word() output with the reference word list.

    The reference list comes from self.get_words_from_file() applied to
    self.sample_input_file; the same path is then parsed through a
    TextStream/WordParser pair.
    """
    source_path = self.sample_input_file
    # Reference: all the words in the text input file.
    expected_words = self.get_words_from_file(source_path)

    # Drain the parser; the loop stops at the first falsy value returned.
    parser = WordParser(TextStream(source_path))
    actual_words = []
    word = parser.get_word()
    while word:
        actual_words.append(word)
        word = parser.get_word()

    self.assertTrue(
        actual_words == expected_words,
        msg="Expected: %s \nActual: %s" % (expected_words, actual_words))
def get_all_sentences(text):
    """Group the words of ``text`` into Sentence objects.

    Words are obtained from WordParser.get_all_words(text).  Every word
    whose ``type`` equals WordType.END closes the current group; the group,
    END word included, is wrapped in a Sentence.

    Returns:
        A list of Sentence objects in input order.  Words that follow the
        last END word are not part of any returned sentence.
    """
    sentences = []
    pending = []
    for token in WordParser.get_all_words(text):
        pending.append(token)
        if token.type == WordType.END:
            sentences.append(Sentence(pending))
            # Rebind rather than clear: the Sentence keeps the old list.
            pending = []
    return sentences
def suggest_word(suggest_dict, curr_word):
    """Suggest the most probable word to follow ``curr_word``.

    Args:
        suggest_dict: A dict that has the following form:
            current_word -> {next_word -> WordCount object}
        curr_word: The current word.  It is normalized with
            WordParser.normalize_word() before the lookup.

    Returns:
        The WordCount with the highest ``count`` among the candidates
        recorded for the current word, or None if no suggestions are
        available (the word is unknown or has no candidates).
    """
    # Normalize the given word so it matches the dictionary keys.
    curr_word = WordParser.normalize_word(curr_word)
    if curr_word not in suggest_dict:
        return None

    top_word_count = None
    # Only the WordCount values are needed.  values() exists on both
    # Python 2 and Python 3, unlike iteritems().
    for word_count in suggest_dict[curr_word].values():
        # Compare against None explicitly so a WordCount that happens to be
        # falsy is never mistaken for "no candidate seen yet".
        if top_word_count is None or word_count.count > top_word_count.count:
            top_word_count = word_count
        elif (word_count.count == top_word_count.count
              and random.random() > 0.5):
            # Tie: randomly pick either word with a 50/50 chance.
            top_word_count = word_count
        # Otherwise the word is less likely than the current top word
        # (or lost the coin flip), so the current choice is kept.
    return top_word_count
#!/usr/bin/env python
from amcc_ui import Ui
from blessed import Terminal
from configparser import ConfigParser
from word_parser import WordParser


def main():
    """Load 'amcc.conf', build the UI and the word parser, and start it."""
    settings = ConfigParser()
    # The path is relative, so it is resolved against the current working
    # directory.
    settings.read('amcc.conf')

    # Select the 'main' and 'ui' sections from the config file.
    parser_section = settings['main']
    ui_section = settings['ui']

    terminal = Terminal()
    # Everything runs inside the terminal's fullscreen context.
    with terminal.fullscreen():
        interface = Ui(ui_section, terminal)
        word_parser = WordParser(parser_section, interface)
        word_parser.start()


if __name__ == '__main__':
    main()
def eqExpr(self):
    """Build the parser for an 'equals' expression.

    Returns:
        A SequenceParser to which two parsers have been added, in order:
        a WordParser for 'equals' with setDiscard(True) applied, and
        whatever self.operand() returns.
    """
    equals_word = WordParser('equals')
    equals_word.setDiscard(True)

    sequence = SequenceParser()
    sequence.add(equals_word, self.operand())
    return sequence