def test_get_word_pair(self):
    """Check the pairs produced by ``WordParser.get_word_pair``.

    A fixed, multi-sentence string is wrapped in a ``TextStream`` and
    drained pair by pair.  The expected list contains only lower-case
    pairs, and none of them straddles a ``.`` or ``;`` in the input,
    so the one-word sentences "Hello." and "World." contribute nothing.
    """
    data = "This is a sentence. Hello. World. Good bye; See you later."
    expected_pairs = [
        ("this", "is"),
        ("is", "a"),
        ("a", "sentence"),
        ("good", "bye"),
        ("see", "you"),
        ("you", "later"),
    ]
    parser = WordParser(TextStream(data=data))

    # Drain the parser: a pair whose second element is falsy marks the
    # end of the input and is not recorded.
    actual_pairs = []
    while True:
        pair = parser.get_word_pair()
        _, curr = pair
        if not curr:
            break
        actual_pairs.append(pair)

    # Check that the sizes match first; zip() below stops at the shorter
    # list, so a length mismatch would otherwise go unnoticed.
    expected_size = len(expected_pairs)
    actual_size = len(actual_pairs)
    self.assertEqual(actual_size, expected_size,
                     msg="Expected Size: %s but got %s" %
                     (expected_size, actual_size))

    # Verify contents match, pair by pair.
    for expected, actual in zip(expected_pairs, actual_pairs):
        self.assertEqual(expected, actual,
                         msg="Expected %s but got %s" %
                         (expected, actual))
    def test_get_word(self):
        """Check that ``WordParser.get_word`` yields every word of the sample file.

        The reference list comes from ``self.get_words_from_file``; the
        same file is then read through a ``TextStream``-backed
        ``WordParser`` and the two word lists must be equal.
        """
        # Get all the words in the text input file (reference result).
        file_path = self.sample_input_file
        expected_words = self.get_words_from_file(file_path)

        # Now use WordParser to extract the words from the same file.
        parser = WordParser(TextStream(file_path))
        actual_words = []
        while True:
            word = parser.get_word()
            if not word:
                # A falsy word signals that the parser is exhausted.
                break
            actual_words.append(word)

        self.assertEqual(actual_words, expected_words,
                         msg="Expected: %s \nActual: %s" %
                         (expected_words, actual_words))
 def get_all_sentences(text):
     """Group the words of ``text`` into ``Sentence`` objects.

     Words come from ``WordParser.get_all_words(text)``.  Each word whose
     ``type`` equals ``WordType.END`` closes the current group; the END
     word itself is kept as the last element of that group.

     Note: words that follow the final END word are never wrapped in a
     ``Sentence`` and therefore do not appear in the result.

     Returns:
         A list of ``Sentence`` objects, in input order.
     """
     completed = []
     pending = []
     for token in WordParser.get_all_words(text):
         # Every word joins the current group, terminator included.
         pending.append(token)
         if token.type == WordType.END:
             completed.append(Sentence(pending))
             # Start a fresh list so the finished Sentence keeps its own.
             pending = []
     return completed
Beispiel #4
0
def suggest_word(suggest_dict, curr_word):
    """Suggest the most probable word to follow ``curr_word``.

    Args:
        suggest_dict: A dict that has the following form.
           current_word -> {next_word -> WordCount object}

        curr_word: The current word.  It is normalized with
           ``WordParser.normalize_word`` before the lookup.

    Returns:
       The WordCount with the highest ``count`` among the candidates
       stored for the current word, or None if no suggestions are
       available.  Ties are broken randomly.
    """

    # Normalize the given word so it matches the dictionary keys.
    curr_word = WordParser.normalize_word(curr_word)

    if curr_word not in suggest_dict:
        return None

    top_word_count = None
    # Only the WordCount values are needed; .values() (unlike the
    # Python 2-only .iteritems()) works on both Python 2 and Python 3.
    for word_count in suggest_dict[curr_word].values():
        if top_word_count is None or word_count.count > top_word_count.count:
            top_word_count = word_count
        elif (word_count.count == top_word_count.count
              and random.random() > 0.5):
            # Tie: keep or replace the current top with a 50/50 chance.
            # With more than two tied candidates this favours the ones
            # visited later, since each tie is decided independently.
            top_word_count = word_count
        # Otherwise the candidate is less likely than the current top
        # word, so it is skipped.
    return top_word_count
Beispiel #5
0
#!/usr/bin/env python
from amcc_ui import Ui
from blessed import Terminal
from configparser import ConfigParser
from word_parser import WordParser

if __name__ == '__main__':
    # Load the settings file and pick out the two sections used below
    # before touching the terminal, so a missing section fails early.
    settings = ConfigParser()
    settings.read('amcc.conf')
    parser_section = settings['main']  # handed to WordParser
    ui_section = settings['ui']        # handed to Ui

    terminal = Terminal()
    # Run the UI and the parser inside the terminal's fullscreen context.
    with terminal.fullscreen():
        screen = Ui(ui_section, terminal)
        word_parser = WordParser(parser_section, screen)
        word_parser.start()
Beispiel #6
0
 def eqExpr(self):
     """Build the parser for an ``equals <operand>`` expression.

     Returns:
         A ``SequenceParser`` to which a ``WordParser('equals')`` with
         its discard flag set and the parser returned by
         ``self.operand()`` have been added, in that order.
     """
     keyword = WordParser('equals')
     # Discard flag on: presumably drops the matched keyword from the
     # parse result -- confirm against WordParser.setDiscard.
     keyword.setDiscard(True)
     sequence = SequenceParser()
     sequence.add(keyword, self.operand())
     return sequence