Ejemplo n.º 1
0
 def __init__(self, n, alpha=None, ngram_model=None):
     '''
     Stores n and picks the language model to use.

     An explicitly supplied ngram_model is used as-is and takes precedence over alpha.
     Otherwise a new NgramModel is built for n, forwarding alpha only when one was given so
     that NgramModel's own default applies when it was not.
     '''
     self.n = n
     # Identity checks: "!= None" would invoke the argument's own __ne__/__eq__, which can
     # misreport (or raise) for objects that overload comparison.
     if ngram_model is not None:
         self.language_model = ngram_model
     elif alpha is not None:
         self.language_model = NgramModel(n, alpha=alpha)
     else:
         self.language_model = NgramModel(n)
Ejemplo n.º 2
0
    def __init__(self,
                 pronouncer=Pronouncer(),
                 phoneme_to_digit_dict=None,
                 max_word_length=None,
                 min_sentence_length=5,
                 n=3,
                 alpha=0.1,
                 select_most_likely=True,
                 tagger=UnigramTagger(brown.tagged_sents())):
        '''
        Sets up the NgramPOSContextEncoder.

        At most (n - 1) previous words are used as context. When select_most_likely is True the
        next word is the one with the highest-probability part-of-speech and the highest n-gram
        probability; otherwise the choice is sampled, weighted by those probabilities.

        Note: the default pronouncer and tagger are created once, when this method is defined,
        and are shared by every instance that relies on the defaults.
        '''
        # everything the parent encoder needs; its context window is one shorter than n
        parent_options = dict(pronouncer=pronouncer,
                              phoneme_to_digit_dict=phoneme_to_digit_dict,
                              max_word_length=max_word_length,
                              context_length=n - 1,
                              min_sentence_length=min_sentence_length)
        super(NgramPOSContextEncoder, self).__init__(**parent_options)

        # plain settings supplied by the caller
        self.select_most_likely = select_most_likely
        self.tagger = tagger

        # word model and part-of-speech model, both built with the same n and alpha
        self.ngram = NgramModel(n, alpha=alpha)
        self.pos_ngram = NgramPOSModel(n, alpha=alpha)
Ejemplo n.º 3
0
    def __init__(self,
                 pronouncer=Pronouncer(),
                 phoneme_to_digit_dict=None,
                 max_word_length=2):
        '''
        Sets up the UnigramGreedyEncoder, which greedily groups digits so that each word is as
        long as possible (a word's length being the number of digits it encodes).

        Note: the default pronouncer is created once, when this method is defined, and is shared
        by every instance that relies on the default.
        '''
        # hand the shared encoder settings to the parent class
        parent_options = dict(pronouncer=pronouncer,
                              phoneme_to_digit_dict=phoneme_to_digit_dict,
                              max_word_length=max_word_length)
        super(UnigramGreedyEncoder, self).__init__(**parent_options)

        # n-gram model of order 1
        self.unigram = NgramModel(1)
Ejemplo n.º 4
0
    def __init__(self,
                 pronouncer=Pronouncer(),
                 phoneme_to_digit_dict=None,
                 max_word_length=None,
                 min_sentence_length=5,
                 n=3,
                 alpha=0.1,
                 select_most_likely=True,
                 tagger_type=UnigramTagger,
                 tagged_sents=brown.tagged_sents(tagset='universal'),
                 num_sentence_templates=100,
                 word_length_weight=10):
        '''
        Sets up the SentenceTaggerEncoder.

        Sentence templates are the num_sentence_templates most common part-of-speech sentence
        types, each requiring at least min_sentence_length words. Words that encode more digits
        are favored according to word_length_weight (0 means unweighted). Words and sentences
        are scored through an n-gram model with the given n and alpha.

        Note: the default pronouncer and tagged_sents are created once, when this method is
        defined, and are shared by every instance that relies on the defaults.
        '''
        # settings forwarded unchanged to the parent encoder
        parent_options = dict(pronouncer=pronouncer,
                              phoneme_to_digit_dict=phoneme_to_digit_dict,
                              max_word_length=max_word_length,
                              min_sentence_length=min_sentence_length,
                              n=n,
                              alpha=alpha,
                              select_most_likely=select_most_likely)
        super(SentenceTaggerEncoder, self).__init__(**parent_options)

        # template configuration; all of it is in place before the templates are computed
        self.tagged_sents = tagged_sents
        self.num_sentence_templates = num_sentence_templates
        # parts of speech that can reasonably be left out of any sentence ("optional" tags)
        self.optional_tags = ['DET', 'ADJ', 'ADV']
        # build the tagger from the same tagged sentences
        self.tagger = tagger_type(tagged_sents)
        self.sentence_templates = self._get_sentence_templates()

        self.word_length_weight = word_length_weight
        # fixed bigram model used for post processing
        self.bigram = NgramModel(n=2, alpha=0.05)