def test(self, test_sequence, verbose=False, **kwargs):
    """
    Tests the HiddenMarkovModelTagger instance.

    :param test_sequence: a sequence of labeled test instances
    :type test_sequence: list(list)
    :param verbose: boolean flag indicating whether testing should be
        verbose or include printed output
    :type verbose: bool
    """

    def words(sent):
        return [word for (word, tag) in sent]

    def tags(sent):
        return [tag for (word, tag) in sent]

    def flatten(seq):
        return list(itertools.chain(*seq))

    test_sequence = self._transform(test_sequence)
    # imap/izip are the lazy Python 2 itertools counterparts of map/zip,
    # imported elsewhere in the module.
    predicted_sequence = list(imap(self._tag, imap(words, test_sequence)))

    if verbose:
        for test_sent, predicted_sent in izip(test_sequence,
                                              predicted_sequence):
            print('Test:',
                  ' '.join('%s/%s' % (token, tag)
                           for (token, tag) in test_sent))
            print()
            print('Untagged:',
                  ' '.join("%s" % token for (token, tag) in test_sent))
            print()
            print('HMM-tagged:',
                  ' '.join('%s/%s' % (token, tag)
                           for (token, tag) in predicted_sent))
            print()
            print('Entropy:',
                  self.entropy([(token, None)
                                for (token, tag) in predicted_sent]))
            print()
            print('-' * 60)

    test_tags = flatten(imap(tags, test_sequence))
    predicted_tags = flatten(imap(tags, predicted_sequence))

    acc = accuracy(test_tags, predicted_tags)
    count = sum(len(sent) for sent in test_sequence)
    print('accuracy over %d tokens: %.2f' % (count, acc * 100))
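# --- Usage sketch (not part of the original method) -------------------------
# A minimal, hedged example of how test() is typically driven: train a
# supervised HMM tagger with NLTK's HiddenMarkovModelTrainer and evaluate it
# on held-out sentences.  The corpus (treebank sample), the 3000/100 split
# and the Lidstone estimator are illustrative assumptions, not requirements
# of test() itself.
from nltk.corpus import treebank
from nltk.probability import LidstoneProbDist
from nltk.tag.hmm import HiddenMarkovModelTrainer

train_sents = treebank.tagged_sents()[:3000]
test_sents = treebank.tagged_sents()[3000:3100]

trainer = HiddenMarkovModelTrainer()
tagger = trainer.train_supervised(
    train_sents,
    estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins))

# Prints per-sentence diagnostics when verbose=True, then the final
# 'accuracy over N tokens' line computed at the end of test().
tagger.test(test_sents, verbose=False)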
def nsmallest(n, iterable, key=None):
    """Find the n smallest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key)[:n]
    """
    # Decorate-select-undecorate: pair each element with a running count()
    # so equal items never compare directly and selection stays stable.
    # izip/imap/count/tee come from Python 2 itertools, itemgetter from
    # operator, and _nsmallest is the key-less selection this wrapper uses.
    if key is None:
        it = izip(iterable, count())                # decorate
        result = _nsmallest(n, it)
        return map(itemgetter(0), result)           # undecorate

    # General case: decorate with (key(value), position, value); tee() lets
    # the iterable feed both the key computation and the values.
    in1, in2 = tee(iterable)
    it = izip(imap(key, in1), count(), in2)         # decorate
    result = _nsmallest(n, it)
    return map(itemgetter(2), result)               # undecorate
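# --- Usage sketch (not part of the original function) -----------------------
# Illustrative call pattern for nsmallest(); the fruit list below is made up.
# In the Python 2 module this function lives in, the itertools/operator
# helpers and _nsmallest are already in scope; on modern Python the same
# calls work against the standard-library heapq.nsmallest.
fruits = ['pear', 'fig', 'banana', 'kiwi', 'apple']

# No key: the three lexicographically smallest strings.
print(nsmallest(3, fruits))            # ['apple', 'banana', 'fig']

# With a key: the three shortest strings; equal lengths keep their original
# order because the decoration includes the running count().
print(nsmallest(3, fruits, key=len))   # ['fig', 'pear', 'kiwi']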