def info(self):
    """
    Print the feature statistics for the given model. (Assumes MaxEnt)

    Runs the ``classifier2info`` program found under the configured mallet
    directory against ``self._model`` and reads its standard output. A line
    matching ``FEATURES FOR CLASS <name>`` starts a new class section; every
    other non-blank line is expected to hold exactly two whitespace-separated
    fields, a feature name and a numeric value.

    For each class collected, one tab-separated line is printed: the class
    name, its ``<default>`` value, and the entries returned by
    ``feats.top_n(..., n=10, key2_re='^nom')``.

    :raises ValueError: if a non-blank, non-header line does not consist of
        exactly two fields, or its second field is not a valid float.
    """
    mallet = c['mallet']
    env = set_env_lang_utf8()
    info_bin = os.path.join(mallet, 'bin', 'classifier2info')

    cur_class = None
    feats = TwoLevelCountDict()

    # Compile once instead of re-resolving the pattern for every line.
    class_header = re.compile('FEATURES FOR CLASS (.*)')

    # The context manager closes the pipe and waits for the child on exit, so
    # neither a file descriptor nor a zombie process is left behind.
    # stdin and stderr are never used here; leaving stderr as an unread PIPE
    # could block the child once the pipe buffer fills, so both go to DEVNULL.
    with sub.Popen([info_bin, '--classifier', self._model],
                   stdout=sub.PIPE, stdin=sub.DEVNULL, stderr=sub.DEVNULL,
                   env=env) as info_p:

        # Go through and pick out what the features are for
        for line in info_p.stdout:
            content = line.decode(encoding='utf-8')

            # Set the current class if the section changes
            class_change = class_header.search(content)
            if class_change:
                cur_class = class_change.group(1).strip()
                continue

            # Blank separator lines carry no feature; unpacking them below
            # would raise ValueError.
            if not content.strip():
                continue

            # Otherwise, let's catalog the features.
            word, prob = content.split()
            feats.add(cur_class, word, float(prob))

    # Now, print some info
    for label in feats.keys():
        print(label, end='\t')
        print('%s:%.4f' % ('<default>', feats[label]['<default>']), end='\t')
        # NOTE(review): key2_re presumably limits the result to feature names
        # matching '^nom' -- confirm against TwoLevelCountDict.top_n.
        top_10 = feats.top_n(label, n=10, key2_re='^nom')
        print('\t'.join(['%s:%.4f' % (w, p) for w, p in top_10]))
def __init__(self, model=None):
    """
    Launch the mallet ``classify-file`` command as a child process.

    The process handle is stored on ``self.c`` with its stdin and stdout
    connected to pipes, and ``self._first`` is initialised to True.

    :param model: value passed to ``--classifier``. When None, the
        ``classifier`` name defined outside this method is used instead.
    """
    # Fall back to the externally defined default when no model is given.
    self._model = classifier if model is None else model

    mallet_bin = os.path.join(mallet, 'bin', 'mallet')
    env = set_env_lang_utf8()

    # NOTE(review): '-' for --input/--output presumably selects
    # stdin/stdout -- confirm against the mallet documentation.
    command = [
        mallet_bin, 'classify-file',
        '--classifier', self._model,
        '--input', '-',
        '--output', '-',
    ]
    self.c = sub.Popen(command, stdin=sub.PIPE, stdout=sub.PIPE, env=env)

    self._first = True