def demo():
    """Train a decision-tree classifier on the names corpus and display it.

    Prints the tree itself (to depth 7) followed by an equivalent
    pseudocode rendering.
    """
    from nltk.classify.util import names_demo, binary_names_demo_features

    # `f` is a module-level trainer wrapper defined elsewhere in this file;
    # it stands in for DecisionTreeClassifier.train.
    tree = names_demo(
        f,  # DecisionTreeClassifier.train,
        binary_names_demo_features,
    )
    print(tree.pp(depth=7))
    print(tree.pseudocode(depth=7))
def demo():
    """Fit a tree classifier via the names demo and print it two ways."""
    from nltk.classify.util import names_demo, binary_names_demo_features

    # `f` is a trainer wrapper defined elsewhere in this file.
    classifier = names_demo(
        f,
        binary_names_demo_features,  # DecisionTreeClassifier.train,
    )
    print(classifier.pp(depth=7))
    print(classifier.pseudocode(depth=7))
def demo():
    """Run the names demo with a tree trainer and show the result.

    Uses ``pretty_format`` (the modern name for ``pp``) plus the
    pseudocode rendering, both truncated at depth 7.
    """
    from nltk.classify.util import binary_names_demo_features, names_demo

    # `f` is a trainer wrapper defined elsewhere in this file.
    fitted = names_demo(
        f,
        binary_names_demo_features,  # DecisionTreeClassifier.train,
    )
    print(fitted.pretty_format(depth=7))
    print(fitted.pseudocode(depth=7))
def demo():
    """Train a naive Bayes name classifier, round-trip it through a pickle
    file, and show the most informative features before and after.

    Fix: the original called ``pickle.load(open(...))`` without ever
    closing the handle, and closed the write handle manually with no
    protection against an exception during ``pickle.dump``.  Both handles
    now use ``with`` so they are closed deterministically.
    """
    import pickle

    from nltk.classify.util import names_demo

    classifier = names_demo(NaiveBayesClassifier.train)
    classifier.show_most_informative_features()

    with open("naivebayes.pickle", "wb") as save_classifier:
        pickle.dump(classifier, save_classifier)

    with open("naivebayes.pickle", "rb") as fh:
        classifier_f = pickle.load(fh)
    classifier_f.show_most_informative_features()
def names_demo():
    """Train the TADM-backed maxent classifier on the names corpus."""
    # The local import of nltk's names_demo deliberately shadows this
    # function's own name for the remainder of the body.
    from nltk.classify.util import names_demo
    from nltk.classify.maxent import TadmMaxentClassifier

    tadm_classifier = names_demo(TadmMaxentClassifier.train)
# NOTE(review): this chunk opens mid-function -- the `def` that owns the
# Popen call below is outside this view, so its code is left untouched.
p = subprocess.Popen(cmd, stdout=sys.stdout)
(stdout, stderr) = p.communicate()
# Check the return code.
# NOTE(review): stderr is not redirected to a pipe above, so `stderr` is
# presumably always None here and the print emits "None" -- confirm
# against the full function before changing.
if p.returncode != 0:
    print()
    print(stderr)
    raise OSError('tadm command failed!')

def names_demo():
    """Train the TADM maxent classifier on nltk's names demo corpus."""
    from nltk.classify.util import names_demo
    from nltk.classify.maxent import TadmMaxentClassifier
    classifier = names_demo(TadmMaxentClassifier.train)

def encoding_demo():
    """Show how TadmEventMaxentFeatureEncoding numbers joint features.

    Builds a tiny labeled-token fixture, writes it in TADM file format to
    stdout, then prints each encoded feature with its index.
    """
    import sys
    from nltk.classify.maxent import TadmEventMaxentFeatureEncoding
    tokens = [({'f0':1, 'f1':1, 'f3':1}, 'A'),
              ({'f0':1, 'f2':1, 'f4':1}, 'B'),
              ({'f0':2, 'f2':1, 'f3':1, 'f4':1}, 'A')]
    encoding = TadmEventMaxentFeatureEncoding.train(tokens)
    write_tadm_file(tokens, encoding, sys.stdout)
    print()
    for i in range(encoding.length()):
        print('%s --> %d' % (encoding.describe(i), i))
    print()

if __name__ == '__main__':
    encoding_demo()
    names_demo()
def demo():
    """Fit a classifier on the names data and print its pseudocode.

    ``f`` and ``binary_names_demo_features`` are defined elsewhere in
    this file / its imports.
    """
    classifier = names_demo(f, binary_names_demo_features)
    # print(classifier.pp(depth=7))
    print(classifier.pseudocode(depth=7))
def demo():
    """Train a naive Bayes gender classifier and list its top features."""
    from nltk.classify.util import names_demo

    nb = names_demo(NaiveBayesClassifier.train)
    nb.show_most_informative_features()
def _make_probdist(self, y_proba):
    """Wrap scikit-learn's positional probability vector in an NLTK
    DictionaryProbDist, keyed by the label each position maps to in the
    fitted label encoder.

    Idiom fix: ``dict((k, v) for ...)`` replaced with a dict
    comprehension.
    """
    classes = self._encoder.classes_
    return DictionaryProbDist({classes[i]: p for i, p in enumerate(y_proba)})


# skip doctests if scikit-learn is not installed
def setup_module(module):
    """Test-collection hook: skip this module when sklearn is missing."""
    from nose import SkipTest

    try:
        import sklearn
    except ImportError:
        raise SkipTest("scikit-learn is not installed")


if __name__ == "__main__":
    from nltk.classify.util import names_demo, names_demo_features
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import BernoulliNB

    # Bernoulli Naive Bayes is designed for binary classification. We set the
    # binarize option to False since we know we're passing boolean features.
    print("scikit-learn Naive Bayes:")
    names_demo(SklearnClassifier(BernoulliNB(binarize=False)).train,
               features=names_demo_features)

    # The C parameter on logistic regression (MaxEnt) controls regularization.
    # The higher it's set, the less regularized the classifier is.
    print("\n\nscikit-learn logistic regression:")
    names_demo(SklearnClassifier(LogisticRegression(C=1000)).train,
               features=names_demo_features)
######################################################################
# Guess an unseen name's gender!
######################################################################

from nltk.classify.naivebayes import NaiveBayesClassifier
from nltk.classify.util import names_demo


# Feature Extraction:
def name_features(name):
    """Return the feature dict for *name*.

    Currently an empty stub -- add features here (e.g. last letter,
    name length) to give the classifier something to learn from.
    """
    features = {}
    return features


# Test the classifier:
classifier = names_demo(NaiveBayesClassifier.train, name_features)

# Feature Analysis:
# classifier.show_most_informative_features()
                         dtype=self._dtype)
        # NOTE(review): the call that this `dtype=self._dtype)` closes
        # begins before this chunk and is left untouched.
        for i, fs in enumerate(featuresets):
            # Python 2 dict API (.iteritems) -- this snippet predates Py3.
            for f, v in fs.iteritems():
                try:
                    X[i, self._feature_index[f]] = self._dtype(v)
                except KeyError:    # feature not seen in training
                    pass
        return X

    def _make_probdist(self, y_proba):
        # Re-key the positional probability vector by label and wrap it
        # in an NLTK DictionaryProbDist.
        return DictionaryProbDist(dict((self._index_label[i], p)
                                       for i, p in enumerate(y_proba)))

if __name__ == "__main__":
    from nltk.classify.util import names_demo, binary_names_demo_features
    try:
        from sklearn.linear_model.sparse import LogisticRegression
    except ImportError:     # separate sparse LR to be removed in 0.12
        from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import BernoulliNB

    print("scikit-learn Naive Bayes:")
    names_demo(SklearnClassifier(BernoulliNB(binarize=False), dtype=bool).train,
               features=binary_names_demo_features)
    print("scikit-learn logistic regression:")
    names_demo(SklearnClassifier(LogisticRegression(), dtype=np.float64).train,
               features=binary_names_demo_features)
def demo():
    """Train a decision tree on the names corpus and pretty-print it.

    NOTE(review): Python 2 `print` statement below -- this snippet
    predates Python 3 and is left as-is.
    """
    from nltk.classify.util import names_demo, binary_names_demo_features
    classifier = names_demo(DecisionTreeClassifier.train,
                            binary_names_demo_features)
    print classifier.pp(depth=7)
        # NOTE(review): the method that this `return X` closes begins
        # before this chunk and is left untouched.
        return X

    def _make_probdist(self, y_proba):
        # Re-key scikit-learn's positional probability vector by label
        # and wrap it in an NLTK DictionaryProbDist.
        return DictionaryProbDist(dict((self._index_label[i], p)
                                       for i, p in enumerate(y_proba)))

# skip doctests if scikit-learn is not installed
def setup_module(module):
    """Test-collection hook: skip this module when sklearn is missing."""
    from nose import SkipTest
    try:
        import sklearn
    except ImportError:
        raise SkipTest("scikit-learn is not installed")

if __name__ == "__main__":
    from nltk.classify.util import names_demo, binary_names_demo_features
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import BernoulliNB

    print("scikit-learn Naive Bayes:")
    # Bernoulli Naive Bayes is designed for binary classification. We set the
    # binarize option to False since we know we're passing binary features
    # (when binarize=False, scikit-learn does x>0 on the feature values x).
    names_demo(SklearnClassifier(BernoulliNB(binarize=False), dtype=bool).train,
               features=binary_names_demo_features)
    print("scikit-learn logistic regression:")
    names_demo(SklearnClassifier(LogisticRegression(), dtype=np.float64).train,
               features=binary_names_demo_features)
# skip doctests if scikit-learn is not installed
def setup_module(module):
    """Test-collection hook: skip this module's doctests without sklearn."""
    from nose import SkipTest

    try:
        import sklearn
    except ImportError:
        raise SkipTest("scikit-learn is not installed")


if __name__ == "__main__":
    from nltk.classify.util import names_demo, names_demo_features
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import BernoulliNB

    # Bernoulli Naive Bayes targets binary classification; binarize is
    # disabled because the demo features are already boolean.
    print("scikit-learn Naive Bayes:")
    names_demo(
        SklearnClassifier(BernoulliNB(binarize=False)).train,
        features=names_demo_features,
    )

    # C controls regularization strength for logistic regression (MaxEnt):
    # the larger the value, the weaker the regularization.
    print("\n\nscikit-learn logistic regression:")
    names_demo(
        SklearnClassifier(LogisticRegression(C=1000)).train,
        features=names_demo_features,
    )
def demo():
    """Train maxent name classifiers with GIS and then IIS.

    NOTE(review): Python 2 `print` statements below -- this snippet
    predates Python 3 and is left as-is.  The trainer functions are
    defined elsewhere in this file.
    """
    from nltk.classify.util import names_demo
    print 'Generalized Iterative Scaling:'
    classifier = names_demo(train_maxent_classifier_with_gis)
    print 'Improved Iterative Scaling:'
    classifier = names_demo(train_maxent_classifier_with_iis)
        # NOTE(review): this chunk opens mid-method -- the `def` that owns
        # the ARFF-formatting lines below is outside this view.
        labeled = tokens and isinstance(tokens[0], (tuple, list))
        if not labeled:
            tokens = [(tok, None) for tok in tokens]

        # Data section
        s = '\n@DATA\n'
        for (tok, label) in tokens:
            for fname, ftype in self._features:
                s += '%s,' % self._fmt_arff_val(tok.get(fname))
            s += '%s\n' % self._fmt_arff_val(label)

        return s

    def _fmt_arff_val(self, fval):
        # Format one value for an ARFF data row.  Python 2 code:
        # `long` does not exist on Py3.
        if fval is None:
            return '?'
        elif isinstance(fval, (bool, int, long)):
            return '%s' % fval
        elif isinstance(fval, float):
            return '%r' % fval
        else:
            # NOTE(review): identical to the float branch above -- the
            # split is redundant but preserved as-is.
            return '%r' % fval

if __name__ == '__main__':
    from nltk.classify.util import names_demo, binary_names_demo_features
    def make_classifier(featuresets):
        # Train a Weka C4.5 model, persisting it at /tmp/name.model.
        return WekaClassifier.train('/tmp/name.model', featuresets, 'C4.5')
    classifier = names_demo(make_classifier, binary_names_demo_features)
    # NOTE(review): these statements continue a demo function whose `def`
    # is outside this view; they build a toy TADM feature encoding and
    # print every encoded feature with its index.
    tokens = [
        ({"f0": 1, "f1": 1, "f3": 1}, "A"),
        ({"f0": 1, "f2": 1, "f4": 1}, "B"),
        ({"f0": 2, "f2": 1, "f3": 1, "f4": 1}, "A"),
    ]
    encoding = TadmEventMaxentFeatureEncoding.train(tokens)
    write_tadm_file(tokens, encoding, sys.stdout)
    print()
    for i in range(encoding.length()):
        print("%s --> %d" % (encoding.describe(i), i))
    print()


if __name__ == "__main__":
    encoding_demo()
    names_demo()