Esempio n. 1
0
def demo():
    """Train a decision-tree classifier on NLTK's names-gender demo and
    print the learned tree and its pseudocode (to depth 7).

    NOTE(review): ``f`` is undefined in this snippet — the inline comment
    suggests it stands for ``DecisionTreeClassifier.train``; confirm.
    """
    from nltk.classify.util import names_demo, binary_names_demo_features
    classifier = names_demo(
        f,  # DecisionTreeClassifier.train,
        binary_names_demo_features)
    # ``pp`` is a deprecated alias in NLTK 3; ``pretty_format`` is the
    # current name (see the snippet at Esempio n. 3 using it).
    print(classifier.pretty_format(depth=7))
    print(classifier.pseudocode(depth=7))
Esempio n. 2
0
def demo():
    """Train a decision tree on the names corpus and display it.

    NOTE(review): ``f`` is undefined here; the inline comment suggests it
    should be ``DecisionTreeClassifier.train`` — confirm before use.
    """
    from nltk.classify.util import names_demo, binary_names_demo_features

    classifier = names_demo(
        f, binary_names_demo_features  # DecisionTreeClassifier.train,
    )
    # ``pp`` is deprecated in NLTK 3 in favour of ``pretty_format``.
    print(classifier.pretty_format(depth=7))
    print(classifier.pseudocode(depth=7))
Esempio n. 3
0
def demo():
    """Run the names-gender demo with a tree trainer and print the
    resulting tree (depth 7) plus its pseudocode."""
    from nltk.classify.util import binary_names_demo_features, names_demo

    # ``f`` is presumably DecisionTreeClassifier.train, bound elsewhere.
    classifier = names_demo(f, binary_names_demo_features)
    print(classifier.pretty_format(depth=7))
    print(classifier.pseudocode(depth=7))
Esempio n. 4
0
def demo():
    """Train a NaiveBayes gender classifier, pickle it to disk, reload it,
    and show the most informative features before and after the round-trip.

    Relies on module-level ``pickle`` and ``NaiveBayesClassifier``.
    """
    from nltk.classify.util import names_demo
    classifier = names_demo(NaiveBayesClassifier.train)
    classifier.show_most_informative_features()
    # Context managers guarantee the handles are closed even on error;
    # the original leaked the read handle opened inline in pickle.load().
    with open("naivebayes.pickle", "wb") as save_classifier:
        pickle.dump(classifier, save_classifier)
    with open("naivebayes.pickle", "rb") as fh:
        classifier_f = pickle.load(fh)
    classifier_f.show_most_informative_features()
Esempio n. 5
0
def names_demo():
    """Run NLTK's names-gender demo with the TADM maxent trainer."""
    from nltk.classify.maxent import TadmMaxentClassifier
    from nltk.classify.util import names_demo as run_demo

    # Result is unused in the original too; kept for symmetry.
    classifier = run_demo(TadmMaxentClassifier.train)
Esempio n. 6
0
    p = subprocess.Popen(cmd, stdout=sys.stdout)
    (stdout, stderr) = p.communicate()

    # Check the return code.
    if p.returncode != 0:
        print()
        print(stderr)
        raise OSError('tadm command failed!')

def names_demo():
    """Demo: train NLTK's TADM-backed maxent classifier on the names corpus."""
    from nltk.classify.maxent import TadmMaxentClassifier
    from nltk.classify.util import names_demo

    classifier = names_demo(TadmMaxentClassifier.train)

def encoding_demo():
    """Write a TADM event-file rendering of a tiny toy dataset to stdout,
    then print the feature-index mapping learned from it."""
    import sys
    from nltk.classify.maxent import TadmEventMaxentFeatureEncoding

    tokens = [
        ({'f0': 1, 'f1': 1, 'f3': 1}, 'A'),
        ({'f0': 1, 'f2': 1, 'f4': 1}, 'B'),
        ({'f0': 2, 'f2': 1, 'f3': 1, 'f4': 1}, 'A'),
    ]
    encoding = TadmEventMaxentFeatureEncoding.train(tokens)
    write_tadm_file(tokens, encoding, sys.stdout)
    print()
    for idx in range(encoding.length()):
        print('%s --> %d' % (encoding.describe(idx), idx))
    print()

if __name__ == '__main__':
    # Run both demos when executed as a script.
    encoding_demo()
    names_demo()
Esempio n. 7
0
def names_demo():
    """Train the TADM maxent classifier on NLTK's names-gender demo."""
    from nltk.classify.util import names_demo
    from nltk.classify.maxent import TadmMaxentClassifier
    classifier = names_demo(TadmMaxentClassifier.train)
def demo():
    """Train via the module-level ``names_demo``/``f`` bindings and print
    the classifier's pseudocode to depth 7."""
    classifier = names_demo(f, binary_names_demo_features)
    print(classifier.pseudocode(depth=7))
def demo():
    """Train a NaiveBayes gender classifier and list its top features."""
    from nltk.classify.util import names_demo

    nb_classifier = names_demo(NaiveBayesClassifier.train)
    nb_classifier.show_most_informative_features()
Esempio n. 10
0
    def _make_probdist(self, y_proba):
        """Wrap a per-class probability vector in a DictionaryProbDist,
        keyed by the encoder's class labels."""
        labels = self._encoder.classes_
        return DictionaryProbDist(
            {labels[i]: prob for i, prob in enumerate(y_proba)})


# skip doctests if scikit-learn is not installed
def setup_module(module):
    """Nose hook: skip this module's doctests when scikit-learn is absent."""
    from nose import SkipTest
    try:
        import sklearn
    except ImportError:
        raise SkipTest("scikit-learn is not installed")

if __name__ == "__main__":
    # Smoke-test the SklearnClassifier wrapper with two scikit-learn
    # estimators on the names-gender demo.
    from nltk.classify.util import names_demo, names_demo_features
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import BernoulliNB

    # Bernoulli Naive Bayes is designed for binary classification. We set the
    # binarize option to False since we know we're passing boolean features.
    print("scikit-learn Naive Bayes:")
    names_demo(SklearnClassifier(BernoulliNB(binarize=False)).train,
               features=names_demo_features)

    # The C parameter on logistic regression (MaxEnt) controls regularization.
    # The higher it's set, the less regularized the classifier is.
    print("\n\nscikit-learn logistic regression:")
    names_demo(SklearnClassifier(LogisticRegression(C=1000)).train,
               features=names_demo_features)
Esempio n. 11
0
######################################################################
##
##  Guess an unseen name's gender!
##

from nltk.classify.naivebayes import NaiveBayesClassifier
from nltk.classify.util import names_demo


# Feature Extraction:
def name_features(name):
    """Feature-extractor stub: currently returns no features for *name*."""
    return {}


# Test the classifier: train NaiveBayes on the names corpus using the
# (currently empty) feature extractor above and report demo accuracy.
classifier = names_demo(NaiveBayesClassifier.train, name_features)

# Feature Analysis:
#classifier.show_most_informative_features()
Esempio n. 12
0
######################################################################
##
##  Guess an unseen name's gender!
##

from nltk.classify.naivebayes import NaiveBayesClassifier
from nltk.classify.util import names_demo

# Feature Extraction:
def name_features(name):
    """Placeholder feature extractor — yields an empty feature dict."""
    return {}

# Test the classifier: run the names-gender demo with NaiveBayes and the
# stub feature extractor defined above.
classifier = names_demo(NaiveBayesClassifier.train, name_features)

# Feature Analysis:
#classifier.show_most_informative_features()
Esempio n. 13
0
                  dtype=self._dtype)

        for i, fs in enumerate(featuresets):
            for f, v in fs.iteritems():
                try:
                    X[i, self._feature_index[f]] = self._dtype(v)
                except KeyError:    # feature not seen in training
                    pass

        return X

    def _make_probdist(self, y_proba):
        """Convert a probability vector into a DictionaryProbDist keyed by
        the labels recorded in ``self._index_label``."""
        mapping = {self._index_label[i]: prob
                   for i, prob in enumerate(y_proba)}
        return DictionaryProbDist(mapping)


if __name__ == "__main__":
    # Smoke-test the wrapper with both classifiers on the binary
    # names-gender demo.
    from nltk.classify.util import names_demo, binary_names_demo_features
    try:
        from sklearn.linear_model.sparse import LogisticRegression
    except ImportError:     # separate sparse LR to be removed in 0.12
        from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import BernoulliNB

    print("scikit-learn Naive Bayes:")
    # binarize=False: features are already boolean.
    names_demo(SklearnClassifier(BernoulliNB(binarize=False), dtype=bool).train,
               features=binary_names_demo_features)
    print("scikit-learn logistic regression:")
    names_demo(SklearnClassifier(LogisticRegression(), dtype=np.float64).train,
               features=binary_names_demo_features)
Esempio n. 14
0
def demo():
    """Train a decision tree on the names corpus and print it to depth 7."""
    from nltk.classify.util import names_demo, binary_names_demo_features

    classifier = names_demo(DecisionTreeClassifier.train, binary_names_demo_features)
    # Converted the Python 2 print statement to the print() function used
    # by the rest of this file.
    print(classifier.pp(depth=7))
Esempio n. 15
0
        return X

    def _make_probdist(self, y_proba):
        """Build a DictionaryProbDist from the probability vector, mapping
        each index through ``self._index_label``."""
        return DictionaryProbDist(
            {self._index_label[idx]: p for idx, p in enumerate(y_proba)})


# skip doctests if scikit-learn is not installed
def setup_module(module):
    """Nose hook: raise SkipTest when scikit-learn cannot be imported."""
    from nose import SkipTest
    try:
        import sklearn
    except ImportError:
        raise SkipTest("scikit-learn is not installed")

if __name__ == "__main__":
    # Smoke-test the wrapper on the binary names-gender demo.
    from nltk.classify.util import names_demo, binary_names_demo_features
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import BernoulliNB

    print("scikit-learn Naive Bayes:")
    # Bernoulli Naive Bayes is designed for binary classification. We set the
    # binarize option to False since we know we're passing binary features
    # (when binarize=False, scikit-learn does x>0 on the feature values x).
    names_demo(SklearnClassifier(BernoulliNB(binarize=False), dtype=bool).train,
               features=binary_names_demo_features)
    print("scikit-learn logistic regression:")
    names_demo(SklearnClassifier(LogisticRegression(), dtype=np.float64).train,
               features=binary_names_demo_features)
Esempio n. 16
0
# skip doctests if scikit-learn is not installed
def setup_module(module):
    """Nose hook: skip the module's doctests if scikit-learn is absent."""
    from nose import SkipTest

    try:
        import sklearn
    except ImportError:
        raise SkipTest("scikit-learn is not installed")


if __name__ == "__main__":
    # Smoke-test SklearnClassifier with two estimators on the names demo.
    from nltk.classify.util import names_demo, names_demo_features
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import BernoulliNB

    # Bernoulli Naive Bayes is designed for binary classification. We set the
    # binarize option to False since we know we're passing boolean features.
    print("scikit-learn Naive Bayes:")
    names_demo(
        SklearnClassifier(BernoulliNB(binarize=False)).train,
        features=names_demo_features,
    )

    # The C parameter on logistic regression (MaxEnt) controls regularization.
    # The higher it's set, the less regularized the classifier is.
    print("\n\nscikit-learn logistic regression:")
    names_demo(
        SklearnClassifier(LogisticRegression(C=1000)).train,
        features=names_demo_features,
    )
Esempio n. 17
0
def demo():
    """Run the names-gender demo with GIS and then IIS maxent training.

    ``train_maxent_classifier_with_gis``/``_iis`` are assumed to be bound
    at module level (not visible in this snippet).
    """
    from nltk.classify.util import names_demo

    # Converted Python 2 print statements to the print() function.
    print('Generalized Iterative Scaling:')
    classifier = names_demo(train_maxent_classifier_with_gis)
    print('Improved Iterative Scaling:')
    classifier = names_demo(train_maxent_classifier_with_iis)
Esempio n. 18
0
            labeled = tokens and isinstance(tokens[0], (tuple, list))
        if not labeled:
            tokens = [(tok, None) for tok in tokens]
    
        # Data section
        s = '\n@DATA\n'
        for (tok, label) in tokens:
            for fname, ftype in self._features:
                s += '%s,' % self._fmt_arff_val(tok.get(fname))
            s += '%s\n' % self._fmt_arff_val(label)
    
        return s

    def _fmt_arff_val(self, fval):
        if fval is None:
            return '?'
        elif isinstance(fval, (bool, int, long)):
            return '%s' % fval
        elif isinstance(fval, float):
            return '%r' % fval
        else:
            return '%r' % fval


if __name__ == '__main__':
    from nltk.classify.util import names_demo, binary_names_demo_features
    # Trainer callback: build a Weka C4.5 tree, persisting the model to
    # /tmp/name.model.
    def make_classifier(featuresets):
        return WekaClassifier.train('/tmp/name.model', featuresets,
                                    'C4.5')
    classifier = names_demo(make_classifier, binary_names_demo_features)
Esempio n. 19
0
    tokens = [
        ({
            "f0": 1,
            "f1": 1,
            "f3": 1
        }, "A"),
        ({
            "f0": 1,
            "f2": 1,
            "f4": 1
        }, "B"),
        ({
            "f0": 2,
            "f2": 1,
            "f3": 1,
            "f4": 1
        }, "A"),
    ]
    encoding = TadmEventMaxentFeatureEncoding.train(tokens)
    write_tadm_file(tokens, encoding, sys.stdout)
    print()
    for i in range(encoding.length()):
        print("%s --> %d" % (encoding.describe(i), i))
    print()


if __name__ == "__main__":
    # Run both demos when executed as a script.
    encoding_demo()
    names_demo()
Esempio n. 20
0
def demo():
    """Run the names-gender demo with NaiveBayes and show the most
    informative features of the trained model."""
    from nltk.classify.util import names_demo

    model = names_demo(NaiveBayesClassifier.train)
    model.show_most_informative_features()
Esempio n. 21
0
def demo():
    """Compare GIS and IIS maxent training on the names-gender demo.

    The trainer functions are assumed bound at module level.
    """
    from nltk.classify.util import names_demo

    # Python 2 print statements converted to the print() function.
    print('Generalized Iterative Scaling:')
    classifier = names_demo(train_maxent_classifier_with_gis)
    print('Improved Iterative Scaling:')
    classifier = names_demo(train_maxent_classifier_with_iis)