コード例 #1
0
ファイル: file.py プロジェクト: artscoop/scoop
 def get_corpus(self):
     """Read and populate the corpus, then return its list-shaped shadow.

     On first use ``self.corpus`` is created and filled from the CSV file
     stored inside ``<pathname>.csv.zip`` under the corpus directory. Each
     CSV row is expected to hold ``category, document, hash``; rows are
     stored as ``corpus[hash] = (category, document)``.

     Whenever the shadow list is missing or older than the corpus, it is
     rebuilt from the corpus values and ``self.classifier`` is re-created
     from it.

     Returns:
         ``self.corpus_shadow``, the list of ``(category, document)`` pairs.
     """
     if self.corpus is None:
         self.corpus = Dictionary()
         self.corpus.updated = time.time()
         try:
             directory = Paths.get_root_dir(*CORPUS_PATH)
             infile = '{name}.csv'.format(name=self.pathname)
             path = join(directory,
                         '{name}.csv.zip'.format(name=self.pathname))
             # Read the CSV from inside the zip file. The file handle is
             # opened in its own ``with`` because ZipFile does not close a
             # file object that was handed to it.
             with open(path, 'rb') as handle:
                 with ZipFile(handle) as archive:
                     # NOTE(review): archive.read() returns bytes on
                     # Python 3 - confirm the imported StringIO accepts that.
                     buffer = StringIO(archive.read(infile))
                     for row in csv.reader(buffer):
                         # 0: category, 1: doc, 2: hash
                         self.corpus[row[2]] = (row[0], row[1])
         except IOError:
             # Best effort: a missing or unreadable archive leaves the
             # corpus empty instead of failing the caller.
             pass
     shadow_is_stale = (
         self.corpus_shadow is None
         or self.corpus_shadow.updated < self.corpus.updated
     )
     if shadow_is_stale:
         self.corpus_shadow = List(self.corpus.values())
         self.corpus_shadow.updated = time.time()
         self.classifier = MaxEntClassifier(
             self.corpus_shadow,
             feature_extractor=extractor_base)  # or NaiveBayesClassifier
     return self.corpus_shadow
コード例 #2
0
def add_global_hook():
    """Build a request hook that publishes shared classifiers and test data.

    A ``TweetBank(50)`` data set is split into training and test parts, a
    Naive Bayes and a MaxEnt classifier are created from the training part,
    and both are bundled with the test part in a ``web.storage`` object.

    Returns:
        A callable taking ``handler``; it assigns the bundle to
        ``web.ctx.globals`` and returns the result of ``handler()``.
    """
    training_part, held_out_part = TweetBank(50).data_set()

    # Same construction order as the labels: Naive Bayes first, then MaxEnt.
    classifiers_by_label = {
        "Naive Bayes": NaiveBayesClassifier(training_part),
        "Maxent": MaxEntClassifier(training_part),
    }
    shared_state = web.storage({
        "classifier_dictionary": classifiers_by_label,
        "test_set": held_out_part,
    })

    def _wrapper(handler):
        # One shared bundle is reused for every call of the hook.
        web.ctx.globals = shared_state
        return handler()

    return _wrapper
コード例 #3
0
 def __init__(self, data):
     """Create the wrapped classifier from ``data``.

     The result of ``MaxEntClassifier(data)`` is stored on
     ``self._classifier``.
     """
     # A NaiveBayesClassifier(data) was used here as an earlier alternative.
     model = MaxEntClassifier(data)
     self._classifier = model
コード例 #4
0
 def setUp(self):
     """Build the classifier fixture from ``train_set``.

     ``train_set`` is looked up in the enclosing scope; the new
     ``MaxEntClassifier`` is stored on ``self.classifier``.
     """
     fixture = MaxEntClassifier(train_set)
     self.classifier = fixture
コード例 #5
0
print(len(words), len(tags))

# Split the first 1000 (word, tag) pairs: indices 0-799 go to the training
# list, indices 800-999 to the test list.
for i in range(1000):
    pair = (words[i], tags[i])
    if i < 800:
        train.append(pair)
    else:
        test.append(pair)
print(train)
print(test)

# Train one classifier of each kind on the same training pairs.
naive = NaiveBayesClassifier(train)
dtc = DecisionTreeClassifier(train)
mec = MaxEntClassifier(train)

print("NaiveBayesClassifier Accuracy: {0}".format(naive.accuracy(test)))
print("DecisionTreeClassifier Accuracy: {0}".format(dtc.accuracy(test)))
print("MaxEntClassifier Accuracy: {0}".format(mec.accuracy(test)))

cl = NaiveBayesClassifier(train)
print("NaiveBayesClassifier Accuracy: {0}".format(cl.accuracy(test)))
# Collect per-item predictions and count matches against ``test_tags``.
# Only the text element of each (word, tag) pair is classified: passing the
# whole pair would hand the expected tag to the classifier as input.
for i, sample in enumerate(test):
    tag = cl.classify(sample[0])
    pred_tags.append(tag)
    if tag == test_tags[i]:
        count += 1
print(len(pred_tags), len(test_tags))
print(count)
コード例 #6
0
def search_department(job, train):
    """Classify ``job`` with a MaxEnt classifier built from ``train``.

    The value of ``prob_dist.max()`` for ``job`` (presumably the most
    probable label) is printed and then returned.
    """
    distribution = MaxEntClassifier(train).prob_classify(job)
    best = distribution.max()
    print(best)
    return best
コード例 #7
0
#     trains.append(train[i])

# Use the complete training list as the training data.
trains = train

# ``choice`` selects which classifier is built from ``trains`` and scored
# against ``test``; any other value is reported as bad input.
if choice == "1":
    print("\n" + "#NaiveBayesClassifier")
    cl1 = NaiveBayesClassifier(trains)
    score = cl1.accuracy(test)
    print("Classifier: Naive Bayes -- Accuracy: ", score, "\n")

elif choice == "2":
    print("\n" + "#DecisionTreeClassifier")
    cl2 = DecisionTreeClassifier(trains)
    score = cl2.accuracy(test)
    print("Classifier: Decision Tree -- Accuracy: ", score, "\n")

elif choice == "3":
    print("\n" + "#MaxEntClassifier")
    cl3 = MaxEntClassifier(trains)
    score = cl3.accuracy(test)
    print("Classifier: Maximum Entropy -- Accuracy: ", score, "\n")

elif choice == "4":
    print("\n" + "#NLTKClassifier")
    cl4 = NLTKClassifier(trains)
    score = cl4.accuracy(test)
    print("Classifier: NLTK -- Accuracy: ", score, "\n")

else:
    print("Bad input!")

# Order the (key, value) items of both dictionaries by ascending value, so
# the most repeated words (most important properties) end up last.
by_value = operator.itemgetter(1)
totalDictPosSorted = list(totalDictPos.items())
totalDictPosSorted.sort(key=by_value)
totalDictNegSorted = list(totalDictNeg.items())
totalDictNegSorted.sort(key=by_value)
コード例 #8
0
from textblob.classifiers import MaxEntClassifier

# Build the classifier from the CSV training file.
with open('data/train-toy.csv', 'r') as fp:
    cl = MaxEntClassifier(fp, format="csv")

# Score the classifier against the CSV test file and print the accuracy.
# The parenthesised single-argument print works on both Python 2 and 3.
with open('data/test-toy.csv', 'r') as gp:
    print(cl.accuracy(gp, format="csv"))
コード例 #9
0
# Stages that follow the raw-data run for every classifier type: the heading
# to print, and the training data to build the classifier from.
later_stages = (
    ('\n After removing stop-words \n', training_array_without_sw),
    ('\n After stemming \n', training_array_stemmed_without_sw),
)

# Decision tree: raw data first, then each later stage.
print('Before pre-processing \n')
cl = DecisionTreeClassifier(training_array)
classify_review(cl)
for stage_heading, stage_data in later_stages:
    print(stage_heading)
    cl = DecisionTreeClassifier(stage_data)
    classify_review(cl)

# Naive Bayes: banner, raw data, then each later stage.
print('\n ************ NaiveBayesClassifier ********************\n')
print('Before pre-processing\n')
cl = NaiveBayesClassifier(training_array)
classify_review(cl)
for stage_heading, stage_data in later_stages:
    print(stage_heading)
    cl = NaiveBayesClassifier(stage_data)
    classify_review(cl)

# MaxEnt: the raw-data classifier is constructed before its heading is
# printed, so that order is kept here.
print('\n ************ MaxEntClassifier ********************\n')
cl = MaxEntClassifier(training_array)
print('Before pre-processing\n')
classify_review(cl)
for stage_heading, stage_data in later_stages:
    print(stage_heading)
    cl = MaxEntClassifier(stage_data)
    classify_review(cl)