Python Classifierの例

プログラミング言語: Python

名前空間/パッケージ名: docclass

クラス/型: Classifier

hotexamples.comのコード掲載数: 15

Python Classifier - 15件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのdocclass.Classifierの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

incc(6)

incf(5)

train(5)

catcount(2)

classify(2)

fcount(2)

prob(2)

fprob(1)

getfeatures(1)

infc(1)

save(1)

totalcount(1)

weightedprob(1)

コード例 #1

ファイルを表示

ファイル: test_classifier.py プロジェクト: stefanseibert/DataMining

 def testTrain(self):
     """Train on one document and check the resulting counters.

     After a single ``train`` call the "Good" category count and the
     count of the feature "hello" in "Good" must both be 1, and "my"
     must not appear as a key of ``c.fc``.
     """
     c = Classifier(getwords)
     item = "Hello hello world, my name is Python."
     cat = "Good"
     c.train(item, cat)
     self.assertEqual(c.catcount("Good"), 1)
     self.assertEqual(c.fcount("hello", "Good"), 1)
     # NOTE(review): presumably getwords drops very short words such as
     # "my" -- confirm against its implementation.
     # Membership assertion instead of dict.has_key(), which does not
     # exist on Python 3; it also gives a clearer failure message.
     self.assertNotIn("my", c.fc)

コード例 #2

ファイルを表示

ファイル: test_classifier.py プロジェクト: stefanseibert/DataMining

 def testGetFeatures(self):
     """Check the feature mapping produced by ``getfeatures``.

     "hello" must be present with the value 1 although it occurs twice
     (in different case) in the input, while "has" and the over-long
     word must not be keys of the result.
     """
     c = Classifier(getwords)
     # Named `features` rather than `dict` so the builtin is not shadowed.
     features = c.getfeatures("Hello World world world ,       hello has cats and vervyveryveryveryveryverylongword")
     self.assertIsNotNone(features)
     self.assertIsNotNone(features["hello"])
     self.assertEqual(features["hello"], 1)
     # NOTE(review): presumably getwords filters words by length --
     # confirm the exact limits against its implementation.
     # assertNotIn replaces dict.has_key(), which is gone in Python 3.
     self.assertNotIn("has", features)
     self.assertNotIn("vervyveryveryveryveryverylongword", features)

コード例 #3

ファイルを表示

ファイル: test_classifier.py プロジェクト: stefanseibert/DataMining

 def testCatCount(self):
     """catcount() must report how often incc() was called per category."""
     clf = Classifier(getwords)
     # Two increments for "Bad", then one for "Good".
     for category in ("Bad", "Bad", "Good"):
         clf.incc(category)
     for category, expected in (("Good", 1), ("Bad", 2)):
         self.assertEqual(clf.catcount(category), expected)

コード例 #4

ファイルを表示

ファイル: test_classifier.py プロジェクト: stefanseibert/DataMining

 def testIncC(self):
     """Check the ``cc`` entry of a category after every incc() call."""
     clf = Classifier(getwords)
     # (category to increment, value expected in clf.cc afterwards)
     steps = (("Bad", 1), ("Bad", 2), ("Good", 1))
     for category, expected in steps:
         clf.incc(category)
         self.assertEqual(clf.cc[category], expected)

コード例 #5

ファイルを表示

ファイル: test_classifier.py プロジェクト: stefanseibert/DataMining

 def testFCount(self):
     """fcount() must return the per-category count built up by incf()."""
     clf = Classifier(getwords)
     increments = (("hello", "Good"), ("hello", "Good"), ("hello", "Bad"))
     for feature, category in increments:
         clf.incf(feature, category)
     # The last entry is a feature that was never counted: expected 0.
     expectations = (
         ("hello", "Good", 2),
         ("hello", "Bad", 1),
         ("wurst", "Bad", 0),
     )
     for feature, category, expected in expectations:
         self.assertEqual(clf.fcount(feature, category), expected)

コード例 #6

ファイルを表示

ファイル: test_classifier.py プロジェクト: stefanseibert/DataMining

 def testIncF(self):
     """Check the nested ``fc`` entry after every incf() call."""
     clf = Classifier(getwords)
     # (feature, category, value expected in clf.fc[feature][category])
     steps = (
         ("hello", "Good", 1),
         ("hello", "Good", 2),
         ("hello", "Bad", 1),
     )
     for feature, category, expected in steps:
         clf.incf(feature, category)
         self.assertEqual(clf.fc[feature][category], expected)

コード例 #7

ファイルを表示

ファイル: test.py プロジェクト: ubear/MachineLearn

def test_infc_func():
    """Call infc() for a few feature/category pairs and print ``fc``.

    This is a smoke test: it makes no assertions and only prints the
    classifier's ``fc`` attribute for manual inspection.
    """
    c = Classifier(getfeatures=None)
    c.infc("python", "good")
    c.infc("python", "good")
    c.infc("the", "bad")
    c.infc("the", "good")

    # Single-argument print() produces the same output on Python 2 and
    # is valid syntax on Python 3, unlike the print statement.
    print(c.fc)

コード例 #8

ファイルを表示

ファイル: parseTechFeed.py プロジェクト: stefanseibert/DataMining

              'http://www.spiegel.de/schlagzeilen/tops/index.rss',
              'http://www.sueddeutsche.de/app/service/rss/alles/rss.xml'
              ]

# NOTE(review): presumably the feeds used for evaluation rather than
# training -- confirm against the part of the script that reads `test`.
test = [
    "http://rss.golem.de/rss.php?r=sw&feed=RSS0.91",
    'http://newsfeed.zeit.de/politik/index',
    'http://www.welt.de/?service=Rss',
]

# Counters for the number of feed entries processed per data set.
countnews = {'tech': 0, 'nontech': 0, 'test': 0}


c = Classifier(getwords, initprob=0.5)

# Every print below passes exactly one argument, so print(...) yields the
# same output on Python 2 while also being valid Python 3.
print("--------------------News from trainTech------------------------")
for feed in trainTech:
    f = feedparser.parse(feed)
    for e in f.entries:
        print('\n---------------------------')
        # Title and description are joined and passed through stripHTML
        # before being used as training text.
        fulltext = stripHTML(e.title + ' ' + e.description)
        print(fulltext)
        countnews['tech'] += 1

        c.train(fulltext, "Tech")

print("----------------------------------------------------------------")
print("----------------------------------------------------------------")
print("----------------------------------------------------------------")

コード例 #9

ファイルを表示

ファイル: test_classifier.py プロジェクト: stefanseibert/DataMining

    def testProb(self):
        """prob() of a new document for "Good" after manual training.

        The counters are filled directly through incc()/incf() instead
        of train(), then the probability of ``item`` belonging to "Good"
        is compared with the expected value 0.234375.
        """
        c = Classifier(getwords)

        # training
        c.incc("Good")
        c.incf("hello", "Good")
        c.incc("Good")
        c.incf("world", "Good")
        c.incc("Good")
        c.incf("world", "Good")
        c.incc("Bad")
        c.incf("world", "Bad")

        # classify new document
        item = "world world wurst Wurst wurst world"

        # Floating-point results are compared with a tolerance; exact
        # equality depends on the order of the arithmetic inside prob().
        self.assertAlmostEqual(c.prob(item, "Good"), 0.234375)

コード例 #10

ファイルを表示

ファイル: test_classifier.py プロジェクト: stefanseibert/DataMining

 def testWeightedProb(self):
     """weightedprob() for a known and for an unseen feature.

     "world" was counted twice in "Good" and once in "Bad"; "wurst" was
     never counted and is expected to yield 0.5.
     """
     c = Classifier(getwords)
     c.incc("Good")
     c.incf("hello", "Good")
     c.incc("Good")
     c.incf("world", "Good")
     c.incc("Good")
     c.incf("world", "Good")
     c.incc("Bad")
     c.incf("world", "Bad")
     # Compare floats with a tolerance instead of exact equality so the
     # test does not depend on rounding inside weightedprob().
     self.assertAlmostEqual(c.weightedprob("world", "Good"), 5.0/8.0)
     self.assertAlmostEqual(c.weightedprob("wurst", "Good"), 0.5)

コード例 #11

ファイルを表示

ファイル: test_classifier.py プロジェクト: stefanseibert/DataMining

 def testFProb(self):
     """fprob() of "world" in "Good" after three "Good" documents.

     "world" was counted in two of the three "Good" increments, so the
     expected value is 2/3.
     """
     c = Classifier(getwords)
     c.incc("Good")
     c.incf("hello", "Good")
     c.incc("Good")
     c.incf("world", "Good")
     c.incc("Good")
     c.incf("world", "Good")
     # 2/3 is not exactly representable as a float; compare with a
     # tolerance rather than relying on identical rounding.
     self.assertAlmostEqual(c.fprob("world", "Good"), 2.0/3.0)

コード例 #12

ファイルを表示

ファイル: test_classifier.py プロジェクト: stefanseibert/DataMining

 def testTotalCount(self):
     """totalcount() must equal the number of incc() calls overall."""
     clf = Classifier(getwords)
     categories = ("Bad", "Bad", "Good")
     for category in categories:
         clf.incc(category)
     self.assertEqual(clf.totalcount(), 3)

コード例 #13

ファイルを表示

ファイル: test_classifier.py プロジェクト: stefanseibert/DataMining

    def testClassifier(self):
        """Train on eight labelled sentences and classify a new one."""
        clf = Classifier(getwords)
        # (document, category) pairs, fed to train() in this order.
        training_set = (
            ("nobody owns the water", "Good"),
            ("the quick rabbit jumps fences", "Good"),
            ("buy pharmaceuticals now", "Bad"),
            ("make quick money at the online casino", "Bad"),
            ("the quick brown fox jumps", "Good"),
            ("next meeting is at night", "Good"),
            ("meeting with your superstar", "Bad"),
            ("money like water", "Bad"),
        )
        for document, category in training_set:
            clf.train(document, category)

        # "quick" is part of the query because with 'money jumps' alone
        # Good and Bad got the same value.
        query = "the money jumps quick"
        self.assertEqual(clf.classify(query), "Good")

コード例 #14

ファイルを表示

ファイル: parseTechFeed.py プロジェクト: fridy77/DataMining

      fulltext=stripHTML(e.title+' '+e.description)
      print fulltext
      data.append(fulltext)
      countnews['test']+=1
# print(...) with a single argument behaves identically on Python 2 and is
# valid Python 3; the former two-argument print statements are formatted
# into one string so the output stays "<label> <count>".
print("----------------------------------------------------------------")
print("----------------------------------------------------------------")
print("----------------------------------------------------------------")

# Summary of how many samples were collected per data set.
print('Number of used trainings samples in categorie tech %s' % countnews['tech'])
print('Number of used trainings samples in categorie notech %s' % countnews['nontech'])
print('Number of used test samples %s' % countnews['test'])
print('--'*30)



rss_classifier = Classifier()

# Train with every document of train_data["good"] under the label "good"
# and every document of train_data["bad"] under the label "bad".
for tech in train_data["good"]:
    rss_classifier.train(tech, "good")

for nontech in train_data["bad"]:
    rss_classifier.train(nontech, "bad")

print("---- training finished ---------------------")
for test in data:
    g_pb = rss_classifier.prob(test, "good")
    b_pb = rss_classifier.prob(test, "bad")
    # Normalise the two probabilities so that they sum to 1.
    total = g_pb + b_pb
    if total:
        g_pb_n = g_pb / total
        b_pb_n = b_pb / total
    else:
        # Both probabilities are 0, so the original division would raise
        # ZeroDivisionError. Treat the document as undecided.
        # NOTE(review): 0.5/0.5 keeps the "sums to 1" property; confirm this
        # suits whatever consumes g_pb_n / b_pb_n further down.
        g_pb_n = b_pb_n = 0.5
    print(test)

コード例 #15

ファイルを表示

ファイル: parseTechFeed_more_categories.py プロジェクト: stefanseibert/DataMining

          'http://www.welt.de/?service=Rss',
          'http://www.haz.de/rss/feed/haz_schlagzeilen']



# Counters for the number of feed entries processed per category, plus one
# for the test set.
countnews = {
    'tech': 0,
    'sports': 0,
    'economy': 0,
    'politics': 0,
    'science': 0,

    'test': 0,
}


c = Classifier(getwords, initprob=0.5)

# Every print below passes exactly one argument, so print(...) yields the
# same output on Python 2 while also being valid Python 3.
print("--------------------News from trainTech------------------------")
for feed in trainTech:
    f = feedparser.parse(feed)
    for e in f.entries:
        print('\n---------------------------')
        # Title and description are joined and passed through stripHTML
        # before being used as training text.
        fulltext = stripHTML(e.title + ' ' + e.description)
        print(fulltext)
        countnews['tech'] += 1

        c.train(fulltext, "Tech")

print("----------------------------------------------------------------")
print("----------------------------------------------------------------")
print("----------------------------------------------------------------")