def rsstrain(cl):
    """Pass a fixed set of Google blog-search RSS feeds to ``feedfilter.read``.

    Every active feed URL is handed to ``feedfilter.read`` together with the
    classifier ``cl`` and a category label ('Technology', 'Politics' or
    'Fashion'), in that order.  Presumably the label is what the classifier
    is trained with -- confirm against ``feedfilter.read``.
    """
    labelled_feeds = (
        ('https://www.google.com/search?q=technology&tbm=blg&output=rss', 'Technology'),
        # Disabled 'Technology' queries:
        #   https://www.google.com/search?q=tech%20news&tbm=blg&output=rss
        #   https://www.google.com/search?q=latest%20gadgets&tbm=blg&output=rss
        #   https://www.google.com/search?q=future%20technology&tbm=blg&output=rss
        #   https://www.google.com/search?q=inspiring%20technology&tbm=blg&output=rss
        ('https://www.google.com/search?q=politics&tbm=blg&output=rss', 'Politics'),
        # Disabled 'Politics' queries:
        #   https://www.google.com/search?q=indian%20politics&tbm=blg&output=rss
        #   https://www.google.com/search?q=dirty%20politics&tbm=blg&output=rss
        #   https://www.google.com/search?q=united%20states%20politics&tbm=blg&output=rss
        #   https://www.google.com/search?q=world%20politics&tbm=blg&output=rss
        ('https://www.google.com/search?q=fashion&tbm=blg&output=rss', 'Fashion'),
    )
    # Feeds are read one after another in the order listed above.
    for feed_url, label in labelled_feeds:
        feedfilter.read(feed_url, cl, label)
def rsstrain(cl):
    """Pass a fixed set of Google blog-search RSS feeds to ``feedfilter.read``.

    Every active feed URL is handed to ``feedfilter.read`` together with the
    classifier ``cl`` and a category label ('Technology', 'Politics' or
    'Fashion'), in that order.  Presumably the label is what the classifier
    is trained with -- confirm against ``feedfilter.read``.
    """
    labelled_feeds = (
        ('https://www.google.com/search?q=technology&tbm=blg&output=rss', 'Technology'),
        # Disabled 'Technology' queries:
        #   https://www.google.com/search?q=tech%20news&tbm=blg&output=rss
        #   https://www.google.com/search?q=latest%20gadgets&tbm=blg&output=rss
        #   https://www.google.com/search?q=future%20technology&tbm=blg&output=rss
        #   https://www.google.com/search?q=inspiring%20technology&tbm=blg&output=rss
        ('https://www.google.com/search?q=politics&tbm=blg&output=rss', 'Politics'),
        # Disabled 'Politics' queries:
        #   https://www.google.com/search?q=indian%20politics&tbm=blg&output=rss
        #   https://www.google.com/search?q=dirty%20politics&tbm=blg&output=rss
        #   https://www.google.com/search?q=united%20states%20politics&tbm=blg&output=rss
        #   https://www.google.com/search?q=world%20politics&tbm=blg&output=rss
        ('https://www.google.com/search?q=fashion&tbm=blg&output=rss', 'Fashion'),
    )
    # Feeds are read one after another in the order listed above.
    for feed_url, label in labelled_feeds:
        feedfilter.read(feed_url, cl, label)
def main():
    """Create a Fisher classifier on 'smajeti.db' and read a local feed file.

    Builds ``docclass.fisherclassifier`` with ``docclass.getwords``, sets its
    database to 'smajeti.db', prints a short notice and then passes
    'toiEntertainment.xml' and the classifier to ``feedfilter.read``.
    """
    classifier = docclass.fisherclassifier(docclass.getwords)
    classifier.setdb('smajeti.db')

    # A parenthesised single-string print emits the same text on Python 2.
    print("testing the program")

    feedfilter.read('toiEntertainment.xml', classifier)
import feedfilter
import docclass

# Fisher classifier built on docclass.getwords, with its database set to
# 'mln-f-measure.db'.
c = docclass.fisherclassifier(docclass.getwords)
c.setdb('mln-f-measure.db')

# Blog feed handed to feedfilter.read (note max-results=100 in the query).
feed = 'http://f-measure.blogspot.com/feeds/posts/default?max-results=100'

feedfilter.read(feed, c)
import docclass
import feedfilter

# Fisher classifier built on docclass.getwords.
cl = docclass.fisherclassifier(docclass.getwords)

# Only needed if the SQLite-backed storage has been implemented.
cl.setdb('politics_feed2.db')

# Hand the saved search results and the classifier to feedfilter.read.
feedfilter.read('politics_search2.xml', cl)
cleantext = re.sub("\n", ' ', cleantext) cleantext = cleantext.strip() cleantext = cleantext.replace(u'\u201c', '') cleantext = cleantext.replace(u'\u201d', '') cleantext = cleantext.replace(u'\u2019', '\'') cleantext = cleantext.replace('->', '') return _illegal_xml_chars_RE.sub(' ', cleantext) for x in range(0,100): entries[x]['content'][0]['value'] = remove_tags(entries[x]['content'][0]['value']) entries[x]['title'] = remove_tags(entries[x]['title']) cl=docclass.fisherclassifier(docclass.getwords) cl.setdb('feed.db') feedfilter.read(entries, cl) #write results to file, for table creation tab = open('table.txt', 'w', 0) tab.write('Title\tClassifier\tPredicted\tActual\tcprob()\n') for x in range(0, 50): tab.write(entries[x]['title'] + '\t' + entries[x]['classifier'] + '\t' + ' '+ '\t' + entries[x]['actual']+'\n') for x in range(50, 100): tab.write(entries[x]['title'] + '\t' + ' ' + '\t' + entries[x]['pred']+'\t' + entries[x]['actual']+'\t'+str(entries[x]['cprob'])+'\n') tab.close() # compute Precision, recall, f1 # tp is labelled correctly, fn is not labelled but should have been, fp is incorrectly labelled tp = 0
import docclass
import feedfilter

# Fisher classifier built on docclass.getwords.
cl = docclass.fisherclassifier(docclass.getwords)

# Only needed if the SQLite-backed storage has been implemented.
cl.setdb('politics_feed.db')

# Hand the saved search results and the classifier to feedfilter.read.
feedfilter.read('politics_search.xml', cl)
# -*- coding: utf-8 -*-
import docclass
import feedfilter

# Banner; the Russian text reads "Fisher classifier".
print('=========================================\n')
print(u'Классификатор Фишера \n')
print('=========================================')

# Fisher classifier built on docclass.getwords, database set to 'feeds.db'.
c1 = docclass.fisherclassifier(docclass.getwords)
c1.setdb('feeds.db')

# Message reads "Stage 1: Training the classifier".
print(u'Этап 1: Обучение классификатора')
feedfilter.read('feeds_train.xml', c1)

print('\n=========================================')
# Message reads "Stage 2: Checking the classifier - the feeds.db database
# is used".
print(u'Этап 2: Проверка работы классификатора - используется база feeds.db')
feedfilter.test('feeds_test.xml', c1)

# Block until the user presses Enter (presumably to keep the console window
# open after the run -- confirm intent).
raw_input()
def main():
    """Create a Fisher classifier on 'bbokka.db' and read a local feed file.

    Builds ``docclass.fisherclassifier`` with ``docclass.getwords``, sets its
    database to 'bbokka.db', prints a short notice and then passes
    'test.xml' and the classifier to ``feedfilter.read``.
    """
    classifier = docclass.fisherclassifier(docclass.getwords)
    classifier.setdb('bbokka.db')

    # A parenthesised single-string print emits the same text on Python 2.
    print("testing the program")

    feedfilter.read('test.xml', classifier)
import docclass
import feedfilter

# Alternative setup, currently disabled: naive Bayes classifier.
# cl2 = docclass.naivebayes(docclass.getwords)
# cl2.setdb('test2.db')
# docclass.sampletrain(cl2)

# Active setup: Fisher classifier with its database set to 'feed.db'.
cl = docclass.fisherclassifier(docclass.getwords)
cl.setdb('feed.db')

# Run docclass.sampletrain on the classifier before reading the feed file.
docclass.sampletrain(cl)
feedfilter.read('python_search.xml', cl)