def rsstrain(cl):
    """Pass one Google blog-search RSS feed per category to feedfilter.read.

    cl is forwarded unchanged to every feedfilter.read call, together with
    the feed URL and the category label paired with it below.
    """
    # (feed URL, category label) pairs, visited in this order.  Commented-out
    # pairs are additional queries that are currently disabled.
    feeds = (
        ('https://www.google.com/search?q=technology&tbm=blg&output=rss', 'Technology'),
        #('https://www.google.com/search?q=tech%20news&tbm=blg&output=rss', 'Technology'),
        #('https://www.google.com/search?q=latest%20gadgets&tbm=blg&output=rss', 'Technology'),
        #('https://www.google.com/search?q=future%20technology&tbm=blg&output=rss', 'Technology'),
        #('https://www.google.com/search?q=inspiring%20technology&tbm=blg&output=rss', 'Technology'),

        ('https://www.google.com/search?q=politics&tbm=blg&output=rss', 'Politics'),
        #('https://www.google.com/search?q=indian%20politics&tbm=blg&output=rss', 'Politics'),
        #('https://www.google.com/search?q=dirty%20politics&tbm=blg&output=rss', 'Politics'),
        #('https://www.google.com/search?q=united%20states%20politics&tbm=blg&output=rss', 'Politics'),
        #('https://www.google.com/search?q=world%20politics&tbm=blg&output=rss', 'Politics'),

        ('https://www.google.com/search?q=fashion&tbm=blg&output=rss', 'Fashion'),
    )
    for url, label in feeds:
        feedfilter.read(url, cl, label)
Example #2
0
def rsstrain(cl):
    """Call feedfilter.read once per category on a Google blog-search feed.

    Each call receives the feed URL, the cl argument exactly as given, and
    the category label the feed is listed under.
    """
    # Category label first, then the RSS URL queried for it.  Entries that
    # are commented out are alternative queries left disabled.
    sources = [
        ('Technology',
         'https://www.google.com/search?q=technology&tbm=blg&output=rss'),
        #('Technology', 'https://www.google.com/search?q=tech%20news&tbm=blg&output=rss'),
        #('Technology', 'https://www.google.com/search?q=latest%20gadgets&tbm=blg&output=rss'),
        #('Technology', 'https://www.google.com/search?q=future%20technology&tbm=blg&output=rss'),
        #('Technology', 'https://www.google.com/search?q=inspiring%20technology&tbm=blg&output=rss'),
        ('Politics',
         'https://www.google.com/search?q=politics&tbm=blg&output=rss'),
        #('Politics', 'https://www.google.com/search?q=indian%20politics&tbm=blg&output=rss'),
        #('Politics', 'https://www.google.com/search?q=dirty%20politics&tbm=blg&output=rss'),
        #('Politics', 'https://www.google.com/search?q=united%20states%20politics&tbm=blg&output=rss'),
        #('Politics', 'https://www.google.com/search?q=world%20politics&tbm=blg&output=rss'),
        ('Fashion',
         'https://www.google.com/search?q=fashion&tbm=blg&output=rss'),
    ]
    for category, feed_url in sources:
        feedfilter.read(feed_url, cl, category)
Example #3
0
def main():
    """Run feedfilter.read on 'toiEntertainment.xml' with a Fisher classifier.

    The classifier is built over docclass.getwords and given the
    'smajeti.db' name through setdb before the feed is read.
    """
    classifier = docclass.fisherclassifier(docclass.getwords)
    classifier.setdb('smajeti.db')
    # Single parenthesised argument: prints the same text as the bare
    # print statement.
    print("testing the program")
    feedfilter.read('toiEntertainment.xml', classifier)
Example #4
0
import feedfilter
import docclass
# Feed to process: the f-measure blog, with max-results=100 in the query.
feed = 'http://f-measure.blogspot.com/feeds/posts/default?max-results=100'

# Fisher classifier over docclass.getwords, given 'mln-f-measure.db' via setdb.
c = docclass.fisherclassifier(docclass.getwords)
c.setdb('mln-f-measure.db')

feedfilter.read(feed, c)
Example #5
0
import docclass
import feedfilter
# Inputs for this run: the classifier database name and the saved feed file.
db_path = 'politics_feed2.db'
feed_path = 'politics_search2.xml'

cl = docclass.fisherclassifier(docclass.getwords)
# The setdb call only applies if the SQLite-backed classifier was implemented.
cl.setdb(db_path)
feedfilter.read(feed_path, cl)
Example #6
0
    cleantext = re.sub("\n", ' ', cleantext)
    cleantext = cleantext.strip()	
    cleantext = cleantext.replace(u'\u201c', '')
    cleantext = cleantext.replace(u'\u201d', '')
    cleantext = cleantext.replace(u'\u2019', '\'')
    cleantext = cleantext.replace('->', '')
    return _illegal_xml_chars_RE.sub(' ', cleantext)

# Clean the body text and the title of entries 0-99 in place with remove_tags.
for x in range(0, 100):
    entry = entries[x]
    body = entry['content'][0]
    body['value'] = remove_tags(body['value'])
    entry['title'] = remove_tags(entry['title'])

# Fisher classifier over docclass.getwords, given 'feed.db' via setdb.
cl = docclass.fisherclassifier(docclass.getwords)
cl.setdb('feed.db')

# NOTE(review): the rows written below read 'classifier', 'pred', 'actual' and
# 'cprob' from the entries; presumably feedfilter.read fills them in -- confirm.
feedfilter.read(entries, cl)

# Write the results as a tab-separated table (for table creation).
# The file is opened exactly as before (mode 'w', buffering 0); the with-block
# guarantees the handle is closed even when building or writing a row raises
# (e.g. an entry lacking one of the keys), which the explicit close() at the
# end of the happy path did not.
with open('table.txt', 'w', 0) as tab:
    tab.write('Title\tClassifier\tPredicted\tActual\tcprob()\n')

    # Entries 0-49: 'Classifier' column filled, 'Predicted' column is a single
    # space, no cprob column.
    for x in range(0, 50):
        entry = entries[x]
        row = [entry['title'], entry['classifier'], ' ', entry['actual']]
        tab.write('\t'.join(row) + '\n')

    # Entries 50-99: 'Classifier' column is a single space, followed by the
    # predicted label, the actual label and the stringified cprob value.
    for x in range(50, 100):
        entry = entries[x]
        row = [entry['title'], ' ', entry['pred'], entry['actual'],
               str(entry['cprob'])]
        tab.write('\t'.join(row) + '\n')

# Compute precision, recall and F1.
# tp: labelled correctly; fn: not labelled but should have been;
# fp: labelled incorrectly.
tp = 0
Example #7
0
import docclass
import feedfilter
# Inputs for this run: the classifier database name and the saved feed file.
db_name = 'politics_feed.db'
feed_file = 'politics_search.xml'

cl = docclass.fisherclassifier(docclass.getwords)
# The setdb call only applies if the SQLite-backed classifier was implemented.
cl.setdb(db_name)
feedfilter.read(feed_file, cl)
Example #8
0
# -*- coding: utf-8 -*-

import docclass
import feedfilter

# Banner; the middle line reads "Fisher classifier".
# Each print takes one parenthesised argument, so the output is the same text
# the bare print statement produces.
print('=========================================\n')
print(u'Классификатор Фишера \n')
print('=========================================')

# Fisher classifier over docclass.getwords, given 'feeds.db' via setdb.
c1 = docclass.fisherclassifier(docclass.getwords)
c1.setdb('feeds.db')

# Message: "Stage 1: Training the classifier".
print(u'Этап 1: Обучение классификатора')
feedfilter.read('feeds_train.xml', c1)

# Message: "Stage 2: Checking the classifier - the feeds.db database is used".
print('\n=========================================')
print(u'Этап 2: Проверка работы классификатора - используется база feeds.db')
feedfilter.test('feeds_test.xml', c1)

# Block on a line of input before the script ends.
raw_input()
Example #9
0
def main():
    """Read 'test.xml' through feedfilter.read using a Fisher classifier.

    The classifier wraps docclass.getwords and is given the 'bbokka.db'
    name via setdb before the feed is processed.
    """
    fisher = docclass.fisherclassifier(docclass.getwords)
    fisher.setdb('bbokka.db')
    # One parenthesised argument: same output as the print statement form.
    print("testing the program")
    feedfilter.read('test.xml', fisher)
Example #10
0
import docclass
import feedfilter

#
# Naive Bayes classifier
#
#cl2=docclass.naivebayes(docclass.getwords)
#cl2.setdb('test2.db')
#docclass.sampletrain(cl2)

#
# Fisher classifier
#

# Inputs for this run: the classifier database name and the saved feed file.
db_file = 'feed.db'
feed_xml = 'python_search.xml'

cl = docclass.fisherclassifier(docclass.getwords)
cl.setdb(db_file)
# Run docclass.sampletrain on the classifier first, then read the feed.
docclass.sampletrain(cl)
feedfilter.read(feed_xml, cl)