def loadFeedInformationToDatabase(settings,allCats):
	"""Store every feed entry of ``allCats`` in the feed database.

	Args:
		settings: Mapping whose ``'database'`` value is handed to
			``FeedDatabase``.
		allCats: Mapping of entry GUID to a mapping that provides the
			``'title'``, ``'description'`` and ``'category'`` values of
			that entry.

	Progress and the final confirmation are written to ``sys.stderr``.
	The database is closed on every exit path, including when an upload
	or a key lookup raises.
	"""
	database = FeedDatabase(settings['database'])
	size = len(allCats)

	try:
		for counter, (guid, entry) in enumerate(allCats.items()):
			# Reported before the upload happens, so the count starts at 0.
			sys.stderr.write('...Uploading... ({0}/{1})\n'.format(counter,size))
			database.add_feed_element(
				entry['title'],
				guid,
				entry['description'],
				entry['category'],
			)
		sys.stderr.write('...Finished Uploading Information to Database\n')
	finally:
		# Release the handle even if an upload fails part-way through.
		database.close_database()
def train_classifier(settings,trainingData):
	"""Flag the training entries as classified, then train on them.

	Args:
		settings: Mapping whose ``'database'`` value is handed to both
			``FeedDatabase`` and ``fisherclassifier.setdb``.
		trainingData: Mapping of entry key to a mapping that provides the
			``'description'`` and ``'category'`` values used for training.

	Progress and the final confirmation are written to ``sys.stderr``.
	"""
	size = len(trainingData)

	# Step 1: mark every training entry as classified in the feed database.
	database = FeedDatabase(settings['database'])
	try:
		for key in trainingData:
			database.change_classified(key,classified=True)
	finally:
		# Always release the handle; it is closed before the classifier is
		# pointed at the same database setting below.
		database.close_database()

	# Step 2: feed each (description, category) pair to the classifier.
	classifier = fisherclassifier(getwords)
	classifier.setdb(settings['database'])
	for counter, sample in enumerate(trainingData.values()):
		# Reported before the sample is trained, so the count starts at 0.
		sys.stderr.write('...Training ({0}/{1})...\n'.format(counter,size))
		classifier.train(sample['description'],sample['category'])
	sys.stderr.write('...Finished Training Classifier\n')
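# Example #3 -- listing separator followed by a bare "0", presumably left
# over from the page this code was copied from; not part of the program.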
def classifyEntries(settings):
	"""Classify every unpredicted database entry and snapshot the results.

	Args:
		settings: Mapping whose ``'database'`` value is handed to both
			``FeedDatabase`` and ``fisherclassifier.setdb``.

	Each entry returned by ``FeedDatabase.get_unpredicted_entries`` is read
	through its ``'description'`` and ``'guid'`` keys. After every
	classification the accumulated ``guid|category`` lines are rewritten to
	``script50.txt`` in the current working directory, and a progress line
	is written to ``sys.stderr``.
	"""
	database = FeedDatabase(settings['database'])
	try:
		unclassifiedEntries = database.get_unpredicted_entries()
	finally:
		# Release the handle even if the query fails.
		database.close_database()

	classifier = fisherclassifier(getwords)
	classifier.setdb(settings['database'])
	counter = 0
	size = len(unclassifiedEntries)
	results = []
	for entr in unclassifiedEntries:
		category = classifier.classify(entr['description'])
		results.append({'guid':entr['guid'],'category':category})

		# Rewrite the snapshot *after* recording the new result so the file
		# always includes the latest entry (previously it lagged one entry
		# behind and the final result never reached disk). The context
		# manager closes the file even if a write fails.
		# NOTE(review): the whole file is rewritten on every iteration,
		# which is quadratic in the number of entries -- presumably a
		# deliberate progress checkpoint; confirm before changing.
		with open('script50.txt','w+') as snapshot:
			for row in results:
				snapshot.write('{0}|{1}\n'.format(row['guid'],row['category']))

		counter += 1
		sys.stderr.write('...Classified {0} of {1} entries\n'.format(counter,size))