def loadFeedInformationToDatabase(settings, allCats):
    """Put feed information in the database.

    Opens a ``FeedDatabase`` on ``settings['database']`` and calls
    ``add_feed_element(title, guid, description, category)`` once per entry
    of ``allCats``, reporting progress on stderr.

    Args:
        settings: Mapping with a ``'database'`` entry, passed to
            ``FeedDatabase``.
        allCats: Mapping of guid -> entry, where each entry provides the
            keys ``'title'``, ``'description'`` and ``'category'``.
    """
    database = FeedDatabase(settings['database'])
    size = len(allCats)
    try:
        # The progress index is zero-based: it counts the entries already
        # uploaded at the moment the message is written.
        for counter, (guid, entry) in enumerate(allCats.items()):
            sys.stderr.write('...Uploading... ({0}/{1})\n'.format(counter, size))
            database.add_feed_element(
                entry['title'],
                guid,
                entry['description'],
                entry['category'],
            )
        sys.stderr.write('...Finished Uploading Information to Database\n')
    finally:
        # Release the database handle even when an upload fails part-way.
        database.close_database()
def train_classifier(settings, trainingData):
    """Mark the training entries as classified and train the classifier.

    First flags every key of ``trainingData`` in the feed database via
    ``change_classified(key, classified=True)``. Then builds a
    ``fisherclassifier(getwords)`` bound to ``settings['database']`` and
    trains it on each entry's description/category pair, reporting progress
    on stderr.

    Args:
        settings: Mapping with a ``'database'`` entry, used both for
            ``FeedDatabase`` and for ``classifier.setdb``.
        trainingData: Mapping of key -> entry, where each entry provides
            the keys ``'description'`` and ``'category'``.
    """
    size = len(trainingData)

    database = FeedDatabase(settings['database'])
    try:
        for key in trainingData:
            database.change_classified(key, classified=True)
    finally:
        # Release the database handle even when flagging an entry fails.
        database.close_database()

    classifier = fisherclassifier(getwords)
    classifier.setdb(settings['database'])

    # The progress index is zero-based: it counts the entries already
    # trained at the moment the message is written.
    for counter, entry in enumerate(trainingData.values()):
        sys.stderr.write('...Training ({0}/{1})...\n'.format(counter, size))
        classifier.train(entry['description'], entry['category'])
    sys.stderr.write('...Finished Training Classifier\n')
def classifyEntries(settings):
    """Classify every unpredicted entry and record the results on disk.

    Fetches the entries returned by ``get_unpredicted_entries()`` from the
    feed database, classifies each entry's ``'description'`` with a
    ``fisherclassifier(getwords)`` bound to ``settings['database']``, and
    writes one ``guid|category`` line per entry to ``script50.txt`` in the
    current working directory. Progress is reported on stderr.

    Each result is written and flushed as soon as it is available, so the
    file is a usable checkpoint if the run is interrupted. Earlier versions
    rewrote the whole file before classifying each entry, which made the
    run quadratic in I/O and meant the last entry's result was never
    written.

    Args:
        settings: Mapping with a ``'database'`` entry, used both for
            ``FeedDatabase`` and for ``classifier.setdb``.
    """
    database = FeedDatabase(settings['database'])
    try:
        unclassifiedEntries = database.get_unpredicted_entries()
    finally:
        # Release the database handle even when the query fails.
        database.close_database()

    classifier = fisherclassifier(getwords)
    classifier.setdb(settings['database'])

    size = len(unclassifiedEntries)
    if not size:
        # Nothing to classify: leave any existing output file untouched.
        return

    with open('script50.txt', 'w') as output:
        for counter, entr in enumerate(unclassifiedEntries, start=1):
            category = classifier.classify(entr['description'])
            output.write('{0}|{1}\n'.format(entr['guid'], category))
            # Flush per entry so partial results survive an interruption.
            output.flush()
            sys.stderr.write('...Classified {0} of {1} entries\n'.format(counter, size))