def test():
    """
    Self-check for the naive Bayes classifier implementation.

    Loads the contest training and test sets from ``classifier_data`` and,
    for each (smoothing, logTransform) combination listed in the body, fits a
    fresh classifier on the training data, evaluates it on both splits and
    hands ``classifier.posteriors[20:25]`` (entries 20 through 24) to
    ``staticOutputCheck`` right after predicting on the respective split.

    Passing this test is a very good (however not a perfect) indicator that
    the implementation is correct.
    """
    data_dir = 'classifier_data'
    train_path = os.path.join(data_dir, 'contest_training.tsv')
    test_path = os.path.join(data_dir, 'contest_test.tsv')

    # (smoothing, logTransform) pairs in the order they are exercised:
    # without smoothing both log-transform settings are run, with smoothing
    # only the log-transformed variant.
    configurations = [(0, True), (0, False), (1, True)]

    train_x, train_y, train_feats = loadDataset(train_path)
    test_x, test_y, test_feats = loadDataset(test_path)

    # Every label that occurs in either split; the same set object is passed
    # to each classifier instance.
    labels = set(train_y).union(test_y)

    # Evaluation order matters: the train split is always scored first.
    splits = (
        (train_path, 'train', train_x, train_y),
        (test_path, 'test', test_x, test_y),
    )

    for smoothing, log_transform in configurations:
        kwargs = {
            'smoothing': smoothing,
            'logTransform': log_transform,
            'legalLabels': labels,
        }
        if smoothing:
            # Feature values are only supplied when smoothing is enabled.
            kwargs['featureValues'] = mergeFeatureValues(train_feats, test_feats)

        # Train on the train set.
        model = NaiveBayesClassifier(**kwargs)
        model.fit(train_x, train_y)

        for path, split_name, samples, gold in splits:
            predictions = model.predict(samples)
            evaluateClassifier(predictions, gold, split_name, model.k)
            # NOTE(review): presumably predict() refreshes model.posteriors
            # for the split just scored - confirm in NaiveBayesClassifier.
            staticOutputCheck(path, smoothing, log_transform,
                              model.posteriors[20:25])
def runClassifier(train_path, test_path, smoothing, logtransform):
    """
    Fit a naive Bayes classifier on one dataset and evaluate it on two.

    Args:
        train_path: path passed to ``loadDataset`` for the training split.
        test_path: path passed to ``loadDataset`` for the test split.
        smoothing: forwarded to the classifier as ``smoothing``; when truthy,
            the merged train/test feature values are supplied as well.
        logtransform: forwarded to the classifier as ``logTransform``.

    Returns:
        None. Results are reported through ``evaluateClassifier``, first for
        the training split and then for the test split.
    """
    train_x, train_y, train_feats = loadDataset(train_path)
    test_x, test_y, test_feats = loadDataset(test_path)

    kwargs = {
        'smoothing': smoothing,
        'logTransform': logtransform,
        # Labels seen in either split.
        'legalLabels': set(train_y).union(test_y),
    }
    if smoothing:
        kwargs['featureValues'] = mergeFeatureValues(train_feats, test_feats)

    # Train the actual model on the training split only.
    model = NaiveBayesClassifier(**kwargs)
    model.fit(train_x, train_y)

    for split_name, samples, gold in (
        ('train', train_x, train_y),
        ('test', test_x, test_y),
    ):
        predictions = model.predict(samples)
        evaluateClassifier(predictions, gold, split_name, model.k)
economy_messages.append(Message(str(i), is_spam=False)) for i in health: economy_messages.append(Message(str(i), is_spam=False)) health_messages = [ Message(str(i), is_spam=True) for i in health] for i in sports: health_messages.append(Message(str(i), is_spam=False)) for i in economy: health_messages.append(Message(str(i), is_spam=False)) for i in politics: health_messages.append(Message(str(i), is_spam=False)) # print (sport_messages) sport = NaiveBayesClassifier(k=0.5) sport.train(sport_messages) politics = NaiveBayesClassifier(k=0.5) politics.train(politics_messages) economy = NaiveBayesClassifier(k=0.5) economy.train(economy_messages) health = NaiveBayesClassifier(k=0.5) health.train(health_messages) url = input("Jepni linkun per te shikuar llojin e lajmit: ") html = requests.get(url).text soup=BeautifulSoup(html,'html.parser') text = "" for item in soup.find('div', class_='article-heading').find_all('h1'): text += str(item)
continue politics.append(i) sports = list(set(sports)) politics = list(set(politics)) sports_list = np.setdiff1d(sports,politics) politics_list = np.setdiff1d(politics,sports) same_words = set(sports).intersection(set(politics)) sport_messages = [ Message(str(i), is_spam=True) for i in sports_list] for i in politics_list: sport_messages.append(Message(str(i), is_spam=False)) # print (sport_messages) sport = NaiveBayesClassifier(k=0.5) sport.train(sport_messages) politics_messages = [ Message(str(i), is_spam=True) for i in politics_list] for i in sports_list: politics_messages.append(Message(str(i), is_spam=False)) # print (sport_messages) politics = NaiveBayesClassifier(k=0.5) politics.train(politics_messages) url = input("Jepni linkun per te shikuar llojin e lajmit: ") html = requests.get(url).text soup=BeautifulSoup(html,'html.parser') text = ""