Example #1
0
def test():
    """
    Sanity-check the naive Bayes classifier against the contest data.

    Three configurations are exercised, in this order: smoothing 0 with the
    log transform on, smoothing 0 with it off, and smoothing 1 with it on.
    For each one a fresh classifier is fitted on the training set and then
    evaluated on the training set and on the test set. After every prediction
    pass the slice ``classifier.posteriors[20:25]`` is handed to
    ``staticOutputCheck`` together with the dataset path and the settings.

    Passing is a strong (but not conclusive) sign that the implementation is
    correct.
    """
    train_path = os.path.join('classifier_data', 'contest_training.tsv')
    test_path = os.path.join('classifier_data', 'contest_test.tsv')

    # Log-transform settings to try for each smoothing value.
    lt_options = {0: [True, False], 1: [True]}
    configurations = [(s, lt) for s in (0, 1) for lt in lt_options[s]]

    trainData, trainLabels, trainFeatures = loadDataset(train_path)
    testData, testLabels, testFeatures = loadDataset(test_path)

    # Every label seen in either dataset is legal for the classifier.
    labels = set(trainLabels).union(testLabels)

    # (split name, dataset path, instances, gold labels), in evaluation order.
    splits = (
        ('train', train_path, trainData, trainLabels),
        ('test', test_path, testData, testLabels),
    )

    for s, lt in configurations:
        classifierArgs = {
            'smoothing': s,
            'logTransform': lt,
            'legalLabels': labels,
        }
        if s:
            # Feature values are only passed along when smoothing is enabled.
            classifierArgs['featureValues'] = mergeFeatureValues(
                trainFeatures, testFeatures)

        # The model is always fitted on the training set only.
        classifier = NaiveBayesClassifier(**classifierArgs)
        classifier.fit(trainData, trainLabels)

        for split, path, data, gold in splits:
            predictions = classifier.predict(data)
            evaluateClassifier(predictions, gold, split, classifier.k)
            # NOTE(review): posteriors is read right after predict(), so it
            # presumably reflects that call -- confirm in the classifier.
            staticOutputCheck(path, s, lt, classifier.posteriors[20:25])
Example #2
0
def runClassifier(train_path, test_path, smoothing, logtransform):
    """
    Fit a NaiveBayesClassifier on one dataset and evaluate it on two.

    Both files are read with ``loadDataset``. The classifier is constructed
    with the given ``smoothing`` and ``logtransform`` settings and with the
    union of the labels found in both files as its legal labels. When
    ``smoothing`` is truthy, the merged feature values of both files are
    passed along as well. The model is fitted on the training data and its
    predictions on the training and test data are handed to
    ``evaluateClassifier``.

    Nothing is returned.
    """
    trainData, trainLabels, trainFeatures = loadDataset(train_path)
    testData, testLabels, testFeatures = loadDataset(test_path)

    classifierArgs = {
        'smoothing': smoothing,
        'logTransform': logtransform,
        # Every label seen in either dataset is legal.
        'legalLabels': set(trainLabels).union(testLabels),
    }
    if smoothing:
        # Feature values are only passed along when smoothing is enabled.
        classifierArgs['featureValues'] = mergeFeatureValues(
            trainFeatures, testFeatures)

    # Train the actual model on the training set only.
    classifier = NaiveBayesClassifier(**classifierArgs)
    classifier.fit(trainData, trainLabels)

    # Evaluate on the training set first, then on the test set.
    evaluation_runs = (
        ('train', trainData, trainLabels),
        ('test', testData, testLabels),
    )
    for split, data, gold in evaluation_runs:
        predictions = classifier.predict(data)
        evaluateClassifier(predictions, gold, split, classifier.k)
Example #3
0
def test():
    """
    Sanity-check the naive Bayes classifier against the contest data.

    Three configurations are exercised, in this order: smoothing 0 with the
    log transform on, smoothing 0 with it off, and smoothing 1 with it on.
    For each one a fresh classifier is fitted on the training set and then
    evaluated on the training set and on the test set. After every prediction
    pass the slice ``classifier.posteriors[20:25]`` is handed to
    ``staticOutputCheck`` together with the dataset path and the settings.

    Passing is a strong (but not conclusive) sign that the implementation is
    correct.
    """
    train_path = os.path.join('classifier_data', 'contest_training.tsv')
    test_path = os.path.join('classifier_data', 'contest_test.tsv')

    # Log-transform settings to try for each smoothing value.
    lt_options = {0: [True, False], 1: [True]}
    configurations = [(s, lt) for s in (0, 1) for lt in lt_options[s]]

    trainData, trainLabels, trainFeatures = loadDataset(train_path)
    testData, testLabels, testFeatures = loadDataset(test_path)

    # Every label seen in either dataset is legal for the classifier.
    labels = set(trainLabels).union(testLabels)

    # (split name, dataset path, instances, gold labels), in evaluation order.
    splits = (
        ('train', train_path, trainData, trainLabels),
        ('test', test_path, testData, testLabels),
    )

    for s, lt in configurations:
        classifierArgs = {
            'smoothing': s,
            'logTransform': lt,
            'legalLabels': labels,
        }
        if s:
            # Feature values are only passed along when smoothing is enabled.
            classifierArgs['featureValues'] = mergeFeatureValues(
                trainFeatures, testFeatures)

        # The model is always fitted on the training set only.
        classifier = NaiveBayesClassifier(**classifierArgs)
        classifier.fit(trainData, trainLabels)

        for split, path, data, gold in splits:
            predictions = classifier.predict(data)
            evaluateClassifier(predictions, gold, split, classifier.k)
            # NOTE(review): posteriors is read right after predict(), so it
            # presumably reflects that call -- confirm in the classifier.
            staticOutputCheck(path, s, lt, classifier.posteriors[20:25])
Example #4
0
def runClassifier(train_path, test_path, smoothing, logtransform):
    """
    Fit a NaiveBayesClassifier on one dataset and evaluate it on two.

    Both files are read with ``loadDataset``. The classifier is constructed
    with the given ``smoothing`` and ``logtransform`` settings and with the
    union of the labels found in both files as its legal labels. When
    ``smoothing`` is truthy, the merged feature values of both files are
    passed along as well. The model is fitted on the training data and its
    predictions on the training and test data are handed to
    ``evaluateClassifier``.

    Nothing is returned.
    """
    trainData, trainLabels, trainFeatures = loadDataset(train_path)
    testData, testLabels, testFeatures = loadDataset(test_path)

    classifierArgs = {
        'smoothing': smoothing,
        'logTransform': logtransform,
        # Every label seen in either dataset is legal.
        'legalLabels': set(trainLabels).union(testLabels),
    }
    if smoothing:
        # Feature values are only passed along when smoothing is enabled.
        classifierArgs['featureValues'] = mergeFeatureValues(
            trainFeatures, testFeatures)

    # Train the actual model on the training set only.
    classifier = NaiveBayesClassifier(**classifierArgs)
    classifier.fit(trainData, trainLabels)

    # Evaluate on the training set first, then on the test set.
    evaluation_runs = (
        ('train', trainData, trainLabels),
        ('test', testData, testLabels),
    )
    for split, data, gold in evaluation_runs:
        predictions = classifier.predict(data)
        evaluateClassifier(predictions, gold, split, classifier.k)
Example #5
0
        economy_messages.append(Message(str(i), is_spam=False))
    for i in health:
        economy_messages.append(Message(str(i), is_spam=False))

    health_messages = [
        Message(str(i), is_spam=True) for i in health]
    for i in sports:
        health_messages.append(Message(str(i), is_spam=False))
    for i in economy:
        health_messages.append(Message(str(i), is_spam=False))
    for i in politics:
        health_messages.append(Message(str(i), is_spam=False))
    # print (sport_messages)


    sport = NaiveBayesClassifier(k=0.5)
    sport.train(sport_messages)
    politics = NaiveBayesClassifier(k=0.5)
    politics.train(politics_messages)
    economy = NaiveBayesClassifier(k=0.5)
    economy.train(economy_messages)
    health = NaiveBayesClassifier(k=0.5)
    health.train(health_messages)

    url = input("Jepni linkun per te shikuar llojin e lajmit: ")
    html = requests.get(url).text
    soup=BeautifulSoup(html,'html.parser')
    text = ""

    for item in soup.find('div', class_='article-heading').find_all('h1'):
        text += str(item)
Example #6
0
            continue
        politics.append(i)
    sports = list(set(sports))
    politics = list(set(politics))

    sports_list = np.setdiff1d(sports,politics)
    politics_list = np.setdiff1d(politics,sports)
    same_words = set(sports).intersection(set(politics))

    sport_messages = [
        Message(str(i), is_spam=True) for i in sports_list]
    for i in politics_list:
        sport_messages.append(Message(str(i), is_spam=False))
    # print (sport_messages)

    sport = NaiveBayesClassifier(k=0.5)
    sport.train(sport_messages)

    politics_messages = [
        Message(str(i), is_spam=True) for i in politics_list]
    for i in sports_list:
        politics_messages.append(Message(str(i), is_spam=False))
    # print (sport_messages)

    politics = NaiveBayesClassifier(k=0.5)
    politics.train(politics_messages)

    url = input("Jepni linkun per te shikuar llojin e lajmit: ")
    html = requests.get(url).text
    soup=BeautifulSoup(html,'html.parser')
    text = ""