Example #1
    def Running_Algorithm(self, Path, Algorithm, train, test):
        '''
        Run the given algorithm on the train and test data frames and return the results.
        :param Path: the path of the files
        :param Algorithm: the requested algorithm
        :param train: the train data frame
        :param test: the test data frame
        :return: a dict that contains the results for the train and the test files.
        '''
        if Algorithm == 'naive bayes classifier (our)':
            prediction_train = NaiveBayesClassifier.Testing_model(
                Path, self.Load_Model(Path), train)
            prediction_test = NaiveBayesClassifier.Testing_model(
                Path, self.Load_Model(Path), test)

        if Algorithm == 'ID3 (our)':
            prediction_train = ID3.Testing_model(Path, self.Load_Model(Path),
                                                 train)
            prediction_test = ID3.Testing_model(Path, self.Load_Model(Path),
                                                test)

        train_targets = train['class'].tolist()
        test_targets = test['class'].tolist()

        train_results = {}
        train_results['Confusion Matrix'] = list(
            metrics.confusion_matrix(train_targets, prediction_train))
        report = metrics.classification_report(
            train_targets, prediction_train, output_dict=True)['weighted avg']
        train_results['Accuracy'] = accuracy_score(train_targets,
                                                   prediction_train)
        train_results['Precision'] = report['precision']
        train_results['Recall'] = report['recall']
        train_results['F-measure'] = report['f1-score']

        test_results = {}
        test_results['Confusion Matrix'] = list(
            metrics.confusion_matrix(test_targets, prediction_test))
        report = metrics.classification_report(
            test_targets, prediction_test, output_dict=True)['weighted avg']
        test_results['Accuracy'] = accuracy_score(test_targets,
                                                  prediction_test)
        test_results['Precision'] = report['precision']
        test_results['Recall'] = report['recall']
        test_results['F-measure'] = report['f1-score']

        return {'train': train_results, 'test': test_results}
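A minimal usage sketch for the dict this method returns; the receiver object, path, and data frames below are hypothetical placeholders, not names from the original project:

# Hypothetical call site; 'runner', 'path', 'train_df', 'test_df' are placeholders.
results = runner.Running_Algorithm(path, 'ID3 (our)', train_df, test_df)
for split in ('train', 'test'):
    print(split, 'Accuracy:', results[split]['Accuracy'])
    print(split, 'F-measure:', results[split]['F-measure'])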
Example #2
def getModel(trainData, modelName):
    if modelName == "naive":
        return NaiveBayesClassifier(trainData)
    elif modelName == "logreg":
        return LogisticRegressionClassifier(trainData)
    elif modelName == "tree":
        return DecisionTreeClassifier(trainData)
    else:
        printUsage()
Example #3
    def buildClick(self):
        if (self.checkValidPath() and self.checkValidBins()
                and self.check_not_empty_files()):
            self._nbc = NaiveBayesClassifier.NaiveBayesClassifier(
                str(self.Path_Entry.get()), self.bins)
            self._nbc.load_train_data_frame()
            self.BuildPassed = True
            tkMessageBox.showinfo(
                "Naive Bayes Classifier",
                "Building classifier using train-set is done!")
        else:
            self.BuildPassed = False
Example #4
    def Build_Model(self, Path, Algorithm, train):
        '''
        Build a model for the given algorithm from the train data frame.
        :param Path: the path of the files
        :param Algorithm: the algorithm to build the model for
        :param train: the data frame to build the model from
        :return: a model
        '''
        if Algorithm == 'naive bayes classifier (our)':
            return NaiveBayesClassifier.NaiveBayesClassifier(Path, train)

        if Algorithm == 'ID3 (our)':
            return ID3.ID3(Path, train)
Example #5
class LanguageDetector(yaml.YAMLObject):
    yaml_tag = u"!LanguageDetector"

    """
    Given a set of sentences in multiple languages,
    build a classifier to detect the majority language.
    """

    def __init__(self, options):
        # Merge user options over the defaults (works on both Python 2 and 3).
        options = dict({"ngram_size": 3}, **options)
        self.classifier = NaiveBayesClassifier(2)
        self.ngram_size = options["ngram_size"]

    """
    def initialize(options = {})
    options = {:ngram_size => 3}.merge(options)
    @ngram_size = options[:ngram_size]
    @classifier = NaiveBayesClassifier.new(:num_categories => 2)
    """
    """
    def train(max_epochs, training_sentences):
        classifier = NaiveBayesClassifier.train_em(max_epochs, training_sentences.map{ |sentence| sentence.to_ngrams(ngram_size) })
        classifier.category_names =
        if classifier.get_prior_category_probability(0) > @classifier.get_prior_category_probability(1)
            %w( majority minority )
        else
            %w( minority majority )
    """

    # Returns the (named) category the sentence belongs to.
    def classify(self, text):
        sentence = String(text)
        category_index = self.classifier.classify(sentence.to_ngrams(self.ngram_size))
        return self.classifier.category_names[category_index]

    def probabilities(self, sentence):
        return self.classifier.get_posterior_category_probabilities(
            sentence.to_ngrams(self.ngram_size))

    """
    # Dumps the language model to a file.
    def yamlize(filename):
        File.open(filename, "w") do |f|
        f.puts self.to_yaml
    """

    # Loads the language model from a file.
    @staticmethod
    def load_yaml(filename):
        with open(filename, "r") as stream:
            return yaml.load(stream, Loader=yaml.Loader)
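The class above calls String(text) and sentence.to_ngrams(...), but neither helper is defined in the snippet; the commented-out Ruby blocks suggest the original project extends its string type with an n-gram method. A minimal sketch of what such a wrapper might look like, purely as an assumption about the missing helper:

# Hypothetical helper; the original project presumably ships its own version.
class String(str):
    def to_ngrams(self, n):
        # Character n-grams of the string, e.g. "abcd" with n=3 -> ["abc", "bcd"].
        return [self[i:i + n] for i in range(len(self) - n + 1)]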
Example #6
def create_classifier():
    dir_pos = os.path.join(BASE_DIR, "pos")
    dir_neg = os.path.join(BASE_DIR, "neg")

    nbc = nb.NaiveBayesClassifier(positive_corpus=dir_pos,
                                  negative_corpus=dir_neg)

    nbc.train_positive()
    nbc.train_negative()

    # build a dictionary with the probability of each word
    nbc.calculate_probabilities()

    return nbc
Example #7
""" This script will read all the emails and it will train the classifier """

import os
from Email import *
from FeatureSelection import *
from NaiveBayesClassifier import *

trainPath = "dataset"
trainSet_emails = []

#create an email for every file we read
for f in os.listdir(trainPath):
    fileName = trainPath + '/' + f
    e = Email()
    if "spm" in fileName:
        e.setCategory("SPAM")
    else:
        e.setCategory("HAM")
    e.read(fileName)
    #insert the email we created to a collection of emails
    trainSet_emails.append(e)

#select features from our training set (automatic feature selection)
fs = FeatureSelection(trainSet_emails)
fs.selectFeatures()

#create a naive bayes classifier and train it
nb = NaiveBayesClassifier()
nb.setEmails(trainSet_emails)
nb.train()
Example #8
#########################################
## Naive Bayes test code

from sklearn.datasets import load_iris
from sklearn.utils import shuffle

iris_X, iris_y = load_iris(return_X_y=True)
iris_X, iris_y = shuffle(iris_X, iris_y)
X_train = iris_X[:-30]
X_test = iris_X[-30:]
y_train = iris_y[:-30]
y_test = iris_y[-30:]

from NaiveBayesClassifier import *

bayes = NaiveBayesClassifier()
bayes.buildModel(X_train, y_train)
bayes.evaluateModel(X_test, y_test)
bayes.showLabel(X_test[3], load_iris())

print(load_iris().target_names)
print(y_test[3])
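As a rough sanity check for the custom classifier's iris accuracy, scikit-learn's GaussianNB can be fitted on the same split; this baseline is an added sketch, not part of the original test code:

from sklearn.naive_bayes import GaussianNB

# Reference baseline on the same train/test split built above.
baseline = GaussianNB().fit(X_train, y_train)
print("GaussianNB baseline accuracy:", baseline.score(X_test, y_test))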

##################################################
## Decision tree test code

from sklearn.datasets import load_iris
from sklearn.utils import shuffle

iris_X, iris_y = load_iris(return_X_y=True)
iris_X, iris_y = shuffle(iris_X, iris_y)
Example #9
    if sys.argv[2] == "unigram":
        gram = unigram
    elif sys.argv[2] == "bigram":
        gram = bigram
    elif sys.argv[2] == "trigram":
        gram = trigram
#-----------------------------------------------------------------------END - Check for command line arguments

#-----------------------------------------------------------------------START - Load the training set
sys.stdout.write("Loading training set...")
trainingDataset = getDataSet(numDocuments, POS="sent/train/pos/", NEG="sent/train/neg/", n=gram)
print "complete!"
#-----------------------------------------------------------------------END - Load the training set

#-----------------------------------------------------------------------START - Create the NaiveBayesClassifier
classifier = NaiveBayesClassifier(1)
#-----------------------------------------------------------------------END - Create the NaiveBayesClassifier

#-----------------------------------------------------------------------START - Train the classifier
sys.stdout.write("Training classifier...")
classifier.train(trainingDataset)
print "complete!"

del trainingDataset
#-----------------------------------------------------------------------END - Train the classifier

#-----------------------------------------------------------------------START - Load the testing set
sys.stdout.write("Loading test set...")
testingDataset = getDataSet(numDocuments, POS="sent/test/pos/", NEG="sent/test/neg/", n=gram)
print "complete!"
#-----------------------------------------------------------------------END - Load the testing set
Example #10
'''
Created on Dec 1, 2013

@author: konstantinos kostis , <*****@*****.**>
'''

import os
from Email import *
from NaiveBayesClassifier import *

#read test files/emails
testPath = "test"
testEmails = []

for testEmail in os.listdir(testPath):
    fileName = testPath+'/'+testEmail
    e = Email()
    e.read(fileName)
    #insert the email we created to a collection of emails
    testEmails.append(e)

#create a NaiveBayesClassifier object
NBC = NaiveBayesClassifier()
results = open("results.txt","w")
#classify every email
for testEmail in testEmails:
    results.write(testEmail.getName()+ " " +NBC.classify(testEmail)+"\n")
    results.flush()
    #print "%s \t %s" % (testEmail.getName(),NBC.classify(testEmail))
results.close()
""" This script will read all the emails and it will train the classifier """


import os
from Email import *
from FeatureSelection import *
from NaiveBayesClassifier import *

trainPath = "dataset"
trainSet_emails = []

#create an email for every file we read
for f in os.listdir(trainPath):
    fileName = trainPath+'/'+f
    e = Email()
    if "spm" in fileName:
        e.setCategory("SPAM")
    else:
        e.setCategory("HAM")
    e.read(fileName)
    #insert the email we created to a collection of emails
    trainSet_emails.append(e)

#select features from our training set(automatic feature selection)
fs = FeatureSelection(trainSet_emails)
fs.selectFeatures()

#create a naive bayes classifier and train it
nb = NaiveBayesClassifier()
nb.setEmails(trainSet_emails)
nb.train()
Example #12
    # print(I2F)
    # print(L2I)
    # print(I2L)
    # print(V2I)
    # print(I2V)

    TRAIN = get_data(TRAIN_DATA)
    TEST = get_data(TEST_DATA)
    print("TESTTTTTTTTTTT")
    print(TEST)

    # creating the different models
    dt = ID3.Tree(TRAIN, values={i: value.keys() for i, value in I2V.items()})

    predsKNN, accuracyKNN, corrects = knn_alg.runKnn()
    predsNB, accuracyNB = nb_alg.NBresults()
    predsID3, accuracyID3 = predictTree(test_set=TEST, treeAlg=dt)
    print("ID3:")
    print("")
    print(predsID3)
    print(I2L)
    predsID3ByTag = []
    for i in predsID3:
        predsID3ByTag.append(I2L[i])
    print(predsID3ByTag)

    output_predictions((predsID3ByTag, accuracyID3), (predsKNN, accuracyKNN),
                       (predsNB, accuracyNB), len(corrects), 'output.txt')

    # printing the tree that DecisionTree created
    t = open("output_tree.txt", 'w')
Example #13
sc.printResults('youtube', 6)

print "SVM based prediction done !!!\n"

#--------------------------------------- STEP 9: NAIVE BAYES CLASSIFICATION -----------------------------------

# Calculate using Naive Bayes Classifier
print "Using Naive Bayes Classification Technique on Twitter Data. Please wait...\n"

trainingDataFile = '../Data/NaiveBayes/full_training_dataset.csv'
# Here we can use some other training data set - shortened for speeding process.
classifierDumpFile = '../Data/NaiveBayes/naivebayes_test_model.pickle'
trainingRequired = 0  # Set to 0 when not required after pickle file is created.
time = 'today'
nb = NaiveBayesClassifier.NaiveBayesClassifier(twitter_data, key_word, time,
                                               trainingDataFile,
                                               classifierDumpFile,
                                               trainingRequired)
nb.classify()
#nb.accuracy()
#nb.writeOutput('nboutput1.txt')
nb.printResults('twitter', 8)

print "Using Naive Bayes Classification Technique on YouTube Data. Please wait...\n"
trainingRequired = 0
nb = NaiveBayesClassifier.NaiveBayesClassifier(youTube_data, key_word, time,
                                               trainingDataFile,
                                               classifierDumpFile,
                                               trainingRequired)
nb.classify()
#nb.accuracy()
#nb.writeOutput('nboutput2.txt')
Example #14
def runNBClassifier(train, test):
    print "building naive bayes classifier"
    model = NaiveBayesClassifier(train)
    validate(model, "naive bayes", test)
    return model
Example #15
import csv
import nltk
import NaiveBayesClassifier
lines = """
Donate Diore			    Email: [email protected]
Chief Executive Officer		Office 800-555-5555
Broadlook Technoplogies		Cell : 414-555-5555
21140 Capitol Drive		    Fax   : 262-754-8081
Pewaukee WI 53072		    Blog www.idanato.com
http://www.broadlook.com
"""
sentences = nltk.sent_tokenize(lines)  # tokenize sentences
nouns = []  # empty to array to hold all nouns

for sentence in sentences:
    for word, pos in nltk.pos_tag(nltk.word_tokenize(str(sentence))):
        if (pos == 'NN'):
            nouns.append(word)

    file = open(r'D:\IFS\python_train\Position.csv', 'r')
    reader = csv.reader(file)
    feature_set = []

    for word, label in reader:
        feature_set.append((word, label))

    print(feature_set)

    cl = NaiveBayesClassifier(feature_set)
    print(nouns, "and entity is :")
    entity = cl.classify(nouns)
    print(entity)
    t = entity
Example #16
    data, labels = getDataAndLabels(datafilename, labelfilename)
    accuracysNB = []
    accuracysLR = []
    for splitRatio in [0.1, 0.3, 0.5, 0.7, 0.8, 0.9]:

        print("Iteration "+str(splitRatio))
        # Split the data into the training and test set
        X_train, X_test, Y_train, Y_test = train_test_split(data, labels, train_size=splitRatio)



        
        #Naive Bayes Implementation

        # Call Naive Bayes
        nb = NBC.NaiveBayesClassifier()
        nb.trainModel(X_train, Y_train)

        print(datetime.datetime.now())

        #test
        predictedValues = nb.predict(X_test)

        print(datetime.datetime.now())

        #Accuracy
        accuracy = nb.accuracy(Y_test, predictedValues)
        accuracysNB.append(accuracy)
        print("Split Ratio = " + str(splitRatio) + "Accuracy = " + str(accuracy))

        print(datetime.datetime.now())
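A short sketch of how the collected Naive Bayes accuracies might be reported once the loop over split ratios finishes; this summary step is an assumption, not part of the original snippet:

# Hypothetical post-loop summary using the accuracysNB list filled above.
for ratio, acc in zip([0.1, 0.3, 0.5, 0.7, 0.8, 0.9], accuracysNB):
    print("train fraction {:.1f}: Naive Bayes accuracy {:.3f}".format(ratio, acc))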
Example #18
import NaiveBayesClassifier
from subprocess import check_output

#The code for this comes from test.py
#http://cs532s18.slack.com/files/U8K4TSGJ1/F9Z33U1B6/test.py

c = NaiveBayesClassifier.naivebayes(NaiveBayesClassifier.getwords)
#remove previous db file
check_output(['rm', 'neverett.db'])
c.setdb('neverett.db')
NaiveBayesClassifier.spamTrain(c)

# classify files as spam or not spam
for i in range(1, 7):
    with open('Testing\\email{}.txt'.format(i)) as f:
        print(c.classify(f.read()))
Example #20
filepath = "little.txt"
import NaiveBayesClassifier as s
with open(filepath) as fp:
    line = fp.readline()
    cnt = 1
    while line:
        sen = s.sentiment(line)
        print("Line {}:{}".format(cnt, line.strip()))
        print(sen)
        line = fp.readline()
        cnt += 1
Example #21
import nltk

import NaiveBayesClassifier as nbc

#nltk.download('punkt')
sentence = 'this movie is superb'

print(sentence + ":" + nbc.naive_bayes_classifier(sentence))