def Running_Algorithm(self, Path, Algorithm, train, test):
    '''
    Run the stored model of the given algorithm on the train and test
    frames and return the evaluation results for both.

    :param Path: the path of the files
    :param Algorithm: the wanted algorithm, either
        'naive bayes classifier (our)' or 'ID3 (our)'
    :param train: the train data frame (must contain a 'class' column)
    :param test: the test data frame (must contain a 'class' column)
    :return: a dict with the keys 'train' and 'test'; each value is a dict
        holding 'Confusion Matrix', 'Accuracy', 'Precision', 'Recall' and
        'F-measure' (the last three are the weighted averages).
    :raises ValueError: if Algorithm is not one of the supported names.
    '''
    # Resolve the algorithm first so that an unsupported name fails here
    # with a clear message instead of surfacing later as an unbound
    # prediction variable.
    if Algorithm == 'naive bayes classifier (our)':
        run_model = NaiveBayesClassifier.Testing_model
    elif Algorithm == 'ID3 (our)':
        run_model = ID3.Testing_model
    else:
        raise ValueError('Unsupported algorithm: %r' % (Algorithm,))

    def summarize(targets, predictions):
        # Collect the metrics reported for one data set.
        report = metrics.classification_report(
            targets, predictions, output_dict=True)['weighted avg']
        return {
            'Confusion Matrix': list(
                metrics.confusion_matrix(targets, predictions)),
            'Accuracy': accuracy_score(targets, predictions),
            'Precision': report['precision'],
            'Recall': report['recall'],
            'F-measure': report['f1-score'],
        }

    # The model is loaded separately for each run, exactly as before.
    # NOTE(review): a single load could probably be shared -- confirm that
    # Testing_model does not modify the model it is given.
    prediction_train = run_model(Path, self.Load_Model(Path), train)
    prediction_test = run_model(Path, self.Load_Model(Path), test)

    train_targets = train['class'].tolist()
    test_targets = test['class'].tolist()

    return {
        'train': summarize(train_targets, prediction_train),
        'test': summarize(test_targets, prediction_test),
    }
def getModel(trainData, modelName):
    """Return a classifier built from ``trainData`` for ``modelName``.

    Recognised names are "naive", "logreg" and "tree". For any other name
    the usage text is printed via ``printUsage()`` and ``None`` is returned.
    """
    if modelName == "naive":
        return NaiveBayesClassifier(trainData)
    if modelName == "logreg":
        return LogisticRegressionClassifier(trainData)
    if modelName == "tree":
        return DecisionTreeClassifier(trainData)

    # Unknown model name: show the usage text; no model is produced.
    printUsage()
    return None
def buildClick(self):
    """Handle the build button: validate the inputs and build the classifier.

    The path, the bins and the input files are checked in that order and
    the checks stop at the first failure. On success a classifier is
    created from the entered path and ``self.bins``, its
    ``load_train_data_frame()`` is called, ``self.BuildPassed`` is set to
    True and an info dialog is shown. On failure ``self.BuildPassed`` is
    set to False and nothing else happens.
    """
    inputs_ok = (self.checkValidPath()
                 and self.checkValidBins()
                 and self.check_not_empty_files())
    if not inputs_ok:
        self.BuildPassed = False
        return

    entered_path = str(self.Path_Entry.get())
    self._nbc = NaiveBayesClassifier.NaiveBayesClassifier(
        entered_path, self.bins)
    self._nbc.load_train_data_frame()
    self.BuildPassed = True
    tkMessageBox.showinfo(
        "Naive Bayes Classifier",
        "Building classifier using train-set is done!")
def Build_Model(self, Path, Algorithm, train):
    '''
    Build a model of the requested algorithm from the train data frame.

    :param Path: the path of the files
    :param Algorithm: which algorithm to build the model for, either
        'naive bayes classifier (our)' or 'ID3 (our)'
    :param train: the data frame the model is built from
    :return: the model, or None when the algorithm name is not recognised
    '''
    if Algorithm == 'naive bayes classifier (our)':
        model = NaiveBayesClassifier.NaiveBayesClassifier(Path, train)
    elif Algorithm == 'ID3 (our)':
        model = ID3.ID3(Path, train)
    else:
        model = None
    return model
class LanguageDetector(yaml.YAMLObject):
    """
    Given a set of sentences in multiple languages, build a classifier to
    detect the majority language.

    Instances are tagged ``!LanguageDetector`` for YAML (de)serialisation.
    """

    yaml_tag = u"!LanguageDetector"

    def __init__(self, options):
        """Create a detector with a two-category naive Bayes classifier.

        :param options: mapping of settings; ``"ngram_size"`` (default 3)
            is the only key read here.
        """
        # Caller-supplied options override the defaults. Concatenating
        # ``dict.items()`` results only works on Python 2, so merge with
        # ``update`` instead.
        merged = {"ngram_size": 3}
        merged.update(options)
        self.classifier = NaiveBayesClassifier(2)
        self.ngram_size = merged["ngram_size"]

    # Ruby original of the constructor, kept for reference:
    #   def initialize(options = {})
    #     options = {:ngram_size => 3}.merge(options)
    #     @ngram_size = options[:ngram_size]
    #     @classifier = NaiveBayesClassifier.new(:num_categories => 2)

    # TODO: ``train`` has not been ported yet. Ruby original:
    #   def train(max_epochs, training_sentences):
    #     classifier = NaiveBayesClassifier.train_em(max_epochs,
    #         training_sentences.map{ |sentence| sentence.to_ngrams(ngram_size) })
    #     classifier.category_names =
    #       if classifier.get_prior_category_probability(0) >
    #          @classifier.get_prior_category_probability(1)
    #         %w( majority minority )
    #       else
    #         %w( minority majority )

    def classify(self, text):
        """Return the (named) category the sentence belongs to."""
        sentence = String(text)
        category_index = self.classifier.classify(
            sentence.to_ngrams(self.ngram_size))
        return self.classifier.category_names[category_index]

    def probabilities(self, sentence):
        """Return the classifier's posterior category probabilities.

        :param sentence: object providing ``to_ngrams(n)``.
            NOTE(review): unlike ``classify`` the argument is not wrapped
            in ``String`` here -- confirm which form callers pass.
        """
        # The instance's classifier must be used (there is no global
        # ``classifier``), and the result has to be returned to the caller.
        return self.classifier.get_posterior_category_probabilities(
            sentence.to_ngrams(self.ngram_size))

    # TODO: ``yamlize`` has not been ported yet. Ruby original:
    #   # Dumps the language model to a file.
    #   def yamlize(filename):
    #     File.open(filename, "w") do |f|
    #       f.puts self.to_yaml

    @staticmethod
    def load_yaml(filename="english-tweet-detector.yaml"):
        """Load the language model from a YAML file.

        :param filename: file to read; defaults to the file name that used
            to be hard-coded, so argument-less calls keep working.
        :return: whatever object the YAML document describes.
        """
        with open(filename, "r") as stream:
            # NOTE(security): ``yaml.Loader`` can construct arbitrary
            # Python objects. It is needed to rebuild the tagged detector,
            # but must only be used on trusted model files.
            return yaml.load(stream, Loader=yaml.Loader)
def create_classifier():
    """Create a naive Bayes classifier trained on the "pos" and "neg" corpora.

    Both corpus directories are resolved relative to ``BASE_DIR``.

    :return: the trained classifier with its probabilities calculated.
    """
    corpora = {
        "positive_corpus": os.path.join(BASE_DIR, "pos"),
        "negative_corpus": os.path.join(BASE_DIR, "neg"),
    }
    classifier = nb.NaiveBayesClassifier(**corpora)
    classifier.train_positive()
    classifier.train_negative()
    # Creates a dictionary with the probabilities of each word.
    classifier.calculate_probabilities()
    return classifier
'''
# NOTE(review): the triple quote above is unbalanced within this excerpt, so
# it is unclear whether it closes an earlier docstring or opens a string that
# disables the code below -- confirm against the full file.
"""
This script will read all the emails and it will train the classifier
"""
import os
from Email import *
from FeatureSelection import *
from NaiveBayesClassifier import *

# Directory that is scanned for the training e-mails.
trainPath = "dataset"
trainSet_emails = []
# Create an Email for every file we read.
for f in os.listdir(trainPath):
    fileName = trainPath + '/' + f
    e = Email()
    # A path containing "spm" is labelled SPAM, everything else HAM.
    if "spm" in fileName:
        e.setCategory("SPAM")
    else:
        e.setCategory("HAM")
    e.read(fileName)
    # Insert the email we created into the collection of emails.
    trainSet_emails.append(e)
# Select features from our training set (automatic feature selection).
fs = FeatureSelection(trainSet_emails)
fs.selectFeatures()
# Create a naive Bayes classifier and train it.
nb = NaiveBayesClassifier()
nb.setEmails(trainSet_emails)
nb.train()
######################################### ## Naive bayes test kodları from sklearn.datasets import load_iris from sklearn.utils import shuffle iris_X, iris_y = load_iris(return_X_y=True) iris_X, iris_y = shuffle(iris_X, iris_y) X_train = iris_X[:-30] X_test = iris_X[-30:] y_train = iris_y[:-30] y_test = iris_y[-30:] from NaiveBayesClassifier import * bayes = NaiveBayesClassifier() bayes.buildModel(X_train, y_train) bayes.evaluateModel(X_test, y_test) bayes.showLabel(X_test[3], load_iris()) print(load_iris().target_names) print(y_test[3]) ################################################## ## Karar ağacı test kodları from sklearn.datasets import load_iris from sklearn.utils import shuffle iris_X, iris_y = load_iris(return_X_y=True) iris_X, iris_y = shuffle(iris_X, iris_y)
# Pick the n-gram setting named by the second command line argument.
# NOTE(review): there is no else branch, so `gram` is not assigned here for
# any other value -- presumably validated earlier; confirm.
if sys.argv[2] == "unigram":
    gram = unigram
elif sys.argv[2] == "bigram":
    gram = bigram
elif sys.argv[2] == "trigram":
    gram = trigram
#-----------------------------------------------------------------------END - Check for command line arguments

#-----------------------------------------------------------------------START - Load the training set
sys.stdout.write("Loading training set...")
trainingDataset = getDataSet(numDocuments, POS="sent/train/pos/", NEG="sent/train/neg/", n=gram)
print "complete!"
#-----------------------------------------------------------------------END - Load the training set

#-----------------------------------------------------------------------START - Create the NaiveBayesClassifier
# NOTE(review): the meaning of the argument 1 is not visible here -- confirm
# against the NaiveBayesClassifier constructor.
classifier = NaiveBayesClassifier(1)
#-----------------------------------------------------------------------END - Create the NaiveBayesClassifier

#-----------------------------------------------------------------------START - Train the classifier
sys.stdout.write("Training classifier...")
classifier.train(trainingDataset)
print "complete!"
# Drop this script's reference to the training data before the test set is
# loaded.
del trainingDataset
#-----------------------------------------------------------------------END - Train the classifier

#-----------------------------------------------------------------------START - Load the testing set
sys.stdout.write("Loading test set...")
testingDataset = getDataSet(numDocuments, POS="sent/test/pos/", NEG="sent/test/neg/", n=gram)
print "complete!"
#-----------------------------------------------------------------------END - Load the testing set
'''
Classify every e-mail found in the test directory and write one
"<name> <label>" line per e-mail to results.txt.

Created on Dec 1, 2013

@author: konstantinos kostis , <*****@*****.**>
'''
import os

from Email import *
from NaiveBayesClassifier import *

# Read the test files/emails.
testPath = "test"
testEmails = []
for testFile in os.listdir(testPath):
    fileName = testPath + '/' + testFile
    e = Email()
    e.read(fileName)
    # Insert the email we created into the collection of emails.
    testEmails.append(e)

# Create a NaiveBayesClassifier object.
# NOTE(review): no training happens in this script; presumably the
# classifier picks up state stored by the training script -- confirm.
NBC = NaiveBayesClassifier()

# Classify every email. The context manager closes results.txt even when
# reading a name or classifying an email raises.
with open("results.txt", "w") as results:
    for testEmail in testEmails:
        results.write(testEmail.getName() + " " + NBC.classify(testEmail) + "\n")
        # Flush per line so progress is visible while the run is ongoing.
        results.flush()
"""
Read every e-mail in the training directory, select features from them and
train the naive Bayes classifier.
"""
import os
from Email import *
from FeatureSelection import *
from NaiveBayesClassifier import *

trainPath = "dataset"
trainSet_emails = []

# Build one labelled Email per file in the training directory.
for f in os.listdir(trainPath):
    fileName = trainPath + '/' + f
    e = Email()
    # A path containing "spm" marks a spam message; everything else is ham.
    e.setCategory("SPAM" if "spm" in fileName else "HAM")
    e.read(fileName)
    trainSet_emails.append(e)

# Automatic feature selection over the whole training set.
fs = FeatureSelection(trainSet_emails)
fs.selectFeatures()

# Train a naive Bayes classifier on the collected emails.
nb = NaiveBayesClassifier()
nb.setEmails(trainSet_emails)
nb.train()
# print(I2F) # print(L2I) # print(I2L) # print(V2I) # print(I2V) TRAIN = get_data(TRAIN_DATA) TEST = get_data(TEST_DATA) print("TESTTTTTTTTTTT") print(TEST) # creating the different models dt = ID3.Tree(TRAIN, values={i: value.keys() for i, value in I2V.items()}) predsKNN, accuracyKNN, corrects = knn_alg.runKnn() predsNB, accuracyNB = nb_alg.NBresults() predsID3, accuracyID3 = predictTree(test_set=TEST, treeAlg=dt) print("ID3:") print("") print(predsID3) print(I2L) predsID3ByTag = [] for i in predsID3: predsID3ByTag.append(I2L[i]) print(predsID3ByTag) output_predictions((predsID3ByTag, accuracyID3), (predsKNN, accuracyKNN), (predsNB, accuracyNB), len(corrects), 'output.txt') # printing the tree that DecisionTree created t = open("output_tree.txt", 'w')
# Tail of the previous (SVM) step.
sc.printResults('youtube', 6)
print "SVM based prediction done !!!\n"

#--------------------------------------- STEP 9: NAIVE BAYES CLASSIFICATION -----------------------------------
# Calculate using Naive Bayes Classifier
print "Using Naive Bayes Classification Technique on Twitter Data. Please wait...\n"
trainingDataFile = '../Data/NaiveBayes/full_training_dataset.csv' # Here we can use some other training data set - shortened for speeding process.
classifierDumpFile = '../Data/NaiveBayes/naivebayes_test_model.pickle'
trainingRequired = 0 # Set to 0 when not required after pickle file is created.
# NOTE(review): this name would shadow a `time` module import, if the file
# has one -- confirm.
time = 'today'
nb = NaiveBayesClassifier.NaiveBayesClassifier(twitter_data, key_word, time, trainingDataFile, classifierDumpFile, trainingRequired)
nb.classify()
#nb.accuracy()
#nb.writeOutput('nboutput1.txt')
nb.printResults('twitter', 8)

# Same classification again, this time on the YouTube data, reusing the
# training file and classifier dump configured above.
print "Using Naive Bayes Classification Technique on YouTube Data. Please wait...\n"
trainingRequired = 0
nb = NaiveBayesClassifier.NaiveBayesClassifier(youTube_data, key_word, time, trainingDataFile, classifierDumpFile, trainingRequired)
nb.classify()
#nb.accuracy()
#nb.writeOutput('nboutput2.txt')
def runNBClassifier(train, test):
    """Build a naive Bayes model from ``train`` and validate it on ``test``.

    :param train: training data handed to ``NaiveBayesClassifier``.
    :param test: data handed to ``validate`` together with the model.
    :return: the model that was built.
    """
    # Parenthesised single-argument form: prints the same text on Python 2
    # and is valid syntax on Python 3, unlike the print statement.
    print("building naive bayes classifier")
    model = NaiveBayesClassifier(train)
    validate(model, "naive bayes", test)
    return model
# csv and nltk are used below; importing them here keeps the script
# runnable on its own (a repeated import is harmless).
import csv

import nltk

import NaiveBayesClassifier

lines = """ Donate Diore Email: [email protected] Chief Executive Officer Office 800-555-5555 Broadlook Technoplogies Cell : 414-555-5555 21140 Capitol Drive Fax : 262-754-8081 Pewaukee WI 53072 Blog www.idanato.com http://www.broadlook.com """

# Collect every token that the POS tagger marks as 'NN'.
sentences = nltk.sent_tokenize(lines)  # tokenize sentences
nouns = []  # holds all nouns
for sentence in sentences:
    for word, pos in nltk.pos_tag(nltk.word_tokenize(str(sentence))):
        if pos == 'NN':
            nouns.append(word)

# Read the (word, label) training pairs. The raw string keeps the Windows
# backslashes literal without relying on invalid escape sequences, and the
# context manager closes the file (the old global `file` also shadowed the
# Python 2 builtin). Rows must have exactly two columns.
with open(r'D:\IFS\python_train\Position.csv', 'r') as training_file:
    feature_set = [(word, label) for word, label in csv.reader(training_file)]
print(feature_set)

# NOTE(review): `NaiveBayesClassifier` is bound to a module by the import
# above, so this call looks like it raises "TypeError: 'module' object is
# not callable". The intended class is not visible from here -- confirm
# what should be imported.
cl = NaiveBayesClassifier(feature_set)
print(nouns, "and entity is :")
entity = cl.classify(nouns)
print(entity)
t = entity
data, labels = getDataAndLabels(datafilename, labelfilename)
# NOTE: `[] * 6` is still an empty list; the lists grow through append().
accuracysNB = []*6;
# Not filled within this excerpt.
accuracysLR = []*6;
# Train and evaluate once per training-set fraction.
for splitRatio in [0.1, 0.3, 0.5, 0.7, 0.8, 0.9]:
    print("Iteration "+str(splitRatio))
    # Split the data into the training and test set
    X_train, X_test, Y_train, Y_test = train_test_split(data, labels, train_size=splitRatio)
    # Naive Bayes implementation
    # Call Naive Bayes
    nb = NBC.NaiveBayesClassifier()
    nb.trainModel(X_train, Y_train)
    # Timestamps are printed before and after prediction and again after
    # scoring.
    print(datetime.datetime.now())
    # Test
    predictedValues = nb.predict(X_test)
    print(datetime.datetime.now())
    # Accuracy
    accuracy = nb.accuracy(Y_test, predictedValues)
    accuracysNB.append(accuracy)
    print("Split Ratio = " + str(splitRatio) + "Accuracy = " + str(accuracy))
    print(datetime.datetime.now())
def __init__(self, options):
    """Create a detector with a two-category naive Bayes classifier.

    :param options: mapping of settings; ``"ngram_size"`` (default 3) is
        the only key read here.
    """
    # Caller-supplied options override the defaults. Concatenating
    # ``dict.items()`` results only works on Python 2 (it raises TypeError
    # on Python 3), so merge with ``update`` instead.
    merged = {"ngram_size": 3}
    merged.update(options)
    self.classifier = NaiveBayesClassifier(2)
    self.ngram_size = merged["ngram_size"]
import errno
import os

import NaiveBayesClassifier
from subprocess import check_output

# The code for this comes from test.py
# http://cs532s18.slack.com/files/U8K4TSGJ1/F9Z33U1B6/test.py
c = NaiveBayesClassifier.naivebayes(NaiveBayesClassifier.getwords)

# Remove the previous db file. os.remove works on every platform (the
# `rm` command does not exist on plain Windows), and a missing file is
# tolerated so that the very first run does not crash.
try:
    os.remove('neverett.db')
except OSError as err:
    if err.errno != errno.ENOENT:
        raise
c.setdb('neverett.db')
NaiveBayesClassifier.spamTrain(c)

# Classify Testing/email1.txt ... Testing/email6.txt as spam or not spam,
# closing each file as soon as it has been read.
for index in range(1, 7):
    email_path = os.path.join('Testing', 'email%d.txt' % index)
    with open(email_path) as handle:
        print(c.classify(handle.read()))
'''
Classify every e-mail found in the test directory and write one
"<name> <label>" line per e-mail to results.txt.

Created on Dec 1, 2013

@author: konstantinos kostis , <*****@*****.**>
'''
import os

from Email import *
from NaiveBayesClassifier import *

# Read the test files/emails.
testPath = "test"
testEmails = []
for testFile in os.listdir(testPath):
    fileName = testPath + '/' + testFile
    e = Email()
    e.read(fileName)
    # Insert the email we created into the collection of emails.
    testEmails.append(e)

# Create a NaiveBayesClassifier object.
# NOTE(review): no training happens in this script; presumably the
# classifier picks up state stored by the training script -- confirm.
NBC = NaiveBayesClassifier()

# Classify every email. The context manager closes results.txt even when
# reading a name or classifying an email raises.
with open("results.txt", "w") as results:
    for testEmail in testEmails:
        results.write(testEmail.getName() + " " + NBC.classify(testEmail) + "\n")
        # Flush per line so progress is visible while the run is ongoing.
        results.flush()
filepath = "little.txt"
import NaiveBayesClassifier as s

# Print every line of the file with its 1-based number, followed by the
# sentiment reported for that line.
with open(filepath) as fp:
    for cnt, line in enumerate(fp, 1):
        sen = s.sentiment(line)
        print("Line {}:{}".format(cnt, line.strip()))
        print(sen)
import nltk
import NaiveBayesClassifier as nbc

# Presumably needed once to fetch the tokenizer data -- left disabled:
# nltk.download('punkt')

# Classify one sample sentence and print it as "<sentence>:<result>".
sentence = 'this movie is superb'
verdict = nbc.naive_bayes_classifier(sentence)
print(sentence + ":" + verdict)