def drawSimilarities(placeOfWordToCompare):
    """Plot one column of the imported dataset, coloured by each row's label.

    The value found at index ``placeOfWordToCompare`` of every row is
    multiplied by 100.  Rows whose last field equals 1 are drawn in blue at
    ``(v, v + 10)``; every other row is drawn in orange at ``(v, v - 10)``.
    Rows whose scaled value is not below 200 are left out of the figure.
    """
    plt.figure()
    markerStyle = dict(linestyle='dashed', marker='o', markersize=3)
    for row in importation.importcsv():
        scaled = float(row[placeOfWordToCompare]) * 100
        # Label 1 is drawn above the diagonal, every other label below it.
        shade, shift = ('blue', 10) if int(row[-1]) == 1 else ('orange', -10)
        if scaled < 200:
            plt.plot(scaled, scaled + shift, color=shade,
                     markerfacecolor=shade, **markerStyle)
    print('rrrr')
    plt.show()
KNeighborsClassifier(n_neighbors = 2, weights = "distance"), #GaussianProcessClassifier(1.0 * RBF(1.0)), #GaussianNB(), SVC(gamma=2, C=1) #QuadraticDiscriminantAnalysis(), ] names = ["AdaBoost", "Linear SVM", "Neural Net", "Decision Tree", "Random Forest", "Nearest Neighbors", "RBF SVM" ]#"QDA",#"Gaussian Process","Naive Bayes" rowData = importcsv("spambase.data") data = [] for line in rowData: listLine = [] for value in line: listLine.append(float(value)) data.append(listLine) usedData = [] usedValue = [] for line in data: listLine = [] for k in range(len(line)): if k not in [27, 28, 31, 57, 0,3,14,16,22,24,26,30,31, 32, 33, 34, 37, 39, 40, 41,42, 46,47, 50, 51]: listLine.append(line[k])
def drawSimilarities(listOfPlace):
    """Plot, per column, how spam and non-spam rows spread over value buckets.

    For every column index in ``listOfPlace`` a new figure is opened and each
    row's value in that column is sorted into one of six buckets
    (``<0.05``, ``0.05<0.5``, ``0.5<1``, ``1<1.5``, ``1.5<2``, ``2<``).
    Rows whose last field equals 1 are counted as spam, all others as
    non-spam.  The percentage of each class falling in each bucket is plotted
    (blue for spam, orange for non-spam) and printed.

    Args:
        listOfPlace: iterable of column indices to analyse.  A bare integer
            is accepted and treated as a one-element list.

    The rows come from ``importation.importcsv()``.
    NOTE(review): assumes that call yields indexable rows of numeric fields
    with the class label last -- confirm against the importation module.
    """
    if isinstance(listOfPlace, int):
        listOfPlace = [listOfPlace]

    # Materialise the rows once: they are scanned again for every column.
    rows = list(importation.importcsv())
    bucketKeys = ("<0.05", "0.05<0.5", "0.5<1", "1<1.5", "1.5<2", "2<")

    for placeOfWordToCompare in listOfPlace:
        plt.figure()
        # Fresh buckets per column so counts never leak between figures.
        spamClasses = {key: [] for key in bucketKeys}
        nonSpamClasses = {key: [] for key in bucketKeys}

        for line in rows:
            value = float(line[placeOfWordToCompare])
            bucket = _similarityBucket(value)
            if bucket is None:
                # Only NaN reaches this point; such rows are not counted.
                continue
            if int(line[-1]) == 1:
                spamClasses[bucket].append(value)
            else:
                nonSpamClasses[bucket].append(value)

        totSpam = sum(len(values) for values in spamClasses.values())
        totNonSpam = sum(len(values) for values in nonSpamClasses.values())

        _plotBucketShares(spamClasses, totSpam, 'blue')
        print(totSpam)
        print("\n non spam : \n")
        _plotBucketShares(nonSpamClasses, totNonSpam, 'orange')
        print(totNonSpam)
        print('rrrr')

    plt.show()


def _similarityBucket(value):
    """Return the bucket key that ``value`` falls in, or None for NaN."""
    upperBounds = (
        ("<0.05", 0.05),
        ("0.05<0.5", 0.5),
        ("0.5<1", 1),
        ("1<1.5", 1.5),
        ("1.5<2", 2),
    )
    for key, upper in upperBounds:
        if value < upper:
            return key
    return "2<" if value >= 2 else None


def _plotBucketShares(classes, total, color):
    """Plot and print the percentage of ``total`` held by each bucket."""
    for nb, (key, values) in enumerate(classes.items()):
        # An empty class has no meaningful share; report 0 instead of
        # dividing by zero.
        tot = (len(values) / total) * 100 if total else 0.0
        plt.plot(nb, tot, color=color, linestyle='dashed', marker='o',
                 markerfacecolor=color, markersize=5)
        print("\n")
        print(key)
        print(tot)
from sklearn.naive_bayes import GaussianNB
from random import random
from importation import importcsv
import numpy as np

# Load data: every field of every imported row is converted to float.
rowData = importcsv()
data = [list(map(float, line)) for line in rowData]

# Split each row into the kept feature columns and its last field.
# Columns 27, 28, 31 and 57 are left out of the feature list.
usedData = []
usedValue = []
for line in data:
    listLine = [field for k, field in enumerate(line)
                if k not in (27, 28, 31, 57)]
    usedValue.append(line[-1])
    usedData.append(listLine)

'''data = np.array(data) print(data) '''

testSetX = []
testSetY = []
# Draw 1000 random row positions from the filtered data.
# NOTE(review): only the first statement of this loop body is visible in the
# reviewed source; nothing is appended to the test sets here.
for k in range(1000):
    placeToTakeForTest = int(random() * len(usedData))
def drawSimilarities(listPlaceOfWordToCompare, precision, valueToCompare, pathFile, draw=True):
    """Select the columns whose values separate spam from non-spam rows.

    For each column index, the rows are split into those whose value is
    below ``valueToCompare`` ("<value") and the rest ("value<").  Within each
    side the share of spam rows (last field equal to 1) and of non-spam rows
    is computed.  A column is kept when, on BOTH sides, one class's share is
    greater than ``precision`` times the other's.

    Args:
        listPlaceOfWordToCompare: iterable of column indices to examine.
        precision: factor by which one class's share must exceed the other's.
        valueToCompare: threshold splitting the rows into the two sides.
        pathFile: path handed to ``importcsv``.
        draw: when true, draw two pie charts per column; the figure is saved
            to 'PieChart01.png' and shown for kept columns, closed otherwise.

    Returns:
        List of the column indices that were kept, in input order.

    NOTE(review): assumes ``importcsv`` yields indexable rows of numeric
    fields with the class label last -- confirm against its definition.
    """
    # Materialise once: the rows are scanned again for every column, which
    # would silently yield nothing after the first pass for a one-shot iterator.
    rows = list(importcsv(pathFile))
    sides = ("<value", "value<")
    nbOk = []

    for placeOfWordToCompare in listPlaceOfWordToCompare:
        if draw:
            plt.figure()
            plt.title('Word nb = ' + str(placeOfWordToCompare))

        # Only the counts are needed, not the values themselves.
        spamCounts = dict.fromkeys(sides, 0)
        nonSpamCounts = dict.fromkeys(sides, 0)
        for line in rows:
            value = float(line[placeOfWordToCompare])
            side = "<value" if value < valueToCompare else "value<"
            if int(line[-1]) == 1:
                spamCounts[side] += 1
            else:
                nonSpamCounts[side] += 1

        results = []
        for nbPlot, side in enumerate(sides, start=1):
            totalType = spamCounts[side] + nonSpamCounts[side]
            if totalType:
                totalSpam = spamCounts[side] / totalType
                totalNonSpam = nonSpamCounts[side] / totalType
            else:
                # No row on this side of the threshold: both shares are 0.
                totalSpam, totalNonSpam = 0, 0
            results.append(totalSpam)
            results.append(totalNonSpam)

            if draw:
                plt.subplot(210 + nbPlot)
                plt.title("For : " + str(side) + " : " + str(valueToCompare) + " for the word nb : " + str(placeOfWordToCompare) + " with a number of " + str(totalType) + " emails concerned")
                plt.pie([totalSpam, totalNonSpam], labels=("Spam", "Non Spam"),
                        colors=['yellowgreen', 'lightskyblue'],
                        autopct='%1.1f%%', shadow=True, startangle=90)

        isKept = (_oneShareDominates(results[0], results[1], precision)
                  and _oneShareDominates(results[2], results[3], precision))
        if isKept:
            nbOk.append(placeOfWordToCompare)

        if draw:
            if isKept:
                plt.axis('equal')
                # Every kept column writes to the same file name, so only
                # the last one survives on disk.
                plt.savefig('PieChart01.png')
                plt.show()
            else:
                plt.close()

    return nbOk


def _oneShareDominates(first, second, precision):
    """Return True when either share exceeds ``precision`` times the other."""
    # Shares are compared as percentages, exactly as the selection rule
    # was originally written.
    return (first * 100 > second * 100 * precision
            or second * 100 > first * 100 * precision)