Ejemplo n.º 1
0
def drawSimilarities(placeOfWordToCompare):
    """Scatter one CSV column, coloured by the value of each row's last field.

    Rows come from ``importation.importcsv()``. For every row the field at
    index ``placeOfWordToCompare`` is parsed as a float and multiplied by 100.
    Rows whose last field parses to the integer 1 are drawn in blue at
    ``(v, v + 10)``; all other rows are drawn in orange at ``(v, v - 10)``.
    Only scaled values below 200 are plotted. The figure is shown at the end.

    Args:
        placeOfWordToCompare: index of the field to plot in each row.
    """
    plt.figure()
    for row in importation.importcsv():
        scaled = float(row[placeOfWordToCompare]) * 100
        flagged = int(row[-1]) == 1

        # Written as "not <" so that a NaN value is skipped as well.
        if not scaled < 200:
            continue

        # The vertical offset keeps the two groups visually apart.
        shade, offset = ('blue', 10) if flagged else ('orange', -10)
        plt.plot(scaled,
                 scaled + offset,
                 color=shade,
                 linestyle='dashed',
                 marker='o',
                 markerfacecolor=shade,
                 markersize=3)

    print('rrrr')
    plt.show()
    KNeighborsClassifier(n_neighbors = 2, weights = "distance"),

    #GaussianProcessClassifier(1.0 * RBF(1.0)),



    #GaussianNB(),
    SVC(gamma=2, C=1)
    #QuadraticDiscriminantAnalysis(),
    ]

# Display names of the classifiers in use.
# NOTE(review): presumably parallel to the classifier list above, whose
# beginning is not visible here — confirm the two stay in the same order.
names = [
    "AdaBoost",
    "Linear SVM",
    "Neural Net",
    "Decision Tree",
    "Random Forest",
    "Nearest Neighbors",
    "RBF SVM",
]  # currently disabled: "QDA", "Gaussian Process", "Naive Bayes"

# Read the rows of spambase.data and parse every field as a float.
rowData = importcsv("spambase.data")
data = [[float(field) for field in record] for record in rowData]


# Containers filled from `data`. Only the column filtering is visible in this
# excerpt; the statements that append to usedData / usedValue are cut off.
usedData = []
usedValue = []
for line in data:
    # Filtered copy of the current row.
    listLine = []
    for k in range(len(line)):
        # Keep a field only when its column index is absent from the
        # exclusion list. Index 31 is listed twice; the duplicate has no
        # effect on the membership test.
        if k not in [27, 28, 31, 57, 0,3,14,16,22,24,26,30,31, 32, 33, 34, 37, 39, 40, 41,42,  46,47, 50, 51]:
            listLine.append(line[k])
Ejemplo n.º 3
0
def _plotBandShares(classes, total, color):
    """Plot and print, per band, the percentage of `total` held by that band.

    A total of zero yields 0.0 for every band so that an input with no row of
    a given class no longer raises ZeroDivisionError.
    """
    for nb, band in enumerate(classes):
        tot = (len(classes[band]) / total) * 100 if total else 0.0
        plt.plot(nb,
                 tot,
                 color=color,
                 linestyle='dashed',
                 marker='o',
                 markerfacecolor=color,
                 markersize=5)
        print("\n")
        print(band)
        print(tot)
        print(total)


def drawSimilarities(listOfPlace):
    """Plot how one column's values spread over fixed bands, per class.

    Every row from ``importation.importcsv()`` has its field at index
    ``placeOfWordToCompare`` parsed as a float and filed under one of six
    bands ("<0.05", "0.05<0.5", "0.5<1", "1<1.5", "1.5<2", "2<"). Rows whose
    last field parses to the integer 1 go into ``spamClasses``, all others
    into ``nonSpamClasses``. The percentage of each class falling in each band
    is then plotted (blue for ``spamClasses``, orange for ``nonSpamClasses``)
    and printed, and the figure is shown.

    Args:
        listOfPlace: not read by the body (see the review note below).
    """
    # NOTE(review): `placeOfWordToCompare`, `spamClasses` and `nonSpamClasses`
    # are not defined in this function and resolve from the enclosing scope,
    # while `listOfPlace` is never used. This looks like a lost
    # `for placeOfWordToCompare in listOfPlace:` loop plus per-call dict
    # initialisation — confirm before relying on this function. If the two
    # dicts really are module-level, their lists also accumulate across calls
    # while the totals below restart at zero.
    plt.figure()
    file = importation.importcsv()

    # (label, inclusive lower bound, exclusive upper bound); None = unbounded.
    bands = (
        ("<0.05", None, 0.05),
        ("0.05<0.5", 0.05, 0.5),
        ("0.5<1", 0.5, 1),
        ("1<1.5", 1, 1.5),
        ("1.5<2", 1.5, 2),
        ("2<", 2, None),
    )

    totSpam = 0
    totNonSpam = 0

    for line in file:
        value = float(line[placeOfWordToCompare])
        for label, lower, upper in bands:
            # "not <=" / "not <" keep NaN out of every band, as the original
            # if/elif chain did.
            if lower is not None and not lower <= value:
                continue
            if upper is not None and not value < upper:
                continue
            if int(line[-1]) == 1:
                spamClasses[label].append(value)
                totSpam += 1
            else:
                nonSpamClasses[label].append(value)
                totNonSpam += 1
            break

    _plotBandShares(spamClasses, totSpam, 'blue')
    print("\n non spam : \n")
    _plotBandShares(nonSpamClasses, totNonSpam, 'orange')

    print('rrrr')
    plt.show()
from sklearn.naive_bayes import GaussianNB
from random import random
from importation import importcsv
import numpy as np

# Load the CSV rows and parse every field as a float.
rowData = importcsv()
data = [[float(field) for field in record] for record in rowData]

# Feature rows: every field except those at column indices 27, 28, 31 and 57.
usedData = [
    [field for column, field in enumerate(record)
     if column not in (27, 28, 31, 57)]
    for record in data
]
# Matching values: the last field of each row.
usedValue = [record[-1] for record in data]
'''data = np.array(data)
print(data)
'''

# Containers for the test split. The code that fills them is cut off in this
# excerpt; presumably X holds rows from usedData and Y the matching entries
# of usedValue — TODO confirm.
testSetX = []
testSetY = []
for k in range(1000):
    # random() is in [0, 1), so this is an index in 0 .. len(usedData) - 1.
    placeToTakeForTest = int(random() * len(usedData))
Ejemplo n.º 5
0
def _isLopsided(first, second, precision):
    """Return True when either share exceeds the other one times `precision`."""
    return (first * 100 > second * 100 * precision
            or second * 100 > first * 100 * precision)


def drawSimilarities(listPlaceOfWordToCompare,
                     precision,
                     valueToCompare,
                     pathFile,
                     draw=True):
    """Select the columns whose threshold split separates the two classes.

    For every column index in ``listPlaceOfWordToCompare`` the rows loaded
    from ``pathFile`` are split into those whose field is below
    ``valueToCompare`` ("<value") and the rest ("value<"). Within each side
    the share of rows whose last field parses to the integer 1 ("spam") and
    the share of all other rows ("non spam") are computed. A column is kept
    when, on BOTH sides, one share is greater than the other share multiplied
    by ``precision``.

    Args:
        listPlaceOfWordToCompare: iterable of column indices to evaluate.
        precision: factor by which one share must exceed the other.
        valueToCompare: threshold separating the "<value" and "value<" sides.
        pathFile: path handed to ``importcsv``.
        draw: when true, draw one pie chart per side; a kept column's figure
            is saved to 'PieChart01.png' and shown, any other is closed.

    Returns:
        The list of column indices that satisfied the criterion, in input
        order.
    """
    nbOk = []
    # The rows are scanned once per column. Materialising them guards against
    # importcsv returning a one-shot iterator, which would otherwise be empty
    # for every column after the first (its return type is not visible here).
    rows = list(importcsv(pathFile))
    sides = ("<value", "value<")

    for placeOfWordToCompare in listPlaceOfWordToCompare:
        if draw:
            plt.figure()
            plt.title('Word nb = ' + str(placeOfWordToCompare))

        # Only the number of rows per side matters, not the values themselves.
        spamCounts = dict.fromkeys(sides, 0)
        nonSpamCounts = dict.fromkeys(sides, 0)
        for line in rows:
            value = float(line[placeOfWordToCompare])
            side = "<value" if value < valueToCompare else "value<"
            if int(line[-1]) == 1:
                spamCounts[side] += 1
            else:
                nonSpamCounts[side] += 1

        # results = [spam share, non-spam share] for "<value", then "value<".
        results = []
        for nbPlot, side in enumerate(sides, start=1):
            totalType = spamCounts[side] + nonSpamCounts[side]
            if totalType:
                totalSpam = spamCounts[side] / totalType
                totalNonSpam = nonSpamCounts[side] / totalType
            else:
                # An empty side has no meaningful shares; report zeros
                # instead of relying on a caught ZeroDivisionError.
                totalSpam, totalNonSpam = 0, 0

            results.append(totalSpam)
            results.append(totalNonSpam)
            if draw:
                labels = "Spam", "Non Spam"
                sizes = [totalSpam, totalNonSpam]
                colors = ['yellowgreen', 'lightskyblue']
                plt.subplot(210 + nbPlot)
                plt.title("For : " + str(side) + " : " + str(valueToCompare) +
                          " for the word nb : " + str(placeOfWordToCompare) +
                          " with a number of " + str(totalType) +
                          " emails concerned")
                plt.pie(sizes,
                        labels=labels,
                        colors=colors,
                        autopct='%1.1f%%',
                        shadow=True,
                        startangle=90)

        if (_isLopsided(results[0], results[1], precision)
                and _isLopsided(results[2], results[3], precision)):
            nbOk.append(placeOfWordToCompare)
            if draw:
                plt.axis('equal')
                plt.savefig('PieChart01.png')
                plt.show()
        elif draw:
            plt.close()

    return nbOk