Esempio n. 1
0
def main():
    """Run a quick FFNN training pass on the first 100 rows of each split.

    Loads ``data/emails.csv`` through ``extractor(...).min_max_nomalized()``,
    builds an ``FFNN`` with default arguments and trains it on the leading
    100 entries of every returned array, printing progress along the way.

    Returns:
        The two values produced by ``FFNN.train`` (presumably accuracy and
        timing information -- confirm against ``FFNN.train``).
    """
    print("Importing Data")
    dataset = extractor(file='data/emails.csv')
    (train_x, train_y), (test_x, test_y) = dataset.min_max_nomalized()
    print(train_x[0])

    print("Creating Model")
    network = FFNN()

    print("Training Model")
    print(type(train_x))
    # Only the first 100 entries of each array are handed to the trainer.
    head = slice(0, 100)
    accuracy, elapsed = network.train(
        train_x[head], train_y[head], test_x[head], test_y[head])
    print(accuracy)
    return accuracy, elapsed
def main(name, args=None, norm="minmax", dim=1500):
    """Train an FFNN on ``data/emails.csv`` and write its metrics to CSV.

    Args:
        name: Base name of the report; the CSV is written to
            ``output/<name>.csv``.
        args: Optional mapping of keyword arguments forwarded to ``FFNN``.
            ``None`` (the default) forwards no extra arguments.
        norm: Preprocessing to apply: ``"minmax"`` uses
            ``min_max_nomalized()``, ``"pca"`` uses
            ``pca_reduced_nomarlize(dim=dim)``.
        dim: Forwarded as ``dim`` to ``pca_reduced_nomarlize``; ignored when
            ``norm`` is ``"minmax"``.

    Raises:
        ValueError: If ``norm`` is neither ``"minmax"`` nor ``"pca"``.
    """
    # Fail fast: previously an unknown value skipped both branches and only
    # crashed later with an UnboundLocalError on ``trX``.
    if norm not in ("minmax", "pca"):
        raise ValueError(
            "unknown norm {0!r}; expected 'minmax' or 'pca'".format(norm))

    # A ``{}`` default would be one dict shared by every call; build a fresh
    # one per call instead.
    model_kwargs = {} if args is None else args

    print("Importing Data")
    if norm == "minmax":
        ((trX, trY),
         (teX, teY)) = extractor(file='data/emails.csv').min_max_nomalized()
    else:  # norm == "pca"
        ((trX, trY), (teX, teY)) = extractor(
            file='data/emails.csv').pca_reduced_nomarlize(dim=dim)

    print("Creating Model")
    model = FFNN(**model_kwargs)
    print("Training Model")
    print(type(trX))
    acc, t, lr = model.train(trX, trY, teX, teY)

    # One row per entry of ``acc``; 'epoc' is simply the row index.
    df = pd.DataFrame({
        'accuracy': acc,
        'epoc': list(range(len(acc))),
        'time_per_iteration(s)': t,
        'learning_rate': lr
    })

    df.to_csv("output/{0}.csv".format(name))
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.metrics import precision_recall_fscore_support
import time

# Evaluate a Multinomial Naive Bayes classifier for test_size = 0.1 ... 0.9
# and record the macro-averaged precision and prediction time of every run.
validation_list = []  # test_size used for each run
precision_list = []   # macro-averaged precision for each run
time_list = []        # seconds spent inside model.predict for each run

for i in range(1, 10):
    prediction = dict()
    # i / 10 instead of i * 0.1: the product yields values such as
    # 0.30000000000000004, which end up in validation_list and the split.
    kwarg = {'test_size': i / 10}
    # NOTE(review): despite the names, the calls below use x_test as the
    # fitting targets and y_train as the inputs to predict, i.e. the layout
    # appears to be ((train_X, train_y), (test_X, test_y)) -- confirm against
    # extractor.get(). Names are kept because they are module-level.
    ((x_train, x_test), (y_train, y_test)) = extractor(
        file='data/emails.csv', **kwarg).get()

    model = MultinomialNB()
    model.fit(x_train, x_test)

    # prediction (timed)
    start = time.time()
    prediction["naive_bayes"] = model.predict(y_train)
    end = time.time()
    # The elapsed time was measured but never stored before.
    time_list.append(end - start)

    precision, recall, fscore, support = precision_recall_fscore_support(
        y_test, prediction['naive_bayes'], average='macro')
    precision_list.append(precision)
    validation_list.append(kwarg['test_size'])
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import precision_recall_fscore_support
import time

# Evaluate a Random Forest classifier for test_size = 0.1 ... 0.9 and record
# the macro-averaged precision and prediction time of every run.
validation_list = []  # test_size used for each run
precision_list = []   # macro-averaged precision for each run
time_list = []        # seconds spent inside model.predict for each run

for i in range(1, 10):
    prediction = dict()
    # i / 10 instead of i * 0.1: the product yields values such as
    # 0.30000000000000004, which end up in validation_list and the split.
    kwarg = {'test_size': i / 10}
    # Forward test_size to the extractor; it used to be recorded in
    # validation_list without ever being applied, so every iteration
    # evaluated the same split.
    # NOTE(review): despite the names, the calls below use x_test as the
    # fitting targets and y_train as the inputs to predict, i.e. the layout
    # appears to be ((train_X, train_y), (test_X, test_y)) -- confirm against
    # extractor.min_max_nomalized(). Names are kept because they are
    # module-level.
    ((x_train, x_test), (y_train, y_test)) = extractor(
        file='data/emails.csv', **kwarg).min_max_nomalized(on_hot=False)

    model = RandomForestClassifier(n_estimators=100,
                                   max_depth=100,
                                   random_state=0)
    model.fit(x_train, x_test)

    # prediction (timed)
    start = time.time()
    prediction["random_forest"] = model.predict(y_train)
    end = time.time()
    # The elapsed time was measured but never stored before.
    time_list.append(end - start)

    precision, recall, fscore, support = precision_recall_fscore_support(
        y_test, prediction['random_forest'], average='macro')
    precision_list.append(precision)
    validation_list.append(kwarg['test_size'])