def Ionosphere(X):
    """Clean the ionosphere data: drop a few feature columns, encode the
    g/b label as 1/0, and split features from the target."""
    binaryCols = {"signal": {"g": 1, "b": 0}}
    X = X.copy()
    X = Processor.removeMissing(X)
    X = X.drop(columns=["col0", "col1", "col13"])
    X = Processor.toBinaryCol(X, binaryCols)
    Y = X["signal"]
    X = X.iloc[:, :-1]  # every column except the trailing "signal" label
    return [X, Y]
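# The Processor helpers used above live in Project1/src/Processor.py and are not
# shown here. As a rough, hypothetical sketch (assuming removeMissing drops rows
# containing the dataset's "?" placeholder and toBinaryCol applies a per-column
# value mapping), they might look like the standalone functions below; the
# project's real implementations may differ.


def _remove_missing_sketch(X, placeholder="?"):
    # Drop any row that contains the missing-value placeholder.
    return X[~X.isin([placeholder]).any(axis=1)]


def _to_binary_col_sketch(X, mappings):
    # mappings: {column_name: {original_value: numeric_value}}
    X = X.copy()
    for col, mapping in mappings.items():
        X[col] = X[col].map(mapping)
    return X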
def ttt(X):
    """Clean the tic-tac-toe endgame data: binarize the result label and
    one-hot encode the nine board-cell columns."""
    labels = {"o": 0, "b": 1, "x": 2}  # unused: the cells are one-hot encoded below
    encoding = {"result": {"positive": 1, "negative": 0}}
    X = X.copy()
    X = Processor.toBinaryCol(X, encoding)
    X = Processor.OHE(
        X, cols=["tl", "tm", "tr", "ml", "mm", "mr", "bl", "bm", "br"])
    Y = X["result"]
    X = X.drop(columns=["result"])
    return [X, Y]
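# Processor.OHE is also defined in Project1/src/Processor.py. A minimal sketch,
# assuming it one-hot encodes the given columns (or every object-typed column
# when cols is omitted) with pandas.get_dummies, could be:

import pandas as pd


def _ohe_sketch(X, cols=None):
    # One-hot encode the selected categorical columns; pandas infers them from
    # the object dtype when cols is None.
    return pd.get_dummies(X, columns=cols)

# get_dummies' default "column_value" naming would also explain column names
# such as "native-country_Cambodia" used in adult() below.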
def adult(X):
    """Clean the adult (census income) data: encode the binary columns,
    normalize continuous features, one-hot encode the categoricals, and drop
    the per-country indicator columns plus a few other features."""
    binaryCols = {
        "sex": {"Male": 0, "Female": 1},
        "salary": {">50K": 0, "<=50K": 1},
    }
    X = X.copy()
    X = Processor.removeMissing(X)
    X = Processor.toBinaryCol(X, binaryCols)
    X = Processor.normalize(X, ["fnlwgt", "hours-per-week"])
    Y = X["salary"]
    X = X.iloc[:, :-1]  # every column except the trailing "salary" label
    X = Processor.OHE(X)
    # One-hot native-country indicator columns to drop.
    countryCols = [
        "native-country_Cambodia", "native-country_England",
        "native-country_Puerto-Rico", "native-country_Canada",
        "native-country_Outlying-US(Guam-USVI-etc)", "native-country_India",
        "native-country_Japan", "native-country_Greece",
        "native-country_South", "native-country_China",
        "native-country_Cuba", "native-country_Iran",
        "native-country_Honduras", "native-country_Italy",
        "native-country_Poland", "native-country_Jamaica",
        "native-country_Vietnam", "native-country_Portugal",
        "native-country_Ireland", "native-country_France",
        "native-country_Dominican-Republic", "native-country_Laos",
        "native-country_Ecuador", "native-country_Taiwan",
        "native-country_Haiti", "native-country_Columbia",
        "native-country_Hungary", "native-country_Guatemala",
        "native-country_Nicaragua", "native-country_Scotland",
        "native-country_Thailand", "native-country_Yugoslavia",
        "native-country_El-Salvador", "native-country_Trinadad&Tobago",
        "native-country_Peru", "native-country_Hong",
        "native-country_Holand-Netherlands",
    ]
    X = X.drop(columns=(["capital-gain", "capital-loss", "education-num"] +
                        countryCols))
    return [X, Y]
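# Processor.normalize(X, cols) rescales the listed continuous columns; whether
# it uses min-max or z-score scaling is decided in Project1/src/Processor.py.
# A hypothetical min-max version for illustration only:


def _normalize_sketch(X, cols):
    X = X.copy()
    for col in cols:
        # Rescale each listed column to the [0, 1] range.
        col_min, col_max = X[col].min(), X[col].max()
        X[col] = (X[col] - col_min) / (col_max - col_min)
    return X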
def mam(X):
    """Clean the mammographic-mass data: impute missing values and split
    features from the result label."""
    X = X.copy()
    X = Processor.fillMissing(X)
    Y = X["result"]
    X = X.drop(columns=["result"])
    return [X, Y]
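# Processor.fillMissing imputes rather than drops. A rough sketch, assuming the
# "?" placeholder is replaced with each column's most frequent value (the
# actual strategy is defined in Project1/src/Processor.py):

import numpy as np


def _fill_missing_sketch(X, placeholder="?"):
    X = X.replace(placeholder, np.nan)
    # Fill each column with its mode; iloc[0] picks one value if there are ties.
    return X.apply(lambda col: col.fillna(col.mode().iloc[0]))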
    df = pd.DataFrame(data)
    df_to_table(df, 'time_table_all_final')
    # print(evaluate_acc(Processor.ToNumpyCol(Y_test), model.predict(X_test.to_numpy())))
    # print(cross_validation(5, X_train.to_numpy(), Processor.ToNumpyCol(Y_train), model))

elif ds == "ionosphere":
    path = "../datasets/ionosphere/ionosphere.data"
    header = ["{}{}".format("col", x) for x in range(33 + 1)]
    header.append("signal")

    All = Processor.read(path, header)
    [X, Y] = Clean.Ionosphere(All)
    [X_train, X_test, Y_train, Y_test] = Processor.split(X, Y, train=0.8)

    setup = '''
from Project1.src.NaiveBayes import NaiveBayes
from Project1.src.Processor import Processor
from Project1.src.Clean import Clean
from Project1.src.CrossValidation import cross_validation

path = "../datasets/ionosphere/ionosphere.data"
header = ["{}{}".format("col", x) for x in range(33 + 1)]
header.append("signal")
import matplotlib.pyplot as plt
import numpy as np

from Project1.src.LogisticRegression import LogisticRegression
from Project1.src.NaiveBayes import NaiveBayes
from Project1.src.Processor import Processor
from Project1.src.Clean import Clean
from Project1.src.CrossValidation import cross_validation
from Project1.src.CrossValidation import evaluate_acc

print("Analyzing the ionosphere data set")
path = "../datasets/ionosphere/ionosphere.data"
header = ["{}{}".format("col", x) for x in range(33 + 1)]
header.append("signal")

All = Processor.read(path, header)
[X, Y] = Clean.Ionosphere(All)
X = X.to_numpy()
Y = Processor.ToNumpyCol(Y)

# Train on progressively larger prefixes of the data set.
iters = np.arange(20, X.shape[0], 50)
# print(X.shape)
# print(Y.shape)
accuracies = []
for iter_ in iters:
    # rowsX = X[0:X.shape[0], :]
    # rowsY = Y[0:Y.shape[0], :]
    rowsX = X[0:iter_, :]
Learning rates and the gradient threshold were chosen using the results of the
hyperparameter tuning script.
"""
from Project1.src.LogisticRegression import LogisticRegression
from Project1.src.NaiveBayes import NaiveBayes
from Project1.src.CrossValidation import cross_validation
from Project1.src.Processor import Processor
from Project1.src.Clean import Clean
from Project1.src.HPTuning import df_to_table

import pandas as pd

# Find accuracies for the ionosphere data set
print("Analyzing the ionosphere data set")
path = "../datasets/ionosphere/ionosphere.data"
header = ["{}{}".format("col", x) for x in range(33 + 1)]
header.append("signal")

All = Processor.read(path, header)
[X, Y] = Clean.Ionosphere(All)

ionosphere_results = ['ionosphere']

acc, _, _ = cross_validation(5,
                             X.to_numpy(),
                             Processor.ToNumpyCol(Y),
                             LogisticRegression(),
                             learning_rate=1.0,
                             max_gradient=1e-2,
                             max_iters=50000)
ionosphere_results.append(round(acc, 2))

acc = cross_validation(5, X.to_numpy(), Processor.ToNumpyCol(Y), NaiveBayes())
ionosphere_results.append(round(acc, 2))

print(ionosphere_results)
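# cross_validation comes from Project1/src/CrossValidation.py; its exact return
# values are not shown here (the two calls above unpack it differently). As a
# hypothetical illustration of plain k-fold cross-validation that returns only
# the mean validation accuracy, assuming the model exposes fit/predict and y is
# a flat label array:

import numpy as np


def _k_fold_sketch(k, X, y, model, **fit_kwargs):
    # Split the row indices into k roughly equal folds.
    folds = np.array_split(np.arange(X.shape[0]), k)
    scores = []
    for i in range(k):
        val_idx = folds[i]
        train_idx = np.concatenate([folds[j] for j in range(k) if j != i])
        # Hypothetical interface: extra hyperparameters are forwarded to fit.
        model.fit(X[train_idx], y[train_idx], **fit_kwargs)
        preds = model.predict(X[val_idx])
        scores.append(np.mean(preds == y[val_idx]))
    return float(np.mean(scores))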