Beispiel #1
0
from numpy import where, mean, std, c_, array
from pandas.core.frame import DataFrame
from mlfwk.utils import split_random, get_project_root
from collections import Counter
from mlfwk.models import knn
from mlfwk.metrics import metric
from mlfwk.visualization import generate_space, coloring
from matplotlib.colors import ListedColormap
from mlfwk.readWrite import load_base
from mlfwk.utils import normalization

if __name__ == '__main__':
    print("run coluna 3 classes")

    # Load the dataset from the ARFF file.
    base = load_base(path='column_3C_weka.arff', type='arff')

    # Names of the six numeric feature columns.
    features = ['pelvic_incidence', 'pelvic_tilt', 'lumbar_lordosis_angle', 'sacral_slope', 'pelvic_radius',
                'degree_spondylolisthesis']

    # DataFrame.info() writes its summary to stdout itself and returns None,
    # so it must not be wrapped in print() (that would emit a stray "None").
    base.info()

    # ----------------------------- Clean the data ----------------------------------------------------------------

    # -------------------------- Normalization ------------------------------------------------------------------

    # Normalize every feature column using the 'min-max' mode of the project helper.
    base[features] = normalization(base[features], type='min-max')

    # Keep only the last two features ('pelvic_radius', 'degree_spondylolisthesis')
    # together with whatever non-feature columns the file provides.
    # NOTE(review): `features` still lists all six names after this drop; indexing
    # `base[features]` further down would raise a KeyError - confirm it is not reused.
    base = base.drop(['pelvic_incidence', 'pelvic_tilt', 'lumbar_lordosis_angle', 'sacral_slope'], axis=1)
Beispiel #2
0
        'best_cf': [],
        'alphas': []
    }
    # Accumulators keyed by metric name; every key starts with its own empty list
    # (presumably extended once per realization - the loop body is outside this excerpt).
    results = {
        metric_name: []
        for metric_name in (
            'realization',
            'ACCURACY',
            # 'MCC' is currently disabled
            'f1_score',
            'precision',
            'recall',
            'cf',
            'alphas',
        )
    }

    # Load the dataset.
    base = load_base(path='iris.data', type='csv')

    # Normalize the four measurement columns using the 'min-max' mode of the project helper.
    measurement_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
    base[measurement_columns] = normalization(base[measurement_columns], type='min-max')

    # Row/column counts (the 'Species' column is still present here) and the number of classes.
    N, M = base.shape
    C = len(base['Species'].unique())

    # One indicator column per distinct species value
    # (assumes `pd` is pandas - its import is outside this excerpt).
    y_out_of_c = pd.get_dummies(base['Species'])

    # Swap the textual label for the indicator columns: features first, indicators last.
    # NOTE(review): `concatenate` is presumably numpy's, in which case `base` stops being
    # a DataFrame here - confirm against the imports of the full script.
    base = base.drop(columns=['Species'])
    base = concatenate([base[measurement_columns], y_out_of_c], axis=1)

    for realization in range(20):
        train, test = split_random(base, train_percentage=.8)
Beispiel #3
0
from pandas import DataFrame
from matplotlib.colors import ListedColormap

from numpy import where, append, ones, array, zeros, mean, argmax, linspace, concatenate, c_, std
from mlfwk.metrics import metric
from mlfwk.readWrite import load_base
from mlfwk.utils import split_random, get_project_root, normalization, out_of_c_to_label
from mlfwk.models import MultiLayerPerceptron
from mlfwk.visualization import generate_space, coloring

if __name__ == '__main__':
    print("run emt")

    # --------------------------- Read dataset ----------------------------------------

    # Load the measurements, discard the 'profile_id' column and keep only the
    # first 100000 rows.
    df = load_base('measures_v2.csv', type='csv').drop(columns=['profile_id']).head(100000)

    nRow, nCol = df.shape
    print(f'There are {nRow} rows and {nCol} columns')

    # info() prints its own summary, so no print() wrapper is needed.
    df.info()

    # Input columns ('profile_id' is deliberately left out).
    features = [
        'u_q', 'coolant', 'u_d', 'motor_speed',
        'i_d', 'i_q', 'ambient', 'torque',
    ]

    # Columns to be predicted.
    targets = ['stator_yoke', 'pm', 'stator_winding', 'stator_tooth']

    # -------------------- Realizations ---------------------------------------------
Beispiel #4
0
        'ACCURACY': [],
        # 'MCC': [],
        'f1_score': [],
        'precision': [],
        'recall': [],
        # 'cf': [],
        'alphas': []
    }

    # Load the dataset. The file is read with explicit column names:
    # 34 attribute columns 'x0'..'x33' followed by the label column 'y'.
    columns = ['x' + str(i) for i in range(34)]
    columns.append('y')
    base = load_base(path='dermatology.data', column_names=columns, type='csv')

    # Every column except the trailing label is a feature.
    features = columns[:-1]

    # DataFrame.info() writes its summary to stdout itself and returns None,
    # so it must not be wrapped in print() (that would emit a stray "None").
    base.info()

    # ----------------------------- Clean the data ----------------------------------------------------------------

    # Column 'x33' (the age, per the original note) marks missing entries with '?'.
    # Known entries are converted to int and missing ones are filled with the mean
    # of the known values, truncated by int().
    #
    # All writes go through .loc / whole-column assignment. The previous chained
    # form `base['x33'][mask] = ...` assigns into an intermediate Series, which
    # triggers SettingWithCopyWarning and, under pandas Copy-on-Write, leaves
    # `base` unchanged. It also rescanned the column once per distinct value.
    age_is_missing = base['x33'] == '?'
    known_ages = base.loc[~age_is_missing, 'x33'].astype(int)

    # ? -> mean of column
    base.loc[age_is_missing, 'x33'] = int(known_ages.mean())

    # The column now holds only integer-like values; store it with an integer dtype
    # (the original left it as object dtype holding Python ints).
    base['x33'] = base['x33'].astype(int)
Beispiel #5
0
        'R2': [],
        'std R2': [],
        'alphas': []
    }

    # Accumulators keyed by metric name; every key starts with its own empty list
    # (presumably extended once per realization - that loop is outside this excerpt).
    results = {
        metric_name: []
        for metric_name in ('realization', 'MSE', 'RMSE', 'R2', 'alphas')
    }

    # --------------------------- Read dataset ----------------------------------------

    df = load_base('abalone.csv', type='csv')

    # Derive the age column as ring count + 1.5, then remove the raw ring
    # count in place since 'age' replaces it.
    df['age'] = df['Rings'] + 1.5
    df.drop(columns='Rings', inplace=True)

    # Label-encode the sex feature; values not listed in the mapping are left as they are.
    sex_codes = {"M": 1, "I": 0, "F": -1}
    df['Sex'] = df['Sex'].replace(sex_codes)

    # info() prints its own summary, so no print() wrapper is needed.
    df.info()

    features = [
        'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight',
        'Viscera weight', 'Shell weight', 'Sex'
Beispiel #6
0
        'alphas': []
    }

    # Accumulators keyed by metric name; every key starts with its own empty list
    # (presumably extended once per realization - that loop is outside this excerpt).
    results = {
        metric_name: []
        for metric_name in (
            'realization',
            'ACCURACY',
            # 'MCC' is currently disabled
            'f1_score',
            'precision',
            'recall',
            # 'cf' is currently disabled
            'alphas',
        )
    }

    # Load the dataset and discard the 'Sample code number' column.
    base = load_base(path='breast-cancer-wisconsin.data', type='csv')
    base = base.drop(['Sample code number'], axis=1)

    # Names of the nine feature columns.
    features = [
        'Clump Thickness', 'Uniformity of Cell Size',
        'Uniformity of Cell Shape', 'Marginal Adhesion',
        'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin',
        'Normal Nucleoli', 'Mitoses'
    ]

    # DataFrame.info() writes its summary to stdout itself and returns None,
    # so it must not be wrapped in print() (that would emit a stray "None").
    base.info()

    # ----------------------------- Clean the data ----------------------------------------------------------------

    # The values at the column Bare Nuclei are all strings so we have to transform to int each of them.
Beispiel #7
0
from mlfwk.visualization import generate_space, coloring

if __name__ == '__main__':
    print("run car fuel")

    # Summary table: one list per metric and per 'std <metric>' entry, each
    # starting empty (presumably filled after the realizations - not visible here).
    final_result = {
        column: []
        for column in ('MSE', 'std MSE', 'RMSE', 'std RMSE', 'R2', 'std R2')
    }

    # Per-realization accumulators, each starting as its own empty list.
    results = {
        column: []
        for column in ('realization', 'MSE', 'RMSE', 'R2')
    }

    df = load_base('measurements.csv', type='csv')

    # ---------------------------------- cleaning data base --------------------------------------

    # Drop the columns the original note labels as NaN columns.
    new_df = df.drop(columns=['refill liters', 'refill gas', 'specials'])

    # specials_dummies = pd.get_dummies(new_df['specials'])

    # Encode the fuel type numerically: 'E10' -> 0, 'SP98' -> 1.
    # A single map() replaces the previous chained assignments
    # (`new_df['gas_type'][mask] = ...`), which write into an intermediate Series and
    # therefore leave `new_df` untouched under pandas Copy-on-Write.
    # Any other label becomes NaN and makes the integer cast below raise, so an
    # unexpected fuel type still fails loudly instead of passing through.
    new_df['gas_type'] = new_df['gas_type'].map({'E10': 0, 'SP98': 1}).astype('int')

    target = ['consume']
    features = [