# Example no. 1
0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Read the raw table from disk, then pull two NumPy arrays out of it:
# X holds every column except the last one, Y holds the column at index 3.
dataset = pd.read_csv("Dataset/Data.csv")
X, Y = dataset.iloc[:, :-1].values, dataset.iloc[:, 3].values

# Show both arrays, one after the other, for a quick visual check.
print(X, Y, sep="\n")

# Handle missing data: replace every NaN in feature columns 1 and 2 with the
# mean of the other observations in the same column.
#
# `sklearn.preprocessing.Imputer` was removed from scikit-learn (0.22);
# `sklearn.impute.SimpleImputer` is its replacement.  SimpleImputer always
# imputes column-wise, so the old `axis=0` argument has no counterpart, and
# the missing-value marker is the float `np.nan`, not the string 'NaN'.
from sklearn.impute import SimpleImputer

# Feature matrix (every column except the last) and target (column index 3).
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values

imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
# Learn the per-column means from columns 1-2 only, then write the imputed
# values back in place; column 0 is left untouched here.
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])
#----------------------------------------------------------------------------------

# Encode the categorical data in X and y as numbers.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Column 0 of X: map each distinct category to an integer code.
labelencoder_X = LabelEncoder()
category_codes = labelencoder_X.fit_transform(X[:, 0])
X[:, 0] = category_codes

# One-hot encode column 0.  The `categorical_features` argument of
# OneHotEncoder was removed from scikit-learn (0.22), so the encoder is fitted
# on that single column and the matrix is reassembled by hand: indicator
# columns first, the remaining columns after them -- the same layout the old
# `categorical_features=[0]` call produced.
onehotencoder = OneHotEncoder()
category_indicators = onehotencoder.fit_transform(
    category_codes.reshape(-1, 1)
).toarray()
# The cast restores a plain float matrix, matching what `.toarray()` returned
# when the encoder processed the whole matrix.
X = np.hstack((category_indicators, X[:, 1:])).astype(float)

# Target vector: integer-encode the class labels.
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)
#----------------------------------------------------------------------------------

# Splitting the dataset into the Training set and Test set