Example #1
import os
import sys
from dotenv import load_dotenv, find_dotenv
from sklearn.metrics import confusion_matrix
from xgboost import XGBClassifier

# Point sys.path at the shared modules dir defined in .env (e.g. lib=/path/to/lib)
load_dotenv(find_dotenv())
sys.path.insert(0, os.getenv("lib"))
from utils import LoadData
from preprocessing import PreProcessing
from visuals import ClassifierVisual

# Load the data: columns 3-12 are the features, column 13 is the target (Exited)
dataset = LoadData("Churn_Modelling.csv").data
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values

# Let's do some preprocessing...
processor = PreProcessing()
# Label-encode the categorical columns (1: country, 2: gender)
X[:, 1] = processor.encode(X[:, 1])
X[:, 2] = processor.encode(X[:, 2])
# One-hot encode the country column, then drop one dummy column
# to avoid the dummy variable trap
X = processor.hot_encoding(data=X, features=[1])
X = X[:, 1:]

# Split the data into training and test sets
X_train, X_test, y_train, y_test = processor.split(X, y, test_size=0.2)

# Fitting XGBoost
classifier = XGBClassifier()
classifier.fit(X_train, y_train)

# Predicting the test results
y_pred = classifier.predict(X_test)

# Making the confusion matrix
cm = confusion_matrix(y_test, y_pred)
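
LoadData and PreProcessing are project-local helpers, so their internals aren't shown on this page. As a rough sketch, the encoding steps above presumably correspond to the following plain scikit-learn calls (the column roles are assumptions based on the Churn_Modelling columns):

import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Label-encode the categorical columns (1: country, 2: gender)
X[:, 1] = LabelEncoder().fit_transform(X[:, 1])
X[:, 2] = LabelEncoder().fit_transform(X[:, 2])

# One-hot encode the country column; drop="first" removes one dummy column,
# matching the hot_encoding(...) call followed by X = X[:, 1:]
country = OneHotEncoder(drop="first").fit_transform(X[:, [1]]).toarray()
X = np.concatenate([country, np.delete(X, 1, axis=1)], axis=1).astype(float)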
Example #2
import numpy as np

# Round the features and cast them to integers
X = np.around(X, decimals=0).astype(int)

# Splitting the data into train and test sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

# Feature scaling: fit on the training set only, then reuse those statistics
# on the test set so no test information leaks into the scaler
from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

#-----------Using the PreProcessing class from preprocessing.py:---------------
processor = PreProcessing()
# Fill Missing data
X[:, 1:3] = processor.replace_with_mean(X[:, 1:3])
# Handle categorical data
X[:, 0] = processor.encode(X[:, 0])
X = processor.hot_encoding(X, features=[0])
# Encode target
y = processor.encode(y)
# Split the data
X_train, X_test, y_train, y_test = processor.split(X,
                                                   y,
                                                   test_size=0.2,
                                                   random_state=0)
# Feature Scaling
X_train = processor.fit_scaler(X_train)
X_test = processor.scale(X_test)
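
For comparison with the inline scikit-learn version above, here is a plausible reconstruction of the PreProcessing helper. The method names and signatures match the calls on this page, but the implementation is an assumption: a minimal sketch built on the standard scikit-learn tools.

import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

class PreProcessing:
    """Hypothetical sketch of the project-local helper used above."""

    def __init__(self):
        self.scaler = StandardScaler()

    def replace_with_mean(self, data):
        # Fill missing values with the column mean
        return SimpleImputer(strategy="mean").fit_transform(data)

    def encode(self, column):
        # Label-encode one categorical column
        return LabelEncoder().fit_transform(column)

    def hot_encoding(self, data, features):
        # One-hot encode the given column indices, placing the dummy
        # columns first (as the indexing in the examples above assumes)
        dummies = OneHotEncoder().fit_transform(data[:, features]).toarray()
        rest = np.delete(data, features, axis=1)
        return np.concatenate([dummies, rest], axis=1).astype(float)

    def split(self, X, y, test_size=0.2, random_state=None):
        return train_test_split(X, y, test_size=test_size,
                                random_state=random_state)

    def fit_scaler(self, X_train):
        # Fit the scaler on the training data and transform it
        return self.scaler.fit_transform(X_train)

    def scale(self, X_test):
        # Reuse the training statistics on unseen data
        return self.scaler.transform(X_test)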