# NOTE(review): the next line is the tail of a call that opens above this
# chunk. It is kept exactly as it appears in SOURCE because the opening part
# of the statement is not visible from here.
min_child_weight=min_child_weight, reg_alpha=reg_alpha, reg_lambda=reg_lambda #eval_metric=eval_metric )

# Fit the classifier and print the wall-clock training time in seconds.
t2 = time.time()
classifier.fit(X_train, y_train)
t3 = time.time()
print(t3 - t2)

# Predicting the Test set results
# Column 1 of predict_proba is used as the score for both splits
# (presumably the positive-class probability -- confirm class ordering).
y_pred = classifier.predict_proba(X_test)[:, 1]
y_pred_train = classifier.predict_proba(X_train)[:, 1]

print("gini normalized score (train): ")
gini_score = gini_normalized(y_train, y_pred_train)
print(gini_score)

print("gini normalized score (test): ")
gini_score = gini_normalized(y_test, y_pred)
print(gini_score)

import numpy as np

# Dump labels and scores as text files for offline inspection.
np.savetxt("y_test", y_test)
np.savetxt("y_pred", y_pred)
# The train files previously received the *test* arrays; write the train
# labels and train scores so each file matches its name.
np.savetxt("y_train", y_train)
np.savetxt("y_pred_train", y_pred_train)

print("mean de y pred")
print(np.mean(y_pred))
# Fitting the ANN to the Training set
# Class 1 samples are weighted by `alpha` relative to class 0 (weight 1.0).
class_weight = {0: 1., 1: alpha}
classifier.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
               class_weight=class_weight)

# Part 3 - Making the predictions and evaluating the model

# Predicting the Test set results
y_pred = classifier.predict(X_test)

print("gini normalized score: ")
gini_score = gini_normalized(y_test, y_pred)
print(gini_score)

import numpy as np

# Dump labels and predictions as text files for offline inspection.
np.savetxt("y_test", y_test)
np.savetxt("y_pred", y_pred)

print("mean de y pred")
print(np.mean(y_pred))

# Load the previously stored results. The context manager closes the file
# even if reading fails.
with open("results.json", "r") as f:
    results_txt = f.read()
results = json.loads(results_txt)

# Uncomment this line if you want to save the results
# results.append(parameters)
# Validation dataset, bound to the training dataset through `reference`.
lgb_eval = lgb.Dataset(X_eval, y_eval, reference=lgb_train)

# Train the first booster: up to `num_boost_round` rounds, with a window of
# 100 rounds for early stopping on `lgb_eval`.
# NOTE(review): `early_stopping_rounds` / `verbose_eval` were removed from
# lgb.train() in LightGBM 4.0 (replaced by callbacks) -- confirm the pinned
# LightGBM version before upgrading.
clf_1 = lgb.train(
    params_1,
    lgb_train,
    num_boost_round=num_boost_round,
    valid_sets=lgb_eval,
    early_stopping_rounds=100,
    verbose_eval=50,
)

from util import gini_normalized

# Predict raw scores for validation ids
y_eval_pred = clf_1.predict(X_eval, raw_score=True)
print("Gini eval mean on all trees: ")
print(gini_normalized(y_eval, y_eval_pred))

y_train_pred = clf_1.predict(X_train, raw_score=True)
print("Gini train mean on all trees: ")
print(gini_normalized(y_train, y_train_pred))

c1 = 0
c2 = 0

# Select the training rows whose score is below 0.2 for a second training
# pass. The previous code called range() on the boolean mask itself (a
# TypeError for an array) and filtered on the score's truthiness instead of
# the mask.
# NOTE(review): the scores are raw (raw_score=True), so 0.2 is a threshold on
# the raw output, not on a probability -- confirm this is intended.
temp = y_train_pred < 0.2
indexes_to_retrain = [i for i, below in enumerate(temp) if below]
X_to_retrain = X_train[indexes_to_retrain]
y_to_retrain = y_train[indexes_to_retrain]

# NOTE(review): SOURCE is cut off here in the middle of the next statement.
# The visible fragment is preserved verbatim below; it cannot be completed
# from this view.
# clf_2 = lgb.train(params_1,
# NOTE(review): `train_index`, `test_index` and the fold counter `i` are not
# defined in this chunk. The statements up to `i += 1` presumably form the
# body of a K-fold loop whose header sits above the view -- re-indent them
# under that loop when splicing this back in.

# Remaining hidden layers. The loop uses its own index name so it no longer
# overwrites the fold counter `i` that is printed and incremented below.
for layer_idx in range(len(layers) - 3):
    classifier.add(Dense(units=layers[layer_idx + 2],
                         kernel_initializer='uniform',
                         activation=activation_functions[layer_idx + 1]))

# Adding the output layer
classifier.add(Dense(units=1, kernel_initializer='uniform',
                     activation='sigmoid'))

# Compiling the ANN
classifier.compile(optimizer='adam', loss=loss, metrics=[])

# Split the training data into this fold's train / evaluation parts.
train_x, train_y = X_train[train_index], y_train[train_index]
eval_x, eval_y = X_train[test_index], y_train[test_index]

classifier.fit(train_x, train_y, batch_size=batch_size, epochs=epochs,
               class_weight=class_weight)

# Score the held-out part and keep this fold's test-set predictions.
res_eval = classifier.predict(eval_x)
res = classifier.predict(X_test)
results.append(res)

print('gini_eval', i)
gini_score = gini_normalized(eval_y, res_eval)
print(gini_score)
i += 1


def to_csv(y_pred, ids):
    """Write the submission file 'sumbission_5Kfold_nn.csv'.

    The file gets an ``id,target`` header followed by one row per entry of
    ``y_pred``, pairing ``ids[k]`` with ``y_pred[k]``.

    Args:
        y_pred: indexable sequence of predicted targets.
        ids: indexable sequence of row ids, at least as long as ``y_pred``.
    """
    import csv

    # newline='' lets the csv writer control line endings, as the csv module
    # documentation requires for file objects passed to csv.writer.
    with open('sumbission_5Kfold_nn.csv', 'w', newline='') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        spamwriter.writerow(['id', 'target'])
        for row, target in enumerate(y_pred):
            spamwriter.writerow([ids[row], target])


# Average the per-fold test predictions over however many folds were run
# (same result as the former hard-coded five-term sum when there are 5).
submission = sum(results) / len(results)
idx = dataset_test.iloc[:, 0].values
to_csv(submission[:, 0], idx)
kf = KFold(n_splits=K, random_state=42, shuffle=True)

# Training with KFold cross validation: one forest per fold, keeping each
# fold's predictions on X_test for averaging afterwards.
i = 0
results = []
for train_index, test_index in kf.split(X_train):
    train_x, train_y = X_train[train_index], y_train[train_index]
    eval_x, eval_y = X_train[test_index], y_train[test_index]

    # max_features='sqrt' is what 'auto' meant for classifiers; 'auto' was
    # removed in scikit-learn 1.3, so the explicit value keeps this working.
    classifier = RandomForestClassifier(n_estimators=30,
                                        criterion='gini',
                                        random_state=1,
                                        max_depth=5,
                                        max_features='sqrt',
                                        class_weight=class_weight)
    classifier.fit(train_x, train_y)

    # NOTE(review): predict() returns class labels, so the gini scores and
    # the averaged submission below are built from labels rather than
    # probabilities -- confirm whether predict_proba(...)[:, 1] was intended.
    res_train = classifier.predict(train_x)
    res_eval = classifier.predict(eval_x)
    res = classifier.predict(X_test)
    results.append(res)

    print('round k=', i)
    print('eval gini score ', 'train gini score')
    gini_eval = gini_normalized(eval_y, res_eval)
    gini_train = gini_normalized(train_y, res_train)
    print(gini_eval, ' ', gini_train)
    print()
    i += 1


def to_csv(y_pred, ids):
    """Write the submission file 'sumbission_5Kfold_random_forest.csv'.

    The file gets an ``id,target`` header followed by one row per entry of
    ``y_pred``, pairing ``ids[k]`` with ``y_pred[k]``.

    Args:
        y_pred: indexable sequence of predicted targets.
        ids: indexable sequence of row ids, at least as long as ``y_pred``.
    """
    import csv

    # newline='' lets the csv writer control line endings, as the csv module
    # documentation requires for file objects passed to csv.writer.
    with open('sumbission_5Kfold_random_forest.csv', 'w',
              newline='') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        spamwriter.writerow(['id', 'target'])
        for row, target in enumerate(y_pred):
            spamwriter.writerow([ids[row], target])


# Average the per-fold test predictions over all K folds actually run
# (same result as the former hard-coded five-term sum when K == 5).
submission = sum(results) / len(results)