def mlp_titanic():
    """Run the three MLP experiments (model1..model3) on the Titanic data.

    The train and test sets are loaded once with ``complete=False`` and the
    same pair, together with one shared dict of constant parameters, is
    passed to ``model1``, ``model2`` and ``model3`` in that order.

    Notes recorded from an earlier run::

        constant_params = {
            'random_state': 0,
            'solver': 'lbfgs',  # performed best of all
            'early_stopping': False,  # flag enabling early stopping of gradient descent
            'learning_rate': 'invscaling',
            'max_iter': 1000,  # maximum number of gradient descent iterations
        }

        MLP - model1: acc = 0.8731117824773413 %, tested 331 total.
        MLP - model2: acc = 0.9063444108761329 %, tested 331 total.
        MLP - model3: acc = 0.9274924471299094 %, tested 331 total.
    """
    use_complete = False
    dataset = Titanic('../data/titanic/')
    train_set = dataset.load_train(use_complete)
    test_set = dataset.load_test(use_complete)

    # Parameters shared by every model variant.
    shared_params = dict(
        random_state=0,
        solver='lbfgs',
        early_stopping=False,
        learning_rate='invscaling',
        learning_rate_init=0.001,
        max_iter=1000,
        activation='relu',
    )

    for run_model in (model1, model2, model3):
        run_model(train_set, test_set, shared_params)
def main():
    """Run the ``nb`` and ``regressions`` experiments on the Titanic data.

    Both experiments receive the same train/test pair, loaded with
    ``complete=True``.
    """
    use_complete = True
    dataset = Titanic('../data/titanic/')
    train_set = dataset.load_train(use_complete)
    test_set = dataset.load_test(use_complete)

    for experiment in (nb, regressions):
        experiment(train_set, test_set)
def titanic():
    """Show a 2-D t-SNE embedding of the Titanic training set.

    Samples with target ``1`` are drawn as green dots labelled 'Survived',
    samples with target ``0`` as red crosses labelled 'Died'.
    """
    train_set = Titanic('../../data/titanic/').load_train(complete=False)
    features, labels = train_set.data, train_set.target

    # (mask, colour, marker, legend label) for each class, in drawing order.
    groups = (
        (labels == 1, 'g', '.', 'Survived'),
        (labels == 0, 'r', 'x', 'Died'),
    )

    perplexity = 35
    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=0, verbose=1)
    embedding = tsne.fit_transform(features)

    plt.figure()
    plt.title("t-SNE for Titanic. perplexity={}, persons={}".format(perplexity, len(labels)))
    for mask, colour, marker, legend_label in groups:
        plt.scatter(embedding[mask, 0], embedding[mask, 1],
                    c=colour, marker=marker, label=legend_label)
    plt.legend(loc='best', shadow=False, scatterpoints=1)
    plt.show()
def titanic_lda_pca():
    """Compare random forests trained on PCA- and LDA-reduced Titanic data.

    For each reduction (PCA, then LDA) the transformer is fitted on the
    training split only and the *same* fitted transformer is used to project
    the test split. A random forest is trained on the projected training data
    and its accuracy on the projected test data is printed. Finally the
    projected training data is passed to ``pca_viz`` / ``lda_viz`` and the
    figures are shown.

    Previously a second PCA/LDA was fitted on the test split, which put the
    test features in a different coordinate system than the one the forest
    was trained on, and (for LDA) used the test labels to build the features.
    """
    complete = False
    titanic = Titanic('../data/titanic/')
    train = titanic.load_train(complete)
    test = titanic.load_test(complete)
    X, y = train.data, train.target

    # --- PCA -------------------------------------------------------------
    train_pca = PCA(n_components=2, random_state=0)
    train_pca_X = train_pca.fit_transform(X)
    # Project the test split with the components learned on the training
    # split so that train and test features live in the same space.
    test_pca_X = train_pca.transform(test.data)

    m = RandomForestClassifier(random_state=0, max_depth=4, min_samples_split=25, min_samples_leaf=10)
    m.fit(train_pca_X, y)
    # todo bokeh https://bokeh.pydata.org/en/latest/docs/gallery.html
    survival_prediction = m.predict(test_pca_X)

    # Statistics
    print('Titanic PCA stats:')
    print('explained variance ratio (first two components): {}'.format(train_pca.explained_variance_ratio_))
    print('Components_: \n', pd.DataFrame(train_pca.components_, ['PC1', 'PC2']))
    print('RF for PCA data: acc = {}%, tested {} total.'.format((survival_prediction == test.target).mean(),
                                                                len(survival_prediction)))

    # --- LDA -------------------------------------------------------------
    # NOTE(review): LDA yields at most n_classes - 1 components; if the target
    # is binary, recent scikit-learn versions reject n_components=2 - confirm
    # against the installed version before changing it (lda_viz may rely on it).
    train_lda = LinearDiscriminantAnalysis(n_components=2)
    traind_lda_X = train_lda.fit_transform(X, y)
    # Reuse the discriminants fitted on the training split; fitting on the
    # test split would leak test.target into the evaluated features.
    test_lda_X = train_lda.transform(test.data)

    m = RandomForestClassifier(random_state=0, max_depth=4, min_samples_split=25, min_samples_leaf=10)
    m.fit(traind_lda_X, y)
    survival_prediction = m.predict(test_lda_X)
    print('RF for LDA data: acc = {}%, tested {} total.'.format((survival_prediction == test.target).mean(),
                                                                len(survival_prediction)))

    pca_viz(train_pca_X, y)
    lda_viz(traind_lda_X, y)
    plt.show()
def main():
    """Fit several decision trees on the Titanic data and report accuracy.

    Data description: https://www.kaggle.com/c/titanic/data

    Each parameter set is passed to ``DecisionTreeClassifier`` (with
    ``random_state=5``), the tree is fitted on the training split, its
    accuracy on the test split is printed, and the tree is handed to
    ``dt_viz`` together with a per-model output name.
    """
    use_complete = False
    dataset = Titanic('../data/titanic/')
    train = dataset.load_train(use_complete)
    test = dataset.load_test(use_complete)

    option_names = ("max_depth", "min_samples_split", "min_samples_leaf", "max_features")
    option_rows = [
        (10, 10, 50, 2),     # 2
        (4, 10, 50, 3),      # 3
        (10, 40, 10, 2),     # 4
        (10, 2, 1, 3),       # 5
        (10, 25, 10, None),  # 6
        (10, 25, 10, 6),     # 7
    ]
    # Model 1 uses the classifier defaults (empty override dict).
    params = [{}] + [dict(zip(option_names, row)) for row in option_rows]

    for model_no, overrides in enumerate(params, start=1):
        tree = DecisionTreeClassifier(random_state=5, **overrides)
        tree.fit(train.data, train.target)
        predicted = tree.predict(test.data)
        accuracy_pct = np.round((predicted == test.target).mean(), 4) * 100
        print('DT {}: acc = {}%, tested {} total.'.format(model_no, accuracy_pct, len(predicted)))
        dt_viz(tree, train.feature_names, train.target_names,
               "dt_titanic_{}_model.tmp".format(model_no))
def main():
    """Fit SVC models on the Titanic data and print their test accuracy.

    A nine-entry parameter grid is defined first but is immediately replaced
    by a single linear-kernel configuration, so only that one model is
    actually trained and reported.
    """
    dataset = Titanic('../data/titanic/')
    train = dataset.load_train(False)
    test = dataset.load_test(False)

    # Full grid: defaults (#1), then rbf (#2-#5) and linear (#6-#9) kernels,
    # each with C in {1, 3} and tol in {1e-3, 1e-5}.
    params = [{}] + [
        {"kernel": kernel, "C": c, "max_iter": max_iter, "tol": tol}
        for kernel, max_iter in (("rbf", -1), ("linear", 10 * 10**6))
        for c in (1, 3)
        for tol in (1e-3, 1e-5)
    ]
    # NOTE(review): the grid above is overridden here by one quick
    # configuration - confirm whether this override is still wanted.
    params = [
        dict(kernel="linear", C=1, max_iter=10 * 10**1, tol=0.001),
    ]

    for model_no, overrides in enumerate(params, start=1):
        svm = SVC(random_state=0, **overrides)
        svm.fit(train.data, train.target)
        predicted = svm.predict(test.data)
        accuracy_pct = np.round((predicted == test.target).mean(), 4) * 100
        print('SVM {}: acc = {}%, tested {} total.'.format(model_no, accuracy_pct, len(predicted)))
def main():
    """Fit random forests over a small parameter grid and print accuracy.

    The grid holds the classifier defaults followed by every combination of
    ``max_features`` in (2, 3, 7) and ``n_estimators`` in (13, 5, 2), with
    the remaining options fixed. Each forest (``random_state=0``) is fitted
    on the training split loaded with ``complete=True`` and scored on the
    test split.
    """
    params = [{}] + [
        {
            "max_depth": 15,
            "n_estimators": n_trees,
            "min_samples_split": 50,
            "min_samples_leaf": 10,
            "max_features": n_features,
        }
        for n_features in (2, 3, 7)
        for n_trees in (13, 5, 2)
    ]

    use_complete = True
    dataset = Titanic('../data/titanic/')
    train = dataset.load_train(use_complete)
    test = dataset.load_test(use_complete)

    for overrides in params:
        forest = RandomForestClassifier(random_state=0, **overrides)
        forest.fit(train.data, train.target)
        predicted = forest.predict(test.data)
        accuracy = (predicted == test.target).mean()
        print('Random Forest: acc = {}%, tested {} total.'.format(accuracy, len(predicted)))