# Feature columns fed to the scaler and the model.
# Debug aids: print(train.columns) / print(test.columns)
train_columns = [
    'Pclass',
    'Sex',
    'Age',
    'SibSp',
    'Parch',
    'Fare',
    'Embarked_Q',
    'Embarked_S',
    'Embarked_missing_data',
]
# The test set uses exactly the same feature names, kept as its own list.
test_columns = list(train_columns)

# Scale both feature frames with the project helper and write the results
# back into the original frames (train first, then test).
# Debug aid: print(train[train_columns].to_string())
scaled_train, scaled_test = processor_ms.scale_fit_train_test(
    train[train_columns],
    test[test_columns],
)
train[train_columns] = scaled_train
test[test_columns] = scaled_test

# Regressor: fit on the training features against 'Survived', then predict
# for the test features and cast the predictions to int.
regressor_object_1 = Regressor()
regressor_object_1.train_machine(train[train_columns], train['Survived'])
prediction = regressor_object_1.predict(test[train_columns]).astype(int)
print(prediction)

# #################
# SUBMIT ANSWER
# #################
# Pair each test PassengerId with its predicted 'Survived' value.
holdout_ids = test["PassengerId"]
sub_df = {
    "PassengerId": holdout_ids,
    "Survived": prediction,
}
ds = Data_Set(sub_df)
# NOTE(review): `column` and `arr` are defined outside this excerpt; the
# create_dummy call may originally have lived inside a loop -- confirm.
test = master.create_dummy(test, column, 1)
master.generates_dummies(arr, train, test)

# Name of the label column and the dummy-encoded features used for training.
# A smaller alternative that was tried: ['Pclass_2', 'Pclass_3', 'Sex_male']
target_column = 'Survived'
columns = [
    'Pclass_1',
    'Pclass_2',
    'Pclass_3',
    'Sex_female',
    'Sex_male',
    'Age_categories_Missing',
    'Age_categories_Infant',
    'Age_categories_Child',
    'Age_categories_Teenager',
    'Age_categories_Young Adult',
    'Age_categories_Adult',
    'Age_categories_Senior',
]

# First fit on the complete training frame.
r.train_machine(train[columns], train[target_column])

# Keep handles on the hold-out frame and on the full feature/label sets.
holdout = test
all_X = train[columns]
all_y = train[target_column]

# Split the training data, refit on the training part only, and score the
# predictions for the held-back part with the project metric helper.
# Debug aid: print(sr.get_train()['Age'].describe())
train_x, test_x, train_y, test_y = sp.split(
    train[columns],
    train[target_column],
)
r.train_machine(train_x, train_y)
predictions = r.predict(test_x)
accuracy = mt.model_accuracy(test_y, predictions)

# Fresh Regressor wrapper and the underlying regressor object it exposes.
regressor_object = Regressor()
reg = regressor_object.get_regressor()
# Backward elimination: refit on every pass and drop the feature column with
# the largest p-value, until the largest p-value is no longer above 0.05.
# NOTE(review): layout reconstructed from a whitespace-collapsed line; the
# loop is assumed to end after `non_significant_index` is updated -- confirm.
max_p_value = 1  # starts above the threshold so the loop runs at least once
non_significant_index = -1  # -1 means "no column selected for removal yet"
eliminator = None
while max_p_value > 0.05:
    if non_significant_index != -1:
        # Drop the same column (axis 1) from both sets so their features
        # stay aligned.
        x_train = np.delete(x_train, non_significant_index, 1)
        x_test = np.delete(x_test, non_significant_index, 1)
    eliminator = Back_Elimination()
    eliminator.fit_OLS(y_train, x_train)
    p_values = eliminator.get_p_values()
    max_p_value = np.amax(p_values)
    # Position of the (first) column carrying that largest p-value.
    non_significant_index = list(p_values).index(max_p_value)

# LOGISTIC REGRESSION
# Fit on the reduced training features and predict for the reduced test set.
regressor = Regressor()
regressor.train_machine(x_train, y_train)
prediction = regressor.predict(x_test)
print(prediction)

# #################
# SUBMIT ANSWER
# #################
# Pair each test Id with its predicted Cover_Type and write the submission.
holdout_ids = df_test['Id']
sub_df = {
    "Id": holdout_ids,
    "Cover_Type": prediction,
}
ds = Data_Set(sub_df)
# Fixed: the original passed `index=false`, which raises NameError because
# Python's boolean constant is spelled `False`.
ds.to_csv("submission", index=False)