from Ensemble.Predictor import Predictor
import xgboost as xgb
import sys

# Module-level flag; its consumer is not visible in this part of the script.
test = False

# Load the train and test splits; the 'Ticket' column is discarded up front.
titanic_train_data = pd.read_csv(r"data/train.csv").drop(['Ticket'], axis=1)
titanic_test_data = pd.read_csv(r"data/test.csv").drop(['Ticket'], axis=1)

# Stack train on top of test so the feature transformations below are applied
# to both at once. DataFrame.append was removed in pandas 2.0; pd.concat is
# the supported replacement and, like append, keeps each frame's original
# index labels. The label column is dropped from the combined features and
# kept separately in survived_data.
all_data = pd.concat([titanic_train_data, titanic_test_data]).drop(
    ['Survived'], axis=1)
survived_data = titanic_train_data['Survived']

# Most frequent non-null 'Embarked' value across the combined data.
freq_port = all_data['Embarked'].dropna().mode()[0]
# Rows with no 'Embarked' value receive that most frequent value.
all_data.loc[:, 'Embarked'] = all_data.Embarked.fillna(value=freq_port)

# Age correction is delegated to the project helper `ap.correct_age`, which
# is defined outside this part of the file.
all_data = ap.correct_age(all_data)
#all_data = cp.correct_cabin(all_data)

# Reduce 'Cabin' to its first character; missing cabins get the
# placeholder letter 'U'.
all_data.loc[:, 'Cabin'] = all_data['Cabin'].map(lambda x: 'U'
                                                 if pd.isnull(x) else x[0])
# Collapse the letter into a binary flag: 0 for every listed letter, 1 for 'U'.
# The result is assigned back to the column explicitly. Calling
# replace(..., inplace=True) on `all_data.Cabin` mutates an intermediate
# Series (chained assignment), which is deprecated and leaves `all_data`
# untouched once pandas copy-on-write is active. Plain column assignment
# (rather than .loc) also lets the column take the dtype of the new values.
all_data['Cabin'] = all_data['Cabin'].replace(
    ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'T', 'U'],
    [0, 0, 0, 0, 0, 0, 0, 0, 1])

# Run the project's feature preprocessing (`tp.preprocessing`, defined
# outside this part of the file) over the combined frame.
all_data = tp.preprocessing(all_data)

# Rows positioned after the labelled ones (one label per entry of
# survived_data) are the ones still to be predicted.
to_predict = all_data[len(survived_data):]

# Split the labelled rows and their labels, with test_size=0.4.
# NOTE(review): train_test_split is presumably scikit-learn's -- its import is
# not visible in this part of the file.
train_x, test_x, train_y, test_y = train_test_split(
    all_data[:len(survived_data)],
    survived_data,
    test_size=0.4,
)

with open('error.log', 'w+') as err_log:
# Example #2
# 0
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import numpy as np

# read data
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Load the raw train and test tables (train first, then test).
titanic_train_data, titanic_test_data = (
    pd.read_csv(r"data/train.csv"),
    pd.read_csv(r"data/test.csv"),
)

# Age correction via the project helper `ap.correct_age`, applied to the
# train table first and the test table second.
#titanic_train_data.Age.dropna(axis=0)
titanic_train_data, titanic_test_data = (
    ap.correct_age(titanic_train_data),
    ap.correct_age(titanic_test_data),
)

# Feature preprocessing via the project helper `tp.preprocessing`, again
# train first and test second.
titanic_train_data, titanic_test_data = (
    tp.preprocessing(titanic_train_data),
    tp.preprocessing(titanic_test_data),
)

# split train data

# Incremented once at the start of every iteration of the loop below.
counter = 0
# One unsigned 32-bit zero per row of the preprocessed test data.
# NOTE(review): presumably accumulates per-row results across the loop
# iterations -- confirm against the loop body.
predictions = np.zeros(shape=titanic_test_data.shape[0], dtype=np.uint32)

for i in range(2000):
    counter += 1
    train_x, test_x, train_y, test_y = train_test_split(