Ejemplo n.º 1
0
# Feature columns selected from the train and test frames.  Both frames are
# indexed with the same nine names.
train_columns = [
    'Pclass',
    'Sex',
    'Age',
    'SibSp',
    'Parch',
    'Fare',
    'Embarked_Q',
    'Embarked_S',
    'Embarked_missing_data',
]
test_columns = list(train_columns)

# Scale both frames in a single call and write the results back over the
# original columns.
# NOTE(review): presumably the scaler is fitted on the train frame and then
# applied to both -- confirm in processor_ms.scale_fit_train_test.
scaled_train, scaled_test = processor_ms.scale_fit_train_test(
    train[train_columns],
    test[test_columns],
)
train[train_columns] = scaled_train
test[test_columns] = scaled_test

# Fit the project's Regressor on the selected training features against the
# 'Survived' column.
regressor_object_1 = Regressor()
regressor_object_1.train_machine(train[train_columns], train['Survived'])

# Predict for the test frame and cast the result to int before printing it.
# The test frame is indexed with train_columns here; that selects the same
# columns as test_columns only while the two lists hold identical names.
prediction = regressor_object_1.predict(test[train_columns]).astype(int)
print(prediction)

# -----------------------------------------------------------------
# Submission: pair every test PassengerId with its predicted label.
# -----------------------------------------------------------------
holdout_ids = test["PassengerId"]
sub_df = {
    "PassengerId": holdout_ids,
    "Survived": prediction,
}

# Wrap the mapping in the project's Data_Set container.
# NOTE(review): presumably a DataFrame-like wrapper -- confirm in Data_Set.
ds = Data_Set(sub_df)
Ejemplo n.º 2
0
	test = master.create_dummy(test, column, 1);

# NOTE(review): the return value is discarded, so this only has an effect if
# generates_dummies mutates train/test in place -- confirm in master.
master.generates_dummies(arr, train, test)

# Dummy-encoded feature columns used for fitting and evaluation.
columns = [
    'Pclass_1', 'Pclass_2', 'Pclass_3',
    'Sex_female', 'Sex_male',
    'Age_categories_Missing',
    'Age_categories_Infant',
    'Age_categories_Child',
    'Age_categories_Teenager',
    'Age_categories_Young Adult',
    'Age_categories_Adult',
    'Age_categories_Senior',
]

target_column = 'Survived'

# First fit: the complete training frame.
r.train_machine(train[columns], train[target_column])

# Alias for the test frame; no copy is made.
holdout = test

all_X = train[columns]
all_y = train[target_column]

# Second fit: split the training frame, train on one part, predict on the
# other and score those predictions.  This call to train_machine follows the
# full-frame fit above on the same object `r`.
train_x, test_x, train_y, test_y = sp.split(
    train[columns],
    train[target_column],
)
r.train_machine(train_x, train_y)
predictions = r.predict(test_x)
accuracy = mt.model_accuracy(test_y, predictions)

# A fresh Regressor and its underlying estimator; neither name is used again
# in the visible part of the script.
regressor_object = Regressor()
reg = regressor_object.get_regressor()
# Backward elimination: refit, find the column with the largest p-value, and
# drop that column from both x_train and x_test; stop once the largest
# p-value is no greater than 0.05.
# NOTE(review): presumably fit_OLS fits an ordinary-least-squares model of
# y_train on x_train -- confirm in Back_Elimination.
max_p_value = 1            # starts above the threshold so the loop runs once
non_significant_index = -1  # -1 means "nothing to drop yet"
eliminator = None
while max_p_value > 0.05:
    # Drop the column flagged by the previous iteration (skipped first time).
    if non_significant_index != -1:
        x_train = np.delete(x_train, non_significant_index, axis=1)
        x_test = np.delete(x_test, non_significant_index, axis=1)

    eliminator = Back_Elimination()
    eliminator.fit_OLS(y_train, x_train)
    p_values = eliminator.get_p_values()

    # Largest p-value and the position of its first occurrence.
    max_p_value = np.amax(p_values)
    non_significant_index = list(p_values).index(max_p_value)
""" LOGISTIC REGRESSION """
# Train the project's Regressor on x_train / y_train and predict for x_test.
regressor = Regressor()
regressor.train_machine(x_train, y_train)
prediction = regressor.predict(x_test)
print(prediction)

# #################
# SUBMIT ANSWER
# #################
# Pair every test Id with its predicted Cover_Type.
holdout_ids = df_test['Id']
sub_df = {
    "Id": holdout_ids,
    "Cover_Type": prediction
}

ds = Data_Set(sub_df)
# Python's boolean literal is `False`; the lowercase `false` used previously
# is an undefined name and raised NameError before anything was written.
# NOTE(review): presumably Data_Set.to_csv forwards `index` to pandas'
# DataFrame.to_csv -- confirm in Data_Set.
ds.to_csv("submission", index=False)