Ejemplo n.º 1
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 16 23:54:47 2017

@author: vishal
"""

from HousingSpyder import loadin_housing_data

# Load the housing data through the project-local helper.
dataset = loadin_housing_data()
#print(dataset.head())

# sklearn.preprocessing.Imputer was deprecated in scikit-learn 0.20 and
# removed in 0.22; sklearn.impute.SimpleImputer is its replacement.  The new
# class is imported under the old name so that any later code calling
# `Imputer(...)` keeps working, with a fallback for installations that
# predate sklearn.impute.
try:
    from sklearn.impute import SimpleImputer as Imputer
except ImportError:  # scikit-learn < 0.20
    from sklearn.preprocessing import Imputer

# Missing values: fill them with the per-column median.
imputer = Imputer(strategy='median')

# The imputer works on numeric columns only, so drop 'ocean_proximity'
# before fitting.
dataset_num = dataset.drop('ocean_proximity', axis=1)
#print(dataset_num.head())
imputer.fit(dataset_num)

# The fitted medians can be compared against pandas' own computation:
#print(imputer.statistics_)
#print(dataset_num.median().values)

# Apply the fitted medians to the same numeric frame.
X = imputer.transform(dataset_num)

import pandas as pd
Ejemplo n.º 2
0
@author: vishal
"""

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelBinarizer

# sklearn.preprocessing.Imputer was deprecated in scikit-learn 0.20 and
# removed in 0.22; sklearn.impute.SimpleImputer is its replacement.  It is
# imported under the old name so code that calls `Imputer(...)` keeps working.
try:
    from sklearn.impute import SimpleImputer as Imputer
except ImportError:  # scikit-learn < 0.20
    from sklearn.preprocessing import Imputer

from HousingSpyder import loadin_housing_data
from CoustomTransformation import CombinedAttributesAdder
from DataFrameSlector import DataFrameSelector

# sklearn.cross_validation was removed in scikit-learn 0.20; the splitter
# lives in sklearn.model_selection (available since 0.18).
from sklearn.model_selection import StratifiedShuffleSplit

import numpy as np

#print(dataset.head())
housing = loadin_housing_data()

# Bucket median_income into categories: ceil(income / 1.5), with every
# category of 5 or above merged into 5.0.
housing["income_cat"] = np.ceil(housing["median_income"] / 1.5)
# Assign the result back instead of `.where(..., inplace=True)` on the
# selected column: that form is chained assignment and does not update the
# DataFrame under pandas Copy-on-Write.
housing["income_cat"] = housing["income_cat"].where(
    housing["income_cat"] < 5, 5.0
)

# One stratified 80/20 split on the income category.  The model_selection
# API takes the number of splits in the constructor and the data in .split().
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2)
train_indices, test_indices = next(
    split.split(housing, housing["income_cat"])
)
# The splitter yields positional row indices, so select with .iloc; .loc
# would only be equivalent when the frame has a default 0..n-1 index.
strat_train_set = housing.iloc[train_indices]
strat_test_set = housing.iloc[test_indices]

# Separate the predictors from the target column.
# NOTE(review): "income_cat" is still present in both sets at this point;
# confirm whether it is meant to remain as a feature.
housing = strat_train_set.drop("median_house_value", axis=1)
housing_labels = strat_train_set["median_house_value"].copy()

dataset = housing

# Numeric-only view of the training predictors.
dataset_num = dataset.drop('ocean_proximity', axis=1)