# --- House-price preprocessing script ---------------------------------------
# Takes the `house` frame through: feature inspection -> column dropping ->
# imputation -> one-hot encoding -> scaling, then builds X_train / y_train.
# The helper functions used here are defined elsewhere; notes about what they
# do are inferred from their names and call sites and should be confirmed.

# Show which columns the helpers classify as continuous vs. categorical.
print(get_continuous_features(house))
print(get_categorical_features(house))

# Always drop the identifier and the target, plus whatever the helper selects
# for the 0.25 threshold (presumably a missing-data ratio -- TODO confirm).
features_to_drop = ['Id', 'SalePrice']
features_to_drop += get_features_to_drop_on_missingdata(house, 0.25)
house1 = drop_features(house, features_to_drop)
house1.info()

# Impute the categorical columns in place.
imputable_cat_features = get_categorical_features(house1)
cat_imputer = get_categorical_imputers(house1, imputable_cat_features)
house1[imputable_cat_features] = cat_imputer.transform(
    house1[imputable_cat_features]
)

# Impute the continuous columns in place.
imputable_cont_features = get_continuous_features(house1)
cont_imputer = get_continuous_imputers(house1, imputable_cont_features)
house1[imputable_cont_features] = cont_imputer.transform(
    house1[imputable_cont_features]
)
house1.info()

# Encode the categorical columns, then scale everything. The scaler output is
# wrapped back into a DataFrame so the encoded column names are preserved.
house2 = ohe(house1, imputable_cat_features)
scaler = get_scaler(house2)
house3 = pd.DataFrame(scaler.transform(house2), columns=house2.columns)

# The first len(house_train) rows are taken as the training features
# (assumes `house` stacks the training rows first -- TODO confirm).
X_train = house3[:len(house_train)]
y_train = house_train['SalePrice']

# Plot the target before and after a log1p transform.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases;
# consider migrating to histplot/displot in a follow-up change.
sns.distplot(y_train, hist=True)
y_trans = np.log1p(y_train)
sns.distplot(y_trans, hist=True)
titanic_train1 = utils.drop_features( titanic_train, ['PassengerId', 'Name', 'Survived', 'Ticket', 'Cabin']) #type casting utils.cast_to_cat(titanic_train1, ['Sex', 'Pclass', 'Embarked']) cat_features = utils.get_categorical_features(titanic_train1) print(cat_features) cont_features = utils.get_continuous_features(titanic_train1) print(cont_features) #handle missing data(imputation) cat_imputers = utils.get_categorical_imputers(titanic_train1, cat_features) titanic_train1[cat_features] = cat_imputers.transform( titanic_train1[cat_features]) cont_imputers = utils.get_continuous_imputers(titanic_train1, cont_features) titanic_train1[cont_features] = cont_imputers.transform( titanic_train1[cont_features]) #adding new levels #titanic_train['Pclass'] = titanic_train['Pclass'].cat.add_categories([4,5]) #one hot encoding titanic_train2 = utils.ohe(titanic_train1, cat_features) scaler = preprocessing.StandardScaler() X_train = scaler.fit_transform(titanic_train2) y_train = titanic_train['Survived'] kernel_svm_estimator = svm.SVC(kernel='rbf') kernel_svm_grid = { 'gamma': [0.001, 0.01, 0.1, 1],