def predict():
    """Score the passenger records posted as JSON with the stored Titanic model.

    The request body is loaded into a DataFrame and pushed through the objects
    stored in ``titanic_model_1.pkl``: categorical and continuous imputers,
    a scaler and an estimator. Unused columns are dropped and the categorical
    columns are one-hot encoded before scaling.

    Returns:
        A Flask JSON response whose ``prediction`` field holds the estimator's
        output for the first submitted row, rendered as a string.
    """
    data = flask.request.json
    print("in service")
    print(data)
    titanic_test = pd.DataFrame(data)

    # NOTE(review): the model bundle is re-read from disk on every request;
    # consider loading it once at start-up if latency matters.
    model_objects = joblib.load(os.path.join(dir, 'titanic_model_1.pkl'))

    titanic_test1 = utils.drop_features(
        titanic_test, ['PassengerId', 'Name', 'Ticket', 'Cabin'])
    utils.cast_to_cat(titanic_test1, ['Sex', 'Pclass', 'Embarked'])

    cat_features = utils.get_categorical_features(titanic_test1)
    cont_features = utils.get_continuous_features(titanic_test1)

    titanic_test1[cat_features] = model_objects.get('cat_imputers').transform(
        titanic_test1[cat_features])
    titanic_test1[cont_features] = model_objects.get('cont_imputers').transform(
        titanic_test1[cont_features])

    # The imputed values were written back into the frame, so the categorical
    # cast is applied again (presumably the write-back loses the category
    # dtype -- confirm against utils.cast_to_cat).
    utils.cast_to_cat(titanic_test1, ['Sex', 'Pclass', 'Embarked'])

    # Make sure every expected level exists so that one-hot encoding yields a
    # column per level even when the request does not contain it. Only the
    # levels that are actually missing are added: pandas raises ValueError
    # when add_categories() is given a level the column already has.
    # NOTE(review): the resulting column order depends on utils.ohe and must
    # match what the scaler was fitted on -- confirm.
    expected_levels = {
        'Sex': ['male', 'female'],
        'Pclass': [1, 2, 3],
        'Embarked': ['S', 'Q', 'C'],
    }
    for column, levels in expected_levels.items():
        present = titanic_test1[column].cat.categories
        missing = [level for level in levels if level not in present]
        if missing:
            titanic_test1[column] = titanic_test1[column].cat.add_categories(
                missing)

    titanic_test2 = utils.ohe(titanic_test1, cat_features)
    print(titanic_test2.shape)

    X_test = model_objects.get('scaler').transform(titanic_test2)
    result = model_objects.get('estimator').predict(X_test)
    print(result)

    # Return the model's actual output instead of a hard-coded "1". The
    # response keeps its single string field, so only the first row's
    # prediction is reported when several rows are posted.
    return flask.jsonify(prediction=str(result[0]))
# Quick look at the test set before it is merged with the training set.
house_test.shape
house_test.info()

# Stack train and test rows so both get identical preprocessing.
house = pd.concat([house_train, house_test], axis=0)
house.shape
house.info()

# MSSubClass is cast with cast_cont_to_cat before features are split by type.
features_to_cast = ['MSSubClass']
cast_cont_to_cat(house, features_to_cast)

print(get_continuous_features(house))
print(get_categorical_features(house))

# Drop Id and SalePrice plus whatever the missing-data helper selects for
# the 0.25 threshold.
features_to_drop = ['Id', 'SalePrice']
features_to_drop += get_features_to_drop_on_missingdata(house, 0.25)
house1 = drop_features(house, features_to_drop)
house1.info()

# Impute the categorical columns.
imputable_cat_features = get_categorical_features(house1)
cat_imputer = get_categorical_imputers(house1, imputable_cat_features)
house1[imputable_cat_features] = cat_imputer.transform(
    house1[imputable_cat_features]
)

# Impute the continuous columns.
imputable_cont_features = get_continuous_features(house1)
cont_imputer = get_continuous_imputers(house1, imputable_cont_features)
house1[imputable_cont_features] = cont_imputer.transform(
    house1[imputable_cont_features]
)
house1.info()

# One-hot encode the categorical columns.
house2 = ohe(house1, imputable_cat_features)
# Titanic training-script fragment (collapsed onto one line): imports, reads
# train.csv from `dir`, drops PassengerId/Name/Survived/Ticket/Cabin, casts
# Sex/Pclass/Embarked via utils.cast_to_cat, prints the categorical and
# continuous feature lists, then applies the imputers obtained from
# utils.get_categorical_imputers / utils.get_continuous_imputers.
# NOTE(review): the fragment ends mid-call at `cont_imputers.transform(`; the
# rest of the statement is outside this view.
import pandas as pd import numpy as np import os import common_utils as utils from sklearn import preprocessing, neighbors, svm, linear_model, ensemble, pipeline, model_selection import classification_utils as cutils import kernel_utils as kutils dir = 'E:/' titanic_train = pd.read_csv(os.path.join(dir, 'train.csv')) print(titanic_train.shape) print(titanic_train.info()) titanic_train1 = utils.drop_features( titanic_train, ['PassengerId', 'Name', 'Survived', 'Ticket', 'Cabin']) #type casting utils.cast_to_cat(titanic_train1, ['Sex', 'Pclass', 'Embarked']) cat_features = utils.get_categorical_features(titanic_train1) print(cat_features) cont_features = utils.get_continuous_features(titanic_train1) print(cont_features) #handle missing data(imputation) cat_imputers = utils.get_categorical_imputers(titanic_train1, cat_features) titanic_train1[cat_features] = cat_imputers.transform( titanic_train1[cat_features]) cont_imputers = utils.get_continuous_imputers(titanic_train1, cont_features) titanic_train1[cont_features] = cont_imputers.transform(
# Titanic feature-engineering fragment (collapsed onto one line).
# NOTE(review): it starts inside a function body whose header is outside this
# view (the visible tail returns 'Medium' or 'Large'); presumably this is
# convert_familysize, which is mapped over FamilySize right after -- confirm.
# The rest plots FamilyGroup and Cabin counts, fills missing Cabin with 'U',
# drops PassengerId/Name/Survived/Ticket, casts six columns via
# utils.cast_to_cat, and applies the categorical and continuous imputers
# obtained from utils.
return 'Medium' else: return 'Large' titanic['FamilyGroup'] = titanic['FamilySize'].map(convert_familysize) sns.factorplot(x="FamilyGroup", hue="Survived", data=titanic, kind="count", size=6) sns.countplot(x='Cabin', data=titanic) titanic['Cabin'] = titanic['Cabin'].fillna('U') titanic = utils.drop_features(titanic, ['PassengerId', 'Name', 'Survived', 'Ticket']) #type casting utils.cast_to_cat( titanic, ['Sex', 'Pclass', 'Embarked', 'Title', 'FamilyGroup', 'Cabin']) cat_features = utils.get_categorical_features(titanic) print(cat_features) cont_features = utils.get_continuous_features(titanic) print(cont_features) #handle missing data(imputation) cat_imputers = utils.get_categorical_imputers(titanic, cat_features) titanic[cat_features] = cat_imputers.transform(titanic[cat_features]) cont_imputers = utils.get_continuous_imputers(titanic, cont_features) titanic[cont_features] = cont_imputers.transform(titanic[cont_features])
# Load the house-prices training data.
path = 'G://house-prices'
house_train = pd.read_csv(os.path.join(path, "train.csv"))
house_train.shape
house_train.info()

# Type cast features: MSSubClass goes through utils.cast_to_cat.
features_to_cast = ['MSSubClass']
utils.cast_to_cat(house_train, features_to_cast)

# Manual feature selection: Id and SalePrice are always dropped, together
# with the columns the missing-data helper returns for the 0.25 threshold.
missing_features_above_th = utils.get_features_to_drop_on_missingdata(
    house_train, 0.25
)
features_to_drop = ['Id', 'SalePrice', *missing_features_above_th]
house_train1 = utils.drop_features(house_train, features_to_drop)
house_train1.info()

# Pipeline for categorical features: most-frequent imputation followed by
# dense one-hot encoding that ignores categories unseen during fit.
categorical_pipeline = pipeline.Pipeline(
    [
        ('imputer', impute.SimpleImputer(strategy="most_frequent")),
        (
            'ohe',
            preprocessing.OneHotEncoder(sparse=False, handle_unknown='ignore'),
        ),
    ]
)

# Pipeline for numerical features: default imputation followed by
# standard scaling.
numerical_pipeline = pipeline.Pipeline(
    [
        ('imputer', impute.SimpleImputer()),
        ('scaler', preprocessing.StandardScaler()),
    ]
)

# Column list used when building the preprocessing pipeline for all features.
cat_features = utils.get_non_continuous_features(house_train1)
# SalePrice density, one curve per YearBuilt value.
(
    sns.FacetGrid(house_train, hue="YearBuilt", size=8)
    .map(sns.kdeplot, "SalePrice")
    .add_legend()
)

# How YrSold is distributed and how it relates to SalePrice.
sns.countplot(x='YrSold', data=house_train)
sns.jointplot(x="SalePrice", y="YrSold", data=house_train)
(
    sns.FacetGrid(house_train, hue="YrSold", size=8)
    .map(sns.kdeplot, "SalePrice")
    .add_legend()
)

# MSSubClass goes through utils.cast_to_cat.
features_to_cast = ['MSSubClass']
utils.cast_to_cat(house, features_to_cast)

# Drop Id plus the columns the missing-data helper returns for the 0.25
# threshold.
missing_features_above_th = utils.get_features_to_drop_on_missingdata(
    house, 0.25
)
features_to_drop = ['Id', *missing_features_above_th]
house1 = utils.drop_features(house, features_to_drop)
house1.info()

# Impute the non-continuous columns.
imputable_cat_features = utils.get_non_continuous_features(house1)
cat_imputer = utils.get_categorical_imputers(house1, imputable_cat_features)
house1[imputable_cat_features] = cat_imputer.transform(
    house1[imputable_cat_features]
)

# Impute the continuous columns.
imputable_cont_features = utils.get_continuous_features(house1)
cont_imputer = utils.get_continuous_imputers(house1, imputable_cont_features)
house1[imputable_cont_features] = cont_imputer.transform(
    house1[imputable_cont_features]
)
house1.info()

# One-hot encode the non-continuous columns.
house2 = utils.ohe(house1, imputable_cat_features)