Esempio n. 1
0
def predict():
    """Score the passenger records posted as JSON with the persisted model.

    Reads the request body through ``flask.request.json``, builds a DataFrame
    from it and replays the preprocessing steps with the objects stored in
    ``titanic_model_1.pkl`` (looked up under the keys ``cat_imputers``,
    ``cont_imputers``, ``scaler`` and ``estimator``).

    Returns:
        A Flask JSON response with a single ``prediction`` field. When exactly
        one row was scored it is a string (the shape this endpoint has always
        returned); otherwise it is a list with one string per scored row.
    """
    data = flask.request.json
    print("in service")
    print(data)
    titanic_test = pd.DataFrame(data)

    # NOTE(review): the bundle is re-read from disk on every call; consider
    # loading it once at start-up if request latency matters.
    model_objects = joblib.load(os.path.join(dir, 'titanic_model_1.pkl'))

    titanic_test1 = utils.drop_features(titanic_test, ['PassengerId', 'Name', 'Ticket', 'Cabin'])
    utils.cast_to_cat(titanic_test1, ['Sex', 'Pclass', 'Embarked'])

    cat_features = utils.get_categorical_features(titanic_test1)
    cont_features = utils.get_continuous_features(titanic_test1)

    # Fill missing values with the imputers stored in the model bundle.
    titanic_test1[cat_features] = model_objects.get('cat_imputers').transform(titanic_test1[cat_features])
    titanic_test1[cont_features] = model_objects.get('cont_imputers').transform(titanic_test1[cont_features])

    # Re-apply the categorical cast: the `.cat` accessor used below requires a
    # categorical dtype (presumably lost when the imputed values are assigned
    # back -- TODO confirm).
    utils.cast_to_cat(titanic_test1, ['Sex', 'Pclass', 'Embarked'])

    # Make sure every expected level exists even when the request does not
    # contain it. Only the levels that are still missing are added:
    # `add_categories` raises ValueError when asked to add a category that is
    # already present, which happened for every request carrying real values.
    # NOTE(review): the resulting category order depends on which levels the
    # request contained; confirm utils.ohe / the scaler do not rely on a fixed
    # column order.
    expected_levels = (
        ('Sex', ['male', 'female']),
        ('Pclass', [1, 2, 3]),
        ('Embarked', ['S', 'Q', 'C']),
    )
    for feature, levels in expected_levels:
        present = titanic_test1[feature].cat.categories
        missing = [level for level in levels if level not in present]
        if missing:
            titanic_test1[feature] = titanic_test1[feature].cat.add_categories(missing)

    titanic_test2 = utils.ohe(titanic_test1, cat_features)
    print(titanic_test2.shape)
    X_test = model_objects.get('scaler').transform(titanic_test2)
    result = model_objects.get('estimator').predict(X_test)
    print(result)

    # Return what the estimator actually predicted (the response used to be
    # hard-coded to "1"). Values are stringified as before.
    predictions = [str(label) for label in result]
    payload = predictions[0] if len(predictions) == 1 else predictions
    return flask.jsonify(prediction=payload)
Esempio n. 2
0
# Inspect the training frame. The shape is printed explicitly: a bare
# `.shape` expression statement is evaluated and discarded when this runs as
# a script. `info()` prints its own report.
print(house_train.shape)
house_train.info()

house_test = pd.read_csv(os.path.join(path, "test.csv"))
print(house_test.shape)
house_test.info()

# Stack train and test rows so the casting, column dropping and imputation
# below are applied to both sets at once.
house = pd.concat((house_train, house_test), axis=0)
print(house.shape)
house.info()

# Treat MSSubClass as categorical.
features_to_cast = ['MSSubClass']
cast_cont_to_cat(house, features_to_cast)

print(get_continuous_features(house))
print(get_categorical_features(house))

# Drop the identifier and the target, plus whatever columns the helper
# selects for the 0.25 threshold (presumably columns with more than 25%
# missing values -- TODO confirm against the helper).
features_to_drop = ['Id', 'SalePrice']
features_to_drop.extend(get_features_to_drop_on_missingdata(house, 0.25))
house1 = drop_features(house, features_to_drop)
house1.info()

# Impute the remaining categorical columns.
# NOTE(review): the imputers are built from house1, which contains the test
# rows as well -- confirm that is intended.
imputable_cat_features = get_categorical_features(house1)
cat_imputer = get_categorical_imputers(house1, imputable_cat_features)
house1[imputable_cat_features] = cat_imputer.transform(
    house1[imputable_cat_features])

# Impute the remaining continuous columns.
imputable_cont_features = get_continuous_features(house1)
cont_imputer = get_continuous_imputers(house1, imputable_cont_features)
house1[imputable_cont_features] = cont_imputer.transform(
    house1[imputable_cont_features])
import classification_utils as cutils
import kernel_utils as kutils

# Directory holding the Titanic data files.
# NOTE(review): `dir` shadows the builtin of the same name; the name is kept
# because other code may read this module-level variable.
dir = 'E:/'
titanic_train = pd.read_csv(os.path.join(dir, 'train.csv'))

print(titanic_train.shape)
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
titanic_train.info()

# Drop the identifier-like columns and the 'Survived' column from the
# feature frame.
titanic_train1 = utils.drop_features(
    titanic_train, ['PassengerId', 'Name', 'Survived', 'Ticket', 'Cabin'])

# Type casting: treat these columns as categorical.
utils.cast_to_cat(titanic_train1, ['Sex', 'Pclass', 'Embarked'])

cat_features = utils.get_categorical_features(titanic_train1)
print(cat_features)
cont_features = utils.get_continuous_features(titanic_train1)
print(cont_features)

# Handle missing data (imputation): build the imputers from the training
# frame and write the imputed values back in place.
cat_imputers = utils.get_categorical_imputers(titanic_train1, cat_features)
titanic_train1[cat_features] = cat_imputers.transform(
    titanic_train1[cat_features])
cont_imputers = utils.get_continuous_imputers(titanic_train1, cont_features)
titanic_train1[cont_features] = cont_imputers.transform(
    titanic_train1[cont_features])

#adding new levels
#titanic_train['Pclass'] = titanic_train['Pclass'].cat.add_categories([4,5])
Esempio n. 4
0
               hue="Survived",
               data=titanic,
               kind="count",
               size=6)

# Plot how often each Cabin value occurs, then replace missing cabins with
# the placeholder level 'U'.
sns.countplot(x='Cabin', data=titanic)
titanic['Cabin'] = titanic['Cabin'].fillna('U')

# Remove the identifier-like columns and the 'Survived' column from the
# feature frame (presumably the target was saved earlier -- TODO confirm,
# that part of the script is not visible here).
titanic = utils.drop_features(titanic,
                              ['PassengerId', 'Name', 'Survived', 'Ticket'])

# Type casting: treat the listed columns as categorical.
utils.cast_to_cat(
    titanic, ['Sex', 'Pclass', 'Embarked', 'Title', 'FamilyGroup', 'Cabin'])

# Split the column names by kind; both lists are printed for inspection.
cat_features = utils.get_categorical_features(titanic)
print(cat_features)
cont_features = utils.get_continuous_features(titanic)
print(cont_features)

# Handle missing data (imputation): build one imputer per feature kind and
# write the transformed values back into the same columns.
cat_imputers = utils.get_categorical_imputers(titanic, cat_features)
titanic[cat_features] = cat_imputers.transform(titanic[cat_features])
cont_imputers = utils.get_continuous_imputers(titanic, cont_features)
titanic[cont_features] = cont_imputers.transform(titanic[cont_features])

# One-hot encode the categorical columns; `titanic` is rebound to the result.
titanic = utils.ohe(titanic, cat_features)

# Scale the data: create the scaler (it is only instantiated here).
scaler = preprocessing.StandardScaler()