Ejemplo n.º 1
0
def submission(model, norm, feat_selection, inputation, new_features, subm_name):
    dao = DAO(new_features=new_features)

    if norm:
        train = dao.get_normalized_data(dataset="train", inputation=inputation, max_na_count_columns=0.05)
        test = dao.get_normalized_data(dataset="test", inputation=inputation, max_na_count_columns=1)
        print(len(test))
    else:
        train = dao.get_data(cols_type="numeric", dataset="train", max_na_count_columns=0.05)
        test = dao.get_data(cols_type="numeric", dataset="test", max_na_count_columns=0.05)

    test_ids = test.index.tolist()


    if feat_selection is None:
        feat_selection_name = ""
    else:
        feat_selection_name = feat_selection.__name__
        columns = feat_selection(train)
        train_columns = columns + [TARGET]
        train = train[train_columns]
        test = test[columns]


    ev = Evaluator(model=model)
    pred = ev.run(train, test, abs_target=False)

    pred = pd.Series(pred).round(10)
    subm = pd.DataFrame()
    subm["ParcelId"] = test_ids
    subm["201610"] = pred
    subm["201611"] = pred
    subm["201612"] = pred
    subm["201710"] = pred
    subm["201711"] = pred
    subm["201712"] = pred

    subm_path = PathManager().get_submission_dir() + subm_name + ".csv"
    subm.to_csv(subm_path, index=False)

    subm_metadata = PathManager().get_submission_dir() + subm_name + ".json"
    with open(subm_metadata, 'w') as file:
        submission_dict = {}
        submission_dict["submission_name"] = subm_name
        submission_dict["norm"] = norm
        submission_dict["feat_selection"] = feat_selection_name
        submission_dict["model"] = model.get_model_name()
        submission_dict["inputation"] = inputation
        submission_dict["score"] = ""

        json.dump(submission_dict, file)
Ejemplo n.º 2
0
import pandas as pd
from sklearn import preprocessing

from src.dao.dao import DAO

dao = DAO()
numeric_df = dao.get_data(cols_type="numeric", max_na_count_columns=0.05)

#remove na rows
numeric_df = numeric_df .dropna()

print(numeric_df.head()[["regionidcity", "calculatedbathnbr", "fullbathcnt"]])

x = numeric_df.values
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
df = pd.DataFrame(x_scaled)

df_norm = pd.DataFrame(df)
df_norm.columns = numeric_df.columns

print(df_norm.head()[["regionidcity", "calculatedbathnbr", "fullbathcnt"]])