Ejemplo n.º 1
0
def predict(data):
    """Preprocess ``data`` and return the model's predictions for it.

    Steps, in order: impute missing values, convert the year variable to
    elapsed years (with ``'YrSold'`` as the reference variable), log-transform
    the configured numerical variables, group rare labels, encode the
    categorical variables, scale the configured features and predict.

    Args:
        data: Column-indexable table of raw observations (presumably a pandas
            DataFrame -- confirm against callers). It is copied before any
            step runs, so the caller's object is not modified.

    Returns:
        The value returned by ``pf.predict`` for the scaled features.
    """
    # Every step below writes columns back into ``data``. Work on a copy so
    # the caller's table is left untouched; otherwise a second call with the
    # same object would run the transforms on already-transformed columns.
    data = data.copy()

    # impute NA in the categorical variables with the 'Missing' label
    for var in config.CATEGORICAL_TO_IMPUTE:
        data[var] = pf.impute_na(data, var, replacement='Missing')

    # impute NA in the numerical variable(s) with the configured value
    data[config.NUMERICAL_TO_IMPUTE] = pf.impute_na(
        data, config.NUMERICAL_TO_IMPUTE, replacement=config.LOTFRONTAGE_MODE)

    # capture elapsed time
    data[config.YEAR_VARIABLE] = pf.elapsed_years(data,
                                                  config.YEAR_VARIABLE,
                                                  ref_var='YrSold')

    # log transform numerical variables
    for var in config.NUMERICAL_LOG:
        data[var] = pf.log_transform(data, var)

    # group rare labels (all variables are grouped before any is encoded)
    for var in config.CATEGORICAL_ENCODE:
        data[var] = pf.remove_rare_labels(data, var,
                                          config.FREQUENT_LABELS[var])

    # encode variables
    for var in config.CATEGORICAL_ENCODE:
        data[var] = pf.encode_categorical(data, var,
                                          config.ENCODING_MAPPINGS[var])

    # scale variables; only the columns listed in config.FEATURES are kept
    data = pf.scale_features(data[config.FEATURES], config.OUTPUT_SCALER_PATH)

    # make predictions
    predictions = pf.predict(data, config.OUTPUT_MODEL_PATH)

    return predictions
Ejemplo n.º 2
0
def predict(data):
    """Run the preprocessing pipeline on ``data`` and return predictions.

    The input is imputed, the year variable is turned into elapsed years
    (reference variable ``'YrSold'``), the configured numerical variables are
    log-transformed, rare labels are grouped, categorical variables are
    encoded, and the configured features are scaled before predicting.

    Args:
        data: Column-indexable table of raw observations (presumably a pandas
            DataFrame -- confirm against callers). A copy is taken up front,
            so the object passed in by the caller is left unchanged.

    Returns:
        The value returned by ``pf.predict`` for the scaled features.
    """
    # All steps below assign columns back into ``data``; copying first keeps
    # those writes away from the caller's object, so calling this function
    # again with the same input does not transform it a second time.
    data = data.copy()

    # impute missing data: categorical variables get the 'Missing' label
    for var in config.CATEGORICAL_TO_IMPUTE:
        data[var] = pf.impute_na(data, var, replacement='Missing')

    # impute missing data: numerical variable(s) get the configured value
    data[config.NUMERICAL_TO_IMPUTE] = pf.impute_na(
        data,
        config.NUMERICAL_TO_IMPUTE,
        replacement=config.LOTFRONTAGE_MODE)

    # time intervals
    data[config.YEAR_VARIABLE] = pf.elapsed_years(
        data, config.YEAR_VARIABLE, ref_var='YrSold')

    # logarithmic transformation
    for var in config.NUMERICAL_LOG:
        data[var] = pf.log_transform(data, var)

    # grouping of infrequent labels (done for every variable before encoding)
    for var in config.CATEGORICAL_ENCODE:
        data[var] = pf.remove_rare_labels(
            data, var, config.FREQUENT_LABELS[var])

    # encoding of categorical variables
    for var in config.CATEGORICAL_ENCODE:
        data[var] = pf.encode_categorical(
            data, var, config.ENCODING_MAPPINGS[var])

    # scale variables; only the columns listed in config.FEATURES are kept
    data = pf.scale_features(data[config.FEATURES],
                             config.OUTPUT_SCALER_PATH)

    # obtain predictions
    predictions = pf.predict(data, config.OUTPUT_MODEL_PATH)

    return predictions
Ejemplo n.º 3
0
# Load the dataset from the configured path.
data = pf.load_data(config.PATH_TO_DATASET)

# Split the dataset into train and test partitions.
X_train, X_test, y_train, y_test = pf.divide_train_test(
    data, config.TARGET
)

# Impute the categorical variables with the 'Missing' label.
for var in config.CATEGORICAL_TO_IMPUTE:
    X_train[var] = pf.impute_na(
        X_train, var, replacement='Missing'
    )

# Impute the numerical variable(s) with the configured replacement value.
X_train[config.NUMERICAL_TO_IMPUTE] = pf.impute_na(
    X_train,
    config.NUMERICAL_TO_IMPUTE,
    replacement=config.LOTFRONTAGE_MODE,
)

# Time intervals: replace the year variable with elapsed years, using
# 'YrSold' as the reference variable.
X_train[config.YEAR_VARIABLE] = pf.elapsed_years(
    X_train, config.YEAR_VARIABLE, ref_var='YrSold'
)

# Logarithmic transformation of the configured numerical variables.
for var in config.NUMERICAL_LOG:
    X_train[var] = pf.log_transform(X_train, var)

# Group infrequent categories; every variable is grouped before any of
# them is encoded in the loop that follows.
for var in config.CATEGORICAL_ENCODE:
    X_train[var] = pf.remove_rare_labels(
        X_train, var, config.FREQUENT_LABELS[var]
    )

# Encode the categorical variables with their configured mappings.
for var in config.CATEGORICAL_ENCODE:
    X_train[var] = pf.encode_categorical(
        X_train, var, config.ENCODING_MAPPINGS[var]
    )