コード例 #1
0
## Scaling and Encoding the data
# Turn every text (object-dtype) column into integer category codes so the
# whole frame is numeric before scaling. OneHotEncoder is not usable here: it
# rejects 1-D input and emits one output column per category, so its result
# cannot be assigned back into a single DataFrame column.
for colum in df.columns:
    if df[colum].dtype == object:
        # print(colum , df[colum].unique().tolist())
        # sort=True numbers the categories in sorted order; missing values
        # receive the code -1.
        df[colum] = pd.factorize(df[colum], sort=True)[0]

# MinMaxScaler hands back a plain NumPy array, so keep the column labels and
# rebuild the DataFrame from the scaled values afterwards.
columns = df.columns
df = MinMaxScaler().fit_transform(df)
df = pd.DataFrame(df, columns=columns)

## Finding the Correlations between Features
# Optional visual check of the full correlation matrix:
# sns.heatmap(df.corr(), fmt = '.1f',annot = True)
# plt.show()

# Correlation of every column with the SalePrice column; SalePrice's own
# entry is dropped so that only the other columns remain.
correlations = (
    df.corr()
    .loc[:, 'SalePrice']
    .drop(labels='SalePrice')
)

# Handy while picking a threshold:
# print(correlations)
# print(correlations.quantile(.25))
# print(correlations.quantile(.75))
# print(correlations.quantile(.50))


## Choosing the best threshold for improving the model
def get_features(correlation_threshold):
    """Return the labels whose absolute correlation exceeds the threshold.

    Reads the module-level ``correlations`` Series. A label is kept only when
    the absolute value of its entry is strictly greater than
    ``correlation_threshold``; the result is a plain list in the Series' order.
    """
    magnitudes = correlations.abs()
    keep_mask = magnitudes > correlation_threshold
    selected = magnitudes.loc[keep_mask]
    return selected.index.tolist()

コード例 #2
0
                      'school', 'sex', 'age', 'address', 'famsize', 'Pstatus',
                      'Medu', 'Fedu', 'Mjob', 'Fjob', 'reason', 'guardian',
                      'traveltime', 'studytime', 'failures', 'schoolsup',
                      'famsup', 'paid', 'activities', 'nursery', 'higher',
                      'internet', 'romantic', 'famrel', 'freetime', 'goout',
                      'Dalc', 'Walc', 'health', 'absences', 'G1', 'G2', 'G3'
                  ])

# print(df.head())
# print(df.dtypes)

## Finding the Correlations between Features
# Optional visual check of the full correlation matrix:
# sns.heatmap(df.corr(), fmt = '.1f',annot = True)
# plt.show()

# Take the G3 column of the correlation matrix and remove G3's own entry,
# leaving the correlation of each remaining column with G3.
g3_column = 'G3'
correlations = df.corr().loc[:, g3_column].drop(labels=g3_column)
del g3_column  # temporary label only; keep the module namespace unchanged

# Handy while picking a threshold:
# print(correlations)
# print(correlations.quantile(.25))
# print(correlations.quantile(.75))


## Choosing the best threshold for improving the model
def get_features(correlation_threshold):
    """List the labels correlated more strongly than the given threshold.

    Uses the module-level ``correlations`` Series: an entry qualifies when its
    absolute value is strictly greater than ``correlation_threshold``. Labels
    are returned as a list, in the order they appear in the Series.
    """
    strengths = correlations.abs()
    return strengths[strengths.gt(correlation_threshold)].index.tolist()


# thresh = []