def Run(self, data_container, store_folder='', store_key=''):
    """Return a new DataContainer holding only the non-object columns.

    The frame obtained from ``data_container.GetFrame()`` is filtered with
    ``select_dtypes`` so that every column of dtype ``object`` is dropped;
    the remaining columns are placed into a freshly created DataContainer.

    Args:
        data_container: Source container; only its ``GetFrame()`` is read.
        store_folder: Optional output directory. When it is non-empty and
            is an existing directory, the filtered container is saved to
            ``numeric_feature.csv`` and the kept feature names are passed
            to ``SaveSelectInfo`` for ``feature_select_info.csv``.
        store_key: Accepted for interface compatibility; not used here.

    Returns:
        The new DataContainer built from the filtered frame.
    """
    source_frame = data_container.GetFrame()
    kept_columns = source_frame.select_dtypes(include=None, exclude=['object'])

    numeric_container = DataContainer()
    numeric_container.SetFrame(kept_columns)

    # Persisting is optional: skip it unless a real directory was given.
    if not store_folder or not os.path.isdir(store_folder):
        return numeric_container

    numeric_container.Save(os.path.join(store_folder, 'numeric_feature.csv'))
    SaveSelectInfo(
        numeric_container.GetFeatureName(),
        os.path.join(store_folder, 'feature_select_info.csv'),
        is_merge=False,
    )
    return numeric_container
vif_dict[exog] = vif # calculate tolerance tolerance = 1 - r_squared tolerance_dict[exog] = tolerance # return VIF DataFrame df_vif = pd.DataFrame({'VIF': vif_dict, 'Tolerance': tolerance_dict}) return df_vif if __name__ == '__main__': data_path = r'..\..\Demo\train_numeric_feature.csv' from BC.DataContainer.DataContainer import DataContainer from BC.FeatureAnalysis.Normalizer import NormalizerZeroCenter pca = DimensionReductionByPCA() dc = DataContainer() dc.Load(data_path) dc = NormalizerZeroCenter.Run(dc) # dc = pca.Run(dc) df = pd.DataFrame(dc.GetArray(), index=dc.GetCaseName(), columns=dc.GetFeatureName()) dr = DimensionReductionByVIF() new_df = dr.CalculateVIF(df) print(dc.GetArray().shape, new_df.shape)