# Read the raw training CSV; cells holding the literal text 'NA' are parsed
# as missing values.
train_df = pandas.read_csv(
    train_fullpath,
    sep=',',
    na_values='NA',
    low_memory=False,
)
# Optional numeric coercion of every column, left disabled:
# for item in train_df.columns.values:
#     pandas.to_numeric(train_df[item])

# Separate the feature columns from the target column.
X_train = train_df[attributes]
y_train = train_df[target_key]

# The preprocessing helper is given features and target side by side in a
# single frame, plus the names needed to tell them apart.
train_datapreprocessing = DataPreprocessing(
    pandas.concat([X_train, y_train], axis=1),
    attributes,
    target_key,
)
# Debug aid, disabled:
# train_datapreprocessing.data_summary()

# Step 1: binary transform of the listed columns, followed by a dtype
# conversion of those same columns with d_type=[int].
# NOTE(review): exact semantics live in DataPreprocessing - confirm there.
binary_transform_attrs = [
    'user_live_address',
    'user_rela_name',
    'user_relation',
    'user_rela_phone',
    'user_high_edu',
    'user_company_name',
]
X_train = train_datapreprocessing.transform_x_to_binary(
    binary_transform_attrs)
X_train = train_datapreprocessing.transform_x_dtype(
    binary_transform_attrs,
    d_type=[int],
    uniform_type=True,
)

# Step 2: map the province/city columns through the China area-number
# mapping, using resource_dir as the resource location, then apply the same
# dtype conversion to them.
area_attrs = ['user_live_province', 'user_live_city']
resource_dir = '../resources'
X_train = train_datapreprocessing.china_area_number_mapping(
    area_attrs, resource_dir)
X_train = train_datapreprocessing.transform_x_dtype(
    area_attrs,
    d_type=[int],
    uniform_type=True,
)

# Step 3: final dummies / fill-NA pass; its return value becomes the
# feature matrix used from here on.
X_train = train_datapreprocessing.x_dummies_and_fillna()
# Debug aid, disabled:
# train_datapreprocessing.data_summary()

# Processed features and the target column joined column-wise.
Gini_DF = pandas.concat([X_train, y_train], axis=1)
# gini_attrs = Gini_DF.axes[1]
# Read the raw training CSV; cells holding the literal text 'NA' are parsed
# as missing values.
train_df = pandas.read_csv(
    train_fullpath,
    sep=',',
    na_values='NA',
    low_memory=False,
)
# Optional numeric coercion of every column, left disabled:
# for item in train_df.columns.values:
#     pandas.to_numeric(train_df[item])

# Separate the feature columns from the target column.
X_train = train_df[attributes]
y_train = train_df[target_key]

# The preprocessing helper is given features and target side by side in a
# single frame, plus the names needed to tell them apart.
train_datapreprocessing = DataPreprocessing(
    pandas.concat([X_train, y_train], axis=1),
    attributes,
    target_key,
)
# Debug aid, disabled:
# train_datapreprocessing.data_summary()

# Step 1: binary transform of the listed columns, followed by a dtype
# conversion of those same columns with d_type=[int].
# NOTE(review): exact semantics live in DataPreprocessing - confirm there.
binary_transform_attrs = [
    'user_live_address',
    'user_rela_name',
    'user_relation',
    'user_rela_phone',
    'user_high_edu',
    'user_company_name',
]
X_train = train_datapreprocessing.transform_x_to_binary(
    binary_transform_attrs)
X_train = train_datapreprocessing.transform_x_dtype(
    binary_transform_attrs,
    d_type=[int],
    uniform_type=True,
)

# Step 2: map the province/city columns through the China area-number
# mapping, using resource_dir as the resource location, then apply the same
# dtype conversion to them.
area_attrs = ['user_live_province', 'user_live_city']
resource_dir = '../resources'
X_train = train_datapreprocessing.china_area_number_mapping(
    area_attrs, resource_dir)
X_train = train_datapreprocessing.transform_x_dtype(
    area_attrs,
    d_type=[int],
    uniform_type=True,
)

# Step 3: final dummies / fill-NA pass; its return value becomes the
# feature matrix used from here on.
X_train = train_datapreprocessing.x_dummies_and_fillna()
# Debug aid, disabled:
# train_datapreprocessing.data_summary()

# Joining features with the target for Gini analysis is disabled here:
# Gini_DF = pandas.concat([X_train, y_train], axis=1)
# gini_attrs = Gini_DF.axes[1]