uniform_type=True) area_attrs = ['user_live_province', 'user_live_city'] resource_dir = '../resources' X_train = X_train_datapreprocessing.china_area_number_mapping( area_attrs, resource_dir) X_train = X_train_datapreprocessing.transform_dtype(area_attrs, d_type=[int], uniform_type=True) X_train = X_train_datapreprocessing.dummies_and_fillna() #X_train.info() #print(X_train.head(5)) Gini_DF = pandas.concat([X_train, Y_train], axis=1) #gini_attrs = Gini_DF.axes[1] gini_attrs = list(Gini_DF.columns.values) gini = GiniIndex(Gini_DF, gini_attrs, target_key, Gini_DF[target_key]) gini_index_dict = gini.gini_index() gini_list = sorted(gini_index_dict.items(), key=lambda item: item[1]) for item in gini_list: print(item) scoring = 'accuracy' models = [] models.append(('LR', LogisticRegression())) models.append(('CART', DecisionTreeClassifier())) #models.append(('LDA',LinearDiscriminantAnalysis())) models.append(('KNN', KNeighborsClassifier())) models.append(('NB', GaussianNB())) models.append(('RF', RandomForestClassifier())) #models.append(('SVM',SVC()))
if __name__ == "__main__":
    # Script entry point: load a CSV dataset, compute the Gini index of each
    # listed attribute against the target column, and print the
    # (attribute, value) pairs in ascending order of value.
    #
    # Usage: python <this_file> [path/to/dataset.csv]
    import sys

    # The dataset path may be passed as the first command-line argument.
    # Without an argument the original hard-coded location is used, so
    # existing invocations keep working unchanged.
    if len(sys.argv) > 1:
        file_fullpath = sys.argv[1]
    else:
        file_fullpath = '/home/login01/Workspaces/python/dataset/cs.csv'

    # Cells containing the literal text "NA" are read as missing values.
    df = pandas.read_csv(file_fullpath, sep=',', na_values='NA',
                         low_memory=False)

    # Columns whose Gini index is evaluated.
    attribute = [
        "RevolvingUtilizationOfUnsecuredLines",
        "age",
        "NumberOfTime30-59DaysPastDueNotWorse",
        "DebtRatio",
        "MonthlyIncome",
        "NumberOfOpenCreditLinesAndLoans",
        "NumberOfTimes90DaysLate",
        "NumberRealEstateLoansOrLines",
        "NumberOfTime60-89DaysPastDueNotWorse",
        "NumberOfDependents",
    ]

    target_key = "SeriousDlqin2yrs"
    # Rows with a missing target value are assigned 0 before scoring.
    # NOTE(review): this counts unlabeled rows as class 0 - confirm that is
    # the intended treatment rather than dropping them.
    df[target_key] = df[target_key].fillna(0)
    target = df[target_key]

    gini = GiniIndex(df, attribute, target_key, target)
    # gini_index() is consumed through .items(), i.e. as a mapping of
    # attribute name -> Gini value.
    mygini_index_dict = gini.gini_index()
    # Ascending sort on the value, the second element of each pair.
    gini_list = sorted(mygini_index_dict.items(), key=lambda item: item[1])

    print("Gini index of each attribute:")
    for item in gini_list:
        print(item)