コード例 #1
0
                                                        uniform_type=True)
    area_attrs = ['user_live_province', 'user_live_city']
    resource_dir = '../resources'
    X_train = X_train_datapreprocessing.china_area_number_mapping(
        area_attrs, resource_dir)
    X_train = X_train_datapreprocessing.transform_dtype(area_attrs,
                                                        d_type=[int],
                                                        uniform_type=True)
    X_train = X_train_datapreprocessing.dummies_and_fillna()
    #X_train.info()
    #print(X_train.head(5))

    Gini_DF = pandas.concat([X_train, Y_train], axis=1)
    #gini_attrs = Gini_DF.axes[1]
    gini_attrs = list(Gini_DF.columns.values)
    gini = GiniIndex(Gini_DF, gini_attrs, target_key, Gini_DF[target_key])
    gini_index_dict = gini.gini_index()
    gini_list = sorted(gini_index_dict.items(), key=lambda item: item[1])
    for item in gini_list:
        print(item)

    scoring = 'accuracy'
    models = []
    models.append(('LR', LogisticRegression()))
    models.append(('CART', DecisionTreeClassifier()))
    #models.append(('LDA',LinearDiscriminantAnalysis()))
    models.append(('KNN', KNeighborsClassifier()))
    models.append(('NB', GaussianNB()))
    models.append(('RF', RandomForestClassifier()))
    #models.append(('SVM',SVC()))
コード例 #2
0
if __name__ == "__main__":
    # Demo driver: load a CSV, score each listed feature column with
    # GiniIndex, and print the (attribute, score) pairs in ascending
    # score order.
    file_fullpath = '/home/login01/Workspaces/python/dataset/cs.csv'

    # Cells holding the text 'NA' are read as missing values.
    # low_memory=False asks pandas to parse the file in a single pass
    # rather than inferring column types chunk by chunk.
    df = pandas.read_csv(
        file_fullpath, sep=',', na_values='NA', low_memory=False)

    # Feature columns handed to GiniIndex for scoring.
    attribute = [
        "RevolvingUtilizationOfUnsecuredLines",
        "age",
        "NumberOfTime30-59DaysPastDueNotWorse",
        "DebtRatio",
        "MonthlyIncome",
        "NumberOfOpenCreditLinesAndLoans",
        "NumberOfTimes90DaysLate",
        "NumberRealEstateLoansOrLines",
        "NumberOfTime60-89DaysPastDueNotWorse",
        "NumberOfDependents",
    ]

    # Label column; missing labels are filled with 0 before scoring.
    target_key = "SeriousDlqin2yrs"
    df[target_key] = df[target_key].fillna(0)
    target = df[target_key]

    # gini_index() is consumed below through .items(), i.e. as a mapping
    # of attribute name -> score.
    # NOTE(review): GiniIndex is defined outside this block; confirm its
    # exact return type against the class definition.
    gini = GiniIndex(df, attribute, target_key, target)
    mygini_index_dict = gini.gini_index()

    # Order the pairs by score, smallest first. list.sort() is stable, so
    # attributes with equal scores keep the mapping's iteration order.
    gini_list = list(mygini_index_dict.items())
    gini_list.sort(key=lambda pair: pair[1])

    print("Gini index of each attribute:")
    for item in gini_list:
        print(item)