# Oversample the minority classes with SMOTE before training.
sm = SMOTE(k_neighbors=args.kneighbors, random_state=args.randomseed, n_jobs=-1)
# ``fit_sample`` was removed in imbalanced-learn 0.6; ``fit_resample`` is the
# supported name (available since 0.4, same behavior).
x_resampled, y_resampled = sm.fit_resample(X, y)

# Read class labels and per-class counts after oversampling.
np_resampled_y = np.asarray(np.unique(y_resampled, return_counts=True))
df_resampled_y = pd.DataFrame(np_resampled_y.T, columns=['Class', 'Sum'])
print("\nNumber of samples after over sampling:\n{0}\n".format(
    df_resampled_y))

# Build the deep-forest classifier and echo all hyper-parameters for the log.
clf = CascadeForestClassifier(random_state=args.randomseed)
print("\nClassifier parameters:")
print(clf.get_params())
print("\nSMOTE parameters:")
print(sm.get_params())
print("\n")

# Train on the SMOTE-resampled data.
clf.fit(x_resampled, y_resampled)
# Predict on the held-out test set (X_test was NOT resampled).
y_pred = clf.predict(X_test)

# Report test-set metrics with the multi-class or binary helper.
if num_categories > 2:
    model_evaluation(num_categories, y_test, y_pred)
else:
    bi_model_evaluation(y_test, y_pred)

end_time = time.time()  # end-of-run timestamp
print("\n[Finished in: {0:.6f} mins = {1:.6f} seconds]".format(
    (end_time - start_time) / 60, end_time - start_time))
# Binarización de características categóricas. # Usamos M.todense() para ver los datos en tamaño normal. Si no, se guardan en formato COOmatrix OHE = preprocessing.OneHotEncoder( categorical_features = categoricalAttributes, handle_unknown = 'ignore' ).fit( X_train ) X_train = OHE.transform(X_train).todense() X_test = OHE.transform(X_test).todense() # Equilibrado de representación de cada clase sm = SMOTE( ratio = 'minority', random_state = seed, k_neighbors = 3 ) Xres, Yres = sm.fit_sample( X_train, y_train ) print ("Tras el equilibrado con SMOTE:{}".format(sm.get_params())) for i in np.unique( y_train ): print( "Número de instancias en la clase {}: {} {}" .format( i, len( np.where( y_train == i )[0] ), len( np.where( y_test == i )[0] ) ) ) # Creación y ajuste del modelo de aprendizaje Random Forest # para la selección de instancais y características rfc = RandomForestClassifier( random_state=seed, n_estimators = 50, n_jobs = -1, max_depth = 30, min_samples_leaf = 10,