import pandas as pd
from datetime import datetime

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

startTime = datetime.now()  # elapsed-time reference reported by run_depth_test()

def run_bs_adaboost():
    df = pd.read_csv('Files/csv_result-Descriptors_Training.csv', sep=',')
    df = df.drop(['id'], axis=1).replace(['P', 'N'], [1, 0])
    df = prc.handle_outlier(prc.detect_outlier_iterative_IQR(df).dropna(thresh=20))
    df = prc.standarize(df)  # or normalize
    # Decision stumps, weighted 20:1 toward the positive class to counter imbalance
    dt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1, class_weight={1: 20, 0: 1}),
                            n_estimators=20)
    print(main(df, "AdaBoost", dt, bs_estimate=True, verbose=True))


# run_depth_test()
# run_bs_dt()
# run_bs_adaboost()

# Test meta-learning example
# abc = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=100)
# main(df=df, name="AdaBoost Decision Stumps", model=abc)

# Print PR curves from test
# plt.legend(loc=1)
# plt.title("Precision Recall Curve")
# plt.show()
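
# A minimal, self-contained sketch (not part of this project's pipeline) of
# why the 20:1 class_weight on the stumps above matters: on imbalanced data,
# weighted stumps trade precision for recall on the rare positive class.
# make_classification and the function name demo_weighted_stumps are
# illustrative assumptions; only the AdaBoostClassifier/DecisionTreeClassifier
# usage mirrors the real code above.
def demo_weighted_stumps():
    from sklearn.datasets import make_classification
    from sklearn.metrics import classification_report
    from sklearn.model_selection import train_test_split

    # Synthetic data with ~5% positives, mimicking a heavily imbalanced problem
    X, y = make_classification(n_samples=2000, weights=[0.95, 0.05], random_state=0)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)
    for w in ({0: 1, 1: 1}, {0: 1, 1: 20}):
        clf = AdaBoostClassifier(
            DecisionTreeClassifier(max_depth=1, class_weight=w),
            n_estimators=20).fit(X_tr, y_tr)
        # Compare per-class precision/recall with and without the weighting
        print(w)
        print(classification_report(y_te, clf.predict(X_te)))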
def run_depth_test():
    df = pd.read_csv('Files/csv_result-Descriptors_Training.csv', sep=',')
    df = df.drop(['id'], axis=1).replace(['P', 'N'], [1, 0])
    df = prc.handle_outlier(prc.detect_outlier_iterative_IQR(df).dropna(thresh=20))
    df = prc.standarize(df)  # or normalize
    rslt = test_tree_depth(df)
    print("Run Time: " + str(datetime.now() - startTime))

    # Print PR curves from the test
    plt.legend(loc=1)
    plt.title("Precision Recall Curve")
    plt.show()

    # Plot Pr@Re>50 against tree depth
    plt.plot(list(range(2, len(rslt))), rslt[2:])
    plt.xlabel("Depth of Tree")
    plt.ylabel("Pr@Re>50")
    plt.title("Testing Decision Tree Depth")
    plt.xticks(list(range(2, len(rslt))))
    plt.show()
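
# test_tree_depth() is defined elsewhere in the project. From its use above it
# must return a list indexed by tree depth whose entries are Pr@Re>50 scores
# (read here as: precision while recall stays above 0.5) and leave one PR
# curve per depth on the current matplotlib figure. The function below is a
# hypothetical sketch of that contract, not the project's implementation;
# cross_val_predict and precision_recall_curve are standard sklearn, the rest
# is assumption.
def test_tree_depth_sketch(df, max_depth=12):
    from sklearn.metrics import precision_recall_curve
    from sklearn.model_selection import cross_val_predict

    X, y = df.drop(['class'], axis=1), df['class']
    scores = [0.0, 0.0]  # pad indices 0-1 so scores[d] lines up with depth d
    for depth in range(2, max_depth + 1):
        proba = cross_val_predict(DecisionTreeClassifier(max_depth=depth),
                                  X, y, cv=5, method='predict_proba')[:, 1]
        precision, recall, _ = precision_recall_curve(y, proba)
        plt.plot(recall, precision, label="depth=%d" % depth)
        # Best precision among operating points that keep recall above 0.5
        scores.append(max(p for p, r in zip(precision, recall) if r > 0.5))
    return scores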
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

import preprocessing as prc
import feature_selection as fs

df = pd.read_csv('Files/csv_result-Descriptors_Training.csv', sep=',')
df = df.drop(['id'], axis=1).replace(['P', 'N'], [1, 0])
df = prc.handle_outlier(prc.detect_outlier_iterative_IQR(df).dropna(thresh=20))
df = prc.standarize(df)  # or normalize

# Keep only features whose variance meets the threshold
# (no selection happens below a threshold of 1!)
# fs_variance = fs.variance_threshold(df, threshold=1)
# fs_variance = pd.concat([fs_variance, df['class']], axis=1)

X = df.drop(['class'], axis=1)
y = df['class']

kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
X_kpca = kpca.fit_transform(X)
X_back = kpca.inverse_transform(X_kpca)  # reconstruction from the kernel space

pca = PCA()
X_pca = pca.fit_transform(X)
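
# X_pca and X_kpca are only computed above. A sketch of one way to inspect
# them: scatter the first two components of each projection, coloured by
# class. The helper name plot_projections and the layout are assumptions,
# not part of the original script.
def plot_projections():
    fig, axes = plt.subplots(1, 2, figsize=(10, 4))
    for ax, Z, title in ((axes[0], X_pca, "PCA"),
                         (axes[1], X_kpca, "Kernel PCA (RBF)")):
        for cls, name in ((0, "N"), (1, "P")):
            m = (y == cls).to_numpy()
            ax.scatter(Z[m, 0], Z[m, 1], s=5, label=name)
        ax.set_title(title)
        ax.legend()
    # pca.explained_variance_ratio_ shows how much variance each component keeps
    fig.tight_layout()
    plt.show()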