import os from sklearn.pipeline import Pipeline from sklearn.model_selection import cross_val_score from sklearn.model_selection import cross_validate from sklearn.metrics import roc_auc_score from sklearn.metrics import classification_report import score_cv_3_classes name = 'svm_rbf' dim_reduction = 'NONE' #load data import load_data_3_class import save_output public_data, public_labels = load_data_3_class.function_load_data_3_class() def create_score_csv_default_HP(scaler_, RS_outer_KF): n_comp_pca = None #whiten_ = True C_ = 'default' gamma_ = 'default' class_weight_ = 'default' random_state_clf = 503 #random_state_PCA = 42 #random_state_outer_kf = RS_outer_KF dict_best_params = { 'SCALER': [scaler_],
import scipy from sklearn.decomposition import PCA from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.svm import SVC from sklearn.pipeline import Pipeline from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import GridSearchCV, KFold, cross_val_predict, cross_val_score, StratifiedKFold import load_data_3_class import save_output name_clf = 'SVM_poly_MMS' #load data X_train, y_train, X_test, y_test = load_data_3_class.function_load_data_3_class( ) #Scalers from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler scalers_to_test = [StandardScaler(), RobustScaler(), MinMaxScaler(), None] df = pd.DataFrame() # Designate distributions to sample hyperparameters from C_range = np.power(2, np.arange(-10, 11, dtype=float)) gamma_range = np.power(2, np.arange(-10, 11, dtype=float)) n_features_to_test = [0.85, 0.9, 0.95] for i in range(1, 11):
import os

from sklearn.model_selection import (
    GridSearchCV,
    KFold,
    cross_val_predict,
    cross_val_score,
    StratifiedKFold,
    learning_curve,
)
from sklearn.feature_selection import SelectKBest, SelectPercentile
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    roc_auc_score,
    classification_report,
    accuracy_score,
    balanced_accuracy_score,
)
import seaborn as sns

import load_data_3_class

name_clf = 'RandomForestClassifier'

# Load data: the loader is unpacked into a (data, labels) pair.
data, labels = load_data_3_class.function_load_data_3_class()

# Designate distributions to sample hyperparameters from.
n_features_to_test = [0.85, 0.9, 0.95]
n_tree = [15, 30, 45, 60, 75, 90, 105, 120, 140, 160, 180, 200, 220, 240]
depth = [2, 5, 10, 15, 20, 30, 45, 60, 80, 100, 120, 140, 160, 180]

# Fixed seeds keep the forest and the PCA step reproducible between runs.
clf = RandomForestClassifier(class_weight='balanced', random_state=503)
pca = PCA(random_state=42, n_components=0.85)

# Three-stage pipeline: scale -> reduce dimensionality -> classify.
steps = [('scaler', StandardScaler()), ('red_dim', pca), ('clf', clf)]
pipeline = Pipeline(steps)

# Grid over the 'clf' step only ("<step>__<param>" keys); the PCA setting is
# fixed above and n_features_to_test is not part of this grid.
# The spelling of `parameteres` is kept because later code may refer to it.
parameteres = [{'clf__n_estimators': n_tree, 'clf__max_depth': depth}]