def fit_best_m():
    """Refit each model family on every dataset (and its frame variant) using
    previously saved best hyper-parameters, then persist the fitted models.

    Reads per-(model, dataset) "*_best_params" experiments written by an earlier
    grid-search step; writes one fitted model per (model, dataset, variant).
    Returns None.
    """
    import copy  # local import: used only for estimator duplication below

    for dataset in DATASETS:
        # First split: raw dataset; column 0 is the label, the rest are features.
        data1 = train_test_split(dataset, dtype='dataset')
        train1 = data1[0]
        X, y = train1[:, 1:], train1[:, 0]
        # Second split: the dataset's frame variant, same column layout.
        data2 = train_test_split(dataset, dtype='frame')
        train2 = data2[0]
        X_frame, y_frame = train2[:, 1:], train2[:, 0]
        for i, model in enumerate(MODELS):
            print(f"fitting best '{MODEL_NAME[i]}' model for '{dataset}'")
            best_params = load_experiment(MODEL_NAME[i] + '_' + dataset + "_best_params")
            best_params_f = load_experiment(MODEL_NAME[i] + '_' + dataset + "_frame" + "_best_params")
            # BUG FIX: set_params()/fit() return the estimator itself, so the
            # original code made mod1 and mod2 alias the SAME object — the frame
            # refit silently overwrote the full-data model. Deep-copy the
            # template estimator so the two fitted models stay distinct.
            mod1 = copy.deepcopy(model).set_params(**best_params).fit(X, y)
            mod2 = copy.deepcopy(model).set_params(**best_params_f).fit(X_frame, y_frame)
            save_experiment(mod1, f"best_'{MODEL_NAME[i]}'_'{dataset}'")
            save_experiment(mod2, f"best_'{MODEL_NAME[i]}'_'{dataset}'_frame")
            print(f"Completed best '{MODEL_NAME[i]}' model for '{dataset}'")
    return None
def calc_frame():
    """Compute the frame of each dataset's feature matrix and persist it.

    Loads every dataset by name, drops the label column (column 0), runs
    `frame` on the remaining features, and saves the result under
    "<dataset>_frame". Returns None.
    """
    for dataset in DATASETS:
        print(f"entering calculation of frame for '{dataset}'")
        features = load_experiment(dataset)[:, 1:]
        save_experiment(frame(features), dataset + "_frame")
        print(f"exiting calculating frame for '{dataset}'")
    return None
def generate_data():
    """Generate one synthetic dataset per parameter tuple and persist each.

    PARAMS entries are unpacked as (n, classWeights, d) — note the ordering:
    params[1] is the class-weight spec and params[2] the dimensionality.
    Each generated dataset is saved under the matching DATASETS[i] name.
    Returns None.
    """
    # enumerate replaces the original manual `i = 0 / i += 1` counter.
    for i, params in enumerate(PARAMS):
        print(f"generating dataset for '{params}'")
        data = generateData(n=params[0], d=params[2], classWeights=params[1])
        save_experiment(data, DATASETS[i])
        print(f"Completed dataset generation for '{params}'")
    return None
def fit_best_m():
    """Grid-search and refit each model family on three training-set variants.

    For every model in MODELS, runs Gridsearch per dataset on (a) the full
    training data, (b) the full-frame training data, and (c) the class-frame
    training data, refits the corresponding stored estimators with the best
    parameters, and persists each fitted list. Returns None.
    """
    train_full_df = load_experiment('train_full_df')
    train_from_fframe = load_experiment('train_from_fframe')
    train_from_CLframe = load_experiment('train_from_CLframe')

    def _search_fit_save(model, model_idx, train_sets, out_suffix):
        # One grid search per dataset, then refit the stored estimator for
        # that (model, dataset) pair and persist the whole fitted list.
        # NOTE(review): set_params/fit mutate TRAINED_MODELS[model_idx][j] in
        # place (as the original code did) — this assumes save_experiment
        # serializes immediately, so later refits don't clobber saved state.
        n_sets = len(DATASET_NAMES)
        best_params = list(map(Gridsearch,
                               [model] * n_sets,
                               [M_PARAMS[model_idx]] * n_sets,
                               train_sets))
        fitted = []
        for j in range(len(train_sets)):
            est = TRAINED_MODELS[model_idx][j].set_params(**best_params[j])
            # Column 0 is the label; the remaining columns are features.
            fitted.append(est.fit(train_sets[j][:, 1:], train_sets[j][:, 0]))
        save_experiment(fitted, MODEL_NAME[model_idx] + '_' + out_suffix)

    for i, model in enumerate(MODELS):
        print(f"entering grid search for the '{MODEL_NAME[i]}'")
        # Grid search and model fit for full data
        _search_fit_save(model, i, train_full_df, "best_model_f_full_df")
        # Grid search and model fit for frame data
        _search_fit_save(model, i, train_from_fframe, "best_model_f_full_frame")
        # Grid search and model fit for class frame data
        _search_fit_save(model, i, train_from_CLframe, "best_model_f_Class_frames")
        print(f"exiting grid search for the '{MODEL_NAME[i]}'")
    return None
def calc_frame():
    """Compute three frame variants for every dataset in parallel and persist them.

    Uses the IPython-parallel view `dview` to map frame1 (full data),
    frame2 (majority class) and frame3 (minority class) over all datasets.
    Returns None.
    """
    DATASETS = load_experiment('DATASETS')
    # (worker function, human-readable label, experiment name) per variant.
    jobs = (
        (frame1, "full datasets", "full_frames"),
        (frame2, "majority class", "majority_class_frames"),
        (frame3, "minority class", "minority_class_frames"),
    )
    for worker, label, out_name in jobs:
        print(f"entering calculation of frame for {label}")
        results = dview.map_sync(worker, DATASETS)
        save_experiment(results, out_name)
        print(f"exiting calculation of frame for {label}")
    return None
return q # Function to generate frames def frame_multicore(X, M=1000.0): np.random.seed(10) # initialization X = X[:, 1:] eng_count = len(rc.ids) n = X.shape[0] q = np.array([], dtype=numpy.int64) Q = np.vstack((X.T, M * np.ones(n))) #Split the data into n where n is the number of available engines Range = partition(range(n), eng_count) list_Q = [Q] * eng_count #Map the indices worl ind = dview.map(frame_indices, list_Q, Range) ind = ind.get() # Merge the results ind = [y for x in ind for y in x] q = np.union1d(q, ind) return q # Lad generated datasets DATASETS = load_experiment('DATASETS') # Compute frame for entire dataset print("entering calculation of frame for full datasets") q = list(map(frame_multicore, DATASETS)) save_experiment(q, "full_frames") print("exiting calculation of frame for full datasets")
def generate_data():
    """Generate all experiment datasets in parallel and persist the collection.

    Maps `generateData` over the size (n), dimension (d) and class-weight (w)
    sequences via the IPython-parallel view, then saves the resulting list
    under the name 'DATASETS'. Returns None.
    """
    print("generating datasets...")
    generated = dview.map_sync(generateData, n, d, w)
    save_experiment(generated, 'DATASETS')
    print("Completed dataset generation")
    return None
def train_test_split():
    """Build and persist train/test splits under three sampling strategies.

    1. Random split of the full datasets (`t_t_split`).
    2. Furthest-sum selection seeded from the full-data frame.
    3. Furthest-sum selection per class (majority + minority frames merged).

    Each strategy saves a "train_*" and matching "test_*" experiment.
    Returns None.
    """
    DATASETS = load_experiment('DATASETS')

    # Hoisted once instead of being re-defined inline for every map_sync call;
    # these are shipped to the engines exactly like the original inline lambdas.
    select_rows = lambda x, ind: x[ind]
    complement = lambda x1, x2: list(set(range(len(x1))) - set(x2))

    def _materialize_and_save(train_ind, train_name, test_name):
        # Turn chosen training indices into train/test arrays and persist both.
        train_df = dview.map_sync(select_rows, DATASETS, train_ind)
        test_ind = dview.map_sync(complement, DATASETS, train_ind)
        test_df = dview.map_sync(select_rows, DATASETS, test_ind)
        save_experiment(train_df, train_name)
        save_experiment(test_df, test_name)

    # Split full datasets
    print("Train test split for full dataset...")
    train_ind = dview.map_sync(t_t_split, DATASETS)
    _materialize_and_save(train_ind, "train_full_df", "test_full_df")
    print("Train test split for full dataset completed")

    # Split for frame on full dataset based on furthest sum approach
    print("Train test split for full frame...")
    full_frame_indices = load_experiment('full_frames')  # confirm version
    # Target training size is 80% of each dataset's row count n.
    sizes = list(map(int, np.multiply(n, 0.8)))
    train_ind = dview.map_sync(frthst_sum_f_frame, DATASETS, full_frame_indices, sizes)
    _materialize_and_save(train_ind, "train_from_fframe", "test_from_fframe")
    print("Train test split for full frame completed")

    # Split from frame of each class
    print("Train test split for separate frames by class...")
    majCls_f_ind = load_experiment('majority_class_frames')
    minCls_f_ind = load_experiment('minority_class_frames')
    # Per-class training sizes: 80% of each class's share (w) of n rows.
    maj_sizes = list(map(int, np.multiply(np.multiply([i[0] for i in w], 0.8), n)))
    min_sizes = list(map(int, np.multiply(np.multiply([i[1] for i in w], 0.8), n)))
    ind1 = dview.map_sync(frthst_sum_f_frame, DATASETS, majCls_f_ind, maj_sizes)
    ind2 = dview.map_sync(frthst_sum_f_frame, DATASETS, minCls_f_ind, min_sizes)
    # Merge the per-class index lists into one training-index list per dataset.
    # (Original had a harmless duplicated `train_df = train_df =` assignment here.)
    train_ind = dview.map_sync(lambda x1, x2: x1 + x2, ind1, ind2)
    _materialize_and_save(train_ind, "train_from_CLframe", "test_from_CLframe")
    print("Train test split for separate frames completed")
    return None