def fit_best_m():
    """Fit and save the best-parameter models for every dataset.

    For each name in ``DATASETS`` two training sets are obtained from
    ``train_test_split`` (``dtype='dataset'`` and ``dtype='frame'``).  In both,
    column 0 is used as the target and the remaining columns as features.
    Every entry of ``MODELS`` is then configured with the stored best
    parameters, fitted and saved: once for the dataset variant and once for
    the frame variant.

    Returns:
        None.  Fitted models are handed to ``save_experiment``.
    """
    for dataset in DATASETS:
        # Element 0 of the split result is used as the training portion.
        train = train_test_split(dataset, dtype='dataset')[0]
        X, y = train[:, 1:], train[:, 0]

        train_frame = train_test_split(dataset, dtype='frame')[0]
        X_frame, y_frame = train_frame[:, 1:], train_frame[:, 0]

        for i, model in enumerate(MODELS):
            name = MODEL_NAME[i]
            print(f"fitting best '{name}' model for '{dataset}'")
            best_params = load_experiment(name + '_' + dataset +
                                          "_best_params")
            best_params_f = load_experiment(name + '_' + dataset +
                                            "_frame" + "_best_params")

            # Both fits go through the same ``model`` object.  If
            # ``set_params``/``fit`` return that object itself (as
            # scikit-learn estimators do), fitting the frame variant would
            # overwrite the dataset fit.  Saving each fit before the model is
            # reconfigured keeps the two artefacts distinct.
            # NOTE(review): assumes save_experiment persists the object at
            # call time -- confirm.
            mod1 = model.set_params(**best_params).fit(X, y)
            save_experiment(mod1, f"best_'{name}'_'{dataset}'")

            mod2 = model.set_params(**best_params_f).fit(X_frame, y_frame)
            save_experiment(mod2, f"best_'{name}'_'{dataset}'_frame")
            print(f"Completed best '{name}' model for '{dataset}'")
    return None
def calc_frame():
    """Compute the frame of every dataset named in ``DATASETS`` and save it.

    Each dataset is loaded with ``load_experiment``, its first column is
    dropped, ``frame`` is applied to the remaining columns, and the result is
    saved under ``<dataset>_frame``.

    Returns:
        None.
    """
    for dataset in DATASETS:
        print(f"entering calculation of frame for '{dataset}'")
        # Column 0 is excluded from the frame computation.
        features = load_experiment(dataset)[:, 1:]
        save_experiment(frame(features), dataset + "_frame")
        print(f"exiting calculating frame for '{dataset}'")
def generate_data():
    """Generate one dataset per entry of ``PARAMS`` and save it.

    Entry ``k`` of ``PARAMS`` is passed to ``generateData`` as
    ``n=params[0]``, ``classWeights=params[1]`` and ``d=params[2]``; the
    result is saved under the name ``DATASETS[k]``.

    Returns:
        None.
    """
    for position, params in enumerate(PARAMS):
        print(f"generating dataset for '{params}'")
        generated = generateData(n=params[0], d=params[2],
                                 classWeights=params[1])
        save_experiment(generated, DATASETS[position])
        print(f"Completed dataset generation for '{params}'")
예제 #4
0
def _fit_best_for(i, model, train_sets):
    """Grid-search, configure and fit model ``i`` on every training set.

    Args:
        i: Index of the model in ``M_PARAMS`` / ``TRAINED_MODELS``.
        model: The entry of ``MODELS`` handed to ``Gridsearch``.
        train_sets: Indexable collection of 2-D arrays; column 0 is used as
            the target and the remaining columns as features.

    Returns:
        A list with the result of ``fit`` for each training set.
    """
    copies = len(DATASET_NAMES)
    best_params = list(map(Gridsearch, [model] * copies,
                           [M_PARAMS[i]] * copies, train_sets))
    # Configure every estimator before fitting any of them (two separate
    # passes), so the order of side effects on TRAINED_MODELS is unchanged.
    configured = [TRAINED_MODELS[i][j].set_params(**best_params[j])
                  for j, _ in enumerate(train_sets)]
    return [estimator.fit(data[:, 1:], data[:, 0])
            for estimator, data in zip(configured, train_sets)]


def fit_best_m():
    """Grid-search and fit every model on the three training-set families.

    The families are loaded from the experiments ``train_full_df``,
    ``train_from_fframe`` and ``train_from_CLframe``.  For each model in
    ``MODELS`` and each family, the fitted models are saved under
    ``<MODEL_NAME[i]>_<suffix>``.

    NOTE(review): the same ``TRAINED_MODELS[i][j]`` objects are reconfigured
    and refit for each family.  If ``set_params``/``fit`` return the estimator
    itself (as scikit-learn estimators do), this relies on ``save_experiment``
    persisting the list at call time -- confirm.

    Returns:
        None.
    """
    train_full_df = load_experiment('train_full_df')
    train_from_fframe = load_experiment('train_from_fframe')
    train_from_CLframe = load_experiment('train_from_CLframe')

    # (training sets, experiment-name suffix), processed in this order:
    # full data, frame data, class-frame data.
    stages = (
        (train_full_df, "best_model_f_full_df"),
        (train_from_fframe, "best_model_f_full_frame"),
        (train_from_CLframe, "best_model_f_Class_frames"),
    )

    for i, model in enumerate(MODELS):
        name = MODEL_NAME[i]
        print(f"entering grid search for the '{name}'")
        for train_sets, suffix in stages:
            fitted_models = _fit_best_for(i, model, train_sets)
            save_experiment(fitted_models, name + '_' + suffix)
        print(f"exiting grid search for the '{name}'")
    return None
예제 #5
0
def calc_frame():
    """Compute three families of frames for the stored datasets and save them.

    The datasets are loaded from the ``'DATASETS'`` experiment.  ``frame1``,
    ``frame2`` and ``frame3`` are each mapped over them with
    ``dview.map_sync`` and the results are saved as ``full_frames``,
    ``majority_class_frames`` and ``minority_class_frames`` respectively.

    Returns:
        None.
    """
    datasets = load_experiment('DATASETS')

    # Frames of the complete datasets.
    print("entering calculation of frame for full datasets")
    save_experiment(dview.map_sync(frame1, datasets), "full_frames")
    print("exiting calculation of frame for full datasets")

    # Frames of the majority class (per the log message; frame2 is defined
    # elsewhere).
    print("entering calculation of frame for majority class")
    save_experiment(dview.map_sync(frame2, datasets), "majority_class_frames")
    print("exiting calculation of frame for majority class")

    # Frames of the minority class (per the log message; frame3 is defined
    # elsewhere).
    print("entering calculation of frame for minority class")
    save_experiment(dview.map_sync(frame3, datasets), "minority_class_frames")
    print("exiting calculation of frame for minority class")
예제 #6
0
    return q


# Function to generate frames
def frame_multicore(X, M=1000.0):
    """Compute frame indices of ``X`` by fanning the work out over engines.

    Args:
        X: 2-D array.  Column 0 is dropped; the remaining columns are used.
        M: Constant that fills the extra row appended below the transposed
            data (default 1000.0).

    Returns:
        Sorted array of the unique indices returned by ``frame_indices``
        across all engines.

    Note:
        Reseeds NumPy's global random generator with 10 on every call.
        ``rc`` and ``dview`` are module-level objects defined elsewhere
        (presumably an ipyparallel client and direct view -- confirm).
    """
    np.random.seed(10)
    # Initialisation: drop column 0, then stack a constant row of M below
    # the transposed data.
    features = X[:, 1:]
    engine_count = len(rc.ids)
    n = features.shape[0]
    Q = np.vstack((features.T, M * np.ones(n)))

    # Split the row indices into one chunk per available engine.
    index_chunks = partition(range(n), engine_count)

    # Map the index work: every engine receives the full matrix Q together
    # with its own chunk of indices.
    pending = dview.map(frame_indices, [Q] * engine_count, index_chunks)
    per_engine = pending.get()

    # Merge the per-engine results into one sorted, de-duplicated array.
    # The empty int64 seed uses the ``np`` alias like the rest of the block.
    merged = [index for chunk in per_engine for index in chunk]
    return np.union1d(np.array([], dtype=np.int64), merged)


# Load the previously generated datasets.
DATASETS = load_experiment('DATASETS')
# Compute the frame of every full dataset, one dataset after another, and
# store the list of results under "full_frames".
print("entering calculation of frame for full datasets")
q = [frame_multicore(dataset) for dataset in DATASETS]
save_experiment(q, "full_frames")
print("exiting calculation of frame for full datasets")
예제 #7
0
def generate_data():
    """Generate all datasets through ``dview`` and save them as 'DATASETS'.

    ``generateData`` is mapped over the module-level sequences ``n``, ``d``
    and ``w`` with ``dview.map_sync``; the resulting collection is saved
    under the experiment name ``'DATASETS'``.

    Returns:
        None.
    """
    print("generating datasets...")
    save_experiment(dview.map_sync(generateData, n, d, w), 'DATASETS')
    print("Completed dataset generation")
예제 #8
0
def _split_and_save(datasets, train_ind, train_name, test_name):
    """Materialise and save the train/test parts for given training indices.

    Args:
        datasets: Collection of indexable datasets.
        train_ind: One collection of training row indices per dataset.
        train_name: Experiment name for the list of training subsets.
        test_name: Experiment name for the list of test subsets.
    """
    train_sets = dview.map_sync(lambda x, ind: x[ind], datasets, train_ind)
    # Test rows are every row index not chosen for training.  Sorting makes
    # their order explicit instead of relying on set iteration order.
    test_ind = dview.map_sync(
        lambda x, used: sorted(set(range(len(x))) - set(used)),
        datasets, train_ind)
    test_sets = dview.map_sync(lambda x, ind: x[ind], datasets, test_ind)
    save_experiment(train_sets, train_name)
    save_experiment(test_sets, test_name)


def train_test_split():
    """Create and save three train/test splits of the stored datasets.

    1. Full datasets: training indices come from ``t_t_split``; saved as
       ``train_full_df`` / ``test_full_df``.
    2. Full frame: training indices come from ``frthst_sum_f_frame`` applied
       to the ``full_frames`` indices with a size of ``int(0.8 * n_k)`` per
       dataset; saved as ``train_from_fframe`` / ``test_from_fframe``.
    3. Per-class frames: indices selected from the majority and minority
       class frames are combined with ``+``; saved as
       ``train_from_CLframe`` / ``test_from_CLframe``.

    ``n`` and ``w`` are module-level sequences defined elsewhere
    (presumably dataset sizes and per-class weights -- confirm).

    Returns:
        None.
    """
    # Split full datasets
    datasets = load_experiment('DATASETS')
    print("Train test split for full dataset...")
    train_ind = dview.map_sync(t_t_split, datasets)
    _split_and_save(datasets, train_ind, "train_full_df", "test_full_df")
    print("Train test split for full dataset completed")

    # Split for frame on full dataset based on furthest sum approach
    print("Train test split for full frame...")
    full_frame_indices = load_experiment('full_frames')  # TODO: confirm version
    train_sizes = list(map(int, np.multiply(n, 0.8)))
    train_ind = dview.map_sync(frthst_sum_f_frame, datasets,
                               full_frame_indices, train_sizes)
    _split_and_save(datasets, train_ind,
                    "train_from_fframe", "test_from_fframe")
    print("Train test split for full frame completed")

    # Split from frame of each class
    print("Train test split for separate frames by class...")
    majCls_f_ind = load_experiment('majority_class_frames')
    minCls_f_ind = load_experiment('minority_class_frames')
    # Per-class training sizes: 0.8 * class weight * n, truncated to int.
    majority_sizes = list(map(int, np.multiply(
        np.multiply([weights[0] for weights in w], 0.8), n)))
    ind1 = dview.map_sync(frthst_sum_f_frame, datasets,
                          majCls_f_ind, majority_sizes)

    minority_sizes = list(map(int, np.multiply(
        np.multiply([weights[1] for weights in w], 0.8), n)))
    ind2 = dview.map_sync(frthst_sum_f_frame, datasets,
                          minCls_f_ind, minority_sizes)

    # NOTE(review): ``+`` concatenates only if frthst_sum_f_frame returns
    # lists; with NumPy arrays it would add element-wise -- confirm.
    train_ind = dview.map_sync(lambda x1, x2: x1 + x2, ind1, ind2)
    _split_and_save(datasets, train_ind,
                    "train_from_CLframe", "test_from_CLframe")
    print("Train test split for separate frames completed")
    return None