def test_df_sorting():
    df_c, df_p = create_df([
        random.sample(range(10, 300), 50),
        random.sample(range(10, 300), 50),
        random.sample(range(10, 300), 50)
    ])

    def do_sort(col, ascending):
        srt_c = df_c.sort_values(by=col, ascending=ascending)
        srt_p = df_p.sort_values(by=col, ascending=ascending)
        assert_eq(srt_c, srt_p)

    # single column
    for asc in [True, False]:
        for c in range(0, 3):
            do_sort(c, asc)

    # multi column
    for asc in [True, False]:
        for c1 in range(0, 3):
            for c2 in range(0, 3):
                if c1 != c2:
                    do_sort([c1, c2], asc)
                for c3 in range(0, 3):
                    if c1 != c2 and c1 != c3 and c2 != c3:
                        do_sort([c1, c2, c3], asc)
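
# A minimal sketch of the helpers the tests here rely on; their real
# implementations are not shown in this file. `create_df` is assumed to build
# the same column data as both a cuDF and a pandas DataFrame (the `_c` / `_p`
# suffixes suggest that pairing), and `assert_eq` to compare the two frames.
# Treat every definition below as a hypothetical stand-in, not the project's
# actual API.
import random

import cudf
import pandas as pd
from pandas.testing import assert_frame_equal


def create_df(columns):
    """Return (cudf_frame, pandas_frame) built from equal-length columns."""
    data = {i: col for i, col in enumerate(columns)}
    return cudf.DataFrame(data), pd.DataFrame(data)


def assert_eq(df_c, df_p, sort=False):
    """Assert that a cuDF frame and a pandas frame hold the same data."""
    left, right = df_c.to_pandas(), df_p.copy()
    if sort:
        left = left.sort_values(list(left.columns)).reset_index(drop=True)
        right = right.sort_values(list(right.columns)).reset_index(drop=True)
    assert_frame_equal(left, right, check_dtype=False)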
def test_df_joining():
    df_c_1, df_p_1 = create_df([
        random.sample(range(10, 300), 50),
        random.sample(range(10, 300), 50),
        random.sample(range(10, 300), 50)
    ])
    df_c_2, df_p_2 = create_df([
        random.sample(range(10, 300), 50),
        random.sample(range(10, 300), 50),
        random.sample(range(10, 300), 50)
    ])

    def do_join(col):
        # Index on copies rather than with inplace=True: the original code
        # mutated the shared frames on every call, so any second call with a
        # different column list would fail on the already-dropped columns.
        srt_c = df_c_1.set_index(col).join(df_c_2.set_index(col),
                                           lsuffix="l", rsuffix="r")
        srt_p = df_p_1.set_index(col).join(df_p_2.set_index(col),
                                           lsuffix="l", rsuffix="r")
        # join output order is not guaranteed to match, so compare sorted
        assert_eq(srt_c, srt_p, sort=True)

    for c1 in range(0, 3):
        # single column
        do_join([c1])
        # multi column
        for c2 in range(0, 3):
            if c1 != c2:
                do_join([c1, c2])
            for c3 in range(0, 3):
                if c1 != c2 and c1 != c3 and c2 != c3:
                    do_join([c1, c2, c3])
def plot_corr_model(y, y_pred, name='plotcorr.png'):
    # `pnt` / `pnp` are assumed to be module-level lists naming the true and
    # predicted columns of the DataFrame returned by utils.create_df.
    df = utils.create_df(y, y_pred)
    font_size = 22
    labelx = [r'$\phi_{cep}$', r'$\theta$', r'$L_p$']
    labely = [r'predict $\phi_{cep}$', r'predict $\theta$', r'predict $L_p$']
    for i in range(len(pnt)):
        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 7), dpi=200)
        ax.scatter(df[pnt[i]], df[pnp[i]], s=2)
        # identity line y = x over the range of the true values
        red_line = [df[pnt[i]].min(), df[pnt[i]].max()]
        ax.plot(red_line, red_line, color='red')
        ax.set_xlabel(labelx[i], fontsize=font_size)
        ax.set_ylabel(labely[i], fontsize=font_size)
        ax.tick_params(axis='both', which='major', labelsize=15)
        plt.tight_layout()
        plt.savefig('../picture/' + name[:-3] + str(i) + '.png')
        # plt.clf()
        plt.close()
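
# Example call (hypothetical, assuming y_test and y_pred are (N, 3) arrays of
# true and predicted phi_cep, theta and L_p values; names are illustrative):
# plot_corr_model(y_test, model.predict(x_test), name='fcnn_corr.png')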
k = 10
y_min = np.array([0, 0, 0.08])
y_max = np.array([180, 75, 0.78])

for i in range(k):
    model_fcnn.append(load_model(fcnn_dir + 'model' + str(i) + '.h5'))
    model_pca.append(load_model(pca_dir + 'model' + str(i) + '.h5'))
    model_aug.append(load_model(aug_dir + 'model' + str(i) + '.h5'))

    x, y = load_data(fcnn_dir + 'data' + str(i) + '.h5')
    data_fcnn['x'], data_fcnn['y'] = x, y
    y_pred = model_fcnn[i].predict(x)
    y_pred, y = utils.postprocessing(y_pred, y, y_min, y_max)
    accuracy_fcnn.append(utils.metric(utils.create_df(y, y_pred)))

    x, y = load_data(pca_dir + 'data' + str(i) + '.h5')
    data_pca['x'], data_pca['y'] = x, y
    y_pred = model_pca[i].predict(x)
    y_pred, y = utils.postprocessing(y_pred, y, y_min, y_max)
    accuracy_pca.append(utils.metric(utils.create_df(y, y_pred)))

    x, y = load_data(aug_dir + 'data' + str(i) + '.h5')
    data_aug['x'], data_aug['y'] = x, y
    y_pred = model_aug[i].predict(x)
    y_pred, y = utils.postprocessing(y_pred, y, y_min, y_max)
    accuracy_aug.append(utils.metric(utils.create_df(y, y_pred)))

print('fcnn')
print(get_mean_result(accuracy_fcnn))
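
# A minimal sketch of get_mean_result, which is used above but not defined in
# this file. It assumes each list element is the nested dict produced by
# utils.metric (indexed elsewhere in this code as result['average']['MRE']);
# the real helper may differ.
import numpy as np


def get_mean_result(fold_results):
    """Average every metric of utils.metric's nested dict across the k folds."""
    return {
        stat: {
            crit: float(np.mean([r[stat][crit] for r in fold_results]))
            for crit in fold_results[0][stat]
        }
        for stat in fold_results[0]
    }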
text_feature_path = args.text_feature_path
label_des_path = args.label_des_path
os.makedirs(save_root, exist_ok=True)

###################################################################################################################
if text_edge_weight:
    save_seed_idxname = '{}/seed_{}knn_k{}_sw{}_top{}.npy'.format(
        save_root, text_dist_def, text_k, text_self_weight, text_topk)
else:
    save_seed_idxname = '{}/seed_{}knn_k{}_sw{}negw_top{}.npy'.format(
        save_root, text_dist_def, text_k, text_self_weight, text_topk)

textdf_file_path = os.path.join(save_root, 'data_frame.pkl')
if os.path.exists(textdf_file_path):
    df = pd.read_pickle(textdf_file_path)
else:
    df = utils.create_df(text_imglist_path, textdf_file_path)
class_num = np.unique(np.array(df['y']))

if os.path.exists(save_seed_idxname):
    seed_index = np.load(save_seed_idxname)
    print('Read Seed File from {}.'.format(save_seed_idxname), flush=True)
else:
    if text_edge_weight:
        save_textknn_filename = '{}/textknn_{}knn_k{}_sw{}'.format(
            save_root, text_dist_def, text_k, text_self_weight)
    else:
        save_textknn_filename = '{}/textknn_{}knn_k{}_sw{}negw'.format(
            save_root, text_dist_def, text_k, text_self_weight)
    # select seed and return index
    if text_k > 0:
        if use_multisource:
            df = utils.get_knn_conf_multisource(
def process_data(self):
    """Run the raw-CSV processing pipeline: sanitize the raw file, split it
    by ward, create shapefiles and append building attributes.
    :return: None
    """
    # ===========================================
    # CREATE PANDAS DATAFRAME FROM RAW CSV FILE
    # ===========================================
    df_raw = ut.create_df(self.raw_csv_filename)

    # ===========================================
    # GENERATE PROCESSED CSV
    # ===========================================
    # call the helper once instead of twice and reuse its result
    output_files = self.create_ouput_files_raw_csv_processing()
    output_dwellings = output_files['output_dwellings']
    output_pois = output_files['output_pois']
    ut.sanitize_and_separate_df_pois(
        df=df_raw,
        output_file_dwelling=output_dwellings,
        output_file_pois=output_pois,
        struct_type_col=self.params['struct_type_col'],
        null_feat_cat_replacement="missing",
        cols_to_keep_df=self.params['cols_df'],
        cols_to_keep_poi=self.params['cols_poi'],
        new_col_names_df=self.params['new_names_df'],
        new_col_names_poi=self.params['new_names_poi'],
        residential_struct_category=self.params['res_struct_val'])

    # ===========================================
    # SPLIT CSV BY WARD
    # ===========================================
    if self.district:
        prov = self.province
        dist = self.district
        self.dir_with_ward_subdirs = self.ea_demarcation_dir.joinpath(prov, dist)

    ward_id_col = self.params['ward_id_col']
    ut.split_csv_into_wards(csv_file=output_dwellings,
                            ward_id_col=ward_id_col,
                            output_folder=self.dir_with_ward_subdirs,
                            suffix="df")
    ut.split_csv_into_wards(csv_file=output_pois,
                            ward_id_col=ward_id_col,
                            output_folder=self.dir_with_ward_subdirs,
                            suffix="poi")

    # ===========================================
    # CREATE SHP FILES
    # ===========================================
    ut.create_shp_for_each_ward(self.dir_with_ward_subdirs,
                                crs=self.params["crs"],
                                lon_col=self.params["lon"],
                                lat_col=self.params["lat"])

    # =========================================================
    # APPEND HH LISTING BASED POPULATION COUNT, STRUCTURE COUNT
    # AND BUILDING COUNTS FROM SATELLITE IMAGERY
    # =========================================================
    self.append_building_attributes_to_ward_level_ea_shp()
# If global_color_histograms is set, each image is converted to the chosen
# colour space and its global colour histogram is saved under the
# resuts_GVHistogram results folder (created if needed).
global_color_histograms = False

# Final evaluation and test
performEvaluation = 0
performTest = 1

# which of the three final methods is performed: 1 BGR, 2 LUV, 3 Wavelet
method = 2

# colour space type and level (note: this snippet defines no branch for
# method == 3, the Wavelet method)
if method == 1:
    spaceType = "BGR"
    level = 2
elif method == 2:
    spaceType = "LUV"
    level = 2

dfDataset = create_df(pathDS)
dfQuery = create_df(pathQueries)
dfQueryTest = create_df(pathQueriesTest)

if global_color_histograms:
    for i in range(len(dfDataset)):
        dfSingle = dfDataset.iloc[i]
        imgBGR = get_full_image(dfSingle, pathDS)
        imageName = dfSingle['Image']
        channel0Single, channel1Single, channel2Single = global_color_hist(
            imgBGR, spaceType, pathprep_resultDS, imageName)
        save_global_color_hist(channel0Single, channel1Single, channel2Single,
                               dfSingle, spaceType, imageName, pathResults)

if build_dataset:
    # Read Images
path = '../result/pca_enumeration/'
os.makedirs(path, exist_ok=True)

y_min = np.array([0, 0, 0.08])
y_max = np.array([180, 75, 0.78])

accuracy = {}
for i in range(100, 2, -10):
    acc = []
    mod = []
    for j in range(3):
        mod.append(load_model(path + 'model' + str(i) + '_' + str(j) + '.h5'))
        x, y = load_data(path + 'data' + str(i) + '_' + str(j) + '.h5')
        y_pred = mod[j].predict(x)
        y_pred, y = utils.postprocessing(y_pred, y, y_min, y_max)
        acc.append(utils.metric(utils.create_df(y, y_pred)))
    accuracy[i] = acc

result = []
met = 'average'
crit = 'MRE'
for k in accuracy.keys():
    result.append([
        accuracy[k][0][met][crit],
        accuracy[k][1][met][crit],
        accuracy[k][2][met][crit]
    ])
df_accuracy = pd.DataFrame(result, index=list(accuracy.keys()))
df_accuracy = df_accuracy.T
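
# Hypothetical follow-up, not in the source: after the transpose, each column
# of df_accuracy is one PCA dimensionality and each row one of the 3 repeats,
# so the mean MRE per component count can be read off directly.
print(df_accuracy.mean(axis=0))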
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

pca = PCA(n_components=24)
pca.fit(x_train)
x_train = pca.transform(x_train)
x_test = pca.transform(x_test)
#x_train = x_train[:, 3:]
#x_test = x_test[:, 3:]
print(x_train.shape)

x_train, x_test, x_min, x_max = utils.preprocessing(x_train, x_test)
y_train, y_test, y_min, y_max = utils.preprocessing(y_train, y_test)

model, history = train_nn(x_train, y_train, epochs)
y_pred = model.predict(x_test)
y_pred, y = utils.postprocessing(y_pred, y_test, y_min, y_max)
print(utils.metric(utils.create_df(y, y_pred)))
#model.save(path + 'model' + str(i) +'.h5')
#utils.save_data(x_test, y_test, i, path)
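
# A plausible sketch of the utils scaling helpers used throughout these
# scripts; the real utils module is not shown, so treat the bodies below as
# assumptions. preprocessing min-max scales train and test with the training
# range and returns that range; postprocessing inverts the scaling so that
# predictions and targets are compared in physical units (consistent with the
# fixed y_min / y_max arrays used above).
def preprocessing(train, test):
    lo, hi = train.min(axis=0), train.max(axis=0)
    return (train - lo) / (hi - lo), (test - lo) / (hi - lo), lo, hi


def postprocessing(y_pred, y, lo, hi):
    return y_pred * (hi - lo) + lo, y * (hi - lo) + lo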