def features_to_array(path, input_file):
    '''
    Reshape a table of features read from CSV into a 3-D array.

    :param path: directory path to the raster files; only the shape of
        ``image_to_array(path)`` is used here
    :param input_file: CSV file holding the features, read with
        ``pd.read_csv``
    :return: array of shape (rows, cols, number of CSV columns), where rows
        and cols come from the input rasters
    '''
    # The raster stack must be 3-D; its third dimension is not needed here.
    height, width, _ = image_to_array(path).shape

    feature_values = pd.read_csv(input_file).values
    layer_count = feature_values.shape[1]

    # reshape raises ValueError if the CSV does not hold exactly
    # height * width rows
    return feature_values.reshape(height, width, layer_count)
def calculateFeatures(path, parameters, reset_df, raster_mask=None,
                      tiff_output=True, workers=None):
    '''
    Extract the statistical features of time-series raster data.

    The features are always written to ``extracted_features.csv`` and their
    names to ``features_names.csv`` inside a directory named
    ``<stem of path>_features`` that sits next to ``path``. When
    ``tiff_output`` is not False they are also exported as
    ``extracted_features.tiff`` in that same directory.

    :param path: directory path to the raster files
    :param parameters: a dictionary of features to be extracted, handed to
        ``extract_features`` as ``default_fc_parameters``
    :param reset_df: boolean; when False the series dataframe is loaded with
        ``tr.read_my_df(path)``, otherwise it is rebuilt with
        ``image_to_series(path)`` and saved as ``my_df.csv`` inside ``path``
    :param raster_mask: path to binary raster mask; when None the call to
        ``tr.mask_df`` is skipped (``tr.unmask_from_mask`` is still called
        and receives None)
    :param tiff_output: boolean option for exporting tiff file
    :param workers: ``n_workers`` for a ``MultiprocessingDistributor``; when
        None, ``distributor=None`` is passed to ``extract_features``
    :return: extracted features as a dataframe
    '''
    # Deliberately an equality test: only values equal to False reuse the
    # saved dataframe, everything else triggers a rebuild.
    if reset_df == False:
        my_df = tr.read_my_df(path)
    else:
        my_df = image_to_series(path)
        series_csv = os.path.join(path, 'my_df.csv')
        print('df: ' + series_csv)
        my_df.to_csv(series_csv, chunksize=10000, index=False)

    # mask
    if raster_mask is not None:
        my_df = tr.mask_df(raster_mask=raster_mask, original_df=my_df)

    # Only build a multiprocessing distributor when a worker count is given.
    distributor = None
    if workers is not None:
        distributor = MultiprocessingDistributor(
            n_workers=workers,
            disable_progressbar=False,
            progressbar_title="Feature Extraction")

    extracted_features = extract_features(
        my_df,
        default_fc_parameters=parameters,
        column_sort="time",
        column_value="value",
        column_id="pixel_id",
        column_kind="kind",
        distributor=distributor)

    # Re-index by (pixel_id, time); every row gets the same time label,
    # built from the earliest and latest time in the input series.
    extracted_features.index.rename('pixel_id', inplace=True)
    extracted_features = extracted_features.reset_index(level=['pixel_id'])
    time_span = str(my_df.time.min()) + "_" + str(my_df.time.max())
    extracted_features['time'] = time_span
    extracted_features = extracted_features.set_index(['pixel_id', 'time'])

    # unmask extracted features
    extracted_features = tr.unmask_from_mask(
        mask_df_output=extracted_features,
        missing_value=-9999,
        raster_mask=raster_mask)

    # Outputs go to "<stem>_features", next to the input directory.
    source_dir = Path(path)
    out_path = source_dir.parent.joinpath(source_dir.stem + "_features")
    out_path.mkdir(parents=True, exist_ok=True)

    # write out features to csv file
    features_csv = os.path.join(out_path, 'extracted_features.csv')
    print("features:" + features_csv)
    extracted_features.to_csv(features_csv, chunksize=10000)

    # Write the feature names with a 1-based "band" index.
    band_names = pd.DataFrame(list(extracted_features.columns))
    band_names.index += 1
    band_names.index.names = ['band']
    band_names.columns = ['feature_name']
    band_names.to_csv(os.path.join(out_path, "features_names.csv"))

    # Nothing more to do when the tiff export is switched off.
    if tiff_output == False:
        return extracted_features

    # The image dimensions come from the raw data; one layer per feature.
    rows, cols, _ = image_to_array(path).shape
    feature_values = extracted_features.values
    layer_stack = feature_values.reshape(rows, cols, feature_values.shape[1])

    # Georeferencing metadata is copied from the first raw raster.
    first_raster = read_images(path)[0]
    geo_transform = first_raster.GetGeoTransform()
    driver = gdal.GetDriverByName('GTiff')
    projection = first_raster.GetProjectionRef()

    # export tiff; -9999 is the same missing value used when unmasking
    CreateTiff('extracted_features.tiff', layer_stack, driver, -9999,
               geo_transform, projection, gdal.GDT_Float32, path=out_path)
    return extracted_features
# Exploratory cells: reshape flat prediction/feature tables back onto the
# raster grid and plot them.
raster_ex = "F:/5year/aet/aet-201201.tif"

# Open the example raster only long enough to read its (rows, cols) shape;
# the context manager closes the dataset handle afterwards.
with rasterio.open(raster_ex) as example_raster:
    ex_row, ex_cols = example_raster.shape

f2Array = concatenated_df_predict.reshape(ex_row, ex_cols)
print(f2Array.shape)

# Plot the grid
plt.imshow(f2Array)
plt.gray()
plt.show()

#%%
path = "F:/5year/Fires/"
image_name = tr.image_names(path)
# show only the first layer of the raster stack
rasters = tr.image_to_array(path)[:, :, 0]
plt.imshow(rasters)
plt.gray()
plt.show()

#%%
# first, get the original dimension/shape of image
# NOTE(review): the cell above calls tr.image_to_array; confirm that
# tr.image2array still exists and is not a stale name for the same helper.
og_rasters = tr.image2array(path)
rows, cols, nums = og_rasters.shape

# convert df to matrix array
matrix_features = ts_features.values
num_of_layers = matrix_features.shape[1]
# Manual smoke checks for the tr helpers: each cell runs one helper against
# the example inputs and prints the result for visual inspection.

#%% image_names PASS
for example in (ex_crs, ex_ts, ex_single):
    print(tr.image_names(example))

#%% read_images PASS
for example in (ex_crs, ex_ts):
    print(tr.read_images(example))
# the single-image example is read without printing the result
tr.read_images(ex_single)

#%% image_to_array PASS
for example in (ex_crs, ex_ts, ex_single):
    print(tr.image_to_array(example).shape)

#%% image_to_series PASS
for example in (ex_crs, ex_ts, ex_single):
    print(tr.image_to_series(example).head())

#%% image_to_series for target data (PASS)
for target in (tg_crs, tg_ts, tg_single):
    print(tr.image_to_series(target).head())