''' prints the shape of the data that image_to_series builds from the raster files inside the directory '''
from tsraster.prep import image_to_series

#file directory
path = "../docs/img/temperature/"

# convert the rasters found under `path` and report the (rows, columns) shape
data = image_to_series(path)
print(data.shape)
def calculateFeatures(path, parameters, reset_df, raster_mask=None,
                      tiff_output=True, workers=None):
    '''
    Calculates features or the statistical characteristics of time-series
    raster data. The features are always written to csv files and,
    optionally, to a multi-band tiff file.

    Outputs are written to a sibling directory of ``path`` named
    ``<path stem>_features``: ``extracted_features.csv``,
    ``features_names.csv`` (band number -> feature name) and, when
    ``tiff_output`` is true, ``extracted_features.tiff``.

    :param path: directory path to the raster files
    :param parameters: a dictionary of features to be extracted
    :param reset_df: boolean; when true the rasters are converted to a
        dataframe that is saved as ``my_df.csv`` inside ``path``, when false
        the previously saved dataframe is read back instead
    :param raster_mask: path to binary raster mask (None for no masking)
    :param tiff_output: boolean option for exporting tiff file
    :param workers: number of workers handed to MultiprocessingDistributor;
        when None no distributor is passed to extract_features
    :return: extracted features as a dataframe indexed by pixel_id and time
    '''
    if reset_df:
        # rebuild the series dataframe from the rasters and cache it as csv
        my_df = image_to_series(path)
        cached_csv = os.path.join(path, 'my_df.csv')
        print('df: ' + cached_csv)
        my_df.to_csv(cached_csv, chunksize=10000, index=False)
    else:
        # reuse the csv holding the saved version of my_df
        my_df = tr.read_my_df(path)

    # mask
    if raster_mask is not None:
        my_df = tr.mask_df(raster_mask=raster_mask, original_df=my_df)

    if workers is not None:
        distributor = MultiprocessingDistributor(
            n_workers=workers,
            disable_progressbar=False,
            progressbar_title="Feature Extraction")
    else:
        distributor = None

    extracted_features = extract_features(
        my_df,
        default_fc_parameters=parameters,
        column_sort="time",
        column_value="value",
        column_id="pixel_id",
        column_kind="kind",
        distributor=distributor)

    # change index name to match pixel and time period
    extracted_features.index.rename('pixel_id', inplace=True)
    extracted_features.reset_index(inplace=True, level=['pixel_id'])
    extracted_features['time'] = (str(my_df.time.min()) + "_"
                                  + str(my_df.time.max()))
    extracted_features.set_index(['pixel_id', 'time'], inplace=True)

    # unmask extracted features; only meaningful when a mask was applied
    # above, so a missing mask is never handed to the helper
    if raster_mask is not None:
        extracted_features = tr.unmask_from_mask(
            mask_df_output=extracted_features,
            missing_value=-9999,
            raster_mask=raster_mask)

    # deal with output location
    out_path = Path(path).parent.joinpath(Path(path).stem + "_features")
    out_path.mkdir(parents=True, exist_ok=True)

    # write out features to csv file
    features_csv = os.path.join(out_path, 'extracted_features.csv')
    print("features:" + features_csv)
    extracted_features.to_csv(features_csv, chunksize=10000)

    # write out feature names; the index is shifted so bands count from 1
    kr = pd.DataFrame(list(extracted_features.columns))
    kr.index += 1
    kr.index.names = ['band']
    kr.columns = ['feature_name']
    kr.to_csv(os.path.join(out_path, "features_names.csv"))

    if not tiff_output:
        return extracted_features

    # write out features to tiff file
    # get image dimension from raw data (third dimension is not needed)
    rows, cols, _ = image_to_array(path).shape

    # one output layer per extracted feature
    matrix_features = extracted_features.values
    num_of_layers = matrix_features.shape[1]

    # reshape the dimension of features extracted
    f2Array = matrix_features.reshape(rows, cols, num_of_layers)
    output_file = 'extracted_features.tiff'

    # Get Meta Data from raw data (first raster supplies georeferencing)
    raw_data = read_images(path)
    GeoTransform = raw_data[0].GetGeoTransform()
    driver = gdal.GetDriverByName('GTiff')
    noData = -9999
    Projection = raw_data[0].GetProjectionRef()
    DataType = gdal.GDT_Float32

    # export tiff
    CreateTiff(output_file, f2Array, driver, noData, GeoTransform,
               Projection, DataType, path=out_path)

    return extracted_features
#extracted_features.head() #%%Mask a dataframe raster_mask = u"F:/Boundary/StatePoly_buf.tif" original_df = r"F:\3month_features\extracted_features.csv" df_mask = tr.mask_df(raster_mask, original_df) #%% Mask a series raster_mask = u"F:/Boundary/StatePoly_buf.tif" original_series = tr.image_to_series_simple(raster_mask) series_mask = tr.mask_df(raster_mask, original_series) #%% mask a long format dataframe path = r"F://3month/" my_df = tr.image_to_series(path) raster_mask = r"F:/Boundary/StatePoly_buf.tif" long_mask = tr.mask_df(raster_mask, my_df) #%% Test unmask for series # Update the values in the masked dataset to something else, here 10 series_mask.iloc[:] = 10 # unmask and update values of original df print(original_series.iloc[:].value_counts().head(4)) updated_s = tr.unmask_df(original_series, series_mask) print(updated_s.iloc[:, 0].value_counts().head(4)) print(original_series.shape) print(series_mask.shape) print(updated_s.shape) updated_s.head()
''' prints the data that image_to_series builds from the raster files inside the directory '''
from tsraster.prep import image_to_series

#file directory
path = "../docs/img/temperature/"

# convert the rasters found under `path` and print the result
series_df = image_to_series(path)
print(series_df)
#%% read_images PASS
# the first two inputs are printed; the single-image input is only read
for _source in (ex_crs, ex_ts):
    print(tr.read_images(_source))
tr.read_images(ex_single)

#%% image_to_array PASS
# array shape for each example input
for _source in (ex_crs, ex_ts, ex_single):
    print(tr.image_to_array(_source).shape)

#%% image_to_series PASS
# first rows of the series built from each example input
for _source in (ex_crs, ex_ts, ex_single):
    print(tr.image_to_series(_source).head())

#%% image_to_series for target data (PASS)
# same check, run on the target inputs
for _source in (tg_crs, tg_ts, tg_single):
    print(tr.image_to_series(_source).head())

#%% mask unmask PASS
# I think mk_ex_crs might have too many observations, but can't figure out how
df_ex_crs = tr.image_to_series(ex_crs)
df_ex_ts = tr.image_to_series(ex_ts)