# NOTE(review): this chunk opens mid-statement — the keyword arguments below
# belong to a feature-extraction call (presumably ca.calculateFeatures(...))
# whose opening line lies outside this chunk. Left as-is rather than guessed at.
                    parameters = { "mean": None, "maximum": None},
                    reset_df=False,
                    raster_mask = r"F:/Boundary/StatePoly_buf.tif" ,
                    tiff_output=True,
                    workers = 1)

#%%
# Build the target (y) series from the fire rasters, then mask both the target
# and the extracted features with the state-boundary raster so their rows
# align pixel-for-pixel.
target_data = tr.image_to_series_simple("F:/5year/Fires/")
raster_mask = u"F:/Boundary/StatePoly_buf.tif"
original_df = [ target_data, extracted_features]
target_data_mask, extracted_features_mask = tr.mask_df(raster_mask,
                                                       original_df=original_df,
                                                       reset_index = False)
print(target_data_mask.head())
extracted_features_mask.head()

#%% join and test train split yX data
obj = [target_data_mask,extracted_features_mask]
#from sklearn.preprocessing import StandardScaler as scaler
# NOTE(review): test_size=0.9 holds out 90% of the data for testing (training
# on only 10%) — confirm this is intentional.
X_train, X_test, y_train, y_test = md.get_data(obj,
                                               stratify=True,
                                               test_size=0.9,
                                               scale=False)
def calculateFeatures(path, parameters, reset_df, raster_mask=None, tiff_output=True, workers=None):
    '''
    Calculates features or the statistical characteristics of time-series
    raster data. It can also save features as a csv file (dataframe) and/or
    tiff file.

    :param path: directory path to the raster files
    :param parameters: a dictionary of features to be extracted
                       (tsfresh ``default_fc_parameters``)
    :param reset_df: if False, read a previously saved ``my_df.csv`` from
                     ``path``; if True, rebuild the long dataframe from the
                     rasters and cache it as ``my_df.csv``
    :param raster_mask: optional path to a binary raster mask; masked pixels
                        are dropped before extraction and re-inserted
                        (as -9999) afterwards
    :param tiff_output: boolean option for exporting the features as a
                        multi-band tiff file
    :param workers: optional number of processes for tsfresh's
                    MultiprocessingDistributor; None runs single-process
    :return: extracted features as a dataframe indexed by (pixel_id, time),
             and optionally a tiff file
    '''
    if not reset_df:
        # reuse the long-format dataframe saved by a previous run
        my_df = tr.read_my_df(path)
    else:
        # rebuild the long-format dataframe from the rasters and cache it
        my_df = image_to_series(path)
        print('df: ' + os.path.join(path, 'my_df.csv'))
        my_df.to_csv(os.path.join(path, 'my_df.csv'), chunksize=10000, index=False)

    # drop pixels outside the mask before the (expensive) feature extraction
    if raster_mask is not None:
        my_df = tr.mask_df(raster_mask=raster_mask, original_df=my_df)

    if workers is not None:
        distributor = MultiprocessingDistributor(
            n_workers=workers,
            disable_progressbar=False,
            progressbar_title="Feature Extraction")
        #distributor = LocalDaskDistributor(n_workers=workers)
    else:
        distributor = None

    extracted_features = extract_features(
        my_df,
        default_fc_parameters=parameters,
        column_sort="time",
        column_value="value",
        column_id="pixel_id",
        column_kind="kind",
        #chunksize=1000,
        distributor=distributor)

    # change index name to match pixel and time period
    extracted_features.index.rename('pixel_id', inplace=True)
    extracted_features.reset_index(inplace=True, level=['pixel_id'])
    extracted_features['time'] = str(my_df.time.min()) + "_" + str(
        my_df.time.max())
    extracted_features.set_index(['pixel_id', 'time'], inplace=True)

    # unmask extracted features: re-insert masked pixels as missing_value rows.
    # Guarded (fix) so a run without a mask does not call unmask_from_mask
    # with raster_mask=None, mirroring the mask_df guard above.
    if raster_mask is not None:
        extracted_features = tr.unmask_from_mask(
            mask_df_output=extracted_features,
            missing_value=-9999,
            raster_mask=raster_mask)

    # output location: sibling directory "<input-dir-name>_features"
    out_path = Path(path).parent.joinpath(Path(path).stem + "_features")
    out_path.mkdir(parents=True, exist_ok=True)

    # write out features to csv file
    print("features:" + os.path.join(out_path, 'extracted_features.csv'))
    extracted_features.to_csv(os.path.join(out_path, 'extracted_features.csv'),
                              chunksize=10000)

    # write out feature names; index is 1-based so it lines up with the
    # GeoTiff band numbers produced below
    kr = pd.DataFrame(list(extracted_features.columns))
    kr.index += 1
    kr.index.names = ['band']
    kr.columns = ['feature_name']
    kr.to_csv(os.path.join(out_path, "features_names.csv"))

    if not tiff_output:
        return extracted_features

    # get image dimensions from the raw data and reshape the flat feature
    # matrix into (rows, cols, n_features) for export as a multi-band tiff
    rows, cols, num = image_to_array(path).shape
    matrix_features = extracted_features.values
    num_of_layers = matrix_features.shape[1]
    f2Array = matrix_features.reshape(rows, cols, num_of_layers)
    output_file = 'extracted_features.tiff'

    # copy geo-referencing metadata from the first raw raster
    raw_data = read_images(path)
    GeoTransform = raw_data[0].GetGeoTransform()
    driver = gdal.GetDriverByName('GTiff')
    noData = -9999
    Projection = raw_data[0].GetProjectionRef()
    DataType = gdal.GDT_Float32

    # export tiff
    CreateTiff(output_file, f2Array, driver, noData, GeoTransform,
               Projection, DataType, path=out_path)
    return extracted_features
#%% collect multitple years of Y (target) data path = r"G:\Fire_target_train" target_file_prefix = 'fire_' concatenated_target_df = combine_target_rasters(path,target_file_prefix,write_out=False) #%% mask both the attribute data and targets raster_mask =u"F:/Boundary/StatePoly_buf.tif" original_df = [concatenated_attribute_df, concatenated_target_df] mask_attributes_df, mask_target_df = tr.mask_df(raster_mask, original_df) #%% switch panel data from wide to long format import re target = mask_target_df features = mask_attributes_df sep='-' target_ln, features_ln = wide_to_long_target_features(target,features,sep='-') #%% add lagged variables
from tsraster.prep import mask_df, unmask_df, check_mask
import pandas as pd

# paths to an example binary mask raster and a previously saved
# long-format dataframe
raster_mask = "../../wildfire_FRAP/Data/Examples/3month/aet-198401.tif"
original_df = "../../wildfire_FRAP/Data/Examples/3month/my_df.csv"

# run tasks: mask the dataframe, then compare shapes to confirm that masking
# drops rows and unmasking restores the original row count
masked_data = mask_df(raster_mask, original_df)

# check masking
original_df_file = pd.read_csv(original_df)
print("Check Masking")
print("Size of original data: ", original_df_file.shape)
print("Size of masked data: ", masked_data.shape)

print("Check Unmasking")
# check unmasking
unmasked_data = unmask_df(original_df, masked_data)
print("Size of original data: ", original_df_file.shape)
print("Size of unmasked data: ", unmasked_data.shape)
#mask = r"F:/Boundary/StatePoly_buf.tif" #parameters = { # "mean": None, # "maximum": None} # #extracted_features = ca.calculateFeatures(path, # parameters, # reset_df=False, # tiff_output=False) #extracted_features.head() #%%Mask a dataframe raster_mask = u"F:/Boundary/StatePoly_buf.tif" original_df = r"F:\3month_features\extracted_features.csv" df_mask = tr.mask_df(raster_mask, original_df) #%% Mask a series raster_mask = u"F:/Boundary/StatePoly_buf.tif" original_series = tr.image_to_series_simple(raster_mask) series_mask = tr.mask_df(raster_mask, original_series) #%% mask a long format dataframe path = r"F://3month/" my_df = tr.image_to_series(path) raster_mask = r"F:/Boundary/StatePoly_buf.tif" long_mask = tr.mask_df(raster_mask, my_df) #%% Test unmask for series # Update the values in the masked dataset to something else, here 10 series_mask.iloc[:] = 10
path = r"C:\Users\mmann\Documents\wildfire_FRAP\Data\Examples\Panel_Example\Fire"
target_file_prefix = 'fire_'
concatenated_target_df = combine_target_rasters(path, target_file_prefix, write_out=False)

#%% mask both the attribute data and targets
# NOTE(review): concatenated_attribute_df comes from an earlier cell outside
# this chunk.
raster_mask =r"C:\Users\mmann\Documents\wildfire_FRAP\Data/Examples/buffer/StatePoly_buf.tif"
original_df = [concatenated_attribute_df, concatenated_target_df]
mask_attributes_df, mask_target_df = tr.mask_df(raster_mask,
                                                original_df,
                                                missing_value=-9999,
                                                reset_index = False)

#%% switch panel data from wide to long format
# stub name for jepson issue !!!!!!
target_ln, features_ln = wide_to_long_target_features(target = mask_target_df,
                                                      features = mask_attributes_df,
                                                      sep='-')

#%%
import io
# NOTE(review): the triple-quoted CSV literal below is truncated at this
# chunk boundary — its closing quotes and any remaining rows lie outside the
# visible source.
data = io.StringIO('''Fruit,Color,Count,Price
Apple,Red,3,$1.29
Apple,Green,9,$0.99
print( tr.image_to_series(tg_single).head()) #%% mask unmask PASS I think mk_ex_crs might have too many observations, but can't figure out how df_ex_crs = tr.image_to_series(ex_crs) df_ex_ts = tr.image_to_series(ex_ts) df_ex_single = tr.image_to_series(ex_single) #original size print(df_ex_single.shape) # masked mk_ex_crs = tr.mask_df(raster_mask= mask, original_df = df_ex_crs,missing_value = -9999) mk_ex_ts = tr.mask_df(raster_mask= mask, original_df = df_ex_ts,missing_value = -9999) mk_ex_single = tr.mask_df(raster_mask= mask, original_df = df_ex_single,missing_value = -9999) print(mk_ex_crs.shape[0]/3 ) # three periods PROBLEM TOO MANY OBSERVATIONS print(mk_ex_ts.shape[0]/6 ) # three periods for two variables print(mk_ex_single.shape) #%% unmask PASS print(df_ex_crs.shape) print(tr.unmask_df(original_df= df_ex_crs, mask_df_output= mk_ex_crs).shape) print(df_ex_ts.shape) print(tr.unmask_df(original_df= df_ex_ts, mask_df_output= mk_ex_ts).shape) print(df_ex_single.shape)