parameters = {
                                                "mean": None,
                                                "maximum": None}, 
                                          reset_df=False,
                                          raster_mask =  r"F:/Boundary/StatePoly_buf.tif"  ,
                                          tiff_output=True,
                                          workers = 1)

#%%
# Build the target (y) series from the fire rasters and mask both the
# target and the extracted features to the state boundary.
# NOTE(review): `extracted_features` is assumed to come from the
# calculateFeatures call in an earlier cell — confirm it is in scope.
target_data = tr.image_to_series_simple("F:/5year/Fires/")
raster_mask = u"F:/Boundary/StatePoly_buf.tif"

# mask_df accepts a list and returns the masked frames in the same order
original_df = [ target_data, extracted_features]
 
target_data_mask, extracted_features_mask  = tr.mask_df(raster_mask,
                                   original_df=original_df,
                                   reset_index = False)
print(target_data_mask.head())
extracted_features_mask.head()



#%% join and test train split yX data

# get_data joins target + features and returns a stratified train/test split;
# 90% of the rows go to the test set, features are left unscaled
obj = [target_data_mask,extracted_features_mask]
 
#from sklearn.preprocessing import StandardScaler as scaler
X_train, X_test, y_train, y_test = md.get_data(obj, 
                                               stratify=True,
                                               test_size=0.9,
                                               scale=False)
# Example #2
# 0
def calculateFeatures(path,
                      parameters,
                      reset_df,
                      raster_mask=None,
                      tiff_output=True,
                      workers=None):
    '''
    Calculates features or the statistical characteristics of time-series raster data.
    It can also save features as a csv file (dataframe) and/or tiff file.

    :param path: directory path to the raster files
    :param parameters: a dictionary of tsfresh features to be extracted
    :param reset_df: if True, recompute the long-format dataframe from the
        rasters and cache it as my_df.csv; if False, read the cached csv
    :param raster_mask: optional path to a binary raster mask; masked-out
        pixels are dropped before extraction and re-inserted afterwards
        with the missing value -9999
    :param tiff_output: boolean option for exporting a multi-band tiff file
    :param workers: optional number of multiprocessing workers for the
        tsfresh distributor; None runs extraction without a distributor
    :return: extracted features as a dataframe (and tiff file if requested)
    '''
    if reset_df:
        # recompute the pixel/time series and cache it next to the rasters
        my_df = image_to_series(path)
        print('df: ' + os.path.join(path, 'my_df.csv'))
        my_df.to_csv(os.path.join(path, 'my_df.csv'),
                     chunksize=10000,
                     index=False)
    else:
        # reuse the previously cached long-format dataframe
        my_df = tr.read_my_df(path)

    # restrict extraction to pixels inside the mask, if one was given
    if raster_mask is not None:
        my_df = tr.mask_df(raster_mask=raster_mask, original_df=my_df)

    if workers is not None:
        Distributor = MultiprocessingDistributor(
            n_workers=workers,
            disable_progressbar=False,
            progressbar_title="Feature Extraction")
        #Distributor = LocalDaskDistributor(n_workers=workers)
    else:
        Distributor = None

    extracted_features = extract_features(
        my_df,
        default_fc_parameters=parameters,
        column_sort="time",
        column_value="value",
        column_id="pixel_id",
        column_kind="kind",
        #chunksize = 1000,
        distributor=Distributor)

    # index rows by pixel id and by the covered time span (min_max)
    extracted_features.index.rename('pixel_id', inplace=True)
    extracted_features.reset_index(inplace=True, level=['pixel_id'])
    extracted_features['time'] = str(my_df.time.min()) + "_" + str(
        my_df.time.max())
    extracted_features.set_index(['pixel_id', 'time'], inplace=True)

    # BUGFIX: only unmask when a mask was actually applied; the original
    # called unmask_from_mask unconditionally, passing raster_mask=None
    # whenever the caller used the documented default
    if raster_mask is not None:
        extracted_features = tr.unmask_from_mask(
            mask_df_output=extracted_features,
            missing_value=-9999,
            raster_mask=raster_mask)

    # output directory: <parent-of-path>/<path-stem>_features
    out_path = Path(path).parent.joinpath(Path(path).stem + "_features")
    out_path.mkdir(parents=True, exist_ok=True)

    # persist the feature matrix
    print("features:" + os.path.join(out_path, 'extracted_features.csv'))
    extracted_features.to_csv(os.path.join(out_path, 'extracted_features.csv'),
                              chunksize=10000)

    # persist the band-number -> feature-name lookup (1-based band index)
    kr = pd.DataFrame(list(extracted_features.columns))
    kr.index += 1
    kr.index.names = ['band']
    kr.columns = ['feature_name']
    kr.to_csv(os.path.join(out_path, "features_names.csv"))

    if not tiff_output:
        return extracted_features

    # reshape (pixels x features) back onto the raster grid; grid
    # dimensions come from the raw input imagery
    rows, cols, num = image_to_array(path).shape
    matrix_features = extracted_features.values
    num_of_layers = matrix_features.shape[1]
    f2Array = matrix_features.reshape(rows, cols, num_of_layers)
    output_file = 'extracted_features.tiff'

    # reuse geotransform/projection metadata from the first raw raster
    raw_data = read_images(path)
    GeoTransform = raw_data[0].GetGeoTransform()
    driver = gdal.GetDriverByName('GTiff')

    noData = -9999

    Projection = raw_data[0].GetProjectionRef()
    DataType = gdal.GDT_Float32

    # export the feature stack as a multi-band GeoTIFF
    CreateTiff(output_file,
               f2Array,
               driver,
               noData,
               GeoTransform,
               Projection,
               DataType,
               path=out_path)
    return extracted_features
#%%  collect multiple years of Y (target) data

path = r"G:\Fire_target_train"
target_file_prefix = 'fire_'

# stack the yearly 'fire_*' target rasters into one wide dataframe
# (combine_target_rasters is a project helper defined elsewhere)
concatenated_target_df = combine_target_rasters(path,target_file_prefix,write_out=False)



#%% mask both the attribute data and targets 

# NOTE(review): `concatenated_attribute_df` is assumed to come from an
# earlier cell — confirm it is in scope before running this cell.
raster_mask =u"F:/Boundary/StatePoly_buf.tif"
original_df = [concatenated_attribute_df, concatenated_target_df]

mask_attributes_df, mask_target_df = tr.mask_df(raster_mask, original_df)



#%% switch panel data from wide to long format
import re 
target = mask_target_df
features = mask_attributes_df
sep='-'  # separator splitting variable name from period in column names

target_ln, features_ln = wide_to_long_target_features(target,features,sep='-')
#%% add lagged variables 
# Example #4
# 0
from tsraster.prep import mask_df, unmask_df, check_mask
import pandas as pd

# Sanity-check masking/unmasking: the masked frame should shrink relative
# to the original, and unmasking should restore the original row count.

#path to files
raster_mask = "../../wildfire_FRAP/Data/Examples/3month/aet-198401.tif"

original_df = "../../wildfire_FRAP/Data/Examples/3month/my_df.csv"

#run tasks
masked_data = mask_df(raster_mask, original_df)

#check masking: masked data should have fewer rows than the original
original_df_file = pd.read_csv(original_df)
print("Check Masking")
print("Size of original data: ", original_df_file.shape)
print("Size of masked data: ", masked_data.shape)

print("Check Unmasking")
#check unmasking: unmasked data should match the original shape
unmasked_data = unmask_df(original_df, masked_data)
print("Size of original data: ", original_df_file.shape)
print("Size of unmasked data: ", unmasked_data.shape)
#mask =  r"F:/Boundary/StatePoly_buf.tif"
#parameters = {
#    "mean": None,
#    "maximum": None}
#
#extracted_features = ca.calculateFeatures(path,
#                                          parameters,
#                                          reset_df=False,
#                                          tiff_output=False)
#extracted_features.head()

#%% Mask a dataframe (mask_df also accepts a csv path)

raster_mask = u"F:/Boundary/StatePoly_buf.tif"
original_df = r"F:\3month_features\extracted_features.csv"
df_mask = tr.mask_df(raster_mask, original_df)

#%% Mask a series (the mask raster itself converted to a series)
raster_mask = u"F:/Boundary/StatePoly_buf.tif"
original_series = tr.image_to_series_simple(raster_mask)
series_mask = tr.mask_df(raster_mask, original_series)

#%% mask a long format dataframe
path = r"F://3month/"
my_df = tr.image_to_series(path)
raster_mask = r"F:/Boundary/StatePoly_buf.tif"
long_mask = tr.mask_df(raster_mask, my_df)

#%% Test unmask for series
# Update the values in the masked dataset to something else, here 10
series_mask.iloc[:] = 10
# Example #6
# 0
# Panel example: combine yearly fire-target rasters, mask to the state
# boundary, then reshape the panel from wide to long format.
path = r"C:\Users\mmann\Documents\wildfire_FRAP\Data\Examples\Panel_Example\Fire"
target_file_prefix = 'fire_'

concatenated_target_df = combine_target_rasters(path,
                                                target_file_prefix,
                                                write_out=False)
 

#%% mask both the attribute data and targets 

# NOTE(review): `concatenated_attribute_df` is assumed to come from an
# earlier cell — confirm it is in scope before running this cell.
raster_mask =r"C:\Users\mmann\Documents\wildfire_FRAP\Data/Examples/buffer/StatePoly_buf.tif"
original_df = [concatenated_attribute_df, concatenated_target_df]

mask_attributes_df, mask_target_df = tr.mask_df(raster_mask, 
                                                original_df,  
                                                missing_value=-9999,
                                                reset_index = False)

#%% switch panel data from wide to long format

# stub name for jepson issue !!!!!!  

target_ln, features_ln = wide_to_long_target_features(target = mask_target_df,
                                                      features = mask_attributes_df,
                                                      sep='-')
#%%
import io
data = io.StringIO('''Fruit,Color,Count,Price
Apple,Red,3,$1.29
Apple,Green,9,$0.99
Exemple #7
0
# NOTE(review): `tg_single`, `ex_crs`, `ex_ts`, `ex_single` and `mask`
# are assumed to be paths defined in an earlier cell — confirm in scope.
print( tr.image_to_series(tg_single).head())

 

#%% mask unmask PASS I think  mk_ex_crs might have too many observations, but can't figure out how

# convert each example raster set to a long-format series
df_ex_crs = tr.image_to_series(ex_crs)
df_ex_ts = tr.image_to_series(ex_ts)
df_ex_single = tr.image_to_series(ex_single)


#original size 
print(df_ex_single.shape)

# masked: rows outside the mask are dropped
mk_ex_crs = tr.mask_df(raster_mask= mask, original_df = df_ex_crs,missing_value = -9999)
mk_ex_ts = tr.mask_df(raster_mask= mask, original_df = df_ex_ts,missing_value = -9999)
mk_ex_single = tr.mask_df(raster_mask= mask, original_df = df_ex_single,missing_value = -9999)
 
# divide row counts by the number of periods to compare pixel counts
print(mk_ex_crs.shape[0]/3 ) # three periods PROBLEM TOO MANY OBSERVATIONS
print(mk_ex_ts.shape[0]/6 ) # three periods for two variables 
print(mk_ex_single.shape)


#%% unmask PASS

# unmasking should restore each frame to its pre-mask shape
print(df_ex_crs.shape)
print(tr.unmask_df(original_df= df_ex_crs, mask_df_output= mk_ex_crs).shape)
print(df_ex_ts.shape)
print(tr.unmask_df(original_df= df_ex_ts, mask_df_output= mk_ex_ts).shape)
print(df_ex_single.shape)