def plot_raw_and_regularized(raw_dt, ax, idx="NDVI", time_step_size=10):
    a_df = raw_dt.copy()
    a_regularized_TS = nc.regularize_a_field(a_df, V_idks=idx, interval_size=time_step_size)
    a_regularized_TS_noGap = nc.fill_theGap_linearLine(a_regularized_TS, V_idx=idx)

    # Smooth with Savitzky-Golay.
    SG = scipy.signal.savgol_filter(a_regularized_TS_noGap[idx].values, window_length=7, polyorder=3)

    # SG might violate the boundaries; clip them:
    SG[SG > 1] = 1
    SG[SG < -1] = -1

    ax.plot(raw_dt['human_system_start_time'], raw_dt[idx],
            '-', label="raw", linewidth=3.5, color='red', alpha=0.4)

    ax.plot(a_regularized_TS_noGap['human_system_start_time'], SG,
            '-', label="SG", linewidth=3, color='dodgerblue')

    ax.set_title(raw_dt.ID.unique()[0] + ", " +
                 raw_dt.CropTyp.unique()[0] + ", " +
                 raw_dt.dataset.unique()[0])
    ax.set_ylabel(idx)
    ax.tick_params(axis='y', which='major')
    ax.tick_params(axis='x', which='major')
    ax.xaxis.set_major_locator(mdates.YearLocator(1))
    ax.set_ylim(raw_dt[idx].min() - 0.05, 1)
def one_satellite_smoothed(raw_dt, ax, color_dict, idx="NDVI", time_step_size=10, set_negatives_to_zero=True):
    """Plots the Savitzky-Golay smoothed time series of one satellite on a given axis.

    Arguments
    ---------
    raw_dt : dataframe
        A dataframe of raw values from GEE, i.e. not regularized yet,
        for a given field and a given satellite.

    ax : axis
        An axis object of Matplotlib.

    color_dict : dict
        Maps each dataset/satellite name to a plotting color.

    idx : string
        A string indicating the vegetation index.

    time_step_size : integer
        An integer that is the regularization window size:
        every 10 days we want a given NDVI.

    set_negatives_to_zero : bool
        If True, negative index values are replaced with zero before regularizing.

    Returns
    -------
    None. The plot is drawn on ax.
    """
    a_df = raw_dt.copy()
    if set_negatives_to_zero:
        a_df.loc[a_df[idx] < 0, idx] = 0

    assert (len(a_df.ID.unique()) == 1)
    assert (len(a_df.dataset.unique()) == 1)

    a_regularized_TS = nc.regularize_a_field(a_df, V_idks=idx, interval_size=time_step_size)
    a_regularized_TS_noGap = nc.fill_theGap_linearLine(a_regularized_TS, V_idx=idx)

    # Smooth with Savitzky-Golay.
    SG = scipy.signal.savgol_filter(a_regularized_TS_noGap[idx].values, window_length=7, polyorder=3)

    # SG might violate the boundaries; clip them:
    SG[SG > 1] = 1
    SG[SG < -1] = -1

    ax.plot(a_regularized_TS_noGap['human_system_start_time'], SG,
            '-', label="SG", linewidth=1.25,
            color=color_dict[a_df.dataset.unique()[0]])

    ax.set_title(a_df.ID.unique()[0] + ", " + a_df.CropTyp.unique()[0])
    ax.set_ylabel(idx)
    ax.tick_params(axis='y', which='major')
    ax.tick_params(axis='x', which='major')
    ax.set_ylim(-0.5, 1)
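# For reference: the smoothing step in the two functions above is plain
# scipy.signal.savgol_filter. A minimal standalone sketch with synthetic
# NDVI-like data and the same window_length=7, polyorder=3 settings as above;
# illustration only, not part of the pipeline.
import numpy as np
import scipy.signal

t = np.arange(0, 366, 10)  # a 10-day grid, like the regularized series
ndvi = 0.6 * np.exp(-((t - 180) / 60.0) ** 2) + 0.1 + np.random.normal(0, 0.05, t.size)

# window_length must be odd and greater than polyorder.
smooth = scipy.signal.savgol_filter(ndvi, window_length=7, polyorder=3)

# The filter can overshoot physical bounds; clip as the functions above do.
smooth = np.clip(smooth, -1, 1)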
counter = 0
for a_poly in ID_list:
    if (counter % 300 == 0):
        print(counter)
    curr_field = an_EE_TS[an_EE_TS[IDcolName] == a_poly].copy()

    ################################################################
    # Sort by date (sanity check)
    curr_field.sort_values(by=['human_system_start_time'], inplace=True)
    curr_field.reset_index(drop=True, inplace=True)

    ################################################################
    regularized_TS = nc.regularize_a_field(a_df=curr_field,
                                           V_idks=indeks,
                                           interval_size=regular_window_size,
                                           start_year=st_yr,
                                           end_year=end_yr)
    regularized_TS = nc.fill_theGap_linearLine(a_regularized_TS=regularized_TS, V_idx=indeks)

    if (counter == 0):
        print("output_df columns:", output_df.columns)
        print("regularized_TS.columns", regularized_TS.columns)

    ################################################################
    row_pointer = no_steps * counter
    """
    The reason for the following line is that we assume all years
    are 366 days, so the actual number of rows might be smaller.
    """
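# Note: because no_steps is sized for 366-day years, the preallocated output_df
# can be left with unused all-NA placeholder rows. A hypothetical cleanup after
# the loop (assuming ID is never NA for rows that were actually written):
output_df.dropna(subset=["ID"], inplace=True)
output_df.reset_index(drop=True, inplace=True)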
L7 = pd.read_csv(raw_dir + raw_names[0], low_memory=False)
L8 = pd.read_csv(raw_dir + raw_names[1], low_memory=False)
raw_df = pd.concat([L7, L8])
raw_df["ID"] = raw_df["ID"].astype(str)
del (L7, L8)

"""
Plots should be exact. Therefore, we need to filter by last survey year,
toss out NASS, and stick to irrigated fields for now.
"""
SF_data = pd.read_csv(param_dir + SF_data_name)
SF_data["ID"] = SF_data["ID"].astype(str)

if county != "Monterey2014":
    # Filter by last survey date: the last 4 digits of the county name.
    SF_data = nc.filter_by_lastSurvey(SF_data, year=county[-4:])

SF_data = nc.filter_out_NASS(SF_data)           # Toss NASS
SF_data = nc.filter_out_nonIrrigated(SF_data)   # Keep only irrigated lands

SF_data_IDs = list(SF_data.ID)
raw_df = raw_df[raw_df.ID.isin(SF_data_IDs)]
SG_df_EVI = SG_df_EVI[SG_df_EVI.ID.isin(SF_data_IDs)]
SG_df_NDVI = SG_df_NDVI[SG_df_NDVI.ID.isin(SF_data_IDs)]

raw_df_EVI = raw_df.copy()
raw_df_NDVI = raw_df.copy()
del (raw_df)

raw_df_EVI.drop(["NDVI"], axis=1, inplace=True)
raw_df_NDVI.drop(["EVI"], axis=1, inplace=True)
raw_df = pd.concat([L5, L7, L8])
del (L5, L7, L8)

raw_df = raw_df[raw_df.ID.isin(IDs)]

raw_df_EVI = raw_df.copy()
raw_df_NDVI = raw_df.copy()
del (raw_df)

raw_df_EVI.drop(["NDVI"], axis=1, inplace=True)
raw_df_NDVI.drop(["EVI"], axis=1, inplace=True)

raw_df_EVI = raw_df_EVI[raw_df_EVI["EVI"].notna()]
raw_df_NDVI = raw_df_NDVI[raw_df_NDVI["NDVI"].notna()]

raw_df_EVI = nc.add_human_start_time_by_system_start_time(raw_df_EVI)
raw_df_NDVI = nc.add_human_start_time_by_system_start_time(raw_df_NDVI)

########################################
SG_df_NDVI = nc.initial_clean(df=SG_df_NDVI, column_to_be_cleaned="NDVI")
SG_df_EVI = nc.initial_clean(df=SG_df_EVI, column_to_be_cleaned="EVI")

raw_df_NDVI = nc.initial_clean(df=raw_df_NDVI, column_to_be_cleaned="NDVI")
raw_df_EVI = nc.initial_clean(df=raw_df_EVI, column_to_be_cleaned="EVI")

counter = 0
for ID in IDs:
    if (counter % 100 == 0):
        print("_____________________________________")
counter = 0
row_pointer = 0
for a_poly in IDs:
    if (counter % 1000 == 0):
        print(counter)
    curr_field = an_EE_TS[an_EE_TS[IDcolName] == a_poly].copy()

    ################################################################
    # Sort by date (sanity check)
    curr_field.sort_values(by=['human_system_start_time'], inplace=True)
    curr_field.reset_index(drop=True, inplace=True)

    ################################################################
    no_Outlier_TS = nc.correct_big_jumps_1DaySeries_JFD(dataTMS_jumpie=curr_field,
                                                        give_col=indeks,
                                                        maxjump_perDay=0.018)

    output_df[row_pointer:row_pointer + curr_field.shape[0]] = no_Outlier_TS.values
    counter += 1
    row_pointer += curr_field.shape[0]

####################################################################################
###
###   Write the outputs
###
####################################################################################
output_df.drop_duplicates(inplace=True)
output_df.to_csv(out_name, index=False)

end_time = time.time()
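# nc.correct_big_jumps_1DaySeries_JFD is part of this repo's nc module and its
# internals are not shown here. As a rough, hypothetical illustration of the idea
# only (not the actual implementation): flag points whose change from the previous
# observation exceeds maxjump_perDay times the elapsed days, then interpolate them.
import numpy as np
import pandas as pd

def flag_big_jumps_sketch(dates, values, maxjump_perDay=0.018):
    """Hypothetical sketch, not nc.correct_big_jumps_1DaySeries_JFD."""
    days = dates.diff().dt.days.to_numpy(dtype=float)
    jumps = values.diff().abs().to_numpy()
    bad = jumps > maxjump_perDay * days   # NaN comparisons evaluate to False
    out = values.copy()
    out[bad] = np.nan
    # Fill the flagged points from their neighbors.
    return out.interpolate(method="linear", limit_direction="both")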
########################################################################################
counter = 0
for a_poly in ID_list:
    if (counter % 300 == 0):
        print(counter)
    curr_field = an_EE_TS[an_EE_TS[IDcolName] == a_poly].copy()

    ################################################################
    # Sort by date (sanity check)
    curr_field.sort_values(by=['human_system_start_time'], inplace=True)
    curr_field.reset_index(drop=True, inplace=True)

    ################################################################
    regularized_TS = nc.regularize_a_field(a_df=curr_field,
                                           V_idks=indeks,
                                           interval_size=regular_window_size)

    ################################################################
    row_pointer = no_steps * counter
    output_df[row_pointer:row_pointer + no_steps] = regularized_TS.values
    counter += 1

####################################################################################
###
###   Write the outputs
###
####################################################################################
out_name = output_dir + "00_noJumpsRegularized_" + indeks + ".csv"
os.makedirs(output_dir, exist_ok=True)
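# nc.regularize_a_field is also repo-specific. Conceptually it maps irregular
# observations onto a fixed grid: one value per interval_size-day window. A
# hypothetical pandas sketch of that idea (taking the max VI per window is one
# common choice; the real function's behavior may differ):
import pandas as pd

def regularize_sketch(df, v_idx="NDVI", interval_size=10):
    """Hypothetical sketch, not nc.regularize_a_field."""
    return (df.set_index("human_system_start_time")[v_idx]
              .resample(f"{interval_size}D")
              .max()                 # one representative value per window
              .reset_index())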
def SG_clean_SOS(raw_dt, SG_dt, idx, ax, onset_cut=0.5, offset_cut=0.5):
    """Plots a given VI (NDVI or EVI) with SOS and EOS points.

    Arguments
    ---------
    raw_dt : dataframe
        pandas dataframe of raw observations from Google Earth Engine.

    SG_dt : dataframe
        pandas dataframe of the smoothed version of the data points.

    idx : str
        A string indicating the vegetation index.

    ax : axis
        An axis object of Matplotlib.

    onset_cut : float
        Start-of-season threshold.

    offset_cut : float
        End-of-season threshold.

    Returns
    -------
    None. A plot of the given VI (NDVI or EVI) with SOS and EOS points is drawn on ax.
    """
    assert (len(SG_dt['ID'].unique()) == 1)

    #############################################
    ###
    ###   find SOS's and EOS's
    ###
    #############################################
    SEOS_output_columns = ['ID', idx, 'human_system_start_time',
                           'EVI_ratio', 'SOS', 'EOS', 'season_count']
    """
    We multiply len(SG_dt) by 4 because we can have at least two seasons,
    which means 2 SOS and 2 EOS, so at least 4 rows are needed.
    The 14 is because there are 14 years from 2008 to 2021.
    """
    all_poly_and_SEOS = pd.DataFrame(data=None,
                                     index=np.arange(4 * 14 * len(SG_dt)),
                                     columns=SEOS_output_columns)
    unique_years = SG_dt['human_system_start_time'].dt.year.unique()
    pointer_SEOS_tab = 0

    SG_dt = SG_dt[SEOS_output_columns[0:3]]

    # Detect SOS and EOS in each year.
    yr_count = 0
    for yr in unique_years:
        curr_field_yr = SG_dt[SG_dt['human_system_start_time'].dt.year == yr].copy()

        curr_field_yr = nc.addToDF_SOS_EOS_White(pd_TS=curr_field_yr,
                                                 VegIdx=idx,
                                                 onset_thresh=onset_cut,
                                                 offset_thresh=offset_cut)
        curr_field_yr = nc.Null_SOS_EOS_by_DoYDiff(pd_TS=curr_field_yr, min_season_length=40)

        #############################################
        ###
        ###   plot
        ###
        #############################################
        # Plot SG-smoothed series.
        ax.plot(SG_dt['human_system_start_time'], SG_dt[idx],
                c='k', linewidth=2, label='SG' if yr_count == 0 else "")

        # Plot raw data.
        ax.scatter(raw_dt['human_system_start_time'], raw_dt[idx],
                   s=7, c='dodgerblue', label="raw" if yr_count == 0 else "")

        ###
        ###   plot SOS and EOS
        ###
        # Update the EVI/NDVI values to the smoothed version.
        #
        #   Start of the season
        #
        SOS = curr_field_yr[curr_field_yr['SOS'] != 0]
        ax.scatter(SOS['human_system_start_time'], SOS['SOS'], marker='+', s=155, c='g')

        # Annotate SOS.
        for ii in np.arange(0, len(SOS)):
            style = dict(size=10, color='g', rotation='vertical')
            ax.text(x=SOS.iloc[ii]['human_system_start_time'].date(),
                    y=-0.2,
                    s=str(SOS.iloc[ii]['human_system_start_time'].date())[5:],
                    **style)

        #
        #   End of the season
        #
        EOS = curr_field_yr[curr_field_yr['EOS'] != 0]
        ax.scatter(EOS['human_system_start_time'], EOS['EOS'], marker='+', s=155, c='r')

        # Annotate EOS.
        for ii in np.arange(0, len(EOS)):
            style = dict(size=10, color='r', rotation='vertical')
            ax.text(x=EOS.iloc[ii]['human_system_start_time'].date(),
                    y=-0.2,
                    s=str(EOS.iloc[ii]['human_system_start_time'].date())[5:],
                    **style)

        # Plot ratios:
        column_ratio = idx + "_" + "ratio"
        ax.plot(curr_field_yr['human_system_start_time'], curr_field_yr[column_ratio],
                c='gray', label="EVI Ratio" if yr_count == 0 else "")
        yr_count += 1

    ax.axhline(0, color='r', linewidth=.5)
    ax.axhline(1, color='r', linewidth=.5)

    ax.set_title(SG_dt['ID'].unique()[0])
    ax.set(ylabel=idx)
    ax.set_xlim([SG_dt.human_system_start_time.min() - timedelta(10),
                 SG_dt.human_system_start_time.max() + timedelta(10)])
    ax.set_ylim([-0.3, 1.15])
    ax.xaxis.set_major_locator(mdates.YearLocator(1))  # every year
    ax.legend(loc="upper left")
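# nc.addToDF_SOS_EOS_White implements this repo's variant of White's threshold
# method; its code is not shown here. A hypothetical sketch of the core idea only:
# rescale the VI to a 0-1 ratio, then mark upward crossings of onset_thresh as SOS
# and downward crossings of offset_thresh as EOS.
import numpy as np

def sos_eos_sketch(vi, onset_thresh=0.5, offset_thresh=0.5):
    """Hypothetical sketch, not nc.addToDF_SOS_EOS_White."""
    vi = np.asarray(vi, dtype=float)
    ratio = (vi - vi.min()) / (vi.max() - vi.min() + 1e-12)
    above_on = ratio >= onset_thresh
    above_off = ratio >= offset_thresh
    sos_idx = np.where(above_on[1:] & ~above_on[:-1])[0] + 1    # rising crossings
    eos_idx = np.where(~above_off[1:] & above_off[:-1])[0] + 1  # falling crossings
    return sos_idx, eos_idx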
def SG_clean_SOS_orchardinPlot(raw_dt, SG_dt, idx, ax, onset_cut=0.5, offset_cut=0.5):
    """Plots a given VI (NDVI or EVI) with SOS and EOS points.

    Arguments
    ---------
    raw_dt : dataframe
        pandas dataframe of raw observations from Google Earth Engine.

    SG_dt : dataframe
        pandas dataframe of the smoothed version of the data points.

    idx : str
        A string indicating the vegetation index.

    ax : axis
        An axis object of Matplotlib.

    onset_cut : float
        Start-of-season threshold.

    offset_cut : float
        End-of-season threshold.

    Returns
    -------
    None. A plot of the given VI (NDVI or EVI) with SOS and EOS points is drawn on ax.
    """
    assert (len(SG_dt['ID'].unique()) == 1)

    #############################################
    ###
    ###   find SOS's and EOS's
    ###
    #############################################
    ratio_colName = idx + "_ratio"
    SEOS_output_columns = ['ID', idx, 'human_system_start_time',
                           ratio_colName, 'SOS', 'EOS', 'season_count']
    """
    We multiply len(SG_dt) by 4 because we can have at least two seasons,
    which means 2 SOS and 2 EOS, so at least 4 rows are needed.
    The 14 is because there are 14 years from 2008 to 2021.
    """
    all_poly_and_SEOS = pd.DataFrame(data=None,
                                     index=np.arange(4 * 14 * len(SG_dt)),
                                     columns=SEOS_output_columns)
    unique_years = SG_dt['human_system_start_time'].dt.year.unique()
    pointer_SEOS_tab = 0

    SG_dt = SG_dt[SEOS_output_columns[0:3]]

    # Detect SOS and EOS in each year.
    yr_count = 0
    for yr in unique_years:
        curr_field_yr = SG_dt[SG_dt['human_system_start_time'].dt.year == yr].copy()

        # Range of the VI during the growing months (May-October):
        y_orchard = curr_field_yr[curr_field_yr['human_system_start_time'].dt.month >= 5]
        y_orchard = y_orchard[y_orchard['human_system_start_time'].dt.month <= 10]
        y_orchard_range = max(y_orchard[idx]) - min(y_orchard[idx])

        if y_orchard_range > 0.3:
            curr_field_yr = nc.addToDF_SOS_EOS_White(pd_TS=curr_field_yr,
                                                     VegIdx=idx,
                                                     onset_thresh=onset_cut,
                                                     offset_thresh=offset_cut)
            curr_field_yr = nc.Null_SOS_EOS_by_DoYDiff(pd_TS=curr_field_yr, min_season_length=40)
        else:
            # Too little variation: treat as an orchard/evergreen year and
            # mark SOS/EOS with the sentinel value 666.
            VegIdx_min = curr_field_yr[idx].min()
            VegIdx_max = curr_field_yr[idx].max()
            VegRange = VegIdx_max - VegIdx_min + sys.float_info.epsilon
            curr_field_yr[ratio_colName] = (curr_field_yr[idx] - VegIdx_min) / VegRange
            curr_field_yr['SOS'] = 666
            curr_field_yr['EOS'] = 666

        #############################################
        ###
        ###   plot
        ###
        #############################################
        # Plot SG-smoothed series.
        ax.plot(SG_dt['human_system_start_time'], SG_dt[idx],
                c='k', linewidth=2, label='SG' if yr_count == 0 else "")

        # Plot raw data.
        ax.scatter(raw_dt['human_system_start_time'], raw_dt[idx],
                   s=7, c='dodgerblue', label="raw" if yr_count == 0 else "")

        ###
        ###   plot SOS and EOS
        ###
        #
        #   SOS
        #
        SOS = curr_field_yr[curr_field_yr['SOS'] != 0]
        if len(SOS) > 0:  # dataframe might be empty
            if SOS.iloc[0]['SOS'] != 666:
                ax.scatter(SOS['human_system_start_time'], SOS['SOS'],
                           marker='+', s=155, c='g', label="")

                # Annotate SOS.
                for ii in np.arange(0, len(SOS)):
                    style = dict(size=10, color='g', rotation='vertical')
                    ax.text(x=SOS.iloc[ii]['human_system_start_time'].date(),
                            y=-0.1,
                            s=str(SOS.iloc[ii]['human_system_start_time'].date())[5:],
                            **style)
            else:
                ax.plot(curr_field_yr['human_system_start_time'],
                        np.ones(len(curr_field_yr)),
                        c='g', linewidth=2)

        #
        #   EOS
        #
        EOS = curr_field_yr[curr_field_yr['EOS'] != 0]
        if len(EOS) > 0:  # dataframe might be empty
            if EOS.iloc[0]['EOS'] != 666:
                ax.scatter(EOS['human_system_start_time'], EOS['EOS'],
                           marker='+', s=155, c='r', label="")

                # Annotate EOS.
                for ii in np.arange(0, len(EOS)):
                    style = dict(size=10, color='r', rotation='vertical')
                    ax.text(x=EOS.iloc[ii]['human_system_start_time'].date(),
                            y=-0.1,
                            s=str(EOS.iloc[ii]['human_system_start_time'].date())[5:],
                            **style)

        # Plot ratios:
        ax.plot(curr_field_yr['human_system_start_time'], curr_field_yr[ratio_colName],
                c='gray', label=ratio_colName if yr_count == 0 else "")
        yr_count += 1

    ax.set_title(SG_dt['ID'].unique()[0] + ", cut: " + str(onset_cut) + ", " + idx)
    ax.set(ylabel=idx)
    ax.set_xlim([SG_dt.human_system_start_time.min() - timedelta(10),
                 SG_dt.human_system_start_time.max() + timedelta(10)])
    ax.set_ylim([-0.3, 1.15])
    ax.xaxis.set_major_locator(mdates.YearLocator(1))  # every year
    ax.legend(loc="best")
nrows = no_steps * len(polygon_list)
output_df = pd.DataFrame(data=None, index=np.arange(nrows), columns=reg_cols)

########################################################################################
counter = 0
for a_poly in polygon_list:
    if (counter % 10 == 0):
        print(counter)
    curr_field = an_EE_TS[an_EE_TS['ID'] == a_poly].copy()
    curr_field.sort_values(by=['human_system_start_time'], inplace=True)
    curr_field.reset_index(drop=True, inplace=True)

    ################################################################
    curr_field = nc.fill_theGap_linearLine(a_regularized_TS=curr_field, V_idx=indeks)

    ################################################################
    row_pointer = no_steps * counter
    output_df[row_pointer:row_pointer + no_steps] = curr_field.values
    counter += 1

# nc.convert_human_system_start_time_to_systemStart_time(output_df)

####################################################################################
###
###   Write the outputs
###
####################################################################################
out_name = output_dir + "01_Regular_filledGap_" + indeks + ".csv"
os.makedirs(output_dir, exist_ok=True)
output_df.to_csv(out_name, index=False)
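# nc.fill_theGap_linearLine linearly interpolates missing steps of a regularized
# series (per its name and use above). A hypothetical pandas sketch of the same
# idea, assuming human_system_start_time is a datetime column:
import pandas as pd

def fill_gap_sketch(df, v_idx="NDVI"):
    """Hypothetical sketch, not nc.fill_theGap_linearLine."""
    out = df.set_index("human_system_start_time").copy()
    # Time-weighted linear interpolation across missing steps.
    out[v_idx] = out[v_idx].interpolate(method="time", limit_direction="both")
    return out.reset_index()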
]

for file_name in file_names[0:3]:
    A = pd.read_csv(data_dir + file_name)
    A = A[A['NDVI'].notna()]
    dataframe_list.append(A)

all_data = pd.concat(dataframe_list)
all_data.reset_index(drop=True, inplace=True)
all_data = nc.add_human_start_time_by_system_start_time(all_data)
all_data["dataset"] = "Sentinel"

ID_list = list(np.sort(all_data.ID.unique()))
print("len(ID_list): " + str(len(ID_list)))

##################################################################
##################################################################
####
####   Set the plotting style
####
##################################################################
##################################################################
size = 20
L8 = L8[L8[indeks].notna()]

IDs = np.sort(L5[IDcolName].unique())
L578 = pd.concat([L5, L7, L8])
del (L5, L7, L8)

########################################################
#######
#######     Choose X random fields
#######
if random_or_all == "random":
    IDs = random.sample(list(IDs), k=randCount)
    L578 = L578[L578.ID.isin(IDs)]

L578.reset_index(drop=True, inplace=True)
L578 = nc.add_human_start_time_by_system_start_time(L578)

print("Number of unique fields is: ")
print(len(IDs))
print("__________________________________________")

#########################################################
print("Dimension of the data is: " + str(L578.shape))
print("__________________________________________")

#########################################################
L578 = nc.initial_clean(df=L578, column_to_be_cleaned=indeks)

#########################################################
fields for now.
"""
SF_data = pd.read_csv(SF_data_dir + county + ".csv")
SF_data["ID"] = SF_data["ID"].astype(str)

if county == "Monterey2014":
    SF_data['Crop2014'] = SF_data['Crop2014'].str.lower().str.replace(
        " ", "_").str.replace(",", "").str.replace("/", "_")
else:
    SF_data['CropTyp'] = SF_data['CropTyp'].str.lower().str.replace(
        " ", "_").str.replace(",", "").str.replace("/", "_")

if county != "Monterey2014":
    # Filter by last survey date: the last 4 digits of the county name.
    print("No. of fields in SF_data is {}.".format(len(SF_data.ID.unique())))
    SF_data = nc.filter_by_lastSurvey(SF_data, year=county[-4:])
    print("No. of fields in SF_data after survey year is {}.".format(len(SF_data.ID.unique())))

    SF_data = nc.filter_out_NASS(SF_data)  # Toss NASS
    print("No. of fields in SF_data after NASS is {}.".format(len(SF_data.ID.unique())))

    SF_data = nc.filter_out_nonIrrigated(SF_data)  # Keep only irrigated lands
    print("No. of fields in SF_data after Irrigation is {}.".format(len(SF_data.ID.unique())))

SF_data_IDs = list(SF_data.ID)
SG_df = SG_df[SG_df.ID.isin(SF_data_IDs)]
SG_df = pd.merge(SG_df, SF_data, on=['ID'], how='left')
print("columns of SG_df right after merging is: ")