Example #1
def plot_raw_and_regularized(raw_dt, ax, idx="NDVI", time_step_size=10):
    """Draw the raw series of one field and its Savitzky-Golay smoothed curve on ``ax``.

    raw_dt : dataframe
        Raw observations. The columns read here are ``idx``,
        'human_system_start_time', 'ID', 'CropTyp' and 'dataset'.

    ax : axis
        An axis object of Matplotlib; everything is drawn on it.

    idx : string
        Name of the vegetation-index column to plot.

    time_step_size : integer
        Forwarded to nc.regularize_a_field as ``interval_size``.

    Nothing is returned.
    """
    time_col = 'human_system_start_time'

    # The helpers receive a copy, so raw_dt itself is still the untouched
    # raw data when it is plotted below.
    regular_ts = nc.regularize_a_field(raw_dt.copy(), V_idks=idx, interval_size=time_step_size)
    gap_free_ts = nc.fill_theGap_linearLine(regular_ts, V_idx=idx)

    # Savitzky-Golay smoothing; the fit can leave [-1, 1], so clamp it back.
    smoothed = scipy.signal.savgol_filter(gap_free_ts[idx].values, window_length=7, polyorder=3)
    smoothed = smoothed.clip(min=-1, max=1)

    ax.plot(raw_dt[time_col], raw_dt[idx], '-',
            label="raw", linewidth=3.5, color='red', alpha=0.4)
    ax.plot(gap_free_ts[time_col], smoothed, '-',
            label="SG", linewidth=3, color='dodgerblue')

    # Title: first ID, crop type and dataset found in the raw frame.
    title_parts = [raw_dt.ID.unique()[0], raw_dt.CropTyp.unique()[0], raw_dt.dataset.unique()[0]]
    ax.set_title(", ".join(title_parts))
    ax.set_ylabel(idx)
    for axis_name in ('y', 'x'):
        ax.tick_params(axis=axis_name, which='major')
    ax.legend(loc="lower right")
    ax.set_ylim(raw_dt[idx].min() - 0.05, 1)
Example #2
def one_satellite_smoothed(raw_dt, ax, color_dict, idx="NDVI", time_step_size=10, set_negatives_to_zero=True):
    """Plot the Savitzky-Golay smoothed series of one field from one satellite on ``ax``.

    Nothing is returned; the curve is drawn on ``ax``.

    raw_dt : dataframe
        A dataframe of raw values from GEE, i.e. not regularized yet,
        for a given field and a given satellite. It must contain exactly
        one unique ``ID`` and one unique ``dataset``.

    ax : axis
        An axis object of Matplotlib.

    color_dict : dict
        Maps the dataset name (the single value of ``raw_dt.dataset``)
        to the line color.

    idx : string
        A string indicating vegetation index.

    time_step_size : integer
        An integer that is the regularization window size: every 10 days we want a given NDVI.
        Forwarded to nc.regularize_a_field as ``interval_size``.

    set_negatives_to_zero : bool
        When True (default) negative ``idx`` values are replaced by 0 before
        regularization. The flag used to be ignored (negatives were always
        zeroed); it is honored now.
    """
    # Work on a copy so the caller's frame is never modified.
    a_df = raw_dt.copy()
    if set_negatives_to_zero:
        a_df.loc[a_df[idx] < 0, idx] = 0

    assert (len(a_df.ID.unique()) == 1)
    assert (len(a_df.dataset.unique()) == 1)

    a_regularized_TS = nc.regularize_a_field(a_df, V_idks=idx, interval_size=time_step_size)
    a_regularized_TS_noGap = nc.fill_theGap_linearLine(a_regularized_TS, V_idx=idx)

    # Smoothen by Savitzky-Golay
    SG = scipy.signal.savgol_filter(a_regularized_TS_noGap[idx].values, window_length=7, polyorder=3)

    # SG might violate the boundaries. clip them:
    SG[SG > 1] = 1
    SG[SG < -1] = -1

    ax.plot(a_regularized_TS_noGap['human_system_start_time'], SG,
            '-', label="SG",
            linewidth=1.25, color=color_dict[a_df.dataset.unique()[0]])

    ax.set_title(a_df.ID.unique()[0] + ", " + a_df.CropTyp.unique()[0])
    ax.set_ylabel(idx)
    ax.tick_params(axis='y', which='major')
    ax.tick_params(axis='x', which='major')
    ax.set_ylim(-0.5, 1)
Example #3
# Number of fields processed so far.
counter = 0

# Handle one field (polygon) ID per iteration.
for a_poly in ID_list:
    # Progress indicator: print the counter on every 300th field.
    if (counter % 300 == 0):
        print (counter)
    # Rows of the current field only; copied so the in-place operations
    # below do not touch an_EE_TS.
    curr_field = an_EE_TS[an_EE_TS[IDcolName]==a_poly].copy()
    # Sort chronologically (sanity check)
    curr_field.sort_values(by=['human_system_start_time'], inplace=True)
    curr_field.reset_index(drop=True, inplace=True)
    # NOTE(review): the call below is truncated in this excerpt -- whatever
    # followed `start_year=st_yr,` (remaining arguments and the closing
    # parenthesis) is missing, so this block does not parse as shown.
    # TODO: restore the tail of the call from the original script.
    regularized_TS = nc.regularize_a_field(a_df = curr_field, \
                                           V_idks = indeks, \
                                           interval_size = regular_window_size,\
                                           start_year=st_yr, \
    regularized_TS = nc.fill_theGap_linearLine(a_regularized_TS = regularized_TS, V_idx = indeks)
    # One-time debug output: column names of both frames on the first field.
    if (counter == 0):
        print ("output_df columns:",     output_df.columns)
        print ("regularized_TS.columns", regularized_TS.columns)
    # Offset computed as no_steps rows per field already processed.
    row_pointer = no_steps * counter
    # The reason for the following line is that we assume all years are 366 days!
    # So, the actual number of rows might be smaller!
# Read the two raw files named by raw_names[0] and raw_names[1] and stack them.
L7 = pd.read_csv(raw_dir + raw_names[0], low_memory=False)
L8 = pd.read_csv(raw_dir + raw_names[1], low_memory=False)
raw_df = pd.concat([L7, L8])
raw_df["ID"] = raw_df["ID"].astype(str)
del L7, L8

# Plots should be exact. Therefore, we need to filter by
# last survey year, toss out NASS, and we are sticking to irrigated
# fields for now.
SF_data = pd.read_csv(param_dir + SF_data_name)
SF_data["ID"] = SF_data["ID"].astype(str)

if county != "Monterey2014":
    # filter by last survey date. Last 4 digits of county name!
    SF_data = nc.filter_by_lastSurvey(SF_data, year=county[-4:])
    SF_data = nc.filter_out_NASS(SF_data)  # Toss NASS
    SF_data = nc.filter_out_nonIrrigated(SF_data)  # keep only irrigated lands

    # IDs that survived the filters; every data table is restricted to them.
    # (The original variable name was mangled into an invalid identifier.)
    kept_IDs = list(SF_data.ID)
    raw_df = raw_df[raw_df.ID.isin(kept_IDs)]
    SG_df_EVI = SG_df_EVI[SG_df_EVI.ID.isin(kept_IDs)]
    SG_df_NDVI = SG_df_NDVI[SG_df_NDVI.ID.isin(kept_IDs)]

# One raw table per index: each drops the other index's column.
raw_df_EVI = raw_df.drop(columns=["NDVI"])
raw_df_NDVI = raw_df.drop(columns=["EVI"])
del raw_df
Example #5
# Stack the three source tables and release the originals.
raw_df = pd.concat([L5, L7, L8])
del L5, L7, L8

# Keep only the fields of interest.
raw_df = raw_df.loc[raw_df["ID"].isin(IDs)]

# One table per index: each drops the other index's column.
raw_df_EVI = raw_df.drop(columns=["NDVI"])
raw_df_NDVI = raw_df.drop(columns=["EVI"])
del raw_df

# Discard rows where the index of interest is missing.
raw_df_EVI = raw_df_EVI.loc[raw_df_EVI["EVI"].notna()]
raw_df_NDVI = raw_df_NDVI.loc[raw_df_NDVI["NDVI"].notna()]

raw_df_EVI = nc.add_human_start_time_by_system_start_time(raw_df_EVI)
raw_df_NDVI = nc.add_human_start_time_by_system_start_time(raw_df_NDVI)

# Run the same initial cleaning on the smoothed and the raw tables.
SG_df_NDVI = nc.initial_clean(df=SG_df_NDVI, column_to_be_cleaned="NDVI")
SG_df_EVI = nc.initial_clean(df=SG_df_EVI, column_to_be_cleaned="EVI")

raw_df_NDVI = nc.initial_clean(df=raw_df_NDVI, column_to_be_cleaned="NDVI")
raw_df_EVI = nc.initial_clean(df=raw_df_EVI, column_to_be_cleaned="EVI")

counter = 0

for ID in IDs:
    if (counter % 100 == 0):
Example #6
counter = 0      # number of fields processed so far
row_pointer = 0  # first free row of output_df

for a_poly in IDs:
    # Progress indicator. The body of this `if` was missing in the excerpt
    # (a syntax error); restored as a plain progress print.
    if (counter % 1000 == 0):
        print(counter)
    curr_field = an_EE_TS[an_EE_TS[IDcolName] == a_poly].copy()

    # Sort chronologically (sanity check)
    curr_field.sort_values(by=['human_system_start_time'], inplace=True)
    curr_field.reset_index(drop=True, inplace=True)

    no_Outlier_TS = nc.correct_big_jumps_1DaySeries_JFD(
        dataTMS_jumpie=curr_field, give_col=indeks, maxjump_perDay=0.018)

    # Fields have different lengths, so the write position advances by the
    # number of rows of the current field.
    n_rows = curr_field.shape[0]
    output_df[row_pointer:row_pointer + n_rows] = no_Outlier_TS.values
    counter += 1
    row_pointer += n_rows

###                   Write the outputs
output_df.to_csv(out_name, index=False)

end_time = time.time()
Example #7

counter = 0  # number of fields processed so far

for a_poly in ID_list:
    # Progress indicator. The body of this `if` was missing in the excerpt
    # (a syntax error); restored as a plain progress print.
    if (counter % 300 == 0):
        print(counter)
    curr_field = an_EE_TS[an_EE_TS[IDcolName] == a_poly].copy()
    # Sort chronologically (sanity check)
    curr_field.sort_values(by=['human_system_start_time'], inplace=True)
    curr_field.reset_index(drop=True, inplace=True)

    regularized_TS = nc.regularize_a_field(a_df=curr_field,
                                           V_idks=indeks,
                                           interval_size=regular_window_size)

    # Each field is written into its own no_steps-row slot of output_df.
    row_pointer = no_steps * counter
    output_df[row_pointer:row_pointer + no_steps] = regularized_TS.values
    counter += 1

###                   Write the outputs

out_name = output_dir + "00_noJumpsRegularized_" + indeks + ".csv"
os.makedirs(output_dir, exist_ok=True)
Example #8
def SG_clean_SOS(raw_dt, SG_dt, idx, ax, onset_cut=0.5, offset_cut=0.5):
    """Plot a given VI (NDVI or EVI) on ``ax`` together with its SOS and EOS points.

    raw_dt : dataframe
        pandas dataframe of raw observations from Google Earth Engine
    SG_dt  : dataframe
        pandas dataframe of smoothed version of data points.
        Must contain exactly one unique ID.
    idx : str
        A string indicating vegetation index.
    ax : axis
       An axis object of Matplotlib.
    onset_cut : float
        Start Of Season threshold
    offset_cut : float
        End Of Season threshold

    Nothing is returned; the plot is drawn on ``ax``.

    NOTE(review): several calls were truncated in the excerpt this function
    was recovered from (the raw-data scatter, the closing ``**style)`` of the
    annotations and the ratio plot). They are reconstructed below and should
    be confirmed against the original script.
    """
    assert (len(SG_dt['ID'].unique()) == 1)

    time_col = 'human_system_start_time'
    # Presumably added by nc.addToDF_SOS_EOS_White -- TODO confirm.
    column_ratio = idx + "_" + "ratio"

    # Only these three columns are needed for season detection and plotting.
    SG_dt = SG_dt[['ID', idx, time_col]]
    unique_years = SG_dt[time_col].dt.year.unique()

    # detect SOS and EOS in each year
    for yr_count, yr in enumerate(unique_years):
        # Legend entries are attached once, in the first year only.
        first_year = (yr_count == 0)
        curr_field_yr = SG_dt[SG_dt[time_col].dt.year == yr].copy()

        curr_field_yr = nc.addToDF_SOS_EOS_White(pd_TS=curr_field_yr,
                                                 VegIdx=idx,
                                                 onset_thresh=onset_cut,
                                                 offset_thresh=offset_cut)
        curr_field_yr = nc.Null_SOS_EOS_by_DoYDiff(pd_TS=curr_field_yr, min_season_length=40)

        ###             plot
        # plot SG smoothed
        ax.plot(SG_dt[time_col], SG_dt[idx], c='k', linewidth=2,
                label='SG' if first_year else "")

        # plot raw data
        ax.scatter(raw_dt[time_col], raw_dt[idx],
                   s=7, c='dodgerblue', label="raw" if first_year else "")

        ###   plot SOS (green) and EOS (red); rows where the column is 0 carry no event.
        for event_col, colour in (('SOS', 'g'), ('EOS', 'r')):
            events = curr_field_yr[curr_field_yr[event_col] != 0]
            ax.scatter(events[time_col], events[event_col], marker='+', s=155, c=colour)

            # annotate each event with its month-day, written vertically
            style = dict(size=10, color=colour, rotation='vertical')
            for event_time in events[time_col]:
                event_date = event_time.date()
                ax.text(x=event_date, y=-0.2, s=str(event_date)[5:], **style)

        # Plot ratios:
        ax.plot(curr_field_yr[time_col], curr_field_yr[column_ratio],
                c='gray', label="EVI Ratio" if first_year else "")

    ax.axhline(0, color='r', linewidth=.5)
    ax.axhline(1, color='r', linewidth=.5)

    # Pad the x-range by 10 days on both sides of the smoothed series.
    ax.set_xlim([SG_dt[time_col].min() - timedelta(10),
                 SG_dt[time_col].max() + timedelta(10)])

    ax.set_ylim([-0.3, 1.15])
    ax.xaxis.set_major_locator(mdates.YearLocator(1))  # every year.
    ax.legend(loc="upper left")
Example #9
def SG_clean_SOS_orchardinPlot(raw_dt, SG_dt, idx, ax, onset_cut=0.5, offset_cut=0.5):
    """Plot a given VI (NDVI or EVI) on ``ax`` with SOS and EOS points, skipping flat years.

    For every year the May-October range of ``idx`` is computed. Only when it
    exceeds 0.3 are SOS/EOS detected; otherwise the year is flagged with the
    sentinel 666 and no SOS/EOS markers are drawn for it.

    raw_dt : dataframe
        pandas dataframe of raw observations from Google Earth Engine
    SG_dt  : dataframe
        pandas dataframe of smoothed version of data points.
        Must contain exactly one unique ID.
    idx : str
        A string indicating vegetation index.
    ax : axis
       An axis object of Matplotlib.
    onset_cut : float
        Start Of Season threshold
    offset_cut : float
        End Of Season threshold

    Nothing is returned; the plot is drawn on ``ax``.

    NOTE(review): the excerpt this function was recovered from had lost
    several lines: the ``else:`` before the sentinel assignment, the tails of
    the scatter/text calls, and the ratio-plot call. An orphaned fragment
    ``c='g', linewidth=2);`` after the SOS annotation could not be attributed
    to any call and is not reproduced. Confirm against the original script.
    """
    assert (len(SG_dt['ID'].unique()) == 1)

    time_col = 'human_system_start_time'
    ratio_colName = idx + "_ratio"
    no_season_flag = 666  # sentinel written to SOS/EOS for years without detection

    # Only these three columns are needed for season detection and plotting.
    SG_dt = SG_dt[['ID', idx, time_col]]
    unique_years = SG_dt[time_col].dt.year.unique()

    # detect SOS and EOS in each year
    for yr_count, yr in enumerate(unique_years):
        # Legend entries are attached once, in the first year only.
        first_year = (yr_count == 0)
        curr_field_yr = SG_dt[SG_dt[time_col].dt.year == yr].copy()

        # Range of the index between May and October. pandas max/min give NaN
        # for an empty slice, which fails the comparison below and lands in
        # the "no season" branch instead of raising.
        months = curr_field_yr[time_col].dt.month
        y_orchard = curr_field_yr[(months >= 5) & (months <= 10)]
        y_orchard_range = y_orchard[idx].max() - y_orchard[idx].min()

        if y_orchard_range > 0.3:
            curr_field_yr = nc.addToDF_SOS_EOS_White(pd_TS=curr_field_yr,
                                                     VegIdx=idx,
                                                     onset_thresh=onset_cut,
                                                     offset_thresh=offset_cut)
            curr_field_yr = nc.Null_SOS_EOS_by_DoYDiff(pd_TS=curr_field_yr, min_season_length=40)
        else:
            # Flat year: compute the ratio locally and flag SOS/EOS.
            VegIdx_min = curr_field_yr[idx].min()
            VegIdx_max = curr_field_yr[idx].max()
            # epsilon keeps the division defined when max == min
            VegRange = VegIdx_max - VegIdx_min + sys.float_info.epsilon
            curr_field_yr[ratio_colName] = (curr_field_yr[idx] - VegIdx_min) / VegRange
            curr_field_yr['SOS'] = no_season_flag
            curr_field_yr['EOS'] = no_season_flag

        ###             plot
        # plot SG smoothed
        ax.plot(SG_dt[time_col], SG_dt[idx], c='k', linewidth=2,
                label='SG' if first_year else "")

        ax.scatter(raw_dt[time_col], raw_dt[idx],
                   s=7, c='dodgerblue', label="raw" if first_year else "")

        ###   plot SOS (green) and EOS (red)
        for event_col, colour in (('SOS', 'g'), ('EOS', 'r')):
            events = curr_field_yr[curr_field_yr[event_col] != 0]
            # dataframe might be empty, or the year might be flagged as "no season"
            if len(events) == 0 or events.iloc[0][event_col] == no_season_flag:
                continue
            ax.scatter(events[time_col], events[event_col], marker='+', s=155, c=colour)

            # annotate each event with its month-day, written vertically
            style = dict(size=10, color=colour, rotation='vertical')
            for event_time in events[time_col]:
                event_date = event_time.date()
                ax.text(x=event_date, y=-0.1, s=str(event_date)[5:], **style)

        # Plot ratios (in the detection branch the column is presumably added
        # by nc.addToDF_SOS_EOS_White -- TODO confirm):
        ax.plot(curr_field_yr[time_col], curr_field_yr[ratio_colName],
                c='gray', label=ratio_colName if first_year else "")

    ax.set_title(SG_dt['ID'].unique()[0] + ", cut: " + str(onset_cut) + ", " + idx)

    # Pad the x-range by 10 days on both sides of the smoothed series.
    ax.set_xlim([SG_dt[time_col].min() - timedelta(10),
                 SG_dt[time_col].max() + timedelta(10)])
    ax.set_ylim([-0.3, 1.15])
    ax.xaxis.set_major_locator(mdates.YearLocator(1))  # every year.
Example #10
# Pre-allocate the output: no_steps rows for every polygon.
nrows = no_steps * len(polygon_list)
output_df = pd.DataFrame(data=None, index=np.arange(nrows), columns=reg_cols)

counter = 0  # number of polygons processed so far

for a_poly in polygon_list:
    # Progress indicator. The body of this `if` was missing in the excerpt
    # (a syntax error); restored as a plain progress print.
    if (counter % 10 == 0):
        print(counter)
    curr_field = an_EE_TS[an_EE_TS['ID'] == a_poly].copy()
    curr_field.sort_values(by=['human_system_start_time'], inplace=True)
    curr_field.reset_index(drop=True, inplace=True)

    # The call was truncated after its first argument in the excerpt.
    # NOTE(review): V_idx=indeks is assumed because `indeks` names the index
    # in the output file name below -- confirm.
    curr_field = nc.fill_theGap_linearLine(a_regularized_TS=curr_field,
                                           V_idx=indeks)

    # Each polygon is written into its own no_steps-row slot of output_df.
    row_pointer = no_steps * counter
    output_df[row_pointer:row_pointer + no_steps] = curr_field.values
    counter += 1

###                   Write the outputs
out_name = output_dir + "01_Regular_filledGap_" + indeks + ".csv"
os.makedirs(output_dir, exist_ok=True)
output_df.to_csv(out_name, index=False)
Example #11
# Read the first three files and keep only rows that have an NDVI value.
# In the excerpt each frame was assigned to `A` and immediately overwritten
# by the next one, while `dataframe_list` was never filled; the frames are
# now collected explicitly.
# NOTE(review): if `dataframe_list` was meant to hold earlier content as
# well, extend it instead of rebuilding it here.
dataframe_list = []
for file_no in range(3):
    A = pd.read_csv(data_dir + file_names[file_no])
    A = A[A['NDVI'].notna()]
    dataframe_list.append(A)

all_data = pd.concat(dataframe_list)
all_data.reset_index(drop=True, inplace=True)
all_data = nc.add_human_start_time_by_system_start_time(all_data)

all_data["dataset"] = "Sentinel"

ID_list = list(np.sort(all_data.ID.unique()))

print("len(ID_list): " + str(len(ID_list)))
####  Set the plotting style

size = 20
Example #12
# Drop Landsat-8 rows without a value for the chosen index.
L8 = L8[L8[indeks].notna()]

# Field IDs are taken from the L5 table, then the three tables are stacked.
IDs = np.sort(L5[IDcolName].unique())
L578 = pd.concat([L5, L7, L8])
del L5, L7, L8

#######   Choose X random fields
if random_or_all == "random":
    IDs = random.sample(list(IDs), k=randCount)
    # Filter on the configurable ID column (IDcolName) rather than a
    # hard-coded `.ID`, consistent with how IDs was built above.
    L578 = L578[L578[IDcolName].isin(IDs)]
    L578.reset_index(drop=True, inplace=True)

L578 = nc.add_human_start_time_by_system_start_time(L578)

# NOTE(review): the excerpt printed this label without a value; the count of
# unique IDs in the merged table is assumed to be what was intended.
print("Number of unique fields is: ")
print(len(L578[IDcolName].unique()))

print("Dimension of the data is: " + str(L578.shape))

L578 = nc.initial_clean(df=L578, column_to_be_cleaned=indeks)

Example #13
# Filter by last survey year, toss out NASS, and stick to irrigated fields for now.
SF_data = pd.read_csv(SF_data_dir + county + ".csv")
SF_data["ID"] = SF_data["ID"].astype(str)

if county == "Monterey2014":
    # Normalize crop names: lower case, spaces and slashes to underscores,
    # commas removed.
    for crop_col in ('Crop2014', 'CropTyp'):
        SF_data[crop_col] = SF_data[crop_col].str.lower().str.replace(
            " ", "_").str.replace(",", "").str.replace("/", "_")
else:
    # filter by last survey date. Last 4 digits of county name!
    # The three truncated print calls of the excerpt are completed with the
    # same field count that the first print reports.
    print("No. of fields in SF_data is {}.".format(len(SF_data.ID.unique())))
    SF_data = nc.filter_by_lastSurvey(SF_data, year=county[-4:])
    print("No. of fields in SF_data after survey year is {}.".format(
        len(SF_data.ID.unique())))
    SF_data = nc.filter_out_NASS(SF_data)  # Toss NASS
    print("No. of fields in SF_data after NASS is {}.".format(
        len(SF_data.ID.unique())))
    SF_data = nc.filter_out_nonIrrigated(SF_data)  # keep only irrigated lands
    print("No. of fields in SF_data after Irrigation is {}.".format(
        len(SF_data.ID.unique())))

    # IDs that survived the filters. (The original variable name was mangled
    # into an invalid identifier.)
    kept_IDs = list(SF_data.ID)
    SG_df = SG_df[SG_df.ID.isin(kept_IDs)]

# Attach the SF_data attributes to every smoothed row of the same field.
SG_df = pd.merge(SG_df, SF_data, on=['ID'], how='left')

print("columns of SG_df right after merging is: ")