Example #1
0
def plot_raw_and_regularized(raw_dt, ax, idx="NDVI", time_step_size=10):
    """Draw the raw series and its Savitzky-Golay smoothed version on ``ax``.

    Arguments
    ---------
    raw_dt : dataframe
        Raw observations. The columns read here are ``human_system_start_time``,
        ``ID``, ``CropTyp``, ``dataset`` and the column named by ``idx``.

    ax : axis
        An axis object of Matplotlib. It is modified in place.

    idx : string
        Name of the vegetation-index column to plot.

    time_step_size : integer
        Forwarded to ``nc.regularize_a_field`` as ``interval_size``.

    Returns
    -------
    None. Everything is drawn on ``ax``.
    """
    time_col = 'human_system_start_time'

    # Work on a copy so the caller's dataframe is not handed to the helpers.
    regular_ts = nc.regularize_a_field(raw_dt.copy(), V_idks=idx, interval_size=time_step_size)
    gap_free_ts = nc.fill_theGap_linearLine(regular_ts, V_idx=idx)

    # Savitzky-Golay smoothing of the gap-filled values.
    smoothed = scipy.signal.savgol_filter(gap_free_ts[idx].values,
                                          window_length=7, polyorder=3)

    # The filter can overshoot, so force the result back into [-1, 1].
    smoothed[smoothed > 1] = 1
    smoothed[smoothed < -1] = -1

    ax.plot(raw_dt[time_col], raw_dt[idx], '-',
            label="raw", linewidth=3.5, color='red', alpha=0.4)
    ax.plot(gap_free_ts[time_col], smoothed, '-',
            label="SG", linewidth=3, color='dodgerblue')

    # Title is built from the first ID, crop type and dataset found in raw_dt.
    title_parts = [raw_dt.ID.unique()[0],
                   raw_dt.CropTyp.unique()[0],
                   raw_dt.dataset.unique()[0]]
    ax.set_title(", ".join(title_parts))
    ax.set_ylabel(idx)
    for axis_name in ('y', 'x'):
        ax.tick_params(axis=axis_name, which='major')
    ax.legend(loc="lower right")
    ax.xaxis.set_major_locator(mdates.YearLocator(1))  # one tick per year
    ax.set_ylim(raw_dt[idx].min() - 0.05, 1)
Example #2
0
def one_satellite_smoothed(raw_dt, ax, color_dict, idx="NDVI", time_step_size=10, set_negatives_to_zero=True):
    """Plot the Savitzky-Golay smoothed series of one field from one satellite.

    Arguments
    ---------
    raw_dt : dataframe
        A dataframe of raw values, i.e. not regularized yet, for a given field
        and a given satellite. It must hold exactly one unique ``ID`` and one
        unique ``dataset``; the ``CropTyp`` column is read for the title.

    ax : axis
        An axis object of Matplotlib. It is modified in place.

    color_dict : dictionary
        Maps a ``dataset`` value to the line color used for it.

    idx : string
        A string indicating the vegetation index column.

    time_step_size : integer
        The regularization window size, forwarded to ``nc.regularize_a_field``
        as ``interval_size``.

    set_negatives_to_zero : bool
        If True (default), negative ``idx`` values are replaced by 0 before
        regularization. If False, they are left untouched.

    Returns
    -------
    None. The smoothed curve is drawn on ``ax``.
    """
    a_df = raw_dt.copy()
    # Honor the flag: it used to be ignored and negatives were always zeroed.
    if set_negatives_to_zero:
        a_df.loc[a_df[idx] < 0, idx] = 0

    assert (len(a_df.ID.unique()) == 1)
    assert (len(a_df.dataset.unique()) == 1)
    dataset_name = a_df.dataset.unique()[0]

    a_regularized_TS = nc.regularize_a_field(a_df, V_idks=idx, interval_size=time_step_size)
    a_regularized_TS_noGap = nc.fill_theGap_linearLine(a_regularized_TS, V_idx=idx)

    # Smoothen by Savitzky-Golay
    SG = scipy.signal.savgol_filter(a_regularized_TS_noGap[idx].values, window_length=7, polyorder=3)

    # SG might violate the boundaries. Clip them to [-1, 1]:
    SG[SG > 1] = 1
    SG[SG < -1] = -1

    ax.plot(a_regularized_TS_noGap['human_system_start_time'], SG,
            '-', label="SG",
            linewidth=1.25, color=color_dict[dataset_name])

    ax.set_title(a_df.ID.unique()[0] + ", " + a_df.CropTyp.unique()[0])
    ax.set_ylabel(idx)
    ax.tick_params(axis='y', which='major')
    ax.tick_params(axis='x', which='major')
    ax.set_ylim(-0.5, 1)
Example #3
0
# Regularize and gap-fill the time series of every field in ID_list.
# NOTE(review): this excerpt is truncated; counter is not incremented in the
# visible part of the loop, presumably that happens further down -- confirm.
counter = 0

for a_poly in ID_list:
    # Progress indicator: print the counter whenever it is a multiple of 300.
    if (counter % 300 == 0):
        print (counter)
    # Independent copy of the rows that belong to the current field.
    curr_field = an_EE_TS[an_EE_TS[IDcolName]==a_poly].copy()
    ################################################################
    # Sort chronologically by human_system_start_time (sanity check)
    # and renumber the rows from 0.
    curr_field.sort_values(by=['human_system_start_time'], inplace=True)
    curr_field.reset_index(drop=True, inplace=True)
    
    ################################################################
    # Regularize the series between st_yr and end_yr using windows of
    # regular_window_size, then fill its gaps.
    regularized_TS = nc.regularize_a_field(a_df = curr_field, \
                                           V_idks = indeks, \
                                           interval_size = regular_window_size,\
                                           start_year=st_yr, \
                                           end_year=end_yr)
    
    regularized_TS = nc.fill_theGap_linearLine(a_regularized_TS = regularized_TS, V_idx = indeks)
    # On the first pass only, print both column sets so they can be compared.
    if (counter == 0):
        print ("output_df columns:",     output_df.columns)
        print ("regularized_TS.columns", regularized_TS.columns)
    
    ################################################################
    # First output row for this field, at no_steps rows per field.
    row_pointer = no_steps * counter
    
    """
    The reason for the following line is that we assume all years are 366 days!
    so, the actual thing might be smaller!
    """
# Read the two raw CSV files and stack them into one table with string IDs.
L7 = pd.read_csv(raw_dir + raw_names[0], low_memory=False)
L8 = pd.read_csv(raw_dir + raw_names[1], low_memory=False)
raw_df = pd.concat([L7, L8])
raw_df["ID"] = raw_df["ID"].astype(str)
del (L7, L8)

# Plots should be exact. Therefore, we need to filter by
# last survey year, toss out NASS, and we are sticking to irrigated
# fields for now.
SF_data = pd.read_csv(param_dir + SF_data_name)
SF_data["ID"] = SF_data["ID"].astype(str)

if county != "Monterey2014":
    # filter by last survey date. Last 4 digits of county name!
    SF_data = nc.filter_by_lastSurvey(SF_data, year=county[-4:])
    SF_data = nc.filter_out_NASS(SF_data)  # Toss NASS
    SF_data = nc.filter_out_nonIrrigated(SF_data)  # keep only irrigated lands

    # IDs that survived the filters. The previous name was not a valid
    # identifier (an assignment to an expression is a SyntaxError).
    kept_IDs = list(SF_data.ID)
    raw_df = raw_df[raw_df.ID.isin(kept_IDs)]
    SG_df_EVI = SG_df_EVI[SG_df_EVI.ID.isin(kept_IDs)]
    SG_df_NDVI = SG_df_NDVI[SG_df_NDVI.ID.isin(kept_IDs)]

# One raw table per index: each copy drops the other index's column.
raw_df_EVI = raw_df.copy()
raw_df_NDVI = raw_df.copy()
del (raw_df)

raw_df_EVI.drop(["NDVI"], axis=1, inplace=True)
raw_df_NDVI.drop(["EVI"], axis=1, inplace=True)
Example #5
0
# Stack the three raw tables into one and release the originals.
raw_df = pd.concat([L5, L7, L8])
del L5, L7, L8

# Keep only the fields listed in IDs.
raw_df = raw_df.loc[raw_df.ID.isin(IDs)]

# One table per index: drop the other index's column (drop returns a new
# frame), then discard the rows where the kept index is missing.
raw_df_EVI = raw_df.drop(columns=["NDVI"])
raw_df_NDVI = raw_df.drop(columns=["EVI"])
del raw_df

raw_df_EVI = raw_df_EVI.dropna(subset=["EVI"])
raw_df_NDVI = raw_df_NDVI.dropna(subset=["NDVI"])

raw_df_EVI = nc.add_human_start_time_by_system_start_time(raw_df_EVI)
raw_df_NDVI = nc.add_human_start_time_by_system_start_time(raw_df_NDVI)

########################################
# Run the same initial cleaning on the smoothed and the raw tables.

SG_df_NDVI = nc.initial_clean(df=SG_df_NDVI, column_to_be_cleaned="NDVI")
SG_df_EVI = nc.initial_clean(df=SG_df_EVI, column_to_be_cleaned="EVI")

raw_df_NDVI = nc.initial_clean(df=raw_df_NDVI, column_to_be_cleaned="NDVI")
raw_df_EVI = nc.initial_clean(df=raw_df_EVI, column_to_be_cleaned="EVI")

counter = 0

for ID in IDs:
    # Print a separator whenever the counter is a multiple of 100.
    if counter % 100 == 0:
        print("_____________________________________")
Example #6
0
counter = 0
row_pointer = 0

for a_poly in IDs:
    # Progress indicator every 1000 fields.
    if counter % 1000 == 0:
        print(counter)

    # Rows of the current field, in chronological order and renumbered from 0.
    # sort_values/reset_index return new frames, so an_EE_TS is not modified.
    curr_field = (an_EE_TS[an_EE_TS[IDcolName] == a_poly]
                  .sort_values(by=['human_system_start_time'])
                  .reset_index(drop=True))

    no_Outlier_TS = nc.correct_big_jumps_1DaySeries_JFD(dataTMS_jumpie=curr_field,
                                                         give_col=indeks,
                                                         maxjump_perDay=0.018)

    # Write the corrected rows into the next free slice of output_df.
    next_pointer = row_pointer + curr_field.shape[0]
    output_df[row_pointer:next_pointer] = no_Outlier_TS.values
    row_pointer = next_pointer
    counter += 1

####################################################################################
###
###                   Write the outputs
###
####################################################################################
output_df.drop_duplicates(inplace=True)
output_df.to_csv(out_name, index=False)

end_time = time.time()
Example #7
0
########################################################################################

counter = 0

for a_poly in ID_list:
    # Progress indicator every 300 fields.
    if counter % 300 == 0:
        print(counter)

    # Rows of the current field, in chronological order and renumbered from 0.
    # sort_values/reset_index return new frames, so an_EE_TS is not modified.
    curr_field = (an_EE_TS[an_EE_TS[IDcolName] == a_poly]
                  .sort_values(by=['human_system_start_time'])
                  .reset_index(drop=True))

    regularized_TS = nc.regularize_a_field(a_df=curr_field,
                                           V_idks=indeks,
                                           interval_size=regular_window_size)

    # Each field occupies a block of no_steps consecutive rows in output_df.
    row_pointer = no_steps * counter
    output_df[row_pointer:row_pointer + no_steps] = regularized_TS.values
    counter += 1

####################################################################################
###
###                   Write the outputs
###
####################################################################################

out_name = output_dir + "00_noJumpsRegularized_" + indeks + ".csv"
os.makedirs(output_dir, exist_ok=True)
Example #8
0
def SG_clean_SOS(raw_dt, SG_dt, idx, ax, onset_cut=0.5, offset_cut=0.5):
    """Plot a given VI (NDVI or EVI) with its SOS and EOS points on ``ax``.

    Arguments
    ---------
    raw_dt : dataframe
        pandas dataframe of raw observations. The columns
        ``human_system_start_time`` and ``idx`` are read.

    SG_dt  : dataframe
        pandas dataframe of the smoothed version of the data points. It must
        contain exactly one unique ``ID``; the columns ``ID``, ``idx`` and
        ``human_system_start_time`` are read.

    idx : str
        A string indicating vegetation index.

    ax : axis
       An axis object of Matplotlib. It is modified in place.

    onset_cut : float
        Start Of Season threshold
    offset_cut : float
        End Of Season threshold

    Returns
    -------
    None. The smoothed curve, the raw points, the SOS/EOS markers and the
    ratio curve are drawn on ``ax``.
    """
    assert (len(SG_dt['ID'].unique()) == 1)

    time_col = 'human_system_start_time'
    column_ratio = idx + "_" + "ratio"

    unique_years = SG_dt[time_col].dt.year.unique()
    SG_dt = SG_dt[['ID', idx, time_col]]

    #############################################
    ###
    ###   plot the year-independent layers once
    ###
    #############################################
    # These used to be re-drawn in full for every year of the loop below.
    ax.plot(SG_dt[time_col], SG_dt[idx], c='k', linewidth=2, label='SG')
    ax.scatter(raw_dt[time_col], raw_dt[idx], s=7, c='dodgerblue', label="raw")

    #############################################
    ###
    ###   detect and plot SOS and EOS in each year
    ###
    #############################################
    for yr_count, yr in enumerate(unique_years):
        curr_field_yr = SG_dt[SG_dt[time_col].dt.year == yr].copy()

        curr_field_yr = nc.addToDF_SOS_EOS_White(pd_TS=curr_field_yr,
                                                 VegIdx=idx,
                                                 onset_thresh=onset_cut,
                                                 offset_thresh=offset_cut)
        curr_field_yr = nc.Null_SOS_EOS_by_DoYDiff(pd_TS=curr_field_yr, min_season_length=40)

        # Start of season in green, end of season in red. Rows whose SOS/EOS
        # value is 0 are not season points and are skipped.
        for point_col, point_color in (('SOS', 'g'), ('EOS', 'r')):
            points = curr_field_yr[curr_field_yr[point_col] != 0]
            ax.scatter(points[time_col], points[point_col], marker='+', s=155, c=point_color)

            # Annotate every marker with its month-day, written vertically.
            for when in points[time_col]:
                ax.text(x=when.date(),
                        y=-0.2,
                        s=str(when.date())[5:],
                        size=10, color=point_color, rotation='vertical')

        # Plot ratios. Only the first year carries a legend label, and the
        # label follows idx instead of always saying "EVI".
        ax.plot(curr_field_yr[time_col],
                curr_field_yr[column_ratio],
                c='gray', label=idx + " Ratio" if yr_count == 0 else "")

    ax.axhline(0, color='r', linewidth=.5)
    ax.axhline(1, color='r', linewidth=.5)

    ax.set_title(SG_dt['ID'].unique()[0])
    ax.set(ylabel=idx)
    # Pad the time axis by 10 days on both sides of the smoothed series.
    ax.set_xlim([SG_dt.human_system_start_time.min() - timedelta(10),
                 SG_dt.human_system_start_time.max() + timedelta(10)])

    ax.set_ylim([-0.3, 1.15])
    ax.xaxis.set_major_locator(mdates.YearLocator(1))  # every year.
    ax.legend(loc="upper left")
Example #9
0
def SG_clean_SOS_orchardinPlot(raw_dt, SG_dt, idx, ax, onset_cut=0.5, offset_cut=0.5):
    """Plot a given VI (NDVI or EVI) with SOS and EOS points on ``ax``.

    Season detection is only run for a year whose May-October range of
    ``idx`` exceeds 0.3. Otherwise the year is flagged with the sentinel 666
    and a green line at y=1 is drawn over it instead of SOS/EOS markers.

    Arguments
    ---------
    raw_dt : dataframe
        pandas dataframe of raw observations. The columns
        ``human_system_start_time`` and ``idx`` are read.

    SG_dt  : dataframe
        pandas dataframe of the smoothed version of the data points. It must
        contain exactly one unique ``ID``; the columns ``ID``, ``idx`` and
        ``human_system_start_time`` are read.

    idx : str
        A string indicating vegetation index.

    ax : axis
       An axis object of Matplotlib. It is modified in place.

    onset_cut : float
        Start Of Season threshold
    offset_cut : float
        End Of Season threshold

    Returns
    -------
    None. Everything is drawn on ``ax``.
    """
    assert (len(SG_dt['ID'].unique()) == 1)

    time_col = 'human_system_start_time'
    ratio_colName = idx + "_ratio"
    no_season_flag = 666  # sentinel stored in SOS/EOS when detection is skipped

    unique_years = SG_dt[time_col].dt.year.unique()
    SG_dt = SG_dt[['ID', idx, time_col]]

    #############################################
    ###
    ###   plot the year-independent layers once
    ###
    #############################################
    # These used to be re-drawn in full for every year of the loop below.
    ax.plot(SG_dt[time_col], SG_dt[idx], c='k', linewidth=2, label='SG')
    ax.scatter(raw_dt[time_col], raw_dt[idx], s=7, c='dodgerblue', label="raw")

    #############################################
    ###
    ###   detect and plot SOS and EOS in each year
    ###
    #############################################
    for yr_count, yr in enumerate(unique_years):
        curr_field_yr = SG_dt[SG_dt[time_col].dt.year == yr].copy()

        # Range of the index between May and October of this year.
        # Series.max()/min() yield NaN for an empty selection (builtin
        # max()/min() raised ValueError), and NaN > 0.3 is False, so a year
        # without May-October rows takes the "no season" branch.
        months = curr_field_yr[time_col].dt.month
        y_orchard = curr_field_yr.loc[(months >= 5) & (months <= 10), idx]
        y_orchard_range = y_orchard.max() - y_orchard.min()

        if y_orchard_range > 0.3:
            curr_field_yr = nc.addToDF_SOS_EOS_White(pd_TS=curr_field_yr,
                                                     VegIdx=idx,
                                                     onset_thresh=onset_cut,
                                                     offset_thresh=offset_cut)
            curr_field_yr = nc.Null_SOS_EOS_by_DoYDiff(pd_TS=curr_field_yr, min_season_length=40)
        else:
            # Compute the ratio column directly; epsilon avoids a division
            # by zero when the series is constant.
            VegIdx_min = curr_field_yr[idx].min()
            VegIdx_max = curr_field_yr[idx].max()
            VegRange = VegIdx_max - VegIdx_min + sys.float_info.epsilon
            curr_field_yr[ratio_colName] = (curr_field_yr[idx] - VegIdx_min) / VegRange
            curr_field_yr['SOS'] = no_season_flag
            curr_field_yr['EOS'] = no_season_flag

        #
        #  SOS
        #
        SOS = curr_field_yr[curr_field_yr['SOS'] != 0]
        if len(SOS) > 0:  # dataframe might be empty
            if SOS.iloc[0]['SOS'] != no_season_flag:
                ax.scatter(SOS[time_col], SOS['SOS'], marker='+', s=155, c='g',
                           label="")
                # annotate SOS with its month-day, written vertically
                for when in SOS[time_col]:
                    ax.text(x=when.date(),
                            y=-0.1,
                            s=str(when.date())[5:],
                            size=10, color='g', rotation='vertical')
            else:
                # Flagged year: green line at y=1 across the whole year.
                ax.plot(curr_field_yr[time_col],
                        np.ones(len(curr_field_yr[time_col])) * 1,
                        c='g', linewidth=2)

        #
        #  EOS
        #
        EOS = curr_field_yr[curr_field_yr['EOS'] != 0]
        if len(EOS) > 0:  # dataframe might be empty
            if EOS.iloc[0]['EOS'] != no_season_flag:
                ax.scatter(EOS[time_col], EOS['EOS'],
                           marker='+', s=155, c='r',
                           label="")
                # annotate EOS with its month-day, written vertically
                for when in EOS[time_col]:
                    ax.text(x=when.date(),
                            y=-0.1,
                            s=str(when.date())[5:],
                            size=10, color='r', rotation='vertical')

        # Plot ratios; only the first year carries a legend label.
        ax.plot(curr_field_yr[time_col],
                curr_field_yr[ratio_colName],
                c='gray', label=ratio_colName if yr_count == 0 else "")

    ax.set_title(SG_dt['ID'].unique()[0] + ", cut: " + str(onset_cut) + ", " + idx)
    ax.set(ylabel=idx)

    # Pad the time axis by 10 days on both sides of the smoothed series.
    ax.set_xlim([SG_dt.human_system_start_time.min() - timedelta(10),
                 SG_dt.human_system_start_time.max() + timedelta(10)])

    ax.set_ylim([-0.3, 1.15])
    ax.xaxis.set_major_locator(mdates.YearLocator(1))  # every year.
    ax.legend(loc="best")
Example #10
0
# Pre-allocate the output table: no_steps rows for every polygon.
nrows = no_steps * len(polygon_list)
output_df = pd.DataFrame(data=None, index=np.arange(nrows), columns=reg_cols)
########################################################################################

counter = 0

for a_poly in polygon_list:
    # Progress indicator every 10 polygons.
    if counter % 10 == 0:
        print(counter)

    # Rows of the current polygon, in chronological order and renumbered from 0.
    # sort_values/reset_index return new frames, so an_EE_TS is not modified.
    curr_field = (an_EE_TS[an_EE_TS['ID'] == a_poly]
                  .sort_values(by=['human_system_start_time'])
                  .reset_index(drop=True))

    curr_field = nc.fill_theGap_linearLine(a_regularized_TS=curr_field, V_idx=indeks)

    # Each polygon occupies a block of no_steps consecutive rows in output_df.
    row_pointer = no_steps * counter
    output_df[row_pointer:row_pointer + no_steps] = curr_field.values
    counter += 1

####################################################################################
###
###                   Write the outputs
###
####################################################################################
out_name = output_dir + "01_Regular_filledGap_" + indeks + ".csv"
os.makedirs(output_dir, exist_ok=True)
output_df.to_csv(out_name, index=False)
Example #11
0
]
# Read the first three files, keep only the rows that have an NDVI value,
# and collect the resulting tables.
for file_no in range(3):
    A = pd.read_csv(data_dir + file_names[file_no])
    A = A.dropna(subset=['NDVI'])
    dataframe_list.append(A)

# Stack everything into one table with a fresh 0..n-1 index.
all_data = pd.concat(dataframe_list).reset_index(drop=True)
all_data = nc.add_human_start_time_by_system_start_time(all_data)

all_data["dataset"] = "Sentinel"

# Sorted unique field IDs.
ID_list = list(np.sort(all_data.ID.unique()))

print("len(ID_list): " + str(len(ID_list)))
##################################################################
##################################################################
####
####  Set the plotting style
####
##################################################################
##################################################################

size = 20
Example #12
0
# Discard the rows of L8 that have no value for the chosen index.
L8 = L8.dropna(subset=[indeks])

# Sorted unique IDs are taken from L5 before the three tables are stacked.
IDs = np.sort(L5[IDcolName].unique())
L578 = pd.concat([L5, L7, L8])
del L5, L7, L8

########################################################
#######
#######   Choose X random fields
#######
if random_or_all == "random":
    # Sample randCount IDs and keep only their rows, renumbered from 0.
    IDs = random.sample(list(IDs), k=randCount)
    L578 = L578[L578.ID.isin(IDs)].reset_index(drop=True)

L578 = nc.add_human_start_time_by_system_start_time(L578)

#########################################################
# Report the number of fields and the size of the stacked table.
print("Number of unique fields is: ")
print(len(IDs))
print("__________________________________________")

print("Dimension of the data is: " + str(L578.shape))
print("__________________________________________")

#########################################################

L578 = nc.initial_clean(df=L578, column_to_be_cleaned=indeks)
#########################################################
Example #13
0
  fields for now.
"""
SF_data = pd.read_csv(SF_data_dir + county + ".csv")
SF_data["ID"] = SF_data["ID"].astype(str)

# Normalize the crop names: lower case, spaces and slashes become
# underscores, commas are removed. Monterey2014 keeps the crop in "Crop2014";
# every other county uses "CropTyp".
crop_col = 'Crop2014' if county == "Monterey2014" else 'CropTyp'
SF_data[crop_col] = SF_data[crop_col].str.lower().str.replace(
    " ", "_").str.replace(",", "").str.replace("/", "_")

if county != "Monterey2014":
    # filter by last survey date. Last 4 digits of county name!
    print("No. of fields in SF_data is {}.".format(len(SF_data.ID.unique())))
    SF_data = nc.filter_by_lastSurvey(SF_data, year=county[-4:])
    print("No. of fields in SF_data after survey year is {}.".format(
        len(SF_data.ID.unique())))
    SF_data = nc.filter_out_NASS(SF_data)  # Toss NASS
    print("No. of fields in SF_data after NASS is {}.".format(
        len(SF_data.ID.unique())))
    SF_data = nc.filter_out_nonIrrigated(SF_data)  # keep only irrigated lands
    print("No. of fields in SF_data after Irrigation is {}.".format(
        len(SF_data.ID.unique())))

    # IDs that survived the filters. The previous name was not a valid
    # identifier (an assignment to an expression is a SyntaxError).
    kept_IDs = list(SF_data.ID)
    SG_df = SG_df[SG_df.ID.isin(kept_IDs)]

# Attach the SF_data attributes to every row of SG_df.
SG_df = pd.merge(SG_df, SF_data, on=['ID'], how='left')

print("columns of SG_df right after merging is: ")