def run_comparison(var_name, input_date, interpolation_types, rep,
                   loc_dictionary, cvar_dictionary, file_path_elev,
                   elev_array, idx_list, phi_input=None, calc_phi=True,
                   kernels=None):
    '''Execute the shuffle-split cross-validation for the given interpolation
    types and build the surface with the winning (lowest-MAE) method.

     Parameters
         var_name (str): weather variable key ('temp', 'rh', 'pcp', 'wind')
         input_date (str): date of the weather data to interpolate
         interpolation_types (dict): maps var_name -> list of method names to try
         rep (int): number of shuffle-split replications
         loc_dictionary (dict): station locations
         cvar_dictionary (dict): weather values for each station
         file_path_elev (str): path to the elevation lookup file
         elev_array (ndarray): pre-computed elevation array (speed-up trick)
         idx_list (int): position of the elevation column in the lookup file
         phi_input (float): TPS smoothing parameter; None -> calculated
         calc_phi (bool): whether TPS calculates phi itself
         kernels (dict): GPR kernel spec per variable; defaults used when None
     Returns
         (best_method, choix_surf, maxmin): the selected interpolation type
         name, its interpolated surface, and the surface extent

     NOTE(review): relies on a module-level `shapefile` variable — confirm it
     is defined before this function is called.
     '''
    if kernels is None:  # avoid a shared mutable default argument
        kernels = {
            'temp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)'],
            'rh': ['307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
            'pcp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],
            'wind': ['316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
        }

    supported = [
        'IDW2', 'IDW3', 'IDW4', 'IDEW2', 'IDEW3', 'IDEW4', 'TPS', 'GPR', 'RF'
    ]
    MAE_dict = {}
    for method in interpolation_types[var_name]:
        if method not in supported:
            print(
                'The method %s is not currently a supported interpolation type.'
                % (method))
            sys.exit()
        # 'IDEWn' / 'IDWn' encode the power in the trailing digit; check
        # IDEW first ('IDEW2'.startswith('IDW') is False, but be explicit).
        if method.startswith('IDEW'):
            MAE_dict[method] = idew.shuffle_split_IDEW(loc_dictionary,
                                                       cvar_dictionary,
                                                       shapefile,
                                                       file_path_elev,
                                                       elev_array,
                                                       idx_list,
                                                       int(method[-1]),
                                                       rep,
                                                       res=10000)
        elif method.startswith('IDW'):
            MAE_dict[method] = idw.shuffle_split(loc_dictionary,
                                                 cvar_dictionary,
                                                 shapefile,
                                                 int(method[-1]),
                                                 rep,
                                                 False,
                                                 res=10000)
        elif method == 'TPS':
            MAE_dict[method] = tps.shuffle_split_tps(loc_dictionary,
                                                     cvar_dictionary,
                                                     shapefile,
                                                     10,
                                                     res=10000)
        elif method == 'RF':
            MAE_dict[method] = rf.shuffle_split_rf(loc_dictionary,
                                                   cvar_dictionary,
                                                   shapefile,
                                                   file_path_elev,
                                                   elev_array,
                                                   idx_list,
                                                   10,
                                                   res=10000)
        elif method == 'GPR':
            MAE_dict[method] = gpr.shuffle_split_gpr(loc_dictionary,
                                                     cvar_dictionary,
                                                     shapefile,
                                                     file_path_elev,
                                                     elev_array,
                                                     idx_list,
                                                     kernels[var_name],
                                                     10,
                                                     res=10000)

    # Lowest mean absolute error wins.
    best_method = min(MAE_dict, key=MAE_dict.get)
    print('The best method for %s is: %s' % (var_name, best_method))

    # BUG FIX: the original dispatched on `method` (whatever the *last* loop
    # iteration left behind), so the returned surface did not correspond to
    # `best_method`. Dispatch on `best_method` instead.
    if best_method.startswith('IDEW'):
        choix_surf, maxmin, elev_array = idew.IDEW(
            loc_dictionary,
            cvar_dictionary,
            input_date,
            'Variable',
            shapefile,
            False,
            file_path_elev,
            idx_list,
            int(best_method[-1]),
            False,
            res=10000)  #Expand_area is not supported yet
    elif best_method.startswith('IDW'):
        choix_surf, maxmin = idw.IDW(
            loc_dictionary,
            cvar_dictionary,
            input_date,
            'Variable',
            shapefile,
            False,
            int(best_method[-1]),
            False,
            res=10000)  #Expand_area is not supported yet
    elif best_method == 'TPS':
        choix_surf, maxmin = tps.TPS(loc_dictionary,
                                     cvar_dictionary,
                                     input_date,
                                     'Variable',
                                     shapefile,
                                     False,
                                     phi_input,
                                     False,
                                     calc_phi,
                                     res=10000)
    elif best_method == 'RF':
        choix_surf, maxmin = rf.random_forest_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, file_path_elev, idx_list, False, res=10000)
    elif best_method == 'GPR':
        choix_surf, maxmin = gpr.GPR_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, file_path_elev, idx_list, False, kernels[var_name], 0,
            False, False, res=10000)

    return best_method, choix_surf, maxmin
# Example #2
                if var == 'start':

                    days_dict, latlon_station = fwi.start_date_calendar_csv(
                        file_path_daily, year
                    )  #Get two things: start date for each station and the lat lon of the station

                elif var == 'end':
                    days_dict, latlon_station = fwi.end_date_calendar_csv(
                        file_path_daily, year, 'sep')

                else:
                    print('That is not a correct variable!')

                if interpolator == 'IDW2':
                    grd_size, maxmin = idw.IDW(
                        latlon_station, days_dict, year,
                        'End Date (# Days since September 1)', shapefile,
                        False, 2, True)

                    try:
                        inBoreal = GD.is_station_in_boreal(
                            latlon_station, days_dict, boreal_shapefile)
                        filtered_dict = {
                            k: v
                            for k, v in days_dict.items() if k in inBoreal
                        }
                        num_stations = len(filtered_dict.keys(
                        ))  #Number clusters= # stations / 3, /5, /10
                        cluster_num1 = int(round(num_stations / 3))
                        cluster_num2 = int(round(num_stations / 5))
                        cluster_num3 = int(round(num_stations / 10))
                        cluster_num,MAE,stdev_stations = idw.select_block_size_IDW(10,'clusters',latlon_station,days_dict,grd_size,shapefile,\
def execute_sequential_calc(file_path_hourly, file_path_daily,
                            file_path_daily_csv, loc_dictionary_hourly,
                            loc_dictionary_daily, date_dictionary, year,
                            interpolation_types, rep, file_path_elev, idx_list,
                            save_path, shapefile, shapefile2, phi_input=None,
                            calc_phi=True, kernels=None):
    '''Execute the DC, DMC, FFMC (plus ISI, BUI, FWI) sequential calculations
    for every day of one fire season, auto-selecting the interpolation method
    per weather variable per day.

     Parameters
         file_path_hourly (str): path to the hourly station data
         file_path_daily (str): path to the daily station data
         file_path_daily_csv (str): path to the daily csv files
         loc_dictionary_hourly (dict): station locations (hourly network)
         loc_dictionary_daily (dict): station locations (daily network)
         date_dictionary (dict): date lookup used for the pcp extraction
         year (str): year to run the calculations for, e.g. '2018'
         interpolation_types (dict): var name -> list of candidate methods
         rep (int): number of shuffle-split replications
         file_path_elev (str): path to the elevation lookup file
         idx_list (int): position of the elevation column in the lookup file
         save_path (str): directory where the output json files are written
         shapefile (str): path to the study-area shapefile
         shapefile2 (str): path to the boreal shapefile (used for plotting)
         phi_input (float): TPS smoothing parameter; None -> calculated
         calc_phi (bool): whether TPS calculates phi itself
         kernels (dict): GPR kernel spec per variable; defaults used when None
     Returns
         dc_list (list of ndarray): daily DC surfaces for the season
     '''
    if kernels is None:  # avoid a shared mutable default argument
        kernels = {
            'temp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)'],
            'rh': ['307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
            'pcp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],
            'wind': ['316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
        }

    #Fire season start and end dates
    start = time.time()
    # Get two things: start date for each station and the lat lon of the station
    start_dict, latlon_station = fwi.start_date_calendar_csv(
        file_path_daily_csv, year)
    end_dict, latlon_station = fwi.end_date_calendar_csv(
        file_path_daily_csv, year, 'oct')  #start searching from Oct 1

    #Interpolate the start and end dates with IDW3
    daysurface, maxmin = idw.IDW(latlon_station,
                                 start_dict,
                                 year,
                                 '# Days Since March 1',
                                 shapefile,
                                 False,
                                 3,
                                 False,
                                 res=10000)
    endsurface, maxmin = idw.IDW(latlon_station,
                                 end_dict,
                                 year,
                                 '# Days Since Oct 1',
                                 shapefile,
                                 False,
                                 3,
                                 False,
                                 res=10000)

    end_dc_vals = np.zeros(endsurface.shape)  #For now, no overwinter procedure
    end = time.time()
    time_elapsed = (end - start) / 60  #already in minutes
    # BUG FIX: the original printed time_elapsed / 60, dividing by 60 twice
    # and under-reporting the elapsed minutes by a factor of 60.
    print('Finished getting season start & end dates, it took %s minutes' %
          (time_elapsed))

    #Initialize the input elev_array (which is stable across days)
    placeholder_surf, maxmin, elev_array = idew.IDEW(loc_dictionary_hourly,
                                                     end_dict,
                                                     'placeholder',
                                                     'Variable',
                                                     shapefile,
                                                     False,
                                                     file_path_elev,
                                                     idx_list,
                                                     2,
                                                     True,
                                                     res=10000)

    #Get the dates in the fire season; overall, the surfaces take care of masking
    #Start early: if it's too early everything will be masked out anyway
    sdate = pd.to_datetime(year + '-03-01').date()
    edate = pd.to_datetime(year + '-12-31').date()  #for right now it's Dec 31
    dates = list(pd.date_range(sdate, edate, freq='d'))

    dc_list = []
    dmc_list = []
    ffmc_list = []
    isi_list = []
    bui_list = []
    fwi_list = []
    count = 0
    for input_date in dates:
        print(input_date)
        gc.collect()
        #Get the weather dictionaries for this day
        start = time.time()
        temp = GD.get_noon_temp(str(input_date)[:-3], file_path_hourly)
        rh = GD.get_relative_humidity(str(input_date)[:-3], file_path_hourly)
        wind = GD.get_wind_speed(str(input_date)[:-3], file_path_hourly)
        pcp = GD.get_pcp(
            str(input_date)[0:10], file_path_daily, date_dictionary)

        end = time.time()
        time_elapsed = end - start
        print('Finished getting weather dictionaries, it took %s seconds' %
              (time_elapsed))

        start = time.time()

        #Pick the best interpolation method per variable and build its surface
        best_interp_temp, choice_surf_temp, maxmin = run_comparison(
            'temp', input_date, interpolation_types, rep,
            loc_dictionary_hourly, temp, file_path_elev, elev_array, idx_list)
        best_interp_rh, choice_surf_rh, maxmin = run_comparison(
            'rh', input_date, interpolation_types, rep, loc_dictionary_hourly,
            rh, file_path_elev, elev_array, idx_list)
        best_interp_wind, choice_surf_wind, maxmin = run_comparison(
            'wind', input_date, interpolation_types, rep,
            loc_dictionary_hourly, wind, file_path_elev, elev_array, idx_list)
        best_interp_pcp, choice_surf_pcp, maxmin = run_comparison(
            'pcp', input_date, interpolation_types, rep, loc_dictionary_daily,
            pcp, file_path_elev, elev_array, idx_list)

        end = time.time()
        time_elapsed = end - start
        print('Finished getting best methods & surfaces, it took %s seconds' %
              (time_elapsed))

        #Get date index information (NOTE: `year` is rebound to this date's year,
        #which the output file names below also use)
        year = str(input_date)[0:4]
        index = dates.index(input_date)
        dat = str(input_date)
        day_index = fwi.get_date_index(year, dat, 3)
        eDay_index = fwi.get_date_index(year, dat, 10)

        start = time.time()

        mask1 = fwi.make_start_date_mask(day_index, daysurface)
        if eDay_index < 0:
            #in the case that the index is before Oct 1
            endMask = np.ones(endsurface.shape)
        else:
            endMask = fwi.make_end_date_mask(eDay_index, endsurface)

        if count > 0:
            #The last one added is yesterday's value, but there's a lag because
            #none was added when count was 0, so use count-1
            dc_array = dc_list[count - 1]
            dmc_array = dmc_list[count - 1]
            ffmc_array = ffmc_list[count - 1]
            index = count - 1
            dc = fwi.DC(input_date, choice_surf_pcp, choice_surf_rh,
                        choice_surf_temp, choice_surf_wind, maxmin, dc_array,
                        index, False, shapefile, mask1, endMask, None, False)
            dmc = fwi.DMC(input_date, choice_surf_pcp, choice_surf_rh,
                          choice_surf_temp, choice_surf_wind, maxmin,
                          dmc_array, index, False, shapefile, mask1, endMask)
            ffmc = fwi.FFMC(input_date, choice_surf_pcp, choice_surf_rh,
                            choice_surf_temp, choice_surf_wind, maxmin,
                            ffmc_array, index, False, shapefile, mask1,
                            endMask)

            isi = fwi.ISI(ffmc, choice_surf_wind, maxmin, False, shapefile,
                          mask1, endMask)
            bui = fwi.BUI(dmc, dc, maxmin, False, shapefile, mask1, endMask)
            fwi_val = fwi.FWI(isi, bui, maxmin, False, shapefile, mask1,
                              endMask)

            dc_list.append(dc)
            dmc_list.append(dmc)
            ffmc_list.append(ffmc)
            isi_list.append(isi)
            bui_list.append(bui)
            fwi_list.append(fwi_val)

        else:
            #Day 0: seed the moisture codes with start-up values (DC=15,
            #DMC=6, FFMC=85), masked to the started area - placeholders;
            #merge with the other overwinter array once it's calculated
            rain_shape = choice_surf_pcp.shape
            dc_list.append((np.zeros(rain_shape) + 15) * mask1)
            dmc_list.append((np.zeros(rain_shape) + 6) * mask1)
            ffmc_list.append((np.zeros(rain_shape) + 85) * mask1)

        end = time.time()
        time_elapsed = end - start
        print('Finished getting DC for date in stream, it took %s seconds' %
              (time_elapsed))

        count += 1

    #Prep to serialize: arrays -> nested lists
    dc_list = [x.tolist() for x in dc_list]
    dmc_list = [x.tolist() for x in dmc_list]
    ffmc_list = [x.tolist() for x in ffmc_list]
    isi_list = [x.tolist() for x in isi_list]
    bui_list = [x.tolist() for x in bui_list]
    fwi_list = [x.tolist() for x in fwi_list]

    with open(save_path + year + '_DC_auto_select.json', 'w') as fp:
        json.dump(dc_list, fp)

    #NOTE(review): re-loading what was just written looks like a round-trip
    #sanity check - confirm it is intentional
    with open(save_path + year + '_DC_auto_select.json', 'r') as fp:
        dc_list = json.load(fp)

    with open(save_path + year + '_DMC_auto_select.json', 'w') as fp:
        json.dump(dmc_list, fp)

    with open(save_path + year + '_FFMC_auto_select.json', 'w') as fp:
        json.dump(ffmc_list, fp)

    with open(save_path + year + '_ISI_auto_select.json', 'w') as fp:
        json.dump(isi_list, fp)

    with open(save_path + year + '_BUI_auto_select.json', 'w') as fp:
        json.dump(bui_list, fp)

    with open(save_path + year + '_FWI_auto_select.json', 'w') as fp:
        json.dump(fwi_list, fp)

    dc_list = [np.array(x) for x in dc_list]  #convert to np array for plotting

    fwi.plot_june(dc_list, maxmin, year, 'DC', shapefile, shapefile2)

    return dc_list
def run_comparison(
    var_name,
    input_date,
    interpolation_types,
    rep,
    loc_dictionary,
    cvar_dictionary,
    file_path_elev,
    elev_array,
    idx_list,
    phi_input=None,
    calc_phi=True,
    kernels=None):
    '''Execute the shuffle-split cross-validation for the given interpolation
    types, then generate a surface with the best (lowest-MAE) method.

   Parameters
   ----------
        var_name : string
             name of weather variable you are interpolating
        input_date : string
             date of weather data (day of fire season)
        interpolation_types : list
             list of interpolation types to consider
        rep : int
             number of replications to run
        loc_dictionary : dictionary
             dictionary of station locations
        cvar_dictionary : dictionary
             dictionary containing the weather data for each station available
        file_path_elev : string
             path to the elevation lookup file
        elev_array : ndarray
             array for elevation, create using IDEW interpolation (this is a trick to speed up code)
        idx_list : int
             position of the elevation column in the lookup file
        phi_input : float
             smoothing parameter for the thin plate spline, if 0 no smoothing, default is None (it is calculated)
        calc_phi : bool
             whether to calculate phi in the function, if True, phi can = None
        kernels : dictionary
             the kernels for each weather variable for gaussian process
             regression; defaults are used when None

   Returns
   ----------
        tuple
            - (best_method, choix_surf, maxmin): the selected interpolation
              type name, its surface, and the surface extent

   NOTE(review): relies on a module-level `shapefile` variable - confirm it
   is defined before calling.
    '''
    if kernels is None:  # avoid a shared mutable default argument
        kernels = {
            'temp':
            ['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)'],
            'rh':
            ['307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
            'pcp':
            ['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],
            'wind':
            ['316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)']
        }

    supported = [
        'IDW2', 'IDW3', 'IDW4', 'IDEW2', 'IDEW3', 'IDEW4', 'TPS', 'GPR', 'RF'
    ]
    MAE_dict = {}
    for method in interpolation_types:
        if method not in supported:
            print(
                'The method %s is not currently a supported interpolation type.'
                % (method))
            sys.exit()
        # 'IDEWn' / 'IDWn' encode the inverse-distance power in the last char.
        if method.startswith('IDEW'):
            MAE_dict[method] = idew.shuffle_split_IDEW(
                loc_dictionary, cvar_dictionary, shapefile, file_path_elev,
                elev_array, idx_list, int(method[-1]), rep)
        elif method.startswith('IDW'):
            MAE_dict[method] = idw.shuffle_split(loc_dictionary,
                                                 cvar_dictionary, shapefile,
                                                 int(method[-1]), rep, False)
        elif method == 'TPS':
            MAE_dict[method] = tps.shuffle_split_tps(loc_dictionary,
                                                     cvar_dictionary,
                                                     shapefile, 10)
        elif method == 'RF':
            MAE_dict[method] = rf.shuffle_split_rf(loc_dictionary,
                                                   cvar_dictionary, shapefile,
                                                   file_path_elev, elev_array,
                                                   idx_list, 10)
        elif method == 'GPR':
            MAE_dict[method] = gpr.shuffle_split_gpr(loc_dictionary,
                                                     cvar_dictionary,
                                                     shapefile, file_path_elev,
                                                     elev_array, idx_list,
                                                     kernels[var_name], 10)

    # Lowest mean absolute error wins.
    best_method = min(MAE_dict, key=MAE_dict.get)
    print('The best method for %s is: %s' % (var_name, best_method))

    # BUG FIX: the original dispatched on `method` (the *last* candidate the
    # loop visited), so the returned surface did not match `best_method`.
    if best_method.startswith('IDEW'):
        choix_surf, maxmin, elev_array = idew.IDEW(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, file_path_elev, idx_list,
            int(best_method[-1]))  # Expand_area is not supported yet
    elif best_method.startswith('IDW'):
        choix_surf, maxmin = idw.IDW(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, int(best_method[-1]),
            False)  # Expand_area is not supported yet
    elif best_method == 'TPS':
        choix_surf, maxmin = tps.TPS(loc_dictionary, cvar_dictionary,
                                     input_date, 'Variable', shapefile, False,
                                     phi_input, False, calc_phi)
    elif best_method == 'RF':
        choix_surf, maxmin = rf.random_forest_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, file_path_elev, idx_list, False)
    elif best_method == 'GPR':
        choix_surf, maxmin = gpr.GPR_interpolator(loc_dictionary,
                                                  cvar_dictionary, input_date,
                                                  'Variable', shapefile, False,
                                                  file_path_elev, idx_list,
                                                  False, kernels[var_name],
                                                  None, None, False, False)

    return best_method, choix_surf, maxmin
def execute_sequential_calc(
    file_path_hourly,
    file_path_daily,
    file_path_daily_csv,
    loc_dictionary_hourly,
    loc_dictionary_daily,
    date_dictionary,
    year,
    interpolation_types,
    rep,
    file_path_elev,
    idx_list,
    save_path,
    shapefile,
    shapefile2,
    phi_input=None,
    calc_phi=True,
    kernels=None):
    '''Execute the DC (Drought Code) sequential calculation for one fire season.

    For every day of the potential fire season (Mar 1 - Dec 30 of ``year``) this
    interpolates the four weather surfaces (noon temp, RH, wind, precipitation),
    selects the best interpolator per variable via ``run_comparison``, and feeds
    the surfaces into ``fwi.DC`` sequentially (each day depends on the previous
    day's DC array). Results are serialized to JSON and plotted.

    Parameters
    ----------
        file_path_hourly : string
             path to hourly feather files
        file_path_daily : string
             path to daily feather files
        file_path_daily_csv : string
             path to daily csv files
        loc_dictionary_hourly : dictionary
             dictionary of station locations (hourly)
        loc_dictionary_daily : dictionary
             dictionary of station locations (daily)
        date_dictionary : dictionary
             lookup used by GD.get_pcp to fetch daily precipitation
        year : string
             year to execute sequential calculations for
        interpolation_types : dictionary
             maps variable name ('temp'/'rh'/'wind'/'pcp') to the list of
             interpolation types to consider (consumed by run_comparison)
        rep : int
             number of replications to run
        file_path_elev : string
             path to the elevation lookup file
        idx_list : int
             position of the elevation column in the lookup file
        save_path : string
             where in computer to save the output file to
        shapefile : string
             path to study area shapefile (ON + QC)
        shapefile2 : string
             path to boreal shapefile
        phi_input : float
             smoothing parameter for the thin plate spline, if 0 no smoothing,
             default is None (it is calculated)
        calc_phi : bool
             whether to calculate phi in the function, if True, phi can = None
        kernels : dictionary
             the kernels for each weather variable for gaussian process
             regression; defaults to the module-standard Matern kernels.
             NOTE(review): currently unused in this function — run_comparison
             is called with its own identical defaults.

    Returns
    ----------
        list
            - list of array of DC values for each day in fire season
    '''
    if kernels is None:
        # Built here rather than as a mutable default argument (shared dict
        # defaults are a classic Python pitfall).
        kernels = {
            'temp':
            ['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)'],
            'rh':
            ['307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
            'pcp':
            ['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],
            'wind':
            ['316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)']
        }

    # Fire season start and end dates
    start = time.time()
    # Get two things: start date for each station and the lat lon of the station
    start_dict, latlon_station = fwi.start_date_calendar_csv(
        file_path_daily_csv, year)
    end_dict, latlon_station = fwi.end_date_calendar_csv(
        file_path_daily_csv, year, 'oct')  # start searching from Oct 1

    daysurface, maxmin = idw.IDW(latlon_station, start_dict, year,
                                 '# Days Since March 1', shapefile, False, 3,
                                 False)  # Interpolate the start date, IDW3
    endsurface, maxmin = idw.IDW(latlon_station, end_dict, year,
                                 '# Days Since Oct 1', shapefile, False, 3,
                                 False)  # Interpolate the end date

    # For now, no overwinter procedure
    end_dc_vals = np.zeros(endsurface.shape)
    end = time.time()
    # time_elapsed is already in minutes; the original printed
    # time_elapsed / 60 here, which reported hours while claiming minutes.
    time_elapsed = (end - start) / 60
    print('Finished getting season start & end dates, it took %s minutes' %
          (time_elapsed))

    # Initialize the input elev_array (which is stable)
    placeholder_surf, maxmin, elev_array = idew.IDEW(loc_dictionary_hourly,
                                                     end_dict, 'placeholder',
                                                     'Variable', shapefile,
                                                     False, file_path_elev,
                                                     idx_list, 2)

    # Get the dates in the fire season, overall, the surfaces will take care of masking
    # Get the start date to start (if its too early everything will be masked out so can put any day before april)
    sdate = pd.to_datetime(year + '-03-01').date()
    # End date, for right now it's Dec 31
    edate = pd.to_datetime(year + '-12-31').date()
    # Get the dates for all the potential days in the season
    dates = list(pd.date_range(sdate, edate - timedelta(days=1), freq='d'))
    dc_list = []
    count = 0
    for input_date in dates:
        print(input_date)
        gc.collect()
        # Get the weather dictionaries for this day
        start = time.time()
        temp = GD.get_noon_temp(str(input_date)[:-3], file_path_hourly)
        rh = GD.get_relative_humidity(str(input_date)[:-3], file_path_hourly)
        wind = GD.get_wind_speed(str(input_date)[:-3], file_path_hourly)
        pcp = GD.get_pcp(
            str(input_date)[0:10], file_path_daily, date_dictionary)

        end = time.time()
        time_elapsed = end - start
        print('Finished getting weather dictionaries, it took %s seconds' %
              (time_elapsed))

        start = time.time()

        # Select the best interpolator and surface for each weather variable
        best_interp_temp, choice_surf_temp, maxmin = run_comparison(
            'temp', input_date, interpolation_types, rep,
            loc_dictionary_hourly, temp, file_path_elev, elev_array, idx_list)

        best_interp_rh, choice_surf_rh, maxmin = run_comparison(
            'rh', input_date, interpolation_types, rep, loc_dictionary_hourly,
            rh, file_path_elev, elev_array, idx_list)
        best_interp_wind, choice_surf_wind, maxmin = run_comparison(
            'wind', input_date, interpolation_types, rep,
            loc_dictionary_hourly, wind, file_path_elev, elev_array, idx_list)
        best_interp_pcp, choice_surf_pcp, maxmin = run_comparison(
            'pcp', input_date, interpolation_types, rep, loc_dictionary_daily,
            pcp, file_path_elev, elev_array, idx_list)

        end = time.time()
        time_elapsed = end - start
        print('Finished getting best methods & surfaces, it took %s seconds' %
              (time_elapsed))

        # Get date index information
        year = str(input_date)[0:4]
        index = dates.index(input_date)
        dat = str(input_date)
        day_index = fwi.get_date_index(year, dat, 3)
        eDay_index = fwi.get_date_index(year, dat, 10)

        start = time.time()

        mask1 = fwi.make_start_date_mask(day_index, daysurface)
        if eDay_index < 0:
            # in the case that the index is before Oct 1
            endMask = np.ones(endsurface.shape)
        else:
            endMask = fwi.make_end_date_mask(eDay_index, endsurface)

        if count > 0:
            # the last one added will be yesterday's val, but there's a lag bc none was added when count was0, so just use count-1
            dc_array = dc_list[count - 1]
            index = count - 1
            dc = fwi.DC(input_date, choice_surf_pcp, choice_surf_rh,
                        choice_surf_temp, choice_surf_wind, maxmin, dc_array,
                        index, False, shapefile, mask1, endMask, None, False)
            dc_list.append(dc)

        else:
            rain_shape = choice_surf_pcp.shape
            # merge with the other overwinter array once it's calculated
            dc_initialize = np.zeros(rain_shape) + 15  # DC start-up value is 15
            dc_yesterday1 = dc_initialize * mask1
            dc_list.append(dc_yesterday1)  # placeholder
        end = time.time()
        time_elapsed = end - start
        print('Finished getting DC for date in stream, it took %s seconds' %
              (time_elapsed))

        count += 1

    # prep to serialize
    dc_list = [x.tolist() for x in dc_list]

    with open(save_path + year + '_DC_auto_select.json', 'w') as fp:
        json.dump(dc_list, fp)

    with open(save_path + year + '_DC_auto_select.json', 'r') as fp:
        dc_list = json.load(fp)

    # convert to np array for plotting
    dc_list = [np.array(x) for x in dc_list]

    fwi.plot_july(dc_list, maxmin, year, 'DC', shapefile, shapefile2)
    fwi.plot_june(dc_list, maxmin, year, 'DC', shapefile, shapefile2)

    return dc_list
def stack_and_average(year1, year2, file_path_daily, file_path_hourly,
                      shapefile, file_path_elev, idx_list, method):
    '''Get the fire season duration for every year in between the two input years
    and average them. Output the average array.

    Parameters
    ----------

    year1 : int
        first year taken into account
    year2 : int
        last year taken into account
    file_path_daily : string
        path to the daily weather csv files from Environment & Climate Change Canada
    file_path_hourly : string
        path to the hourly feather files
    shapefile : string
        path to the study area shapefile
    file_path_elev : string
        path to the elevation lookup file
    idx_list : list
        column index of elevation information in the lookup file
    method : string
        type of interpolation to use to create the yearly arrays, one of: 'IDW2', 'IDW3', 'IDW4', 'TPSS', 'RF'

    Returns
    ----------
    ndarray
        - average of each pixel of all the years considered in array format

    Raises
    ----------
    ValueError
        if ``method`` is not one of the supported interpolation types
    '''
    list_of_arrays = []
    for year in range(int(year1), int(year2) + 1):
        print('Processing...' + str(year))
        # Per-station fire season start/end dates and station coordinates
        days_dict, latlon_station = fwi.start_date_calendar_csv(
            file_path_daily, str(year))
        end_dict, latlon_station2 = fwi.end_date_calendar_csv(
            file_path_daily, str(year), 'oct')
        if year >= 2020:
            # From 2020 onward, supplement with stations only present in the
            # hourly feather files.
            hourly_dict, latlon_stationH = fwi.start_date_add_hourly(
                file_path_hourly, str(year))
            hourly_end, latlon_stationE = fwi.end_date_add_hourly(
                file_path_hourly, str(year))

            days_dict = combine_stations(days_dict, hourly_dict)
            latlon_station = combine_stations(latlon_station, latlon_stationH)

            end_dict = combine_stations(end_dict, hourly_end)
            latlon_station2 = combine_stations(latlon_station2,
                                               latlon_stationE)

        if method == 'IDW2':

            start_surface, maxmin = idw.IDW(latlon_station, days_dict,
                                            str(year), 'Start', shapefile,
                                            False, 2, True)
            end_surface, maxmin = idw.IDW(latlon_station2, end_dict, str(year),
                                          'End', shapefile, False, 2, True)

        elif method == 'IDW3':

            start_surface, maxmin = idw.IDW(latlon_station, days_dict,
                                            str(year), 'Start', shapefile,
                                            False, 3, True)
            end_surface, maxmin = idw.IDW(latlon_station2, end_dict, str(year),
                                          'End', shapefile, False, 3, True)

        elif method == 'IDW4':

            start_surface, maxmin = idw.IDW(latlon_station, days_dict,
                                            str(year), 'Start', shapefile,
                                            False, 4, True)
            end_surface, maxmin = idw.IDW(latlon_station2, end_dict, str(year),
                                          'End', shapefile, False, 4, True)

        elif method == 'TPSS':
            # Smoothing parameter phi = n - sqrt(2n), computed separately for
            # the start- and end-date station sets.
            # BUG FIX: the original referenced an undefined name
            # `num_stations` here, which raised NameError for method='TPSS'.
            num_stationsS = int(len(days_dict.keys()))
            phi_inputS = int(num_stationsS) - (math.sqrt(2 * num_stationsS))
            num_stationsE = int(len(end_dict.keys()))
            phi_inputE = int(num_stationsE) - (math.sqrt(2 * num_stationsE))
            start_surface, maxmin = tps.TPS(latlon_station, days_dict,
                                            str(year), 'Start', shapefile,
                                            False, phi_inputS, True, True)
            end_surface, maxmin = tps.TPS(latlon_station2, end_dict, str(year),
                                          'End', shapefile, False, phi_inputE,
                                          True, True)

        elif method == 'RF':
            start_surface, maxmin = rf.random_forest_interpolator(
                latlon_station, days_dict, str(year), 'Start', shapefile,
                False, file_path_elev, idx_list, True)
            end_surface, maxmin = rf.random_forest_interpolator(
                latlon_station2, end_dict, str(year), 'End', shapefile, False,
                file_path_elev, idx_list, True)

        else:
            # Fail fast: the original only printed here and then crashed with
            # a NameError on the undefined start_surface below.
            raise ValueError(
                'Either that method does not exist or there is no support for it. You can use IDW2-4, TPSS, or RF'
            )

        dur_array = calc_season_duration(start_surface, end_surface, year)
        list_of_arrays.append(dur_array)
    voxels = np.dstack(list_of_arrays)  #stack arrays based on depth
    # Per-pixel mean across the depth (year) axis; vectorized equivalent of
    # the nested [[np.mean(x) for x in group] for group in voxels].
    averaged_voxels = np.mean(voxels, axis=2)
    return averaged_voxels
# Example #7
# NOTE(review): this is a fragment of a separate gridding script pasted after
# the functions above; ds_gem, ds_pts, x, y, xi, E and this idw module are
# defined in code not visible here — confirm against the original source.
yi = ds_gem.gridlat_0  # target grid latitudes (presumably a 2-D coordinate array — TODO confirm)
Ei = ds_gem.HGT_P0_L1_GST0  # model terrain height, used later for elevation detrending — TODO confirm

# Initialize dataArray
da_list = []  # accumulates one interpolated grid per time step

test_plots = False  # flag for optional diagnostic plotting
# Loop through time
for t in ds_pts.Time_UTC:  #.sel(Time_UTC=slice('2014-11-28T00:00:00', '2014-11-29T01:00:00')):
    # Get current time
    cval = ds_pts.sel(Time_UTC=t)
    print(t.values)

    # Set up IDW
    w = idw.IDW(x, y, xi, yi, mz=E, GridZ=Ei, power=2)

    # Check we have some observations
    if cval.notnull().sum() == 0:
        print(t)
        raise ValueError('No stations with data on this time step found')

    # De-trend (wrt Elevation)
    cval_grid = w.detrendedIDW(cval.values, 0, zeros=None)
    cval_grid = cval_grid.where(cval_grid >= 0).fillna(0)
    cval_grid = cval_grid.where(Ei.notnull())  # Replace original missing cells

    # Add time stamp
    cval_grid['Time_UTC'] = t

    # Store interpolated grid