def mjo_tc_freq_int(ibtracs_file, analysis_file, trno):
    """Count one storm and record its peak intensity if the MJO was active.

    The storm is dated by the first timestep of its analysis track, so the
    whole lifecycle (not only the TC stages) decides which day it belongs to.
    If the MJO amplitude on that day is at least 1.0, one entry is appended to
    each of the module-level lists ``number_of_storms``, ``storm_max_winds``
    and ``storm_min_mslp``, and ``trno`` is printed.

    Args:
        ibtracs_file: Path of the IBTrACS track file.  Kept for interface
            compatibility; nothing computed here depends on its contents, so
            it is not read.
        analysis_file: Path of the analysis track file: a text table with one
            header row, MSLP in column 9 and wind in column 10.
        trno: Track identifier, only used for the progress print.
    """
    an_data = np.genfromtxt(analysis_file, dtype=float, skip_header=1)
    an_mslp = an_data[:, 9]
    an_wind = an_data[:, 10]
    an_datelist = pl.get_dates(an_data)

    first_date = datetime.datetime.strptime(str(an_datelist[0]), "%Y%m%d%H")

    # 29 February is skipped: the MJO series appears to have no leap days.
    if first_date.strftime("%m-%d") == '02-29':
        return

    # Index of the storm's first day in the MJO record.
    # NOTE(review): presumably MJOdates is a list of "YYYY-MM-DD" strings, in
    # which case a date missing from the record raises ValueError here.
    mjo_idx = MJOdates.index(first_date.strftime("%Y-%m-%d"))
    amp = MJOamp[mjo_idx]

    # Only storms that formed while the MJO was active (amplitude >= 1) count.
    if amp >= 1.0:
        number_of_storms.append(1)
        # NOTE(review): intensities are taken from the analysis track; confirm
        # that IBTrACS values were not the intended source.
        storm_max_winds.append(np.nanmax(an_wind))
        storm_min_mslp.append(np.nanmin(an_mslp))
        print(trno)
def storm_stats(ibtracs_file, analysis_file, nwp_files_list,
                use_analysis_or_ibtracs, array_len):
    """Calculate translation-speed errors for all the forecasts of one storm.

    Each forecast track is matched, date by date, against the observed track.
    Matched positions are stored at the forecast's own lead-time index, so
    lead times with no verifying observation stay masked instead of shifting
    later matches forward.  Forecasts with no matching dates are skipped.

    Module-level state updated:
        all_storms_wgt: per-lead-time count, incremented in place.
        all_storms_speed_err, all_storms_speed_bias: filled in place by the
            first verified forecast (while ``all_storms_speed_err`` is still
            all zeros), afterwards rebound to a stack with one extra row per
            forecast.

    Args:
        ibtracs_file: Path of the IBTrACS track file.
        analysis_file: Path of the analysis track file.
        nwp_files_list: Paths of the forecast track files for this storm.
        use_analysis_or_ibtracs: ``"analysis"`` or ``"ibtracs"``; selects
            which of the two files is the observed track.
        array_len: Number of lead-time slots per forecast.

    Raises:
        ValueError: If ``use_analysis_or_ibtracs`` is not one of the two
            recognised values.
    """
    global all_storms_speed_err
    global all_storms_speed_bias

    if use_analysis_or_ibtracs == "analysis":
        obs_file = analysis_file
    elif use_analysis_or_ibtracs == "ibtracs":
        obs_file = ibtracs_file
    else:
        raise ValueError(
            "use_analysis_or_ibtracs must be 'analysis' or 'ibtracs', got %r"
            % (use_analysis_or_ibtracs,))

    # Number of forecast tracks for this storm
    NT = len(nwp_files_list)

    # Date, lon and lat of the observed track (one header row is skipped)
    obs_data = np.genfromtxt(obs_file, dtype=float, skip_header=1)
    obs_lon = obs_data[:, 7]
    obs_lat = obs_data[:, 8]
    obs_datelist = pl.get_dates(obs_data)

    print(NT)
    for ff in nwp_files_list:
        fcst_data = np.genfromtxt(ff, dtype=float, skip_header=1)
        fcst_lon = fcst_data[:, 7]
        fcst_lat = fcst_data[:, 8]
        fcst_datelist = pl.get_dates(fcst_data)

        # The observed and forecast tracks cover different date ranges, so
        # find the positions in each where the dates coincide.
        indices_fcst = np.nonzero(np.in1d(fcst_datelist, obs_datelist))[0]
        indices_obs = np.nonzero(np.in1d(obs_datelist, fcst_datelist))[0]

        # With IBTrACS there may be no observation anywhere along the
        # forecast; nothing can be verified in that case.
        if len(indices_fcst) == 0:
            continue

        # The first matched forecast index is the number of leading lead
        # times that have no observation; those slots stay masked.
        lt_to_skip = indices_fcst[0]
        slots = array_len - lt_to_skip

        matched_obs_lon_lat, matched_fcst_lon_lat = (
            np.ma.masked_all((array_len, 2)) for _ in range(2))

        print("indices_obs:  %s" % (indices_obs,))
        for i, z in zip(indices_obs, range(slots)):
            row = z + lt_to_skip
            matched_obs_lon_lat[row, 0] = obs_lon[i]
            matched_obs_lon_lat[row, 1] = obs_lat[i]

        print("indices_fcst:  %s" % (indices_fcst,))
        print("indices_fcst[0:array_len-1]:  %s"
              % (indices_fcst[0:array_len - 1],))
        # NOTE(review): the forecast indices are capped at array_len - 1
        # entries while the observed ones are not - confirm this is intended.
        for i, z in zip(indices_fcst[0:array_len - 1], range(slots)):
            row = z + lt_to_skip
            matched_fcst_lon_lat[row, 0] = fcst_lon[i]
            matched_fcst_lon_lat[row, 1] = fcst_lat[i]

        # Translation (propagation) speed along the matched observed and
        # forecast tracks
        obs_speed = ts.prop_speed_vals(matched_obs_lon_lat[:, 0],
                                       matched_obs_lon_lat[:, 1])
        fcst_speed = ts.prop_speed_vals(matched_fcst_lon_lat[:, 0],
                                        matched_fcst_lon_lat[:, 1])
        print(obs_speed)
        print(fcst_speed)

        # Only the first array_len - 1 slots are computed; the last slot
        # keeps its initial 0.
        err, bias = (np.zeros(array_len) for _ in range(2))
        for lt in range(array_len - 1):
            err[lt] = ts.prop_speed_abs_err(obs_speed[lt], fcst_speed[lt])
            bias[lt] = ts.prop_speed_bias(obs_speed[lt], fcst_speed[lt])

        # Count this forecast in the sample size of every lead time that has
        # a value (forecasts differ in length).
        # NOTE(review): the last slot is never NaN (it stays 0), so it is
        # counted for every forecast - confirm downstream use ignores it.
        for lt in range(array_len):
            if not np.isnan(err[lt]):
                all_storms_wgt[lt] += 1

        # An all-zero accumulator means no forecast has been stored yet:
        # overwrite it.  Otherwise stack this forecast as a new row.
        if np.all(all_storms_speed_err == 0):
            for lt in range(array_len):
                all_storms_speed_err[lt] = err[lt]
                all_storms_speed_bias[lt] = bias[lt]
        else:
            all_storms_speed_err = np.vstack([all_storms_speed_err, err])
            all_storms_speed_bias = np.vstack([all_storms_speed_bias, bias])
def storm_stats(ibtracs_file, analysis_file, nwp_files_list,
                use_analysis_or_ibtracs, array_len, e):
    """Calculate track and intensity errors for one storm's forecasts.

    Only rows of each forecast file whose column 0 equals ``e`` are used, and
    only forecasts whose first date falls in MJO phase ``MJO1`` or ``MJO2``
    with amplitude >= 1.0.  Each such forecast is matched, date by date,
    against the observed track; matched values are stored at the forecast's
    own lead-time index so unverifiable lead times stay masked.

    Module-level state updated (``_ib`` set when verifying against IBTrACS,
    ``_an`` set when verifying against the analysis):
        all_storms_sample_size_*: per-lead-time count, incremented in place.
        all_storms_trerr_*, all_storms_mslp_bias_*, all_storms_wind_bias_*:
            filled in place by the first verified forecast (while the track
            error accumulator is still all zeros), afterwards rebound to a
            stack with one extra row per forecast.

    Args:
        ibtracs_file: Path of the IBTrACS track file.
        analysis_file: Path of the analysis track file.
        nwp_files_list: Paths of the forecast track files for this storm.
        use_analysis_or_ibtracs: ``"analysis"`` or ``"ibtracs"``; selects the
            observed track file and the matching set of accumulators.
        array_len: Number of lead-time slots per forecast.
        e: Value of column 0 selecting the rows to verify (presumably the
            ensemble member number - confirm against the file format).

    Raises:
        ValueError: If ``use_analysis_or_ibtracs`` is not one of the two
            recognised values.
    """
    global all_storms_trerr_ib
    global all_storms_mslp_bias_ib
    global all_storms_wind_bias_ib
    global all_storms_trerr_an
    global all_storms_mslp_bias_an
    global all_storms_wind_bias_an

    if use_analysis_or_ibtracs == "analysis":
        obs_file = analysis_file
    elif use_analysis_or_ibtracs == "ibtracs":
        obs_file = ibtracs_file
    else:
        raise ValueError(
            "use_analysis_or_ibtracs must be 'analysis' or 'ibtracs', got %r"
            % (use_analysis_or_ibtracs,))
    verify_ibtracs = use_analysis_or_ibtracs == "ibtracs"

    # Number of forecast tracks for this storm
    NT = len(nwp_files_list)

    # Date, lon, lat, MSLP and wind of the observed track
    obs_data = np.genfromtxt(obs_file, dtype=float, skip_header=1)
    obs_lon = obs_data[:, 7]
    obs_lat = obs_data[:, 8]
    obs_mslp = obs_data[:, 9]
    obs_wind = obs_data[:, 10]
    obs_datelist = pl.get_dates(obs_data)

    print("NT:  %s" % (NT,))
    for ff in nwp_files_list:
        fcst_data = np.genfromtxt(ff, dtype=float, skip_header=1,
                                  usecols=np.arange(0, 11))
        # Select the requested rows once and take every column from them.
        member_rows = fcst_data[fcst_data[:, 0] == e]
        fcst_lon = member_rows[:, 7]
        fcst_lat = member_rows[:, 8]
        fcst_mslp = member_rows[:, 9]
        fcst_wind = member_rows[:, 10]
        fcst_datelist = pl.get_dates(member_rows)

        if len(fcst_datelist) == 0:
            continue

        # Decide from the first forecast date whether this forecast belongs
        # to the MJO phase pair being analysed.
        first_fcst_date = datetime.datetime.strptime(
            str(fcst_datelist[0]), "%Y%m%d%H")
        # 29 February is skipped: the MJO series appears to have no leap days
        if first_fcst_date.strftime("%m-%d") == '02-29':
            continue
        mjo_idx = MJOdates.index(first_fcst_date.strftime("%Y-%m-%d"))
        in_phase = MJOphase[mjo_idx] == MJO1 or MJOphase[mjo_idx] == MJO2
        if not (in_phase and MJOamp[mjo_idx] >= 1.0):
            continue

        # The observed and forecast tracks cover different date ranges, so
        # find the positions in each where the dates coincide.
        indices_fcst = np.nonzero(np.in1d(fcst_datelist, obs_datelist))[0]
        indices_obs = np.nonzero(np.in1d(obs_datelist, fcst_datelist))[0]

        # With IBTrACS there may be no observation anywhere along the
        # forecast; nothing can be verified in that case.
        if len(indices_fcst) == 0:
            continue

        # The first matched forecast index is the number of leading lead
        # times that have no observation; those slots stay masked.
        lt_to_skip = indices_fcst[0]
        slots = array_len - lt_to_skip

        matched_obs_lon_lat, matched_fcst_lon_lat = (
            np.ma.masked_all((array_len, 2)) for _ in range(2))
        (matched_obs_mslp, matched_fcst_mslp,
         matched_obs_wind, matched_fcst_wind) = (
            np.ma.masked_all((array_len, 1)) for _ in range(4))

        for i, z in zip(indices_obs, range(slots)):
            row = z + lt_to_skip
            matched_obs_lon_lat[row, 0] = obs_lon[i]
            matched_obs_lon_lat[row, 1] = obs_lat[i]
            matched_obs_mslp[row] = obs_mslp[i]
            matched_obs_wind[row] = obs_wind[i]

        # NOTE(review): the forecast indices are capped at array_len - 1
        # entries while the observed ones are not - confirm this is intended.
        for i, z in zip(indices_fcst[0:array_len - 1], range(slots)):
            row = z + lt_to_skip
            matched_fcst_lon_lat[row, 0] = fcst_lon[i]
            matched_fcst_lon_lat[row, 1] = fcst_lat[i]
            matched_fcst_mslp[row] = fcst_mslp[i]
            matched_fcst_wind[row] = fcst_wind[i]

        # Track error and MSLP / wind biases at each lead time
        trerr, mslp_bias, wind_bias = (np.zeros(array_len) for _ in range(3))
        for lt in range(array_len):
            trerr[lt] = ts.trerr(matched_obs_lon_lat[lt, :],
                                 matched_fcst_lon_lat[lt, :])
            mslp_bias[lt] = ts.bias(matched_obs_mslp[lt],
                                    matched_fcst_mslp[lt])
            wind_bias[lt] = ts.bias(matched_obs_wind[lt],
                                    matched_fcst_wind[lt])

        # Count this forecast in the sample size of every lead time that
        # could be verified.
        if verify_ibtracs:
            sample_size = all_storms_sample_size_ib
        else:
            sample_size = all_storms_sample_size_an
        for lt in range(array_len):
            if not np.isnan(trerr[lt]):
                sample_size[lt] += 1

        # An all-zero accumulator means no forecast has been stored yet:
        # overwrite it.  Otherwise stack this forecast as a new row.
        if verify_ibtracs:
            if np.all(all_storms_trerr_ib == 0.):
                for lt in range(array_len):
                    all_storms_trerr_ib[lt] = trerr[lt]
                    all_storms_mslp_bias_ib[lt] = mslp_bias[lt]
                    all_storms_wind_bias_ib[lt] = wind_bias[lt]
            else:
                all_storms_trerr_ib = np.vstack(
                    [all_storms_trerr_ib, trerr])
                all_storms_mslp_bias_ib = np.vstack(
                    [all_storms_mslp_bias_ib, mslp_bias])
                all_storms_wind_bias_ib = np.vstack(
                    [all_storms_wind_bias_ib, wind_bias])
        else:
            if np.all(all_storms_trerr_an == 0.):
                for lt in range(array_len):
                    all_storms_trerr_an[lt] = trerr[lt]
                    all_storms_mslp_bias_an[lt] = mslp_bias[lt]
                    all_storms_wind_bias_an[lt] = wind_bias[lt]
            else:
                all_storms_trerr_an = np.vstack(
                    [all_storms_trerr_an, trerr])
                all_storms_mslp_bias_an = np.vstack(
                    [all_storms_mslp_bias_an, mslp_bias])
                all_storms_wind_bias_an = np.vstack(
                    [all_storms_wind_bias_an, wind_bias])
def storm_stats(ibtracs_file, analysis_file, nwp_files_list,
                use_analysis_or_ibtracs, array_len):
    """Calculate the statistics for all the forecasts of one storm.

    Only forecasts whose first date falls in MJO phase ``MJO1`` or ``MJO2``
    with amplitude >= 1.0 are used.  Each such forecast is matched, date by
    date, against the observed track; matched values are stored at the
    forecast's own lead-time index so unverifiable lead times stay masked.

    Module-level state updated:
        all_storms_trerr_wgt: per-lead-time sample size, incremented in place.
        this_mjo_trerr, this_mjo_mslp_bias, this_mjo_wind_bias: filled in
            place by the first verified forecast (while ``this_mjo_trerr`` is
            still all zeros), afterwards rebound to a stack with one extra
            row per forecast.

    Args:
        ibtracs_file: Path of the IBTrACS track file.
        analysis_file: Path of the analysis track file.
        nwp_files_list: Paths of the forecast track files for this storm.
        use_analysis_or_ibtracs: ``"analysis"`` or ``"ibtracs"``; selects
            which of the two files is the observed track.
        array_len: Number of lead-time slots per forecast.

    Raises:
        ValueError: If ``use_analysis_or_ibtracs`` is not one of the two
            recognised values.
    """
    global this_mjo_trerr
    global this_mjo_mslp_bias
    global this_mjo_wind_bias

    if use_analysis_or_ibtracs == "analysis":
        obs_file = analysis_file
    elif use_analysis_or_ibtracs == "ibtracs":
        obs_file = ibtracs_file
    else:
        raise ValueError(
            "use_analysis_or_ibtracs must be 'analysis' or 'ibtracs', got %r"
            % (use_analysis_or_ibtracs,))

    # Date, lon, lat, MSLP and wind of the observed track
    obs_data = np.genfromtxt(obs_file, dtype=float, skip_header=1)
    obs_lon = obs_data[:, 7]
    obs_lat = obs_data[:, 8]
    obs_mslp = obs_data[:, 9]
    obs_wind = obs_data[:, 10]
    obs_datelist = pl.get_dates(obs_data)

    for ff in nwp_files_list:
        fcst_data = np.genfromtxt(ff, dtype=float, skip_header=1)
        fcst_lon = fcst_data[:, 7]
        fcst_lat = fcst_data[:, 8]
        fcst_mslp = fcst_data[:, 9]
        fcst_wind = fcst_data[:, 10]
        fcst_datelist = pl.get_dates(fcst_data)

        # Decide from the first forecast date whether this forecast belongs
        # to the MJO phase pair being analysed.
        first_fcst_date = datetime.datetime.strptime(
            str(fcst_datelist[0]), "%Y%m%d%H")
        # 29 February is skipped: the MJO series appears to have no leap days
        if first_fcst_date.strftime("%m-%d") == '02-29':
            continue
        mjo_idx = MJOdates.index(first_fcst_date.strftime("%Y-%m-%d"))
        in_phase = MJOphase[mjo_idx] == MJO1 or MJOphase[mjo_idx] == MJO2
        if not (in_phase and MJOamp[mjo_idx] >= 1.0):
            continue

        print(MJOphase[mjo_idx])
        print(MJOamp[mjo_idx])

        # The observed and forecast tracks cover different date ranges, so
        # find the positions in each where the dates coincide.
        indices_fcst = np.nonzero(np.in1d(fcst_datelist, obs_datelist))[0]
        indices_obs = np.nonzero(np.in1d(obs_datelist, fcst_datelist))[0]

        # With IBTrACS there may be no observation anywhere along the
        # forecast; nothing can be verified in that case.
        if len(indices_fcst) == 0:
            continue

        # The first matched forecast index is the number of leading lead
        # times that have no observation; those slots stay masked.
        lt_to_skip = indices_fcst[0]
        slots = array_len - lt_to_skip

        matched_obs_lon_lat, matched_fcst_lon_lat = (
            np.ma.masked_all((array_len, 2)) for _ in range(2))
        (matched_obs_mslp, matched_fcst_mslp,
         matched_obs_wind, matched_fcst_wind) = (
            np.ma.masked_all((array_len, 1)) for _ in range(4))

        for i, z in zip(indices_obs, range(slots)):
            row = z + lt_to_skip
            matched_obs_lon_lat[row, 0] = obs_lon[i]
            matched_obs_lon_lat[row, 1] = obs_lat[i]
            matched_obs_mslp[row] = obs_mslp[i]
            matched_obs_wind[row] = obs_wind[i]

        # NOTE(review): the forecast indices are capped at array_len - 1
        # entries while the observed ones are not - confirm this is intended.
        for i, z in zip(indices_fcst[0:array_len - 1], range(slots)):
            row = z + lt_to_skip
            matched_fcst_lon_lat[row, 0] = fcst_lon[i]
            matched_fcst_lon_lat[row, 1] = fcst_lat[i]
            matched_fcst_mslp[row] = fcst_mslp[i]
            matched_fcst_wind[row] = fcst_wind[i]

        # Track error and MSLP / wind biases at each lead time
        trerr, mslp_bias, wind_bias = (np.zeros(array_len) for _ in range(3))
        for lt in range(array_len):
            trerr[lt] = ts.trerr(matched_obs_lon_lat[lt, :],
                                 matched_fcst_lon_lat[lt, :])
            mslp_bias[lt] = ts.bias(matched_obs_mslp[lt],
                                    matched_fcst_mslp[lt])
            wind_bias[lt] = ts.bias(matched_obs_wind[lt],
                                    matched_fcst_wind[lt])

        print(trerr)
        print(mslp_bias)
        print(wind_bias)

        # Sample size: count this forecast only at lead times that could be
        # verified (e.g. not before a best track exists).
        for lt in range(array_len):
            if not np.isnan(trerr[lt]):
                all_storms_trerr_wgt[lt] += 1

        # An all-zero accumulator means no forecast has been stored yet:
        # overwrite it.  Otherwise stack this forecast as a new row so the
        # mean can later be taken over all forecasts in this MJO phase.
        if np.all(this_mjo_trerr == 0):
            for lt in range(array_len):
                this_mjo_trerr[lt] = trerr[lt]
                this_mjo_mslp_bias[lt] = mslp_bias[lt]
                this_mjo_wind_bias[lt] = wind_bias[lt]
        else:
            this_mjo_trerr = np.vstack([this_mjo_trerr, trerr])
            this_mjo_mslp_bias = np.vstack([this_mjo_mslp_bias, mslp_bias])
            this_mjo_wind_bias = np.vstack([this_mjo_wind_bias, wind_bias])
def plot_timeseries_intensity(ib_track_file_list, an_track_file_list, outfile):
    """Plot the wind-speed time series of every storm in one MJO phase.

    One line is drawn per IBTrACS track whose first date falls in MJO phase
    ``MJO`` with amplitude >= 1.0, coloured by the storm's peak wind.
    Stretches where the wind rises by at least 55.56 km/h (30 kt) within one
    to four timesteps are over-plotted in blue.  The figure is saved to
    ``outfile`` and closed.

    Args:
        ib_track_file_list: Paths of the IBTrACS track files; wind is read
            from column 10 (presumably in m/s, since it is scaled by 3.6 and
            labelled km/h - confirm).
        an_track_file_list: Paths of the analysis track files.  Only its
            length is used: it sets how many entries of
            ``ib_track_file_list`` are plotted.
        outfile: Path of the image file to write.
    """
    fig, ax = plt.subplots()
    fig.set_size_inches(10, 6)

    # (upper bound of peak wind in km/h, line colour, line style); anything at
    # or above the last bound is drawn as a solid 'k' line.
    categories = (
        (51, 'khaki', 'dotted'),
        (63, 'gold', 'dotted'),
        (89, 'darkorange', 'dotted'),
        (118, 'black', 'dotted'),
        (166, 'orangered', '-'),
        (213, 'firebrick', '-'),
    )

    xlen = []
    for track_no in range(len(an_track_file_list)):
        ib_data = np.genfromtxt(ib_track_file_list[track_no], dtype=float,
                                skip_header=1)
        obs_wind = ib_data[:, 10]
        obs_dates = pl.get_dates(ib_data)

        first_date = datetime.datetime.strptime(str(obs_dates[0]),
                                                "%Y%m%d%H")
        # 29 February is skipped: the MJO series appears to have no leap days
        if first_date.strftime("%m-%d") == '02-29':
            continue
        z = MJOdates.index(first_date.strftime("%Y-%m-%d"))
        if not (MJOphase[z] == MJO and MJOamp[z] >= 1.0):
            continue

        # Values above 10000 are treated as missing-data fill values.
        with np.errstate(invalid='ignore'):
            obs_wind[obs_wind > 10000] = np.nan
        print(obs_wind)
        if np.all(np.isnan(obs_wind)):
            print("all nan")
            continue

        max_wind = np.nanmax(obs_wind) * 3.6
        obs_wind = obs_wind * 3.6

        # Copy into RIwind every window of 1-4 timesteps over which the wind
        # rose by at least 55.56 km/h; everything else stays 0 for now.
        RIwind = np.zeros(len(obs_wind))
        for j in range(len(obs_wind) - 4):
            for span in (4, 3, 2, 1):
                if (obs_wind[j + span] - obs_wind[j]) >= 55.56:
                    RIwind[j:j + span + 1] = obs_wind[j:j + span + 1]

        # Where the highlighted wind drops from one step to the next, blank
        # the following step.  Only interior points are visited: the first
        # point has no predecessor (index -1 would wrap round to the last
        # point) and the last point has no successor to blank.
        for k in reversed(range(1, len(RIwind) - 1)):
            if RIwind[k] < RIwind[k - 1]:
                RIwind[k + 1] = 0.0

        # NaN values are not drawn, so only the highlighted stretches show.
        RIwind[RIwind == 0.0] = np.nan
        print("RIwind:  %s" % (RIwind,))

        c, ls = 'k', '-'
        for upper, colour, style in categories:
            if max_wind < upper:
                c, ls = colour, style
                break

        x = np.arange(0, len(obs_wind), 1)
        xlen.append(len(x))
        print("x:  %s" % (x,))
        print("obs_wind:  %s" % (obs_wind,))
        plt.plot(x, obs_wind, color=c, linestyle=ls)
        plt.plot(x, RIwind, color='blue', linestyle=ls)

    print("xlen:  %s" % (xlen,))

    plt.xticks(fontsize=12)
    plt.xlim(0, 65)
    xticklocs = np.arange(0, 65, step=4)
    plt.ylim(0, 300)
    # One tick per day (every 4 timesteps), labelled in whole days; floor
    # division keeps the labels integers under true division as well.
    plt.xticks(xticklocs, xticklocs // 4)
    plt.yticks(fontsize=12)
    plt.xlabel('Days', fontsize=14)
    plt.ylabel('Maximum Sustained Wind Speed (km/h)', fontsize=14)
    plt.savefig(outfile, bbox_inches='tight', pad_inches=0.05, dpi=500)
    plt.close()
def storm_stats(ibtracs_file, analysis_file, nwp_files_list,
                use_analysis_or_ibtracs, array_len):
    """Calculate the statistics for all the forecasts of one storm.

    Only forecasts whose first date has an MJO amplitude >= 1.0 are used.
    Each such forecast is matched, date by date, against the observed track;
    matched values are stored at the forecast's own lead-time index so
    unverifiable lead times stay masked.  Errors are computed for the first
    ``array_len - 1`` lead times and added to running sums.

    Module-level state updated in place, per lead time, for every value that
    is not NaN:
        all_storms_trerr_sum / all_storms_trerr_wgt
        all_storms_mslp_bias_sum / all_storms_mslp_bias_wgt
        all_storms_wind_bias_sum / all_storms_wind_bias_wgt
    (``*_sum`` accumulates the values, ``*_wgt`` counts them.)

    Args:
        ibtracs_file: Path of the IBTrACS track file.
        analysis_file: Path of the analysis track file.
        nwp_files_list: Paths of the forecast track files for this storm.
        use_analysis_or_ibtracs: ``"analysis"`` or ``"ibtracs"``; selects
            which of the two files is the observed track.
        array_len: Number of lead-time slots per forecast.

    Raises:
        ValueError: If ``use_analysis_or_ibtracs`` is not one of the two
            recognised values.
    """
    print(ibtracs_file)
    print(analysis_file)

    if use_analysis_or_ibtracs == "analysis":
        obs_file = analysis_file
    elif use_analysis_or_ibtracs == "ibtracs":
        obs_file = ibtracs_file
    else:
        raise ValueError(
            "use_analysis_or_ibtracs must be 'analysis' or 'ibtracs', got %r"
            % (use_analysis_or_ibtracs,))

    # Number of forecast tracks for this storm
    NT = len(nwp_files_list)

    # Date, lon, lat, MSLP and wind of the observed track
    obs_data = np.genfromtxt(obs_file, dtype=float, skip_header=1)
    obs_lon = obs_data[:, 7]
    obs_lat = obs_data[:, 8]
    obs_mslp = obs_data[:, 9]
    obs_wind = obs_data[:, 10]
    obs_datelist = pl.get_dates(obs_data)

    # Errors are evaluated for all but the last lead-time slot.
    n_lead = array_len - 1

    print(NT)
    for ff in nwp_files_list:
        fcst_data = np.genfromtxt(ff, dtype=float, skip_header=1)
        fcst_lon = fcst_data[:, 7]
        fcst_lat = fcst_data[:, 8]
        fcst_mslp = fcst_data[:, 9]
        fcst_wind = fcst_data[:, 10]
        fcst_datelist = pl.get_dates(fcst_data)

        # Initialisation date of this forecast
        first_fcst_date = datetime.datetime.strptime(
            str(fcst_datelist[0]), "%Y%m%d%H")
        print(first_fcst_date.strftime("%m-%d"))
        # 29 February is skipped: the MJO series appears to have no leap days
        if first_fcst_date.strftime("%m-%d") == '02-29':
            continue

        # MJO amplitude on the initialisation date; forecasts started while
        # the amplitude was below 1 (or is NaN) are not used.
        mjo_idx = MJOdates.index(first_fcst_date.strftime("%Y-%m-%d"))
        amp = MJOamp[mjo_idx]
        if not amp >= 1.0:
            continue

        # The observed and forecast tracks cover different date ranges, so
        # find the positions in each where the dates coincide.
        indices_fcst = np.nonzero(np.in1d(fcst_datelist, obs_datelist))[0]
        indices_obs = np.nonzero(np.in1d(obs_datelist, fcst_datelist))[0]

        # With IBTrACS there may be no observation anywhere along the
        # forecast; nothing can be verified in that case.
        if len(indices_fcst) == 0:
            continue

        # The first matched forecast index is the number of leading lead
        # times that have no observation; those slots stay masked.
        lt_to_skip = indices_fcst[0]
        slots = array_len - lt_to_skip

        matched_obs_lon_lat, matched_fcst_lon_lat = (
            np.ma.masked_all((array_len, 2)) for _ in range(2))
        (matched_obs_mslp, matched_fcst_mslp,
         matched_obs_wind, matched_fcst_wind) = (
            np.ma.masked_all((array_len, 1)) for _ in range(4))

        for i, z in zip(indices_obs, range(slots)):
            row = z + lt_to_skip
            matched_obs_lon_lat[row, 0] = obs_lon[i]
            matched_obs_lon_lat[row, 1] = obs_lat[i]
            matched_obs_mslp[row] = obs_mslp[i]
            matched_obs_wind[row] = obs_wind[i]

        for i, z in zip(indices_fcst[0:n_lead], range(slots)):
            row = z + lt_to_skip
            matched_fcst_lon_lat[row, 0] = fcst_lon[i]
            matched_fcst_lon_lat[row, 1] = fcst_lat[i]
            matched_fcst_mslp[row] = fcst_mslp[i]
            matched_fcst_wind[row] = fcst_wind[i]

        # Track error and MSLP / wind biases at each lead time
        trerr, mslp_bias, wind_bias = (np.zeros(n_lead) for _ in range(3))
        for lt in range(n_lead):
            trerr[lt] = ts.trerr(matched_obs_lon_lat[lt, :],
                                 matched_fcst_lon_lat[lt, :])
            mslp_bias[lt] = ts.bias(matched_obs_mslp[lt],
                                    matched_fcst_mslp[lt])
            wind_bias[lt] = ts.bias(matched_obs_wind[lt],
                                    matched_fcst_wind[lt])

        # Add this forecast to the running sums, and count it in the weight
        # of each lead time it contributes to: forecasts differ in length,
        # so every lead time needs its own sample size for the mean.
        for lt in range(n_lead):
            if not np.isnan(trerr[lt]):
                all_storms_trerr_sum[lt] += trerr[lt]
                all_storms_trerr_wgt[lt] += 1
            if not np.isnan(mslp_bias[lt]):
                all_storms_mslp_bias_sum[lt] += mslp_bias[lt]
                all_storms_mslp_bias_wgt[lt] += 1
            if not np.isnan(wind_bias[lt]):
                all_storms_wind_bias_sum[lt] += wind_bias[lt]
                all_storms_wind_bias_wgt[lt] += 1
def storm_stats(ibtracs_file, analysis_file, nwp_files_list,
                use_analysis_or_ibtracs, array_len):
    """Compute track-error and intensity-bias statistics for one storm.

    Each forecast track in ``nwp_files_list`` is compared with the observed
    track at every lead time.  Columns 7, 8, 9 and 10 of every track file are
    read as longitude, latitude, MSLP and wind; dates come from
    ``pl.get_dates``.

    Side effects:
      * adds this storm's errors to the module-level accumulators
        ``all_storms_{trerr,mslp_bias,wind_bias}_{sum,wgt}``;
      * writes four text files (per-forecast and storm-mean MSLP and wind
        bias) whose names are built from the module-level ``savedir``,
        ``dir`` and ``fcst_type``.  The location-error files are not written
        by this version (their ``np.savetxt`` calls were already disabled).

    Args:
        ibtracs_file: path of the IBTrACS track file for this storm.
        analysis_file: path of the analysis track file for this storm.
        nwp_files_list: paths of the forecast track files for this storm.
        use_analysis_or_ibtracs: ``"analysis"`` or ``"ibtracs"``; selects
            which of the two files is used as the observed track.
        array_len: number of lead times held in every statistics array.

    Raises:
        ValueError: if ``use_analysis_or_ibtracs`` is neither ``"analysis"``
            nor ``"ibtracs"``.
    """
    print(ibtracs_file)
    print(analysis_file)

    if use_analysis_or_ibtracs == "analysis":
        obs_file = analysis_file
    elif use_analysis_or_ibtracs == "ibtracs":
        obs_file = ibtracs_file
    else:
        # Previously this fell through and failed later with a NameError.
        raise ValueError(
            'use_analysis_or_ibtracs must be "analysis" or "ibtracs", got %r'
            % (use_analysis_or_ibtracs,))

    # Number of forecast tracks for this storm
    NT = len(nwp_files_list)

    # Per-forecast statistics for this storm: one row per forecast track
    storm_trerr, storm_mslp_bias, storm_wind_bias = (
        np.zeros((NT, array_len)) for _ in range(3))

    # Error sums and sample counts, used for this storm's mean errors
    (storm_trerr_sum, storm_trerr_wgt,
     storm_mslp_bias_sum, storm_mslp_bias_wgt,
     storm_wind_bias_sum, storm_wind_bias_wgt) = (
        np.zeros(array_len) for _ in range(6))

    # Observed track
    obs_data = np.genfromtxt(obs_file, dtype=float, skip_header=1)
    obs_lon = obs_data[:, 7]
    obs_lat = obs_data[:, 8]
    obs_mslp = obs_data[:, 9]
    obs_wind = obs_data[:, 10]
    obs_datelist = pl.get_dates(obs_data)

    print(NT)
    for a, ff in enumerate(nwp_files_list):
        # Forecast track
        fcst_data = np.genfromtxt(ff, dtype=float, skip_header=1)
        fcst_lon = fcst_data[:, 7]
        fcst_lat = fcst_data[:, 8]
        fcst_mslp = fcst_data[:, 9]
        fcst_wind = fcst_data[:, 10]
        fcst_datelist = pl.get_dates(fcst_data)

        # The observed and forecast files cover different date ranges, so
        # find the positions in each where the dates coincide.
        indices_fcst = np.nonzero(np.in1d(fcst_datelist, obs_datelist))[0]
        indices_obs = np.nonzero(np.in1d(obs_datelist, fcst_datelist))[0]

        # With IBTrACS there may be no observation anywhere along the
        # forecast track; such forecasts contribute nothing.
        if len(indices_fcst) == 0:
            continue

        # The first matching date must not be treated as lead time 0 of the
        # forecast: leave the unverifiable leading lead times masked.
        lt_to_skip = indices_fcst[0]

        matched_data_dates, matched_obs_lon_lat, matched_fcst_lon_lat = (
            np.ma.masked_all((array_len, 2)) for _ in range(3))
        (matched_obs_mslp, matched_fcst_mslp,
         matched_obs_wind, matched_fcst_wind) = (
            np.ma.masked_all((array_len, 1)) for _ in range(4))

        # Observed values at the matched dates, placed at their lead time
        for i, z in zip(indices_obs, range(array_len - lt_to_skip)):
            matched_data_dates[z + lt_to_skip, 0] = obs_datelist[i]
            matched_obs_lon_lat[z + lt_to_skip, 0] = obs_lon[i]
            matched_obs_lon_lat[z + lt_to_skip, 1] = obs_lat[i]
            matched_obs_mslp[z + lt_to_skip] = obs_mslp[i]
            matched_obs_wind[z + lt_to_skip] = obs_wind[i]

        # Forecast values at the matched dates, placed at their lead time
        for i, z in zip(indices_fcst[0:array_len - 1],
                        range(array_len - lt_to_skip)):
            matched_data_dates[z + lt_to_skip, 1] = fcst_datelist[i]
            matched_fcst_lon_lat[z + lt_to_skip, 0] = fcst_lon[i]
            matched_fcst_lon_lat[z + lt_to_skip, 1] = fcst_lat[i]
            matched_fcst_mslp[z + lt_to_skip] = fcst_mslp[i]
            matched_fcst_wind[z + lt_to_skip] = fcst_wind[i]

        # Track error and intensity biases of this forecast per lead time
        trerr, mslp_bias, wind_bias = (np.zeros(array_len) for _ in range(3))
        for lt in range(array_len):
            trerr[lt] = ts.trerr(matched_obs_lon_lat[lt, :],
                                 matched_fcst_lon_lat[lt, :])
            mslp_bias[lt] = ts.bias(matched_obs_mslp[lt],
                                    matched_fcst_mslp[lt])
            wind_bias[lt] = ts.bias(matched_obs_wind[lt],
                                    matched_fcst_wind[lt])

        # Add this forecast to the per-storm and all-storm accumulators.
        # The "wgt" arrays count contributing forecasts per lead time,
        # because forecasts differ in length and each mean must be divided
        # by its own sample size.
        accumulators = (
            (trerr, storm_trerr, storm_trerr_sum, storm_trerr_wgt,
             all_storms_trerr_sum, all_storms_trerr_wgt),
            (mslp_bias, storm_mslp_bias, storm_mslp_bias_sum,
             storm_mslp_bias_wgt,
             all_storms_mslp_bias_sum, all_storms_mslp_bias_wgt),
            (wind_bias, storm_wind_bias, storm_wind_bias_sum,
             storm_wind_bias_wgt,
             all_storms_wind_bias_sum, all_storms_wind_bias_wgt),
        )
        for values, per_fcst, s_sum, s_wgt, all_sum, all_wgt in accumulators:
            for lt in range(array_len):
                if not np.isnan(values[lt]):
                    per_fcst[a, lt] = values[lt]
                    s_sum[lt] += values[lt]
                    s_wgt[lt] += 1
                    all_sum[lt] += values[lt]
                    all_wgt[lt] += 1

    # Mean error per lead time over all forecasts of this storm.  Lead times
    # with no contributing forecast are 0/0 and deliberately come out as NaN.
    with np.errstate(divide='ignore', invalid='ignore'):
        storm_mslp_bias_mean = storm_mslp_bias_sum / storm_mslp_bias_wgt
        storm_wind_bias_mean = storm_wind_bias_sum / storm_wind_bias_wgt

    print(storm_wind_bias_sum)
    print(storm_wind_bias_wgt)
    print(storm_wind_bias_mean)

    prefix = savedir + dir
    suffix = ("_per_lead_time_vs_" + use_analysis_or_ibtracs + "_" +
              fcst_type + ".txt")
    np.savetxt(prefix + "_each_forecast_mslp_bias" + suffix,
               storm_mslp_bias[:, :], '%.4f')
    np.savetxt(prefix + "_each_forecast_wind_bias" + suffix,
               storm_wind_bias[:, :], '%.4f')
    np.savetxt(prefix + "_average_mslp_bias" + suffix,
               storm_mslp_bias_mean[:], '%.4f')
    np.savetxt(prefix + "_average_wind_bias" + suffix,
               storm_wind_bias_mean[:], '%.4f')
def storm_stats(ibtracs_file, analysis_file, nwp_files_list,
                use_analysis_or_ibtracs, array_len, e):
    """Accumulate one ensemble member's forecast errors for one storm.

    For every forecast file, the rows whose first column equals ``e`` are
    taken as that ensemble member's track.  The forecast is used only when
    its first date is not 29 February (such dates are skipped before the
    lookup in ``MJOdates``) and the MJO amplitude on that date is at
    least 1.  Its track error and MSLP / wind biases are then added, per
    lead time, to module-level sum and count arrays: the ``*_an`` set when
    verifying against the analysis, the ``*_ib`` set for IBTrACS.

    Args:
        ibtracs_file: path of the IBTrACS track file for this storm.
        analysis_file: path of the analysis track file for this storm.
        nwp_files_list: paths of the ensemble forecast track files.
        use_analysis_or_ibtracs: ``"analysis"`` or ``"ibtracs"``.
        array_len: number of lead times held in the accumulator arrays.
        e: ensemble member identifier, compared with column 0 of each file.

    Raises:
        ValueError: if ``use_analysis_or_ibtracs`` is not a recognised mode.
            ``MJOdates.index`` raises the same exception type when the
            forecast start date is missing from the MJO record.
    """
    print(ibtracs_file)
    print(analysis_file)

    # Choose the observed track and the matching set of accumulators.
    # They are shared module-level arrays and are updated in place.
    if use_analysis_or_ibtracs == "analysis":
        obs_file = analysis_file
        storm_trerr_wgt = storm_trerr_wgt_an
        storm_trerr_sum = storm_trerr_sum_an
        storm_mslp_bias_wgt = storm_mslp_bias_wgt_an
        storm_mslp_bias_sum = storm_mslp_bias_sum_an
        storm_wind_bias_wgt = storm_wind_bias_wgt_an
        storm_wind_bias_sum = storm_wind_bias_sum_an
    elif use_analysis_or_ibtracs == "ibtracs":
        obs_file = ibtracs_file
        storm_trerr_wgt = storm_trerr_wgt_ib
        storm_trerr_sum = storm_trerr_sum_ib
        storm_mslp_bias_wgt = storm_mslp_bias_wgt_ib
        storm_mslp_bias_sum = storm_mslp_bias_sum_ib
        storm_wind_bias_wgt = storm_wind_bias_wgt_ib
        storm_wind_bias_sum = storm_wind_bias_sum_ib
    else:
        # Previously this fell through and failed later with a NameError.
        raise ValueError(
            'use_analysis_or_ibtracs must be "analysis" or "ibtracs", got %r'
            % (use_analysis_or_ibtracs,))

    # Number of forecast tracks for this storm
    NT = len(nwp_files_list)

    # Observed track
    obs_data = np.genfromtxt(obs_file, dtype=float, skip_header=1)
    obs_lon = obs_data[:, 7]
    obs_lat = obs_data[:, 8]
    obs_mslp = obs_data[:, 9]
    obs_wind = obs_data[:, 10]
    obs_datelist = pl.get_dates(obs_data)

    print(NT)
    for ff in nwp_files_list:
        print(ff)

        fcst_data = np.genfromtxt(ff, dtype=float, skip_header=1,
                                  usecols=np.arange(0, 11))

        # Rows of this file that belong to ensemble member e (found once)
        member_idx = np.where(fcst_data[:, 0] == e)
        print(fcst_data[member_idx, :])
        fcst_lon = fcst_data[member_idx, 7][0]
        fcst_lat = fcst_data[member_idx, 8][0]
        fcst_mslp = fcst_data[member_idx, 9][0]
        fcst_wind = fcst_data[member_idx, 10][0]
        fcst_datelist = pl.get_dates(fcst_data[member_idx, :][0])
        print(fcst_lon)
        print(fcst_lat)
        print(fcst_datelist)

        # Some ensemble members have no forecast in a given file
        if len(fcst_datelist) == 0:
            continue

        first_fcst_date = datetime.datetime.strptime(
            str(fcst_datelist[0]), "%Y%m%d%H")
        # The MJO files don't include 29 February
        if first_fcst_date.strftime("%m-%d") == '02-29':
            continue

        z = MJOdates.index(first_fcst_date.strftime("%Y-%m-%d"))
        amp = MJOamp[z]
        # Only forecasts started with MJO amplitude >= 1 are used (a NaN
        # amplitude is skipped as well, as before).
        if not amp >= 1.0:
            continue

        # The observed and forecast files cover different date ranges, so
        # find the positions in each where the dates coincide.
        indices_fcst = np.nonzero(np.in1d(fcst_datelist, obs_datelist))[0]
        indices_obs = np.nonzero(np.in1d(obs_datelist, fcst_datelist))[0]

        # With IBTrACS there may be no observation anywhere along the
        # forecast track; such forecasts contribute nothing.
        if len(indices_fcst) == 0:
            continue

        # The first matching date must not be treated as lead time 0 of the
        # forecast: leave the unverifiable leading lead times masked.
        lt_to_skip = indices_fcst[0]

        matched_data_dates, matched_obs_lon_lat, matched_fcst_lon_lat = (
            np.ma.masked_all((array_len, 2)) for _ in range(3))
        (matched_obs_mslp, matched_fcst_mslp,
         matched_obs_wind, matched_fcst_wind) = (
            np.ma.masked_all((array_len, 1)) for _ in range(4))

        # Observed values at the matched dates, placed at their lead time
        for i, z in zip(indices_obs, range(array_len - lt_to_skip)):
            matched_data_dates[z + lt_to_skip, 0] = obs_datelist[i]
            matched_obs_lon_lat[z + lt_to_skip, 0] = obs_lon[i]
            matched_obs_lon_lat[z + lt_to_skip, 1] = obs_lat[i]
            matched_obs_mslp[z + lt_to_skip] = obs_mslp[i]
            matched_obs_wind[z + lt_to_skip] = obs_wind[i]
        print(matched_data_dates)

        # Forecast values at the matched dates, placed at their lead time
        for i, z in zip(indices_fcst[0:array_len - 1],
                        range(array_len - lt_to_skip)):
            matched_data_dates[z + lt_to_skip, 1] = fcst_datelist[i]
            matched_fcst_lon_lat[z + lt_to_skip, 0] = fcst_lon[i]
            matched_fcst_lon_lat[z + lt_to_skip, 1] = fcst_lat[i]
            matched_fcst_mslp[z + lt_to_skip] = fcst_mslp[i]
            matched_fcst_wind[z + lt_to_skip] = fcst_wind[i]

        # Track error and intensity biases of this forecast per lead time
        trerr, mslp_bias, wind_bias = (np.zeros(array_len) for _ in range(3))
        for lt in range(array_len):
            trerr[lt] = ts.trerr(matched_obs_lon_lat[lt, :],
                                 matched_fcst_lon_lat[lt, :])
            mslp_bias[lt] = ts.bias(matched_obs_mslp[lt],
                                    matched_fcst_mslp[lt])
            wind_bias[lt] = ts.bias(matched_obs_wind[lt],
                                    matched_fcst_wind[lt])

        # Add to the running sums; the "wgt" arrays count contributing
        # forecasts per lead time, because forecasts differ in length.
        for values, total, count in (
                (trerr, storm_trerr_sum, storm_trerr_wgt),
                (mslp_bias, storm_mslp_bias_sum, storm_mslp_bias_wgt),
                (wind_bias, storm_wind_bias_sum, storm_wind_bias_wgt)):
            for lt in range(array_len):
                if not np.isnan(values[lt]):
                    total[lt] += values[lt]
                    count[lt] += 1
def storm_stats(ibtracs_file, analysis_file, nwp_files_list,
                use_analysis_or_ibtracs, array_len):
    """Collect per-forecast errors of one storm into the category arrays.

    For every forecast track that has at least one date in common with the
    observed track, the track error and the MSLP / wind biases are computed
    per lead time.  Each forecast then

      * increments the module-level counters ``all_storms_trerr_wgt``,
        ``all_storms_mslp_bias_wgt`` and ``all_storms_wind_bias_wgt`` at
        every lead time where the corresponding value is not NaN, and
      * is stored in the module-level arrays ``this_category_trerr``,
        ``this_category_mslp_bias`` and ``this_category_wind_bias``.  While
        ``this_category_trerr`` is still all zeros the values are written
        into the existing arrays; afterwards each forecast is stacked on
        with ``np.vstack``, which rebinds the globals.

    Args:
        ibtracs_file: path of the IBTrACS track file for this storm.
        analysis_file: path of the analysis track file for this storm.
        nwp_files_list: paths of the forecast track files for this storm.
        use_analysis_or_ibtracs: ``"analysis"`` or ``"ibtracs"``; selects
            which of the two files is used as the observed track.
        array_len: number of lead times held in every array.

    Raises:
        ValueError: if ``use_analysis_or_ibtracs`` is neither ``"analysis"``
            nor ``"ibtracs"``.
    """
    global this_category_trerr
    global this_category_wind_bias
    global this_category_mslp_bias

    if use_analysis_or_ibtracs == "analysis":
        obs_file = analysis_file
    elif use_analysis_or_ibtracs == "ibtracs":
        obs_file = ibtracs_file
    else:
        # Previously this fell through and failed later with a NameError.
        raise ValueError(
            'use_analysis_or_ibtracs must be "analysis" or "ibtracs", got %r'
            % (use_analysis_or_ibtracs,))

    # Observed track
    obs_data = np.genfromtxt(obs_file, dtype=float, skip_header=1)
    obs_lon = obs_data[:, 7]
    obs_lat = obs_data[:, 8]
    obs_mslp = obs_data[:, 9]
    obs_wind = obs_data[:, 10]
    obs_datelist = pl.get_dates(obs_data)

    for ff in nwp_files_list:
        # Forecast track
        fcst_data = np.genfromtxt(ff, dtype=float, skip_header=1)
        fcst_lon = fcst_data[:, 7]
        fcst_lat = fcst_data[:, 8]
        fcst_mslp = fcst_data[:, 9]
        fcst_wind = fcst_data[:, 10]
        fcst_datelist = pl.get_dates(fcst_data)

        # The observed and forecast files cover different date ranges, so
        # find the positions in each where the dates coincide.
        indices_fcst = np.nonzero(np.in1d(fcst_datelist, obs_datelist))[0]
        indices_obs = np.nonzero(np.in1d(obs_datelist, fcst_datelist))[0]

        # With IBTrACS there may be no observation anywhere along the
        # forecast track; such forecasts contribute nothing.
        if len(indices_fcst) == 0:
            continue

        # The first matching date must not be treated as lead time 0 of the
        # forecast: leave the unverifiable leading lead times masked.
        lt_to_skip = indices_fcst[0]

        matched_data_dates, matched_obs_lon_lat, matched_fcst_lon_lat = (
            np.ma.masked_all((array_len, 2)) for _ in range(3))
        (matched_obs_mslp, matched_fcst_mslp,
         matched_obs_wind, matched_fcst_wind) = (
            np.ma.masked_all((array_len, 1)) for _ in range(4))

        # Observed values at the matched dates, placed at their lead time
        for i, z in zip(indices_obs, range(array_len - lt_to_skip)):
            matched_data_dates[z + lt_to_skip, 0] = obs_datelist[i]
            matched_obs_lon_lat[z + lt_to_skip, 0] = obs_lon[i]
            matched_obs_lon_lat[z + lt_to_skip, 1] = obs_lat[i]
            matched_obs_mslp[z + lt_to_skip] = obs_mslp[i]
            matched_obs_wind[z + lt_to_skip] = obs_wind[i]

        # Forecast values at the matched dates, placed at their lead time
        for i, z in zip(indices_fcst[0:array_len - 1],
                        range(array_len - lt_to_skip)):
            matched_data_dates[z + lt_to_skip, 1] = fcst_datelist[i]
            matched_fcst_lon_lat[z + lt_to_skip, 0] = fcst_lon[i]
            matched_fcst_lon_lat[z + lt_to_skip, 1] = fcst_lat[i]
            matched_fcst_mslp[z + lt_to_skip] = fcst_mslp[i]
            matched_fcst_wind[z + lt_to_skip] = fcst_wind[i]

        # Track error and intensity biases of this forecast per lead time
        trerr, mslp_bias, wind_bias = (np.zeros(array_len) for _ in range(3))
        for lt in range(array_len):
            trerr[lt] = ts.trerr(matched_obs_lon_lat[lt, :],
                                 matched_fcst_lon_lat[lt, :])
            mslp_bias[lt] = ts.bias(matched_obs_mslp[lt],
                                    matched_fcst_mslp[lt])
            wind_bias[lt] = ts.bias(matched_obs_wind[lt],
                                    matched_fcst_wind[lt])

        # Count the forecasts contributing at each lead time (only the
        # counts are kept here, not the error sums).
        for values, counts in ((trerr, all_storms_trerr_wgt),
                               (mslp_bias, all_storms_mslp_bias_wgt),
                               (wind_bias, all_storms_wind_bias_wgt)):
            for lt in range(array_len):
                if not np.isnan(values[lt]):
                    counts[lt] += 1

        # An all-zero track-error array marks "nothing stored yet": fill it
        # in place the first time, append a new row afterwards.
        if np.all(this_category_trerr == 0.):
            for lt in range(array_len):
                this_category_trerr[lt] = trerr[lt]
                this_category_mslp_bias[lt] = mslp_bias[lt]
                this_category_wind_bias[lt] = wind_bias[lt]
        else:
            this_category_trerr = np.vstack([this_category_trerr, trerr])
            this_category_mslp_bias = np.vstack(
                [this_category_mslp_bias, mslp_bias])
            this_category_wind_bias = np.vstack(
                [this_category_wind_bias, wind_bias])
track_file = trackdir + dir + "/" + entry else: continue track = np.genfromtxt(track_file, dtype=float, skip_header=1) tracklons = track[:, 7] tracklats = track[:, 8] tracktime = track[:, 6] print tracktime print type(tracktime[0]) track_datelist = pl.get_dates(track) print "track_datelist: ", track_datelist first_track_date = datetime.datetime.strptime( str(track_datelist[0]), "%Y%m%d%H") #z = pcp_dates.index(first_track_date.strftime("%Y-%m-%d")) #Here, just read in the precip forecast data for this date rather than the whole file #pcp = precip_data.variables['tp'][z,:,:] this_track_tc_pcp = np.zeros( (len(tracktime), len(plats), len(plons))) print "length: ", len(track_datelist) for date, di in zip(track_datelist, range(len(track_datelist))):
def storm_pos_int(obs_file, nwp_files_list):
    """Record the observed storm state at qualifying forecast start dates.

    For each forecast file, the first forecast date is looked up in the
    module-level ``MJOdates``.  If the MJO phase on that date equals the
    module-level ``MJO`` and the amplitude is at least 1, and the same date
    occurs in the observed track, the observed latitude, longitude, wind and
    MSLP at that date are appended to the module-level lists ``tc_lat``,
    ``tc_lon``, ``tc_wind`` and ``tc_mslp``.

    Forecasts that start on 29 February are skipped before the MJO lookup
    (NOTE(review): the original comment only guesses that leap days are
    missing from the MJO data -- confirm).

    Args:
        obs_file: path of the observed track file; columns 7, 8, 9 and 10
            are read as longitude, latitude, MSLP and wind.
        nwp_files_list: paths of the forecast track files for this storm.

    Raises:
        ValueError: from ``MJOdates.index`` if a forecast start date is not
            present in the MJO record.
    """
    # Observed track
    obs_data = np.genfromtxt(obs_file, dtype=float, skip_header=1)
    obs_lon = obs_data[:, 7]
    obs_lat = obs_data[:, 8]
    obs_mslp = obs_data[:, 9]
    obs_wind = obs_data[:, 10]
    obs_datelist = pl.get_dates(obs_data)

    for ff in nwp_files_list:
        # Only the forecast dates are needed; positions come from the obs
        fcst_data = np.genfromtxt(ff, dtype=float, skip_header=1)
        fcst_datelist = pl.get_dates(fcst_data)

        # Convert the first forecast date so it can be compared with the
        # MJO dates
        first_fcst_date = datetime.datetime.strptime(
            str(fcst_datelist[0]), "%Y%m%d%H")
        print(first_fcst_date.strftime("%m-%d"))
        if first_fcst_date.strftime("%m-%d") == '02-29':
            continue

        # Position of the forecast start date in the MJO record
        z = MJOdates.index(first_fcst_date.strftime("%Y-%m-%d"))

        # Keep only forecasts started in the requested MJO phase with
        # amplitude >= 1
        if not (MJOphase[z] == MJO and MJOamp[z] >= 1.0):
            continue

        print(fcst_datelist)
        print(obs_datelist)

        # Single lookup instead of a membership test followed by index()
        try:
            i = obs_datelist.index(fcst_datelist[0])
        except ValueError:
            # No observation at the forecast start date
            continue

        print(i)
        print(fcst_datelist[0])
        print(obs_datelist[i])
        tc_lat.append(obs_lat[i])
        tc_lon.append(obs_lon[i])
        tc_wind.append(obs_wind[i])
        tc_mslp.append(obs_mslp[i])
def storm_stats(eps_files_list, mean_files_list, array_len, NT_eps, storm_no):
    """Compute the ensemble spread of one storm at each lead time.

    For every (ensemble-mean file, ensemble file) pair, the absolute
    difference between each ensemble member (module-level ``es``) and the
    ensemble mean is computed per lead time for position (``ts.trerr``),
    wind, MSLP and translation speed (``ts.prop_speed_abs_err``).  These
    differences are averaged over the members to give the spread of each
    forecast, and then over the forecasts to give the spread of the storm.

    Missing data is NaN, not zero: a member with no track, or a lead time
    beyond the end of a track, is left out of the ``np.nanmean`` averages
    and of the member counts.  The arrays used to be zero-initialised, so
    such entries were averaged in as "zero spread" and the NaN test used for
    the member count was always true.  Lead times with no data at all are
    therefore NaN in the outputs.

    Side effects:
      * writes five text files (four spreads and the member count) whose
        names start with the module-level ``savedir + dir``;
      * adds the per-lead-time member counts to ``all_storms_wgt``;
      * stores the storm's spreads in row ``storm_no`` of
        ``all_storms_ind_{loc,wind,mslp,speed}_spread``.

    Args:
        eps_files_list: paths of the ensemble forecast track files.
        mean_files_list: paths of the matching ensemble-mean track files.
        array_len: number of lead times held in every array.
        NT_eps: number of forecasts; at most this many file pairs are used.
        storm_no: row index of this storm in the ``all_storms_ind_*`` arrays.
    """
    import warnings  # local import so the block does not rely on file-level imports

    # One row per ensemble member (was hard-coded to 50 rows)
    n_members = len(es)

    # Number of ensemble members contributing at each lead time
    storm_wgt = np.zeros(array_len)

    # Spread of each forecast (mean member-to-mean difference) per lead time
    (each_forecast_loc_spread, each_forecast_wind_spread,
     each_forecast_mslp_spread, each_forecast_speed_spread) = (
        np.full((NT_eps, array_len), np.nan) for _ in range(4))

    for ff_mean, ff_eps, tno in zip(mean_files_list, eps_files_list,
                                    range(NT_eps)):
        print(ff_mean)
        print(ff_eps)

        # Member-to-mean differences of this forecast; NaN = "no data"
        (all_ens_members_loc_diff, all_ens_members_wind_diff,
         all_ens_members_mslp_diff, all_ens_members_speed_diff) = (
            np.full((n_members, array_len), np.nan) for _ in range(4))

        # Ensemble-mean track
        mean_data = np.genfromtxt(ff_mean, dtype=float, skip_header=1,
                                  usecols=np.arange(0, 11))
        mean_lon = mean_data[:, 7]
        mean_lat = mean_data[:, 8]
        mean_mslp = mean_data[:, 9]
        mean_wind = mean_data[:, 10]
        mean_datelist = pl.get_dates(mean_data)
        mean_speed = ts.prop_speed_vals(mean_lon, mean_lat)
        # The doubled space reproduces the output of the former Python 2
        # multi-argument print statements.
        print("mean_speed:  %s" % (mean_speed,))
        ML = len(mean_speed)
        print(ML)
        print("mean dates:  %s" % (mean_datelist,))

        # All ensemble members of this forecast
        eps_data = np.genfromtxt(ff_eps, dtype=float, skip_header=1,
                                 usecols=np.arange(0, 11))

        for j, e in enumerate(es):
            print("ensemble member number  %s %s" % (e, j))

            # Rows of the ensemble file belonging to member e (found once)
            member_idx = np.where(eps_data[:, 0] == e)
            eps_lon = eps_data[member_idx, 7][0]
            eps_lat = eps_data[member_idx, 8][0]
            eps_mslp = eps_data[member_idx, 9][0]
            eps_wind = eps_data[member_idx, 10][0]
            eps_datelist = pl.get_dates(eps_data[member_idx, :][0])

            eps_speed = ts.prop_speed_vals(eps_lon, eps_lat)
            print("eps_speed:  %s" % (eps_speed,))
            EL = len(eps_speed)
            print(EL)
            print("eps dates:  %s" % (eps_datelist,))

            # Compare only over the part covered by both tracks
            fcst_len = np.min([ML, EL])
            print("fcst_len:  %s" % (fcst_len,))
            if fcst_len > 41:
                fcst_len = 41
            if fcst_len == 0:
                continue

            # NOTE(review): positions are indexed one step past the last
            # speed value, which assumes ts.prop_speed_vals returns one
            # value fewer than there are track points -- confirm.
            for lt in range(fcst_len + 1):
                all_ens_members_loc_diff[j, lt] = ts.trerr(
                    [mean_lon[lt], mean_lat[lt]], [eps_lon[lt], eps_lat[lt]])
                all_ens_members_wind_diff[j, lt] = abs(
                    mean_wind[lt] - eps_wind[lt])
                all_ens_members_mslp_diff[j, lt] = abs(
                    mean_mslp[lt] - eps_mslp[lt])
                if lt != fcst_len:
                    all_ens_members_speed_diff[j, lt] = (
                        ts.prop_speed_abs_err(mean_speed[lt], eps_speed[lt]))

        # Members that actually contributed at each lead time
        members_valid = np.sum(~np.isnan(all_ens_members_loc_diff), axis=0)
        storm_wgt += members_valid
        for lt in range(array_len):
            all_storms_wgt[lt] += members_valid[lt]

        # Spread of this forecast; an all-NaN lead time gives NaN, and the
        # "Mean of empty slice" warning for that case is expected.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            each_forecast_loc_spread[tno, :] = np.nanmean(
                all_ens_members_loc_diff, axis=0)
            each_forecast_wind_spread[tno, :] = np.nanmean(
                all_ens_members_wind_diff, axis=0)
            each_forecast_mslp_spread[tno, :] = np.nanmean(
                all_ens_members_mslp_diff, axis=0)
            each_forecast_speed_spread[tno, :] = np.nanmean(
                all_ens_members_speed_diff, axis=0)

    # Spread of this storm: average over its forecasts at each lead time
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        storm_loc_spread = np.nanmean(each_forecast_loc_spread, axis=0)
        storm_wind_spread = np.nanmean(each_forecast_wind_spread, axis=0)
        storm_mslp_spread = np.nanmean(each_forecast_mslp_spread, axis=0)
        storm_speed_spread = np.nanmean(each_forecast_speed_spread, axis=0)

    np.savetxt(savedir + dir + "_loc_spread_per_lead_time.txt",
               storm_loc_spread[:], '%.4f')
    np.savetxt(savedir + dir + "_wind_spread_per_lead_time.txt",
               storm_wind_spread[:], '%.4f')
    np.savetxt(savedir + dir + "_mslp_spread_per_lead_time.txt",
               storm_mslp_spread[:], '%.4f')
    np.savetxt(savedir + dir + "_speed_spread_per_lead_time.txt",
               storm_speed_spread[:], '%.4f')
    np.savetxt(
        savedir + dir +
        "_number_ens_members_included_in_spread_calcs_per_lead_time.txt",
        storm_wgt[:], '%.4f')

    all_storms_ind_loc_spread[storm_no, :] = storm_loc_spread[:]
    all_storms_ind_wind_spread[storm_no, :] = storm_wind_spread[:]
    all_storms_ind_mslp_spread[storm_no, :] = storm_mslp_spread[:]
    all_storms_ind_speed_spread[storm_no, :] = storm_speed_spread[:]
def map_all_obs_tracks_SIO(ib_track_file_list, an_track_file_list, outfile,
                           region):
    """Plot the IBTrACS tracks on a map, coloured by peak intensity.

    Each file in ``ib_track_file_list`` is drawn as one line.  Colour and
    line style are chosen from the track's maximum wind (column 10, times
    3.6 -- presumably m/s converted to km/h, confirm) using the thresholds
    51, 63, 89, 118, 166 and 213; a track with no valid wind is drawn grey
    and dashed.  A hurricane symbol marks the first point of each track, and
    a grey rectangle outlines 40S-0 / 30E-90E.

    The legend carries only a title, built from the module-level ``year1``
    and ``year2`` and from ``len(an_track_file_list)``; no per-category
    entries are added.

    Args:
        ib_track_file_list: paths of the IBTrACS track files to plot;
            columns 7 and 8 are read as longitude and latitude.
        an_track_file_list: analysis track files; only their number is used,
            for the cyclone count in the legend title.
        outfile: path of the image file to write.
        region: ``"SH"`` or ``"SIO"``; selects the map extent.

    Raises:
        ValueError: if ``region`` is neither ``"SH"`` nor ``"SIO"``.
    """
    # Map extent for the requested region
    if region == "SH":
        lat1, lat2, lon1, lon2 = 30, -60, -25, 335
    elif region == "SIO":
        lat1, lat2, lon1, lon2 = 10, -50, 10, 110
    else:
        # Previously this fell through and failed later with a NameError.
        raise ValueError('region must be "SH" or "SIO", got %r' % (region,))

    fig = plt.figure(figsize=(6, 3))
    ax = fig.add_axes([0.05, 0.1, 0.9, 0.96])

    m = Basemap(llcrnrlon=lon1, llcrnrlat=lat2, urcrnrlon=lon2,
                urcrnrlat=lat1, projection='mill', resolution='l')

    def draw_rectangle(lats, lons, m):
        x, y = m(lons, lats)
        # list() so the vertices are a real sequence on Python 3 as well
        xy = list(zip(x, y))
        poly = Polygon(xy, facecolor='None', edgecolor='darkgrey',
                       linewidth=0.75, alpha=0.75)
        plt.gca().add_patch(poly)

    RSMClats = [-40, 0, 0, -40]
    RSMClons = [30, 30, 90, 90]
    draw_rectangle(RSMClats, RSMClons, m)

    m.fillcontinents(color='white')
    m.drawcoastlines(linewidth=0.4, color='k')
    m.drawcountries(linewidth=0.4, color='k')

    no_tracks = len(an_track_file_list)

    # (exclusive upper wind limit, colour, line style), weakest first;
    # anything at or above the last limit is drawn solid black.
    categories = (
        (51, 'khaki', 'dotted'),
        (63, 'gold', 'dotted'),
        (89, 'darkorange', 'dotted'),
        (118, 'black', 'dotted'),
        (166, 'orangered', '-'),
        (213, 'firebrick', '-'),
    )

    for track_file in ib_track_file_list:
        # load in data from a specified TRACK file (reformatted & interpolated)
        ib_data = np.genfromtxt(track_file, dtype=float, skip_header=1)

        # Winds above 10000 are treated as missing.  obs_wind is a view, so
        # this also updates column 10 of ib_data, as before.
        obs_wind = ib_data[:, 10]
        with np.errstate(invalid='ignore'):
            obs_wind[obs_wind > 10000] = np.nan

        max_wind = np.nanmax(obs_wind) * 3.6

        if np.isnan(max_wind):
            c, ls = 'grey', '--'
        else:
            c, ls = 'k', '-'
            for upper_limit, colour, style in categories:
                if max_wind < upper_limit:
                    c, ls = colour, style
                    break

        # plot this track
        x, y = m(ib_data[:, 7], ib_data[:, 8])
        m.plot(x, y, linewidth=0.75, color=c, linestyle=ls)

        # mark the first point of the track
        hurricane = get_hurricane_symbol()
        xs, ys = m(ib_data[0, 7], ib_data[0, 8])
        m.scatter(xs, ys, marker=hurricane, edgecolors=c, facecolors='None',
                  s=50, linewidth=0.6)

    # legend (title only)
    title = str(year1[0]) + " - " + str(
        year2[-1]) + ": " + str(no_tracks) + " Cyclones"
    legend = ax.legend(title=title, loc='lower left')
    plt.setp(legend.get_title(), fontsize='12')

    # save and close the plot
    fig.subplots_adjust(wspace=0)
    plt.savefig(outfile, bbox_inches='tight', pad_inches=0.05, dpi=500)
    plt.close()
def storm_stats(ibtracs_file, analysis_file, nwp_files_list, array_len,
                track_number):
    """Append one verification row per usable forecast to the TC database.

    For every forecast file, the forecast value at index ``lead_time * 4``
    (``lead_time`` is module-level) is compared with the IBTrACS
    observation on the same date.  When such an observation exists, a
    15-element row is stacked onto the module-level ``every_TC_database``:

        0  int(y1)                      8  forecast longitude
        1  int(track_number[2:6])       9  forecast latitude
        2  validation date             10  track error (ts.trerr)
        3  observed longitude          11  forecast wind * 3.6
        4  observed latitude           12  forecast MSLP
        5  observed wind * 3.6         13  MJO phase at forecast start
        6  observed MSLP               14  MJO amplitude at forecast start
        7  forecast start date

    Observed wind / MSLP values above 10000 are stored as NaN, and the MJO
    phase and amplitude are stored as NaN when the amplitude is below 1.
    Forecasts starting on 29 February, forecasts too short to reach the
    lead time, and forecasts with no observation on the validation date are
    skipped.  ``analysis_file`` and ``array_len`` are not used.
    """
    global every_TC_database

    # Observed (IBTrACS) track
    obs_data = np.genfromtxt(ibtracs_file, dtype=float, skip_header=1)
    obs_lon = obs_data[:, 7]
    obs_lat = obs_data[:, 8]
    obs_mslp = obs_data[:, 9]
    obs_wind = obs_data[:, 10]
    obs_datelist = pl.get_dates(obs_data)

    for ff in nwp_files_list:
        print(ff)

        # Forecast track
        fcst_data = np.genfromtxt(ff, dtype=float, skip_header=1)
        fcst_lon = fcst_data[:, 7]
        fcst_lat = fcst_data[:, 8]
        fcst_mslp = fcst_data[:, 9]
        fcst_wind = fcst_data[:, 10]
        fcst_datelist = pl.get_dates(fcst_data)

        start = datetime.datetime.strptime(str(fcst_datelist[0]), "%Y%m%d%H")
        # 29 February is skipped before the MJO lookup
        if start.strftime("%m-%d") == '02-29':
            continue
        z = MJOdates.index(start.strftime("%Y-%m-%d"))

        # Index of the requested lead time within the forecast track
        step = lead_time * 4
        if len(fcst_datelist) <= step:
            continue

        valid_date = fcst_datelist[step]
        if valid_date not in obs_datelist:
            continue

        row = np.zeros(15)
        row[0] = int(y1)
        print(track_number)
        print(track_number[2:6])
        row[1] = int(track_number[2:6])
        # validation date / obs date - date we're forecasting for
        row[2] = int(valid_date)

        i = obs_datelist.index(valid_date)
        print(i)
        row[3] = obs_lon[i]
        row[4] = obs_lat[i]
        row[5] = np.nan if obs_wind[i] > 10000 else obs_wind[i] * 3.6
        row[6] = np.nan if obs_mslp[i] > 10000 else obs_mslp[i]

        # date forecast initialised
        row[7] = int(fcst_datelist[0])
        row[8] = fcst_lon[step]
        row[9] = fcst_lat[step]
        row[11] = fcst_wind[step] * 3.6
        row[12] = fcst_mslp[step]

        if MJOamp[z] >= 1.0:
            row[13] = int(MJOphase[z])
            row[14] = MJOamp[z]
        else:
            row[13] = np.nan
            row[14] = np.nan

        row[10] = ts.trerr([obs_lon[i], obs_lat[i]],
                           [fcst_lon[step], fcst_lat[step]])

        print(row)
        every_TC_database = np.vstack([every_TC_database, row])
        print(every_TC_database)