def storm_stats(ibtracs_file, analysis_file, nwp_files_list, use_analysis_or_ibtracs, array_len):
    """Accumulate translation-speed error and bias for all forecasts of one storm.

    Every forecast track in ``nwp_files_list`` is matched by date against the
    observed track. The translation (propagation) speed along both matched
    tracks is obtained from ``ts.prop_speed_vals`` and the per-lead-time
    absolute error and bias are appended to the module-level accumulators.

    Parameters
    ----------
    ibtracs_file, analysis_file
        Track files readable by ``np.genfromtxt`` (one header line is
        skipped). Columns 7 and 8 are read as longitude and latitude.
    nwp_files_list
        Sequence of forecast track files in the same column layout.
    use_analysis_or_ibtracs
        ``"analysis"`` or ``"ibtracs"``; selects which of the two files is
        used as the observed track.
    array_len
        Number of lead-time slots in the per-forecast arrays.

    Raises
    ------
    ValueError
        If ``use_analysis_or_ibtracs`` is neither ``"analysis"`` nor
        ``"ibtracs"``.

    Side effects
    ------------
    Increments the module-level ``all_storms_wgt`` in place, and either fills
    or row-stacks onto the module-level ``all_storms_speed_err`` and
    ``all_storms_speed_bias``. Returns ``None``.
    """
    global all_storms_speed_err
    global all_storms_speed_bias

    # Fail early with a clear message instead of an UnboundLocalError on
    # obs_file further down.
    if use_analysis_or_ibtracs == "analysis":
        obs_file = analysis_file
    elif use_analysis_or_ibtracs == "ibtracs":
        obs_file = ibtracs_file
    else:
        raise ValueError(
            "use_analysis_or_ibtracs must be 'analysis' or 'ibtracs', got %r"
            % (use_analysis_or_ibtracs,))

    # Number of forecast tracks for this storm
    NT = len(nwp_files_list)

    # Observed track. genfromtxt returns a 1-D array for a single-row file,
    # so force 2-D before indexing columns.
    obs_data = np.atleast_2d(np.genfromtxt(obs_file, dtype=float, skip_header=1))
    obs_lon = obs_data[:, 7]
    obs_lat = obs_data[:, 8]
    obs_datelist = pl.get_dates(obs_data)

    print(NT)
    for ff in nwp_files_list:
        # Forecast track: dates, longitude, latitude
        fcst_data = np.atleast_2d(np.genfromtxt(ff, dtype=float, skip_header=1))
        fcst_lon = fcst_data[:, 7]
        fcst_lat = fcst_data[:, 8]
        fcst_datelist = pl.get_dates(fcst_data)

        # The observed and forecast files cover different date ranges, so
        # find the positions in each where the date also occurs in the other.
        indices_fcst = np.nonzero(np.in1d(fcst_datelist, obs_datelist))[0]
        indices_obs = np.nonzero(np.in1d(obs_datelist, fcst_datelist))[0]

        # With ibtracs there may be no observation at all during the
        # forecast; nothing can be verified for this track.
        if len(indices_fcst) == 0:
            continue

        # The first matching date must not be treated as lead time 1: leave
        # the lead times before the first match masked.
        lt_to_skip = indices_fcst[0]
        matched_obs_lon_lat, matched_fcst_lon_lat = (
            np.ma.masked_all((array_len, 2)) for _ in range(2))

        # NOTE(review): matches are written to consecutive slots starting at
        # lt_to_skip, which assumes the matched dates are contiguous in both
        # tracks - confirm for observation files with gaps.
        print("indices_obs:  %s" % (indices_obs,))
        for i, z in zip(indices_obs, range(array_len - lt_to_skip)):
            matched_obs_lon_lat[z + lt_to_skip, 0] = obs_lon[i]
            matched_obs_lon_lat[z + lt_to_skip, 1] = obs_lat[i]

        print("indices_fcst:  %s" % (indices_fcst,))
        print("indices_fcst[0:array_len-1]:  %s" % (indices_fcst[0:array_len - 1],))
        for i, z in zip(indices_fcst[0:array_len - 1], range(array_len - lt_to_skip)):
            matched_fcst_lon_lat[z + lt_to_skip, 0] = fcst_lon[i]
            matched_fcst_lon_lat[z + lt_to_skip, 1] = fcst_lat[i]

        # Translation (propagation) speed along the matched observed and
        # forecast tracks
        obs_speed = ts.prop_speed_vals(matched_obs_lon_lat[:, 0],
                                       matched_obs_lon_lat[:, 1])
        fcst_speed = ts.prop_speed_vals(matched_fcst_lon_lat[:, 0],
                                        matched_fcst_lon_lat[:, 1])
        print(obs_speed)
        print(fcst_speed)

        # NOTE(review): only the first array_len - 1 slots are computed; the
        # last slot stays 0.0 (not NaN) and is therefore counted in
        # all_storms_wgt below - confirm this is intended.
        err, bias = (np.zeros(array_len) for _ in range(2))
        for lt in range(array_len - 1):
            err[lt] = ts.prop_speed_abs_err(obs_speed[lt], fcst_speed[lt])
            bias[lt] = ts.prop_speed_bias(obs_speed[lt], fcst_speed[lt])

        # Count the forecasts contributing at each lead time; forecasts have
        # different lengths, so the sample size varies with lead time.
        for lt in range(array_len):
            if not np.isnan(err[lt]):
                all_storms_wgt[lt] += 1

        # An all-zero accumulator is taken to mean "nothing stored yet": fill
        # it in place. Otherwise append this forecast as a new row.
        if np.all(all_storms_speed_err == 0):
            for lt in range(array_len):
                all_storms_speed_err[lt] = err[lt]
                all_storms_speed_bias[lt] = bias[lt]
        else:
            all_storms_speed_err = np.vstack([all_storms_speed_err, err])
            all_storms_speed_bias = np.vstack([all_storms_speed_bias, bias])
def storm_stats(ibtracs_file, analysis_file, nwp_files_list, use_analysis_or_ibtracs, array_len, e):
    """Compute and save translation-speed error and bias for one storm.

    For every forecast file in ``nwp_files_list`` the rows whose first column
    equals ``e`` are selected, matched by date against the observed track,
    and the translation (propagation) speed error and bias are computed at
    each lead time. Per-forecast values and their per-lead-time means are
    written to text files.

    Parameters
    ----------
    ibtracs_file, analysis_file
        Track files readable by ``np.genfromtxt`` (one header line is
        skipped). Columns 7 and 8 are read as longitude and latitude.
    nwp_files_list
        Sequence of forecast track files; the first 11 columns are read.
    use_analysis_or_ibtracs
        ``"analysis"`` or ``"ibtracs"``; selects which of the two files is
        used as the observed track and appears in the output file names.
    array_len
        Number of lead-time slots in the per-forecast arrays.
    e
        Value compared with column 0 of each forecast file to select rows
        (presumably the ensemble member identifier - confirm with caller).
        ``int(e) - 3`` is used in the output file names.

    Raises
    ------
    ValueError
        If ``use_analysis_or_ibtracs`` is neither ``"analysis"`` nor
        ``"ibtracs"``.

    Side effects
    ------------
    Writes four text files with ``np.savetxt`` and stores the mean error and
    bias in row ``j`` of the module-level ``eps_avg_*`` arrays. Relies on the
    module-level names ``obs_track``, ``j``, ``savedir``, ``dir`` and
    ``fcst_type``. Returns ``None``.
    """
    print(ibtracs_file)
    print(analysis_file)

    # Fail early with a clear message instead of an UnboundLocalError on
    # obs_file further down.
    if use_analysis_or_ibtracs == "analysis":
        obs_file = analysis_file
    elif use_analysis_or_ibtracs == "ibtracs":
        obs_file = ibtracs_file
    else:
        raise ValueError(
            "use_analysis_or_ibtracs must be 'analysis' or 'ibtracs', got %r"
            % (use_analysis_or_ibtracs,))

    # Number of forecast tracks for this storm
    NT = len(nwp_files_list)

    # Per-forecast error and bias, one row per forecast track
    storm_err, storm_bias = (np.zeros((NT, array_len)) for _ in range(2))
    # Running sums and sample counts used for the per-lead-time means
    storm_err_sum, storm_err_wgt, storm_bias_sum, storm_bias_wgt = (
        np.zeros(array_len) for _ in range(4))

    # Observed track. genfromtxt returns a 1-D array for a single-row file,
    # so force 2-D before indexing columns.
    obs_data = np.atleast_2d(np.genfromtxt(obs_file, dtype=float, skip_header=1))
    obs_lon = obs_data[:, 7]
    obs_lat = obs_data[:, 8]
    obs_datelist = pl.get_dates(obs_data)
    print(obs_lon)
    print(obs_lat)

    print(NT)
    for a, ff in enumerate(nwp_files_list):
        print(ff)

        fcst_data = np.atleast_2d(np.genfromtxt(
            ff, dtype=float, skip_header=1, usecols=np.arange(0, 11)))

        # Select the rows for this value of e once and reuse the selection.
        member_idx = np.where(fcst_data[:, 0] == e)[0]
        member_rows = fcst_data[member_idx, :]
        # The extra leading axis keeps the printed form of this debug output
        # unchanged.
        print(member_rows[np.newaxis])
        fcst_lon = member_rows[:, 7]
        fcst_lat = member_rows[:, 8]
        fcst_datelist = pl.get_dates(member_rows)
        print(fcst_lon)
        print(fcst_lat)
        print(fcst_datelist)

        # The observed and forecast files cover different date ranges, so
        # find the positions in each where the date also occurs in the other.
        indices_fcst = np.nonzero(np.in1d(fcst_datelist, obs_datelist))[0]
        indices_obs = np.nonzero(np.in1d(obs_datelist, fcst_datelist))[0]

        # With ibtracs there may be no observation at all during the
        # forecast; nothing can be verified for this track.
        if len(indices_fcst) == 0:
            continue

        # The first matching date must not be treated as lead time 1: leave
        # the lead times before the first match masked.
        lt_to_skip = indices_fcst[0]
        matched_data_dates, matched_obs_lon_lat, matched_fcst_lon_lat = (
            np.ma.masked_all((array_len, 2)) for _ in range(3))

        # NOTE(review): matches are written to consecutive slots starting at
        # lt_to_skip, which assumes the matched dates are contiguous in both
        # tracks - confirm for observation files with gaps.
        for i, z in zip(indices_obs, range(array_len - lt_to_skip)):
            matched_data_dates[z + lt_to_skip, 0] = obs_datelist[i]
            matched_obs_lon_lat[z + lt_to_skip, 0] = obs_lon[i]
            matched_obs_lon_lat[z + lt_to_skip, 1] = obs_lat[i]
        print(matched_data_dates)

        for i, z in zip(indices_fcst[0:array_len - 1], range(array_len - lt_to_skip)):
            matched_data_dates[z + lt_to_skip, 1] = fcst_datelist[i]
            matched_fcst_lon_lat[z + lt_to_skip, 0] = fcst_lon[i]
            matched_fcst_lon_lat[z + lt_to_skip, 1] = fcst_lat[i]

        # Translation (propagation) speed along the matched observed and
        # forecast tracks
        obs_speed = ts.prop_speed_vals(matched_obs_lon_lat[:, 0],
                                       matched_obs_lon_lat[:, 1])
        fcst_speed = ts.prop_speed_vals(matched_fcst_lon_lat[:, 0],
                                        matched_fcst_lon_lat[:, 1])

        # Translation-speed absolute error and bias for this forecast at each
        # lead time
        err, bias = (np.zeros(array_len - 1) for _ in range(2))
        for lt in range(array_len - 1):
            err[lt] = ts.prop_speed_abs_err(obs_speed[lt], fcst_speed[lt])
            bias[lt] = ts.prop_speed_bias(obs_speed[lt], fcst_speed[lt])

        # Add this forecast to the storm totals. The count ("weight") is kept
        # per lead time because forecasts have different lengths, so the
        # sample size varies with lead time.
        for lt in range(array_len - 1):
            if not np.isnan(err[lt]):
                storm_err[a, lt] = err[lt]
                storm_err_sum[lt] += err[lt]
                storm_err_wgt[lt] += 1
            if not np.isnan(bias[lt]):
                storm_bias[a, lt] = bias[lt]
                storm_bias_sum[lt] += bias[lt]
                storm_bias_wgt[lt] += 1

    # Mean over all forecasts of this storm at each lead time. The last slot
    # is left at 0, and lead times with no valid forecast become NaN (0/0);
    # the warning for that expected case is silenced.
    storm_err_mean, storm_bias_mean = (np.zeros(array_len) for _ in range(2))
    with np.errstate(divide="ignore", invalid="ignore"):
        storm_err_mean[:-1] = storm_err_sum[:-1] / storm_err_wgt[:-1]
        storm_bias_mean[:-1] = storm_bias_sum[:-1] / storm_bias_wgt[:-1]

    # NOTE(review): obs_track and j are not parameters; they are resolved
    # from module scope. obs_track presumably mirrors
    # use_analysis_or_ibtracs - confirm with the calling code.
    if obs_track == "ibtracs":
        eps_avg_err_ib[j, :] = storm_err_mean
        eps_avg_bias_ib[j, :] = storm_bias_mean
    elif obs_track == "analysis":
        eps_avg_err_an[j, :] = storm_err_mean
        eps_avg_bias_an[j, :] = storm_bias_mean

    en = str(int(e) - 3)

    np.savetxt(savedir + dir + "_each_forecast_translation_speed_error_per_lead_time_vs_" + use_analysis_or_ibtracs + "_EPS_" + en + ".txt", storm_err, '%.4f')
    np.savetxt(savedir + dir + "_each_forecast_translation_speed_bias_per_lead_time_vs_" + use_analysis_or_ibtracs + "_EPS_" + en + ".txt", storm_bias, '%.4f')
    np.savetxt(savedir + dir + "_average_translation_speed_error_per_lead_time_vs_" + use_analysis_or_ibtracs + "_" + fcst_type + ".txt", storm_err_mean, '%.4f')
    np.savetxt(savedir + dir + "_average_translation_speed_bias_per_lead_time_vs_" + use_analysis_or_ibtracs + "_" + fcst_type + ".txt", storm_bias_mean, '%.4f')