Example #1
def storm_stats(eps_files_list, mean_files_list, array_len, NT_eps, storm_no):
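	"""Calculate the spread of the ensemble forecasts about the ensemble mean (location, wind, MSLP and translation speed) for one storm, at each lead time"""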
	
	storm_loc_spread, storm_wind_spread, storm_mslp_spread, storm_speed_spread, storm_wgt = (np.zeros(array_len) for i in range(5))
		
	#first store the average difference between each ensemble member and the mean, for each forecast track of this storm
	#then average across those per-track differences to get the average for the storm
	#always at each lead time
	each_forecast_loc_spread, each_forecast_wind_spread, each_forecast_mslp_spread, each_forecast_speed_spread, each_forecast_wgt = (np.zeros((NT_eps, array_len)) for i in range(5))
		
		
	#load in the ensemble mean file for this date
		
	#load in the ensemble file for this date
	#then loop over the ensemble forecast tracks and calculate the difference between that and the mean track
		
	#for each pair of forecasts, calculate the difference between the two forecasts
	for ff_mean,ff_eps, tno in zip(mean_files_list, eps_files_list, range(NT_eps)):
	
		print ff_mean
		print ff_eps
		
		#one row per ensemble member, initialised with NaN so that lead times a
		#member never reaches are ignored by nanmean and the isnan checks below
		all_ens_members_loc_diff, all_ens_members_wind_diff, all_ens_members_mslp_diff, all_ens_members_speed_diff = (np.full((50, array_len), np.nan) for i in range(4))
		
		mean_data=np.genfromtxt(ff_mean,dtype=float,skip_header=1,usecols=np.arange(0,11))
		mean_lon=mean_data[:,7]
		mean_lat=mean_data[:,8]
		mean_mslp=mean_data[:,9]
		mean_wind=mean_data[:,10]
		mean_datelist = pl.get_dates(mean_data)
		
		mean_speed = ts.prop_speed_vals(mean_lon, mean_lat)
		print "mean_speed: ", mean_speed
		ML = len(mean_speed)
		print ML
		print "mean dates: ", mean_datelist
		
		#here, will need to load in separate file with the mean forecast translation speed
		
			
		eps_data=np.genfromtxt(ff_eps,dtype=float,skip_header=1,usecols=np.arange(0,11))
		
		#here, will need to load in separate file with the eps forecast translation speed
		
		#es (global) lists the ensemble member IDs matched against column 0 of the track file
		for j, e in enumerate(es):
		
			print "ensemble member number ", e,j
		
			#select only the rows for this ensemble member
			member_rows = np.where(eps_data[:,0] == e)
			eps_lon = eps_data[member_rows,7][0]
			eps_lat = eps_data[member_rows,8][0]
			eps_mslp = eps_data[member_rows,9][0]
			eps_wind = eps_data[member_rows,10][0]
			eps_datelist = pl.get_dates(eps_data[member_rows,:][0])
			
			#print "eps_lon", eps_lat
			#print "eps_lat", eps_lat
				
			eps_speed = ts.prop_speed_vals(eps_lon, eps_lat)
			print "eps_speed: ", eps_speed	
			EL = len(eps_speed)
			print EL
			print "eps dates: ", eps_datelist
			
			fcst_len = np.min([ML,EL])
			print "fcst_len: ", fcst_len
			
			#cap the usable length so that the indexing below stays inside the array_len-long arrays
			if fcst_len > 41:
				fcst_len = 41
				
			if fcst_len != 0:
			
			
				for lt in range(fcst_len+1):
					#location, wind and MSLP differences exist at every matched track point
					all_ens_members_loc_diff[j,lt] = ts.trerr([mean_lon[lt],mean_lat[lt]], [eps_lon[lt],eps_lat[lt]])
					all_ens_members_wind_diff[j,lt] = abs(mean_wind[lt] - eps_wind[lt])
					all_ens_members_mslp_diff[j,lt] = abs(mean_mslp[lt] - eps_mslp[lt])
					#the translation speed arrays are one element shorter than the tracks, so skip the final point
					if lt < fcst_len:
						all_ens_members_speed_diff[j,lt] = ts.prop_speed_abs_err(mean_speed[lt], eps_speed[lt])
					
				
		for lt in range(array_len):
			for j in range(len(es)):
				if not np.isnan(all_ens_members_loc_diff[j,lt]):
					storm_wgt[lt] += 1
					all_storms_wgt[lt] += 1

			each_forecast_loc_spread[tno,lt] = np.nanmean(all_ens_members_loc_diff[:,lt])
			each_forecast_wind_spread[tno,lt] = np.nanmean(all_ens_members_wind_diff[:,lt])
			each_forecast_mslp_spread[tno,lt] = np.nanmean(all_ens_members_mslp_diff[:,lt])
			each_forecast_speed_spread[tno,lt] = np.nanmean(all_ens_members_speed_diff[:,lt])
			
	
	for lt in range(array_len):
		
			
		storm_loc_spread[lt] = np.nanmean(each_forecast_loc_spread[:,lt])
		storm_wind_spread[lt] = np.nanmean(each_forecast_wind_spread[:,lt])
		storm_mslp_spread[lt] = np.nanmean(each_forecast_mslp_spread[:,lt])
		storm_speed_spread[lt] = np.nanmean(each_forecast_speed_spread[:,lt])
		
	np.savetxt(savedir + dir + "_loc_spread_per_lead_time.txt", storm_loc_spread[:], '%.4f')
	np.savetxt(savedir + dir + "_wind_spread_per_lead_time.txt", storm_wind_spread[:], '%.4f')
	np.savetxt(savedir + dir + "_mslp_spread_per_lead_time.txt", storm_mslp_spread[:], '%.4f')
	np.savetxt(savedir + dir + "_speed_spread_per_lead_time.txt", storm_speed_spread[:], '%.4f')
	np.savetxt(savedir + dir + "_number_ens_members_included_in_spread_calcs_per_lead_time.txt", storm_wgt[:], '%.4f')
	
		
	all_storms_ind_loc_spread[storm_no,:] = storm_loc_spread[:]
	all_storms_ind_wind_spread[storm_no,:] = storm_wind_spread[:]
	all_storms_ind_mslp_spread[storm_no,:] = storm_mslp_spread[:]
	all_storms_ind_speed_spread[storm_no,:] = storm_speed_spread[:]
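
#A minimal sketch of what the external ts helpers used above might look like,
#assuming trerr returns the great-circle (haversine) distance in km between two
#(lon, lat) pairs and prop_speed_vals the translation speed between consecutive
#6-hourly track points; the real ts module may differ in units and signature.
import numpy as np

EARTH_RADIUS_KM = 6371.0
HOURS_PER_STEP = 6.0  #assumed spacing of the track points

def trerr(point_a, point_b):
	"""Great-circle distance (km) between two (lon, lat) pairs given in degrees."""
	lon1, lat1 = np.radians(point_a)
	lon2, lat2 = np.radians(point_b)
	h = (np.sin((lat2 - lat1) / 2.0) ** 2
	     + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2.0) ** 2)
	return 2.0 * EARTH_RADIUS_KM * np.arcsin(np.sqrt(h))

def prop_speed_vals(lons, lats):
	"""Translation speed (km/h) between consecutive track points.
	Returns an array one element shorter than the track, which is why the
	code above works with fcst_len = min(ML, EL) speed intervals."""
	points = list(zip(lons, lats))
	dists = [trerr(p, q) for p, q in zip(points[:-1], points[1:])]
	return np.asarray(dists) / HOURS_PER_STEP

Example #2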
        def storm_stats(ibtracs_file, analysis_file, nwp_files_list,
                        use_analysis_or_ibtracs, array_len):
            """This function calculates the statistics for all the forecasts of one storm"""

            global all_storms_speed_err
            global all_storms_speed_bias

            if use_analysis_or_ibtracs == "analysis":
                obs_file = analysis_file
            elif use_analysis_or_ibtracs == "ibtracs":
                obs_file = ibtracs_file
            #Find out number of forecast tracks for this storm
            NT = len(nwp_files_list)

            #empty arrays to hold the track error and intensity bias statistics for this storm, for each forecast track
            storm_err, storm_bias = (np.zeros((NT, array_len))
                                     for i in range(2))

            #empty arrays to hold the error sums and counts, for calculating the average errors for this storm
            storm_err_sum, storm_err_wgt, storm_bias_sum, storm_bias_wgt = (
                np.zeros(array_len) for i in range(4))

            #Get the date, lon, lat and vorticity data for the observed track
            obs_data = np.genfromtxt(obs_file, dtype=float, skip_header=1)
            obs_lon = obs_data[:, 7]
            obs_lat = obs_data[:, 8]
            obs_datelist = pl.get_dates(obs_data)

            #compute the statistics for each forecast of this storm, at each lead time
            """need to make sure we compare the forecast timestamp with the correct obs timestamp!!"""
            print NT
            for ff, a in zip(nwp_files_list, range(NT)):
                #print ff

                #get the forecast date, lat, lon and vorticity
                fcst_data = np.genfromtxt(ff, dtype=float, skip_header=1)
                fcst_lon = fcst_data[:, 7]
                fcst_lat = fcst_data[:, 8]
                fcst_datelist = pl.get_dates(fcst_data)
                """We need to get the indices of both the observed data and the forecast data, where the dates match"""
                #This is because the dates in the observed track file and forecast track files cover different ranges,
                #depending on the date the forecast was initialised and the period the forecast covers

                #find the indices of the forecast array, where the dates exist in the observed dates array
                indices_fcst = np.nonzero(np.in1d(fcst_datelist,
                                                  obs_datelist))[0]
                # find the indices of the observed array, where the dates exist in the forecast dates array
                indices_obs = np.nonzero(np.in1d(obs_datelist,
                                                 fcst_datelist))[0]

                #So the first few lead times of the forecast might have no observations to match against
                #BUT we don't want the first matching date to then be calculated as if it were lead time 1 of the forecast
                #So fill the arrays with empty values for the first few lead times where there are no obs to verify against
                #How many lead times do we need to skip? Up to the index of the first matched forecast date:

                #if using ibtracs, sometimes there are no observations for the entire length of the forecast track
                #so we tell it not to run the calculations if there were no matching dates:

                if len(indices_fcst) != 0:

                    lt_to_skip = indices_fcst[0]

                    #empty arrays to hold the data at the matched timesteps
                    matched_data_dates, matched_obs_lon_lat, matched_fcst_lon_lat = (
                        np.ma.masked_all((array_len, 2)) for i in range(3))

                    #get the observed track data for the dates where this forecast track matches the observed track
                    #at each lead time
                    print "indices_obs: ", indices_obs
                    for i, z in zip(indices_obs,
                                    range(array_len - lt_to_skip)):
                        matched_data_dates[z + lt_to_skip, 0] = obs_datelist[i]
                        matched_obs_lon_lat[z + lt_to_skip, 0] = obs_lon[i]
                        matched_obs_lon_lat[z + lt_to_skip, 1] = obs_lat[i]

                    #get the forecast track data for the dates where this forecast track matches the observed track, at each lead time

                    print "indices_fcst: ", indices_fcst
                    print "indices_fcst[0:array_len-1]: ", indices_fcst[
                        0:array_len - 1]
                    for i, z in zip(indices_fcst[0:array_len - 1],
                                    range(array_len - lt_to_skip)):
                        matched_data_dates[z + lt_to_skip,
                                           1] = fcst_datelist[i]
                        matched_fcst_lon_lat[z + lt_to_skip, 0] = fcst_lon[i]
                        matched_fcst_lon_lat[z + lt_to_skip, 1] = fcst_lat[i]

                    #calculate the translation (propagation) speed along the whole forecast track, and along the whole observed track
                    obs_speed = ts.prop_speed_vals(matched_obs_lon_lat[:, 0],
                                                   matched_obs_lon_lat[:, 1])
                    fcst_speed = ts.prop_speed_vals(matched_fcst_lon_lat[:, 0],
                                                    matched_fcst_lon_lat[:, 1])

                    print obs_speed
                    print fcst_speed

                    #initialise with NaN: there is one fewer speed interval than
                    #track points, so the final element is never filled and should
                    #be skipped by the isnan checks below
                    err, bias = (np.full(array_len, np.nan) for i in range(2))
                    for lt in range(array_len - 1):
                        err[lt] = ts.prop_speed_abs_err(
                            obs_speed[lt], fcst_speed[lt])
                        bias[lt] = ts.prop_speed_bias(obs_speed[lt],
                                                      fcst_speed[lt])

                    #add the errors for this forecast track, to the arrays holding all the errors for this storm
                    #and add one to the "weight" for each error, which counts the number of forecasts contributing to the error calculation
                    #this is because some forecasts are shorter than others, and we want to divide by the correct sample size

                    for lt in range(array_len):
                        if not np.isnan(err[lt]):
                            #storm_err[a,lt] = err[lt]
                            #storm_err_sum[lt] += err[lt]
                            #storm_err_wgt[lt] += 1
                            #all_storms_err_sum[lt] += err[lt]
                            all_storms_wgt[lt] += 1

                        #if not np.isnan(bias[lt]):
                        #storm_bias[a,lt] = bias[lt]
                        #storm_bias_sum[lt] += bias[lt]
                        #storm_bias_wgt[lt] += 1
                        #all_storms_bias_sum[lt] += bias[lt]
                        #all_storms_bias_wgt[lt] += 1

                    #first forecast: overwrite the zero-filled global arrays;
                    #subsequent forecasts are stacked as new rows
                    if np.all(all_storms_speed_err == 0):
                        for lt in range(array_len):
                            all_storms_speed_err[lt] = err[lt]

                            all_storms_speed_bias[lt] = bias[lt]

                    else:
                        all_storms_speed_err = np.vstack(
                            [all_storms_speed_err, err])
                        all_storms_speed_bias = np.vstack(
                            [all_storms_speed_bias, bias])
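
#A hypothetical driver for the storm_stats above; the file layout, the storm
#name and the 42-entry lead-time axis (6-hourly steps out to day 10) are
#assumptions, and the function still needs the project's pl and ts modules.
import glob
import numpy as np

array_len = 42
#globals the function reads and updates
all_storms_wgt = np.zeros(array_len)
all_storms_speed_err = np.zeros(array_len)
all_storms_speed_bias = np.zeros(array_len)

nwp_files = sorted(glob.glob("tracks/IRMA/nwp_track_*.txt"))  #hypothetical paths
storm_stats("tracks/IRMA/ibtracs_track.txt", "tracks/IRMA/analysis_track.txt",
            nwp_files, "ibtracs", array_len)

#after all storms are processed, average the stacked rows per lead time, e.g.
#np.nanmean(all_storms_speed_err, axis=0)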
Example #3
			def storm_stats(ibtracs_file, analysis_file, nwp_files_list, use_analysis_or_ibtracs, array_len, e):
				"""This function calculates the statistics for all the forecasts of one storm, for a single ensemble member"""
		
				print ibtracs_file
				print analysis_file

				if use_analysis_or_ibtracs == "analysis":
					obs_file = analysis_file
				elif use_analysis_or_ibtracs == "ibtracs":
					obs_file = ibtracs_file
				#Find out number of forecast tracks for this storm
				NT = len(nwp_files_list)

				#empty arrays to hold the track error and intensity bias statistics for this storm, for each forecast track
				storm_err, storm_bias =(np.zeros((NT,array_len)) for i in range(2))

				#empty arrays to hold the error sums and counts, for calculating the average errors for this storm
				storm_err_sum, storm_err_wgt, storm_bias_sum, storm_bias_wgt = (np.zeros(array_len) for i in range(4))

				#Get the date, lon, lat and vorticity data for the observed track
				obs_data=np.genfromtxt(obs_file, dtype=float, skip_header=1)
				obs_lon=obs_data[:,7]
				obs_lat=obs_data[:,8]
				obs_datelist = pl.get_dates(obs_data)
				
				print obs_lon
				print obs_lat

				#compute the statistics for each forecast of this storm, at each lead time
				"""need to make sure we compare the forecast timestamp with the correct obs timestamp!!"""
				print NT
				for ff,a in zip(nwp_files_list, range(NT)):
					#print ff
					
					print ff

					#get the forecast date, lat, lon and vorticity
					fcst_data=np.genfromtxt(ff,dtype=float,skip_header=1,usecols=np.arange(0,11))
					
					#select only the rows for this ensemble member (column 0 holds the member ID)
					member_rows = np.where(fcst_data[:,0] == e)
					print fcst_data[member_rows,:]
					
					fcst_lon = fcst_data[member_rows,7][0]
					fcst_lat = fcst_data[member_rows,8][0]
					fcst_datelist = pl.get_dates(fcst_data[member_rows,:][0])
					
					print fcst_lon
					print fcst_lat
					
					print fcst_datelist

					"""We need to get the indices of both the observed data and the forecast data, where the dates match"""
					#This is because the dates in the observed track file and forecast track files cover different ranges,
					#depending on the date the forecast was initialised and the period the forecast covers

					#find the indices of the forecast array, where the dates exist in the observed dates array
					indices_fcst = np.nonzero(np.in1d(fcst_datelist,obs_datelist))[0]
					# find the indices of the observed array, where the dates exist in the forecast dates array
					indices_obs = np.nonzero(np.in1d(obs_datelist, fcst_datelist))[0]

					#So the first few lead times of the forecast might have no observations to match against
					#BUT we don't want the first matching date to then be calculated as if it were lead time 1 of the forecast
					#So fill the arrays with empty values for the first few lead times where there are no obs to verify against
					#How many lead times do we need to skip? Up to the index of the first matched forecast date:

					#if using ibtracs, sometimes there are no observations for the entire length of the forecast track
					#so we tell it not to run the calculations if there were no matching dates:
					if len(indices_fcst) != 0:

						lt_to_skip = indices_fcst[0]


						#empty arrays to hold the data at the matched timesteps
						matched_data_dates, matched_obs_lon_lat, matched_fcst_lon_lat = (np.ma.masked_all((array_len,2)) for i in range(3))

						#get the observed track data for the dates where this forecast track matches the observed track
						#at each lead time
						for i,z in zip(indices_obs,range(array_len-lt_to_skip)):
							matched_data_dates[z+lt_to_skip, 0] = obs_datelist[i]
							matched_obs_lon_lat[z+lt_to_skip,0] = obs_lon[i]
							matched_obs_lon_lat[z+lt_to_skip,1] = obs_lat[i]

							
						print matched_data_dates

						#get the forecast track data for the dates where this forecast track matches the observed track, at each lead time

						for i,z in zip(indices_fcst[0:array_len-1], range(array_len-lt_to_skip)):
							matched_data_dates[z+lt_to_skip,1] = fcst_datelist[i]
							matched_fcst_lon_lat[z+lt_to_skip, 0] = fcst_lon[i]
							matched_fcst_lon_lat[z+lt_to_skip, 1] = fcst_lat[i]
		
						#print matched_data_dates
						
						#calculate the translation (propagation) speed along the whole forecast track, and along the whole observed track
						obs_speed = ts.prop_speed_vals(matched_obs_lon_lat[:,0], matched_obs_lon_lat[:,1])
						fcst_speed = ts.prop_speed_vals(matched_fcst_lon_lat[:,0], matched_fcst_lon_lat[:,1])

						#calculate the translation speed error and bias for this forecast, at each lead time
						err, bias = (np.zeros(array_len-1) for i in range(2))
						for lt in range(array_len-1):
	
							err[lt] = ts.prop_speed_abs_err(obs_speed[lt], fcst_speed[lt])
							bias[lt] = ts.prop_speed_bias(obs_speed[lt], fcst_speed[lt])

						#add the errors for this forecast track, to the arrays holding all the errors for this storm
						#and add one to the "weight" for each error, which counts the number of forecasts contributing to the error calculation
						#this is because some forecasts are shorter than others, and we want to divide by the correct sample size

						#print wind_bias
						for lt in range(array_len-1):
							if not np.isnan(err[lt]):
								storm_err[a,lt] = err[lt]
								storm_err_sum[lt] += err[lt]
								storm_err_wgt[lt] += 1
								#all_storms_err_sum[lt] += err[lt]
								#all_storms_err_wgt[lt] += 1
							if not np.isnan(bias[lt]):
								storm_bias[a,lt] = bias[lt]
								storm_bias_sum[lt] += bias[lt]
								storm_bias_wgt[lt] += 1
								#all_storms_bias_sum[lt] += bias[lt]
								#all_storms_bias_wgt[lt] += 1

				#calculate the average error at each lead time, across all the forecasts of this storm
				storm_err_mean, storm_bias_mean = (np.zeros(array_len) for i in range(2))
				for lt in range(array_len-1):
					#guard against lead times that no forecast reached
					if storm_err_wgt[lt] > 0:
						storm_err_mean[lt] = storm_err_sum[lt] / storm_err_wgt[lt]
					if storm_bias_wgt[lt] > 0:
						storm_bias_mean[lt] = storm_bias_sum[lt] / storm_bias_wgt[lt]
					

			
				#the eps_avg_* arrays and the index j are globals set by the calling script
				if use_analysis_or_ibtracs == "ibtracs":
					eps_avg_err_ib[j,:] = storm_err_mean
					eps_avg_bias_ib[j,:] = storm_bias_mean
				
				elif use_analysis_or_ibtracs == "analysis":
					eps_avg_err_an[j,:] = storm_err_mean
					eps_avg_bias_an[j,:] = storm_bias_mean
				
					
				#EPS member number for the output filename (member IDs in the track files are offset by 3)
				en = str(int(e)-3)

				
				np.savetxt(savedir + dir + "_each_forecast_translation_speed_error_per_lead_time_vs_"+use_analysis_or_ibtracs+"_EPS_"+en+".txt", storm_err[:,:], '%.4f')
				np.savetxt(savedir + dir + "_each_forecast_translation_speed_bias_per_lead_time_vs_" + use_analysis_or_ibtracs + "_EPS_"+en+".txt", storm_bias[:, :], '%.4f')

				np.savetxt(savedir + dir + "_average_translation_speed_error_per_lead_time_vs_"+use_analysis_or_ibtracs+"_"+fcst_type+".txt", storm_err_mean[:], '%.4f')
				np.savetxt(savedir + dir + "_average_translation_speed_bias_per_lead_time_vs_"+use_analysis_or_ibtracs+"_"+fcst_type+".txt", storm_bias_mean[:], '%.4f')
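
#The two ts scoring helpers used above are presumably simple pointwise measures;
#a sketch consistent with how they are called (observed value first), with the
#sign convention of the bias an assumption:
def prop_speed_bias(obs_speed, fcst_speed):
	"""Signed translation-speed error; positive when the forecast storm moves too fast."""
	return fcst_speed - obs_speed

def prop_speed_abs_err(obs_speed, fcst_speed):
	"""Magnitude of the translation-speed error."""
	return abs(fcst_speed - obs_speed)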