def _Initialize(self, csv_name):
    """Create an Overpass object for every data row of an overpass CSV file.

    The first line of the file is treated as a header and skipped. Every
    other non-blank row is parsed into a ``PST.Overpass`` which is then
    registered in three places: as an attribute of ``self`` named
    {PointSource short name}{YYYYMMDD} (for example, Gavin20160207), in
    ``self.all``, and in the ``Overpasses`` list of the matching entry of
    ``Sources.Sources``.

    Two row layouts are understood:

    * 18 columns (older files): key, date, MERRA (2), old ECMWF (2),
      GEM (2), average (2), surface (2), stability class, a, corrected
      stability, corrected a, lite file, full file.
    * anything else is read as the 24 column layout: key, date, MERRA
      beginning / middle / interpolated (2 each), old ECMWF (2), ECMWF (2),
      GEM (2), average (2), surface (2), followed by the same six trailing
      columns.

    The date column is ``year-month-day-hour-minute``.

    Args:
        csv_name: path of the CSV file to read.

    Returns:
        The list of Overpass objects created, in file order.

    Raises:
        IOError: if the file does not exist or cannot be opened.
        AttributeError: re-raised with the offending line if looking up the
            source key raises AttributeError.
        IndexError: if a row has fewer columns than its layout needs.
    """
    if not os.path.exists(csv_name):
        raise IOError("File %s does not exist" % csv_name)
    try:
        csv_file = open(csv_name, 'r')
    except IOError:
        print("Could not open %s: See message" % csv_name)
        raise

    def floats(columns):
        return [float(value) for value in columns]

    overpass_list = []
    # The context manager closes the file even when a row fails to parse.
    with csv_file:
        csv_file.readline()  # header line, not used
        for line in iter(csv_file.readline, ""):
            if not line.strip():
                # A blank (typically trailing) line carries no overpass.
                continue
            fields = line.split(',')
            key = fields[0]
            try:
                source = Sources.Sources[key]
            except AttributeError:
                raise AttributeError(
                    "Attribute {0} does not exist in module Sources.\n"
                    "Current line of csv: {1}; in csv {2}".format(
                        key, line, self.source_file))

            legacy = len(fields) == 18
            try:
                year, month, day, hour, minute = [
                    int(part) for part in fields[1].split('-')]
                when = datetime.datetime(year, month, day, hour, minute)
                time = PST.Time(when, source)
                if legacy:
                    # The old layout has a single MERRA vector and no new
                    # ECMWF vector; reuse the MERRA vector for all three
                    # MERRA winds so the Overpass can still be built.
                    merra_vector = floats(fields[2:4])
                    merra_beginning = merra_vector
                    merra_middle = merra_vector
                    merra_interp = merra_vector
                    ecmwf_old_vector = floats(fields[4:6])
                    ecmwf_vector = (0, 0)
                    gem_vector = floats(fields[6:8])
                    surf_vector = floats(fields[10:12])
                    base = 12
                else:
                    merra_beginning = floats(fields[2:4])
                    merra_middle = floats(fields[4:6])
                    merra_interp = floats(fields[6:8])
                    ecmwf_old_vector = floats(fields[8:10])
                    ecmwf_vector = floats(fields[10:12])
                    gem_vector = floats(fields[12:14])
                    surf_vector = floats(fields[16:18])
                    base = 18
                # The stored average columns are skipped: the averages are
                # recomputed from the individual winds below.
                stability_class = fields[base]
                a = float(fields[base + 1])
                stability_corrected = fields[base + 2]
                a_corrected = float(fields[base + 3])
                lite = fields[base + 4]
                full = fields[base + 5].strip('\n').strip('\r')
            except IndexError:
                raise
            except Exception:
                print("An unecpexted exception was raised: see exception")
                raise

            overpass = PST.Overpass(time)
            overpass.height = source.height
            overpass.MERRA_beginning = PST.Wind(merra_beginning, source.height)
            overpass.MERRA = PST.Wind(merra_middle, source.height)
            overpass.MERRA_interp = PST.Wind(merra_interp, source.height)
            overpass.ECMWF = PST.Wind(ecmwf_vector, source.height)
            overpass.ECMWF_old = PST.Wind(ecmwf_old_vector, source.height)
            overpass.GEM = PST.Wind(gem_vector, source.height)
            overpass.Average_beginning = 0.5 * (
                overpass.MERRA_beginning + overpass.ECMWF)
            overpass.Average = 0.5 * (overpass.MERRA + overpass.ECMWF)
            overpass.Average_interp = 0.5 * (
                overpass.MERRA_interp + overpass.ECMWF)
            overpass.Average_old = 0.5 * (overpass.MERRA + overpass.ECMWF_old)
            overpass.Average_hybrid = PST.Wind.construct(
                overpass.Average.speed,
                overpass.Average_beginning.bearing,
                overpass.height)
            overpass.surface = PST.Wind(surf_vector, source.height)
            overpass.a_elevated = PST.Stability(overpass.Average.speed, 0.).a
            overpass.a_old = a
            overpass.a = a_corrected
            overpass.stability_old = stability_class
            overpass.stability = stability_corrected
            overpass.source = source
            overpass.secondary = source.secondary
            overpass.FullFile = full
            overpass.LiteFile = lite
            # The observation mode is the last two characters of the second
            # underscore-separated token of the full file's base name.
            try:
                obs_mode = full.split('/')[-1].split('_')[1][-2:]
            except IndexError:
                obs_mode = ''
            overpass.observation_mode = obs_mode

            overpass_name = source.short + time.strf8
            setattr(self, overpass_name, overpass)
            self.all.append(overpass)
            Sources.Sources[key].Overpasses.append(overpass)
            overpass_list.append(overpass)
    return overpass_list
def find(sources, lat_threshold=_lat_threshold, lon_threshold=_lon_threshold,
         min_date=None, max_date=None, download=True, min_points=0):
    """Search the OCO-2 lite files for soundings close to each source.

    Every lite file found under ``lite_dir`` for the years being searched,
    and whose date lies between ``min_date`` and ``max_date``, is scanned.
    A sounding is "close" to a source when both its latitude and longitude
    differences are within the thresholds. For each source with close
    soundings in a file, one line is written to that source's summary text
    file, and (when a full file name is available) one row is written to
    the overpass CSV file.

    Args:
        sources: iterable of source objects (``short``, ``lat`` and ``lon``
            attributes are read).
        lat_threshold: largest accepted latitude difference, in degrees.
        lon_threshold: largest accepted longitude difference, in degrees.
        min_date: earliest datetime searched; defaults to 2014-01-01 00:00.
        max_date: latest datetime searched; defaults to 2016-12-31 23:59.
        download: when true, ``ECMWF.download`` is called for the time of
            every overpass that has enough points.
        min_points: overpasses with fewer close soundings are skipped.

    Returns:
        The path of the CSV file that was written (``_csv_save_loc``).
    """
    print("Starting search for %d sources" % len(sources))
    print("Lat threshold: %s" % lat_threshold)
    print("Lon threshold: %s" % lon_threshold)
    print("Minimum points found: %d" % min_points)
    print("Sources being searched:")
    for src in sources:
        print(src.short)
    print("")

    first_year = 2014 if min_date is None else min_date.year
    last_year = 2016 if max_date is None else max_date.year
    year_list = [str(year) for year in range(first_year, last_year + 1)]
    if min_date is None:
        min_date = dt.datetime(2014, 1, 1, 0, 0)
    if max_date is None:
        max_date = dt.datetime(2016, 12, 31, 23, 59)

    fnames = [_summary_file_loc.format(source.short) for source in sources]
    for path in fnames:
        directory = os.path.dirname(path)
        # makedirs also creates missing parents; an empty dirname means the
        # file lives in the current directory and nothing has to be made.
        if directory and not os.path.exists(directory):
            os.makedirs(directory)

    # Context manager / finally so that a crash or an interrupt does not
    # lose the data already written.
    with open(_csv_save_loc, 'w') as csv_output:
        csv_output.write(PST.Overpass.header)
        handles = []
        try:
            # Opened one at a time so that a failure part-way through still
            # closes the files that were already opened.
            for path in fnames:
                handles.append(open(path, "w"))
            summary_files = {source.short: handles[i]
                             for i, source in enumerate(sources)}
            for name, handle in summary_files.items():
                handle.write('Overpasses for %s\n' % name)
                handle.write('Fields are ID, Date [Year Month Day Hour Minute Second Millisecond], Number of soundings found, Filename\n')

            for year in year_list:
                print(year)
                year_dir = lite_dir.format(year)
                for lite_name in os.listdir(year_dir):
                    date = dt.datetime.strptime(
                        lite_name.split('_')[2], '%Y%m%d')
                    if not (min_date <= date <= max_date):
                        continue

                    # name -> [# nadir, # glint, # target, # other] soundings
                    all_overpasses = {source.short: [0, 0, 0, 0]
                                      for source in sources}
                    lite_file = os.path.join(year_dir, lite_name)
                    lite_data = File.lite(lite_file)
                    print(lite_file)
                    lats = lite_data.latitude
                    lons = lite_data.longitude
                    times = lite_data.date
                    lite_id = lite_data.sounding_id
                    mode = lite_data.observation_mode

                    # (index of first close sounding, source name), in the
                    # order the sources were first encountered.
                    first_hits = []
                    found = set()
                    for k in range(len(lite_data)):
                        sounding_lat = lats[k]
                        sounding_lon = lons[k]
                        for source in sources:
                            key = source.short
                            dlat = abs(sounding_lat - source.lat)
                            # Fold the longitude difference into [0, 180]
                            # so that both a 0..360 / -180..180 convention
                            # mismatch and a crossing of the antimeridian
                            # give the true separation.
                            dlon = abs(sounding_lon - source.lon) % 360.
                            dlon = min(dlon, 360. - dlon)
                            if dlat <= lat_threshold and dlon <= lon_threshold:
                                if key not in found:
                                    first_hits.append((k, key))
                                    found.add(key)
                                    print("Found overpass for %s: dlat=%f, dlon=%f" % (source.short, dlat, dlon))
                                if mode[k] == "ND":
                                    all_overpasses[key][0] += 1
                                elif mode[k] == "GL":
                                    all_overpasses[key][1] += 1
                                elif mode[k] == "TG":
                                    all_overpasses[key][2] += 1
                                else:
                                    all_overpasses[key][3] += 1

                    for i, source_name in first_hits:
                        print("Processing %s overpass" % source_name)
                        nadir, glint, tg, other = all_overpasses[source_name]
                        total_points = nadir + glint + tg + other
                        if total_points < min_points:
                            print("Only {0} points".format(total_points))
                            continue

                        id_info = 'ID: {0}'.format(lite_id[i])
                        type_info = 'Nadir: {0}, Glint: {1}, Target: {2}, Transition: {3}'.format(nadir, glint, tg, other)
                        source = Sources.Sources[source_name]
                        time = PST.Time(None, source,
                                        time_string=str(times[i]))
                        if download:
                            ECMWF.download(time)
                        full = lite_data.full_file(i)
                        overpass = PST.Overpass.new(
                            source, time, full, lite_file)
                        file_info = 'File: {0}, {1}'.format(lite_file, full)
                        overpass_info = ', '.join(
                            [id_info, str(times[i]), type_info, file_info])
                        # Only overpasses with a full file go into the CSV.
                        if full != "":
                            csv_output.write(overpass.write())
                        summary_files[source_name].write(overpass_info + '\n')
        finally:
            for handle in handles:
                handle.close()

    print("Done")
    print("Saved overpass information to {0}".format(_csv_save_loc))
    return _csv_save_loc
def _gem_wind_at_source(fname, t_lat, t_lon, height):
    """Return the (u, v) wind read from one GEM file at a position and height.

    Raises IOError if the file cannot be opened, AttributeError if the UU,
    VV or GZ fields are missing, and ValueError if the vertical axes of the
    fields disagree or the grid point found is further from the requested
    position than one grid spacing.
    """
    try:
        rpn = fstd.open(fname)
    except IOError:
        print("Unable to open file {0}".format(fname))
        raise
    try:
        U = rpn.UU
        V = rpn.VV
        GZ = rpn.GZ
    except AttributeError as error:
        print("Unable to read all fields -- see exception raised")
        print(error)
        # Nothing below can work without all three fields.
        raise

    # <Var 'GZ'> has 160 levels, but U, V only have 80. In tests opening
    # files, <LogHybrid> from GZ has extra levels compared to it from U and
    # V; test to make sure of this.
    if not numpy.array_equal(U.axes[2][:], GZ.axes[2][1::2]):
        raise ValueError(
            "<LogHybrid> from GZ and UU, VV variables don't match up. Look closer at this file"
        )

    # Walk each axis to the first value not below the target, then step
    # back one cell.
    lats = U.lat[:]
    lons = U.lon[:]
    lat_row = 0
    lon_col = 0
    while lats[lat_row] < t_lat:
        lat_row += 1
    while lons[lon_col] < t_lon:
        lon_col += 1
    lat_row -= 1
    lon_col -= 1

    used_lat = lats[lat_row]
    used_lon = lons[lon_col]
    lat_error = abs(t_lat - used_lat)
    lon_error = abs(t_lon - used_lon)
    if lat_error > abs(lats[1] - lats[0]) or lon_error > abs(lons[1] - lons[0]):
        print("Source position : {0}".format((t_lat, t_lon)))
        print("Calculated position: {0}".format((used_lat, used_lon)))
        raise ValueError(
            "Rounding error: latitude or longitude was off by more than their resolution"
        )

    U = U[0, 0, :, lat_row, lon_col]
    V = V[0, 0, :, lat_row, lon_col]
    GZ = GZ[0, 0, 1::2, lat_row, lon_col]

    # Wind values are divided by 3.6 before use (presumably km/h -> m/s;
    # TODO confirm the units stored in the file).
    for i in range(len(GZ)):
        if GZ[i] < height:
            # First level below the requested height: interpolate between
            # it and the level before it.
            h1 = GZ[i]
            h2 = GZ[i - 1]
            u = numpy.interp(height, [h1, h2], [U[i] / 3.6, U[i - 1] / 3.6])
            v = numpy.interp(height, [h1, h2], [V[i] / 3.6, V[i - 1] / 3.6])
            return u, v

    # No level lies below the requested height. The search above implies
    # levels are ordered from high to low, so fall back to the last level
    # instead of indexing past the end of the arrays.
    last = len(GZ) - 1
    return U[last] / 3.6, V[last] / 3.6


def get_gem_highres(time):
    """Read 1 h GEM forecast data and interpolate the wind to ``time``.

    The files for the hour containing ``time`` and for the following hour
    are located with ``_gemh_fmt``. Each is sampled at the position of
    ``time`` (``time.lat``, ``time.lon``) and interpolated to
    ``time.height``; the two results are then interpolated linearly in time.
    If only one of the two files exists it is used for both ends, so no
    time interpolation effectively takes place.

    Args:
        time: a PST.Time-like object; ``lat``, ``lon``, ``height``,
            ``year``, ``month``, ``day``, ``hour``, ``decimaltime`` and
            ``source`` are read.

    Returns:
        ``PST.Wind((u, v), time.height)``.

    Raises:
        IOError: if neither file exists or a file cannot be opened.
        AttributeError: if a file lacks the UU, VV or GZ field.
        ValueError: if the vertical axes disagree or the source lies too
            far from the grid point selected.
    """
    print("\nGathering GEM data...")
    # Longitude is converted to the 0..360 convention before the lookup.
    t_lon = 360. + time.lon if time.lon < 0. else time.lon
    t_lat = time.lat
    decimal_hour = time.decimaltime
    hour_minus = time.hour
    hour_plus = time.hour + 1

    time_minus = PST.Time(
        dt.datetime(time.year, time.month, time.day, hour_minus, 0),
        time.source)
    time_plus = PST.Time(
        time_minus.datetimeobj + dt.timedelta(hours=1), time.source)
    file_minus = time_minus.strftime(_gemh_fmt).replace("AM", "00").replace(
        "PM", "12")
    file_plus = time_plus.strftime(_gemh_fmt).replace("AM", "00").replace(
        "PM", "12")

    have_minus = os.path.isfile(file_minus)
    have_plus = os.path.isfile(file_plus)
    if not have_minus and not have_plus:
        raise IOError(
            "Neither file on either side of the sounding time exists. Unable to read wind data"
        )
    if not have_minus:
        print(
            "File before in time does not exist, but file after in time does; using only time after sounding"
        )
        files = [file_plus, file_plus]
    elif not have_plus:
        print(
            "File after in time does not exist, but file before in time does; using only time before sounding"
        )
        files = [file_minus, file_minus]
    else:
        files = [file_minus, file_plus]

    # Each distinct file is read only once, even when it serves both ends.
    per_file = {}
    u_interp = []
    v_interp = []
    for fname in files:
        if fname not in per_file:
            per_file[fname] = _gem_wind_at_source(
                fname, t_lat, t_lon, time.height)
        u_level, v_level = per_file[fname]
        u_interp.append(u_level)
        v_interp.append(v_level)

    u = numpy.interp(decimal_hour, [hour_minus, hour_plus], u_interp)
    v = numpy.interp(decimal_hour, [hour_minus, hour_plus], v_interp)
    return PST.Wind((u, v), time.height)
def get_new_ecmwf(time, interp=True, return_stability=True,
                  return_surface=False):
    """Read 0.75 degree 6h ECMWF data for the source attached to ``time``.

    The wind at the source's height is taken from the model levels of the
    grid cell containing the source. Level heights are estimated from the
    hybrid-level pressure coefficients with a fixed 7000 m scale height.
    The 10 m wind and the total cloud cover of the same cell are read too
    and used to build a ``PST.Stability``.

    Args:
        time: a PST.Time-like object; ``source``, ``hour``, ``minute``,
            ``decimaltime`` and ``datetimeobj`` are read, and ``round`` and
            ``strftime`` are called.
        interp: when true, the files at the model steps before and after
            ``time`` are both read and the values interpolated linearly in
            time. Otherwise only the file closest in time is used.
        return_stability: append the stability object to the result.
        return_surface: append the surface (10 m) wind to the result.

    Returns:
        A list whose first element is the wind at the source height,
        followed by the stability and/or the surface wind when requested.

    Raises:
        IOError: if a GRIB file cannot be opened.
        ValueError: if the grid point selected is more than one degree from
            the source in latitude or longitude.
    """
    source = time.source
    print("\nGathering ECMWF data...")
    grib_file = _new_ecmwf_fmt
    step = _new_ecmwf_step
    hour = time.hour
    minute = time.minute
    tlon = source.lon % 360
    tlat = source.lat

    # Model steps on either side of the requested time.
    hour_minus = int(step * (time.decimaltime // step))
    hour_plus = hour_minus + step
    delta_minus = dt.timedelta(hours=(hour_minus - hour), minutes=-minute)
    delta_plus = dt.timedelta(hours=hour_plus - hour, minutes=-minute)
    time_minus = PST.Time(time.datetimeobj + delta_minus, source)
    time_plus = PST.Time(time.datetimeobj + delta_plus, source)
    decimal_hour = hour + minute / 60.
    file_minus = time_minus.strftime(grib_file)
    file_plus = time_plus.strftime(grib_file)
    time_closest = time.round(step * 3600)
    file_closest = time_closest.strftime(grib_file)

    if interp:
        files = [file_minus, file_plus]
        hours = [hour_minus, hour_plus]
    else:
        files = [file_closest]
        hours = [decimal_hour]

    stack = source.height
    H = 7000.  # scale height used to turn pressure into height
    u_interp = []
    v_interp = []
    usurf_interp = []
    vsurf_interp = []
    cloud_interp = []
    for file_name in files:
        try:
            grib = pygrib.open(file_name)
        except IOError:
            raise IOError("Could not open file {0}".format(file_name))
        except Exception:
            print("Unexpected Error follows:")
            raise
        try:
            u_list = grib.select(name='U component of wind')
            v_list = grib.select(name='V component of wind')
            Psurface = grib.select(name='Surface pressure')[0].values
            cloud = grib.select(name="Total cloud cover")[0].values
            u_surf_list = grib.select(
                name="10 metre U wind component")[0].values
            v_surf_list = grib.select(
                name="10 metre V wind component")[0].values

            # pv holds the 61 A coefficients followed by the B coefficients;
            # A is divided by 100 to match the surface pressure below.
            pv = u_list[0].pv
            A = pv[:61] / 100.
            B = pv[61:]

            grid_lats, grid_lons = u_list[0].latlons()
            lat_ax = grid_lats[:, 0]
            lon_ax = grid_lons[0, :]
            dlat = lat_ax[1] - lat_ax[0]
            dlon = lon_ax[1] - lon_ax[0]
            lat_row = int((tlat - lat_ax[0]) // dlat)
            lon_col = int((tlon - lon_ax[0]) // dlon)
            lat_error = lat_ax[lat_row] - tlat
            lon_error = lon_ax[lon_col] - tlon
            # Compare magnitudes: a negative index wraps to the far side of
            # the grid and can produce a large error of either sign.
            if abs(lat_error) > 1. or abs(lon_error) > 1.:
                raise ValueError(
                    "Lat/lon disagree by more than the resolution: Errors are ({0}, {1})"
                    .format(lat_error, lon_error))

            p0 = Psurface[lat_row, lon_col] / 100.

            # Step through the levels until one lies below the stack height.
            prev = 0
            for i in range(60):
                Pk0 = A[i] + B[i] * p0
                Pk1 = A[i + 1] + B[i + 1] * p0
                Pk = 0.5 * (Pk0 + Pk1)
                z = H * math.log(p0 / Pk)
                if z < stack:
                    h1 = z
                    h2 = prev
                    break
                prev = z
            else:
                # No level is below the stack; i is left on the last level
                # (59) and that level's height is paired with the stack
                # height.
                h1 = z
                h2 = stack

            u1 = u_list[i].values[lat_row, lon_col]
            u2 = u_list[i - 1].values[lat_row, lon_col]
            v1 = v_list[i].values[lat_row, lon_col]
            v2 = v_list[i - 1].values[lat_row, lon_col]
            u_interp.append(numpy.interp(stack, [h1, h2], [u1, u2]))
            v_interp.append(numpy.interp(stack, [h1, h2], [v1, v2]))
            cloud_interp.append(cloud[lat_row, lon_col])
            usurf_interp.append(u_surf_list[lat_row, lon_col])
            vsurf_interp.append(v_surf_list[lat_row, lon_col])
        finally:
            # Release the GRIB file whether or not the read succeeded.
            grib.close()

    u_final = numpy.interp(decimal_hour, hours, u_interp)
    v_final = numpy.interp(decimal_hour, hours, v_interp)
    usurf_final = numpy.interp(decimal_hour, hours, usurf_interp)
    vsurf_final = numpy.interp(decimal_hour, hours, vsurf_interp)
    cloud_final = numpy.interp(decimal_hour, hours, cloud_interp)

    surface_wind = PST.Wind((usurf_final, vsurf_final), 0.)
    stability = PST.Stability(surface_wind.speed, cloud_final)
    return_values = [PST.Wind((u_final, v_final), stack)]
    if return_stability:
        return_values.append(stability)
    if return_surface:
        return_values.append(surface_wind)
    return return_values
def get_ecmwf_highres(time, surface=True):
    """Read 0.3 degree ECMWF data and return the wind and stability.

    The files for the hour containing ``time`` and for the following hour
    are located with ``_ecmwfh_fmt``. In each, the grid cell containing
    (``time.lat``, ``time.lon``) is sampled: the model-level wind is
    interpolated to ``time.height`` (level heights are estimated from the
    hybrid-level pressure coefficients with a fixed 7000 m scale height),
    and the 10 m wind and total cloud cover are read. The two files are
    then interpolated linearly in time.

    Args:
        time: a PST.Time-like object; ``source``, ``year``, ``month``,
            ``day``, ``hour``, ``decimaltime``, ``lat``, ``lon`` and
            ``height`` are read.
        surface: when true, the stability is computed from the speed of the
            10 m wind; otherwise from the speed of the wind at
            ``time.height``.

    Returns:
        A tuple ``(wind, stability)`` where ``wind`` is the wind at
        ``time.height``.

    Raises:
        IOError: if a file does not exist or cannot be opened.
        AttributeError: if a file is missing an expected attribute.
        IndexError: if the fields cannot be reshaped to the expected grid.
        ValueError: if the grid point selected is further than the grid
            resolution from the requested position.
    """
    print("Gathering ECMWF data...")
    source = time.source
    hour_minus = time.hour
    hour_plus = time.hour + 1
    time_minus = PST.Time(
        dt.datetime(time.year, time.month, time.day, hour_minus, 0), source)
    time_plus = PST.Time(
        time_minus.datetimeobj + dt.timedelta(hours=1), source)
    file_minus = time_minus.strftime(_ecmwfh_fmt)
    file_plus = time_plus.strftime(_ecmwfh_fmt)

    lat_resolution, lon_resolution = (0.3, 0.3)
    # Shape of the usable part of the grid, and the number of values in it.
    grid_shape = (150, 372)
    grid_size = 55800
    H = 7000.  # scale height used to turn pressure into height

    u_time_interpolate = []
    v_time_interpolate = []
    u_surface_interp = []
    v_surface_interp = []
    cloud_fractions = []
    for file_name in [file_minus, file_plus]:
        if not os.path.isfile(file_name):
            raise IOError('File {0} does not exist'.format(file_name))
        try:
            ecmwf = pygrib.open(file_name)
        except IOError:
            print("IOError: Could not open file {0}".format(file_name))
            raise
        except Exception:
            print("Unexpected error occured")
            raise

        try:
            try:
                UU = ecmwf.select(name='U component of wind')
                VV = ecmwf.select(name='V component of wind')
                P_surface = ecmwf.select(name='Surface pressure')[0]
                P_surface.expand_grid(0)
                P_surface = numpy.reshape(
                    P_surface.values[:grid_size], grid_shape)
                cloud_fraction = ecmwf.select(
                    name="Total cloud cover")[0].values
                U_surface = ecmwf.select(
                    name="10 metre U wind component")[0].values[:-1]
                V_surface = ecmwf.select(
                    name="10 metre V wind component")[0].values[:-1]
            except AttributeError:
                raise AttributeError(
                    "File {0} is missing some expected attributes".format(
                        file_name))

            # This data is missing part of the last row (33.3 to 33.0
            # degrees latitude for -109.8 to -64.8 degrees longitude) so
            # ignore the last row of the data (last 151 values).
            levels = len(UU)
            half_levels = levels + 1
            U = []
            V = []
            try:
                for level in range(levels):
                    UU[level].expand_grid(0)
                    U.append(numpy.reshape(
                        UU[level].values[:grid_size], grid_shape))
                    VV[level].expand_grid(0)
                    V.append(numpy.reshape(
                        VV[level].values[:grid_size], grid_shape))
                latitudes = numpy.reshape(
                    UU[0].latitudes[:grid_size], grid_shape)[:, 0]
                longitudes = numpy.reshape(
                    UU[0].longitudes[:grid_size], grid_shape)[0] - 360.
            except Exception:
                raise IndexError(
                    "Problem reshaping U, V, lat, lon arrays in file {0}".format(
                        file_name))
            U = numpy.array(U)
            V = numpy.array(V)

            # pv holds the A coefficients followed by the B coefficients.
            # A is divided by 100; a new array is built rather than
            # dividing in place so the array returned by pv is untouched.
            A = UU[0].pv[:half_levels] / 100.
            B = VV[0].pv[half_levels:]

            lat_row = int((latitudes[0] - time.lat) // lat_resolution)
            lon_col = int((time.lon - longitudes[0]) // lon_resolution)
            lat_check = abs(latitudes[lat_row] - time.lat) < lat_resolution
            lon_check = abs(longitudes[lon_col] - time.lon) < lon_resolution
            if not lat_check or not lon_check:
                raise ValueError(
                    "Latitude and longitude points are further than the resolution away from the source. Source position ({0},{1}); calculated as ({2},{3})"
                    .format(time.lat, time.lon, latitudes[lat_row],
                            longitudes[lon_col]))

            # Surface pressure is scaled by 1/100 to share units with A,
            # as get_new_ecmwf does; mixing the two distorts level heights.
            p0 = P_surface[lat_row, lon_col] / 100.

            # Step through the levels until one lies below the height.
            z_prev = 0.
            for k in range(levels - 1):
                Pk0 = A[k] + B[k] * p0
                Pk1 = A[k + 1] + B[k + 1] * p0
                Pk = 0.5 * (Pk0 + Pk1)
                z = H * math.log(p0 / Pk)
                if z < time.height:
                    h1 = z
                    h2 = z_prev
                    break
                z_prev = z
            else:
                # No level is below the requested height: use the last one.
                k = levels - 1
                h1 = h2 = time.height

            u1 = U[k][lat_row, lon_col]
            u2 = U[k - 1][lat_row, lon_col]
            v1 = V[k][lat_row, lon_col]
            v2 = V[k - 1][lat_row, lon_col]
            u_time_interpolate.append(
                numpy.interp(time.height, [h1, h2], [u1, u2]))
            v_time_interpolate.append(
                numpy.interp(time.height, [h1, h2], [v1, v2]))
            u_surface_interp.append(U_surface[lat_row, lon_col])
            v_surface_interp.append(V_surface[lat_row, lon_col])
            cloud_fractions.append(cloud_fraction[lat_row, lon_col])
        finally:
            # Release the GRIB file whether or not the read succeeded.
            ecmwf.close()

    hours = [hour_minus, hour_plus]
    u = numpy.interp(time.decimaltime, hours, u_time_interpolate)
    v = numpy.interp(time.decimaltime, hours, v_time_interpolate)
    u_surface = numpy.interp(time.decimaltime, hours, u_surface_interp)
    v_surface = numpy.interp(time.decimaltime, hours, v_surface_interp)

    wind = PST.Wind((u, v), time.height)
    if surface:
        reference_wind = PST.Wind((u_surface, v_surface), 10.)
    else:
        reference_wind = wind
    stability = PST.Stability(reference_wind.speed, min(cloud_fractions))
    return (wind, stability)