def main(organisation, start_dt, end_dt, station_id):
    '''
    This controls all the functions that we have written. It should be generic (i.e. works for all NMSs).

    :param start_dt: datetime object specifying the start of the period
    :param end_dt: datetime object specifying the end of the period
    :param station_id: integer relating to a station (we may add in a geographical search later)
    :return: Lots of plots in a directory and an html page allowing navigation of the plots

    TODO: Geographical search of station data. Requires a file that relates station_id to lat/lons (see the sketch after this function)
    TODO: Retrieve UM / WRF model data and plot on the same figure (possibly a reduced set of variables)
    TODO: Create a web page of plots to allow easy viewing
    '''
    # Set some location-specific defaults
    settings = config.load_location_settings(organisation)

    # Get the obs data
    for st_id in station_id:
        df = getData(start_dt, end_dt, settings, st_id)

        # Get the model data

        # Make the obs only plots
        plotsettings = {
            'plotdir': settings['plot_dir'],
            'station_id': st_id,
            'start': start_dt,
            'end': end_dt
        }
        plotStationData(df, plotsettings)
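# The first TODO above needs a station_id -> lat/lon lookup. A minimal sketch, assuming a
# hypothetical CSV 'station_locations.csv' with columns station_id, lat, lon; neither the
# file nor find_stations_bybox exists in this codebase yet:

import pandas as pd

def find_stations_bybox(bbox, locations_file='station_locations.csv'):
    # Return the station_ids whose lat/lon falls inside bbox = [xmin, ymin, xmax, ymax]
    xmin, ymin, xmax, ymax = bbox
    df = pd.read_csv(locations_file)
    inside = (df['lon'] >= xmin) & (df['lon'] <= xmax) & \
             (df['lat'] >= ymin) & (df['lat'] <= ymax)
    return df.loc[inside, 'station_id'].tolist()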
def main(start_date, end_date, obstype, agency):
    settings = config.load_location_settings(agency)
    start_date = datetime.strptime(str(start_date), '%Y%m%d')
    end_date = datetime.strptime(str(end_date), '%Y%m%d')
    outdir = settings["datadir"]
    syn_times = ['{:02d}'.format(i) for i in range(0, 24, 3)]
    temp_times = ['{:02d}'.format(i) for i in range(0, 24, 12)]

    for single_date in daterange(start_date, end_date):
        year, month, day = getYMD(single_date)
        syn_dir = outdir + "/synop/" + year + "/" + month + "/" + day
        temp_dir = outdir + "/temp/" + year + "/" + month + "/" + day
        if not os.path.isdir(syn_dir):
            mkdir_p(syn_dir)
        if not os.path.isdir(temp_dir):
            mkdir_p(temp_dir)

        if obstype == 'synop':
            for time in syn_times:
                url = settings['db_link'] + 'user=' + settings['db_uname'] + \
                      '&mode=web&dateRef=' + year + month + day + time + '0000' + \
                      '&timeDepth=1H&obsType=SYNOP' + \
                      '&param=TH-PMER-TD-T-N-WIND-HU-TEND-WW_symb-VISI-CL_symb-CM_symb-CH_symb-RAF1-RAF2-TN12-TX12-TMIN10' + \
                      '&level=0GRND&format=csv&output=binary&bbox=Indonesia'
                r = requests.get(url)
                file_write = syn_dir + "/SYNOP" + year + month + day + time + ".csv"
                open(file_write, 'wb').write(r.content)
                mergedata(file_write, obstype)

        if obstype == 'temp':
            for time in temp_times:
                url = settings['db_link'] + 'user=' + settings['db_uname'] + \
                      '&mode=web&dateRef=' + year + month + day + time + '0000' + \
                      '&timeDepth=1H&obsType=TEMP' + \
                      '&param=TH-T-TD-HU-Z-WIND-TURBUL' + \
                      '&level=1050HPA-1000HPA-950HPA-900HPA-850HPA-800HPA-700HPA-600HPA-500HPA-400HPA-300HPA-250HPA-200HPA-150HPA-100HPA-70HPA-50HPA' + \
                      '&format=csv&output=binary&bbox=Indonesia'
                r = requests.get(url)
                file_write = temp_dir + "/TEMP" + year + month + day + time + ".csv"
                open(file_write, 'wb').write(r.content)
                mergedata(file_write, obstype)
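# daterange, getYMD and mkdir_p are used throughout this codebase but not defined in this
# section. A minimal sketch of what they are assumed to do (the exact semantics, e.g.
# whether daterange includes the end date, are assumptions):

import os
import datetime as dt

def daterange(start_date, end_date):
    # Yield each day from start_date up to (but not including) end_date
    for n in range((end_date - start_date).days):
        yield start_date + dt.timedelta(days=n)

def getYMD(single_date):
    # Return zero-padded year, month and day strings for building paths
    return single_date.strftime('%Y'), single_date.strftime('%m'), single_date.strftime('%d')

def mkdir_p(path):
    # Equivalent of 'mkdir -p': create the directory tree, ignoring it if it already exists
    os.makedirs(path, exist_ok=True)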
def main(start, end, organisation):
    # Do some downloading
    settings = config.load_location_settings(organisation)
    modelcheck = ['analysis', 'ga7', 'km4p4', 'km1p5']
    for model_id in modelcheck:
        filelist = getUM(start, end, model_id, settings)
def main(organisation, dtime):
    settings = load_location_settings(organisation)
    ddir = settings['sounding_path']
    temp, wind = get_variables(ddir, dtime)
    # Tanay sounding station (Philippines)
    lat0 = 14.5812
    lon0 = 121.3693
    tanay_temp = get_sounding_temp(temp, lat0, lon0)
    tanay_wind = get_sounding_wind(wind, lat0, lon0)
def main(organisation, start_dt, end_dt, station_id):
    # Set some location-specific defaults
    settings = config.load_location_settings(organisation)

    # Get the obs data
    for st_id in station_id:
        input_dict, dates = getData(start_dt, end_dt, settings, st_id)
        for thisdt in dates:
            plot_fname = settings['plot_dir'] + '/upper-air/' + thisdt + '_' + str(st_id) + '.png'
            tephi_plot(st_id, thisdt, input_dict, plot_fname, style_dict=None)
def main(latency, dt_startdt, dt_enddt, plotdomain, region_name, eventname, organisation):
    # Set some things at the start ...
    settings = config.load_location_settings(organisation)
    rootdir = settings['plot_dir']
    # region_name = sf.getDomain_bybox(plotdomain).lower()
    template_file = 'gpm_template.html'
    outdir = rootdir + region_name + '/' + eventname + '/gpm/'
    local_dir = outdir
    # local_dir = os.environ['HOME'] + '/public_html/' + region_name + '/gpm_casestudies/'
    # if not os.path.isdir(local_dir):
    #     os.makedirs(local_dir)
    overwrite = True

    # TODO: make plotdomains consistent across all code (perhaps a dictionary item rather than a list)
    domain = [plotdomain[0], plotdomain[2], plotdomain[1], plotdomain[3]]

    # Change the start and end dates into datetime objects
    # dt_startdt = datetime.strptime(dt_start, "%Y%m%d")
    # dt_enddt = datetime.strptime(dt_end, "%Y%m%d") + dt.timedelta(days=1)  # Add an extra day so that we include the whole of the last day in the range
    dt_outhtml = dt_enddt

    # Make output dirs
    mkOutDirs(dt_startdt, dt_enddt, outdir)

    cube_dom = sf.getGPMCube(dt_startdt, dt_enddt, latency, plotdomain, settings, aggregate=False)
    cube_dom = addTimeCats(cube_dom[0])

    accums = ['30mins', '3hr', '6hr', '12hr', '24hr']  # ['12hr', '24hr']
    for accum in accums:
        print(accum)
        filelist = nrtplt.plotGPM(cube_dom, outdir, domain, overwrite, accum)
        out_html_file = outdir + dt_outhtml.strftime("%Y") + '/' + dt_outhtml.strftime("%m") + \
                        '/' + 'gpm_' + accum + '_' + dt_outhtml.strftime("%Y%m%dT%H%MZ") + '.html'
        writeHTML(filelist, local_dir, template_file, out_html_file, dt_startdt, dt_enddt,
                  accum, eventname, settings)

    pt.create_summary_html(settings)
def main(start=None, end=None, bbox=None):
    '''
    This function is callable from the command line. It simply checks the FTP for data relating to
    an event, and downloads anything that doesn't already exist in the local datadir.

    The region_name can also be 'RealTime', in which case the script will check the RealTime folder
    on the FTP for files. The RealTime folder should contain files from the last ~12 model runs.
    Older files will be deleted!

    :param start: datetime
    :param end: datetime
    :param bbox: list. Format [xmin, ymin, xmax, ymax]
    :return: a list of files that are available locally following download
    '''
    settings = config.load_location_settings()
    if not start:
        start = settings['start']
    if not end:
        end = settings['end']
    if not bbox:
        bbox = settings['bbox']

    domain = sf.getModelDomain_bybox(bbox)

    # What files exist locally?
    local_files = get_local_flist(start, end, settings)

    # What files exist on the FTP?
    ftp_files = get_ftp_flist(domain, settings)

    # Which files are on the FTP, but not in the local directory?
    local_files_basenames = [os.path.basename(fn) for fn in local_files]
    ftp_notlocal = [ftpfn for ftpfn in ftp_files
                    if not os.path.basename(ftpfn) in local_files_basenames]

    # Download missing files
    if ftp_notlocal:
        ftp_download_files(ftp_notlocal, settings)

    # New list of local files
    ofilelist = get_local_flist(start, end, settings)

    return ofilelist
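# get_ftp_flist is defined elsewhere; a minimal sketch of what it might look like using the
# standard-library ftplib. The settings keys ('ftp_url', 'ftp_user', 'ftp_pass') and the
# remote directory layout (the '/WCSSP/' root appears elsewhere in this codebase) are
# assumptions, not the project's confirmed implementation:

from ftplib import FTP

def get_ftp_flist(domain, settings):
    # List the files currently available on the FTP server for this domain
    ftp = FTP(settings['ftp_url'])
    ftp.login(settings['ftp_user'], settings['ftp_pass'])
    ftp.cwd('/WCSSP/' + domain)
    flist = ftp.nlst()
    ftp.quit()
    return flist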
def main(start_date, end_date, agency, station_id):
    settings = config.load_location_settings(agency)
    start_date = datetime.strptime(str(start_date), '%Y%m%d')
    end_date = datetime.strptime(str(end_date), '%Y%m%d')
    station_id = int(station_id)
    outdir = settings["datadir"]
    syn_times = ['{:02d}'.format(i) for i in range(0, 24, 3)]

    flist = []
    for single_date in daterange(start_date, end_date):
        year, month, day = getYMD(single_date)
        for time in syn_times:
            syn_dir = outdir + "/synop/" + year + "/" + month + "/" + day
            file = syn_dir + "/SYNOP" + year + month + day + time + ".csv"
            flist.append(file)

    df = get_data(flist, station_id)
    plotdata(df, station_id, start_date, end_date)
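# get_data and plotdata are defined elsewhere. A minimal sketch of what get_data is assumed
# to do with the file list built above; the 'station_id' column name in the SYNOP csv files
# is an assumption:

import os
import pandas as pd

def get_data(flist, station_id):
    # Concatenate the daily SYNOP csv files that exist on disk, then subset one station
    dfs = [pd.read_csv(f) for f in flist if os.path.isfile(f)]
    df = pd.concat(dfs, ignore_index=True)
    return df[df['station_id'] == station_id]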
def main():
    plot_scripts = ['plot_walkercirculation.py']
    # Full list: ['plot_thermodynamics.py', 'plot_precip.py', 'plot_tephi.py', 'plot_walkercirculation.py']
    code_dir = os.getcwd()

    # Reads the std_domains file and creates extract jobs for each domain (realtime and case study)
    df = pd.read_csv('std_domains.csv')
    df2plot = df.loc[df['run_plotting'], :]

    for row in df2plot.itertuples():
        for plt_script in plot_scripts:
            now = dt.datetime.utcnow().replace(minute=0, second=0, microsecond=0)
            try:
                os.environ['start'] = dt.datetime.strptime(row.start, '%Y%m%d').strftime('%Y%m%d%H%M')
            except (TypeError, ValueError):
                os.environ['start'] = (now - dt.timedelta(days=7)).strftime('%Y%m%d%H%M')
            try:
                os.environ['end'] = dt.datetime.strptime(row.end, '%Y%m%d').strftime('%Y%m%d%H%M')
            except (TypeError, ValueError):
                os.environ['end'] = now.strftime('%Y%m%d%H%M')
            os.environ['region_name'] = row.region_name
            os.environ['location_name'] = row.location_name
            os.environ['bbox'] = row.bbox
            os.environ['ftp_upload'] = str(row.ftp_upload)

            settings = config.load_location_settings()

            if not settings['organisation'] == 'UKMO':
                # Run the python script directly
                subprocess.Popen(['python', plt_script])
            else:
                # At UKMO, write a batch script and submit it to SLURM (see the example below)
                shell_script = code_dir + '/batch_output/run_plots_' + row.location_name + '_' + \
                               row.start + '_' + plt_script.replace('.py', '') + '.sh'
                with open(shell_script, 'w') as the_file:
                    the_file.write('#!/bin/bash -l\n')
                    the_file.write('#SBATCH --qos=long\n')
                    the_file.write('#SBATCH --mem=10000\n')
                    the_file.write('#SBATCH --output=' + shell_script.replace('.sh', '_%j_%N.out') + '\n')
                    the_file.write('#SBATCH --time=4320\n')
                    the_file.write('\n')
                    the_file.write('. ~/.profile\n')
                    the_file.write('export start=' + os.environ["start"] + '\n')
                    the_file.write('export end=' + os.environ["end"] + '\n')
                    the_file.write('export region_name=' + os.environ["region_name"] + '\n')
                    the_file.write('export location_name=' + os.environ["location_name"] + '\n')
                    the_file.write('export bbox=' + os.environ["bbox"] + '\n')
                    the_file.write('export ftp_upload=' + os.environ["ftp_upload"] + '\n')
                    the_file.write('conda activate scitools\n')
                    the_file.write('\n')
                    the_file.write('cd ' + code_dir + '\n')
                    the_file.write('echo Running: ' + plt_script + '\n')
                    the_file.write('python ' + plt_script + '\n')
                # Make the script executable, then submit it
                st = os.stat(shell_script)
                os.chmod(shell_script, st.st_mode | stat.S_IEXEC)
                print(shell_script)
                subprocess.run(['sbatch', shell_script], capture_output=True)
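# For reference, a generated batch script reconstructed directly from the write calls above;
# the paths, dates and bbox string are illustrative values, not taken from a real run:
EXAMPLE_BATCH_SCRIPT = """#!/bin/bash -l
#SBATCH --qos=long
#SBATCH --mem=10000
#SBATCH --output=/path/to/code/batch_output/run_plots_PeninsularMalaysia_20200101_plot_walkercirculation_%j_%N.out
#SBATCH --time=4320

. ~/.profile
export start=202001010000
export end=202001080000
export region_name=SEAsia
export location_name=PeninsularMalaysia
export bbox=95,-10,141,10
export ftp_upload=True
conda activate scitools

cd /path/to/code
echo Running: plot_walkercirculation.py
python plot_walkercirculation.py
"""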
    start_date   # Formatted as YYYYMMDD
    end_date     # Formatted as YYYYMMDD
    [PAGASA|BMKG|MMD|UKMO|Andy-MacBook]

    Example: python downloadGPM.py auto 20191103 20191105 Andy-MacBook
    '''
    agency = os.environ['organisation']  # UKMO or PAGASA or BMKG or MMD or Andy-MacBook
    now = dt.datetime.utcnow()

    # NB: 'auto' latency means that the most scientifically robust dataset is chosen
    latency = 'auto'  # Can be 'production', 'NRTlate', 'NRTearly', 'all' or 'auto'

    settings = config.load_location_settings()
    try:
        start_date = settings['start']
    except KeyError:
        start_date = now.date() - dt.timedelta(days=7)
    try:
        end_date = settings['end']
    except KeyError:
        end_date = now.date()

    # Decide which latency to run the program with
    if settings['gpm_latency'] == 'all':
        for l in ['production', 'NRTlate', 'NRTearly']:
            settings['gpm_latency'] = l
def main(organisation):
    settings = config.load_location_settings(organisation)
    url = create_summary_html(settings)
    print('Created: ', url)
def main(start=None, end=None, region_name=None, location_name=None, bbox=None, model_ids=None,
         ftp_upload=None):
    '''
    Loads data and runs all the precip plotting routines. The following variables are picked up
    from the settings dictionary.

    :param start: datetime for the start of the case study
    :param end: datetime for the end of the case study
    :param region_name: String. Larger region, e.g. 'SEAsia' or 'EastAfrica'
    :param location_name: String. Zoom area within the region, e.g. 'PeninsularMalaysia'
    :param bbox: List. Format [xmin, ymin, xmax, ymax]
    :param model_ids: list of model_ids
    :param ftp_upload: boolean
    :return: lots of plots
    '''
    settings = config.load_location_settings()
    if not start:
        start = settings['start']
    if not end:
        end = settings['end']
    if not region_name:
        region_name = settings['region_name']
    if not location_name:
        location_name = settings['location_name']
    if not bbox:
        bbox = settings['bbox']
    if not model_ids:
        model_ids = settings['model_ids']
    if not ftp_upload:
        ftp_upload = settings['ftp_upload']

    # Gets all the stash codes tagged as share
    stashdf = sf.get_default_stash_proc_codes(list_type='long')

    # Expand the bbox slightly to account for wind plots needing a larger area
    bbox = (bbox + np.array([-2, -2, 2, 2])).tolist()

    # In case no model_ids are specified in the csv table (unlikely!)
    if not model_ids:
        model_ids = sf.getModels_bybox(bbox)['model_list']

    # Gets the large scale bbox name (either 'SEAsia', 'Africa', or 'global')
    domain = sf.getModelDomain_bybox(bbox)
    full_model_ids_avail = sf.getModelID_byJobID(
        sf.getJobID_byDateTime(start, domain=domain, searchtxt=model_ids),
        searchtxt=model_ids)
    regbbox = sf.getBBox_byRegionName(domain)

    # Set ftp path etc
    ftp_list = []
    ftp_path = '/WCSSP/' + region_name + '/' + location_name + '/'
    remove_old = True

    # km1p5 data is too big to send in realtime
    full_model_ids_avail = [mi for mi in full_model_ids_avail if not 'km1p5' in mi]

    for model_id in full_model_ids_avail:
        for row in stashdf.itertuples(index=False):
            print(model_id, row.stash, row.lbproc)

            # For this model, get all the available start and end datetimes
            if model_id == 'analysis':
                ana_start = start.replace(hour=0, minute=0) - dt.timedelta(days=1)
                init_times = sf.make_timeseries(ana_start, end, 6)
            else:
                init_times = sf.getInitTimes(start, end, domain, model_id=model_id)

            # Decides whether we want tropics, region or event domains for this
            # model_id / stash / lbproc combination
            bboxes = domain_size_decider(row, model_id, regbbox, bbox)

            # Checks whether files exist for this combination of region / location / init_time /
            # model_id / stash / lbproc
            extract_to_scratch, to_postprocess, remove_from_scratch, ftp_list = check_ofiles(
                init_times, row.stash, row.lbproc, bboxes, model_id, ftp_list, settings)

            for e2s in extract_to_scratch:
                try:
                    if model_id == 'analysis':
                        filelist_models = sf.selectAnalysisDataFromMass(
                            e2s['it'], e2s['it'], row.stash, lbproc=row.lbproc, lblev=row.levels)
                    else:
                        filelist_models = sf.selectModelDataFromMASS(
                            [e2s['it']], e2s['stash'], lbproc=e2s['lbproc'], lblev=row.levels,
                            domain=sf.getModelDomain_bybox(bbox), plotdomain=bbox,
                            modelid_searchtxt=model_id)
                except:
                    pass

            for scratchfile in to_postprocess:
                if os.path.isfile(scratchfile):
                    ofilelist = post_process(start, end, bboxes, scratchfile, row, settings)
                    if ofilelist:
                        ftp_list.extend(ofilelist)

            for scratchfile in remove_from_scratch:
                if os.path.isfile(scratchfile):
                    print('   Removing:', scratchfile)
                    os.remove(scratchfile)

        try:
            # For this model_id, check for temp and RH files in the output dir, and if present,
            # calculate thermodynamic indices, and add to ftp_list
            indices_list = check_indices(init_times, model_id, settings)
            if indices_list:
                ftp_list.extend(indices_list)
        except:
            pass

    if ftp_upload and ftp_list:
        sf.send_to_ftp(ftp_list, ftp_path, settings, removeold=remove_old)
def main(start=None, end=None, region_name=None, location_name=None, bbox=None):
    '''
    Loads data and runs all the precip plotting routines. The following variables are picked up
    from the settings dictionary.

    :param start: datetime for the start of the case study
    :param end: datetime for the end of the case study
    :param region_name: String. Larger region, e.g. 'SEAsia' or 'EastAfrica'
    :param location_name: String. Zoom area within the region, e.g. 'PeninsularMalaysia'
    :param bbox: List. Format [xmin, ymin, xmax, ymax]
    :return: lots of plots
    '''
    settings = config.load_location_settings()
    if not start:
        start = settings['start']
    if not end:
        end = settings['end']
    if not region_name:
        region_name = settings['region_name']
    if not location_name:
        location_name = settings['location_name']
    if not bbox:
        bbox = settings['bbox']

    # Get the region plot bbox
    # NB: You can add to this by adding your own REGIONAL item to the dictionary in sf.getBBox_byRegionName
    reginfo = sf.getRegionBBox_byBBox(bbox)
    region_bbox = reginfo['region_bbox']  # sf.getBBox_byRegionName(sf.getModelDomain_bybox(bbox))

    # Make the start at 0000UTC of the first day and the end 0000UTC of the last day
    start = start.replace(hour=0, minute=0, second=0, microsecond=0)
    end = (end + dt.timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)

    # Set model ids to plot (by checking the available data on disk)
    full_file_list = get_local_flist(start, end, settings, region_type='event')
    model_ids = list(set([os.path.basename(fn).split('_')[1] for fn in full_file_list]))
    # e.g. ['analysis', 'ga7', 'km4p4', 'km1p5']

    # Time aggregation periods for all plots
    timeaggs = [1, 3, 6, 12, 24]  # 72, 96, 120

    # The names used to describe the plot areas
    bbox_name = location_name
    region_bbox_name = reginfo['region_name']

    # Make an empty list for storing precip png plots
    ofiles = []

    # Run plotting functions
    ofiles = plot_postage(start, end, timeaggs, model_ids, region_name, location_name, bbox,
                          bbox_name, settings, ofiles)
    ofiles = plot_gpm(start, end, timeaggs, region_name, location_name, bbox, bbox_name,
                      settings, ofiles)
    ofiles = plot_gpm(start, end, timeaggs, region_name, location_name, region_bbox,
                      region_bbox_name, settings, ofiles)
    ofiles = plot_regional_plus_winds(start, end, model_ids, region_name, location_name,
                                      region_bbox, region_bbox_name, settings, ofiles)

    html.create(ofiles)
def main(start=None, end=None, region_name=None, location_name=None, model_ids=None):
    '''
    Runs code to plot the large scale tropical circulation using the UM analysis.

    :param start: datetime. Event start
    :param end: datetime. Event end
    :param region_name: String. Larger region, e.g. 'SEAsia'
    :param location_name: String. Zoom area within the region
    :param model_ids: list. Could include 'analysis' or 'global'
    :return: png files in the plot directory for the region_name
    '''
    settings = config.load_location_settings()
    if not start:
        start = settings['start']
    if not end:
        end = settings['end']
    if not region_name:
        region_name = settings['region_name']
    if not location_name:
        location_name = settings['location_name']
    if not model_ids:
        model_ids = settings['model_ids']

    analysis_incr = 6
    model_ids = ['analysis']  # NB: hard-coded here, overriding the argument
    ofiles = []
    lat_ranges = [(-5, 5), (5, 15), (-10, 10)]

    for model_id in model_ids:
        analysis_datetimes = sf.make_timeseries(start, end, analysis_incr)
        for this_dt in analysis_datetimes:
            # Format this datetime
            this_dt_fmt = this_dt.strftime('%Y%m%dT%H%MZ')
            print('Walker Circulation plotting:', this_dt_fmt)

            vars = ['Uwind-levels', 'Vwind-levels', 'Wwind-levels']
            data = load_data.unified_model(this_dt - dt.timedelta(hours=24), this_dt, settings,
                                           region_type='tropics', model_id=model_id, var=vars,
                                           aggregate=True, totals=False)
            try:
                k = list(data.keys())[0]  # Gets the model_id recorded in the data dictionary
            except:
                continue

            # Make sure we have data for all vars
            dks = data[k].keys()
            if not set(dks).issuperset(set(vars)):
                continue

            u = data[k]['Uwind-levels']
            v = data[k]['Vwind-levels']
            w = data[k]['Wwind-levels']

            if u and v and w:
                for lats in lat_ranges:
                    # Make nice strings of the lat min and max
                    lat0 = str(abs(lats[0])) + 'S' if lats[0] < 0 else str(abs(lats[0])) + 'N'
                    lat1 = str(abs(lats[1])) + 'S' if lats[1] < 0 else str(abs(lats[1])) + 'N'

                    # Set the output file
                    # Args: region_name, location_name, validtime, modelid, timeagg, plottype,
                    #       plotname, fclt, outtype='filesystem'
                    ofile = sf.make_outputplot_filename(region_name, location_name, this_dt,
                                                        model_id, 'Tropics-' + lat0 + '-to-' + lat1,
                                                        'Instantaneous', 'large-scale',
                                                        'walker-circulation', 'T+0')
                    try:
                        if not os.path.isfile(ofile):
                            print('Plotting:', ofile)
                            plot_walker(u, v, w, ofile, lats=lats)
                            # Append to the list of ofiles
                            ofiles.append(ofile)
                    except:
                        continue

    # If this is a realtime plot, make a symbolic link to the most recent files in ofiles
    if end > (dt.datetime.utcnow() - dt.timedelta(days=2)):
        make_symlinks(ofiles)

    # Make the html file so that the images can be viewed
    html.create(ofiles)
def main(start=None, end=None, region_name=None, location_name=None, bbox=None, model_ids=None,
         ftp_upload=None):
    settings = config.load_location_settings()
    if not start:
        start = settings['start']
    if not end:
        end = settings['end']
    if not region_name:
        region_name = settings['region_name']
    if not location_name:
        location_name = settings['location_name']
    if not bbox:
        bbox = settings['bbox']
    if not model_ids:
        model_ids = settings['model_ids']
    if not ftp_upload:
        ftp_upload = settings['ftp_upload']

    overwrite = True
    satellite, area, area_name, productid, proj4string, img_bnd_coords = getAutoSatDetails(bbox)

    sat_scratch = settings['scratchdir'].rstrip('/') + '/ModelData/autosat'
    sat_datadir = settings['datadir'].rstrip('/') + '/satellite_olr/' + region_name + '/' + location_name

    # Loop through dates
    delta = dt.timedelta(days=1)
    this_dt = start
    while this_dt <= end:
        print(this_dt.strftime('%Y-%m-%d'))
        ocube_fn = sat_datadir + '/' + this_dt.strftime('%Y%m') + '/' + productid + '_' + \
                   this_dt.strftime('%Y%m%d') + '.nc'
        if not os.path.isdir(os.path.dirname(ocube_fn)):
            os.makedirs(os.path.dirname(ocube_fn))

        if not os.path.isfile(ocube_fn):  # and (len(ifiles) > 0)
            print('   Create a daily netcdf file for', this_dt.strftime('%Y-%m-%d'))

            # Extract the data from the MASS archive, and return a sorted list of files ...
            ifiles = getOLR(this_dt, this_dt + delta, satellite, area, productid, sat_scratch)

            # Now loop through ifiles
            cubes = iris.cube.CubeList([])
            for file in ifiles:
                outtiff = os.path.splitext(file)[0] + '_ll.tif'
                if file.endswith('.nc'):
                    # For SE Asia, netcdf files are already archived in lat/lon
                    cubetmp = iris.load_cube(file)
                    u = cf_units.Unit('hours since 1970-01-01 00:00:00',
                                      calendar=cf_units.CALENDAR_STANDARD)
                    timecoord = iris.coords.DimCoord(cubetmp.coord('time').points[0],
                                                     standard_name='time', units=u)
                    array = cubetmp.data[np.newaxis, ...]
                    cubell = iris.cube.Cube(array, dim_coords_and_dims=[
                        (timecoord, 0),
                        (cubetmp.coord('latitude'), 1),
                        (cubetmp.coord('longitude'), 2)
                    ])
                elif not os.path.exists(outtiff) or overwrite:
                    # Projects and converts png or jpg to geotiff
                    cubell = projectToLatLong(file, outtiff, img_bnd_coords, proj4string)
                else:
                    # If the projection and conversion are already done, convert to a cube
                    timestamp = os.path.basename(outtiff).split('.')[0].split('_')[1]
                    cubell = sf.gdalds2cube(outtiff, timestamp)

                # Add the resulting cube to a list of cubes for this day
                cubes.append(cubell)

            # Concatenate everything together into a cube
            cube = cubes.concatenate_cube()

            print('   ... Saving netcdf file')
            # The following saves as int16 (rather than float), which cuts the filesize by 50%,
            # and means it saves much more quickly
            # cube.data = (cube * 10).data.astype(np.int16)
            iris.save(cube, ocube_fn, zlib=True)

            # Remove all temporary files
            if os.path.isfile(ocube_fn):
                print('   ... Removing temporary files')
                for fn in ifiles:
                    os.remove(fn)  # png, jpg or nc files
                    try:
                        os.remove(os.path.splitext(os.path.basename(fn))[0] + '_ll.tif')
                    except:
                        continue

        this_dt += delta
def main(start_dt=None, end_dt=None, region_name=None, location_name=None, bbox=None, model_ids=None):
    settings = config.load_location_settings()
    if not start_dt:
        start_dt = settings['start']
    if not end_dt:
        end_dt = settings['end']
    if not region_name:
        region_name = settings['region_name']
    if not location_name:
        location_name = settings['location_name']
    if not bbox:
        bbox = settings['bbox']
    if not model_ids:
        model_ids = settings['model_ids']

    # Get Data
    obsdata, stations = getObsData(start_dt, end_dt, bbox, settings)
    modeldata = getModelData(start_dt, end_dt, bbox, stations, settings, model_id=model_ids)
    if not isinstance(modeldata, pd.DataFrame):
        print('No model data is available')
        return

    data2plot = merge_data_for_plotting(obsdata, modeldata)

    # Plot map of stations
    map_plot_fname = settings['plot_dir'] + region_name + '/' + location_name + '/upper-air/station_map.png'
    sf.make_outputplot_filename(region_name, location_name, start_dt, 'Radiosonde', 'Station Map',
                                'Instantaneous', 'upper-air', 'tephigram', 'T+0')
    plot_station_map(stations, bbox, map_plot_fname)

    # Output list of filenames for the html page
    ofiles = []

    # Loop through station ID(s)
    for i, station in stations.iterrows():
        # Get the obs data
        print(station['name'] + ', ' + station['territory'])
        stn_id = station['wigosStationIdentifier']

        # Get a list of unique datetimes
        datesnp = pd.unique(data2plot['datetimeUTC'])
        # Converts numpy.datetime64 to datetime.datetime
        dates = sorted(pd.DatetimeIndex(datesnp).to_pydatetime())

        for thisdt in dates:
            print(thisdt)
            this_dt_fmt = thisdt.strftime('%Y%m%dT%H%MZ')

            # Create a boolean list of records for this station and datetime
            stndt = (data2plot.station_id == str(stn_id)) & (data2plot.datetimeUTC == thisdt)

            # Plot just the observation
            asubset = data2plot.loc[stndt & (data2plot.model_id == 'observation')]
            if not asubset.empty:
                plot_fname = sf.make_outputplot_filename(region_name, location_name, thisdt,
                                                         'Radiosonde', station['name'],
                                                         'Instantaneous', 'upper-air',
                                                         'tephigram', 'T+0')
                if not os.path.isfile(plot_fname):
                    try:
                        tephi_plot(station, thisdt, asubset, plot_fname)
                    except:
                        print('Unable to plot', plot_fname)
                if os.path.isfile(plot_fname):
                    ofiles.append(plot_fname)

            # Plot observation + analysis
            obsana = ((data2plot.model_id == 'observation') | (data2plot.model_id == 'analysis'))
            asubset = data2plot.loc[stndt & obsana]
            if not asubset.empty:
                plot_fname = sf.make_outputplot_filename(region_name, location_name, thisdt,
                                                         'Radiosonde+Analysis', station['name'],
                                                         'Instantaneous', 'upper-air',
                                                         'tephigram', 'T+0')
                if not os.path.isfile(plot_fname):
                    try:
                        tephi_plot(station, thisdt, asubset, plot_fname)
                    except:
                        print('Unable to plot', plot_fname)
                if os.path.isfile(plot_fname):
                    ofiles.append(plot_fname)

            # Plot observation + analysis + models @ multiple lead times
            # (T+0-24, 24-48, 48-72, 72-96, 96-120)
            fclts = np.arange(0, 120, 24)
            for fclt_start in fclts:
                fclt_end = fclt_start + 24
                fc = (data2plot.fcast_lead_time > fclt_start) & \
                     (data2plot.fcast_lead_time <= fclt_end) & \
                     (data2plot.model_id != 'analysis')
                asubset = data2plot.loc[stndt & (fc | obsana)]
                if not asubset.empty:
                    plot_fname = sf.make_outputplot_filename(region_name, location_name, thisdt,
                                                             'All-Models', station['name'],
                                                             'Instantaneous', 'upper-air',
                                                             'tephigram', 'T+' + str(fclt_end))
                    if not os.path.isfile(plot_fname):
                        try:
                            tephi_plot(station, thisdt, asubset, plot_fname)
                        except:
                            print('Unable to plot', plot_fname)
                    if os.path.isfile(plot_fname):
                        ofiles.append(plot_fname)

            # Plot observation + analysis + models @ all lead times
            asubset = data2plot.loc[stndt]
            if not asubset.empty:
                plot_fname = sf.make_outputplot_filename(region_name, location_name, thisdt,
                                                         'All-Models', station['name'],
                                                         'Instantaneous', 'upper-air',
                                                         'tephigram', 'All-FCLT')
                if not os.path.isfile(plot_fname):
                    try:
                        tephi_plot(station, thisdt, asubset, plot_fname)
                    except:
                        print('Unable to plot', plot_fname)
                if os.path.isfile(plot_fname):
                    ofiles.append(plot_fname)

    html.create(ofiles)
def main(latency, start_date, end_date, agency):
    product = 'imerg'  # This shouldn't change

    # Change the account settings
    settings = config.load_location_settings(agency)
    outdir = settings['gpm_path']

    server = {
        'production': ['arthurhou.pps.eosdis.nasa.gov', settings['gpm_username'], '.HDF5'],
        'NRTlate': ['jsimpson.pps.eosdis.nasa.gov', settings['gpm_username'], '.RT-H5'],
        'NRTearly': ['jsimpson.pps.eosdis.nasa.gov', settings['gpm_username'], '.RT-H5']
    }
    var = 'precipitationCal'

    # Shouldn't need to change anything below here ...
    first_date = dt.datetime(2000, 6, 1)
    # end_date = dt.datetime.strptime(end_date, '%Y%m%d')
    # start_date = end_date - dt.timedelta(days)
    if start_date < first_date:
        start_date = first_date

    # Loop through dates
    for single_date in daterange(start_date, end_date):
        print(latency + ' : ' + single_date.strftime("%Y-%m-%d"))
        year, month, day = getYMD(single_date)

        sfilepath = {
            'production': '/gpmdata/' + year + '/' + month + '/' + day + '/imerg/3B-HHR.MS.MRG.3IMERG.',
            'NRTlate': '/NRTPUB/imerg/late/' + year + month + '/3B-HHR-L.MS.MRG.3IMERG.' + year + month + day,
            'NRTearly': '/NRTPUB/imerg/early/' + year + month + '/3B-HHR-E.MS.MRG.3IMERG.' + year + month + day
        }  # + '.*.RT-H5'

        rawdata_dir = outdir.rstrip('/') + '/rawdata/' + product + '/' + latency + '/' + year + '/' + month + '/' + day
        netcdf_dir = outdir.rstrip('/') + '/netcdf/' + product + '/' + latency + '/' + year + '/'
        ofile_test = netcdf_dir.rstrip('/') + '/gpm_' + product + '_' + latency + '_*_' + year + month + day + '.nc'
        ofile_part_test = netcdf_dir.rstrip('/') + '/gpm_' + product + '_' + latency + '_*_' + year + month + day + '_part.nc'
        ofileq_test = netcdf_dir.rstrip('/') + '/gpm_' + product + '_' + latency + '_*_' + year + month + day + '_quality.nc'
        ofileq_part_test = netcdf_dir.rstrip('/') + '/gpm_' + product + '_' + latency + '_*_' + year + month + day + '_quality_part.nc'

        if not os.path.isdir(rawdata_dir):
            mkdir_p(rawdata_dir)
        if not os.path.isdir(netcdf_dir):
            mkdir_p(netcdf_dir)

        rawdatafiles = glob.glob(rawdata_dir + '/3B-HHR*' + server[latency][2])

        # Check whether both the data and quality netcdf files (full or partial) already exist
        have_data = (len(glob.glob(ofile_test)) == 1) or (len(glob.glob(ofile_part_test)) == 2)
        have_qual = (len(glob.glob(ofileq_test)) == 1) or (len(glob.glob(ofileq_part_test)) == 2)
        if have_data and have_qual:
            print('   Nothing to do')
        else:
            # Do everything
            if not len(rawdatafiles) == 48:
                serverpath = sfilepath[latency]
                thisserver = server[latency]
                downloadftp(rawdata_dir, thisserver, serverpath, settings)
                rawdatafiles = glob.glob(rawdata_dir + '/3B-HHR*' + server[latency][2])

            versions = [os.path.basename(rdf).split('.')[6] for rdf in rawdatafiles]
            for curVer in set(versions):
                print('   ', curVer)
                ofile = netcdf_dir + 'gpm_' + product + '_' + latency + '_' + curVer + '_' + year + month + day + '.nc'
                ofile_part = netcdf_dir + 'gpm_' + product + '_' + latency + '_' + curVer + '_' + year + month + day + '_part.nc'
                ofileq = netcdf_dir + 'gpm_' + product + '_' + latency + '_' + curVer + '_' + year + month + day + '_quality.nc'
                ofileq_part = netcdf_dir + 'gpm_' + product + '_' + latency + '_' + curVer + '_' + year + month + day + '_quality_part.nc'

                if os.path.isfile(ofile) and os.path.isfile(ofileq):
                    print('   Nothing to do for this version')
                else:
                    rawdatafiles = glob.glob(rawdata_dir + '/3B-HHR*' + curVer + server[latency][2])

                    # If we now have a complete list of files, make the daily netcdf
                    if len(rawdatafiles) == 48:
                        print('   Merging all 48 files ...')
                        mergeGPM(rawdatafiles, ofile, year, month, day, var, curVer, latency)
                        if os.path.isfile(ofile_part):
                            os.remove(ofile_part)
                        print('   Creating quality flag for all 48 files ...')
                        calcQuality(rawdatafiles, ofileq, year, month, day, curVer, latency)
                        if os.path.isfile(ofileq_part):
                            os.remove(ofileq_part)

                    # If we don't have a complete set of obs for the day, let's still make a netcdf file ...
                    if 0 < len(rawdatafiles) < 48:
                        print('   Merging', len(rawdatafiles), 'files ...')
                        mergeGPM(rawdatafiles, ofile_part, year, month, day, var, curVer, latency)
                        print('   Creating quality flag for', len(rawdatafiles), 'files ...')
                        calcQuality(rawdatafiles, ofileq_part, year, month, day, curVer, latency)

                    if len(rawdatafiles) == 0:
                        print('   No raw data files to process')

                # Tidy up
                if os.path.isfile(ofile) and os.path.isfile(ofileq) and (len(rawdatafiles) > 0):
                    for f in rawdatafiles:
                        os.remove(f)
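# downloadftp is defined elsewhere; a minimal sketch of what such a helper might look like,
# not the project's actual implementation. thisserver is [host, username, suffix] as in the
# server dict above; using the registered email address as both username and password follows
# NASA PPS convention, but that and the settings usage here are assumptions:

import os
from ftplib import FTP

def downloadftp(rawdata_dir, thisserver, serverpath, settings):
    # Fetch every half-hourly IMERG file matching the server path prefix and suffix
    host, username, suffix = thisserver
    ftp = FTP(host)
    ftp.login(username, username)
    ftp.cwd(os.path.dirname(serverpath))
    prefix = os.path.basename(serverpath)
    for fname in ftp.nlst():
        if fname.startswith(prefix) and fname.endswith(suffix):
            local_fn = os.path.join(rawdata_dir, fname)
            if not os.path.isfile(local_fn):
                with open(local_fn, 'wb') as fh:
                    ftp.retrbinary('RETR ' + fname, fh.write)
    ftp.quit()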
def main(dt_start, dt_end, timeagg, plotdomain, statdomain, organisation, region_name, eventname,
         searchlist=None, overwrite=False):
    print('Time aggregation: ', timeagg)
    settings = config.load_location_settings(organisation)
    domain = sf.getDomain_bybox(plotdomain)

    datadir = '/data/users/hadhy/CaseStudies/'
    odir = datadir + domain.lower() + '/'
    html_odir = odir + eventname
    png_odir = html_odir + '/plots/'
    os.makedirs(png_odir, exist_ok=True)
    pngfilelist = []

    modellist = sf.getModels_bybox(plotdomain)['model_list']
    if searchlist:
        modellist = [ml for ml in modellist if ml in searchlist]

    # Loop through all timeagg-hour segments within the datetime range
    start = dt_start
    end = start + dt.timedelta(hours=timeagg)
    while end <= dt_end:
        print(start, ' to ', end)

        # Load GPM data
        try:
            gpm_prod_data, gpm_prod_qual = sf.getGPMCube(start, end, 'production', plotdomain)
        except:
            print('No GPM Production data available')
        try:
            gpm_late_data, gpm_late_qual = sf.getGPMCube(start, end, 'NRTlate', plotdomain)
        except:
            print('No GPM NRT Late data available')
        try:
            gpm_early_data, gpm_early_qual = sf.getGPMCube(start, end, 'NRTearly', plotdomain,
                                                           aggregate=True)
        except:
            print('No GPM NRT Early data available')

        gpmdict = {
            "gpm_prod_data": gpm_prod_data if 'gpm_prod_data' in locals() else None,
            "gpm_prod_qual": gpm_prod_qual if 'gpm_prod_qual' in locals() else None,
            "gpm_late_data": gpm_late_data if 'gpm_late_data' in locals() else None,
            "gpm_late_qual": gpm_late_qual if 'gpm_late_qual' in locals() else None,
            "gpm_early_data": gpm_early_data if 'gpm_early_data' in locals() else None,
            "gpm_early_qual": gpm_early_qual if 'gpm_early_qual' in locals() else None
        }

        jobid = sf.getJobID_byDateTime(start, domain=domain, choice='newest')
        modelcubes = {}
        for mod in modellist:
            stash = sf.getPrecipStash(mod, type='short')
            lbproc = 128
            print("Overwrite: ", overwrite)
            # Args: start, end, stash, plotdomain, searchtxt=None, lbproc=0, overwrite=False
            try:
                modelcubes[mod] = sf.loadModelData(start, end, stash, plotdomain, mod, lbproc,
                                                   aggregate=True, overwrite=overwrite)
            except:
                continue

        # Do plotting
        # 1) Plot GPM
        # plotGPM(gpmdict, timeagg, odir)

        # 2) Plot GPM vs one model
        for key in modelcubes.keys():
            pngfile = plotOneModel(gpmdict, modelcubes, key, timeagg, plotdomain, png_odir)
            pngfilelist.append(pngfile)

        # 3) GPM vs all models
        # plotAllData(gpmdict, modelcubes, models2plot=[ga6, ga7, ra1_4p4, ra1_1p5_mal], timeagg, odir)

        # Set the next start and end datetimes
        start = start + dt.timedelta(hours=timeagg)
        end = start + dt.timedelta(hours=timeagg)

    outhtml = makeHTML_fromimages_indir(dt_start, dt_end, domain, pngfilelist, html_odir,
                                        eventname, 'timeagg' + str(timeagg) + 'hrs')