def main(indata="ghcnd", diagnostics=False): """ Extract relevant dataset from the command line switchs :param str indata: input dataset to process :param bool diagnostics: output diagnostic information """ # get all possible datasets all_datasets = utils.get_input_datasets() # and their names names = np.array([d.name for d in all_datasets]) # if dataset selected and in the list of available, then run if indata in names: process_dataset(all_datasets[names == indata][0], diagnostics=diagnostics) # fail gracefully else: print("data name not available: {}\n".format(indata)) print("available data names: {}".format(" ".join(names))) return # main
def main(indata="acre", diagnostics=False): """ Extract relevant dataset from the command line switchs :param str indata: input dataset to process :param bool diagnostics: output diagnostic information """ # get all possible datasets all_datasets = utils.get_input_datasets() # and their names names = np.array([d.name for d in all_datasets]) # if dataset selected and in the list of available, then run if indata in names: dataset = all_datasets[names == indata][0] if dataset.name in ["acre"]: # need to run ACRE as base period set separately process_dataset(dataset, diagnostics=diagnostics) elif dataset.base_period == "00-00": # the input dataset is raw observations, run by climpact, so no need # as will have matched HadEX3 base period pass elif utils.match_reference_period(dataset.base_period): # the input datasets's reference period matches that of HadEX3 version pass else: # base period of input dataset doesn't match HadEX3, so remove # appropriate indices process_dataset(dataset, diagnostics=diagnostics) return # main
def main(index="TX90p", diagnostics=False, qc_flags=""): """ The main DLS function :param str index: which index to run :param bool diagnostics: extra verbose output :param str qc_flags: which QC flags to process W, B, A, N, C, R, F, E, V, M """ if index in utils.MONTHLY_INDICES: nmonths = 13 timescale = "MON" else: nmonths = 1 timescale = "ANN" # move this up one level eventually? all_datasets = utils.get_input_datasets() # spin through all datasets stations = np.array([]) for dataset in all_datasets: try: ds_stations = utils.read_inventory(dataset, subdir="formatted/indices", final=True, timescale=timescale, index=index, qc_flags=qc_flags) good_stations = utils.select_qc_passes(ds_stations, qc_flags=qc_flags) stations = np.append(stations, good_stations) print("Adding {} ({} stations), nstations = {}".format( dataset.name, len(good_stations), len(stations))) except IOError: # file missing print("No stations with data for {}".format(dataset.name)) nstations = len(stations) # array of lats and lons for calculation of separations all_locations = np.array([[stn.latitude, stn.longitude] for stn in stations]) # get the separations (km, radians) stn_separation, stn_angle = get_separations(stations, all_locations) # assign stations to bands StationBands = assign_to_latitude_bands(stations) # read in all the station data all_data = get_all_data(stations, index, timescale, nyears, nmonths) # set up the DLS defaults bins = np.arange(0, MAX_SEPARATION + BIN_WIDTH, BIN_WIDTH) all_dls = np.zeros([len(utils.LAT_BANDS), nmonths]) all_dls[:] = utils.DEFAULT_DLS # now spin through all latitude bands and months. for lb, band in enumerate(utils.LAT_BANDS): stations_in_bands, = np.where(StationBands == lb) if len(stations_in_bands) <= 30: # insufficient stations within this latitude band, next band if diagnostics: print("Index {}, Band {} to {}".format(index, band[0], band[1])) print("Number of stations {}".format(len(stations_in_bands))) print("Ann, Jan -- Dec, DLS = {} km".format(utils.DEFAULT_DLS)) # spin through months to remove old plots if they exist for month in range(nmonths): if os.path.exists( os.path.join( utils.PLOTLOCS, "DLS", "DLS_{}_{}_{}to{}.png".format( index, month_names[month], band[0], band[1]))): os.remove( os.path.join( utils.PLOTLOCS, "DLS", "DLS_{}_{}_{}to{}.png".format( utils.PLOTLOCS, index, month_names[month], band[0], band[1]))) continue print("{}, # stations {}".format(band, len(stations_in_bands))) # process each month for month in range(nmonths): print(month_names[month]) month_data = all_data[stations_in_bands, :, month] names = [s.id for s in stations[stations_in_bands]] # get the separation and correlation for each cross pair # correlations only from 1951 (match HadEX2) cor_yr = 1951 - utils.STARTYEAR.year seps, cors = separations_and_correlations( month_data[:, cor_yr:], stn_separation[stations_in_bands, :][:, stations_in_bands], names, diagnostics=diagnostics) if len(seps) == 0 and len(cors) == 0: # then none of the available stations either had sufficient overlapping data # or values at that particular point (correlations of lots of zeros doesn't mean anything) # so escape and go on to next month if diagnostics: print("Index {}, Band {} to {}, month {}".format( index, band[0], band[1], month_names[month])) print("Number of stations {}".format( len(stations_in_bands))) print( "Likely that all values for this index, month and band are zero\n hence correlations don't mean anything" ) print("Using default DLS = {}km".format(utils.DEFAULT_DLS)) else: print("No stations, {} - 
{} DLS = {} km".format( band, month_names[month], utils.DEFAULT_DLS)) continue # get the bins bin_assignment = np.digitize( seps, bins, right=True) # "right" means left bin edge included bin_centers = bins - BIN_WIDTH / 2. # average value for each bin if sufficient correlations to do so. means = np.zeros(len(bins)) sigmas = np.zeros(len(bins)) for b, bin in enumerate(bins): locs, = np.where(bin_assignment == b) if len(locs) > MIN_PER_BIN: # means[b] = np.ma.mean(cors[locs]) means[b] = np.ma.median(cors[locs]) sigmas[b] = np.ma.std(cors[locs]) # print(bin, means[b], len(locs), cors[locs]) # raw_input("stop") filled_bins, = np.where(means != 0) # if sufficient bins are filled then fit the curve if len(filled_bins) / float(len(bins)) >= 0.5: if utils.FIX_ZERO: # fix zero bin to be 1.0, and use bin edges, not centres (HadEX2) means[0] = 1. sigmas[0] = sigmas[1] dls, plot_curve, chisq, R2 = exponential_fit(bins, means, sigmas, C=C) else: dls, plot_curve, chisq, R2 = exponential_fit( bin_centers[1:], means[1:], sigmas[1:], C=C) # only take fit if greater than minimum set overall all_dls[lb, month] = np.max([dls, utils.DEFAULT_DLS]) # test at 5% level and 2 or 3 dofs, as per HadEX2 if utils.FIX_ZERO and chisq >= chi2.isf( 0.05, len(bins[sigmas != 0]) - 2): print("inadequately good fit") all_dls[lb, month] = utils.DEFAULT_DLS elif chisq >= chi2.isf(0.05, len(bins[sigmas != 0]) - 3): print("inadequately good fit") all_dls[lb, month] = utils.DEFAULT_DLS # plot the fit if required plt.clf() plt.scatter(seps, cors, c='b', marker='.', alpha=0.1, edgecolor=None) # calculate the 2D density of the data given counts, xbins, ybins = np.histogram2d(seps, cors, bins=50) # make the contour plot (5 levels) plt.contour(counts.transpose(), 5, extent=[ xbins.min(), xbins.max(), ybins.min(), ybins.max() ], linewidths=1, colors='black', linestyles='solid') if utils.FIX_ZERO: plt.plot(bins[sigmas != 0], means[sigmas != 0], 'ro') plt.errorbar(bins[sigmas != 0], means[sigmas != 0], yerr=sigmas[sigmas != 0], fmt="none", ecolor="r") plt.plot(bins, plot_curve, c='cyan', ls='-', lw=2) else: plt.plot(bin_centers[1:][sigmas[1:] != 0], means[1:][sigmas[1:] != 0], 'ro') plt.errorbar(bin_centers[1:][sigmas[1:] != 0], means[1:][sigmas[1:] != 0], yerr=sigmas[1:][sigmas[1:] != 0], fmt="none", ecolor="r") plt.plot(bin_centers[1:], plot_curve, c='cyan', ls='-', lw=2) # plot curve will have been truncated plt.axvline(dls, c='magenta', ls="--", lw=2) plt.axvline(utils.DEFAULT_DLS, c='k', ls=":", lw=1) plt.axvline(utils.MAX_DLS, c='k', ls=":", lw=1) plt.text(dls + 10, 0.95, "dls = {:4.0f}km".format(dls)) plt.text(3010, -0.95, "r2 = {:6.4f}".format(R2)) plt.text(3010, -0.85, "chi2 = {:6.4f}".format(chisq)) plt.text(3010, -0.75, "Nstat = {}".format(len(stations_in_bands))) plt.xlim([-100, 5000]) plt.ylim([-1, None]) plt.xlabel("Separation (km)") plt.ylabel("Correlation") plt.title("{} - {}; {} to {}".format(index, month_names[month], band[0], band[1])) # add text to show what code created this and when if utils.WATERMARK: watermarkstring = "/".join( os.getcwd().split('/')[4:]) + '/' + os.path.basename( __file__) + " " + dt.datetime.strftime( dt.datetime.now(), "%d-%b-%Y %H:%M") plt.figtext(0.01, 0.01, watermarkstring, size=6) if utils.FIX_ZERO: plt.savefig(os.path.join(utils.PLOTLOCS, "DLS", \ "DLS_{}_{}_{}_{}to{}_fixzero.png".format(index, "{}-{}".format(str(utils.REF_START)[-2:], \ str(utils.REF_END)[-2:]), month_names[month], band[0], band[1])), dpi=300) else: plt.savefig(os.path.join(utils.PLOTLOCS, "DLS", \ 
"DLS_{}_{}_{}_{}to{}.png".format(index, "{}-{}".format(str(utils.REF_START)[-2:], \ str(utils.REF_END)[-2:]), month_names[month], band[0], band[1])), dpi=300) print("DLS = {:7.2f} km".format(dls)) else: print("insufficient bins for fit ({}/{})".format( len(filled_bins), float(len(bins)))) if os.path.exists(os.path.join(utils.PLOTLOCS, "DLS", \ "DLS_{}_{}_{}_{}to{}.png".format(index, "{}-{}".format(str(utils.REF_START)[-2:], \ str(utils.REF_END)[-2:]), month_names[month], band[0], band[1]))): # remove old plots! os.remove(os.path.join(utils.PLOTLOCS, "DLS", \ "DLS_{}_{}_{}_{}to{}.png".format(index, "{}-{}".format(str(utils.REF_START)[-2:], \ str(utils.REF_END)[-2:]), month_names[month], band[0], band[1]))) # replace those dls < default with default and > max with max all_dls[all_dls < utils.DEFAULT_DLS] = utils.DEFAULT_DLS # interpolate grid_dls = interpolate_dls_to_grid(all_dls, nmonths) # write output file write_dls_file(os.path.join(utils.DLSLOCS, "dls_{}.txt".format(index)), grid_dls, nmonths, month_names) return # main
def main(indata="ghcndex", index="R95pTOT", diagnostics=False): """ Read PRCPTOT and other indices and write out """ # check if need to do monthly ones if index in utils.MONTHLY_INDICES: timescales = ["ANN", "MON"] else: timescales = ["ANN"] # get all possible datasets all_datasets = utils.get_input_datasets() # and their names names = np.array([d.name for d in all_datasets]) # if dataset selected and in the list of available, then run if indata in names: dataset = all_datasets[names == indata][0] dataset_stations = utils.read_inventory(dataset, subdir="formatted/indices") # check each station for stn in dataset_stations: if diagnostics: print("{} - {}".format(dataset.name, stn.id)) # for appropriate number of timescales for ts in timescales: if os.path.exists( os.path.join( stn.location, stn.id, "{}_{}_{}.csv".format( stn.id, PARTNERS[index].lower(), ts))) and os.path.exists( os.path.join( stn.location, stn.id, "{}_{}_{}.csv".format( stn.id, "PRCPTOT".lower(), ts))): rtimes, rXXp = utils.read_station_index( stn, PARTNERS[index].lower(), ts) ptimes, prcptot = utils.read_station_index( stn, "PRCPTOT".lower(), ts) match = np.in1d(rtimes, ptimes) match_b = np.in1d(ptimes, rtimes) if len(match) != 0 and len(match_b) != 0: rXXptot = (100 * rXXp) / prcptot rXXptot_times = rtimes[match] if ts == "MON": myears = [] months = [] for y in rXXptot_times: for m in range(1, 13): myears += [y] months += [m] stn.monthly = rXXptot.filled().reshape(-1) stn.myears = myears stn.months = months path = os.path.join( dataset.location, "formatted", "indices", stn.id, "{}_{}_MON.csv".format(stn.id, index.lower())) if not os.path.exists(path): utils.write_station_index(path, stn, index, doMonthly=True) else: stn.years = rXXptot_times stn.annual = rXXptot.filled() path = os.path.join( dataset.location, "formatted", "indices", stn.id, "{}_{}_ANN.csv".format(stn.id, index.lower())) if not os.path.exists(path): utils.write_station_index(path, stn, index) return # main
def main(indata="acre", diagnostics=False): """ Call the R package climpact2 with appropriate settings to calculate the indices :param str indata: name of dataset to process :param bool diagnostics: output diagnostic information """ # get all possible datasets all_datasets = utils.get_input_datasets() # and their names names = np.array([d.name for d in all_datasets]) # select the matching one if indata in names: dataset = all_datasets[names == indata][0] # check that there are stations to process for this dataset stations = utils.read_inventory(dataset) if len(stations) != 0: for station in stations: # read the station data infile = os.path.join(dataset.location, "formatted", "{}.txt".format(station.id)) indata = np.genfromtxt(infile) # get the first year and last year ref_start = int(indata[0][0]) ref_end = int(indata[-1][0]) # write a temporary inventory file for just this station utils.write_climpact_inventory_header(os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name))) utils.write_climpact_inventory(os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)), station) try: with utils.cd(utils.CLIMPACT_LOCS): # call the R process - which should automatically do everything and make suitable files etc # runs in subfolder with context manager, so returning to parent once done. # ACRE (and others?) have stations that do not overlap the reference period. # Means that the QC process throws them out if insufficient overlap between data and reference period print(" ".join(["Rscript", "climpact2.batch.stations.r", os.path.join(dataset.location, "formatted"), os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)), str(ref_start), str(ref_end), str(utils.NCORES)])) subprocess.check_call(["Rscript", "climpact2.batch.stations.r", os.path.join(dataset.location, "formatted"), os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)), str(ref_start), str(ref_end), str(utils.NCORES)]) except subprocess.CalledProcessError: # handle errors in the called executable raise Exception except OSError: # executable not found print("Cannot find Rscript") raise OSError # remove temporary metadata file os.remove(os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name))) # fail gracefully else: print("No stations available in {}".format(indata)) print(" Climpact2 not run") # remove plots, qc, thres and trend folders (save space) if utils.REMOVE_EXTRA: for subdir in ["plots", "qc", "thres", "trend"]: shutil.rmtree(os.path.join(dataset.location, "formatted", subdir)) # fail gracefully else: print("data name not available: {}\n".format(indata)) print("available data names: {}".format(" ".join(names))) return # main
def main(index="TX90p", diagnostics=False, qc_flags="", anomalies="None"): """ Read inventories and make scatter plot :param str index: which index to run :param bool diagnostics: extra verbose output :param str qc_flags: which QC flags to process W, B, A, N, C, R, F, E, V, M :param str anomalies: run code on anomalies or climatology rather than raw data """ if index in utils.MONTHLY_INDICES: timescale = ["ANN", "MON"] else: timescale = ["ANN"] # move this up one level eventually? all_datasets = utils.get_input_datasets() for ts in timescale: # set up the figure fig = plt.figure(figsize=(10, 6.5)) plt.clf() ax = plt.axes([0.025, 0.14, 0.95, 0.90], projection=cartopy.crs.Robinson()) ax.gridlines() #draw_labels=True) ax.add_feature(cartopy.feature.LAND, zorder=0, facecolor="0.9", edgecolor="k") ax.coastlines() # dummy scatters for full extent plt.scatter([-180, 180, 0, 0], [0, 0, -90, 90], c="w", s=1, transform=cartopy.crs.Geodetic(), \ edgecolor='w', linewidth='0.01') # run all datasets total = 0 for dataset in all_datasets: try: # choose appropriate subdirectory. if anomalies == "None": subdir = "formatted/indices" elif anomalies == "anomalies": subdir = "formatted/anomalies" elif anomalies == "climatology": subdir = "formatted/climatology" ds_stations = utils.read_inventory(dataset, subdir=subdir, final=True, \ timescale=ts, index=index, anomalies=anomalies, qc_flags=qc_flags) ds_stations = utils.select_qc_passes(ds_stations, qc_flags=qc_flags) except IOError: # file missing print("No stations with data for {}".format(dataset.name)) ds_stations = [] if len(ds_stations) > 0: lats = np.array([stn.latitude for stn in ds_stations]) lons = np.array([stn.longitude for stn in ds_stations]) # and plot scatter = plt.scatter(lons, lats, c=COLOURS[dataset.name], s=15, \ label="{} ({})".format(get_label(dataset.name), len(ds_stations)), \ transform=cartopy.crs.Geodetic(), edgecolor='0.5', linewidth='0.5') total += len(ds_stations) # make a legend leg = plt.legend(loc='lower center', ncol=5, bbox_to_anchor=(0.50, -0.3), \ frameon=False, title="", prop={'size':12}, labelspacing=0.15, columnspacing=0.5, numpoints=3) plt.setp(leg.get_title(), fontsize=12) plt.figtext(0.06, 0.91, "{} Stations".format(total)) plt.title("{} - {}".format(index, ts)) # extra information if utils.WATERMARK: watermarkstring = "{} {}".format(os.path.join("/".join(os.getcwd().split('/')[4:]), os.path.basename(__file__)), dt.datetime.strftime(dt.datetime.now(), "%d-%b-%Y %H:%M")) plt.figtext(0.01, 0.01, watermarkstring, size=6) # plt.figtext(0.03, 0.95, "(c)", size=14) # and save outname = putils.make_filenames("station_locations", index=index, grid="ADW", anomalies=anomalies, month=ts.capitalize()) plt.savefig("{}/{}/{}".format(utils.PLOTLOCS, index, outname)) plt.close() # write out total station number if ts == "ANN": with open(os.path.join(utils.INFILELOCS, "{}_stations.txt".format(index)), "w") as outfile: outfile.write("{}\n".format(index)) outfile.write("{}".format(total)) return # main
def main(indata="ghcnd", diagnostics=False): """ Call the R package climpact2 with appropriate settings to calculate the indices :param str indata: name of dataset to process :param bool diagnostics: output diagnostic information """ # get all possible datasets all_datasets = utils.get_input_datasets() # and their names names = np.array([d.name for d in all_datasets]) # select the matching one if indata in names: dataset = all_datasets[names == indata][0] ''' Process call structure climpact2.batch.stations.r ./sample_data/ ./sample_data/climpact2.sample.batch.metadata.txt 1971 2000 4 ''' # check that there are stations to process for this dataset stations = utils.read_inventory(dataset) if len(stations) != 0: try: with utils.cd(utils.CLIMPACT_LOCS): # call the R process - which should automatically do everything and make suitable files etc # runs in subfolder with context manager, so returning to parent once done. # ACRE (and others?) have stations that do not overlap the reference period. # Means that the QC process throws them out if insufficient overlap between data and reference period if dataset.name == "acre": ref_start = 1901 ref_end = 1930 else: ref_start = utils.REF_START ref_end = utils.REF_END print(" ".join([ "Rscript", "climpact2.batch.stations.r", os.path.join(dataset.location, "formatted"), os.path.join(dataset.location, "{}.metadata.txt".format(dataset.name)), str(ref_start), str(ref_end), str(utils.NCORES) ])) subprocess.check_call([ "Rscript", "climpact2.batch.stations.r", os.path.join(dataset.location, "formatted"), os.path.join(dataset.location, "{}.metadata.txt".format(dataset.name)), str(ref_start), str(ref_end), str(utils.NCORES) ]) except subprocess.CalledProcessError: # handle errors in the called executable raise Exception except OSError: # executable not found print("Cannot find Rscript") raise OSError # fail gracefully else: print("No stations available in {}".format(indata)) print(" Climpact2 not run") # remove plots, qc, thres and trend folders (save space) if utils.REMOVE_EXTRA: for subdir in ["plots", "qc", "thres", "trend"]: try: shutil.rmtree( os.path.join(dataset.location, "formatted", subdir)) except FileNotFoundError: print("{} doesn't exist".format( os.path.join(dataset.location, "formatted", subdir))) # fail gracefully else: print("data name not available: {}\n".format(indata)) print("available data names: {}".format(" ".join(names))) return # main
def main(indata="ghcnd", diagnostics=False): """ Read TXn and TNn and write out ETR as difference """ index = "ETR" # check if need to do monthly ones if index in utils.MONTHLY_INDICES: timescales = ["ANN", "MON"] else: timescales = ["ANN"] # get all possible datasets all_datasets = utils.get_input_datasets() # and their names names = np.array([d.name for d in all_datasets]) # if dataset selected and in the list of available, then run if indata in names: dataset = all_datasets[names == indata][0] dataset_stations = utils.read_inventory(dataset, subdir="formatted/indices") # check each station for stn in dataset_stations: if diagnostics: print("{} - {}".format(dataset.name, stn.id)) # for appropriate number of timescales for ts in timescales: if os.path.exists(os.path.join(stn.location, stn.id, "{}_{}_{}.csv".format(stn.id, "txx", ts))) and os.path.exists(os.path.join(stn.location, stn.id, "{}_{}_{}.csv".format(stn.id, "tnn", ts))): xtimes, txx = utils.read_station_index(stn, "TXx", ts) ntimes, tnn = utils.read_station_index(stn, "TNn", ts) match = np.in1d(xtimes, ntimes) match_b = np.in1d(ntimes, xtimes) if len(match) != 0 and len(match_b) != 0: etr = txx[match]-tnn[match_b] etr_times = xtimes[match] if ts == "MON": myears = [] months = [] for y in etr_times: for m in range(1, 13): myears += [y] months += [m] stn.monthly = etr.filled().reshape(-1) stn.myears = myears stn.months = months path = os.path.join(dataset.location, "formatted", "indices", stn.id, "{}_{}_MON.csv".format(stn.id, index.lower())) if not os.path.exists(path): utils.write_station_index(path, stn, "ETR", doMonthly=True) else: stn.years = etr_times stn.annual = etr.filled() path = os.path.join(dataset.location, "formatted", "indices", stn.id, "{}_{}_ANN.csv".format(stn.id, index.lower())) if not os.path.exists(path): utils.write_station_index(path, stn, "ETR") return # main
def main(grid="ADW", index="TX90p", month_index=0, diagnostics=False, hadex2_adw=False, qc_flags="", anomalies="None"): """ :param str grid: gridding type ADW/CAM :param str index: which index to run :param int month_index: which month to apply (0 = Annual, 1-12 for months) :param str qc_flags: which QC flags to process W, B, A, N, C, R :param bool diagnostics: output diagnostic information :param str anomalies: run code on anomalies or climatology rather than raw data """ # ensure correct timescale is selected if args.index in utils.MONTHLY_INDICES: if month_index == 0: timescale = "ANN" else: timescale = "MON" else: if month_index == 0: timescale = "ANN" else: print("Monthly requested for annual-only index.\n Exiting") return # move this up one level eventually all_datasets = utils.get_input_datasets() # set up the data arrays if anomalies == "climatology": nyears = 1 else: nyears = len(utils.REFERENCEYEARS) if grid == "CAM": GridData, GridStations = cam(all_datasets, index, timescale, nyears, qc_flags=qc_flags, month_index=month_index, diagnostics=diagnostics, anomalies=anomalies) elif grid == "ADW": GridData, GridStations, GridDLSStations = adw(all_datasets, index, timescale, nyears, qc_flags=qc_flags, month_index=month_index, diagnostics=diagnostics, anomalies=anomalies, hadex2_adw=hadex2_adw) if utils.DOLSM: nmonths = 1 # apply LSM lsm = utils.get_land_sea_mask( utils.box_centre_lats, utils.box_centre_lons, floor=False ) # not taking only purely non-land boxes. Have to have sufficient amount of land! # resize to match lsm_sized = np.tile(np.tile(lsm, (1, 1, 1, 1)), (nyears, nmonths, 1, 1)) GridData.mask = np.logical_or(GridData.mask, lsm_sized) GridStations.mask = np.logical_or(GridStations.mask, lsm_sized) if grid == "ADW": GridDLSStations.mask = np.logical_or(GridDLSStations.mask, lsm_sized) # correct fill_value GridData.fill_value = utils.HADEX_MDI GridStations.fill_value = utils.HADEX_MDI # append appropriate name to filename if anomalies or climatology filename = utils.make_filenames(index=index, grid=grid, anomalies=anomalies, month_index=month_index) ncdfp.netcdf_write(os.path.join(utils.OUTROOT, filename), index, GridData.filled(), utils.REFERENCEYEARS, utils.box_centre_lats, utils.box_centre_lons, single_month=month_index) filename = utils.make_filenames(index=index, grid=grid, anomalies=anomalies, extra="num", month_index=month_index) ncdfp.netcdf_write(os.path.join(utils.OUTROOT, filename), index, GridStations.filled(), utils.REFERENCEYEARS, utils.box_centre_lats, utils.box_centre_lons, single_month=month_index, station_count=True) if grid == "ADW": filename = utils.make_filenames(index=index, grid=grid, anomalies=anomalies, extra="numdls", month_index=month_index) ncdfp.netcdf_write(os.path.join(utils.OUTROOT, filename), index, GridDLSStations.filled(), utils.REFERENCEYEARS, utils.box_centre_lats, utils.box_centre_lons, single_month=month_index, station_count=True) return # main
def main(diagnostics=False):
    """
    Read inventories and make scatter plot

    :param bool diagnostics: extra verbose output
    """
    # move this up one level eventually?
    all_datasets = utils.get_input_datasets()

    # set up the figure
    fig = plt.figure(figsize=(10, 6.7))
    plt.clf()
    ax = plt.axes([0.025, 0.14, 0.95, 0.90], projection=cartopy.crs.Robinson())
    ax.gridlines() #draw_labels=True)
    ax.add_feature(cartopy.feature.LAND, zorder=0, facecolor="0.9", edgecolor="k")
    ax.coastlines()

    # dummy scatters for full extent
    plt.scatter([-180, 180, 0, 0], [0, 0, -90, 90], c="w", s=1, transform=cartopy.crs.Geodetic(), \
                edgecolor='w', linewidth=0.01)

    # run all datasets
    total = 0
    for dataset in all_datasets:

        try:
            # choose appropriate subdirectory.
            subdir = "formatted/indices"

            ds_stations = utils.read_inventory(dataset, subdir=subdir, final=False, \
                                               timescale="", index="", anomalies="None", qc_flags="")

        except IOError:
            # file missing
            print("No stations with data for {}".format(dataset.name))
            ds_stations = []

        if len(ds_stations) > 0:
            lats = np.array([stn.latitude for stn in ds_stations])
            lons = np.array([stn.longitude for stn in ds_stations])

            # and plot
            scatter = plt.scatter(lons, lats, c=COLOURS[dataset.name], s=15, \
                                  label="{} ({})".format(get_label(dataset.name), len(ds_stations)), \
                                  transform=cartopy.crs.Geodetic(), edgecolor='0.5', linewidth=0.5)

            total += len(ds_stations)

    # make a legend
    leg = plt.legend(loc='lower center', ncol=5, bbox_to_anchor=(0.50, -0.34), \
                     frameon=False, title="", prop={'size':12}, labelspacing=0.15, columnspacing=0.5, numpoints=3)
    plt.setp(leg.get_title(), fontsize=12)

    plt.figtext(0.05, 0.92, "{} Stations".format(total))
    plt.title("HadEX3 stations")

    # and save
    outname = putils.make_filenames("station_locations", index="All", grid="ADW", anomalies="None", month="All")
    plt.savefig("{}/{}".format(utils.PLOTLOCS, outname), dpi=300)

    plt.close()

    return # main
def main(index="TX90p", diagnostics=False): """ For all datasets, finds stations that exist for given index (and appropriate timescales) Checks for presence of data and write final station listing :param str index: which index to process :param bool diagnostics: output diagnostic information """ # check if need to do monthly ones if index in utils.MONTHLY_INDICES: timescales = ["ANN", "MON"] else: timescales = ["ANN"] # read in all datasets all_datasets = utils.get_input_datasets() # for appropriate number of timescales for ts in timescales: print("{}".format(ts)) # spin through each dataset for d, dataset in enumerate(all_datasets): dataset_stations = utils.read_inventory(dataset, subdir="formatted/indices") if diagnostics: print("{} - {}".format(dataset.name, index)) final_inventory = [] # check each station for stn in dataset_stations: if diagnostics: print("{} - {}".format(dataset.name, stn.id)) if assess_station(stn, index, ts, diagnostics=diagnostics): final_inventory += [stn] if diagnostics: print("{}\n".format(len(final_inventory))) else: if diagnostics: print("\n") # then write everything out. utils.write_climpact_inventory_header( os.path.join( dataset.location, "{}.metadata.{}.{}.txt".format(dataset.name, index, ts))) for stn in final_inventory: utils.write_climpact_inventory( os.path.join( dataset.location, "{}.metadata.{}.{}.txt".format(dataset.name, index, ts)), stn) print("{} - {} stations".format(dataset.name, len(final_inventory))) return # main
def main(index="TX90p", diagnostics=False, qc_flags="", anomalies="None"): """ Read inventories and make scatter plot :param str index: which index to run :param bool diagnostics: extra verbose output :param str qc_flags: which QC flags to process W, B, A, N, C, R, F, E, V, M :param str anomalies: run code on anomalies or climatology rather than raw data """ with open( os.path.join(utils.INFILELOCS, "{}_yearly_stations.txt".format(index)), "w") as outfile: outfile.write("{}\n".format(index)) if index in utils.MONTHLY_INDICES: timescale = ["ANN", "MON"] # allow for future! else: timescale = ["ANN"] # move this up one level eventually? all_datasets = utils.get_input_datasets() for ts in timescale: # run all datasets for d, dataset in enumerate(all_datasets): print(dataset) try: # choose appropriate subdirectory. subdir = "formatted/indices" ds_stations = utils.read_inventory(dataset, subdir=subdir, final=True, \ timescale=ts, index=index, anomalies=anomalies, qc_flags=qc_flags) ds_stations = utils.select_qc_passes(ds_stations, qc_flags=qc_flags) except IOError: # file missing print("No stations with data for {}".format(dataset.name)) ds_stations = [] # extract relevant info for this dataset if len(ds_stations) > 0: # extract values for this dataset for s, stn in enumerate(ds_stations): presence = time_presence(stn, index, ts) # year/month if s == 0: ds_presence = np.expand_dims(presence, axis=0)[:] else: ds_presence = np.append(ds_presence, np.expand_dims(presence, axis=0), axis=0) # station/year/month ds_lats = np.array([stn.latitude for stn in ds_stations]) ds_lons = np.array([stn.longitude for stn in ds_stations]) # store in overall arrays try: all_lats = np.append(all_lats, ds_lats[:], axis=0) all_lons = np.append(all_lons, ds_lons[:], axis=0) all_presence = np.append( all_presence, ds_presence[:], axis=0) # dataset*station/year/month all_dataset_names = np.append( all_dataset_names, np.array([dataset.name for i in ds_lats])) except NameError: # if not yet defined, then set up all_lats = ds_lats[:] all_lons = ds_lons[:] all_presence = ds_presence[:] all_dataset_names = np.array( [dataset.name for i in ds_lats]) for y, year in enumerate(utils.REFERENCEYEARS): # set up the figure fig = plt.figure(figsize=(10, 6.5)) plt.clf() ax = plt.axes([0.025, 0.10, 0.95, 0.90], projection=cartopy.crs.Robinson()) ax.gridlines() #draw_labels=True) ax.add_feature(cartopy.feature.LAND, zorder=0, facecolor="0.9", edgecolor="k") ax.coastlines() # dummy scatters for full extent plt.scatter([-180, 180, 0, 0], [0, 0, -90, 90], c="w", s=1, transform=cartopy.crs.Geodetic(), \ edgecolor='w', linewidth='0.01') total = 0 for dataset in all_datasets: ds, = np.where(all_dataset_names == dataset.name) locs, = np.where(all_presence[ds, y, 0] == 1) if len(locs) > 0: plt.scatter(all_lons[ds][locs], all_lats[ds][locs], c=ps.COLOURS[dataset.name], \ s=15, label="{} ({})".format(ps.get_label(dataset.name), len(locs)), \ transform=cartopy.crs.Geodetic(), edgecolor='0.5', linewidth='0.5') total += len(locs) else: # aiming to show all, even if zero plt.scatter([-180], [-90], c=ps.COLOURS[dataset.name], s=15, \ label="{} ({})".format(ps.get_label(dataset.name), len(locs)), \ transform=cartopy.crs.Geodetic(), edgecolor='0.5', linewidth='0.5') time.sleep(1) # make a legend leg = plt.legend(loc='lower center', ncol=6, bbox_to_anchor=(0.50, -0.25), frameon=False, \ title="", prop={'size':10}, labelspacing=0.15, columnspacing=0.5, numpoints=3) plt.setp(leg.get_title(), fontsize=12) plt.figtext(0.05, 0.92, "{} Stations".format(total)) 
plt.title("{} - {} - {}".format(index, ts, year)) # and save outname = putils.make_filenames("station_locations_{}_{}".format( ts.capitalize(), year), index=index, grid="ADW", anomalies=anomalies) plt.savefig("{}/{}/{}".format(utils.PLOTLOCS, index, outname)) plt.close() plt.clf() print("{} done".format(year)) # write out total station number with open( os.path.join(utils.INFILELOCS, "{}_yearly_stations.txt".format(index)), "a") as outfile: outfile.write("{} {}\n".format(year, total)) time.sleep(1) # reset namespace del all_lats del all_lons del all_presence del all_dataset_names return # main
def main(indata="ecad", diagnostics=False): """ Find all metadata files for given dataset across all indices and merge together :param str indata: input dataset name :param bool diagnostics: output diagnostic information """ # get all possible datasets all_datasets = utils.get_input_datasets() # and their names names = np.array([d.name for d in all_datasets]) # if dataset selected and in the list of available, then run if indata in names: dataset = all_datasets[names == indata][0] all_stations = [] all_names = [] # spin through indices for index in utils.ALL_INDICES: if diagnostics: print("processing {}".format(index)) # read in info if dataset.name in ["ecad", "sacad", "lacad"]: index_stations = inventory_utils.read_ecad(dataset, index, diagnostics=diagnostics) elif dataset.name in ["hadex2"]: index_stations = inventory_utils.read_hadex2(dataset, index, diagnostics=diagnostics) elif dataset.name in ["south_america"]: index_stations = inventory_utils.read_generic_index(dataset, index, diagnostics=diagnostics) elif dataset.name in ["west_africa_pptn"]: index_stations = inventory_utils.read_generic_index(dataset, index, diagnostics=diagnostics) elif dataset.name in ["west_africa_indices"]: index_stations = inventory_utils.read_generic_index(dataset, index, diagnostics=diagnostics) elif dataset.name in ["arabia"]: if utils.REF_START == 1961 and utils.REF_END == 1990: index_stations = inventory_utils.read_arabia_6190_index(dataset, index, diagnostics=diagnostics) elif utils.REF_START == 1981 and utils.REF_END == 2010: index_stations = inventory_utils.read_arabia_8110_index(dataset, index, diagnostics=diagnostics) elif dataset.name in ["south_africa"]: index_stations = inventory_utils.read_generic_index(dataset, index, diagnostics=diagnostics) elif dataset.name in ["ghcndex"]: index_stations = inventory_utils.read_ghcndex(dataset, index, diagnostics=diagnostics) # if no station metadata, then move on to next one if index_stations == []: continue # extract names station_names_for_index = [stn.id for stn in index_stations] # check if new for n, name in enumerate(station_names_for_index): if name in all_names: # station exists already loc = all_names.index(name) # check all works out try: assert index_stations[n].latitude == all_stations[loc].latitude assert index_stations[n].longitude == all_stations[loc].longitude except AssertionError: if (index_stations[n].latitude - all_stations[loc].latitude) > 0.1: print("Station {} has mismatch in latitude: {} != {}".format(name, index_stations[n].latitude, all_stations[loc].latitude)) # sys.exit(1) if (index_stations[n].longitude - all_stations[loc].longitude) > 0.1: print("Station {} has mismatch in longitude: {} != {}".format(name, index_stations[n].longitude, all_stations[loc].longitude)) # sys.exit(1) else: all_names += [name] all_stations += [index_stations[n]] # sort alphabetically sort_order = np.argsort(np.array(all_names)) all_stations = np.array(all_stations)[sort_order] # for some data sources there are index and raw data supplied # For these, raw data produces the file, so for the index data, cross check if os.path.exists(os.path.join(dataset.location, "{}.metadata.txt".format(dataset.name))) and dataset.name in ["south_america", "south_africa"]: subset_stations = np.array([]) # cross check all stations for this_station in all_stations: keep = True with open(os.path.join(dataset.location, "{}.metadata.txt".format(dataset.name))) as infile: for line in infile: if re.search(this_station.id, line): keep = False if diagnostics: print("{} {} 
already in metadata file".format(dataset.name, this_station.id)) if keep: subset_stations = np.append(subset_stations, this_station) all_station = subset_stations else: # write out the header utils.write_climpact_inventory_header(os.path.join(dataset.location, "{}.metadata.txt".format(dataset.name))) # always prefer raw data over index data for this_station in all_stations: # write out the metadata for this station and index utils.write_climpact_inventory(os.path.join(dataset.location, "{}.metadata.txt".format(dataset.name)), this_station) return # main
def main():

    datasets = utils.get_input_datasets() # makes data structures

    return