Example #1
def process_dataset(dataset, diagnostics=False):
    """
    Read in the specified dataset inventory.  Check each station for suitability for HadEX3

    :param datasetObj dataset: dataset object holding metadata about the input dataset
    :param bool diagnostics: output diagnostic information
    """

    # read in the dataset inventory metadata
    dataset_stations = utils.read_inventory(dataset,
                                            subdir="formatted/indices")

    # spin through stations
    for station in dataset_stations:

        # spin through indices
        for index in PERCENTILE_INDICES:

            # select appropriate timescales
            if index in utils.MONTHLY_INDICES:
                timescales = ["ANN", "MON"]
            else:
                timescales = ["ANN"]

            # and spin through those
            for timescale in timescales:
                filepath = os.path.join(
                    station.location, station.id,
                    "{}_{}_{}.csv".format(station.id, index.lower(),
                                          timescale))

                if os.path.exists(filepath):
                    os.remove(filepath)

                    if diagnostics:
                        print("Removing {}".format(filepath))

    return  # process_dataset
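A note on the path convention: each example rebuilds the "<station id>_<index>_<timescale>.csv" path by hand. A small helper along the following lines could centralize this; it is a sketch, not part of the utils module shown here, and assumes a station object with .location and .id attributes as used above.

import os

def station_index_path(station, index, timescale):
    # hypothetical helper: build the "<id>_<index>_<timescale>.csv" path
    # convention used throughout these examples
    filename = "{}_{}_{}.csv".format(station.id, index.lower(), timescale)
    return os.path.join(station.location, station.id, filename)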
Example #2
def main(indata="ghcndex", index="R95pTOT", diagnostics=False):
    """
    Read PRCPTOT and other indices and write out


    """

    # check if need to do monthly ones
    if index in utils.MONTHLY_INDICES:
        timescales = ["ANN", "MON"]
    else:
        timescales = ["ANN"]

    # get all possible datasets
    all_datasets = utils.get_input_datasets()
    # and their names
    names = np.array([d.name for d in all_datasets])

    # if dataset selected and in the list of available, then run
    if indata in names:
        dataset = all_datasets[names == indata][0]

        dataset_stations = utils.read_inventory(dataset,
                                                subdir="formatted/indices")

        # check each station
        for stn in dataset_stations:

            if diagnostics:
                print("{} - {}".format(dataset.name, stn.id))

            # for appropriate number of timescales
            for ts in timescales:

                rXXp_path = os.path.join(
                    stn.location, stn.id,
                    "{}_{}_{}.csv".format(stn.id, PARTNERS[index].lower(),
                                          ts))
                prcptot_path = os.path.join(
                    stn.location, stn.id,
                    "{}_{}_{}.csv".format(stn.id, "prcptot", ts))

                if os.path.exists(rXXp_path) and os.path.exists(prcptot_path):
                    rtimes, rXXp = utils.read_station_index(
                        stn, PARTNERS[index].lower(), ts)
                    ptimes, prcptot = utils.read_station_index(
                        stn, "prcptot", ts)

                    match = np.in1d(rtimes, ptimes)
                    match_b = np.in1d(ptimes, rtimes)

                    if match.sum() != 0 and match_b.sum() != 0:

                        # restrict both series to their common timestamps
                        # before forming the percentage
                        rXXptot = (100 * rXXp[match]) / prcptot[match_b]
                        rXXptot_times = rtimes[match]

                        if ts == "MON":
                            myears = []
                            months = []
                            for y in rXXptot_times:
                                for m in range(1, 13):
                                    myears += [y]
                                    months += [m]

                            stn.monthly = rXXptot.filled().reshape(-1)
                            stn.myears = myears
                            stn.months = months
                            path = os.path.join(
                                dataset.location, "formatted", "indices",
                                stn.id,
                                "{}_{}_MON.csv".format(stn.id, index.lower()))
                            if not os.path.exists(path):
                                utils.write_station_index(path,
                                                          stn,
                                                          index,
                                                          doMonthly=True)

                        else:
                            stn.years = rXXptot_times
                            stn.annual = rXXptot.filled()
                            path = os.path.join(
                                dataset.location, "formatted", "indices",
                                stn.id,
                                "{}_{}_ANN.csv".format(stn.id, index.lower()))
                            if not os.path.exists(path):
                                utils.write_station_index(path, stn, index)

    return  # main
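The np.in1d calls above align the two time axes before the percentage is formed. A minimal sketch of that matching, with illustrative years only:

import numpy as np

rtimes = np.array([2000, 2001, 2002, 2004])  # years with R95p values
ptimes = np.array([2001, 2002, 2003, 2004])  # years with PRCPTOT values

match = np.in1d(rtimes, ptimes)    # True where rtimes also appears in ptimes
match_b = np.in1d(ptimes, rtimes)  # True where ptimes also appears in rtimes

common_times = rtimes[match]       # -> [2001, 2002, 2004]
# rXXp[match] and prcptot[match_b] are then aligned year-for-year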
Example #3
def main(index="TX90p", diagnostics=False, qc_flags=""):
    """
    The main DLS function

    :param str index: which index to run
    :param bool diagnostics: extra verbose output
    :param str qc_flags: which QC flags to process W, B, A, N, C, R, F, E, V, M
    """

    if index in utils.MONTHLY_INDICES:
        nmonths = 13
        timescale = "MON"
    else:
        nmonths = 1
        timescale = "ANN"

    # move this up one level eventually?
    all_datasets = utils.get_input_datasets()

    # spin through all datasets
    stations = np.array([])
    for dataset in all_datasets:

        try:
            ds_stations = utils.read_inventory(dataset,
                                               subdir="formatted/indices",
                                               final=True,
                                               timescale=timescale,
                                               index=index,
                                               qc_flags=qc_flags)
            good_stations = utils.select_qc_passes(ds_stations,
                                                   qc_flags=qc_flags)

            stations = np.append(stations, good_stations)

            print("Adding {} ({} stations), nstations = {}".format(
                dataset.name, len(good_stations), len(stations)))

        except IOError:
            # file missing
            print("No stations with data for {}".format(dataset.name))

    nstations = len(stations)

    # array of lats and lons for calculation of separations
    all_locations = np.array([[stn.latitude, stn.longitude]
                              for stn in stations])

    # get the separations (km, radians)
    stn_separation, stn_angle = get_separations(stations, all_locations)

    # assign stations to bands
    StationBands = assign_to_latitude_bands(stations)

    # read in all the station data
    #   (nyears and month_names are assumed defined at module level)
    all_data = get_all_data(stations, index, timescale, nyears, nmonths)

    # set up the DLS defaults
    bins = np.arange(0, MAX_SEPARATION + BIN_WIDTH, BIN_WIDTH)

    all_dls = np.zeros([len(utils.LAT_BANDS), nmonths])
    all_dls[:] = utils.DEFAULT_DLS

    # now spin through all latitude bands and months.
    for lb, band in enumerate(utils.LAT_BANDS):

        stations_in_bands, = np.where(StationBands == lb)

        if len(stations_in_bands) <= 30:
            # insufficient stations within this latitude band, next band
            if diagnostics:
                print("Index {}, Band {} to {}".format(index, band[0],
                                                       band[1]))
                print("Number of stations {}".format(len(stations_in_bands)))
            print("Ann, Jan -- Dec, DLS = {} km".format(utils.DEFAULT_DLS))

            # spin through months to remove old plots if they exist
            for month in range(nmonths):
                old_plot = os.path.join(
                    utils.PLOTLOCS, "DLS",
                    "DLS_{}_{}_{}to{}.png".format(index, month_names[month],
                                                  band[0], band[1]))
                if os.path.exists(old_plot):
                    os.remove(old_plot)
            continue

        print("{}, # stations {}".format(band, len(stations_in_bands)))

        # process each month
        for month in range(nmonths):
            print(month_names[month])

            month_data = all_data[stations_in_bands, :, month]

            names = [s.id for s in stations[stations_in_bands]]

            # get the separation and correlation for each cross pair
            # correlations only from 1951 (match HadEX2)
            cor_yr = 1951 - utils.STARTYEAR.year
            seps, cors = separations_and_correlations(
                month_data[:, cor_yr:],
                stn_separation[stations_in_bands, :][:, stations_in_bands],
                names,
                diagnostics=diagnostics)

            if len(seps) == 0 and len(cors) == 0:
                # then none of the available stations either had sufficient overlapping data
                #  or values at that particular point (correlations of lots of zeros doesn't mean anything)
                #  so escape and go on to next month
                if diagnostics:
                    print("Index {}, Band {} to {}, month {}".format(
                        index, band[0], band[1], month_names[month]))
                    print("Number of stations {}".format(
                        len(stations_in_bands)))
                    print(
                        "Likely that all values for this index, month and band are zero\n hence correlations don't mean anything"
                    )
                    print("Using default DLS = {}km".format(utils.DEFAULT_DLS))
                else:
                    print("No stations, {} - {} DLS = {} km".format(
                        band, month_names[month], utils.DEFAULT_DLS))
                continue

            # get the bins
            bin_assignment = np.digitize(
                seps, bins, right=True)  # right=True: right bin edge included
            bin_centers = bins - BIN_WIDTH / 2.

            # average value for each bin if sufficient correlations to do so.
            means = np.zeros(len(bins))
            sigmas = np.zeros(len(bins))
            for b, bin in enumerate(bins):
                locs, = np.where(bin_assignment == b)

                if len(locs) > MIN_PER_BIN:
                    # median is more robust to outlying correlations
                    means[b] = np.ma.median(cors[locs])
                    sigmas[b] = np.ma.std(cors[locs])

            filled_bins, = np.where(means != 0)

            # if sufficient bins are filled then fit the curve
            if len(filled_bins) / float(len(bins)) >= 0.5:

                if utils.FIX_ZERO:
                    # fix zero bin to be 1.0, and use bin edges, not centres (HadEX2)
                    means[0] = 1.
                    sigmas[0] = sigmas[1]
                    dls, plot_curve, chisq, R2 = exponential_fit(bins,
                                                                 means,
                                                                 sigmas,
                                                                 C=C)
                else:
                    dls, plot_curve, chisq, R2 = exponential_fit(
                        bin_centers[1:], means[1:], sigmas[1:], C=C)

                # only take fit if greater than minimum set overall
                all_dls[lb, month] = np.max([dls, utils.DEFAULT_DLS])

                # test at 5% level and 2 or 3 dofs, as per HadEX2
                if utils.FIX_ZERO and chisq >= chi2.isf(
                        0.05,
                        len(bins[sigmas != 0]) - 2):
                    print("inadequately good fit")
                    all_dls[lb, month] = utils.DEFAULT_DLS
                elif not utils.FIX_ZERO and chisq >= chi2.isf(
                        0.05, len(bins[sigmas != 0]) - 3):
                    print("inadequately good fit")
                    all_dls[lb, month] = utils.DEFAULT_DLS

                # plot the fit if required
                plt.clf()
                plt.scatter(seps,
                            cors,
                            c='b',
                            marker='.',
                            alpha=0.1,
                            edgecolor=None)

                # calculate the 2D density of the data given
                counts, xbins, ybins = np.histogram2d(seps, cors, bins=50)

                # make the contour plot (5 levels)
                plt.contour(counts.transpose(),
                            5,
                            extent=[
                                xbins.min(),
                                xbins.max(),
                                ybins.min(),
                                ybins.max()
                            ],
                            linewidths=1,
                            colors='black',
                            linestyles='solid')

                if utils.FIX_ZERO:
                    plt.plot(bins[sigmas != 0], means[sigmas != 0], 'ro')
                    plt.errorbar(bins[sigmas != 0],
                                 means[sigmas != 0],
                                 yerr=sigmas[sigmas != 0],
                                 fmt="none",
                                 ecolor="r")
                    plt.plot(bins, plot_curve, c='cyan', ls='-', lw=2)
                else:
                    plt.plot(bin_centers[1:][sigmas[1:] != 0],
                             means[1:][sigmas[1:] != 0], 'ro')
                    plt.errorbar(bin_centers[1:][sigmas[1:] != 0],
                                 means[1:][sigmas[1:] != 0],
                                 yerr=sigmas[1:][sigmas[1:] != 0],
                                 fmt="none",
                                 ecolor="r")
                    plt.plot(bin_centers[1:],
                             plot_curve,
                             c='cyan',
                             ls='-',
                             lw=2)  # plot curve will have been truncated

                plt.axvline(dls, c='magenta', ls="--", lw=2)
                plt.axvline(utils.DEFAULT_DLS, c='k', ls=":", lw=1)
                plt.axvline(utils.MAX_DLS, c='k', ls=":", lw=1)
                plt.text(dls + 10, 0.95, "dls = {:4.0f}km".format(dls))
                plt.text(3010, -0.95, "r2 = {:6.4f}".format(R2))
                plt.text(3010, -0.85, "chi2 = {:6.4f}".format(chisq))
                plt.text(3010, -0.75,
                         "Nstat = {}".format(len(stations_in_bands)))
                plt.xlim([-100, 5000])
                plt.ylim([-1, None])

                plt.xlabel("Separation (km)")
                plt.ylabel("Correlation")
                plt.title("{} - {}; {} to {}".format(index, month_names[month],
                                                     band[0], band[1]))

                # add text to show what code created this and when
                if utils.WATERMARK:
                    watermarkstring = "/".join(
                        os.getcwd().split('/')[4:]) + '/' + os.path.basename(
                            __file__) + "   " + dt.datetime.strftime(
                                dt.datetime.now(), "%d-%b-%Y %H:%M")
                    plt.figtext(0.01, 0.01, watermarkstring, size=6)

                ref_period = "{}-{}".format(str(utils.REF_START)[-2:],
                                            str(utils.REF_END)[-2:])
                if utils.FIX_ZERO:
                    outname = "DLS_{}_{}_{}_{}to{}_fixzero.png".format(
                        index, ref_period, month_names[month], band[0],
                        band[1])
                else:
                    outname = "DLS_{}_{}_{}_{}to{}.png".format(
                        index, ref_period, month_names[month], band[0],
                        band[1])
                plt.savefig(os.path.join(utils.PLOTLOCS, "DLS", outname),
                            dpi=300)

                print("DLS = {:7.2f} km".format(dls))

            else:
                print("insufficient bins for fit ({}/{})".format(
                    len(filled_bins), len(bins)))
                ref_period = "{}-{}".format(str(utils.REF_START)[-2:],
                                            str(utils.REF_END)[-2:])
                old_plot = os.path.join(
                    utils.PLOTLOCS, "DLS",
                    "DLS_{}_{}_{}_{}to{}.png".format(index, ref_period,
                                                     month_names[month],
                                                     band[0], band[1]))
                if os.path.exists(old_plot):
                    # remove old plots!
                    os.remove(old_plot)

    # replace those dls < default with default and > max with max
    all_dls[all_dls < utils.DEFAULT_DLS] = utils.DEFAULT_DLS
    all_dls[all_dls > utils.MAX_DLS] = utils.MAX_DLS

    # interpolate
    grid_dls = interpolate_dls_to_grid(all_dls, nmonths)

    # write output file
    write_dls_file(os.path.join(utils.DLSLOCS, "dls_{}.txt".format(index)),
                   grid_dls, nmonths, month_names)

    return  # main
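exponential_fit itself is not listed in these examples. As a hedged sketch of what such a routine could look like - assuming a plain exp(-x/dls) decay fitted with scipy's curve_fit; the real implementation (with its C parameter, zero-bin handling and curve truncation) may differ:

import numpy as np
from scipy.optimize import curve_fit

def _decay(x, dls):
    # correlation falls off with separation x; dls is the separation at
    # which the fitted correlation drops to 1/e
    return np.exp(-x / dls)

def exponential_fit_sketch(seps, cors, sigmas):
    # weighted least-squares fit of the decay curve to the binned
    # correlations (assumes all sigmas are non-zero)
    popt, _ = curve_fit(_decay, seps, cors, p0=[200.0],
                        sigma=sigmas, absolute_sigma=True)
    fitted = _decay(seps, popt[0])
    chisq = np.sum(((cors - fitted) / sigmas) ** 2)
    r2 = 1 - np.sum((cors - fitted) ** 2) / np.sum((cors - cors.mean()) ** 2)
    return popt[0], fitted, chisq, r2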
Example #4
def main(indata="ghcnd", diagnostics=False):
    """
    Call the R package climpact2 with appropriate settings to calculate the indices

    :param str indata: name of dataset to process
    :param bool diagnostics: output diagnostic information
    """

    # get all possible datasets
    all_datasets = utils.get_input_datasets()

    # and their names
    names = np.array([d.name for d in all_datasets])

    # select the matching one
    if indata in names:
        dataset = all_datasets[names == indata][0]
        '''
        Process call structure

        climpact2.batch.stations.r ./sample_data/ ./sample_data/climpact2.sample.batch.metadata.txt 1971 2000 4
        '''

        # check that there are stations to process for this dataset
        stations = utils.read_inventory(dataset)
        if len(stations) != 0:

            try:
                with utils.cd(utils.CLIMPACT_LOCS):
                    # call the R process - which should automatically do everything and make suitable files etc
                    #  runs in subfolder with context manager, so returning to parent once done.

                    # ACRE (and others?) have stations that do not overlap the reference period.
                    #   Means that the QC process throws them out if insufficient overlap between data and reference period

                    if dataset.name == "acre":
                        ref_start = 1901
                        ref_end = 1930
                    else:
                        ref_start = utils.REF_START
                        ref_end = utils.REF_END

                    print(" ".join([
                        "Rscript", "climpact2.batch.stations.r",
                        os.path.join(dataset.location, "formatted"),
                        os.path.join(dataset.location,
                                     "{}.metadata.txt".format(dataset.name)),
                        str(ref_start),
                        str(ref_end),
                        str(utils.NCORES)
                    ]))

                    subprocess.check_call([
                        "Rscript", "climpact2.batch.stations.r",
                        os.path.join(dataset.location, "formatted"),
                        os.path.join(dataset.location,
                                     "{}.metadata.txt".format(dataset.name)),
                        str(ref_start),
                        str(ref_end),
                        str(utils.NCORES)
                    ])

            except subprocess.CalledProcessError:
                # error in the called executable - re-raise it
                raise

            except OSError:
                # executable not found
                print("Cannot find Rscript")
                raise

        # fail gracefully
        else:
            print("No stations available in {}".format(indata))
            print("  Climpact2 not run")

        # remove plots, qc, thres and trend folders (save space)
        if utils.REMOVE_EXTRA:
            for subdir in ["plots", "qc", "thres", "trend"]:
                try:
                    shutil.rmtree(
                        os.path.join(dataset.location, "formatted", subdir))
                except FileNotFoundError:
                    print("{} doesn't exist".format(
                        os.path.join(dataset.location, "formatted", subdir)))

    # fail gracefully
    else:
        print("data name not available: {}\n".format(indata))
        print("available data names: {}".format(" ".join(names)))

    return  # main
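The utils.cd context manager used above (to run Rscript from inside CLIMPACT_LOCS and return to the parent directory afterwards) is not listed here. A typical implementation, sketched on that assumption, looks like this:

import contextlib
import os

@contextlib.contextmanager
def cd(new_dir):
    # enter new_dir for the duration of the with-block, then restore the
    # previous working directory even if an exception occurs
    prev_dir = os.getcwd()
    os.chdir(new_dir)
    try:
        yield
    finally:
        os.chdir(prev_dir)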
Example #5
def main(indata="acre", diagnostics=False):
    """
    Call the R package climpact2 with appropriate settings to calculate the indices

    :param str indata: name of dataset to process
    :param bool diagnostics: output diagnostic information
    """


    # get all possible datasets
    all_datasets = utils.get_input_datasets()

    # and their names
    names = np.array([d.name for d in all_datasets])

    # select the matching one
    if indata in names:
        dataset = all_datasets[names == indata][0]

        # check that there are stations to process for this dataset
        stations = utils.read_inventory(dataset)
        if len(stations) != 0:

            for station in stations:

                # read the station data
                infile = os.path.join(dataset.location, "formatted", "{}.txt".format(station.id))
                indata = np.genfromtxt(infile)

                # get the first year and last year
                ref_start = int(indata[0][0])
                ref_end = int(indata[-1][0])

                # write a temporary inventory file for just this station
                utils.write_climpact_inventory_header(os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)))
                utils.write_climpact_inventory(os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)), station)

                try:
                    with utils.cd(utils.CLIMPACT_LOCS):
                        # call the R process - which should automatically do everything and make suitable files etc
                        #  runs in subfolder with context manager, so returning to parent once done.

                        # ACRE (and others?) have stations that do not overlap the reference period.
                        #   Means that the QC process throws them out if insufficient overlap between data and reference period

                        print(" ".join(["Rscript", "climpact2.batch.stations.r", os.path.join(dataset.location, "formatted"), os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)), str(ref_start), str(ref_end), str(utils.NCORES)]))

                        subprocess.check_call(["Rscript", "climpact2.batch.stations.r", os.path.join(dataset.location, "formatted"), os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)), str(ref_start), str(ref_end), str(utils.NCORES)])

                except subprocess.CalledProcessError:
                    # error in the called executable - re-raise it
                    raise

                except OSError:
                    # executable not found
                    print("Cannot find Rscript")
                    raise

                # remove temporary metadata file
                os.remove(os.path.join(dataset.location, "{}_temp.metadata.txt".format(dataset.name)))

        # fail gracefully
        else:
            print("No stations available in {}".format(indata))
            print("  Climpact2 not run")

        # remove plots, qc, thres and trend folders (save space)
        if utils.REMOVE_EXTRA:
            for subdir in ["plots", "qc", "thres", "trend"]:
                try:
                    shutil.rmtree(os.path.join(dataset.location, "formatted", subdir))
                except FileNotFoundError:
                    print("{} doesn't exist".format(os.path.join(dataset.location, "formatted", subdir)))


    # fail gracefully
    else:
        print("data name not available: {}\n".format(indata))
        print("available data names: {}".format(" ".join(names)))
        

    return # main
Example #6
def adw(all_datasets,
        index,
        timescale,
        nyears,
        qc_flags="",
        month_index=0,
        diagnostics=False,
        hadex2_adw=False,
        anomalies="None"):
    """
    Angular Distance Weighting

    :param array all_datasets: array of dataset objects
    :param str index: which index to run
    :param str timescale: which timescale (MON/ANN)
    :param int nyears: number of years - to define array
    :param str qc_flags: which QC flags to process W, B, A, N, C, R, F, E
    :param int month_index: which month to read 
    :param bool diagnostics: output diagnostic information
    :param bool hadex2_adw: use the HadEX2 (erroneous) ADW method
    :param str anomalies: run code on anomalies or climatology rather than raw data

    """

    # http://journals.ametsoc.org/doi/pdf/10.1175/1520-0442%282000%29013%3C2217%3ARTCSTC%3E2.0.CO%3B2

    # change to do one month at a time to parallelise a little
    nmonths = 1
    loopwise = False
    print("Running Month {}".format(month_index))

    def calculate_cosine_term(stns_in_dls,
                              box_angle,
                              station_angle,
                              hadex2_adw=False):
        """
        Helper routine to calculate the cosine term for the weighting function
        """

        if hadex2_adw:
            box_angle_k = station_angle[stns_in_dls][:, stns_in_dls]
            box_angle_i = np.tile(box_angle[stns_in_dls],
                                  (stns_in_dls.shape[0], 1))
        else:
            # do change as RJHD thinks - November 2017
            #   So that it is the angles between the stations and the box centre
            #   rather than the station and other stations.
            box_angle_i = np.tile(box_angle[stns_in_dls],
                                  (stns_in_dls.shape[0], 1))
            box_angle_k = box_angle_i.T

        cosines = np.cos(box_angle_k - box_angle_i)
        box_angle_k = 0
        box_angle_i = 0

        return cosines  # calculate_cosine_term

    def calculate_weighting_term(distance_weight, stns_in_dls):
        """
        Helper routine to calculate the weighting term
        """

        # tile the distance_weight array so replicated for all sid
        dist_weight_array = np.tile(distance_weight, (len(stns_in_dls), 1))
        dist_weight_array = np.ma.swapaxes(dist_weight_array, 0, 1)

        # set diagonal to zero (k != l)
        diag = np.arange(dist_weight_array.shape[-1])
        dist_weight_array[diag, diag] = 0.0

        return dist_weight_array  # calculate_weighting_term

    def calculate_top(dist_weight_array, cosines, nyears, mask):
        """
        Helper routine to calculate the top part of the weighting function
        """

        top_part = dist_weight_array * (1.0 - cosines)

        # now repeat this nyears times
        top_part = np.tile(top_part, (nyears, 1, 1))
        # as doing the sum, can just set masked elements to zero
        top_part[mask == True] = 0

        return np.sum(top_part, axis=-2)  # calculate_top

    def calculate_bottom(dist_weight_array, nyears, mask):
        """
        Helper routine to calculate the bottom part of the weighting function
        """

        bottom_part = np.tile(dist_weight_array, (nyears, 1, 1))
        # as doing the sum, can just set masked elements to zero
        bottom_part[mask == True] = 0

        return np.sum(bottom_part, axis=-2)  # calculate_bottom

    def calculate_adw(distance_weight, mask, top, bottom):
        """
        Helper routine to calculate the angular distance weights
        """

        distance_weight = np.tile(distance_weight, (nyears, 1))
        distance_weight[mask == True] = 0

        return distance_weight * (1 + (top / bottom))  # calculate_adw

    def calculate_separations_and_angles(stations, station_locs):
        """
        Calculate the station-station separation and bearing arrays
        """
        separation = np.zeros((stations.shape[0], stations.shape[0]))
        angle = np.copy(separation)

        for s, stn in enumerate(stations):
            this_stn = np.empty([len(stations), 2])
            this_stn[:, 0] = stn.latitude
            this_stn[:, 1] = stn.longitude
            separation[s, :], angle[s, :] = utils.map_2_points(
                this_stn, station_locs)

        return separation, angle

    #*******************************************

    # set up the grids
    GridData, GridStations, GridDLSStations = set_up_grids(nyears, nmonths)

    # get the DLS
    raw_dls = np.genfromtxt(os.path.join(utils.DLSLOCS,
                                         "dls_{}.txt".format(index)),
                            dtype=(float),
                            skip_header=4)

    dls_lat = raw_dls[:, 0]
    dls = raw_dls[:, 1:]

    # get the stations which actually have data
    # spin through all datasets
    stations = np.array([])
    for dataset in all_datasets:

        try:
            # choose appropriate subdirectory.
            if anomalies == "None":
                subdir = "formatted/indices"
            elif anomalies == "anomalies":
                subdir = "formatted/anomalies"
            elif anomalies == "climatology":
                subdir = "formatted/climatology"

            ds_stations = utils.read_inventory(dataset,
                                               subdir=subdir,
                                               final=True,
                                               timescale=timescale,
                                               index=index,
                                               anomalies=anomalies,
                                               qc_flags=qc_flags)

            good_stations = utils.select_qc_passes(ds_stations,
                                                   qc_flags=qc_flags)

            stations = np.append(stations, good_stations)

            print("Adding {}, nstations = {}".format(dataset.name,
                                                     len(stations)))

        except IOError:
            # file missing
            print("No stations with data for {}".format(dataset.name))

    # may have no stations for particular ETSCI combinations
    if len(stations) == 0:
        if diagnostics:
            print("No stations for {} - {}".format(index, timescale))
        return GridData, GridStations, GridDLSStations  # adw

    station_locs = np.array([[stn.latitude, stn.longitude]
                             for stn in stations])

    # get the distance and bearing arrays
    station_separation, station_angle = calculate_separations_and_angles(
        stations, station_locs)

    #*********************
    # read in all the data in one step for all the stations
    all_station_data = np.ma.zeros([nyears, nmonths, len(stations)])
    all_station_data.mask = np.ones(
        np.shape(all_station_data))  # mask everything
    latitudes = np.zeros(len(stations))
    longitudes = np.zeros(len(stations))

    # big and slow read loop
    for s, stat in enumerate(stations):
        data = get_all_data(stat, index, timescale, nyears, month_index)
        all_station_data[:, :, s] = data  # store all the info
        all_station_data.mask[:, :, s] = data.mask  # store all the info
        latitudes[s] = stat.latitude
        longitudes[s] = stat.longitude

    #*********************
    # run through each grid box
    for tlats, latitude in enumerate(utils.box_centre_lats):

        print(str(tlats) + "/" + str(len(utils.box_centre_lats)), latitude)

        for tlons, longitude in enumerate(utils.box_centre_lons):

            # distance of this box centre to all stations
            this_box = np.empty([len(stations), 2])
            this_box[:, 0] = latitude
            this_box[:, 1] = longitude
            box_separation, box_angle = utils.map_2_points(
                this_box, station_locs)

            # find those stations close enough to contribute
            # need to adjust if doing all months so that can read in all, but restrict to relevant ones if necessary
            stns_in_dls, = np.where(box_separation <= np.max(dls[tlats]))
            stns_in_dls_separations = box_separation[stns_in_dls]

            if len(stns_in_dls) < utils.STATIONS_IN_DLS:
                # none of the months have DLS such that sufficient stations are included
                #   skip to next box
                if diagnostics:
                    print(
                        "skipping lat {}, lon {} - no stations in range (max DLS = {})"
                        .format(latitude, longitude, np.max(dls[tlats])))
                continue

            # REMOVED FOR SINGLE READ
            # set up blank array to store all station data that can contribute
            #   stations_contrib_to_box_data = np.ma.zeros([nyears, nmonths, len(stns_in_dls)])
            #   stations_contrib_to_box_data.mask = np.ones(np.shape(stations_contrib_to_box_data)) # mask everything
            # REMOVED FOR SINGLE READ

            # this is for the stations actually within the grid box!
            stations_in_box = np.zeros([nyears, nmonths])

            print(" nstats {}".format(len(stns_in_dls)))

            # get the stations contributing to the box (i.e. within a DLS)
            stations_contrib_to_box_data = np.ma.copy(
                all_station_data[:, :, stns_in_dls])
            stations_in_box = np.ma.count(all_station_data[:, :, stns_in_dls],
                                          axis=2)

            # or get the stations located in the box
            # lat_locs, = np.where(np.logical_and(utils.box_edge_lats[tlats] < latitudes, latitudes <= utils.box_edge_lats[tlats+1]))
            # lon_locs, = np.where(np.logical_and(utils.box_edge_lons[tlons] < longitudes, longitudes <= utils.box_edge_lons[tlons+1]))
            # # station matches both latitude and longitude constraints
            # both_lat_and_lon = np.in1d(lat_locs, lon_locs)
            # in_box_locs = lat_locs[both_lat_and_lon]
            # if len(in_box_locs) > 0:
            #     stations_in_box = np.ma.count(all_station_data[:, :, in_box_locs], axis=2)

            # REMOVED FOR SINGLE READ
            #             # read in all the stations - and do this once
            #             for s, li in enumerate(stns_in_dls):
            # #                if diagnostics:
            # #                    print(stations[li], stns_in_dls_separations[s])

            #                 # read in the station - matching done in subroutine
            #                 data = get_all_data(stations[li], index, timescale, nyears, month_index)

            #                 stations_contrib_to_box_data[:, :, s] = data # store all the info
            #                 stations_contrib_to_box_data.mask[:, :, s] = data.mask # store all the info

            #                 # and number of stations in the box
            #                 # need to subtract if not present at any year or with smaller DLS
            #                 if (utils.box_edge_lats[tlats] < stations[li].latitude <= utils.box_edge_lats[tlats+1]) \
            #                         and (utils.box_edge_lons[tlons] < stations[li].longitude <= utils.box_edge_lons[tlons+1]):
            #                     stations_in_box[data.mask == False] += 1
            # REMOVED FOR SINGLE READ

            # go through each month label - not used as parallelised instead
            for month in range(nmonths):

                if diagnostics:
                    print(
                        "latitude {} ({}), longitude {} ({}), dls {}, nstations {}"
                        .format(
                            latitude, tlats, longitude, tlons,
                            dls[tlats][month_index],
                            len(
                                np.where(box_separation[stns_in_dls] <
                                         dls[tlats][month_index])[0])))

                # get weights
                distance_weight = np.exp(
                    utils.M * -box_separation[stns_in_dls] /
                    dls[tlats][month_index])  # for each contributing station

                # filter out stations too far away for this month DLS
                sep_locs, = np.where(
                    stns_in_dls_separations > dls[tlats][month_index])
                stations_contrib_to_box_data[:, month, sep_locs] = 0
                stations_contrib_to_box_data.mask[:, month, sep_locs] = True

                if loopwise:
                    pass
#                     # this is the original longhand version.
#                     for year in range(nyears):

#                         if np.ma.count(stations_contrib_to_box_data[:, month], axis=1)[year] < utils.STATIONS_IN_DLS:
#                             # insufficient stations - don't bother
#                             continue

#                         # which stations do contribute
#                         this_year_mask = -stations_contrib_to_box_data.mask[year, month]

#                         if diagnostics:
#                             # testing long-hand looping
#                             # using Caesar et al terminology - http://onlinelibrary.wiley.com/doi/10.1029/2005JD006280/pdf
#                             w_is=[]
#                             for i in stns_in_dls[this_year_mask]:
#                                 w_i = np.exp(utils.M * -box_separation[i]/dls[tlats][month_index])

#                                 tops=[]
#                                 bottoms=[]

#                                 for k in stns_in_dls[this_year_mask]: # all other stations
#                                     if i != k:

#                                         w_k = np.exp(utils.M * -box_separation[k]/dls[tlats][month_index])

#                                         bottoms += [w_k]
#                                         if hadex2_adw:
#                                             tops += [w_k * (1.0 - np.cos(station_angle[k,i] - box_angle[i]))]
#                                         else:
#                                             tops += [w_k * (1.0 - np.cos(box_angle[k] - box_angle[i]))]

#                                 w_is += [w_i * (1 + np.sum(tops)/np.sum(bottoms))]
# #                            print("weights", w_is/sum(w_is))

#                         # tile the distance_weight array so replicated for all sid
#                         dist_weight_array = np.tile(distance_weight[this_year_mask], (np.ma.count(stations_contrib_to_box_data[year, month]), 1))
#                         dist_weight_array = np.swapaxes(dist_weight_array, 0, 1)

#                         # set diagonal to zero (k != l)
#                         diag = np.arange(dist_weight_array.shape[0])
#                         dist_weight_array[diag, diag] = 0.0

#                         # make a mesh of these so can subtract.
#                         box_angle_i = np.tile(box_angle[stns_in_dls][this_year_mask],(stns_in_dls[this_year_mask].shape[0],1))

#                         if hadex2_adw:
#                             box_angle_k = station_angle[stns_in_dls[this_year_mask]][:,stns_in_dls[this_year_mask]]
#                         else:
#                             box_angle_k = box_angle_i.T

#                         # get array for top
#                         top_sum = dist_weight_array * (1.0 - np.cos(box_angle_k - box_angle_i))

#                         top = np.sum(top_sum, axis=0)
#                         bottom = np.sum(dist_weight_array, axis=0)

#                         # un-normalised weights
#                         angular_distance_weight = distance_weight[this_year_mask] * (1 + top / bottom)

#                         final_weights = angular_distance_weight/np.ma.sum(angular_distance_weight)

# #                        if diagnostics: print("weights", final_weights)  # should match print line above)

#                         GridData[year, month, tlats, tlons] = np.ma.sum(final_weights * stations_contrib_to_box_data[year, month].compressed())

#                         GridStations[year, month, tlats, tlons] = stations_in_box[year, month]
# #                        if diagnostics:
# #                            print(GridData[year, month, tlats, tlons])
# #                            raw_input("stop {}".format(hadex2_adw))

                else:  # not loopwise
                    """
                    W_k = weight_k * (1 + a_k)
                    
                    a_k = top/bottom
                    
                    bottom = sum_1_nstations(w_k)
                    
                    top = sum_1_nstations(w_k * (1 - cos(theta_k - theta_l))),  k != l
                    """

                    # aim to remove this loop (longer than months loop, so saves more time?)
                    # if sufficient stations
                    insufficient_station_count = np.ma.count(
                        stations_contrib_to_box_data[:, month, :], axis=1)

                    if max(insufficient_station_count) < utils.STATIONS_IN_DLS:
                        # no year has sufficient stations
                        if diagnostics:
                            print(
                                "skipping lat {}, lon {}, month {} - no stations in range (DLS = {})"
                                .format(latitude, longitude, month + 1,
                                        dls[tlats]))
                        continue

                    this_month_mask = stations_contrib_to_box_data.mask[:,
                                                                        month, :]
                    mask = np.array([make_square(m) for m in this_month_mask])

                    # calculate angular part
                    cosines = calculate_cosine_term(stns_in_dls,
                                                    box_angle,
                                                    station_angle,
                                                    hadex2_adw=hadex2_adw)

                    # calculate weight part
                    dist_weight_array = calculate_weighting_term(
                        distance_weight, stns_in_dls)

                    # cosine and dist_weight the same each year - doesn't change
                    # calculate the top without years, clear memory, then expand and mask
                    top = calculate_top(dist_weight_array, cosines, nyears,
                                        mask)
                    cosines = 0

                    bottom = calculate_bottom(dist_weight_array, nyears, mask)
                    dist_weight_array = 0

                    angular_distance_weight = calculate_adw(
                        distance_weight, this_month_mask, top, bottom)
                    top = 0
                    bottom = 0

                    normalisation = np.ma.sum(angular_distance_weight, axis=1)
                    final_weights = angular_distance_weight / normalisation[:,
                                                                            None]

                    normalisation = 0
                    angular_distance_weight = 0

                    #                    if diagnostics: print(final_weights)

                    GridData[:, month, tlats, tlons] = np.ma.sum(
                        final_weights *
                        stations_contrib_to_box_data[:, month, :],
                        axis=1)
                    GridData.mask[
                        insufficient_station_count < utils.STATIONS_IN_DLS,
                        month, tlats, tlons] = True
                    GridStations[:, month, tlats,
                                 tlons] = stations_in_box[:, month]
                    GridDLSStations[:, month, tlats,
                                    tlons] = insufficient_station_count

#                    if diagnostics:
#                        print(np.max(insufficient_station_count))
#                        print(GridData[:, month, tlats, tlons])
#                        raw_input("stop {}".format(hadex2_adw))

                gc.collect()
                sys.stdout.flush()
    return GridData, GridStations, GridDLSStations  # adw
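The vectorised helpers above implement the weighting W_k = w_k * (1 + a_k) set out in the comment block inside the function. A tiny looped sketch with made-up numbers (three stations, no masking) shows the same calculation:

import numpy as np

# illustrative values only: distance weights w_k = exp(m * -d_k / dls)
# and station bearings (radians) from the grid-box centre
w = np.array([0.9, 0.5, 0.2])
theta = np.radians([10.0, 100.0, 250.0])

adw = np.zeros_like(w)
for k in range(len(w)):
    others = [l for l in range(len(w)) if l != k]
    top = sum(w[l] * (1 - np.cos(theta[l] - theta[k])) for l in others)
    bottom = sum(w[l] for l in others)
    adw[k] = w[k] * (1 + top / bottom)

final_weights = adw / adw.sum()  # normalised, as in the vectorised code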
Example #7
def main(index="TX90p", diagnostics=False, qc_flags="", anomalies="None"):
    """
    Read inventories and make scatter plot

    :param str index: which index to run
    :param bool diagnostics: extra verbose output
    :param str qc_flags: which QC flags to process W, B, A, N, C, R, F, E, V, M
    :param str anomalies: run code on anomalies or climatology rather than raw data

    """

    if index in utils.MONTHLY_INDICES:
        timescale = ["ANN", "MON"]
    else:
        timescale = ["ANN"]

    # move this up one level eventually?
    all_datasets = utils.get_input_datasets()

    for ts in timescale:
        # set up the figure
        fig = plt.figure(figsize=(10, 6.5))
        plt.clf()
        ax = plt.axes([0.025, 0.14, 0.95, 0.90], projection=cartopy.crs.Robinson())
        ax.gridlines() #draw_labels=True)
        ax.add_feature(cartopy.feature.LAND, zorder=0, facecolor="0.9", edgecolor="k")
        ax.coastlines()

        # dummy scatters for full extent
        plt.scatter([-180, 180, 0, 0], [0, 0, -90, 90], c="w", s=1,
                    transform=cartopy.crs.Geodetic(),
                    edgecolor='w', linewidth=0.01)

        # run all datasets
        total = 0
        for dataset in all_datasets:

            try:
                # choose appropriate subdirectory.
                if anomalies == "None":
                    subdir = "formatted/indices"
                elif anomalies == "anomalies":
                    subdir = "formatted/anomalies"
                elif anomalies == "climatology":
                    subdir = "formatted/climatology"

                ds_stations = utils.read_inventory(dataset, subdir=subdir, final=True, \
                                                   timescale=ts, index=index, anomalies=anomalies, qc_flags=qc_flags)
                ds_stations = utils.select_qc_passes(ds_stations, qc_flags=qc_flags)

            except IOError:
                # file missing
                print("No stations with data for {}".format(dataset.name))
                ds_stations = []

            if len(ds_stations) > 0:
                lats = np.array([stn.latitude for stn in ds_stations])
                lons = np.array([stn.longitude for stn in ds_stations])

                # and plot
                scatter = plt.scatter(lons, lats, c=COLOURS[dataset.name], s=15,
                                      label="{} ({})".format(get_label(dataset.name), len(ds_stations)),
                                      transform=cartopy.crs.Geodetic(),
                                      edgecolor='0.5', linewidth=0.5)

                total += len(ds_stations)

        # make a legend
        leg = plt.legend(loc='lower center', ncol=5, bbox_to_anchor=(0.50, -0.3), \
                             frameon=False, title="", prop={'size':12}, labelspacing=0.15, columnspacing=0.5, numpoints=3)
        plt.setp(leg.get_title(), fontsize=12)

        plt.figtext(0.06, 0.91, "{} Stations".format(total))
        plt.title("{} - {}".format(index, ts))

        # extra information
        if utils.WATERMARK:
            watermarkstring = "{} {}".format(os.path.join("/".join(os.getcwd().split('/')[4:]), os.path.basename(__file__)), dt.datetime.strftime(dt.datetime.now(), "%d-%b-%Y %H:%M"))
            plt.figtext(0.01, 0.01, watermarkstring, size=6)
#        plt.figtext(0.03, 0.95, "(c)", size=14)

        # and save
        outname = putils.make_filenames("station_locations", index=index, grid="ADW", anomalies=anomalies, month=ts.capitalize())

        plt.savefig("{}/{}/{}".format(utils.PLOTLOCS, index, outname))
            
        plt.close()

        # write out total station number
        if ts == "ANN":
            with open(os.path.join(utils.INFILELOCS, "{}_stations.txt".format(index)), "w") as outfile:
                outfile.write("{}\n".format(index))
                outfile.write("{}".format(total))
        
    return # main
Example #8
def cam(all_datasets,
        index,
        timescale,
        nyears,
        qc_flags="",
        month_index=0,
        diagnostics=False,
        anomalies="None"):
    """
    Climate anomaly method gridding

    :param array all_datasets: array of dataset objects
    :param str index: which index to run
    :param str timescale: which timescale (MON/ANN)
    :param int nyears: number of years - to define array
    :param str qc_flags: which QC flags to process W, B, A, N, C, R, F, E, V, M
    :param int month_index: which month to read 
    :param bool diagnostics: output diagnostic information
    :param str anomalies: run code on anomalies or climatology rather than raw data

    """
    # change to do one month at a time to parallelise a little
    nmonths = 1
    print("Running Month {}".format(month_index))

    GridData, GridStations, dummy = set_up_grids(nyears, nmonths)

    # get the stations which actually have data
    # spin through all datasets
    stations = np.array([])
    for dataset in all_datasets:

        try:
            # choose appropriate subdirectory.
            if anomalies == "None":
                subdir = "formatted/indices"
            elif anomalies == "anomalies":
                subdir = "formatted/anomalies"
            elif anomalies == "climatology":
                subdir = "formatted/climatology"

            ds_stations = utils.read_inventory(dataset,
                                               subdir=subdir,
                                               final=True,
                                               timescale=timescale,
                                               index=index,
                                               anomalies=anomalies,
                                               qc_flags=qc_flags)

            good_stations = utils.select_qc_passes(ds_stations,
                                                   qc_flags=qc_flags)

            stations = np.append(stations, good_stations)

            print("Adding {}, nstations = {}".format(dataset.name,
                                                     len(stations)))

        except IOError:
            # file missing
            print("No stations with data for {}".format(dataset.name))

    # which lat and lon sequence does the station sit in.
    #   As will be using box centres, need to send list with same length as box_centres
    assign_stations_to_grid_boxes(stations, utils.box_edge_lats[1:],
                                  utils.box_edge_lons[1:])

    lon_sequence = np.array([stn.box_lon_sequence for stn in stations])
    lat_sequence = np.array([stn.box_lat_sequence for stn in stations])

    #*********************
    # run through each grid box
    for tlats, latitude in enumerate(utils.box_centre_lats):

        print(str(tlats) + "/" + str(len(utils.box_centre_lats)), latitude)

        lat_index, = np.where(lat_sequence == tlats)
        if len(lat_index) == 0:
            # no stations at this latitude so don't bother going any further
            continue

        for tlons, longitude in enumerate(utils.box_centre_lons):

            lon_index, = np.where(lon_sequence == tlons)
            if len(lon_index) == 0:
                # no stations so don't bother
                continue

            # get the common stations to both selections (this lat and this lon sequence)
            lat_lon_match = np.intersect1d(lat_index, lon_index)
            if len(lat_lon_match) == 0:
                # no stations so don't bother
                continue

            # have at least one station in this grid box
            # go through each grid box

            box_data = np.ma.zeros([nyears, nmonths, len(lat_lon_match)])
            box_data.mask = np.ones(box_data.shape)

            for month in range(nmonths):

                if diagnostics:
                    print(len(lat_lon_match))
                for s, li in enumerate(lat_lon_match):
                    if diagnostics:
                        print(stations[li])

                    # read in the stations - use same routine as for ADW
                    data = get_all_data(stations[li], index, timescale, nyears,
                                        month_index)

                    if anomalies == "climatology":
                        # just read in to store
                        box_data[:, :, s] = data

                    else:
                        # calculate the anomalies

                        # back calculate times
                        good_times = utils.REFERENCEYEARS[data.mask[:, 0] ==
                                                          False]

                        # if no data then skip
                        if len(good_times) == 0: continue

                        #*********************
                        # anomalise
                        clim_years = np.where(
                            (utils.REFERENCEYEARS >= utils.CLIM_START.year) &
                            (utils.REFERENCEYEARS < utils.CLIM_END.year), True,
                            False)

                        clim_data = data[clim_years]

                        #*********************
                        # check sufficient data points
                        completeness = np.ma.count(clim_data, axis=0)
                        locs, = np.where(
                            completeness >= utils.CAM_COMPLETENESS)
                        if len(locs) == 0: continue

                        # single month at a time
                        climatology = np.ma.mean(clim_data, axis=0)

                        stn_anomalies = data - climatology

                        box_data[:, :, s] = stn_anomalies

                # done all stations in the box, take the mean
                GridData[:, month, tlats, tlons] = np.ma.mean(box_data,
                                                              axis=-1)[:, 0]
                GridStations[:, month, tlats, tlons] = np.ma.count(box_data,
                                                                   axis=-1)[:,
                                                                            0]
                # need at least N stations (default=3), so mask

                insufficient_stations = np.ma.where(
                    GridStations[:, month, tlats,
                                 tlons] < utils.STATIONS_IN_BOX)
                GridData.mask[insufficient_stations, month, tlats,
                              tlons] = True
                GridStations.mask[insufficient_stations, month, tlats,
                                  tlons] = True
    return GridData, GridStations  # cam
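The anomaly step in cam subtracts each station's climatology over the base period, provided enough years of data exist. A minimal sketch of that step with made-up values (the base-period slice and completeness threshold stand in for utils.CLIM_START/CLIM_END and utils.CAM_COMPLETENESS):

import numpy as np

data = np.ma.array([10., 12., 11., 14., 9., 13.])  # one station, one month
clim_years = slice(0, 4)  # stand-in for the CLIM_START..CLIM_END selection
CAM_COMPLETENESS = 3      # stand-in for utils.CAM_COMPLETENESS

clim_data = data[clim_years]
if np.ma.count(clim_data) >= CAM_COMPLETENESS:
    climatology = np.ma.mean(clim_data)
    stn_anomalies = data - climatology  # these feed the grid-box mean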
Example #9
def main(indata="ghcnd", diagnostics=False):
    """
    Read TXn and TNn and write out ETR as difference


    """
    index = "ETR"

    # check if need to do monthly ones
    if index in utils.MONTHLY_INDICES:
        timescales = ["ANN", "MON"]
    else:
        timescales = ["ANN"]
    
    # get all possible datasets
    all_datasets = utils.get_input_datasets()
    # and their names
    names = np.array([d.name for d in all_datasets])

    # if dataset selected and in the list of available, then run
    if indata in names:
        dataset = all_datasets[names == indata][0]

        dataset_stations = utils.read_inventory(dataset, subdir="formatted/indices")

        # check each station
        for stn in dataset_stations:

            if diagnostics:
                print("{} - {}".format(dataset.name, stn.id))

            # for appropriate number of timescales
            for ts in timescales:

                txx_file = os.path.join(stn.location, stn.id,
                                        "{}_{}_{}.csv".format(stn.id, "txx", ts))
                tnn_file = os.path.join(stn.location, stn.id,
                                        "{}_{}_{}.csv".format(stn.id, "tnn", ts))
                if os.path.exists(txx_file) and os.path.exists(tnn_file):
                    xtimes, txx = utils.read_station_index(stn, "TXx", ts)
                    ntimes, tnn = utils.read_station_index(stn, "TNn", ts)

                    # keep only timesteps present in both series
                    match = np.in1d(xtimes, ntimes)
                    match_b = np.in1d(ntimes, xtimes)

                    # proceed only if there is at least one common timestep
                    # (len(match) is always len(xtimes), so count the matches)
                    if match.sum() != 0 and match_b.sum() != 0:

                        etr = txx[match] - tnn[match_b]
                        etr_times = xtimes[match]

                        if ts == "MON":
                            myears = []
                            months = []
                            for y in etr_times:
                                for m in range(1, 13):
                                    myears += [y]
                                    months += [m]                    

                            stn.monthly = etr.filled().reshape(-1)
                            stn.myears = myears
                            stn.months = months 
                            path = os.path.join(dataset.location, "formatted", "indices", stn.id, "{}_{}_MON.csv".format(stn.id, index.lower()))
                            if not os.path.exists(path):
                                utils.write_station_index(path, stn, "ETR", doMonthly=True)

                        else:            
                            stn.years = etr_times
                            stn.annual = etr.filled()
                            path = os.path.join(dataset.location, "formatted", "indices", stn.id, "{}_{}_ANN.csv".format(stn.id, index.lower()))
                            if not os.path.exists(path):
                                utils.write_station_index(path, stn, "ETR")

    return # main
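The np.in1d pairing keeps only the timesteps present in both the TXx and TNn series before differencing; the subtraction aligns correctly because both time arrays are sorted ascending. A minimal sketch with toy values:

import numpy as np

# toy sketch of the common-timestep matching used for ETR above
xtimes = np.array([2000, 2001, 2002, 2004])
ntimes = np.array([2001, 2002, 2003, 2004])
txx = np.array([30.1, 31.5, 29.8, 32.0])
tnn = np.array([-5.0, -4.2, -6.1, -3.9])

match = np.in1d(xtimes, ntimes)    # True where xtimes appears in ntimes
match_b = np.in1d(ntimes, xtimes)  # True where ntimes appears in xtimes

etr = txx[match] - tnn[match_b]    # aligned because both arrays are sorted
etr_times = xtimes[match]
print(etr_times, etr)              # [2001 2002 2004] [36.5 34.  35.9]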
Example #10
0
def main(diagnostics=False):
    """
    Read inventories and make scatter plot

    :param bool diagnostics: extra verbose output

    """

    # move this up one level eventually?
    all_datasets = utils.get_input_datasets()

    # set up the figure
    fig = plt.figure(figsize=(10, 6.7))
    plt.clf()
    ax = plt.axes([0.025, 0.14, 0.95, 0.90], projection=cartopy.crs.Robinson())
    ax.gridlines()  # draw_labels=True
    ax.add_feature(cartopy.feature.LAND,
                   zorder=0,
                   facecolor="0.9",
                   edgecolor="k")
    ax.coastlines()

    # dummy scatters for full extent
    plt.scatter([-180, 180, 0, 0], [0, 0, -90, 90], c="w", s=1,
                transform=cartopy.crs.Geodetic(), edgecolor='w', linewidth=0.01)

    # run all datasets
    total = 0
    for dataset in all_datasets:

        try:
            # choose appropriate subdirectory.
            subdir = "formatted/indices"

            ds_stations = utils.read_inventory(dataset, subdir=subdir, final=False, \
                                               timescale="", index="", anomalies="None", qc_flags="")

        except IOError:
            # file missing
            print("No stations with data for {}".format(dataset.name))
            ds_stations = []

        if len(ds_stations) > 0:
            lats = np.array([stn.latitude for stn in ds_stations])
            lons = np.array([stn.longitude for stn in ds_stations])

            # and plot
            scatter = plt.scatter(lons, lats, c=COLOURS[dataset.name], s=15,
                                  label="{} ({})".format(get_label(dataset.name), len(ds_stations)),
                                  transform=cartopy.crs.Geodetic(), edgecolor='0.5', linewidth=0.5)

            total += len(ds_stations)

    # make a legend
    leg = plt.legend(loc='lower center', ncol=5, bbox_to_anchor=(0.50, -0.34), \
                         frameon=False, title="", prop={'size':12}, labelspacing=0.15, columnspacing=0.5, numpoints=3)
    plt.setp(leg.get_title(), fontsize=12)

    plt.figtext(0.05, 0.92, "{} Stations".format(total))

    plt.title("HadEX3 stations")

    # and save
    outname = putils.make_filenames("station_locations",
                                    index="All",
                                    grid="ADW",
                                    anomalies="None",
                                    month="All")

    plt.savefig("{}/{}".format(utils.PLOTLOCS, outname), dpi=300)

    plt.close()

    return  # main
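A design note: the dummy white scatter near the top of this example exists only to force the map to the full global extent, since cartopy otherwise zooms to the plotted data. A minimal sketch of the more direct alternative, ax.set_global():

import matplotlib.pyplot as plt
import cartopy.crs as ccrs

# sketch: force the full globe without the invisible-corner-points workaround
ax = plt.axes(projection=ccrs.Robinson())
ax.set_global()  # same effect as scattering white points at the map extremes
ax.coastlines()
plt.savefig("globe_sketch.png", dpi=300)
plt.close()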
Example #11
0
def main(index="TX90p", diagnostics=False):
    """
    For all datasets, find stations that exist for the given index (and
    appropriate timescales), check for presence of data, and write the
    final station listing.

    :param str index: which index to process
    :param bool diagnostics: output diagnostic information
    """

    # check if need to do monthly ones
    if index in utils.MONTHLY_INDICES:
        timescales = ["ANN", "MON"]
    else:
        timescales = ["ANN"]

    # read in all datasets
    all_datasets = utils.get_input_datasets()

    # for appropriate number of timescales
    for ts in timescales:
        print("{}".format(ts))

        # spin through each dataset
        for d, dataset in enumerate(all_datasets):

            dataset_stations = utils.read_inventory(dataset,
                                                    subdir="formatted/indices")

            if diagnostics:
                print("{} - {}".format(dataset.name, index))

            final_inventory = []

            # check each station
            for stn in dataset_stations:

                if diagnostics:
                    print("{} - {}".format(dataset.name, stn.id))

                if assess_station(stn, index, ts, diagnostics=diagnostics):
                    final_inventory += [stn]
                    if diagnostics:
                        print("{}\n".format(len(final_inventory)))
                else:
                    if diagnostics:
                        print("\n")

            # then write everything out.
            metadata_file = os.path.join(
                dataset.location,
                "{}.metadata.{}.{}.txt".format(dataset.name, index, ts))

            utils.write_climpact_inventory_header(metadata_file)

            for stn in final_inventory:
                utils.write_climpact_inventory(metadata_file, stn)

            print("{} - {} stations".format(dataset.name,
                                            len(final_inventory)))

    return  # main
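assess_station() is defined elsewhere in the codebase and is not shown here. For orientation only, here is a hypothetical sketch of the kind of test it performs, assuming it accepts a station when the index file for the requested timescale exists; the real routine will also apply data-completeness criteria.

import os

def assess_station_sketch(stn, index, timescale):
    # hypothetical stand-in for assess_station(): accept a station if its
    # index file exists (the real routine also checks completeness)
    path = os.path.join(stn.location, stn.id,
                        "{}_{}_{}.csv".format(stn.id, index.lower(), timescale))
    return os.path.exists(path)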
Example #12
0
def main(index="TX90p", diagnostics=False, qc_flags="", anomalies="None"):
    """
    Read inventories and make scatter plot

    :param str index: which index to run
    :param bool diagnostics: extra verbose output
    :param str qc_flags: which QC flags to process: W, B, A, N, C, R, F, E, V, M
    :param str anomalies: run code on anomalies or climatology rather than raw data

    """
    with open(
            os.path.join(utils.INFILELOCS,
                         "{}_yearly_stations.txt".format(index)),
            "w") as outfile:
        outfile.write("{}\n".format(index))

    if index in utils.MONTHLY_INDICES:
        timescale = ["ANN", "MON"]  # allow for future!
    else:
        timescale = ["ANN"]

    # move this up one level eventually?
    all_datasets = utils.get_input_datasets()

    for ts in timescale:

        # run all datasets
        for d, dataset in enumerate(all_datasets):

            print(dataset)

            try:
                # choose appropriate subdirectory.
                subdir = "formatted/indices"

                ds_stations = utils.read_inventory(dataset, subdir=subdir, final=True, \
                                                       timescale=ts, index=index, anomalies=anomalies, qc_flags=qc_flags)
                ds_stations = utils.select_qc_passes(ds_stations,
                                                     qc_flags=qc_flags)

            except IOError:
                # file missing
                print("No stations with data for {}".format(dataset.name))
                ds_stations = []

            # extract relevant info for this dataset
            if len(ds_stations) > 0:

                # extract values for this dataset
                for s, stn in enumerate(ds_stations):
                    presence = time_presence(stn, index, ts)  # year/month
                    if s == 0:
                        ds_presence = np.expand_dims(presence, axis=0)[:]
                    else:
                        ds_presence = np.append(ds_presence,
                                                np.expand_dims(presence,
                                                               axis=0),
                                                axis=0)  # station/year/month

                ds_lats = np.array([stn.latitude for stn in ds_stations])
                ds_lons = np.array([stn.longitude for stn in ds_stations])

                # store in overall arrays
                try:
                    all_lats = np.append(all_lats, ds_lats[:], axis=0)
                    all_lons = np.append(all_lons, ds_lons[:], axis=0)
                    all_presence = np.append(
                        all_presence, ds_presence[:],
                        axis=0)  # dataset*station/year/month
                    all_dataset_names = np.append(
                        all_dataset_names,
                        np.array([dataset.name for i in ds_lats]))
                except NameError:
                    # if not yet defined, then set up
                    all_lats = ds_lats[:]
                    all_lons = ds_lons[:]
                    all_presence = ds_presence[:]
                    all_dataset_names = np.array(
                        [dataset.name for i in ds_lats])

        for y, year in enumerate(utils.REFERENCEYEARS):

            # set up the figure
            fig = plt.figure(figsize=(10, 6.5))
            plt.clf()
            ax = plt.axes([0.025, 0.10, 0.95, 0.90],
                          projection=cartopy.crs.Robinson())
            ax.gridlines()  # draw_labels=True
            ax.add_feature(cartopy.feature.LAND,
                           zorder=0,
                           facecolor="0.9",
                           edgecolor="k")
            ax.coastlines()

            # dummy scatters for full extent
            plt.scatter([-180, 180, 0, 0], [0, 0, -90, 90], c="w", s=1,
                        transform=cartopy.crs.Geodetic(), edgecolor='w', linewidth=0.01)

            total = 0
            for dataset in all_datasets:

                ds, = np.where(all_dataset_names == dataset.name)
                locs, = np.where(all_presence[ds, y, 0] == 1)

                if len(locs) > 0:
                    plt.scatter(all_lons[ds][locs], all_lats[ds][locs],
                                c=ps.COLOURS[dataset.name], s=15,
                                label="{} ({})".format(ps.get_label(dataset.name), len(locs)),
                                transform=cartopy.crs.Geodetic(), edgecolor='0.5', linewidth=0.5)
                    total += len(locs)
                else:
                    # aiming to show all, even if zero
                    plt.scatter([-180], [-90], c=ps.COLOURS[dataset.name], s=15,
                                label="{} ({})".format(ps.get_label(dataset.name), len(locs)),
                                transform=cartopy.crs.Geodetic(), edgecolor='0.5', linewidth=0.5)
                time.sleep(1)

            # make a legend
            leg = plt.legend(loc='lower center', ncol=6, bbox_to_anchor=(0.50, -0.25), frameon=False, \
                                 title="", prop={'size':10}, labelspacing=0.15, columnspacing=0.5, numpoints=3)
            plt.setp(leg.get_title(), fontsize=12)

            plt.figtext(0.05, 0.92, "{} Stations".format(total))

            plt.title("{} - {} - {}".format(index, ts, year))

            # and save
            outname = putils.make_filenames("station_locations_{}_{}".format(
                ts.capitalize(), year),
                                            index=index,
                                            grid="ADW",
                                            anomalies=anomalies)

            plt.savefig("{}/{}/{}".format(utils.PLOTLOCS, index, outname))

            plt.close()
            plt.clf()
            print("{} done".format(year))

            # write out total station number
            with open(
                    os.path.join(utils.INFILELOCS,
                                 "{}_yearly_stations.txt".format(index)),
                    "a") as outfile:
                outfile.write("{} {}\n".format(year, total))

            time.sleep(1)

        # reset namespace
        del all_lats
        del all_lons
        del all_presence
        del all_dataset_names
    return  # main
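A design note on the presence accumulation in this example: calling np.append once per station copies the growing array each time, so it scales quadratically with station count. A sketch of an equivalent linear-time pattern, using a hypothetical time_presence_stub() in place of the real time_presence():

import numpy as np

def time_presence_stub(stn, index, ts, nyears=5, nmonths=13):
    # stand-in for the real time_presence(): flag every timestep as present
    return np.ones((nyears, nmonths), dtype=int)

stations = ["stn_a", "stn_b", "stn_c"]
presence_list = [time_presence_stub(s, "TX90p", "ANN") for s in stations]
ds_presence = np.stack(presence_list, axis=0)  # station/year/month
print(ds_presence.shape)                       # (3, 5, 13)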