예제 #1
0
def find_monthly_scaling(obs_var, station, config_file, diagnostics=False):
    """
    Find scaling parameters for monthly values and store in config file

    For every calendar month the values returned by prepare_monthly_data are
    summarised with utils.average and utils.spread (the spread is floored at
    SPREAD_LIMIT) and written to the "MDISTRIBUTION-<variable name>" section
    of the config file under "<month>-clim" and "<month>-spread".  When fewer
    than VALID_MONTHS unmasked values are available, utils.MDI is stored for
    both entries instead.

    :param MetVar obs_var: meteorological variable object
    :param Station station: station object
    :param str config_file: configuration file to store critical values
    :param bool diagnostics: turn on diagnostic output
    """

    section = "MDISTRIBUTION-{}".format(obs_var.name)

    for month in range(1, 13):

        month_averages = prepare_monthly_data(obs_var,
                                              station,
                                              month,
                                              diagnostics=diagnostics)

        if len(month_averages.compressed()) >= VALID_MONTHS:
            # have months, now to standardise
            climatology = utils.average(month_averages)  # mean
            spread = utils.spread(month_averages)  # IQR currently
            # avoid dividing by a tiny spread later on
            if spread < SPREAD_LIMIT:
                spread = SPREAD_LIMIT
        else:
            # not enough months to characterise the distribution
            climatology = utils.MDI
            spread = utils.MDI

        # write out the scaling (or the missing data indicators)
        utils.write_qc_config(config_file,
                              section,
                              "{}-clim".format(month),
                              "{}".format(climatology),
                              diagnostics=diagnostics)
        utils.write_qc_config(config_file,
                              section,
                              "{}-spread".format(month),
                              "{}".format(spread),
                              diagnostics=diagnostics)

    return  # find_monthly_scaling
예제 #2
0
def find_thresholds(obs_var,
                    station,
                    config_file,
                    plots=False,
                    diagnostics=False,
                    winsorize=True):
    """
    Derive the per-month average and spread of the values returned by
    prepare_data and record them in the config file.

    Entries are written to the "VARIANCE-<variable name>" section as
    "<month>-average" and "<month>-spread".  Months with fewer than
    MIN_VARIANCES unmasked values get utils.MDI for both entries.

    :param MetVar obs_var: meteorological variable object
    :param Station station: station object
    :param str config_file: configuration file to store critical values
    :param bool plots: turn on plots (not used in this routine)
    :param bool diagnostics: turn on diagnostic output
    :param bool winsorize: apply winsorization at 5%/95%
    """

    section_name = "VARIANCE-{}".format(obs_var.name)

    # work through the calendar months in turn
    for month in range(1, 13):

        monthly_variances = prepare_data(obs_var,
                                         station,
                                         month,
                                         diagnostics=diagnostics,
                                         winsorize=winsorize)

        enough_values = len(monthly_variances.compressed()) >= MIN_VARIANCES

        if enough_values:
            centre = utils.average(monthly_variances)
            width = utils.spread(monthly_variances)
        else:
            # cannot characterise this month, store missing data indicators
            centre = utils.MDI
            width = utils.MDI

        # store both statistics, average first and then spread
        for key_template, value in (("{}-average", centre),
                                    ("{}-spread", width)):
            utils.write_qc_config(config_file,
                                  section_name,
                                  key_template.format(month),
                                  "{}".format(value),
                                  diagnostics=diagnostics)

    return  # find_thresholds
예제 #3
0
def identify_values(sealp,
                    stnlp,
                    times,
                    config_file,
                    plots=False,
                    diagnostics=False):
    """
    Characterise the sea-level minus station-level pressure difference and
    store its average and spread in the "PRESSURE" section of the config file.

    Nothing is written when fewer than utils.DATA_COUNT_THRESHOLD unmasked
    differences are available.  The spread is limited to the range
    MIN_SPREAD to MAX_SPREAD before it is stored.

    :param MetVar sealp: sea level pressure object
    :param MetVar stnlp: station level pressure object
    :param array times: datetime array (not used in this routine)
    :param str config_file: string for configuration file
    :param bool plots: turn on plots (not used in this routine)
    :param bool diagnostics: turn on diagnostic output
    """

    offsets = sealp.data - stnlp.data

    if len(offsets.compressed()) < utils.DATA_COUNT_THRESHOLD:
        # too few paired observations to say anything useful
        return  # identify_values

    mean_offset = utils.average(offsets)
    offset_spread = utils.spread(offsets)

    # keep the spread within sensible limits
    if offset_spread < MIN_SPREAD:  # less than XhPa
        offset_spread = MIN_SPREAD
    elif offset_spread > MAX_SPREAD:  # more than XhPa
        offset_spread = MAX_SPREAD

    # store average first, then spread
    for key, value in (("average", mean_offset), ("spread", offset_spread)):
        utils.write_qc_config(config_file,
                              "PRESSURE",
                              key,
                              "{}".format(value),
                              diagnostics=diagnostics)

    return  # identify_values
예제 #4
0
def _longest_monotonic_runs(diffs):
    """
    Find the longest runs of consecutive falling and rising differences.

    A difference counts towards a rising run when it is > 0; every other
    difference counts towards a falling run.

    :param array diffs: successive differences of a data series

    :returns: tuple (longest falling run, longest rising run)
    """
    negs, poss = 0, 0
    biggest_neg, biggest_pos = 0, 0

    for diff in diffs:
        if diff > 0:
            # a falling run (if any) has just ended
            biggest_neg = max(biggest_neg, negs)
            negs = 0
            poss += 1
        else:
            # a rising run (if any) has just ended
            biggest_pos = max(biggest_pos, poss)
            poss = 0
            negs += 1

    # the run still in progress when the data end has to be counted as well
    return max(biggest_neg, negs), max(biggest_pos, poss)  # _longest_monotonic_runs


def variance_check(obs_var,
                   station,
                   config_file,
                   plots=False,
                   diagnostics=False,
                   winsorize=True):
    """
    Flag year-months whose scaled variance is too far from the average.

    For each calendar month the values from prepare_data are compared with
    the "<month>-average" and "<month>-spread" entries in the
    "VARIANCE-<variable name>" section of the config file (these are created
    via find_thresholds if missing).  Entries where
    abs(value - average) / spread exceeds SPREAD_THRESHOLD are treated as bad
    years and all observations in that year-month are flagged "V".  For
    pressure and wind speed variables a storm check is run first, and the
    year-month is left unflagged if it could be a storm.  The flags are
    merged into obs_var.flags with utils.insert_flags.

    :param MetVar obs_var: meteorological variable object
    :param Station station: station object
    :param str config_file: configuration file to store critical values
    :param bool plots: turn on plots
    :param bool diagnostics: turn on diagnostic output
    :param bool winsorize: apply winsorization at 5%/95%
    """

    flags = np.array(["" for _ in range(obs_var.data.shape[0])])

    pressure_names = ["station_level_pressure", "sea_level_pressure"]
    is_pressure = obs_var.name in pressure_names
    # these variables need the extra storm check before flags are applied
    needs_storm_check = is_pressure or obs_var.name == "wind_speed"

    section = "VARIANCE-{}".format(obs_var.name)

    # indices in bad_years are looked up in this array
    all_years = np.unique(station.years)

    for month in range(1, 13):
        month_locs, = np.where(station.months == month)

        variances = prepare_data(obs_var,
                                 station,
                                 month,
                                 diagnostics=diagnostics,
                                 winsorize=winsorize)

        average_key = "{}-average".format(month)
        spread_key = "{}-spread".format(month)

        try:
            average_variance = float(
                utils.read_qc_config(config_file, section, average_key))
            variance_spread = float(
                utils.read_qc_config(config_file, section, spread_key))
        except KeyError:
            print("Information missing in config file")
            # derive the thresholds with the same winsorize setting as is
            #   used for the values being tested
            find_thresholds(obs_var,
                            station,
                            config_file,
                            plots=plots,
                            diagnostics=diagnostics,
                            winsorize=winsorize)
            average_variance = float(
                utils.read_qc_config(config_file, section, average_key))
            variance_spread = float(
                utils.read_qc_config(config_file, section, spread_key))

        if average_variance == utils.MDI and variance_spread == utils.MDI:
            # couldn't be calculated, move on
            continue

        bad_years, = np.where(
            np.abs(variances - average_variance) /
            variance_spread > SPREAD_THRESHOLD)

        # prepare wind and pressure data in case needed to check for storms
        if needs_storm_check:
            wind_monthly_data = station.wind_speed.data[month_locs]
            if is_pressure:
                pressure_monthly_data = obs_var.data[month_locs]
            else:
                pressure_monthly_data = station.sea_level_pressure.data[
                    month_locs]

            if len(pressure_monthly_data.compressed()) < utils.DATA_COUNT_THRESHOLD or \
                    len(wind_monthly_data.compressed()) < utils.DATA_COUNT_THRESHOLD:
                # need sufficient data to work with for storm check to work, else can't tell
                #    move on
                continue

            wind_average = utils.average(wind_monthly_data)
            wind_spread = utils.spread(wind_monthly_data)

            pressure_average = utils.average(pressure_monthly_data)
            pressure_spread = utils.spread(pressure_monthly_data)

        # go through each bad year for this month
        for year in bad_years:

            # corresponding locations
            ym_locs, = np.where(
                np.logical_and(station.months == month,
                               station.years == all_years[year]))

            # if pressure or wind speed, need to do some further checking before applying flags
            if needs_storm_check:

                # pull out the data
                wind_data = station.wind_speed.data[ym_locs]
                if is_pressure:
                    pressure_data = obs_var.data[ym_locs]
                else:
                    pressure_data = station.sea_level_pressure.data[ym_locs]

                # need sufficient data to work with for storm check to work, else can't tell
                if len(pressure_data.compressed()) < utils.DATA_COUNT_THRESHOLD or \
                        len(wind_data.compressed()) < utils.DATA_COUNT_THRESHOLD:
                    # move on
                    continue

                # find locations of high wind speeds and low pressures, cross match
                high_winds, = np.ma.where(
                    (wind_data - wind_average) / wind_spread > STORM_THRESHOLD)
                low_pressures, = np.ma.where(
                    (pressure_average - pressure_data) /
                    pressure_spread > STORM_THRESHOLD)

                # this could be a storm, either at tropical station (relatively constant pressure)
                # or out of season in mid-latitudes.
                # Only a storm candidate if at least one high wind actually
                #   coincides with a low pressure (the boolean match array is
                #   as long as high_winds, so its length says nothing)
                couldbe_storm = bool(
                    np.isin(high_winds, low_pressures).any())

                if is_pressure:
                    diffs = np.ma.diff(pressure_data)
                else:
                    # wind_speed
                    diffs = np.ma.diff(wind_data)

                # count up the largest number of sequential negative and positive differences
                biggest_neg, biggest_pos = _longest_monotonic_runs(diffs)

                if biggest_neg >= 10 or biggest_pos >= 10 or couldbe_storm:
                    # could be a storm (HadISD values for the run lengths),
                    #   so better to leave this month unflagged
                    continue

            # copy over the flags, if any
            if len(ym_locs) != 0:
                # and set the flags
                flags[ym_locs] = "V"

        # diagnostic plots
        if plots:
            import matplotlib.pyplot as plt

            scaled_variances = ((variances - average_variance) /
                                variance_spread)
            bins = utils.create_bins(scaled_variances, 0.25, obs_var.name)
            hist, bin_edges = np.histogram(scaled_variances, bins)

            plt.clf()
            plt.step(bins[1:], hist, color='k', where="pre")
            plt.yscale("log")

            plt.ylabel("Number of Months")
            plt.xlabel("Scaled {} Variances".format(obs_var.name.capitalize()))
            plt.title("{} - month {}".format(station.id, month))

            plt.ylim([0.1, max(hist) * 2])
            plt.axvline(SPREAD_THRESHOLD, c="r")
            plt.axvline(-SPREAD_THRESHOLD, c="r")

            bad_hist, dummy = np.histogram(scaled_variances[bad_years], bins)
            plt.step(bins[1:], bad_hist, color='r', where="pre")

            plt.show()

    # append flags to object
    obs_var.flags = utils.insert_flags(obs_var.flags, flags)

    if diagnostics:

        print("Variance {}".format(obs_var.name))
        print("   Cumulative number of flags set: {}".format(
            len(np.where(flags != "")[0])))

    return  # variance_check
예제 #5
0
def pressure_offset(sealp,
                    stnlp,
                    times,
                    config_file,
                    plots=False,
                    diagnostics=False):
    """
    Flag locations where difference between station and sea-level pressure
    falls outside of bounds

    The bounds are average +/- THRESHOLD * spread, with average and spread
    read from the "PRESSURE" section of the config file (calculated from the
    data and written there if the entries are missing).  Nothing is flagged
    when fewer than utils.DATA_COUNT_THRESHOLD unmasked differences exist, or
    when the mean and median of the differences disagree by more than
    THRESHOLD * spread.  Flags ("p") are only applied to stnlp.

    :param MetVar sealp: sea level pressure object
    :param MetVar stnlp: station level pressure object
    :param array times: datetime array
    :param str config_file: string for configuration file
    :param bool plots: turn on plots
    :param bool diagnostics: turn on diagnostic output
    """

    flags = np.array(["" for _ in range(sealp.data.shape[0])])

    difference = sealp.data - stnlp.data

    if len(difference.compressed()) >= utils.DATA_COUNT_THRESHOLD:

        try:
            average = float(
                utils.read_qc_config(config_file, "PRESSURE", "average"))
            spread = float(
                utils.read_qc_config(config_file, "PRESSURE", "spread"))
        except KeyError:
            print("Information missing in config file")
            # derive the values from the data and store them for next time
            average = utils.average(difference)
            spread = utils.spread(difference)
            if spread < MIN_SPREAD:  # less than XhPa
                spread = MIN_SPREAD
            elif spread > MAX_SPREAD:  # more than XhPa
                spread = MAX_SPREAD

            utils.write_qc_config(config_file,
                                  "PRESSURE",
                                  "average",
                                  "{}".format(average),
                                  diagnostics=diagnostics)
            utils.write_qc_config(config_file,
                                  "PRESSURE",
                                  "spread",
                                  "{}".format(spread),
                                  diagnostics=diagnostics)

        if np.abs(np.ma.mean(difference) -
                  np.ma.median(difference)) > THRESHOLD * spread:
            # no flags are set (or inserted) in this case
            if diagnostics:
                print("Large difference between mean and median")
                print("Likely to have two populations of roughly equal size")
                print("Test won't work")
        else:
            upper_bound = average + (THRESHOLD * spread)
            lower_bound = average - (THRESHOLD * spread)

            high, = np.ma.where(difference > upper_bound)
            low, = np.ma.where(difference < lower_bound)

            # diagnostic plots
            if plots:
                bins = np.arange(
                    np.round(difference.min()) - 1,
                    np.round(difference.max()) + 1, 0.1)
                import matplotlib.pyplot as plt
                plt.clf()
                plt.hist(difference.compressed(), bins=bins)
                plt.axvline(x=upper_bound, ls="--", c="r")
                plt.axvline(x=lower_bound, ls="--", c="r")
                plt.xlim([bins[0] - 1, bins[-1] + 1])
                plt.ylabel("Observations")
                plt.xlabel("Difference (hPa)")
                plt.show()

            if len(high) != 0:
                flags[high] = "p"
                if diagnostics:
                    # include the variable name (the placeholder was missing,
                    #   so the name was silently dropped)
                    print("Pressure {}".format(stnlp.name))
                    print("   Number of high differences {}".format(len(high)))
                if plots:
                    for bad in high:
                        plot_pressure(sealp, stnlp, times, bad)

            if len(low) != 0:
                flags[low] = "p"
                if diagnostics:
                    print("   Number of low differences {}".format(len(low)))
                if plots:
                    for bad in low:
                        plot_pressure(sealp, stnlp, times, bad)

            # only flag the station level pressure
            stnlp.flags = utils.insert_flags(stnlp.flags, flags)

    if diagnostics:

        print("Pressure {}".format(stnlp.name))
        print("   Cumulative number of flags set: {}".format(
            len(np.where(flags != "")[0])))

    return  # pressure_offset
예제 #6
0
def prepare_data(obs_var, station, month, diagnostics=False, winsorize=True):
    """
    Prepare the data for the climatological check.  Makes anomalies and applies low-pass filter

    Anomalies are formed against an hourly climatology for the calendar
    month, normalised by their spread (floored at 1.5) and passed through
    low_pass_filter.

    :param MetVar obs_var: meteorological variable object
    :param Station station: station object
    :param int month: which month to run on
    :param bool diagnostics: turn on diagnostic output (not used in this routine)
    :param bool winsorize: apply winsorization at 5%/95%

    :returns: result of low_pass_filter, or the un-normalised anomalies array
        if there were insufficient data (fully masked if the month has too
        few observations or years)
    """

    # start with everything masked
    anomalies = np.ma.zeros(obs_var.data.shape[0])
    anomalies.mask = np.ones(anomalies.shape[0])
    normed_anomalies = np.ma.copy(anomalies)

    in_month = station.months == month
    mlocs, = np.where(in_month)

    nyears = len(np.unique(station.years[mlocs]))

    # need to have some data and in at least 5 years!
    if len(mlocs) < utils.DATA_COUNT_THRESHOLD or nyears < 5:
        return anomalies  # prepare_data

    anomalies.mask[mlocs] = False
    normed_anomalies.mask[mlocs] = False

    hourly_clims = np.ma.zeros(24)
    hourly_clims.mask = np.ones(24)
    for hour in range(24):

        # calculate climatology
        hlocs, = np.where(np.logical_and(in_month, station.hours == hour))

        hour_data = obs_var.data[hlocs]

        if winsorize:
            if len(hour_data.compressed()) > 10:
                hour_data = utils.winsorize(hour_data, 5)

        # only count unmasked observations, as for the other data-count
        #   tests, so that a climatology isn't built from a few valid values
        if len(hour_data.compressed()) >= utils.DATA_COUNT_THRESHOLD:
            hourly_clims[hour] = np.ma.mean(hour_data)
            hourly_clims.mask[hour] = False

        # make anomalies - keeping the order
        #   (hours without a climatology stay masked in hourly_clims)
        anomalies[hlocs] = obs_var.data[hlocs] - hourly_clims[hour]

    # if insufficient data at each hour, then no anomalies calculated
    if len(anomalies[mlocs].compressed()) < utils.DATA_COUNT_THRESHOLD:
        return anomalies  # prepare_data

    # for the month, normalise anomalies by spread
    spread = utils.spread(anomalies[mlocs])
    if spread < 1.5:
        spread = 1.5

    normed_anomalies[mlocs] = anomalies[mlocs] / spread

    # apply low pass filter derived from monthly values
    # NOTE(review): the average below is taken over all observations in each
    #   year (not just this calendar month, and of the raw data rather than
    #   the anomalies) - confirm this is what low_pass_filter expects
    all_years = np.unique(station.years)
    monthly_anoms = np.ma.zeros(all_years.shape[0])
    for y, year in enumerate(all_years):

        ylocs, = np.where(station.years == year)
        year_data = obs_var.data[ylocs]

        monthly_anoms[y] = utils.average(year_data)

    lp_filtered_anomalies = low_pass_filter(normed_anomalies, station,
                                            monthly_anoms, month)

    return lp_filtered_anomalies  # prepare_data
예제 #7
0
def _expand_storm_events(storms, wind_data):
    """
    Split storm locations into separate events and expand around each one.

    :param array storms: locations (within wind_data) of storm signals,
        at least one entry
    :param array wind_data: wind speed data for the year-month

    :returns: combined locations from expand_around_storms for every event
    """
    n_obs = len(wind_data)

    if len(storms) < 2:
        # single entry - expand around the storm signal (rather than
        #  just unflagging what could be the peak and
        #  leaving the entry/exit flagged)
        return expand_around_storms(storms, n_obs)

    # find where separation more than the usual obs separation
    # NOTE(review): the separations of the storm locations are compared with
    #   the median difference of the wind speed values - confirm this is the
    #   intended measure of "usual obs separation"
    storm_1diffs = np.ma.diff(storms)
    separations, = np.where(
        storm_1diffs > np.ma.median(np.ma.diff(wind_data)))

    # start/end positions of each event within storms; with no separations
    #   this is a single event covering all of the locations
    bounds = np.concatenate(([0], separations + 1, [len(storms)]))

    final_storm_locs = expand_around_storms(storms[bounds[0]:bounds[1]],
                                            n_obs)
    for start, stop in zip(bounds[1:-1], bounds[2:]):
        final_storm_locs = np.append(
            final_storm_locs,
            expand_around_storms(storms[start:stop], n_obs))

    return final_storm_locs  # _expand_storm_events


def _unflag_storms(obs_var, station, month, month_locs, month_flags):
    """
    Remove "d" flags from pressure observations that coincide with high winds.

    month_flags is modified in place.  Nothing is changed if there are
    insufficient monthly wind or pressure data for the storm check.

    :param MetVar obs_var: pressure variable object
    :param Station station: station object
    :param int month: calendar month being processed
    :param array month_locs: locations of this calendar month in the full record
    :param array month_flags: flags for this calendar month (all years)
    """
    wind_monthly_data = prepare_monthly_data(station.wind_speed, station,
                                             month)
    pressure_monthly_data = prepare_monthly_data(obs_var, station, month)

    if len(pressure_monthly_data.compressed()) < utils.DATA_COUNT_THRESHOLD or \
            len(wind_monthly_data.compressed()) < utils.DATA_COUNT_THRESHOLD:
        # need sufficient data to work with for storm check to work, else can't tell
        return

    wind_monthly_average = utils.average(wind_monthly_data)
    wind_monthly_spread = utils.spread(wind_monthly_data)

    pressure_monthly_average = utils.average(pressure_monthly_data)
    pressure_monthly_spread = utils.spread(pressure_monthly_data)

    # extract the calendar month once, rather than for every year
    month_years = station.years[month_locs]
    month_wind = station.wind_speed.data[month_locs]
    month_pressure = obs_var.data[month_locs]

    # already a single calendar month, so go through each year
    for year in np.unique(station.years):

        this_year_locs, = np.where(month_years == year)

        if "d" not in month_flags[this_year_locs]:
            # skip if you get the chance
            continue

        wind_data = month_wind[this_year_locs]
        pressure_data = month_pressure[this_year_locs]

        # high winds coinciding with low pressures
        storms, = np.ma.where(
            np.logical_and(
                (((wind_data - wind_monthly_average) / wind_monthly_spread) >
                 STORM_THRESHOLD),
                (((pressure_monthly_average - pressure_data) /
                  pressure_monthly_spread) > STORM_THRESHOLD)))

        if len(storms) == 0:
            continue

        final_storm_locs = _expand_storm_events(storms, wind_data)

        # unset the flags.  Index month_flags once with the composed
        #   locations: chaining two index arrays assigns into a temporary
        #   copy and leaves month_flags untouched
        month_flags[this_year_locs[final_storm_locs]] = ""

    return  # _unflag_storms


def all_obs_gap(obs_var, station, config_file, plots=False, diagnostics=False):
    """
    Extract data for month and find secondary populations in distribution.

    For each calendar month the normalised anomalies from prepare_all_data
    are binned, and utils.find_gap is used to locate a gap in the histogram
    beyond the "<month>-uthresh" / "<month>-lthresh" entries of the
    "ADISTRIBUTION-<variable name>" section of the config file.
    Observations beyond a gap are flagged "d".  For pressure variables,
    low-side flags that coincide with high winds (possible storms) are
    removed again.  The flags are merged into obs_var.flags with
    utils.insert_flags.

    :param MetVar obs_var: meteorological variable object
    :param Station station: station object
    :param str config_file: configuration file to store critical values
    :param bool plots: turn on plots
    :param bool diagnostics: turn on diagnostic output
    """

    flags = np.array(["" for _ in range(obs_var.data.shape[0])])

    section = "ADISTRIBUTION-{}".format(obs_var.name)
    is_pressure = obs_var.name in [
        "station_level_pressure", "sea_level_pressure"
    ]

    for month in range(1, 13):

        normalised_anomalies = prepare_all_data(obs_var,
                                                station,
                                                month,
                                                config_file,
                                                full=False,
                                                diagnostics=diagnostics)

        if (len(normalised_anomalies.compressed()) == 1
                and normalised_anomalies[0] == utils.MDI):
            # no data to work with for this month, move on.
            continue

        bins = utils.create_bins(normalised_anomalies, BIN_WIDTH, obs_var.name)
        hist, bin_edges = np.histogram(normalised_anomalies, bins)

        upper_key = "{}-uthresh".format(month)
        lower_key = "{}-lthresh".format(month)

        try:
            upper_threshold = float(
                utils.read_qc_config(config_file, section, upper_key))
            lower_threshold = float(
                utils.read_qc_config(config_file, section, lower_key))
        except KeyError:
            print("Information missing in config file")
            find_thresholds(obs_var,
                            station,
                            config_file,
                            plots=plots,
                            diagnostics=diagnostics)
            upper_threshold = float(
                utils.read_qc_config(config_file, section, upper_key))
            lower_threshold = float(
                utils.read_qc_config(config_file, section, lower_key))

        if upper_threshold == utils.MDI and lower_threshold == utils.MDI:
            # these weren't able to be calculated, move on
            continue
        elif len(np.unique(normalised_anomalies)) == 1:
            # all the same value, so won't be able to fit a histogram
            continue

        # now to find the gaps
        uppercount = len(np.where(normalised_anomalies > upper_threshold)[0])
        lowercount = len(np.where(normalised_anomalies < lower_threshold)[0])

        month_locs, = np.where(
            station.months == month)  # append should keep year order

        if uppercount > 0:
            gap_start = utils.find_gap(hist, bins, upper_threshold, GAP_SIZE)

            if gap_start != 0:
                bad_locs, = np.ma.where(normalised_anomalies >
                                        gap_start)  # all years for one month

                month_flags = flags[month_locs]
                month_flags[bad_locs] = "d"
                flags[month_locs] = month_flags

        if lowercount > 0:
            gap_start = utils.find_gap(hist,
                                       bins,
                                       lower_threshold,
                                       GAP_SIZE,
                                       upwards=False)

            if gap_start != 0:
                bad_locs, = np.ma.where(normalised_anomalies <
                                        gap_start)  # all years for one month

                month_flags = flags[month_locs]
                month_flags[bad_locs] = "d"

                # for pressure data, see if the flagged obs correspond with high winds
                # could be a storm signal
                if is_pressure:
                    _unflag_storms(obs_var, station, month, month_locs,
                                   month_flags)

                # having checked for storms now store final flags
                flags[month_locs] = month_flags

        # diagnostic plots
        if plots:
            import matplotlib.pyplot as plt
            plt.step(bins[1:], hist, color='k', where="pre")
            plt.yscale("log")

            plt.ylabel("Number of Observations")
            plt.xlabel(obs_var.name.capitalize())
            plt.title("{} - month {}".format(station.id, month))

            plt.ylim([0.1, max(hist) * 2])

            plt.axvline(upper_threshold, c="r")
            plt.axvline(lower_threshold, c="r")

            bad_locs, = np.where(flags[month_locs] == "d")
            bad_hist, dummy = np.histogram(normalised_anomalies[bad_locs],
                                           bins)
            plt.step(bins[1:], bad_hist, color='r', where="pre")

            plt.show()

    # append flags to object
    obs_var.flags = utils.insert_flags(obs_var.flags, flags)

    if diagnostics:

        print("Distribution (all) {}".format(obs_var.name))
        print("   Cumulative number of flags set: {}".format(
            len(np.where(flags != "")[0])))

    return  # all_obs_gap
예제 #8
0
def prepare_all_data(obs_var,
                     station,
                     month,
                     config_file,
                     full=False,
                     diagnostics=False):
    """
    Pull out one calendar month of data and turn it into normalised anomalies.

    The climatology and spread used for the normalisation live in the
    "ADISTRIBUTION-<variable name>" section of the config file as
    "<month>-clim" and "<month>-spread".  They are calculated from the data
    and written out when full is set, or when they cannot be read from the
    file; otherwise the stored values are used.

    :param MetVar obs_var: meteorological variable object
    :param Station station: station object
    :param int month: month to process
    :param str config_file: configuration file to store critical values
    :param bool full: always calculate and store the climatology and spread
    :param bool diagnostics: turn on diagnostic output

    :returns: one-element masked array holding utils.MDI if climatology and
        spread are both utils.MDI, else the anomalies divided by the spread
        (not divided when the spread is zero)
    """

    month_locs, = np.where(station.months == month)
    all_month_data = obs_var.data[month_locs]

    section = "ADISTRIBUTION-{}".format(obs_var.name)
    clim_key = "{}-clim".format(month)
    spread_key = "{}-spread".format(month)

    # decide whether the scaling has to be (re)calculated
    calculate = full
    if not calculate:
        try:
            climatology = float(
                utils.read_qc_config(config_file, section, clim_key))
            spread = float(
                utils.read_qc_config(config_file, section, spread_key))
        except KeyError:
            # nothing stored yet, so fall back to calculating
            calculate = True

    if calculate:
        if len(all_month_data.compressed()) >= utils.DATA_COUNT_THRESHOLD:
            # have data, now to standardise
            climatology = utils.average(all_month_data)  # mean
            spread = utils.spread(all_month_data)  # IQR currently
        else:
            climatology = utils.MDI
            spread = utils.MDI

        # write out the scaling...
        utils.write_qc_config(config_file,
                              section,
                              clim_key,
                              "{}".format(climatology),
                              diagnostics=diagnostics)
        utils.write_qc_config(config_file,
                              section,
                              spread_key,
                              "{}".format(spread),
                              diagnostics=diagnostics)

    if climatology == utils.MDI and spread == utils.MDI:
        # these weren't calculable, move on
        return np.ma.array([utils.MDI])

    anomalies = all_month_data - climatology

    if spread == 0:
        # all the same value
        return anomalies  # prepare_all_data

    return anomalies / spread  # prepare_all_data