Example #1
0
def bias_overlap(ps, qs):
    """
    Compute the bias between series *ps* and *qs* (positive
    when *qs* is on average bigger than *ps*).

    The two series are walked in step; a position contributes only
    when neither its *ps* element nor its *qs* element is reported as
    invalid by ``invalid``.  If the series differ in length, the
    surplus elements of the longer one are ignored.

    Returns a (bias, overlap) pair where overlap is the number
    of elements for which both *ps* and *qs* are valid, and bias is
    (sum of the valid *qs* - sum of the valid *ps*) / overlap.  When
    overlap is 0 there is nothing to average and bias is None.
    """

    # Sum of the data in each of *ps* and *qs*.
    sum_p = 0.0
    sum_q = 0.0
    # Number of elements where both *ps* and *qs* are valid.
    overlap = 0
    # The builtin zip is used instead of itertools.izip: izip does not
    # exist on Python 3, and zip produces the same pairs on both
    # Python 2 and Python 3.
    for p, q in zip(ps, qs):
        if invalid(p) or invalid(q):
            continue
        overlap += 1
        sum_p += p
        sum_q += q

    if overlap == 0:
        # No common valid data: avoid dividing by zero.
        bias = None
    else:
        bias = (sum_q - sum_p) / overlap
    return (bias, overlap)
Example #2
0
def monthly_anomalies(data, reference_period=None, base_year=-9999):
    """
    Calculate monthly anomalies, by subtracting from every datum
    the mean for its month.  A pair of (monthly_mean, monthly_anom) is
    returned.  *monthly_mean* is a 12-long sequence giving the mean for
    each of the 12 months; *monthly_anom* is a 12-long sequence giving
    the anomalized series (a list) for each of the 12 months.

    If *reference_period* is supplied then it should be a pair (*first*,
    *last*) and the mean for a month is taken over the period (an
    example would be reference_period=(1951,1980)).  *base_year*
    specifies the first year of the data.  Any part of the reference
    period that lies before *base_year* is ignored.  When the mean
    over the reference period is invalid (or no reference period is
    given), the mean over the entire series for that month is used
    instead.

    Invalid data stay invalid: they appear as MISSING in the anomaly
    series.  A month whose mean is invalid gets an anomaly series
    consisting entirely of MISSING.

    The input data is a flat sequence, one datum per month.
    Effectively the data changes shape as it passes through this
    function.
    """

    years = len(data) // 12
    if reference_period:
        # Clamp at 0: a reference period starting (or ending) before
        # base_year would otherwise give negative slice bounds, which
        # Python interprets as offsets from the END of the row and so
        # would silently average the wrong years.
        base = max(reference_period[0] - base_year, 0)
        limit = max(reference_period[1] - base_year + 1, 0)
    else:
        # Setting base, limit to (0,0) is a bit of a hack, but it
        # does work: the empty slice is passed to valid_mean and the
        # whole-period fallback below is relied on to take over.
        base = 0
        limit = 0
    monthly_mean = []
    monthly_anom = []
    for m in range(12):
        # Every 12th datum starting at offset m: one value per year
        # for this month.
        row = data[m::12]
        mean = valid_mean(row[base:limit])
        if invalid(mean):
            # Fall back to using entire period
            mean = valid_mean(row)
        monthly_mean.append(mean)
        if valid(mean):
            # Build the list eagerly.  A lazy map() (Python 3) would
            # both break callers expecting a list and, when finally
            # consumed, see whatever value *mean* had on the last
            # loop iteration rather than this month's mean.
            monthly_anom.append(
                [datum - mean if valid(datum) else MISSING
                 for datum in row])
        else:
            monthly_anom.append([MISSING]*years)
    return monthly_mean, monthly_anom
Example #3
0
def combine(composite, weight, new, new_weight, min_overlap):
    """
    Run the GISTEMP combining algorithm: merge the *new* series into
    the *composite* series, updating *composite* and *weight* in
    place.

    *weight* holds the current weight of each datum in *composite*.
    *new_weight* is the weight of the *new* data; it may be a single
    constant or an array with one weight per datum of *new* (it is
    normalised with ``ensure_array`` before use).

    The 12 months of the year are handled independently.  For a given
    month, the bias between *composite* and *new* is computed with
    ``bias_overlap`` over the elements where both have data.  If the
    overlap is smaller than *min_overlap*, that month is left
    untouched.  Otherwise every valid datum of *new* has the bias
    subtracted and is folded into *composite* as a weighted average,
    and the corresponding entry of *weight* grows by the new datum's
    weight.

    Returns a list of 12 counts: for each month of the year, how many
    data from *new* were combined into *composite*.
    """

    new_weight = ensure_array(weight, new_weight)

    # Number of data combined, one entry per month of the year.
    combined_per_month = [0] * 12
    for month in range(12):
        bias, overlap = bias_overlap(composite[month::12], new[month::12])
        if overlap < min_overlap:
            # Too little data in common to estimate a bias.
            continue

        merged = 0
        # Visit this month's slot in every year of *new*.
        for idx in range(month, len(new), 12):
            datum = new[idx]
            if invalid(datum):
                continue
            old_w = weight[idx]
            add_w = new_weight[idx]
            total_w = old_w + add_w
            # Weighted mean of the existing composite value and the
            # bias-adjusted new value.
            composite[idx] = (old_w*composite[idx]
                              + add_w*(datum-bias))/total_w
            weight[idx] = total_w
            merged += 1
        combined_per_month[month] = merged
    return combined_per_month