Beispiel #1
0
def station_annual_anomalies(data):
    """Pair each station record read from *data* with its annual anomalies.

    *data* should be a v2.mean file; it is read with ``GHCNV2Reader``
    using ``year_min=1880``.

    Returns a generator (not a dictionary) of ``(uid, anomalies)``
    pairs, one per record, where *uid* is the record identifier and
    *anomalies* is the second item returned by
    ``series.monthly_annual`` for that record's series.  Wrap the
    result in ``dict()`` if a mapping from identifier to anomaly series
    is wanted.
    """

    from code import series
    from gio import GHCNV2Reader

    # The reader is created immediately; records are only consumed as
    # the returned generator is iterated.
    reader = GHCNV2Reader(data, year_min=1880)
    return ((rec.uid, series.monthly_annual(rec.series)[1])
            for rec in reader)
Beispiel #2
0
def station_annual_anomalies(data):
    """Pair each station record read from *data* with its annual anomalies.

    *data* should be a v2.mean file; it is read with ``GHCNV2Reader``
    using ``year_min=1880``.

    Returns a generator (not a dictionary) of ``(uid, anomalies)``
    pairs, one per record, where *uid* is the record identifier and
    *anomalies* is the second item returned by
    ``series.monthly_annual`` for that record's series.  Wrap the
    result in ``dict()`` if a mapping from identifier to anomaly series
    is wanted.
    """

    from code import series
    from gio import GHCNV2Reader

    # The reader is created immediately; records are only consumed as
    # the returned generator is iterated.
    reader = GHCNV2Reader(data, year_min=1880)
    return ((rec.uid, series.monthly_annual(rec.series)[1])
            for rec in reader)
Beispiel #3
0
def do_combine(stream, log, select_func, combine_func):
    """Drive record combination, yielding the resulting records.

    This is a filter driver function used by ``comb_records`` and
    ``comb_pieces``.  Consecutive records in *stream* that share a
    ``station_uid`` are processed together as one group.

    :Param stream:
        The stream of records to filter.
    :Param log:
        Open log file.
    :Param select_func:
        A function to call to select the 'best' record from a collection
        of records (belonging to the same station).
    :Param combine_func:
        A function to call to perform the data combining.  It is called
        as ``combine_func(sums, wgts, begin, records, log, uid)``.

    """
    for station_id, group in itertools.groupby(stream,
                                               lambda r: r.station_uid):
        log.write('%s\n' % station_id)
        pending = set()
        for rec in group:
            pending.add(rec)
            # Cache the annual mean and annual anomalies on the record.
            mean, anoms = series.monthly_annual(rec.series)
            rec.set_ann_anoms(anoms)
            rec.ann_mean = mean
        begin, end = records_begin_end(pending)
        years = end - begin + 1
        # While several records remain, pick one and combine others
        # into it.  NOTE(review): combine_func is presumably what
        # removes the records it absorbs from *pending*; nothing else
        # in this loop shrinks the set beyond the selected record.
        while len(pending) > 1:
            chosen = select_func(pending)
            pending.remove(chosen)
            sums, wgts = fresh_arrays(chosen, years)
            log.write("\t%s %s %s -- %s\n" % (
                chosen.uid,
                chosen.first_valid_year(),
                chosen.last_valid_year(),
                chosen.source))
            combine_func(sums, wgts, begin, pending, log, chosen.uid)
            combined = average(sums, wgts)
            chosen.set_series(begin * 12 + 1, combined)
            yield chosen
        if pending:
            # Exactly one record is left over; pass it through as is.
            yield pending.pop()
Beispiel #4
0
def get_longest_overlap(target, begin, records):
    """Pick the record in *records* whose annual anomalies overlap the
    *target* for the greatest number of years.

    *target* is a sequence of monthly values; it is converted to an
    annual mean and annual anomalies with ``series.monthly_annual``.
    Each record's ``ann_anoms`` are paired positionally with the
    target's anomalies, and a year counts towards the overlap when both
    values are valid.  *begin* is not used by this function; the
    positional pairing presumes the records and *target* start in the
    same year.

    Returns a triple ``(record, diff, overlap)``: *diff* is the average
    difference in annual values between the record and the target
    (positive when the record is higher) and *overlap* is the number of
    years in the overlap.  When no record overlaps the target, some
    record is still returned, with *diff* None and *overlap* 0.
    *records* must not be empty.
    """

    # Annual mean, and annual anomaly sequence, of the target.
    target_mean, target_anoms = series.monthly_annual(target)
    longest = 0
    mean_diff = None
    # :todo: the records are consulted in an essentially arbitrary
    # order (which depends on the implementation), but the order
    # may affect the result.  Ties go to the last record consulted.
    # For exact compatibility with previous versions, we go via a
    # temporary dict keyed by record uid.
    by_uid = dict((record.uid, record) for record in records)
    for candidate in by_uid.values():
        pairs = [(rec_anom, anom)
                 for rec_anom, anom in zip(candidate.ann_anoms, target_anoms)
                 if valid(rec_anom) and valid(anom)]
        count = len(pairs)
        if count >= longest:
            longest = count
            best_record = candidate
            if pairs:
                total = sum((candidate.ann_mean + rec_anom)
                            - (target_mean + anom)
                            for rec_anom, anom in pairs)
                mean_diff = total / count
    return best_record, mean_diff, longest
def asdict(arg, inp, mode, axes, offset=None, scale=None):
    """Extract records from `inp` and return them as a dictionary.

    `arg` should be a list of 11-digit station identifiers or
    12-digit record identifiers.  Each identifier is looked up in
    `inp` (opened with ``ghcnm_index.File``) and every record found is
    stored in the result under its record identifier as a
    ``(data, begin, axis)`` triple, where *axis* is the entry of `axes`
    paired with the identifier.

    If `mode` is 'anom' then data are converted to monthly anomalies;
    if `mode` is 'annual' then data are converted to annual anomalies
    (using the GISTEMP algorithm that copes with missing months).

    *offset*, when given, is a sequence paired element by element with
    `arg`; each entry is applied to the data of every record (all of
    the duplicates) of the corresponding station via
    ``apply_data_offset``.  When *offset* is omitted or empty, an
    offset of 0.0 is used for every station.  *scale* is passed
    through to ``from_lines``.
    """

    # Clear Climate Code, tool directory
    import ghcnm_index
    # Clear Climate Code
    from code import series

    index = ghcnm_index.File(inp)

    if not offset:
        offset = [0.0] * len(arg)

    result = {}
    for ident, axis, shift in zip(arg, axes, offset):
        for id12, rows in index.get(ident):
            data, begin = from_lines(rows, scale)
            if mode == 'anom':
                # The return value is ignored; *data* is presumably
                # modified in place.
                series.anomalize(data, None)
            elif mode == 'annual':
                _, data = series.monthly_annual(data)
            result[id12] = (apply_data_offset(data, shift), begin, axis)

    return result
Beispiel #6
0
def asdict(arg, inp, mode, axes, offset=None, scale=0.1):
    """Extract records from `inp` and return them as a dictionary.

    `arg` should be a list of 11-digit station identifiers or
    12-digit record identifiers.  Each identifier is looked up in
    `inp` (opened with ``v2index.File``) and every record found is
    stored in the result under its record identifier as a
    ``(data, begin, axis)`` triple, where *axis* is the entry of `axes`
    paired with the identifier.

    If `mode` is 'anom' then data are converted to monthly anomalies;
    if `mode` is 'annual' then data are converted to annual anomalies
    (using the GISTEMP algorithm that copes with missing months).

    *offset*, when given, is a sequence paired element by element with
    `arg`; each entry is applied to the data of every record (all of
    the duplicates) of the corresponding station via
    ``apply_data_offset``.  When *offset* is omitted or empty, an
    offset of 0.0 is used for every station.  *scale* (default 0.1) is
    passed through to ``from_lines``.
    """

    # Clear Climate Code, tool directory
    import v2index
    # Clear Climate Code
    from code import series

    index = v2index.File(inp)

    if not offset:
        offset = [0.0] * len(arg)

    result = {}
    for ident, axis, shift in zip(arg, axes, offset):
        for id12, rows in index.get(ident):
            data, begin = from_lines(rows, scale)
            if mode == 'anom':
                # The return value is ignored; *data* is presumably
                # modified in place.
                series.anomalize(data, None)
            elif mode == 'annual':
                _, data = series.monthly_annual(data)
            result[id12] = (apply_data_offset(data, shift), begin, axis)

    return result
Beispiel #7
0
def find_quintuples(sums, wgts, record, new_id, log):
    """Decide whether *record* agrees with the combined data around the
    middle of their common period.

    The *sums* and *wgts* arrays are assumed to begin in the same
    year as *record*.  Windows of increasing radius (1 up to
    ``parameters.station_combine_bucket_radius`` years) around the
    middle year are examined until both the combined data and *record*
    have at least ``parameters.station_combine_min_mid_years`` valid
    annual anomalies in the window.  For that window the average
    annual values of the two are compared.

    Returns True when such a window was found and the absolute
    difference of the two averages is below the standard deviation
    (``sigma``) of the combined annual anomalies; False otherwise.
    Progress is written to *log*, tagged with *new_id* and the record's
    uid.
    """

    # An identifier common to all the log output.
    logid = "%s %s" % (new_id, record.uid)

    rec_begin = record.first_valid_year()
    rec_end = record.last_valid_year()
    actual_begin, actual_end = get_actual_endpoints(wgts, record.first_year)

    max_begin = max(actual_begin, rec_begin)
    min_end = min(actual_end, rec_end)
    # Since max_begin and min_end are integers, this rounds fractional
    # middle years up.
    middle_year = int(.5 * (max_begin + min_end) + 0.5)
    # Index of the middle year within the annual series.
    offset = middle_year - record.first_year
    log.write("max begin: %s\tmin end: %s\n" % (max_begin, min_end))

    new_ann_mean, new_ann_anoms = series.monthly_annual(average(sums, wgts))
    ann_std_dev = sigma(new_ann_anoms)
    log.write("ann_std_dev = %s\n" % ann_std_dev)

    rec_ann_anoms = record.ann_anoms
    rec_ann_mean = record.ann_mean

    # The overlap is "okay" when the difference in annual temperature is
    # below a certain threshold.
    okay_flag = False
    for rad in range(1, parameters.station_combine_bucket_radius + 1):
        # For the two series, take the valid data from -rad to rad
        # (inclusive) around the middle year, clamping the start of the
        # window at the beginning of the series.
        lo = max(0, offset - rad)
        hi = offset + rad + 1
        new_window = [x for x in new_ann_anoms[lo:hi] if valid(x)]
        rec_window = [x for x in rec_ann_anoms[lo:hi] if valid(x)]
        if (len(new_window) < parameters.station_combine_min_mid_years
                or len(rec_window) < parameters.station_combine_min_mid_years):
            # Not enough valid years on one side; widen the window.
            continue
        # We have an "overlap": both series have enough valid annual
        # anomalies within *rad* years either side of the middle year.
        log.write("overlap success: %s\n" % logid)
        avg1 = sum(anom+new_ann_mean for anom in new_window) / float(
            len(new_window))
        avg2 = sum(anom+rec_ann_mean for anom in rec_window) / float(
            len(rec_window))
        diff = abs(avg1 - avg2)
        log.write("diff = %s\n" % diff)
        if diff < ann_std_dev:
            okay_flag = True
            log.write("combination success: %s\n" % logid)
        else:
            log.write("combination failure: %s\n" % logid)
        break
    else:
        # No radius produced a window with enough valid years.
        log.write("overlap failure: %s\n" % logid)
    # Counts for the last window examined.
    log.write("counts: %d %d\n" % (len(new_window), len(rec_window)))
    return okay_flag