Example 1: SBBXtoBX
def SBBXtoBX(data):
    """Simultaneously combine the land series and the ocean series and
    combine subboxes into boxes.  *data* should be an iterator of
    (land, ocean) subbox series pairs. Returns an iterator of box data.
    """

    # First item from iterator is normally a pair of metadata objects,
    # one for land, one for ocean.  If we are piping step3 straight into
    # step5 then it is not a pair.  In that case we synthesize missing
    # ocean data.
    meta = data.next()
    try:
        land_meta, ocean_meta = meta
    except (TypeError, ValueError):
        # Use the land meta object for both land and ocean data
        land_meta, ocean_meta = meta, meta
        print "No ocean data; using land data only"
        data = blank_ocean_data(data)

    # number of subboxes within each box
    nsubbox = 100

    # TODO: Formalise use of only monthlies, see step 3.
    assert land_meta.mavg == 6
    NYRSIN = land_meta.monm / 12
    combined_year_beg = min(land_meta.yrbeg, ocean_meta.yrbeg)
    # Index into the combined array of the first year of the land data.
    land_offset = 12 * (land_meta.yrbeg - combined_year_beg)
    # As land_offset but for ocean data.
    ocean_offset = 12 * (ocean_meta.yrbeg - combined_year_beg)
    combined_n_months = max(land_meta.monm + land_offset,
                            ocean_meta.monm + ocean_offset)

    info = [
        land_meta.mo1, land_meta.kq, land_meta.mavg, land_meta.monm,
        land_meta.monm4, combined_year_beg, land_meta.missing_flag,
        land_meta.precipitation_flag
    ]

    info[4] = 2 * land_meta.monm + 5
    yield (info, land_meta.title)

    for box_number, box in enumerate(eqarea.grid()):
        # Averages for the land and ocean (one series per subbox)...
        avg = []
        wgtc = []
        # Eat the records from land and ocean 100 (nsubbox) at a time.
        # In other words, all 100 subboxes for the box.
        landsub, oceansub = zip(*itertools.islice(data, nsubbox))
        # :todo: combine below zip with above zip?
        for i, l, o in zip(range(nsubbox), landsub, oceansub):
            a = [MISSING] * combined_n_months
            if (o.good_count < parameters.subbox_min_valid
                    or l.d < parameters.subbox_land_range):
                # use land series for this subbox
                a[land_offset:land_offset + len(l.series)] = l.series
                wgtc.append(l.good_count)
            else:
                # use ocean series for this subbox
                a[ocean_offset:ocean_offset + len(o.series)] = o.series
                wgtc.append(o.good_count)
            avg.append(a)

        # GISTEMP sort.
        # We want to end up with IORDR, the permutation array that
        # represents the sorted order.  IORDR[0] is the index (into the
        # *wgtc* array) of the longest record, IORDR[1] the index of the
        # next longest record, and so on.  We do that by decorating the
        # *wgtc* array with indexes 0 to 99, and then extracting the
        # (permuted) indexes into IORDR.
        # :todo: should probably import from a purpose built module.
        from step3 import sort
        IORDR = range(nsubbox)
        sort(IORDR, lambda x, y: wgtc[y] - wgtc[x])

        # From here to the "for" loop over the cells (below) we are
        # initialising data for the loop.  Primarily the AVGR and WTR
        # arrays.
        nc = IORDR[0]

        # Weights for the box's record.
        wtr = [a != MISSING for a in avg[nc]]
        # Box record
        avgr = avg[nc][:]

        # Loop over the remaining cells.
        for nc in IORDR[1:]:
            if wgtc[nc] >= parameters.subbox_min_valid:
                series.combine(avgr, wtr, avg[nc], 1, 0,
                               combined_n_months / 12,
                               parameters.box_min_overlap)

        series.anomalize(avgr, parameters.subbox_reference_period,
                         combined_year_beg)
        ngood = sum(valid(a) for a in avgr)
        yield (avgr, wtr, ngood, box)
    # We've now consumed all 8000 input subboxes and yielded 80 boxes.  We
    # need to tickle the input to check that it is exhausted and to
    # cause it to run the final tail of its generator.
    # We expect the call to .next() to raise StopIteration, which is
    # just what we want.
    data.next()
    # Ordinarily we never reach here.
    assert 0, "Too many input records"
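
The "GISTEMP sort" comment above describes building IORDR, a permutation of subbox indices ordered by descending record weight. A minimal standard-library sketch of the same idea follows; it is illustrative only, since the real code imports a project-specific sort from step3 (and the TODO comments elsewhere note that switching to Python's built-in sort would change the results).

# Minimal sketch, not the project's sort: order subbox indices by
# descending weight, as IORDR is above.  *wgtc* here is a made-up
# list of good-month counts, one per subbox.
wgtc = [37, 0, 112, 54]
iordr = sorted(range(len(wgtc)), key=lambda i: wgtc[i], reverse=True)
# iordr[0] indexes the longest record, iordr[1] the next longest, and so on.
assert iordr == [2, 3, 0, 1]
assert [wgtc[i] for i in iordr] == [112, 54, 37, 0]
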
Example 2: iter_subbox_grid
def iter_subbox_grid(station_records, max_months, first_year, radius):
    """Convert the input *station_records* into a gridded anomaly
    dataset, which is returned as an iterator.

    *max_months* is the maximum number of months in any station
    record.  *first_year* is the first year in the dataset.  *radius*
    is the combining radius in kilometres.
    """

    station_records = list(station_records)

    log = sys.stdout

    # Critical radius as an angle of arc
    arc = radius / earth.radius
    arcdeg = arc * 180 / math.pi

    regions = list(eqarea.gridsub())
    for region in regions:
        box, subboxes = region[0], list(region[1])

        # Extend the box by half a box east and west and by arc north
        # and south.
        extent = [
            box[0] - arcdeg, box[1] + arcdeg, box[2] - 0.5 * (box[3] - box[2]),
            box[3] + 0.5 * (box[3] - box[2])
        ]
        if box[0] <= -90 or box[1] >= 90:
            # polar
            extent[2] = -180.0
            extent[3] = +180.0

        region_records = list(inbox(station_records, *extent))
        # Descending sort by number of good records
        # TODO: Switch to using Python's sort method here, although it
        # will change the results.
        sort(region_records, lambda x, y: y.good_count - x.good_count)

        # Count how many cells are empty
        n_empty_cells = 0
        # Used to generate the "subbox at" rows in the log.
        lastcentre = (None, None)
        for subbox in subboxes:
            # Select and weight stations
            centre = eqarea.centre(subbox)
            log.write("\rsubbox at %+05.1f%+06.1f (%d empty)" %
                      (centre + (n_empty_cells, )))
            log.flush()
            lastcentre = centre
            # Of possible station records for this region, filter for those
            # from stations within radius of subbox centre.
            incircle_records = list(incircle(region_records, arc, *centre))

            # Combine data.
            subbox_series = [MISSING] * max_months

            if len(incircle_records) == 0:
                box_obj = giss_data.SubboxRecord(subbox_series,
                                                 box=list(subbox),
                                                 stations=0,
                                                 station_months=0,
                                                 d=MISSING)
                n_empty_cells += 1
                yield box_obj
                continue

            # Initialise data with first station
            record = incircle_records[0]
            total_good_months = record.good_count
            total_stations = 1

            max_weight = record.weight
            offset = record.rel_first_month - 1
            a = record.series  # just a temporary
            subbox_series[offset:offset + len(a)] = a
            weight = [0.0] * max_months
            for i in range(len(a)):
                if valid(a[i]):
                    weight[i + offset] = record.weight

            # Add in the remaining stations
            for record in incircle_records[1:]:
                # TODO: A StationMethod method to produce a padded data series
                #       would be good here. Hence we could just do:
                #           new = record.padded_series(max_months)
                new = [MISSING] * max_months
                aa, bb = record.rel_first_month, record.rel_last_month
                new[aa - 1:bb] = record.series
                station_months = series.combine(
                    subbox_series, weight, new, record.weight,
                    record.rel_first_year, record.rel_last_year + 1,
                    parameters.gridding_min_overlap)
                total_good_months += station_months
                if station_months == 0:
                    continue
                total_stations += 1

                if max_weight < record.weight:
                    max_weight = record.weight

            series.anomalize(subbox_series,
                             parameters.gridding_reference_period, first_year)
            box_obj = giss_data.SubboxRecord(subbox_series,
                                             n=max_months,
                                             box=list(subbox),
                                             stations=total_stations,
                                             station_months=total_good_months,
                                             d=radius * (1 - max_weight))
            yield box_obj
        plural_suffix = 's'
        if n_empty_cells == 1:
            plural_suffix = ''
        log.write(
            '\rRegion (%+03.0f/%+03.0f S/N %+04.0f/%+04.0f W/E): %d empty cell%s.\n'
            % (tuple(box) + (n_empty_cells, plural_suffix)))
    log.write("\n")
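
Before any station is selected, the combining radius is converted into an angle of arc: arc = radius / earth.radius in radians, and arcdeg is the same angle in degrees. A small worked sketch with assumed values; the 1200 km radius and 6371 km mean Earth radius are illustrative only, since the real code takes the radius as a function argument and earth.radius from the earth module.

import math

radius = 1200.0        # combining radius in km (assumed value)
earth_radius = 6371.0  # mean Earth radius in km (assumed value)
arc = radius / earth_radius       # angle of arc in radians, ~0.188
arcdeg = arc * 180 / math.pi      # the same angle in degrees, ~10.8
assert round(arcdeg, 1) == 10.8
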
Example 3: zonav
def zonav(boxed_data):
    """
    Perform Zonal Averaging.

    The input *boxed_data* is an iterator of boxed time series.
    The data in the boxes are combined to produce averages over
    various latitudinal zones.  Yields an (info, title) metadata item
    first, then an (averages, weights) pair for each zone.

    14 zones are produced.  The first 8 are the basic belts that are used
    for the equal area grid, the remaining 6 are combinations:

      0 64N - 90N               \
      1 44N - 64N (asin 0.9)     -  8 24N - 90 N  (0 + 1 + 2)
      2 24N - 44N (asin 0.7)    /
      3 Equ - 24N (asin 0.4)    \_  9 24S - 24 N  (3 + 4)
      4 24S - Equ               /
      5 44S - 24S               \
      6 64S - 44S                - 10 90S - 24 S  (5 + 6 + 7)
      7 90S - 64S               /

     11 northern hemisphere (0 + 1 + 2 + 3)
     12 southern hemisphere (4 + 5 + 6 + 7)
     13 global (all belts 0 to 7)
    """

    (info, titlei) = boxed_data.next()
    iyrbeg = info[5]
    monm = info[3]
    nyrsin = monm / 12
    # One more than the last year with data
    yearlimit = nyrsin + iyrbeg

    yield (info, titlei)

    boxes_in_band, band_in_zone = zones()

    bands = len(boxes_in_band)

    lenz = [None] * bands
    wt = [None] * bands
    avg = [None] * bands
    # For each band, combine all the boxes in that band to create a band
    # record.
    for band in range(bands):
        # The temperature (anomaly) series for each of the boxes in this
        # band.
        box_series = [None] * boxes_in_band[band]
        # The weight series for each of the boxes in this band.
        box_weights = [None] * boxes_in_band[band]
        # The number of months (with valid data) in the box series,
        # one entry for each box in this band.
        box_length = [None] * boxes_in_band[band]
        for box in range(boxes_in_band[band]):
            # The last element in the tuple is the boundaries of the
            # box.  We ignore it.
            box_series[box], box_weights[box], box_length[box], _ = (
                boxed_data.next())
        # total number of valid data in band's boxes
        total_length = sum(box_length)
        if total_length == 0:
            wt[band] = [0.0] * monm
            avg[band] = [MISSING] * monm
        else:
            box_length, IORD = sort_perm(box_length)
            nr = IORD[0]
            # Copy the longest box record into *wt* and *avg*.
            # Using list both performs a copy and converts into a mutable
            # list.
            wt[band] = list(box_weights[nr])
            avg[band] = list(box_series[nr])
            # And combine the remaining series.
            for n in range(1, boxes_in_band[band]):
                nr = IORD[n]
                if box_length[n] == 0:
                    # Nothing in this box, and since we sorted by length,
                    # all the remaining boxes will also be empty.  We can
                    # stop combining boxes.
                    break
                series.combine(avg[band], wt[band], box_series[nr],
                               box_weights[nr], 0, nyrsin,
                               parameters.box_min_overlap)
        series.anomalize(avg[band], parameters.box_reference_period, iyrbeg)
        lenz[band] = sum(valid(a) for a in avg[band])
        yield (avg[band], wt[band])

    # We expect to have consumed all the boxes (the first 8 bands form a
    # partition of the boxes).  We check that the boxed_data stream is
    # exhausted and contains no more boxes.
    try:
        boxed_data.next()
        assert 0, "Too many boxes found"
    except StopIteration:
        # We fully expect to get here.
        pass

    # *lenz* contains the lengths of each zone 0 to 7 (the number of
    # valid months in each zone).
    lenz, iord = sort_perm(lenz)
    for zone in range(len(band_in_zone)):
        if lenz[0] == 0:
            raise Error('**** NO DATA FOR ZONE %d' % (bands + zone))
        # Find the longest band that is in the special zone.
        for j1 in range(bands):
            if iord[j1] in band_in_zone[zone]:
                break
        else:
            # Should be an assertion really.
            raise Error('No band in special zone %d.' % zone)
        band = iord[j1]
        wtg = list(wt[band])
        avgg = list(avg[band])
        # Add in the remaining bands, in length order.
        for j in range(j1 + 1, bands):
            band = iord[j]
            if band not in band_in_zone[zone]:
                continue
            series.combine(avgg, wtg, avg[band], wt[band], 0, nyrsin,
                           parameters.box_min_overlap)
        series.anomalize(avgg, parameters.box_reference_period, iyrbeg)
        yield (avgg, wtg)
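
zonav relies on zones() (not shown) for two structures: boxes_in_band, the number of equal-area boxes in each of the 8 latitude bands, and band_in_zone, the band membership of each of the 6 combined zones. The sketch below shows plausible values; the zone memberships follow directly from the docstring, while the per-band split of the 80 boxes is an assumption.

# Sketch only: assumed shapes of the structures returned by zones().
boxes_in_band = [4, 8, 12, 16, 16, 12, 8, 4]   # assumed split of the 80 boxes
band_in_zone = [
    {0, 1, 2},                 # zone  8: 24N - 90N
    {3, 4},                    # zone  9: 24S - 24N
    {5, 6, 7},                 # zone 10: 90S - 24S
    {0, 1, 2, 3},              # zone 11: northern hemisphere
    {4, 5, 6, 7},              # zone 12: southern hemisphere
    {0, 1, 2, 3, 4, 5, 6, 7},  # zone 13: global
]
assert sum(boxes_in_band) == 80
assert len(boxes_in_band) == 8 and len(band_in_zone) == 6
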
Example 4: subbox_to_box
def subbox_to_box(meta, cells, celltype='BOX'):
    """Aggregate the subboxes (aka cells, typically 8000 per globe)
    into boxes (typically 80 boxes per globe), and combine records to
    produce one time series per box.

    *celltype* is used for logging; using a distinct (3 character) code
    allows the log output for the land, ocean, and land--ocean
    analyses to be separated.

    *meta* specifies the meta data and is used to determine the first
    year (meta.yrbeg) and length (meta.monm) for all the resulting
    series.

    Returns an iterator of box data: for each box a quadruple of
    (*anom*, *weight*, *ngood*, *box*) is yielded.  *anom* is the
    temperature anomaly series, *weight* is the weights for the series
    (number of cells contributing for each month), *ngood* is the total
    number of valid data in the series, and *box* is a 4-tuple that
    describes the region's bounds: (southern, northern, western, eastern).
    """

    # The (80) large boxes.
    boxes = list(eqarea.grid())
    # For each box, make a list of contributors (cells that contribute
    # to the box time series); initially empty.
    contributordict = dict((box, []) for box in boxes)
    # Partition the cells into the boxes.
    for cell in cells:
        box = whichbox(boxes, cell.box)
        contributordict[box].append(cell)

    def padded_series(s):
        """Produce a series that is padded to start in meta.yrbeg and
        is of length meta.monm months.
        *s* should be a giss_data.Series instance.
        """

        result = [MISSING] * meta.monm
        offset = 12 * (s.first_year - meta.yrbeg)
        result[offset:offset + len(s)] = s.series
        return result

    # For each box, sort and combine the contributing cells, and output
    # the result (by yielding it).
    for box in boxes:
        contributors = contributordict[box]
        # :todo: should probably import from a purpose built module.
        from step3 import sort
        sort(contributors, lambda x, y: y.good_count - x.good_count)

        best = contributors[0]
        box_series = padded_series(best)
        box_weight = [float(valid(a)) for a in box_series]

        # Start the *contributed* list with this cell.
        l = [any(valid(v) for v in box_series[i::12]) for i in range(12)]
        s = ''.join('01'[x] for x in l)
        contributed = [[best.uid, 1.0, s]]

        # Loop over the remaining contributors.
        for cell in contributors[1:]:
            if cell.good_count >= parameters.subbox_min_valid:
                addend_series = padded_series(cell)
                weight = 1.0
                station_months = series.combine(box_series, box_weight,
                                                addend_series, weight,
                                                parameters.box_min_overlap)
                s = ''.join('01'[bool(x)] for x in station_months)
            else:
                weight = 0.0
                s = '0' * 12
            contributed.append([cell.uid, weight, s])

        box_first_year = meta.yrbeg
        series.anomalize(box_series, parameters.subbox_reference_period,
                         box_first_year)
        uid = giss_data.boxuid(box, celltype=celltype)
        log.write("%s cells %s\n" % (uid, asjson(contributed)))
        ngood = sum(valid(a) for a in box_series)
        yield (box_series, box_weight, ngood, box)
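
padded_series aligns each cell's series onto the box's common time base: the offset is 12 months for every year between meta.yrbeg and the series' own first year. A self-contained sketch of the same arithmetic with made-up years follows; the 9999.0 sentinel and the pad helper are assumptions for illustration, not the project's API.

MISSING = 9999.0  # assumed missing-data sentinel

def pad(values, first_year, base_year, total_months):
    # Hypothetical standalone version of padded_series above.
    result = [MISSING] * total_months
    offset = 12 * (first_year - base_year)
    result[offset:offset + len(values)] = values
    return result

# A two-year series starting in 1882, padded onto a base starting in 1880.
padded = pad([0.1] * 24, 1882, 1880, 60)
assert padded[:24] == [MISSING] * 24
assert padded[24:48] == [0.1] * 24
assert padded[48:] == [MISSING] * 12
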
Example 5: iter_subbox_grid (variant)
def iter_subbox_grid(station_records, max_months, first_year, radius):
    """Convert the input *station_records* into a gridded anomaly
    dataset, which is returned as an iterator.

    *max_months* is the maximum number of months in any station
    record.  *first_year* is the first year in the dataset.  *radius*
    is the combining radius in kilometres.
    """

    # Clear Climate Code
    import earth  # required for earth.radius.

    # Convert to list because we re-use it for each box (region).
    station_records = list(station_records)
    # Descending sort by number of good records.
    # TODO: Switch to using Python's sort method here, although it
    # will change the results.
    sort(station_records, lambda x,y: y.good_count - x.good_count)

    # A dribble of progress messages.
    dribble = sys.stdout

    # Critical radius as an angle of arc
    arc = radius / earth.radius
    arcdeg = arc * 180 / math.pi

    regions = list(eqarea.gridsub())
    for region in regions:
        box, subboxes = region[0], list(region[1])

        # Count how many cells are empty
        n_empty_cells = 0
        for subbox in subboxes:
            # Select and weight stations
            centre = eqarea.centre(subbox)
            dribble.write("\rsubbox at %+05.1f%+06.1f (%d empty)" % (
              centre + (n_empty_cells,)))
            dribble.flush()
            # Determine the contributing stations to this grid cell.
            contributors = list(incircle(station_records, arc, *centre))

            # Combine data.
            subbox_series = [MISSING] * max_months

            if not contributors:
                box_obj = giss_data.Series(series=subbox_series,
                    box=list(subbox), stations=0, station_months=0,
                    d=MISSING)
                n_empty_cells += 1
                yield box_obj
                continue

            # Initialise series and weight arrays with first station.
            record,wt = contributors[0]
            total_good_months = record.good_count
            total_stations = 1

            offset = record.rel_first_month - 1
            a = record.series # just a temporary
            subbox_series[offset:offset + len(a)] = a
            max_weight = wt
            weight = [wt*valid(v) for v in subbox_series]

            # For logging, keep a list of stations that contributed.
            # Each item in this list is a triple (in list form, so that
            # it can be converted to JSON easily) of [id12, weight,
            # months].  *id12* is the 12 character station identifier;
            # *weight* (a float) is the weight (computed based on
            # distance) of the station's series; *months* is a 12 digit
            # string that records whether each of the 12 months is used.
            # '0' in position *i* indicates that the month was not used,
            # a '1' indicates that it was used.  January is position 0.
            l = [any(valid(v) for v in subbox_series[i::12])
              for i in range(12)]
            s = ''.join('01'[x] for x in l)
            contributed = [[record.uid,wt,s]]

            # Add in the remaining stations
            for record,wt in contributors[1:]:
                # TODO: A method to produce a padded data series
                #       would be good here. Hence we could just do:
                #           new = record.padded_series(max_months)
                new = [MISSING] * max_months
                aa, bb = record.rel_first_month, record.rel_last_month
                new[aa - 1:bb] = record.series
                station_months = series.combine(
                    subbox_series, weight, new, wt,
                    parameters.gridding_min_overlap)
                n_good_months = sum(station_months)
                total_good_months += n_good_months
                if n_good_months == 0:
                    contributed.append([record.uid, 0.0, '0'*12])
                    continue
                total_stations += 1
                s = ''.join('01'[bool(x)] for x in station_months)
                contributed.append([record.uid,wt,s])

                max_weight = max(max_weight, wt)

            series.anomalize(subbox_series,
                             parameters.gridding_reference_period, first_year)
            box_obj = giss_data.Series(series=subbox_series, n=max_months,
                    box=list(subbox), stations=total_stations,
                    station_months=total_good_months,
                    d=radius*(1-max_weight))
            log.write("%s stations %s\n" % (box_obj.uid,
              asjson(contributed)))
            yield box_obj
        plural_suffix = 's'
        if n_empty_cells == 1:
            plural_suffix = ''
        dribble.write(
          '\rRegion (%+03.0f/%+03.0f S/N %+04.0f/%+04.0f W/E): %d empty cell%s.\n' %
            (tuple(box) + (n_empty_cells,plural_suffix)))
    dribble.write("\n")
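
Each entry in the *contributed* log is a [uid, weight, months] triple, where *months* is a 12 character string with January in position 0, built by striding through the combined series 12 months at a time. A standalone sketch of that construction; the MISSING sentinel, the valid helper, and the tiny series are made up for illustration.

MISSING = 9999.0  # assumed missing-data sentinel

def valid(x):
    # Assumed convention: anything other than the MISSING sentinel is valid.
    return x != MISSING

# 24 made-up months with data only in January of year 1 and March of year 2.
monthly = [MISSING] * 24
monthly[0] = 0.3    # January, year 1
monthly[14] = -0.1  # March, year 2

used = [any(valid(v) for v in monthly[i::12]) for i in range(12)]
months = ''.join('01'[x] for x in used)
assert months == '101000000000'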