Exemple #1
0
def SBBXtoBX(data):
    """Simultaneously combine the land series and the ocean series and
    combine subboxes into boxes.

    *data* should be an iterator.  Its first item is the metadata:
    normally a (land, ocean) pair of metadata objects, or a single
    metadata object when there is no ocean data (the rest of the stream
    is then passed through blank_ocean_data() to synthesize the missing
    ocean records).  The remaining items are (land, ocean) subbox
    series pairs; they are consumed 100 at a time, one batch for each
    box produced by eqarea.grid().

    Yields first an (info, title) pair, then for each box a quadruple
    (avgr, wtr, ngood, box): the anomalized box series, the weights for
    that series, the number of valid values in it, and the box as
    produced by eqarea.grid().

    Raises AssertionError if land_meta.mavg is not 6 or if *data* still
    holds records once every box has been produced, and ValueError if
    *data* runs out before a box has its full set of subboxes.
    """

    # First item from iterator is normally a pair of metadata objects,
    # one for land, one for ocean.  If we are piping step3 straight into
    # step5 then it is not a pair.  In that case we synthesize missing
    # ocean data.
    meta = next(data)
    try:
        land_meta, ocean_meta = meta
    except (TypeError, ValueError):
        # Use the land meta object for both land and ocean data
        land_meta, ocean_meta = meta, meta
        print("No ocean data; using land data only")
        data = blank_ocean_data(data)

    # number of subboxes within each box
    nsubbox = 100

    # TODO: Formalise use of only monthlies, see step 3.
    assert land_meta.mavg == 6
    combined_year_beg = min(land_meta.yrbeg, ocean_meta.yrbeg)
    # Index into the combined array of the first year of the land data.
    land_offset = 12 * (land_meta.yrbeg - combined_year_beg)
    # As land_offset but for ocean data.
    ocean_offset = 12 * (ocean_meta.yrbeg - combined_year_beg)
    # The combined series must be long enough for whichever record ends
    # last, so each extent is measured with its own record length.
    combined_n_months = max(land_meta.monm + land_offset,
                            ocean_meta.monm + ocean_offset)

    info = [
        land_meta.mo1, land_meta.kq, land_meta.mavg, land_meta.monm,
        land_meta.monm4, combined_year_beg, land_meta.missing_flag,
        land_meta.precipitation_flag
    ]
    # The monm4 slot is replaced by a value derived from monm.
    info[4] = 2 * land_meta.monm + 5
    yield (info, land_meta.title)

    # :todo: should probably import from a purpose built module.
    from step3 import sort

    for box in eqarea.grid():
        # Eat the records from land and ocean 100 (nsubbox) at a time.
        # In other words, all 100 subboxes for the box.
        batch = list(itertools.islice(data, nsubbox))
        if len(batch) != nsubbox:
            raise ValueError("Too few input records: expected %d subboxes "
                             "for a box, got %d" % (nsubbox, len(batch)))

        # One combined series per subbox, and its count of good values.
        avg = []
        wgtc = []
        for land, ocean in batch:
            combined = [MISSING] * combined_n_months
            if (ocean.good_count < parameters.subbox_min_valid
                    or land.d < parameters.subbox_land_range):
                # use land series for this subbox
                end = land_offset + len(land.series)
                combined[land_offset:end] = land.series
                wgtc.append(land.good_count)
            else:
                # use ocean series for this subbox
                end = ocean_offset + len(ocean.series)
                combined[ocean_offset:end] = ocean.series
                wgtc.append(ocean.good_count)
            avg.append(combined)

        # GISTEMP sort.
        # We want to end up with IORDR, the permutation array that
        # represents the sorted order.  IORDR[0] is the index (into the
        # *wgtc* array) of the longest record, IORDR[1] the index of the
        # next longest record, and so on.
        IORDR = list(range(nsubbox))
        sort(IORDR, lambda x, y: wgtc[y] - wgtc[x])

        # The longest record seeds the box record and its weights.
        longest = IORDR[0]
        # Weights for the box's record.
        wtr = [a != MISSING for a in avg[longest]]
        # Box record
        avgr = avg[longest][:]

        # Loop over the remaining cells, skipping those with too few
        # good values.
        for nc in IORDR[1:]:
            if wgtc[nc] >= parameters.subbox_min_valid:
                series.combine(avgr, wtr, avg[nc], 1, 0,
                               combined_n_months // 12,
                               parameters.box_min_overlap)

        series.anomalize(avgr, parameters.subbox_reference_period,
                         combined_year_beg)
        ngood = sum(valid(a) for a in avgr)
        yield (avgr, wtr, ngood, box)

    # We've now consumed all the input subboxes and yielded all the
    # boxes.  We need to tickle the input to check that it is exhausted
    # and to cause it to run the final tail of its generator.  An
    # exhausted input is the expected case and simply ends this
    # generator; anything left over is an error.
    try:
        next(data)
    except StopIteration:
        return
    raise AssertionError("Too many input records")
Exemple #2
0
def SBBXtoBX(data):
    """Simultaneously combine the land series and the ocean series and
    combine subboxes into boxes.

    *data* should be an iterator whose first item is the metadata
    (usually a (land, ocean) pair of metadata objects; a single object
    means there is no ocean data, and blank_ocean_data() is then used
    to synthesize it) and whose following items are (land, ocean)
    subbox series pairs, 100 per box of eqarea.grid().

    Returns an iterator of box data: an (info, title) pair first, then
    one (avgr, wtr, ngood, box) quadruple per box, where *avgr* is the
    anomalized box series, *wtr* its weights, *ngood* the number of
    valid values in *avgr* and *box* the item from eqarea.grid().

    Raises AssertionError when land_meta.mavg is not 6 or when input
    records remain after the last box; raises ValueError when the input
    ends before a box has all of its subboxes.
    """

    # First item from iterator is normally a pair of metadata objects,
    # one for land, one for ocean.  If we are piping step3 straight into
    # step5 then it is not a pair.  In that case we synthesize missing
    # ocean data.
    meta = next(data)
    try:
        land_meta, ocean_meta = meta
    except (TypeError, ValueError):
        # Use the land meta object for both land and ocean data
        land_meta, ocean_meta = meta, meta
        print("No ocean data; using land data only")
        data = blank_ocean_data(data)

    # number of subboxes within each box
    nsubbox = 100

    # TODO: Formalise use of only monthlies, see step 3.
    assert land_meta.mavg == 6
    combined_year_beg = min(land_meta.yrbeg, ocean_meta.yrbeg)
    # Index into the combined array of the first year of the land data.
    land_offset = 12 * (land_meta.yrbeg - combined_year_beg)
    # As land_offset but for ocean data.
    ocean_offset = 12 * (ocean_meta.yrbeg - combined_year_beg)
    # Length of the combined series: the later of the two record ends,
    # each computed from that record's own length.
    land_end = land_meta.monm + land_offset
    ocean_end = ocean_meta.monm + ocean_offset
    combined_n_months = max(land_end, ocean_end)
    combined_n_years = combined_n_months // 12

    def placed(values, offset):
        """Return a combined-length series that is MISSING everywhere
        except for *values*, which are written starting at *offset*."""
        out = [MISSING] * combined_n_months
        out[offset:offset + len(values)] = values
        return out

    info = [land_meta.mo1, land_meta.kq, land_meta.mavg, land_meta.monm,
            land_meta.monm4, combined_year_beg, land_meta.missing_flag,
            land_meta.precipitation_flag]
    # Overwrite the monm4 entry with a value computed from monm.
    info[4] = 2 * land_meta.monm + 5
    yield (info, land_meta.title)

    # :todo: should probably import from a purpose built module.
    from step3 import sort

    for box in eqarea.grid():
        # Eat the records from land and ocean 100 (nsubbox) at a time.
        # In other words, all 100 subboxes for the box.
        subboxes = list(itertools.islice(data, nsubbox))
        if len(subboxes) != nsubbox:
            raise ValueError("Too few input records: expected %d subboxes "
                             "for a box, got %d" % (nsubbox, len(subboxes)))

        # Averages for the land and ocean (one series per subbox), and
        # the number of good values in each chosen series.
        avg = []
        wgtc = []
        for land, ocean in subboxes:
            use_land = (ocean.good_count < parameters.subbox_min_valid
                        or land.d < parameters.subbox_land_range)
            if use_land:
                avg.append(placed(land.series, land_offset))
                wgtc.append(land.good_count)
            else:
                avg.append(placed(ocean.series, ocean_offset))
                wgtc.append(ocean.good_count)

        # GISTEMP sort.
        # We want to end up with IORDR, the permutation array that
        # represents the sorted order.  IORDR[0] is the index (into the
        # *wgtc* array) of the longest record, IORDR[1] the index of the
        # next longest record, and so on.
        IORDR = list(range(nsubbox))
        sort(IORDR, lambda x, y: wgtc[y] - wgtc[x])

        # Initialise the box record and its weights from the longest
        # subbox record.
        seed = avg[IORDR[0]]
        # Weights for the box's record.
        wtr = [a != MISSING for a in seed]
        # Box record (a copy, so the subbox series is left untouched).
        avgr = seed[:]

        # Fold in the remaining cells that have enough good values.
        for nc in IORDR[1:]:
            if wgtc[nc] < parameters.subbox_min_valid:
                continue
            series.combine(avgr, wtr, avg[nc], 1, 0,
                           combined_n_years, parameters.box_min_overlap)

        series.anomalize(avgr, parameters.subbox_reference_period,
                         combined_year_beg)
        ngood = sum(valid(a) for a in avgr)
        yield (avgr, wtr, ngood, box)

    # Every box has been yielded.  Ask the input for one more record:
    # this checks that it is exhausted and lets it run the final tail of
    # its generator.  Exhaustion is the normal case and just ends this
    # generator; a surplus record is reported as an error.
    for _surplus in data:
        raise AssertionError("Too many input records")
Exemple #3
0
def subbox_to_box(meta, cells, celltype='BOX'):
    """Aggregate the subboxes (aka cells, typically 8000 per globe)
    into boxes (typically 80 boxes per globe), and combine records to
    produce one time series per box.

    *celltype* is used for logging, using a distinct (3 character) code
    will allow the log output for the land, ocean, and land--ocean
    analyses to be separated.

    *meta* specifies the meta data and is used to determine the first
    year (meta.yrbeg) and length (meta.monm) for all the resulting
    series.  Cell data falling outside that window is dropped.

    *cells* is an iterable of cell series; each is assigned to a box
    with whichbox() using its *box* attribute.

    Returns an iterator of box data: for each box a quadruple of
    (*anom*, *weight*, *ngood*, *box*) is yielded.  *anom* is the
    temperature anomaly series, *weight* is the weights for the series
    (number of cells contributing for each month), *ngood* is total
    number of valid data in the series, *box* is a 4-tuple that
    describes the regions bounds: (southern, northern, western, eastern).
    """

    # :todo: should probably import from a purpose built module.
    from step3 import sort

    # The (80) large boxes.
    boxes = list(eqarea.grid())
    # For each box, make a list of contributors (cells that contribute
    # to the box time series); initially empty.
    contributordict = dict((box, []) for box in boxes)
    # Partition the cells into the boxes.
    for cell in cells:
        box = whichbox(boxes, cell.box)
        contributordict[box].append(cell)

    def padded_series(s):
        """Produce a series, that is padded to start in meta.yrbeg and
        is of length meta.monm months.
        *s* should be a giss_data.Series instance.

        Values of *s* that fall before meta.yrbeg or after the final
        month are discarded, so the result is always exactly meta.monm
        long.
        """

        result = [MISSING] * meta.monm
        offset = 12 * (s.first_year - meta.yrbeg)
        # Number of leading months of *s* that precede meta.yrbeg.
        skip = max(0, -offset)
        # Where the first retained month lands in *result*.
        start = max(0, offset)
        room = meta.monm - start
        if room > 0:
            chunk = s.series[skip:skip + room]
            result[start:start + len(chunk)] = chunk
        return result

    # For each box, sort and combine the contributing cells, and output
    # the result (by yielding it).
    for box in boxes:
        contributors = contributordict[box]
        # Longest record (most good values) first.
        sort(contributors, lambda x, y: y.good_count - x.good_count)

        # NOTE(review): a box with no contributing cells raises
        # IndexError here -- confirm callers always supply at least one
        # cell per box.
        best = contributors[0]
        box_series = padded_series(best)
        box_weight = [float(valid(a)) for a in box_series]

        # Start the *contributed* list with this cell.  The string has
        # one '0'/'1' character per calendar month, '1' when the cell
        # has any valid value for that month.
        month_has_data = [any(valid(v) for v in box_series[i::12])
                          for i in range(12)]
        s = ''.join('01'[x] for x in month_has_data)
        contributed = [[best.uid, 1.0, s]]

        # Loop over the remaining contributors.
        for cell in contributors[1:]:
            if cell.good_count >= parameters.subbox_min_valid:
                addend_series = padded_series(cell)
                weight = 1.0
                station_months = series.combine(box_series, box_weight,
                                                addend_series, weight,
                                                parameters.box_min_overlap)
                s = ''.join('01'[bool(x)] for x in station_months)
            else:
                # Too few good values: logged with zero weight, not
                # combined.
                weight = 0.0
                s = '0' * 12
            contributed.append([cell.uid, weight, s])

        box_first_year = meta.yrbeg
        series.anomalize(box_series, parameters.subbox_reference_period,
                         box_first_year)
        uid = giss_data.boxuid(box, celltype=celltype)
        log.write("%s cells %s\n" % (uid, asjson(contributed)))
        ngood = sum(valid(a) for a in box_series)
        yield (box_series, box_weight, ngood, box)
Exemple #4
0
def subbox_to_box(meta, cells, celltype='BOX'):
    """Aggregate the subboxes (aka cells, typically 8000 per globe)
    into boxes (typically 80 boxes per globe), and combine records to
    produce one time series per box.

    *celltype* is used for logging, using a distinct (3 character) code
    will allow the log output for the land, ocean, and land--ocean
    analyses to be separated.

    *meta* specifies the meta data and is used to determine the first
    year (meta.yrbeg) and length (meta.monm) for all the resulting
    series; months of a cell outside that span are ignored.

    *cells* is an iterable of cell series, each placed in a box by
    whichbox() according to its *box* attribute.

    Returns an iterator of box data: for each box a quadruple of
    (*anom*, *weight*, *ngood*, *box*) is yielded.  *anom* is the
    temperature anomaly series, *weight* is the weights for the series
    (number of cells contributing for each month), *ngood* is total
    number of valid data in the series, *box* is a 4-tuple that
    describes the regions bounds: (southern, northern, western, eastern).
    """

    # :todo: should probably import from a purpose built module.
    from step3 import sort

    # The (80) large boxes.
    boxes = list(eqarea.grid())
    # Map each box to its (initially empty) list of contributing cells,
    # then partition the cells into the boxes.
    contributordict = dict((box, []) for box in boxes)
    for cell in cells:
        contributordict[whichbox(boxes, cell.box)].append(cell)

    def padded_series(s):
        """Produce a series, that is padded to start in meta.yrbeg and
        is of length meta.monm months.
        *s* should be a giss_data.Series instance.

        Months of *s* lying outside the meta.yrbeg / meta.monm span are
        dropped rather than changing the length of the result.
        """

        result = [MISSING] * meta.monm
        offset = 12 * (s.first_year - meta.yrbeg)
        for i, value in enumerate(s.series):
            month = offset + i
            # Keep only months inside the output window; a negative
            # index must not wrap round to the end of the list.
            if 0 <= month < meta.monm:
                result[month] = value
        return result

    def months_flags(present):
        """Render an iterable of truth values as a string of '0'/'1'."""
        return ''.join('01'[bool(x)] for x in present)

    # For each box, sort and combine the contributing cells, and output
    # the result (by yielding it).
    for box in boxes:
        contributors = contributordict[box]
        # Cells with the most good values come first.
        sort(contributors, lambda x, y: y.good_count - x.good_count)

        # NOTE(review): assumes every box received at least one cell;
        # an empty box raises IndexError on the next line -- confirm.
        best = contributors[0]
        box_series = padded_series(best)
        box_weight = [float(valid(a)) for a in box_series]

        # Start the *contributed* list with this cell: one flag per
        # calendar month saying whether the cell has any valid value.
        has_month = [any(valid(v) for v in box_series[m::12])
                     for m in range(12)]
        contributed = [[best.uid, 1.0, months_flags(has_month)]]

        # Loop over the remaining contributors.
        for cell in contributors[1:]:
            if cell.good_count >= parameters.subbox_min_valid:
                weight = 1.0
                station_months = series.combine(
                    box_series, box_weight, padded_series(cell), weight,
                    parameters.box_min_overlap)
                flags = months_flags(station_months)
            else:
                # Not enough good values: recorded with zero weight and
                # left out of the combination.
                weight = 0.0
                flags = '0' * 12
            contributed.append([cell.uid, weight, flags])

        series.anomalize(box_series, parameters.subbox_reference_period,
                         meta.yrbeg)
        uid = giss_data.boxuid(box, celltype=celltype)
        log.write("%s cells %s\n" % (uid, asjson(contributed)))
        ngood = sum(valid(a) for a in box_series)
        yield (box_series, box_weight, ngood, box)