def step3(records, radius=parameters.gridding_radius, year_begin=1880):
    """Step 3 of the GISS processing.

    *records* should be a generator that yields each station.  *radius*
    is handed to iter_subbox_grid and is also written into the title
    of the metadata.

    NOTE: the *year_begin* argument is accepted but its value is
    replaced by giss_data.BASE_YEAR before it is used.

    This is a generator: it yields the subbox metadata object first,
    and then every item produced by iter_subbox_grid.
    """
    # Most of the metadata here used to be synthesized in step2.py and
    # copied from the first yielded record.  Now we synthesize here
    # instead.
    final_year = giss_data.get_ghcn_last_year()
    year_begin = giss_data.BASE_YEAR

    # Total number of months in a fixed length record.
    month_count = 12 * (final_year - year_begin + 1)

    meta = giss_data.SubboxMetaData(
        mo1=None,
        kq=1,
        mavg=6,
        monm=month_count,
        monm4=month_count + 7,
        yrbeg=year_begin,
        missing_flag=9999,
        precipitation_flag=9999,
        title='GHCN V2 Temperatures (.1 C)')

    # The final title embeds the (truncated) base title, the units, the
    # gridding radius and the first year; it is padded to 80 columns.
    full_title = "%20.20s ANOM %-4s CR %4dKM %s-present" % (
        meta.title, '(C)', radius, year_begin)
    meta.mo1 = 1
    meta.title = full_title.ljust(80)

    boxes = iter_subbox_grid(records, month_count, year_begin, radius)

    yield meta
    for subbox in boxes:
        yield subbox
def urban_adjustments(anomaly_stream):
    """Takes an iterator of station records and applies an adjustment
    to urban stations to compensate for urban temperature effects.
    Returns an iterator of station records.  Rural stations are passed
    unchanged.  Urban stations which cannot be adjusted are discarded.

    The adjustment follows a linear or two-part linear fit to the
    difference in annual anomalies between the urban station and the
    combined set of nearby rural stations.  The linear fit is to allow
    for a linear effect at the urban station.  The two-part linear fit
    is to allow for a model of urban effect which starts or stops at
    some point during the time series.

    The algorithm is essentially as follows:

    For each urban station:
        1. Find all the rural stations within a fixed radius;
        2. Combine the annual anomaly series for those rural stations,
           in order of valid-data count;
        3. Calculate a two-part linear fit for the difference between
           the urban annual anomalies and this combined rural annual
           anomaly;
        4. If this fit is satisfactory, apply it; otherwise apply a
           linear fit.

        If there are not enough nearby rural stations, or the combined
        rural record does not have enough overlap with the urban
        record, try a second time for this urban station, with a
        larger radius.  If there is still not enough data, discard the
        urban station.

    The whole of *anomaly_stream* is consumed before the first record
    is yielded, because every rural station must be known before any
    urban station can be adjusted.  Records are yielded in their input
    order.  Records for which annual_anomaly() leaves
    ``record.anomalies`` as None are never registered as urban, so
    they take the same pass-through path as rural records.

    NOTE(review): this file appears to contain a second, later
    definition of urban_adjustments; if both are at module level the
    later one rebinds the name.  Confirm and remove one of them.
    """
    last_year = giss_data.get_ghcn_last_year()
    first_year = 1880

    # Years are handled relative to iyoff, so that relative year 1 is
    # giss_data.BASE_YEAR; iyrm is the number of years covered.
    iyoff = giss_data.BASE_YEAR - 1
    iyrm = last_year - iyoff

    rural_stations = []
    urban_stations = {}

    pi180 = math.pi / 180.0

    # Named all_records (not "all") so the builtin is not shadowed.
    all_records = []
    for record in anomaly_stream:
        station = record.station
        all_records.append(record)
        record.urban_adjustment = None
        annual_anomaly(record)
        if record.anomalies is None:
            continue
        length = len(record.anomalies)
        d = Struct()
        d.anomalies = record.anomalies
        # Sines and cosines of the station position, in radians.
        d.cslat = math.cos(station.lat * pi180)
        d.snlat = math.sin(station.lat * pi180)
        d.cslon = math.cos(station.lon * pi180)
        d.snlon = math.sin(station.lon * pi180)
        d.id = record.uid
        d.first_year = record.first - iyoff
        d.last_year = d.first_year + length - 1
        d.station = station
        d.record = record
        if is_rural(station):
            rural_stations.append(d)
        else:
            urban_stations[record] = d

    # Sort the rural stations according to the length of the time record
    # (ignoring gaps).
    for st in rural_stations:
        st.recLen = sum(1 for v in st.anomalies if valid(v))
    # Ascending sort followed by reverse() is kept deliberately: it does
    # not order equal-length records the same way sort(reverse=True)
    # would.
    rural_stations.sort(key=lambda s: s.recLen)
    rural_stations.reverse()

    # Combine time series for rural stations around each urban station
    for record in all_records:
        us = urban_stations.get(record)
        if us is None:
            # Just remove leading/trailing invalid values for rural stations.
            record.strip_invalid()
            record.begin = record.first
            record.end = record.last
            yield record
            continue

        iyu1 = us.first_year + iyoff - 1  # subtract 1 for a possible partial yr
        iyu2 = us.last_year + iyoff + 1   # add 1 for partial year

        usingFullRadius = False
        dropStation = False
        needNewNeighbours = True
        while True:
            if needNewNeighbours:
                # First attempt uses half the full radius; the retry
                # uses the full radius.
                if usingFullRadius:
                    radius = parameters.urban_adjustment_full_radius
                else:
                    radius = parameters.urban_adjustment_full_radius / 2
                neighbors = get_neighbours(us, rural_stations, radius)
                if not neighbors:
                    if usingFullRadius:
                        dropStation = True
                        break
                    usingFullRadius = True
                    needNewNeighbours = True
                    continue
                counts, urban_series, combined = combine_neighbors(
                    us, iyrm, iyoff, neighbors)
                iy1 = 1
                needNewNeighbours = False

            points, quorate_count, first, last = prepare_series(
                iy1, iyrm, combined, urban_series, counts, iyoff)

            if quorate_count < parameters.urban_adjustment_min_years:
                if usingFullRadius:
                    dropStation = True
                    break
                usingFullRadius = True
                needNewNeighbours = True
                continue

            if quorate_count >= (parameters.urban_adjustment_proportion_good
                                 * (last - first + 0.9)):
                break

            # Not enough good years for the given range.  Try to save
            # cases in which the gaps are in the early part, by
            # dropping that part and going around to prepare_series
            # again.
            iy1 = int(last - (quorate_count - 1) /
                      parameters.urban_adjustment_proportion_good)
            if iy1 < first + 1:
                iy1 = first + 1  # avoid infinite loop

        if dropStation:
            # Urban station that cannot be adjusted: not yielded.
            continue

        fit = getfit(points)

        # find extended range
        iyxtnd = int(round(quorate_count /
                           parameters.urban_adjustment_proportion_good)
                     - (last - first + 1))
        n1x = first + iyoff
        n2x = last + iyoff
        if iyxtnd < 0:
            sys.exit('impossible')
        if iyxtnd > 0:
            lxend = iyu2 - (last + iyoff)
            if iyxtnd <= lxend:
                n2x = n2x + lxend
            else:
                n1x = n1x - (iyxtnd - lxend)
                if n1x < iyu1:
                    n1x = iyu1
                n2x = iyu2

        series = record.series
        # adjust
        m1 = record.rel_first_month + record.good_start_idx
        m2 = record.rel_first_month + record.good_end_idx - 1
        offset = record.good_start_idx  # index of first valid month
        a, b = adjust(first_year, record, series, fit, n1x, n2x,
                      first + iyoff, last + iyoff, m1, m2, offset)
        # a and b are numbers of new first and last valid months
        aa = a - m1
        bb = b - a + 1
        record.set_series(a - 1 + first_year * 12 + 1,
                          series[aa + offset:aa + offset + bb])
        # Floor division: a month number must map to a whole year.  With
        # "/" this is only correct where int / int truncates.
        record.begin = ((a - 1) // 12) + first_year
        record.first = record.begin
        record.end = ((b - 1) // 12) + first_year
        record.last = record.last_year
        yield record
def urban_adjustments(anomaly_stream):
    """Takes an iterator of station records and applies an adjustment
    to urban stations to compensate for urban temperature effects.
    Returns an iterator of station records.  Rural stations are passed
    unchanged.  Urban stations which cannot be adjusted are discarded.

    The adjustment follows a linear or two-part linear fit to the
    difference in annual anomalies between the urban station and the
    combined set of nearby rural stations.  The linear fit is to allow
    for a linear effect at the urban station.  The two-part linear fit
    is to allow for a model of urban effect which starts or stops at
    some point during the time series.

    The algorithm is essentially as follows:

    For each urban station:
        1. Find all the rural stations within a fixed radius;
        2. Combine the annual anomaly series for those rural stations,
           in order of valid-data count;
        3. Calculate a two-part linear fit for the difference between
           the urban annual anomalies and this combined rural annual
           anomaly;
        4. If this fit is satisfactory, apply it; otherwise apply a
           linear fit.

        If there are not enough nearby rural stations, or the combined
        rural record does not have enough overlap with the urban
        record, try a second time for this urban station, with a
        larger radius.  If there is still not enough data, discard the
        urban station.

    The input stream is read to exhaustion before anything is yielded,
    since the full set of rural stations is needed to adjust any urban
    station.  Output order follows input order.  A record whose
    ``anomalies`` attribute is None after annual_anomaly() is not
    entered in the urban table and is therefore passed through like a
    rural record.

    NOTE(review): an earlier definition of urban_adjustments with the
    same logic appears in this file; if both are at module level this
    later one is the one bound to the name.  Confirm and drop the
    duplicate.
    """
    last_year = giss_data.get_ghcn_last_year()
    first_year = 1880

    # Relative year numbering: relative year 1 is giss_data.BASE_YEAR,
    # and iyrm is the number of years up to last_year.
    iyoff = giss_data.BASE_YEAR - 1
    iyrm = last_year - iyoff

    rural_stations = []
    urban_stations = {}

    pi180 = math.pi / 180.0

    # Not called "all", which would hide the builtin of that name.
    all_records = []
    for record in anomaly_stream:
        station = record.station
        all_records.append(record)
        record.urban_adjustment = None
        annual_anomaly(record)
        if record.anomalies is None:
            continue
        length = len(record.anomalies)
        d = Struct()
        d.anomalies = record.anomalies
        # Trigonometric terms of latitude/longitude (degrees -> radians).
        d.cslat = math.cos(station.lat * pi180)
        d.snlat = math.sin(station.lat * pi180)
        d.cslon = math.cos(station.lon * pi180)
        d.snlon = math.sin(station.lon * pi180)
        d.id = record.uid
        d.first_year = record.first - iyoff
        d.last_year = d.first_year + length - 1
        d.station = station
        d.record = record
        if is_rural(station):
            rural_stations.append(d)
        else:
            urban_stations[record] = d

    # Sort the rural stations according to the length of the time record
    # (ignoring gaps).
    for st in rural_stations:
        st.recLen = sum(1 for v in st.anomalies if valid(v))
    # Keep sort-then-reverse: sort(reverse=True) would leave records of
    # equal length in a different relative order.
    rural_stations.sort(key=lambda s: s.recLen)
    rural_stations.reverse()

    # Combine time series for rural stations around each urban station
    for record in all_records:
        us = urban_stations.get(record)
        if us is None:
            # Just remove leading/trailing invalid values for rural stations.
            record.strip_invalid()
            record.begin = record.first
            record.end = record.last
            yield record
            continue

        iyu1 = us.first_year + iyoff - 1  # subtract 1 for a possible partial yr
        iyu2 = us.last_year + iyoff + 1   # add 1 for partial year

        usingFullRadius = False
        dropStation = False
        needNewNeighbours = True
        while True:
            if needNewNeighbours:
                # Half radius on the first pass, full radius on the retry.
                if usingFullRadius:
                    radius = parameters.urban_adjustment_full_radius
                else:
                    radius = parameters.urban_adjustment_full_radius / 2
                neighbors = get_neighbours(us, rural_stations, radius)
                if not neighbors:
                    if usingFullRadius:
                        dropStation = True
                        break
                    usingFullRadius = True
                    needNewNeighbours = True
                    continue
                counts, urban_series, combined = combine_neighbors(
                    us, iyrm, iyoff, neighbors)
                iy1 = 1
                needNewNeighbours = False

            points, quorate_count, first, last = prepare_series(
                iy1, iyrm, combined, urban_series, counts, iyoff)

            if quorate_count < parameters.urban_adjustment_min_years:
                if usingFullRadius:
                    dropStation = True
                    break
                usingFullRadius = True
                needNewNeighbours = True
                continue

            if quorate_count >= (parameters.urban_adjustment_proportion_good
                                 * (last - first + 0.9)):
                break

            # Not enough good years for the given range.  Try to save
            # cases in which the gaps are in the early part, by
            # dropping that part and going around to prepare_series
            # again.
            iy1 = int(last - (quorate_count - 1) /
                      parameters.urban_adjustment_proportion_good)
            if iy1 < first + 1:
                iy1 = first + 1  # avoid infinite loop

        if dropStation:
            # No usable rural comparison even at full radius: the urban
            # record is dropped from the output.
            continue

        fit = getfit(points)

        # find extended range
        iyxtnd = int(
            round(quorate_count / parameters.urban_adjustment_proportion_good)
            - (last - first + 1))
        n1x = first + iyoff
        n2x = last + iyoff
        if iyxtnd < 0:
            sys.exit('impossible')
        if iyxtnd > 0:
            lxend = iyu2 - (last + iyoff)
            if iyxtnd <= lxend:
                n2x = n2x + lxend
            else:
                n1x = n1x - (iyxtnd - lxend)
                if n1x < iyu1:
                    n1x = iyu1
                n2x = iyu2

        series = record.series
        # adjust
        m1 = record.rel_first_month + record.good_start_idx
        m2 = record.rel_first_month + record.good_end_idx - 1
        offset = record.good_start_idx  # index of first valid month
        a, b = adjust(first_year, record, series, fit, n1x, n2x,
                      first + iyoff, last + iyoff, m1, m2, offset)
        # a and b are numbers of new first and last valid months
        aa = a - m1
        bb = b - a + 1
        record.set_series(a - 1 + first_year * 12 + 1,
                          series[aa + offset:aa + offset + bb])
        # Use "//" so the month-to-year conversion stays a whole year
        # even where "/" performs true division.
        record.begin = ((a - 1) // 12) + first_year
        record.first = record.begin
        record.end = ((b - 1) // 12) + first_year
        record.last = record.last_year
        yield record