def step4_load_sst_monthlies(latest_year, latest_month): files = step4_find_monthlies(latest_year, latest_month) if not files: print "No more recent sea-surface data files.\n" return None first_year = files[0][0][0] last_year = files[-1][0][0] n_years = last_year - first_year + 1 # Read in the SST data for recent years sst = make_3d_array(360, 180, 12 * n_years) dates = [] for (date, file) in files: dates.append(date) (year, month) = date f = open_or_uncompress(file) print "reading", file f = fort.File(f, bos = ">") f.readline() # discard first record data = f.readline() f.close() month = 12 * (year - first_year) + month - 1 p = 0 for lat in range(180): for long in range(360): v, = struct.unpack(">f", data[p:p+4]) p += 4 sst[long][lat][month] = v return sst, dates
def zone_series(f):
    """Yield the valid monthly zonal anomalies held in Fortran file *f*.

    *f* must be a ZON.* file as generated by GISTEMP Step 5.  The
    result is an iterable of pairs ``((zone, year, month), datum)``
    where *month* runs from 0 to 11 and *datum* is a float.  Months
    whose value equals the "missing" marker from the file header are
    simply left out of the stream.
    """
    byte_order = '>'  # Byte order and size (for struct.{un,}pack).
    records = fort.File(f, byte_order)

    # The first record is a header of (at least) 8 integers.
    header = records.readline()
    fields = struct.unpack('%s8i' % byte_order, header[:8 * 4])
    month_count = fields[3]
    start_year = fields[5]
    bad_value = fields[6]

    # Each following record: 2 * month_count floats, then 80 bytes of
    # title.  Only the first month_count floats are examined here.
    record_format = byte_order + '%df80s' % (month_count * 2)
    for zone, record in enumerate(records):
        values = struct.unpack(record_format, record)
        for index, datum in enumerate(values[:month_count]):
            if datum != bad_value:
                year_offset, month = divmod(index, 12)
                yield ((zone, start_year + year_offset, month), datum)
def __init__(self, rawfile, bos='>'):
    """Wrap *rawfile* as a Fortran record file and read its header.

    The first record is unpacked as 8 integers followed by an 80-byte
    title (byte order given by *bos*).  All nine values are handed, in
    order, to ``code.giss_data.SubboxMetaData`` and the result kept as
    ``self.meta``; the first integer is also kept as ``self.mo1``.
    """
    self.bos = bos
    self.f = fort.File(rawfile, bos=self.bos)

    header = self.f.readline()
    # Field order: mo1, kq, mavg, monm, monm4, yrbeg, missing_flag,
    # precipitation_flag, title.
    fields = struct.unpack(self.bos + '8i80s', header)
    self.mo1 = fields[0]
    self.meta = code.giss_data.SubboxMetaData(*fields)
    assert self.meta.mavg == 6, "Only monthly averages supported"
def __init__(self, rawfile, bos='>'):
    """Wrap *rawfile* as a Fortran record file and read its header.

    The first record is unpacked as 9 integers followed by an 80-byte
    title (byte order given by *bos*).  All ten values are handed, in
    order, to ``code.giss_data.StationMetaData`` and the result kept as
    ``self.meta``; the first and ninth integers are also kept as
    ``self.min_month`` and ``self.max_month``.
    """
    self.bos = bos
    self.f = fort.File(rawfile, bos=self.bos)

    header = self.f.readline()
    # Field order: min_month, kq, mavg, monm, monm4, yrbeg,
    # missing_flag, precipitation_flag, max_month, title.
    fields = struct.unpack(self.bos + '9i80s', header)
    self.min_month = fields[0]
    self.max_month = fields[8]
    self.meta = code.giss_data.StationMetaData(*fields)
def totext(file, output=sys.stdout, log=sys.stderr, metaonly=False, bos='>'):
    """Convert zonal monthly averages to text format.

    *file* is read as a Fortran record file with byte order *bos* and
    the text is written to *output*.  If metaonly is True then only the
    zonal metadata is output, the time series are not.  *log* is
    accepted but not used here.
    """
    # :todo: move into common module
    from zonav import swaw

    # The width of a standard word according to Python's struct module...
    word = len(struct.pack('=I', 0))

    source = fort.File(file, bos=bos)
    header = source.readline()

    # Number of words in header, preceding title.
    header_words = 8
    title_at = header_words * word
    info = struct.unpack(bos + ('%di' % header_words), header[:title_at])
    output.write(repr(info))
    output.write('\n%s\n' % header[title_at:title_at + 80])
    output.write('%s\n' % header[title_at + 80:])

    # m: time frames per year
    m = 12 if info[2] == 6 else 4
    first_year = info[5]
    months = info[3]
    years = months / m
    last_year = first_year + years - 1

    # Each line contains N ar values and N weight (area?) values,
    # followed by an 80-character title string.
    descriptor = bos + '%df80s' % (months * 2)

    labels = ['AR', 'WT']
    # NOTE(review): jzm is not defined inside this function; it must be
    # supplied at module level -- confirm.
    for _ in range(jzm):
        record = source.readline()
        if record is None:
            raise Error('Unexpected end of file.')
        data = struct.unpack(descriptor, record)
        output.write(swaw(data[-1]) + '\n')
        if metaonly:
            continue
        for which, label in enumerate(labels):
            for year in range(first_year, last_year + 1):
                offset = (year - first_year) * m + (months * which)
                values = ' '.join([repr(v) for v in data[offset:offset + m]])
                output.write('%s[%4d]: %s\n' % (label, year, values))
def step4_load_clim():
    """Load the SST climatology from ``input/oisstv2_mod4.clim``.

    The first record of the file holds an 80-byte title followed by
    12 * 180 * 360 big-endian floats, stored month-major, then
    latitude, with longitude varying fastest.

    Returns the array made by ``make_3d_array(360, 180, 12)``, filled
    so that it is indexed ``clim[long][lat][month]``.
    """
    n_months = 12
    n_lat = 180
    n_long = 360
    n_values = n_months * n_lat * n_long
    title_length = 80

    f = fort.File(open_or_uncompress("input/oisstv2_mod4.clim"), bos='>')
    try:
        data = f.readline()
    finally:
        # Close the reader even when the read fails.
        f.close()

    # Skip the title, then decode every value with one unpack call
    # rather than one call per cell.
    values = struct.unpack(
        ">%df" % n_values,
        data[title_length:title_length + 4 * n_values])

    clim = make_3d_array(360, 180, 12)
    p = 0
    for month in range(n_months):
        for lat in range(n_lat):
            for lon in range(n_long):
                clim[lon][lat][month] = values[p]
                p += 1
    return clim
def step5_bx_output(data): bos = '>' box = open('result/BX.Ts.ho2.GHCN.CL.PA.1200', 'wb') boxf = fort.File(box, bos=bos) (info, title) = data.next() boxf.writeline(struct.pack('%s8i' % bos, *info) + title) yield (info, title) for record in data: (avgr, wtr, ngood, box) = record n = len(avgr) fmt = '%s%df' % (bos, n) boxf.writeline(struct.pack(fmt, *avgr) + struct.pack(fmt, *wtr) + struct.pack('%si' % bos, ngood) + struct.pack('%s4i' % bos, *box)) yield record print "Step5: Closing box file" boxf.close()
def totext(file, output=sys.stdout):
    """The file argument should be a binary file opened for reading.
    It is treated as a Fortran binary file and converted to a text
    format, emitted on the file object output.

    Each (binary) record is treated as a sequence of words (words being
    "standard-sized ints" in the native byte-ordering), followed by a
    possible remainder sequence of bytes (where the record length is
    not a multiple of a word).  The output format is one line per
    record, with each word being output as a fixed width hexadecimal
    number, and each trailing byte being output as a 2-digit
    hexadecimal number.  Spaces separate.

    A "standard-size int" is interpreted the same way that
    struct.unpack('=I', x) interprets it.
    """

    # Compute the width of a standard word according to Python's struct
    # module...
    w = len(struct.pack('=I', 0))
    # and a suitable string format.
    # http://www.python.org/doc/2.3.5/lib/typesseq-strings.html
    # The string format is of the form '%08x' but the value of 8 may be
    # replaced.
    fmt = '%%0%dx' % (2*w)

    f = fort.File(file)

    # Iterate over all the records
    for r in f:
        # We unpack as much as the record as we can as a sequence of
        # binary words (typically 32-bits each); then the rest as a
        # sequence of bytes.

        # Number of words
        n = len(r) // w
        # '=' selects native byte order with standard sizes, matching
        # both the docstring and the way w was computed above.
        words = struct.unpack('=%dI' % n, r[:n*w])
        parts = [fmt % word for word in words]
        # Remainder of record, as bytes
        parts.extend('%02x' % ord(c) for c in r[n*w:])
        output.write(' '.join(parts) + '\n')
def box_series(f):
    """Yield the valid monthly box anomalies held in Fortran file *f*.

    *f* must be a BX.* file as generated by GISTEMP step 5.  The result
    is an iterable of pairs ``((box, year, month), datum)`` where
    *month* runs from 0 to 11 and *datum* is the value unpacked from
    the record with the ``f`` (float) struct code.  Months whose value
    equals the "missing" marker from the file header are left out of
    the stream.
    """
    bos = '>'  # Byte order and size (for struct.{un,}pack).
    records = fort.File(f, bos)

    # The first record of the file is a header (see step5.SBBXtoBX).
    header = records.readline()
    info = struct.unpack('%s8i' % bos, header[:8 * 4])
    nmonths = info[3]  # Number of months in data series.
    nfields = info[4]  # Number of fields in each record.
    year = info[5]     # First year in the series.
    missing = info[6]  # Value for a missing datum.
    assert nfields == nmonths * 2 + 5

    # Each successive record contains a temperature anomaly series for
    # one box: first, 'nmonths' fields giving the anomalies for each
    # month; then 'nmonths' fields giving the weights for that box for
    # each month, then the number of good months in the series (?), then
    # four values describing the box (see eqarea.grid).
    series_format = '%s%df' % (bos, nmonths)
    # readline() returns None at end of file, which ends the iteration.
    for box, record in enumerate(iter(records.readline, None)):
        assert len(record) == nfields * 4
        series = struct.unpack(series_format, record[:4 * nmonths])
        for i, datum in enumerate(series):
            if datum != missing:
                yield ((box, year + i // 12, i % 12), datum)
def totext(inp, output=sys.stdout, log=sys.stderr, metaonly=False, bos='>',
           format='v3'):
    """
    Convert monthly averages to text format; *inp* is the input file
    and should be either a binary zone file (ZON.*) or a binary box
    file (BX.*).  The kind of file is decided by whether the header
    title contains the word "zones".

    If metaonly is True then only the header metadata is output, the
    time series are not.  With ``format='v3'`` the series are written
    through a ``gio.GHCNV3Writer``; with any other *format* a plain
    ``AR[year]:`` / ``WT[year]:`` listing is written.

    Raises ``Error`` when the number of records following the header is
    not the number expected for the kind of file.
    """
    # The width of a standard word according to Python's struct module.
    w = len(struct.pack('=I', 0))

    records = fort.File(inp, bos=bos)
    header = records.readline()

    # Number of words in header, preceding title.
    header_words = 8
    info = struct.unpack(bos + ('%di' % header_words),
                         header[:header_words * w])
    title = header[header_words * w:]
    content = 'zones' if 'zones' in title.lower() else 'boxes'

    if metaonly or 'v3' != format:
        output.write(repr(info))
        output.write('\n' + title + '\n')
    if metaonly:
        return

    if 'v3' == format:
        ghcnm_out = gio.GHCNV3Writer(file=output, scale=0.01)

    # m: time frames per year
    m = 12 if info[2] == 6 else 4
    first_year = info[5]
    months = info[3]
    years = months / m
    last_year = first_year + years - 1

    # Each line contains N ar values and N weight (area?) values,
    # followed by...
    # - (for zones) an 80-character title string.
    # - (for boxes) 5 words.
    if content == 'zones':
        rest = 80
        # Number of records following header.
        N = 16
    else:
        rest = 5 * w
        N = 80
    descriptor = bos + '%df%ds' % (months * 2, rest)

    last_index = None
    for last_index, record in enumerate(records):
        data = struct.unpack(descriptor, record)
        suffix = data[-1]
        if format == 'v3':
            if 'zones' == content:
                title = id11fromzone(suffix)
            else:
                title = id11frombox(suffix, bos=bos)
        else:
            title = suffix
            output.write(title + '\n')

        for idx in range(2):
            if 'v3' == format and idx > 0:
                # Only output temps, not weights. :todo: fix this.
                continue
            for year in range(first_year, last_year + 1):
                offset = (year - first_year) * m + (months * idx)
                temps = data[offset:offset + m]
                if 'v3' == format:
                    assert 12 == m
                    element = ['TAVG', 'WGHT'][idx]
                    ghcnm_out.writeyear(title, element, year, temps)
                else:
                    listing = ' '.join([repr(t) for t in temps])
                    output.write('%s[%4d]: %s\n' %
                                 (['AR', 'WT'][idx], year, listing))

    if last_index is None:
        raise Error('No records found in file %r.' % inp.name)
    if last_index != N - 1:
        way = "many" if last_index >= N else "few"
        raise Error('Too %s records. Expected %d, found %d.' %
                    (way, N, last_index + 1))
def totext(file, output=sys.stdout, error=sys.stderr, metaflag=False): """The file argument should be a binary file opened for reading. It is treated as a Fortran binary file and converted to a text format, emitted on the file object output. """ # Compute the width of a standard word according to Python's struct # module... w = len(struct.pack('=I', 0)) # and a suitable string format. # http://www.python.org/doc/2.3.5/lib/typesseq-strings.html # The string format is of the form '%08x' but the value of 8 may be # replaced. fmt = '%%0%dx' % (2*w) f = fort.File(file) r = f.readline() # First record contains various INFO items. # When referring to comments in to.SBBXgrid.f recall that Fortran # arrays are typically indexed from 1 onwards; Python from 0 # onwards. Therefore INFO(2) corresponds to a[0] a = struct.unpack('9i', r[:9*w]) mfirst = a[0] mlast = a[8] kq = a[1] mavg = a[2] monm = a[3] recsize = a[4] yrbeg = a[5] bad = a[6] trace = a[7] if metaflag: output.write('KQ=%d MAVG=%d MONM=%d YRBEG=%d BAD=%d TRACE=%d\n' % (kq, mavg, monm, yrbeg, bad, trace)) # Length of record trail, the non-variable part, in bytes. ltrail = 15*w # Iterate over remaining records for r in f: trail = r[-ltrail:] lat,lon,id,height = struct.unpack('4i', trail[:4*w]) lat *= 0.1 lon *= 0.1 if len(r[:-ltrail]) != w*(mlast-mfirst+1): error.write(('Station ID %09d has suspect record length.' + 'mfirst=%s mlast=%d record-length=%d\n') % (id, mfirst, mlast, len(r))) name = trail[4*w:-2*w] # Some metadata is stored in the name field, *sigh* meta = name[-6:] name = name[:-6] # 3 digit country code cc = meta[3:6] meta = meta[0:3] # Prepend country code to station ID. Note: id becomes a string. id = '%s%09d' % (cc, id) # Replace any spaces in meta with '*', mostly to preserve the # "words separated by spaces" format. Note that all these # replacements are where (non US) stations do not have # USHCN-brightness indexes. 
meta = meta.replace(' ', '*') # It just so happens that underscore does not appear in any of # the "name" fields, so we use that instead of space. That # preserves the "words separated by spaces" format. name = name.replace(' ', '_') # It's tidier if we return the trailing underscores back into # spaces. m = re.search('_*$', name) name = name[:m.start()] + ' '*(m.end() - m.start()) if metaflag: output.write('ID %s %s %s %s M%04d\n' % (id, name, meta, iso6709(lat, lon, height), mfirst)) else: n = len(r[:-ltrail])//w data = struct.unpack('%di' % n, r[:-ltrail]) # Convert to 0 == January indexing: mfirst -= 1 assert mfirst >= 0 year = mfirst // 12 year += yrbeg # Pad data at beginning... m = mfirst % 12 data = (bad,)*m + data # ... and at end. m = (-len(data)) % 12 data = data + (bad,)*m assert 0 == len(data) % 12 def changebad(datum): """Convert to GHCN v2.mean BAD format.""" if datum == bad: return -9999 return datum data = map(changebad, data) for y,yeardata in enumerate(data[i:i+12] for i in range(0,len(data),12)): output.write('%s%d' % (id, year + y)) output.write(('%5d'*12 + '\n') % tuple(yeardata)) mfirst, mlast = struct.unpack('2I', trail[-2*w:])
def cmp(a, b, dt=1e-4, dd=0.5, output=sys.stdout, error=sys.stderr):
    """Compare two files.

    *a* and *b* are read as Fortran record files.  The first 8 header
    words must be identical, otherwise a message is written to *error*
    and the process exits with status 4 (the same happens when one file
    has more records than the other).

    Each later record is treated as a series of floats followed by a
    7-word trail: 4 words identifying the box, 2 unsigned counts
    (reported as NSTNS and NSTMNS), and one float (reported as D).
    Differences are written to *output*: counts that differ, D values
    differing by at least *dd*, and series values differing by at least
    *dt*.  Comparison stops early at a record whose size or box differs.

    Finally the largest differences seen are summarised on *output*.
    A summary line is omitted when nothing was compared for it (for
    example when the files hold only a header).
    """

    # Compute the width of a standard word according to Python's struct
    # module...
    w = len(struct.pack('=I', 0))
    # Width of a float
    wf = len(struct.pack('f', 0.0))

    a = fort.File(a)
    b = fort.File(b)

    ra = a.readline()
    rb = b.readline()
    rn = 0

    # Number of words in header, preceding title.
    n = 8
    if ra[:n * w] != rb[:n * w]:
        error.write('headers differ:\n' +
                    str(struct.unpack('%di' % n, ra[:n * w])) +
                    str(struct.unpack('%di' % n, rb[:n * w])) +
                    '\n')
        sys.exit(4)

    # Largest differences seen so far, and where they occurred.  The
    # locations stay None until at least one comparison has been made.
    dmax = -1
    dmaxrn = None
    tmax = -1
    tmaxrni = None
    while True:
        ra = a.readline()
        rb = b.readline()
        rn += 1
        if ra is None and rb is None:
            break
        if ra is None or rb is None:
            error.write('files differ in size')
            sys.exit(4)
        if len(ra) != len(rb):
            error.write('Record %d is different size' % rn)
            break

        traila = ra[-7 * w:]
        trailb = rb[-7 * w:]
        if traila[:4 * w] != trailb[:4 * w]:
            error.write('Record %d is for different boxes' % rn)
            break

        counta = struct.unpack('2I', traila[4 * w:6 * w])
        countb = struct.unpack('2I', trailb[4 * w:6 * w])
        if counta[0] != countb[0]:
            output.write('Record %d NSTNS: %d %d\n' %
                         (rn, counta[0], countb[0]))
        if counta[1] != countb[1]:
            output.write('Record %d NSTMNS: %d %d\n' %
                         (rn, counta[1], countb[1]))

        da = struct.unpack('f', traila[6 * w:])[0]
        db = struct.unpack('f', trailb[6 * w:])[0]
        ddiff = abs(da - db)
        if ddiff >= dd:
            output.write('Record %d D: %s %s\n' % (rn, repr(da), repr(db)))
        if ddiff >= dmax:
            dmax = ddiff
            dmaxrn = rn

        ra = ra[:-7 * w]
        rb = rb[:-7 * w]
        n = len(ra) // wf  # number of time series entries
        ta = struct.unpack('%df' % n, ra)
        tb = struct.unpack('%df' % n, rb)
        for i in range(n):
            d = abs(ta[i] - tb[i])
            if d >= dt:
                output.write('Record %d data %i: %s %s diff: %s\n' %
                             (rn, i, repr(ta[i]), repr(tb[i]), repr(d)))
            if d >= tmax:
                tmax = d
                tmaxrni = (rn, i)

    # Without these guards the summary raised TypeError / IndexError
    # whenever no record had been compared.
    if dmaxrn is not None:
        output.write('Maximum difference in d (record %d): %s\n' %
                     (dmaxrn, repr(dmax)))
    if tmaxrni is not None:
        output.write('Maximum difference in t (record %d item %d): %s\n' %
                     (tmaxrni[0], tmaxrni[1], repr(tmax)))
def totext(file, output=sys.stdout, log=sys.stderr, metaonly=False, bos='>',
           trimmed='fromfile'):
    """Convert binary gridded subbox file to text format.

    The header (the 8 info words, then the remainder of the header
    record) is written first, followed by one ``META`` line per subbox
    record and, unless suppressed, the time series at 12 values per
    line.  Everything is written to *output*, including the header, so
    the complete text lands in one place when *output* is not stdout.

    If metaonly is True then only the subbox metadata is output, the
    time series are not.

    `trimmed` determines whether the input file is trimmed; it can be
    one of ``True`` (file is trimmed), ``False`` (file is not trimmed),
    or ``'fromfile'`` (will examine file to determine if it is trimmed
    or not).  The decision made for ``'fromfile'`` is reported on *log*.
    """

    assert trimmed in [True, False, 'fromfile']

    # Compute the width of a standard word according to Python's struct
    # module...
    w = len(struct.pack('=I', 0))
    # Width of a float
    wf = len(struct.pack('f', 0.0))

    f = fort.File(file, bos=bos)

    r = f.readline()

    # Number of words in header, preceding title.
    n = 8
    info = struct.unpack(bos + ('%di' % n), r[:n * w])
    output.write('%s\n' % (info,))
    output.write('%s\n' % r[n * w:])
    yrbeg = info[5]
    mavg = info[2]
    # km: number of series values per year (12 for monthly averages).
    km = 1
    if mavg == 6:
        km = 12
    if trimmed == 'fromfile':
        # A first info word other than 1 is taken to mean "trimmed".
        trimmed = info[0] != 1
        log.write("Determined that trimmed=%s from file.\n" % trimmed)

    for r in f:
        if trimmed:
            # Metadata occupies words 1..7 at the front of the record.
            meta = r[1 * w:8 * w]
            r = r[8 * w:]
        else:
            # Metadata occupies the last 7 words of the record.
            meta = r[-7 * w:]
            r = r[:-7 * w]
        box = struct.unpack(bos + '4i', meta[:4 * w])
        box = tuple([x / 100.0 for x in box])
        nstns, nstmns = struct.unpack(bos + '2I', meta[4 * w:6 * w])
        d = struct.unpack(bos + 'f', meta[6 * w:])[0]
        loc = '%+06.2f%+06.2f%+07.2f%+07.2f' % box
        n = len(r) // wf  # number of time series entries
        output.write('%s META %6d %3d %6d %f\n' % (loc, n, nstns, nstmns, d))
        if metaonly:
            continue

        t = struct.unpack(bos + ('%df' % n), r)
        # 12 entries per output line, which is usually one year's worth,
        # (but see km).
        p = 12
        for i in range(len(t) // p):
            output.write('%s %d' % (loc, (yrbeg + i * p // km)))
            for j in range(p):
                k = i * p + j
                output.write(' %s' % repr(t[k]))
            output.write('\n')
def step5_output(data):
    """Write the Step 5 result files and return a completion message.

    *data* is a 6-tuple ``(info, data, wt, ann, monmin, title)``:
    *info* is the 8-integer header (``info[5]`` is the first year);
    *data* and *wt* are indexed ``[zone][year][month]``; *ann* is
    indexed ``[zone][year]``; *monmin* is unpacked but not used here.

    Four text files are written under ``result/`` (ZonAnn, GLB, NH and
    SH tables of anomalies scaled by 100) together with the binary
    ``result/ZON.Ts.ho2.GHCN.CL.PA.1200`` file holding the monthly
    values and weights for every zone.

    Returns the string "Step 5 Completed".
    """
    (info, data, wt, ann, monmin, title) = data
    XBAD = 9999
    # First year to appear in the text tables.
    iy1tab = 1880

    zone_titles = step5_zone_titles()

    titl2 = ' zones: 90->64.2->44.4->23.6->0->-23.6->-44.4->-64.2->-90 '
    iyrbeg = info[5]
    jzm = len(ann)
    iyrs = len(ann[0])
    monm = iyrs * 12

    # NOTE(review): none of the files opened here is explicitly closed
    # in this function.
    out = ['ZonAnn', 'GLB', 'NH', 'SH']
    out = [open('result/'+bit+'.Ts.ho2.GHCN.CL.PA.txt', 'w')
            for bit in out]
    zono = open('result/ZON.Ts.ho2.GHCN.CL.PA.1200', 'wb')

    bos = '>'
    zono = fort.File(zono, bos)

    # Create and write out the header record of the output files.
    print >> out[0], ' Annual Temperature Anomalies (.01 C) - ' + title[28:80]
    for f in out[1:]:
        print >> f, title

    # iord literal borrowed exactly from Fortran...
    iord = [14,12,13, 9,10,11, 1,2,3,4,5,6,7,8]
    # ... and then adjusted for Python index convention.
    iord = map(lambda x: x-1, iord)
    # Display the annual means.
    def annasstr(z):
        """Helper function that returns the annual anomaly for zone *z*
        as a string representation of an integer (the integer is the
        anomaly scaled by 100 to convert to centikelvin).

        The returned value is a string that is 5 characters long.  If
        the integer will not fit into a 5 character string, '*****' is
        returned (this emulates the Fortran convention of formatting
        999900 (which is the XBAD value in centikelvin) as a '*****').

        The year, *iy*, is lexically captured which is a bit horrible.
        """
        x = int(math.floor(100*ann[z][iy] + 0.5))
        x = '%5d' % x
        if len(x) > 5:
            return '*****'
        return x

    iyrsp = iyrs
    # Check (and skip) incomplete year.
    if data[-1][-1][-1] > 8000:
        iyrsp -= 1
    # NOTE(review): the slicing of this banner further down
    # (banner[1][:24], banner[0][24:]) assumes that the first 24
    # columns of each line hold the "Year Glob NHem SHem" group; confirm
    # that the column spacing inside the literal matches the 5-character
    # fields printed for each year.
    banner = """
 24N 24S 90S 64N 44N 24N EQU 24S 44S 64S 90S
Year Glob NHem SHem -90N -24N -24S -90N -64N -44N -24N -EQU -24S -44S -64S Year
""".strip('\n')
    for iy in range(iy1tab - iyrbeg, iyrsp):
        # Repeat the banner before the first row and then before every
        # year ending in 01, 21, 41, ... (from iy1tab+5 onwards).
        if (iy+iyrbeg >= iy1tab+5 and ((iy+iyrbeg) % 20 == 1) or
          iy == iy1tab - iyrbeg):
            print >> out[0]
            print >> out[0], banner
        iyr = iyrbeg+iy
        print >> out[0], ('%4d' + ' %s'*3 + ' ' + ' %s'*3 +
                          ' ' + ' %s'*8 + '%5d') % tuple([iyr] +
          [annasstr(iord[zone]) for zone in range(jzm)] + [iyr])
    # The trailing banner is just like the repeated banner, except that
    # "Year Glob NHem SHem" appears on on the first line, not the
    # second line (and the same for the "Year" that appears at the end
    # of the line).  *sigh*.
    banner = banner.split('\n')
    banner[0] = banner[1][:24] + banner[0][24:] + ' Year'
    banner[1] = ' '*24 + banner[1][24:-5]
    banner = '\n'.join(banner)
    print >> out[0], banner
    print >> out[0]

    tit = [' GLOBAL','N.HEMISPH.','S.HEMISPH.']
    # Shift the remaining 3 output files so that the indexing works out.
    out = out[1:]
    banner = 'Year Jan Feb Mar Apr May Jun Jul Aug' + \
      ' Sep Oct Nov Dec J-D D-N DJF MAM JJA SON Year'
    # All the "WRITE(96+J" stuff in the Fortran is replaced with this
    # enumeration into the *out* array (an array of file descriptors).
    for j,outf in enumerate(out):
        print >> outf, (tit[j] + ' Temperature Anomalies' +
          ' in .01 C base period: 1951-1980')
        for iy in range(iy1tab-iyrbeg, iyrs):
            # iout holds 12 monthly values (0-11), the annual mean
            # (12), the four-season mean (13) and the four seasonal
            # means (14-17); every slot starts as the "bad" value.
            iout = [100*XBAD]*18
            if (iy+iyrbeg >= iy1tab+5 and ((iy+iyrbeg) % 20 == 1) or
              iy == iy1tab - iyrbeg):
                print >> outf
                print >> outf, banner
            # *data* for this zone, avoids some duplication of code.
            zdata = data[iord[j]]
            # :todo: Would probably be better to have a little 4-long
            # seasonal array to do the computation in.
            # Winter uses the previous year's last month, so it stays
            # "bad" for the very first year of the data.
            awin = 9999
            if iy > 0:
                awin = zdata[iy-1][11] + zdata[iy][0] + zdata[iy][1]
            aspr = sum(zdata[iy][2:5])
            asmr = sum(zdata[iy][5:8])
            afl = sum(zdata[iy][8:11])
            # Sums below 8000 are treated as free of "bad" values.
            if awin < 8000:
                iout[14] = int(round(100.0*awin/3))
            if aspr < 8000:
                iout[15] = int(round(100.0*aspr/3))
            if asmr < 8000:
                iout[16] = int(round(100.0*asmr/3))
            if afl < 8000:
                iout[17] = int(round(100.0*afl/3))
            ann2=awin+aspr+asmr+afl
            if ann2 < 8000:
                iout[13] = int(round(100.0*ann2/12))
            ann1=ann[iord[j]][iy]
            # Suppress the annual mean of a final year whose last month
            # is "bad".
            if iy == iyrs-1 and zdata[iy][-1] > 8000:
                ann1 = 9999
            if ann1 < 8000:
                iout[12] = int(round(100.0*ann[iord[j]][iy]))
            for m in range(12):
                iout[m] = int(round(100.0*zdata[iy][m]))
            iyr = iyrbeg+iy
            # Convert each of *iout* to a string, storing the results in
            # *sout*.
            sout = [None]*len(iout)
            for i,x in enumerate(iout):
                # All the elements of iout are formatted to width 5,
                # except for element 13 (14 in the Fortran code), which
                # is length 4.
                if i == 13:
                    x = '%4d' % x
                    if len(x) > 4:
                        x = '****'
                else:
                    x = '%5d' % x
                    if len(x) > 5:
                        x = '*****'
                sout[i] = x
            print >> outf, (
              '%4d ' + '%s'*12 + ' %s%s ' + '%s'*4 + '%6d') % tuple(
              [iyr] + sout + [iyr])
        print >> outf, banner

    # Save monthly means on disk.
    zono.writeline(struct.pack(bos + '8i', *info) +
                   title + titl2)

    # One record per zone: all monthly values, then all monthly
    # weights, then the zone's title.
    fmt_mon = bos + '%df' % monm
    for jz in range(jzm):
        zono.writeline(struct.pack(fmt_mon, *itertools.chain(*data[jz])) +
                       struct.pack(fmt_mon, *itertools.chain(*wt[jz])) +
                       zone_titles[jz])
    return "Step 5 Completed"
def __init__(self, rawfile, bos='>'):
    """Wrap *rawfile* as a Fortran record file with byte order *bos*.

    Nothing is read here: ``self.meta`` and ``self.buf_record`` both
    start out as ``None``.
    """
    self.bos = bos
    self.f = fort.File(rawfile, bos=self.bos)
    self.meta = self.buf_record = None
def __init__(self, rawfile, bos='>', trimmed=True):
    """Wrap *rawfile* as a Fortran record file with byte order *bos*.

    *trimmed* is stored as given on ``self.trimmed``.  Nothing is read
    here: ``self.meta`` and ``self.buf_record`` both start out as
    ``None``.
    """
    self.trimmed = trimmed
    self.bos = bos
    self.f = fort.File(rawfile, bos=self.bos)
    self.meta = self.buf_record = None