def ConvertCAMxTime(date, time, nvars=1):
    """
    Use CAMx date and time arrays to produce an IOAPI standard
    TFLAG variable
    """
    f = PseudoNetCDFFile()
    f.dimensions = {'TSTEP': date.shape[0], 'VAR': nvars, 'DATE-TIME': 2}
    a = array([date, time], dtype='i').swapaxes(0, 1)
    if len(a.shape) == 2:
        a = a[:, newaxis, :]
    date = a[:, :, 0]
    # Expand CAMx YYJJJ dates to IOAPI YYYYJJJ; years 00-69 are taken
    # to be in the 2000s, years 70-99 in the 1900s
    if (date < 70000).any():
        date += 2000000
    else:
        date += 1900000
    time = a[:, :, 1]
    # Promote HH or HHMM times to HHMMSS
    while not (time == 0).all() and time.max() < 10000:
        time *= 100
    a = PseudoNetCDFVariable(f, 'TFLAG', 'i', ('TSTEP', 'VAR', 'DATE-TIME'),
                             values=a[:, [0], :].repeat(nvars, 1))
    a.units = 'DATE-TIME'.ljust(16)
    a.long_name = 'TFLAG'.ljust(16)
    a.var_desc = a.long_name
    return a
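# Usage sketch for ConvertCAMxTime (hypothetical sample arrays; the
# function itself is defined above):
def _example_convert_camx_time():
    from numpy import array
    jdate = array([5001, 5001, 5002])  # CAMx YYJJJ dates
    hhmm = array([0, 1200, 0])         # CAMx HHMM times
    tflag = ConvertCAMxTime(jdate, hhmm, nvars=2)
    # tflag[:, :, 0] -> 2005001, 2005001, 2005002 (YYYYJJJ)
    # tflag[:, :, 1] -> 0, 120000, 0 (HHMMSS)
    return tflag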
def __init__(self, ncffile, dimension, oldres, newres, repeat_method=repeat,
             condense_method=sum, nthick=0):
    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    PseudoNetCDFFile.__init__(self)
    self.__dimension = array(dimension, ndmin=1)
    oldres = array(oldres, ndmin=1)
    newres = array(newres, ndmin=1)
    # mesh > 1 coarsens (condense_method); mesh < 1 refines (repeat_method)
    self.__mesh = newres / oldres.astype('f')
    self.__condense = condense_method
    self.__repeat = repeat_method
    self.__file = ncffile
    self.__nthick = nthick
    if not logical_or((self.__mesh % 1) == 0,
                      (1. / self.__mesh) % 1 == 0).any():
        raise ValueError("One resolution must be a factor of the other.")
    Pseudo2NetCDF().addDimensions(self.__file, self)
    any_non_time_key = [
        k for k in self.__file.variables.keys() if 'TFLAG' not in k][0]
    # Each remapped dimension is rescaled by the inverse mesh factor
    for dk, dfactor in zip(self.__dimension, 1. / self.__mesh):
        dimo = self.dimensions[dk]
        ndimo = self.createDimension(str(dk), len(dimo) * dfactor)
        ndimo.setunlimited(dimo.isunlimited())
    # __method and __variables are presumably defined elsewhere on this
    # class (__method applies repeat or condense as the mesh requires)
    v = self.__file.variables[any_non_time_key]
    v = self.__method(v)
    self.variables = PseudoNetCDFVariables(
        self.__variables, self.__file.variables.keys())
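# A minimal sketch of the mesh-factor logic above (assumptions: 1-D data;
# plain numpy stands in for the class's repeat_method/condense_method):
def _example_mesh_factor():
    from numpy import arange, repeat
    v = arange(8.)                      # 8 cells at the old resolution
    # newres/oldres == 2: condense pairs of cells (condense_method=sum)
    condensed = v.reshape(4, 2).sum(1)  # -> [1., 5., 9., 13.]
    # newres/oldres == 0.5: repeat each cell (repeat_method=repeat)
    repeated = repeat(condensed, 2)     # back to 8 cells
    return condensed, repeated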
def __getattribute__(self, k):
    try:
        return PseudoNetCDFFile.__getattribute__(self, k)
    except AttributeError:
        # Fall back to the wrapped files, returning the first match
        for f in self.__files:
            try:
                return getattr(f, k)
            except AttributeError:
                pass
        raise AttributeError("%s not found" % k)
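# Minimal sketch of the delegation pattern above (hypothetical class;
# __getattr__ is only consulted after normal lookup fails, so it plays
# the same fallback role as the __getattribute__ override):
class _ExampleFileMaster(object):
    def __init__(self, files):
        self._files = files

    def __getattr__(self, k):
        for f in self._files:
            try:
                return getattr(f, k)
            except AttributeError:
                pass
        raise AttributeError("%s not found" % k)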
def setUp(self):
    from permm.mechanisms import small_strato
    from PseudoNetCDF import PseudoNetCDFFile
    from numpy import arange, newaxis
    from numpy.random import seed
    self.mech = small_strato()
    mrg = self.mrg = PseudoNetCDFFile()
    mrg.createDimension('TSTEP', 10)
    mrg.createDimension('ROW', 3)
    mrg.createDimension('LAY', 4)
    mrg.createDimension('COL', 5)
    mrg.createDimension('VAR', 10)
    mrg.createDimension('DATE-TIME', 2)
    tflag = mrg.createVariable('TFLAG', 'i', ('TSTEP', 'VAR', 'DATE-TIME'))
    tflag.units = '<YYYYJJJ, HHMMSS>'
    tflag.long_name = tflag.var_desc = 'time'.ljust(16)
    tflag[:, :, 0] = 2004001
    tflag[:, :, 1] = arange(10)[:, newaxis] * 10000
    for i in range(1, 11):
        var = mrg.createVariable('IRR_%d' % i, 'f',
                                 ('TSTEP', 'LAY', 'ROW', 'COL'))
        var.units = 'ppt'
        var.long_name = var.var_desc = ('Integrated rate for IRR '
                                        'ordinal %d' % i)
        seed(1)
        # Even ordinals ramp from 3*i; odd ordinals ramp from 2*i
        if i % 2 == 0:
            start = 3 * i
        else:
            start = 2 * i
        data = arange(start, start + 10)[
            :, newaxis, newaxis, newaxis].repeat(4, 1).repeat(3, 2).repeat(5, 3)
        # Odd ordinals are stored as negative rates
        var[:] = data * (-1 if i % 2 else 1)
    self.mech.set_mrg(mrg)
def __repr__(self):
    return PseudoNetCDFFile.__repr__(self) + str(self.variables)
def __init__(self, path, keysubs={'/': '_'}, encoding='utf-8',
             default_llod_flag=-8888, default_llod_value='N/A',
             default_ulod_flag=-7777, default_ulod_value='N/A'):
    """
    Arguments:
        self - implied input (not supplied in call)
        path - path to file
        keysubs - dictionary of characters to remove from variable keys
                  and their replacements
        encoding - file encoding (utf-8, latin1, cp1252, etc.)
        default_llod_flag - flag value for lower limit of detections if
                            not specified
        default_llod_value - default value to use for replacement of
                             llod_flag
        default_ulod_flag - flag value for upper limit of detections if
                            not specified
        default_ulod_value - default value to use for replacement of
                             ulod_flag
    Returns:
        out - PseudoNetCDFFile interface to data in file.
    """
    lastattr = None
    PseudoNetCDFFile.__init__(self)
    f = openf(path, 'rU', encoding=encoding)
    missing = []
    units = []
    line = f.readline()
    if ',' in line:
        delim = ','
    else:
        delim = None

    def split(s):
        return [s_.strip() for s_ in s.split(delim)]

    if split(line)[-1] != '1001':
        raise TypeError("File is the wrong format. " +
                        "Expected 1001; got %s" % (split(line)[-1], ))

    n, self.fmt = split(line)
    # n_user_comments = 0
    n_special_comments = 0
    self.n_header_lines = int(n)
    try:
        for li in range(self.n_header_lines - 1):
            li += 2
            line = f.readline()
            LAST_VAR_DESC_LINE = 12 + len(missing)
            SPECIAL_COMMENT_COUNT_LINE = LAST_VAR_DESC_LINE + 1
            LAST_SPECIAL_COMMENT_LINE = (SPECIAL_COMMENT_COUNT_LINE +
                                         n_special_comments)
            USER_COMMENT_COUNT_LINE = (12 + len(missing) + 2 +
                                       n_special_comments)
            if li == PI_LINE:
                self.PI_NAME = line.strip()
            elif li == ORG_LINE:
                self.ORGANIZATION_NAME = line.strip()
            elif li == PLAT_LINE:
                self.SOURCE_DESCRIPTION = line.strip()
            elif li == MISSION_LINE:
                self.MISSION_NAME = line.strip()
            elif li == VOL_LINE:
                self.VOLUME_INFO = ', '.join(split(line))
            elif li == DATE_LINE:
                line = line.replace(',', ' ').replace('-', ' ').replace(
                    '  ', ' ').split()
                SDATE = ", ".join(line[:3])
                WDATE = ", ".join(line[3:])
                self.SDATE = SDATE
                self.WDATE = WDATE
                self._SDATE = datetime.strptime(SDATE, '%Y, %m, %d')
                self._WDATE = datetime.strptime(WDATE, '%Y, %m, %d')
            elif li == TIME_INT_LINE:
                self.TIME_INTERVAL = line.strip()
            elif li == UNIT_LINE:
                unitstr = line.replace('\n', '').replace('\r', '').strip()
                units.append(unitstr)
                self.INDEPENDENT_VARIABLE = units[-1]
            elif li == SCALE_LINE:
                scales = [eval(i) for i in split(line)]
                if set([float(s) for s in scales]) != set([1.]):
                    raise ValueError(
                        "Unsupported: scaling is unsupported. " +
                        " data is scaled by %s" % (str(scales), ))
            elif li == MISSING_LINE:
                missing = [eval(i) for i in split(line)]
            elif li > MISSING_LINE and li <= LAST_VAR_DESC_LINE:
                nameunit = line.replace('\n', '').split(',')
                name = nameunit[0].strip()
                if len(nameunit) > 1:
                    units.append(nameunit[1].strip())
                elif re.compile(r'(.*)\((.*)\)').match(nameunit[0]):
                    desc_groups = re.compile(r'(.*)\((.*)\).*').match(
                        nameunit[0]).groups()
                    name = desc_groups[0].strip()
                    units.append(desc_groups[1].strip())
                elif '_' in name:
                    units.append(name.split('_')[1].strip())
                else:
                    warn('Could not find unit in string: "%s"' % line)
                    units.append(name.strip())
            elif li == SPECIAL_COMMENT_COUNT_LINE:
                n_special_comments = int(line.replace('\n', ''))
            elif (li > SPECIAL_COMMENT_COUNT_LINE and
                  li <= LAST_SPECIAL_COMMENT_LINE):
                colon_pos = line.find(':')
                if line[:1] == ' ':
                    k = lastattr
                    v = getattr(self, k, '') + line
                else:
                    k = line[:colon_pos].strip()
                    v = line[colon_pos + 1:].strip()
                setattr(self, k, v)
                lastattr = k
            elif li == USER_COMMENT_COUNT_LINE:
                lastattr = None
                # n_user_comments = int(line.replace('\n', ''))
            elif (li > USER_COMMENT_COUNT_LINE and
                  li < self.n_header_lines):
                colon_pos = line.find(':')
                if line[:1] == ' ':
                    k = lastattr
                    v = getattr(self, k, '') + line
                else:
                    k = line[:colon_pos].strip()
                    v = line[colon_pos + 1:].strip()
                setattr(self, k, v)
                lastattr = k
            elif li == self.n_header_lines:
                varstr = line.replace(',', ' ').replace('  ', ' ')
                variables = varstr.split()
                for oc, nc in keysubs.items():
                    variables = [vn.replace(oc, nc) for vn in variables]
                self.TFLAG = variables[0]
    except Exception as e:
        raise SyntaxError("Error parsing icartt file %s: %s" %
                          (path, repr(e)))

    missing = missing[:1] + missing
    scales = [1.] + scales

    if hasattr(self, 'LLOD_FLAG'):
        llod_values = loddelim.sub('\n', self.LLOD_VALUE).split()
        if len(llod_values) == 1:
            llod_values *= len(variables)
        else:
            llod_values = ['N/A'] + llod_values

        assert len(llod_values) == len(variables)
        llod_values = [get_lodval(llod_val) for llod_val in llod_values]

        llod_flags = len(llod_values) * [self.LLOD_FLAG]
        llod_flags = [get_lodval(llod_flag) for llod_flag in llod_flags]
    else:
        llod_flags = [default_llod_flag] * len(scales)
        llod_values = [default_llod_value] * len(scales)

    if hasattr(self, 'ULOD_FLAG'):
        ulod_values = loddelim.sub('\n', self.ULOD_VALUE).split()
        if len(ulod_values) == 1:
            ulod_values *= len(variables)
        else:
            ulod_values = ['N/A'] + ulod_values

        assert len(ulod_values) == len(variables)
        ulod_values = [get_lodval(ulod_val) for ulod_val in ulod_values]

        ulod_flags = len(ulod_values) * [self.ULOD_FLAG]
        ulod_flags = [get_lodval(ulod_flag) for ulod_flag in ulod_flags]
    else:
        ulod_flags = [default_ulod_flag] * len(scales)
        ulod_values = [default_ulod_value] * len(scales)

    data = f.read()
    datalines = data.split('\n')
    ndatalines = len(datalines)
    while datalines[-1] in ('', ' ', '\r'):
        ndatalines -= 1
        datalines.pop(-1)
    data = genfromtxt(StringIO('\n'.join(datalines).encode()),
                      delimiter=delim, dtype='d')
    data = data.reshape(ndatalines, len(variables))
    data = data.swapaxes(0, 1)
    self.createDimension('POINTS', ndatalines)
    for vi, var in enumerate(variables):
        scale = scales[vi]
        miss = missing[vi]
        unit = units[vi]
        dat = data[vi]
        llod_flag = llod_flags[vi]
        llod_val = llod_values[vi]
        ulod_flag = ulod_flags[vi]
        ulod_val = ulod_values[vi]
        vals = MaskedArray(dat, mask=dat == miss, fill_value=miss)
        tmpvar = self.variables[var] = PseudoNetCDFVariable(
            self, var, 'd', ('POINTS', ), values=vals)
        tmpvar.units = unit
        tmpvar.standard_name = var
        tmpvar.missing_value = miss
        tmpvar.fill_value = miss
        tmpvar.scale = scale

        if hasattr(self, 'LLOD_FLAG'):
            tmpvar.llod_flag = llod_flag
            tmpvar.llod_value = llod_val

        if hasattr(self, 'ULOD_FLAG'):
            tmpvar.ulod_flag = ulod_flag
            tmpvar.ulod_value = ulod_val

    def dtime(s):
        return timedelta(seconds=int(s),
                         microseconds=(s - int(s)) * 1.E6)

    vtime = vectorize(dtime)
    tvar = self.variables[self.TFLAG]
    self._date_objs = (self._SDATE + vtime(tvar).view(type=ndarray))
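# Usage sketch, assuming this __init__ belongs to the ffi1001 ICARTT
# reader (as in PseudoNetCDF.icarttfiles.ffi1001) and that 'flight.ict'
# is a hypothetical FFI-1001 format file:
def _example_read_icartt():
    from PseudoNetCDF.icarttfiles.ffi1001 import ffi1001
    f = ffi1001('flight.ict')
    print(f.PI_NAME, f.n_header_lines)
    for key, var in f.variables.items():
        # every variable shares the POINTS dimension
        print(key, var.units, var[:].max())
    return f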
def setUp(self):
    import numpy as np
    from PseudoNetCDF import PseudoNetCDFFile
    self.checkval = """time,layer,latitude,longitude,test
0.0,0.0,0.0,0.0,0.0
0.0,0.0,0.0,1.0,1.0
0.0,0.0,0.0,2.0,2.0
0.0,0.0,0.0,3.0,3.0
0.0,0.0,0.0,4.0,4.0
0.0,0.0,1.0,0.0,5.0
0.0,0.0,1.0,1.0,6.0
0.0,0.0,1.0,2.0,7.0
0.0,0.0,1.0,3.0,8.0
0.0,0.0,1.0,4.0,9.0
0.0,0.0,2.0,0.0,10.0
0.0,0.0,2.0,1.0,11.0
0.0,0.0,2.0,2.0,12.0
0.0,0.0,2.0,3.0,13.0
0.0,0.0,2.0,4.0,14.0
0.0,0.0,3.0,0.0,15.0
0.0,0.0,3.0,1.0,16.0
0.0,0.0,3.0,2.0,17.0
0.0,0.0,3.0,3.0,18.0
0.0,0.0,3.0,4.0,19.0
0.0,1.0,0.0,0.0,20.0
0.0,1.0,0.0,1.0,21.0
0.0,1.0,0.0,2.0,22.0
0.0,1.0,0.0,3.0,23.0
0.0,1.0,0.0,4.0,24.0
0.0,1.0,1.0,0.0,25.0
0.0,1.0,1.0,1.0,26.0
0.0,1.0,1.0,2.0,27.0
0.0,1.0,1.0,3.0,28.0
0.0,1.0,1.0,4.0,29.0
0.0,1.0,2.0,0.0,30.0
0.0,1.0,2.0,1.0,31.0
0.0,1.0,2.0,2.0,32.0
0.0,1.0,2.0,3.0,33.0
0.0,1.0,2.0,4.0,34.0
0.0,1.0,3.0,0.0,35.0
0.0,1.0,3.0,1.0,36.0
0.0,1.0,3.0,2.0,37.0
0.0,1.0,3.0,3.0,38.0
0.0,1.0,3.0,4.0,39.0
0.0,2.0,0.0,0.0,40.0
0.0,2.0,0.0,1.0,41.0
0.0,2.0,0.0,2.0,42.0
0.0,2.0,0.0,3.0,43.0
0.0,2.0,0.0,4.0,44.0
0.0,2.0,1.0,0.0,45.0
0.0,2.0,1.0,1.0,46.0
0.0,2.0,1.0,2.0,47.0
0.0,2.0,1.0,3.0,48.0
0.0,2.0,1.0,4.0,49.0
0.0,2.0,2.0,0.0,50.0
0.0,2.0,2.0,1.0,51.0
0.0,2.0,2.0,2.0,52.0
0.0,2.0,2.0,3.0,53.0
0.0,2.0,2.0,4.0,54.0
0.0,2.0,3.0,0.0,55.0
0.0,2.0,3.0,1.0,56.0
0.0,2.0,3.0,2.0,57.0
0.0,2.0,3.0,3.0,58.0
0.0,2.0,3.0,4.0,59.0
1.0,0.0,0.0,0.0,60.0
1.0,0.0,0.0,1.0,61.0
1.0,0.0,0.0,2.0,62.0
1.0,0.0,0.0,3.0,63.0
1.0,0.0,0.0,4.0,64.0
1.0,0.0,1.0,0.0,65.0
1.0,0.0,1.0,1.0,66.0
1.0,0.0,1.0,2.0,67.0
1.0,0.0,1.0,3.0,68.0
1.0,0.0,1.0,4.0,69.0
1.0,0.0,2.0,0.0,70.0
1.0,0.0,2.0,1.0,71.0
1.0,0.0,2.0,2.0,72.0
1.0,0.0,2.0,3.0,73.0
1.0,0.0,2.0,4.0,74.0
1.0,0.0,3.0,0.0,75.0
1.0,0.0,3.0,1.0,76.0
1.0,0.0,3.0,2.0,77.0
1.0,0.0,3.0,3.0,78.0
1.0,0.0,3.0,4.0,79.0
1.0,1.0,0.0,0.0,80.0
1.0,1.0,0.0,1.0,81.0
1.0,1.0,0.0,2.0,82.0
1.0,1.0,0.0,3.0,83.0
1.0,1.0,0.0,4.0,84.0
1.0,1.0,1.0,0.0,85.0
1.0,1.0,1.0,1.0,86.0
1.0,1.0,1.0,2.0,87.0
1.0,1.0,1.0,3.0,88.0
1.0,1.0,1.0,4.0,89.0
1.0,1.0,2.0,0.0,90.0
1.0,1.0,2.0,1.0,91.0
1.0,1.0,2.0,2.0,92.0
1.0,1.0,2.0,3.0,93.0
1.0,1.0,2.0,4.0,94.0
1.0,1.0,3.0,0.0,95.0
1.0,1.0,3.0,1.0,96.0
1.0,1.0,3.0,2.0,97.0
1.0,1.0,3.0,3.0,98.0
1.0,1.0,3.0,4.0,99.0
1.0,2.0,0.0,0.0,100.0
1.0,2.0,0.0,1.0,101.0
1.0,2.0,0.0,2.0,102.0
1.0,2.0,0.0,3.0,103.0
1.0,2.0,0.0,4.0,104.0
1.0,2.0,1.0,0.0,105.0
1.0,2.0,1.0,1.0,106.0
1.0,2.0,1.0,2.0,107.0
1.0,2.0,1.0,3.0,108.0
1.0,2.0,1.0,4.0,109.0
1.0,2.0,2.0,0.0,110.0
1.0,2.0,2.0,1.0,111.0
1.0,2.0,2.0,2.0,112.0
1.0,2.0,2.0,3.0,113.0
1.0,2.0,2.0,4.0,114.0
1.0,2.0,3.0,0.0,115.0
1.0,2.0,3.0,1.0,116.0
1.0,2.0,3.0,2.0,117.0
1.0,2.0,3.0,3.0,118.0
1.0,2.0,3.0,4.0,119.0
"""
    testfile = self.testfile = PseudoNetCDFFile()
    testfile.createDimension('time', 2)
    testfile.createDimension('layer', 3)
    testfile.createDimension('latitude', 4)
    testfile.createDimension('longitude', 5)
    # One coordinate variable per dimension, valued 0..len-1
    for dk, dv in testfile.dimensions.items():
        var = testfile.createVariable(dk, 'f', (dk, ))
        var[:] = np.arange(len(dv), dtype='f')
    var = testfile.createVariable(
        'test', 'f', ('time', 'layer', 'latitude', 'longitude'))
    var[:] = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5)
print(point, isin)
varkeys = ['temperature', 'windDir', 'windSpeed', 'dewpoint', 'altimeter']
vardds = [k + 'DD' for k in varkeys]
if args.verbose > 1:
    print('Subset variables')
getvarkeys = varkeys + vardds + \
    ['stationName', 'timeObs', 'timeNominal', 'elevation', 'latitude',
     'longitude']
if args.verbose > 1:
    print('Slicing files')
p2p = Pseudo2NetCDF(verbose=0)
outfile = PseudoNetCDFFile()
p2p.addDimensions(ncff, outfile)
outfile.createDimension('recNum', len(found_point_ids))
p2p.addGlobalProperties(ncff, outfile)
# Copy variable definitions first, then fill them with only the
# records selected along the recNum dimension
for vark in getvarkeys:
    p2p.addVariable(ncff, outfile, vark, data=False)
for vark in getvarkeys:
    invar = ncff.variables[vark]
    outvar = outfile.variables[vark]
    recid = list(invar.dimensions).index('recNum')
    outvar[:] = invar[:].take(found_point_ids, recid)
if args.humidity:
    varkeys.append('specificHumidity')
def mrgidx(ipr_paths, irr_paths, idx):
    if isinstance(irr_paths, str):
        irrf = NetCDFFile(irr_paths)
    else:
        irrf = file_master([NetCDFFile(irr_path) for irr_path in irr_paths])
    if isinstance(ipr_paths, str):
        iprf = NetCDFFile(ipr_paths)
    else:
        iprf = file_master([NetCDFFile(ipr_path) for ipr_path in ipr_paths])

    # Process and Reaction keys should exclude TFLAG
    pr_keys = [pr for pr in iprf.variables.keys() if pr not in ('TFLAG', )]
    rr_keys = [rr for rr in irrf.variables.keys() if rr not in ('TFLAG', )]

    # Attempt to order reactions by number;
    # this is not necessary, but is nice and clean
    try:
        rr_keys = [(int(rr.split('_')[1]), rr) for rr in rr_keys]
        rr_keys.sort()
        rr_keys = [rr[1] for rr in rr_keys]
    except Exception:
        warn("Cannot sort reaction keys")

    # Process names are everything before the last delimiter
    prcs = list(set(['_'.join(pr.split('_')[:-1]) for pr in pr_keys]))
    # Species names are everything after the last delimiter
    spcs = list(set(['_'.join(pr.split('_')[-1:]) for pr in pr_keys]))

    # Select a dummy variable for extracting properties
    pr_tmp = iprf.variables[pr_keys[0]]

    # Create an empty file and decorate it as necessary
    outf = PseudoNetCDFFile()
    outf.Species = "".join([spc.ljust(16) for spc in spcs])
    outf.Process = "".join([prc.ljust(16) for prc in prcs])
    outf.Reactions = "".join([rr_key.ljust(16) for rr_key in rr_keys])
    outf.createDimension("PROCESS", len(prcs))
    outf.createDimension("SPECIES", len(spcs))
    outf.createDimension("RXN", len(rr_keys))
    outf.createDimension("TSTEP", pr_tmp[:, 0, 0, 0].shape[0])
    outf.createDimension("TSTEP_STAG", len(outf.dimensions["TSTEP"]) + 1)
    outf.createDimension("ROW", 1)
    outf.createDimension("LAY", 1)
    outf.createDimension("COL", 1)
    outf.createDimension("VAR", 3)
    outf.createDimension("DATE-TIME", 2)

    tflag = outf.createVariable("TFLAG", "i", ('TSTEP', 'VAR', 'DATE-TIME'))
    tflag.__dict__.update(dict(units="<YYYYJJJ,HHMMSS>",
                               var_desc='TFLAG'.ljust(16),
                               long_name='TFLAG'.ljust(16)))
    tflag[:, :, :] = iprf.variables['TFLAG'][:][:, [0], :]

    shape = outf.createVariable("SHAPE", "i", ("TSTEP", "LAY", "ROW", "COL"))
    shape.__dict__.update(dict(units="ON/OFF",
                               var_desc="SHAPE".ljust(16),
                               long_name="SHAPE".ljust(16)))
    shape[:] = 1

    irr = outf.createVariable("IRR", "f", ("TSTEP", "RXN"))
    irr.__dict__.update(dict(units=pr_tmp.units,
                             var_desc="IRR".ljust(16),
                             long_name="IRR".ljust(16)))

    ipr = outf.createVariable("IPR", "f", ("TSTEP", "SPECIES", "PROCESS"))
    # Attach IPR metadata to the IPR variable
    ipr.__dict__.update(dict(units=pr_tmp.units,
                             var_desc="IPR".ljust(16),
                             long_name="IPR".ljust(16)))

    for rr, var in zip(rr_keys, irr.swapaxes(0, 1)):
        var[:] = irrf.variables[rr][:][idx]

    for prc, prcvar in zip(prcs, ipr.swapaxes(0, 2)):
        for spc, spcvar in zip(spcs, prcvar):
            try:
                spcvar[:] = iprf.variables['_'.join([prc, spc])][:][idx]
            except KeyError as es:
                warn(str(es))
    return outf
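# Usage sketch (assumptions: 'camx.ipr.nc' and 'camx.irr.nc' are
# hypothetical netCDF conversions of CAMx process-analysis output, and
# idx selects one cell as (all times, layer, row, col)):
def _example_mrgidx():
    idx = (slice(None), 0, 2, 3)
    merged = mrgidx('camx.ipr.nc', 'camx.irr.nc', idx)
    print(merged.variables['IRR'].shape)  # (TSTEP, RXN)
    print(merged.variables['IPR'].shape)  # (TSTEP, SPECIES, PROCESS)
    return merged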
def __init__(self, rf, multi=False, **props):
    """
    Initialization includes reading the header and learning
    about the format.

    see __readheader and __gettimestep() for more info

    Keywords (i.e., props) for projection:
        P_ALP, P_BET, P_GAM, XCENT, YCENT, XORIG, YORIG, XCELL, YCELL
    """
    self.__rffile = OpenRecordFile(rf)
    self.__readheader()
    # The record layout depends on the process count: 24 processes use a
    # single AERCHEM column; 26 split it into INORGACHEM/ORGACHEM/AQACHEM
    self.__ipr_record_type = {
        24: dtype(dict(
            names=['SPAD', 'DATE', 'TIME', 'SPC', 'PAGRID', 'NEST',
                   'I', 'J', 'K', 'INIT', 'CHEM', 'EMIS', 'PTEMIS',
                   'PIG', 'WADV', 'EADV', 'SADV', 'NADV', 'BADV',
                   'TADV', 'DIL', 'WDIF', 'EDIF', 'SDIF', 'NDIF',
                   'BDIF', 'TDIF', 'DDEP', 'WDEP', 'AERCHEM',
                   'FCONC', 'UCNV', 'AVOL', 'EPAD'],
            formats=['>i', '>i', '>f', '>S10', '>i', '>i', '>i', '>i',
                     '>i', '>f', '>f', '>f', '>f', '>f', '>f', '>f',
                     '>f', '>f', '>f', '>f', '>f', '>f', '>f', '>f',
                     '>f', '>f', '>f', '>f', '>f', '>f', '>f', '>f',
                     '>f', '>i'])),
        26: dtype(dict(
            names=['SPAD', 'DATE', 'TIME', 'SPC', 'PAGRID', 'NEST',
                   'I', 'J', 'K', 'INIT', 'CHEM', 'EMIS', 'PTEMIS',
                   'PIG', 'WADV', 'EADV', 'SADV', 'NADV', 'BADV',
                   'TADV', 'DIL', 'WDIF', 'EDIF', 'SDIF', 'NDIF',
                   'BDIF', 'TDIF', 'DDEP', 'WDEP', 'INORGACHEM',
                   'ORGACHEM', 'AQACHEM', 'FCONC', 'UCNV', 'AVOL',
                   'EPAD'],
            formats=['>i', '>i', '>f', '>S10', '>i', '>i', '>i', '>i',
                     '>i', '>f', '>f', '>f', '>f', '>f', '>f', '>f',
                     '>f', '>f', '>f', '>f', '>f', '>f', '>f', '>f',
                     '>f', '>f', '>f', '>f', '>f', '>f', '>f', '>f',
                     '>f', '>f', '>f', '>i']))
    }[len(self.prcnames)]

    prcs = ['SPAD', 'DATE', 'TIME', 'PAGRID', 'NEST', 'I', 'J', 'K',
            'INIT', 'CHEM', 'EMIS', 'PTEMIS', 'PIG', 'WADV', 'EADV',
            'SADV', 'NADV', 'BADV', 'TADV', 'DIL', 'WDIF', 'EDIF',
            'SDIF', 'NDIF', 'BDIF', 'TDIF', 'DDEP', 'WDEP'] + {
        24: ['AERCHEM'],
        26: ['INORGACHEM', 'ORGACHEM', 'AQACHEM']
    }[len(self.prcnames)] + ['FCONC', 'UCNV', 'AVOL', 'EPAD']
    varkeys = ['_'.join(i) for i in cartesian(prcs, self.spcnames)]
    varkeys += ['SPAD', 'DATE', 'TIME', 'PAGRID', 'NEST', 'I', 'J', 'K',
                'TFLAG']
    self.groups = {}
    NSTEPS = len([i_ for i_ in self.timerange()])
    NVARS = len(varkeys)
    self.createDimension('VAR', NVARS)
    self.createDimension('DATE-TIME', 2)
    self.createDimension('TSTEP', NSTEPS)
    padatatype = []
    pavarkeys = []
    for di, domain in enumerate(self.padomains):
        dk = 'PA%02d' % di
        prefix = dk + '_'
        grp = self.groups[dk] = PseudoNetCDFFile()
        pavarkeys.extend([prefix + k for k in varkeys])
        grp.createDimension('VAR', NVARS)
        grp.createDimension('DATE-TIME', 2)
        grp.createDimension('TSTEP', NSTEPS)
        grp.createDimension('COL', domain['iend'] - domain['istart'] + 1)
        grp.createDimension('ROW', domain['jend'] - domain['jstart'] + 1)
        grp.createDimension('LAY', domain['tlay'] - domain['blay'] + 1)
        padatatype.append(
            (dk, self.__ipr_record_type,
             (len(grp.dimensions['ROW']), len(grp.dimensions['COL']),
              len(grp.dimensions['LAY']))))
        if len(self.padomains) == 1:
            self.createDimension('COL',
                                 domain['iend'] - domain['istart'] + 1)
            self.createDimension('ROW',
                                 domain['jend'] - domain['jstart'] + 1)
            self.createDimension('LAY',
                                 domain['tlay'] - domain['blay'] + 1)

        def varget(k, dk=dk):
            # dk is bound as a default argument so each domain gets its
            # own getter against the name-mangled __variables method
            return self._ipr__variables(dk, k)

        if len(self.padomains) == 1:
            self.variables = PseudoNetCDFVariables(varget, varkeys)
        else:
            grp.variables = PseudoNetCDFVariables(varget, varkeys)
    self.__memmaps = memmap(self.__rffile.infile.name, dtype(padatatype),
                            'r', self.data_start_byte).reshape(
                                NSTEPS, len(self.spcnames))
    for k, v in props.items():
        setattr(self, k, v)
    try:
        add_cf_from_ioapi(self)
    except Exception:
        pass
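# Usage sketch (assumption: this __init__ is a memory-mapped CAMx
# process-analysis reader along the lines of
# PseudoNetCDF.camxfiles.ipr.Memmap.ipr; 'camx.ipr' is a hypothetical
# IPR output file):
def _example_read_ipr():
    from PseudoNetCDF.camxfiles.ipr.Memmap import ipr
    iprf = ipr('camx.ipr')
    print(iprf.variables['TFLAG'][:2])  # IOAPI-style <YYYYJJJ, HHMMSS>
    # variables are named PROCESS_SPECIES, e.g. 'CHEM_O3' if O3 is present
    return iprf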
def __init__(self, path):
    PseudoNetCDFFile.__init__(self)
    f = open(path, 'r')
    missing = []
    units = []
    l = f.readline()
    if ',' in l:
        delim = ','
    else:
        delim = None

    def split(s):
        return list(map(str.strip, s.split(delim)))

    if split(l)[-1] != '1001':
        raise TypeError("File is the wrong format. "
                        "Expected 1001; got %s" % (split(l)[-1], ))

    n, self.fmt = split(l)
    n_user_comments = 0
    n_special_comments = 0
    self.n_header_lines = int(n)
    try:
        for li in range(self.n_header_lines - 1):
            li += 2
            l = f.readline()
            LAST_VAR_DESC_LINE = 12 + len(missing)
            SPECIAL_COMMENT_COUNT_LINE = LAST_VAR_DESC_LINE + 1
            LAST_SPECIAL_COMMENT_LINE = (SPECIAL_COMMENT_COUNT_LINE +
                                         n_special_comments)
            USER_COMMENT_COUNT_LINE = (12 + len(missing) + 2 +
                                       n_special_comments)
            if li == PI_LINE:
                self.PI_NAME = l.strip()
            elif li == ORG_LINE:
                self.ORGANIZATION_NAME = l.strip()
            elif li == PLAT_LINE:
                self.SOURCE_DESCRIPTION = l.strip()
            elif li == MISSION_LINE:
                self.MISSION_NAME = l.strip()
            elif li == VOL_LINE:
                self.VOLUME_INFO = l.strip()
            elif li == DATE_LINE:
                l = l.replace(',', '').split()
                SDATE = "".join(l[:3])
                WDATE = "".join(l[3:])
                self.SDATE = SDATE
                self.WDATE = WDATE
                self._SDATE = datetime.strptime(SDATE, '%Y%m%d')
                self._WDATE = datetime.strptime(WDATE, '%Y%m%d')
            elif li == TIME_INT_LINE:
                self.TIME_INTERVAL = l.strip()
            elif li == UNIT_LINE:
                units.append(l.replace('\n', '').replace('\r', '').strip())
                self.INDEPENDENT_VARIABLE = units[-1]
            elif li == SCALE_LINE:
                scales = [eval(i) for i in split(l)]
                if set([float(s) for s in scales]) != set([1.]):
                    raise ValueError(
                        "Unsupported: scaling is unsupported. "
                        "data is scaled by %s" % (str(scales), ))
            elif li == MISSING_LINE:
                missing = [eval(i) for i in split(l)]
            elif li > MISSING_LINE and li <= LAST_VAR_DESC_LINE:
                nameunit = l.replace('\n', '').split(',')
                name = nameunit[0].strip()
                if len(nameunit) > 1:
                    units.append(nameunit[1].strip())
                elif re.compile(r'(.*)\((.*)\)').match(nameunit[0]):
                    desc_groups = re.compile(r'(.*)\((.*)\).*').match(
                        nameunit[0]).groups()
                    name = desc_groups[0].strip()
                    units.append(desc_groups[1].strip())
                elif '_' in name:
                    units.append(name.split('_')[1].strip())
                else:
                    warn('Could not find unit in string: "%s"' % l)
                    units.append(name.strip())
            elif li == SPECIAL_COMMENT_COUNT_LINE:
                n_special_comments = int(l.replace('\n', ''))
            elif (li > SPECIAL_COMMENT_COUNT_LINE and
                  li <= LAST_SPECIAL_COMMENT_LINE):
                pass
            elif li == USER_COMMENT_COUNT_LINE:
                n_user_comments = int(l.replace('\n', ''))
            elif li > USER_COMMENT_COUNT_LINE and li < self.n_header_lines:
                colon_pos = l.find(':')
                k = l[:colon_pos].strip()
                v = l[colon_pos + 1:].strip()
                setattr(self, k, v)
            elif li == self.n_header_lines:
                variables = l.replace(',', '').split()
                self.TFLAG = variables[0]
    except Exception as e:
        raise SyntaxError("Error parsing icartt file %s: %s" %
                          (path, repr(e)))

    missing = missing[:1] + missing
    scales = [1.] + scales

    if hasattr(self, 'LLOD_FLAG'):
        llod_values = loddelim.sub('\n', self.LLOD_VALUE).split()
        if len(llod_values) == 1:
            llod_values *= len(variables)
        else:
            llod_values = ['N/A'] + llod_values
        assert len(llod_values) == len(variables)
        llod_values = [get_lodval(llod_val) for llod_val in llod_values]
        llod_flags = len(llod_values) * [self.LLOD_FLAG]
        llod_flags = [get_lodval(llod_flag) for llod_flag in llod_flags]
    else:
        # No LLOD metadata in the header; placeholders keep zip() aligned
        llod_flags = [None] * len(scales)
        llod_values = [None] * len(scales)

    if hasattr(self, 'ULOD_FLAG'):
        ulod_values = loddelim.sub('\n', self.ULOD_VALUE).split()
        if len(ulod_values) == 1:
            ulod_values *= len(variables)
        else:
            ulod_values = ['N/A'] + ulod_values
        assert len(ulod_values) == len(variables)
        ulod_values = [get_lodval(ulod_val) for ulod_val in ulod_values]
        ulod_flags = len(ulod_values) * [self.ULOD_FLAG]
        ulod_flags = [get_lodval(ulod_flag) for ulod_flag in ulod_flags]
    else:
        # No ULOD metadata in the header; placeholders keep zip() aligned
        ulod_flags = [None] * len(scales)
        ulod_values = [None] * len(scales)

    data = f.read()
    datalines = data.split('\n')
    ndatalines = len(datalines)
    while datalines[-1] in ('', ' ', '\r'):
        ndatalines -= 1
        datalines.pop(-1)
    data = genfromtxt(StringIO(bytes('\n'.join(datalines), 'utf-8')),
                      delimiter=delim, dtype='d')
    data = data.reshape(ndatalines, len(variables))
    data = data.swapaxes(0, 1)
    self.createDimension('POINTS', ndatalines)
    for (var, scale, miss, unit, dat, llod_flag, llod_val, ulod_flag,
         ulod_val) in zip(variables, scales, missing, units, data,
                          llod_flags, llod_values, ulod_flags,
                          ulod_values):
        vals = MaskedArray(dat, mask=dat == miss, fill_value=miss)
        tmpvar = self.variables[var] = PseudoNetCDFVariable(
            self, var, 'd', ('POINTS', ), values=vals)
        tmpvar.units = unit
        tmpvar.standard_name = var
        tmpvar.missing_value = miss
        tmpvar.fill_value = miss
        tmpvar.scale = scale
        if hasattr(self, 'LLOD_FLAG'):
            tmpvar.llod_flag = llod_flag
            tmpvar.llod_value = llod_val
        if hasattr(self, 'ULOD_FLAG'):
            tmpvar.ulod_flag = ulod_flag
            tmpvar.ulod_value = ulod_val

    def dtime(s):
        return timedelta(seconds=int(s),
                         microseconds=(s - int(s)) * 1.E6)

    self._date_objs = (self._SDATE +
                       vectorize(dtime)(self.variables[self.TFLAG])
                       .view(type=ndarray))