def test_write_fill_value_decimal_places_follow_column(self):
    """Fill values should mirror the decimal places of their column's data.

    E.g. if the column has data [10.001, 11.123], the normal fill value
    -999 should be written -999.000, i.e. with as many trailing zeros as
    the data has.

    If the column has no data in it, default to the old-style C format
    string for how many decimal places to show.

    """
    key_columns = ['STNNBR', 'CASTNO', 'BTLNBR', '_DATETIME']
    with closing(StringIO()) as buff:
        dfile = DataFile()
        dfile.create_columns(key_columns + ['CTDPRS', 'CTDOXY'])
        # The identifying columns carry no data in this fixture.
        for name in key_columns:
            dfile[name].values = [None, None]
        dfile['CTDPRS'].values = [_decimal('10.0001'), None]
        dfile['CTDOXY'].values = [None, _decimal('243.23')]

        btlex.write(dfile, buff)
        lines = buff.getvalue().split('\n')

        # CTDPRS default decplaces is 1 but the data has 4
        pressure_fill = lines[4].split(',')[5].lstrip()
        self.assertEqual('-999.0000', pressure_fill)

        # CTDOXY default decplaces is 4 but the data has 2
        oxygen_fill = lines[3].split(',')[6].lstrip()
        self.assertEqual('-999.00', oxygen_fill)
def read(self, fileobj):
    """How to read an LDEO ASEP file.

    The first line supplies the cast metadata (data type + ship code,
    station, cast, latitude, longitude, date, year day, time, cruise id).
    Lines are then skipped until one starting with '&' (appended to the
    header global) and one starting with '@' (the parameter keys). Every
    remaining line is a row of data, with '-9' marking a missing value.

    Only CTD data is supported; for any other data type an error is logged
    and the method returns without reading further.

    """
    line1 = _getline(fileobj)
    dtype_shipcode, stn, cast, lat, lon, date, yday, time, cruise_id = \
        line1.split()

    # The first character is the data type; the rest (ship code) is unused.
    dtype = dtype_shipcode[0]
    if not is_datatype_ctd(dtype):
        log.error(u'Unable to read non-CTD ASEP files at the moment.')
        return

    # FIXME this is not really the EXPOCODE
    self.globals['EXPOCODE'] = cruise_id
    # FIXME this is not really the SECT_ID
    self.globals['SECT_ID'] = cruise_id
    self.globals['STNNBR'] = str(int(stn))
    self.globals['CASTNO'] = cast
    self.globals['LATITUDE'] = lat
    self.globals['LONGITUDE'] = lon
    self.globals['_DATETIME'] = datetime.strptime(date + time,
                                                  '%Y/%m/%d%H:%M')

    self.globals['header'] = '#' + cruise_id

    line2 = _getline(fileobj)
    while line2[0] != '&':
        log.warn(u'Ignoring line not preceded by &: {0!r}'.format(line2))
        line2 = _getline(fileobj)
    self.globals['header'] += "\n#" + line2 + "\n"

    line3 = _getline(fileobj)
    while line3[0] != '@':
        # Report the line actually being skipped (previously logged line2).
        log.warn(u'Ignoring line not preceded by @: {0!r}'.format(line3))
        line3 = _getline(fileobj)

    param_keys = line3[1:].split()
    parameters = [CTD_PARAM_MAP.get(key, None) for key in param_keys]
    cols = self.create_columns(parameters)

    for line in fileobj:
        for col, val in zip(cols, line.split()):
            if val == '-9':
                val = None
            # NOTE(review): a missing value reaches _decimal() as None;
            # confirm _decimal passes None through.
            col.append(_decimal(val))

    # rewrite every data column to be the same sigfigs
    for col in self.columns.values():
        decplaces = col.decimal_places()
        col.values = [pad_decimal(val, decplaces) for val in col.values]

    if 'pr' in param_keys:
        # Estimate the bottom depth from the last pressure in the cast.
        pressures = cols[param_keys.index('pr')].values
        lat = _decimal(self.globals['LATITUDE'])
        depth = int(depth_unesco(pressures[-1], lat))
        self.globals['DEPTH'] = depth

    self.check_and_replace_parameters()
def test_decimal_places_requires_decimal(self):
    """decimal_places() raises ValueError when a value is not a Decimal."""
    column = Column('test')
    column.values = [
        _decimal('-999.0000'),
        20.12355,  # plain float in the middle of Decimal data
        _decimal('-999.00'),
    ]
    self.assertRaises(ValueError, column.decimal_places)
def is_in_range(self, x):
    """Return whether x lies within the inclusive lower/upper bounds.

    A bound that is None is not enforced. x and both bounds are converted
    with _decimal before comparing.

    """
    value = _decimal(x)
    if self.bound_lower is not None and value < _decimal(self.bound_lower):
        return False
    if self.bound_upper is not None and value > _decimal(self.bound_upper):
        return False
    return True
def read(self, handle):
    """How to read a CTD Bermuda Atlantic Time-Series Study file.

    Lines starting with '%' are collected as comments. Every other line is
    a whitespace separated data record. Cast identifiers are taken from
    the file name via dpr_idparts(handle.name).

    """
    column_units = (
        ('_DATETIME', ''),
        ('LATITUDE', ''),
        ('LONGITUDE', ''),
        ('CTDPRS', 'DBAR'),
        ('CTDTMP', 'DEG C'),
        ('CTDSAL', 'PSU'),
        ('CTDOXY', 'UMOL/KG'),
        ('FLUOR', 'RFU'),
    )
    columns, units = zip(*column_units)
    self.create_columns(columns, units)
    self.check_and_replace_parameters(convert=False)

    # Measured parameters and the record field each one is read from.
    measured_fields = (
        ('CTDPRS', 4),
        ('CTDTMP', 6),
        ('CTDSAL', 7),
        ('CTDOXY', 8),
        ('FLUOR', 10),
    )

    comments = []
    for line in handle:
        if line.startswith('%'):
            comments.append(line[1:].strip())
            continue

        fields = line.split()
        # These values are not used afterwards, but the unpacking and int()
        # still raise for a malformed decimal-year token.
        year, frac_year = fields[1].split('.')
        year = int(year)

        self['_DATETIME'].append(bats_time_to_dt(fields[1]))
        self['LATITUDE'].append(_decimal(fields[2]))
        self['LONGITUDE'].append(_decimal(correct_longitude(fields[3])))
        for name, index in measured_fields:
            self[name].append_check_range(
                _decimal_check_missing(fields[index]))

    self.globals['_COMMENTS'] = ';'.join(comments)
    self.globals['EXPOCODE'] = create_expocode(
        '33H4', self['_DATETIME'][0])
    self.globals['SECT_ID'] = BATS_SECT_ID

    idparts = dpr_idparts(handle.name)
    self.globals['_OS_ID'] = idparts['cruise']
    self.globals['STNNBR'] = idparts['type']
    self.globals['CASTNO'] = idparts['cast']
    self.globals['DEPTH'] = FILL_VALUE

    collapse_globals(self, ['_DATETIME', 'LATITUDE', 'LONGITUDE'])
def sigma_r(refprs, press, temp, salty): '''Calculate density using international equation of state From text furnished by J. Gieskes Args: press -- pressure in decibars temp -- temperature in celsius degrees salty -- salinity PSS 78 refprs -- reference pressure refprs = 0. : sigma theta refprs = press: sigma z Return: kg/m*3 - 1000.0 ''' # check for missing data if _any_missing(temp, press, salty): return CDMISS # calculate potential temperature if press != refprs: potemp = potential_temperature(press, temp, salty, refprs) else: potemp = temp # sigma theta kg/m**3 sigma = (rho_w(potemp) + kw(potemp, salty) + k_st0(salty, potemp) + _decimal(4.8314e-4) * salty ** 2) if equal_with_epsilon(refprs, 0.0): return sigma - _decimal(1000.0) # Calculate pressure effect # # rho(s,t,0)/(1.0-p/k(s,t,p)) # kst0 = secant_bulk_modulus(abs(salty), potemp, 0) # reference pressure in bars bars = refprs * 0.1 # Calculate pressure terms terma = polynomial(potemp, (3.239908, 0.00143713, 1.16092e-4, -5.77905e-7)) + \ polynomial(potemp, (0.0022838, -1.0981e-5, -1.6078e-6)) * salty + \ 1.91075e-4 * abs(salty) ** 1.5 termb = polynomial(potemp, (8.50935e-5, -6.12293e-6, 5.2787e-8)) + \ polynomial(potemp, (-9.9348e-7, 2.0816e-8, 9.1697e-10)) * salty # Secant bulk modulus k(s,t,p) */ kstp = polynomial(bars, (kst0, terma, termb)) return sigma / (1.0 - bars / kstp) - 1000.0
def test_write_exchange_decimal_places(self):
    """Decimal places should be kept from the original data.

    Uses the CTD exchange writer with a zero LONGITUDE global that has
    seven decimal places.

    """
    with closing(StringIO()) as buff:
        dfile = DataFile()
        dfile.globals['LONGITUDE'] = _decimal('0.0000000')
        dfile.create_columns(['CTDPRS'])
        dfile['CTDPRS'].values = [_decimal('10.0001'), None]

        ctdex.write(dfile, buff)
        lines = buff.getvalue().split('\n')

        # Decimal('0.0000000') is converted to 0E-7 by str. The formatting
        # has to be done manually.
        longitude_text = lines[2].split(' = ')[1].lstrip()
        self.assertEqual('0.0000000', longitude_text)
def test_decimal_places(self):
    """A column's decimal places is the max number of places after a
    decimal in the column.

    """
    column = Column('test')
    column.values = [
        _decimal(text)
        for text in ('-999.0000', '19.0', '-999.000', '-999.00')
    ]
    places = column.decimal_places()
    self.assertEqual(4, places)
def density(salinity, temperature, pressure):
    """Calculate density given salinity, temperature, and pressure.

    The algorithm is given on page -15- of UNESCO 44 as equation (7).

    Returns:
        None when any argument is None. For zero pressure, the value of
        the one-atmosphere polynomial below.

    Raises:
        InvalidOperation: re-raised after logging when the decimal
            arithmetic fails (e.g. sqrt of a negative salinity cubed).

    NOTE(review): no branch for non-zero pressure is visible here, so such
    calls fall through and return None. Confirm this is intended.

    """
    if any(x is None for x in (salinity, temperature, pressure)):
        return None

    s = _decimal(salinity)
    t = _decimal(temperature)
    p = _decimal(pressure)

    if p == 0:
        # UNESCO 44 page - 17 -
        A = _decimal('999.842594', '6.793952e-2', '-9.095290e-3',
                     '1.001685e-4', '-1.120083e-6', '6.536332e-9')
        # equation (14)
        pure_water_d = polyn(t, A)

        B = _decimal('8.24493e-1', '-4.0899e-3', '7.6438e-5', '-8.2467e-7',
                     '5.3875e-9')
        C = _decimal('-5.72466e-3', '1.0227e-4', '-1.6546e-6')
        d0 = _decimal('4.8314e-4')
        try:
            return (pure_water_d + polyn(t, B) * s +
                    polyn(t, C) * (s ** 3).sqrt() +
                    d0 * (s ** _decimal(2)))
        except InvalidOperation:
            log.debug('Invalid operation probably caused by salinity = %r' % s)
            # Bare raise keeps the original traceback.
            raise
    return None
def _build_columns_for_row(self, iii, row, num_quality_words, parameters,
                           asterisks):
    """Store one parsed record into the file's columns.

    Args:
        iii: index of the record; used as the row index for each column.
        row: sequence of raw string fields for the record. The last
            num_quality_words fields are the quality words.
        num_quality_words: number of trailing quality words in row.
        parameters: parameter names, one per leading field of row.
        asterisks: per-parameter marker strings; a parameter is flagged
            when its marker contains "**".

    Raises:
        ValueError: when the quality flag for a flagged column is not an
            integer (logged before being re-raised).

    """
    # QUALT1 takes precedence
    quality_flags = row[-num_quality_words:]

    # Build up the columns for the line
    flag_i = 0
    for j, parameter in enumerate(parameters):
        datum = row[j].strip()
        datum = in_band_or_none(datum, -9)

        if parameter not in CHARACTER_PARAMETERS:
            try:
                datum = _decimal(datum)
            except Exception:
                # Best effort: keep the unconverted datum.
                log.warning(u'Expected numeric data for parameter %r, got %r' %
                            (parameter, datum))

        # Only assign flag if column is flagged.
        # TODO should use better detection for asterisks
        if "**" in asterisks[j].strip():
            try:
                woce_flag = int(quality_flags[0][flag_i])
            except ValueError:
                log.error(u'Received bad flag "{}" for {} on record {}'.format(
                    quality_flags[0][flag_i], parameter, iii))
                raise
            flag_i += 1
            self[parameter].set(iii, datum, woce_flag)
        else:
            # Unflagged column: store the datum without a flag rather than
            # reusing the previous column's flag (or an unbound name).
            self[parameter].set(iii, datum)
def milliliter_per_liter_to_umol_per_kg(file, column, whole_not_aliquot=None):
    """Convert the oxygen values of column in place and return the column.

    Each value is divided by
    (o2_atomic_weight / density_o2 * 0.001) * (sigt / 1000 + 1), where sigt
    comes from volume.sigma_r at zero pressure for the row's temperature
    and salinity. Values below -3 are treated as missing and set to None.

    Args:
        file: the data file the column belongs to; salinity and temperature
            are looked up from it per row.
        column: the column to convert. Its parameter's WOCE mnemonic must
            contain 'OXY'.
        whole_not_aliquot: when None, taken from
            oxygen_method_is_whole_not_aliquot().

    Raises:
        ValueError: for a non-missing value in a non-oxygen column.

    """
    if whole_not_aliquot is None:
        whole_not_aliquot = oxygen_method_is_whole_not_aliquot()

    for i, value in enumerate(column.values):
        # A missing (falsy) salinity falls back to the approximation.
        salinity = (_get_first_value_of_parameters(
            file, ('CTDSAL', 'SALNTY'), i) or APPROXIMATION_SALINITY)
        # Salinity sanity check
        if salinity <= 0:
            salinity = APPROXIMATION_SALINITY
        elif salinity < 20 or salinity > 60:
            log.warn('Salinity (%f) is ridiculous' % salinity)

        temperature = _get_first_value_of_parameters(
            file, ('CTDTMP', 'THETA', 'REVTMP'), i)
        # NOTE(review): a temperature of exactly 0 is falsy and therefore
        # also counted as missing here. Confirm that is intended.
        temperature_missing = not (temperature and temperature > -3)

        # NOTE(review): relies on Python 2 ordering when value is None
        # (None < -3 is True there); this comparison raises on Python 3.
        if value < -3:
            # Missing
            column.values[i] = None
        elif 'OXY' in column.parameter.mnemonic_woce():
            # Converting oxygen
            if not whole_not_aliquot and \
                    'CTDOXY' in column.parameter.mnemonic_woce():
                temperature = APPROXIMATION_TEMPERATURE
            elif temperature_missing:
                temperature = APPROXIMATION_TEMPERATURE
                log.warn(('Temperature is missing. Using %f at '
                          'record#%d') % (temperature, i))
            sigt = volume.sigma_r(0.0, 0.0, temperature, salinity)
            o2_atomic_weight = 31.9988
            density_o2 = 1.42905481  # g/l @ 273.15K
            constant = o2_atomic_weight / density_o2 * 0.001
            column.values[i] /= (_decimal(constant) *
                                 (sigt / _decimal(1.0e3) + _decimal(1.0)))
        else:
            raise ValueError(('Cannot apply conversion for oxygen to '
                              'non-oxygen parameter.'))

    # Change the units
    # NOTE(review): this reads `parameter.units` but assigns
    # `parameter.unit`, and tests the units name (not the parameter name)
    # for 'OXY'. Verify both against the parameter class.
    if 'OXY' in column.parameter.units.name:
        column.parameter.unit = std.Unit('UMOL/KG')
    return column
def test_diff_decplaces(self):
    """Derivative is still different when decimal places are different."""
    def single_row_file(oxygen):
        # One-row file keyed on CTDPRS with the given CTDOXY text.
        dfile = DataFile()
        dfile.create_columns(['CTDPRS', 'CTDOXY'])
        dfile['CTDPRS'].append(_decimal('1'))
        dfile['CTDOXY'].append(_decimal(oxygen))
        return dfile

    origin = single_row_file('0.140')
    derivative = single_row_file('0.14')
    keys = ['CTDPRS']

    p_different, _p_not_in_orig, _p_not_in_deriv, _p_common = \
        different_columns(origin, derivative, keys)
    self.assertEqual(p_different, ['CTDOXY'])

    merged = merge_datafiles(origin, derivative, keys, ['CTDOXY'])
    merged_oxygen = decimal_to_str(merged['CTDOXY'][0])
    self.assertEqual(merged_oxygen, '0.14')
def secant_bulk_modulus(salinity, temperature, pressure):
    """Calculate the secant bulk modulus of sea water.

    Obtained from EOS80 according to Fofonoff Millard 1983 pg 15

    Args:
        salinity: [PSS-78]
        temperature: [degrees Celsius IPTS-68]
        pressure: pressure

    Returns:
        For zero pressure, the secant bulk modulus of sea water as computed
        with _decimal/polyn arithmetic.

    Raises:
        InvalidOperation: re-raised after logging when the decimal
            arithmetic fails (e.g. sqrt of a negative salinity cubed).

    NOTE(review): no branch for non-zero pressure is visible here, so such
    calls fall through and return None. Confirm this is intended.

    """
    s = _decimal(salinity)
    t = _decimal(temperature)
    p = _decimal(pressure)

    if p == 0:
        E = _decimal('19652.21', '148.4206', '-2.327105', '1.360477e-2',
                     '-5.155288e-5')
        Kw = polyn(t, E)
        F = _decimal('54.6746', '-0.603459', '1.09987e-2', '-6.1670e-5')
        G = _decimal('7.944e-2', '1.6483e-2', '-5.3009e-4')
        try:
            return (Kw + polyn(t, F) * s +
                    polyn(t, G) * (s ** 3).sqrt())
        except InvalidOperation:
            log.debug('Invalid operation probably caused by salinity = %r' % s)
            # Bare raise keeps the original traceback.
            raise
    return None
def ctdoxy_micromole_per_liter_to_micromole_per_kilogram(file, column):
    """Divide each value of column by (sigma theta * 1.0e-3 + 1) in place.

    Sigma theta is read from the file's 'CTDSIGTH' column. When that column
    is falsy a warning is logged and column is returned unchanged.

    The division precision is the number of integral digits of the value
    plus the smaller of the decimal places of the value and of its sigma
    theta.

    """
    sigtheta = file['CTDSIGTH']
    if not sigtheta:
        log.warn('Unable to find sigma theta column. Cannot convert.')
        return column

    for index, _ in enumerate(column):
        integral_digits = len(str(column[index].to_integral()))
        decimal_places = min(-sigtheta[index].as_tuple().exponent,
                             -column[index].as_tuple().exponent)
        with localcontext() as ctx:
            # The factor is computed before the reduced precision applies.
            factor = sigtheta[index].fma(_decimal('1.0e-3'), 1)
            ctx.prec = integral_digits + decimal_places
            column[index] = column[index] / factor
    return column
def test_write_exchange_decimal_places(self):
    """Decimal places should be kept from the original data.

    Uses the bottle exchange writer with a LONGITUDE column whose first
    value is zero with seven decimal places.

    """
    key_columns = ['STNNBR', 'CASTNO', 'BTLNBR', '_DATETIME']
    with closing(StringIO()) as buff:
        dfile = DataFile()
        dfile.create_columns(key_columns + ['CTDPRS', 'LONGITUDE'])
        # The identifying columns carry no data in this fixture.
        for name in key_columns:
            dfile[name].values = [None, None]
        dfile['CTDPRS'].values = [_decimal('10.0001'), None]
        dfile['LONGITUDE'].values = [
            _decimal('0.0000000'),
            _decimal('1.000000'),
        ]

        btlex.write(dfile, buff)
        lines = buff.getvalue().split('\n')

        # Decimal('0.0000000') is converted to 0E-7 by str. The formatting
        # has to be done manually.
        longitude_text = lines[3].split(',')[5].lstrip()
        self.assertEqual('0.0000000', longitude_text)
def test_write_btl_date_time_no_decimals(self):
    """BTL_DATE and BTL_TIME should not have decimal places."""
    key_columns = ['STNNBR', 'CASTNO', 'BTLNBR', '_DATETIME']
    with closing(StringIO()) as buff:
        dfile = DataFile()
        dfile.create_columns(
            key_columns + ['CTDPRS', 'BTL_DATE', 'BTL_TIME'])
        # The identifying columns carry no data in this fixture.
        for name in key_columns:
            dfile[name].values = [None, None]
        dfile['CTDPRS'].values = [_decimal('10.0001'), None]
        dfile['BTL_DATE'].values = [
            _decimal('19700101'),
            _decimal('19700102'),
        ]
        dfile['BTL_TIME'].values = [_decimal('0000'), _decimal('1234')]

        btlex.write(dfile, buff)
        lines = buff.getvalue().split('\n')

        first_date = lines[3].split(',')[6].lstrip()
        self.assertEqual('19700101', first_date)
        second_time = lines[4].split(',')[7].lstrip()
        self.assertEqual('1234', second_time)
def depth(grav, p, rho):
    """Calculate depth by integration of insitu density.

    Sverdrup, H. U.,Johnson, M. W., and Fleming, R. H., 1942.
    The Oceans, Their Physics, Chemistry and General Biology.
    Prentice-Hall, Inc., Englewood Cliff, N.J.

    Args:
        grav: local gravity (m/sec^2) @ 0.0 db
        p: pressure series (decibars)
        rho: insitu density series (kg/m^3)

    Returns:
        depth - depth series (meters)

    Raises:
        AssertionError: when p and rho have different lengths.
        IndexError: for a two-element series, because no starting depth is
            initialised for it (see the comments below), and for an empty
            series.

    """
    depths = []

    num_intervals = len(p)
    assert num_intervals == len(rho), \
        ("The number of series intervals must be the same.\n"
         "pressure {0} != density {1}").format(num_intervals, len(rho))

    grav = _decimal(grav)
    p = _decimal(p)
    rho = _decimal(rho)

    # When calling depth() repeatedly with a two-element
    # series, the first call should be with a one-element series to
    # initialize the starting value (see depth_(), below).
    # TODO figure out what this does. The original C version has the caller
    # maintain a depth array that is constantly modified.

    # Initialize the series
    # Compare by value: identity comparison of ints only works by accident
    # of small-integer caching.
    if num_intervals != 2:
        # If the integration starts from > 15 db, calculate depth relative to
        # starting place. Otherwise, calculate from surface.
        if p[0] > 15.0:
            depths.append(_decimal(0))
        else:
            depths.append(p[0] / (rho[0] * _decimal(10000) *
                                  (grav + DGRAV_DPRES * p[0])))

    # Calculate the rest of the series.
    for i in range(0, num_intervals - 1):
        j = i + 1
        # depth in meters
        depths.append(
            depths[i] +
            (p[j] - p[i]) / ((rho[j] + rho[i]) * _decimal(5000) *
                             (grav + DGRAV_DPRES * p[j])) * _decimal('1e8'))

    return depths
def test_calculate_depths(self):
    """calculate_depths() reports which method produced the depths."""
    dfile = self.file

    # An explicit _ACTUAL_DEPTH column is reported as 'actual'.
    dfile['_ACTUAL_DEPTH'] = Column('_ACTUAL_DEPTH')
    self.assertEqual(('actual', []), dfile.calculate_depths())
    del dfile['_ACTUAL_DEPTH']

    # Empty CTD columns are reported as 'unesco1983'.
    dfile.globals['LATITUDE'] = 0
    measured = ['CTDPRS', 'CTDSAL', 'CTDTMP']
    dfile.create_columns(measured)
    self.assertEqual(('unesco1983', []), dfile.calculate_depths())

    # With pressure, salinity and temperature data: 'sverdrup'.
    for name in measured:
        dfile[name].values = [1]
    expected = ('sverdrup', [_decimal('1.021723814950101286444879340E-8')])
    self.assertEqual(expected, dfile.calculate_depths())
def cc_per_kilogram_e_neg_5_to_nanomole_per_kilogram(file, column):
    """Convert CC/KG * 10 ** -5 to NMOL/KG

    For Helium and Neon,

    CC/KG * 10 ** -5 / 2.2415 = NMOL/KG

    Bill Jenkins (WHOI) 2006-05-03
    Bill Jenkins (WHOI) 2012-05-15

    Falsy values (None, zero) are left untouched. Every other value is
    divided in place with a precision equal to its number of integral
    digits plus its number of decimal places. Returns column.

    """
    constant = _decimal('2.2415')
    for index, value in enumerate(column):
        if not value:
            continue
        integral_digits = len(str(value.to_integral()))
        decimal_places = -value.as_tuple().exponent
        with localcontext() as ctx:
            ctx.prec = integral_digits + decimal_places
            column[index] = value / constant
    return column
def test_depth(self):
    """depth.depth() integrates pressure/density series into depths."""
    # TODO these numbers almost equal may be imprecise
    expected_surface = _decimal('0.0000102040783')
    self.assertAlmostEqual(expected_surface,
                           depth.depth(9.8, [1], [1])[0])

    # Series of different lengths are rejected.
    with self.assertRaises(AssertionError):
        depth.depth(9.8, [1, 2], [1])

    # Starting deeper than 15 db measures depth relative to the start.
    expected_relative = _decimal('0.000000')
    self.assertAlmostEqual(expected_relative,
                           depth.depth(9.8, [16], [2])[0])

    # depth has an issue with sequences of length 2 to integrate over,
    # e.g. depth.depth(9.8, [16, 16], [2, 2]).
    answer = [
        _decimal(text) for text in (
            '0.000002040815871720217975820810286',
            '226.7572705863743630694763440',
            '518.3022651720940178880441487',
            '926.4651666301443099770898946',
            '1606.736517457033660694942632',
        )
    ]
    result = depth.depth(9.8, [1, 2, 3, 4, 5], [5, 4, 3, 2, 1])
    for expected, actual in zip(answer, result):
        self.assertAlmostEqual(expected, actual)
def _read_data_row(dfile, row_i, info, raw):
    """Parse one raw field and append it to its column.

    Args:
        dfile: unused here; kept for the caller's signature.
        row_i: data row number, used only in the warning message.
        info: (column, param) pair. A tuple param marks a flag column, in
            which case param[0] and param[2] are used in the warning.
        raw: the raw field text.

    Flag fields are appended as int, or None (with a warning) when they
    are not integers. For data fields, out-of-band values become None,
    fields with no parameter or a string format stay as stripped text, and
    everything else is converted with _decimal, falling back to the text
    when the conversion fails.

    """
    raw_value = raw.strip()
    col, param = info

    # tuple indicates flag column
    # (exact type check kept on purpose: tuple subclasses do not count)
    if type(param) is tuple:
        try:
            value = int(raw_value)
        except (ValueError, TypeError):
            log.warn(
                u'Bad {0} flag {1!r} for {2} on data row {3}'.format(
                    param[0], raw_value, param[2], row_i))
            value = None
    elif out_of_band(raw_value):
        value = None
    elif param is None or param.format.endswith('s'):
        value = raw_value
    else:
        try:
            value = _decimal(raw_value)
        except Exception:
            # Best effort: keep unparseable data as text. Unlike a bare
            # except, this lets KeyboardInterrupt/SystemExit propagate.
            value = raw_value
    col.append(value)
def test_grav_ocean_surface_wrt_latitude(self):
    """Surface gravity is checked at the equator and at a southern
    latitude.

    """
    cases = (
        ('9.780318', 0),
        ('9.80738775', -60.4987683333),
    )
    for expected_text, latitude in cases:
        expected = _decimal(expected_text)
        self.assertAlmostEqual(
            expected, depth.grav_ocean_surface_wrt_latitude(latitude))
def k_st0(salinity, potential_temperature):
    """Return polynomial(potential_temperature, COEFF_K_ST0) multiplied by
    |salinity| ** 1.5.

    """
    temperature_term = polynomial(potential_temperature, COEFF_K_ST0)
    salinity_term = abs(salinity) ** _decimal(1.5)
    return temperature_term * salinity_term
def kw(potential_temperature, salinity):
    '''Pure water secant bulk modulus?

    Returns (kw_1(potential_temperature) + 0.824493) * salinity.

    '''
    # TODO
    coefficient = kw_1(potential_temperature) + _decimal(0.824493)
    return coefficient * salinity
def depth_unesco(pres, lat):
    """Depth (meters) from pressure (decibars) using
    Saunders and Fofonoff's method.

    Saunders, P. M., 1981. Practical Conversion of Pressure to Depth.
    Journal of Physical Oceanography 11, 573-574.
    Mantyla, A. W., 1982-1983. Private correspondence.

    Deep-sea Res., 1976, 23, 109-111.
    Formula refitted for 1980 equation of state
    Ported from Unesco 1983
    Units:
      pressure  p     decibars
      latitude  lat   degrees
      depth     depth meters
    Checkvalue: depth = 9712.653 M for P=10000 decibars,
                latitude=30 deg above for standard ocean: T=0 deg celsius;
                S=35 (PSS-78)

    Returns:
        None when pres or lat is missing (None or another empty,
        non-numeric value); otherwise the depth. A numeric zero is a valid
        input: latitude 0 is the equator and pressure 0 is the surface.

    """
    for value in (pres, lat):
        # Missing means None or an empty non-numeric value such as ''.
        # A plain truthiness test would also reject 0, i.e. the equator.
        if value is None or (not value and value != 0):
            return None

    x = sin(lat / _decimal('57.29578')) ** _decimal(2)
    gr = _decimal('9.780318') * \
        (_decimal('1') +
         (_decimal('5.2788e-3') + _decimal('2.36e-5') * x) * x) + \
        _decimal('1.092e-6') * pres
    return ((((_decimal('-1.82e-15') * pres + _decimal('2.279e-10')) * pres -
              _decimal('2.2512e-5')) * pres + _decimal('9.72659')) *
            pres) / gr
def read(self, handle):
    """How to read CTD Bonus Goodhope files from a TAR.

    Line 0 holds the station and section id, line 3 the date, time,
    position and depth. Data rows start at line 14; a value equal to 9.0
    (within epsilon) is stored as None with WOCE flag 9, anything else
    with flag 2.

    """
    def to_decimal_degrees(tokens):
        # Retry with the tokens rotated by one when the first order fails.
        try:
            return ddm_to_dd(tokens)
        except ValueError:
            return ddm_to_dd([tokens[1], tokens[2], tokens[0]])

    lines = handle.readlines()

    station_fields = lines[0].split()
    sect_id = station_fields[1]
    station = str(int(station_fields[0]))

    cast_fields = lines[3].split()
    lattoks = [cast_fields[3], cast_fields[4], cast_fields[2]]
    lontoks = [cast_fields[6], cast_fields[7], cast_fields[5]]
    latitude = to_decimal_degrees(lattoks)
    longitude = to_decimal_degrees(lontoks)

    date_text = cast_fields[0]
    time_text = cast_fields[1].zfill(4)
    bottom_depth = cast_fields[10]

    self.globals['EXPOCODE'] = None
    self.globals['SECT_ID'] = sect_id
    self.globals['STNNBR'] = station
    self.globals['CASTNO'] = '1'
    self.globals['LATITUDE'] = latitude
    self.globals['LONGITUDE'] = longitude
    self.globals['DEPTH'] = bottom_depth
    self.globals['_DATETIME'] = datetime.strptime(
        date_text + time_text, '%d%m%Y%H%M')

    param_units = [
        ['CTDPRS', 'DBAR'],
        ['CTDTMP', 'ITS-90'],
        ['CTDSAL', 'PSS-78'],
        ['CTDOXY', 'UMOL/KG'],
        ['THETA', 'DEG C'],
        ['DEPTH', 'METERS'],
        ['SIG0', 'KG/M^3'],
        ['GAMMA', 'KG/M^3'],
    ]
    columns = [pair[0] for pair in param_units]
    units = [pair[1] for pair in param_units]
    self.create_columns(columns, units)

    for record in lines[14:]:
        for index, number in enumerate(map(float, record.split())):
            value = _decimal(number)
            if equal_with_epsilon(value, 9.0):
                value, flag_woce = None, 9
            else:
                flag_woce = 2
            self[columns[index]].append(value, flag_woce=flag_woce)

    self.check_and_replace_parameters()
def _decimal_check_missing(str):
    """Convert str to a decimal, or None if it matches a dpr fill value
    (-9.99 or -10, compared with equal_with_epsilon).

    """
    value = _decimal(str)
    for fill in (-9.99, -10):
        if equal_with_epsilon(value, fill):
            return None
    return value
def read(self, f, expo=None):
    """Read a CTD text file made of a key/value header and a data table.

    The header runs up to a line containing "===". It is followed by a
    line of parameter names, a line of bracketed units, a line containing
    "---" and then the data rows.

    Args:
        f: open file-like object, read line by line with readline().
        expo: optional EXPOCODE to store in the globals.

    Raises:
        ValueError: when the "===" or "---" separator line is missing.

    """
    if expo:
        self.globals["EXPOCODE"] = expo
    self.globals["CASTNO"] = ""
    self.globals["SECT_ID"] = ""

    whitespace = re.compile(r'\s+')

    # Header. Stop at end of file as well ('' from readline), otherwise a
    # file without a "===" line would loop forever.
    l = f.readline()
    while l and "===" not in l:
        if "Lat" in l:
            ctoks = l.split('=')[1].strip().split(' ')
            self.globals["LATITUDE"] = ddm_to_dd(ctoks)
        elif "Lon" in l:
            ctoks = l.split('=')[1].strip().split(' ')
            self.globals["LONGITUDE"] = ddm_to_dd(ctoks)
        elif "depth" in l:
            self.globals["DEPTH"] = l.split(':')[1].strip()
        elif "UTC" in l:
            dt = datetime.datetime.strptime(
                l.split('=')[1].strip(), '%H:%M:%S')
            self.globals["TIME"] = dt.strftime('%H%M')
        elif "Station" in l:
            self.globals['STNNBR'] = l.split(':')[1].strip()
        else:
            # Any other line is tried as the cast date and ignored when it
            # does not parse.
            try:
                dt = datetime.datetime.strptime(l.strip(), '%b %d %Y')
                self.globals["DATE"] = dt.strftime('%Y%m%d')
            except ValueError:
                pass
        l = f.readline()

    if "===" not in l:
        raise ValueError('Missing "===" line that ends the header')
    l = f.readline()

    # Parameter names, mapped to WOCE mnemonics where recognised.
    params = [p for p in whitespace.split(l) if p.strip()]
    for i, param in enumerate(params):
        if 'Temp' in param:
            params[i] = "CTDTMP"
        if 'Sal' in param:
            params[i] = "CTDSAL"
        if "Oxy" in param:
            params[i] = "CTDOXY"
        if "Pres" in param:
            params[i] = "CTDPRS"

    # Units are the bracketed tokens on the next line.
    l = f.readline()
    units = re.findall(r'(?<=\[)[\/ \w]*(?=\])', l)
    for i, unit in enumerate(units):
        if 'db' in unit:
            units[i] = "DBAR"

    l = f.readline()
    if "---" not in l:
        raise ValueError('Missing "---" line that precedes the data')
    l = f.readline()

    self.create_columns(params, units, None)

    while l:
        values = [v for v in whitespace.split(l) if v.split()]
        for column, value in zip(params, values):
            col = self.columns[column]
            if 'NaN' in value:
                col.append(None, flag_woce=9)
            elif column != "CTDPRS":
                # Compare by value; `is not` on strings depends on
                # interning.
                col.append(_decimal(value), flag_woce=2)
            else:
                # Pressure is appended without a flag.
                col.append(_decimal(value))
        l = f.readline()

    self.check_and_replace_parameters()
def read(dfile, fileobj, data_type=None):
    """Read a French CSV file.

    data_type (optional) if given, must be 'bottle' or 'ctd'. This changes the
    columns that are created (Adds BTLNBR for bottle data).

    NOTE: French CSV used for CTD contains all the CTD casts in one file. Split
    them into a DataFileCollection.

    """
    assert data_type is None or data_type in ['bottle', 'ctd']

    reader = csv_reader(fileobj, dialect=FrCSVDialect())

    # Read header line that contains parameters and units. Convert them to WOCE.
    r_param = re_compile(r'(.*)\s\[(.*)\]')
    params = []
    units = []
    # next() works for iterators on both Python 2.6+ and Python 3.
    header = next(reader)
    for param in header:
        matches = r_param.match(param)
        unit = None
        if matches:
            param = matches.group(1)
            unit = matches.group(2)
        elif param == 'Flag':
            # A flag column belongs to the parameter just before it.
            param = params[-1] + FLAG_F

        try:
            param = frparam_to_param[param]
        except KeyError:
            pass
        params.append(param)
        try:
            unit = frunit_to_unit[unit]
        except KeyError:
            pass
        units.append(unit)

    non_flag_paramunits = []
    for paramunit in zip(params, units):
        if paramunit[0].endswith(FLAG_F):
            continue
        non_flag_paramunits.append(paramunit)

    # Create all the columns.
    dfile.create_columns(*zip(*non_flag_paramunits))
    columns_id = ['EXPOCODE', 'STNNBR', 'CASTNO']
    col_exp, col_stn, col_cast = dfile.create_columns(columns_id)
    if data_type == 'bottle':
        (col_btln,) = dfile.create_columns(['BTLNBR'])
    dfile.check_and_replace_parameters()

    # Read data. Flag columns follow immediately after data columns.
    # flags/flag_values record the first occurrence of each raw flag value;
    # they are collected but not used further in this function.
    flags = set()
    flag_values = {}
    for rowi, row in enumerate(reader):
        for param, value in zip(params, row):
            if param == 'LATITUDE':
                # Move the leading hemisphere character to the end.
                lattoks = value[1:].split() + [value[0]]
                value = woce_lat_to_dec_lat(lattoks)
            elif param == 'LONGITUDE':
                lngtoks = value[1:].split() + [value[0]]
                value = woce_lng_to_dec_lng(lngtoks)
            if param.endswith(FLAG_F):
                param = param[:-len(FLAG_F)]
                col = dfile[param]
                if value not in flags:
                    flag_values[value] = [param, rowi, col.values[rowi]]
                    flags.add(value)
                if value == '':
                    value = 9
                # Unparseable or unmapped flags become 9.
                try:
                    value = int(value)
                    value = frflag_to_woce_flag[value]
                except (ValueError, KeyError):
                    value = 9
                col.set(rowi, col.get(rowi), flag_woce=value)
            else:
                col = dfile[param]
                if value == '' or value is None:
                    col.set(rowi, None)
                else:
                    col.set(rowi, _decimal(value))

    fuse_datetime(dfile)

    # French CSV does not include cast identifying information. Generate that
    # by watching for coordinate changes.
    # While looping through and finding station changes, also populate the
    # bottom depth column from the _DEPTH column by estimating it as the bottom
    # most depth.
    dfile.create_columns(['DEPTH'])

    last_coord = None
    last_dt = None
    last_depths = []
    stnnbr = 0
    castno = 0
    btlnbr = 1
    col_lat = dfile['LATITUDE']
    col_lng = dfile['LONGITUDE']
    col_dt = dfile['_DATETIME']
    col_bot = dfile['DEPTH']
    try:
        col_depth = dfile['_DEPTH']
    except KeyError:
        # NOTE(review): rowi is whatever the data loop above left behind
        # (the last row index) and is unbound for a file with no data
        # rows. Confirm the intended latitude.
        method, col_depth = dfile.calculate_depths(col_lat[rowi])
        col_depth = [xxx.to_integral_value() if xxx else xxx
                     for xxx in col_depth]
    for rowi in range(len(dfile)):
        coord = (col_lat[rowi], col_lng[rowi])
        # location changed => station change
        if last_coord != coord:
            stnnbr += 1
            castno = 0
            btlnbr = 1
            last_coord = coord

        # time changed => cast changed
        dtime = col_dt[rowi]
        if last_dt != dtime:
            castno += 1
            btlnbr = 1
            # Close the previous cast: its bottom depth is its deepest
            # value.
            if last_depths:
                col_bot.set_length(rowi, max(last_depths))
            last_depths = []
        else:
            # normal measurement row
            btlnbr += 1
        last_dt = dtime

        col_exp.set(rowi, '')
        col_stn.set(rowi, stnnbr)
        col_cast.set(rowi, castno)
        last_depths.append(col_depth[rowi])
        if data_type == 'bottle':
            col_btln.set(rowi, btlnbr)
    # NOTE(review): the final cast uses the last row's depth rather than
    # max(last_depths) as the earlier casts do. Confirm.
    col_bot.set_length(len(dfile), col_depth[len(dfile) - 1])

    try:
        del dfile['_DEPTH']
    except KeyError:
        pass
def test_integration_merge_btl(self):
    """Merge a derivative bottle file into an origin file end to end.

    Two bottle exchange files are written to temporary files, read back,
    and merged on the bottle key columns. The checks cover the merged
    values and flags, the header contents, the key column types and the
    handling of a derivative row whose key is absent from the origin.

    """
    with TemporaryFile() as origin, \
            TemporaryFile() as deriv:
        origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W,PH_SWS,PH_SWS_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,,,
316N145_9, TRNS1, 574, 1, 36, 36,2,1000,5,-999.000,9,11,9
316N145_9, TRNS1, 574, 1, 35, 35,2,1000,5,-999.000,9,22,9
316N145_9, TRNS1, 574, 1, 34, 34,2,1000,5,-999.000,9,33,9
316N145_9, TRNS1, 574, 1, 32, 32,2,1000,5,-999.000,9,44,9
END_DATA
""")
        origin.flush()
        origin.seek(0)
        # The derivative differs in DELC14 (value and flag) and PH_SWS, and
        # its last row belongs to station 600, which the origin lacks.
        deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W,PH_SWS,PH_SWS_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,,,
316N145_9, TRNS1, 574, 1, 36, 36,2,1000,5, 10.000,9,-999.0,9
316N145_9, TRNS1, 574, 1, 35, 35,2,1000,5,-999.000,1,-999.0,9
316N145_9, TRNS1, 574, 1, 34, 34,2,1000,5,-999.000,9,-999.0,9
316N145_9, TRNS1, 600, 1, 1, 1,2,1000,5,-999.000,9,-999.0,9
END_DATA
""")
        deriv.flush()
        deriv.seek(0)

        dfo = DataFile()
        dfd = DataFile()
        btlex.read(dfo, origin)
        btlex.read(dfd, deriv)

        p_different, p_not_in_orig, p_not_in_deriv, p_common = \
            different_columns(dfo, dfd, BOTTLE_KEY_COLS)
        parameters = p_different + p_not_in_orig

        keys = determine_bottle_keys(dfo, dfd)
        self.assertEqual(
            keys,
            ('EXPOCODE', 'STNNBR', 'CASTNO', 'SAMPNO', 'BTLNBR'))

        # Key columns are never merge targets.
        parameters = list(OrderedSet(parameters) - OrderedSet(keys))

        # Parameters with underscores in them may be confused when matching
        # flags with them. E.g. PH_SWS_FLAG_W should be matched with PH_SWS
        # not PH.
        dfile = merge_datafiles(dfo, dfd, keys, parameters)
        self.assertEqual(dfile['DELC14'][0], _decimal('10.000'))
        self.assertEqual(dfile['DELC14'].flags_woce[1], 1)

        # Header should be the origin file's header
        self.assertNotIn('header 2', dfile.globals['header'])
        self.assertIn('header 1', dfile.globals['header'])
        # Header should contain the merged parameters
        self.assertIn('Merged parameters: PH_SWS, DELC14, DELC14_FLAG_W',
                      dfile.globals['header'])
        # No double new lines
        self.assertNotIn('\n\n', dfile.globals['header'])
        # new line for header is not included in the writers
        self.assertEqual('\n', dfile.globals['header'][-1])

        # Key columns should not have been converted to floats. This happens
        # for some reason if pandas combine/update have been used.
        self.assertEqual(str(dfile['STNNBR'][0]), '574')
        self.assertEqual(str(dfile['CASTNO'][0]), '1')
        self.assertEqual(str(dfile['SAMPNO'][0]), '36')
        self.assertEqual(str(dfile['BTLNBR'][0]), '36')
        self.assertEqual(str(dfile['PH_SWS'][0]), 'None')

        # Extra keys in derivative file should not be merged in.
        self.assertNotIn(600, dfile['STNNBR'])

        # Make sure warning is printed regarding extra key in deriv file.
        lines = [[
            'Key ', 'does not exist in origin from derivative rows', '600'
        ]]
        self.assertTrue(self.ensure_lines(lines))