def woce_dec_lng_to_dec_lng(lngtoks):
    """Convert a longitude given as (decimal degrees, hemisphere) tokens
    into a single signed decimal longitude.

    Returns None when the degrees token is not a valid Decimal.
    """
    precision = 3 + len(lngtoks)
    with IncreasedPrecision(precision):
        try:
            magnitude = Decimal(lngtoks[0])
        except InvalidOperation:
            return None
        # Hemisphere token selects the sign (+1 or -1).
        signed = magnitude * hemisphere_to_coeff(lngtoks[1])
        return signed.quantize(Decimal(10) ** -precision)
def woce_lng_to_dec_lng(lngtoks):
    """Convert a longitude in WOCE (degrees, minutes, hemisphere) format
    into signed decimal degrees.

    Returns None when the minutes token is not a valid Decimal.
    """
    precision = 4 + len(lngtoks)
    with IncreasedPrecision(precision):
        try:
            whole = int(lngtoks[0])
            fraction = Decimal(lngtoks[1]) / Decimal('60.0')
        except InvalidOperation:
            return None
        # Hemisphere token selects the sign (+1 or -1).
        result = (whole + fraction) * hemisphere_to_coeff(lngtoks[2])
        return result.quantize(Decimal(10) ** -precision)
def test_read_btlnbr_as_string(self):
    """Bottle numbers must be read verbatim as strings while numeric
    columns such as CTDSAL become Decimals.
    """
    with closing(StringIO()) as buf:
        buf.write('SIO1,33.24\n')
        buf.write('01,32.10\n')
        buf.flush()
        buf.seek(0)

        dfile = DataFile()
        dfile['BTLNBR'] = Column('BTLNBR')
        dfile['CTDSAL'] = Column('CTDSAL')
        exchange.read_data(dfile, buf, ['BTLNBR', 'CTDSAL'])

        # '01' must not lose its leading zero.
        self.assertEqual(dfile['BTLNBR'].values, ['SIO1', '01'])
        self.assertEqual(
            dfile['CTDSAL'].values,
            [Decimal('33.24'), Decimal('32.10')])
def combine(bats_file, event_sum_file):
    """Combines the given BATS .dpr file with the Summary event.log file
    so that the DataFile contains most of the information from both.

    The matching event log record is found by comparing positions to
    within 1e-3 degrees. If no record matches, an error is logged and
    the BATS file is left unmodified.
    """
    # It is pretty much given that the data is CTD.
    lat, lng = bats_file.globals['LATITUDE'], bats_file.globals['LONGITUDE']

    # Find the event log record.
    # Hoisted out of the loop: the tolerance never changes per row.
    epsilon = Decimal('1e-3')
    sum_file_i = None
    for i in range(len(event_sum_file)):
        sumlat = event_sum_file['LATITUDE'][i]
        sumlng = event_sum_file['LONGITUDE'][i]
        if (equal_with_epsilon(lat, sumlat, epsilon) and
                equal_with_epsilon(lng, sumlng, epsilon)):
            sum_file_i = i
            break
    if sum_file_i is None:
        log.error('Event for BATS data at %f %f not found' % (lat, lng))
        return

    headers = event_sum_file.column_headers()
    # Fixed: previously used the leaked loop variable `i`; use the
    # explicitly recorded match index instead.
    row = event_sum_file.row(sum_file_i)
    info = dict(zip(headers, row))

    bats_file.globals['DEPTH'] = info['DEPTH']
def read(self, handle, retain_order=False, header_only=False):
    """How to read a CTD Exchange file.

    retain_order - pass column order through to create_columns
    header_only - only read the CTD headers, not the data

    Raises ValueError when NUMBER_HEADERS is missing, a header line is
    malformed, or the column and unit rows disagree in length.
    """
    read_identifier_line(self, handle, 'CTD')
    l = read_comments(self, handle)

    # Read NUMBER_HEADERS
    num_headers = re_compile(r'NUMBER_HEADERS\s*=\s*(\d+)')
    m = num_headers.match(l)
    if m:
        # NUMBER_HEADERS counts itself as a header
        num_headers = int(m.group(1)) - 1
    else:
        raise ValueError(
            u'Expected NUMBER_HEADERS as the second non-comment line.')
    header = re_compile(r'(\w+)\s*=\s*(-?[\w\.]*)')
    for i in range(0, num_headers):
        m = header.match(handle.readline())
        if m:
            # Positions are stored as Decimal; all other headers stay
            # as raw strings.
            if m.group(1) in REQUIRED_HEADERS and m.group(1) in [
                    'LATITUDE', 'LONGITUDE']:
                self.globals[m.group(1)] = Decimal(m.group(2))
            else:
                self.globals[m.group(1)] = m.group(2)
        else:
            raise ValueError(('Expected %d continuous headers '
                              'but only saw %d') % (num_headers, i))
    woce.fuse_datetime(self)

    if header_only:
        return

    # Read parameters and units
    columns = handle.readline().strip().split(',')
    units = handle.readline().strip().split(',')

    # Check columns and units to match length.
    # Fixed: was `is not`, an identity comparison on ints, which is not
    # a reliable equality test.
    if len(columns) != len(units):
        raise ValueError(("Expected as many columns as units in file. "
                          "Found %d columns and %d units.") % \
                         (len(columns), len(units)))

    # Check all parameters are non-trivial
    if not all(columns):
        log.warn(
            ("Stripped blank parameter from MALFORMED EXCHANGE FILE\n"
             "This may be caused by an extra comma at the end of a line."))
        # List comprehension instead of filter() so the result is a list
        # on both Python 2 and 3.
        columns = [c for c in columns if c]

    self.create_columns(columns, units, retain_order)
    read_data(self, handle, columns)

    self.check_and_replace_parameters()
def test_density(self):
    """ Test values from UNESCO 44 pg -19- """
    # (salinity, temperature, pressure, expected density)
    cases = (
        (0, 5, 0, '999.96675'),
        (0, 5, 10000, '1044.12802'),
        (0, 25, 0, '997.04796'),
        (0, 25, 10000, '1037.90204'),
        (35, 5, 0, '1027.67547'),
        (35, 5, 10000, '1069.48914'),
        (35, 25, 0, '1023.34306'),
        (35, 25, 10000, '1062.53817'),
    )
    for salinity, temperature, pressure, expected in cases:
        self.assertAlmostEqual(
            X.density(salinity, temperature, pressure),
            Decimal(expected), 5)
def check_variable_ranges(nc_file):
    """Warn about any netCDF variable value outside its declared range.

    For each variable, the range is taken from data_min/data_max when
    present, otherwise valid_min/valid_max; variables with neither are
    skipped. NaN values are ignored. Out-of-range values are logged,
    not raised.
    """
    for name, variable in nc_file.variables.items():
        # Renamed from `min`/`max` to avoid shadowing the builtins.
        try:
            vmin = Decimal(str(variable.data_min))
            vmax = Decimal(str(variable.data_max))
        except AttributeError:
            try:
                vmin = Decimal(str(variable.valid_min))
                vmax = Decimal(str(variable.valid_max))
            except AttributeError:
                # No range metadata available for this variable.
                continue
        for y in variable[:]:
            if fns.isnan(y):
                continue
            x = Decimal(str(y))
            if x < vmin:
                log.warn('%s too small for %s range (%s, %s)' % \
                         (str(x), name, str(vmin), str(vmax)))
            if x > vmax:
                log.warn('%s too large for %s range (%s, %s)' % \
                         (str(x), name, str(vmin), str(vmax)))
def test_read_summary_hot(self):
    """Read a three-station HOT summary sample and verify every column."""
    self.file = SummaryFile()
    self.buff = StringIO(self.sample_hot)
    sumhot.read(self.file, self.buff)
    cs = self.file.columns
    self.assertEqual(['33KI134_1'] * 3, cs['EXPOCODE'].values)
    self.assertEqual(['PRS2'] * 3, cs['SECT_ID'].values)
    self.assertEqual(['1', '2', '2'], cs['STNNBR'].values)
    self.assertEqual([1, 1, 15], cs['CASTNO'].values)
    self.assertEqual(['ROS'] * 3, cs['_CAST_TYPE'].values)
    self.assertEqual([
        datetime(2002, 1, 14, 23, 34), datetime(2002, 1, 15, 12, 59),
        datetime(2002, 1, 17, 8, 56)], cs['_DATETIME'].values)
    self.assertEqual(['BE'] * 3, cs['_CODE'].values)
    # Positions are checked with fp_eq rather than exact equality.
    self.assertTrue(fp_eq(Decimal('21.344333333'), cs['LATITUDE'].values[0]))
    self.assertTrue(fp_eq(Decimal('22.75'), cs['LATITUDE'].values[1]))
    self.assertTrue(fp_eq(Decimal('22.74950'), cs['LATITUDE'].values[2]))
    self.assertTrue(fp_eq(Decimal('-158.271'), cs['LONGITUDE'].values[0]))
    self.assertTrue(fp_eq(Decimal('-158.001'), cs['LONGITUDE'].values[1]))
    self.assertTrue(fp_eq(Decimal('-158.000166'), cs['LONGITUDE'].values[2]))
    self.assertEqual(['GPS'] * 3, cs['_NAV'].values)
    self.assertEqual([1503, 4720, 4720], cs['DEPTH'].values)
    self.assertEqual([16, 24, 12], cs['_NUM_BOTTLES'].values)
    self.assertEqual([1020, 4806, 4806], cs['_MAX_PRESSURE'].values)
    self.assertEqual(['1,2', '1,2,3,4,5,6', '1,2'], cs['_PARAMETERS'].values)
    self.assertEqual(['Dual T, C sensors'] * 3, cs['_COMMENTS'].values)
def test_read_summary_woce(self):
    """Read a three-station WOCE summary sample and verify every column,
    including rows with missing (None) pressure/bottle/parameter fields.
    """
    self.file = SummaryFile()
    self.buff = StringIO(self.sample_woce)
    sumwoce.read(self.file, self.buff)
    cs = self.file.columns
    self.assertEqual(['33RR20070204'] * 3, cs['EXPOCODE'].values)
    self.assertEqual(['I8S'] * 3, cs['SECT_ID'].values)
    self.assertEqual(['1', '1', '2'], cs['STNNBR'].values)
    self.assertEqual([1, 1, 1], cs['CASTNO'].values)
    self.assertEqual(['ROS'] * 3, cs['_CAST_TYPE'].values)
    self.assertEqual([
        datetime(2007, 2, 15, 14, 24), datetime(2007, 2, 15, 14, 42),
        datetime(2007, 2, 15, 17, 5)], cs['_DATETIME'].values)
    self.assertEqual(['BE', 'BO', 'BE'], cs['_CODE'].values)
    # Positions are checked with fp_eq rather than exact equality.
    self.assertTrue(fp_eq(Decimal('-65.81083'), cs['LATITUDE'].values[0]))
    self.assertTrue(fp_eq(Decimal('-65.81083'), cs['LATITUDE'].values[1]))
    self.assertTrue(fp_eq(Decimal('-65.76816'), cs['LATITUDE'].values[2]))
    self.assertTrue(fp_eq(Decimal('84.549999'), cs['LONGITUDE'].values[0]))
    self.assertTrue(fp_eq(Decimal('84.55016'), cs['LONGITUDE'].values[1]))
    self.assertTrue(fp_eq(Decimal('84.53483'), cs['LONGITUDE'].values[2]))
    self.assertEqual(['GPS'] * 3, cs['_NAV'].values)
    self.assertEqual([450, 450, 1257], cs['DEPTH'].values)
    self.assertEqual([None, 439, None], cs['_MAX_PRESSURE'].values)
    self.assertEqual([None, 16, None], cs['_NUM_BOTTLES'].values)
    self.assertEqual(['test', '1-8,23-24,27,43,104-112', None],
                     cs['_PARAMETERS'].values)
    self.assertEqual([None] * 3, cs['_COMMENTS'].values)
def deg_min_to_decimal_deg(deg, min):
    """Return degrees plus minutes as decimal degrees, using Decimal
    arithmetic throughout.

    NOTE: the parameter name `min` shadows the builtin but is part of
    the public signature, so it is kept.
    """
    whole = Decimal(deg)
    fractional = Decimal(min) / Decimal(60)
    return whole + fractional
def read(self, handle):
    """How to read an NODC SD2 file.

    SD2 is a fixed-width format where the record type is encoded in
    column 80 (line[79]): '1' and '2' are station master records and
    '3' is a per-depth observation record. Station metadata from the
    most recent master record 1 is merged onto each following data
    record. Only Nansen casts are handled; CTD data raises
    NotImplementedError.
    """
    # Station-identification columns.
    self.create_columns((
        'EXPOCODE', 'STNNBR', 'CASTNO', '_DATETIME', 'LATITUDE',
        'LONGITUDE',
    ))
    # Measured-parameter columns with their units.
    self.create_columns(
        ('BOTTOM', 'DEPTH', 'CTDTMP', 'SALNTY', 'OXYGEN', 'PHSPHT',
         'SILCAT', 'NITRIT', 'NITRAT', 'PH'),
        ('METERS', 'METERS', 'DEG C', 'PSU', 'ML/L', 'UMOL/L', 'UMOL/L',
         'UMOL/L', 'UMOL/L', '',
        ))

    current_station = None
    current_cast = 1  # NOTE(review): never updated or read below.

    while handle:
        line = handle.readline()
        if not line:
            break
        if line[79] == '2':
            # Nothing in MR 2 that matters.
            # NOTE(review): this early skip makes the
            # `elif line[79] == '2'` branch below unreachable.
            continue
        #print 'line:', line,
        #print 'rule:', '|-`-*+-`-*' * 8
        if line[79] == '1':
            # Master Record 1: station identity, position, date/time
            # and depth metadata, all at fixed column offsets.
            station = {}
            raw_line = {
                'continuation_indicator': int_or_none(line[0]),
                'nodc_ref_num_country': int_or_none(line[2:4]),
                'file_code': int_or_none(line[4]),
                'nodc_ref_num_cruise_number': int_or_none(line[5:9]),
                'nodc_consecutive_station_number': int_or_none(line[9:13]),
                'data_type': int_or_none(line[13:15]),
                'ten-degree_square': int_or_none(line[17:21]),
                'one-degree_square': int_or_none(line[21:23]),
                'two-degree_square': int_or_none(line[23:25]),
                'five-degree_square': int_or_none(line[25]),
                'hemisphere_of_latitude': line[26],
                'degrees_latitude': int_or_none(line[27:29]),
                'minutes_latitude': int_or_none(line[29:31]),
                'minutes_latitude_tenths': int_or_none(line[31]),
                'hemisphere_of_longitude': line[32],
                'degrees_longitude': int_or_none(line[33:36]),
                'minutes_longitude': int_or_none(line[36:38]),
                'minutes_longitude_tenths': int_or_none(line[38]),
                'quarter_of_one_degree_square': int_or_none(line[39]),
                'year_gmt': int_or_none(line[40:42]),
                'month_of_year_gmt': int_or_none(line[42:44]),
                'day_of_month_gmt': int_or_none(line[44:46]),
                'station_time_gmt_hours_to_tenths': line[46:49],
                # This is marked as platform on
                # http://www.nodc.noaa.gov/General/NODC-Archive/sd2.html
                'data_origin_country': line[49:51],
                # These two are marked blank
                'data_origin_institution': line[51:53],
                'data_origin_platform': line[53:55],
                'bottom_depth': line[55:60],
                'effective_depth': int_or_none(line[60:64]),
                'cast_duration_hours_to_tenths': line[64:67],
                'cast_direction': line[67],
                'data_use_code': line[69],
                'minimum_depth': int_or_none(line[70:74]),
                'maximum_depth': int_or_none(line[74:78]),
                'always_2_next_record_indicator': line[78],
                'always_1_record_indicator': line[79],
            }
            assert raw_line['file_code'] == 5, \
                "Master Record 1 is corrupt. File Code should always be 5."
            assert raw_line['always_2_next_record_indicator'] == '2' or \
                raw_line['always_2_next_record_indicator'].strip() == '', \
                "Master Record 1 is corrupt."
            assert raw_line['always_1_record_indicator'] == '1', \
                "Not master record 1. Algorithm is wrong."
            if raw_line['continuation_indicator']:
                continuation_indicator = raw_line['continuation_indicator']
                # 0 - one station record
                # 1 - first station record
                # 2-8 - intermediate records
                # 9 - last station record
                # TODO handle multiple records per stations
            station['EXPOCODE'] = raw_line['nodc_ref_num_cruise_number']
            station['STNNBR'] = raw_line['nodc_consecutive_station_number']
            station['_DATA_TYPE'] = _DATA_TYPE_CODES.get(
                raw_line['data_type'], 'UNKNOWN')
            if not raw_line['hemisphere_of_latitude'] in ('N', 'S'):
                raise ValueError(
                    ("Master Record 1 is corrupt. Latitude hemisphere must be "
                     "N or S."))
            # Degrees + minutes + tenths-of-minutes -> signed decimal,
            # truncated (not rounded) to _MAX_GRATICULE_PRECISION digits.
            latitude = str(
                (1 if raw_line['hemisphere_of_latitude'] == 'N' else -1) * \
                (raw_line['degrees_latitude'] +
                 raw_line['minutes_latitude'] / 60.0 +
                 raw_line['minutes_latitude_tenths'] / 600.0))
            latitude = latitude[:latitude.find('.') + \
                                _MAX_GRATICULE_PRECISION + 1]
            station['LATITUDE'] = Decimal(latitude)
            if not raw_line['hemisphere_of_longitude'] in ('E', 'W'):
                raise ValueError((
                    "Master Record 1 is corrupt. Longitude hemisphere must be "
                    "E or W."))
            longitude = str(
                (1 if raw_line['hemisphere_of_longitude'] == 'E' else -1) * \
                (raw_line['degrees_longitude'] +
                 raw_line['minutes_longitude'] / 60.0 +
                 raw_line['minutes_longitude_tenths'] / 600.0))
            longitude = longitude[:longitude.find('.') + \
                                  _MAX_GRATICULE_PRECISION + 1]
            station['LONGITUDE'] = Decimal(longitude)
            # Time field is HHT (hours + tenths-of-hour); tenths * 6
            # gives minutes.
            hours = int(raw_line['station_time_gmt_hours_to_tenths'][:2])
            minutes = int(raw_line['station_time_gmt_hours_to_tenths'][2]) * 6
            # Two-digit year is assumed to be 19xx.
            station['_DATETIME'] = datetime(
                *(1900 + raw_line['year_gmt'],
                  raw_line['month_of_year_gmt'],
                  raw_line['day_of_month_gmt']),
                hour=hours, minute=minutes)
            try:
                station['BOTTOM'] = int(raw_line['bottom_depth'])
            except ValueError:
                station['BOTTOM'] = None
            current_station = station
        elif line[79] == '2':
            # Master Record 2 (dead code: skipped at the top of the loop).
            raw_line = {
                'depth_difference': line[0:4],
                'sample_interval': line[4:6],
                'salinity_observed': line[6],
                'oxygen_observed': line[7],
                'phosphate_observed': line[8],
                'total_phosphorous_observed': line[9],
                'silicate_observed': line[10],
                'nitrite_observed': line[11],
                'nitrate_observed': line[12],
                'ph_observed': line[13],
                'originators_cruise_identifier': line[13:17],
                'originators_station_identifier': int_or_none(line[17:26]),
                'water_color': int_or_none(line[26:28]),
                'water_transparency': int_or_none(line[28:30]),
                'wave_direction': int_or_none(line[30:32]),
                'wave_height': line[32],
                'sea_state': int_or_none(line[33]),
                'wind_force': line[34:36],
                'file_update_code': int_or_none(line[36]),
                'wave_period': line[37],
                'wind_direction': int_or_none(line[38:40]),
                'wind_speed': int_or_none(line[40:42]),
                'barometric_pressure': line[42:47],
                'dry_bulb_temperature': line[47:51],
                'dry_bulb_temperature_precision': int_or_none(line[51]),
                'wet_bulb_temperature': line[52:56],
                'wet_bulb_temperature_precision': int_or_none(line[56]),
                'weather': line[57:59],
                'cloud_type': int_or_none(line[59]),
                'cloud_amount': int_or_none(line[60]),
                'number_of_observed_depths': int_or_none(line[61:64]),
                'number_of_standard_depth_levels': int_or_none(line[64:66]),
                'number_of_detail_depths': int_or_none(line[66:69]),
                'blank': line[69:78],
                'next_record_indicator': line[78],
                'always_2_record_indicator': line[79],
            }
            # Effectively nothing here to care about.
        elif line[79] == '3':
            # Data record: one observed depth level.
            raw_line = {
                'depth': int_or_none(line[0:5]),
                'depth_quality_indicator': int_or_none(line[5]),
                'thermometric_depth_flag': line[6],
                'temperature': line[7:12],
                'temperature_precision': int_or_none(line[12]),
                'temperature_quality_indicator': int_or_none(line[13]),
                'salinity': line[14:19],
                'salinity_precision': int_or_none(line[19]),
                'salinity_quality_indicator': int_or_none(line[20]),
                'sigma-t': int_or_none(line[21:25]),
                'sigma-t_quality_indicator': int_or_none(line[25]),
                'sound_speed': int_or_none(line[26:31]),
                'sound_speed_precision': int_or_none(line[31]),
                'oxygen': line[32:36],
                'oxygen_precision': int_or_none(line[36]),
                'oxygen_quality_indicator': int_or_none(line[37]),
                'data_range_check_flags_phosphate': int_or_none(line[38]),
                'data_range_check_flags_total': int_or_none(line[39]),
                'data_range_check_flags_silicate': int_or_none(line[40]),
                'data_range_check_flags_nitrite': int_or_none(line[41]),
                'data_range_check_flags_nitrate': int_or_none(line[42]),
                'data_range_check_flags_ph': int_or_none(line[43]),
                'cast_start_time_or_messenger_release_time': line[44:47],
                'cast_number': int_or_none(line[47]),
                'inorganic_phosphate': line[48:52],
                'inorganic_phosphate_precision': int_or_none(line[52]),
                'total_phosphorous': line[53:57],
                # NOTE(review): stray ')' in this key looks like a typo,
                # but the key is never read back, so it is left as-is.
                'total_phosphorous_precision)': int_or_none(line[57]),
                'silicate': line[58:62],
                'silicate_precision': int_or_none(line[62]),
                'nitrite': line[63:66],
                'nitrite_precision': int_or_none(line[66]),
                'nitrate': line[67:70],
                'nitrate_precision': int_or_none(line[70]),
                'ph': line[71:74],
                'ph_precision': int_or_none(line[74]),
                'blank': line[75:77],
                'density_inversion_flag': int_or_none(line[77]),
                'next_record_type': int_or_none(line[78]),
                'record_type': int_or_none(line[79]),
            }
            sample = {}
            assert raw_line['record_type'] == 3, \
                ("Only observations are handled by this reader. "
                 "Interpolations are not handled.")
            sample['DEPTH'] = raw_line['depth']
            sample['DEPTH_QC'] = raw_line['depth_quality_indicator']
            # For each measured field, a precision p in 1..8 means the
            # last p digits of the fixed-width field are the fractional
            # part; p == 9 (or missing) means no value was reported.
            p = raw_line['temperature_precision']
            if p and p != 9:
                x = raw_line['temperature'].strip()
                sample['TEMPERATURE'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
                sample['TEMPERATURE_QC'] = \
                    raw_line['temperature_quality_indicator']
            p = raw_line['salinity_precision']
            if p and p != 9:
                x = raw_line['salinity'].strip()
                sample['SALINITY'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
                sample['SALINITY_QC'] = raw_line['salinity_quality_indicator']
            p = raw_line['oxygen_precision']
            if p and p != 9:
                x = raw_line['oxygen'].strip()
                sample['OXYGEN'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
                sample['OXYGEN_QC'] = raw_line['oxygen_quality_indicator']
            try:
                x = raw_line['cast_start_time_or_messenger_release_time']
                # NOTE(review): int(x[2]) / 600.0 produces a fraction of
                # an hour appended as a string; minutes (digit * 6) were
                # probably intended — confirm against the SD2 spec.
                sample['TIME'] = x[:2] + str(int(x[2]) / 600.0)
            except ValueError:
                pass
            sample['CASTNO'] = raw_line['cast_number']
            # TODO ensure this is inorganic
            p = raw_line['inorganic_phosphate_precision']
            if p and p != 9:
                x = raw_line['inorganic_phosphate'].strip()
                sample['PHSPHT'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
            p = raw_line['silicate_precision']
            if p and p != 9:
                x = raw_line['silicate'].strip()
                sample['SILCAT'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
            p = raw_line['nitrite_precision']
            if p and p != 9:
                x = raw_line['nitrite'].strip()
                sample['NITRIT'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
            p = raw_line['nitrate_precision']
            if p and p != 9:
                x = raw_line['nitrate'].strip()
                sample['NITRAT'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
            # TODO which PH is this?
            p = raw_line['ph_precision']
            if p and p != 9:
                x = raw_line['ph'].strip()
                sample['PH'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
            if not current_station:
                raise ValueError(("Malformed SD2 file: Data record found "
                                  "before master record"))
            if current_station['_DATA_TYPE'] == 'NANSEN CAST':
                # NOTE(review): TEMPERATURE/SALINITY/OXYGEN (and their
                # _QC keys) are only set when a value was reported, so
                # the direct lookups below can raise KeyError for rows
                # missing those fields — confirm intended.
                merged_row = {
                    'EXPOCODE': current_station['EXPOCODE'],
                    'STNNBR': current_station['STNNBR'],
                    'LATITUDE': current_station['LATITUDE'],
                    'LONGITUDE': current_station['LONGITUDE'],
                    '_DATETIME': current_station['_DATETIME'],
                    'BOTTOM': current_station['BOTTOM'],
                    'CASTNO': sample['CASTNO'],
                    'DEPTH': sample['DEPTH'],
                    'DEPTH_FLAG_W':
                        _NODC_0608_TO_WOCE_FLAGS[sample['DEPTH_QC']],
                    # TODO figure out what parameter this should be
                    'CTDTMP': sample['TEMPERATURE'],
                    'CTDTMP_FLAG_W':
                        _NODC_0608_TO_WOCE_FLAGS[sample['TEMPERATURE_QC']],
                    'SALNTY': sample['SALINITY'],
                    'SALNTY_FLAG_W':
                        _NODC_0608_TO_WOCE_FLAGS[sample['SALINITY_QC']],
                    'OXYGEN': sample['OXYGEN'],
                    'OXYGEN_FLAG_W':
                        _NODC_0608_TO_WOCE_FLAGS[sample['OXYGEN_QC']],
                    'PHSPHT': sample.get('PHSPHT', None),
                    'SILCAT': sample.get('SILCAT', None),
                    'NITRIT': sample.get('NITRIT', None),
                    'NITRAT': sample.get('NITRAT', None),
                    'PH': sample.get('PH', None),
                }
                try:
                    # NOTE(review): datetime attributes are read-only;
                    # these assignments would raise AttributeError (not
                    # caught: only KeyError is) — .replace() is the
                    # correct API. Confirm whether this path is reached.
                    merged_row['_DATETIME'].hour = sample['TIME'][:2]
                    merged_row['_DATETIME'].minute = sample['TIME'][2:]
                except KeyError:
                    pass
                # Append the merged row at the next index.
                i = len(self)
                self['EXPOCODE'].set(i, merged_row['EXPOCODE'])
                self['STNNBR'].set(i, merged_row['STNNBR'])
                self['CASTNO'].set(i, merged_row['CASTNO'])
                self['LATITUDE'].set(i, merged_row['LATITUDE'])
                self['LONGITUDE'].set(i, merged_row['LONGITUDE'])
                self['_DATETIME'].set(i, merged_row['_DATETIME'])
                self['BOTTOM'].set(i, merged_row['BOTTOM'])
                self['DEPTH'].set(i, merged_row['DEPTH'],
                                  merged_row['DEPTH_FLAG_W'])
                self['CTDTMP'].set(i, merged_row['CTDTMP'],
                                   merged_row['CTDTMP_FLAG_W'])
                self['SALNTY'].set(i, merged_row['SALNTY'],
                                   merged_row['SALNTY_FLAG_W'])
                self['OXYGEN'].set(i, merged_row['OXYGEN'],
                                   merged_row['OXYGEN_FLAG_W'])
                self['PHSPHT'].set(i, merged_row['PHSPHT'])
                self['SILCAT'].set(i, merged_row['SILCAT'])
                self['NITRIT'].set(i, merged_row['NITRIT'])
                self['NITRAT'].set(i, merged_row['NITRAT'])
                self['PH'].set(i, merged_row['PH'])
            else:
                # CTD
                raise NotImplementedError("Can't read SD2 CTDs yet")

    # Drop columns that ended up with no values and no flags at all.
    # (Python 2 dict.items() returns a list, so deleting entries while
    # looping here is safe; under Python 3 this would need list().)
    for key, column in self.columns.items():
        if len(filter(None, column.values)) == 0 and \
                len(filter(None, column.flags_woce)) == 0 and \
                len(filter(None, column.flags_igoss)) == 0:
            del self.columns[key]

    self.globals['stamp'] = ''
    self.globals['header'] = ''

    self.check_and_replace_parameters()
def read(self, handle, metadata=None): """How to read a Bottle Bermuda Atlantic Time-Series Study file. This function reads bats_bottle.txt. Arguments: self - (special case, see NOTE) dictionary metadata - (optional) BATS cruise metadata to be used to find port dates NOTE: The result for this method is a special case. The bottle file format contains the entire BATS holdings while the internal data format splits data up by cruises. Because cruises for timeseries are split by file for cruise, the end result is a dictionary with cruise_ids as keys to DatafileCollections (cruises) containing Datafiles (casts). """ sections = _read_header_sections(self, handle) _read_variables(self, handle) parameters = _get_variables(self, handle, sections) # Add DON for note in Variables list stating DON is reported for TON prior # to BATS 121 parameters.append(['DON', None, 'umol/kg']) manual_parameters = [ ['BTLNBR', ''], ['_DATETIME', ''], ['LATITUDE', ''], ['LONGITUDE', ''], ['_ACTUAL_DEPTH', 'METERS'], ] columns = [x[0] for x in manual_parameters] units = [x[1] for x in manual_parameters] s = None for i, (var, d, u) in enumerate(parameters): if var == 'Depth': s = i + 1 continue # Only want to add parameters after Depth. The others were done manually. 
if s is None: continue try: var = bats_to_param[var] except KeyError: pass columns.append(var) units.append(u) template_df = DataFile() template_df.create_columns(columns, units) template_df.check_and_replace_parameters(convert=False) for sec, lines in sections.items(): if sec == 'Variables list': continue if sec != 'Comments': continue template_df.globals['_{0}'.format(sec)] = '\n'.join(lines) df = None params_auto = parameters[s:] dfi = 0 for i, l in enumerate(handle): parts = l.split() id = parts[0] (cruise_type, type_id, cruise_num, cruise_id, cast_type, cast_id, nisk_id) = _parse_bats_id(id) ship = _ship_from_cruise_num(cruise_num) if not ship: ship = 'R/V Atlantic Explorer' if (df is None or df.globals['_OS_ID'] != cruise_id or df.globals['STNNBR'] != cruise_type or df.globals['CASTNO'] != cast_id): if df is not None: # Done reading one cast. Finalize it. log.info(u'finalizing cast {0} {1} {2}'.format( df.globals['_OS_ID'], df.globals['STNNBR'], df.globals['CASTNO'])) try: meta = metadata[cruise_id] port_date = meta['dates'][0] except (TypeError, KeyError): port_date = None if not port_date: port_date = min(df['_DATETIME']) df.globals['EXPOCODE'] = create_expocode( ship_code(ship, raise_on_unknown=False), port_date) log.info(df.globals['EXPOCODE']) df.globals['DEPTH'] = max(df['_ACTUAL_DEPTH']) collapse_globals(df, ['_DATETIME', 'LATITUDE', 'LONGITUDE']) # Normalize all the parameter column lengths. 
There may be # columns that did not get data written to them so make sure # they are just as long as the rest length = len(df) for c in df.columns.values(): c.set_length(length) try: dfc = self[df.globals['_OS_ID']] except KeyError: dfc = self[df.globals['_OS_ID']] = DataFileCollection() dfc.files.append(df) dfi = 0 # Create a new cast df = copy(template_df) df.globals['SECT_ID'] = BATS_SECT_ID df.globals['_SHIP'] = ship df.globals['_OS_ID'] = cruise_id df.globals['STNNBR'] = cruise_type df.globals['CASTNO'] = cast_id df['BTLNBR'].set(dfi, nisk_id) dt_ascii = datetime.strptime(parts[1] + parts[3], '%Y%m%d%H%M') dt_deci = bats_time_to_dt(parts[2]) #if dt_ascii != dt_deci: # log.warn( # u'Dates differ on data row {0}: {5} {1!r}={2} ' # '{3!r}={4}'.format(i, parts[1] + parts[3], dt_ascii, parts[2], # dt_deci, dt_deci - dt_ascii)) df['_DATETIME'].set(dfi, dt_ascii) df['LATITUDE'].set(dfi, Decimal(parts[4])) df['LONGITUDE'].set(dfi, Decimal(correct_longitude(parts[5]))) df['_ACTUAL_DEPTH'].set_check_range(dfi, Decimal(parts[6])) parts_auto = parts[s:] for p, v in zip(params_auto, parts_auto): param = p[0] try: param = bats_to_param[param] except KeyError: pass if cruise_num < 121 and param == 'TON': param = 'DON' if (equal_with_epsilon(v, -9) or equal_with_epsilon(v, -9.9) or equal_with_epsilon(v, -9.99)): df[param].set_check_range(dfi, None) # TODO determine whether -10 is just bad formatting for -9.9 elif equal_with_epsilon(v, -10): #log.warn(u'Possible missing data value {0}'.format(v)) df[param].set_check_range(dfi, None) elif v == 0: log.warn(u'Data under detection limit, set flag to ' 'WOCE water sample questionable measurement') df[param].set_check_range(dfi, None, flag=3) else: df[param].set_check_range(dfi, Decimal(v)) dfi += 1 # Since this is a super long file that contains multiple cruises and # casts, as the file is processed it is split apart into a list of # DataFileCollection(s) containing DataFile objects for each casts if i % 100 == 0: 
log.info(u'processed {0} lines'.format(i))