Example 1
def woce_dec_lng_to_dec_lng(lngtoks):
    """Convert a longitude given as (decimal degrees, hemisphere) to a signed decimal.

    Returns None when the degrees token is not a valid Decimal.
    """
    # Precision grows with the token count so quantize keeps every digit.
    precision = 3 + len(lngtoks)
    with IncreasedPrecision(precision):
        try:
            magnitude = Decimal(lngtoks[0])
        except InvalidOperation:
            return None
        signed = magnitude * hemisphere_to_coeff(lngtoks[1])
        return signed.quantize(Decimal(10) ** -precision)
Example 2
def woce_lng_to_dec_lng(lngtoks):
    """Convert a longitude in WOCE format (degrees, minutes, hemisphere) to decimal.

    Returns None when the minutes token is not a valid Decimal.
    """
    # Precision grows with the token count so quantize keeps every digit.
    precision = 4 + len(lngtoks)
    with IncreasedPrecision(precision):
        try:
            degrees = int(lngtoks[0])
            minutes = Decimal(lngtoks[1]) / Decimal('60.0')
        except InvalidOperation:
            return None
        signed = (degrees + minutes) * hemisphere_to_coeff(lngtoks[2])
        return signed.quantize(Decimal(10) ** -precision)
Example 3
 def test_read_btlnbr_as_string(self):
     """Bottle numbers stay strings while CTDSAL values become Decimals."""
     with closing(StringIO()) as buff:
         buff.write('SIO1,33.24\n')
         buff.write('01,32.10\n')
         buff.flush()
         buff.seek(0)
         datafile = DataFile()
         # Register both columns before handing the buffer to the reader.
         for param in ('BTLNBR', 'CTDSAL'):
             datafile[param] = Column(param)
         exchange.read_data(datafile, buff, ['BTLNBR', 'CTDSAL'])
         self.assertEqual(datafile['BTLNBR'].values, ['SIO1', '01'])
         self.assertEqual(
             datafile['CTDSAL'].values,
             [Decimal('33.24'), Decimal('32.10')])
def combine(bats_file, event_sum_file):
    """Combines the given BATS .dpr file with the Summary event.log file so
       that the DataFile contains most of the information from both.

    Matches the BATS cast to an event.log row by comparing latitude and
    longitude within a small epsilon, then copies the event's DEPTH into
    the BATS file globals. Logs an error and returns early when no event
    row matches.
    """
    # It is pretty much given that the data is CTD.

    lat, lng = bats_file.globals['LATITUDE'], bats_file.globals['LONGITUDE']

    # Find the event log record whose position matches within epsilon.
    # The epsilon is loop-invariant, so build it once.
    epsilon = Decimal('1e-3')
    sum_file_i = None
    for i in range(len(event_sum_file)):
        sumlat = event_sum_file['LATITUDE'][i]
        sumlng = event_sum_file['LONGITUDE'][i]
        if (equal_with_epsilon(lat, sumlat, epsilon) and
                equal_with_epsilon(lng, sumlng, epsilon)):
            sum_file_i = i
            break

    if sum_file_i is None:
        log.error('Event for BATS data at %f %f not found' % (lat, lng))
        return
    headers = event_sum_file.column_headers()
    # BUG FIX: previously indexed with the bare loop variable ``i``; use the
    # matched index explicitly so the lookup stays correct if the search loop
    # above is ever restructured.
    row = event_sum_file.row(sum_file_i)

    info = dict(zip(headers, row))
    bats_file.globals['DEPTH'] = info['DEPTH']
Example 5
def read(self, handle, retain_order=False, header_only=False):
    """How to read a CTD Exchange file.

    Arguments:
    handle - file-like object positioned at the start of the file
    retain_order - preserve the file's column order when creating columns
    header_only - only read the CTD headers, not the data

    Raises ValueError when NUMBER_HEADERS is missing, when a header line is
    malformed, or when the parameter and unit rows disagree in length.
    """
    read_identifier_line(self, handle, 'CTD')
    line = read_comments(self, handle)

    # Read NUMBER_HEADERS
    re_num_headers = re_compile('NUMBER_HEADERS\s*=\s*(\d+)')
    m = re_num_headers.match(line)
    if m:
        # NUMBER_HEADERS counts itself as a header
        num_headers = int(m.group(1)) - 1
    else:
        raise ValueError(
            u'Expected NUMBER_HEADERS as the second non-comment line.')
    re_header = re_compile('(\w+)\s*=\s*(-?[\w\.]*)')
    for i in range(num_headers):
        m = re_header.match(handle.readline())
        if not m:
            raise ValueError(('Expected %d continuous headers '
                              'but only saw %d') % (num_headers, i))
        key, value = m.group(1), m.group(2)
        # Position headers are numeric; everything else stays a string.
        if key in REQUIRED_HEADERS and key in ['LATITUDE', 'LONGITUDE']:
            self.globals[key] = Decimal(value)
        else:
            self.globals[key] = value
    woce.fuse_datetime(self)

    if header_only:
        return

    # Read parameters and units
    columns = handle.readline().strip().split(',')
    units = handle.readline().strip().split(',')

    # BUG FIX: compare lengths with != rather than ``is not``. Identity
    # comparison of ints relies on CPython's small-int cache and silently
    # misbehaves for larger counts.
    if len(columns) != len(units):
        raise ValueError(("Expected as many columns as units in file. "
                          "Found %d columns and %d units.") % \
                         (len(columns), len(units)))

    # Check all parameters are non-trivial
    if not all(columns):
        log.warn(
            ("Stripped blank parameter from MALFORMED EXCHANGE FILE\n"
             "This may be caused by an extra comma at the end of a line."))
        # Use a list comprehension so the result is a real list on both
        # Python 2 and 3 (filter() is a one-shot iterator on Python 3 and
        # ``columns`` is consumed twice below).
        columns = [col for col in columns if col]

    self.create_columns(columns, units, retain_order)

    read_data(self, handle, columns)

    self.check_and_replace_parameters()
Example 6
    def test_density(self):
        """Test values from UNESCO 44 pg -19-"""
        # (salinity, temperature, pressure) -> expected density
        cases = [
            (( 0, 5,      0), '999.96675'),
            (( 0, 5,  10000), '1044.12802'),
            (( 0, 25,     0), '997.04796'),
            (( 0, 25, 10000), '1037.90204'),
            ((35, 5,      0), '1027.67547'),
            ((35, 5,  10000), '1069.48914'),
            ((35, 25,     0), '1023.34306'),
            ((35, 25, 10000), '1062.53817'),
        ]
        for args, expected in cases:
            self.assertAlmostEqual(X.density(*args), Decimal(expected), 5)
Example 7
def check_variable_ranges(nc_file):
    """Warn about any variable values that fall outside the variable's range.

    For each netCDF variable, the range is taken from data_min/data_max,
    falling back to valid_min/valid_max; variables declaring neither are
    skipped. NaN values are ignored.
    """
    for name, variable in nc_file.variables.items():
        # Renamed from min/max to avoid shadowing the builtins.
        try:
            vmin = Decimal(str(variable.data_min))
            vmax = Decimal(str(variable.data_max))
        except AttributeError:
            try:
                vmin = Decimal(str(variable.valid_min))
                vmax = Decimal(str(variable.valid_max))
            except AttributeError:
                continue
        for y in variable[:]:
            if fns.isnan(y):
                continue
            x = Decimal(str(y))
            if x < vmin:
                log.warn('%s too small for %s range (%s, %s)' % \
                         (str(x), name, str(vmin), str(vmax)))
            if x > vmax:
                log.warn('%s too large for %s range (%s, %s)' % \
                         (str(x), name, str(vmin), str(vmax)))
Example 8
 def test_read_summary_hot(self):
     """Read the sample HOT summary text and verify every parsed column."""
     self.file = SummaryFile()
     self.buff = StringIO(self.sample_hot)
     sumhot.read(self.file, self.buff)

     cs = self.file.columns
     # Identification fields repeat for all three rows of the sample.
     self.assertEqual(['33KI134_1'] * 3, cs['EXPOCODE'].values)
     self.assertEqual(['PRS2'] * 3, cs['SECT_ID'].values)
     self.assertEqual(['1', '2', '2'], cs['STNNBR'].values)
     self.assertEqual([1, 1, 15], cs['CASTNO'].values)
     self.assertEqual(['ROS'] * 3, cs['_CAST_TYPE'].values)
     self.assertEqual([
         datetime(2002, 1, 14, 23, 34), datetime(2002, 1, 15, 12, 59),
         datetime(2002, 1, 17, 8, 56)], cs['_DATETIME'].values)
     self.assertEqual(['BE'] * 3, cs['_CODE'].values)

     # Positions use the fp_eq epsilon comparison rather than exact equality.
     self.assertTrue(fp_eq(Decimal('21.344333333'), cs['LATITUDE'].values[0]))
     self.assertTrue(fp_eq(Decimal('22.75'), cs['LATITUDE'].values[1]))
     self.assertTrue(fp_eq(Decimal('22.74950'), cs['LATITUDE'].values[2]))

     self.assertTrue(fp_eq(Decimal('-158.271'), cs['LONGITUDE'].values[0]))
     self.assertTrue(fp_eq(Decimal('-158.001'), cs['LONGITUDE'].values[1]))
     self.assertTrue(fp_eq(Decimal('-158.000166'), cs['LONGITUDE'].values[2]))

     self.assertEqual(['GPS'] * 3, cs['_NAV'].values)
     self.assertEqual([1503, 4720, 4720], cs['DEPTH'].values)
     self.assertEqual([16, 24, 12], cs['_NUM_BOTTLES'].values)
     self.assertEqual([1020, 4806, 4806], cs['_MAX_PRESSURE'].values)
     self.assertEqual(['1,2', '1,2,3,4,5,6', '1,2'], cs['_PARAMETERS'].values)
     self.assertEqual(['Dual T, C sensors'] * 3, cs['_COMMENTS'].values)
Example 9
 def test_read_summary_woce(self):
     """Read the sample WOCE summary text and verify every parsed column."""
     self.file = SummaryFile()
     self.buff = StringIO(self.sample_woce)
     sumwoce.read(self.file, self.buff)

     cs = self.file.columns
     # Identification fields repeat for all three rows of the sample.
     self.assertEqual(['33RR20070204'] * 3, cs['EXPOCODE'].values)
     self.assertEqual(['I8S'] * 3, cs['SECT_ID'].values)
     self.assertEqual(['1', '1', '2'], cs['STNNBR'].values)
     self.assertEqual([1, 1, 1], cs['CASTNO'].values)
     self.assertEqual(['ROS'] * 3, cs['_CAST_TYPE'].values)
     self.assertEqual([
         datetime(2007, 2, 15, 14, 24), datetime(2007, 2, 15, 14, 42),
         datetime(2007, 2, 15, 17, 5)], cs['_DATETIME'].values)
     self.assertEqual(['BE', 'BO', 'BE'], cs['_CODE'].values)

     # Positions use the fp_eq epsilon comparison rather than exact equality.
     self.assertTrue(fp_eq(Decimal('-65.81083'), cs['LATITUDE'].values[0]))
     self.assertTrue(fp_eq(Decimal('-65.81083'), cs['LATITUDE'].values[1]))
     self.assertTrue(fp_eq(Decimal('-65.76816'), cs['LATITUDE'].values[2]))

     self.assertTrue(fp_eq(Decimal('84.549999'), cs['LONGITUDE'].values[0]))
     self.assertTrue(fp_eq(Decimal('84.55016'), cs['LONGITUDE'].values[1]))
     self.assertTrue(fp_eq(Decimal('84.53483'), cs['LONGITUDE'].values[2]))

     self.assertEqual(['GPS'] * 3, cs['_NAV'].values)
     self.assertEqual([450, 450, 1257], cs['DEPTH'].values)
     # Missing fields in the WOCE sample come back as None.
     self.assertEqual([None, 439, None], cs['_MAX_PRESSURE'].values)
     self.assertEqual([None, 16, None], cs['_NUM_BOTTLES'].values)
     self.assertEqual(['test', '1-8,23-24,27,43,104-112', None], cs['_PARAMETERS'].values)
     self.assertEqual([None] * 3, cs['_COMMENTS'].values)
def deg_min_to_decimal_deg(deg, min):
    """Convert degrees plus minutes into decimal degrees as a Decimal."""
    minutes_fraction = Decimal(min) / Decimal(60)
    return Decimal(deg) + minutes_fraction
Example 11
def read(self, handle):
    """How to read an NODC SD2 file.

    Records are fixed-width lines; the character at index 79 selects the
    record type: '1' = Master Record 1 (station metadata), '2' = Master
    Record 2 (weather/observation counts, unused here), '3' = a data
    (observation) record. Data rows are merged with the most recent
    station record and appended to this file's columns.
    """

    # Station identification columns (no units).
    self.create_columns((
        'EXPOCODE',
        'STNNBR',
        'CASTNO',
        '_DATETIME',
        'LATITUDE',
        'LONGITUDE',
    ))
    # Measured parameters with their units.
    self.create_columns(('BOTTOM', 'DEPTH', 'CTDTMP', 'SALNTY', 'OXYGEN',
                         'PHSPHT', 'SILCAT', 'NITRIT', 'NITRAT', 'PH'), (
                             'METERS',
                             'METERS',
                             'DEG C',
                             'PSU',
                             'ML/L',
                             'UMOL/L',
                             'UMOL/L',
                             'UMOL/L',
                             'UMOL/L',
                             '',
                         ))

    # The station from the last Master Record 1; data rows merge into it.
    current_station = None
    current_cast = 1

    while handle:
        line = handle.readline()
        if not line:
            break

        if line[79] == '2':
            # Nothing in MR 2 that matters.
            # NOTE(review): this early skip makes the ``elif line[79] == '2'``
            # branch further below unreachable — confirm that is intended.
            continue

        #print 'line:', line,
        #print 'rule:', '|-`-*+-`-*' * 8

        if line[79] == '1':
            station = {}
            # Slice the fixed-width Master Record 1 into named fields.
            raw_line = {
                'continuation_indicator': int_or_none(line[0]),
                'nodc_ref_num_country': int_or_none(line[2:4]),
                'file_code': int_or_none(line[4]),
                'nodc_ref_num_cruise_number': int_or_none(line[5:9]),
                'nodc_consecutive_station_number': int_or_none(line[9:13]),
                'data_type': int_or_none(line[13:15]),
                'ten-degree_square': int_or_none(line[17:21]),
                'one-degree_square': int_or_none(line[21:23]),
                'two-degree_square': int_or_none(line[23:25]),
                'five-degree_square': int_or_none(line[25]),
                'hemisphere_of_latitude': line[26],
                'degrees_latitude': int_or_none(line[27:29]),
                'minutes_latitude': int_or_none(line[29:31]),
                'minutes_latitude_tenths': int_or_none(line[31]),
                'hemisphere_of_longitude': line[32],
                'degrees_longitude': int_or_none(line[33:36]),
                'minutes_longitude': int_or_none(line[36:38]),
                'minutes_longitude_tenths': int_or_none(line[38]),
                'quarter_of_one_degree_square': int_or_none(line[39]),
                'year_gmt': int_or_none(line[40:42]),
                'month_of_year_gmt': int_or_none(line[42:44]),
                'day_of_month_gmt': int_or_none(line[44:46]),
                'station_time_gmt_hours_to_tenths': line[46:49],

                # This is marked as platform on
                # http://www.nodc.noaa.gov/General/NODC-Archive/sd2.html
                'data_origin_country': line[49:51],
                # These two are marked blank
                'data_origin_institution': line[51:53],
                'data_origin_platform': line[53:55],
                'bottom_depth': line[55:60],
                'effective_depth': int_or_none(line[60:64]),
                'cast_duration_hours_to_tenths': line[64:67],
                'cast_direction': line[67],
                'data_use_code': line[69],
                'minimum_depth': int_or_none(line[70:74]),
                'maximum_depth': int_or_none(line[74:78]),
                'always_2_next_record_indicator': line[78],
                'always_1_record_indicator': line[79],
            }

            assert raw_line['file_code'] == 5, \
                "Master Record 1 is corrupt. File Code should always be 5."
            assert raw_line['always_2_next_record_indicator'] == '2' or \
                   raw_line['always_2_next_record_indicator'].strip() == '', \
                "Master Record 1 is corrupt."
            assert raw_line['always_1_record_indicator'] == '1', \
                "Not master record 1. Algorithm is wrong."

            if raw_line['continuation_indicator']:
                continuation_indicator = raw_line['continuation_indicator']
                # 0 - one station record
                # 1 - first station record
                # 2-8 - intermediate records
                # 9 - last station record
                # TODO handle multiple records per stations

            station['EXPOCODE'] = raw_line['nodc_ref_num_cruise_number']
            station['STNNBR'] = raw_line['nodc_consecutive_station_number']
            station['_DATA_TYPE'] = _DATA_TYPE_CODES.get(
                raw_line['data_type'], 'UNKNOWN')

            if not raw_line['hemisphere_of_latitude'] in ('N', 'S'):
                raise ValueError(
                    ("Master Record 1 is corrupt. Latitude hemisphere must be "
                     "N or S."))

            # Degrees + minutes + tenths-of-minutes, signed by hemisphere,
            # then truncated to _MAX_GRATICULE_PRECISION decimal places.
            latitude = str(
                (1 if raw_line['hemisphere_of_latitude'] == 'N' else -1) * \
                (raw_line['degrees_latitude'] +
                 raw_line['minutes_latitude'] / 60.0 +
                 raw_line['minutes_latitude_tenths'] / 600.0))
            latitude = latitude[:latitude.find('.') + \
                                 _MAX_GRATICULE_PRECISION + 1]
            station['LATITUDE'] = Decimal(latitude)

            if not raw_line['hemisphere_of_longitude'] in ('E', 'W'):
                raise ValueError((
                    "Master Record 1 is corrupt. Longitude hemisphere must be "
                    "E or W."))

            longitude = str(
                (1 if raw_line['hemisphere_of_longitude'] == 'E' else -1) * \
                (raw_line['degrees_longitude'] +
                 raw_line['minutes_longitude'] / 60.0 +
                 raw_line['minutes_longitude_tenths'] / 600.0))
            longitude = longitude[:longitude.find('.') + \
                                   _MAX_GRATICULE_PRECISION + 1]
            station['LONGITUDE'] = Decimal(longitude)

            # Time field is HHt (hours then tenths of an hour).
            hours = int(raw_line['station_time_gmt_hours_to_tenths'][:2])
            minutes = int(raw_line['station_time_gmt_hours_to_tenths'][2]) * 6

            # Two-digit years are assumed to be 1900-based.
            station['_DATETIME'] = datetime(*(1900 + raw_line['year_gmt'],
                                              raw_line['month_of_year_gmt'],
                                              raw_line['day_of_month_gmt']),
                                            hour=hours,
                                            minute=minutes)

            try:
                station['BOTTOM'] = int(raw_line['bottom_depth'])
            except ValueError:
                station['BOTTOM'] = None

            current_station = station
        elif line[79] == '2':
            # NOTE(review): unreachable — record type '2' is skipped near the
            # top of the loop before reaching this branch.
            raw_line = {
                'depth_difference': line[0:4],
                'sample_interval': line[4:6],
                'salinity_observed': line[6],
                'oxygen_observed': line[7],
                'phosphate_observed': line[8],
                'total_phosphorous_observed': line[9],
                'silicate_observed': line[10],
                'nitrite_observed': line[11],
                'nitrate_observed': line[12],
                'ph_observed': line[13],
                'originators_cruise_identifier': line[13:17],
                'originators_station_identifier': int_or_none(line[17:26]),
                'water_color': int_or_none(line[26:28]),
                'water_transparency': int_or_none(line[28:30]),
                'wave_direction': int_or_none(line[30:32]),
                'wave_height': line[32],
                'sea_state': int_or_none(line[33]),
                'wind_force': line[34:36],
                'file_update_code': int_or_none(line[36]),
                'wave_period': line[37],
                'wind_direction': int_or_none(line[38:40]),
                'wind_speed': int_or_none(line[40:42]),
                'barometric_pressure': line[42:47],
                'dry_bulb_temperature': line[47:51],
                'dry_bulb_temperature_precision': int_or_none(line[51]),
                'wet_bulb_temperature': line[52:56],
                'wet_bulb_temperature_precision': int_or_none(line[56]),
                'weather': line[57:59],
                'cloud_type': int_or_none(line[59]),
                'cloud_amount': int_or_none(line[60]),
                'number_of_observed_depths': int_or_none(line[61:64]),
                'number_of_standard_depth_levels': int_or_none(line[64:66]),
                'number_of_detail_depths': int_or_none(line[66:69]),
                'blank': line[69:78],
                'next_record_indicator': line[78],
                'always_2_record_indicator': line[79],
            }
            # Effectively nothing here to care about.
        elif line[79] == '3':
            # Slice the fixed-width data record into named fields.
            raw_line = {
                'depth': int_or_none(line[0:5]),
                'depth_quality_indicator': int_or_none(line[5]),
                'thermometric_depth_flag': line[6],
                'temperature': line[7:12],
                'temperature_precision': int_or_none(line[12]),
                'temperature_quality_indicator': int_or_none(line[13]),
                'salinity': line[14:19],
                'salinity_precision': int_or_none(line[19]),
                'salinity_quality_indicator': int_or_none(line[20]),
                'sigma-t': int_or_none(line[21:25]),
                'sigma-t_quality_indicator': int_or_none(line[25]),
                'sound_speed': int_or_none(line[26:31]),
                'sound_speed_precision': int_or_none(line[31]),
                'oxygen': line[32:36],
                'oxygen_precision': int_or_none(line[36]),
                'oxygen_quality_indicator': int_or_none(line[37]),
                'data_range_check_flags_phosphate': int_or_none(line[38]),
                'data_range_check_flags_total': int_or_none(line[39]),
                'data_range_check_flags_silicate': int_or_none(line[40]),
                'data_range_check_flags_nitrite': int_or_none(line[41]),
                'data_range_check_flags_nitrate': int_or_none(line[42]),
                'data_range_check_flags_ph': int_or_none(line[43]),
                'cast_start_time_or_messenger_release_time': line[44:47],
                'cast_number': int_or_none(line[47]),
                'inorganic_phosphate': line[48:52],
                'inorganic_phosphate_precision': int_or_none(line[52]),
                'total_phosphorous': line[53:57],
                'total_phosphorous_precision)': int_or_none(line[57]),
                'silicate': line[58:62],
                'silicate_precision': int_or_none(line[62]),
                'nitrite': line[63:66],
                'nitrite_precision': int_or_none(line[66]),
                'nitrate': line[67:70],
                'nitrate_precision': int_or_none(line[70]),
                'ph': line[71:74],
                'ph_precision': int_or_none(line[74]),
                'blank': line[75:77],
                'density_inversion_flag': int_or_none(line[77]),
                'next_record_type': int_or_none(line[78]),
                'record_type': int_or_none(line[79]),
            }

            sample = {}

            assert raw_line['record_type'] == 3, \
                ("Only observations are handled by this reader. "
                 "Interpolations are not handled.")

            sample['DEPTH'] = raw_line['depth']
            sample['DEPTH_QC'] = raw_line['depth_quality_indicator']
            # For each parameter: a precision p in 1..8 means the raw digits
            # carry p implied decimal places; p == 9 or missing means absent.
            p = raw_line['temperature_precision']
            if p and p != 9:
                x = raw_line['temperature'].strip()
                sample['TEMPERATURE'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
                sample['TEMPERATURE_QC'] = \
                    raw_line['temperature_quality_indicator']
            p = raw_line['salinity_precision']
            if p and p != 9:
                x = raw_line['salinity'].strip()
                sample['SALINITY'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
                sample['SALINITY_QC'] = raw_line['salinity_quality_indicator']
            p = raw_line['oxygen_precision']
            if p and p != 9:
                x = raw_line['oxygen'].strip()
                sample['OXYGEN'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
                sample['OXYGEN_QC'] = raw_line['oxygen_quality_indicator']
            try:
                x = raw_line['cast_start_time_or_messenger_release_time']
                sample['TIME'] = x[:2] + str(int(x[2]) / 600.0)
            except ValueError:
                pass
            sample['CASTNO'] = raw_line['cast_number']
            # TODO ensure this is inorganic
            p = raw_line['inorganic_phosphate_precision']
            if p and p != 9:
                x = raw_line['inorganic_phosphate'].strip()
                sample['PHSPHT'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
            p = raw_line['silicate_precision']
            if p and p != 9:
                x = raw_line['silicate'].strip()
                sample['SILCAT'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
            p = raw_line['nitrite_precision']
            if p and p != 9:
                x = raw_line['nitrite'].strip()
                sample['NITRIT'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
            p = raw_line['nitrate_precision']
            if p and p != 9:
                x = raw_line['nitrate'].strip()
                sample['NITRAT'] = Decimal('%s.%s' % (x[:-p], x[-p:]))
            # TODO which PH is this?
            p = raw_line['ph_precision']
            if p and p != 9:
                x = raw_line['ph'].strip()
                sample['PH'] = Decimal('%s.%s' % (x[:-p], x[-p:]))

            if not current_station:
                raise ValueError(("Malformed SD2 file: Data record found "
                                  "before master record"))
            if current_station['_DATA_TYPE'] == 'NANSEN CAST':
                # NOTE(review): sample['TEMPERATURE'] (and _QC, SALINITY,
                # OXYGEN) are only set above when the precision field was
                # usable; a row missing them would raise KeyError here —
                # confirm such rows cannot occur in NANSEN casts.
                merged_row = {
                    'EXPOCODE':
                    current_station['EXPOCODE'],
                    'STNNBR':
                    current_station['STNNBR'],
                    'LATITUDE':
                    current_station['LATITUDE'],
                    'LONGITUDE':
                    current_station['LONGITUDE'],
                    '_DATETIME':
                    current_station['_DATETIME'],
                    'BOTTOM':
                    current_station['BOTTOM'],
                    'CASTNO':
                    sample['CASTNO'],
                    'DEPTH':
                    sample['DEPTH'],
                    'DEPTH_FLAG_W':
                    _NODC_0608_TO_WOCE_FLAGS[sample['DEPTH_QC']],
                    # TODO figure out what parameter this should be
                    'CTDTMP':
                    sample['TEMPERATURE'],
                    'CTDTMP_FLAG_W':
                    _NODC_0608_TO_WOCE_FLAGS[sample['TEMPERATURE_QC']],
                    'SALNTY':
                    sample['SALINITY'],
                    'SALNTY_FLAG_W':
                    _NODC_0608_TO_WOCE_FLAGS[sample['SALINITY_QC']],
                    'OXYGEN':
                    sample['OXYGEN'],
                    'OXYGEN_FLAG_W':
                    _NODC_0608_TO_WOCE_FLAGS[sample['OXYGEN_QC']],
                    'PHSPHT':
                    sample.get('PHSPHT', None),
                    'SILCAT':
                    sample.get('SILCAT', None),
                    'NITRIT':
                    sample.get('NITRIT', None),
                    'NITRAT':
                    sample.get('NITRAT', None),
                    'PH':
                    sample.get('PH', None),
                }
                try:
                    # NOTE(review): datetime attributes are read-only;
                    # if sample['TIME'] exists these assignments raise
                    # AttributeError, which this except does not catch —
                    # confirm intended behavior (likely needs .replace()).
                    merged_row['_DATETIME'].hour = sample['TIME'][:2]
                    merged_row['_DATETIME'].minute = sample['TIME'][2:]
                except KeyError:
                    pass
                # Append the merged row at the next index of every column.
                i = len(self)
                self['EXPOCODE'].set(i, merged_row['EXPOCODE'])
                self['STNNBR'].set(i, merged_row['STNNBR'])
                self['CASTNO'].set(i, merged_row['CASTNO'])
                self['LATITUDE'].set(i, merged_row['LATITUDE'])
                self['LONGITUDE'].set(i, merged_row['LONGITUDE'])
                self['_DATETIME'].set(i, merged_row['_DATETIME'])
                self['BOTTOM'].set(i, merged_row['BOTTOM'])
                self['DEPTH'].set(i, merged_row['DEPTH'],
                                  merged_row['DEPTH_FLAG_W'])
                self['CTDTMP'].set(i, merged_row['CTDTMP'],
                                   merged_row['CTDTMP_FLAG_W'])
                self['SALNTY'].set(i, merged_row['SALNTY'],
                                   merged_row['SALNTY_FLAG_W'])
                self['OXYGEN'].set(i, merged_row['OXYGEN'],
                                   merged_row['OXYGEN_FLAG_W'])
                self['PHSPHT'].set(i, merged_row['PHSPHT'])
                self['SILCAT'].set(i, merged_row['SILCAT'])
                self['NITRIT'].set(i, merged_row['NITRIT'])
                self['NITRAT'].set(i, merged_row['NITRAT'])
                self['PH'].set(i, merged_row['PH'])
            else:
                # CTD
                raise NotImplementedError("Can't read SD2 CTDs yet")

    # Drop columns that ended up with no values and no flags at all.
    # NOTE(review): filter() here is Python 2 style; on Python 3 this would
    # need list() (and mutating self.columns during items() iteration would
    # raise) — confirm target runtime.
    for key, column in self.columns.items():
        if len(filter(None, column.values)) == 0 and \
           len(filter(None, column.flags_woce)) == 0 and \
           len(filter(None, column.flags_igoss)) == 0:
            del self.columns[key]

    self.globals['stamp'] = ''
    self.globals['header'] = ''

    self.check_and_replace_parameters()
def read(self, handle, metadata=None):
    """How to read a Bottle Bermuda Atlantic Time-Series Study file.

    This function reads bats_bottle.txt.

    Arguments:
    self - (special case, see NOTE) dictionary
    metadata - (optional) BATS cruise metadata to be used to find port dates

    NOTE: The result for this method is a special case. The bottle file format
    contains the entire BATS holdings while the internal data format splits data
    up by cruises. Because cruises for timeseries are split by file for cruise,
    the end result is a dictionary with cruise_ids as keys to
    DatafileCollections (cruises) containing Datafiles (casts). 

    """
    sections = _read_header_sections(self, handle)
    _read_variables(self, handle)
    parameters = _get_variables(self, handle, sections)

    # Add DON for note in Variables list stating DON is reported for TON prior
    # to BATS 121
    parameters.append(['DON', None, 'umol/kg'])

    # Columns that are parsed by hand from fixed positions in each data row,
    # rather than from the file's Variables list.
    manual_parameters = [
        ['BTLNBR', ''],
        ['_DATETIME', ''],
        ['LATITUDE', ''],
        ['LONGITUDE', ''],
        ['_ACTUAL_DEPTH', 'METERS'],
    ]
    columns = [x[0] for x in manual_parameters]
    units = [x[1] for x in manual_parameters]

    # s marks the index of the first parameter after 'Depth'; only those are
    # added automatically (the earlier ones were handled manually above).
    s = None
    for i, (var, d, u) in enumerate(parameters):
        if var == 'Depth':
            s = i + 1
            continue
        # Only want to add parameters after Depth. The others were done manually.
        if s is None:
            continue
        try:
            var = bats_to_param[var]
        except KeyError:
            pass
        columns.append(var)
        units.append(u)

    # Template DataFile cloned for each cast found in the bottle file.
    template_df = DataFile()
    template_df.create_columns(columns, units)
    template_df.check_and_replace_parameters(convert=False)

    for sec, lines in sections.items():
        if sec == 'Variables list':
            continue
        if sec != 'Comments':
            continue
        template_df.globals['_{0}'.format(sec)] = '\n'.join(lines)

    df = None
    params_auto = parameters[s:]
    dfi = 0
    for i, l in enumerate(handle):
        parts = l.split()

        id = parts[0]
        (cruise_type, type_id, cruise_num, cruise_id, cast_type, cast_id,
         nisk_id) = _parse_bats_id(id)
        ship = _ship_from_cruise_num(cruise_num)
        if not ship:
            ship = 'R/V Atlantic Explorer'

        # A change in cruise/station/cast means the current DataFile is
        # complete; finalize it and start a new one.
        if (df is None or df.globals['_OS_ID'] != cruise_id
                or df.globals['STNNBR'] != cruise_type
                or df.globals['CASTNO'] != cast_id):
            if df is not None:
                # Done reading one cast. Finalize it.
                log.info(u'finalizing cast {0} {1} {2}'.format(
                    df.globals['_OS_ID'], df.globals['STNNBR'],
                    df.globals['CASTNO']))
                try:
                    meta = metadata[cruise_id]
                    port_date = meta['dates'][0]
                except (TypeError, KeyError):
                    port_date = None
                if not port_date:
                    port_date = min(df['_DATETIME'])
                df.globals['EXPOCODE'] = create_expocode(
                    ship_code(ship, raise_on_unknown=False), port_date)
                log.info(df.globals['EXPOCODE'])
                df.globals['DEPTH'] = max(df['_ACTUAL_DEPTH'])
                collapse_globals(df, ['_DATETIME', 'LATITUDE', 'LONGITUDE'])
                # Normalize all the parameter column lengths. There may be
                # columns that did not get data written to them so make sure
                # they are just as long as the rest
                length = len(df)
                for c in df.columns.values():
                    c.set_length(length)
                try:
                    dfc = self[df.globals['_OS_ID']]
                except KeyError:
                    dfc = self[df.globals['_OS_ID']] = DataFileCollection()
                dfc.files.append(df)
                dfi = 0

            # Create a new cast
            # NOTE(review): copy() is a shallow copy, so the cloned casts may
            # share column objects with template_df — confirm DataFile/copy
            # semantics elsewhere make this safe.
            # NOTE(review): the final cast is only appended when a *new* cast
            # begins; nothing after the loop appends the last df — confirm the
            # last cast is not silently dropped.
            df = copy(template_df)
            df.globals['SECT_ID'] = BATS_SECT_ID
            df.globals['_SHIP'] = ship
            df.globals['_OS_ID'] = cruise_id
            df.globals['STNNBR'] = cruise_type
            df.globals['CASTNO'] = cast_id

        df['BTLNBR'].set(dfi, nisk_id)

        # The row carries the timestamp twice: ASCII yyyymmdd+hhmm and a
        # decimal-year form; the ASCII one is used.
        dt_ascii = datetime.strptime(parts[1] + parts[3], '%Y%m%d%H%M')
        dt_deci = bats_time_to_dt(parts[2])
        #if dt_ascii != dt_deci:
        #    log.warn(
        #        u'Dates differ on data row {0}: {5} {1!r}={2} '
        #        '{3!r}={4}'.format(i, parts[1] + parts[3], dt_ascii, parts[2],
        #                           dt_deci, dt_deci - dt_ascii))
        df['_DATETIME'].set(dfi, dt_ascii)

        df['LATITUDE'].set(dfi, Decimal(parts[4]))
        df['LONGITUDE'].set(dfi, Decimal(correct_longitude(parts[5])))
        df['_ACTUAL_DEPTH'].set_check_range(dfi, Decimal(parts[6]))

        # Remaining tokens line up with the auto-detected parameter list.
        parts_auto = parts[s:]
        for p, v in zip(params_auto, parts_auto):
            param = p[0]
            try:
                param = bats_to_param[param]
            except KeyError:
                pass
            if cruise_num < 121 and param == 'TON':
                param = 'DON'

            # -9 / -9.9 / -9.99 are the file's missing-data sentinels.
            if (equal_with_epsilon(v, -9) or equal_with_epsilon(v, -9.9)
                    or equal_with_epsilon(v, -9.99)):
                df[param].set_check_range(dfi, None)
            # TODO determine whether -10 is just bad formatting for -9.9
            elif equal_with_epsilon(v, -10):
                #log.warn(u'Possible missing data value {0}'.format(v))
                df[param].set_check_range(dfi, None)
            elif v == 0:
                # NOTE(review): v is a str token here, so ``v == 0`` compares
                # str to int — confirm whether '0' values should hit this
                # branch; they currently do not.
                log.warn(u'Data under detection limit, set flag to '
                         'WOCE water sample questionable measurement')
                df[param].set_check_range(dfi, None, flag=3)
            else:
                df[param].set_check_range(dfi, Decimal(v))

        dfi += 1
        # Since this is a super long file that contains multiple cruises and
        # casts, as the file is processed it is split apart into a list of
        # DataFileCollection(s) containing DataFile objects for each casts
        if i % 100 == 0:
            log.info(u'processed {0} lines'.format(i))