Esempio n. 1
0
class TestCTDNetCDFOceansites(unittest.TestCase):
    def setUp(self):
        self._infile = open(
            os.path.join(
                os.path.dirname(__file__),
                'samples/nc_hyd/i08s_33RR20070204_00001_00001_hy1.nc'), 'r')
        self.datafile = DataFile()
        self._outfile = NamedTemporaryFile()

        g = self.datafile.globals
        g['DATE'] = '12341231'
        g['TIME'] = '2359'
        g['LATITUDE'] = 90
        g['LONGITUDE'] = 180
        g['DEPTH'] = -1
        g['EXPOCODE'] = 'test'
        g['STNNBR'] = '20'
        g['CASTNO'] = '5'
        g['_OS_ID'] = 'OS1'
        fuse_datetime(self.datafile)

    def tearDown(self):
        self._infile.close()

    def _setupData(self):
        self.datafile['CTDPRS'] = Column('CTDPRS')
        self.datafile['CTDPRS'].append(1, 2)
        self.datafile['CTDOXY'] = Column('CTDOXY')
        self.datafile['CTDOXY'].append(1, 2)
        self.datafile.check_and_replace_parameters()
        p = self.datafile['CTDOXY'].parameter
        p.description = 'ctd oxygen'
        p.bound_lower = 0
        p.bound_upper = 200

    def test_write(self):
        self.assertRaises(AttributeError, ctdncos.write, self.datafile,
                          self._outfile)

        self._setupData()
        ctdncos.write(self.datafile, self._outfile)

    def test_write_timeseries(self):
        self._setupData()
        ctdncos.write(self.datafile, self._outfile, timeseries='BATS')
Esempio n. 2
0
    def test_different_columns(self):
        """Columns between two datafiles differ under a wide variety of cases.

        Case 1: Column values are different
        Case 1 corollary: Flag values are different
        Case 2: Units are different
        Case 3: Column not in original
        Case 4: Column not in derivative

        """
        with TemporaryFile() as origin, TemporaryFile() as deriv:
            origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,NITRAT,NITRAT_FLAG_W,NITRIT,DELC14,DELC14_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,3.00,2,10.0,-999.000,9
 316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,4.00,2,10.0,-999.000,9
END_DATA
""")
            origin.flush()
            origin.seek(0)
            deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,TDN,TDN_FLAG_W,NITRIT,DELC14,DELC14_FLAG_W,PH_SWS,PH_SWS_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,,NMOL/KG,/MILLE,,,
 316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,6.00,3,10.0,-999.000,1,-999.0,9
 316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,5.00,3,10.0,  10.000,9,-999.0,9
END_DATA
""")
            deriv.flush()
            deriv.seek(0)

            dforigin = DataFile()
            dfderiv = DataFile()
            btlex.read(dforigin, origin)
            btlex.read(dfderiv, deriv)
            self.assertEqual(
                # NITRIT comes after because NMOL/KG is not an expected unit and
                # gets pushed to the end when sorting
                (
                    ['DELC14', 'DELC14_FLAG_W', 'NITRIT'],
                    # PH_SWS_FLAG_W has underscores inside the parameter name. All
                    # parts need to be included
                    ['PH_SWS', 'PH_SWS_FLAG_W', 'TDN', 'TDN_FLAG_W'],
                    ['NITRAT', 'NITRAT_FLAG_W'],
                    [
                        'EXPOCODE', 'SECT_ID', 'STNNBR', 'CASTNO', 'SAMPNO',
                        'BTLNBR', 'BTLNBR_FLAG_W', 'LATITUDE', 'LONGITUDE',
                        'DEPTH', '_DATETIME'
                    ]),
                different_columns(dforigin, dfderiv, (
                    'EXPOCODE',
                    'SECT_ID',
                    'STNNBR',
                    'CASTNO',
                    'SAMPNO',
                    'BTLNBR',
                )))

            lines = [
                "DELC14 differs at origin row 1:\t(None, Decimal('10.000'))",
                "DELC14_FLAG_W differs at origin row 0:\t(9, 1)",
            ]
            self.assertTrue(self.ensure_lines(lines))

            # Columns are not different if merged results are not different.
            dfo = DataFile()
            dfd = DataFile()

            dfo.create_columns(['CTDPRS', 'CTDOXY'])
            dfo.check_and_replace_parameters()
            dfd.create_columns(['CTDPRS', 'CTDOXY'])
            dfd.check_and_replace_parameters()

            dfo['CTDPRS'].values = [1, 2, 3]
            dfo['CTDOXY'].values = [10, 20, 30]
            dfd['CTDPRS'].values = [3, 2, 1]
            dfd['CTDOXY'].values = [30, 20, 10]

            self.assertEqual(([], [], [], ['CTDPRS', 'CTDOXY']),
                             different_columns(dfo, dfd, ('CTDPRS', )))
Esempio n. 3
0
class TestBottleNetCDF(unittest.TestCase):
    def setUp(self):
        self.infile = open(
            sample_file('nc_hyd', 'i08s_33RR20070204_00001_00001_hy1.nc'), 'r')

    def tearDown(self):
        self.infile.close()

    def assertAlmostEqualOrNones(self, x, y):
        if x is None:
            self.assert_(y is None)
        else:
            self.assertAlmostEqual(x, y)

    def test_read(self):
        self.file = DataFile()
        botnc.read(self.file, self.infile)

        nitrite_values = (0.11, None, 0.08, 0.08, 0.08, 0.08, 0.06, 0.03, 0.06,
                          0.04, 0.03, None, 0.03, None, 0.03, None)
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(nitrite_values, self.file.columns['NITRIT'].values))

        freon11_values = (6.063, 6.055, 5.795, 5.619, 5.486, 5.508, 5.487,
                          5.683, 5.422, 5.190, 5.222, None, 5.289, None, 5.250,
                          5.254)
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(freon11_values, self.file.columns['CFC-11'].values))

        freon113_values = (None, ) * 16
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(freon113_values, self.file.columns['CFC113'].values))

        expocodes = ['33RR20070204'] * 16
        self.assertEqual(expocodes, self.file.columns['EXPOCODE'].values)

    def test_read_multiple(self):
        self.file = DataFile()
        botnc.read(self.file, self.infile)

        nitrite_values = (0.11, None, 0.08, 0.08, 0.08, 0.08, 0.06, 0.03, 0.06,
                          0.04, 0.03, None, 0.03, None, 0.03, None)
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(nitrite_values, self.file.columns['NITRIT'].values))

        freon11_values = (6.063, 6.055, 5.795, 5.619, 5.486, 5.508, 5.487,
                          5.683, 5.422, 5.190, 5.222, None, 5.289, None, 5.250,
                          5.254)
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(freon11_values, self.file.columns['CFC-11'].values))

        freon113_values = (None, ) * 16
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(freon113_values, self.file.columns['CFC113'].values))

        expocodes = ['33RR20070204'] * 16
        self.assertEqual(expocodes, self.file.columns['EXPOCODE'].values)

        # Read second file
        infile2 = open(sample_file('nc_hyd', 'p03a_00199_00001_hy1.nc'), 'r')
        botnc.read(self.file, infile2)

        # Make sure all columns have the same length
        length = None
        for c in self.file.columns.values():
            if not length:
                length = len(c.values)
            else:
                self.assertEquals(len(c.values), length)
                if c.is_flagged_woce():
                    self.assertEquals(len(c.flags_woce), length)
                if c.is_flagged_igoss():
                    self.assertEquals(len(c.flags_igoss), length)

        # Test parameter in first file not in second is filled with None.
        freon113_values += (None, ) * 36
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(freon113_values, self.file.columns['CFC113'].values))

        # Test parameter in both files are filled in correctly.
        freon11_values += (1.437, 1.501, 1.515, 1.525, 1.578, 1.596, 1.602,
                           1.725, 1.650, 1.703, 1.694, 1.437, 1.059, 0.702,
                           0.303, 0.130, 0.040, 0.015, -0.001, 0.002, 0.000,
                           None, None, 0.012, None, 0.006, None, None, None,
                           0.014, None, 0.000, None, 0.014, None, -0.001)
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(freon11_values, self.file.columns['CFC-11'].values))

        infile2.close()

    def test_write(self):
        self.file = DataFile()

        g = self.file.globals

        self.file['EXPOCODE'] = Column('EXPOCODE')
        self.file['EXPOCODE'].append('TESTEXPO')

        self.file['SECT_ID'] = Column('SECT_ID')
        self.file['SECT_ID'].append('TEST')

        self.file['STNNBR'] = Column('CASTNO')
        self.file['STNNBR'].append(5)

        self.file['CASTNO'] = Column('STNNBR')
        self.file['CASTNO'].append(20)

        self.file['DEPTH'] = Column('DEPTH')
        self.file['DEPTH'].append(-1)

        self.file['LATITUDE'] = Column('LATITUDE')
        self.file['LATITUDE'].append(90)

        self.file['LONGITUDE'] = Column('LONGITUDE')
        self.file['LONGITUDE'].append(180)

        self.file['_DATETIME'] = Column('_DATETIME')
        self.file['_DATETIME'].append(datetime.utcnow())

        self.file['BTLNBR'] = Column('BTLNBR')
        self.file['BTLNBR'].append(5, 9)

        self.file['CTDOXY'] = Column('CTDOXY')
        self.file['CTDOXY'].append(1, 2)
        self.file.check_and_replace_parameters()
        p = self.file['CTDOXY'].parameter
        p.description = 'ctd oxygen'
        p.bound_lower = 0
        p.bound_upper = 200

        botnc.write(self.file, NamedTemporaryFile())
Esempio n. 4
0
class TestDataFile(TestCase):
    """Unit tests for DataFile and its interaction with Column."""

    def setUp(self):
        """Create a DataFile holding a single, empty EXPOCODE column."""
        self.file = DataFile()
        self.c = self.file.columns['EXPOCODE'] = Column('EXPOCODE')

    def tearDown(self):
        """Drop the reference to the DataFile under test."""
        self.file = None

    def test_init(self):
        self.assertEqual(len(self.file.columns), 1)
        self.assertIsNone(self.file.footer)
        self.assertEqual(self.file.globals, {'stamp': '', 'header': ''})

    def test_expocodes(self):
        self.c.append('A')
        self.assertEqual(['A'], self.file.expocodes())
        self.c.append('B')
        self.assertEqual(['A', 'B'], self.file.expocodes())
        self.c.append('A')
        self.assertEqual(
            ['A', 'B'],
            self.file.expocodes())  # Expocodes returns unique expocodes.

    def test_len(self):
        c = self.file.columns['EXPOCODE']
        del self.file.columns['EXPOCODE']
        self.assertEqual(len(self.file), 0)
        self.file.columns['EXPOCODE'] = c
        self.assertEqual(len(self.file), 0)
        self.c.append('A')
        self.assertEqual(len(self.file), 1)
        self.c.append('A')
        self.assertEqual(len(self.file), 2)

    def test_sorted_columns(self):
        self.file.columns['CASTNO'] = Column('CASTNO')
        self.file.columns['STNNBR'] = Column('STNNBR')
        expected = ['EXPOCODE', 'STNNBR', 'CASTNO']
        # Build a real list: len() is taken below, and a map() result has
        # no length on Python 3.
        received = [
            c.parameter.mnemonic_woce() for c in self.file.sorted_columns()
        ]
        # If lengths are equal and all expected in received, then assume equal
        self.assertEqual(len(expected), len(received))
        self.assertTrue(all(x in received for x in expected))

    def test_get_property_for_columns(self):
        pass  # This is tested by the following tests.

    def test_column_headers(self):
        self.assertEqual(['EXPOCODE'], self.file.column_headers())
        self.file.columns['STNNBR'] = Column('STNNBR')
        expected = ['EXPOCODE', 'STNNBR']
        received = self.file.column_headers()
        # If lengths are equal and all expected in received, then assume equal
        self.assertEqual(len(expected), len(received))
        self.assertTrue(all(x in received for x in expected))

    def test_formats(self):
        self.file.columns['CTDOXY'] = Column('CTDOXY')
        self.file.check_and_replace_parameters()
        # Order of columns may be wrong
        self.assertEqual(['%11s', '%9.4f'], self.file.formats())

    def test_to_dict(self):
        # TODO: only checks that the call does not raise; assert on the result.
        self.file.to_dict()

    def test_str(self):
        # Only checks that converting to a string does not raise.
        str(self.file)

    def test_create_columns(self):
        parameters = ['CTDOXY']
        units = ['UMOL/KG']
        self.file.create_columns(parameters, units)

    def test_column_append(self):
        self.assertEqual(self.c.values, [])
        self.c.set(2, 'test')
        self.assertEqual(self.c.values, [None, None, 'test'])
        self.assertEqual(self.c.flags_woce, [])
        self.c.append('test2', 'flag2')
        self.assertEqual(self.c.values, [None, None, 'test', 'test2'])
        self.assertEqual(self.c.flags_woce, [None, None, None, 'flag2'])

    def test_calculate_depths(self):
        self.file['_ACTUAL_DEPTH'] = Column('_ACTUAL_DEPTH')
        self.assertEqual(('actual', []), self.file.calculate_depths())

        del self.file['_ACTUAL_DEPTH']
        self.file.globals['LATITUDE'] = 0
        self.file.create_columns(['CTDPRS', 'CTDSAL', 'CTDTMP'])
        self.assertEqual(('unesco1983', []), self.file.calculate_depths())

        self.file['CTDPRS'].values = [1]
        self.file['CTDSAL'].values = [1]
        self.file['CTDTMP'].values = [1]

        self.assertEqual(
            ('sverdrup', [_decimal('1.021723814950101286444879340E-8')]),
            self.file.calculate_depths())

    def test_check_and_replace_parameter_contrived(self):
        """Contrived parameters are not checked."""
        col = Column('_DATETIME')
        col.check_and_replace_parameter(self.file, convert=False)
def _bats_param_name(var):
    """Return the mapped name for a BATS variable, or ``var`` if unmapped."""
    try:
        return bats_to_param[var]
    except KeyError:
        return var


def _finalize_bats_cast(collections, df, metadata):
    """Complete one cast's DataFile and file it under its own cruise id.

    The cruise id and ship are taken from the cast's globals, so the cast
    is finalized with its own identity rather than that of whichever data
    row happens to follow it.

    Arguments:
    collections - mapping of cruise id to DataFileCollection; updated in place
    df - the DataFile holding the finished cast
    metadata - cruise metadata keyed by cruise id, or None
    """
    cruise_id = df.globals['_OS_ID']
    log.info(u'finalizing cast {0} {1} {2}'.format(
        cruise_id, df.globals['STNNBR'], df.globals['CASTNO']))

    # Use the first date from the cruise metadata when there is one;
    # otherwise fall back to the earliest sample time in the cast.
    try:
        port_date = metadata[cruise_id]['dates'][0]
    except (TypeError, KeyError):
        port_date = None
    if not port_date:
        port_date = min(df['_DATETIME'])

    df.globals['EXPOCODE'] = create_expocode(
        ship_code(df.globals['_SHIP'], raise_on_unknown=False), port_date)
    log.info(df.globals['EXPOCODE'])
    df.globals['DEPTH'] = max(df['_ACTUAL_DEPTH'])
    collapse_globals(df, ['_DATETIME', 'LATITUDE', 'LONGITUDE'])

    # Normalize all the parameter column lengths. There may be columns that
    # did not get data written to them so make sure they are just as long as
    # the rest
    length = len(df)
    for c in df.columns.values():
        c.set_length(length)

    try:
        dfc = collections[cruise_id]
    except KeyError:
        dfc = collections[cruise_id] = DataFileCollection()
    dfc.files.append(df)


def read(self, handle, metadata=None):
    """How to read a Bottle Bermuda Atlantic Time-Series Study file.

    This function reads bats_bottle.txt.

    Arguments:
    self - (special case, see NOTE) dictionary
    handle - iterable of lines, positioned at the start of the file
    metadata - (optional) BATS cruise metadata to be used to find port dates

    NOTE: The result for this method is a special case. The bottle file format
    contains the entire BATS holdings while the internal data format splits
    data up by cruise. The end result is therefore a dictionary with
    cruise_ids as keys to DataFileCollections (cruises) containing DataFiles
    (casts). Nothing is returned; ``self`` is filled in place.

    Every cast is finalized, including the last one in the file, and blank
    data lines are skipped.

    """
    sections = _read_header_sections(self, handle)
    _read_variables(self, handle)
    parameters = _get_variables(self, handle, sections)

    # Add DON for note in Variables list stating DON is reported for TON prior
    # to BATS 121
    parameters.append(['DON', None, 'umol/kg'])

    manual_parameters = [
        ['BTLNBR', ''],
        ['_DATETIME', ''],
        ['LATITUDE', ''],
        ['LONGITUDE', ''],
        ['_ACTUAL_DEPTH', 'METERS'],
    ]
    columns = [x[0] for x in manual_parameters]
    units = [x[1] for x in manual_parameters]

    # s is the index of the first parameter after Depth. It stays None when
    # there is no Depth entry, in which case no automatic columns are added.
    s = None
    for i, (var, _desc, u) in enumerate(parameters):
        if var == 'Depth':
            s = i + 1
            continue
        # Only want to add parameters after Depth. The others were done manually.
        if s is None:
            continue
        columns.append(_bats_param_name(var))
        units.append(u)

    template_df = DataFile()
    template_df.create_columns(columns, units)
    template_df.check_and_replace_parameters(convert=False)

    # Only the Comments section is carried over into the globals.
    for sec, lines in sections.items():
        if sec != 'Comments':
            continue
        template_df.globals['_{0}'.format(sec)] = '\n'.join(lines)

    # The name mapping does not depend on the row, so compute it once.
    auto_names = [_bats_param_name(p[0]) for p in parameters[s:]]

    df = None
    dfi = 0
    for i, line in enumerate(handle):
        parts = line.split()
        if not parts:
            # A blank line carries no data; skip it rather than fail on
            # parts[0].
            continue

        bats_id = parts[0]
        (cruise_type, type_id, cruise_num, cruise_id, cast_type, cast_id,
         nisk_id) = _parse_bats_id(bats_id)
        ship = _ship_from_cruise_num(cruise_num)
        if not ship:
            ship = 'R/V Atlantic Explorer'

        if (df is None or df.globals['_OS_ID'] != cruise_id
                or df.globals['STNNBR'] != cruise_type
                or df.globals['CASTNO'] != cast_id):
            if df is not None:
                # Done reading one cast. Finalize it.
                _finalize_bats_cast(self, df, metadata)
                dfi = 0

            # Create a new cast
            df = copy(template_df)
            df.globals['SECT_ID'] = BATS_SECT_ID
            df.globals['_SHIP'] = ship
            df.globals['_OS_ID'] = cruise_id
            df.globals['STNNBR'] = cruise_type
            df.globals['CASTNO'] = cast_id

        df['BTLNBR'].set(dfi, nisk_id)

        dt_ascii = datetime.strptime(parts[1] + parts[3], '%Y%m%d%H%M')
        # The decimal-date field is still parsed, as before, although its
        # result is not used.
        # NOTE(review): drop this call if bats_time_to_dt has no validation
        # role.
        bats_time_to_dt(parts[2])
        df['_DATETIME'].set(dfi, dt_ascii)

        df['LATITUDE'].set(dfi, Decimal(parts[4]))
        df['LONGITUDE'].set(dfi, Decimal(correct_longitude(parts[5])))
        df['_ACTUAL_DEPTH'].set_check_range(dfi, Decimal(parts[6]))

        for param, v in zip(auto_names, parts[s:]):
            if cruise_num < 121 and param == 'TON':
                param = 'DON'

            # -9, -9.9 and -9.99 are treated as missing values.
            # TODO determine whether -10 is just bad formatting for -9.9
            if (equal_with_epsilon(v, -9) or equal_with_epsilon(v, -9.9)
                    or equal_with_epsilon(v, -9.99)
                    or equal_with_epsilon(v, -10)):
                df[param].set_check_range(dfi, None)
            # NOTE(review): v is a str produced by split(), so this
            # comparison with the int 0 is never true and the branch does
            # not run. Left unchanged because enabling it would alter the
            # data produced; confirm the intended check.
            elif v == 0:
                log.warn(u'Data under detection limit, set flag to '
                         'WOCE water sample questionable measurement')
                df[param].set_check_range(dfi, None, flag=3)
            else:
                df[param].set_check_range(dfi, Decimal(v))

        dfi += 1
        # Since this is a super long file that contains multiple cruises and
        # casts, as the file is processed it is split apart into a list of
        # DataFileCollection(s) containing DataFile objects for each casts
        if i % 100 == 0:
            log.info(u'processed {0} lines'.format(i))

    # No later row triggers finalization of the last cast, so do it here.
    if df is not None:
        _finalize_bats_cast(self, df, metadata)