Ejemplo n.º 1
0
    def test_illegal_flag_gets_none(self):
        """An illegal flag still occupies a slot in the WOCE flags list.

        Reads ``TestBottleExchange.sample2`` and checks that the CTDSAL
        column ends up with exactly as many WOCE flags as values.

        """
        source = StringIO(TestBottleExchange.sample2)
        self.buff = source
        btlex.read(self.file, source)
        source.close()

        ctdsal = self.file['CTDSAL']
        value_count = len(ctdsal.values)
        flag_count = len(ctdsal.flags_woce)
        self.assertEqual(value_count, flag_count)
Ejemplo n.º 2
0
    def test_functional_scripts_btlex(self):
        """Test merging Bottle Exchange files."""
        from argparse import Namespace
        from libcchdo.scripts import merge_btlex_and_btlex
        with    TemporaryFile() as origin, \
                TemporaryFile() as deriv, \
                NamedTemporaryFile(delete=False) as output:
            origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,NITRAT,DELC14,DELC14_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,3.00,-999.000,9
 316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,4.00,-999.000,9
END_DATA
""")
            origin.flush()
            origin.seek(0)
            deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,TDN,DELC14,DELC14_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 14, 34, 2, 0, 0, 19700101, 0000,1000,4.00,-999.000,2
 316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,5.00,-999.000,1
 316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,6.00,  10.000,9
END_DATA
""")
            deriv.flush()
            deriv.seek(0)

            args = Namespace()
            args.origin = origin
            args.derivative = deriv
            args.parameters_to_merge = None
            args.merge_different = True
            args.output = output
            args.guess_key = True
            merge_btlex_and_btlex(args)

            with open(output.name) as fff:
                dfile = DataFile()
                btlex.read(dfile, fff)
                self.assertEqual(map(str, dfile['TDN'].values),
                                 ['6.00', '5.00'])
                self.assertEqual(dfile['TDN'].flags_woce, [])
            unlink(output.name)
        lines = [
            "Merging on keys composed of: ('EXPOCODE', 'STNNBR', 'CASTNO', 'SAMPNO', 'BTLNBR')",
        ]
        self.assertTrue(self.ensure_lines(lines))
Ejemplo n.º 3
0
    def test_read(self):
        """Every WOCE and IGOSS flag read from the sample is exactly an int."""
        source = StringIO(TestBottleExchange.sample)
        self.buff = source
        btlex.read(self.file, source)

        # Ensure flags are ints
        for column in self.file.columns.values():
            flag_kinds = (
                (column.is_flagged_woce, 'flags_woce'),
                (column.is_flagged_igoss, 'flags_igoss'),
            )
            for is_flagged, flags_attr in flag_kinds:
                if not is_flagged():
                    continue
                for flag in getattr(column, flags_attr):
                    self.assertEqual(type(flag), int)

        source.close()
Ejemplo n.º 4
0
    def test_no_stamp_uses_users(self):
        """A blank stamp makes the writer fall back to the config stamp.

        Reads the sample, clears the file's ``stamp`` global, writes the
        file out and checks that the second comma-separated field of the
        first output line equals ``config.stamp()``.

        """
        source = StringIO(TestBottleExchange.sample)
        self.buff = source
        btlex.read(self.file, source)
        source.close()

        self.file.globals['stamp'] = ''

        sink = StringIO()
        self.buff = sink
        btlex.write(self.file, sink)

        expected_stamp = config.stamp()

        written = sink.getvalue()
        first_line = written.split('\n')[0]
        self.assertEqual(expected_stamp, first_line.split(',')[1])
        sink.close()
Ejemplo n.º 5
0
    def test_write(self):
        """Round-trip ``sample_basic`` through read and write, line by line.

        Each input line is compared with the output line at the same
        position after splitting on commas and stripping whitespace from
        every field. Lines are paired with ``zip``, so only as many lines as
        the shorter of the two texts are compared.

        """
        def fields(line):
            return [cell.strip() for cell in line.split(',')]

        source = StringIO(self.sample_basic)
        self.buff = source
        btlex.read(self.file, source)
        source.close()

        sink = StringIO()
        self.buff = sink
        btlex.write(self.file, sink)
        written = sink.getvalue()
        sink.close()

        input_lines = self.sample_basic.split('\n')
        output_lines = written.split('\n')
        for input_line, output_line in zip(input_lines, output_lines):
            expected = fields(input_line)
            actual = fields(output_line)
            # Check that DBARS units has been updated from blank to METERS:
            # a line with a blank third field and at least 12 fields is
            # expected to come back with METERS in its twelfth field.
            if len(expected) > 11 and expected[2] == u'':
                expected[11] = u'METERS'
            self.assertEqual(expected, actual)
Ejemplo n.º 6
0
    def test_merge_btl_non_unique_keys(self):
        """Warn if there are non-unique keys in origin.
        
        Map to the first occurrence in derivative.

        """
        with    TemporaryFile() as origin, \
                TemporaryFile() as deriv:
            origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 16, 36,2,1000,5,-999.000,9
 316N145_9, TRNS1, 574, 1, 15, 35,2,1000,5,-999.000,9
END_DATA
""")
            origin.flush()
            origin.seek(0)
            deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 36, 36,2,1000,5,  10.000,9
 316N145_9, TRNS1, 574, 1, 35, 35,2,1000,5,-999.000,1
END_DATA
""")
            deriv.flush()
            deriv.seek(0)

            dfo = DataFile()
            dfd = DataFile()
            btlex.read(dfo, origin)
            btlex.read(dfd, deriv)
            parameters = ['DELC14']
            keys = ['STNNBR']
            mdf = merge_datafiles(dfo, dfd, keys, parameters)

            # Make sure warning is printed regarding extra key in deriv file.
            lines = [
                'Picked the first row of occurrence in derivative data for non'
                ' unique keys: ',
            ]
            self.assertTrue(self.ensure_lines(lines))
Ejemplo n.º 7
0
    def test_merge_btl_no_common_keys(self):
        """Warn if there are no common keys."""
        with    TemporaryFile() as origin, \
                TemporaryFile() as deriv:
            origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 16, 36,2,1000,5,-999.000,9
 316N145_9, TRNS1, 574, 1, 15, 35,2,1000,5,-999.000,9
END_DATA
""")
            origin.flush()
            origin.seek(0)
            deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 36, 36,2,1000,5,  10.000,9
 316N145_9, TRNS1, 574, 1, 35, 35,2,1000,5,-999.000,1
END_DATA
""")
            deriv.flush()
            deriv.seek(0)

            dfo = DataFile()
            dfd = DataFile()
            btlex.read(dfo, origin)
            btlex.read(dfd, deriv)
            p_different, p_not_in_orig, p_not_in_derip_not_in_deriv, p_common = \
                different_columns(dfo, dfd, BOTTLE_KEY_COLS)
            parameters = p_different + p_not_in_orig
            keys = determine_bottle_keys(dfo, dfd)
            parameters = list(OrderedSet(parameters) - OrderedSet(keys))
            mdf = merge_datafiles(dfo, dfd, keys, parameters)

            # Make sure warning is printed regarding extra key in deriv file.
            lines = [
                'No keys matched',
                'No keys provided to map on.',
            ]
            self.assertTrue(self.ensure_lines(lines))
Ejemplo n.º 8
0
    def test_different_columns(self):
        """Columns between two datafiles differ under a wide variety of cases.

        Case 1: Column values are different
        Case 1 corollary: Flag values are different
        Case 2: Units are different
        Case 3: Column not in original
        Case 4: Column not in derivative

        """
        with TemporaryFile() as origin, TemporaryFile() as deriv:
            origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,NITRAT,NITRAT_FLAG_W,NITRIT,DELC14,DELC14_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,3.00,2,10.0,-999.000,9
 316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,4.00,2,10.0,-999.000,9
END_DATA
""")
            origin.flush()
            origin.seek(0)
            deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,TDN,TDN_FLAG_W,NITRIT,DELC14,DELC14_FLAG_W,PH_SWS,PH_SWS_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,,NMOL/KG,/MILLE,,,
 316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,6.00,3,10.0,-999.000,1,-999.0,9
 316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,5.00,3,10.0,  10.000,9,-999.0,9
END_DATA
""")
            deriv.flush()
            deriv.seek(0)

            dforigin = DataFile()
            dfderiv = DataFile()
            btlex.read(dforigin, origin)
            btlex.read(dfderiv, deriv)
            self.assertEqual(
                # NITRIT comes after because NMOL/KG is not an expected unit and
                # gets pushed to the end when sorting
                (
                    ['DELC14', 'DELC14_FLAG_W', 'NITRIT'],
                    # PH_SWS_FLAG_W has underscores inside the parameter name. All
                    # parts need to be included
                    ['PH_SWS', 'PH_SWS_FLAG_W', 'TDN', 'TDN_FLAG_W'],
                    ['NITRAT', 'NITRAT_FLAG_W'],
                    [
                        'EXPOCODE', 'SECT_ID', 'STNNBR', 'CASTNO', 'SAMPNO',
                        'BTLNBR', 'BTLNBR_FLAG_W', 'LATITUDE', 'LONGITUDE',
                        'DEPTH', '_DATETIME'
                    ]),
                different_columns(dforigin, dfderiv, (
                    'EXPOCODE',
                    'SECT_ID',
                    'STNNBR',
                    'CASTNO',
                    'SAMPNO',
                    'BTLNBR',
                )))

            lines = [
                "DELC14 differs at origin row 1:\t(None, Decimal('10.000'))",
                "DELC14_FLAG_W differs at origin row 0:\t(9, 1)",
            ]
            self.assertTrue(self.ensure_lines(lines))

            # Columns are not different if merged results are not different.
            dfo = DataFile()
            dfd = DataFile()

            dfo.create_columns(['CTDPRS', 'CTDOXY'])
            dfo.check_and_replace_parameters()
            dfd.create_columns(['CTDPRS', 'CTDOXY'])
            dfd.check_and_replace_parameters()

            dfo['CTDPRS'].values = [1, 2, 3]
            dfo['CTDOXY'].values = [10, 20, 30]
            dfd['CTDPRS'].values = [3, 2, 1]
            dfd['CTDOXY'].values = [30, 20, 10]

            self.assertEqual(([], [], [], ['CTDPRS', 'CTDOXY']),
                             different_columns(dfo, dfd, ('CTDPRS', )))
Ejemplo n.º 9
0
    def test_integration_merge_btl(self):
        with    TemporaryFile() as origin, \
                TemporaryFile() as deriv:
            origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W,PH_SWS,PH_SWS_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,,,
 316N145_9, TRNS1, 574, 1, 36, 36,2,1000,5,-999.000,9,11,9
 316N145_9, TRNS1, 574, 1, 35, 35,2,1000,5,-999.000,9,22,9
 316N145_9, TRNS1, 574, 1, 34, 34,2,1000,5,-999.000,9,33,9
 316N145_9, TRNS1, 574, 1, 32, 32,2,1000,5,-999.000,9,44,9
END_DATA
""")
            origin.flush()
            origin.seek(0)
            deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W,PH_SWS,PH_SWS_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,,,
 316N145_9, TRNS1, 574, 1, 36, 36,2,1000,5,  10.000,9,-999.0,9
 316N145_9, TRNS1, 574, 1, 35, 35,2,1000,5,-999.000,1,-999.0,9
 316N145_9, TRNS1, 574, 1, 34, 34,2,1000,5,-999.000,9,-999.0,9
 316N145_9, TRNS1, 600, 1,  1,  1,2,1000,5,-999.000,9,-999.0,9
END_DATA
""")
            deriv.flush()
            deriv.seek(0)

            dfo = DataFile()
            dfd = DataFile()
            btlex.read(dfo, origin)
            btlex.read(dfd, deriv)
            p_different, p_not_in_orig, p_not_in_deriv, p_common = \
                different_columns(dfo, dfd, BOTTLE_KEY_COLS)
            parameters = p_different + p_not_in_orig
            keys = determine_bottle_keys(dfo, dfd)
            self.assertEqual(
                keys, ('EXPOCODE', 'STNNBR', 'CASTNO', 'SAMPNO', 'BTLNBR'))
            parameters = list(OrderedSet(parameters) - OrderedSet(keys))

            # Parameters with underscores in them may be confused when matching
            # flags with them. E.g. PH_SWS_FLAG_W should be matched with PH_SWS
            # not PH.
            dfile = merge_datafiles(dfo, dfd, keys, parameters)

            self.assertEqual(dfile['DELC14'][0], _decimal('10.000'))
            self.assertEqual(dfile['DELC14'].flags_woce[1], 1)

            # Header should be the origin file's header
            self.assertNotIn('header 2', dfile.globals['header'])
            self.assertIn('header 1', dfile.globals['header'])
            # Header should contain the merged parameters
            self.assertIn('Merged parameters: PH_SWS, DELC14, DELC14_FLAG_W',
                          dfile.globals['header'])
            # No double new lines
            self.assertNotIn('\n\n', dfile.globals['header'])
            # new line for header is not included in the writers
            self.assertEqual('\n', dfile.globals['header'][-1])

            # Key columns should not have been converted to floats. This happens
            # for some reason if pandas combine/update have been used.
            self.assertEqual(str(dfile['STNNBR'][0]), '574')
            self.assertEqual(str(dfile['CASTNO'][0]), '1')
            self.assertEqual(str(dfile['SAMPNO'][0]), '36')
            self.assertEqual(str(dfile['BTLNBR'][0]), '36')
            self.assertEqual(str(dfile['PH_SWS'][0]), 'None')

            # Extra keys in derivative file should not be merged in.
            self.assertNotIn(600, dfile['STNNBR'])

            # Make sure warning is printed regarding extra key in deriv file.
            lines = [[
                'Key ', 'does not exist in origin from derivative rows', '600'
            ]]
            self.assertTrue(self.ensure_lines(lines))