Esempio n. 1
0
    def test_map_collections_keep_origin_files(self):
        """When merging collections, make sure to keep origin's files.

        If files from origin were not mapped to deriv's files, keep them in
        the final product.

        Warn if deriv file not in origin collection.

        """
        origin_coll = DataFileCollection()
        deriv_coll = DataFileCollection()

        # Origin and derivative each contain one file; the keys do not match
        # because the EXPOCODEs differ.
        origin_file = DataFile()
        for key, value in (('EXPOCODE', 'a'), ('STNNBR', 1), ('CASTNO', 1)):
            origin_file.globals[key] = value
        origin_coll.append(origin_file)

        deriv_file = DataFile()
        for key, value in (('EXPOCODE', 'b'), ('STNNBR', 1), ('CASTNO', 1)):
            deriv_file.globals[key] = value
        deriv_coll.append(deriv_file)

        dfile_map = map_collections(origin_coll, deriv_coll)

        # The unmapped origin file must survive, mapped onto itself.
        self.assertEqual(dfile_map, [(origin_file, origin_file, ('a', 1, 1))])
        expected_warnings = [
            "Origin file key ('a', 1, 1) is not present in derivative collection.",
            "Derivative file key ('b', 1, 1) is not present in origin collection.",
        ]
        self.assertTrue(self.ensure_lines(expected_warnings))
Esempio n. 2
0
    def test_merge_datafiles_does_not_create_extra_columns(self):
        """Merge datafiles but don't create extra columns.

        When merging data files, create columns only if they exist in
        derivative and were requested to be merged in.

        Thanks to sescher for finding this.

        """
        origin = DataFile()
        origin.create_columns(['CTDPRS', 'CTDOXY'])
        for prs_row, oxy_row in [((1, 2), (40, 2)), ((2, 2), (41, 3))]:
            origin['CTDPRS'].append(*prs_row)
            origin['CTDOXY'].append(*oxy_row)

        deriv = DataFile()
        deriv.create_columns(['CTDPRS', 'CTDOXY', 'CTDSAL'])
        deriv_rows = [
            ((2, 2), (50, 2), (20, 2)),
            ((3, 2), (51, 3), (21, 2)),
        ]
        for prs_row, oxy_row, sal_row in deriv_rows:
            deriv['CTDPRS'].append(*prs_row)
            deriv['CTDOXY'].append(*oxy_row)
            deriv['CTDSAL'].append(*sal_row)

        # CTDSAL was not requested, so it must not appear in the product.
        mdf = merge_datafiles(origin, deriv, ['CTDPRS'], ['CTDOXY'])

        with self.assertRaises(KeyError):
            mdf['CTDSAL']
Esempio n. 3
0
    def test_merge_collections(self):
        """When merging collections, map files, then merge mapped files.

        Builds two single-file collections with the same (EXPOCODE, STNNBR,
        CASTNO) key, merges them with merge_datafiles on CTDPRS, and checks
        the merged values plus the expected warning lines.

        """
        odfc = DataFileCollection()
        ddfc = DataFileCollection()

        # Origin file: keys a/1/1, rows keyed on CTDPRS 1 and 2.
        df0 = DataFile()
        df0.globals['EXPOCODE'] = 'a'
        df0.globals['STNNBR'] = 1
        df0.globals['CASTNO'] = 1
        df0.create_columns(['CTDPRS', 'NITRAT', 'NITRIT'])
        df0['CTDPRS'].append(1, 2)
        df0['CTDPRS'].append(2, 2)
        df0['NITRAT'].append(10, 2)
        df0['NITRAT'].append(11, 2)
        df0['NITRIT'].append(10, 2)
        df0['NITRIT'].append(11, 2)
        odfc.append(df0)

        # Derivative file: same collection key, but CTDPRS rows are 1 and 3,
        # so only the first row maps onto origin.
        df1 = DataFile()
        df1.globals['EXPOCODE'] = 'a'
        df1.globals['STNNBR'] = 1
        df1.globals['CASTNO'] = 1
        df1.create_columns(['CTDPRS', 'NITRAT', 'NITRIT'])
        df1['CTDPRS'].append(1, 2)
        df1['CTDPRS'].append(3, 2)
        df1['NITRAT'].append(20, 2)
        df1['NITRAT'].append(21, 2)
        df1['NITRIT'].append(10, 2)
        df1['NITRIT'].append(11, 2)
        ddfc.append(df1)

        def merger(origin, deriv):
            # Merge on CTDPRS; pull NITRAT and NITRIT from the derivative.
            return merge_datafiles(origin, deriv, ['CTDPRS'],
                                   ['NITRAT', 'NITRIT'])

        merged_dfc = merge_collections(odfc, ddfc, merger)

        # Row (1,) came from deriv (NITRAT 20); row (2,) kept origin's 11.
        self.assertEqual(merged_dfc.files[0]['CTDPRS'].values, [1, 2])
        self.assertEqual(merged_dfc.files[0]['NITRAT'].values, [20, 11])
        self.assertEqual(merged_dfc.files[0]['NITRIT'].values, [10, 11])

        lines = [
            # df1 has an different CTDPRS record (3)
            'Key (3,) does not exist in origin from derivative rows',
            # NITRIT columns are the same
            "Instructed to merge parameters that are not different: ['NITRIT']"
        ]
        self.assertTrue(self.ensure_lines(lines))
Esempio n. 4
0
def read(dfc, fileobj):
    """Read a CTD CSV file, split it on cast, and promote headers to globals.

    The single parsed DataFile is split into one DataFile per cast; for each
    resulting file the per-row header columns are collapsed to file-level
    globals, SECT_ID is blanked, and a bottom depth is derived.

    """
    dfile = DataFile()
    retval = frcsv.read(dfile, fileobj, 'ctd')
    split_dfc = split_on_cast(dfile)
    dfc.files = split_dfc.files

    # Convert header columns to globals
    global_headers = [
        'EXPOCODE', 'STNNBR', 'CASTNO', '_DATETIME', 'LATITUDE', 'LONGITUDE',
        'DEPTH'
    ]
    for dfile in dfc.files:
        for header in global_headers:
            # Take the first row's value as the file-wide value; the column
            # is then removed from the per-row data.
            value = dfile[header][0]
            if type(value) == int:
                value = str(value)
            dfile.globals[header] = value
            del dfile[header]

        # Arbitrarily set SECT_ID to blank
        dfile.globals['SECT_ID'] = ''

        # Take largest depth value and set as bottom depth
        # NOTE(review): 'DEPTH' was deleted from the file in the loop above,
        # so this lookup may always raise KeyError and fall through to the
        # UNESCO depth estimate — confirm DataFile.__delitem__ semantics.
        try:
            depth = max(dfile['DEPTH'])
        except KeyError:
            # Estimate bottom depth from maximum pressure and latitude.
            depth = depth_unesco(max(dfile['CTDPRS']),
                                 dfile.globals['LATITUDE'])
        dfile.globals['DEPTH'] = ceil(depth)

    return retval
Esempio n. 5
0
    def test_write_fill_value_decimal_places_follow_column(self):
        """Fill values should follow the column's data's lead for decimal places.

        E.g. if the column has data [10.001, 11.123], the normal fill value
        -999 should be written -999.000, i.e. as many trailing zeros as the
        data has. If the column has no data in it, default to the old-style C
        format string for how many decimal places to show.

        """
        with closing(StringIO()) as buff:
            dfile = DataFile()
            col_names = [
                'STNNBR', 'CASTNO', 'BTLNBR', '_DATETIME', 'CTDPRS', 'CTDOXY'
            ]
            dfile.create_columns(col_names)
            # Two empty rows everywhere, then one real value per data column.
            for name in col_names:
                dfile[name].values = [None, None]
            dfile['CTDPRS'].values = [_decimal('10.0001'), None]
            dfile['CTDOXY'].values = [None, _decimal('243.23')]
            btlex.write(dfile, buff)

            written = buff.getvalue().split('\n')
            # CTDPRS default decplaces is 1 but the data has 4
            self.assertEqual('-999.0000', written[4].split(',')[5].lstrip())
            # CTDOXY default decplaces is 4 but the data has 2
            self.assertEqual('-999.00', written[3].split(',')[6].lstrip())
Esempio n. 6
0
 def test_read_err_flag_col_no_data_col(self):
     """A flag column with no matching data column should produce a warning."""
     with closing(StringIO()) as stream:
         dfile = DataFile()
         exchange.read_data(dfile, stream, ['CTDSAL_FLAG_W'])
     expected = [
         "Flag column CTDSAL_FLAG_W exists without parameter column CTDSAL",
     ]
     self.assertTrue(self.ensure_lines(expected))
Esempio n. 7
0
    def test_merge_btl_non_unique_keys(self):
        """Warn if there are non-unique keys in origin.

        Map to the first occurrence in derivative.

        """
        with    TemporaryFile() as origin, \
                TemporaryFile() as deriv:
            # Both fixture files share STNNBR 574 on every row, so merging
            # with keys=['STNNBR'] makes every key non-unique.
            origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 16, 36,2,1000,5,-999.000,9
 316N145_9, TRNS1, 574, 1, 15, 35,2,1000,5,-999.000,9
END_DATA
""")
            origin.flush()
            origin.seek(0)
            deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 36, 36,2,1000,5,  10.000,9
 316N145_9, TRNS1, 574, 1, 35, 35,2,1000,5,-999.000,1
END_DATA
""")
            deriv.flush()
            deriv.seek(0)

            dfo = DataFile()
            dfd = DataFile()
            btlex.read(dfo, origin)
            btlex.read(dfd, deriv)
            # STNNBR alone cannot distinguish the two rows of either file.
            parameters = ['DELC14']
            keys = ['STNNBR']
            mdf = merge_datafiles(dfo, dfd, keys, parameters)

            # Make sure warning is printed regarding extra key in deriv file.
            lines = [
                'Picked the first row of occurrence in derivative data for non'
                ' unique keys: ',
            ]
            self.assertTrue(self.ensure_lines(lines))
Esempio n. 8
0
def _multi_file(reader, files, output, **kwargs):
    """Read each input file with *reader* and write all of them as a CTD Zip.

    reader -- format module whose read(dfile, fileobj, **kwargs) parses one
        file into a DataFile.
    files -- iterable of open file objects to read.
    output -- either sys.stdout or a path to open for writing.

    """
    dfc = DataFileCollection()
    for fobj in files:
        dfile = DataFile()
        reader.read(dfile, fobj, **kwargs)
        dfc.files.append(dfile)
    if output is sys.stdout:
        ctdzipex.write(dfc, output)
    else:
        # Fix: the opened output file was previously never closed (leaked
        # handle / potentially unflushed data). Close it deterministically.
        out = open(output, 'w')
        try:
            ctdzipex.write(dfc, out)
        finally:
            out.close()
Esempio n. 9
0
    def test_num_headers(self):
        """The number of headers header counts itself as a header."""
        with closing(StringIO()) as buff:
            dfile = DataFile()
            dfile.globals['LONGITUDE'] = '0.000'
            ctdex.write(dfile, buff)

            written = buff.getvalue().split('\n')
            # Line 1 holds "NUMBER_HEADERS = N"; extract and check N.
            header_count = written[1].split(' = ')[1].lstrip()
            self.assertEqual('2', header_count)
Esempio n. 10
0
    def test_diff_decplaces(self):
        """Derivative is still different when decimal places are different."""
        origin = DataFile()
        deriv = DataFile()
        for dfile, oxy in ((origin, '0.140'), (deriv, '0.14')):
            dfile.create_columns(['CTDPRS', 'CTDOXY'])
            dfile['CTDPRS'].append(_decimal('1'))
            # Same magnitude, different number of decimal places.
            dfile['CTDOXY'].append(_decimal(oxy))

        p_different, p_not_in_orig, p_not_in_deriv, p_common = \
            different_columns(origin, deriv, ['CTDPRS'])
        self.assertEqual(p_different, ['CTDOXY'])

        merged = merge_datafiles(origin, deriv, ['CTDPRS'], ['CTDOXY'])
        # The derivative's precision wins in the merged product.
        self.assertEqual(decimal_to_str(merged['CTDOXY'][0]), '0.14')
Esempio n. 11
0
    def test_merge_btl_no_common_keys(self):
        """Warn if there are no common keys."""
        with    TemporaryFile() as origin, \
                TemporaryFile() as deriv:
            origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 16, 36,2,1000,5,-999.000,9
 316N145_9, TRNS1, 574, 1, 15, 35,2,1000,5,-999.000,9
END_DATA
""")
            origin.flush()
            origin.seek(0)
            deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 36, 36,2,1000,5,  10.000,9
 316N145_9, TRNS1, 574, 1, 35, 35,2,1000,5,-999.000,1
END_DATA
""")
            deriv.flush()
            deriv.seek(0)

            dfo = DataFile()
            dfd = DataFile()
            btlex.read(dfo, origin)
            btlex.read(dfd, deriv)
            p_different, p_not_in_orig, p_not_in_derip_not_in_deriv, p_common = \
                different_columns(dfo, dfd, BOTTLE_KEY_COLS)
            parameters = p_different + p_not_in_orig
            keys = determine_bottle_keys(dfo, dfd)
            parameters = list(OrderedSet(parameters) - OrderedSet(keys))
            mdf = merge_datafiles(dfo, dfd, keys, parameters)

            # Make sure warning is printed regarding extra key in deriv file.
            lines = [
                'No keys matched',
                'No keys provided to map on.',
            ]
            self.assertTrue(self.ensure_lines(lines))
Esempio n. 12
0
def read(self, handle):
    """How to read CTD ODEN files from a Zip.

    Each non-documentation member of the archive is parsed into a DataFile
    and appended to self.files.

    """
    # Fix: do not shadow the builtins `zip` and `file`; close resources even
    # if a member fails to parse.
    zfile = Zip.ZeroCommentZipFile(handle, 'r')
    try:
        for name in zfile.namelist():
            # Skip documentation bundled alongside the data files.
            if 'DOC' in name or 'README' in name:
                continue
            tempstream = StringIO(zfile.read(name))
            try:
                ctdfile = DataFile()
                oden(ctdfile).read(tempstream)
                self.files.append(ctdfile)
            finally:
                tempstream.close()
    finally:
        zfile.close()
Esempio n. 13
0
def read(self, handle):
    """How to read CTD Bonus Goodhope files from a TAR.

    Each tar member is parsed with the ECP format reader; parse failures are
    logged and skipped past.

    """
    with tarfile.open(fileobj=handle) as tf:
        for member in tf.getmembers():
            ecp_file = tf.extractfile(member)
            ctdfile = DataFile()
            try:
                ecp.read(ctdfile, ecp_file)
            except ValueError:
                log.error(u'Failed on {0}'.format(member.name))
            # NOTE(review): the DataFile is appended even when ecp.read
            # raised, leaving a possibly empty file in self.files — confirm
            # whether a placeholder is intended or the append should be
            # inside the try.
            self.files.append(ctdfile)
Esempio n. 14
0
    def test_read_multiple(self):
        """Reading a second netCDF bottle file appends rows to the DataFile.

        Columns present in only one file must be padded with None so that all
        columns remain the same length after the second read.

        """
        self.file = DataFile()
        botnc.read(self.file, self.infile)

        nitrite_values = (0.11, None, 0.08, 0.08, 0.08, 0.08, 0.06, 0.03, 0.06,
                          0.04, 0.03, None, 0.03, None, 0.03, None)
        # Fix: use explicit loops rather than map() for side effects — under
        # Python 3 map() is lazy and the assertions would never execute.
        for expected, actual in zip(
                nitrite_values, self.file.columns['NITRIT'].values):
            self.assertAlmostEqualOrNones(expected, actual)

        freon11_values = (6.063, 6.055, 5.795, 5.619, 5.486, 5.508, 5.487,
                          5.683, 5.422, 5.190, 5.222, None, 5.289, None, 5.250,
                          5.254)
        for expected, actual in zip(
                freon11_values, self.file.columns['CFC-11'].values):
            self.assertAlmostEqualOrNones(expected, actual)

        freon113_values = (None, ) * 16
        for expected, actual in zip(
                freon113_values, self.file.columns['CFC113'].values):
            self.assertAlmostEqualOrNones(expected, actual)

        expocodes = ['33RR20070204'] * 16
        self.assertEqual(expocodes, self.file.columns['EXPOCODE'].values)

        # Read second file
        infile2 = open(sample_file('nc_hyd', 'p03a_00199_00001_hy1.nc'), 'r')
        botnc.read(self.file, infile2)

        # Make sure all columns (and any flag lists) have the same length.
        length = None
        for column in self.file.columns.values():
            if not length:
                length = len(column.values)
            else:
                # assertEqual: assertEquals is a deprecated alias.
                self.assertEqual(len(column.values), length)
                if column.is_flagged_woce():
                    self.assertEqual(len(column.flags_woce), length)
                if column.is_flagged_igoss():
                    self.assertEqual(len(column.flags_igoss), length)

        # Test parameter in first file not in second is filled with None.
        freon113_values += (None, ) * 36
        for expected, actual in zip(
                freon113_values, self.file.columns['CFC113'].values):
            self.assertAlmostEqualOrNones(expected, actual)

        # Test parameter in both files are filled in correctly.
        freon11_values += (1.437, 1.501, 1.515, 1.525, 1.578, 1.596, 1.602,
                           1.725, 1.650, 1.703, 1.694, 1.437, 1.059, 0.702,
                           0.303, 0.130, 0.040, 0.015, -0.001, 0.002, 0.000,
                           None, None, 0.012, None, 0.006, None, None, None,
                           0.014, None, 0.000, None, 0.014, None, -0.001)
        for expected, actual in zip(
                freon11_values, self.file.columns['CFC-11'].values):
            self.assertAlmostEqualOrNones(expected, actual)

        infile2.close()
Esempio n. 15
0
 def test_read_warn_bad_flag(self):
     """A non-numeric WOCE flag should produce a warning, not a crash."""
     with closing(StringIO()) as stream:
         stream.name = 'testfile'
         # One data row whose flag field is the letter 'a'.
         stream.write('123,a\n')
         stream.flush()
         stream.seek(0)
         dfile = DataFile()
         dfile['CTDSAL'] = Column('CTDSAL')
         exchange.read_data(dfile, stream, ['CTDSAL', 'CTDSAL_FLAG_W'])
     expected = [
         "Bad WOCE flag 'a' for CTDSAL on data row 0",
     ]
     self.assertTrue(self.ensure_lines(expected))
Esempio n. 16
0
    def test_write_exchange_decimal_places(self):
        """Decimal places should be kept from the original data."""
        with closing(StringIO()) as buff:
            dfile = DataFile()
            dfile.globals['LONGITUDE'] = _decimal('0.0000000')
            dfile.create_columns(['CTDPRS'])
            dfile['CTDPRS'].values = [_decimal('10.0001'), None]
            ctdex.write(dfile, buff)

            written = buff.getvalue().split('\n')
            # Decimal('0.0000000') is converted to 0E-7 by str. The formatting
            # has to be done manually.
            longitude = written[2].split(' = ')[1].lstrip()
            self.assertEqual('0.0000000', longitude)
Esempio n. 17
0
    def test_functional_scripts_btlex(self):
        """Test merging Bottle Exchange files.

        Runs the merge_btlex_and_btlex script end to end over two temporary
        Exchange files and checks the merged TDN values, flags, and the
        key-guessing log output.

        """
        from argparse import Namespace
        from libcchdo.scripts import merge_btlex_and_btlex
        with    TemporaryFile() as origin, \
                TemporaryFile() as deriv, \
                NamedTemporaryFile(delete=False) as output:
            origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,NITRAT,DELC14,DELC14_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,3.00,-999.000,9
 316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,4.00,-999.000,9
END_DATA
""")
            origin.flush()
            origin.seek(0)
            # Derivative carries TDN (absent from origin) and an extra row
            # (SAMPNO 14) with no origin counterpart.
            deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,TDN,DELC14,DELC14_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 14, 34, 2, 0, 0, 19700101, 0000,1000,4.00,-999.000,2
 316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,5.00,-999.000,1
 316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,6.00,  10.000,9
END_DATA
""")
            deriv.flush()
            deriv.seek(0)

            # Emulate the parsed command-line arguments the script expects.
            args = Namespace()
            args.origin = origin
            args.derivative = deriv
            args.parameters_to_merge = None
            args.merge_different = True
            args.output = output
            args.guess_key = True
            merge_btlex_and_btlex(args)

            # Re-read the merged output and verify TDN was merged in for the
            # two rows that exist in origin.
            with open(output.name) as fff:
                dfile = DataFile()
                btlex.read(dfile, fff)
                self.assertEqual(map(str, dfile['TDN'].values),
                                 ['6.00', '5.00'])
                self.assertEqual(dfile['TDN'].flags_woce, [])
            unlink(output.name)
        lines = [
            "Merging on keys composed of: ('EXPOCODE', 'STNNBR', 'CASTNO', 'SAMPNO', 'BTLNBR')",
        ]
        self.assertTrue(self.ensure_lines(lines))
Esempio n. 18
0
    def test_merge_datafiles_no_column(self):
        """Error to merge columns in neither datafile."""
        origin = DataFile()
        deriv = DataFile()
        for dfile in (origin, deriv):
            dfile.create_columns(['CTDPRS', 'NITRAT'])
            dfile['CTDPRS'].append(1, 2)
            dfile['CTDPRS'].append(2, 2)
        origin['NITRAT'].append(10, 2)
        origin['NITRAT'].append(11, 2)
        deriv['NITRAT'].append(20, 3)
        deriv['NITRAT'].append(21, 4)

        # CTDSAL exists in neither file, so the merge must refuse to run.
        with self.assertRaisesRegexp(
                ValueError, 'No columns selected to merge are different.'):
            merge_datafiles(origin, deriv, ['CTDPRS'], ['CTDSAL'])
        expected = [
            "Instructed to merge parameters that are not in either datafile: ['CTDSAL']",
        ]
        self.assertTrue(self.ensure_lines(expected))
Esempio n. 19
0
    def test_read_data_btlnbr_as_string(self):
        """Bottle numbers must be read as strings even when numeric."""
        with closing(StringIO()) as stream:
            # Header, units line, then one numeric-looking bottle number.
            for line in ('BTLNBR\n', '\n', '12\n'):
                stream.write(line)
            stream.flush()
            stream.seek(0)

            dfile = DataFile()
            exchange.read_data(dfile, stream, ['BTLNBR'])

            self.assertTrue(isinstance(dfile['BTLNBR'].values[0], basestring))
Esempio n. 20
0
    def test_functional_write(self):
        """Writing a small CTD Exchange file produces the expected data row."""
        dfile = DataFile()
        names = ['CTDPRS', 'CTDOXY']
        dfile.create_columns(names)
        for order, name in enumerate(names):
            dfile[name].parameter.display_order = order
        dfile['CTDPRS'].values = [_decimal(v) for v in ('2.0', '4.0')]
        dfile['CTDOXY'].values = [_decimal(v) for v in ('254.0', '253.1')]
        dfile['CTDOXY'].flags_woce = [2, 3]

        with closing(StringIO()) as buff:
            ctdex.write(dfile, buff)
            written = buff.getvalue().split('\n')
            self.assertEqual([u'        2.0', u'      254.0', u'2'],
                             written[4].split(','))
Esempio n. 21
0
 def test_read_btlnbr_as_string(self):
     """Non-numeric and zero-padded bottle numbers stay strings."""
     with closing(StringIO()) as stream:
         for line in ('SIO1,33.24\n', '01,32.10\n'):
             stream.write(line)
         stream.flush()
         stream.seek(0)
         dfile = DataFile()
         for name in ('BTLNBR', 'CTDSAL'):
             dfile[name] = Column(name)
         exchange.read_data(dfile, stream, ['BTLNBR', 'CTDSAL'])
         # Bottle numbers keep their text form; salinity becomes Decimal.
         self.assertEqual(dfile['BTLNBR'].values, ['SIO1', '01'])
         self.assertEqual(
             dfile['CTDSAL'].values,
             [Decimal('33.24'), Decimal('32.10')])
Esempio n. 22
0
    def test_merge_datafiles_flags(self):
        """It should be possible to only merge flag "columns".

        This includes updating and adding flags.
        If adding flags and the original column does not exist, warn and fail.

        """
        # Origin: NITRAT has flags, FLUOR has values but no flags.
        df0 = DataFile()
        df0.create_columns(['CTDPRS', 'NITRAT', 'FLUOR'])
        df0['CTDPRS'].append(1, 2)
        df0['CTDPRS'].append(2, 2)
        df0['CTDPRS'].append(3, 2)
        df0['NITRAT'].append(10, 2)
        df0['NITRAT'].append(11, 2)
        df0['NITRAT'].append(12, 2)
        df0['FLUOR'].append(100)
        df0['FLUOR'].append(101)
        df0['FLUOR'].append(102)

        # Derivative: shares CTDPRS keys 1 and 2; key 4 has no origin match.
        df1 = DataFile()
        df1.create_columns(['CTDPRS', 'NITRAT', 'FLUOR'])
        df1['CTDPRS'].append(1, 2)
        df1['CTDPRS'].append(2, 2)
        df1['CTDPRS'].append(4, 2)
        df1['NITRAT'].append(20, 3)
        df1['NITRAT'].append(21, 4)
        df1['NITRAT'].append(22, 4)
        df1['FLUOR'].append(200, 2)
        df1['FLUOR'].append(201, 3)
        df1['FLUOR'].append(202, 3)

        # Merge only the flag "columns"; data values must stay origin's.
        mdf = merge_datafiles(df0, df1, ['CTDPRS'],
                              ['NITRAT_FLAG_W', 'FLUOR_FLAG_W'])
        # Values untouched; flags for matched keys (1, 2) come from deriv,
        # unmatched key 3 keeps origin's flag / gets fill flag 9 for FLUOR.
        self.assertEqual(mdf['NITRAT'].values, [10, 11, 12])
        self.assertEqual(mdf['NITRAT'].flags_woce, [3, 4, 2])
        self.assertEqual(mdf['FLUOR'].values, [100, 101, 102])
        self.assertEqual(mdf['FLUOR'].flags_woce, [2, 3, 9])
Esempio n. 23
0
def read(self, fileobj):
    """How to read LDEO ASEP files from an NODC accession.

    Scans the gzipped tar for the '0-data' directory, parses every data
    member with the LDEO ASEP reader (skipping PDFs, rosette files, and
    upcasts), and stores the files sorted by station number.

    """
    # Fix: removed the local helpers is_fname_ok() and reader(), which were
    # defined but never called anywhere in this function.
    dfiles = []

    datapath = None
    datadirname = '0-data'
    with tarfile_open(mode='r:gz', fileobj=fileobj) as fff:
        for member in fff.getmembers():
            # Locate the data directory from the first member that names it.
            if datapath is None:
                if datadirname in member.name:
                    datapath = member.name.split(
                        datadirname)[0] + datadirname + '/'
                    log.info('NODC accession data path: {0}'.format(datapath))
                else:
                    continue

            if not member.name.startswith(datapath):
                continue
            bname = os.path.basename(member.name)
            if bname.endswith('pdf'):
                continue
            if '_ros.' in bname:
                continue
            # don't want upcasts
            if '_ctd_U.' in bname:
                continue

            dfile = DataFile()
            ggg = fff.extractfile(member)
            if ggg is None:
                log.error(u'Unable to extract file {0!r}'.format(member))
            else:
                ldeo_asep.read(dfile, ggg)
                dfiles.append(dfile)

    # Sort stations lexicographically by STNNBR for stable output order.
    self.files = sorted(dfiles,
                        key=lambda dfile: lexico(dfile.globals['STNNBR']))
Esempio n. 24
0
def guess_ftype_dftype_format(fileobj, file_type=None, file_name=None):
    """Return a tuple of guessed file type, Datafile or DatafileCollection, and
    the format module.

    """
    # Imported here to avoid a module-level import cycle.
    from libcchdo.model.datafile import (
        DataFile, SummaryFile, DataFileCollection)
    guessed_type = guess_file_type_from_file(fileobj, file_type, file_name)
    # Pick the model object matching the file type: collections for archives
    # and zips, summaries for sum files, a single DataFile otherwise.
    if 'zip' in guessed_type or guessed_type.startswith('archive'):
        model = DataFileCollection()
    elif guessed_type.startswith('sum'):
        model = SummaryFile()
    else:
        model = DataFile()
    fmt = guess_format_module(fileobj, guessed_type)
    return (guessed_type, model, fmt)
Esempio n. 25
0
def read(self, handle):
    """How to read CTD WOCE EGEE files from a Zip.

    Every member of the archive is parsed with the WOCE EGEE reader; a
    parse failure is logged with a traceback and re-raised.

    """
    # Fix: do not shadow the builtins `zip` and `file`; close the member
    # stream even when parsing fails; use the `except ... as` syntax.
    zfile = Zip.ZeroCommentZipFile(handle, 'r')
    try:
        for fname in zfile.namelist():
            tempstream = StringIO(zfile.read(fname))
            try:
                ctdfile = DataFile()
                try:
                    woce_egee.read(ctdfile, tempstream)
                except Exception as err:
                    log.info('Failed to read file %s in %s' % (fname, handle))
                    print_exc()
                    raise err
                self.append(ctdfile)
            finally:
                tempstream.close()
    finally:
        zfile.close()
Esempio n. 26
0
    def setUp(self):
        """Open the sample netCDF file and build a DataFile with test globals."""
        sample_path = os.path.join(
            os.path.dirname(__file__),
            'samples/nc_hyd/i08s_33RR20070204_00001_00001_hy1.nc')
        self._infile = open(sample_path, 'r')
        self.datafile = DataFile()
        self._outfile = NamedTemporaryFile()

        # Minimal set of globals required before fusing date and time.
        test_globals = [
            ('DATE', '12341231'),
            ('TIME', '2359'),
            ('LATITUDE', 90),
            ('LONGITUDE', 180),
            ('DEPTH', -1),
            ('EXPOCODE', 'test'),
            ('STNNBR', '20'),
            ('CASTNO', '5'),
            ('_OS_ID', 'OS1'),
        ]
        for key, value in test_globals:
            self.datafile.globals[key] = value
        fuse_datetime(self.datafile)
Esempio n. 27
0
def sbe_asc_to_ctd_exchange(args):
    """Convert SBE ASC input file(s) to CTD Exchange output.

    A single input file is written as a _ct1.csv; multiple inputs are
    bundled into a _ct1.zip. Output goes to stdout unless args.output names
    a file path prefix.

    """
    # Fix: removed unused locals `d = DataFile()` and `f = args.files`.
    output = sys.stdout
    expo = ''
    if args.expo:
        expo = args.expo
    if args.output:
        output = args.output

    if len(args.files) == 1:
        if output is not sys.stdout:
            output = output + "_ct1.csv"
        _single_file(asc, args.files, output, expo=expo)

    if len(args.files) > 1:
        if output is not sys.stdout:
            output = output + '_ct1.zip'
        _multi_file(asc, args.files, output, expo=expo)
Esempio n. 28
0
    def test_write(self):
        """Write a one-row bottle netCDF file from a hand-built DataFile."""
        self.file = DataFile()

        g = self.file.globals

        self.file['EXPOCODE'] = Column('EXPOCODE')
        self.file['EXPOCODE'].append('TESTEXPO')

        self.file['SECT_ID'] = Column('SECT_ID')
        self.file['SECT_ID'].append('TEST')

        # NOTE(review): the Column names here look swapped — STNNBR is built
        # as Column('CASTNO') and CASTNO as Column('STNNBR'). Confirm whether
        # check_and_replace_parameters() corrects this or it is a bug.
        self.file['STNNBR'] = Column('CASTNO')
        self.file['STNNBR'].append(5)

        self.file['CASTNO'] = Column('STNNBR')
        self.file['CASTNO'].append(20)

        self.file['DEPTH'] = Column('DEPTH')
        self.file['DEPTH'].append(-1)

        self.file['LATITUDE'] = Column('LATITUDE')
        self.file['LATITUDE'].append(90)

        self.file['LONGITUDE'] = Column('LONGITUDE')
        self.file['LONGITUDE'].append(180)

        self.file['_DATETIME'] = Column('_DATETIME')
        self.file['_DATETIME'].append(datetime.utcnow())

        self.file['BTLNBR'] = Column('BTLNBR')
        self.file['BTLNBR'].append(5, 9)

        # One data parameter with custom parameter metadata attached after
        # parameter resolution.
        self.file['CTDOXY'] = Column('CTDOXY')
        self.file['CTDOXY'].append(1, 2)
        self.file.check_and_replace_parameters()
        p = self.file['CTDOXY'].parameter
        p.description = 'ctd oxygen'
        p.bound_lower = 0
        p.bound_upper = 200

        botnc.write(self.file, NamedTemporaryFile())
Esempio n. 29
0
    def test_read_unknown_parameter_fillvalue(self):
        """Reading data for a parameter with unknown format should still check
           for out of band.

        """
        with closing(StringIO()) as stream:
            stream.name = 'testfile'
            # -999 is the out-of-band fill value in both known and unknown
            # parameter columns.
            for line in ('-999,9,1,012\n', '11,2,-999,123\n'):
                stream.write(line)
            stream.flush()
            stream.seek(0)
            dfile = DataFile()
            for name in ('CTDPRS', 'UNKPARAM', 'BTLNBR'):
                dfile[name] = Column(name)
            exchange.read_data(
                dfile, stream, ['CTDPRS', 'CTDPRS_FLAG_W', 'UNKPARAM', 'BTLNBR'])
        self.assertEqual(None, dfile['CTDPRS'].values[0])
        # Bottle numbers stay strings, zero padding intact.
        self.assertEqual('012', dfile['BTLNBR'].values[0])
        self.assertEqual('123', dfile['BTLNBR'].values[1])
        self.assertEqual(None, dfile['UNKPARAM'].values[1])
Esempio n. 30
0
    def test_read(self):
        """Reading a bottle netCDF file yields the expected column values."""
        self.file = DataFile()
        botnc.read(self.file, self.infile)

        nitrite_values = (0.11, None, 0.08, 0.08, 0.08, 0.08, 0.06, 0.03, 0.06,
                          0.04, 0.03, None, 0.03, None, 0.03, None)
        # Fix: use explicit loops rather than map() for side effects — under
        # Python 3 map() is lazy and the assertions would never execute.
        for expected, actual in zip(
                nitrite_values, self.file.columns['NITRIT'].values):
            self.assertAlmostEqualOrNones(expected, actual)

        freon11_values = (6.063, 6.055, 5.795, 5.619, 5.486, 5.508, 5.487,
                          5.683, 5.422, 5.190, 5.222, None, 5.289, None, 5.250,
                          5.254)
        for expected, actual in zip(
                freon11_values, self.file.columns['CFC-11'].values):
            self.assertAlmostEqualOrNones(expected, actual)

        # CFC113 has no data in this file: all 16 rows are None.
        freon113_values = (None, ) * 16
        for expected, actual in zip(
                freon113_values, self.file.columns['CFC113'].values):
            self.assertAlmostEqualOrNones(expected, actual)

        expocodes = ['33RR20070204'] * 16
        self.assertEqual(expocodes, self.file.columns['EXPOCODE'].values)