Example #1
0
def fuse_datetime_columns(file):
    """ Fuses a file's "DATE" and "TIME" columns into a "_DATETIME" column.
        There are three cases:
        1. DATE and TIME both exist
            A datetime object is inserted representing the combination
            of the two objects.
        2. DATE exists and TIME does not
            A date object is inserted only representing the date.
        3. DATE does not exist but TIME does
            None is inserted because date is required.

        Arg:
            file - a DataFile object
    """
    try:
        dates = file['DATE'].values
    except KeyError:
        # Case 3: without dates there is nothing to fuse.
        log.error(u'No DATE column is present.')
        return

    try:
        times = file['TIME'].values
    except KeyError:
        # Case 2: pair every date with None so strptime_woce_date_time
        # produces date-only objects. Previously `times` was left unbound
        # here, so the zip() below raised NameError instead of handling
        # the documented case.
        log.warn(u'No TIME column is present.')
        times = [None] * len(dates)

    file['_DATETIME'] = Column('_DATETIME')
    file['_DATETIME'].values = [
        strptime_woce_date_time(*x) for x in zip(dates, times)
    ]
    del file['DATE']
    # TIME is legitimately absent in case 2; only remove it if present
    # (the unconditional delete used to raise KeyError).
    try:
        del file['TIME']
    except KeyError:
        pass
Example #2
0
 def test_decimal_places_requires_decimal(self):
     """Mixing a plain float into a column of Decimals makes
     decimal_places() raise ValueError.
     """
     col = Column('test')
     col.values = [_decimal('-999.0000'), 20.12355, _decimal('-999.00')]
     with self.assertRaises(ValueError):
         col.decimal_places()
Example #3
0
    def test_decimal_places(self):
        """The column-wide decimal place count is the maximum number of
        digits after the decimal point among all of the column's values.

        """
        col = Column('test')
        raw = ('-999.0000', '19.0', '-999.000', '-999.00')
        col.values = [_decimal(s) for s in raw]
        self.assertEqual(4, col.decimal_places())
Example #4
0
def _read_oliver_sun(dfc, fileobj, cfg):
    """Read HRP2 format from Oliver Sun.

    Appends one DataFile per cast found in the MATLAB file to dfc.

    Args:
        dfc - DataFileCollection that receives one DataFile per cast.
        fileobj - open MATLAB .mat file object containing the casts.
        cfg - dict with 'expocode' (str) and 'global_mapping' (maps
              MATLAB struct field names to DataFile global names).
    """
    mat = loadmat(fileobj)
    # NOTE(review): assumes the first key of the loaded dict is the data
    # variable; loadmat typically also returns metadata keys such as
    # '__header__' — confirm the data key is reliably first.
    filekey = mat.keys()[0]
    casts = mat[filekey][0]

    for cast in casts:
        dfile = DataFile()
        dfc.append(dfile)

        dfile.globals['EXPOCODE'] = cfg['expocode']

        # TODO
        dfile.globals['DEPTH'] = 0

        # Walk the MATLAB struct fields of this cast in parallel with
        # their names.
        for key, item in zip(cast.dtype.names, cast):
            if item.shape == (1, 1):
                # Scalar field: becomes a file global, but only if the
                # config maps this field name to a global name.
                key = cfg['global_mapping'].get(key, None)
                if key:
                    dfile.globals[key] = item[0, 0]
            else:
                # Vector field: becomes a data column.
                try:
                    dfile[key] = Column(key)
                    dfile[key].values = list(item.flatten())
                    # Act as if all files had QC and assign it to OceanSITES 1.
                    # Assuming that someone has already gone through level 0
                    # data and we are receiving level 1 or higher.
                    dfile[key].flags_woce = [2] * len(dfile[key].values)
                except KeyError:
                    # Unknown parameter name; skip the column silently.
                    pass

        # Default the station number when the cast did not provide one.
        try:
            dfile.globals['STNNBR']
        except KeyError:
            dfile.globals['STNNBR'] = '999'

        woce.fuse_datetime(dfile)
Example #5
0
def read(self, handle):
    """ How to read a Bottle Exchange file.

    Parses the header (identifier line, comments, column names, units),
    checks that columns and units line up, verifies a unique sample
    identifier can be formed, reads the data rows, coerces key columns
    to their proper types, and fuses DATE/TIME into _DATETIME.

    Args:
        handle - an open file-like object positioned at the start of a
                 Bottle Exchange file.

    Raises:
        ValueError - when the column and unit counts differ, or when no
                     unique identifier combination is present.
    """
    read_identifier_line(self, handle, 'BOTTLE')
    l = read_comments(self, handle)

    # Read columns and units
    columns = [x.strip() for x in l.strip().split(',')]
    units = [x.strip() for x in handle.readline().strip().split(',')]

    # Check columns and units to match length
    if len(columns) != len(units):
        raise ValueError(
            ("Expected as many columns as units in file. "
             "Found %d columns and %d units.") % (len(columns), len(units)))

    # Check for unique identifier. Accepted combinations (in order of
    # preference): (STNNBR, CASTNO, SAMPNO, BTLNBR), (STNNBR, CASTNO,
    # SAMPNO), (STNNBR, CASTNO, BTLNBR).
    identifier = []
    if 'EXPOCODE' in columns and \
       'STNNBR' in columns and \
       'CASTNO' in columns:
        identifier = ['STNNBR', 'CASTNO']
        if 'SAMPNO' in columns:
            identifier.append('SAMPNO')
            if 'BTLNBR' in columns:
                identifier.append('BTLNBR')
        elif 'BTLNBR' in columns:
            identifier.append('BTLNBR')
        else:
            raise ValueError(("No unique identifer found for file. "
                              "(STNNBR,CASTNO,SAMPNO,BTLNBR),"
                              "(STNNBR,CASTNO,SAMPNO),"
                              "(STNNBR,CASTNO,BTLNBR)"))

    self.create_columns(columns, units)

    read_data(self, handle, columns)

    # Format all data to be what it is: expocodes are strings, positions
    # are Decimals. Columns are optional, so missing ones are skipped.
    try:
        self['EXPOCODE'].values = map(str, self['EXPOCODE'].values)
    except KeyError:
        pass
    try:
        self['LATITUDE'].values = map(_decimal, self['LATITUDE'].values)
    except KeyError:
        pass
    try:
        self['LONGITUDE'].values = map(_decimal, self['LONGITUDE'].values)
    except KeyError:
        pass
    # Ensure DATE and TIME columns exist (filled with None) so that
    # fuse_datetime below always has both to work with.
    try:
        self['DATE']
    except KeyError:
        self['DATE'] = Column('DATE')
        self['DATE'].values = [None] * len(self)
    try:
        self['TIME']
    except KeyError:
        self['TIME'] = Column('TIME')
        self['TIME'].values = [None] * len(self)

    woce.fuse_datetime(self)

    self.check_and_replace_parameters()
Example #6
0
def australian_navy_ctd(args):
    """Download and convert Australian Navy CTD data.

    Crawls the RAN THREDDS catalog, reads each profile via OPeNDAP,
    maps CF standard names onto CCHDO parameter mnemonics, groups the
    profiles into one DataFileCollection per cruise (EXPOCODE), and
    writes each collection as a zipped CTD Exchange file to
    ``args.output``.

    Arg:
        args - argparse namespace; ``args.output`` is an open writable
               file object, closed when writing completes.
    """
    from pydap.client import open_url
    from libcchdo.thredds import crawl
    from libcchdo.formats.ctd.zip import exchange as ctdzipex
    from libcchdo.formats.zip import write as zwrite

    dfcs = []

    # CF standard_name -> CCHDO parameter mnemonic.
    cf_param_to_cchdo_param = {
        'sea_water_pressure': 'CTDPRS',
        'sea_water_temperature': 'CTDTMP',
        'sea_water_practical_salinity': 'CTDSAL',
    }
    ignored_qc_flags = [
        'time_qc_flag',
        'position_qc_flag',
    ]
    # Map each known source QC convention's values onto WOCE flags.
    qc_conventions = {
        'Proposed IODE qc scheme March 2012': {
            1: 2,  # good
            2: 5,  # not_evaluated_or_unknown
            3: 3,  # suspect
            4: 4,  # bad
            9: 9,  # missing
        },
    }

    dfc = DataFileCollection()
    catalog = "http://www.metoc.gov.au/thredds/catalog/RAN_CTD_DATA/catalog.xml"
    for url in crawl(catalog):
        df = DataFile()

        log.info(u'Reading %s', url)
        dset = open_url(url)
        vars = dset.keys()
        for vname in vars:
            var = dset[vname]
            attrs = var.attributes
            if 'standard_name' in attrs:
                std_name = attrs['standard_name']
                if std_name == 'time':
                    # NOTE(review): assumes time is encoded as days since
                    # 1950-01-01 (timedelta's first positional argument is
                    # days) — confirm against the variable's units.
                    df.globals['_DATETIME'] = \
                        datetime(1950, 1, 1) + timedelta(var[:])
                elif std_name == 'latitude':
                    df.globals['LATITUDE'] = var[:]
                elif std_name == 'longitude':
                    df.globals['LONGITUDE'] = var[:]
                elif std_name in cf_param_to_cchdo_param:
                    cparam = cf_param_to_cchdo_param[std_name]
                    # Replace fill values with None so missing data is
                    # explicit downstream.
                    if '_FillValue' in attrs:
                        fill_value = attrs['_FillValue']
                        values = []
                        for x in var[:]:
                            if equal_with_epsilon(x, fill_value):
                                values.append(None)
                            else:
                                values.append(x)
                    else:
                        values = var[:]

                    try:
                        df[cparam].values = values
                    except KeyError:
                        df[cparam] = Column(cparam)
                        df[cparam].values = values
                elif 'status_flag' in std_name:
                    # A flag variable references its data variable by
                    # standard name, e.g. "sea_water_pressure status_flag".
                    # NOTE(review): assumes the data variable was created
                    # before its flag variable is visited.
                    flagged_param = std_name.replace('status_flag', '').strip()
                    cparam = cf_param_to_cchdo_param[flagged_param]
                    qc_convention = attrs['quality_control_convention']
                    if qc_convention in qc_conventions:
                        qc_map = qc_conventions[qc_convention]
                        df[cparam].flags_woce = [qc_map[x] for x in var[:]]
                else:
                    log.debug('unhandled standard_name %s', std_name)
            elif ('long_name' in attrs
                  and attrs['long_name'] == 'profile identifier'):
                # The identifier packs cruise and station ids together:
                # cruise * 10**4 + station.
                profile_id = var[:]
                cruise_id = profile_id / 10**4
                profile_id = profile_id - cruise_id * 10**4
                df.globals['EXPOCODE'] = str(cruise_id)
                df.globals['STNNBR'] = str(profile_id)
                df.globals['CASTNO'] = str(1)
            elif vname in ignored_qc_flags:
                df.globals['_' + vname] = var[:]
            elif (vname.endswith('whole_profile_flag')
                  or vname.endswith('sd_test')):
                pass
            else:
                log.debug('unhandled variable %s', vname)

        # Start a new collection whenever the cruise (EXPOCODE) changes.
        if dfc.files:
            if dfc.files[0].globals['EXPOCODE'] != df.globals['EXPOCODE']:
                dfcs.append(dfc)
                dfc = DataFileCollection()
        dfc.append(df)

    # BUG FIX: the final collection was never appended, silently dropping
    # the last cruise's profiles from the output.
    if dfc.files:
        dfcs.append(dfc)

    with closing(args.output) as out_file:
        # BUG FIX: use a one-element list as a mutable cell. Rebinding a
        # plain int inside get_filename raised UnboundLocalError because
        # Python 2 has no ``nonlocal``.
        next_id = [0]

        def get_filename(dfc):
            # Name each zip by EXPOCODE; fall back to a running counter
            # for an empty collection.
            try:
                return '{0}_ct1.zip'.format(dfc.files[0].globals['EXPOCODE'])
            except IndexError:
                next_id[0] += 1
                return '{0}_ct1.zip'.format(next_id[0])

        zwrite(dfcs, out_file, ctdzipex, get_filename)