def fuse_datetime_columns(file):
    """Fuses a file's "DATE" and "TIME" columns into a "_DATETIME" column.

    There are three cases:
    1. DATE and TIME both exist
        A datetime object is inserted representing the combination of the
        two objects.
    2. DATE exists and TIME does not
        A date object is inserted only representing the date.
    3. DATE does not exist but TIME does
        None is inserted because date is required.

    Arg:
        file - a DataFile object
    """
    try:
        dates = file['DATE'].values
    except KeyError:
        log.error(u'No DATE column is present.')
        return
    try:
        times = file['TIME'].values
    except KeyError:
        log.warn(u'No TIME column is present.')
        # BUG FIX: previously `times` was left unbound here, so the zip()
        # below raised NameError and case 2 of the docstring never worked.
        # Pair each date with None so only date objects get produced.
        times = [None] * len(dates)
    file['_DATETIME'] = Column('_DATETIME')
    file['_DATETIME'].values = [
        strptime_woce_date_time(*x) for x in zip(dates, times)]
    del file['DATE']
    # TIME may legitimately be absent (case 2); guard the delete.
    try:
        del file['TIME']
    except KeyError:
        pass
def split_datetime_columns(file):
    """Splits a file's "_DATETIME" columns into "DATE" and "TIME" columns.

    There are three cases:
    1. datetime
        DATE and TIME are populated appropriately.
    2. date
        Only DATE is populated.
    3. None
        Both DATE and TIME are None

    If there are absolutely no TIMEs in the file the TIME column is not kept.

    Arg:
        file - a DataFile object
    """
    dtimecol = file['_DATETIME']
    date_col = file['DATE'] = Column('DATE')
    time_col = file['TIME'] = Column('TIME')
    for value in dtimecol.values:
        if not value:
            # Case 3: no datetime at all for this row.
            date_col.append(None)
            time_col.append(None)
            continue
        date_col.append(strftime_woce_date(value))
        # Only full datetime objects carry a time portion; exact type
        # check is deliberate (datetime is a subclass of date).
        if type(value) is datetime:
            time_col.append(strftime_woce_time(value))
    del file['_DATETIME']
    # When no row had a time, fill the TIME column with the fill value.
    if not any(file['TIME'].values):
        file['TIME'].values = [UNKNONW_TIME_FILL] * len(file['TIME'])
def test_sorted_columns(self):
    """Columns should sort into canonical WOCE ordering."""
    self.file.columns['CASTNO'] = Column('CASTNO')
    self.file.columns['STNNBR'] = Column('STNNBR')
    expected = ['EXPOCODE', 'STNNBR', 'CASTNO']
    received = [col.parameter.mnemonic_woce()
                for col in self.file.sorted_columns()]
    # If lengths are equal and all expected in received, then assume equal
    self.assertEqual(len(expected), len(received))
    self.assertTrue(all(name in received for name in expected))
def test_decimal_places_requires_decimal(self):
    """Mixing a plain float in with Decimals must raise ValueError."""
    column = Column('test')
    column.values = [
        _decimal('-999.0000'),
        20.12355,  # not a Decimal
        _decimal('-999.00'),
    ]
    with self.assertRaises(ValueError):
        column.decimal_places()
def _setupData(self):
    """Populate the datafile with minimal CTD columns and parameter info."""
    for name in ('CTDPRS', 'CTDOXY'):
        self.datafile[name] = Column(name)
        self.datafile[name].append(1, 2)
    self.datafile.check_and_replace_parameters()
    param = self.datafile['CTDOXY'].parameter
    param.description = 'ctd oxygen'
    param.bound_lower = 0
    param.bound_upper = 200
def test_decimal_places(self):
    """A column's decimal places is the max number of places after a
    decimal in the column.
    """
    column = Column('test')
    column.values = [
        _decimal(text)
        for text in ('-999.0000', '19.0', '-999.000', '-999.00')]
    # '-999.0000' has the most places: four.
    self.assertEqual(4, column.decimal_places())
def test_read_btlnbr_as_string(self):
    """Bottle numbers must be read as strings, preserving leading zeros."""
    with closing(StringIO()) as fff:
        fff.write('SIO1,33.24\n')
        fff.write('01,32.10\n')
        fff.flush()
        fff.seek(0)
        dfile = DataFile()
        for name in ('BTLNBR', 'CTDSAL'):
            dfile[name] = Column(name)
        exchange.read_data(dfile, fff, ['BTLNBR', 'CTDSAL'])
        # BTLNBR stays textual; CTDSAL becomes Decimal.
        self.assertEqual(['SIO1', '01'], dfile['BTLNBR'].values)
        self.assertEqual(
            [Decimal('33.24'), Decimal('32.10')], dfile['CTDSAL'].values)
def test_create_column_with_parameter(self):
    """Creating a column with a given parameter object should set it as
    that column's parameter object.
    """
    parameter = std.make_contrived_parameter('testparameter')
    created = Column(parameter)
    self.assertEqual(created.parameter, parameter)
def test_column_headers(self):
    """column_headers() should list the WOCE names of present columns."""
    self.assertEqual(['EXPOCODE'], self.file.column_headers())
    self.file.columns['STNNBR'] = Column('STNNBR')
    expected = ['EXPOCODE', 'STNNBR']
    received = self.file.column_headers()
    # If lengths are equal and all expected in received, then assume equal
    self.assertEqual(len(expected), len(received))
    self.assertTrue(all(header in received for header in expected))
def test_diff(self):
    """Diffing two columns whose WOCE flags are both [None] is no diff."""
    col_a = Column('aaa')
    col_b = Column('aaa')
    # Make sure can diff on Nones, results in no diff.
    col_a.flags_woce = [None]
    col_b.flags_woce = [None]
    result = col_a.diff(col_b)
    self.assertFalse(result['diff'])
def test_write(self):
    """Write a minimal bottle file through botnc without error.

    BUG FIX: the STNNBR column was created as Column('CASTNO') and the
    CASTNO column as Column('STNNBR') — the parameter names were swapped
    relative to their keys. Each column now matches its key. The unused
    local `g` was also removed.
    """
    self.file = DataFile()
    # One-row "global" columns; each key gets a Column of the same name.
    scalar_columns = [
        ('EXPOCODE', 'TESTEXPO'),
        ('SECT_ID', 'TEST'),
        ('STNNBR', 5),
        ('CASTNO', 20),
        ('DEPTH', -1),
        ('LATITUDE', 90),
        ('LONGITUDE', 180),
        ('_DATETIME', datetime.utcnow()),
    ]
    for name, value in scalar_columns:
        self.file[name] = Column(name)
        self.file[name].append(value)
    # Data columns carry a value and a WOCE flag.
    self.file['BTLNBR'] = Column('BTLNBR')
    self.file['BTLNBR'].append(5, 9)
    self.file['CTDOXY'] = Column('CTDOXY')
    self.file['CTDOXY'].append(1, 2)
    self.file.check_and_replace_parameters()
    p = self.file['CTDOXY'].parameter
    p.description = 'ctd oxygen'
    p.bound_lower = 0
    p.bound_upper = 200
    botnc.write(self.file, NamedTemporaryFile())
def test_read_unknown_parameter_fillvalue(self):
    """Reading data for a parameter with unknown format should still
    check for out of band.
    """
    with closing(StringIO()) as fff:
        fff.name = 'testfile'
        fff.write('-999,9,1,012\n')
        fff.write('11,2,-999,123\n')
        fff.flush()
        fff.seek(0)
        dfile = DataFile()
        for name in ('CTDPRS', 'UNKPARAM', 'BTLNBR'):
            dfile[name] = Column(name)
        exchange.read_data(
            dfile, fff, ['CTDPRS', 'CTDPRS_FLAG_W', 'UNKPARAM', 'BTLNBR'])
        # -999 is out-of-band and should read back as None, even for
        # the unknown parameter; bottle numbers stay strings.
        self.assertEqual(None, dfile['CTDPRS'].values[0])
        self.assertEqual('012', dfile['BTLNBR'].values[0])
        self.assertEqual('123', dfile['BTLNBR'].values[1])
        self.assertEqual(None, dfile['UNKPARAM'].values[1])
def test_read_warn_bad_flag(self):
    """A non-numeric WOCE flag should be logged, not raised."""
    with closing(StringIO()) as fff:
        fff.name = 'testfile'
        fff.write('123,a\n')
        fff.flush()
        fff.seek(0)
        dfile = DataFile()
        dfile['CTDSAL'] = Column('CTDSAL')
        exchange.read_data(dfile, fff, ['CTDSAL', 'CTDSAL_FLAG_W'])
        expected_lines = [
            "Bad WOCE flag 'a' for CTDSAL on data row 0",
        ]
        self.assertTrue(self.ensure_lines(expected_lines))
def _read_oliver_sun(dfc, fileobj, cfg):
    """Read HRP2 format from Oliver Sun.

    Args:
        dfc - DataFileCollection to append one DataFile per cast to
        fileobj - file-like object containing a MATLAB .mat file
        cfg - dict with at least 'expocode' and 'global_mapping' keys

    Each cast's (1, 1)-shaped items become file globals (via the
    cfg['global_mapping'] rename table); everything else becomes a data
    column.
    """
    mat = loadmat(fileobj)
    # Python 2 dict.keys() returns a list; take the first variable name.
    # NOTE(review): assumes the .mat file holds exactly one data variable
    # of interest — confirm for multi-variable files.
    filekey = mat.keys()[0]
    casts = mat[filekey][0]
    for cast in casts:
        dfile = DataFile()
        dfc.append(dfile)
        dfile.globals['EXPOCODE'] = cfg['expocode']
        # TODO
        dfile.globals['DEPTH'] = 0
        for key, item in zip(cast.dtype.names, cast):
            if item.shape == (1, 1):
                # Scalar: treat as a global. `key` is deliberately
                # rebound; keys absent from global_mapping are dropped.
                key = cfg['global_mapping'].get(key, None)
                if key:
                    dfile.globals[key] = item[0, 0]
            else:
                try:
                    dfile[key] = Column(key)
                    dfile[key].values = list(item.flatten())
                    # Act as if all files had QC and assign it to
                    # OceanSITES 1. Assuming that someone has already gone
                    # through level 0 data and we are receiving level 1 or
                    # higher.
                    dfile[key].flags_woce = [2] * len(dfile[key].values)
                except KeyError:
                    # Unknown parameter name; skip the column.
                    pass
        # Default the station number when the cast did not provide one.
        try:
            dfile.globals['STNNBR']
        except KeyError:
            dfile.globals['STNNBR'] = '999'
        woce.fuse_datetime(dfile)
def test_calculate_depths(self):
    """calculate_depths picks a method based on available columns."""
    # With an _ACTUAL_DEPTH column the 'actual' method is used.
    self.file['_ACTUAL_DEPTH'] = Column('_ACTUAL_DEPTH')
    self.assertEqual(('actual', []), self.file.calculate_depths())
    del self.file['_ACTUAL_DEPTH']
    # With CTD columns but no data, unesco1983 is selected.
    self.file.globals['LATITUDE'] = 0
    self.file.create_columns(['CTDPRS', 'CTDSAL', 'CTDTMP'])
    self.assertEqual(('unesco1983', []), self.file.calculate_depths())
    # With data present, sverdrup integration is used.
    for name in ('CTDPRS', 'CTDSAL', 'CTDTMP'):
        self.file[name].values = [1]
    self.assertEqual(
        ('sverdrup', [_decimal('1.021723814950101286444879340E-8')]),
        self.file.calculate_depths())
def _prepare_to_read_exchange_data(dfile, columns):
    """Return preparatory information about the columns to be read.

    columns - list of WOCE names of parameters

    Returns:
        A list of tuples, each containing the list to which to append the
        next value as well as, depending on whether the column is:
        1. data column
            a standard Parameter that has been loaded from the database
            with format string
        2. flag column
            a tuple including the flag name, the attribute of the Column
            for the flag column, and the parameter name

    BUG FIX: the list `infos` was built but never returned; callers got
    None despite the documented return value. `return infos` added.
    """
    infos = []
    ssesh = session()
    for column in columns:
        flag_info = None
        # Strip a flag suffix (if any) to find the underlying parameter.
        if column.endswith(FLAG_ENDING_WOCE):
            colname = column[:column.index(FLAG_ENDING_WOCE)]
            flag_info = ('WOCE', 'flags_woce', colname)
        elif column.endswith(FLAG_ENDING_IGOSS):
            colname = column[:column.index(FLAG_ENDING_IGOSS)]
            flag_info = ('IGOSS', 'flags_igoss', colname)
        else:
            colname = column
        try:
            col = dfile[colname]
        except KeyError:
            # A flag without a data column is suspicious but tolerated:
            # create the missing column either way.
            if flag_info:
                log.error(u'Flag column {0} exists without parameter '
                    'column {1}'.format(column, colname))
            col = dfile[colname] = Column(colname)
        if flag_info:
            # Append target is the Column's flag list, not its values.
            col = getattr(col, flag_info[1])
            infos.append((col, flag_info))
        else:
            infos.append((col, find_parameter(ssesh, column)))
    return infos
def merge_ctd_bacp_xmiss_and_ctd_exchange(file, mergefile):
    """Merge mergefile onto file.

    Matches rows by pressure value and copies the merge file's TRANSM
    values into the corresponding rows of file.

    Returns 1 on failure (no common pressure column or no TRANSM column
    in the merge file); returns None on success.

    BUG FIXES:
    - the pressure search previously assigned merge_pressure and pressure
      in separate statements, so a parameter present in only one file
      could leave the two variables pointing at *different* parameters;
      both are now assigned only when the parameter exists in both files.
    - removed the dead `param = 'XMISS'` assignment that was immediately
      overwritten.
    """
    merge_pressure = None
    pressure = None
    for c in PRESSURE_PARAMETERS:
        try:
            candidate_merge = mergefile[c]
            candidate_orig = file[c]
        except KeyError:
            continue
        # Both files have this pressure parameter; keep it (last match
        # wins, as before).
        merge_pressure = candidate_merge
        pressure = candidate_orig
    if merge_pressure is None or pressure is None:
        log.warn(
            'Unable to find a matching pressure column in both files. Could '
            'not merge.')
        return 1

    param = 'TRANSM'
    # Locate or create the target TRANSM column in file.
    xmiss_column = None
    try:
        xmiss_column = file['TRANSM']
    except KeyError:
        pass
    if not xmiss_column:
        xmiss_column = file['TRANSM'] = Column('TRANSM')
        xmiss_column.values = [None] * len(file)

    merge_xmiss = None
    try:
        merge_xmiss = mergefile['TRANSM']
    except KeyError:
        pass
    if not merge_xmiss:
        log.warn('Merge file has no {0} column to merge'.format(param))
        return 1

    # Copy each merge value into the row with the matching pressure.
    for i, p in enumerate(merge_pressure.values):
        j = pressure.values.index(p)
        xmiss_column.values[j] = merge_xmiss.values[i]
def setUp(self):
    # Fresh EXPOCODE column for every test.
    self.column = Column('EXPOCODE')
class TestColumn(TestCase):
    """Tests for the Column container: values, WOCE flags, IGOSS flags."""

    def setUp(self):
        self.column = Column('EXPOCODE')

    def test_initialization(self):
        # NOTE(review): `parameter` is assigned but unused here.
        parameter = std.find_by_mnemonic('EXPOCODE')
        # The column did not initialize to the correct parameter
        self.assertEqual(self.column.parameter.mnemonic_woce(), 'EXPOCODE')
        # Missing values array.
        self.assertEqual(self.column.values, [])
        # Missing WOCE flags array
        self.assertEqual(self.column.flags_woce, [])
        # Missing IGOSS flags array
        self.assertEqual(self.column.flags_igoss, [])

    def test_create_column_with_parameter(self):
        """Creating a column with a given parameter object should set it as
        that column's parameter object.
        """
        param = std.make_contrived_parameter('testparameter')
        column = Column(param)
        self.assertEqual(column.parameter, param)

    def test_get(self):
        # get() on an empty column returns None rather than raising.
        self.assertEqual(None, self.column.get(0))
        self.column[0] = 1
        self.assertEqual(self.column.get(0), 1)
        self.assertEqual(self.column[0], 1)
        # Out-of-range access also yields None, via get and __getitem__.
        self.assertEqual(None, self.column.get(1))
        self.assertEqual(None, self.column.__getitem__(1))

    def test_length(self):
        self.assertEqual(len(self.column), 0)
        self.column[0] = 1
        self.assertEqual(len(self.column), 1)
        # Setting past the end pads the column (length becomes 3).
        self.column[2] = 2
        self.assertEqual(len(self.column), 3)

    def test_set(self):
        # set(index, value, woce_flag, igoss_flag)
        self.column.set(1, 2, 3, 4)
        self.assertEqual(self.column[1], 2)
        self.assertEqual(self.column.flags_woce[1], 3)
        self.assertEqual(self.column.flags_igoss[1], 4)
        self.assertEqual(len(self.column), 2)

    def test_set_i(self):
        """Make sure setting to an index past the current length of the
        list doesn't raise an index exception and works as expected.
        """
        self.column.set(2, 1, 1, 1)
        self.assertEqual(len(self.column), 3)

    def test_append(self):
        # append(value, woce_flag, igoss_flag) grows all three lists.
        self.column.append(2, 3, 4)
        self.assertEqual(len(self.column), 1)
        self.assertEqual(len(self.column.flags_woce), 1)
        self.assertEqual(len(self.column.flags_igoss), 1)

    def test_iter(self):
        # Iterating a column yields its values, not its flags.
        self.column.append(1, 2, 3)
        self.column.append(4, 5, 6)
        arr = [x for x in self.column]
        self.assertEqual([1, 4], arr)

    def test_contains(self):
        # Membership tests against values only; flags are not searched.
        self.column.append(1, 2, 3)
        self.column.append(4, 5, 6)
        self.assertTrue(1 in self.column)
        self.assertFalse(2 in self.column)

    def test_is_flagged_woce(self):
        self.assertFalse(self.column.is_flagged_woce())
        # A value without a flag does not count as flagged.
        self.column.append(1)
        self.assertFalse(self.column.is_flagged_woce())
        self.column.append(2, 3, 4)
        self.assertTrue(self.column.is_flagged_woce())

    def test_is_flagged_igoss(self):
        self.assertFalse(self.column.is_flagged_igoss())
        self.column.append(1)
        self.assertFalse(self.column.is_flagged_igoss())
        self.column.append(2, 3, 4)
        self.assertTrue(self.column.is_flagged_igoss())

    def test_is_flagged(self):
        # is_flagged() is true when either flag set is present.
        self.assertFalse(self.column.is_flagged())
        self.column.append(1)
        self.assertFalse(self.column.is_flagged())
        self.column.append(2, 3)
        self.assertTrue(self.column.is_flagged())

    def test_flagged_woce(self):
        self.assertFalse(self.column.is_flagged_woce()
            )  # Column has WOCE flags when there should not be
        self.column[0] = 1
        self.assertFalse(self.column.is_flagged_woce()
            )  # Column has WOCE flags when there should not be
        self.column.set(0, 1, 2, 3)
        self.assertTrue(self.column.is_flagged_woce(
            ))  # Column did not have WOCE flags when there should have been

    def test_flagged_igoss(self):
        self.assertFalse(self.column.is_flagged_igoss()
            )  # Column has IGOSS flags when there should not be
        self.column[0] = 1
        self.assertFalse(self.column.is_flagged_igoss()
            )  # Column has IGOSS flags when there should not be
        self.column.set(0, 1, 2, 3)
        self.assertTrue(self.column.is_flagged_igoss(
            ))  # Column did not have IGOSS flags when there should have been

    def test_str(self):
        # Only asserts that __str__ does not raise.
        str(self.column)
        # TODO

    def test_cmp(self):
        # A column compares equal-ish to itself...
        self.assertFalse(self.column < self.column)
        self.assertFalse(self.column > self.column)
        self.assertTrue(self.column >= self.column)
        # ...but loses ordering once its parameter is cleared.
        self.column.parameter = None
        self.assertFalse(self.column >= self.column)
def merge_datafiles(origin, deriv, keys, parameters):
    """Merge the columns and data of two DataFiles.

    origin - the base DataFile (used as the template for the result)
    deriv - the derivative DataFile whose changes are merged in
    keys - column names used to match rows between the two files
    parameters - the parameters the caller wants merged

    Returns a new DataFile; origin and deriv are not modified.

    Raises ValueError if a key column is itself among the parameters to
    merge.
    """
    row_map = map_keys(origin, deriv, keys)
    diffcols, not_in_orig_cols, not_in_deriv_cols, commoncols = \
        different_columns(origin, deriv, [], row_map)
    params_to_merge = filter_params_to_merge(
        diffcols, not_in_orig_cols, not_in_deriv_cols, commoncols, parameters)
    # A key column cannot be merged using itself as the row matcher.
    param_keys = set(params_to_merge) & set(keys)
    if param_keys:
        raise ValueError(
            u'Cannot merge key column using itself: {0!r}'.format(param_keys))

    # Create merged file using origin as template
    merged = copy(origin)

    # Create columns that are going to be added
    for param in params_to_merge:
        if '_FLAG_' in param:
            continue
        try:
            merged[param]
        except KeyError:
            merged[param] = Column(deriv[param].parameter)

    # There are two cases to consider when merging
    #
    # 1. New column is being added to original
    # 2. Column has been edited from original
    #
    # In both cases, the data values need to be inserted in the correct row
    # based on the key column values.
    # Additionally, it should be possible to specify whether only a flag column
    # gets merged or whether only column values get merged or which flag gets
    # merged. The way this could happen is...
    all_cols = commoncols + not_in_deriv_cols + list(keys) + \
        list(OrderedSet(diffcols) | params_to_merge)

    # Pass 1: seed the merged columns with origin's values/flags.
    for key in all_cols:
        param = _normalize_column_name(key)
        if param in origin:
            col = merged[param]
            # copy the origin values in to be overwritten
            origincol = origin[param]
            if '_FLAG_' in key:
                if key.endswith(FLAG_ENDING_WOCE):
                    col.flags_woce = origincol.flags_woce
                elif key.endswith(FLAG_ENDING_IGOSS):
                    col.flags_igoss = origincol.flags_igoss
            else:
                col.values = origincol.values

    # Pass 2: overlay deriv's values/flags at the rows given by row_map.
    for key in params_to_merge:
        param = _normalize_column_name(key)
        if param in deriv:
            col = merged[param]
            # For each param in deriv, update column with deriv value at
            # origin index
            derivcol = deriv[param]
            # Make sure the column is filled with fill values first.
            # set_length won't extend flag lists unless they evaluate to truthy
            if '_FLAG_' in key:
                if derivcol.flags_woce:
                    if not col.flags_woce:
                        # Seed with fill flag 9 so set_length will extend.
                        col.flags_woce = [9]
                    col.set_length(len(merged))
                    col.flags_woce = overwrite_list(
                        col.flags_woce, derivcol.flags_woce, row_map)
                if derivcol.flags_igoss:
                    if not col.flags_igoss:
                        col.flags_igoss = [9]
                    col.set_length(len(merged))
                    col.flags_igoss = overwrite_list(
                        col.flags_igoss, derivcol.flags_igoss, row_map)
            else:
                # Adopt deriv's units, warning about the change.
                if derivcol.parameter.units:
                    try:
                        orig_units = col.parameter.units.name
                    except AttributeError:
                        orig_units = ''
                    try:
                        deriv_units = derivcol.parameter.units.name
                    except AttributeError:
                        deriv_units = ''
                    log.warn(u'Changed units for {0} from {1!r} to {2!r}'.format(
                        param, orig_units, deriv_units))
                    col.parameter.units = derivcol.parameter.units
                col.set_length(len(merged))
                col.values = overwrite_list(
                    col.values, derivcol.values, row_map)

    # Copy header from origin and add note about merged parameters
    header = '# Merged parameters: {0}\n# {1}\n'.format(
        ', '.join(params_to_merge), origin.globals['stamp'].rstrip())
    header_orig = origin.globals['header'].rstrip()
    if header_orig:
        header += header_orig + '\n'
    merged.globals['header'] = header
    return merged
def test_check_and_replace_parameter_contrived(self):
    """Contrived parameters are not checked."""
    contrived = Column('_DATETIME')
    contrived.check_and_replace_parameter(self.file, convert=False)
def setUp(self):
    self.file = DataFile()
    # Keep a direct handle on the EXPOCODE column for the tests.
    self.c = self.file.columns['EXPOCODE'] = Column('EXPOCODE')
def australian_navy_ctd(args):
    """Download and convert Australian Navy CTD data.

    Crawls the RAN THREDDS catalog, converts each OPeNDAP profile into a
    DataFile, groups consecutive profiles by EXPOCODE into
    DataFileCollections, and writes them as zipped CTD Exchange files.

    BUG FIXES:
    - the final DataFileCollection was never appended to `dfcs` before
      writing, silently dropping the last cruise's profiles.
    - `get_filename` assigned `next_id += 1` inside a closure, which
      raises UnboundLocalError (Python 2 has no `nonlocal`); the counter
      is now a one-element list.
    """
    from pydap.client import open_url
    from libcchdo.thredds import crawl
    from libcchdo.formats.ctd.zip import exchange as ctdzipex
    from libcchdo.formats.zip import write as zwrite

    dfcs = []

    cf_param_to_cchdo_param = {
        'sea_water_pressure': 'CTDPRS',
        'sea_water_temperature': 'CTDTMP',
        'sea_water_practical_salinity': 'CTDSAL',
    }
    ignored_qc_flags = [
        'time_qc_flag',
        'position_qc_flag',
    ]
    # Map foreign QC conventions onto WOCE flag values.
    qc_conventions = {
        'Proposed IODE qc scheme March 2012': {
            1: 2,   # good
            2: 5,   # not_evaluated_or_unknown
            3: 3,   # suspect
            4: 4,   # bad
            9: 9,   # missing
        },
    }

    dfc = DataFileCollection()
    catalog = "http://www.metoc.gov.au/thredds/catalog/RAN_CTD_DATA/catalog.xml"
    for url in crawl(catalog):
        df = DataFile()

        log.info(u'Reading %s', url)
        dset = open_url(url)
        vars = dset.keys()
        for vname in vars:
            var = dset[vname]
            attrs = var.attributes
            if 'standard_name' in attrs:
                std_name = attrs['standard_name']
                if std_name == 'time':
                    # Time is days since 1950-01-01.
                    df.globals['_DATETIME'] = \
                        datetime(1950, 1, 1) + timedelta(var[:])
                elif std_name == 'latitude':
                    df.globals['LATITUDE'] = var[:]
                elif std_name == 'longitude':
                    df.globals['LONGITUDE'] = var[:]
                elif std_name in cf_param_to_cchdo_param:
                    cparam = cf_param_to_cchdo_param[std_name]
                    if '_FillValue' in attrs:
                        # Replace fill values with None.
                        fill_value = attrs['_FillValue']
                        values = []
                        for x in var[:]:
                            if equal_with_epsilon(x, fill_value):
                                values.append(None)
                            else:
                                values.append(x)
                    else:
                        values = var[:]

                    try:
                        df[cparam].values = values
                    except KeyError:
                        df[cparam] = Column(cparam)
                        df[cparam].values = values
                elif 'status_flag' in std_name:
                    # e.g. 'sea_water_pressure status_flag'
                    flagged_param = std_name.replace('status_flag', '').strip()
                    cparam = cf_param_to_cchdo_param[flagged_param]
                    qc_convention = attrs['quality_control_convention']
                    if qc_convention in qc_conventions:
                        qc_map = qc_conventions[qc_convention]
                        df[cparam].flags_woce = [qc_map[x] for x in var[:]]
                else:
                    log.debug('unhandled standard_name %s', std_name)
            elif ('long_name' in attrs and
                    attrs['long_name'] == 'profile identifier'):
                # Profile id encodes cruise id * 10^4 + profile number.
                profile_id = var[:]
                cruise_id = profile_id / 10 ** 4
                profile_id = profile_id - cruise_id * 10 ** 4
                df.globals['EXPOCODE'] = str(cruise_id)
                df.globals['STNNBR'] = str(profile_id)
                df.globals['CASTNO'] = str(1)
            elif vname in ignored_qc_flags:
                df.globals['_' + vname] = var[:]
            elif (vname.endswith('whole_profile_flag') or
                    vname.endswith('sd_test')):
                pass
            else:
                log.debug('unhandled variable %s', vname)

        # attach new file to appropriate collection
        if dfc.files:
            if dfc.files[0].globals['EXPOCODE'] != df.globals['EXPOCODE']:
                dfcs.append(dfc)
                dfc = DataFileCollection()
        dfc.append(df)
    # BUG FIX: do not drop the last collection.
    if dfc.files:
        dfcs.append(dfc)

    with closing(args.output) as out_file:
        # BUG FIX: mutable counter so the closure can increment it.
        next_id = [0]

        def get_filename(dfc):
            try:
                return '{0}_ct1.zip'.format(dfc.files[0].globals['EXPOCODE'])
            except IndexError:
                next_id[0] += 1
                return '{0}_ct1.zip'.format(next_id[0])
        zwrite(dfcs, out_file, ctdzipex, get_filename)
def read(self, handle):
    """How to read a Bottle Exchange file.

    handle - an open file-like object positioned at the start of the file

    Reads the BOTTLE identifier line, comments, the column/unit header
    rows, then the data; finally normalizes EXPOCODE/LATITUDE/LONGITUDE
    types and fuses DATE+TIME into _DATETIME.

    Raises ValueError when columns/units counts differ or when no unique
    row identifier combination is present.
    """
    read_identifier_line(self, handle, 'BOTTLE')
    l = read_comments(self, handle)

    # Read columns and units
    columns = [x.strip() for x in l.strip().split(',')]
    units = [x.strip() for x in handle.readline().strip().split(',')]

    # Check columns and units to match length
    if len(columns) != len(units):
        raise ValueError(
            ("Expected as many columns as units in file. "
             "Found %d columns and %d units.") % (len(columns), len(units)))

    # Check for unique identifer
    identifier = []
    if 'EXPOCODE' in columns and \
            'STNNBR' in columns and \
            'CASTNO' in columns:
        identifier = ['STNNBR', 'CASTNO']
        if 'SAMPNO' in columns:
            identifier.append('SAMPNO')
            if 'BTLNBR' in columns:
                identifier.append('BTLNBR')
        elif 'BTLNBR' in columns:
            identifier.append('BTLNBR')
        else:
            raise ValueError(
                ("No unique identifer found for file. "
                 "(STNNBR,CASTNO,SAMPNO,BTLNBR),"
                 "(STNNBR,CASTNO,SAMPNO),"
                 "(STNNBR,CASTNO,BTLNBR)"))

    self.create_columns(columns, units)
    read_data(self, handle, columns)

    # Format all data to be what it is
    try:
        self['EXPOCODE'].values = map(str, self['EXPOCODE'].values)
    except KeyError:
        pass
    try:
        self['LATITUDE'].values = map(_decimal, self['LATITUDE'].values)
    except KeyError:
        pass
    try:
        self['LONGITUDE'].values = map(_decimal, self['LONGITUDE'].values)
    except KeyError:
        pass
    # Ensure DATE and TIME columns exist (filled with None) so that
    # fuse_datetime can always run.
    try:
        self['DATE']
    except KeyError:
        self['DATE'] = Column('DATE')
        self['DATE'].values = [None] * len(self)
    try:
        self['TIME']
    except KeyError:
        self['TIME'] = Column('TIME')
        self['TIME'].values = [None] * len(self)

    woce.fuse_datetime(self)
    self.check_and_replace_parameters()
def test_formats(self):
    # formats() should produce one printf-style format per column.
    self.file.columns['CTDOXY'] = Column('CTDOXY')
    self.file.check_and_replace_parameters()
    # Order of columns may be wrong
    self.assertEqual(['%11s', '%9.4f'], self.file.formats())