def test_select(self):
    """
    Query a dataset with select() expressions and compare the hits with
    the elements that were created.
    """
    dataset = Dataset(tempfile.mktemp(), 'w')
    target_buffer = TargetBuffer(
        tags=['WI001'],
        name='White Island main vent',
        position=(177.2, -37.5, 50),
        position_error=(0.2, 0.2, 20),
        description='Main vent in January 2017')
    dataset.register_tags(['WI001', 'MD01', 'measurement'])
    target = dataset.new(target_buffer)

    instrument = dataset.new(
        InstrumentBuffer(tags=['MD01'],
                         sensor_id='F00975',
                         location='West rim',
                         no_bits=16,
                         type='DOAS',
                         description='GeoNet permanent instrument'))

    raw_type = dataset.new(
        RawDataTypeBuffer(tags=['measurement'],
                          name='1st round measurements',
                          acquisition='stationary'))

    raw_buffer = RawDataBuffer(target=target,
                               instrument=instrument,
                               type=raw_type,
                               d_var=np.zeros((1, 2048)),
                               ind_var=np.arange(2048),
                               datetime=['2017-01-10T15:23:00'])
    raw = dataset.new(raw_buffer)

    # Selection on the tag
    hits = dataset.select("tags == 'MD01'")
    self.assertEqual(hits['Target'][0], target)
    self.assertEqual(hits['Instrument'][0], instrument)

    # Selection on an attribute of a referenced element, restricted to
    # one element type
    hits = dataset.select("type.acquisition == 'stationary'",
                          etype='RawData')
    self.assertEqual(hits['RawData'][0], raw)
def test_read(self):
    """
    Test reading of HDF5 files.
    """
    filename = tempfile.mktemp()
    writer = Dataset(filename, 'w')
    target_buffer = TargetBuffer(
        tags=['WI001'],
        name='White Island main vent',
        position=(177.2, -37.5, 50),
        position_error=(0.2, 0.2, 20),
        description='Main vent in January 2017')
    writer.register_tags(['WI001', 'MD01', 'measurement'])
    target = writer.new(target_buffer)

    instrument = writer.new(
        InstrumentBuffer(tags=['MD01'],
                         sensor_id='F00975',
                         location='West rim',
                         no_bits=16,
                         type='DOAS',
                         description='GeoNet permanent instrument'))

    raw_type = writer.new(
        RawDataTypeBuffer(tags=['measurement'],
                          name='1st round measurements',
                          acquisition='stationary'))

    writer.new(RawDataBuffer(target=target,
                             instrument=instrument,
                             type=raw_type,
                             d_var=np.zeros((1, 2048)),
                             ind_var=np.arange(2048),
                             datetime=['2017-01-10T15:23:00']))
    writer.close()

    # Re-open the file and follow the references of the stored raw data
    reader = Dataset.open(filename)
    raw = reader.elements['RawData'][0]
    self.assertEqual(raw.target.name, 'White Island main vent')
    self.assertEqual(list(raw.instrument.tags)[0], 'MD01')
def test_tagging(self):
    """
    Test the tagging of data elements.
    """
    dataset = Dataset(tempfile.mktemp(), 'w')

    # Registering the same tag twice is rejected
    dataset.register_tags(['measurement'])
    with self.assertRaises(ValueError):
        dataset.register_tags(['measurement'])

    # Elements can only be created once their tags are registered
    target_buffer = TargetBuffer(tags=['WI001', 'Eruption16'])
    with self.assertRaises(ValueError):
        dataset.new(target_buffer)
    dataset.register_tags(['WI001', 'Eruption16'])
    target = dataset.new(target_buffer)

    dataset.register_tags(['SomethingElse'])
    target.tags.append('SomethingElse')
    target.tags.remove('WI001')
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        dataset.remove_tags(['Eruption16', 'blub'])
    self.assertEqual(list(target.tags), ['SomethingElse'])

    # Ensure the same tag is only added once
    target.tags.append('SomethingElse')
    self.assertEqual(list(target.tags), ['SomethingElse'])
    tag_node = dataset._f.root.tags._v_children['SomethingElse']
    self.assertEqual(len(tag_node[:]), 1)
def test_forbidden(self):
    """
    Check that unknown attributes and writes to stored elements raise
    AttributeError.
    """
    dataset = Dataset(tempfile.mktemp(), 'w')

    # Buffers reject unknown or forbidden keyword arguments ...
    with self.assertRaises(AttributeError):
        TargetBuffer(blub=10)
    with self.assertRaises(AttributeError):
        TargetBuffer(resource_id=5.)

    # ... and unknown attributes set after construction
    target_buffer = TargetBuffer()
    with self.assertRaises(AttributeError):
        target_buffer.blub = 5.

    # Attributes of a stored element cannot be assigned
    target = dataset.new(target_buffer)
    with self.assertRaises(AttributeError):
        target.position = (1, 1, 1)

    raw = dataset.new(RawDataBuffer(d_var=np.zeros((1, 2048)),
                                    ind_var=np.arange(2048),
                                    datetime=['2017-01-10T15:23:00']))
    with self.assertRaises(AttributeError):
        raw.d_var[0] = 1
    with self.assertRaises(AttributeError):
        raw.d_var[0:2] = 1
    with self.assertRaises(AttributeError):
        raw.d_var = np.ones((1, 2048))
    with self.assertRaises(AttributeError):
        raw.blub

    # The stored data is unchanged after the rejected writes
    np.testing.assert_array_equal(np.zeros(2048),
                                  np.array(raw.d_var[0][0]))
def test_add(self):
    """
    Combine FLYSPEC datasets with ``+=`` and ``+`` and check the
    resulting retrievals.
    """
    chile_file = os.path.join(self.data_dir, '2012_02_29_1340_CHILE.txt')
    combined = Dataset.new('FLYSPEC')
    combined += Dataset.open(chile_file, format='FLYSPEC')

    retrieval = combined.retrievals[3]
    spectra = retrieval.spectra_id.get_referred_object()
    scan_angles = spectra.angle[retrieval.slice]
    peak = np.argmax(retrieval.sca)
    np.testing.assert_almost_equal(scan_angles[peak], 168.04, 2)
    self.assertEqual(len(combined.retrievals), 36)

    first = Dataset.open(
        os.path.join(self.data_dir, '2016_06_11_0830_TOFP04.txt'),
        format='FLYSPEC', timeshift=12.0)
    second = Dataset.open(
        os.path.join(self.data_dir, '2016_06_11_0900_TOFP04.txt'),
        format='FLYSPEC', timeshift=12.0)

    # Binary addition ...
    total = first + second
    self.assertEqual(len(total.retrievals), 25)

    # ... and repeated in-place addition give the same number of
    # retrievals
    accumulated = Dataset.new('FLYSPEC')
    accumulated += first
    accumulated += second
    self.assertEqual(len(accumulated.retrievals), 25)
def test_spectra(self):
    """
    Test reading binary file containing the raw spectra together with
    the text file.
    """
    d = Dataset(tempfile.mktemp(), 'w')
    tofp04_dir = os.path.join(self.data_dir, 'TOFP04')
    fin_txt = os.path.join(tofp04_dir, '2017_06_14_0930.txt')
    fin_bin = os.path.join(tofp04_dir, '2017_06_14_0930.bin')
    fin_high = os.path.join(tofp04_dir, 'Cal_20170602_0956_high.bin')
    fin_low = os.path.join(tofp04_dir, 'Cal_20170602_0956_low.bin')
    fin_dark = os.path.join(tofp04_dir, 'Cal_20170602_0956_dark.bin')
    fin_ref = os.path.join(tofp04_dir, 'Cal_20170602_0956_ref.bin')

    # Linear pixel -> wavelength mapping, extrapolated over all 2048
    # pixels from four known (pixel, wavelength) pairs
    x = [521, 637, 692, 818]
    y = [305., 315., 319.5, 330.]
    f = interp1d(x, y, fill_value='extrapolate')
    wavelengths = f(list(range(0, 2048)))

    e = d.read(fin_txt, spectra=fin_bin, wavelengths=wavelengths,
               ftype='flyspec', timeshift=12.0)
    self.assertEqual(e['RawDataBuffer'].d_var.shape, (1321, 2048))
    rdt = d.new(e['RawDataTypeBuffer'])
    rb = e['RawDataBuffer']
    rb.type = rdt
    r = d.new(rb)
    cb = e['ConcentrationBuffer']
    rdlist = [r]

    # Pass the calibration kind explicitly. The previous
    # ``_f.replace('fin_', '')`` operated on the file *path* (which never
    # contains 'fin_') and therefore handed the whole path over as type.
    calibration_files = zip(['high', 'low', 'dark', 'ref'],
                            [fin_high, fin_low, fin_dark, fin_ref])
    for _k, _f in calibration_files:
        e = d.read(_f, ftype='flyspecref', wavelengths=wavelengths,
                   type=_k)
        rdt = d.new(e['RawDataTypeBuffer'])
        rb = e['RawDataBuffer']
        rb.type = rdt
        rdlist.append(d.new(rb))

    cb.rawdata = rdlist
    c = d.new(cb)

    # Pick the raw data element holding the actual measurement
    for _r in c.rawdata[:]:
        if _r.type.name[0] == 'measurement':
            break

    # Image comparison is switched off; the code is kept for reference.
    if False:
        with tempfile.TemporaryFile() as fd:
            plot(_r, savefig=fd)
            expected_image = os.path.join(self.data_dir,
                                          'raw_data_plot.png')
            rms = self.compare_images(fd, expected_image)
            self.assertTrue(rms <= 0.001)
def test_ResourceIdentifiers(self):
    """
    Check that a raw data element gives access to the target it
    references.
    """
    dataset = Dataset(tempfile.mktemp(), 'w')
    target = dataset.new(
        TargetBuffer(target_id='WI001',
                     name='White Island main vent',
                     position=(177.2, -37.5, 50),
                     position_error=(0.2, 0.2, 20),
                     description='Main vent in January 2017'))
    raw = dataset.new(
        RawDataBuffer(target=target,
                      d_var=np.zeros((1, 2048)),
                      ind_var=np.arange(2048),
                      datetime=['2017-01-10T15:23:00']))
    self.assertEqual(raw.target.target_id, 'WI001')
def test_read(self):
    """
    Read FLYSPEC text files and check the stored raw data and the angle
    of the maximum binned concentration for every scan.
    """
    def peak_angles(angles, values, edges):
        # Bin every scan onto the common grid and return the lower bin
        # edge at which the mean value of each scan is largest.
        rows = []
        for _angle, _so2 in split_by_scan(angles, values):
            rows.append(
                binned_statistic(_angle, _so2, 'mean', edges).statistic)
        grid = np.array(rows)
        return edges[np.argmax(np.ma.masked_invalid(grid), axis=1)]

    def load(dataset, filename, **kwargs):
        # Read a file and store its raw data and concentrations.
        buffers = dataset.read(os.path.join(self.data_dir, filename),
                               ftype='FLYSPEC', **kwargs)
        raw = dataset.new(buffers['RawDataBuffer'])
        conc_buffer = buffers['ConcentrationBuffer']
        conc_buffer.rawdata = [raw]
        dataset.new(conc_buffer)

    chile = Dataset(tempfile.mktemp(), 'w')
    load(chile, '2012_02_29_1340_CHILE.txt')
    raw = chile.elements['RawData'][0]
    self.assertEqual(sum(x.size for x in raw.datetime), 4600)
    self.assertEqual(raw.inc_angle[0], 174.750)
    conc = chile.elements['Concentration'][0]
    conc_raw = conc.rawdata[0]
    self.assertEqual(len(conc.value[:]), 4600)
    np.testing.assert_array_almost_equal(conc_raw.position[0],
                                         [-67.8047, -23.3565, 3927.], 2)

    # discretize all retrievals onto a grid to show a daily plot
    bins = np.arange(0, 180, 1.0)
    expected = np.array([
        166., 167., 167., 167., 168., 167., 168., 167., 167., 167., 167.,
        167., 168., 167., 167., 167., 167., 166., 167., 166., 166., 167.,
        165., 165., 165., 164., 165., 163., 163., 164., 163., 165., 164.,
        164., 164., 161.
    ])
    np.testing.assert_array_almost_equal(
        expected, peak_angles(conc_raw.inc_angle[:], conc.value[:], bins),
        2)

    tofp04 = Dataset(tempfile.mktemp(), 'w')
    load(tofp04, '2016_06_11_0830_TOFP04.txt', timeshift=12.0)
    conc = tofp04.elements['Concentration'][0]
    conc_raw = conc.rawdata[0]
    expected = np.array(
        [147., 25., 27., 86., 29., 31., 27., 27., 28., 137., 34., 34.])
    np.testing.assert_array_almost_equal(
        expected, peak_angles(conc_raw.inc_angle[:], conc.value[:], bins),
        2)
def test_append(self):
    """
    Test appending buffers to an existing raw data element.
    """
    d = Dataset(tempfile.mktemp(), 'w')
    d.register_tags(['WI001', 'MD01', 'measurement'])
    tb = TargetBuffer(tags=['WI001'], name='White Island main vent',
                      position=(177.2, -37.5, 50),
                      position_error=(0.2, 0.2, 20),
                      description='Main vent in January 2017')
    t = d.new(tb)
    ib = InstrumentBuffer(tags=['MD01'], sensor_id='F00975',
                          location='West rim', no_bits=16, type='DOAS',
                          description='GeoNet permanent instrument')
    i = d.new(ib)
    rdtb = RawDataTypeBuffer(tags=['measurement'],
                             name='1st round measurements',
                             acquisition='stationary')
    rdt = d.new(rdtb)
    rb = RawDataBuffer(target=t, instrument=i, type=rdt,
                       d_var=np.zeros((1, 2048)),
                       ind_var=np.arange(2048),
                       datetime=['2017-01-10T15:23:00'])
    r = d.new(rb)
    rb1 = RawDataBuffer(target=t, instrument=i, type=rdt,
                        d_var=np.ones((1, 2048)),
                        ind_var=np.arange(2048),
                        datetime=['2017-01-10T15:23:01'])
    r.append(rb1)
    self.assertEqual(len(r.ind_var[:]), 4096)
    self.assertEqual(np.array(r.ind_var[:]).size, 4096)
    # np.all replaces np.alltrue, which was removed in NumPy 2.0
    self.assertTrue(np.all(np.array(r.d_var[:]) < 2))
    np.testing.assert_array_equal(
        np.array(r.datetime[:]).flatten(),
        np.array(['2017-01-10T15:23:00', '2017-01-10T15:23:01'],
                 dtype='datetime64[ms]'))

    # In pedantic mode, appending buffers that were already stored or
    # appended raises ValueError
    with self.assertRaises(ValueError):
        r.append(rb1, pedantic=True)
    with self.assertRaises(ValueError):
        r.append(rb, pedantic=True)

    # Appending to a target element raises AttributeError
    with self.assertRaises(AttributeError):
        t.append(tb)

    d.register_tags(['WI002'])
    tb1 = TargetBuffer(tags=['WI002'], name='Donald Duck',
                       position=(177.1, -37.4, 50),
                       position_error=(0.2, 0.2, 20),
                       description='Donald Duck vent in January 2010')
    t1 = d.new(tb1)
    rb2 = RawDataBuffer(target=t1, instrument=i, type=rdt,
                        d_var=np.ones((1, 2048)),
                        ind_var=np.arange(2048),
                        datetime=['2017-01-10T15:23:02'])
    # NOTE(review): this calls append() on the *buffer* ``rb`` rather
    # than on the stored element ``r`` -- confirm this is the intended
    # check.
    with self.assertRaises(AttributeError):
        rb.append(rb2)
def test_sum(self):
    """
    Add two in-memory datasets and check the spectra of the result.
    """
    left = Dataset.new('ram')
    spectrum = Spectra(left.plugin, counts=np.zeros((1, 2048)))
    left.spectra.append(spectrum)
    right = Dataset.new('ram')
    right.spectra.append(spectrum)

    total = left + right
    self.assertEqual(len(total.spectra), 2)
    # The sum compares unequal to both operands
    self.assertTrue(total != right)
    self.assertTrue(total != left)
    self.assertEqual(total.spectra[0], total.spectra[1])
    self.assertEqual(total.spectra[0].counts.shape, (1, 2048))

    # Only datasets can be added to a dataset
    with self.assertRaises(TypeError):
        left + spectrum

    # In-place addition
    accumulated = Dataset.new('ram')
    accumulated += left
    self.assertEqual(accumulated.spectra[0], left.spectra[0])
def test_ram_plugin(self):
    """
    Create an item through the 'ram' plugin and read/overwrite it via
    item access on the dataset.
    """
    d = Dataset.new('ram')
    p = d.plugin
    p.create_item('spectra/someid/counts', np.zeros((1, 2048)))
    self.assertTrue(d['spectra/someid/counts'].shape == (1, 2048))
    # np.all replaces np.alltrue, which was removed in NumPy 2.0
    self.assertTrue(np.all(d['spectra/someid/counts'] < 1))
    d['spectra/someid/counts'] = np.ones((1, 2048))
    self.assertFalse(np.all(d['spectra/someid/counts'] < 1))
def test_times(self):
    """
    Check that appending updates the modification time but leaves the
    creation time untouched.
    """
    dataset = Dataset(tempfile.mktemp(), 'w')
    raw = dataset.new(RawDataBuffer(d_var=np.zeros((1, 2048)),
                                    ind_var=np.arange(2048),
                                    datetime=['2017-01-10T15:23:00']))
    extra = RawDataBuffer(d_var=np.zeros((1, 2048)),
                          ind_var=np.arange(2048),
                          datetime=['2017-01-10T15:23:01'])
    created = raw.creation_time
    raw.append(extra)
    self.assertGreater(raw.modification_time, raw.creation_time)
    self.assertEqual(raw.creation_time, created)
def test_RawData(self):
    """
    Store ten spectra with one-second spacing and check shape, values
    and the first timestamp of the resulting raw data element.
    """
    d = Dataset(tempfile.mktemp(), 'w')
    tstart = np.datetime64('2017-01-10T15:23:00')
    times = [str(tstart + np.timedelta64(i * 1, 's')) for i in range(10)]
    rb = RawDataBuffer(d_var=np.zeros((10, 2048)),
                       ind_var=np.arange(2048),
                       datetime=times,
                       inc_angle=np.arange(10, 110, 10))
    r = d.new(rb)
    self.assertEqual(r.d_var.shape, (10, 2048))
    # np.all replaces np.alltrue, which was removed in NumPy 2.0
    self.assertTrue(np.all(r.d_var[0] < 1))
    self.assertEqual(r.datetime[0], np.datetime64('2017-01-10T15:23:00'))
def test_plot(self):
    """
    Build a concentration element from a FLYSPEC file; the comparison of
    the resulting plot with a reference image is switched off.
    """
    dataset = Dataset(tempfile.mktemp(), 'w')
    buffers = dataset.read(
        os.path.join(self.data_dir, '2012_02_29_1340_CHILE.txt'),
        ftype='FLYSPEC', timeshift=12.0)

    raw_buffer = buffers['RawDataBuffer']
    raw_buffer.type = dataset.new(buffers['RawDataTypeBuffer'])
    raw = dataset.new(raw_buffer)

    conc_buffer = buffers['ConcentrationBuffer']
    conc_buffer.rawdata = [raw]
    conc_buffer.rawdata_indices = np.arange(conc_buffer.value.shape[0])
    conc = dataset.new(conc_buffer)

    # Disabled image comparison, kept for reference
    if False:
        with tempfile.TemporaryFile() as fd:
            plot(conc, savefig=fd, timeshift=12.0)
            expected_image = os.path.join(
                self.data_dir, 'chile_retrievals_overview.png')
            rms = self.compare_images(fd, expected_image)
            self.assertTrue(rms <= 0.001)
def test_PreferredFlux(self):
    """
    Store a preferred flux element and check its timestamps and the
    shape of its flux indices.
    """
    timestamps = ['2017-01-10T15:23:00', '2017-01-11T15:23:00']
    dataset = Dataset(tempfile.mktemp(), 'w')
    preferred = dataset.new(
        PreferredFluxBuffer(flux_indices=[[2]], datetime=timestamps))
    np.testing.assert_array_equal(
        preferred.datetime[:],
        np.array(['2017-01-10T15:23:00', '2017-01-11T15:23:00'],
                 dtype='datetime64[ms]'))
    self.assertEqual(preferred.flux_indices.shape, (1, 1))
def test_DataElementBase(self):
    """
    Check attribute access on a stored element and that its attributes
    cannot be reassigned.
    """
    dataset = Dataset(tempfile.mktemp(), 'w')
    target = dataset.new(
        TargetBuffer(target_id='WI001',
                     name='White Island main vent',
                     position=(177.2, -37.5, 50),
                     position_error=(0.2, 0.2, 20),
                     description='Main vent in January 2017'))

    np.testing.assert_almost_equal(np.squeeze(target.position[:]),
                                   np.array([177.2, -37.5, 50]), 1)
    self.assertEqual(target.target_id, 'WI001')

    # Assignments are rejected ...
    with self.assertRaises(AttributeError):
        target.position = (177.2, -37.5, 50)
    with self.assertRaises(AttributeError):
        target.target_id = 'WI002'
    # ... and leave the stored value untouched
    self.assertEqual(target.target_id, 'WI001')
def test_repr(self):
    """
    Compare the words of an element's repr() with the expected words,
    independent of their order.
    """
    dataset = Dataset(tempfile.mktemp(), 'w')
    target = dataset.new(
        TargetBuffer(target_id='WI001',
                     name='White Island main vent',
                     position=(177.2, -37.5, 50),
                     position_error=(0.2, 0.2, 20),
                     description='Main vent in January 2017'))
    expected_words = ['position_error:', '(3,)', 'position:', '(3,)',
                      'description:', 'Main', 'vent', 'in', 'January',
                      '2017', 'target_id:', 'WI001', 'name:', 'White',
                      'Island', 'main', 'vent', 'Created']
    # remove ID and creation time from test as they always change
    actual_words = str(repr(target)).split()[2:-2]
    for word in actual_words:
        self.assertIn(word, expected_words)
    for word in expected_words:
        self.assertIn(word, actual_words)
def test_typechecking(self):
    """
    Test the type checking and conversion functionality.
    """
    # A non-numeric position component is rejected
    with self.assertRaises(ValueError):
        TargetBuffer(target_id='WI001',
                     name='White Island main vent',
                     position=('a', -37.5, 50))

    dataset = Dataset(tempfile.mktemp(), 'w')
    target = dataset.new(
        TargetBuffer(target_id='WI001',
                     name='White Island main vent',
                     position=(177.2, -37.5, 50),
                     position_error=(0.2, 0.2, 20),
                     description='Main vent in January 2017'))

    # A target element is not accepted where an instrument is expected
    with self.assertRaises(ValueError):
        RawDataBuffer(instrument=target,
                      d_var=np.zeros((1, 2048)),
                      ind_var=np.arange(2048),
                      datetime='2017-01-10T15:23:00')
def test_pedantic(self):
    """
    Check the cases in which Dataset.new() raises ValueError when called
    with ``pedantic=True``.
    """
    dataset = Dataset(tempfile.mktemp(), 'w')

    # An empty raw data buffer
    with self.assertRaises(ValueError):
        dataset.new(RawDataBuffer(), pedantic=True)

    # A buffer that has already been stored once
    dataset.register_tags(['WI001'])
    target_buffer = TargetBuffer(
        tags=['WI001'],
        name='White Island main vent',
        position=(177.2, -37.5, 50),
        position_error=(0.2, 0.2, 20),
        description='Main vent in January 2017')
    dataset.new(target_buffer)
    with self.assertRaises(ValueError):
        dataset.new(target_buffer, pedantic=True)
def test_dtbuffer(self):
    """
    Testing the behaviour of buffer elements.
    """
    d = Dataset(tempfile.mktemp(), 'w')
    tb = TargetBuffer(tags=['WI001'], name='White Island main vent',
                      position=(177.2, -37.5, 50),
                      position_error=(0.2, 0.2, 20),
                      description='Main vent in January 2017')
    # Using an unregistered tag is rejected
    with self.assertRaises(ValueError):
        t = d.new(tb)
    d.register_tags(['WI001', 'MD01', 'measurement'])
    t = d.new(tb)
    ib = InstrumentBuffer(tags=['MD01'], sensor_id='F00975',
                          location='West rim', no_bits=16, type='DOAS',
                          description='GeoNet permanent instrument')
    i = d.new(ib)
    rdtb = RawDataTypeBuffer(tags=['measurement'],
                             name='1st round measurements',
                             acquisition='stationary')
    rdt = d.new(rdtb)
    rb = RawDataBuffer(target=t, instrument=i, type=rdt,
                       d_var=np.zeros((1, 2048)),
                       ind_var=np.arange(2048),
                       datetime=['2017-01-10T15:23:00'])
    r = d.new(rb)
    self.assertTrue(r.target == t)
    self.assertTrue(r.instrument == i)
    self.assertTrue(r.type == rdt)

    # The same buffer can also be filled attribute by attribute
    rb1 = RawDataBuffer()
    rb1.d_var = np.zeros((1, 2048))
    # A stray trailing comma used to turn this value into a 1-tuple
    # wrapping the array instead of the array itself.
    rb1.ind_var = np.arange(2048)
    rb1.datetime = ['2017-01-10T15:23:00']
    rb1.target = t
    rb1.instrument = i
    rb1.type = rdt
    d.new(rb1)
def test_add(self):
    """
    Add one file-backed dataset to another in place and count the
    elements of the result.
    """
    def load(filename):
        # Read one FLYSPEC file into a fresh dataset and store its raw
        # data and concentrations.
        dataset = Dataset(tempfile.mktemp(), 'w')
        buffers = dataset.read(os.path.join(self.data_dir, filename),
                               ftype='FLYSPEC', timeshift=12.0)
        raw = dataset.new(buffers['RawDataBuffer'])
        conc_buffer = buffers['ConcentrationBuffer']
        conc_buffer.rawdata = [raw]
        dataset.new(conc_buffer)
        return dataset

    first = load('2016_06_11_0830_TOFP04.txt')
    second = load('2016_06_11_0900_TOFP04.txt')
    first += second
    self.assertEqual(len(first.elements['Concentration']), 2)
    self.assertEqual(len(first.elements['RawData']), 2)
def main(datapath, outputpath, start, end, pg=True, deletefiles=False):
    """
    Convert MiniDOAS data into one HDF5 file per day.

    For every day between `start` and `end` the raw spectra, the
    concentration and flux results of both stations (WI301, WI302) and
    the wind data are looked up below `datapath`, written to
    ``MiniDOAS_YYYYMMDD.h5`` in `outputpath`, and the result is passed to
    `verify_flux`.

    :param datapath: Root directory containing the ``spectra``,
        ``results`` and ``wind`` sub-directories.
    :param outputpath: Directory the HDF5 files are written to.
    :param start: First day; passed on to ``pandas.date_range``.
    :param end: Last day; passed on to ``pandas.date_range``.
    :param pg: Show a progress bar if True, otherwise print each date.
    :param deletefiles: If True, remove the HDF5 file of each day as well
        as the extracted raw data and downloaded FITS files once the day
        has been processed.
    """
    msg = "Data path is: {}\n".format(datapath)
    msg += "Output path is: {}\n".format(outputpath)
    msg += "Start date: {}\n".format(start)
    msg += "End date: {}\n".format(end)
    logging.info(msg)
    dates = pd.date_range(start=start, end=end, freq='D')
    if pg:
        bar = Bar('Processing', max=len(dates))

    for date in dates:
        if pg:
            bar.next()
        else:
            print(date)
        outputfile = 'MiniDOAS_{:d}{:02d}{:02d}.h5'.format(
            date.year, date.month, date.day)
        h5file = os.path.join(outputpath, outputfile)
        d = Dataset(h5file, 'w')
        # Make sure the HDF5 file is closed even if reading a day fails.
        try:
            # ToDo: get correct plume coordinates
            tb = TargetBuffer(name='White Island main plume',
                              target_id='WI001',
                              position=[177.18375770, -37.52170799, 321.0])
            t = d.new(tb)

            # Settings of the WidPro processing, stored as text. The
            # quote after MinR2 and the comma after 'MaxPlumeWidth':2600
            # were missing before.
            wpoptions = (
                "{'Pixel316nm':479, 'TrimLower':30, 'LPFilterCount':3,"
                "'MinWindSpeed':3,'BrightEnough':500, 'BlueStep':5, "
                "'MinR2':0.8, 'MaxFitCoeffError':50.0, "
                "'InPlumeThresh':0.05, 'MinPlumeAngle':0.1, "
                "'MaxPlumeAngle':3.0, 'MinPlumeSect':0.4, "
                "'MaxPlumeSect':2.0, 'MeanPlumeCtrHeight':310, "
                "'SEMeanPlumeCtrHeight':0.442, "
                " 'MaxRangeSeperation':5000, 'MaxRangeToPlume':5000, "
                " 'MaxPlumeWidth':2600, 'MaxPlumeCentreAltitude':2000, "
                "'MaxAltSeperation':1000, 'MaxTimeDiff':30,"
                "'MinTriLensAngle':0.1745, 'MaxTriLensAngle':2.9671,"
                "'SEWindSpeed':0.20, 'WindMultiplier':1.24, "
                "'SEWindDir':0.174}")
            mb1 = MethodBuffer(name='WidPro v1.2',
                               description='Jscript wrapper for DOASIS',
                               settings=wpoptions)
            m1 = d.new(mb1)

            station_info = {}
            location_name = 'White Island North-East Point'
            station_info['WI301'] = {
                'files': {},
                'stationID': 'WI301',
                'stationLoc': location_name,
                'target': t,
                'bearing': 6.0214,
                'lon': 177.192979384,
                'lat': -37.5166903535,
                'elev': 49.0,
                'widpro_method': m1,
                'wp_station_id': 'NE'
            }
            station_info['WI302'] = {
                'files': {},
                'stationID': 'WI302',
                'stationLoc': 'White Island South Rim',
                'target': t,
                'bearing': 3.8223,
                'lon': 177.189013316,
                'lat': -37.5265334424,
                'elev': 96.0,
                'widpro_method': m1,
                'wp_station_id': 'SR'
            }

            for station in ['WI301', 'WI302']:
                # Find the raw data
                raw_data_filename = "{:s}_{:d}{:02d}{:02d}.zip"
                station_id = station_info[station]['wp_station_id']
                raw_data_filename = raw_data_filename.format(
                    station_id, date.year, date.month, date.day)
                raw_data_filepath = os.path.join(datapath, 'spectra',
                                                 station_id,
                                                 raw_data_filename)
                if os.path.isfile(raw_data_filepath):
                    try:
                        with ZipFile(raw_data_filepath) as myzip:
                            myzip.extractall('/tmp')
                    except Exception:
                        # Best effort: carry on without raw data, but do
                        # not swallow KeyboardInterrupt/SystemExit.
                        msg = "ERROR 05: Can't unzip file {}"
                        logging.error(msg.format(raw_data_filepath))
                        raw_data_filepath = None
                    else:
                        raw_data_filename = raw_data_filename.replace(
                            '.zip', '.csv')
                        raw_data_filepath = os.path.join(
                            '/tmp', raw_data_filename)
                else:
                    logging.error(
                        "file {} does not exist".format(raw_data_filepath))
                    # The station is skipped entirely; its 'files' dict
                    # stays empty.
                    continue
                try:
                    if not is_file_OK(raw_data_filepath):
                        raw_data_filepath = None
                except Exception:
                    print(raw_data_filepath)
                    raise
                station_info[station]['files']['raw'] = raw_data_filepath

                # Find the concentration data
                monthdir = '{:d}-{:02d}'.format(date.year, date.month)
                spectra_filename = "{:s}_{:d}_{:02d}_{:02d}_Spectra.csv"
                spectra_filename = spectra_filename.format(
                    station_id, date.year, date.month, date.day)
                spectra_filepath = os.path.join(datapath, 'results',
                                                monthdir, spectra_filename)
                if not is_file_OK(spectra_filepath):
                    spectra_filepath = None
                station_info[station]['files']['spectra'] = spectra_filepath

                # Find the flux data
                flux_ah_filename = spectra_filename.replace(
                    'Spectra.csv', 'Scans.csv')
                flux_ah_filepath = os.path.join(datapath, 'results',
                                                monthdir, flux_ah_filename)
                if not is_file_OK(flux_ah_filepath):
                    flux_ah_filepath = None
                station_info[station]['files']['flux_ah'] = flux_ah_filepath

                flux_ch_filename = "XX_{:d}_{:02d}_{:02d}_Combined.csv"
                flux_ch_filename = flux_ch_filename.format(
                    date.year, date.month, date.day)
                flux_ch_filepath = os.path.join(datapath, 'results',
                                                monthdir, flux_ch_filename)
                if not is_file_OK(flux_ch_filepath):
                    flux_ch_filepath = None
                station_info[station]['files']['flux_ch'] = flux_ch_filepath

                fits_flux_ah, fits_flux_ch = FITS_download(date, station)
                station_info[station]['files']['fits_flux_ah'] = fits_flux_ah
                station_info[station]['files']['fits_flux_ch'] = fits_flux_ch

                try:
                    read_single_station(d, station_info[station], date)
                except MiniDoasException as e:
                    logging.error(str(e))

                # Remove the extracted raw data file again
                fn = raw_data_filename.replace('.zip', '.csv')
                fn = os.path.join('/tmp', fn)
                if os.path.isfile(fn):
                    os.remove(fn)

            # Wind data
            windd_dir = os.path.join(datapath, 'wind', 'direction')
            winds_dir = os.path.join(datapath, 'wind', 'speed')
            sub_dir = '{:02d}-{:02d}'.format(date.year - 2000, date.month)
            winds_filename = '{:d}{:02d}{:02d}_WS_00.txt'.format(
                date.year, date.month, date.day)
            windd_filename = winds_filename.replace('WS', 'WD')
            winds_filepath = os.path.join(winds_dir, sub_dir,
                                          winds_filename)
            windd_filepath = os.path.join(windd_dir, sub_dir,
                                          windd_filename)

            if is_file_OK(winds_filepath) and is_file_OK(windd_filepath):
                # Read in the raw wind data; this is currently
                # not needed to reproduce flux estimates so it's
                # just stored for reference
                e = d.read(
                    {
                        'direction': windd_filepath,
                        'speed': winds_filepath
                    },
                    ftype='minidoas-wind',
                    timeshift=13)
                d.new(e['GasFlowBuffer'])
        finally:
            d.close()

        # NOTE(review): MiniDoasException is caught above, MDOASException
        # here -- confirm both names are defined in this module.
        try:
            verify_flux(h5file, 1.)
        except MDOASException as e:
            msg = str(e)
            logging.error(msg)

        if deletefiles:
            if os.path.isfile(h5file):
                os.remove(h5file)
            for station in ['WI301', 'WI302']:
                station_files = station_info[station]['files']
                # .get(): a station whose raw data archive was missing
                # has none of these entries.
                for key in ('raw', 'fits_flux_ah', 'fits_flux_ch'):
                    _f = station_files.get(key)
                    if _f is not None and os.path.isfile(_f):
                        os.remove(_f)
    if pg:
        bar.finish()
def test_sum(self):
    """
    Merge file-backed datasets with ``+=`` and check that references
    between elements are copied rather than shared.
    """
    def vent_dataset(target_id):
        # Dataset holding one target and one raw data element that
        # references it.
        dataset = Dataset(tempfile.mktemp(), 'w')
        target = dataset.new(
            TargetBuffer(target_id=target_id,
                         name='White Island main vent',
                         position=(177.2, -37.5, 50),
                         position_error=(0.2, 0.2, 20),
                         description='Main vent in January 2017'))
        raw_buffer = RawDataBuffer(target=target,
                                   d_var=np.zeros((1, 2048)),
                                   ind_var=np.arange(2048),
                                   datetime=['2017-01-10T15:23:00'])
        dataset.new(raw_buffer)
        return dataset, raw_buffer

    d1, rb = vent_dataset('WI001')
    d2, _ = vent_dataset('WI002')

    # Binary addition raises AttributeError
    with self.assertRaises(AttributeError):
        d1 + d2

    merged = Dataset(tempfile.mktemp(), 'w')
    merged += d1
    merged += d2
    self.assertEqual(len(merged.elements['RawData']), 2)
    merged_first = merged.elements['RawData'][0]
    merged_second = merged.elements['RawData'][1]
    source_first = d1.elements['RawData'][0]
    source_second = d2.elements['RawData'][0]
    # Check that the references are not the same anymore...
    self.assertNotEqual(merged_first._root._v_attrs.target,
                        source_first._root._v_attrs.target)
    # ...but that the copied elements contain the same information
    self.assertEqual(merged_first.target.target_id,
                     source_first.target.target_id)
    self.assertEqual(merged_second.target.target_id,
                     source_second.target.target_id)

    # Now check that this is also working for arrays of references
    method_buffers = [MethodBuffer(name='Method1'),
                      MethodBuffer(name='Method2')]
    d4 = Dataset(tempfile.mktemp(), 'w')
    methods = [d4.new(_mb) for _mb in method_buffers]
    gasflow = d4.new(GasFlowBuffer(methods=methods))
    merged += d4
    merged_gasflow = merged.elements['GasFlow'][0]
    self.assertNotEqual(merged_gasflow._root._v_attrs.methods[0],
                        gasflow._root._v_attrs.methods[0])
    for idx in (0, 1):
        self.assertEqual(merged_gasflow.methods[idx].name,
                         gasflow.methods[idx].name)

    # ToDo: not sure what the _rids feature was there for; the checks
    # comparing d3._rids / d3._tags with those of d1 and d2 are disabled.

    # A buffer cannot be added to a dataset
    with self.assertRaises(AttributeError):
        d1 + rb
    # ToDo: also not sure what behaviour we expected from ``d1 += d2``

    # A dataset cannot be added to itself
    with self.assertRaises(ValueError):
        d1 += d1
def test_readabunch(self):
    """
    Read in a whole day's worth of data including the reference spectra,
    the flux results, and the wind data.
    """

    def keyfunc(fn):
        # Sort key: the file's base name (without extension) is split
        # into year_month_day_HHMM and turned into a datetime.
        date = os.path.basename(fn).split('.')[0]
        year, month, day, hourmin = date.split('_')
        return datetime.datetime(int(year), int(month), int(day),
                                 int(hourmin[0:2]), int(hourmin[2:]))

    # Reference spectra
    fin_high = os.path.join(self.data_dir, 'TOFP04',
                            'Cal_20170602_0956_high.bin')
    fin_low = os.path.join(self.data_dir, 'TOFP04',
                           'Cal_20170602_0956_low.bin')
    fin_dark = os.path.join(self.data_dir, 'TOFP04',
                            'Cal_20170602_0956_dark.bin')
    fin_ref = os.path.join(self.data_dir, 'TOFP04',
                           'Cal_20170602_0956_ref.bin')

    bearing = 285.
    # Wavelengths for all 2048 pixels, linearly inter-/extrapolated
    # from four (pixel, wavelength) pairs
    x = [521, 637, 692, 818]
    y = [305., 315., 319.5, 330.]
    f = interp1d(x, y, fill_value='extrapolate')
    xnew = list(range(0, 2048))
    wavelengths = f(xnew)

    d = Dataset(tempfile.mktemp(), 'w')
    ib = InstrumentBuffer(location='Te Maari crater',
                          type='FlySpec',
                          name='TOFP04')
    inst = d.new(ib)
    tb = TargetBuffer(name='Upper Te Maari crater',
                      position=[175.671854359, -39.107850505, 1505.])
    t = d.new(tb)

    # Store the four reference spectra; they become the first four
    # entries of the raw data list attached to the concentrations.
    rdlist = []
    for _k, _f in zip(['high', 'low', 'dark', 'ref'],
                      [fin_high, fin_low, fin_dark, fin_ref]):
        e = d.read(_f, ftype='flyspecref', wavelengths=wavelengths,
                   type=_k)
        rdtb = e['RawDataTypeBuffer']
        rdt = d.new(rdtb)
        rb = e['RawDataBuffer']
        rb.type = rdt
        rb.instrument = inst
        r = d.new(rb)
        rdlist.append(r)

    # Measurement files, processed in chronological order
    files = glob.glob(os.path.join(self.data_dir, 'TOFP04', '2017*.txt'))
    files = sorted(files, key=keyfunc)
    r = None
    c = None
    nlines = 0
    last_index = 0
    for _f in files:
        try:
            fin_bin = _f.replace('.txt', '.bin')
            # Count the lines of the text file; the total is compared
            # with the number of stored spectra further down.
            with open(_f) as fd:
                nlines += len(fd.readlines())
            e = d.read(_f, ftype='FLYSPEC', spectra=fin_bin,
                       wavelengths=wavelengths, bearing=bearing,
                       timeshift=12)
            if r is None and c is None:
                # First file: create the raw data and concentration
                # elements.
                rdt = d.new(e['RawDataTypeBuffer'])
                rb = e['RawDataBuffer']
                rb.type = rdt
                rb.instrument = inst
                rb.target = t
                r = d.new(rb)
                cb = e['ConcentrationBuffer']
                rdlist.append(r)
                cb.rawdata = rdlist
                cb.rawdata_indices = np.arange(cb.value.shape[0])
                last_index = cb.value.shape[0] - 1
                c = d.new(cb)
            else:
                # Subsequent files: append to the existing elements and
                # continue the running raw data index.
                r.append(e['RawDataBuffer'])
                cb = e['ConcentrationBuffer']
                cb.rawdata_indices = (last_index + 1 +
                                      np.arange(cb.value.shape[0]))
                last_index = last_index + cb.value.shape[0]
                c.append(cb)
        except Exception as ex:
            # NOTE(review): a failing file is only printed and skipped;
            # the assertions below are what detect missing data.
            print((ex, _f, fin_bin))
            continue

    # Check all data has been read
    self.assertEqual(c.rawdata[4].d_var.shape, (nlines, 2048))
    self.assertEqual(c.rawdata[4].inc_angle.shape, (nlines, ))
    self.assertEqual(c.value[0], 119.93)
    self.assertEqual(c.value[-1], 23.30)
    self.assertEqual(c.rawdata[4].datetime[-1],
                     np.datetime64('2017-06-14T04:30:00.535'))
    self.assertEqual(c.rawdata[4].datetime[0],
                     np.datetime64('2017-06-13T20:30:49.512'))

    # Image comparisons are switched off; kept for reference.
    if False:
        with tempfile.TemporaryFile() as fd:
            plot(c, savefig=fd)
            expected_image = os.path.join(self.data_dir, 'TOFP04',
                                          'concentration_plot.png')
            rms = self.compare_images(fd, expected_image)
            self.assertTrue(rms <= 0.001)
        with tempfile.TemporaryFile() as fd:
            plot(c.rawdata[0], savefig=fd)
            expected_image = os.path.join(self.data_dir, 'TOFP04',
                                          'ref_spectrum.png')
            rms = self.compare_images(fd, expected_image)
            self.assertTrue(rms <= 0.001)

    fe = d.read(os.path.join(self.data_dir, 'TOFP04',
                             'TOFP04_2017_06_14.txt'),
                ftype='flyspecflux', timeshift=12)
    gf = d.read(os.path.join(self.data_dir, 'TOFP04', 'wind',
                             '2017_06_14.txt'),
                ftype='flyspecwind', timeshift=12)
    fb = fe['FluxBuffer']
    draw = r.datetime[:].astype('datetime64[us]')
    # For every flux value find the first and last raw data index of
    # the scan it was computed from, using the closest timestamps.
    inds = []
    for i in range(fb.value.shape[0]):
        d0 = fb.datetime[i].astype('datetime64[us]')
        idx0 = np.argmin(abs(draw - d0))
        if i < fb.value.shape[0] - 1:
            d1 = fb.datetime[i + 1].astype('datetime64[us]')
            idx1 = np.argmin(abs(draw - d1))
            # There is a small bug in Nial's program that gets
            # the start of the final scan in a file wrong
            if r.inc_angle[idx1 - 1] < r.inc_angle[idx1]:
                idx1 -= 1
                fb.datetime[i + 1] = r.datetime[idx1]
        else:
            # The last scan extends to the end of the raw data
            idx1 = r.datetime.shape[0]
        inds.append([idx0, idx1 - 1])

    fb.concentration_indices = inds
    fb.concentration = c
    mb = fe['MethodBuffer']
    m = d.new(mb)
    fb.method = m
    # NOTE(review): ``gf`` is the value returned by d.read() itself, not
    # an element created with d.new() -- confirm this is intended.
    fb.gasflow = gf
    f = d.new(fb)

    # Spot-check one scan: flux value, scan boundaries and timestamp
    nos = 18
    i0, i1 = f.concentration_indices[nos]
    cn = f.concentration
    rn = cn.rawdata[4]
    self.assertAlmostEqual(f.value[nos], 0.62, 2)
    self.assertEqual(rn.inc_angle[i0], 25.)
    self.assertEqual(rn.inc_angle[i1], 150.)
    self.assertEqual(f.datetime[nos],
                     np.datetime64('2017-06-13T21:20:17.196000'))

    pfb = PreferredFluxBuffer(fluxes=[f],
                              flux_indices=[[nos]],
                              value=[f.value[nos]],
                              datetime=[f.datetime[nos]])
    d.new(pfb)
def test_new(self):
    """
    Create a Spectra object for a FLYSPEC dataset and set one of its
    attributes.
    """
    d = Dataset.new('FLYSPEC')
    s = Spectra(d.plugin, counts=np.zeros((1, 2048)))
    # np.all replaces np.alltrue, which was removed in NumPy 2.0
    self.assertTrue(np.all(s.counts < 1))
    s.angle = np.array([45.0])
    self.assertTrue(s.angle[0] == 45.0)
def test_readall(self):
    """
    Produce a complete HDF5 file for 1 day of MiniDOAS analysis at one
    station.
    """
    def datafile(*parts):
        # Path of a file below the 'minidoas' test data directory.
        return os.path.join(self.data_dir, 'minidoas', *parts)

    d = Dataset(tempfile.mktemp(), 'w')

    # ToDo: get correct plume coordinates
    tb = TargetBuffer(name='White Island main plume',
                      target_id='WI001',
                      position=[177.18375770, -37.52170799, 321.0])
    t = d.new(tb)

    # Settings of the WidPro processing, stored as text. The closing
    # quote after MinR2 was missing before.
    wpoptions = (
        "{'Pixel316nm':479, 'TrimLower':30, 'LPFilterCount':3,"
        "'MinWindSpeed':3, 'BrightEnough':500, 'BlueStep':5,"
        "'MinR2':0.8, 'MaxFitCoeffError':50.0,"
        "'InPlumeThresh':0.05, 'MinPlumeAngle':0.1,"
        "'MaxPlumeAngle':3.0, 'MinPlumeSect':0.4,"
        "'MaxPlumeSect':2.0, 'MeanPlumeCtrHeight':310,"
        "'SEMeanPlumeCtrHeight':0.442, 'MaxRangeToPlume':5000,"
        "'MaxPlumeWidth':2600, 'MaxPlumeCentreAltitude':2000,"
        "'MaxRangeSeperation':5000, 'MaxAltSeperation':1000,"
        "'MaxTimeDiff':30, 'MinTriLensAngle':0.1745,"
        "'MaxTriLensAngle':2.9671, 'SEWindSpeed':0.20,"
        "'WindMultiplier':1.24, 'SEWindDir':0.174}")
    mb1 = MethodBuffer(name='WidPro v1.2',
                       description='Jscript wrapper for DOASIS',
                       settings=wpoptions)
    m1 = d.new(mb1)

    # Read in the raw wind data; this is currently not needed to reproduce
    # flux estimates so it's just stored for reference
    fn_wd = datafile('wind', '20161101_WD_00.txt')
    fn_ws = datafile('wind', '20161101_WS_00.txt')
    e2 = d.read({'direction': fn_wd, 'speed': fn_ws},
                ftype='minidoas-wind', timeshift=13)
    d.new(e2['GasFlowBuffer'])

    station_info = {}
    files = {'raw': datafile('NE_20161101.csv'),
             'spectra': datafile('NE_2016_11_01_Spectra.csv'),
             'flux_ah': datafile('NE_2016_11_01_Scans.csv'),
             'flux_ch': datafile('XX_2016_11_01_Combined.csv'),
             'fits_flux_ah': datafile('FITS_NE_20161101_ah.csv'),
             'fits_flux_ch': datafile('FITS_NE_20161101_ch.csv')}
    station_info['WI301'] = {'files': files,
                             'stationID': 'WI301',
                             'stationLoc': 'White Island North-East Point',
                             'target': t,
                             'bearing': 6.0214,
                             'lon': 177.192979384,
                             'lat': -37.5166903535,
                             'elev': 49.0,
                             'widpro_method': m1,
                             'wp_station_id': 'NE'}
    files = {'raw': datafile('SR_20161101.csv'),
             'spectra': datafile('SR_2016_11_01_Spectra.csv'),
             'flux_ah': datafile('SR_2016_11_01_Scans.csv'),
             'flux_ch': datafile('XX_2016_11_01_Combined.csv'),
             'fits_flux_ah': datafile('FITS_SR_20161101_ah.csv'),
             'fits_flux_ch': datafile('FITS_SR_20161101_ch.csv')}
    station_info['WI302'] = {'files': files,
                             'stationID': 'WI302',
                             'stationLoc': 'White Island South Rim',
                             'target': t,
                             'bearing': 3.8223,
                             'lon': 177.189013316,
                             'lat': -37.5265334424,
                             'elev': 96.0,
                             'widpro_method': m1,
                             'wp_station_id': 'SR'}

    self.read_single_station(d, station_info['WI301'])
    self.read_single_station(d, station_info['WI302'])
    d.close()