def test_counter(self):
    """Ingest the sample series file and verify the DAOs' count() queries,
    including filtered counts by foreign key."""
    with open('./tests/series.ndjson', 'rb') as f:
        records = list(
            get_jsons_from_stream(stream=f, object_name='series.ndjson'))
    self.assertEqual(15, len(records))
    for rec in records:
        # Only the measurement part is used here; the station is stored
        # straight from the raw JSON record.
        _, measurement, _ = split_record(rec)
        station_id = self.smdao.store_from_json(rec)
        series_id = self.dao.store(
            station_id=station_id,
            parameter=measurement['parameter'],
            unit=measurement['unit'],
            averagingPeriod=f"{measurement['averagingPeriod']}")
        # Return value of store() is not needed for the assertions below
        # (the original bound it to an unused `mes_id` local).
        self.mdao.store(series_id=series_id,
                        value=measurement['value'],
                        date=measurement['date']['utc'])
    mes = self.mdao.get_all()
    self.assertEqual(15, len(mes))
    self.assertEqual(self.mdao.count(), 15)
    self.assertEqual(4, self.smdao.count())
    self.assertEqual(6, self.dao.count())
    # count() also accepts a filter on the owning series / station.
    self.assertEqual(4, self.mdao.count(series_id=1))
    self.assertEqual(0, self.mdao.count(series_id=838232))
    self.assertEqual(0, self.dao.count(station_id=1212))
def update_last(**kwargs):
    """Ingest only files changed since the previous scheduled run.

    Lists the configured target directory, narrows the file list to those
    falling between the previous and next execution dates, and loads every
    measurement found into the database.  Returns True so the task reports
    success to the scheduler.
    """
    prefix = get_prefix(**kwargs)
    target_dir = os.path.join(Variable.get('target_dir'), prefix)
    logging.info(f'Will be processing [{target_dir}]')

    flist = list_directory(target_dir)
    logging.info(f'Files detected: {len(flist)}')

    previous_run = kwargs['prev_execution_date']
    next_run = kwargs['next_execution_date']
    filtered_list = filter_file_list(
        flist=flist, previous_run=previous_run, next_run=next_run)
    logging.info(
        f'Previous run was @{previous_run}, next will be @{next_run}. '
        f'File list reduced to: {len(filtered_list)}')

    station_dao, series_dao, mes_dao = setup_daos()

    total = 0
    for path in filtered_list:
        logging.info(f'Analyzing {path}')
        with open(path, 'rb') as stream:
            for record in get_jsons_from_stream(stream=stream,
                                                object_name=path):
                station, measurement, _ = split_record(record)
                total += 1
                add_to_db(station_dao, series_dao, mes_dao,
                          station=station, measurement=measurement)

    logging.info(f'Number of measurements added to DB: {total}')
    print_db_stats(station_dao, series_dao, mes_dao)
    return True
def test_get_forstation(self):
    """Ingest the sample series file, then verify per-station series lookups
    and fetching a single series by id."""
    with open('./tests/series.ndjson', 'rb') as f:
        records = list(
            get_jsons_from_stream(stream=f, object_name='series.ndjson'))
    self.assertEqual(15, len(records))
    for rec in records:
        _, measurement, _ = split_record(rec)
        station_id = self.smdao.store_from_json(rec)
        series_id = self.dao.store(
            station_id=station_id,
            parameter=measurement['parameter'],
            unit=measurement['unit'],
            averagingPeriod=f"{measurement['averagingPeriod']}")
        # Return value of store() is not used by the assertions below
        # (the original bound it to an unused `mes_id` local).
        self.mdao.store(series_id=series_id,
                        value=measurement['value'],
                        date=measurement['date']['utc'])
    mes = self.mdao.get_all()
    self.assertEqual(15, len(mes))
    # Station names double as station ids in this fixture — TODO confirm.
    only_one = self.dao.get_all_for_station(station_id="Nisekh")
    self.assertEqual(1, len(only_one))
    more_than_one = self.dao.get_all_for_station(
        station_id="Sankt Eriksgatan")
    self.assertEqual(3, len(more_than_one))
    my_series = self.dao.get_for_id(series_id=1)
    # Column 2 of the returned row is the measured parameter.
    self.assertEqual(my_series[2], 'pm10')
def test_multiple_inserts(self):
    """Ingest all 15 fixture records and verify the de-duplicated row counts
    in each DAO (4 stations, 6 series, 15 measurements)."""
    with open('./tests/series.ndjson', 'rb') as f:
        records = list(
            get_jsons_from_stream(stream=f, object_name='series.ndjson'))
    self.assertEqual(len(records), 15)
    for rec in records:
        # The station part of the split is unused — store_from_json()
        # consumes the raw record directly (the original bound it to an
        # unused `station` local; `mes_id` was likewise unused).
        _, measurement, _ = split_record(rec)
        station_id = self.smdao.store_from_json(rec)
        series_id = self.dao.store(
            station_id=station_id,
            parameter=measurement['parameter'],
            unit=measurement['unit'],
            averagingPeriod=f"{measurement['averagingPeriod']}")
        self.mdao.store(series_id=series_id,
                        value=measurement['value'],
                        date=measurement['date']['utc'])
    stations = self.smdao.get_all()
    self.assertEqual(len(stations), 4)
    series = self.dao.get_all()
    self.assertEqual(len(series), 6)
    mes = self.mdao.get_all()
    self.assertEqual(len(mes), 15)
def test_get_jsons_from_gzipped_stream(self):
    """A gzipped ndjson fixture should yield all 64 contained objects."""
    path = './tests/2014-03-30.ndjson.gz'
    with open(path, 'rb') as stream:
        parsed = list(get_jsons_from_stream(stream=stream, object_name=path))
    self.assertEqual(64, len(parsed))
def test_get_jsons_from_stream(self):
    """A plain ndjson fixture should yield all 7444 contained objects."""
    path = './tests/exobj.ndjson'
    with open(path, 'rb') as stream:
        parsed = list(get_jsons_from_stream(stream=stream, object_name=path))
    self.assertEqual(7444, len(parsed))
def go_through(**kwargs):
    """Load every file under the prefixed target directory into the DB.

    Unlike the incremental task, this processes all files found, with no
    date filtering.  Returns True so the task reports success to the
    scheduler.
    """
    prefix = get_prefix(**kwargs)
    target_dir = os.path.join(Variable.get('target_dir'), prefix)
    logging.info(f'Will be processing [{target_dir}]')

    flist = glob.glob(os.path.join(target_dir, '*'))
    logging.info(f'Files detected: {len(flist)}')

    station_dao, series_dao, mes_dao = setup_daos()

    for path in flist:
        logging.info(f'Processing {path}')
        with open(path, 'rb') as stream:
            for record in get_jsons_from_stream(stream=stream,
                                                object_name=path):
                station, measurement, _ = split_record(record)
                add_to_db(station_dao, series_dao, mes_dao,
                          station=station, measurement=measurement)

    print_db_stats(station_dao, series_dao, mes_dao)
    return True