def setUpClass(cls) -> None:
    with contextlib.redirect_stdout(None):
        reader = api_reader.ApiReader(
            tests.TEST_DATA_DIR,
            False,
            ReadFilter(extensions={".rdvxm"}, station_ids={"0000000001"}),
        )
        cls.apim_station = reader.get_station_by_id("0000000001")[0]
        reader = api_reader.ApiReader(
            tests.TEST_DATA_DIR,
            False,
            ReadFilter(extensions={".rdvxz"}, station_ids={"1637650010"}),
        )
        cls.api900_station = reader.get_station_by_id("1637650010")[0]
def test_read_all_station_ids_no_match(self):
    api1000_dir = os.path.join(self.input_dir, "api1000")
    reader = api_reader.ApiReader(api1000_dir, True, ReadFilter(station_ids={"1000000900"}))
    result = reader.index_summary.total_packets()
    self.assertEqual(result, 0)
    result_by_id = reader.read_files_by_id("1000000900")
    self.assertIsNone(result_by_id)
def test_read_all_start_time_no_match(self):
    reader = api_reader.ApiReader(
        self.input_dir,
        True,
        ReadFilter(start_dt=dtu.datetime_from_epoch_seconds_utc(1700000000)),
    )
    result = reader.index_summary.total_packets()
    self.assertEqual(result, 0)
def test_read_all_api900_in_structured_dir(self):
    reader = api_reader.ApiReader(
        self.input_dir, True, ReadFilter(extensions=self.api_900_extensions)
    )
    result = reader.index_summary.total_packets()
    self.assertEqual(result, 2)
    result_by_id = reader.read_files_by_id("1000000900")
    self.assertEqual(len(result_by_id), 2)
    self.assertEqual(result_by_id[0].station_information.id, "1000000900")
def test_read_all_station_ids(self):
    reader = api_reader.ApiReader(
        self.input_dir, True, ReadFilter(station_ids={"1000001000", "2000001000"})
    )
    result = reader.index_summary.total_packets()
    self.assertEqual(result, 2)
    result_by_id = reader.read_files_by_id("1000001000")
    self.assertEqual(len(result_by_id), 2)
    self.assertEqual(result_by_id[0].station_information.id, "1000001000")
def test_filter_loop(self):
    filter_ids = ["1000000900", "1000001000", "2000000900"]
    final_result = 0
    for f_id in filter_ids:
        reader = api_reader.ApiReader(self.input_dir, True, ReadFilter(station_ids={f_id}))
        result = reader.index_summary.total_packets()
        if result == 0:
            self.assertTrue("2000000900" in reader.filter.station_ids)
            continue  # skip failed filter result
        self.assertEqual(result, 2)
        final_result += result
    self.assertEqual(final_result, 4)
def test_read_all_end_time(self):
    reader = api_reader.ApiReader(
        self.input_dir,
        True,
        ReadFilter(
            end_dt_buf=timedelta(seconds=30),
            end_dt=dtu.datetime_from_epoch_seconds_utc(1611696100),
        ),
    )
    result = reader.index_summary.total_packets()
    self.assertEqual(result, 2)
    result_by_id = reader.read_files_by_id("1000000900")
    self.assertEqual(len(result_by_id), 1)
    self.assertEqual(result_by_id[0].station_information.id, "1000000900")
def setUpClass(cls) -> None:
    cls.input_dir = tests.TEST_DATA_DIR
    cls.station_filter = ReadFilter(station_ids={"0000000001"})
def setUpClass(cls) -> None:
    with contextlib.redirect_stdout(None):
        result = api_reader.ApiReader(
            tests.TEST_DATA_DIR,
            structured_dir=False,
            read_filter=ReadFilter(station_ids={"1637680001"}),
        )
        cls.timesync = ts.TimeSync().from_raw_packets(result.read_files_by_id("1637680001"))
def setUpClass(cls) -> None:
    cls.apim_filter = ReadFilter(extensions={".rdvxm"}, station_ids={"0000000001"})
    cls.api900_filter = ReadFilter(extensions={".rdvxz"}, station_ids={"1637650010"})
def _check_station_stats(
    self,
    request_filter: io.ReadFilter,
    pool: Optional[multiprocessing.pool.Pool] = None,
) -> io.Index:
    """
    Recursively check the filter's results; if the resulting index has enough information in it,
    return it, otherwise search for more data.  The filter should only request one station.

    :param request_filter: filter representing the requested information
    :param pool: optional multiprocessing pool to use when extracting stats; if None, one is created
    :return: Index that includes as much information as possible that fits the request
    """
    _pool: multiprocessing.pool.Pool = multiprocessing.Pool() if pool is None else pool
    index = self._apply_filter(request_filter)
    # if there are no restrictions on time or we found nothing, return the index
    if (
        (not self.filter.start_dt and not self.filter.end_dt)
        or (not request_filter.start_dt and not request_filter.end_dt)
        or len(index.entries) < 1
    ):
        return index
    stats = fs.extract_stats(index, pool=_pool)
    # close the pool if it was created here
    if pool is None:
        _pool.close()
    timing_offsets: Optional[offset_model.TimingOffsets] = offset_model.compute_offsets(stats)
    # punt if duration or other important values are invalid or if the latency array was empty
    if timing_offsets is None:
        return index
    # if our filtered files encompass the request even when the packet times are updated, return the index
    if (not self.filter.start_dt or timing_offsets.adjusted_start <= self.filter.start_dt) and (
        not self.filter.end_dt or timing_offsets.adjusted_end >= self.filter.end_dt
    ):
        return index
    # we have to update our filter to get more information
    new_filter = request_filter.clone()
    no_more_start = True
    no_more_end = True
    # check if there is a packet just beyond the requested start time
    if self.filter.start_dt and timing_offsets.adjusted_start > self.filter.start_dt:
        beyond_start = (
            self.filter.start_dt - np.abs(timing_offsets.start_offset) - stats[0].packet_duration
        )
        start_filter = (
            request_filter.clone()
            .with_start_dt(beyond_start)
            .with_end_dt(stats[0].packet_start_dt)
            .with_end_dt_buf(timedelta(seconds=0))
        )
        start_index = self._apply_filter(start_filter)
        # if the beyond check produces an earlier start date time, update the filter;
        # otherwise flag the start side as having no more data to obtain
        if (
            len(start_index.entries) > 0
            and start_index.entries[0].date_time < index.entries[0].date_time
        ):
            new_filter.with_start_dt(beyond_start)
            no_more_start = False
    # check if there is a packet just beyond the requested end time
    if self.filter.end_dt and timing_offsets.adjusted_end < self.filter.end_dt:
        beyond_end = self.filter.end_dt + np.abs(timing_offsets.end_offset)
        end_filter = (
            request_filter.clone()
            .with_start_dt(stats[-1].packet_start_dt + stats[-1].packet_duration)
            .with_end_dt(beyond_end)
            .with_start_dt_buf(timedelta(seconds=0))
        )
        end_index = self._apply_filter(end_filter)
        # if the beyond check produces a later end date time, update the filter;
        # otherwise flag the end side as having no more data to obtain
        if (
            len(end_index.entries) > 0
            and end_index.entries[-1].date_time > index.entries[-1].date_time
        ):
            new_filter.with_end_dt(beyond_end)
            no_more_end = False
    # if there is no more data to obtain from either end, return the original index
    if no_more_start and no_more_end:
        return index
    # otherwise, recursively check the updated filter
    _pool = multiprocessing.Pool() if pool is None else pool
    ret = self._check_station_stats(new_filter, pool=_pool)
    if pool is None:
        _pool.close()
    return ret
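A minimal usage sketch of the read path that the recursive check above supports: a caller builds a time-bounded, single-station ReadFilter and reads that station's packets, letting the reader widen the window if timing offsets push the requested times outside the raw file times. This is an assumption-laden illustration, not part of the test suite; the directory path, station id, and epoch values are placeholders, and the names (api_reader, ReadFilter, dtu) are the same ones used in the tests above.

def read_bounded_station_example():
    # Hypothetical values: one station, a two-minute request window.
    read_filter = (
        ReadFilter(station_ids={"0000000001"})
        .with_start_dt(dtu.datetime_from_epoch_seconds_utc(1611696000))
        .with_end_dt(dtu.datetime_from_epoch_seconds_utc(1611696120))
    )
    # structured_dir=True assumes the data lives in a structured api900/api1000 layout.
    reader = api_reader.ApiReader("/path/to/data", True, read_filter)
    # Returns the packets for the requested station, or None if nothing matched.
    return reader.read_files_by_id("0000000001")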