def test_read(self, test_records1):
    """Tests if read records same as saved ones.

    Args:
        test_records1: Fixture with the records that are written to a
            temporary file through self.write_to_tmpfile and read back.
    """
    tmpfile = self.write_to_tmpfile(test_records1)
    try:
        test_slice = LevelSlice(tmpfile.name)
        test_slice.read()
        # The records are expected to be grouped under the 'PPX_ASYS' key.
        assert test_slice._records['PPX_ASYS'] == test_records1
    finally:
        # Close even when an assertion fails so the temp file is not leaked.
        tmpfile.close()
def test_empty_read(self):
    """Tests read behavior for a file written from an empty record list.

    The slice must hold no records both before and after read() is called.
    """
    tmpfile = self.write_to_tmpfile([])
    try:
        test_slice = LevelSlice(tmpfile.name)
        assert test_slice._records == {}
        test_slice.read()
        assert test_slice._records == {}
    finally:
        # Release the temp file like the other tests do, even on failure.
        tmpfile.close()
def test_get_start(self, test_records1):
    """Tests the first timestamp of a slice before and after reading.

    Before read() the private _start field must be -1; after read() the
    first timestamp must equal the first field of the first fixture record.

    Args:
        test_records1: Fixture with the records written to the temp file
            (presumably ordered by time - confirm against the fixture).
    """
    tmpfile = self.write_to_tmpfile(test_records1)
    try:
        test_slice = LevelSlice(tmpfile.name)
        assert test_slice._start == -1
        test_slice.read()
        assert test_slice.get_first_timestamp() == test_records1[0][0]
    finally:
        # Close even when an assertion fails so the temp file is not leaked.
        tmpfile.close()
def test_downsample_factor(self, test_records1, strategy, factor):
    """Tests if right downsample strategy is applied, using downsample factor.

    Args:
        test_records1: Fixture with the records written to the temp file.
        strategy: Downsample strategy passed to LevelSlice.downsample.
        factor: Downsample factor passed to LevelSlice.downsample.
    """
    tmpfile = self.write_to_tmpfile(test_records1)
    try:
        test_slice = LevelSlice(tmpfile.name)
        test_slice.read()
        downsampled = test_slice.downsample(strategy, factor)
        # strategy_reducer supplies the reference result for the comparison.
        assert downsampled['PPX_ASYS'] == strategy_reducer(
            test_records1, strategy, factor)
    finally:
        # Close even when an assertion fails so the temp file is not leaked.
        tmpfile.close()
def _raw_preprocess(self, number_per_slice):
    """Cuts the raw data into slices and records each slice's start time.

    Every slice read from the raw file is saved under the raw level
    directory. Afterwards the total record count, the first and last
    timestamps and the per-level metadata are stored in self._metadata, and
    the start time of each raw slice is written to a raw-level Metadata
    object, which is then saved.

    Args:
        number_per_slice: An int of records to keep for each slice.

    Returns:
        The string returned by the raw data reader when a read yields a
        string (treated as an error message), None if complete.
    """
    slice_metadata = Metadata(
        self._preprocess_dir,
        strategy=None,
        level=RAW_LEVEL_DIR,
        bucket=self._preprocess_bucket,
    )
    reader = RawDataProcessor(
        self._metadata['raw_file'], number_per_slice, self._raw_bucket)

    start_times = []
    total_records = 0
    first_timestamp = last_timestamp = -1

    while reader.readable():
        # Exactly one start time is appended per saved slice, so the list
        # length is also the index of the slice about to be written.
        slice_file = utils.get_slice_name(len(start_times))
        slice_path = utils.get_slice_path(
            self._preprocess_dir, RAW_LEVEL_DIR, slice_file)
        print("Slice name: " + slice_path)
        target_slice = LevelSlice(slice_path, bucket=self._preprocess_bucket)

        records = reader.read_next_slice()
        print(records)
        if isinstance(records, str):
            # A string result is handed straight back to the caller.
            return records

        target_slice.save(records)
        slice_start = records[0][0]
        start_times.append(slice_start)
        total_records += len(records)
        if first_timestamp == -1:
            first_timestamp = slice_start
        last_timestamp = records[-1][0]

    self._metadata['raw_number'] = total_records
    self._metadata['start'] = first_timestamp
    self._metadata['end'] = last_timestamp

    levels, level_names = self._get_levels_metadata(
        total_records, last_timestamp - first_timestamp)
    self._metadata['levels']['names'] = level_names
    for level_name, level_info in zip(level_names, levels):
        self._metadata["levels"][level_name] = level_info

    for position, slice_start in enumerate(start_times):
        raw_slice_names = self._metadata['levels'][RAW_LEVEL_DIR]['names']
        slice_metadata[raw_slice_names[position]] = slice_start
    slice_metadata.save()
    return None
def test_add_records_multi_channel(self, test_records1, test_records2):
    """Tests that add_records stores several channels passed in one call.

    Args:
        test_records1: Fixture with the records of the first channel.
        test_records2: Fixture with the records of the second channel.
    """
    # Field 2 of the first record of each fixture is used as the dict key.
    records_by_channel = {
        fixture[0][2]: fixture
        for fixture in (test_records1, test_records2)
    }

    level_slice = LevelSlice('dummy')
    assert level_slice._records == {}

    level_slice.add_records(records_by_channel)
    assert level_slice._records == records_by_channel
def test_downsample_max_records(self, test_records1, strategy, max_records):
    """Tests if right downsample strategy is applied, using max records.

    Args:
        test_records1: Fixture with the records written to the temp file.
        strategy: Downsample strategy passed to LevelSlice.downsample.
        max_records: Upper bound passed as the max_records keyword.
    """
    tmpfile = self.write_to_tmpfile(test_records1)
    try:
        test_slice = LevelSlice(tmpfile.name)
        test_slice.read()
        downsampled = test_slice.downsample(
            strategy, max_records=max_records)
        # The reference result uses the factor ceil(record count / max_records).
        downsample_factor = ceil(len(test_records1) / max_records)
        assert downsampled['PPX_ASYS'] == strategy_reducer(
            test_records1, strategy, downsample_factor)
    finally:
        # Close even when an assertion fails so the temp file is not leaked.
        tmpfile.close()
def test_save_member(self, test_records1):
    """Tests if object member records saved.

    Records are added to one slice and saved without arguments; a second
    slice opened on the same file must read back the same records.

    Args:
        test_records1: Fixture with the records of a single channel.
    """
    # The context manager closes the temp file even if an assertion fails.
    with NamedTemporaryFile() as tmpfile:
        test_save_slice = LevelSlice(tmpfile.name)
        # Field 2 of the first record is used as the dict key.
        formatted_test_records = {test_records1[0][2]: test_records1}
        test_save_slice.add_records(formatted_test_records)
        test_save_slice.save()

        test_read_slice = LevelSlice(tmpfile.name)
        test_read_slice.read()
        assert test_read_slice._records == formatted_test_records
def test_add_records_single_channel(self, test_records1):
    """Tests that add_records stores, then extends, a single channel.

    The same one-channel dict is added twice; after the second call the
    channel is expected to hold the fixture records twice in a row.

    Args:
        test_records1: Fixture with the records of a single channel.
    """
    single_channel_records = {test_records1[0][2]: test_records1}

    level_slice = LevelSlice('dummy')
    assert level_slice._records == {}

    level_slice.add_records(single_channel_records)
    assert level_slice._records == single_channel_records

    # Build the expectation before the second add_records call.
    doubled_records = {test_records1[0][2]: test_records1 + test_records1}
    level_slice.add_records(single_channel_records)
    assert level_slice._records == doubled_records
def _single_level_downsample(self, strategy, prev_level, curr_level,
                             level_metadata):
    """Downsamples for one single level.

    Each slice of prev_level is read, downsampled by
    self._downsample_level_factor and appended to the current slice of
    curr_level. Once the current slice holds at least
    self._number_per_slice records it is saved, its first timestamp is
    stored in level_metadata under the slice name, and a new slice is
    started.

    Args:
        strategy: A string representing a downsampling strategy.
        prev_level: A string of the name of the previous level (read from).
        curr_level: A string of the name of the current level (written to).
        level_metadata: A metadata object for this level; updated in place.

    Returns:
        level_metadata, holding the first timestamp of every saved slice.
    """
    curr_slice_names = self._metadata['levels'][curr_level]['names']
    prev_slice_names = self._metadata['levels'][prev_level]['names']

    def open_slice(level, slice_name):
        # Builds the LevelSlice for one slice of the given level.
        slice_path = utils.get_slice_path(
            self._preprocess_dir, level, slice_name, strategy)
        return LevelSlice(slice_path, bucket=self._preprocess_bucket)

    def flush(level_slice, index):
        # Saves the slice and registers its first timestamp by slice name.
        level_slice.save()
        level_metadata[curr_slice_names[index]] = (
            level_slice.get_first_timestamp())

    slice_index = 0
    curr_level_slice = open_slice(
        curr_level, utils.get_slice_name(slice_index))
    # True once the current slice has received input since it was opened.
    has_pending = False

    for prev_slice_name in prev_slice_names:
        prev_level_slice = open_slice(prev_level, prev_slice_name)
        prev_level_slice.read()
        prev_level_downsample = prev_level_slice.downsample(
            strategy, self._downsample_level_factor)
        curr_level_slice.add_records(prev_level_downsample)
        has_pending = True

        if curr_level_slice.get_records_count() >= self._number_per_slice:
            flush(curr_level_slice, slice_index)
            slice_index += 1
            curr_level_slice = open_slice(
                curr_level, utils.get_slice_name(slice_index))
            has_pending = False

    # Only write the trailing slice if something was added to it. Otherwise
    # an empty slice would be saved and curr_slice_names could be indexed
    # past its last entry.
    if has_pending:
        flush(curr_level_slice, slice_index)
    return level_metadata