コード例 #1
0
    def _raw_preprocess(self, number_per_slice):
        """Splits raw data into slices and records each slice's start time.

        Reads the raw file slice by slice, saves every non-empty slice under
        the raw level directory, and stores the start time of each slice in
        the raw-level Metadata object, which is persisted with its save()
        method. Also fills in the 'raw_number', 'start', 'end' and 'levels'
        entries of self._metadata.

        Args:
            number_per_slice: An int of records to keep for each slice.

        Returns:
            Error string if an error occurs, None if complete.
        """
        raw_slice_metadata = Metadata(
            self._preprocess_dir, strategy=None, level=RAW_LEVEL_DIR,
            bucket=self._preprocess_bucket)
        raw_data = RawDataProcessor(
            self._metadata['raw_file'], number_per_slice, self._raw_bucket)

        slice_index = 0
        raw_start_times = []
        record_count = 0
        # -1 marks "no record seen yet" for both ends of the time span.
        timespan_start = timespan_end = -1
        while raw_data.readable():
            slice_name = utils.get_slice_path(
                self._preprocess_dir, RAW_LEVEL_DIR,
                utils.get_slice_name(slice_index))
            # NOTE(review): the two print calls below look like debug output;
            # the second one writes every record of the slice to stdout.
            print("Slice name: " + slice_name)
            level_slice = LevelSlice(
                slice_name, bucket=self._preprocess_bucket)
            raw_slice = raw_data.read_next_slice()
            print(raw_slice)
            # A string result is treated as an error message and handed back
            # to the caller unchanged.
            if isinstance(raw_slice, str):
                return raw_slice
            # An empty read has no first/last record to index; stop here
            # instead of saving an empty slice and failing on raw_slice[0].
            # Presumably this only happens once the input is exhausted.
            if raw_slice is None or len(raw_slice) == 0:
                break
            level_slice.save(raw_slice)

            # The first field of a record is used as its timestamp.
            first_time = raw_slice[0][0]
            raw_start_times.append(first_time)

            slice_index += 1
            record_count += len(raw_slice)
            if timespan_start == -1:
                timespan_start = first_time
            timespan_end = raw_slice[-1][0]

        self._metadata['raw_number'] = record_count
        self._metadata['start'] = timespan_start
        self._metadata['end'] = timespan_end

        levels, level_names = self._get_levels_metadata(
            record_count, timespan_end - timespan_start)
        self._metadata['levels']['names'] = level_names
        for name, level in zip(level_names, levels):
            self._metadata['levels'][name] = level

        # Map each raw slice name (taken from the levels metadata) to the
        # start time collected while slicing.
        raw_slice_names = self._metadata['levels'][RAW_LEVEL_DIR]['names']
        for index, raw_slice_start in enumerate(raw_start_times):
            raw_slice_metadata[raw_slice_names[index]] = raw_slice_start
        raw_slice_metadata.save()
        return None
    def test_save_parameter(self, test_records1):
        """Tests that records passed to save() can be read back from the file.

        Args:
            test_records1: A list of records (presumably supplied by a test
                fixture). The value at index 2 of the first record is the
                key under which all records are expected after reading.
        """
        # The context manager closes the temporary file even when the
        # assertion fails or save()/read() raises.
        with NamedTemporaryFile() as tmpfile:
            test_save_slice = LevelSlice(tmpfile.name)

            formatted_test_records = {test_records1[0][2]: test_records1}
            test_save_slice.save(test_records1)

            # Read through a fresh object so the comparison reflects what
            # was written to the file, not in-memory state of the writer.
            test_read_slice = LevelSlice(tmpfile.name)
            test_read_slice.read()
            assert test_read_slice._records == formatted_test_records
    def _single_level_downsample(self, strategy, prev_level, curr_level,
                                 level_metadata):
        """Builds the slices of one level by downsampling the previous level.

        Every slice of prev_level is read and downsampled, and the result is
        appended to the slice currently being filled for curr_level. Once
        that slice holds at least self._number_per_slice records it is saved
        and a new one is started. The slice still open after the last input
        slice is saved as well.

        Args:
            strategy: A string representing a downsampling strategy.
            prev_level: A string of the name of the previous level, whose
                slices are read as input.
            curr_level: A string of the name of the current level, whose
                slices are written.
            level_metadata: A metadata object for this level; for each saved
                slice, its name is mapped to the slice's first timestamp.

        Returns:
            The level_metadata object that was passed in, after updating.
        """
        target_names = self._metadata['levels'][curr_level]['names']
        source_names = self._metadata['levels'][prev_level]['names']

        def open_target(index):
            # Creates the slice object for the index-th slice of curr_level.
            path = utils.get_slice_path(
                self._preprocess_dir, curr_level,
                utils.get_slice_name(index), strategy)
            return LevelSlice(path, bucket=self._preprocess_bucket)

        position = 0
        target = open_target(position)

        for source_name in source_names:
            source_path = utils.get_slice_path(
                self._preprocess_dir, prev_level, source_name, strategy)
            source = LevelSlice(source_path, bucket=self._preprocess_bucket)
            source.read()
            target.add_records(
                source.downsample(strategy, self._downsample_level_factor))

            # Keep filling the same output slice until it is large enough.
            if target.get_records_count() < self._number_per_slice:
                continue

            target.save()
            level_metadata[target_names[position]] = (
                target.get_first_timestamp())
            position += 1
            target = open_target(position)

        # Save whatever is left in the slice that was still being filled.
        target.save()
        level_metadata[target_names[position]] = target.get_first_timestamp()
        return level_metadata