Example 1

# pb_filepath, pb_verify and FindLastSampleError are helpers from the
# surrounding project; only the standard library is imported here.
import errno
import os

def find_last_sample_timestamp(pv_name, out_dir, gran, delimiters):
    # Get the directory path and the prefix of data files.
    dir_path, file_prefix = pb_filepath.get_dir_and_prefix(out_dir, delimiters, pv_name)
    
    # Collect the time suffixes of existing data files for this PV.
    try:
        time_suffixes = list(pb_filepath.filter_filenames(os.listdir(dir_path), file_prefix))
    except OSError as e:
        if e.errno == errno.ENOENT:
            time_suffixes = []
        else:
            raise
    
    # Split time suffixes into integer components, but keep the original suffixes around.
    time_suffixes = [{'suffix': x, 'ints': [int(c) for c in x.split('_')]} for x in time_suffixes]
    
    # Sanity check the number of components.
    num_comps = gran.suffix_count()
    for x in time_suffixes:
        if len(x['ints']) != num_comps:
            raise FindLastSampleError(('Unexpected number of time suffix components: {0}. '
                                       'You are trying to export data using a different time granularity '
                                       'than was used for the already exported data.').format(x['suffix']))
    
    # Sort suffixes.
    time_suffixes = sorted(time_suffixes, key=lambda x: x['ints'])
    
    # Have no suffixes? Then there are no samples at all.
    if len(time_suffixes) == 0:
        return None
    
    for suffix in reversed(time_suffixes):
        # Make the file path.
        file_path = pb_filepath.get_path_for_suffix(out_dir, delimiters, pv_name, suffix['suffix'])
        
        # Go through this file.
        with open(file_path, 'rb') as stream:
            results = pb_verify.verify_stream(stream, pv_name=pv_name)
        
        # If any samples were found in this file, the last timestamp in the
        # file is what we're looking for. Else continue looking into the previous file.
        if results['last_timestamp'] is not None:
            year = results['year']
            secondsintoyear, nano = results['last_timestamp']
            return (year, secondsintoyear, nano)
    
    # No samples found in any file.
    return None
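
For context, here is a hedged usage sketch showing this function's contract: it returns None when no samples have been exported yet, or a (year, secondsintoyear, nano) tuple for the newest sample found. The PV name, output directory, granularity object and delimiter tuple below are illustrative assumptions, not values taken from the example above.

# Hypothetical usage; all argument values are made up for illustration.
last = find_last_sample_timestamp(
    pv_name='SOME:PV:NAME',    # assumed PV name
    out_dir='/data/export',    # assumed export directory
    gran=granularity,          # assumed granularity object providing suffix_count()
    delimiters=(':',),         # assumed PV-name-to-path delimiters
)
if last is None:
    print('No samples exported yet; start from the beginning.')
else:
    year, secondsintoyear, nano = last
    print('Resume after year={0}, sec={1}, nano={2}'.format(year, secondsintoyear, nano))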
Example 2

# Method of an appender class; assumes module-level imports of datetime and os,
# plus the project-local pbt, pb_escape, pb_filepath and pb_verify modules.
    def write_sample(self, sample_pb, dt_seconds, nanoseconds, pb_type):
        """ Determines the appropriate file for the sample (based on the timestamp) and 
        writes the given sample into a file."""
        # Compute the whole number of seconds into the year. This is exact,
        # since timedelta stores days and seconds as integers.
        td = dt_seconds - datetime.datetime(dt_seconds.year, 1, 1)
        into_year_sec = td.days * 24 * 3600 + td.seconds
        sample_ts = (into_year_sec, nanoseconds)

        # Ignore sample if requested by the lower bound.
        if self._ignore_ts_start is not None:
            if (dt_seconds.year, into_year_sec, nanoseconds) <= self._ignore_ts_start:
                self._pvlog.ignored_initial_sample()
                return

        # Write timestamp to sample.
        sample_pb.secondsintoyear, sample_pb.nano = sample_ts

        # Serialize sample.
        sample_serialized = sample_pb.SerializeToString()

        # If this sample does not belong to the currently opened file, close the file.
        # Note that it's ok to use dt_seconds here since we don't support sub-second granularity.
        # Same goes for the get_segment_for_time call below.
        if self._cur_file is not None and not (self._cur_start <= dt_seconds < self._cur_end):
            self._cur_file.close()
            self._cur_file = None

        # Need to open a file?
        if self._cur_file is None:
            # Determine the segment for this sample.
            segment = self._gran.get_segment_for_time(dt_seconds)
            self._cur_start = segment.start_time()
            self._cur_end = segment.next_segment().start_time()

            # Sanity check the segment bounds.
            assert self._cur_start <= dt_seconds < self._cur_end

            # Determine the path of the file.
            self._cur_path = pb_filepath.get_path_for_suffix(
                self._out_dir, self._delimiters, self._pv_name, segment.file_suffix()
            )
            pb_filepath.make_sure_path_exists(os.path.dirname(self._cur_path))

            self._pvlog.info("File: {0}".format(self._cur_path))

            # Open the file. This creates the file if it does not exist,
            # and the cursor is positioned at the *end*.
            self._cur_file = open(self._cur_path, "a+b")

            # Seek to the beginning.
            self._cur_file.seek(0, 0)

            # Verification fails if the file already contains samples newer than this one.
            upper_ts_bound = sample_ts

            # Verify any existing contents of the file.
            try:
                pb_verify.verify_stream(
                    self._cur_file,
                    pb_type=pb_type,
                    pv_name=self._pv_name,
                    year=dt_seconds.year,
                    upper_ts_bound=upper_ts_bound,
                )

            except pb_verify.VerificationError as e:
                self._pvlog.error("Verification failed: {0}: {1}".format(self._cur_path, e))
                self._cur_file.close()
                self._cur_file = None
                return
                # raise AppenderError('Verification failed: {0}: {1}'.format(self._cur_path, e))

            except pb_verify.EmptyFileError:
                # Build header.
                header_pb = pbt.PayloadInfo()
                header_pb.type = pb_type
                header_pb.pvname = self._pv_name
                header_pb.year = dt_seconds.year

                # Write header. Note that since there was no header we are still at the start of the file.
                self._cur_file.write(pb_escape.escape_line(header_pb.SerializeToString()))

        # Finally write the sample.
        self._cur_file.write(pb_escape.escape_line(sample_serialized))

        self._pvlog.archived_sample()
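
Two details in this method are worth isolating. First, the lower-bound check near the top works because Python compares tuples lexicographically, so (year, seconds_into_year, nanoseconds) tuples order chronologically. Second, the open(path, 'a+b') plus seek(0, 0) pattern lets the method verify any existing contents from the start of the file while every subsequent write() still appends, because append mode forces writes to the end of the file regardless of the current seek position. Below is a minimal self-contained sketch of that second behaviour; the path and byte strings are made up for illustration.

# Sketch of the open('a+b') pattern used above; contents are illustrative only.
import os
import tempfile

path = os.path.join(tempfile.mkdtemp(), 'demo.pb')

# Stand-in for a file left over from a previous export run.
with open(path, 'wb') as f:
    f.write(b'header-line\n')

with open(path, 'a+b') as f:
    f.seek(0, 0)                  # rewind, as the appender does before verifying
    assert f.read() == b'header-line\n'
    f.write(b'sample-line\n')     # append mode: lands at the end regardless of seek

with open(path, 'rb') as f:
    assert f.read() == b'header-line\nsample-line\n'
print('append-mode write landed at the end of the file')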