def process_file(events_file, from_timestamp=None):
    """Extract records from an events file and write them sorted by time.

    The records file is cleared first.  Every event returned by
    ``read_file(events_file)`` is passed through ``extract_data`` and the
    resulting records whose ``time`` is not earlier than the cut-off are
    buffered.  Whenever the buffer grows past a threshold it is sorted by
    ``time`` and its earliest half is handed to ``write_records``; the
    remainder is sorted and written at the end.

    NOTE(review): because flushed records are never revisited, the output
    is globally ordered only if no later event yields a record older than
    one already flushed -- presumably the input is roughly time-ordered;
    confirm against the data source.

    Args:
        events_file: Passed unchanged to ``read_file``.
        from_timestamp: Optional timestamp accepted by
            ``ztreamy.parse_timestamp``.  Records with an earlier ``time``
            are dropped.  When ``None`` the cut-off is 0.
    """
    # Buffer size that triggers a partial flush, and how many of the
    # earliest records are written out on each flush.
    flush_threshold = 3000000
    flush_size = 1500000

    clear_records_file()
    if from_timestamp is not None:
        from_time = ztreamy.parse_timestamp(from_timestamp)
    else:
        from_time = 0
    records = []
    num_events = 0
    num_records = 0  # Records already flushed to the records file.
    for event in read_file(events_file):
        num_events += 1
        # Extract once per event; the result is filtered by the cut-off.
        records.extend(r for r in extract_data(event)
                       if r.time >= from_time)
        if num_events % 10000 == 0:
            logging.info('Read {} events into {} records'
                         .format(num_events, len(records) + num_records))
        if len(records) > flush_threshold:
            logging.info('Sorting {} records'.format(len(records)))
            records.sort(key=lambda x: x.time)
            # Write the earliest records and keep the rest buffered so
            # that they can still be interleaved with upcoming ones.
            write_records(records[:flush_size])
            del records[:flush_size]
            num_records += flush_size
    # Report the overall total, including records flushed earlier, so the
    # summary agrees with the progress messages logged inside the loop.
    logging.info('Read {} events into {} records'
                 .format(num_events, len(records) + num_records))
    logging.info('Sorting {} records'.format(len(records)))
    records.sort(key=lambda x: x.time)
    write_records(records)
def process_file(events_file, from_timestamp=None):
    """Extract records from an events file and write them sorted by time.

    The records file is cleared first.  Every event returned by
    ``read_file(events_file)`` is passed through ``extract_data`` and the
    resulting records whose ``time`` is not earlier than the cut-off are
    buffered.  Whenever the buffer grows past a threshold it is sorted by
    ``time`` and its earliest half is handed to ``write_records``; the
    remainder is sorted and written at the end.

    NOTE(review): because flushed records are never revisited, the output
    is globally ordered only if no later event yields a record older than
    one already flushed -- presumably the input is roughly time-ordered;
    confirm against the data source.

    Args:
        events_file: Passed unchanged to ``read_file``.
        from_timestamp: Optional timestamp accepted by
            ``ztreamy.parse_timestamp``.  Records with an earlier ``time``
            are dropped.  When ``None`` the cut-off is 0.
    """
    # Buffer size that triggers a partial flush, and how many of the
    # earliest records are written out on each flush.
    flush_threshold = 3000000
    flush_size = 1500000

    clear_records_file()
    if from_timestamp is not None:
        from_time = ztreamy.parse_timestamp(from_timestamp)
    else:
        from_time = 0
    records = []
    num_events = 0
    num_records = 0  # Records already flushed to the records file.
    for event in read_file(events_file):
        num_events += 1
        # Extract once per event; the result is filtered by the cut-off.
        records.extend(r for r in extract_data(event)
                       if r.time >= from_time)
        if num_events % 10000 == 0:
            logging.info('Read {} events into {} records'
                         .format(num_events, len(records) + num_records))
        if len(records) > flush_threshold:
            logging.info('Sorting {} records'.format(len(records)))
            records.sort(key=lambda x: x.time)
            # Write the earliest records and keep the rest buffered so
            # that they can still be interleaved with upcoming ones.
            write_records(records[:flush_size])
            del records[:flush_size]
            num_records += flush_size
    # Report the overall total, including records flushed earlier, so the
    # summary agrees with the progress messages logged inside the loop.
    logging.info('Read {} events into {} records'
                 .format(num_events, len(records) + num_records))
    logging.info('Sorting {} records'.format(len(records)))
    records.sort(key=lambda x: x.time)
    write_records(records)
def time(self):
    """Return the event timestamp as seconds since the epoch (UTC-based).

    The value is obtained by parsing ``self.timestamp`` the first time it
    is requested and is kept in ``self._time`` for later calls.
    """
    cached = self._time
    if cached is not None:
        # Already parsed on an earlier call.
        return cached
    self._time = ztreamy.parse_timestamp(self.timestamp)
    return self._time
def randomize_timestamps(events, interval_duration):
    """Reassign the timestamps of *events* at random within an interval.

    Starting from the parsed timestamp of the first event, each event in
    turn is placed after an exponentially distributed gap and its
    ``timestamp`` attribute is overwritten in place.  The rate is 1.3
    times ``len(events) / interval_duration``, i.e. the mean gap is a bit
    shorter than an even spacing would be.  Any value that would fall
    beyond ``start + interval_duration - 1`` is clamped to that maximum.

    Args:
        events: Sequence of objects with a ``timestamp`` attribute that
            ``ztreamy.parse_timestamp`` accepts.  An empty sequence is
            left untouched.
        interval_duration: Length of the interval, in the same units as
            the value returned by ``ztreamy.parse_timestamp`` (presumably
            seconds -- confirm).  Must be non-zero.
    """
    if not events:
        # Nothing to randomize; also avoids an IndexError on events[0].
        return
    current_time = ztreamy.parse_timestamp(events[0].timestamp)
    time_max = current_time + interval_duration - 1
    exp_rate = 1.3 * len(events) / interval_duration
    for event in events:
        current_time += random.expovariate(exp_rate)
        # Truncate to whole units and keep the result inside the interval.
        timestamp = min(int(current_time), time_max)
        event.timestamp = ztreamy.get_timestamp(date=timestamp)
def time(self):
    """Return the parsed form of ``self.timestamp``, caching the result.

    On the first call the timestamp is parsed with
    ``ztreamy.parse_timestamp`` using ``self.default_tz`` as the default
    time zone, and the result is stored in ``self._time``.  Later calls
    return the stored value without parsing again.
    """
    cached = self._time
    if cached is not None:
        # Already parsed on an earlier call.
        return cached
    self._time = ztreamy.parse_timestamp(
        self.timestamp, default_tz=self.default_tz)
    return self._time