def process_file(events_file, from_timestamp=None):
    """Turn the events of a file into records written in time order.

    The records file is cleared first.  Each event produced by
    ``read_file(events_file)`` is passed once to ``extract_data``; the
    records whose ``time`` is not below the threshold are buffered,
    sorted by ``time`` and handed to ``write_records``.

    Every 10000 events the progress is logged and, if the buffer holds
    more than 3000000 records, it is sorted and its first 1500000
    records are written and removed from memory.  Each flush sorts only
    what is buffered at that moment, so a record that shows up later
    with a smaller ``time`` than an already flushed one is passed to
    ``write_records`` after it.

    Args:
        events_file: passed unchanged to ``read_file``.
        from_timestamp: optional timestamp, parsed with
            ``ztreamy.parse_timestamp``; records with an earlier
            ``time`` are discarded.  With ``None`` the threshold is 0.
    """
    flush_threshold = 3000000  # buffer size that triggers a partial flush
    flush_size = 1500000       # number of records written per partial flush
    clear_records_file()
    if from_timestamp is not None:
        from_time = ztreamy.parse_timestamp(from_timestamp)
    else:
        from_time = 0
    records = []
    num_events = 0
    num_records = 0  # records already flushed through write_records()
    for event in read_file(events_file):
        num_events += 1
        # extract_data() is called exactly once per event; a second,
        # discarded call only duplicated the work.
        new_records = [r for r in extract_data(event)
                       if r.time >= from_time]
        records.extend(new_records)
        if num_events % 10000 == 0:
            logging.info('Read {} events into {} records'
                         .format(num_events, len(records) + num_records))
            if len(records) > flush_threshold:
                logging.info('Sorting {} records'.format(len(records)))
                records.sort(key=lambda x: x.time)
                write_records(records[:flush_size])
                del records[:flush_size]
                num_records += flush_size
    # Report the overall total, including what was flushed earlier, so
    # this figure is consistent with the progress messages above.
    logging.info('Read {} events into {} records'.format(
        num_events, len(records) + num_records))
    logging.info('Sorting {} records'.format(len(records)))
    records.sort(key=lambda x: x.time)
    write_records(records)
Example #2
0
def process_file(events_file, from_timestamp=None):
    """Turn the events of a file into records written in time order.

    The records file is cleared first.  Each event produced by
    ``read_file(events_file)`` is passed once to ``extract_data``; the
    records whose ``time`` is not below the threshold are buffered,
    sorted by ``time`` and handed to ``write_records``.

    Every 10000 events the progress is logged and, if the buffer holds
    more than 3000000 records, it is sorted and its first 1500000
    records are written and removed from memory.  Each flush sorts only
    what is buffered at that moment, so a record that shows up later
    with a smaller ``time`` than an already flushed one is passed to
    ``write_records`` after it.

    Args:
        events_file: passed unchanged to ``read_file``.
        from_timestamp: optional timestamp, parsed with
            ``ztreamy.parse_timestamp``; records with an earlier
            ``time`` are discarded.  With ``None`` the threshold is 0.
    """
    flush_threshold = 3000000  # buffer size that triggers a partial flush
    flush_size = 1500000       # number of records written per partial flush
    clear_records_file()
    if from_timestamp is not None:
        from_time = ztreamy.parse_timestamp(from_timestamp)
    else:
        from_time = 0
    records = []
    num_events = 0
    num_records = 0  # records already flushed through write_records()
    for event in read_file(events_file):
        num_events += 1
        # extract_data() is called exactly once per event; a second,
        # discarded call only duplicated the work.
        new_records = [r for r in extract_data(event)
                       if r.time >= from_time]
        records.extend(new_records)
        if num_events % 10000 == 0:
            logging.info('Read {} events into {} records'
                         .format(num_events, len(records) + num_records))
            if len(records) > flush_threshold:
                logging.info('Sorting {} records'.format(len(records)))
                records.sort(key=lambda x: x.time)
                write_records(records[:flush_size])
                del records[:flush_size]
                num_records += flush_size
    # Report the overall total, including what was flushed earlier, so
    # this figure is consistent with the progress messages above.
    logging.info('Read {} events into {} records'.format(
        num_events, len(records) + num_records))
    logging.info('Sorting {} records'.format(len(records)))
    records.sort(key=lambda x: x.time)
    write_records(records)
Example #3
0
    def time(self):
        """Event timestamp as a number of seconds since the epoch.

        The number of seconds is UTC-based.  It is computed from
        `self.timestamp` with `ztreamy.parse_timestamp` the first time
        it is needed and kept in `self._time` for later calls.

        """
        cached = self._time
        if cached is not None:
            return cached
        # First access: parse the textual timestamp and remember it.
        cached = ztreamy.parse_timestamp(self.timestamp)
        self._time = cached
        return cached
Example #4
0
def randomize_timestamps(events, interval_duration):
    """Overwrite the timestamps of *events* with randomized values.

    The running time starts at the parsed timestamp of the first event.
    For each event it is advanced by ``random.expovariate`` with rate
    ``1.3 * len(events) / interval_duration``, truncated to an integer
    and capped at ``start + interval_duration - 1``; the result is
    converted with ``ztreamy.get_timestamp`` and stored in
    ``event.timestamp``.

    Args:
        events: sequence of objects with a ``timestamp`` attribute;
            they are modified in place.  An empty sequence is a no-op.
        interval_duration: length of the interval in which the new
            timestamps must fall, in the units returned by
            ``ztreamy.parse_timestamp``.  Must be non-zero.

    Returns:
        None.
    """
    if not events:
        # Nothing to randomize; events[0] below would raise IndexError.
        return
    current_time = ztreamy.parse_timestamp(events[0].timestamp)
    time_max = current_time + interval_duration - 1
    exp_rate = 1.3 * len(events) / interval_duration
    for event in events:
        current_time += random.expovariate(exp_rate)
        # Never let a timestamp leave the interval.
        timestamp = min(int(current_time), time_max)
        event.timestamp = ztreamy.get_timestamp(date=timestamp)
 def time(self):
     """Return the parsed form of `self.timestamp`, computing it lazily.

     On first use the timestamp is parsed with
     `ztreamy.parse_timestamp`, passing `self.default_tz` as
     `default_tz`, and the result is kept in `self._time`; later calls
     return that stored value.
     """
     cached = self._time
     if cached is not None:
         return cached
     # First access: parse the textual timestamp and remember it.
     cached = ztreamy.parse_timestamp(self.timestamp,
                                      default_tz=self.default_tz)
     self._time = cached
     return cached
Example #6
0
 def time(self):
     """Return the parsed form of `self.timestamp`, computing it lazily.

     On first use the timestamp is parsed with
     `ztreamy.parse_timestamp`, passing `self.default_tz` as
     `default_tz`, and the result is kept in `self._time`; later calls
     return that stored value.
     """
     cached = self._time
     if cached is not None:
         return cached
     # First access: parse the textual timestamp and remember it.
     cached = ztreamy.parse_timestamp(self.timestamp,
                                      default_tz=self.default_tz)
     self._time = cached
     return cached