def main(args): client = KronosClient(args.kronos_url) results = client.get(args.stream, args.start, args.end, namespace=args.namespace) if args.display == 'print': if args.type == 'json': events = [] for event in results: events.append(event) print json.dumps(events) elif args.type == 'one-per-line': for event in results: print event elif args.display == 'csv': writer = csv.DictWriter(sys.stdout, args.fields) if not args.remove_header: writer.writeheader() for event in results: row_values = {} for field in args.fields: field_value = get_property(event, field) row_values[field] = (field_value.encode('utf-8') if isinstance( field_value, unicode) else field_value) writer.writerow(row_values) elif args.display == 'aggregate': aggregates = aggregate_stream(results, AGGREGATORS[args.aggregator], args.field, args.time_bucket_width) print 'Bucket, Aggregate' for bucket, aggregate in aggregates: print '%s, %s' % (datetime.fromtimestamp(bucket), aggregate) else: raise Exception('Invalid display option {}'.format(args.display))
def main(args): client = KronosClient(args.kronos_url) results = client.get(args.stream, args.start, args.end, namespace=args.namespace) if args.display == 'print': if args.type == 'json': events = [] for event in results: events.append(event) print json.dumps(events) elif args.type == 'one-per-line': for event in results: print event elif args.display == 'csv': writer = csv.DictWriter(sys.stdout, args.fields) if not args.remove_header: writer.writeheader() for event in results: row_values = {} for field in args.fields: field_value = get_property(event, field) row_values[field] = (field_value.encode('utf-8') if isinstance(field_value, unicode) else field_value) writer.writerow(row_values) elif args.display == 'aggregate': aggregates = aggregate_stream(results, AGGREGATORS[args.aggregator], args.field, args.time_bucket_width) print 'Bucket, Aggregate' for bucket, aggregate in aggregates: print '%s, %s' % (datetime.fromtimestamp(bucket), aggregate) else: raise Exception('Invalid display option {}'.format(args.display))
def _stream_earliest_action(client, stream, start, end, fuzzy_time,
                            last_user_action, user_id_mappings):
  """ Find users who advance to this step of the funnel.

  :returns: dictionary with user_action and stream_data.
    user_action is a dictionary of user ids and time of last action. This
    is for determining if events in subsequent streams occur after the
    current stream.
    stream_data is a dictionary of user ids and dictionary of output
    properties as specified in stream.output_fields.
  """
  events = client.get(stream.stream_name, start, end)
  user_action = {}
  stream_data = {}
  for idx, event in enumerate(events):
    if idx % 10000 == 0:
      # %-style format string so logging can interpolate the count (the
      # previous call passed bare positional args with no placeholders,
      # which the logging module cannot render).
      log.debug('...processed %s events', idx)
    if stream.event_filter and not stream.event_filter(event):
      continue
    try:
      user = user_id_mappings[stream.user_field].get(
        event[stream.user_field])
    except KeyError:
      log.error('Unable to get field %s on %s from %s', stream.user_field,
                stream.stream_name, event)
      # Skip this event: without `continue` the loop body would run with a
      # stale `user` (or an undefined one on the very first event).
      continue
    last_time = last_user_action.get(user)
    event_time = event[TIMESTAMP_FIELD]
    # If we've seen an action from this user in the last stream, and
    # if they performed an action on the current stream (fuzzily)
    # after their last action, update their current user action time.
    if (user is not None and last_time is not None and
        ((last_time - fuzzy_time) < event_time)):
      user_action[user] = min(user_action.get(user, event_time), event_time)
      if stream.output_fields and not stream.invert:
        event_fields = {}
        for field in stream.output_fields:
          try:
            event_fields[field] = get_property(event, field)
          except KeyError:
            log.warn('Field %s does not appear in stream %s', field,
                     stream.stream_name)
        stream_data[user] = event_fields
  # If stream results should be inverted, include all users that are NOT in
  # user_action, and use their timestamp from the previous step as the
  # timestamp of the current step. We can not use the timestamp for this
  # stream, since they may not have an event in this stream.
  if stream.invert:
    inverted_user_action = {}
    for user, timestamp in last_user_action.iteritems():
      if user not in user_action:
        inverted_user_action[user] = timestamp
    return {'user_action': inverted_user_action, 'stream_data': {}}
  else:
    return {'user_action': user_action, 'stream_data': stream_data}
def aggregate_stream(events, aggregator_class, field, bucket_width):
  """Bucket `events` by time and yield (bucket_start, aggregate) pairs.

  `events` is assumed to be ordered by timestamp: a bucket is flushed as
  soon as an event falls into a different bucket. Yields nothing for an
  empty event stream.

  :param events: iterable of events keyed by TIMESTAMP_FIELD, in time
    order — TODO confirm callers always pass sorted results.
  :param aggregator_class: no-arg factory returning an object with
    `update(value)` and `aggregate()` methods.
  :param field: property of each event fed to the aggregator.
  :param bucket_width: bucket size, in the same (epoch-time) units as the
    converted timestamps.
  """
  current_bucket, aggregator = None, None
  for event in events:
    timestamp = kronos_time_to_epoch_time(event[TIMESTAMP_FIELD])
    bucket = timestamp - (timestamp % bucket_width)
    if bucket != current_bucket:
      # Flush the previous bucket before starting a new one.
      if aggregator is not None:
        yield current_bucket, aggregator.aggregate()
      current_bucket = bucket
      aggregator = aggregator_class()
    aggregator.update(get_property(event, field))
  # Flush the final bucket. Test against None rather than truthiness: the
  # previous `if not emitted and current_bucket and aggregator` silently
  # dropped the last bucket whenever it started at epoch time 0. (The
  # `emitted` flag was also dead state — it was always False here whenever
  # at least one event had been processed.)
  if aggregator is not None:
    yield current_bucket, aggregator.aggregate()