Example #1
import csv
import json
import sys
from datetime import datetime

# KronosClient, get_property, AGGREGATORS and aggregate_stream are provided
# by the surrounding module (see the later examples).
def main(args):
    client = KronosClient(args.kronos_url)
    results = client.get(args.stream,
                         args.start,
                         args.end,
                         namespace=args.namespace)
    if args.display == 'print':
        if args.type == 'json':
            # Materialize the event iterator so it can be dumped as one JSON list.
            events = list(results)
            print json.dumps(events)
        elif args.type == 'one-per-line':
            for event in results:
                print event
    elif args.display == 'csv':
        writer = csv.DictWriter(sys.stdout, args.fields)
        if not args.remove_header:
            writer.writeheader()
        for event in results:
            row_values = {}
            for field in args.fields:
                field_value = get_property(event, field)
                # Python 2's csv module needs byte strings, so encode unicode.
                row_values[field] = (field_value.encode('utf-8') if isinstance(
                    field_value, unicode) else field_value)
            writer.writerow(row_values)
    elif args.display == 'aggregate':
        aggregates = aggregate_stream(results, AGGREGATORS[args.aggregator],
                                      args.field, args.time_bucket_width)
        print 'Bucket, Aggregate'
        for bucket, aggregate in aggregates:
            print '%s, %s' % (datetime.fromtimestamp(bucket), aggregate)
    else:
        raise Exception('Invalid display option {}'.format(args.display))
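For context, a minimal sketch of how main() above could be wired up as a CLI. The flag names mirror the args.* attributes the function reads; the exact spellings, types and defaults are assumptions, since the original argument parser is not shown here.

# Hypothetical CLI wiring for main(); flag names are guesses derived from
# the args.* attributes above (argparse turns '-' into '_' in dest names).
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--kronos-url', required=True)
    parser.add_argument('--stream', required=True)
    parser.add_argument('--start', required=True)
    parser.add_argument('--end', required=True)
    parser.add_argument('--namespace', default=None)
    parser.add_argument('--display', choices=['print', 'csv', 'aggregate'],
                        default='print')
    parser.add_argument('--type', choices=['json', 'one-per-line'],
                        default='one-per-line')
    parser.add_argument('--fields', nargs='*', default=[])
    parser.add_argument('--remove-header', action='store_true')
    parser.add_argument('--aggregator')
    parser.add_argument('--field')
    parser.add_argument('--time-bucket-width', type=int)
    main(parser.parse_args())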
Example #3
def _stream_earliest_action(client, stream, start, end, fuzzy_time,
                            last_user_action, user_id_mappings):
    """Find users who advance to this step of the funnel.

    :returns: dictionary with user_action and stream_data. user_action is a
    dictionary of user ids and time of last action, used to determine
    whether events in subsequent streams occur after the current stream.
    stream_data is a dictionary of user ids and a dictionary of output
    properties as specified in stream.output_fields.
    """
    events = client.get(stream.stream_name, start, end)
    user_action = {}
    stream_data = {}
    for idx, event in enumerate(events):
        if idx % 10000 == 0:
            log.debug('...processed %s events', idx)
        if stream.event_filter and not stream.event_filter(event):
            continue
        try:
            user = user_id_mappings[stream.user_field].get(
                event[stream.user_field])
        except KeyError:
            log.error('Unable to get field %s on %s from %s',
                      stream.user_field, stream.stream_name, event)
            continue  # `user` is unbound here, so skip this event.
        last_time = last_user_action.get(user)
        event_time = event[TIMESTAMP_FIELD]
        # If we've seen an action from this user in the last stream, and
        # if they performed an action on the current stream (fuzzily)
        # after their last action, update their current user action time.
        if (user is not None and last_time is not None
                and ((last_time - fuzzy_time) < event_time)):
            user_action[user] = min(user_action.get(user, event_time),
                                    event_time)
            if stream.output_fields and not stream.invert:
                event_fields = {}
                for field in stream.output_fields:
                    try:
                        event_fields[field] = get_property(event, field)
                    except KeyError:
                        log.warn('Field %s does not appear in stream %s',
                                 field, stream.stream_name)
                stream_data[user] = event_fields

    # If stream results should be inverted, include all users that are NOT in
    # user_action, and use their timestamp from the previous step as the
    # timestamp of the current step. We cannot use the timestamp for this
    # stream, since they may not have an event in this stream.
    if stream.invert:
        inverted_user_action = {}
        for user, timestamp in last_user_action.iteritems():
            if user not in user_action:
                inverted_user_action[user] = timestamp
        return {'user_action': inverted_user_action, 'stream_data': {}}
    else:
        return {'user_action': user_action, 'stream_data': stream_data}
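The docstring hints at how this composes: each step's user_action becomes the next step's last_user_action. A minimal sketch of that chaining, under the assumption of a run_funnel driver and a pre-seeded first step (neither appears in the original module):

# Hypothetical driver showing how per-step results thread through a funnel.
# `streams` is an ordered list of step definitions; `seed_user_action` maps
# user id -> timestamp for the entry step.
def run_funnel(client, streams, start, end, fuzzy_time, user_id_mappings,
               seed_user_action):
    last_user_action = seed_user_action
    per_step = []
    for stream in streams:
        result = _stream_earliest_action(client, stream, start, end,
                                         fuzzy_time, last_user_action,
                                         user_id_mappings)
        per_step.append(result)
        # Users who reached this step become the candidate set for the next.
        last_user_action = result['user_action']
    return per_step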
Example #4
def aggregate_stream(events, aggregator_class, field, bucket_width):
  # Events are assumed to arrive in timestamp order; bucket them into
  # fixed-width windows and yield one aggregate per window.
  current_bucket, aggregator = None, None
  for event in events:
    timestamp = kronos_time_to_epoch_time(event[TIMESTAMP_FIELD])
    bucket = timestamp - (timestamp % bucket_width)
    if bucket != current_bucket:
      # Crossed into a new bucket: emit the finished one first.
      if current_bucket is not None:
        yield current_bucket, aggregator.aggregate()
      current_bucket = bucket
      aggregator = aggregator_class()
    aggregator.update(get_property(event, field))
  # Emit the final, still-open bucket. The `is not None` check matters:
  # a bucket at epoch 0 is falsy but still valid.
  if current_bucket is not None and aggregator is not None:
    yield current_bucket, aggregator.aggregate()
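To make the generator concrete, here is a sketch of an aggregator with the update()/aggregate() interface it expects and a sample call. The Count class and the field name are hypothetical stand-ins for whatever the module's AGGREGATORS map provides:

# Hypothetical aggregator; the real classes live in the AGGREGATORS map
# used by main() in Example #1.
class Count(object):
  def __init__(self):
    self.n = 0

  def update(self, value):
    self.n += 1

  def aggregate(self):
    return self.n

# Count events per 60-second bucket. `events` is any time-ordered iterable
# of Kronos events, e.g. the result of client.get(...); 'userId' is an
# assumed field name.
for bucket, value in aggregate_stream(events, Count, 'userId', 60):
  print '%s, %s' % (bucket, value)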