Ejemplo n.º 1
0
 def execute_order_by(self, node):
     """Yield the events of ``node.stream`` ordered by ``node.fields``.

     The stream is executed and sorted in ascending order on the tuple of
     values that ``get_value`` returns for each field in ``node.fields``.
     When ``node.order`` equals ``node.ResultOrder.DESCENDING`` the sorted
     list is walked backwards, otherwise forwards.

     Args:
         node: Order-by node; this method reads its ``stream``, ``fields``,
             ``order`` and ``ResultOrder`` attributes.

     Yields:
         The events produced by ``self.execute(node.stream)``, one at a
         time, in the requested order.
     """
     events = sorted(
         self.execute(node.stream),
         key=lambda e: tuple(get_value(e, field) for field in node.fields))
     # Walk the ascending list backwards instead of sorting with
     # ``reverse=True``: the two order events with equal keys differently,
     # and the backwards walk is the behaviour callers already get.
     if node.order == node.ResultOrder.DESCENDING:
         ordered = reversed(events)
     else:
         ordered = iter(events)
     for event in ordered:
         yield event
Ejemplo n.º 2
0
 def map(event):
     """Prefix the keys of ``event`` with ``alias`` and derive its key.

     Args:
         event: Mapping whose items are copied into a new dict under keys
             of the form ``'<alias>.<original key>'``.

     Returns:
         A ``(key, new_event)`` tuple, where ``key`` is the JSON encoding
         of the list of values ``get_value`` returns from the prefixed
         event for each entry of ``key_values``.
     """
     prefixed = dict(
         ('%s.%s' % (alias, name), item)
         for name, item in event.iteritems())
     lookups = [get_value(prefixed, ref) for ref in key_values]
     return (json.dumps(lookups), prefixed)
Ejemplo n.º 3
0
 def map_func(self, event):
     """Project ``event`` onto the aliases of ``self.fields``.

     Args:
         event: Source event; it is only read, never modified.

     Returns:
         A new dict mapping each ``field.alias`` to the value that
         ``get_value(event, field)`` returns. When ``self.merge`` is truthy
         the result starts out as a deep copy of ``event``; otherwise it
         starts out empty.
     """
     # Function-scope import so the method does not rely on a module-level
     # import that is not visible from here.
     from copy import deepcopy

     # Work on a copy when merging: writing aliases straight into ``event``
     # would mutate the caller's object and would let an alias written for
     # an earlier field be read back by ``get_value`` for a later one.
     new_event = deepcopy(event) if self.merge else {}
     for field in self.fields:
         new_event[field.alias] = get_value(event, field)
     return new_event
Ejemplo n.º 4
0
 def map_func(self, event):
   """Project ``event`` onto the aliases of ``self.fields``.

   Args:
     event: Source event; it is only read, never modified.

   Returns:
     A new dict mapping each ``field.alias`` to the value that
     ``get_value(event, field)`` returns. When ``self.merge`` is truthy the
     result starts out as a deep copy of ``event``; otherwise it starts out
     empty.
   """
   # Function-scope import so the method does not rely on a module-level
   # import that is not visible from here.
   from copy import deepcopy

   # Work on a copy when merging: writing aliases straight into ``event``
   # would mutate the caller's object and would let an alias written for an
   # earlier field be read back by ``get_value`` for a later one.
   new_event = deepcopy(event) if self.merge else {}
   for field in self.fields:
     new_event[field.alias] = get_value(event, field)
   return new_event
Ejemplo n.º 5
0
 def execute_order_by(self, node):
   """Yield the events of ``node.source`` ordered by ``node.fields``.

   The source is executed and sorted in ascending order on the tuple of
   values that ``get_value`` returns for each field in ``node.fields``.
   When ``node.order`` equals ``node.ResultOrder.DESCENDING`` the sorted
   list is walked backwards, otherwise forwards.

   Args:
     node: Order-by node; this method reads its ``source``, ``fields``,
       ``order`` and ``ResultOrder`` attributes.

   Yields:
     The events produced by ``self.execute(node.source)``, one at a time,
     in the requested order.
   """
   events = sorted(self.execute(node.source),
                   key=lambda e: tuple(get_value(e, field)
                                       for field in node.fields))
   # Walk the ascending list backwards instead of sorting with
   # ``reverse=True``: the two order events with equal keys differently,
   # and the backwards walk is the behaviour callers already get.
   if node.order == node.ResultOrder.DESCENDING:
     ordered = reversed(events)
   else:
     ordered = iter(events)
   for event in ordered:
     yield event
Ejemplo n.º 6
0
 def project(event):
   """Build the projected event for ``event``.

   Args:
     event: Source event handed to ``get_value`` for every field.

   Returns:
     A dict mapping each ``field.alias`` in ``node.fields`` to the value
     ``get_value(event, field)`` returns. When ``node.merge`` is truthy the
     dict starts out as a deep copy of ``event``; otherwise it starts empty.
   """
   projected = deepcopy(event) if node.merge else {}
   projected.update(
     (column.alias, get_value(event, column)) for column in node.fields)
   return projected
Ejemplo n.º 7
0
 def project(event):
     """Build the projected event for ``event``.

     Args:
         event: Source event handed to ``get_value`` for every field.

     Returns:
         A dict mapping each ``field.alias`` in ``node.fields`` to the
         value ``get_value(event, field)`` returns. When ``node.merge`` is
         truthy the dict starts out as a deep copy of ``event``; otherwise
         it starts empty.
     """
     projected = deepcopy(event) if node.merge else {}
     projected.update(
         (column.alias, get_value(event, column)) for column in node.fields)
     return projected
Ejemplo n.º 8
0
 def group_func(self, event):
     """Build the grouping key and per-event aggregate values for ``event``.

     Args:
         event: Event handed to ``get_value`` to resolve the group-by
             values and the first argument of each aggregate.

     Returns:
         A ``(key, new_event)`` tuple. ``key`` is the JSON encoding (with
         sorted keys) of the group-by aliases and their values.
         ``new_event`` holds those same entries plus one entry per
         aggregate alias:

         * COUNT: 1 when there are no arguments; otherwise 0 if the first
           argument resolves to ``None``, else 1.
         * SUM / MIN / MAX: ``cast_to_number`` of the first argument, with
           0 / ``inf`` / ``-inf`` passed as its second argument
           (presumably the fallback for non-numeric values -- confirm
           against ``cast_to_number``).
         * AVG: a ``(value, 1)`` pair, or ``(0, 0)`` when
           ``cast_to_number`` returns ``None``.

     Raises:
         ValueError: If an aggregate's ``op`` is none of the operations
             handled above.
     """
     new_event = {
         value.alias: get_value(event, value)
         for value in self.group_by.values
     }
     # Encode the key before the aggregate columns are added so it depends
     # on the group-by values only.
     key = json.dumps(new_event, sort_keys=True)
     for aggregate in self.aggregates:
         arguments = aggregate.arguments
         op = aggregate.op
         if op == Aggregator.Op.COUNT:
             if not arguments:
                 value = 1
             else:
                 value = 0 if get_value(event, arguments[0]) is None else 1
         elif op == Aggregator.Op.SUM:
             value = cast_to_number(get_value(event, arguments[0]), 0)
         elif op == Aggregator.Op.MIN:
             value = cast_to_number(get_value(event, arguments[0]),
                                    float('inf'))
         elif op == Aggregator.Op.MAX:
             value = cast_to_number(get_value(event, arguments[0]),
                                    -float('inf'))
         elif op == Aggregator.Op.AVG:
             value = cast_to_number(get_value(event, arguments[0]), None)
             # Carry (sum, count) so averages can be combined later.
             value = (0, 0) if value is None else (value, 1)
         else:
             # Without this branch ``value`` would be unbound on the first
             # aggregate, or silently reuse the previous aggregate's value.
             raise ValueError(
                 'Unsupported aggregate operation: %r' % (op,))
         new_event[aggregate.alias] = value
     return key, new_event
Ejemplo n.º 9
0
 def group_func(self, event):
   """Build the grouping key and per-event aggregate values for ``event``.

   Args:
     event: Event handed to ``get_value`` to resolve the group-by values
       and the first argument of each aggregate.

   Returns:
     A ``(key, new_event)`` tuple. ``key`` is the JSON encoding (with
     sorted keys) of the group-by aliases and their values. ``new_event``
     holds those same entries plus one entry per aggregate alias:

     * COUNT: 1 when there are no arguments; otherwise 0 if the first
       argument resolves to ``None``, else 1.
     * SUM / MIN / MAX: ``cast_to_number`` of the first argument, with
       0 / ``inf`` / ``-inf`` passed as its second argument (presumably the
       fallback for non-numeric values -- confirm against
       ``cast_to_number``).
     * AVG: a ``(value, 1)`` pair, or ``(0, 0)`` when ``cast_to_number``
       returns ``None``.

   Raises:
     ValueError: If an aggregate's ``op`` is none of the operations
       handled above.
   """
   new_event = {value.alias: get_value(event, value)
                for value in self.group_by.values}
   # Encode the key before the aggregate columns are added so it depends
   # on the group-by values only.
   key = json.dumps(new_event, sort_keys=True)
   for aggregate in self.aggregates:
     arguments = aggregate.arguments
     op = aggregate.op
     if op == Aggregator.Op.COUNT:
       if not arguments:
         value = 1
       else:
         value = 0 if get_value(event, arguments[0]) is None else 1
     elif op == Aggregator.Op.SUM:
       value = cast_to_number(get_value(event, arguments[0]), 0)
     elif op == Aggregator.Op.MIN:
       value = cast_to_number(get_value(event, arguments[0]), float('inf'))
     elif op == Aggregator.Op.MAX:
       value = cast_to_number(get_value(event, arguments[0]), -float('inf'))
     elif op == Aggregator.Op.AVG:
       value = cast_to_number(get_value(event, arguments[0]), None)
       # Carry (sum, count) so averages can be combined later.
       value = (0, 0) if value is None else (value, 1)
     else:
       # Without this branch ``value`` would be unbound on the first
       # aggregate, or silently reuse the previous aggregate's value.
       raise ValueError('Unsupported aggregate operation: %r' % (op,))
     new_event[aggregate.alias] = value
   return key, new_event
Ejemplo n.º 10
0
 def group(event):
   """Build the grouping key and per-event aggregate values for ``event``.

   Args:
     event: Event handed to ``get_value`` to resolve the group-by values
       and the first argument of each aggregate.

   Returns:
     A ``(key, new_event)`` tuple. ``key`` is the JSON encoding (with
     sorted keys) of the group-by aliases and their values. ``new_event``
     holds those same entries plus one entry per aggregate alias:

     * COUNT: 1 when there are no arguments; otherwise 0 if the first
       argument resolves to ``None``, else 1.
     * SUM / MIN / MAX: ``cast_to_number`` of the first argument, with
       0 / ``inf`` / ``-inf`` passed as its second argument (presumably the
       fallback for non-numeric values -- confirm against
       ``cast_to_number``).
     * AVG: a ``(value, 1)`` pair, or ``(0, 0)`` when ``cast_to_number``
       returns ``None``.

   Raises:
     ValueError: If an aggregate's ``op`` is none of the operations
       handled above.
   """
   # `key` can only be strings in Spark if you want to use `reduceByKey`.
   new_event = {value.alias: get_value(event, value)
                for value in node.group_by.values}
   key = json.dumps(new_event, sort_keys=True)
   for aggregate in node.aggregates:
     arguments = aggregate.arguments
     op = aggregate.op
     if op == Aggregator.Op.COUNT:
       if not arguments:
         value = 1
       else:
         value = 0 if get_value(event, arguments[0]) is None else 1
     elif op == Aggregator.Op.SUM:
       value = cast_to_number(get_value(event, arguments[0]), 0)
     elif op == Aggregator.Op.MIN:
       value = cast_to_number(get_value(event, arguments[0]), float('inf'))
     elif op == Aggregator.Op.MAX:
       value = cast_to_number(get_value(event, arguments[0]), -float('inf'))
     elif op == Aggregator.Op.AVG:
       value = cast_to_number(get_value(event, arguments[0]), None)
       # Carry (sum, count) so averages can be combined later.
       value = (0, 0) if value is None else (value, 1)
     else:
       # Without this branch ``value`` would be unbound on the first
       # aggregate, or silently reuse the previous aggregate's value.
       raise ValueError('Unsupported aggregate operation: %r' % (op,))
     new_event[aggregate.alias] = value
   return (key, new_event)
Ejemplo n.º 11
0
 def execute_order_by(self, node):
   """Return the result of ``node.stream`` sorted on ``node.fields``.

   Args:
     node: Order-by node; this method reads its ``stream``, ``fields`` and
       ``reverse`` attributes.

   Returns:
     Whatever the final ``.map`` call yields: the executed stream keyed by
     the tuple of ``get_value`` results for ``node.fields``, sorted by that
     key (ascending unless ``node.reverse`` is truthy), with element ``[1]``
     of each item kept. Presumably a Spark RDD of events -- confirm against
     ``self.execute``.
   """
   def sort_key(event):
     return tuple(get_value(event, field) for field in node.fields)

   keyed = self.execute(node.stream).keyBy(sort_key)
   ordered = keyed.sortByKey(ascending=not node.reverse)
   # Keep only the second element of each item, dropping the sort key.
   return ordered.map(lambda pair: pair[1])
Ejemplo n.º 12
0
 def map(event):
   """Prefix the keys of ``event`` with ``alias`` and derive its key.

   Args:
     event: Mapping whose items are copied into a new dict under keys of
       the form ``'<alias>.<original key>'``.

   Returns:
     A ``(key, new_event)`` tuple, where ``key`` is the JSON encoding of
     the list of values ``get_value`` returns from the prefixed event for
     each entry of ``key_values``.
   """
   prefixed = dict(
     ('%s.%s' % (alias, name), item) for name, item in event.iteritems())
   lookups = [get_value(prefixed, ref) for ref in key_values]
   return (json.dumps(lookups), prefixed)
Ejemplo n.º 13
0
 def execute_order_by(self, node):
     """Return the result of ``node.source`` sorted on ``node.fields``.

     Args:
         node: Order-by node; this method reads its ``source``, ``fields``,
             ``order`` and ``ResultOrder`` attributes.

     Returns:
         Whatever the final ``.map`` call yields: the executed source keyed
         by the tuple of ``get_value`` results for ``node.fields``, sorted
         by that key (ascending only when ``node.order`` equals
         ``node.ResultOrder.ASCENDING``), with element ``[1]`` of each item
         kept. Presumably a Spark RDD of events -- confirm against
         ``self.execute``.
     """
     def sort_key(event):
         return tuple(get_value(event, field) for field in node.fields)

     keyed = self.execute(node.source).keyBy(sort_key)
     is_ascending = node.order == node.ResultOrder.ASCENDING
     ordered = keyed.sortByKey(ascending=is_ascending)
     # Keep only the second element of each item, dropping the sort key.
     return ordered.map(lambda pair: pair[1])
Ejemplo n.º 14
0
 def execute_order_by(self, node):
   """Return the result of ``node.source`` sorted on ``node.fields``.

   Args:
     node: Order-by node; this method reads its ``source``, ``fields``,
       ``order`` and ``ResultOrder`` attributes.

   Returns:
     Whatever the final ``.map`` call yields: the executed source keyed by
     the tuple of ``get_value`` results for ``node.fields``, sorted by that
     key (ascending only when ``node.order`` equals
     ``node.ResultOrder.ASCENDING``), with element ``[1]`` of each item
     kept. Presumably a Spark RDD of events -- confirm against
     ``self.execute``.
   """
   def sort_key(event):
     return tuple(get_value(event, field) for field in node.fields)

   keyed = self.execute(node.source).keyBy(sort_key)
   is_ascending = node.order == node.ResultOrder.ASCENDING
   ordered = keyed.sortByKey(ascending=is_ascending)
   # Keep only the second element of each item, dropping the sort key.
   return ordered.map(lambda pair: pair[1])