def execute_order_by(self, node):
    """Yield the events of ``node.stream`` ordered by ``node.fields``.

    The stream is executed and fully sorted in memory before the first event
    is yielded.  The sort key of an event is the tuple of ``get_value(event,
    field)`` for every field in ``node.fields``.

    Events are yielded in descending order when ``node.order`` equals
    ``node.ResultOrder.DESCENDING`` and in ascending order for any other
    value.
    """
    events = sorted(
        self.execute(node.stream),
        key=lambda event: tuple(get_value(event, field)
                                for field in node.fields))
    if node.order == node.ResultOrder.DESCENDING:
        # Walk the ascending result backwards instead of sorting with
        # `reverse=True`: this keeps events with equal keys in the same
        # relative order the index-based loop produced.
        events = reversed(events)
    for event in events:
        yield event
def map(event):
    """Namespace the fields of ``event`` under ``alias`` and compute its key.

    Returns a ``(key, new_event)`` pair.  ``new_event`` maps
    ``'<alias>.<field>'`` to the original value of every field in ``event``;
    ``key`` is the JSON encoding of the list of ``get_value(new_event, v)``
    for every ``v`` in ``key_values``.

    ``alias`` and ``key_values`` are free variables supplied by the
    surrounding scope.
    """
    # `items()` works on both Python 2 and Python 3, unlike `iteritems()`.
    new_event = {'%s.%s' % (alias, field): value
                 for field, value in event.items()}
    key = json.dumps(
        [get_value(new_event, key_value) for key_value in key_values])
    return (key, new_event)
def map_func(self, event):
    """Project ``event`` onto ``self.fields``.

    Each field's value is read with ``get_value(event, field)`` and stored
    under ``field.alias``.  When ``self.merge`` is truthy the projected
    fields are added on top of a copy of the original event; otherwise the
    result contains only the projected fields.

    The input ``event`` is never modified.
    """
    if self.merge:
        # Work on a private copy: assigning into `event` itself would leak
        # the projected fields back into the caller's object.
        from copy import deepcopy
        new_event = deepcopy(event)
    else:
        new_event = {}
    for field in self.fields:
        new_event[field.alias] = get_value(event, field)
    return new_event
def execute_order_by(self, node):
    """Yield the events of ``node.source`` ordered by ``node.fields``.

    The source is executed and fully sorted in memory before the first event
    is yielded.  The sort key of an event is the tuple of ``get_value(event,
    field)`` for every field in ``node.fields``.

    Events are yielded in descending order when ``node.order`` equals
    ``node.ResultOrder.DESCENDING`` and in ascending order for any other
    value.
    """
    events = sorted(
        self.execute(node.source),
        key=lambda event: tuple(get_value(event, field)
                                for field in node.fields))
    if node.order == node.ResultOrder.DESCENDING:
        # Walk the ascending result backwards instead of sorting with
        # `reverse=True`: this keeps events with equal keys in the same
        # relative order the index-based loop produced.
        events = reversed(events)
    for event in events:
        yield event
def project(event):
    """Return the projection of ``event`` onto ``node.fields``.

    Every field is evaluated with ``get_value(event, field)`` and stored
    under ``field.alias``.  If ``node.merge`` is truthy the result starts as
    a deep copy of ``event``; otherwise it starts empty.  ``node`` is a free
    variable supplied by the surrounding scope.
    """
    # Deep-copy when merging so the input event is left untouched.
    projected = deepcopy(event) if node.merge else {}
    for selected in node.fields:
        projected[selected.alias] = get_value(event, selected)
    return projected
def group_func(self, event):
    """Map ``event`` to a ``(group key, partial aggregate event)`` pair.

    The group key is the JSON encoding (with sorted keys) of the event's
    group-by values; it is computed before any aggregate is added, so it
    depends on the group-by values only.

    The returned event holds the group-by values plus, under each
    aggregate's alias, the contribution of this single event:

    * COUNT: 1 when there are no arguments; otherwise 0 if the first
      argument's value is None, else 1.
    * SUM: ``cast_to_number(value, 0)``.
    * MIN / MAX: ``cast_to_number(value, +inf)`` / ``cast_to_number(value,
      -inf)``.
    * AVG: a ``(value, 1)`` pair, or ``(0, 0)`` when ``cast_to_number(value,
      None)`` returns None.

    NOTE(review): the second argument of ``cast_to_number`` is presumably
    the fallback for non-numeric values -- confirm against its definition.

    Raises:
      ValueError: if an aggregate's op is not one of the ops handled above.
    """
    new_event = {value.alias: get_value(event, value)
                 for value in self.group_by.values}
    key = json.dumps(new_event, sort_keys=True)
    for aggregate in self.aggregates:
        op = aggregate.op
        arguments = aggregate.arguments
        if op == Aggregator.Op.COUNT:
            if not arguments:
                contribution = 1
            elif get_value(event, arguments[0]) is None:
                contribution = 0
            else:
                contribution = 1
        elif op == Aggregator.Op.SUM:
            contribution = cast_to_number(get_value(event, arguments[0]), 0)
        elif op == Aggregator.Op.MIN:
            contribution = cast_to_number(get_value(event, arguments[0]),
                                          float('inf'))
        elif op == Aggregator.Op.MAX:
            contribution = cast_to_number(get_value(event, arguments[0]),
                                          -float('inf'))
        elif op == Aggregator.Op.AVG:
            number = cast_to_number(get_value(event, arguments[0]), None)
            contribution = (0, 0) if number is None else (number, 1)
        else:
            # Fail loudly: falling through would silently store the previous
            # aggregate's contribution under this alias.
            raise ValueError('Unsupported aggregate op: %r' % (op,))
        new_event[aggregate.alias] = contribution
    return key, new_event
def group_func(self, event):
    """Map ``event`` to a ``(group key, partial aggregate event)`` pair.

    The group key is the JSON encoding (with sorted keys) of the event's
    group-by values; it is computed before any aggregate is added, so it
    depends on the group-by values only.

    The returned event holds the group-by values plus, under each
    aggregate's alias, the contribution of this single event:

    * COUNT: 1 when there are no arguments; otherwise 0 if the first
      argument's value is None, else 1.
    * SUM: ``cast_to_number(value, 0)``.
    * MIN / MAX: ``cast_to_number(value, +inf)`` / ``cast_to_number(value,
      -inf)``.
    * AVG: a ``(value, 1)`` pair, or ``(0, 0)`` when ``cast_to_number(value,
      None)`` returns None.

    NOTE(review): the second argument of ``cast_to_number`` is presumably
    the fallback for non-numeric values -- confirm against its definition.

    Raises:
      ValueError: if an aggregate's op is not one of the ops handled above.
    """
    new_event = {value.alias: get_value(event, value)
                 for value in self.group_by.values}
    key = json.dumps(new_event, sort_keys=True)
    for aggregate in self.aggregates:
        op = aggregate.op
        arguments = aggregate.arguments
        if op == Aggregator.Op.COUNT:
            if not arguments:
                contribution = 1
            elif get_value(event, arguments[0]) is None:
                contribution = 0
            else:
                contribution = 1
        elif op == Aggregator.Op.SUM:
            contribution = cast_to_number(get_value(event, arguments[0]), 0)
        elif op == Aggregator.Op.MIN:
            contribution = cast_to_number(get_value(event, arguments[0]),
                                          float('inf'))
        elif op == Aggregator.Op.MAX:
            contribution = cast_to_number(get_value(event, arguments[0]),
                                          -float('inf'))
        elif op == Aggregator.Op.AVG:
            number = cast_to_number(get_value(event, arguments[0]), None)
            contribution = (0, 0) if number is None else (number, 1)
        else:
            # Fail loudly: falling through would silently store the previous
            # aggregate's contribution under this alias.
            raise ValueError('Unsupported aggregate op: %r' % (op,))
        new_event[aggregate.alias] = contribution
    return key, new_event
def group(event):
    """Map ``event`` to a ``(group key, partial aggregate event)`` pair.

    The group key is the JSON encoding (with sorted keys) of the event's
    group-by values; it is computed before any aggregate is added, so it
    depends on the group-by values only.  ``node`` is a free variable
    supplied by the surrounding scope.

    The returned event holds the group-by values plus, under each
    aggregate's alias, the contribution of this single event:

    * COUNT: 1 when there are no arguments; otherwise 0 if the first
      argument's value is None, else 1.
    * SUM: ``cast_to_number(value, 0)``.
    * MIN / MAX: ``cast_to_number(value, +inf)`` / ``cast_to_number(value,
      -inf)``.
    * AVG: a ``(value, 1)`` pair, or ``(0, 0)`` when ``cast_to_number(value,
      None)`` returns None.

    NOTE(review): the second argument of ``cast_to_number`` is presumably
    the fallback for non-numeric values -- confirm against its definition.

    Raises:
      ValueError: if an aggregate's op is not one of the ops handled above.
    """
    # `key` can only be strings in Spark if you want to use `reduceByKey`.
    new_event = {value.alias: get_value(event, value)
                 for value in node.group_by.values}
    key = json.dumps(new_event, sort_keys=True)
    for aggregate in node.aggregates:
        op = aggregate.op
        arguments = aggregate.arguments
        if op == Aggregator.Op.COUNT:
            if not arguments:
                contribution = 1
            elif get_value(event, arguments[0]) is None:
                contribution = 0
            else:
                contribution = 1
        elif op == Aggregator.Op.SUM:
            contribution = cast_to_number(get_value(event, arguments[0]), 0)
        elif op == Aggregator.Op.MIN:
            contribution = cast_to_number(get_value(event, arguments[0]),
                                          float('inf'))
        elif op == Aggregator.Op.MAX:
            contribution = cast_to_number(get_value(event, arguments[0]),
                                          -float('inf'))
        elif op == Aggregator.Op.AVG:
            number = cast_to_number(get_value(event, arguments[0]), None)
            contribution = (0, 0) if number is None else (number, 1)
        else:
            # Fail loudly: falling through would silently store the previous
            # aggregate's contribution under this alias.
            raise ValueError('Unsupported aggregate op: %r' % (op,))
        new_event[aggregate.alias] = contribution
    return (key, new_event)
def execute_order_by(self, node):
    """Return the result of ``node.stream`` sorted by ``node.fields``.

    The executed stream is keyed by the tuple of ``get_value(event, field)``
    for every field in ``node.fields``, sorted by that key (descending when
    ``node.reverse`` is truthy), and then stripped back down to the events.

    NOTE(review): ``keyBy`` / ``sortByKey`` / ``map`` look like the PySpark
    RDD API -- confirm that ``self.execute`` returns an RDD.
    """
    def sort_key(event):
        return tuple(get_value(event, field) for field in node.fields)

    keyed = self.execute(node.stream).keyBy(sort_key)
    ordered = keyed.sortByKey(ascending=not node.reverse)
    # Each element is a (key, event) pair; drop the key again.
    return ordered.map(lambda pair: pair[1])
def map(event):
    """Namespace the fields of ``event`` under ``alias`` and compute its key.

    Returns a ``(key, new_event)`` pair.  ``new_event`` maps
    ``'<alias>.<field>'`` to the original value of every field in ``event``;
    ``key`` is the JSON encoding of the list of ``get_value(new_event, v)``
    for every ``v`` in ``key_values``.

    ``alias`` and ``key_values`` are free variables supplied by the
    surrounding scope.
    """
    # `items()` works on both Python 2 and Python 3, unlike `iteritems()`.
    new_event = {'%s.%s' % (alias, field): value
                 for field, value in event.items()}
    key = json.dumps(
        [get_value(new_event, key_value) for key_value in key_values])
    return (key, new_event)
def execute_order_by(self, node):
    """Return the result of ``node.source`` sorted by ``node.fields``.

    The executed source is keyed by the tuple of ``get_value(event, field)``
    for every field in ``node.fields``, sorted by that key, and then stripped
    back down to the events.  The sort is ascending only when ``node.order``
    equals ``node.ResultOrder.ASCENDING``; any other value sorts descending.

    NOTE(review): ``keyBy`` / ``sortByKey`` / ``map`` look like the PySpark
    RDD API -- confirm that ``self.execute`` returns an RDD.
    """
    def sort_key(event):
        return tuple(get_value(event, field) for field in node.fields)

    keyed = self.execute(node.source).keyBy(sort_key)
    ascending = node.order == node.ResultOrder.ASCENDING
    ordered = keyed.sortByKey(ascending=ascending)
    # Each element is a (key, event) pair; drop the key again.
    return ordered.map(lambda pair: pair[1])
def execute_order_by(self, node):
    """Return the result of ``node.source`` sorted by ``node.fields``.

    The executed source is keyed by the tuple of ``get_value(event, field)``
    for every field in ``node.fields``, sorted by that key, and then stripped
    back down to the events.  The sort is ascending only when ``node.order``
    equals ``node.ResultOrder.ASCENDING``; any other value sorts descending.

    NOTE(review): ``keyBy`` / ``sortByKey`` / ``map`` look like the PySpark
    RDD API -- confirm that ``self.execute`` returns an RDD.
    """
    def sort_key(event):
        return tuple(get_value(event, field) for field in node.fields)

    keyed = self.execute(node.source).keyBy(sort_key)
    ascending = node.order == node.ResultOrder.ASCENDING
    ordered = keyed.sortByKey(ascending=ascending)
    # Each element is a (key, event) pair; drop the key again.
    return ordered.map(lambda pair: pair[1])