def __init__(self, inputs, group_tag, group_operator):
    """Partition the input streams by their value of `group_tag` and
    instantiate one `group_operator` per partition.

    Streams without the tag fall into a default (None) bucket; an empty
    bucket is backed by a pass-through PrintOperator so the operator list
    stays aligned with the bucket list.
    """
    groups = [None]          # tag values seen so far (None = untagged)
    self.group_idx = [[]]    # per-bucket indices into `inputs`
    group_inputs = [[]]      # per-bucket input descriptors

    # Assign each stream to the bucket for its tag value, opening a new
    # bucket the first time a value is encountered.
    for idx, stream in enumerate(inputs):
        tag_val = stream.get(group_tag, None)
        if tag_val not in groups:
            groups.append(stream[group_tag])
            self.group_idx.append([])
            group_inputs.append([])
        bucket = groups.index(tag_val)
        group_inputs[bucket].append(stream)
        self.group_idx[bucket].append(idx)

    # One operator per non-empty bucket; empty buckets get a no-op.
    self.operators = []
    for members in group_inputs:
        if len(members):
            self.operators.append(group_operator(members))
        else:
            self.operators.append(PrintOperator([]))

    # Tag every output with the operator that produced it.
    for op in self.operators:
        for out in op.outputs:
            out['Metadata/Extra/Operator'] = 'tgroup(%s, %s)' % (group_tag, str(op))

    # We block streaming if any sub-operator does.
    self.block_streaming = reduce(
        operator.__or__,
        map(operator.attrgetter('block_streaming'), self.operators))
    Operator.__init__(self, inputs,
                      outputs=util.flatten(
                          map(operator.attrgetter('outputs'), self.operators)))
def __init__(self, inputs, group_operator, **kwargs):
    # Windowing operator: bins each input stream into datetime-aligned
    # windows and applies `group_operator` to each bin.
    #
    # Keyword options:
    #   field      -- datetime field to bin on (must be in DT_FIELDS), default 'day'
    #   width      -- window width, in units of `field` (default 1)
    #   slide      -- how far successive windows advance; must be <= width
    #   inclusive  -- endpoint inclusivity spec (default "inc-exc")
    #   snap_times -- snap bin timestamps to the bin boundary (default True)
    #   skip_empty -- drop bins containing no data (default True)
    field = kwargs.get('field', 'day')
    width = int(kwargs.get("width", 1))
    slide = int(kwargs.get("slide", width))
    inclusive = make_inclusive(kwargs.get("inclusive", "inc-exc"))
    snap_times = bool(kwargs.get("snap_times", True))
    skip_empty = util.to_bool(kwargs.get("skip_empty", True))
    if not field in DT_FIELDS:
        raise core.SmapException("Invalid datetime field: " + field)
    if not slide <= width:
        raise core.SmapException("window: Cannot slide more than the window width!")

    # NOTE(review): `inclusive` already went through make_inclusive above;
    # this second application assumes make_inclusive is idempotent -- confirm.
    self.inclusive = make_inclusive(inclusive)
    if self.inclusive[0] == False:
        raise core.SmapException("Open intervals at the start are not supported")

    # Per-input timezone (from stream metadata) and one operator chain per input.
    self.tzs = map(lambda x: dtutil.gettz(x['Properties/Timezone']), inputs)
    self.ops = map(lambda x: group_operator([x]), inputs)
    # self.ops = [[op([x]) for op in ops] for x in inputs]
    self.comparator = self.make_bin_comparator(field, width)
    self.snapper = make_bin_snapper(field, slide)
    self.snap_times = snap_times
    self.skip_empty = skip_empty
    # timedelta keyword is the plural of the field name, e.g. 'day' -> 'days'.
    self.bin_width = datetime.timedelta(**{field + 's': width})
    self.bin_slide = datetime.timedelta(**{field + 's': slide})
    self.name = "window(%s, field=%s, width=%i, inclusive=%s, snap_times=%s)" % ( \
        str(self.ops[0]), field, width, str(inclusive), str(snap_times))
    Operator.__init__(self, inputs,
                      util.flatten(map(operator.attrgetter('outputs'), self.ops)))
    self.reset()
def __init__(self, inputs, group_tag, group_operator):
    """Group the inputs by their `group_tag` value; one sub-operator per group."""
    groups = [None]
    self.group_idx = [[]]
    group_inputs = [[]]

    for i, s in enumerate(inputs):
        key = s.get(group_tag, None)
        if key not in groups:
            # First time this tag value appears: open a fresh group.
            groups.append(s[group_tag])
            self.group_idx.append(list())
            group_inputs.append(list())
        g = groups.index(key)
        group_inputs[g].append(s)
        self.group_idx[g].append(i)

    # Empty groups are backed by a pass-through PrintOperator.
    self.operators = [group_operator(members) if len(members) else PrintOperator([])
                      for members in group_inputs]

    # Record the producing operator in each output's metadata.
    for o in self.operators:
        for j in xrange(0, len(o.outputs)):
            o.outputs[j]['Metadata/Extra/Operator'] = 'tgroup(%s, %s)' % (group_tag,
                                                                          str(o))

    self.block_streaming = reduce(operator.__or__,
                                  map(operator.attrgetter('block_streaming'),
                                      self.operators))
    Operator.__init__(self, inputs,
                      outputs=util.flatten(map(operator.attrgetter('outputs'),
                                               self.operators)))
def __init__(self, inputs, group_operator, **kwargs):
    # Build a windowing operator that bins each input stream by a datetime
    # field and applies `group_operator` within each bin.
    #
    # kwargs: field (in DT_FIELDS), width, slide (<= width), inclusive,
    # snap_times, skip_empty -- see the checks below for their constraints.
    field = kwargs.get('field', 'day')
    width = int(kwargs.get("width", 1))
    slide = int(kwargs.get("slide", width))
    inclusive = make_inclusive(kwargs.get("inclusive", "inc-exc"))
    snap_times = bool(kwargs.get("snap_times", True))
    skip_empty = util.to_bool(kwargs.get("skip_empty", True))
    if not field in DT_FIELDS:
        raise core.SmapException("Invalid datetime field: " + field)
    if not slide <= width:
        raise core.SmapException(
            "window: Cannot slide more than the window width!")

    # NOTE(review): make_inclusive is applied a second time here; this
    # presumes it is idempotent on already-converted values -- verify.
    self.inclusive = make_inclusive(inclusive)
    if self.inclusive[0] == False:
        raise core.SmapException(
            "Open intervals at the start are not supported")

    # One timezone and one operator instance per input stream.
    self.tzs = map(lambda x: dtutil.gettz(x['Properties/Timezone']),
                   inputs)
    self.ops = map(lambda x: group_operator([x]), inputs)
    # self.ops = [[op([x]) for op in ops] for x in inputs]
    self.comparator = self.make_bin_comparator(field, width)
    self.snapper = make_bin_snapper(field, slide)
    self.snap_times = snap_times
    self.skip_empty = skip_empty
    # timedelta takes the pluralized field name, e.g. 'hour' -> hours=width.
    self.bin_width = datetime.timedelta(**{field + 's': width})
    self.bin_slide = datetime.timedelta(**{field + 's': slide})
    self.name = "window(%s, field=%s, width=%i, inclusive=%s, snap_times=%s)" % ( \
        str(self.ops[0]), field, width, str(inclusive), str(snap_times))
    Operator.__init__(
        self, inputs,
        util.flatten(map(operator.attrgetter('outputs'), self.ops)))
    self.reset()
def bind(self, inputs):
    """Bind the child operator factories to concrete inputs, then bind
    the composed operator on top of the children's outputs."""
    # Replace each factory with its bound instance, in place.
    self.children[:] = [factory(inputs) for factory in self.children]
    # The composed operator consumes the concatenation of child outputs.
    my_inputs = util.flatten(c.op.outputs for c in self.children)
    self.op = self.op(my_inputs)
    self.name = self.op.name
    operators.Operator.__init__(self, inputs, self.op.outputs)
def __init__(self, inputs, *oplist):
    """Apply several operators to the same inputs.

    Each operator class in `oplist` is instantiated on `inputs`; our
    outputs are the concatenation of all of theirs.
    """
    self.ops = [op(inputs) for op in oplist]
    self.name = "%s(%s)" % (self.operator_name, ','.join(map(str, self.ops)))
    # Block streaming if any sub-operator blocks (False for empty oplist).
    # Fixed: removed a stray debug `print` that leaked to stdout on every
    # instantiation.
    self.block_streaming = reduce(operator.__or__,
                                  (op.block_streaming for op in self.ops),
                                  False)
    Operator.__init__(self, inputs,
                      util.flatten(map(operator.attrgetter('outputs'),
                                       self.ops)))
def __init__(self, inputs, *oplist):
    """Instantiate every operator in `oplist` over the same `inputs` and
    expose the union of their outputs."""
    self.ops = [op(inputs) for op in oplist]
    self.name = "%s(%s)" % (self.operator_name,
                            ','.join(map(str, self.ops)))
    # OR together the children's blocking flags; empty oplist -> False.
    # Fixed: dropped the leftover debug `print "blocking", ...` statement.
    self.block_streaming = reduce(operator.__or__,
                                  (op.block_streaming for op in self.ops),
                                  False)
    Operator.__init__(
        self, inputs,
        util.flatten(map(operator.attrgetter('outputs'), self.ops)))
def process(self, data):
    """Window each input stream independently, then re-join the results."""
    per_stream = []
    for i in xrange(0, len(self.inputs)):
        state = self.state[i]
        # Propagate the chunk boundaries into this stream's window state.
        state['first'] = data.first
        state['last'] = data.last
        state['region'] = data.region
        out, self.state[i] = self.process_one(data[i], self.ops[i],
                                              self.tzs[i], **state)
        per_stream.append(out)
    # Flatten the per-stream result lists, then align rows across all
    # streams, inserting nans where a stream has no data.
    return join_union(util.flatten(per_stream))
def process(self, data):
    """Dispatch each group's slice of `data` to that group's operator."""
    results = []
    for op, indices in zip(self.operators, self.group_idx):
        # Select only the input streams belonging to this group.
        results.append(op([data[j] for j in indices]))
    return util.flatten(results)
def process(self, data):
    """Run every child on the chunk, then feed the merged result to self.op."""
    merged = util.flatten(c.process(data) for c in self.children)
    # Re-wrap the flattened child output as a chunk with the same extent.
    chunk = operators.DataChunk(data.region, data.first, data.last, merged)
    return self.op(chunk)
def process(self, data):
    """Apply each per-stream converter to its corresponding data element
    and concatenate the results."""
    # Fixed: the tuple-parameter lambda `lambda (c, d): ...` is
    # Python-2-only syntax (removed by PEP 3113); an explicit
    # comprehension is equivalent, clearer, and forward-compatible.
    return util.flatten([c([d]) for c, d in zip(self.converters, data)])
def process(self, data):
    """Feed the chunk to every sub-operator and concatenate their outputs."""
    pieces = []
    for op in self.ops:
        pieces.append(op(data))
    return util.flatten(pieces)
def process(self, data):
    """Process `data` through every child, then apply the composed operator
    to the concatenation of their results."""
    child_results = [c.process(data) for c in self.children]
    return self.op(util.flatten(child_results))