def __init__(self, inputs, group_tag, group_operator):
    """Partition the input streams by the value of *group_tag* and wrap
    each partition in its own *group_operator* instance.

    The leading partition (index 0) is reserved for the ``None`` bucket:
    streams missing the tag fall into it, and it starts out empty.
    """
    tag_values = [None]
    self.group_idx = [[]]    # input positions belonging to each partition
    partitions = [[]]        # stream descriptors belonging to each partition

    for pos, stream in enumerate(inputs):
        value = stream.get(group_tag, None)
        if value not in tag_values:
            # First time we see this tag value: open a new partition.
            tag_values.append(stream[group_tag])
            self.group_idx.append([])
            partitions.append([])
        bucket = tag_values.index(value)
        partitions[bucket].append(stream)
        self.group_idx[bucket].append(pos)

    # Empty partitions get a PrintOperator placeholder so every slot
    # still exposes the operator interface.
    self.operators = [group_operator(part) if len(part) else PrintOperator([])
                      for part in partitions]

    # Record on every produced stream how it was grouped.
    for op in self.operators:
        for out in op.outputs:
            out['Metadata/Extra/Operator'] = 'tgroup(%s, %s)' % (group_tag,
                                                                 str(op))

    # Block streaming if any member operator blocks.
    self.block_streaming = reduce(operator.__or__,
                                  map(operator.attrgetter('block_streaming'),
                                      self.operators))
    Operator.__init__(self, inputs,
                      outputs=util.flatten(map(operator.attrgetter('outputs'),
                                               self.operators)))
    def __init__(self, inputs, group_operator, **kwargs):
        """Bin each input stream into datetime windows and apply
        *group_operator* to the data falling inside every bin.

        Recognized keyword options (with their defaults): ``field``
        ('day'), ``width`` (1), ``slide`` (width), ``inclusive``
        ('inc-exc'), ``snap_times`` (True), ``skip_empty`` (True).
        """
        field = kwargs.get('field', 'day')
        width = int(kwargs.get("width", 1))
        slide = int(kwargs.get("slide", width))
        inclusive = make_inclusive(kwargs.get("inclusive", "inc-exc"))
        snap_times = bool(kwargs.get("snap_times", True))
        skip_empty = util.to_bool(kwargs.get("skip_empty", True))

        # Validate the options before building any state.
        if field not in DT_FIELDS:
            raise core.SmapException("Invalid datetime field: " + field)
        if slide > width:
            raise core.SmapException("window: Cannot slide more than the window width!")

        self.inclusive = make_inclusive(inclusive)
        if self.inclusive[0] == False:
            raise core.SmapException("Open intervals at the start are not supported")

        # One timezone and one operator instance per input stream.
        self.tzs = [dtutil.gettz(s['Properties/Timezone']) for s in inputs]
        self.ops = [group_operator([s]) for s in inputs]
        self.comparator = self.make_bin_comparator(field, width)
        self.snapper = make_bin_snapper(field, slide)
        self.snap_times = snap_times
        self.skip_empty = skip_empty
        self.bin_width = datetime.timedelta(**{field + 's': width})
        self.bin_slide = datetime.timedelta(**{field + 's': slide})
        self.name = "window(%s, field=%s, width=%i, inclusive=%s, snap_times=%s)" % (
            str(self.ops[0]), field, width, str(inclusive), str(snap_times))
        Operator.__init__(self, inputs,
                          util.flatten([op.outputs for op in self.ops]))
        self.reset()
# Example #3 (score: 0)
    def __init__(self, inputs, group_tag, group_operator):
        """Split *inputs* into groups keyed on their *group_tag* value and
        instantiate one *group_operator* per group.

        Group 0 is the ``None`` bucket: it starts empty and absorbs any
        stream that lacks the tag entirely.
        """
        seen = [None]
        self.group_idx = [[]]   # which input positions each group owns
        members = [[]]          # the stream dicts collected per group

        for idx, stream in enumerate(inputs):
            key = stream.get(group_tag, None)
            if key not in seen:
                # New tag value: allocate a fresh group.
                seen.append(stream[group_tag])
                self.group_idx.append([])
                members.append([])
            slot = seen.index(key)
            members[slot].append(stream)
            self.group_idx[slot].append(idx)

        # A group with no members is backed by a do-nothing PrintOperator
        # so the slot still behaves like an operator.
        self.operators = [group_operator(grp) if len(grp) else PrintOperator([])
                          for grp in members]

        # Annotate every output stream with the grouping that produced it.
        for op in self.operators:
            for stream_out in op.outputs:
                stream_out['Metadata/Extra/Operator'] = 'tgroup(%s, %s)' % (
                    group_tag, str(op))

        # The composite blocks streaming whenever any member does.
        self.block_streaming = reduce(
            operator.__or__,
            map(operator.attrgetter('block_streaming'), self.operators))
        Operator.__init__(
            self, inputs,
            outputs=util.flatten(map(operator.attrgetter('outputs'),
                                     self.operators)))
# Example #4 (score: 0)
    def __init__(self, inputs, group_operator, **kwargs):
        """Window each input stream over a datetime field, running
        *group_operator* on the contents of every window.

        Options are taken from **kwargs: ``field`` ('day'), ``width``
        (1), ``slide`` (defaults to width), ``inclusive`` ('inc-exc'),
        ``snap_times`` (True) and ``skip_empty`` (True).
        """
        field = kwargs.get('field', 'day')
        width = int(kwargs.get("width", 1))
        slide = int(kwargs.get("slide", width))
        inclusive = make_inclusive(kwargs.get("inclusive", "inc-exc"))
        snap_times = bool(kwargs.get("snap_times", True))
        skip_empty = util.to_bool(kwargs.get("skip_empty", True))

        # Reject bad configurations up front.
        if field not in DT_FIELDS:
            raise core.SmapException("Invalid datetime field: " + field)
        if slide > width:
            raise core.SmapException(
                "window: Cannot slide more than the window width!")

        self.inclusive = make_inclusive(inclusive)
        if self.inclusive[0] == False:
            raise core.SmapException(
                "Open intervals at the start are not supported")

        # Per-stream timezone objects and per-stream operator instances.
        self.tzs = [dtutil.gettz(strm['Properties/Timezone'])
                    for strm in inputs]
        self.ops = [group_operator([strm]) for strm in inputs]
        self.comparator = self.make_bin_comparator(field, width)
        self.snapper = make_bin_snapper(field, slide)
        self.snap_times = snap_times
        self.skip_empty = skip_empty
        self.bin_width = datetime.timedelta(**{field + 's': width})
        self.bin_slide = datetime.timedelta(**{field + 's': slide})
        self.name = "window(%s, field=%s, width=%i, inclusive=%s, snap_times=%s)" % (
            str(self.ops[0]), field, width, str(inclusive), str(snap_times))
        Operator.__init__(
            self, inputs,
            util.flatten([op.outputs for op in self.ops]))
        self.reset()
# Example #5 (score: 0)
    def bind(self, inputs):
        """Instantiate every child factory against *inputs*, then bind
        this node's own operator to the children's combined outputs."""
        # Slice-assign so the children list object itself is reused.
        self.children[:] = [factory(inputs) for factory in self.children]

        my_inputs = util.flatten((child.op.outputs for child in self.children))
        self.op = self.op(my_inputs)
        self.name = self.op.name
        operators.Operator.__init__(self, inputs, self.op.outputs)
# Example #6 (score: 0)
# File: ast.py -- Project: ahaas/smap
    def bind(self, inputs):
        """Turn each child factory into a live operator over *inputs* and
        wire this node's operator to their concatenated outputs."""
        for idx in range(len(self.children)):
            self.children[idx] = self.children[idx](inputs)

        combined = util.flatten((c.op.outputs for c in self.children))
        self.op = self.op(combined)
        self.name = self.op.name
        operators.Operator.__init__(self, inputs, self.op.outputs)
 def __init__(self, inputs, *oplist):
     self.ops = [op(inputs) for op in oplist]
     self.name = "%s(%s)" % (self.operator_name, ','.join(map(str, self.ops)))
     self.block_streaming = reduce(operator.__or__, 
                                   (op.block_streaming for op in self.ops), 
                                   False)
     print "blocking", self.block_streaming
     Operator.__init__(self, inputs, 
                       util.flatten(map(operator.attrgetter('outputs'), 
                                        self.ops)))
# Example #8 (score: 0)
 def __init__(self, inputs, *oplist):
     self.ops = [op(inputs) for op in oplist]
     self.name = "%s(%s)" % (self.operator_name, ','.join(map(
         str, self.ops)))
     self.block_streaming = reduce(operator.__or__,
                                   (op.block_streaming for op in self.ops),
                                   False)
     print "blocking", self.block_streaming
     Operator.__init__(
         self, inputs,
         util.flatten(map(operator.attrgetter('outputs'), self.ops)))
# Example #9 (score: 0)
    def process(self, data):
        """Run each per-stream operator over its slice of *data*, carry
        the per-stream state forward, and merge the results into rows
        aligned across all streams."""
        rv = [null] * len(self.inputs)
        for idx in xrange(len(self.inputs)):
            state = self.state[idx]
            # Stash the chunk bounds so process_one sees them as state.
            state['first'] = data.first
            state['last'] = data.last
            state['region'] = data.region
            rv[idx], self.state[idx] = self.process_one(data[idx],
                                                        self.ops[idx],
                                                        self.tzs[idx],
                                                        **state)
        # Flatten the individual stream results ...
        flat = util.flatten(rv)
        # ... then let join_union insert nans for missing data so the
        # rows from all streams line up.
        return join_union(flat)
 def process(self, data):
     """Dispatch each input stream's slice of *data* to its own operator
     (threading the saved state through) and join the outputs into
     aligned rows."""
     results = [null] * len(self.inputs)
     for i in xrange(len(self.inputs)):
         # Expose the chunk boundaries to process_one via the state dict.
         self.state[i]['first'] = data.first
         self.state[i]['last'] = data.last
         self.state[i]['region'] = data.region
         results[i], self.state[i] = self.process_one(data[i],
                                                      self.ops[i],
                                                      self.tzs[i],
                                                      **self.state[i])
     # Flatten per-stream results, then insert nans for missing data so
     # every stream contributes to every row.
     return join_union(util.flatten(results))
 def process(self, data):
     """Feed each group operator the subset of *data* owned by its group
     and return the concatenated per-group results."""
     results = []
     for pos, op in enumerate(self.operators):
         subset = [data[j] for j in self.group_idx[pos]]
         results.append(op(subset))
     return util.flatten(results)
# Example #12 (score: 0)
# File: ast.py -- Project: ahaas/smap
 def process(self, data):
     """Process *data* through every child, rewrap the combined child
     output as a DataChunk, and apply this node's own operator."""
     child_out = util.flatten((child.process(data) for child in self.children))
     chunk = operators.DataChunk(data.region, data.first, data.last, child_out)
     return self.op(chunk)
# Example #13 (score: 0)
 def process(self, data):
     """Apply each converter to its matching stream (as a one-element
     list) and flatten the converted outputs."""
     converted = [conv([stream]) for conv, stream in zip(self.converters, data)]
     return util.flatten(converted)
# Example #14 (score: 0)
 def process(self, data):
     """Hand every group operator the slice of *data* its group indexes
     select, then splice the per-group outputs together."""
     out = [self.operators[k]([data[j] for j in self.group_idx[k]])
            for k in xrange(len(self.operators))]
     return util.flatten(out)
# Example #15 (score: 0)
 def process(self, data):
     """Run the chunk through all children, bundle their flattened output
     into a new DataChunk, and pass it to this node's operator."""
     merged = util.flatten((c.process(data) for c in self.children))
     rechunked = operators.DataChunk(data.region, data.first, data.last,
                                     merged)
     return self.op(rechunked)
 def process(self, data):
     """Run every wrapped operator on the full chunk and splice the
     per-operator output streams into one flat list."""
     per_op = (member(data) for member in self.ops)
     return util.flatten(per_op)
# Example #17 (score: 0)
 def process(self, data):
     """Give each composed operator the whole chunk, then flatten the
     collected results into a single stream list."""
     outputs = (composed(data) for composed in self.ops)
     return util.flatten(outputs)
# Example #18 (score: 0)
 def process(self, data):
     """Collect every child's processed output and feed the flattened
     whole to this node's operator."""
     merged = util.flatten((child.process(data) for child in self.children))
     return self.op(merged)
# Example #19 (score: 0)
 def process(self, data):
     """Fan the chunk out to all children, flatten what they return, and
     apply this node's operator to the combined result."""
     child_results = (kid.process(data) for kid in self.children)
     return self.op(util.flatten(child_results))