Beispiel #1
0
    def setUp(self):
        """Assemble the branching stream fixture and store it on the test case.

        Stream built here::

            source ---+---> aggregate ----> aggtarget
                      |
                      +---> sample ----> map ----> target
        """
        # Field names and rows given to the source node.
        self.fields = brewery.fieldlist(["a", "b", "c", "str"])
        self.src_list = [
            [1, 2, 3, "a"],
            [4, 5, 6, "b"],
            [7, 8, 9, "a"],
        ]

        # Lists passed to the two record-list target nodes.
        self.target_list = []
        self.aggtarget_list = []

        # Nodes are created in the same order in which they are keyed below.
        source_node = RowListSourceNode(self.src_list, self.fields)
        target_node = RecordListTargetNode(self.target_list)
        aggtarget_node = RecordListTargetNode(self.aggtarget_list)
        sample_node = SampleNode("sample")
        map_node = FieldMapNode(drop_fields=["c"])
        aggregate_node = AggregateNode(keys=["str"])

        nodes = {
            "source": source_node,
            "target": target_node,
            "aggtarget": aggtarget_node,
            "sample": sample_node,
            "map": map_node,
            "aggregate": aggregate_node,
        }

        # One list of (from, to) node-name pairs per branch of the diagram.
        sample_branch = [
            ("source", "sample"),
            ("sample", "map"),
            ("map", "target"),
        ]
        aggregate_branch = [
            ("source", "aggregate"),
            ("aggregate", "aggtarget"),
        ]

        self.stream = Stream(nodes, sample_branch + aggregate_branch)
Beispiel #2
0
    def test_merge(self):
        """Run a ``MergeNode`` over two inputs and check the output size.

        The output is expected to have five fields and as many rows as the
        first input held before the node ran.
        """
        merge = brewery.nodes.MergeNode()
        self.create_distinct_sample()

        # Second input pipe; rows are appended in the order listed.
        lookup = brewery.streams.SimpleDataPipe()
        lookup.fields = brewery.fieldlist(["type2", "name"])
        lookup_rows = (
            ["a", "apple"],
            ["b", "bananna"],
            ["c", "curry"],
            ["d", "dynamite"],
        )
        for record in lookup_rows:
            lookup.put(record)

        # Row count of the first input, taken before the node runs.
        expected_rows = len(self.input.buffer)

        merge.inputs = [self.input, lookup]
        merge.outputs = [self.output]
        merge.joins = [(1, "type", "type2")]
        merge.maps = {
            0: brewery.FieldMap(drop=["id2"]),
            1: brewery.FieldMap(drop=["type2"]),
        }
        self.initialize_node(merge)

        expected_width = 5
        self.assertEqual(expected_width, len(merge.output_fields))

        merge.run()
        merge.finalize()

        first_row = self.output.buffer[0]
        self.assertEqual(expected_width, len(first_row))
        self.assertEqual(expected_rows, len(self.output.buffer))
Beispiel #3
0
    def test_merge(self):
        """Verify the field and row counts produced by a ``MergeNode``.

        Two pipes are wired in as inputs; afterwards the output must carry
        five fields per row and the same number of rows as the first input.
        """
        node = brewery.nodes.MergeNode()
        self.create_distinct_sample()

        # Fill the second input from (type2, name) pairs.
        second = brewery.streams.SimpleDataPipe()
        second.fields = brewery.fieldlist(["type2", "name"])
        pairs = [
            ("a", "apple"),
            ("b", "bananna"),
            ("c", "curry"),
            ("d", "dynamite"),
        ]
        for key, label in pairs:
            second.put([key, label])

        rows_in = len(self.input.buffer)

        node.inputs = [self.input, second]
        node.outputs = [self.output]
        node.joins = [(1, "type", "type2")]

        # One field map per input index, each dropping a single field.
        map_for_first = brewery.FieldMap(drop=["id2"])
        map_for_second = brewery.FieldMap(drop=["type2"])
        node.maps = {0: map_for_first, 1: map_for_second}

        self.initialize_node(node)

        field_count = len(node.output_fields)
        self.assertEqual(5, field_count)

        node.run()
        node.finalize()

        row_width = len(self.output.buffer[0])
        self.assertEqual(5, row_width)
        rows_out = len(self.output.buffer)
        self.assertEqual(rows_in, rows_out)
        
        
Beispiel #4
0
    def test_consolidate_type(self):
        """Check the column values produced by ``CoalesceValueToTypeNode``.

        Mixed string, number and ``None`` values are put into fields declared
        as string, integer, float and unknown; the first three output columns
        are then compared with the expected lists.
        """
        self.input.fields = brewery.fieldlist([
            ("s", "string"),
            ("i", "integer"),
            ("f", "float"),
            ("u", "unknown"),
        ])

        sample = [
            ["  foo  ", 123, 123, None],
            [123, "123", "123", None],
            [123.0, " 123  ", "  123  ", None],
            ["  foo  ", "1 2 3", "1 2 3  . 0", None],
            ["  foo  ", "fail", "fail", None],
            [None, None, None, None],
        ]
        for record in sample:
            self.input.put(record)

        coalesce = brewery.nodes.CoalesceValueToTypeNode()
        self.setup_node(coalesce)
        self.initialize_node(coalesce)
        coalesce.run()
        coalesce.finalize()

        # Walk the output once, then slice it into per-field columns.
        rows = list(self.output.buffer)
        strings = [row[0] for row in rows]
        integers = [row[1] for row in rows]
        floats = [row[2] for row in rows]

        expected_strings = ["foo", "123", "123.0", "foo", "foo", None]
        expected_numbers = [123, 123, 123, 123, None, None]

        self.assertEqual(expected_strings, strings)
        self.assertEqual(expected_numbers, integers)
        self.assertEqual(expected_numbers, floats)
Beispiel #5
0
 def create_sample(self, count=100, custom=None, pipe=None):
     """Empty a pipe and fill it with ``count`` generated rows.

     Each row is ``[i, float(i) / 4, "item-<i>", custom]`` for ``i`` from 0
     up to ``count - 1``. ``self.input`` is used when ``pipe`` is falsy.
     """
     target = pipe if pipe else self.input
     target.empty()
     target.fields = brewery.fieldlist(["i", "q", "str", "custom"])
     for index in range(count):
         target.put([index, float(index) / 4, "item-%s" % index, custom])
Beispiel #6
0
    def setUp(self):
        """Create the two-branch stream used by the tests of this case.

        Layout of the stream::

            source ---+---> aggregate ----> aggtarget
                      |
                      +---> sample ----> map ----> target
        """
        field_names = ["a", "b", "c", "str"]
        rows = [[1, 2, 3, "a"], [4, 5, 6, "b"], [7, 8, 9, "a"]]

        self.fields = brewery.fieldlist(field_names)
        self.src_list = rows
        self.target_list = []
        self.aggtarget_list = []

        nodes = {
            "source": RowListSourceNode(self.src_list, self.fields),
            "target": RecordListTargetNode(self.target_list),
            "aggtarget": RecordListTargetNode(self.aggtarget_list),
            "sample": SampleNode("sample"),
            "map": FieldMapNode(drop_fields=["c"]),
            "aggregate": AggregateNode(keys=["str"]),
        }

        # Every adjacent pair of names along a path becomes one connection.
        sample_path = ["source", "sample", "map", "target"]
        aggregate_path = ["source", "aggregate", "aggtarget"]
        connections = []
        for path in (sample_path, aggregate_path):
            connections.extend(zip(path, path[1:]))

        self.stream = Stream(nodes, connections)
Beispiel #7
0
 def create_sample(self, count=100, custom=None, pipe=None):
     """Reset a pipe and load it with ``count`` numbered sample rows.

     Falls back to ``self.input`` when ``pipe`` is falsy. Row ``i`` holds
     the index, ``float(i) / 4``, the string ``"item-<i>"`` and ``custom``.
     """
     sink = pipe or self.input
     sink.empty()
     sink.fields = brewery.fieldlist(["i", "q", "str", "custom"])
     for number in range(count):
         row = [number, float(number) / 4, "item-%s" % number, custom]
         sink.put(row)
Beispiel #8
0
 def create_distinct_sample(self, pipe=None):
     """Empty a pipe and fill it with 36 rows of repeating sample data.

     Four rows are put for each ``n`` from 1 to 9, with fields
     ``id``, ``id2``, ``q``, ``type`` and ``class``. ``self.input`` is used
     when ``pipe`` is falsy.
     """
     target = pipe if pipe else self.input
     target.empty()
     target.fields = brewery.fieldlist(["id", "id2", "q", "type", "class"])
     for n in range(1, 10):
         quarter = float(n) / 4
         target.put([n, n, quarter, "a", "x"])
         target.put([n, n * 10, quarter, "a", "y"])
         target.put([n * 10, n * 100, quarter, "b", "x"])
         target.put([n * 100, n * 1000, quarter, "c", "y"])
Beispiel #9
0
 def create_distinct_sample(self, pipe=None):
     """Reset a pipe and load sample rows with repeated ids and types.

     For every ``i`` in 1..9 four rows are put, in the order listed in the
     loop body. When ``pipe`` is falsy the rows go to ``self.input``.
     """
     if pipe:
         sink = pipe
     else:
         sink = self.input
     sink.empty()
     sink.fields = brewery.fieldlist(["id", "id2", "q", "type", "class"])
     for i in range(1, 10):
         q = float(i) / 4
         batch = (
             [i, i, q, "a", "x"],
             [i, i * 10, q, "a", "y"],
             [i * 10, i * 100, q, "b", "x"],
             [i * 100, i * 1000, q, "c", "y"],
         )
         for row in batch:
             sink.put(row)
Beispiel #10
0
    def test_strip_auto(self):
        """Check which fields a default ``StringStripNode`` strips.

        Five identical padded rows are put in. In the first output row the
        values of the fields typed "string" are expected without padding,
        while those typed "unknown" are expected unchanged.
        """
        self.input.fields = brewery.fieldlist([
            ("str1", "string"),
            ("x", "unknown"),
            ("str2", "string"),
            ("f", "unknown"),
        ])
        for _ in range(5):
            self.input.put([" foo ", " bar ", " baz ", " moo "])

        stripper = brewery.nodes.StringStripNode()
        self.setup_node(stripper)
        self.initialize_node(stripper)
        stripper.run()
        stripper.finalize()

        expected = ["foo", " bar ", "baz", " moo "]
        first_row = self.output.buffer[0]
        self.assertEqual(expected, first_row)
Beispiel #11
0
    def test_strip_auto(self):
        """Run ``StringStripNode`` without arguments and inspect one row.

        The input has two fields typed "string" and two typed "unknown";
        the first output row must show only the "string" ones stripped.
        """
        field_specs = [
            ("str1", "string"),
            ("x", "unknown"),
            ("str2", "string"),
            ("f", "unknown"),
        ]
        self.input.fields = brewery.fieldlist(field_specs)

        # A fresh list is built for every put so that rows are not shared.
        padded = (" foo ", " bar ", " baz ", " moo ")
        for _ in range(5):
            self.input.put(list(padded))

        strip_node = brewery.nodes.StringStripNode()

        self.setup_node(strip_node)
        self.initialize_node(strip_node)

        strip_node.run()
        strip_node.finalize()

        self.assertEqual(["foo", " bar ", "baz", " moo "],
                         self.output.buffer[0])
Beispiel #12
0
    def test_consolidate_type(self):
        """Compare ``CoalesceValueToTypeNode`` output columns with expectations.

        Six rows of mixed values are put into fields typed string, integer,
        float and unknown. After the node has run, the first three columns
        of the output are collected and asserted.
        """
        field_specs = [
            ("s", "string"),
            ("i", "integer"),
            ("f", "float"),
            ("u", "unknown"),
        ]
        self.input.fields = brewery.fieldlist(field_specs)

        sample = [
            ["  foo  ", 123, 123, None],
            [123, "123", "123", None],
            [123.0, " 123  ", "  123  ", None],
            ["  foo  ", "1 2 3", "1 2 3  . 0", None],
            ["  foo  ", "fail", "fail", None],
            [None, None, None, None],
        ]
        for values in sample:
            self.input.put(values)

        node = brewery.nodes.CoalesceValueToTypeNode()
        self.setup_node(node)
        self.initialize_node(node)
        node.run()
        node.finalize()

        # Collector k receives the value at index k of every output row.
        columns = ([], [], [])
        for row in self.output.buffer:
            for position, column in enumerate(columns):
                column.append(row[position])
        strings, integers, floats = columns

        self.assertEqual(["foo", "123", "123.0", "foo", "foo", None], strings)
        self.assertEqual([123, 123, 123, 123, None, None], integers)
        self.assertEqual([123, 123, 123, 123, None, None], floats)
Beispiel #13
0
 def output_fields(self):
     """Return a brewery field list built from the single name ``"i"``."""
     field_names = ["i"]
     return brewery.fieldlist(field_names)
Beispiel #14
0
 def output_fields(self):
     """Build and return the field list ``brewery.fieldlist(["i"])``."""
     return brewery.fieldlist(
         ["i"],
     )