def setUp(self):
    # Fixture: one source feeding two independent branches.
    #
    #  source ---+---> aggregate ----> aggtarget
    #            |
    #            +---> sample ----> map ----> target
    #
    # The target lists are kept on the test case so that tests can inspect
    # whatever the two target nodes were handed.
    self.fields = brewery.fieldlist(["a", "b", "c", "str"])
    self.src_list = [
        [1, 2, 3, "a"],
        [4, 5, 6, "b"],
        [7, 8, 9, "a"],
    ]
    self.target_list = []
    self.aggtarget_list = []

    # Node name -> node instance, constructed in the same order as before.
    stream_nodes = dict(
        source=RowListSourceNode(self.src_list, self.fields),
        target=RecordListTargetNode(self.target_list),
        aggtarget=RecordListTargetNode(self.aggtarget_list),
        sample=SampleNode("sample"),
        map=FieldMapNode(drop_fields=["c"]),
        aggregate=AggregateNode(keys=["str"]),
    )

    # (from, to) pairs describing the two branches drawn above.
    edges = [
        ("source", "sample"),
        ("sample", "map"),
        ("map", "target"),
        ("source", "aggregate"),
        ("aggregate", "aggtarget"),
    ]

    self.stream = Stream(stream_nodes, edges)
def test_merge(self):
    # Merge the distinct sample (input 0) with a small (type2, name) pipe
    # (input 1) and check the shape of the result.
    merge_node = brewery.nodes.MergeNode()
    self.create_distinct_sample()

    # Second input: one row per type code.
    detail_pipe = brewery.streams.SimpleDataPipe()
    detail_pipe.fields = brewery.fieldlist(["type2", "name"])
    detail_rows = (
        ["a", "apple"],
        ["b", "bananna"],
        ["c", "curry"],
        ["d", "dynamite"],
    )
    for detail_row in detail_rows:
        detail_pipe.put(detail_row)

    # Row count of the first input, captured before the node runs.
    expected_row_count = len(self.input.buffer)

    merge_node.inputs = [self.input, detail_pipe]
    merge_node.outputs = [self.output]
    # NOTE(review): presumably "join input 1 where type == type2";
    # confirm the tuple layout against MergeNode.
    merge_node.joins = [(1, "type", "type2")]
    merge_node.maps = {
        0: brewery.FieldMap(drop=["id2"]),
        1: brewery.FieldMap(drop=["type2"]),
    }

    self.initialize_node(merge_node)

    # Five fields expected: 5 - 1 dropped from input 0, 2 - 1 from input 1.
    self.assertEqual(5, len(merge_node.output_fields))

    merge_node.run()
    merge_node.finalize()

    first_row = self.output.buffer[0]
    self.assertEqual(5, len(first_row))
    # The output is expected to have as many rows as the first input.
    self.assertEqual(expected_row_count, len(self.output.buffer))
def test_merge(self):
    # Merge the distinct sample (input 0) with a small (type2, name) pipe
    # (input 1) and check the shape of the result.
    merge_node = brewery.nodes.MergeNode()
    self.create_distinct_sample()

    # Second input: one row per type code.
    detail_pipe = brewery.streams.SimpleDataPipe()
    detail_pipe.fields = brewery.fieldlist(["type2", "name"])
    detail_rows = (
        ["a", "apple"],
        ["b", "bananna"],
        ["c", "curry"],
        ["d", "dynamite"],
    )
    for detail_row in detail_rows:
        detail_pipe.put(detail_row)

    # Row count of the first input, captured before the node runs.
    expected_row_count = len(self.input.buffer)

    merge_node.inputs = [self.input, detail_pipe]
    merge_node.outputs = [self.output]
    # NOTE(review): presumably "join input 1 where type == type2";
    # confirm the tuple layout against MergeNode.
    merge_node.joins = [(1, "type", "type2")]
    merge_node.maps = {
        0: brewery.FieldMap(drop=["id2"]),
        1: brewery.FieldMap(drop=["type2"]),
    }

    self.initialize_node(merge_node)

    # Five fields expected: 5 - 1 dropped from input 0, 2 - 1 from input 1.
    self.assertEqual(5, len(merge_node.output_fields))

    merge_node.run()
    merge_node.finalize()

    first_row = self.output.buffer[0]
    self.assertEqual(5, len(first_row))
    # The output is expected to have as many rows as the first input.
    self.assertEqual(expected_row_count, len(self.output.buffer))
def test_consolidate_type(self):
    # Feed rows of mixed raw values through CoalesceValueToTypeNode and
    # compare the first three output columns with the expected values
    # for the declared string / integer / float fields.
    self.input.fields = brewery.fieldlist([
        ("s", "string"),
        ("i", "integer"),
        ("f", "float"),
        ("u", "unknown"),
    ])

    raw_rows = [
        [" foo ", 123, 123, None],
        [123, "123", "123", None],
        [123.0, " 123 ", " 123 ", None],
        [" foo ", "1 2 3", "1 2 3 . 0", None],
        [" foo ", "fail", "fail", None],
        [None, None, None, None],
    ]
    for raw_row in raw_rows:
        self.input.put(raw_row)

    coalesce_node = brewery.nodes.CoalesceValueToTypeNode()
    self.setup_node(coalesce_node)
    self.initialize_node(coalesce_node)
    coalesce_node.run()
    coalesce_node.finalize()

    # Split the output into per-field columns; the "unknown" field is
    # not checked.
    produced = self.output.buffer
    strings = [out_row[0] for out_row in produced]
    integers = [out_row[1] for out_row in produced]
    floats = [out_row[2] for out_row in produced]

    self.assertEqual(["foo", "123", "123.0", "foo", "foo", None], strings)
    self.assertEqual([123, 123, 123, 123, None, None], integers)
    self.assertEqual([123, 123, 123, 123, None, None], floats)
def create_sample(self, count=100, custom=None, pipe=None):
    """Reset a pipe and fill it with ``count`` generated rows.

    Each row is ``[i, float(i) / 4, "item-%s" % i, custom]`` for ``i`` in
    ``0 .. count - 1``, under the fields ``i``, ``q``, ``str``, ``custom``.

    :param count: number of rows to generate.
    :param custom: value stored unchanged in the last column of every row.
    :param pipe: pipe to fill; ``self.input`` is used when it is ``None``.
    """
    # Compare against None explicitly: a truthiness test would silently
    # replace a caller-supplied pipe that happens to evaluate as false.
    if pipe is None:
        pipe = self.input

    pipe.empty()
    pipe.fields = brewery.fieldlist(["i", "q", "str", "custom"])

    for i in range(count):
        pipe.put([i, float(i) / 4, "item-%s" % i, custom])
def setUp(self):
    # Fixture: one source feeding two independent branches.
    #
    #  source ---+---> aggregate ----> aggtarget
    #            |
    #            +---> sample ----> map ----> target
    #
    # The target lists are kept on the test case so that tests can inspect
    # whatever the two target nodes were handed.
    self.fields = brewery.fieldlist(["a", "b", "c", "str"])
    self.src_list = [
        [1, 2, 3, "a"],
        [4, 5, 6, "b"],
        [7, 8, 9, "a"],
    ]
    self.target_list = []
    self.aggtarget_list = []

    # Node name -> node instance, constructed in the same order as before.
    stream_nodes = dict(
        source=RowListSourceNode(self.src_list, self.fields),
        target=RecordListTargetNode(self.target_list),
        aggtarget=RecordListTargetNode(self.aggtarget_list),
        sample=SampleNode("sample"),
        map=FieldMapNode(drop_fields=["c"]),
        aggregate=AggregateNode(keys=["str"]),
    )

    # (from, to) pairs describing the two branches drawn above.
    edges = [
        ("source", "sample"),
        ("sample", "map"),
        ("map", "target"),
        ("source", "aggregate"),
        ("aggregate", "aggtarget"),
    ]

    self.stream = Stream(stream_nodes, edges)
def create_sample(self, count=100, custom=None, pipe=None):
    """Reset a pipe and fill it with ``count`` generated rows.

    Each row is ``[i, float(i) / 4, "item-%s" % i, custom]`` for ``i`` in
    ``0 .. count - 1``, under the fields ``i``, ``q``, ``str``, ``custom``.

    :param count: number of rows to generate.
    :param custom: value stored unchanged in the last column of every row.
    :param pipe: pipe to fill; ``self.input`` is used when it is ``None``.
    """
    # Compare against None explicitly: a truthiness test would silently
    # replace a caller-supplied pipe that happens to evaluate as false.
    if pipe is None:
        pipe = self.input

    pipe.empty()
    pipe.fields = brewery.fieldlist(["i", "q", "str", "custom"])

    for i in range(count):
        pipe.put([i, float(i) / 4, "item-%s" % i, custom])
def create_distinct_sample(self, pipe=None):
    """Reset a pipe and fill it with a fixed 36-row sample.

    The fields are ``id``, ``id2``, ``q``, ``type``, ``class``. For each
    ``i`` in ``1 .. 9`` four rows are added: two of type ``"a"`` sharing
    ``id == i``, one of type ``"b"`` and one of type ``"c"``.

    :param pipe: pipe to fill; ``self.input`` is used when it is ``None``.
    """
    # Compare against None explicitly: a truthiness test would silently
    # replace a caller-supplied pipe that happens to evaluate as false.
    if pipe is None:
        pipe = self.input

    pipe.empty()
    pipe.fields = brewery.fieldlist(["id", "id2", "q", "type", "class"])

    for i in range(1, 10):
        # All four rows generated for one ``i`` share the same ``q``.
        q = float(i) / 4
        pipe.put([i, i, q, "a", "x"])
        pipe.put([i, i * 10, q, "a", "y"])
        pipe.put([i * 10, i * 100, q, "b", "x"])
        pipe.put([i * 100, i * 1000, q, "c", "y"])
def create_distinct_sample(self, pipe=None):
    """Reset a pipe and fill it with a fixed 36-row sample.

    The fields are ``id``, ``id2``, ``q``, ``type``, ``class``. For each
    ``i`` in ``1 .. 9`` four rows are added: two of type ``"a"`` sharing
    ``id == i``, one of type ``"b"`` and one of type ``"c"``.

    :param pipe: pipe to fill; ``self.input`` is used when it is ``None``.
    """
    # Compare against None explicitly: a truthiness test would silently
    # replace a caller-supplied pipe that happens to evaluate as false.
    if pipe is None:
        pipe = self.input

    pipe.empty()
    pipe.fields = brewery.fieldlist(["id", "id2", "q", "type", "class"])

    for i in range(1, 10):
        # All four rows generated for one ``i`` share the same ``q``.
        q = float(i) / 4
        pipe.put([i, i, q, "a", "x"])
        pipe.put([i, i * 10, q, "a", "y"])
        pipe.put([i * 10, i * 100, q, "b", "x"])
        pipe.put([i * 100, i * 1000, q, "c", "y"])
def test_strip_auto(self):
    # With no explicit configuration, the first output row is expected to
    # have its "string"-typed fields stripped and its "unknown"-typed
    # fields left untouched.
    self.input.fields = brewery.fieldlist([
        ("str1", "string"),
        ("x", "unknown"),
        ("str2", "string"),
        ("f", "unknown"),
    ])

    # Five identical padded rows; a fresh list is built for every put.
    for _ in range(5):
        self.input.put([" foo ", " bar ", " baz ", " moo "])

    strip_node = brewery.nodes.StringStripNode()
    self.setup_node(strip_node)
    self.initialize_node(strip_node)
    strip_node.run()
    strip_node.finalize()

    first_row = self.output.buffer[0]
    self.assertEqual(["foo", " bar ", "baz", " moo "], first_row)
def test_strip_auto(self):
    # With no explicit configuration, the first output row is expected to
    # have its "string"-typed fields stripped and its "unknown"-typed
    # fields left untouched.
    self.input.fields = brewery.fieldlist([
        ("str1", "string"),
        ("x", "unknown"),
        ("str2", "string"),
        ("f", "unknown"),
    ])

    # Five identical padded rows; a fresh list is built for every put.
    for _ in range(5):
        self.input.put([" foo ", " bar ", " baz ", " moo "])

    strip_node = brewery.nodes.StringStripNode()
    self.setup_node(strip_node)
    self.initialize_node(strip_node)
    strip_node.run()
    strip_node.finalize()

    first_row = self.output.buffer[0]
    self.assertEqual(["foo", " bar ", "baz", " moo "], first_row)
def test_consolidate_type(self):
    # Feed rows of mixed raw values through CoalesceValueToTypeNode and
    # compare the first three output columns with the expected values
    # for the declared string / integer / float fields.
    self.input.fields = brewery.fieldlist([
        ("s", "string"),
        ("i", "integer"),
        ("f", "float"),
        ("u", "unknown"),
    ])

    raw_rows = [
        [" foo ", 123, 123, None],
        [123, "123", "123", None],
        [123.0, " 123 ", " 123 ", None],
        [" foo ", "1 2 3", "1 2 3 . 0", None],
        [" foo ", "fail", "fail", None],
        [None, None, None, None],
    ]
    for raw_row in raw_rows:
        self.input.put(raw_row)

    coalesce_node = brewery.nodes.CoalesceValueToTypeNode()
    self.setup_node(coalesce_node)
    self.initialize_node(coalesce_node)
    coalesce_node.run()
    coalesce_node.finalize()

    # Split the output into per-field columns; the "unknown" field is
    # not checked.
    produced = self.output.buffer
    strings = [out_row[0] for out_row in produced]
    integers = [out_row[1] for out_row in produced]
    floats = [out_row[2] for out_row in produced]

    self.assertEqual(["foo", "123", "123.0", "foo", "foo", None], strings)
    self.assertEqual([123, 123, 123, 123, None, None], integers)
    self.assertEqual([123, 123, 123, 123, None, None], floats)
def output_fields(self):
    """Return a brewery field list holding the single field ``"i"``."""
    field_names = ["i"]
    return brewery.fieldlist(field_names)