def test_merge(self):
    """Merge the distinct sample input with a second pipe and check the shape.

    The second pipe carries ``type2``/``name`` rows and is joined through the
    ``(1, "type", "type2")`` join specification. The ``id2`` field is dropped
    from input 0 and ``type2`` from input 1, so five output fields are
    expected, and the output must hold as many rows as the first input.
    """
    node = brewery.nodes.MergeNode()
    self.create_distinct_sample()

    # Second input: four rows of (type2, name).
    input2 = brewery.streams.SimpleDataPipe()
    input2.fields = brewery.fieldlist(["type2", "name"])
    second_rows = (
        ["a", "apple"],
        ["b", "bananna"],
        ["c", "curry"],
        ["d", "dynamite"],
    )
    for row in second_rows:
        input2.put(row)

    # Remember the size of the first input before the node runs.
    input_len = len(self.input.buffer)

    node.inputs = [self.input, input2]
    node.outputs = [self.output]
    node.joins = [(1, "type", "type2")]
    node.maps = {
        0: brewery.FieldMap(drop=["id2"]),
        1: brewery.FieldMap(drop=["type2"]),
    }

    self.initialize_node(node)

    expected_width = 5
    self.assertEqual(expected_width, len(node.output_fields))

    node.run()
    node.finalize()

    first_row = self.output.buffer[0]
    self.assertEqual(expected_width, len(first_row))
    self.assertEqual(input_len, len(self.output.buffer))
def initialize(self):
    """Build the field map, the output field list and the row filter.

    Creates a ``brewery.FieldMap`` from ``self.mapped_fields`` (rename),
    ``self.dropped_fields`` (drop) and ``self.kept_fields`` (keep), stores it
    in ``self.map``, and derives ``self._output_fields`` and ``self.filter``
    from it using the fields of ``self.input``.
    """
    field_map = brewery.FieldMap(
        rename=self.mapped_fields,
        drop=self.dropped_fields,
        keep=self.kept_fields,
    )
    self.map = field_map

    # Both derived objects are computed from the same input field list.
    source_fields = self.input.fields
    self._output_fields = field_map.map(source_fields)
    self.filter = field_map.row_filter(source_fields)
def initialize(self):
    """Normalize joins and prepare row storage, filters and output fields.

    Reads ``self.inputs``, ``self.master``, ``self.joins`` and ``self.maps``
    and populates:

    * ``self.master_input`` and ``self.detail_inputs`` -- the master pipe and
      a list of ``(tag, pipe)`` pairs for every other input;
    * ``self._keys`` and ``self._kindexes`` -- for each detail tag, the
      ``(detail_key, master_key)`` sequences and their field indexes;
    * ``self._input_rows`` -- one empty dict per input tag;
    * ``self._maps`` and ``self._filters`` -- per-tag field maps and the row
      filters created from them;
    * ``self._output_fields`` -- the (mapped) fields of all inputs, in input
      order, wrapped in a ``brewery.FieldList``.

    Each join is ``(detail_tag, master_key)`` or
    ``(detail_tag, master_key, detail_key)``; keys may be a single field name
    or a list/tuple of names.

    Raises:
        Exception: if a join does not have two or three elements, if a join
            names the master input as its detail, or if a value in
            ``self.maps`` is neither a dict nor a ``brewery.FieldMap``.
    """
    # Check joins and normalize them first
    self._keys = {}
    self._kindexes = {}

    self.master_input = self.inputs[self.master]
    self.detail_inputs = [
        (tag, pipe)
        for (tag, pipe) in enumerate(self.inputs)
        if pipe is not self.master_input
    ]

    for join in self.joins:
        joinlen = len(join)
        if joinlen == 3:
            (detail_tag, master_key, detail_key) = join
        elif joinlen == 2:
            # Use the same key names for detail as for master if no detail
            # key is specified
            (detail_tag, master_key) = join
            detail_key = master_key
        else:
            raise Exception("Join specification should be a tuple/list of two or three elements.")

        # Wrap a single key into a tuple (as expected later). isinstance is
        # used so that list/tuple subclasses are kept as they are.
        if not isinstance(detail_key, (list, tuple)):
            detail_key = (detail_key, )
        if not isinstance(master_key, (list, tuple)):
            master_key = (master_key, )

        if detail_tag == self.master:
            raise Exception("Can not join master to itself.")

        self._keys[detail_tag] = (detail_key, master_key)

        detail_input = self.inputs[detail_tag]

        # Get field indexes
        detail_indexes = detail_input.fields.indexes(detail_key)
        master_indexes = self.master_input.fields.indexes(master_key)
        self._kindexes[detail_tag] = (detail_indexes, master_indexes)

    # Prepare storage for input data
    self._input_rows = {}
    for (tag, _pipe) in enumerate(self.inputs):
        self._input_rows[tag] = {}

    # Create map filters
    self._filters = {}
    self._maps = {}

    if self.maps:
        for (tag, fmap) in self.maps.items():
            if isinstance(fmap, dict):
                # Plain dict specification: build the field map from it
                fmap = brewery.FieldMap(rename=fmap.get("rename"),
                                        drop=fmap.get("drop"),
                                        keep=fmap.get("keep"))
            elif not isinstance(fmap, brewery.FieldMap):
                raise Exception("Unknown field map type: %s" % type(fmap))

            row_filter = fmap.row_filter(self.inputs[tag].fields)
            self._maps[tag] = fmap
            self._filters[tag] = row_filter

    # Construct output fields
    fields = []
    for (tag, pipe) in enumerate(self.inputs):
        fmap = self._maps.get(tag, None)
        if fmap:
            fields += fmap.map(pipe.fields)
        else:
            fields += pipe.fields

    self._output_fields = brewery.FieldList(fields)