def test_run(self):
    """Run ``self.w`` into a list sink and expect each data item prefixed by 1.

    Only the ``'tgt'`` target is configured here; any other configuration
    entry is presumably registered on ``self.cfg`` by the fixture -- verify
    against ``setUp``.
    """
    collected = []
    self.cfg.add_configuration(
        'tgt',
        target=gibbon.SequenceWrapper,
        container=collected,
    )
    runner = gibbon.get_async_executor(shutdown=True)

    self.w.prepare(self.cfg)
    self.w.run(runner)

    # Expected rows are (1, item) pairs, one per element of self.data.
    ones = [1] * len(self.data)
    expected_rows = list(zip(ones, self.data))
    self.assertSequenceEqual(collected, expected_rows)
def test_concat_unbalanced(self):
    """Feed two sources of different lengths and check the collected rows.

    The first collected row is expected to combine one row from each
    source; the second is expected to hold only the leftover row of the
    longer source.
    """
    self.assertTrue(self.w.is_valid)

    longer_rows = [('row_id', ), ('another_id', )]
    shorter_rows = [('row_data', )]
    collected = []

    config = gibbon.Configuration()
    config.add_configuration(
        'src1', source=gibbon.SequenceWrapper, iterable=longer_rows)
    config.add_configuration(
        'src2', source=gibbon.SequenceWrapper, iterable=shorter_rows)
    config.add_configuration(
        'tgt', target=gibbon.SequenceWrapper, container=collected)

    runner = gibbon.get_async_executor(shutdown=True)
    self.w.prepare(config)
    self.w.run(runner)

    row_count = len(collected)
    self.assertTrue(row_count)
    self.assertEqual(row_count, 2)
    self.assertEqual(collected[0], ('row_id', 'row_data'))
    self.assertEqual(collected[1], ('another_id', ))
def test_split(self):
    """Send one two-field row through ``self.w`` and check both sinks.

    Each sink is expected to receive exactly one single-field row:
    ``('row_id', )`` in the first and ``('row_data', )`` in the second.
    """
    self.assertTrue(self.w.is_valid)

    rows = [('row_id', 'row_data')]
    first_sink = []
    second_sink = []

    config = gibbon.Configuration()
    config.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=rows)
    config.add_configuration(
        'tgt1', target=gibbon.SequenceWrapper, container=first_sink)
    config.add_configuration(
        'tgt2', target=gibbon.SequenceWrapper, container=second_sink)

    runner = gibbon.get_async_executor(shutdown=True)
    self.w.prepare(config)
    self.w.run(runner)

    self.assertEqual(len(first_sink), 1)
    self.assertEqual(len(second_sink), 1)
    self.assertEqual(first_sink[0], ('row_id', ))
    self.assertEqual(second_sink[0], ('row_data', ))
def testWithDefault1(self):
    """Tip: should pass because a target was added after all gated outports
    were connected; this additional target serves as the default destination
    for rows that do not meet any of the conditions."""
    workflow = self.wk_sel_wd

    # The workflow must be valid both before and after the extra target.
    self.assertTrue(workflow.is_valid)
    workflow.add_target('tgt_default', source='sel')
    self.assertTrue(workflow.is_valid)

    rows = [(0, ), (1, ), (-1, ), (0, )]
    first_sink, second_sink, default_sink = [], [], []

    self.cfg.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=rows)
    self.cfg.add_configuration(
        'tgt1', target=gibbon.SequenceWrapper, container=first_sink)
    self.cfg.add_configuration(
        'tgt2', target=gibbon.SequenceWrapper, container=second_sink)
    self.cfg.add_configuration(
        'tgt_default', target=gibbon.SequenceWrapper, container=default_sink)

    runner = gibbon.get_async_executor(shutdown=True)
    workflow.prepare(self.cfg)
    workflow.run(runner)

    self.assertSequenceEqual(first_sink, [(1, )])
    self.assertSequenceEqual(second_sink, [(-1, )])
    # Both zero rows are expected in the default sink.
    self.assertSequenceEqual(default_sink, [(0, ), (0, )])
def test_default(self):
    """Run ``self.wk_default_expr`` and expect the results to equal the input.

    ``self.data`` is the source and ``self.results`` the sink; both come
    from the fixture.
    """
    workflow = self.wk_default_expr

    self.cfg.add_configuration(
        'src',
        source=gibbon.SequenceWrapper,
        iterable=self.data,
    )
    self.cfg.add_configuration(
        'tgt',
        target=gibbon.SequenceWrapper,
        container=self.results,
    )

    runner = gibbon.get_async_executor(shutdown=True)
    workflow.prepare(self.cfg)
    workflow.run(runner)

    self.assertSequenceEqual(self.results, self.data)
def testLenOverThree(self):
    """Run ``self.wk`` over ``self.data`` and expect only ``('4444', )``.

    Judging by the test name, the workflow presumably keeps rows whose
    value is longer than three characters -- confirm against the fixture.
    """
    collected = []

    # zip() over a single iterable wraps every item into a 1-tuple row.
    single_field_rows = zip(self.data)
    self.cfg.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=single_field_rows)
    self.cfg.add_configuration(
        'tgt', target=gibbon.SequenceWrapper, container=collected)

    runner = gibbon.get_async_executor(shutdown=True)
    self.wk.prepare(self.cfg)
    self.wk.run(runner)

    self.assertSequenceEqual(collected, [('4444', )])
def testOnlyPositive(self):
    """Run ``self.wk_only_pos`` and expect ``(1, )`` as the only result row."""
    # Start from a fresh result list for this test.
    self.results = []
    workflow = self.wk_only_pos

    self.cfg.add_configuration(
        'src',
        source=gibbon.SequenceWrapper,
        iterable=self.data,
    )
    self.cfg.add_configuration(
        'tgt',
        target=gibbon.SequenceWrapper,
        container=self.results,
    )

    runner = gibbon.get_async_executor(shutdown=True)
    workflow.prepare(self.cfg)
    workflow.run(runner)

    self.assertSequenceEqual(self.results, [(1, )])
def test_csv_source(self):
    """Read ``self._filename`` through a CSV source into a list sink.

    Checks that at least one row arrives and that the first row is a
    tuple.
    """
    self.assertTrue(self.w.is_valid)

    results = []
    cfg = gibbon.Configuration()
    cfg.add_configuration(
        'csv', source=gibbon.CSVSourceFile, filename=self._filename)
    cfg.add_configuration(
        'list', target=gibbon.SequenceWrapper, container=results)

    self.w.prepare(cfg)
    self.w.run(gibbon.get_async_executor(shutdown=True))

    # assertGreater reports the actual length on failure, whereas
    # assertTrue(len(results) > 0) would only say "False is not true".
    self.assertGreater(len(results), 0)
    self.assertIsInstance(results[0], tuple)
def test_sum(self):
    """Run ``self.w`` over the rows 1, 2, 3 and expect one ``('sum:', 6)`` row."""
    config = gibbon.Configuration()

    # Three single-field rows, spelled out as 1-tuples.
    rows = [(1, ), (2, ), (3, )]
    collected = []

    config.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=rows)
    config.add_configuration(
        'tgt', target=gibbon.SequenceWrapper, container=collected)

    self.w.prepare(config)
    self.w.run(gibbon.get_async_executor(shutdown=True))

    expected_rows = [('sum:', 6)]
    self.assertSequenceEqual(collected, expected_rows)
def test_group_by(self):
    """Run ``self.w`` over keyed rows and check one total per key.

    Input rows are ``(key, value)`` pairs for keys ``'a'`` and ``'b'``.
    The sink is expected to hold two rows mapping ``'a'`` to 4 and
    ``'b'`` to 2.
    """
    cfg = gibbon.Configuration()
    data = list(zip(['a', 'b', 'a', 'b'], [1, 2, 3, 0]))
    sink = []
    cfg.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=data)
    cfg.add_configuration(
        'tgt', target=gibbon.SequenceWrapper, container=sink)

    self.w.prepare(cfg)
    self.w.run(gibbon.get_async_executor(shutdown=True))

    self.assertEqual(len(sink), 2)

    # Build the key -> value mapping once, after the length check so a
    # wrong row count is reported as a failure first.
    totals = dict(sink)

    # assertIn names the missing key and the container on failure.
    self.assertIn('a', totals)
    self.assertIn('b', totals)
    self.assertEqual(totals['a'], 4)
    self.assertEqual(totals['b'], 2)
def testBinarySelection(self):
    """Route ``self.data`` through ``self.wk_sel_bin`` into two sinks.

    Expects ``(0, )`` and ``(1, )`` in the ``'tgt1'`` sink and ``(-1, )``
    in the ``'tgt2'`` sink.
    """
    first_sink, second_sink = [], []
    workflow = self.wk_sel_bin

    self.cfg.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=self.data)
    self.cfg.add_configuration(
        'tgt1', target=gibbon.SequenceWrapper, container=first_sink)
    self.cfg.add_configuration(
        'tgt2', target=gibbon.SequenceWrapper, container=second_sink)

    runner = gibbon.get_async_executor(shutdown=True)
    workflow.prepare(self.cfg)
    workflow.run(runner)

    self.assertSequenceEqual(first_sink, [(0, ), (1, )])
    self.assertSequenceEqual(second_sink, [(-1, )])
def testShortCircuit(self):
    """Tip: should not short-circuit; a row is sent wherever it fits, even
    if that means sending it twice or more."""
    rows = [(0, )]
    first_sink, second_sink = [], []
    workflow = self.wk_sel_shc

    self.cfg.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=rows)
    self.cfg.add_configuration(
        'tgt1', target=gibbon.SequenceWrapper, container=first_sink)
    self.cfg.add_configuration(
        'tgt2', target=gibbon.SequenceWrapper, container=second_sink)

    runner = gibbon.get_async_executor(shutdown=True)
    workflow.prepare(self.cfg)
    workflow.run(runner)

    # The single row is expected in both sinks.
    self.assertSequenceEqual(first_sink, [(0, )])
    self.assertSequenceEqual(second_sink, [(0, )])
def testWithDefault2(self):
    """Adding a useless target does not fail."""
    workflow = self.wk_sel_wd
    workflow.reset(self.cfg)
    workflow.add_target('tgt_default', source='sel')
    workflow.add_target('useless_target', source='sel')

    # This test shows that the workflow is still valid.
    # Anyway, the Selector raised a build warning saying that a useless
    # target was connected.
    self.assertTrue(workflow.is_valid)
    build_warnings = workflow.get_all_warnings()
    self.assertNotEqual(build_warnings, 'No warning.')

    rows = [(0, ), (1, ), (-1, ), (0, )]
    unused_sink = []
    first_sink, second_sink, default_sink = [], [], []

    self.cfg.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=rows)
    self.cfg.add_configuration(
        'tgt1', target=gibbon.SequenceWrapper, container=first_sink)
    self.cfg.add_configuration(
        'tgt2', target=gibbon.SequenceWrapper, container=second_sink)
    self.cfg.add_configuration(
        'tgt_default', target=gibbon.SequenceWrapper, container=default_sink)
    self.cfg.add_configuration(
        'useless_target', target=gibbon.SequenceWrapper, container=unused_sink)

    runner = gibbon.get_async_executor(shutdown=True)
    workflow.prepare(self.cfg)
    workflow.run(runner)

    self.assertSequenceEqual(first_sink, [(1, )])
    self.assertSequenceEqual(second_sink, [(-1, )])
    self.assertSequenceEqual(default_sink, [(0, ), (0, )])
    # Nothing is expected to reach the extra target.
    self.assertSequenceEqual(unused_sink, [])
def test_csv_target(self):
    """Write in-memory rows to ``self._filename`` through a CSV target.

    Verification of the written file is delegated to
    ``self.assertFileContent()``.
    """
    self.assertTrue(self.w.is_valid)

    rows = [
        ('Brian', 23),
        ('Joe', 'ERROR'),
        ('Mary', 40),
        ('Alice', 25),
        ('Billy', 15),
    ]

    config = gibbon.Configuration()
    config.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=rows)
    config.add_configuration(
        'csv', target=gibbon.CSVTargetFile, filename=self._filename)

    self.w.prepare(config)
    self.w.run(gibbon.get_async_executor(shutdown=True))

    self.assertFileContent()
def testDefault(self):
    """Tip: without a default queue, rows that match no condition are
    discarded, i.e. they are not pushed downstream."""
    rows = [(1, ), (-1, ), (0, )]
    first_sink, second_sink = [], []
    workflow = self.wk_sel_def

    self.cfg.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=rows)
    self.cfg.add_configuration(
        'tgt1', target=gibbon.SequenceWrapper, container=first_sink)
    self.cfg.add_configuration(
        'tgt2', target=gibbon.SequenceWrapper, container=second_sink)

    runner = gibbon.get_async_executor(shutdown=True)
    workflow.prepare(self.cfg)
    workflow.run(runner)

    # The (0, ) row is expected in neither sink.
    self.assertSequenceEqual(first_sink, [(1, )])
    self.assertSequenceEqual(second_sink, [(-1, )])
def test_union(self):
    """Merge two keyed sources with ``self.w`` and check nothing is lost.

    The sink must hold as many rows as both sources together, and its
    keys and values (each compared as a sorted sequence) must match those
    of the combined input.
    """
    self.w.validate()
    self.assertTrue(self.w.is_valid)

    first_rows = [('a', 1), ('b', 2), ('c', 3)]
    second_rows = [('e', 4), ('f', 5), ('g', 6)]
    collected = []

    config = gibbon.Configuration()
    config.add_configuration(
        'src1', source=gibbon.SequenceWrapper, iterable=first_rows)
    config.add_configuration(
        'src2', source=gibbon.SequenceWrapper, iterable=second_rows)
    config.add_configuration(
        'tgt', target=gibbon.SequenceWrapper, container=collected)

    self.w.prepare(config)
    self.w.run(gibbon.get_async_executor(shutdown=True))

    self.assertGreater(len(collected), 0)
    self.assertEqual(len(collected), len(first_rows) + len(second_rows))

    actual = dict(collected)
    reference = dict(first_rows + second_rows)

    # Arrival order is not asserted: keys and values are sorted first.
    self.assertSequenceEqual(sorted(actual), sorted(reference))
    self.assertSequenceEqual(
        sorted(actual.values()), sorted(reference.values()))