def test_split(self):
    """Feed one two-field row through ``self.w`` and inspect both sinks.

    ``self.w`` is built outside this method.  The test binds the names
    ``src``, ``tgt1`` and ``tgt2`` to in-memory sequences and expects the
    first sink to receive ``('row_id', )`` and the second ``('row_data', )``.
    """
    self.assertTrue(self.w.is_valid)

    rows = [('row_id', 'row_data')]
    first_sink, second_sink = [], []

    config = gibbon.Configuration()
    config.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=rows)
    config.add_configuration(
        'tgt1', target=gibbon.SequenceWrapper, container=first_sink)
    config.add_configuration(
        'tgt2', target=gibbon.SequenceWrapper, container=second_sink)

    runner = gibbon.get_async_executor(shutdown=True)
    self.w.prepare(config)
    self.w.run(runner)

    # One row per sink, each holding a single field of the input row.
    self.assertEqual(len(first_sink), 1)
    self.assertEqual(len(second_sink), 1)
    self.assertEqual(first_sink[0], ('row_id', ))
    self.assertEqual(second_sink[0], ('row_data', ))
def test_concat_unbalanced(self):
    """Run ``self.w`` with two sources of different lengths.

    ``src1`` provides two single-field rows and ``src2`` only one.  The
    sink is expected to hold two rows: the first combining both sources,
    the second carrying only the leftover ``src1`` field.
    """
    self.assertTrue(self.w.is_valid)

    longer_rows = [('row_id', ), ('another_id', )]
    shorter_rows = [('row_data', )]
    collected = []

    config = gibbon.Configuration()
    config.add_configuration(
        'src1', source=gibbon.SequenceWrapper, iterable=longer_rows)
    config.add_configuration(
        'src2', source=gibbon.SequenceWrapper, iterable=shorter_rows)
    config.add_configuration(
        'tgt', target=gibbon.SequenceWrapper, container=collected)

    runner = gibbon.get_async_executor(shutdown=True)
    self.w.prepare(config)
    self.w.run(runner)

    self.assertTrue(len(collected))
    self.assertEqual(len(collected), 2)
    self.assertEqual(collected[0], ('row_id', 'row_data'))
    # The unmatched row from the longer source comes through on its own.
    self.assertEqual(collected[1], ('another_id', ))
def setUp(self):
    """Prepare sample strings, a ``src -> filter -> tgt`` workflow and a config.

    The filter uses ``len_over_three`` (defined elsewhere in the module) as
    its condition.  The configuration is left empty for the tests to fill.
    """
    self.data = ['22', '333', '4444']

    workflow = gibbon.Workflow('all_rows')
    workflow.add_source('src')
    workflow.add_transformation(
        'filter', gibbon.Filter, source='src', condition=len_over_three)
    workflow.add_target('tgt', source='filter')
    self.wk = workflow

    self.cfg = gibbon.Configuration()
def setUp(self):
    """Build and validate the enumerator workflow and bind its source.

    The workflow is ``src -> enum -> tgt`` where ``enum`` is a
    ``gibbon.Enumerator`` created with ``start_with=1`` and
    ``reset_after=1``.  The source is bound to the names in ``self.data``,
    each wrapped in a one-element tuple; the target is left for the tests
    to configure.
    """
    self.data = ['Henry', 'Jane', 'Willy']

    workflow = gibbon.Workflow('test_enumerator')
    workflow.add_source('src')
    workflow.add_transformation(
        'enum', gibbon.Enumerator, source='src', start_with=1, reset_after=1)
    workflow.add_target('tgt', source='enum')
    self.w = workflow

    self.cfg = gibbon.Configuration()
    # Each name becomes a single-field row.
    rows = [(name, ) for name in self.data]
    self.cfg.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=rows)

    self.w.validate(verbose=True)
def test_csv_source(self):
    """Read ``self._filename`` through a CSV source into an in-memory list.

    ``self.w`` and ``self._filename`` are provided outside this method.
    The test passes when at least one row is collected and the first row
    is a ``tuple``.
    """
    self.assertTrue(self.w.is_valid)

    results = []
    cfg = gibbon.Configuration()
    cfg.add_configuration(
        'csv', source=gibbon.CSVSourceFile, filename=self._filename)
    cfg.add_configuration(
        'list', target=gibbon.SequenceWrapper, container=results)

    self.w.prepare(cfg)
    self.w.run(gibbon.get_async_executor(shutdown=True))

    # assertGreater reports the actual length on failure, unlike
    # assertTrue(len(...) > 0) which only says "False is not true".
    self.assertGreater(len(results), 0)
    self.assertIsInstance(results[0], tuple)
def test_sum(self):
    """Run ``self.w`` over the rows ``(1,)``, ``(2,)``, ``(3,)``.

    The sink is expected to end up with the single row ``('sum:', 6)``.
    ``self.w`` is built outside this method.
    """
    config = gibbon.Configuration()
    rows = [(value, ) for value in (1, 2, 3)]
    collected = []

    config.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=rows)
    config.add_configuration(
        'tgt', target=gibbon.SequenceWrapper, container=collected)

    self.w.prepare(config)
    runner = gibbon.get_async_executor(shutdown=True)
    self.w.run(runner)

    expected = [('sum:', 6)]
    self.assertSequenceEqual(collected, expected)
def test_group_by(self):
    """Run ``self.w`` over keyed rows and check the per-key results.

    Input rows are ``('a', 1)``, ``('b', 2)``, ``('a', 3)``, ``('b', 0)``.
    The sink must contain exactly two ``(key, value)`` rows, with ``'a'``
    mapped to 4 and ``'b'`` mapped to 2 (the per-key totals of the input).
    ``self.w`` is built outside this method.
    """
    cfg = gibbon.Configuration()
    data = list(zip(['a', 'b', 'a', 'b'], [1, 2, 3, 0]))
    sink = []
    cfg.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=data)
    cfg.add_configuration(
        'tgt', target=gibbon.SequenceWrapper, container=sink)

    self.w.prepare(cfg)
    self.w.run(gibbon.get_async_executor(shutdown=True))

    self.assertEqual(len(sink), 2)

    # Build the lookup once instead of once per assertion; assertIn shows
    # the actual mapping when a key is missing.
    by_key = dict(sink)
    self.assertIn('a', by_key)
    self.assertIn('b', by_key)
    self.assertEqual(by_key['a'], 4)
    self.assertEqual(by_key['b'], 2)
def setUp(self):
    """Create two ``src -> expression -> tgt`` workflows and shared fixtures.

    ``wk_default_expr`` uses ``gibbon.Expression`` without arguments;
    ``wk_compute_expr`` passes ``func=compute`` (``compute`` is defined
    elsewhere in the module).  ``self.results`` and ``self.cfg`` start out
    empty for the tests to fill.
    """
    self.data = [(0, ), (1, ), (-1, )]
    self.results = []

    # Expression with no explicit function.
    default_wk = gibbon.Workflow('default')
    default_wk.add_source('src')
    default_wk.add_transformation(
        'expression', gibbon.Expression, source='src')
    default_wk.add_target('tgt', source='expression')
    self.wk_default_expr = default_wk

    # Expression driven by the module-level ``compute`` callable.
    compute_wk = gibbon.Workflow('compute')
    compute_wk.add_source('src')
    compute_wk.add_transformation(
        'expression', gibbon.Expression, source='src', func=compute)
    compute_wk.add_target('tgt', source='expression')
    self.wk_compute_expr = compute_wk

    self.cfg = gibbon.Configuration()
def setUp(self):
    """Create two ``src -> filter -> tgt`` workflows and shared fixtures.

    ``wk_all_rows`` uses ``gibbon.Filter`` without a condition;
    ``wk_only_pos`` passes ``self.only_positive`` as the condition.
    ``self.results`` and ``self.cfg`` start out empty for the tests to fill.
    """
    self.data = [(0, ), (1, ), (-1, )]
    self.results = []

    # Filter with no explicit condition.
    unfiltered = gibbon.Workflow('all_rows')
    unfiltered.add_source('src')
    unfiltered.add_transformation('filter', gibbon.Filter, source='src')
    unfiltered.add_target('tgt', source='filter')
    self.wk_all_rows = unfiltered

    # Filter using the condition method defined on this test case.
    positives = gibbon.Workflow('only_positive')
    positives.add_source('src')
    positives.add_transformation(
        'filter', gibbon.Filter, source='src', condition=self.only_positive)
    positives.add_target('tgt', source='filter')
    self.wk_only_pos = positives

    self.cfg = gibbon.Configuration()
def test_csv_target(self):
    """Write five in-memory rows to ``self._filename`` via a CSV target.

    ``self.w``, ``self._filename`` and ``self.assertFileContent`` are
    provided outside this method; the last one performs the actual check
    of what was written.
    """
    self.assertTrue(self.w.is_valid)

    rows = [
        ('Brian', 23),
        ('Joe', 'ERROR'),
        ('Mary', 40),
        ('Alice', 25),
        ('Billy', 15),
    ]

    config = gibbon.Configuration()
    config.add_configuration(
        'src', source=gibbon.SequenceWrapper, iterable=rows)
    config.add_configuration(
        'csv', target=gibbon.CSVTargetFile, filename=self._filename)

    self.w.prepare(config)
    runner = gibbon.get_async_executor(shutdown=True)
    self.w.run(runner)

    self.assertFileContent()
def setUp(self):
    """Create an ascending and a descending sorter workflow plus fixtures.

    Both workflows are ``src -> sorter -> tgt`` and key on the first field
    of each row; the descending one additionally passes ``reverse=True``.
    ``self.results`` and ``self.cfg`` start out empty for the tests to fill.
    """
    self.data = [(0, ), (1, ), (-1, )]
    self.results = []

    ascending = gibbon.Workflow('sort_asc')
    ascending.add_source('src')
    ascending.add_transformation(
        'sort_asc', gibbon.Sorter, source='src', key=lambda row: row[0])
    ascending.add_target('tgt', source='sort_asc')
    self.wk_sort_asc = ascending

    descending = gibbon.Workflow('sort_desc')
    descending.add_source('src')
    descending.add_transformation(
        'sort_desc', gibbon.Sorter, source='src',
        key=lambda row: row[0], reverse=True)
    descending.add_target('tgt', source='sort_desc')
    self.wk_sort_desc = descending

    self.cfg = gibbon.Configuration()
def test_union(self):
    """Run ``self.w`` with two three-row sources and compare the sink.

    The sink must hold as many rows as both sources together, and its
    keys and values (first and second field of each row) must match those
    of the combined input, irrespective of row order.  ``self.w`` is
    built outside this method.
    """
    self.w.validate()
    self.assertTrue(self.w.is_valid)

    data_src_1 = list(zip(['a', 'b', 'c'], [1, 2, 3]))
    data_src_2 = list(zip(['e', 'f', 'g'], [4, 5, 6]))
    sink = []

    cfg = gibbon.Configuration()
    cfg.add_configuration(
        'src1', source=gibbon.SequenceWrapper, iterable=data_src_1)
    cfg.add_configuration(
        'src2', source=gibbon.SequenceWrapper, iterable=data_src_2)
    cfg.add_configuration(
        'tgt', target=gibbon.SequenceWrapper, container=sink)

    self.w.prepare(cfg)
    self.w.run(gibbon.get_async_executor(shutdown=True))

    self.assertGreater(len(sink), 0)
    self.assertEqual(len(sink), len(data_src_1) + len(data_src_2))

    actual = dict(sink)
    expected = dict(data_src_1 + data_src_2)
    # sorted() accepts any iterable and always returns a list, so the
    # intermediate list(...) copies are unnecessary; iterating a dict
    # yields its keys.
    self.assertSequenceEqual(sorted(actual), sorted(expected))
    self.assertSequenceEqual(sorted(actual.values()),
                             sorted(expected.values()))
def setUp(self):
    """Build the selector workflows used by the tests, plus a blank config.

    Every workflow has the same shape: a source ``src`` feeding a
    ``gibbon.Selector`` named ``sel`` whose output goes to the targets
    ``tgt1``, ``tgt2`` (and ``tgt3`` for the multiple selector).  They
    differ in workflow name, condition tuple, number of targets and, for
    ``wk_sel_osp1`` only, an explicit ``out_ports=1``.  The condition
    callables are defined elsewhere in the module.
    """
    def selector_workflow(name, conditions, targets, **selector_options):
        """Return a ``src -> sel -> targets`` workflow called *name*."""
        workflow = gibbon.Workflow(name)
        workflow.add_source('src')
        workflow.add_transformation(
            'sel', gibbon.Selector, **selector_options,
            source='src', conditions=conditions)
        for target_name in targets:
            workflow.add_target(target_name, source='sel')
        return workflow

    two_targets = ('tgt1', 'tgt2')
    three_targets = ('tgt1', 'tgt2', 'tgt3')
    # Condition pair shared by the last four workflows.
    by_sign = (is_positive, is_negative)

    self.data = [(0, ), (1, ), (-1, )]

    self.wk_sel_bin = selector_workflow(
        'binary_selector', (is_positive_or_zero, is_negative), two_targets)
    self.wk_sel_mul = selector_workflow(
        'multiple_selector', (is_positive, is_negative, is_zero),
        three_targets)
    self.wk_sel_shc = selector_workflow(
        'short_circuit_selector', (is_positive_or_zero, is_zero),
        two_targets)
    self.wk_sel_def = selector_workflow(
        'default_selector', (is_positive, is_negative), two_targets)
    self.wk_sel_osp1 = selector_workflow(
        'out_port_specified', by_sign, two_targets, out_ports=1)
    self.wk_sel_osp2 = selector_workflow(
        'no_out_ports_specified', by_sign, two_targets)
    self.wk_sel_wd = selector_workflow(
        'with_default', by_sign, two_targets)

    self.cfg = gibbon.Configuration()