Exemple #1
0
 def test_run(self):
     """Run the workflow into a list target and check the collected rows.

     Each collected row is expected to be ``(1, item)`` for every item of
     ``self.data``, in order.
     """
     collected = []
     self.cfg.add_configuration(
         'tgt',
         target=gibbon.SequenceWrapper,
         container=collected,
     )
     executor = gibbon.get_async_executor(shutdown=True)
     self.w.prepare(self.cfg)
     self.w.run(executor)

     ones = [1] * len(self.data)
     expected = list(zip(ones, self.data))
     self.assertSequenceEqual(collected, expected)
    def test_concat_unbalanced(self):
        """Concatenate two sources of unequal length and check the output.

        'src1' supplies two rows and 'src2' only one. The test expects two
        output rows: the first combining both sources, the second holding
        the leftover 'src1' value on its own.
        """
        self.assertTrue(self.w.is_valid)

        longer_rows = [('row_id', ), ('another_id', )]
        shorter_rows = [('row_data', )]
        output = []

        cfg = gibbon.Configuration()
        for name, rows in (('src1', longer_rows), ('src2', shorter_rows)):
            cfg.add_configuration(
                name,
                source=gibbon.SequenceWrapper,
                iterable=rows,
            )
        cfg.add_configuration(
            'tgt',
            target=gibbon.SequenceWrapper,
            container=output,
        )

        executor = gibbon.get_async_executor(shutdown=True)
        self.w.prepare(cfg)
        self.w.run(executor)

        self.assertTrue(len(output))
        self.assertEqual(len(output), 2)
        self.assertEqual(output[0], ('row_id', 'row_data'))
        self.assertEqual(output[1], ('another_id', ))
    def test_split(self):
        """Split one two-column row across two targets.

        'tgt1' is expected to receive ``('row_id', )`` and 'tgt2' to receive
        ``('row_data', )``, one row each.
        """
        self.assertTrue(self.w.is_valid)
        rows = [('row_id', 'row_data')]
        first_output = []
        second_output = []

        cfg = gibbon.Configuration()
        cfg.add_configuration(
            'src',
            source=gibbon.SequenceWrapper,
            iterable=rows,
        )
        for name, container in (('tgt1', first_output),
                                ('tgt2', second_output)):
            cfg.add_configuration(
                name,
                target=gibbon.SequenceWrapper,
                container=container,
            )

        executor = gibbon.get_async_executor(shutdown=True)
        self.w.prepare(cfg)
        self.w.run(executor)

        self.assertEqual(len(first_output), 1)
        self.assertEqual(len(second_output), 1)
        self.assertEqual(first_output[0], ('row_id', ))
        self.assertEqual(second_output[0], ('row_data', ))
Exemple #4
0
    def testWithDefault1(self):
        """Tip: a target added once all gated outports are connected is a default.

        The workflow should remain valid after the extra target is added, and
        rows that meet none of the conditions are expected to end up in that
        additional target.
        """
        self.assertTrue(self.wk_sel_wd.is_valid)
        self.wk_sel_wd.add_target('tgt_default', source='sel')
        self.assertTrue(self.wk_sel_wd.is_valid)

        rows = [(0, ), (1, ), (-1, ), (0, )]
        tgt1_rows, tgt2_rows, default_rows = [], [], []

        self.cfg.add_configuration(
            'src',
            source=gibbon.SequenceWrapper,
            iterable=rows,
        )
        targets = (
            ('tgt1', tgt1_rows),
            ('tgt2', tgt2_rows),
            ('tgt_default', default_rows),
        )
        for name, container in targets:
            self.cfg.add_configuration(
                name,
                target=gibbon.SequenceWrapper,
                container=container,
            )

        executor = gibbon.get_async_executor(shutdown=True)
        self.wk_sel_wd.prepare(self.cfg)
        self.wk_sel_wd.run(executor)
        self.assertSequenceEqual(tgt1_rows, [(1, )])
        self.assertSequenceEqual(tgt2_rows, [(-1, )])
        self.assertSequenceEqual(default_rows, [(0, ), (0, )])
Exemple #5
0
    def test_default(self):
        """Run ``wk_default_expr`` and expect the output to equal the input.

        ``self.data`` is the source and ``self.results`` the target container.
        """
        self.cfg.add_configuration(
            'src',
            source=gibbon.SequenceWrapper,
            iterable=self.data,
        )
        self.cfg.add_configuration(
            'tgt',
            target=gibbon.SequenceWrapper,
            container=self.results,
        )

        executor = gibbon.get_async_executor(shutdown=True)
        self.wk_default_expr.prepare(self.cfg)
        self.wk_default_expr.run(executor)

        self.assertSequenceEqual(self.results, self.data)
Exemple #6
0
    def testLenOverThree(self):
        """Run ``self.wk`` over the data and expect only ``('4444', )`` out."""
        output = []
        # zip() over a single iterable wraps every item in a 1-tuple.
        single_column_rows = zip(self.data)
        self.cfg.add_configuration(
            'src',
            source=gibbon.SequenceWrapper,
            iterable=single_column_rows,
        )
        self.cfg.add_configuration(
            'tgt',
            target=gibbon.SequenceWrapper,
            container=output,
        )

        executor = gibbon.get_async_executor(shutdown=True)
        self.wk.prepare(self.cfg)
        self.wk.run(executor)

        self.assertSequenceEqual(output, [('4444', )])
Exemple #7
0
    def testOnlyPositive(self):
        """Run ``wk_only_pos`` over ``self.data`` and expect only ``(1, )`` out."""
        # Start from an empty target container for this test.
        self.results = []

        self.cfg.add_configuration(
            'src',
            source=gibbon.SequenceWrapper,
            iterable=self.data,
        )
        self.cfg.add_configuration(
            'tgt',
            target=gibbon.SequenceWrapper,
            container=self.results,
        )

        executor = gibbon.get_async_executor(shutdown=True)
        self.wk_only_pos.prepare(self.cfg)
        self.wk_only_pos.run(executor)

        self.assertSequenceEqual(self.results, [(1, )])
Exemple #8
0
    def test_csv_source(self):
        """Read the CSV file named by ``self._filename`` into a list target.

        The test expects at least one row to arrive and the first row to be
        a tuple.
        """
        self.assertTrue(self.w.is_valid)

        results = []
        cfg = gibbon.Configuration()
        cfg.add_configuration('csv',
                              source=gibbon.CSVSourceFile,
                              filename=self._filename)
        cfg.add_configuration('list',
                              target=gibbon.SequenceWrapper,
                              container=results)

        self.w.prepare(cfg)
        self.w.run(gibbon.get_async_executor(shutdown=True))
        # assertGreater reports the actual length on failure, whereas
        # assertTrue(len(...) > 0) would only say "False is not true".
        self.assertGreater(len(results), 0)
        self.assertIsInstance(results[0], tuple)
Exemple #9
0
 def test_sum(self):
     """Feed the single-column rows 1, 2, 3 and expect one ``('sum:', 6)`` row."""
     cfg = gibbon.Configuration()
     rows = [(1, ), (2, ), (3, )]
     output = []
     cfg.add_configuration(
         'src',
         source=gibbon.SequenceWrapper,
         iterable=rows,
     )
     cfg.add_configuration(
         'tgt',
         target=gibbon.SequenceWrapper,
         container=output,
     )
     self.w.prepare(cfg)
     self.w.run(gibbon.get_async_executor(shutdown=True))

     expected = [('sum:', 6)]
     self.assertSequenceEqual(output, expected)
Exemple #10
0
 def test_group_by(self):
     """Group (key, value) rows by key and check the per-key totals.

     The input holds keys 'a' and 'b' twice each; the test expects exactly
     two output rows, with 'a' mapped to 4 and 'b' mapped to 2.
     """
     cfg = gibbon.Configuration()
     data = list(zip(['a', 'b', 'a', 'b'], [1, 2, 3, 0]))
     sink = []
     cfg.add_configuration('src',
                           source=gibbon.SequenceWrapper,
                           iterable=data)
     cfg.add_configuration('tgt',
                           target=gibbon.SequenceWrapper,
                           container=sink)
     self.w.prepare(cfg)
     self.w.run(gibbon.get_async_executor(shutdown=True))
     self.assertEqual(len(sink), 2)
     # Build the lookup once; assertIn shows the dict contents on failure.
     totals = dict(sink)
     self.assertIn('a', totals)
     self.assertIn('b', totals)
     self.assertEqual(totals['a'], 4)
     self.assertEqual(totals['b'], 2)
Exemple #11
0
    def testBinarySelection(self):
        """Run ``wk_sel_bin`` over ``self.data`` and check both targets.

        'tgt1' is expected to receive ``(0, )`` and ``(1, )``; 'tgt2' is
        expected to receive ``(-1, )``.
        """
        tgt1_rows, tgt2_rows = [], []

        self.cfg.add_configuration(
            'src',
            source=gibbon.SequenceWrapper,
            iterable=self.data,
        )
        for name, container in (('tgt1', tgt1_rows), ('tgt2', tgt2_rows)):
            self.cfg.add_configuration(
                name,
                target=gibbon.SequenceWrapper,
                container=container,
            )

        executor = gibbon.get_async_executor(shutdown=True)
        self.wk_sel_bin.prepare(self.cfg)
        self.wk_sel_bin.run(executor)

        self.assertSequenceEqual(tgt1_rows, [(0, ), (1, )])
        self.assertSequenceEqual(tgt2_rows, [(-1, )])
Exemple #12
0
    def testShortCircuit(self):
        """Tip: should not short-circuit.

        Rows are sent wherever they fit, even if that means sending the same
        row twice or more; here the single row is expected in both targets.
        """
        rows = [(0, )]
        tgt1_rows, tgt2_rows = [], []

        self.cfg.add_configuration(
            'src',
            source=gibbon.SequenceWrapper,
            iterable=rows,
        )
        for name, container in (('tgt1', tgt1_rows), ('tgt2', tgt2_rows)):
            self.cfg.add_configuration(
                name,
                target=gibbon.SequenceWrapper,
                container=container,
            )

        executor = gibbon.get_async_executor(shutdown=True)
        self.wk_sel_shc.prepare(self.cfg)
        self.wk_sel_shc.run(executor)

        self.assertSequenceEqual(tgt1_rows, [(0, )])
        self.assertSequenceEqual(tgt2_rows, [(0, )])
Exemple #13
0
    def testWithDefault2(self):
        """Adding a useless target does not make the workflow fail."""
        self.wk_sel_wd.reset(self.cfg)
        self.wk_sel_wd.add_target('tgt_default', source='sel')
        self.wk_sel_wd.add_target('useless_target', source='sel')

        # The workflow is still valid, but the Selector is expected to have
        # raised a build warning about the useless target being connected.
        self.assertTrue(self.wk_sel_wd.is_valid)
        build_warnings = self.wk_sel_wd.get_all_warnings()
        self.assertNotEqual(build_warnings, 'No warning.')

        rows = [(0, ), (1, ), (-1, ), (0, )]
        useless_rows = []
        tgt1_rows, tgt2_rows, default_rows = [], [], []

        self.cfg.add_configuration(
            'src',
            source=gibbon.SequenceWrapper,
            iterable=rows,
        )
        targets = (
            ('tgt1', tgt1_rows),
            ('tgt2', tgt2_rows),
            ('tgt_default', default_rows),
            ('useless_target', useless_rows),
        )
        for name, container in targets:
            self.cfg.add_configuration(
                name,
                target=gibbon.SequenceWrapper,
                container=container,
            )

        executor = gibbon.get_async_executor(shutdown=True)

        self.wk_sel_wd.prepare(self.cfg)
        self.wk_sel_wd.run(executor)

        self.assertSequenceEqual(tgt1_rows, [(1, )])
        self.assertSequenceEqual(tgt2_rows, [(-1, )])
        self.assertSequenceEqual(default_rows, [(0, ), (0, )])
        self.assertSequenceEqual(useless_rows, [])
Exemple #14
0
    def test_csv_target(self):
        """Write a small in-memory table to the CSV file ``self._filename``.

        The written file is then checked by ``self.assertFileContent()``.
        """
        self.assertTrue(self.w.is_valid)

        rows = [
            ('Brian', 23),
            ('Joe', 'ERROR'),
            ('Mary', 40),
            ('Alice', 25),
            ('Billy', 15),
        ]
        cfg = gibbon.Configuration()
        cfg.add_configuration(
            'src',
            source=gibbon.SequenceWrapper,
            iterable=rows,
        )
        cfg.add_configuration(
            'csv',
            target=gibbon.CSVTargetFile,
            filename=self._filename,
        )

        self.w.prepare(cfg)
        self.w.run(gibbon.get_async_executor(shutdown=True))

        self.assertFileContent()
Exemple #15
0
    def testDefault(self):
        """Tip: without a default queue, unmatched rows are discarded.

        Rows that do not match any condition are not pushed downstream, so
        ``(0, )`` is expected in neither target.
        """
        rows = [(1, ), (-1, ), (0, )]
        tgt1_rows, tgt2_rows = [], []

        self.cfg.add_configuration(
            'src',
            source=gibbon.SequenceWrapper,
            iterable=rows,
        )
        for name, container in (('tgt1', tgt1_rows), ('tgt2', tgt2_rows)):
            self.cfg.add_configuration(
                name,
                target=gibbon.SequenceWrapper,
                container=container,
            )

        executor = gibbon.get_async_executor(shutdown=True)
        self.wk_sel_def.prepare(self.cfg)
        self.wk_sel_def.run(executor)

        self.assertSequenceEqual(tgt1_rows, [(1, )])
        self.assertSequenceEqual(tgt2_rows, [(-1, )])
Exemple #16
0
    def test_union(self):
        """Union two (key, value) sources into one target and check the rows.

        Row order is not asserted: the output keys and values are compared
        with the combined inputs after sorting.
        """
        self.w.validate()
        self.assertTrue(self.w.is_valid)

        first_rows = [('a', 1), ('b', 2), ('c', 3)]
        second_rows = [('e', 4), ('f', 5), ('g', 6)]
        output = []

        cfg = gibbon.Configuration()
        for name, rows in (('src1', first_rows), ('src2', second_rows)):
            cfg.add_configuration(
                name, source=gibbon.SequenceWrapper, iterable=rows)
        cfg.add_configuration(
            'tgt', target=gibbon.SequenceWrapper, container=output)

        self.w.prepare(cfg)
        self.w.run(gibbon.get_async_executor(shutdown=True))

        self.assertGreater(len(output), 0)
        self.assertEqual(len(output), len(first_rows) + len(second_rows))

        actual = dict(output)
        expected = dict(first_rows + second_rows)
        # Iterating a dict yields its keys, so sorted(d) sorts the keys.
        self.assertSequenceEqual(sorted(actual), sorted(expected))
        self.assertSequenceEqual(sorted(actual.values()),
                                 sorted(expected.values()))