Example #1
0
    def test_100_downstream(self):
        """Union 100 identity-mapped copies of one PCollection and check the sum."""
        source = self._pipeline.parallelize([1, 2])

        # Every copy is its own identity map derived from the same source.
        copies = [source.map(lambda x: x) for _ in xrange(100)]

        total = transforms.union(*copies).sum().get()
        # Each of the 100 copies contributes 1 + 2 = 3.
        self.assertEqual(100 * 3, total)
    def test_100_downstream(self):
        """Union 100 identity-mapped copies of one PCollection and check the sum."""
        source = self._pipeline.parallelize([1, 2])

        # Every copy is its own identity map derived from the same source.
        copies = [source.map(lambda x: x) for _ in xrange(100)]

        total = transforms.union(*copies).sum().get()
        # Each of the 100 copies contributes 1 + 2 = 3.
        self.assertEqual(100 * 3, total)
Example #3
0
    def end_serde_test(self):
        """Round-trip every recorded (serde, value) pair through the pipeline.

        For each entry of ``self._checking_condition`` the value is keyed with
        ``1`` and then serialized and deserialized with every combination of
        the ``entity.KVSerializeFn`` / ``entity.KVDeserializeFn`` functions and
        their pure-Python counterparts.  Each round-tripped record is asserted
        to equal the original ``(1, value)`` pair.  All checks are unioned and
        cached, and the pipeline is run once at the end.
        """
        import sys
        from bigflow.core import entity

        # The functions below are handed to the pipeline inside the loop, but
        # the pipeline is only run after the loop has finished.  Building them
        # in factories gives every function its own bindings, so none of them
        # can observe the loop variables of a later iteration.
        def _select(index):
            """Return a mapper producing ``(1, record[index])``."""
            return lambda x: (1, x[index])

        def _python_kv_fns(key_serde, value_serde):
            """Return pure-Python (serialize, deserialize) functions for a kv pair."""
            serialize = lambda kv: (key_serde.serialize(kv[0]),
                                    value_serde.serialize(kv[1]))
            deserialize = lambda kv: (key_serde.deserialize(kv[0]),
                                      value_serde.deserialize(kv[1]))
            return serialize, deserialize

        def _expect(expected):
            """Return a mapper asserting that its record equals ``expected``."""
            def _assert_eq_val(v):
                assert v == expected
            return _assert_eq_val

        logger.info(str(self._checking_condition))
        values = [condition[1] for condition in self._checking_condition]
        # Pack all values into a single record to avoid exceeding 32 map nodes
        # (a Hadoop limitation).
        p_values = self._pipeline.parallelize([values])

        out = []
        for (i, (sd, value)) in enumerate(self._checking_condition):
            sd1 = serde.of(int)
            sd2 = sd

            cpp_deserialize_fn = entity.KVDeserializeFn(sd1, sd2)
            cpp_serialize_fn = entity.KVSerializeFn(sd1, sd2)
            python_serialize_fn, python_deserialize_fn = _python_kv_fns(sd1, sd2)

            serialize_fns = [cpp_serialize_fn, python_serialize_fn]
            deserialize_fns = [cpp_deserialize_fn, python_deserialize_fn]

            check_value = _expect((1, value))

            # Exercise all four serializer/deserializer combinations.
            for serialize_fn in serialize_fns:
                for deserialize_fn in deserialize_fns:
                    out.append(
                        p_values.map(_select(i))
                        .map(serialize_fn)
                        .map(deserialize_fn)
                        .map(check_value))
        if out:
            transforms.union(*out).cache()
        else:
            # Nothing was registered, so there is nothing to union.
            sys.stderr.write("SKIP a test!!!\n")
        self._pipeline.run()
Example #4
0
    def test_modify_left_param(self):
        """Check that a reducer may mutate and return its left argument.

        The same in-place reducer is applied twice to one input, and both
        reductions are expected to yield the element-wise sum.
        """
        rows = self._pipeline.parallelize([[1, 2, 3], [6, 5, 4]])

        def _accumulate(left, right):
            # Updates `left` in place; as the test name indicates, that is the
            # behaviour being exercised.
            for pos in range(3):
                left[pos] += right[pos]
            return left

        first = rows.reduce(_accumulate)
        second = rows.reduce(_accumulate)
        combined = transforms.union(first, second).get()
        self.assertEqual([[7, 7, 7], [7, 7, 7]], combined)
Example #5
0
    def test_modify_left_param(self):
        """Check that a reducer may mutate and return its left argument.

        The same in-place reducer is applied twice to one input, and both
        reductions are expected to yield the element-wise sum.
        """
        rows = self._pipeline.parallelize([[1, 2, 3], [6, 5, 4]])

        def _accumulate(left, right):
            # Updates `left` in place; as the test name indicates, that is the
            # behaviour being exercised.
            for pos in range(3):
                left[pos] += right[pos]
            return left

        first = rows.reduce(_accumulate)
        second = rows.reduce(_accumulate)
        combined = transforms.union(first, second).get()
        self.assertEqual([[7, 7, 7], [7, 7, 7]], combined)
Example #6
0
    def end_serde_test(self):
        """Round-trip every recorded (serde, value) pair through the pipeline.

        For each entry of ``self._checking_condition`` the value is keyed with
        ``1`` and then serialized and deserialized with every combination of
        the ``entity.KVSerializeFn`` / ``entity.KVDeserializeFn`` functions and
        their pure-Python counterparts.  Each round-tripped record is asserted
        to equal the original ``(1, value)`` pair.  All checks are unioned and
        cached, and the pipeline is run once at the end.
        """
        import sys
        from bigflow.core import entity

        # The functions below are handed to the pipeline inside the loop, but
        # the pipeline is only run after the loop has finished.  Building them
        # in factories gives every function its own bindings, so none of them
        # can observe the loop variables of a later iteration.
        def _select(index):
            """Return a mapper producing ``(1, record[index])``."""
            return lambda x: (1, x[index])

        def _python_kv_fns(key_serde, value_serde):
            """Return pure-Python (serialize, deserialize) functions for a kv pair."""
            serialize = lambda kv: (key_serde.serialize(kv[0]),
                                    value_serde.serialize(kv[1]))
            deserialize = lambda kv: (key_serde.deserialize(kv[0]),
                                      value_serde.deserialize(kv[1]))
            return serialize, deserialize

        def _expect(expected):
            """Return a mapper asserting that its record equals ``expected``."""
            def _assert_eq_val(v):
                assert v == expected
            return _assert_eq_val

        logger.info(str(self._checking_condition))
        values = [condition[1] for condition in self._checking_condition]
        # Pack all values into a single record to avoid exceeding 32 map nodes
        # (a Hadoop limitation).
        p_values = self._pipeline.parallelize([values])

        out = []
        for (i, (sd, value)) in enumerate(self._checking_condition):
            sd1 = serde.of(int)
            sd2 = sd

            cpp_deserialize_fn = entity.KVDeserializeFn(sd1, sd2)
            cpp_serialize_fn = entity.KVSerializeFn(sd1, sd2)
            python_serialize_fn, python_deserialize_fn = _python_kv_fns(sd1, sd2)

            serialize_fns = [cpp_serialize_fn, python_serialize_fn]
            deserialize_fns = [cpp_deserialize_fn, python_deserialize_fn]

            check_value = _expect((1, value))

            # Exercise all four serializer/deserializer combinations.
            for serialize_fn in serialize_fns:
                for deserialize_fn in deserialize_fns:
                    out.append(
                        p_values.map(_select(i))
                        .map(serialize_fn)
                        .map(deserialize_fn)
                        .map(check_value))
        if out:
            transforms.union(*out).cache()
        else:
            # Nothing was registered, so there is nothing to union.
            sys.stderr.write("SKIP a test!!!\n")
        self._pipeline.run()
Example #7
0
    def union(self, other, *others, **option):
        """
        Combine this object's elements with all elements of the other
        PCollections/PObjects into a single PCollection.
        Equivalent to ``transforms.union(self, other, *others)``.

        Args:
          other (PCollection or PObject):  another PCollection/PObject
          *others:  further PCollections/PObjects
          **option:  keyword options forwarded unchanged to ``transforms.union``

        Returns:
          PCollection:  the PCollection representing the result

        >>> _p1 = _pipeline.parallelize(1)
        >>> _p2 = _pipeline.parallelize([2, 3])
        >>> _p1.union(_p2).get()
        [1, 2, 3]
        """
        # `others` arrives as a tuple, so the operands can be concatenated.
        operands = (self, other) + others
        return transforms.union(*operands, **option)
Example #8
0
    def union(self, other, *others, **option):
        """
        Combine this object's elements with all elements of the other
        PCollections/PObjects into a single PCollection.
        Equivalent to ``transforms.union(self, other, *others)``.

        Args:
          other (PCollection or PObject):  another PCollection/PObject
          *others:  further PCollections/PObjects
          **option:  keyword options forwarded unchanged to ``transforms.union``

        Returns:
          PCollection:  the PCollection representing the result

        >>> _p1 = _pipeline.parallelize(1)
        >>> _p2 = _pipeline.parallelize([2, 3])
        >>> _p1.union(_p2).get()
        [1, 2, 3]
        """
        # `others` arrives as a tuple, so the operands can be concatenated.
        operands = (self, other) + others
        return transforms.union(*operands, **option)
Example #9
0
    def union(self, other, *others, **options):
        """
        Combine this object's elements with all elements of the other
        PCollections/PObjects into a new PCollection.
        Equivalent to
        :func:`bigflow.transforms.union(self, other, *others)
        <bigflow.transforms.union>`

        Args:
          other (PCollection or PObject):  another PCollection/PObject
          *others:  further PCollections/PObjects
          **options:  keyword options forwarded unchanged to ``transforms.union``

        Returns:
          PCollection:  the PCollection representing the result

        >>> _p1 = _pipeline.parallelize([1, 2, 3, 4])
        >>> _p2 = _pipeline.parallelize([5, 6, 7, 8])
        >>> _p1.union(_p2).get()
        [1, 2, 3, 4, 5, 6, 7, 8]
        """
        # `others` arrives as a tuple, so the operands can be concatenated.
        operands = (self, other) + others
        return transforms.union(*operands, **options)
Example #10
0
    def union(self, other, *others, **options):
        """
        Combine this object's elements with all elements of the other
        PCollections/PObjects into a new PCollection.
        Equivalent to
        :func:`bigflow.transforms.union(self, other, *others)
        <bigflow.transforms.union>`

        Args:
          other (PCollection or PObject):  another PCollection/PObject
          *others:  further PCollections/PObjects
          **options:  keyword options forwarded unchanged to ``transforms.union``

        Returns:
          PCollection:  the PCollection representing the result

        >>> _p1 = _pipeline.parallelize([1, 2, 3, 4])
        >>> _p2 = _pipeline.parallelize([5, 6, 7, 8])
        >>> _p1.union(_p2).get()
        [1, 2, 3, 4, 5, 6, 7, 8]
        """
        # `others` arrives as a tuple, so the operands can be concatenated.
        operands = (self, other) + others
        return transforms.union(*operands, **options)
Example #11
0
def _if(cond_val, if_true, if_false):
    """Union ``if_true`` and ``if_false`` after filtering each on ``cond_val``.

    ``cond_val`` is passed to both ``filter`` calls as an extra argument and
    each predicate looks only at its second parameter: ``if_true`` keeps
    records when that parameter is truthy, ``if_false`` when it is falsy.
    """
    def _cond_holds(_, v):
        return v

    def _cond_fails(_, v):
        return not v

    kept_when_true = if_true.filter(_cond_holds, cond_val)
    kept_when_false = if_false.filter(_cond_fails, cond_val)
    return transforms.union(kept_when_true, kept_when_false)
Example #12
0
 def _test_value_fn(a, b, c, d, e):
     """Assert that ``e`` equals 200, then return the union of ``a`` to ``d``."""
     # `self` is taken from the enclosing scope, which is not shown here.
     self.assertEqual(200, e)
     operands = (a, b, c, d)
     return transforms.union(*operands)
Example #13
0
        self.passertEqual(expect, analytics)

        dict_to_tuple = analytics.apply(schema.dict_to_tuple,
                                        ['max_click_num', 'sum_click_num'])
        expect = [(2, 5), (1, 1), (3, 7), (3, 6)]
        self.passertEqual(expect, dict_to_tuple)

        p1 = self._pipeline.parallelize([('a', 1), ('c', 2), ('a', 3),
                                         ('b', 2), ('d', 1)])
        sp1 = p1.apply(schema.tuple_to_dict, ['websites', 'clicknum'])
        p2 = self._pipeline.parallelize([('a', 5), ('c', 6), ('a', 7),
                                         ('b', 8), ('d', 9)])
        sp2 = p2.apply(schema.tuple_to_dict, ['websites', 'clicknum'])
        csp = sp1.apply(schema.cogroup, sp2, fields=['websites'])
        gg = csp.apply_values(lambda x, y: transforms.union(x, y)).apply(
            schema.flatten)

        expect = [{
            'clicknum': 8,
            'websites': 'b'
        }, {
            'clicknum': 2,
            'websites': 'b'
        }, {
            'clicknum': 9,
            'websites': 'd'
        }, {
            'clicknum': 1,
            'websites': 'd'
        }, {
Example #14
0
def _if(cond_val, if_true, if_false):
    """Union ``if_true`` and ``if_false`` after filtering each on ``cond_val``.

    ``cond_val`` is passed to both ``filter`` calls as an extra argument and
    each predicate looks only at its second parameter: ``if_true`` keeps
    records when that parameter is truthy, ``if_false`` when it is falsy.
    """
    def _cond_holds(_, v):
        return v

    def _cond_fails(_, v):
        return not v

    kept_when_true = if_true.filter(_cond_holds, cond_val)
    kept_when_false = if_false.filter(_cond_fails, cond_val)
    return transforms.union(kept_when_true, kept_when_false)