def test_100_downstream(self):
    """Fan one source dataset out into 100 identical map branches.

    Verifies that union over many downstream transforms of the same
    node still yields the correct aggregate sum.
    """
    nums = self._pipeline.parallelize([1, 2])
    # 100 no-op map branches hanging off the same source node.
    downstreams = [nums.map(lambda x: x) for _ in xrange(100)]
    # Each branch sums to 3 (1 + 2), so the union totals 100 * 3.
    self.assertEqual(100 * 3, transforms.union(*downstreams).sum().get())
def end_serde_test(self):
    """Run every serde round-trip check accumulated in self._checking_condition.

    For each (serde, value) pair, build a (1, value) record and push it
    through every combination of the C++ and Python KV serialize /
    deserialize functions, asserting the round trip is lossless.

    Fix: the per-iteration callables previously captured ``sd1``,
    ``sd2``, ``kv_val`` and ``i`` as plain closures.  They execute only
    when the pipeline runs — after the loop has finished — so late
    binding would make every branch use the LAST iteration's values.
    All loop-dependent names are now frozen via default arguments.
    """
    import sys
    from bigflow.core import entity
    logger.info(str(self._checking_condition))
    values = map(lambda condition: condition[1], self._checking_condition)
    # Wrap all values into a single record to avoid exceeding Hadoop's
    # limit of 32 map nodes.
    p_values = self._pipeline.parallelize([values])
    out = []
    for (i, (sd, value)) in enumerate(self._checking_condition):
        sd1 = serde.of(int)
        sd2 = sd
        cpp_deserialize_fn = entity.KVDeserializeFn(sd1, sd2)
        cpp_serialize_fn = entity.KVSerializeFn(sd1, sd2)
        # sd1/sd2 bound as defaults: see docstring (late-binding fix).
        python_deserialize_fn = lambda kv, sd1=sd1, sd2=sd2: (
            sd1.deserialize(kv[0]), sd2.deserialize(kv[1]))
        python_serialize_fn = lambda kv, sd1=sd1, sd2=sd2: (
            sd1.serialize(kv[0]), sd2.serialize(kv[1]))
        serialize_fns = [cpp_serialize_fn, python_serialize_fn]
        deserialize_fns = [cpp_deserialize_fn, python_deserialize_fn]
        kv_val = (1, value)

        # kv_val frozen as a default for the same late-binding reason.
        def _assert_eq_val(v, kv_val=kv_val):
            assert v == kv_val
        for serialize_fn in serialize_fns:
            for deserialize_fn in deserialize_fns:
                # i frozen as a default so each branch reads its own column.
                out.append(
                    p_values.map(lambda x, i=i: (1, x[i])).map(serialize_fn).
                    map(deserialize_fn).map(_assert_eq_val))
    if out:
        transforms.union(*out).cache()
    else:
        print >> sys.stderr, "SKIP a test!!!"
    self._pipeline.run()
def test_modify_left_param(self):
    """Check reduce() tolerates a combiner that mutates its left argument.

    The element-wise sum of [1, 2, 3] and [6, 5, 4] is [7, 7, 7]; two
    independent reduce branches over the same input must both produce it.
    """
    inp = self._pipeline.parallelize([[1, 2, 3], [6, 5, 4]])

    def _sum(x, y):
        # Accumulate y into x in place and hand the same list back.
        for idx in range(3):
            x[idx] += y[idx]
        return x

    result = transforms.union(inp.reduce(_sum), inp.reduce(_sum)).get()
    self.assertEqual([[7, 7, 7], [7, 7, 7]], result)
def end_serde_test(self):
    """Run every serde round-trip check accumulated in self._checking_condition.

    For each (serde, value) pair, build a (1, value) record and push it
    through every combination of the C++ and Python KV serialize /
    deserialize functions, asserting the round trip is lossless.

    Fix: the per-iteration callables previously captured ``sd1``,
    ``sd2``, ``kv_val`` and ``i`` as plain closures.  They execute only
    when the pipeline runs — after the loop has finished — so late
    binding would make every branch use the LAST iteration's values.
    All loop-dependent names are now frozen via default arguments.
    """
    import sys
    from bigflow.core import entity
    logger.info(str(self._checking_condition))
    values = map(lambda condition: condition[1], self._checking_condition)
    # Wrap all values into a single record to avoid exceeding Hadoop's
    # limit of 32 map nodes.
    p_values = self._pipeline.parallelize([values])
    out = []
    for (i, (sd, value)) in enumerate(self._checking_condition):
        sd1 = serde.of(int)
        sd2 = sd
        cpp_deserialize_fn = entity.KVDeserializeFn(sd1, sd2)
        cpp_serialize_fn = entity.KVSerializeFn(sd1, sd2)
        # sd1/sd2 bound as defaults: see docstring (late-binding fix).
        python_deserialize_fn = lambda kv, sd1=sd1, sd2=sd2: (
            sd1.deserialize(kv[0]), sd2.deserialize(kv[1]))
        python_serialize_fn = lambda kv, sd1=sd1, sd2=sd2: (
            sd1.serialize(kv[0]), sd2.serialize(kv[1]))
        serialize_fns = [cpp_serialize_fn, python_serialize_fn]
        deserialize_fns = [cpp_deserialize_fn, python_deserialize_fn]
        kv_val = (1, value)

        # kv_val frozen as a default for the same late-binding reason.
        def _assert_eq_val(v, kv_val=kv_val):
            assert v == kv_val
        for serialize_fn in serialize_fns:
            for deserialize_fn in deserialize_fns:
                # i frozen as a default so each branch reads its own column.
                out.append(p_values.map(lambda x, i=i: (1, x[i]))
                           .map(serialize_fn)
                           .map(deserialize_fn)
                           .map(_assert_eq_val))
    if out:
        transforms.union(*out).cache()
    else:
        print >> sys.stderr, "SKIP a test!!!"
    self._pipeline.run()
def union(self, other, *others, **options):
    """
    Combine this object's elements with all elements of the given
    PCollections/PObjects into a single PCollection.

    Equivalent to ``transforms.union(self, other, *others)``

    The kwargs catch-all is renamed ``**options`` (was ``**option``)
    for consistency with the sibling ``union`` definition; the change
    is invisible to callers.

    Args:
        other (PCollection or PObject): another PCollection/PObject
        *others: additional PCollections/PObjects

    Returns:
        PCollection: the PCollection representing the result

    >>> _p1 = _pipeline.parallelize(1)
    >>> _p2 = _pipeline.parallelize([2, 3])
    >>> _p1.union(_p2).get()
    [1, 2, 3]
    """
    return transforms.union(self, other, *others, **options)
def union(self, other, *others, **options):
    """
    Build a new PCollection holding this object's elements together
    with all elements of the given PCollections/PObjects.

    Equivalent to
    :func:`bigflow.transforms.union(self, other, *others) <bigflow.transforms.union>`

    Args:
        other (PCollection or PObject): another PCollection/PObject
        *others: additional PCollections/PObjects

    Returns:
        PCollection: the PCollection representing the result

    >>> _p1 = _pipeline.parallelize([1, 2, 3, 4])
    >>> _p2 = _pipeline.parallelize([5, 6, 7, 8])
    >>> _p1.union(_p2).get()
    [1, 2, 3, 4, 5, 6, 7, 8]
    """
    result = transforms.union(self, other, *others, **options)
    return result
def _if(cond_val, if_true, if_false):
    """Select between two datasets using the condition side-input.

    Keeps if_true's elements where cond_val is truthy, if_false's
    elements where it is falsy, and merges the two streams.
    """
    true_branch = if_true.filter(lambda _, v: v, cond_val)
    false_branch = if_false.filter(lambda _, v: not v, cond_val)
    return transforms.union(true_branch, false_branch)
def _test_value_fn(a, b, c, d, e):
    # The side-input value e must already have been resolved to 200
    # by the time this callback runs; then merge the four datasets.
    expected = 200
    self.assertEqual(expected, e)
    return transforms.union(a, b, c, d)
self.passertEqual(expect, analytics) dict_to_tuple = analytics.apply(schema.dict_to_tuple, ['max_click_num', 'sum_click_num']) expect = [(2, 5), (1, 1), (3, 7), (3, 6)] self.passertEqual(expect, dict_to_tuple) p1 = self._pipeline.parallelize([('a', 1), ('c', 2), ('a', 3), ('b', 2), ('d', 1)]) sp1 = p1.apply(schema.tuple_to_dict, ['websites', 'clicknum']) p2 = self._pipeline.parallelize([('a', 5), ('c', 6), ('a', 7), ('b', 8), ('d', 9)]) sp2 = p2.apply(schema.tuple_to_dict, ['websites', 'clicknum']) csp = sp1.apply(schema.cogroup, sp2, fields=['websites']) gg = csp.apply_values(lambda x, y: transforms.union(x, y)).apply( schema.flatten) expect = [{ 'clicknum': 8, 'websites': 'b' }, { 'clicknum': 2, 'websites': 'b' }, { 'clicknum': 9, 'websites': 'd' }, { 'clicknum': 1, 'websites': 'd' }, {
def _if(cond_val, if_true, if_false):
    """Route between two branches based on the boolean side-input."""
    # cond is the broadcast condition value; each branch keeps its
    # elements only when the condition matches its polarity.
    keep_true = if_true.filter(lambda _, cond: cond, cond_val)
    keep_false = if_false.filter(lambda _, cond: not cond, cond_val)
    return transforms.union(keep_true, keep_false)