def postfunc_no_combine(_):
    """Forward every buffered (key, value) pair without combining.

    Sends the "Functional#reduce_by_key" header, the value of
    ``GlobalVar.reduce_by_key_list`` and the number of buffered pairs over
    ``GlobalSocket.pipe_to_cpp``, followed by the serialized key and the
    serialized value of each pair.  The buffer is then replaced by a new
    empty list.

    The single positional argument is ignored.
    """
    out = GlobalSocket.pipe_to_cpp
    out.send("Functional#reduce_by_key")
    out.send(GlobalVar.reduce_by_key_list)

    pending = GlobalVar.reduce_by_key_store
    out.send(str(len(pending)))
    for key, value in pending:
        out.send(Serializer.dumps(key))
        out.send(Serializer.dumps(value))

    # Start the next round with a fresh buffer.
    GlobalVar.reduce_by_key_store = []
def postfunc_combine_hash(op):
    """Combine buffered pairs per key with a dict, then send the result.

    After the "Functional#reduce_by_key" header and the value of
    ``GlobalVar.reduce_by_key_list``, values that share a key are folded
    together with ``op.func`` in buffer order.  The number of distinct keys
    is sent next, followed by the serialized key and serialized value of
    each combined entry.

    Keys are used as dict keys, so they must be hashable.
    """
    out = GlobalSocket.pipe_to_cpp
    out.send("Functional#reduce_by_key")
    out.send(GlobalVar.reduce_by_key_list)

    combined = {}
    for key, value in GlobalVar.reduce_by_key_store:
        if key not in combined:
            combined[key] = value
        else:
            combined[key] = op.func(combined[key], value)

    out.send(str(len(combined)))
    for key, value in combined.items():
        out.send(Serializer.dumps(key))
        out.send(Serializer.dumps(value))
def postfunc_combine_2(op):
    """Combine buffered pairs per key via sort + groupby, then send them.

    After the "Functional#reduce_by_key" header and the value of
    ``GlobalVar.reduce_by_key_list``, the buffered pairs are sorted, grouped
    by their first element and each group is folded with ``op.func``.  The
    number of combined pairs is sent next, followed by the serialized key
    and serialized value of each pair.
    """
    GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key")
    GlobalSocket.pipe_to_cpp.send(GlobalVar.reduce_by_key_list)

    def reduce_func(x, y):
        # Keep the key of the left pair and fold the two values.
        return x[0], op.func(x[1], y[1])

    # Flat list: serialized key, serialized value, serialized key, ...
    send_buffer = []
    ordered = sorted(GlobalVar.reduce_by_key_store)
    for _, group in groupby(ordered, key=lambda x: x[0]):
        k, v = reduce(reduce_func, group)
        send_buffer.append(Serializer.dumps(k))
        send_buffer.append(Serializer.dumps(v))

    # Two entries per pair.  Floor division keeps the count an integer
    # string ("3", not "3.0") even when true division is in effect.
    GlobalSocket.pipe_to_cpp.send(str(len(send_buffer) // 2))
    for x in send_buffer:
        GlobalSocket.pipe_to_cpp.send(x)
def end_postfunc(_):
    """Finish a reduce: publish the local result and fold the replies.

    Sends the "Functional#reduce_end" header and the serialized
    ``GlobalVar.reduce_res``.  It then reads messages from
    ``GlobalSocket.pipe_from_cpp`` until a falsy message arrives.  Each
    message is deserialized; ``None`` values are skipped and the rest are
    folded left-to-right with ``GlobalVar.reduce_func``.  The folded value
    (``None`` if nothing was folded) is serialized and sent back, and the
    reduce state on ``GlobalVar`` is cleared.

    The single positional argument is ignored.
    """
    GlobalSocket.pipe_to_cpp.send("Functional#reduce_end")
    GlobalSocket.pipe_to_cpp.send(Serializer.dumps(GlobalVar.reduce_res))

    folded = None
    message = GlobalSocket.pipe_from_cpp.recv()
    while message:
        value = Serializer.loads(message)
        if value is not None:
            if folded is None:
                # The first usable value seeds the accumulator.
                folded = value
            else:
                folded = GlobalVar.reduce_func(folded, value)
        message = GlobalSocket.pipe_from_cpp.recv()

    GlobalSocket.pipe_to_cpp.send(Serializer.dumps(folded))

    GlobalVar.reduce_res = None
    GlobalVar.reduce_func = None
def postfunc_combine_1(op):
    """Combine buffered pairs per key via an in-place sort and one scan.

    After the "Functional#reduce_by_key" header and the value of
    ``GlobalVar.reduce_by_key_list``, the buffer is sorted in place by key
    and scanned once.  Consecutive pairs with equal keys are folded with
    ``op.func``.  The number of combined pairs is sent next, followed by the
    serialized key and serialized value of each pair.  An empty buffer
    results in a count of "0" and no pairs.
    """
    GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key")
    GlobalSocket.pipe_to_cpp.send(GlobalVar.reduce_by_key_list)

    GlobalVar.reduce_by_key_store.sort(key=lambda x: x[0])

    # Flat list: serialized key, serialized value, serialized key, ...
    send_buffer = []
    if GlobalVar.reduce_by_key_store:
        prev_x, prev_y = GlobalVar.reduce_by_key_store[0]
        for x, y in islice(GlobalVar.reduce_by_key_store, 1, None):
            if x != prev_x:
                # Key changed: the previous run is complete, emit it.
                send_buffer.append(Serializer.dumps(prev_x))
                send_buffer.append(Serializer.dumps(prev_y))
                prev_x, prev_y = x, y
            else:
                prev_y = op.func(prev_y, y)
        # Emit the final run, which the loop never flushes.
        send_buffer.append(Serializer.dumps(prev_x))
        send_buffer.append(Serializer.dumps(prev_y))

    # Two entries per pair.  Floor division keeps the count an integer
    # string ("3", not "3.0") even when true division is in effect.
    GlobalSocket.pipe_to_cpp.send(str(len(send_buffer) // 2))
    for x in send_buffer:
        GlobalSocket.pipe_to_cpp.send(x)
def postfunc_combine_n2n(_):
    """Attempt 4 (hash map): partition buffered pairs across workers.

    Sends the "Functional#reduce_by_key" header, the value of
    ``GlobalVar.reduce_by_key_list`` and a pair count of "0" over
    ``GlobalSocket.pipe_to_cpp``.  The buffered pairs themselves are sent
    through ``GlobalN2NSocket``: each (key, value) goes into the bucket
    ``hash(key) % GlobalVar.num_workers``, and every bucket is serialized
    and sent to the worker with the same index.

    The single positional argument is ignored.
    """
    out = GlobalSocket.pipe_to_cpp
    out.send("Functional#reduce_by_key")
    out.send(GlobalVar.reduce_by_key_list)
    out.send("0")

    buckets = [[] for _ in range(GlobalVar.num_workers)]

    store = GlobalVar.reduce_by_key_store
    if store:
        for key, value in store.items():
            target = hash(key) % GlobalVar.num_workers
            buckets[target].append((key, value))

    # NOTE(review): the original layout was lost; this loop is assumed to
    # run even when the store is empty, so every worker gets one (possibly
    # empty) message -- confirm.
    for worker in range(GlobalVar.num_workers):
        GlobalN2NSocket.send(worker, Serializer.dumps(buckets[worker]))
def func(_, data):
    """Serialize each element of ``data`` into the difference store.

    Every element is passed through ``Serializer.dumps`` and the result is
    appended to ``GlobalVar.difference_store``.  The first argument is
    ignored.
    """
    for element in data:
        encoded = Serializer.dumps(element)
        GlobalVar.difference_store.append(encoded)
def func(_, data):
    """Serialize each element of ``data`` into the distinct store.

    Every element is passed through ``Serializer.dumps`` and the result is
    appended to ``GlobalVar.distinct_store``.  The first argument is
    ignored.
    """
    for record in data:
        serialized = Serializer.dumps(record)
        GlobalVar.distinct_store.append(serialized)
def func(_, data):
    """Serialize (key, value) records into the group-by-key store.

    Each element of ``data`` must be a tuple or list of exactly two items.
    Its key and value are serialized separately with ``Serializer.dumps``
    and appended to ``GlobalVar.group_by_key_store`` as a 2-tuple.  The
    first argument is ignored.

    Raises:
        AssertionError: if an element is not a two-item tuple or list.
    """
    for x in data:
        # Compare the length by value: `is` on an int literal only works
        # through interpreter-specific small-integer caching.
        assert isinstance(x, (tuple, list)) and len(x) == 2, \
            "expected a (key, value) pair, got %r" % (x,)
        GlobalVar.group_by_key_store.append(
            (Serializer.dumps(x[0]), Serializer.dumps(x[1])))
def end_postfunc(_):
    """Finish a collect: send the collected list and drop it.

    Sends the "Functional#collect_end" header, the name "collect_list" and
    the serialized ``GlobalVar.data_chunk["collect_list"]`` over
    ``GlobalSocket.pipe_to_cpp``, then deletes that entry from
    ``GlobalVar.data_chunk``.

    The single positional argument is ignored.
    """
    out = GlobalSocket.pipe_to_cpp
    out.send("Functional#collect_end")
    out.send("collect_list")

    payload = Serializer.dumps(GlobalVar.data_chunk["collect_list"])
    out.send(payload)

    # Remove the entry only after it has been sent.
    del GlobalVar.data_chunk["collect_list"]