コード例 #1
0
 def postfunc_no_combine(_):
     """Forward every buffered (key, value) pair to C++ without combining.

     Sends the "Functional#reduce_by_key" header, then
     ``GlobalVar.reduce_by_key_list``, then the number of buffered pairs,
     and finally each key and value, serialized one after the other.
     ``GlobalVar.reduce_by_key_store`` is replaced by an empty list once
     everything has been sent.

     Args:
         _: unused.
     """
     to_cpp = GlobalSocket.pipe_to_cpp
     to_cpp.send("Functional#reduce_by_key")
     to_cpp.send(GlobalVar.reduce_by_key_list)

     pairs = GlobalVar.reduce_by_key_store
     to_cpp.send(str(len(pairs)))
     for key, value in pairs:
         to_cpp.send(Serializer.dumps(key))
         to_cpp.send(Serializer.dumps(value))

     GlobalVar.reduce_by_key_store = []
コード例 #2
0
    def postfunc_combine_hash(op):
        """Combine buffered pairs per key in a dict, then send them to C++.

        Values in ``GlobalVar.reduce_by_key_store`` that share a key are
        folded together with ``op.func`` before anything is serialized, so
        exactly one pair per distinct key is sent.  Keys must be hashable
        because they are used as dict keys.

        Args:
            op: object whose ``func(a, b)`` merges two values of one key.
        """
        # send out reduce_by_key_store
        GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key")
        GlobalSocket.pipe_to_cpp.send(GlobalVar.reduce_by_key_list)

        send_buffer = dict()
        for x, y in GlobalVar.reduce_by_key_store:
            if x in send_buffer:
                send_buffer[x] = op.func(send_buffer[x], y)
            else:
                send_buffer[x] = y
        GlobalSocket.pipe_to_cpp.send(str(len(send_buffer)))
        # items() exists on Python 2 and 3 alike; iteritems() is Python 2 only.
        for x, y in send_buffer.items():
            GlobalSocket.pipe_to_cpp.send(Serializer.dumps(x))
            GlobalSocket.pipe_to_cpp.send(Serializer.dumps(y))
コード例 #3
0
    def postfunc_combine_2(op):
        """Combine buffered pairs per key via sort + groupby, then send to C++.

        The pairs in ``GlobalVar.reduce_by_key_store`` are sorted by key,
        grouped by key, and each group is folded into a single value with
        ``op.func``.  The header, ``GlobalVar.reduce_by_key_list`` and the
        number of combined pairs are sent first, followed by the serialized
        keys and values.

        Args:
            op: object whose ``func(a, b)`` merges two values of one key.
        """
        # reduce is a builtin on Python 2 but lives only in functools on
        # Python 3; this import works on both.
        from functools import reduce

        # send out reduce_by_key_store
        GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key")
        GlobalSocket.pipe_to_cpp.send(GlobalVar.reduce_by_key_list)

        def key_of(pair):
            return pair[0]

        def reduce_func(x, y):
            return x[0], op.func(x[1], y[1])

        # combine
        send_buffer = []
        # Sort with the same key groupby uses: sorting whole pairs would also
        # compare the values, which need not be orderable.
        ordered = sorted(GlobalVar.reduce_by_key_store, key=key_of)
        for _, group in groupby(ordered, key=key_of):
            k, v = reduce(reduce_func, group)
            send_buffer.append(Serializer.dumps(k))
            send_buffer.append(Serializer.dumps(v))
        # send_buffer holds two entries per pair.  Floor division keeps the
        # count an integer string ("3", not "3.0") under true division.
        GlobalSocket.pipe_to_cpp.send(str(len(send_buffer) // 2))
        for x in send_buffer:
            GlobalSocket.pipe_to_cpp.send(x)
コード例 #4
0
 def end_postfunc(_):
     """Finish a reduce: publish the local result and merge what comes back.

     Sends the "Functional#reduce_end" header and the serialized
     ``GlobalVar.reduce_res``, then reads messages from
     ``GlobalSocket.pipe_from_cpp`` until an empty one arrives.  Every
     message that deserializes to something other than ``None`` is folded
     into the running result with ``GlobalVar.reduce_func``.  The merged
     result is serialized and sent back, and both ``GlobalVar.reduce_res``
     and ``GlobalVar.reduce_func`` are reset to ``None``.

     Args:
         _: unused.
     """
     to_cpp = GlobalSocket.pipe_to_cpp
     to_cpp.send("Functional#reduce_end")
     to_cpp.send(Serializer.dumps(GlobalVar.reduce_res))

     merged = None
     raw = GlobalSocket.pipe_from_cpp.recv()
     while raw:
         partial = Serializer.loads(raw)
         if partial is not None:
             # The first non-None partial seeds the fold.
             if merged is None:
                 merged = partial
             else:
                 merged = GlobalVar.reduce_func(merged, partial)
         raw = GlobalSocket.pipe_from_cpp.recv()

     to_cpp.send(Serializer.dumps(merged))
     GlobalVar.reduce_res = None
     GlobalVar.reduce_func = None
コード例 #5
0
    def postfunc_combine_1(op):
        """Combine buffered pairs per key with a sorted linear scan, then send.

        ``GlobalVar.reduce_by_key_store`` is sorted in place by key.  Runs of
        equal keys are then folded into one value with ``op.func``.  The
        header, ``GlobalVar.reduce_by_key_list`` and the number of combined
        pairs are sent first, followed by the serialized keys and values.

        Args:
            op: object whose ``func(a, b)`` merges two values of one key.
        """
        # send out reduce_by_key_store
        GlobalSocket.pipe_to_cpp.send("Functional#reduce_by_key")
        GlobalSocket.pipe_to_cpp.send(GlobalVar.reduce_by_key_list)

        # combine
        GlobalVar.reduce_by_key_store.sort(key=lambda x: x[0])
        send_buffer = []
        if GlobalVar.reduce_by_key_store:
            prev_x, prev_y = GlobalVar.reduce_by_key_store[0]
            for x, y in islice(GlobalVar.reduce_by_key_store, 1, None):
                if x != prev_x:
                    # Key changed: the previous run is complete, flush it.
                    send_buffer.append(Serializer.dumps(prev_x))
                    send_buffer.append(Serializer.dumps(prev_y))
                    prev_x, prev_y = x, y
                else:
                    prev_y = op.func(prev_y, y)
            # Flush the final run, which the loop never emits itself.
            send_buffer.append(Serializer.dumps(prev_x))
            send_buffer.append(Serializer.dumps(prev_y))
        # send_buffer holds two entries per pair.  Floor division keeps the
        # count an integer string ("3", not "3.0") under true division.
        GlobalSocket.pipe_to_cpp.send(str(len(send_buffer) // 2))
        for x in send_buffer:
            GlobalSocket.pipe_to_cpp.send(x)
コード例 #6
0
    def postfunc_combine_n2n(_):
        """Bucket the buffered pairs by key hash and send one bucket per worker.

        The header and ``GlobalVar.reduce_by_key_list`` go through the C++
        pipe, followed by the literal "0".  The pairs themselves are placed
        in ``GlobalVar.num_workers`` buckets, chosen by
        ``hash(key) % num_workers``, and every bucket (including empty ones)
        is serialized and sent through ``GlobalN2NSocket``.

        Args:
            _: unused.
        """
        pipe = GlobalSocket.pipe_to_cpp
        pipe.send("Functional#reduce_by_key")
        pipe.send(GlobalVar.reduce_by_key_list)
        pipe.send("0")

        worker_count = GlobalVar.num_workers
        buckets = [[] for _ in xrange(worker_count)]

        store = GlobalVar.reduce_by_key_store
        # An empty store is skipped entirely, so .items() is only called on
        # a non-empty one.
        if store:
            # NOTE(review): hash() of str/bytes is randomized per process on
            # Python 3 unless PYTHONHASHSEED is fixed -- confirm all workers
            # route a given key to the same destination.
            for key, value in store.items():
                buckets[hash(key) % worker_count].append((key, value))

        for worker_id, bucket in enumerate(buckets):
            GlobalN2NSocket.send(worker_id, Serializer.dumps(bucket))
コード例 #7
0
 def func(_, data):
     """Serialize each element of ``data`` into GlobalVar.difference_store.

     Args:
         _: unused.
         data: iterable of elements to serialize and buffer.
     """
     for element in data:
         encoded = Serializer.dumps(element)
         GlobalVar.difference_store.append(encoded)
コード例 #8
0
 def func(_, data):
     """Serialize each element of ``data`` into GlobalVar.distinct_store.

     Args:
         _: unused.
         data: iterable of elements to serialize and buffer.
     """
     # The generator is lazy, so elements are serialized and appended one
     # at a time.
     for encoded in (Serializer.dumps(item) for item in data):
         GlobalVar.distinct_store.append(encoded)
コード例 #9
0
 def func(_, data):
     """Buffer serialized (key, value) pairs in GlobalVar.group_by_key_store.

     Each element's two items are serialized separately and appended to the
     store as a tuple of the two serialized forms.

     Args:
         _: unused.
         data: iterable whose elements are two-item tuples or lists.

     Raises:
         AssertionError: if an element is not a tuple or list of length 2.
     """
     for x in data:
         # Compare the length by value: `is 2` tests object identity, which
         # only happens to work through CPython's small-int caching.
         assert isinstance(x, (tuple, list)) and len(x) == 2
         GlobalVar.group_by_key_store.append(
             (Serializer.dumps(x[0]), Serializer.dumps(x[1])))
コード例 #10
0
 def end_postfunc(_):
     """Send the "collect_list" data chunk to C++ and drop it afterwards.

     Sends the "Functional#collect_end" header and the chunk name, then the
     serialized ``GlobalVar.data_chunk["collect_list"]``.  The entry is
     deleted from ``GlobalVar.data_chunk`` once it has been sent.

     Args:
         _: unused.
     """
     chunk_name = "collect_list"
     to_cpp = GlobalSocket.pipe_to_cpp

     to_cpp.send("Functional#collect_end")
     to_cpp.send(chunk_name)
     # Serialize only after the two header messages are out.
     payload = Serializer.dumps(GlobalVar.data_chunk[chunk_name])
     to_cpp.send(payload)

     del GlobalVar.data_chunk[chunk_name]