コード例 #1
0
    return out


def custom_reduce(data):
    """Combine the values of ``(key, value)`` pairs that share the same key.

    Values are combined with ``+=``, so numbers are summed and sequences
    are concatenated.

    Args:
        data: Iterable of ``(key, value)`` pairs. Keys must be hashable.

    Returns:
        A list of ``(key, combined_value)`` tuples, ordered by the first
        appearance of each key in ``data``.
    """
    import copy

    totals = {}
    for key, value in data:
        if key in totals:
            totals[key] += value
        else:
            # Store a shallow copy of the first value: the in-place ``+=``
            # above would otherwise mutate an object that still belongs to
            # the caller (e.g. a list emitted by the map step).
            totals[key] = copy.copy(value)

    return list(totals.items())


## Same calls
# Per the heading above, the three variants below are meant to be
# equivalent ways of running the same map/reduce job.

# Variant 1: map each chunk of values separately, then reduce.
mr_1 = MapReduce(verbose=False)
for _chunk in (values_1, values_2, values_3):
    mr_1.map_one(custom_map, _chunk)
print(mr_1.reduce(custom_reduce))

# Variant 2: a single map call over all values, then reduce.
# NOTE(review): the trailing 3 presumably matches the three chunks used in
# variant 1 -- confirm against MapReduce.map.
mr_2 = MapReduce(verbose=False)
mr_2.map(custom_map, all_values, 3)
print(mr_2.reduce(custom_reduce))

# Variant 3: map and reduce in one call, with verbose output enabled.
mr_3 = MapReduce(verbose=True)
print(mr_3.map_reduce(custom_map, custom_reduce, all_values, 3))
コード例 #2
0
def _build_data_queue(json_data):
    """Flatten a ``{key: [value, ...]}`` mapping into ``{"k", "v"}`` records.

    One record is produced per (key, value) pair, following the mapping's
    iteration order.
    """
    return [
        {"k": key, "v": value}
        for key in json_data
        for value in json_data[key]
    ]


def main():
    """Run the MPI master/worker map-reduce driver.

    Rank 0 acts as the master:
      1. loads ``application/output/adjacency_list.json`` and hands its
         (key, value) records to ``transmit_data`` with the map tag,
         calling it again for as long as it returns a non-empty result;
      2. does the same, once per file, for every file matching
         ``application/output/map/*.json`` using the reduce tag;
      3. sends a stop message to every other rank.

    Every other rank acts as a worker: it receives messages from the
    master, dispatches them to ``MapReduce.map`` / ``MapReduce.reduce``
    according to the message tag, sends a confirmation back after every
    message, and calls ``store_values()`` once the stop message arrives.
    """
    comm = MPI.COMM_WORLD
    nr_nodes = comm.Get_size()
    rank = comm.Get_rank()

    # Message tags shared by master and workers.
    map_phase = 10
    reduce_phase = 20
    confirmation = 30
    stop_phase = 100
    master = 0

    # master node
    if rank == master:

        # mapping phase
        file_name = 'application/output/adjacency_list.json'
        with open(file_name, 'r', encoding='utf-8') as infile:
            json_data = json.load(infile)

        # transmit data
        # NOTE(review): transmit_data presumably returns the records it
        # could not hand out yet -- confirm against its definition.
        temp_data = transmit_data(
            _build_data_queue(json_data), nr_nodes, comm, map_phase)
        while temp_data:
            temp_data = transmit_data(temp_data, nr_nodes, comm, map_phase)

        # reduction phase
        path = 'application/output/map/*.json'
        for map_file in glob.glob(path):
            with open(map_file, 'r', encoding='utf-8') as infile:
                json_data = json.load(infile)

            # transmit data
            # NOTE(review): unlike the mapping phase, whatever
            # transmit_data returns here is not re-sent (the retry loop
            # was disabled in the original code) -- confirm this is
            # intentional.
            transmit_data(
                _build_data_queue(json_data), nr_nodes, comm, reduce_phase)

        # stopping phase
        # Blocking send: the master returns right after this loop, so a
        # non-blocking send whose request is never completed would not be
        # guaranteed to reach the worker. Workers are always waiting in
        # recv(), so this cannot deadlock.
        for dest_rank in range(1, nr_nodes):
            data = {"k": "", "v": ""}
            comm.send(data, dest=dest_rank, tag=stop_phase)

        print("[" + str(rank) + "] - TERMINATED ")

    # worker nodes
    else:
        is_terminated = False
        mr = MapReduce(rank)

        while not is_terminated:
            status = MPI.Status()
            data = comm.recv(source=master, tag=MPI.ANY_TAG, status=status)
            tag = status.Get_tag()
            if tag == map_phase:
                mr.map(data["k"], data["v"])
            elif tag == reduce_phase:
                mr.reduce(data["k"], data["v"])
            elif tag == stop_phase:
                print("[" + str(rank) + "] - TERMINATED")
                is_terminated = True
            else:
                print("[" + str(rank) + "] - INCORRECT TAG RECEIVED")

            # Echo the message back as a confirmation for every message,
            # including the stop message and unknown tags.
            # NOTE(review): the request returned by isend is never waited
            # on; whether the master receives every confirmation depends
            # on transmit_data, so it is left non-blocking here -- confirm.
            comm.isend(data, dest=master, tag=confirmation)

        mr.store_values()