Example #1
def analyze_skew(M, N, k, scheme, af_list):

    dp_records, num_replicas_list = dp_simulation.run(M,
                                                      N,
                                                      k,
                                                      scheme,
                                                      af_list=af_list,
                                                      show_output=False)

    load_records = calculate_load(dp_records, num_replicas_list, af_list)
    print('#######################')
    print(json.dumps(load_records, indent=4))
    print('#######################')
    # Skew factor of the per-node load distribution.
    return calculate_skew_factor(list(load_records.values()))
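The skew helper itself is not shown in this snippet. A minimal sketch of what calculate_skew_factor could look like, assuming a coefficient-of-variation style metric (an assumption for illustration, not the project's actual definition):

import statistics

def calculate_skew_factor(values):
    # Hypothetical sketch: skew as the coefficient of variation (stdev / mean)
    # of the per-node loads; the real helper may be defined differently.
    mean = sum(values) / len(values)
    if mean == 0:
        return 0.0
    return statistics.pstdev(values) / mean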
Example #2
def analyze_partition_skew(M, N, k, scheme, af_list):

    dp_records, num_replicas_list = dp_simulation.run(M,
                                                      N,
                                                      k,
                                                      scheme,
                                                      af_list=af_list,
                                                      show_output=False)

    partition_load_records = calculate_partition_load(dp_records,
                                                      num_replicas_list,
                                                      af_list)
    print('#######################')
    print(partition_load_records)
    print('#######################')
    #print("@@", sum(partition_load_records[n] * num_replicas_list[n] for n in range(len(num_replicas_list))))
    total_replicas = sum(num_replicas_list)
    total_load = sum(af_list)
    expected_load_per_replica = (total_load / total_replicas) / (N / M)
    print("replica list:")
    print(num_replicas_list)
    print("total replicas={}, total_load={}, load per replica={}".format(
        total_replicas, total_load, expected_load_per_replica))
    partition_load_delta_list = [
        abs(partition_load_records[n] - expected_load_per_replica)
        for n in range(len(num_replicas_list))
    ]
    #print("^^", partition_load_delta_list)
    print("**", sum(partition_load_delta_list))

    return calculate_skew_factor(partition_load_records)
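To make the expected-load formula above concrete, a quick worked example with invented numbers:

# Illustrative numbers only: 4 old nodes grow to 8 nodes, 16 replicas, total load 100.
M, N = 4, 8
total_replicas = 16
total_load = 100.0
expected_load_per_replica = (total_load / total_replicas) / (N / M)
print(expected_load_per_replica)  # 3.125: the naive share (6.25) divided by the expansion ratio N/M = 2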
Example #3
def analyze_placement(M, N, k, scheme, af_list):
    dp_records, num_replicas_list = dp_simulation.run(M,
                                                      N,
                                                      k,
                                                      scheme,
                                                      af_list=af_list,
                                                      show_output=False)

    load_records = calculate_load(dp_records, num_replicas_list, af_list)
    return calculate_skew_factor(list(load_records.values()))
Example #4
def analyze_skew(M, N, k, scheme, af_list):

    dp_records, num_replicas_list = dp_simulation.run(M,
                                                      N,
                                                      k,
                                                      scheme,
                                                      af_list=af_list,
                                                      show_output=False)

    # load per node
    load_records = calculate_load(dp_records, num_replicas_list, af_list)
    load_list = list(load_records.values())

    return {
        'max-mean': SkewAnalyzer.calculate_max_mean(load_list),
        'min-max': SkewAnalyzer.calculate_min_max(load_list),
        'avg-colors': ColorAnalyzer.calculate_colors(dp_records)
    }
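SkewAnalyzer and ColorAnalyzer are external to this snippet. Hypothetical sketches of the two load metrics used above, assuming max/mean and min/max ratios (the project's own implementations may differ):

class SkewAnalyzer:
    @staticmethod
    def calculate_max_mean(load_list):
        # Ratio of the most loaded node to the mean load (1.0 = perfectly balanced).
        return max(load_list) / (sum(load_list) / len(load_list))

    @staticmethod
    def calculate_min_max(load_list):
        # Ratio of the least loaded node to the most loaded node (1.0 = perfectly balanced).
        return min(load_list) / max(load_list)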
Example #5
def generate_data_placement(old_nodes, new_nodes, locations, access_statistics, coarse_grained=True, dp_model='rainbow', dp_name=None):
    # assign node id
    nodes = sorted(set(old_nodes + new_nodes))  # should not have duplicate nodes
    #print("nodes:", nodes)
    # assign partition id -> k=1 or k > 1
    max_num_partitions = 1
    if not coarse_grained:
        max_num_partitions = 1
        for node in old_nodes:
            if len(locations[node]) > max_num_partitions:
                max_num_partitions = len(locations[node])
    #print("k:", max_num_partitions)
    #print(json.dumps(locations, indent=4))
    #print(json.dumps(access_statistics, indent=4))
    # create access frequency list
    partition_list = []
    af_list = []
    sorted_partition_list = []
    sorted_af_list = []
    if max_num_partitions == 1:
        for node in old_nodes:
            af_list.append(sum(access_statistics.get(partition, 0) for partition in locations[node]))
            partition_list.append(node)
        # sort by node ip for now
        for node in sorted(partition_list, key=roxie_node_comparator):
            sorted_partition_list.append(node)
            sorted_af_list.append(af_list[partition_list.index(node)])
    else:
        for partition in sorted([partition for node in locations.keys() for partition in locations[node]]):
            af_list.append(access_statistics.get(partition, 0))
            partition_list.append(partition)
        # this can be an issue
        for partition in sorted(partition_list, key=roxie_file_comparator):
            sorted_partition_list.append(partition)
            sorted_af_list.append(af_list[partition_list.index(partition)])
    M = len(old_nodes)
    N = len(nodes)
    k = max_num_partitions
    t = dp_model
    print('+++Running data placement simulation+++')

    dp_records, adjusted_num_replicas_list = dp_simulation.run(M, N, k, t, af_list=sorted_af_list, show_output=True)
    # Sanity check: every partition index is covered by the new placement.
    s = set()
    for p_list in dp_records.values():
        s.update(p_list)
    print('Total unique partitions:', len(s))
    assert len(s) == len(af_list)

    new_locations = {}
    if max_num_partitions == 1:
        # Coarse-grained: all partitions of the source node move to the target node.
        for node_name in dp_records.keys():
            new_locations[nodes[int(node_name)]] = []
            for node_index in dp_records[node_name]:
                new_locations[nodes[int(node_name)]].extend(locations[nodes[node_index]])
    else:
        # Fine-grained: place individual partitions by their index in sorted_partition_list.
        for node_name in dp_records.keys():
            new_locations[nodes[int(node_name)]] = []
            for partition_index in dp_records[node_name]:
                new_locations[nodes[int(node_name)]].append(sorted_partition_list[partition_index])
    return placement.DataPlacement(nodes, sorted_partition_list, new_locations, name=dp_name)
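A hypothetical call illustrating the expected input shapes. All node names, partition names, and statistics below are invented and may not match what roxie_node_comparator / roxie_file_comparator actually expect:

old_nodes = ['10.0.0.1', '10.0.0.2']
new_nodes = ['10.0.0.3']
locations = {
    '10.0.0.1': ['index_part_1', 'index_part_2'],
    '10.0.0.2': ['index_part_3', 'index_part_4'],
}
access_statistics = {'index_part_1': 40, 'index_part_2': 25,
                     'index_part_3': 20, 'index_part_4': 15}
dp = generate_data_placement(old_nodes, new_nodes, locations, access_statistics,
                             coarse_grained=True, dp_model='rainbow', dp_name='example')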
Example #6
def run(ctx, m, n, k, type, workload, frequency):
    if len(frequency) > 0 and ',' in frequency:
        # A comma-separated frequency string takes precedence over the workload argument.
        af_list = [float(f.strip()) for f in frequency.split(',')]
        dp_simulation.run(m, n, k, type, None, af_list=af_list)
    else:
        dp_simulation.run(m, n, k, type, workload, af_list=[])
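For reference, the comma-separated frequency option is parsed into a plain float list before being handed to the simulation, e.g.:

frequency = "0.5, 0.3, 0.2"
af_list = [float(f.strip()) for f in frequency.split(',')]
print(af_list)  # [0.5, 0.3, 0.2]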