Ejemplo n.º 1
0
def min_max_hash():
    """Run ``cmd.min_max_hash`` using the delimiter from the JSON request.

    Reads ``field_delimiter`` from the JSON request body, obtains the hashed
    keys via ``cmd.hash_keys`` and passes them, together with
    ``cmd.map_folder_name_path`` and the delimiter, to ``cmd.min_max_hash``.

    Returns:
        The result of ``jsonify(success=True)``.
    """
    delimiter = request.json['field_delimiter']
    hashed_keys = cmd.hash_keys(delimiter)

    cmd.min_max_hash(hashed_keys, cmd.map_folder_name_path, delimiter)

    return jsonify(success=True)
Ejemplo n.º 2
0
def shuffle(content):
    """Split every mapped CSV by key hash and route each part to its node.

    Every file found under ``Command.map_folder_name_path`` is read as a CSV.
    For each node in ``content['nodes_keys']`` the rows whose
    ``Command.hash_f(key_column)`` falls in the node's half-open range
    ``[low, high)`` are selected. The node whose upper bound equals
    ``content['max_hash']`` also receives rows hashing exactly to that bound.

    Rows belonging to this node (``data_node_ip == self_node_ip``) are written
    to ``shuffled.csv`` inside ``Command.shuffle_folder_name_path``; rows for
    any other node are serialised to JSON and handed to ``ShuffleCommand``.

    Args:
        content: Mapping with the keys ``'field_delimiter'`` (CSV separator),
            ``'nodes_keys'`` (iterable of mappings holding
            ``'hash_keys_range'`` and ``'data_node_ip'``) and ``'max_hash'``.
    """
    full_file_path = os.path.join(Command.shuffle_folder_name_path,
                                  'shuffled.csv')
    field_delimiter = content['field_delimiter']

    # Collect all paths up front, before any file is processed.
    mapped_files = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(Command.map_folder_name_path)
        for name in names
    ]

    for mapped_file in mapped_files:
        data_f = pd.read_csv(mapped_file, sep=field_delimiter)
        headers = list(data_f.columns)

        # Hash each key once per file rather than once per (node, row) pair.
        key_hashes = [
            Command.hash_f(key) for key in data_f.loc[:, 'key_column']
        ]

        for node in content['nodes_keys']:
            # Named range_* so the builtins min()/max() are not shadowed.
            range_min, range_max = node["hash_keys_range"]
            # Only the last node's range is closed on the right, so that the
            # maximum hash value is not dropped.
            last_node = range_max == content['max_hash']

            # Positional indices (for .iloc) of the rows owned by this node.
            index_list = [
                position
                for position, key_hash in enumerate(key_hashes)
                if range_min <= key_hash < range_max
                or (last_node and key_hash == range_max)
            ]
            node_rows = data_f.iloc[index_list]

            if node['data_node_ip'] == self_node_ip:
                # The first write creates the file with a header row; later
                # writes append rows only, so the header is not repeated.
                already_exists = os.path.isfile(full_file_path)
                node_rows.to_csv(full_file_path,
                                 mode='a' if already_exists else 'w',
                                 header=False if already_exists else headers,
                                 index=False,
                                 encoding='utf-8',
                                 sep=field_delimiter)
            else:
                data = {
                    'content': node_rows.to_json(),
                    'data_node_ip': node['data_node_ip']
                }

                # NOTE(review): presumably delivers the rows to the remote
                # node identified by data_node_ip -- confirm in ShuffleCommand.
                sc = ShuffleCommand(data, full_file_path, field_delimiter)
                sc.send()
Ejemplo n.º 3
0
def get_file_from_cluster():
    """Pass the JSON request body to ``cmd.get_file_from_cluster``.

    Returns:
        The result of ``jsonify(success=True)``.
    """
    payload = request.json
    cmd.get_file_from_cluster(payload)

    return jsonify(success=True)
Ejemplo n.º 4
0
def move_file_to_init_folder():
    """Pass the JSON request body to ``cmd.move_file_to_init_folder``.

    Returns:
        The result of ``jsonify(success=True)``.
    """
    payload = request.json
    cmd.move_file_to_init_folder(payload)

    return jsonify(success=True)
Ejemplo n.º 5
0
def reduce():
    """Pass the JSON request body to ``cmd.reduce``.

    Returns:
        The result of ``jsonify(success=True)``.
    """
    payload = request.json
    cmd.reduce(payload)
    return jsonify(success=True)
Ejemplo n.º 6
0
def clear_data():
    """Pass the JSON request body to ``cmd.clear_data``.

    Returns:
        The result of ``jsonify(success=True)``.
    """
    payload = request.json
    cmd.clear_data(payload)
    return jsonify(success=True)
Ejemplo n.º 7
0
def finish_shuffle():
    """Pass the JSON request body to ``cmd.finish_shuffle``.

    Returns:
        The result of ``jsonify(success=True)``.
    """
    payload = request.json
    cmd.finish_shuffle(payload)

    return jsonify(success=True)
Ejemplo n.º 8
0
def map():
    """Run ``cmd.map`` on the JSON request body and return its result.

    Returns:
        The result of ``jsonify`` applied to a dict whose
        ``'mapped_folder_name'`` entry holds the value returned by
        ``cmd.map``.
    """
    mapped_folder = cmd.map(request.json)

    return jsonify({'mapped_folder_name': mapped_folder})
Ejemplo n.º 9
0
def write():
    """Pass the JSON request body to ``cmd.write``.

    Returns:
        The result of ``jsonify(success=True)``.
    """
    payload = request.json
    cmd.write(payload)

    return jsonify(success=True)
Ejemplo n.º 10
0
def create_config_and_filesystem():
    """Initialise folder variables for the requested file and create folders.

    Reads ``file_name`` from the JSON request body, passes it to
    ``cmd.init_folder_variables`` and then calls ``cmd.create_folders``.

    Returns:
        The result of ``jsonify(success=True)``.
    """
    requested_name = request.json["file_name"]

    cmd.init_folder_variables(requested_name)
    cmd.create_folders()

    return jsonify(success=True)