def min_max_hash():
    """Run the min/max hash step for the delimiter given in the request.

    Reads ``field_delimiter`` from the current request's JSON body, asks
    ``cmd`` for the hashed keys for that delimiter, and passes them together
    with ``cmd.map_folder_name_path`` and the delimiter to
    ``cmd.min_max_hash``.

    Returns:
        The value of ``jsonify(success=True)``.
    """
    delimiter = request.json['field_delimiter']
    hashed_keys = cmd.hash_keys(delimiter)
    cmd.min_max_hash(hashed_keys, cmd.map_folder_name_path, delimiter)
    return jsonify(success=True)
def shuffle(content):
    """Partition mapped rows by key hash and route each part to its node.

    Every file found under ``Command.map_folder_name_path`` is read as a
    delimited table. For each node description in ``content['nodes_keys']``
    the rows whose ``key_column`` hash falls into that node's range are
    selected; the selection is appended to the local ``shuffled.csv`` when
    the node is this one (``self_node_ip``), otherwise it is sent through a
    ``ShuffleCommand``.

    Args:
        content: Mapping that provides
            ``'field_delimiter'`` - separator used to read and write CSV,
            ``'nodes_keys'`` - iterable of mappings, each with
            ``'hash_keys_range'`` (a pair ``(low, high)``) and
            ``'data_node_ip'``,
            ``'max_hash'`` - upper bound identifying the last node's range.

    Notes:
        A node's range is half-open, ``low <= hash < high``; only the node
        whose ``high`` equals ``content['max_hash']`` also takes rows whose
        hash equals ``high``.
    """
    full_file_path = os.path.join(Command.shuffle_folder_name_path, 'shuffled.csv')
    field_delimiter = content['field_delimiter']

    # Collect all paths up front so the directory walk is finished before
    # any reading or writing starts.
    mapped_files = [
        os.path.join(root, file_name)
        for root, _dirs, file_names in os.walk(Command.map_folder_name_path)
        for file_name in file_names
    ]

    for mapped_file in mapped_files:
        data_f = pd.read_csv(mapped_file, sep=field_delimiter)
        headers = list(data_f.columns)

        # Hash every key once per file instead of once per (node, row) pair.
        # NOTE(review): assumes Command.hash_f depends only on its argument.
        row_hashes = [Command.hash_f(item) for item in data_f.loc[:, 'key_column']]

        for node in content['nodes_keys']:
            range_low, range_high = node["hash_keys_range"]
            last_node = range_high == content['max_hash']

            index_list = [
                index
                for index, hash_item in enumerate(row_hashes)
                if range_low <= hash_item < range_high
                or (last_node and hash_item == range_high)
            ]
            selected = data_f.iloc[index_list]

            if node['data_node_ip'] == self_node_ip:
                # The first write creates the file with a header row; later
                # writes append data rows only.
                already_exists = os.path.isfile(full_file_path)
                selected.to_csv(
                    full_file_path,
                    mode='a' if already_exists else 'w',
                    header=False if already_exists else headers,
                    index=False,
                    encoding='utf-8',
                    sep=field_delimiter,
                )
            else:
                data = {
                    'content': selected.to_json(),
                    'data_node_ip': node['data_node_ip'],
                }
                ShuffleCommand(data, full_file_path, field_delimiter).send()
def get_file_from_cluster():
    """Forward the request's JSON body to ``cmd.get_file_from_cluster``.

    Returns:
        The value of ``jsonify(success=True)``.
    """
    payload = request.json
    cmd.get_file_from_cluster(payload)
    return jsonify(success=True)
def move_file_to_init_folder():
    """Forward the request's JSON body to ``cmd.move_file_to_init_folder``.

    Returns:
        The value of ``jsonify(success=True)``.
    """
    payload = request.json
    cmd.move_file_to_init_folder(payload)
    return jsonify(success=True)
def reduce():
    """Forward the request's JSON body to ``cmd.reduce``.

    Returns:
        The value of ``jsonify(success=True)``.
    """
    payload = request.json
    cmd.reduce(payload)
    return jsonify(success=True)
def clear_data():
    """Forward the request's JSON body to ``cmd.clear_data``.

    Returns:
        The value of ``jsonify(success=True)``.
    """
    payload = request.json
    cmd.clear_data(payload)
    return jsonify(success=True)
def finish_shuffle():
    """Forward the request's JSON body to ``cmd.finish_shuffle``.

    Returns:
        The value of ``jsonify(success=True)``.
    """
    payload = request.json
    cmd.finish_shuffle(payload)
    return jsonify(success=True)
def map():
    """Run ``cmd.map`` on the request's JSON body and report its result.

    Returns:
        The value of ``jsonify`` applied to a dict whose single key,
        ``'mapped_folder_name'``, holds whatever ``cmd.map`` returned.
    """
    mapped_folder = cmd.map(request.json)
    return jsonify({'mapped_folder_name': mapped_folder})
def write():
    """Forward the request's JSON body to ``cmd.write``.

    Returns:
        The value of ``jsonify(success=True)``.
    """
    payload = request.json
    cmd.write(payload)
    return jsonify(success=True)
def create_config_and_filesystem():
    """Initialise folder variables for a file name, then create the folders.

    Reads ``file_name`` from the current request's JSON body, passes it to
    ``cmd.init_folder_variables`` and then calls ``cmd.create_folders``.

    Returns:
        The value of ``jsonify(success=True)``.
    """
    requested_name = request.json["file_name"]
    cmd.init_folder_variables(requested_name)
    cmd.create_folders()
    return jsonify(success=True)