def data_generation_post():
    global coordinator_db, username, parallel_ssh_cmd, generate_command,\
        generate_data_size

    print "Data generation post"

    if bottle.request.POST.generate_data:
        print "Generating data"

        data_size = int(bottle.request.POST.data_size)
        unit = bottle.request.POST.unit

        print "Generating %d%s of data..." % (data_size, unit)

        files_per_disk = int(bottle.request.POST.files_per_disk)
        generate_data_size = unitconversion.convert(data_size, unit, "B")

        skewed_data = bool(bottle.request.POST.skewed_data)
        replica_data = bool(bottle.request.POST.replica_data)

        # Extra generator flags: -s for skewed input, -r 2 for two-way
        # replicated input.
        extra_args = ""
        if skewed_data:
            extra_args += "-s "
        if replica_data:
            extra_args += "-r 2 "

        # Launch the input generator on every node in the background via
        # plumbum's BG modifier.
        generate_command = parallel_ssh_cmd[
            "%s --no_sudo %s-g -n%d %d%s gensort" % (
                os.path.join(SCRIPT_DIR, os.pardir,
                             "generate_graysort_inputs.py"),
                extra_args, files_per_disk, data_size, unit)] & BG

    elif bottle.request.POST.abort_data_generation:
        print "Aborting data generation"

        pkill["-f"]["generate_graysort_inputs.py"]()
        generate_command = None

    elif bottle.request.POST.wipe_input:
        allowed = bottle.request.POST.getall('allow_wipe_data')

        if len(allowed) == 1 and allowed[0] == "True":
            print "Wiping input data"

            parallel_ssh_cmd[
                "%s input %s" % (
                    os.path.join(SCRIPT_DIR, os.pardir, "clear_disks.sh"),
                    disk_mountpoint)]()

    elif bottle.request.POST.wipe_intermediate_output:
        allowed = bottle.request.POST.getall('allow_wipe_data')

        if len(allowed) == 1 and allowed[0] == "True":
            print "Wiping intermediate/output data"

            parallel_ssh_cmd[
                "%s noinput %s" % (
                    os.path.join(SCRIPT_DIR, os.pardir, "clear_disks.sh"),
                    disk_mountpoint)]()

    elif bottle.request.POST.wipe_all:
        allowed = bottle.request.POST.getall('allow_wipe_data')

        if len(allowed) == 1 and allowed[0] == "True":
            print "Wiping all data"

            parallel_ssh_cmd[
                "%s all %s" % (
                    os.path.join(SCRIPT_DIR, os.pardir, "clear_disks.sh"),
                    disk_mountpoint)]()

    # Call the normal /data_generation method.
    return data_generation()
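# A minimal sketch of how a status check for the generator launched above
# could look. "generate_command" holds the plumbum Future returned by "& BG",
# so polling that future is enough to tell whether generation is still
# running. The route name and JSON response shape are assumptions for
# illustration, not part of the original coordinator code.
@bottle.route('/data_generation_status')
def data_generation_status():
    global generate_command

    # Future.ready() returns True once the background command has exited.
    if generate_command is not None and not generate_command.ready():
        return {"running": True}

    return {"running": False}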
def load_constraints(filename):
    with open(filename, 'r') as fp:
        constraints = json.load(fp)

    # Normalize each I/O-bound stage's maximum per-worker rate, given as a
    # "<number> <unit>" string, to a numeric rate in bytes per second.
    for phase in constraints:
        for stage in constraints[phase][consts.STAGES_KEY]:
            stage_info = constraints[phase][consts.STAGES_KEY][stage]

            if consts.IO_BOUND_KEY in stage_info:
                (rate, sep, unit) = stage_info[
                    consts.MAX_RATE_PER_WORKER_KEY].partition(' ')
                rate = float(rate)
                rate_Bps = convert(rate, unit, "Bps")

                stage_info[consts.MAX_RATE_PER_WORKER_KEY] = rate_Bps

    return constraints
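# A hedged usage sketch for load_constraints. The real key strings come from
# the consts module; the literals "stages", "io_bound", and
# "max_rate_per_worker" below are assumptions chosen only to illustrate the
# expected file shape. After loading, each human-readable rate string is
# replaced by a numeric rate in bytes per second.
import json

example_constraints = {
    "phase_one": {
        "stages": {
            "reader": {
                "io_bound": True,
                "max_rate_per_worker": "100 MBps"
            },
            "writer": {
                "io_bound": True,
                "max_rate_per_worker": "90 MBps"
            }
        }
    }
}

with open("example_constraints.json", "w") as fp:
    json.dump(example_constraints, fp)

constraints = load_constraints("example_constraints.json")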
def _make_cell_readable(self, cell_value):
    # Make sure we don't try to convert non-numeric types
    if type(cell_value) not in [int, float]:
        return cell_value

    units = ["B", "KiB", "MiB", "GiB", "TiB"]

    if cell_value > 0:
        # floor(log_2(X)) gives the exponent of the largest power of 2 that
        # does not exceed X. Dividing it by 10 gives the unit index
        # (2^0 = bytes, 2^10 = kibibytes, and so on, so 0 = bytes,
        # 1 = kibibytes, etc.)
        cell_value_dest_unit = units[min(
            int(math.floor(math.log(cell_value, 2) / 10)),
            len(units) - 1)]

        cell_value = "%.2f %s" % (
            convert(cell_value, "B", cell_value_dest_unit),
            cell_value_dest_unit)
    else:
        cell_value = "0 B"

    return cell_value
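# Worked example of the unit selection above, as a standalone sketch. The
# "_readable" helper is hypothetical and bypasses the class and the
# unitconversion module: 1536 bytes gives log2(1536) ~= 10.58, so
# floor(10.58 / 10) = 1, which selects "KiB", and 1536 / 1024 = 1.50 KiB.
import math

def _readable(num_bytes):
    units = ["B", "KiB", "MiB", "GiB", "TiB"]
    if num_bytes <= 0:
        return "0 B"
    # Exponent of the largest power of 1024 not exceeding the value, clamped
    # so anything above TiB is still printed in TiB.
    exponent = min(int(math.floor(math.log(num_bytes, 2) / 10)),
                   len(units) - 1)
    return "%.2f %s" % (num_bytes / float(1024 ** exponent), units[exponent])

print _readable(1536)         # 1.50 KiB
print _readable(3 * 2 ** 30)  # 3.00 GiB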
def postprocess_rate_data(data):
    display_data = {}

    disk_data = {}

    for data_key in sorted(data.keys()):
        (job, phase, epoch) = data_key
        epoch_data = data[data_key]

        if phase is None and epoch is None:
            # This entry carries cluster-wide disk information rather than
            # per-epoch statistics; remember it for the entries that follow.
            disk_data = epoch_data["disks"]

            for key, value in disk_data.items():
                disk_data[key] = value

            continue

        epoch_display_data = {}
        epoch_display_data["job"] = job
        epoch_display_data["phase"] = phase
        epoch_display_data["epoch"] = epoch

        host_data = epoch_data["hosts"]
        stats_data = epoch_data["stats"]

        epoch_display_data["intermediate_disks"] = sum(
            (disk_data[host]["num_intermediate_disks"] for host in disk_data))

        epoch_display_data["num_nodes"] = len(host_data)

        input_size = stats_data["input_size"]

        if "output_size" in stats_data:
            output_size = stats_data["output_size"]
        else:
            output_size = 0

        max_size = max(input_size, output_size)

        # Pre-compute each size in every display unit the UI might need.
        for (size_name, size) in [("input_size", input_size),
                                  ("output_size", output_size),
                                  ("max_size", max_size)]:
            epoch_display_data[size_name] = {}

            for unit in ["B", "MB", "MiB", "GB", "Gb", "GiB", "TB"]:
                epoch_display_data[size_name][unit] = convert(size, "B", unit)

        # Total elapsed time spans the earliest start to the latest stop
        # timestamp, converted from microseconds.
        epoch_display_data["total_time"] = {}

        if "timestamps" in stats_data:
            smallest_start_time = stats_data["timestamps"]["start"].min()
            largest_stop_time = stats_data["timestamps"]["stop"].max()

            total_time = largest_stop_time - smallest_start_time

            for unit in ["s", "min"]:
                epoch_display_data["total_time"][unit] = convert(
                    total_time, "us", unit)
        else:
            for unit in ["s", "min"]:
                epoch_display_data["total_time"][unit] = 0

        display_data[data_key] = epoch_display_data

    return display_data
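# A hedged sketch of the input shape postprocess_rate_data expects: the
# (job, None, None) entry carries per-host disk counts, while each
# (job, phase, epoch) entry carries host and stats dictionaries. Using numpy
# arrays for the start/stop timestamps (in microseconds) is an assumption;
# any values exposing min() and max() would do.
import numpy

example_data = {
    ("job_1", None, None): {
        "disks": {
            "node-0": {"num_intermediate_disks": 8},
            "node-1": {"num_intermediate_disks": 8}
        }
    },
    ("job_1", "phase_one", 0): {
        "hosts": {"node-0": {}, "node-1": {}},
        "stats": {
            "input_size": 10 ** 12,
            "output_size": 10 ** 12,
            "timestamps": {
                "start": numpy.array([0, 2 * 10 ** 6]),
                "stop": numpy.array([60 * 10 ** 6, 62 * 10 ** 6])
            }
        }
    }
}

display_data = postprocess_rate_data(example_data)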