def build_docker_image(dockerfile_path: str) -> str:
    """
    Builds an image from the given Dockerfile under a unique, randomly generated name
    :param dockerfile_path: path to the Dockerfile to use for building the image
    :return: the name of the image created
    """
    debug("Starting docker image build for {}".format(dockerfile_path))
    if os.path.isfile(dockerfile_path):
        # if the given path points at a Dockerfile, use its parent directory as the build context
        build_path, _ = os.path.split(dockerfile_path)
    else:
        # otherwise use the directory given
        build_path = dockerfile_path
    if not os.path.exists(build_path):
        raise Exception("Build path {} does not exist".format(build_path))
    _pull_base_image_if_absent("python:latest")
    try:
        repository: str = DockerUtil.get_random_string_of_length(
            20, uppercase=False, numbers=False)
        DOCKER_CLIENT.images.build(path=build_path, tag=repository)
        debug("Image build complete!")
        return repository
    except Exception as e:
        raise Exception(
            "Failed to build image for {}".format(dockerfile_path), e)
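# Hedged usage sketch (not part of the original module; the function name below
# is invented for illustration): it mirrors how _run_scalene_in_docker combines
# build_docker_image with create_docker_container to run an image once and
# capture its logs.
def _example_build_and_run(dockerfile_path: str) -> str:
    image_name = build_docker_image(dockerfile_path)  # path may be a Dockerfile or its directory
    container = create_docker_container(image_name)
    container.start()
    container.wait()
    logs = bytes(container.logs()).decode('UTF-8')
    container.stop()
    return logs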
def _run_build_file_in_browser(dst_path: str) -> None:
    """
    Opens the generated HTML file in the browser
    :param dst_path: path to the directory containing the built client
    :raises: Exception if the file cannot be opened in the browser
    """
    debug("Opening results_visualization in browser")
    try:
        webbrowser.open('file://' + os.path.join(dst_path, "build", "index.html"))
    except Exception as e:
        raise Exception("Could not open file in browser.", e)
def _run_scalene_in_docker(self, program_file_path, scalene_args: List[str]):
    """
    Runs scalene on the given program inside a Docker container and returns the container's output
    """
    debug("Loading the scalene docker container")
    load_docker_image(os.path.abspath("resources/scalene.tar"))
    output_path = build_scalene_dockerfile(program_file_path, scalene_args)
    image_name = build_docker_image(output_path)
    container = create_docker_container(image_name)
    debug("Starting the scalene docker container")
    container.start()
    container.wait()
    output = bytes(container.logs()).decode('UTF-8')
    container.stop()
    return output
def _run_test_container(self, image_name: str, runs: int) -> List[TestResult]:
    """
    Spins up a container from the given image and runs the program 'runs' number of times
    Returns the results for each run
    :param image_name: name of the docker image configured to run the test for an input size
    :param runs: number of times to run the container
    :return: a list of TestResult objects - one for each run
    """
    results: List[TestResult] = []
    try:
        for i in range(runs):
            debug("Running trial number {} of {}".format(i + 1, runs))
            results.append(run_and_inspect_docker_image(image_name))
    except Exception as e:
        raise Exception("An error occurred while running the test container", e)
    return results
def analyze(self, program_file_path: str, config: Config) -> None:
    """
    Runs profile analyses on the given program
    :param program_file_path: path to the program to analyze
    :param config: config object for user-defined configuration
    """
    self.results = {"class": [], "function": [], "line_by_line": []}
    debug("Running scalene profile analysis")
    args = config.get_args_for(max(config.get_input_sizes()))
    debug("Using input size {} for scalene analysis".format(
        max(config.get_input_sizes())))
    debug("Starting scalene run")
    # output = check_output(scalene_args, encoding='UTF-8', cwd=os.path.abspath(program_file_dir))
    output = self._run_scalene_in_docker(program_file_path, args)
    debug(output)
    debug("Parsing output")
    self.parseOutput(output)
def _compute_average(self, individual_runs: List[TestResult]) -> InputSizeResult:
    """
    Computes an average result for the individual runs and combines both into an InputSizeResult
    :param individual_runs: results for the individual runs of the container
    :return: a combined result with a computed average
    """
    debug("Computing average result")
    runs: float = len(individual_runs) * 1.0
    average_result = TestResult()
    sample_times_with_counts: Dict[int, int] = dict()
    for run in individual_runs:
        average_result.total_runtime_ms += run.total_runtime_ms / runs
        average_result.max_memory_usage_bytes += run.max_memory_usage_bytes / runs
        for time, memory in run.memory_usage_by_time.items():
            if time not in average_result.memory_usage_by_time:
                average_result.memory_usage_by_time[time] = 0
            if time not in sample_times_with_counts:
                sample_times_with_counts[time] = 0
            sample_times_with_counts[time] += 1
            average_result.memory_usage_by_time[time] += memory
    for time, memory in average_result.memory_usage_by_time.items():
        average_result.memory_usage_by_time[time] /= sample_times_with_counts[time]
    debug("Average runtime: {} ms".format(average_result.total_runtime_ms))
    debug("Average memory use: {} bytes".format(
        average_result.max_memory_usage_bytes))
    return InputSizeResult(average_result, individual_runs)
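# Hedged illustration (not part of the original module): assumes _compute_average
# lives on EndToEndAnalyzer, as suggested by its use in analyze() below, and that
# TestResult's fields can be assigned directly the same way they are read above;
# the function name and the concrete numbers are invented for demonstration. It
# shows that total runtime and peak memory are averaged over all runs, while a
# memory sample timestamp seen in only one run is averaged over that run alone.
def _example_compute_average() -> None:
    run_a = TestResult()
    run_a.total_runtime_ms = 100.0
    run_a.max_memory_usage_bytes = 2_000_000
    run_a.memory_usage_by_time = {0: 100, 50: 200}

    run_b = TestResult()
    run_b.total_runtime_ms = 300.0
    run_b.max_memory_usage_bytes = 4_000_000
    run_b.memory_usage_by_time = {0: 120}

    result = EndToEndAnalyzer()._compute_average([run_a, run_b])
    # result.average.total_runtime_ms        -> 200.0
    # result.average.max_memory_usage_bytes  -> 3000000.0
    # result.average.memory_usage_by_time    -> {0: 110.0, 50: 200.0}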
def _pull_base_image_if_absent(base_image_name: str):
    """
    Pulls the given base image via the docker CLI if it is not already present locally
    """
    local_images = list_images(remove_version=False)
    if base_image_name not in local_images:
        debug("Base Docker image '{}' not found".format(base_image_name))
        debug("Downloading and extracting base image.")
        debug("If you're running Speedometer for the first time, this may take a few minutes")
        pull_process = Popen(['docker', 'pull', base_image_name])
        pull_process.wait()
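# Design note and hedged alternative (an assumption, not the project's code):
# shelling out to `docker pull` streams the CLI's progress output, which is
# helpful for the large first-time download. The docker SDK client already used
# for builds (DOCKER_CLIENT.images.build) could pull the image silently instead,
# roughly:
#
#     DOCKER_CLIENT.images.pull(base_image_name)
#
# at the cost of losing that progress output.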
def run(self, program_file_path: str, config: Config) -> None:
    """
    Runs the analysis on the user-provided Python program
    :param program_file_path: path to the program to analyze
    :param config: config object for user-defined configuration
    """
    debug("Starting profile analysis")
    profiler = ProfileAnalyzer()
    profiler.analyze(program_file_path, config)
    profiler_results = profiler.get_results()
    debug("Profile analysis complete")

    debug("Starting end to end analysis")
    e2e_analyzer = EndToEndAnalyzer()
    e2e_analyzer.analyze(program_file_path, config)
    e2e_results = e2e_analyzer.get_results()
    debug("End to end analysis complete")

    build_visualization(program_file_path, profiler_results, e2e_results)
def _build_client() -> None:
    """
    Builds the React client
    """
    yarn_build_succ_msg = "The build folder is ready to be deployed."
    try:
        debug("Installing yarn dependencies for results_visualization")
        debug(CLIENT_SRC)
        os.chdir(CLIENT_SRC)
        os.system("yarn install")
        debug("Compiling results_visualization code")
        os.system("yarn build")
    except Exception as e:
        raise Exception(
            "Could not build results_visualization. Ensure you have the latest version of yarn installed.",
            e)
def processLines(self, arr: list):
    """
    Helper function for processing Scalene output
    :param arr: Scalene output split by line
    """
    # Map file header (with name & total time) to line contents (with %time and %mem per line)
    # 5 lines without memory usage, 6 lines with
    debug("Processing scalene output...")
    file_dict = self.ScaleneArrayStrip(arr, "Memory usage:", "% of time", 6)
    for a in file_dict:
        # Get the file name, total memory, and total time (in ms) from the header
        total_memory = 0.0
        if len(a.split("\n")) > 1:
            debug("CHECKING FILENAME - Full Scalene message: {}".format(a))
            debug("Split Message: {}".format(a.split("\n")[1]))
            file_name = (a.split("\n")[1]).split(": % of time")[0]
            mem_num = ((a.split("\n")[0]).split("(max:")[1]).split("MB)")[0]
            total_memory = float(mem_num)
        else:
            file_name = a.split(": % of time")[0]
        reference_time = self.getRefTime(a) * 1000.0
        func = function_runtime(file_name, "", 0.0, 0.0, 0)
        func_indentation = ""
        clas = class_runtime(file_name, "", 0.0, 0.0, 0)
        class_indentation = ""
        debug("Total Runtime Calculated: {}ms".format(reference_time))
        debug("Total Memory Calculated: {}MB".format(total_memory))
        for l in file_dict[a]:
            line_split = l.split("│")
            code_position = len(line_split) - 1
            line = line_by_line_runtime(file_name, 0, 0.0, 0.0,
                                        line_split[code_position], 0)
            leading_whitespace = re.match(r"\s*",
                                          line_split[code_position]).group()
            # Create function object when the line starts with "def"
            if line_split[code_position].strip().startswith("def") \
                    and line_split[code_position].strip().endswith(":"):
                if func.name != "" and func.total_run_time > 0.0:
                    self.computeMemoryPercentageForSection(func, total_memory)
                    self.results["function"].append(func)
                func_name = line_split[code_position].strip()[4:len(line_split[code_position].strip()) - 1]
                func = function_runtime(file_name, func_name, 0.0, 0.0, 0)
            # Create class object when the line starts with "class"
            elif line_split[code_position].strip().startswith("class") \
                    and line_split[code_position].strip().endswith(":"):
                if clas.name != "" and clas.total_run_time > 0.0:
                    self.computeMemoryPercentageForSection(clas, total_memory)
                    self.results["class"].append(clas)
                class_name = line_split[code_position].strip()[6:len(line_split[code_position].strip()) - 1]
                clas = class_runtime(file_name, class_name, 0.0, 0.0, 0)
            # If indentation matches that of the previous function
            elif leading_whitespace == func_indentation and func.name != "":
                self.computeMemoryPercentageForSection(func, total_memory)
                self.results["function"].append(func)
                if clas.name != "":
                    clas.class_functions.append(len(self.results["function"]) - 1)
                func = function_runtime(file_name, "", 0.0, 0.0, 0)
            # If indentation matches that of the previous class
            elif leading_whitespace == class_indentation and clas.name != "":
                self.computeMemoryPercentageForSection(clas, total_memory)
                self.results["class"].append(clas)
                clas = class_runtime(file_name, "", 0.0, 0.0, 0)
            # If Scalene reports significant time for the line, calculate the time in ms
            # and add it to the line/function/class objects
            line.line_num = int(line_split[0].strip())
            if not (line_split[1].isspace() and line_split[2].isspace()):
                if line_split[1].isspace() and not line_split[2].isspace():
                    lineTimePercentage = int(line_split[2].strip().replace("%", ""))
                    lineTime = int(line_split[2].strip().replace("%", "")) / 100 * reference_time
                elif line_split[2].isspace() and not line_split[1].isspace():
                    lineTimePercentage = int(line_split[1].strip().replace("%", ""))
                    lineTime = int(line_split[1].strip().replace("%", "")) / 100 * reference_time
                else:
                    lineTimePercentage = (int(line_split[1].strip().replace("%", ""))
                                          + int(line_split[2].strip().replace("%", "")))
                    lineTime = (int(line_split[1].strip().replace("%", ""))
                                + int(line_split[2].strip().replace("%", ""))) / 100 * reference_time
            else:
                lineTime = 0.0
                lineTimePercentage = 0
            self.updateRelevantData(line, func, clas, line_split, lineTime,
                                    lineTimePercentage, total_memory)
        # If a function object exists that hasn't been saved (i.e. near the end of the file), add it to results
        if func.name != "" and func.total_run_time > 0.0:
            self.computeMemoryPercentageForSection(func, total_memory)
            self.results["function"].append(func)
            if clas.name != "":
                clas.class_functions.append(len(self.results["function"]) - 1)
        # If a class object exists that hasn't been saved (i.e. near the end of the file), add it to results
        if clas.name != "" and clas.total_run_time > 0.0:
            self.computeMemoryPercentageForSection(clas, total_memory)
            self.results["class"].append(clas)
def analyze(self, program_file_path: str, config: Config) -> None:
    """
    Runs e2e analyses on the given program
    :param program_file_path: **absolute** path to the program to analyze
    :param config: config object for user-defined configuration
    """
    input_sizes: List[int] = config.get_input_sizes()

    # creates all required dockerfiles and stores the paths in a dict of <input_size, dockerfile path>
    debug("Creating dockerfiles for each input size")
    dockerfiles: Dict[int, str] = \
        {size: self._build_dockerfile_for_input(program_file_path, size, config)
         for size in input_sizes}

    # builds all the dockerfiles into images and stores the image names in a dict of <input_size, image name>
    debug("Building docker images for each input size")
    images: Dict[int, str] = \
        {size: build_docker_image(dockerfile)
         for size, dockerfile in dockerfiles.items()}

    # execute the test runs
    for input_size, image in images.items():
        debug("Analyzing for input size {}".format(input_size))
        individual_results: List[TestResult] = self._run_test_container(
            image, self.RUNS_PER_INPUT_SIZE)
        computed_results: InputSizeResult = self._compute_average(
            individual_results)
        self.results[input_size] = computed_results

    debug("End-to-end analysis complete!")
    debug("Doing some clean-up!")
    try:
        delete_all_docker_images(exceptions=["python"], force=True)
        debug("All cleaned-up here.")
    except Exception:
        debug("Cleanup failed. Please clean up Docker manually")
def build_visualization(program_file_path, profiler_results,
                        e2e_results: Dict[int, InputSizeResult]) -> dict:
    """
    Builds the visualizations with the given results
    :param program_file_path: path to the user's program
    :param profiler_results: results from the profile analysis
    :param e2e_results: results from the end to end analysis
    :return: the output dictionary written to the data file
    """
    debug("Starting results_visualization building")
    input_path_split = program_file_path.split("/")
    if len(input_path_split) > 0:
        output = {"script_name": input_path_split[len(input_path_split) - 1]}
    else:
        output = {"script_name": program_file_path}

    # create class object: two arrays for runtime and memory containing the original file name
    # and avg time/memory per class
    debug("Creating class objects")
    class_runtime = []
    class_memory = []
    for c in profiler_results["class"]:
        class_runtime.append({
            "name": c.filename + "/" + c.name,
            "total_runtime": c.total_run_time,
            "percent_runtime": c.time_percentage_of_total
        })
        class_memory.append({
            "name": c.filename + "/" + c.name,
            "total_memory": c.total_memory,
            "percent_memory": c.memory_percentage_of_total
        })
    output["class"] = {"class_runtime": class_runtime, "class_memory": class_memory}
    debug("Class objects created")

    # create function object: two arrays for runtime and memory containing the original file name
    # and avg time/memory per function; also find the function that takes the longest/uses the most memory
    debug("Creating function objects")
    function_runtimes = []
    function_memory = []
    max_fun_runtime = 0.0
    max_fun_memory = 0.0
    max_fun_runtime_name = ""
    max_fun_memory_name = ""
    for f in profiler_results["function"]:
        function_runtimes.append({
            "name": f.filename + "/" + f.name,
            "total_runtime": f.total_run_time,
            "percent_runtime": f.time_percentage_of_total
        })
        if f.total_run_time > max_fun_runtime:
            max_fun_runtime_name = f.name
            max_fun_runtime = f.total_run_time
        function_memory.append({
            "name": f.filename + "/" + f.name,
            "total_memory": f.total_memory,
            "percent_memory": f.memory_percentage_of_total
        })
        if f.total_memory > max_fun_memory:
            max_fun_memory_name = f.name
            max_fun_memory = f.total_memory
    output["function"] = {"function_runtime": function_runtimes,
                          "function_memory": function_memory}
    debug("Function objects created")

    # create array of line objects with filename, line #, code content, runtime, and memory usage,
    # grouped by file
    debug("Creating line objects")
    line_by_line = []
    line_of_function = []
    current_fun = ""
    for l in profiler_results["line_by_line"]:
        if current_fun == "":
            current_fun = l.filename
        elif current_fun != l.filename:
            line_by_line.append(line_of_function)
            line_of_function = []
            current_fun = l.filename
        line_of_function.append({
            "fileName": l.filename,
            "line_num": l.line_num,
            "code": l.line_text,
            "total_runtime": l.total_run_time,
            "total_memory": l.total_memory,
            "percent_runtime": l.time_percentage_of_total,
            "percent_memory": l.memory_percentage_of_total
        })
    line_by_line.append(line_of_function)
    output["line_by_line"] = line_by_line
    debug("Line objects created")

    e2e_object = {}
    e2e_runtime = []
    e2e_memory = []

    # calculate fit line data for runtime & memory
    debug("Calculating runtime fit lines")
    fit_data_runtime: FitData = get_reference_fits(e2e_results, True)
    debug("Calculating memory fit lines")
    fit_data_memory: FitData = get_reference_fits(e2e_results, False)
    total_runtime_points = {}
    total_memory_points = {}

    # e2e object contains two arrays for runtime and memory containing the n parameter,
    # avg runtime/memory, and the associated fit data
    debug("Creating e2e object")
    for i in e2e_results:
        debug("Parsing e2e - n = {}".format(i))
        e2e_runtime.append({
            "n": i,
            "total_runtime": round(math.log(round(e2e_results[i].average.total_runtime_ms, 2)
                                            if round(e2e_results[i].average.total_runtime_ms, 2) > 1 else 1), 2),
            "O(1)": round(math.log(round(fit_data_runtime.O_1[i], 2)
                                   if round(fit_data_runtime.O_1[i], 2) > 1 else 1), 2),
            "O(log(n))": round(math.log(round(fit_data_runtime.O_logn[i], 2)
                                        if round(fit_data_runtime.O_logn[i], 2) > 1 else 1), 2),
            "O(n)": round(math.log(round(fit_data_runtime.O_n[i], 2)
                                   if round(fit_data_runtime.O_n[i], 2) > 1 else 1), 2),
            "O(n\u00B2)": round(math.log(round(fit_data_runtime.O_n2[i], 2)
                                         if round(fit_data_runtime.O_n2[i], 2) > 1 else 1), 2),
            "O(n\u00B3)": round(math.log(round(fit_data_runtime.O_n3[i], 2)
                                         if round(fit_data_runtime.O_n3[i], 2) > 1 else 1), 2),
            "O(nlog(n))": round(math.log(round(fit_data_runtime.O_nlogn[i], 2)
                                         if round(fit_data_runtime.O_nlogn[i], 2) > 1 else 1), 2),
            "O(n\u207F)": round(math.log(round(fit_data_runtime.O_nn[i], 2)
                                         if round(fit_data_runtime.O_nn[i], 2) > 1 else 1), 2),
            "O(n!)": round(math.log(round(fit_data_runtime.O_n_fact[i], 2)
                                    if round(fit_data_runtime.O_n_fact[i], 2) > 1 else 1), 2)
        })
        total_runtime_points[i] = e2e_results[i].average.total_runtime_ms
        e2e_memory.append({
            "n": i,
            "total_memory": round(math.log(round(e2e_results[i].average.max_memory_usage_bytes / 10**6, 2)
                                           if round(e2e_results[i].average.max_memory_usage_bytes / 10**6, 2) > 1 else 1), 2),
            "O(1)": round(math.log(round(fit_data_memory.O_1[i] / 10**6, 2)
                                   if round(fit_data_memory.O_1[i] / 10**6, 2) > 1 else 1), 2),
            "O(log(n))": round(math.log(round(fit_data_memory.O_logn[i] / 10**6, 2)
                                        if round(fit_data_memory.O_logn[i] / 10**6, 2) > 1 else 1), 2),
            "O(n)": round(math.log(round(fit_data_memory.O_n[i] / 10**6, 2)
                                   if round(fit_data_memory.O_n[i] / 10**6, 2) > 1 else 1), 2),
            "O(n\u00B2)": round(math.log(round(fit_data_memory.O_n2[i] / 10**6, 2)
                                         if round(fit_data_memory.O_n2[i] / 10**6, 2) > 1 else 1), 2),
            "O(n\u00B3)": round(math.log(round(fit_data_memory.O_n3[i] / 10**6, 2)
                                         if round(fit_data_memory.O_n3[i] / 10**6, 2) > 1 else 1), 2),
            "O(nlog(n))": round(math.log(round(fit_data_memory.O_nlogn[i] / 10**6, 2)
                                         if round(fit_data_memory.O_nlogn[i] / 10**6, 2) > 1 else 1), 2),
            "O(n\u207F)": round(math.log(round(fit_data_memory.O_nn[i] / 10**6, 2)
                                         if round(fit_data_memory.O_nn[i] / 10**6, 2) > 1 else 1), 2),
            "O(n!)": round(math.log(round(fit_data_memory.O_n_fact[i] / 10**6, 2)
                                    if round(fit_data_memory.O_n_fact[i] / 10**6, 2) > 1 else 1), 2),
            "memory_usage_by_time": e2e_results[i].average.memory_usage_by_time
        })
        total_memory_points[i] = e2e_results[i].average.max_memory_usage_bytes

    # set the E2E runtime/memory arrays, the highest runtime/memory functions,
    # the total average runtime/memory, and calculate the complexity of the program
    e2e_object["e2e_runtime"] = e2e_runtime
    e2e_object["e2e_memory"] = e2e_memory
    e2e_object["e2e_highest_runtime_function"] = max_fun_runtime_name
    e2e_object["e2e_highest_memory_usage_function"] = max_fun_memory_name
    e2e_object["e2e_total_average_time"] = round(
        sum(list(total_runtime_points.values())) / len(e2e_runtime), 2)
    e2e_object["e2e_total_average_memory"] = round(
        sum(list(total_memory_points.values())) / len(e2e_memory) / 10**6, 2)
    debug("Calculating project runtime complexity")
    e2e_object["e2e_time_complexity"] = find_O_fit(fit_data_runtime, total_runtime_points)
    debug("Calculating project memory complexity")
    e2e_object["e2e_space_complexity"] = find_O_fit(fit_data_memory, total_memory_points)
    output["e2e"] = e2e_object
    debug("e2e object created")

    # create sankey data object
    debug("Creating sankey object")
    output["sankey"] = {
        "sankey_runtime": get_sankey_data(profiler_results, True),
        "sankey_memory": get_sankey_data(profiler_results, False)
    }
    debug("Sankey object created")

    with open(DATA_FILE, 'w') as outfile:
        json.dump(output, outfile)
    debug("JSON created")
    return output