def build_docker_image(dockerfile_path: str) -> str:
    """
    Builds a Docker image from the given Dockerfile, tagged with a unique, randomly generated name
    :param dockerfile_path: path to the Dockerfile (or to the directory containing it)
    :return: the randomly generated name of the image created
    """
    debug("Starting docker image build for {}".format(dockerfile_path))
    if os.path.isfile(dockerfile_path):
        build_path, _ = os.path.split(
            dockerfile_path)  # if given path is to Dockerfile, get the parent
    else:
        build_path = dockerfile_path  # else use the directory given

    if not os.path.exists(build_path):
        raise Exception("Build path {} does not exist".format(build_path))

    _pull_base_image_if_absent("python:latest")
    try:
        repository: str = DockerUtil.get_random_string_of_length(
            20, uppercase=False, numbers=False)
        DOCKER_CLIENT.images.build(path=build_path, tag=repository)
        debug("Image build complete!")
        return repository
    except Exception as e:
        raise Exception("Failed to build image for {}".format(dockerfile_path),
                        e)
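# Usage sketch (illustrative only; the paths below are hypothetical and a running
# Docker daemon is assumed): build_docker_image accepts either the Dockerfile itself
# or the directory that contains it, and returns the randomly generated tag.
#
#     image_name = build_docker_image("/tmp/speedometer/Dockerfile")  # path to a Dockerfile
#     image_name = build_docker_image("/tmp/speedometer")             # path to its parent directory
#     container = create_docker_container(image_name)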
def _run_build_file_in_browser(dst_path) -> None:
    """
    Runs the generated HTML file in browser
    :raises: Exception if any of the paths are invalid (file does not exist), or the file is not of correct format
    """
    debug("Opening results_visualization in browser")
    try:
        webbrowser.open('file://' +
                        os.path.join(dst_path, "build", "index.html"))
    except Exception as e:
        raise Exception("Could not open file in browser.", e)
    def _run_scalene_in_docker(self, program_file_path,
                               scalene_args: List[str]) -> str:
        """
        Runs Scalene against the program inside a Docker container and returns the profiler output
        :param program_file_path: path to the program to profile
        :param scalene_args: arguments to pass to Scalene
        :return: the decoded container logs containing the Scalene output
        """
        debug("Loading the scalene docker container")
        load_docker_image(os.path.abspath("resources/scalene.tar"))
        output_path = build_scalene_dockerfile(program_file_path, scalene_args)
        image_name = build_docker_image(output_path)
        container = create_docker_container(image_name)
        debug("Starting the scalene docker container")
        container.start()
        container.wait()
        output = bytes(container.logs()).decode('UTF-8')
        container.stop()
        return output
    def _run_test_container(self, image_name: str,
                            runs: int) -> List[TestResult]:
        """
        Spins-up a container from the given image and runs the program for the given input 'run' number of times
        Returns the results for each run
        :param image_name: name of the docker image configured to run test for an input size
        :return: a list of Result objects - one for each run
        """

        results: List[TestResult] = []
        try:
            for i in range(runs):
                debug("Running trial number {} of {}".format(i + 1, runs))
                results.append(run_and_inspect_docker_image(image_name))
        except Exception as e:
            raise Exception("An error occurred during run test container", e)
        return results
    def analyze(self, program_file_path: str, config: Config) -> None:
        """
        Runs profile analyses on the given program
        :param program_file_path: path to the program to analyze
        :param config: config object for user-defined configuration
        """
        self.results = {"class": [], "function": [], "line_by_line": []}
        debug("Running scalene profile analysis")

        args = config.get_args_for(max(config.get_input_sizes()))
        debug("Using input size {} for scalene analysis".format(
            max(config.get_input_sizes())))
        debug("Starting scalene run")
        # output = check_output(scalene_args, encoding='UTF-8', cwd=os.path.abspath(program_file_dir))
        output = self._run_scalene_in_docker(program_file_path, args)
        debug(output)
        debug("Parsing output")
        self.parseOutput(output)
    def _compute_average(self,
                         individual_runs: List[TestResult]) -> InputSizeResult:
        """
        Computes an average result for the individual runs and combines both into an InputSizeResult
        :param individual_runs: results for the individual runs of the container
        :return: a combined result with a computed average
        """
        debug("Computing average result")

        runs: float = len(individual_runs) * 1.0
        average_result = TestResult()
        sample_times_with_counts: Dict[int, int] = dict()

        for run in individual_runs:
            average_result.total_runtime_ms += run.total_runtime_ms / runs
            average_result.max_memory_usage_bytes += run.max_memory_usage_bytes / runs

            for time, memory in run.memory_usage_by_time.items():
                if time not in average_result.memory_usage_by_time:
                    average_result.memory_usage_by_time[time] = 0
                if time not in sample_times_with_counts:
                    sample_times_with_counts[time] = 0

                sample_times_with_counts[time] += 1
                average_result.memory_usage_by_time[time] += memory

        for time, memory in average_result.memory_usage_by_time.items():
            average_result.memory_usage_by_time[
                time] /= sample_times_with_counts[time]

        debug("Average runtime: {} ms".format(average_result.total_runtime_ms))
        debug("Average memory use: {} bytes".format(
            average_result.max_memory_usage_bytes))
        return InputSizeResult(average_result, individual_runs)
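# Worked example (hypothetical numbers) for the averaging above: per-timestamp memory
# is divided by the number of runs that actually sampled that timestamp, not by the
# total run count, so shorter runs do not drag the tail of the averaged curve to zero.
#
#     run 1: memory_usage_by_time = {0: 100, 50: 200}
#     run 2: memory_usage_by_time = {0: 120}
#     average:                      {0: (100 + 120) / 2, 50: 200 / 1}  # -> {0: 110, 50: 200}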
def _pull_base_image_if_absent(base_image_name: str):
    """
    Pulls the given base Docker image if it is not already present locally
    :param base_image_name: full name (with tag) of the base image, e.g. "python:latest"
    """
    local_images = list_images(remove_version=False)
    if base_image_name not in local_images:
        debug("Base Docker image '{}' not found".format(base_image_name))
        debug("Downloading and extracting base image.")
        debug(
            "If you're running Speedometer for the first time, this may take a few minutes"
        )
        pull_process = Popen(['docker', 'pull', base_image_name])
        pull_process.wait()
    def run(self, program_file_path: str, config: Config) -> None:
        """
        Runs the analysis on the user-provided python program
        :param program_file_path: path to the program to analyze
        :param config: config object for user-defined configuration
        """
        debug("Starting profile analysis")
        profiler = ProfileAnalyzer()
        profiler.analyze(program_file_path, config)
        profiler_results = profiler.get_results()
        debug("Profile analysis complete")

        debug("Starting end to end analysis")
        e2e_analyzer = EndToEndAnalyzer()
        e2e_analyzer.analyze(program_file_path, config)
        e2e_results = e2e_analyzer.get_results()
        debug("End to end analysis complete")

        build_visualization(program_file_path, profiler_results, e2e_results)
def _build_client() -> None:
    """
    Builds the react client
    """

    try:
        debug("Installing yarn dependencies for results_visualization")
        debug(CLIENT_SRC)
        os.chdir(CLIENT_SRC)
        # os.system does not raise on failure, so check the exit codes explicitly
        if os.system("yarn install") != 0:
            raise Exception("'yarn install' exited with a non-zero status")
        debug("Compiling results_visualization code")
        if os.system("yarn build") != 0:
            raise Exception("'yarn build' exited with a non-zero status")
    except Exception as e:
        raise Exception(
            "Could not build results_visualization. Ensure you have the latest version of yarn installed.",
            e)
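# Rough shell equivalent of the build above (illustrative; assumes yarn is on the PATH
# and CLIENT_SRC points at the react client sources):
#
#     cd "$CLIENT_SRC" && yarn install && yarn build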
    def processLines(self, arr: list):
        """
        Helper function for processing Scalene output
        :param arr: Scalene output split by line
        """
        # Map file header (with name & total time) to line contents (with %time and %mem per line)
        # 5 lines without memory usage, 6 lines with
        debug("Processing scalene output...")
        file_dict = self.ScaleneArrayStrip(arr, "Memory usage:", "% of time",
                                           6)
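        # Hypothetical, simplified shape of the rows parsed below (real Scalene output
        # has more columns): fields are separated by "│", index 0 is the source line
        # number, indexes 1 and 2 are time percentages, and the last field is the code
        # itself, e.g.
        #
        #     "   12 │  40% │   5% │ ... │ def slow_function():"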

        for a in file_dict:
            # Get total file time from header in ms
            total_memory = 0.0
            if len(a.split("\n")) > 1:
                debug("CHECKING FILENAME - Full Scalene message: {}".format(a))
                debug("Split Message: {}".format(a.split("\n")[1]))
                file_name = (a.split("\n")[1]).split(": % of time")[0]
                mem_num = ((
                    a.split("\n")[0]).split("(max:")[1]).split("MB)")[0]
                total_memory = float(mem_num)
            else:
                file_name = a.split(": % of time")[0]
            reference_time = self.getRefTime(a) * 1000.0
            func = function_runtime(file_name, "", 0.0, 0.0, 0)
            func_indentation = ""
            clas = class_runtime(file_name, "", 0.0, 0.0, 0)
            class_indentation = ""
            debug("Total Runtime Calculated: {}ms".format(reference_time))
            debug("Total Memory Calculated: {}MB".format(total_memory))

            for l in file_dict[a]:
                line_split = l.split("│")
                code_position = len(line_split) - 1
                line = line_by_line_runtime(file_name, 0, 0.0, 0.0,
                                            line_split[code_position], 0)
                leading_whitespace = re.match(
                    r"\s*", line_split[code_position]).group()
                # Create function object when line starts with "def"
                if line_split[code_position].strip().startswith(
                        "def") and line_split[code_position].strip().endswith(
                            ":"):
                    if func.name != "" and func.total_run_time > 0.0:
                        self.computeMemoryPercentageForSection(
                            func, total_memory)
                        self.results["function"].append(func)
                    func_name = line_split[code_position].strip(
                    )[4:len(line_split[code_position].strip()) - 1]
                    func = function_runtime(file_name, func_name, 0.0, 0.0, 0)
                # Create class object when line starts with "class"
                elif line_split[code_position].strip().startswith(
                        "class") and line_split[code_position].strip(
                        ).endswith(":"):
                    if clas.name != "" and clas.total_run_time > 0.0:
                        self.computeMemoryPercentageForSection(
                            clas, total_memory)
                        self.results["class"].append(clas)
                    class_name = line_split[code_position].strip(
                    )[6:len(line_split[code_position].strip()) - 1]
                    clas = class_runtime(file_name, class_name, 0.0, 0.0, 0)
                # If indentation matches that of the previous function
                elif leading_whitespace == func_indentation and func.name != "":
                    self.computeMemoryPercentageForSection(func, total_memory)
                    self.results["function"].append(func)
                    if (clas.name != ""):
                        clas.class_functions.append(
                            len(self.results["function"]) - 1)
                    func = function_runtime(file_name, "", 0.0, 0.0, 0)
                # If indentation matches that of the previous class
                elif leading_whitespace == class_indentation and clas.name != "":
                    self.computeMemoryPercentageForSection(clas, total_memory)
                    self.results["class"].append(clas)
                    clas = class_runtime(file_name, "", 0.0, 0.0, 0)

                # If Scalene output determines line has significant time, calculate time in ms and add it to line/function/class objects
                line.line_num = int(line_split[0].strip())
                if not (line_split[1].isspace() and line_split[2].isspace()):
                    if line_split[1].isspace():
                        lineTimePercentage = int(
                            line_split[2].strip().replace("%", ""))
                    elif line_split[2].isspace():
                        lineTimePercentage = int(
                            line_split[1].strip().replace("%", ""))
                    else:
                        lineTimePercentage = (
                            int(line_split[1].strip().replace("%", "")) +
                            int(line_split[2].strip().replace("%", "")))
                    lineTime = lineTimePercentage / 100 * reference_time
                else:
                    lineTime = 0.0
                    lineTimePercentage = 0
                self.updateRelevantData(line, func, clas, line_split, lineTime,
                                        lineTimePercentage, total_memory)
            # If function object exists that hasn't been saved (i.e. near the end of the file), add to results
            if func.name != "" and func.total_run_time > 0.0:
                self.computeMemoryPercentageForSection(func, total_memory)
                self.results["function"].append(func)
                if (clas.name != ""):
                    clas.class_functions.append(
                        len(self.results["function"]) - 1)
            # If class object exists that hasn't been saved (i.e. near the end of the file), add to results
            if clas.name != "" and clas.total_run_time > 0.0:
                self.computeMemoryPercentageForSection(clas, total_memory)
                self.results["class"].append(clas)
    def analyze(self, program_file_path: str, config: Config) -> None:
        """
        Runs e2e analyses on the given program
        :param program_file_path: **absolute** path to the program to analyze
        :param config: config object for user-defined configuration
        """
        input_sizes: List[int] = config.get_input_sizes()

        # creates all required dockerfiles and stores paths in a dict of <input_size, dockerfile path>
        debug("Creating dockerfiles for each input size")
        dockerfiles: Dict[int, str] = \
            {size: self._build_dockerfile_for_input(program_file_path, size, config) for size in input_sizes}

        # builds all the dockerfiles into images and stores the image names in a dict of <input_size, image name>
        debug("Building docker images for each input size")
        images: Dict[int, str] = \
            {size: build_docker_image(dockerfile) for size, dockerfile in dockerfiles.items()}

        # execute the test runs
        for input_size, image in images.items():
            debug("Analyzing for input size {}".format(input_size))
            individual_results: List[TestResult] = self._run_test_container(
                image, self.RUNS_PER_INPUT_SIZE)
            computed_results: InputSizeResult = self._compute_average(
                individual_results)
            self.results[input_size] = computed_results

        debug("End-to-end analysis complete!")
        debug("Doing some clean-up!")
        try:
            delete_all_docker_images(exceptions=["python"], force=True)
            debug("All cleaned-up here.")
        except Exception:
            debug("Cleanup failed. Please clean up Docker manually")
def build_visualization(program_file_path, profiler_results, e2e_results: Dict[int, InputSizeResult]) -> dict:
    """
    Builds the visualizations with the given results
    :param program_file_path: path to the user's program
    :param profiler_results: results from the profile analysis
    :param e2e_results: results from the end to end analysis
    """
    debug("Starting results_visualization building")
    input_path_split = program_file_path.split("/")
    output = {"script_name": input_path_split[-1]}

    # create class object, two arrays for runtime and memory containing original file name, avg time/memory per class
    debug("Creating class objects")
    class_runtime = []
    class_memory = []
    for c in profiler_results["class"]:
        class_runtime.append({
            "name": c.filename + "/" + c.name,
            "total_runtime": c.total_run_time,
            "percent_runtime": c.time_percentage_of_total
        })
        class_memory.append({
            "name": c.filename + "/" + c.name,
            "total_memory": c.total_memory,
            "percent_memory": c.memory_percentage_of_total
        })
    output["class"] = {"class_runtime": class_runtime, "class_memory": class_memory}
    debug("Class objects created")

    # create function object, two arrays for runtime and memory containing original file name, avg time/memory per function
    # find function that takes longest/uses most memory
    debug("Creating function objects")
    function_runtimes = []
    function_memory = []
    max_fun_runtime = 0.0
    max_fun_memory = 0.0
    max_fun_runtime_name = ""
    max_fun_memory_name = ""
    for f in profiler_results["function"]:
        function_runtimes.append({
            "name": f.filename + "/" + f.name,
            "total_runtime": f.total_run_time,
            "percent_runtime": f.time_percentage_of_total
        })
        if f.total_run_time > max_fun_runtime:
            max_fun_runtime_name = f.name
            max_fun_runtime = f.total_run_time
        function_memory.append({
            "name": f.filename + "/" + f.name,
            "total_memory": f.total_memory,
            "percent_memory": f.memory_percentage_of_total
        })
        if f.total_memory > max_fun_memory:
            max_fun_memory_name = f.name
            max_fun_memory = f.total_memory
    output["function"] = {"function_runtime": function_runtimes, "function_memory": function_memory}
    debug("Function objects created")

    # create array of line objects with filename, line#, code content, runtime, and memory usage
    debug("Creating line objects")
    line_by_line = []
    line_of_function = []
    current_fun = ""
    for l in profiler_results["line_by_line"]:
        if current_fun == "":
            current_fun = l.filename
        else:
            if current_fun != l.filename:
                line_by_line.append(line_of_function)
                line_of_function = []
        line_of_function.append({
            "fileName": l.filename,
            "line_num": l.line_num,
            "code": l.line_text,
            "total_runtime": l.total_run_time,
            "total_memory": l.total_memory,
            "percent_runtime": l.time_percentage_of_total,
            "percent_memory": l.memory_percentage_of_total
        })
    line_by_line.append(line_of_function)
    output["line_by_line"] = line_by_line
    debug("Line objects created")

    e2e_object = {}
    e2e_runtime = []
    e2e_memory = []
    # calculate fit line data for runtime & memory
    debug("Calculating runtime fit lines")
    fit_data_runtime: FitData = get_reference_fits(e2e_results, True)
    debug("Calculating memory fit lines")
    fit_data_memory: FitData = get_reference_fits(e2e_results, False)
    total_runtime_points = {}
    total_memory_points = {}

    # e2e object contains two arrays for runtime and memory containing the n parameter, avg runtime/memory, and associated fit data
    debug("Creating e2e object")
    for i in e2e_results:
        debug("Parsing e2e - n = {}".format(i))
        # rt_obj = {}

        e2e_runtime.append({
            "n": i,
            "total_runtime": round(math.log(round(e2e_results[i].average.total_runtime_ms, 2) if round(e2e_results[i].average.total_runtime_ms, 2) > 1 else 1), 2),
            "O(1)": round(math.log(round(fit_data_runtime.O_1[i], 2) if round(fit_data_runtime.O_1[i], 2) > 1 else 1), 2),
            "O(log(n))": round(math.log(round(fit_data_runtime.O_logn[i], 2) if round(fit_data_runtime.O_logn[i], 2) > 1 else 1), 2),
            "O(n)": round(math.log(round(fit_data_runtime.O_n[i], 2) if round(fit_data_runtime.O_n[i], 2) > 1 else 1), 2),
            "O(n\u00B2)": round(math.log(round(fit_data_runtime.O_n2[i], 2) if round(fit_data_runtime.O_n2[i], 2) > 1 else 1), 2),
            "O(n\u00B3)": round(math.log(round(fit_data_runtime.O_n3[i], 2) if round(fit_data_runtime.O_n3[i], 2) > 1 else 1), 2),
            "O(nlog(n))": round(math.log(round(fit_data_runtime.O_nlogn[i], 2) if round(fit_data_runtime.O_nlogn[i], 2) > 1 else 1), 2),
            "O(n\u207F)": round(math.log(round(fit_data_runtime.O_nn[i], 2) if round(fit_data_runtime.O_nn[i], 2) > 1 else 1), 2),
            "O(n!)": round(math.log(round(fit_data_runtime.O_n_fact[i], 2) if round(fit_data_runtime.O_n_fact[i], 2) > 1 else 1), 2)
        })
        total_runtime_points[i] = e2e_results[i].average.total_runtime_ms
        e2e_memory.append({
            "n": i,
            "total_memory": round(math.log(round(e2e_results[i].average.max_memory_usage_bytes / 10**6, 2) if round(e2e_results[i].average.max_memory_usage_bytes / 10**6, 2) > 1 else 1), 2),
            "O(1)": round(math.log(round(fit_data_memory.O_1[i] / 10**6, 2) if round(fit_data_memory.O_1[i] / 10**6, 2) > 1 else 1), 2),
            "O(log(n))": round(math.log(round(fit_data_memory.O_logn[i] / 10**6, 2) if round(fit_data_memory.O_logn[i] / 10**6, 2) > 1 else 1), 2),
            "O(n)": round(math.log(round(fit_data_memory.O_n[i] / 10**6, 2) if round(fit_data_memory.O_n[i] / 10**6, 2) > 1 else 1), 2),
            "O(n\u00B2)": round(math.log(round(fit_data_memory.O_n2[i] / 10**6, 2) if round(fit_data_memory.O_n2[i] / 10**6, 2) > 1 else 1), 2),
            "O(n\u00B3)": round(math.log(round(fit_data_memory.O_n3[i] / 10**6, 2) if round(fit_data_memory.O_n3[i] / 10**6, 2) > 1 else 1), 2),
            "O(nlog(n))": round(math.log(round(fit_data_memory.O_nlogn[i] / 10**6, 2) if round(fit_data_memory.O_nlogn[i] / 10**6, 2) > 1 else 1), 2),
            "O(n\u207F)": round(math.log(round(fit_data_memory.O_nn[i] / 10**6, 2) if round(fit_data_memory.O_nn[i] / 10**6, 2) > 1 else 1), 2),
            "O(n!)": round(math.log(round(fit_data_memory.O_n_fact[i] / 10**6, 2) if round(fit_data_memory.O_n_fact[i] / 10**6, 2) > 1 else 1), 2),
            "memory_usage_by_time": e2e_results[i].average.memory_usage_by_time
        })
        total_memory_points[i] = e2e_results[i].average.max_memory_usage_bytes
    # set E2E runtime/memory arrays, highest runtime/memory functions, total function runtime/memory usage, and calculate complexity of program
    e2e_object["e2e_runtime"] = e2e_runtime
    e2e_object["e2e_memory"] = e2e_memory
    e2e_object["e2e_highest_runtime_function"] = max_fun_runtime_name
    e2e_object["e2e_highest_memory_usage_function"] = max_fun_memory_name
    e2e_object["e2e_total_average_time"] = round(sum(list(total_runtime_points.values())) / len(e2e_runtime), 2)
    e2e_object["e2e_total_average_memory"] = round(sum(list(total_memory_points.values())) / len(e2e_memory) / 10**6, 2)
    debug("Calculating project runtime complexity")
    e2e_object["e2e_time_complexity"] = find_O_fit(fit_data_runtime, total_runtime_points)
    debug("Calculating project memory complexity")
    e2e_object["e2e_space_complexity"] = find_O_fit(fit_data_memory, total_memory_points)
    output["e2e"] = e2e_object
    debug("e2e object created")

    #create sankey data object
    debug("Creating sankey object")
    output["sankey"] = {
        "sankey_runtime": get_sankey_data(profiler_results, True),
        "sankey_memory": get_sankey_data(profiler_results, False)
    }
    debug("Sankey object created")

    with open(DATA_FILE, 'w') as outfile:
        json.dump(output, outfile)
    debug("JSON created")
    return output