Example #1
def main(args):
    overall_start_time = time.time()
    input_files_path = args[2]
    orig_log_files_path = args[4]
    component_path = args[6]
    intermediate_path = args[8]
    fix_file_contents = get_log_analyzer_results(input_files_path)
    change_component_permission(component_path)
    with Pool(cpu_count()) as pool:
        results = pool.map(
            partial(process_each_artifact_dependency_solve,
                    component_path=component_path,
                    intermediate_path=intermediate_path,
                    orig_log_files_path=orig_log_files_path),
            fix_file_contents)
    results = [line[0] for line in results]
    DependencyAnalyzerUtils.write_to_csv(
        results,
        DependencyAnalyzerConstants.BUGSINPY_CSV_SOLVER_RESULTS_HEADERS,
        'bugsinpy_iterative_solve_results.csv')
    partial_fix = 0
    complete_fix = 0
    for row in results:
        if row[2] in [
                FinalOutcome.SUCCESS_FIXED_BUILD,
                FinalOutcome.SUCCESS_RESTORED_TO_ORIGINAL_STATUS,
                FinalOutcome.SUCCESS_RESTORED_TO_ORIGINAL_ERROR
        ] and len(row[1]):
            complete_fix += 1
        elif row[2] in [
                FinalOutcome.PARTIAL_EXHAUSTED_ALL_OPTIONS,
                FinalOutcome.SUCCESS_NO_LONGER_DEPENDENCY_ERROR,
                FinalOutcome.PARTIAL_NO_POSSIBLE_CANDIDATES
        ] and len(row[1]):
            partial_fix += 1
    overall_end_time = time.time()
    print(
        '==========**** IterativeDependencySolver FINAL OUTPUT ****==========')
    if len(results) == 0:
        print('No artifacts to solve for')
    else:
        print('Number of builds identified: {}'.format(len(results)))
        print('Complete Fixes: {}({})'.format(
            complete_fix, complete_fix * 100 / len(results)))
        print('Partial Fixes: {}({})'.format(partial_fix,
                                             partial_fix * 100 / len(results)))
        print('No Fixes: {}({})'.format(
            len(results) - (complete_fix + partial_fix),
            (len(results) -
             (complete_fix + partial_fix)) * 100 / len(results)))
        print('Total Runtime: {} minutes'.format(
            (overall_end_time - overall_start_time) / 60))
    print('==========**** END OF OUTPUT ****==========')
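
The driver above fans the per-artifact work out with multiprocessing.Pool, using functools.partial to pin the keyword arguments so that pool.map only supplies the varying first argument. A minimal, runnable sketch of that pattern; the worker body and paths here are placeholders, not the project's real ones:

from functools import partial
from multiprocessing import Pool, cpu_count

def solve_one(fix_entry, component_path, intermediate_path):
    # Stand-in for process_each_artifact_dependency_solve: the iterable
    # supplies the first positional argument, partial() pins the rest.
    return [fix_entry, component_path, intermediate_path]

if __name__ == '__main__':
    fix_file_contents = ['artifact_a', 'artifact_b']
    with Pool(cpu_count()) as pool:
        results = pool.map(
            partial(solve_one,
                    component_path='/tmp/component',
                    intermediate_path='/tmp/intermediate'),
            fix_file_contents)
    print(results)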
Example #2
def create_run_dir(iter_count, dir_path):
    """ Create a directory for intermediate files of a run """
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        DependencyAnalyzerConstants.SUDO_MKDIR.format(
            join(
                dir_path,
                DependencyAnalyzerConstants.ITER_RUN_DIR_NAME.format(
                    iter_count))))
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        DependencyAnalyzerConstants.CHANGE_PERMISSION_CMD.format(
            join(
                dir_path,
                DependencyAnalyzerConstants.ITER_RUN_DIR_NAME.format(
                    iter_count))))
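
Every example funnels shell commands through DependencyAnalyzerUtils._run_command and unpacks a 4-tuple from it. A plausible shape for that helper, shown purely as an assumption since the real one is defined elsewhere in the project:

import subprocess

def run_command(command):
    # Mirrors the (process, stdout, stderr, ok) tuple unpacked above.
    process = subprocess.run(command, shell=True,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout = process.stdout.decode('utf-8', errors='replace').strip()
    stderr = process.stderr.decode('utf-8', errors='replace').strip()
    return process, stdout, stderr, process.returncode == 0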
Example #3
def copy_build_script(component_path, cloned_dir, repo_name, bug_id, version):
    """ Create build script in cloned directory """
    travis_gen = TravisYMLGenerator()
    travis_gen.bugsinpy_yaml_generation(
        '{}_{}_{}'.format(repo_name, bug_id, version), component_path,
        os.getcwd(), True)
    inner_dir = repo_name
    if inner_dir == 'spacy':
        inner_dir = 'spaCy'
    cp_travis_yml_cmd = 'cp {}_{}_{}/.travis.yml {}_{}_{}/{}'.format(
        repo_name, bug_id, version, repo_name, bug_id, version, inner_dir)
    cp_travis_script_cmd = 'cp {}_{}_{}/test.sh {}_{}_{}/{}'.format(
        repo_name, bug_id, version, repo_name, bug_id, version, inner_dir)
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(';'.join(
        [cp_travis_yml_cmd, cp_travis_script_cmd]))
    if not ok:
        return False
    script_content = []
    with open(join(cloned_dir, 'test.sh'),
              DependencyAnalyzerConstants.FILE_READ_MODE) as f:
        script_content = f.readlines()
    edited_script = []
    for line in script_content:
        if not line.strip().startswith(
                'travis_cmd cd\\ /home/sumukher/bugswarm-dev/dependency-solver/scripts/'
        ):
            edited_script.append(line)
        else:
            # Repoint the hard-coded path at the current clone; keep a trailing
            # newline so the joined script stays line-separated.
            edited_script.append('  travis_cmd cd\\ {} --assert --echo{}'.format(
                cloned_dir, DependencyAnalyzerConstants.CHAR_NEW_LINE))
    with open(join(cloned_dir, 'test.sh'),
              DependencyAnalyzerConstants.FILE_WRITE_MODE) as f:
        f.write(DependencyAnalyzerConstants.CHAR_EMPTY.join(edited_script))
    return True
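
copy_build_script edits test.sh with a common rewrite-in-place pattern: read every line, swap the ones matching a known prefix, and write the whole file back. The same pattern in miniature; the function name and arguments are illustrative:

def repoint_lines(path, old_prefix, new_line):
    # Read, map matching lines to the replacement (newline included), write back.
    with open(path) as f:
        lines = f.readlines()
    edited = [new_line + '\n' if ln.strip().startswith(old_prefix) else ln
              for ln in lines]
    with open(path, 'w') as f:
        f.write(''.join(edited))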
Example #4
def execute_patch_changes(cloned_repo_dir, curr_errors, log_output_content,
                          output_log_path, repo_name, bug_id, version,
                          iter_count, python_ver):
    """ Execute patched build in a clean Docker environment """
    image_name = None
    if python_ver == DependencyAnalyzerConstants.PYTHON_3_8_3:
        image_name = DependencyAnalyzerConstants.PYTHON_3_8_3_IMAGE_NAME
    elif python_ver == DependencyAnalyzerConstants.PYTHON_3_8_1:
        image_name = DependencyAnalyzerConstants.PYTHON_3_8_1_IMAGE_NAME
    elif python_ver == DependencyAnalyzerConstants.PYTHON_3_7_7:
        image_name = DependencyAnalyzerConstants.PYTHON_3_7_7_IMAGE_NAME
    elif python_ver == DependencyAnalyzerConstants.PYTHON_3_7_0:
        image_name = DependencyAnalyzerConstants.PYTHON_3_7_0_IMAGE_NAME
    elif python_ver == DependencyAnalyzerConstants.PYTHON_3_6_9:
        image_name = DependencyAnalyzerConstants.PYTHON_3_6_9_IMAGE_NAME
    container_name = '{}_{}_{}'.format(repo_name, bug_id, version)
    docker_run_cmd = DependencyAnalyzerConstants.DOCKER_RUN_AS_ROOT_CMD.format(
        container_name, image_name)
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        docker_run_cmd)
    if not ok:
        log_output_content.append(stderr)
        log_output_content.append('Failed to run Docker container')
        return None, log_output_content
    get_container_id_cmd = DependencyAnalyzerConstants.DOCKER_GET_CONTAINER_ID_CMD + \
        container_name + DependencyAnalyzerConstants.CHAR_DOUBLE_QUOTE
    _, container_id, stderr, _ = DependencyAnalyzerUtils._run_command(
        get_container_id_cmd)
    copy_source_code_cmd = DependencyAnalyzerConstants.DOCKER_CP_HOME_CMD.format(
        cloned_repo_dir, container_id)
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        copy_source_code_cmd)
    if not ok:
        log_output_content.append(stderr)
        log_output_content.append('Failed to copy source code')
        return None, log_output_content
    change_perm_cmd = DependencyAnalyzerConstants.DOCKER_HOME_CHANGE_PERM_CMD.format(
        container_id, repo_name)
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        change_perm_cmd)
    change_pip_perm = DependencyAnalyzerConstants.DOCKER_PIP_CHANGE_PERM_CMD.format(
        container_id)
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        change_pip_perm)
    execute_build_cmd = DependencyAnalyzerConstants.DOCKER_EXEC_BUILD_JOB_CMD.format(
        container_id, repo_name)
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        execute_build_cmd)
    log_cp_cmd = DependencyAnalyzerConstants.DOCKER_CP_BUILD_LOG_CMD.format(
        container_id, repo_name, output_log_path, repo_name, bug_id, version)
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(log_cp_cmd)
    if not ok:
        log_output_content.append(stderr)
        log_output_content.append('Failed to copy build log')
        return None, log_output_content
    remove_docker_container(container_name)
    return check_build_result(output_log_path, curr_errors, log_output_content,
                              repo_name, bug_id, version)
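
The python_ver-to-image selection at the top of execute_patch_changes is a five-way if/elif chain; a dictionary lookup expresses the same mapping in one step. A sketch with placeholder version strings and image names (the project reads both from DependencyAnalyzerConstants):

PYTHON_IMAGE_BY_VERSION = {
    '3.8.3': 'python-3.8.3-image',
    '3.8.1': 'python-3.8.1-image',
    '3.7.7': 'python-3.7.7-image',
    '3.7.0': 'python-3.7.0-image',
    '3.6.9': 'python-3.6.9-image',
}
python_ver = '3.7.7'
image_name = PYTHON_IMAGE_BY_VERSION.get(python_ver)  # None if unknown, like the chain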
Example #5
def cleanup(component_path,
            output_log_path=None,
            log_output_content=None,
            repo_dir=None):
    """ Remove intermediate details """
    if output_log_path:
        append_output_log(output_log_path, log_output_content)
        log_output_content = []
    if repo_dir:
        _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
            DependencyAnalyzerConstants.SUDO_RM_RF_CMD.format(repo_dir))
def check_in_original_logs(lines_flagged, original_log_files_path, repo_name,
                           bug_id, version, component_path):
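    """ Keep only the flagged lines that do not also appear in the original log """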
    # if not lines_flagged:
    #    return False, None
    original_file_nm = DependencyAnalyzerConstants.ORIG_FILE_NAME_FORMAT.format(
        repo_name, str(bug_id), str(version))
    orig_content = None
    if isfile(join(original_log_files_path, original_file_nm)):
        with open(join(original_log_files_path, original_file_nm)) as f:
            orig_content = [x.strip() for x in f.readlines()]
    else:
        # Generate original log
        orig_log_gen = OriginalLogGenerator(2)
        orig_content = orig_log_gen.start_log_gen(
            '{}_{}_{}'.format(repo_name, bug_id, version),
            original_log_files_path, None, component_path)
    if not orig_content:
        return False, None
    orig_match_list = []
    orig_lines_flagged = {}

    # Iterate over original log lines
    for idx in range(0, len(orig_content)):
        orig_line = orig_content[idx]
        orig_match = DependencyAnalyzerUtils.check_matches(orig_line)
        if orig_match:
            orig_match_list.append(orig_match)
            orig_lines_flagged[idx] = {
                BuildLogConstants.KEY_LINE:
                orig_line.replace(DependencyAnalyzerConstants.CHAR_NEW_LINE,
                                  DependencyAnalyzerConstants.CHAR_EMPTY),
                BuildLogConstants.KEY_TYPE:
                orig_match
            }

    new_lines_flagged = {}

    # check against current log's identified lines
    for new_id in lines_flagged:
        found = False
        for orig_id in orig_lines_flagged:
            if orig_lines_flagged[orig_id][BuildLogConstants.KEY_TYPE] == lines_flagged[new_id][BuildLogConstants.KEY_TYPE]\
                and orig_lines_flagged[orig_id][BuildLogConstants.KEY_LINE] == lines_flagged[new_id][BuildLogConstants.KEY_LINE]:
                found = True
        if not found:
            new_lines_flagged[new_id] = lines_flagged[new_id]

    return new_lines_flagged, orig_content
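
The nested loop at the end of check_in_original_logs is quadratic in the number of flagged lines. Building a set of (type, line) pairs from the original log makes the same filter linear; a sketch assuming the same surrounding names and constants:

orig_pairs = {(entry[BuildLogConstants.KEY_TYPE], entry[BuildLogConstants.KEY_LINE])
              for entry in orig_lines_flagged.values()}
new_lines_flagged = {
    new_id: entry for new_id, entry in lines_flagged.items()
    if (entry[BuildLogConstants.KEY_TYPE],
        entry[BuildLogConstants.KEY_LINE]) not in orig_pairs
}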
Example #7
def create_art_dir(intermediate_path, repo_name, bug_id, version):
    """ Create directory to store intermediate files for an artifact """
    if not isdir(join(intermediate_path, repo_name)):
        _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
            DependencyAnalyzerConstants.SUDO_MKDIR.format(
                join(intermediate_path, repo_name)))
        _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
            DependencyAnalyzerConstants.CHANGE_PERMISSION_CMD.format(
                join(intermediate_path, repo_name)))
    if not isdir(join(intermediate_path, repo_name, str(bug_id))):
        _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
            DependencyAnalyzerConstants.SUDO_MKDIR.format(
                join(intermediate_path, repo_name, str(bug_id))))
        _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
            DependencyAnalyzerConstants.CHANGE_PERMISSION_CMD.format(
                join(intermediate_path, repo_name, str(bug_id))))
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        DependencyAnalyzerConstants.SUDO_MKDIR.format(
            join(intermediate_path, repo_name, str(bug_id), str(version))))
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        DependencyAnalyzerConstants.CHANGE_PERMISSION_CMD.format(
            join(intermediate_path, repo_name, str(bug_id), str(version))))
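
# NOTE (sketch, not the project's approach): create_art_dir shells out because
# it needs sudo; when elevated permissions are not required, the whole cascade
# collapses into one call -- makedirs creates every missing parent and
# exist_ok replaces the isdir() checks.
def create_art_dir_local(intermediate_path, repo_name, bug_id, version):
    os.makedirs(join(intermediate_path, repo_name, str(bug_id), str(version)),
                exist_ok=True)
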
def main(args):
    start_time = time.time()
    global log_files_path
    log_files_path = args[2]
    original_log_files_path = args[4]
    component_path = args[6]
    is_solver_run = False
    is_subset_run = False
    if len(args) == 9:
        is_solver_run = True
        log_file_nm = args[8]
    elif len(args) == 8:
        is_subset_run = True

    if is_solver_run:
        filenames = [log_file_nm]
    else:
        failed_art_list = []
        passed_art_list = []
        if is_subset_run:
            for proj_bug in DependencyAnalyzerConstants.BUGSINPY_SUBSET_LIST:
                proj = proj_bug.split('_')[0]
                bug = proj_bug.split('_')[1]
                if not isfile(
                        join(log_files_path, '{}_{}_{}.log'.format(
                            proj, bug, '0'))):
                    failed_art_list.append('{}_{}_{}'.format(proj, bug, '0'))
                if not isfile(
                        join(log_files_path, '{}_{}_{}.log'.format(
                            proj, bug, '1'))):
                    passed_art_list.append('{}_{}_{}'.format(proj, bug, '1'))

        else:
            proj_list = os.listdir(join(component_path, 'projects'))
            for proj in proj_list:
                if proj in DependencyAnalyzerConstants.BUGSINPY_EXCLUSION_LIST:
                    continue
                if not isdir(join(component_path, 'projects', proj)):
                    continue
                bugs_list = os.listdir(
                    join(component_path, 'projects', proj, 'bugs'))
                for bug in bugs_list:
                    if proj == 'matplotlib' and bug != '1':
                        continue
                    if not isdir(
                            join(component_path, 'projects', proj, 'bugs',
                                 bug)):
                        continue
                    if not isfile(
                            join(log_files_path, '{}_{}_{}.log'.format(
                                proj, bug, '0'))):
                        failed_art_list.append('{}_{}_{}'.format(
                            proj, bug, '0'))
                    if not isfile(
                            join(log_files_path, '{}_{}_{}.log'.format(
                                proj, bug, '1'))):
                        passed_art_list.append('{}_{}_{}'.format(
                            proj, bug, '1'))
        with Pool(cpu_count()) as pool:
            results = pool.map(
                partial(ReproducedLogGenerator(2).start_log_gen,
                        f_or_p='failed',
                        output_path=log_files_path,
                        component_path=component_path), failed_art_list)
        with Pool(cpu_count()) as pool:
            results = pool.map(
                partial(ReproducedLogGenerator(2).start_log_gen,
                        f_or_p='passed',
                        output_path=log_files_path,
                        component_path=component_path), passed_art_list)

        filenames = [
            f for f in listdir(log_files_path)
            if isfile(join(log_files_path, f))
            and f.endswith(DependencyAnalyzerConstants.LOG_FILE_EXT)
        ]
    # all reproduced logs for passed builds
    repr_passed = []
    # all reproduced logs for failed builds
    repr_failed = []
    for f in filenames:
        if any(f.startswith(excluded)
               for excluded in DependencyAnalyzerConstants.BUGSINPY_EXCLUSION_LIST):
            continue
        repr_failed.append(f)
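    # check_log re-derives pass/fail from each filename's version suffix, so
    # routing every log through repr_failed still classifies builds correctly.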
    dep_issue_art = {}
    dep_issue_art.update(
        check_log(repr_passed, BuildLogConstants.STR_PASSED,
                  original_log_files_path, component_path))
    dep_issue_art.update(
        check_log(repr_failed, BuildLogConstants.STR_FAILED,
                  original_log_files_path, component_path))
    end_time = time.time()
    print('==========**** LogErrorAnalyzer FINAL OUTPUT ****==========')
    if len(filenames) == 0:
        print('No reproduced logs generated to identify')
    else:
        print('Number of builds identified: {}'.format(len(dep_issue_art)))
        print('Number of builds available: {}'.format(len(filenames)))
        print('Percentage Identification: {}'.format(
            len(dep_issue_art) * 100 / len(filenames)))
        print('Total Runtime: {} minutes'.format((end_time - start_time) / 60))
    print('==========**** END OF OUTPUT ****==========')
    dep_issue_art_list = ErrorAnalyzerUtils.convert_dict_to_list(dep_issue_art)
    if not is_solver_run:
        DependencyAnalyzerUtils.write_to_csv(
            dep_issue_art_list,
            DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS,
            'bugsinpy_artifacts_dependency_broken.csv')
    else:
        op_name = 'bugsinpy_artifacts_dependency_broken_' + log_file_nm + DependencyAnalyzerConstants.CSV_FILE_EXT
        DependencyAnalyzerUtils.write_to_csv(
            dep_issue_art_list,
            DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS, op_name)
def check_log(file_list, pass_fail, original_log_files_path, component_path):
    """ Parse each log, check for dependency errors, identify possible
        candidates """
    dep_issue_art = {}
    ansi_escape = re.compile(DependencyAnalyzerConstants.ANSI_ESCAPE_STR, re.M)
    for art in file_list:
        print('Processing {}...'.format(art))
        parts = art.split(DependencyAnalyzerConstants.CHAR_UNDERSCORE)
        repo_name = parts[0]
        bug_id = parts[1]
        version = parts[2].split(DependencyAnalyzerConstants.CHAR_STOP)[0]
        if version == DependencyAnalyzerConstants.STR_PASSED_VERSION:
            pass_fail = DependencyAnalyzerConstants.STR_PASSED
        else:
            pass_fail = DependencyAnalyzerConstants.STR_FAILED
        content = []
        # Read log
        with open(join(log_files_path, art),
                  encoding=DependencyAnalyzerConstants.STR_ENCODING_UTF8) as f:
            content = [x.strip() for x in f.readlines()]
        match_type_list = []
        lines_flagged = {}
        traceback_starts = {}
        installed_packages = []
        last_identified_package = {}
        dep_file_names = []
        is_build_success = False
        is_module_not_found = False
        module_not_found_packages = {}

        # Start iterating over log lines
        for idx in range(0, len(content)):
            line = content[idx]
            line = ansi_escape.sub(DependencyAnalyzerConstants.CHAR_EMPTY,
                                   line)

            # check if line shows build success for passed builds
            if pass_fail == BuildLogConstants.STR_PASSED and DependencyAnalyzerConstants.BUILD_SUCCESS_LOG in line:
                is_build_success = True
                break

            # check if this line is collecting a package
            if line.startswith(BuildLogConstants.LOG_STR_COLLECTING):
                if idx < len(content) - 1:
                    last_identified_package, file_nm = ErrorAnalyzerUtils.get_package_info(
                        line, content[idx + 1])
                else:
                    last_identified_package, file_nm = ErrorAnalyzerUtils.get_package_info(
                        line)
                if not last_identified_package:
                    continue
                installed_packages.append(last_identified_package)
                if file_nm:
                    dep_file_names.append(
                        file_nm.split(
                            DependencyAnalyzerConstants.CHAR_OPEN_PARENTHESIS)
                        [0].strip())
                continue
            elif line.startswith(BuildLogConstants.LOG_STR_SEARCHING):
                # Guard the three-line lookahead so a "Searching" line near the
                # end of the log cannot raise an IndexError.
                if idx < len(content) - 3:
                    last_identified_package, file_nm = ErrorAnalyzerUtils.get_package_info(
                        line, content[idx + 3])
                else:
                    last_identified_package, file_nm = ErrorAnalyzerUtils.get_package_info(
                        line)
                if not last_identified_package:
                    continue
                installed_packages.append(last_identified_package)
                if file_nm:
                    dep_file_names.append(
                        file_nm.split(
                            DependencyAnalyzerConstants.CHAR_OPEN_PARENTHESIS)
                        [0].strip())
                continue
            elif line.startswith(
                    BuildLogConstants.LOG_STR_REQUIREMENT_SATISFIED):
                last_identified_package, file_nm = ErrorAnalyzerUtils.get_package_info(
                    line, line)
                if not last_identified_package:
                    continue
                installed_packages.append(last_identified_package)
                if file_nm:
                    dep_file_names.append(
                        file_nm.split(
                            DependencyAnalyzerConstants.CHAR_OPEN_PARENTHESIS)
                        [0].strip())
                continue
            elif line.startswith(BuildLogConstants.LOG_STR_DOWNLOADING)\
                    or line.startswith(BuildLogConstants.LOG_STR_BEST_MATCH):
                continue

            # checking if this is the start of a traceback
            traceback_starts, start_traceback = ErrorAnalyzerUtils.check_if_traceback(
                line, traceback_starts, idx)
            if start_traceback:
                continue

            # checking if this line in the reproduced log matches any of the known error patterns
            new_match_type = DependencyAnalyzerUtils.check_matches(line)
            lines_flagged, match_type_list, is_module_not_found, module_not_found_packages = ErrorAnalyzerUtils.add_matches(
                new_match_type, line, lines_flagged, idx, match_type_list,
                module_not_found_packages)
        # add nothing if build is successful
        if is_build_success:
            continue

        installed_packages = ErrorAnalyzerUtils.merge_same_installed_packages(
            installed_packages)
        # Check if the error exists in orig logs
        new_lines_flagged, orig_content = check_in_original_logs(
            lines_flagged, original_log_files_path, repo_name, bug_id, version,
            component_path)
        module_not_found_packages, orig_content = check_in_original_logs(
            module_not_found_packages, original_log_files_path, repo_name,
            bug_id, version, component_path)
        if new_lines_flagged:
            dep_issue_art[art] = {
                BuildLogConstants.KEY_LINES: new_lines_flagged,
                BuildLogConstants.KEY_POSSIBLE_CANDIDATES: [],
                BuildLogConstants.KEY_INSTALLED_PKGS: installed_packages,
                BuildLogConstants.KEY_FILE_NAME: dep_file_names,
                'is_module_error': is_module_not_found
            }

            # identify possible candidates
            ErrorAnalyzerUtils.localize_errors(dep_issue_art[art], content)
            if last_identified_package:
                ErrorAnalyzerUtils.add_if_not_added(
                    dep_issue_art[art], last_identified_package.name)

        if traceback_starts:
            if art not in dep_issue_art:
                dep_issue_art[art] = {
                    BuildLogConstants.KEY_LINES: {},
                    BuildLogConstants.KEY_POSSIBLE_CANDIDATES: [],
                    BuildLogConstants.KEY_INSTALLED_PKGS: installed_packages,
                    BuildLogConstants.KEY_FILE_NAME:
                    DependencyAnalyzerConstants.CHAR_EMPTY,
                    'is_module_error': is_module_not_found
                }

            # identify possible candidates
            ErrorAnalyzerUtils.extract_packages_from_tracebacks(
                traceback_starts, dep_issue_art[art], content, orig_content,
                art.split(
                    DependencyAnalyzerConstants.CHAR_UNDERSCORE)[0].strip())

        if module_not_found_packages:
            module_not_found_dep_issue_art = {}
            candidates = []
            if art in dep_issue_art:
                candidates = dep_issue_art[art][
                    BuildLogConstants.KEY_POSSIBLE_CANDIDATES]
            module_not_found_dep_issue_art[art] = {
                BuildLogConstants.KEY_LINES: module_not_found_packages,
                BuildLogConstants.KEY_POSSIBLE_CANDIDATES: candidates,
                BuildLogConstants.KEY_INSTALLED_PKGS: installed_packages,
                BuildLogConstants.KEY_FILE_NAME:
                DependencyAnalyzerConstants.CHAR_EMPTY,
                'is_module_error': True
            }
            ErrorAnalyzerUtils.localize_errors(
                module_not_found_dep_issue_art[art], content, True)
            if art not in dep_issue_art:

                dep_issue_art[art] = {
                    BuildLogConstants.KEY_LINES:
                    module_not_found_packages,
                    BuildLogConstants.KEY_POSSIBLE_CANDIDATES:
                    module_not_found_dep_issue_art[art][
                        BuildLogConstants.KEY_POSSIBLE_CANDIDATES],
                    BuildLogConstants.KEY_INSTALLED_PKGS:
                    module_not_found_dep_issue_art[art][
                        BuildLogConstants.KEY_INSTALLED_PKGS],
                    BuildLogConstants.KEY_FILE_NAME:
                    DependencyAnalyzerConstants.CHAR_EMPTY,
                    'is_module_error':
                    True
                }
            else:
                dep_issue_art[art][BuildLogConstants.KEY_LINES].update(
                    module_not_found_dep_issue_art[art][
                        BuildLogConstants.KEY_LINES])
                dep_issue_art[art][
                    BuildLogConstants.
                    KEY_POSSIBLE_CANDIDATES] = module_not_found_dep_issue_art[
                        art][BuildLogConstants.KEY_POSSIBLE_CANDIDATES]

        if art not in dep_issue_art:
            continue
        if dep_issue_art[art] and not dep_issue_art[art][BuildLogConstants.KEY_POSSIBLE_CANDIDATES]\
            and not dep_issue_art[art][BuildLogConstants.KEY_LINES]:
            del dep_issue_art[art]
            continue

    return dep_issue_art
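
check_log strips ANSI colour codes from each line before pattern matching. That preprocessing step in isolation; the regex below is the common CSI pattern and is only assumed to resemble the project's ANSI_ESCAPE_STR:

import re

ANSI_ESCAPE = re.compile(r'\x1b\[[0-9;]*[A-Za-z]')  # assumed pattern

def strip_ansi(line):
    return ANSI_ESCAPE.sub('', line)

assert strip_ansi('\x1b[32mCollecting requests\x1b[0m') == 'Collecting requests'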
Example #10
def main(args):
    start_time = time.time()
    global log_files_path, artifact_dict
    # if not DependencyAnalyzerUtils\
    #         .valid_input(args, DependencyAnalyzerConstants.LOG_DEP_ANALYZER_USAGE):
    #     return
    # else:
    log_files_path = args[2]
    orig_log_path = args[4]
    is_solver_run = False
    is_subset_run = False
    log_file_name = None
    if len(args) == 7:
        is_solver_run = True
        log_file_name = args[6]
    elif len(args) == 6:
        is_subset_run = True
    artifact_dict = DependencyAnalyzerUtils.get_artifact_dict(getcwd())
    filenames = []
    if not is_solver_run:
        generate_reproduced_logs(log_files_path, artifact_dict, is_subset_run)
        filenames = [f for f in listdir(log_files_path) if isfile(join(log_files_path, f))]
    else:
        filenames = [log_file_name]
    # all reproduced logs for passed builds
    repr_passed = []
    # all reproduced logs for failed builds
    repr_failed = []
    for f in filenames:
        if any(f.startswith(excluded)
               for excluded in DependencyAnalyzerConstants.BUGSWARM_EXCLUSION_LIST):
            continue
        if not is_python_artifact(f):
            continue
        if f.endswith(BuildLogConstants.PASSED_LOG_NAME_SUFFIX):
            repr_passed.append(f)
        elif f.endswith(BuildLogConstants.FAILED_LOG_NAME_SUFFIX):
            repr_failed.append(f)
    dep_issue_art = {}
    dep_issue_art.update(check_log(orig_log_path, repr_passed, BuildLogConstants.STR_PASSED))
    dep_issue_art.update(check_log(orig_log_path, repr_failed, BuildLogConstants.STR_FAILED))
    end_time = time.time()
    print('==========**** LogErrorAnalyzer FINAL OUTPUT ****==========')
    if len(filenames) == 0:
        print('No reproduced logs generated to identify')
    else:
        print('Number of builds identified: {}'.format(len(dep_issue_art)))
        print('Number of builds available: {}'.format(len(filenames)))
        print('Percentage Identification: {}'.format(len(dep_issue_art)*100/len(filenames)))
        print('Total Runtime: {} minutes'.format((end_time - start_time)/60))
    print('==========**** END OF OUTPUT ****==========')
    dep_issue_art_list = ErrorAnalyzerUtils.convert_dict_to_list(dep_issue_art)
    if not is_solver_run:
        DependencyAnalyzerUtils.write_to_csv(dep_issue_art_list,
                                            DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS,
                                            DependencyAnalyzerConstants.LOG_DEP_ANALYZER_FILENM)
    else:
        op_name = 'artifacts_dependency_broken_' + log_file_name.split()[0] + '.csv'
        DependencyAnalyzerUtils.write_to_csv(dep_issue_art_list,
                                            DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS,
                                            op_name)
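
Results in every example land in DependencyAnalyzerUtils.write_to_csv with a row list, a header list, and an output file name. A plausible shape for that helper, shown purely as an assumption:

import csv

def write_to_csv(rows, headers, out_file):
    # Header row first, then one CSV row per result entry.
    with open(out_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(rows)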
Example #11
def check_log(orig_log_path, file_list, pass_fail):
    """ Parse each log, check for dependency errors, identify possible
    candidates """
    dep_issue_art = {}
    ansi_escape = re.compile(DependencyAnalyzerConstants.ANSI_ESCAPE_STR, re.M)
    for art in file_list:
        artname = art.split(DependencyAnalyzerConstants.CHAR_STOP)[0]
        content = []

        # read current log
        with open(join(log_files_path, art),
                  encoding=DependencyAnalyzerConstants.STR_ENCODING_UTF8) as f:
            content = [x.strip() for x in f.readlines()]

        match_type_list = []
        lines_flagged = {}
        traceback_starts = {}
        installed_packages = []
        last_identified_package = {}
        dep_file_names = []
        is_build_success = False
        is_module_not_found = False
        module_not_found_packages = {}

        # Iterate over log lines
        for idx in range(0, len(content)):
            line = content[idx]
            line = ansi_escape.sub(DependencyAnalyzerConstants.CHAR_EMPTY, line)

            # check if line shows build success for passed builds
            if pass_fail == BuildLogConstants.STR_PASSED and DependencyAnalyzerConstants.BUILD_SUCCESS_LOG in line:
                is_build_success = True
                break

            # check if this line is collecting a package
            if line.startswith(BuildLogConstants.LOG_STR_COLLECTING):
                if idx < len(content) - 1:
                    last_identified_package, file_nm = ErrorAnalyzerUtils.get_package_info(line, content[idx + 1])
                else:
                    last_identified_package, file_nm = ErrorAnalyzerUtils.get_package_info(line)
                if not last_identified_package:
                    continue
                installed_packages.append(last_identified_package)
                if file_nm:
                    dep_file_names.append(file_nm.split(DependencyAnalyzerConstants.CHAR_OPEN_PARENTHESIS)[0].strip())
                continue
            elif line.startswith(BuildLogConstants.LOG_STR_SEARCHING):
                # Guard the lookahead so a "Searching" line near the end of the
                # log cannot raise an IndexError.
                if idx < len(content) - 3:
                    last_identified_package, file_nm = ErrorAnalyzerUtils.get_package_info(line, content[idx + 3])
                else:
                    last_identified_package, file_nm = ErrorAnalyzerUtils.get_package_info(line)
                if not last_identified_package:
                    continue
                installed_packages.append(last_identified_package)
                if file_nm:
                    dep_file_names.append(file_nm.split(DependencyAnalyzerConstants.CHAR_OPEN_PARENTHESIS)[0].strip())
                continue
            elif line.startswith(BuildLogConstants.LOG_STR_REQUIREMENT_SATISFIED):
                last_identified_package, file_nm = ErrorAnalyzerUtils.get_package_info(line, line)
                if not last_identified_package:
                    continue
                installed_packages.append(last_identified_package)
                if file_nm:
                    dep_file_names.append(file_nm.split(DependencyAnalyzerConstants.CHAR_OPEN_PARENTHESIS)[0].strip())
                continue
            elif line.startswith(BuildLogConstants.LOG_STR_DOWNLOADING)\
                    or line.startswith(BuildLogConstants.LOG_STR_BEST_MATCH):
                continue

            # checking if this is the start of a traceback
            traceback_starts, start_traceback = ErrorAnalyzerUtils.check_if_traceback(line, traceback_starts, idx)
            if start_traceback:
                continue

            # checking if this line in the reproduced log matches any of the known error patterns
            new_match_type = DependencyAnalyzerUtils.check_matches(line)
            lines_flagged, match_type_list, is_module_not_found, module_not_found_packages = ErrorAnalyzerUtils.add_matches(new_match_type, line, lines_flagged, idx, match_type_list, module_not_found_packages)

        # add nothing if build is successful
        if is_build_success:
            continue

        installed_packages = ErrorAnalyzerUtils.merge_same_installed_packages(installed_packages)

        # get orig log from travis
        orig_content = get_orig_log_contents(artifact_dict, artname, orig_log_path, pass_fail)
        if not orig_content:
            continue

        if len(match_type_list) > 0:
            # the line has matched at least one error pattern
            # checking if original logs had the same error
            orig_match_list = []
            for orig_line in orig_content:
                orig_match = DependencyAnalyzerUtils.check_matches(orig_line)
                if orig_match:
                    orig_match_list.append(orig_match)
            if len(list(set(match_type_list) - set(orig_match_list))) > 0:
                dep_issue_art[art] = {BuildLogConstants.KEY_LINES: lines_flagged,
                                        BuildLogConstants.KEY_POSSIBLE_CANDIDATES: [],
                                        BuildLogConstants.KEY_INSTALLED_PKGS: installed_packages,
                                        BuildLogConstants.KEY_FILE_NAME: dep_file_names,
                                        'is_module_error': is_module_not_found}

                # identify possible candidates
                ErrorAnalyzerUtils.localize_errors(dep_issue_art[art], content)
                if last_identified_package:
                    ErrorAnalyzerUtils.add_if_not_added(dep_issue_art[art], last_identified_package.name)

        if traceback_starts:
            if art not in dep_issue_art:
                dep_issue_art[art] = {BuildLogConstants.KEY_LINES: {},
                                            BuildLogConstants.KEY_POSSIBLE_CANDIDATES: [],
                                            BuildLogConstants.KEY_INSTALLED_PKGS: installed_packages,
                                            BuildLogConstants.KEY_FILE_NAME: '',
                                            'is_module_error': is_module_not_found}

            # identify possible candidates
            ErrorAnalyzerUtils.extract_packages_from_tracebacks(traceback_starts, dep_issue_art[art],
                                             content, orig_content,
                                             artifact_dict[artname][DependencyAnalyzerConstants.REPO_KEY])
        if module_not_found_packages:
            candidates = []
            if art in dep_issue_art:
                candidates = dep_issue_art[art][BuildLogConstants.KEY_POSSIBLE_CANDIDATES]
            module_not_found_dep_issue_art = {}
            module_not_found_dep_issue_art[art] = {BuildLogConstants.KEY_LINES: module_not_found_packages,
                                              BuildLogConstants.KEY_POSSIBLE_CANDIDATES: candidates,
                                              BuildLogConstants.KEY_INSTALLED_PKGS: installed_packages,
                                              BuildLogConstants.KEY_FILE_NAME: DependencyAnalyzerConstants.CHAR_EMPTY,
                                              'is_module_error': True}
            ErrorAnalyzerUtils.localize_errors(module_not_found_dep_issue_art[art], content, True)
            if art not in dep_issue_art:
                dep_issue_art[art] = {BuildLogConstants.KEY_LINES: module_not_found_packages,
                                        BuildLogConstants.KEY_POSSIBLE_CANDIDATES: module_not_found_dep_issue_art[art][BuildLogConstants.KEY_POSSIBLE_CANDIDATES],
                                        BuildLogConstants.KEY_INSTALLED_PKGS: installed_packages,
                                        BuildLogConstants.KEY_FILE_NAME: DependencyAnalyzerConstants.CHAR_EMPTY,
                                        'is_module_error': True}
            else:
                dep_issue_art[art][BuildLogConstants.KEY_LINES].update(module_not_found_packages)
                dep_issue_art[art][BuildLogConstants.KEY_POSSIBLE_CANDIDATES] = module_not_found_dep_issue_art[art][BuildLogConstants.KEY_POSSIBLE_CANDIDATES]

        if art not in dep_issue_art:
            continue
        if dep_issue_art[art] and not dep_issue_art[art][BuildLogConstants.KEY_POSSIBLE_CANDIDATES] and not dep_issue_art[art][BuildLogConstants.KEY_LINES]:
            del dep_issue_art[art]
            continue
    return dep_issue_art
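
The set(match_type_list) - set(orig_match_list) step is the heart of the analyzer: an artifact is flagged only when the reproduced log shows an error type the original log never had. With illustrative values:

reproduced_types = ['ModuleNotFoundError', 'VersionConflict']
original_types = ['VersionConflict']
new_types = set(reproduced_types) - set(original_types)
print(new_types)  # {'ModuleNotFoundError'} -> the artifact gets flagged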
Example #12
def main(args):
    path = args[2]
    dataset = args[4]
    orig_file_name = None
    new_file_name = None
    result_file_prefix = None
    header_consts = None
    is_subset_run = False
    if len(args) == 6:
        is_subset_run = True
    if dataset == '1':
        if is_subset_run:
            orig_file_name = 'bugswarm_subset_iterative_solve_results_orig.csv'
        else:
            orig_file_name = 'bugswarm_iterative_solve_results_orig.csv'
        new_file_name = 'iterative_solve_results.csv'
        result_file_prefix = 'bugswarm'
        header_consts = DependencyAnalyzerConstants.COMPARE_BUGSWARM_CSV_SOLVER_RESULTS_HEADERS
    elif dataset == '2':
        if is_subset_run:
            orig_file_name = 'bugsinpy_subset_iterative_solve_results_orig.csv'
        else:
            orig_file_name = 'bugsinpy_iterative_solve_results_orig.csv'
        new_file_name = 'bugsinpy_iterative_solve_results.csv'
        result_file_prefix = 'bugsinpy'
        header_consts = DependencyAnalyzerConstants.COMPARE_BUGSWARM_CSV_SOLVER_RESULTS_HEADERS
    else:
        print('No such dataset exists in this study')
        return
    orig_data = []
    new_data = []
    # read original data
    with open(join(path, 'orig_data', orig_file_name)) as f:
        csv_reader = csv.reader(f)
        next(csv_reader, None)
        for row in csv_reader:
            orig_data.append(row)
    # read new data
    with open(join(path, new_file_name)) as f:
        csv_reader = csv.reader(f)
        next(csv_reader, None)
        for row in csv_reader:
            new_data.append(row)
    diff_in_num_id = len(orig_data) - len(new_data)
    new_found = []
    same_fix_outcome = []
    nofix_to_partial = []
    nofix_to_complete = []
    partial_to_complete = []
    complete_to_partial = []
    complete_to_nofix = []
    partial_to_nofix = []
    orig_data_map = {}
    for row in orig_data:
        orig_data_map[row[0]] = row
    for row in new_data:
        key = row[0]
        if key not in orig_data_map:
            new_found.append(row)
        else:
            orig_row = orig_data_map[key]
            compare_row = []
            compare_row.extend(row)
            compare_row.extend(orig_row[1:])
            partial_outcomes = ('No longer recognized as a dependency error',
                                'Exhausted all options')
            if not len(row[1]):
                # no fix
                if len(orig_row[1]):
                    if orig_row[2].strip() in partial_outcomes:
                        partial_to_nofix.append(compare_row)
                    else:
                        complete_to_nofix.append(compare_row)
                else:
                    same_fix_outcome.append(compare_row)
            elif row[2].strip() in partial_outcomes:
                # partial fix
                if not len(orig_row[1]):
                    nofix_to_partial.append(compare_row)
                elif orig_row[2].strip() in partial_outcomes:
                    same_fix_outcome.append(compare_row)
                else:
                    complete_to_partial.append(compare_row)
            else:
                # complete fix
                if not len(orig_row[1]):
                    nofix_to_complete.append(compare_row)
                elif orig_row[2].strip() in partial_outcomes:
                    partial_to_complete.append(compare_row)
                else:
                    same_fix_outcome.append(compare_row)
    if diff_in_num_id > 0:
        print('Decrease in number of fixes attempted: {}'.format(
            abs(diff_in_num_id)))
    elif diff_in_num_id < 0:
        print('Increase in number of fixes attempted: {}'.format(
            abs(diff_in_num_id)))
    else:
        print('Same number of fixes attempted')
    print('Same fix outcome count: {}'.format(len(same_fix_outcome)))
    print('========Improvements=======')
    print('No fix to partial fix: {}'.format(len(nofix_to_partial)))
    print('No fix to complete fix: {}'.format(len(nofix_to_complete)))
    print('Partial fix to complete fix: {}'.format(len(partial_to_complete)))
    print('========Deterioration=======')
    print('Complete fix to no fix: {}'.format(len(complete_to_nofix)))
    print('Complete fix to partial fix: {}'.format(len(complete_to_partial)))
    print('Partial fix to no fix: {}'.format(len(partial_to_nofix)))
    DependencyAnalyzerUtils.write_to_csv(
        same_fix_outcome,
        DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS,
        '{}_compare_same_fix_outcome.csv'.format(result_file_prefix))
    DependencyAnalyzerUtils.write_to_csv(
        nofix_to_partial,
        DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS,
        '{}_compare_nofix_to_partial.csv'.format(result_file_prefix))
    DependencyAnalyzerUtils.write_to_csv(
        nofix_to_complete,
        DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS,
        '{}_compare_nofix_to_complete.csv'.format(result_file_prefix))
    DependencyAnalyzerUtils.write_to_csv(
        partial_to_complete,
        DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS,
        '{}_compare_partial_to_complete.csv'.format(result_file_prefix))
    DependencyAnalyzerUtils.write_to_csv(
        complete_to_nofix,
        DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS,
        '{}_compare_complete_to_nofix.csv'.format(result_file_prefix))
    DependencyAnalyzerUtils.write_to_csv(
        complete_to_partial,
        DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS,
        '{}_compare_complete_to_partial.csv'.format(result_file_prefix))
    DependencyAnalyzerUtils.write_to_csv(
        partial_to_nofix,
        DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS,
        '{}_compare_partial_to_nofix.csv'.format(result_file_prefix))
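
The nested branching above maps each (fix, outcome) pair onto one of three states before picking a transition bucket. Factored out, the classification itself is small; the helper name is ours, the outcome strings are the ones the code compares against:

PARTIAL_OUTCOMES = ('No longer recognized as a dependency error',
                    'Exhausted all options')

def classify(fix_col, outcome_col):
    # An empty fix column means no fix was produced at all.
    if not fix_col:
        return 'none'
    return 'partial' if outcome_col.strip() in PARTIAL_OUTCOMES else 'complete'

assert classify('', 'Exhausted all options') == 'none'
assert classify('pin numpy', 'Exhausted all options') == 'partial'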
Example #13
def remove_docker_container(container_name):
    """ Remove Docker container used """
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        DependencyAnalyzerConstants.DOCKER_REMOVE_CONTAINER.format(
            container_name))
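
DOCKER_REMOVE_CONTAINER presumably templates a docker rm invocation. Without shell templates the same cleanup looks like this; the exact command behind the constant is an assumption:

import subprocess

def remove_container(container_name):
    # 'docker rm -f' removes the container even if it is still running.
    subprocess.run(['docker', 'rm', '-f', container_name],
                   stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)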
Example #14
def setup_bug_repo(component_path, repo_name, bug_id, version):
    """ Setup cloned source code """
    project_info_locn = join(
        component_path, DependencyAnalyzerConstants.PROJECTS_DIR, repo_name,
        DependencyAnalyzerConstants.PROJECT_INFO_FILE_NAME)
    bug_info_locn = join(component_path,
                         DependencyAnalyzerConstants.PROJECTS_DIR, repo_name,
                         DependencyAnalyzerConstants.BUGS_DIR, bug_id,
                         DependencyAnalyzerConstants.BUG_INFO_FILENM)
    git_repo_url = get_value_from_info_file(
        project_info_locn, DependencyAnalyzerConstants.INFO_GIT_URL)
    outer_dir_name = '{}_{}_{}'.format(repo_name, bug_id, version)
    if repo_name == 'spacy':
        clone_dir = join(os.getcwd(), outer_dir_name, 'spaCy')
    else:
        clone_dir = join(os.getcwd(), outer_dir_name, repo_name)
    if not git_repo_url:
        return None
    if isdir(clone_dir) or isdir(join(os.getcwd(), outer_dir_name)):
        _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
            DependencyAnalyzerConstants.SUDO_RM_RF_CMD.format(
                join(os.getcwd(), outer_dir_name)))
    git_clone_cmd = DependencyAnalyzerConstants.GIT_CLONE_CMD.format(
        outer_dir_name, outer_dir_name,
        git_repo_url.strip(DependencyAnalyzerConstants.CHAR_SLASH))
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(git_clone_cmd)
    if not ok:
        print(stderr)
        print('Failed to clone repo')
        return None
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        DependencyAnalyzerConstants.CHANGE_PERMISSION_CMD.format(clone_dir))
    if not ok:
        print(stderr)
        print('Failed to change permission for cloned repo')
        return None
    failed_commit = get_value_from_info_file(
        bug_info_locn, DependencyAnalyzerConstants.INFO_BUGGY_COMMIT)
    passed_commit = get_value_from_info_file(
        bug_info_locn, DependencyAnalyzerConstants.INFO_FIXED_COMMIT)
    test_file_locn = get_value_from_info_file(
        bug_info_locn, DependencyAnalyzerConstants.INFO_TEST_FILE)
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        DependencyAnalyzerConstants.GIT_RESET_HARD_CMD.format(
            clone_dir, passed_commit))
    if not ok:
        print(stderr)
        print('Failed to reset head of cloned repo to passed commit')
        return None
    if version == 0:  # failed version
        test_file_contents = get_test_file_contents(clone_dir, test_file_locn)
        _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
            DependencyAnalyzerConstants.GIT_RESET_HARD_CMD.format(
                clone_dir, failed_commit))
        if not ok:
            print(stderr)
            print('Failed to reset head of cloned repo to failed commit')
            return None
        if test_file_contents:
            write_test_file_contents(clone_dir, test_file_locn,
                                     test_file_contents)
    return clone_dir
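
For the failed version, setup_bug_repo saves the test file at the fixed commit, hard-resets to the buggy commit, and writes the file back, so the newer tests run against the older code. That trick in isolation; the helper name is ours:

import subprocess
from os.path import join

def reset_keeping_file(repo_dir, commit, rel_path, contents):
    # Hard-reset the working tree, then restore the one file saved beforehand.
    subprocess.run(['git', '-C', repo_dir, 'reset', '--hard', commit], check=True)
    with open(join(repo_dir, rel_path), 'w') as f:
        f.writelines(contents)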
Example #15
def run_log_analyzer(log_file_nm, output_log_path, log_output_content,
                     orig_log_files_path, component_path):
    """ Run log analyzer to check if a dependency issue still exists, if it does what are the errors and what are the possible candidates """
    if not isdir(
            join(expanduser(DependencyAnalyzerConstants.CHAR_TILDE),
                 'bugsinpy_intermediate_log')):
        mkdir_log_path_cmd = DependencyAnalyzerConstants.CREATE_DIR_CMD.format(
            expanduser(DependencyAnalyzerConstants.CHAR_TILDE),
            'bugsinpy_intermediate_log')
        _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
            mkdir_log_path_cmd)
        if not ok:
            log_output_content.append(stderr)
            log_output_content.append(
                'Failed to create intermediate log folder')
            return None, None, None, log_output_content
    change_per_cmd = DependencyAnalyzerConstants.CHANGE_PERMISSION_CMD.format(
        join(expanduser(DependencyAnalyzerConstants.CHAR_TILDE),
             'bugsinpy_intermediate_log'))
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        change_per_cmd)
    if not ok:
        log_output_content.append(stderr)
        log_output_content.append('Failed to change permission')
    cp_log_cmd = DependencyAnalyzerConstants.COPY_FILE_CMD.format(
        join(output_log_path, log_file_nm),
        join(expanduser(DependencyAnalyzerConstants.CHAR_TILDE),
             'bugsinpy_intermediate_log', log_file_nm))
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(cp_log_cmd)
    if not ok:
        log_output_content.append(stderr)
        log_output_content.append('Failed to copy new log')
        return None, None, None, log_output_content
    analyzer_cmd = DependencyAnalyzerConstants.BUGSINPY_RUN_LOG_ANALYZER.format(
        join(expanduser(DependencyAnalyzerConstants.CHAR_TILDE),
             'bugsinpy_intermediate_log'), orig_log_files_path, component_path,
        log_file_nm)
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(analyzer_cmd)
    if not ok:
        print(stderr)
        log_output_content.append('Failed to run analyzer')
        return None, None, None, log_output_content
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        'cp {} {}'.format(
            'bugsinpy_artifacts_dependency_broken_' + log_file_nm +
            DependencyAnalyzerConstants.CSV_FILE_EXT, output_log_path))
    if not ok:
        log_output_content.append('Failed to copy analyzer output')
    errors = None
    candidates = None
    files = None
    found_data = False
    with open('bugsinpy_artifacts_dependency_broken_' + log_file_nm +
              DependencyAnalyzerConstants.CSV_FILE_EXT,
              DependencyAnalyzerConstants.FILE_READ_MODE,
              encoding=DependencyAnalyzerConstants.STR_ENCODING_UTF8) as f:
        reader = csv.reader(f)
        next(reader, None)
        for row in reader:
            errors = row[1]
            candidates = json.loads(row[2])
            files = row[3]
            found_data = True
    if isfile('bugsinpy_artifacts_dependency_broken_' + log_file_nm +
              DependencyAnalyzerConstants.CSV_FILE_EXT):
        _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
            DependencyAnalyzerConstants.SUDO_RM_CMD.format(
                'bugsinpy_artifacts_dependency_broken_' + log_file_nm +
                DependencyAnalyzerConstants.CSV_FILE_EXT))
    if found_data:
        return errors, candidates, files, log_output_content
    return None, None, None, log_output_content
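
run_log_analyzer shells out for mkdir, chmod, and cp under the user's home directory, where sudo is typically unnecessary; the standard library can do the same staging directly. A sketch with a placeholder source path:

import os
import shutil
from os.path import expanduser, join

log_dir = join(expanduser('~'), 'bugsinpy_intermediate_log')
os.makedirs(log_dir, exist_ok=True)  # replaces the mkdir + chmod commands
shutil.copy('/path/to/output_logs/build.log', log_dir)  # placeholder source path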
Example #16
def change_component_permission(component_path):
    """ Change permission for component directory """
    _, stdout, stderr, ok = DependencyAnalyzerUtils._run_command(
        DependencyAnalyzerConstants.CHANGE_PERMISSION_CMD.format(
            component_path))
def main(args):
    path = args[2]
    dataset = args[4]
    is_subset_run = False
    if len(args) == 6:
        is_subset_run = True
    orig_file_name = None
    new_file_name = None
    result_file_prefix = None
    if dataset == '1':
        if not is_subset_run:
            orig_file_name = 'bugswarm_artifacts_dependency_broken_orig.csv'
        else:
            orig_file_name = 'bugswarm_subset_artifacts_dependency_broken_orig.csv'
        new_file_name = 'artifacts_dependency_broken.csv'
        result_file_prefix = 'bugswarm'
    elif dataset == '2':
        if not is_subset_run:
            orig_file_name = 'bugsinpy_artifacts_dependency_broken_orig.csv'
        else:
            orig_file_name = 'bugsinpy_subset_artifacts_dependency_broken_orig.csv'
        new_file_name = 'bugsinpy_artifacts_dependency_broken.csv'
        result_file_prefix = 'bugsinpy'
    else:
        print('No such dataset exists in this study')
        return
    orig_data = []
    new_data = []
    # read original data
    with open(join(path, 'orig_data', orig_file_name)) as f:
        csv_reader = csv.reader(f)
        next(csv_reader, None)
        for row in csv_reader:
            orig_data.append(row)
    # read new data
    with open(join(path, new_file_name)) as f:
        csv_reader = csv.reader(f)
        next(csv_reader, None)
        for row in csv_reader:
            new_data.append(row)
    diff_in_num_id = len(orig_data) - len(new_data)
    new_found = []
    prev_skipped = []
    common_diff_error = []
    orig_data_map = {}
    for row in orig_data:
        orig_data_map[row[0]] = row
    for row in new_data:
        key = row[0]
        if key not in orig_data_map:
            new_found.append(row)
            continue
        orig_row = orig_data_map[key]
        if orig_row[1] != row[1]:
            contrast_row = row
            contrast_row.append(orig_row[1])
            contrast_row.append(orig_row[2])
            common_diff_error.append(contrast_row)
        del orig_data_map[key]
    prev_skipped = list(orig_data_map.values())
    if diff_in_num_id > 0:
        print('Decrease in number identified: {}'.format(abs(diff_in_num_id)))
    elif diff_in_num_id < 0:
        print('Increase in number identified: {}'.format(abs(diff_in_num_id)))
    else:
        print('Number identified: No Change')
    print('New identified: {}'.format(len(new_found)))
    print('Not identified from previously identified: {}'.format(
        len(prev_skipped)))
    print('Error identified changed: {}'.format(len(common_diff_error)))
    DependencyAnalyzerUtils.write_to_csv(
        new_found, DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS,
        '{}_compare_new_found.csv'.format(result_file_prefix))
    DependencyAnalyzerUtils.write_to_csv(
        prev_skipped, DependencyAnalyzerConstants.CSV_ARTIFACTS_BROKEN_HEADERS,
        '{}_compare_prev_skipped.csv'.format(result_file_prefix))
    DependencyAnalyzerUtils.write_to_csv(
        common_diff_error,
        DependencyAnalyzerConstants.COMPARE_ARTIFACTS_BROKEN_HEADERS,
        '{}_compare_common_diff_error.csv'.format(result_file_prefix))
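
The comparison above keys both CSVs by their first column and walks the new rows; with dict views, the three buckets fall out of plain set operations. Illustrative rows:

orig = {'art_a': ['art_a', 'err1'], 'art_b': ['art_b', 'err2']}
new = {'art_b': ['art_b', 'err3'], 'art_c': ['art_c', 'err1']}

new_found = [new[k] for k in new.keys() - orig.keys()]      # rows only in new
prev_skipped = [orig[k] for k in orig.keys() - new.keys()]  # rows only in orig
common_diff_error = [k for k in new.keys() & orig.keys()
                     if new[k][1] != orig[k][1]]            # ['art_b']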
Example #18
def main(args):
    path = args[2]
    orig_log_path = args[4]
    op = []
    artifact_dict = DependencyAnalyzerUtils.get_artifact_dict(path)
    iter_count = 0
    proj_dep_buckets = {
        '1-5': 0,
        '6-10': 0,
        '11-15': 0,
        '16-25': 0,
        '26-100': 0,
        '>100': 0
    }
    tran_dep_buckets = {
        '1-5': 0,
        '6-10': 0,
        '11-15': 0,
        '16-20': 0,
        '21-25': 0,
        '26-50': 0,
        '51-75': 0,
        '76-100': 0,
        '>100': 0
    }
    total_pinned = {'project': 0, 'transitive': 0, 'total': 0}
    total_constrained = {'project': 0, 'transitive': 0, 'total': 0}
    total_unconstrained = {'project': 0, 'transitive': 0, 'total': 0}
    for img_tag in artifact_dict:
        if artifact_dict[img_tag][DependencyAnalyzerConstants.LANGUAGE_KEY]\
                != DependencyAnalyzerConstants.LANGUAGE_PYTHON:
            continue
        if ('terasoluna' in img_tag and img_tag not in [
                'terasolunaorg-guideline-166664541',
                'terasolunaorg-guideline-167349415',
                'terasolunaorg-guideline-166664553'
        ]) or 'openfisca' in img_tag:
            continue
        print(iter_count)
        iter_count += 1
        # failed_orig_log_path = download_orig_log(artifact_dict[img_tag], img_tag, 'failed')
        failed_orig_log_path = join(
            orig_log_path,
            DependencyAnalyzerConstants.ORIG_LOG_FILENM_PATTERN.format(
                img_tag, 'failed'))
        if not isfile(failed_orig_log_path):
            continue
        installed_pkgs = check_log(failed_orig_log_path)
        count, pinned, constrained, unconstrained, transitive = gather_metrics(
            installed_pkgs)
        if count > 0:
            op.append([
                img_tag, 'failed', count, pinned, constrained, unconstrained,
                transitive['count'], transitive['pinned'],
                transitive['constrained'], transitive['unconstrained'],
                json.dumps([ob.__dict__ for ob in installed_pkgs])
            ])
            tran_dep_buckets, proj_dep_buckets, total_pinned, total_constrained, total_unconstrained = update_dep_counts(
                tran_dep_buckets, proj_dep_buckets, total_pinned,
                total_constrained, total_unconstrained, count, pinned,
                constrained, unconstrained, transitive)
        # passed_orig_log_path = download_orig_log(artifact_dict[img_tag], img_tag, 'passed')
        passed_orig_log_path = join(
            orig_log_path,
            DependencyAnalyzerConstants.ORIG_LOG_FILENM_PATTERN.format(
                img_tag, 'passed'))
        if not isfile(passed_orig_log_path):
            continue
        installed_pkgs = check_log(passed_orig_log_path)
        count, pinned, constrained, unconstrained, transitive = gather_metrics(
            installed_pkgs)
        if count > 0:
            op.append([
                img_tag, 'passed', count, pinned, constrained, unconstrained,
                transitive['count'], transitive['pinned'],
                transitive['constrained'], transitive['unconstrained'],
                json.dumps([ob.__dict__ for ob in installed_pkgs])
            ])
            tran_dep_buckets, proj_dep_buckets, total_pinned, total_constrained, total_unconstrained = update_dep_counts(
                tran_dep_buckets, proj_dep_buckets, total_pinned,
                total_constrained, total_unconstrained, count, pinned,
                constrained, unconstrained, transitive)
    DependencyAnalyzerUtils.write_to_csv(op, [
        'Artifact Image Tag', 'Pass/Fail', 'Project Dependency Count',
        'Pinned Dependency Count', 'Constrained Dependency Count',
        'Unconstrained Dependency Count', 'Transitive Dependency Count',
        'Transitive Pinned Dependency Count',
        'Transitive Constrained Dependency Count',
        'Transitive Unconstrained Dependency Count', 'Installed Packages'
    ], 'orig_log_metrics.csv')
    print('==========**** BugSwarm METRICS FINAL OUTPUT ****==========')
    print('Figure 2a: Builds by number of project dependencies')
    for key in proj_dep_buckets:
        print('{} dependencies: {} builds'.format(key, proj_dep_buckets[key]))
    print('===========================================')
    print('Fig 2b: Builds by number of transitive dependencies')
    for key in tran_dep_buckets:
        print('{} dependencies: {} builds'.format(key, tran_dep_buckets[key]))
    print('===========================================')
    print('Fig 3: Dependencies by Version Specification')
    print('Pinned Version Specification')
    print('Project Dependencies: {}'.format(total_pinned['project']))
    print('Transitive Dependencies: {}'.format(total_pinned['transitive']))
    print('Total Dependencies: {}'.format(total_pinned['total']))
    print('====================')
    print('Constrained Version Specification')
    print('Project Dependencies: {}'.format(total_constrained['project']))
    print('Transitive Dependencies: {}'.format(
        total_constrained['transitive']))
    print('Total Dependencies: {}'.format(total_constrained['total']))
    print('====================')
    print('Unconstrained Version Specification')
    print('Project Dependencies: {}'.format(total_unconstrained['project']))
    print('Transitive Dependencies: {}'.format(
        total_unconstrained['transitive']))
    print('Total Dependencies: {}'.format(total_unconstrained['total']))
    print('==========**** END OF OUTPUT ****==========')
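
update_dep_counts (defined elsewhere) has to drop each dependency count into one of the '1-5', '6-10', ... buckets printed above; bisect does that cleanly. A sketch of one plausible implementation for the project-dependency buckets:

import bisect

BOUNDS = [5, 10, 15, 25, 100]  # upper edges of the first five buckets
LABELS = ['1-5', '6-10', '11-15', '16-25', '26-100', '>100']

def bucket_for(count):
    # bisect_left finds the first bucket whose upper edge is >= count.
    return LABELS[bisect.bisect_left(BOUNDS, count)]

assert bucket_for(7) == '6-10'
assert bucket_for(101) == '>100'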