Exemple #1
0
def synthetic_blame(commit: Commit, test_run: TestRun):
    '''
    Creates the blames for the failing tests of a synthetic commit.

    For every failed test result of ``test_run`` the blames are derived from
    the synthetic children of ``commit`` (the commits made up of a subset of
    this commit's diffs):

     - If at least one child commit has the same failing output for the
       test, each distinct diff blamed by those children is blamed again
       for this test failure.
     - Otherwise the failure is new for this combination of diffs and every
       diff of ``commit`` is blamed.

    Returns early, without creating or committing anything, when the test
    run has no failed tests.

    :param commit: the synthetic commit to assign blames for
    :param test_run: the test run whose failed tests are being blamed
    '''
    if not test_run.failed_tests:
        logger.info(
            'No failing tests for commit {}, nothing to blame'.format(commit))
        return

    logger.info('Setting blame for commit: {}'.format(commit))
    session = Session.object_session(commit)

    # Get the list of commits that are made up of the subset of diffs in this
    # commit
    children_commits = get_synthetic_children_commits(commit)

    for failed_test_result in test_run.failed_tests:
        # Collect the blames the children already hold for this same test
        # failure.  They are combined into the blames of this test failure.
        children_test_failure_blames = []
        for child_commit in children_commits:
            if child_commit.has_same_test_result_output(
                    failed_test_result, status=TEST_OUTPUT_FAILURE):
                child_test_failure = (
                    child_commit.get_matching_test_result(failed_test_result))
                children_test_failure_blames.extend(child_test_failure.blames)

        if children_test_failure_blames:
            # De-duplicate the blamed diffs while keeping the order in which
            # they were discovered, so blames are created deterministically.
            faulty_diffs = list(dict.fromkeys(
                blame.diff for blame in children_test_failure_blames))

            for faulty_diff in faulty_diffs:
                # Several children may have contributed the same diff, so the
                # message names the diff rather than a single child commit.
                logger.info('Assigning blame from child commits using diff '
                            '{} for test failure: {}'.format(
                                faulty_diff, failed_test_result))
                create(session,
                       Blame,
                       diff=faulty_diff,
                       test_result=failed_test_result)

        else:
            # No child explains this failure: it is a completely new blame
            # produced by this combination of diffs.
            for diff in commit.diffs:
                blame = create(session,
                               Blame,
                               diff=diff,
                               test_result=failed_test_result)
                logger.info('Assigning new blame for commit {} blame {} '
                            'for test failure: {}'.format(
                                commit, blame, failed_test_result))

    logger.info('Completed blaming for {}'.format(commit))
    session.commit()
Exemple #2
0
def run_all_tests(commit: Commit):
    '''
    Executes the repository's test command against a commit.

    A TestRun row is created for the commit, stamped with the time the run
    started, and handed to ``_run_tests`` together with the repository's
    test command.

    :param commit: the commit to test
    :returns: the TestRun that was created for this execution
    '''
    logger.info('Running the tests against commit: {}'.format(commit))

    start_timestamp = time.time()
    readable_start = datetime.utcfromtimestamp(start_timestamp)
    announcement = ('Testing {repo_name} at commit "{commit_id}" at {date} '
                    'with command:\n{command}')
    logger.info(announcement.format(
        repo_name=commit.repository.name,
        commit_id=commit.commit_id,
        command=commit.repository.test_commands,
        date=readable_start.strftime('%Y-%m-%d %H:%M:%S')))

    session = Session.object_session(commit)
    test_run = create(session,
                      TestRun,
                      commit=commit,
                      start_timestamp=start_timestamp)

    # hand the run over to the actual test executor
    _run_tests(test_run, commit.repository.test_commands)

    return test_run
Exemple #3
0
def _initialize_repository(session,
                           src_path: str,
                           initialize_commands: str = None,
                           test_commands: str = None,
                           src_directory: str = None,
                           commit_only=False,
                           ignored_files=''):
    '''
    Given a src_path to a repository, create the repository in the database
    (unless a repository with the same url already exists) and prepare it.

    When a new repository has to be created, any of ``initialize_commands``,
    ``test_commands`` and ``src_directory`` that was not supplied is asked
    for interactively.  Nothing is asked when the repository already exists,
    since those values are only used to create the row.

    After the lookup/creation the repository is checked for cleanliness, the
    mirror repository is synced and switched to the bug_buddy branch and,
    unless ``commit_only`` is set, a snapshot is taken.

    :param session: database session used for the lookup and creation
    :param src_path: path to the repository's source checkout
    :param initialize_commands: commands that initialize the repository
    :param test_commands: command that runs the repository's tests
    :param src_directory: source directory of the project
    :param commit_only: when True the initial snapshot is skipped
    :param ignored_files: stored on the newly created repository
    :returns: the existing or newly created Repository
    '''
    logger.info('Initializing repository at "{}"'.format(src_path))
    url = get_repository_url_from_path(src_path)
    name = get_repository_name_from_url(url)
    logger.info('Repository name is "{}" with url "{}"'.format(name, url))

    # Look the repository up once; only a missing repository needs the
    # commands, so only then is the user prompted for them.
    repository = get(session, Repository, url=url)
    if not repository:
        if not initialize_commands:
            msg = ('Input the commands to initialize the repo (ex. '
                   '"source env/bin/activate"): ')
            initialize_commands = input(msg)
        if not test_commands:
            msg = ('Input the command to run the tests for the repo: ')
            test_commands = input(msg)
        if not src_directory:
            msg = ('Input the source directory for your project: ')
            src_directory = input(msg)

        repository = create(session,
                            Repository,
                            name=name,
                            url=url,
                            initialize_commands=initialize_commands,
                            test_commands=test_commands,
                            src_directory=src_directory,
                            src_path=src_path,
                            ignored_files=ignored_files)

    _check_repo_is_clean(repository, path=repository.original_path)

    # create the mirror repository that BugBuddy primarily works on
    sync_mirror_repo(repository)

    # make sure the mirrored repo is on bug_buddy branch
    set_bug_buddy_branch(repository)

    # Initialize the repository by recording functions and creating synthetic
    # diffs
    if not commit_only:
        snapshot(repository, allow_empty=True)

    session.commit()
    logger.info(
        'Your repository "{}" has been successfully initialized!'.format(
            repository))

    return repository
Exemple #4
0
def create_new_functions_from_nodes(commit: Commit, function_nodes):
    '''
    Registers every given node as a brand new function.

    For each node a Function row is created (node name, node file path and
    the commit's repository), followed by a FunctionHistory row that ties
    that function to ``commit`` and spans the node's first and last line.

    :param commit: the commit in which the functions first appear
    :param function_nodes: iterable of function nodes to register
    '''
    session = Session.object_session(commit)

    for function_node in function_nodes:
        # the function itself ...
        new_function = create(
            session,
            Function,
            name=function_node.name,
            repository=commit.repository,
            file_path=function_node.file_path,
        )

        # ... and its very first history entry, recorded for this commit
        create(
            session,
            FunctionHistory,
            function=new_function,
            commit=commit,
            node=function_node,
            first_line=function_node.first_line,
            last_line=function_node.last_line,
        )
Exemple #5
0
def create_synthetic_diff_for_node(repository: Repository, commit: Commit,
                                   node):
    '''
    Produces a synthetic 'assert False' change for the function of a node.

    A FunctionHistory is created for the node's function (looked up through
    the previous commit) and an 'assert False' statement is prepended to it.
    If the library is testable, a synthetic diff is created from that single
    change and then reverted; otherwise the added line is simply removed
    again.

    :param repository: the repository being altered
    :param commit: the commit the function history and diff belong to
    :param node: the function node to alter
    :returns: the node that was passed in
    '''
    session = Session.object_session(repository)

    parent_commit = get_previous_commit(commit)
    target_function = parent_commit.get_function_for_node(node).function

    # The last line is reduced by one because the 'assert False' line will
    # have been removed again by the time the commit is completed.
    history = create(
        session,
        FunctionHistory,
        function=target_function,
        commit=commit,
        node=node,
        first_line=node.first_line,
        last_line=node.last_line - 1,
    )
    logger.info('There is a new function history: {}'.format(history))

    inserted_line = history.prepend_statement('assert False')

    if not library_is_testable(repository):
        # nothing to record: take the statement back out of the source code
        history.remove_line(inserted_line)
        return node

    # record the single change as a synthetic diff
    synthetic_diffs = create_diffs(repository,
                                   commit=commit,
                                   function=target_function,
                                   is_synthetic=True,
                                   allow_empty=False)

    # Altering one function must yield exactly one diff
    assert len(synthetic_diffs) == 1

    synthetic_diff = synthetic_diffs[0]
    logger.info('Created diff: {}'.format(synthetic_diff))

    # leave the repository clean again
    revert_diff(synthetic_diff)

    return node
Exemple #6
0
def create_diffs(repository: Repository,
                 commit: Commit = None,
                 is_synthetic=False,
                 function: Function = None,
                 allow_empty=True,
                 only_unstaged=False) -> DiffList:
    '''
    Creates a Diff for every patch discovered for a commit.

    :param repository: the repository the diffs belong to
    :param commit: the commit to diff; defaults to the repository's most
                   recent commit
    :param is_synthetic: stored on every created Diff
    :param function: when given, every Diff is linked to this function;
                     otherwise the function is looked up from the patch's
                     file path and line range (and may be None)
    :param allow_empty: when False, finding no patches is logged as an error
    :param only_unstaged: forwarded to ``get_diff_patches``
    :returns: the list of created Diff instances, empty if there were no
              patches
    '''
    session = Session.object_session(repository)
    if not commit:
        commit = get_most_recent_commit(repository)

    diffs = []

    # the patches should be split on a per function basis
    patches = get_diff_patches(commit, only_unstaged=only_unstaged)

    if not allow_empty and not patches:
        # Report the problem instead of dropping into a debugger, which
        # would hang any non-interactive run.
        logger.error('No diffs discovered when allow_empty == False')

    for patch in patches:
        diff_function = function
        file_path = patch.header.new_path
        first_line, last_line = get_range_of_patch(patch)

        if not diff_function:
            # no function was forced by the caller: find the one the patch
            # touches, if any
            function_history = commit.get_corresponding_function(
                file_path=file_path,
                start_range=first_line,
                end_range=last_line,
            )
            diff_function = (function_history.function
                             if function_history else None)

        diff = create(session,
                      Diff,
                      commit=commit,
                      patch=patch.text,
                      function=diff_function,
                      file_path=file_path,
                      is_synthetic=is_synthetic,
                      first_line=first_line,
                      last_line=last_line)

        diffs.append(diff)

    return diffs
Exemple #7
0
    def set_commit(self, commit):
        '''
        Points the Environment at a new commit and resets the per-commit
        bookkeeping.

        Sets the session, commit and state, creates a fresh TestRun when not
        in synthetic training, loads the commit's tests sorted by id and
        zeroes the reward / result counters and the list of tests ran.

        :param commit: the commit the Environment should now operate on
        '''
        logger.info('Setting commit to {}'.format(commit))

        self.session = Session.object_session(commit)
        self.commit = commit
        self.state = commit_to_state(
            commit, synthetic_training=self.synthetic_training)

        # All tests are linked with a corresponding TestRun instance.  A new
        # one is only needed when the tests are actually going to be run,
        # i.e. outside of synthetic training.
        if not self.synthetic_training:
            self.test_run = create(self.session, TestRun, commit=self.commit)

        # list of available tests, sorted in ascending order of Test.id
        self.all_tests = get_list_of_tests(self.commit)
        self.all_tests.sort(key=lambda test: test.id)

        # total number of rewards accumulated for this change
        self.total_rewards = 0

        # total number of newly passing or newly failing tests discovered for
        # this change
        self.num_newly_changed_results_found = 0

        # list of the tests that have already been ran for this commit
        self.tests_ran = []
        logger.info('Set commit')
Exemple #8
0
def _save_altered_file_function_history(commit: Commit,
                                        previous_commit: Commit,
                                        altered_file_paths: List[str],
                                        altered_function_nodes, patches):
    '''
    Saves the function histories of the functions living in altered files.

    For every altered file:
     1) The file's current function nodes and the previous commit's function
        histories for that file are grouped by function name.  Each group
        holds two lists, keyed by PREVIOUS_HISTORY and CURRENT_NODE.
     2) Each group is handed to ``_match_nodes_with_history`` which returns
        the (node, previous history) pairs it matched and the nodes it could
        not match.
     3) A FunctionHistory for ``commit`` is created for every matched pair,
        reusing the function of the previous history.
     4) The unmatched nodes are stored as newly created functions.

    ``patches`` is accepted but not used by this function.
    '''
    session = Session.object_session(commit)

    for file_path in altered_file_paths:
        # what the file contains now ...
        nodes_in_file = [
            node for node in altered_function_nodes
            if node.file_path == file_path
        ]

        # ... and what the previous commit recorded for it
        histories_in_file = [
            history for history in previous_commit.function_histories
            if history.function.file_path == file_path
        ]

        # group both by name; current nodes are inserted before histories
        grouped_by_name = {}
        for candidate in nodes_in_file + histories_in_file:
            kind = CURRENT_NODE
            if isinstance(candidate, FunctionHistory):
                kind = PREVIOUS_HISTORY

            group = grouped_by_name.setdefault(candidate.name,
                                               defaultdict(list))
            group[kind].append(candidate)

        for group in grouped_by_name.values():
            old_histories = group[PREVIOUS_HISTORY]
            new_nodes = group[CURRENT_NODE]

            matched_pairs, unmatched_nodes = _match_nodes_with_history(
                old_histories, new_nodes)

            for matched_node, matched_history in matched_pairs:
                create(session,
                       FunctionHistory,
                       function=matched_history.function,
                       commit=commit,
                       node=matched_node,
                       first_line=matched_node.first_line,
                       last_line=matched_node.last_line)

            # whatever could not be matched becomes a new function
            create_new_functions_from_nodes(commit, unmatched_nodes)
Exemple #9
0
def save_function_histories(repository: Repository, commit: Commit,
                            function_nodes, patches):
    '''
    Stores the function histories of a commit.

    Without a previous commit every node is stored as a new function.
    Otherwise the nodes are split by whether their file appears in the
    patches:

     - Nodes in unaltered files reuse the previous commit's function history
       with the same name, file path and first line.  If no such history is
       found an error is logged and the node is skipped.
     - Nodes in altered files are delegated to
       ``_save_altered_file_function_history``.

    :param repository: the repository, used to obtain the database session
    :param commit: the commit to store function histories for
    :param function_nodes: the function nodes present in the commit
    :param patches: the patches of the commit; their ``header.new_path``
                    determines which files count as altered
    '''
    session = Session.object_session(repository)

    previous_commit = get_previous_commit(commit)

    if not previous_commit:
        logger.info('No previous commit, creating new function nodes')
        create_new_functions_from_nodes(commit, function_nodes)
        return

    # from the patches, determine which files were altered
    altered_file_set = {patch.header.new_path for patch in patches}
    altered_files = list(altered_file_set)
    logger.info('Altered files: {}'.format(altered_files))

    altered_function_nodes = []
    unaltered_function_nodes = []
    for function_node in function_nodes:
        if function_node.file_path in altered_file_set:
            altered_function_nodes.append(function_node)
        else:
            unaltered_function_nodes.append(function_node)

    # If the file was not altered, then we can simply find the previous
    # function history and recreate it for this commit without even finding
    # the appropriate function
    for function_node in unaltered_function_nodes:
        # histories of the previous commit with the same name and file path
        same_function_query = (
            session.query(FunctionHistory)
            .join(FunctionHistory.function)
            .filter(FunctionHistory.commit_id == previous_commit.id)
            .filter(Function.name == function_node.name)
            .filter(Function.file_path == function_node.file_path))

        try:
            previous_function_history = same_function_query.filter(
                FunctionHistory.first_line == function_node.lineno).one()
        except NoResultFound:
            # Report the near misses and skip the node: continuing would use
            # an unbound (or a previous iteration's) function history.
            near_misses = same_function_query.all()
            logger.error(
                'Unable to find previous function history for node {} '
                'which was in an unaltered file. Histories with the same '
                'name and file in the previous commit: {}'.format(
                    function_node, near_misses))
            continue

        create(session,
               FunctionHistory,
               function=previous_function_history.function,
               commit=commit,
               node=function_node,
               first_line=previous_function_history.first_line,
               last_line=previous_function_history.last_line)

    # If the file was altered, then we need to be extremely careful about how
    # we track function history.
    _save_altered_file_function_history(commit, previous_commit, altered_files,
                                        altered_function_nodes, patches)
Exemple #10
0
def create_results_from_junit_xml(output_file: str, repository: Repository,
                                  test_run: TestRun):
    '''
    Gets results from a JUnitXML format file
    https://docs.pytest.org/en/latest/usage.html#creating-junitxml-format-files

    Every test case in the file gets (or creates) its Test row and a
    TestResult row linked to ``test_run``.  A test case whose name, file and
    classname were already seen in this file is logged and ignored.

    Reading is best effort: any exception is logged with its traceback and
    not propagated.

    :param output_file: path of the JUnitXML file to read
    :param repository: the repository the tests belong to
    :param test_run: the test run the results are recorded against
    '''
    logger.info('Reading info from {}'.format(output_file))
    try:
        session = Session.object_session(test_run)
        xml_output = JUnitXml.fromfile(output_file)

        seen_test_ids = set()

        # the "new test created" notice is only logged once per file
        reported_new_test = False

        for test_case in xml_output:
            test_file = test_case._elem.attrib.get('file')
            test_classname = test_case._elem.attrib.get('classname')

            # There can seemingly be duplicate test outputs for a test if both
            # the test and the test's teardown step both fail.  So we will
            # ignore the second test output
            unique_id = '{}/{}/{}'.format(test_case.name, test_file,
                                          test_classname)
            if unique_id in seen_test_ids:
                logger.error(
                    'There was a duplicate test output for test: {}'.format(
                        test_case.name))
                continue

            seen_test_ids.add(unique_id)

            test, is_new = get_or_create(
                session,
                Test,
                repository=repository,
                name=test_case.name,
                file=test_file,
                classname=test_classname,
            )

            if is_new and not reported_new_test:
                logger.error('Did you expect to create a new test?')
                reported_new_test = True

            # A result of type 'pytest.skip' is recorded as skipped; any
            # other truthy result counts as a failure.
            result = test_case.result
            if getattr(result, 'type', None) == 'pytest.skip':
                status = TEST_OUTPUT_SKIPPED
            elif result:
                status = TEST_OUTPUT_FAILURE
            else:
                status = TEST_OUTPUT_SUCCESS

            create(
                session,
                TestResult,
                test=test,
                test_run=test_run,
                status=status,
                time=test_case.time,
            )

    except Exception as e:
        # Best effort: keep going, but record the failure with its traceback
        # instead of stopping in a debugger.
        logger.exception(
            'Hit error when reading from junit xml: {}'.format(e))