def synthetic_blame(commit: Commit, test_run: TestRun):
    '''
    Given a synthetic commit, it will create blames for the commit based on
    the blames of the sub-combinations of the diffs

    @param commit: the synthetic commit whose failures should be blamed
    @param test_run: the test run containing the failed tests to blame
    '''
    if not test_run.failed_tests:
        logger.info(
            'No failing tests for commit {}, nothing to blame'.format(commit))
        return

    logger.info('Setting blame for commit: {}'.format(commit))
    session = Session.object_session(commit)

    # Get the list of commits that are made up of the subset of diffs in this
    # commit
    children_commits = get_synthetic_children_commits(commit)

    for failed_test_result in test_run.failed_tests:
        # get all the blames for this test failure that were new at the time.
        # The newness attribute should remove duplicates.
        # All of these blames will now be combined for a new blame for this
        # test failure.
        children_test_failure_blames = []
        for child_commit in children_commits:
            if child_commit.has_same_test_result_output(
                    failed_test_result,
                    status=TEST_OUTPUT_FAILURE):
                child_test_failure = (
                    child_commit.get_matching_test_result(failed_test_result))

                for blame in child_test_failure.blames:
                    children_test_failure_blames.append(blame)

        if children_test_failure_blames:
            # de-duplicate the diffs blamed by the children
            faulty_diffs = list(
                set([blame.diff for blame in children_test_failure_blames]))
            for faulty_diff in faulty_diffs:
                # NOTE: this log previously referenced the loop variable
                # child_commit, which at this point held whichever child
                # happened to be iterated last — not necessarily the child
                # that produced faulty_diff
                logger.info('Assigning blame using diff {} for test '
                            'failure: {}'.format(
                                faulty_diff, failed_test_result))
                create(session,
                       Blame,
                       diff=faulty_diff,
                       test_result=failed_test_result)
        else:
            # We have created a completely new blame from this combination of
            # diffs in comparison from its children
            for diff in commit.diffs:
                blame = create(session,
                               Blame,
                               diff=diff,
                               test_result=failed_test_result)
                # fixed: the two string fragments previously joined with no
                # separating space ("... blame {}for test failure ...")
                logger.info('Assigning new blame for commit {} blame {} '
                            'for test failure: {}'.format(
                                commit, blame, failed_test_result))

    logger.info('Completed blaming for {}'.format(commit))
    session.commit()
def run_all_tests(commit: Commit):
    '''
    Runs a repository's tests and records the results

    @param commit: the commit to run the repository's tests against
    @returns: the created TestRun instance
    '''
    logger.info('Running the tests against commit: {}'.format(commit))

    start_timestamp = time.time()
    date = (datetime.utcfromtimestamp(start_timestamp)
            .strftime('%Y-%m-%d %H:%M:%S'))
    logger.info('Testing {repo_name} at commit "{commit_id}" at {date} '
                'with command:\n{command}'.format(
                    repo_name=commit.repository.name,
                    commit_id=commit.commit_id,
                    command=commit.repository.test_commands,
                    date=date))

    session = Session.object_session(commit)
    test_run = create(session,
                      TestRun,
                      commit=commit,
                      start_timestamp=start_timestamp)

    # run all of the tests
    _run_tests(test_run, commit.repository.test_commands)

    return test_run
def _initialize_repository(session,
                           src_path: str,
                           initialize_commands: str = None,
                           test_commands: str = None,
                           src_directory: str = None,
                           commit_only=False,
                           ignored_files=''):
    '''
    Given a src_path to a repository, create the repository in the database

    @param session: the database session
    @param src_path: path to the repository on disk
    @param initialize_commands: shell commands to set up the environment;
                                prompted for if not provided
    @param test_commands: shell command that runs the test suite; prompted
                          for if not provided
    @param src_directory: the project's source directory; prompted for if
                          not provided
    @param commit_only: if True, skip the initial snapshot
    @param ignored_files: files BugBuddy should ignore
    @returns: the Repository instance
    '''
    logger.info('Initializing repository at "{}"'.format(src_path))
    url = get_repository_url_from_path(src_path)
    # fixed: a redundant get(session, Repository, url=url) used to run here;
    # its result was discarded and the same query is issued again below
    name = get_repository_name_from_url(url)
    logger.info('Repository name is "{}" with url "{}"'.format(name, url))

    if not initialize_commands:
        # fixed typo in the prompt: "intialize" -> "initialize"
        msg = ('Input the commands to initialize the repo (ex. '
               '"source env/bin/activate"): ')
        initialize_commands = input(msg)
    if not test_commands:
        msg = ('Input the command to run the tests for the repo: ')
        test_commands = input(msg)
    if not src_directory:
        msg = ('Input the source directory for your project: ')
        src_directory = input(msg)

    # first check to see if the repository already exists
    repository = get(session, Repository, url=url)
    if not repository:
        repository = create(session,
                            Repository,
                            name=name,
                            url=url,
                            initialize_commands=initialize_commands,
                            test_commands=test_commands,
                            src_directory=src_directory,
                            src_path=src_path,
                            ignored_files=ignored_files)

    _check_repo_is_clean(repository, path=repository.original_path)

    # create the mirror repository that BugBuddy primarily works on
    sync_mirror_repo(repository)

    # make sure the mirrored repo is on bug_buddy branch
    set_bug_buddy_branch(repository)

    # Initialize the repository by recording functions and creating synthetic
    # diffs
    if not commit_only:
        snapshot(repository, allow_empty=True)

    session.commit()
    logger.info(
        'Your repository "{}" has been successfully initialized!'.format(
            repository))
    return repository
def create_new_functions_from_nodes(commit: Commit, function_nodes):
    '''
    Given a list of function nodes, it will create new functions

    @param commit: the commit in which the functions first appear
    @param function_nodes: the AST nodes for the newly discovered functions
    '''
    session = Session.object_session(commit)
    for node in function_nodes:
        # create the function instance
        function = create(session,
                          Function,
                          name=node.name,
                          repository=commit.repository,
                          file_path=node.file_path)

        # We have a new function! Record its first history entry. The
        # created instance is tracked by the session, so the previously
        # unused `function_history` local has been dropped.
        create(session,
               FunctionHistory,
               function=function,
               commit=commit,
               node=node,
               first_line=node.first_line,
               last_line=node.last_line)
def create_synthetic_diff_for_node(repository: Repository,
                                   commit: Commit,
                                   node):
    '''
    Creates the visited function and adds an 'assert False' to the node.
    This is used for creating synthetic 'assert False' diffs for each
    function.

    @returns: the node that was visited
    '''
    session = Session.object_session(repository)
    previous_commit = get_previous_commit(commit)
    function = previous_commit.get_function_for_node(node).function

    # create the function history instance
    function_history = create(
        session,
        FunctionHistory,
        function=function,
        commit=commit,
        node=node,
        first_line=node.first_line,
        # we need the minus 1 because when we complete the commit the
        # 'assert False' line will have been removed
        last_line=node.last_line - 1,
    )
    logger.info('There is a new function history: {}'.format(function_history))

    added_line = function_history.prepend_statement('assert False')

    if not library_is_testable(repository):
        # remove the addition from the source code
        function_history.remove_line(added_line)
        return node

    # create a new diff from this one change
    diffs = create_diffs(repository,
                         commit=commit,
                         function=function,
                         is_synthetic=True,
                         allow_empty=False)

    # There should always be only one diff created from altering one
    # function
    assert len(diffs) == 1
    diff = diffs[0]
    logger.info('Created diff: {}'.format(diff))

    # go back to a clean repository
    revert_diff(diff)
    return node
def create_diffs(repository: Repository,
                 commit: Commit = None,
                 is_synthetic=False,
                 function: Function = None,
                 allow_empty=True,
                 only_unstaged=False) -> DiffList:
    '''
    Returns a list of diffs from a repository

    @param repository: the repository to read diffs from
    @param commit: the commit the diffs belong to; defaults to the most
                   recent commit
    @param is_synthetic: whether the diffs were generated by BugBuddy
    @param function: if set, every diff is attributed to this function
                     instead of being located by line range
    @param allow_empty: whether an empty patch list is acceptable
    @param only_unstaged: whether to consider only unstaged changes
    @returns: the list of created Diff instances
    '''
    session = Session.object_session(repository)
    if not commit:
        commit = get_most_recent_commit(repository)

    diffs = []

    # the patches should be split on a per function basis
    patches = get_diff_patches(commit, only_unstaged=only_unstaged)

    if not allow_empty and not patches:
        # fixed: this used to drop into pdb.set_trace() and then re-run
        # get_diff_patches for no effect; now we just surface the problem
        logger.error('No diffs discovered when allow_no_diffs == False')

    for patch in patches:
        diff_function = function
        file_path = patch.header.new_path
        first_line, last_line = get_range_of_patch(patch)

        if not diff_function:
            # locate the function whose line range contains this patch
            function_history = commit.get_corresponding_function(
                file_path=file_path,
                start_range=first_line,
                end_range=last_line,
            )
            diff_function = (function_history.function
                             if function_history else None)

        diff = create(session,
                      Diff,
                      commit=commit,
                      patch=patch.text,
                      function=diff_function,
                      file_path=file_path,
                      is_synthetic=is_synthetic,
                      first_line=first_line,
                      last_line=last_line)
        diffs.append(diff)

    return diffs
def set_commit(self, commit):
    '''
    Whenever we update the commit for the Environment, we need to set
    multiple different variables

    @param commit: the commit this environment should now represent
    '''
    logger.info('Setting commit to {}'.format(commit))

    # NOTE: a dead `if self.synthetic_training and False:` block that logged
    # the commit's test failures was removed here — the `and False` made it
    # unreachable, so behavior is unchanged.

    self.session = Session.object_session(commit)
    self.commit = commit
    self.state = commit_to_state(
        commit, synthetic_training=self.synthetic_training)

    # All tests are linked with a corresponding TestRun instance. When
    # running the tests against this state
    if not self.synthetic_training:
        self.test_run = create(self.session, TestRun, commit=self.commit)

    # list of available tests, sorted in order of their id. It is sorted
    # by the Test.id
    self.all_tests = get_list_of_tests(self.commit)
    self.all_tests.sort(key=lambda test: test.id, reverse=False)

    # total number of rewards accumulated for this change
    self.total_rewards = 0

    # total number of newly passing or newly failing tests discovered for
    # this change
    self.num_newly_changed_results_found = 0

    # list of the tests that have already been ran for this commit
    self.tests_ran = []

    logger.info('Set commit')
def _save_altered_file_function_history(commit: Commit,
                                        previous_commit: Commit,
                                        altered_file_paths: List[str],
                                        altered_function_nodes,
                                        patches):
    '''
    Saves the function history for altered files

    If the file was altered, then we need to be extremely careful about how
    we track function history. Currently follows the following algorithm:

    1) Create a dictionary where the key is the function name of the values
       is a dictionary which has two keys: PREVIOUS_HISTORY and CURRENT_NODE.
       They map to a list of the matching function histories and nodes.
    2) If there are the same number of old function and new node, then they
       are matched and removed from the dictionary.
    3) If there is more than one function/new node pairing for a single key,
       then they will be matched in order of first_line.
    4) Remaining nodes will look into the patches to see if they were
       renamed. Those are then removed from the dictionary.
    5) Remaining nodes are treated as newly created functions.
    6) Remaining old functions are treated as deleted nodes.
    '''
    session = Session.object_session(commit)

    for altered_file in altered_file_paths:
        # get the function nodes present in the specified file
        file_current_function_nodes = [
            function_node for function_node in altered_function_nodes
            if function_node.file_path == altered_file
        ]

        # get the previous commit's version of the file's function histories
        # in order by their first line
        file_previous_function_histories = [
            function_history
            for function_history in previous_commit.function_histories
            if function_history.function.file_path == altered_file
        ]

        # combine the file's current nodes and previous histories into a
        # dictionary with key being their names. Using a nested defaultdict
        # replaces the previous get()/else dance with a single append.
        function_name_map = defaultdict(lambda: defaultdict(list))
        for func in (file_current_function_nodes +
                     file_previous_function_histories):
            func_type = (PREVIOUS_HISTORY
                         if isinstance(func, FunctionHistory)
                         else CURRENT_NODE)
            function_name_map[func.name][func_type].append(func)

        for func_name, corresponding_functions in function_name_map.items():
            previous_histories = corresponding_functions[PREVIOUS_HISTORY]
            current_nodes = corresponding_functions[CURRENT_NODE]

            matched_pairs, unmatched_nodes = _match_nodes_with_history(
                previous_histories, current_nodes)

            for node, previous_history in matched_pairs:
                # recreate the matched function's history for this commit;
                # the session tracks the created instance, so the previously
                # unused `function_history` local has been dropped
                create(session,
                       FunctionHistory,
                       function=previous_history.function,
                       commit=commit,
                       node=node,
                       first_line=node.first_line,
                       last_line=node.last_line)

            # convert all unmatched nodes into new functions
            create_new_functions_from_nodes(commit, unmatched_nodes)
def save_function_histories(repository: Repository,
                            commit: Commit,
                            function_nodes,
                            patches):
    '''
    Stores the function histories

    @param repository: the repository being snapshotted
    @param commit: the commit the histories belong to
    @param function_nodes: AST nodes for all functions in the repository
    @param patches: the diff patches for this commit, used to determine
                    which files were altered
    '''
    session = Session.object_session(repository)
    previous_commit = get_previous_commit(commit)

    if not previous_commit:
        logger.info('No previous commit, creating new function nodes')
        create_new_functions_from_nodes(commit, function_nodes)
        return

    # from the patches, determine which files were altered
    altered_files = list(set([patch.header.new_path for patch in patches]))
    logger.info('Altered files: {}'.format(altered_files))

    altered_function_nodes = []
    unaltered_function_nodes = []
    for function_node in function_nodes:
        if function_node.file_path in altered_files:
            altered_function_nodes.append(function_node)
        else:
            unaltered_function_nodes.append(function_node)

    # If the file was not altered, then we can simply find the previous
    # function history and recreate it for this commit without even finding
    # the appropriate function
    for function_node in unaltered_function_nodes:
        try:
            previous_function_history = (
                session.query(FunctionHistory)
                .join(FunctionHistory.function)
                .filter(FunctionHistory.commit_id == previous_commit.id)
                .filter(Function.name == function_node.name)
                .filter(Function.file_path == function_node.file_path)
                .filter(FunctionHistory.first_line == function_node.lineno)
            ).one()
        except NoResultFound:
            # fixed: this branch used to drop into pdb.set_trace(), log a
            # message with an unfilled '{}' placeholder, and then fall
            # through to a NameError on previous_function_history. Skip the
            # node instead and surface the problem in the logs.
            logger.error(
                'Unable to find previous function history for node {} '
                'which was in an unaltered file'.format(function_node))
            continue

        create(session,
               FunctionHistory,
               function=previous_function_history.function,
               commit=commit,
               node=function_node,
               first_line=previous_function_history.first_line,
               last_line=previous_function_history.last_line)

    # If the file was altered, then we need to be extremely careful about how
    # we track function history.
    _save_altered_file_function_history(commit,
                                        previous_commit,
                                        altered_files,
                                        altered_function_nodes,
                                        patches)
def create_results_from_junit_xml(output_file: str,
                                  repository: Repository,
                                  test_run: TestRun):
    '''
    Gets results from a JUnitXML format file
    https://docs.pytest.org/en/latest/usage.html#creating-junitxml-format-files

    @param output_file: path to the junit xml results file
    @param repository: the repository the tests belong to
    @param test_run: the TestRun the results will be attached to
    '''
    logger.info('Reading info from {}'.format(output_file))
    try:
        session = Session.object_session(test_run)
        xml_output = JUnitXml.fromfile(output_file)

        test_names = []
        expected_new = False
        for test_case in xml_output:
            # There can seemingly be duplicate test outputs for a test if both
            # the test and the test's teardown step both fail. So we will
            # ignore the second test output
            unique_id = '{}/{}/{}'.format(
                test_case.name,
                test_case._elem.attrib.get('file'),
                test_case._elem.attrib.get('classname'))
            if unique_id in test_names:
                logger.error(
                    'There was a duplicate test output for test: {}'.format(
                        test_case.name))
                continue
            test_names.append(unique_id)

            test, is_new = get_or_create(
                session,
                Test,
                repository=repository,
                name=test_case.name,
                file=test_case._elem.attrib.get('file'),
                classname=test_case._elem.attrib.get('classname'),
            )
            if is_new and not expected_new:
                # fixed: this used to drop into pdb.set_trace(); flag the
                # unexpected new test once without halting result collection
                logger.error('Did you expect to create a new test?')
                expected_new = True

            status = (TEST_OUTPUT_FAILURE if test_case.result
                      else TEST_OUTPUT_SUCCESS)

            # if the test is skipped, do not keep it
            if hasattr(test_case, 'result') and hasattr(
                    test_case.result, 'type'):
                if test_case.result.type == 'pytest.skip':
                    status = TEST_OUTPUT_SKIPPED

            create(
                session,
                TestResult,
                test=test,
                test_run=test_run,
                status=status,
                time=test_case.time,
            )
    except Exception as e:
        # fixed: this used to drop into pdb.set_trace() and log at info
        # level; record the error with the full traceback instead while
        # keeping the best-effort (non-raising) behavior
        logger.exception(
            'Hit error when reading from junit xml: {}'.format(e))