def get_validation_data(repository: Repository, max_commits: int = 30):
    '''
    Builds a validation set from the repository's synthetic commits.

    Walks ``repository.commits`` in order and keeps the first ``max_commits``
    commits that are synthetic changes with at least one test run. The
    selected commits are shuffled in place and each one is converted into a
    feature (with noise applied by set_functions_altered_noise and
    set_tests_not_run_noise) and a test failure label.

    @param repository: the repository whose commits are sampled
    @param max_commits: maximum number of commits to collect (default 30)
    @returns: tuple of (commits, stacked features, stacked labels)
    @raises ValueError: raised by numpy.stack if no commit qualified
    '''
    commits = []
    logger.info('Retrieving training data')

    for commit in repository.commits:
        # stop as soon as the limit is reached; the previous `i > 30` check
        # collected one commit more than the log message claimed
        if len(commits) >= max_commits:
            logger.info('Got {} commits'.format(max_commits))
            break

        if commit.commit_type == SYNTHETIC_CHANGE and commit.test_runs:
            commits.append(commit)

    random.shuffle(commits)

    validation_features = []
    validation_labels = []
    for commit in commits:
        # the noise helpers are called for their effect on the feature
        feature = commit_to_state(commit)
        set_functions_altered_noise(feature)
        set_tests_not_run_noise(feature)
        label = commit_to_test_failure_label(commit)

        validation_features.append(feature)
        validation_labels.append(label)

    return (commits,
            numpy.stack(validation_features),
            numpy.stack(validation_labels))
def run_test(test_run: TestRun, test: Test) -> TestResult:
    '''
    Runs a specific test and returns the TestResult

    The test is run through pytest using ``<file>::<name>`` and the results
    are looked up on ``test_run.test_results`` afterwards.

    @param test_run: the TestRun the result is recorded against
    @param test: the Test to run
    @returns: the TestResult for ``test`` with the highest id
    @raises RuntimeError: if no result for ``test`` is found on the test run
    '''
    command = 'python -m pytest -vs {file}::{test_name}'.format(
        file=test.file, test_name=test.name)
    logger.info('Running individual test {} with command: "{}"'.format(
        test, command))
    _run_tests(test_run, command)

    # collect the results that belong to the requested test
    matching_results = [
        test_result for test_result in test_run.test_results
        if test_result.test.id == test.id
    ]

    if not matching_results:
        # this used to drop into pdb (with an invalid argument); fail with a
        # clear message instead so non-interactive runs do not hang or crash
        # with an unrelated error
        raise RuntimeError(
            'No test result was recorded for test {} after running: "{}"'
            .format(test, command))

    # if there are more than one, then return the one with the latest id
    return max(matching_results, key=lambda test_result: test_result.id)
def get_range_of_patch(patch):
    '''
    Given a whatthepatch patch, it will return the start and end range of the
    patch. We consider the start of the patch not to be the first line
    necessarily, but the first line that is changed.

    A change counts as "changed" when either its original or its new line
    number is None (a pure addition or a pure removal). The start is the
    first such line. The end only moves forward for added lines; a removal
    sets it only when no end has been recorded yet.

    @param patch: object with a ``changes`` iterable whose items start with
                  (original_line, new_line, ...); ``changes`` may be None
    @returns: tuple (start_range, end_range); both are None when the patch
              has no changed lines
    '''
    start_range = None
    end_range = None

    for change in (patch.changes or []):
        # index instead of unpacking so change tuples that carry additional
        # trailing fields are accepted as well
        original_line, new_line = change[0], change[1]

        # context lines have both line numbers and are not part of the range
        if original_line is not None and new_line is not None:
            continue

        changed_line = new_line or original_line
        if not start_range:
            start_range = changed_line

        if not end_range or (new_line or -1) > end_range:
            end_range = changed_line

    if start_range is None or end_range is None:
        # previously this dropped into pdb, which blocks unattended runs
        logger.error('Failed to get start_range or end_range')

    logger.info('{}-{}'.format(start_range, end_range))
    return start_range, end_range
def snapshot_commit(repository: Repository, commit: Commit, skip_diffs=False):
    '''
    Records the state of the repository for the given commit.

    Steps, in order:
        - read the AST function nodes from the repository
        - read the patches of the commit
        - save the function histories built from the nodes and patches
        - unless skip_diffs is set, create the diffs and save them

    @param repository: the repository being snapshotted
    @param commit: the commit the data is stored against
    @param skip_diffs: when True the diffs are neither created nor saved
    @returns: the commit that was passed in
    '''
    logger.info('Snapshotting commit {}'.format(commit))

    nodes = get_function_nodes_from_repo(repository)
    commit_patches = get_diff_patches(commit)
    save_function_histories(repository, commit, nodes, commit_patches)

    if skip_diffs:
        return commit

    new_diffs = create_diffs(repository, commit)
    save_diffs(repository, commit, new_diffs)
    return commit
def run_all_tests(commit: Commit):
    '''
    Runs the repository's test command against a commit.

    A TestRun is created for the commit with the current timestamp and is
    handed to _run_tests together with the repository's test commands.

    @param commit: the commit to test
    @returns: the TestRun that was created
    '''
    logger.info('Running the tests against commit: {}'.format(commit))

    start_timestamp = time.time()
    started_at = datetime.utcfromtimestamp(start_timestamp)
    date = started_at.strftime('%Y-%m-%d %H:%M:%S')

    announcement = ('Testing {repo_name} at commit "{commit_id}" at {date} '
                    'with command:\n{command}')
    logger.info(announcement.format(
        repo_name=commit.repository.name,
        commit_id=commit.commit_id,
        command=commit.repository.test_commands,
        date=date))

    session = Session.object_session(commit)
    test_run = create(session,
                      TestRun,
                      commit=commit,
                      start_timestamp=start_timestamp)

    # run every test using the command configured on the repository
    _run_tests(test_run, commit.repository.test_commands)
    return test_run
def watch(repository: Repository, commit_only: bool):
    '''
    Replays a handful of synthetic commits and predicts blame for them.

    After switching to the bug_buddy branch, five synthetic commits are
    fetched. For each one the repository is moved to that commit, blame is
    predicted for every failed test result, the session is committed and the
    commit summary is displayed.

    @param repository: the repository to work on
    @param commit_only: not used by this function
    '''
    session = Session.object_session(repository)
    set_bug_buddy_branch(repository)
    logger.info('Starting BugBuddy thingy')

    for commit in get_commits(repository, num_commits=5, synthetic=True):
        go_to_commit(repository, commit, force=True)

        for failed_result in commit.failed_test_results:
            predict_blame(failed_result)

        session.commit()
        commit.summary()
def synthetic_blame(commit: Commit, test_run: TestRun):
    '''
    Given a synthetic commit, it will create blames for the commit based on
    the blames of the sub-combinations of the diffs

    For every failed test of the test run, the blames that the children
    commits hold for the same failing output are collected. If any exist, a
    Blame is created for each distinct blamed diff. Otherwise the failure is
    new to this combination and every diff of the commit is blamed.

    @param commit: the synthetic commit being blamed
    @param test_run: the test run whose failed tests need blames
    '''
    if not test_run.failed_tests:
        logger.info(
            'No failing tests for commit {}, nothing to blame'.format(commit))
        return

    logger.info('Setting blame for commit: {}'.format(commit))
    session = Session.object_session(commit)

    # Get the list of commits that are made up of the subset of diffs in this
    # commit
    children_commits = get_synthetic_children_commits(commit)

    for failed_test_result in test_run.failed_tests:
        # Maps each blamed diff to the first child commit that blamed it.
        # Keying on the diff removes duplicates, and remembering the child
        # lets the log below name the commit the blame really came from
        # (it used to report whichever child was iterated last).
        inherited_diffs = {}
        for child_commit in children_commits:
            if not child_commit.has_same_test_result_output(
                    failed_test_result, status=TEST_OUTPUT_FAILURE):
                continue

            child_test_failure = (
                child_commit.get_matching_test_result(failed_test_result))
            for blame in child_test_failure.blames:
                inherited_diffs.setdefault(blame.diff, child_commit)

        if inherited_diffs:
            for faulty_diff, source_commit in inherited_diffs.items():
                logger.info('Assigning blame using child commit {} and diff '
                            '{} for test failure: {}'.format(
                                source_commit, faulty_diff,
                                failed_test_result))
                create(session,
                       Blame,
                       diff=faulty_diff,
                       test_result=failed_test_result)
        else:
            # We have created a completely new blame from this combination of
            # diffs in comparison from its children
            for diff in commit.diffs:
                blame = create(session,
                               Blame,
                               diff=diff,
                               test_result=failed_test_result)
                logger.info('Assigning new blame for commit {} blame {} '
                            'for test failure: {}'.format(
                                commit, blame, failed_test_result))

    logger.info('Completed blaming for {}'.format(commit))
    session.commit()
def train_command(src_path: str):
    '''
    Entry-point that trains on the repository found at src_path

    @param src_path: path to the repository
    '''
    with session_manager() as session:
        repo = _get_repository_from_src_path(session, src_path)
        logger.info('Training repository: "{}"'.format(repo))
        train(repo)
def do_command(src_path: str):
    '''
    Looks up the repository found at src_path and passes it to
    cache_test_results

    @param src_path: path to the repository
    '''
    with session_manager() as session:
        repo = _get_repository_from_src_path(session, src_path)
        logger.info('Doing stuff to repository: "{}"'.format(repo))
        cache_test_results(repo)
def _migrate_repository_to_new_path(session, repository, new_path):
    '''
    Points the stored repository at a new location on disk.

    Needed when the database holds one path for a repository but the code
    lives somewhere else, for example on a different machine. The mirror
    repository is synced after the path is updated and the session is
    committed.

    @param session: the database session to commit
    @param repository: the repository whose original_path is replaced
    @param new_path: the path to store
    '''
    old_path = repository.original_path
    logger.info('Upating path of repository in database from {} to {}'.format(
        old_path, new_path))

    repository.original_path = new_path
    sync_mirror_repo(repository)
    session.commit()
def remove_line(self, line):
    '''
    Deletes one line from the file at self.abs_path and writes the file back

    @param line: 1-based number of the line to delete
    '''
    with open(self.abs_path, 'r') as source_file:
        lines = source_file.readlines()

    removed = lines.pop(line - 1)
    logger.info('Removed line: "{}" from {}'.format(
        removed.strip(), self.function.file_path))

    with open(self.abs_path, 'w') as source_file:
        source_file.write(''.join(lines))
def synthetic_train(repository: Repository):
    '''
    Trains a Brain for the repository inside a ChangeEnvironment that is
    created with synthetic_training enabled

    @param repository: the repository to train on
    '''
    logger.info('Training on synthetic data for: {}'.format(repository))
    logger.info('Initializing environment')

    brain = Brain(repository)
    environment = ChangeEnvironment(repository, synthetic_training=True)
    environment.reset()

    brain.train(environment)
def create_synthetic_diff_for_node(repository: Repository, commit: Commit,
                                   node):
    '''
    Creates a FunctionHistory for the node and prepends 'assert False' to
    the function in the source code.

    If the library is testable, a synthetic diff is created from that single
    change and then reverted so the repository is clean again. Otherwise the
    added line is simply removed from the source code.

    @param repository: the repository that owns the function
    @param commit: the commit the function history is created for
    @param node: the function node that is altered
    @returns: the node that was passed in
    '''
    session = Session.object_session(repository)
    previous_commit = get_previous_commit(commit)
    function = previous_commit.get_function_for_node(node).function

    history_values = dict(
        function=function,
        commit=commit,
        node=node,
        first_line=node.first_line,
        # we need the minus 1 because when we complete the commit the
        # 'assert False' line will have been removed
        last_line=node.last_line - 1,
    )
    function_history = create(session, FunctionHistory, **history_values)
    logger.info('There is a new function history: {}'.format(function_history))

    added_line = function_history.prepend_statement('assert False')

    if not library_is_testable(repository):
        # remove the addition from the source code
        function_history.remove_line(added_line)
        return node

    # create a new diff from this one change
    diffs = create_diffs(repository,
                         commit=commit,
                         function=function,
                         is_synthetic=True,
                         allow_empty=False)

    # altering one function should always produce exactly one diff
    assert len(diffs) == 1
    diff = diffs[0]
    logger.info('Created diff: {}'.format(diff))

    # go back to a clean repository
    revert_diff(diff)
    return node
def generate_command(src_path: str, run_limit: int = None):
    '''
    Entry-point for the "bugbuddy generate" command

    Checks that the repository is clean and that the database and git
    match before generating synthetic test results.

    @param src_path: path to the repository
    @param run_limit: passed through to generate_synthetic_test_results
    '''
    with session_manager() as session:
        repo = _get_repository_from_src_path(session, src_path)
        _check_repo_is_clean(repo)
        db_and_git_match(repo)

        logger.info('Creating synthetic results for: {}'.format(repo))
        generate_synthetic_test_results(repo, run_limit)
def prepend_statement(self, statement, offset: int = 0):
    '''
    Writes a statement to the beginning of the function

    The statement is inserted before the first statement of the function, or
    directly after the docstring when the function starts with one. It is
    indented to match the first statement.

    @param statement: the source text to insert (without indentation or
                      trailing newline)
    @param offset: number of lines to shift the insertion point by
    @returns: the 1-based line number the statement was written to
    '''
    def _is_docstring(node):
        '''
        Checks to see if the node is a docstring. We need to because we do
        not want to add our statement into the docstring.

        Only a bare string expression counts. Statements that merely carry a
        string value, such as "return 'x'" or "x = 'y'", must not be skipped,
        otherwise the statement ends up after them.
        '''
        if not isinstance(node, ast.Expr):
            return False

        value = node.value
        if isinstance(value, ast.Constant):
            return isinstance(value.value, str)

        # parsers older than Python 3.8 produce ast.Str; compare by name so
        # the deprecated attribute is never touched on newer versions
        return type(value).__name__ == 'Str'

    # Get the first node in the function, which is it's first statement.
    # We will add the statement here
    first_node = self.node.body[0]
    first_line_in_function = first_node.lineno

    if _is_docstring(first_node):
        # Older parsers report the last line of a multi-line string as its
        # lineno, newer ones report the first line and expose end_lineno.
        # Use the last line in both cases so the statement is never written
        # into the middle of the docstring.
        last_docstring_line = (getattr(first_node, 'end_lineno', None)
                               or first_node.lineno)
        first_line_in_function = last_docstring_line + 1

    first_line_in_function += offset

    # note that a comment after the function does not seem to have a
    # column offset, and instead returns -1.
    if first_node.col_offset != -1:
        column_offset = first_node.col_offset
    else:
        column_offset = self.node.col_offset + 4

    indented_statement = ' ' * column_offset + statement + '\n'

    with open(self.abs_path, 'r') as f:
        contents = f.readlines()

    contents.insert(first_line_in_function - 1, indented_statement)

    with open(self.abs_path, 'w') as f:
        f.writelines(contents)

    logger.info(
        'Added "{statement}" to {file} | {function_name}@{lineno}'.format(
            statement=statement,
            file=self.function.file_path,
            function_name=self.node.name,
            lineno=first_line_in_function))

    return first_line_in_function
def yield_blame_set(synthetic_diffs: DiffList):
    '''
    Endlessly yields randomly chosen groups of synthetic diffs.

    Each group is built from 4 independent random picks from
    synthetic_diffs. Duplicate picks are removed before yielding, so a group
    holds between 1 and 4 diffs. The generator never finishes on its own.

    @param synthetic_diffs: the diffs to pick from
    '''
    picks_per_set = 4

    while True:
        diff_set = [
            synthetic_diffs[random.randint(0, len(synthetic_diffs) - 1)]
            for _ in range(picks_per_set)
        ]
        logger.info('Yielding diff set: {}'.format(diff_set))

        # remove duplicates if they exist
        unique_diffs = set(diff_set)
        yield list(unique_diffs)
def _watch(repository: Repository, commit_only: bool):
    '''
    Watches the repository's original path (recursively) for filesystem
    events and forwards them to a ChangeWatchdog until the user interrupts
    the process.

    @param repository: the repository to watch
    @param commit_only: passed through to the ChangeWatchdog
    '''
    set_bug_buddy_branch(repository)
    logger.info('Starting BugBuddy watcher')

    handler = ChangeWatchdog(repository, commit_only)
    watcher = Observer()
    watcher.schedule(handler, repository.original_path, recursive=True)
    watcher.start()

    try:
        # idle until the user interrupts
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        watcher.stop()
        logger.info('Shutting down BugBuddy watcher')

    watcher.join()
def _get_repository_from_src_path(session, src_path: str):
    '''
    Returns the repository given a src_path

    If the repository is not in the database yet, the user is asked whether
    it should be initialized. If the stored path differs from src_path, the
    repository is migrated to the new path.

    @param session: the database session
    @param src_path: path to the repository
    @returns: the Repository for src_path
    @raises SystemExit: if the repository is unknown and the user declines
                        to initialize it
    '''
    url = get_repository_url_from_path(src_path)
    repository = get(session, Repository, url=url)

    if not repository:
        msg = ('This repository is not in the BugBuddy database, would you '
               'like to initialize the repository? (y/n)\n')
        should_initialize = input(msg)

        if not is_affirmative(should_initialize):
            logger.info('No worries!')
            # There is no repository to return. Falling through used to
            # crash with an AttributeError on None, so stop cleanly instead.
            raise SystemExit(0)

        repository = _initialize_repository(session, src_path)

    if src_path != repository.original_path:
        _migrate_repository_to_new_path(session, repository, src_path)

    return repository
def delete_command(src_path: str):
    '''
    Entry-point for deleting a repository's BugBuddy data

    Offers to delete the bug_buddy branch and, if the repository is stored
    in the database, deletes its data.

    @param src_path: path to the repository
    '''
    url = get_repository_url_from_path(src_path)
    with session_manager() as session:
        repository = get(session, Repository, url=url)

        # make sure you cannot delete the bug_buddy branch of BugBuddy
        # itself. The repository may be missing from the database, in which
        # case there is no name to check and the branch may still be deleted.
        is_protected = (repository is not None
                        and repository.name in ['bug_buddy', 'BugBuddy'])

        if not is_protected:
            msg = ('Would you like to delete the bug_buddy branch for {}?\n'
                   '(y/n)\n'.format(repository))
            should_delete = input(msg)

            if is_affirmative(should_delete):
                logger.info('Deleting bug_buddy branch')
                # fall back to a stand-in that only carries the path when
                # the repository is not stored in the database
                delete_bug_buddy_branch(repository or Mock(src_path=src_path))

        if repository:
            logger.info('Deleting data from the database')
            delete(session, repository)
        else:
            logger.info('No matching repo found in the database')
def _initialize_repository(session,
                           src_path: str,
                           initialize_commands: str = None,
                           test_commands: str = None,
                           src_directory: str = None,
                           commit_only=False,
                           ignored_files=''):
    '''
    Given a src_path to a repository, create the repository in the database

    Any of initialize_commands, test_commands and src_directory that is not
    provided is requested from the user. The repository is created if it is
    not stored yet, the mirror repository is synced and put on the bug_buddy
    branch, and unless commit_only is set a snapshot is taken.

    @param session: the database session
    @param src_path: path to the repository
    @param initialize_commands: commands that initialize the repository
    @param test_commands: command that runs the repository's tests
    @param src_directory: source directory of the project
    @param commit_only: when True no snapshot is taken
    @param ignored_files: stored on the repository when it is created
    @returns: the Repository
    '''
    logger.info('Initializing repository at "{}"'.format(src_path))
    url = get_repository_url_from_path(src_path)
    name = get_repository_name_from_url(url)
    logger.info('Repository name is "{}" with url "{}"'.format(name, url))

    if not initialize_commands:
        msg = ('Input the commands to intialize the repo (ex. '
               '"source env/bin/activate"): ')
        initialize_commands = input(msg)

    if not test_commands:
        msg = ('Input the command to run the tests for the repo: ')
        test_commands = input(msg)

    if not src_directory:
        msg = ('Input the source directory for your project: ')
        src_directory = input(msg)

    # first check to see if the repository already exists (a second,
    # identical lookup used to run before the prompts and was never used)
    repository = get(session, Repository, url=url)
    if not repository:
        repository = create(session,
                            Repository,
                            name=name,
                            url=url,
                            initialize_commands=initialize_commands,
                            test_commands=test_commands,
                            src_directory=src_directory,
                            src_path=src_path,
                            ignored_files=ignored_files)

    _check_repo_is_clean(repository, path=repository.original_path)

    # create the mirror repository that BugBuddy primarily works on
    sync_mirror_repo(repository)

    # make sure the mirrored repo is on bug_buddy branch
    set_bug_buddy_branch(repository)

    # Initialize the repository by recording functions and creating synthetic
    # diffs
    if not commit_only:
        snapshot(repository, allow_empty=True)

    session.commit()
    logger.info(
        'Your repository "{}" has been successfully initialized!'.format(
            repository))

    return repository
def sync_mirror_repo(repository: Repository):
    '''
    Updates the mirror repository to match the code base the developer is
    working on

    The mirror root and the mirror repository are created when missing, the
    original path is copied over with rsync and the pyc files are deleted.

    @param repository: the repository whose mirror is updated
    '''
    # exist_ok avoids failing when the directory appears between a separate
    # existence check and the creation
    os.makedirs(MIRROR_ROOT, exist_ok=True)

    if not os.path.exists(repository.mirror_path):
        logger.info('Initializing mirror repository')
        clone_repository(repository, repository.mirror_path)

    # skip the .git directory, otherwise you are overwritting the commits in
    # the mirror repository
    command = ('rsync -a {source}/ {destination} --exclude ".git"'.format(
        source=repository.original_path,
        destination=repository.mirror_path))
    run_cmd(repository, command, log=False)

    # remove the pyc files so that we do not run into any import errors when
    # trying to run the testing commands:
    # http://wisercoder.com/importmismatcherror-python-fix/
    # (raw string: "\*" is not a valid escape sequence in a normal literal)
    clean_command = r'find . -name \*.pyc -delete'
    run_cmd(repository, clean_command, log=False)
def on_any_event(self, event):
    '''
    Catches all events

    Events for hidden paths, ignored files and non-python files are dropped.
    For every other event the mirror repository is synced. If that leaves
    the repository with changes, a snapshot is taken and, unless commit_only
    is set, the tests are run, blame is predicted for the failures and the
    commit summary is displayed.

    @param event: the filesystem event; only its src_path is read
    '''
    if '/.' in event.src_path:
        return

    updated_file = os.path.relpath(event.src_path,
                                   self.repository.original_path)

    if (not updated_file or
            updated_file in self.repository.ignored_files or
            not updated_file.endswith('.py')):
        return

    # we have to recreate the repository in this thread for Sqlite
    with session_manager() as session:
        repository = get(session, Repository, id=self.repository.id)

        # Copy the change over to the mirror repository
        sync_mirror_repo(repository)

        # use the instance that belongs to this thread's session, like every
        # other call in this block does
        if is_repo_clean(repository):
            logger.info('Nothing was changed')
            return

        logger.info('Valid change event: {}'.format(event))

        start = time.time()
        commit = snapshot(repository, commit_only=self.commit_only)
        total_time = time.time() - start

        # split into whole minutes and remaining seconds; dividing by 60
        # reported fractional minutes next to the seconds
        minutes, seconds = divmod(total_time, 60)
        logger.info(
            'Completed snapshot of {commit} in {m}m {s}s'.format(
                commit=commit, m=int(minutes), s=round(seconds, 2)))
        session.commit()

        if not self.commit_only:
            run_all_tests(commit)

            for test_failure in commit.failed_test_results:
                predict_blame(test_failure)

            # display the results in the cli output
            commit.summary()
def set_commit(self, commit):
    '''
    Points the environment at a new commit and resets everything that is
    tracked per commit: the session, the state, the test run (outside of
    synthetic training), the sorted list of tests and the bookkeeping
    counters.

    @param commit: the commit the environment should work on
    '''
    logger.info('Setting commit to {}'.format(commit))

    # Disabled on purpose: in synthetic training this would show the actual
    # test failures at the beginning.
    if self.synthetic_training and False:
        logger.info('{commit} test failures: {test_failures}'.format(
            commit=commit, test_failures=commit.test_failures))

    self.session = Session.object_session(commit)
    self.commit = commit
    self.state = commit_to_state(
        commit, synthetic_training=self.synthetic_training)

    # Outside of synthetic training a TestRun is created for this commit
    if not self.synthetic_training:
        self.test_run = create(self.session, TestRun, commit=self.commit)

    # list of available tests, sorted in place by Test.id
    self.all_tests = get_list_of_tests(self.commit)
    self.all_tests.sort(key=lambda candidate: candidate.id)

    # list of the tests that have already been ran for this commit
    self.tests_ran = []

    # total number of rewards accumulated for this change
    self.total_rewards = 0

    # total number of newly passing or newly failing tests discovered for
    # this change
    self.num_newly_changed_results_found = 0

    logger.info('Set commit')
def generate_synthetic_test_results(repository: Repository, run_limit: int):
    '''
    Creates multiple synthetic changes and test results

    The base synthetic diffs are created first if the repository has none.
    Then, for every diff set produced by yield_blame_set and every subset of
    it produced by powerset, a matching commit is looked up or created, the
    tests are run if the commit has no test runs, blame is assigned when the
    commit needs it, and the result is committed and pushed.

    @param repository: the repository to generate synthetic results for
    @param run_limit: number of runs after which the process exits; a falsy
                      value means no limit is applied
    @raises Exception: any exception hit during a run is re-raised after the
                       local changes were reverted

    NOTE(review): yield_blame_set is consumed without any other stop
    condition, so this presumably only ends through run_limit or an
    exception - confirm.
    NOTE(review): the nesting below was reconstructed from source that had
    lost its indentation - confirm against the original file.
    '''
    session = Session.object_session(repository)
    synthetic_diffs = repository.get_synthetic_diffs()

    if not synthetic_diffs:
        # create the synthetic diffs
        create_synthetic_alterations(repository)
        logger.info('You have created the base synthetic commits. Congrats!')
        session.commit()
        synthetic_diffs = repository.get_synthetic_diffs()

    # number of completed runs, compared against run_limit
    num_runs = 0
    for diff_set in yield_blame_set(synthetic_diffs):
        logger.info('On diff set: {}'.format(diff_set))

        for diff_subset in powerset(diff_set):
            logger.info('On run #{} with: {}'.format(num_runs, diff_subset))
            db_and_git_match(repository)

            try:
                # see if we already have a commit and test run for the diff set.
                # if we do, continue
                logger.debug('1: {}'.format(time.time()))
                commit = get_matching_commit_for_diffs(repository, diff_subset)

                # if the commit does not already exist for this set, then we
                # need to create it and run tests against it
                if not commit:
                    # revert back to a clean repository
                    reset_commit = create_reset_commit(repository)

                    if reset_commit:
                        logger.info('Storing reset commit')
                        snapshot_commit(repository, reset_commit)
                        git_push(repository)
                        session.commit()

                    db_and_git_match(repository)

                    # create a commit. Only allow an empty commit if there
                    # nothing in the diff
                    commit = create_commit(repository,
                                           name=SYNTHETIC_CHANGE,
                                           commit_type=SYNTHETIC_CHANGE,
                                           allow_empty=True)

                    logger.info('Applying diffs')
                    # apply the synthetic diffs to the mirrored repository
                    apply_synthetic_diffs(commit, diff_subset)

                    # store the rest of the commit data. No need to recreate
                    # the diffs since they have already been stored in
                    # apply_synthetic_diffs
                    logger.info('Snapshotting the commit: {}'.format(commit))
                    commit = snapshot_commit(repository,
                                             commit,
                                             skip_diffs=True)

                # add the commit hash id for its synthetic diffs
                logger.info('Creating synthetic diff hash')
                if not commit.synthetic_diff_hash:
                    base_synthetic_ids = [diff.id for diff in diff_subset]
                    commit.synthetic_diff_hash = (
                        get_hash_given_base_synthetic_ids(base_synthetic_ids))
                    logger.info('Added hash_ids #{} to commit: {}'.format(
                        commit.synthetic_diff_hash, commit))

                logger.info('Running tests')
                if not commit.test_runs:
                    # run all tests against the synthetic change
                    run_all_tests(commit)

                logger.debug('2: {}'.format(time.time()))
                if commit.needs_blaming():
                    # only the first test run of the commit is blamed
                    synthetic_blame(commit, commit.test_runs[0])

                logger.debug('3: {}'.format(time.time()))
                session.commit()

                # push newly created commit
                git_push(repository)

                logger.info('Completed run #{}'.format(num_runs))
                num_runs += 1

                if run_limit and num_runs >= run_limit:
                    logger.info('Completed all #{} runs! '.format(num_runs))
                    # NOTE(review): exit() raises SystemExit, which is not
                    # an Exception subclass, so the handler below does not
                    # intercept it
                    exit()

            except Exception as e:
                # revert all the local edits
                logger.error('Hit the following exception: {}'.format(e))
                logger.error('Reverting local changes')
                revert_to_master(repository)
                raise e
def step(self, action):
    '''
    Advance the environment by one timestep.

    # Arguments
        action (int): index into self.all_tests of the test that the agent
            wants to run, for example to see if it has a newly failed or
            newly passed status

    # Returns
        new_state (object): the state of the environment after the action
            was taken
        reward (float): the reward for running the chosen test
        done (boolean): whether the episode has ended, in which case further
            step() calls will return undefined results
        info (dict): auxiliary diagnostic information; currently always empty

    # Raises
        NotImplementedError: when the environment is not in synthetic
            training, because running a real test is not implemented
    '''
    # the action is the index of the test that the agent wants to run
    test = self.all_tests[action]

    if not self.synthetic_training:
        raise NotImplementedError(
            'You have not implemented actually running a test')

    # In synthetic training the test does not have to be run, its result is
    # retrieved instead.
    test_result = self.get_synthetic_test_result(test)
    assert test_result

    # determine the reward for running that particular test, then update
    # the state with the action
    reward = self.get_reward(test_result)
    self.update_state(test_result)

    # ---- bookkeeping ----
    # remember the test and accumulate the reward for this commit
    self.tests_ran.append(test)
    self.total_rewards += reward

    # count newly failing or newly passing results
    if self.is_newly_changed_result(test_result):
        self.num_newly_changed_results_found += 1

    # we can store extra debugging information here
    info_dict = {}

    # per-step logging, hidden for now
    if False:
        logger.info('Commit #{commit} | '
                    '{test} | '
                    '{previous_status} -> {new_status} | '
                    'R: {reward} | '
                    'T: {total} | '
                    '{num_tests_ran} / {total_tests}'.format(
                        commit=self.commit.id,
                        test=test.name,
                        previous_status=self.get_previous_status(test),
                        new_status=test_result.status,
                        reward=reward,
                        total=self.total_rewards,
                        num_tests_ran=len(self.tests_ran),
                        total_tests=len(self.all_tests)))

    if self.done:
        summary = ('Completed commit #{commit} | '
                   'Total reward: {total} / {total_possible} | '
                   '{num_tests_ran} / {total_tests}')
        logger.info(summary.format(
            commit=self.commit.id,
            total=self.total_rewards,
            total_possible=self.num_newly_changed_results_found,
            num_tests_ran=len(self.tests_ran),
            total_tests=len(self.all_tests)))

    return self.state, reward, self.done, info_dict
def commit_to_tensor(commit):
    '''
    Converts an individual commit into a tensor of shape
    (number of functions, number of tests, NUM_FEATURES).

    Functions and tests are both ordered by their id. Every
    [function][test] cell holds:

        FUNCTION_ALTERED_LOC: 1 if the function has a diff in this commit,
                              0 otherwise
        TEST_STATUS_LOC:      id of the test's status in the commit's first
                              test run, or the "not run" id
        BLAME_COUNT_LOC:      how many times the function has been blamed
                              for the test

    If the commit already has a cached tensor, that one is returned.
    Otherwise the new tensor is stored on the commit and the session is
    committed.

    @param commit: the commit to convert
    @returns: the commit tensor
    '''
    if commit._commit_tensor_binary:
        return commit.commit_tensor

    logger.info('Creating cached commit tensor for {}'.format(commit))
    session = Session.object_session(commit)

    # both lists are sorted in place by id
    functions = commit.repository.functions
    functions.sort(key=lambda function: function.id)

    tests = commit.repository.tests
    tests.sort(key=lambda test: test.id)

    tensor = numpy.zeros((len(functions), len(tests), NUM_FEATURES))

    # status id per test id, taken from the first test run, for quick lookup
    status_by_test_id = {}
    if commit.test_runs:
        for test_result in commit.test_runs[0].test_results:
            status_by_test_id[test_result.test.id] = (
                TEST_STATUS_TO_ID_MAP[test_result.status])

    logger.info('Commit Test Results: {}'.format(status_by_test_id))

    for row, function in enumerate(functions):
        logger.info('On function: {}'.format(function))

        altered = int(any(diff.commit.id == commit.id
                          for diff in function.diffs))
        blame_counts = get_blame_counts_for_function(function)

        for column, test in enumerate(tests):
            cell = tensor[row][column]

            # Step 1 - whether or not the function was altered
            cell[FUNCTION_ALTERED_LOC] = altered

            # Step 2 - the status of the test, "not run" when unknown
            cell[TEST_STATUS_LOC] = status_by_test_id.get(
                test.id, TEST_STATUS_TO_ID_MAP[TEST_OUTPUT_NOT_RUN])

            # Step 3 - how often the function was blamed for the test
            cell[BLAME_COUNT_LOC] = blame_counts.get(test.id, 0)

    commit._commit_tensor_binary = tensor
    session.commit()
    return tensor
def create_results_from_junit_xml(output_file: str,
                                  repository: Repository,
                                  test_run: TestRun):
    '''
    Gets results from a JUnitXML format file
    https://docs.pytest.org/en/latest/usage.html#creating-junitxml-format-files

    A Test is fetched or created for every test case in the file and a
    TestResult with the status and time of the test case is created for the
    test run. Errors are logged with their traceback and not re-raised, so
    results created before the error are kept.

    @param output_file: path to the JUnitXML file
    @param repository: the repository the tests belong to
    @param test_run: the test run the results are stored against
    '''
    logger.info('Reading info from {}'.format(output_file))

    try:
        session = Session.object_session(test_run)
        xml_output = JUnitXml.fromfile(output_file)

        seen_test_ids = set()
        warned_about_new_test = False

        for test_case in xml_output:
            attributes = test_case._elem.attrib

            # There can seemingly be duplicate test outputs for a test if both
            # the test and the test's teardown step both fail. So we will
            # ignore the second test output
            unique_id = '{}/{}/{}'.format(test_case.name,
                                          attributes.get('file'),
                                          attributes.get('classname'))

            if unique_id in seen_test_ids:
                logger.error(
                    'There was a duplicate test output for test: {}'.format(
                        test_case.name))
                continue

            seen_test_ids.add(unique_id)

            test, is_new = get_or_create(
                session,
                Test,
                repository=repository,
                name=test_case.name,
                file=attributes.get('file'),
                classname=attributes.get('classname'),
            )

            # report the first newly created test once; this used to stop
            # in a debugger, which blocks unattended runs
            if is_new and not warned_about_new_test:
                logger.error('Did you expect to create a new test?')
                warned_about_new_test = True

            status = (TEST_OUTPUT_FAILURE
                      if test_case.result else TEST_OUTPUT_SUCCESS)

            # if the test is skipped, do not keep it
            result = getattr(test_case, 'result', None)
            if getattr(result, 'type', None) == 'pytest.skip':
                status = TEST_OUTPUT_SKIPPED

            create(
                session,
                TestResult,
                test=test,
                test_run=test_run,
                status=status,
                time=test_case.time,
            )

    except Exception as e:
        # keep the best-effort behavior, but record the traceback instead of
        # stopping in a debugger
        logger.exception(
            'Hit error when reading from junit xml: {}'.format(e))
def save_function_histories(repository: Repository, commit: Commit,
                            function_nodes, patches):
    '''
    Stores the function histories

    Without a previous commit, new functions are created from the nodes.
    Otherwise the nodes are split by whether their file was altered by the
    patches. For nodes in unaltered files the function history of the
    previous commit is copied, and nodes in altered files are handed to
    _save_altered_file_function_history.

    @param repository: the repository the functions belong to
    @param commit: the commit the histories are created for
    @param function_nodes: the function nodes found in the repository
    @param patches: the patches of the commit
    '''
    session = Session.object_session(repository)
    previous_commit = get_previous_commit(commit)

    if not previous_commit:
        logger.info('No previous commit, creating new function nodes')
        create_new_functions_from_nodes(commit, function_nodes)
        return

    # from the patches, determine which files were altered
    altered_files = list(set([patch.header.new_path for patch in patches]))
    logger.info('Altered files: {}'.format(altered_files))

    altered_function_nodes = []
    unaltered_function_nodes = []
    for function_node in function_nodes:
        if function_node.file_path in altered_files:
            altered_function_nodes.append(function_node)
        else:
            unaltered_function_nodes.append(function_node)

    # If the file was not altered, then we can simply find the previous
    # function history and recreate it for this commit without even finding
    # the appropriate function
    for function_node in unaltered_function_nodes:
        try:
            previous_function_history = (
                session.query(FunctionHistory)
                .join(FunctionHistory.function)
                .filter(FunctionHistory.commit_id == previous_commit.id)
                .filter(Function.name == function_node.name)
                .filter(Function.file_path == function_node.file_path)
                .filter(FunctionHistory.first_line == function_node.lineno)
            ).one()
        except NoResultFound:
            # Skip the node. Carrying on would either fail on an unbound
            # variable or silently copy the history of the function handled
            # in the previous iteration.
            logger.error('Unable to find previous function history for node '
                         '{} which was in an unaltered file'.format(
                             function_node))
            continue

        create(
            session,
            FunctionHistory,
            function=previous_function_history.function,
            commit=commit,
            node=function_node,
            first_line=previous_function_history.first_line,
            last_line=previous_function_history.last_line)

    # If the file was altered, then we need to be extremely careful about how
    # we track function history.
    _save_altered_file_function_history(commit, previous_commit,
                                        altered_files, altered_function_nodes,
                                        patches)
def _match_patch_with_history(patch, function_histories: FunctionHistoryList): ''' Returns a modified form of the patch for the function history It would turn following patch: def a: dog = 'dog' + # added to a def b: cat = 'cat' + # added to b more_dog_stuff = 2 + # added more to a Into two patches: 1) for function history 'def a' def a: dog = 'dog' + # added to a def b: cat = 'cat' more_dog_stuff = 2 + # added more to a 2) for function history 'def b'' def a: dog = 'dog' def b: cat = 'cat' + # added to b more_dog_stuff = 2 ''' for function_history in function_histories: function_lines = list( range(function_history.first_line, function_history.last_line)) # now remove the lines that are a part of a function that is within # this function. This an inner function and changes to that function # should not relate to the patch for this function. for other_history in function_histories: if (other_history.first_line > function_history.first_line and other_history.last_line < function_history.last_line): logger.info('{} is an inner function of {}'.format( other_history, function_history)) inner_func_lines = list( range(other_history.first_line, other_history.last_line)) # remove the inner function lines from the list of lines in the # patch that maps to this function function_lines = [ line for line in function_lines if line not in inner_func_lines ] function_changes = [] for original_line, new_line, change in patch.changes: if new_line in function_lines: pass