Esempio n. 1
0
def get_validation_data(repository: Repository):
    '''
    Returns a shuffled sample of the repository's synthetic commits together
    with their stacked features and labels, ready to be used as validation
    data.

    Only commits whose commit_type is SYNTHETIC_CHANGE and that have at least
    one test run are considered, and at most 30 of them are collected.

    @param repository: the repository whose commits are sampled
    @returns: a tuple (commits, features, labels) where features and labels
              are numpy.stack applied to the per-commit values, in the same
              order as commits
    '''
    max_commits = 30
    commits = []
    logger.info('Retrieving training data')
    for commit in repository.commits:
        # check the cap before appending so that exactly max_commits are
        # collected, which is what the log message below claims
        if len(commits) >= max_commits:
            logger.info('Got 30 commits')
            break

        if commit.commit_type == SYNTHETIC_CHANGE and commit.test_runs:
            commits.append(commit)

    random.shuffle(commits)

    validation_features = []
    validation_labels = []

    for commit in commits:
        # the noise helpers are called for their effect on the feature
        feature = commit_to_state(commit)
        set_functions_altered_noise(feature)
        set_tests_not_run_noise(feature)

        validation_features.append(feature)
        validation_labels.append(commit_to_test_failure_label(commit))

    return (commits, numpy.stack(validation_features),
            numpy.stack(validation_labels))
Esempio n. 2
0
def run_test(test_run: TestRun, test: Test) -> TestResult:
    '''
    Runs a specific test and returns the TestResult

    @param test_run: the TestRun whose test_results are searched for the
                     outcome after the command has run
    @param test: the Test to execute
    @returns: the TestResult in test_run.test_results that belongs to the
              test.  If there is more than one, the one with the highest id
    @raises RuntimeError: if test_run holds no result for the test
    '''
    command = 'python -m pytest -vs {file}::{test_name}'.format(
        file=test.file, test_name=test.name)
    logger.info('Running individual test {} with command: "{}"'.format(
        test, command))

    _run_tests(test_run, command)

    # keep only the results that belong to the requested test
    test_results = [
        test_result for test_result in test_run.test_results
        if test_result.test.id == test.id
    ]

    if not test_results:
        # Fail loudly instead of dropping into an interactive debugger, which
        # cannot work in unattended runs
        raise RuntimeError(
            'No test result was recorded for test {} after running: '
            '"{}"'.format(test, command))

    # if there are more than one, then return the one with the latest id
    test_results.sort(key=lambda test_result: test_result.id)

    return test_results[-1]
Esempio n. 3
0
def get_range_of_patch(patch):
    '''
    Given a whatthepatch patch, it will return the start and end range of the
    patch.  We consider the start of the patch not to be the first line
    necessarily, but the first line that is changed.

    A change counts as "changed" when either its original or its new line
    number is None (one side of the change is missing).

    @param patch: object exposing a 'changes' iterable of
                  (original_line, new_line, change) tuples
    @returns: a tuple (start_range, end_range).  Both are None when the patch
              has no changed line, in which case an error is logged
    '''
    start_range = None
    end_range = None

    for original_line, new_line, _change in patch.changes:
        # entries with both line numbers set are skipped
        if original_line is not None and new_line is not None:
            continue

        changed_line = new_line if new_line is not None else original_line

        # compare against None explicitly so that a line number of 0 is not
        # mistaken for "not set yet"
        if start_range is None:
            start_range = changed_line

        # only new-file line numbers are allowed to extend the end of the
        # range; a change without one compares as -1
        comparable_line = new_line if new_line is not None else -1
        if end_range is None or comparable_line > end_range:
            end_range = changed_line

    if start_range is None or end_range is None:
        logger.error('Failed to get start_range or end_range')

    logger.info('{}-{}'.format(start_range, end_range))

    return start_range, end_range
Esempio n. 4
0
def snapshot_commit(repository: Repository, commit: Commit, skip_diffs=False):
    '''
    Stores the data that describes a commit.

    The steps are performed in this order:
        - collect the function nodes of the repository
        - collect the diff patches of the commit
        - save the function histories from those nodes and patches
        - unless skip_diffs is set, create the diffs and save them

    @param repository: the repository the commit belongs to
    @param commit: the commit to snapshot
    @param skip_diffs: when True the diffs are neither created nor saved
    @returns: the commit that was passed in
    '''
    logger.info('Snapshotting commit {}'.format(commit))

    # AST function nodes found in the repository
    nodes = get_function_nodes_from_repo(repository)

    # patches of the commit, in the form of whatthepatch patch objects
    commit_patches = get_diff_patches(commit)

    # one FunctionHistory per function
    save_function_histories(repository, commit, nodes, commit_patches)

    if skip_diffs:
        return commit

    # build the Diff instances and persist them
    new_diffs = create_diffs(repository, commit)
    save_diffs(repository, commit, new_diffs)

    return commit
Esempio n. 5
0
def run_all_tests(commit: Commit):
    '''
    Creates a TestRun for the commit and runs the repository's test command
    against it.

    @param commit: the commit to test
    @returns: the newly created TestRun
    '''
    logger.info('Running the tests against commit: {}'.format(commit))

    started_at = time.time()
    repository = commit.repository
    test_command = repository.test_commands

    # human readable UTC start time, only used for the log line
    readable_start = datetime.utcfromtimestamp(started_at).strftime(
        '%Y-%m-%d %H:%M:%S')
    message = ('Testing {repo_name} at commit "{commit_id}" at {date} '
               'with command:\n{command}')
    logger.info(message.format(repo_name=repository.name,
                               commit_id=commit.commit_id,
                               command=test_command,
                               date=readable_start))

    # the run is stored in the same session the commit lives in
    session = Session.object_session(commit)
    test_run = create(session,
                      TestRun,
                      commit=commit,
                      start_timestamp=started_at)

    _run_tests(test_run, test_command)

    return test_run
Esempio n. 6
0
def watch(repository: Repository, commit_only: bool):
    '''
    Walks over five synthetic commits of the repository and, for each one,
    checks it out, predicts the blame for every failed test result, commits
    the session and prints the commit summary.

    @param repository: the repository to work on
    @param commit_only: not used by this implementation
    '''
    session = Session.object_session(repository)
    set_bug_buddy_branch(repository)
    logger.info('Starting BugBuddy thingy')

    for commit in get_commits(repository, num_commits=5, synthetic=True):
        go_to_commit(repository, commit, force=True)

        # the tests are not re-run here, the stored failures are used
        for failed_result in commit.failed_test_results:
            predict_blame(failed_result)

        session.commit()

        commit.summary()
Esempio n. 7
0
def synthetic_blame(commit: Commit, test_run: TestRun):
    '''
    Given a synthetic commit, it will create blames for the commit based on
    the blames of the sub-combinations of the diffs

    For every failed test of the test run:
        - if a child commit has the same failing output, the diffs blamed
          there are blamed again for this test failure (each diff once)
        - otherwise every diff of the commit is blamed

    @param commit: the synthetic commit to create blames for
    @param test_run: the test run of the commit whose failures are blamed
    @returns: None.  Returns early, without touching the database, when the
              test run has no failed tests
    '''
    if not test_run.failed_tests:
        logger.info(
            'No failing tests for commit {}, nothing to blame'.format(commit))
        return

    logger.info('Setting blame for commit: {}'.format(commit))
    session = Session.object_session(commit)

    # Get the list of commits that are made up of the subset of diffs in this
    # commit
    children_commits = get_synthetic_children_commits(commit)

    for failed_test_result in test_run.failed_tests:
        # Maps each blamed diff to the first child commit that blamed it.
        # Using a dict removes duplicate diffs, keeps a stable order and lets
        # the log below name the child commit the blame really came from
        # instead of whichever child happened to be iterated last.
        blamed_diffs = {}
        for child_commit in children_commits:
            if not child_commit.has_same_test_result_output(
                    failed_test_result, status=TEST_OUTPUT_FAILURE):
                continue

            child_test_failure = (
                child_commit.get_matching_test_result(failed_test_result))

            for blame in child_test_failure.blames:
                blamed_diffs.setdefault(blame.diff, child_commit)

        if blamed_diffs:
            for faulty_diff, source_commit in blamed_diffs.items():
                logger.info('Assigning blame using child commit {} and diff '
                            '{} for test failure: {}'.format(
                                source_commit, faulty_diff,
                                failed_test_result))
                create(session,
                       Blame,
                       diff=faulty_diff,
                       test_result=failed_test_result)

        else:
            # We have created a completely new blame from this combination of
            # diffs in comparison from its children
            for diff in commit.diffs:
                blame = create(session,
                               Blame,
                               diff=diff,
                               test_result=failed_test_result)
                logger.info('Assigning new blame for commit {} blame {} '
                            'for test failure: {}'.format(
                                commit, blame, failed_test_result))

    logger.info('Completed blaming for {}'.format(commit))
    session.commit()
Esempio n. 8
0
def train_command(src_path: str):
    '''
    Looks up the repository that lives at src_path and passes it to train()

    @param src_path: path to the repository
    '''
    with session_manager() as db_session:
        repo = _get_repository_from_src_path(db_session, src_path)
        message = 'Training repository: "{}"'.format(repo)
        logger.info(message)
        train(repo)
Esempio n. 9
0
def do_command(src_path: str):
    '''
    Looks up the repository that lives at src_path and passes it to
    cache_test_results()

    @param src_path: path to the repository
    '''
    with session_manager() as db_session:
        repo = _get_repository_from_src_path(db_session, src_path)
        message = 'Doing stuff to repository: "{}"'.format(repo)
        logger.info(message)
        cache_test_results(repo)
Esempio n. 10
0
def _migrate_repository_to_new_path(session, repository, new_path):
    '''
    Points the stored repository at a new original path, re-syncs the mirror
    repository and commits the session.  This is necessary when the database
    has one path stored for a repo but you're on a different machine.

    @param session: database session that is committed at the end
    @param repository: the repository to update
    @param new_path: the value to store as repository.original_path
    '''
    old_path = repository.original_path
    message = 'Upating path of repository in database from {} to {}'
    logger.info(message.format(old_path, new_path))

    repository.original_path = new_path
    # the mirror is synced after the path has been replaced
    sync_mirror_repo(repository)
    session.commit()
Esempio n. 11
0
    def remove_line(self, line):
        '''
        Removes a particular line from the file at self.abs_path

        @param line: 1-based number of the line to remove
        @raises IndexError: if line is not a line number of the file
        '''
        with open(self.abs_path, 'r') as f:
            contents = f.readlines()

        # Line numbers are 1-based.  Without this check a line of 0 or less
        # turns into a negative index and silently removes a line counted
        # from the end of the file.
        if not 1 <= line <= len(contents):
            raise IndexError(
                'Cannot remove line {} from {}: the file has {} lines'.format(
                    line, self.abs_path, len(contents)))

        content = contents.pop(line - 1)
        logger.info('Removed line: "{}" from {}'.format(
            content.strip(), self.function.file_path))

        with open(self.abs_path, 'w') as f:
            f.writelines(contents)
Esempio n. 12
0
def synthetic_train(repository: Repository):
    '''
    Builds a Brain and a ChangeEnvironment in synthetic training mode for the
    repository, resets the environment and trains the brain on it

    @param repository: the repository to train on
    '''
    logger.info('Training on synthetic data for: {}'.format(repository))

    logger.info('Initializing environment')
    agent = Brain(repository)
    environment = ChangeEnvironment(repository, synthetic_training=True)
    environment.reset()

    agent.train(environment)
Esempio n. 13
0
def create_synthetic_diff_for_node(repository: Repository, commit: Commit,
                                   node):
    '''
    Records a FunctionHistory for the node and prepends an 'assert False' to
    the function.  This is used for creating synthetic 'assert False' diffs
    for each function.

    When the library is testable a single synthetic diff is created from the
    change and then reverted.  Otherwise the added line is simply removed.

    @param repository: the repository that is altered
    @param commit: the commit the function history and diff belong to
    @param node: the function node to alter
    @returns: the node that was passed in
    '''
    session = Session.object_session(repository)

    # the function is resolved through the commit preceding this one
    parent_commit = get_previous_commit(commit)
    function = parent_commit.get_function_for_node(node).function

    history_fields = dict(
        function=function,
        commit=commit,
        node=node,
        first_line=node.first_line,
        # minus 1 because the 'assert False' line will have been removed by
        # the time the commit is completed
        last_line=node.last_line - 1,
    )
    function_history = create(session, FunctionHistory, **history_fields)

    logger.info('There is a new function history: {}'.format(function_history))

    added_line = function_history.prepend_statement('assert False')

    if not library_is_testable(repository):
        # nothing to record, just take the statement out of the source again
        function_history.remove_line(added_line)
        return node

    # turn this one change into a diff
    diffs = create_diffs(repository,
                         commit=commit,
                         function=function,
                         is_synthetic=True,
                         allow_empty=False)

    # altering one function must result in exactly one diff
    assert len(diffs) == 1

    diff = diffs[0]
    logger.info('Created diff: {}'.format(diff))

    # leave the repository clean again
    revert_diff(diff)

    return node
Esempio n. 14
0
def generate_command(src_path: str, run_limit: int = None):
    '''
    Entry-point for the "bugbuddy generate" command

    Looks up the repository, checks that it is clean and that the database
    and git match, then generates the synthetic test results.

    @param src_path: path to the repository
    @param run_limit: passed on to generate_synthetic_test_results
    '''
    with session_manager() as db_session:
        repo = _get_repository_from_src_path(db_session, src_path)

        # sanity checks before anything is generated
        _check_repo_is_clean(repo)
        db_and_git_match(repo)

        message = 'Creating synthetic results for: {}'.format(repo)
        logger.info(message)

        generate_synthetic_test_results(repo, run_limit)
Esempio n. 15
0
    def prepend_statement(self, statement, offset: int = 0):
        '''
        Writes a statement to the beginning of the function

        The statement is inserted in front of the first statement of the
        function body, or directly after it when that first statement is a
        bare string expression (a docstring).

        @param statement: source text of the statement, without indentation
                          and without a trailing newline
        @param offset: number of extra lines to move the insertion point down
        @returns: the 1-based line number the statement was inserted at
        '''
        def _is_comment(node):
            '''
            Checks to see if the node is a bare string expression such as a
            docstring.  We need to because we do not want to add our
            statement into the comment.

            Only expression statements qualify.  Other statements that merely
            carry a string value (for example "return 'x'") are real code and
            the statement has to go in front of them.
            '''
            if not isinstance(node, ast.Expr):
                return False

            value = node.value
            if isinstance(value, ast.Constant):
                return isinstance(value.value, str)

            # Before Python 3.8 string literals were parsed into ast.Str.
            # Compare by name so that the deprecated attribute is never
            # looked up on newer interpreters.
            return type(value).__name__ == 'Str'

        # Get the first node in the function, which is it's first statement.
        # We will add the statement here
        first_node = self.node.body[0]

        if _is_comment(first_node):
            # Since Python 3.8 lineno is the first line of a multi-line
            # string and end_lineno is its last line.  Older versions have
            # no end_lineno and report the last line as lineno.
            last_comment_line = (getattr(first_node, 'end_lineno', None)
                                 or first_node.lineno)
            first_line_in_function = last_comment_line + 1
        else:
            first_line_in_function = first_node.lineno

        first_line_in_function += offset

        # note that a comment after the function does not seem to have a
        # column offset, and instead returns -1.
        column_offset = (first_node.col_offset if first_node.col_offset != -1
                         else self.node.col_offset + 4)
        indentation = ' ' * column_offset
        indented_statement = indentation + statement + '\n'

        with open(self.abs_path, 'r') as f:
            contents = f.readlines()

        contents.insert(first_line_in_function - 1, indented_statement)

        with open(self.abs_path, 'w') as f:
            f.writelines(contents)

        logger.info(
            'Added "{statement}" to {file} | {function_name}@{lineno}'.format(
                statement=statement,
                file=self.function.file_path,
                function_name=self.node.name,
                lineno=first_line_in_function))

        return first_line_in_function
Esempio n. 16
0
def yield_blame_set(synthetic_diffs: DiffList, set_size: int = 4):
    '''
    Endlessly yields randomly chosen sets of synthetic diffs.

    For every yielded set, set_size diffs are drawn at random with
    replacement and duplicates are then removed, so a set holds between 1 and
    set_size diffs.  The generator never terminates on its own.

    @param synthetic_diffs: the diffs to draw from
    @param set_size: number of random draws per set
    @raises ValueError: on the first iteration if synthetic_diffs is empty
    '''
    if len(synthetic_diffs) == 0:
        raise ValueError(
            'Cannot build a blame set from an empty list of synthetic diffs')

    while True:
        diff_set = [
            synthetic_diffs[random.randint(0, len(synthetic_diffs) - 1)]
            for _ in range(set_size)
        ]

        logger.info('Yielding diff set: {}'.format(diff_set))
        # remove duplicates if they exist
        yield list(set(diff_set))
Esempio n. 17
0
def _watch(repository: Repository, commit_only: bool):
    '''
    Schedules a ChangeWatchdog on the repository's original path (including
    sub-directories) and blocks until the user interrupts with Ctrl-C.

    @param repository: the repository to watch
    @param commit_only: passed on to the ChangeWatchdog
    '''
    set_bug_buddy_branch(repository)
    logger.info('Starting BugBuddy watcher')

    handler = ChangeWatchdog(repository, commit_only)
    watcher = Observer()
    watcher.schedule(handler, repository.original_path, recursive=True)
    watcher.start()

    try:
        # nothing to do here but wait for the interrupt
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        watcher.stop()
        logger.info('Shutting down BugBuddy watcher')

    watcher.join()
Esempio n. 18
0
def _get_repository_from_src_path(session, src_path: str):
    '''
    Returns the repository given a src_path

    If the repository is not in the database yet the user is asked whether it
    should be initialized.  If the stored original path differs from src_path
    the repository is migrated to src_path.

    @param session: database session used for the lookup
    @param src_path: path to the repository
    @returns: the matching (or newly initialized) repository
    @raises SystemExit: if the repository is unknown and the user declines to
                        initialize it
    '''
    url = get_repository_url_from_path(src_path)
    repository = get(session, Repository, url=url)
    if not repository:
        msg = ('The repository at "{}" is not in the BugBuddy database, would '
               'you like to initialize the repository?  (y/n)\n'.format(
                   src_path))
        should_initialize = input(msg)
        if not is_affirmative(should_initialize):
            logger.info('No worries!')
            # There is no repository to work with.  Stop here instead of
            # failing below with an AttributeError on None.
            raise SystemExit(0)

        repository = _initialize_repository(session, src_path)

    if src_path != repository.original_path:
        _migrate_repository_to_new_path(session, repository, src_path)
    return repository
Esempio n. 19
0
def delete_command(src_path: str):
    '''
    Entry-point for deleting a repository: optionally deletes its bug_buddy
    branch and removes its data from the database.

    @param src_path: path to the repository
    '''
    url = get_repository_url_from_path(src_path)
    with session_manager() as session:
        repository = get(session, Repository, url=url)

        # The repository may be missing from the database.  Fall back to the
        # name derived from the url so the branch can still be removed.
        name = (repository.name if repository
                else get_repository_name_from_url(url))

        # make sure you cannot delete the bug_buddy branch
        if name not in ['bug_buddy', 'BugBuddy']:
            msg = ('Would you like to delete the bug_buddy branch for {}?\n'
                   '(y/n)\n'.format(repository or src_path))
            should_delete = input(msg)

            if is_affirmative(should_delete):
                logger.info('Deleting bug_buddy branch')
                delete_bug_buddy_branch(repository or Mock(src_path=src_path))

        if repository:
            logger.info('Deleting data from the database')
            delete(session, repository)

        else:
            logger.info('No matching repo found in the database')
Esempio n. 20
0
def _initialize_repository(session,
                           src_path: str,
                           initialize_commands: str = None,
                           test_commands: str = None,
                           src_directory: str = None,
                           commit_only=False,
                           ignored_files=''):
    '''
    Given a src_path to a repository, create the repository in the database

    Any of initialize_commands, test_commands and src_directory that is not
    supplied is asked for interactively.  An already stored repository with
    the same url is reused instead of creating a new one.

    @param session: database session, committed at the end
    @param src_path: path to the repository
    @param initialize_commands: commands to initialize the repo
    @param test_commands: command to run the tests for the repo
    @param src_directory: source directory of the project
    @param commit_only: when True the initial snapshot is skipped
    @param ignored_files: stored on a newly created repository
    @returns: the repository
    '''
    logger.info('Initializing repository at "{}"'.format(src_path))
    url = get_repository_url_from_path(src_path)

    name = get_repository_name_from_url(url)
    logger.info('Repository name is "{}" with url "{}"'.format(name, url))
    if not initialize_commands:
        msg = ('Input the commands to intialize the repo (ex. '
               '"source env/bin/activate"): ')
        initialize_commands = input(msg)
    if not test_commands:
        msg = ('Input the command to run the tests for the repo: ')
        test_commands = input(msg)
    if not src_directory:
        msg = ('Input the source directory for your project: ')
        src_directory = input(msg)

    # first check to see if the repository already exists
    repository = get(session, Repository, url=url)
    if not repository:
        repository = create(session,
                            Repository,
                            name=name,
                            url=url,
                            initialize_commands=initialize_commands,
                            test_commands=test_commands,
                            src_directory=src_directory,
                            src_path=src_path,
                            ignored_files=ignored_files)

    _check_repo_is_clean(repository, path=repository.original_path)

    # create the mirror repository that BugBuddy primarily works on
    sync_mirror_repo(repository)

    # make sure the mirrored repo is on bug_buddy branch
    set_bug_buddy_branch(repository)

    # Initialize the repository by recording functions and creating synthetic
    # diffs
    if not commit_only:
        snapshot(repository, allow_empty=True)

    session.commit()
    logger.info(
        'Your repository "{}" has been successfully initialized!'.format(
            repository))

    return repository
Esempio n. 21
0
def sync_mirror_repo(repository: Repository):
    '''
    Updates the mirror repository to match the code base the developer is
    working on

    The mirror is cloned first if it does not exist yet.  The original path
    is then rsynced into the mirror path and the compiled .pyc files are
    deleted.

    @param repository: the repository whose mirror is synced
    '''
    # exist_ok avoids failing when the directory appears between a separate
    # existence check and the creation
    os.makedirs(MIRROR_ROOT, exist_ok=True)

    if not os.path.exists(repository.mirror_path):
        logger.info('Initializing mirror repository')
        clone_repository(repository, repository.mirror_path)

    # skip the .git directory, otherwise you are overwritting the commits in
    # the mirror repository
    command = ('rsync -a {source}/ {destination} --exclude ".git"'.format(
        source=repository.original_path, destination=repository.mirror_path))

    run_cmd(repository, command, log=False)

    # remove the pyc files so that we do not run into any import errors when
    # trying to run the testing commands:
    # http://wisercoder.com/importmismatcherror-python-fix/
    # (raw string: "\*" is not a valid escape sequence in a normal string)
    clean_command = r'find . -name \*.pyc -delete'
    run_cmd(repository, clean_command, log=False)
Esempio n. 22
0
    def on_any_event(self, event):
        '''
        Catches all events

        Events are ignored when the path contains '/.', when the file is
        listed in the repository's ignored files, or when it is not a .py
        file.  For every other event the mirror repository is synced and, if
        the repository is no longer clean, a snapshot is taken.  Unless
        commit_only is set the tests are then run, blame is predicted for
        the failures and the commit summary is displayed.

        @param event: the filesystem event, its src_path is inspected
        '''
        if '/.' in event.src_path:
            return

        updated_file = os.path.relpath(event.src_path,
                                       self.repository.original_path)
        if (not updated_file or updated_file in self.repository.ignored_files
                or not updated_file.endswith('.py')):
            return

        # we have to recreate the repository in this thread for Sqlite
        with session_manager() as session:
            repository = get(session, Repository, id=self.repository.id)

            # Copy the change over to the mirror repository
            sync_mirror_repo(repository)

            # use the instance that belongs to this thread's session, like
            # every other call in this block
            if is_repo_clean(repository):
                logger.info('Nothing was changed')
                return

            logger.info('Valid change event: {}'.format(event))

            start = time.time()
            commit = snapshot(repository, commit_only=self.commit_only)
            # split into whole minutes and the remaining seconds
            minutes, seconds = divmod(time.time() - start, 60)
            logger.info(
                'Completed snapshot of {commit} in {m}m {s}s'.format(
                    commit=commit, m=int(minutes), s=round(seconds, 2)))
            session.commit()

            if self.commit_only:
                return

            run_all_tests(commit)

            for test_failure in commit.failed_test_results:
                predict_blame(test_failure)

            # display the results in the cli output
            commit.summary()
Esempio n. 23
0
    def set_commit(self, commit):
        '''
        Whenever we update the commit for the Environment, we need to set
        multiple different variables

        Sets session, commit, state, all_tests, total_rewards,
        num_newly_changed_results_found and tests_ran.  A new TestRun is
        created as test_run only when not in synthetic training.

        @param commit: the commit the environment should work on
        '''
        logger.info('Setting commit to {}'.format(commit))

        self.session = Session.object_session(commit)
        self.commit = commit
        self.state = commit_to_state(
            commit, synthetic_training=self.synthetic_training)

        # All tests are linked with a corresponding TestRun instance.  One is
        # only created when we are not in synthetic training.
        if not self.synthetic_training:
            self.test_run = create(self.session, TestRun, commit=self.commit)

        # list of available tests, sorted in order of their id.  It is sorted
        # by the Test.id
        self.all_tests = get_list_of_tests(self.commit)
        self.all_tests.sort(key=lambda test: test.id, reverse=False)

        # total number of rewards accumulated for this change
        self.total_rewards = 0

        # total number of newly passing or newly failing tests discovered for
        # this change
        self.num_newly_changed_results_found = 0

        # list of the tests that have already been ran for this commit
        self.tests_ran = []
        logger.info('Set commit')
Esempio n. 24
0
def generate_synthetic_test_results(repository: Repository, run_limit: int):
    '''
    Creates multiple synthetic changes and test results

    The base synthetic diffs are created first if the repository has none.
    Then, for every subset of every randomly chosen diff set, a commit with
    those diffs is looked up or created, the tests are run against it if it
    has no test run yet, blame is assigned if needed and the result is
    committed and pushed.

    @param repository: the repository to generate results for
    @param run_limit: number of completed runs after which the process
                      exits.  A falsy value means no limit
    @raises SystemExit: once run_limit runs have completed
    @raises Exception: any error of a run is re-raised after the local
                       changes have been reverted
    '''
    session = Session.object_session(repository)
    synthetic_diffs = repository.get_synthetic_diffs()

    if not synthetic_diffs:
        # create the synthetic diffs
        create_synthetic_alterations(repository)
        logger.info('You have created the base synthetic commits.  Congrats!')
        session.commit()
        synthetic_diffs = repository.get_synthetic_diffs()

    num_runs = 0
    for diff_set in yield_blame_set(synthetic_diffs):
        logger.info('On diff set: {}'.format(diff_set))

        for diff_subset in powerset(diff_set):
            logger.info('On run #{} with: {}'.format(num_runs, diff_subset))
            db_and_git_match(repository)

            try:
                # see if we already have a commit and test run for the diff set.
                # if we do, continue
                logger.debug('1: {}'.format(time.time()))
                commit = get_matching_commit_for_diffs(repository, diff_subset)

                # if the commit does not already exist for this set, then we
                # need to create it and run tests against it
                if not commit:
                    # revert back to a clean repository
                    reset_commit = create_reset_commit(repository)
                    if reset_commit:
                        logger.info('Storing reset commit')
                        snapshot_commit(repository, reset_commit)
                        git_push(repository)
                        session.commit()
                        db_and_git_match(repository)

                    # create a commit.  Only allow an empty commit if there
                    # nothing in the diff
                    commit = create_commit(repository,
                                           name=SYNTHETIC_CHANGE,
                                           commit_type=SYNTHETIC_CHANGE,
                                           allow_empty=True)

                    logger.info('Applying diffs')
                    # apply the synthetic diffs to the mirrored repository
                    apply_synthetic_diffs(commit, diff_subset)

                    # store the rest of the commit data.  No need to recreate
                    # the diffs since they have already been stored in
                    # apply_synthetic_diffs
                    logger.info('Snapshotting the commit: {}'.format(commit))
                    commit = snapshot_commit(repository,
                                             commit,
                                             skip_diffs=True)

                # add the commit hash id for its synthetic diffs
                logger.info('Creating synthetic diff hash')
                if not commit.synthetic_diff_hash:
                    base_synthetic_ids = [diff.id for diff in diff_subset]
                    commit.synthetic_diff_hash = (
                        get_hash_given_base_synthetic_ids(base_synthetic_ids))
                    logger.info('Added hash_ids #{} to commit: {}'.format(
                        commit.synthetic_diff_hash, commit))

                logger.info('Running tests')
                if not commit.test_runs:
                    # run all tests against the synthetic change
                    run_all_tests(commit)

                logger.debug('2: {}'.format(time.time()))
                if commit.needs_blaming():
                    synthetic_blame(commit, commit.test_runs[0])

                logger.debug('3: {}'.format(time.time()))
                session.commit()

                # push newly created commit
                git_push(repository)

                logger.info('Completed run #{}'.format(num_runs))

                num_runs += 1
                if run_limit and num_runs >= run_limit:
                    logger.info('Completed all #{} runs! '.format(num_runs))
                    # The exit() helper is installed by the site module for
                    # interactive use and is not guaranteed to exist, so
                    # raise SystemExit directly.  It is not an Exception
                    # subclass and therefore skips the handler below.
                    raise SystemExit(0)

            except Exception as e:
                # revert all the local edits
                logger.error('Hit the following exception: {}'.format(e))
                logger.error('Reverting local changes')
                revert_to_master(repository)
                # bare raise keeps the original traceback untouched
                raise
Esempio n. 25
0
    def step(self, action):
        '''
        Run one timestep of the environment's dynamics.
        Accepts an action and returns a tuple (new_state, reward, done, info).

        # Arguments
            action (int): Action chosen by the agent.  It is the index into
                          self.all_tests of the test the agent wants to run
                          to see if it has a newly failed or newly passed
                          status

        # Returns
            new_state (object): the new state of the current environment after
                                the action was taken
            reward (float) : Amount of reward returned after previous action.
            done (boolean): Whether the episode has ended, in which case further
                            step() calls will return undefined results.
            info (dict): Contains auxiliary diagnostic information (helpful for
                         debugging, and sometimes learning).  Currently always
                         empty.

        # Raises
            NotImplementedError: when not in synthetic training, because
                                 actually running a test is not implemented
        '''
        # the action is the index of the test that the agent wants to run
        test = self.all_tests[action]

        # if we are in synthetic training, then we don't need to run the test.
        # We can simply retrieve from the database whether or not the test is
        # failing.
        if self.synthetic_training:
            test_result = self.get_synthetic_test_result(test)
            assert test_result

        else:
            raise NotImplementedError(
                'You have not implemented actually running a test')

        # determine the reward for running that particular test
        reward = self.get_reward(test_result)

        # update the state with the action
        self.update_state(test_result)

        #########################
        #      BOOKKEEPING      #
        #########################
        # store that we ran this test
        self.tests_ran.append(test)
        # store the total number of rewards we have received for this commit
        self.total_rewards += reward

        # if it was a newly failing or newly passing test result, then store
        # that information for bookkeeping.
        if self.is_newly_changed_result(test_result):
            self.num_newly_changed_results_found += 1

        # we can store extra debugging information here
        info_dict = {}

        if self.done:
            logger.info(
                'Completed commit #{commit} | '
                'Total reward: {total} / {total_possible} | '
                '{num_tests_ran} / {total_tests}'.format(
                    commit=self.commit.id,
                    total=self.total_rewards,
                    total_possible=self.num_newly_changed_results_found,
                    num_tests_ran=len(self.tests_ran),
                    total_tests=len(self.all_tests)))

        return self.state, reward, self.done, info_dict
Esempio n. 26
0
def commit_to_tensor(commit):
    '''
    Converts an individual commit into a tensor of shape
    (number of functions, number of tests, NUM_FEATURES):

                functionA                               functionB
    --------------------------------------------------------------------------
    testA      [function_altered,
                test_status,                               ...
                blame_count]
    --------------------------------------------------------------------------
    testB       ...                                        ...
    --------------------------------------------------------------------------

    or another way of looking at the shape:

    [functionA: [testA: [functionA_altered, testA_status, blame_count],
                 testB: [functionA_altered, testB_status, blame_count]],
     ...
    ]

    The first axis follows the repository's functions and the second axis the
    repository's tests, both ordered by ascending id.  Test statuses are taken
    from the first test run of the commit only; tests without a result in that
    run get the "not run" status id.

    If the commit already has a stored tensor (commit._commit_tensor_binary
    is truthy), commit.commit_tensor is returned and nothing is recomputed.
    Otherwise the tensor is built, stored on the commit, and the commit's
    session is committed before returning.
    '''
    if commit._commit_tensor_binary:
        # logger.info('Returning cache for {}'.format(commit))
        return commit.commit_tensor

    logger.info('Creating cached commit tensor for {}'.format(commit))
    session = Session.object_session(commit)

    # sorted() builds new lists, so the repository's own collections are not
    # reordered as a side effect of building the tensor
    sorted_functions = sorted(commit.repository.functions,
                              key=lambda func: func.id)
    sorted_tests = sorted(commit.repository.tests, key=lambda test: test.id)

    # the current features are:
    #   function_altered
    #   test_status
    #   blame_count
    commit_tensor = numpy.zeros((len(sorted_functions),
                                 len(sorted_tests),
                                 NUM_FEATURES))

    # store the results of the tests for the commit in a dictionary for quick
    # lookup
    commit_results = {}
    if commit.test_runs:
        for test_result in commit.test_runs[0].test_results:
            commit_results[test_result.test.id] = (
                TEST_STATUS_TO_ID_MAP[test_result.status])

    logger.info('Commit Test Results: {}'.format(commit_results))

    # status id used for every test that has no result in the test run
    not_run_status = TEST_STATUS_TO_ID_MAP[TEST_OUTPUT_NOT_RUN]

    for i, function in enumerate(sorted_functions):
        logger.info('On function: {}'.format(function))

        function_was_altered = any(
            diff.commit.id == commit.id for diff in function.diffs)
        blame_counts = get_blame_counts_for_function(function)

        # Step 1 - add whether or not the function was altered for this
        # commit.  1 for altered, 0 otherwise.  The value does not depend on
        # the test, so the whole row is written at once.
        commit_tensor[i, :, FUNCTION_ALTERED_LOC] = int(function_was_altered)

        for j, test in enumerate(sorted_tests):
            # Step 2 - add the status of the test
            commit_tensor[i, j, TEST_STATUS_LOC] = commit_results.get(
                test.id, not_run_status)

            # Step 3 - add the blame count, which represents how many times
            # the function has been blamed for the test
            commit_tensor[i, j, BLAME_COUNT_LOC] = blame_counts.get(
                test.id, 0)

    commit._commit_tensor_binary = commit_tensor
    session.commit()
    return commit_tensor
Esempio n. 27
0
def create_results_from_junit_xml(output_file: str, repository: Repository,
                                  test_run: TestRun):
    '''
    Gets results from a JUnitXML format file
    https://docs.pytest.org/en/latest/usage.html#creating-junitxml-format-files

    For every test case found in output_file, the matching Test of the
    repository is fetched (or created when it does not exist yet) and a
    TestResult with the test's status and time is created for test_run.

    A test case is identified by its name, file and classname; when the same
    identity shows up more than once only the first occurrence is stored.

    This is best effort: any exception raised while reading or storing the
    results is logged together with its traceback and then swallowed, so
    nothing is raised to the caller.  Returns None.
    '''
    logger.info('Reading info from {}'.format(output_file))
    try:
        session = Session.object_session(test_run)
        xml_output = JUnitXml.fromfile(output_file)

        # identities of the test cases handled so far
        seen_test_ids = set()

        # set once the first unexpected new test was reported, so the
        # warning is only logged a single time per file
        expected_new = False

        for test_case in xml_output:
            test_file = test_case._elem.attrib.get('file')
            test_classname = test_case._elem.attrib.get('classname')

            # There can seemingly be duplicate test outputs for a test if both
            # the test and the test's teardown step both fail.  So we will
            # ignore the second test output
            unique_id = '{}/{}/{}'.format(
                test_case.name, test_file, test_classname)
            if unique_id in seen_test_ids:
                logger.error(
                    'There was a duplicate test output for test: {}'.format(
                        test_case.name))
                continue

            seen_test_ids.add(unique_id)

            test, is_new = get_or_create(
                session,
                Test,
                repository=repository,
                name=test_case.name,
                file=test_file,
                classname=test_classname,
            )

            if is_new and not expected_new:
                logger.error('Did you expect to create a new test?')
                expected_new = True

            # any result element (failure, error, ...) counts as a failure
            result = test_case.result
            status = TEST_OUTPUT_FAILURE if result else TEST_OUTPUT_SUCCESS

            # a skipped test is stored with its own status instead of being
            # counted as a failure
            if getattr(result, 'type', None) == 'pytest.skip':
                status = TEST_OUTPUT_SKIPPED

            create(
                session,
                TestResult,
                test=test,
                test_run=test_run,
                status=status,
                time=test_case.time,
            )

    except Exception as e:
        # deliberately broad: a broken report must not abort the caller, but
        # the traceback is kept in the log so the failure can be diagnosed
        logger.exception(
            'Hit error when reading from junit xml: {}'.format(e))
Esempio n. 28
0
def save_function_histories(repository: Repository, commit: Commit,
                            function_nodes, patches):
    '''
    Stores the function histories of the given commit.

    repository: the repository the commit belongs to; its session is used for
        all queries and inserts.
    commit: the commit the function histories are created for.
    function_nodes: the function nodes found in the commit.  Each node is
        expected to expose name, file_path and lineno.
    patches: the patches of the commit.  The header.new_path of each patch
        decides which files count as altered.

    When the commit has no previous commit, new functions are created for all
    nodes and nothing else happens.  Otherwise:

    * for nodes in unaltered files the function history of the previous
      commit (same name, file path and first line) is copied onto this
      commit.  When no such history exists an error is logged and the node is
      skipped.
    * nodes in altered files are handed to
      _save_altered_file_function_history.
    '''
    session = Session.object_session(repository)

    previous_commit = get_previous_commit(commit)

    if not previous_commit:
        logger.info('No previous commit, creating new function nodes')
        create_new_functions_from_nodes(commit, function_nodes)
        return

    # from the patches, determine which files were altered
    altered_file_set = {patch.header.new_path for patch in patches}
    altered_files = list(altered_file_set)
    logger.info('Altered files: {}'.format(altered_files))

    altered_function_nodes = []
    unaltered_function_nodes = []
    for function_node in function_nodes:
        if function_node.file_path in altered_file_set:
            altered_function_nodes.append(function_node)
        else:
            unaltered_function_nodes.append(function_node)

    # If the file was not altered, then we can simply find the previous
    # function history and recreate it for this commit without even finding
    # the appropriate function
    for function_node in unaltered_function_nodes:
        # histories of the previous commit with the same name and file
        same_name_histories = (
            session.query(FunctionHistory)
            .join(FunctionHistory.function)
            .filter(FunctionHistory.commit_id == previous_commit.id)
            .filter(Function.name == function_node.name)
            .filter(Function.file_path == function_node.file_path))

        try:
            previous_function_history = same_name_histories.filter(
                FunctionHistory.first_line == function_node.lineno).one()
        except NoResultFound:
            # Skip the node: falling through would either fail on an unbound
            # name or silently reuse the history found for the previous node.
            logger.error(
                'Unable to find previous function history for node {} '
                'which was in an unaltered file. Near matches: {}'.format(
                    function_node, same_name_histories.all()))
            continue

        create(
            session,
            FunctionHistory,
            function=previous_function_history.function,
            commit=commit,
            node=function_node,
            first_line=previous_function_history.first_line,
            last_line=previous_function_history.last_line)

    # If the file was altered, then we need to be extremely careful about how
    # we track function history.
    _save_altered_file_function_history(commit, previous_commit, altered_files,
                                        altered_function_nodes, patches)
Esempio n. 29
0
def _match_patch_with_history(patch, function_histories: FunctionHistoryList):
    '''
    Returns a modified form of the patch for the function history

    It would turn following patch:

        def a:
            dog = 'dog'
    +        # added to a

            def b:
                cat = 'cat'
    +            # added to b

            more_dog_stuff = 2
    +        # added more to a


    Into two patches:
    1) for function history 'def a'

        def a:
            dog = 'dog'
    +        # added to a

            def b:
                cat = 'cat'

            more_dog_stuff = 2
    +        # added more to a

    2) for function history 'def b''

        def a:
            dog = 'dog'

            def b:
                cat = 'cat'
    +            # added to b

            more_dog_stuff = 2
    '''
    for function_history in function_histories:
        function_lines = list(
            range(function_history.first_line, function_history.last_line))

        # now remove the lines that are a part of a function that is within
        # this function.  This an inner function and changes to that function
        # should not relate to the patch for this function.
        for other_history in function_histories:
            if (other_history.first_line > function_history.first_line
                    and other_history.last_line < function_history.last_line):
                logger.info('{} is an inner function of {}'.format(
                    other_history, function_history))

                inner_func_lines = list(
                    range(other_history.first_line, other_history.last_line))

                # remove the inner function lines from the list of lines in the
                # patch that maps to this function
                function_lines = [
                    line for line in function_lines
                    if line not in inner_func_lines
                ]

        function_changes = []
        for original_line, new_line, change in patch.changes:
            if new_line in function_lines:
                pass