# A minimal import header, assuming the surrounding CAS worker module.
# The standard-library imports are certain; "monthdelta" is the PyPI
# MonthDelta package. The project-internal names used below (Session, Commit,
# Repository, GitCommitLinker, MetricsGenerator, Git, config) are expected to
# come from the surrounding CAS modules; their exact paths are not shown here.
import calendar
import logging
from datetime import datetime, timedelta

from monthdelta import monthdelta


def analyzeRepo(repository_to_analyze, session):
    """
    Analyzes the given repository.

    @param repository_to_analyze The repository to analyze.
    @param session SQLAlchemy session
    @private
    """
    repo_id = repository_to_analyze.id

    # Update the repository's status to show that it is being analyzed.
    repository_to_analyze.status = "Analyzing"
    session.commit()
    logging.info("Worker analyzing repository id %s", repo_id)

    # All commits, in descending order of author date.
    all_commits = (session.query(Commit)
                   .filter(Commit.repository_id == repo_id)
                   .order_by(Commit.author_date_unix_timestamp.desc())
                   .all())

    # Corrective commits, in ascending order of author date.
    # If updating, only fetch the corrective commits that have not been linked
    # yet; there is no need to re-link corrective commits that have already
    # been linked to their bug-inducing commits.
    corrective_commits = (session.query(Commit)
                          .filter((Commit.fix == "True")  # fix is stored as a string flag
                                  & (Commit.repository_id == repo_id)
                                  & (Commit.linked == False))
                          .order_by(Commit.author_date_unix_timestamp.asc())
                          .all())

    logging.info("Linking %d new corrective commits for repo %s",
                 len(corrective_commits), repo_id)

    try:
        git_commit_linker = GitCommitLinker(repo_id)
        git_commit_linker.linkCorrectiveCommits(corrective_commits, all_commits)
    except Exception:
        logging.exception("Got an exception linking bug-fixing changes to "
                          "bug-inducing changes for repo %s", repo_id)
        repository_to_analyze.status = "Error"
        session.commit()  # update repo status
        raise

    # Signify to the CAS Manager that this repo is ready to have its model built.
    if repository_to_analyze.status != "Error":
        repository_to_analyze.status = "In Queue to Build Model"
        session.commit()  # update repo status

    # After updating the commit.contains_bug and commit.fix labels, parse the
    # diff information for the repository.
    git = Git()
    git.diff(repo_id)
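# A minimal sketch of how a worker might drive analyzeRepo(). The
# "Waiting to be Analyzed" status string, the helper name, and the
# single-shot polling shape are assumptions for illustration, not part
# of the original module.
def _worker_pass_sketch():
    session = Session()
    repo = (session.query(Repository)
            .filter(Repository.status == "Waiting to be Analyzed")  # assumed status value
            .first())
    if repo is not None:
        try:
            analyzeRepo(repo, session)
        except Exception:
            # analyzeRepo has already marked the repository as "Error" and
            # re-raised; log and move on so the worker can pick up other work.
            logging.exception("Analysis failed for repository id %s", repo.id)
    session.close()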
def checkBuildModel(self):
    """
    Checks whether any repository is waiting to have its model built.
    We use a queue because we can't access R concurrently.
    """
    session = Session()

    if not self.modelQueue.empty():
        repo_id = self.modelQueue.get()
        repo = session.query(Repository).filter(Repository.id == repo_id).first()

        # Only train on data up to X months before now: bugs introduced in the
        # most recent months may not have been fixed yet, so including them
        # would skew the model.
        glm_model_time = int(config['glm_modeling']['months'])
        data_months_datetime = datetime.utcnow() - monthdelta(glm_model_time)
        data_months_unixtime = calendar.timegm(data_months_datetime.utctimetuple())

        # All commits for the repo before (now - glm model time). The
        # author_date_unix_timestamp column is stored as a string, so the
        # cutoff is compared as a string as well.
        training_commits = (session.query(Commit)
                            .filter((Commit.repository_id == repo_id)
                                    & (Commit.author_date_unix_timestamp < str(data_months_unixtime)))
                            .order_by(Commit.author_date_unix_timestamp.desc())
                            .all())

        # All commits for the repo at or after (now - glm model time).
        testing_commits = (session.query(Commit)
                           .filter((Commit.repository_id == repo_id)
                                   & (Commit.author_date_unix_timestamp >= str(data_months_unixtime)))
                           .all())

        try:
            metrics_generator = MetricsGenerator(repo_id, training_commits, testing_commits)
            metrics_generator.buildAllModels()

            # Monthly data dump - or rather, every 30 days. UTC is used
            # consistently so the string comparison below is meaningful.
            dump_refresh_date = str(datetime.utcnow() - timedelta(days=30))

            if repo.last_data_dump is None or repo.last_data_dump < dump_refresh_date:
                logging.info("Generating a monthly data dump for repository: %s", repo_id)

                # Get all commits for the repository.
                all_commits = (session.query(Commit)
                               .filter(Commit.repository_id == repo_id)
                               .order_by(Commit.author_date_unix_timestamp.desc())
                               .all())

                metrics_generator.dumpData(all_commits)
                repo.last_data_dump = str(datetime.utcnow().replace(microsecond=0))

            # Notify the user if the repo has never been analyzed before.
            if repo.analysis_date is None:
                self.notify(repo)

            logging.info("Repo %s finished analyzing.", repo_id)
            repo.analysis_date = str(datetime.utcnow().replace(microsecond=0))
            repo.status = "Analyzed"
            session.commit()  # update status of repo
            session.close()

        # uh-oh
        except Exception:
            logging.exception("Got an exception building model for repository %s", repo_id)
            repo.status = "Error"
            session.commit()  # update repo status
            session.close()
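# A short worked example of the training/testing cutoff computed above.
# The six-month window and the helper name are arbitrary illustrations,
# not values read from the real config.
def _cutoff_example_sketch(months_back=6):
    cutoff_dt = datetime.utcnow() - monthdelta(months_back)
    cutoff_unix = calendar.timegm(cutoff_dt.utctimetuple())
    # Epoch seconds are currently 10 decimal digits (and stay that wide until
    # the year 2286), so comparing them as strings, as the queries above do,
    # orders the same way as comparing the integers themselves.
    return str(cutoff_unix)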