Example #1
def analyzeRepo(repository_to_analyze, session):
    """
    Analyzes the given repository
    @param repository_to_analyze	The repository to analyze.
    @param session                  SQLAlchemy session
    @private
    """
    repo_name = repository_to_analyze.name
    repo_id = repository_to_analyze.id
    last_analysis_date = repository_to_analyze.analysis_date

    # Update status of repo to show it is analyzing
    repository_to_analyze.status = "Analyzing"
    session.commit()

    logging.info('Worker analyzing repository id ' + repo_id)

    # all commits in descending order
    all_commits = (session.query(Commit)
                   .filter(Commit.repository_id == repo_id)
                   .order_by(Commit.author_date_unix_timestamp.desc())
                   .all()
                   )

    # corrective commits in ascending order
    # if updating, only get the corrective commits that have not been linked yet.
    # No need to re-link corrective commits that have already been linked with the bug-inducing commit.
    corrective_commits = (session.query(Commit)
                          .filter(
                            (Commit.fix == "True") &
                            (Commit.repository_id == repo_id) &
                            (Commit.linked == False)
                          )
                          .order_by(Commit.author_date_unix_timestamp.asc())
                          .all()
                          )

    logging.info("Linking " + str(len(corrective_commits)) + " new corrective commits for repo " + repo_id)

    try:
        git_commit_linker = GitCommitLinker(repo_id)
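        # Link each bug-fixing (corrective) commit back to the change(s) that
        # likely introduced the bug, in the style of the SZZ algorithm.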
        git_commit_linker.linkCorrectiveCommits(corrective_commits, all_commits)
    except Exception:
        logging.exception("Got an exception linking bug fixing changes to bug inducing changes for repo " + repo_id)
        repository_to_analyze.status = "Error"
        session.commit()  # update repo status
        raise

    # Signify to CAS Manager that this repo is ready to have its model built
    if repository_to_analyze.status != "Error":
        repository_to_analyze.status = "In Queue to Build Model"
        session.commit()  # update repo status
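
A minimal usage sketch for analyzeRepo (assuming the project's Session factory and Repository model; the query and the status string here are illustrative, not the project's actual dispatch logic):

session = Session()
# "Waiting to be Analyzed" is an illustrative status value, not necessarily
# the one the project uses.
repo = (session.query(Repository)
        .filter(Repository.status == "Waiting to be Analyzed")
        .first())
if repo is not None:
    try:
        analyzeRepo(repo, session)
    finally:
        session.close()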
Example #2
def analyzeRepo(repository_to_analyze, session):
    """
	Analyzes the given repository
	@param repository_to_analyze	The repository to analyze.
	@param session                  SQLAlchemy session
	@private
	"""
    repo_name = repository_to_analyze.name
    repo_id = repository_to_analyze.id
    last_analysis_date = repository_to_analyze.analysis_date

    # Update status of repo to show it is analyzing
    repository_to_analyze.status = "Analyzing"
    session.commit()

    logging.info('Worker analyzing repository id ' + repo_id)

    # all commits in descending order
    all_commits = (session.query(Commit).filter(
        Commit.repository_id == repo_id).order_by(
            Commit.author_date_unix_timestamp.desc()).all())

    # corrective commits in ascending order
    # if updating, only get the corrective commits that have not been linked yet.
    # No need to re-link corrective commits that have already been linked with the bug-inducing commit.
    corrective_commits = (session.query(Commit).filter(
        (Commit.fix == "True") & (Commit.repository_id == repo_id)
        & (Commit.linked == False)).order_by(
            Commit.author_date_unix_timestamp.asc()).all())

    logging.info("Linking " + str(len(corrective_commits)) +
                 " new corrective commits for repo " + repo_id)

    try:
        git_commit_linker = GitCommitLinker(repo_id)
        git_commit_linker.linkCorrectiveCommits(corrective_commits,
                                                all_commits)
    except Exception:
        logging.exception(
            "Got an exception linking bug fixing changes to bug inducing changes for repo "
            + repo_id)
        repository_to_analyze.status = "Error"
        session.commit()  # update repo status
        raise

    # Signify to CAS Manager that this repo is ready to have its model built
    if repository_to_analyze.status != "Error":
        repository_to_analyze.status = "In Queue to Build Model"
        session.commit()  # update repo status
        # After updating the commit.contains_bug and commit.fix labels, parse the diff information
        git = Git()
        git.diff(repo_id)
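
Both variants assume module-level imports roughly like the following (a sketch; the actual module paths in the source project may differ):

import logging
from sqlalchemy.orm import Session            # or a sessionmaker()-created factory
from models import Commit, Repository         # hypothetical module path
from commit_linker import GitCommitLinker     # hypothetical module path
from git_wrapper import Git                   # hypothetical module path (Example #2 only)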
Example #3
	def checkBuildModel(self):
		""" Checks if any repo is awaiting to build model. 
			We are using a queue because we can't concurrently access R """

		session = Session()

		if not self.modelQueue.empty():
			repo_id = self.modelQueue.get()
			repo = (session.query(Repository).filter(Repository.id == repo_id).first())

			# Use data only up to X months prior; for the most recent months we won't
			# have sufficient data to build models, as there may be bugs introduced in
			# those months that haven't been fixed yet, skewing our model.
			glm_model_time = int(config['glm_modeling']['months'])
			data_months_datetime = datetime.utcnow() - MonthDelta(glm_model_time)
			data_months_unixtime = calendar.timegm(data_months_datetime.utctimetuple())
		
			# all commits for repo prior to current time - glm model time
			training_commits = (session.query(Commit)
						.filter( 
							( Commit.repository_id == repo_id ) &
							( Commit.author_date_unix_timestamp < int(data_months_unixtime))
						)
						.order_by( Commit.author_date_unix_timestamp.desc() )
						.all())

			# all commits for repo after or on current time - glm model time
			testing_commits = (session.query(Commit)
						.filter(
							( Commit.repository_id == repo_id ) &
							( Commit.author_date_unix_timestamp >= int(data_months_unixtime)))
						.all())
	
			try: 
				metrics_generator = MetricsGenerator(repo_id, training_commits, testing_commits)
				metrics_generator.buildAllModels()

				# monthly data dump - or rather, every 30 days.
				dump_refresh_date = str(datetime.utcnow() - timedelta(days=30))
				if repo.last_data_dump is None or repo.last_data_dump < dump_refresh_date:
					logging.info("Generating a monthly data dump for repository: " + repo_id)

					# Get all commits for the repository
					all_commits = (session.query(Commit)
						.filter( 
							( Commit.repository_id == repo_id )
						)
						.order_by( Commit.author_date_unix_timestamp.desc() )
						.all())

					metrics_generator.dumpData(all_commits)
					repo.last_data_dump = str(datetime.utcnow().replace(microsecond=0))
					
				# Notify user if repo has never been analyzed previously
				if repo.analysis_date is None:
					self.notify(repo)
	
				logging.info("Repo " + repo_id + " finished analyzing.")
				repo.analysis_date = str(datetime.now().replace(microsecond=0))
				repo.status = "Analyzed"
				session.commit() # update status of repo
				session.close()

			# uh-oh
			except Exception:
				logging.exception("Got an exception building model for repository " + repo_id)

				repo.status = "Error"
				session.commit() # update repo status
				session.close()
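
The training/testing split above hinges on converting an "X months ago" datetime into a Unix timestamp. A standalone sketch of that cutoff computation (assuming the MonthDelta package; this example and the next spell the class differently, MonthDelta vs monthdelta, so check the local import):

import calendar
from datetime import datetime
from monthdelta import monthdelta

months = 6  # stand-in for int(config['glm_modeling']['months'])
cutoff_dt = datetime.utcnow() - monthdelta(months)
cutoff_unix = calendar.timegm(cutoff_dt.utctimetuple())
# commits with author_date_unix_timestamp < cutoff_unix form the training set;
# the rest are held out for testing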
Example #4
	def checkBuildModel(self):
		""" Checks whether any repo is waiting to have its model built.
			We use a queue because we can't access R concurrently. """

		session = Session()

		if not self.modelQueue.empty():
			repo_id = self.modelQueue.get()
			repo = (session.query(Repository).filter(Repository.id == repo_id).first())

			# Use data only up to X months prior; for the most recent months we won't
			# have sufficient data to build models, as there may be bugs introduced in
			# those months that haven't been fixed yet, skewing our model.
			glm_model_time = int(config['glm_modeling']['months'])
			data_months_datetime = datetime.utcnow() - monthdelta(glm_model_time)
			data_months_unixtime = calendar.timegm(data_months_datetime.utctimetuple())
		
			# all commits for repo prior to current time - glm model time
			training_commits = (session.query(Commit)
						.filter( 
							( Commit.repository_id == repo_id ) &
							( Commit.author_date_unix_timestamp < int(data_months_unixtime))
						)
						.order_by( Commit.author_date_unix_timestamp.desc() )
						.all())

			# all commits for repo after or on current time - glm model time
			testing_commits = (session.query(Commit)
						.filter(
							( Commit.repository_id == repo_id ) &
							( Commit.author_date_unix_timestamp >= int(data_months_unixtime)))
						.all())
	
			try: 
				metrics_generator = MetricsGenerator(repo_id, training_commits, testing_commits)
				metrics_generator.buildAllModels()

				# monthly data dump - or rather, every 30 days.
				dump_refresh_date = str(datetime.utcnow() - timedelta(days=30))
				if repo.last_data_dump is None or repo.last_data_dump < dump_refresh_date:
					logging.info("Generating a monthly data dump for repository: " + repo_id)

					# Get all commits for the repository
					all_commits = (session.query(Commit)
						.filter( 
							( Commit.repository_id == repo_id )
						)
						.order_by( Commit.author_date_unix_timestamp.desc() )
						.all())

					metrics_generator.dumpData(all_commits)
					repo.last_data_dump = str(datetime.utcnow().replace(microsecond=0))
					
				# Notify user if repo has never been analyzed previously
				if repo.analysis_date is None:
					self.notify(repo)
	
				logging.info("Repo " + repo_id + " finished analyzing.")
				repo.analysis_date = str(datetime.now().replace(microsecond=0))
				repo.status = "Analyzed"
				session.commit() # update status of repo
				session.close()

			# uh-oh
			except Exception:
				logging.exception("Got an exception building model for repository " + repo_id)

				repo.status = "Error"
				session.commit() # update repo status
				session.close()
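
A note on the 30-day dump check in both versions: last_data_dump and dump_refresh_date are "YYYY-MM-DD HH:MM:SS"-style strings, which sort lexicographically in chronological order, so the string comparison is sound. A quick sanity check:

from datetime import datetime, timedelta

now = str(datetime.utcnow().replace(microsecond=0))
month_ago = str(datetime.utcnow() - timedelta(days=30))
assert month_ago < now  # ISO-formatted timestamps compare chronologically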