Esempio n. 1
0
    def authenticate(self):
        """
        Authenticate this application to GitHub using the cas-user git
        user credentials from the config. This is hopefully temporary!

        On success stores the OAuth token in ``self.auth_token``; on an
        HTTP error (status >= 400) logs the failure and returns without
        setting a token.
        """
        s = requests.Session()
        username = config["github"]["user"]
        password = config["github"]["pass"]
        s.auth = (username, password)
        payload = {"scopes": ["repo"]}
        r = s.get(self.request_auth, params=payload)

        # GitHub rate limit: when no calls remain, poll every 10 minutes
        # until the hourly quota resets, then continue with the fresh
        # response.
        if r.headers.get('x-ratelimit-remaining') == '0':
            logging.info("Github quota limit hit -- waiting")
            while r.headers.get('x-ratelimit-remaining') == '0':
                time.sleep(600)  # Wait 10 minutes and try again
                r = s.get(self.request_auth, params=payload)

        body = r.json()

        if r.status_code >= 400:
            # BUG FIX: error responses are a dict with a 'message' field,
            # not a list -- the original r.json()[0] raised on errors.
            msg = body.get('message') if isinstance(body, dict) else str(body)
            logging.error("Failed to authenticate issue tracker: \n" + msg)
            return  # Exit without an auth token
        else:
            # The authorizations endpoint returns a list; the first entry
            # holds the token we are after.
            data = body[0]
            self.auth_token = data.get("token")
            # Guard with a default so a missing header cannot raise a
            # TypeError during string concatenation.
            requests_left = r.headers.get('x-ratelimit-remaining', 'unknown')
            logging.info("Analyzer has " + requests_left +
                         " issue tracker calls left this hour")
Esempio n. 2
0
    def notify(self):
        """
        Notify all subscribers by e-mail that the repo has been analyzed
        and is ready to be viewed.

        Sends one message through Gmail's SMTP server using the
        credentials stored on this instance. Failures are logged, never
        raised, so notification stays best-effort.
        """
        FROM = "*****@*****.**"
        TO = self.subscribers
        SUBJECT = "Your repository has been analyzed"
        TEXT = "Your analyzed repository is now ready to be viewed at http://kiwi.se.rit.edu/repo/" + self.repo

        # BUG FIX: the original template started with "\From", putting a
        # literal backslash in front of the first header line.
        message = "From: %s\nTo: %s\nSubject: %s\n\n%s" % (
            FROM, ", ".join(TO), SUBJECT, TEXT)

        try:
            server = smtplib.SMTP("smtp.gmail.com", 587)
            try:
                server.ehlo()
                server.starttls()
                server.login(self.gmail_user, self.gmail_pwd)
                server.sendmail(FROM, TO, message)
            finally:
                # Always close the SMTP connection, even when login or
                # sendmail fails part-way through.
                server.quit()
            logging.info("Notification sent successfully")
        except Exception:
            # Narrowed from a bare except; log the traceback but keep
            # the original best-effort behavior (no re-raise).
            logging.exception("Failed to send notification")
Esempio n. 3
0
	def checkIngestion(self):
		"""Queue an ingest task for every repository we track."""
		# Walk the repos DataFrame row by row; each row describes one repo.
		for _, row in self.repos.iterrows():
			logging.info("Adding repo " + row['name'] + " to work queue for ingesting")
			self.workQueue.add_task(ingest, row)
Esempio n. 4
0
def ingest(repo_id):
    """
    Ingest a repository with the given id. Gets the repository information
    from the repository table and starts ingesting using the ingestRepo
    method.
    @param repo_id   The repository row to ingest (must carry a 'name').
    """
    repo_to_analyze = [repo_id['name']]
    # Verify that repo exists
    if len(repo_to_analyze) == 1:
        ingestRepo(repo_id)
    else:
        # BUG FIX: the original concatenated a str with a list, which
        # would raise TypeError. Log the name directly, matching the
        # sibling analyze() function.
        logging.info('Repo with id ' + repo_id['name'] + ' not found!')
Esempio n. 5
0
	def checkAnalyzation(self):
		"""Queue an analyze task for every repo whose last analysis is stale."""
		for i in range(self.repos.shape[0]):
			repo = self.repos.iloc[i]
			# Anything analyzed within the last 30 days counts as fresh.
			refresh_date = datetime.datetime.now() + datetime.timedelta(-30)
			# BUG FIX: the original comparison was inverted -- it skipped
			# repos analyzed MORE than 30 days ago (the stale ones) and
			# re-queued the fresh ones. Skip fresh repos instead.
			if repo['last_analyzed'] is not None and repo['last_analyzed'] >= refresh_date:
				continue
			logging.info("Adding repo " + repo['name'] + " to work queue for analyzing.")
			self.workQueue.add_task(analyze, repo)
			# Record the new analysis timestamp back onto the DataFrame.
			self.repos.loc[i, 'last_analyzed'] = datetime.datetime.now()
Esempio n. 6
0
def analyze(repo_id):
    """
    Analyze the repository with the given id. Gets the repository from the
    repository table and starts the analysis via the analyzeRepo method.
    @param repo_id		The repository row to analyze (must carry a 'name')
    """
    candidates = [repo_id['name']]
    # Verify that repo exists before handing it to the analyzer.
    if candidates:
        analyzeRepo(repo_id)
    else:
        logging.info('Repo with id ' + repo_id['name'] + ' not found!')
Esempio n. 7
0
def ingestRepo(repository_to_ingest):
    """
  Ingests a given repository
  @param repository_to_ingest   The repository to inspect
  @private
  """
    name = repository_to_ingest['name']
    logging.info('A worker is starting scan repository: ' + name)

    # Sync the local working copy of the repo before any analysis runs.
    LocalRepository(repository_to_ingest).sync()

    logging.info('A worker finished ingesting repo ' + name)
Esempio n. 8
0
    def _linkCorrectiveCommit(self, commit):
        """
        Link the corrective change/commit to the change/commit that was
        the cause -- this is the purpose of this object.

        @commit - the corrective change to link with the changes that
        introduced the problems/issues it fixes.
        """
        # Regions of each file that the corrective commit modified.
        modified_regions = self.getModifiedRegions(commit)

        logging.info("Linkage for commit " + commit['commit_hash'])

        # Blame those regions to find the commits that last touched the
        # modified lines -- the suspected bug-introducing changes.
        return self.gitAnnotate(modified_regions, commit)
Esempio n. 9
0
    def getMedian(self, metric):
        """
        Helper function for the method calculateMedians.

        Takes in a metric name and returns a string of JSON-style
        key/value properties: the buggy/non-buggy medians for the metric
        plus a significance flag derived from a Wilcoxon test against
        ``self.psig``.
        @private
        """
        median_props = ""

        try:
            # R functions to be used
            medianFn = robjects.r['median']
            wilcoxFn = robjects.r['wilcox.test']

            metric_buggy = getattr(self.metrics, metric + "_buggy")
            metric_nonbuggy = getattr(self.metrics, metric + "_nonbuggy")

            # BUG FIX: the original called self.wilcoxFn / self.medianFn,
            # ignoring the local R functions fetched above; the resulting
            # AttributeError was silently swallowed by the bare except,
            # so no medians were ever produced. Call the locals instead.
            # First check p-values; if significant then calculate median.
            pvalue = wilcoxFn(robjects.FloatVector(metric_buggy),
                              robjects.FloatVector(metric_nonbuggy))[2][0]
            buggy_median = medianFn(robjects.FloatVector(metric_buggy))
            nonbuggy_median = medianFn(robjects.FloatVector(metric_nonbuggy))

            median_props += '"' + metric + 'buggy":"' + str(
                buggy_median[0]) + '", '
            median_props += '"' + metric + 'nonbuggy":"' + str(
                nonbuggy_median[0]) + '", '

            if pvalue <= self.psig:
                median_props += '"' + metric + '_sig":"1", '
            else:
                median_props += '"' + metric + '_sig":"0", '

        except Exception:
            # Catch the case where we haven't made any observations for
            # this metric (or R rejects the input) and skip it.
            logging.info("Metric " + metric +
                         " could not be used in the median model for repo " +
                         self.repo_id)

        return median_props
Esempio n. 10
0
    def getDateOpened(self, issueNumber):
        """
        getDateOpened()
        Gets the date the issue number was opened in unix time.
        If the issue cannot be found for whatever reason, returns None.
        """
        # Coerce once so both logging and URL building work whether the
        # caller passes an int or a str (the original TypeError'd on int).
        issue = str(issueNumber)
        url = (self.request_repos + "/" + self.owner + "/" + self.repo +
               "/issues/" + issue)

        logging.info("searching for issue: " + issue)
        logging.info(url)

        header = {'Authorization': 'token ' + self.auth_token}
        r = requests.get(url, headers=header)
        data = r.json()

        # If forbidden, check whether we simply hit the API quota; if so,
        # poll every 10 minutes until it resets, keeping the refreshed
        # response/data for the parsing step below.
        if r.status_code == 403:
            if r.headers.get('x-ratelimit-remaining') == '0':
                logging.info("Github quota limit hit -- waiting")
                while r.headers.get('x-ratelimit-remaining') == '0':
                    time.sleep(600)  # Wait 10 minutes and try again
                    r = requests.get(url, headers=header)
                    data = r.json()

        # Any remaining error status (including an unresolved 403) means
        # the issue is unavailable.
        if r.status_code >= 400:
            logging.info("issue not found")
            return None

        try:
            # BUG FIX: the original fell out of the 403 branch and
            # returned None even after a successful rate-limit retry,
            # never parsing the refreshed response.
            return dateutil.parser.parse(data.get('created_at')).timestamp()
        except Exception:
            logging.error(
                "ISSUE TRACKER FAILURE: Could not get created_at from github issues API"
            )
            return None
Esempio n. 11
0
def analyzeRepo(repository_to_analyze,
                output_dir='/home/smajumd3/Data_Miner/github/data/commit_guru'):
    """
    Analyzes the given repository
    @param repository_to_analyze	The repository to analyze.
    @param output_dir				Directory the linked-commit CSV is
									written to. Defaults to the previous
									hard-coded path, so existing callers
									are unaffected.
    @private
    """
    repo_name = repository_to_analyze['name']

    logging.info('Worker analyzing repository  ' + repo_name)

    # Previously-ingested commits for this repo live under Data/Commit/.
    all_commits = pd.read_csv(
        up(os.path.dirname(__file__)) + '/Data/Commit/' + repo_name + '.csv')

    # Bug-fixing commits were flagged with 'fix' during ingestion.
    corrective_commits = all_commits[all_commits['fix'] == True]
    logging.info("Linking " + str(len(corrective_commits)) +
                 " new corrective commits for repo " + repo_name)
    try:
        git_commit_linker = GitCommitLinker(repository_to_analyze)
        final_commits = git_commit_linker.linkCorrectiveCommits(
            corrective_commits, all_commits)
        final_commits.to_csv(output_dir + '/' + repo_name + '.csv',
                             index=False)
    except Exception:
        logging.exception(
            "Got an exception linking bug fixing changes to bug inducing changes for repo "
            + repo_name)
        raise
    # BUG FIX: added the missing space before the repo name in the log.
    logging.info("Linking done for repo " + repo_name)