def opened_employee_or_temporary(self):
        """
        Classify the opener of every issue/pull request as Employee or Temporary.
        Employee if the author_association is OWNER, MEMBER, COLLABORATOR or
        CONTRIBUTOR (decided by self._employee_or_temporary); Temporary otherwise.
        The result is also written to '<project>_opened_by.csv'.
        :return: rows of [number, status, user] per issue/pull request (with header)
        :rtype: list
        """
        print("#### Opened by Employee or Temporary ####")

        base = self.path + '/' + self.project
        handler = JSONHandler(base + '/')
        issues = handler.open_json(self.project + '_issues.json')
        pulls = handler.open_json(self.project + '_pulls.json')

        rows = [['number', 'status', 'user']]
        # Issues first, then pull requests, mirroring the CSV layout.
        rows.extend(
            [issue['issue_number'],
             self._employee_or_temporary(issue['author_association']),
             issue['user']]
            for issue in issues if 'author_association' in issue)
        rows.extend(
            [pull['pull_request_number'],
             self._employee_or_temporary(pull['author_association']),
             pull['user']]
            for pull in pulls if 'author_association' in pull)

        writer = CSVHandler()
        writer.write_csv(self.path + '/' + self.project + '/metrics/',
                         self.project + '_opened_by.csv',
                         rows)

        return rows
    def _get_users_labels_in_comments(self):
        """
        Collect the author_association of each comment on issues/pull requests.

        Each (issue, user) pair is recorded at most once, so a user's
        association appears a single time per issue.
        :return: dict mapping issue number (str) to list of author_associations
        :rtype: dict
        """
        mypath = self.path + self.project + '/comments/individual/'
        json = JSONHandler(mypath)

        onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

        users = {}
        # Set of already-recorded issue+login keys (the original used a dict
        # named `hash`, shadowing the builtin).
        seen = set()
        for file in onlyfiles:
            comments = json.open_json(file)
            for comment in comments:
                # Issue number is the last path segment of the comment's issue_url.
                issue = comment['issue_url'].split('/')[-1]

                if issue not in users:
                    users[issue] = []

                key = issue + comment['user']['login']
                if key not in seen:
                    seen.add(key)
                    users[issue].append(comment['author_association'])

        return users
    def _get_users_labels_in_issues_and_pulls(self):
        """
        Collect the author_association of each issue and pull request
        (opened, closed or merged).
        :return: dict mapping issue/pull number to its author_association
        :rtype: dict
        """
        base = self.path + '/' + self.project
        handler = JSONHandler(base + '/')
        issues = handler.open_json(self.project + '_issues.json')
        pulls = handler.open_json(self.project + '_pulls.json')

        associations = {}
        # Issues and pulls share one mapping; pulls can overwrite an issue
        # entry with the same number, as in the original implementation.
        for issue in issues:
            label = issue['author_association']
            if label:
                associations[issue['issue_number']] = label

        for pull in pulls:
            label = pull['author_association']
            if label:
                associations[pull['pull_request_number']] = label

        return associations
    def _get_comments_in_discussion(self):
        """
        Collect, per issue/pull request, the number of comments and the total
        number of words (pre-processed tokens) across those comments.
        :return: two dicts keyed by issue number: comment count and word count
        :rtype: dict, dict
        """
        mypath = self.path + self.project + '/comments/individual/'
        json = JSONHandler(mypath)
        onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
        words_in_discussion = {}
        comments_in_discussion = {}

        for file in onlyfiles:
            comments = json.open_json(file)
            for comment in comments:
                if 'issue_url' in comment.keys():
                    # Issue number is the last path segment of the issue_url.
                    issue = comment['issue_url'].split('/')[-1]

                    if issue not in words_in_discussion:
                        words_in_discussion[issue] = 0
                    if issue not in comments_in_discussion:
                        comments_in_discussion[issue] = 0

                    tp = TextProcessing()
                    processed = tp.pre_process_text(comment['body'])
                    # Count tokens directly; the previous join-with-trailing-
                    # space then split(' ') counted one spurious empty word
                    # per comment (off-by-one).
                    words_in_discussion[issue] += len(processed)
                    comments_in_discussion[issue] += 1

        return comments_in_discussion, words_in_discussion
Exemple #5
0
    def collect_batch(self, save: bool = True):
        """
        Download successive 30-element pages from the API until an empty JSON
        page is returned. Pages already persisted on disk are skipped (their
        contents are not re-downloaded nor included in the returned list).
        :param save: if it should persist the json downloaded on the hard drive
        :type save: bool
        :return: list of page payloads returned by the API
        :rtype: list
        """
        request_url = self.api_url + self.owner + '/' + self.repo + self.private_url + '?page='
        path = self.path + self.repo + self.private_path + 'all/'
        json = JSONHandler(path)

        data_list = []
        page = 1
        while True:
            # Resume support: skip any page a previous run already saved.
            if json.file_exists(path + str(page) + '.json'):
                page += 1
                continue

            data = self.apiHandler.request(request_url + str(page))
            if not data:
                # An empty payload marks the end of the paginated collection.
                break

            data_list.append(data)
            if save:
                json.save_json(data, str(page))
            page += 1

        return data_list
Exemple #6
0
    def get_comments_in_discussion(self):
        """
        Collect, per issue/pull request, the number of comments and the total
        number of words (pre-processed tokens) across those comments.
        :return: two dicts keyed by issue number: comment count and word count
        :rtype: dict, dict
        """
        mypath = self.path + self.project + '/comments/individual/'
        json = JSONHandler(mypath)
        onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
        words_in_discussion = {}
        comments_in_discussion = {}

        for file in onlyfiles:
            comments = json.open_json(file)
            for comment in comments:
                if 'issue_url' in comment.keys():
                    # Issue number is the last path segment of the issue_url.
                    issue = comment['issue_url'].split('/')[-1]

                    if issue not in words_in_discussion:
                        words_in_discussion[issue] = 0
                    if issue not in comments_in_discussion:
                        comments_in_discussion[issue] = 0

                    tp = TextProcessing()
                    processed = tp.pre_process_text(comment['body'])
                    # Count tokens directly; the previous join-with-trailing-
                    # space then split(' ') counted one spurious empty word
                    # per comment (off-by-one).
                    words_in_discussion[issue] += len(processed)
                    comments_in_discussion[issue] += 1

        return comments_in_discussion, words_in_discussion
Exemple #7
0
    def mean_time_between_replies(self):
        """
        Collect the mean time between comments inside an issue or pull request.
        The result is also written to '<project>_mean_time_between_replies.csv'.
        :return: rows of [issue, mean_time] per issue/pull request (with header)
        :rtype: list
        """
        print('#### Mean Time Between Comments ####')

        mypath = self.path + self.project + '/comments/individual/'
        handler = JSONHandler(mypath)
        comment_files = [f for f in listdir(mypath) if isfile(join(mypath, f))]

        # Gather every comment creation date, grouped by issue number
        # (last path segment of the comment's issue_url).
        dates_per_issue = {}
        for file in comment_files:
            for comment in handler.open_json(file):
                issue = comment['issue_url'].split('/')[-1]
                dates_per_issue.setdefault(issue, []).append(comment['created_at'])

        date_utils = DateUtils()
        mean_time = [['issue', 'mean_time']]
        for issue, dates in dates_per_issue.items():
            ordered = date_utils.sort_dates(dates)
            # Day gaps between consecutive comments.
            gaps = [date_utils.get_days_between_dates(prev, curr)
                    for prev, curr in zip(ordered, ordered[1:])]

            # NOTE(review): the denominator is gaps + 1 (i.e. the number of
            # comments, not the number of intervals). Kept as-is to preserve
            # the original metric; it also avoids division by zero for
            # single-comment threads — confirm this is the intended mean.
            mean_time.append([issue, sum(gaps) / (len(gaps) + 1)])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_mean_time_between_replies.csv',
                      mean_time)

        return mean_time
Exemple #8
0
    def get_number_of_patches(self):
        """
        Collects the number of snippets inside each comment of issues and pull requests.

        A snippet is a fenced code block delimited by ``` markers. The total
        character length of the snippets is also accumulated into
        ``self.patches_size`` (keyed by issue number), and the per-issue counts
        are written to '<project>_patches_in_discussion.csv'.

        :return: list of the number of snippets per issue or pull request
        :rtype: list
        """
        print('#### Number of Snippets ####')

        mypath = self.path + self.project + '/comments/individual/'
        json = JSONHandler(mypath)

        onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

        patches_in_discussion = {}
        for file in onlyfiles:
            comments = json.open_json(file)
            for comment in comments:
                if 'issue_url' in comment.keys():
                    # Issue number is the last segment of the comment's issue_url.
                    issue = comment['issue_url'].split('/')
                    issue = issue[len(issue) - 1]

                    if issue not in patches_in_discussion.keys():
                        patches_in_discussion[issue] = 0

                    if '```' in comment['body']:
                        patches = comment['body'].split('```')
                        count = 0
                        aux = 0
                        if issue not in self.patches_size.keys():
                            self.patches_size[issue] = 0
                        for patch in patches:

                            # Splitting on ``` alternates text/code segments;
                            # skipping odd aux values keeps only the segments
                            # that were inside the fences.
                            if len(patches) != 1:
                                aux += 1
                                if aux % 2 != 0:
                                    continue

                            self.patches_size[issue] += len(patch)

                            count += 1
                        patches_in_discussion[issue] += count

        number_of_patches_in_discussion = [['issue', 'number_patches']]

        for key in patches_in_discussion.keys():
            number_of_patches_in_discussion.append(
                [key, patches_in_discussion[key]])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_patches_in_discussion.csv',
                      number_of_patches_in_discussion)

        return number_of_patches_in_discussion
Exemple #9
0
    def insert_issues(self):
        """Persist each individually-downloaded issue JSON into the 'issues' collection, skipping duplicates by number."""
        database = self.database['issues']

        issues_path = self.path + 'issues/individual/'
        handler = JSONHandler(issues_path)
        files = [f for f in listdir(issues_path) if isfile(join(issues_path, f))]

        for file in files:
            issue = handler.open_json(file)
            # Skip issues already stored (matched by their GitHub number).
            if not database.find_one({'number': issue['number']}):
                database.insert_one(issue)
Exemple #10
0
    def insert_pulls(self):
        """Persist each individually-downloaded pull request JSON into the 'pull_requests' collection, skipping duplicates by number."""
        database = self.database['pull_requests']

        pulls_path = self.path + 'pulls/individual/'
        handler = JSONHandler(pulls_path)
        files = [f for f in listdir(pulls_path) if isfile(join(pulls_path, f))]

        for file in files:
            pull = handler.open_json(file)
            # Skip pull requests already stored (matched by their number).
            if not database.find_one({'number': pull['number']}):
                database.insert_one(pull)
    def get_median_of_number_of_comments(self):
        """
        Collects the number of comments per issue/pull request, plus a running
        median of those counts (computed cumulatively over issues in ascending
        number order).

        Both series are written to CSV under the project's metrics directory.
        :return: list with the number of comments per issue or pull request
            (the running-median rows are written to CSV but not returned)
        :rtype: list
        """
        print("#### Median Comments ####")

        mypath = self.path + self.project + '/comments/individual/'
        json = JSONHandler(mypath)

        onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

        comments_per_issue = {}
        for file in onlyfiles:
            comments = json.open_json(file)
            for comment in comments:
                if 'issue_url' in comment.keys():
                    # Issue number is the last segment of the comment's issue_url.
                    issue = comment['issue_url'].split('/')
                    issue = issue[len(issue) - 1]
                    if int(issue) not in comments_per_issue:
                        comments_per_issue[int(issue)] = 0
                    comments_per_issue[int(issue)] = comments_per_issue[int(issue)] + 1

        values = []
        median_comments = [['issue', 'median_comments']]
        number_comments = [['id', 'number_comments']]


        for key in sorted(comments_per_issue):
            #print(str(key) + ': ' + str(comments_per_issue[key]))
            # Running median: the median of all counts seen so far,
            # not a per-issue median.
            values.append(comments_per_issue[key])
            median_comments.append([key, median(values)])
            number_comments.append([key, comments_per_issue[key]])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_median_comments.csv',
                      median_comments)

        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_number_comments.csv',
                      number_comments)

        return number_comments
Exemple #12
0
    def collect_commits_on_pulls(self, owner: str, project: str):
        """
        Collect Commits from Pull Requests from the GitHub API
        :param owner: repository owner
        :type owner: str
        :param project: project name
        :type project: str
        :return: list of commit SHAs harvested from cached pull-request commit files
        :rtype: list
        """
        print('Collecting Pull Requests Commits')

        pulls = []
        mypath = self.config['output_path'] + project + '/pulls/all/'
        json = JSONHandler(mypath)
        commits_json = JSONHandler(self.config['output_path'] + project + '/pulls_commits/commits/')
        onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

        # Gather every pull request number from the batched pull pages.
        for file in onlyfiles:
            batch = json.open_json(file)
            for pull in batch:
                pulls.append(pull['number'])

        hashs = []

        for pull in pulls:
            # If this pull's commit list is already cached on disk, harvest its
            # SHAs; otherwise download and cache it for a later run.
            # NOTE(review): SHAs of freshly downloaded pulls are only collected
            # on a subsequent invocation, once the cache file exists — confirm
            # this two-pass behavior is intentional.
            if JSONHandler.file_exists(
                    self.config['output_path'] + project + '/pulls_commits/commits/' + str(pull) + '.json'):
                commits_pull = commits_json.open_json(
                    str(pull) + '.json')
                for commit_pull in commits_pull:
                    for commit in commit_pull:
                        hashs.append(commit['sha'])
                continue

            pullsEndpoint = PrototypeAPI(owner, project, '/pulls_commits/', '/pulls/' + str(pull) + '/commits')
            files = pullsEndpoint.collect_batch(False)
            commits_json.save_json(files, str(pull))

        # Download each collected commit individually, reporting progress.
        commitsEndpoint = PrototypeAPI(owner, project, '/pulls_commits/', '/commits')
        aux = 1
        for hash in hashs:
            if not hash:
                continue
            commitsEndpoint.collect_single(hash)
            print(str(aux * 100 / len(hashs)) + "%")
            aux = aux + 1

        return hashs
Exemple #13
0
    def insert_commits(self):
        """Persist individually-downloaded commits into the 'commits' collection, skipping shas already stored."""
        database = self.database['commits']

        commits_path = self.path + 'commits/individual/'
        handler = JSONHandler(commits_path)
        files = [f for f in listdir(commits_path) if isfile(join(commits_path, f))]

        for file in files:
            commit = handler.open_json(file)
            if database.find_one({'sha': commit['sha']}):
                continue

            # Commits inserted here are not associated with any pull request.
            commit['from_pull'] = False
            commit['pull_origin'] = []
            database.insert_one(commit)
Exemple #14
0
    def insert_events(self):
        """
        Persist event batches into the 'events' collection, skipping events
        whose id is already stored.
        """
        database = self.database['events']

        events_path = self.path + 'events/all/'

        json = JSONHandler(events_path)
        events = [f for f in listdir(events_path) if isfile(join(events_path, f))]

        for file in events:

            event_batch = json.open_json(file)
            for event in event_batch:

                # Skip events already stored (matched by their id).
                if database.find_one({'id': event['id']}):
                    continue

                database.insert_one(event)
    def get_time_in_days_between_open_and_close(self):
        """
        Collects the time in days between the day an issue or pull request was opened and the day it was closed.
        For merged pull requests the merge date is used as the closing date.
        The result is written to '<project>_discussion_length.csv'.
        :return: list of [number, days] rows per closed issue/pull request.
        :rtype: list
        """
        print('#### Discussion Length ####')

        path = self.path + '/' + self.project
        json = JSONHandler(path + '/')
        issues = json.open_json(self.project + '_issues.json')
        pulls = json.open_json(self.project + '_pulls.json')

        # Header fixed: the second column holds day counts; the original
        # 'status' label was a copy-paste slip from the opened_by metric.
        days_between = [['number', 'days']]

        date_utils = DateUtils()
        for issue in issues:
            # Only closed issues have a defined discussion length.
            if 'closed' in issue['state']:
                days = date_utils.get_days_between_dates(
                    issue['created_at'], issue['closed_at'])
                days_between.append([issue['issue_number'], days])

        for pull in pulls:
            if 'closed' in pull['state']:
                # Prefer the merge date when the pull request was merged.
                if pull['merged_at']:
                    days = date_utils.get_days_between_dates(
                        pull['created_at'], pull['merged_at'])
                else:
                    days = date_utils.get_days_between_dates(
                        pull['created_at'], pull['closed_at'])

                days_between.append([pull['pull_request_number'], days])

        csv = CSVHandler()
        csv.write_csv(self.path + '/' + self.project + '/metrics/',
                      self.project + '_discussion_length.csv', days_between)

        return days_between
Exemple #16
0
    def collect_single(self, parameter: str, save: bool = True):
        """
        Collect a single element of the API
        :param parameter: parameter that will be used by the function to know which element it should download
        :type parameter: str
        :param save: if it should persist the json downloaded on the hard drive
        :type save: bool
        :return: json downloaded
        :rtype: dict
        """
        path = self.path + self.repo + self.private_path + 'individual/'
        json = JSONHandler(path)

        # Serve the cached copy if this element was already downloaded;
        # reuse the existing handler instead of constructing a second one.
        if json.file_exists(path + str(parameter) + '.json'):
            return json.open_json(str(parameter) + '.json')

        request_url = self.api_url + self.owner + '/' + self.repo + self.private_url + '/' + str(
            parameter)
        data = self.apiHandler.request(request_url)
        if not data:
            # Best-effort: warn and fall back to an empty payload.
            print(
                'JSON returned empty. Please check your parameters for URL: ' +
                request_url)
            data = []

        if save:
            json.save_json(data, str(parameter))

        return data
Exemple #17
0
 def test_compile_data(self):
     """
     Tests the compile_data function of the MetricsCollector class
     First test analyzes if the CSV returned is not NULL
     Second test analyzes if the length in lines of the CSV is equal to three (header plus two rows)
     Third test analyzes if the identifier of the second row is 1, since is the identifier of the first issue.
     """
     project = 'test_collector'
     config = JSONHandler('../').open_json('config.json')
     collector = MetricsCollector(config['output_path'], project)
     # Compile once; the original re-ran the whole compilation per assertion.
     data = collector.compile_data()
     assert (data is not None)
     assert (len(data) == 3)
     assert (data[1][0] == '1')
Exemple #18
0
 def __init__(self,
              owner: str = '',
              repo: str = '',
              private_path: str = '',
              private_url: str = '',
              database: Database = None):
     """
     Configure the API endpoint wrapper for one repository.
     :param owner: repository owner
     :param repo: repository name
     :param private_path: endpoint-specific output sub-path
     :param private_url: endpoint-specific URL suffix
     :param database: optional database handle
     """
     self.owner = owner
     self.repo = repo
     self.private_path = private_path
     self.private_url = private_url
     self.database = database
     self.api_url = 'https://api.github.com/repos/'
     self.apiHandler = APICallHandler()
     # Output directory comes from the shared config file one level up.
     self.path = JSONHandler('../').open_json('config.json')['output_path']
Exemple #19
0
    def insert_comments(self):
        """
        Persist issue/pull-request comments into the 'comments' collection.

        Comments are read from two locations (individually downloaded files
        and batched issue-comment pages); duplicates are skipped by comment id.
        """
        # Both directories hold the same comment JSON schema, so a single
        # loader handles them (the original duplicated this loop verbatim).
        self._insert_comment_files(self.path + 'comments/individual/')
        self._insert_comment_files(self.path + 'comments/issues/all/')

    def _insert_comment_files(self, comments_path):
        """Insert every comment found in the JSON files under comments_path, skipping known ids."""
        database = self.database['comments']

        json = JSONHandler(comments_path)
        comments = [f for f in listdir(comments_path) if isfile(join(comments_path, f))]

        for file in comments:

            comment_batch = json.open_json(file)

            for comment in comment_batch:

                if database.find_one({'id': comment['id']}):
                    continue

                # Derive the issue number from the comment's issue_url.
                issue_number = comment['issue_url'].split('issues/')[1]

                comment['issue_number'] = int(issue_number)

                database.insert_one(comment)
Exemple #20
0
    def insert_commits_from_pulls(self):
        """
        Persist commits that belong to pull requests into the 'commits'
        collection, tagging each stored commit with the pull request(s) it
        came from.
        """
        database = self.database['commits']

        pulls_commits_path = self.path + 'pulls_commits/commits/'

        json = JSONHandler(pulls_commits_path)
        commits = [f for f in listdir(pulls_commits_path) if isfile(join(pulls_commits_path, f))]

        # Map each commit sha to the pull request numbers that reference it
        # (the pull number is the cache file's base name).
        commit_pulls = {}
        for file in commits:
            commit_batch = json.open_json(file)
            for commit_list in commit_batch:
                for commit in commit_list:
                    if commit['sha'] not in commit_pulls.keys():
                        commit_pulls[commit['sha']] = []

                    commit_pulls[commit['sha']].append(file.split('.')[0])

        pulls_commits_path = self.path + 'pulls_commits/individual/'

        json = JSONHandler(pulls_commits_path)
        commits = [f for f in listdir(pulls_commits_path) if isfile(join(pulls_commits_path, f))]

        for file in commits:

            commit = json.open_json(file)

            # Commit already stored (e.g. inserted from the default branch):
            # just mark it as pull-related and attach its pull origins.
            if database.find_one({'sha': commit['sha']}):
                database.update_one({'sha': commit['sha']}, {"$set": {'from_pull': True}})

                if commit['sha'] in commit_pulls.keys():
                    database.update_one({'sha': commit['sha']}, {"$set": {'pull_origin': commit_pulls[commit['sha']]}})

                continue

            # New commit: insert with its pull-origin metadata.
            commit['from_pull'] = True
            commit['pull_origin'] = []

            if commit['sha'] in commit_pulls.keys():
                commit['pull_origin'] = commit_pulls[commit['sha']]

            database.insert_one(commit)
 def __init__(self, project):
     """
     Resolve the configured output path and pre-compute user association
     labels from comments and from issues/pull requests.
     :param project: project name
     """
     cfg = JSONHandler('../').open_json('config.json')
     self.project = project
     self.path = cfg['output_path']
     # Cache the author_association lookups once at construction time.
     self.users_comments = self._get_users_labels_in_comments()
     self.users_issues = self._get_users_labels_in_issues_and_pulls()
 def __init__(self, project):
     """Store the project name and resolve the configured output path."""
     self.project = project
     self.path = JSONHandler('../').open_json('config.json')['output_path']
Exemple #23
0
 def __init__(self):
     """Load the API tokens from config and activate the first one."""
     self.position = 0
     self.config = JSONHandler('./').open_json('config.json')
     tokens = self.config['tokens']
     self.tokens_len = len(tokens)
     # Start with the credentials at the current rotation position.
     active = tokens[self.position]
     self.username = active['username']
     self.auth_token = active['token']
Exemple #24
0
    def __init__(self, config_dir: str = 'C:/Users/gurio/PycharmProjects/GHPyFramework/'):
        """
        Load the framework configuration and the list of projects.
        :param config_dir: directory containing config.json. Defaults to the
            previously hard-coded absolute path for backward compatibility,
            but can now be overridden for other machines.
        """
        json_handler = JSONHandler(config_dir)

        self.config = json_handler.open_json('config.json')
        self.projects = self.config['projects']
Exemple #25
0
 def __init__(self):
     """Read config.json from the current directory and keep the project list."""
     cfg = JSONHandler('').open_json('config.json')
     self.config = cfg
     self.projects = cfg['projects']
Exemple #26
0
 def __init__(self, database: Database, project: str):
     """
     Bind the database handle and resolve the project's output directory.
     :param database: database handle
     :param project: project name
     """
     self.database = database
     self.project = project
     cfg = JSONHandler('./').open_json('config.json')
     self.path = cfg['output_path'] + project + '/'