Esempio n. 1
0
    def collect_batch(self, save: bool = True):
        """
        Collect every page of the paginated endpoint until the API returns an empty page.

        Pages that are already stored on disk are loaded from their JSON file instead of
        being requested again, so the returned list also contains the previously
        downloaded pages.

        :param save: if the freshly downloaded pages should be persisted on the hard drive
        :type save: bool
        :return: list with one entry per page, each entry being the content of that page
        :rtype: list
        """
        request_url = self.api_url + self.owner + '/' + self.repo + self.private_url + '?page='
        path = self.path + self.repo + self.private_path + 'all/'
        handler = JSONHandler(path)
        data_list = []
        page = 1
        while True:
            page_name = str(page)

            if handler.file_exists(path + page_name + '.json'):
                # Reuse the stored page rather than dropping it from the result.
                data_list.append(handler.open_json(page_name + '.json'))
                page += 1
                continue

            data = self.apiHandler.request(request_url + page_name)

            if not data:
                # An empty response marks the end of the pagination.
                break

            data_list.append(data)

            if save:
                handler.save_json(data, page_name)
            page += 1

        return data_list
Esempio n. 2
0
    def collect_single(self, parameter: str, save: bool = True):
        """
        Collect a single element of the API.

        If the element is already stored on disk, the stored JSON is returned and no
        request is made. An empty response is reported on stdout and returned as an
        empty list, but it is never written to disk, so a later call requests the
        element again instead of reading back a cached failure.

        :param parameter: parameter that will be used by the function to know which element it should download
        :type parameter: str
        :param save: if a non-empty downloaded json should be persisted on the hard drive
        :type save: bool
        :return: json downloaded (or loaded from disk); an empty list when the API returned nothing
        :rtype: dict or list
        """
        path = self.path + self.repo + self.private_path + 'individual/'
        handler = JSONHandler(path)
        name = str(parameter)

        if handler.file_exists(path + name + '.json'):
            return handler.open_json(name + '.json')

        request_url = self.api_url + self.owner + '/' + self.repo + self.private_url + '/' + name
        data = self.apiHandler.request(request_url)
        if not data:
            print(
                'JSON returned empty. Please check your parameters for URL: ' +
                request_url)
            # Do not persist the empty result: it would be served from disk forever.
            return []

        if save:
            handler.save_json(data, name)

        return data
Esempio n. 3
0
    def collect_commits_on_pulls(self, owner: str, project: str):
        """
        Collect Commits from Pull Requests from the GitHub API.

        Reads the pull request pages stored under ``<output_path><project>/pulls/all/``,
        obtains the commit pages of every pull request (from
        ``<output_path><project>/pulls_commits/commits/`` when already stored there,
        otherwise from the API, storing the result), and then collects every commit
        individually by its hash.

        Hashes are gathered from both stored and freshly downloaded commit pages, so a
        single run processes every pull request.

        :param owner: repository owner
        :type owner: str
        :param project: project name
        :type project: str
        :return: list of commit hashes found in the pull requests
        :rtype: list
        """
        print('Collecting Pull Requests Commits')

        pulls_path = self.config['output_path'] + project + '/pulls/all/'
        commits_path = self.config['output_path'] + project + '/pulls_commits/commits/'
        pulls_json = JSONHandler(pulls_path)
        commits_json = JSONHandler(commits_path)

        # Every regular file in pulls/all/ is read as one page (a list) of pull requests.
        pulls = []
        for file_name in listdir(pulls_path):
            if not isfile(join(pulls_path, file_name)):
                continue
            pulls.extend(pull['number'] for pull in pulls_json.open_json(file_name))

        hashs = []

        for pull in pulls:
            pull_name = str(pull)
            if commits_json.file_exists(commits_path + pull_name + '.json'):
                commit_pages = commits_json.open_json(pull_name + '.json')
            else:
                pulls_endpoint = PrototypeAPI(owner, project, '/pulls_commits/', '/pulls/' + pull_name + '/commits')
                commit_pages = pulls_endpoint.collect_batch(False)
                commits_json.save_json(commit_pages, pull_name)

            # commit_pages is a list of pages; each page is a list of commits.
            for commit_page in commit_pages:
                for commit in commit_page:
                    hashs.append(commit['sha'])

        commits_endpoint = PrototypeAPI(owner, project, '/pulls_commits/', '/commits')
        # Progress is measured over the hashes that are actually requested.
        valid_hashes = [sha for sha in hashs if sha]
        for done, sha in enumerate(valid_hashes, start=1):
            commits_endpoint.collect_single(sha)
            print(str(done * 100 / len(valid_hashes)) + "%")

        return hashs