class OpenElections:
    """Builds summary reports over the 'openelections' GitHub organization's
    repositories and their open issues."""

    # Repo names follow 'openelections-<type>-<state>', e.g. 'openelections-data-md'.
    # Compiled once here instead of per-repo inside the report loops.
    _REPO_NAME_RE = re.compile(r'openelections-(sources|data|results)-(\w{2})$')

    def __init__(self):
        self.github = GitHub()
        self.openelex = self.github.organization('openelections')

    def repos_report(self):
        """Return a list of summary dicts, one per repository."""
        repos = []
        for repo in self.repos():
            payload = {
                'state': '',
                'repo_type': '',
                'repo_name': repo.name,
                'open_issues': repo.open_issues_count,
                'url': repo.url,
            }
            # Fill in repo_type/state when the name matches the convention;
            # otherwise they stay ''.
            payload.update(self._parse_repo_name(repo.name))
            repos.append(payload)
        return repos

    def issues_report(self):
        """Return a list of summary dicts, one per open issue across all
        repositories that have issues."""
        issues = []
        for repo in self.repos_with_issues():
            for issue in repo.issues:
                payload = {
                    'state': '',
                    'repo_type': '',
                    'repo_name': repo.name,
                    'number': issue.number,
                    'title': issue.title,
                    'assignee': ','.join(issue.assignees),
                    'url': issue.url,
                    'created_date': self._simple_date(issue.created_at),
                    'created_by': issue.author,
                    'labels': ','.join(issue.labels),
                }
                payload.update(self._parse_repo_name(repo.name))
                issues.append(payload)
        return issues

    def repos(self):
        """Run the repositories query and return its results."""
        query = ReposQuery()
        repos = query.run()
        return repos

    def repos_with_issues(self):
        """Run the repositories-with-issues query and return its results."""
        query = ReposWithIssuesQuery()
        repos = query.run()
        return repos

    def _parse_repo_name(self, repo_name):
        """Return {'repo_type': ..., 'state': ...} for a conforming repo name,
        or an empty dict when the name does not match the convention."""
        match = self._REPO_NAME_RE.search(repo_name)
        if match:
            repo_type, state = match.groups()
            return {'repo_type': repo_type, 'state': state.upper()}
        return {}

    def _simple_date(self, date_str):
        """Convert an ISO-8601 'Z' timestamp (e.g. '2020-01-02T03:04:05Z')
        to a plain 'YYYY-MM-DD' date string."""
        return datetime.datetime\
            .strptime(date_str, "%Y-%m-%dT%H:%M:%SZ")\
            .strftime("%Y-%m-%d")
def resolve_team_members(
    github_team_name: str,
    github_api: GitHub,
) -> typing.Iterator[str]:
    '''Yield the e-mail addresses of the members of a GitHub team.

    `github_team_name` must be of the form 'org/team-slug'. Members without
    a public e-mail address are skipped with a warning.

    NOTE: because this function contains `yield`, *calling* it always returns
    a generator object -- the old `return []` in the not-found branch never
    delivered a list to any caller; it merely ended an empty generator (and
    its warning only fired once iteration began). The annotation is fixed
    accordingly and the branch now uses a bare `return`.
    '''
    not_none(github_team_name)

    org_name, team_name = github_team_name.split('/')  # always of form 'org/name'
    organisation = github_api.organization(org_name)

    # unfortunately, we have to look-up the team (no api to retrieve it by name)
    team_or_none = _first(filter(lambda team: team.slug == team_name, organisation.teams()))
    if not team_or_none:
        logger.warning('failed to lookup team {t}'.format(t=team_name))
        return  # ends the generator; caller iterates over nothing

    for member in map(github_api.user, team_or_none.members()):
        if member.email:
            yield member.email
        else:
            logger.warning(f'no email found for GitHub user {member}')
def main():
    """Collect GitHub developers (DevMine public members plus a sample of
    random repo owners) and append each user/repo as one JSON document per
    line to the configured dataset files."""
    import getpass  # local import: only needed for the interactive prompt

    # NOTE(review): the two credential lines were scrubbed ('******') in the
    # original source; reconstructed plausibly from context -- confirm upstream.
    user = input('GitHub username: ')
    password = getpass.getpass('GitHub password: ')
    gh = login(user, password)

    def call_or_wait(function, *args, **kargs):
        '''Given a function and some arguments, tries to execute the function
        or waits until the ratelimit is reset.'''
        success = False
        while not success:
            try:
                result = function(*args, **kargs)
                success = True
            except GitHubError as e:
                if e.code == 403:  # rate limit exceeded
                    reset = gh.rate_limit()['rate']['reset']
                    wait_time = int(reset - time.time()) + 1
                    print("Not enough API calls. Waiting for",
                          int(wait_time / 60), "minutes and",
                          wait_time % 60, "seconds")
                    time.sleep(wait_time)
                else:
                    raise e
        return result

    # get awesome developers
    devmine = call_or_wait(
        lambda: gh.organization("DevMine").iter_public_members())
    # the lambda is used to make the call lazy, as there are multiple API
    # calls involved in the statement
    devs = {dev.login for dev in devmine}

    # get random developers
    for repo in call_or_wait(gh.iter_all_repos, number=settings.REPOS_COUNT,
                             since=settings.REPOS_SINCE_ID):
        # skip organizations and forked repository to find users that actually
        # added new content
        if repo.private or repo.fork or (repo.owner.type == 'Organization'):
            continue
        devs.add(repo.owner.login)
    print(len(devs), "developers fetched")

    # `with` guarantees the dataset files are closed even if an API call
    # raises part-way through (the originals were only closed on success).
    with codecs.open(settings.USERS_DATASET, 'a', 'utf-8') as fpu, \
            codecs.open(settings.REPOS_DATASET, 'a', 'utf-8') as fpr:
        # dump developers and their repositories
        users_processed = 0
        for dev in devs:
            u = call_or_wait(gh.user, dev)
            json.dump(u.to_json(), fpu)
            fpu.write("\n")
            users_processed += 1
            if users_processed % 100 == 0:
                # typo fix: "feched" -> "fetched"
                print("Repos fetched for", users_processed, "developers")
            for repo in call_or_wait(gh.iter_user_repos, u.login):
                json.dump(repo.to_json(), fpr)
                fpr.write("\n")
def __init__(self, gh: GitHub, organization: str) -> None:
    """Store the GitHub client and resolve the organization handle.

    :param gh: GitHub API client used for all subsequent calls
    :param organization: organization login name, resolved once here
    """
    self._gh = gh
    self._org = gh.organization(organization)
def main():
    """Collect GitHub developers (DevMine public members plus a sample of
    random repo owners) and append each user/repo as one JSON document per
    line to the configured dataset files."""
    import getpass  # local import: only needed for the interactive prompt

    # NOTE(review): the two credential lines were scrubbed ('******') in the
    # original source; reconstructed plausibly from context -- confirm upstream.
    user = input('GitHub username: ')
    password = getpass.getpass('GitHub password: ')
    gh = login(user, password)

    def call_or_wait(function, *args, **kargs):
        '''Given a function and some arguments, tries to execute the function
        or waits until the ratelimit is reset.'''
        success = False
        while not success:
            try:
                result = function(*args, **kargs)
                success = True
            except GitHubError as e:
                if e.code == 403:  # rate limit exceeded
                    reset = gh.rate_limit()['rate']['reset']
                    wait_time = int(reset - time.time()) + 1
                    print("Not enough API calls. Waiting for",
                          int(wait_time / 60), "minutes and",
                          wait_time % 60, "seconds")
                    time.sleep(wait_time)
                else:
                    raise e
        return result

    # get awesome developers
    devmine = call_or_wait(lambda: gh.organization("DevMine").iter_public_members())
    # the lambda is used to make the call lazy, as there are multiple API
    # calls involved in the statement
    devs = {dev.login for dev in devmine}

    # get random developers
    for repo in call_or_wait(gh.iter_all_repos, number=settings.REPOS_COUNT,
                             since=settings.REPOS_SINCE_ID):
        # skip organizations and forked repository to find users that actually
        # added new content
        if repo.private or repo.fork or (repo.owner.type == 'Organization'):
            continue
        devs.add(repo.owner.login)
    print(len(devs), "developers fetched")

    # `with` guarantees the dataset files are closed even if an API call
    # raises part-way through (the originals were only closed on success).
    with codecs.open(settings.USERS_DATASET, 'a', 'utf-8') as fpu, \
            codecs.open(settings.REPOS_DATASET, 'a', 'utf-8') as fpr:
        # dump developers and their repositories
        users_processed = 0
        for dev in devs:
            u = call_or_wait(gh.user, dev)
            json.dump(u.to_json(), fpu)
            fpu.write("\n")
            users_processed += 1
            if users_processed % 100 == 0:
                # typo fix: "feched" -> "fetched"
                print("Repos fetched for", users_processed, "developers")
            for repo in call_or_wait(gh.iter_user_repos, u.login):
                json.dump(repo.to_json(), fpr)
                fpr.write("\n")
'invenio-search': '1cd5740aae6022ffb0a781c3d63dd3b26b83dc61', 'invenio-sequencegenerator': '1b55943e1f5e8dbf992b9d44412b2a783c415068', 'invenio-webhooks': '43e2f1e7670781d57f7d1cf9c03bdf91f9afe23b', } link_search_name = 'https://github.com/inveniosoftware/{0}/search?q=author-name:{1}&type=Commits' link_search_login = '******' with open('./people.yaml') as f: people = yaml.load(f) cern_people = set(p for k, g in people.items() if 'cern' in k for p in g) external = defaultdict(lambda: defaultdict(list)) g = GitHub(token='<token>') org = g.organization('inveniosoftware') for repo in org.repositories(): if repo.fork or 'flask' in repo.name.lower(): # Remove forks and flask packages # flask-menu # flask-breadcrumbs # flask-sso # flask-iiif # flask-sitemap # flask-cli # flask-celeryext # flask-security-fork # flask-webpackext continue if repo.name in base_commits:
class GithubAuditor():
    """
    Implements Github3py client and manages connections and credentials
    """

    def __init__(self):
        # Pull whatever AUTOSNYK_* credentials exist in the environment, then
        # authenticate with user + token (app-installation auth is still a TODO).
        self.load_app_credentials()
        # self.github = GitHub()
        self.github = GitHub(self.user, token=self.token)
        # TODO Can we use client credentials somehow with something like the below?
        # print(f"Connecting to API as app: {self.app_id}")
        # self.github.login_as_app_installation(
        #     self.private_key_bytes,
        #     self.app_id,
        #     self.install_id
        # )

    def load_app_credentials(self):
        """
        Load credentials defined in environment variables
        """
        # Each attribute is set only when its AUTOSNYK_* variable is present;
        # missing variables leave the attribute undefined on the instance.
        if "AUTOSNYK_APP" in os.environ:
            self.app_id = int(os.environ["AUTOSNYK_APP"])
        if "AUTOSNYK_INSTALL" in os.environ:
            self.install_id = int(os.environ["AUTOSNYK_INSTALL"])
        if "AUTOSNYK_USER" in os.environ:
            self.user = os.environ["AUTOSNYK_USER"]
        if "AUTOSNYK_TOKEN" in os.environ:
            self.token = os.environ["AUTOSNYK_TOKEN"]
        if "AUTOSNYK_KEY" in os.environ:
            # Load the PEM private key from the file named by AUTOSNYK_KEY and
            # keep both the key object and its unencrypted PKCS8 PEM bytes
            # (intended for the app-installation login experiment in __init__).
            key_path = os.environ["AUTOSNYK_KEY"]
            with open(key_path, "rb") as key_file:
                self.private_key = serialization.load_pem_private_key(
                    key_file.read(), password=None, backend=default_backend())
            self.private_key_bytes = self.private_key.private_bytes(
                encoding=serialization.Encoding.PEM,
                format=serialization.PrivateFormat.PKCS8,
                encryption_algorithm=serialization.NoEncryption())

    def usage(self):
        """Print a usage hint rendered through the octocat ASCII-art endpoint."""
        say = "python app.py get_repos [org_name]"
        print(self.github.octocat(say))

    def get_repos(self, org_name):
        """Print counts of 'open' (non-private) vs 'private' repos for an org."""
        print(f"get_repos for {org_name}")
        by_status = {"open": [], "private": []}
        for repo in self.github.repositories_by(org_name):
            # print(repo.as_json())
            # print(repo.name)
            if repo.private:
                by_status["private"].append(repo)
            else:
                by_status["open"].append(repo)
        for status, repos in by_status.items():
            count = str(len(repos))
            print(f"{status}: {count}")

    def get_org(self, org_name):
        """Return the organization object for org_name."""
        org = self.github.organization(org_name)
        return org

    def get_org_teams(self, org_name):
        """Return the organization's teams iterator."""
        org = self.get_org(org_name)
        teams = org.teams()
        # for team in teams:
        #     #print(team.as_json())
        #     print(team.slug)
        return teams

    def get_team_repos(self, org_name, team_slug):
        """Return the repositories of the team matching team_slug, or [] when
        no team matches. Scans all teams; the last match wins."""
        teams = self.get_org_teams(org_name)
        repos = []
        for team in teams:
            if team.slug == team_slug:
                repos = team.repositories()
        return repos

    def list_team_repos(self, org_name, team_slug):
        """Print the name of each repository belonging to the team."""
        repos = self.get_team_repos(org_name, team_slug)
        for repo in repos:
            print(repo.name)

    def clone_team_repos(self, org_name, team_slug):
        """Clone every public repo of the team into repos/<org>/<team>.

        Repos already on disk are left untouched (refresh commands are
        commented out). Returns the list of repos handled in this run.
        """
        initial_directory = os.getcwd()
        repos = self.get_team_repos(org_name, team_slug)
        checkout_into = f"repos/{org_name}/{team_slug}"
        os.makedirs(checkout_into, exist_ok=True)
        cloned = []
        for repo in repos:
            is_checked_out = os.path.exists(f"{checkout_into}/{repo.name}")
            if not repo.private:
                # if it's already there make sure it's current
                if is_checked_out:
                    os.chdir(checkout_into)
                    print(os.getcwd())
                    # os.system("git reset --hard origin/master")
                    # os.system("git pull")
                    os.chdir(initial_directory)
                else:
                    self.clone_repo(repo, checkout_into)
                    # NOTE(review): the flattened source makes the indent of this
                    # append ambiguous; placed inside the else so `cloned` holds
                    # only newly cloned repos -- confirm against upstream.
                    cloned.append(repo)
        return cloned

    def clone_repo(self, repo, checkout_into):
        """Shell out to `git clone` inside checkout_into, then restore the
        previous working directory."""
        clone_url = repo.clone_url
        print(clone_url)
        return_to = os.getcwd()
        os.chdir(checkout_into)
        os.system(f"git clone {clone_url}")
        os.chdir(return_to)

    def empty_team_repos(self, org_name, team_slug):
        """Delete the local checkout directory for the team, recursively."""
        checkout_into = f"repos/{org_name}/{team_slug}"
        #os.removedirs(checkout_into)
        shutil.rmtree(checkout_into)

    def say(self, text):
        """Call the octocat endpoint with text.
        NOTE(review): unlike usage(), the result is neither printed nor
        returned here -- the call's output is discarded."""
        self.github.octocat(say=text)