def create_github_release(
    repository: Repository,
    version: str,
    artifacts: Set[Path],
) -> None:
    """
    Tag ``version`` on the repository's newest commit, create a draft
    release, attach every artifact, then publish the release.

    The release is created as a draft first so it only becomes visible
    once all assets have been uploaded.

    :param repository: repository to tag and release on.
    :param version: version string, used as the tag name.
    :param artifacts: paths of files to upload as release assets.
    """
    name = 'Release ' + version
    message = (
        'See '
        'https://dcos-e2e.readthedocs.io/en/latest/changelog.html'
    )
    head_sha = repository.get_commits()[0].sha
    draft_release = repository.create_git_tag_and_release(
        tag=version,
        tag_message=name,
        release_name=name,
        release_message=message,
        type='commit',
        object=head_sha,
        draft=True,
    )
    for artifact in artifacts:
        draft_release.upload_asset(
            path=str(artifact),
            label=artifact.name,
        )
    # Flip the draft flag off now that every asset is attached.
    draft_release.update_release(
        name=name,
        message=message,
        draft=False,
    )
def get_last_commit_date(input_repo: Repository):
    """Return the author date of the most recent commit in the repository.

    :param input_repo: repository whose commit history is queried.
    :return: the ``date`` attribute of the newest commit's author info.
    """
    # The first entry of the first page is the most recent commit.
    latest_commit = input_repo.get_commits().get_page(0)[0]
    return latest_commit.commit.author.date
def get_contributors(repo: Repository, path: str, branch_name: str):
    """Get contributors to a file, sorted by descending commit count.

    :param repo: repository to inspect.
    :param path: path of the file within the repository.
    :param branch_name: branch (or SHA) whose history is examined.
    :return: list of commit authors, most-frequent committer first.
    """
    commit_list = repo.get_commits(sha=branch_name, path=path)
    # Count commits per author.
    # NOTE(review): commit.author can be None for commits with no linked
    # GitHub account -- such commits all land under a single None key.
    commit_counts = defaultdict(int)
    for commit in commit_list:
        commit_counts[commit.author] += 1
    # sorted() accepts any iterable, so the list(...) wrapper the original
    # used around .keys() was redundant; sorting is stable, so authors with
    # equal counts keep their first-seen order.
    return sorted(
        commit_counts,
        key=lambda user: commit_counts[user],
        reverse=True,
    )
def create_github_release(
    repository: Repository,
    version: str,
) -> None:
    """
    Create a tag and release on GitHub, then build and attach the Linux
    installer binaries.

    :param repository: the GitHub repository to tag and release on.
    :param version: the version string, used as the tag name.
    """
    changelog_url = 'https://dcos-e2e.readthedocs.io/en/latest/changelog.html'
    release_name = 'Release ' + version
    release_message = 'See ' + changelog_url
    github_release = repository.create_git_tag_and_release(
        tag=version,
        tag_message='Release ' + version,
        release_name=release_name,
        release_message=release_message,
        type='commit',
        # Tag the most recent commit returned by the commit listing.
        object=repository.get_commits()[0].sha,
        draft=False,
    )

    # The artifacts we build must be built from the tag we just created.
    # This tag is created remotely on GitHub using the GitHub HTTP API.
    #
    # We fetch all tags from GitHub and set our local HEAD to the latest
    # master from GitHub.
    #
    # One symptom of this is that ``minidcos --version`` from the
    # PyInstaller binary shows the correct version.
    local_repository = Repo('.')
    client = HttpGitClient(repository.owner.html_url)
    remote_refs = client.fetch(repository.name + '.git', local_repository)

    # Update the local tags and references with the remote ones.
    for key, value in remote_refs.items():
        local_repository.refs[key] = value

    # Advance local HEAD to remote master HEAD.
    local_repository[b'HEAD'] = remote_refs[b'refs/heads/master']

    # We need to make the artifacts just after creating a tag so that the
    # --version output is exactly the one of the tag.
    # No tag exists when the GitHub release is a draft.
    # This means that temporarily we have a release without binaries.
    # NOTE(review): the release above is created with ``draft=False``, so
    # the two preceding sentences about draft releases look stale --
    # confirm whether a draft release was intended here.
    linux_artifacts = make_linux_binaries(repo_root=Path('.'))
    for installer_path in linux_artifacts:
        github_release.upload_asset(
            path=str(installer_path),
            label=installer_path.name,
        )
def create_github_release(
    repository: Repository,
    version: str,
) -> None:
    """
    Tag ``version`` at the repository's newest commit and publish a
    matching GitHub release that points readers at the changelog.

    :param repository: repository to tag and release on.
    :param version: version string, used as the tag name.
    """
    title = 'Release ' + version
    head_sha = repository.get_commits()[0].sha
    repository.create_git_tag_and_release(
        tag=version,
        tag_message=title,
        release_name=title,
        release_message='See CHANGELOG.rst',
        type='commit',
        object=head_sha,
    )
def get_last_edit_date(repo: Repository, path: str, branch_name: str):
    """Get a file's last edit date.

    :param repo: repository containing the file.
    :param path: path of the file within the repository.
    :param branch_name: branch (or SHA) whose history is searched.
    :return: ``datetime.date`` of the newest commit touching ``path``.
    :raises ValueError: if no commit touches the file on that branch
        (``max()`` of an empty sequence), matching the original behavior.
    """
    # Feed max() a generator: there is no need to materialize the
    # intermediate list of dates.
    return max(
        commit.commit.author.date.date()
        for commit in repo.get_commits(sha=branch_name, path=path)
    )
def analyze_github_file(repo: Repository, path_to_file: str,
                        context: DocCheckerContext) -> FileAnalysis:
    """
    This will actually load the file and the commit information to get things
    like if it was changed recently and who the owner (is taken from the
    frontmatter).
    :param context: checker settings (staleness window, branch to read from)
    :param repo: The repo object in the API client
    :param path_to_file: The path to the file in the repo (as in "lib/myfile.md")
    :return: a FileAnalysis populated with whatever could be determined;
        on any exception a partially-filled (possibly empty) analysis is
        returned after logging the error.
    """
    analysis = FileAnalysis()
    info(f"Checking file {path_to_file}...")
    try:
        commits = repo.get_commits(path=path_to_file)
        # A file is "stale" when its newest commit is older than this cutoff.
        # NOTE(review): datetime.now() is naive -- confirm the commit dates
        # from the API client are naive too, otherwise this comparison mixes
        # naive and aware datetimes.
        no_earlier_than = datetime.datetime.now() - datetime.timedelta(
            days=context.doc_is_stale_after_days)
        if commits.totalCount > 0:
            # commits[0] is the most recent commit touching the file.
            commit_date = commits[0].commit.committer.date
            analysis.file_changed_recently = commit_date >= no_earlier_than
            analysis.last_change = commit_date
            analysis.changed_by_email = commits[0].commit.committer.email
            analysis.changed_by_name = commits[0].commit.committer.name
        content = repo.get_contents(path_to_file, ref=context.github_branch)
        analysis.file_link = content.html_url
        analysis.file_identifier = path_to_file
        if content.decoded_content:
            # Parse YAML frontmatter to extract the title and owner.
            doc = frontmatter.loads(content.decoded_content)
            if not doc and not doc.metadata:
                error(
                    f"There was a problem when reading the frontmatter for {path_to_file}",
                    1)
            else:
                if 'title' in doc.metadata:
                    analysis.doc_name = doc.metadata['title']
                else:
                    # Fall back to the repo path when no title is declared.
                    analysis.doc_name = path_to_file
                if 'owner' in doc.metadata:
                    analysis.owner = doc.metadata['owner']
                    try:
                        # Normalize the owner to a validated email address;
                        # an invalid address clears the owner entirely.
                        valid = validate_email(analysis.owner)
                        analysis.owner = valid.email
                    except EmailNotValidError as e:
                        warning(
                            f"Found an owner but the email {analysis.owner} is not valid: "
                            + str(e), 1)
                        analysis.owner = None
        info(f"Owner: {analysis.owner if analysis.owner else 'Not found'}", 1)
        info(
            f"Changed On: {analysis.last_change if analysis.last_change else 'Not found'}",
            1)
        info(f"Is Stale: {'No' if analysis.file_changed_recently else 'Yes'}", 1)
        info(
            f"Changed By: {analysis.changed_by_email if analysis.changed_by_email else 'Not found'}",
            1)
    except Exception as e:
        # Best-effort: log and return whatever was collected so far.
        error(f"Unable to load analysis due to exception: {str(e)} ", 1)
    return analysis
def normalize_release(self, component: Component, data: GitRelease,
                      top_data: Repository) -> bool:
    """
    Normalize GitHub data to our schema and save it to the database.

    Params:
        component: the Component we're tying this all to.
        data: the GitHub release (tag, body, author) being normalized.
        top_data: the Repository the release belongs to, used for the
            commit history and homepage URL.

    Raises:
        ValueError: if any of the three arguments is None.
    """
    if component is None:
        raise ValueError("Missing component.")
    if data is None:
        raise ValueError("Missing data.")
    if top_data is None:
        raise ValueError("Missing top_data.")

    version, created = ComponentVersion.objects.get_or_create(
        component=component,
        version=data.tag_name,
    )  # type: ComponentVersion, bool

    # Data Source
    version.update_metadata(MetadataType.SOURCE, "data-source",
                            "api.github.com")
    if created:
        logger.debug("Adding GitHub: %s@%s", component.name, data.tag_name)
    else:
        logger.debug("Reloading GitHub: %s@%s", component.name, data.tag_name)

    version.description = data.body
    # Rebuild the maintainer set from scratch on every (re)load.
    version.maintainers.clear()

    # Release author becomes the first maintainer.
    author = data.author
    maintainer, _ = Maintainer.objects.get_or_create(
        metadata__SOURCE__contains={
            "scoped-username.github": author.login
        })  # type: Maintainer, bool
    maintainer.add_name(author.name)
    maintainer.add_email(author.email)
    maintainer.update_metadata(MetadataType.SOURCE, "twitter_username",
                               author.twitter_username)
    maintainer.update_metadata(MetadataType.SOURCE, "avatar_url",
                               author.avatar_url)
    maintainer.save()
    version.maintainers.add(maintainer)

    # Additional maintainers: anyone who committed in the last year.
    year_ago = timezone.now() - datetime.timedelta(days=365)
    # Truncate to midnight so repeated runs on the same day hit the same
    # `since` value.
    year_ago = year_ago.replace(hour=0, minute=0, second=0,
                                microsecond=0)  # cache-friendly
    commits = top_data.get_commits(since=year_ago)
    seen_commits = set()
    for commit in commits:
        # De-duplicate committers by login.
        # NOTE(review): commit.author may be None for commits with no
        # linked account (other code in this file guards for that) --
        # confirm this loop cannot hit such a commit.
        if commit.author.login in seen_commits:
            continue
        seen_commits.add(commit.author.login)
        maintainer, _ = Maintainer.objects.get_or_create(
            metadata__SOURCE__contains={
                "scoped-username.github": commit.author.login
            })
        maintainer.add_name(commit.author.name)
        maintainer.add_email(commit.author.email)
        maintainer.update_metadata(MetadataType.SOURCE, "avatar_url",
                                   commit.author.avatar_url)
        maintainer.save()
        version.maintainers.add(maintainer)

    # Add relevant URLs test
    # NOTE(review): the function is annotated `-> bool` but no return
    # statement is visible in this span -- the body may be truncated here.
    urls = []
    if url := check_url(top_data.homepage):
        urls.append(
            Url.objects.get_or_create(url_type=UrlType.HOME_PAGE,
                                      url=url)[0])
def get_commits(r: github.Repository, data_dir: str):
    """
    Aggregate per-author commit statistics for a repository and write them
    atomically to ``commits.json`` in ``data_dir``.

    Statistics per author login include commit counts, lines changed,
    files touched, and word/vocabulary measures over commit messages and
    patches.  Commits without a resolvable author are skipped.

    :param r: the repository whose full commit history is scanned.
    :param data_dir: directory the ``commits.json`` file is written into.
    """
    commits = dict()
    all_commits = r.get_commits()
    total = all_commits.totalCount
    cur = 0
    for c in all_commits:
        cur += 1
        # Progress report + rate-limit pacing: be verbose on the first and
        # every tenth commit, quiet otherwise.
        if cur == 1 or cur % 10 == 0:
            print("Processing commit {} of {}".format(cur, total),
                  file=sys.stderr)
            wait(verbose=True)
        else:
            wait()
        # Identity comparison with None, not `!= None`.
        if c.author is not None:
            login = c.author.login
        else:
            print("Skipping commit {} due to unknown author".format(c.sha),
                  file=sys.stderr)
            continue
        if login not in commits:
            commits[login] = {
                "num_commits": 0,
                "loc_additions": 0,
                "loc_deletions": 0,
                "loc_changes": 0,
                "num_files": 0,
                "loc_times_files": 0,
                "loc_pow_files": 0,
                "msg_wordcount": 0,
                "msg_vocab": set(),
                "msg_vocab_filescope": 0,
                "patch_wordcount": 0,
                "patch_vocab": set(),
                "patch_vocab_filescope": 0,
            }
        data = commits[login]
        data["num_commits"] += 1
        data["loc_additions"] += c.stats.additions
        data["loc_deletions"] += c.stats.deletions
        data["loc_changes"] += c.stats.total
        data["num_files"] += len(c.files)
        data["loc_times_files"] += c.stats.total * len(c.files)
        data["loc_pow_files"] += c.stats.total**len(c.files)
        # Split the commit message once instead of three times.
        msg_words = c.commit.message.split()
        data["msg_wordcount"] += len(msg_words)
        data["msg_vocab"].update(msg_words)
        data["msg_vocab_filescope"] += len(set(msg_words))
        for f in c.files:
            if f.patch is not None:
                patch_words = f.patch.split()
                data["patch_wordcount"] += len(patch_words)
                data["patch_vocab"].update(patch_words)
                data["patch_vocab_filescope"] += len(set(patch_words))
    # Convert vocab sets to numbers (sets are not JSON-serializable).
    for login in commits:
        commits[login]["msg_vocab"] = len(commits[login]["msg_vocab"])
        commits[login]["patch_vocab"] = len(commits[login]["patch_vocab"])
    # Write to a temp file and rename so readers never see a partial file.
    filename = os.path.join(data_dir, "commits.json")
    with open(filename + ".part", mode="w") as f:
        json.dump(commits, f)
    os.rename(filename + ".part", filename)
def import_repo(repo: Repository):
    """
    Add a repository (and its user links) to the shared graph ``g``.

    Recurses into ``repo.parent`` for forks, links the owner/contributors
    (or recent committers when the module-level ``since`` is set), and
    returns the repo plus its forks for the caller to process further.

    On rate-limit the node is removed and the exception re-raised; on any
    other GithubException the node is removed and the error swallowed.

    :param repo: the repository to import.
    :return: tuple of ``(repo, list_of_forks)``; the list is empty when the
        repo was skipped or fork listing failed.
    """
    repo_id = repo.full_name
    if repo_id is None:
        return repo, []
    # Skip repos already present in the graph.
    with graph_lock:
        if repo_id in g:
            return repo, []
    # For forks, import the upstream first and record the fork edge.
    if repo.fork and repo.parent and repo.parent.full_name and repo.owner:
        import_repo(repo.parent)
        link_user(repo.parent.full_name,
                  repo.owner.login,
                  relation='fork',
                  fork_source=repo_id,
                  date=repo.created_at.isoformat())
    language = repo.language or '?'
    weight = repo.watchers_count or 0
    with graph_lock:
        # bipartite=0 marks repo nodes (user nodes use the other side).
        g.add_node(repo_id,
                   bipartite=0,
                   language=language,
                   weight=weight,
                   date=repo.updated_at.isoformat())
    repo_forks = []
    try:
        if since is None:
            # No time filter: link the owner and all contributors.
            if repo.owner is not None:
                link_user(repo_id,
                          repo.owner.login,
                          relation='owner',
                          date=repo.pushed_at.isoformat())
            contributors = repo.get_contributors()
            for user in contributors:
                link_user(repo_id,
                          user.login or user.email,
                          relation='contributor')
        else:
            # Time filter: link only committers since `since`, oldest first.
            # Drop commits missing either the GitHub author or git author.
            commits = [
                x for x in repo.get_commits(since=since)
                if x.author and x.commit.author
            ]
            commits = sorted(commits, key=lambda x: x.commit.author.date)
            for commit in commits:
                date: datetime = commit.commit.author.date
                link_user(repo_id,
                          commit.author.login or commit.author.email,
                          relation="committer",
                          date=date.isoformat())
        repo_forks = list(repo.get_forks())
    except RateLimitExceededException:
        # Roll back the node and let the caller handle backoff.
        with graph_lock:
            g.remove_node(repo_id)
        raise
    except GithubException:
        # Other API errors: roll back the node but continue best-effort.
        with graph_lock:
            g.remove_node(repo_id)
    except Exception:
        raise
    return repo, repo_forks