class Commit(Entity):
    """Commit entity class.

    One stored entity per commit, keyed by the commit sha.
    """

    entity_schema = Schema({
        "pull_request": Any(None, int),
        "patch": Schema({str: str}),
        "author": str,
        "message": str,
        "date": int,
        "additions": int,
        "deletions": int,
    })

    def analyse(self) -> List[GithubCommit]:
        """Override :func:`~Entity.analyse`.

        Return only the commits whose sha is not yet in previous knowledge.
        """
        return [
            c for c in self.get_raw_github_data()
            if c.sha not in self.previous_knowledge
        ]

    def store(self, commit: GithubCommit):
        """Override :func:`~Entity.store`."""
        # BUGFIX(perf): the original called commit.get_pulls() twice for
        # every commit, doubling a paginated GitHub API request; call once.
        pulls = commit.get_pulls()
        pull_request_id = pulls[0].number if pulls.totalCount != 0 else None

        self.stored_entities[commit.sha] = {
            "pull_request": pull_request_id,
            "patch": Commit.get_patches_for_files(commit),
            # Fall back to the raw git author name when there is no GitHub account.
            "author": commit.author.login if commit.author else commit.commit.author.name,
            "message": commit.commit.message,
            # last_modified is an RFC 1123 string, e.g. "Mon, 01 Jan 2000 00:00:00 GMT".
            "date": int(
                datetime.strptime(commit.last_modified,
                                  "%a, %d %b %Y %X %Z").timestamp()),
            "additions": commit.stats.additions,
            "deletions": commit.stats.deletions,
        }

    @staticmethod
    def get_patches_for_files(commit: GithubCommit) -> Dict[str, str]:
        """Inspect whole patch according to specific files.

        NOTE(review): ``f.patch`` is None for binary files, while the schema
        declares ``{str: str}`` — confirm binary files cannot occur here.
        """
        return {f.filename: f.patch for f in commit.files}

    def get_raw_github_data(self) -> List[GithubCommit]:
        """Override :func:`~Entity.get_raw_github_data`."""
        return self.repository.get_commits()
class Commit(Entity):
    """Commit entity class.

    Traverses commits with pydriller and enriches them with GitHub PR data.
    """

    entity_schema = Schema({
        "pull_request": Any(None, int),
        "patch": Schema({str: Any(None, str)}),
        "author": str,
        "message": str,
        "date": int,
        "additions": int,
        "deletions": int,
    })

    def analyse(self):
        """Override :func:`~Entity.analyse`.

        Wrap the commit generator together with its length (the repository
        is traversed once just to count the commits).
        """
        length = sum(1 for _ in self.get_raw_github_data())
        return GeneratorWrapper(self.get_raw_github_data(), length)

    def store(self, commit: GitCommit):
        """Override :func:`~Entity.store`."""
        if commit.hash in self.previous_knowledge:
            return

        github_commit = self.repository.get_commit(commit.hash)
        pull_request_ids = [pr.number for pr in github_commit.get_pulls()]

        # BUGFIX: a trailing comma made this a one-element tuple, violating
        # the ``"author": str`` schema entry.
        author_login = (github_commit.author.login
                        if github_commit.author
                        else github_commit.commit.author.name)

        # NOTE(review): ``patches`` is computed but never stored, although
        # the schema declares a "patch" key — confirm intended destination.
        patches = {}
        for mod in commit.modified_files:
            changed_methods = [method.name for method in mod.changed_methods]
            patches[mod.filename] = {
                "type": mod.change_type,
                "changed_methods": changed_methods,
                "patch_added": mod.diff_parsed["added"],
                "patch_deleted": mod.diff_parsed["deleted"],
            }

        self.stored_entities[commit.hash] = {
            # NOTE(review): schema declares Any(None, int) but a list of PR
            # numbers is stored — confirm which is intended.
            "pull_request": pull_request_ids,
            "author": author_login,
            "message": commit.msg,
            # BUGFIX: schema declares int; timestamp() returns a float.
            "date": int(commit.committer_date.timestamp()),
            "additions": commit.insertions,
            "deletions": commit.deletions,
            "files": commit.files,
        }

    def get_raw_github_data(self) -> List[GitCommit]:
        """Override :func:`~Entity.get_raw_github_data`."""
        return Repository(self.repository.clone_url).traverse_commits()
class ReadMe(Entity):
    """GitHub ReadMe entity."""

    entity_schema = Schema({
        "name": str,
        "path": str,
        "content": str,
        "type": str,
        "license": str,
        "size": int,
    })

    def analyse(self) -> List[GithubContentFile]:
        """Override :func:`~Entity.analyse`."""
        # TODO: recursive Readme analysis - is that a good idea?
        previous = self.previous_knowledge
        if previous is None or len(previous) == 0:
            # Nothing known yet; analyse the current readme.
            return [self.get_raw_github_data()]
        if previous["readme"]["size"] == self.repository.get_readme().size:
            # Size unchanged — treat the readme as already analysed.
            return []
        return [self.get_raw_github_data()]

    def store(self, content_file: GithubContentFile):
        """Override :func:`~Entity.store`."""
        entry = {
            "name": content_file.name,
            "path": content_file.path,
            "content": content_file.decoded_content.decode("utf-8"),
            "type": content_file.type,
            "license": content_file.license,
            "size": content_file.size,
        }
        self.stored_entities[content_file.path] = entry

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`."""
        return self.repository.get_readme()
class validator:
    """Voluptuous-based validation helpers for product payloads."""

    # 24-character string id (e.g. a MongoDB ObjectId in hex form).
    idSchema = Schema(All(str, Length(min=24, max=24)))
    jsStrSchema = Schema(str)
    jsIntSchema = Schema(int)

    @classmethod
    def validateId(cls, sid):
        """Validate that *sid* is a 24-character string id."""
        return cls.idSchema(sid)

    @classmethod
    def validateData(cls, data):
        """Validate the typed fields of a product payload.

        BUGFIX: the original body called ``cls.idSchema(sid)``, but ``sid``
        is not a parameter of this method, so every call raised NameError.
        Id validation belongs in :meth:`validateId`.
        """
        cls.jsStrSchema(data["title"])
        cls.jsIntSchema(data["price"])
        cls.jsIntSchema(data["inventory_count"])
        return
class validator:
    """Voluptuous-based validators for ids and string payloads."""

    # 24-character string id (e.g. a hex ObjectId).
    idSchema = Schema(All(str, Length(min=24, max=24)))
    jsSchema = Schema(str)

    @classmethod
    def validateId(cls, sid):
        """Validate a 24-character string id."""
        return cls.idSchema(sid)

    @classmethod
    def validatePut(cls, sid, data):
        """Validate the payload first, then the id, of a PUT request."""
        cls.jsSchema(data)
        cls.idSchema(sid)

    @classmethod
    def validateData(cls, data):
        """Validate a plain string payload."""
        cls.jsSchema(data)
class Issue(Entity):
    """GitHub Issue entity."""

    entity_schema = Schema({
        "created_by": str,
        "created_at": int,
        "closed_by": Any(None, str),
        "closed_at": Any(None, int),
        "labels": {str: {str: Any(int, str)}},
        "interactions": {str: int},
    })

    def analyse(self) -> PaginatedList:
        """Override :func:`~Entity.analyse`."""
        return self.get_only_new_entities()

    def store(self, issue: GithubIssue):
        """Override :func:`~Entity.store`."""
        # Pull requests are analysed by a dedicated entity, skip them here.
        if issue.pull_request is not None:
            return

        closed_by = issue.closed_by.login if issue.closed_by is not None else None
        closed_at = (int(issue.closed_at.timestamp())
                     if issue.closed_at is not None else None)

        self.stored_entities[str(issue.number)] = {
            "created_by": issue.user.login,
            "created_at": int(issue.created_at.timestamp()),
            "closed_by": closed_by,
            "closed_at": closed_at,
            "labels": GitHubKnowledge.get_labels(issue),
            "interactions": GitHubKnowledge.get_interactions(issue.get_comments()),
        }

    def previous_knowledge(self):
        """Override :func:`~Entity.previous_knowledge`.

        NOTE(review): this method shadows the ``previous_knowledge``
        attribute and returns the bound method object itself — confirm
        whether it should return stored knowledge instead.
        """
        return self.previous_knowledge

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`."""
        all_issues = self.repository.get_issues(state="all")
        return [issue for issue in all_issues if not issue.pull_request]
def update_is_mute(master_user_id=None, thread_id=None):
    """Toggle the mute flag of a message thread for the given participant."""
    payload = request.get_json()
    schema = Schema({Required("is_muted"): All(int, validate_is_active)})
    try:
        schema(payload)
    except MultipleInvalid as e:
        return multiple_invalid_response(e)

    delegate = UserMessageThreadParticipantsDelegate(master_user_id, thread_id)
    if not delegate.update_is_muted(payload.get('is_muted')):
        return ERROR_RESPONSE

    response_data = SUCCESS.copy()
    response_data["message"] = UPDATED
    return generic_success_response(response_data)
def add_thread_participants(master_user_id=None, thread_id=None):
    """Add a list of participants to an existing message thread."""
    payload = request.get_json()
    schema = Schema(
        {Required("participant_list"): All(validate_message_participant_list)})
    try:
        schema(payload)
    except MultipleInvalid as e:
        return multiple_invalid_response(e)

    delegate = UserMessageThreadParticipantsDelegate(master_user_id, thread_id)
    if not delegate.add_participants(payload.get('participant_list')):
        return INVALID_RESPONSE

    response_data = SUCCESS.copy()
    response_data["message"] = ADDED
    return generic_success_response(response_data)
class DependencyUpdate(Entity):
    """Python Dependency update entity.

    Any change (git commit) that was made into the Pipfile.lock file is
    considered a dependency update. It could be either manual (by contributor
    commiting to the file) or automatic (done by bot).
    """

    entity_schema = Schema({"user": str, "date": int})

    def analyse(self) -> List[Any]:
        """Override :func:`~Entity.analyse`."""
        commits = self.get_raw_github_data()
        if self.previous_knowledge is None:
            return commits
        known = self.previous_knowledge.keys()
        return [commit for commit in commits if commit.sha not in known]

    def store(self, commit: Commit):
        """Override :func:`~Entity.store`."""
        self.stored_entities[commit.sha] = {
            "user": self.get_author(commit),
            "date": int(commit.commit.author.date.timestamp()),
        }

    def get_author(self, commit):
        """Get author login of the commit."""
        pulls = commit.get_pulls()
        if pulls.totalCount:
            # Prefer the PR author when the commit came through a PR.
            return pulls[0].user.login
        author = commit.author
        if author:
            return author.login
        # No GitHub account attached — fall back to the raw git author name.
        return commit.commit.author.name

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`."""
        return self.repository.get_commits(path="Pipfile.lock")
def add_message(master_user_id=None, thread_id=None):
    """Add a message to a thread and update the thread bookkeeping.

    Validates the JSON payload, stores the message and its status row, then
    updates the thread's last-message pointer and every participant's
    new-message flag.
    """
    data_request = request.get_json()
    schema = Schema({
        Required("message_text"): All(str, Length(min=1, max=400)),
        Required("has_attachment"): All(int, validate_is_active),
        Optional("attachment_url"): Url
    })
    try:
        schema(data_request)
    except MultipleInvalid as e:
        return multiple_invalid_response(e)

    message_text = data_request.get('message_text')
    has_attachment = data_request.get('has_attachment')
    # BUGFIX: the original wrapped this in try/bare-except, but dict.get
    # never raises for a missing key — it returns None, so the "" fallback
    # in the except branch was dead code. Supply the default directly.
    attachment_url = data_request.get('attachment_url', "")

    message = UserMessageDelegate(master_user_id, thread_id)
    message_id = message.add(message_text, has_attachment, attachment_url)
    if message_id is None:
        return ERROR_RESPONSE

    message_status = UserMessageStatusDelegate(master_user_id, thread_id)
    if not message_status.add(message_id):
        return ERROR_RESPONSE

    # Update the thread's last message pointer.
    thread = UserMessageThreadsDelegate(thread_id)
    updated_last_user_message = thread.update_last_user_message(message_id)

    # Flag the new message for every thread participant.
    thread_participants = UserMessageThreadParticipantsDelegate(
        master_user_id, thread_id)
    updated_thread_participant_new_message = (
        thread_participants.update_thread_new_message())

    if updated_last_user_message and updated_thread_participant_new_message:
        response_data = SUCCESS.copy()
        response_data["message"] = ADDED
        return generic_success_response(response_data)
    return FAILURE_RESPONSE
class Fork(Entity):
    """Fork class."""

    entity_schema = Schema(int)

    def analyse(self) -> List[Repository]:
        """Override :func:`~Entity.analyse`."""
        known_owners = self.previous_knowledge.keys()
        return [
            repo for repo in self.get_raw_github_data()
            if repo.owner.login not in known_owners
        ]

    def store(self, fork: Repository):
        """Override :func:`~Entity.store`."""
        # Forks are keyed by owner login; the value is the creation timestamp.
        self.stored_entities[fork.owner.login] = int(fork.created_at.timestamp())

    def get_raw_github_data(self) -> List[Repository]:
        """Override :func:`~Entity.get_raw_github_data`."""
        return self.repository.get_forks()
class Stargazer(Entity):
    """Repository Stargazer entity."""

    entity_schema = Schema(int)

    def analyse(self) -> List[Any]:
        """Override :func:`~Entity.analyse`."""
        return [
            stargazer for stargazer in self.get_raw_github_data()
            if stargazer.user.login not in self.previous_knowledge
        ]

    def store(self, stargazer: GithubStargazer):
        """Override :func:`~Entity.store`."""
        # Stargazers are keyed by login; the value is the starring timestamp.
        starred_at = int(stargazer.starred_at.timestamp())
        self.stored_entities[stargazer.user.login] = starred_at

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`."""
        return self.repository.get_stargazers_with_dates()
class ReadMe(Entity):
    """GitHub ReadMe entity."""

    entity_schema = Schema({
        "name": str,
        "path": str,
        "content": str,
        "type": str,
        "size": int,
    })

    def analyse(self) -> List[GithubContentFile]:
        """Override :func:`~Entity.analyse`."""
        # TODO: recursive Readme analysis - is that a good idea?
        readme = self.get_raw_github_data()

        if self.previous_knowledge is None or len(self.previous_knowledge) == 0:
            return [readme]

        if not readme:
            # Repository has no readme at all.
            return []

        if "README" in self.previous_knowledge:
            previous = self.previous_knowledge["README"]
            unchanged = (
                readme.decoded_content.decode("utf-8") == previous["content"]
                and readme.path == previous["path"])
            if unchanged:
                return []

        return [readme]

    def store(self, content_file: GithubContentFile):
        """Override :func:`~Entity.store`."""
        # last_modified is an RFC 1123 string, e.g. "Mon, 01 Jan 2000 00:00:00 GMT".
        last_modified = int(
            datetime.strptime(content_file.last_modified,
                              "%a, %d %b %Y %X %Z").timestamp())
        self.stored_entities["README"] = {
            "name": content_file.name,
            "path": content_file.path,
            "content": content_file.decoded_content.decode("utf-8"),
            "type": content_file.type,
            "size": content_file.size,
            "date": last_modified,
        }

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`."""
        # A repository without a README raises UnknownObjectException.
        try:
            return self.repository.get_readme()
        except UnknownObjectException:
            return []
class CodeFrequency(Entity):
    """Code frequency statistics entity."""

    entity_schema = Schema({"additions": int, "deletions": int})

    def analyse(self) -> List[Any]:
        """Override :func:`~Entity.analyse`."""
        known_weeks = self.previous_knowledge.keys()
        return [
            stat for stat in self.get_raw_github_data()
            if str(stat.week.timestamp()) not in known_weeks
        ]

    def store(self, stats: StatsCodeFrequency):
        """Override :func:`~Entity.store`."""
        # Weekly stats are keyed by the week's timestamp (as a string).
        week_key = str(stats.week.timestamp())
        self.stored_entities[week_key] = {
            "additions": stats.additions,
            "deletions": stats.deletions,
        }

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`."""
        return self.repository.get_stats_code_frequency()
class PullRequestDiscussion(Entity):
    """Pull Request Discussion Entity class."""

    entity_schema = Schema([{
        "user": str,
        "text": str,
        "created_at": int,
        "reactions": list,
    }])

    def analyse(self) -> PaginatedList:
        """Override :func:`~Entity.analyse`."""
        return self.get_only_new_entities()

    def store(self, github_entity: PullRequest):
        """Override :func:`~Entity.store`."""
        key = str(github_entity.number)
        self.stored_entities[key] = self.__class__.get_conversations(github_entity)

    @staticmethod
    def get_conversations(pull_request: PullRequest):
        """Get conversations for a pull_request."""
        return [
            {
                "user": comment.user.login,
                "text": comment.body,
                "created_at": int(comment.created_at.timestamp()),
                "reactions": [r.content for r in comment.get_reactions()],
            }
            for comment in pull_request.get_issue_comments()
        ]

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`."""
        return self.repository.get_pulls(state="closed")
class TemplateEntity(Entity):
    """Template entity.

    Serves as a skelet for implementing a new entity so the contributor does
    not have to spend time copying everything from interface class.

    For further inspiration look at other implemented entities like Issue or
    PullRequest.
    """

    entity_schema = Schema({"extracted_information": Any})

    def analyse(self) -> List[Any]:
        """Override :func:`~Entity.analyse`.

        BUGFIX: the original had no return statement and yielded None
        despite the List[Any] annotation; return an empty list by default.
        """
        return []

    def store(self, github_entity):
        """Override :func:`~Entity.store`.

        BUGFIX: every other entity in this project writes to
        ``self.stored_entities``; ``self.stored`` was an inconsistent
        (and presumably nonexistent) attribute.
        """
        self.stored_entities["key"] = {
            "extracted_information": github_entity.attribute,
        }

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`."""
        return self.repository.get_entity()
def __init__(self, validators, msg=None, **kwargs):
    """Store the validators and pre-compile one Schema per validator.

    Extra keyword arguments are forwarded to every Schema constructor.
    """
    self.validators = validators
    self.msg = msg
    self._schemas = [Schema(validator, **kwargs) for validator in validators]
class Release(Entity):
    """Release entity.

    Covers both GitHub Releases and plain git tags that carry a semver name.
    """

    # NOTE(review): the stored entity also carries major/minor/patch/
    # prerelease/build keys not declared here — confirm schema intent.
    entity_schema = Schema({
        "release_date": int,
        "note": str,
    })

    def analyse(self) -> List[Any]:
        """Override :func:`~Entity.analyse`."""
        return [
            tag for tag in self.get_raw_github_data()
            if tag.commit.sha not in self.previous_knowledge
        ]

    def store(self, release: Union[Tag, GitRelease]):
        """Override :func:`~Entity.store`."""
        # BUGFIX: the original used ``issubclass(Tag, type(release))`` —
        # the arguments are reversed, which misclassifies subclasses of Tag.
        is_tag = isinstance(release, Tag)

        # Strip a leading "v" prefix (e.g. "v1.2.3" -> "1.2.3").
        version_name = release.name
        name = (version_name[1:]
                if len(version_name) > 0 and version_name[0] == "v"
                else version_name)

        try:
            version = VersionInfo.parse(name)
        except ValueError:
            _LOGGER.info("Found tag is not a valid release, skipping")
            return

        self.stored_entities[release.commit.sha] = {
            "major": version.major,
            "minor": version.minor,
            "patch": version.patch,
            "prerelease": version.prerelease,
            "build": version.build,
            "release_date":
                self.__class__.get_tag_release_date(release)
                if is_tag else release.created_at.timestamp(),
            "note":
                self.__class__.get_tag_release_note(release)
                if is_tag else release.body,
        }

    @staticmethod
    def get_tag_release_date(release_tag: Tag):
        """Get release date from regular Tag."""
        # Hoisted: the original called get_pulls() twice per tag.
        pulls = release_tag.commit.get_pulls()
        if pulls.totalCount == 0:
            # No PR — fall back to the commit's last-modified HTTP date.
            return datetime.strptime(release_tag.commit.last_modified,
                                     "%a, %d %b %Y %X %Z").timestamp()
        return pulls[0].closed_at.timestamp()

    @staticmethod
    def get_tag_release_note(release_tag: Tag):
        """Get release note from regular Tag."""
        pulls = release_tag.commit.get_pulls()
        if pulls.totalCount == 0:
            return release_tag.commit.commit.message
        return pulls[0].body

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`.

        Returns both releases and tags; duplicates are filtered downstream
        by commit sha.
        """
        releases = list(self.repository.get_releases())
        releases.extend(self.repository.get_tags())
        return releases
def __init__(self, validators, **kwargs):
    """Store the validators and pre-compile one Schema per validator.

    ``msg`` is extracted from kwargs so it is not forwarded to Schema.
    """
    self.validators = validators
    self.msg = kwargs.pop('msg', None)
    self._schemas = [Schema(validator, **kwargs) for validator in validators]
class KebechetUpdateManager(Entity):
    """ThothYaml entity used for kebechet manager detection."""

    # One stored entity per update request, keyed by str(issue.number).
    entity_schema = Schema({
        "request_type": str,  # manual, automatic, failed
        "request_created": int,
        "bot_first_response": Any(None, int),
        "closed_at": int,
        "merged_at": Any(None, int),
    })

    def analyse(self) -> PaginatedList:
        """Override :func:`~Entity.analyse`.

        NOTE(review): membership is tested with the int ``i.number`` while
        :meth:`store` keys entities by ``str(...)`` — confirm that
        ``previous_knowledge`` holds int keys, otherwise every issue is
        re-analysed on each run.
        """
        return [
            i for i in self.get_raw_github_data()
            if i.number not in self.previous_knowledge
        ]

    def store(self, update_request: Issue):
        """Override :func:`~Entity.store`."""
        _LOGGER.info("ID: %s", update_request.number)
        self.stored_entities[str(update_request.number)] = {
            "request_type": self.__class__.get_request_type(update_request),
            "request_created": int(update_request.created_at.timestamp()),
            "bot_first_response":
                self.__class__.get_first_bot_response(update_request),
            # closed_at is safe to access: get_raw_github_data only
            # returns issues with state="closed".
            "closed_at": int(update_request.closed_at.timestamp()),
            "merged_at": self.__class__.get_merged_at(update_request),
        }

    @staticmethod
    def get_merged_at(update_request: Issue) -> typing.Optional[int]:
        """Get merged_at time if the issue is pull_request."""
        if update_request.pull_request:
            if update_request.as_pull_request().merged_at:
                return int(
                    update_request.as_pull_request().merged_at.timestamp())
        # Not a PR, or a PR that was closed without merging.
        return None

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`.

        Only closed issues that are recognized as update requests.
        """
        return [
            issue for issue in self.repository.get_issues(state="closed")
            if self.__class__.get_request_type(issue)
        ]

    @staticmethod
    def get_request_type(issue: Issue) -> typing.Optional[str]:
        """Get the type of the update request.

        Returns "manual" for a plain "Kebechet update" issue, a keyword-based
        type from UPDATE_TYPES_AND_KEYWORDS otherwise, or None when the
        issue is not an update request at all.
        """
        if not issue.pull_request and issue.title == "Kebechet update":
            return "manual"

        for request_type, keyword in UPDATE_TYPES_AND_KEYWORDS.items():
            if keyword in issue.title:
                return request_type

        _LOGGER.debug(
            f"Update request not recognized, issue num.{issue.number}")
        return None

    @staticmethod
    def get_first_bot_response(issue: Issue) -> typing.Optional[int]:
        """Get timestamps for all bot comments in issue.

        Returns the timestamp of the first comment made by a known bot,
        or None when no bot commented.
        """
        for comment in issue.get_comments():
            if comment.user.login in BOTS:
                return int(comment.created_at.timestamp())
        return None
def __call__(self, v):
    """Run the composed validation against *v*.

    BUGFIX: the original rebuilt ``Schema(val)`` for every validator on each
    call, silently discarding the keyword arguments the schemas were
    compiled with in ``__init__`` (and redoing the compilation work every
    time). Use the pre-compiled ``self._schemas`` instead.
    """
    return self._exec(iter(self._schemas), v)
def entities_schema(cls) -> Schema:
    """Return schema of how all of the entities of repo are stored."""
    # Entities are keyed by an arbitrary string identifier.
    schema_of_all_entities = Schema({str: cls.entity_schema})
    return schema_of_all_entities
import logging
from typing import Dict, Generator, List

from github.PaginatedList import PaginatedList
from github.PullRequest import PullRequest as GithubPullRequest
from voluptuous.schema_builder import Schema
from voluptuous.validators import Any

from srcopsmetrics.entities import Entity
from srcopsmetrics.entities.tools.knowledge import GitHubKnowledge

_LOGGER = logging.getLogger(__name__)

# Schema of a single extracted pull request review.
PullRequestReview = Schema({
    "author": Any(None, str),
    "words_count": int,
    "submitted_at": int,
    "state": str,
})

# All reviews of a PR, keyed by review id (as string).
PullRequestReviews = Schema({str: PullRequestReview})

# Keywords GitHub recognizes as closing/fixing an issue from a PR comment.
ISSUE_KEYWORDS = {
    "close", "closes", "closed",
    "fix", "fixes", "fixed",
    "resolve", "resolves", "resolved",
}


def get_first_review_time(reviews: Dict[Any, Any]):
    """Return timestamp of the first PR review, or None when unreviewed."""
    submitted = [int(review["submitted_at"]) for review in reviews.values()]
    return min(submitted, default=None)
class PullRequest(Entity):
    """GitHub PullRequest entity."""

    # NOTE(review): stored entities also contain "merged_by",
    # "changed_files" (schema says "files"), "first_review_at" and
    # "first_approve_at", which this schema does not declare — confirm.
    entity_schema = Schema({
        "title": str,
        "body": Any(None, str),
        "size": str,
        "labels": [str],
        "created_by": str,
        "created_at": int,
        "closed_at": Any(None, int),
        "closed_by": Any(None, str),
        "merged_at": Any(None, int),
        "commits_number": int,
        "changed_files_number": int,
        "interactions": {str: int},
        "reviews": PullRequestReviews,
        "commits": [str],
        "files": [str],
    })

    def analyse(self) -> PaginatedList:
        """Override :func:`~Entity.analyse`."""
        return self.get_raw_github_data()

    def store(self, pull_request: GithubPullRequest):
        """Override :func:`~Entity.store`.

        Extract all tracked features of one pull request and store them
        under ``str(pull_request.number)``; skip PRs already analysed.
        """
        _LOGGER.info("Extracting PR #%d", pull_request.number)

        # previous_knowledge appears to be indexed by PR number
        # (``.index`` suggests a pandas object) — TODO confirm.
        if pull_request.number in self.previous_knowledge.index:
            _LOGGER.debug("PullRequest %s already analysed, skipping")
            return

        created_at = int(pull_request.created_at.timestamp())
        closed_at = (int(pull_request.closed_at.timestamp())
                     if pull_request.closed_at is not None else None)
        merged_at = (int(pull_request.merged_at.timestamp())
                     if pull_request.merged_at is not None else None)

        closed_by = (pull_request.as_issue().closed_by.login
                     if pull_request.as_issue().closed_by is not None else None)
        merged_by = (pull_request.merged_by.login
                     if pull_request.merged_by is not None else None)

        labels = [label.name for label in pull_request.get_labels()]

        # Evaluate size of PR: prefer an explicit size label, otherwise
        # derive the size class from the number of changed lines.
        pull_request_size = None
        if labels:
            pull_request_size = GitHubKnowledge.get_labeled_size(labels)
        if not pull_request_size:
            lines_changes = pull_request.additions + pull_request.deletions
            pull_request_size = GitHubKnowledge.assign_pull_request_size(
                lines_changes=lines_changes)

        reviews = self.extract_pull_request_reviews(pull_request)

        self.stored_entities[str(pull_request.number)] = {
            "title": pull_request.title,
            "body": pull_request.body,
            "size": pull_request_size,
            "created_by": pull_request.user.login,
            "created_at": created_at,
            "closed_at": closed_at,
            "closed_by": closed_by,
            "merged_at": merged_at,
            "merged_by": merged_by,
            "commits_number": pull_request.commits,
            "changed_files_number": pull_request.changed_files,
            "interactions": GitHubKnowledge.get_interactions(
                pull_request.get_issue_comments()),
            "reviews": reviews,
            "labels": labels,
            "commits": [c.sha for c in pull_request.get_commits()],
            "changed_files": [f.filename for f in pull_request.get_files()],
            "first_review_at": get_first_review_time(reviews),
            # NOTE(review): get_approve_time is not defined in this module
            # view — presumably imported alongside get_first_review_time.
            "first_approve_at": get_approve_time(reviews),
        }

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`."""
        return self.repository.get_pulls(state="all")

    @staticmethod
    def extract_pull_request_review_requests(
            pull_request: GithubPullRequest) -> List[str]:
        """Extract features from requested reviews of the PR.

        GitHub understands review requests rather as requested reviewers
        than actual requests.

        Arguments:
            pull_request {PullRequest} -- PR of which we can extract review requests.

        Returns:
            List[str] -- list of logins of the requested reviewers

        """
        requested_users = pull_request.get_review_requests()[0]

        extracted = []
        for user in requested_users:
            extracted.append(user.login)
        return extracted

    @staticmethod
    def extract_pull_request_reviews(
            pull_request: GithubPullRequest) -> Dict[str, Dict[str, Any]]:
        """Extract required features for each review from PR.

        Arguments:
            pull_request {PullRequest} -- Pull Request from which the reviews will be extracted

        Returns:
            Dict[str, Dict[str, Any]] -- dictionary of extracted reviews. Each review is stored

        """
        reviews = pull_request.get_reviews()
        _LOGGER.debug(" -num of reviews found: %d" % reviews.totalCount)

        results = {}
        for idx, review in enumerate(reviews, 1):
            _LOGGER.info(" -analysing review no. %d/%d" %
                         (idx, reviews.totalCount))
            results[str(review.id)] = {
                "author": review.user.login
                          if review.user and review.user.login else None,
                "words_count": len(review.body.split(" ")),
                "submitted_at": int(review.submitted_at.timestamp()),
                "state": review.state,
            }
        return results

    @staticmethod
    def get_referenced_issues(pull_request: GithubPullRequest) -> List[str]:
        """Scan all of the Pull Request comments and get referenced issues.

        Arguments:
            pull_request {PullRequest} -- Pull request for which the referenced issues are extracted

        Returns:
            List[str] -- IDs of referenced issues within the Pull Request.

        """
        issues_referenced = []
        for comment in pull_request.get_issue_comments():
            for id in PullRequest.search_for_references(comment.body):
                issues_referenced.append(id)
        # The PR description itself may also reference issues.
        for id in PullRequest.search_for_references(pull_request.body):
            issues_referenced.append(id)

        _LOGGER.debug(" referenced issues: %s" % issues_referenced)
        return issues_referenced

    @staticmethod
    def search_for_references(body: str) -> Generator[str, None, None]:
        """Return generator for iterating through referenced IDs in a comment."""
        if body is None:
            return

        message = body.split(" ")
        for idx, word in enumerate(message):
            # NOTE(review): this `return` stops scanning the whole comment at
            # the first non-keyword word, so only a leading keyword is ever
            # detected — confirm whether `continue` was intended.
            if word.replace(":", "").lower() not in ISSUE_KEYWORDS:
                return

            _LOGGER.info(" ...found keyword referencing issue")
            referenced_issue_number = message[idx + 1]

            if referenced_issue_number.startswith("https"):
                # last element of url is always the issue number
                ref_issue = referenced_issue_number.split("/")[-1]
            elif referenced_issue_number.startswith("#"):
                ref_issue = referenced_issue_number.replace("#", "")
            else:
                _LOGGER.info(" ...referenced issue number absent")
                _LOGGER.debug(" keyword message: %s" % body)
                return

            # BUGFIX: the original tested ``referenced_issue_number`` (still
            # carrying the "#" or the full URL), which is never numeric, so
            # no reference was ever yielded. Test the normalized id instead.
            if not ref_issue.isnumeric():
                _LOGGER.info(
                    " ...referenced issue number in incorrect format")
                return

            _LOGGER.info(" ...referenced issue number: %s" % ref_issue)
            yield ref_issue