def analyse_issues(
    self, repository: Repository, prev_knowledge: Dict[str, Any], is_local: bool = False
) -> Optional[Dict[str, Any]]:
    """Analyse every closed issue in repository.

    Arguments:
        repository {Repository} -- currently the PyGithub lib is used because of its functionality
                                   ogr unfortunatelly did not provide enough to properly analyze issues
        prev_knowledge {Dict[str, Any]} -- previous knowledge stored
        is_local {bool} -- flag to state if the knowledge should be collected locally or on Ceph.

    Returns:
        The accumulated knowledge, or None when there are no new issues to analyse.
    """
    _LOGGER.info(
        "-------------Issues (that are not PR) Analysis-------------")

    # get_issues() also yields pull requests; keep only genuine issues.
    current_issues = [
        issue for issue in repository.get_issues(state="all")
        if issue.pull_request is None
    ]
    new_issues = self.get_only_new_entities(prev_knowledge, current_issues)

    # Be explicit: the annotated return type is Optional, so return None
    # (the original bare `return` contradicted a non-Optional annotation).
    if not new_issues:
        return None

    with KnowledgeAnalysis(
        entity_type=EntityTypeEnum.ISSUE.value,
        new_entities=new_issues,
        accumulator=prev_knowledge,
        store_method=self.store_issue,
    ) as analysis:
        accumulated = analysis.store()

    return accumulated
def analyse_pull_requests(
    self, repository: Repository, prev_knowledge: Dict[str, Any], is_local: bool = False
) -> Optional[Dict[str, Any]]:
    """Analyse every closed pull_request in repository.

    Arguments:
        repository {Repository} -- currently the PyGithub lib is used because of its functionality
                                   ogr unfortunatelly did not provide enough to properly analyze issues
        prev_knowledge {Dict[str, Any]} -- previous knowledge stored
        is_local {bool} -- flag to state if the knowledge should be collected locally or on Ceph.

    Returns:
        The accumulated knowledge, or None when there are no new pull requests.
    """
    _LOGGER.info(
        "-------------Pull Requests Analysis (including its Reviews)-------------"
    )

    current_pulls = repository.get_pulls(state="all")
    new_pulls = self.get_only_new_entities(prev_knowledge, current_pulls)

    # Be explicit: the annotated return type is Optional, so return None
    # (the original bare `return` contradicted a non-Optional annotation).
    if not new_pulls:
        return None

    with KnowledgeAnalysis(
        entity_type=EntityTypeEnum.PULL_REQUEST.value,
        new_entities=new_pulls,
        accumulator=prev_knowledge,
        store_method=self.store_pull_request,
    ) as analysis:
        accumulated = analysis.store()

    return accumulated
def initialize_knowledge(self, repository: str, is_local: bool = False):
    """Add repository knowledge to the data.

    Arguments:
        repository {str} -- repository identifier passed to ``gh.get_repo``
                            (only used when is_local is False).
        is_local {bool} -- if True, load previously stored knowledge from the
                           local store instead of analysing the remote repository.
    """
    if is_local:
        pr = PullRequest()
        pr.load_previous_knowledge(is_local=True)
        # FIX: original read `pr.stored_entitie` (typo) which would raise
        # AttributeError; the remote branch below uses `stored_entities`.
        self.data = pr.stored_entities
    else:
        gh_repo = self.gh.get_repo(repository)
        # NOTE(review): is_local=True is hard-coded here even though this is
        # the remote branch — looks suspicious, confirm against KnowledgeAnalysis.
        with KnowledgeAnalysis(entity=PullRequest(gh_repo), is_local=True) as analysis:
            analysis.run()
            self.data = analysis.entity.stored_entities
def analyse_content_files(
    self, repository: Repository, prev_knowledge: Dict[str, Any], is_local: bool = False
) -> Optional[Dict[str, Any]]:
    """Analyse content files in repository.

    Arguments:
        repository {Repository} -- currently the PyGithub lib is used because of its functionality
                                   ogr unfortunatelly did not provide enough to properly analyze content files
        prev_knowledge {Dict[str, Any]} -- previous knowledge stored.
        is_local -- flag to state if the knowledge should be collected locally or on Ceph.

    Returns:
        The accumulated knowledge from the first non-empty README found,
        or None when no README could be read.
    """
    _LOGGER.info("-------------Content Files Analysis-------------")

    # TODO: Extend to all types of files. Currently only README are considered.
    # TODO: Add all types of README extensions available
    for file_name in ["README.md", "README.rst"]:
        try:
            content_file = repository.get_contents(file_name)
            file_path = content_file.path
            # TODO: Adjust because most of the files are not text
            content_file_text = content_file.decoded_content.decode("utf-8")
        except Exception as exc:
            # Lazy %-style args: formatting happens only if the record is emitted.
            _LOGGER.info("%r not found for: %r", file_name, repository.full_name)
            _LOGGER.warning(exc)
            continue

        if not content_file_text:
            # Empty file -- fall through to the next README candidate.
            continue

        with KnowledgeAnalysis(
            entity_type=EntityTypeEnum.CONTENT_FILE.value,
            new_entities=[[file_name, file_path, content_file_text]],
            accumulator=prev_knowledge,
            store_method=self.store_content_file,
        ) as analysis:
            return analysis.store()

    return None
def analyse_entity(
    self,
    github_repo: Repository,
    project_path: Path,
    entity_cls: Type[Entity],
    is_local: bool = False,
):
    """Load old knowledge, merge in the newly analysed knowledge, and persist it.

    Arguments:
        github_repo {Repository} -- Github repo that will be analysed
        project_path {Path} -- The main directory where the knowledge will be stored
        entity_cls {Type[Entity]} -- entity class to instantiate for the analysis
            (e.g. Issue, PullRequest, ContentFile)
        is_local {bool} -- If true, the local store will be used for knowledge
            loading and storing.
    """
    target_entity = entity_cls(repository=github_repo)
    with KnowledgeAnalysis(entity=target_entity, is_local=is_local) as knowledge:
        knowledge.init_previous_knowledge()
        knowledge.run()
        knowledge.save_analysed_knowledge()