Example #1
0
 def refresh_repositories(self, msg, args):
     """
     Pull every configured branch checkout and report each one.

     This is a generator: it yields one status line per branch, right
     after that branch has been pulled.

     :param msg: Not used by this method.
     :param args: Not used by this method.
     """
     repository = RepoConfig(REPOSITORY_CONFIG_FILE).get_repository(
         REPOSITORY_NAME)
     for branch_item in repository["branches"]:
         checkout_path = branch_item["path"]
         GitCommand(checkout_path).pull()
         yield "{} had been updated.".format(checkout_path)
Example #2
0
    def __init__(self, config_file, github_token, git_path="git"):
        """
        Keep the credentials and build the configuration object.

        :param config_file: Name of the repository config file.
        :type config_file: str
        :param github_token: Github token
        :type github_token: str
        :param git_path: Executable git path.
        :type git_path: str
        """
        self._github_token = github_token
        self._git_path = git_path
        # Every later lookup goes through this Configuration instance.
        self._configure = Configuration(config_file)
Example #3
0
    def __init__(self,
                 config_file,
                 github_token,
                 repository_name,
                 logger,
                 git_path="git"):
        """
        Build the configuration, the Github client and the TransDB helper.

        :param config_file: Name of the repository config file.
        :type config_file: str
        :param github_token: Github token
        :type github_token: str
        :param repository_name: Repository name handed to
            ``Configuration.get_conn`` to obtain the value stored in
            ``self._connstr``.
        :param logger: Passed through to ``TransDB``.
        :param git_path: Executable git path.
        :type git_path: str
        """
        self._github_token = github_token
        self._git_path = git_path
        self._configure = Configuration(config_file)

        connection = self._configure.get_conn(repository_name)
        self._connstr = connection
        self._github_client = GithubOperator(github_token)
        self._trans_db = TransDB(connection, logger)
Example #4
0
class TranslateUtil:
    _git_path = ""
    _github_token = ""
    _configure = None

    def __init__(self, config_file, github_token, git_path="git"):
        """
        Keep the credentials and build the configuration object.

        :param config_file: Name of the repository config file.
        :type config_file: str
        :param github_token: Github token
        :type github_token: str
        :param git_path: Executable git path.
        :type git_path: str
        """
        self._github_token = github_token
        self._git_path = git_path
        # Every later lookup goes through this Configuration instance.
        self._configure = Configuration(config_file)

    def _filter_file_type(self, repository_name, file_name_list):
        """
        Only files with extensions in the list will left.
        :param repository_name: Repository name (in the config file)
        :type repository_name: str
        :param file_name_list:
        :type file_name_list: list
        :rtype: list
        """
        ext_list = self._configure.get_valid_extensions(repository_name)
        result = []
        for file_name in file_name_list:
            _, ext = splitext(file_name)
            if ext in ext_list:
                result.append(file_name)
        return result

    def _get_git_commander(self, repo):
        """
        Create a ``GitCommand`` bound to the configured git executable.

        :param repo: First argument for ``GitCommand``; callers in this
            class pass the "path" of a configured branch.
        :return: The new ``GitCommand`` instance.
        """
        commander = GitCommand(repo, self._git_path)
        return commander

    def _get_repo_path(self, repository_name, branch_name):
        self._configure.repository = repository_name
        branch_item = self._configure.get_branch(repository_name, branch_name)
        return branch_item["path"]

    @staticmethod
    def __is_ignore(file_name, ignore_list):
        result = False
        for pattern in ignore_list:
            if re.match(pattern, file_name):
                result = True
                break
        return result

    def _remove_ignore_files(self, file_list, repository, branch):
        ignore_list = self._configure.get_ignore_re_list(repository, branch)
        result_list = [
            item for item in file_list
            if not self.__is_ignore(item, ignore_list)
        ]
        return result_list

    def _get_clean_files(self, repository, branch, path):
        """
        Get file list in specified path.

        :param path: Relative path of the files we want.
        :type path: str
        :rtype: list
        """

        file_list = self._get_git_commander(
            self._get_repo_path(repository, branch)).list_files()
        file_list = self._filter_file_type(repository, file_list)
        path_sep = path.split(os.sep)
        result = [
            file_name[len(path):] for file_name in file_list
            if file_name.split(os.sep)[:len(path_sep)] == path_sep
        ]
        return result

    def list_branches(self, repository_name):
        return self._configure.list_branch(repository_name)

    def wait_for_limit(self, core_limit=10, search_limit=10):
        """
        Run the Github client's rate-limit check with the given thresholds.

        A new client is created from the stored token for this call.
        NOTE(review): presumably ``check_limit`` blocks until the quotas
        are above the thresholds -- confirm in ``GithubOperator``.

        :param core_limit: Forwarded to ``check_limit``.
        :param search_limit: Forwarded to ``check_limit``.
        """
        GithubOperator(self._github_token).check_limit(core_limit,
                                                       search_limit)

    def find_new_files(self, repository_name, branch_name, language):
        """
        Find files which is in the source path, but not in the
        target path, and return it as a List of string.

        :param branch_name:
        :param repository_name:
        :rtype: list of str
        :param language: Language name (in the configure file)
        :type language: str
        """
        target_path = self._configure.get_languages(repository_name,
                                                    language)["path"]
        source_path = self._configure.get_source(repository_name)["path"]

        # List files in source/language path
        source_list = self._get_clean_files(repository_name, branch_name,
                                            source_path)
        target_list = self._get_clean_files(repository_name, branch_name,
                                            target_path)

        # return the different files list
        result = list(set(source_list) - set(target_list))
        result.sort()
        return self._remove_ignore_files(result, repository_name, branch_name)

    def cache_issues(self, query, file_name, search_limit=30):
        """
        Search Github issues and save a summary of them as a JSON file.

        The file holds a JSON list with one object per issue:

        [
            {
                "number": 1234,
                "title": "Issue Title",
                "labels": ["version/1.12", "translating"]
            }
        ]

        :param query: Github query string
        :param file_name: Save search result into a json file
        :param search_limit: Forwarded to ``search_issue``.
        :return: Number of issues written.
        """
        github_client = GithubOperator(self._github_token)
        records = [
            {
                "number": issue.number,
                "title": issue.title,
                "labels": [label.name for label in issue.labels],
            }
            for issue in github_client.search_issue(query, search_limit)
        ]
        with open(file_name, "w") as handle:
            json.dump(records, handle, indent=2)
        return len(records)

    def find_updated_files(self, repository_name, branch_name, language):
        """
        Find files match this criteria:
        - Both in source and target.
        - Last commit of source file is later than the last commit of target file

        and return it as a List of string.

        :param repository_name: Repository name (In the config file)
        :param branch_name: Branch name (In the config file)

        :rtype: dict
        :param language: Language name (in the configure file)
        :type language: str
        """

        repository_path = self._configure.get_branch(repository_name,
                                                     branch_name)["path"]
        git_cmd = self._get_git_commander(repository_path)

        target_path = self._configure.get_languages(repository_name,
                                                    language)["path"]
        source_path = self._configure.get_source(repository_name)["path"]

        # get files both in source and target.
        source_list = self._get_clean_files(repository_name, branch_name,
                                            source_path)
        target_list = self._get_clean_files(repository_name, branch_name,
                                            target_path)
        same_files = list(set(source_list) & set(target_list))

        result = {}
        for file_name in same_files:
            source_last_commit = \
                git_cmd.get_last_commit(source_path + file_name)
            target_commit = \
                git_cmd.get_last_commit(target_path + file_name)
            target_time = git_cmd.get_hash_time(target_commit)
            source_base_commit = git_cmd.get_file_hash_before(
                source_path + file_name, target_time)
            if source_base_commit != source_last_commit:
                diff = git_cmd.get_diff_by_hash(source_path + file_name,
                                                source_last_commit,
                                                source_base_commit)
                result[file_name] = diff
        return result

    def get_default_label(self, repository_name, branch, language):
        """
        A new issue will be labeled with these labels.
        :param repository_name:
        :param branch:
        :param language:
        :return:
        """
        labels = self._configure.get_repository(repository_name)["labels"]
        labels += self._configure.get_branch(repository_name, branch)["labels"]
        labels += self._configure.get_languages(repository_name,
                                                language)["labels"]
        return labels

    def get_search_label(self, repository_name, branch, language):
        """
        Find dupe issues with these labels.
        :param repository_name:
        :param branch:
        :param language:
        :return:
        """
        labels = self._configure.get_branch(repository_name,
                                            branch)["labels"].copy()
        labels += self._configure.get_languages(repository_name,
                                                language)["labels"]
        return labels

    def create_issue(self,
                     github_repository,
                     title,
                     body,
                     labels=None,
                     search_labels=None,
                     search_cache="",
                     search_online=False):
        """

        :param labels: Labels for new issue
        :type labels: list of str
        :param search_online: Search duplicated issues online
        :param github_repository: Name of the repository.
        :param title: Title of the new issue.

        :param body: Body of the new issue.
        :param search_labels: Search duplicated issues with title & labels.
        :type search_labels: list of str
        :param search_cache: Search in the cache file
        :type search_cache: str
        :rtype: github.Issue.Issue
        """
        if search_labels is None:
            search_labels = []
        if labels is None:
            labels = []
        dupe = False
        if len(search_cache) > 0:
            with open(search_cache, "r") as handler:
                obj = json.load(handler)
                for issue_record in obj:
                    if issue_record["title"] == title:
                        if len(search_labels) == 0:
                            dupe = True
                            break
                        else:
                            if set(search_labels).issubset(
                                    issue_record["labels"]):
                                dupe = True
                            break

        github_client = GithubOperator(self._github_token)
        if search_online:
            search_cmd = "repo:{} state:open is:issue in:title {}".format(
                github_repository, title)
            if len(search_labels) > 0:
                search_cmd = "{} {}".format(
                    search_cmd,
                    " ".join(["label:{}".format(i) for i in search_labels]))
            issue_list = github_client.search_issue(search_cmd)
            for issue in issue_list:
                if issue.title == title:
                    dupe = True
        if dupe:
            return None
        new_issue = github_client.create_issue(github_repository, title, body)
        # Add labels
        for label_name in labels:
            new_issue.add_to_labels(label_name)
        return new_issue

    def gen_source_url(self, repo, branch, file_name):
        """

        :param repo:
        :param branch:
        :param file_name:
        """
        prefix = self._configure.get_branch(repo,
                                            branch)["url_prefix"]["source"]
        middle = ""
        if file_name[:1] != "/":
            middle = "/"
        return "{}{}{}".format(prefix, middle, file_name)

    def gen_web_url(self, repo, branch, file_name):
        """

        :param repo:
        :param branch:
        :param file_name:
        """
        prefix = self._configure.get_branch(repo, branch)["url_prefix"]["web"]
        middle = ""
        if file_name[:1] != "/":
            middle = "/"
        return "{}{}{}".format(prefix, middle, file_name)

    def sync_pr_state_to_task_issue(self,
                                    repository,
                                    branch,
                                    language,
                                    days=5,
                                    search_limit=30):
        pr_list = self._get_code_pr_and_files(repository, branch, language,
                                              days, search_limit)
        pr_file_list = self._clean_pr_files(pr_list, repository, language)
        result = []
        for pr_record in pr_file_list:
            result += self._sync_task_with_file_name(repository, branch,
                                                     language, pr_record)
        return result

    def _get_code_pr_and_files(self,
                               repository,
                               branch,
                               language,
                               days=5,
                               search_limit=30):
        """
        Find recent PRs with specified language.

        PRs of the code repository that carry the language's target labels
        and were created within the last ``days`` days are searched; only
        those whose base ref is the branch's "target_branch" are kept.

        :param repository: Repository name
        :param branch: Branch name
        :param language: Language
        :param days: Age limit of the PRs, in days.
        :param search_limit: Forwarded to ``search_issue``.
        :return: One dict per PR with the keys "url", "number", "files"
            (valid-extension files below the language path), "merged",
            "base", "head", "owner", "comments" (bodies starting with the
            trans-bot marker) and "object" (the pull request itself).
        :rtype: list
        """
        configure = self._configure
        after_date = (datetime.now() -
                      timedelta(days=days)).strftime("%Y-%m-%d")
        base = configure.get_branch(repository, branch)["target_branch"]

        code = configure.get_repository(repository)["github"]["code"]
        code_repo = "{}/{}".format(code["owner"], code["repository"])
        prefix = configure.get_languages(repository, language)["path"]
        labels = configure.get_languages(repository,
                                         language)["target_labels"]
        label_query = " ".join(["label:{}".format(label) for label in labels])
        query = "repo:{} type:pr {} created:>{}".format(
            code_repo, label_query, after_date)
        logging.warning(query)

        github_client = GithubOperator(self._github_token)
        result = []
        for item in github_client.search_issue(query, search_limit):
            pr = item.as_pull_request()
            if pr.base.ref != base:
                continue
            changed = [file_record.filename for file_record in pr.get_files()]
            changed = self._filter_file_type(repository, changed)
            record = {
                "url": pr.html_url,
                "number": pr.number,
                "files": [name for name in changed
                          if name.startswith(prefix)],
                "merged": pr.is_merged(),
                "base": pr.base.ref,
                "head": pr.head.ref,
                "owner": pr.user.login,
                "comments": [],
                "object": pr
            }
            # Only comments written with the trans-bot marker are kept.
            record["comments"].extend(
                comment.body for comment in pr.get_issue_comments()
                if comment.body.startswith("`[trans-bot:"))
            result.append(record)
        return result

    def _clean_pr_files(self, pr_list, repository, language):
        target_path = self._configure.get_languages(repository,
                                                    language)["path"]

        result = []
        for pr in pr_list:
            pr_merged = False
            for comment in pr["comments"]:
                if comment.startswith("`[trans-bot:merged]`"):
                    pr_merged = True
                if comment.startswith("`[trans-bot:N/A]`"):
                    pr_merged = True
            if pr_merged:
                continue
            time.sleep(2)
            if len(pr["files"]) != 1:
                body_pattern = "Thank you @{}, I can only process the PR with 1 file included, "
                body = "`[trans-bot:N/A]`\n\n" + body_pattern.format(pr["owner"]) + \
                       "will not be reported to the task issues."
                pr["object"].create_issue_comment(body)
                continue
            path_sep = target_path.split(os.sep)
            file_list = []
            for file_name in pr["files"]:
                if file_name.split(os.sep)[:len(path_sep)] == path_sep:
                    file_list.append(file_name[len(target_path):])
            pr["file_name"] = file_list[0]
            result.append(pr.copy())
        return result

    def _remove_status_label(self, repository, issue_item):
        for status in ["pushed", "merged", "pending", "working"]:
            status_label = self._configure.get_status_label(repository, status)
            if status_label in issue_item["labels"]:
                issue_item["object"].remove_from_labels(status_label)

    def _sync_task_with_file_name(self, repository, branch, language, pr):
        """
        Report one PR record to the open task issue(s) titled with its file.

        The task repository is searched for open issues carrying the search
        labels; only issues whose title equals ``pr["file_name"]`` are
        handled. Depending on assignee, labels and PR state, comments are
        posted on the PR and on the issue and status labels are changed.

        :param repository: Repository name (in the config file)
        :param branch: Branch name (in the config file)
        :param language: Language name (in the config file)
        :param pr: PR record; the keys "file_name", "owner", "url",
            "merged" and "object" are read here.
        :return: ``pr["url"]`` once per handled issue. Nothing is added for
            an issue without assignee, nor for an unmerged PR whose issue
            already has the "pushed" label.
        :rtype: list
        """
        search_labels = self.get_search_label(repository, branch, language)
        repository_data = self._configure.get_repository(repository)
        task_repo_name = "{}/{}".format(
            repository_data["github"]["task"]["owner"],
            repository_data["github"]["task"]["repository"])
        file_name = pr["file_name"]
        query = "repo:{} state:open type:issue in:title {} {}".format(
            task_repo_name,
            " ".join(["label:{}".format(i) for i in search_labels]), file_name)
        logging.warning(query)
        github_client = GithubOperator(self._github_token)
        issue_list = github_client.search_issue(query)
        result = []
        for issue in issue_list:
            # The search only narrows candidates; require an exact title.
            if issue.title != file_name:
                continue
            issue_item = {
                "title": issue.title,
                "number": issue.number,
                "url": issue.html_url,
                "labels": [],
                "object": issue,
            }
            if issue.assignee is not None:
                issue_item["owner"] = issue.assignee.login
            else:
                # Unassigned task: tell the PR author and leave the issue alone.
                body_pattern = "Thank you @{}, the [related task issue]({}) has no assignee. "
                body = "`[trans-bot:N/A]`\n\n" + body_pattern.format(pr["owner"], issue_item["url"]) + \
                       "will not be reported to the task issues"
                pr["object"].create_issue_comment(body)
                continue
            for label in issue.labels:
                issue_item["labels"].append(label.name)
            issue_working = self._configure.get_status_label(
                repository, "working") in issue_item["labels"]
            issue_pushed = self._configure.get_status_label(
                repository, "pushed") in issue_item["labels"]
            same_user = issue_item["owner"] == pr["owner"]
            if not same_user:
                # The task belongs to somebody else than the PR author.
                body_pattern = "Thank you @{}, the [related task issue]({}) had been assigned to others. "
                body = "`[trans-bot:N/A]`\n\n" + body_pattern.format(pr["owner"], issue_item["url"]) + \
                       "will not be reported to the task issues"
                pr["object"].create_issue_comment(body)
                result.append(pr["url"])
                continue
            if pr["merged"]:
                body_pattern = "Thank you @{}, the [related task issue]({}) had been updated."
                if issue_pushed:
                    issue.create_comment("/merged")
                else:
                    # Replace the current status label by "pushed" before
                    # posting the "/merged" comment on the issue.
                    self._remove_status_label(repository, issue_item)
                    issue.add_to_labels(
                        self._configure.get_status_label(repository, "pushed"))
                    time.sleep(1)
                    issue.create_comment("/merged")
                body = "`[trans-bot:merged]`\n\n" + body_pattern.format(
                    pr["owner"], issue_item["url"])
                pr["object"].create_issue_comment(body)

                result.append(pr["url"])
                continue
            # From here on the PR is open (not merged).
            body_pattern = "Thank you @{}, the [related task issue]({}) had been updated."
            body = "`[trans-bot:pushed]`\n\n" + body_pattern.format(
                pr["owner"], issue_item["url"])
            if issue_pushed:
                # Already labeled as pushed: nothing to report.
                continue
            if not issue_working:
                # Replace the current status label by "working" before
                # posting the "/pushed" comment on the issue.
                self._remove_status_label(repository, issue_item)
                issue.add_to_labels(
                    self._configure.get_status_label(repository, "working"))
                time.sleep(1)
            pr["object"].create_issue_comment(body)
            issue.create_comment("/pushed")
            result.append(pr["url"])
        return result

    def copy_version(self, repository, from_version, to_version, target_lang):
        """
        Copy translations from one branch checkout to another.

        A translation is copied when the file exists in the source language
        of both branches, is translated in ``from_version``, and the two
        source files have the same md5 hash. Missing target directories are
        created. The hash and source file of every copied translation are
        printed.

        :param repository: Repository name (in the config file)
        :param from_version: Branch name to copy translations from.
        :param to_version: Branch name to copy translations to.
        :param target_lang: Language name (in the config file)
        """
        config = self._configure
        source_repo_base_path = config.get_branch(repository,
                                                  from_version)["path"]
        target_repo_base_path = config.get_branch(repository,
                                                  to_version)["path"]
        source_lang_path = config.get_source(repository)["path"]
        target_lang_path = config.get_languages(repository,
                                                target_lang)["path"]

        def listed(version, lang_path):
            # Relative file names below lang_path in the given branch.
            return set(self._get_clean_files(repository, version, lang_path))

        translated_in_source = (listed(from_version, source_lang_path)
                                & listed(from_version, target_lang_path))
        same_list = translated_in_source & listed(to_version,
                                                  source_lang_path)

        layout = "{}/{}{}"
        for same_file_name in same_list:
            source_file = layout.format(source_repo_base_path,
                                        source_lang_path, same_file_name)
            target_file = layout.format(target_repo_base_path,
                                        source_lang_path, same_file_name)
            source_hash = md5_hash(source_file)
            target_hash = md5_hash(target_file)
            if source_hash != target_hash:
                continue

            source_trans_file = layout.format(source_repo_base_path,
                                              target_lang_path,
                                              same_file_name)
            target_trans_file = layout.format(target_repo_base_path,
                                              target_lang_path,
                                              same_file_name)
            print(target_hash, source_file)
            new_path = os.path.dirname(target_trans_file)
            if not os.path.exists(new_path):
                os.makedirs(new_path)
            copyfile(source_trans_file, target_trans_file)
    def set_milestone_by_label(self,
                               repository_name,
                               label_list,
                               milestone,
                               core_limit=10,
                               search_limit=10):
        """
        Set a milestone on the task issues that carry the given labels.

        Issues of the task repository that have all labels and are not in
        the milestone yet are searched. The rate-limit check runs on every
        ``core_limit``-th issue.

        :param repository_name: Repository name (in the config file)
        :param label_list: Label names the issues must carry.
        :param milestone: Milestone to set.
        :param core_limit: Check interval, also forwarded to ``check_limit``.
        :param search_limit: Forwarded to ``search_issue`` and
            ``check_limit``.
        :return: Number of issues processed.
        :rtype: int
        """
        task = self._configure.get_repository(
            repository_name)["github"]["task"]
        task_repo_name = "{}/{}".format(task["owner"], task["repository"])
        github_client = GithubOperator(self._github_token)

        label_query = " ".join("label:" + item for item in label_list)
        query = "type:issue -milestone:{} repo:{} {}".format(
            milestone, task_repo_name, label_query)
        logging.info(query)

        count = 0
        for count, issue in enumerate(
                github_client.search_issue(query, search_limit), start=1):
            if count % core_limit == 0:
                github_client.check_limit(core_limit, search_limit)
            github_client.set_issue_milestone(task_repo_name, issue, milestone)
        return count
Example #5
0
class TranslateUtil:
    _git_path = ""
    _github_token = ""
    _configure = None

    def __init__(self, config_file, github_token, git_path="git"):
        """
        Keep the credentials and build the configuration object.

        :param config_file: Name of the repository config file.
        :type config_file: str
        :param github_token: Github token
        :type github_token: str
        :param git_path: Executable git path.
        :type git_path: str
        """
        self._github_token = github_token
        self._git_path = git_path
        # Every later lookup goes through this Configuration instance.
        self._configure = Configuration(config_file)

    def _filter_file_type(self, repository_name, file_name_list):
        """
        Only files with extensions in the list will left.
        :param repository_name: Repository name (in the config file)
        :type repository_name: str
        :param file_name_list:
        :type file_name_list: list
        :rtype: list
        """
        ext_list = self._configure.get_valid_extensions(repository_name)
        result = []
        for file_name in file_name_list:
            _, ext = splitext(file_name)
            if ext in ext_list:
                result.append(file_name)
        return result

    def _get_git_commander(self, repo):
        """
        Create a ``GitCommand`` bound to the configured git executable.

        :param repo: First argument for ``GitCommand``; callers in this
            class pass the "path" of a configured branch.
        :return: The new ``GitCommand`` instance.
        """
        commander = GitCommand(repo, self._git_path)
        return commander

    def _get_repo_path(self, repository_name, branch_name):
        self._configure.repository = repository_name
        branch_item = self._configure.get_branch(repository_name, branch_name)
        return branch_item["path"]

    def _get_clean_files(self, repository, branch, path):
        """
        Get file list in specified path.

        :param path: Relative path of the files we want.
        :type path: str
        :rtype: list
        """

        file_list = self._get_git_commander(
            self._get_repo_path(repository, branch)
        ).list_files()
        file_list = self._filter_file_type(repository, file_list)
        path_sep = path.split(os.sep)
        result = [file_name[len(path):]
                  for file_name in file_list
                  if file_name.split(os.sep)[:len(path_sep)] == path_sep]
        return result

    def list_branches(self, repository_name):
        return self._configure.list_branch(repository_name)

    def wait_for_limit(self, core_limit=10, search_limit=10):
        """
        Run the Github client's rate-limit check with the given thresholds.

        A new client is created from the stored token for this call.
        NOTE(review): presumably ``check_limit`` blocks until the quotas
        are above the thresholds -- confirm in ``GithubOperator``.

        :param core_limit: Forwarded to ``check_limit``.
        :param search_limit: Forwarded to ``check_limit``.
        """
        GithubOperator(self._github_token).check_limit(core_limit,
                                                       search_limit)

    def find_new_files(self, repository_name, branch_name, language):
        """
        Find files which is in the source path, but not in the
        target path, and return it as a List of string.

        :param branch_name:
        :param repository_name:
        :rtype: list of str
        :param language: Language name (in the configure file)
        :type language: str
        """
        target_path = self._configure.get_languages(
            repository_name, language)["path"]
        source_path = self._configure.get_source(
            repository_name)["path"]

        # List files in source/language path
        source_list = self._get_clean_files(repository_name,
                                            branch_name, source_path)
        target_list = self._get_clean_files(repository_name,
                                            branch_name, target_path)

        # return the different files list
        result = list(set(source_list) - set(target_list))
        result.sort()
        return result

    def cache_issues(self, query, file_name, search_limit=30):
        """
        Search Github issues and save a summary of them as a JSON file.

        The file holds a JSON list with one object per issue:

        [
            {
                "number": 1234,
                "title": "Issue Title",
                "labels": ["version/1.12", "translating"]
            }
        ]

        :param query: Github query string
        :param file_name: Save search result into a json file
        :param search_limit: Forwarded to ``search_issue``.
        :return: Number of issues written.
        """
        github_client = GithubOperator(self._github_token)
        records = [
            {
                "number": issue.number,
                "title": issue.title,
                "labels": [label.name for label in issue.labels],
            }
            for issue in github_client.search_issue(query, search_limit)
        ]
        with open(file_name, "w") as handle:
            json.dump(records, handle, indent=2)
        return len(records)

    def find_updated_files(self, repository_name, branch_name, language):
        """
        Find files match this criteria:
        - Both in source and target.
        - Last commit of source file is later than the last commit of target file

        and return it as a List of string.

        :param repository_name: Repository name (In the config file)
        :param branch_name: Branch name (In the config file)

        :rtype: dict
        :param language: Language name (in the configure file)
        :type language: str
        """

        repository_path = self._configure.get_branch(repository_name,
                                                     branch_name)["path"]
        git_cmd = self._get_git_commander(repository_path)

        target_path = self._configure.get_languages(repository_name,
                                                    language)["path"]
        source_path = self._configure.get_source(repository_name)["path"]

        # get files both in source and target.
        source_list = self._get_clean_files(repository_name,
                                            branch_name, source_path)
        target_list = self._get_clean_files(repository_name,
                                            branch_name, target_path)
        same_files = list(set(source_list) & set(target_list))

        result = {}
        for file_name in same_files:
            source_last_commit = \
                git_cmd.get_last_commit(source_path + file_name)
            target_commit = \
                git_cmd.get_last_commit(target_path + file_name)
            target_time = git_cmd.get_hash_time(target_commit)
            source_base_commit = git_cmd.get_file_hash_before(
                source_path + file_name, target_time)
            if source_base_commit != source_last_commit:
                diff = git_cmd.get_diff_by_hash(
                    source_path + file_name,
                    source_last_commit, source_base_commit)
                result[file_name] = diff
        return result

    def get_default_label(self, repository_name, branch, language):
        """

        :param repository_name:
        :param branch:
        :param language:
        :return:
        """
        labels = self._configure.get_repository(repository_name)["labels"]
        labels += self._configure.get_branch(repository_name, branch)["labels"]
        labels += self._configure.get_languages(repository_name, language)["labels"]
        return labels

    def get_search_label(self, repository_name, branch, language):
        labels = self._configure.get_branch(repository_name, branch)["labels"]
        labels += self._configure.get_languages(repository_name, language)["labels"]
        return labels

    def create_issue(self, github_repository, title, body, labels=[],
                     search_labels=[],
                     search_cache="",
                     search_online=False):
        """

        :param labels: Labels for new issue
        :type labels: list of str
        :param search_online: Search duplicated issues online
        :param github_repository: Name of the repository.
        :param title: Title of the new issue.

        :param body: Body of the new issue.
        :param search_labels: Search duplicated issues with title & labels.
        :type search_labels: list of str
        :param search_cache: Search in the cache file
        :type search_cache: str
        :rtype: github.Issue.Issue
        """
        dupe = False
        if len(search_cache) > 0:
            with open(search_cache, "r") as handler:
                obj = json.load(handler)
                for issue_record in obj:
                    if issue_record["title"] == title:
                        if len(search_labels) == 0:
                            dupe = True
                            break
                        else:
                            if set(search_labels).issubset(issue_record["labels"]):
                                dupe = True
                            break

        github_client = GithubOperator(self._github_token)
        if search_online:
            search_cmd = "repo:{} in:title {}".format(github_repository, title)
            if len(search_labels) > 0:
                search_cmd = "{} {}".format(search_cmd,
                                            " ".join(
                                                ["label:{}".format(i) for i in search_labels])
                                            )
            issue_list = github_client.search_issue(search_cmd)
            for issue in issue_list:
                if issue.title == title:
                    dupe = True
        if dupe:
            return None
        new_issue = github_client.create_issue(github_repository, title, body)
        # Add labels
        for label_name in labels:
            new_issue.add_to_labels(label_name)
        return new_issue

    def gen_source_url(self, repo, branch, file_name):
        """

        :param repo:
        :param branch:
        :param file_name:
        """
        prefix = self._configure.get_branch(repo, branch)["url_prefix"]["source"]
        middle = ""
        if file_name[:1] != "/":
            middle = "/"
        return "{}{}{}".format(prefix, middle, file_name)

    def gen_web_url(self, repo, branch, file_name):
        """

        :param repo:
        :param branch:
        :param file_name:
        """
        prefix = self._configure.get_branch(repo, branch)["url_prefix"]["web"]
        middle = ""
        if file_name[:1] != "/":
            middle = "/"
        return "{}{}{}".format(prefix, middle, file_name)
Example #6
0
def code_repository_name():
    """
    Return the "owner/repository" name of the code repository.

    The values are read from the ``github.code`` section of the entry for
    REPOSITORY_NAME in the file REPOSITORY_CONFIG_FILE.
    """
    code_section = RepoConfig(REPOSITORY_CONFIG_FILE).get_repository(
        REPOSITORY_NAME)["github"]["code"]
    owner = code_section["owner"]
    name = code_section["repository"]
    return "{}/{}".format(owner, name)
Example #7
0
class TranslateUtil:
    """
    Helper for tracking translation work: it compares source and translated
    files in a git checkout, records them in a database and creates or
    comments on GitHub issues.
    """
    # Class-level defaults; __init__ overwrites every one of them per instance.
    _git_path = ""
    _github_token = ""
    _configure = None
    _connstr = ""
    _github_client = None
    _trans_db = None

    def __init__(self,
                 config_file,
                 github_token,
                 repository_name,
                 logger,
                 git_path="git"):
        """
        Set up the configuration, the GitHub client and the translation
        database.

        :param config_file: Name of the repository config file.
        :type config_file: str
        :param github_token: Github token
        :type github_token: str
        :param repository_name: Repository name passed to the
            configuration's ``get_conn`` lookup.
        :param logger: Logger handed to ``TransDB``.
        :param git_path: Executable git path.
        :type git_path: str
        """
        configure = Configuration(config_file)
        conn_str = configure.get_conn(repository_name)

        self._git_path = git_path
        self._github_token = github_token
        self._configure = configure
        self._connstr = conn_str
        self._github_client = GithubOperator(github_token)
        self._trans_db = TransDB(conn_str, logger)

    def _filter_file_type(self, repository_name, file_name_list):
        """
        Only files with extensions in the list will left.
        :param repository_name: Repository name (in the config file)
        :type repository_name: str
        :param file_name_list:
        :type file_name_list: list
        :rtype: list
        """
        ext_list = self._configure.get_valid_extensions(repository_name)
        result = []
        for file_name in file_name_list:
            _, ext = splitext(file_name)
            if ext in ext_list:
                result.append(file_name)
        return result

    def _get_git_commander(self, repo):
        """
        Create a git command wrapper for a repository.

        :param repo: Passed to ``GitCommand`` as its first argument
        :return: A ``GitCommand`` built with the configured git executable
        """
        commander = GitCommand(repo, self._git_path)
        return commander

    def _get_repo_path(self, repository_name, branch_name):
        self._configure.repository = repository_name
        branch_item = self._configure.get_branch(repository_name, branch_name)

        return branch_item

    def __is_ignore(self, file_name, ignore_list):
        result = False
        for pattern in ignore_list:
            if re.match(pattern, file_name):
                result = True
                break
        return result

    def _remove_ignore_files(self, file_list, repository, branch):
        ignore_list = self._configure.get_ignore_re_list(repository, branch)
        result_list = [
            item for item in file_list
            if not self.__is_ignore(item, ignore_list)
        ]
        return result_list

    def _get_clean_files(self, repository, branch_path, path):
        """
        Get file list in specified path.

        :param path: Relative path of the files we want.
        :type path: str
        :rtype: list
        """
        file_list = self._get_git_commander(branch_path).list_files()
        file_list = self._filter_file_type(repository, file_list)
        path_sep = path.split(os.sep)
        result = [
            file_name[len(path):] for file_name in file_list
            if file_name.split(os.sep)[:len(path_sep)] == path_sep
        ]
        return result

    def list_branches(self, repository_name):
        return self._configure.list_branch(repository_name)

    def wait_for_limit(self, core_limit=10, search_limit=10):
        self._github_client.check_limit(core_limit, search_limit)

    def find_files_comm(self, repository_name, branch_name, language,
                        is_create):
        """
        Find files which is in the source path, but not in the
        target path, and return it as a List of string.

        :param branch_name:
        :param repository_name:
        :rtype: list of str
        :param language: Language name (in the configure file)
        :param is_create true 创建issue false 更新 issue
        :type language: str
        """

        target_path = self._configure.get_languages(repository_name,
                                                    language)["path"]

        source_path = self._configure.get_source(repository_name)["path"]

        branch_item = self.checkout_branch(repository_name, branch_name)
        branch_path = branch_item['path']

        # List files in source/language path
        source_list = self._get_clean_files(repository_name, branch_path,
                                            source_path)
        target_list = self._get_clean_files(repository_name, branch_path,
                                            target_path)

        # return the different files list
        # 差集用于新建 issue(即翻译文档不存在)
        result_diff = list(set(source_list) - set(target_list))
        result_diff.sort()
        # return the intersection files list
        # 交集用于更新 issue(即翻译文档已经存在,可能需要更新)
        result_inter = list(set(source_list) & set(target_list))
        result_inter.sort()

        diff = self._remove_ignore_files(result_diff, repository_name,
                                         branch_name)
        inter_list = self._remove_ignore_files(result_inter, repository_name,
                                               branch_name)

        if is_create == True:
            inter = inter_list
        else:
            git_cmd = self._get_git_commander(branch_path)
            inter_result = {}
            for file_name in inter_list:
                source_last_commit = git_cmd.get_last_commit(source_path +
                                                             file_name)
                target_commit = git_cmd.get_last_commit(target_path +
                                                        file_name)

                target_time = git_cmd.get_hash_time(target_commit)

                source_base_commit = git_cmd.get_file_hash_before(
                    source_path + file_name, target_time)
                # 返回差异文件
                if source_base_commit != source_last_commit:
                    diff_file = git_cmd.get_diff_by_hash(
                        source_path + file_name, source_last_commit,
                        source_base_commit)
                    inter_result[file_name] = diff_file

            inter = inter_result

        return {"diff": diff, "inter": inter}

    # checkout branch
    def checkout_branch(self, repository_name, branch_name):
        branch_item = self._get_repo_path(repository_name, branch_name)
        cmd = GitCommand(branch_item['path'])
        cmd.checkout(branch_item['value'])
        cmd.pull()
        return branch_item

    def init_files(self, repository_name, branch_name, language, is_create):
        """
        Find all documents for a branch
        status: 
        初始化阶段
            0 未生成 issue (对应翻译文件已存在- 这里标记不生新的 issue)
            1 待生成 issue (对应翻译文件不存在- 标记生成初始化 issue)
        文档更新阶段
            2 翻译文件对应的原始文件有更新(标记需要在生成 issue) 
        issue 生成状态
            3 对应的翻译文件(issue 已生成)
        """
        result = self.find_files_comm(repository_name, branch_name, language,
                                      is_create)

        if is_create == True:
            self._trans_db.add_issue(result['diff'], branch_name, 1)
            self._trans_db.add_issue(result['inter'], branch_name, 0)
        else:
            self._trans_db.add_issue(result['inter'], branch_name, 2)

        return len(result['diff']), len(result['inter'])

    def init_issue_files(self, repository_name, branch_name, language, status,
                         create_issue, max_write, max_result):
        #status 待生成 issue 2.需要更新的 issue 3.已生成 issue
        tasks = self._trans_db.pending_issues(branch_name, status)

        if create_issue == 1:
            new_count = 0
            skip_count = 0
            for task in tasks:
                file_name = task.files
                if status == 1:
                    type_label = "sync/new"
                    diff = ""
                    body = "Source File: [{}]({})"
                    body = body.format(
                        file_name,
                        self.gen_source_url(repository_name, branch_name,
                                            file_name),
                    )
                elif status == 2:
                    type_label = "sync/update"
                    diff = task.diff_content
                    body = "Source File: [{}]({})\nDiff:\n~~~diff\n {}\n~~~"
                    body = body.format(
                        file_name,
                        self.gen_source_url(repository_name, branch_name,
                                            file_name), diff)
                else:
                    break

                default_label = self.get_default_label(repository_name,
                                                       branch_name, language)
                search_label = default_label

                default_label.append(type_label)

                if file_name.find("/docs/") == -1:
                    docs = False
                    default_label.append('priority/P1')
                else:
                    docs = True  #Docs 文档
                    default_label.append('priority/P0')

                # create issue
                new_issue = self.create_issue(
                    self.remote_repository_name(repository_name), file_name,
                    body, default_label, default_label, "", False)
                if new_issue is None:
                    skip_count += 1
                else:
                    # new_count += 1
                    self._trans_db.add_update_issues(file_name, diff,
                                                     task.version, docs, 3,
                                                     new_issue.number)

                    # if new_count >= max_write:
                    #     break
                    # if (new_count + skip_count) % max_result:
                    #     self.wait_for_limit(max_result, max_result)
        return tasks

    def remote_repository_name(self, repository_name):
        repo_obj = self._configure.get_repository(repository_name)
        repo_owner = repo_obj["github"]["owner"]
        repo_name = repo_obj["github"]["repository"]
        return "{}/{}".format(repo_owner, repo_name)

    def website_repository_name(self, repository_name):
        repo_obj = self._configure.get_repository(repository_name)
        repo_owner = repo_obj["website"]["owner"]
        repo_name = repo_obj["website"]["repository"]
        return "{}/{}".format(repo_owner, repo_name)

    def k8s_official_repository_name(self, repository_name):
        repo_obj = self._configure.get_repository(repository_name)
        repo_owner = repo_obj["k8s-official"]["owner"]
        repo_name = repo_obj["k8s-official"]["repository"]
        return "{}/{}".format(repo_owner, repo_name)

    def cache_issues(self, query, file_name, search_limit=30):
        """
        :param search_limit:
        :param query: Github query string
        :param file_name: Save search result into a json file

        record = {"query": query,
            "timestamp": 1234567
            items:
            [
                {
                "number": 1234,
                "title": "Issue Title",
                labels: ["version/1.12", "translating"]
                },
            ]
        }
        """
        issue_list = self._github_client.search_issue(query, search_limit)
        result = []
        for issue in issue_list:
            issue_item = {
                "number": issue.number,
                "title": issue.title,
                "labels": []
            }
            for label in issue.labels:
                issue_item["labels"].append(label.name)
            result.append(issue_item)
        with open(file_name, "w") as handle:
            json.dump(result, handle, indent=2)
        return len(result)

    def get_default_label(self, repository_name, branch, language):
        """
        :param repository_name:
        :param branch:
        :param language:
        :return:
        """
        labels = self._configure.get_branch(repository_name, branch)["labels"]
        labels += self._configure.get_languages(repository_name,
                                                language)["labels"]
        return labels

    def create_issue(self,
                     github_repository,
                     title,
                     body,
                     labels=[],
                     search_labels=[],
                     search_cache="",
                     search_online=False):
        """
        :param labels: Labels for new issue
        :type labels: list of str
        :param search_online: Search duplicated issues online
        :param github_repository: Name of the repository.
        :param title: Title of the new issue.

        :param body: Body of the new issue.
        :param search_labels: Search duplicated issues with title & labels.
        :type search_labels: list of str
        :param search_cache: Search in the cache file
        :type search_cache: str
        :rtype: github.Issue.Issue
        """
        dupe = False
        if len(search_cache) > 0:
            with open(search_cache, "r") as handler:
                obj = json.load(handler)
                for issue_record in obj:
                    if issue_record["title"] == title:
                        if len(search_labels) == 0:
                            dupe = True
                            break
                        else:
                            if set(search_labels).issubset(
                                    issue_record["labels"]):
                                dupe = True
                            break
        if search_online:
            search_cmd = "repo:{} state:open is:issue in:title {}".format(
                github_repository, title)
            if len(search_labels) > 0:
                search_cmd = "{} {}".format(
                    search_cmd,
                    " ".join(["label:{}".format(i) for i in search_labels]))
            issue_list = self._github_client.search_issue(search_cmd)
            for issue in issue_list:
                if issue.title == title:
                    dupe = True
        if dupe:
            return None
        new_issue = self._github_client.create_issue(github_repository, title,
                                                     body)
        # Add labels
        for label_name in labels:
            new_issue.add_to_labels(label_name)
        return new_issue

    def gen_source_url(self, repo, branch, file_name):
        """
        :param repo:
        :param branch:
        :param file_name:
        """
        prefix = self._configure.get_branch(repo,
                                            branch)["url_prefix"]["source"]
        middle = ""
        if file_name[:1] != "/":
            middle = "/"
        return "{}{}{}".format(prefix, middle, file_name)

    def gen_web_url(self, repo, branch, file_name):
        """
        :param repo:
        :param branch:
        :param file_name:
        """
        prefix = self._configure.get_branch(repo, branch)["url_prefix"]["web"]
        middle = ""
        if file_name[:1] != "/":
            middle = "/"
        return "{}{}{}".format(prefix, middle, file_name)

    def whatsnew(self, repository_name):
        cmd = "repo:{} label:welcome is:open type:issue".format(
            self.remote_repository_name(repository_name))
        issue_list = self._github_client.search_issue(cmd, 10)
        return issue_list

    def comment_issue(self, repository_name, issue_id, comment):
        comment_obj = self._github_client.issue_comment(
            self.remote_repository_name(repository_name), issue_id, comment)
        return comment_obj

    # Comment on issues in bulk to confirm their status
    def batch_comment_issue(self, repository_name, comment, max_result):

        issue_list = self.find_open_issues(repository_name, max_result)
        for issue in issue_list:
            self._github_client.issue_comment(
                self.remote_repository_name(repository_name), issue.number,
                comment)

        return len(issue_list)

    def search_issues(self, repository_name, query):
        # tmpstr = "{} label:version/1.12 is:open type:issue repo:{}".format(query, remote_repository_name())
        tmpstr = "{} is:open type:issue repo:{}".format(
            query, self.remote_repository_name(repository_name))

        issue_list = self._github_client.search_issue(tmpstr, 10)
        return issue_list

    def show_limit(self):
        limit = self._github_client.get_limit()
        return limit

    def find_open_issues(self, repository_name, max_result):
        query = "repo:{} is:open is:issue".format(
            self.remote_repository_name(repository_name))
        issue_list = self._github_client.search_issue(query, max_result)
        return issue_list

    # https://developer.github.com/v3/pulls/
    def get_trans_pr(self, repository_name, max_result):
        query = "repo:{} is:pr label:language/zh".format(
            self.website_repository_name(repository_name))
        pr_list = self._github_client.search_issue(query, max_result)

        self._trans_db.add_update_pr(pr_list)
        return len(pr_list)

    # Fetch the translation files
    def get_trans_files(self, repository_name, max_result):
        prs = self._trans_db.get_prs()

        new_count = 0
        for pr in prs:
            pr_doc = self._github_client.pr_files(
                self.website_repository_name(repository_name), pr.number)
            self._trans_db.add_update_files(pr, pr_doc)

            new_count = new_count + 1
            if new_count % max_result:
                self.wait_for_limit(max_result, max_result)
        return len(prs)

    # Update the translator information
    def _update_author(self, max_result):
        users = self._trans_db.user_list()
        new_count = 0
        for user in users:
            user_info = self._github_client.get_user(user.author)
            self._trans_db._add_update_author(user_info)
            new_count = new_count + 1
            if new_count % max_result:
                self.wait_for_limit(max_result, max_result)
        return list(users)

    # Update the weekly report
    def _update_report(self, repository_name, file_name, comment, repeat):
        content = self._trans_db._week_report(repeat)
        tmp_file = self._github_client.create_file(
            self.k8s_official_repository_name(repository_name), file_name,
            comment, content)
        return tmp_file['content'].path

    def report_update(self, repository_name, branch_name, max_result):
        # 1. 译者信息更新
        user_count = self._update_author(max_result)
        # 2. 指定版本翻译文档中文字数统计
        branch_item = self.checkout_branch(repository_name, branch_name)
        files_count = self._trans_db._files_cn_word_count(branch_item)
        # 3. 译者翻译字数统计(含翻译+更新)
        author_count = self._trans_db._author_files_cn_word_count()
        # 4. 以周为单位进行数据统计(更新 week_trans 数据表 0.新增 1.更新)
        self._trans_db._week_author_cn_word_count(0)
        self._trans_db._week_author_cn_word_count(1)
        # 5. 生成翻译周报(含翻译+更新)
        file_create_name = self._update_report(
            repository_name, "report/contribution-stage2.md", "周报更新-(新增)", 0)
        file_update_name = self._update_report(
            repository_name, "report/contribution-stage2-update.md",
            "周报更新-(更新)", 1)
        # 6. 生成图表展示文件 - data.json
        json_data = self._trans_db.get_echarjs_data()
        tmp_file = self._github_client.create_file(
            self.k8s_official_repository_name(repository_name), "data.json",
            "报表更新", json_data)

        return {
            "user_count": user_count,
            "files_count": files_count,
            "author_count": author_count,
            "file_create_name": file_create_name,
            "file_update_name": file_update_name,
            "json_file": tmp_file['content'].path
        }
Example #8
0
#!/usr/bin/env python3
from gitutil.configure import Configuration
from gitutil.commands import GitCommand
import os

# Location of the repository config file, read from the environment.
REPOSITORY_CONFIG_FILE = os.getenv("REPOSITORY_CONFIG_FILE")
# Read from the REPOSITORY environment variable; not referenced in the
# visible part of this script.
REPOSITORY_NAME = os.getenv("REPOSITORY")
config = Configuration(REPOSITORY_CONFIG_FILE)

# Pull the checkout of every entry listed under "branches".
for branch in config["branches"]:
    cmd = GitCommand(branch["path"])
    cmd.pull()