Example #1
    def __init__(self, file_path: str, known_words_file_path: str = None, no_camel_case: bool = False,
                 no_failure: bool = False, expand_dictionary: bool = False, templates: bool = False,
                 use_git: bool = False, prev_ver: str = None, release_notes_only: bool = False):
        if templates:
            ReleaseNotesChecker(template_examples=True)
            sys.exit(0)

        # if nothing was entered, default to using git
        elif not file_path and not use_git:
            use_git = True

        self.file_path = file_path
        self.git_util = None
        self.prev_ver = prev_ver if prev_ver else 'demisto/master'

        if use_git:
            self.git_util = GitUtil()

        if release_notes_only:
            self.SUPPORTED_FILE_TYPES = [FileType.RELEASE_NOTES]

        self.files = set()  # type:Set
        self.spellchecker = SpellChecker()
        self.unknown_words = {}  # type:Dict
        self.no_camel_case = no_camel_case
        self.known_words_file_path = known_words_file_path
        self.found_misspelled = False
        self.no_failure = no_failure
        self.expand_dictionary = expand_dictionary
        self.files_with_misspells = set()  # type:Set
        self.files_without_misspells = set()  # type:Set
        self.malformed_rn_files = set()  # type:Set
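
A minimal usage sketch for this constructor - hedged: it assumes the __init__ belongs to the DocReviewer class shown in Example #11, whose run_doc_review() method drives the check:

# Hypothetical invocation - spell-check one README without camel-case splitting:
reviewer = DocReviewer(file_path='Packs/MyPack/README.md', no_camel_case=True)
reviewer.run_doc_review()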
Example #2
def get_files_to_format_from_git(supported_file_types: List[str], prev_ver: str, include_untracked: bool) -> List[str]:
    """Get the files to format from git.

    Args:
        supported_file_types(list): File extensions which are supported by format
        prev_ver(str): The branch name or commit hash to compare with
        include_untracked(bool): Whether to include untracked files

    Returns:
        list. A list of all the files that should be formatted.
    """
    git_util = GitUtil()
    all_changed_files = git_util.get_all_changed_files(prev_ver=prev_ver, include_untracked=include_untracked)

    filtered_files = []
    for file_path in all_changed_files:
        str_file_path = str(file_path)

        # get the file extension without the '.'
        file_extension = os.path.splitext(str_file_path)[1][1:]
        if file_extension in supported_file_types and os.path.exists(str_file_path):
            filtered_files.append(str_file_path)

    if filtered_files:
        detected_files_string = "\n".join(filtered_files)
        click.secho(f'Found the following files to format:\n{detected_files_string}', fg='bright_cyan')

    else:
        click.secho('Did not find any files to format', fg='bright_red')

    return filtered_files
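
A hedged usage sketch for the function above; the extension list and prev_ver are illustrative values, and a content git repository is assumed to be the working directory:

files_to_format = get_files_to_format_from_git(
    supported_file_types=['yml', 'json', 'py'],  # extensions without the '.'
    prev_ver='demisto/master',                   # branch or commit to diff against
    include_untracked=False,
)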
Example #3
    def __init__(self,
                 file_paths: Optional[List] = None,
                 known_words_file_paths: Optional[List] = None,
                 no_camel_case: bool = False,
                 no_failure: bool = False,
                 expand_dictionary: bool = False,
                 templates: bool = False,
                 use_git: bool = False,
                 prev_ver: str = None,
                 release_notes_only: bool = False,
                 load_known_words_from_pack: bool = False):
        if templates:
            ReleaseNotesChecker(template_examples=True)
            sys.exit(0)

        # if nothing was entered, default to using git
        elif not file_paths and not use_git:
            use_git = True

        self.file_paths = file_paths if file_paths else []
        self.git_util = None

        if use_git:
            self.git_util = GitUtil(repo=Content.git())
            self.prev_ver = self.git_util.handle_prev_ver()[1]
        else:
            self.prev_ver = prev_ver if prev_ver else 'demisto/master'

        if release_notes_only:
            self.SUPPORTED_FILE_TYPES = [FileType.RELEASE_NOTES]
            # when running doc-review --release-notes, there is no need to consider yml/json files with an invalid schema
            self.ignore_invalid_schema_file = True
        else:
            self.ignore_invalid_schema_file = False

        self.known_words_file_paths = known_words_file_paths if known_words_file_paths else []
        self.load_known_words_from_pack = load_known_words_from_pack
        self.known_pack_words_file_path = ''

        self.current_pack = None
        self.files: list = []
        self.spellchecker = SpellChecker()
        self.unknown_words = {}  # type:Dict
        self.no_camel_case = no_camel_case
        self.found_misspelled = False
        self.no_failure = no_failure
        self.expand_dictionary = expand_dictionary
        self.files_with_misspells = set()  # type:Set
        self.files_without_misspells = set()  # type:Set
        self.malformed_rn_files = set()  # type:Set
Example #4
    def is_release_branch():
        # type: () -> bool
        """Check if we are working on a release branch.

        Returns:
            (bool): is release branch
        """
        git_util = GitUtil(repo=Content.git())
        main_branch = git_util.handle_prev_ver()[1]
        if not main_branch.startswith('origin'):
            main_branch = 'origin/' + main_branch

        diff_string_config_yml = run_command(f"git diff {main_branch} .circleci/config.yml")
        if re.search(r'[+-][ ]+CONTENT_VERSION: ".*', diff_string_config_yml):
            return True
        return False
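
The release-branch check hinges on the final regex; a self-contained sketch with a fabricated diff hunk (the CONTENT_VERSION line format is an assumption about .circleci/config.yml):

import re

sample_diff = ('-      CONTENT_VERSION: "20.4.1"\n'
               '+      CONTENT_VERSION: "20.5.0"')
# a changed CONTENT_VERSION line in the diff marks a release branch
print(bool(re.search(r'[+-][ ]+CONTENT_VERSION: ".*', sample_diff)))  # True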
Example #5
    def __init__(self,
                 pack_path: str,
                 update_type: Union[str, None],
                 modified_files_in_pack: set,
                 added_files: set,
                 specific_version: str = None,
                 pre_release: bool = False,
                 pack: str = None,
                 pack_metadata_only: bool = False,
                 text: str = '',
                 existing_rn_version_path: str = '',
                 is_force: bool = False,
                 is_bc: bool = False):
        self.pack = pack if pack else get_pack_name(pack_path)
        self.update_type = update_type
        self.pack_path = pack_path
        # renamed files will appear in the modified list as a tuple: (old path, new path)
        modified_files_in_pack = {
            file_[1] if isinstance(file_, tuple) else file_
            for file_ in modified_files_in_pack
        }
        self.modified_files_in_pack = set()
        for file_path in modified_files_in_pack:
            self.modified_files_in_pack.add(
                self.change_image_or_desc_file_path(file_path))

        self.added_files = added_files
        self.pre_release = pre_release
        self.specific_version = specific_version
        self.existing_rn_changed = False
        self.text = text
        self.existing_rn_version_path = existing_rn_version_path
        self.should_delete_existing_rn = False
        self.pack_metadata_only = pack_metadata_only
        self.is_force = is_force
        git_util = GitUtil(repo=Content.git())
        self.main_branch = git_util.handle_prev_ver()[1]
        self.metadata_path = os.path.join(self.pack_path, 'pack_metadata.json')
        self.master_version = self.get_master_version()
        self.rn_path = ''
        self.is_bc = is_bc
        self.bc_path = ''
Example #6
    def get_all_diff_text_files(self, branch_name, is_circle):
        """
        Get all new/modified text files that need to be searched for secrets
        :param branch_name: current branch being worked on
        :param is_circle: boolean to check if being run from circle
        :return: list: list of text files
        """
        if is_circle:
            prev_ver = self.prev_ver
            if not prev_ver:
                self.git_util = GitUtil(repo=Content.git())
                prev_ver = self.git_util.handle_prev_ver()[1]
            if not prev_ver.startswith('origin'):
                prev_ver = 'origin/' + prev_ver
            print(f"Running secrets validation against {prev_ver}")

            changed_files_string = run_command(f"git diff --name-status {prev_ver}...{branch_name}")
        else:
            print("Running secrets validation on all changes")
            changed_files_string = run_command("git diff --name-status --no-merges HEAD")
        return list(self.get_diff_text_files(changed_files_string))
Example #7
def _get_repository_properties() -> Optional[giturlparse.result.GitUrlParsed]:
    """Returns the parsed properties of the git repository of the cwd.
    If not running in a git repository, returns None.
    """
    try:
        urls = GitUtil().repo.remote().urls
        for url in urls:
            parsed_git = giturlparse.parse(url)
            if parsed_git and parsed_git.host and parsed_git.repo:
                return parsed_git
    except (InvalidGitRepositoryError, AttributeError):
        return None
    return None
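
The URL parsing step can be exercised on its own; a small sketch with an illustrative remote URL, assuming the giturlparse package exposes host/owner/repo attributes as used above:

import giturlparse

parsed = giturlparse.parse('git@github.com:demisto/content.git')
print(parsed.host, parsed.owner, parsed.repo)  # github.com demisto content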
Example #8
    def update_playbook_usages(self) -> None:
        """Check if the current playbook is used as a sub-playbook in other changed playbooks.
        Change the playbook's id in the tasks if needed.
        """
        current_playbook_id = str(self.data.get('id'))
        new_playbook_id = str(self.data.get('name'))

        # if the id and name are the same - there is no need for this format.
        if current_playbook_id == new_playbook_id:
            return

        # gather all the changed files - if the formatted playbook was
        # modified, then any playbooks that use it may have been changed
        # alongside it - use git to gather all other changed playbooks
        try:
            git_util = GitUtil()
            modified_files = git_util.modified_files(include_untracked=True)
            added_files = git_util.added_files(include_untracked=True)
            renamed_files = {
                item[1]
                for item in git_util.renamed_files(include_untracked=True)
            }

            all_changed_files = modified_files.union(added_files).union(
                renamed_files)

        except (InvalidGitRepositoryError, TypeError) as e:
            click.secho(
                'Unable to connect to git - skipping sub-playbook checks',
                fg='yellow')
            if self.verbose:
                click.secho(f'The error: {e}')
            return

        for file_path in all_changed_files:
            self.check_for_subplaybook_usages(str(file_path),
                                              current_playbook_id,
                                              new_playbook_id)
Example #9
def test_find_primary_branch():
    """
    Given
        - A Git repo

    When
        - Searching for the primary branch

    Then
        - Ensure the returned value is either 'main', 'master', or None
    """
    from demisto_sdk.commands.common.git_util import GitUtil

    assert not GitUtil.find_primary_branch(None)

    class Object(object):
        pass

    empty_repo = Object()
    assert not GitUtil.find_primary_branch(empty_repo)

    repo_with_empty_remotes = Object()
    repo_with_empty_remotes.remotes = []
    assert not GitUtil.find_primary_branch(repo_with_empty_remotes)

    repo_with_empty_remotes_refs = Object()
    repo_with_empty_remotes_refs.remotes = []
    empty_refs = Object()
    repo_with_empty_remotes_refs.remotes.append(empty_refs)
    assert not GitUtil.find_primary_branch(repo_with_empty_remotes_refs)

    repo_with_remotes_refs_main = Object()
    repo_with_remotes_refs_main.remotes = []
    refs_main = Object()
    refs_main.refs = ['a', 'origin/main', 'c']
    repo_with_remotes_refs_main.remotes.append(refs_main)
    assert GitUtil.find_primary_branch(repo_with_remotes_refs_main) == 'main'

    repo_with_remotes_refs_master = Object()
    repo_with_remotes_refs_master.remotes = []
    refs_master = Object()
    refs_master.refs = ['a', 'origin/master', 'c']
    repo_with_remotes_refs_master.remotes.append(refs_master)
    assert GitUtil.find_primary_branch(
        repo_with_remotes_refs_master) == 'master'

    repo_with_remotes_refs_other = Object()
    repo_with_remotes_refs_other.remotes = []
    refs_other = Object()
    refs_other.refs = ['a', 'b']
    repo_with_remotes_refs_other.remotes.append(refs_other)
    assert not GitUtil.find_primary_branch(repo_with_remotes_refs_other)
Example #10
class SecretsValidator(object):

    def __init__(
            self,
            configuration=Configuration(), is_circle=False, ignore_entropy=False, white_list_path='',
            input_path='', prev_ver=None
    ):
        self.input_paths = input_path.split(',') if input_path else None
        self.configuration = configuration
        self.is_circle = is_circle
        self.white_list_path = white_list_path
        self.ignore_entropy = ignore_entropy
        self.prev_ver = prev_ver
        if self.prev_ver and not self.prev_ver.startswith('origin'):
            self.prev_ver = 'origin/' + self.prev_ver

    def get_secrets(self, branch_name, is_circle):
        secret_to_location_mapping = {}
        if self.input_paths:
            secrets_file_paths = self.input_paths
        else:
            secrets_file_paths = self.get_all_diff_text_files(branch_name, is_circle)
        # If an input path was supplied, we should not run on git. If not supplied, make sure we are not in the middle of a merge.
        if not run_command('git rev-parse -q --verify MERGE_HEAD') or self.input_paths:
            secret_to_location_mapping = self.search_potential_secrets(secrets_file_paths, self.ignore_entropy)
            if secret_to_location_mapping:
                secrets_found_string = 'Secrets were found in the following files:'
                for file_name in secret_to_location_mapping:
                    for line in sorted(secret_to_location_mapping[file_name]):
                        secrets_found_string += ('\nIn File: ' + f'{file_name}:{line}' + '\n')
                        if len(secret_to_location_mapping[file_name][line]) == 1:
                            secrets_found_string += f'Secret found: {secret_to_location_mapping[file_name][line][0]}\n'
                        else:
                            secrets_found_string += f'Secrets found: {secret_to_location_mapping[file_name][line]}\n'

                if not is_circle:
                    secrets_found_string += '\n\nRemove or whitelist secrets in order to proceed, then re-commit\n'

                else:
                    secrets_found_string += '\n\nThe secrets were exposed in a public repository,' \
                                            ' remove the files asap and report it.\n'

                secrets_found_string += 'For more information about whitelisting visit: ' \
                                        'https://xsoar.pan.dev/docs/concepts/demisto-sdk#secrets'
                print_error(secrets_found_string)
        return secret_to_location_mapping

    def reformat_secrets_output(self, secrets_list):
        """
        Get a list of secrets and reformat its output
        :param secrets_list: List of secrets
        :return: str: List of secrets
        """
        return '\n'.join(secrets_list) if secrets_list else ''

    def get_all_diff_text_files(self, branch_name, is_circle):
        """
        Get all new/modified text files that need to be searched for secrets
        :param branch_name: current branch being worked on
        :param is_circle: boolean to check if being run from circle
        :return: list: list of text files
        """
        if is_circle:
            prev_ver = self.prev_ver
            if not prev_ver:
                self.git_util = GitUtil(repo=Content.git())
                prev_ver = self.git_util.handle_prev_ver()[1]
            if not prev_ver.startswith('origin'):
                prev_ver = 'origin/' + prev_ver
            print(f"Running secrets validation against {prev_ver}")

            changed_files_string = run_command(f"git diff --name-status {prev_ver}...{branch_name}")
        else:
            print("Running secrets validation on all changes")
            changed_files_string = run_command("git diff --name-status --no-merges HEAD")
        return list(self.get_diff_text_files(changed_files_string))

    def get_diff_text_files(self, files_string):
        """Filter out only added/modified text files from git diff
        :param files_string: string representing the git diff files
        :return: text_files_list: string of full path to text files
        """
        # file statuses to filter from the diff, no need to test deleted files.
        all_files = files_string.split('\n')
        text_files_list = set()
        for file_name in all_files:
            file_data: list = list(filter(None, file_name.split('\t')))
            if not file_data:
                continue
            file_status = file_data[0]
            if 'r' in file_status.lower():
                file_path = file_data[2]
            else:
                file_path = file_data[1]
            # only modified/added files that are text-readable, excluding the white_list file
            if (file_status.lower() in ACCEPTED_FILE_STATUSES or 'r' in file_status.lower()) and self.is_text_file(
                    file_path):
                if not any(skipped_file in file_path for skipped_file in SKIPPED_FILES):
                    text_files_list.add(file_path)
        return text_files_list
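    # Illustrative `git diff --name-status` lines as parsed by the loop above:
    #   "M\tPacks/MyPack/README.md"          -> status 'M',    path = file_data[1]
    #   "R100\told/path.yml\tnew/path.yml"   -> status 'R100', path = file_data[2]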

    @staticmethod
    def is_text_file(file_path):
        file_extension = os.path.splitext(file_path)[1]
        if file_extension in TEXT_FILE_TYPES:
            return True
        return False

    def search_potential_secrets(self, secrets_file_paths: list, ignore_entropy: bool = False):
        """Returns potential secrets(sensitive data) found in committed and added files
        :param secrets_file_paths: paths of files that are being commited to git repo
        :param ignore_entropy: If True then will ignore running entropy algorithm for finding potential secrets

        :return: dictionary(filename: (list)secrets) of strings sorted by file name for secrets found in files
        """
        secret_to_location_mapping: DefaultDict[str, defaultdict] = defaultdict(lambda: defaultdict(list))
        for file_path in secrets_file_paths:
            # Check whether the file path is in a pack, and get the pack name
            is_pack = is_file_path_in_pack(file_path)
            pack_name = get_pack_name(file_path)
            # Get the generic/ioc/files white list sets, based on whether this is a pack or not
            secrets_white_list, ioc_white_list, files_white_list = self.get_white_listed_items(is_pack, pack_name)
            # Skip white listed files

            if file_path in files_white_list:
                print("Skipping secrets detection for file: {} as it is white listed".format(file_path))
                continue
            # Init vars for current loop
            file_name = os.path.basename(file_path)
            _, file_extension = os.path.splitext(file_path)
            # get file contents
            file_contents = self.get_file_contents(file_path, file_extension)
            # if disable-secrets comments are detected, remove the line(s)
            file_contents = self.remove_secrets_disabled_line(file_contents)
            # in packs, treat all whitelist items as regexes as well and strip them from the file contents to avoid re-matching them later
            if is_pack:
                file_contents = self.remove_whitelisted_items_from_file(file_contents, secrets_white_list)

            yml_file_contents = self.get_related_yml_contents(file_path)
            # Temporarily add all context output path keywords to the whitelist
            if file_extension == YML_FILE_EXTENSION or yml_file_contents:
                temp_white_list = self.create_temp_white_list(yml_file_contents if yml_file_contents else file_contents)
                secrets_white_list = secrets_white_list.union(temp_white_list)
            # Search line by line for strings with high entropy / IoC regex matches as possibly suspicious
            for line_num, line in enumerate(file_contents.split('\n')):
                # REGEX scanning for IOCs and false positive groups
                regex_secrets, false_positives = self.regex_for_secrets(line)
                for regex_secret in regex_secrets:
                    if not any(ioc.lower() in regex_secret.lower() for ioc in ioc_white_list):
                        secret_to_location_mapping[file_path][line_num + 1].append(regex_secret)
                # add false positives to the white list before testing the strings in the line

                secrets_white_list = secrets_white_list.union(false_positives)

                if not ignore_entropy:
                    # due to the nature of eml files, skip string-by-string secret detection - regex only
                    if file_extension in SKIP_FILE_TYPE_ENTROPY_CHECKS or \
                            any(demisto_type in file_name for demisto_type in SKIP_DEMISTO_TYPE_ENTROPY_CHECKS):
                        continue
                    line = self.remove_false_positives(line)
                    # calculate entropy for each string in the file
                    for string_ in line.split():
                        # compare the lower case of the string against both generic whitelist & temp white list
                        if not any(
                                white_list_string.lower() in string_.lower()
                                for white_list_string in secrets_white_list):

                            entropy = self.calculate_shannon_entropy(string_)
                            if entropy >= ENTROPY_THRESHOLD:
                                secret_to_location_mapping[file_path][line_num + 1].append(string_)

        return secret_to_location_mapping

    @staticmethod
    def remove_whitelisted_items_from_file(file_content: str, secrets_white_list: set) -> str:
        """Removes whitelisted items from file content

        Arguments:
            file_content (str): The content of the file to remove the whitelisted item from
            secrets_white_list (set): List of whitelist items to remove from the file content.

        Returns:
            str: The file content with the whitelisted items removed.
        """
        for item in secrets_white_list:
            try:
                file_content = re.sub(WHILEIST_REGEX.format(re.escape(item)), '', file_content)
            except re.error as err:
                error_string = f"Could not use secrets with item: {item}"
                print_error(error_string)
                raise re.error(error_string, str(err))
        return file_content

    @staticmethod
    def create_temp_white_list(file_contents) -> set:
        temp_white_list: set = set()
        context_paths = re.findall(r'contextPath: (\S+\.+\S+)', file_contents)
        for context_path in context_paths:
            context_path = context_path.split('.')
            context_path = [white_item.lower() for white_item in context_path if len(white_item) > 4]
            temp_white_list = temp_white_list.union(context_path)

        return temp_white_list

    def get_related_yml_contents(self, file_path):
        # if script or readme file, search for yml in order to retrieve temp white list
        yml_file_contents = ''
        # Check whether it is an integration documentation file or has a supported file extension
        if find_type(file_path) in [FileType.PYTHON_FILE, FileType.README, FileType.POWERSHELL_FILE]:
            yml_file_contents = self.retrieve_related_yml(os.path.dirname(file_path))
        return yml_file_contents

    @staticmethod
    def retrieve_related_yml(integration_path):
        matching_yml_file_contents = None
        yml_file = os.path.join(integration_path, os.path.basename(integration_path) + '.yml')
        if os.path.exists(yml_file):
            with io.open(yml_file, mode="r", encoding="utf-8") as matching_yml_file:
                matching_yml_file_contents = matching_yml_file.read()
        return matching_yml_file_contents

    @staticmethod
    def regex_for_secrets(line):
        """Scans for IOCs with potentially low entropy score
        :param line: line to test as string representation (string)
        :return  potential_secrets (list) IOCs found via regex, false_positives (list) Non secrets with high entropy
        """
        potential_secrets = []
        false_positives = []

        # Dates REGEX for false positive prevention, since dates have high entropy
        dates = re.findall(DATES_REGEX, line)
        if dates:
            false_positives += [date[0].lower() for date in dates]
        # UUID REGEX - for false positives
        uuids = re.findall(UUID_REGEX, line)
        if uuids:
            false_positives += uuids
        # docker image versions are detected as IPs, so we ignore and whitelist them
        # example: dockerimage: demisto/duoadmin:1.0.0.147
        re_res = re.search(r'dockerimage:\s*\w*demisto/\w+:(\d+.\d+.\d+.\d+)', line)
        if re_res:
            docker_version = re_res.group(1)
            false_positives.append(docker_version)
            line = line.replace(docker_version, '')
        # URL REGEX
        urls = re.findall(URLS_REGEX, line)
        if urls:
            potential_secrets += urls
        # EMAIL REGEX
        emails = re.findall(EMAIL_REGEX, line)
        if emails:
            potential_secrets += emails
        # IPV6 REGEX
        ipv6_list = re.findall(IPV6_REGEX, line)
        if ipv6_list:
            for ipv6 in ipv6_list:
                if ipv6 != '::' and len(ipv6) > 4:
                    potential_secrets.append(ipv6)
        # IPV4 REGEX
        ipv4_list = re.findall(IPV4_REGEX, line)
        if ipv4_list:
            potential_secrets += ipv4_list

        return potential_secrets, false_positives

    @staticmethod
    def calculate_shannon_entropy(data) -> float:
        """Algorithm to determine the randomness of a given data.
        Higher is more random/complex, most English words will yield in average result of 3
        :param data: could be either a list/dict or a string.
        :return: entropy: entropy score.
        """
        if not data:
            return 0
        entropy = 0.0
        # iterate over the unicode code points of all printable characters
        for char in (ord(c) for c in string.printable):
            # probability of event X
            p_x = float(data.count(chr(char))) / len(data)
            if p_x > 0:
                # the information content of each outcome, in bits
                entropy += - p_x * math.log(p_x, 2)
        return entropy
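    # A rough feel for the scoring (assuming ENTROPY_THRESHOLD is around 3.8 - an assumption):
    #   calculate_shannon_entropy('password')        -> 2.75  (ordinary word, below threshold)
    #   calculate_shannon_entropy('xK9#mQ2$vL5@pR8') -> ~3.91 (all-distinct chars, would be flagged)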

    def get_white_listed_items(self, is_pack, pack_name):
        final_white_list, ioc_white_list, files_white_list = self.get_generic_white_list(self.white_list_path)
        if is_pack:
            pack_whitelist_path = os.path.join(PACKS_DIR, pack_name, PACKS_WHITELIST_FILE_NAME)
            pack_white_list, _, pack_files_white_list = self.get_packs_white_list(pack_whitelist_path, pack_name)
            final_white_list.extend(pack_white_list)
            files_white_list.extend(pack_files_white_list)

        final_white_list = set(final_white_list)
        if '' in final_white_list:
            # remove('') ignores empty lines in whitelists - users can accidentally add empty lines,
            # and those would cause every string to be whitelisted
            final_white_list.remove('')

        return final_white_list, set(ioc_white_list), set(files_white_list)

    @staticmethod
    def get_generic_white_list(whitelist_path):
        final_white_list = []
        ioc_white_list = []
        files_white_list = []
        if os.path.isfile(whitelist_path):
            with io.open(whitelist_path, mode="r", encoding="utf-8") as secrets_white_list_file:
                secrets_white_list_file = json.load(secrets_white_list_file)
                for name, white_list in secrets_white_list_file.items():  # type: ignore
                    if name == 'iocs':
                        for sublist in white_list:
                            ioc_white_list += [white_item for white_item in white_list[sublist] if len(white_item) > 4]
                        final_white_list += ioc_white_list
                    elif name == 'files':
                        files_white_list = white_list
                    else:
                        final_white_list += [white_item for white_item in white_list if len(white_item) > 4]

        return final_white_list, ioc_white_list, files_white_list

    @staticmethod
    def get_packs_white_list(whitelist_path, pack_name=None):
        final_white_list = []
        files_white_list = []

        if os.path.isfile(whitelist_path):
            with io.open(whitelist_path, mode="r", encoding="utf-8") as secrets_white_list_file:
                temp_white_list = secrets_white_list_file.read().split('\n')
            for white_list_line in temp_white_list:
                if white_list_line.startswith('file:'):
                    white_list_line = os.path.join(PACKS_DIR, pack_name, white_list_line[5:])
                    if not os.path.isfile(os.path.join(white_list_line)):
                        print_warning(f'{white_list_line} not found.\n'
                                      'please add the file name in the following format\n'
                                      'file:[Scripts|Integrations|Playbooks]/name/file.example\n'
                                      'e.g. file:Scripts/HelloWorldScript/HelloWorldScript.py')
                    files_white_list.append(white_list_line)
                else:
                    final_white_list.append(white_list_line)
        return final_white_list, [], files_white_list

    def get_file_contents(self, file_path, file_extension):
        try:
            # if pdf or README.md file, parse text
            integration_readme = re.match(pattern=PACKS_INTEGRATION_README_REGEX,
                                          string=file_path,
                                          flags=re.IGNORECASE)
            if file_extension == '.pdf':
                file_contents = self.extract_text_from_pdf(file_path)
            elif file_extension == '.md' and integration_readme:
                file_contents = self.extract_text_from_md_html(file_path)
            else:
                # Open each file and read its contents in UTF-8 encoding to avoid unicode errors
                with io.open(file_path, mode="r", encoding="utf-8", errors='ignore') as commited_file:
                    file_contents = commited_file.read()
            file_contents = self.ignore_base64(file_contents)
            return file_contents
        except Exception as ex:
            print("Failed opening file: {}. Exception: {}".format(file_path, ex))
            raise

    @staticmethod
    def extract_text_from_pdf(file_path):
        page_num = 0
        file_contents = ''
        try:
            pdf_file_obj = open('./' + file_path, 'rb')
            pdf_reader = PyPDF2.PdfFileReader(pdf_file_obj)
            num_pages = pdf_reader.numPages
        except PyPDF2.utils.PdfReadError:
            print('ERROR: Could not parse PDF file in path: {} - ***Review Manually***'.format(file_path))
            return file_contents
        while page_num < num_pages:
            pdf_page = pdf_reader.getPage(page_num)
            page_num += 1
            file_contents += pdf_page.extractText()

        return file_contents

    @staticmethod
    def extract_text_from_md_html(file_path):
        try:
            with open(file_path, mode='r') as html_page:
                soup = BeautifulSoup(html_page, features="html.parser")
                file_contents = soup.text
                return file_contents
        except Exception as ex:
            print_error('Unable to parse the following file {} due to error {}'.format(file_path, ex))
            raise

    @staticmethod
    def remove_false_positives(line):
        false_positive = re.search(r'([^\s]*[(\[{].*[)\]}][^\s]*)', line)
        if false_positive:
            false_positive = false_positive.group(1)
            line = line.replace(false_positive, '')
        return line

    @staticmethod
    def is_secrets_disabled(line, skip_secrets):
        if bool(re.findall(r'(disable-secrets-detection-start)', line)):
            skip_secrets['skip_multi'] = True
        elif bool(re.findall(r'(disable-secrets-detection-end)', line)):
            skip_secrets['skip_multi'] = False
        elif bool(re.findall(r'(disable-secrets-detection)', line)):
            skip_secrets['skip_once'] = True
        return skip_secrets

    @staticmethod
    def ignore_base64(file_contents):
        base64_strings = re.findall(r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|'
                                    r'[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{4})', file_contents)
        for base64_string in base64_strings:
            if len(base64_string) > 500:
                file_contents = file_contents.replace(base64_string, '')
        return file_contents

    @staticmethod
    def get_branch_name() -> str:
        branches = run_command('git branch')
        branch_name_reg = re.search(r'\* (.*)', branches)
        if not branch_name_reg:
            return ''
        return branch_name_reg.group(1)

    def find_secrets(self):
        print_color('Starting secrets detection', LOG_COLORS.GREEN)
        is_circle = self.is_circle
        branch_name = self.get_branch_name()
        secrets_found = self.get_secrets(branch_name, is_circle)
        if secrets_found:
            return True
        else:
            print_color('Finished validating secrets, no secrets were found.', LOG_COLORS.GREEN)
            return False

    def remove_secrets_disabled_line(self, file_content: str) -> str:
        """Removes lines that have "disable-secrets-detection" from file content

        Arguments:
            file_content (str): The content of the file to remove the "disable-secrets-detection" lines from

        Returns:
            str: The new file content with the "disable-secrets-detection" lines removed.
        """
        skip_secrets = {'skip_once': False, 'skip_multi': False}
        new_file_content = ""
        for line in file_content.split('\n'):
            skip_secrets = self.is_secrets_disabled(line, skip_secrets)
            if skip_secrets['skip_once'] or skip_secrets['skip_multi']:
                skip_secrets['skip_once'] = False
            else:
                new_file_content += f'{line}\n'
        return new_file_content
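    # e.g. for file_content 'token = "abc"  # disable-secrets-detection\nprint("hi")',
    # the first line is dropped and only 'print("hi")' survives in the returned content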

    def run(self):
        if self.find_secrets():
            return 1

        else:
            return 0
Example #11
class DocReviewer:
    """Perform a spell check on the given .yml or .md file.
    """

    SUPPORTED_FILE_TYPES = [
        FileType.INTEGRATION, FileType.SCRIPT, FileType.PLAYBOOK,
        FileType.README, FileType.DESCRIPTION, FileType.RELEASE_NOTES,
        FileType.BETA_INTEGRATION, FileType.TEST_PLAYBOOK, FileType.TEST_SCRIPT
    ]

    def __init__(self,
                 file_paths: Optional[List] = None,
                 known_words_file_paths: Optional[List] = None,
                 no_camel_case: bool = False,
                 no_failure: bool = False,
                 expand_dictionary: bool = False,
                 templates: bool = False,
                 use_git: bool = False,
                 prev_ver: str = None,
                 release_notes_only: bool = False,
                 load_known_words_from_pack: bool = False):
        if templates:
            ReleaseNotesChecker(template_examples=True)
            sys.exit(0)

        # if nothing was entered, default to using git
        elif not file_paths and not use_git:
            use_git = True

        self.file_paths = file_paths if file_paths else []
        self.git_util = None

        if use_git:
            self.git_util = GitUtil(repo=Content.git())
            self.prev_ver = self.git_util.handle_prev_ver()[1]
        else:
            self.prev_ver = prev_ver if prev_ver else 'demisto/master'

        if release_notes_only:
            self.SUPPORTED_FILE_TYPES = [FileType.RELEASE_NOTES]
            # when running doc-review --release-notes, there is no need to consider yml/json files with an invalid schema
            self.ignore_invalid_schema_file = True
        else:
            self.ignore_invalid_schema_file = False

        self.known_words_file_paths = known_words_file_paths if known_words_file_paths else []
        self.load_known_words_from_pack = load_known_words_from_pack
        self.known_pack_words_file_path = ''

        self.current_pack = None
        self.files: list = []
        self.spellchecker = SpellChecker()
        self.unknown_words = {}  # type:Dict
        self.no_camel_case = no_camel_case
        self.found_misspelled = False
        self.no_failure = no_failure
        self.expand_dictionary = expand_dictionary
        self.files_with_misspells = set()  # type:Set
        self.files_without_misspells = set()  # type:Set
        self.malformed_rn_files = set()  # type:Set

    @staticmethod
    def find_known_words_from_pack(file_path: str) -> Tuple[str, list]:
        """Find known words in file_path's pack.

        Args:
            file_path: The path of the file within the pack

        Return (the known words file path or '' if it was not found, list of known words)
        """
        file_path_obj = Path(file_path)
        if 'Packs' in file_path_obj.parts:
            pack_name = file_path_obj.parts[file_path_obj.parts.index('Packs') + 1]
            packs_ignore_path = os.path.join("Packs", pack_name,
                                             PACKS_PACK_IGNORE_FILE_NAME)
            default_pack_known_words = add_default_pack_known_words(file_path)
            if os.path.isfile(packs_ignore_path):
                config = ConfigParser(allow_no_value=True)
                config.read(packs_ignore_path)
                if 'known_words' in config.sections():
                    packs_known_words = default_pack_known_words + list(
                        config['known_words'])
                    return packs_ignore_path, packs_known_words
                else:
                    click.secho(
                        f'\nNo [known_words] section was found within: {packs_ignore_path}',
                        fg='yellow')
                    return packs_ignore_path, default_pack_known_words

            click.secho(
                f'\nNo .pack-ignore file was found within pack: {packs_ignore_path}',
                fg='yellow')
            return '', default_pack_known_words

        click.secho(
            f'\nCould not load pack\'s known words file since no pack structure was found for {file_path}'
            f'\nMake sure you are running from the content directory.',
            fg='bright_red')
        return '', []

    @staticmethod
    def is_upper_case_word_plural(word):
        """check if a given word is an upper case word in plural, like: URLs, IPs, etc"""
        if len(word) > 2 and word[-1] == 's':
            singular_word = word[:-1]
            return singular_word == singular_word.upper()
        return False

    def is_camel_case(self, word):
        """check if a given word is in camel case"""
        if word != word.lower() and word != word.upper() and "_" not in word and word != word.title():
            # check if word is an upper case plural, like IPs. If it is, then the word is not in camel case
            return not self.is_upper_case_word_plural(word)
        return False

    @staticmethod
    def camel_case_split(camel):
        """split camel case word into sub-words"""
        tokens = re.compile('([A-Z]?[a-z]+)').findall(camel)
        for token in tokens:
            # surround with spaces to handle capital words like IP/URL/DNS that are not included in the regex
            camel = camel.replace(token, f' {token} ')

        return camel.split()
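    # e.g. camel_case_split('GetURLReputation') -> ['Get', 'URL', 'Reputation']
    # (the regex captures only 'Get' and 'Reputation'; padding them with spaces
    #  leaves 'URL' isolated, so the final split() recovers it)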

    def get_all_md_and_yml_files_in_dir(self, dir_name):
        """recursively get all the supported files from a given dictionary"""
        for root, _, files in os.walk(dir_name):
            for file_name in files:
                full_path = os.path.join(root, file_name)
                if find_type(full_path,
                             ignore_invalid_schema_file=self.ignore_invalid_schema_file) in self.SUPPORTED_FILE_TYPES:
                    self.files.append(str(full_path))

    def gather_all_changed_files(self):
        modified = self.git_util.modified_files(
            prev_ver=self.prev_ver)  # type: ignore[union-attr]
        added = self.git_util.added_files(
            prev_ver=self.prev_ver)  # type: ignore[union-attr]
        renamed = self.git_util.renamed_files(
            prev_ver=self.prev_ver,
            get_only_current_file_names=True)  # type: ignore[union-attr]

        return modified.union(added).union(renamed)  # type: ignore[arg-type]

    def get_files_from_git(self):
        click.secho('Gathering all changed files from git', fg='bright_cyan')
        for file in self.gather_all_changed_files():
            file = str(file)
            if os.path.isfile(file) and find_type(
                    file,
                    ignore_invalid_schema_file=self.ignore_invalid_schema_file
            ) in self.SUPPORTED_FILE_TYPES:
                self.files.append(file)

    def get_files_to_run_on(self, file_path=None):
        """Get all the relevant files that the spell-check could work on"""
        if self.git_util:
            self.get_files_from_git()

        elif os.path.isdir(file_path):
            self.get_all_md_and_yml_files_in_dir(file_path)

        elif find_type(
                file_path,
                ignore_invalid_schema_file=self.ignore_invalid_schema_file
        ) in self.SUPPORTED_FILE_TYPES:
            self.files.append(file_path)

    @staticmethod
    def print_unknown_words(unknown_words):
        for word, corrections in unknown_words.items():
            if corrections:
                click.secho(f'  - {word} - did you mean: {corrections}',
                            fg='bright_red')
            else:
                click.secho(f'  - {word}', fg='bright_red')
        click.secho(
            'If these are not misspelled consider adding them to a known_words file:\n'
            '  Pack related words: content/Packs/<PackName>/.pack-ignore under the [known_words] section.\n'
            '  Not pack specific words: content/Tests/known_words.txt\n'
            'To test locally add --use-packs-known-words or --known-words flags.',
            fg='yellow')

    def print_file_report(self):
        if self.files_without_misspells:
            click.secho(
                '\n================= Files Without Misspells =================',
                fg='green')
            no_misspells_string = '\n'.join(self.files_without_misspells)
            click.secho(no_misspells_string, fg='green')

        if self.files_with_misspells:
            click.secho(
                '\n================= Files With Misspells =================',
                fg='bright_red')
            misspells_string = '\n'.join(self.files_with_misspells)
            click.secho(misspells_string, fg='bright_red')

        if self.malformed_rn_files:
            click.secho(
                '\n================= Malformed Release Notes =================',
                fg='bright_red')
            bad_rn = '\n'.join(self.malformed_rn_files)
            click.secho(bad_rn, fg='bright_red')

    def run_doc_review(self):
        """Runs spell-check on the given file and release notes check if relevant.

        Returns:
            bool. True if no problematic words found, False otherwise.
        """
        click.secho(
            '\n================= Starting Doc Review =================',
            fg='bright_cyan')
        if len(self.SUPPORTED_FILE_TYPES) == 1:
            click.secho('Running only on release notes', fg='bright_cyan')

        if self.file_paths:
            for file_path in self.file_paths:
                self.get_files_to_run_on(file_path)
        else:
            self.get_files_to_run_on()

        # no eligible files found
        if not self.files:
            click.secho("Could not find any relevant files - Aborting.")
            return True

        self.add_known_words()

        for file in self.files:
            click.echo(f'\nChecking file {file}')
            restarted_spellchecker = self.update_known_words_from_pack(file)
            if restarted_spellchecker:
                self.add_known_words()
            self.unknown_words = {}
            if file.endswith('.md'):
                self.check_md_file(file)

            elif file.endswith('.yml'):
                self.check_yaml(file)

            if self.unknown_words:
                click.secho(
                    f"\n - Words that might be misspelled were found in "
                    f"{file}:",
                    fg='bright_red')
                self.print_unknown_words(unknown_words=self.unknown_words)
                self.found_misspelled = True
                self.files_with_misspells.add(file)

            else:
                click.secho(f" - No misspelled words found in {file}",
                            fg='green')
                self.files_without_misspells.add(file)

        self.print_file_report()
        if (self.found_misspelled
                or self.malformed_rn_files) and not self.no_failure:
            return False

        return True

    def update_known_words_from_pack(self, file_path: str) -> bool:
        """Update spellchecker with the file's pack's known words.

        Args:
            file_path: The path of the file to update the spellchecker with the packs known words.

        Return True if spellchecker was restarted, False otherwise
        """
        restarted_spellchecker = False
        if self.load_known_words_from_pack:
            known_pack_words_file_path, known_words = self.find_known_words_from_pack(
                file_path)
            if self.known_pack_words_file_path != known_pack_words_file_path:
                click.secho(
                    f'\nUsing known words file found within pack: {known_pack_words_file_path}',
                    fg='yellow')
                if self.known_pack_words_file_path:
                    # Restart Spellchecker to remove old known_words packs file
                    self.spellchecker = SpellChecker()
                    self.known_pack_words_file_path = ''
                    restarted_spellchecker = True

            if known_pack_words_file_path:
                self.known_pack_words_file_path = known_pack_words_file_path
                if known_words:
                    # Add the new known_words packs file
                    self.spellchecker.word_frequency.load_words(known_words)

        return restarted_spellchecker

    def add_known_words(self):
        """Add known words to the spellchecker from external and internal files"""
        # adding known words file if given - these words will not count as misspelled
        if self.known_words_file_paths:
            for known_words_file_path in self.known_words_file_paths:
                self.spellchecker.word_frequency.load_text_file(
                    known_words_file_path)

        # adding the KNOWN_WORDS to the spellchecker recognized words.
        self.spellchecker.word_frequency.load_words(KNOWN_WORDS)

        if self.expand_dictionary:
            # nltk - natural language toolkit - is a large package containing several dictionaries.
            # to use it we need to download one of its dictionaries - we will use the
            # reasonably sized "brown" and "webtext" dicts.
            # to avoid SSL download errors we disable SSL certificate verification.
            try:
                _create_unverified_https_context = ssl._create_unverified_context
            except AttributeError:
                pass
            else:
                ssl._create_default_https_context = _create_unverified_https_context

            # downloading "brown" and "webtext" sets from nltk.
            click.secho(
                "Downloading expanded dictionary, this may take a minute...",
                fg='yellow')
            nltk.download('brown')
            nltk.download('webtext')

            # adding nltk's word set to spellchecker.
            self.spellchecker.word_frequency.load_words(brown.words())
            self.spellchecker.word_frequency.load_words(webtext.words())

    @staticmethod
    def remove_punctuation(word):
        """remove leading and trailing punctuation"""
        return word.strip(string.punctuation)
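    # e.g. remove_punctuation('"endpoint,"') -> 'endpoint'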

    def check_word(self, word):
        """Check if a word is legal"""
        # check camel cases
        word = self.remove_punctuation(word)
        sub_words = []
        if '-' in word:
            sub_words.extend(word.split('-'))
        elif not self.no_camel_case and self.is_camel_case(word):
            sub_words.extend(self.camel_case_split(word))
        else:
            sub_words.append(word)

        self.unknown_words[word] = set()
        for sub_word in sub_words:
            sub_word = self.remove_punctuation(sub_word)
            if sub_word.isalpha() and self.spellchecker.unknown([sub_word]):
                self.unknown_words[word].update(
                    list(self.spellchecker.candidates(sub_word))[:5])

        if not self.unknown_words[word]:
            del self.unknown_words[word]
        elif word in self.unknown_words[word]:
            # Do not suggest the same word as a correction.
            self.unknown_words[word].remove(word)

    def check_md_file(self, file_path):
        """Runs spell check on .md file. Adds unknown words to given unknown_words set.
        If it is an RN file, also review it and add it to the malformed RN files set if needed.
        """
        pack_object: TextObject = path_to_pack_object(file_path)
        md_file_lines = pack_object.to_str().split('\n')

        if isinstance(pack_object, ReleaseNote):
            good_rn = ReleaseNotesChecker(file_path, md_file_lines).check_rn()
            if not good_rn:
                self.malformed_rn_files.add(file_path)

        for line in md_file_lines:
            for word in line.split():
                self.check_word(word)

    def check_yaml(self, file_path):
        """Runs spell check on .yml file. Adds unknown words to given unknown_words set.

        Args:
            file_path (str): The file path to the yml file.
        """
        pack_object: YAMLContentObject = path_to_pack_object(file_path)
        yml_info = pack_object.to_dict()

        if isinstance(pack_object, Integration):
            self.check_spelling_in_integration(yml_info)

        elif isinstance(pack_object, Script):
            self.check_spelling_in_script(yml_info)

        elif isinstance(pack_object, Playbook):
            self.check_spelling_in_playbook(yml_info)

    def check_spelling_in_integration(self, yml_file):
        """Check spelling on an integration file"""
        self.check_params(yml_file.get('configuration', []))
        self.check_commands(yml_file.get('script', {}).get('commands', []))
        self.check_display_and_description(yml_file.get('display'),
                                           yml_file.get('description'))

    def check_params(self, param_list):
        """Check spelling in integration parameters"""
        for param_conf in param_list:
            param_display = param_conf.get('display')
            if param_display:
                for word in param_display.split():
                    self.check_word(word)

            param_tooltip = param_conf.get('additionalinfo')
            if param_tooltip:
                for word in param_tooltip.split():
                    self.check_word(word)

    def check_commands(self, command_list):
        """Check spelling in integration commands"""
        for command in command_list:
            command_arguments = command.get('arguments', [])
            for argument in command_arguments:
                arg_description = argument.get('description')
                if arg_description:
                    for word in arg_description.split():
                        self.check_word(word)

            command_description = command.get('description')
            if command_description:
                for word in command_description.split():
                    self.check_word(word)

            command_outputs = command.get('outputs', [])
            for output in command_outputs:
                output_description = output.get('description')
                if output_description:
                    for word in output_description.split():
                        self.check_word(word)

    def check_display_and_description(self, display, description):
        """check integration display name and description"""
        if display:
            for word in display.split():
                self.check_word(word)

        if description:
            for word in description.split():
                self.check_word(word)

    def check_spelling_in_script(self, yml_file):
        """Check spelling in script file"""
        self.check_comment(yml_file.get('comment'))
        self.check_script_args(yml_file.get('args', []))
        self.check_script_outputs(yml_file.get('outputs', []))

    def check_script_args(self, arg_list):
        """Check spelling in script arguments"""
        for argument in arg_list:
            arg_description = argument.get('description')
            if arg_description:
                for word in arg_description.split():
                    self.check_word(word)

    def check_comment(self, comment):
        """Check spelling in script comment"""
        if comment:
            for word in comment.split():
                self.check_word(word)

    def check_script_outputs(self, outputs_list):
        """Check spelling in script outputs"""
        for output in outputs_list:
            output_description = output.get('description')
            if output_description:
                for word in output_description.split():
                    self.check_word(word)

    def check_spelling_in_playbook(self, yml_file):
        """Check spelling in playbook file"""
        self.check_playbook_description_and_name(yml_file.get('description'),
                                                 yml_file.get('name'))
        self.check_tasks(yml_file.get('tasks', {}))

    def check_playbook_description_and_name(self, description, name):
        """Check spelling in playbook description and name"""
        if name:
            for word in name.split():
                self.check_word(word)

        if description:
            for word in description.split():
                self.check_word(word)

    def check_tasks(self, task_dict):
        """Check spelling in playbook tasks"""
        for task_key in task_dict.keys():
            task_info = task_dict[task_key].get('task')
            if task_info:
                task_description = task_info.get('description')
                if task_description:
                    for word in task_description.split():
                        self.check_word(word)

                task_name = task_info.get('name')
                if task_name:
                    for word in task_name.split():
                        self.check_word(word)
Example #12
class PackUniqueFilesValidator(BaseValidator):
    """PackUniqueFilesValidator is designed to validate the correctness of content pack's files structure.
    Existence and validity of this files is essential."""

    git_util = GitUtil(repo=Content.git())
    main_branch = git_util.handle_prev_ver()[1]
    if not main_branch.startswith('origin'):
        main_branch = 'origin/' + main_branch

    def __init__(self, pack, pack_path=None, validate_dependencies=False, ignored_errors=None, print_as_warnings=False,
                 should_version_raise=False, id_set_path=None, suppress_print=False, private_repo=False,
                 skip_id_set_creation=False, prev_ver=main_branch, json_file_path=None, support=None,
                 specific_validations=None):
        """Inits the content pack validator with pack's name, pack's path, and unique files to content packs such as:
        secrets whitelist file, pack-ignore file, pack-meta file and readme file
        :param pack: content package name, which is the directory name of the pack
        """
        super().__init__(ignored_errors=ignored_errors, print_as_warnings=print_as_warnings,
                         suppress_print=suppress_print, json_file_path=json_file_path, specific_validations=specific_validations)
        self.pack = pack
        self.pack_path = pack_name_to_path(self.pack) if not pack_path else pack_path
        self.secrets_file = PACKS_WHITELIST_FILE_NAME
        self.pack_ignore_file = PACKS_PACK_IGNORE_FILE_NAME
        self.pack_meta_file = PACKS_PACK_META_FILE_NAME
        self.readme_file = PACKS_README_FILE_NAME
        self.validate_dependencies = validate_dependencies
        self._errors = []
        self.should_version_raise = should_version_raise
        self.id_set_path = id_set_path
        self.private_repo = private_repo
        self.skip_id_set_creation = skip_id_set_creation
        self.prev_ver = prev_ver
        self.support = support
        self.metadata_content: Dict = dict()
    # error handling

    def _add_error(self, error: Tuple[str, str], file_path: str, warning=False):
        """Adds error entry to a list under pack's name
        Returns True if added and False otherwise"""
        error_message, error_code = error

        if self.pack_path not in file_path:
            file_path = os.path.join(self.pack_path, file_path)

        formatted_error = self.handle_error(error_message, error_code, file_path=file_path, should_print=False,
                                            warning=warning)
        if formatted_error:
            self._errors.append(formatted_error)
            return True

        return False

    def get_errors(self, raw=False) -> str:
        """Get the dict version or string version for print"""
        errors = ''
        if raw:
            errors = '\n  '.join(self._errors)
        elif self._errors:
            errors = ' - Issues with unique files in pack: {}\n  {}'.format(self.pack, '\n  '.join(self._errors))

        return errors

    # file utils
    def _get_pack_file_path(self, file_name=''):
        """Returns the full file path to pack's file"""
        return os.path.join(self.pack_path, file_name)

    def _get_pack_latest_rn_version(self):
        """
        Extract all the release notes from the pack and return the highest release note version in the pack.

        Return:
            (str): The latest RN version.
        """
        list_of_files = glob.glob(self.pack_path + '/ReleaseNotes/*')
        list_of_release_notes = [os.path.basename(file) for file in list_of_files]
        list_of_versions = [rn[:rn.rindex('.')].replace('_', '.') for rn in list_of_release_notes]
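        # e.g. (assumed file layout) '1_0_1.md' -> '1.0.1'; sorting by LooseVersion
        # then ranks ['1.0.1', '1.2.0', '1.10.0'] correctly, unlike a plain string sort.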
        if list_of_versions:
            list_of_versions.sort(key=LooseVersion)
            return list_of_versions[-1]
        else:
            return ''

    @error_codes('PA128,PA100')
    def _is_pack_file_exists(self, file_name: str, is_required: bool = False):
        """
        Check if a file with given name exists in pack root.
        is_required is True means that absence of the file should block other tests from running
            (see BlockingValidationFailureException).
        """
        if not os.path.isfile(self._get_pack_file_path(file_name)):
            error_function = Errors.required_pack_file_does_not_exist if is_required else Errors.pack_file_does_not_exist
            if self._add_error(error_function(file_name), file_name):
                if is_required:
                    raise BlockingValidationFailureException()
                return False
        return True

    def _read_file_content(self, file_name):
        """Open & Read a file object's content throw exception if can't"""
        try:
            with io.open(self._get_pack_file_path(file_name), mode="r", encoding="utf-8") as file:
                return file.read()
        except IOError:
            if not self._add_error(Errors.cant_open_pack_file(file_name), file_name):
                return "No-Text-Required"
        except ValueError:
            if not self._add_error(Errors.cant_read_pack_file(file_name), file_name):
                return "No-Text-Required"

        return False

    def _read_metadata_content(self) -> Dict:
        """
        Reads metadata content. Avoids the duplication of file opening in case metadata was already opened once.
        Returns:
            (Dict): Metadata JSON pack file content.
        """
        if not self.metadata_content:
            pack_meta_file_content = self._read_file_content(self.pack_meta_file)
            self.metadata_content = json.loads(pack_meta_file_content)
        return self.metadata_content

    def _parse_file_into_list(self, file_name, delimiter='\n'):
        """Parse file's content to list, throw exception if can't"""
        file_content = self._read_file_content(file_name)
        try:
            if file_content:
                return file_content.split(delimiter)
        except ValueError:
            if not self._add_error(Errors.cant_parse_pack_file_to_list(file_name), file_name):
                return True

        return False

    @staticmethod
    def check_timestamp_format(timestamp):
        """Check that the timestamp is in ISO format"""
        try:
            datetime.strptime(timestamp, ISO_TIMESTAMP_FORMAT)
            return True
        except ValueError:
            return False
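    # For example, assuming ISO_TIMESTAMP_FORMAT is '%Y-%m-%dT%H:%M:%SZ',
    # '2020-04-14T00:00:00Z' passes while '14/04/2020' fails strptime and returns False.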

    # secrets validation
    def validate_secrets_file(self):
        """Validate everything related to .secrets-ignore file"""
        if self._is_pack_file_exists(self.secrets_file) and all([self._is_secrets_file_structure_valid()]):
            return True

        return False

    def _check_if_file_is_empty(self, file_name: str) -> bool:
        """
        Check if the file exists and contains anything other than whitespace characters.
        Returns True if the file exists but is empty or whitespace-only, False otherwise.
        """
        if self._is_pack_file_exists(file_name):
            content = self._read_file_content(file_name)
            if not content or content.isspace():
                return True

        return False

    def validate_pack_readme_images(self):
        readme_file_path = os.path.join(self.pack_path, self.readme_file)
        readme_validator = ReadMeValidator(readme_file_path, ignored_errors=self.ignored_errors, specific_validations=self.specific_validations)
        errors = readme_validator.check_readme_relative_image_paths(is_pack_readme=True)
        errors += readme_validator.check_readme_absolute_image_paths(is_pack_readme=True)
        if errors:
            self._errors.extend(errors)
            return False
        return True

    @error_codes('IM109')
    def validate_author_image_exists(self):
        if self.metadata_content.get(PACK_METADATA_SUPPORT) == 'partner':
            author_image_path = os.path.join(self.pack_path, 'Author_image.png')
            if not os.path.exists(author_image_path):
                if self._add_error(Errors.author_image_is_missing(author_image_path), file_path=author_image_path):
                    return False

        return True

    @error_codes('RM104')
    def validate_pack_readme_file_is_not_empty(self):
        """
        Validates that the README.md file is not empty for partner packs and packs with playbooks
        """
        playbooks_path = os.path.join(self.pack_path, "Playbooks")
        contains_playbooks = os.path.exists(playbooks_path) and len(os.listdir(playbooks_path)) != 0
        if (self.support == 'partner' or contains_playbooks) and self._check_if_file_is_empty(self.readme_file):
            if self._add_error(Errors.empty_readme_error(), self.readme_file):
                return False

        return True

    @error_codes('RM105')
    def validate_pack_readme_and_pack_description(self):
        """
        Validates that the README.md file is not the same as the pack description.
        Returns False if the pack readme is identical to the pack description.
        """
        metadata = self._read_metadata_content()
        metadata_description = metadata.get(PACK_METADATA_DESC, '').lower().strip()
        if self._is_pack_file_exists(self.readme_file) and not self._check_if_file_is_empty(self.readme_file):
            pack_readme = self._read_file_content(self.readme_file)
            readme_content = pack_readme.lower().strip()
            if metadata_description == readme_content:
                if self._add_error(Errors.readme_equal_description_error(), self.readme_file):
                    return False

        return True

    def _is_secrets_file_structure_valid(self):
        """Check if .secrets-ignore structure is parse-able"""
        if self._parse_file_into_list(self.secrets_file):
            return True

        return False

    # pack ignore validation
    def validate_pack_ignore_file(self):
        """Validate everything related to .pack-ignore file"""
        if self._is_pack_file_exists(self.pack_ignore_file) and all([self._is_pack_ignore_file_structure_valid()]):
            return True

        return False

    @error_codes('PA104')
    def _is_pack_ignore_file_structure_valid(self):
        """Check if .pack-ignore structure is parse-able"""
        try:
            if self._parse_file_into_list(self.pack_ignore_file):
                return True
        except re.error:
            if not self._add_error(Errors.pack_file_bad_format(self.pack_ignore_file), self.pack_ignore_file):
                return True

        return False

    # pack metadata validation
    def validate_pack_meta_file(self):
        """Validate everything related to pack_metadata.json file"""
        if self._is_pack_file_exists(self.pack_meta_file, is_required=True) and all([
            self._is_pack_meta_file_structure_valid(),
            self._is_valid_contributor_pack_support_details(),
            self._is_approved_usecases(),
            self._is_right_version(),
            self._is_approved_tags(),
            self._is_price_changed(),
            self._is_valid_support_type(),
            self.is_right_usage_of_usecase_tag(),
        ]):
            if self.should_version_raise:
                return self.validate_version_bump()
            else:
                return True

        return False

    @error_codes('PA114')
    def validate_version_bump(self):
        metadata_file_path = self._get_pack_file_path(self.pack_meta_file)
        old_meta_file_content = get_remote_file(metadata_file_path, tag=self.prev_ver)
        current_meta_file_content = get_json(metadata_file_path)
        old_version = old_meta_file_content.get('currentVersion', '0.0.0')
        current_version = current_meta_file_content.get('currentVersion', '0.0.0')
        if LooseVersion(old_version) < LooseVersion(current_version):
            return True
        elif self._add_error(Errors.pack_metadata_version_should_be_raised(self.pack, old_version), metadata_file_path):
            return False
        return True
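    # e.g. an old currentVersion of '1.0.1' against a current '1.0.2' passes; an
    # unchanged or lowered version registers error PA114 instead.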

    @error_codes('PA108,PA125')
    def validate_pack_name(self, metadata_file_content: Dict) -> bool:
        # check validity of pack metadata mandatory fields
        pack_name: str = metadata_file_content.get(PACK_METADATA_NAME, '')
        if not pack_name or 'fill mandatory field' in pack_name:
            if self._add_error(Errors.pack_metadata_name_not_valid(), self.pack_meta_file):
                return False
        if len(pack_name) < 3:
            if self._add_error(Errors.pack_name_is_not_in_xsoar_standards("short"), self.pack_meta_file):
                return False
        if pack_name and pack_name[0].islower():
            if self._add_error(Errors.pack_name_is_not_in_xsoar_standards("capital"), self.pack_meta_file):
                return False
        if re.findall(INCORRECT_PACK_NAME_PATTERN, pack_name):
            if self._add_error(Errors.pack_name_is_not_in_xsoar_standards("wrong_word"), self.pack_meta_file):
                return False
        if not self.name_does_not_contain_excluded_word(pack_name):
            if self._add_error(
                    Errors.pack_name_is_not_in_xsoar_standards('excluded_word', EXCLUDED_DISPLAY_NAME_WORDS),
                    self.pack_meta_file):
                return False
        return True

    def name_does_not_contain_excluded_word(self, pack_name: str) -> bool:
        """
        Checks whether the given pack name contains an excluded word.
        Args:
            pack_name (str): Name of the pack.
        Returns:
            (bool): False if the pack name contains an excluded word, True otherwise.
        """
        lowercase_name = pack_name.lower()
        return not any(excluded_word in lowercase_name for excluded_word in EXCLUDED_DISPLAY_NAME_WORDS)

    def _is_empty_dir(self, dir_path: Path) -> bool:
        return dir_path.stat().st_size == 0

    def _is_integration_pack(self):
        integration_dir: Path = Path(self.pack_path) / INTEGRATIONS_DIR
        return integration_dir.exists() and not self._is_empty_dir(dir_path=integration_dir)

    @error_codes('PA105,PA106,PA107,PA109,PA110,PA115,PA111,PA129,PA118,PA112')
    def _is_pack_meta_file_structure_valid(self):
        """Check if pack_metadata.json structure is json parse-able and valid"""
        try:
            metadata = self._read_metadata_content()
            if not metadata:
                if self._add_error(Errors.pack_metadata_empty(), self.pack_meta_file):
                    raise BlockingValidationFailureException()

            if not isinstance(metadata, dict):
                if self._add_error(Errors.pack_metadata_should_be_dict(self.pack_meta_file), self.pack_meta_file):
                    raise BlockingValidationFailureException()

            missing_fields = [field for field in PACK_METADATA_FIELDS if field not in metadata.keys()]
            if missing_fields:
                if self._add_error(Errors.missing_field_iin_pack_metadata(self.pack_meta_file, missing_fields),
                                   self.pack_meta_file):
                    raise BlockingValidationFailureException()

            elif not self.validate_pack_name(metadata):
                raise BlockingValidationFailureException()

            description_name = metadata.get(PACK_METADATA_DESC, '').lower()
            if not description_name or 'fill mandatory field' in description_name:
                if self._add_error(Errors.pack_metadata_field_invalid(), self.pack_meta_file):
                    raise BlockingValidationFailureException()

            if not self.is_pack_metadata_desc_too_long(description_name):
                return False

            # check non mandatory dependency field
            dependencies_field = metadata.get(PACK_METADATA_DEPENDENCIES, {})
            if not isinstance(dependencies_field, dict):
                if self._add_error(Errors.dependencies_field_should_be_dict(self.pack_meta_file), self.pack_meta_file):
                    return False

            # check created field in iso format
            created_field = metadata.get(PACK_METADATA_CREATED, '')
            if created_field:
                if not self.check_timestamp_format(created_field):
                    suggested_value = parser.parse(created_field).isoformat() + "Z"
                    if self._add_error(
                            Errors.pack_timestamp_field_not_in_iso_format(PACK_METADATA_CREATED,
                                                                          created_field, suggested_value),
                            self.pack_meta_file):
                        return False

            # check metadata list fields and validate that no empty values are contained in these fields
            for list_field in (PACK_METADATA_KEYWORDS, PACK_METADATA_TAGS, PACK_METADATA_CATEGORIES,
                               PACK_METADATA_USE_CASES):
                field = metadata[list_field]
                if field and len(field) == 1:
                    value = field[0]
                    if not value:
                        if self._add_error(Errors.empty_field_in_pack_metadata(self.pack_meta_file, list_field),
                                           self.pack_meta_file):
                            return False

            # check metadata categories isn't an empty list, only if it is an integration.
            if self._is_integration_pack():
                if not metadata[PACK_METADATA_CATEGORIES]:
                    if self._add_error(Errors.pack_metadata_missing_categories(self.pack_meta_file),
                                       self.pack_meta_file):
                        return False

            # if the field 'certification' exists, check that its value is set to 'certified' or 'verified'
            certification = metadata.get(PACK_METADATA_CERTIFICATION)
            if certification and certification not in ALLOWED_CERTIFICATION_VALUES:
                if self._add_error(Errors.pack_metadata_certification_is_invalid(self.pack_meta_file),
                                   self.pack_meta_file):
                    return False

            # check format of metadata version
            version = metadata.get(PACK_METADATA_CURR_VERSION, '0.0.0')
            if not self._is_version_format_valid(version):
                return False

        except (ValueError, TypeError):
            if self._add_error(Errors.pack_metadata_isnt_json(self.pack_meta_file), self.pack_meta_file):
                raise BlockingValidationFailureException()

        return True

    @error_codes('PA126')
    def is_pack_metadata_desc_too_long(self, description_name):
        if len(description_name) > MAXIMUM_DESCRIPTION_FIELD_LENGTH:
            if self._add_error(Errors.pack_metadata_long_description(), self.pack_meta_file, warning=True):
                return False
        return True

    @error_codes('PA113')
    def validate_support_details_exist(self, pack_meta_file_content):
        """Validate either email or url exist in contributed pack details."""
        if not pack_meta_file_content[PACK_METADATA_URL] and not pack_meta_file_content[PACK_METADATA_EMAIL]:
            if self._add_error(Errors.pack_metadata_missing_url_and_email(), self.pack_meta_file):
                return False

        return True

    @error_codes('PA127')
    def validate_metadata_url(self, pack_meta_file_content):
        """Validate the url in the pack metadata doesn't lead to a github repository."""
        metadata_url = pack_meta_file_content[PACK_METADATA_URL]
        metadata_url = metadata_url.lower().strip()
        if len(re.findall("github.com", metadata_url)) > 0:
            # GitHub URLs that lead to a /issues page are also acceptable as a support URL.
            if not metadata_url.endswith('/issues'):
                self._add_error(Errors.metadata_url_invalid(), self.pack_meta_file)
                return False

        return True

    @error_codes('PA112')
    def _is_valid_contributor_pack_support_details(self):
        """Check email and url in contributed pack metadata details."""
        try:
            pack_meta_file_content = self._read_metadata_content()
            if pack_meta_file_content[PACK_METADATA_SUPPORT] in SUPPORTED_CONTRIBUTORS_LIST:
                return all([self.validate_support_details_exist(pack_meta_file_content),
                            self.validate_metadata_url(pack_meta_file_content)])

        except (ValueError, TypeError):
            if self._add_error(Errors.pack_metadata_isnt_json(self.pack_meta_file), self.pack_meta_file):
                return False

        return True

    @error_codes('PA117,PA112')
    def _is_valid_support_type(self) -> bool:
        """Checks whether the support type is valid in the pack metadata.

        Returns:
            bool: True if the support type is valid, otherwise False

        """
        try:
            pack_meta_file_content = self._read_metadata_content()
            if pack_meta_file_content[PACK_METADATA_SUPPORT] not in SUPPORT_TYPES:
                self._add_error(Errors.pack_metadata_invalid_support_type(), self.pack_meta_file)
                return False
            self.support = pack_meta_file_content[PACK_METADATA_SUPPORT]
        except (ValueError, TypeError):
            if self._add_error(Errors.pack_metadata_isnt_json(self.pack_meta_file), self.pack_meta_file):
                return False

        return True

    @error_codes('PA119')
    def _is_approved_usecases(self) -> bool:
        """Checks whether the usecases in the pack metadata are approved

        Return:
             bool: True if the usecases are approved, otherwise False
        """
        if tools.is_external_repository():
            return True

        non_approved_usecases = set()
        try:
            pack_meta_file_content = self._read_metadata_content()
            current_usecases = tools.get_current_usecases()
            non_approved_usecases = set(pack_meta_file_content[PACK_METADATA_USE_CASES]) - set(current_usecases)
            if non_approved_usecases:
                if self._add_error(
                        Errors.pack_metadata_non_approved_usecases(non_approved_usecases), self.pack_meta_file):
                    return False
        except (ValueError, TypeError):
            if self._add_error(Errors.pack_metadata_non_approved_usecases(non_approved_usecases), self.pack_meta_file):
                return False
        return True

    @error_codes('PA130')
    def _is_version_format_valid(self, version: str) -> bool:
        """
        Checks if the metadata version is in the correct format.
        Args:
            version (str): The version whose format is checked.

        Returns:
            bool: True if the version is in the correct format, otherwise false.
        """
        match_obj = re.match(VERSION_REGEX, version)
        if not match_obj:
            self._add_error(Errors.wrong_version_format(), self.pack_meta_file)
            return False
        return True
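    # Assuming VERSION_REGEX matches an x.y.z scheme (e.g. r'\d+\.\d+\.\d+'),
    # '1.0.2' is accepted while '1.0' or 'v1.0.2' registers error PA130.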

    @error_codes('PA120')
    def _is_approved_tags(self) -> bool:
        """Checks whether the tags in the pack metadata are approved

        Return:
             bool: True if the tags are approved, otherwise False
        """
        if tools.is_external_repository():
            return True

        non_approved_tags = set()
        try:
            pack_meta_file_content = self._read_metadata_content()
            current_tags = tools.get_current_tags()
            non_approved_tags = set(pack_meta_file_content[PACK_METADATA_TAGS]) - set(current_tags)
            if non_approved_tags:
                if self._add_error(Errors.pack_metadata_non_approved_tags(non_approved_tags), self.pack_meta_file):
                    return False
        except (ValueError, TypeError):
            if self._add_error(Errors.pack_metadata_non_approved_tags(non_approved_tags), self.pack_meta_file):
                return False
        return True

    @error_codes('RN106,PA131')
    def _is_right_version(self):
        """Checks whether the currentVersion field in the pack metadata match the version of the latest release note.

        Return:
             bool: True if the versions match, otherwise False
        """
        metadata_file_path = self._get_pack_file_path(self.pack_meta_file)
        current_version = self.metadata_content.get('currentVersion', '0.0.0')
        rn_version = self._get_pack_latest_rn_version()
        if not rn_version and current_version == '1.0.0':
            return True
        if not rn_version:
            self._add_error(Errors.missing_release_notes_for_pack(self.pack), self.pack)
            return False
        if parse(rn_version) != parse(current_version):
            self._add_error(Errors.pack_metadata_version_diff_from_rn(self.pack, rn_version, current_version), metadata_file_path)
            return False
        return True

    def _contains_use_case(self):
        """
        Return:
            True if the pack contains at least one playbook, incident type, or layout, otherwise False
        """
        playbooks_path = os.path.join(self.pack_path, "Playbooks")
        incidents_path = os.path.join(self.pack_path, "IncidentTypes")
        layouts_path = os.path.join(self.pack_path, "Layouts")

        answers = [
            os.path.exists(playbooks_path) and len(os.listdir(playbooks_path)) != 0,
            os.path.exists(incidents_path) and len(os.listdir(incidents_path)) != 0,
            os.path.exists(layouts_path) and len(os.listdir(layouts_path)) != 0,
        ]
        return any(answers)

    @error_codes('PA123')
    def is_right_usage_of_usecase_tag(self):
        """Checks whether Use Case tag in pack_metadata is used properly

        Return:
             bool: True if the Use Case tag is used properly, i.e. the pack contains at least one playbook, incident type, or layout; otherwise False
        """
        try:
            pack_meta_file_content = self._read_metadata_content()

            if "Use Case" in pack_meta_file_content['tags']:
                if not self._contains_use_case():
                    if self._add_error(Errors.is_wrong_usage_of_usecase_tag(), self.pack_meta_file):
                        return False
        except (ValueError, TypeError):
            if self._add_error(Errors.is_wrong_usage_of_usecase_tag(), self.pack_meta_file):
                return False
        return True

    def get_master_private_repo_meta_file(self, metadata_file_path: str):
        current_repo = Repo(Path.cwd(), search_parent_directories=True)

        # if running on master branch in private repo - do not run the test
        if current_repo.active_branch.name == 'master':  # compare by name; a Head object never equals a plain string
            if not self.suppress_print:
                click.secho("Running on master branch - skipping price change validation", fg="yellow")
            return None
        try:
            old_meta_file_content = current_repo.git.show(f'{self.main_branch}:{metadata_file_path}')

        except GitCommandError as e:
            if not self.suppress_print:
                click.secho(f"Got an error while trying to connect to git - {str(e)}\n"
                            f"Skipping price change validation")
            return None

        # if there was no past version
        if not old_meta_file_content:
            if not self.suppress_print:
                click.secho("Unable to find previous pack_metadata.json file - skipping price change validation",
                            fg="yellow")
            return None

        return json.loads(old_meta_file_content)

    @error_codes('PA121')
    def _is_price_changed(self) -> bool:
        # only check on private repo
        if not self.private_repo:
            return True

        metadata_file_path = self._get_pack_file_path(self.pack_meta_file)
        old_meta_file_content = self.get_master_private_repo_meta_file(metadata_file_path)

        # if there was no past version or running on master branch
        if not old_meta_file_content:
            return True

        current_meta_file_content = get_json(metadata_file_path)
        current_price = current_meta_file_content.get('price')
        old_price = old_meta_file_content.get('price')

        # if a price was added, removed or changed compared to the master version - return an error
        if (old_price and not current_price) or (current_price and not old_price) or (old_price != current_price):
            if self._add_error(Errors.pack_metadata_price_change(old_price, current_price), self.pack_meta_file):
                return False

        return True

    def are_valid_files(self, id_set_validations) -> str:
        """Main Execution Method"""
        try:
            self.validate_secrets_file()
            self.validate_pack_ignore_file()
            # metadata file is not validated for API_MODULES_PACK
            if API_MODULES_PACK not in self.pack:
                self.validate_pack_meta_file()

            self.validate_pack_readme_file_is_not_empty()
            self.validate_pack_readme_and_pack_description()
            self.validate_pack_readme_images()
            self.validate_author_image_exists()

            # We only check pack dependencies for -g flag
            if self.validate_dependencies:
                self.validate_pack_dependencies()

            # Check if unique files are valid against the rest of the files, using the ID set.
            if id_set_validations:
                is_valid, error = id_set_validations.is_unique_file_valid_in_set(self.pack_path, self.ignored_errors)
                if not is_valid:
                    self._add_error(error, self.pack_path)
        except BlockingValidationFailureException:
            # note that raising this should happen after adding the error to self._errors,
            # so no special handling is required on this `except` block
            pass

        return self.get_errors()

    # pack dependencies validation
    def validate_pack_dependencies(self):
        try:
            click.secho(f'\nRunning pack dependencies validation on {self.pack}\n',
                        fg="bright_cyan")
            core_pack_list = get_core_pack_list()

            first_level_dependencies = PackDependencies.find_dependencies(
                self.pack, id_set_path=self.id_set_path, silent_mode=True, exclude_ignored_dependencies=False,
                update_pack_metadata=False, skip_id_set_creation=self.skip_id_set_creation, use_pack_metadata=True
            )

            if not first_level_dependencies:
                if not self.suppress_print:
                    click.secho("No first level dependencies found", fg="yellow")
                return True

            for core_pack in core_pack_list:
                first_level_dependencies.pop(core_pack, None)
            if not first_level_dependencies:
                if not self.suppress_print:
                    click.secho("Found first level dependencies only on core packs", fg="yellow")
                return True

            dependency_result = json.dumps(first_level_dependencies, indent=4)
            click.echo(click.style(f"Found dependencies result for {self.pack} pack:", bold=True))
            click.echo(click.style(dependency_result, bold=True))

            if self.pack in core_pack_list:
                if not self.validate_core_pack_dependencies(first_level_dependencies):
                    return False

            non_supported_pack = first_level_dependencies.get('NonSupported', {})
            deprecated_pack = first_level_dependencies.get('DeprecatedContent', {})

            if not self.is_invalid_package_dependencies(non_supported_pack, deprecated_pack):
                return False

            return True

        except ValueError as e:
            if "Couldn't find any items for pack" in str(e):
                error_message, error_code = Errors.invalid_id_set()
                if self._add_error((error_message, error_code), file_path=self.pack_path):
                    return False
                return True
            else:
                raise

    @error_codes('PA116')
    def is_invalid_package_dependencies(self, non_supported_pack, deprecated_pack):
        if (non_supported_pack.get('mandatory')) or (deprecated_pack.get('mandatory')):
            error_message, error_code = Errors.invalid_package_dependencies(self.pack)
            if self._add_error((error_message, error_code), file_path=self.pack_path):
                return False
        return True

    @error_codes('PA124')
    def validate_core_pack_dependencies(self, dependencies_packs):

        found_dependencies = []
        for dependency_pack in dependencies_packs:
            if dependencies_packs.get(dependency_pack, {}).get('mandatory'):
                found_dependencies.append(dependency_pack)

        if found_dependencies:
            error_message, error_code = Errors.invalid_core_pack_dependencies(self.pack, str(found_dependencies))
            if self._add_error((error_message, error_code), file_path=self.pack_path):
                return False
        return True
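A minimal usage sketch for the validator above; the pack name and the skipped ID-set argument are illustrative assumptions, not part of the original source:

# Hypothetical driver; 'MyPack' stands in for a real pack directory name.
validator = PackUniqueFilesValidator('MyPack')
errors = validator.are_valid_files(id_set_validations=None)  # None skips the ID-set cross-checks
if errors:
    print(errors)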
Example No. 13
class DocReviewer:
    """Perform a spell check on the given .yml or .md file.
    """

    SUPPORTED_FILE_TYPES = [FileType.INTEGRATION, FileType.SCRIPT, FileType.PLAYBOOK, FileType.README,
                            FileType.DESCRIPTION, FileType.RELEASE_NOTES, FileType.BETA_INTEGRATION,
                            FileType.TEST_PLAYBOOK, FileType.TEST_SCRIPT]

    def __init__(self, file_path: str, known_words_file_path: str = None, no_camel_case: bool = False,
                 no_failure: bool = False, expand_dictionary: bool = False, templates: bool = False,
                 use_git: bool = False, prev_ver: str = None, release_notes_only: bool = False):
        if templates:
            ReleaseNotesChecker(template_examples=True)
            sys.exit(0)

        # if nothing was entered, default to using git
        elif not file_path and not use_git:
            use_git = True

        self.file_path = file_path
        self.git_util = None
        self.prev_ver = prev_ver if prev_ver else 'demisto/master'

        if use_git:
            self.git_util = GitUtil()

        if release_notes_only:
            self.SUPPORTED_FILE_TYPES = [FileType.RELEASE_NOTES]

        self.files = set()  # type:Set
        self.spellchecker = SpellChecker()
        self.unknown_words = {}  # type:Dict
        self.no_camel_case = no_camel_case
        self.known_words_file_path = known_words_file_path
        self.found_misspelled = False
        self.no_failure = no_failure
        self.expand_dictionary = expand_dictionary
        self.files_with_misspells = set()  # type:Set
        self.files_without_misspells = set()  # type:Set
        self.malformed_rn_files = set()  # type:Set

    @staticmethod
    def is_camel_case(word):
        """check if a given word is in camel case"""
        return word != word.lower() and word != word.upper() and "_" not in word and word != word.title()

    @staticmethod
    def camel_case_split(camel):
        """split camel case word into sub-words"""
        tokens = re.compile('([A-Z]?[a-z]+)').findall(camel)
        for token in tokens:
            # the double spaces handle all-caps words like IP/URL/DNS that the regex does not capture
            camel = camel.replace(token, f' {token} ')

        return camel.split()
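    # e.g. camel_case_split('IPAddress') -> ['IP', 'Address']: the regex captures
    # 'Address', and padding it with spaces isolates the leading all-caps 'IP'.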

    def get_all_md_and_yml_files_in_dir(self, dir_name):
        """recursively get all the supported files from a given dictionary"""
        for root, _, files in os.walk(dir_name):
            for file_name in files:
                full_path = (os.path.join(root, file_name))
                if find_type(full_path) in self.SUPPORTED_FILE_TYPES:
                    self.files.add(str(full_path))

    def gather_all_changed_files(self):
        modified = self.git_util.modified_files(prev_ver=self.prev_ver)  # type: ignore[union-attr]
        added = self.git_util.added_files(prev_ver=self.prev_ver)  # type: ignore[union-attr]
        renamed = self.git_util.renamed_files(prev_ver=self.prev_ver)  # type: ignore[union-attr]
        filtered_renamed = set()  # type:Set
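        # renamed entries are assumed to be (old_path, new_path) tuples;
        # the loop below keeps only the new path for spell-checking.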

        for file_tuple in renamed:
            filtered_renamed.add(file_tuple[1])

        return modified.union(added).union(filtered_renamed)

    def get_files_from_git(self):
        click.secho('Gathering all changed files from git', fg='bright_cyan')
        for file in self.gather_all_changed_files():
            file = str(file)
            if os.path.isfile(file) and find_type(file) in self.SUPPORTED_FILE_TYPES:
                self.files.add(file)

    def get_files_to_run_on(self):
        """Get all the relevant files that the spell-check could work on"""
        if self.git_util:
            self.get_files_from_git()

        elif os.path.isdir(self.file_path):
            self.get_all_md_and_yml_files_in_dir(self.file_path)

        elif find_type(self.file_path) in self.SUPPORTED_FILE_TYPES:
            self.files.add(self.file_path)

    def print_unknown_words(self):
        for word, corrections in self.unknown_words.items():
            click.secho(f'  - {word} - did you mean: {corrections}', fg='bright_red')

    def print_file_report(self):
        if self.files_without_misspells:
            click.secho('\n================= Files Without Misspells =================', fg='green')
            no_misspells_string = '\n'.join(self.files_without_misspells)
            click.secho(no_misspells_string, fg='green')

        if self.files_with_misspells:
            click.secho('\n================= Files With Misspells =================', fg='bright_red')
            misspells_string = '\n'.join(self.files_with_misspells)
            click.secho(misspells_string, fg='bright_red')

        if self.malformed_rn_files:
            click.secho('\n================= Malformed Release Notes =================', fg='bright_red')
            bad_rn = '\n'.join(self.malformed_rn_files)
            click.secho(bad_rn, fg='bright_red')

    def run_doc_review(self):
        """Runs spell-check on the given file and release notes check if relevant.

        Returns:
            bool. True if no problematic words found, False otherwise.
        """
        click.secho('\n================= Starting Doc Review =================', fg='bright_cyan')
        if len(self.SUPPORTED_FILE_TYPES) == 1:
            click.secho('Running only on release notes', fg='bright_cyan')

        self.get_files_to_run_on()

        # no eligible files found
        if not self.files:
            click.secho("Could not find any .md or .yml files - Aborting", fg='bright_red')
            return True

        self.add_known_words()
        for file in self.files:
            click.echo(f'\nChecking spelling on {file}')
            self.unknown_words = {}
            if file.endswith('.md'):
                self.check_md_file(file)

            elif file.endswith('.yml'):
                self.check_yaml(file)

            if self.unknown_words:
                click.secho(f"\n - Words that might be misspelled were found in "
                            f"{file}:", fg='bright_red')
                self.print_unknown_words()
                self.found_misspelled = True
                self.files_with_misspells.add(file)

            else:
                click.secho(f" - No misspelled words found in {file}", fg='green')
                self.files_without_misspells.add(file)

        self.print_file_report()
        if self.found_misspelled and not self.no_failure:
            return False

        return True

    def add_known_words(self):
        """Add known words to the spellchecker from external and internal files"""
        # adding known words file if given - these words will not count as misspelled
        if self.known_words_file_path:
            self.spellchecker.word_frequency.load_text_file(self.known_words_file_path)

        # adding the KNOWN_WORDS to the spellchecker recognized words.
        self.spellchecker.word_frequency.load_words(KNOWN_WORDS)

        if self.expand_dictionary:
            # NLTK (Natural Language Toolkit) is a large package containing several corpora.
            # To use it we need to download some of its word lists - we use the
            # reasonably sized "brown" and "webtext" corpora.
            # To avoid SSL download errors we disable SSL certificate verification.
            try:
                _create_unverified_https_context = ssl._create_unverified_context
            except AttributeError:
                pass
            else:
                ssl._create_default_https_context = _create_unverified_https_context

            # downloading "brown" and "webtext" sets from nltk.
            click.secho("Downloading expanded dictionary, this may take a minute...", fg='yellow')
            nltk.download('brown')
            nltk.download('webtext')

            # adding nltk's word set to spellchecker.
            self.spellchecker.word_frequency.load_words(brown.words())
            self.spellchecker.word_frequency.load_words(webtext.words())

    @staticmethod
    def remove_punctuation(word):
        """remove leading and trailing punctuation"""
        return word.strip(string.punctuation)
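    # e.g. remove_punctuation('"word!"') -> 'word'; interior punctuation such as
    # the apostrophe in "don't" survives, since strip() only trims the edges.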

    def check_word(self, word):
        """Check if a word is legal"""
        # check camel cases
        if not self.no_camel_case and self.is_camel_case(word):
            sub_words = self.camel_case_split(word)
            for sub_word in sub_words:
                sub_word = self.remove_punctuation(sub_word)
                if sub_word.isalpha() and self.spellchecker.unknown([sub_word]):
                    self.unknown_words[word] = list(self.spellchecker.candidates(sub_word))[:5]

        else:
            word = self.remove_punctuation(word)
            if word.isalpha() and self.spellchecker.unknown([word]):
                self.unknown_words[word] = list(self.spellchecker.candidates(word))[:5]

    def check_md_file(self, file_path):
        """Runs spell check on .md file. Adds unknown words to given unknown_words set.
        Also if RN file will review it and add it to malformed RN file set if needed.
        """
        pack_object: TextObject = path_to_pack_object(file_path)
        md_file_lines = pack_object.to_str().split('\n')

        if isinstance(pack_object, ReleaseNote):
            good_rn = ReleaseNotesChecker(file_path, md_file_lines).check_rn()
            if not good_rn:
                self.malformed_rn_files.add(file_path)

        for line in md_file_lines:
            for word in line.split():
                self.check_word(word)

    def check_yaml(self, file_path):
        """Runs spell check on .yml file. Adds unknown words to given unknown_words set.

        Args:
            file_path (str): The file path to the yml file.
        """
        pack_object: YAMLContentObject = path_to_pack_object(file_path)
        yml_info = pack_object.to_dict()

        if isinstance(pack_object, Integration):
            self.check_spelling_in_integration(yml_info)

        elif isinstance(pack_object, Script):
            self.check_spelling_in_script(yml_info)

        elif isinstance(pack_object, Playbook):
            self.check_spelling_in_playbook(yml_info)

    def check_spelling_in_integration(self, yml_file):
        """Check spelling on an integration file"""
        self.check_params(yml_file.get('configuration', []))
        self.check_commands(yml_file.get('script', {}).get('commands', []))
        self.check_display_and_description(yml_file.get('display'), yml_file.get('description'))

    def check_params(self, param_list):
        """Check spelling in integration parameters"""
        for param_conf in param_list:
            param_display = param_conf.get('display')
            if param_display:
                for word in param_display.split():
                    self.check_word(word)

            param_tooltip = param_conf.get('additionalinfo')
            if param_tooltip:
                for word in param_tooltip.split():
                    self.check_word(word)

    def check_commands(self, command_list):
        """Check spelling in integration commands"""
        for command in command_list:
            command_arguments = command.get('arguments', [])
            for argument in command_arguments:
                arg_description = argument.get('description')
                if arg_description:
                    for word in arg_description.split():
                        self.check_word(word)

            command_description = command.get('description')
            if command_description:
                for word in command_description.split():
                    self.check_word(word)

            command_outputs = command.get('outputs', [])
            for output in command_outputs:
                output_description = output.get('description')
                if output_description:
                    for word in output_description.split():
                        self.check_word(word)

    def check_display_and_description(self, display, description):
        """check integration display name and description"""
        if display:
            for word in display.split():
                self.check_word(word)

        if description:
            for word in description.split():
                self.check_word(word)

    def check_spelling_in_script(self, yml_file):
        """Check spelling in script file"""
        self.check_comment(yml_file.get('comment'))
        self.check_script_args(yml_file.get('args', []))
        self.check_script_outputs(yml_file.get('outputs', []))

    def check_script_args(self, arg_list):
        """Check spelling in script arguments"""
        for argument in arg_list:
            arg_description = argument.get('description')
            if arg_description:
                for word in arg_description.split():
                    self.check_word(word)

    def check_comment(self, comment):
        """Check spelling in script comment"""
        if comment:
            for word in comment.split():
                self.check_word(word)

    def check_script_outputs(self, outputs_list):
        """Check spelling in script outputs"""
        for output in outputs_list:
            output_description = output.get('description')
            if output_description:
                for word in output_description.split():
                    self.check_word(word)

    def check_spelling_in_playbook(self, yml_file):
        """Check spelling in playbook file"""
        self.check_playbook_description_and_name(yml_file.get('description'), yml_file.get('name'))
        self.check_tasks(yml_file.get('tasks', {}))

    def check_playbook_description_and_name(self, description, name):
        """Check spelling in playbook description and name"""
        if name:
            for word in name.split():
                self.check_word(word)

        if description:
            for word in description.split():
                self.check_word(word)

    def check_tasks(self, task_dict):
        """Check spelling in playbook tasks"""
        for task_key in task_dict.keys():
            task_info = task_dict[task_key].get('task')
            if task_info:
                task_description = task_info.get('description')
                if task_description:
                    for word in task_description.split():
                        self.check_word(word)

                task_name = task_info.get('name')
                if task_name:
                    for word in task_name.split():
                        self.check_word(word)
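A minimal usage sketch for the reviewer above; the README path is a placeholder assumption:

# Hypothetical invocation; the path below is a placeholder.
reviewer = DocReviewer(file_path='Packs/MyPack/README.md', no_camel_case=True)
passed = reviewer.run_doc_review()  # False only if misspells were found and no_failure is unset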
Example No. 14
    def check_readme_absolute_image_paths(self,
                                          is_pack_readme: bool = False
                                          ) -> list:
        """ Validate readme images absolute paths - Check if absolute paths are not broken.

        Arguments:
            is_pack_readme (bool) - True if the README file is a pack README, default: False

        Returns:
            list: List of the errors found
        """
        error_list = []
        working_branch_name: str = ''
        try:
            working_branch_name = GitUtil().get_current_git_branch_or_hash()
        except InvalidGitRepositoryError:
            pass
        # pack readme errors are handled and printed during the pack unique files validation
        should_print_error = not is_pack_readme
        absolute_links = re.findall(r'(!\[.*\])\((https://.*)\)$',
                                    self.readme_content,
                                    re.IGNORECASE | re.MULTILINE)
        absolute_links += re.findall(r'(<img.*?src\s*=\s*"(https://.*?)")',
                                     self.readme_content,
                                     re.IGNORECASE | re.MULTILINE)
        for link in absolute_links:
            error_message: str = ''
            error_code: str = ''
            prefix = '' if 'src' in link[0] else link[0].strip()
            img_url = link[1].strip()  # stripping in case there is whitespace at the beginning/end of the URL
            try:
                # a link that contains a branch name (other than master) is invalid since the branch will be deleted
                # after merge to master. in the url path (after '.com'), the third element should be the branch name.
                # example 'https://raw.githubusercontent.com/demisto/content/<branch-name>/Packs/.../image.png'
                url_path_elem_list = urlparse(img_url).path.split('/')[1:]
                if len(url_path_elem_list) >= 3 and \
                        (url_path_elem_list[2] == working_branch_name and working_branch_name != 'master'):
                    error_message, error_code = \
                        Errors.invalid_readme_image_error(prefix + f'({img_url})',
                                                          error_type='branch_name_readme_absolute_error')
                else:
                    try:
                        get_url_with_retries(img_url,
                                             retries=5,
                                             backoff_factor=1,
                                             timeout=10)
                    except HTTPError as error:
                        error_message, error_code = \
                            Errors.invalid_readme_image_error(prefix + f'({img_url})',
                                                              error_type='general_readme_absolute_error',
                                                              response=error.response)
            except Exception as ex:
                click.secho(
                    f"Could not validate the image link: {img_url}\n {ex}",
                    fg='yellow')
                continue

            if error_message and error_code:
                formatted_error = \
                    self.handle_error(error_message, error_code, file_path=self.file_path,
                                      should_print=should_print_error)
                if formatted_error:
                    error_list.append(formatted_error)

        return error_list
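A small self-contained check of the two link-extraction patterns used above; the sample README text is invented for illustration:

import re

# Invented sample exercising both patterns from check_readme_absolute_image_paths.
readme_content = (
    '![alt text](https://example.com/img.png)\n'
    '<img alt="x" src="https://example.com/other.png">\n'
)
links = re.findall(r'(!\[.*\])\((https://.*)\)$', readme_content, re.IGNORECASE | re.MULTILINE)
links += re.findall(r'(<img.*?src\s*=\s*"(https://.*?)")', readme_content, re.IGNORECASE | re.MULTILINE)
for prefix, url in links:
    print(url)  # prints both https image URLs that would be availability-checked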