def get_packs(modified_files, added_files):
    """Return the set of pack names that the modified and added files belong to."""
    packs = set()
    changed_files = modified_files.union(added_files)
    for changed_file in changed_files:
        if isinstance(changed_file, tuple):
            # renamed files arrive as (old_path, new_path) tuples - use the new path
            changed_file = changed_file[1]
        pack = get_pack_name(changed_file)
        if pack and is_file_path_in_pack(changed_file):
            packs.add(pack)

    return packs

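# Illustrative usage sketch of get_packs (the paths and pack name below are hypothetical):
#   modified = {'Packs/HelloWorld/Integrations/HelloWorld/HelloWorld.yml'}
#   added = {('Packs/HelloWorld/Scripts/OldName.yml',
#             'Packs/HelloWorld/Scripts/NewName.yml')}  # a renamed file as (old, new)
#   get_packs(modified, added)  # -> {'HelloWorld'}
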
def get_script_data(file_path, script_code=None):
    """Collect a script's metadata from its YAML file, keyed by the script ID."""
    script_data = OrderedDict()
    data_dictionary = get_yaml(file_path)
    id = data_dictionary.get('commonfields', {}).get('id', '-')
    if script_code is None:
        script_code = data_dictionary.get('script', '')

    name = data_dictionary.get('name', '-')
    tests = data_dictionary.get('tests')
    toversion = data_dictionary.get('toversion')
    deprecated = data_dictionary.get('deprecated', False)
    fromversion = data_dictionary.get('fromversion')
    depends_on, command_to_integration = get_depends_on(data_dictionary)
    # every command the script runs via demisto.executeCommand is recorded as an execution
    script_executions = sorted(set(re.findall(r"demisto.executeCommand\(['\"](\w+)['\"].*", script_code)))
    pack = get_pack_name(file_path)

    script_data['name'] = name
    script_data['file_path'] = file_path
    if toversion:
        script_data['toversion'] = toversion
    if fromversion:
        script_data['fromversion'] = fromversion
    if deprecated:
        script_data['deprecated'] = deprecated
    if depends_on:
        script_data['depends_on'] = depends_on
    if script_executions:
        script_data['script_executions'] = script_executions
    if command_to_integration:
        script_data['command_to_integration'] = command_to_integration
    if tests:
        script_data['tests'] = tests
    if pack:
        script_data['pack'] = pack

    return {id: script_data}

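# Illustrative sketch of the executeCommand extraction above (the script body is hypothetical):
#   script_code = 'demisto.executeCommand("setIncident", {"id": inc_id})'
#   re.findall(r"demisto.executeCommand\(['\"](\w+)['\"].*", script_code)  # -> ['setIncident']
# so get_script_data would record script_executions = ['setIncident'] for that script.
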
def get_general_data(path):
    """Collect generic metadata (name, versions, pack) from a JSON content file."""
    data = OrderedDict()
    json_data = get_json(path)
    id = json_data.get('id')
    brandname = json_data.get('brandName', '')
    name = json_data.get('name', '')
    fromversion = json_data.get('fromVersion')
    toversion = json_data.get('toVersion')
    pack = get_pack_name(path)
    if brandname:  # for classifiers
        data['name'] = brandname
    if name:  # for the rest
        data['name'] = name
    if toversion:
        data['toversion'] = toversion
    if fromversion:
        data['fromversion'] = fromversion
    if pack:
        data['pack'] = pack

    return {id: data}

def get_playbook_data(file_path):
    """Collect a playbook's metadata from its YAML file, keyed by the playbook ID."""
    playbook_data = OrderedDict()
    data_dictionary = get_yaml(file_path)
    id = data_dictionary.get('id', '-')
    name = data_dictionary.get('name', '-')
    deprecated = data_dictionary.get('deprecated', False)
    tests = data_dictionary.get('tests')
    toversion = data_dictionary.get('toversion')
    fromversion = data_dictionary.get('fromversion')
    implementing_scripts = get_task_ids_from_playbook('scriptName', data_dictionary)
    implementing_playbooks = get_task_ids_from_playbook('playbookName', data_dictionary)
    command_to_integration = get_commmands_from_playbook(data_dictionary)
    pack = get_pack_name(file_path)

    playbook_data['name'] = name
    playbook_data['file_path'] = file_path
    if toversion:
        playbook_data['toversion'] = toversion
    if fromversion:
        playbook_data['fromversion'] = fromversion
    if implementing_scripts:
        playbook_data['implementing_scripts'] = implementing_scripts
    if implementing_playbooks:
        playbook_data['implementing_playbooks'] = implementing_playbooks
    if command_to_integration:
        playbook_data['command_to_integration'] = command_to_integration
    if tests:
        playbook_data['tests'] = tests
    if deprecated:
        playbook_data['deprecated'] = deprecated
    if pack:
        playbook_data['pack'] = pack

    return {id: playbook_data}

def get_layout_data(path):
    """Collect a layout's metadata from its JSON file, keyed by the layout ID."""
    data = OrderedDict()
    json_data = get_json(path)
    layout = json_data.get('layout')
    name = layout.get('name', '-')
    id = layout.get('id', '-')
    type_id = json_data.get('typeId')
    type_name = json_data.get('TypeName')
    fromversion = json_data.get('fromVersion')
    toversion = json_data.get('toVersion')
    pack = get_pack_name(path)
    if type_id:
        data['typeID'] = type_id
    if type_name:
        data['typename'] = type_name
    data['name'] = name
    if toversion:
        data['toversion'] = toversion
    if fromversion:
        data['fromversion'] = fromversion
    if pack:
        data['pack'] = pack

    return {id: data}

def get_integration_data(file_path):
    """Collect an integration's metadata and commands from its YAML file, keyed by the integration ID."""
    integration_data = OrderedDict()
    data_dictionary = get_yaml(file_path)
    id = data_dictionary.get('commonfields', {}).get('id', '-')
    name = data_dictionary.get('name', '-')
    deprecated = data_dictionary.get('deprecated', False)
    tests = data_dictionary.get('tests')
    toversion = data_dictionary.get('toversion')
    fromversion = data_dictionary.get('fromversion')
    commands = data_dictionary.get('script', {}).get('commands', [])
    cmd_list = [command.get('name') for command in commands]
    pack = get_pack_name(file_path)
    deprecated_commands = [command.get('name') for command in commands if command.get('deprecated', False)]

    integration_data['name'] = name
    integration_data['file_path'] = file_path
    if toversion:
        integration_data['toversion'] = toversion
    if fromversion:
        integration_data['fromversion'] = fromversion
    if cmd_list:
        integration_data['commands'] = cmd_list
    if tests:
        integration_data['tests'] = tests
    if deprecated:
        integration_data['deprecated'] = deprecated
    if deprecated_commands:
        integration_data['deprecated_commands'] = deprecated_commands
    if pack:
        integration_data['pack'] = pack

    return {id: integration_data}

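# Minimal sketch of how the get_*_data helpers are assumed to compose into an
# id_set-style mapping (the glob patterns and keys below are hypothetical):
#   import glob
#   scripts = [get_script_data(p) for p in glob.glob('Packs/*/Scripts/*/*.yml')]
#   integrations = [get_integration_data(p) for p in glob.glob('Packs/*/Integrations/*/*.yml')]
#   id_set = {'scripts': scripts, 'integrations': integrations}
# Each list entry is a one-key dict of {content_item_id: metadata}, so items can be
# looked up by ID while the surrounding list preserves insertion order.
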
def search_potential_secrets(secrets_file_paths: list):
    """Returns potential secrets (sensitive data) found in committed and added files.

    :param secrets_file_paths: paths of files that are being committed to the git repo
    :return: dictionary mapping each file name to the list of potential secrets found in it
    """
    secrets_found = {}

    for file_path in secrets_file_paths:
        # Determine whether the file is in a pack, and which pack it belongs to
        is_pack = is_file_path_in_pack(file_path)
        pack_name = get_pack_name(file_path)
        # Get the generic/ioc/files white list sets, based on whether the file is in a pack
        secrets_white_list, ioc_white_list, files_white_list = get_white_listed_items(is_pack, pack_name)
        # Skip white listed files
        if file_path in files_white_list:
            print("Skipping secrets detection for file: {} as it is white listed".format(file_path))
            continue
        # Init vars for the current loop
        file_name = os.path.basename(file_path)
        high_entropy_strings = []
        secrets_found_with_regex = []
        _, file_extension = os.path.splitext(file_path)
        skip_secrets = {'skip_once': False, 'skip_multi': False}
        # Get file contents
        file_contents = get_file_contents(file_path, file_extension)
        # In packs, regard all white list items as regex as well; reset the pack's whitelist
        # in order to avoid repetition later
        if is_pack:
            file_contents = remove_white_list_regex(file_contents, secrets_white_list)
            secrets_white_list = set()
        yml_file_contents = get_related_yml_contents(file_path)
        # Temporarily add all context output path keywords to the whitelist
        if file_extension == YML_FILE_EXTENSION or yml_file_contents:
            temp_white_list = create_temp_white_list(yml_file_contents if yml_file_contents else file_contents)
            secrets_white_list = secrets_white_list.union(temp_white_list)
        # Scan line by line: strings with high entropy / IoC regex matches are possibly suspicious
        for line in file_contents.split('\n'):
            # If disable-secrets comments are detected, skip the line(s)
            skip_secrets = is_secrets_disabled(line, skip_secrets)
            if skip_secrets['skip_once'] or skip_secrets['skip_multi']:
                skip_secrets['skip_once'] = False
                continue
            # Regex scanning for IoCs and false positive groups
            regex_secrets, false_positives = regex_for_secrets(line)
            for regex_secret in regex_secrets:
                if not any(ioc.lower() in regex_secret.lower() for ioc in ioc_white_list):
                    secrets_found_with_regex.append(regex_secret)
            # Add false positives to the white list before testing the strings in the line
            secrets_white_list = secrets_white_list.union(false_positives)
            # Due to the nature of eml files, skip string-by-string secret detection - only regex
            if file_extension in SKIP_FILE_TYPE_ENTROPY_CHECKS or \
                    any(demisto_type in file_name for demisto_type in SKIP_DEMISTO_TYPE_ENTROPY_CHECKS):
                continue
            line = remove_false_positives(line)
            # Calculate entropy for each string in the line
            for string_ in line.split():
                # Compare the lower-cased string against both the generic and temp white lists
                if not any(white_list_string.lower() in string_.lower() for white_list_string in secrets_white_list):
                    entropy = calculate_shannon_entropy(string_)
                    if entropy >= ENTROPY_THRESHOLD:
                        high_entropy_strings.append(string_)

        if high_entropy_strings or secrets_found_with_regex:
            # Deduplicate identical matches between the two lists
            file_secrets = list(set(high_entropy_strings + secrets_found_with_regex))
            secrets_found[file_name] = file_secrets

    return secrets_found

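# calculate_shannon_entropy and ENTROPY_THRESHOLD are defined elsewhere in this module.
# A minimal, illustrative sketch of the standard Shannon entropy formula the helper is
# assumed to implement: H = -sum(p_c * log2(p_c)) over the character frequencies of the
# string. Long random tokens (API keys, passwords) score high - a 16-character string of
# all-distinct characters scores exactly log2(16) = 4 bits - while ordinary words stay low.
def _shannon_entropy_sketch(data: str) -> float:
    import math

    if not data:
        return 0.0
    entropy = 0.0
    for char in set(data):
        probability = data.count(char) / len(data)  # relative frequency of this character
        entropy -= probability * math.log2(probability)
    return entropy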