def update_victims_cve_db_on_s3():
    """Update Victims CVE DB on S3."""
    repo_url = 'https://github.com/victims/victims-cve-db.git'
    s3 = StoragePool.get_connected_storage('S3VulnDB')

    with tempdir() as temp_dir:
        Git.clone(repo_url, temp_dir, depth="1")
        s3.store_victims_db(temp_dir)

def _run_victims_cve_db_cli(self, arguments):
    """Run Victims CVE DB CLI."""
    s3 = StoragePool.get_connected_storage('S3VulnDB')
    output = []

    with tempdir() as temp_victims_db_dir:
        if not s3.retrieve_victims_db_if_exists(temp_victims_db_dir):
            self.log.debug('No Victims CVE DB found on S3, cloning from github')
            self.update_victims_cve_db_on_s3()
            s3.retrieve_victims_db_if_exists(temp_victims_db_dir)

        try:
            cli = os.path.join(temp_victims_db_dir, 'victims-cve-db-cli.py')
            command = [cli, 'search',
                       '--ecosystem', 'java',
                       '--name', arguments['name'],
                       '--version', arguments['version']]
            output = TimedCommand.get_command_output(command,
                                                     graceful=False,
                                                     is_json=True,
                                                     timeout=60)  # 1 minute
        except TaskError as e:
            self.log.exception(e)

    return output

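# A hedged usage sketch for the wrapper above: `task` stands for a
# CVEcheckerTask instance and the Maven coordinates are purely illustrative,
# not taken from the code.
#
#   output = task._run_victims_cve_db_cli({'name': 'commons-collections',
#                                          'version': '3.2.1'})
#
# `output` is the CLI's JSON output as parsed by TimedCommand (is_json=True),
# or [] if the invocation raised TaskError.
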
def store_victims_db(self, victims_db_dir):
    """Zip victims_db_dir/* and store to S3 as VICTIMS_DB_ARCHIVE."""
    with tempdir() as temp_archive_dir:
        temp_archive_path = os.path.join(temp_archive_dir, self.VICTIMS_DB_ARCHIVE)
        with cwd(victims_db_dir):
            Archive.zip_file('.', temp_archive_path)
        self.store_file(temp_archive_path, self.VICTIMS_DB_ARCHIVE)

def retrieve_depcheck_db_if_exists(self, data_dir):
    """Retrieve zipped CVE DB file as stored on S3 and extract."""
    if self.object_exists(self.DEPCHECK_DB_ARCHIVE):
        with tempdir() as archive_dir:
            archive_path = os.path.join(archive_dir, self.DEPCHECK_DB_ARCHIVE)
            self.retrieve_file(self.DEPCHECK_DB_ARCHIVE, archive_path)
            Archive.extract_zip(archive_path, data_dir)
        return True
    return False

def retrieve_victims_db_if_exists(self, victims_db_dir):
    """Retrieve VICTIMS_DB_ARCHIVE from S3 and extract into victims_db_dir."""
    if self.object_exists(self.VICTIMS_DB_ARCHIVE):
        with tempdir() as temp_archive_dir:
            temp_archive_path = os.path.join(temp_archive_dir, self.VICTIMS_DB_ARCHIVE)
            self.retrieve_file(self.VICTIMS_DB_ARCHIVE, temp_archive_path)
            Archive.extract_zip(temp_archive_path, victims_db_dir)
        return True
    return False

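# A minimal round-trip sketch for the store/retrieve pair above, assuming an
# S3VulnDB storage connected as elsewhere in this code; the local paths are
# illustrative:
#
#   s3 = StoragePool.get_connected_storage('S3VulnDB')
#   s3.store_victims_db('/path/to/victims-cve-db-checkout')   # zip + upload
#   if s3.retrieve_victims_db_if_exists('/tmp/victims'):      # download + extract
#       ...  # /tmp/victims now holds the DB contents
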
def store_depcheck_db(self, data_dir):
    """Zip CVE DB file and store to S3."""
    with tempdir() as archive_dir:
        archive_path = os.path.join(archive_dir, self.DEPCHECK_DB_ARCHIVE)
        db_file_path = os.path.join(data_dir, self.DEPCHECK_DB_FILENAME)
        try:
            Archive.zip_file(db_file_path, archive_path, junk_paths=True)
        except TaskError:
            # no DB file to zip; keep whatever is already on S3
            pass
        else:
            self.store_file(archive_path, self.DEPCHECK_DB_ARCHIVE)

def retrieve_index_if_exists(self, target_dir):
    """Retrieve central-index.zip from S3 and extract into target_dir/central-index."""
    if self.object_exists(self._INDEX_ARCHIVE):
        with tempdir() as temp_dir:
            archive_path = os.path.join(temp_dir, self._INDEX_ARCHIVE)
            central_index_dir = os.path.join(target_dir, self._INDEX_DIRNAME)
            self.retrieve_file(self._INDEX_ARCHIVE, archive_path)
            Archive.extract_zip(archive_path, central_index_dir, mkdest=True)
        return True
    return False

def store_index(self, target_dir):
    """Zip files in target_dir/central-index dir and store to S3."""
    with tempdir() as temp_dir:
        central_index_dir = os.path.join(target_dir, self._INDEX_DIRNAME)
        archive_path = os.path.join(temp_dir, self._INDEX_ARCHIVE)
        try:
            Archive.zip_file(central_index_dir, archive_path, junk_paths=True)
        except TaskError:
            # nothing to zip; keep whatever is already on S3
            pass
        else:
            self.store_file(archive_path, self._INDEX_ARCHIVE)

def update_depcheck_db_on_s3():
    """Update OWASP Dependency-Check DB on S3."""
    s3 = StoragePool.get_connected_storage('S3VulnDB')
    depcheck = os.path.join(configuration.OWASP_DEP_CHECK_PATH, 'bin', 'dependency-check.sh')

    with tempdir() as temp_data_dir:
        s3.retrieve_depcheck_db_if_exists(temp_data_dir)
        # give DependencyCheck 25 minutes to download the DB
        TimedCommand.get_command_output([depcheck, '--updateonly', '--data', temp_data_dir],
                                        timeout=1500)
        s3.store_depcheck_db(temp_data_dir)

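# A hedged sketch of how the two update functions might be wired into a
# periodic refresh job; `refresh_vulnerability_databases` is a hypothetical
# name, only the two calls come from the code above:
def refresh_vulnerability_databases():
    """Refresh both vulnerability DB snapshots kept on S3 (hypothetical job)."""
    update_victims_cve_db_on_s3()
    update_depcheck_db_on_s3()
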
def _resolve_versions(to_solve):
    """Resolve version ranges in to_solve.

    :param to_solve: {"groupId:artifactId": "version-range"}
    :return: {"groupId:artifactId": "version"}
    """
    if not to_solve:
        return {}
    with tempdir() as tmpdir:
        with cwd(tmpdir):
            MavenSolver._generate_pom_xml(to_solve)
            return MavenSolver._dependencies_from_pom_xml()

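# Illustration of the contract above (the coordinates and versions are made
# up): a Maven version range resolves to one concrete version, e.g.
#
#   MavenSolver._resolve_versions({'org.apache.commons:commons-lang3': '[3.0,3.5)'})
#   # -> {'org.apache.commons:commons-lang3': '3.4'}
#
# Under the hood this writes a throwaway pom.xml containing the ranges into a
# temp dir and lets Maven pick the concrete versions.
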
def _get_log(url):
    """Clone Git repo and get its log.

    :param url: url to the git repo
    """
    with tempdir() as tmp_dir:
        git = Git.clone(url, tmp_dir)
        # nice notebook to check at:
        # http://nbviewer.jupyter.org/github/tarmstrong/code-analysis/blob/master/IPythonReviewTime.ipynb
        log = git.log()
    return log

def test_python_salt(self):
    """To make sure we can scan source with standalone PKG-INFO.

    https://github.com/jeremylong/DependencyCheck/issues/896
    """
    pkg_info = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            '..', 'data', 'pypi', 'salt-2016.11.6', 'PKG-INFO')
    args = {'ecosystem': 'pypi', 'name': 'salt', 'version': '2016.11.6'}
    with tempdir() as extracted:
        # We need a write-access into extracted/
        copy(pkg_info, extracted)
        flexmock(EPVCache).should_receive('get_extracted_source_tarball').and_return(extracted)
        task = CVEcheckerTask.create_test_instance(task_name='source_licenses')
        results = task.execute(arguments=args)

    assert isinstance(results, dict)
    assert set(results.keys()) == {'details', 'status', 'summary'}
    assert results['status'] == 'success'
    assert results['summary'] == ['CVE-2017-12791']
    # http://www.cvedetails.com/version/222059/Saltstack-Salt-2016.11.6.html
    expected_details = [{
        "cvss": {
            "score": 7.5,
            "vector": "AV:N/AC:L/Au:?/C:P/I:P/A:P"
        },
        "description": "Directory traversal vulnerability in minion id validation in "
                       "SaltStack Salt before 2016.11.7 and 2017.7.x before 2017.7.1 "
                       "allows remote minions with incorrect credentials to authenticate "
                       "to a master via a crafted minion ID.",
        "id": "CVE-2017-12791",
        "references": [
            "http://www.securityfocus.com/bid/100384",
            "https://bugzilla.redhat.com/show_bug.cgi?id=1482006",
            "https://github.com/saltstack/salt/pull/42944",
            "https://docs.saltstack.com/en/2016.11/topics/releases/2016.11.7.html",
            "https://docs.saltstack.com/en/latest/topics/releases/2017.7.1.html",
            "https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=872399"
        ],
        "severity": "High"
    }]
    assert_equal(results.get('details'), expected_details)

def run_mercator_on_git_repo(self, arguments):
    """Clone the git repo from arguments['url'] and run Mercator on the checkout."""
    self._strict_assert(arguments.get('url'))

    with tempdir() as workdir:
        repo_url = arguments.get('url')
        repo = Git.clone(repo_url, path=workdir, depth="1")
        metadata = self.run_mercator(arguments, workdir,
                                     keep_path=True, outermost_only=False, timeout=900)
        if metadata.get('status', None) != 'success':
            self.log.error('Mercator failed on %s', repo_url)
            return None

        # add some auxiliary information so we can later find the manifest file
        head = repo.rev_parse(['HEAD'])[0]
        for detail in metadata['details']:
            path = detail['path'][len(workdir):]
            # path should look like this:
            # <git-sha1>/path/to/manifest.file
            detail['path'] = head + path

        return metadata

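# A runnable illustration of the path rewriting above; the workdir and commit
# sha are made up:
workdir = '/tmp/tmpab12cd'
head = '1e4c0d1a9a7f9b1de6c8d9e1f2a3b4c5d6e7f8a9'
detail = {'path': workdir + '/src/package.json'}
# strip the temp dir prefix and prepend the HEAD sha, as the loop above does
detail['path'] = head + detail['path'][len(workdir):]
assert detail['path'] == '1e4c0d1a9a7f9b1de6c8d9e1f2a3b4c5d6e7f8a9/src/package.json'
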
def test_python_salt(self):
    """To make sure we can scan source with standalone PKG-INFO.

    https://github.com/jeremylong/DependencyCheck/issues/896
    """
    pkg_info = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            '..', 'data', 'pypi', 'salt-2016.11.6', 'PKG-INFO')
    args = {'ecosystem': 'pypi', 'name': 'salt', 'version': '2016.11.6'}
    with tempdir() as extracted:
        # We need a write-access into extracted/
        copy(pkg_info, extracted)
        flexmock(EPVCache).should_receive('get_extracted_source_tarball').and_return(extracted)
        task = CVEcheckerTask.create_test_instance(task_name='source_licenses')
        results = task.execute(arguments=args)

    assert isinstance(results, dict)
    assert set(results.keys()) == {'details', 'status', 'summary'}
    assert results['status'] == 'success'
    assert results['summary'] == ['CVE-2017-14696', 'CVE-2017-14695', 'CVE-2017-12791']
    # http://www.cvedetails.com/version/222059/Saltstack-Salt-2016.11.6.html
    expected_details = [
        {
            "cvss": {
                "score": 5.0,
                "vector": "AV:N/AC:L/Au:?/C:?/I:?/A:P"
            },
            "description": "SaltStack Salt before 2016.3.8, 2016.11.x before 2016.11.8, "
                           "and 2017.7.x before 2017.7.2 allows remote attackers to cause "
                           "a denial of service via a crafted authentication request.",
            "id": "CVE-2017-14696",
            "references": [
                "https://github.com/saltstack/salt/commit/"
                "5f8b5e1a0f23fe0f2be5b3c3e04199b57a53db5b",
                "https://docs.saltstack.com/en/latest/topics/releases/2016.11.8.html",
                "https://docs.saltstack.com/en/latest/topics/releases/2016.3.8.html",
                "http://lists.opensuse.org/opensuse-updates/2017-10/msg00073.html",
                "http://lists.opensuse.org/opensuse-updates/2017-10/msg00075.html",
                "https://bugzilla.redhat.com/show_bug.cgi?id=1500742",
                "https://docs.saltstack.com/en/latest/topics/releases/2017.7.2.html"
            ],
            "severity": "Medium"
        },
        {
            "cvss": {
                "score": 7.5,
                "vector": "AV:N/AC:L/Au:?/C:P/I:P/A:P"
            },
            "description": "Directory traversal vulnerability in minion id validation "
                           "in SaltStack Salt before 2016.3.8, 2016.11.x before 2016.11.8, "
                           "and 2017.7.x before 2017.7.2 allows remote minions with incorrect "
                           "credentials to authenticate to a master via a crafted minion ID. "
                           "NOTE: this vulnerability exists because of an incomplete fix "
                           "for CVE-2017-12791.",
            "id": "CVE-2017-14695",
            "references": [
                "https://docs.saltstack.com/en/latest/topics/releases/2016.11.8.html",
                "https://docs.saltstack.com/en/latest/topics/releases/2016.3.8.html",
                "http://lists.opensuse.org/opensuse-updates/2017-10/msg00073.html",
                "https://bugzilla.redhat.com/show_bug.cgi?id=1500748",
                "https://github.com/saltstack/salt/commit/"
                "80d90307b07b3703428ecbb7c8bb468e28a9ae6d",
                "http://lists.opensuse.org/opensuse-updates/2017-10/msg00075.html",
                "https://docs.saltstack.com/en/latest/topics/releases/2017.7.2.html"
            ],
            "severity": "High"
        },
        {
            "cvss": {
                "score": 7.5,
                "vector": "AV:N/AC:L/Au:?/C:P/I:P/A:P"
            },
            "description": "Directory traversal vulnerability in minion id validation in "
                           "SaltStack Salt before 2016.11.7 and 2017.7.x before 2017.7.1 "
                           "allows remote minions with incorrect credentials to authenticate "
                           "to a master via a crafted minion ID.",
            "id": "CVE-2017-12791",
            "references": [
                "http://www.securityfocus.com/bid/100384",
                "https://bugzilla.redhat.com/show_bug.cgi?id=1482006",
                "https://github.com/saltstack/salt/pull/42944",
                "https://docs.saltstack.com/en/2016.11/topics/releases/2016.11.7.html",
                "https://docs.saltstack.com/en/latest/topics/releases/2017.7.1.html",
                "https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=872399"
            ],
            "severity": "High"
        }
    ]
    assert_equal(results.get('details'), expected_details)

def _run_owasp_dep_check(self, scan_path, experimental=False):
    """Run OWASP Dependency-Check on scan_path and parse its XML report."""
    def _clean_dep_check_tmp():
        for dcdir in glob(os.path.join(gettempdir(), 'dctemp*')):
            rmtree(dcdir)

    s3 = StoragePool.get_connected_storage('S3VulnDB')
    depcheck = os.path.join(self.configuration.OWASP_DEP_CHECK_PATH,
                            'bin', 'dependency-check.sh')
    with tempdir() as temp_data_dir:
        if not s3.retrieve_depcheck_db_if_exists(temp_data_dir):
            self.log.debug('No cached OWASP Dependency-Check DB, generating fresh now ...')
            self.update_depcheck_db_on_s3()
            s3.retrieve_depcheck_db_if_exists(temp_data_dir)

        report_path = os.path.join(temp_data_dir, 'report.xml')
        command = [depcheck,
                   '--noupdate',
                   '--format', 'XML',
                   '--project', 'CVEcheckerTask',
                   '--data', temp_data_dir,
                   '--scan', scan_path,
                   '--out', report_path]
        if experimental:
            command.extend(['--enableExperimental'])
        for suppress_xml in glob(os.path.join(os.environ['OWASP_DEP_CHECK_SUPPRESS_PATH'],
                                              '*.xml')):
            command.extend(['--suppress', suppress_xml])

        output = []
        try:
            self.log.debug('Running OWASP Dependency-Check to scan %s for vulnerabilities',
                           scan_path)
            output = TimedCommand.get_command_output(command,
                                                     graceful=False,
                                                     timeout=600)  # 10 minutes
            with open(report_path) as r:
                report_dict = anymarkup.parse(r.read())
        except (TaskError, FileNotFoundError) as e:
            _clean_dep_check_tmp()
            for line in output:
                self.log.warning(line)
            self.log.exception(str(e))
            raise FatalTaskError('OWASP Dependency-Check scan failed') from e
        _clean_dep_check_tmp()

    results = []
    dependencies = report_dict.get('analysis', {}).get('dependencies')  # value can be None
    dependencies = dependencies.get('dependency', []) if dependencies else []
    if not isinstance(dependencies, list):
        dependencies = [dependencies]
    for dependency in dependencies:
        vulnerabilities = dependency.get('vulnerabilities')  # value can be None
        vulnerabilities = vulnerabilities.get('vulnerability', []) if vulnerabilities else []
        if not isinstance(vulnerabilities, list):
            vulnerabilities = [vulnerabilities]
        for vulnerability in vulnerabilities:
            av = vulnerability.get('cvssAccessVector')
            av = av[0] if av else '?'
            ac = vulnerability.get('cvssAccessComplexity')
            ac = ac[0] if ac else '?'
            au = vulnerability.get('cvssAuthentication')
            au = au[0] if au else '?'
            c = vulnerability.get('cvssConfidentialImpact')
            c = c[0] if c else '?'
            i = vulnerability.get('cvssIntegrityImpact')
            i = i[0] if i else '?'
            a = vulnerability.get('cvssAvailabilityImpact')
            a = a[0] if a else '?'
            vector = "AV:{AV}/AC:{AC}/Au:{Au}/C:{C}/I:{Integrity}/A:{A}".format(
                AV=av, AC=ac, Au=au, C=c, Integrity=i, A=a)
            result = {
                'cvss': {
                    'score': vulnerability.get('cvssScore'),
                    'vector': vector
                }
            }
            references = vulnerability.get('references', {}).get('reference', [])
            if not isinstance(references, list):
                references = [references]
            result['references'] = [r.get('url') for r in references]
            for field in ['severity', 'description']:
                result[field] = vulnerability.get(field)
            result['id'] = vulnerability.get('name')
            results.append(result)

    return {'summary': [r['id'] for r in results],
            'status': 'success',
            'details': results}

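# A runnable illustration of the CVSS vector assembly above: the field names
# match those read by _run_owasp_dep_check, the concrete values are made up.
# Each component is abbreviated to its first letter, '?' when missing.
vulnerability = {'cvssAccessVector': 'NETWORK', 'cvssAccessComplexity': 'LOW',
                 'cvssAuthentication': None, 'cvssConfidentialImpact': 'PARTIAL',
                 'cvssIntegrityImpact': 'PARTIAL', 'cvssAvailabilityImpact': 'PARTIAL'}
parts = []
for key, label in [('cvssAccessVector', 'AV'), ('cvssAccessComplexity', 'AC'),
                   ('cvssAuthentication', 'Au'), ('cvssConfidentialImpact', 'C'),
                   ('cvssIntegrityImpact', 'I'), ('cvssAvailabilityImpact', 'A')]:
    value = vulnerability.get(key)
    parts.append('{}:{}'.format(label, value[0] if value else '?'))
assert '/'.join(parts) == 'AV:N/AC:L/Au:?/C:P/I:P/A:P'
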
def execute(self, arguments, db):
    """Dependency finder logic."""
    try:
        results = db.query(StackAnalysisRequest)\
                    .filter(StackAnalysisRequest.id == arguments.get('external_request_id'))\
                    .first()
    except SQLAlchemyError:
        raise FatalTaskError('Could not find data for request id = %s'
                             % arguments.get('external_request_id'))

    manifests = []
    if results is not None:
        row = results.to_dict()
        request_json = row.get("requestJson", {})
        manifests = request_json.get('manifest', [])

    # If we receive a manifest file we need to save it first
    result = []
    for manifest in manifests:
        with tempdir() as temp_path:
            with open(os.path.join(temp_path, manifest['filename']), 'a+') as fd:
                fd.write(manifest['content'])

            # mercator-go does not work if there is no package.json
            if 'shrinkwrap' in manifest['filename'].lower():
                with open(os.path.join(temp_path, 'package.json'), 'w') as f:
                    f.write(json.dumps({}))

            # Create instance manually since stack analysis is not handled by dispatcher
            subtask = MercatorTask.create_test_instance(task_name='metadata')
            arguments['ecosystem'] = manifest['ecosystem']
            out = subtask.run_mercator(arguments, temp_path, resolve_poms=False)

            if not out["details"]:
                raise FatalTaskError("No metadata found processing manifest file '{}'"
                                     .format(manifest['filename']))

            if 'dependencies' not in out['details'][0] and out.get('status', None) == 'success':
                raise FatalTaskError("Dependencies could not be resolved from "
                                     "manifest file '{}'".format(manifest['filename']))

            out["details"][0]['manifest_file'] = manifest['filename']
            out["details"][0]['ecosystem'] = manifest['ecosystem']
            out["details"][0]['manifest_file_path'] = manifest.get('filepath',
                                                                   'File path not available')

            # If we're handling an external request we need to convert dependency
            # specifications to concrete versions that we can query later on
            # in the `AggregatorTask`
            manifest_descriptor = get_manifest_descriptor_by_filename(manifest['filename'])
            if 'external_request_id' in arguments:
                manifest_dependencies = []
                if manifest_descriptor.has_resolved_deps:  # npm-shrinkwrap.json, pom.xml
                    if "_dependency_tree_lock" in out["details"][0]:  # npm-shrinkwrap.json
                        if 'dependencies' in out['details'][0]["_dependency_tree_lock"]:
                            manifest_dependencies = out["details"][0]["_dependency_tree_lock"]\
                                .get("dependencies", [])
                    else:  # pom.xml
                        if 'dependencies' in out['details'][0]:
                            manifest_dependencies = out["details"][0].get("dependencies", [])

                    if manifest_descriptor.has_recursive_deps:  # npm-shrinkwrap.json
                        def _flatten(deps, collect):
                            for dep in deps:
                                collect.append({'package': dep['name'],
                                                'version': dep['version']})
                                _flatten(dep['dependencies'], collect)
                        resolved_deps = []
                        _flatten(manifest_dependencies, resolved_deps)
                    else:  # pom.xml
                        resolved_deps = [{'package': x.split(' ')[0],
                                          'version': x.split(' ')[1]}
                                         for x in manifest_dependencies]
                else:  # package.json, requirements.txt
                    resolved_deps = self._handle_external_deps(
                        self.storage.get_ecosystem(arguments['ecosystem']),
                        out["details"][0]["dependencies"])
                out["details"][0]['_resolved'] = resolved_deps
            result.append(out)

    return {'result': result}

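# A runnable illustration of the shrinkwrap flattening above; the package
# names and versions are made up. Nested npm-shrinkwrap entries collapse into
# a flat {package, version} list.
deps = [{'name': 'a', 'version': '1.0.0',
         'dependencies': [{'name': 'b', 'version': '2.0.0', 'dependencies': []}]}]

def _flatten(deps, collect):
    for dep in deps:
        collect.append({'package': dep['name'], 'version': dep['version']})
        _flatten(dep['dependencies'], collect)

resolved = []
_flatten(deps, resolved)
assert resolved == [{'package': 'a', 'version': '1.0.0'},
                    {'package': 'b', 'version': '2.0.0'}]
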
def execute(self):
    """Check the maven index for new releases and schedule their analyses."""
    self.log.info("Checking maven index for new releases")
    maven_index_checker_dir = os.getenv('MAVEN_INDEX_CHECKER_PATH')
    target_dir = os.path.join(maven_index_checker_dir, 'target')
    central_index_dir = os.path.join(target_dir, 'central-index')
    timestamp_path = os.path.join(central_index_dir, 'timestamp')

    s3 = StoragePool.get_connected_storage('S3MavenIndex')
    self.log.info('Fetching pre-built maven index from S3, if available.')
    s3.retrieve_index_if_exists(target_dir)

    old_timestamp = 0
    try:
        old_timestamp = int(os.stat(timestamp_path).st_mtime)
    except OSError:
        self.log.info('Timestamp is missing, we need to build the index from scratch.')

    last_offset = s3.get_last_offset()
    with tempdir() as java_temp_dir:
        cmd = ['java', '-Xmx768m', '-Djava.io.tmpdir={}'.format(java_temp_dir),
               '-jar', 'maven-index-checker.jar', '-c']
        with cwd(maven_index_checker_dir):
            output = TimedCommand.get_command_output(cmd, is_json=True, graceful=False,
                                                     timeout=1200)
            current_count = output['count']
            new_timestamp = int(os.stat(timestamp_path).st_mtime)
            if old_timestamp != new_timestamp:
                self.log.info('Storing pre-built maven index to S3...')
                s3.store_index(target_dir)
                self.log.debug('Stored. Index in S3 is up-to-date.')
                if old_timestamp == 0:
                    s3.set_last_offset(current_count)
                    self.log.info('This is first run, i.e. all packages are considered new. '
                                  'Skipping scheduling to not analyze all packages in index.')
                    return
            else:
                self.log.info('Index in S3 is up-to-date.')

            self.log.debug("Number of entries in maven indexer: %d, last offset used: %d",
                           current_count, last_offset)
            to_schedule_count = current_count - last_offset
            if to_schedule_count == 0:
                self.log.info("No new packages to schedule, exiting...")
                return

            cmd = ['java', '-Xmx768m', '-Djava.io.tmpdir={}'.format(java_temp_dir),
                   '-jar', 'maven-index-checker.jar', '-r', '0-{}'.format(to_schedule_count)]
            output = TimedCommand.get_command_output(cmd, is_json=True, graceful=False,
                                                     timeout=1200)

    self.log.info("Found %d new packages to analyse, scheduling analyses...", len(output))
    for entry in output:
        self.run_selinon_flow('bayesianFlow',
                              {'ecosystem': 'maven',
                               'name': '{groupId}:{artifactId}'.format(**entry),
                               'version': entry['version'],
                               'recursive_limit': 0})

    s3.set_last_offset(current_count)
    self.log.info("All new maven releases scheduled for analysis, exiting...")

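# Worked illustration of the offset bookkeeping above (the numbers are made
# up): if the last stored offset was 1200 and the checker now reports
# {'count': 1210}, then to_schedule_count = 1210 - 1200 = 10, the checker is
# re-run with '-r', '0-10' to list just the 10 newest releases, each release
# is scheduled as a 'bayesianFlow', and the offset stored on S3 becomes 1210.
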
def _handle_dotnet_solution(self, data):
    """Handle nuget package metadata."""
    if not data.get('Metadata'):
        return {}

    data = data['Metadata']
    key_map = (('Id', 'name'),
               ('Description',),
               ('ProjectUrl', 'homepage'),
               # ('Summary',), ('Copyright',),
               # ('RequireLicenseAcceptance', 'require_license_acceptance'),
               )
    transformed = self.transform_keys(data, key_map)

    if data.get('Authors'):
        transformed['author'] = ','.join(data['Authors'])

    if data.get('LicenseUrl'):
        # It's here due to circular dependencies
        from f8a_worker.process import IndianaJones  # download_file
        from f8a_worker.workers import LicenseCheckTask  # run_scancode
        transformed['declared_licenses'] = [data['LicenseUrl']]
        with tempdir() as tmpdir:
            try:
                # Get file from 'LicenseUrl' and let LicenseCheckTask decide what license it is
                if IndianaJones.download_file(data['LicenseUrl'], tmpdir):
                    scancode_results = LicenseCheckTask.run_scancode(tmpdir)
                    if scancode_results.get('summary', {}).get('sure_licenses'):
                        transformed['declared_licenses'] = \
                            scancode_results['summary']['sure_licenses']
            except Exception:
                # Don't raise if IndianaJones or LicenseCheckTask fail
                pass

    # transform
    # "DependencyGroups": [
    #     {
    #         "Packages": [
    #             {
    #                 "Id": "NETStandard.Library",
    #                 "VersionRange": {"OriginalString": "1.6.0"}
    #             }
    #         ]
    #     }
    # ]
    # to ["NETStandard.Library 1.6.0"]
    deps = set()
    for dep_group in data.get('DependencyGroups', []):
        for package in dep_group.get('Packages', []):
            deps.add('{} {}'.format(package.get('Id', ''),
                                    package.get('VersionRange', {}).get('OriginalString', '')))
    if deps:
        transformed['dependencies'] = list(deps)

    repository = data.get('Repository')
    if isinstance(repository, dict) and repository:
        transformed['code_repository'] = {'type': repository.get('Type'),
                                          'url': repository.get('Url')}
    elif 'ProjectUrl' in data:
        transformed['code_repository'] = self._identify_gh_repo(data['ProjectUrl'])

    version = data.get('Version')
    if isinstance(version, dict) and version:
        transformed['version'] = '{}.{}.{}'.format(version.get('Major', ''),
                                                   version.get('Minor', ''),
                                                   version.get('Patch', ''))

    if data.get('Tags'):
        transformed['keywords'] = self._split_keywords(data['Tags'])

    return transformed

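# A runnable illustration of the DependencyGroups transform above, using the
# sample payload from the comment in the code:
data = {'DependencyGroups': [
    {'Packages': [{'Id': 'NETStandard.Library',
                   'VersionRange': {'OriginalString': '1.6.0'}}]}]}
deps = set()
for dep_group in data.get('DependencyGroups', []):
    for package in dep_group.get('Packages', []):
        deps.add('{} {}'.format(package.get('Id', ''),
                                package.get('VersionRange', {}).get('OriginalString', '')))
assert list(deps) == ['NETStandard.Library 1.6.0']
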