def processNpmAlert(repoId, alert, hm):
    """Record one GitHub npm security alert into the accumulator *hm*.

    *hm* maps (dependencyId, vulnId, toolId) -> {'severity': ..., 'count': n},
    where n is the number of alerts seen for that key.

    Parameters:
        repoId: database id of the repository the alert belongs to.
        alert: one GitHub security-alert dict (GraphQL response shape).
        hm: dict accumulator, mutated in place.
    """
    artifact = alert['securityVulnerability']['package']['name']
    group = 'npm'  # npm packages have no group id; the ecosystem name is used
    # Drops the first two characters of the requirement string
    # (presumably an operator prefix such as "< ") -- TODO confirm format.
    version = alert['vulnerableRequirements'][2:]
    packageId = common.getPackageId(group, artifact, version,
                                    ecosystem='npm', insertIfNotExists=True)
    dependencyId = common.getDependencyId(repoId, packageId,
                                          idtool=toolId, insertIfNotExists=True)
    cve = getCVE(alert)
    if cve:
        vulnId = common.getVulnerabilityId(cve, None)
    else:
        # No CVE assigned yet: fall back to recording the GitHub advisory.
        vulnId = addGithubAdvisory(alert)
    severity = alert['securityAdvisory']['severity']
    key = (dependencyId, vulnId, toolId)
    if key not in hm:
        # Start the counter at 1 so 'count' equals the number of alerts seen.
        # (Previously initialized to 0, which undercounted by one and was
        # inconsistent with processMavenModules, which starts at 1.)
        hm[key] = {'severity': severity, 'count': 1}
    else:
        hm[key]['count'] += 1
def getVulns(repoId, table) -> dict:
    """Parse a victims-report HTML table into {module: {dependencyId: [vulnerabilityIds]}}.

    Header (<th>) rows start a new module section; data (<td>) rows carry one
    alert: a maven coordinate in the first cell and a comma-separated CVE list
    in the second.
    """

    def resolveCveIds(cveList, packageId):
        # Map CVE strings to vulnerability ids; anything that is not a CVE
        # is unexpected in a victims report and aborts processing.
        resolved = []
        for cve in cveList:
            if not cve.startswith('CVE'):
                raise Exception('non cve vulnerability in victims report', cve)
            resolved.append(common.getVulnerabilityId(cve, None))
        return resolved

    result = {}
    currentModule = None
    for row in table.find_all('tr'):
        if row.find_all('th'):
            # Header row: a new module section begins.
            currentModule = row.getText().replace('\n', '').replace(' ', '')
            result[currentModule] = {}
            continue
        # Data row: one alert belonging to the current module.
        cells = row.find_all('td')
        group, artifact, version = cells[0].getText().split(':')
        packageId = common.getPackageId(group, artifact, version)
        dependencyId = common.getDependencyId(repoId, packageId)
        cveList = cells[1].getText().replace('\n', '').replace(' ', '').split(',')
        result[currentModule][dependencyId] = resolveCveIds(cveList, packageId)
    return result
def getMavenPackageId(dependency: str, identifier: str, insertIfNotExists=False):
    """Resolve a maven dependency/identifier pair to a package id.

    Delegates coordinate parsing to parseMavenIdentifier and the id lookup
    (optionally inserting a missing row) to common.getPackageId.
    """
    coordinates = parseMavenIdentifier(dependency, identifier)
    return common.getPackageId(*coordinates, 'maven', insertIfNotExists)
def getPackageId(library, eco):
    """Look up a package id by its 'artifact-version' string.

    *library* is an artifact name and version joined by '-'. On a db miss the
    string is split at its last '-' and the package is inserted, using *eco*
    as both group and ecosystem.
    """
    query = '''select * from package where concat(artifact,'-',version) = %s'''
    rows = sql.execute(query, (library, ))
    if rows:
        return rows[0]['id']
    print("not in db", library, eco)
    # The final '-'-separated token is the version; everything before it
    # (which may itself contain dashes) is the artifact.
    artifact, _, version = library.rpartition('-')
    return common.getPackageId(eco, artifact, version, eco, True)
def processMavenModules(repoId, mavenModules):
    """Process Snyk maven scan results into an alert-count accumulator.

    Returns a dict mapping (vulnId, dependencyId) -> {'count': n, 'severity': s},
    where n is the number of alerts seen for that pair. Also records each
    vulnerability's dependency path via addSnykInfo.
    """
    d = {}
    for module in mavenModules:
        print(module['projectName'])
        for vuln in module['vulnerabilities']:
            group = vuln['mavenModuleName']['groupId']
            artifact = vuln['mavenModuleName']['artifactId']
            version = vuln['version']
            packageId = common.getPackageId(group, artifact, version)
            dependencyId = common.getDependencyId(repoId, packageId)
            # 'from' is the chain of packages leading to the vulnerable one.
            dependencyPath = constructDependencyPath(vuln['from'])
            dependencyPathId = common.getDependencyPathId(dependencyPath)
            ids = vuln['identifiers']
            severity = vuln['severity']
            vulnIds = []
            # snykFlag: whether to fall back to Snyk's own vulnerability id.
            snykFlag = True
            if ids['CVE']:
                snykFlag = False  # CVE id present
                for cve in ids['CVE']:
                    vulnId = common.getVulnerabilityId(cve, None)
                    if vulnId == -1:
                        # Unknown CVE: if it was the only one, fall back to
                        # the Snyk id; either way skip recording this CVE.
                        if len(ids['CVE']) == 1:
                            snykFlag = True
                        continue
                    vulnIds.append(vulnId)
                    addVulnerabilityInfo(vulnId, vuln)
            if snykFlag:
                vulnId = addSnykVulenrability(vuln)
                vulnIds.append(vulnId)
                addVulnerabilityInfo(vulnId, vuln)
            for vulnId in vulnIds:
                if (vulnId, dependencyId) not in d:
                    d[(vulnId, dependencyId)] = {'count': 1, 'severity': severity}
                else:
                    d[(vulnId, dependencyId)]['count'] += 1
            addSnykInfo(vuln, dependencyPathId, repoId, 'maven')
    return d
def addNodeDependencies(repoId, path):
    """Parse the npm dependency tree at *path* and load it into the db.

    Writes the full tree to 'npmDependencyTree', then a reduced frame (with
    depth/scope dropped and a fresh null id column) to 'dependency'.
    """
    frame = ndt.parse_dependency(path)
    if frame.empty:
        # Nothing to load for this repository.
        return
    frame['packageId'] = frame.apply(
        lambda row: getPackageId('npm', row.package, row.version, 'npm'), axis=1)
    frame.drop(['package', 'version'], axis=1, inplace=True)
    frame['repositoryId'] = repoId
    sql.load_df('npmDependencyTree', frame)
    frame.drop(['depth', 'scope'], axis=1, inplace=True)
    # The id column is auto-assigned by the db; load it as nulls.
    frame['id'] = np.nan
    sql.load_df('dependency', frame)
def process_vulnerabilities(repoId, data, allLibraries):
    """Insert one SourceClear vulnerability record's maven alerts into the db.

    *data* is a single vulnerability entry; *allLibraries* is the scan's
    library list, which the entry references by positional '_links' refs.
    Each affected dependency gets a mavenAlert row weighted 1/len(depIds).
    """
    #get vuln id
    # srcclrId: whether to fall back to SourceClear's own vulnerability id
    # (used when there is no CVE, or the CVE is unknown to our db).
    srcclrId = True
    assert 'cve' in data.keys()
    if data['cve'] is not None:
        cveId = 'CVE-' + data['cve']
        vulnId = common.getVulnerabilityId(cveId, None)
        if vulnId > 0:
            srcclrId = False
        print(cveId)
    if srcclrId:
        vulnId = getSrcClrVulnerability(data)
    depIds = []
    for library in data['libraries']:
        # Refs look like '/records/0/libraries/<libIdx>/versions/<verIdx>';
        # the first and last path components index into allLibraries.
        ref = library['_links']['ref']
        assert ref.startswith('/records/0/libraries/')
        ref = ref[len('/records/0/libraries/'):]
        ref = ref.split('/')
        p, v = int(ref[0]), int(ref[-1])
        package = allLibraries[p]
        group = package['coordinate1']
        artifact = package['coordinate2']
        version = package['versions'][v]['version']
        packageId = common.getPackageId(group, artifact, version, 'maven', True)
        dependencyId = common.getDependencyId(repoId, packageId, toolId, True)
        depIds.append(dependencyId)
    for dependencyId in depIds:
        insertQ = 'insert into mavenAlert values(%s,%s,%s,%s,%s,%s,%s,%s)'
        try:
            # Weight is split evenly across all dependencies of this vuln.
            sql.execute(insertQ, (None, None, dependencyId, vulnId, toolId,
                                  None, None, 1 / len(depIds)))
        except sql.pymysql.IntegrityError as error:
            if error.args[0] == sql.PYMYSQL_DUPLICATE_ERROR:
                #TODO update scandate
                print('maven alert exists already in db')
            else:
                raise Exception(str(error))
def addMavenDependencies(repoId, path):
    """Generate and load maven dependency trees for the repo at *path*.

    Runs `mvn dependency:tree` (one dep.txt per module), loads each module's
    tree into 'mavenDependencyTree', then loads a deduplicated
    (repositoryId, packageId) list into 'dependency'.

    Side effects: changes the process working directory to *path* and shells
    out to mvn/find.
    """
    #generate maven dependency file
    depfilename = "dep.txt"
    os.chdir(path)
    os.system('mvn dependency:tree -DoutputFile={}'.format(depfilename))
    #read maven dependency file through dependencyTree2dict
    # find emits one path per line; drop the trailing empty string.
    files = (os.popen(
        'find ./ -name "{}"'.format(depfilename)).read()).split("\n")[:-1]
    data = dependencyTree2Dict('./' + depfilename)
    group, artifact, version = data['project'].split(':')
    #a dataframe to just hold dependencies (repo & package)
    dependencyDf = pd.DataFrame(columns=['repositoryId', 'packageId'])
    #read dependency files to get tree data for each
    for file in files:
        print(file)
        data = dependencyTree2Dict(file)
        # project id is 'group:artifact:...'; keep the artifact as module name
        module = data['project'].split(':')[1]
        data = data['dependencies']
        if not data:
            #zero dependencies
            continue
        data['repositoryId'] = [repoId] * len(data['artifact'])
        data['module'] = [module] * len(data['artifact'])
        df = pd.DataFrame(data)
        df['packageId'] = df.apply(lambda row: getPackageId(
            row.group, row.artifact, row.version, 'maven', True),
                                   axis=1)
        df.drop(['group', 'artifact', 'version'], axis=1, inplace=True)
        sql.load_df('mavenDependencyTree', df)
        df = df[['repositoryId', 'packageId']]
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent and produces the same frame here.
        dependencyDf = pd.concat([dependencyDf, df], ignore_index=True)
    dependencyDf = dependencyDf.drop_duplicates(subset='packageId', keep='last')
    # id is auto-assigned by the db; load it as nulls.
    dependencyDf['id'] = [np.nan] * len(dependencyDf)
    sql.load_df('dependency', dependencyDf)
def getNPMPackageId(dependency: str, identifier: str, insertIfNotExists=False):
    """Resolve an npm dependency/identifier pair to a package id.

    Delegates coordinate parsing to parseNPMIdentifier and the id lookup
    (optionally inserting a missing row) to common.getPackageId.
    """
    coordinates = parseNPMIdentifier(dependency, identifier)
    return common.getPackageId(*coordinates, 'npm', insertIfNotExists)