def main():
    """Read the CVE collection from MongoDB and draw a 2x2 grid of Laplace plots.

    Connects to the MongoDB server on localhost:27017, reads every document of
    the ``cves`` collection in the ``cvedb`` database and indexes the documents
    by their ``'id'`` field.  Then four subplots are created and, for each one,
    a ``calc_laplace`` plotting method is invoked (php, openjdk and wheezy
    twice, with its second argument set to False and then True).
    """
    client = MongoClient('mongodb://localhost:27017/')
    cve_collection = client.cvedb.cves

    # Index all stored documents by CVE id.
    # NOTE(review): the mapping is built but not read afterwards in this
    # function -- confirm whether it is still needed.
    cvedict = {document['id']: document for document in cve_collection.find()}

    carlosplt.pre_paper_plot()
    fig = plt.figure()
    tester = calc_laplace()

    # One drawing call per panel, in subplot order (1..4).
    panels = (
        lambda: tester.laplace_php([]),
        lambda: tester.laplace_openjdk([]),
        lambda: tester.laplace_wheezy([], False),
        lambda: tester.laplace_wheezy([], True),
    )
    for position, draw_panel in enumerate(panels, start=1):
        fig.add_subplot(2, 2, position)
        draw_panel()

    carlosplt.post_paper_plot(True, True, True)
    plt.show()
def plot_severity_percentage(self):
    """Stack-plot the yearly share of each severity class.

    The per-month counters in ``self.pkg_with_cvss[pkg][month]`` (positions
    0..3, labelled low / medium / high / N/A in the plot) are summed into
    12-month buckets, turned into fractions of the bucket total and drawn as
    a stack plot.  Bucket 0 is left out of the plot and the x axis starts at
    2001 (presumably bucket 0 is the year 2000 -- TODO confirm).
    """
    bucket_count = self.years + 1
    low = [0] * bucket_count
    medium = [0] * bucket_count
    high = [0] * bucket_count
    undefined = [0] * bucket_count
    total = [0] * bucket_count

    for pkg in self.pkg_with_cvss:
        for month in range(len(self.src2month[pkg])):
            bucket = month // 12
            try:
                counts = self.pkg_with_cvss[pkg][month]
                low[bucket] += counts[0]
                medium[bucket] += counts[1]
                high[bucket] += counts[2]
                undefined[bucket] += counts[3]
                total[bucket] += counts[3] + counts[2] + counts[1] + counts[0]
            except IndexError:
                # Bucket 19 is skipped silently; any other overflow is printed.
                if bucket != 19:
                    print(bucket)

    # Turn the absolute counts into fractions of the bucket total.
    series = (low, medium, high, undefined)
    for bucket in range(bucket_count):
        try:
            for values in series:
                values[bucket] = values[bucket] / total[bucket]
        except ZeroDivisionError:
            # Empty bucket: every share is reported as 0.
            for values in series:
                values[bucket] = 0

    print(low)
    print(high)

    carlosplt.pre_paper_plot()
    palette = ['#fee8c8', '#fdbb84', '#e34a33', 'grey']
    plotted_years = range(2001, 2001 + self.years)
    severity_labels = ['low', 'medium', 'high', 'N/A']
    stacks = plt.stackplot(
        plotted_years,
        [values[1:] for values in series],
        colors=palette,
        alpha=0.9,
        labels=severity_labels)
    plt.xticks(plotted_years)
    # Reverse the handles so the legend order matches the stacking order.
    plt.legend(loc='upper left', handles=stacks[::-1])
    carlosplt.post_paper_plot(True, True, True)
    plt.show()
def test_slocs(src2month, src2sloccount):
    """Relate per-package vulnerability totals to source-line counts.

    Prints the vulnerability totals of the more-SLOC and less-SLOC halves of
    the packages, the largest and smallest SLOC count, and the Spearman rank
    correlation between SLOC count and vulnerability total.  Finally plots the
    vulnerability totals ordered by decreasing SLOC count.

    Args:
        src2month: mapping package name -> sequence of per-month
            vulnerability counts.
        src2sloccount: mapping package name -> sloccount entry of the form
            ``(total, [ansic, cpp, asm, java, python, perl, sh])``; only the
            total (position 0) is read.

    Packages without a sloccount entry or with a total of 0 are ignored.
    """
    # Debug output for the kernel package; tolerate data sets without it.
    print(sum(src2month.get('linux', [])))

    sloc_counts = []
    vuln_totals = []
    for pkg in src2month:
        try:
            total_slocs = src2sloccount[pkg][0]
        except KeyError:
            print(pkg + ": no sloccount data found!")
            continue
        # Convert before testing so that a textual "0" is skipped as well.
        sloc_count = int(total_slocs)
        if sloc_count == 0:
            continue
        total = sum(src2month[pkg])
        if total > 100:
            print(pkg + ', ' + str(total) + ', ' + str(total_slocs))
        sloc_counts.append(sloc_count)
        vuln_totals.append(total)

    if not sloc_counts:
        # Nothing to rank, correlate or plot.
        print('no packages with sloccount data found')
        return

    # Rank once by decreasing SLOC count (ties broken by vulnerability total).
    ranked = sorted(zip(sloc_counts, vuln_totals), reverse=True)
    slocs_ranked = [slocs for slocs, _ in ranked]
    vulns_sorted_slocs_total = [vulns for _, vulns in ranked]

    half = len(ranked) // 2
    print(sum(vulns_sorted_slocs_total[:half]))
    print(sum(vulns_sorted_slocs_total[half:]))
    print(slocs_ranked[0])
    print(slocs_ranked[-1])
    print(spearmanr(sloc_counts, vuln_totals))

    carlosplt.pre_paper_plot(True)
    plt.plot(vulns_sorted_slocs_total)
    plt.ylabel('Number of vulnerabilities')
    plt.xlabel('Number of SLOCS ranking')
    carlosplt.post_paper_plot(True, True, True)
    plt.show()
def plot_all_severity(self, l, m, h, udef):
    """Rebuild the monthly counts for the chosen severity classes and plot them.

    Args:
        l, m, h, udef: truthy to include the counter at position 0, 1, 2
            or 3 respectively of each ``self.pkg_with_cvss[pkg][month]``
            entry (named low / medium / high / undefined in this code).

    The flags are stored on the instance, ``self.src2month_temp`` is filled
    with the per-month sums of the selected counters and copied into
    ``self.src2month_loc``, and the plotter objects are then created and run
    in sequence.
    """
    carlosplt.pre_paper_plot()
    self.l, self.m, self.h, self.udef = l, m, h, udef

    selection = (l, m, h, udef)
    for pkg in self.pkg_with_cvss:
        self.src2month_temp[pkg] = []
        for month in range(len(self.src2month[pkg])):
            entry = self.pkg_with_cvss[pkg][month]
            # All four counters are read, whether selected or not.
            counters = (entry[0], entry[1], entry[2], entry[3])
            selected_total = 0
            for enabled, value in zip(selection, counters):
                if enabled:
                    selected_total += value
            self.src2month_temp[pkg].append(selected_total)

    for pkg in self.src2month:
        # Full copy: the data for 2018 is not cut.
        self.src2month_loc[pkg] = list(self.src2month_temp[pkg])

    # Total number per year.
    self.severityPlotter = Temp_Plotter(self)
    self.severityPlotter.plot_total()

    self.pkgnumPlotter = NumPackages_Plotter(self.severityPlotter)
    # Number of affected packages per year.
    self.pkgnumPlotter.plot_num_affected()
    # Average number of vulnerabilities per affected package per year.
    self.pkgnumPlotter.plot_average_number()

    # Regular and LTS for Wheezy.
    self.wheezy = WheezyPloter(self)
    self.wheezy.plot_wheezy_lts()
def test_pop(src2month, src2pop):
    """Relate per-package vulnerability totals to package popularity.

    Prints the vulnerability totals of the more-popular and less-popular
    halves of the packages, the highest and lowest popularity value, the
    whole ``src2pop`` mapping and the Spearman rank correlation between
    popularity and vulnerability total.  Finally plots the vulnerability
    totals ordered by decreasing popularity.

    Args:
        src2month: mapping package name -> sequence of per-month
            vulnerability counts.
        src2pop: mapping package name -> popularity value (anything
            ``int()`` accepts).

    Packages without a popularity entry are ignored.
    """
    popularity = []
    vuln_totals = []
    for pkg in src2month:
        try:
            pkg_popularity = src2pop[pkg]
        except KeyError:
            # No popularity data for this package.
            continue
        popularity_value = int(pkg_popularity)
        total = sum(src2month[pkg])
        if total > 100:
            # str() keeps this working when popularity is stored as a number.
            print(pkg + ', ' + str(total) + ', ' + str(pkg_popularity))
        popularity.append(popularity_value)
        vuln_totals.append(total)

    if not popularity:
        # Nothing to rank, correlate or plot.
        print('no packages with popularity data found')
        return

    # Rank once by decreasing popularity (ties broken by vulnerability total).
    ranked = sorted(zip(popularity, vuln_totals), reverse=True)
    pop_ranked = [pop for pop, _ in ranked]
    vulns_sorted_pop = [vulns for _, vulns in ranked]

    half = len(ranked) // 2
    print(sum(vulns_sorted_pop[:half]))
    print(sum(vulns_sorted_pop[half:]))
    print(pop_ranked[0])
    print(pop_ranked[-1])
    print(src2pop)
    print(spearmanr(popularity, vuln_totals))

    carlosplt.pre_paper_plot(True)
    plt.plot(vulns_sorted_pop)
    plt.ylabel('Number of vulnerabilities')
    plt.xlabel('Popularity ranking')
    carlosplt.post_paper_plot(True, True, True)
    plt.show()
print('xmin = ', results.power_law.xmin) print('xmax = ', results.power_law.xmax) print('sigma = ', results.power_law.sigma) print('D = ', results.power_law.D) print(results.truncated_power_law.xmin) print('xmax = ', results.truncated_power_law.xmax) print(results.power_law.discrete) print('lognormal mu: ', results.lognormal.mu) print('lognormal sigma: ', results.lognormal.sigma) #custom_model=[] #for i in sorted(mydata,reverse=True): # ccdf = #fig=results.plot_pdf(color='b', linewidth=2) carlosplt.pre_paper_plot(True) fig = results.plot_ccdf(color='darkblue', linestyle='-', label='data') results.power_law.plot_ccdf(color='darkgreen', ax=fig, label='power-law fit') #results.truncated_power_law.plot_ccdf(color = 'red', ax=fig) #results.lognormal_positive.plot_ccdf(color = 'yellow', ax=fig) #results.lognormal.plot_ccdf(color = 'brown', ax=fig) #results.exponential.plot_ccdf(color = 'orange', ax=fig) plt.ylabel('ccdf') plt.xlabel('Vulnerabilities') fig.legend() carlosplt.post_paper_plot(True, True, True) plt.show() R, p = results.distribution_compare('power_law', 'exponential') print('Exponential: ', R, p) R, p = results.distribution_compare('power_law', 'stretched_exponential') print('Stretched exponential: ', R, p)
def plot_bounties(ff):
    """Box-plot bounty amounts per quarter with two linear trend lines.

    Reads ``reports_team.json`` from the working directory (team name ->
    list of report dicts), buckets the awarded bounties by the month of each
    report's ``'latest_disclosable_activity_at'`` date (2001-2018), groups
    them into quarters, runs a Shapiro-Wilk and a two-sample KS test on every
    quarter with more than three bounties, fits OLS lines to the quarterly
    average and median, and draws a log-scale box plot of the last 20
    quarters on the current matplotlib axes.

    Args:
        ff: plot variant.
            0 -- fixed IBB team list, all severities ('IBB-all')
            1 -- fixed IBB team list, only severity high/critical ('IBB-high')
            2 -- all teams, all severities ('All-all')
            3 -- all teams, only severity high/critical ('All-high')

    Raises:
        ValueError: if ``ff`` is not one of 0, 1, 2, 3.
    """
    labels = {0: 'IBB-all', 1: 'IBB-high', 2: 'All-all', 3: 'All-high'}
    if ff not in labels:
        # Fail before any I/O instead of hitting an unbound label at the end.
        raise ValueError('ff must be 0, 1, 2 or 3, got %r' % (ff,))
    labeltext = labels[ff]
    high_only = ff in (1, 3)

    with open("reports_team.json", "r") as fp:
        reports_team = json.load(fp)

    if ff < 2:
        ibb_list = ['ibb-php', 'ibb-python', 'ibb-data', 'ibb-flash',
                    'ibb-nginx', 'ibb-perl', 'internet', 'ibb-openssl',
                    'ibb-apache']
        print('list follows')
        for team in ibb_list:
            print(reports_team[team])
    else:
        ibb_list = list(reports_team)

    # Largest single bounty and bounty sum per team (diagnostic output only).
    most_team = {}
    sum_bounty_team = {}
    for team in ibb_list:
        largest = 0.0
        team_sum = 0.0
        for report in reports_team[team]:
            try:
                amount = float(report['total_awarded_bounty_amount'])
            except KeyError:
                print('#'*80)
                print(report)
                print('Report id ', report['id'], ' - bounty not found')
                continue
            team_sum += amount
            if amount > largest:
                largest = amount
        most_team[team] = largest
        sum_bounty_team[team] = team_sum
    print(most_team)
    print(sum_bounty_team)

    # Monthly buckets covering the years 2001-2018.
    first_year = 2001
    years = 18
    month_count = 12 * years
    month2money = [0.0] * month_count
    month2bountylist = [[] for _ in range(month_count)]

    for team in ibb_list:
        for report in reports_team[team]:
            datetime_obj = parser.parse(report['latest_disclosable_activity_at'])
            print(str(datetime_obj))
            # datetime.month is 1-based; subtract 1 so that January lands in
            # the first slot of its year and quarters line up with 3*j slices.
            slot = (int(datetime_obj.year) - first_year) * 12 + datetime_obj.month - 1
            if not 0 <= slot < month_count:
                # Would otherwise raise IndexError or wrap to a wrong month.
                print('Report id ', report.get('id'),
                      ' - date outside covered years, skipped')
                continue
            try:
                if not high_only or report['severity_rating'] in ('high', 'critical'):
                    amount = float(report['total_awarded_bounty_amount'])
                    month2money[slot] += amount
                    month2bountylist[slot].append(amount)
            except KeyError:
                # Report without severity rating or bounty amount.
                continue
    print(month2bountylist)

    carlosplt.pre_paper_plot()

    # Tick labels: "Q1'YY" on the first quarter of each year, blank otherwise.
    quartersx = []
    for year in range(1, years + 1):
        quartersx.append("Q1'" + str(year).zfill(2))
        quartersx.extend([' '] * 3)

    quarter_num = years * 4
    quarter_sum = []
    quarter_av = []
    quarter2bountylist = []
    for quarter in range(quarter_num):
        months = slice(3 * quarter, 3 * (quarter + 1))
        money = sum(month2money[months])
        bounties = [item for sublist in month2bountylist[months] for item in sublist]
        print(bounties)
        quarter_sum.append(len(bounties))
        quarter2bountylist.append(bounties)
        quarter_av.append(money / len(bounties) if bounties else 0)

    recent = 4 * 5  # number of trailing quarters that are plotted
    n = len(quarter_sum[-recent:])
    x = range(n)

    ## Shapiro normality test for each quarter, plus a KS comparison of the
    ## quarter against the pooled bounties of all quarters.
    reference = []
    for bounties in quarter2bountylist:
        reference += bounties
    print(reference)
    alpha = 0.01
    for bounties in quarter2bountylist:
        print(bounties)
        if len(bounties) > 3:
            stat, p = shapiro(bounties)
            print('Statistics=%.3f, p=%.3f' % (stat, p))
            if p > alpha:
                print('Sample looks Gaussian (fail to reject H0)')
            else:
                print('Sample does not look Gaussian (reject H0)')
            w, p = ks_2samp(bounties, reference)
            if p > alpha:
                print('Samples look similar')
            else:
                print('Samples do not look similar')

    ## Linear regression of average and median over the plotted quarters.
    ## Quarters without bounties are left out; x is the quarter position.
    average_points = [(pos, avg)
                      for pos, avg in enumerate(quarter_av[-recent:])
                      if avg != 0]
    median_points = [(pos, median(bounties))
                     for pos, bounties in enumerate(quarter2bountylist[-recent:])
                     if bounties]
    for points, line_style in ((average_points, {}),
                               (median_points, {'color': 'darkred'})):
        if not points:
            print('No bounties in the plotted quarters, regression skipped')
            continue
        xx = [pos for pos, _ in points]
        y = [value for _, value in points]
        X = sm.add_constant(xx)
        model = sm.OLS(y, X).fit()
        predictions = model.predict(X)
        plt.plot(xx, predictions, **line_style)
        print(model.summary())
        print(model.summary().as_latex())

    ## Create box plot
    bp = plt.boxplot(quarter2bountylist[-recent:], whis=[5, 95],
                     patch_artist=True, positions=x)
    plt.setp(bp['boxes'], color='black')
    plt.setp(bp['whiskers'], color='darkred')
    plt.setp(bp['caps'], color='darkred')
    plt.setp(bp['fliers'], markersize=3.0)
    plt.yscale('log')
    plt.ylim(top=50000)
    plt.ylim(bottom=1)
    plt.xticks(np.arange(0, n), quartersx[-recent:], rotation="vertical")
    plt.ylabel(labeltext)
    plt.xlabel('Quarter')
    carlosplt.post_paper_plot(True, True, True)
def plot_demographics(ff):
    """Bar-plot, per quarter, the number of bounties or of new reporters.

    Reads ``reports_team.json`` from the working directory (team name ->
    list of report dicts), buckets every report that has both a reporter id
    and an awarded bounty by the month of its
    ``'latest_disclosable_activity_at'`` date (2001-2018), groups the months
    into quarters and draws the last 20 quarters as bars on the current
    matplotlib axes.

    Args:
        ff: plot variant.
            0 -- fixed IBB team list, number of bounties ('num - IBB')
            1 -- fixed IBB team list, first-time reporters ('new - IBB')
            2 -- all teams, number of bounties ('num - All')
            3 -- all teams, first-time reporters ('new - All')

    Raises:
        ValueError: if ``ff`` is not one of 0, 1, 2, 3.
    """
    labels = {0: 'num - IBB', 1: 'new - IBB', 2: 'num - All', 3: 'new - All'}
    if ff not in labels:
        # Fail before any I/O instead of hitting an unbound label at the end.
        raise ValueError('ff must be 0, 1, 2 or 3, got %r' % (ff,))
    labeltext = labels[ff]
    count_new_reporters = ff in (1, 3)

    with open("reports_team.json", "r") as fp:
        reports_team = json.load(fp)

    if ff < 2:
        ibb_list = ['ibb-php', 'ibb-python', 'ibb-data', 'ibb-flash',
                    'ibb-nginx', 'ibb-perl', 'internet', 'ibb-openssl',
                    'ibb-apache']
        print('list follows')
        for team in ibb_list:
            print(reports_team[team])
    else:
        ibb_list = list(reports_team)

    # Largest single bounty and bounty sum per team (diagnostic output only).
    most_team = {}
    sum_bounty_team = {}
    for team in ibb_list:
        largest = 0.0
        team_sum = 0.0
        for report in reports_team[team]:
            try:
                amount = float(report['total_awarded_bounty_amount'])
            except KeyError:
                print('#'*80)
                print(report)
                print('Report id ', report['id'], ' - bounty not found')
                continue
            team_sum += amount
            if amount > largest:
                largest = amount
        most_team[team] = largest
        sum_bounty_team[team] = team_sum
    print(most_team)
    print(sum_bounty_team)

    # Monthly buckets covering the years 2001-2018.
    first_year = 2001
    years = 18
    month_count = 12 * years
    month2bountylist = [[] for _ in range(month_count)]
    month2newreporters = [0] * month_count
    seen_reporters = set()  # reporter ids already counted as "new"

    for team in ibb_list:
        for report in reports_team[team]:
            datetime_obj = parser.parse(report['latest_disclosable_activity_at'])
            print(str(datetime_obj))
            # datetime.month is 1-based; subtract 1 so that January lands in
            # the first slot of its year and quarters line up with 3*j slices.
            slot = (int(datetime_obj.year) - first_year) * 12 + datetime_obj.month - 1
            if not 0 <= slot < month_count:
                # Would otherwise raise IndexError or wrap to a wrong month.
                print('Report id ', report.get('id'),
                      ' - date outside covered years, skipped')
                continue
            try:
                reporter = report['reporter']['id']
                amount = float(report['total_awarded_bounty_amount'])
            except KeyError:
                # Only reports with a reporter id and a bounty are counted.
                print('Error with report ', report['id'])
                continue
            month2bountylist[slot].append(amount)
            if reporter not in seen_reporters:
                month2newreporters[slot] += 1
                seen_reporters.add(reporter)
    print(month2bountylist)

    carlosplt.pre_paper_plot()

    # Tick labels: "Q1'YY" on the first quarter of each year, blank otherwise.
    quartersx = []
    for year in range(1, years + 1):
        quartersx.append("Q1'" + str(year).zfill(2))
        quartersx.extend([' '] * 3)

    quarter_num = years * 4
    quarter_sum = []
    for quarter in range(quarter_num):
        months = slice(3 * quarter, 3 * (quarter + 1))
        if count_new_reporters:
            quarter_sum.append(sum(month2newreporters[months]))
        else:
            quarter_sum.append(sum(len(bounties)
                                   for bounties in month2bountylist[months]))

    recent = 4 * 5  # number of trailing quarters that are plotted
    shown = quarter_sum[-recent:]
    n = len(shown)
    x = range(n)
    width = 1/2

    ## Create bars plot
    plt.bar(x, shown, width, color='darkblue', label='Number', edgecolor='black')
    plt.xticks(np.arange(0, n), quartersx[-recent:], rotation="vertical")
    plt.ylabel(labeltext)
    plt.xlabel('Quarter')
    carlosplt.post_paper_plot(True, True, True)
# NOTE(review): the statements up to the `if __name__` guard read names that
# are not assigned in this span (quarter_sum, quarter2bountylist, n, quartersx,
# labeltext). They look like the tail of a plotting function whose header is
# outside this view -- confirm their enclosing scope before relying on them.
x = range(len(quarter_sum[-4*5:]))
width = 1/2
#print(quarter2bountylist)
# Flatten the per-quarter lists into one list (only printed below).
reference = []
for i in quarter2bountylist:
    reference+=i
print(reference)
## Create bars plot
# Only the last 4*5 = 20 entries are drawn.
plt.bar(x[-4*5:], quarter_sum[-4*5:], width, color='darkblue', label='Number', edgecolor='black')
plt.xticks(np.arange(0,n),quartersx[-4*5:], rotation="vertical")
plt.ylabel(labeltext)
plt.xlabel('Quarter')
carlosplt.post_paper_plot(True,True,True)


# Script entry point: create a 2x2 grid of subplots and call plot_bounties
# once per variant (ff = 0..3), then show the figure.
if __name__ == "__main__":
    #main()
    fig = plt.figure()
    carlosplt.pre_paper_plot()
    for i in range(4):
        # Makes subplot i+1 the current axes before plot_bounties is called.
        ax = fig.add_subplot(2,2,i+1)
        plot_bounties(i)
        #plot_demographics(i)
    #plot_bounties(1)
    plt.show()