Ejemplo n.º 1
0
def main():
    """Read every CVE document from MongoDB and draw four Laplace-test panels.

    Connects to the local MongoDB instance, reads the ``cves`` collection of
    the ``cvedb`` database and indexes its documents by their ``'id'`` field.
    Then a 2x2 figure is created and one ``calc_laplace`` routine is run per
    panel (PHP, OpenJDK, and Wheezy with the flag ``False`` and ``True``).
    The figure is shown at the end.
    """
    mongo = MongoClient('mongodb://localhost:27017/')
    cve_collection = mongo.cvedb.cves

    # Index all stored documents by id.  The mapping is not used further in
    # this function, but building it performs the full collection read.
    cvedict = {doc['id']: doc for doc in cve_collection.find()}

    carlosplt.pre_paper_plot()
    fig = plt.figure()
    tester = calc_laplace()

    # One routine per subplot, in panel order; each gets its own empty list.
    panels = (
        lambda: tester.laplace_php([]),
        lambda: tester.laplace_openjdk([]),
        lambda: tester.laplace_wheezy([], False),
        lambda: tester.laplace_wheezy([], True),
    )
    for position, draw_panel in enumerate(panels, start=1):
        ax = fig.add_subplot(2, 2, position)
        draw_panel()

    carlosplt.post_paper_plot(True, True, True)
    plt.show()
Ejemplo n.º 2
0
    def plot_severity_percentage(self):
        """Draw a stacked area chart of the yearly share of each severity class.

        Monthly counts in ``self.pkg_with_cvss[pkg][month]`` (indices 0..3 are
        used as low, medium, high and undefined) are summed into yearly slots
        (``month // 12``) across all packages, converted to fractions of the
        yearly total, printed (low and high only) and plotted.  Slot 0 is left
        out of the plot; the x axis starts at 2001.
        """
        slots = self.years + 1
        num_low = [0] * slots
        num_med = [0] * slots
        num_high = [0] * slots
        num_udef = [0] * slots
        total = [0] * slots

        for pkg in self.pkg_with_cvss:
            for month in range(len(self.src2month[pkg])):
                slot = month // 12
                try:
                    num_low[slot] += self.pkg_with_cvss[pkg][month][0]
                    num_med[slot] += self.pkg_with_cvss[pkg][month][1]
                    num_high[slot] += self.pkg_with_cvss[pkg][month][2]
                    num_udef[slot] += self.pkg_with_cvss[pkg][month][3]
                    total[slot] += (self.pkg_with_cvss[pkg][month][3]
                                    + self.pkg_with_cvss[pkg][month][2]
                                    + self.pkg_with_cvss[pkg][month][1]
                                    + self.pkg_with_cvss[pkg][month][0])
                except IndexError:
                    # Slot 19 is skipped silently; any other out-of-range
                    # slot is reported on stdout.
                    if slot != 19:
                        print(slot)

        # Turn absolute counts into fractions of the yearly total; a year
        # without any entries gets 0 for every class.
        series = (num_low, num_med, num_high, num_udef)
        for year in range(self.years + 1):
            try:
                for values in series:
                    values[year] = values[year] / total[year]
            except ZeroDivisionError:
                for values in series:
                    values[year] = 0

        print(num_low)
        print(num_high)

        carlosplt.pre_paper_plot()

        palette = ['#fee8c8', '#fdbb84', '#e34a33', 'grey']
        labels_cvss = ['low', 'medium', 'high', 'N/A']
        x = range(2001, 2001 + self.years)

        areas = plt.stackplot(
            x,
            [values[1:] for values in series],
            colors=palette,
            alpha=0.9,
            labels=labels_cvss)
        plt.xticks(x)
        # Reverse the handles so the legend order matches the visual stacking.
        plt.legend(loc='upper left', handles=areas[::-1])
        carlosplt.post_paper_plot(True, True, True)
        plt.show()
Ejemplo n.º 3
0
def test_slocs(src2month, src2sloccount):
    """Relate per-package vulnerability totals to source-line counts.

    For every package in ``src2month`` that has a non-zero total in
    ``src2sloccount`` the SLOC total and the summed vulnerability count are
    collected.  The function prints the vulnerability sum of the larger-SLOC
    half and of the smaller-SLOC half, the largest and smallest SLOC value and
    the Spearman rank correlation, then plots the vulnerability counts ordered
    by descending SLOC.
    """
    # sloccount entries have the form
    # (total, [ansic, cpp, asm, java, python, perl, sh])
    sloc_totals = []
    vuln_totals = []
    print(sum(src2month['linux']))
    for pkg in src2month:
        try:
            total_slocs = src2sloccount[pkg][0]
        except KeyError:
            print(pkg + ": no sloccount data found!")
            continue
        if total_slocs == 0:
            continue
        sloc_totals.append(int(total_slocs))

        total = sum(src2month[pkg])
        if total > 100:
            print(pkg + ', ' + str(total) + ', ' + str(total_slocs))
        vuln_totals.append(total)

    # Rank packages by SLOC (largest first); ties fall back to the count.
    ranked = sorted(zip(sloc_totals, vuln_totals), reverse=True)
    vulns_sorted_slocs_total = [vulns for _, vulns in ranked]
    pop_xaxis = [slocs for slocs, _ in ranked]

    middle = len(vulns_sorted_slocs_total) // 2
    half_more_slocs = sum(vulns_sorted_slocs_total[:middle])
    half_less_slocs = sum(vulns_sorted_slocs_total[middle:])

    print(half_more_slocs)
    print(half_less_slocs)

    print(pop_xaxis[0])
    print(pop_xaxis[-1])

    print(spearmanr(sloc_totals, vuln_totals))

    carlosplt.pre_paper_plot(True)

    plt.plot(vulns_sorted_slocs_total)
    plt.ylabel('Number of vulnerabilities')
    plt.xlabel('Number of SLOCS ranking')
    carlosplt.post_paper_plot(True, True, True)

    plt.show()
Ejemplo n.º 4
0
    def plot_all_severity(self, l, m, h, udef):
        """Rebuild the monthly series from selected severity classes and plot.

        ``l``, ``m``, ``h`` and ``udef`` are truthy flags choosing which of the
        four per-month counters in ``self.pkg_with_cvss`` (indices 0..3) are
        added into ``self.src2month_temp``.  The result is copied into
        ``self.src2month_loc`` and handed to the plotter classes.
        """
        carlosplt.pre_paper_plot()
        self.l, self.m, self.h, self.udef = l, m, h, udef

        selected = (l, m, h, udef)
        for pkg in self.pkg_with_cvss:
            monthly = self.src2month_temp[pkg] = []
            for month in range(len(self.src2month[pkg])):
                # Always read all four counters, then keep the selected ones.
                counts = [self.pkg_with_cvss[pkg][month][k] for k in range(4)]
                monthly.append(
                    sum(count for use, count in zip(selected, counts) if use))

        for pkg in self.src2month:
            # Full copy: don't cut data for 2018.
            self.src2month_loc[pkg] = self.src2month_temp[pkg][:]

        # Total number per year.
        self.severityPlotter = Temp_Plotter(self)
        self.severityPlotter.plot_total()

        self.pkgnumPlotter = NumPackages_Plotter(self.severityPlotter)
        # Number of affected packages per year.
        self.pkgnumPlotter.plot_num_affected()
        # Average number of vulnerabilities per affected package per year.
        self.pkgnumPlotter.plot_average_number()

        # Regular and LTS for Wheezy.
        self.wheezy = WheezyPloter(self)
        self.wheezy.plot_wheezy_lts()
Ejemplo n.º 5
0
def test_pop(src2month, src2pop):
    """Relate per-package vulnerability totals to package popularity.

    Args:
        src2month: mapping of package name to a sequence of monthly
            vulnerability counts.
        src2pop: mapping of package name to a popularity value that ``int()``
            accepts.  Packages missing from this mapping are skipped.

    Prints the vulnerability sum of the more popular and of the less popular
    half, the highest and lowest popularity value, the whole ``src2pop``
    mapping and the Spearman rank correlation.  Finally plots the
    vulnerability counts ordered by descending popularity.
    """
    popularity = []
    vuln_totals = []
    for pkg in src2month:
        try:
            pop = src2pop[pkg]
        except KeyError:
            # No popularity data for this package.
            continue
        popularity.append(int(pop))
        total = sum(src2month[pkg])
        if total > 100:
            # str() so non-string popularity values do not raise TypeError.
            print(pkg + ', ' + str(total) + ', ' + str(pop))
        vuln_totals.append(total)

    # Rank once by popularity (highest first) and split into both columns.
    ranked = sorted(zip(popularity, vuln_totals), reverse=True)
    vulns_sorted_pop = [vulns for _, vulns in ranked]
    pop_xaxis = [pop for pop, _ in ranked]

    middle = len(vulns_sorted_pop) // 2
    half_more_popular = sum(vulns_sorted_pop[:middle])
    half_less_popular = sum(vulns_sorted_pop[middle:])

    print(half_more_popular)
    print(half_less_popular)

    print(pop_xaxis[0])
    print(pop_xaxis[-1])

    print(src2pop)

    print(spearmanr(popularity, vuln_totals))

    carlosplt.pre_paper_plot(True)

    plt.plot(vulns_sorted_pop)
    plt.ylabel('Number of vulnerabilities')
    plt.xlabel('Popularity ranking')
    carlosplt.post_paper_plot(True, True, True)

    plt.show()
Ejemplo n.º 6
0
# Report the parameters of the fitted distributions.
# NOTE(review): `results` is created before this point; it is presumably a
# `powerlaw.Fit` object -- confirm against the code above this section.
print('xmin = ', results.power_law.xmin)
print('xmax = ', results.power_law.xmax)
print('sigma = ', results.power_law.sigma)
print('D = ', results.power_law.D)
# The next value (truncated power-law xmin) is printed without a label.
print(results.truncated_power_law.xmin)
print('xmax = ', results.truncated_power_law.xmax)
# Also printed without a label: the `discrete` attribute of the power-law fit.
print(results.power_law.discrete)
print('lognormal mu: ', results.lognormal.mu)
print('lognormal sigma: ', results.lognormal.sigma)

#custom_model=[]
#for i in sorted(mydata,reverse=True):
#    ccdf =

# Plot the CCDF of the data and overlay the power-law fit on the same axes
# (`fig` holds whatever `plot_ccdf` returns and is passed on as `ax`).
#fig=results.plot_pdf(color='b', linewidth=2)
carlosplt.pre_paper_plot(True)
fig = results.plot_ccdf(color='darkblue', linestyle='-', label='data')
results.power_law.plot_ccdf(color='darkgreen', ax=fig, label='power-law fit')
# Overlays for the alternative distributions are currently disabled.
#results.truncated_power_law.plot_ccdf(color = 'red', ax=fig)
#results.lognormal_positive.plot_ccdf(color = 'yellow', ax=fig)
#results.lognormal.plot_ccdf(color = 'brown', ax=fig)
#results.exponential.plot_ccdf(color = 'orange', ax=fig)
plt.ylabel('ccdf')
plt.xlabel('Vulnerabilities')
fig.legend()
carlosplt.post_paper_plot(True, True, True)
plt.show()
# Compare the power-law fit with two alternatives and print both returned
# values (presumably a likelihood ratio R and its p-value -- confirm).
R, p = results.distribution_compare('power_law', 'exponential')
print('Exponential: ', R, p)
R, p = results.distribution_compare('power_law', 'stretched_exponential')
print('Stretched exponential: ', R, p)
Ejemplo n.º 7
0
def _plot_ols_trend(xs, ys, **plot_kwargs):
    """Fit ``ys ~ const + xs`` by OLS, draw the fitted line, print the summary.

    Does nothing except print a notice when there are no points to fit.
    """
    if not xs:
        print('No data points for trend line')
        return
    design = sm.add_constant(xs)
    model = sm.OLS(ys, design).fit()
    plt.plot(xs, model.predict(design), **plot_kwargs)
    print(model.summary())
    print(model.summary().as_latex())


def plot_bounties(ff):
    """Box-plot awarded bounty amounts per quarter with two OLS trend lines.

    Reads ``reports_team.json`` from the working directory (a mapping of team
    name to a list of report dicts), buckets the awarded bounty amounts by
    month and quarter for the years 2001-2018, and draws on the current
    matplotlib axes for the last 20 quarters:

    * a trend line through the non-zero quarterly averages,
    * a trend line (dark red) through the quarterly medians,
    * a box plot of the individual bounty amounts (log y axis).

    Per-quarter Shapiro-Wilk and two-sample KS results (each quarter against
    all bounties) are printed for quarters with more than three bounties.
    ``plt.show()`` is left to the caller.

    Args:
        ff: plot variant.
            0 - IBB teams, all severities; 1 - IBB teams, high/critical only;
            2 - all teams, all severities; 3 - all teams, high/critical only.

    Raises:
        ValueError: if ``ff`` is not one of 0, 1, 2, 3.
    """
    labels = {0: 'IBB-all', 1: 'IBB-high', 2: 'All-all', 3: 'All-high'}
    if ff not in labels:
        # Fail before doing any work instead of hitting an unbound label at
        # the very end.
        raise ValueError('ff must be one of 0, 1, 2, 3; got %r' % (ff,))
    labeltext = labels[ff]
    all_severities = ff in (0, 2)

    with open("reports_team.json", "r") as fp:
        reports_team = json.load(fp)

    if ff < 2:
        ibb_list = ['ibb-php', 'ibb-python', 'ibb-data', 'ibb-flash', 'ibb-nginx', 'ibb-perl', 'internet', 'ibb-openssl', 'ibb-apache']
        print('list follows')
        for team in ibb_list:
            print(reports_team[team])
    else:
        ibb_list = list(reports_team)

    # Largest single bounty and bounty total per team (diagnostic output).
    most_team = dict()
    sum_bounty_team = dict()
    for team in ibb_list:
        largest = 0.0
        team_total = 0.0
        for report in reports_team[team]:
            try:
                amount = float(report['total_awarded_bounty_amount'])
            except KeyError:
                print('#'*80)
                print(report)
                print('Report id ', report.get('id'), ' - bounty not found')
                continue
            team_total += amount
            if amount > largest:
                largest = amount
        most_team[team] = largest
        sum_bounty_team[team] = team_total

    print(most_team)
    print(sum_bounty_team)

    # Years: 2001-2018
    first_year = 2001
    years = 18
    num_months = 12 * years
    month2money = [0.0] * num_months
    month2bountylist = [[] for _ in range(num_months)]

    for team in ibb_list:
        for report in reports_team[team]:
            datetime_obj = parser.parse(report['latest_disclosable_activity_at'])
            print(str(datetime_obj))
            # datetime.month is 1-based; subtract 1 so January of first_year
            # maps to slot 0 and quarters line up with calendar quarters.
            month_idx = (datetime_obj.year - first_year) * 12 + datetime_obj.month - 1
            try:
                if all_severities or report['severity_rating'] in ('high', 'critical'):
                    amount = float(report['total_awarded_bounty_amount'])
                    month2money[month_idx] += amount
                    month2bountylist[month_idx].append(amount)
            except KeyError:
                # Report without severity rating or bounty amount: skip it.
                continue

    print(month2bountylist)

    carlosplt.pre_paper_plot()

    # Tick labels: only the first quarter of each year is labelled.
    quartersx = []
    for year in range(1, years + 1):
        quartersx.append('Q1' + '\'' + str(year).zfill(2))
        quartersx.extend([' '] * 3)

    quarter_av = []
    quarter2bountylist = []
    for quarter in range(years * 4):
        months = slice(3 * quarter, 3 * (quarter + 1))
        bounties = [item for sublist in month2bountylist[months] for item in sublist]
        print(bounties)
        quarter2bountylist.append(bounties)
        if bounties:
            quarter_av.append(sum(month2money[months]) / len(bounties))
        else:
            quarter_av.append(0)

    recent = 4 * 5  # plot the last five years
    recent_bounties = quarter2bountylist[-recent:]
    recent_av = quarter_av[-recent:]
    n = len(recent_bounties)
    x = range(n)

    ## Shapiro normality test for each quarter, plus a KS test of each
    ## quarter against the pooled bounties of all quarters.
    reference = [amount for bounties in quarter2bountylist for amount in bounties]
    print(reference)

    alpha = 0.01
    for sample in quarter2bountylist:
        print(sample)
        if len(sample) <= 3:
            continue
        stat, p = shapiro(sample)
        print('Statistics=%.3f, p=%.3f' % (stat, p))
        if p > alpha:
            print('Sample looks Gaussian (fail to reject H0)')
        else:
            print('Sample does not look Gaussian (reject H0)')

        _, p = ks_2samp(sample, reference)
        if p > alpha:
            print('Samples look similar')
        else:
            print('Samples do not look similar')

    ## Linear regression of average and median over quarters with data.
    _plot_ols_trend(
        [pos for pos, average in enumerate(recent_av) if average != 0],
        [average for average in recent_av if average != 0])
    _plot_ols_trend(
        [pos for pos, bounties in enumerate(recent_bounties) if bounties],
        [median(bounties) for bounties in recent_bounties if bounties],
        color='darkred')

    ## Create box plot
    bp = plt.boxplot(recent_bounties, whis=[5, 95], patch_artist=True, positions=x)
    plt.setp(bp['boxes'], color='black')
    plt.setp(bp['whiskers'], color='darkred')
    plt.setp(bp['caps'], color='darkred')
    plt.setp(bp['fliers'], markersize=3.0)
    plt.yscale('log')
    plt.ylim(top=50000)
    plt.ylim(bottom=1)
    plt.xticks(np.arange(0, n), quartersx[-recent:], rotation="vertical")
    plt.ylabel(labeltext)
    plt.xlabel('Quarter')
    carlosplt.post_paper_plot(True, True, True)
Ejemplo n.º 8
0
def plot_demographics(ff):
    """Bar-plot rewarded reports or first-time reporters per quarter.

    Reads ``reports_team.json`` from the working directory (a mapping of team
    name to a list of report dicts) and buckets the reports by month and
    quarter for the years 2001-2018.  Only reports that carry both a reporter
    id and an awarded bounty amount are counted.  The last 20 quarters are
    drawn as bars on the current matplotlib axes; ``plt.show()`` is left to
    the caller.

    Args:
        ff: plot variant.
            0 - number of rewarded reports, IBB teams;
            1 - number of first-time reporters, IBB teams;
            2 - number of rewarded reports, all teams;
            3 - number of first-time reporters, all teams.

    Raises:
        ValueError: if ``ff`` is not one of 0, 1, 2, 3.
    """
    labels = {0: 'num - IBB', 1: 'new - IBB', 2: 'num - All', 3: 'new - All'}
    if ff not in labels:
        # Fail before doing any work instead of hitting an unbound label at
        # the very end.
        raise ValueError('ff must be one of 0, 1, 2, 3; got %r' % (ff,))
    labeltext = labels[ff]
    count_new_reporters = ff in (1, 3)

    with open("reports_team.json", "r") as fp:
        reports_team = json.load(fp)

    if ff < 2:
        ibb_list = ['ibb-php', 'ibb-python', 'ibb-data', 'ibb-flash', 'ibb-nginx', 'ibb-perl', 'internet', 'ibb-openssl', 'ibb-apache']
        print('list follows')
        for team in ibb_list:
            print(reports_team[team])
    else:
        ibb_list = list(reports_team)

    # Largest single bounty and bounty total per team (diagnostic output).
    most_team = dict()
    sum_bounty_team = dict()
    for team in ibb_list:
        largest = 0.0
        team_total = 0.0
        for report in reports_team[team]:
            try:
                amount = float(report['total_awarded_bounty_amount'])
            except KeyError:
                print('#'*80)
                print(report)
                print('Report id ', report.get('id'), ' - bounty not found')
                continue
            team_total += amount
            if amount > largest:
                largest = amount
        most_team[team] = largest
        sum_bounty_team[team] = team_total

    print(most_team)
    print(sum_bounty_team)

    # Years: 2001-2018
    first_year = 2001
    years = 18
    num_months = 12 * years
    month2newreporters = [0] * num_months
    month2bountylist = [[] for _ in range(num_months)]
    seen_reporters = set()

    for team in ibb_list:
        for report in reports_team[team]:
            datetime_obj = parser.parse(report['latest_disclosable_activity_at'])
            print(str(datetime_obj))
            # datetime.month is 1-based; subtract 1 so January of first_year
            # maps to slot 0 and quarters line up with calendar quarters.
            month_idx = (datetime_obj.year - first_year) * 12 + datetime_obj.month - 1
            try:
                reporter = report['reporter']['id']
                amount = float(report['total_awarded_bounty_amount'])
            except KeyError:
                print('Error with report ', report.get('id'))
                continue
            month2bountylist[month_idx].append(amount)
            if reporter not in seen_reporters:
                # First rewarded report by this reporter.
                month2newreporters[month_idx] += 1
                seen_reporters.add(reporter)

    print(month2bountylist)

    carlosplt.pre_paper_plot()

    # Tick labels: only the first quarter of each year is labelled.
    quartersx = []
    for year in range(1, years + 1):
        quartersx.append('Q1' + '\'' + str(year).zfill(2))
        quartersx.extend([' '] * 3)

    quarter_sum = []
    for quarter in range(years * 4):
        months = slice(3 * quarter, 3 * (quarter + 1))
        if count_new_reporters:
            quarter_sum.append(sum(month2newreporters[months]))
        else:
            quarter_sum.append(sum(len(bounties) for bounties in month2bountylist[months]))

    recent = 4 * 5  # plot the last five years
    recent_sum = quarter_sum[-recent:]
    n = len(recent_sum)
    x = range(n)
    width = 1/2

    ## Create bars plot
    plt.bar(x, recent_sum, width, color='darkblue', label='Number', edgecolor='black')

    plt.xticks(np.arange(0, n), quartersx[-recent:], rotation="vertical")
    plt.ylabel(labeltext)
    plt.xlabel('Quarter')
    carlosplt.post_paper_plot(True, True, True)
Ejemplo n.º 9
0
    x = range(len(quarter_sum[-4*5:]))
    width = 1/2
    #print(quarter2bountylist)

    
    reference = []
    for i in quarter2bountylist:
        reference+=i
    print(reference)

    ## Create bars plot
    plt.bar(x[-4*5:], quarter_sum[-4*5:], width, color='darkblue', label='Number', edgecolor='black')
    
    plt.xticks(np.arange(0,n),quartersx[-4*5:], rotation="vertical")
    plt.ylabel(labeltext)
    plt.xlabel('Quarter')
    carlosplt.post_paper_plot(True,True,True)


# Script entry point: draw the four `plot_bounties` variants (ff = 0..3) into
# a 2x2 grid of subplots and show the figure once at the end.
if __name__ == "__main__":
    #main()
    fig = plt.figure()
    carlosplt.pre_paper_plot()
    for i in range(4):
        # Adding the subplot before the call lets the pyplot-based drawing in
        # plot_bounties land in panel i+1 (the returned axes is not used).
        ax = fig.add_subplot(2,2,i+1)
        plot_bounties(i)
        #plot_demographics(i)

    #plot_bounties(1)
    plt.show()