Esempio n. 1
0
def plot_peak_ratio_cdf(rperdev1, rperdev2, agg_param, PLOTPATH):
    """
    Plot the CDF of the peak-ratio per device for treatment and control.

    rperdev1, rperdev2: treatment / control data; their ``.values`` are
        handed to getSortedCDF to obtain the curve coordinates.
    agg_param: aggregation name, upper-cased into the output file name.
    PLOTPATH: string prefix the plot name is appended to.

    Writes PLOTPATH + 'peakratio-CDF-devices-<AGG_PARAM>.pdf'; returns None.
    """
    fig1, ax1 = plt.subplots(1, 1)

    x1, y1 = getSortedCDF(rperdev1.values)
    x2, y2 = getSortedCDF(rperdev2.values)
    # Aim for roughly 10 markers per curve. len(y)//10 is 0 when there are
    # fewer than 10 points and a marker step of 0 is invalid, so clamp to 1.
    ax1.plot(x1, y1, marker='o', markevery=max(1, len(y1)//10), linestyle='--', label='treatment')
    ax1.plot(x2, y2, marker='d', markevery=max(1, len(y2)//10), linestyle='--', label='control')

    filename_label = agg_param.upper()
    ax1.set_xlabel('Peak-ratio per household')

    plotname = 'peakratio-CDF-devices-'+filename_label
    ax1.grid(True)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname+'.pdf', format='PDF')
    logger.info("CREATE FILE "+PLOTPATH + plotname)
    # Close this figure explicitly rather than whichever one is "current".
    plt.close(fig1)
    return
Esempio n. 2
0
def plot_cdf_all_bytes(test_full, control_full, PLOTPATH):
    """
    Plot the CDF of the 'octets_passed' column for treatment and control.

    test_full, control_full: frames indexable by 'octets_passed'; the
        column's ``.values`` are handed to getSortedCDF.
    PLOTPATH: string prefix the plot name is appended to.

    Writes PLOTPATH + 'cdf-all-bytes.pdf'; returns None.
    """
    fig1, ax1 = plt.subplots(1, 1)

    # Aim for roughly 10 markers per curve. len(y)//10 is 0 when there are
    # fewer than 10 points and a marker step of 0 is invalid, so clamp to 1.
    x1, y1 = getSortedCDF(test_full['octets_passed'].values)
    ax1.plot(x1, y1, marker='o', color='b', markevery=max(1, len(y1)//10), linestyle='--', label='treatment')
    x2, y2 = getSortedCDF(control_full['octets_passed'].values)
    ax1.plot(x2, y2, marker='d', color='g', markevery=max(1, len(y2)//10), linestyle='--', label='control')

    ax1.set_xlabel("Bytes Transferred")

    plotname = 'cdf-all-bytes'
    ax1.grid(True)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname +".pdf", format='PDF')
    logger.info("CREATE FILE " + PLOTPATH + plotname)
    # Close this figure explicitly rather than whichever one is "current".
    plt.close(fig1)
    return
Esempio n. 3
0
def plot_peak_ratio_timeseries(rperday1, rperday2, agg_param, PLOTPATH):
    """
    Draw the per-day peak-ratio of both groups on one log-scaled axis.

    rperday1 is drawn as 'treatment', rperday2 as 'control'; both must offer
    a ``.plot(ax=...)`` method. The figure is saved as
    PLOTPATH + 'peakratio-timeseries-<AGG_PARAM>.pdf'.
    """
    fig1, ax1 = plt.subplots(1, 1)

    # One (data, marker, legend label) entry per curve, treatment first.
    curves = (
        (rperday1, 'o', 'treatment'),
        (rperday2, 'd', 'control'),
    )
    for series, marker, legend_name in curves:
        series.plot(ax=ax1, marker=marker, linestyle='--', label=legend_name)

    suffix = agg_param.upper()
    ax1.set_ylabel(agg_param+' peak-ratio')
    ax1.set_yscale('log')

    target = PLOTPATH + ('peakratio-timeseries-' + suffix)
    ax1.grid(1)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(target + '.pdf', format='PDF')
    logger.info("CREATE FILE " + target)
    plt.close()
    return
Esempio n. 4
0
def plot_initial_timeseries(g1, g2, param, PLOTPATH):
    """
    Scatter the aggregated 'throughput' of treatment (g1) and control (g2).

    param selects the aggregation applied to g['throughput']: one of
    'sum', 'max', 'min', 'median', 'mean', or 'perc90'. Note that 'perc90'
    takes the 0.95 quantile and is labelled 'perc95' in title and file name.
    Any other value logs a warning and nothing is saved.

    The figure is saved as PLOTPATH + 'timeseries-throughput-<PARAM>.pdf'.
    """
    fig1, ax1 = plt.subplots(1, 1, figsize=(18,5))

    # Guard clause: bail out on anything that is not a known aggregation.
    if param not in ['sum', 'max', 'min', 'median', 'mean'] and param != 'perc90':
        logger.warning("unknown parameter to plot throughput timeseries")
        plt.close()
        return

    if param == 'perc90':
        ts1 = g1['throughput'].quantile(0.95)
        ts2 = g2['throughput'].quantile(0.95)
        param = 'perc95'
    else:
        # param names a no-argument aggregation method of the grouped column
        ts1 = getattr(g1['throughput'], param)()
        ts2 = getattr(g2['throughput'], param)()

    # markers only, no connecting line
    marker_style = dict(alpha=.5, linestyle='', markersize=4)
    ts1.plot(ax=ax1, marker='o', label='treatment', **marker_style)
    ts2.plot(ax=ax1, marker='d', label='control', **marker_style)

    suffix = param.upper()
    ax1.set_ylabel('Data Rate [kbps]')
    ax1.set_title(param+' Avg Data Rate')

    target = PLOTPATH + ('timeseries-throughput-' + suffix)
    ax1.grid(1)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(target + '.pdf', format='PDF')
    logger.info("CREATE FILE " + target)
    plt.close()
    return
Esempio n. 5
0
def plot_prevalence_total_devices(test_full, control_full, PLOTPATH):
    """
    Plot, per byte threshold, how many distinct devices have at least one
    row with 'octets_passed' >= threshold, for treatment and control.

    test_full, control_full: frames with 'octets_passed' and
        'Device_number' columns.
    PLOTPATH: string prefix the plot name is appended to.

    Thresholds are 20 evenly spaced values from the overall minimum of
    'octets_passed' (inclusive) up to the overall maximum (exclusive).
    Writes PLOTPATH + 'slice-dataset-threshold-count-ndevices.pdf';
    returns None.
    """
    maxThresh = max(test_full['octets_passed'].max(), control_full['octets_passed'].max())
    minThresh = min(test_full['octets_passed'].min(), control_full['octets_passed'].min())

    if maxThresh > minThresh:
        # linspace yields exactly 20 points; arange with a computed float
        # step can give 20 or 21 depending on rounding.
        thresholds = np.linspace(minThresh, maxThresh, num=20, endpoint=False)
    else:
        # Degenerate range (all values equal): a zero step would make
        # arange fail, so evaluate the single threshold instead.
        thresholds = np.array([minThresh])

    fig1, ax1 = plt.subplots(1, 1)
    groups = (
        (test_full, 'b', 'o', 'treatment'),
        (control_full, 'g', 'd', 'control'),
    )
    for df, color, marker, label in groups:
        # Column lookups do not depend on the threshold; do them once.
        octets = df['octets_passed']
        devices = df['Device_number']
        counts = [len(devices[octets >= thresh].unique()) for thresh in thresholds]
        ax1.plot(thresholds, counts, color=color, marker=marker, label=label)

    ax1.set_xscale('linear')
    ax1.set_xlabel('Threshold Bytes')
    ax1.set_ylabel('Number of Devices')
    ax1.set_yscale('log')

    plotname = 'slice-dataset-threshold-count-ndevices'
    ax1.grid(True)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname + '.pdf', format='PDF')
    logger.info("CREATE FILE "+PLOTPATH + plotname)
    # Close this figure explicitly rather than whichever one is "current".
    plt.close(fig1)
    return
Esempio n. 6
0
def plot_cdf_per_device(test_full, control_full, PLOTPATH, groupBy=None, param1='max', param2='perc95'):
    """
    Plot CDFs of per-device aggregates of 'octets_passed'.

    test_full, control_full: frames with 'Device_number' and
        'octets_passed' columns (plus the groupBy column when given).
    PLOTPATH: string prefix the plot name is appended to.
    groupBy: optional extra column to group on together with
        'Device_number' (e.g. 'date'); None groups by device only.
    param1: aggregation for the solid curves; 'perc95' means the 0.95
        quantile, anything else is called as a method of the grouped column
        (e.g. 'max').
    param2: aggregation for the dashed curves, same convention; the empty
        string '' disables the second pair of curves.

    The output file name is built from groupBy, param1 and param2 and saved
    as PDF under PLOTPATH. Returns None.
    """
    fig1, ax1 = plt.subplots(1, 1)

    def _aggregate(grouped, name):
        # 'perc95' is the only aggregation that needs an argument.
        if name == 'perc95':
            return grouped.quantile(0.95)
        return getattr(grouped, name)()

    def _plot_cdf(values, **style):
        x, y = getSortedCDF(values)
        # Roughly 10 markers per curve; len(y)//10 is 0 for fewer than 10
        # points and a marker step of 0 is invalid, so clamp to 1.
        ax1.plot(x, y, markevery=max(1, len(y)//10), **style)

    # (frame, colour for param1, colour for param2, marker, legend label)
    groups = (
        (test_full, 'b', 'k', 'o', 'treatment'),
        (control_full, 'g', 'r', 'd', 'control'),
    )
    for df, color1, color2, marker, label in groups:
        if groupBy is not None:
            g = df.groupby(['Device_number', groupBy])['octets_passed']
        else:
            g = df.groupby(['Device_number'])['octets_passed']

        _plot_cdf(_aggregate(g, param1), color=color1, marker=marker, label=label)
        if param2 != '':
            _plot_cdf(_aggregate(g, param2), color=color2, marker=marker, ls='--',
                      label=label+'-'+param2)

    ax1.set_xlabel("Bytes Transferred")
    if groupBy == 'date':
        ax1.set_xlabel("Bytes Transferred per Day [kbps]")

    if param2 == '' and groupBy:
        plotname = 'cdf-per-device_'+groupBy+'-'+param1
    elif param2 == '':
        plotname = 'cdf-per-device-'+param1
    elif groupBy:
        plotname = 'cdf-per-device_'+groupBy+'-'+param1+'_'+param2
    else:
        plotname = 'cdf-per-device-'+param1+'_'+param2
    ax1.grid(True)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname +'.pdf', format='PDF')
    logger.info("CREATE FILE "+PLOTPATH + plotname)
    # Close this figure explicitly rather than whichever one is "current".
    plt.close(fig1)
    return
Esempio n. 7
0
def plot_primetime_ratio_per_device(r_test, r_control, param, PLOTPATH):
    """
    Plot CDFs of the per-device aggregated prime-time 'ratio'.

    r_test, r_control: frames with 'Device_number' and 'ratio' columns
        (treatment / control).
    param: which aggregation(s) over each device's ratios to plot:
        'mean', 'median', 'max' -> that single aggregation;
        'perc90'                -> the 0.95 quantile;
        'all1'                  -> median (dashed) and 0.95 quantile;
        'all2'                  -> mean (dashed) and max;
        anything else           -> same curves as 'all1'.
    PLOTPATH: string prefix the plot name is appended to.

    Writes PLOTPATH + 'prime-time-ratio-per-device-cdf-<PARAM>.pdf';
    returns None.
    """
    fig1, ax1 = plt.subplots(1, 1)
    r_test_g = r_test.groupby('Device_number')['ratio']
    r_control_g = r_control.groupby('Device_number')['ratio']

    def _aggregate(grouped, name):
        # 'perc95' is the only aggregation that needs an argument.
        if name == 'perc95':
            return grouped.quantile(0.95)
        return getattr(grouped, name)()

    def _plot_cdf(values, **style):
        x, y = getSortedCDF(values)
        # Roughly 10 markers per curve; len(y)//10 is 0 for fewer than 10
        # points and a marker step of 0 is invalid, so clamp to 1.
        ax1.plot(x, y, markevery=max(1, len(y)//10), **style)

    # Pick the dashed (secondary) and solid (primary) aggregation.
    secondary = None
    if param in ['mean', 'median', 'max']:
        primary = param
    elif param == 'perc90':
        primary = 'perc95'
    elif param == 'all1':
        logger.debug("Plot: median, perc90")
        secondary, primary = 'median', 'perc95'
    elif param == 'all2':
        logger.debug("Plot: mean, max")
        secondary, primary = 'mean', 'max'
    else:
        logger.debug("Plot all: median, perc90")
        secondary, primary = 'median', 'perc95'

    if secondary is not None:
        _plot_cdf(_aggregate(r_test_g, secondary), marker='o', color='k',
                  linestyle='--', label='treatment-'+secondary)
        _plot_cdf(_aggregate(r_control_g, secondary), marker='d', color='r',
                  linestyle='--', label='control-'+secondary)

    _plot_cdf(_aggregate(r_test_g, primary), marker='o', color='b', label='treatment')
    _plot_cdf(_aggregate(r_control_g, primary), marker='d', color='g', label='control')

    ax1.set_xscale('log')
    ax1.set_xlabel("Prime-time Ratio")

    filename_label = param.upper()
    plotname = 'prime-time-ratio-per-device-cdf-'+filename_label
    ax1.grid(True)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname+'.pdf', format='PDF')
    logger.info("CREATE FILE "+PLOTPATH + plotname)
    # Close this figure explicitly rather than whichever one is "current".
    plt.close(fig1)
    return
Esempio n. 8
0
def plot_primetime_ratio_by_date(r_test, r_control, param, PLOTPATH):
    """
    Plot the prime-time 'ratio' aggregated per 'date' as a time series.

    r_test, r_control: frames with 'date' and 'ratio' columns
        (treatment / control).
    param: which aggregation(s) over each date's ratios to plot:
        'mean', 'median', 'max' -> that single aggregation;
        'perc90'                -> the 0.95 quantile;
        'all1'                  -> median (dashed) and 0.95 quantile;
        'all2'                  -> mean (dashed) and max;
        anything else           -> same curves as 'all1'.
    PLOTPATH: string prefix the plot name is appended to.

    Dashed curves are labelled '<group>-<aggregation>' in every branch so
    the legend never shows two entries with the same name.
    Writes PLOTPATH + 'prime-time-ratio-by-date-timeseries-<PARAM>.pdf';
    returns None.
    """
    fig1, ax1 = plt.subplots(1, 1, figsize=(13,8))
    tit = param

    r_test_g = r_test.groupby('date')['ratio']
    r_control_g = r_control.groupby('date')['ratio']

    def _aggregate(grouped, name):
        # 'perc95' is the only aggregation that needs an argument.
        if name == 'perc95':
            return grouped.quantile(0.95)
        return getattr(grouped, name)()

    # Pick the dashed (secondary) and solid (primary) aggregation.
    secondary = None
    if param in ['mean', 'median', 'max']:
        primary = param
    elif param == 'perc90':
        primary = 'perc95'
    elif param == 'all1':
        logger.debug("Plot: median, perc90")
        secondary, primary = 'median', 'perc95'
        tit = 'median, perc95'
    elif param == 'all2':
        logger.debug("Plot: mean, max")
        secondary, primary = 'mean', 'max'
        tit = 'mean, max'
    else:
        logger.debug("Plot all: median, perc90")
        secondary, primary = 'median', 'perc95'

    if secondary is not None:
        _aggregate(r_test_g, secondary).plot(ax=ax1, marker='o', color='k', linestyle='--',
                                             label='treatment-'+secondary)
        _aggregate(r_control_g, secondary).plot(ax=ax1, marker='d', color='r', linestyle='--',
                                                label='control-'+secondary)

    _aggregate(r_test_g, primary).plot(ax=ax1, color='b', marker='o', label='treatment')
    _aggregate(r_control_g, primary).plot(ax=ax1, color='g', marker='d', label='control')

    ax1.set_title("Prime-time Ratio every Date - "+tit)

    filename_label = param.upper()
    plotname = 'prime-time-ratio-by-date-timeseries-'+filename_label
    ax1.grid(True)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname+'.pdf', format='PDF')
    logger.info("CREATE FILE "+PLOTPATH + plotname)
    # Close this figure explicitly rather than whichever one is "current".
    plt.close(fig1)
    return
Esempio n. 9
0
def plot_throughput_per_day(g1, g2, param_device, param_time, PLOTPATH):
    """
    Plot aggregated data-rate curves for treatment (g1) and control (g2).

    g1, g2: grouped pandas objects; g[param_device] is aggregated and then
        multiplied by CONVERT_OCTETS (presumably octets -> kbps, matching
        the axis label; verify against the constant's definition).
        Per the original note: Index=['time', 'day'],
        Columns=[sum, perc90, max, min, median, ...].
    param_device: column to read, i.e. the per-datetime aggregation over
        devices.
    param_time: aggregation over the group:
        'mean', 'max', 'min', 'median' -> that single aggregation;
        'perc90'                       -> the 0.95 quantile (shown as
                                          'perc95' everywhere);
        'all1'                         -> perc95 and median;
        'all2'                         -> max and mean;
        anything else                  -> max, perc95, mean and median.
    PLOTPATH: string prefix the plot name is appended to.

    Every curve is labelled '<group>-<aggregation>'.
    Writes PLOTPATH + 'describe-total-throughput-per-day-<PARAM_TIME>.pdf';
    returns None.
    """
    fig1, ax1 = plt.subplots(1, 1, figsize=(18,8))
    tit = param_time

    def _rate(grouped, agg):
        # Aggregate the selected column, then convert units.
        column = grouped[param_device]
        if agg == 'perc95':
            series = column.quantile(0.95)
        else:
            # The aggregation must be *called*; multiplying the bound method
            # itself by CONVERT_OCTETS raises TypeError.
            series = getattr(column, agg)()
        return series * CONVERT_OCTETS

    # Each entry: (aggregation, treatment colour, control colour, line style)
    if param_time in ['mean', 'max', 'min', 'median']:
        curves = [(param_time, 'b', 'g', dict(linestyle='-.', linewidth=3))]

    elif param_time == 'perc90':
        param_time = 'perc95'
        tit = param_time  # keep the title in line with label and legend
        curves = [(param_time, 'b', 'g', dict(linestyle='-.', linewidth=3))]

    elif param_time == 'all1':
        logger.debug("plot perc90, median")
        tit = 'perc95, median'
        curves = [
            ('perc95', 'b', 'g', dict(linestyle='-')),
            ('median', 'k', 'r', dict(linestyle='--', linewidth=2)),
        ]

    elif param_time == 'all2':
        logger.debug("plot max, mean")
        tit = 'max, mean'
        curves = [
            ('max', 'b', 'g', dict(linestyle='-')),
            ('mean', 'k', 'r', dict(linestyle='--', linewidth=2)),
        ]

    else:
        logger.debug("no param_time selected so plot 90-%ile, median, max, mean over time")
        curves = [
            ('max', 'b', 'g', dict(linestyle='-.', linewidth=3)),
            ('perc95', 'k', 'r', dict(linestyle='-')),
            ('mean', 'b', 'g', dict(linestyle=':', linewidth=3)),
            ('median', 'k', 'r', dict(linestyle='--', linewidth=2)),
        ]

    for agg, color_t, color_c, style in curves:
        _rate(g1, agg).plot(ax=ax1, color=color_t, label='treatment-'+agg, **style)
        _rate(g2, agg).plot(ax=ax1, color=color_c, label='control-'+agg, **style)

    ax1.set_ylabel(param_time + '$_{time}$ '+param_device+'$_{device}$ Data Rate [kbps]')
    ax1.set_title(tit + " Data Rate in a 15 min slot")

    filename_label = param_time.upper()
    plotname = 'describe-total-throughput-per-day-'+filename_label
    ax1.grid(True)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname+'.pdf', format='PDF')
    logger.info("CREATE FILE "+PLOTPATH + plotname)
    # Close this figure explicitly rather than whichever one is "current".
    plt.close(fig1)
    return