def plot_peak_ratio_cdf(rperdev1, rperdev2, agg_param, PLOTPATH):
    """
    Plot the CDF of the peak-ratio per device for treatment and control.

    rperdev1, rperdev2: treatment / control data. Only ``.values`` is read
        and handed to getSortedCDF (presumably pandas Series with one
        entry per device -- confirm against the caller).
    agg_param: aggregation name; upper-cased and appended to the file name.
    PLOTPATH: string prefix the output file name is appended to.

    Writes PLOTPATH + 'peakratio-CDF-devices-<AGG_PARAM>.pdf'.
    """
    fig1, ax1 = plt.subplots(1, 1)

    # getSortedCDF is defined elsewhere in this file; presumably it returns
    # the sorted values (x) and their cumulative fractions (y).
    curves = (
        (rperdev1, 'o', 'treatment'),
        (rperdev2, 'd', 'control'),
    )
    for data, marker, label in curves:
        x, y = getSortedCDF(data.values)
        # len(y)//10 is 0 for fewer than 10 points; a marker step of 0
        # breaks drawing in matplotlib, so never go below 1.
        ax1.plot(x, y, marker=marker, markevery=max(1, len(y) // 10),
                 linestyle='--', label=label)

    ax1.set_xlabel('Peak-ratio per household')

    plotname = 'peakratio-CDF-devices-' + agg_param.upper()
    ax1.grid(1)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname + '.pdf', format='PDF')
    logger.info("CREATE FILE " + PLOTPATH + plotname)
    # Close this figure explicitly rather than whatever is "current".
    plt.close(fig1)
def plot_cdf_all_bytes(test_full, control_full, PLOTPATH):
    """
    Plot the CDF of the 'octets_passed' column for treatment and control.

    test_full, control_full: tables with an 'octets_passed' column whose
        ``.values`` are passed to getSortedCDF.
    PLOTPATH: string prefix the output file name is appended to.

    Writes PLOTPATH + 'cdf-all-bytes.pdf'.
    """
    fig1, ax1 = plt.subplots(1, 1)

    curves = (
        (test_full, 'o', 'b', 'treatment'),
        (control_full, 'd', 'g', 'control'),
    )
    for df, marker, color, label in curves:
        # getSortedCDF is defined elsewhere in this file; presumably it
        # returns sorted values (x) and cumulative fractions (y).
        x, y = getSortedCDF(df['octets_passed'].values)
        # len(y)//10 is 0 for fewer than 10 samples; a marker step of 0
        # breaks drawing in matplotlib, so never go below 1.
        ax1.plot(x, y, marker=marker, color=color,
                 markevery=max(1, len(y) // 10), linestyle='--', label=label)

    ax1.set_xlabel("Bytes Transferred")

    plotname = 'cdf-all-bytes'
    ax1.grid(1)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname + ".pdf", format='PDF')
    logger.info("CREATE FILE " + PLOTPATH + plotname)
    # Close this figure explicitly rather than whatever is "current".
    plt.close(fig1)
def plot_peak_ratio_timeseries(rperday1, rperday2, agg_param, PLOTPATH):
    """
    Draw the daily peak-ratio of treatment and control on one log-scaled axis.

    rperday1, rperday2: treatment / control objects exposing ``.plot(ax=...)``
        (presumably pandas Series indexed by day -- confirm with the caller).
    agg_param: aggregation name; used in the y-label and, upper-cased, in
        the output file name.
    PLOTPATH: string prefix the output file name is appended to.

    Writes PLOTPATH + 'peakratio-timeseries-<AGG_PARAM>.pdf'.
    """
    figure, axis = plt.subplots(1, 1)

    # x-axis: day, y-axis: aggregated peak-ratio
    groups = (
        (rperday1, 'o', 'treatment'),
        (rperday2, 'd', 'control'),
    )
    for series, marker, legend_label in groups:
        series.plot(ax=axis, marker=marker, linestyle='--', label=legend_label)

    suffix = agg_param.upper()
    axis.set_ylabel(agg_param + ' peak-ratio')
    axis.set_yscale('log')

    plotname = 'peakratio-timeseries-' + suffix
    axis.grid(1)
    axis.legend(loc='best')
    figure.tight_layout()
    if LATEXIFY:
        format_axes(axis)

    figure.savefig(PLOTPATH + plotname + '.pdf', format='PDF')
    logger.info("CREATE FILE " + PLOTPATH + plotname)
    plt.close()
    return
def plot_initial_timeseries(g1, g2, param, PLOTPATH):
    """
    Scatter the aggregated 'throughput' of treatment and control over time.

    g1, g2: treatment / control objects indexable by 'throughput'; the
        result must offer the aggregation methods used below (per the
        original note: pandas objects grouped by datetime).
    param: one of 'sum', 'max', 'min', 'median', 'mean' or 'perc90'.
        'perc90' is computed as the 0.95 quantile and reported as 'perc95'.
        Any other value logs a warning and nothing is saved.
    PLOTPATH: string prefix the output file name is appended to.

    Writes PLOTPATH + 'timeseries-throughput-<PARAM>.pdf'.
    """
    figure, axis = plt.subplots(1, 1, figsize=(18, 5))

    plain_aggregations = ('sum', 'max', 'min', 'median', 'mean')

    # Bail out early on anything we do not know how to aggregate.
    if param not in plain_aggregations and param != 'perc90':
        logger.warning("unknown parameter to plot throughput timeseries")
        plt.close()
        return

    if param == 'perc90':
        treatment = g1['throughput'].quantile(0.95)
        control = g2['throughput'].quantile(0.95)
        # The quantile actually used is 0.95, so name it that way downstream.
        param = 'perc95'
    else:
        treatment = getattr(g1['throughput'], param)()
        control = getattr(g2['throughput'], param)()

    # Markers only (empty linestyle) so dense series stay readable.
    treatment.plot(ax=axis, marker='o', alpha=.5, linestyle='', markersize=4,
                   label='treatment')
    control.plot(ax=axis, marker='d', alpha=.5, linestyle='', markersize=4,
                 label='control')

    axis.set_ylabel('Data Rate [kbps]')
    axis.set_title(param + ' Avg Data Rate')

    # File name carries the aggregation parameter.
    plotname = 'timeseries-throughput-' + param.upper()
    axis.grid(1)
    axis.legend(loc='best')
    figure.tight_layout()
    if LATEXIFY:
        format_axes(axis)

    figure.savefig(PLOTPATH + plotname + '.pdf', format='PDF')
    logger.info("CREATE FILE " + PLOTPATH + plotname)
    plt.close()
    return
def plot_prevalence_total_devices(test_full, control_full, PLOTPATH):
    """
    Plot how many distinct devices have at least THRESH bytes, per threshold.

    The threshold range spans the min..max of 'octets_passed' over both
    inputs and is cut into 20 equal steps (upper bound excluded, as with
    np.arange).

    test_full, control_full: tables with 'octets_passed' and
        'Device_number' columns (treatment / control).
    PLOTPATH: string prefix the output file name is appended to.

    Writes PLOTPATH + 'slice-dataset-threshold-count-ndevices.pdf'.
    """
    maxThresh = max(test_full['octets_passed'].max(),
                    control_full['octets_passed'].max())
    minThresh = min(test_full['octets_passed'].min(),
                    control_full['octets_passed'].min())
    stepThresh = (maxThresh - minThresh) / 20

    if stepThresh == 0:
        # All values identical (or the range collapsed under integer
        # division): np.arange cannot step by 0, so use one threshold.
        thresholds = np.array([minThresh])
    else:
        thresholds = np.arange(minThresh, maxThresh, stepThresh)

    fig1, ax1 = plt.subplots(1, 1)

    curves = zip([test_full, control_full],
                 ['b', 'g'],
                 ['o', 'd'],
                 ['treatment', 'control'])
    for df, color, marker, label in curves:
        # Column lookups do not depend on the threshold; do them once.
        octets = df['octets_passed']
        devices = df['Device_number']
        counts = [len(devices[octets >= thresh].unique())
                  for thresh in thresholds]
        ax1.plot(thresholds, counts, color=color, marker=marker, label=label)

    ax1.set_xscale('linear')
    ax1.set_xlabel('Threshold Bytes')
    ax1.set_ylabel('Number of Devices')
    ax1.set_yscale('log')

    plotname = 'slice-dataset-threshold-count-ndevices'
    ax1.grid(1)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname + '.pdf', format='PDF')
    logger.info("CREATE FILE " + PLOTPATH + plotname)
    # Close this figure explicitly rather than whatever is "current".
    plt.close(fig1)
def plot_cdf_per_device(test_full, control_full, PLOTPATH, groupBy=None, param1='max', param2='perc95'):
    """
    Plot CDFs of per-device aggregates of 'octets_passed'.

    For each of treatment and control the rows are grouped by
    'Device_number' (and additionally by ``groupBy`` when given), the
    'octets_passed' column is aggregated with ``param1`` (solid line) and,
    unless ``param2`` is '', with ``param2`` (dashed line), and the CDF of
    each aggregate is drawn.

    test_full, control_full: tables with 'Device_number' and
        'octets_passed' columns, plus the ``groupBy`` column if used.
    PLOTPATH: string prefix the output file name is appended to.
    groupBy: optional extra grouping column, e.g. 'date'.
    param1, param2: 'perc95' (0.95 quantile) or the name of any
        aggregation method of the grouped column ('max', 'mean', ...).
        ``param2 == ''`` disables the second curve.

    Writes PLOTPATH + 'cdf-per-device[_<groupBy>]-<param1>[_<param2>].pdf'.
    """
    def aggregate(grouped, how):
        # 'perc95' is the only name that is not a plain method name.
        if how == 'perc95':
            return grouped.quantile(0.95)
        return getattr(grouped, how)()

    def draw_cdf(data, **style):
        # getSortedCDF is defined elsewhere in this file; presumably it
        # returns sorted values (x) and cumulative fractions (y).
        x, y = getSortedCDF(data)
        # len(y)//10 is 0 for fewer than 10 points; a marker step of 0
        # breaks drawing in matplotlib, so never go below 1.
        ax1.plot(x, y, markevery=max(1, len(y) // 10), **style)

    fig1, ax1 = plt.subplots(1, 1)
    colors = ['b', 'g', 'k', 'r']
    markers = ['o', 'd']
    labels = ['treatment', 'control']

    if groupBy is not None:
        group_keys = ['Device_number', groupBy]
    else:
        group_keys = ['Device_number']

    for idx, df in enumerate([test_full, control_full]):
        grouped = df.groupby(group_keys)['octets_passed']

        primary = aggregate(grouped, param1)
        secondary = aggregate(grouped, param2) if param2 != '' else None

        draw_cdf(primary, color=colors[idx], marker=markers[idx],
                 label=labels[idx])
        if secondary is not None:
            # Second curve reuses the marker but takes the 'k'/'r' colors.
            draw_cdf(secondary, color=colors[idx + 2], marker=markers[idx],
                     ls='--', label=labels[idx] + '-' + param2)

    ax1.set_xlabel("Bytes Transferred")
    if groupBy == 'date':
        ax1.set_xlabel("Bytes Transferred per Day [kbps]")

    # File name: optional group column, then param1, then optional param2.
    if groupBy:
        plotname = 'cdf-per-device_' + groupBy + '-' + param1
    else:
        plotname = 'cdf-per-device-' + param1
    if param2 != '':
        plotname = plotname + '_' + param2

    ax1.grid(1)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname + '.pdf', format='PDF')
    logger.info("CREATE FILE " + PLOTPATH + plotname)
    # Close this figure explicitly rather than whatever is "current".
    plt.close(fig1)
def plot_primetime_ratio_per_device(r_test, r_control, param, PLOTPATH):
    """
    Plot the CDF of the per-device prime-time ratio on a log x-axis.

    The 'ratio' column is grouped by 'Device_number' and aggregated
    according to ``param``:

      'mean' / 'median' / 'max' : that aggregate only
      'perc90'                  : the 0.95 quantile only
      'all1'                    : median (dashed) and 0.95 quantile
      'all2'                    : mean (dashed) and max
      anything else             : same curves as 'all1'

    r_test, r_control: tables with 'Device_number' and 'ratio' columns
        (treatment / control).
    param: selector described above; upper-cased into the file name.
    PLOTPATH: string prefix the output file name is appended to.

    Writes PLOTPATH + 'prime-time-ratio-per-device-cdf-<PARAM>.pdf'.
    """
    def aggregate(grouped, how):
        # 'perc95' is the only name that is not a plain method name.
        if how == 'perc95':
            return grouped.quantile(0.95)
        return getattr(grouped, how)()

    def draw_cdf(data, **style):
        # getSortedCDF is defined elsewhere in this file; presumably it
        # returns sorted values (x) and cumulative fractions (y).
        x, y = getSortedCDF(data)
        # len(y)//10 is 0 for fewer than 10 devices; a marker step of 0
        # breaks drawing in matplotlib, so never go below 1.
        ax1.plot(x, y, markevery=max(1, len(y) // 10), **style)

    fig1, ax1 = plt.subplots(1, 1)
    r_test_g = r_test.groupby('Device_number')['ratio']
    r_control_g = r_control.groupby('Device_number')['ratio']

    # Pick the dashed "secondary" aggregate (if any) and the solid one.
    secondary = None
    if param in ['mean', 'median', 'max']:
        primary = param
    elif param == 'perc90':
        primary = 'perc95'
    elif param == 'all1':
        logger.debug("Plot: median, perc90")
        secondary, primary = 'median', 'perc95'
    elif param == 'all2':
        logger.debug("Plot: mean, max")
        secondary, primary = 'mean', 'max'
    else:
        logger.debug("Plot all: median, perc90")
        secondary, primary = 'median', 'perc95'

    if secondary is not None:
        draw_cdf(aggregate(r_test_g, secondary), marker='o', color='k',
                 linestyle='--', label='treatment-' + secondary)
        draw_cdf(aggregate(r_control_g, secondary), marker='d', color='r',
                 linestyle='--', label='control-' + secondary)

    draw_cdf(aggregate(r_test_g, primary), marker='o', color='b',
             label='treatment')
    draw_cdf(aggregate(r_control_g, primary), marker='d', color='g',
             label='control')

    ax1.set_xscale('log')
    ax1.set_xlabel("Prime-time Ratio")

    plotname = 'prime-time-ratio-per-device-cdf-' + param.upper()
    ax1.grid(1)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname + '.pdf', format='PDF')
    logger.info("CREATE FILE " + PLOTPATH + plotname)
    # Close this figure explicitly rather than whatever is "current".
    plt.close(fig1)
def plot_primetime_ratio_by_date(r_test, r_control, param, PLOTPATH):
    """
    Plot the prime-time ratio per date as a time series.

    The 'ratio' column is grouped by 'date' and aggregated according to
    ``param``:

      'mean' / 'median' / 'max' : that aggregate only
      'perc90'                  : the 0.95 quantile only
      'all1'                    : median (dashed) and 0.95 quantile
      'all2'                    : mean (dashed) and max
      anything else             : same curves as 'all1'

    r_test, r_control: tables with 'date' and 'ratio' columns
        (treatment / control).
    param: selector described above; shown in the title (except for
        'all1'/'all2', which get a descriptive title) and upper-cased
        into the file name.
    PLOTPATH: string prefix the output file name is appended to.

    Writes PLOTPATH + 'prime-time-ratio-by-date-timeseries-<PARAM>.pdf'.
    """
    def aggregate(grouped, how):
        # 'perc95' is the only name that is not a plain method name.
        if how == 'perc95':
            return grouped.quantile(0.95)
        return getattr(grouped, how)()

    fig1, ax1 = plt.subplots(1, 1, figsize=(13, 8))
    tit = param
    r_test_g = r_test.groupby('date')['ratio']
    r_control_g = r_control.groupby('date')['ratio']

    # Pick the dashed "secondary" aggregate (if any) and the solid one.
    secondary = None
    if param in ['mean', 'median', 'max']:
        primary = param
    elif param == 'perc90':
        primary = 'perc95'
    elif param == 'all1':
        logger.debug("Plot: median, perc90")
        secondary, primary = 'median', 'perc95'
        tit = 'median, perc95'
    elif param == 'all2':
        logger.debug("Plot: mean, max")
        secondary, primary = 'mean', 'max'
        tit = 'mean, max'
    else:
        logger.debug("Plot all: median, perc90")
        secondary, primary = 'median', 'perc95'

    if secondary is not None:
        # Suffix the label with the aggregate so the legend can tell the
        # dashed curves apart from the solid ones in every branch.
        aggregate(r_test_g, secondary).plot(
            ax=ax1, marker='o', color='k', linestyle='--',
            label='treatment-' + secondary)
        aggregate(r_control_g, secondary).plot(
            ax=ax1, marker='d', color='r', linestyle='--',
            label='control-' + secondary)

    aggregate(r_test_g, primary).plot(ax=ax1, color='b', marker='o',
                                      label='treatment')
    aggregate(r_control_g, primary).plot(ax=ax1, color='g', marker='d',
                                         label='control')

    ax1.set_title("Prime-time Ratio every Date - " + tit)

    plotname = 'prime-time-ratio-by-date-timeseries-' + param.upper()
    ax1.grid(1)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname + '.pdf', format='PDF')
    logger.info("CREATE FILE " + PLOTPATH + plotname)
    # Close this figure explicitly rather than whatever is "current".
    plt.close(fig1)
def plot_throughput_per_day(g1, g2, param_device, param_time, PLOTPATH):
    """
    Plot aggregated data rate per time slot for treatment and control.

    Each curve is ``g[param_device]`` aggregated with some method and
    multiplied by the module-level CONVERT_OCTETS factor (presumably an
    octets-to-kbps conversion, matching the y-label -- confirm).

    g1, g2: treatment / control grouped objects (per the original note:
        pandas group-by over ['time', 'day'] with columns such as sum,
        perc90, max, min, median).
    param_device: column to plot, i.e. the per-slot aggregate over devices.
    param_time: aggregate over the group:
        'mean' / 'max' / 'min' / 'median' : that aggregate only
        'perc90'                          : 0.95 quantile, named 'perc95'
        'all1'                            : 0.95 quantile and median
        'all2'                            : max and mean
        anything else                     : max, 0.95 quantile, mean, median
    PLOTPATH: string prefix the output file name is appended to.

    Writes PLOTPATH + 'describe-total-throughput-per-day-<PARAM_TIME>.pdf'.
    """
    def rate(grouped, how):
        # Aggregate the chosen column and convert it with CONVERT_OCTETS.
        column = grouped[param_device]
        if how == 'perc95':
            values = column.quantile(0.95)
        else:
            # The aggregation method must be *called* before scaling.
            values = getattr(column, how)()
        return values * CONVERT_OCTETS

    def draw(how, colors, suffix, **style):
        # One treatment and one control curve for the same aggregate.
        rate(g1, how).plot(ax=ax1, color=colors[0],
                           label='treatment' + suffix, **style)
        rate(g2, how).plot(ax=ax1, color=colors[1],
                           label='control' + suffix, **style)

    fig1, ax1 = plt.subplots(1, 1, figsize=(18, 8))
    tit = param_time

    if param_time in ['mean', 'max', 'min', 'median']:
        draw(param_time, 'bg', '-' + param_time, linestyle='-.', linewidth=3)
    elif param_time == 'perc90':
        # The quantile actually used is 0.95, so name it that way downstream.
        param_time = 'perc95'
        draw('perc95', 'bg', '-' + param_time, linestyle='-.', linewidth=3)
    elif param_time == 'all1':
        logger.debug("plot perc90, median")
        tit = 'perc95, median'
        draw('perc95', 'bg', '-perc95', linestyle='-')
        draw('median', 'kr', '-median', linestyle='--', linewidth=2)
    elif param_time == 'all2':
        logger.debug("plot max, mean")
        tit = 'max, mean'
        draw('max', 'bg', '-max', linestyle='-')
        draw('mean', 'kr', '-mean', linestyle='--', linewidth=2)
    else:
        logger.debug("no param_time selected so plot 90-%ile, median, max, mean over time")
        # Eight curves: suffix each label with its aggregate so the legend
        # entries are distinguishable.
        draw('max', 'bg', '-max', linestyle='-.', linewidth=3)
        draw('perc95', 'kr', '-perc95', linestyle='-')
        draw('mean', 'bg', '-mean', linestyle=':', linewidth=3)
        draw('median', 'kr', '-median', linestyle='--', linewidth=2)

    ax1.set_ylabel(param_time + '$_{time}$ ' + param_device + '$_{device}$ Data Rate [kbps]')
    ax1.set_title(tit + " Data Rate in a 15 min slot")

    plotname = 'describe-total-throughput-per-day-' + param_time.upper()
    ax1.grid(1)
    ax1.legend(loc='best')
    fig1.tight_layout()
    if LATEXIFY:
        format_axes(ax1)
    fig1.savefig(PLOTPATH + plotname + '.pdf', format='PDF')
    logger.info("CREATE FILE " + PLOTPATH + plotname)
    # Close this figure explicitly rather than whatever is "current".
    plt.close(fig1)