def netflix_uploaded_data(): input_file = 'hannibal_dump/chrome.csv' home_ip = '192.168.1.2' website = 'netflix.com' ad_dict = get_ad_streams(input_file, home_ip, website=website, is_incoming=False) # https://www.netflix.com/api/msl/NFCDCH-LX-/cadmium/pblifecycle stream_ids = set([]) for key in ad_dict: # print key for url in ad_dict[key]: if 'pblifecycle' in url: stream_ids.add(key) # if 'nflxvideo.net/range' in url: # stream_ids.add(key) # print '\t', url sizes, total = get_stream_sizes(input_file, home_ip, stream_ids) print 'Total data sent:', storage_formatter_factory( unit_speed=False)(total) print 'Percentage range data {0:0.1f}%'.format( sum(sizes) * 100 / float(total)) print 'Total ad data sent:', storage_formatter_factory(unit_speed=False)( sum(sizes))
def get_ad_sizes(print_urls=False): input_file = 'chrome_combined_dataset.csv' home_ip = '10.0.2.15' website = 'youtube.com' ad_dict = get_ad_streams(input_file, home_ip, website=website, is_incoming=False) qu = [] for stream_id, url_list in ad_dict.items(): if reduce(lambda x, y: x and y, map(is_ad, url_list), True): qu.append(stream_id) if print_urls: print stream_id, len(url_list) for url in url_list: print '\t', url sizes, total = get_stream_sizes(input_file, home_ip, qu) print 'Total ad data sent:', storage_formatter_factory(unit_speed=False)( sum(sizes)) print 'Total data sent:', storage_formatter_factory( unit_speed=False)(total) print 'Percentage ad data {0:0.1f}%'.format( sum(sizes) * 100 / float(total))
def cdf_plot(x_values, y_values, color='red', ax=None, is_log=False):
    """Draw one CDF curve on a bitrate x axis and return the axes and line.

    Args:
        x_values: x coordinates of the curve.
        y_values: y coordinates of the curve; the y axis is limited to
            the range 0 to 1.0.
        color: matplotlib color used for the curve.
        ax: axes to draw on; a new figure and axes are created when None.
        is_log: when true, use a logarithmic x axis with the ticks
            0, 10, 100, 1024, ..., 10 * 1024 * 1024; otherwise use linear
            ticks every 256 * 1024 up to 2 * 1024 * 1024.

    Returns:
        Tuple ``(ax, line)`` where ``line`` is the list returned by
        ``ax.plot``.
    """
    # NOTE(review): assumes only the subplots() call was guarded by the
    # ``ax is None`` check in the original layout -- confirm.
    if ax is None:
        _, ax = plt.subplots()
    ax.set_yticklabels(
        ['{:3}%'.format(tick * 100) for tick in ax.get_yticks()])

    # Everything below goes through ``ax`` rather than pyplot's implicit
    # current axes, so a caller-supplied axes is the one that is scaled and
    # drawn on even when it is not the current one.
    if is_log:
        ax.set_xscale('log')
        ax.set_xticks([
            0, 10, 100, 1024, 10 * 1024, 100 * 1024, 1024 * 1024,
            10 * 1024 * 1024
        ])
    else:
        ax.set_xticks([
            0, 256 * 1024, 512 * 1024, 768 * 1024, 1024 * 1024,
            1.25 * 1024 * 1024, 1.5 * 1024 * 1024, 1.75 * 1024 * 1024,
            2 * 1024 * 1024
        ])
    ax.get_xaxis().set_major_formatter(
        FuncFormatter(
            storage_formatter_factory(unit_speed=True, decimal_places=2)))
    ax.set_ylabel('% Total')
    ax.set_xlabel('Bitrate')
    ax.figure.tight_layout()
    line = ax.plot(x_values, y_values, color=color)
    ax.set_ylim(0, 1.0)
    return ax, line
def chunks_both_real_fake(show=False):
    """Plot chunk-size CDFs from clustering and from URLs on one figure.

    The "Clustering" curve uses ``get_chunk_data_10(True)``; the "From URLs"
    curve uses every chunk returned by ``youtube_get_chunk_size_data`` for
    ``chrome_combined_dataset.csv``. In both cases the first element of an
    entry is taken as the chunk size. The figure is saved as
    ``chunk_sizes_both.svg``.

    Args:
        show: when true, also display the figure before it is cleared.
    """

    def _cdf_points(sizes):
        """Return the sorted unique sizes and the fraction of samples <= each."""
        ordered = sorted(sizes)
        count = float(len(ordered))
        # In sorted order, the 1-based position of a value's last occurrence
        # equals the number of samples that are <= that value.
        rank_of = {size: rank for rank, size in enumerate(ordered, 1)}
        unique_sizes = sorted(rank_of)
        return unique_sizes, [rank_of[size] / count for size in unique_sizes]

    input_file = 'chrome_combined_dataset.csv'

    fake_sizes = [chunk[0] for chunk in get_chunk_data_10(True)]
    fake_x_values, fake_y_values = _cdf_points(fake_sizes)

    chunks_by_itag = youtube_get_chunk_size_data(input_file)
    real_sizes = [
        chunk[0] for chunks in chunks_by_itag.values() for chunk in chunks
    ]
    real_x_values, real_y_values = _cdf_points(real_sizes)

    _, ax = plt.subplots()
    # Format y ticks from their actual values: labels fixed before any data
    # is plotted can end up on the wrong ticks once the axis autoscales.
    ax.get_yaxis().set_major_formatter(
        FuncFormatter(lambda value, _pos: '{:3}%'.format(value * 100)))
    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    ax.get_xaxis().set_major_formatter(
        FuncFormatter(storage_formatter_factory()))
    ax.set_xticks(
        [kib * 1024 for kib in [0, 512, 1024, 1024 + 512, 2048, 2048 + 512]])

    fake_line = plt.plot(fake_x_values, fake_y_values, color='blue')
    real_line = plt.plot(real_x_values, real_y_values, color='red')
    plt.legend([fake_line[0], real_line[0]], ['Clustering', 'From URLs'],
               loc=4)
    plt.savefig('chunk_sizes_both.svg')
    if show:
        plt.show()
    plt.clf()
def chunks_both_android_chrome(show=False):
    """Plot chunk-size CDFs for YouTube on Android and on Chrome together.

    Both data sets come from ``get_chunk_data_10`` with a threshold of 0.11;
    the first element of every returned entry is taken as the chunk size.
    The figure is saved as ``chunk_sizes_android_chrome.svg``.

    Args:
        show: when true, also display the figure before it is cleared.
    """

    def _cdf_points(sizes):
        """Return the sorted unique sizes and the fraction of samples <= each."""
        ordered = sorted(sizes)
        count = float(len(ordered))
        # In sorted order, the 1-based position of a value's last occurrence
        # equals the number of samples that are <= that value.
        rank_of = {size: rank for rank, size in enumerate(ordered, 1)}
        unique_sizes = sorted(rank_of)
        return unique_sizes, [rank_of[size] / count for size in unique_sizes]

    threshold = 0.11

    android_sizes = [
        chunk[0]
        for chunk in get_chunk_data_10(is_chrome=False, threshold=threshold)
    ]
    android_x_values, android_y_values = _cdf_points(android_sizes)

    chrome_sizes = [
        chunk[0]
        for chunk in get_chunk_data_10(is_chrome=True, threshold=threshold)
    ]
    chrome_x_values, chrome_y_values = _cdf_points(chrome_sizes)

    _, ax = plt.subplots()
    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    ax.get_xaxis().set_major_formatter(
        FuncFormatter(storage_formatter_factory()))
    ax.set_xticks([
        kib * 1024
        for kib in [0, 512, 1024, 1024 + 512, 2048, 2048 + 512, 3072]
    ])

    android_line = plt.plot(android_x_values, android_y_values, color='blue')
    chrome_line = plt.plot(chrome_x_values, chrome_y_values, color='red')
    plt.ylim(0., 1.)
    plt.xlim(0, 3072 * 1024)
    # Labels are derived only after the limits are pinned, so they describe
    # the ticks of the final 0..1 range.
    ax.set_yticklabels(
        ['{:3}%'.format(tick * 100) for tick in ax.get_yticks()])
    plt.legend([android_line[0], chrome_line[0]],
               ['YouTube Android', 'YouTube Chrome'], loc=4)
    plt.savefig('chunk_sizes_android_chrome.svg')
    if show:
        plt.show()
    plt.clf()
def netflix_plot_chunk_sizes(show=False):
    """Plot chunk-size CDFs for Netflix on Android and on Chrome together.

    Android chunks come from ``netflix_android_get_chunk_size_data`` for
    ``netflix_chunks.csv``; Chrome chunks come from
    ``netflix_chrome_get_chunk_size_data`` for ``hannibal_dump/chrome.csv``.
    The first element of every entry is taken as the chunk size. The figure
    is saved as ``netflix_chunk_sizes_range.svg``.

    Args:
        show: when true, also display the figure before it is cleared.
    """

    def _cdf_points(sizes):
        """Return the sorted unique sizes and the fraction of samples <= each."""
        ordered = sorted(sizes)
        count = float(len(ordered))
        # In sorted order, the 1-based position of a value's last occurrence
        # equals the number of samples that are <= that value.
        rank_of = {size: rank for rank, size in enumerate(ordered, 1)}
        unique_sizes = sorted(rank_of)
        return unique_sizes, [rank_of[size] / count for size in unique_sizes]

    android_file = 'netflix_chunks.csv'
    android_sizes = [
        chunk[0] for chunk in netflix_android_get_chunk_size_data(android_file)
    ]
    android_x_values, android_y_values = _cdf_points(android_sizes)

    chrome_file = 'hannibal_dump/chrome.csv'
    chrome_sizes = [
        chunk[0] for chunk in netflix_chrome_get_chunk_size_data(chrome_file)
    ]
    chrome_x_values, chrome_y_values = _cdf_points(chrome_sizes)

    _, ax = plt.subplots()
    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    ax.get_xaxis().set_major_formatter(
        FuncFormatter(storage_formatter_factory()))
    ax.set_xticks([
        kib * 1024
        for kib in [0, 256, 512, 768, 1024, 1024 + 256, 1024 + 512,
                    1024 + 768, 2048]
    ])

    android_line = plt.plot(android_x_values, android_y_values, color='blue')
    chrome_line = plt.plot(chrome_x_values, chrome_y_values, color='red')
    # Labels are derived after plotting so they reflect the ticks in use.
    ax.set_yticklabels(
        ['{:3}%'.format(tick * 100) for tick in ax.get_yticks()])
    plt.legend([android_line[0], chrome_line[0]],
               ['Netflix Android', 'Netflix Chrome'], loc=4)
    plt.savefig('netflix_chunk_sizes_range.svg')
    if show:
        plt.show()
    plt.clf()
def get_ad_sizes(print_urls=False): input_file = "chrome_combined_dataset.csv" home_ip = "10.0.2.15" website = "youtube.com" ad_dict = get_ad_streams(input_file, home_ip, website=website, is_incoming=False) qu = [] for stream_id, url_list in ad_dict.items(): if reduce(lambda x, y: x and y, map(is_ad, url_list), True): qu.append(stream_id) if print_urls: print stream_id, len(url_list) for url in url_list: print "\t", url sizes, total = get_stream_sizes(input_file, home_ip, qu) print "Total ad data sent:", storage_formatter_factory(unit_speed=False)(sum(sizes)) print "Total data sent:", storage_formatter_factory(unit_speed=False)(total) print "Percentage ad data {0:0.1f}%".format(sum(sizes) * 100 / float(total))
def netflix_uploaded_data(): input_file = "hannibal_dump/chrome.csv" home_ip = "192.168.1.2" website = "netflix.com" ad_dict = get_ad_streams(input_file, home_ip, website=website, is_incoming=False) # https://www.netflix.com/api/msl/NFCDCH-LX-/cadmium/pblifecycle stream_ids = set([]) for key in ad_dict: # print key for url in ad_dict[key]: if "pblifecycle" in url: stream_ids.add(key) # if 'nflxvideo.net/range' in url: # stream_ids.add(key) # print '\t', url sizes, total = get_stream_sizes(input_file, home_ip, stream_ids) print "Total data sent:", storage_formatter_factory(unit_speed=False)(total) print "Percentage range data {0:0.1f}%".format(sum(sizes) * 100 / float(total)) print "Total ad data sent:", storage_formatter_factory(unit_speed=False)(sum(sizes))
def chunks_both_android_chrome(show=False):
    """Plot chunk-size CDFs for YouTube on Android and on Chrome together.

    Both data sets come from ``get_chunk_data_10`` with a threshold of 0.11;
    the first element of every returned entry is taken as the chunk size.
    The figure is saved as ``chunk_sizes_android_chrome.svg``.

    Args:
        show: when true, also display the figure before it is cleared.
    """

    def _cdf_points(sizes):
        """Return the sorted unique sizes and the fraction of samples <= each."""
        ordered = sorted(sizes)
        count = float(len(ordered))
        # In sorted order, the 1-based position of a value's last occurrence
        # equals the number of samples that are <= that value.
        rank_of = {size: rank for rank, size in enumerate(ordered, 1)}
        unique_sizes = sorted(rank_of)
        return unique_sizes, [rank_of[size] / count for size in unique_sizes]

    threshold = 0.11

    android_sizes = [
        chunk[0]
        for chunk in get_chunk_data_10(is_chrome=False, threshold=threshold)
    ]
    android_x_values, android_y_values = _cdf_points(android_sizes)

    chrome_sizes = [
        chunk[0]
        for chunk in get_chunk_data_10(is_chrome=True, threshold=threshold)
    ]
    chrome_x_values, chrome_y_values = _cdf_points(chrome_sizes)

    _, ax = plt.subplots()
    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    ax.get_xaxis().set_major_formatter(
        FuncFormatter(storage_formatter_factory()))
    ax.set_xticks([
        kib * 1024
        for kib in [0, 512, 1024, 1024 + 512, 2048, 2048 + 512, 3072]
    ])

    android_line = plt.plot(android_x_values, android_y_values, color='blue')
    chrome_line = plt.plot(chrome_x_values, chrome_y_values, color='red')
    plt.ylim(0., 1.)
    plt.xlim(0, 3072 * 1024)
    # Labels are derived only after the limits are pinned, so they describe
    # the ticks of the final 0..1 range.
    ax.set_yticklabels(
        ['{:3}%'.format(tick * 100) for tick in ax.get_yticks()])
    plt.legend([android_line[0], chrome_line[0]],
               ['YouTube Android', 'YouTube Chrome'], loc=4)
    plt.savefig('chunk_sizes_android_chrome.svg')
    if show:
        plt.show()
    plt.clf()
def save_cdf(x_values, y_values, threshold, is_chrome, show=False):
    """Plot a single chunk-size CDF and save it as an SVG file.

    The file is named ``cdf-fig-<N>0ms-<platform>.svg`` where ``<N>`` is
    ``threshold * 100`` rounded to the nearest integer and ``<platform>``
    is ``chrome`` or ``android``.

    Args:
        x_values: x coordinates of the curve.
        y_values: y coordinates of the curve; the y axis is limited to 0..1.
        threshold: threshold value encoded into the file name.
        is_chrome: selects the ``chrome`` (true) or ``android`` (false)
            file-name suffix.
        show: when true, also display the figure before it is cleared.
    """
    _, ax = plt.subplots()
    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    ax.get_xaxis().set_major_formatter(
        FuncFormatter(storage_formatter_factory()))
    ax.set_xticks([
        kib * 1024
        for kib in [0, 512, 1024, 1024 + 512, 2048, 2048 + 512, 3072]
    ])

    plt.plot(x_values, y_values, color='blue')
    plt.ylim(0., 1.)
    plt.xlim(0, 3072 * 1024)
    # Labels are derived only after the limits are pinned, so they describe
    # the ticks of the final 0..1 range.
    ax.set_yticklabels(
        ['{:3}%'.format(tick * 100) for tick in ax.get_yticks()])

    # Round before converting: 0.29 * 100 is 28.999999999999996 in binary
    # floating point, which a bare int() would truncate to 28.
    scaled_threshold = int(round(threshold * 100))
    platform = 'chrome' if is_chrome else 'android'
    plt.savefig('cdf-fig-{}0ms-{}.svg'.format(scaled_threshold, platform))
    if show:
        plt.show()
    plt.clf()
def chunks_both_real_fake(show=False):
    """Plot chunk-size CDFs from clustering and from URLs on one figure.

    The "Clustering" curve uses ``get_chunk_data_10(True)``; the "From URLs"
    curve uses every chunk returned by ``youtube_get_chunk_size_data`` for
    ``chrome_combined_dataset.csv``. In both cases the first element of an
    entry is taken as the chunk size. The figure is saved as
    ``chunk_sizes_both.svg``.

    Args:
        show: when true, also display the figure before it is cleared.
    """

    def _cdf_points(sizes):
        """Return the sorted unique sizes and the fraction of samples <= each."""
        ordered = sorted(sizes)
        count = float(len(ordered))
        # In sorted order, the 1-based position of a value's last occurrence
        # equals the number of samples that are <= that value.
        rank_of = {size: rank for rank, size in enumerate(ordered, 1)}
        unique_sizes = sorted(rank_of)
        return unique_sizes, [rank_of[size] / count for size in unique_sizes]

    input_file = 'chrome_combined_dataset.csv'

    fake_sizes = [chunk[0] for chunk in get_chunk_data_10(True)]
    fake_x_values, fake_y_values = _cdf_points(fake_sizes)

    chunks_by_itag = youtube_get_chunk_size_data(input_file)
    real_sizes = [
        chunk[0] for chunks in chunks_by_itag.values() for chunk in chunks
    ]
    real_x_values, real_y_values = _cdf_points(real_sizes)

    _, ax = plt.subplots()
    # Format y ticks from their actual values: labels fixed before any data
    # is plotted can end up on the wrong ticks once the axis autoscales.
    ax.get_yaxis().set_major_formatter(
        FuncFormatter(lambda value, _pos: '{:3}%'.format(value * 100)))
    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    ax.get_xaxis().set_major_formatter(
        FuncFormatter(storage_formatter_factory()))
    ax.set_xticks(
        [kib * 1024 for kib in [0, 512, 1024, 1024 + 512, 2048, 2048 + 512]])

    fake_line = plt.plot(fake_x_values, fake_y_values, color='blue')
    real_line = plt.plot(real_x_values, real_y_values, color='red')
    plt.legend([fake_line[0], real_line[0]], ['Clustering', 'From URLs'],
               loc=4)
    plt.savefig('chunk_sizes_both.svg')
    if show:
        plt.show()
    plt.clf()
def cdf_plot(x_values, y_values, color='red', ax=None, is_log=False):
    """Draw one CDF curve on a bitrate x axis and return the axes and line.

    Args:
        x_values: x coordinates of the curve.
        y_values: y coordinates of the curve; the y axis is limited to
            the range 0 to 1.0.
        color: matplotlib color used for the curve.
        ax: axes to draw on; a new figure and axes are created when None.
        is_log: when true, use a logarithmic x axis with the ticks
            0, 10, 100, 1024, ..., 10 * 1024 * 1024; otherwise use linear
            ticks every 256 * 1024 up to 2 * 1024 * 1024.

    Returns:
        Tuple ``(ax, line)`` where ``line`` is the list returned by
        ``ax.plot``.
    """
    # NOTE(review): assumes only the subplots() call was guarded by the
    # ``ax is None`` check in the original layout -- confirm.
    if ax is None:
        _, ax = plt.subplots()
    ax.set_yticklabels(
        ['{:3}%'.format(tick * 100) for tick in ax.get_yticks()])

    # Everything below goes through ``ax`` rather than pyplot's implicit
    # current axes, so a caller-supplied axes is the one that is scaled and
    # drawn on even when it is not the current one.
    if is_log:
        ax.set_xscale('log')
        ax.set_xticks([
            0, 10, 100, 1024, 10 * 1024, 100 * 1024, 1024 * 1024,
            10 * 1024 * 1024
        ])
    else:
        ax.set_xticks([
            0, 256 * 1024, 512 * 1024, 768 * 1024, 1024 * 1024,
            1.25 * 1024 * 1024, 1.5 * 1024 * 1024, 1.75 * 1024 * 1024,
            2 * 1024 * 1024
        ])
    ax.get_xaxis().set_major_formatter(
        FuncFormatter(
            storage_formatter_factory(unit_speed=True, decimal_places=2)))
    ax.set_ylabel('% Total')
    ax.set_xlabel('Bitrate')
    ax.figure.tight_layout()
    line = ax.plot(x_values, y_values, color=color)
    ax.set_ylim(0, 1.0)
    return ax, line
def plot_packet_length(sorted_data, color='red', ax=None, use_log=False,
                       storage_units=False):
    """Draw a packet-size curve with y values 0, 1/n, ..., (n-1)/n.

    Args:
        sorted_data: x coordinates of the curve; presumably packet sizes in
            ascending order (the function does not sort them itself).
        color: matplotlib color used for the curve.
        ax: axes to draw on; a new figure and axes are created when None.
        use_log: when true, use a logarithmic x axis with the ticks
            0, 10, 100, 1024, 10 * 1024 and 64 * 1024.
        storage_units: when true, format x ticks with
            ``storage_formatter_factory()``.

    Returns:
        The axes the curve was drawn on.
    """
    y_values = np.arange(len(sorted_data)) / float(len(sorted_data))

    # NOTE(review): assumes only the subplots() call was guarded by the
    # ``ax is None`` check in the original layout -- confirm.
    if ax is None:
        _, ax = plt.subplots()

    # Format y ticks from their actual values: labels fixed before any data
    # is plotted can end up on the wrong ticks once the axis autoscales.
    ax.get_yaxis().set_major_formatter(
        FuncFormatter(lambda value, _pos: '{:3}%'.format(value * 100)))
    ax.set_ylabel('% Total')
    ax.set_xlabel('Packet size in bytes')

    # Scale and draw through ``ax`` rather than pyplot's implicit current
    # axes, so a caller-supplied axes is the one that is actually used.
    if use_log:
        ax.set_xscale('log')
        ax.set_xticks([0, 10, 100, 1024, 10 * 1024, 64 * 1024])
    if storage_units:
        ax.get_xaxis().set_major_formatter(
            FuncFormatter(storage_formatter_factory()))
    ax.figure.tight_layout()
    ax.plot(sorted_data, y_values, color=color)
    return ax
def save_cdf(x_values, y_values, threshold, is_chrome, show=False):
    """Plot a single chunk-size CDF and save it as an SVG file.

    The file is named ``cdf-fig-<N>0ms-<platform>.svg`` where ``<N>`` is
    ``threshold * 100`` rounded to the nearest integer and ``<platform>``
    is ``chrome`` or ``android``.

    Args:
        x_values: x coordinates of the curve.
        y_values: y coordinates of the curve; the y axis is limited to 0..1.
        threshold: threshold value encoded into the file name.
        is_chrome: selects the ``chrome`` (true) or ``android`` (false)
            file-name suffix.
        show: when true, also display the figure before it is cleared.
    """
    _, ax = plt.subplots()
    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    ax.get_xaxis().set_major_formatter(
        FuncFormatter(storage_formatter_factory()))
    ax.set_xticks([
        kib * 1024
        for kib in [0, 512, 1024, 1024 + 512, 2048, 2048 + 512, 3072]
    ])

    plt.plot(x_values, y_values, color='blue')
    plt.ylim(0., 1.)
    plt.xlim(0, 3072 * 1024)
    # Labels are derived only after the limits are pinned, so they describe
    # the ticks of the final 0..1 range.
    ax.set_yticklabels(
        ['{:3}%'.format(tick * 100) for tick in ax.get_yticks()])

    # Round before converting: 0.29 * 100 is 28.999999999999996 in binary
    # floating point, which a bare int() would truncate to 28.
    scaled_threshold = int(round(threshold * 100))
    platform = 'chrome' if is_chrome else 'android'
    plt.savefig('cdf-fig-{}0ms-{}.svg'.format(scaled_threshold, platform))
    if show:
        plt.show()
    plt.clf()
def youtube_plot_chunk_sizes(show=False): input_file = 'chrome_combined_dataset.csv' itags = youtube_get_chunk_size_data(input_file) fig, ax = plt.subplots() ax.set_yticklabels(['{:3}%'.format(x * 100) for x in ax.get_yticks()]) ax.set_ylabel('% Total') ax.set_xlabel('Chunk size') ax.get_xaxis().set_major_formatter(FuncFormatter(storage_formatter_factory())) ax.set_xticks(map(lambda x: x * 1024, [0, 256, 512, 768, 1024, 1024 + 256, 1024 + 512, 1024 + 768, 2048])) tag_dict = { '243': 'video @ 524k', '244': 'video @ 798k', '251': 'audio @ 160k' } colors = ['orange', 'red', 'brown', 'green', 'blue'] types = [] averages = [] lines = [] for i, (itag, chunks) in enumerate(itags.items()): chunks = map(lambda x: x[0], chunks) if itag == '278' or itag == '250': continue x_values = sorted(list(set(chunks))) y_values = [len(filter(lambda x: x <= chunk_size, chunks)) for chunk_size in x_values] y_values = map(lambda x: x / float(len(chunks)), y_values) types.append(itag) average = sum(chunks) / float(len(chunks)) print itag, tag_dict[itag], average averages.append(average) lines.append(plt.plot(x_values, y_values, color=colors[i])) plt.legend(map(lambda x: x[0], lines), map(lambda x: tag_dict[x], types), loc=4) plt.tight_layout() plt.savefig('youtube_chunk_sizes_range.svg') if show: plt.show() plt.clf()
def plot_packet_length(sorted_data, color='red', ax=None, use_log=False,
                       storage_units=False):
    """Draw a packet-size curve with y values 0, 1/n, ..., (n-1)/n.

    Args:
        sorted_data: x coordinates of the curve; presumably packet sizes in
            ascending order (the function does not sort them itself).
        color: matplotlib color used for the curve.
        ax: axes to draw on; a new figure and axes are created when None.
        use_log: when true, use a logarithmic x axis with the ticks
            0, 10, 100, 1024, 10 * 1024 and 64 * 1024.
        storage_units: when true, format x ticks with
            ``storage_formatter_factory()``.

    Returns:
        The axes the curve was drawn on.
    """
    y_values = np.arange(len(sorted_data)) / float(len(sorted_data))

    # NOTE(review): assumes only the subplots() call was guarded by the
    # ``ax is None`` check in the original layout -- confirm.
    if ax is None:
        _, ax = plt.subplots()

    # Format y ticks from their actual values: labels fixed before any data
    # is plotted can end up on the wrong ticks once the axis autoscales.
    ax.get_yaxis().set_major_formatter(
        FuncFormatter(lambda value, _pos: '{:3}%'.format(value * 100)))
    ax.set_ylabel('% Total')
    ax.set_xlabel('Packet size in bytes')

    # Scale and draw through ``ax`` rather than pyplot's implicit current
    # axes, so a caller-supplied axes is the one that is actually used.
    if use_log:
        ax.set_xscale('log')
        ax.set_xticks([0, 10, 100, 1024, 10 * 1024, 64 * 1024])
    if storage_units:
        ax.get_xaxis().set_major_formatter(
            FuncFormatter(storage_formatter_factory()))
    ax.figure.tight_layout()
    ax.plot(sorted_data, y_values, color=color)
    return ax