예제 #1
0
def netflix_uploaded_data():
    """Print how much of the Netflix capture belongs to 'pblifecycle' streams.

    Calls get_ad_streams on 'hannibal_dump/chrome.csv' with
    is_incoming=False, keeps every key whose URL collection contains a URL
    with the substring 'pblifecycle', and passes those keys to
    get_stream_sizes.  Three lines are printed: the total, the percentage
    taken by the selected streams, and their summed size.
    """
    input_file = 'hannibal_dump/chrome.csv'
    home_ip = '192.168.1.2'
    website = 'netflix.com'
    ad_dict = get_ad_streams(input_file,
                             home_ip,
                             website=website,
                             is_incoming=False)

    # Example of a matching URL:
    # https://www.netflix.com/api/msl/NFCDCH-LX-/cadmium/pblifecycle
    stream_ids = set()
    for key in ad_dict:
        if any('pblifecycle' in url for url in ad_dict[key]):
            stream_ids.add(key)

    sizes, total = get_stream_sizes(input_file, home_ip, stream_ids)
    selected = sum(sizes)
    # A capture without any data reports total == 0; report 0% instead of
    # failing with ZeroDivisionError.
    percentage = selected * 100 / float(total) if total else 0.0
    to_storage = storage_formatter_factory(unit_speed=False)

    # Single-argument print(...) produces the same output as the print
    # statement on Python 2 and is also valid on Python 3.
    print('Total data sent: %s' % (to_storage(total),))
    print('Percentage range data {0:0.1f}%'.format(percentage))
    print('Total ad data sent: %s' % (to_storage(selected),))
예제 #2
0
def get_ad_sizes(print_urls=False):
    """Print the size of the ad streams found in the YouTube capture.

    A stream returned by get_ad_streams is selected when is_ad is truthy
    for every one of its URLs (a stream without URLs is selected as well).
    The selected ids go to get_stream_sizes and three summary lines are
    printed: summed ad size, total size and the ad percentage.

    Args:
        print_urls: when true, additionally print every selected stream id
            with its URL count, followed by its URLs, one per line, each
            prefixed with a tab.
    """
    input_file = 'chrome_combined_dataset.csv'
    home_ip = '10.0.2.15'
    website = 'youtube.com'

    ad_dict = get_ad_streams(input_file,
                             home_ip,
                             website=website,
                             is_incoming=False)
    ad_stream_ids = []
    for stream_id, url_list in ad_dict.items():
        if not all(is_ad(url) for url in url_list):
            continue
        ad_stream_ids.append(stream_id)
        if print_urls:
            print('%s %s' % (stream_id, len(url_list)))
            for url in url_list:
                # The Python 2 statement "print '\t', url" writes no space
                # after the tab, so the URL follows the tab directly.
                print('\t%s' % (url,))

    sizes, total = get_stream_sizes(input_file, home_ip, ad_stream_ids)
    ad_total = sum(sizes)
    # Avoid ZeroDivisionError when the capture contains no data at all.
    percentage = ad_total * 100 / float(total) if total else 0.0
    to_storage = storage_formatter_factory(unit_speed=False)

    print('Total ad data sent: %s' % (to_storage(ad_total),))
    print('Total data sent: %s' % (to_storage(total),))
    print('Percentage ad data {0:0.1f}%'.format(percentage))
예제 #3
0
def cdf_plot(x_values, y_values, color='red', ax=None, is_log=False):
    """Draw a CDF line on `ax`, creating a new figure when `ax` is None.

    All scaling, plotting and limit calls go through `ax` itself, so the
    line ends up on the axes that was passed in even when it is not
    pyplot's current axes.

    Args:
        x_values: x coordinates of the line (the x axis is labelled
            'Bitrate').
        y_values: y coordinates; the y axis is limited to [0, 1.0].
        color: line color handed to the plot call.
        ax: axes to draw on, or None to create one with plt.subplots().
        is_log: use a logarithmic x axis with its own tick set.

    Returns:
        Tuple (ax, line) where line is the value returned by ax.plot.
    """
    if ax is None:
        _fig, ax = plt.subplots()
    ax.set_yticklabels(['{:3}%'.format(x * 100) for x in ax.get_yticks()])
    if is_log:
        ax.set_xscale('log')
        ax.set_xticks([
            0, 10, 100, 1024, 10 * 1024, 100 * 1024, 1024 * 1024,
            10 * 1024 * 1024
        ])
    else:
        ax.set_xticks([
            0, 256 * 1024, 512 * 1024, 768 * 1024, 1024 * 1024,
            1.25 * 1024 * 1024, 1.5 * 1024 * 1024, 1.75 * 1024 * 1024,
            2 * 1024 * 1024
        ])
        # Alternative tick set covering 0..4M in 0.5M steps:
        # ax.set_xticks([0, 512 * 1024, 1024 * 1024, 1.5 * 1024 * 1024, 2 * 1024 * 1024, 2.5 * 1024 * 1024, 3 * 1024 * 1024, 3.5 * 1024 * 1024, 4 * 1024 * 1024])

    ax.get_xaxis().set_major_formatter(
        FuncFormatter(
            storage_formatter_factory(unit_speed=True, decimal_places=2)))

    ax.set_ylabel('% Total')
    ax.set_xlabel('Bitrate')
    ax.figure.tight_layout()
    line = ax.plot(x_values, y_values, color=color)
    ax.set_ylim(0, 1.0)
    return ax, line
예제 #4
0
def chunks_both_real_fake(show=False):
    """Plot two chunk-size CDFs ('Clustering' vs 'From URLs') on one figure.

    The 'Clustering' sizes are the first elements of the entries returned
    by get_chunk_data_10(True); the 'From URLs' sizes are the first
    elements of all entries in the values of the mapping returned by
    youtube_get_chunk_size_data.  The figure is saved to
    'chunk_sizes_both.svg' and then cleared.

    Args:
        show: when true, display the figure after saving it.
    """
    import bisect  # only needed here; kept local to the function

    def cdf_points(sizes):
        # Return (distinct sizes ascending, share of sizes <= each one).
        ordered = sorted(sizes)
        count = float(len(ordered))
        distinct = sorted(set(ordered))
        # bisect_right on the sorted list counts the entries <= size
        # without rescanning the whole list for every distinct value.
        shares = [bisect.bisect_right(ordered, size) / count
                  for size in distinct]
        return distinct, shares

    input_file = 'chrome_combined_dataset.csv'
    fake_x_values, fake_y_values = cdf_points(
        [chunk[0] for chunk in get_chunk_data_10(True)])

    chunks_by_key = youtube_get_chunk_size_data(input_file)
    real_x_values, real_y_values = cdf_points(
        [chunk[0] for chunks in chunks_by_key.values() for chunk in chunks])

    fig, ax = plt.subplots()
    # Format the y ticks through a formatter so the labels always follow
    # the tick positions chosen after the data has been plotted.
    ax.get_yaxis().set_major_formatter(
        FuncFormatter(lambda value, pos: '{:3}%'.format(value * 100)))
    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    ax.get_xaxis().set_major_formatter(FuncFormatter(storage_formatter_factory()))
    ax.set_xticks([kib * 1024 for kib in (0, 512, 1024, 1024 + 512, 2048, 2048 + 512)])

    fake_line = plt.plot(fake_x_values, fake_y_values, color='blue')
    real_line = plt.plot(real_x_values, real_y_values, color='red')

    plt.legend([fake_line[0], real_line[0]], ['Clustering', 'From URLs'], loc=4)

    plt.savefig('chunk_sizes_both.svg')
    if show:
        plt.show()
    plt.clf()
예제 #5
0
def chunks_both_android_chrome(show=False):
    """Plot the Android and Chrome chunk-size CDFs on a shared figure.

    Both data sets come from get_chunk_data_10 called with the same
    threshold (0.11); the first element of each returned entry is used as
    the chunk size.  The figure is saved to
    'chunk_sizes_android_chrome.svg' and then cleared.

    Args:
        show: when true, display the figure after saving it.
    """
    def cdf_points(chunks):
        # x: distinct sizes ascending; y: share of all sizes <= that x.
        sizes = [chunk[0] for chunk in chunks]
        count = float(len(sizes))
        distinct = sorted(set(sizes))
        shares = [sum(1 for size in sizes if size <= bound) / count
                  for bound in distinct]
        return distinct, shares

    threshold = 0.11
    android_x_values, android_y_values = cdf_points(
        get_chunk_data_10(is_chrome=False, threshold=threshold))
    chrome_x_values, chrome_y_values = cdf_points(
        get_chunk_data_10(is_chrome=True, threshold=threshold))

    fig, ax = plt.subplots()

    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    size_formatter = FuncFormatter(storage_formatter_factory())
    ax.get_xaxis().set_major_formatter(size_formatter)
    ax.set_xticks([kib * 1024
                   for kib in (0, 512, 1024, 1024 + 512, 2048, 2048 + 512, 3072)])

    android_line = plt.plot(android_x_values, android_y_values, color='blue')
    chrome_line = plt.plot(chrome_x_values, chrome_y_values, color='red')

    plt.ylim(0.0, 1.0)
    plt.xlim(0, 3072 * 1024)
    # Labels are taken from the ticks that exist once the limits are set.
    percent_labels = ['{:3}%'.format(tick * 100) for tick in ax.get_yticks()]
    ax.set_yticklabels(percent_labels)

    handles = [android_line[0], chrome_line[0]]
    plt.legend(handles, ['YouTube Android', 'YouTube Chrome'], loc=4)

    plt.savefig('chunk_sizes_android_chrome.svg')
    if show:
        plt.show()
    plt.clf()
예제 #6
0
def netflix_plot_chunk_sizes(show=False):
    """Plot the Netflix Android and Chrome chunk-size CDFs on one figure.

    Android entries are read with netflix_android_get_chunk_size_data from
    'netflix_chunks.csv', Chrome entries with
    netflix_chrome_get_chunk_size_data from 'hannibal_dump/chrome.csv';
    the first element of each entry is used as the chunk size.  The figure
    is saved to 'netflix_chunk_sizes_range.svg' and then cleared.

    Args:
        show: when true, display the figure after saving it.
    """
    def cdf_points(entries):
        # Distinct sizes in ascending order and, for each, the share of
        # all sizes that are less than or equal to it.
        sizes = [entry[0] for entry in entries]
        total = float(len(sizes))
        distinct = sorted(set(sizes))
        shares = [len([size for size in sizes if size <= limit]) / total
                  for limit in distinct]
        return distinct, shares

    android_file = 'netflix_chunks.csv'
    android_x_values, android_y_values = cdf_points(
        netflix_android_get_chunk_size_data(android_file))

    chrome_file = 'hannibal_dump/chrome.csv'
    chrome_x_values, chrome_y_values = cdf_points(
        netflix_chrome_get_chunk_size_data(chrome_file))

    fig, ax = plt.subplots()
    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    ax.get_xaxis().set_major_formatter(FuncFormatter(storage_formatter_factory()))
    tick_kib = (0, 256, 512, 768, 1024, 1024 + 256, 1024 + 512, 1024 + 768, 2048)
    ax.set_xticks([kib * 1024 for kib in tick_kib])

    android_line = plt.plot(android_x_values, android_y_values, color='blue')
    chrome_line = plt.plot(chrome_x_values, chrome_y_values, color='red')

    percent_labels = ['{:3}%'.format(tick * 100) for tick in ax.get_yticks()]
    ax.set_yticklabels(percent_labels)

    handles = [android_line[0], chrome_line[0]]
    plt.legend(handles, ['Netflix Android', 'Netflix Chrome'], loc=4)
    plt.savefig('netflix_chunk_sizes_range.svg')
    if show:
        plt.show()
    plt.clf()
예제 #7
0
def get_ad_sizes(print_urls=False):
    """Print the size of the ad streams found in the YouTube capture.

    A stream returned by get_ad_streams is selected when is_ad is truthy
    for every one of its URLs (a stream without URLs is selected as well).
    The selected ids go to get_stream_sizes and three summary lines are
    printed: summed ad size, total size and the ad percentage.

    Args:
        print_urls: when true, additionally print every selected stream id
            with its URL count, followed by its URLs, one per line, each
            prefixed with a tab.
    """
    input_file = "chrome_combined_dataset.csv"
    home_ip = "10.0.2.15"
    website = "youtube.com"

    ad_dict = get_ad_streams(input_file, home_ip, website=website, is_incoming=False)
    ad_stream_ids = []
    for stream_id, url_list in ad_dict.items():
        if not all(is_ad(url) for url in url_list):
            continue
        ad_stream_ids.append(stream_id)
        if print_urls:
            print("%s %s" % (stream_id, len(url_list)))
            for url in url_list:
                # The Python 2 statement 'print "\t", url' writes no space
                # after the tab, so the URL follows the tab directly.
                print("\t%s" % (url,))

    sizes, total = get_stream_sizes(input_file, home_ip, ad_stream_ids)
    ad_total = sum(sizes)
    # Avoid ZeroDivisionError when the capture contains no data at all.
    percentage = ad_total * 100 / float(total) if total else 0.0
    to_storage = storage_formatter_factory(unit_speed=False)

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("Total ad data sent: %s" % (to_storage(ad_total),))
    print("Total data sent: %s" % (to_storage(total),))
    print("Percentage ad data {0:0.1f}%".format(percentage))
예제 #8
0
def netflix_uploaded_data():
    """Print how much of the Netflix capture belongs to 'pblifecycle' streams.

    Calls get_ad_streams on "hannibal_dump/chrome.csv" with
    is_incoming=False, keeps every key whose URL collection contains a URL
    with the substring "pblifecycle", and passes those keys to
    get_stream_sizes.  Three lines are printed: the total, the percentage
    taken by the selected streams, and their summed size.
    """
    input_file = "hannibal_dump/chrome.csv"
    home_ip = "192.168.1.2"
    website = "netflix.com"
    ad_dict = get_ad_streams(input_file, home_ip, website=website, is_incoming=False)

    # Example of a matching URL:
    # https://www.netflix.com/api/msl/NFCDCH-LX-/cadmium/pblifecycle
    stream_ids = set()
    for key in ad_dict:
        if any("pblifecycle" in url for url in ad_dict[key]):
            stream_ids.add(key)

    sizes, total = get_stream_sizes(input_file, home_ip, stream_ids)
    selected = sum(sizes)
    # A capture without any data reports total == 0; report 0% instead of
    # failing with ZeroDivisionError.
    percentage = selected * 100 / float(total) if total else 0.0
    to_storage = storage_formatter_factory(unit_speed=False)

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("Total data sent: %s" % (to_storage(total),))
    print("Percentage range data {0:0.1f}%".format(percentage))
    print("Total ad data sent: %s" % (to_storage(selected),))
예제 #9
0
def chunks_both_android_chrome(show=False):
    """Draw the chunk-size CDFs of YouTube on Android and on Chrome.

    get_chunk_data_10 is queried once per platform with threshold 0.11 and
    the first element of every entry is taken as the chunk size.  The
    result is written to 'chunk_sizes_android_chrome.svg'; the pyplot
    figure is cleared afterwards.

    Args:
        show: when true, display the figure after saving it.
    """
    cutoff = 0.11

    # Android curve: distinct sizes and the share of sizes <= each one.
    android_sizes = [
        entry[0]
        for entry in get_chunk_data_10(is_chrome=False, threshold=cutoff)
    ]
    android_count = float(len(android_sizes))
    android_xs = sorted(set(android_sizes))
    android_ys = [
        len([size for size in android_sizes if size <= limit]) / android_count
        for limit in android_xs
    ]

    # Chrome curve, computed the same way.
    chrome_sizes = [
        entry[0]
        for entry in get_chunk_data_10(is_chrome=True, threshold=cutoff)
    ]
    chrome_count = float(len(chrome_sizes))
    chrome_xs = sorted(set(chrome_sizes))
    chrome_ys = [
        len([size for size in chrome_sizes if size <= limit]) / chrome_count
        for limit in chrome_xs
    ]

    fig, ax = plt.subplots()

    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    x_axis = ax.get_xaxis()
    x_axis.set_major_formatter(FuncFormatter(storage_formatter_factory()))
    tick_kib = (0, 512, 1024, 1024 + 512, 2048, 2048 + 512, 3072)
    ax.set_xticks([kib * 1024 for kib in tick_kib])

    android_line = plt.plot(android_xs, android_ys, color='blue')
    chrome_line = plt.plot(chrome_xs, chrome_ys, color='red')

    plt.ylim(0.0, 1.0)
    plt.xlim(0, 3072 * 1024)
    # Build the percent labels from the ticks present after the limits
    # have been applied.
    ax.set_yticklabels(
        ['{:3}%'.format(tick * 100) for tick in ax.get_yticks()])

    plt.legend([android_line[0], chrome_line[0]],
               ['YouTube Android', 'YouTube Chrome'],
               loc=4)

    plt.savefig('chunk_sizes_android_chrome.svg')
    if show:
        plt.show()
    plt.clf()
예제 #10
0
def save_cdf(x_values, y_values, threshold, is_chrome, show=False):
    """Plot a single chunk-size CDF and save it as an SVG file.

    The file name is built from the inputs:
    'cdf-fig-<int(threshold * 100)>0ms-<chrome|android>.svg'.

    Args:
        x_values: x coordinates of the curve (x axis limited to 0..3072 KiB).
        y_values: y coordinates of the curve (y axis limited to 0..1).
        threshold: number used only for the file name.
        is_chrome: selects 'chrome' or 'android' in the file name.
        show: when true, display the figure after saving it.
    """
    fig, ax = plt.subplots()

    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    size_formatter = FuncFormatter(storage_formatter_factory())
    ax.get_xaxis().set_major_formatter(size_formatter)
    tick_kib = (0, 512, 1024, 1024 + 512, 2048, 2048 + 512, 3072)
    ax.set_xticks([kib * 1024 for kib in tick_kib])

    plt.plot(x_values, y_values, color='blue')

    plt.ylim(0.0, 1.0)
    plt.xlim(0, 3072 * 1024)
    percent_labels = ['{:3}%'.format(tick * 100) for tick in ax.get_yticks()]
    ax.set_yticklabels(percent_labels)

    platform = 'chrome' if is_chrome else 'android'
    output_name = 'cdf-fig-{}0ms-{}.svg'.format(int(threshold * 100), platform)
    plt.savefig(output_name)
    if show:
        plt.show()
    plt.clf()
예제 #11
0
def chunks_both_real_fake(show=False):
    """Plot two chunk-size CDFs ('Clustering' vs 'From URLs') on one figure.

    The 'Clustering' sizes are the first elements of the entries returned
    by get_chunk_data_10(True); the 'From URLs' sizes are the first
    elements of all entries in the values of the mapping returned by
    youtube_get_chunk_size_data.  The figure is saved to
    'chunk_sizes_both.svg' and then cleared.

    Args:
        show: when true, display the figure after saving it.
    """
    import bisect  # only needed here; kept local to the function

    def cdf_points(sizes):
        # Return (distinct sizes ascending, share of sizes <= each one).
        ordered = sorted(sizes)
        count = float(len(ordered))
        distinct = sorted(set(ordered))
        # bisect_right on the sorted list counts the entries <= size
        # without rescanning the whole list for every distinct value.
        shares = [
            bisect.bisect_right(ordered, size) / count for size in distinct
        ]
        return distinct, shares

    input_file = 'chrome_combined_dataset.csv'
    fake_x_values, fake_y_values = cdf_points(
        [chunk[0] for chunk in get_chunk_data_10(True)])

    chunks_by_key = youtube_get_chunk_size_data(input_file)
    real_x_values, real_y_values = cdf_points([
        chunk[0]
        for chunks in chunks_by_key.values()
        for chunk in chunks
    ])

    fig, ax = plt.subplots()
    # Format the y ticks through a formatter so the labels always follow
    # the tick positions chosen after the data has been plotted.
    ax.get_yaxis().set_major_formatter(
        FuncFormatter(lambda value, pos: '{:3}%'.format(value * 100)))
    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    ax.get_xaxis().set_major_formatter(
        FuncFormatter(storage_formatter_factory()))
    ax.set_xticks(
        [kib * 1024 for kib in (0, 512, 1024, 1024 + 512, 2048, 2048 + 512)])

    fake_line = plt.plot(fake_x_values, fake_y_values, color='blue')
    real_line = plt.plot(real_x_values, real_y_values, color='red')

    plt.legend([fake_line[0], real_line[0]], ['Clustering', 'From URLs'],
               loc=4)

    plt.savefig('chunk_sizes_both.svg')
    if show:
        plt.show()
    plt.clf()
예제 #12
0
def cdf_plot(x_values, y_values, color='red', ax=None, is_log=False):
    """Draw a CDF line on `ax`, creating a new figure when `ax` is None.

    All scaling, plotting and limit calls go through `ax` itself, so the
    line ends up on the axes that was passed in even when it is not
    pyplot's current axes.

    Args:
        x_values: x coordinates of the line (the x axis is labelled
            'Bitrate').
        y_values: y coordinates; the y axis is limited to [0, 1.0].
        color: line color handed to the plot call.
        ax: axes to draw on, or None to create one with plt.subplots().
        is_log: use a logarithmic x axis with its own tick set.

    Returns:
        Tuple (ax, line) where line is the value returned by ax.plot.
    """
    if ax is None:
        _fig, ax = plt.subplots()
    ax.set_yticklabels(['{:3}%'.format(x * 100) for x in ax.get_yticks()])
    if is_log:
        ax.set_xscale('log')
        ax.set_xticks([0, 10, 100, 1024, 10 * 1024, 100 * 1024, 1024 * 1024, 10 * 1024 * 1024])
    else:
        ax.set_xticks([0, 256 * 1024, 512 * 1024, 768 * 1024, 1024 * 1024, 1.25 * 1024 * 1024, 1.5 * 1024 * 1024, 1.75 * 1024 * 1024, 2 * 1024 * 1024])
        # Alternative tick set covering 0..4M in 0.5M steps:
        # ax.set_xticks([0, 512 * 1024, 1024 * 1024, 1.5 * 1024 * 1024, 2 * 1024 * 1024, 2.5 * 1024 * 1024, 3 * 1024 * 1024, 3.5 * 1024 * 1024, 4 * 1024 * 1024])

    ax.get_xaxis().set_major_formatter(FuncFormatter(storage_formatter_factory(unit_speed=True, decimal_places=2)))

    ax.set_ylabel('% Total')
    ax.set_xlabel('Bitrate')
    ax.figure.tight_layout()
    line = ax.plot(x_values, y_values, color=color)
    ax.set_ylim(0, 1.0)
    return ax, line
def plot_packet_length(sorted_data, color='red', ax=None, use_log=False, storage_units=False):
    """Plot packet sizes against their rank fraction on `ax`.

    The i-th value of `sorted_data` is paired with i / len(sorted_data), so
    the input is expected to be sorted already (as the name says) for the
    curve to be a CDF.  Scaling, layout and plotting go through `ax`, so an
    axes passed by the caller is used even if it is not the current one.

    Args:
        sorted_data: sequence of packet sizes.
        color: line color handed to the plot call.
        ax: axes to draw on, or None to create one with plt.subplots().
        use_log: use a logarithmic x axis with fixed ticks up to 64 KiB.
        storage_units: format x ticks with storage_formatter_factory().

    Returns:
        The axes that was drawn on.
    """
    y_values = np.arange(len(sorted_data)) / float(len(sorted_data))
    if ax is None:
        _fig, ax = plt.subplots()
    # A formatter keeps the percent labels tied to the tick positions that
    # exist after plotting; labels fixed up front can end up on other ticks.
    ax.get_yaxis().set_major_formatter(FuncFormatter(lambda value, pos: '{:3}%'.format(value * 100)))
    ax.set_ylabel('% Total')
    ax.set_xlabel('Packet size in bytes')

    if use_log:
        ax.set_xscale('log')
        ax.set_xticks([0, 10, 100, 1024, 10 * 1024, 64 * 1024])

    if storage_units:
        ax.get_xaxis().set_major_formatter(FuncFormatter(storage_formatter_factory()))

    ax.figure.tight_layout()

    ax.plot(sorted_data, y_values, color=color)
    return ax
예제 #14
0
def save_cdf(x_values, y_values, threshold, is_chrome, show=False):
    """Render one chunk-size CDF curve and write it to an SVG file.

    Output file: 'cdf-fig-<int(threshold * 100)>0ms-<platform>.svg', where
    <platform> is 'chrome' when is_chrome is true and 'android' otherwise.

    Args:
        x_values: x coordinates of the curve (x axis limited to 0..3072 KiB).
        y_values: y coordinates of the curve (y axis limited to 0..1).
        threshold: number used only for the file name.
        is_chrome: picks the platform part of the file name.
        show: when true, display the figure after saving it.
    """
    fig, ax = plt.subplots()

    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    x_axis = ax.get_xaxis()
    x_axis.set_major_formatter(FuncFormatter(storage_formatter_factory()))
    ax.set_xticks([
        kib * 1024
        for kib in (0, 512, 1024, 1024 + 512, 2048, 2048 + 512, 3072)
    ])

    plt.plot(x_values, y_values, color='blue')

    plt.ylim(0.0, 1.0)
    plt.xlim(0, 3072 * 1024)
    ax.set_yticklabels(
        ['{:3}%'.format(tick * 100) for tick in ax.get_yticks()])

    if is_chrome:
        platform = 'chrome'
    else:
        platform = 'android'
    plt.savefig('cdf-fig-{}0ms-{}.svg'.format(int(threshold * 100), platform))
    if show:
        plt.show()
    plt.clf()
예제 #15
0
def youtube_plot_chunk_sizes(show=False):
    """Plot one chunk-size CDF per itag found in the YouTube capture.

    youtube_get_chunk_size_data returns a mapping from itag to chunk
    entries; the first element of each entry is used as the chunk size.
    Itags '278' and '250' are skipped, as are itags without any chunk.  For
    every plotted itag a line "<itag> <label> <average size>" is printed.
    The figure is saved to 'youtube_chunk_sizes_range.svg' and cleared.

    Args:
        show: when true, display the figure after saving it.
    """
    import bisect  # only needed here; kept local to the function

    input_file = 'chrome_combined_dataset.csv'
    itags = youtube_get_chunk_size_data(input_file)

    fig, ax = plt.subplots()
    # A formatter keeps the percent labels tied to the tick positions that
    # exist after plotting; labels fixed up front can end up on other ticks.
    ax.get_yaxis().set_major_formatter(
        FuncFormatter(lambda value, pos: '{:3}%'.format(value * 100)))
    ax.set_ylabel('% Total')
    ax.set_xlabel('Chunk size')
    ax.get_xaxis().set_major_formatter(FuncFormatter(storage_formatter_factory()))
    ax.set_xticks([kib * 1024 for kib in (0, 256, 512, 768, 1024, 1024 + 256, 1024 + 512, 1024 + 768, 2048)])

    tag_dict = {
        '243': 'video @ 524k',
        '244': 'video @ 798k',
        '251': 'audio @ 160k'
    }
    skipped_itags = ('278', '250')
    colors = ['orange', 'red', 'brown', 'green', 'blue']
    labels = []
    lines = []
    for i, (itag, chunks) in enumerate(itags.items()):
        if itag in skipped_itags:
            continue
        sizes = [chunk[0] for chunk in chunks]
        if not sizes:
            # Nothing to plot and no average to compute for this itag.
            continue
        count = float(len(sizes))
        ordered = sorted(sizes)
        x_values = sorted(set(ordered))
        # bisect_right on the sorted list counts the sizes <= each x value.
        y_values = [bisect.bisect_right(ordered, size) / count for size in x_values]
        # Unlisted itags are labelled with the itag itself instead of
        # raising KeyError.
        label = tag_dict.get(itag, itag)
        average = sum(sizes) / count
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print('%s %s %s' % (itag, label, average))
        labels.append(label)
        # Cycle through the palette so more than five itags cannot raise
        # IndexError; the first five keep their original colors.
        lines.append(plt.plot(x_values, y_values, color=colors[i % len(colors)]))

    plt.legend([line[0] for line in lines], labels, loc=4)
    plt.tight_layout()
    plt.savefig('youtube_chunk_sizes_range.svg')
    if show:
        plt.show()
    plt.clf()
예제 #16
0
def plot_packet_length(sorted_data,
                       color='red',
                       ax=None,
                       use_log=False,
                       storage_units=False):
    """Plot packet sizes against their rank fraction on `ax`.

    The i-th value of `sorted_data` is paired with i / len(sorted_data), so
    the input is expected to be sorted already (as the name says) for the
    curve to be a CDF.  Scaling, layout and plotting go through `ax`, so an
    axes passed by the caller is used even if it is not the current one.

    Args:
        sorted_data: sequence of packet sizes.
        color: line color handed to the plot call.
        ax: axes to draw on, or None to create one with plt.subplots().
        use_log: use a logarithmic x axis with fixed ticks up to 64 KiB.
        storage_units: format x ticks with storage_formatter_factory().

    Returns:
        The axes that was drawn on.
    """
    y_values = np.arange(len(sorted_data)) / float(len(sorted_data))
    if ax is None:
        _fig, ax = plt.subplots()
    # A formatter keeps the percent labels tied to the tick positions that
    # exist after plotting; labels fixed up front can end up on other ticks.
    ax.get_yaxis().set_major_formatter(
        FuncFormatter(lambda value, pos: '{:3}%'.format(value * 100)))
    ax.set_ylabel('% Total')
    ax.set_xlabel('Packet size in bytes')

    if use_log:
        ax.set_xscale('log')
        ax.set_xticks([0, 10, 100, 1024, 10 * 1024, 64 * 1024])

    if storage_units:
        ax.get_xaxis().set_major_formatter(
            FuncFormatter(storage_formatter_factory()))

    ax.figure.tight_layout()

    ax.plot(sorted_data, y_values, color=color)
    return ax