# Overlay one normalised, log-count histogram of file sizes per Ubuntu data
# file so the distributions can be compared on a single set of axes.

# Build the membership set once; the original rebuilt a list from the
# 'Filename' column and scanned it linearly on every loop iteration.
ubuntu_filenames = set(ubuntus['Filename'])

for key, value in datafiles.items():
    if key not in ubuntu_filenames:
        continue
    # Only sizes strictly greater than 1 are included in the histogram.
    sizes = [size for size in value['st_size'] if size > 1]
    # NOTE(review): `normed` was superseded by `density` in newer matplotlib
    # releases; kept as-is to match the library versions this notebook
    # appears to target -- confirm before upgrading.
    plt.hist(sizes, bins, normed=True, log=True, alpha=.5, label=key)

# NOTE(review): each histogram is given a label but no legend is drawn here.
plt.xlabel('File size')
plt.ylabel('Frequency (logarithmic)')
plt.title('Distribution of file sizes on Ubuntu')
plt.show()


# <codecell>

# try and draw Seaborn KDE plots

# Two stacked panels of shaded KDE curves of file sizes: the top panel holds
# the data files listed in `macs`, the bottom panel those listed in `ubuntus`.
with sns.palette_context("husl"):

    f, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 20), sharex=False)

    # Build each membership set once instead of rebuilding a list from the
    # 'Filename' column on every loop iteration.
    mac_filenames = set(macs['Filename'])
    ubuntu_filenames = set(ubuntus['Filename'])

    for key, value in datafiles.items():
        if key not in mac_filenames:
            continue
        # Legend label: dashes become slashes, and everything from the first
        # '.' onwards is dropped.
        path = key.replace('-', '/').split('.')[0]
        # Only sizes strictly greater than 1 are kept.
        # `double` is presumably numpy's float64 via a pylab star-import.
        sizes = double([size for size in value['st_size'] if size > 1])
        ax1 = sns.kdeplot(sizes, shade=True, label=path, legend=True, ax=ax1)

    ax1.set_xscale('log')

    for key, value in datafiles.items():
        if key not in ubuntu_filenames:
            continue
        path = key.replace('-', '/').split('.')[0]
        sizes = double([size for size in value['st_size'] if size > 1])
        ax2 = sns.kdeplot(sizes, shade=True, label=path, legend=True, ax=ax2)

    # NOTE(review): only ax1 is switched to a log x-scale; ax2 stays linear.
    # Confirm whether the bottom panel was meant to match.
    
Example #2
0
from scipy import stats
from pylab import *
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# <codecell>

# Plot the relationship between time and number of data items

# Consumer counts to draw, one line per count.
consumers = [2, 4, 6, 8, 10, 12]

with sns.palette_context('husl'):

    plt.figure(figsize=(8, 8))
    ax = plt.gca()

    for count in consumers:
        # Select the entries of `means` labelled with this consumer count and
        # reset the index so 'data' and 'time' can be addressed by column name.
        # (assumes `means` is a pandas object indexed by consumer count --
        # TODO confirm against the cell that builds it)
        segment = means.loc[count].reset_index()
        # Draw onto the shared axes. Without ax=, DataFrame.plot opens a new
        # figure per call, so the lines would not land on the 8x8 figure that
        # is labelled and saved below.
        segment.plot(y='time', x='data',
                     label='{0} consumers'.format(count), ax=ax)

    plt.xlabel('Number of data items')
    plt.ylabel('Time (seconds)')
    plt.title('Time vs. Number of Data Items for Varying Numbers of Consumers')
    plt.legend()
    plt.savefig(figpath + 'timevsdatas.pdf')
    plt.show()