# Overlay one normalised, log-scaled histogram of file sizes per Ubuntu data file.
# The filename lookup is built once; re-listing the column for every key made
# each membership test a fresh linear scan.
ubuntu_files = set(ubuntus['Filename'])
for key, value in datafiles.items():
    if key not in ubuntu_files:
        continue
    # Sizes of 1 or less are dropped before plotting.
    sizes = [size for size in value['st_size'] if size > 1]
    # NOTE(review): `normed` was replaced by `density` in newer Matplotlib
    # releases -- confirm the installed version before changing it.
    plt.hist(sizes, bins, normed=True, log=True, alpha=.5, label=key)
plt.xlabel('File size')
plt.ylabel('Frequency (logarithmic)')
plt.title('Distribution of file sizes on Ubuntu')
plt.show()

# <codecell>

# try and draw Seaborn KDE plots
with sns.palette_context("husl"):
    f, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 20), sharex=False)

    # Build both filename lookups once, inside this cell, so the cell does not
    # depend on variables created by the histogram cell.
    mac_files = set(macs['Filename'])
    ubuntu_files = set(ubuntus['Filename'])

    # Top panel: one KDE curve per Mac data file.
    for key, value in datafiles.items():
        if key not in mac_files:
            continue
        # Label: dashes become slashes, everything from the first dot is dropped.
        path = key.replace('-', '/').split('.')[0]
        # `double` is not defined in this chunk -- presumably the NumPy float
        # constructor from a pylab star-import; verify.
        sizes = double([size for size in value['st_size'] if size > 1])
        ax1 = sns.kdeplot(sizes, shade=True, label=path, legend=True, ax=ax1)
    ax1.set_xscale('log')

    # Bottom panel: one KDE curve per Ubuntu data file.
    for key, value in datafiles.items():
        if key not in ubuntu_files:
            continue
        path = key.replace('-', '/').split('.')[0]
        sizes = double([size for size in value['st_size'] if size > 1])
        ax2 = sns.kdeplot(sizes, shade=True, label=path, legend=True, ax=ax2)
from scipy import stats from pylab import * import numpy as np import matplotlib as mpl import matplotlib.pyplot as plt import seaborn as sns %matplotlib inline # <codecell> # Plot the relationship between time and number of data items consumers = [2,4,6,8,10,12] with sns.palette_context('husl'): plt.figure(figsize=(8,8)) ax = plt.gca() for count in consumers: segment = means.loc[count].reset_index() segment.plot(y='time', x='data', label='{0} consumers'.format(count)) plt.xlabel('Number of data items') plt.ylabel('Time (seconds)') plt.title('Time vs. Number of Data Items for Varying Numbers of Consumers') plt.legend() plt.savefig(figpath + 'timevsdatas.pdf') plt.show()