import pandas as pandas
import numpy as np
import scikits.statsmodels.tools.tools as tools
import matplotlib.pyplot as plt


def _civic_2008_days_to_fail(factory_code):
    """Return DAYS_TO_FAIL_MINZERO values for the 2008 CIVIC rows of one factory.

    Reads the module-level ``df`` and keeps the rows whose MODEL_YEAR is 2008,
    whose FACTORY_CODE equals *factory_code* and whose MODEL_NAME is 'CIVIC'.
    """
    keep = (
        (df['MODEL_YEAR'] == 2008)
        & (df['FACTORY_CODE'] == factory_code)
        & (df['MODEL_NAME'] == 'CIVIC')
    )
    return df[keep]['DAYS_TO_FAIL_MINZERO'].values


def _ecdf_curve(sample):
    """Fit an ECDF to *sample* and evaluate it across the sample's range.

    Returns a ``(ecdf, x, y)`` tuple: the fitted ECDF callable, the grid
    between the smallest and largest observation, and the ECDF on that grid.
    """
    fitted = tools.ECDF(sample)
    # No `num` is passed, so np.linspace uses its default of 50 points.
    grid = np.linspace(min(sample), max(sample))
    return fitted, grid, fitted(grid)


# Load the 'Claims' sheet of the workbook into a DataFrame.
xls = pandas.ExcelFile('/home/pybokeh/Desktop/Civic_Blower.xls')
df = xls.parse('Claims')

# One array of days-to-fail values per factory code.
ELP_2008 = _civic_2008_days_to_fail('ELP')
HCM_2008 = _civic_2008_days_to_fail('HCM')
SSS_2008 = _civic_2008_days_to_fail('SSS')

# ECDF callable plus (x, y) samples for each factory.
ecdf1, x1, y1 = _ecdf_curve(ELP_2008)
ecdf2, x2, y2 = _ecdf_curve(HCM_2008)
ecdf3, x3, y3 = _ecdf_curve(SSS_2008)

# Draw the three curves as step plots on figure 1 (ELP red, HCM blue,
# SSS green); the three returned handles are kept under per-factory names.
fig1 = plt.figure(1)
elp, hcm, sss = plt.step(
    x1, y1, 'r-',
    x2, y2, 'b-',
    x3, y3, 'g-',
)
plt.grid(True)
plt.ylabel('ECDF', fontsize=8)
df[(df['MODEL_YEAR'] == 2008) & (df['FACTORY_CODE'] == 'SSS')]['MILES_TO_FAIL'].values, 15, normed=False, cumulative=False, facecolor='green', alpha=1) plt.grid(True) plt.ylabel('Frequency', fontsize=8) plt.xlabel('MTF Bins', fontsize=8) plt.title('2008 SSS Civic', fontsize=12) plt.subplot(224) dataELP = df[(df['MODEL_YEAR'] == 2008) & (df['FACTORY_CODE'] == 'ELP')]['MILES_TO_FAIL'].values ecdf1 = tools.ECDF(dataELP) x1 = np.linspace(min(dataELP), max(dataELP)) y1 = ecdf1(x1) dataHCM = df[(df['MODEL_YEAR'] == 2008) & (df['FACTORY_CODE'] == 'HCM')]['MILES_TO_FAIL'].values ecdf2 = tools.ECDF(dataHCM) x2 = np.linspace(min(dataHCM), max(dataHCM)) y2 = ecdf2(x2) dataSSS = df[(df['MODEL_YEAR'] == 2008) & (df['FACTORY_CODE'] == 'SSS')]['MILES_TO_FAIL'].values ecdf3 = tools.ECDF(dataSSS) x3 = np.linspace(min(dataSSS), max(dataSSS)) y3 = ecdf3(x3)
# Second panel of a 2-row, 1-column subplot grid: histogram of ELP_CRV_2009
# in 20 bins.
plt.subplot(2, 1, 2)
# The former `normed=False` keyword is intentionally omitted: False was the
# default, and matplotlib >= 3.1 no longer accepts `normed`, so leaving it
# out gives the same unnormalised counts on old and new versions.
n, bins, patches = plt.hist(
    ELP_CRV_2009,
    20,
    cumulative=False,
    facecolor='red',
    alpha=1,
)
plt.xlabel("MTF")
plt.ylabel("Frequency")
plt.title("ELP CRV MTF Histogram")
plt.grid(True)

# Make a ECDF figure
fig3 = plt.figure(3)
ecdf = tools.ECDF(ELP_CRV_2009)
# Evaluate the ECDF on a grid from the smallest to the largest observation
# (np.linspace defaults to 50 points when `num` is not given).
x = np.linspace(min(ELP_CRV_2009), max(ELP_CRV_2009))
y = ecdf(x)
plt.step(x, y, 'r-')
plt.grid(True)

# Make text figure. Reference: http://matplotlib.sourceforge.net/users/text_props.html
fig4 = plt.figure(4)
# [left, bottom, width, height] in figure fractions: the axes fill the figure.
ax = fig4.add_axes([0, 0, 1, 1])
# The concatenation is already a str, so no outer str() wrapper is needed.
# transform=ax.transAxes makes (0.05, 0.65) axes-relative coordinates.
ax.text(
    0.05,
    0.65,
    '2009M ELP CRV:\n' + str(dfCRV_09.describe()),
    color='black',
    transform=ax.transAxes,
)
# Hide the axis decorations so only the text is shown.
ax.set_axis_off()
# Rotate the x tick labels to vertical and print tick values as integers.
plt.xticks(rotation=90)
major_tick_format = FormatStrFormatter('%d')
ax.xaxis.set_major_formatter(major_tick_format)

# Shrink every x tick label to 8 pt.
xtick_labels = ax.xaxis.get_ticklabels()
for label in xtick_labels:
    label.set_fontsize(8)

# NOTE(review): `bins` is defined before this excerpt; len(bins) - 1 is
# reported as the bin count, which presumes `bins` holds bin edges - confirm.
bin_count = len(bins) - 1
plt.title(
    "2009M TL Keyless Remote Battery DTF Histogram\n"
    + "Number of bins: "
    + str(bin_count)
)
plt.xlabel("DTF")
plt.ylabel("Frequency")
plt.grid(True)


def _ecdf_on_range(sample):
    """Fit an ECDF to *sample* and evaluate it across the sample's range.

    Returns ``(ecdf, x, y)``: the fitted ECDF callable, the grid from the
    smallest to the largest observation, and the ECDF evaluated on that grid.
    """
    fitted = tools.ECDF(sample)
    # No `num` is passed, so np.linspace uses its default of 50 points.
    grid = np.linspace(min(sample), max(sample))
    return fitted, grid, fitted(grid)


# Make a ECDF figure
fig3 = plt.figure(3)
ecdf2008, x2008, y2008 = _ecdf_on_range(DTF2008)
ecdf2009, x2009, y2009 = _ecdf_on_range(DTF2009)

# 2008 in red, 2009 in blue; the legend entries follow the plotting order.
plt.step(
    x2008, y2008, 'r-',
    x2009, y2009, 'b-',
)
plt.grid(True)
plt.legend(['2008', '2009'], loc='best')
plt.title('Empirical CDF vs DTF')
plt.xlabel('Days To Fail')
plt.ylabel('% of Failures')