def final_check(year=2006):
    """Print sanity diagnostics for the final survey table of ``year``.

    Opens ``test.h5`` and ``survey.h5`` under ``DATA_SOURCES_DIR``, prints the
    survey store, then prints:

    * the expected variable names that are missing from the columns of the
      ``survey_<year>`` table stored in ``test.h5``;
    * the entity identifiers of the rows whose ``idfoy`` is 603018901.

    Args:
        year: Year suffix of the table to read.  Defaults to 2006, which is
            the table this function previously read unconditionally.

    Returns:
        None.  All results are written to stdout.
    """
    table_name = 'survey_{}'.format(year)

    # NOTE: 'idfoy' and 'noi' used to be fused into 'idfoynoi' by a missing
    # comma, so neither variable was actually checked.
    expected = set([
        'adeben', 'adfdap', 'amois', 'ancchom', 'ancentr', 'anciatm',
        'ancrech', 'anref', 'contra', 'datant', 'dimtyp', 'ident',
        'idfoy', 'noi', 'nondic', 'rabs', 'RABSP', 'RAISTP', 'raistp',
        'rdem', 'retrai', 'sitant', 'sp10', 'sp11', 'stc', 'TXTPPB',
    ])
    expected.update('sp0' + str(i) for i in range(10))  # sp00 .. sp09

    test_filename = os.path.join(DATA_SOURCES_DIR, "test.h5")
    survey_filename = os.path.join(DATA_SOURCES_DIR, "survey.h5")

    # try/finally (rather than `with`) so that both files are released even
    # on pandas versions whose HDFStore is not a context manager.
    store = HDFStore(test_filename)
    try:
        survey = HDFStore(survey_filename)
        try:
            final2 = store.get(table_name)
            print(survey)
        finally:
            survey.close()
    finally:
        store.close()

    print(expected.difference(final2.columns))
    print(final2.loc[
        final2.idfoy == 603018901,
        ['idfoy', 'quifoy', 'idfam', 'quifam', 'idmen', 'quimen', 'noi'],
    ].to_string())
def final_check(year=2006):
    """Print sanity diagnostics for the final survey table of ``year``.

    Opens ``test.h5`` and ``survey.h5`` under ``DATA_SOURCES_DIR``, prints the
    survey store, then prints:

    * the expected variable names that are missing from the columns of the
      ``survey_<year>`` table stored in ``test.h5``;
    * the entity identifiers of the rows whose ``idfoy`` is 603018901.

    Args:
        year: Year suffix of the table to read.  Defaults to 2006, which is
            the table this function previously read unconditionally.

    Returns:
        None.  All results are written to stdout.
    """
    table_name = 'survey_{}'.format(year)

    expected = set([
        'anref', 'sitant', 'adeben', 'stc', 'retrai', 'contra', 'datant',
        'rabs', 'nondic', 'TXTPPB', 'ancrech', 'RAISTP', 'amois', 'adfdap',
        'ancentr', 'anciatm', 'ancchom', 'ident', 'noi', 'dimtyp', 'RABSP',
        'raistp', 'rdem', 'sp10', 'sp11', 'idfoy',
    ])
    expected.update('sp0' + str(i) for i in range(10))  # sp00 .. sp09

    test_filename = os.path.join(DATA_SOURCES_DIR, "test.h5")
    survey_filename = os.path.join(DATA_SOURCES_DIR, "survey.h5")

    # try/finally (rather than `with`) so that both files are released even
    # on pandas versions whose HDFStore is not a context manager.
    store = HDFStore(test_filename)
    try:
        survey = HDFStore(survey_filename)
        try:
            final2 = store.get(table_name)
            print(survey)
        finally:
            survey.close()
    finally:
        store.close()

    print(expected.difference(final2.columns))
    print(final2.loc[
        final2.idfoy == 603018901,
        ['idfoy', 'quifoy', 'idfam', 'quifam', 'idmen', 'quimen', 'noi'],
    ].to_string())
def test():
    """Validate check_consistency.

    Reads the ``test12`` and ``test_simu`` tables from ``fichiertest.h5`` in
    ``<SRC_PATH>/countries/france/data`` and prints whatever
    ``check_consistency(test_simu, datatable)`` returns.
    """
    # Same directory the previous dirname(join(..., 'data', 'erf')) produced.
    data_dir = os.path.join(SRC_PATH, 'countries', 'france', 'data')
    store = HDFStore(os.path.join(data_dir, 'fichiertest.h5'))
    try:
        datatable = store.get('test12')
        test_simu = store.get('test_simu')
    finally:
        # Both tables are fully loaded, so the file can be released before
        # the (possibly failing) consistency check runs.
        store.close()
    print(check_consistency(test_simu, datatable))
def load_df(path, default=None):
    """Load the 'logs' table from the HDF5 store at ``path``.

    This is a best-effort loader: any ordinary error while opening the store
    or reading the table yields ``default`` instead of propagating.

    Args:
        path: File name of the HDF5 store.
        default: Value returned when the table cannot be loaded.

    Returns:
        The object stored under 'logs', or ``default`` on failure.
    """
    try:
        store = HDFStore(path)
    except Exception:  # not a bare except: let Ctrl-C / SystemExit through
        return default
    try:
        print(store.keys())
        return store.get('logs')
    except Exception:
        return default
    finally:
        # Release the file whether or not the read succeeded.
        store.close()
def build_comparison():
    """Print the relative difference between the OpenFisca and INSEE tables.

    Reads the ``openfisca`` and ``insee`` tables from the ``H5_FILENAME``
    store located next to this module, prints both, then prints the relative
    difference of their column sums and the element-wise relative difference
    ``(openfisca - insee) / insee``.

    Returns:
        None.  All results are written to stdout.
    """
    fname = os.path.join(os.path.dirname(__file__), H5_FILENAME)
    store = HDFStore(fname)
    try:
        openfisca = store.get("openfisca")
        insee = store.get("insee")
    finally:
        store.close()  # both tables are in memory from here on

    print(openfisca)
    print(insee)
    print(openfisca.head())

    # Remove the row labelled 0, then move the remaining index into a column
    # and renumber -- presumably to line the rows up with `insee`; confirm.
    openfisca.drop(0, axis=0, inplace=True)
    openfisca.reset_index(inplace=True)

    print((openfisca.sum() - insee.sum()) / insee.sum())
    df = (openfisca - insee) / insee
    print(df)
    print(df.to_string())
class HDFStoreDataFrame(BaseIO):
    """Timing cases for storing, loading and querying frames in an HDFStore."""

    def setup(self):
        """Create the sample frames and pre-populate the store in ``self.fname``."""
        n_rows = 25000
        randn = np.random.randn
        string_index = tm.makeStringIndex(n_rows)

        # Two float columns, string index.
        self.df = DataFrame(
            {'float1': randn(n_rows), 'float2': randn(n_rows)},
            index=string_index)

        # Float, string, bool and int columns, string index.
        self.df_mixed = DataFrame(
            {'float1': randn(n_rows),
             'float2': randn(n_rows),
             'string1': ['foo'] * n_rows,
             'bool1': [True] * n_rows,
             'int1': np.random.randint(0, n_rows, size=n_rows)},
            index=string_index)

        # 100 float columns, default index; the bounds feed the range query.
        self.df_wide = DataFrame(randn(n_rows, 100))
        wide_index = self.df_wide.index
        self.start_wide = wide_index[10000]
        self.stop_wide = wide_index[15000]

        # Two float columns, datetime index; the bounds feed the range query.
        self.df2 = DataFrame(
            {'float1': randn(n_rows), 'float2': randn(n_rows)},
            index=date_range('1/1/2000', periods=n_rows))
        dated_index = self.df2.index
        self.start = dated_index[10000]
        self.stop = dated_index[15000]

        self.df_wide2 = DataFrame(
            randn(n_rows, 100),
            index=date_range('1/1/2000', periods=n_rows))

        column_names = ['C%03d' % i for i in range(10)]
        self.df_dc = DataFrame(randn(n_rows, 10), columns=column_names)

        self.fname = '__test__.h5'
        self.store = HDFStore(self.fname)

        # Entries written with put().
        for key, frame in (('fixed', self.df),
                           ('fixed_mixed', self.df_mixed)):
            self.store.put(key, frame)

        # Entries written with append().
        for key, frame in (('table', self.df2),
                           ('table_mixed', self.df_mixed),
                           ('table_wide', self.df_wide),
                           ('table_wide2', self.df_wide2)):
            self.store.append(key, frame)

    def teardown(self):
        """Close the store and delete its backing file."""
        self.store.close()
        self.remove(self.fname)

    # ---- entries accessed with put()/get() ----

    def time_read_store(self):
        self.store.get('fixed')

    def time_read_store_mixed(self):
        self.store.get('fixed_mixed')

    def time_write_store(self):
        self.store.put('fixed_write', self.df)

    def time_write_store_mixed(self):
        self.store.put('fixed_mixed_write', self.df_mixed)

    # ---- entries accessed with append()/select() ----

    def time_read_store_table_mixed(self):
        self.store.select('table_mixed')

    def time_write_store_table_mixed(self):
        self.store.append('table_mixed_write', self.df_mixed)

    def time_read_store_table(self):
        self.store.select('table')

    def time_write_store_table(self):
        self.store.append('table_write', self.df)

    def time_read_store_table_wide(self):
        self.store.select('table_wide')

    def time_write_store_table_wide(self):
        self.store.append('table_wide_write', self.df_wide)

    def time_write_store_table_dc(self):
        self.store.append('table_dc_write', self.df_dc, data_columns=True)

    # ---- range queries ----
    # The where-strings name `self.*` attributes, so they stay inline in the
    # method that has `self` as a local.

    def time_query_store_table_wide(self):
        query = "index > self.start_wide and index < self.stop_wide"
        self.store.select('table_wide', where=query)

    def time_query_store_table(self):
        query = "index > self.start and index < self.stop"
        self.store.select('table', where=query)

    # ---- store introspection ----

    def time_store_repr(self):
        repr(self.store)

    def time_store_str(self):
        str(self.store)

    def time_store_info(self):
        self.store.info()
def _split_by_penetration_band(frame):
    """Return ``(inside, outside)`` rows of ``frame`` for 0.95 < PrcntPen < 1.05.

    Only the 'tIslDur' and 'PrcntPen' columns are kept.  Both tests are
    strict, so a row whose PrcntPen is exactly 0.95 or 1.05 (or is missing)
    lands in neither part.
    """
    points = frame[['tIslDur', 'PrcntPen']]
    pen = points['PrcntPen']
    inside = points[(pen > 0.95) & (pen < 1.05)]
    outside = points[(pen < 0.95) | (pen > 1.05)]
    return inside, outside


def _plot_points(axis, x, y, style, label):
    """Draw one marker series with the marker settings shared by every page."""
    axis.plot(x, y, style, markersize=4, markeredgecolor='none', label=label)


def _plot_band_group(axis, frame, style, label):
    """Scatter island duration against penetration for one group of tests.

    Rows inside the penetration band are shifted so that their mean
    penetration is 1.0 and are drawn without a legend entry; the remaining
    rows are drawn unshifted and carry ``label``.
    """
    inside, outside = _split_by_penetration_band(frame)
    _plot_points(axis,
                 inside['PrcntPen'] - inside['PrcntPen'].mean() + 1.0,
                 inside['tIslDur'], style, '')
    _plot_points(axis, outside['PrcntPen'], outside['tIslDur'], style, label)


def PlotTestLogsAll(myfile):
    """Read a TestLogsAll HDF5 file and plot island-duration relationships.

    The 'TestLogsAll' table is read from ``myfile`` and a multi-page PDF is
    written next to it: ``<name>.pdf`` when ``myfile`` ends in ``.h5``,
    otherwise ``myfile + '.pdf'``.  Pages, in order:

    1. histogram of island duration;
    2. histogram of the actual load power factor;
    3. island duration vs. penetration, motor tests highlighted;
    4. island duration vs. penetration, grouped by power factor;
    5-8. island duration vs. penetration per power-factor class, summer
       against winter.

    Row counts of the plotted groups are printed to stdout.

    Input:  File name with the results, e.g.: "aLabView2\\TestLogsAll.h5"
    Output: PDF with the plots, in the same directory as the input.
    """
    import numpy as np  # python numerical library
    import matplotlib.pyplot as plt
    import matplotlib.backends.backend_pdf as dpdf
    from pandas import HDFStore

    CONFIG = {'PlotIslDurHist': True,   # Island durations histogram
              'PlotCorDur2Pen': True,   # Correlation of duration to penetration
              'PlotCorDur2fStd': True,  # Correlation of duration to standard
                                        # deviation of frequency; no code in
                                        # this function reads this flag
              }

    # Strip only the trailing extension.  Splitting on every "." and joining
    # with "" mangled paths such as "../data/x.h5" into "/data/x.pdf".
    if myfile.endswith(".h5"):
        mypdffile = myfile[:-len(".h5")] + '.pdf'
    else:
        mypdffile = myfile + '.pdf'

    print("Opening: " + myfile)
    h5store = HDFStore(myfile)
    try:
        TestLog = h5store.get('TestLogsAll')
    finally:
        h5store.close()  # the table is in memory; release the file early

    # Rows with a positive island duration that are flagged 'y' in NrmlFlg.
    valid = (TestLog['tIslDur'] > 0.) & (TestLog['NrmlFlg'] == 'y')
    is_motor = TestLog['FileName'] == 'TestLogMotorBr.xlsx'
    not_motor = TestLog['FileName'] != 'TestLogMotorBr.xlsx'

    # Filtering TestLog into df1.  The explicit copy makes the column
    # assignments below act on df1 itself rather than on a slice of TestLog.
    df1 = TestLog[valid & not_motor
                  & (TestLog['FileName'] != 'TestLogSummer01.xlsx')].copy()

    # Adding details to df1: QCload clamped at zero, the resulting power
    # factor, and whether the load is inductive or capacitive.  Boolean masks
    # (not index labels) select the rows, so repeated labels cannot cause
    # unrelated rows to be overwritten.
    df1['QCload0'] = df1['QCload']
    df1.loc[df1['QCload0'] < 0, 'QCload0'] = 0.0
    df1['PFact'] = df1['LabViewP'] / np.sqrt(
        df1['LabViewP']**2 + (-df1['GEAmpQ'] + df1['QCload0'])**2)
    df1['PFactsign'] = 'ind'
    df1.loc[df1['QCload0'] > df1['GEAmpQ'], 'PFactsign'] = 'cap'

    df1s = df1[df1['FileName'].str.contains('Summer')]
    df1w = df1[df1['FileName'].str.contains('Winter')]

    print("Opening: " + mypdffile)
    pltPdf = dpdf.PdfPages(mypdffile)

    def new_page(title):
        """Start a new 8x6 single-axes figure and return its axes."""
        _, axis = plt.subplots(nrows=1, ncols=1, figsize=(8, 6))
        axis.set_title(title)
        return axis

    def save_page():
        """Append the current figure to the PDF and dispose of it."""
        pltPdf.savefig()  # saves fig to pdf
        plt.close()       # closes fig to clean up memory

    def histogram_page(values, title, xlabel):
        """Emit one 20-bin histogram page."""
        axis = new_page(title)
        values.plot(kind='hist', bins=20, ax=axis, alpha=0.5)
        axis.grid(True, which='both')
        axis.set_xlabel(xlabel)
        axis.set_ylabel('Number of observations')
        save_page()

    def finish_scatter(axis, limit_y=True):
        """Add grid, axis labels and legend to a scatter page, then save it."""
        axis.grid(True, which='both')
        axis.set_xlabel('PV Penetration (pu)')
        axis.set_ylabel('Island Duration (sec)')
        if limit_y:
            axis.set_ylim([0, 0.6])
        legend = axis.legend(loc='upper left', shadow=False)
        for text in legend.get_texts():
            text.set_fontsize('small')
        save_page()

    try:
        if CONFIG['PlotIslDurHist']:
            # Fig: Island duration histogram (all tests with a duration > 0)
            histogram_page(TestLog['tIslDur'][TestLog['tIslDur'] > 0.],
                           'Island Duration Histogram',
                           'Island duration (sec)')
            # Fig: PF actual histogram
            histogram_page(df1['PFact'], 'Load PF actual', 'PF actual')

        if CONFIG['PlotCorDur2Pen']:
            # Fig: duration to penetration.  Unlike the pages below, the
            # out-of-band points are drawn first and the y axis is left
            # unclipped.
            axis = new_page('Island Duration vs. Penetration')
            inside, outside = _split_by_penetration_band(
                TestLog[valid & not_motor])
            motor = TestLog[['tIslDur', 'PrcntPen']][valid & is_motor]
            _plot_points(axis, outside['PrcntPen'], outside['tIslDur'],
                         'bo', 'CMPLDs')
            _plot_points(axis,
                         inside['PrcntPen'] - inside['PrcntPen'].mean() + 1.0,
                         inside['tIslDur'], 'bo', '')
            _plot_points(axis, motor['PrcntPen'], motor['tIslDur'],
                         'ro', 'MotorB')
            finish_scatter(axis, limit_y=False)

            # Fig: duration to penetration with PF as a parameter
            axis = new_page('Island Duration vs. Penetration')
            pf = df1['PFact']
            inductive = df1['PFactsign'] == 'ind'
            capacitive = df1['PFactsign'] == 'cap'
            pf_groups = [
                (df1[(pf < 0.97) & inductive], 'bo', 'PF ~ 0.95ind'),
                (df1[(pf < 0.99) & (pf >= 0.97) & inductive],
                 'go', 'PF ~ 0.98ind'),
                (df1[pf > 0.99], 'ro', 'PF ~ 1.0'),
                (df1[(pf < 0.99) & capacitive], 'ko', 'PF ~ 0.98cap'),
            ]
            for frame, _, _ in pf_groups:
                print(frame['tIslDur'].count())
            # Drawn in reverse so the first group ends up on top.
            for frame, style, label in reversed(pf_groups):
                _plot_band_group(axis, frame, style, label)
            finish_scatter(axis)

            # Figs: duration to penetration at PF with season as parameter
            season_pages = [
                ('Island Duration vs. Penetration at PF ~ 0.95ind',
                 lambda d: (d['PFact'] < 0.97) & (d['PFactsign'] == 'ind')),
                ('Island Duration vs. Penetration at PF ~ 0.98ind',
                 lambda d: ((d['PFact'] > 0.97) & (d['PFact'] < 0.99)
                            & (d['PFactsign'] == 'ind'))),
                ('Island Duration vs. Penetration at PF ~ 1.0',
                 lambda d: d['PFact'] > 0.99),
                ('Island Duration vs. Penetration at PF ~ 0.98cap',
                 lambda d: ((d['PFact'] > 0.97) & (d['PFact'] < 0.99)
                            & (d['PFactsign'] == 'cap'))),
            ]
            for title, select in season_pages:
                axis = new_page(title)
                summer = df1s[select(df1s)]
                winter = df1w[select(df1w)]
                print("Summer: " + str(summer['tIslDur'].count()))
                print("Winter: " + str(winter['tIslDur'].count()))
                _plot_band_group(axis, winter, 'bo', 'winter')
                _plot_band_group(axis, summer, 'ro', 'summer')
                finish_scatter(axis)
    finally:
        # Always finalise the PDF, even if a page failed to render.
        print("Closing: " + mypdffile)
        pltPdf.close()  # Close the pdf file
from pandas import HDFStore

# --- Phase 1: fill 'store.h5' ------------------------------------------------
# The store is opened with complevel=9 and handed to wrap_write, so the mapped
# callable has access to it.
store = HDFStore('store.h5', complevel=9)
# Callable applied to every entry of `datelist`: fetch_safe with rse bound to
# args.rse, wrapped by wrap_write (store + overwrite flag) and then by
# wrap_monitor.
fmap = wrap_monitor(wrap_write(partial(fetch_safe, rse=args.rse), store, overwrite=args.overwrite), monitor)
# NOTE(review): `p` is defined outside this fragment -- presumably a worker
# pool; confirm.
p.map(fmap, datelist)
monitor.close()
logging.info("closing file")
store.close()

# --- Phase 2: read every table back and tag it with its date -----------------
logging.info("trying to open output")
store = HDFStore('store.h5')
data = []
for k in store.keys():
    try:
        d = store.get(k)
        # The second "_"-separated field of the key is parsed as DDMMYYYY.
        d['timestamp'] = pd.to_datetime(k.split("_")[1], format='%d%m%Y')
        data.append(d)
    except Exception as e:
        # Best effort: report the key that failed and continue with the rest.
        print "Problem reading", k
        print e
store.close()

# --- Phase 3: one column of 'size' per owner, indexed by timestamp -----------
data = pd.concat(data)
data = data.set_index(['timestamp', 'owner'])
data_to_plot = data['size'].unstack().fillna(0)
# NOTE(review): `iplot` is not defined in this fragment -- presumably added to
# DataFrame by a plotting extension imported elsewhere; confirm.
dataplot = data_to_plot.iplot(kind='area', fill=True, asFigure=True)
for d in dataplot['data']:
    # Per-trace hover text: that trace's column values formatted as "<x.xx> Gb".
    d['hoverinfo'] = 'text+x+name'
    d['text'] = ["%.2f Gb" % xx for xx in data_to_plot[d['name']].tolist()]
data.iplot(data=dataplot['data'])
class LogSaver:
    """Convert a list of server.log files into one DataFrame stored in HDF5.

    Each log file is loaded with ``load_log`` and written to a temporary
    "progress" store; once all are done they are concatenated and written to
    the final store.  A history of finished conversions is saved after every
    file, so an interrupted run can skip the files it already converted.

    self.directory : Directory structure for temp and saved files
    self.log_list : List of server.log files to process
    self.extra : True if log messages and thread ids are to be saved too
    self.history_path : History of server.log conversions saved here
    self.progress_store_path : HDF5 file that holds one DataFrame for each
        server.log file
    self.store_path : Final DataFrame of all server.log entries saved here
    self.history : History of server.log conversions
    self.saved : True once save_all_logs() has written the final store
    """

    FINAL = 'logs'
    PROGRESS = 'progress'
    HISTORY = 'history'

    @staticmethod
    def normalize(name):
        """Return `name` with every non-alphanumeric character replaced by '_'."""
        return re.sub(r'[^a-zA-Z0-9]', '_', name)

    @staticmethod
    def make_name(base_name, extra):
        """Return `base_name`, with '.extra' appended when `extra` is truthy."""
        return base_name + '.extra' if extra else base_name

    def __init__(self, store_path, log_list, extra):
        """Resolve all file locations and load any existing conversion history.

        store_path : root handed to ObjectDirectory
        log_list : iterable of server.log paths (stored as a sorted tuple)
        extra : passed to load_log; also selects the '.extra' store name
        """
        self.directory = ObjectDirectory(store_path)
        self.log_list = tuple(sorted(log_list))
        self.extra = extra
        self.history_path = self.directory.get_path(LogSaver.HISTORY, temp=True)
        self.progress_store_path = self.directory.get_path(LogSaver.PROGRESS,
            temp=True, is_df=True)
        self.store_path = self.directory.get_path(
            LogSaver.make_name(LogSaver.FINAL, extra), is_df=True)
        self.history = ObjectDirectory.load_object(self.history_path, {})
        self.saved = False

    def __repr__(self):
        return '\n'.join('%s: %s' % (k, v) for k, v in self.__dict__.items())

    def __str__(self):
        return '\n'.join([repr(self), '%d log files' % len(self.log_list)])

    def save_all_logs(self, force=False):
        """Convert every log in self.log_list and write the final store.

        Does nothing except print the store's summary when the final store
        already exists.

        force : continue a conversion whose history file already exists

        Raises AssertionError when a history file exists without a final
        store and `force` is false, or (via check()) when the time ranges of
        the logs overlap.
        """
        if os.path.exists(self.store_path):
            final_store = HDFStore(self.store_path)
            try:
                print('Keys: %s' % final_store)
            finally:
                final_store.close()
            return

        # Raised explicitly (not `assert`) so the guard survives `python -O`.
        if not force and os.path.exists(self.history_path):
            raise AssertionError('''
                %s exists but %s does not.
                There appears to be a conversion in progress.
                -f forces conversion to complete.
                ''' % (self.history_path, self.store_path))

        def get_log(path):
            try:
                return self.progress_store.get(LogSaver.normalize(path))
            except Exception:
                print('')
                print(path)
                raise  # bare raise keeps the original traceback

        self.directory.make_dir_if_necessary(self.progress_store_path)
        self.progress_store = HDFStore(self.progress_store_path)
        try:
            for path in self.log_list:
                self.save_log(path)
            self.check()
            print('--------')
            print('All tables in %s' % self.progress_store_path)
            print(self.progress_store.keys())
            print('--------')
            df_list = [get_log(path) for path in self.log_list]
        finally:
            self.progress_store.close()
        print('Closed %s' % self.progress_store_path)

        df_all = pd.concat(df_list)
        print('Final list has %d entries' % len(df_all))
        final_store = HDFStore(self.store_path)
        try:
            final_store.put(LogSaver.FINAL, df_all)
            print('Keys: %s' % final_store)
        finally:
            final_store.close()
        print('Closed %s' % self.store_path)

        # Save the history in a corresponding file
        self.directory.save(LogSaver.HISTORY, self.history)
        print('Saved history')
        self.saved = True

    def test_store(self):
        """Print the keys of the final store and basic facts about '/logs'."""
        final_store = HDFStore(self.store_path)
        try:
            print('----')
            print(final_store.keys())
            print('-' * 80)
            logs = final_store['/logs']
            print(type(logs))
            print(len(logs))
            print(logs.columns)
        finally:
            final_store.close()

    def cleanup(self):
        """Delete the temporary progress store and the history file."""
        os.remove(self.progress_store_path)
        os.remove(self.history_path)

    def delete(self):
        """Delete the final store."""
        os.remove(self.store_path)

    def save_log(self, path):
        """Convert one log file into a table of the progress store.

        Loads `path` with load_log, stores the resulting DataFrame under the
        normalized path name, and records its first/last index value, load
        time, row count and header in the history, which is saved
        immediately.  Files already present in the history are skipped.
        Returns nothing.
        """
        import sys

        if path in self.history:
            return

        # Written without a newline so the outcome lands on the same line.
        sys.stdout.write('Processing %s ' % path)
        start = time.time()
        header, df = load_log(path, extra=self.extra)
        if df is None:
            print('Could not process %s' % path)
            return
        self.progress_store.put(LogSaver.normalize(path), df)
        load_time = time.time() - start

        self.history[path] = {
            'start': df.index[0],
            'end': df.index[-1],
            'load_time': int(load_time),
            'num': len(df),
            'header': header
        }
        ObjectDirectory.save_object(self.history_path, self.history)

        summary = {k: v for k, v in self.history[path].items() if k != 'header'}
        print('%s %d of %d' % (summary, len(self.history), len(self.log_list)))

    def check(self):
        """Print the time range of every converted log and verify the ordering.

        Logs are sorted by start time, and each one must end before the next
        one starts.  Raises AssertionError for the first neighbouring pair
        that violates this.  An empty history only prints the heading.
        """
        history = ObjectDirectory.load_object(self.history_path, {})
        sorted_keys = sorted(history, key=lambda k: history[k]['start'])
        print('-' * 80)
        print('Time range by log file')
        for i, path in enumerate(sorted_keys):
            hist = history[path]
            print('%2d: %s --- %s : %s' % (i, hist['start'], hist['end'], path))

        # Compare *neighbours*: previously the left-hand log never advanced,
        # so every log was only compared against the earliest one.
        for path0, path1 in zip(sorted_keys, sorted_keys[1:]):
            hist0, hist1 = history[path0], history[path1]
            if not hist0['end'] < hist1['start']:
                raise AssertionError('''
                -----------
                %s
                %s
                start: %s
                end  : %s
                -----------
                %s
                %s
                start: %s
                end  : %s
                ''' % (
                    path0, hist0, hist0['start'], hist0['end'],
                    path1, hist1, hist1['start'], hist1['end']))