def correlation():
    '''
    Get the correlation of power with all other factors, such as
    temperature, humidity, wind speed and wind direction.
    '''
    nbqList = tool.getnbqList()
    # hlxList = map(str, hlxList)
    for idx, nbqID in enumerate(nbqList):
        print(idx, nbqID)
        nbqData = pd.read_csv(resPath + nbqID + '.csv').loc[:, [
            'I', 'data_date', 'V', 'Fs2m', 'I1m', 'I2m', 'Sd', 'T0', 'V1m',
            'Wd', 'Wv'
        ]]
        nbqData = nbqData.fillna(method='ffill')
        # downsample for a quick plot
        nbqSample = nbqData.sample(n=2000, replace=False)
        nbqSample = nbqSample.drop(['data_date'], axis=1)
        # measured power and the reference powers of the cleaned / synchronous panels
        nbqSample['P'] = nbqSample['I'] * nbqSample['V']
        nbqSample['P_cln'] = nbqSample['I1m'] * nbqSample['V1m']
        nbqSample['P_syn'] = nbqSample['I2m'] * nbqSample['V1m']
        nbqSample = nbqSample.drop(['I1m', 'I2m', 'V1m', 'I', 'V'], axis=1)
        nbqSample = nbqSample.fillna(method='backfill').fillna(0)
        nbqSample.corr().to_csv(corPath + nbqID + '.csv')
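# Minimal, self-contained sketch (not part of the pipeline above) of the same
# correlation step: build power P = I * V and take the Pearson correlation of P
# against the weather factors with DataFrame.corr(). All values are synthetic.
def _correlation_sketch():
    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    n = 500
    rad = rng.uniform(0, 1000, n)                    # solar radiation (Fs2m)
    demo = pd.DataFrame({
        'Fs2m': rad,
        'Sd': rng.uniform(10, 90, n),                # humidity
        'T0': rng.uniform(-5, 35, n),                # ambient temperature
        'I': 0.01 * rad + rng.normal(0, 0.5, n),     # current roughly follows radiation
        'V': rng.normal(600, 5, n),                  # string voltage
    })
    demo['P'] = demo['I'] * demo['V']
    # Pearson correlation of power with each factor
    print(demo.drop(['I', 'V'], axis=1).corr()['P'])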
def cleaninglist(outPath):
    '''
    Get the cleaning list for each inverter:
    the number of days of dust accumulation since the last cleaning.
    '''
    # get date list
    dayList = tool.getDayList()
    # get cleaning list for each inverter
    nbqList = tool.getnbqList()
    df = pd.DataFrame(columns=nbqList)
    df['Date'] = dayList
    for idx, nbqname in enumerate(nbqList):
        # get cleaning record
        nbq_qx = pd.read_csv(qxjl)
        # find the inverter
        nbq_qx = nbq_qx[nbq_qx['nbqno'] == nbqname]
        # TODO: reorganize code and make a flow
        # get cleaning list for the inverter
        cleaningList = [item[0:10] for item in nbq_qx['qxdate'].values]
        startIDX = 0
        # calculate the number of days of dust accumulation
        for jdx, riqi in enumerate(cleaningList):
            # get end index
            endIDX = df[df.Date == riqi].index.tolist()[0]
            if (endIDX - startIDX) < 50:
                # empirical threshold: at the Pingyuan site a cleaning interval
                # longer than 50 days is considered implausible
                dustacclist = range(0, endIDX - startIDX + 1)
                df.loc[startIDX:endIDX, nbqname] = dustacclist
            else:
                dustacclist = [0] * (endIDX - startIDX + 1)
                df.loc[startIDX:endIDX, nbqname] = dustacclist
            # next cleaning event
            startIDX = endIDX
            # startDate = riqi
        # for the last cleaning record
        endIDX = len(dayList) - 1
        dustacclist = range(0, endIDX - startIDX + 1)
        df.loc[startIDX:endIDX, nbqname] = dustacclist
    df.to_csv(outPath)
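# Illustrative sketch (synthetic dates, not the real cleaning records): between
# two cleaning events the dust-age column counts days since the last cleaning
# and is reset to 0 at each cleaning date, mirroring the loop above.
def _dust_days_sketch():
    import pandas as pd

    days = pd.date_range('2016-06-01', '2016-06-10').strftime('%Y-%m-%d')
    df = pd.DataFrame({'Date': days})
    cleanings = ['2016-06-01', '2016-06-06']   # hypothetical cleaning dates
    startIDX = 0
    for riqi in cleanings:
        endIDX = df[df.Date == riqi].index.tolist()[0]
        df.loc[startIDX:endIDX, 'dust_days'] = range(0, endIDX - startIDX + 1)
        startIDX = endIDX
    # tail after the last cleaning
    endIDX = len(df) - 1
    df.loc[startIDX:endIDX, 'dust_days'] = range(0, endIDX - startIDX + 1)
    print(df)   # dust_days: 0,1,2,3,4 then reset to 0 at 2016-06-06 and 1,2,3,4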
def correlationPM25():
    '''
    Get the correlation of power with PM2.5.
    '''
    # get PM2.5 data
    pm25_df = pd.read_csv(pmfile).loc[:, ['Date', 'PM2.5', 'PM10']]
    nbqList = tool.getnbqList()
    for idx, nbqID in enumerate(nbqList):
        print(idx, nbqID)
        nbqData = pd.read_csv(allpowerPath + nbqID + '.csv').loc[:, ['Date', 'P_true']]
        merged = pd.merge(pm25_df, nbqData)
        print(merged)
        print(merged.corr())
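# Small sketch (made-up values): pd.merge joins the particulate readings and the
# daily power series on their shared 'Date' column; .corr() then gives the
# correlation of P_true with PM2.5 and PM10.
def _pm25_corr_sketch():
    import pandas as pd

    pm = pd.DataFrame({'Date': ['2016-06-01', '2016-06-02', '2016-06-03'],
                       'PM2.5': [35.0, 80.0, 120.0],
                       'PM10': [60.0, 110.0, 180.0]})
    power = pd.DataFrame({'Date': ['2016-06-01', '2016-06-02', '2016-06-03'],
                          'P_true': [520.0, 480.0, 430.0]})
    merged = pd.merge(pm, power)                   # implicit join on 'Date'
    print(merged.drop(['Date'], axis=1).corr())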
def pvalues():
    '''
    Get the p-values of power with all other factors, such as
    temperature, humidity, wind speed and wind direction.
    '''
    nbqList = tool.getnbqList()
    # p-value lists for 74 inverters plus the two panels at the weather station
    fs2m_es = np.zeros((invertnum + 2, 1))
    sd_es = np.zeros((invertnum + 2, 1))
    t0_es = np.zeros((invertnum + 2, 1))
    Wd_es = np.zeros((invertnum + 2, 1))
    Wv_es = np.zeros((invertnum + 2, 1))
    es = [fs2m_es, sd_es, t0_es, Wd_es, Wv_es]
    nbq_List = []
    # for all inverters
    for idx, nbqID in enumerate(nbqList):
        print(idx, nbqID)
        nbq_List.append(nbqID)
        nbqData = pd.read_csv(resPath + nbqID + '.csv').loc[:, [
            'I', 'data_date', 'V', 'Fs2m', 'I1m', 'I2m', 'Sd', 'T0', 'V1m',
            'Wd', 'Wv'
        ]]
        nbqData = nbqData.fillna(method='ffill')
        nbqSample = nbqData
        dateList = [item[0:10] for item in nbqSample['data_date'].values]
        nbqSample['Date'] = dateList
        nbqSample.set_index(['Date'], inplace=True)
        nbqSample = nbqSample.drop(['data_date'], axis=1)
        nbqSample['P'] = nbqSample['I'] * nbqSample['V']
        nbqSample['P_cln'] = nbqSample['I1m'] * nbqSample['V1m']
        nbqSample['P_syn'] = nbqSample['I2m'] * nbqSample['V1m']
        nbqSample = nbqSample.drop(['I1m', 'I2m', 'V1m', 'I', 'V'], axis=1)
        nbqSample = nbqSample.fillna(method='backfill').fillna(0)
        # normalization
        # nbqSample.iloc[:, :] = nbqSample.iloc[:, :].apply(
        #     lambda x: (x - np.min(x)) / (np.max(x) - np.min(x)))
        # factors
        factors = ['Fs2m', 'Sd', 'T0', 'Wd', 'Wv']
        Fs2m = []
        Sd = []
        T0 = []
        Wd = []
        Wv = []
        pvalues = [Fs2m, Sd, T0, Wd, Wv]
        # analyse daily p-values
        dateList = np.unique(dateList)
        for zdx, item in enumerate(dateList):
            # get daily data
            tmp_df = nbqSample[nbqSample.index == item]
            # get daily p-values
            for jdx, jtem in enumerate(factors):
                slope, intercept, r_value, p_value, std_err = stats.linregress(
                    tmp_df[jtem], tmp_df.P)
                pvalues[jdx].append(p_value)
        # mean of the daily p-values
        for i, item in enumerate(es):
            es[i][idx] = np.mean(pvalues[i])
        # calculate p-values for the weather station
        if idx == (invertnum - 1):
            # for the cleaned panel (uses the last day's data, tmp_df)
            for jdx, jtem in enumerate(factors):
                slope, intercept, r_value, p_value, std_err = stats.linregress(
                    tmp_df[jtem], tmp_df.P_cln)
                es[jdx][idx + 1] = p_value
            nbq_List.append('1cleaning Panel')
            # for the synchronous panel
            for jdx, jtem in enumerate(factors):
                slope, intercept, r_value, p_value, std_err = stats.linregress(
                    tmp_df[jtem], tmp_df.P_syn)
                es[jdx][idx + 2] = p_value
            nbq_List.append('1sync Panel')
    # merge into a dataframe and sort
    new_df = pd.DataFrame(data=nbq_List, columns=['nbqName'])
    new_df['nbqName'] = nbq_List
    new_df['Fs2m'] = fs2m_es
    new_df['Sd'] = sd_es
    new_df['T0'] = t0_es
    new_df['Wd'] = Wd_es
    new_df['Wv'] = Wv_es
    new_df['ls'] = new_df.Wv * 0.0 + 0.05
    new_df = new_df.sort_values(by=['nbqName'])
    new_df.set_index(['nbqName'], inplace=True)
    # fill NaN
    new_df.fillna(method='ffill', inplace=True)
    new_df.fillna(method='backfill', inplace=True)
    print(new_df)
    # plot
    plt.plot(new_df.index, new_df['Fs2m'], linewidth=2, label='Solar Radiation')
    plt.plot(new_df.index, new_df['Sd'], linewidth=2, label='Humidity')
    plt.plot(new_df.index, new_df['T0'], linewidth=2, label='Ambient Temperature')
    plt.plot(new_df.index, new_df['Wd'], linewidth=2, label='Wind Direction')
    plt.plot(new_df.index, new_df['Wv'], linewidth=2, label='Wind Speed')
    plt.plot(new_df.index, new_df['ls'], linewidth=2, label='Significance (.05)')
    plt.ylabel('Daily p-values')
    plt.xlabel('No. of Inverters')
    plt.subplots_adjust(left=0.18, wspace=0.25, hspace=0.25, bottom=0.20, top=0.80)
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2,
               mode="expand", borderaxespad=0.)
    plt.xticks(rotation=90, fontsize=4)
    plt.savefig(figPath + 'pvalues.png', dpi=300)
    plt.show()
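# Hedged sketch (synthetic data): for one day of samples, the p-value of a
# factor comes from a simple linear regression of power P on that factor via
# scipy.stats.linregress; a p-value below 0.05 is read as a significant linear
# relation for that day.
def _daily_pvalue_sketch():
    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(1)
    radiation = rng.uniform(0, 1000, 96)               # one day of 15-min samples
    power = 0.9 * radiation + rng.normal(0, 30, 96)    # power tracks radiation
    wind_dir = rng.uniform(0, 360, 96)                 # unrelated factor

    slope, intercept, r_value, p_value, std_err = stats.linregress(radiation, power)
    print('radiation p-value:', p_value)               # near 0: significant
    slope, intercept, r_value, p_value, std_err = stats.linregress(wind_dir, power)
    print('wind direction p-value:', p_value)          # large: not significant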
def effectsize():
    '''
    Get the effect size of power with all other factors, such as
    temperature, humidity, wind speed and wind direction.
    '''
    nbqList = tool.getnbqList()
    # effect-size lists for 74 inverters plus the two panels at the weather station
    fs2m_es = np.zeros((invertnum + 2, 1))
    sd_es = np.zeros((invertnum + 2, 1))
    t0_es = np.zeros((invertnum + 2, 1))
    Wd_es = np.zeros((invertnum + 2, 1))
    Wv_es = np.zeros((invertnum + 2, 1))
    es = [fs2m_es, sd_es, t0_es, Wd_es, Wv_es]
    nbq_List = []
    # for all inverters
    for idx, nbqID in enumerate(nbqList):
        print(idx, nbqID)
        nbq_List.append(nbqID)
        nbqData = pd.read_csv(resPath + nbqID + '.csv').loc[:, [
            'I', 'data_date', 'V', 'Fs2m', 'I1m', 'I2m', 'Sd', 'T0', 'V1m',
            'Wd', 'Wv'
        ]]
        nbqData = nbqData.fillna(method='ffill')
        nbqSample = nbqData
        nbqSample = nbqSample.drop(['data_date'], axis=1)
        nbqSample['P'] = nbqSample['I'] * nbqSample['V']
        nbqSample['P_cln'] = nbqSample['I1m'] * nbqSample['V1m']
        nbqSample['P_syn'] = nbqSample['I2m'] * nbqSample['V1m']
        nbqSample = nbqSample.drop(['I1m', 'I2m', 'V1m', 'I', 'V'], axis=1)
        nbqSample = nbqSample.fillna(method='backfill').fillna(0)
        # min-max normalization so the effect sizes are comparable across factors
        nbqSample.iloc[:, :] = nbqSample.iloc[:, :].apply(
            lambda x: (x - np.min(x)) / (np.max(x) - np.min(x)))
        # factors
        factors = ['Fs2m', 'Sd', 'T0', 'Wd', 'Wv']
        for jdx, jtem in enumerate(factors):
            # slope, intercept, r_value, p_value, std_err = stats.linregress(nbqSample[jtem], nbqSample.P)
            # pvalues[jdx][idx] = "%.2f" % p_value
            cohens_d = abs(nbqSample[jtem].mean() - nbqSample.P.mean()) / (
                sqrt((nbqSample[jtem].std()**2 + nbqSample.P.std()**2) / 2))
            es[jdx][idx] = round(cohens_d, 2)
    # get the effect size at the weather station (uses the last inverter's sample)
    print(idx)
    # for the cleaned panel
    for jdx, jtem in enumerate(factors):
        cohens_d = abs(nbqSample[jtem].mean() - nbqSample.P_cln.mean()) / (
            sqrt((nbqSample[jtem].std()**2 + nbqSample.P_cln.std()**2) / 2))
        es[jdx][idx + 1] = round(cohens_d, 2)
    nbq_List.append('1cleaning Panel')
    # for the synchronous panel
    for jdx, jtem in enumerate(factors):
        cohens_d = abs(nbqSample[jtem].mean() - nbqSample.P_syn.mean()) / (
            sqrt((nbqSample[jtem].std()**2 + nbqSample.P_syn.std()**2) / 2))
        es[jdx][idx + 2] = round(cohens_d, 2)
    nbq_List.append('1sync Panel')
    # merge into a dataframe and sort
    new_df = pd.DataFrame(data=nbq_List, columns=['nbqName'])
    print(len(nbq_List))
    print(len(fs2m_es))
    new_df['nbqName'] = nbq_List
    new_df['Fs2m'] = fs2m_es
    new_df['Sd'] = sd_es
    new_df['T0'] = t0_es
    new_df['Wd'] = Wd_es
    new_df['Wv'] = Wv_es
    new_df['boundary'] = new_df['Wv'] * 0.0 + 0.2
    new_df = new_df.sort_values(by=['nbqName'])
    new_df.set_index(['nbqName'], inplace=True)
    # plot
    plt.plot(new_df.index, new_df['Fs2m'], linewidth=2, label='Solar Radiation')
    plt.plot(new_df.index, new_df['Sd'], linewidth=2, label='Humidity')
    plt.plot(new_df.index, new_df['T0'], linewidth=2, label='Ambient Temperature')
    plt.plot(new_df.index, new_df['Wd'], linewidth=2, label='Wind Direction')
    plt.plot(new_df.index, new_df['Wv'], linewidth=2, label='Wind Speed')
    plt.plot(new_df.index, new_df['boundary'], linewidth=2, label='Small (.2)')
    plt.ylabel('Effect Size with Power Output')
    plt.xlabel('No. of Inverters')
    plt.subplots_adjust(left=0.18, wspace=0.25, hspace=0.25, bottom=0.20, top=0.80)
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2,
               mode="expand", borderaxespad=0.)
    plt.xticks(rotation=90, fontsize=4)
    plt.savefig(figPath + 'es.png', dpi=300)
    plt.show()
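# Worked sketch of the effect-size formula used above (Cohen's d with a pooled
# standard deviation): d = |mean1 - mean2| / sqrt((s1^2 + s2^2) / 2).
# Values here are made up; d >= 0.2 is conventionally read as a "small" effect.
def _cohens_d_sketch():
    import numpy as np
    from math import sqrt

    rng = np.random.default_rng(2)
    a = rng.normal(0.55, 0.10, 1000)      # e.g. normalized radiation
    b = rng.normal(0.50, 0.10, 1000)      # e.g. normalized power
    d = abs(a.mean() - b.mean()) / sqrt((a.std()**2 + b.std()**2) / 2)
    print("Cohen's d:", round(d, 2))      # roughly 0.5 for these parameters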
flist = glob.glob(nbqDataPath + '*.csv')
for f in flist:
    nbqname = os.path.basename(f)
    nbqData = pd.read_csv(f, delimiter=',')
    nbqData = nbqData.fillna(method='ffill')
    soil_model_inverter.extractSlopeFea(nbqData, nbqname)
    # soil_model_inverter.extractMultiSlopeFea(nbqData, nbqname)

# get soiling rate
if getSoilRate == True:
    soiling_rate.countdust(singleslopePath, method='single')
    # soiling_rate.countdust(MultiSlopePath, method='multi')

# evaluation: calculate mean relative power loss
if eva == True:
    allnbq = tool.getnbqList()
    for idx, nbq in enumerate(allnbq):
        # evaluation.getpowers(siglepowers, nbq)
        evaluation.getmultipowers(multiPowers, nbq)
    evaluation.MRE(siglepowers, method='single')
    # evaluation.MRE(multiPowers, method='multi')

if VerWS == True:
    # extract weather station data from either inverter
    wsPath = nbqDataPath + 'S01-NBA.csv'
    wsData = pd.read_csv(wsPath, delimiter=',')
    # for the daily cleaned panel
    # soil_model_inverter.extractWSSlopeFea(wsData, field='P_cln')
    # soil_model_inverter.extractWSSlopeFea(wsData, field='P_syn')
    # get soiling rate
def cmpPowerLoss():
    '''
    Compare the power loss before and after cleaning for each inverter.
    '''
    # get nbq list
    nbqList = tool.getnbqList()
    mean_list = np.zeros((invertnum, 1))
    var_list = np.zeros((invertnum, 1))
    for idx, item in enumerate(nbqList):
        # find the power file for this inverter
        filename = ''
        flist = glob.glob(powerPath + '*.csv')
        for f in flist:
            filename = os.path.basename(f)[0:7]
            if filename == item:
                break
        nbq_df = pd.read_csv(f).loc[:, ['Date', 'P_true', 'Pe_slope']]
        # get cleaning list for the inverter
        nbq_qx = pd.read_csv(qxjl)
        nbq_qx = nbq_qx[nbq_qx['nbqno'] == item]
        nbq_qx['qxdate'] = pd.to_datetime(nbq_qx['qxdate'], format='%Y-%m-%d')
        nbq_qx = nbq_qx[(nbq_qx['qxdate'] >= '2016-01-01')
                        & (nbq_qx['qxdate'] < '2017-01-01')]
        qxList = nbq_qx['qxdate'].tolist()
        qxList = [riqi.strftime("%Y-%m-%d") for riqi in qxList]
        # creating the cleaning list dictionary
        # startDate = nbq_df.at[0, 'Date']
        startIDX = 0
        for riqi in qxList:
            endIDX = nbq_df[nbq_df.Date == riqi].index.tolist()[0]
            # relative power loss within the cleaning interval
            standard = nbq_df.at[startIDX, 'Pe_slope']
            nbq_df.loc[startIDX:endIDX, 'powerloss'] = (
                nbq_df.loc[startIDX:endIDX, 'Pe_slope'] -
                nbq_df.loc[startIDX:endIDX, 'P_true']) / standard
            # next cleaning event
            startIDX = endIDX
            # startDate = riqi
        nbq_df['powerloss'], upper_quartile = tool.removeOutliers(
            nbq_df.powerloss, 1.5)
        nbq_df['avg_powerloss'] = nbq_df['powerloss'].mean()
        mean_list[idx] = nbq_df['powerloss'].mean()
        var_list[idx] = nbq_df['powerloss'].var()
        # nbq_df['powerloss'] = nbq_df['powerloss'].rolling(window=5, center=True).median()
        n_clean = [0.0] * nbq_qx.qxdate.shape[0]
        nbq_df['Date'] = pd.to_datetime(nbq_df['Date'], format='%Y-%m-%d')
        nbq_df.set_index(['Date'], inplace=True)
        nbq_qx.set_index(['qxdate'], inplace=True)
        # plot for each inverter
        # plt.plot(nbq_qx.index, n_clean, 'x', label='Clean Event')
        # plt.plot(nbq_df.index, nbq_df.powerloss, label='Power loss at a cleaning interval')
        # plt.plot(nbq_df.index, nbq_df.avg_powerloss, label='Mean Relative Power Loss = 5.7%')
        # plt.ylabel('Relative Power Loss')
        # plt.xlabel('Date')
        # plt.legend()
        # plt.savefig(figPath + filename + '_powerloss.png', dpi=300)
        # plt.show()
        # plt.close()
    # plot for all inverters
    print(mean_list)
    plt.plot(mean_list, label='Mean Relative Power Loss')
    plt.plot(var_list, label='Variance of Relative Power Loss')
    plt.ylabel('Values')
    plt.xlabel('No. of Inverters')
    plt.legend()
    plt.savefig(figPath + 'powerloss_allinverters.png', dpi=300)
    plt.show()
    plt.close()
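# Minimal sketch (made-up numbers) of the relative power loss used above: within
# one cleaning interval, loss = (Pe_slope - P_true) / standard, where Pe_slope is
# the expected power from the slope model, P_true the measured power, and
# standard the expected power on the first day of the interval.
def _relative_power_loss_sketch():
    import pandas as pd

    interval = pd.DataFrame({
        'Pe_slope': [500.0, 500.0, 500.0, 500.0],   # expected (clean-panel) power
        'P_true':   [500.0, 490.0, 480.0, 470.0],   # measured power, degrading with dust
    })
    standard = interval.at[0, 'Pe_slope']
    interval['powerloss'] = (interval['Pe_slope'] - interval['P_true']) / standard
    print(interval['powerloss'].tolist())            # [0.0, 0.02, 0.04, 0.06]
    print('mean relative loss:', interval['powerloss'].mean())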
def cmpCleanBeforeAndAfter(slopePath):
    '''
    Compare the slopes before and after cleaning for each inverter.
    '''
    # get nbq list
    nbqList = tool.getnbqList()
    for idx, item in enumerate(nbqList):
        # slope series collected for each cleaning interval
        pr = {}
        prsd = {}
        prwv = {}
        prwd = {}
        prt0 = {}
        # get the slope file for the inverter
        filename = ''
        flist = glob.glob(slopePath + '*.csv')
        for f in flist:
            filename = os.path.basename(f)[0:7]
            if filename == item:
                break
        slope_df = pd.read_csv(f)
        # normalize slopes and remove outliers
        slope_pr = slope_df.Pr
        slope_prsd = slope_df.PrSd
        slope_prwv = slope_df.PrWv
        slope_prwd = slope_df.PrWd
        slope_prt0 = slope_df.PrT0
        med_slope_pr, upper = tool.removeOutliers(slope_pr, 1)
        med_slope_prsd, upper = tool.removeOutliers(slope_prsd, 1)
        med_slope_prwv, upper = tool.removeOutliers(slope_prwv, 1)
        med_slope_prwd, upper = tool.removeOutliers(slope_prwd, 1)
        med_slope_prt0, upper = tool.removeOutliers(slope_prt0, 1)
        # median filter over a 7-day window
        filterday = 7
        med_slope_pr = medfilt(med_slope_pr, filterday)
        med_slope_prsd = medfilt(med_slope_prsd, filterday)
        med_slope_prwv = medfilt(med_slope_prwv, filterday)
        med_slope_prwd = medfilt(med_slope_prwd, filterday)
        med_slope_prt0 = medfilt(med_slope_prt0, filterday)
        slope_df['smoothedSlopePr'] = med_slope_pr
        slope_df['smoothedSlopePrsd'] = med_slope_prsd
        slope_df['smoothedSlopePrwv'] = med_slope_prwv
        slope_df['smoothedSlopePrwd'] = med_slope_prwd
        slope_df['smoothedSlopePrt0'] = med_slope_prt0
        # get cleaning list for the inverter
        nbq_qx = pd.read_csv(qxjl)
        nbq_qx = nbq_qx[nbq_qx['nbqno'] == item]
        nbq_qx['qxdate'] = pd.to_datetime(nbq_qx['qxdate'], format='%Y-%m-%d')
        # qxList = time.strftime('%Y-%m-%d', nbq_qx['qxdate']).tolist()
        qxList = nbq_qx['qxdate'].tolist()
        qxList = [riqi.strftime("%Y-%m-%d") for riqi in qxList]
        # creating the slopes dictionary, one entry per cleaning interval
        startDate = slope_df.at[0, 'data_date']
        startIDX = 0
        for riqi in qxList:
            endIDX = slope_df[slope_df.data_date == riqi].index.tolist()[0]
            # original slopes
            # cmpdict[startDate] = slope_df.loc[startIDX:endIDX, 'Pr']
            # de-noised slopes
            pr[startDate] = slope_df.loc[startIDX:endIDX, 'smoothedSlopePr']
            prsd[startDate] = slope_df.loc[startIDX:endIDX, 'smoothedSlopePrsd']
            prwv[startDate] = slope_df.loc[startIDX:endIDX, 'smoothedSlopePrwv']
            prwd[startDate] = slope_df.loc[startIDX:endIDX, 'smoothedSlopePrwd']
            prt0[startDate] = slope_df.loc[startIDX:endIDX, 'smoothedSlopePrt0']
            # plot
            plt.figure(1)
            plt.plot(pr[startDate], label='Slopes at a cleaning interval')
            plt.figure(2)
            plt.plot(prsd[startDate], label='Slopes at a cleaning interval')
            plt.figure(3)
            plt.plot(prwv[startDate], label='Slopes at a cleaning interval')
            plt.figure(4)
            plt.plot(prwd[startDate], label='Slopes at a cleaning interval')
            plt.figure(5)
            plt.plot(prt0[startDate], label='Slopes at a cleaning interval')
            # next cleaning event
            startIDX = endIDX
            startDate = riqi
        # adding the interval after the last cleaning event
        pr[startDate] = slope_df.loc[startIDX:endIDX, 'smoothedSlopePr']
        prsd[startDate] = slope_df.loc[startIDX:endIDX, 'smoothedSlopePrsd']
        prwv[startDate] = slope_df.loc[startIDX:endIDX, 'smoothedSlopePrwv']
        prwd[startDate] = slope_df.loc[startIDX:endIDX, 'smoothedSlopePrwd']
        prt0[startDate] = slope_df.loc[startIDX:endIDX, 'smoothedSlopePrt0']
        # plot
        # plt.ylabel('Slope of solar radiation')
        plt.xlabel('TimeStamp (Day)')
        plt.savefig(figPath + filename + '_multislopesVar.png', dpi=300)
        plt.show()
        plt.close()
        break
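# Brief sketch of the 7-day median filtering used above: scipy.signal.medfilt
# replaces each value with the median of a 7-sample window, which suppresses
# single-day spikes in a daily slope series (the values below are synthetic).
def _medfilt_sketch():
    import numpy as np
    from scipy.signal import medfilt

    daily_slope = np.array([1.0, 1.1, 0.9, 5.0, 1.0, 1.2, 0.8, 1.1, 1.0, 0.9])
    smoothed = medfilt(daily_slope, kernel_size=7)
    print(smoothed)    # the isolated spike at 5.0 is removed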