def correlation():
    '''
    Save the correlation matrix of power and weather factors for each inverter.

    For every inverter ID returned by ``tool.getnbqList()`` the file
    ``resPath + <id> + '.csv'`` is read, up to 2000 rows are sampled at random
    (without replacement), the power columns ``P`` (I * V), ``P_cln``
    (I1m * V1m) and ``P_syn`` (I2m * V1m) are derived, and the pairwise
    correlation matrix of the remaining columns is written to
    ``corPath + <id> + '.csv'``.
    '''
    columns = [
        'I', 'data_date', 'V', 'Fs2m', 'I1m', 'I2m', 'Sd', 'T0', 'V1m', 'Wd',
        'Wv'
    ]
    rawColumns = ['data_date', 'I1m', 'I2m', 'V1m', 'I', 'V']
    sampleSize = 2000

    for idx, nbqID in enumerate(tool.getnbqList()):
        print(idx, nbqID)
        nbqData = pd.read_csv(resPath + nbqID + '.csv').loc[:, columns]
        # ffill() returns a new frame, so the result has to be kept.
        nbqData = nbqData.ffill()

        # downsample for quick plot; replace must be the boolean False (the
        # string 'False' is truthy and would sample with replacement), and the
        # sample cannot be larger than the data when not replacing.
        nbqSample = nbqData.sample(n=min(sampleSize, len(nbqData)),
                                   replace=False)

        nbqSample['P'] = nbqSample['I'] * nbqSample['V']
        nbqSample['P_cln'] = nbqSample['I1m'] * nbqSample['V1m']
        nbqSample['P_syn'] = nbqSample['I2m'] * nbqSample['V1m']
        nbqSample = nbqSample.drop(rawColumns, axis=1)

        # fill what forward-filling could not reach (leading gaps), then
        # fall back to 0 for columns that are empty altogether
        nbqSample = nbqSample.bfill().fillna(0)

        nbqSample.corr().to_csv(corPath + nbqID + '.csv')
# Example #2
# 0
def cleaninglist(outPath, maxInterval=50):
    '''
    Build the per-inverter table of days since the last cleaning.

    The result has one row per day from ``tool.getDayList()`` (column
    ``Date``) and one column per inverter from ``tool.getnbqList()``. Within
    each interval between two cleaning events the value counts up from 0; on a
    cleaning day the counter restarts at 0. The table is written to
    ``outPath`` as CSV.

    outPath     -- destination CSV path
    maxInterval -- an interval of this many days or more between two cleaning
                   events is treated as implausible and filled with 0
                   (experience value, 50 days by default)

    Cleaning dates that do not appear in the day list are ignored.
    '''
    #get date list
    dayList = tool.getDayList()

    #get inverter list
    nbqList = tool.getnbqList()

    df = pd.DataFrame(columns=nbqList)
    df['Date'] = dayList

    #row index of the first occurrence of every date
    dayIndex = {}
    for pos, day in enumerate(dayList):
        dayIndex.setdefault(day, pos)

    #the cleaning record is the same for all inverters: read it once
    allRecords = pd.read_csv(qxjl)

    for nbqname in nbqList:
        #cleaning dates (YYYY-MM-DD part only) of this inverter
        records = allRecords[allRecords['nbqno'] == nbqname]
        cleaningList = [item[0:10] for item in records['qxdate'].values]

        startIDX = 0
        #calculate the number of dust accumulation days per interval
        for riqi in cleaningList:
            endIDX = dayIndex.get(riqi)
            if endIDX is None:
                #cleaning date outside the analysed period
                continue

            span = endIDX - startIDX + 1
            if (endIDX - startIDX) < maxInterval:
                dustacclist = list(range(span))
            else:
                #interval too long to be a real dust accumulation period
                dustacclist = [0] * span
            df.loc[startIDX:endIDX, nbqname] = dustacclist

            #next cleaning event
            startIDX = endIDX

        #from the last cleaning record to the end of the day list
        endIDX = len(dayList) - 1
        df.loc[startIDX:endIDX, nbqname] = list(range(endIDX - startIDX + 1))
    df.to_csv(outPath)
def correlationPM25():
    '''
    Print the correlation between inverter power and PM2.5 / PM10.

    The particulate data (``Date``, ``PM2.5``, ``PM10`` from ``pmfile``) is
    joined on ``Date`` with the ``P_true`` column of every inverter file
    ``allpowerPath + <id> + '.csv'``; the merged table and its correlation
    matrix are printed for each inverter.
    '''
    #get pm2.5 data
    pm25_df = pd.read_csv(pmfile).loc[:, ['Date', 'PM2.5', 'PM10']]
    nbqList = tool.getnbqList()

    for idx, nbqID in enumerate(nbqList):
        print(idx, nbqID)
        nbqData = pd.read_csv(allpowerPath + nbqID +
                              '.csv').loc[:, ['Date', 'P_true']]

        #join explicitly on the shared key instead of relying on the
        #implicit "all common columns" default
        merged = pd.merge(pm25_df, nbqData, on='Date')
        print(merged)

        #only numeric columns can be correlated; newer pandas versions raise
        #on the textual Date column instead of silently dropping it
        print(merged.select_dtypes(include='number').corr())
def pvalues():
    '''
    Plot the regression p-values of power against each weather factor.

    For every inverter from ``tool.getnbqList()`` the data in
    ``resPath + <id> + '.csv'`` is split by day, power ``P`` (I * V) is
    linearly regressed on each factor (``Fs2m``, ``Sd``, ``T0``, ``Wd``,
    ``Wv``) per day, and the daily p-values are averaged. Two extra entries
    hold the p-values of the weather-station panels ``P_cln`` (I1m * V1m) and
    ``P_syn`` (I2m * V1m). The resulting table is printed, plotted together
    with the 0.05 significance line and saved to ``figPath + 'pvalues.png'``.

    The result arrays are sized from the module-level ``invertnum``, which is
    expected to equal the number of inverters in the list.
    '''
    nbqList = tool.getnbqList()

    factors = ['Fs2m', 'Sd', 'T0', 'Wd', 'Wv']
    labels = [
        'Solar Radiation', 'Humidity', 'Ambient Temperature',
        'Wind Direction', 'Wind Speed'
    ]

    #one p-value column per factor: all inverters plus the two panels of the
    #weather station
    es = [np.zeros((invertnum + 2, 1)) for _ in factors]
    nbq_List = []

    #for all inverters
    for idx, nbqID in enumerate(nbqList):
        print(idx, nbqID)
        nbq_List.append(nbqID)
        nbqData = pd.read_csv(resPath + nbqID + '.csv').loc[:, [
            'I', 'data_date', 'V', 'Fs2m', 'I1m', 'I2m', 'Sd', 'T0', 'V1m',
            'Wd', 'Wv'
        ]]
        #ffill() returns a new frame, so the result has to be kept
        nbqData = nbqData.ffill()

        #index the samples by their day (YYYY-MM-DD part of the timestamp)
        nbqSample = nbqData
        dateList = [item[0:10] for item in nbqSample['data_date'].values]
        nbqSample['Date'] = dateList
        nbqSample.set_index(['Date'], inplace=True)

        nbqSample['P'] = nbqSample['I'] * nbqSample['V']
        nbqSample['P_cln'] = nbqSample['I1m'] * nbqSample['V1m']
        nbqSample['P_syn'] = nbqSample['I2m'] * nbqSample['V1m']
        nbqSample = nbqSample.drop(
            ['data_date', 'I1m', 'I2m', 'V1m', 'I', 'V'], axis=1)
        #fill leading gaps, then fall back to 0 for empty columns
        nbqSample = nbqSample.bfill().fillna(0)

        #daily p-values, one list per factor
        dailyPvalues = [[] for _ in factors]
        for day in np.unique(dateList):
            #get daily data
            tmp_df = nbqSample[nbqSample.index == day]
            for jdx, jtem in enumerate(factors):
                dailyPvalues[jdx].append(
                    _regressionPvalue(tmp_df[jtem], tmp_df.P))

        #mean over the days on which the regression was defined
        for jdx in range(len(factors)):
            es[jdx][idx] = np.nanmean(dailyPvalues[jdx])

        #cal pvalues for weather station
        #NOTE(review): tmp_df here is the data of the last day of the last
        #inverter only, not a mean over all days -- confirm this is intended
        if idx == (invertnum - 1):
            # for the cleaning panel
            for jdx, jtem in enumerate(factors):
                es[jdx][idx + 1] = _regressionPvalue(tmp_df[jtem],
                                                     tmp_df.P_cln)
            nbq_List.append('1cleaning Panel')
            # for the sync panel
            for jdx, jtem in enumerate(factors):
                es[jdx][idx + 2] = _regressionPvalue(tmp_df[jtem],
                                                     tmp_df.P_syn)
            nbq_List.append('1sync Panel')

    #merge in dataframe and sort
    new_df = pd.DataFrame(data=nbq_List, columns=['nbqName'])
    for jtem, column in zip(factors, es):
        new_df[jtem] = column.ravel()
    #significance threshold, drawn as a horizontal line
    new_df['ls'] = 0.05
    new_df = new_df.sort_values(by=['nbqName'])
    new_df.set_index(['nbqName'], inplace=True)

    #fill nan from the neighbouring inverters
    new_df = new_df.ffill().bfill()
    print(new_df)

    #plot
    for jtem, label in zip(factors, labels):
        plt.plot(new_df.index, new_df[jtem], linewidth=2, label=label)
    plt.plot(new_df.index,
             new_df['ls'],
             linewidth=2,
             label='Significance ( .05)')

    plt.ylabel('Daily p-values')
    plt.xlabel('No. of Inverters')
    plt.subplots_adjust(left=0.18,
                        wspace=0.25,
                        hspace=0.25,
                        bottom=0.20,
                        top=0.80)
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
               loc=3,
               ncol=2,
               mode="expand",
               borderaxespad=0.)
    plt.xticks(rotation=90, fontsize=4)
    plt.savefig(figPath + 'pvalues.png', dpi=300)

    plt.show()


def _regressionPvalue(x, y):
    '''
    Return the p-value of the linear regression of y on x.

    NaN is returned when x has fewer than two distinct values, because no
    regression line can be fitted in that case.
    '''
    if len(x) < 2 or x.nunique() < 2:
        return np.nan
    return stats.linregress(x, y).pvalue
def effectsize():
    '''
    Plot the effect size between power and each weather factor.

    For every inverter from ``tool.getnbqList()`` the data in
    ``resPath + <id> + '.csv'`` is min-max normalised column by column and the
    effect size (absolute mean difference over the root mean of the two
    variances, rounded to two decimals) between power ``P`` (I * V) and each
    factor (``Fs2m``, ``Sd``, ``T0``, ``Wd``, ``Wv``) is computed. Two extra
    entries hold the effect sizes of the weather-station panels ``P_cln``
    (I1m * V1m) and ``P_syn`` (I2m * V1m), taken from the data of the last
    inverter in the list. The values are plotted together with the 0.2
    ("small") threshold and saved to ``figPath + 'es.png'``.

    The result arrays are sized from the module-level ``invertnum``, which is
    expected to equal the number of inverters in the list.
    '''
    nbqList = tool.getnbqList()

    factors = ['Fs2m', 'Sd', 'T0', 'Wd', 'Wv']
    labels = [
        'Solar Radiation', 'Humidity', 'Ambient Temperature',
        'Wind Direction', 'Wind Speed'
    ]

    #one effect-size column per factor: all inverters plus the two panels of
    #the weather station
    es = [np.zeros((invertnum + 2, 1)) for _ in factors]
    nbq_List = []

    #for all inverters
    for idx, nbqID in enumerate(nbqList):
        print(idx, nbqID)
        nbq_List.append(nbqID)
        nbqData = pd.read_csv(resPath + nbqID + '.csv').loc[:, [
            'I', 'data_date', 'V', 'Fs2m', 'I1m', 'I2m', 'Sd', 'T0', 'V1m',
            'Wd', 'Wv'
        ]]
        #ffill() returns a new frame, so the result has to be kept
        nbqData = nbqData.ffill()

        nbqSample = nbqData.drop(['data_date'], axis=1)
        nbqSample['P'] = nbqSample['I'] * nbqSample['V']
        nbqSample['P_cln'] = nbqSample['I1m'] * nbqSample['V1m']
        nbqSample['P_syn'] = nbqSample['I2m'] * nbqSample['V1m']
        nbqSample = nbqSample.drop(['I1m', 'I2m', 'V1m', 'I', 'V'], axis=1)
        #fill leading gaps, then fall back to 0 for empty columns
        nbqSample = nbqSample.bfill().fillna(0)

        #min-max normalization of every column
        lowest = nbqSample.min()
        nbqSample = (nbqSample - lowest) / (nbqSample.max() - lowest)

        for jdx, jtem in enumerate(factors):
            es[jdx][idx] = _effectSize(nbqSample[jtem], nbqSample.P)

    #get the effect size in weather station, using the data that came with
    #the last inverter file
    # for the cleaning panel
    for jdx, jtem in enumerate(factors):
        es[jdx][idx + 1] = _effectSize(nbqSample[jtem], nbqSample.P_cln)
    nbq_List.append('1cleaning Panel')
    # for the sync panel
    for jdx, jtem in enumerate(factors):
        es[jdx][idx + 2] = _effectSize(nbqSample[jtem], nbqSample.P_syn)
    nbq_List.append('1sync Panel')

    #merge in dataframe and sort
    new_df = pd.DataFrame(data=nbq_List, columns=['nbqName'])
    for jtem, column in zip(factors, es):
        new_df[jtem] = column.ravel()
    #"small effect" threshold, drawn as a horizontal line
    new_df['boundary'] = 0.2
    new_df = new_df.sort_values(by=['nbqName'])
    new_df.set_index(['nbqName'], inplace=True)

    #plot
    for jtem, label in zip(factors, labels):
        plt.plot(new_df.index, new_df[jtem], linewidth=2, label=label)
    plt.plot(new_df.index, new_df['boundary'], linewidth=2, label='Small (.2)')

    plt.ylabel('Effect Size with Power Output')
    plt.xlabel('No. of Inverters')
    plt.subplots_adjust(left=0.18,
                        wspace=0.25,
                        hspace=0.25,
                        bottom=0.20,
                        top=0.80)
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
               loc=3,
               ncol=2,
               mode="expand",
               borderaxespad=0.)
    plt.xticks(rotation=90, fontsize=4)
    plt.savefig(figPath + 'es.png', dpi=300)

    plt.show()


def _effectSize(a, b):
    '''
    Return the effect size between the series a and b, rounded to 2 decimals.

    The value is |mean(a) - mean(b)| divided by the square root of the mean
    of the two variances.
    '''
    pooled = sqrt((a.std()**2 + b.std()**2) / 2)
    return float("%.2f" % (abs(a.mean() - b.mean()) / pooled))
# Example #6
# 0
        flist = glob.glob(nbqDataPath + '*.csv')
        for f in flist:
            nbqname = os.path.basename(f)
            nbqData = pd.read_csv(f, delimiter=',')
            nbqData = nbqData.fillna(method='ffill')
            soil_model_inverter.extractSlopeFea(nbqData, nbqname)
            #soil_model_inverter.extractMultiSlopeFea(nbqData,nbqname)

    #get soiling rate
    if getSoilRate == True:
        soiling_rate.countdust(singleslopePath, method='single')
        #soiling_rate.countdust(MultiSlopePath,method = 'multi')

    #evaluation, cal mean relative power loss
    if eva == True:
        allnbq = tool.getnbqList()
        for idx, nbq in enumerate(allnbq):
            #evaluation.getpowers(siglepowers,nbq)
            evaluation.getmultipowers(multiPowers, nbq)
        evaluation.MRE(siglepowers, method='single')
        #evaluation.MRE(multiPowers,method = 'multi')
    if VerWS == True:

        #extract weather sataion data form either inverter
        wsPath = nbqDataPath + 'S01-NBA.csv'
        wsData = pd.read_csv(wsPath, delimiter=',')

        #for daily clean panle
        #soil_model_inverter.extractWSSlopeFea(wsData,field = 'P_cln')
        #soil_model_inverter.extractWSSlopeFea(wsData,field = 'P_syn')
        #get soiling rate
def cmpPowerLoss(startDate='2016-01-01', endDate='2017-01-01'):
    '''
    Plot mean and variance of the relative power loss of every inverter.

    For each inverter from ``tool.getnbqList()`` the power file in
    ``powerPath`` whose name starts with the inverter ID is read, and for
    every interval between two cleaning events the relative power loss
    ``(Pe_slope - P_true) / Pe_slope[start of interval]`` is computed.
    Outliers are removed with ``tool.removeOutliers`` and the mean and
    variance per inverter are plotted and saved to
    ``figPath + 'powerloss_allinverters.png'``.

    startDate -- first cleaning date to consider (inclusive)
    endDate   -- end of the period of cleaning dates (exclusive)

    Raises FileNotFoundError when an inverter has no power file. Cleaning
    dates that do not appear in the power file are ignored. Rows after the
    last cleaning event get no power-loss value.
    '''
    #get nbq list
    nbqList = tool.getnbqList()
    mean_list = np.zeros((invertnum, 1))
    var_list = np.zeros((invertnum, 1))

    #inverter ID (first 7 characters of the file name) -> power file;
    #the first file found wins when several share a prefix
    powerFiles = {}
    for path in glob.glob(powerPath + '*.csv'):
        powerFiles.setdefault(os.path.basename(path)[0:7], path)

    #the cleaning record is the same for all inverters: read it once
    allRecords = pd.read_csv(qxjl)

    for idx, item in enumerate(nbqList):
        if item not in powerFiles:
            #do not silently fall back to the data of another inverter
            raise FileNotFoundError('no power file for inverter %s in %s' %
                                    (item, powerPath))
        nbq_df = pd.read_csv(
            powerFiles[item]).loc[:, ['Date', 'P_true', 'Pe_slope']]

        #get cleaning list for the inverter; copy so that the conversion
        #below does not write into a view of the full record
        nbq_qx = allRecords[allRecords['nbqno'] == item].copy()
        nbq_qx['qxdate'] = pd.to_datetime(nbq_qx['qxdate'], format='%Y-%m-%d')
        nbq_qx = nbq_qx[(nbq_qx['qxdate'] >= startDate)
                        & (nbq_qx['qxdate'] < endDate)]
        qxList = [riqi.strftime("%Y-%m-%d") for riqi in nbq_qx['qxdate']]

        #make sure the column exists even without any cleaning event
        nbq_df['powerloss'] = np.nan
        startIDX = 0

        for riqi in qxList:
            matches = nbq_df.index[nbq_df.Date == riqi]
            if len(matches) == 0:
                #cleaning date not covered by the power data
                continue
            endIDX = matches[0]

            #relative power loss, normalised by the expected power at the
            #start of the interval
            standard = nbq_df.at[startIDX, 'Pe_slope']
            nbq_df.loc[startIDX:endIDX, 'powerloss'] = (
                nbq_df.loc[startIDX:endIDX, 'Pe_slope'] -
                nbq_df.loc[startIDX:endIDX, 'P_true']) / standard

            #next cleaning event
            startIDX = endIDX

        nbq_df['powerloss'], _ = tool.removeOutliers(nbq_df.powerloss, 1.5)
        mean_list[idx] = nbq_df['powerloss'].mean()
        var_list[idx] = nbq_df['powerloss'].var()

    #plot for all inverters
    print(mean_list)
    plt.plot(mean_list, label='Mean Relative Power Loss')
    plt.plot(var_list, label='Variance of Relative Power Loss')
    plt.ylabel('Values')
    plt.xlabel('No. of Inverters')
    plt.legend()
    plt.savefig(figPath + 'powerloss_allinverters.png', dpi=300)
    plt.show()
    plt.close()
def cmpCleanBeforeAndAfter(slopePath):
    '''
    Plot the smoothed slopes of an inverter per cleaning interval.

    The slope file in ``slopePath`` whose name starts with the inverter ID is
    read, the columns ``Pr``, ``PrSd``, ``PrWv``, ``PrWd`` and ``PrT0`` are
    cleaned with ``tool.removeOutliers`` and smoothed with a 7-day median
    filter, and every interval between two cleaning events is drawn as its
    own line, one matplotlib figure (1 to 5) per slope column. The figure
    that is current at the end (figure 5, ``PrT0``) is saved to
    ``figPath + <id> + '_multislopesVar.png'``.

    slopePath -- directory prefix of the per-inverter slope CSV files

    Raises FileNotFoundError when the inverter has no slope file. Cleaning
    dates that do not appear in the slope file are ignored.
    '''
    #slope column -> name of its smoothed counterpart, in figure order
    slopeColumns = [
        ('Pr', 'smoothedSlopePr'),
        ('PrSd', 'smoothedSlopePrsd'),
        ('PrWv', 'smoothedSlopePrwv'),
        ('PrWd', 'smoothedSlopePrwd'),
        ('PrT0', 'smoothedSlopePrt0'),
    ]
    #medfilt using 7 days
    filterday = 7

    #inverter ID (first 7 characters of the file name) -> slope file;
    #the first file found wins when several share a prefix
    slopeFiles = {}
    for path in glob.glob(slopePath + '*.csv'):
        slopeFiles.setdefault(os.path.basename(path)[0:7], path)

    #get nbq list
    nbqList = tool.getnbqList()
    for item in nbqList:
        if item not in slopeFiles:
            #do not silently fall back to the data of another inverter
            raise FileNotFoundError('no slope file for inverter %s in %s' %
                                    (item, slopePath))
        slope_df = pd.read_csv(slopeFiles[item])

        #remove outliers, then smooth
        for rawName, smoothName in slopeColumns:
            cleaned, _ = tool.removeOutliers(slope_df[rawName], 1)
            slope_df[smoothName] = medfilt(cleaned, filterday)

        #get cleaning list for the inverter; copy so that the conversion
        #below does not write into a view of the full record
        nbq_qx = pd.read_csv(qxjl)
        nbq_qx = nbq_qx[nbq_qx['nbqno'] == item].copy()
        nbq_qx['qxdate'] = pd.to_datetime(nbq_qx['qxdate'], format='%Y-%m-%d')
        qxList = [riqi.strftime("%Y-%m-%d") for riqi in nbq_qx['qxdate']]

        #smoothed slopes per interval: column -> {interval start date: series}
        intervals = {smoothName: {} for _, smoothName in slopeColumns}
        startDate = slope_df.at[0, 'data_date']
        startIDX = 0

        for riqi in qxList:
            matches = slope_df.index[slope_df.data_date == riqi]
            if len(matches) == 0:
                #cleaning date not covered by the slope data
                continue
            endIDX = matches[0]

            #one figure per slope column, one line per cleaning interval
            for figNo, (_, smoothName) in enumerate(slopeColumns, start=1):
                segment = slope_df.loc[startIDX:endIDX, smoothName]
                intervals[smoothName][startDate] = segment
                plt.figure(figNo)
                plt.plot(segment, label='Slopes at a cleaning interval')

            #next cleaning event
            startIDX = endIDX
            startDate = riqi

        #the interval after the last cleaning event runs to the end of the
        #data; it is recorded but, as before, not plotted
        for _, smoothName in slopeColumns:
            intervals[smoothName][startDate] = slope_df.loc[startIDX:,
                                                            smoothName]

        #the label and the saved image refer to the current figure only
        plt.xlabel('TimeStamp (Day)')

        plt.savefig(figPath + item + '_multislopesVar.png', dpi=300)
        plt.show()
        plt.close()

        #NOTE(review): only the first inverter of the list is processed --
        #confirm whether this early exit is still wanted
        break