Exemple #1
0
def skill_score_murphy(predicted, reference):
    '''
    Compute the non-dimensional skill score (SS) of Murphy (1988).

    The score relates the mean square error of PREDICTED with respect to
    REFERENCE to the sample variance of REFERENCE:

    SS = 1 - RMSE^2/SDEV^2

    where RMSE is the root-mean-square error between the predicted and
    reference values

    (RMSE)^2 = sum_(n=1)^N (p_n - r_n)^2/N

    and SDEV is the standard deviation of the reference values

    SDEV^2 = sum_(n=1)^N [r_n - mean(r)]^2/(N-1)

    Here p denotes the predicted values, r the reference values, and N
    the number of values in p and r. Both inputs must hold the same
    number of values.

    Input:
    PREDICTED : predicted field
    REFERENCE : reference field

    Output:
    SS : skill score

    Reference:
    Allan H. Murphy, 1988: Skill Scores Based on the Mean Square Error
    and Their Relationships to the Correlation Coefficient. Mon. Wea.
    Rev., 116, 2417-2424.
    doi: http://dx.doi.org/10.1175/1520-0493(1988)<2417:SSBOTM>2.0.CO;2

    Author: Peter A. Rochford
        Symplectic, LLC
        www.thesymplectic.com
        [email protected]

    Created on Dec 7, 2016
    '''

    # Input checking is delegated to the shared helper (defined outside
    # this block).
    utils.check_arrays(predicted, reference)

    # Numerator: squared root-mean-square error
    mean_square_error = rmsd(predicted, reference)**2

    # Denominator: sample variance of the reference (N-1 normalisation)
    reference_variance = np.std(reference, ddof=1)**2

    return 1 - mean_square_error/reference_variance
def taylorPlot():
    '''
    Draw a Taylor diagram of every simulated column against the
    observation column and save it to argv['outputPath'].

    Works on the module-level names df, iObs, colN, metricName and argv.
    The observation column is always the first plotted point (RMSD 0,
    correlation 1, label 'Fluxnet'). For each other column, rows where
    either that column or the observation is NaN are dropped before the
    statistics are computed. A column whose remaining values are empty or
    all falsy (Series.any() is False) is left out of the diagram.
    '''
    # First point: the observations compared with themselves.
    stds = [df.iloc[:, iObs].std()]
    rmses = [0]
    coefs = [1]
    labels = ['Fluxnet']

    obsLabel = df.columns[iObs]
    for idx in range(colN):
        if idx == iObs:
            continue

        simLabel = df.columns[idx]
        # Keep only the rows where both simulation and observation exist.
        paired = df.dropna(subset=[simLabel, obsLabel])
        simulated = paired[simLabel]
        observed = paired[obsLabel]

        if not simulated.any():
            continue

        stds.append(simulated.std())
        rmses.append(sm.rmsd(simulated, observed))
        coefs.append(np.corrcoef(simulated, observed)[0, 1])
        labels.append(simLabel)

    diagramOptions = {
        'markerLabel': labels,
        'markerLabelColor': 'r',
        'markerSize': 6,
        'markerLegend': 'on',
        'colOBS': 'g',
        'styleOBS': '-',
        'markerobs': 'o',
        'showlabelsRMS': 'on',
        'titleRMS': 'on',
        'titleOBS': 'Fluxnet',
        'rmslabelformat': ':.1f',
    }
    sm.taylor_diagram(np.array(stds), np.array(rmses), np.array(coefs),
                      **diagramOptions)

    plt.title(metricName, y=1.06, fontsize='large', loc='center',
              horizontalalignment='center')
    plt.savefig(argv['outputPath'])
    plt.close('all')
def getStatisticalIndex():
    '''
    Print, as one JSON object, summary statistics for every column of df.

    Works on the module-level names df, iObs and colN. The printed object
    has the keys 'means', 'stds', 'rmses', 'coefs', 'nses', 'r2s' and
    'labels', each a list with one entry per column, in column order.

    For a simulated column, rows where either that column or the
    observation is NaN are dropped first. RMSD, correlation, Nash-Sutcliffe
    efficiency and r^2 are reported as null when the remaining values are
    empty or all falsy (Series.any() is False).

    The observation column is labelled 'Fluxnet' and its mean and standard
    deviation are taken from the full, unfiltered column.

    Non-finite floats (NaN, +/-inf) are written as JSON null so the output
    is always valid JSON.
    '''
    result = {
        'means': [],
        'stds': [],
        'rmses': [],
        'coefs': [],
        'nses': [],
        'r2s': [],
        'labels': []
    }
    for i in range(colN):
        if i == iObs:
            # Read the observation column directly. Relying on a variable
            # left over from a previous iteration fails when the
            # observation is the first column, and otherwise reports
            # statistics of an arbitrarily NaN-filtered subset.
            fullObs = df.iloc[:, iObs]
            result['means'].append(fullObs.mean())
            result['labels'].append('Fluxnet')
            result['stds'].append(fullObs.std())
            result['rmses'].append(0)
            result['coefs'].append(1)
            # NOTE(review): the NSE of a series against itself is 1, not 0;
            # 0 is kept so existing consumers see the same value - confirm.
            result['nses'].append(0)
            result['r2s'].append(1)
            continue

        simLabel = df.columns[i]
        obsLabel = df.columns[iObs]
        # Keep only the rows where both simulation and observation exist.
        thisDF = df.dropna(subset=[simLabel, obsLabel])
        simCol = thisDF[simLabel]
        obsCol = thisDF[obsLabel]

        std = simCol.std()
        mean = simCol.mean()
        if simCol.any():
            rmse = sm.rmsd(simCol, obsCol)
            coef = np.corrcoef(simCol, obsCol)[0, 1]
            nse = 1 - sum((simCol - obsCol)**2) / sum(
                (obsCol - obsCol.mean())**2)
            r2 = coef**2
        else:
            # np.nan is the spelling that exists in every NumPy release
            # (the np.NaN alias was removed in NumPy 2.0).
            rmse = coef = nse = r2 = np.nan

        result['means'].append(mean)
        result['labels'].append(simLabel)
        result['stds'].append(std)
        result['rmses'].append(rmse)
        result['coefs'].append(coef)
        result['nses'].append(nse)
        result['r2s'].append(r2)

    # Replace non-finite floats by None before serialising. Patching the
    # JSON text afterwards would also rewrite labels that contain "NaN"
    # and would leave invalid "Infinity" tokens in the output.
    cleaned = {
        key: [
            None if isinstance(value, float) and not np.isfinite(value)
            else value
            for value in values
        ]
        for key, values in result.items()
    }
    jsonStr = json.dumps(cleaned)
    print(jsonStr)
Exemple #4
0
    # Calculate various skill metrics, writing results to screen
    # and Excel file. Use an ordered dictionary so skill metrics are
    # saved in the Excel file in the same order as written to screen.
    stats = OrderedDict()

    # Read data from pickle file
    # NOTE(review): load_obj is defined outside this excerpt; if it
    # unpickles the file as stated above, only load files from a trusted
    # source.
    data = load_obj('target_data')
    pred = data.pred1['data']
    ref = data.ref['data']

    # Get bias
    stats['bias'] = sm.bias(pred, ref)
    print('Bias = ' + str(stats['bias']))

    # Get Root-Mean-Square-Deviation (RMSD)
    stats['rmsd'] = sm.rmsd(pred, ref)
    print('RMSD = ' + str(stats['rmsd']))

    # Get Centered Root-Mean-Square-Deviation (CRMSD)
    stats['crmsd'] = sm.centered_rms_dev(pred, ref)
    print('CRMSD = ' + str(stats['crmsd']))

    # Get Standard Deviation (SDEV) of the predictions.
    # np.std uses its default ddof=0 here (population standard deviation).
    stats['sdev'] = np.std(pred)
    print('SDEV = ' + str(stats['sdev']))

    # Get correlation coefficient (r): np.corrcoef returns a correlation
    # matrix, and element [0, 1] is the pred-vs-ref entry
    # (assumes pred and ref are 1-D - TODO confirm).
    ccoef = np.corrcoef(pred, ref)
    stats['ccoef'] = ccoef[0, 1]
    print('r = ' + str(stats['ccoef']))
Exemple #5
0
        # Flatten the observation field to 1-D so it can be compared
        # element-wise with each flattened model field below.
        observationData = observationData.reshape(observationData.size)

        # Statistics for the Taylor diagram. The first entry of each list
        # is the observation compared with itself (RMSD 0, correlation 1).
        stds = []
        rmses = []
        coefs = []
        stds.append(observationData.std())
        rmses.append(0)
        coefs.append(1)

        for i, nc in enumerate(cmip.ncPaths):
            # Index 0 is skipped - presumably it is the observation
            # dataset already handled above; verify against cmip.ncPaths.
            if i == 0:
                continue
            data = cmip.getData(i, allTime=False, timeIndex=0)
            data = data.reshape(data.size)
            std = data.std()
            rmse = sm.rmsd(data, observationData)
            # Masked-array variant of corrcoef; [0, 1] is the
            # data-vs-observation entry of the correlation matrix.
            coef = np.ma.corrcoef(data, observationData)[0, 1]

            stds.append(std)
            rmses.append(rmse)
            coefs.append(coef)
        # Optional custom axis ticks, currently disabled:
        # intervalsCOR = np.concatenate((np.arange(0,1.0,0.2), [0.9, 0.95, 0.99, 1]))
        sm.taylor_diagram(
            np.array(stds),
            np.array(rmses),
            np.array(coefs),
            markerLabel=cmip.markerLabels,
            # tickRMS = np.arange(0,25,10),
            # tickSTD = np.arange(9,20,5),
            # tickCOR = intervalsCOR,
            rmslabelformat=':.1f')
Exemple #6
0
####### Plot Nov
# Observed values as points, SVR (RBF kernel) prediction as a line.
plt.scatter(fechan, realn, color='black', label='Data')  # initial data
plt.plot(fechan, testn, color='red', label='Modelo RBF')  # RBF kernel
plt.title('Tunal (Nov 22 y 23)')
# Alternative kernels, currently disabled:
#plt.plot(fechas,svr_lin.predict(fechas), color= 'green', label= 'Modelo Lineal') # linear kernel
#plt.plot(fechas,svr_poly.predict(fechas), color= 'blue', label= 'Modelo Polinomial') # polynomial kernel
plt.xlabel('Horas')
plt.ylabel('Concentracion de PM_25')
plt.legend()
# Save before show(): the figure may be empty once the window is closed.
plt.savefig('Tun_Nov_22-23.png')
plt.show()
plt.close()

# Skill metrics for the February period: correlation matrix, RMSD, bias.
print('------ Febrero 14 y 15 de 2019 ------')
print(np.corrcoef(real, test))
print(sm.rmsd(test, np.array(real)))
print(sm.bias(test, np.array(real)))

# Same metrics for the November period.
print('------ Noviembre 22 y 23 de 2018 ------')
print(np.corrcoef(realn, testn))
print(sm.rmsd(testn, np.array(realn)))
print(sm.bias(testn, np.array(realn)))

##### Create files
## Feb
soda = {'SVR_tun': test.tolist()}
df = pd.DataFrame(soda, columns=['SVR_tun'])
df.to_csv('tun_feb_svr.csv')

## Nov
# Export the November predictions (testn); the February array (test) was
# used here by mistake, which would have duplicated the February export.
# Assumes testn is a NumPy array like test - TODO confirm.
soda = {'SVR_tun': testn.tolist()}