예제 #1
0
def krigingmodel(DF, dic_polia, dic_latlon):
    """
    Linear-regression prediction plus ordinary kriging of the regression
    residuals.

    DF: dataframe of station measurements; columns 'lon_x' and 'lat_x' are
        read as the station coordinates.
    dic_polia: passed unchanged to regresia.model_reg / regresia.reg_funkcia.
    dic_latlon: mapping with the 'LAT' and 'LON' grid arrays; only their
        extent (min/max) and the grid shape are used here.

    Returns a 4-tuple:
        (KOMPLET_MODEL, KRIGING_ERROR, np.exp(KOMPLET_MODEL),
         np.exp(KRIGING_ERROR))
    where KRIGING_ERROR is the kriged residual field and KOMPLET_MODEL is
    that field added to the regression prediction.
    """
    # Regression prediction on the grid, without interpolation of residuals
    # (original author's note: an array of shape (142, 271)).
    MODELWI = regresia.model_reg(DF, dic_polia, dic_latlon)[0]
    # Per-station residuals (original author's note: pollutant minus PREDICT,
    # one value per measuring station).
    ERROR_stanice = regresia.reg_funkcia(DF, dic_polia, dic_latlon)[4]

    # Kriging inputs: x is longitude, y is latitude, z is the residual.
    station_lon = list(DF['lon_x'])
    station_lat = list(DF['lat_x'])

    # Take the output grid size from the LAT/LON grid instead of hard-coding
    # 142 x 271, so the function follows whatever grid it is given.
    # assumes dic_latlon['LAT'] is a 2-D (rows, cols) array with the same
    # shape as MODELWI - TODO confirm against regresia.model_reg
    n_rows, n_cols = np.shape(dic_latlon['LAT'])

    # Regular axes spanning the extent of the grid.
    # NOTE(review): both axes are ascending; if the rows of MODELWI run from
    # north to south the kriged field would be flipped relative to it -
    # confirm the orientation of dic_latlon['LAT'].
    xi = np.linspace(np.min(dic_latlon['LON']), np.max(dic_latlon['LON']),
                     n_cols)
    yi = np.linspace(np.min(dic_latlon['LAT']), np.max(dic_latlon['LAT']),
                     n_rows)

    # Ordinary kriging (pykrige, per the original note) with a spherical
    # theoretical variogram.
    OK = OrdinaryKriging(station_lon,
                         station_lat,
                         ERROR_stanice,
                         variogram_model='spherical')
    # 'grid' style evaluates on the mesh defined by xi (columns) and
    # yi (rows); the second return value is not used.
    KRIGING_ERROR, _ = OK.execute('grid', xi, yi)

    # Complete model = regression prediction + kriged residuals.
    KOMPLET_MODEL = KRIGING_ERROR + MODELWI
    return KOMPLET_MODEL, KRIGING_ERROR, np.exp(KOMPLET_MODEL), np.exp(
        KRIGING_ERROR)
예제 #2
0
def validation(DF, dic_polia, dic_latlon, model_type):
    """
    Leave-one-out validation of one of three models:
    linear regression ('regresia'),
    linear regression + IDW interpolation of residuals ('regresia + IDW'),
    linear regression + kriging interpolation of residuals
    ('regresia + kriging').

    Every row of DF is dropped in turn, the chosen model is rebuilt from the
    remaining rows, and the model value at the grid cell closest to the
    dropped station ('lat_x', 'lon_x') is collected. The collected values are
    compared with DF['pollutant'] and RMSE, BIAS and the correlation
    coefficient r are printed. Nothing is returned.

    DF: dataframe with at least the columns 'lat_x', 'lon_x', 'pollutant'.
    dic_polia: passed unchanged to the model functions.
    dic_latlon: mapping with the 'LAT' and 'LON' grid arrays.
    model_type: one of the three strings listed above.

    Raises ValueError when model_type is not one of the supported names
    (previously this surfaced later as an unbound `value` variable).
    """
    # Each entry builds the prediction grid for a reduced dataframe.
    # The lambdas keep the model modules lazily referenced, so only the
    # selected model is ever evaluated.
    grid_builders = {
        'regresia + IDW':
        lambda data: model_idw.idwmodel(data, dic_polia, dic_latlon)[2],
        'regresia':
        lambda data: regresia.model_reg(data, dic_polia, dic_latlon)[1],
        'regresia + kriging':
        lambda data: model_kriging.krigingmodel(data, dic_polia,
                                                dic_latlon)[2],
    }
    if model_type not in grid_builders:
        raise ValueError('unknown model_type {!r}; expected one of {}'.format(
            model_type, sorted(grid_builders)))
    build_grid = grid_builders[model_type]

    print('###########################')
    print('leaving one validation for: ' + model_type)

    # One predicted value per station, each from a model fitted without it.
    predicted_value = []
    for i, row in DF.iterrows():
        DF1 = DF.drop([i])
        ix, iy = regresia.getclosest_ij(dic_latlon['LAT'], dic_latlon['LON'],
                                        row['lat_x'], row['lon_x'])
        predicted_value.append(build_grid(DF1)[ix, iy])
    predicted_value = np.array(predicted_value)

    # Same arithmetic as before, only with the residuals named once.
    residuals = DF['pollutant'] - predicted_value
    rmse = (np.sum(residuals**2) * (1 / len(residuals)))**0.5
    bias = np.sum(
        predicted_value - DF['pollutant']) / predicted_value.shape[0]
    r = np.corrcoef(predicted_value, DF['pollutant'])[0, 1]
    print('RMSE={}, BIAS={}, r={}'.format(rmse, bias, r))
def idwmodel(DF, dic_polia, dic_latlon):
    """
    Linear-regression prediction plus IDW interpolation of the regression
    residuals.

    DF: dataframe of station measurements; columns 'lat_x' and 'lon_x' are
        read as the station coordinates.
    dic_polia: passed unchanged to regresia.model_reg / regresia.reg_funkcia.
    dic_latlon: mapping with the 'LAT' and 'LON' grid arrays.

    Returns a 4-tuple:
        (KOMPLET_MODEL, IDW_ERROR, np.exp(KOMPLET_MODEL), np.exp(IDW_ERROR))
    where IDW_ERROR is the interpolated residual field (with the station
    residuals written into their closest grid cells) and KOMPLET_MODEL is
    that field added to the regression prediction.
    """
    # Regression prediction on the grid, without interpolation of residuals
    # (original author's note: an array of shape (142, 271)).
    MODELWI = regresia.model_reg(DF, dic_polia, dic_latlon)[0]
    # Per-station residuals (original author's note: pollutant minus PREDICT,
    # one value per measuring station).
    ERROR_stanice = regresia.reg_funkcia(DF, dic_polia, dic_latlon)[4]

    station_lat = list(DF['lat_x'])
    station_lon = list(DF['lon_x'])

    grid_lat = dic_latlon['LAT']
    grid_lon = dic_latlon['LON']

    # idwr is called with (lon, lat, value, target lon, target lat), one
    # target per grid cell.
    IDW_ERROR = np.asarray(
        idwr(station_lon, station_lat, ERROR_stanice, grid_lon.flatten(),
             grid_lat.flatten()))
    # Fold the per-cell values back into the shape of the grid they were
    # flattened from, rather than a hard-coded (142, 271).
    # assumes idwr returns exactly one value per target point - TODO confirm
    IDW_ERROR = np.reshape(IDW_ERROR, np.shape(grid_lat))

    # Overwrite the cell closest to each station with that station's own
    # residual. ERROR_stanice is indexed with [i] exactly as before, so its
    # indexing semantics (array vs. labelled series) are untouched.
    for i, (lat, lon) in enumerate(zip(station_lat, station_lon)):
        ix, iy = getclosest_ij(grid_lat, grid_lon, lat, lon)
        IDW_ERROR[ix, iy] = ERROR_stanice[i]

    # Complete model = regression prediction + IDW of residuals.
    KOMPLET_MODEL = IDW_ERROR + MODELWI
    return KOMPLET_MODEL, IDW_ERROR, np.exp(KOMPLET_MODEL), np.exp(IDW_ERROR)
def mapy(DF,
         dic_polia,
         dic_latlon,
         name,
         shapefile_path='C:/Users/kocok/Desktop/Shapefile0/slovensko'):
    """
    Draw and save three prediction maps, one PNG file per model:
    1) prediction of linear regression only
    2) prediction of linear regression + IDW interpolation of residuals
    3) prediction of linear regression + kriging interpolation of residuals

    Each map is drawn with the module-level `mapb` object: the model field as
    a colour mesh, the shapefile outline, and the stations of DF as points
    coloured by DF['pollutant']. Both colour scales are clipped to 0-40.

    DF: dataframe with the columns 'lon_x', 'lat_x' and 'pollutant'.
    dic_polia: passed unchanged to the model functions; its number of entries
        is written into the output file names.
    dic_latlon: mapping with the 'LAT' and 'LON' grid arrays.
    name: pollutant name used in the plot title and the colour-bar label.
    shapefile_path: shapefile (without extension) handed to
        mapb.readshapefile; defaults to the previously hard-coded location.
    """
    n_fields = len(dic_polia)
    # Raw string: '\mu' is not a valid Python escape, so the non-raw literal
    # triggered an invalid-escape warning. The resulting text is unchanged.
    colorbar_label = name + r' [$\mu$g.$m^{-3}$]'

    # Every model is evaluated exactly once and carried together with its
    # title suffix and file-name template. The earlier code picked the branch
    # with `array in array`, which is an element-wise "any equal" test and
    # also refitted the models on every iteration.
    maps = (
        (regresia.model_reg(DF, dic_polia, dic_latlon)[1],
         ' za rok 2017 (lineárna regresia)',
         'lnNO2 expregresia {}.png'),
        (model_idw.idwmodel(DF, dic_polia, dic_latlon)[2],
         ' za rok 2017 (lineárna regresia + IDW interpolácia rezíduí)',
         'lnNO2 expregresia+idw pocet poli {}.png'),
        (model_kriging.krigingmodel(DF, dic_polia, dic_latlon)[2],
         ' za rok 2017 (lineárna regresia + OK interpolácia rezíduí)',
         'lnNO2 expregresia+ok pocet poli {}.png'),
    )

    for field, title_suffix, file_template in maps:
        mapb.drawcountries()
        mapb.pcolormesh(dic_latlon['LON'],
                        dic_latlon['LAT'],
                        field,
                        cmap=plt.cm.jet,
                        latlon=True)
        mapb.readshapefile(shapefile_path,
                           'slovensko',
                           drawbounds=True,
                           linewidth=4)
        plt.colorbar(label=colorbar_label, extend='max')
        # First clim call: the colour mesh is the current mappable here.
        plt.clim(0, 40)
        plt.title('Koncentrácia ' + name + title_suffix, fontsize=15)
        mapb.scatter(DF['lon_x'].values,
                     DF['lat_x'].values,
                     c=DF['pollutant'].values,
                     s=30,
                     latlon=True,
                     cmap='jet',
                     alpha=1,
                     edgecolors='black')
        # Second clim call: now applies to the station scatter, keeping it on
        # the same 0-40 scale as the mesh.
        plt.clim(0, 40)
        plt.savefig(file_template.format(n_fields),
                    bbox_inches='tight',
                    dpi=600)
        # Clear the figure so the next map starts from an empty canvas.
        plt.clf()