Python transform_data 예제들, src.nnload.transform_data Python 예제들

예제 #1

0

파일 보기

파일: nnio.py 프로젝트: jgdwyer/ML-convection

def write_netcdf_convcond_v1():
    """Export the conv+cond network weights and scalings to a netCDF file.

    Loads the pickled regressor and its preprocessors, runs one transform
    and one prediction (the original notes say both are required before
    the parameters can be exported), and then writes the two weight
    matrices, the two bias vectors, the input mean/scale and the output
    scale factors to a hard-coded netCDF path.

    Weight matrices are written transposed because, per the original
    notes, the Fortran reader sees the dimensions in reverse order.
    """
    mlp_str = 'convcond_X-StandardScaler-qTindi_Y-SimpleY-qTindi_' +\
        'Ntrnex100000_r_100R_mom0.9reg1e-05_Niter10000_v3'
    datasource = './data/convcond_training_v3.pkl'
    # Set output filename
    filename = '/Users/jgdwyer/neural_weights_convcond_v1.nc'
    # Load ANN and preprocessors; the context manager closes the file handle.
    # NOTE(review): pickle can execute arbitrary code -- trusted files only.
    with open('./data/regressors/' + mlp_str + '.pkl', 'rb') as pkl_file:
        mlp, _, _, x_ppi, y_ppi, x_pp, y_pp, _, lev, _ = \
            pickle.load(pkl_file)
    # Need to first transform some data for preprocessors to be able to
    # export params
    x_unscl, y_unscl, _, _, _, _, _, _ = nnload.LoadData(datasource,
                                                         minlev=min(lev))
    x_scl = nnload.transform_data(x_ppi, x_pp, x_unscl)
    # The scaled y values are not needed; the call is kept only because the
    # original performs it before exporting the preprocessor parameters.
    nnload.transform_data(y_ppi, y_pp, y_unscl)
    # Also need to first use the predict method to be able to export ANN
    # params
    mlp.predict(x_scl)
    # Grab weights and input normalization
    layers = mlp.get_parameters()
    w1, b1 = layers[0].weights, layers[0].biases
    w2, b2 = layers[1].weights, layers[1].biases
    xscale_mean = x_pp.mean_
    xscale_stnd = x_pp.scale_
    # The output scale vector holds y_pp[0] for the first Nlev outputs and
    # y_pp[1] for the remaining ones.
    Nlev = len(lev)
    yscale_absmax = np.zeros(b2.shape)
    yscale_absmax[:Nlev] = y_pp[0]
    yscale_absmax[Nlev:] = y_pp[1]
    float_type = np.dtype('float64').char
    # Write weights to file; the context manager closes the dataset even if
    # one of the writes fails.
    with Dataset(filename, 'w') as ncfile:
        # Write the dimensions
        ncfile.createDimension('N_in', w1.shape[0])
        ncfile.createDimension('N_h1', w1.shape[1])
        ncfile.createDimension('N_out', w2.shape[1])
        # Create variable entries in the file (weight dims are reversed)
        nc_w1 = ncfile.createVariable('w1', float_type, ('N_h1', 'N_in'))
        nc_w2 = ncfile.createVariable('w2', float_type, ('N_out', 'N_h1'))
        nc_b1 = ncfile.createVariable('b1', float_type, ('N_h1',))
        nc_b2 = ncfile.createVariable('b2', float_type, ('N_out',))
        nc_xscale_mean = ncfile.createVariable('xscale_mean', float_type,
                                               ('N_in',))
        nc_xscale_stnd = ncfile.createVariable('xscale_stnd', float_type,
                                               ('N_in',))
        nc_yscale_absmax = ncfile.createVariable('yscale_absmax', float_type,
                                                 ('N_out',))
        # Write variables - transpose because fortran reads it in "backwards"
        nc_w1[:] = w1.T
        nc_w2[:] = w2.T
        nc_b1[:] = b1
        nc_b2[:] = b2
        nc_xscale_mean[:] = xscale_mean
        nc_xscale_stnd[:] = xscale_stnd
        nc_yscale_absmax[:] = yscale_absmax
        # Write global file attributes
        ncfile.description = mlp_str

예제 #2

0

파일 보기

파일: nntrain.py 프로젝트: jgdwyer/nn-convection

def PreprocessData(x_ppi, x, y_ppi, y, pp_str, N_trn_exs):
    """Scale the inputs and outputs and extend the preprocessor label.

    A preprocessor is initialized for ``x`` and for ``y`` with
    ``nnload.init_pp`` and applied with ``nnload.transform_data``. The
    label ``pp_str`` is extended with the name and the first six characters
    of the method of each preprocessor, followed by the number of training
    examples.

    Returns the tuple ``(x_pp, x, y_pp, y, pp_str)`` holding the two
    preprocessors, the transformed data and the extended label.
    """
    # Inputs first, then outputs
    x_pp = nnload.init_pp(x_ppi, x)
    x_scaled = nnload.transform_data(x_ppi, x_pp, x)
    y_pp = nnload.init_pp(y_ppi, y)
    y_scaled = nnload.transform_data(y_ppi, y_pp, y)
    # Assemble the label used when saving: X part, Y part, example count
    label = ''.join([
        pp_str,
        'X-', x_ppi['name'], '-', x_ppi['method'][:6], '_',
        'Y-', y_ppi['name'], '-', y_ppi['method'][:6], '_',
        'Ntrnex', str(N_trn_exs), '_',
    ])
    return x_pp, x_scaled, y_pp, y_scaled, label

예제 #3

0

파일 보기

def PreprocessData(x_ppi, x, y_ppi, y, pp_str, N_trn_exs):
    """Transform x and y with freshly initialized preprocessors.

    Each dataset gets a preprocessor from ``nnload.init_pp`` and is then
    passed through ``nnload.transform_data``. The save label ``pp_str`` is
    extended with an ``X-<name>-<method[:6]>_`` part, a matching ``Y-``
    part and an ``Ntrnex<N_trn_exs>_`` part.

    Returns ``(x_pp, x, y_pp, y, pp_str)``: both preprocessors, both
    transformed datasets and the extended label.
    """
    x_pp = nnload.init_pp(x_ppi, x)
    x_out = nnload.transform_data(x_ppi, x_pp, x)
    y_pp = nnload.init_pp(y_ppi, y)
    y_out = nnload.transform_data(y_ppi, y_pp, y)
    # Pieces of the label, in the order they are appended
    pieces = [pp_str]
    pieces += ['X-', x_ppi['name'], '-', x_ppi['method'][:6], '_']
    pieces += ['Y-', y_ppi['name'], '-', y_ppi['method'][:6], '_']
    # Number of training examples goes last
    pieces += ['Ntrnex', str(N_trn_exs), '_']
    return x_pp, x_out, y_pp, y_out, ''.join(pieces)

예제 #4

0

파일 보기

파일: nnio.py 프로젝트: jgdwyer/ML-convection

def verify_netcdf_weights():
    """Check that the exported netCDF weights reproduce the pickled network.

    Loads unscaled test data, the pickled regressor with its preprocessors,
    and the weights/scalings stored in the netCDF file. It then prints the
    summed absolute difference between

    * the x and y scalings done by the preprocessors and those done by hand
      from the netCDF scale factors, and
    * the network prediction and a by-hand forward pass (one hidden layer
      with negative activations set to zero) using the netCDF weights.
    """
    r_str = 'convcond_X-StandardScaler-qTindi_Y-SimpleY-qTindi_' +\
        'Ntrnex100000_r_100R_mom0.9reg1e-05_Niter10000_v3'
    nc_str = '/Users/jgdwyer/neural_weights_convcond_v1.nc'
    # Load unscaled data
    x, y, _, _, _, _, _, _ = \
        nnload.LoadData('./data/convcond_testing_v3.pkl',
                        0.2, all_lats=True, indlat=None, rainonly=False)
    # Load preprocessors; the context manager closes the file handle.
    # NOTE(review): pickle can execute arbitrary code -- trusted files only.
    with open('./data/regressors/' + r_str + '.pkl', 'rb') as pkl_file:
        r_mlp_eval, _, _, x_ppi, y_ppi, x_pp, y_pp, _, _, _ = \
            pickle.load(pkl_file)
    print('Loading predictor: ' + r_str)
    print('Loading ncfile :' + nc_str)
    # Read everything needed from the netCDF file, then close it
    with Dataset(nc_str, 'r') as ncfile:
        # Scale factors become row vectors so they broadcast over samples
        yscale_absmax = ncfile['yscale_absmax'][:][:, None].T
        xscale_mean = ncfile['xscale_mean'][:][:, None].T
        xscale_std = ncfile['xscale_stnd'][:][:, None].T
        # Weights were stored transposed, so transpose them back
        w1 = ncfile['w1'][:].T
        w2 = ncfile['w2'][:].T
        b1 = ncfile['b1'][:]
        b2 = ncfile['b2'][:]
    print(x_ppi)
    # Scaled variables as calculated by NN weights
    xs = nnload.transform_data(x_ppi, x_pp, x)
    ys = nnload.transform_data(y_ppi, y_pp, y)
    # Scaled variables as calculated by hand from netcdf files
    xs_byhand = (x - xscale_mean) / xscale_std
    ys_byhand = y / yscale_absmax
    print('Difference between x-scaling methods: {:.1f}'.format(
        np.sum(np.abs(xs - xs_byhand))))
    print('Difference between y-scaling methods: {:.1f}'.format(
        np.sum(np.abs(ys - ys_byhand))))
    # Now check that transformation is done correctly: hidden layer,
    # zero out negative activations, then output layer
    yps_byhand = np.dot(xs_byhand, w1) + b1
    yps_byhand[yps_byhand < 0] = 0
    yps_byhand = np.dot(yps_byhand, w2) + b2
    yps = r_mlp_eval.predict(xs)
    print('Difference between predicted tendencies: {:.1f}'.format(
        np.sum(np.abs(yps - yps_byhand))))

예제 #5

0

파일 보기

파일: nnio.py 프로젝트: jgdwyer/ML-convection

def compare_convcond_prediction(cv_str, cvcd_str, minlev):
    """Print the 'q' MSE of a conv-only and a conv+cond regressor.

    Args:
        cv_str: Name (without ``.pkl``) of the pickled convection-only
            regressor under ``./data/regressors/``.
        cvcd_str: Name (without ``.pkl``) of the pickled convection plus
            condensation regressor under ``./data/regressors/``.
        minlev: Passed to ``nnload.LoadData`` as ``minlev``.

    Raises:
        ValueError: If the two regressors were saved with different
            preprocessing descriptions, or if the two test datasets do not
            contain identical x values.
    """
    # Keep each model's fitted preprocessors under their own names so the
    # second load does not silently replace those of the first.
    # NOTE(review): pickle can execute arbitrary code -- trusted files only.
    with open('./data/regressors/' + cv_str + '.pkl', 'rb') as pkl_file:
        cv_mlp, _, _, x_ppi, y_ppi, x_pp_cv, y_pp_cv, _, _, _ = \
            pickle.load(pkl_file)
    with open('./data/regressors/' + cvcd_str + '.pkl', 'rb') as pkl_file:
        (cvcd_mlp, _, _, x_ppi_check, y_ppi_check,
         x_pp_cvcd, y_pp_cvcd, _, _, _) = pickle.load(pkl_file)
    # Check that preprocessers are the same
    if ((x_ppi != x_ppi_check) or (y_ppi != y_ppi_check)):
        raise ValueError('Preprocessing schemes different for conv only and ' +
                         'conv+cond! This means that comparing the two in ' +
                         'scaled space may give different results')
    # Load data
    x_unscl, ytcv_unscl, _, _, _, _, _, _ = \
        nnload.LoadData('./data/conv_testing_v3.pkl', minlev=minlev,
                        N_trn_exs=10000, randseed=True)
    xcvcd_unscl, ytcvcd_unscl, _, _, _, _, _, _ = \
        nnload.LoadData('./data/convcond_testing_v3.pkl', minlev=minlev,
                        N_trn_exs=10000, randseed=True)
    # Check that x values are the same to make sure random seeds are same
    if np.sum(np.abs(x_unscl - xcvcd_unscl)) > 0.0:
        raise ValueError('Data loaded in different order!')
    # Scale the inputs once per model with that model's own preprocessor
    # (the original referenced an undefined x_scl here)
    x_scl_cv = nnload.transform_data(x_ppi, x_pp_cv, x_unscl)
    x_scl_cvcd = nnload.transform_data(x_ppi, x_pp_cvcd, x_unscl)
    # Convert true y-values to scaled space with the matching preprocessor
    ytcv_scl = nnload.transform_data(y_ppi, y_pp_cv, ytcv_unscl)
    ytcvcd_scl = nnload.transform_data(y_ppi, y_pp_cvcd, ytcvcd_unscl)
    # Calculate predicted y values for conv and convcond
    ypcv_scl = cv_mlp.predict(x_scl_cv)
    ypcvcd_scl = cvcd_mlp.predict(x_scl_cvcd)
    # Compare predictions with truth for the 'q' part of the output
    v = 'q'
    mse_cvcd_predictboth = nnload.calc_mse(nnload.unpack(ypcvcd_scl, v),
                                           nnload.unpack(ytcvcd_scl, v),
                                           relflag=True)
    mse_cv = nnload.calc_mse(nnload.unpack(ypcv_scl, v),
                             nnload.unpack(ytcv_scl, v),
                             relflag=True)
    print('MSE predicting convection and condensation in one step: {:.5f}'.
          format(mse_cvcd_predictboth))
    print('MSE predicting convection only (no condensation): {:.5f}'.format(
        mse_cv))

예제 #6

0

파일 보기

파일: nnplot.py 프로젝트: jgdwyer/ML-convection

def plot_neural_fortran(training_file, mlp_str, latind=None, timeind=None,
                        ensemble=False):
    """Plot network, GCM and DBM tendencies for one sample and save figures.

    Args:
        training_file: Passed to ``nnload.load_netcdf_onepoint``.
        mlp_str: Name (without ``.pkl``) of the pickled regressor under
            ``./data/regressors/``.
        latind: Passed to ``nnload.load_netcdf_onepoint``.
        timeind: Passed to ``nnload.load_netcdf_onepoint``.
        ensemble: Passed to ``nnload.load_netcdf_onepoint``; when true the
            entries of the returned ensemble collections are also drawn in
            gray.

    Two figures are written: ``./figs/sampletest/out.png`` with the T and q
    outputs and ``./figs/sampletest/in.png`` with the inputs. The GCM, MLP
    and DBM precipitation values for the sample are printed.
    """
    # mlp_str = 'X-StandardScaler-qTindi_Y-SimpleY-qTindi_' + \
    #     'Ntrnex100000_r_100R_mom0.9reg1e-06_Niter10000_v3'
    # The context manager closes the pickle file handle.
    # NOTE(review): pickle can execute arbitrary code -- trusted files only.
    with open('./data/regressors/' + mlp_str + '.pkl', 'rb') as pkl_file:
        mlp, _, _, x_ppi, y_ppi, x_pp, y_pp, _, lev, dlev = \
            pickle.load(pkl_file)
    x_unscl, ytrue_unscl, y_dbm_unscl, Ptrue, P_dbm, ten, qen = \
        nnload.load_netcdf_onepoint(training_file, min(lev), latind=latind,
                                    timeind=timeind, ensemble=ensemble)
    ind = 0  # index of the sample that is plotted and printed
    x_scl = nnload.transform_data(x_ppi, x_pp, x_unscl)
    ypred_scl = mlp.predict(x_scl)
    ypred_unscl = nnload.inverse_transform_data(y_ppi, y_pp, ypred_scl)
    Ppred = nnatmos.calc_precip(nnload.unpack(ypred_unscl, 'q'), dlev)
    # Plot outputs: temperature tendency (left), humidity tendency (right)
    f, (a1, a2) = plt.subplots(1, 2)
    a1.plot(nnload.unpack(ytrue_unscl, 'T')[ind, :], lev, label='GCM dT')
    a1.plot(nnload.unpack(ypred_unscl, 'T')[ind, :], lev, label='NN dT')
    a1.plot(nnload.unpack(y_dbm_unscl, 'T')[ind, :], lev, label='DBM dT')
    if ensemble:
        for key in ten:
            a1.plot(ten[key], lev, color='gray')
    a1.set_xlabel('K/day')
    a1.set_ylim(1, 0)  # reversed limits put lev = 1 at the bottom
    a1.legend()
    a2.plot(nnload.unpack(ytrue_unscl, 'q')[ind, :], lev, label='GCM dq')
    a2.plot(nnload.unpack(ypred_unscl, 'q')[ind, :], lev, label='NN dq')
    a2.plot(nnload.unpack(y_dbm_unscl, 'q')[ind, :], lev, label='DBM dq')
    if ensemble:
        for key in qen:
            a2.plot(qen[key], lev, color='gray')
    a2.set_xlabel('g/kg/day')
    a2.set_ylim(1, 0)
    a2.legend()
    f.savefig('./figs/sampletest/out.png', bbox_inches='tight')
    # Plot inputs
    f, (a1, a2) = plt.subplots(1, 2)
    a1.plot(nnload.unpack(x_unscl, 'T')[ind, :].T, lev, label='input T [K]')
    a1.set_ylim(1, 0)
    q0 = nnload.unpack(x_unscl, 'q')[ind, :]*1000  # now g/kg
    time_step = 20*60  # seconds
    # Scale the predicted tendency from one day to one time step
    # (assumes the tendency is per day, as the axis label above states)
    dq = nnload.unpack(ypred_unscl, 'q')[ind, :]*time_step/3600/24
    # q0 was converted to g/kg above, so label it accordingly
    a2.plot(q0, lev, label='input q [g/kg]')
    a2.plot(q0 + dq, lev, label='q after')
    a2.set_ylim(1, 0)
    # Label the humidity axes explicitly instead of via the pyplot
    # "current axes", which is the same (last created) axes here
    a2.set_xlabel('g/kg')
    a2.legend()
    print('GCM Precip is: {:.2f}'.format(Ptrue[ind]))
    print('MLP Precip is: {:.2f}'.format(Ppred[ind]))
    print('DBM Precip is: {:.2f}'.format(P_dbm[ind]))
    f.savefig('./figs/sampletest/in.png', bbox_inches='tight')

예제 #7

0

파일 보기

파일: nnio.py 프로젝트: jgdwyer/ML-convection

def write_netcdf_ensemble1():
    """Export an ensemble of networks and their scalings to one netCDF file.

    Ten regressors, whose names differ only in the training-example count
    (125000 through 125009), are loaded one at a time. For each one a
    transform and a prediction are run (the original notes say both are
    required before parameters can be exported), and its weights, biases,
    input mean/scale and output scale factors are stored along a trailing
    ensemble axis. All arrays are then written with their axes reversed
    because, per the original notes, the Fortran GCM code reads them
    "backwards".
    """
    ntrns = np.arange(125000, 125010)
    base1 = 'X-StandardScaler-qTindi_Y-SimpleY-qTindi_Ntrnex'
    base2 = '_r_50R_mom0.9reg1e-06_Niter3000_v3'
    mlp_str = [base1 + str(ntrn) + base2 for ntrn in ntrns]
    N_e = len(mlp_str)
    datasource = './data/conv_training_v3.pkl'
    # Set output filename
    filename = '/Users/jgdwyer/neural_weights_ensemble1.nc'
    # Fixed layer sizes of the stored networks (inputs, hidden, outputs)
    N_in, N_h1, N_out = 32, 50, 32
    # Pre-allocate storage with the ensemble member as the last axis
    w1 = np.zeros((N_in, N_h1, N_e))
    w2 = np.zeros((N_h1, N_out, N_e))
    b1 = np.zeros((N_h1, N_e))
    b2 = np.zeros((N_out, N_e))
    xscale_mean = np.zeros((N_in, N_e))
    xscale_stnd = np.zeros((N_in, N_e))
    yscale_absmax = np.zeros((N_out, N_e))

    for i, member_str in enumerate(mlp_str):
        # Load ANN and preprocessors; the context manager closes the file.
        # NOTE(review): pickle can execute arbitrary code -- trusted files
        # only.
        with open('./data/regressors/' + member_str + '.pkl',
                  'rb') as pkl_file:
            mlp, _, _, x_ppi, y_ppi, x_pp, y_pp, _, lev, _ = \
                pickle.load(pkl_file)
        # Need to transform some data for preprocessors to be able to export
        # params
        x_unscl, y_unscl, _, _, _, _, _, _ = nnload.LoadData(datasource,
                                                             minlev=min(lev))
        x_scl = nnload.transform_data(x_ppi, x_pp, x_unscl)
        # The scaled y values are not needed; the call is kept only because
        # the original performs it before exporting preprocessor parameters.
        nnload.transform_data(y_ppi, y_pp, y_unscl)
        # Also need to use the predict method to be able to export ANN params
        mlp.predict(x_scl)
        # Grab weights and input normalization
        layers = mlp.get_parameters()
        w1[:, :, i] = layers[0].weights
        w2[:, :, i] = layers[1].weights
        b1[:, i] = layers[0].biases
        b2[:, i] = layers[1].biases
        xscale_mean[:, i] = x_pp.mean_
        xscale_stnd[:, i] = x_pp.scale_
        # y_pp[0] scales the first Nlev outputs, y_pp[1] the remaining ones
        Nlev = len(lev)
        yscale_absmax[:Nlev, i] = y_pp[0]
        yscale_absmax[Nlev:, i] = y_pp[1]
    float_type = np.dtype('float64').char
    # Write weights to file; the context manager closes the dataset even if
    # one of the writes fails.
    with Dataset(filename, 'w') as ncfile:
        # Write the dimensions
        ncfile.createDimension('N_in', w1.shape[0])
        ncfile.createDimension('N_h1', w1.shape[1])
        ncfile.createDimension('N_out', w2.shape[1])
        ncfile.createDimension('N_e', N_e)
        # Create variable entries in the file
        # Variables need to "reversed" to be read in by Fortran GCM code
        nc_w1 = ncfile.createVariable('w1', float_type,
                                      ('N_e', 'N_h1', 'N_in'))
        nc_w2 = ncfile.createVariable('w2', float_type,
                                      ('N_e', 'N_out', 'N_h1'))
        nc_b1 = ncfile.createVariable('b1', float_type, ('N_e', 'N_h1'))
        nc_b2 = ncfile.createVariable('b2', float_type, ('N_e', 'N_out'))
        nc_xscale_mean = ncfile.createVariable('xscale_mean', float_type,
                                               ('N_e', 'N_in'))
        nc_xscale_stnd = ncfile.createVariable('xscale_stnd', float_type,
                                               ('N_e', 'N_in'))
        nc_yscale_absmax = ncfile.createVariable('yscale_absmax', float_type,
                                                 ('N_e', 'N_out'))
        # Write variables - transpose because fortran reads it in
        # "backwards"
        nc_w1[:] = np.transpose(w1, (2, 1, 0))
        nc_w2[:] = np.transpose(w2, (2, 1, 0))
        nc_b1[:] = b1.T
        nc_b2[:] = b2.T
        nc_xscale_mean[:] = xscale_mean.T
        nc_xscale_stnd[:] = xscale_stnd.T
        nc_yscale_absmax[:] = yscale_absmax.T