# Example 1
# Load npz files. The encoding argument is used only if npz files have been
# saved using py2.x and are loaded by py3.x.
# Calculate maximum distance vector size.
d = []
fff = []
# Use a larger base font size for all matplotlib figures in this script.
font = {'size': 18}
plt.rc('font', **font)
for filename in npz_files:
    plt.close('all')
    # Load the feature matrix and along-track distance from the npz file.
    matrix, distance, _ = ml_utilities.feature_matrix_from_npz(
        os.path.join(filespath, filename))
    # Skip short tracks; presumably 2000 samples is the minimum usable
    # length for the cross-correlation below -- TODO confirm.
    if distance.size < 2000:
        continue
    # Imputate NaNs by interpolation; drop_nan=False keeps rows that are
    # still NaN after interpolation.
    matrix, _ = ml_utilities.imputate_nans_feature_matrix(matrix,
                                                          method='Interpolate',
                                                          drop_nan=False)

    # SSHA_105 is used as the label; all SSHA columns are then removed so
    # they cannot appear among the features.
    label = np.array(matrix['SSHA_105'])
    #    label = ml_utilities.matrix_min_max_rescale(label, 1, -1, axis=0)
    matrix = matrix.drop(columns=['SSHA_35', 'SSHA_71', 'SSHA_105'])
    #    matrix = ml_utilities.matrix_min_max_rescale(matrix, 0.5, -0.5, axis=0)
    matrix = np.array(matrix)
    matrix = ml_utilities.my_standardizer(matrix, matrix)  # standardize
    label = ml_utilities.my_standardizer(np.expand_dims(label, axis=1),
                                         np.expand_dims(label,
                                                        axis=1))  # standardize
    matrix = matrix.squeeze()
    label = label.squeeze()
    # NOTE(review): this statement is truncated in the source -- the
    # plt.xcorr(...) call is never closed before the next fragment begins.
    _, ccorr, _, _ = plt.xcorr(label,
                               matrix,
        # NOTE(review): this fragment begins mid-function; the enclosing
        # loop/def and the definitions of other_variables, matrix, X, Y,
        # n_x, model, empty_frames and npz_file_grid are outside the
        # visible source.
        n_y = other_variables['n_y']

        # Re-order features so they will be the same order as when the
        # models were trained.
        matrix = matrix.reindex(columns=[
            'KD490_M07_OLCI_150km', 'SST_32km', 'SST_125km',
            'TSM_NN_OLCI_95km', 'SST_95km', 'KD490_M07_OLCI_32km',
            'ADG443_NN_OLCI_150km', 'TSM_NN_OLCI_32km', 'CHL_OC4ME_OLCI_95km',
            'SST_53km', 'SST_12.5km', 'KD490_M07_OLCI_95km',
            'ADG443_NN_OLCI_95km', 'CHL_OC4ME_OLCI_150km', 'SST_150km',
            'CHL_OC4ME_OLCI_32km', 'ADG443_NN_OLCI_32km', 'TSM_NN_OLCI_150km'
        ])

        # =============================================================================
        # APPLY MODEL
        # =============================================================================
        # 'Kickout' drops rows containing NaNs; idx_nan is a boolean mask of
        # the rows that were removed, used below to restore the grid shape.
        matrix_2, idx_nan = ml_utilities.imputate_nans_feature_matrix(
            matrix, method='Kickout')
        if matrix_2.empty:
            # Nothing left to predict on for this grid; record it and move on.
            empty_frames.append(npz_file_grid)
            continue

        y_hat = model.predict(matrix_2)

        # Recreate grid: start from an all-NaN vector of the original length
        # and scatter the predictions back into the non-NaN positions.
        y_hat_new = np.zeros(shape=matrix.shape[0]) * np.nan
        y_hat_new[~idx_nan] = y_hat

        # Reshape flat vectors back to the 2-D (n_x, n_y) grid for plotting.
        y_hat_new = y_hat_new.reshape([n_x, n_y])
        X = X.reshape([n_x, n_y])
        Y = Y.reshape([n_x, n_y])

        # =============================================================================
# Example 3
# Accumulate a training set by concatenating the label (SSHA_35) and the
# feature matrix from every readable npz file. Relies on pre-existing
# globals: npz_files, filespath, var_to_drop, label, matrix, i, N_npz_files.
for filename in npz_files:
    try:

        # Report progress on a single, continuously rewritten console line.
        sys.stdout.write('\rFiles {0} out of {1}'.format(i, N_npz_files))
        sys.stdout.flush()

        fullpath = os.path.join(filespath, filename)
        matrix_temp, distance, _ = ml_utilities.feature_matrix_from_npz(
            fullpath)

        # =============================================================================
        # MISSING VALUES IMPUTATION
        # =============================================================================
        # Interpolate NaNs; drop_nan=True discards rows that remain NaN.
        matrix_temp, _ = ml_utilities.imputate_nans_feature_matrix(
            matrix_temp, method='Interpolate', drop_nan=True)

        # SSHA_35 is the regression target (label).
        label_temp = matrix_temp['SSHA_35']

        matrix_temp = matrix_temp.drop(columns=var_to_drop)

        # Append this file's label and features to the accumulated set.
        label = pd.concat([label, label_temp], axis=0)
        matrix = pd.concat([matrix, matrix_temp], axis=0, ignore_index=True)
        i = i + 1
    except Exception:
        # Best-effort: skip unreadable/malformed files but keep the progress
        # denominator consistent. Catch Exception rather than a bare
        # `except:` so KeyboardInterrupt/SystemExit still propagate.
        print('STOPPED')
        N_npz_files = N_npz_files - 1
# Example 4
# Apply a saved random-forest model to the npz files whose date stamp
# matches the model's, then plot the prediction.
npz_files = os.listdir(path_npzfiles)
# Keep only npz archives.
npz_files = [item for item in npz_files if 'npz' in item]

model_name = 'S3B_2019-03-28 14_55_41__2019-03-28 01_16_43_RF_slstr_model.sav'

for npz in npz_files:
    # Process only files whose characters 4:14 (presumably the date stamp --
    # TODO confirm) match the model's.
    if npz[4:14] != model_name[4:14]:
        continue

    # Read the trained model. Use a context manager so the file handle is
    # closed (the original leaked it). NOTE(review): pickle.load is unsafe
    # on untrusted files; these models are assumed locally produced.
    with open(os.path.join(path_models, model_name), 'rb') as model_file:
        model = pickle.load(model_file)

    # Read the npz file; interpolate NaNs and drop rows that remain NaN.
    matrix, distance, _ = ml_utilities.feature_matrix_from_npz(
        os.path.join(path_npzfiles, npz))
    matrix, idx_nan = ml_utilities.imputate_nans_feature_matrix(
        matrix, method='Interpolate', drop_nan=True)

    # SSHA_35 is the target; drop it and the SST columns from the features.
    label = np.array(matrix['SSHA_35'])
    matrix = matrix.drop(columns=['SSHA_35', 'SST_125km', 'SST_95km',
                                  'SST_75km', 'SST_32km', 'SST_16km',
                                  'SST_12.5km'])

    matrix_labels = list(matrix.columns)  # keep feature matrix names
    matrix = np.array(matrix)

    # Predict
    y_hat = model.predict(matrix)

    # PLOT
    font = {'size': 18}
    plt.rc('font', **font)
    fig = plt.figure(figsize=(13, 16))
# Example 5
    # NOTE(review): this fragment begins mid-loop; file_name, filespath,
    # label, matrix and counter_2 are defined outside the visible source.
    # Load the npz archive; encoding='latin1' applies only when files were
    # saved using py2.x and are loaded by py3.x
    dat = np.load(os.path.join(filespath, file_name), encoding='latin1', allow_pickle=True)
    
    # Retrieve dictionary (np.savez stored the dict as a 0-d object array)
    dat = dat['arr_0'].item()
    # Keep distance in variable
#    distance = dat['Distance']
    # Remove the non-feature entries before building the DataFrame.
    del dat['Metadata'], dat['Distance']
    
    # =============================================================================
    # MISSING VALUES HANDLING            
    # =============================================================================
    # Assign label and feature matrix to temporary variables
    data_temp = pd.DataFrame.from_dict(dat, dtype=np.float32)
    
    # NOTE(review): the other fragments unpack a (matrix, idx) tuple from
    # this call; here the result is used directly as a DataFrame -- verify
    # against the ml_utilities version this script targets.
    data_temp = ml_utilities.imputate_nans_feature_matrix(data_temp, method='Interpolate', drop_nan=True)
   
    # Concatenate label (SSHA_35 is the regression target)
    label = pd.concat([label, pd.DataFrame(data_temp['SSHA_35'])])
    # Delete SSHA column and keep the SST columns
    data_temp = data_temp.drop(columns=['SSHA_35'])
    
    # Concatenate features (SST) to matrix
    matrix = pd.concat([matrix, data_temp], axis=0)

    counter_2 = counter_2 + 1

del data_temp

# Rescale
# Upper bound used by the rescaling step that follows this fragment.
ub = 1