# load npz files. encoding argument is used only if npz files have been # saved using py2s.x and are loaded by py3.x # Calculate Maximum distance vector size d = [] fff = [] font = {'size': 18} plt.rc('font', **font) for filename in npz_files: plt.close('all') matrix, distance, _ = ml_utilities.feature_matrix_from_npz( os.path.join(filespath, filename)) if distance.size < 2000: continue # Imputate NaNs matrix, _ = ml_utilities.imputate_nans_feature_matrix(matrix, method='Interpolate', drop_nan=False) label = np.array(matrix['SSHA_105']) # label = ml_utilities.matrix_min_max_rescale(label, 1, -1, axis=0) matrix = matrix.drop(columns=['SSHA_35', 'SSHA_71', 'SSHA_105']) # matrix = ml_utilities.matrix_min_max_rescale(matrix, 0.5, -0.5, axis=0) matrix = np.array(matrix) matrix = ml_utilities.my_standardizer(matrix, matrix) # standardize label = ml_utilities.my_standardizer(np.expand_dims(label, axis=1), np.expand_dims(label, axis=1)) # standardize matrix = matrix.squeeze() label = label.squeeze() _, ccorr, _, _ = plt.xcorr(label, matrix,
n_y = other_variables['n_y'] # Re-order features so they will be the same order as when the models where trained matrix = matrix.reindex(columns=[ 'KD490_M07_OLCI_150km', 'SST_32km', 'SST_125km', 'TSM_NN_OLCI_95km', 'SST_95km', 'KD490_M07_OLCI_32km', 'ADG443_NN_OLCI_150km', 'TSM_NN_OLCI_32km', 'CHL_OC4ME_OLCI_95km', 'SST_53km', 'SST_12.5km', 'KD490_M07_OLCI_95km', 'ADG443_NN_OLCI_95km', 'CHL_OC4ME_OLCI_150km', 'SST_150km', 'CHL_OC4ME_OLCI_32km', 'ADG443_NN_OLCI_32km', 'TSM_NN_OLCI_150km' ]) # ============================================================================= # APPLY MODEL # ============================================================================= matrix_2, idx_nan = ml_utilities.imputate_nans_feature_matrix( matrix, method='Kickout') if matrix_2.empty: empty_frames.append(npz_file_grid) continue y_hat = model.predict(matrix_2) # Recreate grid y_hat_new = np.zeros(shape=matrix.shape[0]) * np.nan y_hat_new[~idx_nan] = y_hat y_hat_new = y_hat_new.reshape([n_x, n_y]) X = X.reshape([n_x, n_y]) Y = Y.reshape([n_x, n_y]) # =============================================================================
for filename in npz_files:
    try:
        # Progress indicator (overwrites the same console line).
        sys.stdout.write('\rFiles {0} out of {1}'.format(i, N_npz_files))
        sys.stdout.flush()
        fullpath = os.path.join(filespath, filename)
        matrix_temp, distance, _ = ml_utilities.feature_matrix_from_npz(
            fullpath)
        # =====================================================================
        # MISSING VALUES IMPUTATION
        # =====================================================================
        matrix_temp, _ = ml_utilities.imputate_nans_feature_matrix(
            matrix_temp, method='Interpolate', drop_nan=True)
        label_temp = matrix_temp['SSHA_35']
        matrix_temp = matrix_temp.drop(columns=var_to_drop)
        # Concatenate label and features (SST) to the accumulated matrices.
        label = pd.concat([label, label_temp], axis=0)
        matrix = pd.concat([matrix, matrix_temp], axis=0, ignore_index=True)
        # if i == 5:
        #     break
        i = i + 1
    except Exception as err:
        # Previously a bare `except:` that swallowed every error (including
        # KeyboardInterrupt) and printed no context; report the failing file
        # and reduce the effective file count.
        print('STOPPED: {0} ({1})'.format(filename, err))
        N_npz_files = N_npz_files - 1
npz_files = os.listdir(path_npzfiles)
npz_files = [item for item in npz_files if 'npz' in item]

model_name = 'S3B_2019-03-28 14_55_41__2019-03-28 01_16_43_RF_slstr_model.sav'

# Read model ONCE before the loop (it is loop-invariant: the original
# re-loaded the pickle on every iteration) and close the file handle via a
# context manager instead of leaking it.
with open(os.path.join(path_models, model_name), 'rb') as model_file:
    model = pickle.load(model_file)

for npz in npz_files:
    # Guard clause: only process files whose tag (chars 4:14, presumably the
    # date — TODO confirm) matches the model's. Replaces the original
    # `if ==: pass / else: continue` construction.
    if npz[4:14] != model_name[4:14]:
        continue
    # Read npz file
    matrix, distance, _ = ml_utilities.feature_matrix_from_npz(
        os.path.join(path_npzfiles, npz))
    matrix, idx_nan = ml_utilities.imputate_nans_feature_matrix(
        matrix, method='Interpolate', drop_nan=True)
    # Label is the 35 km SSHA; SSHA and SST columns are removed from the
    # feature matrix before prediction.
    label = np.array(matrix['SSHA_35'])
    matrix = matrix.drop(columns=['SSHA_35', 'SST_125km', 'SST_95km',
                                  'SST_75km', 'SST_32km', 'SST_16km',
                                  'SST_12.5km'])
    matrix_labels = list(matrix.columns)  # keep feature matrix names
    matrix = np.array(matrix)
    # Predict
    y_hat = model.predict(matrix)
    # PLOT
    font = {'size': 18}
    plt.rc('font', **font)
    fig = plt.figure(figsize=(13, 16))
# saved using py2.x and are loaded by py3.x
dat = np.load(os.path.join(filespath, file_name), encoding='latin1',
              allow_pickle=True)
# Retrieve dictionary stored under the default savez key.
dat = dat['arr_0'].item()
# Keep distance in variable
# distance = dat['Distance']
del dat['Metadata'], dat['Distance']
# =============================================================================
# MISSING VALUES HANDLING
# =============================================================================
# Assign label and feature matrix to temporary variables
data_temp = pd.DataFrame.from_dict(dat, dtype=np.float32)
# BUG FIX: imputate_nans_feature_matrix returns a (matrix, idx) tuple
# everywhere else in this codebase; the original assigned the tuple itself to
# data_temp, which breaks the column access below. Unpack it instead.
data_temp, _ = ml_utilities.imputate_nans_feature_matrix(data_temp,
                                                         method='Interpolate',
                                                         drop_nan=True)
# Concatenate label
label = pd.concat([label, pd.DataFrame(data_temp['SSHA_35'])])
# Delete SSHA column and keep the SST columns
data_temp = data_temp.drop(columns=['SSHA_35'])
# Concatenate features (SST) to matrix
matrix = pd.concat([matrix, data_temp], axis=0)
counter_2 = counter_2 + 1
del data_temp

# Rescale
ub = 1