def load_transform(self, lat, lon):
    """Load one pixel's training data and standardise its lagged predictors.

    Each of the first ``self.order`` columns of X is rescaled with
    ``x_new = (x - mean) / std``, where ``mean`` and ``std`` are read for
    this pixel from the statistics file ``stats_pixel_tcc_all.nc``.

    Parameters
    ----------
    lat : float
        Latitude coordinate.
    lon : float
        Longitude coordinate.

    Returns
    -------
    transformed : numpy.ndarray
        Array of shape ``(n_samples, self.order)`` holding the standardised
        predictors. Samples with a NaN in X or y are removed beforehand.
    y : numpy.ndarray
        Column vector with the matching targets, passed through
        ``inverse_sigmoid`` when ``self.sigmoid`` is set.

    Raises
    ------
    ValueError
        If ``self.order`` is not positive. Only the lagged loader is wired
        up here, so there would be no data to transform.
    """
    order = self.order
    if order <= 0:
        # Previously this fell through and failed later on unbound X / y.
        raise ValueError(
            'load_transform requires order > 0, got {}'.format(order))

    # Move some of this to the dataloader part?
    ds = get_pixel_from_ds(self.dataset, lat, lon)
    X, y = dataset_to_numpy_order_traditional_ar(ds, order=order,
                                                 bias=self.bias)

    # Drop every sample (row) with a NaN in any predictor or in the target.
    a = np.concatenate([X, y], axis=1)
    a = a[~np.isnan(a).any(axis=1)]
    X = a[:, :-1]
    y = a[:, -1, np.newaxis]
    if self.sigmoid:
        y = inverse_sigmoid(y)  # not tested

    # Read both statistics through a single handle and close the file again.
    with xr.open_dataset(base + 'stats_pixel_{}_all.nc'.format('tcc')) as stats:
        m = stats['mean'].sel(latitude=lat, longitude=lon).values
        s = stats['std'].sel(latitude=lat, longitude=lon).values

    # Only the first `order` columns are carried over; any further column of
    # X is not part of the result.
    transformed = np.zeros((X.shape[0], order))
    for k in range(order):
        # Note from the original author: something weird with the rotation
        # of cloud cover values.
        transformed[:, k] = (X[:, k] - m) / s
    return transformed, y
def load(self, lat, lon):
    """Return the NaN-free training arrays for a single pixel.

    Parameters
    ----------
    lat : float
        Latitude coordinate.
    lon : float
        Longitude coordinate.

    Returns
    -------
    X : numpy.ndarray
        Predictor columns of every sample that has no NaN in X or y.
    y : numpy.ndarray
        Matching targets as a column vector.
    """
    # Move some of this to the dataloader part?
    pixel = get_pixel_from_ds(self.dataset, lat, lon)

    if self.order > 0:
        features, target = dataset_to_numpy_order_traditional_ar(
            pixel, order=self.order, bias=self.bias)
    else:
        features, target = dataset_to_numpy(pixel, bias=self.bias)

    # Stack predictors and target side by side so that one row mask removes
    # incomplete samples from both at the same time.
    stacked = np.concatenate([features, target], axis=1)
    has_nan = np.isnan(stacked).any(axis=1)
    complete = stacked[~has_nan]

    # The target is the last column; keep it two-dimensional.  (not tested)
    return complete[:, :-1], complete[:, -1, np.newaxis]
def load_transform_fit(self, lat, lon):
    """Fit the lagged model for one pixel and score it on the test data.

    Training samples containing NaN are dropped. When ``self.transform`` is
    set, the first ``self.order`` predictor columns of both the training and
    the test data are standardised with ``x_new = (x - mean) / std`` using
    this pixel's values from ``stats_pixel_tcc_all.nc``. The model is fitted
    with ``fit_pixel`` and evaluated with ``predict_pixel``.

    Parameters
    ----------
    lat : float
        Latitude coordinate.
    lon : float
        Longitude coordinate.

    Returns
    -------
    tuple
        ``(coeffs, mse, ase, r2, num_test, num_train)`` where ``coeffs`` is
        the flattened coefficient array, ``mse``/``ase``/``r2`` are the first
        element of the respective metric computed on the test data, and
        ``num_test``/``num_train`` count the non-NaN entries in the first
        predictor column of the test and training matrices.

    Raises
    ------
    ValueError
        If ``self.order`` is not positive. Only the lagged loader is wired
        up here, so there would be no data to fit.
    """
    order = self.order
    if order <= 0:
        # Previously this fell through and failed later on unbound X / y.
        raise ValueError(
            'load_transform_fit requires order > 0, got {}'.format(order))

    # Move some of this to the dataloader part?
    ds = get_pixel_from_ds(self.dataset, lat, lon)
    X, y = dataset_to_numpy_order_traditional_ar(ds, order=order,
                                                 bias=self.bias)

    # Drop every sample (row) with a NaN in any predictor or in the target.
    a = np.concatenate([X, y], axis=1)
    a = a[~np.isnan(a).any(axis=1)]
    X = a[:, :-1]
    y = a[:, -1, np.newaxis]
    if self.sigmoid:
        y = inverse_sigmoid(y)  # not tested
    print(y.shape)

    if self.transform:
        # The same statistics apply to the training and the test split, so
        # read them once and close the file again.
        with xr.open_dataset(
                base + 'stats_pixel_{}_all.nc'.format('tcc')) as stats:
            m = stats['mean'].sel(latitude=lat, longitude=lon).values
            s = stats['std'].sel(latitude=lat, longitude=lon).values

    def standardise(features):
        # Rescale the first `order` columns; further columns are not kept.
        scaled = np.zeros((features.shape[0], order))
        for k in range(order):
            # Note from the original author: something weird with the
            # rotation of cloud cover values.
            scaled[:, k] = (features[:, k] - m) / s
        return scaled

    # Without a transform the raw predictors are the training matrix
    # (X_train used to be undefined in that case).
    X_train = standardise(X) if self.transform else X

    if self.test_start is not None and self.test_stop is not None:
        # Based on start and stop decide which files it gets.
        ds = get_pixel_from_ds(self.test_dataset, lat, lon)
    # NOTE(review): when no test window is configured the training pixel is
    # reused for scoring - confirm that this fallback is intended.
    X_test, y_test_true = dataset_to_numpy_order_traditional_ar(
        ds, order, bias=self.bias)
    if self.transform:
        X_test = standardise(X_test)

    num_test = (~np.isnan(X_test)).sum(axis=0)[0]
    num_train = (~np.isnan(X_train)).sum(axis=0)[0]

    # Fit on the same representation that is used for prediction; fitting on
    # the raw X while predicting on standardised X_test mixed two scales.
    coeffs = fit_pixel(X_train, y)
    y_test_pred = predict_pixel(X_test, coeffs)
    if self.sigmoid:
        # NOTE(review): the target was passed through inverse_sigmoid above,
        # so mapping predictions back presumably needs the forward sigmoid -
        # confirm before relying on sigmoid=True.
        y_test_pred = inverse_sigmoid(y_test_pred)

    # TODO: upgrade this to compute train score as well as test score.
    # Strip a trailing axis from four-dimensional arrays. The check is on the
    # number of dimensions; the number of samples must not trigger it.
    if np.ndim(y_test_true) == 4:
        y_test_true = y_test_true[:, :, :, 0]
    if np.ndim(y_test_pred) == 4:
        y_test_pred = y_test_pred[:, :, :, 0]

    # Move most of content in store performance to evaluate
    mse = mean_squared_error(y_test_true, y_test_pred)[0]
    ase = accumulated_squared_error(y_test_true, y_test_pred)[0]
    r2 = r2_score(y_test_true, y_test_pred)[0]
    return coeffs.flatten(), mse, ase, r2, num_test, num_train
def load_transform_fit(self, lat, lon):
    """Fit the model for one pixel and score it on training and test data.

    Training samples containing NaN are dropped. When ``self.transform`` is
    set, predictor columns are standardised with ``x_new = (x - mean) / std``
    using this pixel's values from the ``stats_pixel_<var>_all.nc`` files:
    one column per entry of ``self.variables``, followed by ``self.order``
    lag columns that use the ``tcc`` statistics. Progress is printed with the
    time elapsed since ``self.timer_start``.

    Parameters
    ----------
    lat : float
        Latitude coordinate.
    lon : float
        Longitude coordinate.

    Returns
    -------
    tuple
        ``(coeffs, mse, ase, r2, num_test, num_train, mse_tr, ase_tr,
        r2_tr)`` where ``coeffs`` is the flattened coefficient array,
        ``mse``/``ase``/``r2`` are the first element of the respective metric
        on the test data, ``num_test``/``num_train`` count the non-NaN
        entries in the first predictor column, and the ``*_tr`` values are
        the same metrics on the training data.
    """
    def elapsed():
        return timer() - self.timer_start

    def read_stats(var):
        # Read mean and std for this pixel and close the file again.
        with xr.open_dataset(
                base + 'stats_pixel_{}_all.nc'.format(var)) as stats:
            mean = stats['mean'].sel(latitude = lat, longitude = lon).values
            std = stats['std'].sel(latitude = lat, longitude = lon).values
        return mean, std

    def standardise(features):
        # NOTE(review): this column layout is applied for both model types -
        # confirm it also matches the traditional AR loader's output.
        # Columns that are not covered below (e.g. a bias column, if the
        # loader adds one) stay zero, as before - TODO confirm intended.
        scaled = np.zeros(features.shape)
        for col, (mean, std) in enumerate(variable_stats):
            scaled[:, col] = (features[:, col] - mean) / std
        if order > 0:
            offset = len(self.variables)
            mean, std = tcc_stats
            for k in range(order):
                # Note from the original author: something weird with the
                # rotation of cloud cover values.
                scaled[:, k + offset] = (features[:, k + offset] - mean) / std
        return scaled

    # Move some of this to the dataloader part?
    print('Enters load_transform_fit after {} seconds'.format(elapsed()))
    order = self.order
    is_ar = self.type == 'ar'

    ds = get_pixel_from_ds(self.dataset, lat, lon)
    if is_ar:
        if order > 0:
            X, y = dataset_to_numpy_order(ds, order = order, bias = self.bias)
        else:
            X, y = dataset_to_numpy(ds, bias = self.bias)
    else:
        print('finds traditional model')
        X, y = dataset_to_numpy_order_traditional_ar(ds, order = order, bias = self.bias)
    print('Finished reading in pixel in load_transform_fit after {} seconds'.format(elapsed()))

    # Drop every sample (row) with a NaN in any predictor or in the target.
    a = np.concatenate([X, y], axis = 1)
    a = a[~np.isnan(a).any(axis = 1)]
    X = a[:, :-1]
    y = a[:, -1, np.newaxis]
    if self.sigmoid:
        y = inverse_sigmoid(y) # not tested

    if self.transform:
        # The statistics are identical for the training and the test split
        # and for every lag, so each file is read exactly once.
        variable_stats = [read_stats(var) for var in self.variables]
        tcc_stats = read_stats('tcc') if order > 0 else None
        X_train = standardise(X)
    else:
        X_train = X
    print('Finished transforming pixel in load_transform_fit after {} seconds'.format(elapsed()))

    ds = get_pixel_from_ds(self.test_dataset, lat, lon)
    if is_ar:
        if order > 0:
            X_test, y_test_true = dataset_to_numpy_order(ds, order, bias = self.bias)
        else:
            X_test, y_test_true = dataset_to_numpy(ds, bias = self.bias)
    else:
        X_test, y_test_true = dataset_to_numpy_order_traditional_ar(ds, order = order, bias = self.bias)
    print('Finished reading in test data pixel in load_transform_fit after {} seconds'.format(elapsed()))

    if self.transform:
        X_test = standardise(X_test)
    print('Finished transforming test data in load_transform_fit after {} seconds'.format(elapsed()))

    num_test = (~np.isnan(X_test)).sum(axis=0)[0]
    num_train = (~np.isnan(X_train)).sum(axis=0)[0]

    coeffs = fit_pixel(X_train, y)
    print('Finished fitting pixel test data in load_transform_fit after {} seconds'.format(elapsed()))

    y_test_pred = predict_pixel(X_test, coeffs)
    y_train_pred = predict_pixel(X_train, coeffs)
    print('Finished predicting test pixel data in load_transform_fit after {} seconds'.format(elapsed()))

    if self.sigmoid:
        # NOTE(review): the target was passed through inverse_sigmoid above,
        # so mapping predictions back presumably needs the forward sigmoid -
        # confirm before relying on sigmoid=True.
        y_test_pred = inverse_sigmoid(y_test_pred)

    # Strip a trailing axis from four-dimensional arrays. The check is on the
    # number of dimensions; the number of samples must not trigger it.
    if np.ndim(y_test_true) == 4:
        y_test_true = y_test_true[:, :, :, 0]
    if np.ndim(y_test_pred) == 4:
        y_test_pred = y_test_pred[:, :, :, 0]

    # Move most of content in store performance to evaluate
    mse = mean_squared_error(y_test_true, y_test_pred)[0]
    print('mse shape {}'.format(np.shape(mse)))
    ase = accumulated_squared_error(y_test_true, y_test_pred)[0]
    r2 = r2_score(y_test_true, y_test_pred)[0]

    mse_tr = mean_squared_error(y, y_train_pred)[0]
    ase_tr = accumulated_squared_error(y, y_train_pred)[0]
    r2_tr = r2_score(y, y_train_pred)[0]
    print('Finished computing mse, ase, r2 data in load_transform_fit after {} seconds'.format(elapsed()))
    print('mse {}, ase {}, r2 {}'.format(mse, ase, r2))
    return coeffs.flatten(), mse, ase, r2, num_test, num_train, mse_tr, ase_tr, r2_tr
def predict(self, lat, lon):
    """Score the stored coefficients for one pixel on the test data.

    Used by model loader. The coefficients are taken from
    ``self.coeff_matrix``; the test predictors are standardised in the same
    way as during fitting when ``self.transform`` is set.

    Parameters
    ----------
    lat : float
        Latitude coordinate.
    lon : float
        Longitude coordinate.

    Returns
    -------
    mse, ase, r2
        First element of the respective metric computed on the test data.
    """
    def elapsed():
        return timer() - self.timer_start

    def read_stats(var):
        # Read mean and std for this pixel and close the file again.
        with xr.open_dataset(
                base + 'stats_pixel_{}_all.nc'.format(var)) as stats:
            mean = stats['mean'].sel(latitude = lat, longitude = lon).values
            std = stats['std'].sel(latitude = lat, longitude = lon).values
        return mean, std

    # `order` was referenced below without ever being defined in this method.
    order = self.order

    # TODO loop over dataset ...
    ds = get_pixel_from_ds(self.test_dataset, lat, lon)
    if self.type == 'ar':
        if order > 0:
            X_test, y_test_true = dataset_to_numpy_order(ds, order, bias = self.bias)
        else:
            X_test, y_test_true = dataset_to_numpy(ds, bias = self.bias)
    else:
        X_test, y_test_true = dataset_to_numpy_order_traditional_ar(ds, order = order, bias = self.bias)

    if self.transform:
        # The output shape used to come from the undefined names
        # n_times / n_vars; take it from the test matrix itself.
        n_times, n_vars = X_test.shape
        transformed_test = np.zeros((n_times, n_vars))
        # NOTE(review): columns not covered below (e.g. a bias column, if
        # the loader adds one) stay zero - TODO confirm intended.
        for col, var in enumerate(self.variables):
            m, s = read_stats(var)
            transformed_test[:, col] = (X_test[:, col] - m) / s
        if order > 0:
            offset = len(self.variables)
            m, s = read_stats('tcc')
            for k in range(order):
                # Note from the original author: something weird with the
                # rotation of cloud cover values.
                transformed_test[:, k + offset] = (X_test[:, k + offset] - m) / s
        X_test = transformed_test
    # The message text still names load_transform_fit; it is left unchanged
    # so existing log output stays the same.
    print('Finished transforming test data in load_transform_fit after {} seconds'.format(elapsed()))

    # Index into the coefficient grid: offset from 30.0 (lat) and -15.0 (lon)
    # in steps of 0.25, truncated to an integer.
    i = (lat-30.0)/0.25
    j = (lon-(-15.0))/0.25
    coeffs = self.coeff_matrix[int(i), int(j), :][:, np.newaxis]

    y_test_pred = predict_pixel(X_test, coeffs)
    print('Finished predicting test pixel data in load_transform_fit after {} seconds'.format(elapsed()))

    if self.sigmoid:
        # NOTE(review): presumably predictions should be mapped back with
        # the forward sigmoid rather than inverse_sigmoid - confirm.
        y_test_pred = inverse_sigmoid(y_test_pred)

    # Strip a trailing axis from four-dimensional arrays. The check is on the
    # number of dimensions; the number of samples must not trigger it.
    if np.ndim(y_test_true) == 4:
        y_test_true = y_test_true[:, :, :, 0]
    if np.ndim(y_test_pred) == 4:
        y_test_pred = y_test_pred[:, :, :, 0]

    # Move most of content in store performance to evaluate
    mse = mean_squared_error(y_test_true, y_test_pred)[0]
    print('mse shape {}'.format(np.shape(mse)))
    ase = accumulated_squared_error(y_test_true, y_test_pred)[0]
    r2 = r2_score(y_test_true, y_test_pred)[0]
    print('Finished computing mse, ase, r2 data in load_transform_fit after {} seconds'.format(elapsed()))
    print('mse {}, ase {}, r2 {}'.format(mse, ase, r2))
    return mse, ase, r2