Ejemplo n.º 1
0
    def load_transform(self, lat, lon):
        """Load one pixel and standardise its lagged cloud-cover predictors.

        Rows containing NaN are dropped, then each of the first
        ``self.order`` columns of X is standardised with
        ``x_new = (x - mean) / std``. The mean and std are read from the
        'tcc' pixel statistics file at the requested coordinate.

        Parameters
        ---------------------
        lat : float
            Latitude coordinate.
        lon : float
            Longitude coordinate.

        Returns
        ---------------------
        transformed : np.ndarray
            Array of shape (n_times, self.order) with the standardised
            columns of X.
        y : np.ndarray
            Last column of the NaN-filtered data as a column vector. It is
            passed through ``inverse_sigmoid`` when ``self.sigmoid`` is set.

        Raises
        ---------------------
        ValueError
            If ``self.order`` is not positive. No loader is defined for that
            case here (previously this surfaced as an UnboundLocalError).
        """
        order = self.order
        if not order > 0:
            raise ValueError(
                'load_transform requires order > 0, got {}'.format(order))

        # Move some of this to the dataloader part?
        ds = get_pixel_from_ds(self.dataset, lat, lon)
        X, y = dataset_to_numpy_order_traditional_ar(ds,
                                                     order=order,
                                                     bias=self.bias)

        # Drop every time step where a predictor or the target is NaN.
        stacked = np.concatenate([X, y], axis=1)
        stacked = stacked[~np.isnan(stacked).any(axis=1)]

        X = stacked[:, :-1]
        y = stacked[:, -1, np.newaxis]
        if self.sigmoid:
            y = inverse_sigmoid(y)  # not tested

        # Open the statistics file once and close it as soon as the two
        # scalars are in memory (.values forces the read inside the block).
        with xr.open_dataset(
                base + 'stats_pixel_{}_all.nc'.format('tcc')) as stats:
            m = stats['mean'].sel(latitude=lat, longitude=lon).values
            s = stats['std'].sel(latitude=lat, longitude=lon).values

        transformed = np.zeros((X.shape[0], order))
        for k in range(order):
            # NOTE(review): the original author flagged "something weird with
            # the rotation of cloud cover values" here -- still unresolved.
            transformed[:, k] = (X[:, k] - m) / s

        return transformed, y
Ejemplo n.º 2
0
    def load(self, lat, lon):
        """Return the NaN-free predictors and target for one pixel.

        Parameters
        ---------------------
        lat : float
            Latitude coordinate.
        lon : float
            Longitude coordinate.

        Returns
        ---------------------
        X : np.ndarray
            All columns except the last of the NaN-filtered data.
        y : np.ndarray
            The last column of the NaN-filtered data, kept as a column vector.
        """
        # Move some of this to the dataloader part?
        pixel = get_pixel_from_ds(self.dataset, lat, lon)

        if self.order > 0:
            features, target = dataset_to_numpy_order_traditional_ar(
                pixel, order=self.order, bias=self.bias)
        else:
            features, target = dataset_to_numpy(pixel, bias=self.bias)

        # Stack predictors and target so incomplete rows are dropped together.
        stacked = np.concatenate([features, target], axis=1)
        has_nan = np.isnan(stacked).any(axis=1)
        complete = stacked[~has_nan]

        return complete[:, :-1], complete[:, -1, np.newaxis]  # not tested
Ejemplo n.º 3
0
    def load_transform_fit(self, lat, lon):
        """Fit the traditional AR model for one pixel and score it on test data.

        Training rows containing NaN are dropped. When ``self.transform`` is
        set, the first ``self.order`` columns of both the training and the
        test predictors are standardised with ``x_new = (x - mean) / std``
        using the 'tcc' pixel statistics. The model is fitted on the
        (optionally standardised) training predictors, so that it sees the
        same scaling it is later asked to predict on.

        Parameters
        ---------------------
        lat : float
            Latitude coordinate.
        lon : float
            Longitude coordinate.

        Returns
        ---------------------
        coeffs : np.ndarray
            Flattened coefficients returned by ``fit_pixel``.
        mse, ase, r2
            First element of the respective metric computed on the test data.
        num_test, num_train : int
            Number of non-NaN entries in the first column of the test and
            training predictors.

        Raises
        ---------------------
        ValueError
            If ``self.order`` is not positive, or if ``self.test_start`` /
            ``self.test_stop`` is None. Neither case has a data path here
            (previously both surfaced as an UnboundLocalError).
        """
        order = self.order
        if not order > 0:
            raise ValueError(
                'load_transform_fit requires order > 0, got {}'.format(order))
        if self.test_start is None or self.test_stop is None:
            raise ValueError(
                'load_transform_fit requires test_start and test_stop to be set')

        # Move some of this to the dataloader part?
        ds = get_pixel_from_ds(self.dataset, lat, lon)
        X, y = dataset_to_numpy_order_traditional_ar(ds,
                                                     order=order,
                                                     bias=self.bias)

        # Drop every training time step where a predictor or target is NaN.
        stacked = np.concatenate([X, y], axis=1)
        stacked = stacked[~np.isnan(stacked).any(axis=1)]

        X = stacked[:, :-1]
        y = stacked[:, -1, np.newaxis]
        if self.sigmoid:
            y = inverse_sigmoid(y)  # not tested

        # NOTE(review): unlike the training data, test rows with NaN are not
        # removed before prediction and scoring -- confirm this is intended.
        ds = get_pixel_from_ds(self.test_dataset, lat, lon)
        X_test, y_test_true = dataset_to_numpy_order_traditional_ar(
            ds, order, bias=self.bias)

        X_train = X
        if self.transform:
            # Read the statistics once for both splits and release the file
            # handle right away (.values forces the read inside the block).
            with xr.open_dataset(
                    base + 'stats_pixel_{}_all.nc'.format('tcc')) as stats:
                m = stats['mean'].sel(latitude=lat, longitude=lon).values
                s = stats['std'].sel(latitude=lat, longitude=lon).values

            def standardise(data):
                # Standardise the first `order` columns; output has exactly
                # `order` columns, any further input columns are discarded.
                out = np.zeros((data.shape[0], order))
                for k in range(order):
                    # NOTE(review): original author flagged "something weird
                    # with the rotation of cloud cover values" here.
                    out[:, k] = (data[:, k] - m) / s
                return out

            X_train = standardise(X)
            X_test = standardise(X_test)

        num_test = (~np.isnan(X_test)).sum(axis=0)[0]
        num_train = (~np.isnan(X_train)).sum(axis=0)[0]

        # Fit on the same representation that is used for prediction below.
        coeffs = fit_pixel(X_train, y)
        y_test_pred = predict_pixel(X_test, coeffs)

        if self.sigmoid:
            # NOTE(review): the target was already passed through
            # inverse_sigmoid before fitting; applying it again to the
            # prediction looks suspicious -- confirm the intended mapping.
            y_test_pred = inverse_sigmoid(y_test_pred)

        # TODO: upgrade this to compute train score as well as test score.

        # Drop a trailing axis from 4-D arrays. The check is on the number of
        # dimensions; len() would instead test the number of samples.
        if np.ndim(y_test_true) == 4:
            y_test_true = y_test_true[:, :, :, 0]

        if np.ndim(y_test_pred) == 4:
            y_test_pred = y_test_pred[:, :, :, 0]

        # Move most of content in store performance to evaluate
        mse = mean_squared_error(y_test_true, y_test_pred)[0]
        ase = accumulated_squared_error(y_test_true, y_test_pred)[0]
        r2 = r2_score(y_test_true, y_test_pred)[0]
        return coeffs.flatten(), mse, ase, r2, num_test, num_train
Ejemplo n.º 4
0
Archivo: model.py Proyecto: hannasv/MS
    def load_transform_fit(self, lat, lon):
        """Fit the model for one pixel and score it on train and test data.

        Training rows containing NaN are dropped. When ``self.transform`` is
        set, predictors are standardised with ``x_new = (x - mean) / std``.
        One column per entry of ``self.variables`` uses that variable's pixel
        statistics; the following ``self.order`` columns use the 'tcc'
        statistics. Progress timings are printed relative to
        ``self.timer_start``.

        Parameters
        ---------------------
        lat : float
            Latitude coordinate.
        lon : float
            Longitude coordinate.

        Returns
        ---------------------
        coeffs : np.ndarray
            Flattened coefficients returned by ``fit_pixel``.
        mse, ase, r2
            First element of the respective metric on the test data.
        num_test, num_train : int
            Number of non-NaN entries in the first column of the test and
            training predictors.
        mse_tr, ase_tr, r2_tr
            First element of the respective metric on the training data.
        """
        # Move some of this to the dataloader part?
        local = timer()
        print('Enters load_transform_fit after {} seconds'.format(local - self.timer_start))
        ds = get_pixel_from_ds(self.dataset, lat, lon)
        if self.type == 'ar':
            if self.order > 0:
                X, y = dataset_to_numpy_order(ds, order=self.order, bias=self.bias)
            else:
                X, y = dataset_to_numpy(ds, bias=self.bias)
        else:
            print('finds traditional model')
            X, y = dataset_to_numpy_order_traditional_ar(
                ds, order=self.order, bias=self.bias)
        local = timer()
        print('Finished reading in pixel in load_transform_fit after {} seconds'.format(local - self.timer_start))

        # Drop every training time step where a predictor or target is NaN.
        stacked = np.concatenate([X, y], axis=1)
        stacked = stacked[~np.isnan(stacked).any(axis=1)]

        X = stacked[:, :-1]
        y = stacked[:, -1, np.newaxis]
        if self.sigmoid:
            y = inverse_sigmoid(y)  # not tested

        order = self.order
        stats_cache = {}

        def pixel_stats(var):
            # Return (mean, std) of `var` at this pixel. Each statistics file
            # is opened at most once per call and closed right after the
            # values are read (.values forces the read inside the block).
            if var not in stats_cache:
                with xr.open_dataset(
                        base + 'stats_pixel_{}_all.nc'.format(var)) as stats:
                    stats_cache[var] = (
                        stats['mean'].sel(latitude=lat, longitude=lon).values,
                        stats['std'].sel(latitude=lat, longitude=lon).values,
                    )
            return stats_cache[var]

        def standardise(data):
            # Standardise the variable columns and then the lagged 'tcc'
            # columns of `data`.
            # NOTE(review): columns beyond len(self.variables) + order (for
            # example a bias column, if one is appended) are left at zero --
            # confirm that is intended.
            out = np.zeros(data.shape)
            for j, var in enumerate(self.variables):
                m, s = pixel_stats(var)
                out[:, j] = (data[:, j] - m) / s
            if order > 0:
                offset = len(self.variables)
                m, s = pixel_stats('tcc')
                for k in range(order):
                    # NOTE(review): original author flagged "something weird
                    # with the rotation of cloud cover values" here.
                    out[:, offset + k] = (data[:, offset + k] - m) / s
            return out

        if self.transform:
            X_train = standardise(X)
        else:
            X_train = X
        print('Finished transforming pixel in load_transform_fit after {} seconds'.format(timer() - self.timer_start))

        # NOTE(review): unlike the training data, test rows with NaN are not
        # removed before prediction and scoring -- confirm this is intended.
        ds = get_pixel_from_ds(self.test_dataset, lat, lon)

        if self.type == 'ar':
            if self.order > 0:
                X_test, y_test_true = dataset_to_numpy_order(ds, self.order, bias=self.bias)
            else:
                X_test, y_test_true = dataset_to_numpy(ds, bias=self.bias)
        else:
            X_test, y_test_true = dataset_to_numpy_order_traditional_ar(
                ds, order=self.order, bias=self.bias)
        print('Finished reading in test data pixel in load_transform_fit after {} seconds'.format(timer() - self.timer_start))

        if self.transform:
            X_test = standardise(X_test)
            print('Finished transforming test data in load_transform_fit after {} seconds'.format(timer() - self.timer_start))

        num_test = (~np.isnan(X_test)).sum(axis=0)[0]
        num_train = (~np.isnan(X_train)).sum(axis=0)[0]
        coeffs = fit_pixel(X_train, y)
        print('Finished fitting pixel test data in load_transform_fit after {} seconds'.format(timer() - self.timer_start))
        y_test_pred = predict_pixel(X_test, coeffs)
        y_train_pred = predict_pixel(X_train, coeffs)
        print('Finished predicting test pixel data in load_transform_fit after {} seconds'.format(timer() - self.timer_start))

        if self.sigmoid:
            # NOTE(review): the target was already passed through
            # inverse_sigmoid before fitting; applying it again to the
            # prediction looks suspicious -- confirm the intended mapping.
            y_test_pred = inverse_sigmoid(y_test_pred)

        # Drop a trailing axis from 4-D arrays. The check is on the number of
        # dimensions; len() would instead test the number of samples.
        if np.ndim(y_test_true) == 4:
            y_test_true = y_test_true[:, :, :, 0]

        if np.ndim(y_test_pred) == 4:
            y_test_pred = y_test_pred[:, :, :, 0]

        # Move most of content in store performance to evaluate
        mse = mean_squared_error(y_test_true, y_test_pred)[0]
        print('mse shape {}'.format(np.shape(mse)))
        ase = accumulated_squared_error(y_test_true, y_test_pred)[0]
        r2 = r2_score(y_test_true, y_test_pred)[0]
        mse_tr = mean_squared_error(y, y_train_pred)[0]
        ase_tr = accumulated_squared_error(y, y_train_pred)[0]
        r2_tr = r2_score(y, y_train_pred)[0]

        print('Finished computing mse, ase, r2 data in load_transform_fit after {} seconds'.format(timer() - self.timer_start))
        print('mse {}, ase {}, r2 {}'.format(mse, ase, r2))
        return coeffs.flatten(), mse, ase, r2, num_test, num_train, mse_tr, ase_tr, r2_tr
Ejemplo n.º 5
0
Archivo: model.py Proyecto: hannasv/MS
    def predict(self, lat, lon):
        """Score the stored coefficients of one pixel on the test data.

        Used by model loader. The test predictors are optionally standardised
        (``self.transform``) with ``x_new = (x - mean) / std``, the pixel's
        coefficients are taken from ``self.coeff_matrix``, and the prediction
        is compared against the true test target.

        Parameters
        ---------------------
        lat : float
            Latitude coordinate.
        lon : float
            Longitude coordinate.

        Returns
        ---------------------
        mse, ase, r2
            First element of the respective metric on the test data.
        """
        # TODO loop over dataset ...
        ds = get_pixel_from_ds(self.test_dataset, lat, lon)
        if self.type == 'ar':
            if self.order > 0:
                X_test, y_test_true = dataset_to_numpy_order(ds, self.order, bias=self.bias)
            else:
                X_test, y_test_true = dataset_to_numpy(ds, bias=self.bias)
        else:
            X_test, y_test_true = dataset_to_numpy_order_traditional_ar(
                ds, order=self.order, bias=self.bias)

        if self.transform:
            # These three names were never bound in this method, so the
            # transform path used to fail with a NameError.
            order = self.order
            n_times, n_vars = X_test.shape
            transformed_test = np.zeros((n_times, n_vars))

            def pixel_stats(var):
                # Return (mean, std) of `var` at this pixel and close the
                # file once the values are read (.values forces the read).
                with xr.open_dataset(
                        base + 'stats_pixel_{}_all.nc'.format(var)) as stats:
                    return (
                        stats['mean'].sel(latitude=lat, longitude=lon).values,
                        stats['std'].sel(latitude=lat, longitude=lon).values,
                    )

            for j, var in enumerate(self.variables):
                m, s = pixel_stats(var)
                transformed_test[:, j] = (X_test[:, j] - m) / s

            if order > 0:
                offset = len(self.variables)
                m, s = pixel_stats('tcc')
                for k in range(order):
                    # NOTE(review): original author flagged "something weird
                    # with the rotation of cloud cover values" here.
                    transformed_test[:, offset + k] = (X_test[:, offset + k] - m) / s

            X_test = transformed_test
            print('Finished transforming test data in load_transform_fit after {} seconds'.format(timer() - self.timer_start))

        # Map the coordinate to indices into the coefficient matrix.
        # NOTE(review): 30.0 / -15.0 / 0.25 are presumably the grid origin and
        # resolution -- confirm against how coeff_matrix is built.
        i = (lat - 30.0) / 0.25
        j = (lon - (-15.0)) / 0.25

        coeffs = self.coeff_matrix[int(i), int(j), :][:, np.newaxis]
        y_test_pred = predict_pixel(X_test, coeffs)
        print('Finished predicting test pixel data in load_transform_fit after {} seconds'.format(timer() - self.timer_start))

        if self.sigmoid:
            y_test_pred = inverse_sigmoid(y_test_pred)

        # Drop a trailing axis from 4-D arrays. The check is on the number of
        # dimensions; len() would instead test the number of samples.
        if np.ndim(y_test_true) == 4:
            y_test_true = y_test_true[:, :, :, 0]

        if np.ndim(y_test_pred) == 4:
            y_test_pred = y_test_pred[:, :, :, 0]

        # Move most of content in store performance to evaluate
        mse = mean_squared_error(y_test_true, y_test_pred)[0]
        print('mse shape {}'.format(np.shape(mse)))
        ase = accumulated_squared_error(y_test_true, y_test_pred)[0]
        r2 = r2_score(y_test_true, y_test_pred)[0]
        print('Finished computing mse, ase, r2 data in load_transform_fit after {} seconds'.format(timer() - self.timer_start))
        print('mse {}, ase {}, r2 {}'.format(mse, ase, r2))
        return mse, ase, r2