def vul_func(ratio):
    """Estimate a vulnerability function by flattening the ratio of
    observed to modeled damages.

    Two smoothings are provided: an 11-yr centered running mean and a
    Singular Spectrum Analysis (SSA) smoothing with window size 11.
    For further analysis only ``ssa_5`` was considered.

    Parameters
    ----------
    ratio : pd.DataFrame
        Must contain the column ``'ratios_D_Obs_D_CliExp'`` holding the
        ratio of recorded to modeled damages.

    Returns
    -------
    (pd.Series, np.ndarray)
        ``ratio11yr`` -- the 11-yr centered running mean, and
        ``ssa_5`` -- the leading SSA component (trend).
    """
    series = ratio['ratios_D_Obs_D_CliExp']
    # 11-yr centered running mean; require at least 5 valid points per window.
    ratio11yr = series.rolling(window=11, min_periods=5, center=True).mean()
    # SSA cannot handle NaNs: fill them with the column median.
    ratio_reg = series.fillna(series.median())
    # Generalized: originally hard-coded to exactly 31 samples via
    # np.zeros((1, 31)); now any length >= the SSA window size (11) works.
    ratio_test_reg = np.asarray(ratio_reg, dtype=float).reshape(1, -1)
    ssa = SingularSpectrumAnalysis(window_size=11, groups=None)
    X_ssa5 = ssa.fit_transform(ratio_test_reg)
    ssa_5 = X_ssa5[0, :]  # first component = smoothed trend
    return ratio11yr, ssa_5
def SSA_filter(time_series, no_sel, window_size=100):
    """Split a 1-D series into "signal" and "noise" via SSA.

    The first ``no_sel`` SSA components are grouped into the signal part
    and the remaining ``window_size - no_sel`` components into the noise
    part.

    Parameters
    ----------
    time_series : np.ndarray
        One-dimensional input series.
    no_sel : int
        Number of leading components assigned to the signal group.
    window_size : int
        SSA window size (default 100).

    Returns
    -------
    (np.ndarray, np.ndarray)
        The reconstructed signal and noise series.
    """
    component_split = [np.arange(0, no_sel), np.arange(no_sel, window_size)]
    decomposer = SingularSpectrumAnalysis(window_size=window_size,
                                          groups=component_split)
    decomposition = decomposer.transform(
        time_series.reshape(1, len(time_series)))
    return decomposition[0, :], decomposition[1, :]
def ssa(
    data_frame: "pd.DataFrame",
    window_size: "int | float" = 4,
    groups: "int | list[int] | None" = None,
    headers: "list[str] | None" = None,
) -> "pd.DataFrame":
    """Singular Spectrum Analysis.

    This is an adapted SingularSpectrumAnalysis function of pyts package.

    Arguments:
        data_frame {pd.DataFrame} -- input dataframe

    Keyword Arguments:
        window_size {int | float} -- SSA window size (default: {4}).
        groups {int | list[int] | None} -- grouping of the components
            (default: {None}); see pyts.decomposition.SingularSpectrumAnalysis.
        headers {list[str] | None} -- chosen dataframe headers
            (default: {None}, meaning all columns).

    Returns:
        pd.DataFrame -- A object decomposed of length window_size for each
        feature (header).
    """
    # NOTE: the original annotations used `int or float`, `int or [int]`,
    # and `[str]`, which are runtime expressions (evaluating to `int` or a
    # plain list), not type hints; fixed to valid string annotations.

    # Select and changes columns to index
    if headers is None:
        headers = data_frame.columns
    else:
        data_frame = data_frame[headers]

    # pyts expects one "sample" per row, so transpose: one row per feature.
    ssa = SingularSpectrumAnalysis(window_size, groups)
    x_ssa = ssa.fit_transform(data_frame.T)

    # Decompose each feature adding each window_size in line
    if data_frame.shape[1] > 1:
        decompose = [list(x_ssa[i]) for i, _ in enumerate(headers)]
        decompose = pd.DataFrame(decompose, index=headers)
    else:
        decompose = pd.DataFrame(x_ssa)
    return decompose.T
def SSA_EEG(data, n_components=3, graph=False):
    """Decompose a 1-D signal with Singular Spectrum Analysis (window 20)
    and optionally plot the original next to the leading components.

    Parameters
    ----------
    data : 1-D sequence
        The signal to decompose.
    n_components : int
        Number of leading SSA components to draw (default 3; only 5 plot
        colors are defined, so at most 5 are distinguishable).
    graph : bool
        If True, render the comparison figure.

    Returns
    -------
    np.ndarray
        The SSA decomposition as returned by pyts (``fit_transform``).
    """
    ssa = SingularSpectrumAnalysis(window_size=20, groups=None)
    # NOTE(review): X is a tuple (data, range(len(data))); pyts will coerce
    # it into a 2-row array whose second row is just the sample indices.
    # Only row 0 (the actual data) is used below — confirm the index row
    # is intentional and not a leftover.
    X = (data, (range(len(data))))
    X_ssa = ssa.fit_transform(X)
    if graph is True:
        plt.figure(figsize=(16, 6))
        # Left panel: the original signal.
        ax1 = plt.subplot(121)
        ax1.plot(X[0], label='Original', color='darkblue')
        ax1.legend(loc='best', fontsize=14)
        plt.xlabel('Samples', size=14)
        plt.ylabel('Amplitude', size=14)
        # Right panel: the first n_components SSA components of the signal row.
        ax2 = plt.subplot(122)
        color_list = ['darkgoldenrod', 'red', 'darkcyan', 'indigo', 'magenta']
        for i, c in zip(range(len(X_ssa[0][0:n_components])), color_list):
            ax2.plot(X_ssa[0, i], '--', label='SSA {0}'.format(i + 1), color=c)
        plt.xlabel('Samples', size=14)
        plt.ylabel('Amplitude', size=14)
        #plt.ylim([-0.0000001, 0.0000001])
        ax2.legend(loc='best', fontsize=14)
        plt.suptitle('Singular Spectrum Analysis', fontsize=20)
        plt.tight_layout()
        plt.subplots_adjust(top=0.88)
        plt.show()
    return X_ssa
def vul_funcs(ratio):
    """Estimate a vulnerability function by flattening the ratio of
    observed to modeled damages.

    Provides running means with several window sizes and two SSA
    smoothings. For further analysis only ``ssa_5`` was considered
    (11-yr window).

    Parameters
    ----------
    ratio : pd.Series
        Column of a DataFrame: ratio of recorded to modeled damages.

    Returns
    -------
    tuple of np.ndarray
        (ratio3yr, ratio5yr, ratio7yr, ratio9yr, ssa_5, ssa_10):
        running means for 3/5/7/9-yr windows and the leading SSA
        component for window sizes 11 and 10.
    """
    # Centered running means; runmean's half-window k gives a (2k+1)-yr window.
    ratio3yr = runmean(np.array(ratio), 1)
    ratio5yr = runmean(np.array(ratio), 2)
    ratio7yr = runmean(np.array(ratio), 3)
    ratio9yr = runmean(np.array(ratio), 4)
    # SSA cannot handle NaNs: fill them with the series median.
    ratio_ssa = ratio.fillna(ratio.median())
    # Generalized: originally hard-coded to exactly 31 samples via
    # np.zeros((1, 31)); now any length >= the SSA window size works.
    ratio_test = np.asarray(ratio_ssa, dtype=float).reshape(1, -1)
    ssa = SingularSpectrumAnalysis(window_size=11, groups=None)
    X_ssa5 = ssa.fit_transform(ratio_test)
    ssa = SingularSpectrumAnalysis(window_size=10, groups=None)
    X_ssa10 = ssa.fit_transform(ratio_test)
    ssa_5 = X_ssa5[0, :]    # leading (trend) component, window 11
    ssa_10 = X_ssa10[0, :]  # leading (trend) component, window 10
    return ratio3yr, ratio5yr, ratio7yr, ratio9yr, ssa_5, ssa_10
from pyts.datasets import make_cylinder_bell_funnel # Parameters n_samples, n_timestamps = 3, 128 X_cbf, y = make_cylinder_bell_funnel(n_samples=10, random_state=42, shuffle=False) X_period = 3 * np.sin(np.arange(n_timestamps)) X = X_cbf[:, :n_timestamps] + X_period # We decompose the time series into three subseries window_size = 20 # Singular Spectrum Analysis ssa = SingularSpectrumAnalysis(window_size=window_size, groups="auto") X_ssa = ssa.fit_transform(X) # Show the results for different frequency-parameters plt.figure(figsize=(16, 12)) ax1 = plt.subplot(221) ax1.plot(X[0], 'o-', label='Original') ax1.plot(X_period, 'o-', label='periodic') ax1.legend(loc='best', fontsize=14) ax1.set_ylim([np.min(X[0]) * 1.1, np.max(X[0]) * 1.1]) params = [(0.01, 0.85), (0.01, 0.98)] for idx in range(3): ax = plt.subplot(222 + idx)
def test_parameter_check(params, error, err_msg):
    """Check that invalid constructor parameters raise the expected error."""
    transformer = SingularSpectrumAnalysis(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        transformer.transform(X)
def test_actual_results(params):
    """The decomposed components must sum back to the original series."""
    decomposition = SingularSpectrumAnalysis(**params).fit_transform(X)
    reconstructed = decomposition.sum(axis=1)
    np.testing.assert_allclose(reconstructed, X, atol=1e-5, rtol=0.)
import matplotlib.pyplot as plt
from pyts.decomposition import SingularSpectrumAnalysis

# Parameters
n_samples, n_timestamps = 100, 48

# Toy dataset: Gaussian noise, one series per row.
rng = np.random.RandomState(41)
X = rng.randn(n_samples, n_timestamps)

# We decompose the time series into three subseries
window_size = 15
# Three groups of five components each: [0..4], [5..9], [10..14].
groups = [np.arange(i, i + 5) for i in range(0, 11, 5)]

# Singular Spectrum Analysis
# NOTE(review): window_size is passed as the literal 15 rather than the
# variable defined above — keep the two in sync if the parameter changes.
ssa = SingularSpectrumAnalysis(window_size=15, groups=groups)
X_ssa = ssa.fit_transform(X)

# Show the results for the first time series and its subseries
plt.figure(figsize=(16, 6))
ax1 = plt.subplot(121)
ax1.plot(X[0], 'o-', label='Original')
ax1.legend(loc='best', fontsize=14)
ax2 = plt.subplot(122)
for i in range(len(groups)):
    ax2.plot(X_ssa[0, i], 'o--', label='SSA {0}'.format(i + 1))
ax2.legend(loc='best', fontsize=14)
plt.suptitle('Singular Spectrum Analysis', fontsize=20)
# Build file paths: merged daily tas input, field-mean (GMT) intermediate,
# and the final SSA-smoothed output.
output_dir = output_base / dataset
input_file = output_dir / Path("tas_" + dataset.lower() + "_merged.nc4")
mean_file = str(input_file).replace("_merged.nc4", "_gmt.nc4")
ssa_file = str(mean_file).replace("tas_", "").replace("_gmt", "_ssa_gmt")
print(ssa_file)
# Compute the global field mean with CDO.
# NOTE(review): shell=True with a string built from paths is fine for
# trusted local paths, but injection-prone if paths ever come from
# untrusted input; prefer subprocess.run([...], shell=False) where the
# "module load" shell step is not required.
cmd = "module load cdo && cdo fldmean " + str(input_file) + " " + str(
    mean_file)
print(cmd)
subprocess.check_call(cmd, shell=True)
print('checked subprocess call')
# Read the field-mean series, subsampled every `subset` steps, as a
# (1, T) array — the sample-in-rows layout the SSA transformer expects.
input_ds = nc.Dataset(mean_file, "r")
col = np.array(np.squeeze(input_ds.variables["tas"][::subset]), ndmin=2)
# SSA and window_size are defined elsewhere in the file — TODO confirm.
ssa = SSA(window_size)
print('calculate ssa')
X_ssa = ssa.fit_transform(col)
print('ssa calculated')
# Write the first SSA component (the trend) to a new NetCDF file, copying
# the time-axis metadata (units, calendar) from the input dataset.
output_ds = nc.Dataset(ssa_file, "w", format="NETCDF4")
time = output_ds.createDimension("time", None)
times = output_ds.createVariable("time", "f8", ("time", ))
tas = output_ds.createVariable("tas", "f8", ("time"))
output_ds.description = "GMT created from daily values by SSA (10 year step)"
times.units = input_ds.variables["time"].units
times.calendar = input_ds.variables["time"].calendar
times[:] = input_ds.variables["time"][::subset]
tas[:] = X_ssa[0, :]
# NOTE(review): input_ds is never closed — consider input_ds.close() or a
# context manager.
output_ds.close()
# PCA on the small dataframe.
# NOTE(review): pca.fit is called twice and fit_transform once — the first
# fit call is redundant.
pca.fit(df_small)
fit = pca.fit(df_small)
trans = pca.fit_transform(df_small)
#_df.adjclose.plot()
# Plot the first principal component.
trans.iloc[:, 0].plot()
plt.show()
print(fit.column_correlations(df_small))

from pyts.decomposition import SingularSpectrumAnalysis
from pymssa import MSSA

# Singular Spectrum Analysis of the dataframe.
window_size = 20
groups = [np.arange(i, i + 5) for i in range(0, 20, 5)]
# NOTE(review): groups is built but never passed to the transformer below.
# NOTE(review): pyts expects samples in rows / timestamps in columns —
# verify df_small is oriented that way.
ssa = SingularSpectrumAnalysis(window_size=window_size)
X_ssa = ssa.fit_transform(df_small)

# Multivariate SSA on the adjusted close price; plot the components of the
# first series, then the raw series for comparison.
mssa = MSSA(n_components=5, window_size=21, verbose=True)
mssa.fit(_df.adjclose)
pd.DataFrame(mssa.components_[0, :, :], index=_df.index).plot()
plt.show()
_df.adjclose.plot()
plt.show()