def save_to_GAF_img(df, file, step):
    """Encode the OHLC columns of *df* as GASF/GADF/MTF images and save them.

    :param df: frame with "Open", "High", "Low", "Close" columns
    :param file: output image file name (appended to the per-column directory)
    :param step: side length (pixels) of the square encoded image
    """
    OHLC = ["Open", "High", "Low", "Close"]
    high = max(df["High"])
    low = min(df["Low"])
    # The encoders do not depend on the column, so build them once instead of
    # re-creating all three on every loop iteration.
    gasf = GramianAngularField(image_size=step, method="summation")
    gadf = GramianAngularField(image_size=step, method="difference")
    mtf = MarkovTransitionField(image_size=step)
    base = "/content/other_n"
    for col in OHLC:
        # BUG FIX: the original created "/content/GASF/..." etc. but saved to
        # "/content/other_n/..." — imsave would fail when those directories
        # did not already exist.  Create the directories actually written to.
        Path(base + "/GASF/" + col + "/").mkdir(parents=True, exist_ok=True)
        Path(base + "/GADF/" + col + "/").mkdir(parents=True, exist_ok=True)
        Path(base + "/MTF/" + col + "/").mkdir(parents=True, exist_ok=True)
        # Min-max normalise the column using the window's High/Low range.
        ts_norm = [(i - low) / (high - low) for i in list(df[col])]
        X_mtf = mtf.fit_transform([ts_norm])
        X_gasf = gasf.fit_transform([ts_norm])
        X_gadf = gadf.fit_transform([ts_norm])
        plt.imsave(base + "/GASF/" + col + "/" + file, X_gasf[0], cmap="gray")
        plt.imsave(base + "/GADF/" + col + "/" + file, X_gadf[0], cmap="gray")
        plt.imsave(base + "/MTF/" + col + "/" + file, X_mtf[0], cmap="gray")
def encode_dataset(batch_size, downscale_factor, dataset, pooling_function, number_of_bins=15):
    """ Computation of encodings has to be done in batches due to the large size
    of the dataset. Otherwise the kernel will die! For downscaling pick np.mean
    (average pooling) or np.max (max pooling) respectively. If downscaling is not
    required choose downscale_factor=1. Keep in mind the network expects an input
    image size of 64x64. The function returns a 3D matrix. The new 3D matrix
    contains several 2D matrices, which correspond to the time series
    encodings/images. The order of the objects does not change, which means for
    example that the 23rd slice of the input dataset corresponds to the 23rd
    encoding in the 3D Matrix.

    NOTE: trailing samples beyond the last full batch (n % batch_size) are not
    encoded, matching the original behaviour."""
    n, l = np.shape(dataset)
    f = downscale_factor
    n_batches = n // batch_size
    # Cumulative end index of every batch: [batch_size, 2*batch_size, ...].
    batches = np.linspace(1, n_batches, n_batches, dtype=int) * batch_size
    mtf = MarkovTransitionField(image_size=1., n_bins=number_of_bins,
                                strategy='uniform', overlapping=False)
    print('Encoding started...')
    for p in range(n_batches):
        if p == 0:
            X_mtf = mtf.transform(dataset[0:batches[p], :])
            # Downscale one sample first to learn the reduced image size, then
            # allocate the full output tensor once.
            sample = block_reduce(X_mtf[0], block_size=(f, f), func=pooling_function)
            l_red = sample.shape[0]
            X_mtf_red = np.zeros((n, l_red, l_red))
            print('output 3D Matrix shape: ', np.shape(X_mtf_red))
            for j, i in enumerate(range(0, batches[p])):
                X_mtf_red[i] = block_reduce(X_mtf[j], block_size=(f, f),
                                            func=pooling_function)
        else:
            # BUG FIX: the original sliced an undefined name ``X`` here; the
            # batch must be sliced from ``dataset``.
            X_mtf = mtf.transform(dataset[batches[p - 1]:batches[p], :])
            for j, i in enumerate(range(batches[p - 1], batches[p])):
                X_mtf_red[i] = block_reduce(X_mtf[j], block_size=(f, f),
                                            func=pooling_function)
    print('Encoding successful!')
    print('#####################################')
    return X_mtf_red
def prep_seriesConvLSTM(seq_len, out_window, in_window, img_size, channels, series_test, h):
    """Window *series_test* and encode it as image sequences for a Conv-LSTM.

    Each input window is encoded as a stack of GADF/GASF(/MTF) images, and
    consecutive windows are grouped into sequences of length *seq_len*.

    :param seq_len: number of consecutive encoded windows per LSTM sequence
    :param out_window: length of each prediction window
    :param in_window: length of each input window
    :param img_size: side length of every encoded image
    :param channels: 2 -> GADF+GASF only, otherwise GADF+GASF+MTF
    :param series_test: 1-d array-like test series
    :param h: forecast horizon forwarded to sequence_splitter
    :return: (X, y) with X of shape (N, seq_len, img_size, img_size, channels)
             and y of shape (N, out_window)
    """
    print("Preparing data: ")
    sample_range = (-1, 1)
    signal_test = series_test
    signal_test = signal_test.reshape(-1, 1)
    # No scaling is applied here; the series is only flattened back to 1-d.
    signal_test_scaled = signal_test.flatten()
    window_input_test, window_output_test = sequence_splitter(
        signal_test_scaled, in_window, out_window, h)
    # One image encoder per channel.
    gadf = GramianAngularField(image_size=img_size, method='difference',
                               sample_range=sample_range)
    gasf = GramianAngularField(image_size=img_size, method='summation',
                               sample_range=sample_range)
    mtf = MarkovTransitionField(image_size=img_size, n_bins=8,
                                strategy='quantile')
    gadf_test = np.expand_dims(gadf.fit_transform(window_input_test), axis=3)
    gasf_test = np.expand_dims(gasf.fit_transform(window_input_test), axis=3)
    mtf_test = np.expand_dims(mtf.fit_transform(window_input_test), axis=3)
    y_test = window_output_test.reshape(-1)
    if (channels == 2):
        X_test_windowed = np.concatenate((gadf_test, gasf_test), axis=3)
    else:
        X_test_windowed = np.concatenate((gadf_test, gasf_test, mtf_test), axis=3)
    X_test_Conv_LSTM = np.zeros((X_test_windowed.shape[0] - seq_len + 1,
                                 seq_len, img_size, img_size, channels))
    y_test_Conv_LSTM = np.zeros(
        (X_test_windowed.shape[0] - seq_len + 1, out_window))
    print("Test data:")
    for i in tqdm(range(0, X_test_windowed.shape[0] - seq_len + 1)):
        # Assemble one sequence of seq_len consecutive encoded windows.
        current_seq_X = np.zeros((seq_len, img_size, img_size, channels))
        for l in range(seq_len):
            current_seq_X[l] = X_test_windowed[i + l]
        current_seq_X = current_seq_X.reshape(1, seq_len, img_size, img_size,
                                              channels)
        X_test_Conv_LSTM[i] = current_seq_X
        # NOTE(review): y_test was flattened above, so a scalar is broadcast
        # into the out_window-long row here — confirm intent when out_window > 1.
        y_test_Conv_LSTM[i] = y_test[i + seq_len - 1]
    X_test_Conv_LSTM = X_test_Conv_LSTM.reshape(-1, seq_len, img_size,
                                                img_size, channels)
    y_test_Conv_LSTM = y_test_Conv_LSTM.reshape(-1, out_window)
    return (X_test_Conv_LSTM, y_test_Conv_LSTM)
class TSToMTF(Transform):
    r"""Transforms a time series batch to a 4d TSImage (bs, n_vars, size, size)
    by applying Markov Transition Field"""
    # Transform pipeline ordering value (runs after lower-order transforms).
    order = 98

    def __init__(self, size=224, cmap=None, n_bins=5, **kwargs):
        # size: output image side; cmap: optional matplotlib colormap name
        # applied to single-channel output; n_bins: MTF quantisation bins.
        self.size, self.cmap = size, cmap
        self.encoder = MarkovTransitionField(n_bins=n_bins, **kwargs)

    def encodes(self, o: TSTensor):
        bs, *_, seq_len = o.shape
        size = ifnone(self.size, seq_len)
        if size != seq_len:
            # Linearly resample every univariate series to `size` points so
            # the MTF image has the requested resolution.
            o = F.interpolate(o.reshape(-1, 1, seq_len), size=size,
                              mode='linear', align_corners=False)[:, 0]
        else:
            o = o.reshape(-1, seq_len)
        # pyts works on numpy, hence the .cpu().numpy() round-trip; reshape
        # restores the (batch, n_vars, size, size) layout.
        output = self.encoder.fit_transform(o.cpu().numpy()).reshape(
            bs, -1, size, size)
        if self.cmap and output.shape[1] == 1:
            # Map the single channel through the colormap and keep RGB only.
            output = TSImage(plt.get_cmap(
                self.cmap)(output)[..., :3]).squeeze(1).permute(0, 3, 1, 2)
        else:
            output = TSImage(output)
        return output.to(device=o.device)
def evaluate_classifiers(dst):
    """Train and score one classifier per plot type on UCR dataset *dst*.

    Relies on module-level globals: UCR_DATASET_PATH, classif_class,
    input_dim, batch_size, n_epochs.  Appends one CSV row per plot type to
    "tsplot_results.csv" and returns the flattened list
    [acc_rp, f1_rp, acc_mtf, f1_mtf, acc_gaf, f1_gaf] (NaN pairs on failure).
    """
    print("[%s] Processing dataset %s" % (datetime.now().strftime("%F %T"), dst))
    train_x, train_y = load_from_tsfile_to_dataframe(
        os.path.join(UCR_DATASET_PATH, dst, dst + "_TRAIN.ts"))
    test_x, test_y = load_from_tsfile_to_dataframe(
        os.path.join(UCR_DATASET_PATH, dst, dst + "_TEST.ts"))
    data_train = [train_x.iloc[i][0] for i in range(train_x.shape[0])]
    data_test = [test_x.iloc[i][0] for i in range(test_x.shape[0])]
    enc = LabelEncoder().fit(train_y)
    # NOTE: 'sparse' was renamed 'sparse_output' in scikit-learn >= 1.2.
    ohe = OneHotEncoder(sparse=False)
    labels_encoded = enc.transform(train_y)
    integer_encoded = labels_encoded.reshape(len(labels_encoded), 1)
    labels_train = ohe.fit_transform(integer_encoded)
    ts_plotters = [RecurrencePlot(threshold='point', percentage=20),
                   MarkovTransitionField(), GramianAngularField()]

    def evaluate_classifier(plot_obj):
        # Train/evaluate one classifier; best-effort — failures yield NaNs.
        try:
            classifier = classif_class(input_dim, num_classes=len(set(train_y)),
                                       batch_size=batch_size,
                                       series_plot_obj=plot_obj)
            classifier.train(data_train, labels_train, n_epochs=n_epochs)
            y_pred = [classifier.predict(series) for series in data_test]
            y_pred = enc.inverse_transform(y_pred)
            accuracy = accuracy_score(test_y, y_pred)
            f1 = f1_score(test_y, y_pred, average='macro')
            with open("tsplot_results.csv", "a") as f:
                # BUG FIX: classif_class is already a class, so its name is
                # classif_class.__name__; the original logged
                # classif_class.__class__.__name__, i.e. the metaclass name.
                f.write("{};{};{};{};{}\n".format(
                    classif_class.__name__, plot_obj.__class__.__name__,
                    dst, accuracy, f1))
            return accuracy, f1
        except Exception as e:
            print("Exception while evaluating classifier:", e.__str__())
            return float('nan'), float('nan')

    return list(itertools.chain(
        *[evaluate_classifier(plot_obj) for plot_obj in ts_plotters]))
def mtf_encode_3_to_4(arr_3d, dim_mult=3):
    """Lazily encode every 3-d array in *arr_3d* as a stack of MTF images.

    The image side is the second axis length of the first array's
    channel-mean, scaled by *dim_mult*.  Returns a generator yielding one
    4-d array per input array (each is transposed before encoding).
    """
    image_side = arr_3d[0].mean(axis=0).shape[1] * dim_mult
    encoder = MultivariateTransformer(
        MarkovTransitionField(image_size=image_side), flatten=False)
    return (encoder.fit_transform(block.T) for block in arr_3d)
def toMTFdata(tsdatas, image_size=1., n_bins=5, strategy='quantile', overlapping=False, flatten=False):
    """Encode each series in *tsdatas* with one shared Markov Transition Field.

    Every element is fed to fit_transform individually and only the first
    image of each result is kept.  Returns a numpy array of the images.
    """
    encoder = MarkovTransitionField(image_size=image_size, n_bins=n_bins,
                                    strategy=strategy, overlapping=overlapping,
                                    flatten=flatten)
    return np.array([encoder.fit_transform(series)[0] for series in tsdatas])
def create_image(data, date=None, image_size=10, method='summation', field='gaf', strategy='uniform'):
    """Creates an image of the specified size using the specified field.

    Arguments:
        data {list} -- the list of data to transform

    Keyword Arguments:
        date -- value stored as the resulting frame's 'Date' index
        image_size {int} -- the size of the output image (default: {10})
        method {str} -- the method for the gramian angular field
                        (default: {'summation'})
        field {str} -- the field to use, 'gaf' or 'mtf' (default: {'gaf'})
        strategy {str} -- the strategy for MTF (default: {'uniform'})

    Returns a one-row dataframe of the flattened image indexed by 'Date' if
    successful, or None for an unknown *field*.
    """
    # BUG FIX: the original compared strings with ``is`` (identity), which is
    # an implementation detail of string interning; use ``==`` (equality).
    if field == 'gaf':
        field = GramianAngularField(image_size=image_size, method=method)
    elif field == 'mtf':
        field = MarkovTransitionField(image_size=image_size, strategy=strategy)
    else:
        return None
    data = pd.DataFrame(data).transpose()
    img = pd.DataFrame(field.transform(data).flatten()).transpose()
    img['Date'] = date
    img = img.set_index('Date')
    return img
def MTF_encoder(ts, size=None, n_bins=8, strategy='quantile', overlapping=False, **kwargs):
    """Encode a time series as a Markov Transition Field image.

    The input is coerced to a 2-d array, resampled to *size* points with PAA
    (size defaults to the series length and is capped at it), then encoded.
    The singleton batch axis of the encoder output is squeezed away, so a 2-d
    image is returned.
    """
    ts = To2dArray(ts)
    assert ts.ndim == 2, 'ts ndim must be 2!'
    # Cap the output resolution at the series length.
    if size is None: size = ts.shape[-1]
    else: size = min(size, ts.shape[-1])
    # Piecewise Aggregate Approximation: downsample the series to `size` points.
    ts = PAA(window_size=None, output_size=size).fit_transform(ts)
    encoder = MTF(size, n_bins=n_bins, strategy=strategy, overlapping=overlapping)
    output = np.squeeze(encoder.fit_transform(ts), 0)
    return output
def mtf_transform(data, image_size=500, show=False, img_index=0):
    """Encode *data* with a Markov Transition Field and delegate display /
    post-processing to the shared pyts_transform helper (rainbow colormap)."""
    encoder = MarkovTransitionField(image_size)
    return pyts_transform(encoder, data, image_size=image_size, show=show,
                          cmap='rainbow', img_index=img_index)
def prep_seriesConvMLP(window_size_x, window_size_y, img_size, signal_test, h):
    """Window *signal_test* and encode each window as a 3-channel image.

    Channels are, in order: GADF, GASF and MTF encodings of the input
    window.  Returns (X, y) where X has shape
    (n_windows, img_size, img_size, 3) and y holds the output windows.
    """
    sample_range = (-1, 1)
    flat_signal = signal_test.reshape(-1, 1).flatten()
    # Split Sequence into (input window, output window) pairs.
    window_input_test, window_output_test = sequence_splitter(
        flat_signal, window_size_x, window_size_y, h)
    # %%---------------------------------------------------------------------
    ''' Field transformations '''
    encoders = [
        GramianAngularField(image_size=img_size, method='difference',
                            sample_range=sample_range),
        GramianAngularField(image_size=img_size, method='summation',
                            sample_range=sample_range),
        MarkovTransitionField(image_size=img_size, n_bins=8,
                              strategy='quantile'),
    ]
    # One image channel per encoder, stacked along the last axis.
    channel_stacks = [np.expand_dims(enc.fit_transform(window_input_test), axis=3)
                      for enc in encoders]
    X_test_Conv_MLP = np.concatenate(channel_stacks, axis=3)
    y_test_Conv_MLP = window_output_test
    return (X_test_Conv_MLP, y_test_Conv_MLP)
def _build_images_one_stock(df_one_permno, window_len, retrain_freq, encoding_method, image_size): """ Encodes images as timeseries for one stock :param df_one_permno: dataframe of the timeseries of all data for one particular stock :param window_len: number of observations to consider (42 for 2 months) :param retrain_freq: lag to consider between making two samples :param encoding_method: method to encode the images :param image_size: final size of the image (using window_len*window_len will avoid any averaging) :return: np.ndarray of the samples of shape (N,window_len,window_len,M) where: - M is the number of features - N is the number of final samples ~ len(df_one_permno)/retrain_freq """ n_days = df_one_permno.T.shape[-1] samples_list, dates_list, prc_list = [], [], [] for i in range(window_len, n_days, retrain_freq): window_data = df_one_permno.T.iloc[:, i - window_len:i] # Use GADF algorithm to transform data if encoding_method == 'GADF': try: from pyts.image import GADF gadf = GADF(image_size) except: from pyts.image import GramianAngularField gadf = GramianAngularField(image_size, method='difference') samples_list.append(gadf.fit_transform(window_data).T) # Use GASF algorithm to transform data elif encoding_method == 'GASF': try: from pyts.image import GASF gasf = GASF(image_size) except: from pyts.image import GramianAngularField gasf = GramianAngularField(image_size, method='summation') samples_list.append(gasf.fit_transform(window_data).T) # Use MTF algorithm to transform data elif encoding_method == 'MTF': try: from pyts.image import MTF mtf = MTF(image_size) except: from pyts.image import MarkovTransitionField mtf = MarkovTransitionField(image_size) samples_list.append(mtf.fit_transform(window_data).T) else: raise BaseException( 'Method must be either GADF, GASF or MTF not {}'.format( encoding_method)) samples_list = np.asarray(samples_list) return samples_list
def _show_images(self, df_window_data): """ Plots a multi dimensional timeseries encoded as an image :param df_window_data: timeseries we want to encode as an image """ data = df_window_data.reset_index().set_index('date').drop('PERMNO', axis=1).T channels = list(data.index) if self._encoding_method == 'GADF': try: from pyts.image import GADF gadf = GADF(self._image_size) except: from pyts.image import GramianAngularField gadf = GramianAngularField(self._image_size, method='difference') image_data = (gadf.fit_transform(data).T) elif self._encoding_method == 'GASF': try: from pyts.image import GASF gasf = GASF(self._image_size) except: from pyts.image import GramianAngularField gasf = GramianAngularField(self._image_size, method='summation') image_data = (gasf.fit_transform(data).T) elif self._encoding_method == 'MTF': try: from pyts.image import MTF mtf = MTF(self._image_size) except: from pyts.image import MarkovTransitionField mtf = MarkovTransitionField(self._image_size) image_data = (mtf.fit_transform(data).T) else: raise BaseException( 'Method must be either GADF, GASF or MTF not {}'.format( self._encoding_method)) num_channels = image_data.shape[-1] plt.figure(figsize=(12, 14)) for j in range(1, num_channels + 1): channel = image_data[:, :, j - 1] plt.subplot(int(num_channels / 2) + 1, 2, j) plt.imshow(channel, cmap='rainbow', origin='lower') plt.xlabel('$time$') plt.ylabel('$time$') plt.title(channels[j - 1]) plt.tight_layout() plt.show()
def test_flatten():
    """Check that flatten=True matches manually flattening the images."""
    images = MarkovTransitionField(n_bins=2).transform(X)
    flat = MarkovTransitionField(n_bins=2, flatten=True).transform(X)
    np.testing.assert_allclose(images.reshape(2, -1), flat, atol=1e-5, rtol=0.)
def test_actual_results(params, X, arr_desired):
    """Compare the transformer output against the precomputed reference."""
    transformer = MarkovTransitionField(**params)
    np.testing.assert_allclose(transformer.fit_transform(X), arr_desired,
                               atol=1e-5, rtol=0.)
def test_parameter_check(params, error, err_msg):
    """Ensure invalid parameters raise the documented error on transform."""
    transformer = MarkovTransitionField(**params)
    with pytest.raises(error, match=re.escape(err_msg)):
        transformer.transform(X)
# NOTE(review): this script fragment references state created earlier in the
# script (ax2, k, s, patchimage) — confirm against the full script.
plt.title('PAA image')
divider = make_axes_locatable(ax2)
cax = divider.append_axes("right", size="5%", pad=0.2)
plt.colorbar(cax=cax)
ax3 = plt.subplot(133)
# presumably patchimage here comes from a previous loop iteration — verify.
plt.imshow(patchimage[k])
plt.title('patch average')
divider = make_axes_locatable(ax3)
cax = divider.append_axes("right", size="5%", pad=0.2)
plt.colorbar(cax=cax)
plt.show()
## call API
X, _, _, _ = load_gunpoint(return_X_y=True)
mtf = MarkovTransitionField()
fullimage = mtf.transform(X)
# downscale MTF of the time series (without paa) through mean operation
batch = int(len(X[0]) / s)
patch = []
# Average each (batch x batch) tile of the full-resolution MTF image.
for p in range(s):
    for q in range(s):
        patch.append(
            np.mean(fullimage[0][p * batch:(p + 1) * batch,
                                 q * batch:(q + 1) * batch]))
# reshape the flat tile averages into an (s, s) image
patchimage = np.array(patch).reshape(s, s)
plt.figure()
plt.suptitle('gunpoint_index_' + str(k))
def mtf_encode_2_to_3(arr_2d, dim_multiple=3):
    """Lazily encode each 2-d array in *arr_2d* as MTF images.

    The image side is the first array's second axis length scaled by
    *dim_multiple*.  Returns a generator yielding one 3-d image stack per
    input array (each is transposed before encoding).
    """
    image_side = arr_2d[0].shape[1] * dim_multiple
    encoder = MarkovTransitionField(image_size=image_side)
    return (encoder.fit_transform(block.T) for block in arr_2d)
def train_series(self, series_train, series_test, p_filepath="predictions", l_filepath="logs", w_filepath="weights", epochs=5, bsize=16, h=12, callbacks=True):
    """Train the Conv-LSTM on image-encoded raw (unscaled) series.

    Windows both series, encodes each input window as GADF/GASF/MTF image
    channels, reshapes via self.prep_series, fits self.model and writes the
    test predictions to a timestamped CSV under *p_filepath*.

    :param series_train: 1-d training series (reshaped internally)
    :param series_test: 1-d test series used for validation
    :param p_filepath: directory for the predictions CSV
    :param l_filepath: directory for TensorBoard logs
    :param w_filepath: directory for checkpoint weights
    :param epochs, bsize: training epochs and batch size
    :param h: forecast horizon forwarded to sequence_splitter
    :param callbacks: enable checkpoint + TensorBoard callbacks
    :return: (trained model, reshaped test targets, test predictions)
    """
    img_size = self.img_size
    Conv_LSTM_model = self.model
    out_window = self.out_window
    in_window = self.in_window
    sample_range = (-1, 1)
    # Timestamped run name shared by the TensorBoard log directory.
    name = "Conv-LSTM_raw_" + datetime.now().strftime("%Y%m%d-%H%M")
    logdir = os.path.join(l_filepath, name)
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
    path_w = w_filepath
    filepath = os.path.join(
        path_w,
        "Conv_LSTM_raw_weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
    )
    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='loss',
                                                 verbose=0,
                                                 save_best_only=True,
                                                 mode='min')
    if callbacks:
        callbacks_list = [checkpoint, tensorboard_callback]
    else:
        callbacks_list = []
    signal_train = series_train
    signal_test = series_test
    signal_train = signal_train.reshape(-1, 1)
    signal_test = signal_test.reshape(-1, 1)
    # Scaling deliberately disabled ("raw" variant); kept for reference:
    # MMscaler = MinMaxScaler(feature_range=sample_range)
    #
    # signal_train_scaled = MMscaler.fit_transform(signal_train).flatten()
    # signal_test_scaled = MMscaler.fit_transform(signal_test).flatten()
    signal_train_scaled = signal_train.flatten()
    signal_test_scaled = signal_test.flatten()
    window_input_train, window_output_train = sequence_splitter(
        signal_train_scaled, in_window, out_window, h)
    window_input_test, window_output_test = sequence_splitter(
        signal_test_scaled, in_window, out_window, h)
    # One image encoder per channel (difference / summation GAF, MTF).
    gadf = GramianAngularField(image_size=img_size, method='difference',
                               sample_range=sample_range)
    gasf = GramianAngularField(image_size=img_size, method='summation',
                               sample_range=sample_range)
    mtf = MarkovTransitionField(image_size=img_size, n_bins=8,
                                strategy='quantile')
    gadf_transformed_train = np.expand_dims(
        gadf.fit_transform(window_input_train), axis=3)
    gasf_transformed_train = np.expand_dims(
        gasf.fit_transform(window_input_train), axis=3)
    mtf_transformed_train = np.expand_dims(
        mtf.fit_transform(window_input_train), axis=3)
    gadf_transformed_test = np.expand_dims(
        gadf.fit_transform(window_input_test), axis=3)
    gasf_transformed_test = np.expand_dims(
        gasf.fit_transform(window_input_test), axis=3)
    mtf_transformed_test = np.expand_dims(
        mtf.fit_transform(window_input_test), axis=3)
    # Group encoded windows into LSTM sequences (delegated to prep_series).
    X_train_prep, y_train_prep, X_test_prep, y_test_prep = self.prep_series(
        gadf_transformed_train, gasf_transformed_train, mtf_transformed_train,
        gadf_transformed_test, gasf_transformed_test, mtf_transformed_test,
        window_output_train, window_output_test)
    history_Conv_LSTM_prep = Conv_LSTM_model.fit(
        x=X_train_prep,
        y=y_train_prep,
        batch_size=bsize,
        epochs=epochs,
        callbacks=callbacks_list,
        shuffle=True,
        validation_data=(X_test_prep, y_test_prep))
    print("Average test loss: ",
          np.average(history_Conv_LSTM_prep.history['val_loss']))
    preds_prep = Conv_LSTM_model.predict(X_test_prep)
    preds_prep = preds_prep.reshape(-1, 1)
    y_test_prep = y_test_prep.reshape(-1, 1)
    # Inverse scaling disabled along with the scaler above:
    # preps_unscaled = MMscaler.inverse_transform(preds_prep)
    # y_test_unscaled = MMscaler.inverse_transform(y_test_prep)
    preps_unscaled = preds_prep
    y_test_unscaled = y_test_prep
    MSE_test_no_HW = ((y_test_unscaled - preps_unscaled)**2).mean()
    print("Test loss: ", MSE_test_no_HW)
    # show_result(y_test_prep, preds_prep)
    df = pd.DataFrame({
        'True value': y_test_unscaled.flatten(),
        'Predictions': preps_unscaled.flatten()
    })
    fpath_p = os.path.join(
        p_filepath,
        "Conv-LSTM_raw_" + datetime.now().strftime("%Y%m%d-%H%M") + ".csv")
    df.to_csv(fpath_p)
    return Conv_LSTM_model, y_test_prep, preds_prep
""" ======================= Markov Transition Field ======================= This example shows how to transform a time series into a Markov Transition Field using :class:`pyts.image.MarkovTransitionField`. """ import matplotlib.pyplot as plt from pyts.image import MarkovTransitionField from pyts.datasets import load_gunpoint X, _, _, _ = load_gunpoint(return_X_y=True) # MTF transformation mtf = MarkovTransitionField(image_size=24) X_mtf = mtf.fit_transform(X) # Show the image for the first time series plt.figure(figsize=(6, 6)) plt.imshow(X_mtf[0], cmap='rainbow', origin='lower') plt.title('Markov Transition Field', fontsize=18) plt.colorbar(fraction=0.0457, pad=0.04) plt.show()
def get_multivariate_mtf(timeseries_list, tags_list=None, resample_rate=None, image_size=IMAGE_SIZE):
    """
    Computes the MTF for each of the timeseries passed as argument, with the
    preprocessing needed for the MTF to be computable (NaN replacement,
    constant-signal removal...).

    PARAMS
    ======
        timeseries_list: list of pandas.dataframe
            A list of dataframes (one per time series)
        tags_list: list of strings (default to None)
            List of all the tag names if available.
        resample_rate: string (default to None)
            A resampling rate to be used before applying the MTF computation.
        image_size: integer (default to 48)
            Resolution of the MTF

    RETURNS
    =======
        tags_mtf: numpy.ndarray
            An array of shape (num_timeseries, image_size, image_size) with
            the MTF computed for each signal.
        constant_signals: list of string
            A list of all the constant signals removed from the final result
        selected_signals: list of string
            A list of all the signals selected for the final result
    """
    # Timestamps MUST be aligned for the column-wise concatenation to work.
    tags_df = pd.concat(timeseries_list, axis='columns')

    # Optional resampling reduces the computational load of the MTF.
    if resample_rate is not None:
        tags_df = tags_df.resample(resample_rate).mean()

    # The MTF cannot be built in the presence of NaN values.
    tags_df.replace(np.nan, 0.0, inplace=True)
    num_timeseries = len(timeseries_list)

    # Reflect the provided tag names in the column labels.
    if tags_list is not None:
        tags_df.columns = tags_list

    # Drop constant signals: their MTF is undefined (max == min).
    stats = tags_df.describe().T
    constant_signals = stats[(stats['max'] - stats['min']) == 0].index
    tags_df = tags_df[tags_df.columns.difference(constant_signals)]
    selected_signals = tags_df.columns.tolist()

    # One MTF per remaining signal, signals as rows.
    encoder = MarkovTransitionField(image_size=image_size)
    signals = tags_df.values.T.reshape(tags_df.shape[1], -1)
    tags_mtf = encoder.fit_transform(signals)

    return tags_mtf, constant_signals.tolist(), selected_signals
def __init__(self,serie,serie_test,m,h,windowsize=12,stride=1,alpha=0.25,beta=0,gamma=0.35,pr=3,compute_mtf=True):
    """Parameters:
    -- serie: the training serie
    -- serie_test: the test serie (to simplify the implementation it must be
       of the same length of serie)
    -- m: seasonality of the serie
    -- h: horizon of prediction
    -- alpha,beta,gamma: initial guess of parameters of HW. The optimal
       parameters are computed by the method parameter refinement given the
       training serie
    -- windowsize: the size of the window
    -- stride: every how many steps a prediction is done e.g. stride=2 a
       prediction is done at time t, another at time t+2, predicting t+h,
       and t+h+2
    -- compute_mtf: whether to compute the MTF field: the library pyts does
       not manage to compute this field for Lorenz series
    Requires: if m!=1, m>h (i.e. predictions are possible only within a
    season)"""
    # Base class initialises the Holt-Winters state (note beta forced to 0).
    super(Holt_Winters_NN,self).__init__(serie,serie_test,m,h,alpha,0,gamma)
    self._b[self._m-1]=0
    self._b_test[self._m-1]=0
    self.compute_states()
    self.parameter_refinment(pr)
    self.compute_states_test()
    # The vector to give to the NN for training (i.e. the time series scaled
    # and deseasonalised).
    self._training_vector=(self._serie[self._m:self._length-self._h]/ \
        self._l[self._m:(self._length-self._h)])/ \
        self._s[0:(self._length-self._h-self._m)]
    self._test_vector=(self._serie_test[self._m:self._length-self._h]/ \
        self._l_test[self._m:(self._length-self._h)])/ \
        self._s_test[0:(self._length-self._h-self._m)]
    self._windowsize=windowsize
    self._stride=stride
    # n_windows = length of the list of images; lag: the first lag elements
    # of the serie are not used so that the windowsize fits the length of
    # the serie.
    [n_windows,lag] = windomize_size(self._training_vector.size,self._windowsize,self._stride)
    # Serie deseasonalised and scaled, from which the imgs given to the NN
    # are obtained.
    self._training_output=self._serie[self._m+lag+self._windowsize-1+self._h:self._length]/ \
        (self._l[(self._m+lag+self._windowsize-1):(self._length-self._h)]* \
        self._s[(lag+self._windowsize-1+self._h):(self._length-self._m)])
    # Value by which the prediction of the NN must be multiplied.
    self.forecast_multiplier=self._l_test[(self._m+lag+self._windowsize-1):(self._length-self._h)]* \
        self._s_test[(lag+self._windowsize-1+self._h):(self._length-self._m)]
    # Contains the values of the test serie aligned with the predictions.
    self.test_output=self._serie_test[self._m+lag+self._windowsize-1+self._h:self._length]
    self.test_output_val=self._serie_test[self._m+lag+self._windowsize-1+self._h:self._length]/ \
        (self._l_test[(self._m+lag+self._windowsize-1):(self._length-self._h)]* \
        self._s_test[(lag+self._windowsize-1+self._h):(self._length-self._m)])
    #self._training_output_multiple=np.zeros([m,self._training_output.size])
    #check end of the vector it may
    #for hh in range(1,self._m+1):
    #self._training_output_multiple[hh-1,:]=self._serie[self._m+lag+self._windowsize-1+hh:self._length]/ \
    #(self._l[(self._m+lag+self._windowsize-1):(self._length-hh)]* \
    #self._s[(lag+self._windowsize-1):(self._length-hh-self._m)])
    print(self._training_vector.mean())
    # Computation of the list of images for training and test:
    # min-max rescale both vectors to [-1, 1] before encoding.
    b=max(self._training_vector)
    a=min(self._training_vector)
    self._scale=b-a
    self._training_vector=2*(self._training_vector-a)/(b-a)-1
    b=max(self._test_vector)
    a=min(self._test_vector)
    self._scale_test=b-a
    self._test_vector=2*(self._test_vector-a)/(b-a)-1
    self._training_matrix=windowmize(self._training_vector,self._windowsize,self._stride)
    gasf = GramianAngularField(image_size=1., method='summation',sample_range=None)
    self.gasf = gasf.fit_transform(self._training_matrix)
    gadf = GramianAngularField(image_size=1., method='difference',sample_range=None)
    self.gadf = gadf.fit_transform(self._training_matrix)
    if(compute_mtf):
        mtf=MarkovTransitionField(image_size=1.)
        self.mtf= mtf.fit_transform(self._training_matrix)
    # In case of a first dense layer these could be useful:
    #self.concatenated_images=np.concatenate((self.gadf,self.gasf), axis=1)
    #self.concatenated_images=np.concatenate((self.gadf,self.gasf,self.mtf), axis=1)
    self._test_matrix=windowmize(self._test_vector,self._windowsize,self._stride)
    gasf_test = GramianAngularField(image_size=1., method='summation',sample_range=None)
    self.gasf_test = gasf_test.fit_transform(self._test_matrix)
    gadf_test= GramianAngularField(image_size=1., method='difference',sample_range=None)
    self.gadf_test= gadf_test.fit_transform(self._test_matrix)
    # check if it is correct
    if(compute_mtf):
        mtf_test=MarkovTransitionField(image_size=1.)
        self.mtf_test= mtf_test.fit_transform(self._test_matrix)
def get_diff(self, series_train, series_test):
    """Compare self.model (via prep_series data) with a freshly-built LSTM
    trained on the same windows, writing both prediction sets to CSV.

    :param series_train: 1-d training series (min-max scaled internally)
    :param series_test: 1-d test series
    :return: (y_test for fresh model, its predictions,
              y_test for prep model, its predictions)
    """
    img_size = self.img_size
    Conv_LSTM_model = self.model
    out_window = self.out_window
    in_window = self.in_window
    sample_range = (-1, 1)
    seq_len = self.seq_length
    channels = self.channels
    signal_train = series_train
    signal_test = series_test
    signal_train = signal_train.reshape(-1, 1)
    signal_test = signal_test.reshape(-1, 1)
    from sklearn.preprocessing import MinMaxScaler
    MMscaler = MinMaxScaler(feature_range=sample_range)
    # NOTE(review): fit_transform is called on BOTH train and test, so the
    # test set is scaled with its own min/max rather than the training
    # scaler's — confirm this is intended.
    signal_train_scaled = MMscaler.fit_transform(signal_train).flatten()
    signal_test_scaled = MMscaler.fit_transform(signal_test).flatten()
    window_input_train, window_output_train = sequence_splitter(
        signal_train_scaled, in_window, out_window)
    window_input_test, window_output_test = sequence_splitter(
        signal_test_scaled, in_window, out_window)
    # One image encoder per channel (difference / summation GAF, MTF).
    gadf = GramianAngularField(image_size=img_size, method='difference',
                               sample_range=sample_range)
    gasf = GramianAngularField(image_size=img_size, method='summation',
                               sample_range=sample_range)
    mtf = MarkovTransitionField(image_size=img_size, n_bins=8,
                                strategy='quantile')
    gadf_transformed_train = np.expand_dims(
        gadf.fit_transform(window_input_train), axis=3)
    gasf_transformed_train = np.expand_dims(
        gasf.fit_transform(window_input_train), axis=3)
    mtf_transformed_train = np.expand_dims(
        mtf.fit_transform(window_input_train), axis=3)
    gadf_transformed_test = np.expand_dims(
        gadf.fit_transform(window_input_test), axis=3)
    gasf_transformed_test = np.expand_dims(
        gasf.fit_transform(window_input_test), axis=3)
    mtf_transformed_test = np.expand_dims(
        mtf.fit_transform(window_input_test), axis=3)
    X_train_windowed = np.concatenate(
        (gadf_transformed_train, gasf_transformed_train,
         mtf_transformed_train), axis=3)
    X_test_windowed = np.concatenate(
        (gadf_transformed_test, gasf_transformed_test,
         mtf_transformed_test), axis=3)
    X_train_Conv_LSTM = []
    y_train_Conv_LSTM = []
    X_test_Conv_LSTM = []
    y_test_Conv_LSTM = []
    # Group consecutive encoded windows into sequences of length seq_len.
    print("Getting Train original")
    for i in tqdm(range(0, X_train_windowed.shape[0] - seq_len + 1)):
        current_seq_X = []
        for l in range(seq_len):
            current_seq_X.append(X_train_windowed[i + l])
        X_train_Conv_LSTM.append(current_seq_X)
        y_train_Conv_LSTM.append(window_output_train[i + seq_len - 1])
    X_train_Conv_LSTM = np.array(X_train_Conv_LSTM)
    X_train_Conv_LSTM = X_train_Conv_LSTM.reshape(-1, seq_len, img_size,
                                                  img_size, channels)
    y_train_Conv_LSTM = np.array(y_train_Conv_LSTM)
    y_train_Conv_LSTM = y_train_Conv_LSTM.reshape(-1, out_window)
    print("Getting test original")
    for i in tqdm(range(0, X_test_windowed.shape[0] - seq_len + 1)):
        current_seq_X = []
        for l in range(seq_len):
            current_seq_X.append(X_test_windowed[i + l])
        X_test_Conv_LSTM.append(current_seq_X)
        y_test_Conv_LSTM.append(window_output_test[i + seq_len - 1])
    X_test_Conv_LSTM = np.array(X_test_Conv_LSTM)
    X_test_Conv_LSTM = X_test_Conv_LSTM.reshape(-1, seq_len, img_size,
                                                img_size, channels)
    y_test_Conv_LSTM = np.array(y_test_Conv_LSTM)
    y_test_Conv_LSTM = y_test_Conv_LSTM.reshape(-1, out_window)
    # Alternative sequence grouping via the class's own prep_series helper.
    X_train_prep, y_train_prep, X_test_prep, y_test_prep = self.prep_series(
        gadf_transformed_train, gasf_transformed_train, mtf_transformed_train,
        gadf_transformed_test, gasf_transformed_test, mtf_transformed_test,
        window_output_train, window_output_test)
    # df = pd.DataFrame({'x_test_prep_c3': X_train_prep[:,0,15,15,2].flatten(), 'x_test_c3': X_train_Conv_LSTM[:,0,15,15,2].flatten(), 'x_test_prep_c2': X_train_prep[:,0,15,15,1].flatten(), 'x_test_c2': X_train_Conv_LSTM[:,0,15,15,1].flatten()})
    # df.to_csv("test.csv")
    history_Conv_LSTM_prep = Conv_LSTM_model.fit(
        x=X_train_prep,
        y=y_train_prep,
        batch_size=16,
        epochs=5,
        callbacks=None,
        shuffle=True,
        validation_data=(X_test_prep, y_test_prep))
    print("Average test loss prep: ",
          np.average(history_Conv_LSTM_prep.history['val_loss']))
    # NOTE(review): predictions are computed on X_train_prep but compared with
    # y_test_prep below — looks like this should be X_test_prep; confirm.
    preds_prep = Conv_LSTM_model.predict(X_train_prep)
    preds_prep = preds_prep.reshape(-1)
    y_test_prep = y_test_prep.reshape(-1)
    MSE_test_no_HW_prep = ((y_test_prep - preds_prep)**2).mean()
    print("Test loss prep: ", MSE_test_no_HW_prep)
    df = pd.DataFrame({
        'True value': y_test_prep.flatten(),
        'Predictions': preds_prep.flatten()
    })
    fpath_p = os.path.join(
        "predictions",
        "predictions_raw_prep_" + datetime.now().strftime("%Y%m%d-%H%M%S") +
        ".csv")
    df.to_csv(fpath_p)
    # Train a second, freshly-built bidirectional LSTM on the locally-built
    # sequences for comparison.
    model2 = self.get_model_lstm(out_window,
                                 img_size,
                                 3,
                                 0,
                                 1,
                                 bidirectional=True,
                                 dropout=0.4,
                                 channels=3)
    model2.compile(optimizer="adam", loss="mse", metrics=['mse'])
    history_Conv_LSTM = model2.fit(x=X_train_Conv_LSTM,
                                   y=y_train_Conv_LSTM,
                                   batch_size=16,
                                   epochs=5,
                                   callbacks=None,
                                   shuffle=True,
                                   validation_data=(X_test_Conv_LSTM,
                                                    y_test_Conv_LSTM))
    print("Average test loss: ",
          np.average(history_Conv_LSTM.history['val_loss']))
    preds = model2.predict(X_test_Conv_LSTM)
    preds = preds.reshape(-1)
    y_test_Conv_LSTM = y_test_Conv_LSTM.reshape(-1)
    MSE_test_no_HW = ((y_test_Conv_LSTM - preds)**2).mean()
    print("Test loss: ", MSE_test_no_HW)
    df = pd.DataFrame({
        'True value': y_test_Conv_LSTM.flatten(),
        'Predictions': preds.flatten()
    })
    fpath_p = os.path.join(
        "predictions",
        "predictions_raw_fresh_" + datetime.now().strftime("%Y%m%d-%H%M%S") +
        ".csv")
    df.to_csv(fpath_p)
    return y_test_Conv_LSTM, preds, y_test_prep, preds_prep
signal_test_scaled, window_size_x, window_size_y) #%%--------------------------------------------------------------------------- ''' Field transformations ''' from pyts.image import GramianAngularField from pyts.image import MarkovTransitionField gadf = GramianAngularField(image_size=img_size, method='difference', sample_range=sample_range) gasf = GramianAngularField(image_size=img_size, method='summation', sample_range=sample_range) mtf = MarkovTransitionField(image_size=img_size, n_bins=8, strategy='quantile') gadf_transformed_train = np.expand_dims(gadf.fit_transform(window_input_train), axis=3) gasf_transformed_train = np.expand_dims(gasf.fit_transform(window_input_train), axis=3) mtf_transformed_train = np.expand_dims(mtf.fit_transform(window_input_train), axis=3) X_train_windowed = np.concatenate( (gadf_transformed_train, gasf_transformed_train, mtf_transformed_train), axis=3) gadf_transformed_test = np.expand_dims(gadf.fit_transform(window_input_test), axis=3) gasf_transformed_test = np.expand_dims(gasf.fit_transform(window_input_test),
def train_series(self, signal_train, signal_test, window_size_x=100, window_size_y=1,
                 epochs=5, bsize=32, p_filepath="predictions",
                 l_filepath="tensorboard_logs", w_filepath="weights_conv_mlp",
                 h=12, callbacks=True):
    """Train the Conv-MLP model on image-encoded sliding windows of a 1-D series.

    The train/test signals are windowed (via sequence_splitter), each window is
    encoded as three images (GADF, GASF, MTF from pyts) stacked on the channel
    axis, and the Conv-MLP is fit on those images. Predictions and the test MSE
    are computed afterwards and the (true, predicted) pairs written to a
    timestamped CSV under `p_filepath`.

    Parameters
    ----------
    signal_train, signal_test : array-like, reshaped to column vectors here
    window_size_x : int — input window length fed to the image encoders
    window_size_y : int — forecast horizon length (model output size)
    epochs, bsize : int — Keras fit epochs and batch size
    p_filepath, l_filepath, w_filepath : str — output dirs for predictions CSV,
        TensorBoard logs, and checkpoint weights respectively
    h : int — extra offset passed through to sequence_splitter
        (assumed to be a forecasting gap/horizon — TODO confirm against its def)
    callbacks : bool — if True, enable checkpoint + TensorBoard callbacks

    Returns
    -------
    (history, predictions, y_test, test_mse) — Keras History, the (unscaled)
    predictions and targets as column vectors, and their mean squared error.
    """
    Conv_MLP_model = self.Conv_MLP_series(self.img_size, window_size_y, self.N_Channel)
    # N_Channel = self.N_Channel
    img_size = self.img_size
    # Force column-vector shape before (the now-disabled) scaling / flattening.
    signal_train = signal_train.reshape(-1, 1)
    signal_test = signal_test.reshape(-1, 1)
    # sample_range is handed to the Gramian encoders, which rescale each window
    # into this interval internally.
    sample_range = (-1, 1)
    # Scaling
    # from sklearn.preprocessing import MinMaxScaler
    #
    # MMscaler = MinMaxScaler(feature_range=(-1,1))
    # MMscaler_test = MinMaxScaler(feature_range=(-1, 1))
    # signal_train_scaled = MMscaler.fit_transform(signal_train).flatten()
    # signal_test_scaled = MMscaler_test.fit_transform(signal_test).flatten()
    # NOTE(review): MinMax scaling is deliberately disabled — the "scaled"
    # names below hold the raw (flattened) signals.
    signal_train_scaled = signal_train.flatten()
    signal_test_scaled = signal_test.flatten()
    # Split Sequence into (input window, output window) pairs.
    window_input_train, window_output_train = sequence_splitter(signal_train_scaled,
                                                               window_size_x,
                                                               window_size_y, h)
    window_input_test, window_output_test = sequence_splitter(signal_test_scaled,
                                                              window_size_x,
                                                              window_size_y, h)
    # %%---------------------------------------------------------------------------
    ''' Field transformations '''
    from pyts.image import GramianAngularField
    from pyts.image import MarkovTransitionField
    gadf = GramianAngularField(image_size=img_size, method='difference',
                               sample_range=sample_range)
    gasf = GramianAngularField(image_size=img_size, method='summation',
                               sample_range=sample_range)
    mtf = MarkovTransitionField(image_size=img_size, n_bins=8, strategy='quantile')
    # Each encoder yields (n_windows, img_size, img_size); expand_dims adds a
    # trailing channel axis so the three encodings can be stacked as channels.
    gadf_transformed_train = np.expand_dims(gadf.fit_transform(window_input_train), axis=3)
    gasf_transformed_train = np.expand_dims(gasf.fit_transform(window_input_train), axis=3)
    mtf_transformed_train = np.expand_dims(mtf.fit_transform(window_input_train), axis=3)
    X_train_windowed = np.concatenate((gadf_transformed_train,
                                       gasf_transformed_train,
                                       mtf_transformed_train), axis=3)
    gadf_transformed_test = np.expand_dims(gadf.fit_transform(window_input_test), axis=3)
    gasf_transformed_test = np.expand_dims(gasf.fit_transform(window_input_test), axis=3)
    mtf_transformed_test = np.expand_dims(mtf.fit_transform(window_input_test), axis=3)
    X_test_windowed = np.concatenate((gadf_transformed_test,
                                      gasf_transformed_test,
                                      mtf_transformed_test), axis=3)
    # Data reshaping
    X_train_Conv_MLP = X_train_windowed
    y_train_Conv_MLP = window_output_train
    X_test_Conv_MLP = X_test_windowed
    y_test_Conv_MLP = window_output_test
    # *** Callbacks *** #
    name = "Conv-MLP_raw_" + datetime.now().strftime("%Y%m%d-%H%M")
    logdir = os.path.join(l_filepath, name)
    tensorboard_callback = TensorBoard(log_dir=logdir)
    path_w = w_filepath
    # Checkpoint filename embeds epoch and training loss (Keras fills the {}).
    filepath = os.path.join(path_w, "Conv_MLP_raw-weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5")
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath,
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min'
    )
    if callbacks:
        callback_list = [checkpoint, tensorboard_callback]
    else:
        callback_list = []
    # *** Fit Model *** #
    # shuffle=False keeps temporal ordering of the windows during training.
    history_Conv_MLP = Conv_MLP_model.fit(x=X_train_Conv_MLP,
                                          y=y_train_Conv_MLP,
                                          batch_size=bsize,
                                          epochs=epochs,
                                          callbacks=callback_list,
                                          shuffle=False,
                                          validation_data=(X_test_Conv_MLP, y_test_Conv_MLP))
    # *** Predict *** #
    print("Average test loss: ", np.average(history_Conv_MLP.history['val_loss']))
    preds_prep = Conv_MLP_model.predict(X_test_Conv_MLP)
    preds_prep = preds_prep.reshape(-1, 1)
    y_test_prep = y_test_Conv_MLP.reshape(-1, 1)
    # preds_unscaled = MMscaler_test.inverse_transform(preds_prep)
    # y_test_unscaled = MMscaler_test.inverse_transform(y_test_prep)
    # NOTE(review): since scaling is disabled above, "unscaled" is an alias for
    # the raw predictions/targets.
    preds_unscaled = preds_prep
    y_test_unscaled = y_test_prep
    MSE_test_no_HW = ((y_test_unscaled - preds_unscaled) ** 2).mean()
    print("Test loss: ", MSE_test_no_HW)
    # show_result(y_test_prep, preds_prep)
    df = pd.DataFrame({'True value': y_test_unscaled.flatten(),
                       'Predictions': preds_unscaled.flatten()})
    fname = "Conv_MLP_raw-" + datetime.now().strftime("%Y%m%d-%H%M")
    fpath_p = os.path.join(p_filepath, fname + ".csv")
    df.to_csv(fpath_p)
    return (history_Conv_MLP, preds_unscaled, y_test_unscaled, MSE_test_no_HW)
# NOTE(review): truncated fragment of a pyts documentation/gallery example — it
# opens with the tail of a module docstring and ends mid-call inside
# fig.add_gridspec(..., so it is left byte-identical. The visible part encodes
# a sine wave as a Markov transition field (n_bins=8) and begins laying out a
# matplotlib gridspec to plot the series next to its MTF image; the comment
# "Compute Gramian angular fields" is stale — the code computes an MTF.
""" # Author: Johann Faouzi <*****@*****.**> # License: BSD-3-Clause import numpy as np import matplotlib.pyplot as plt from pyts.image import MarkovTransitionField # Create a toy time series using the sine function time_points = np.linspace(0, 4 * np.pi, 1000) x = np.sin(time_points) X = np.array([x]) # Compute Gramian angular fields mtf = MarkovTransitionField(n_bins=8) X_mtf = mtf.fit_transform(X) # Plot the time series and its Markov transition field width_ratios = (2, 7, 0.4) height_ratios = (2, 7) width = 6 height = width * sum(height_ratios) / sum(width_ratios) fig = plt.figure(figsize=(width, height)) gs = fig.add_gridspec(2, 3, width_ratios=width_ratios, height_ratios=height_ratios, left=0.1, right=0.9, bottom=0.1,
def _method(self, X, **kwargs):
    """Encode `X` as Markov transition fields, forwarding `kwargs` to pyts."""
    return MarkovTransitionField(**kwargs).fit_transform(X)
def __init__(self, size=224, cmap=None, n_bins=5, **kwargs):
    """Store output size/colormap and build the MTF encoder.

    Extra keyword arguments are forwarded to MarkovTransitionField.
    """
    self.size = size
    self.cmap = cmap
    self.encoder = MarkovTransitionField(n_bins=n_bins, **kwargs)
def GAF_data_2(df, step):
    """Encode rolling OHLC windows of `df` as tiled GASF/GADF/MTF images.

    For every window of length `step` ending at row i, the four price series
    (Open, High, Close, Low) are min-max normalised against that window's
    High/Low range, each transformed to a (step, step) field, scaled to the
    0-255 range (rounded), and tiled into one (2*step, 2*step, 1) image:

        [[Open,  High],
         [Close, Low ]]

    Parameters
    ----------
    df : mapping of column name -> series with "Open", "High", "Low", "Close"
        (assumed to be a pandas DataFrame with positional int slicing —
        TODO confirm against callers)
    step : int — window length and per-tile image size

    Returns
    -------
    (X_gasf, X_gadf, X_mtf) : three arrays, each of shape
    (len(df) - step + 1, 2*step, 2*step, 1).

    Notes
    -----
    Refactor of the original: the identical window/normalise/tile loop was
    triplicated (once per transformer), and the window-range locals `high`/
    `low` were shadowed by the image arrays inside the first loop. Behaviour
    and return order are unchanged.
    """
    cols = ["Open", "High", "Close", "Low"]
    gasf = GramianAngularField(image_size=step, method="summation")
    gadf = GramianAngularField(image_size=step, method="difference")
    mtf = MarkovTransitionField(image_size=step)

    def _window_series(i):
        # Normalise the four OHLC slices ending at row i (inclusive) by the
        # window's own High/Low range.
        hi = max(df["High"][i - (step - 1):i + 1])
        lo = min(df["Low"][i - (step - 1):i + 1])
        return [[(x - lo) / (hi - lo) for x in list(df[c][i - (step - 1):i + 1])]
                for c in cols]

    def _encode(transformer):
        # One tiled 2x2 image per window, stacked into a single 4-D array.
        images = []
        for i in range(step - 1, len(df[cols[0]])):
            ts_open, ts_high, ts_close, ts_low = _window_series(i)
            tile = lambda ts: np.round(transformer.fit_transform([ts])[0] * 255)
            top = np.hstack((tile(ts_open), tile(ts_high)))
            bottom = np.hstack((tile(ts_close), tile(ts_low)))
            images.append(np.vstack((top, bottom)).reshape(step * 2, step * 2, 1))
        return np.stack(images)

    # Same return order as the original implementation.
    return (_encode(gasf), _encode(gadf), _encode(mtf))
def markov_transition(data, img_size=24):
    """Encode each time series in `data` as a Markov transition field.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_timestamps)
    img_size : int — side length of the output images

    Returns
    -------
    ndarray of shape (n_samples, img_size, img_size)
    """
    encoder = MarkovTransitionField(image_size=img_size)
    return encoder.fit_transform(data)