Esempio n. 1
0
    def transform_to_2D(self, method, x_train):
        """Encode a batch of series as 2-D images with the selected transformer.

        :param method: 'gasf', 'mtf' or 'rp' (compared case-sensitively)
        :param x_train: object exposing ``shape[1]``; presumably an array of
            shape (n_samples, n_timestamps) -- TODO confirm against caller
        :return: whatever the chosen transformer's ``fit_transform`` returns,
            or an empty list when ``method`` is not one of the three names
        """
        if method == 'gasf':
            # The image side is half of the second input dimension.
            encoder = GASF(image_size=x_train.shape[1] // 2,
                           overlapping=False,
                           scale=-1)
            banner = 'applying GASF'
        elif method == 'mtf':
            encoder = MTF(image_size=x_train.shape[1],
                          n_bins=4,
                          quantiles='empirical',
                          overlapping=False)
            banner = 'applying MTF'
        elif method == 'rp':
            encoder = RecurrencePlots(dimension=1,
                                      epsilon='percentage_points',
                                      percentage=10)
            banner = 'applying RP'
        else:
            # Unknown method: report it and hand back an empty result.
            print('wrong method')
            return []

        images = encoder.fit_transform(x_train)
        # The banner is printed only after the transform has succeeded.
        print(banner)
        return images
    def _build_images_one_stock(df_one_permno, window_len, retrain_freq,
                                encoding_method, image_size):
        """
        Encodes images as timeseries for one stock
        :param df_one_permno: dataframe of the timeseries of all data for one particular stock
        :param window_len: number of observations to consider (42 for 2 months)
        :param retrain_freq: lag to consider between making two samples
        :param encoding_method: method to encode the images
        :param image_size: final size of the image (using window_len*window_len will avoid any averaging)
        :return: np.ndarray of the samples of shape (N,window_len,window_len,M) where:
                - M is the number of features
                - N is the number of final samples ~ len(df_one_permno)/retrain_freq
        """

        n_days = df_one_permno.T.shape[-1]
        samples_list, dates_list, prc_list = [], [], []
        for i in range(window_len, n_days, retrain_freq):
            window_data = df_one_permno.T.iloc[:, i - window_len:i]

            # Use GADF algorithm to transform data
            if encoding_method == 'GADF':
                try:
                    from pyts.image import GADF
                    gadf = GADF(image_size)
                except:
                    from pyts.image import GramianAngularField
                    gadf = GramianAngularField(image_size, method='difference')
                samples_list.append(gadf.fit_transform(window_data).T)

            # Use GASF algorithm to transform data
            elif encoding_method == 'GASF':
                try:
                    from pyts.image import GASF
                    gasf = GASF(image_size)
                except:
                    from pyts.image import GramianAngularField
                    gasf = GramianAngularField(image_size, method='summation')
                samples_list.append(gasf.fit_transform(window_data).T)

            # Use MTF algorithm to transform data
            elif encoding_method == 'MTF':
                try:
                    from pyts.image import MTF
                    mtf = MTF(image_size)
                except:
                    from pyts.image import MarkovTransitionField
                    mtf = MarkovTransitionField(image_size)
                samples_list.append(mtf.fit_transform(window_data).T)
            else:
                raise BaseException(
                    'Method must be either GADF, GASF or MTF not {}'.format(
                        encoding_method))
        samples_list = np.asarray(samples_list)
        return samples_list
    def _show_images(self, df_window_data):
        """
        Plots a multi dimensional timeseries encoded as an image
        :param df_window_data: timeseries we want to encode as an image
        """
        data = df_window_data.reset_index().set_index('date').drop('PERMNO',
                                                                   axis=1).T
        channels = list(data.index)
        if self._encoding_method == 'GADF':
            try:
                from pyts.image import GADF
                gadf = GADF(self._image_size)
            except:
                from pyts.image import GramianAngularField
                gadf = GramianAngularField(self._image_size,
                                           method='difference')
            image_data = (gadf.fit_transform(data).T)

        elif self._encoding_method == 'GASF':
            try:
                from pyts.image import GASF
                gasf = GASF(self._image_size)
            except:
                from pyts.image import GramianAngularField
                gasf = GramianAngularField(self._image_size,
                                           method='summation')
            image_data = (gasf.fit_transform(data).T)
        elif self._encoding_method == 'MTF':
            try:
                from pyts.image import MTF
                mtf = MTF(self._image_size)
            except:
                from pyts.image import MarkovTransitionField
                mtf = MarkovTransitionField(self._image_size)
            image_data = (mtf.fit_transform(data).T)
        else:
            raise BaseException(
                'Method must be either GADF, GASF or MTF not {}'.format(
                    self._encoding_method))

        num_channels = image_data.shape[-1]
        plt.figure(figsize=(12, 14))
        for j in range(1, num_channels + 1):
            channel = image_data[:, :, j - 1]
            plt.subplot(int(num_channels / 2) + 1, 2, j)
            plt.imshow(channel, cmap='rainbow', origin='lower')
            plt.xlabel('$time$')
            plt.ylabel('$time$')
            plt.title(channels[j - 1])
            plt.tight_layout()

        plt.show()
Esempio n. 4
0
def transform_ECG(x, method):
    """Transform ECG sequence(s) to image(s) with the selected encoder.

    :param x: input passed to the encoder's ``fit_transform``; ``x.shape[1]``
        is read for 'gasf' and 'mtf' -- presumably (n_samples, n_timestamps),
        TODO confirm against caller
    :param method: 'gasf', 'mtf' or 'rp'
    :return: the output of the chosen encoder's ``fit_transform``
    :raises ValueError: if ``method`` is not one of the three names
    """
    if method == 'gasf':
        # The image side is half of the second input dimension.
        encoder = GASF(image_size=x.shape[1] // 2,
                       overlapping=False,
                       scale=-1)
    elif method == 'mtf':
        encoder = MTF(image_size=x.shape[1],
                      n_bins=4,
                      quantiles='empirical',
                      overlapping=False)
    elif method == 'rp':
        encoder = RecurrencePlots(dimension=1,
                                  epsilon='percentage_points',
                                  percentage=10)
    else:
        raise ValueError("Invalid method: " + str(method))

    return encoder.fit_transform(x)
Esempio n. 5
0
def encode_timeseries(timeseries_tensor: np.ndarray,
                      save: bool = False,
                      image_size: int = 24) -> np.ndarray:
    """
        Encodes the time-series object into images with GASF/GADF/MTF channels
        Args:
            timeseries_tensor: numpy array of clean data, passed unchanged to
                each transformer's ``fit_transform``
            save: flag for saving; when true the result is written to
                ``./data/GASF_GADF_MTF_images.npy`` (the directory is created
                if missing)
            image_size: size passed to each transformer (default 24, the value
                that was previously hard-coded)
        Returns:
            images: encoded time-series into 3 channel images, laid out as
                (num_samples, 3, height, width) with channel order
                GASF, GADF, MTF and min-max scaled over the whole array
    """
    # define the GASF, GADF and MTF transforms (this order is the channel order)
    transformers = (GASF(image_size=image_size),
                    GADF(image_size=image_size),
                    MTF(image_size=image_size))

    # transform the time-series, one (num_samples, height, width) array each
    channels = [t.fit_transform(timeseries_tensor) for t in transformers]

    # form the images: stack along a new axis 1 -> (num_samples, 3, H, W)
    images = np.stack(channels, axis=1).astype(np.float64, copy=False)

    # normalize with a single global min/max shared by all samples and channels
    lowest = np.min(images)
    value_range = np.max(images) - lowest
    if value_range == 0:
        # A constant array would otherwise produce 0/0 = NaN everywhere.
        images = np.zeros_like(images)
    else:
        images = (images - lowest) / value_range

    # save if needed
    if save:
        # exist_ok avoids the check-then-create race of isdir() + mkdir()
        os.makedirs('./data', exist_ok=True)
        np.save(file='./data/GASF_GADF_MTF_images.npy', arr=images)

    return images
def generate_gasf_gadf_mtf_compound_images(observations,
                                           image_size=128,
                                           batch_size=32):
    """
    Designed to take observations of time series data and generate compound images from it to analyze with a CNN.
    The research paper that came up with GASF-GADF-MTF images can be read here: https://arxiv.org/pdf/1506.00327.pdf

    This is a generator: nothing runs, and no error is raised, until the first batch is requested.

    :param observations: A read-only 2D numpy array. Shape: [n_observations, observation_window_length]
    :param image_size: Size of the images to generate. Must be equal to or smaller than the length of the
                       time series data in each observation.
    :param batch_size: The number of images generated per yield. Must be at least 1.
    :raises ValueError: If observations is empty or batch_size is smaller than 1.
    :return: Yields an array of images ready to be used in a CNN. Shape: [batch_size, image_size, image_size, 3]
             If there are fewer observations left to generate images for, the batch size may be less than expected.
             The origin of each image is the top-left corner. When plotted, it would be the point (0,0).
    """
    n_observations = len(observations)
    if n_observations == 0:
        raise ValueError("Observations cannot be empty.")
    if batch_size < 1:
        # A zero batch size used to loop forever on empty slices.
        raise ValueError(
            "batch_size must be at least 1, got {}".format(batch_size))

    gasf_transformer = GASF(image_size, scale=None)
    gadf_transformer = GADF(image_size, scale=None)
    mtf_transformer = MTF(image_size)

    # Split up the image generation into smaller batches to handle;
    # slicing past the end simply yields a shorter final batch.
    for start in range(0, n_observations, batch_size):
        observations_batch = observations[start:start + batch_size]

        gasf = gasf_transformer.fit_transform(observations_batch)
        gadf = gadf_transformer.fit_transform(observations_batch)
        mtf = mtf_transformer.fit_transform(observations_batch)

        # Channels last: index 0 is GASF, 1 is GADF, 2 is MTF.
        yield np.stack((gasf, gadf, mtf), axis=3)
Esempio n. 7
0
def MTF_encoder(ts,
                size=None,
                n_bins=8,
                strategy='quantile',
                overlapping=False,
                **kwargs):
    """Encode a single time series as an MTF image.

    :param ts: input series, converted with ``To2dArray``; the result must be 2-D
    :param size: requested image size; ``None`` means the full series length,
        and larger values are capped at the series length
    :param n_bins: forwarded to ``MTF``
    :param strategy: forwarded to ``MTF``
    :param overlapping: forwarded to ``MTF``
    :param kwargs: accepted but not used by this function
    :return: the ``MTF`` output with its leading axis squeezed out, so the
        input must hold exactly one series (``np.squeeze`` on axis 0 fails otherwise)
    """
    series = To2dArray(ts)
    assert series.ndim == 2, 'ts ndim must be 2!'

    n_timestamps = series.shape[-1]
    size = n_timestamps if size is None else min(size, n_timestamps)

    # Reduce the series to `size` points with PAA before encoding it.
    reduced = PAA(window_size=None, output_size=size).fit_transform(series)
    encoder = MTF(size,
                  n_bins=n_bins,
                  strategy=strategy,
                  overlapping=overlapping)
    return np.squeeze(encoder.fit_transform(reduced), 0)
Esempio n. 8
0
# Demo script: shows a recurrence plot of random data and an MTF image of
# values read from 'sample_1.csv'.

# Keep the first 10000 rows and the columns at positions 4, 5 and 6.
sig = pd.read_csv('sample_1.csv').iloc[0:10000, 4:7]

n_samples, n_features = 100, 1

# Fixed seed so the random input is reproducible.
rng = np.random.RandomState(41)

# Standard-normal array of shape (100, 1).
# NOTE(review): if the transformer treats each row as one series, these are
# 100 series of length 1 -- confirm this is the intended shape.
X = rng.randn(n_samples, n_features)

# Recurrence plot transformation
rp = RecurrencePlots(dimension=1,
                     epsilon='percentage_points',
                     percentage=30)

X_rp = rp.fit_transform(X)

# Show the result for the first sample only.
plt.figure(figsize=(8, 8))
plt.imshow(X_rp[0], cmap='binary', origin='lower')
plt.show()



# MTF transformation
# NOTE(review): image_size = 1 presumably yields a 1x1 image per row of `sig`
# -- confirm this is intended.
image_size = 1
mtf = MTF(image_size)
X_mtf = mtf.fit_transform(sig)

# Show the results for the first time series
plt.figure(figsize=(8, 8))
plt.imshow(X_mtf[0], cmap='rainbow', origin='lower')
plt.show()