Example #1
def _get_random_walk():
    numpy.random.seed(0)
    # Generate a random walk time series
    n_ts, sz, d = 1, 100, 1
    dataset = random_walks(n_ts=n_ts, sz=sz, d=d)
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    return scaler.fit_transform(dataset)
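A minimal usage sketch for the helper above; the imports are assumptions inferred from the names used in the snippet (they are not shown in the original):

import numpy
from tslearn.generators import random_walks
from tslearn.preprocessing import TimeSeriesScalerMeanVariance

walk = _get_random_walk()        # shape (1, 100, 1): one z-normalized walk
print(walk.mean(), walk.std())   # approximately 0 and 1 after scaling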
Example #2
def perform_sax(dataset, gram_number, symbols, segments):
    scaler = TimeSeriesScalerMeanVariance(
        mu=0., std=np.std(dataset))  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # SAX transform
    sax = SymbolicAggregateApproximation(n_segments=segments,
                                         alphabet_size_avg=symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # Convert result to strings
    df_sax = pd.DataFrame(sax_dataset_inv[0])
    sax_series = df_sax[0]

    # Convert sax from numeric to characters
    sax_values = sax_series.unique()
    alphabet = 'abcdefghijklmnopqrstuvw'
    sax_dict = {x: alphabet[i] for i, x in enumerate(sax_values)}
    sax_list = [sax_dict[x] for x in sax_series]

    # Convert the list of characters to n-grams based on the input parameter
    tri = n_grams(gram_number, sax_list)
    return tri
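The snippet above relies on an n_grams helper that is not shown. A plausible stand-in (a hypothetical implementation, not the original) is a simple sliding window over the symbol list:

def n_grams(n, tokens):
    # Join each run of n consecutive SAX symbols into one string,
    # e.g. ['a', 'b', 'c'] with n=2 -> ['ab', 'bc']
    return [''.join(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]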
Example #3
def getNormalize(data_baseline):

    data_baseline = TimeSeriesScalerMeanVariance().fit_transform(data_baseline)
    data_baseline = data_baseline.reshape(
        (data_baseline.shape[0], data_baseline.shape[1]))

    return data_baseline
Example #4
 def _update_centroids(self, X):
     for k in range(self.n_clusters):
         self.cluster_centers_[k] = self._shape_extraction(X, k)
     self.cluster_centers_ = TimeSeriesScalerMeanVariance(
         mu=0., std=1.).fit_transform(self.cluster_centers_)
     self._norms_centroids = numpy.linalg.norm(self.cluster_centers_,
                                               axis=(1, 2))
Example #5
 def reshape_data(self):
     ts_value = self.input_df.T.values
     ts_value = ts_value.reshape(ts_value.shape[0], ts_value.shape[1], 1)
     scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
     data_scaled = scaler.fit_transform(ts_value)
     data_scaled = np.nan_to_num(data_scaled)
     self.data_scaled = data_scaled
Example #6
def test_single_value_ts_no_nan():
    X = to_time_series_dataset([[1, 1, 1, 1]])

    standard_scaler = TimeSeriesScalerMeanVariance()
    assert np.sum(np.isnan(standard_scaler.fit_transform(X))) == 0

    minmax_scaler = TimeSeriesScalerMinMax()
    assert np.sum(np.isnan(minmax_scaler.fit_transform(X))) == 0
Example #7
def getStdData(originData):
    n_paa_segments = 120  # each day is split into 4 parts: every 6 hours are merged into one segment
    paa_data = PiecewiseAggregateApproximation(
        n_segments=n_paa_segments).fit_transform(originData)
    # standardize to zero mean and unit variance
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    dataset = scaler.fit_transform(paa_data)
    dataset = dataset.reshape(dataset.shape[0], dataset.shape[1])
    return dataset
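A minimal usage sketch with synthetic data standing in for the original input; the series length of 480 is an assumption chosen so that 120 PAA segments average 4 points each:

import numpy as np
from tslearn.piecewise import PiecewiseAggregateApproximation
from tslearn.preprocessing import TimeSeriesScalerMeanVariance

raw = np.random.randn(5, 480)  # 5 series of 480 points each (hypothetical)
std_data = getStdData(raw)     # PAA down to 120 segments, then z-normalized
print(std_data.shape)          # (5, 120)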
Example #8
def normalize(df):
    df_normalized = df.copy()

    scaler = TimeSeriesScalerMeanVariance(mu=0, std=1)
    for col in df:
        # fit_transform returns shape (1, len, 1); flatten before assigning
        df_normalized[col] = scaler.fit_transform(df_normalized[col])[0].ravel()

    return df_normalized
Example #9
    def _transform(self, X, y=None):
        n_ts, sz, d = X.shape

        if d > 1:
            raise NotImplementedError("We currently don't support using "
                                      "multi-dimensional matrix profiles "
                                      "from the stumpy library.")

        output_size = sz - self.subsequence_length + 1
        X_transformed = np.empty((n_ts, output_size, 1))

        if self.implementation == "stump":
            if not STUMPY_INSTALLED:
                raise ImportError(stumpy_msg)

            for i_ts in range(n_ts):
                result = stumpy.stump(T_A=X[i_ts, :, 0].ravel(),
                                      m=self.subsequence_length)
                X_transformed[i_ts, :, 0] = result[:, 0].astype(float)

        elif self.implementation == "gpu_stump":
            if not STUMPY_INSTALLED:
                raise ImportError(stumpy_msg)

            for i_ts in range(n_ts):
                result = stumpy.gpu_stump(T_A=X[i_ts, :, 0].ravel(),
                                          m=self.subsequence_length)
                X_transformed[i_ts, :, 0] = result[:, 0].astype(float)

        elif self.implementation == "numpy":
            scaler = TimeSeriesScalerMeanVariance()
            band_width = int(np.ceil(self.subsequence_length / 4))
            for i_ts in range(n_ts):
                segments = _series_to_segments(X[i_ts],
                                               self.subsequence_length)
                if self.scale:
                    segments = scaler.fit_transform(segments)
                n_segments = segments.shape[0]
                segments_2d = segments.reshape(
                    (-1, self.subsequence_length * d))
                dists = squareform(pdist(segments_2d, "euclidean"))
                band = (np.tri(
                    n_segments, n_segments, band_width, dtype=bool
                ) & ~np.tri(
                    n_segments, n_segments, -(band_width + 1), dtype=bool))
                dists[band] = np.inf
                X_transformed[i_ts] = dists.min(axis=1, keepdims=True)

        else:
            available_implementations = ["numpy", "stump", "gpu_stump"]
            raise ValueError(
                'This "{}" matrix profile implementation is not'
                ' recognized. Available implementations are {}.'.format(
                    self.implementation, available_implementations))

        return X_transformed
Example #10
def cor(x, y):
    """
    Correlation-based distance (COR) between two multivariate time series given as arrays of shape (timesteps, dim)
    """
    scaler = TimeSeriesScalerMeanVariance()
    x_norm = scaler.fit_transform(x)
    y_norm = scaler.fit_transform(y)
    pcc = np.mean(x_norm * y_norm)  # Pearson correlation coefficient
    d = np.sqrt(2.0 * (1.0 - pcc + 1e-9))  # correlation-based distance
    return np.sum(d)
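A quick sanity check, assuming two independent standard-normal series: their correlation is near 0, so the distance should land near sqrt(2) ~ 1.414:

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(100, 2)  # (timesteps, dim)
y = rng.randn(100, 2)
print(cor(x, y))       # roughly 1.414 for uncorrelated inputs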
Example #11
def saa_pax(dataset, title):
    """
    Show the graph of PAA and SAX of time series data
    :param dataset: time series of a stock
    :return:
    """
    n_ts, sz, d = 1, 100, 1
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    n_paa_segments = 10
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    plt.figure()
    plt.subplot(2, 2, 1)  # First, raw time series
    plt.plot(dataset[0].ravel(), "b-")
    plt.title("Raw time series " + title)

    plt.subplot(2, 2, 2)  # Second, PAA
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(paa_dataset_inv[0].ravel(), "b-")
    plt.title("PAA " + title)

    plt.subplot(2, 2, 3)  # Then SAX
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(sax_dataset_inv[0].ravel(), "b-")
    plt.title("SAX, %d symbols" % n_sax_symbols)

    plt.subplot(2, 2, 4)  # Finally, 1d-SAX
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-")
    plt.title("1d-SAX, %d symbols (%dx%d)" %
              (n_sax_symbols_avg * n_sax_symbols_slope, n_sax_symbols_avg,
               n_sax_symbols_slope))

    plt.tight_layout()
    plt.show()
Example #12
def ApplyPaa(n_paa_segments, df, ckt):
    print("Number of PAA segments: {}".format(n_paa_segments))
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    scaler = TimeSeriesScalerMeanVariance()
    dadosPaa = df
    for i in range(0, len(df)):
        dataset = scaler.fit_transform(df[i])
        dadosPaa[i] = paa.inverse_transform(paa.fit_transform(dataset))[0]
    dadosPaa = dadosPaa.T

    return dadosPaa
Example #13
 def standardize(self, data):
     """
     Standardize des TS, moyenne: 0 et ecart type: 1
     Data: dataframe
     """
     # prepare data for standardization
     values = data["Valeur"]
     values = values.values.reshape((len(values), 1))
     # train the standardization
     t = TimeSeriesScalerMeanVariance().fit_transform(values)
     print(t)
     data["valeur"] = TimeSeriesScalerMeanVariance().fit_transform(values)
     return data
Example #14
def check_classifiers_classes(name, classifier_orig):
    # Case of shapelet models
    if name == 'SerializableShapeletModel':
        raise SkipTest('Skipping check_classifiers_classes for shapelets'
                       ' due to convergence issues...')
    elif name == 'ShapeletModel':
        X_multiclass, y_multiclass = _create_large_ts_dataset()
        classifier_orig = clone(classifier_orig)
        classifier_orig.max_iter = 1000
    else:
        X_multiclass, y_multiclass = _create_small_ts_dataset()

    X_multiclass, y_multiclass = shuffle(X_multiclass,
                                         y_multiclass,
                                         random_state=7)

    scaler = TimeSeriesScalerMeanVariance()
    X_multiclass = scaler.fit_transform(X_multiclass)

    X_multiclass = np.reshape(X_multiclass,
                              (X_multiclass.shape[0], X_multiclass.shape[1]))

    X_binary = X_multiclass[y_multiclass != 2]
    y_binary = y_multiclass[y_multiclass != 2]

    X_multiclass = pairwise_estimator_convert_X(X_multiclass, classifier_orig)
    X_binary = pairwise_estimator_convert_X(X_binary, classifier_orig)

    labels_multiclass = ["one", "two", "three"]
    labels_binary = ["one", "two"]

    y_names_multiclass = np.take(labels_multiclass, y_multiclass)
    y_names_binary = np.take(labels_binary, y_binary)

    problems = [(X_binary, y_binary, y_names_binary)]

    if not classifier_orig._get_tags()['binary_only']:
        problems.append((X_multiclass, y_multiclass, y_names_multiclass))

    for X, y, y_names in problems:
        for y_names_i in [y_names, y_names.astype('O')]:
            y_ = choose_check_classifiers_labels(name, y, y_names_i)
            check_classifiers_predictions(X, y_, name, classifier_orig)

    labels_binary = [-1, 1]
    y_names_binary = np.take(labels_binary, y_binary)
    y_binary = choose_check_classifiers_labels(name, y_binary, y_names_binary)
    check_classifiers_predictions(X_binary, y_binary, name, classifier_orig)
Example #15
    def fit(self, X, y=None):
        """Compute k-Shape clustering.

        Parameters
        ----------
        X : array-like of shape=(n_ts, sz, d)
            Time series dataset.
        """

        X_ = to_time_series_dataset(X)
        X_ = TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform(X_)
        assert X_.shape[-1] == 1, "kShape is supposed to work on monomodal data, provided data has dimension %d" % \
                                  X_.shape[-1]
        rs = check_random_state(self.random_state)

        best_correct_centroids = None
        min_inertia = numpy.inf
        n_successful = 0
        n_attempts = 0
        while n_successful < self.n_init and n_attempts < self.max_attempts:
            try:
                if self.verbose and self.n_init > 1:
                    print("Init %d" % (n_successful + 1))
                n_attempts += 1
                self._fit_one_init(X_, rs)
                if self.inertia_ < min_inertia:
                    best_correct_centroids = self.cluster_centers_.copy()
                    min_inertia = self.inertia_
                n_successful += 1
            except EmptyClusterError:
                if self.verbose:
                    print("Resumed because of empty cluster")
        self._post_fit(X_, best_correct_centroids, min_inertia)
        return self
Example #16
def load_casas(dataset, fixed_length):
    X = np.load('./npy/{}-x-noidle.npy'.format(dataset), allow_pickle=True)
    # Trim so the sample count is a multiple of fixed_length, then reshape
    # into windows of shape (n_windows, fixed_length, 1)
    if X.shape[0] % fixed_length != 0:
        X = X[:-(X.shape[0] % fixed_length)]
    X = X.reshape(-1, fixed_length, 1)
    print(X.shape)
    Y = np.load('./npy/{}-y-noidle.npy'.format(dataset), allow_pickle=True)
    if Y.shape[0] % fixed_length != 0:
        Y = Y[:-(Y.shape[0] % fixed_length)]
    Y = Y.reshape(-1, fixed_length, 1)
    print(Y.shape)
    Y = np.array(Y, dtype=int)

    # Label each window with its majority activity
    y = []
    for i in range(X.shape[0]):
        y.append(np.argmax(np.bincount(Y[i].flatten())))
    print(Counter(y))
    X = np.array(X, dtype=object)
    y = np.array(y)  # keep y 1-d: LabelEncoder expects a 1-d array

    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)

    dictActivities = np.load('./npy/{}-labels-noidle.npy'.format(dataset), allow_pickle=True).item()

    X_scaled=TimeSeriesScalerMeanVariance().fit_transform(X)
    return X_scaled, y, dictActivities
Example #17
def preprocessing(lc_data, windowSize=50):
    lc_nor = TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform(
        [lc_data['instances']])
    lc_data['lc_nor'] = lc_nor

    paa = ut_gen.genListPAA(instances_nor=lc_nor,
                            windowSize=windowSize,
                            timestamp=lc_data['timestamp'])
    lc_data['paa'] = paa
    sax = ut_gen.genListSAX(instances_nor=lc_nor,
                            windowSize=windowSize,
                            timestamp=lc_data['timestamp'],
                            n_sax_symbols=n_sax_symbols)
    lc_data['sax'] = sax
    one_sax = ut_gen.genList1D_SAX(instances_nor=lc_nor,
                                   windowSize=windowSize,
                                   timestamp=lc_data['timestamp'],
                                   n_sax_symbols_slope=n_sax_symbols_slope,
                                   n_sax_symbols_avg=n_sax_symbols_avg)
    lc_data['one_sax'] = one_sax

    corePlot = sketchDyBinService(windowSize=windowSize,
                                  initialBin=5,
                                  isOnline=False)
    sketchInstances = corePlot.sketchMode(instances=list(lc_nor[0].ravel()))
    lc_data['dy_bin'] = {
        'sketchInstances': sketchInstances,
        'timestamp': lc_data['timestamp']
    }
    print("a")
Example #18
    def processData(self, transmission: Transmission):
        self.t = transmission
        self.set_data_column_combo_box()

        if not self.ctrls['Apply'].isChecked():
            return

        self.t = transmission.copy()

        mu = self.ctrls['mu'].value()
        std = self.ctrls['std'].value()

        params = {
            'data_column': self.data_column,
            'mu': mu,
            'std': std,
            'units': self.t.last_unit
        }

        output_column = '_SCALER_MEAN_VARIANCE'

        self.t.df[output_column] = self.t.df[self.data_column].apply(
            lambda a: TimeSeriesScalerMeanVariance(mu=mu, std=std
                                                   ).fit_transform(a)[:, :, 0])
        self.t.history_trace.add_operation(data_block_id='all',
                                           operation='scaler_mean_variance',
                                           parameters=params)
        self.t.last_output = output_column

        return self.t
Example #19
    def update_plot_means(self, *args, **kwargs):
        """Update the means plot"""
        padded = self.pad_input_data(self.input_arrays, 'fill-size')
        scaled = TimeSeriesScalerMeanVariance().fit_transform(padded)[:, :, 0]

        if self.control_widget.ui.radioButtonXZeroZero.isChecked():
            xzero = 'zero'
        elif self.control_widget.ui.radioButtonXZeroMaxima.isChecked():
            xzero = 'maxima'
        else:
            raise ValueError('Must select an option for set x = 0 at')

        if self.control_widget.ui.comboBoxErrorBand.currentText(
        ) == 'standard deviation':
            ci = 'sd'
        elif self.control_widget.ui.comboBoxErrorBand.currentText(
        ) == 'confidence interval':
            ci = 95
        elif self.control_widget.ui.comboBoxErrorBand.currentText() == 'None':
            ci = None
        else:
            ci = None  # ensure `ci` is always defined

        self.plot_means.set_plots(scaled,
                                  self.n_clusters,
                                  self.y_pred,
                                  xzero_pos=xzero,
                                  error_band=ci)
        self.plot_means.show()
Example #20
def check_clustering(name, clusterer_orig, readonly_memmap=False):

    clusterer = clone(clusterer_orig)
    X, y = _create_small_ts_dataset()
    X, y = shuffle(X, y, random_state=7)
    X = TimeSeriesScalerMeanVariance().fit_transform(X)
    rng = np.random.RandomState(42)
    X_noise = X + (rng.randn(*X.shape) / 5)

    n_samples, n_features, dim = X.shape
    # catch deprecation and neighbors warnings
    if hasattr(clusterer, "n_clusters"):
        clusterer.set_params(n_clusters=3)
    set_random_state(clusterer)

    # fit
    clusterer.fit(X)
    # with lists
    clusterer.fit(X.tolist())

    pred = clusterer.labels_
    assert_equal(pred.shape, (n_samples,))
    assert_greater(adjusted_rand_score(pred, y), 0.4)

    if clusterer._get_tags()['non_deterministic']:
        return

    set_random_state(clusterer)
    with warnings.catch_warnings(record=True):
        pred2 = clusterer.fit_predict(X)
    assert_array_equal(pred, pred2)

    # fit_predict(X) and labels_ should be of type int
    assert pred.dtype in [np.dtype('int32'), np.dtype('int64')]
    assert pred2.dtype in [np.dtype('int32'), np.dtype('int64')]

    # Add noise to X to test the possible values of the labels
    labels = clusterer.fit_predict(X_noise)

    # There should be at least one sample in every original cluster
    labels_sorted = np.unique(labels)
    assert_array_equal(labels_sorted, np.arange(0, 3))

    # Labels should be at most n_clusters - 1
    if hasattr(clusterer, 'n_clusters'):
        n_clusters = getattr(clusterer, 'n_clusters')
        assert_greater_equal(n_clusters - 1, labels_sorted[-1])
Example #21
def load_tslearn_data():
    """ Time series data with variable length """
    X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
    X_train = X_train[y_train < 4]  # Keep first 3 classes
    np.random.shuffle(X_train)
    X_train = TimeSeriesScalerMeanVariance().fit_transform(X_train[:50])  # Keep only 50 time series
    X_train = TimeSeriesResampler(sz=40).fit_transform(X_train)  # Make time series shorter
    X_train = X_train.reshape(50,-1)
    return X_train
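A sketch of consuming the flattened (50, 40) output; clustering with scikit-learn's KMeans is an illustrative choice here, not something the original code does:

from sklearn.cluster import KMeans

X = load_tslearn_data()  # shape (50, 40)
labels = KMeans(n_clusters=3, random_state=0).fit_predict(X)
print(labels)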
Example #22
def test_variable_length_knn():
    X = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 3], [9, 8, 7, 6, 5, 2],
                                [8, 7, 6, 5, 3]])
    y = [0, 0, 1, 1]

    clf = KNeighborsTimeSeriesClassifier(metric="dtw", n_neighbors=1)
    clf.fit(X, y)
    assert_allclose(clf.predict(X), [0, 0, 1, 1])

    clf = KNeighborsTimeSeriesClassifier(metric="softdtw", n_neighbors=1)
    clf.fit(X, y)
    assert_allclose(clf.predict(X), [0, 0, 1, 1])

    scaler = TimeSeriesScalerMeanVariance()
    clf = KNeighborsTimeSeriesClassifier(metric="sax",
                                         n_neighbors=1,
                                         metric_params={'n_segments': 2})
    X_transf = scaler.fit_transform(X)
    clf.fit(X_transf, y)
    assert_allclose(clf.predict(X_transf), [0, 0, 1, 1])
Example #23
def load_ucr(dataset='CBF'):
    X_train, y_train, X_test, y_test = ucr.load_dataset(dataset)
    X = np.concatenate((X_train, X_test))
    y = np.concatenate((y_train, y_test))
    if dataset == 'HandMovementDirection':  # this one has special labels
        y = [yy[0] for yy in y]
    y = LabelEncoder().fit_transform(y)  # sometimes labels are strings or start from 1
    assert y.min() == 0  # labels are integers and start from 0
    # preprocess data (standardization)
    X_scaled = TimeSeriesScalerMeanVariance().fit_transform(X)
    return X_scaled, y
Example #24
def get_distance_matrix(numpy_array):
    sc = TimeSeriesScalerMeanVariance()
    X_s = sc.fit_transform(to_time_series_dataset(numpy_array))

    size = len(X_s)

    idx = [(i, j) for i in range(0, size) for j in range(i + 1, size)]

    def calc_dtw(my_idx):
        i, j = my_idx
        return dtw(X_s[i], X_s[j])

    with mp.Pool(mp.cpu_count() - 1) as p:
        distances = p.map(calc_dtw, idx)

    dm = np.zeros(shape=(size, size))
    for (i, j), v in zip(idx, distances):
        dm[i, j] = v
        dm[j, i] = v

    return dm
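One way the resulting matrix might be consumed (an assumption, not part of the original): hierarchical clustering on the precomputed DTW distances via SciPy. series_array below is a hypothetical (n_ts, sz) NumPy array:

from scipy.cluster.hierarchy import fcluster, linkage
from scipy.spatial.distance import squareform

dm = get_distance_matrix(series_array)    # square, symmetric, zero diagonal
condensed = squareform(dm, checks=False)  # condensed form expected by linkage
labels = fcluster(linkage(condensed, method="average"), t=3, criterion="maxclust")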
Example #25
 def _transform(self, X, y=None):
     n_ts, sz, d = X.shape
     output_size = sz - self.subsequence_length + 1
     X_transformed = numpy.empty((n_ts, output_size, 1))
     scaler = TimeSeriesScalerMeanVariance()
     for i_ts in range(n_ts):
         Xi = X[i_ts]
         elem_size = Xi.strides[0]
         # Strided view of all length-`subsequence_length` windows (no copy)
         segments = as_strided(
             Xi,
             strides=(elem_size, elem_size, Xi.strides[1]),
             shape=(Xi.shape[0] - self.subsequence_length + 1,
                    self.subsequence_length, d),
             writeable=False)
         if self.scale:
             segments = scaler.fit_transform(segments)
         segments_2d = segments.reshape((-1, self.subsequence_length * d))
         dists = squareform(pdist(segments_2d, "euclidean"))
         numpy.fill_diagonal(dists, numpy.inf)
         X_transformed[i_ts] = dists.min(axis=1, keepdims=True)
     return X_transformed
Example #26
 def approximate(self,
                 series: np.ndarray,
                 window: int = 1,
                 should_fit: bool = True) -> np.ndarray:
     # series is already in batches
     debug('TSLearnApproximatorWrapper.approximate: series shape {}'.format(
         series.shape))
     debug(
         'TSLearnApproximatorWrapper.approximate: to_time_series shape {}'.
         format(series.shape))
     ts_representation = list()
     debug(
         f'TSLearnApproximatorWrapper.approximate: param series \n{series} '
     )
     for segment in series:
         if isinstance(self.transformer,
                       (SymbolicAggregateApproximation,
                        OneD_SymbolicAggregateApproximation)):
             logger.info(
                 "Scaling the data so that it follows a normal distribution."
             )
             scaler = TimeSeriesScalerMeanVariance(
                 mu=0., std=1.)  # Rescale time series
             segment = scaler.fit_transform(segment)
         ts_representation.append(self.transformer.fit_transform(segment))
     # debug('TSLearnApproximatorWrapper.approximate: ts_representation \n{}'.format(ts_representation))
     debug(
         'TSLearnApproximatorWrapper.approximate: ts_representation shape {}'
         .format(np.shape(ts_representation)))
     ts_representation = np.reshape(
         ts_representation,
         (np.shape(ts_representation)[0],
          np.shape(ts_representation)[1] * np.shape(ts_representation)[2]))
     debug('TSLearnApproximatorWrapper.approximate: ts_representation \n{}'.
           format(ts_representation))
     debug(
         'TSLearnApproximatorWrapper.approximate: ts_representation shape {}'
         .format(ts_representation.shape))
     return ts_representation
Example #27
def test_serialize_kshape():
    n, sz, d = 15, 10, 3
    rng = numpy.random.RandomState(0)
    time_series = rng.randn(n, sz, d)
    X = TimeSeriesScalerMeanVariance().fit_transform(time_series)

    ks = KShape(n_clusters=3, verbose=True)

    _check_not_fitted(ks)

    ks.fit(X)

    _check_params_predict(ks, X, ['predict'])
Example #28
def ApplyPaa(n_paa_segments, df):
    '''
    Applies PAA to the given dataframe.

    :param n_paa_segments: number of PAA segments for data reduction
    :param df: dataframe containing the data to which PAA will be applied
    :return: df after applying PAA
    '''
    df = df.values.T.tolist()
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    dadosPaa = scaler.fit_transform(df)
    print("Quantidade de segmentos de PAA: {}".format(n_paa_segments))
    paa = PiecewiseAggregateApproximation(n_paa_segments)
    dadosPaa = paa.inverse_transform(paa.fit_transform(dadosPaa))

    df = pd.DataFrame()

    for i in range(len(dadosPaa.T)):
        for j in range(len(dadosPaa.T[0])):
            df[j] = dadosPaa.T[i][j]

    return df
Example #29
 def Preprocess(self, x=None):
     """
     dataを(batch, len(data)//time_span)の形に整形する。
     """
     if str(type(x)) == "<class 'NoneType'>":
         self.n_data = len(self.data) // self.time_span
         self.n_use = self.time_span * self.n_data
         ts = self.data.loc[:self.data.index[self.n_use - 1]]
         ts = np.array(ts.TEMPERATURE).reshape(1, -1)
         ts = TimeSeriesScalerMeanVariance().fit_transform(ts)
         ts = np.array(ts).reshape(self.n_data, -1)
         ts = TimeSeriesResampler(sz=self.batch).fit_transform(ts)
         self.ts = ts
     else:
         self.x_data = len(x) // self.time_span
         self.x_use = self.time_span * self.x_data
         ts = x.loc[:x.index[self.x_use - 1]]
         ts = np.array(ts.TEMPERATURE).reshape(1, -1)
         ts = TimeSeriesScalerMeanVariance().fit_transform(ts)
         ts = np.array(ts).reshape(self.x_data, -1)
         ts = TimeSeriesResampler(sz=self.batch).fit_transform(ts)
         return ts
Example #30
def test_kshape():
    n, sz, d = 15, 10, 3
    rng = np.random.RandomState(0)
    time_series = rng.randn(n, sz, d)
    time_series = TimeSeriesScalerMeanVariance().fit_transform(time_series)

    ks = KShape(n_clusters=3, n_init=1, verbose=False,
                random_state=rng).fit(time_series)
    dists = ks._cross_dists(time_series)
    np.testing.assert_allclose(ks.labels_, dists.argmin(axis=1))
    np.testing.assert_allclose(ks.labels_, ks.predict(time_series))

    assert KShape(n_clusters=101, verbose=False,
                  random_state=rng).fit(time_series)._X_fit is None