Example #1
import numpy
from sklearn.linear_model import OrthogonalMatchingPursuitCV


def OMP_cv(problem, **kwargs):
    r"""Select relevant data series with cross-validated Orthogonal Matching Pursuit.

    Requirements
    ------------
    kwargs['choose'] must be a positive integer (upper bound on OMP iterations)

    kwargs['coef_tolerance'] must be a nonnegative float

    Returns
    -------
    output : tuple
        (optimum, maximum), where optimum is the list of problem.data entries
        whose OMP coefficient exceeds coef_tolerance in absolute value, and
        maximum is the R^2 score of the fitted model.

    """
    data_list = [datum['data']['values'] for datum in problem.data]
    data = numpy.array(data_list)
    OMP = OrthogonalMatchingPursuitCV(max_iter=kwargs['choose'])
    OMP.fit(data.T, problem.goal['data']['values'])
    OMP_coefficients = OMP.coef_
    optimum = [
        problem.data[index] for index, element in enumerate(OMP_coefficients)
        if abs(element) > kwargs['coef_tolerance']
    ]
    maximum = OMP.score(data.T, problem.goal['data']['values'])
    output = (optimum, maximum)
    return output
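A minimal way to exercise OMP_cv, assuming a hypothetical problem container that matches the attribute layout the function reads (problem.data as a list of {'data': {'values': ...}} records and problem.goal in the same shape); every name below is illustrative, not part of the original source:

import numpy
from types import SimpleNamespace

# Hypothetical problem object mirroring the access pattern used by OMP_cv.
rng = numpy.random.RandomState(0)
features = [{'data': {'values': rng.randn(50).tolist()}} for _ in range(8)]
target = {'data': {'values': rng.randn(50).tolist()}}
problem = SimpleNamespace(data=features, goal=target)

# choose bounds the number of OMP iterations; coef_tolerance filters coefficients.
optimum, maximum = OMP_cv(problem, choose=5, coef_tolerance=1e-6)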
Example #2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_sparse_coded_signal
from sklearn.linear_model import (OrthogonalMatchingPursuit,
                                  OrthogonalMatchingPursuitCV)


def plot_omp():
    n_components, n_features = 512, 100
    n_nonzero_coefs = 17

    # generate the data

    # y = Xw
    # |x|_0 = n_nonzero_coefs

    y, X, w = make_sparse_coded_signal(n_samples=1,
                                       n_components=n_components,
                                       n_features=n_features,
                                       n_nonzero_coefs=n_nonzero_coefs,
                                       random_state=0)

    idx, = w.nonzero()

    # distort the clean signal
    y_noisy = y + 0.05 * np.random.randn(len(y))

    # plot the sparse signal
    plt.figure(figsize=(7, 7))
    plt.subplot(4, 1, 1)
    plt.xlim(0, 512)
    plt.title("Sparse signal")
    plt.stem(idx, w[idx], use_line_collection=True)

    # plot the noise-free reconstruction
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    omp.fit(X, y)
    coef = omp.coef_
    idx_r, = coef.nonzero()
    plt.subplot(4, 1, 2)
    plt.xlim(0, 512)
    plt.title("Recovered signal from noise-free measurements")
    plt.stem(idx_r, coef[idx_r], use_line_collection=True)

    # plot the noisy reconstruction
    omp.fit(X, y_noisy)
    coef = omp.coef_
    idx_r, = coef.nonzero()
    plt.subplot(4, 1, 3)
    plt.xlim(0, 512)
    plt.title("Recovered signal from noisy measurements")
    plt.stem(idx_r, coef[idx_r], use_line_collection=True)

    # plot the noisy reconstruction with number of non-zeros set by CV
    omp_cv = OrthogonalMatchingPursuitCV()
    omp_cv.fit(X, y_noisy)
    coef = omp_cv.coef_
    idx_r, = coef.nonzero()
    plt.subplot(4, 1, 4)
    plt.xlim(0, 512)
    plt.title("Recovered signal from noisy measurements with CV")
    plt.stem(idx_r, coef[idx_r], use_line_collection=True)

    plt.subplots_adjust(0.06, 0.04, 0.94, 0.90, 0.20, 0.38)
    plt.suptitle('Sparse signal recovery with Orthogonal Matching Pursuit',
                 fontsize=16)
    plt.show()
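One portability caveat for this snippet: the use_line_collection keyword existed in the matplotlib releases this example targets, but it has since been deprecated and removed in newer matplotlib releases (line collections became the only behavior). If the stem calls above raise a TypeError on a current install, dropping the keyword should be equivalent:

plt.stem(idx, w[idx])  # equivalent call on recent matplotlib versions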
Example #3
def test_omp_cv():
    y_ = y[:, 0]
    gamma_ = gamma[:, 0]
    ompcv = OrthogonalMatchingPursuitCV(normalize=True, fit_intercept=False,
                                        max_iter=10, cv=5)
    ompcv.fit(X, y_)
    assert_equal(ompcv.n_nonzero_coefs_, n_nonzero_coefs)
    assert_array_almost_equal(ompcv.coef_, gamma_)
    omp = OrthogonalMatchingPursuit(normalize=True, fit_intercept=False,
                                    n_nonzero_coefs=ompcv.n_nonzero_coefs_)
    omp.fit(X, y_)
    assert_array_almost_equal(ompcv.coef_, omp.coef_)
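This test (like the near-identical Examples #5 and #7 below) reads module-level fixtures X, y, gamma and n_nonzero_coefs defined elsewhere in scikit-learn's test module. A sketch of equivalent setup, assuming a noiseless sparse linear model so that cross-validation can recover the true support (the exact assertions may still be sensitive to the data):

import numpy as np
from numpy.testing import assert_array_almost_equal, assert_equal
from sklearn.linear_model import (OrthogonalMatchingPursuit,
                                  OrthogonalMatchingPursuitCV)

# Hypothetical stand-ins for the module-level test fixtures.
rng = np.random.RandomState(0)
n_samples, n_features, n_nonzero_coefs, n_targets = 25, 35, 5, 3
X = rng.randn(n_samples, n_features)
X /= np.sqrt(np.sum(X ** 2, axis=0))  # unit-norm columns, as OMP assumes
gamma = np.zeros((n_features, n_targets))
support = rng.choice(n_features, n_nonzero_coefs, replace=False)
gamma[support, :] = rng.randn(n_nonzero_coefs, n_targets)
y = np.dot(X, gamma)  # noiseless targets, one column per target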
Example #5
def test_omp_cv():
    # FIXME: This test is unstable on Travis, see issue #3190 for more detail.
    check_skip_travis()
    y_ = y[:, 0]
    gamma_ = gamma[:, 0]
    ompcv = OrthogonalMatchingPursuitCV(normalize=True, fit_intercept=False,
                                        max_iter=10, cv=5)
    ompcv.fit(X, y_)
    assert_equal(ompcv.n_nonzero_coefs_, n_nonzero_coefs)
    assert_array_almost_equal(ompcv.coef_, gamma_)
    omp = OrthogonalMatchingPursuit(normalize=True, fit_intercept=False,
                                    n_nonzero_coefs=ompcv.n_nonzero_coefs_)
    omp.fit(X, y_)
    assert_array_almost_equal(ompcv.coef_, omp.coef_)
Example #6
class _OrthogonalMatchingPursuitCVImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
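Example #6 is an operator-wrapper (the pattern used by schema-based wrappers such as lale), and Op is presumably an import alias for the scikit-learn estimator. A hedged sketch of that wiring and a round trip through the wrapper:

from sklearn.linear_model import OrthogonalMatchingPursuitCV as Op  # assumed alias

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(40, 10)
y = X @ rng.randn(10)

# Hyperparameters are forwarded verbatim to the wrapped estimator.
model = _OrthogonalMatchingPursuitCVImpl(cv=3, fit_intercept=True)
model.fit(X, y)
print(model.predict(X[:5]))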
Example #7
def test_omp_cv():
    # FIXME: This test is unstable on Travis, see issue #3190 for more detail.
    check_skip_travis()
    y_ = y[:, 0]
    gamma_ = gamma[:, 0]
    ompcv = OrthogonalMatchingPursuitCV(normalize=True,
                                        fit_intercept=False,
                                        max_iter=10,
                                        cv=5)
    ompcv.fit(X, y_)
    assert_equal(ompcv.n_nonzero_coefs_, n_nonzero_coefs)
    assert_array_almost_equal(ompcv.coef_, gamma_)
    omp = OrthogonalMatchingPursuit(normalize=True,
                                    fit_intercept=False,
                                    n_nonzero_coefs=ompcv.n_nonzero_coefs_)
    omp.fit(X, y_)
    assert_array_almost_equal(ompcv.coef_, omp.coef_)
Example #8
    def predict(self):
        """
         trains the scikit-learn  python machine learning algorithm library function
         https://scikit-learn.org

         then passes the trained algorithm the features set and returns the
         predicted y test values form, the function

         then compares the y_test values from scikit-learn predicted to
         y_test values passed in

         then returns the accuracy
         """

        n_nonzero_coefs = 17
        algorithm = OrthogonalMatchingPursuitCV()
        algorithm.fit(self.X_train, self.y_train)
        y_pred = list(algorithm.predict(self.X_test))
        self.acc = OneHotPredictor.get_accuracy(y_pred, self.y_test)
        return self.acc
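OrthogonalMatchingPursuitCV is a regressor, so its raw predictions are continuous; the OneHotPredictor.get_accuracy helper (not shown in this example) presumably discretizes them before the comparison. A self-contained sketch of the same fit/predict flow on synthetic data, scored with the estimator's built-in R^2 instead:

import numpy as np
from sklearn.linear_model import OrthogonalMatchingPursuitCV
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(0)
X = rng.randn(100, 20)
y = X @ rng.randn(20)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

algorithm = OrthogonalMatchingPursuitCV()
algorithm.fit(X_train, y_train)
y_pred = algorithm.predict(X_test)
print("held-out R^2:", algorithm.score(X_test, y_test))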
Example #9
from sklearn.linear_model import OrthogonalMatchingPursuitCV


def _ompcv(*,
           train,
           test,
           x_predict=None,
           metrics,
           copy=True,
           fit_intercept=True,
           normalize=True,
           max_iter=None,
           cv=None,
           n_jobs=None,
           verbose=False):
    """For more info visit : 
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.OrthogonalMatchingPursuitCV.html#sklearn.linear_model.OrthogonalMatchingPursuitCV
    """

    model = OrthogonalMatchingPursuitCV(fit_intercept=fit_intercept,
                                        copy=copy,
                                        normalize=normalize,
                                        max_iter=max_iter,
                                        cv=cv,
                                        n_jobs=n_jobs,
                                        verbose=verbose)
    model.fit(train[0], train[1])
    model_name = 'OrthogonalMatchingPursuitCV'
    y_hat = model.predict(test[0])

    if metrics == 'mse':
        accuracy = _mse(test[1], y_hat)
    elif metrics == 'rmse':
        accuracy = _rmse(test[1], y_hat)
    elif metrics == 'mae':
        accuracy = _mae(test[1], y_hat)
    else:
        raise ValueError("metrics must be one of 'mse', 'rmse', or 'mae'")

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
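A hedged usage sketch for _ompcv, assuming train and test are (X, y) tuples and that _mse, _rmse and _mae are small module-private metric helpers; the stand-ins below are illustrative, not the originals:

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

def _mse(y_true, y_pred):   # illustrative stand-in
    return mean_squared_error(y_true, y_pred)

def _rmse(y_true, y_pred):  # illustrative stand-in
    return np.sqrt(mean_squared_error(y_true, y_pred))

def _mae(y_true, y_pred):   # illustrative stand-in
    return mean_absolute_error(y_true, y_pred)

rng = np.random.RandomState(0)
X = rng.randn(80, 15)
y = X @ rng.randn(15)
name, score, preds = _ompcv(train=(X[:60], y[:60]),
                            test=(X[60:], y[60:]),
                            metrics='rmse')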
Example #10
    def get_feature_coefficients(self, norm_prior=1):
        """
        get feature coefficients using linear regression.
        Linear models penalized with the L1 norm have sparse solutions: many of their estimated
        coefficients are zero.
        Args:
            norm_prior: 1 for L1-norm as default. use L0 to get the sparsest result.
        """
        model = None
        alphas = np.logspace(-4, -0.5, 30)
        tuned_parameters = [{'alpha': alphas}]
        coefficient_value = None
        if norm_prior == 0:
            # L0-norm
            model = OrthogonalMatchingPursuitCV()
            model.fit(self.X_df.values, self.y_df.values)
            coefficient_value = model.coef_
        elif norm_prior == 1:
            # L1-norm
            # Lasso
            lasso = Lasso(random_state=0)
            n_folds = 3
            gridsearch = GridSearchCV(lasso, tuned_parameters, cv=n_folds, refit=True)  # refit must be enabled for best_estimator_ below
            gridsearch.fit(self.X_df.values, self.y_df.values)
            coefficient_value = gridsearch.best_estimator_.coef_
        elif norm_prior == 2:
            # L2-norm
            # Ridge
            ridge = Ridge(random_state=0)
            n_folds = 3
            gridsearch = GridSearchCV(ridge, tuned_parameters, cv=n_folds, refit=True)  # refit must be enabled for best_estimator_ below
            gridsearch.fit(self.X_df.values, self.y_df.values)
            coefficient_value = gridsearch.best_estimator_.coef_
        else:
            print("invalid norm!")

        self.coef_ = coefficient_value
        return coefficient_value
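Each branch of get_feature_coefficients reduces to a standard scikit-learn call; the same logic shown stand-alone on synthetic data (a sketch, not the original class):

import numpy as np
from sklearn.linear_model import Lasso, OrthogonalMatchingPursuitCV, Ridge
from sklearn.model_selection import GridSearchCV

rng = np.random.RandomState(0)
X = rng.randn(60, 12)
y = X @ rng.randn(12)
alphas = np.logspace(-4, -0.5, 30)

# norm_prior == 0: cross-validated OMP picks the sparsest support directly
coef_l0 = OrthogonalMatchingPursuitCV().fit(X, y).coef_

# norm_prior == 1: Lasso with alpha tuned by grid search (default refit=True
# keeps best_estimator_ available)
coef_l1 = GridSearchCV(Lasso(random_state=0), {'alpha': alphas},
                       cv=3).fit(X, y).best_estimator_.coef_

# norm_prior == 2: Ridge, tuned the same way
coef_l2 = GridSearchCV(Ridge(random_state=0), {'alpha': alphas},
                       cv=3).fit(X, y).best_estimator_.coef_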
Example #11
pl.subplot(4, 1, 2)
pl.xlim(0, 512)
pl.title("Recovered signal from noise-free measurements")
pl.stem(idx_r, coef[idx_r])

# plot the noisy reconstruction
###############################
omp.fit(X, y_noisy)
coef = omp.coef_
idx_r, = coef.nonzero()
pl.subplot(4, 1, 3)
pl.xlim(0, 512)
pl.title("Recovered signal from noisy measurements")
pl.stem(idx_r, coef[idx_r])

# plot the noisy reconstruction with number of non-zeros set by CV
##################################################################
omp_cv = OrthogonalMatchingPursuitCV()
omp_cv.fit(X, y_noisy)
coef = omp_cv.coef_
idx_r, = coef.nonzero()
pl.subplot(4, 1, 4)
pl.xlim(0, 512)
pl.title("Recovered signal from noisy measurements with CV")
pl.stem(idx_r, coef[idx_r])

pl.subplots_adjust(0.06, 0.04, 0.94, 0.90, 0.20, 0.38)
pl.suptitle('Sparse signal recovery with Orthogonal Matching Pursuit',
            fontsize=16)
pl.show()
Example #12
# the constraint condition x_i = 1
x = np.ones(n_features)
# x = np.arange(1,n_features+1,dtype = float)

G = np.dot(A_.T, A_)
# b = np.zeros((n_features))
b = np.dot(A, x) + np.random.rand(n_samples) * 0.1

# A^X^ = b^
b_head = np.zeros((n_samples, ))
A_head = np.eye(n_samples, n_features)

omp = OrthogonalMatchingPursuitCV()
# omp.fit (A_head, b_head)
omp.fit(A, b)
coef = omp.coef_
# coef = omp.n_nonzeros_coefs
# idx_r, = coef.nonzero()
# print coef.shape
idx_r, = coef.nonzero()
print "the index of the nonzero x_i"
print idx_r
print "the nonzeros solution of the AX = b"
print coef[idx_r]

print "\n"

print "all the solutions"
print coef
print "\n"
Example #13
def RunMP(aligned_data_root_path, output_path):
    do_compute_individual_k_motifs = True
    do_compute_anchored_chains = False
    do_compute_semantic_segmentation = False
    do_compute_multimodal_mp = False
    window_size = 1300
    #window_size = 1500
    data_dict = LoadAlignedTILESData(aligned_data_root_path)

    #plt.ion()

    pids = list(data_dict.keys())[0:1]
    streams = ['HeartRatePPG', 'StepCount']

    # Compute motifs from the individual MP using a greedy method
    if do_compute_individual_k_motifs:
        num_motifs = 2
        for pid in pids:
            fitbit_df = data_dict[pid]['fitbit']
            fitbit_df = fitbit_df.iloc[0:10000, :]  # HACK

            for stream in streams:
                exclusion_signal = fitbit_df[stream].copy()
                # Keep a NaN'd version for MP and interpolated one for OMP
                #nan_replace_value = -1000000
                #fitbit_df[stream][np.isnan(fitbit_df[stream])] = nan_replace_value
                #fitbit_df_smooth = fitbit_df[stream].interpolate(method='linear', axis=0, inplace=False)
                #fitbit_df_smooth = fitbit_df[stream].copy()
                fitbit_df_smooth = exclusion_signal.copy()

                # Fill NaNs at the beginning and end
                if np.isnan(fitbit_df_smooth[0]):
                    idx = 0
                    while np.isnan(fitbit_df_smooth[idx]):
                        idx += 1
                    fitbit_df_smooth[0:idx] = fitbit_df_smooth[idx]
                if np.isnan(fitbit_df_smooth[fitbit_df_smooth.shape[0] - 1]):
                    idx = fitbit_df_smooth.shape[0] - 1
                    while np.isnan(fitbit_df_smooth[idx]):
                        idx -= 1
                    fitbit_df_smooth[idx:] = fitbit_df_smooth[idx]

                # Use Matrix Profile methods to learn a motif dictionary
                motifs = []
                while len(motifs) < num_motifs:
                    #fitbit_mp = stumpy.stump(fitbit_df[stream], m=window_size) # TODO - use the exclusion_signal
                    fitbit_mp = stumpy.stump(
                        exclusion_signal,
                        m=window_size)  # TODO - use the exclusion_signal
                    fitbit_mp_argsort = np.array(fitbit_mp[:, 0]).argsort()
                    for motif_idx in range(len(fitbit_mp_argsort)):
                        stream_motif_idx = fitbit_mp_argsort[motif_idx]
                        num_nan = np.sum(
                            np.isnan(exclusion_signal.
                                     values[stream_motif_idx:stream_motif_idx +
                                            window_size]))

                        # Avoid finding bad motifs
                        if num_nan >= 5.0 * window_size / 6.0:
                            continue
                        if stream == 'HeartRatePPG':
                            pass
                        break
                    motif_left_idx = fitbit_mp_argsort[motif_idx]
                    motif = fitbit_df_smooth[motif_left_idx:motif_left_idx +
                                             window_size]
                    motif[motif == 0] = 1e-12  # OMP requires non-zeros in the support
                    motifs.append(motif)
                    plt.plot(range(motif_left_idx,
                                   motif_left_idx + window_size),
                             motifs[-1],
                             'g-',
                             linewidth=5)

                # Build a redundant dictionary from the motifs
                num_repetitions = len(fitbit_df_smooth) - window_size
                dictionary_mat = csr_matrix(
                    (len(motifs) * num_repetitions, len(fitbit_df_smooth)))
                for motif_idx in range(len(motifs)):
                    motif_values = motifs[motif_idx].values
                    for repeat_idx in range(num_repetitions):
                        # SLOW: TODO - find better way of generating this matrix.  Maybe I can change the sparse encoding directly and just push extra zeros in front of the motif sequence? Better yet, why not abandon the matrix representation and just use a list of motifs and their starting index in the signal
                        dictionary_mat[motif_idx * num_repetitions +
                                       repeat_idx, repeat_idx:repeat_idx +
                                       window_size] = motif_values

                # Reconstruct the signal using the motif dictionary
                # TODO : Write my own OMP with exclusion of each atom's support. Gram mat?
                # TODO : Use L1 optimization (Lasso)?
                #omp = OrthogonalMatchingPursuit(n_nonzero_coefs=2, fit_intercept=False)
                omp = OrthogonalMatchingPursuitCV(fit_intercept=False)
                omp.fit(dictionary_mat.T, fitbit_df_smooth)
                intercept = omp.intercept_
                coef = omp.coef_
                idx_r = coef.nonzero()
                num_nonzero = omp.n_nonzero_coefs_

                #max_nonzero = 20
                #skip_nan_percent = 0.1
                #coef = np.zeros((dictionary_mat.T.shape[1],1))
                #intercept = np.zeros((dictionary_mat.T.shape[0],1))
                #for num_nonzero in range(1,max_nonzero+1):
                #   # Reconstruct the signal using the motif dictionary
                #   best_dict_idx = -1
                #   best_error = np.inf
                #   best_dict_support = None
                #   for dict_idx in range(dictionary_mat.shape[0]):
                #      # SLOW
                #      dict_vec = dictionary_mat[dict_idx,:].toarray().reshape(-1,)

                #      # Find the support
                #      left_support_idx = 0
                #      right_support_idx = len(dict_vec)-1
                #      while dict_vec[left_support_idx] == 0 and left_support_idx < len(dict_vec):
                #         left_support_idx += 1
                #      while dict_vec[right_support_idx] == 0 and right_support_idx >= 0:
                #         right_support_idx -= 1

                #      # Skip mostly NaN regions
                #      if np.sum(np.isnan(exclusion_signal[left_support_idx:right_support_idx+1])) > skip_nan_percent*(right_support_idx-left_support_idx+1):
                #         continue

                #      # Find the best match
                #      residual = exclusion_signal[left_support_idx:right_support_idx+1] - dict_vec[left_support_idx:right_support_idx+1]
                #      np.nan_to_num(residual, copy=False) # Replace NaN with zero
                #      error = np.dot(residual, residual)
                #      if error < best_error:
                #         best_error = error
                #         coef_val = 1 # TODO - constrain between 0.5 and 2?
                #         best_dict_idx = dict_idx
                #         best_dict_support = (left_support_idx, right_support_idx)

                #   if best_dict_idx < 0:
                #      print("No best next dictionary element found")
                #      break

                #   # Update coef
                #   coef_nonzero = (coef != 0).reshape(-1,)
                #   if np.sum(coef_nonzero) > 0:
                #      dictionary_mat_reduced = dictionary_mat[coef_nonzero, :]
                #      coef_reduced = coef[coef_nonzero]

                #      #prev_fit_signal = np.matmul(dictionary_mat.T, coef)
                #      prev_fit_signal = np.matmul(dictionary_mat_reduced.T.toarray(), coef_reduced)
                #      prev_residual = fitbit_df_smooth - prev_fit_signal.reshape(-1,)
                #      np.nan_to_num(prev_residual, copy=False) # Replace NaN with zero
                #      prev_error = np.dot(prev_residual, prev_residual)

                #      coef[best_dict_idx] = coef_val
                #      #fit_signal = np.matmul(dictionary_mat.T, coef)
                #      fit_signal = np.matmul(dictionary_mat_reduced.T.toarray(), coef_reduced)
                #      fit_residual = fitbit_df_smooth - fit_signal.reshape(-1,)
                #      np.nan_to_num(fit_residual, copy=False) # Replace NaN with zero
                #      fit_error = np.dot(fit_residual, fit_residual)
                #   else:
                #      prev_residual = fitbit_df_smooth- np.zeros(len(fitbit_df_smooth))
                #      np.nan_to_num(prev_residual, copy=False) # Replace NaN with zero
                #      prev_error = np.dot(prev_residual, prev_residual)

                #      coef[best_dict_idx] = coef_val
                #      coef_nonzero = (coef != 0).reshape(-1,)
                #      dictionary_mat_reduced = dictionary_mat[coef_nonzero, :]
                #      coef_reduced = coef[coef_nonzero]

                #      fit_signal = np.matmul(dictionary_mat_reduced.T.toarray(), coef_reduced)
                #      fit_residual = fitbit_df_smooth - fit_signal.reshape(-1,)
                #      np.nan_to_num(fit_residual, copy=False) # Replace NaN with zero
                #      fit_error = np.dot(fit_residual, fit_residual)

                #   if best_dict_support is not None:
                #      exclusion_signal[best_dict_support[0]:best_dict_support[1]+1] = np.inf

                #   if prev_error < fit_error:
                #      print("Avoiding overfitting...")
                #      coef[best_dict_idx,0] = 0
                #      break

                coef_nonzero = (coef != 0).reshape(-1, )
                dictionary_mat_reduced = dictionary_mat[coef_nonzero, :]
                coef_reduced = coef[coef_nonzero]
                fit_signal = np.matmul(dictionary_mat_reduced.T.toarray(),
                                       coef_reduced) + intercept
                plt.plot(range(fitbit_df[stream].shape[0]), fitbit_df[stream],
                         'b-')
                #plt.plot(range(fitbit_df_smooth.shape[0]), fitbit_df_smooth, 'k-')
                plt.plot(range(fitbit_df[stream].shape[0]), fit_signal, 'r--')
                plt.title('OMP (%d coefs) + MP Motifs (%d motifs)' %
                          (num_nonzero, num_motifs))
                plt.xlabel('Time')
                plt.ylabel(stream)
                plt.show()
                return
                #pdb.set_trace()  # unreachable debug hook left after the early return

    # Compute individual matrix profiles (stump)
    if do_compute_anchored_chains or do_compute_semantic_segmentation:
        for pid in pids:
            fitbit_df = data_dict[pid]['fitbit']
            for stream in streams:
                fitbit_mp = stumpy.stump(fitbit_df[stream], m=window_size)

                if do_compute_anchored_chains:
                    left_mp_idx = fitbit_mp[:, 2]
                    right_mp_idx = fitbit_mp[:, 3]
                    #atsc_idx = 10
                    #anchored_chain = stumpy.atsc(left_mp_idx, right_mp_idx, atsc_idx)
                    all_chain_set, unanchored_chain = stumpy.allc(
                        left_mp_idx, right_mp_idx)

                if do_compute_semantic_segmentation:
                    subseq_len = window_size
                    correct_arc_curve, regime_locations = stumpy.fluss(
                        fitbit_mp[:, 1],
                        L=subseq_len,
                        n_regimes=2,
                        excl_factor=5)

                # Find the first motif with nearly no NaN values in the stream signal
                fitbit_mp_argsort = np.array(fitbit_mp[:, 0]).argsort()
                for motif_idx in range(len(fitbit_mp_argsort)):
                    stream_motif_idx = fitbit_mp_argsort[motif_idx]
                    num_nan = np.sum(
                        np.isnan(fitbit_df[stream].
                                 values[stream_motif_idx:stream_motif_idx +
                                        window_size]))

                    # Avoid finding bad motifs
                    if num_nan >= 5.0 * window_size / 6.0:
                        continue
                    if stream == 'HeartRatePPG':
                        pass
                        # Check for flat heart rate
                        #nan_like_value = 70
                        #num_valid = np.count_nonzero((fitbit_df[stream] - nan_like_value)[stream_motif_idx:stream_motif_idx+window_size])
                        #if num_valid < window_size - 2:
                        #   continue

                        # Check for linear heart rate over time
                        #residual_threshold = window_size*(4.0**2)
                        #p, res, rank, sing_vals, rcond = np.polyfit(range(window_size), fitbit_df[stream][stream_motif_idx:stream_motif_idx+window_size], deg=1, full=True)
                        #if res < residual_threshold:
                        #   continue
                    break

                num_subplots = 3 if do_compute_semantic_segmentation else 2
                fig, axs = plt.subplots(num_subplots,
                                        sharex=True,
                                        gridspec_kw={'hspace': 0})
                plt.suptitle('Matrix Profile, %s, PID: %s' % (stream, pid),
                             fontsize='30')
                axs[0].plot(fitbit_df[stream].values)
                rect = plt.Rectangle((fitbit_mp_argsort[motif_idx], 0),
                                     window_size,
                                     2000,
                                     facecolor='lightgrey')
                axs[0].add_patch(rect)
                rect = plt.Rectangle((fitbit_mp_argsort[motif_idx + 1], 0),
                                     window_size,
                                     2000,
                                     facecolor='lightgrey')
                axs[0].add_patch(rect)
                axs[0].set_ylabel(stream, fontsize='20')
                axs[1].plot(fitbit_mp[:, 0])
                axs[1].axvline(x=fitbit_mp_argsort[motif_idx],
                               linestyle="dashed")
                axs[1].axvline(x=fitbit_mp_argsort[motif_idx + 1],
                               linestyle="dashed")
                axs[1].set_ylabel('Matrix Profile', fontsize='20')

                if do_compute_anchored_chains:
                    for i in range(unanchored_chain.shape[0]):
                        y = fitbit_df[stream].iloc[
                            unanchored_chain[i]:unanchored_chain[i] +
                            window_size]
                        x = y.index.values
                        axs[0].plot(x, y, linewidth=3)

                if do_compute_semantic_segmentation:
                    axs[2].plot(range(correct_arc_curve.shape[0]),
                                correct_arc_curve,
                                color='C1')
                    axs[0].axvline(x=regime_locations[0], linestyle="dashed")
                    axs[2].axvline(x=regime_locations[0], linestyle="dashed")

                plt.show()

    # Compute multi-dimensional matrix profiles (mstump)
    if do_compute_multimodal_mp:
        for pid in pids:
            fitbit_df = data_dict[pid]['fitbit']
            data = fitbit_df.loc[:, streams].values
            mp, mp_indices = stumpy.mstump(data.T, m=window_size)
            #print("Stumpy's mstump function does not handle NaN values. Skipping multi-dimensional MP")
            #break

            # TODO - This code is copied from above. Fix and finish it once mstump supports NaN
            # Find the first motif with nearly no NaN values in the stream signal
            fitbit_mp_argsort = np.array(fitbit_mp[:, 0]).argsort()
            for motif_idx in range(len(fitbit_mp_argsort)):
                stream_motif_idx = fitbit_mp_argsort[motif_idx]
                num_nan = np.sum(
                    np.isnan(fitbit_df[stream].
                             values[stream_motif_idx:stream_motif_idx +
                                    window_size]))

                # Avoid finding bad motifs
                if num_nan >= 2:
                    continue
                if stream == 'HeartRatePPG':
                    # Check for flat heart rate
                    nan_like_value = 70
                    num_valid = np.count_nonzero(
                        (fitbit_df[stream] -
                         nan_like_value)[stream_motif_idx:stream_motif_idx +
                                         window_size])
                    if num_valid < window_size - 2:
                        continue

                    # Check for linear heart rate over time
                    residual_threshold = window_size * (4.0**2)
                    p, res, rank, sing_vals, rcond = np.polyfit(
                        range(window_size),
                        fitbit_df[stream][stream_motif_idx:stream_motif_idx +
                                          window_size],
                        deg=1,
                        full=True)
                    if res < residual_threshold:
                        continue
                break

            fig, axs = plt.subplots(2, sharex=True, gridspec_kw={'hspace': 0})
            plt.suptitle('Matrix Profile, %s, PID: %s' % (stream, pid),
                         fontsize='30')
            axs[0].plot(fitbit_df[stream].values)
            rect = plt.Rectangle((fitbit_mp_argsort[motif_idx], 0),
                                 window_size,
                                 2000,
                                 facecolor='lightgrey')
            axs[0].add_patch(rect)
            rect = plt.Rectangle((fitbit_mp_argsort[motif_idx + 1], 0),
                                 window_size,
                                 2000,
                                 facecolor='lightgrey')
            axs[0].add_patch(rect)
            axs[0].set_ylabel(stream, fontsize='20')
            axs[1].plot(fitbit_mp[:, 0])
            axs[1].axvline(x=fitbit_mp_argsort[motif_idx], linestyle="dashed")
            axs[1].axvline(x=fitbit_mp_argsort[motif_idx + 1],
                           linestyle="dashed")
            axs[1].set_ylabel('Matrix Profile', fontsize='20')
            plt.show()

    plt.ioff()
    plt.figure()
    plt.plot()
    plt.title('Dummy plot')
    plt.show()
    return
Example #14
from sklearn.linear_model import OrthogonalMatchingPursuitCV


def OMP(Xtrain, Ytrain, OMP_options=None, *args, **kwargs):
    # Avoid the mutable-default pitfall; fall back to an empty options dict.
    OMP_options = OMP_options or {}
    OMPModel = OrthogonalMatchingPursuitCV(**OMP_options)
    OMPModel.fit(Xtrain, Ytrain)

    return OMPModel
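A hedged usage sketch for OMP on synthetic data; the option dict keys are ordinary OrthogonalMatchingPursuitCV constructor arguments:

import numpy as np

rng = np.random.RandomState(0)
Xtrain = rng.randn(50, 10)
Ytrain = Xtrain @ rng.randn(10)

model = OMP(Xtrain, Ytrain, OMP_options={'cv': 3})
print(model.n_nonzero_coefs_)  # number of non-zeros chosen by cross-validation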
Example #15
 def getActionFromSparseRecovery(self, Qtable, SparseRecFlag, w, trainingOn):
     n_nonzero_coefs =31
     if SparseRecFlag==0:
         Train_index = np.where(Qtable<0)
         R=Qtable[Train_index]
        #Phi = []
         for i in range(len(Train_index[0])):
             State_array = self.stateNum2Info(Train_index[0][i])
             Action_arr = self.ActionNum2Info(Train_index[1][i])
             '''[self.TTNV, self.speed, self.AdjFreeLane,
                      self.RLane, self.Turn, self.Lane,
                      self.Location, self.DTAC, self.SlowerTNV,
                      self.FasterTNV, self.DistanceToCrossCar, 
                      self.IntersectionOpen, 1]#self.Priority]'''
             Psi= np.array([])
             Psi = np.concatenate((Psi,State_array[0:2]))
             for j in range(2,7):
                 Psi = np.concatenate((Psi,self.convertToOneHot(np.array([State_array[j],agent_optionCounts[j]-1]))[0]))
             Psi = np.concatenate((Psi, State_array[7:11]))
             Psi = np.concatenate((Psi, self.convertToOneHot(np.array([State_array[11],agent_optionCounts[11]-1]))[0]))
             
             for k in range(3):
                 Psi = np.concatenate((Psi,self.convertToOneHot(np.array([Action_arr[k],2]))[0]))
             if i==0 :
                 Phi = Psi
             else:
                 Phi = np.vstack((Phi,Psi))
                 
         omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
         omp.fit(Phi, R)
         w = omp.coef_
         idx_r, = w.nonzero()
         
         omp_cv = OrthogonalMatchingPursuitCV(copy=True, fit_intercept=True, normalize=True, max_iter=n_nonzero_coefs, cv=None, n_jobs=1, verbose=False)
         omp_cv.fit(Phi, R)
         w1 = omp_cv.coef_
         idx_r_cv, = w1.nonzero()
       #  return w
         #use to predict the other columns 
     
     ActionpArr = np.ravel(np.where(Qtable[self.state,:]==np.amax(Qtable[self.state,:])))
     assert(len(ActionpArr) >= 1)
     if trainingOn:
         ActionpInd = np.random.choice(ActionpArr)
     else:
         ActionpInd = ActionpArr[0]
     if Qtable[self.state, ActionpInd]== 0 :
         for i in  np.ravel(np.where(Qtable[self.state,:]==0)):
             State_array = np.array(self.stateNum2Info(int(self.state)))
             if self.Location ==0 or self.Location==2:
                 action = i
             elif self.Location ==1:
                 action = i + 9
             else:
                 action = i + 18
                 
             Action_arr = self.ActionNum2Info(action)
             '''[self.TTNV, self.speed, self.AdjFreeLane,
                      self.RLane, self.Turn, self.Lane,
                      self.Location, self.DTAC, self.SlowerTNV,
                      self.FasterTNV, self.DistanceToCrossCar, 
                      self.IntersectionOpen, 1]#self.Priority]'''
           
             Psi= np.array([])
             Psi = np.concatenate((Psi,State_array[0:2]))
             for j in range(2,7):
                 Psi = np.concatenate((Psi,self.convertToOneHot(np.array([State_array[j],agent_optionCounts[j]-1]).astype(int))[0]))
             Psi = np.concatenate((Psi, State_array[7:11]))
             Psi = np.concatenate((Psi, self.convertToOneHot(np.array([State_array[11],agent_optionCounts[11]-1]).astype(int))[0]))
             
             for k in range(3):
                 Psi = np.concatenate((Psi,self.convertToOneHot(np.array([Action_arr[k],2]))[0]))
             print(w)
             print(Psi)
             Qtable[self.state, i] = np.dot(Psi, w)
     ActionpArr = np.ravel(np.where(Qtable[self.state,:]==np.amax(Qtable[self.state,:])))
     assert(len(ActionpArr) >= 1)            
     if trainingOn:
         ActionpInd = np.random.choice(ActionpArr)
     else:
         ActionpInd = ActionpArr[0]
     if Qtable[self.state, ActionpInd]== 0:
         self.NotTrainedFlag = True
         
     return  [w , ActionpInd]
Example #16
    test_Y_pred = omp.predict(test_X)
    print("Test set score:", omp.score(test_X, test_Y))
    print("Test set MSE:", mean_squared_error(test_Y, test_Y_pred))
    print("Test set RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred)))
    print("Test set R2:", r2_score(test_Y, test_Y_pred))

    tss, rss, ess, r2 = xss(Y, omp.predict(X))
    print("TSS (Total Sum of Squares):", tss)
    print("RSS (Residual Sum of Squares):", rss)
    print("ESS (Explained Sum of Squares):", ess)
    print("R^2:", r2)

    print("\n********** Testing the OrthogonalMatchingPursuitCV class **********")
    ompCV = OrthogonalMatchingPursuitCV(cv=5)
    # Fit on the training set
    ompCV.fit(train_X, train_Y.values.ravel())
    # Print the best n_nonzero_coefs value found by cross-validation
    print("Best n_nonzero_coefs value:", ompCV.n_nonzero_coefs_)
    # Print the model coefficients
    print("Coefficients:", ompCV.coef_)
    print("Intercept:", ompCV.intercept_)
    print('Training set R2:', r2_score(train_Y, ompCV.predict(train_X)))

    # For linear regression models, performance on the test set is usually
    # evaluated with the mean squared error (MSE) or the root mean squared
    # error (RMSE).
    test_Y_pred = ompCV.predict(test_X)
    print("Test set score:", ompCV.score(test_X, test_Y))
    print("Test set MSE:", mean_squared_error(test_Y, test_Y_pred))
    print("Test set RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred)))
    print("Test set R2:", r2_score(test_Y, test_Y_pred))
Example #17
idx_r, = coef.nonzero()
plt.subplot(4, 1, 2)
plt.xlim(0, 512)
plt.title("Recovered signal from noise-free measurements")
plt.stem(idx_r, coef[idx_r])

# plot the noisy reconstruction
###############################
omp.fit(X, y_noisy)
coef = omp.coef_
idx_r, = coef.nonzero()
plt.subplot(4, 1, 3)
plt.xlim(0, 512)
plt.title("Recovered signal from noisy measurements")
plt.stem(idx_r, coef[idx_r])

# plot the noisy reconstruction with number of non-zeros set by CV
##################################################################
omp_cv = OrthogonalMatchingPursuitCV()
omp_cv.fit(X, y_noisy)
coef = omp_cv.coef_
idx_r, = coef.nonzero()
plt.subplot(4, 1, 4)
plt.xlim(0, 512)
plt.title("Recovered signal from noisy measurements with CV")
plt.stem(idx_r, coef[idx_r])

plt.subplots_adjust(0.06, 0.04, 0.94, 0.90, 0.20, 0.38)
plt.suptitle('Sparse signal recovery with Orthogonal Matching Pursuit',
             fontsize=16)
plt.show()
Example #18
opm = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
opm.fit(X, y)
coef = opm.coef_
idx_r, = coef.nonzero()
plt.subplot(4, 1, 2)
plt.xlim(0, 512)
plt.title("Recovered signal from noise-free measurements")
plt.stem(idx_r, coef[idx_r])

opm.fit(X, y_noise)
coef = opm.coef_
idx_r, = coef.nonzero()
plt.subplot(4, 1, 3)
plt.xlim(0, 512)
plt.title("Recovered signal from noisy measurements")
plt.stem(idx_r, coef[idx_r])

opm_cv = OrthogonalMatchingPursuitCV()
opm_cv.fit(X, y_noise)
coef = opm_cv.coef_
idx_r, = coef.nonzero()
plt.subplot(4, 1, 4)
plt.xlim(0, 512)
plt.title("Recovered signal from noisy measurements with CV")
plt.stem(idx_r, coef[idx_r])

plt.subplots_adjust(0.06, 0.04, 0.94, 0.90, 0.20, 0.38)
plt.suptitle('Sparse signal recovery with Orthogonal Matching Pursuit',
             fontsize=16)

plt.show()