def test_actual_results_no_numerosity_reduction():
    """Check BOSSVS against a hand-built SFA + tf-idf reference pipeline.

    The reference is assembled step by step (non-overlapping windows,
    ``SymbolicFourierApproximation`` words, per-class bag of words,
    ``TfidfVectorizer``, cosine similarity) and compared with the fitted
    ``BOSSVS`` estimator: vocabulary, tf-idf matrix, decision function
    and predictions must all agree.

    ``X`` and ``y`` are read from the enclosing module; the hard-coded
    shapes below reshape ``X`` into 8 series of 2 windows of length 10.
    """
    # Settings shared by the estimator under test and the reference steps.
    discretization = dict(
        n_bins=3, strategy='quantile', alphabet=None,
        anova=False, drop_sum=False, norm_mean=False, norm_std=False,
    )
    weighting = dict(use_idf=True, smooth_idf=False, sublinear_tf=True)

    bossvs = BOSSVS(
        word_size=4, window_size=10, window_step=10,
        numerosity_reduction=False, **discretization, **weighting
    )

    # Reference: cut every series into 2 windows of 10 points (16 windows).
    windows = X.reshape(8, 2, 10).reshape(16, 10)
    window_labels = np.repeat(y, 2)
    sfa = SymbolicFourierApproximation(n_coefs=4, **discretization)
    symbols = sfa.fit_transform(windows, window_labels)

    # One word per window, then one space-separated document per series.
    words = np.asarray([''.join(row) for row in symbols]).reshape(8, 2)
    X_bow = np.asarray([' '.join(pair) for pair in words])

    # One document per class: all bags of words of that class joined.
    class_documents = np.array(
        [' '.join(X_bow[y == label]) for label in range(2)]
    )

    tfidf = TfidfVectorizer(norm=None, **weighting)
    tfidf_desired = tfidf.fit_transform(class_documents).toarray()

    # Vocabulary: invert the vectorizer's mapping so indices become keys.
    vocabulary_desired = {
        index: word for word, index in tfidf.vocabulary_.items()
    }

    # Tf-idf
    bossvs.fit(X, y)
    tfidf_actual = bossvs.tfidf_

    # Decision function
    decision_function_actual = bossvs.decision_function(X)
    bow_tfidf = tfidf.transform(X_bow)
    decision_function_desired = cosine_similarity(bow_tfidf, tfidf_desired)

    # Predictions
    y_pred_actual = bossvs.predict(X)
    y_pred_desired = decision_function_desired.argmax(axis=1)

    # Testing
    tolerance = dict(atol=1e-5, rtol=0)
    assert bossvs.vocabulary_ == vocabulary_desired
    np.testing.assert_allclose(tfidf_actual, tfidf_desired, **tolerance)
    np.testing.assert_allclose(
        decision_function_actual, decision_function_desired, **tolerance)
    np.testing.assert_allclose(y_pred_actual, y_pred_desired, **tolerance)
""" import numpy as np import matplotlib.pyplot as plt from pyts.classification import BOSSVS from pyts.datasets import load_gunpoint # Toy dataset X_train, X_test, y_train, y_test = load_gunpoint(return_X_y=True) # BOSSVS transformation bossvs = BOSSVS(word_size=2, n_bins=3, window_size=10) bossvs.fit(X_train, y_train) tfidf = bossvs.tfidf_ vocabulary_length = len(bossvs.vocabulary_) X_new = bossvs.decision_function(X_test) # Visualize the transformation plt.figure(figsize=(14, 5)) width = 0.4 plt.subplot(121) plt.bar(np.arange(vocabulary_length) - width / 2, tfidf[0], width=width, label='Class 1') plt.bar(np.arange(vocabulary_length) + width / 2, tfidf[1], width=width, label='Class 2') plt.xticks(np.arange(vocabulary_length),
print('Feature-BOSSVS')

# Disabled interactive prompts (PAA segments, SAX symbols, window size).
# They live inside a bare string literal, so none of this is executed.
'''
nps = input("Enter the number of segments for PAA: ")
n_paa_segments = int(nps) #number of segments for PAA
nss = input('Enter the number if segments for SAX: ')
n_sax_symbols = int(nss)
w = input('Enter the window size:')
window_size = int(w)
'''

PATH = "G:/Coding/ML/UCRArchive_2018/"  # Change this value if necessary

# The dataset name selects both the sub-directory and the file prefix.
ds = input('Enter the Time series Dataset: ')
dataset = str(ds)
file_train = "{0}{1}/{1}_TRAIN.tsv".format(PATH, dataset)
file_test = "{0}{1}/{1}_TEST.tsv".format(PATH, dataset)

# Load the tab-separated train/test tables without skipping any header row.
fulltrain = np.genfromtxt(file_train, delimiter="\t", skip_header=0)
fulltest = np.genfromtxt(file_test, delimiter="\t", skip_header=0)

# Column 0 becomes the target vector; the remaining columns are the inputs.
y_train = fulltrain[:, 0]
X_train = fulltrain[:, 1:]
y_test = fulltest[:, 0]
X_test = fulltest[:, 1:]

# BOSSVS transformation
bossvs = BOSSVS(word_size=2, n_bins=3, window_size=10)
bossvs.fit(X_train, y_train)

tfidf = bossvs.tfidf_
vocabulary_length = len(bossvs.vocabulary_)

# Decision-function output for the training set. The original note called
# these "tfidf vectors"; NOTE(review): confirm against the pyts docs what
# decision_function actually returns.
X_new = bossvs.decision_function(X_train)