def cluster(timeseries_df, data_labels):
    # Learn shapelets separately for each class; the cluster labels used as
    # training targets are read from per-class text files on disk.
    Shapelet_list = []
    D = '/home/abhilash/Datasets/UCRArchive_2018/TwoLeadECG/TXT_Files/'
    for i in range(1, max(data_labels) + 1):
        print('Class', i)
        ts_df = timeseries_df[timeseries_df['0'] == i]
        ts_df = ts_df.reset_index(drop=True)
        labels = ts_df['0']
        ts_df = ts_df.drop(ts_df.columns[0], axis=1)
        S = 'class' + str(i) + 'labels.txt'
        pred_label = pd.read_csv(D + S, header=None)
        pred_label = np.ravel(np.array(pred_label))

        shapelet_sizes = grabocka_params_to_shapelet_size_dict(n_ts=ts_df.shape[0],
                                                               ts_sz=ts_df.shape[1],
                                                               n_classes=2,
                                                               l=0.36,
                                                               r=1)
        shp_clf = ShapeletModel(n_shapelets_per_size=shapelet_sizes,
                                optimizer=Adagrad(lr=.1),
                                weight_regularizer=.01,
                                max_iter=50,
                                verbose=0)
        shp_clf.fit(ts_df, pred_label)
        shapelets = shp_clf.shapelets_
        temp_list = []
        # Use a distinct loop variable so the outer per-class counter `i`
        # is not clobbered.
        for j in range(shapelets.shape[0]):
            temp = shapelets[j].T
            temp_list.append(temp[0])
        Shapelet_list.append(temp_list)
    return Shapelet_list
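A minimal usage sketch for the function above, assuming the same imports it relies on (pandas as pd, numpy as np, tslearn's ShapeletModel and grabocka_params_to_shapelet_size_dict, keras' Adagrad); the input file name is hypothetical, and the per-class label files under the hard-coded directory D must already exist:

import pandas as pd

# Hypothetical input file: the first column holds integer class labels
# (1..n_classes), the remaining columns hold the series values. Columns are
# renamed to strings so cluster() can index the label column as '0'.
timeseries_df = pd.read_csv('TwoLeadECG_TRAIN.tsv', sep='\t', header=None)
timeseries_df.columns = [str(c) for c in timeseries_df.columns]
data_labels = timeseries_df['0'].astype(int)

shapelets_per_class = cluster(timeseries_df, data_labels)
print(len(shapelets_per_class))  # one list of learned shapelets per class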
def cluster(shape, timeseries_df, data_labels, k):
    # Variant of cluster() above: pseudo-labels come from u-shapelet
    # extraction followed by k-means, instead of label files on disk.
    Shapelet_list = []
    for i in range(1, max(data_labels) + 1):
        ts_df = timeseries_df[timeseries_df['0'] == i]
        ts_df = ts_df.reset_index(drop=True)
        # cluster_list.append(extractU_Shapelets(shape)
        labels = ts_df['0']
        ts_df = ts_df.drop(ts_df.columns[0], axis=1)
        S = uShapeletClustering.extractU_Shapelets.extract_Shapelets(ts_df.copy(), shape, k)
        S = np.array(list(S))
        pred_label = uShapeletClustering.Kmeans.Kmeans(ts_df.copy(), S, k, labels)
        shapelet_sizes = grabocka_params_to_shapelet_size_dict(n_ts=ts_df.shape[0],
                                                               ts_sz=ts_df.shape[1],
                                                               n_classes=2,
                                                               l=0.5,
                                                               r=1)
        shp_clf = ShapeletModel(n_shapelets_per_size=shapelet_sizes,
                                optimizer=Adagrad(lr=.1),
                                weight_regularizer=.01,
                                max_iter=50,
                                verbose=0)
        shp_clf.fit(ts_df, pred_label)
        shapelets = shp_clf.shapelets_
        temp_list = []
        # Again, avoid shadowing the outer per-class loop variable `i`.
        for j in range(shapelets.shape[0]):
            temp = shapelets[j].T
            temp_list.append(temp[0])
        Shapelet_list.append(temp_list)
    return Shapelet_list
def learningShapeletClassifier(X_train, Y_train):

    shapelet_sizes = grabocka_params_to_shapelet_size_dict(
        n_ts=X_train.shape[0],
        ts_sz=X_train.shape[1],
        n_classes=len(set(Y_train)),
        l=0.1,
        r=2)
    shp_clf = ShapeletModel(n_shapelets_per_size=shapelet_sizes,
                            optimizer=Adagrad(lr=.1),
                            weight_regularizer=.01,
                            max_iter=200,
                            verbose_level=0)

    shp_clf.fit(X_train, Y_train)
    return shp_clf
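A minimal sketch of how learningShapeletClassifier() might be called; the Trace dataset and min-max scaling mirror the standalone examples further down, and the function's own tslearn/keras imports are assumed to be in scope:

import numpy
from sklearn.metrics import accuracy_score
from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMinMax

numpy.random.seed(0)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
X_train = TimeSeriesScalerMinMax().fit_transform(X_train)
X_test = TimeSeriesScalerMinMax().fit_transform(X_test)

clf = learningShapeletClassifier(X_train, y_train)
print("Accuracy:", accuracy_score(y_test, clf.predict(X_test)))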
Example 4
def executeLearningShapelet(datasetName):
    # INPUT: dataset name

    # Runs the Learning Shapelets algorithm over the dataset: datasetName

    X_train, y_train, X_test, y_test = UCR_UEA_datasets().load_dataset(
        datasetName)

    # Re-size X_train via the helper function
    dfTrain = computeLoadedDataset(X_train, y_train)

    y_train = dfTrain['target'].values
    y_train = y_train.astype(int)

    # get the number of classes
    le = LabelEncoder()
    distinct_classes = le.fit_transform(dfTrain['target'])
    distinct_classes = np.unique(distinct_classes, return_counts=False)
    num_classes = len(distinct_classes)

    print(distinct_classes)
    print(num_classes)

    del dfTrain['target']
    del dfTrain['TsIndex']

    # Re-size X_test via the helper function
    dfTest = computeLoadedDataset(X_test, y_test)

    y_test = dfTest['target'].values
    y_test = y_test.astype(int)

    del dfTest['target']
    del dfTest['TsIndex']

    # start of train preprocessing
    start_timePreprocessingTrain = time.time()

    shapelet_sizes = grabocka_params_to_shapelet_size_dict(
        n_ts=len(dfTrain),
        ts_sz=len(dfTrain.iloc[0]),
        n_classes=num_classes,
        l=0.1,  # fixed parameters
        r=1)

    grabocka = LearningShapelets(n_shapelets_per_size=shapelet_sizes)
    grabocka.fit(dfTrain, y_train)
    X_train_distances = grabocka.transform(dfTrain)

    # end of train preprocessing
    PreprocessingTrainTime = time.time() - start_timePreprocessingTrain

    # start of training
    start_timeTrain = time.time()

    dt = DecisionTreeClassifier(criterion='entropy',
                                max_depth=3,
                                min_samples_leaf=20)
    dt.fit(X_train_distances, y_train)

    # end of training
    TrainTime = time.time() - start_timeTrain

    # start of test preprocessing
    start_timePreprocessingTest = time.time()

    X_test_distances = grabocka.transform(dfTest)

    # end of test preprocessing
    PreprocessingTestTime = time.time() - start_timePreprocessingTest

    # start of testing
    start_timeTest = time.time()

    y_predict = dt.predict(X_test_distances)

    # end of testing
    TestTime = time.time() - start_timeTest

    print(accuracy_score(y_test, y_predict))

    row = [
        'LearningShapelets', datasetName,
        round(accuracy_score(y_test, y_predict), 2),
        round(PreprocessingTrainTime, 2),
        round(TrainTime, 2),
        round(PreprocessingTestTime, 2),
        round(TestTime, 2)
    ]

    WriteCsvShapeletAlgo('Shapelet_Algo_Experiments_29-12.csv', row)
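An illustrative call; any dataset name accepted by UCR_UEA_datasets().load_dataset() should work, and the timing/accuracy row is appended to the CSV named inside the function:

# Illustrative: 'ECG200' is one of the UCR archive dataset names.
executeLearningShapelet('ECG200')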
Example 5
import numpy
import matplotlib.pyplot as plt

from keras.optimizers import Adagrad
from sklearn.metrics import accuracy_score
from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMinMax
from tslearn.shapelets import ShapeletModel, \
    grabocka_params_to_shapelet_size_dict
from tslearn.utils import ts_size

numpy.random.seed(0)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
X_train = TimeSeriesScalerMinMax().fit_transform(X_train)
X_test = TimeSeriesScalerMinMax().fit_transform(X_test)

n_ts, ts_sz = X_train.shape[:2]
n_classes = len(set(y_train))

# Set the number of shapelets per size as done in the original paper
shapelet_sizes = grabocka_params_to_shapelet_size_dict(n_ts=n_ts,
                                                       ts_sz=ts_sz,
                                                       n_classes=n_classes,
                                                       l=0.1,
                                                       r=2)

# Define the model using parameters provided by the authors (except that we use
# fewer iterations here)
shp_clf = ShapeletModel(n_shapelets_per_size=shapelet_sizes,
                        optimizer=Adagrad(lr=.1),
                        weight_regularizer=.01,
                        max_iter=200,
                        verbose=0)
shp_clf.fit(X_train, y_train)
predicted_labels = shp_clf.predict(X_test)
print("Correct classification rate:", accuracy_score(y_test, predicted_labels))

plt.figure()
Example 6
import numpy
import matplotlib.pyplot as plt

from keras.optimizers import Adagrad
from sklearn.metrics import accuracy_score

from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMinMax
from tslearn.shapelets import ShapeletModel, grabocka_params_to_shapelet_size_dict
from tslearn.utils import ts_size

numpy.random.seed(0)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
X_train = TimeSeriesScalerMinMax().fit_transform(X_train)
X_test = TimeSeriesScalerMinMax().fit_transform(X_test)

# Set the number of shapelets per size as done in the original paper
shapelet_sizes = grabocka_params_to_shapelet_size_dict(n_ts=X_train.shape[0],
                                                       ts_sz=X_train.shape[1],
                                                       n_classes=len(
                                                           set(y_train)),
                                                       l=0.1,
                                                       r=2)

# Define the model using parameters provided by the authors (except that we use fewer iterations here)
shp_clf = ShapeletModel(n_shapelets_per_size=shapelet_sizes,
                        optimizer=Adagrad(lr=.1),
                        weight_regularizer=.01,
                        max_iter=300,
                        verbose_level=0)
shp_clf.fit(X_train, y_train)
predicted_labels = shp_clf.predict(X_test)
print("Correct classification rate:", accuracy_score(y_test, predicted_labels))

plt.figure()
# Plot the learned shapelets, grouped by length
for i, sz in enumerate(shapelet_sizes.keys()):
    plt.subplot(len(shapelet_sizes), 1, i + 1)
    plt.title("%d shapelets of size %d" % (shapelet_sizes[sz], sz))
    for shp in shp_clf.shapelets_:
        if ts_size(shp) == sz:
            plt.plot(shp.ravel())
    plt.xlim([0, max(shapelet_sizes.keys()) - 1])
plt.tight_layout()
plt.show()
Example 7
def classify_with_shapelets():
    import operator
    import os
    import sys

    import numpy as np
    from keras.optimizers import Adagrad
    from sklearn.metrics import balanced_accuracy_score
    from tqdm import tqdm
    from tslearn.datasets import CachedDatasets
    from tslearn.preprocessing import TimeSeriesScalerMinMax
    from tslearn.shapelets import ShapeletModel, grabocka_params_to_shapelet_size_dict

    feat_path = sys.argv[1]
    label_type = sys.argv[2]
    results_base_path = sys.argv[3]
    data_path = sys.argv[4]

    pitch_path = os.path.join(data_path, 'pitch.txt')
    energy_path = os.path.join(data_path, 'energy.txt')

    #Raw pitch and energy
    raw_pitch, raw_energy = read_pitch_energy(pitch_path, energy_path)

    # Tunable Parameters = shapelet length, threshold, shapelet redundancy value
    # sweep shapelet length
    pitch_shapelet = {}
    energy_shapelet = {}
    for shapelet_len in [10, 25, 50]:
        for spkr in raw_pitch:
            # Compute shapelets from raw frames (i.e. no segmented info like where phone/word is)
            pitch_shapelet[spkr] = compute_shapelet_frame(
                raw_pitch[spkr], shapelet_len, True)
            # energy_shapelet[spkr] = compute_shapelet_frame(raw_energy[spkr], shapelet_len)

            # pitch_shapelet[spkr] = np.array(raw_pitch[spkr])
            # print(len(raw_pitch[spkr]))
            # exit()

        acc = []
        for sim in range(10):
            y_true = []
            y_pred = []
            for spkr in tqdm(late_balanced.keys()):
                test_spkr = [spkr]
                train_spkrs = list(late_balanced.keys())
                train_spkrs.remove(test_spkr[0])

                X_train = np.array([
                    np.array(shapelet).reshape(shapelet_len, 1)
                    for x in train_spkrs for shapelet in pitch_shapelet[x]
                ])
                y_train = np.array([
                    late_balanced[x] for x in train_spkrs
                    for shapelet in pitch_shapelet[x]
                ])

                X_test = np.array([
                    np.array(shapelet).reshape(shapelet_len, 1)
                    for x in test_spkr for shapelet in pitch_shapelet[x]
                ])
                y_test = np.array([
                    late_balanced[x] for x in test_spkr
                    for shapelet in pitch_shapelet[x]
                ])

                # print('train data', X_train.shape)
                # #print('train data first', X_train[0])
                # print('train label', y_train.shape)
                # exit()

                shapelet_sizes = grabocka_params_to_shapelet_size_dict(
                    n_ts=X_train.shape[0],
                    ts_sz=X_train.shape[1],
                    n_classes=len(set(y_train)),
                    l=0.1,
                    r=2)

                shp_clf = ShapeletModel(n_shapelets_per_size=shapelet_sizes,
                                        optimizer=Adagrad(lr=.1),
                                        weight_regularizer=.01,
                                        max_iter=50,
                                        verbose_level=0)
                shp_clf.fit(X_train, y_train)

                predicted_locations = shp_clf.locate(X_test)

                print('predicted_locations.shape', predicted_locations.shape)
                # test_ts_id = 0
                # plt.figure()
                # plt.title("Example locations of shapelet matches (%d shapelets extracted)" % sum(shapelet_sizes.values()))
                # plt.plot(X_test[test_ts_id].ravel())
                # for idx_shp, shp in enumerate(shp_clf.shapelets_):
                #     t0 = predicted_locations[test_ts_id, idx_shp]
                #     plt.plot(np.arange(t0, t0 + len(shp)), shp, linewidth=2)

                # plt.tight_layout()
                # plt.savefig(test_ts_id+'_test.png', format='png')
                # exit()

                prediction = shp_clf.predict(X_test)
                prediction_prob = shp_clf.predict_proba(X_test)

                y_pred += prediction.tolist()
                y_true += y_test.tolist()

            ###After LOO
            # test_ts_id = 0
            # plt.figure()
            # plt.title("Example locations of shapelet matches (%d shapelets extracted)" % sum(shapelet_sizes.values()))
            # plt.plot(X_test[test_ts_id].ravel())
            # for idx_shp, shp in enumerate(shp_clf.shapelets_):
            #     t0 = predicted_locations[test_ts_id, idx_shp]
            #     plt.plot(np.arange(t0, t0 + len(shp)), shp, linewidth=2)

            # plt.tight_layout()
            # plt.savefig('test.png', format='png')

            local_acc = balanced_accuracy_score(y_true, y_pred)
            acc.append(local_acc)
        # print('acc', acc)
        # print('final acc', np.mean(acc))
        # print('final acc std', np.std(acc))

        if not os.path.exists(os.path.join(results_base_path, 'regression')):
            os.makedirs(os.path.join(results_base_path, 'regression'))
        results_file = os.path.join(results_base_path, 'regression',
                                    'shapelet_' + str(shapelet_len) + '.txt')

        with open(results_file, 'w') as w:
            # w.write("Confusion Matrix\n")
            # # w.write(confusion_matrix(y_true, y_pred).tolist())
            # w.write('{}\n\n'.format(confusion_matrix(y_true, y_pred)))

            w.write('regression: {} ({})\n'.format(np.mean(acc),
                                                   np.std(acc)))
            w.write('baseline: {} ({})'.format(np.mean(acc_baseline),
                                               np.std(acc_baseline)))
            w.write("\nFeature Importance\n")
            for i in tot_importance:
                tot_importance[i] = np.mean(tot_importance[i])
            for i in sorted(tot_importance.items(),
                            key=operator.itemgetter(1),
                            reverse=True):
                w.write("{} = {}\n".format(i[0], i[1]))
Example 8
    def fit(self, X, y):
        """Fit the model using X as training data and y as target values
        Parameters
        ----------
        X : {array-like}
            Training data. Shape [n_samples, n_features].
        y : {array-like, sparse matrix}
            Target values of shape = [n_samples] or [n_samples, n_outputs]
        """

        self.X = X
        self.y = y

        n_shapelets_per_size = self.shapelet_model_params.get(
            "n_shapelets_per_size", "heuristic")
        if n_shapelets_per_size == "heuristic":
            n_ts, ts_sz = X.shape[:2]
            n_classes = len(set(y))
            n_shapelets_per_size = grabocka_params_to_shapelet_size_dict(
                n_ts=n_ts,
                ts_sz=ts_sz,
                n_classes=n_classes,
                l=self.shapelet_model_params.get("l", 0.1),
                r=self.shapelet_model_params.get("r", 2))

        shp_clf = ShapeletModel(
            n_shapelets_per_size=n_shapelets_per_size,
            optimizer=self.shapelet_model_params.get("optimizer", "sgd"),
            weight_regularizer=self.shapelet_model_params.get(
                "weight_regularizer", .01),
            max_iter=self.shapelet_model_params.get("max_iter", 100),
            random_state=self.random_state,
            verbose=self.shapelet_model_params.get("verbose", 0))

        shp_clf.fit(X, y)
        X_transformed = shp_clf.transform(X)
        self.X_transformed = X_transformed

        if self.tau is not None:
            # Binarize the shapelet-distance features against the fixed
            # threshold tau.
            self.X_thresholded = 1 * (self.X_transformed < self.tau)
            clf = DecisionTreeClassifier()
            param_grid = self.decision_tree_grid_search_params
            grid = GridSearchCV(clf,
                                param_grid=param_grid,
                                scoring='accuracy',
                                n_jobs=-1,
                                verbose=0)
            grid.fit(self.X_thresholded, y)
        else:
            # No fixed tau given: binarize at each candidate quantile of the
            # transformed distances, grid-search a decision tree for each,
            # and keep the threshold with the best CV score.
            grids = []
            grids_scores = []
            for quantile in self.tau_quantiles:
                _X_thresholded = 1 * (self.X_transformed < (np.quantile(
                    self.X_transformed, quantile)))
                clf = DecisionTreeClassifier()
                param_grid = self.decision_tree_grid_search_params
                grid = GridSearchCV(clf,
                                    param_grid=param_grid,
                                    scoring='accuracy',
                                    n_jobs=-1,
                                    verbose=0)
                grid.fit(_X_thresholded, y)
                grids.append(grid)
                grids_scores.append(grid.best_score_)
            grid = grids[np.argmax(np.array(grids_scores))]
            best_quantile = self.tau_quantiles[np.argmax(
                np.array(grids_scores))]
            self.tau = np.quantile(self.X_transformed, best_quantile)
            self.X_thresholded = 1 * (self.X_transformed < self.tau)

        # Refit a tree with the best parameters on the thresholded features.
        clf = DecisionTreeClassifier(**grid.best_params_)
        clf.fit(self.X_thresholded, y)
        if self.prune_duplicate_tree_leaves:
            prune_duplicate_leaves(
                clf)  # FIXME: does it influence the .tree properties?

        self.decision_tree = clf
        self.decision_tree_explorable = NewTree(clf)
        self.decision_tree_explorable.build_tree()
        self._shapelet_model = shp_clf
        self._build_tree_graph()

        return self
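A hedged sketch of how this fit() might be driven; the wrapper class name and constructor arguments below are hypothetical, chosen only to supply the attributes the method reads (shapelet_model_params, tau, tau_quantiles, decision_tree_grid_search_params, random_state, prune_duplicate_tree_leaves):

# Hypothetical driver; the class name and the defaults are assumptions.
model = ShapeletTreeExplainer(
    shapelet_model_params={"l": 0.1, "r": 2, "max_iter": 100},
    tau=None,                        # let fit() choose tau by quantile search
    tau_quantiles=[0.1, 0.25, 0.5],
    decision_tree_grid_search_params={"max_depth": [3, 5],
                                      "min_samples_leaf": [10, 20]},
    random_state=0,
    prune_duplicate_tree_leaves=True,
)
model.fit(X_train, y_train)       # X_train: array of shape [n_samples, ts_sz]
print(model.decision_tree)        # the final fitted DecisionTreeClassifier
print(model.X_thresholded.shape)  # binarized shapelet-distance features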