def __init__(self, train, name):
    """Load one split of a UCR/UEA dataset and initialise the parent dataset.

    Parameters
    ----------
    train : bool
        If True, load the training split; otherwise the test split.
    name : str
        Name of the UCR/UEA dataset understood by ``UCR_UEA_datasets``.
    """
    loader = UCR_UEA_datasets()
    projector = None
    if train:
        data, targets, _, _ = loader.load_dataset(name)
    else:
        _, _, data, targets = loader.load_dataset(name)

    if name in {"PEMS-SF", "Libras"}:
        # These datasets ship numeric labels starting at 1 -> 0-based ints.
        targets = targets.astype(float).astype(int) - 1
    else:
        # Map arbitrary label values onto contiguous class ids 0..C-1,
        # in np.unique (sorted) order.
        projector = {k: i for i, k in enumerate(np.unique(targets))}
        targets = np.array(list(map(lambda n: projector[n], targets)))

    # (batch, time, features) -> (batch, features, time)
    data = torch.tensor(data.transpose(0, 2, 1).astype(np.float32))
    targets = torch.tensor(targets.astype(int))
    super(TSULUEADataset, self).__init__(data, targets)

    self.n_features = data.shape[1]
    # BUGFIX: n_targets was assigned twice in a row (once from the local
    # `targets`, then immediately overwritten from `self.target`); only the
    # second assignment ever took effect, so keep that one.
    # NOTE(review): `self.target` is presumably set by the parent __init__
    # from the `targets` tensor passed above — confirm against the base class.
    self.n_targets = len(np.unique(self.target))

    if projector is not None:
        # Store the inverse mapping (class id -> original label).
        self.projector = {k: v for v, k in projector.items()}
    else:
        self.projector = projector
def __init__(self, train=True, name: str = "RacketSports"):
    """Fetch one split of a UCR/UEA dataset and store it on the instance.

    Parameters
    ----------
    train : bool
        If True, keep the training split; otherwise the test split.
    name : str
        Dataset name understood by ``UCR_UEA_datasets``.
    """
    projector = None
    splits = UCR_UEA_datasets().load_dataset(name)
    # splits == (x_train, y_train, x_test, y_test)
    data, targets = (splits[0], splits[1]) if train else (splits[2], splits[3])

    if name in {"PEMS-SF", "Libras"}:
        # Numeric labels starting at 1 -> 0-based integer classes.
        targets = targets.astype(float).astype(int) - 1
    elif name in {"UWaveGestureLibraryAll"}:
        # Already numeric, just shifted by one.
        targets = targets - 1
    else:
        # Arbitrary labels -> contiguous ids 0..C-1 in np.unique order.
        projector = {k: i for i, k in enumerate(np.unique(targets))}
        targets = np.array([projector[label] for label in targets])

    # (batch, time, features) -> (batch, features, time)
    self.data = data.transpose(0, 2, 1).astype(np.float32)
    self.targets = targets.astype(int)
    self.n_features = self.data.shape[1]
    self.n_targets = len(np.unique(self.targets))
    # Keep the inverse mapping (class id -> original label) when one was built.
    self.projector = (
        {index: label for label, index in projector.items()}
        if projector is not None
        else projector
    )
def fetch_dataset(
    name: str,
) -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]:
    """Load a UCR/UEA dataset and return ``((x_train, y_train), (x_test, y_test))``.

    Features are cast to float32 and labels are normalised to contiguous
    0-based int64 class ids (per-dataset special cases below).
    """
    x_train, y_train, x_test, y_test = UCR_UEA_datasets().load_dataset(name)
    x_train = x_train.astype(np.float32)
    x_test = x_test.astype(np.float32)

    def _relabel(raw):
        # Numeric labels starting at 1 -> 0-based ints.
        if name in {"PEMS-SF", "Libras"}:
            return raw.astype(float).astype(int) - 1
        if name in {"UWaveGestureLibraryAll"}:
            return raw - 1
        # Arbitrary labels -> contiguous ids 0..C-1 in np.unique order.
        lookup = {label: idx for idx, label in enumerate(np.unique(raw))}
        return np.array([lookup[label] for label in raw])

    return (
        (x_train, _relabel(y_train).astype(np.int64)),
        (x_test, _relabel(y_test).astype(np.int64)),
    )
# NOTE(review): chunk starts mid-list — the opening bracket of the dataset-name
# list (and its variable name, used as `datasets` below) lies outside this view.
    'ProximalPhalanxTW', 'UWaveGestureLibraryAll', 'Yoga', 'Mallat',
    'PhalangesOutlinesCorrect', 'FiftyWords', 'Meat', 'ECG200',
    'ECGFiveDays', 'MedicalImages', 'MiddlePhalanxTW', 'InlineSkate',
    'Earthquakes', 'Lightning7', 'MiddlePhalanxOutlineAgeGroup',
    'DiatomSizeReduction', 'ShapeletSim', 'SonyAIBORobotSurface1', 'Symbols',
    'DistalPhalanxTW', 'WordSynonyms', 'FaceFour', 'Lightning2',
    'NonInvasiveFatalECGThorax1', 'NonInvasiveFatalECGThorax2',
    'CinCECGTorso', 'Phoneme', 'HandOutlines', 'StarLightCurves'
]

# Make sure the output directory for results exists before the loop writes to it.
if not os.path.isdir('results/genetic'):
    os.makedirs('results/genetic')

for dataset in datasets:
    try:
        # Load the official train/test split for this dataset.
        X_train, y_train, X_test, y_test = data_loader.load_dataset(dataset)
        # Print published baseline accuracies, best first.
        print(
            sorted(data_loader.baseline_accuracy(dataset)[dataset].items(),
                   key=lambda x: -x[1]))
        # Re-sample the test and train set with same sizes and stratified
        if X_test is None or len(X_test) == 0:
            continue
        nr_test_samples = len(X_test)
        # Pool all samples, then re-split with the original test-set size,
        # stratified on the labels.
        X = np.vstack((X_train, X_test))
        y = np.vstack((np.reshape(y_train, (-1, 1)),
                       np.reshape(y_test, (-1, 1))))
        y = pd.Series(np.reshape(y, (-1, )))
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, stratify=y, test_size=nr_test_samples)
        # Indices of the re-sampled split (pd.Series keeps original positions).
        test_idx = y_test.index
        train_idx = y_train.index
# NOTE(review): the `try` block (and its `except` handler) continues past the
# end of this chunk.
class UCRDataset(torch.utils.data.Dataset):
    """A torch Dataset wrapper around the tslearn UCR/UEA archive.

    Downloads/loads the named dataset via ``UCR_UEA_datasets`` and exposes
    one of four partitions: a random train/valid split of the official
    training data (controlled by ``ratio`` and ``randomstate``), the whole
    training data, or the official test set. Labels are normalised so class
    ids start at 0.
    """

    def __init__(self,
                 name,
                 partition="train",
                 ratio=.75,
                 randomstate=None,
                 silent=True,
                 augment_data_noise=0):
        r = np.random.RandomState(seed=randomstate)

        self.name = name
        self.dataset = UCR_UEA_datasets()
        # Scale of the uniform noise added to each sample in __getitem__.
        self.augment_data_noise = augment_data_noise

        if name not in self.dataset.list_datasets():
            raise ValueError("Dataset not found: Please choose from " +
                             ", ".join(self.dataset.list_datasets()))

        X_trainvalid, y_trainvalid, X_test, y_test = self.dataset.load_dataset(
            name)

        # Count classes over BOTH splits so rare classes are not missed.
        self.nclasses = len(np.unique(np.append(y_test, y_trainvalid, axis=0)))
        self.ndims = 1  # UCR datasets have one featuredimension

        # Random per-sample train/valid mask over the official training data.
        train_mask = r.rand(len(X_trainvalid)) < ratio
        valid_mask = np.logical_not(train_mask)

        if partition == "train":
            self.X = X_trainvalid[train_mask]
            self.y = y_trainvalid[train_mask]
        elif partition == "valid":
            self.X = X_trainvalid[valid_mask]
            self.y = y_trainvalid[valid_mask]
        elif partition == "trainvalid":
            self.X = X_trainvalid
            self.y = y_trainvalid
        elif partition == "test":
            self.X = X_test
            self.y = y_test
        else:
            raise ValueError(
                "Invalid partition! please provide either 'train','valid', 'trainvalid', or 'test'"
            )

        # some binary datasets e.g. EGC200 or Lightning 2 have classes -1, 1:
        # clip negatives up to 0 so ids are non-negative.
        if self.y.min() < 0:
            if not silent:
                print("Found class ids < 0 in dataset. clipping to zero!")
            self.y = np.clip(self.y, 0, None)

        # some datasets (e.g. Coffee) have classes starting at 1 while others
        # start at 0: shift down so class ids always start at 0.
        if self.y.min() > 0:
            if not silent:
                print(
                    "Found class id starting from 1. reducing all class ids by one to start from zero"
                )
            self.y -= 1

        self.sequencelength = X_trainvalid.shape[1]

        if not silent:
            msg = "Loaded dataset {}-{} T={}, classes={}: {}/{} samples"
            print(
                msg.format(name, partition, self.sequencelength, self.nclasses,
                           len(self.X),
                           len(X_trainvalid) + len(X_test)))

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        X = self.X[idx]
        # BUGFIX: add the noise out of place. The original `X += ...` wrote
        # through the NumPy view returned by `self.X[idx]`, permanently
        # corrupting the stored data a little more on every access whenever
        # augment_data_noise > 0.
        X = X + np.random.rand(*X.shape) * self.augment_data_noise
        X = torch.from_numpy(X).type(torch.FloatTensor)
        y = torch.from_numpy(np.array([self.y[idx]])).type(torch.LongTensor)
        # repeat the label once per time step so the target matches X's length
        return X, y.expand(X.shape[0]), idx

    def __str__(self):
        # Renamed local (was `str`) to avoid shadowing the builtin.
        desc = """
            UCR Dataset = {dataset}
            X.shape = {Xshape}
            y.shape = {yshape}
            nclasses = {nclasses}
            ndims = {ndims}
            """.format(dataset=self.name,
                       Xshape=self.X.shape,
                       yshape=self.y.shape,
                       nclasses=self.nclasses,
                       ndims=self.ndims)
        return desc
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ @author: tung doan """ import numpy as np import matplotlib.pyplot as plt from tslearn.datasets import UCR_UEA_datasets from tmf import tmf """ load data """ data_loader = UCR_UEA_datasets() X_tr, y_tr, X_te, y_te = data_loader.load_dataset('Coffee') X = X_tr[:, ::2, 0] #reduce length a factor of 2 for fast demo y = y_tr # Ground truth indicator matrix grd = np.zeros((y.size, y.max() + 1)) grd[np.arange(y.size), y] = 1 """ run temporal matrix factorization """ k = y.max() + 1 l = X.shape[1] lambda_1 = lambda_2 = 1e-2 lambda_3 = 10 sigma = 0.05**2 eta = 1e-2 o_max = 15 i_max = 50 F_list, G_list = tmf(X, k, l, lambda_1, lambda_2, lambda_3, sigma, eta, o_max, i_max) """ plot """ plt.style.use(style='ggplot')
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.svm import SVC
from tslearn.datasets import UCR_UEA_datasets
from enchanter.addons import layers as L
from enchanter.callbacks import EarlyStoppingForTSUS
from enchanter.tasks import TimeSeriesUnsupervisedRunner
from enchanter.engine.modules import fix_seed
from enchanter.utils.datasets import TimeSeriesLabeledDataset

# Make the run reproducible.
fix_seed(800)

downloader = UCR_UEA_datasets()
x_train, y_train, x_test, y_test = downloader.load_dataset("Libras")

# (batch, time, features) -> (batch, features, time); presumably the layout
# expected by the temporal-conv encoder below — confirm against Encoder.
x_train = torch.tensor(x_train.transpose(0, 2, 1), dtype=torch.float32)
x_test = torch.tensor(x_test.transpose(0, 2, 1), dtype=torch.float32)

# Libras labels are numeric values starting at 1 -> 0-based long tensors.
y_train = y_train.astype(float).astype(int) - 1
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = y_test.astype(float).astype(int) - 1
y_test = torch.tensor(y_test, dtype=torch.long)


# NOTE(review): the Encoder definition is truncated at the end of this chunk.
class Encoder(nn.Module):
    def __init__(self, in_features, mid_features, out_features,
                 representation_size):
        super(Encoder, self).__init__()
        self.conv = nn.Sequential(
            L.TemporalConvBlock(in_features,
data_loader = UCR_UEA_datasets() # Iterate over files in directory, process the predictions (_proba) and save # them in a dict. Afterwards, print a table with aggregated results & create # scatter plot (stat test if |values| > 1) datasets = set([x.split('_')[0] for x in os.listdir(DIR) if x != '.keep']) for dataset in datasets: glob_path = DIR + '{}_{}*lr_proba.csv' method1_files = glob.glob(glob_path.format(dataset, method1)) method2_files = glob.glob(glob_path.format(dataset, method2)) # First, we load the ground truth, needed to calculate accuracy _, _, _, ground_truth = data_loader.load_dataset(dataset) # Map classes to [0, ..., C-1] map_dict = {} for j, c in enumerate(set(ground_truth)): map_dict[c] = j ground_truth = pd.Series(ground_truth).map(map_dict).values # Iterate over files of method 1 and calculate accuracy for file in method1_files: preds = np.argmax(pd.read_csv(file, index_col=[0]).values, axis=1) accuracies1[dataset].append(accuracy_score(ground_truth, preds)) # Iterate over files of method 2 and calculate accuracy for file in method2_files: preds = np.argmax(pd.read_csv(file, index_col=[0]).values, axis=1)