Code example #1

import numpy as np
import torch
from tslearn.datasets import UCR_UEA_datasets

    def __init__(self, train, name):
        loader = UCR_UEA_datasets()
        projector = None
        if train:
            data, targets, _, _ = loader.load_dataset(name)
        else:
            _, _, data, targets = loader.load_dataset(name)

        if name in {"PEMS-SF", "Libras"}:
            targets = targets.astype(float).astype(int) - 1
        else:
            projector = {k: i for i, k in enumerate(np.unique(targets))}
            targets = np.array([projector[t] for t in targets])

        data = torch.tensor(data.transpose(0, 2, 1).astype(np.float32))
        targets = torch.tensor(targets.astype(int))
        super(TSULUEADataset, self).__init__(data, targets)

        self.n_features = data.shape[1]
        self.n_targets = len(np.unique(targets))  # number of classes after remapping
        if projector is not None:
            self.projector = {k: v for v, k in projector.items()}
        else:
            self.projector = projector
Code example #2

import numpy as np
from tslearn.datasets import UCR_UEA_datasets

    def __init__(self, train=True, name: str = "RacketSports"):
        loader = UCR_UEA_datasets()
        projector = None
        if train:
            data, targets, _, _ = loader.load_dataset(name)
        else:
            _, _, data, targets = loader.load_dataset(name)

        if name in {"PEMS-SF", "Libras"}:
            targets = targets.astype(float).astype(int) - 1
        elif name in {"UWaveGestureLibraryAll"}:
            targets = targets - 1
        else:
            projector = {k: i for i, k in enumerate(np.unique(targets))}
            targets = np.array([projector[t] for t in targets])

        self.data = data.transpose(0, 2, 1).astype(np.float32)
        self.targets = targets.astype(int)
        self.n_features = self.data.shape[1]
        self.n_targets = len(np.unique(self.targets))
        if projector is not None:
            self.projector = {k: v for v, k in projector.items()}
        else:
            self.projector = projector
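
# The projector idiom above remaps arbitrary label values to contiguous
# integer ids. A minimal standalone sketch of the same idea, using made-up
# string labels:
import numpy as np

targets = np.array(["walk", "run", "walk", "jump"])
projector = {k: i for i, k in enumerate(np.unique(targets))}  # {'jump': 0, 'run': 1, 'walk': 2}
mapped = np.array([projector[t] for t in targets])            # array([2, 1, 2, 0])

# Inverting the dict, as both classes store in self.projector, recovers the names:
inverse = {v: k for k, v in projector.items()}
restored = [inverse[i] for i in mapped]                       # ['walk', 'run', 'walk', 'jump']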
Code example #3

from typing import Tuple

import numpy as np
from tslearn.datasets import UCR_UEA_datasets

def fetch_dataset(
    name: str,
) -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]:
    loader = UCR_UEA_datasets()
    x_train, y_train, x_test, y_test = loader.load_dataset(name)
    x_train = x_train.astype(np.float32)
    x_test = x_test.astype(np.float32)

    def projector(targets):
        if name in {"PEMS-SF", "Libras"}:
            targets = targets.astype(float).astype(int) - 1
        elif name in {"UWaveGestureLibraryAll"}:
            targets = targets - 1
        else:
            project = {k: i for i, k in enumerate(np.unique(targets))}
            targets = np.array([project[t] for t in targets])

        return targets

    y_train = projector(y_train).astype(np.int64)
    y_test = projector(y_test).astype(np.int64)

    return (x_train, y_train), (x_test, y_test)
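
# A quick usage sketch of fetch_dataset; "BasicMotions" is just an example id,
# any dataset name accepted by tslearn's UCR_UEA_datasets works:
(x_train, y_train), (x_test, y_test) = fetch_dataset("BasicMotions")
print(x_train.shape, x_train.dtype)  # (n_samples, length, n_features), float32
print(np.unique(y_train))            # contiguous int64 class ids starting at 0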
Code example #4

import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tslearn.datasets import UCR_UEA_datasets

data_loader = UCR_UEA_datasets()
datasets = [  # list truncated in the original snippet; earlier entries omitted
    'ProximalPhalanxTW', 'UWaveGestureLibraryAll', 'Yoga', 'Mallat',
    'PhalangesOutlinesCorrect', 'FiftyWords', 'Meat', 'ECG200', 'ECGFiveDays',
    'MedicalImages', 'MiddlePhalanxTW', 'InlineSkate', 'Earthquakes',
    'Lightning7', 'MiddlePhalanxOutlineAgeGroup', 'DiatomSizeReduction',
    'ShapeletSim', 'SonyAIBORobotSurface1', 'Symbols', 'DistalPhalanxTW',
    'WordSynonyms', 'FaceFour', 'Lightning2', 'NonInvasiveFatalECGThorax1',
    'NonInvasiveFatalECGThorax2', 'CinCECGTorso', 'Phoneme', 'HandOutlines',
    'StarLightCurves'
]

os.makedirs('results/genetic', exist_ok=True)

for dataset in datasets:
    try:
        X_train, y_train, X_test, y_test = data_loader.load_dataset(dataset)
        print(
            sorted(data_loader.baseline_accuracy(dataset)[dataset].items(),
                   key=lambda x: -x[1]))

        # Re-sample the train and test sets with the same sizes, stratified by label
        if X_test is None or len(X_test) == 0:
            continue
        nr_test_samples = len(X_test)
        X = np.vstack((X_train, X_test))
        y = np.vstack((np.reshape(y_train, (-1, 1)),
                       np.reshape(y_test, (-1, 1))))
        y = pd.Series(np.reshape(y, (-1,)))
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, stratify=y, test_size=nr_test_samples)
        test_idx = y_test.index
        train_idx = y_train.index
Code example #5

import numpy as np
import torch
from tslearn.datasets import UCR_UEA_datasets

class UCRDataset(torch.utils.data.Dataset):
    """
    A torch wrapper around tslearn UCR_Datasets datasets
    """
    def __init__(self,
                 name,
                 partition="train",
                 ratio=.75,
                 randomstate=None,
                 silent=True,
                 augment_data_noise=0):
        r = np.random.RandomState(seed=randomstate)

        self.name = name
        self.dataset = UCR_UEA_datasets()

        self.augment_data_noise = augment_data_noise

        if name not in self.dataset.list_datasets():
            raise ValueError("Dataset not found: Please choose from " +
                             ", ".join(self.dataset.list_datasets()))

        X_trainvalid, y_trainvalid, X_test, y_test = self.dataset.load_dataset(
            name)

        self.nclasses = len(np.unique(np.append(y_test, y_trainvalid, axis=0)))
        self.ndims = 1  # UCR datasets have a single feature dimension

        train_mask = r.rand(len(X_trainvalid)) < ratio
        valid_mask = np.logical_not(train_mask)

        if partition == "train":
            self.X = X_trainvalid[train_mask]
            self.y = y_trainvalid[train_mask]
        elif partition == "valid":
            self.X = X_trainvalid[valid_mask]
            self.y = y_trainvalid[valid_mask]
        elif partition == "trainvalid":
            self.X = X_trainvalid
            self.y = y_trainvalid
        elif partition == "test":
            self.X = X_test
            self.y = y_test
        else:
            raise ValueError(
                "Invalid partition! Please provide one of 'train', 'valid', 'trainvalid', or 'test'"
            )

        # some binary datasets, e.g. ECG200 or Lightning2, use classes -1/1 -> clip negatives to 0
        if self.y.min() < 0:
            if not silent:
                print("Found class ids < 0 in dataset. clipping to zero!")
            self.y = np.clip(self.y, 0, None)

        # most datasets start class ids at 1, while some (e.g. Coffee) already start at 0
        if self.y.min() > 0:
            if not silent:
                print(
                    "Found class ids starting from 1; reducing all class ids by one to start from zero"
                )
            self.y -= 1

        #self.classes = np.unique(self.y)
        self.sequencelength = X_trainvalid.shape[1]

        if not silent:
            msg = "Loaded dataset {}-{} T={}, classes={}: {}/{} samples"
            print(
                msg.format(name, partition, self.sequencelength, self.nclasses,
                           len(self.X),
                           len(X_trainvalid) + len(X_test)))

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        # copy so that augmentation noise does not mutate the stored array in place
        X = self.X[idx].copy()
        X += np.random.rand(*X.shape) * self.augment_data_noise

        X = torch.from_numpy(X).type(torch.FloatTensor)
        y = torch.from_numpy(np.array([self.y[idx]])).type(torch.LongTensor)

        # repeat the label once per time step so y matches the sequence length
        return X, y.expand(X.shape[0]), idx

    def __str__(self):
        return """
UCR Dataset = {dataset}
X.shape = {Xshape}
y.shape = {yshape}
nclasses = {nclasses}
ndims = {ndims}
        """.format(dataset=self.name,
                   Xshape=self.X.shape,
                   yshape=self.y.shape,
                   nclasses=self.nclasses,
                   ndims=self.ndims)
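
# A minimal usage sketch, wrapping the dataset in a standard DataLoader
# ("TwoPatterns" and the batch size are arbitrary choices):
train_ds = UCRDataset("TwoPatterns", partition="train", ratio=0.75, randomstate=0)
loader = torch.utils.data.DataLoader(train_ds, batch_size=32, shuffle=True)
X, y, idx = next(iter(loader))  # X: (32, T, 1), y: (32, T), idx: (32,)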
Code example #6
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: tung doan
"""
import numpy as np
import matplotlib.pyplot as plt

from tslearn.datasets import UCR_UEA_datasets
from tmf import tmf
""" load data """
data_loader = UCR_UEA_datasets()
X_tr, y_tr, X_te, y_te = data_loader.load_dataset('Coffee')
X = X_tr[:, ::2, 0]  # halve the series length for a fast demo
y = y_tr.astype(int)  # ensure integer class ids for the indexing below
# Ground-truth indicator (one-hot) matrix
grd = np.zeros((y.size, y.max() + 1))
grd[np.arange(y.size), y] = 1
""" run temporal matrix factorization """
k = y.max() + 1
l = X.shape[1]
lambda_1 = lambda_2 = 1e-2
lambda_3 = 10
sigma = 0.05**2
eta = 1e-2
o_max = 15
i_max = 50
F_list, G_list = tmf(X, k, l, lambda_1, lambda_2, lambda_3, sigma, eta, o_max,
                     i_max)
""" plot """
plt.style.use(style='ggplot')
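
# The script stops after setting the plot style; a rough continuation, assuming
# each entry of F_list is a (k, l) array of temporal factors (tmf's actual
# return layout may differ):
for i, f in enumerate(F_list[-1]):  # plot each factor from the last outer iteration
    plt.plot(f, label='factor {}'.format(i))
plt.legend()
plt.show()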
Code example #7
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.svm import SVC
from tslearn.datasets import UCR_UEA_datasets
from enchanter.addons import layers as L
from enchanter.callbacks import EarlyStoppingForTSUS
from enchanter.tasks import TimeSeriesUnsupervisedRunner
from enchanter.engine.modules import fix_seed
from enchanter.utils.datasets import TimeSeriesLabeledDataset

fix_seed(800)

downloader = UCR_UEA_datasets()
x_train, y_train, x_test, y_test = downloader.load_dataset("Libras")
x_train = torch.tensor(x_train.transpose(0, 2, 1), dtype=torch.float32)
x_test = torch.tensor(x_test.transpose(0, 2, 1), dtype=torch.float32)

y_train = y_train.astype(float).astype(int) - 1
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = y_test.astype(float).astype(int) - 1
y_test = torch.tensor(y_test, dtype=torch.long)


class Encoder(nn.Module):
    def __init__(self, in_features, mid_features, out_features,
                 representation_size):
        super(Encoder, self).__init__()
        self.conv = nn.Sequential(
            L.TemporalConvBlock(in_features,
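
# The Encoder above is cut off mid-call in the original snippet. A rough
# stand-in in plain PyTorch (layer sizes and pooling are assumptions, not a
# reproduction of enchanter's L.TemporalConvBlock):
class SimpleEncoder(nn.Module):
    def __init__(self, in_features, mid_features, representation_size):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(in_features, mid_features, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv1d(mid_features, representation_size, kernel_size=3, padding=1),
        )
        self.pool = nn.AdaptiveMaxPool1d(1)

    def forward(self, x):  # x: (batch, features, time)
        return self.pool(self.conv(x)).squeeze(-1)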
Code example #8

import glob
import os
from collections import defaultdict

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from tslearn.datasets import UCR_UEA_datasets

# DIR, method1 and method2 are defined earlier in the original script
accuracies1 = defaultdict(list)
accuracies2 = defaultdict(list)
data_loader = UCR_UEA_datasets()

# Iterate over files in directory, process the predictions (_proba) and save 
# them in a dict. Afterwards, print a table with aggregated results & create 
# scatter plot (stat test if |values| > 1)
datasets = {x.split('_')[0] for x in os.listdir(DIR) if x != '.keep'}

for dataset in datasets:
    glob_path = DIR + '{}_{}*lr_proba.csv'
    method1_files = glob.glob(glob_path.format(dataset, method1))
    method2_files = glob.glob(glob_path.format(dataset, method2))

    # First, we load the ground truth, needed to calculate accuracy
    _, _, _, ground_truth = data_loader.load_dataset(dataset)

    # Map classes to [0, ..., C-1]; np.unique gives a deterministic (sorted) order
    map_dict = {c: j for j, c in enumerate(np.unique(ground_truth))}
    ground_truth = pd.Series(ground_truth).map(map_dict).values

    # Iterate over files of method 1 and calculate accuracy
    for file in method1_files:
        preds = np.argmax(pd.read_csv(file, index_col=[0]).values, axis=1)
        accuracies1[dataset].append(accuracy_score(ground_truth, preds))

    # Iterate over files of method 2 and calculate accuracy
    for file in method2_files:
        preds = np.argmax(pd.read_csv(file, index_col=[0]).values, axis=1)
        accuracies2[dataset].append(accuracy_score(ground_truth, preds))