Python load_from_tsfile_to_dataframeの例、sktime.utils.load_data.load_from_tsfile_to_dataframe Pythonの例

コード例 #1

0

ファイルを表示

def sktime_to_numpy(train_file, test_file):
    """Load a train/test pair of ``.ts`` files as zero-padded NumPy arrays.

    Both files are read with ``load_from_tsfile_to_dataframe``. Every series
    is right-padded with zeros up to the length of the longest series found
    in any column of either split, so both arrays share one time axis.

    Parameters
    ----------
    train_file, test_file
        Locations of the train and test ``.ts`` files, passed unchanged to
        ``load_from_tsfile_to_dataframe``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``. Each ``X`` has shape
        ``(cases, max_len, columns)``; each ``y`` holds the integer codes of
        a ``pd.Categorical`` built from that split's labels.
    """
    X_train_ts, y_train = load_from_tsfile_to_dataframe(train_file)
    X_test_ts, y_test = load_from_tsfile_to_dataframe(test_file)

    def longest_series(frame):
        # Scan every column, not only 'dim_0': a longer series in another
        # dimension would otherwise not fit into the padded array.
        return max(
            (len(series) for col in frame.columns for series in frame[col]),
            default=0,
        )

    max_len = max(longest_series(X_train_ts), longest_series(X_test_ts))

    def to_padded_array(frame):
        # np.zeros already provides the padding; only the leading part of
        # each row is overwritten with the actual observations.
        padded = np.zeros((len(frame), len(frame.columns), max_len))
        for i in range(len(frame)):
            row = frame.iloc[i]
            for col_idx, col in enumerate(frame.columns):
                values = row[col].values
                padded[i, col_idx, :len(values)] = values
        return padded

    X_train = to_padded_array(X_train_ts)
    X_test = to_padded_array(X_test_ts)

    # NOTE(review): label codes are derived independently per split, so the
    # same code may denote different classes if the splits do not contain the
    # same set of labels -- confirm this is acceptable for callers.
    return np.transpose(X_train, (0, 2, 1)), pd.Categorical(pd.Series(y_train)).codes,\
           np.transpose(X_test, (0, 2, 1)), pd.Categorical(pd.Series(y_test)).codes

コード例 #2

0

ファイルを表示

ファイル: datasets.py プロジェクト: itouchz/TRepNet

def load_data(name, benchmark=True, univariate=True, sep=',', test_size=0.2):
    """Load a benchmark ``.ts`` dataset or a real-world CSV by name.

    Parameters
    ----------
    name
        Dataset name; used both as directory name and as file-name prefix.
    benchmark
        When truthy, read ``<name>_TRAIN.ts`` and ``<name>_TEST.ts`` from the
        benchmark directory; otherwise read ``data.csv`` from
        ``./datasets/realworld/<name>/``.
    univariate
        Selects ``u_path`` (truthy) or ``m_path`` (falsy) as the benchmark
        root. Ignored for real-world data.
    sep
        Column separator forwarded to ``pd.read_csv``.
    test_size
        Accepted but not used by this function.

    Returns
    -------
    ``(train_x, train_y, test_x, test_y)`` for benchmark data, or the
    DataFrame read from the CSV otherwise.
    """
    if not benchmark:
        # Real-world datasets ship as one CSV per dataset directory.
        return pd.read_csv('./datasets/realworld/' + name + '/data.csv', sep=sep)

    root = u_path if univariate else m_path
    prefix = root + name + '/' + name
    train_x, train_y = load_from_tsfile_to_dataframe(prefix + '_TRAIN.ts')
    test_x, test_y = load_from_tsfile_to_dataframe(prefix + '_TEST.ts')
    return train_x, train_y, test_x, test_y

コード例 #3

0

ファイルを表示

ファイル: em_workflow.py プロジェクト: jiedali/oversampling_algorithm_multivariate_time_series

	def pre_process(self, test_data=False):
		"""Load one split and flatten it into a 2-D frame with binary labels.

		The train or test ``.ts`` file under ``self.data_dir`` is loaded and
		passed through ``ColumnConcatenator``; the first column of every
		transformed case then becomes one row of the returned frame. For the
		train split, when ``self.down_sample_minority`` is set, only the first
		``1 / self.minority_div`` of the label-1 rows are kept.

		Args:
			test_data: truthy to read ``self.file_name_test``, falsy to read
				``self.file_name_train``.

		Returns:
			``(x, y)``: the flattened frame and the labels produced by
			``self.convert_y_to_binary_label``. When down-sampling ran, rows
			are reordered (kept label-1 rows first, then label-0 rows) and
			``y`` is a NumPy array of ones followed by zeros.
		"""
		# Plain truthiness: the former `== True` / `== False` pair left
		# `train_x` unbound for any value equal to neither.
		file_name = self.file_name_test if test_data else self.file_name_train
		train_x, train_y = load_from_tsfile_to_dataframe(os.path.join(self.data_dir, file_name))
		# train_x is samples * k features; per the original author's note the
		# concatenator expands it to samples * (kn), n being the temporal length
		concatenator = ColumnConcatenator()
		train_x_trans = concatenator.fit(train_x).transform(train_x)
		# One single-column frame per case, joined in a single concat call
		# (growing a frame inside the loop copies it on every iteration).
		case_frames = [
			train_x_trans.iloc[i, 0].to_frame()
			for i in range(0, train_x_trans.shape[0])
		]
		# after the transpose every case is one row: samples * nk
		train_x_expanded = pd.concat(case_frames, axis=1).transpose()
		# convert y to binary label
		train_y_binary = self.convert_y_to_binary_label(train_y)

		# the original data is usually balanced, so imbalance is created by
		# down-sampling the label-1 ("minority") rows of the train split
		if not test_data and self.down_sample_minority == True:
			train_x_to_be_downsample = train_x_expanded[train_y_binary == 1]
			# number of minority samples to keep
			sample_size = round(train_x_to_be_downsample.shape[0] / self.minority_div)
			train_x_downsampled = train_x_to_be_downsample.iloc[0:sample_size]
			train_x_maj = train_x_expanded[train_y_binary == 0]
			# labels are rebuilt in the same order the rows are stacked below
			train_y_binary_minority = np.ones(sample_size)
			train_y_binary_majority = np.zeros(train_y_binary[train_y_binary == 0].shape[0])
			train_y_binary = np.concatenate((train_y_binary_minority, train_y_binary_majority), axis=0)
			train_x_expanded = pd.concat([train_x_downsampled, train_x_maj], axis=0)

		return train_x_expanded, train_y_binary

コード例 #4

0

ファイルを表示

ファイル: sktime_classifiers.py プロジェクト: paudan/time-series-classification

    def evaluate_classifiers(dst):
        """Fit and score every entry of ``classifiers`` on dataset ``dst``.

        Returns a tuple ``(scores, dst)`` where ``scores`` is a flat list
        holding accuracy and macro-averaged F1 for each classifier, in the
        order the classifiers are listed.
        """
        timestamp = datetime.now().strftime("%F %T")
        print("[%s] Processing dataset %s" % (timestamp, dst))

        dataset_dir = os.path.join(UCR_DATASET_PATH, dst)
        train_x, train_y = load_from_tsfile_to_dataframe(
            os.path.join(dataset_dir, dst + "_TRAIN.ts"))
        test_x, test_y = load_from_tsfile_to_dataframe(
            os.path.join(dataset_dir, dst + "_TEST.ts"))

        def score(clf):
            # Train on the train split, then score predictions on the test split.
            clf.fit(train_x, train_y)
            predicted = clf.predict(test_x)
            accuracy = accuracy_score(test_y, predicted)
            macro_f1 = f1_score(test_y, predicted, average='macro')
            return accuracy, macro_f1

        scores = []
        for _, clf in classifiers:
            scores.extend(score(clf))
        return scores, dst

コード例 #5

0

ファイルを表示

ファイル: 9featuresPAA_main.py プロジェクト: mlgig/mtsc_benchmark

def agent(path="./", dataset="", ratio=False, seg=0.75, folder="temp"):
    """Run the PAAStat + ridge-classifier pipeline on one dataset.

    Loads ``{path}/{dataset}/{dataset}_TRAIN.ts`` and ``..._TEST.ts``,
    transforms both splits with ``PAAStat``, standardises them with a scaler
    fitted on the train split, fits a ``RidgeClassifierCV`` and writes a
    one-row CSV with accuracy and timings to ``./{folder}/{dataset}.csv``.

    Parameters
    ----------
    path : str
        Directory containing one sub-directory per dataset.
    dataset : str
        Dataset name; also used to rename the current process.
    ratio, seg
        Forwarded to ``PAAStat`` as ``paa_`` and ``seg_``.
    folder : str
        Output directory (relative to the working directory) for the CSV.

    Returns
    -------
    None
    """
    current_process().name = dataset

    start1 = time.time()
    train_x, train_y = load_from_tsfile_to_dataframe(f"{path}/{dataset}/{dataset}_TRAIN.ts")
    test_x, test_y = load_from_tsfile_to_dataframe(f"{path}/{dataset}/{dataset}_TEST.ts")

    print(f"{dataset}: Train Shape {train_x.shape}")
    print(f"{dataset}: Test Shape {test_x.shape}")

    scaler = StandardScaler()

    transform_time1 = time.time()

    mod_train = PAAStat(paa_=ratio, seg_=seg).transform(train_x.values)
    mod_train = scaler.fit(mod_train).transform(mod_train)

    # The test split reuses the statistics fitted on the train split.
    mod_test = PAAStat(paa_=ratio, seg_=seg).transform(test_x.values)
    mod_test = scaler.transform(mod_test)

    transform_time2 = time.time()
    # NOTE(review): `normalize` is not accepted by recent scikit-learn
    # releases -- confirm the version this project pins.
    model = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10),
                              normalize=True)
    train_time1 = time.time()
    model.fit(mod_train, train_y)
    preds = model.predict(mod_test)
    train_time2 = time.time()

    # Conventional (y_true, y_pred) order; accuracy itself is symmetric.
    acc1 = accuracy_score(test_y, preds) * 100

    end1 = time.time()
    print(f"Dataset: {dataset}, AccuracyRidge: {acc1}, Time taken: {(end1 - start1)/60}, "
          f"Transfrom_time: {(transform_time2-transform_time1)/60}, train_time: {(train_time2-train_time1)/60}")

    results = pd.DataFrame({'Dataset': dataset, 'AccuracyRidge': [acc1], 'Time (min)': [(end1 - start1)/60],
                            'Transfrom_time (min)': [(transform_time2-transform_time1)/60], 'train_time (min)': [(train_time2-train_time1)/60]})

    temp_path = './'+folder
    # exist_ok avoids the check-then-create race when several worker
    # processes start at the same time.
    os.makedirs(temp_path, exist_ok=True)
    results.to_csv(os.path.join(temp_path, f'{dataset}.csv'), index=False)

コード例 #6

0

ファイルを表示

ファイル: data_utils.py プロジェクト: erap129/EEGNAS

def sktime_to_numpy(file):
    """Load a ``.ts`` file as a zero-padded array plus integer label codes.

    Each series is right-padded with zeros to the length stored under
    ``'input_height'`` in ``global_vars``.

    Returns
    -------
    tuple
        ``(X, codes)`` with ``X`` of shape ``(cases, columns, input_height)``
        and ``codes`` the ``pd.Categorical`` codes of the labels.
    """
    frame, labels = load_from_tsfile_to_dataframe(file)
    target_len = global_vars.get('input_height')
    n_cases, n_dims = len(frame), len(frame.columns)
    padded = np.zeros((n_cases, n_dims, target_len))
    for case in range(n_cases):
        row = frame.iloc[case]
        for dim, column in enumerate(frame.columns):
            values = row[column].values
            # Pad on the right only; the series keeps its leading position.
            padded[case, dim] = np.pad(values,
                                       pad_width=(0, target_len - len(values)),
                                       mode='constant')
    return padded, pd.Categorical(pd.Series(labels)).codes

コード例 #7

0

ファイルを表示

    def evaluate_classifiers(dst):
        """Score every classifier on shapelet-transformed dataset ``dst``.

        Returns ``None`` when ``dst`` is listed in ``excluded`` or when the
        shapelet transform fails; otherwise ``(scores, dst)`` where ``scores``
        is a flat list of accuracy and macro-averaged F1 per classifier.
        A classifier that fails contributes ``nan, nan``.
        """
        print("[%s] Processing dataset %s" % (datetime.now().strftime("%F %T"), dst))
        if dst in excluded:
            return None
        train_x, train_y = load_from_tsfile_to_dataframe(os.path.join(UCR_DATASET_PATH, dst, dst + "_TRAIN.ts"))
        test_x, test_y = load_from_tsfile_to_dataframe(os.path.join(UCR_DATASET_PATH, dst, dst + "_TEST.ts"))

        try:
            transform = ContractedShapeletTransform(time_limit_in_mins=5)
            train_data = transform.fit_transform(train_x, train_y)
            test_data = transform.transform(test_x)
        except Exception as e:
            # `Exception` rather than a bare except, so Ctrl-C and SystemExit
            # still stop the run; report the reason instead of hiding it.
            print("Exception while transforming dataset:", str(e))
            return None

        def evaluate_classifier(clf):
            try:
                clf.fit(train_data, train_y)
                pred = clf.predict(test_data)
                return accuracy_score(test_y, pred), f1_score(test_y, pred, average='macro')
            except Exception as e:
                print("Exception while evaluating classifier:", str(e))
                return float('nan'), float('nan')

        return list(itertools.chain(*[evaluate_classifier(clf) for clf in classifiers])), dst

コード例 #8

0

ファイルを表示

ファイル: feature_classifiers.py プロジェクト: paudan/time-series-classification

    def evaluate_classifiers(dst):
        """Score every classifier on features extracted from dataset ``dst``.

        Returns ``None`` when ``dst`` is listed in ``excluded``, when the train
        split contains a single class, or when ``func`` fails on either split.
        Otherwise returns ``(results, dst)`` where ``results`` is a flat list
        of accuracy and macro-averaged F1 per classifier; a classifier that
        fails contributes ``nan, nan``.
        """
        print("[%s] Processing dataset %s" %
              (datetime.now().strftime("%F %T"), dst))
        if dst in excluded:
            return None

        dataset_dir = os.path.join(UCR_DATASET_PATH, dst)
        train_x, train_y = load_from_tsfile_to_dataframe(
            os.path.join(dataset_dir, dst + "_TRAIN.ts"))
        test_x, test_y = load_from_tsfile_to_dataframe(
            os.path.join(dataset_dir, dst + "_TEST.ts"))

        # A single-class train split leaves nothing to learn.
        if len(set(train_y)) == 1:
            return None

        data_train = data_test = None
        try:
            data_train = func(train_x)
            data_test = func(test_x)
        except Exception as err:
            print("Exception while evaluating classifier:", str(err))
            return None

        def score(clf):
            try:
                clf.fit(data_train, train_y)
                predicted = clf.predict(data_test)
                accuracy = accuracy_score(test_y, predicted)
                macro_f1 = f1_score(test_y, predicted, average='macro')
                return accuracy, macro_f1
            except Exception as err:
                print("Exception while evaluating classifier:", str(err))
                return float('nan'), float('nan')

        results = []
        for clf in classifiers:
            results.extend(score(clf))

        # Drop the dataset references and force a collection before returning.
        del train_x, train_y, test_x, test_y, data_train, data_test
        gc.collect()
        return results, dst

コード例 #9

0

ファイルを表示

ファイル: uea_ucr_dataset.py プロジェクト: ugurgudelek/berries

    def __build_class_mapping(name, kind):
        """Build a mapping from class labels to ids of int type.

        Args:
            name: Dataset name
            kind: Forwarded, together with ``name``, to
                ``_build_UEA_UCR_data_path``.

        Return:
            dict with dict[class_label] = class_id

        """
        # Only the labels of the train split are consulted; ids follow the
        # order in which np.unique returns the distinct labels.
        _, labels = load_from_tsfile_to_dataframe(
            _build_UEA_UCR_data_path(name, kind, True))
        return {
            label: class_id
            for class_id, label in enumerate(np.unique(labels))
        }

コード例 #10

0

ファイルを表示

ファイル: base.py プロジェクト: aabanda/sktime-kernels

def load_gunpoint_dataframe(split='TRAIN', return_X_y=False):
    """Load one partition of the GunPoint time series classification problem.

    Dimensionality:     univariate
    Series length:      150
    Train cases:        50
    Test cases:         150
    Number of classes:  2

    The dataset involves one female and one male actor making a motion with
    their hand. The two classes are Gun-Draw and Point. For Gun-Draw the
    actors start with their hands by their sides, draw a replicate gun from a
    hip-mounted holster, point it at a target for approximately one second,
    then return the gun to the holster and their hands to their sides. For
    Point the actors point with their index fingers at a target for
    approximately one second and then return their hands to their sides. In
    both classes the centroid of the actor's right hand was tracked in the X-
    and Y-axes, which appear to be highly correlated; the archive holds only
    the X-axis.

    Dataset details: http://timeseriesclassification.com/description.php?Dataset=GunPoint

    Parameters
    ----------
    split: string (either "TRAIN" or "TEST", default = 'TRAIN')
        Which partition file (``GunPoint_<split>.ts``) to load.
    return_X_y: bool (default = False)
        When false, ``(X, y)`` is returned. When true, the labels are stored
        in a ``'class_val'`` column of ``X`` and only ``X`` is returned.
        NOTE(review): this reads inverted relative to the flag's name --
        confirm with callers before changing.

    Returns
    -------
    X: pandas DataFrame
        The time series data as produced by ``load_from_tsfile_to_dataframe``.
    y:
        The class labels for each case in X (only when ``return_X_y`` is
        false).
    """
    problem = 'GunPoint'
    # Data files live next to this module: data/<problem>/<problem>_<split>.ts
    ts_file = path.join(path.dirname(__file__), 'data', problem,
                        problem + '_' + split + '.ts')

    X, y = load_from_tsfile_to_dataframe(ts_file)
    if not return_X_y:
        return X, y
    X['class_val'] = pd.Series(y)
    return X

コード例 #11

0

ファイルを表示

ファイル: SciKitPredictor.py プロジェクト: jesmaelnezhad/pool-analysis

    def fit(self, luck_average_windows, assessment_windows, until=None, max_horizon=9 * 6):
        """Fit one column-ensemble classifier per assessment window.

        For every assessment window not exceeding ``max_horizon`` (iteration
        stops at the first one that does), a temporary ``.ts`` file is
        prepared from the data points in ``[0, until)`` (all points when
        ``until`` is None), loaded, and used to fit a
        ``ColumnEnsembleClassifier`` holding one
        ``TimeSeriesForestClassifier`` per entry of ``luck_average_windows``.
        Fitted ensembles are appended to ``self.classifiers``.

        An ``until`` outside ``[0, len(self.data_points))`` is logged as an
        error and nothing is fitted.
        """
        logger("MODEL-FIT").debug(
            "max_horizon: {} / avg windows: {} / assmnt windows: {} / until: {} / total_data_size: {}".format(
                max_horizon,
                str(luck_average_windows),
                str(assessment_windows),
                until,
                len(self.data_points)))
        if until is not None:
            if until < 0 or until >= len(self.data_points):
                logger("MODEL-FIT").error("Parameter until is too large for the given data points: {}".format(until))
                return

        self.horizon = max_horizon
        for window_idx, window in enumerate(assessment_windows):
            if window > self.horizon:
                break

            # Dump the selected range to a temporary .ts file for sktime.
            range_end = until if until is not None else len(self.data_points)
            fit_file = self.prepare_ts_file(0, range_end,
                                            self.case_observation_size, window_idx, window)
            X, y = load_from_tsfile_to_dataframe(fit_file, replace_missing_vals_with="-100")

            # The class-weight dict depends on which label comes first in y.
            true_index = 1 if y[0] == "false" else 0
            class_weights = self.create_class_weight_dict(true_index=true_index)

            # One forest per averaging window, each bound to its own column.
            estimators = [
                ("TSF{}".format(column),
                 TimeSeriesForestClassifier(
                     n_estimators=int(self.no_estimators),
                     n_jobs=16,
                     max_depth=self.max_depth,
                     class_weight=class_weights,
                     criterion=self.criterion,
                     min_samples_split=self.min_samples_split,
                     min_samples_leaf=self.min_samples_leaf,
                     oob_score=self.oob_score,
                     bootstrap=self.bootstrap),
                 [column])
                for column in range(0, len(luck_average_windows))
            ]
            ensemble = ColumnEnsembleClassifier(estimators=estimators)
            ensemble.fit(X, y)
            self.classifiers.append(ensemble)

コード例 #12

0

ファイルを表示

ファイル: base.py プロジェクト: aabanda/sktime-kernels

def load_arrow_head_dataframe(split='TRAIN', return_X_y=False):
    """Load one partition of the ArrowHead time series classification problem.

    Dimensionality:     univariate
    Series length:      251
    Train cases:        36
    Test cases:         175
    Number of classes:  3

    The arrowhead data consists of outlines of images of arrowheads. The
    shapes of the projectile points are converted into a time series using
    the angle-based method. The classification of projectile points is an
    important topic in anthropology. The classes are based on shape
    distinctions such as the presence and location of a notch in the arrow.
    The problem in the repository is a length-normalised version of that used
    in Ye09shapelets. The three classes are called "Avonlea", "Clovis" and
    "Mix".

    Dataset details: http://timeseriesclassification.com/description.php?Dataset=ArrowHead

    Parameters
    ----------
    split: string (either "TRAIN" or "TEST", default = 'TRAIN')
        Which partition file (``ArrowHead_<split>.ts``) to load.
    return_X_y: bool (default = False)
        When false, ``(X, y)`` is returned. When true, the labels are stored
        in a ``'class_val'`` column of ``X`` and only ``X`` is returned.
        NOTE(review): this reads inverted relative to the flag's name --
        confirm with callers before changing.

    Returns
    -------
    X: pandas DataFrame
        The time series data as produced by ``load_from_tsfile_to_dataframe``.
    y:
        The class labels for each case in X (only when ``return_X_y`` is
        false).
    """
    problem = 'ArrowHead'
    # Data files live next to this module: data/<problem>/<problem>_<split>.ts
    ts_file = path.join(path.dirname(__file__), 'data', problem,
                        problem + '_' + split + '.ts')

    X, y = load_from_tsfile_to_dataframe(ts_file)
    if not return_X_y:
        return X, y
    X['class_val'] = pd.Series(y)
    return X

コード例 #13

0

ファイルを表示

ファイル: uea_ucr_dataset.py プロジェクト: ugurgudelek/berries

    def __init__(self, name, kind, train=True):
        """Datasets from the UCR time series archive.

        Args:
            name: Name of the dataset.
            kind: Kind of dataset. univariate or multivariate
            train: Return train split when True, test split when False.

        """
        features, labels = load_from_tsfile_to_dataframe(
            _build_UEA_UCR_data_path(name, kind, train))
        self.data_x = features
        self.data_y = labels

        # Time-stamped series are not supported yet. Time stamps appear to be
        # stored in the index of each individual series, so a series without
        # them is expected to carry a plain RangeIndex; only the first cell is
        # inspected.
        first_series = self.data_x.iloc[0, 0]
        assert isinstance(first_series.index, pd.RangeIndex)

        self.class_mapping = self.__build_class_mapping(name, kind)
        self._n_classes = len(self.class_mapping)

コード例 #14

0

ファイルを表示

ファイル: base.py プロジェクト: aabanda/sktime-kernels

def load_italy_power_demand_dataframe(split='TRAIN', return_X_y=False):
    """Load one partition of the ItalyPowerDemand classification problem.

    Dimensionality:     univariate
    Series length:      24
    Train cases:        67
    Test cases:         1029
    Number of classes:  2

    The data was derived from twelve monthly electrical power demand time
    series from Italy and first used in the paper "Intelligent Icons:
    Integrating Lite-Weight Data Mining and Visualization into GUI Operating
    Systems". The classification task is to distinguish days from Oct to
    March (inclusive) from April to September.

    Dataset details: http://timeseriesclassification.com/description.php?Dataset=ItalyPowerDemand

    Parameters
    ----------
    split: string (either "TRAIN" or "TEST", default = 'TRAIN')
        Which partition file (``ItalyPowerDemand_<split>.ts``) to load.
    return_X_y: bool (default = False)
        When false, ``(X, y)`` is returned. When true, the labels are stored
        in a ``'class_val'`` column of ``X`` and only ``X`` is returned.
        NOTE(review): this reads inverted relative to the flag's name --
        confirm with callers before changing.

    Returns
    -------
    X: pandas DataFrame
        The time series data as produced by ``load_from_tsfile_to_dataframe``.
    y:
        The class labels for each case in X (only when ``return_X_y`` is
        false).
    """
    problem = 'ItalyPowerDemand'
    # Data files live next to this module: data/<problem>/<problem>_<split>.ts
    ts_file = path.join(path.dirname(__file__), 'data', problem,
                        problem + '_' + split + '.ts')

    X, y = load_from_tsfile_to_dataframe(ts_file)
    if not return_X_y:
        return X, y
    X['class_val'] = pd.Series(y)
    return X

コード例 #15

0

ファイルを表示

ファイル: loader.py プロジェクト: adam-urbanczyk/FDApy

def read_ts(filepath, **kwargs):
    """Read a ts file into Functional Data.

    Build a DenseFunctionalData or IrregularFunctionalData object upon a ts
    file passed as parameter. The dense reader is used when every series in
    column ``dim_0`` has the same length, the irregular reader otherwise.

    Notes
    -----
    It is assumed that the data are unidimensional. And so, it will not be
    checked.

    Parameters
    ----------
    filepath: str
        Any valid string path is acceptable.
    **kwargs:
        Keywords arguments to passed to the load_from_tsfile_to_dataframe
        function.

    Returns
    -------
    obj: DenseFunctionalData or IrregularFunctionalData
        The loaded ts file.
    labels: np.ndarray
        Labels

    """
    data, labels = load_from_tsfile_to_dataframe(filepath, **kwargs)

    # Only 'dim_0' is inspected, so map that column alone; this also avoids
    # DataFrame.applymap, which newer pandas releases deprecate.
    series_lengths = data['dim_0'].map(len).unique()

    if len(series_lengths) == 1:
        obj = read_ts_dense(data)
    else:
        obj = read_ts_irregular(data)
    return obj, labels

コード例 #16

0

ファイルを表示

    KNeighborsTimeSeriesClassifier(1, 'uniform', 'brute', 'dtw', None),
    "DTW-1NN"
],
               [
                   KNeighborsTimeSeriesClassifier(4, 'uniform', 'brute', 'dtw',
                                                  None), "DTW-4NN"
               ]]

# --------------- MAIN PROGRAM ---------------------------------

# Load every dataset up front.
# Each entry: ((train_data, train_class), (test_data, test_class), dataset name)
data = []
for train_path, test_path, name in datasets_path:
    data.append((
        load_from_tsfile_to_dataframe(os.path.join(DATA_PATH, train_path)),
        load_from_tsfile_to_dataframe(os.path.join(DATA_PATH, test_path)),
        name,
    ))

# Evaluate each classifier on each loaded dataset.
for classifier, classifier_name in classifiers:
    print("|---" + classifier_name)
    for ((train_data, train_class), (test_data, test_class), name) in data:
        # Fit on the train split, predict the test split, then score.
        classifier.fit(train_data, train_class)
        prediction = classifier.predict(test_data)
        accuracy = accuracy_score(test_class, prediction)

コード例 #17

0

ファイルを表示

ファイル: external.py プロジェクト: tomassa/timeseriesAI

def get_UCR_data(dsid,
                 path='.',
                 parent_dir='data/UCR',
                 verbose=False,
                 on_disk=True,
                 return_split=True):
    """Return a UCR dataset as NumPy arrays, downloading it when not cached.

    The dataset is cached as ``.npy`` files under ``path/parent_dir/dsid``.
    When any of those files is missing, the archive is downloaded and
    decompressed, the ``.ts`` files are converted to float32 arrays and
    saved, and every other file in the target directory is deleted.

    Parameters
    ----------
    dsid : str
        Dataset name; must be in the univariate or multivariate UCR list.
    path, parent_dir
        Combined as ``Path(path) / parent_dir`` to form the cache root.
    verbose : bool
        Print progress and array shapes.
    on_disk : bool
        Load the arrays memory-mapped (``mmap_mode='r+'``) when true, fully
        into memory otherwise.
    return_split : bool
        Selects the shape of the return value (see below).

    Returns
    -------
    tuple
        ``(X_train, y_train, X_valid, y_valid)`` when ``return_split`` is
        true, else ``(X, y, splits)``. For the datasets whose archive is known
        to be broken, a tuple of ``None`` values of the matching length.
    """
    if verbose: print('Dataset:', dsid)
    assert dsid in get_UCR_univariate_list() + get_UCR_multivariate_list(
    ), f'{dsid} is not a UCR dataset'
    full_parent_dir = Path(path) / parent_dir
    full_tgt_dir = full_parent_dir / dsid
    cached_names = ['X_train', 'X_valid', 'y_train', 'y_valid', 'X', 'y']
    if not all(
            os.path.isfile(f'{full_parent_dir}/{dsid}/{fn}.npy')
            for fn in cached_names):
        if dsid in ['InsectWingbeat', 'DuckDuckGeese']:
            if verbose:
                print(
                    'There are problems with the original zip file and data cannot correctly downloaded'
                )
            # Keep the arity consistent with the normal return paths so that
            # callers can unpack the result in either mode.
            if return_split:
                return None, None, None, None
            return None, None, None
        src_website = 'http://www.timeseriesclassification.com/Downloads'
        if verbose:
            print(f'Downloading and decompressing data to {full_tgt_dir}...')
        decompress_from_url(f'{src_website}/{dsid}.zip',
                            target_dir=full_tgt_dir,
                            verbose=verbose)
        if verbose: print('...data downloaded and decompressed')
        X_train_df, y_train = load_from_tsfile_to_dataframe(full_tgt_dir /
                                                            f'{dsid}_TRAIN.ts')
        X_valid_df, y_valid = load_from_tsfile_to_dataframe(full_tgt_dir /
                                                            f'{dsid}_TEST.ts')
        X_train_ = []
        X_valid_ = []
        # One stacked array per 'dim_<i>' column; stack_pad stacks arrays
        # even if they have different lengths.
        for i in range(X_train_df.shape[-1]):
            X_train_.append(stack_pad(X_train_df[f'dim_{i}']))
            X_valid_.append(stack_pad(X_valid_df[f'dim_{i}']))
        # Columns are stacked on the last axis, then swapped with the time
        # axis by the transpose.
        X_train = np.transpose(np.stack(X_train_, axis=-1),
                               (0, 2, 1)).astype(np.float32)
        X_valid = np.transpose(np.stack(X_valid_, axis=-1),
                               (0, 2, 1)).astype(np.float32)
        np.save(f'{full_tgt_dir}/X_train.npy', X_train)
        np.save(f'{full_tgt_dir}/y_train.npy', y_train)
        np.save(f'{full_tgt_dir}/X_valid.npy', X_valid)
        np.save(f'{full_tgt_dir}/y_valid.npy', y_valid)
        np.save(f'{full_tgt_dir}/X.npy', concat(X_train, X_valid))
        np.save(f'{full_tgt_dir}/y.npy', concat(y_train, y_valid))
        # Drop the in-memory copies; the arrays are reloaded from disk below.
        del X_train, X_valid, y_train, y_valid
        delete_all_in_dir(full_tgt_dir, exception='.npy')

    mmap_mode = 'r+' if on_disk else None
    X_train = np.load(f'{full_tgt_dir}/X_train.npy', mmap_mode=mmap_mode)
    y_train = np.load(f'{full_tgt_dir}/y_train.npy', mmap_mode=mmap_mode)
    X_valid = np.load(f'{full_tgt_dir}/X_valid.npy', mmap_mode=mmap_mode)
    y_valid = np.load(f'{full_tgt_dir}/y_valid.npy', mmap_mode=mmap_mode)

    if return_split:
        if verbose:
            print('X_train:', X_train.shape)
            print('y_train:', y_train.shape)
            print('X_valid:', X_valid.shape)
            print('y_valid:', y_valid.shape, '\n')
        return X_train, y_train, X_valid, y_valid
    else:
        X = np.load(f'{full_tgt_dir}/X.npy', mmap_mode=mmap_mode)
        y = np.load(f'{full_tgt_dir}/y.npy', mmap_mode=mmap_mode)
        splits = get_predefined_splits(*[X_train, X_valid])
        if verbose:
            print('X      :', X.shape)
            print('y      :', y.shape)
            print('splits :', splits, '\n')
        return X, y, splits

コード例 #18

0

ファイルを表示

def test_load_from_tsfile_to_dataframe():
    """Test the load_from_tsfile_to_dataframe() function."""

    # Test that an empty file is classed an invalid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ""

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file and assert that it is invalid

            np.testing.assert_raises(TsFileParseException,
                                     load_from_tsfile_to_dataframe, path)

    finally:
        os.remove(path)

    # Test that a file with an incomplete set of metadata is invalid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "true\n@univariate true\n")

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file and assert that it is invalid

            np.testing.assert_raises(TsFileParseException,
                                     load_from_tsfile_to_dataframe, path)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata but no data is invalid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = (
                "@problemName Test Problem\n@timeStamps "
                "true\n@univariate true\n@classLabel false\n@data")

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file and assert that it is invalid

            np.testing.assert_raises(TsFileParseException,
                                     load_from_tsfile_to_dataframe, path)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and no data but
    # invalid metadata values is invalid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName\n@timeStamps\n@univariate "
                             "true\n@classLabel false\n@data")

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file and assert that it is invalid

            np.testing.assert_raises(TsFileParseException,
                                     load_from_tsfile_to_dataframe, path)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and a single
    # case/dimension parses correctly

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "true\n@univariate true\n@classLabel "
                             "false\n@data\n")
            file_contents += "(0, 1), (1, 2)"

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file

            df = load_from_tsfile_to_dataframe(path)

            # Test the DataFrame returned accurately reflects the data in
            # the file

            np.testing.assert_equal(len(df), 1)
            np.testing.assert_equal(len(df.columns), 1)

            series = df["dim_0"]
            np.testing.assert_equal(len(series), 1)

            series = df["dim_0"][0]
            np.testing.assert_equal(series[0], 1.0)
            np.testing.assert_equal(series[1], 2.0)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and 2 cases with 3
    # dimensions parses correctly

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "true\n@univariate true\n@classLabel "
                             "false\n@data\n")
            file_contents += "(0, 1), (1, 2):(0, 3), (1, 4):(0, 5), (1, 6)\n"
            file_contents += ("(0, 11), (1, 12):(0, 13), (1,14):(0, 15), (1, "
                              "16)     \n")

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file

            df = load_from_tsfile_to_dataframe(path)

            # Test the DataFrame returned accurately reflects the data in
            # the file

            np.testing.assert_equal(len(df), 2)
            np.testing.assert_equal(len(df.columns), 3)

            series = df["dim_0"]
            np.testing.assert_equal(len(series), 2)

            series = df["dim_0"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 1.0)
            np.testing.assert_equal(series[1], 2.0)

            series = df["dim_0"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 11.0)
            np.testing.assert_equal(series[1], 12.0)

            series = df["dim_1"]
            np.testing.assert_equal(len(series), 2)

            series = df["dim_1"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 3.0)
            np.testing.assert_equal(series[1], 4.0)

            series = df["dim_1"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 13.0)
            np.testing.assert_equal(series[1], 14.0)

            series = df["dim_2"]
            np.testing.assert_equal(len(series), 2)

            series = df["dim_2"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 5.0)
            np.testing.assert_equal(series[1], 6.0)

            series = df["dim_2"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 15.0)
            np.testing.assert_equal(series[1], 16.0)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and time-series of
    # different length parses correctly

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "true\n@univariate true\n@classLabel "
                             "false\n@data\n")
            file_contents += "(0, 1), (1, 2):(0, 3):(0, 5), (1, 6)\n"
            file_contents += "(0, 11), (1, 12):(0, 13), (1,14):(0, 15)\n"

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file

            df = load_from_tsfile_to_dataframe(path)

            # Test the DataFrame returned accurately reflects the data in
            # the file

            np.testing.assert_equal(len(df), 2)
            np.testing.assert_equal(len(df.columns), 3)

            series = df["dim_0"]
            np.testing.assert_equal(len(series), 2)

            series = df["dim_0"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 1.0)
            np.testing.assert_equal(series[1], 2.0)

            series = df["dim_0"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 11.0)
            np.testing.assert_equal(series[1], 12.0)

            series = df["dim_1"]
            np.testing.assert_equal(len(series), 2)

            series = df["dim_1"][0]
            np.testing.assert_equal(len(series), 1)
            np.testing.assert_equal(series[0], 3.0)

            series = df["dim_1"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 13.0)
            np.testing.assert_equal(series[1], 14.0)

            series = df["dim_2"]
            np.testing.assert_equal(len(series), 2)

            series = df["dim_2"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 5.0)
            np.testing.assert_equal(series[1], 6.0)

            series = df["dim_2"][1]
            np.testing.assert_equal(len(series), 1)
            np.testing.assert_equal(series[0], 15.0)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and data but an
    # inconsistent number of dimensions across cases is classed as invalid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "true\n@univariate true\n@classLabel "
                             "false\n@data\n")
            file_contents += "(0, 1), (1, 2):(0, 3), (1, 4):(0, 5), (1, 6)\n"
            file_contents += "(0, 11), (1, 12):(0, 13), (1,14)    \n"

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file and assert that it is invalid

            np.testing.assert_raises(TsFileParseException,
                                     load_from_tsfile_to_dataframe, path)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and data but missing
    # values after a tuple is classed as invalid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "true\n@univariate true\n@classLabel "
                             "false\n@data\n")
            file_contents += "(0, 1), (1, 2):(0, 3), (1, 4):(0, 5),\n"

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file and assert that it is invalid

            np.testing.assert_raises(TsFileParseException,
                                     load_from_tsfile_to_dataframe, path)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and data and some
    # empty dimensions is classed as valid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "true\n@univariate true\n@classLabel "
                             "false\n@data\n")
            file_contents += "(0, 1), (1, 2):     :(0, 5), (1, 6)\n"
            file_contents += "(0, 11), (1, 12):(0, 13), (1,14)    :       \n"
            file_contents += ("(0, 21), (1, 22):(0, 23), (1,24)    :   (0,"
                              "25), (1, 26)    \n")

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file

            df = load_from_tsfile_to_dataframe(path)

            # Test the DataFrame returned accurately reflects the data in
            # the file

            np.testing.assert_equal(len(df), 3)
            np.testing.assert_equal(len(df.columns), 3)

            series = df["dim_0"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_0"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 1.0)
            np.testing.assert_equal(series[1], 2.0)

            series = df["dim_0"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 11.0)
            np.testing.assert_equal(series[1], 12.0)

            series = df["dim_0"][2]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 21.0)
            np.testing.assert_equal(series[1], 22.0)

            series = df["dim_1"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_1"][0]
            np.testing.assert_equal(len(series), 0)

            series = df["dim_1"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 13.0)
            np.testing.assert_equal(series[1], 14.0)

            series = df["dim_1"][2]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 23.0)
            np.testing.assert_equal(series[1], 24.0)

            series = df["dim_2"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_2"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 5.0)
            np.testing.assert_equal(series[1], 6.0)

            series = df["dim_2"][1]
            np.testing.assert_equal(len(series), 0)

            series = df["dim_2"][2]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 25.0)
            np.testing.assert_equal(series[1], 26.0)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and data that
    # contains datetimes as timestamps and has some empty dimensions is
    # classed as valid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "true\n@univariate true\n@classLabel "
                             "false\n@data\n")
            file_contents += ("(01/01/2019 00:00:00, 1),  (01/02/2019 "
                              "00:00:00, 2)  :                               "
                              "                      : (01/05/2019 00:00:00, "
                              "5), (01/06/2019 00:00:00, 6)\n")
            file_contents += ("(01/01/2020 00:00:00, 11), (01/02/2020 "
                              "00:00:00, 12) : (01/03/2020 00:00:00, 13), "
                              "(01/04/2020 00:00:00, 14) :  \n")
            file_contents += ("(01/01/2021 00:00:00, 21), (01/02/2021 "
                              "00:00:00, 22) : (01/03/2021 00:00:00, 23), "
                              "(01/04/2021 00:00:00, 24) :  \n")

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file

            df = load_from_tsfile_to_dataframe(path)

            # Test the DataFrame returned accurately reflects the data in
            # the file

            np.testing.assert_equal(len(df), 3)
            np.testing.assert_equal(len(df.columns), 3)

            series = df["dim_0"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_0"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series["01/01/2019"], 1.0)
            np.testing.assert_equal(series["01/02/2019"], 2.0)

            series = df["dim_0"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series["01/01/2020"], 11.0)
            np.testing.assert_equal(series["01/02/2020"], 12.0)

            series = df["dim_0"][2]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series["01/01/2021"], 21.0)
            np.testing.assert_equal(series["01/02/2021"], 22.0)

            series = df["dim_1"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_1"][0]
            np.testing.assert_equal(len(series), 0)

            series = df["dim_1"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series["01/03/2020"], 13.0)
            np.testing.assert_equal(series["01/04/2020"], 14.0)

            series = df["dim_1"][2]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series["01/03/2021"], 23.0)
            np.testing.assert_equal(series["01/04/2021"], 24.0)

            series = df["dim_2"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_2"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series["01/05/2019"], 5.0)
            np.testing.assert_equal(series["01/06/2019"], 6.0)

            series = df["dim_2"][1]
            np.testing.assert_equal(len(series), 0)

            series = df["dim_2"][2]
            np.testing.assert_equal(len(series), 0)

    finally:
        os.remove(path)

    # Test that a file that mixes timestamp conventions is invalid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "true\n@univariate true\n@classLabel "
                             "false\n@data\n")
            file_contents += ("(01/01/2019 00:00:00, 1),  (01/02/2019 "
                              "00:00:00, 2)  :                               "
                              "                      : (01/05/2019 00:00:00, "
                              "5), (01/06/2019 00:00:00, 6)\n")
            file_contents += ("(00, 11), (1, 12) : (01/03/2020 00:00:00, 13), "
                              "(01/04/2020 00:00:00, 14) :  \n")
            file_contents += ("(01/01/2021 00:00:00, 21), (01/02/2021 "
                              "00:00:00, 22) : (01/03/2021 00:00:00, 23), "
                              "(01/04/2021 00:00:00, 24) :  \n")

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file and assert that it is invalid

            np.testing.assert_raises(TsFileParseException,
                                     load_from_tsfile_to_dataframe, path)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and data but missing
    # classes is classed as invalid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "true\n@univariate true\n@classLabel true 0 1 "
                             "2\n@data\n")
            file_contents += "(0, 1), (1, 2):(0, 3), (1, 4):(0, 5), (1, 6)\n"
            file_contents += ("(0, 11), (1, 12):(0, 13), (1,14):(0, 15), (1, "
                              "16)     \n")

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file and assert that it is invalid

            np.testing.assert_raises(TsFileParseException,
                                     load_from_tsfile_to_dataframe, path)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and data but invalid
    # classes is classed as invalid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "true\n@univariate true\n@classLabel true 0 1 "
                             "2\n@data\n")
            file_contents += "(0, 1), (1, 2):(0, 3), (1, 4):(0, 5), (1, " "6) : 0 \n"
            file_contents += ("(0, 11), (1, 12):(0, 13), (1,14):(0, 15), (1, "
                              "16)   : 3  \n")

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file and assert that it is invalid

            np.testing.assert_raises(TsFileParseException,
                                     load_from_tsfile_to_dataframe, path)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and data with classes
    # is classed as valid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "true\n@univariate true\n@classLabel true 0 1 "
                             "2\n@data\n")
            file_contents += "(0, 1), (1, 2):(0, 3), (1, 4):(0, 5), (1, " "6): 0\n"
            file_contents += ("(0, 11), (1, 12):(0, 13), (1,14):(0, 15), (1, "
                              "16): 2     \n")

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file

            df, y = load_from_tsfile_to_dataframe(path)

            # Test the DataFrame of X values returned accurately reflects
            # the data in the file

            np.testing.assert_equal(len(df), 2)
            np.testing.assert_equal(len(df.columns), 3)

            series = df["dim_0"]
            np.testing.assert_equal(len(series), 2)

            series = df["dim_0"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 1.0)
            np.testing.assert_equal(series[1], 2.0)

            series = df["dim_0"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 11.0)
            np.testing.assert_equal(series[1], 12.0)

            series = df["dim_1"]
            np.testing.assert_equal(len(series), 2)

            series = df["dim_1"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 3.0)
            np.testing.assert_equal(series[1], 4.0)

            series = df["dim_1"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 13.0)
            np.testing.assert_equal(series[1], 14.0)

            series = df["dim_2"]
            np.testing.assert_equal(len(series), 2)

            series = df["dim_2"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 5.0)
            np.testing.assert_equal(series[1], 6.0)

            series = df["dim_2"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 15.0)
            np.testing.assert_equal(series[1], 16.0)

            # Test that the class values are as expected

            np.testing.assert_equal(len(y), 2)
            np.testing.assert_equal(y[0], "0")
            np.testing.assert_equal(y[1], "2")

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and data, with no
    # timestamps, is classed as valid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "false\n@univariate true\n@classLabel "
                             "false\n@data\n")
            file_contents += "1,2:3,4:5,6\n"
            file_contents += "11,12:13,14:15,16\n"
            file_contents += "21,22:23,24:25,26\n"

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file

            df = load_from_tsfile_to_dataframe(path)

            # Test the DataFrame returned accurately reflects the data in
            # the file

            np.testing.assert_equal(len(df), 3)
            np.testing.assert_equal(len(df.columns), 3)

            series = df["dim_0"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_0"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 1.0)
            np.testing.assert_equal(series[1], 2.0)

            series = df["dim_0"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 11.0)
            np.testing.assert_equal(series[1], 12.0)

            series = df["dim_0"][2]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 21.0)
            np.testing.assert_equal(series[1], 22.0)

            series = df["dim_1"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_1"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 3.0)
            np.testing.assert_equal(series[1], 4.0)

            series = df["dim_1"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 13.0)
            np.testing.assert_equal(series[1], 14.0)

            series = df["dim_1"][2]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 23.0)
            np.testing.assert_equal(series[1], 24.0)

            series = df["dim_2"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_2"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 5.0)
            np.testing.assert_equal(series[1], 6.0)

            series = df["dim_2"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 15.0)
            np.testing.assert_equal(series[1], 16.0)

            series = df["dim_2"][2]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 25.0)
            np.testing.assert_equal(series[1], 26.0)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and data, with no
    # timestamps and some empty dimensions, is classed as valid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:

            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "false\n@univariate true\n@classLabel "
                             "false\n@data\n")
            file_contents += "1,2::5,6\n"
            file_contents += "11,12:13,14:15,16\n"
            file_contents += "21,22:23,24:\n"

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file

            df = load_from_tsfile_to_dataframe(path)

            # Test the DataFrame returned accurately reflects the data in
            # the file

            np.testing.assert_equal(len(df), 3)
            np.testing.assert_equal(len(df.columns), 3)

            series = df["dim_0"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_0"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 1.0)
            np.testing.assert_equal(series[1], 2.0)

            series = df["dim_0"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 11.0)
            np.testing.assert_equal(series[1], 12.0)

            series = df["dim_0"][2]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 21.0)
            np.testing.assert_equal(series[1], 22.0)

            series = df["dim_1"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_1"][0]
            np.testing.assert_equal(len(series), 0)

            series = df["dim_1"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 13.0)
            np.testing.assert_equal(series[1], 14.0)

            series = df["dim_1"][2]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 23.0)
            np.testing.assert_equal(series[1], 24.0)

            series = df["dim_2"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_2"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 5.0)
            np.testing.assert_equal(series[1], 6.0)

            series = df["dim_2"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 15.0)
            np.testing.assert_equal(series[1], 16.0)

            series = df["dim_2"][2]
            np.testing.assert_equal(len(series), 0)

    finally:
        os.remove(path)

    # Test that a file with a complete set of metadata and data, with no
    # timestamps and some empty dimensions and classes, is classed as valid

    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as tmp_file:
            # Write the contents of the file

            file_contents = ("@problemName Test Problem\n@timeStamps "
                             "false\n@univariate true\n@classLabel true cat "
                             "bear dog\n@data\n")
            file_contents += "1,2::5,6:cat  \n"
            file_contents += "11,12:13,14:15,16:  dog\n"
            file_contents += "21,22:23,24::   bear   \n"

            tmp_file.write(file_contents)
            tmp_file.flush()

            # Parse the file

            df, y = load_from_tsfile_to_dataframe(path)

            # Test the DataFrame of X values returned accurately reflects
            # the data in the file

            np.testing.assert_equal(len(df), 3)
            np.testing.assert_equal(len(df.columns), 3)

            series = df["dim_0"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_0"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 1.0)
            np.testing.assert_equal(series[1], 2.0)

            series = df["dim_0"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 11.0)
            np.testing.assert_equal(series[1], 12.0)

            series = df["dim_0"][2]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 21.0)
            np.testing.assert_equal(series[1], 22.0)

            series = df["dim_1"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_1"][0]
            np.testing.assert_equal(len(series), 0)

            series = df["dim_1"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 13.0)
            np.testing.assert_equal(series[1], 14.0)

            series = df["dim_1"][2]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 23.0)
            np.testing.assert_equal(series[1], 24.0)

            series = df["dim_2"]
            np.testing.assert_equal(len(series), 3)

            series = df["dim_2"][0]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 5.0)
            np.testing.assert_equal(series[1], 6.0)

            series = df["dim_2"][1]
            np.testing.assert_equal(len(series), 2)
            np.testing.assert_equal(series[0], 15.0)
            np.testing.assert_equal(series[1], 16.0)

            series = df["dim_2"][2]
            np.testing.assert_equal(len(series), 0)

            # Test that the class values are as expected

            np.testing.assert_equal(len(y), 3)
            np.testing.assert_equal(y[0], "cat")
            np.testing.assert_equal(y[1], "dog")
            np.testing.assert_equal(y[2], "bear")

    finally:
        os.remove(path)

コード例 #19

0

ファイルを表示

                if eval(self.paa):
                    paas_ = []
                    for seg in self.segs_:
                        s = int((dim.shape[0]) * seg)
                        if s < 1:
                            continue
                        #print(f"Compression: {seg}")
                        paa_per_seg = PiecewiseAggregateApproximation(n_segments=s)\
                                .fit_transform(dim).flatten()
                        paas_.extend(extract_stats(paa_per_seg))

                    temp.extend(paas_)
                else:
                    temp.extend(extract_stats(dim))
            x_new.append(temp)

        x_new = np.asarray(x_new)
        imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
        imp_mean.fit(x_new)
        x_new = imp_mean.transform(x_new)

        return np.asarray(x_new)


if __name__ == '__main__':
    # Smoke run: transform the LSST training split with PAAStat and print
    # the shape of the resulting array.
    transformer = PAAStat(paa_='True', seg_=0.75)
    lsst_train_file = "../mtsc/data/LSST/LSST_TRAIN.ts"
    train_x, train_y = load_from_tsfile_to_dataframe(lsst_train_file)
    features = transformer.transform(train_x.values)
    print(features.shape)

コード例 #20

0

ファイルを表示

ファイル: Time_Series_AL.py プロジェクト: TobiBosse/Active-Learning-for-the-Annotation-of-Time-Series

    def getData(self, set_type, data_dir="/home/tob/Datasets/"):
        """Load or generate a windowed data set and collapse it to six classes.

        Parameters
        ----------
        set_type : str
            One of "generator", "GunPoint", "Crop", "FaceAll" or
            "InsectWingbeat".
        data_dir : str, optional
            Directory containing ``<set_type>_TRAIN.ts`` and
            ``<set_type>_TEST.ts`` for the "Crop", "FaceAll" and
            "InsectWingbeat" sets. Ignored for the other set types.

        Returns
        -------
        tuple
            ``(windows_pool, windows_test, y_pool, y_test, class_names,
            windowLength, n_classes, samplesPerWindow)``. The labels are
            clipped to ``0 .. n_classes - 1`` and the last entry of
            ``class_names`` is replaced by ``"don't care"``.

        Raises
        ------
        ValueError
            If ``set_type`` is not one of the supported names.
        """
        # Every supported source uses a window length of 1 (it is
        # unspecified for the downloaded data sets).
        windowLength = 1
        # Number of classes kept by the imbalancing step at the end; the
        # generator is configured with the same number.
        n_classes = 6

        # Data sets from http://www.timeseriesclassification.com
        # (description.php?Dataset=<name>):
        # name -> (samples per window, class-name prefix, number of labels)
        tsc_sets = {
            "Crop": (46, "Class", 24),
            "FaceAll": (131, "Student", 14),
            "InsectWingbeat": (30, "Insect", 10),
        }

        if set_type == "generator":
            samplesPerWindow = 100
            n_windows = 1000
            # n_samples: samples of one window; resolution: time step
            # between two samples; SNR_dB: signal-to-noise ratio in dB;
            # variation: 0 to 1 (0 to 100%), higher values possible.
            Gen = ImbalancedDataGenerator(
                n_samples=samplesPerWindow,
                resolution=windowLength / samplesPerWindow,
                SNR_dB=50,
                variation=1,
                n_classes=n_classes,
                useseed=False,
                seed=5,
            )
            windows_pool, windows_test, y_pool, y_test = Gen.GeneratePool(n_windows)
            class_names = Gen.class_names

        elif set_type == "GunPoint":
            samplesPerWindow = 50
            from pyts.datasets import load_gunpoint
            windows_pool, windows_test, y_pool, y_test = load_gunpoint(return_X_y=True)
            class_names = ['gun', 'point']
            if y_pool.min() > 0 or y_test.min() > 0:
                print("1st class is decoded as zero (was 1)")
                y_pool -= 1  # class 1 becomes class 0
                y_test -= 1

        elif set_type in tsc_sets:
            import os
            from sktime.utils.load_data import load_from_tsfile_to_dataframe
            samplesPerWindow, label_prefix, n_labels = tsc_sets[set_type]
            windows_pool, y_pool = load_from_tsfile_to_dataframe(
                os.path.join(data_dir, set_type + "_TRAIN.ts"))
            windows_test, y_test = load_from_tsfile_to_dataframe(
                os.path.join(data_dir, set_type + "_TEST.ts"))
            # Always go through the instance: the bare-name call used for
            # some data sets before was inconsistent with the "Crop" branch.
            windows_pool, windows_test, y_pool, y_test = self.postProcessSetfromTSCcom(
                windows_pool, windows_test, y_pool, y_test, samplesPerWindow)
            class_names = [label_prefix + ' ' + str(i)
                           for i in range(1, n_labels + 1)]

        else:
            # Fail early and clearly instead of hitting an unbound local
            # variable further down.
            raise ValueError("Unknown set_type: %r" % (set_type,))

        # Make the data set imbalanced: classes 0 to 4 matter, class 5 is
        # "don't care" and absorbs all remaining classes.
        y_pool, y_test = np.clip(y_pool, 0, n_classes - 1), np.clip(y_test, 0, n_classes - 1)
        class_names = class_names[:n_classes]
        class_names[-1] = "don't care"
        print("imbalanced classes:")
        print(class_names)

        return windows_pool, windows_test, y_pool, y_test, class_names, windowLength, n_classes, samplesPerWindow

コード例 #21

0

ファイルを表示

            this_probs = this_probs * self.cv_accs[c]

            if output_probs is None:
                output_probs = this_probs
            else:
                output_probs = [[
                    output_probs[x][y] + this_probs[x][y]
                    for y in range(0, len(output_probs[x]))
                ] for x in range(0, len(output_probs))]

        output_probs /= self.cv_sum
        return output_probs

    def predict(self, X):
        """Return, for each case in X, the label with the highest probability.

        Probabilities come from ``self.predict_proba(X)``; the label order is
        taken from ``classes_`` of the first classifier in
        ``self.classifiers``. The result is a plain Python list.
        """
        case_probabilities = self.predict_proba(X)
        class_labels = self.classifiers[0].classes_
        predictions = []
        for row in case_probabilities:
            # argmax picks the position of the most probable class
            predictions.append(class_labels[np.argmax(row)])
        return predictions


# Demo: fit on the first ten GunPoint training cases, then predict the
# following five and print the predictions.
if __name__ == "__main__":
    dataset = "GunPoint"
    data_dir = "C:/temp/sktime_temp_data/" + dataset + "/"
    train_file = dataset + "_TRAIN.ts"
    train_x, train_y = load_from_tsfile_to_dataframe(file_path=data_dir,
                                                     file_name=train_file)
    ensemble = ElasticEnsemble()
    fit_cases, fit_labels = train_x.iloc[0:10], train_y[0:10]
    ensemble.fit(fit_cases, fit_labels)
    print(ensemble.predict(train_x.iloc[10:15]))

コード例 #22

0

ファイルを表示

ファイル: shapelets_backup.py プロジェクト: vishalbelsare/py-hive-cote

                str(j.start_pos) + "," + str(j.length) + "\n")
            f.write(",".join(
                map(str, data[j.series_id,
                              j.start_pos:j.start_pos + j.length])) + "\n")
            f.write(",".join(map(str, j.data)) + "\n")
    f.close()


if __name__ == "__main__":
    dataset = "GunPoint"
    # NOTE: a disabled step here used load_from_arff_to_tsfile to convert
    #   "/home/david/arff-datasets/<dataset>/<dataset>_{TRAIN,TEST}.arff"
    # into the matching .ts files under "/home/david/sktime-datasets/".

    # Common path prefix of the TRAIN/TEST files for this data set
    dataset_stem = "/home/david/sktime-datasets/" + dataset + "/" + dataset
    train_x, train_y = load_from_tsfile_to_dataframe(dataset_stem + "_TRAIN.ts")
    test_x, test_y = load_from_tsfile_to_dataframe(dataset_stem + "_TEST.ts")

    # Active configuration: randomly sampled shapelets.
    # Alternatives that were tried (kept for reference):
    #   type_shapelet="Contracted", time_limit_in_mins=0.3,
    #       min_shapelet_length=3, max_shapelet_length=300,
    #       num_shapelets_to_sample_per_case=5, trim_shapelets=False,
    #       remove_self_similar=False, verbose=True
    #   type_shapelet="Full", verbose=True
    transform_options = dict(
        type_shapelet="Random",
        min_shapelet_length=3,
        max_shapelet_length=12,
        num_cases_to_sample=8,
        num_shapelets_to_sample_per_case=10,
        trim_shapelets=False,
        remove_self_similar=False,
        verbose=True,
    )
    a = RandomShapeletTransform(**transform_options)

コード例 #23

0

ファイルを表示

        idx[1] += 1
        constraints.append(idx)

    return constraints


# Change the name of the archive directory here
archive = "Univariate2018_ts"

for root, dirs, files in os.walk("./" + archive + "/"):
    for x in dirs:
        dataset = "./" + archive + "/" + x + "/" + x
        print(x)
        # Only process data sets that have no output directory yet
        if not os.path.isdir("./" + archive + "_a2cnes/" + x):
            print(' --- > create')
            train_x, train_y = load_from_tsfile_to_dataframe(dataset +
                                                             "_TRAIN.ts")
            test_x, test_y = load_from_tsfile_to_dataframe(dataset +
                                                           "_TEST.ts")

            # Compute per-dimension min and max for normalization.
            # The running maximum must start at the most negative finite
            # float: sys.float_info.min is the smallest *positive* float, so
            # using it would hide the maximum of an all-negative dimension.
            df_max = {i: -sys.float_info.max for i in train_x.iloc[0].index}
            df_min = {i: sys.float_info.max for i in train_x.iloc[0].index}
            max_length = 0
            for index, row in train_x.iterrows():
                for id in row.index:
                    max_ = row[id].max()
                    if max_ > df_max[id]:
                        df_max[id] = max_
                    min_ = row[id].min()

コード例 #24

0

ファイルを表示

from sktime.utils.load_data import load_from_tsfile_to_dataframe
import os
import sktime
import numpy as np
import pandas as pd
import csv

# Global settings -- these need to be adjusted for the local environment.
DATA_PATH = os.path.join(os.path.dirname(sktime.__file__), "F:/BasicMotions/")
datasetname = 'BasicMotions'

# Load the train and test splits of the dataset.
train_x, train_y = load_from_tsfile_to_dataframe(
    os.path.join(DATA_PATH, datasetname+"_TRAIN.ts"))
test_x, test_y = load_from_tsfile_to_dataframe(
    os.path.join(DATA_PATH, datasetname+"_TEST.ts"))

# Process the training set: every sample is written to its own csv file
# (train1.csv, train2.csv, ...).
list_train_x = train_x.values.tolist()

for sample_no, sample_cells in enumerate(list_train_x, start=1):
    with open(DATA_PATH+'1.original/train/train'+str(sample_no)+'.csv', 'w', newline='') as csvfile:
        # zip(*...) walks the sample's cells in lockstep, so each csv row
        # holds one element taken from every column of the sample.
        csv.writer(csvfile).writerows(zip(*sample_cells))

# Encode the labels uniformly as 0, 1, 2, ... (the code assigned to a label
# may be unrelated to the order in which the labels appear).
list_train_y = pd.Categorical(train_y).codes  # type: numpy.ndarray
np.savetxt(DATA_PATH+'1.original/train/train_label.csv',
           list_train_y,
           fmt='%d',
           delimiter=',')

コード例 #25

0

ファイルを表示

]

n_datasets = len(datasets)

# Run the fit and predict
for i, dataset in enumerate(datasets):
    print(f'Dataset: {i + 1}/{n_datasets} {dataset}')

    # pre-allocate results
    results = np.zeros(3)

    # load data
    train_file = os.path.join(data_path, f'{dataset}/{dataset}_TRAIN.ts')
    test_file = os.path.join(data_path, f'{dataset}/{dataset}_TEST.ts')

    x_train, y_train = load_from_tsfile_to_dataframe(train_file)
    x_test, y_test = load_from_tsfile_to_dataframe(test_file)

    tsf = TimeSeriesForest()

    # fit
    try:
        s = time.time()
        tsf.fit(x_train, y_train)
        results[0] = time.time() - s

        # predict
        s = time.time()
        y_pred = tsf.predict(x_test)
        results[1] = time.time() - s

コード例 #26

0

ファイルを表示

import os
from sktime.utils.load_data import load_from_tsfile_to_dataframe
import constants as const

# Resolve the training file location from the project constants and load it.
data_dir, file_name_train = const.DATA_DIR, const.FILE_NAME_TRAIN
train_path = os.path.join(data_dir, file_name_train)
train_x, train_y = load_from_tsfile_to_dataframe(train_path)

コード例 #27

0

ファイルを表示

               'KNeighborsTimeSeriesClassifier',
               'ShapeDTW',
               'RandomIntervalSpectralForest',
               'TimeSeriesForest'
               ]

# Names of the meta-models; they are not used in the part of the script
# visible here.
metamodels = [
              'MetaKNeighbors',
              'MetaRandomForest',
              'MetaLogisticRegression',
              'MetaLSTM'
              ]

# The dataset name is taken from the first command-line argument.
dataset = sys.argv[1]

# Only the labels of the test split are needed; the loaded features are
# discarded.  The labels are then wrapped in a float64 Series.
_, y_test = load_from_tsfile_to_dataframe('../datasets/Univariate_ts/'+dataset+'/'+dataset+'_TEST.ts')
y_test = pd.Series(y_test, dtype='float64', name='y_true_test')

classifiers_test_predictions = pd.DataFrame()

# Holds the 'classifier' name and its 'metrics' for the selected classifier.
best_individual_classifier = {}

for classifier in classifiers:

    # Predictions previously saved for this classifier on the test split.
    test_predictions = pd.read_csv('../datasets/Univariate_ts/'+dataset+'/'+classifier+'_PREDICTION_TEST.csv')

    # get_metrics is defined elsewhere; presumably it scores the predictions
    # against the true labels -- TODO confirm its return structure.
    individual_metric = get_metrics(y_test.astype(int), test_predictions)

    # NOTE(review): in the code visible here only the keys 'classifier' and
    # 'metrics' are ever written, so this test for 'acc' stays true on every
    # iteration and each classifier overwrites the previous entry -- confirm
    # whether 'metrics' (or a nested 'acc' lookup) was the intended check.
    if 'acc' not in best_individual_classifier.keys():
        best_individual_classifier['classifier'] = classifier
        best_individual_classifier['metrics'] = individual_metric

コード例 #28

0

ファイルを表示

import numpy as np
from pandas import DataFrame
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sktime.classification.compose import ColumnEnsembleClassifier, TimeSeriesForestClassifier
from sktime.datasets import load_airline, load_arrow_head
from sktime.forecasting.theta import ThetaForecaster
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import smape_loss
from sktime.utils.load_data import load_from_tsfile_to_dataframe

from db import block_data
from main import prepare_pools, prepare_average_assessment_windows, prepare_average_luck_windows
from prediction import predictor

if __name__ == "__main__":
    # Sample rows [10, -1, 2], [10, +1, 3], ..., [10, +1, 9]; the sign
    # alternates with the last value.  Not used further in this script.
    data_2d_list = [[10, -1 if k % 2 == 0 else +1, k] for k in range(2, 10)]

    # Load the nested time-series frame and make a random train/test split.
    ts_path = '/home/jamshid/PycharmProjects/pool-analysis/prediction/test_pandas_data.ts'
    X, y = load_from_tsfile_to_dataframe(ts_path)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # A single time-series forest, applied to column index 1 only.
    tsf_estimator = ("TSF1", TimeSeriesForestClassifier(n_estimators=100), [1])
    classifier = ColumnEnsembleClassifier(estimators=[tsf_estimator])
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # Report the training frame, the predictions and the test accuracy.
    print(X_train)
    print(y_pred)
    print(accuracy_score(y_test, y_pred))

コード例 #29

0

ファイルを表示

def _ts_frame_to_channel_arrays(df):
    """Convert a nested time-series frame into a list of 2-D numpy arrays.

    Every row of ``df`` yields one array obtained by stacking the row's
    cells (converted with ``to_numpy()``) along axis 0, one cell per column.
    When a row has more than one column, shorter cells are zero-padded at
    the end up to the longest cell of that row so that they can be stacked.

    Args:
        df: frame whose cells support ``to_numpy()`` and ``len()`` --
            presumably the frame returned by
            ``load_from_tsfile_to_dataframe``; verify against the caller.

    Returns:
        A list with one array of shape (n_columns, row_length) per row.
    """
    columns = list(df)
    n_rows = len(df)
    print('Processing dataset of size {}'.format(n_rows))

    arrays = []
    for i, row in df.iterrows():
        # Progress report, keyed on the row label.
        if i % 100 == 0:
            print("Done {} / {}".format(i, n_rows))

        channels = [row[c].to_numpy() for c in columns]
        # Pad to the same length; only needed when there are several channels.
        if len(channels) > 1:
            longest = max(len(ch) for ch in channels)
            channels = [
                np.pad(ch, [(0, longest - len(ch))], mode='constant')
                for ch in channels
            ]
        arrays.append(np.stack(channels, axis=0))
    return arrays


def transform_ts_to_npy_format(source_root_directory, archive_name,
                               output_root_directory):
    """Convert every dataset of an archive from ``.ts`` files to ``.npy`` files.

    For each dataset name listed in ``dataset_names_for_archive[archive_name]``
    the ``<name>_TRAIN.ts`` and ``<name>_TEST.ts`` files are read from
    ``<source_root_directory>/archives/<archive_name>_ts/<name>/`` and the
    arrays ``x_train.npy``, ``y_train.npy``, ``x_test.npy`` and ``y_test.npy``
    are written to ``<output_root_directory>/archives/<archive_name>_npy/<name>/``.
    Datasets for which ``create_directory`` returns ``None`` are skipped.

    Args:
        source_root_directory: root directory containing the ``.ts`` archives.
        archive_name: key into ``dataset_names_for_archive``; also used to
            build the archive directory names.
        output_root_directory: root directory receiving the ``.npy`` files.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    for dataset_name in dataset_names_for_archive[archive_name]:
        root_dir_dataset = source_root_directory + '/archives/' + archive_name + '_ts/' + dataset_name + '/'
        out_dir = output_root_directory + '/archives/' + archive_name + '_npy/' + dataset_name + '/'

        # A None result from create_directory is treated as "the output
        # directory already exists", i.e. the dataset was converted before.
        if create_directory(out_dir) is None:
            print('TS to NPY transformation was already done for dataset {}'.
                  format(dataset_name))
            continue

        df_train_x, y_train = load_from_tsfile_to_dataframe(
            os.path.join(root_dir_dataset, dataset_name + '_TRAIN.ts'))
        df_test_x, y_test = load_from_tsfile_to_dataframe(
            os.path.join(root_dir_dataset, dataset_name + '_TEST.ts'))

        x_train = _ts_frame_to_channel_arrays(df_train_x)
        x_test = _ts_frame_to_channel_arrays(df_test_x)

        # NOTE: no z-normalisation is applied; values keep their original
        # scale in the saved arrays.

        # Number of channels, read from the first training sample.
        n_var = x_train[0].shape[0]

        # get_func_length is defined elsewhere; presumably it applies `func`
        # to the series lengths over both splits -- TODO confirm.
        max_length = get_func_length(x_train, x_test, func=max)
        min_length = get_func_length(x_train, x_test, func=min)

        print(dataset_name, 'max', max_length, 'min', min_length)
        print()

        print("Train data shape: ", x_train[0].shape)
        print("Test data shape: ", x_test[0].shape)
        # transform_to_same_length is defined elsewhere; its result exposes
        # `.shape`, so it is presumably a single array per split.
        x_train = transform_to_same_length(x_train, n_var, max_length)
        x_test = transform_to_same_length(x_test, n_var, max_length)
        print("Train data shape: ", x_train.shape)
        print("Test data shape: ", x_test.shape)

        # Save features and labels of both splits.
        np.save(out_dir + 'x_train.npy', x_train)
        np.save(out_dir + 'y_train.npy', y_train)
        np.save(out_dir + 'x_test.npy', x_test)
        np.save(out_dir + 'y_test.npy', y_test)

        print('Successfully transformed dataset {} from TS to NPY.'.format(
            dataset_name))

コード例 #30

0

ファイルを表示

ファイル: tsdataset.py プロジェクト: samsgood0310/til

 def __init__(self, filepath: pathlib.Path, transform=None) -> None:
     self.transform = transform
     self.x, self.y = load_from_tsfile_to_dataframe(str(filepath))