Exemple #1
0
def run_tsgan(data_name, dir_data, ratios, tag):
    """ Train a GAN on one data set, then score an 'LR' classifier on encoded features.

    The training split from ``ucr.load_ucr_flat`` gets two trailing singleton
    axes and is handed to ``train.train`` through a ``Config`` in 'train'
    state. Features are then produced by ``encode`` from ``cache/<tag>`` and,
    for every entry of ``ratios``, a subset drawn by ``utils.split_stratified``
    is passed to ``base.classify``.

    :param data_name: data set name forwarded to the loader, Config and encode.
    :param dir_data: data root directory forwarded to the loader and encode.
    :param ratios: iterable of values handed one at a time to
        ``utils.split_stratified`` (presumably the kept fraction of the
        training set -- confirm against utils).
    :param tag: run tag; used for Config and to build the ``cache/<tag>`` path.
    :return: DataFrame with columns ``ratio``, ``distr``, ``acc``, ``acc_te``,
        one row per ratio.
    """
    # ---- stage 1: GAN training ----
    tf.reset_default_graph()
    x_raw, y_tr, _, _, _ = ucr.load_ucr_flat(data_name, dir_data)
    # Append two singleton axes so the array is 4-D before it reaches Config.
    x = np.reshape(x_raw, x_raw.shape + (1, 1))
    dim_h, dim_w, dim_c = x.shape[1], x.shape[2], x.shape[3]
    train.train(Config(x, data_name, tag, dim_h, dim_w, dim_c, state='train'))

    # ---- stage 2: classification on encoded features ----
    dir_gan = 'cache/{}'.format(tag)
    feature_type, norm_type = 'local-max', 'tanh'
    model = base.StandardClassifierDic['LR']
    features_tr, y_tr, features_te, y_te, n_classes = encode(
        data_name, dir_data, feature_type, norm_type, dir_gan)

    table = {key: [] for key in ('ratio', 'distr', 'acc', 'acc_te')}
    for ratio in ratios:
        subset_x, subset_y, _, _ = utils.split_stratified(
            features_tr, y_tr, ratio)
        # base.classify returns a pair; only the first item (indexable
        # scores) is recorded here, the second is ignored.
        scores, _ = base.classify(model, subset_x, subset_y, features_te,
                                  y_te)
        table['ratio'].append(ratio)
        table['distr'].append(utils.distribute_y_json(subset_y))
        table['acc'].append(scores[0])
        table['acc_te'].append(scores[1])
    return pd.DataFrame(table)
Exemple #2
0
def encode(data_name, dir_data, feature_type, dir_gan):
    """ Encode the training split of a data set with a previously trained TSGAN.

    A ``TSGAN`` is rebuilt from a 'test'-state ``Config``, its weights are
    restored from ``<dir_gan>/<data_name>/checkpoint`` and the training samples
    are pushed through a ``TSGANEncoder`` in batches.

    :param data_name: data set name; also the checkpoint sub-directory.
    :param dir_data: data root directory forwarded to ``ucr.load_ucr_flat``.
    :param feature_type: forwarded to ``TSGANEncoder`` as ``type``.
    :param dir_gan: root directory that holds the saved GAN checkpoints.
    :return: tuple ``(x_tr_2d, y_tr, layer1, layer2)`` -- the flat training
        data, its labels and the two outputs of ``encode_on_batch``.
    :raises Exception: when ``gan.load`` reports that nothing was loaded.
    """
    tf.reset_default_graph()

    # Load the flat training split and add two singleton axes (4-D input).
    x_tr_2d, y_tr, _, _, n_classes = ucr.load_ucr_flat(data_name, dir_data)
    x_tr = np.reshape(x_tr_2d, x_tr_2d.shape + (1, 1))
    dim_h, dim_w, dim_c = x_tr.shape[1], x_tr.shape[2], x_tr.shape[3]

    # Build the GAN with the hyper-parameters carried by the Config object.
    dir_checkpoint = os.path.join(dir_gan, data_name, 'checkpoint')
    conf = Config(x_tr, '', '', dim_h, dim_w, dim_c, state='test')
    gan = TSGAN(conf.dim_z, conf.dim_h, conf.dim_w, conf.dim_c,
                conf.random_seed, conf.g_lr, conf.d_lr, conf.g_beta1,
                conf.d_beta1, conf.gf_dim, conf.df_dim)

    with tf.Session(config=tf_conf) as sess:
        # The second item returned by gan.load is not needed here.
        loaded, _ = gan.load(sess, dir_checkpoint)
        if not loaded:
            raise Exception("[!] Train a model first, then run test mode")
        encoder = TSGANEncoder(gan, [dim_h, dim_w, dim_c], type=feature_type)
        layer1, layer2 = encode_on_batch(encoder, sess, x_tr, conf.batch_size)

    return x_tr_2d, y_tr, layer1, layer2
Exemple #3
0
def run_baselines(dir_data, data_name_list, dir_out_root):
    """ Run every classifier in ``base.StandardClassifierDic`` on the raw data sets.

    The whole experiment is repeated three times. For each repetition and each
    model one CSV file ``<dir_out_root>/raw/run<k>/<model_name>.csv`` is
    written, holding one row per data set.

    :param dir_data: data root directory forwarded to ``ucr.load_ucr_flat``.
    :param data_name_list: names of the data sets to process.
    :param dir_out_root: root directory for the result files.
    :return: None; results are only written to disk.
    """
    n_runs = 3
    columns = ('dataset', 'acc', 'acc_te', 'time', 'time_te')
    raw_root = os.path.join(dir_out_root, 'raw')

    for run_idx in range(n_runs):
        run_dir = os.path.join(raw_root, 'run{}'.format(run_idx))
        make_dir(run_dir)
        for model_name, model in base.StandardClassifierDic.items():
            print("******** processing model {}".format(model_name))
            table = {col: [] for col in columns}
            for data_name in data_name_list:
                x_tr, y_tr, x_te, y_te, _ = ucr.load_ucr_flat(
                    data_name, dir_data)
                # classify returns two indexable results; items 0 and 1 of
                # each are stored in the plain and the ``*_te`` column.
                acc, t = base.classify(model, x_tr, y_tr, x_te, y_te)
                row = (data_name, acc[0], acc[1], t[0], t[1])
                for col, value in zip(columns, row):
                    table[col].append(value)
                print(model_name, data_name, acc, t)
            csv_path = os.path.join(run_dir, '{}.csv'.format(model_name))
            pd.DataFrame(table).to_csv(csv_path, index=False)
Exemple #4
0
def run_raw(data_name, dir_data, dir_out):
    """ Dump the labels of one data set and run the manifold step on its raw data.

    Output goes to ``<dir_out>/<data_name>/raw``: the training labels are
    written to the file ``try``, the test labels to ``tey`` and the same
    directory is handed to ``run_manifold``.

    :param data_name: data set name forwarded to ``ucr.load_ucr_flat``.
    :param dir_data: data root directory forwarded to ``ucr.load_ucr_flat``.
    :param dir_out: root directory for the outputs.
    :return: None.
    """
    path_out = os.path.join(dir_out, data_name, 'raw')
    # exist_ok=True replaces the former check-then-create sequence, which
    # could fail if the directory appeared between the check and the call.
    os.makedirs(path_out, exist_ok=True)

    x_tr, y_tr, x_te, y_te, _ = ucr.load_ucr_flat(data_name, dir_data)

    np.savetxt(os.path.join(path_out, 'try'), y_tr, delimiter=',')
    np.savetxt(os.path.join(path_out, 'tey'), y_te, delimiter=',')

    run_manifold(x_tr, y_tr, x_te, y_te, path_out)
Exemple #5
0
def run_standard(data_name, dir_data, ratios):
    """ Score the 'LR' classifier on raw data for several training-subset ratios.

    :param data_name: data set name forwarded to ``ucr.load_ucr_flat``.
    :param dir_data: data root directory forwarded to ``ucr.load_ucr_flat``.
    :param ratios: iterable of values handed one at a time to
        ``utils.split_stratified`` (presumably the kept fraction of the
        training set -- confirm against utils).
    :return: DataFrame with columns ``ratio``, ``distr``, ``acc``, ``acc_te``,
        one row per ratio.
    """
    classifier = base.StandardClassifierDic['LR']
    x_tr, y_tr, x_te, y_te, _ = ucr.load_ucr_flat(data_name, dir_data)

    table = {key: [] for key in ('ratio', 'distr', 'acc', 'acc_te')}
    for ratio in ratios:
        subset_x, subset_y, _, _ = utils.split_stratified(x_tr, y_tr, ratio)
        # Only the first item returned by classify is recorded.
        scores, _ = base.classify(classifier, subset_x, subset_y, x_te, y_te)
        table['ratio'].append(ratio)
        table['distr'].append(utils.distribute_y_json(subset_y))
        table['acc'].append(scores[0])
        table['acc_te'].append(scores[1])
    return pd.DataFrame(table)
Exemple #6
0
def run_fcn(data_name, dir_data, ratios):
    """ Train and evaluate a fresh ``FCN`` for each training-subset ratio.

    Both splits get one trailing singleton axis and the labels are converted
    with ``utils.dense_to_one_hot``. Every model is fitted for 200 epochs.

    :param data_name: data set name forwarded to ``ucr.load_ucr_flat``.
    :param dir_data: data root directory forwarded to ``ucr.load_ucr_flat``.
    :param ratios: iterable of values handed one at a time to
        ``utils.split_stratified`` (presumably the kept fraction of the
        training set -- confirm against utils).
    :return: DataFrame with columns ``ratio``, ``distr``, ``acc``, ``acc_te``,
        one row per ratio; ``acc`` is the ``'acc'`` entry of the metrics row
        labelled ``n_rows - 1`` returned by ``fit``, ``acc_te`` is the result
        of ``evaluate`` on the test split.
    """
    n_epochs = 200
    x_tr, y_tr, x_te, y_te, n_classes = ucr.load_ucr_flat(data_name, dir_data)
    # Add a trailing singleton axis to both splits.
    x_tr = x_tr.reshape(x_tr.shape + (1,))
    x_te = x_te.reshape(x_te.shape + (1,))
    y_te_onehot = utils.dense_to_one_hot(y_te, n_classes)

    table = {key: [] for key in ('ratio', 'distr', 'acc', 'acc_te')}
    for ratio in ratios:
        print("*** processing ratio={}".format(ratio))
        subset_x, subset_y, _, _ = utils.split_stratified(x_tr, y_tr, ratio)
        subset_y_onehot = utils.dense_to_one_hot(subset_y, n_classes)

        # A new network per ratio, so runs do not share weights.
        net = FCN(subset_x.shape[1:], n_classes)
        history = net.fit(subset_x, subset_y_onehot, n_epochs=n_epochs)
        test_score = net.evaluate(x_te, y_te_onehot)
        final_row = history.loc[len(history) - 1, :]

        table['ratio'].append(ratio)
        table['distr'].append(utils.distribute_y_json(subset_y))
        table['acc'].append(final_row['acc'])
        table['acc_te'].append(test_score)
    return pd.DataFrame(table)
Exemple #7
0
def run_raw(data_name_list, dir_data, dir_out):
    """ Run ``kmeans`` on the raw samples of every data set and save the results.

    Train and test splits are stacked into one sample matrix / label vector
    before clustering. The collected table is written to
    ``<dir_out>/kmeans_raw.csv``.

    :param data_name_list: names of the data sets to process.
    :param dir_data: data root directory forwarded to ``ucr.load_ucr_flat``.
    :param dir_out: existing directory that receives ``kmeans_raw.csv``.
    :return: DataFrame with columns ``dataset``, ``randIndex``, ``time``; the
        last two hold the pair returned by ``kmeans``.
    """
    print("******** kmeans over raw data")
    res = {'dataset': [], 'randIndex': [], 'time': []}
    n_datasets = len(data_name_list)
    # Count from 1 so the progress message reads [1/N] ... [N/N] instead of
    # [0/N] ... [N-1/N].
    for i, data_name in enumerate(data_name_list, start=1):
        print("******** [{}/{}] processing {}".format(i, n_datasets,
                                                      data_name))
        ## load data
        x_tr, y_tr, x_te, y_te, n_classes = ucr.load_ucr_flat(
            data_name, dir_data)
        x = np.vstack([x_tr, x_te])
        y = np.hstack([y_tr, y_te])
        ## start to run
        ri, t = kmeans(x, y, n_classes)
        res['dataset'].append(data_name)
        res['randIndex'].append(ri)
        res['time'].append(t)
    ## save result
    df = pd.DataFrame(res)
    df.to_csv(os.path.join(dir_out, 'kmeans_raw.csv'), index=False)
    return df
Exemple #8
0
def start_training(data_name_list, dir_data_root, mode, tag):
    """ Train one GAN per data set.

    :param data_name_list: names of the data sets to train on.
    :param dir_data_root: data root directory forwarded to
        ``ucr.load_ucr_flat``.
    :param mode: ``'half'`` trains on the training split only, ``'all'`` on
        the training and test splits stacked together.
    :param tag: run tag forwarded to ``Config``.
    :return: None.
    :raises ValueError: for any other ``mode`` (raised while processing the
        first data set, after its data has been loaded).
    """
    total = len(data_name_list)
    for position, data_name in enumerate(data_name_list):
        print("******* [{}/{}] processing {}".format(position + 1, total,
                                                     data_name))
        tf.reset_default_graph()
        x_tr, _, x_te, _, _ = ucr.load_ucr_flat(data_name, dir_data_root)
        # Two trailing singleton axes turn the flat samples into 4-D input.
        x_tr = np.reshape(x_tr, x_tr.shape + (1, 1))
        x_te = np.reshape(x_te, x_te.shape + (1, 1))

        if mode == 'all':
            x = np.vstack([x_tr, x_te])
        elif mode == 'half':
            x = x_tr
        else:
            raise ValueError("Can not find mode = {}".format(mode))

        dim_h, dim_w, dim_c = x.shape[1], x.shape[2], x.shape[3]
        train.train(
            Config(x, data_name, tag, dim_h, dim_w, dim_c, state='train'))
Exemple #9
0
def split_test2valid_stratified(
    in_data_dir_root,
    out_dir_parent,
    out_dir_base='UCR_TS_Archive_2015_split-test-to-valid-stratified_valid-size-same-as-train'
):
    """ Split the original test set to generate a validation set and the number samples of
        each class in validation set is the same as the train set.

    The data sets to process are chosen by
    ``get_dataset_testset_double_than_trainset_for_each_class``. For each one
    the files ``<name>_TRAIN``, ``<name>_TEST`` and ``<name>_VALID`` are
    written to ``<out_dir_parent>/<out_dir_base>/<name>``; an existing
    directory of that name is deleted first.

    :param in_data_dir_root: this path shouldn't include Validation set.
    :param out_dir_parent: parent directory of the output archive.
    :param out_dir_base: name of the output archive directory.
    :return: None.
    """
    out_data_dir_root = os.path.join(out_dir_parent, out_dir_base)
    data_name_list = get_dataset_testset_double_than_trainset_for_each_class(
        in_data_dir_root)
    print("This program will process {} data sets.".format(
        len(data_name_list)))
    print("They are: ", data_name_list)
    print()
    for fname in data_name_list:
        print("processing data {}".format(fname))
        # prepare data
        # Only the first four items are needed; slicing keeps the unpacking
        # valid when the loader also returns the number of classes.
        X_train, y_train, X_test, y_test = ucr.load_ucr_flat(
            fname, in_data_dir_root)[:4]
        distr_train = utils.distribute_dataset(X_train, y_train)
        distr_test = utils.distribute_dataset(X_test, y_test)

        # split test set to validation set
        X_valid, y_valid = [], []
        X_test_new, y_test_new = [], []
        for key in distr_train:
            num_tr = distr_train[key].shape[0]
            # Random order of this class's test samples: the first num_tr go
            # to the validation set, the remainder stay in the test set.
            inds_te = np.arange(distr_test[key].shape[0])
            np.random.shuffle(inds_te)

            X_valid_temp = distr_test[key][inds_te[:num_tr]]
            X_valid.append(X_valid_temp)
            y_valid.append(np.ones([X_valid_temp.shape[0], 1], int) * key)

            X_test_new_temp = distr_test[key][inds_te[num_tr:]]
            X_test_new.append(X_test_new_temp)
            y_test_new.append(
                np.ones([X_test_new_temp.shape[0], 1], int) * key)
        X_valid = np.concatenate(X_valid, axis=0)
        y_valid = np.concatenate(y_valid, axis=0)
        X_test_new = np.concatenate(X_test_new, axis=0)
        y_test_new = np.concatenate(y_test_new, axis=0)

        # pack data set in UCR format (label in the first column)
        trainset = np.concatenate([y_train[:, np.newaxis], X_train], axis=1)
        validset = np.concatenate([y_valid, X_valid], axis=1)
        testset = np.concatenate([y_test_new, X_test_new], axis=1)
        # make dir for specific data set
        out_path = os.path.join(out_data_dir_root, fname)
        if os.path.exists(out_path):
            shutil.rmtree(out_path)
        os.makedirs(out_path)
        # output data to file
        # One integer label column followed by one '%.4f' per feature. The
        # label specifier must be a valid printf conversion ('%d'); the
        # previous '%path' made np.savetxt fail.
        str_fmt = ','.join(['%d'] + ['%.4f'] * X_train.shape[1])
        for suffix, dataset in (('TRAIN', trainset), ('TEST', testset),
                                ('VALID', validset)):
            np.savetxt(os.path.join(out_path, '{}_{}'.format(fname, suffix)),
                       dataset,
                       fmt=str_fmt,
                       delimiter=',')
Exemple #10
0
from data import ucr

import os
import numpy as np
import pandas as pd

# Script: for every data set found under dir_data, stack the train and test
# splits, compute the average per-row mean and per-row standard deviation
# (rounded to 2 decimals) and write the table to result/check_znorm.csv.
if __name__ == '__main__':
    dir_data = '../../dataset/UCR_TS_Archive_2015'
    dir_out = 'result'
    # Create the output directory up front so a missing folder is not
    # discovered only after every data set has been processed.
    os.makedirs(dir_out, exist_ok=True)

    res = {'dataset': [], 'mean': [], 'std': []}
    data_name_list = ucr.get_data_name_list(dir_data)
    for data_name in data_name_list:
        x_tr, y_tr, x_te, y_te, n_classes = ucr.load_ucr_flat(
            data_name, dir_data)
        x = np.vstack([x_tr, x_te])
        # axis=1 reduces within each row; the outer mean averages over rows.
        mean = np.round(np.mean(np.mean(x, axis=1)), 2)
        std = np.round(np.mean(np.std(x, axis=1)), 2)
        res['dataset'].append(data_name)
        res['mean'].append(mean)
        res['std'].append(std)

    df = pd.DataFrame(res)
    df.to_csv(os.path.join(dir_out, 'check_znorm.csv'), index=False)