def run_tsgan(data_name, dir_data, ratios, tag):
    ## train GAN
    tf.reset_default_graph()
    x_tr, y_tr, _, _, _ = ucr.load_ucr_flat(data_name, dir_data)
    x = np.reshape(x_tr, x_tr.shape + (1, 1))
    conf = Config(x, data_name, tag, x.shape[1], x.shape[2], x.shape[3],
                  state='train')
    train.train(conf)

    ## classification
    dir_gan = 'cache/{}'.format(tag)
    feature_type = 'local-max'
    norm_type = 'tanh'
    model_name = 'LR'
    model = base.StandardClassifierDic[model_name]
    features_tr, y_tr, features_te, y_te, n_classes = encode(
        data_name, dir_data, feature_type, norm_type, dir_gan)
    res = {'ratio': [], 'distr': [], 'acc': [], 'acc_te': []}
    for r in ratios:
        features_tr_cur, y_tr_cur, _, _ = utils.split_stratified(
            features_tr, y_tr, r)
        acc, t = base.classify(model, features_tr_cur, y_tr_cur,
                               features_te, y_te)
        res['ratio'].append(r)
        res['distr'].append(utils.distribute_y_json(y_tr_cur))
        res['acc'].append(acc[0])
        res['acc_te'].append(acc[1])
    df_res = pd.DataFrame(res)
    return df_res
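# A minimal usage sketch for run_tsgan (the dataset name, paths, ratios, and
# tag below are hypothetical placeholders, not values from this repo): train
# a TSGAN on one UCR data set, encode it, then score a logistic-regression
# classifier on growing fractions of the encoded training set.
#
#   ratios = [0.1, 0.2, 0.5, 1.0]
#   df = run_tsgan('Adiac', '../../dataset/UCR_TS_Archive_2015',
#                  ratios, tag='tsgan-demo')
#   df.to_csv('tsgan_Adiac.csv', index=False)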
def encode(data_name, dir_data, feature_type, dir_gan):
    tf.reset_default_graph()

    ## load data
    x_tr_2d, y_tr, _, _, n_classes = ucr.load_ucr_flat(data_name, dir_data)
    x_tr = np.reshape(x_tr_2d, x_tr_2d.shape + (1, 1))

    ## set up GAN
    dir_checkpoint = os.path.join(dir_gan, data_name, 'checkpoint')
    conf = Config(x_tr, '', '', x_tr.shape[1], x_tr.shape[2], x_tr.shape[3],
                  state='test')
    gan = TSGAN(conf.dim_z, conf.dim_h, conf.dim_w, conf.dim_c,
                conf.random_seed, conf.g_lr, conf.d_lr, conf.g_beta1,
                conf.d_beta1, conf.gf_dim, conf.df_dim)

    ## start to run
    with tf.Session(config=tf_conf) as sess:
        isload, counter = gan.load(sess, dir_checkpoint)
        if not isload:
            raise Exception("[!] Train a model first, then run test mode")
        input_shape = [x_tr.shape[1], x_tr.shape[2], x_tr.shape[3]]
        encoder = TSGANEncoder(gan, input_shape, type=feature_type)
        layer1, layer2 = encode_on_batch(encoder, sess, x_tr, conf.batch_size)
    return x_tr_2d, y_tr, layer1, layer2
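# Usage sketch (hypothetical arguments): this encode() variant restores a
# trained TSGAN from <dir_gan>/<data_name>/checkpoint and returns the flat
# training series, their labels, and the two feature layers produced by
# TSGANEncoder.
#
#   x2d, y, layer1, layer2 = encode(
#       'Adiac', '../../dataset/UCR_TS_Archive_2015',
#       feature_type='local-max', dir_gan='cache/tsgan-demo')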
def run_baselines(dir_data, data_name_list, dir_out_root):
    dir_out = os.path.join(dir_out_root, 'raw')
    n_runs = 3
    for ir in range(n_runs):
        dir_out_cur = os.path.join(dir_out, 'run{}'.format(ir))
        make_dir(dir_out_cur)
        for model_name, model in base.StandardClassifierDic.items():
            print("******** processing model {}".format(model_name))
            res = {
                'dataset': [],
                'acc': [],
                'acc_te': [],
                'time': [],
                'time_te': []
            }
            for data_name in data_name_list:
                x_tr, y_tr, x_te, y_te, n_classes = ucr.load_ucr_flat(
                    data_name, dir_data)
                acc, t = base.classify(model, x_tr, y_tr, x_te, y_te)
                res['dataset'].append(data_name)
                res['acc'].append(acc[0])
                res['acc_te'].append(acc[1])
                res['time'].append(t[0])
                res['time_te'].append(t[1])
                print(model_name, data_name, acc, t)
            df = pd.DataFrame(res)
            df.to_csv(os.path.join(dir_out_cur, '{}.csv'.format(model_name)),
                      index=False)
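# Usage sketch (paths are placeholders): run every classifier in
# base.StandardClassifierDic three times over the listed data sets, writing
# one CSV per model per run under <dir_out_root>/raw/run<i>/.
#
#   data_names = ucr.get_data_name_list('../../dataset/UCR_TS_Archive_2015')
#   run_baselines('../../dataset/UCR_TS_Archive_2015', data_names, 'result')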
def run_raw(data_name, dir_data, dir_out):
    path_out = os.path.join(dir_out, data_name, 'raw')
    if not os.path.exists(path_out):
        os.makedirs(path_out)
    x_tr, y_tr, x_te, y_te, n_classes = ucr.load_ucr_flat(data_name, dir_data)
    np.savetxt(os.path.join(path_out, 'try'), y_tr, delimiter=',')
    np.savetxt(os.path.join(path_out, 'tey'), y_te, delimiter=',')
    run_manifold(x_tr, y_tr, x_te, y_te, path_out)
def run_standard(data_name, dir_data, ratios):
    model_name = 'LR'
    model = base.StandardClassifierDic[model_name]
    x_tr, y_tr, x_te, y_te, n_classes = ucr.load_ucr_flat(data_name, dir_data)
    res = {'ratio': [], 'distr': [], 'acc': [], 'acc_te': []}
    for r in ratios:
        x_tr_cur, y_tr_cur, _, _ = utils.split_stratified(x_tr, y_tr, r)
        acc, t = base.classify(model, x_tr_cur, y_tr_cur, x_te, y_te)
        res['ratio'].append(r)
        res['distr'].append(utils.distribute_y_json(y_tr_cur))
        res['acc'].append(acc[0])
        res['acc_te'].append(acc[1])
    df_res = pd.DataFrame(res)
    return df_res
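# Usage sketch (hypothetical arguments): run_standard mirrors run_tsgan but
# classifies the raw series directly, so the two result frames can be
# compared ratio-for-ratio.
#
#   df_raw = run_standard('Adiac', '../../dataset/UCR_TS_Archive_2015',
#                         ratios=[0.1, 0.2, 0.5, 1.0])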
def run_fcn(data_name, dir_data, ratios):
    x_tr, y_tr, x_te, y_te, n_classes = ucr.load_ucr_flat(data_name, dir_data)
    x_tr = x_tr.reshape(x_tr.shape + (1, ))
    x_te = x_te.reshape(x_te.shape + (1, ))
    y_te_onehot = utils.dense_to_one_hot(y_te, n_classes)
    n_epochs = 200
    res = {'ratio': [], 'distr': [], 'acc': [], 'acc_te': []}
    for r in ratios:
        print("*** processing ratio={}".format(r))
        x_tr_cur, y_tr_cur, _, _ = utils.split_stratified(x_tr, y_tr, r)
        y_tr_cur_onehot = utils.dense_to_one_hot(y_tr_cur, n_classes)
        model = FCN(x_tr_cur.shape[1:], n_classes)
        df_metrics = model.fit(x_tr_cur, y_tr_cur_onehot, n_epochs=n_epochs)
        acc_te = model.evaluate(x_te, y_te_onehot)
        last = df_metrics.loc[df_metrics.shape[0] - 1, :]
        res['ratio'].append(r)
        res['distr'].append(utils.distribute_y_json(y_tr_cur))
        res['acc'].append(last['acc'])
        res['acc_te'].append(acc_te)
    df_res = pd.DataFrame(res)
    return df_res
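# Usage sketch (hypothetical arguments): the FCN consumes 3-D input
# (samples, length, channels), hence the trailing channel axis added above;
# 'acc' records the training accuracy at the final epoch while 'acc_te' is
# the held-out test accuracy.
#
#   df_fcn = run_fcn('Adiac', '../../dataset/UCR_TS_Archive_2015',
#                    ratios=[0.1, 0.2, 0.5, 1.0])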
def run_raw(data_name_list, dir_data, dir_out):
    print("******** kmeans over raw data")
    res = {'dataset': [], 'randIndex': [], 'time': []}
    n_datasets = len(data_name_list)
    for i, data_name in enumerate(data_name_list):
        print("******** [{}/{}] processing {}".format(i + 1, n_datasets,
                                                      data_name))
        ## load data
        x_tr, y_tr, x_te, y_te, n_classes = ucr.load_ucr_flat(
            data_name, dir_data)
        x = np.vstack([x_tr, x_te])
        y = np.hstack([y_tr, y_te])

        ## start to run
        ri, t = kmeans(x, y, n_classes)
        res['dataset'].append(data_name)
        res['randIndex'].append(ri)
        res['time'].append(t)

    ## save result
    df = pd.DataFrame(res)
    df.to_csv(os.path.join(dir_out, 'kmeans_raw.csv'), index=False)
    return df
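# Usage sketch (placeholder paths): k-means runs on the full data set (train
# and test stacked) with k = n_classes, and clustering quality is reported
# as the Rand index.
#
#   names = ucr.get_data_name_list('../../dataset/UCR_TS_Archive_2015')
#   run_raw(names, '../../dataset/UCR_TS_Archive_2015', 'result')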
def start_training(data_name_list, dir_data_root, mode, tag):
    n_datasets = len(data_name_list)
    for i, data_name in enumerate(data_name_list):
        print("******** [{}/{}] processing {}".format(i + 1, n_datasets,
                                                      data_name))
        tf.reset_default_graph()
        x_tr, _, x_te, _, _ = ucr.load_ucr_flat(data_name, dir_data_root)
        x_tr = np.reshape(x_tr, x_tr.shape + (1, 1))
        x_te = np.reshape(x_te, x_te.shape + (1, 1))
        if mode == 'half':
            x = x_tr
        elif mode == 'all':
            x = np.vstack([x_tr, x_te])
        else:
            raise ValueError("Unknown mode: {}".format(mode))
        conf = Config(x, data_name, tag, x.shape[1], x.shape[2], x.shape[3],
                      state='train')
        train.train(conf)
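# Usage sketch (hypothetical tag and paths): mode='half' trains the GAN on
# the training split only, while mode='all' stacks train and test so the GAN
# sees every available series.
#
#   names = ucr.get_data_name_list('../../dataset/UCR_TS_Archive_2015')
#   start_training(names, '../../dataset/UCR_TS_Archive_2015',
#                  mode='all', tag='tsgan-all')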
def split_test2valid_stratified(
        in_data_dir_root,
        out_dir_parent,
        out_dir_base='UCR_TS_Archive_2015_split-test-to-valid-stratified_valid-size-same-as-train'
):
    """
    Split the original test set to carve out a validation set such that the
    number of samples per class in the validation set matches the train set.

    :param in_data_dir_root: root of the input data; this path should not
        already contain a validation set.
    :param out_dir_parent: parent directory for the output archive.
    :param out_dir_base: name of the output archive directory.
    :return: None; the split sets are written to disk in UCR format.
    """
    out_data_dir_root = os.path.join(out_dir_parent, out_dir_base)
    data_name_list = get_dataset_testset_double_than_trainset_for_each_class(
        in_data_dir_root)
    print("This program will process {} data sets.".format(
        len(data_name_list)))
    print("They are: ", data_name_list)
    print()
    for fname in data_name_list:
        print("processing data {}".format(fname))

        # prepare data
        X_train, y_train, X_test, y_test, _ = ucr.load_ucr_flat(
            fname, in_data_dir_root)
        distr_train = utils.distribute_dataset(X_train, y_train)
        distr_test = utils.distribute_dataset(X_test, y_test)

        # split the test set into a validation set and a reduced test set
        X_valid, y_valid = [], []
        X_test_new, y_test_new = [], []
        for key in distr_train.keys():
            num_tr = distr_train[key].shape[0]
            inds_te = np.arange(distr_test[key].shape[0])
            np.random.shuffle(inds_te)
            X_valid_temp = distr_test[key][inds_te[:num_tr]]
            X_valid.append(X_valid_temp)
            y_valid.append(np.ones([X_valid_temp.shape[0], 1], int) * key)
            X_test_new_temp = distr_test[key][inds_te[num_tr:]]
            X_test_new.append(X_test_new_temp)
            y_test_new.append(
                np.ones([X_test_new_temp.shape[0], 1], int) * key)
        X_valid = np.concatenate(X_valid, axis=0)
        y_valid = np.concatenate(y_valid, axis=0)
        X_test_new = np.concatenate(X_test_new, axis=0)
        y_test_new = np.concatenate(y_test_new, axis=0)

        # pack the data sets in UCR format: label first, then the series
        trainset = np.concatenate([y_train[:, np.newaxis], X_train], axis=1)
        validset = np.concatenate([y_valid, X_valid], axis=1)
        testset = np.concatenate([y_test_new, X_test_new], axis=1)

        # make dir for the specific data set
        out_path = os.path.join(out_data_dir_root, fname)
        if os.path.exists(out_path):
            shutil.rmtree(out_path)
        os.makedirs(out_path)

        # output data to file: integer label column, then float values
        str_fmt = '%d,' + '%.4f,' * X_train.shape[1]
        str_fmt = str_fmt[:-1]  # drop the trailing comma
        np.savetxt(os.path.join(out_path, '{}_TRAIN'.format(fname)),
                   trainset, fmt=str_fmt, delimiter=',')
        np.savetxt(os.path.join(out_path, '{}_TEST'.format(fname)),
                   testset, fmt=str_fmt, delimiter=',')
        np.savetxt(os.path.join(out_path, '{}_VALID'.format(fname)),
                   validset, fmt=str_fmt, delimiter=',')
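# Usage sketch (paths are placeholders): judging by the helper's name, only
# data sets whose test split holds at least twice as many samples per class
# as the train split are processed, so the carved-out validation set can
# match the train set class-for-class.
#
#   split_test2valid_stratified('../../dataset/UCR_TS_Archive_2015',
#                               out_dir_parent='../../dataset')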
from data import ucr
import os
import numpy as np
import pandas as pd

if __name__ == '__main__':
    dir_data = '../../dataset/UCR_TS_Archive_2015'
    dir_out = 'result'
    res = {'dataset': [], 'mean': [], 'std': []}
    data_name_list = ucr.get_data_name_list(dir_data)
    for data_name in data_name_list:
        x_tr, y_tr, x_te, y_te, n_classes = ucr.load_ucr_flat(
            data_name, dir_data)
        x = np.vstack([x_tr, x_te])
        # sanity-check z-normalization: if each series is z-normalized, the
        # average per-series mean should be ~0 and the average std ~1
        mean = np.round(np.mean(np.mean(x, axis=1)), 2)
        std = np.round(np.mean(np.std(x, axis=1)), 2)
        res['dataset'].append(data_name)
        res['mean'].append(mean)
        res['std'].append(std)
    df = pd.DataFrame(res)
    df.to_csv(os.path.join(dir_out, 'check_znorm.csv'), index=False)