"RefrigerationDevices", "ScreenType", "ShapeletSim", "ShapesAll", "SmallKitchenAppliances", "SonyAIBORobotSurface1", "SonyAIBORobotSurface2", "StarLightCurves", "Strawberry", "SwedishLeaf", "Symbols", "SyntheticControl", "ToeSegmentation1", "ToeSegmentation2", "Trace", "TwoLeadECG", "TwoPatterns", "UWaveGestureLibraryAll", "UWaveGestureLibraryX", "UWaveGestureLibraryY", "UWaveGestureLibraryZ", "Wafer", "Wine", "WordSynonyms", "Worms", "WormsTwoClass", "Yoga", "ACSF1", "AllGestureWiimoteX", "AllGestureWiimoteY", "AllGestureWiimoteZ", "BME", "Chinatown", "Crop", "DodgerLoopDay", "DodgerLoopGame", "DodgerLoopWeekend", "EOGHorizontalSignal", "EOGVerticalSignal", "EthanolLevel", "FreezerRegularTrain", "FreezerSmallTrain", "Fungi", "GestureMidAirD1", "GestureMidAirD2", "GestureMidAirD3", "GesturePebbleZ1", "GesturePebbleZ2", "GunPointAgeSpan", "GunPointMaleVersusFemale", "GunPointOldVersusYoung", "HouseTwenty", "InsectEPGRegularTrain", "InsectEPGSmallTrain", "MelbournePedestrian", "MixedShapesRegularTrain", "MixedShapesSmallTrain", "PickupGestureWiimoteZ", "PigAirwayPressure", "PigArtPressure", "PigCVP", "PLAID", "PowerCons","Rock","SemgHandGenderCh2", "SemgHandMovementCh2","SemgHandSubjectCh2","ShakeGestureWiimoteZ","SmoothSubspace","UMD"] total = 0 for i, dataset in enumerate(datasets): args.dataset = dataset nb_class = ds.nb_classes(args.dataset) nb_dims = ds.nb_dims(args.dataset) # Load data x_train, y_train, x_test, y_test = get_datasets(args) nb_timesteps = int(x_train.shape[1] / nb_dims) input_shape = (nb_timesteps , nb_dims) # Process data x_test = x_test.reshape((-1, input_shape[0], input_shape[1])) x_train = x_train.reshape((-1, input_shape[0], input_shape[1])) y_test = to_categorical(ds.class_offset(y_test, args.dataset), nb_class) y_train = to_categorical(ds.class_offset(y_train, args.dataset), nb_class)
def _class_counts(labels):
    """Return ``{label: number of occurrences}`` for each distinct label."""
    values, counts = np.unique(labels, return_counts=True)
    # Plain ints keep the printed dictionaries free of numpy scalar reprs.
    return {value: int(count) for value, count in zip(values, counts)}


def _print_count_summary(split_name, counts):
    """Print the mean and std of the per-class sample counts of one split."""
    sizes = list(counts.values())
    print("[Stat] {} class: mean={}, std={}".format(
        split_name, np.mean(sizes), np.std(sizes)))


def load_ucr2018(dataset_path, dataset_name):
    """Load a dataset, re-split it per class and min-max scale it.

    The stock train and test parts are concatenated, shuffled with the
    global ``np.random`` state, and split class by class: the first 20% of
    each class goes to test, the next 20% to validation and the remainder
    to train (fractions are truncated with ``int``, so very small classes
    may contribute nothing to test/validation).

    Every split is reshaped to ``(-1, nb_timesteps, nb_dims)`` and scaled
    to ``[-1, 1]`` with the minimum and maximum of the *training* split
    only, so validation/test values may fall outside that interval.

    Args:
        dataset_path: Root directory containing the dataset folders.
        dataset_name: Dataset identifier. ``'MFPT'``/``'XJTU'`` are read
            from ``.npy`` files, ``'EpilepticSeizure'`` through
            ``get_EpilepticSeizure``; anything else through
            ``TSC_data_loader``.

    Returns:
        Tuple ``(x_train, y_train, x_val, y_val, x_test, y_test, nb_class,
        class_stat_all)`` where ``nb_class`` comes from ``ds.nb_classes``
        and ``class_stat_all`` maps each label to its sample count over
        the whole (train + test) dataset.
    """
    ##################
    # load raw data
    ##################
    nb_class = ds.nb_classes(dataset_name)
    nb_dims = ds.nb_dims(dataset_name)

    # The initial train/test partition only matters for reading the series
    # length below; both parts are merged and re-split afterwards.
    if dataset_name in ['MFPT', 'XJTU']:
        x = np.load("{}/{}/{}_data.npy".format(
            dataset_path, dataset_name, dataset_name))
        y = np.load("{}/{}/{}_label.npy".format(
            dataset_path, dataset_name, dataset_name))
        x_train, x_test = x[:100], x[100:]
        y_train, y_test = y[:100], y[100:]
    elif dataset_name in ['EpilepticSeizure']:
        data_x, data_y = get_EpilepticSeizure(dataset_path, dataset_name)
        half = int(0.5 * data_x.shape[0])
        x_train, x_test = data_x[:half], data_x[half:]
        y_train, y_test = data_y[:half], data_y[half:]
    else:
        x_train, y_train, x_test, y_test = TSC_data_loader(
            dataset_path, dataset_name)

    nb_timesteps = int(x_train.shape[1] / nb_dims)
    input_shape = (nb_timesteps, nb_dims)

    ############################################
    # Combine all train and test data for resample
    ############################################
    x_all = np.concatenate((x_train, x_test), axis=0)
    y_all = np.concatenate((y_train, y_test), axis=0)

    # Kept as an in-place shuffle of a Python list so that seeded runs
    # reproduce the same ordering as before.
    ts_idx = list(range(x_all.shape[0]))
    np.random.shuffle(ts_idx)
    x_all = x_all[ts_idx]
    y_all = y_all[ts_idx]

    class_stat_all = _class_counts(y_all)
    print("[Stat] All class: {}".format(class_stat_all))

    # Stratified split: slice each class's (already shuffled) indices.
    test_idx = []
    val_idx = []
    train_idx = []
    for label in np.unique(y_all):
        target = list(np.where(y_all == label)[0])
        nb_samp = len(target)
        test_end = int(nb_samp * 0.2)
        val_end = int(nb_samp * 0.4)
        test_idx += target[:test_end]
        val_idx += target[test_end:val_end]
        train_idx += target[val_end:]

    x_test, y_test = x_all[test_idx], y_all[test_idx]
    x_val, y_val = x_all[val_idx], y_all[val_idx]
    x_train, y_train = x_all[train_idx], y_all[train_idx]

    train_stat = _class_counts(y_train)
    print("[Stat] Train class: {}".format(train_stat))
    _print_count_summary("Train", train_stat)
    _print_count_summary("Val", _class_counts(y_val))
    _print_count_summary("Test", _class_counts(y_test))
    ########################################
    # Data Split End
    ########################################

    # Process data
    x_test = x_test.reshape((-1, input_shape[0], input_shape[1]))
    x_val = x_val.reshape((-1, input_shape[0], input_shape[1]))
    x_train = x_train.reshape((-1, input_shape[0], input_shape[1]))

    print("Train:{}, Test:{}, Class:{}".format(
        x_train.shape, x_test.shape, nb_class))

    # Normalize with statistics of the training split only, so nothing
    # about the validation/test data leaks into the scaling.
    x_train_max = np.max(x_train)
    x_train_min = np.min(x_train)
    value_range = x_train_max - x_train_min
    if value_range == 0:
        # A constant training split would otherwise divide by zero and turn
        # every split into NaN/inf; with a unit range it maps to -1 instead.
        value_range = 1.0

    x_train = 2. * (x_train - x_train_min) / value_range - 1.
    x_val = 2. * (x_val - x_train_min) / value_range - 1.
    x_test = 2. * (x_test - x_train_min) / value_range - 1.

    return x_train, y_train, x_val, y_val, x_test, y_test, nb_class, class_stat_all