def preprocess(X_train, X_test, y_train, y_test, fac, std=True):
    """Standardize, label-encode and reshape one train/test split.

    Returns (X_train, X_test, y_train, y_test, y_true, nb_classes,
    class_weight): one-hot label matrices, the integer test labels in
    ``y_true``, the class count, and inverse-frequency class weights.
    The feature tensors are regrouped by ``fac`` — assumes X.shape[2]
    is divisible by ``fac``.
    """
    if std:
        X_train, X_test = standardize(X_train, X_test)

    # Number of distinct classes over both splits.
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # Inverse-frequency class weights computed on the training labels only.
    classes = np.unique(y_train)
    label_encoder = LabelEncoder()
    train_idx = label_encoder.fit_transform(y_train.ravel())
    inv_freq = len(y_train) / (len(label_encoder.classes_)
                               * np.bincount(train_idx).astype(np.float64))
    class_weight = inv_freq[label_encoder.transform(classes)]

    # Shift labels so the smallest one is zero.
    y_train, y_test = transform_labels(y_train, y_test)

    # Keep the integer test labels before they become one-hot below.
    y_true = y_test.astype(np.int64)

    one_hot = sklearn.preprocessing.OneHotEncoder(categories='auto')
    one_hot.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
    y_train = one_hot.transform(y_train.reshape(-1, 1)).toarray()
    y_test = one_hot.transform(y_test.reshape(-1, 1)).toarray()

    # Regroup axes by `fac`: (n, d1, d2) -> (n, d2 // fac, d1 * fac).
    X_train = X_train.reshape(
        (X_train.shape[0], X_train.shape[2] // fac, X_train.shape[1] * fac))
    X_test = X_test.reshape(
        (X_test.shape[0], X_test.shape[2] // fac, X_test.shape[1] * fac))

    return X_train, X_test, y_train, y_test, y_true, nb_classes, class_weight
def prepare_data():
    """Load the current dataset split and encode its labels.

    Returns (x_train, y_train, x_test, y_test, y_true, nb_classes,
    y_true_train, enc): one-hot label matrices, the original integer labels
    of both splits, the class count, and the fitted OneHotEncoder.
    """
    dataset = datasets_dict[dataset_name]
    x_train, y_train = dataset[0], dataset[1]
    x_test, y_test = dataset[2], dataset[3]

    # Number of distinct classes over both splits.
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # Shift labels so the smallest one is zero.
    y_train, y_test = transform_labels(y_train, y_test)

    # Keep the integer labels around; below they become one-hot.
    y_true = y_test.astype(np.int64)
    y_true_train = y_train.astype(np.int64)

    encoder = sklearn.preprocessing.OneHotEncoder()
    encoder.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
    y_train = encoder.transform(y_train.reshape(-1, 1)).toarray()
    y_test = encoder.transform(y_test.reshape(-1, 1)).toarray()

    # Univariate series get an explicit channel dimension.
    if x_train.ndim == 2:
        x_train = x_train.reshape(x_train.shape + (1,))
        x_test = x_test.reshape(x_test.shape + (1,))

    return x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, encoder
def read_data_from_dataset(use_init_clusters=True):
    """Load the current dataset split and prepare clustering metadata.

    Parameters
    ----------
    use_init_clusters : bool, default True
        When true, also compute random initial clusters for k-means.

    Returns (x_train, y_train, x_test, y_test, nb_classes, classes,
    max_prototypes, init_clusters); ``init_clusters`` is None when
    ``use_init_clusters`` is false.
    """
    x_train = datasets_dict[dataset_name][0]
    y_train = datasets_dict[dataset_name][1]
    x_test = datasets_dict[dataset_name][2]
    y_test = datasets_dict[dataset_name][3]

    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # make the min of the labels zero
    y_train, y_test = transform_labels(y_train, y_test)

    classes, classes_counts = np.unique(y_train, return_counts=True)

    if len(x_train.shape) == 2:
        # if univariate, add a dimension to make it multivariate with one channel
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    # Cap the number of prototypes. NOTE(review): the original comment said
    # "minimum count of a class" but the code uses the *maximum* count —
    # behavior kept as-is; worth confirming against the clustering code.
    max_prototypes = min(classes_counts.max() + 1, MAX_PROTOTYPES_PER_CLASS + 1)

    init_clusters = None
    if use_init_clusters:  # fixed: was `== True`; use plain truthiness
        # initial clusters for k-means
        init_clusters = get_random_initial_for_kmeans(x_train, y_train,
                                                      max_prototypes, nb_classes)

    return x_train, y_train, x_test, y_test, nb_classes, classes, max_prototypes, init_clusters
def fit_classifier():
    """Fetch the current dataset, encode its labels and train a classifier."""
    dataset = datasets_dict[dataset_name]
    x_train, y_train = dataset[0], dataset[1]
    x_test, y_test = dataset[2], dataset[3]

    # Distinct classes over both splits.
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # Shift labels so the smallest one is zero.
    y_train, y_test = transform_labels(y_train, y_test)

    # Keep the integer test labels; below they become one-hot.
    y_true = y_test.astype(np.int64)

    encoder = sklearn.preprocessing.OneHotEncoder()
    encoder.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
    y_train = encoder.transform(y_train.reshape(-1, 1)).toarray()
    y_test = encoder.transform(y_test.reshape(-1, 1)).toarray()

    # Univariate series get an explicit channel dimension.
    if x_train.ndim == 2:
        x_train = x_train.reshape(x_train.shape + (1,))
        x_test = x_test.reshape(x_test.shape + (1,))

    classifier = create_classifier(classifier_name, x_train.shape[1:],
                                   nb_classes, output_directory)
    classifier.fit(x_train, y_train, x_test, y_test, y_true)
def read_data_from_dataset(use_init_clusters=True):
    """Load the current dataset split and prepare clustering metadata.

    Returns (x_train, y_train, x_test, y_test, nb_classes, classes,
    max_prototypes, init_clusters). This variant never computes initial
    clusters, so ``init_clusters`` is always None and the
    ``use_init_clusters`` flag is kept only for interface compatibility.
    """
    # fixed: removed unused local `dataset_out_dir` (computed but never read)
    x_train = datasets_dict[dataset_name][0]
    y_train = datasets_dict[dataset_name][1]
    x_test = datasets_dict[dataset_name][2]
    y_test = datasets_dict[dataset_name][3]

    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # make the min of the labels zero
    y_train, y_test = transform_labels(y_train, y_test)

    classes, classes_counts = np.unique(y_train, return_counts=True)

    if len(x_train.shape) == 2:
        # if univariate, add a dimension to make it multivariate with one channel
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    # Cap the number of prototypes. NOTE(review): the original comment said
    # "minimum count of a class" but the code uses the *maximum* count —
    # behavior kept as-is; worth confirming.
    max_prototypes = min(classes_counts.max() + 1, MAX_PROTOTYPES_PER_CLASS + 1)

    init_clusters = None

    return x_train, y_train, x_test, y_test, nb_classes, classes, max_prototypes, init_clusters
def fit_classifier(test_spec=False):
    """Fetch the current dataset, encode its labels and train a classifier.

    Binary problems (<= 2 classes) are collapsed to a single output column;
    multi-class labels are one-hot encoded.

    Parameters
    ----------
    test_spec : bool, default False
        Kept for interface compatibility. The original body had
        ``if test_spec: classifier.fit(...) else: classifier.fit(...)`` with
        two byte-identical branches; the dead conditional is collapsed below,
        so the flag currently has no effect.
    """
    x_train = datasets_dict[dataset_name][0]
    y_train = datasets_dict[dataset_name][1]
    x_test = datasets_dict[dataset_name][2]
    y_test = datasets_dict[dataset_name][3]

    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))
    print(nb_classes)

    # make the min of the labels zero
    print(y_train.shape)
    y_train, y_test = transform_labels(y_train, y_test)
    print(y_train.shape)

    # save original y because later we will use the encoded form
    y_true = y_test.astype(np.int64)
    print("Y_TREU !!!!!: {} {}".format(y_true, y_true.shape))

    if nb_classes <= 2:
        # binary case: single output column, labels kept as integers
        nb_classes = 1
        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)
        y_true = y_true.reshape(-1, 1)
    else:
        # transform the labels from integers to one-hot vectors
        enc = sklearn.preprocessing.OneHotEncoder()
        enc.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
        y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
        y_test = enc.transform(y_test.reshape(-1, 1)).toarray()
    print(y_train.shape)

    if len(x_train.shape) == 2:
        # if univariate, add a dimension to make it multivariate with one channel
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    print("Data preparationis finished with:", x_train.shape, y_train.shape)

    input_shape = x_train.shape[1:]
    classifier = create_classifier(classifier_name, input_shape, nb_classes, output_directory)

    # fixed: both branches of `if test_spec:` called fit identically — collapsed
    classifier.fit(x_train, y_train, x_test, y_test, y_true)
def fit_classifier():
    """Optionally min-max normalize each channel, encode labels, train a
    classifier, and return (fit_result, elapsed_seconds)."""
    x_train = datasets_dict[dataset_name][0]
    y_train = datasets_dict[dataset_name][1]
    x_test = datasets_dict[dataset_name][2]
    y_test = datasets_dict[dataset_name][3]

    if NORMALIZE:
        # Fit one scaler per channel on the train split, apply to both splits.
        for channel in range(x_train.shape[2]):
            scaler = MinMaxScaler()
            scaler.fit(x_train[:, :, channel])
            x_train[:, :, channel] = scaler.transform(x_train[:, :, channel])
            x_test[:, :, channel] = scaler.transform(x_test[:, :, channel])

    # Distinct classes over both splits.
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # Shift labels so the smallest one is zero.
    y_train, y_test = transform_labels(y_train, y_test)

    # Keep the integer test labels; below they become one-hot.
    y_true = y_test.astype(np.int64)

    one_hot = sklearn.preprocessing.OneHotEncoder()
    one_hot.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
    y_train = one_hot.transform(y_train.reshape(-1, 1)).toarray()
    y_test = one_hot.transform(y_test.reshape(-1, 1)).toarray()

    # Univariate series get an explicit channel dimension.
    if x_train.ndim == 2:
        x_train = x_train.reshape(x_train.shape + (1,))
        x_test = x_test.reshape(x_test.shape + (1,))

    classifier = create_classifier(classifier_name, x_train.shape[1:],
                                   nb_classes, output_directory_name)

    started = time.time()
    res = classifier.fit(x_train, y_train, x_test, y_test, y_true)
    elapsed = time.time() - started
    return res, elapsed
def prepare_data(classification):
    """Load the current dataset split and encode its labels.

    When ``classification`` is true the labels are shifted to start at zero
    and ``y_train`` is replaced by a triangular soft-target encoding;
    otherwise the labels are returned unchanged (regression-style use).

    Returns (x_train, y_train, x_test, y_test, y_true, nb_classes,
    y_true_train, enc).
    """
    x_train = datasets_dict[dataset_name][0]
    y_train = datasets_dict[dataset_name][1]
    x_test = datasets_dict[dataset_name][2]
    y_test = datasets_dict[dataset_name][3]

    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    if classification:
        # make the min of the labels zero
        y_train, y_test = transform_labels(y_train, y_test)

    # fixed: keep the integer labels unconditionally so the return statement
    # below never references an undefined name when classification is false
    y_true = y_test.astype(np.int64)
    y_true_train = y_train.astype(np.int64)

    enc = sklearn.preprocessing.OneHotEncoder()
    enc.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))

    if classification:
        # Triangular soft targets: value `a` at the true label's slot,
        # decaying linearly to zero elsewhere over T output slots.
        T = 100  # number of output slots -- TODO confirm against the model head
        a = 1    # peak height / decay slope
        positions = np.arange(1, T + 1)
        # fixed: np.zeros(T) instead of the hard-coded np.zeros(100); removed
        # leftover debug print and the commented-out one-hot encoding code
        y_train = np.array([np.maximum(np.zeros(T), a - np.abs(positions - label))
                            for label in y_train])

    if len(x_train.shape) == 2:
        # if univariate, add a dimension to make it multivariate with one channel
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    return x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc
def train(pre_model=None):
    """Train a model on the transfer dataset (optionally warm-started from
    ``pre_model``), evaluate on its test split, and persist metrics."""
    # read the train and test sets
    data = datasets_dict[dataset_name_tranfer]
    x_train, y_train = data[0], data[1]
    x_test, y_test = data[-2], data[-1]
    y_true_val = None
    y_pred_val = None

    mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # shift labels so the smallest one is zero
    y_train, y_test = transform_labels(y_train, y_test)

    # keep the integer test labels; the arrays below become one-hot
    y_true = y_test.astype(np.int64)
    y_train = keras.utils.to_categorical(y_train, nb_classes)
    y_test = keras.utils.to_categorical(y_test, nb_classes)

    # univariate series get an explicit channel dimension
    if len(x_train.shape) == 2:
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    start_time = time.time()

    # build the network; the last layer is replaced with a new one
    model = build_model((None, x_train.shape[2]), nb_classes, pre_model)
    pre_model = None  # drop the reference so it can be reclaimed

    if verbose == True:
        model.summary()

    hist = model.fit(x_train, y_train,
                     batch_size=mini_batch_size,
                     epochs=nb_epochs,
                     verbose=verbose,
                     validation_data=(x_test, y_test),
                     callbacks=callbacks)

    # reload the model saved at file_path (presumably written by a
    # checkpoint callback -- confirm against the callbacks definition)
    model = keras.models.load_model(file_path)

    # predicted one-hot rows -> integer class labels
    y_pred = np.argmax(model.predict(x_test), axis=1)

    duration = time.time() - start_time
    df_metrics = save_logs(write_output_dir, hist, y_pred, y_true,
                           duration, y_true_val, y_pred_val)
    print(df_metrics)

    keras.backend.clear_session()
pattern_len = pattern_len_ nb_classes = len(pattern_len) * len(pattern_pos) for ts_len in ts_lens: for ts_n in ts_ns: x_train, y_train, x_test, y_test = create_synthetic_dataset( pattern_len=pattern_len, pattern_pos=pattern_pos, ts_len=ts_len, ts_n=ts_n) # make the min to zero of labels y_train, y_test = transform_labels(y_train, y_test) # save orignal y because later we will use binary y_true = y_test.astype(np.int64) y_true_train = y_train.astype(np.int64) # transform the labels from integers to one hot vectors enc = sklearn.preprocessing.OneHotEncoder() enc.fit( np.concatenate((y_train, y_test), axis=0).reshape(-1, 1)) y_train = enc.transform(y_train.reshape(-1, 1)).toarray() y_test = enc.transform(y_test.reshape(-1, 1)).toarray() if len(x_train.shape) == 2: # if uni-variate add a dimension to make it multivariate with one dimension x_train = x_train.reshape(
csi_train_label = csi_train_label[index] # 训练conv网络时,需要reshape成为4维 # csi_train_data = csi_train_data.reshape((sample_count, sequence_max_len, input_feature, 1)) # 划分训练集和测试集 train, test, train_label, test_label = train_test_split(csi_train_data, csi_train_label, test_size=0.3) # 计算类别数量 num_class = len( np.unique(np.concatenate((train_label, test_label), axis=0))) # make the min to zero of labels train_label, test_label = transform_labels(train_label, test_label) # save orignal y because later we will use binary y_true = test_label.astype(np.int64) # transform the labels from integers to one hot vectors train_label = keras.utils.to_categorical(train_label, num_class) test_label = keras.utils.to_categorical(test_label, num_class) # build model # model = bilstm_model(sequence_max_len=sequence_max_len, # input_feature=input_feature, # dropout_rate=dropout_rate, # num_class=num_class, # hidden_unit_num=hidden_unit_num) # model = bilstm_crf_model(sequence_max_len, input_feature, dropout_rate, num_class, hidden_unit_num)