def prepare_data(augment_iter=0):
    X = []
    y = []

    # load the pickled sequences for every class; the last class is the
    # "no character" rejection class
    for i in range(n_classes):
        if i == n_classes - 1:
            char = 'None'
        else:
            char = vocabulary[i]
        with open(root_path + char + ".pkl", 'rb') as f:
            res_x = pickle.load(f)
        res_y = np.tile(i, (len(res_x), 1)).tolist()
        X += res_x
        y += res_y

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.15, random_state=42, stratify=y)
    X_train, y_train = augment_data(X_train, y_train, iterations=augment_iter)

    # add features and normalize data
    pen_up = []
    for i in range(len(X_train)):
        sequence = np.asarray(X_train[i])
        pen_up.append(sequence[:, 2])   # keep the pen-up column aside
        sequence = sequence[:, 0:2]     # x/y coordinates only
        sequence = add_features(sequence)
        X_train[i] = sequence

    # fit the scaler on the training set only, then re-attach the pen-up column
    data_scaler.fit(np.vstack(X_train))
    for i in range(len(X_train)):
        sequence = np.asarray(X_train[i])
        sequence = data_scaler.transform(sequence)
        X_train[i] = np.column_stack((sequence, pen_up[i]))

    # apply the same feature pipeline to the test set
    for i in range(len(X_test)):
        sequence = np.asarray(X_test[i])
        pen_up = sequence[:, 2]
        sequence = sequence[:, 0:2]
        sequence = add_features(sequence)
        sequence = data_scaler.transform(sequence)
        X_test[i] = np.column_stack((sequence, pen_up))

    return X_train, X_test, y_train, y_test
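# `add_features` is called throughout but not defined in this excerpt. Below is
# a minimal sketch of what such a feature extractor could look like for (x, y)
# pen trajectories, assuming per-point deltas, speed, and writing direction;
# the function name and feature set are illustrative, not the project's actual
# implementation:
def add_features_sketch(xy):
    """Append per-point deltas, speed, and direction to an (N, 2) array."""
    xy = np.asarray(xy, dtype=float)
    deltas = np.diff(xy, axis=0, prepend=xy[:1])           # dx, dy per point
    speed = np.linalg.norm(deltas, axis=1, keepdims=True)  # step length
    angle = np.arctan2(deltas[:, 1], deltas[:, 0])         # writing direction
    direction = np.column_stack((np.sin(angle), np.cos(angle)))
    return np.column_stack((xy, deltas, speed, direction))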
def predict(sess, model):
    # real-time prediction on incoming samples
    observer = DataObserver("demo.log")
    print("Real-time prediction started.")

    while True:
        new_entry = observer.step()
        if new_entry is not None:
            # same feature pipeline as in training
            sequence = np.asarray(new_entry)
            pen_up = sequence[:, 2]
            sequence = sequence[:, 0:2]
            sequence = add_features(sequence)
            if CONFIG.use_normalization:
                sequence = data_scaler.transform(sequence)
            sequence = np.column_stack((sequence, pen_up)).tolist()

            # get the model's prediction for the single prepared sequence
            output = model.predict(sess, [sequence])
            prediction = np.argmax(output[0], axis=0)
            if prediction < len(vocabulary):
                print("Input detected: " + str(vocabulary[prediction])
                      + ", Probabilities: " + str(output[0]))
        time.sleep(0.05)
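# `DataObserver` is also external to this excerpt. A plausible sketch, assuming
# it tails the log file and returns one complete [[x, y, pen_up], ...] sequence
# per newly appended line; the file format and interface here are assumptions,
# not the project's actual code:
import json

class DataObserverSketch:
    """Poll a log file and return newly appended sequences, one per step()."""
    def __init__(self, path):
        self.file = open(path, 'r')
        self.file.seek(0, 2)          # start at end of file; only read new entries

    def step(self):
        line = self.file.readline()
        if not line:
            return None               # nothing new since the last poll
        return json.loads(line)       # one serialized sequence per line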
def prepare_data(pad_length=False):
    X = []
    y = []

    for i in range(CONFIG.n_classes):
        if i == CONFIG.n_classes - 1:
            char = 'None'
        else:
            char = vocabulary[i]
        with open(CONFIG.root_path + char + ".pkl", 'rb') as f:
            res_x = pickle.load(f)
        res_y = np.tile(np.eye(CONFIG.n_classes)[i], (len(res_x), 1)).tolist()
        X += res_x
        y += res_y

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)

    if CONFIG.use_augmentation:
        X_train, y_train = augment_data(X_train, y_train,
                                        iterations=CONFIG.augment_iter)

    # add features and normalize data to 0 mean and unit variance
    pen_up = []
    for i in range(len(X_train)):
        sequence = np.asarray(X_train[i])
        pen_up.append(sequence[:, 2])
        sequence = sequence[:, 0:2]
        sequence = add_features(sequence)
        if CONFIG.use_normalization:
            X_train[i] = sequence
        else:
            X_train[i] = np.column_stack((sequence, pen_up[i])).tolist()

    if CONFIG.use_normalization:
        global data_scaler
        data_scaler.fit(np.vstack(X_train))
        for i in range(len(X_train)):
            sequence = np.asarray(X_train[i])
            sequence = data_scaler.transform(sequence)
            X_train[i] = np.column_stack((sequence, pen_up[i])).tolist()

    for i in range(len(X_test)):
        sequence = np.asarray(X_test[i])
        pen_up = sequence[:, 2]
        sequence = sequence[:, 0:2]
        sequence = add_features(sequence)
        if CONFIG.use_normalization:
            sequence = data_scaler.transform(sequence)
        X_test[i] = np.column_stack((sequence, pen_up)).tolist()

    # # dimensionality reduction with PCA
    # pca = PCA(n_components=8)
    # pca.fit(np.vstack(X_train))
    #
    # for i in range(len(X_train)):
    #     X_train[i] = pca.transform(X_train[i]).tolist()
    #
    # for i in range(len(X_test)):
    #     X_test[i] = pca.transform(X_test[i]).tolist()

    # plot PCA result
    # fig, ax1 = plt.subplots()
    # ax1.set_xlabel('components')
    # ax1.set_ylabel('variance percentage')
    # ax1.plot(range(1, len(pca.explained_variance_ratio_) + 1),
    #          pca.explained_variance_ratio_, color='tab:blue')
    # plt.show()

    if pad_length:
        # pad sequences for dimension consistency
        max_seqLen = max(len(max(X_train, key=len)), len(max(X_test, key=len)))
        padding_mask = np.zeros(CONFIG.n_features).tolist()
        for i in range(len(X_train)):
            X_train[i] += [padding_mask for _ in range(max_seqLen - len(X_train[i]))]
        for i in range(len(X_test)):
            X_test[i] += [padding_mask for _ in range(max_seqLen - len(X_test[i]))]

    return X_train, X_test, y_train, y_test
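# `augment_data` is likewise not shown in this excerpt. A minimal sketch of one
# common approach, assuming random rescaling plus Gaussian jitter of the raw
# x/y columns (augmentation runs before add_features, so samples still have
# [x, y, pen_up] columns); the exact augmentation the project uses is an
# assumption here:
def augment_data_sketch(X, y, iterations=1, noise_std=0.01, scale_range=0.1):
    """Return X, y extended with `iterations` perturbed copies of each sample."""
    X_aug, y_aug = list(X), list(y)
    for _ in range(iterations):
        for sequence, label in zip(X, y):
            seq = np.asarray(sequence, dtype=float).copy()
            scale = 1.0 + np.random.uniform(-scale_range, scale_range)
            seq[:, 0:2] *= scale                                    # random rescale
            seq[:, 0:2] += np.random.normal(0, noise_std,
                                            seq[:, 0:2].shape)      # coordinate jitter
            X_aug.append(seq.tolist())
            y_aug.append(label)
    return X_aug, y_aug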
if len(char_idx) > 1:
    char_idx = list(range(char_idx[0], char_idx[-1] + 1))
label = index_and_label[1].replace(' ', '').replace('"', '').replace('\n', '')

# concatenate the strokes belonging to this character, with a pen-up
# separator row after each stroke
sequence = []
for idx in char_idx:
    sequence.extend(strokes[idx])
    sequence.extend([[0., 0., 1.]])

# add features
sequence = np.asarray(sequence)
pen_up.append(sequence[:, 2])
sequence = sequence[:, 0:2]
sequence = add_features(sequence)

X.append(sequence)
onehot_label = np.zeros(len(vocabulary) + 1)
if label.upper() in vocabulary:
    onehot_label[vocabulary.index(label.upper())] = 1.
else:
    onehot_label[-1] = 1.   # unknown characters go to the extra "None" class
y.append(onehot_label)

# normalize data and add pen_up column
unipen_data_scaler = StandardScaler()
unipen_data_scaler.fit(np.vstack(X))
for i in range(len(X)):
    X[i] = np.column_stack((unipen_data_scaler.transform(X[i]),
                            pen_up[i])).tolist()
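# The manual one-hot construction above is equivalent to indexing an identity
# matrix, which is the pattern the CONFIG-based prepare_data variant already
# uses via np.tile(np.eye(...)[i], ...):
n_labels = len(vocabulary) + 1                     # + 1 for the "None" class
idx = (vocabulary.index(label.upper())
       if label.upper() in vocabulary else n_labels - 1)
onehot_label = np.eye(n_labels)[idx]               # row idx of the identity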
def prepare_data(augment_iter=0):
    X = []
    y = []

    for i in range(n_classes):
        if i == n_classes - 1:
            char = 'None'
        else:
            char = vocabulary[i]
        with open(root_path + char + ".pkl", 'rb') as f:
            res_x = pickle.load(f)
        res_y = np.tile(i, (len(res_x), 1)).tolist()
        X += res_x
        y += res_y

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.15, random_state=42, stratify=y)
    X_train, y_train = augment_data(X_train, y_train, iterations=augment_iter)

    # add features and normalize data to 0 mean and unit variance
    pen_up = []
    for i in range(len(X_train)):
        sequence = np.asarray(X_train[i])
        pen_up.append(sequence[:, 2])
        sequence = sequence[:, 0:2]
        sequence = add_features(sequence)
        X_train[i] = sequence

    data_scaler.fit(np.vstack(X_train))
    for i in range(len(X_train)):
        sequence = np.asarray(X_train[i])
        sequence = data_scaler.transform(sequence)
        X_train[i] = np.column_stack((sequence, pen_up[i])).tolist()

    for i in range(len(X_test)):
        sequence = np.asarray(X_test[i])
        pen_up = sequence[:, 2]
        sequence = sequence[:, 0:2]
        sequence = add_features(sequence)
        sequence = data_scaler.transform(sequence)
        X_test[i] = np.column_stack((sequence, pen_up)).tolist()

    # pad sequences for dimension consistency
    max_seqLen = max(len(max(X_train, key=len)), len(max(X_test, key=len)))
    padding_mask = np.zeros(n_features).tolist()
    for i in range(len(X_train)):
        X_train[i] += [padding_mask for _ in range(max_seqLen - len(X_train[i]))]
    for i in range(len(X_test)):
        X_test[i] += [padding_mask for _ in range(max_seqLen - len(X_test[i]))]

    # flatten each padded sequence into a single fixed-size feature vector
    X_train = np.asarray(X_train)
    shape = np.shape(X_train)
    X_train = np.reshape(X_train, (shape[0], shape[1] * shape[2]))

    X_test = np.asarray(X_test)
    shape = np.shape(X_test)
    X_test = np.reshape(X_test, (shape[0], shape[1] * shape[2]))

    return X_train, X_test, y_train, y_test
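# With the sequences padded and flattened, any fixed-size classifier can
# consume them. A usage sketch, assuming scikit-learn is available; the actual
# model trained on this variant is not shown in this excerpt, and note that
# this variant's labels arrive as [[i], [i], ...], hence the np.ravel calls:
from sklearn.svm import SVC

X_train, X_test, y_train, y_test = prepare_data(augment_iter=2)
clf = SVC(probability=True)
clf.fit(X_train, np.ravel(y_train))
print("test accuracy:", clf.score(X_test, np.ravel(y_test)))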