Example #1
def prepare_data(augment_iter=0):
    X = []
    y = []

    # load the pickled samples for each class; the last index is the
    # catch-all 'None' class (no character)
    for i in range(n_classes):
        if i == n_classes - 1:
            char = 'None'
        else:
            char = vocabulary[i]
        with open(root_path + char + ".pkl", 'rb') as f:
            res_x = pickle.load(f)
        # integer labels: one row [i] per sample
        res_y = np.tile(i, (len(res_x), 1)).tolist()
        X += res_x
        y += res_y

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.15,
                                                        random_state=42,
                                                        stratify=y)

    X_train, y_train = augment_data(X_train, y_train, iterations=augment_iter)

    # add features and normalize data
    pen_up = []
    for i in range(len(X_train)):
        sequence = np.asarray(X_train[i])
        pen_up.append(sequence[:, 2])
        sequence = sequence[:, 0:2]
        sequence = add_features(sequence)
        X_train[i] = sequence

    # fit the scaler on the stacked training sequences only (no test leakage)
    data_scaler.fit(np.vstack(X_train))
    for i in range(len(X_train)):
        sequence = np.asarray(X_train[i])
        sequence = data_scaler.transform(sequence)
        X_train[i] = np.column_stack((sequence, pen_up[i]))

    for i in range(len(X_test)):
        sequence = np.asarray(X_test[i])
        pen_up = sequence[:, 2]
        sequence = sequence[:, 0:2]
        sequence = add_features(sequence)
        sequence = data_scaler.transform(sequence)
        X_test[i] = np.column_stack((sequence, pen_up))

    return X_train, X_test, y_train, y_test
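These examples lean on module-level names that are not shown (vocabulary, root_path, augment_data, add_features, data_scaler). A minimal sketch of the two feature-related helpers, assuming add_features appends first-order deltas and data_scaler is a scikit-learn StandardScaler; the project's real implementations may differ:

import numpy as np
from sklearn.preprocessing import StandardScaler

# Shared scaler; prepare_data fits it on the stacked training sequences.
data_scaler = StandardScaler()

def add_features(sequence):
    # Hypothetical feature step: append per-point deltas (dx, dy) to the
    # raw (x, y) coordinates, producing a (T, 4) array.
    deltas = np.diff(sequence, axis=0, prepend=sequence[:1])
    return np.column_stack((sequence, deltas))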
Example #2
def predict(sess, model):
    # poll the observer for newly completed samples and classify each one
    observer = DataObserver("demo.log")
    print("Real-time prediction started.")
    while True:
        new_entry = observer.step()
        if new_entry is not None:
            sequence = np.asarray(new_entry)
            pen_up = sequence[:, 2]
            sequence = sequence[:, 0:2]
            sequence = add_features(sequence)
            if CONFIG.use_normalization:
                sequence = data_scaler.transform(sequence)
            sequence = np.column_stack((sequence, pen_up)).tolist()

            # run the model on a single-element batch
            output = model.predict(sess, [sequence])
            prediction = np.argmax(output[0], axis=0)

            if prediction < len(vocabulary):
                print("Input detected: " + str(vocabulary[prediction]) + ", Probabilities: " + str(output[0]))

        time.sleep(0.05)
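A hypothetical driver for the loop above, assuming a TF1-style graph and a Model class exposing the predict(sess, batch) method used here; the Model constructor and the checkpoint path are placeholders:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

with tf.Session() as sess:
    model = Model(CONFIG)                         # hypothetical constructor
    tf.train.Saver().restore(sess, "model.ckpt")  # placeholder checkpoint
    predict(sess, model)                          # polls demo.log indefinitely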
Example #3
def prepare_data(pad_length=False):
    X = []
    y = []

    for i in range(CONFIG.n_classes):
        if i == CONFIG.n_classes-1:
            char = 'None'
        else:
            char = vocabulary[i]
        with open(CONFIG.root_path + char + ".pkl", 'rb') as f:
            res_x = pickle.load(f)
        # one-hot labels: repeat row i of the identity matrix once per sample
        res_y = np.tile(np.eye(CONFIG.n_classes)[i], (len(res_x), 1)).tolist()
        X += res_x
        y += res_y

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    if CONFIG.use_augmentation:
        X_train, y_train = augment_data(X_train, y_train, iterations=CONFIG.augment_iter)

    # add features and normalize data to 0 mean and unit variance
    pen_up = []
    for i in range(len(X_train)):
        sequence = np.asarray(X_train[i])
        pen_up.append(sequence[:, 2])
        sequence = sequence[:, 0:2]
        sequence = add_features(sequence)
        if CONFIG.use_normalization:
            # scaling and the pen_up column are applied below
            X_train[i] = sequence
        else:
            X_train[i] = np.column_stack((sequence, pen_up[i])).tolist()

    if CONFIG.use_normalization:
        global data_scaler
        data_scaler.fit(np.vstack(X_train))

        for i in range(len(X_train)):
            sequence = np.asarray(X_train[i])
            sequence = data_scaler.transform(sequence)
            X_train[i] = np.column_stack((sequence, pen_up[i])).tolist()

    for i in range(len(X_test)):
        sequence = np.asarray(X_test[i])
        pen_up = sequence[:, 2]
        sequence = sequence[:, 0:2]
        sequence = add_features(sequence)
        if CONFIG.use_normalization:
            sequence = data_scaler.transform(sequence)
        X_test[i] = np.column_stack((sequence, pen_up)).tolist()

    # # dimensionality reduction with PCA
    # pca = PCA(n_components=8)
    # pca.fit(np.vstack(X_train))
    #
    # for i in range(len(X_train)):
    #     X_train[i] = pca.transform(X_train[i]).tolist()
    #
    # for i in range(len(X_test)):
    #     X_test[i] = pca.transform(X_test[i]).tolist()

    # plot pca result
    # fig, ax1 = plt.subplots()
    # ax1.plot()
    # ax1.set_xlabel('components')
    # ax1.set_ylabel('variance percentage')
    # ax1.plot(range(1, len(pca.explained_variance_ratio_) + 1), pca.explained_variance_ratio_, color='tab:blue')
    # plt.show()

    if pad_length:
        max_seqLen = max(len(max(X_train, key=len)), len(max(X_test, key=len)))
        # Pad sequences for dimension consistency
        padding_mask = np.zeros(CONFIG.n_features).tolist()
        for i in range(len(X_train)):
            X_train[i] += [padding_mask for _ in range(max_seqLen - len(X_train[i]))]

        for i in range(len(X_test)):
            X_test[i] += [padding_mask for _ in range(max_seqLen - len(X_test[i]))]

    return X_train, X_test, y_train, y_test
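A short usage sketch for this variant: with pad_length=True every sequence is padded to a common length, so the returned splits (Python lists) stack into dense arrays for a batched fixed-length model. The shape comments assume the CONFIG fields used above.

X_train, X_test, y_train, y_test = prepare_data(pad_length=True)

X_train = np.asarray(X_train)  # (n_train, max_seqLen, CONFIG.n_features)
y_train = np.asarray(y_train)  # (n_train, CONFIG.n_classes), one-hot rows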
Example #4
                    if len(char_idx) > 1:
                        char_idx = list(range(char_idx[0], char_idx[-1] + 1))

                    label = index_and_label[1].replace(' ', '').replace(
                        '"', '').replace('\n', '')

                    # concatenate the strokes of this character, inserting a
                    # pen-up marker (0, 0, 1) after each stroke
                    sequence = []
                    for idx in char_idx:
                        sequence.extend(strokes[idx])
                        sequence.append([0., 0., 1.])

                    # add features
                    sequence = np.asarray(sequence)
                    pen_up.append(sequence[:, 2])
                    sequence = sequence[:, 0:2]
                    sequence = add_features(sequence)

                    # one-hot label; the extra final slot is the 'None' class
                    X.append(sequence)
                    onehot_label = np.zeros(len(vocabulary) + 1)
                    if label.upper() in vocabulary:
                        onehot_label[vocabulary.index(label.upper())] = 1.
                    else:
                        onehot_label[-1] = 1.
                    y.append(onehot_label)

# normalize data and add pen_up column
unipen_data_scaler = StandardScaler()
unipen_data_scaler.fit(np.vstack(X))
for i in range(len(X)):
    sequence = unipen_data_scaler.transform(X[i])
    X[i] = np.column_stack((sequence, pen_up[i])).tolist()
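The per-class .pkl files loaded by the prepare_data variants above are presumably written from pairs like this X/y. A minimal, hypothetical export sketch; the grouping key, root_path, and the exact contents of each pickle are assumptions:

import pickle
import numpy as np
from collections import defaultdict

# Group samples by their class and write one pickle per character,
# matching the root_path + char + ".pkl" files that prepare_data loads.
by_char = defaultdict(list)
for sample, onehot in zip(X, y):
    idx = int(np.argmax(onehot))
    char = vocabulary[idx] if idx < len(vocabulary) else 'None'
    by_char[char].append(sample)

for char, samples in by_char.items():
    with open(root_path + char + ".pkl", 'wb') as f:
        pickle.dump(samples, f)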
Example #5
def prepare_data(augment_iter=0):
    X = []
    y = []

    for i in range(n_classes):
        if i == n_classes - 1:
            char = 'None'
        else:
            char = vocabulary[i]
        with open(root_path + char + ".pkl", 'rb') as f:
            res_x = pickle.load(f)
        res_y = np.tile(i, (len(res_x), 1)).tolist()
        X += res_x
        y += res_y

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.15,
                                                        random_state=42,
                                                        stratify=y)

    X_train, y_train = augment_data(X_train, y_train, iterations=augment_iter)

    # add features and normalize data to 0 mean and unit variance
    pen_up = []
    for i in range(len(X_train)):
        sequence = np.asarray(X_train[i])
        pen_up.append(sequence[:, 2])
        sequence = sequence[:, 0:2]
        sequence = add_features(sequence)
        X_train[i] = sequence

    data_scaler.fit(np.vstack(X_train))
    for i in range(len(X_train)):
        sequence = np.asarray(X_train[i])
        sequence = data_scaler.transform(sequence)
        X_train[i] = np.column_stack((sequence, pen_up[i])).tolist()

    for i in range(len(X_test)):
        sequence = np.asarray(X_test[i])
        pen_up = sequence[:, 2]
        sequence = sequence[:, 0:2]
        sequence = add_features(sequence)
        sequence = data_scaler.transform(sequence)
        X_test[i] = np.column_stack((sequence, pen_up)).tolist()

    max_seqLen = max(len(max(X_train, key=len)), len(max(X_test, key=len)))
    # Pad sequences for dimension consistency
    padding_mask = np.zeros(n_features).tolist()
    for i in range(len(X_train)):
        X_train[i] += [
            padding_mask for _ in range(max_seqLen - len(X_train[i]))
        ]

    for i in range(len(X_test)):
        X_test[i] += [padding_mask for _ in range(max_seqLen - len(X_test[i]))]

    # flatten each padded sequence into a single feature vector
    X_train = np.asarray(X_train)
    X_train = X_train.reshape(len(X_train), -1)

    X_test = np.asarray(X_test)
    X_test = X_test.reshape(len(X_test), -1)

    return X_train, X_test, y_train, y_test
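Because this variant flattens every padded sequence into a single row, its output can be fed straight to a conventional scikit-learn estimator. A hypothetical usage sketch; the choice of SVC and the augment_iter value are assumptions:

import numpy as np
from sklearn.svm import SVC

X_train, X_test, y_train, y_test = prepare_data(augment_iter=2)

# y was built as one [i] row per sample, so ravel it to a 1-D label vector.
clf = SVC()
clf.fit(X_train, np.ravel(y_train))
print("test accuracy:", clf.score(X_test, np.ravel(y_test)))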