def dump_data_for_nn():
    """Read, shuffle and pickle the train/validation data for the NN model.

    Both splits are loaded with
    ``generator.read_data_in_imprisonment_format``, each split is shuffled
    with its own random permutation (applied to features and labels alike so
    rows stay aligned), and the four resulting arrays are pickled under
    ``./dump_data/nn/``.  The files carry a ``.txt`` suffix but contain
    binary pickle data.

    Returns:
        None.  Results are written to disk; progress is printed to stdout.
    """
    import os  # local import: only needed to prepare the output directory

    # Create the output directory up front so that a missing folder does not
    # make the dump fail only after the data has already been read.
    os.makedirs('./dump_data/nn', exist_ok=True)

    accu_dict, _ = generator.read_accu()
    word_dict, embedding, _ = generator.get_dictionary_and_embedding()

    print("reading data from training set...")
    train_data_x, train_data_y = generator.read_data_in_imprisonment_format(
        constant.DATA_TRAIN, embedding, word_dict, accu_dict)
    valid_data_x, valid_data_y = generator.read_data_in_imprisonment_format(
        constant.DATA_VALID, embedding, word_dict, accu_dict)
    print("reading complete!")

    # Shuffle the data randomly.  Labels are indexed with the permutation
    # alone (no trailing ``:``): that selects along the first axis whatever
    # the rank of the array, so 1-D label vectors work as well as 2-D ones.
    permutation_for_train = np.random.permutation(train_data_x.shape[0])
    train_data_x = train_data_x[permutation_for_train, :]
    train_data_y = train_data_y[permutation_for_train]

    permutation_for_valid = np.random.permutation(valid_data_x.shape[0])
    valid_data_x = valid_data_x[permutation_for_valid, :]
    valid_data_y = valid_data_y[permutation_for_valid]

    dump_targets = (
        ('./dump_data/nn/dump_train_x.txt', train_data_x),
        ('./dump_data/nn/dump_train_y_label.txt', train_data_y),
        ('./dump_data/nn/dump_valid_x.txt', valid_data_x),
        ('./dump_data/nn/dump_valid_y_label.txt', valid_data_y),
    )
    for dump_path, array in dump_targets:
        with open(dump_path, 'wb') as f:
            pickle.dump(array, f)

    print("dump complete!")
# Example #2
0
    # NOTE(review): this excerpt starts inside a block whose opening statement
    # (presumably a `try:` plus the load of the training features) is not
    # visible here -- confirm against the full file.
    # Load the pickled training labels.
    with open('./dump_data/nn/dump_train_y_label.txt', 'rb') as f:
        train_data_y = pickle.load(f)

    # Load the pickled validation features.
    with open('./dump_data/nn/dump_valid_x.txt', 'rb') as f:
        valid_data_x = pickle.load(f)

    # Load the pickled validation labels.
    with open('./dump_data/nn/dump_valid_y_label.txt', 'rb') as f:
        valid_data_y = pickle.load(f)
except:
    # NOTE(review): this bare `except:` catches every exception (including
    # KeyboardInterrupt), not only a missing or unreadable dump file --
    # consider narrowing it.
    # Fallback: tell the user, then rebuild the data from the original files.
    print(
        "No dump file read original file! Please wait... "
        "If u want to accelerate this process, please see read_me -> transform_data_to_feature_and_dump"
    )
    accu_dict, reverse_accu_dict = generator.read_accu()
    word_dict, embedding, reverse_dictionary = generator.get_dictionary_and_embedding(
    )

    # Re-read both splits with one-hot labels requested (one_hot=True).
    train_data_x, train_data_y = generator.read_data_in_accu_format(
        constant.DATA_TRAIN, embedding, word_dict, accu_dict, one_hot=True)
    valid_data_x, valid_data_y = generator.read_data_in_accu_format(
        constant.DATA_VALID, embedding, word_dict, accu_dict, one_hot=True)

print("reading complete!")

# Smoke test of the batch generator: draw one batch from the training data
# and print the shape of its features.  `training_batch_size` is defined
# outside this excerpt.
x, y = generator.generate_batch(training_batch_size, train_data_x,
                                train_data_y)
print(x.shape)

print("data load complete")
print("The model begin here")
print("The model begin here")
# Example #3
0
def dump_data_for_cnn():
    """Read, shuffle and pickle the train/validation/test data for the CNN.

    The three splits are loaded with
    ``matrix_generator.read_data_in_article_format_with_accu`` (labels
    requested with ``one_hot=True``), each split is shuffled with its own
    random permutation (applied to features and labels alike so rows stay
    aligned), and the six resulting arrays are pickled under
    ``./dump_data/cnn/``.  The files carry a ``.txt`` suffix but contain
    binary pickle data.

    The literal ``10`` is forwarded as the third positional argument of the
    reader; its meaning is defined by ``matrix_generator`` and is not visible
    from this function.

    Returns:
        None.  Results are written to disk; progress is printed to stdout.
    """
    import os  # local import: only needed to prepare the output directory

    # Create the output directory up front so that a missing folder does not
    # make the dump fail only after the data has already been read.
    os.makedirs('./dump_data/cnn', exist_ok=True)

    article_dict, _ = generator.read_article()
    word_dict, embedding, _ = generator.get_dictionary_and_embedding()

    read_split = matrix_generator.read_data_in_article_format_with_accu

    print("reading data from training set...")
    train_data_x, train_data_y = read_split(
        constant.DATA_TRAIN, embedding, 10, word_dict, article_dict,
        one_hot=True)
    valid_data_x, valid_data_y = read_split(
        constant.DATA_VALID, embedding, 10, word_dict, article_dict,
        one_hot=True)
    test_data_x, test_data_y = read_split(
        constant.DATA_TEST, embedding, 10, word_dict, article_dict,
        one_hot=True)
    print("reading complete!")

    # Shuffle the data randomly.  Labels are indexed with the permutation
    # alone (no trailing ``:``): that selects along the first axis whatever
    # the rank of the array, and keeps all three splits consistent.
    permutation_for_train = np.random.permutation(train_data_x.shape[0])
    train_data_x = train_data_x[permutation_for_train, :]
    train_data_y = train_data_y[permutation_for_train]

    permutation_for_valid = np.random.permutation(valid_data_x.shape[0])
    valid_data_x = valid_data_x[permutation_for_valid, :]
    valid_data_y = valid_data_y[permutation_for_valid]

    permutation_for_test = np.random.permutation(test_data_x.shape[0])
    test_data_x = test_data_x[permutation_for_test, :]
    test_data_y = test_data_y[permutation_for_test]

    dump_targets = (
        ('./dump_data/cnn/dump_train_x.txt', train_data_x),
        ('./dump_data/cnn/dump_train_y_label.txt', train_data_y),
        ('./dump_data/cnn/dump_valid_x.txt', valid_data_x),
        ('./dump_data/cnn/dump_valid_y_label.txt', valid_data_y),
        ('./dump_data/cnn/dump_test_x.txt', test_data_x),
        ('./dump_data/cnn/dump_test_y_label.txt', test_data_y),
    )
    for dump_path, array in dump_targets:
        with open(dump_path, 'wb') as f:
            pickle.dump(array, f)

    print("dump complete!")