Ejemplo n.º 1
0
# Data setup for the tagged post/response files: hand the four parallel text
# files to get_data.processing, derive the model dimensions from the returned
# dictionaries, and pass both dictionary pairs to save_data_dic.
# Relies on `get_data`, `save_data_dic` and `batch_size` defined elsewhere.

# # full_data_len = 190363

# full_data_len = 10

x_path = "data/t-post.txt"
x_tag_path = "data/t-post-tag.txt"
y_path = "data/t-response.txt"
y_tag_path = "data/t-response-tag.txt"
# test_path = "data/toy2.txt"


# Forwarded unchanged to get_data.processing.
# NOTE(review): presumably a minimum word-frequency cutoff -- confirm.
threshold = 0


# _, _, i2w, w2i, tf, _ = get_data.processing(x_path, y_path, threshold, 0, 1, 1)
# NOTE(review): the literals 0 and 4 look like a start/end range over the
# input files -- confirm against get_data.processing.
X_seqs, y_seqs, y_tag_seqs, i2w, w2i, t2i, i2t, tf, data_x_y = get_data.processing(
    x_path, y_path, x_tag_path, y_tag_path, threshold, 0, 4, batch_size)
# test_data_x_y = get_data.test_processing(test_path, i2w, w2i, batch_size)

# Single-argument parenthesised print: identical output on Python 2 and 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

# Input and output dimensions are both the size of w2i; the tag dimension is
# the size of t2i.
dim_x = len(w2i)
dim_y = len(w2i)
dim_tag = len(t2i)
num_sents = batch_size


print("save data dic...")
save_data_dic("data/i2w-test-t.pkl", "data/w2i-test-t.pkl", i2w, w2i)
save_data_dic("data/i2t-test-t.pkl", "data/t2i-test-t.pkl", i2t, t2i)
Ejemplo n.º 2
0
# Setup for the 10000-line post / left / right files: choose the cell type and
# optimizer, load a few slices through get_data.processing, and hand the
# dictionaries from the first call to save_data_dic.
# Relies on `get_data`, `save_data_dic` and `batch_size` defined elsewhere.
cell = "gru"
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam
optimizer = "adadelta"

x_path = "data/10000X.txt"
y_left_path = "data/10000left.txt"
y_right_path = "data/10000right.txt"


# x_path = "data/post.txt"
# y_left_path = "data/left_r.txt"
# y_right_path = "data/right_r.txt"

# Forwarded unchanged to get_data.processing.
# NOTE(review): presumably a minimum word-frequency cutoff -- confirm.
threshold = 1

# NOTE(review): the two integers after `threshold` look like a start/end range
# and the trailing 1 like a batch size -- confirm against get_data.processing.
xs, yls, yrs, i2w, w2i, tf, data_4 = get_data.processing(
    x_path, y_left_path, y_right_path, threshold, 4, 5, 1)
# The next two calls unpack into the same xs4 ... tf4 names, so those hold the
# values from the second call; only data_t1 and data_t2 stay distinct.
xs4, yls4, yrs4, i2w4, w2i4, tf4, data_t1 = get_data.processing(
    x_path, y_left_path, y_right_path, threshold, 10000, 10001, 1)
xs4, yls4, yrs4, i2w4, w2i4, tf4, data_t2 = get_data.processing(
    x_path, y_left_path, y_right_path, threshold, 10001, 10002, 1)
# assert len(data_190353) == 1

# Single-argument parenthesised print: identical output on Python 2 and 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

# Both dimensions come from the dictionary of the first call (w2i, not w2i4).
dim_x = len(w2i)
dim_y = len(w2i)
num_sents = batch_size


print("save data dic...")
save_data_dic("data/i2w10000.pkl", "data/w2i10000.pkl", i2w, w2i)
Ejemplo n.º 3
0
# Setup for the NBA post/response files with tag files: choose the cell type
# and optimizer, load the data through get_data.processing, derive the model
# dimensions, and hand both dictionary pairs to save_data_dic.
# Relies on `get_data`, `save_data_dic` and `batch_size` defined elsewhere.

# try: gru, lstm
cell = "gru"
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam
optimizer = "sgd"

x_path = "data/nba/basketball-post_0527.txt"
x_tag_path = "data/nba/basketball-post_tag_0527.txt"
y_path = "data/nba/basketball-response_0527.txt"
y_tag_path = "data/nba/basketball-response-tag_0527.txt"
# test_path = "data/toy2.txt"
# test_path = "data/test-100.post"

# Forwarded unchanged to get_data.processing.
# NOTE(review): presumably a minimum word-frequency cutoff -- confirm.
threshold = 0

# _, _, i2w, w2i, tf, _ = get_data.processing(x_path, y_path, threshold, 0, 1, 1)
# NOTE(review): the literals 0 and 4000 look like a start/end range over the
# input files -- confirm against get_data.processing.
X_seqs, y_seqs, y_tag_seqs, i2w, w2i, t2i, i2t, tf, data_x_y = get_data.processing(
    x_path, y_path, x_tag_path, y_tag_path, threshold, 0, 4000, batch_size)
# test_data_x_y = get_data.test_processing(test_path, i2w, w2i, batch_size)

# Single-argument parenthesised print: identical output on Python 2 and 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

# Input and output dimensions are both the size of w2i; the tag dimension is
# the size of t2i.
dim_x = len(w2i)
dim_y = len(w2i)
dim_tag = len(t2i)
num_sents = batch_size

print("save data dic...")
save_data_dic("data/nba/i2w-test-t.pkl", "data/nba/w2i-test-t.pkl", i2w, w2i)
save_data_dic("data/nba/i2t-test-t.pkl", "data/nba/t2i-test-t.pkl", i2t, t2i)

# Joining with single spaces reproduces the comma-separated Python 2 print
# statement exactly, and works unchanged on Python 3.
print(" ".join(["#features = ", str(dim_x), "#labels = ", str(dim_y)]))
# Run configuration and data load for the SMT-train-8000 post/response pair.
# Relies on `get_data` defined elsewhere in the file.

# --- sizes ---
# full_data_len = 190363
full_data_len = 10000
read_data_batch = 8000

# --- model / training choices ---
# NOTE(review): hidden_size is presumably the list of hidden-layer widths --
# confirm where it is consumed.
hidden_size = [100, 200]
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam
optimizer = "adadelta"
# try: gru, lstm
cell = "gru"

# --- inputs ---
threshold = 0
y_path = "data/SMT-train-8000.response"
x_path = "data/SMT-train-8000.post"

# Only the three-argument form of get_data.processing is used here.
(xs, ys, i2w, w2i, tf, data_x_y) = get_data.processing(
    x_path, y_path, threshold)
# txs, tys, data_tx_ty = get_data.test_processing

# print "#dic = " + str(len(w2i))
# # print "unknown = " + str(tf["<UNknown>"])

# dim_x = len(w2i)
# dim_y = len(w2i)
# num_sents = batch_size


# print "save data dic..."
# save_data_dic("data/i2w10000.pkl", "data/w2i10000.pkl", i2w, w2i)

# print "#features = ", dim_x, "#labels = ", dim_y
# Load one slice of the 10000-line post / left / right files through
# get_data.processing and derive the model dimensions from the dictionary.
# Relies on `get_data` and `batch_size` defined elsewhere in the file.
x_path = "data/10000X.txt"
y_left_path = "data/10000left.txt"
y_right_path = "data/10000right.txt"


# x_path = "data/post.txt"
# y_left_path = "data/left_r.txt"
# y_right_path = "data/right_r.txt"

# Forwarded unchanged to get_data.processing.
# NOTE(review): presumably a minimum word-frequency cutoff -- confirm.
threshold = 0

# xs, yls, yrs, i2w, w2i, tf, data_4 = get_data.processing(x_path, y_left_path, y_right_path, threshold, 4, 5, 1)
# xs4, yls4, yrs4, i2w4, w2i4, tf4, data_t1 = get_data.processing(x_path, y_left_path, y_right_path, threshold, 10000, 10001, 1)
# xs4, yls4, yrs4, i2w4, w2i4, tf4, data_t2 = get_data.processing(x_path, y_left_path, y_right_path, threshold, 10001, 10002, 1)
# assert len(data_190353) == 1
# NOTE(review): 9902 and 10002 look like a start/end range over the input
# files; the result is still bound to the name data_4 -- confirm intent.
xs, yls, yrs, i2w, w2i, tf, data_4 = get_data.processing(
    x_path, y_left_path, y_right_path, threshold, 9902, 10002, batch_size)

# Single-argument parenthesised print: identical output on Python 2 and 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

dim_x = len(w2i)
dim_y = len(w2i)
num_sents = batch_size


# NOTE(review): the message is still printed although the save call below is
# commented out, so nothing is written here.
print("save data dic...")
# save_data_dic("data/i2w10000.pkl", "data/w2i10000.pkl", i2w, w2i)

# Joining with single spaces reproduces the comma-separated Python 2 print
# statement exactly, and works unchanged on Python 3.
print(" ".join(["#features = ", str(dim_x), "#labels = ", str(dim_y)]))

print("compiling...")
# Build the dictionaries from one get_data.processing call, prepare the test
# batches with get_data.test_processing, and hand the dictionaries to
# save_data_dic.
# Relies on `get_data`, `save_data_dic` and `batch_size` defined elsewhere.
# NOTE(review): x_path, y_path and test_path are only assigned in the
# commented-out lines below, so they must already be defined before this
# point -- confirm.

# batch_size = 10
# read_data_batch = 10
# # full_data_len = 190363

# full_data_len = 10

# x_path = "data/toy2.txt"
# y_path = "data/toy3.txt"
# test_path = "data/toy2.txt"


# Forwarded unchanged to get_data.processing.
# NOTE(review): presumably a minimum word-frequency cutoff -- confirm.
threshold = 1


# Only the dictionaries and tf are kept; the other three results are discarded.
_, _, i2w, w2i, tf, _ = get_data.processing(x_path, y_path, threshold, 0, 1, 1)

test_data_x_y = get_data.test_processing(test_path, i2w, w2i, batch_size)

# Single-argument parenthesised print: identical output on Python 2 and 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

dim_x = len(w2i)
dim_y = len(w2i)
num_sents = batch_size


print("save data dic...")
save_data_dic("data/i2w8000.pkl", "data/w2i8000.pkl", i2w, w2i)

# Joining with single spaces reproduces the comma-separated Python 2 print
# statement exactly, and works unchanged on Python 3.
print(" ".join(["#features = ", str(dim_x), "#labels = ", str(dim_y)]))
# Run configuration plus data load for the post / left / right files: one main
# load, two extra loads bound to data_49522 and data_49540, then the
# dictionaries from the main load are handed to save_data_dic.
# Relies on `get_data`, `save_data_dic` and `batch_size` defined elsewhere.
read_data_batch = 500
full_data_len = 190363
hidden_size = [200]
# try: gru, lstm
cell = "gru"
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam
optimizer = "adadelta"


x_path = "data/10000X.txt"
# NOTE(review): the "left" variable points at the "r" file and the "right"
# variable at the "l" file -- confirm whether this is intentional.
y_left_path = "data/10000r.txt"
y_right_path = "data/10000l.txt"

# Forwarded unchanged to get_data.processing.
# NOTE(review): presumably a minimum word-frequency cutoff -- confirm.
threshold = 0

xs, yls, yrs, i2w, w2i, tf, data_x_yl_yr = get_data.processing(
    x_path, y_left_path, y_right_path, threshold, 0, 10000, batch_size)
# The next two calls unpack into the same xs4 ... tf4 names, so those hold the
# values from the second call; only data_49522 and data_49540 stay distinct.
xs4, yls4, yrs4, i2w4, w2i4, tf4, data_49522 = get_data.processing(
    x_path, y_left_path, y_right_path, threshold, 49522, 49523, batch_size)
# NOTE(review): both integer arguments are 49540 here, whereas the call above
# uses consecutive values (49522, 49523) -- confirm this is not an empty range.
xs4, yls4, yrs4, i2w4, w2i4, tf4, data_49540 = get_data.processing(
    "data/post.txt", "data/left_r.txt", "data/right_r.txt",
    threshold, 49540, 49540, batch_size)

# assert len(data_190353) == 1

# Single-argument parenthesised print: identical output on Python 2 and 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

# Both dimensions come from the dictionary of the main load (w2i, not w2i4).
dim_x = len(w2i)
dim_y = len(w2i)
num_sents = batch_size


print("save data dic...")
save_data_dic("data/i2w.pkl", "data/w2i.pkl", i2w, w2i)
# Load the first slice of the data, derive the model dimensions, build a FANN
# model and pass it to load_model together with a saved-model path.
# Relies on `get_data`, `FANN`, `load_model`, `batch_size`, `hidden_size`,
# `cell`, `optimizer` and `drop_rate` defined elsewhere in the file.
# NOTE(review): x_path and y_path are only assigned in the commented-out lines
# below, so they must already be defined before this point -- confirm.

# batch_size = 10
# read_data_batch = 10
# # full_data_len = 190363

# full_data_len = 10

# x_path = "data/toy2.txt"
# y_path = "data/toy3.txt"
# test_path = "data/toy2.txt"


# Forwarded unchanged to get_data.processing.
# NOTE(review): presumably a minimum word-frequency cutoff -- confirm.
threshold = 0


# _, _, i2w, w2i, tf, _ = get_data.processing(x_path, y_path, threshold, 0, 1, 1)
# NOTE(review): the literals 0 and 200 look like a start/end range over the
# input files -- confirm against get_data.processing.
X_seqs, y_seqs, i2w, w2i, tf, data_x_y = get_data.processing(
    x_path, y_path, threshold, 0, 200, batch_size)
# test_data_x_y = get_data.test_processing(test_path, i2w, w2i, batch_size)

# Single-argument parenthesised print: identical output on Python 2 and 3.
print("#dic = " + str(len(w2i)))
# print "unknown = " + str(tf["<UNknown>"])

dim_x = len(w2i)
dim_y = len(w2i)
num_sents = batch_size

# Joining with single spaces reproduces the comma-separated Python 2 print
# statement exactly, and works unchanged on Python 3.
print(" ".join(["#features = ", str(dim_x), "#labels = ", str(dim_y)]))

print("compiling...")
model = FANN(dim_x, dim_y, hidden_size, cell, optimizer, drop_rate, num_sents)
# The return value is not used.
# NOTE(review): presumably load_model fills `model` in place -- confirm.
load_model("data/GRU-200_best.model", model)