# Scraped example: train an RNN language model on a toy corpus.
# Python 2 script (print statements, xrange). NOTE(review): the epoch
# loop at the bottom is truncated -- its body continues past this fragment.
from rnn import *
import data

use_gpu(0) # -1:cpu; 0,1,2,..: gpu

# Hyperparameters.
e = 0.0              # NOTE(review): unused in the visible lines; presumably an error/stop threshold -- confirm
lr = 0.5             # learning rate (not referenced in the visible lines)
drop_rate = 0.       # dropout probability; 0 = no dropout
batch_size = 1000    # sentences per minibatch
hidden_size = [500]  # one hidden layer of 500 units
# try: gru, lstm
cell = "gru"
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam
optimizer = "adadelta" 

# Load corpus and vocabulary; i2w/w2i map indices <-> words both ways.
seqs, i2w, w2i, data_xy = data.word_sequence("/data/toy.txt", batch_size)
dim_x = len(w2i)  # input dimension = vocabulary size
dim_y = len(w2i)  # output dimension = vocabulary size
num_sents = data_xy[0][3]  # NOTE(review): 4th field of first batch; presumably sentence count -- verify in data.py
print "#features = ", dim_x, "#labels = ", dim_y

print "compiling..."
model = RNN(dim_x, dim_y, hidden_size, cell, optimizer, drop_rate, num_sents)

print "training..."
start = time.time()  # NOTE(review): `time` must come from `from rnn import *` -- confirm
g_error = 9999.9999  # best (lowest) error seen so far
for i in xrange(2000):  # up to 2000 epochs
    error = 0.0
    in_start = time.time()
    for batch_id, xy in data_xy.items():  # (truncated: loop body is missing from this fragment)
# --- Example #2 (scrape-separator artifact; the "0" below was a score/vote count on the source page) ---
# 0
# Scraped example: near-duplicate of the first training script (same
# pipeline, minor whitespace differences). Python 2; the epoch loop at
# the bottom is truncated -- its body continues past this fragment.
from rnn import *
import data

use_gpu(0)  # -1:cpu; 0,1,2,..: gpu

# Hyperparameters.
e = 0.0              # NOTE(review): unused in the visible lines; presumably an error/stop threshold -- confirm
lr = 0.5             # learning rate (not referenced in the visible lines)
drop_rate = 0.       # dropout probability; 0 = no dropout
batch_size = 1000    # sentences per minibatch
hidden_size = [500]  # one hidden layer of 500 units
# try: gru, lstm
cell = "gru"
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam
optimizer = "adadelta"

# Load corpus and vocabulary; i2w/w2i map indices <-> words both ways.
seqs, i2w, w2i, data_xy = data.word_sequence("/data/toy.txt", batch_size)
dim_x = len(w2i)  # input dimension = vocabulary size
dim_y = len(w2i)  # output dimension = vocabulary size
num_sents = data_xy[0][3]  # NOTE(review): 4th field of first batch; presumably sentence count -- verify in data.py
print "#features = ", dim_x, "#labels = ", dim_y

print "compiling..."
model = RNN(dim_x, dim_y, hidden_size, cell, optimizer, drop_rate, num_sents)

print "training..."
start = time.time()  # NOTE(review): `time` must come from `from rnn import *` -- confirm
g_error = 9999.9999  # best (lowest) error seen so far
for i in xrange(2000):  # up to 2000 epochs
    error = 0.0
    in_start = time.time()
    for batch_id, xy in data_xy.items():  # (truncated: loop body is missing from this fragment)
# Scraped example: train an RNN over word2vec embeddings, file by file.
# Fragment starts mid-script: embedding_size, hidden_size, epoch, files,
# number_of_batch, batch_size, training_folder, w2v_model,
# unknown_word_vector, lr and the RNN class are all defined earlier,
# outside this fragment; the snippet is also cut off mid-loop at the end.
print "initializing model"
model = RNN(embedding_size, embedding_size, hidden_size)

for _ in xrange(epoch):
  print "new epoch"
  error = 0.0
  in_start = time.time()
  for i in xrange(number_of_batch):
    print "batch: ", i
    # Slice out this batch's file list.
    training_files = files[i*batch_size : (i+1)*batch_size]

    for file in training_files:  # NOTE(review): `file` shadows the Python 2 builtin
      print "loading data from file: ", file
      # word_sequence also threads w2v_model/unknown_word_vector through,
      # returning possibly updated versions.
      data_x, data_y, w2v_model, unknown_word_vector = data.word_sequence(os.path.join(training_folder, file),
        w2v_model,
        unknown_word_vector,
        embedding_size)

      # Each of data_x/data_y unpacks to a (matrix, mask) pair.
      X, mask_X = data_x
      Y, mask_Y = data_y

      print "training"
      cost, sents = model.train(X, mask_X, Y, mask_Y, lr)
      error += cost  # accumulate batch cost into the epoch error

      print "decoding to text"
      # The slicing below treats sents as concatenated word vectors:
      # each column block of width embedding_size appears to be one sentence.
      for s in xrange(int(sents.shape[1] / embedding_size)):
        xs = sents[:, s * embedding_size : (s + 1) * embedding_size]
        for w_i in xrange(xs.shape[0]):
          vector = xs[w_i, :]
          # Nearest vocabulary word for this output vector (top-1 match).
          w, score = w2v_model.similar_by_vector(vector, topn=1)[0]
# --- Example #4 (scrape-separator artifact; the "0" below was a score/vote count on the source page) ---
# 0
# Scraped example: set up prediction on a test corpus with a previously
# trained model. Fragment: load_data_dic, `left`, use_gpu and the model
# loading/prediction itself are outside this snippet.
import data
import test_data




def predict():
	# TODO(review): stub -- prediction logic is not implemented in this fragment.
	pass



# Hyperparameters (must match the model being loaded).
e = 0.01   #error
lr = 0.5
drop_rate = 0.
batch_size = 1   #must be same as the loading model's batch size
hidden_size = [500]
# try: gru, lstm
cell = "gru"
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam
optimizer = "adadelta" 


test_path = "data/test.txt"
# NOTE(review): `left` is not defined in this fragment, and this assignment
# shadows the `test_data` module imported above -- verify intent upstream.
test_data = data.word_sequence(test_path, batch_size, left)

# Vocabulary maps (index->word, word->index) restored from pickles.
i2w, w2i = load_data_dic("data/i2w.pkl", "data/w2i.pkl")

dim_x = len(w2i)  # input dimension = vocabulary size
dim_y = len(w2i)  # output dimension = vocabulary size

num_sents = batch_size
# Scraped example: CLI-driven training. Fragment starts mid-script:
# `parser` (an argparse parser) and its other arguments (--gpu, --bsize,
# --hid, --path, --epoch) are defined before this snippet; the training
# loop is truncated at the end.
parser.add_argument("--model")  # optional saved-model path; training below only runs when absent

args = parser.parse_args(sys.argv[1:])
use_gpu(args.gpu)  # -1:cpu; 0,1,2,..: gpu

# Hyperparameters (some taken from the command line).
e = 0.01             # NOTE(review): unused in the visible lines; presumably an error/stop threshold -- confirm
lr = 0.1             # learning rate (not referenced in the visible lines)
drop_rate = 0.       # dropout probability; 0 = no dropout
batch_size = args.bsize
hidden_size = [args.hid]
# try: gru, lstm
cell = "lstm"
# try: sgd, momentum, rmsprop, adagrad, adadelta, adam, nesterov_momentum
optimizer = "adadelta"

# Load corpus and vocabulary; i2w/w2i map indices <-> words both ways.
seqs, i2w, w2i, data_xy = data.word_sequence(args.path, batch_size)
dim_x = len(w2i)  # input dimension = vocabulary size
dim_y = len(w2i)  # output dimension = vocabulary size
num_sents = data_xy[0][3]  # NOTE(review): 4th field of first batch; presumably sentence count -- verify in data.py
print "#features = ", dim_x, "#labels = ", dim_y

print "compiling..."
model = RNN(dim_x, dim_y, hidden_size, cell, optimizer, drop_rate, num_sents)
print "training..."
start = time.time()
g_error = 9999.9999  # best (lowest) error seen so far
if args.model is None:
    # No saved model supplied: train from scratch.
    for i in xrange(args.epoch):
        error = 0.0
        in_start = time.time()
        for batch_id, xy in data_xy.items():  # (truncated: loop body is missing from this fragment)
# --- Example #6 (scrape-separator artifact; the "0" below was a score/vote count on the source page) ---
# 0
# Scraped example: PEP8-reformatted duplicate of the earlier word2vec
# training snippet. Fragment starts mid-script: embedding_size,
# hidden_size, epoch, files, number_of_batch, batch_size,
# training_folder, w2v_model, unknown_word_vector, lr and the RNN class
# are all defined earlier, outside this fragment; the snippet is also
# cut off mid-loop at the end.
print "initializing model"
model = RNN(embedding_size, embedding_size, hidden_size)

for _ in xrange(epoch):
    print "new epoch"
    error = 0.0
    in_start = time.time()
    for i in xrange(number_of_batch):
        print "batch: ", i
        # Slice out this batch's file list.
        training_files = files[i * batch_size:(i + 1) * batch_size]

        for file in training_files:  # NOTE(review): `file` shadows the Python 2 builtin
            print "loading data from file: ", file
            # word_sequence threads w2v_model/unknown_word_vector through,
            # returning possibly updated versions.
            data_x, data_y, w2v_model, unknown_word_vector = data.word_sequence(
                os.path.join(training_folder, file), w2v_model,
                unknown_word_vector, embedding_size)

            # Each of data_x/data_y unpacks to a (matrix, mask) pair.
            X, mask_X = data_x
            Y, mask_Y = data_y

            print "training"
            cost, sents = model.train(X, mask_X, Y, mask_Y, lr)
            error += cost  # accumulate batch cost into the epoch error

            print "decoding to text"
            # The slicing below treats sents as concatenated word vectors:
            # each column block of width embedding_size appears to be one sentence.
            for s in xrange(int(sents.shape[1] / embedding_size)):
                xs = sents[:, s * embedding_size:(s + 1) * embedding_size]
                for w_i in xrange(xs.shape[0]):
                    vector = xs[w_i, :]
                    # Nearest vocabulary word for this output vector (top-1 match).
                    w, score = w2v_model.similar_by_vector(vector, topn=1)[0]