def __init__(self, time_step, split, lr):
    """Build the policy/target network pair, replay memory and optimizer.

    Args:
        time_step: attention window length forwarded to encoder/decoder.
        split: train/test split ratio for the dataset.
        lr: learning rate for the RMSprop optimizer on the policy net.
    """
    self.dataset = Dataset(T=time_step,
                           split_ratio=split,
                           binary_file=config.BINARY_DATASET)
    n_features = self.dataset.get_num_features()

    def build_net():
        # One encoder/decoder pair plus the DQN head stacked on top.
        enc = AttnEncoder(input_size=n_features,
                          hidden_size=config.ENCODER_HIDDEN_SIZE,
                          time_step=time_step)
        dec = AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE,
                          hidden_size=config.DECODER_HIDDEN_SIZE,
                          time_step=time_step)
        return enc, dec, DQN(enc, dec)

    (self.policy_net_encoder,
     self.policy_net_decoder,
     self.policy_net) = build_net()
    (self.target_net_encoder,
     self.target_net_decoder,
     self.target_net) = build_net()

    if torch.cuda.is_available():
        self.policy_net_encoder = self.policy_net_encoder.cuda()
        self.policy_net_decoder = self.policy_net_decoder.cuda()
        self.target_net_encoder = self.target_net_encoder.cuda()
        self.target_net_decoder = self.target_net_decoder.cuda()
        self.policy_net = self.policy_net.cuda()
        self.target_net = self.target_net.cuda()

    self.memory = ReplayMemory(config.MEMORY_CAPACITY)
    self.optimizer = optim.RMSprop(self.policy_net.parameters(), lr=lr)
def __init__(self, driving, target, time_step, split, lr):
    """Set up the dataset, attention encoder/decoder, Adam optimizers
    and MSE loss for a driving-series -> target-series trainer."""
    self.dataset = Dataset(driving, target, time_step, split)
    feature_count = self.dataset.get_num_features()
    self.encoder = AttnEncoder(input_size=feature_count,
                               hidden_size=config.ENCODER_HIDDEN_SIZE,
                               time_step=time_step)
    self.decoder = AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE,
                               hidden_size=config.DECODER_HIDDEN_SIZE,
                               time_step=time_step)
    # Move both halves of the model to the GPU when one is present.
    if torch.cuda.is_available():
        self.encoder, self.decoder = self.encoder.cuda(), self.decoder.cuda()
    # Separate optimizers: one per sub-network.
    self.encoder_optim = optim.Adam(self.encoder.parameters(), lr)
    self.decoder_optim = optim.Adam(self.decoder.parameters(), lr)
    self.loss_func = nn.MSELoss()
    self.train_size, self.test_size = self.dataset.get_size()
def get_model(config):
    """Construct the encoder/decoder pair for `config`.

    The decoder is attention-based unless ``config.attn_method`` is the
    literal string ``"disabled"``; both modules are moved to CUDA when
    ``config.cuda`` requests it.
    """
    decoder_cls = Decoder if config.attn_method == "disabled" else AttnDecoder
    encoder = maybe_cuda(Encoder(config), cuda=config.cuda)
    decoder = maybe_cuda(decoder_cls(config), cuda=config.cuda)
    return encoder, decoder
def __init__(self, time_step, split, lr):
    """Assemble the joint encoder-decoder Model, its Adam optimizer and
    the MSE training loss from the binary dataset in `config`."""
    self.dataset = Dataset(T=time_step,
                           split_ratio=split,
                           binary_file=config.BINARY_DATASET)
    enc = AttnEncoder(input_size=self.dataset.get_num_features(),
                      hidden_size=config.ENCODER_HIDDEN_SIZE,
                      time_step=time_step)
    dec = AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE,
                      hidden_size=config.DECODER_HIDDEN_SIZE,
                      time_step=time_step)
    self.encoder = enc
    self.decoder = dec
    self.model = Model(enc, dec)
    if torch.cuda.is_available():
        self.encoder = self.encoder.cuda()
        self.decoder = self.decoder.cuda()
        self.model = self.model.cuda()
    # A single optimizer over the joint model (rather than one
    # optimizer per sub-network).
    self.model_optim = optim.Adam(self.model.parameters(), lr)
    self.loss_func = nn.MSELoss()
    self.train_size, self.test_size = self.dataset.get_size()
def __init__(self, driving, target, time_step, split, lr, regression=True):
    """Build a trainer that works either as a regressor or a classifier.

    Args:
        driving, target: accepted for interface compatibility; the data
            actually comes from ``config.BINARY_DATASET_HEADER``.
        time_step: attention window length for encoder/decoder.
        split: train/test split ratio.
        lr: Adam learning rate for the joint model.
        regression: if True train with MSE; otherwise with a per-element
            (unreduced) weighted cross-entropy.
    """
    self.dataset = Dataset(T=time_step,
                           split_ratio=split,
                           binary_file=config.BINARY_DATASET_HEADER)
    self.encoder = AttnEncoder(input_size=self.dataset.get_num_features(),
                               hidden_size=config.ENCODER_HIDDEN_SIZE,
                               time_step=time_step)
    self.decoder = AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE,
                               hidden_size=config.DECODER_HIDDEN_SIZE,
                               time_step=time_step)
    self.model = Model(self.encoder, self.decoder)
    if torch.cuda.is_available():
        self.encoder = self.encoder.cuda()
        self.decoder = self.decoder.cuda()
        self.model = self.model.cuda()
    self.model_optim = optim.Adam(self.model.parameters(), lr)
    if regression:
        # Regression model.
        self.loss_func = nn.MSELoss()
    else:
        # Classification model. Equal class weights for the two classes.
        weight = torch.Tensor([1, 1])
        # FIX: `reduce=False, size_average=False` were deprecated in
        # PyTorch 0.4.1. `reduction='none'` is the documented equivalent
        # and still yields an unreduced, per-element loss tensor.
        self.loss_func = nn.CrossEntropyLoss(reduction='none', weight=weight)
    self.train_size, self.test_size, self.total_size = self.dataset.get_size()
    print("train_size = %d (in terms of number of binary files)" %
          self.train_size)
    print("test_size = %d (in terms of number of binary files)" %
          self.test_size)
def run(do_train, do_eval, do_predict, ckpt, get_rouge, max_epochs=100):
    """Build the pointer-generator model and dispatch to eval/predict/train.

    Args:
        do_train: run training after the optional checkpoint handling.
        do_eval: evaluate the checkpointed model on the test set.
        do_predict: decode one training batch and print predictions.
        ckpt: path to a checkpoint; when falsy, training starts at epoch 0.
        get_rouge: also produce ROUGE files/scores from the checkpoint.
        max_epochs: total epoch budget (training runs max_epochs - epoch).
    """
    train_set = Articles(test=False)
    test_set = Articles(test=True)
    train_loader = DataLoader(train_set, batch_size=batch_size,
                              shuffle=False, num_workers=1)
    test_loader = DataLoader(test_set, batch_size=batch_size,
                             shuffle=False, num_workers=1)
    encoder = Encoder()
    attention_decoder = AttnDecoder()
    model = PointerGenerator(encoder, attention_decoder)
    model.to(device)
    optimizer = torch.optim.Adagrad(model.parameters(), lr=lr)
    loss_function = torch.nn.NLLLoss()
    if ckpt:
        model, optimizer, epoch = load_ckp(checkpoint_path=ckpt,
                                           model=model,
                                           optimizer=optimizer)
        if do_eval:
            # NOTE: `eval` here is a project-level evaluation routine that
            # shadows the builtin.
            eval(test_loader, model, loss_function)
        elif do_predict:
            vocab = Vocab('data/vocab', voc_size)
            # BUG FIX: `iter(train_loader).next()` is Python-2-only and
            # raises AttributeError on Python 3; use the builtin next().
            batch = next(iter(train_loader))
            story, highlight = batch
            batcher = Batcher(story, highlight, vocab)
            (stories, highlights, extra_zeros, story_extended,
             highlight_extended, vocab_extended) = batcher.get_batch(
                 get_vocab_extended=True)
            stories = stories.to(device)
            highlights = highlights.to(device)
            story_extended = story_extended.to(device)
            extra_zeros = extra_zeros.to(device)
            # stories, highlights = get_random_sentences(test_set, batch_size)
            with torch.no_grad():
                output = model(stories, highlights, story_extended,
                               extra_zeros)
            get_batch_prediction(stories, output, highlights)
        if get_rouge:
            get_rouge_files(model, test_loader)
            get_rouge_score()
    else:
        epoch = 0
    if do_train:
        train(train_loader, test_loader, loss_function, model, optimizer,
              epoch, num_epochs=max_epochs - epoch)
class Trainer:
    """Encoder/decoder trainer with separate optimizers per sub-network.

    Trains an attention encoder-decoder on a driving/target series with
    MSE loss, checkpoints both halves periodically, and plots train/test
    predictions against the ground truth.
    """

    def __init__(self, driving, target, time_step, split, lr):
        # Dataset provides features, targets and the train/test split.
        self.dataset = Dataset(driving, target, time_step, split)
        self.encoder = AttnEncoder(input_size=self.dataset.get_num_features(), hidden_size=config.ENCODER_HIDDEN_SIZE, time_step=time_step)
        self.decoder = AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE, hidden_size=config.DECODER_HIDDEN_SIZE, time_step=time_step)
        if torch.cuda.is_available():
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
        # One Adam optimizer per sub-network; both step every batch.
        self.encoder_optim = optim.Adam(self.encoder.parameters(), lr)
        self.decoder_optim = optim.Adam(self.decoder.parameters(), lr)
        self.loss_func = nn.MSELoss()
        self.train_size, self.test_size = self.dataset.get_size()

    def train_minibatch(self, num_epochs, batch_size, interval):
        """Train for `num_epochs`, saving checkpoints every `interval` epochs."""
        x_train, y_train, y_seq_train = self.dataset.get_train_set()
        for epoch in range(num_epochs):
            i = 0
            loss_sum = 0
            while (i < self.train_size):
                self.encoder_optim.zero_grad()
                self.decoder_optim.zero_grad()
                batch_end = i + batch_size
                # Clamp the final (possibly short) batch to the data size.
                if (batch_end >= self.train_size):
                    batch_end = self.train_size
                var_x = self.to_variable(x_train[i:batch_end])
                var_y = self.to_variable(y_train[i:batch_end])
                var_y_seq = self.to_variable(y_seq_train[i:batch_end])
                # A single driving series comes in 2-D; add a feature axis.
                if var_x.dim() == 2:
                    var_x = var_x.unsqueeze(2)
                code = self.encoder(var_x)
                y_res = self.decoder(code, var_y_seq)
                loss = self.loss_func(y_res, var_y)
                loss.backward()
                self.encoder_optim.step()
                self.decoder_optim.step()
                # print('[%d], loss is %f' % (epoch, 10000 * loss.data[0]))
                loss_sum += loss.item()
                i = batch_end
            # NOTE(review): `loss_sum` is a sum over batches, not an
            # average, despite the message text.
            print('epoch [%d] finished, the average loss is %f' % (epoch, loss_sum))
            if (epoch + 1) % (interval) == 0 or epoch + 1 == num_epochs:
                torch.save(
                    self.encoder.state_dict(),
                    'models/encoder' + str(epoch + 1) + '-norm' + '.model')
                torch.save(
                    self.decoder.state_dict(),
                    'models/decoder' + str(epoch + 1) + '-norm' + '.model')

    def test(self, num_epochs, batch_size):
        """Predict on train+test splits and save a comparison plot."""
        x_train, y_train, y_seq_train = self.dataset.get_train_set()
        x_test, y_test, y_seq_test = self.dataset.get_test_set()
        y_pred_train = self.predict(x_train, y_train, y_seq_train, batch_size)
        y_pred_test = self.predict(x_test, y_test, y_seq_test, batch_size)
        plt.figure(figsize=(8, 6), dpi=100)
        # The first 2000 training points are skipped in the plot.
        plt.plot(range(2000, self.train_size), y_train[2000:], label='train truth', color='black')
        plt.plot(range(self.train_size, self.train_size + self.test_size), y_test, label='ground truth', color='black')
        plt.plot(range(2000, self.train_size), y_pred_train[2000:], label='predicted train', color='red')
        plt.plot(range(self.train_size, self.train_size + self.test_size), y_pred_test, label='predicted test', color='blue')
        plt.xlabel('Days')
        plt.ylabel('Stock price of AAPL.US(USD)')
        plt.savefig('results/res-' + str(num_epochs) + '-' + str(batch_size) + '.png')

    def predict(self, x, y, y_seq, batch_size):
        """Run batched inference; returns a numpy array of predictions.

        NOTE(review): parameter `y` is unused — callers pass the ground
        truth here but only `x` and `y_seq` drive the prediction.
        """
        y_pred = np.zeros(x.shape[0])
        i = 0
        while (i < x.shape[0]):
            batch_end = i + batch_size
            if batch_end > x.shape[0]:
                batch_end = x.shape[0]
            var_x_input = self.to_variable(x[i:batch_end])
            var_y_input = self.to_variable(y_seq[i:batch_end])
            if var_x_input.dim() == 2:
                var_x_input = var_x_input.unsqueeze(2)
            code = self.encoder(var_x_input)
            y_res = self.decoder(code, var_y_input)
            # Keep only the last time step of each decoded sequence.
            for j in range(i, batch_end):
                y_pred[j] = y_res[j - i, -1]
            i = batch_end
        return y_pred

    def load_model(self, encoder_path, decoder_path):
        """Load checkpoints onto CPU regardless of where they were saved."""
        self.encoder.load_state_dict(
            torch.load(encoder_path,
                       map_location=lambda storage, loc: storage))
        self.decoder.load_state_dict(
            torch.load(decoder_path,
                       map_location=lambda storage, loc: storage))

    def to_variable(self, x):
        """Wrap a numpy array as a float Variable, on GPU when available."""
        if torch.cuda.is_available():
            return Variable(torch.from_numpy(x).float()).cuda()
        else:
            return Variable(torch.from_numpy(x).float())
def main(config):
    """Train the lex2 attention decoder with an auxiliary noun loss.

    Loads the dataset dictionary, builds (or restores) the model, then
    runs SGD epochs with a decaying learning rate, checkpointing on dev
    improvement.
    """
    print(config)
    dictionary = json.load(
        open('./{}.lex2.dictionary.json'.format(config.data)))
    # Collect ids of noun constituents (POS tags starting with 'NN').
    noun_id = []
    for k, v in dictionary['const'].items():
        if k[:2] == 'NN':
            noun_id.append(v)
    vocab_size = len(dictionary['word']) + 1
    word_embedding_dim = 300
    print("Vocabulary size:", vocab_size)
    # Random init; optionally overwritten with GloVe vectors below.
    word_vectors = np.random.uniform(low=-0.1, high=0.1,
                                     size=(vocab_size, word_embedding_dim))
    batch_size = 40 if config.data == 'persona' else 10
    if config.data in ['persona', 'movie']:
        train_loader = get_loader(
            './data/{}.train.src'.format(config.data),
            './data/{}.train.lex2.dat'.format(config.data),
            './data/{}.train.psn'.format(config.data), dictionary,
            batch_size)
        dev_loader = get_loader('./data/{}.valid.src'.format(config.data),
                                './data/{}.valid.lex2.dat'.format(config.data),
                                './data/{}.valid.psn'.format(config.data),
                                dictionary, 10)
    else:
        train_loader = get_loader('./data/{}.train.src'.format(config.data),
                                  './data/{}.train.trg'.format(config.data),
                                  None, dictionary, 20)
        dev_loader = get_loader('./data/{}.valid.src'.format(config.data),
                                './data/{}.valid.trg'.format(config.data),
                                None, dictionary, 200)
    hidden_size = 512
    cenc_input_size = hidden_size * 2
    start_batch = 50000
    start_kl_weight = config.start_kl_weight
    if not config.use_saved:
        hred = AttnDecoder(word_embedding_dim, hidden_size, hidden_size,
                           vocab_size, word_vectors, dictionary['word'],
                           config.data, 0.5).cuda()
        # NOTE(review): torch.nn.init.uniform is the deprecated spelling
        # of torch.nn.init.uniform_ on modern PyTorch.
        for p in hred.parameters():
            torch.nn.init.uniform(p.data, a=-0.1, b=0.1)
        if config.glove:
            # NOTE(review): `word_vectors` was already handed to the
            # AttnDecoder constructor above; whether these GloVe updates
            # reach the embedding depends on the model copying vs.
            # referencing the array — verify.
            print("Loading word vecotrs.")
            word2vec_file = open('./glove.42B.300d.txt')
            next(word2vec_file)
            found = 0
            for line in word2vec_file:
                word, vec = line.split(' ', 1)
                if word in dictionary['word']:
                    word_vectors[dictionary['word'][word]] = np.fromstring(
                        vec, dtype=np.float32, sep=' ')
                    found += 1
            print(found)
    else:
        hred = torch.load('attn.{}.pt'.format(config.data)).cuda()
        hred.flatten_parameters()
    hred.data = config.data
    params = filter(lambda x: x.requires_grad, hred.parameters())
    # NOTE(review): this Adam optimizer is immediately replaced by a
    # per-iteration SGD optimizer inside the loop below.
    optimizer = torch.optim.Adam(params, lr=0.001)
    best_loss = np.inf
    last_dev_loss = np.inf
    power = 2
    # Epoch counter starts at 18 so the decayed SGD lr (.1 * 0.95**it)
    # resumes a prior schedule.
    for it in range(18, 30):
        ave_loss = 0
        last_time = time.time()
        params = filter(lambda x: x.requires_grad, hred.parameters())
        optimizer = torch.optim.SGD(params, lr=.1 * 0.95**it, momentum=0.9)
        hred.train()
        for _, (src_seqs, src_lengths, trg_seqs, trg_lengths, psn_seqs,
                psn_lengths, indices, pos_seqs) in enumerate(train_loader):
            if _ % config.print_every_n_batches == 1:
                print(ave_loss / min(_, config.print_every_n_batches),
                      time.time() - last_time)
                ave_loss = 0
            loss, noun_loss, count = hred.loss(src_seqs, src_lengths,
                                               indices, trg_seqs,
                                               trg_lengths, psn_seqs,
                                               psn_lengths, pos_seqs,
                                               noun_id, 1)
            # NOTE(review): loss.data[0] / clip_grad_norm are pre-0.4
            # PyTorch idioms (now .item() / clip_grad_norm_).
            ave_loss += loss.data[0]
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm(params, .1)
            optimizer.step()
        # eval on dev
        dev_loss = 0
        dev_nn_loss = 0
        count = 0
        nn_total_count = 0
        hred.eval()
        for i, (src_seqs, src_lengths, trg_seqs, trg_lengths, psn_seqs,
                psn_lengths, indices, pos_seqs) in enumerate(dev_loader):
            loss, noun_loss, nn_count = hred.loss(src_seqs, src_lengths,
                                                  indices, trg_seqs,
                                                  trg_lengths, psn_seqs,
                                                  psn_lengths, pos_seqs,
                                                  noun_id, 1)
            dev_loss += loss.data[0]
            dev_nn_loss += noun_loss.data[0] * nn_count.data[0]
            nn_total_count += nn_count.data[0]
            count += 1
        # Checkpoint on improvement; roll back to the best checkpoint
        # when dev loss regresses.
        if dev_loss < best_loss:
            best_loss = dev_loss
            torch.save(hred, 'attn.{}.pt'.format(config.data))
        if dev_loss > last_dev_loss:
            power += 1
            hred = torch.load('attn.{}.pt'.format(config.data))
        last_dev_loss = dev_loss
        print('dev loss: {} {}'.format(dev_loss / count,
                                       dev_nn_loss / nn_total_count))
def main(config):
    """Train one of three dialogue models (VHRED / attention / HRED).

    The model family, data loaders and loss signature are all selected
    by `config` flags; checkpoints are saved whenever the dev loss
    improves.
    """
    print(config)
    dictionary = json.load(
        open('./{}.parse.dictionary.json'.format(config.data)))
    vocab_size = len(dictionary) + 1
    word_embedding_dim = 300
    print("Vocabulary size:", len(dictionary))
    # Random init; overwritten below by GloVe or Word2Vec where found.
    word_vectors = np.random.uniform(low=-0.5, high=0.5,
                                     size=(vocab_size, word_embedding_dim))
    found = 0
    print("Loading word vecotrs.")
    if config.glove:
        word2vec_file = open('./glove.6B.300d.txt')
        next(word2vec_file)
        for line in word2vec_file:
            word, vec = line.split(' ', 1)
            if word in dictionary:
                word_vectors[dictionary[word]] = np.fromstring(
                    vec, dtype=np.float32, sep=' ')
                found += 1
    else:
        word2vec = Word2Vec.load('./word2vec.vector')
        for word in word2vec.wv.vocab:
            if word in dictionary:
                word_vectors[dictionary[word]] = word2vec.wv[word]
                found += 1
    print(found)
    hidden_size = 512
    cenc_input_size = hidden_size * 2
    start_batch = 50000
    start_kl_weight = config.start_kl_weight
    if config.vhred:
        train_loader = get_hr_loader('./data/{}.train.src'.format(config.data),
                                     './data/{}.train.trg'.format(config.data),
                                     dictionary, 40)
        dev_loader = get_hr_loader('./data/{}.valid.src'.format(config.data),
                                   './data/{}.valid.trg'.format(config.data),
                                   dictionary, 200)
        if not config.use_saved:
            hred = VHRED(dictionary, vocab_size, word_embedding_dim,
                         word_vectors, hidden_size)
            # Warm-start the VHRED from a pretrained HRED checkpoint.
            print('load hred param')
            _hred = torch.load('hred.pt')
            hred.u_encoder = _hred.u_encoder
            hred.c_encoder = _hred.c_encoder
            hred.decoder.rnn = _hred.decoder.rnn
            hred.decoder.output_transform = _hred.decoder.output_transform
            # Only the first half of the transform matches the HRED shape.
            hred.decoder.context_hidden_transform.weight.data[:, 0:hidden_size] = \
                _hred.decoder.context_hidden_transform.weight.data
            hred.flatten_parameters()
        else:
            hred = torch.load('vhred.pt')
            hred.flatten_parameters()
    elif config.attn:
        if config.data == 'persona':
            train_loader = get_ctc_loader(
                './data/{}.train.src'.format(config.data),
                './data/{}.train.trg'.format(config.data),
                './data/{}.train.psn'.format(config.data), dictionary, 40)
            dev_loader = get_ctc_loader(
                './data/{}.valid.src'.format(config.data),
                './data/{}.valid.trg'.format(config.data),
                './data/{}.valid.psn'.format(config.data), dictionary, 200)
            if not config.use_saved:
                hred = PersonaAttnDecoder(word_embedding_dim, hidden_size,
                                          vocab_size, word_vectors,
                                          dictionary).cuda()
            else:
                hred = torch.load('attn.pt')
                hred.flatten_parameters()
        else:
            train_loader = get_loader(
                './data/{}.train.src'.format(config.data),
                './data/{}.train.trg'.format(config.data), dictionary, 40)
            dev_loader = get_loader('./data/{}.valid.src'.format(config.data),
                                    './data/{}.valid.trg'.format(config.data),
                                    dictionary, 200)
            if not config.use_saved:
                hred = AttnDecoder(word_embedding_dim, hidden_size,
                                   vocab_size, word_vectors,
                                   dictionary).cuda()
            else:
                hred = torch.load('attn.pt')
                hred.flatten_parameters()
    else:
        train_loader = get_hr_loader('./data/{}.train.src'.format(config.data),
                                     './data/{}.train.trg'.format(config.data),
                                     dictionary, 40)
        dev_loader = get_hr_loader('./data/{}.valid.src'.format(config.data),
                                   './data/{}.valid.trg'.format(config.data),
                                   dictionary, 200)
        if not config.use_saved:
            disc = torch.load('discriminator.pt')
            hred = HRED(dictionary, vocab_size, word_embedding_dim,
                        word_vectors, hidden_size, disc)
        else:
            hred = torch.load('hred.pt')
            hred.flatten_parameters()
            if hred.discriminator is not None:
                hred.discriminator.u_encoder.rnn.flatten_parameters()
    params = filter(lambda x: x.requires_grad, hred.parameters())
    #optimizer = torch.optim.SGD(params, lr=config.lr, momentum=0.99)
    #q_optimizer = torch.optim.SGD(hred.q_network.parameters(), lr=0.01)
    optimizer = torch.optim.Adam(params, lr=0.001)
    best_loss = np.inf
    for it in range(0, 20):
        ave_loss = 0
        last_time = time.time()
        for _, batch in enumerate(train_loader):
            # Batch layout differs per model family.
            if config.attn:
                if config.data == 'persona':
                    src_seqs, src_lengths, trg_seqs, trg_lengths, ctc_seqs, ctc_lengths, indices = batch
                else:
                    src_seqs, src_lengths, trg_seqs, trg_lengths, indices = batch
            else:
                src_seqs, src_lengths, indices, ctc_seqs, ctc_lengths, ctc_indices, trg_seqs, trg_lengths, trg_indices, turn_len = batch
            if _ % config.print_every_n_batches == 1:
                print(ave_loss / min(_, config.print_every_n_batches),
                      time.time() - last_time)
                ave_loss = 0
            # KL annealing: ramp the weight linearly until start_batch.
            if config.vhred and config.kl_weight and it * len(
                    train_loader) + _ <= start_batch:
                kl_weight = start_kl_weight + (1 - start_kl_weight) * float(
                    it * len(train_loader) + _) / start_batch
                # kl_weight = 0.5
                loss = hred.loss(src_seqs, src_lengths, indices, trg_seqs,
                                 trg_lengths, ctc_lengths, kl_weight)
            elif config.attn:
                if config.data == 'persona':
                    loss = hred.loss(src_seqs, src_lengths, indices,
                                     trg_seqs, trg_lengths, ctc_seqs,
                                     ctc_lengths, 1.0)
                else:
                    loss = hred.loss(src_seqs, src_lengths, indices,
                                     trg_seqs, trg_lengths, 1.0)
            else:
                loss = hred.loss(src_seqs, src_lengths, indices, ctc_seqs,
                                 ctc_lengths, ctc_indices, trg_seqs,
                                 trg_lengths, trg_indices, turn_len, 0.2)
                #loss = hred.augmented_loss(src_seqs, src_lengths, indices, ctc_seqs, ctc_lengths, ctc_indices, trg_seqs, trg_lengths, trg_indices, turn_len, 0.1)
            # NOTE(review): loss.data[0] / clip_grad_norm are pre-0.4
            # PyTorch idioms (now .item() / clip_grad_norm_).
            ave_loss += loss.data[0]
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm(params, 0.1)
            optimizer.step()
        # eval on dev
        dev_loss = 0
        count = 0
        for _, batch in enumerate(dev_loader):
            if config.attn:
                if config.data == 'persona':
                    src_seqs, src_lengths, trg_seqs, trg_lengths, ctc_seqs, ctc_lengths, indices = batch
                else:
                    src_seqs, src_lengths, trg_seqs, trg_lengths, indices = batch
            else:
                src_seqs, src_lengths, indices, ctc_seqs, ctc_lengths, ctc_indices, trg_seqs, trg_lengths, trg_indices, turn_len = batch
            if config.attn:
                if config.data == 'persona':
                    dev_loss += hred.evaluate(src_seqs, src_lengths, indices,
                                              trg_seqs, trg_lengths,
                                              ctc_seqs, ctc_lengths).data[0]
                else:
                    dev_loss += hred.evaluate(src_seqs, src_lengths, indices,
                                              trg_seqs, trg_lengths).data[0]
            else:
                dev_loss += hred.semantic_loss(src_seqs, src_lengths,
                                               indices, ctc_seqs,
                                               ctc_lengths, ctc_indices,
                                               trg_seqs, trg_lengths,
                                               trg_indices,
                                               turn_len).data[0]
            count += 1
        print('dev loss: {}'.format(dev_loss / count))
        if dev_loss < best_loss:
            if config.vhred:
                torch.save(hred, 'vhred.pt')
            elif config.attn:
                torch.save(hred, 'attn.{}.pt'.format(config.data))
            else:
                torch.save(hred, 'hred.pt')
            best_loss = dev_loss
    # Dead code: range(0, 0) never iterates. NOTE(review): if re-enabled,
    # `q_optimizer` is only defined in a comment above and would raise
    # NameError here.
    for it in range(0, 0):
        ave_loss = 0
        last_time = time.time()
        for _, (src_seqs, src_lengths, indices, ctc_seqs, ctc_lengths,
                ctc_indices, trg_seqs, trg_lengths, trg_indices,
                turn_len) in enumerate(train_loader):
            loss = hred.train_decoder(src_seqs, src_lengths, indices,
                                      turn_len, 30, 5, 5)
            ave_loss += loss.data[0]
            q_optimizer.zero_grad()
            loss.backward()
            q_optimizer.step()
attn_model = 'dot' #attn_model = 'general' #attn_model = 'concat' hidden_size = 500 encoder_n_layers = 2 decoder_n_layers = 2 dropout = 0.1 batch_size = 64 voc, pairs = loadData('./datasets/conversations.csv') n_tokens = voc.size embedding = nn.Embedding(n_tokens, hidden_size) encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout) #attn = Attn(attn_model, hidden_size) decoder = AttnDecoder(attn_model, embedding, hidden_size, n_tokens, decoder_n_layers, dropout) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") encoder = encoder.to(device) decoder = decoder.to(device) # Configure training/optimization clip = 50.0 teacher_forcing_ratio = 1.0 learning_rate = 0.0001 decoder_learning_ratio = 5.0 n_iteration = 4000 print_every = 1 save_every = 500 save_dir = "./checkpoints" corpus_name = "movie conversations"
class Trainer:
    """Joint encoder-decoder trainer with dev-set based early stopping.

    Trains the combined Model with a single Adam optimizer, checks
    directional accuracy on the dev set after every epoch, and stops
    early when accuracy degrades well below the best seen so far.
    """

    def __init__(self, time_step, split, lr):
        """Build dataset, encoder/decoder, joint model, optimizer, loss."""
        self.dataset = Dataset(T=time_step,
                               split_ratio=split,
                               binary_file=config.BINARY_DATASET)
        self.encoder = AttnEncoder(input_size=self.dataset.get_num_features(),
                                   hidden_size=config.ENCODER_HIDDEN_SIZE,
                                   time_step=time_step)
        self.decoder = AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE,
                                   hidden_size=config.DECODER_HIDDEN_SIZE,
                                   time_step=time_step)
        self.model = Model(self.encoder, self.decoder)
        if torch.cuda.is_available():
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
            self.model = self.model.cuda()
        # One optimizer over the joint model.
        self.model_optim = optim.Adam(self.model.parameters(), lr)
        self.loss_func = nn.MSELoss()
        self.train_size, self.test_size = self.dataset.get_size()

    def train_minibatch(self, num_epochs, batch_size, interval):
        """Train up to `num_epochs`, early-stopping on dev accuracy.

        Checkpoints encoder/decoder every `interval` epochs and at the end.
        """
        x_train, y_train, y_seq_train = self.dataset.get_train_set()
        # BUG FIX: max_acc must persist ACROSS epochs; it was previously
        # reset to 0 at the top of every epoch, so the
        # `acc < max_acc * 0.9` early-stop below could never trigger.
        max_acc = 0
        for epoch in range(num_epochs):
            i = 0
            loss_sum = 0
            while (i < self.train_size):
                self.model_optim.zero_grad()
                batch_end = i + batch_size
                # Trailing partial batch is skipped (not clamped).
                if (batch_end >= self.train_size):
                    break
                var_x = self.to_variable(x_train[i:batch_end])
                var_y = self.to_variable(y_train[i:batch_end])
                var_y_seq = self.to_variable(y_seq_train[i:batch_end])
                # A single driving series comes in 2-D; add a feature axis.
                if var_x.dim() == 2:
                    var_x = var_x.unsqueeze(2)
                y_res, y_var = self.model(var_x, var_y_seq)
                loss = self.loss_func(y_res, var_y)
                loss.backward()
                self.model_optim.step()
                # BUG FIX: loss.data[0] raises on PyTorch >= 0.5 for
                # 0-dim tensors; .item() is the supported accessor (and
                # matches the accumulation line below).
                print('[%d], loss is %f' % (epoch, 10000 * loss.item()))
                loss_sum += loss.data.item()
                i = batch_end
            print('epoch [%d] finished, the average loss is %f' %
                  (epoch, loss_sum))
            # Early stopping on dev-set directional accuracy.
            x_dev, y_dev, y_seq_dev = self.dataset.get_dev_set()
            # BUG FIX: predict() takes (x, y_seq, batch_size); the stray
            # y_dev argument made this call a TypeError.
            y_pred_dev = self.predict(x_dev, y_seq_dev, batch_size)
            acc = direction_correctness(y_pred_test=y_pred_dev, y_test=y_dev)
            if (acc > max_acc):
                max_acc = acc
            elif acc < max_acc * 0.9:  # prevent overfit
                break
            if (epoch + 1) % (interval) == 0 or epoch + 1 == num_epochs:
                torch.save(self.encoder.state_dict(),
                           'models/encoder' + str(epoch + 1) + '.model')
                torch.save(self.decoder.state_dict(),
                           'models/decoder' + str(epoch + 1) + '.model')

    def test(self, num_epochs, batch_size):
        """Predict on the test split and pickle truth/prediction arrays."""
        x_test, y_test, y_seq_test = self.dataset.get_test_set()
        y_pred_test = self.predict(x_test, y_seq_test, batch_size)
        with open('y_test', 'wb') as f:
            pickle.dump(y_test, f)
        with open('y_pred_test', 'wb') as f:
            pickle.dump(y_pred_test, f)

    def predict(self, x, y_seq, batch_size):
        """Batched inference; returns a numpy array of predictions.

        The trailing partial batch is skipped, leaving zeros for those
        positions (matches training's batch handling).
        """
        y_pred = np.zeros(x.shape[0])
        i = 0
        while (i < x.shape[0]):
            batch_end = i + batch_size
            if batch_end > x.shape[0]:
                break
            var_x_input = self.to_variable(x[i:batch_end])
            var_y_input = self.to_variable(y_seq[i:batch_end])
            if var_x_input.dim() == 2:
                var_x_input = var_x_input.unsqueeze(2)
            y_res, _ = self.model(var_x_input, var_y_input)
            for j in range(i, batch_end):
                y_pred[j] = y_res[j - i]
            i = batch_end
        return y_pred

    def single_predict(self, x, y_seq):
        """Run the model on one (unbatched) input and return its output."""
        var_x_input = self.to_variable(x)
        var_y_input = self.to_variable(y_seq)
        if var_x_input.dim() == 2:
            var_x_input = var_x_input.unsqueeze(2)
        y_res, _ = self.model(var_x_input, var_y_input)
        return y_res

    def load_model(self, encoder_path, decoder_path):
        """Load checkpoints (mapped to CPU) and rebuild the joint model."""
        self.encoder.load_state_dict(
            torch.load(encoder_path,
                       map_location=lambda storage, loc: storage))
        self.decoder.load_state_dict(
            torch.load(decoder_path,
                       map_location=lambda storage, loc: storage))
        self.model = Model(self.encoder, self.decoder)

    def to_variable(self, x):
        """Wrap a numpy array as a float Variable, on GPU when available."""
        if torch.cuda.is_available():
            return Variable(torch.from_numpy(x).float()).cuda()
        else:
            return Variable(torch.from_numpy(x).float())
class Agent:
    """DQN trading agent with separate policy and target networks."""

    def __init__(self, time_step, split, lr):
        """Build policy/target encoder-decoder DQNs, memory and optimizer."""
        self.dataset = Dataset(T=time_step,
                               split_ratio=split,
                               binary_file=config.BINARY_DATASET)
        self.policy_net_encoder = AttnEncoder(
            input_size=self.dataset.get_num_features(),
            hidden_size=config.ENCODER_HIDDEN_SIZE,
            time_step=time_step)
        self.policy_net_decoder = AttnDecoder(
            code_hidden_size=config.ENCODER_HIDDEN_SIZE,
            hidden_size=config.DECODER_HIDDEN_SIZE,
            time_step=time_step)
        self.policy_net = DQN(self.policy_net_encoder,
                              self.policy_net_decoder)
        self.target_net_encoder = AttnEncoder(
            input_size=self.dataset.get_num_features(),
            hidden_size=config.ENCODER_HIDDEN_SIZE,
            time_step=time_step)
        self.target_net_decoder = AttnDecoder(
            code_hidden_size=config.ENCODER_HIDDEN_SIZE,
            hidden_size=config.DECODER_HIDDEN_SIZE,
            time_step=time_step)
        self.target_net = DQN(self.target_net_encoder,
                              self.target_net_decoder)
        if torch.cuda.is_available():
            self.policy_net_encoder = self.policy_net_encoder.cuda()
            self.policy_net_decoder = self.policy_net_decoder.cuda()
            self.target_net_encoder = self.target_net_encoder.cuda()
            self.target_net_decoder = self.target_net_decoder.cuda()
            self.policy_net = self.policy_net.cuda()
            self.target_net = self.target_net.cuda()
        self.memory = ReplayMemory(config.MEMORY_CAPACITY)
        # Only the policy net is trained; the target net is sync'd to it.
        self.optimizer = optim.RMSprop(self.policy_net.parameters(), lr=lr)

    def select_action(self, state, test=False):
        """Epsilon-greedy action selection over 3 actions.

        Epsilon decays exponentially with the global `steps_done`
        counter; in test mode the greedy action is always taken.
        Returns a (1, 1) long tensor holding the chosen action.
        """
        global steps_done
        sample = random.random()
        eps_threshold = config.EPS_END + (
            config.EPS_START - config.EPS_END) * math.exp(
                -1. * steps_done / config.EPS_DECAY)
        steps_done += 1
        if sample > eps_threshold or test:
            with torch.no_grad():
                # Greedy: index of the highest Q-value.
                return self.policy_net(state).max(1)[1].view(1, 1)
        # BUG FIX: random.randint(3) raises TypeError (randint requires
        # two bounds); random.randrange(3) draws uniformly from {0, 1, 2}.
        action = torch.tensor([[random.randrange(3)]], dtype=torch.long)
        return action.cuda() if torch.cuda.is_available() else action

    def optimize_model(self):
        """One DQN optimization step over a replay-memory minibatch."""
        if len(self.memory) < config.BATCH_SIZE:
            return
        transitions = self.memory.sample(config.BATCH_SIZE)
        batch = Transition(*zip(*transitions))
        # Each state is a 3-tuple of tensors; re-batch component-wise.
        state_batch = tuple([
            torch.cat(
                tuple([batch.state[i][j] for i in range(config.BATCH_SIZE)]))
            for j in range(3)
        ])
        action_batch = torch.cat(batch.action)
        reward_batch = torch.cat(batch.reward)
        next_state_batch = tuple([
            torch.cat(
                tuple([
                    batch.next_state[i][j] for i in range(config.BATCH_SIZE)
                ])) for j in range(3)
        ])
        # Q(s, a) from the policy net for the actions actually taken.
        state_action_values = self.policy_net(state_batch).gather(
            1, action_batch)
        # max_a' Q_target(s', a'), detached so no gradient reaches the
        # target network.
        next_state_values = self.target_net(next_state_batch).max(
            1)[0].detach()
        expected_state_action_values = (next_state_values *
                                        config.GAMMA) + reward_batch
        loss = F.smooth_l1_loss(state_action_values,
                                expected_state_action_values.unsqueeze(1))
        self.optimizer.zero_grad()
        loss.backward()
        # Clamp gradients elementwise to stabilize training.
        for param in self.policy_net.parameters():
            if param.grad is not None:
                param.grad.data.clamp_(-1, 1)
        self.optimizer.step()

    def load_model(self, encoder_path=None, decoder_path=None, DQN_path=None):
        """Restore weights from a whole-DQN checkpoint or from separate
        encoder/decoder checkpoints; the target net mirrors the policy net."""
        if DQN_path is not None:
            self.policy_net.load_state_dict(
                torch.load(DQN_path,
                           map_location=lambda storage, loc: storage))
            self.target_net.load_state_dict(self.policy_net.state_dict())
        else:
            self.policy_net_encoder.load_state_dict(
                torch.load(encoder_path,
                           map_location=lambda storage, loc: storage))
            self.policy_net_decoder.load_state_dict(
                torch.load(decoder_path,
                           map_location=lambda storage, loc: storage))
            self.policy_net = DQN(self.policy_net_encoder,
                                  self.policy_net_decoder)
            self.target_net.load_state_dict(self.policy_net.state_dict())

    def train(self, num_epochs, interval):
        """Run DQN training episodes; checkpoint every `interval` epochs."""
        env = Environment(np.array([0.5, 0.5]))
        episode = 0
        for epoch in range(num_epochs):
            env.reset()
            state = (env.x[env.current_step].unsqueeze(0),
                     env.y_seq[env.current_step].unsqueeze(0),
                     env.position.unsqueeze(0))
            while (1):
                action = self.select_action(state)
                _, next_state, reward = env.step(action.item())
                # A None next_state marks the end of the episode.
                if next_state is None:
                    break
                self.memory.push(state, action, next_state, reward)
                state = next_state
                self.optimize_model()
                episode += 1
                # Periodically sync the target network to the policy net.
                if (episode % config.TARGET_UPDATE == 0):
                    self.target_net.load_state_dict(
                        self.policy_net.state_dict())
            print(env.wealth, action, env.position)
            if (epoch + 1) % (interval) == 0 or epoch + 1 == num_epochs:
                torch.save(self.policy_net.state_dict(),
                           'models/DQN' + str(epoch + 1) + '.model')

    def test(self, num_epochs):
        """Roll out the greedy policy on the test environment."""
        env = Environment(test=True)
        state = (env.x[env.current_step], env.y_seq[env.current_step],
                 env.position)
        while (1):
            action = self.select_action(state, test=True)
            _, next_state, _ = env.step(action.item())
            if next_state is None:
                break
            state = next_state
        print(env.wealth)
def main():
    """Train (or just print) the seq2seq model selected by `args`.

    With --train: shuffled minibatch training with validation after each
    epoch, checkpointing on best hit-accuracy and on best loss, early
    stop when hit-accuracy stalls for more than 2 epochs.
    """
    epoch = 1000
    batch_size = 256
    hidden_dim = 128
    encoder = Encoder(num_words,
                      hidden_dim,
                      n_layers=args.n_layers,
                      bidirectional=args.bidirectional).to(device)
    if args.attn:
        decoder = AttnDecoder(hidden_dim,
                              num_words,
                              max_seqlen,
                              n_layers=args.n_layers).to(device)
    else:
        decoder = Decoder(hidden_dim, num_words,
                          n_layers=args.n_layers).to(device)
    if args.train:
        # Zero weight on PAD so it never contributes to the loss.
        weight = torch.ones(num_words)
        weight[word2idx[PAD_TOKEN]] = 0
        encoder = encoder.to(device)
        decoder = decoder.to(device)
        weight = weight.to(device)
        encoder_optimizer = Adam(encoder.parameters(), lr=0.001)
        decoder_optimizer = Adam(decoder.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss(ignore_index=word2idx[PAD_TOKEN])
        np.random.seed(1124)
        order = np.arange(len(train_X))
        best_loss = 1e10
        best_percentage = 0
        best_percentage_epoch = 0
        best_epoch = 0
        start_epoch = 0
        if args.resume:
            start_epoch, best_loss = load_checkpoint(args.model_path,
                                                     encoder,
                                                     encoder_optimizer,
                                                     decoder,
                                                     decoder_optimizer)
        for e in range(start_epoch, start_epoch + epoch):
            # Early stop: no hit-accuracy improvement for > 2 epochs.
            if e - best_percentage_epoch > 2:
                break
            np.random.shuffle(order)
            shuffled_train_X = train_X[order]
            shuffled_train_Y = train_Y[order]
            train_loss = 0
            valid_loss = 0
            for b in tqdm(range(int(len(order) // batch_size))):
                # Transposed to (seq_len, batch) layout via .t().
                batch_x = torch.LongTensor(
                    shuffled_train_X[b * batch_size:(b + 1) *
                                     batch_size].tolist()).t()
                batch_y = torch.LongTensor(
                    shuffled_train_Y[b * batch_size:(b + 1) *
                                     batch_size].tolist()).t()
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                train_loss += train(batch_x, batch_y, encoder, decoder,
                                    encoder_optimizer, decoder_optimizer,
                                    criterion)
            # NOTE(review): divides by the LAST batch index `b`
            # (= n_batches - 1), an off-by-one; also ZeroDivisionError
            # when there is exactly one batch (b == 0).
            train_loss /= b
            all_control_cnt, all_hit_cnt = [], []
            for b in range(len(valid_X) // batch_size):
                batch_x = torch.LongTensor(valid_X[b * batch_size:(b + 1) *
                                                   batch_size].tolist()).t()
                batch_y = torch.LongTensor(valid_Y[b * batch_size:(b + 1) *
                                                   batch_size].tolist()).t()
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                val_loss, control_cnt, hit_cnt = valid(
                    batch_x, batch_y, encoder, decoder, encoder_optimizer,
                    decoder_optimizer, criterion)
                valid_loss += val_loss
                all_control_cnt.extend(control_cnt)
                all_hit_cnt.extend(hit_cnt)
            # NOTE(review): same off-by-one as above, with `b` reused
            # from the validation loop.
            valid_loss /= b
            # Hit percentage over samples with a nonzero control count.
            all_control_cnt = np.array(all_control_cnt)
            all_hit_cnt = np.array(all_hit_cnt)
            nonzero = all_control_cnt != 0
            all_control_cnt = all_control_cnt[nonzero]
            all_hit_cnt = all_hit_cnt[nonzero]
            percentage = np.mean(all_hit_cnt / all_control_cnt)
            logger.info(
                "epoch {}, train_loss {:.4f}, valid_loss {:.4f}, best_epoch {}, best_loss {:.4f}, control_cnt {}, hit_cnt {}, percentage {:.4f}"
                .format(e, train_loss, valid_loss, best_epoch, best_loss,
                        np.sum(all_control_cnt), np.sum(all_hit_cnt),
                        percentage))
            # Two checkpoint criteria share one path: best hit-accuracy
            # and best validation loss (the later save wins the file).
            if percentage > best_percentage:
                best_percentage = percentage
                best_percentage_epoch = e
                torch.save(
                    {
                        'encoder_state_dict': encoder.state_dict(),
                        'encoder_optimizer_state_dict':
                        encoder_optimizer.state_dict(),
                        'decoder_state_dict': decoder.state_dict(),
                        'decoder_optimizer_state_dict':
                        decoder_optimizer.state_dict(),
                        'epoch': e,
                        'loss': valid_loss,
                        'percentage': best_percentage,
                    }, args.model_path)
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_epoch = e
                torch.save(
                    {
                        'encoder_state_dict': encoder.state_dict(),
                        'encoder_optimizer_state_dict':
                        encoder_optimizer.state_dict(),
                        'decoder_state_dict': decoder.state_dict(),
                        'decoder_optimizer_state_dict':
                        decoder_optimizer.state_dict(),
                        'epoch': e,
                        'loss': valid_loss
                    }, args.model_path)
        # Decode one validation batch and log sample predictions.
        batch_x = torch.LongTensor(valid_X[:batch_size].tolist()).t()
        batch_y = torch.LongTensor(valid_Y[:batch_size].tolist()).t()
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        input_chinese, output_chinese = predict(batch_x, batch_y, encoder,
                                                decoder, encoder_optimizer,
                                                decoder_optimizer, criterion,
                                                20)
        logger.info('*** Results ***')
        logger.info('Best Hit Accuracy: {}'.format(best_percentage))
        logger.info(
            'Best Hit Accuracy Epoch: {}'.format(best_percentage_epoch))
        for inp, out in zip(input_chinese, output_chinese):
            logger.info('{}\t||\t{}'.format(inp, out))
        logger.info(encoder)
        logger.info(decoder)
        logger.info('\n\n' + '=' * 100 + '\n\n')
    else:
        print(encoder)
        print(decoder)
class Trainer:
    """Trains and evaluates the attention encoder/decoder model on the
    binary time-series dataset configured in ``config``."""

    def __init__(self, driving, target, time_step, split, lr, regression=True):
        """Build the dataset, combined model, optimizer and loss.

        Args:
            driving, target: unused here (the dataset is loaded from
                config.BINARY_DATASET_HEADER); kept for interface
                compatibility with other Trainer variants in this file.
            time_step: input window length T.
            split: train/test split ratio forwarded to Dataset.
            lr: learning rate for the Adam optimizer.
            regression: True -> MSE regression loss; False -> per-sample
                2-class cross-entropy.
        """
        self.dataset = Dataset(T=time_step, split_ratio=split,
                               binary_file=config.BINARY_DATASET_HEADER)
        self.encoder = AttnEncoder(input_size=self.dataset.get_num_features(),
                                   hidden_size=config.ENCODER_HIDDEN_SIZE,
                                   time_step=time_step)
        self.decoder = AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE,
                                   hidden_size=config.DECODER_HIDDEN_SIZE,
                                   time_step=time_step)
        self.model = Model(self.encoder, self.decoder)
        if torch.cuda.is_available():
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
            self.model = self.model.cuda()
        # One optimizer over the combined model (replaces the earlier pair of
        # per-module optimizers).
        self.model_optim = optim.Adam(self.model.parameters(), lr)
        if regression:
            # regression model
            self.loss_func = nn.MSELoss()
        else:
            # classification model; reduce=False yields a per-sample loss
            # vector (legacy argument names kept for the PyTorch version in
            # use by this file).
            weight = torch.Tensor([1, 1])
            self.loss_func = nn.CrossEntropyLoss(reduce=False,
                                                 size_average=False,
                                                 weight=weight)
        self.train_size, self.test_size, self.total_size = self.dataset.get_size()
        print("train_size = %d (in terms of number of binary files)" % self.train_size)
        print("test_size = %d (in terms of number of binary files)" % self.test_size)

    def train_minibatch(self, num_epochs, batch_size, interval, cout, regression=True):
        """Run minibatch training with per-epoch direction-accuracy validation.

        Args:
            num_epochs: number of epochs to train.
            batch_size: minibatch size.
            interval: save a checkpoint every `interval` epochs.
            cout: truthy when resuming a previously trained model; shifts
                reported epoch numbers and checkpoint names by
                `already_trained`. (Fix: the body previously referenced the
                undefined name `cont`.)
            regression: True for MSE regression, False for classification.
        """
        already_trained = 100  # epoch offset used when resuming training
        best_model = -1
        best_correctness = 0
        for epoch in range(num_epochs):
            for file_num in range(self.train_size):
                x_train, y_train, y_seq_train = self.dataset.get_train_set(file_num)
                i = 0
                # NOTE(review): loss_sum is reset per file, so the per-epoch
                # log below reports the last file's sum only — confirm intent.
                loss_sum = 0
                while i < config.MAX_SINGLE_FILE_LINE_NUM:
                    self.model_optim.zero_grad()
                    batch_end = i + batch_size
                    # Reserve the tail of the last training file for validation.
                    if (config.SPLIT_RATIO != 1.0
                            and file_num == self.train_size - 1
                            and batch_end > (config.MAX_SINGLE_FILE_LINE_NUM -
                                             config.VALIDATION_LINE_NUM)):
                        break
                    if batch_end > config.MAX_SINGLE_FILE_LINE_NUM:
                        break
                    var_x = self.to_variable(x_train[i:batch_end])
                    var_y = Variable(torch.from_numpy(y_train[i:batch_end]).float())
                    var_y_seq = self.to_variable(y_seq_train[i:batch_end])
                    # Make sure the driving series has 3 dimensions.
                    if var_x.dim() == 2:
                        var_x = var_x.unsqueeze(2)
                    y_res, y_var = self.model(var_x, var_y_seq)
                    if regression:
                        # regression model
                        loss = self.loss_func(y_res, var_y)
                    else:
                        # classification model (debug shape/type prints removed)
                        var_y = var_y.long().cuda()
                        loss = self.loss_func(y_res, var_y)
                    loss.backward()
                    self.model_optim.step()
                    # Fix: was `if cont:` (NameError) and `loss.data[0]`
                    # (0-dim tensor indexing, removed in PyTorch >= 0.5);
                    # now consistent with the loss_sum accumulation below.
                    if cout:
                        print('epoch[%d], file[%d], batch[%d], loss is %f' %
                              (already_trained + epoch + 1, file_num,
                               batch_end / batch_size, 10000 * loss.data.item()))
                    else:
                        print('epoch[%d], file[%d], batch[%d], loss is %f' %
                              (epoch + 1, file_num, batch_end / batch_size,
                               10000 * loss.data.item()))
                    loss_sum += loss.data.item()
                    i = batch_end
            if cout:
                print('epoch [%d] finished, the average loss is %f' %
                      (already_trained + epoch + 1, loss_sum))
                if (epoch + 1) % interval == 0 or epoch + 1 == (num_epochs + already_trained):
                    torch.save(
                        self.encoder.state_dict(),
                        'models/30min/encoder_EURUSD_30min_multifile_with_vali' +
                        str(already_trained + epoch + 1) + '.model')
                    torch.save(
                        self.decoder.state_dict(),
                        'models/30min/decoder_EURUSD_30min_multifile_with_vali' +
                        str(already_trained + epoch + 1) + '.model')
            else:
                print('epoch [%d] finished, the average loss is %f' % (epoch + 1, loss_sum))
                if (epoch + 1) % interval == 0 or epoch + 1 == num_epochs:
                    torch.save(
                        self.encoder.state_dict(),
                        'models/EURUSD/encoder_EURUSD_30min_multifile_with_vali_without_normalization_final_test_new_' +
                        str(epoch + 1) + '.model')
                    torch.save(
                        self.decoder.state_dict(),
                        'models/EURUSD/decoder_EURUSD_30min_multifile_with_vali_without_normalization_final_test_new_' +
                        str(epoch + 1) + '.model')
            # Direction-accuracy validation: does the predicted move from the
            # previous ground-truth value have the same sign as the true move?
            x_vali, y_vali, y_seq_vali = self.dataset.get_validation_set()
            y_pred_validation = self.predict(x_vali, y_vali, y_seq_vali, batch_size)
            seq_len = len(y_vali)
            gt_direction = (y_vali[1:] - y_vali[:seq_len - 1]) > 0
            pred_direction = (y_pred_validation[1:] - y_vali[:seq_len - 1]) > 0
            correct = np.sum(gt_direction == pred_direction)
            print('number of correct in validation set = %d' % correct)
            print('length of validation set = %d' % seq_len)
            correct = correct / (seq_len - 1)
            if correct > best_correctness:
                best_model = epoch + 1
                best_correctness = correct
            print('epoch[%d] finished, current correctness is %f, best model so far is model %d with correctness %f' %
                  (epoch + 1, correct, best_model, best_correctness))

    def test(self, num_epochs, batch_size):
        """Predict on every held-out test file, pickle ground truth and
        predictions, and save a comparison plot per file.

        Args:
            num_epochs: tag used only in the output file names.
            batch_size: batch size forwarded to predict().
        """
        start = self.train_size
        end = self.total_size
        for index in range(start, end, 1):
            x_test, y_test, y_seq_test = self.dataset.get_test_set(index)
            # Fix: this call was commented out, leaving y_pred_test undefined
            # in the pickle/plot code below.
            y_pred_test = self.predict(x_test, y_test, y_seq_test, batch_size)
            # Context managers replace the manual open/close pairs.
            with open('y_test_attention_weight_observation_epoch_' +
                      str(num_epochs) + '_part' + str(index - start + 1), 'wb') as f:
                pickle.dump(y_test, f)
            with open('y_pred_test_attention_weight_observation_epoch_' +
                      str(num_epochs) + '_part' + str(index - start + 1), 'wb') as f:
                pickle.dump(y_pred_test, f)
            plt.figure()
            # Plot only the first half of each test file.
            plt.plot(range(1 + index * config.MAX_SINGLE_FILE_LINE_NUM,
                           1 + index * config.MAX_SINGLE_FILE_LINE_NUM + len(y_test) // 2),
                     y_test[:len(y_test) // 2],
                     label='ground truth')
            plt.plot(range(1 + index * config.MAX_SINGLE_FILE_LINE_NUM,
                           1 + index * config.MAX_SINGLE_FILE_LINE_NUM + len(y_test) // 2),
                     y_pred_test[:len(y_test) // 2],
                     label='predicted test')
            plt.legend()
            plt.savefig('res-attention_weight_observation_epoch' + str(num_epochs) +
                        '_part_' + str(index - start + 1) + '.png')

    def predict(self, x, y, y_seq, batch_size):
        """Run the model over x/y_seq in batches and return predictions.

        Trailing samples that do not fill a whole batch are left as 0 in the
        returned array (preserving the original behavior).

        Args:
            x: driving series, shape (N, features) or (N,).
            y: target series (unused; kept for interface compatibility).
            y_seq: target-history sequences fed to the decoder.
            batch_size: samples per forward pass.

        Returns:
            numpy array of length N with the model outputs.
        """
        y_pred = np.zeros(x.shape[0])
        i = 0
        while i < x.shape[0]:
            batch_end = i + batch_size
            if batch_end > x.shape[0]:
                break
            var_x_input = self.to_variable(x[i:batch_end])
            var_y_input = self.to_variable(y_seq[i:batch_end])
            if var_x_input.dim() == 2:
                var_x_input = var_x_input.unsqueeze(2)
            y_res, _ = self.model(var_x_input, var_y_input)
            for j in range(i, batch_end):
                y_pred[j] = y_res[j - i]
            i = batch_end
        return y_pred

    def load_model(self, encoder_path, decoder_path):
        """Load encoder/decoder weights (CPU-mapped) and rebuild the
        combined model around the updated modules."""
        self.encoder.load_state_dict(
            torch.load(encoder_path, map_location=lambda storage, loc: storage))
        self.decoder.load_state_dict(
            torch.load(decoder_path, map_location=lambda storage, loc: storage))
        self.model = Model(self.encoder, self.decoder)

    def to_variable(self, x):
        """Convert a numpy array to a float Variable, on GPU if available."""
        var = Variable(torch.from_numpy(x).float())
        return var.cuda() if torch.cuda.is_available() else var
hidden_size = 500 encoder_n_layers = 2 decoder_n_layers = 2 dropout = 0.1 batch_size = 64 loadFilename = None checkpoint_iter = 4000 #load model print("Load checkpint...") checkpoint = torch.load(model_path) encoder_sd = checkpoint['en'] decoder_sd = checkpoint['de'] embedding_sd = checkpoint['embedding'] voc = Voc() voc.__dict__ = checkpoint['voc_dict'] size = voc.size embedding = nn.Embedding(size, hidden_size) embedding.load_state_dict(embedding_sd) encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout) encoder.load_state_dict(encoder_sd) decoder = AttnDecoder(attn_model, embedding, hidden_size, size, decoder_n_layers, dropout) decoder.load_state_dict(decoder_sd) searcher = GreedySearchDecoder(encoder, decoder, voc) evaluateInput(searcher)