def main():
    with open('data/wsj00-18.pos') as f:
        training_data = f.readlines()
    with open('data/wsj19-21.pos') as f:
        test_data = f.readlines()
    model = word2vec.KeyedVectors.load_word2vec_format(
        './data/GoogleNews-vectors-negative300.bin', binary=True)
    # KeyedVectors exposes most_similar directly (no .wv attribute needed)
    print(model.most_similar(positive=['woman', 'king'], negative=['man']))
    # print(model['hgoehgoehgoehg'])
    # print(len(model['hogehgoehgoe']))
    labels = ('NNP', ',', 'CD', 'NNS', 'JJ', 'MD', 'VB', 'DT', 'NN', 'IN',
              '.', 'VBZ', 'VBG', 'CC', 'VBD', 'VBN', 'RB', 'TO', 'PRP', 'RBR',
              'WDT', 'VBP', 'RP', 'PRP$', 'JJS', 'POS', '``', 'EX', "''", 'WP',
              ':', 'JJR', 'WRB', '$', 'NNPS', 'WP$', '-LRB-', '-RRB-', 'PDT',
              'RBS', 'FW', 'UH', 'SYM', 'LS', '#')
    rnn = RNN(300, 1000, labels)
    training_vector_data = [line for line in training_data]
    test_vector_data = [line for line in test_data]
    manager = NetworkEvaluator(rnn, training_vector_data, test_vector_data)
def prepare(self, loc_count, user_count, hidden_size, gru_factory, device):
    self.hidden_size = hidden_size
    if self.use_user_embedding:
        self.model = RNN_user(loc_count, user_count, hidden_size, gru_factory).to(device)
    else:
        self.model = RNN(loc_count, hidden_size, gru_factory).to(device)
def load_checkpoint(filename, dir=GOOGLE_COLAB_CHECKPOINT_DIR):
    checkpoint = torch.load(os.path.join(dir, filename))
    model = RNN(hidden_size=checkpoint['hidden_size'],
                output_size=checkpoint['output_size'],
                n_layers=checkpoint['n_layers'],
                batch_size=checkpoint['batch_size'],
                bidirectional=checkpoint['bidirectional'])
    model.load_state_dict(checkpoint['state_dict'])
    model.num_epochs_trained = checkpoint['num_epochs_trained']
    return model
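# Hypothetical counterpart to load_checkpoint() above (not from the original
# source): a minimal sketch of a save_checkpoint() that writes exactly the keys
# the loader reads, assuming the RNN keeps its constructor arguments and
# num_epochs_trained as attributes.
def save_checkpoint(model, filename, dir=GOOGLE_COLAB_CHECKPOINT_DIR):
    checkpoint = {
        'hidden_size': model.hidden_size,
        'output_size': model.output_size,
        'n_layers': model.n_layers,
        'batch_size': model.batch_size,
        'bidirectional': model.bidirectional,
        'num_epochs_trained': model.num_epochs_trained,
        'state_dict': model.state_dict(),
    }
    torch.save(checkpoint, os.path.join(dir, filename))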
def setUpClass(cls):
    cls.W_in = np.eye(2)
    cls.W_rec = np.eye(2)
    cls.W_out = np.eye(2)
    cls.W_FB = -np.ones((2, 2)) + np.eye(2)
    cls.b_rec = np.zeros(2)
    cls.b_out = np.zeros(2)
    cls.rnn = RNN(cls.W_in, cls.W_rec, cls.W_out, cls.b_rec, cls.b_out,
                  activation=identity, alpha=1,
                  output=softmax, loss=softmax_cross_entropy)
    cls.rnn.a = np.ones(2)
    cls.rnn.error = np.ones(2) * 0.5
def train_rnn(file, batch_size, layers, learning_rate, dropout, num_steps,
              cell_size, epochs, cell, test_seed, delim, save):
    """Train neural network."""
    model_name = "cell-{}-size-{}-batch-{}-steps-{}-layers-{}-lr-{}-dropout-{}".format(
        cell, cell_size, batch_size, num_steps, layers, learning_rate, dropout)
    ds = Dataset(file, batch_size=batch_size, num_steps=num_steps, with_delim=delim)
    n = RNN(data=ds, cell=cell, num_layers=layers, dropout=dropout,
            learning_rate=learning_rate, cell_size=cell_size, num_epochs=epochs)
    n.train(save=save, model_name=model_name, test_output=True,
            test_seed=test_seed, with_delim=delim)
    if save:
        n.save(model_name)
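# Illustrative call only; the corpus path and hyperparameter values below are
# made-up placeholders, not taken from the original project.
# train_rnn('data/corpus.txt', batch_size=32, layers=2, learning_rate=0.001,
#           dropout=0.5, num_steps=50, cell_size=256, epochs=10, cell='lstm',
#           test_seed='The ', delim=False, save=True)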
def run(checkpoint=None, dir=CHECKPOINT_DIR):
    gc.collect()
    batch_size = 1
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # torch.autograd.set_detect_anomaly(True)
    clean_train_loader = DataLoader(clean_train_data, batch_size=batch_size,
                                    shuffle=True, num_workers=2, pin_memory=True)
    clean_test_loader = DataLoader(clean_test_data, batch_size=batch_size,
                                   shuffle=False, num_workers=2, pin_memory=True)
    if checkpoint:
        model = load_checkpoint(checkpoint, dir=dir).to(device)
    else:
        model = RNN(hidden_size=80, output_size=5, n_layers=2,
                    batch_size=batch_size, bidirectional=False).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss = torch.nn.CrossEntropyLoss()
    losses = train(clean_train_loader, clean_test_loader, 500, model, optimizer,
                   loss, device, checkpoint_dir=CHECKPOINT_DIR)
    return model, losses
def test_mimic_task(self):
    """Verifies that the proper RNN output is returned as label in a simple
    case where the RNN simply counts the number of time steps."""
    from network import RNN
    from functions import identity, mean_squared_error
    n_in = 2
    n_h = 2
    n_out = 2
    W_in_target = np.eye(n_in)
    W_rec_target = np.eye(n_h)
    W_out_target = np.eye(n_out)
    b_rec_target = np.zeros(n_h)
    b_out_target = np.zeros(n_out)
    alpha = 1
    rnn_target = RNN(W_in_target, W_rec_target, W_out_target,
                     b_rec_target, b_out_target,
                     activation=identity, alpha=alpha,
                     output=identity, loss=mean_squared_error)
    task = Mimic_RNN(rnn_target, p_input=1, tau_task=1)
    data = task.gen_data(100, 0)
    y = np.arange(1, 101)
    y_correct = np.array([y, y]).T
    self.assertTrue(np.isclose(data['train']['Y'], y_correct).all())
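# Why identity weights make the target RNN "count": a minimal NumPy sketch.
# It assumes the conventional leaky update
#     a_t = (1 - alpha) * a_{t-1} + alpha * phi(W_rec a_{t-1} + W_in x_t + b_rec)
# and all-ones inputs on every step; both are assumptions, not read from
# Mimic_RNN itself. With alpha=1, identity weights/activation and zero biases,
# the state simply accumulates the input, so the output equals the step count.
import numpy as np

a = np.zeros(2)
outputs = []
for t in range(100):
    x = np.ones(2)            # assumed input on every step
    a = a + x                 # alpha=1, W_rec=W_in=I, phi=identity, b_rec=0
    outputs.append(a.copy())  # output = W_out @ a = a
print(outputs[0], outputs[-1])  # [1. 1.] ... [100. 100.], matching y_correct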
import os

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader

from dataset import dataset, dataset2
from network import RNN

model_dir = '/home/lixiaoyu/project/airQuality/Analysis-of-Air-Quality-and-Outpatient-Quantity/ckpt/'
TIME_STEP = 120
INPUT_SIZE = 7
HIDDEN_SIZE = 32
LR = 0.01
EPOCH = 1000

rnn = RNN(INPUT_SIZE=INPUT_SIZE, HIDDEN_SIZE=HIDDEN_SIZE)
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)
loss_func = nn.MSELoss()
LoadModel = False


def train(train_loader, num_e):
    torch.manual_seed(1)
    if LoadModel:
        checkpoint = torch.load(model_dir + '{}.ckpt'.format(num_e))
        rnn.load_state_dict(checkpoint['state_dict'])
        print('Loading model~~~~~~~~~~', num_e)
    for e in range(EPOCH):
        print('epoch>>>>>>> ', e)
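# The snippet above cuts off at the epoch loop. A minimal sketch of a typical
# per-batch step is given below as a separate helper; the (x, y) batch layout
# and the assumption that rnn(x) returns predictions with the same shape as y
# are illustrative, not taken from the original project.
def train_step(train_loader):
    for x, y in train_loader:
        pred = rnn(x.float())               # assumed forward signature
        loss = loss_func(pred, y.float())   # nn.MSELoss defined above
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()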
def train(word2vec, dataset, parameters):
    modeldir = os.path.join(parameters["runs_dir"], parameters["model_name"])
    if not os.path.exists(modeldir):
        os.mkdir(modeldir)
    logdir = os.path.join(modeldir, "log")
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    logdir_train = os.path.join(logdir, "train")
    if not os.path.exists(logdir_train):
        os.mkdir(logdir_train)
    logdir_test = os.path.join(logdir, "test")
    if not os.path.exists(logdir_test):
        os.mkdir(logdir_test)
    logdir_dev = os.path.join(logdir, "dev")
    if not os.path.exists(logdir_dev):
        os.mkdir(logdir_dev)
    savepath = os.path.join(modeldir, "save")
    device_string = "/gpu:{}".format(parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    with tf.device(device_string):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
        sess = tf.Session(config=config_proto)
        premises_ph = tf.placeholder(tf.float32,
                                     shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                     name="premises")
        hypothesis_ph = tf.placeholder(tf.float32,
                                       shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                       name="hypothesis")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")
        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(filter_height=1,
                                             filter_width=parameters["embedding_dim"],
                                             in_channels=1,
                                             out_channels=parameters["num_units"],
                                             name="projecter")
        # optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"], name="ADAM", beta1=0.9, beta2=0.999)
        with tf.variable_scope(name_or_scope="premise"):
            premise = RNN(cell=LSTMCell, num_units=parameters["num_units"],
                          embedding_dim=parameters["embedding_dim"],
                          projecter=projecter, keep_prob=keep_prob_ph)
            premise.process(sequence=premises_ph)
        with tf.variable_scope(name_or_scope="hypothesis"):
            hypothesis = RNN(cell=AttentionLSTMCell, num_units=parameters["num_units"],
                             embedding_dim=parameters["embedding_dim"],
                             hiddens=premise.hiddens, states=premise.states,
                             projecter=projecter, keep_prob=keep_prob_ph)
            hypothesis.process(sequence=hypothesis_ph)
        loss, loss_summary, accuracy, accuracy_summary = hypothesis.loss(targets=targets_ph)
        weight_decay = tf.reduce_sum([tf.reduce_sum(parameter)
                                      for parameter in premise.parameters + hypothesis.parameters])
        global_loss = loss + parameters["weight_decay"] * weight_decay
        train_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        # train_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        train_summary_writer = tf.train.SummaryWriter(logdir_train, sess.graph)
        # train_summary_writer = tf.summary.FileWriter(logdir_train, sess.graph)
        # test_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        dev_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        # test_summary_writer = tf.train.SummaryWriter(logdir_test)
        dev_summary_writer = tf.train.SummaryWriter(logdir_dev)
        saver = tf.train.Saver(max_to_keep=10)
        # summary_writer = tf.train.SummaryWriter(logdir)
        tf.train.write_graph(sess.graph_def, modeldir, "graph.pb", as_text=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"],
                                           name="ADAM", beta1=0.9, beta2=0.999)
        train_op = optimizer.minimize(global_loss)
        sess.run(tf.initialize_all_variables())
        # sess.run(tf.global_variables_initializer())
        batcher = Batcher(word2vec=word2vec, settings=parameters)
        # train_split = "train"
        # train_batches = batcher.batch_generator(dataset=dataset[train_split],
        #                                         num_epochs=parameters["num_epochs"],
        #                                         batch_size=parameters["batch_size"]["train"],
        #                                         sequence_length=parameters["sequence_length"])
        # print("train data size: %d" % len(dataset["train"]["targets"]))
        # num_step_by_epoch = int(math.ceil(len(dataset[train_split]["targets"]) / parameters["batch_size"]["train"]))
        # best_dev_accuracy = 0
        print("train data size: %d" % len(dataset["train"]["targets"]))
        best_dev_accuracy = 0.0
        total_loss = 0.0
        timestamp = time.time()
        for epoch in range(parameters["num_epochs"]):
            print("epoch %d" % epoch)
            train_batches = batcher.batch_generator(dataset=dataset["train"],
                                                    num_epochs=1,
                                                    batch_size=parameters["batch_size"]["train"],
                                                    sequence_length=parameters["sequence_length"])
            steps = len(dataset["train"]["targets"]) / parameters["batch_size"]["train"]
            # progress bar http://stackoverflow.com/a/3002114
            bar = progressbar.ProgressBar(maxval=steps / 10 + 1,
                                          widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                                   progressbar.Percentage()])
            bar.start()
            for step, (train_batch, train_epoch) in enumerate(train_batches):
                feed_dict = {
                    premises_ph: np.transpose(train_batch["premises"], (1, 0, 2)),
                    hypothesis_ph: np.transpose(train_batch["hypothesis"], (1, 0, 2)),
                    targets_ph: train_batch["targets"],
                    keep_prob_ph: parameters["keep_prob"],
                }
                _, summary_str, train_loss, train_accuracy = sess.run(
                    [train_op, train_summary_op, loss, accuracy], feed_dict=feed_dict)
                total_loss += train_loss
                train_summary_writer.add_summary(summary_str, step)
                if step % 100 == 0:
                    # eval 1 random dev batch
                    dev_batches = batcher.batch_generator(dataset=dataset["dev"],
                                                          num_epochs=1,
                                                          batch_size=parameters["batch_size"]["dev"],
                                                          sequence_length=parameters["sequence_length"])
                    for dev_step, (dev_batch, _) in enumerate(dev_batches):
                        feed_dict = {
                            premises_ph: np.transpose(dev_batch["premises"], (1, 0, 2)),
                            hypothesis_ph: np.transpose(dev_batch["hypothesis"], (1, 0, 2)),
                            targets_ph: dev_batch["targets"],
                            keep_prob_ph: 1.,
                        }
                        summary_str, dev_loss, dev_accuracy = sess.run(
                            [dev_summary_op, loss, accuracy], feed_dict=feed_dict)
                        dev_summary_writer.add_summary(summary_str, step)
                        break
                bar.update(step / 10 + 1)
            bar.finish()
            # eval on all dev
            dev_batches = batcher.batch_generator(dataset=dataset["dev"],
                                                  num_epochs=1,
                                                  batch_size=len(dataset["dev"]["targets"]),
                                                  sequence_length=parameters["sequence_length"])
            dev_accuracy = 0
            for dev_step, (dev_batch, _) in enumerate(dev_batches):
                feed_dict = {
                    premises_ph: np.transpose(dev_batch["premises"], (1, 0, 2)),
                    hypothesis_ph: np.transpose(dev_batch["hypothesis"], (1, 0, 2)),
                    targets_ph: dev_batch["targets"],
                    keep_prob_ph: 1.,
                }
                summary_str, dev_loss, dev_accuracy = sess.run(
                    [dev_summary_op, loss, accuracy], feed_dict=feed_dict)
                print "\nDEV full | loss={0:.2f}, accuracy={1:.2f}% ".format(dev_loss, 100. * dev_accuracy)
                print ""
                if dev_accuracy > best_dev_accuracy:
                    best_dev_accuracy = dev_accuracy  # remember the best dev accuracy so far
                    saver.save(sess, save_path=savepath + '_best', global_step=(epoch + 1) * steps)
                break
            saver.save(sess, save_path=savepath, global_step=(epoch + 1) * steps)
            current_time = time.time()
            print("Iter %3d Loss %-8.3f Dev Acc %-6.2f Time %-5.2f at %s" %
                  (epoch, total_loss, dev_accuracy,
                   (current_time - timestamp) / 60.0, str(datetime.datetime.now())))
            total_loss = 0.0
            print ""
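# For reference, a parameters dict with every key the train() function above
# reads (plus the "test" batch size used by the companion test() function).
# Only the key names come from the code; the concrete values are illustrative
# guesses, not the original configuration.
parameters = {
    "runs_dir": "runs",
    "model_name": "attention_lstm",
    "gpu": 0,
    "sequence_length": 20,
    "embedding_dim": 300,
    "num_units": 100,
    "learning_rate": 0.001,
    "weight_decay": 0.0,
    "keep_prob": 0.8,
    "num_epochs": 10,
    "batch_size": {"train": 64, "dev": 64, "test": 64},
}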
""" make predictions using the network """ import torch import torch.nn import os import dataset from network import RNN dataset = dataset.Dataset() max_length = 20 rnn = RNN(dataset.n_letters, 128, dataset.n_letters, dataset.n_categories) rnn.eval() # load weights if os.path.exists("models/gen_names.pkl"): checkpoint = torch.load("models/gen_names.pkl") rnn.load_state_dict(checkpoint['nn_state_dict']) print("checkpoint loaded") def sample(category, start_char): with torch.no_grad(): category_tensor_var = dataset.category_tensor(category) input = dataset.input_tensor(start_char) hidden = rnn.init_hidden() output_name = start_char
def __init__(self, host='0.0.0.0', port=8080):
    self._host = host
    self._port = port
    self._model = RNN()
    self._app = Bottle()
    self._route()
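# A self-contained sketch (not the original project's code) of how such a
# Bottle wrapper is typically wired up around the __init__ above. The class
# name, the /predict route, and the RNN.predict() method are all assumptions.
from bottle import Bottle, request


class RNNServer:
    def __init__(self, host='0.0.0.0', port=8080):
        self._host = host
        self._port = port
        self._model = RNN()
        self._app = Bottle()
        self._route()

    def _route(self):
        # register one POST endpoint that feeds the request body to the model
        self._app.route('/predict', method='POST', callback=self._predict)

    def _predict(self):
        payload = request.json or {}
        return {'prediction': self._model.predict(payload.get('text', ''))}  # assumed model API

    def run(self):
        self._app.run(host=self._host, port=self._port)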
from options import options

options = options()
opts = options.parse()

# data loader
data_loader = data.dataloader(opts)
train_loader = util.create_dataset(
    [data_loader.train_data, data_loader.train_label],
    data_loader.wordIdx, data_loader.labelIdx, opts)

from network import RNN
from train import train
from test import test

'''RNN model'''
RNN = RNN(opts, data_loader.wordIdx, data_loader.labelIdx,
          len(data_loader.labelIdx.items())).to(device)
if opts.print_model:
    print(RNN)

'''Optimizers'''
import torch.optim as optim
RNN_optim = optim.Adam(RNN.parameters(), lr=opts.lr, betas=(opts.beta1, opts.beta2))

'''run training'''
trainer = train(opts, RNN, RNN_optim, train_loader)
trainer.trainer()

test_loader = util.create_dataset(
    [data_loader.test_data, data_loader.test_label], data_loader.wordIdx,
training_loader = DataLoader(training_set, args.batch_size, shuffle=True)

validation_data, validation_labels = joblib.load('%s/validation-%d.data' % (args.datapath, args.n))
validation_data = th.from_numpy(validation_data)
validation_labels = onehot_sequence(th.from_numpy(validation_labels), 10, cuda)
validation_set = TensorDataset(validation_data, validation_labels)
validation_loader = DataLoader(validation_set, args.batch_size)

test_data, test_labels = joblib.load('%s/test-%d.data' % (args.datapath, args.n))
test_data = th.from_numpy(test_data)
test_labels = onehot_sequence(th.from_numpy(test_labels), 10, cuda)
test_set = TensorDataset(test_data, test_labels)
test_loader = DataLoader(test_set, args.batch_size)

cnn_path = args.pretrained_cnn_path if args.pretrained_cnn else None
model = RNN(args.n_units, 10, cnn_path, cuda)
if args.gpu > -1:
    model.cuda()

if args.criterion == 'regression_loss':
    from criterions import regression_loss
    criterion = regression_loss(args.entropy_scale)
else:
    criterion = getattr(__import__('criterions'), args.criterion)()
if args.gpu > -1:
    criterion.cuda()

optimizer = Adam(model.parameters(), lr=1e-3)

vis = visdom.Visdom()
tb_path = args.tensorboard_path
if args.tensorboard_log:
    tb_path += '/%s' % args.tensorboard_log
TensorboardVisualizer.configure(tb_path)
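# onehot_sequence() is used above but not defined in this snippet. A plausible
# minimal implementation is sketched here under the assumptions that the label
# tensor has shape (batch, seq_len), the result is a float one-hot tensor of
# shape (batch, seq_len, n_classes), and `cuda` is a bool flag.
import torch as th


def onehot_sequence(labels, n_classes, cuda):
    onehot = th.zeros(*labels.size(), n_classes)
    onehot.scatter_(-1, labels.long().unsqueeze(-1), 1.0)
    return onehot.cuda() if cuda else onehot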
def test(word2vec, dataset, parameters, loadpath):
    print "1"
    device_string = "/gpu:{}".format(parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    with tf.device(device_string):
        print "2"
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
        sess = tf.Session(config=config_proto)
        premises_ph = tf.placeholder(tf.float32,
                                     shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                     name="premises")
        hypothesis_ph = tf.placeholder(tf.float32,
                                       shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                       name="hypothesis")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")
        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(filter_height=1,
                                             filter_width=parameters["embedding_dim"],
                                             in_channels=1,
                                             out_channels=parameters["num_units"],
                                             name="projecter")
        with tf.variable_scope(name_or_scope="premise"):
            premise = RNN(cell=LSTMCell, num_units=parameters["num_units"],
                          embedding_dim=parameters["embedding_dim"],
                          projecter=projecter, keep_prob=keep_prob_ph)
            premise.process(sequence=premises_ph)
        with tf.variable_scope(name_or_scope="hypothesis"):
            hypothesis = RNN(cell=AttentionLSTMCell, num_units=parameters["num_units"],
                             embedding_dim=parameters["embedding_dim"],
                             hiddens=premise.hiddens, states=premise.states,
                             projecter=projecter, keep_prob=keep_prob_ph)
            hypothesis.process(sequence=hypothesis_ph)
        loss, loss_summary, accuracy, accuracy_summary = hypothesis.loss(targets=targets_ph)
        loader = tf.train.Saver()
        loader.restore(sess, loadpath)
        batcher = Batcher(word2vec=word2vec, settings=parameters)
        test_batches = batcher.batch_generator(dataset=dataset["test"],
                                               num_epochs=1,
                                               batch_size=parameters["batch_size"]["test"],
                                               sequence_length=parameters["sequence_length"])
        print "2.5"
        for test_step, (test_batch, _) in enumerate(test_batches):
            print "3"
            feed_dict = {
                premises_ph: np.transpose(test_batch["premises"], (1, 0, 2)),
                hypothesis_ph: np.transpose(test_batch["hypothesis"], (1, 0, 2)),
                targets_ph: test_batch["targets"],
                keep_prob_ph: 1.,
            }
            test_loss, test_accuracy = sess.run([loss, accuracy], feed_dict=feed_dict)
            print "\nTEST | loss={0:.2f}, accuracy={1:.2f}% ".format(test_loss, 100. * test_accuracy)
            print ""
def test_small_lr_case(self):
    alpha = 1
    self.rnn_1 = RNN(self.W_in, self.W_rec, self.W_out, self.b_rec, self.b_out,
                     activation=tanh, alpha=alpha,
                     output=softmax, loss=softmax_cross_entropy)
    self.rnn_2 = RNN(self.W_in, self.W_rec, self.W_out, self.b_rec, self.b_out,
                     activation=tanh, alpha=alpha,
                     output=softmax, loss=softmax_cross_entropy)
    self.rnn_3 = RNN(self.W_in, self.W_rec, self.W_out, self.b_rec, self.b_out,
                     activation=tanh, alpha=alpha,
                     output=softmax, loss=softmax_cross_entropy)
    lr = 0.00001
    self.optimizer_1 = Stochastic_Gradient_Descent(lr=lr)
    self.learn_alg_1 = RTRL(self.rnn_1)
    self.optimizer_2 = Stochastic_Gradient_Descent(lr=lr)
    self.learn_alg_2 = Future_BPTT(self.rnn_2, 25)
    self.optimizer_3 = Stochastic_Gradient_Descent(lr=lr)
    self.learn_alg_3 = Efficient_BPTT(self.rnn_3, 100)
    monitors = []
    np.random.seed(1)
    self.sim_1 = Simulation(self.rnn_1)
    self.sim_1.run(self.data, learn_alg=self.learn_alg_1, optimizer=self.optimizer_1,
                   monitors=monitors, verbose=False)
    np.random.seed(1)
    self.sim_2 = Simulation(self.rnn_2)
    self.sim_2.run(self.data, learn_alg=self.learn_alg_2, optimizer=self.optimizer_2,
                   monitors=monitors, verbose=False)
    np.random.seed(1)
    self.sim_3 = Simulation(self.rnn_3)
    self.sim_3.run(self.data, learn_alg=self.learn_alg_3, optimizer=self.optimizer_3,
                   monitors=monitors, verbose=False)
    # Assert networks learned similar weights with a small tolerance.
    assert_allclose(self.rnn_1.W_rec, self.rnn_2.W_rec, atol=1e-4)
    assert_allclose(self.rnn_2.W_rec, self.rnn_3.W_rec, atol=1e-4)
    # But that there was some difference from initialization
    self.assertFalse(np.isclose(self.rnn_1.W_rec, self.W_rec, atol=1e-4).all())
def test_kernl_reduce_rflo(self):
    """Verifies that KeRNL reduces to RFLO in special case.

    If beta is initialized to the identity while the gammas are all
    initialized to the network inverse time constant alpha, and the KeRNL
    optimizer has 0 learning rate (i.e. beta and gamma do not change), then
    KeRNL should produce the same gradients as RFLO if the approximate KeRNL
    of (1 - alpha) (rather than exp(-alpha)) is used."""
    self.task = Add_Task(4, 6, deterministic=True, tau_task=2)
    self.data = self.task.gen_data(100, 0)
    alpha = 0.3
    self.rnn_1 = RNN(self.W_in, self.W_rec, self.W_out, self.b_rec, self.b_out,
                     activation=tanh, alpha=alpha,
                     output=softmax, loss=softmax_cross_entropy)
    self.rnn_2 = RNN(self.W_in, self.W_rec, self.W_out, self.b_rec, self.b_out,
                     activation=tanh, alpha=alpha,
                     output=softmax, loss=softmax_cross_entropy)
    # RFLO
    np.random.seed(1)
    self.optimizer_1 = Stochastic_Gradient_Descent(lr=0.001)
    self.learn_alg_1 = RFLO(self.rnn_1, alpha)
    # KeRNL with beta and gamma fixed to RFLO values
    np.random.seed(1)
    self.optimizer_2 = Stochastic_Gradient_Descent(lr=0.001)
    self.KeRNL_optimizer = Stochastic_Gradient_Descent(lr=0)
    A = np.eye(self.rnn_2.n_h)
    alpha_i = np.ones(self.rnn_2.n_h) * alpha
    self.learn_alg_2 = KeRNL(self.rnn_2, self.KeRNL_optimizer, A=A, alpha=alpha_i)
    monitors = []
    np.random.seed(2)
    self.sim_1 = Simulation(self.rnn_1)
    self.sim_1.run(self.data, learn_alg=self.learn_alg_1, optimizer=self.optimizer_1,
                   monitors=monitors, verbose=False)
    np.random.seed(2)
    self.sim_2 = Simulation(self.rnn_2)
    self.sim_2.run(self.data, learn_alg=self.learn_alg_2, optimizer=self.optimizer_2,
                   monitors=monitors, verbose=False)
    # Assert networks learned the same weights
    assert_allclose(self.rnn_1.W_rec, self.rnn_2.W_rec)
    # Assert networks' parameters changed appreciably, despite a large
    # tolerance for closeness.
    self.assertFalse(np.isclose(self.W_rec, self.rnn_2.W_rec).all())
                          transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=opts.batch_size, shuffle=True)

testset = datasets.MNIST('MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False)

from network import RNN
from train import trainer
from test import tester

'''RNN model'''
RNN = RNN(opts).to(device)
if opts.print_model:
    print(RNN)

'''Optimizers'''
import torch.optim as optim
RNN_optim = optim.Adam(RNN.parameters(), lr=opts.lr, betas=(opts.beta1, opts.beta2))

'''Criterion'''
criterion = nn.CrossEntropyLoss()  # the target label is not one-hotted

'''run training'''
trainer(opts, RNN, RNN_optim, criterion, trainloader)

'''test'''
tester(opts, RNN, testloader)
def generate(model_path):
    with open('{}/rnn.pickle'.format(model_path), 'rb') as f:  # pickle files should be read in binary mode
        config = pickle.load(f)
    n = RNN(training=False, **config)
    print n.gen_text(sess=None, model_path=model_path)
options = options()
opts = options.parse()

# data loader
data_loader = data.dataloader(opts)
train_loader = util.create_dataset(data_loader.train_data, data_loader.letteridx,
                                   data_loader.labelidx, opts)
test_loader = util.create_dataset(data_loader.test_data, data_loader.letteridx,
                                  data_loader.labelidx, opts)

from network import RNN
from train import trainer
from test import tester

'''RNN model'''
RNN = RNN(opts, data_loader.letteridx).to(device)
if opts.print_model:
    print(RNN)

'''Optimizers'''
import torch.optim as optim
RNN_optim = optim.Adam(RNN.parameters(), lr=opts.lr, betas=(opts.beta1, opts.beta2))

'''Criterion'''
criterion = nn.NLLLoss()

'''run training'''
trainer(opts, RNN, RNN_optim, criterion, train_loader)

'''test'''
tester(opts, RNN, test_loader)
def train(word2vec, dataset, parameters, class_weights):
    modeldir = os.path.join(parameters["runs_dir"], parameters["model_name"])
    if not os.path.exists(modeldir):
        os.mkdir(modeldir)
    logdir = os.path.join(modeldir, "log")
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    logdir_train = os.path.join(logdir, "train")
    if not os.path.exists(logdir_train):
        os.mkdir(logdir_train)
    logdir_test = os.path.join(logdir, "test")
    if not os.path.exists(logdir_test):
        os.mkdir(logdir_test)
    # logdir_dev = os.path.join(logdir, "dev")
    # if not os.path.exists(logdir_dev):
    #     os.mkdir(logdir_dev)
    savepath = os.path.join(modeldir, "save")
    # device_string = "/gpu:{}".format(parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    device_string = "/cpu:0"
    with tf.device(device_string):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
        sess = tf.Session(config=config_proto)
        headline_ph = tf.placeholder(tf.float32,
                                     shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                     name="headline")
        body_ph = tf.placeholder(tf.float32,
                                 shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                 name="body")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")
        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(filter_height=1,
                                             filter_width=parameters["embedding_dim"],
                                             in_channels=1,
                                             out_channels=parameters["num_units"],
                                             name="projecter")
        optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"],
                                           name="ADAM", beta1=0.9, beta2=0.999)
        with tf.variable_scope(name_or_scope="headline"):
            headline = RNN(cell=LSTMCell, num_units=parameters["num_units"],
                           embedding_dim=parameters["embedding_dim"],
                           projecter=projecter, keep_prob=keep_prob_ph,
                           class_weights=class_weights)
            headline.process(sequence=headline_ph)
        with tf.variable_scope(name_or_scope="body"):
            body = RNN(cell=AttentionLSTMCell, num_units=parameters["num_units"],
                       embedding_dim=parameters["embedding_dim"],
                       hiddens=headline.hiddens, states=headline.states,
                       projecter=projecter, keep_prob=keep_prob_ph,
                       class_weights=class_weights)
            body.process(sequence=body_ph)
        loss, loss_summary, accuracy, accuracy_summary = body.loss(targets=targets_ph)
        weight_decay = tf.reduce_sum([tf.reduce_sum(parameter)
                                      for parameter in headline.parameters + body.parameters])
        global_loss = loss + parameters["weight_decay"] * weight_decay
        train_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        train_summary_writer = tf.summary.FileWriter(logdir_train, sess.graph)
        test_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        test_summary_writer = tf.summary.FileWriter(logdir_test)
        saver = tf.train.Saver(max_to_keep=10)
        summary_writer = tf.summary.FileWriter(logdir)
        tf.train.write_graph(sess.graph_def, modeldir, "graph.pb", as_text=False)
        loader = tf.train.Saver(tf.global_variables())
        optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"],
                                           name="ADAM", beta1=0.9, beta2=0.999)
        train_op = optimizer.minimize(global_loss)
        sess.run(tf.global_variables_initializer())
        batcher = Batcher(word2vec=word2vec)
        train_batches = batcher.batch_generator(dataset=dataset["train"],
                                                num_epochs=parameters["num_epochs"],
                                                batch_size=parameters["batch_size"]["train"],
                                                sequence_length=parameters["sequence_length"])
        num_step_by_epoch = int(math.ceil(len(dataset["train"]["targets"]) / parameters["batch_size"]["train"]))
        for train_step, (train_batch, epoch) in enumerate(train_batches):
            feed_dict = {
                headline_ph: np.transpose(train_batch["headline"], (1, 0, 2)),
                body_ph: np.transpose(train_batch["body"], (1, 0, 2)),
                targets_ph: train_batch["targets"],
                keep_prob_ph: parameters["keep_prob"],
            }
            _, summary_str, train_loss, train_accuracy = sess.run(
                [train_op, train_summary_op, loss, accuracy], feed_dict=feed_dict)
            train_summary_writer.add_summary(summary_str, train_step)
            if train_step % 10 == 0:
                sys.stdout.write(
                    "\rTRAIN | epoch={0}/{1}, step={2}/{3} | loss={4:.2f}, accuracy={5:.2f}% ".format(
                        epoch + 1, parameters["num_epochs"],
                        train_step % num_step_by_epoch, num_step_by_epoch,
                        train_loss, 100. * train_accuracy))
                sys.stdout.flush()
            if train_step % 500 == 0:
                test_batches = batcher.batch_generator(dataset=dataset["test"],
                                                       num_epochs=1,
                                                       batch_size=parameters["batch_size"]["test"],
                                                       sequence_length=parameters["sequence_length"])
                for test_step, (test_batch, _) in enumerate(test_batches):
                    feed_dict = {
                        headline_ph: np.transpose(test_batch["headline"], (1, 0, 2)),
                        body_ph: np.transpose(test_batch["body"], (1, 0, 2)),
                        targets_ph: test_batch["targets"],
                        keep_prob_ph: 1.,
                    }
                    summary_str, test_loss, test_accuracy = sess.run(
                        [test_summary_op, loss, accuracy], feed_dict=feed_dict)
                    print "\nTEST | loss={0:.2f}, accuracy={1:.2f}% ".format(test_loss, 100. * test_accuracy)
                    print ""
                    test_summary_writer.add_summary(summary_str, train_step)
                    break
            if train_step % 5000 == 0:
                saver.save(sess, save_path=savepath, global_step=train_step)
        print ""
keys = yaml.load(open("twitter_keys", "r"))
consumer_key = keys["consumer_key"]
consumer_secret = keys["consumer_secret"]
access_token = keys["access_token"]
access_secret = keys["access_secret"]

handler = OAuthHandler(consumer_key, consumer_secret)
handler.set_access_token(access_token, access_secret)

a, b, c, d, inv_vocab = load_dataset()
mx = len(inv_vocab)
api = API(handler)

rnn = RNN(mx, rnn_size, False)
model = L.Classifier(rnn)
serializers.load_hdf5("mymodel.h5", model)

while True:
    nxt = np.random.randint(0, mx)
    result = ""
    for i in range(40):
        nxt = np.array([nxt], np.int32)
        prob = F.softmax(model.predictor(nxt))
        nxt = np.argmax(prob.data)
        s = inv_vocab[nxt]
        if s == "。":
            break
        result += s