import os

import torch
import torch.nn as nn
import torch.optim as optim


def main():
    model = LSTM(settings.vocab_size, settings.word_embedding_size,
                 settings.hidden_size, settings.num_layers,
                 settings.out_dim, settings.drop_out)

    # init the embedding layer with pre-trained word embeddings
    dataset = Dataset(args.data)
    model.word_embed.weight = nn.Parameter(
        torch.from_numpy(dataset.get_wordembedding()))

    if torch.cuda.is_available():
        torch.cuda.manual_seed(settings.seed)
        model.cuda()

    optimizer = optim.SGD(model.parameters(), lr=settings.lr, weight_decay=1e-5)
    criteria = nn.CrossEntropyLoss()

    best_dev_acc = 0.0
    best_test_acc = 0.0
    num_steps = dataset.size // settings.batch_size * settings.max_epochs
    for i in range(num_steps):
        batch_data = dataset.get_batch()
        loss = train(model, batch_data, optimizer, criteria)

        if (i + 1) % settings.validate_freq == 0:
            print("validating...")
            dev_acc = test(model, dataset.dev_data)
            test_acc = test(model, dataset.test_data)
            if dev_acc > best_dev_acc:
                best_dev_acc = dev_acc
                best_test_acc = test_acc
                torch.save(model, os.path.join(
                    args.model_dir, "sa_{}.model".format(best_dev_acc)))
            epoch = i * settings.batch_size / float(dataset.size)
            str_info = ("epoch: {}, dev acc: {}, test acc: {}, "
                        "batch loss: {}, best dev acc: {}, "
                        "best test acc: {}").format(
                            epoch, dev_acc, test_acc, loss.item(),
                            best_dev_acc, best_test_acc)
            with open(os.path.join(args.model_dir, "log.txt"), "a") as logger:
                logger.write(str_info + "\n")
            print(str_info)
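main() depends on train() and test() helpers that are not shown in this snippet. A minimal sketch of what they might look like, assuming each batch or split is an (inputs, labels) pair of tensors and that the model returns class logits; the function bodies below are assumptions, not the original implementation:

import torch


def train(model, batch_data, optimizer, criteria):
    # one optimization step on a single batch (hypothetical body,
    # matching only the call signature used in main() above)
    inputs, labels = batch_data
    model.train()
    optimizer.zero_grad()
    logits = model(inputs)
    loss = criteria(logits, labels)
    loss.backward()
    optimizer.step()
    return loss


def test(model, data):
    # accuracy on a held-out split (hypothetical body)
    inputs, labels = data
    model.eval()
    with torch.no_grad():
        preds = model(inputs).argmax(dim=1)
    return (preds == labels).float().mean().item()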
def predict(self, inputs_data):
    """ Preprocess raw inputs into a single batch and run the
        prediction part of the graph on it.
    """
    x_batch = Dataset.preprocess_for_prediction(inputs_data, self.settings)
    feed_dict = self._feed_data_predict(x_batch)
    outputs = self._sess.run(self._outputs_predict, feed_dict=feed_dict)
    return outputs
#
model_tag = 'cnn'
#
if model_tag == 'cnn':
    from model_graph_cnn import build_graph
elif model_tag == 'csm':
    from model_graph_csm import build_graph
elif model_tag == 'rnn':
    from model_graph_rnn import build_graph
elif model_tag == 'mlp':
    from model_graph_mlp import build_graph

# data
dataset = Dataset()
dataset.load_vocab_tokens_and_emb()

# settings
config = ModelSettings()
config.vocab = dataset.vocab
config.model_tag = model_tag
config.model_graph = build_graph
config.is_train = False
config.check_settings()

# model
model = ModelWrapper(config)
model.prepare_for_prediction()
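With the wrapper prepared, inference is a single call to the predict() method shown above. The example inputs below are made up, and assume preprocess_for_prediction() accepts raw text:

# hypothetical example inputs; predict() turns them into one batch
examples = ['an example sentence', 'another example']
outputs = model.predict(examples)
print(outputs)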
#
if model_tag == 'cnn':
    from model_graph_cnn import build_graph
elif model_tag == 'csm':
    from model_graph_csm import build_graph
elif model_tag == 'rnf':
    from model_graph_rnf import build_graph
elif model_tag == 'rnn':
    from model_graph_rnn import build_graph
elif model_tag == 'mlp':
    from model_graph_mlp import build_graph

# data
dataset = Dataset()

flag_load_data = True

# load previously preprocessed data, or build it from scratch
if flag_load_data:
    dataset.load_preprocessed_data()
else:
    dataset.pretrained_emb_file = None
    dataset.emb_dim = 200
    dataset.max_seq_len = 200
    dataset.prepare_preprocessed_data(load_vocab=False)

data_train, data_test = dataset.split_train_and_test()
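The snippet ends at the split. Presumably the test half is then batched with the same Dataset helpers that train_and_valid() uses below and passed to evaluate(); a sketch under those assumptions, where model and config stand for a prepared ModelWrapper and its settings, as in the prediction script:

# assumed continuation; 'model' and 'config' are hypothetical here
test_batches = Dataset.do_batching_data(data_test, config.batch_size_eval)
test_batches = Dataset.do_standardizing_batches(test_batches, config)
loss, acc = model.evaluate(test_batches)
print('test loss: %f, test acc: %f' % (loss, acc))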
def train_and_valid(self, train_data, valid_data):
    """ Train the model, validating every valid_per_batch batches. """
    if not os.path.exists(self.model_dir):
        os.mkdir(self.model_dir)
    if not os.path.exists(self.model_dir + '_best'):
        os.mkdir(self.model_dir + '_best')

    print('Training and evaluating...')
    total_batch = 0
    best_acc_val = 0.0
    last_improved = 0

    lr = self.learning_rate_base
    with self._graph.as_default():
        self._sess.run(tf.assign(self._lr, tf.constant(lr, dtype=tf.float32)))

    valid_batches = Dataset.do_batching_data(valid_data, self.batch_size_eval)
    valid_batches = Dataset.do_standardizing_batches(valid_batches, self.settings)

    print('Creating model for evaluation ...')
    config_e = self.settings
    config_e.keep_prob = 1.0
    model_e = ModelWrapper(config_e)
    model_e.prepare_for_train_and_valid()

    flag_stop = False
    for epoch in range(self.num_epochs):
        print('Epoch: %d, training ...' % (epoch + 1))
        train_batches = Dataset.do_batching_data(train_data, self.batch_size)
        train_batches = Dataset.do_standardizing_batches(train_batches, self.settings)

        for data_batch in train_batches:
            feed_dict = self._feed_data_train(data_batch)

            # valid
            if total_batch % self.valid_per_batch == 0:
                # load the latest checkpoint into the evaluation model
                ckpt = tf.train.get_checkpoint_state(self.model_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    model_e._saver.restore(model_e._sess, ckpt.model_checkpoint_path)
                loss_val, acc_val = model_e.evaluate(valid_batches)

                # save best
                if acc_val >= best_acc_val:
                    best_acc_val = acc_val
                    last_improved = total_batch
                    model_e._saver_best.save(
                        model_e._sess,
                        os.path.join(model_e.model_dir + '_best', model_e.model_name),
                        global_step=total_batch)
                    # export a frozen graph (pb) alongside the checkpoint
                    constant_graph = graph_util.convert_variables_to_constants(
                        model_e._sess, model_e._sess.graph_def,
                        output_node_names=self.pb_outputs_name)
                    with tf.gfile.FastGFile(model_e.pb_file, mode='wb') as f:
                        f.write(constant_graph.SerializeToString())

                # stop
                if total_batch - last_improved >= self.patience_stop:
                    str_info = "no improvement for a long time, " \
                               "stop optimization at curr_batch: %d" % total_batch
                    self._log_info(str_info)
                    print(str_info)
                    flag_stop = True
                    break  # for batch

                # decay
                if total_batch - last_improved >= self.patience_decay or \
                        (not self.use_metric and
                         total_batch > 0 and
                         total_batch % self.patience_decay == 0):
                    lr *= self.ratio_decay
                    with self._graph.as_default():
                        self._sess.run(tf.assign(
                            self._lr, tf.constant(lr, dtype=tf.float32)))
                    last_improved = total_batch
                    str_info = 'learning_rate DECAYED at total_batch: %d' % total_batch
                    self._log_info(str_info)
                    print(str_info)

                str_info = 'loss, metric, best_metric: %.6f, %.4f, %.4f' % (
                    loss_val, acc_val, best_acc_val)
                self._log_info(str_info)
                str_info = 'curr_batch: %d, lr: %f' % (total_batch, lr)
                self._log_info(str_info)

            # optim
            self._sess.run(self._train_op, feed_dict=feed_dict)
            total_batch += 1

            # save
            if total_batch % self.save_per_batch == 0:
                loss = self._sess.run(self._loss_tensor, feed_dict=feed_dict)
                metric = 0.0
                if self.use_metric:
                    metric = self._sess.run(self._metric_tensor, feed_dict=feed_dict)
                self._log_info("")
                str_info = "epoch: %d" % (epoch + 1)
                self._log_info(str_info)
                str_info = "loss, metric of train: %f, %f" % (loss, metric)
                self._log_info(str_info)
                self._saver.save(self._sess,
                                 os.path.join(self.model_dir, self.model_name),
                                 global_step=total_batch)
        if flag_stop:
            break  # for epoch

    str_info = "training ended after total epochs: %d" % (epoch + 1)
    self._log_info(str_info)
    self._log_info("")
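train_and_valid() delegates validation to an evaluate() method that is not shown here. A plausible sketch, assuming it averages loss and accuracy over the standardized batches; the body, and the reuse of _feed_data_train() for evaluation batches, are assumptions:

def evaluate(self, eval_batches):
    # hypothetical body: mean loss and accuracy over all batches
    total_loss, total_acc, num_batches = 0.0, 0.0, 0
    for data_batch in eval_batches:
        feed_dict = self._feed_data_train(data_batch)
        loss, acc = self._sess.run(
            [self._loss_tensor, self._metric_tensor], feed_dict=feed_dict)
        total_loss += loss
        total_acc += acc
        num_batches += 1
    return total_loss / num_batches, total_acc / num_batches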
#
model_tag = 'rnn'
#
if model_tag == 'cnn':
    from model_graph_cnn import build_graph
elif model_tag == 'csm':
    from model_graph_csm import build_graph
elif model_tag == 'rnn':
    from model_graph_rnn import build_graph
elif model_tag == 'rnf':
    from model_graph_rnf import build_graph

# data
dataset = Dataset()

flag_load_data = True

# load previously preprocessed data, or build it from scratch
if flag_load_data:
    dataset.load_preprocessed_data()
else:
    dataset.pretrained_emb_file = None
    dataset.prepare_preprocessed_data(load_vocab=False)

data_train = dataset.data_idx_train, dataset.labels_idx_train
data_valid = dataset.data_idx_valid, dataset.labels_idx_valid
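The training script presumably continues with the same ModelSettings/ModelWrapper setup as the prediction script, with is_train switched on, and ends in the train_and_valid() call shown earlier. A sketch under those assumptions:

# assumed continuation of the training script
config = ModelSettings()
config.vocab = dataset.vocab
config.model_tag = model_tag
config.model_graph = build_graph
config.is_train = True
config.check_settings()

model = ModelWrapper(config)
model.prepare_for_train_and_valid()
model.train_and_valid(data_train, data_valid)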