def swipe_convert(src):
    src = [i - 64 for i in src]
    length = len(src)
    if len(src) > 10:
        src = src[:10]
        length = 10
    else:
        src = src + [0] * (10 - len(src))
    src = torch.LongTensor([src])
    length = torch.LongTensor([length])
    model = RNNModel('LSTM', 27, 1, 10, 1, 0.2)
    # Load the best saved model.
    with open('model.pt', 'rb') as f:
        model = torch.load(f)
    # after load the rnn params are not a continuous chunk of memory
    # this makes them a continuous chunk, and will speed up forward pass
    model.rnn.flatten_parameters()
    model.eval()
    hidden = model.init_hidden(1)
    with torch.no_grad():
        output, hidden = model(src, length, hidden)
        pred = output.max(1, keepdim=True)[1]
    return pred.item()
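# Usage sketch for swipe_convert (an illustration, not part of the original
# source): the function subtracts 64 from each code point, so it evidently
# expects uppercase-ASCII key codes ('A' == 65 maps to 1), and it assumes a
# trained checkpoint saved as model.pt in the working directory.
if __name__ == '__main__':
    predicted_class = swipe_convert([ord(c) for c in 'HELLO'])
    print(predicted_class)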
def sample_text(sess, data_provider, iteration):
    model = RNNModel(data_provider.vocabulary_size,
                     batch_size=1,
                     sequence_length=1,
                     hidden_layer_size=HIDDEN_LAYER_SIZE,
                     cells_size=CELLS_SIZE,
                     training=False)
    text = model.sample(sess, data_provider.chars, data_provider.vocabulary,
                        TEXT_SAMPLE_LENGTH)  # .encode("utf-8")
    with open(output_file, "a") as output:
        output.write("Iteration: " + str(iteration) + "\n")
        output.write(str(text) + "\n")
        output.write("\n")
def child_process(curr_step):
    print('Starting child process')
    model = RNNModel.Builder() \
        .set_max_steps(max_steps) \
        .set_word_feature_size(word_feature_size + pos_feature_size) \
        .set_read_path(os.path.join('records', 'eval')) \
        .set_epochs(1) \
        .set_char_emb_status(use_char_embeddings) \
        .set_cell_type(RNNModel.CellType.RNN_CELL_TYPE_GRU) \
        .set_cell_size(cell_size) \
        .set_batch_size(batch_size) \
        .set_class_size(num_classes) \
        .set_entity_class_size(num_entity_classes) \
        .set_layer_size(num_layers) \
        .set_model_path(model_path) \
        .set_model_name(model_name) \
        .set_logs_path(logs_path) \
        .set_bi_directional(bi_directional) \
        .set_classifer_status(is_classifer) \
        .set_state_feedback(state_feeback) \
        .set_time_major(time_major) \
        .set_char_feature_size(char_feature_size) \
        .set_char_cell_size(char_cell_size) \
        .set_char_vocab_size(char_vocab_size) \
        .set_oper_mode(RNNModel.OperMode.OPER_MODE_EVAL) \
        .build()
    model.evaluate(curr_step=curr_step)
def load_model(model_save_fn, model_type):
    if model_type == 'lstm':
        model = LSTMModel.load(model_save_fn)
    elif model_type == 'rnn':
        model = RNNModel.load(model_save_fn)
    elif model_type == 'cnn':
        model = CNNModel.load(model_save_fn)
    else:
        # Fail loudly instead of returning an undefined name.
        raise ValueError("Invalid model type: %s" % model_type)
    return model
def main(args):
    if args.model == 'lstm':
        config = RNNConfig(max_sequence_length, n_classes)
    else:
        config = CNNConfig(max_sequence_length, n_classes)

    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=config.allow_soft_placement,
            log_device_placement=config.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            init_embedding = tf.random_uniform(
                [vocab_size, config.embed_size], -1.0, 1.0)
            if args.model == 'lstm':
                model = RNNModel(config, init_embedding)
            else:
                model = CNNModel(config, init_embedding)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", args.model, timestamp))
            print("Writing to {}\n".format(out_dir))
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            dataset.vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)

            # Checkpoint directory. TensorFlow assumes this directory already
            # exists, so we need to create it.
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver()

            sess.run(tf.global_variables_initializer())
            model.fit(sess, saver, dataset, checkpoint_dir)
def main(): """Sup Main!""" models = [CNNModel(), RNNModel()] for model in models: model.build_model() train = TrainModel(model, n_epochs=200, batch_size=128) train.train_model() train.reset_model()
def rnn():
    data_provider = DataProvider(data_dir, BATCH_SIZE, SEQUENCE_LENGTH)
    model = RNNModel(data_provider.vocabulary_size,
                     batch_size=BATCH_SIZE,
                     sequence_length=SEQUENCE_LENGTH,
                     hidden_layer_size=HIDDEN_LAYER_SIZE,
                     cells_size=CELLS_SIZE)
    with tf.Session() as sess:
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(tensorboard_dir)
        writer.add_graph(sess.graph)
        sess.run(tf.global_variables_initializer())

        # One epoch is a forward pass and one backward pass over all of the
        # training examples.
        epoch = 0
        temp_losses = []
        smooth_losses = []
        while True:
            sess.run(
                tf.assign(model.learning_rate,
                          LEARNING_RATE * (DECAY_RATE ** epoch)))
            data_provider.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for batch in range(data_provider.batches_size):
                inputs, targets = data_provider.next_batch()
                feed = {model.input_data: inputs, model.targets: targets}
                for index, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[index].c
                    feed[h] = state[index].h
                # Iteration is the number of times batch data has passed
                # through the neural network - both forward and backward
                # propagation.
                iteration = epoch * data_provider.batches_size + batch
                summary, loss, state, _ = sess.run(
                    [summaries, model.cost, model.final_state, model.train_op],
                    feed)
                writer.add_summary(summary, iteration)
                temp_losses.append(loss)
                if iteration % SAMPLING_FREQUENCY == 0:
                    sample_model(sess, data_provider, iteration)
                if iteration % LOGGING_FREQUENCY == 0:
                    smooth_loss = np.mean(temp_losses)
                    smooth_losses.append(smooth_loss)
                    temp_losses = []
                    plot(smooth_losses, "iterations (thousands)", "loss")
                    print('{{"metric": "iteration", "value": {}}}'.format(
                        iteration))
                    print('{{"metric": "epoch", "value": {}}}'.format(epoch))
                    print('{{"metric": "loss", "value": {}}}'.format(
                        smooth_loss))
            epoch += 1
def main():
    args = parse_args()
    logging.basicConfig(stream=sys.stdout,
                        format='%(asctime)s %(levelname)s:%(message)s',
                        level=logging.INFO,
                        datefmt='%I:%M:%S')

    with codecs.open(data_file, 'r') as f:
        text = f.read()
    train_size = len(text)
    train_text = text

    if args.test == 'false':
        vocab_index_dict, index_vocab_dict, vocab_size = create_vocab(text)
        save_vocab(vocab_index_dict, 'vocab.json')
    else:
        vocab_index_dict, index_vocab_dict, vocab_size = load_vocab(
            'vocab.json')

    train_batches = BatchGenerator(train_text, batch_size, seq_length,
                                   vocab_size, vocab_index_dict)

    graph = tf.Graph()
    with graph.as_default():
        model = RNNModel(args.test, hidden_size, rnn_layers, batch_size,
                         seq_length, vocab_size, embedding_size,
                         learning_rate, max_grad_norm)

    with tf.Session(graph=graph) as session:
        model_saver = tf.train.Saver()
        if args.test == 'false':
            tf.global_variables_initializer().run()
            for i in range(num_epochs):
                model.train(session, train_size, train_batches)
                if i % 100 == 0:
                    logging.info("saving model")
                    model_saver.save(session, rnn_model,
                                     global_step=model.global_step)
        else:
            module_file = tf.train.latest_checkpoint(restore_path)
            model_saver.restore(session, module_file)
            start_text = 'your'
            length = 20
            print(
                model.predict(session, start_text, length, vocab_index_dict,
                              index_vocab_dict))
def main():
    config = deepcopy(QMixConfig)
    env = StarCraft2Env(map_name=config['scenario'],
                        difficulty=config['difficulty'])
    env = SC2EnvWrapper(env)
    config['episode_limit'] = env.episode_limit
    config['obs_shape'] = env.obs_shape
    config['state_shape'] = env.state_shape
    config['n_agents'] = env.n_agents
    config['n_actions'] = env.n_actions

    rpm = EpisodeReplayBuffer(config['replay_buffer_size'])
    agent_model = RNNModel(config)
    qmixer_model = QMixerModel(config)
    algorithm = QMIX(agent_model, qmixer_model, config)
    qmix_agent = QMixAgent(algorithm, config)

    while rpm.count < config['memory_warmup_size']:
        train_reward, train_step, train_is_win, train_loss, train_td_error \
            = run_train_episode(env, qmix_agent, rpm, config)

    total_steps = 0
    last_test_step = -1e10
    while total_steps < config['training_steps']:
        train_reward, train_step, train_is_win, train_loss, train_td_error \
            = run_train_episode(env, qmix_agent, rpm, config)
        total_steps += train_step

        if total_steps - last_test_step >= config['test_steps']:
            last_test_step = total_steps
            eval_is_win_buffer = []
            eval_reward_buffer = []
            eval_steps_buffer = []
            for _ in range(3):
                eval_reward, eval_step, eval_is_win = run_evaluate_episode(
                    env, qmix_agent)
                eval_reward_buffer.append(eval_reward)
                eval_steps_buffer.append(eval_step)
                eval_is_win_buffer.append(eval_is_win)

            summary.add_scalar('train_loss', train_loss, total_steps)
            summary.add_scalar('eval_reward', np.mean(eval_reward_buffer),
                               total_steps)
            summary.add_scalar('eval_steps', np.mean(eval_steps_buffer),
                               total_steps)
            summary.add_scalar('eval_win_rate', np.mean(eval_is_win_buffer),
                               total_steps)
            summary.add_scalar('exploration', qmix_agent.exploration,
                               total_steps)
            summary.add_scalar('replay_buffer_size', rpm.count, total_steps)
            summary.add_scalar('target_update_count',
                               qmix_agent.target_update_count, total_steps)
            # tag fixed: the original had a stray colon in 'train_td_error:'
            summary.add_scalar('train_td_error', train_td_error, total_steps)
def sample_text(sess, data_provider, iteration):
    model = RNNModel(data_provider.vocabulary_size,
                     batch_size=1,
                     sequence_length=1,
                     hidden_layer_size=HIDDEN_LAYER_SIZE,
                     cells_size=CELLS_SIZE,
                     training=False)
    # Keep the sample as str: both files below are opened in text mode, so
    # writing UTF-8-encoded bytes (as the original did) would raise TypeError.
    text = model.sample(sess, data_provider.chars, data_provider.vocabulary,
                        TEXT_SAMPLE_LENGTH)
    with open(output_file, "a", encoding="utf-8") as output:
        output.write("Iteration: " + str(iteration) + "\n")
        output.write(text + "\n")
        output.write("\n")

    analysis = get_linguistic_analysis(text)
    print(analysis)
    with open(data_dir + "analysis.txt", mode="a",
              encoding='utf-8') as analysis_file:
        analysis_file.write("Iteration: " + str(iteration) + "\n")
        analysis_file.write(analysis)
        analysis_file.write("\n")
def rnn():
    data_provider = DataProvider(data_dir, BATCH_SIZE, SEQUENCE_LENGTH)
    model = RNNModel(data_provider.vocabulary_size,
                     batch_size=BATCH_SIZE,
                     sequence_length=SEQUENCE_LENGTH,
                     hidden_layer_size=HIDDEN_LAYER_SIZE,
                     cells_size=CELLS_SIZE)
    with tf.Session() as sess:
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(tensorboard_dir)
        writer.add_graph(sess.graph)
        sess.run(tf.global_variables_initializer())

        epoch = 0
        temp_losses = []
        smooth_losses = []
        while True:
            sess.run(tf.assign(model.learning_rate,
                               LEARNING_RATE * (DECAY_RATE ** epoch)))
            data_provider.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for batch in range(data_provider.batches_size):
                inputs, targets = data_provider.next_batch()
                feed = {model.input_data: inputs, model.targets: targets}
                for index, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[index].c
                    feed[h] = state[index].h
                iteration = epoch * data_provider.batches_size + batch
                summary, loss, state, _ = sess.run(
                    [summaries, model.cost, model.final_state, model.train_op],
                    feed)
                writer.add_summary(summary, iteration)
                temp_losses.append(loss)
                if iteration % SAMPLING_FREQUENCY == 0:
                    sample_text(sess, data_provider, iteration)
                if iteration % LOGGING_FREQUENCY == 0:
                    smooth_loss = np.mean(temp_losses)
                    smooth_losses.append(smooth_loss)
                    temp_losses = []
                    plot(smooth_losses, 'iterations (thousands)', 'loss')
                    print(f'metric: iteration, value: {iteration}')
                    print(f'metric: epoch, value: {epoch}')
                    print(f'metric: loss, value: {smooth_loss}')
            epoch += 1
def build_model(self):
    """Builds graph of FCNN, called only during __init__."""
    self.cnn_in = tf.placeholder(tf.float64, [None, self.cnn_input_size],
                                 name="cnn_input")
    self.q_batch = tf.placeholder(tf.float64, [None, None, self.embed_size])
    self.labels = tf.placeholder(tf.int32, [None], name="labels")

    self.attention_vec = AttentionModel(self.cnn_input_size,
                                        self.cnn_in,
                                        self.q_batch,
                                        embed_size=self.embed_size).output
    self.rnn_out = RNNModel(self.attention_vec, dense=False).output
    # self.rnn_out = tf.layers.dropout(self.rnn_out, self.dropout)

    self.prev_layer = self.rnn_out
    for layer_name, layer_nodes in self.net_struct.items():
        self.prev_layer = tf.layers.dense(self.prev_layer,
                                          layer_nodes,
                                          activation=self.activation_fn,
                                          name=layer_name)
        # self.prev_layer = tf.layers.dropout(self.prev_layer, self.dropout)

    self.output = tf.layers.dense(self.prev_layer,
                                  self.output_size,
                                  activation=self.activation_fn,
                                  name="output")
    self.labels = tf.stop_gradient(self.labels)
    self.loss = tf.reduce_mean(
        self.loss_fn(labels=self.labels, logits=self.output))

    # self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)
    self.global_step = tf.Variable(0, trainable=False)
    self.lr = tf.train.piecewise_constant(self.global_step, self.boundaries,
                                          self.values)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
    self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
    self.train_op = self.optimizer.apply_gradients(
        self.grads_and_vars, global_step=self.global_step)
def sample_multi(save_dir, data, model_type):
    with open(os.path.join(save_dir, 'config.pkl'), 'rb') as f:
        saved_args = cPickle.load(f)
    with open(os.path.join(save_dir, 'chars_vocab.pkl'), 'rb') as f:
        _, vocab = cPickle.load(f)

    if model_type == 'biLSTM':
        model = BiLSTM(saved_args, True)
    elif model_type == 'biSRU':
        model = BiLSTM(saved_args, True)
    elif model_type == 'stackBiLstm':
        model = StackedBiLstm(saved_args, True)
    elif model_type == 'cnn3layers':
        model = Conv3LayerModel(saved_args, True)
    elif model_type == 'conv1d':
        model = Conv1d3Layer(saved_args, True)
    elif model_type == 'cnn6layers':
        model = Conv6LayerModel(saved_args, True)
    elif model_type == 'cnn_lstm':
        model = ConvLSTMModel(saved_args, True)
    else:
        model = RNNModel(saved_args, True)

    config = tf.ConfigProto(gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=0.25))
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        vector = np.mean(model.valid_run(sess, vocab, data[0]), axis=0)
        vectors = np.zeros((len(data), vector.shape[0]))
        vectors[0, :] = vector
        for i in tqdm(range(1, len(data))):
            vectors[i, :] = np.mean(model.valid_run(sess, vocab, data[i]),
                                    axis=0)
    return vectors
def __init__(self, model_dir, model_type, sess, batch_size=64, seq_len=32):
    with open(os.path.join(model_dir, 'config.pkl'), 'rb') as f:
        saved_args = cPickle.load(f)
    with open(os.path.join(model_dir, 'chars_vocab.pkl'), 'rb') as f:
        _, vocab = cPickle.load(f)
    saved_args.batch_size = batch_size
    saved_args.seq_length = seq_len
    infer = False  # for seqlen != 1

    if model_type == 'biLSTM':
        model = BiLSTM(saved_args, infer)
    elif model_type == 'biSRU':
        model = BiLSTM(saved_args, infer)
    elif model_type == 'stackBiLstm':
        model = StackedBiLstm(saved_args, infer)
    elif model_type == 'cnn3layers':
        model = Conv3LayerModel(saved_args, infer)
    elif model_type == 'conv1d':
        model = Conv1d3Layer(saved_args, infer)
    elif model_type == 'cnn6layers':
        model = Conv6LayerModel(saved_args, infer)
    elif model_type == 'cnn_lstm':
        model = ConvLSTMModel(saved_args, infer)
    else:
        model = RNNModel(saved_args, infer)

    self.seq_len = seq_len
    self.model = model
    self.vocab = vocab
    self.sess = sess
    self.saver = tf.train.Saver()
    self.ckpt = tf.train.get_checkpoint_state(model_dir)
    # Check the checkpoint before restoring, not after (the original asserted
    # after the restore, where a missing checkpoint had already failed with a
    # less helpful error).
    assert self.ckpt and self.ckpt.model_checkpoint_path
    if sess is not None:
        self.saver.restore(sess, self.ckpt.model_checkpoint_path)
class RNNClassifier(BaseClassifier):
    def __init__(self):
        super().__init__('RNN')
        self.hyper_parameters = {}  # dictionary of the chosen hyper-parameters
        self.model = None
        self.criterion = None
        self.optimizer = None
        self.sequence_length = None

    def get_hyper_parameters_grid(self):
        grid = {
            'lr': [0.001, 0.01, 0.1],
            'epochs': [10, 50, 100],
            'n_neurons_fc': [64, 128, 256],
            'hidden_dim': [64, 128, 256]
        }
        return grid

    def set_hyper_parameters(self, hyper_parameters_dict):
        self.hyper_parameters = hyper_parameters_dict

    def set_best_hyper_parameters(self):
        self.hyper_parameters = {
            'lr': 0.01,
            'epochs': 50,
            'n_neurons_fc': 128,
            'hidden_dim': 64
        }

    def fit(self, X, y):
        X_tweet_text_tensor, X_other_features_tensor = self.get_X_tensors(X)
        y_tensor = self.convert_to_tensor(y, target=True)
        n_neurons_fc = self.hyper_parameters['n_neurons_fc']
        hidden_dim = self.hyper_parameters['hidden_dim']
        self.model = RNNModel(num_features=len(X.columns),
                              num_class=2,
                              hidden_dim=hidden_dim,
                              n_neurons_fc=n_neurons_fc,
                              sequence_length=self.sequence_length)
        self.init_loss_and_optimizer()
        epochs = self.hyper_parameters['epochs']
        n_batches = 20
        # Use distinct loop variables: the original reused `i` for both the
        # epoch and the batch index, shadowing the epoch counter.
        for epoch in range(epochs):
            for i in range(n_batches):
                # Local batches and labels
                local_X1, local_X2, local_y = self.get_batch(
                    X_tweet_text_tensor, X_other_features_tensor, y_tensor,
                    n_batches, i)
                self.optimizer.zero_grad()
                y_pred = self.model(local_X1, local_X2)
                loss = self.criterion(y_pred, local_y)
                loss.backward()
                self.optimizer.step()

    def predict(self, X):
        X_tweet_text_tensor, X_other_features_tensor = self.get_X_tensors(X)
        outputs = self.model(X_tweet_text_tensor, X_other_features_tensor)
        _, predictions = torch.max(outputs, 1)
        return predictions

    def predict_proba(self, X):
        X_tweet_text_tensor, X_other_features_tensor = self.get_X_tensors(X)
        outputs = self.model(X_tweet_text_tensor, X_other_features_tensor)
        predictions = outputs.detach().numpy()
        return predictions

    def init_loss_and_optimizer(self):
        """ Initializes the loss and optimizer for the current .fit """
        self.criterion = CrossEntropyLoss()
        self.optimizer = Adam(self.model.parameters(),
                              lr=self.hyper_parameters['lr'])

    def convert_to_tensor(self, df, target=False):
        """ Converts the given DataFrame to a tensor.

        :param df: the DataFrame to convert
        :type df: pd.DataFrame
        :param target: indicates whether we are using the features df (False)
            or the target df (True). Defaults to False
        :type target: bool
        :return: the converted tensor
        :rtype: torch.Tensor
        """
        if target:
            return torch.LongTensor(df.values)
        return torch.FloatTensor(df.values)

    def get_X_tensors(self, X):
        """ Splits the given X df into tweet text indexes for embedding and
        the other extracted features.

        :param X: the df to split
        :type X: pd.DataFrame
        :return: X_tweet_text_tensor, X_other_features_tensor
        :rtype: tuple
        """
        X_tweet_text = X['tweet text']
        X_other_features = X.drop(labels=['tweet text'], axis=1)
        X_tensor_other_features = self.convert_to_tensor(X_other_features,
                                                         target=False)
        indices_list = []
        for words_list in X_tweet_text.values:
            indices_list.append([word_to_ix[w] for w in words_list])
        X_tensor_tweet_text = torch.LongTensor(indices_list)
        # X_tensor_tweet_text = torch.LongTensor([word_to_ix[w] for w in X_tweet_text.values])
        return X_tensor_tweet_text, X_tensor_other_features

    def get_batch(self, X_tweet_text_tensor, X_other_features_tensor,
                  y_tensor, n_batches, i):
        """ Creates the i'th batch from the given data.

        :param X_tweet_text_tensor: data to get batch from
        :type X_tweet_text_tensor: torch.Tensor
        :param X_other_features_tensor: data to get batch from
        :type X_other_features_tensor: torch.Tensor
        :param y_tensor: data to get batch from
        :type y_tensor: torch.Tensor
        :param n_batches: the amount of total batches we need
        :type n_batches: int
        :param i: the current batch we want to take
        :type i: int
        :return: a tuple of the batched data
        :rtype: tuple
        """
        X1_batch = X_tweet_text_tensor[i * n_batches:(i + 1) * n_batches, ]
        X2_batch = X_other_features_tensor[i * n_batches:(i + 1) * n_batches, ]
        y_batch = y_tensor[i * n_batches:(i + 1) * n_batches, ]
        return X1_batch, X2_batch, y_batch
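# A minimal usage sketch for RNNClassifier (illustrative only: X_train,
# y_train, X_test and the module-level word_to_ix mapping that get_X_tensors
# relies on are assumptions, as is the sequence length, which must match the
# tokenized tweet length):
#
#     clf = RNNClassifier()
#     clf.sequence_length = 50
#     clf.set_best_hyper_parameters()
#     clf.fit(X_train, y_train)
#     labels = clf.predict(X_test)
#     probabilities = clf.predict_proba(X_test)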
def train_rnn(training_articles, testing_articles, n_epochs, batch_size,
              seq_length, char_skip, dropout_pkeep, force_retrain):
    print("[ INFO] Parsing training articles...")
    training_batch_generator = BatchGenerator(training_articles, batch_size,
                                              seq_length, char_skip)
    print("[ INFO] Parsing validation articles...")
    validation_batch_generator = BatchGenerator(testing_articles, batch_size,
                                                seq_length, char_skip)

    model_file = get_model_file()
    if model_file and not force_retrain:
        rnn_model = RNNModel.load_from_model_file(model_file)
        state_file = os.path.join(MODEL_SAVE_DIR, 'saved-vars.npz')
        if not os.path.exists(state_file):
            raise IOError("Numpy state file does not exist")
        saved_vars = np.load(state_file)
        istate = saved_vars['cell-state']
        training_batch_generator.restore_state_dict(**saved_vars)
        print("[ INFO] Resuming training from epoch %d, global step %d" %
              (training_batch_generator.n_epochs,
               rnn_model.training_step_num))
    else:
        print("[ INFO] Initializing RNN")
        rnn_model = RNNModel(max_seq_length=seq_length)
        rnn_model.init_network()
        istate = np.zeros(shape=(rnn_model.n_layers, 2, batch_size,
                                 rnn_model.cell_size))

    log_dir = os.path.join(
        LOG_DIR,
        'training_%s' % datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"))
    os.makedirs(log_dir)
    log_file = open(os.path.join(log_dir, 'log.txt'), 'w')

    validation_accuracies = list()
    validation_losses = list()
    validation_steps = list()
    while training_batch_generator.n_epochs < n_epochs:
        batch, labels, seq_length_arr, istate = \
            training_batch_generator.get_batch(istate)
        pred, ostate, acc = rnn_model.process_training_batch(
            batch, labels, seq_length_arr, istate, dropout_pkeep)

        if rnn_model.training_step_num % DISPLAY_INTERVAL == 0:
            print("[ INFO] Accuracy at step %d (epoch %d): %.3f" %
                  (rnn_model.training_step_num,
                   training_batch_generator.n_epochs + 1, acc))
            print("[ INFO] Prediction of first sample in minibatch: %s" %
                  idx_arr_to_str(pred[0]))

        if rnn_model.training_step_num % TEXT_PREDICTION_LOG_INTERVAL == 0:
            log_file.write("Text prediction at step %d:\n" %
                           rnn_model.training_step_num)
            for i in range(batch_size):
                log_file.write(idx_arr_to_str(pred[i]) + '\n')
            log_file.write(
                "-----------------------------------------------------\n")

        if rnn_model.training_step_num % MODEL_SAVE_INTERVAL == 0:
            print("[ INFO] Saving model...")
            rnn_model.tf_saver.save(rnn_model.session,
                                    os.path.join(MODEL_SAVE_DIR, MODEL_PREFIX),
                                    global_step=rnn_model.training_step_num)
            # also save the cell state and counters of the BatchGenerator
            vars_to_store = training_batch_generator.get_state_dict()
            vars_to_store.update({'cell-state': ostate})
            np.savez(os.path.join(MODEL_SAVE_DIR, 'saved-vars.npz'),
                     **vars_to_store)

        if rnn_model.training_step_num % VALIDATION_INTERVAL == 0:
            print("[ INFO] Starting validation run")
            avg_loss, avg_accuracy = perform_validation_run(
                rnn_model, validation_batch_generator)
            validation_steps.append(rnn_model.training_step_num)
            validation_accuracies.append(avg_accuracy)
            validation_losses.append(avg_loss)
            plt.plot(validation_steps, validation_accuracies,
                     label='accuracy')
            plt.plot(validation_steps, validation_losses, label='loss')
            plt.xlabel('Training Step')
            plt.yticks(np.arange(0., 1.05, 0.05))
            plt.legend(loc='upper left')
            plt.grid(True)
            plt.savefig(
                os.path.join(log_dir, 'validation_loss-accuracy-plot.png'))
            plt.close()

        istate = ostate
    log_file.close()
import sys

import torch
import torch.nn as nn

sys.path.append("../d2l_func/")
from data_prepare import load_data_jay_song, data_iter_random, \
    data_iter_consecutive, to_onehot
from model_train import train_rnn_pytorch
from predict import predict_rnn_pytorch
from rnn_model import RNNModel

if __name__ == "__main__":
    # load data
    corpus_index, char_to_idx, vocab_set, vocab_size = load_data_jay_song()

    # model
    hidden_num = 256
    rnn_layer = nn.LSTM(vocab_size, hidden_num)
    model = RNNModel(rnn_layer, vocab_size)
    model = model.cuda()
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    params = {
        "epoch_num": 10,
        "model": model,
        "loss": loss,
        "optimizer": optimizer,
        "batch_size": 64,
        "num_step": 32,
        "corpus_index": corpus_index,
        "data_iter": data_iter_consecutive,
        "char_to_idx": char_to_idx,
        "vocab_set": vocab_set,
# eval_model.evaluate()
train_model = RNNModel.Builder() \
    .set_max_steps(max_steps) \
    .set_char_emb_status(use_char_embeddings) \
    .set_word_feature_size(word_feature_size + pos_feature_size) \
    .set_read_path(os.path.join('records', 'train')) \
    .set_epochs(train_epochs) \
    .set_cell_type(RNNModel.CellType.RNN_CELL_TYPE_GRU) \
    .set_cell_size(cell_size) \
    .set_batch_size(batch_size) \
    .set_class_size(num_classes) \
    .set_entity_class_size(num_entity_classes) \
    .set_layer_size(num_layers) \
    .set_learning_rate(learning_rate) \
    .set_model_path(model_path) \
    .set_model_name(model_name) \
    .set_logs_path(logs_path) \
    .set_eval_fn(evaluator) \
    .set_time_major(time_major) \
    .set_state_feedback(state_feeback) \
    .set_bi_directional(bi_directional) \
    .set_classifer_status(is_classifer) \
    .set_char_feature_size(char_feature_size) \
    .set_char_cell_size(char_cell_size) \
    .set_char_vocab_size(char_vocab_size) \
    .set_oper_mode(RNNModel.OperMode.OPER_MODE_TRAIN) \
    .set_validation_step(validation_step) \
    .build()
train_model.train(keep_prob)
import numpy as np
import spacy  # needed for spacy.load below; missing from the original imports

from utils import preprocess_data
from paths import *

if __name__ == '__main__':
    model = RNNModel.Builder() \
        .set_max_steps(max_steps) \
        .set_word_feature_size(word_feature_size + pos_feature_size) \
        .set_cell_type(RNNModel.CellType.RNN_CELL_TYPE_GRU) \
        .set_cell_size(cell_size) \
        .set_batch_size(1) \
        .set_class_size(num_classes) \
        .set_entity_class_size(num_entity_classes) \
        .set_layer_size(num_layers) \
        .set_model_path(model_path) \
        .set_model_name(model_name) \
        .set_time_major(time_major) \
        .set_bi_directional(bi_directional) \
        .set_state_feedback(state_feeback) \
        .set_classifer_status(is_classifer) \
        .set_char_feature_size(char_feature_size) \
        .set_char_cell_size(char_cell_size) \
        .set_char_vocab_size(char_vocab_size) \
        .set_oper_mode(RNNModel.OperMode.OPER_MODE_TEST) \
        .build()
    model.init_graph()
    nlp = spacy.load(spacy_model_path)
    nlp_pos = spacy.load('en_core_web_sm')
class RNNClassifier(object):
    def __init__(self, model_path, args):
        assert os.path.isdir(model_path), '%s must be a path' % model_path
        self.model_path = model_path
        self.config_vocab_labels_file = os.path.join(
            self.model_path, 'config_vocab_labels.pkl')
        self.args = args
        self.args.label_size = None
        self.args.vocab_size = None
        self.vocab = None
        self.labels = None
        self.model = None
        self.sess = tf.Session()
        if os.path.exists(self.config_vocab_labels_file):
            self._load_config()

    def _load_config(self):
        with open(self.config_vocab_labels_file, 'rb') as f:
            saved_args, vocab, labels = pickle.load(f)
        assert saved_args, 'load config error'
        assert vocab, 'load vocab error'
        assert labels, 'load labels error'
        self.args = saved_args
        self.vocab = vocab
        self.labels = labels
        self.id2labels = dict(
            list(zip(list(labels.values()), list(labels.keys()))))

    def _load_model(self, batch_size=None):
        print('loading model ... ')
        # self.__load_config()
        if batch_size:
            self.args.batch_size = batch_size
        self._init_model()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(self.args.model_path)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(self.sess, ckpt.model_checkpoint_path)

    def _init_model(self):
        # if not self.model:
        try:
            with tf.variable_scope('classifier'):
                self.model = RNNModel(self.args)
                # self.model = BIDIRNNModel(self.args)
        except ValueError as ve:
            with tf.variable_scope('classifier', reuse=True):
                self.model = RNNModel(self.args)
                # self.model = BIDIRNNModel(self.args)

    def _transform(self, text):
        text = text if type('') == type(text) else text.decode('utf-8')
        text = [word for word in jieba.cut(text)] \
            if self.args.segment else text
        x = list(map(self.vocab.get, text))
        x = [i if i else 0 for i in x]
        x_len = len(x)
        x = x[:self.args.seq_length] if x_len >= self.args.seq_length \
            else x + [0] * (self.args.seq_length - x_len)
        return x

    def load(self):
        self.close()
        self._load_model()

    def close(self):
        self.args = None
        self.vocab = None
        self.labels = None
        self.id2labels = None
        self.model = None
        if self.sess:
            self.sess.close()
            self.sess = None

    def train(self, data_file=None, data=None, dev_data_file=None,
              vocab_corpus_file=None, args=None, continued=True):
        train_data_loader = TextLoader(model_dir=self.args.model_path,
                                       data_file=data_file,
                                       vocab_corpus_file=vocab_corpus_file,
                                       batch_size=self.args.batch_size,
                                       seq_length=self.args.seq_length,
                                       vocab=self.vocab,
                                       labels=self.labels,
                                       segment=self.args.segment)
        # Initialize to None so the references below cannot raise NameError
        # when no dev_data_file is supplied.
        dev_data_loader = None
        dev_summary_writer = None
        if dev_data_file:
            if self.vocab and self.labels:
                vocab = self.vocab
                labels = self.labels
            else:
                vocab = train_data_loader.vocab
                labels = train_data_loader.labels
            # Load the dev set from dev_data_file (the original passed
            # data_file here, which re-read the training data).
            dev_data_loader = TextLoader(model_dir=self.args.model_path,
                                         data_file=dev_data_file,
                                         batch_size=self.args.batch_size,
                                         seq_length=self.args.seq_length,
                                         vocab=vocab,
                                         labels=labels,
                                         segment=self.args.segment)
        if not self.args.vocab_size and not self.args.label_size:
            self.args.vocab_size = train_data_loader.vocab_size
            self.args.label_size = train_data_loader.label_size
        self._init_model()

        init = tf.global_variables_initializer()
        self.sess.run(init)
        saver = tf.train.Saver(tf.global_variables())
        if os.path.isfile(self.config_vocab_labels_file) and continued:
            ckpt = tf.train.get_checkpoint_state(self.args.model_path)
            assert ckpt, 'No checkpoint found'
            assert ckpt.model_checkpoint_path, \
                'No model path found in checkpoint'
            with open(self.config_vocab_labels_file, 'rb') as f:
                saved_args, vocab, labels = pickle.load(f)
            need_be_same = ['model', 'rnn_size', 'num_layers', 'seq_length']
            for checkme in need_be_same:
                assert vars(saved_args)[checkme] == vars(self.args)[checkme], \
                    'command line argument and saved model disagree on %s' % checkme
            assert len(self.vocab) == len(train_data_loader.vocab), \
                'data and loaded model disagree on dictionary mappings'
            assert len(self.labels) == len(train_data_loader.labels), \
                'data and loaded model disagree on label dictionary mappings'
            print('loading last training model and continue')
            saver.restore(self.sess, ckpt.model_checkpoint_path)
        else:
            self.vocab = train_data_loader.vocab
            self.labels = train_data_loader.labels
            self.args.vocab_size = train_data_loader.vocab_size
            self.args.label_size = train_data_loader.label_size
            with open(self.config_vocab_labels_file, 'wb') as f:
                pickle.dump([self.args, self.vocab, self.labels], f)

        with tf.Graph().as_default():
            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar('loss', self.model.loss)
            acc_summary = tf.summary.scalar('accuracy', self.model.accuracy)

            # Train summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(self.model_path, 'summaries',
                                             'train')
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, self.sess.graph)

            if dev_data_loader:
                # Dev summaries
                dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
                dev_summary_dir = os.path.join(self.model_path, 'summaries',
                                               'dev')
                dev_summary_writer = tf.summary.FileWriter(
                    dev_summary_dir, self.sess.graph)

            dev_batch_count = 0
            for epoch in range(self.args.num_epochs):
                self.sess.run(
                    tf.assign(
                        self.model.lr,
                        self.args.learning_rate *
                        (self.args.decay_rate ** epoch)))
                train_data_loader.reset_batch_pointer()
                for batch in range(train_data_loader.num_batches):
                    start = time.time()
                    x, y = train_data_loader.next_batch()
                    feed = {self.model.input_data: x, self.model.targets: y}
                    train_loss, _, accuracy, summaries = self.sess.run(
                        [
                            self.model.loss, self.model.optimizer,
                            self.model.accuracy, train_summary_op
                        ],
                        feed_dict=feed)
                    end = time.time()
                    print('{}/{} (epoch {}/{}), loss = {:.5f}, accuracy = {:.3f}, time/batch = {:.3f}'
                          .format(epoch * train_data_loader.num_batches + batch + 1,
                                  self.args.num_epochs * train_data_loader.num_batches,
                                  epoch + 1, self.args.num_epochs,
                                  train_loss, accuracy, end - start))
                    train_summary_writer.add_summary(
                        summaries,
                        epoch * train_data_loader.num_batches + batch + 1)
                    if (epoch * train_data_loader.num_batches + batch + 1) % args.save_every == 0 \
                            or (epoch == args.num_epochs - 1
                                and batch == train_data_loader.num_batches - 1):
                        checkpoint_path = os.path.join(self.args.model_path,
                                                       'model.ckpt')
                        saver.save(
                            self.sess,
                            checkpoint_path,
                            global_step=epoch * train_data_loader.num_batches + batch + 1)
                        print('model saved to {}'.format(checkpoint_path))
                    dev_batch_count += 1
                    if dev_data_loader and \
                            dev_batch_count == dev_data_loader.num_batches:
                        dev_data_loader.reset_batch_pointer()
                        dev_batch_count = 0
                    if dev_data_loader:
                        x, y = dev_data_loader.next_batch()
                        feed = {
                            self.model.input_data: x,
                            self.model.targets: y
                        }
                        dev_loss, _, dev_accuracy, dev_summaries = self.sess.run(
                            [
                                self.model.loss, self.model.optimizer,
                                self.model.accuracy, dev_summary_op
                            ],
                            feed_dict=feed)
                        print('dev_loss = {:.5f}, dev_accuracy = {:.3f}'.format(
                            dev_loss, dev_accuracy))
                        if dev_summary_writer:
                            dev_summary_writer.add_summary(
                                dev_summaries,
                                epoch * train_data_loader.num_batches + batch + 1)

    def predict(self, contents, batch_size=64):
        if not self.model or not self.args or self.args.batch_size != batch_size \
                or not self.vocab or not self.sess or not self.id2labels:
            self._load_model(batch_size=batch_size)
        x = [self._transform(i.strip()) for i in contents]
        n_chunks = math.ceil(len(x) / self.args.batch_size)
        x = np.array_split(x[:self.args.batch_size * n_chunks], n_chunks,
                           axis=0)
        results = []
        for m in range(n_chunks):
            results.extend(
                self.model.predict_label(self.sess, self.id2labels, x[m]))
        return results

    def test(self, test_file=None, data=None, batch_size=64):
        if not self.model or not self.args or self.args.batch_size != batch_size \
                or not self.vocab or not self.sess or not self.id2labels \
                or not self.labels:
            self._load_model(batch_size=batch_size)
        data_loader = TextLoader(model_dir=self.args.model_path,
                                 data_file=test_file,
                                 batch_size=self.args.batch_size,
                                 seq_length=self.args.seq_length,
                                 vocab=self.vocab,
                                 labels=self.labels,
                                 segment=self.args.segment)
        data = data_loader.tensor.copy()
        n_chunks = math.ceil(len(data) / self.args.batch_size)
        data_list = np.array_split(data[:self.args.batch_size * n_chunks],
                                   n_chunks, axis=0)
        correct_total = 0.0
        num_total = 0.0
        for m in range(n_chunks):
            start = time.time()
            x = data_list[m][:, :-1]
            y = data_list[m][:, -1]
            results = self.model.predict_class(self.sess, x)
            correct_num = np.sum(results == y)
            end = time.time()
            print('batch {}/{} time = {:.3f}, sub_accuracy = {:.6f}'.format(
                m + 1, n_chunks, end - start, correct_num * 1.0 / len(x)))
            correct_total += correct_num
            num_total += len(x)
        accuracy_total = correct_total / num_total
        print('total_num = {}, total_accuracy = {:.6f}'.format(
            int(num_total), accuracy_total))
        return accuracy_total
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

train_data = dataloader('train_shuf.txt', args.batch_size, args.bptt)
val_data = dataloader('val.txt', args.batch_size, args.bptt)
eval_batch_size = args.batch_size

###############################################################################
# Build the model
###############################################################################

ntokens = 27
model = RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers,
                 args.dropout, args.tied)
optimizer = optim.SGD(model.parameters(), lr=args.lr)
criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
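# A sketch of the training loop this fragment builds toward (modeled on the
# standard PyTorch word_language_model example; get_batch and model.init_hidden
# are assumptions, not names defined above). repackage_hidden is what keeps the
# autograd graph from growing across batches: without the detach, backward()
# would try to backpropagate through every previous batch.
def train_epoch():
    model.train()
    hidden = model.init_hidden(args.batch_size)  # assumed helper on RNNModel
    for i in range(0, train_data.size(0) - 1, args.bptt):
        data, targets = get_batch(train_data, i)  # assumed helper
        hidden = repackage_hidden(hidden)  # truncate BPTT at batch boundary
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        optimizer.step()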
class MVModel(object):
    def __init__(self, train_config, model_config, is_training=True):
        self.train_config, self.model_config = train_config, model_config
        self.is_training = is_training
        self.cnn_model = CNNModel(self.train_config.cnn_keep_prob,
                                  is_training=is_training)
        self.rnn_model = RNNModel(
            train_config.learning_rate, model_config.n_fcs,
            model_config.n_views, model_config.n_hidden,
            model_config.n_classes,
            train_config.rnn_keep_prob if is_training else 1.0,
            is_training=self.is_training)
        self.gpu_config = tf.ConfigProto()
        self.gpu_config.gpu_options.allow_growth = True
        self.data = modelnet.read_data(FLAGS.modelnet_path)

    def build_model(self):
        self.images = tf.placeholder(tf.float32, [None, 224, 224, 3])
        with tf.variable_scope('mv-cnn') as scope:
            self.cnn_model.build_model(self.images, FLAGS.vgg_path)
        with tf.variable_scope('mv-rnn') as scope:
            self.rnn_model.build_model(self.cnn_model.all_outputs())
        self.optimizer = self.rnn_model.optimizer

    def co_train(self):
        with tf.Session() as sess:
            self.build_model()
            print('build model finished')
            init = tf.global_variables_initializer()
            saver = tf.train.Saver(max_to_keep=10)
            # TODO restore trained model before or run init op
            # saver.restore(sess, FLAGS.model_path)
            sess.run(init)
            print('init model parameter finished')
            epoch = 1
            print('start training')
            with open('variables.txt', 'w') as f:
                f.writelines('\n'.join(
                    [v.name for v in tf.global_variables()]))
            # print([v.name for v in tf.global_variables()])
            # saver.save(sess, FLAGS.model_path, global_step=0)
            fc6_weights = [
                v for v in tf.global_variables()
                if v.name == 'mv-cnn/fc6/fc6_weights:0'
            ][0]
            # cell_biases = [v for v in tf.global_variables() if v.name=='rnn/gru_cell/gates/biases/Adam_1:0'][0]
            while epoch <= self.train_config.training_epoches:
                batch = 1
                while batch * self.train_config.batch_size <= \
                        self.data.train.size():
                    batch_imgpaths, batch_labels = self.data.train.next_batch(
                        self.train_config.batch_size)
                    batch_img = self.build_input(batch_imgpaths)
                    sess.run(self.optimizer,
                             feed_dict={
                                 self.images: batch_img,
                                 self.rnn_model.y: batch_labels,
                                 self.cnn_model.train_mode: True
                             })
                    acc, loss = sess.run(
                        [self.rnn_model.accuracy, self.rnn_model.cost],
                        feed_dict={
                            self.images: batch_img,
                            self.rnn_model.y: batch_labels,
                            self.cnn_model.train_mode: False
                        })
                    print("epoch " + str(epoch) + ", batch " +
                          str(epoch * batch) + ", Minibatch loss= " +
                          "{:.6f}".format(loss) + ", Training Accuracy= " +
                          "{:.5f}".format(acc))
                    # print("fc6 weights:", sess.run(fc6_weights))
                    # print("cell biases:", sess.run(cell_biases))
                    batch += 1
                if epoch % self.train_config.display_epoches == 0:
                    acc, loss = sess.run(
                        [self.rnn_model.accuracy, self.rnn_model.cost],
                        feed_dict={
                            self.images: batch_img,
                            self.rnn_model.y: batch_labels,
                            self.cnn_model.train_mode: False
                        })
                    print("epoch " + str(epoch) + ", Minibatch loss= " +
                          "{:.6f}".format(loss) + ", Training Accuracy= " +
                          "{:.5f}".format(acc))
                if epoch % self.train_config.save_epoches == 0:
                    # test_imgpaths, test_labels = self.data.test.views(), self.data.test.labels
                    # test_imgs = self.build_input(test_imgpaths)
                    # acc = sess.run([self.rnn_model.accuracy], feed_dict={self.images:test_imgs, self.rnn_model.y:test_labels, self.cnn_model.train_mode:False})
                    # print("epoch" + str(epoch) + ", Testing accuracy=" + "{:.6f}".format(acc))
                    try:
                        saver.save(sess, FLAGS.model_path, global_step=epoch)
                    except Exception as e:
                        print("save model exception:", e)
                epoch += 1

    def test(self, test_model_path=""):
        with tf.Session() as sess:
            self.build_model()
            print("build model finished")
            saver = tf.train.Saver()
            saver.restore(
                sess, test_model_path
                if len(test_model_path) > 0 else FLAGS.model_path)
            print("restore model parameter finished")
            total_acc = 0.0
            test_imgpaths, test_labels = self.data.test.views(), \
                self.data.test.labels
            with open('wrong_model.txt', 'w') as f:
                # range() here: the original used Python 2 xrange, which does
                # not exist in Python 3.
                for i in range(len(test_labels)):
                    test_imgs = self.build_input(
                        test_imgpaths[i * self.model_config.n_views:
                                      (i + 1) * self.model_config.n_views])
                    acc = sess.run(
                        [self.rnn_model.accuracy],
                        feed_dict={
                            self.images: test_imgs,
                            self.rnn_model.y: test_labels[i:i + 1],
                            self.cnn_model.train_mode: False
                        })
                    acc = acc[0]
                    if acc < 0.5:
                        f.write(test_imgpaths[self.model_config.n_views * i] +
                                '\n')
                    total_acc += acc
            print("accuracy in %d models: %f using model %s" %
                  (len(test_labels), total_acc / len(test_labels),
                   test_model_path))

    def build_input(self, imgpaths):
        """Build input data for the model input tensor.

        :param imgpaths: images path array
        :return: 3 channel image array
        """
        images = np.array(
            [imgutils.load_image(img_path) for img_path in imgpaths])
        return [img.reshape(224, 224, 3) for img in images]
def train(args):
    # check compatibility if training is continued from previously saved model
    if args.init_from is None:
        print(args.init_from)
        data_loader = TextLoader(args)
        ckpt = ''
    else:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with codecs.open(os.path.join(args.init_from, 'config.pkl'),
                         'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], \
                "Command line argument and saved model disagree on '%s'" % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with codecs.open(os.path.join(args.init_from, 'chars_vocab.pkl'),
                         'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        data_loader = TextLoader(args, chars=saved_chars, vocab=saved_vocab)
        assert saved_chars == data_loader.chars, \
            "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, \
            "Data and loaded model disagree on dictionary mappings!"

    args.vocab_size = data_loader.vocab_size
    args.letter_size = data_loader.letter_size
    args.word_vocab_size = data_loader.word_vocab_size

    os.makedirs(args.save_dir, exist_ok=True)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    if args.model == 'biLSTM':
        model = BiLSTM(args)
        train_bidirectional_model(model, data_loader, args, ckpt)
    elif args.model == 'biSRU':
        model = BiLSTM(args)
        train_bidirectional_model(model, data_loader, args, ckpt)
    elif args.model == 'stackBiLstm':
        model = StackedBiLstm(args)
        train_bidirectional_model(model, data_loader, args, ckpt)
    elif args.model == 'cnn3layers':
        model = Conv3LayerModel(args)
        train_cnn_model(model, data_loader, args, ckpt)
    elif args.model == 'conv1d':
        model = Conv1d3Layer(args)
        train_cnn_model(model, data_loader, args, ckpt)
    elif args.model == 'cnn6layers':
        model = Conv6LayerModel(args)
        train_cnn_model(model, data_loader, args, ckpt)
    elif args.model == 'cnn_lstm':
        model = ConvLSTMModel(args)
        train_one_forward_model(model, data_loader, args, ckpt)
    else:
        model = RNNModel(args)
        train_one_forward_model(model, data_loader, args, ckpt)
thread.start()
thread.join()
# eval_model.evaluate()

train_model = RNNModel.Builder() \
    .set_max_steps(max_steps) \
    .set_feature_size(feature_size) \
    .set_read_path(os.path.join('records', 'train')) \
    .set_epochs(train_epochs) \
    .set_cell_type(cell_type) \
    .set_cell_size(cell_size) \
    .set_batch_size(batch_size) \
    .set_class_size(num_classes) \
    .set_layer_size(num_layers) \
    .set_learning_rate(learning_rate) \
    .set_model_path(model_path) \
    .set_model_name(model_name) \
    .set_logs_path(logs_path) \
    .set_eval_fn(evaluator) \
    .set_time_major(time_major) \
    .set_state_feedback(state_feeback) \
    .set_bi_directional(bi_directional) \
    .set_classifer_status(is_classifer) \
    .set_oper_mode(RNNModel.OperMode.OPER_MODE_TRAIN) \
    .set_validation_step(validation_step) \
    .build()
train_model.train(keep_prob)
# set_eval_fn(evaluator). \
def sample_text(sess, data_provider, iteration):
    model = RNNModel(data_provider.vocabulary_size,
                     batch_size=1,
                     sequence_length=1,
                     hidden_layer_size=HIDDEN_LAYER_SIZE,
                     cells_size=CELLS_SIZE,
                     training=False)
    # Keep the sample as str: encoding to bytes (as the original did) would
    # make the f-string below write a b'...' repr into the text file.
    text = model.sample(sess, data_provider.chars, data_provider.vocabulary,
                        TEXT_SAMPLE_LENGTH)
    with open(output_file, 'a', encoding='utf-8') as output:
        output.write(f'Iteration: {iteration}\n{text}\n\n')
def build_model(self):
    """Builds graph of FCNN, called only during __init__."""
    self.cnn_in = tf.placeholder(tf.float64, [None, self.cnn_input_size],
                                 name="cnn_input")
    if self.embed_type == "RNN":
        self.q_batch = tf.placeholder(tf.int32, [None, None], name="q_batch")
    elif self.embed_type == "GloVe":
        self.q_batch = tf.placeholder(tf.float64,
                                      [None, self.glove_embed_size])
    elif self.embed_type == "Word2Vec":
        self.q_batch = tf.placeholder(tf.float64,
                                      [None, self.word2vec_embed_size])
    self.labels = tf.placeholder(tf.int32, [None], name="labels")

    if self.embed_type == "RNN":
        self.q_batch = tf.stop_gradient(self.q_batch)
        self.one_hot = tf.one_hot(self.q_batch, self.vocab_size,
                                  dtype=tf.float64)
        rnn = RNNModel(self.one_hot)
        self.embed_output = rnn.output
        self.embed_output = tf.nn.l2_normalize(self.embed_output)
    elif self.embed_type == "GloVe":
        self.embed_output = tf.stop_gradient(self.q_batch)
    elif self.embed_type == "Word2Vec":
        self.embed_output = tf.stop_gradient(self.q_batch)

    self.cnn_l2_reg = tf.nn.l2_normalize(tf.stop_gradient(self.cnn_in))
    self.cnn_dense = tf.layers.dense(self.cnn_l2_reg,
                                     self.pointwise_layer_size,
                                     activation=self.activation_fn,
                                     name='cnn_in_layer')
    self.q_dense = tf.layers.dense(self.embed_output,
                                   self.pointwise_layer_size,
                                   activation=self.activation_fn,
                                   name='rnn_in_layer')
    self.pointwise_layer = tf.multiply(self.cnn_dense,
                                       self.q_dense,
                                       name="pointwise_layer")
    self.pointwise_layer = tf.layers.dropout(self.pointwise_layer,
                                             self.dropout)

    self.prev_layer = self.pointwise_layer
    for layer_name, layer_nodes in self.net_struct.items():
        self.prev_layer = tf.layers.dense(self.prev_layer,
                                          layer_nodes,
                                          activation=self.activation_fn,
                                          name=layer_name)
        self.prev_layer = tf.layers.dropout(self.prev_layer, self.dropout)

    self.output = tf.layers.dense(self.prev_layer,
                                  self.output_size,
                                  activation=self.activation_fn,
                                  name="output")
    self.labels = tf.stop_gradient(self.labels)
    self.loss = tf.reduce_mean(
        self.loss_fn(labels=self.labels, logits=self.output))

    # self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)
    self.global_step = tf.Variable(0, trainable=False)
    self.lr = tf.train.piecewise_constant(self.global_step, self.boundaries,
                                          self.values)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
    self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
    self.train_op = self.optimizer.apply_gradients(
        self.grads_and_vars, global_step=self.global_step)
def train_with_validation(train_set, valid_set, corpus,
                          n_hidden=128, n_emb=128, batch_size=32, conv_size=5,
                          pooling_type='mean', model_type='lstm',
                          w2v_fn=None, model_save_fn=None, disp_proc=True):
    '''pooling_type: mean or max
    model_type: lstm, rnn or cnn
    w2v_fn: path of a pre-trained word2vec model, or None to train embeddings
        from scratch
    '''
    # Only train_set is converted by theano.shared
    train_x, train_mask, train_y = [theano.shared(_) for _ in train_set]
    valid_x, valid_mask, valid_y = valid_set
    n_train, n_valid = len(train_x.get_value()), len(valid_x)
    print("%d training examples" % n_train)
    print("%d validation examples" % n_valid)

    rng = np.random.RandomState(1224)
    th_rng = RandomStreams(1224)

    if model_save_fn is None:
        model_save_fn = os.path.join('model-res',
                                     '%s-%s' % (model_type, pooling_type))

    # Load Word2Vec
    if w2v_fn is None:
        gensim_w2v = None
    else:
        print('Loading word2vec model...')
        if not os.path.exists(w2v_fn):
            raise Exception("Word2Vec model doesn't exist!", model_type)
        gensim_w2v = Word2Vec.load(w2v_fn)

    # Define Model
    if model_type == 'lstm':
        model = LSTMModel(corpus, n_emb, n_hidden, pooling_type,
                          rng=rng, th_rng=th_rng, gensim_w2v=gensim_w2v)
    elif model_type == 'rnn':
        model = RNNModel(corpus, n_emb, n_hidden, pooling_type,
                         rng=rng, th_rng=th_rng, gensim_w2v=gensim_w2v)
    elif model_type == 'cnn':
        model = CNNModel(corpus, n_emb, n_hidden, batch_size, conv_size,
                         pooling_type, rng=rng, th_rng=th_rng,
                         gensim_w2v=gensim_w2v)
    else:
        raise Exception("Invalid model type!", model_type)

    x, mask, y = model.x, model.mask, model.y
    batch_idx_seq, use_noise = model.batch_idx_seq, model.use_noise

    f_update_1_gr = theano.function(
        inputs=[batch_idx_seq],
        outputs=model.cost,
        updates=model.gr_updates,
        givens={x: train_x[batch_idx_seq],
                mask: train_mask[batch_idx_seq],
                y: train_y[batch_idx_seq]},
        on_unused_input='ignore')
    f_update_2_gr_sqr = theano.function(inputs=[],
                                        updates=model.gr_sqr_updates)
    f_update_3_dp_sqr = theano.function(inputs=[],
                                        updates=model.dp_sqr_updates)
    f_update_4_params = theano.function(inputs=[],
                                        updates=model.param_updates)

    # keep validation set consistent
    keep_tail = False if model_type == 'cnn' else True
    valid_idx_batches = get_minibatches_idx(n_valid, batch_size,
                                            keep_tail=keep_tail)
    valid_y = np.concatenate(
        [valid_y[idx_batch] for idx_batch in valid_idx_batches])

    # train the model
    patience = 5000
    patience_increase = 2
    improvement_threshold = 0.995
    disp_freq = 20
    validation_freq = 100
    max_epoch = 500

    best_iter = 0
    best_validation_err = np.inf
    epoch = 0
    uidx = 0
    done_looping = False
    start_time = time.time()

    while (epoch < max_epoch) and (not done_looping):
        epoch += 1
        # Get a new shuffled index for the training set; use rng so the
        # result stays reproducible for a given random seed.
        for idx_batch in get_minibatches_idx(n_train, batch_size,
                                             shuffle=True, rng=rng,
                                             keep_tail=keep_tail):
            uidx += 1
            use_noise.set_value(1.)

            cost = f_update_1_gr(idx_batch)
            f_update_2_gr_sqr()
            f_update_3_dp_sqr()
            f_update_4_params()

            if uidx % disp_freq == 0 and disp_proc:
                print('epoch %i, minibatch %i, train cost %f' %
                      (epoch, uidx, cost))

            if uidx % validation_freq == 0:
                use_noise.set_value(0.)
                valid_y_pred = [
                    model.predict_func(valid_x[idx_batch],
                                       valid_mask[idx_batch])
                    for idx_batch in valid_idx_batches
                ]
                valid_y_pred = np.concatenate(valid_y_pred)
                this_validation_err = (valid_y_pred != valid_y).mean()
                print('epoch %i, minibatch %i, validation error %f %%' %
                      (epoch, uidx, this_validation_err * 100))

                if this_validation_err < best_validation_err:
                    # Raise patience only on a sufficiently large improvement.
                    if this_validation_err < \
                            best_validation_err * improvement_threshold:
                        patience = max(patience, uidx * patience_increase)
                    best_validation_err = this_validation_err
                    best_iter = uidx
                    model.save(model_save_fn)

            if patience < uidx:
                done_looping = True
                break

    end_time = time.time()
    print('Optimization complete with best validation score of %f %%, at iter %d'
          % (best_validation_err * 100, best_iter))
    print('The code ran for %d epochs, with %f epochs/sec' %
          (epoch, epoch / (end_time - start_time)))