def main():
    """Train a Word2Vec model end to end: build the data pipeline, then SGD."""
    args = setup_args()
    logging.info(args)

    # Dataset pipeline plus the vocabulary lookup table it was built from.
    dataset, vocab_table = build_dataset(
        data_file=args.data,
        vocab_file=args.vocab,
        batch_size=args.batch_size,
        t=args.t,
        prefetch_size=args.prefetch)

    model = Word2Vec(vocab_table.size(), args.d)
    # Returns (loss, grads-and-vars) for a batch, differentiating w.r.t. every
    # variable touched by compute_loss.
    loss_and_grads = tfe.implicit_value_and_gradients(model.compute_loss)
    opt = tf.train.GradientDescentOptimizer(learning_rate=args.lr)

    total_loss = 0.0
    train_step = 0
    for train_step, (src_words, tgt_words) in enumerate(dataset, start=1):
        batch_loss, gradients = loss_and_grads(src_words, tgt_words)
        opt.apply_gradients(gradients)
        total_loss += batch_loss
        if train_step % args.log_freq == 0:
            log_msg(f'Step: {train_step} Loss: {total_loss/args.log_freq}')
            total_loss = 0.
    log_msg(f'Num steps: {train_step} Done!')
def fit(model, dataset, optimizer, verbose=False, logdir=None):
    """Fit the linear-regression model.

    Args:
      model: The LinearModel to fit.
      dataset: The tf.data.Dataset to use for training data.
      optimizer: The TensorFlow Optimizer object to be used.
      verbose: If true, will print out loss values at every iteration.
      logdir: The directory in which summaries will be written for
        TensorBoard (optional).
    """

    # The objective to minimize: mean-square loss of `model` on a batch.
    def mse(xs, ys):
        return mean_square_loss(model, xs, ys)

    loss_and_grads = tfe.implicit_value_and_gradients(mse)
    tf.train.get_or_create_global_step()

    summary_writer = None
    if logdir:
        # Support for TensorBoard summaries. Once training has started, use:
        #   tensorboard --logdir=<logdir>
        summary_writer = tf.contrib.summary.create_file_writer(logdir)

    # Training loop.
    for i, (xs, ys) in enumerate(tfe.Iterator(dataset)):
        loss, grads = loss_and_grads(xs, ys)
        if verbose:
            print("Iteration %d: loss = %s" % (i, loss.numpy()))
        optimizer.apply_gradients(grads, global_step=tf.train.get_global_step())
        if summary_writer is not None:
            with summary_writer.as_default(), \
                 tf.contrib.summary.always_record_summaries():
                tf.contrib.summary.scalar("loss", loss)
def train(loss_fn):
    """Train a regression model evaluated using `loss_fn`."""
    print('Training; loss function: ' + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # `grad_fn(x, y)` returns the per-example loss and the gradients of every
    # variable used while computing it.
    grad_fn = tfe.implicit_value_and_gradients(
        lambda x, y: loss_fn(y, prediction(x)))

    start = time.time()
    for epoch in range(100):
        running_loss = 0.0
        for x_i, y_i in tfe.Iterator(dataset):
            loss, grads = grad_fn(x_i, y_i)
            # One optimization step: update all implicit variables.
            optimizer.apply_gradients(grads)
            running_loss += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, running_loss / n_samples))
    print('Took: %f seconds' % (time.time() - start))
    print('Eager execution exhibits significant overhead per operation. '
          'As you increase your batch size, the impact of the overhead will '
          'become less noticeable. Eager execution is under active development: '
          'expect performance to increase substantially in the near future!')
def train(loss_fn):
    """Train the regression model with SGD using the given loss function."""
    print('訓練: loss function名稱: ' + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # Per-example objective: loss between the label and the model prediction.
    def loss_for_example(x, y):
        return loss_fn(y, prediction(x))

    # Returns a function that yields (loss value, gradients of any variables
    # used in calculating it) for a single example.
    grad_fn = tfe.implicit_value_and_gradients(loss_for_example)

    start = time.time()
    for epoch in range(100):
        epoch_loss = 0.0
        # Iterate the dataset eagerly, one example at a time.
        for xs, ys in tfe.Iterator(dataset):
            loss, grads = grad_fn(xs, ys)
            optimizer.apply_gradients(grads)
            epoch_loss += loss
        # Report the mean loss every 10 epochs.
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, epoch_loss / n_samples))
    print('總花費時間: %f 秒' % (time.time() - start))
    print(
        'Eager execution exhibits significant overhead per operation. '
        'As you increase your batch size, the impact of the overhead will '
        'become less noticeable. Eager execution is under active development: '
        'expect performance to increase substantially in the near future!')
def fit(model, dataset, optimizer, verbose=False, logdir=None):
    """Fit the linear-regression model.

    Args:
      model: The LinearModel to fit.
      dataset: The tf.data.Dataset to use for training data.
      optimizer: The TensorFlow Optimizer object to be used.
      verbose: If true, will print out loss values at every iteration.
      logdir: The directory in which summaries will be written for
        TensorBoard (optional).
    """

    # Objective to minimize: mean-square loss of `model` over a batch.
    def mse(xs, ys):
        return mean_square_loss(model, xs, ys)

    loss_and_grads = tfe.implicit_value_and_gradients(mse)

    if logdir:
        # TensorBoard support; view with: tensorboard --logdir=<logdir>
        summary_writer = tf.contrib.summary.create_file_writer(logdir)

    # Training loop.
    for i, (xs, ys) in enumerate(tfe.Iterator(dataset)):
        loss, grads = loss_and_grads(xs, ys)
        if verbose:
            print("Iteration %d: loss = %s" % (i, loss.numpy()))
        optimizer.apply_gradients(grads)
        if logdir:
            with summary_writer.as_default(), \
                 tf.contrib.summary.always_record_summaries():
                tf.contrib.summary.scalar("loss", loss, step=i)
                tf.contrib.summary.scalar("step", i, step=i)
def gradients(self, params, forward, judges, aux_judges, xx, yy_true):
    """Return (grads_and_params, scores, aux_scores) for one forward pass."""
    value_and_grads = tfe.implicit_value_and_gradients(self._ivag_inner)
    # `bridge` is a mutable side channel — presumably _ivag_inner appends the
    # auxiliary scores to it, since they are popped back out below.
    bridge = []
    scores, grads_and_params = value_and_grads(
        forward, judges, aux_judges, xx, yy_true, bridge)
    aux_scores = bridge.pop()
    return grads_and_params, scores, aux_scores
def fit(model, dataset, optimizer, verbose=False, logdir=None):
    """Fit the linear-regression model

    :param model:
    :param dataset: The tf.data.Dataset to use for training data.
    :param optimizer:
    :param verbose: If true, will print out loss values at every iteration
    :param logdir: The directory in which summaries will be written for
        TensorBoard (optional)
    :return:
    """

    # The loss function to optimize (mean-square loss on a batch).
    def mse(xs, ys):
        return mean_square_loss(model, xs, ys)

    loss_and_grads = tfe.implicit_value_and_gradients(mse)

    if logdir:
        summary_writer = tf.contrib.summary.create_file_writer(logdir)

    for i, (xs, ys) in enumerate(tfe.Iterator(dataset)):
        loss, grads = loss_and_grads(xs, ys)
        if verbose:
            print("Iteration {}: loss = {}".format(i, loss.numpy()))
        optimizer.apply_gradients(grads)
        if logdir:
            with summary_writer.as_default():
                tf.contrib.summary.scalar("loss", loss, step=i)
                tf.contrib.summary.scalar("step", i, step=i)
def train(loss_fn):
    """Train a regression model evaluated using `loss_fn`."""
    print('Training; loss function: ' + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # The function to differentiate: per-example loss vs. the prediction.
    def loss_for_example(x, y):
        return loss_fn(y, prediction(x))

    # `grad_fn(x_i, y_i)` -> (loss at (x_i, y_i), gradients of the variables
    # used in computing it).
    grad_fn = tfe.implicit_value_and_gradients(loss_for_example)

    start = time.time()
    for epoch in range(100):
        epoch_loss = 0.0
        for features, label in tfe.Iterator(dataset):
            loss, grads = grad_fn(features, label)
            optimizer.apply_gradients(grads)
            epoch_loss += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, epoch_loss / n_samples))
    print('Took: %f seconds' % (time.time() - start))
    print(
        'Eager execution exhibits significant overhead per operation. '
        'As you increase your batch size, the impact of the overhead will '
        'become less noticeable. Eager execution is under active development: '
        'expect performance to increase substantially in the near future!')
def fit(model, dataset, optimizer, verbose=False, logdir=None):
    """Fit the linear-regression model.

    Args:
      model: The LinearModel to fit.
      dataset: The tf.data.Dataset to use for training data.
      optimizer: The TensorFlow Optimizer object to be used.
      verbose: If true, will print out loss values at every iteration.
      logdir: The directory in which summaries will be written for
        TensorBoard (optional).
    """
    # The loss function to optimize: batch MSE of the model's predictions.
    loss_and_grads = tfe.implicit_value_and_gradients(
        lambda xs, ys: tf.reduce_mean(tf.square(model(xs) - ys)))

    summary_writer = None
    if logdir:
        # TensorBoard support; view with: tensorboard --logdir=<logdir>
        summary_writer = tfe.SummaryWriter(logdir)

    # Training loop.
    for i, (xs, ys) in enumerate(tfe.Iterator(dataset)):
        loss, grads = loss_and_grads(xs, ys)
        if verbose:
            print("Iteration %d: loss = %s" % (i, loss.numpy()))
        optimizer.apply_gradients(grads)
        if summary_writer is not None:
            summary_writer.scalar("loss", loss)
            summary_writer.step()
def main():
    """Train Word2Vec with SGD, printing a per-window loss every SKIP_STEP steps."""
    dataset = tf.data.Dataset.from_generator(
        gen, (tf.int32, tf.int32),
        (tf.TensorShape([BATCH_SIZE]), tf.TensorShape([BATCH_SIZE, 1])))
    model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE)
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    # (loss, grads-and-vars) for a batch, w.r.t. all variables in compute_loss.
    grad_fn = tfe.implicit_value_and_gradients(model.compute_loss)

    total_loss = 0.0  # accumulates loss over the current SKIP_STEP window
    num_train_steps = 0
    while num_train_steps < NUM_TRAIN_STEPS:
        # Re-iterate the dataset until the step budget is exhausted.
        for center_words, target_words in tfe.Iterator(dataset):
            if num_train_steps >= NUM_TRAIN_STEPS:
                break
            loss_batch, grads = grad_fn(center_words, target_words)
            total_loss += loss_batch
            optimizer.apply_gradients(grads)
            if (num_train_steps + 1) % SKIP_STEP == 0:
                print('A loss at step {}:{:5.1f}'.format(
                    num_train_steps, total_loss / SKIP_STEP))
                # BUG FIX: reset the accumulator so the printed value is the
                # average over the last SKIP_STEP steps rather than the
                # cumulative loss divided by SKIP_STEP.
                total_loss = 0.0
            num_train_steps += 1
def main():
    """Skip-gram Word2Vec training loop under eager execution."""
    dataset = tf.data.Dataset.from_generator(
        gen, (tf.int32, tf.int32),
        (tf.TensorShape([BATCH_SIZE]), tf.TensorShape([BATCH_SIZE, 1])))
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    # Create the model.
    model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE)
    # Gradients function via `tfe.implicit_value_and_gradients`.
    grad_fn = tfe.implicit_value_and_gradients(model.compute_loss)

    total_loss = 0.0  # for average loss in the last SKIP_STEP steps
    num_train_steps = 0
    while num_train_steps < NUM_TRAIN_STEPS:
        for center_words, target_words in tfe.Iterator(dataset):
            if num_train_steps >= NUM_TRAIN_STEPS:
                break
            # Compute the loss and gradients, then take an optimization step.
            loss_batch, grads = grad_fn(center_words, target_words)
            optimizer.apply_gradients(grads)
            total_loss += loss_batch
            if (num_train_steps + 1) % SKIP_STEP == 0:
                print('Average loss at step {}: {:5.1f}'.format(
                    num_train_steps, total_loss / SKIP_STEP))
                total_loss = 0.0
            num_train_steps += 1
def train(self, data, target):
    """Run one optimization step on (data, target) and return the loss."""
    optimizer = tf.train.GradientDescentOptimizer(self.rate)
    # Differentiate the model's loss w.r.t. every variable it touches.
    grad_fn = tfe.implicit_value_and_gradients(
        lambda x, y: self.loss(y, self.prediction(x)))
    loss, grads = grad_fn(data, target)
    optimizer.apply_gradients(grads)
    return loss
def main():
    """Train the language model, checkpointing whenever validation ppl improves."""
    args = setup_args()
    log_msg(args)

    vocab_table = lookup_ops.index_table_from_file(args.vocab, default_value=args.unk_index)
    train_dataset = create_dataset(args.train, vocab_table, args.bs, args.eos, args.t)
    valid_dataset = create_dataset(args.valid, vocab_table, args.bs, args.eos, args.t)

    # (loss, grads-and-vars) for one training datum.
    loss_and_grads_fun = tfe.implicit_value_and_gradients(train_loss)

    lm = LanguageModel(int(vocab_table.size()), d=args.nd, h=args.nh, cell=args.cell)
    log_msg('Model built!')

    best_valid_ppl = compute_ppl(lm, valid_dataset)
    log_msg(f'Start ppl: {best_valid_ppl: 0.4f}')

    if args.opt == 'adam':
        opt = tf.train.AdamOptimizer(args.lr)
    else:
        opt = tf.train.GradientDescentOptimizer(args.lr)

    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    ckpt_prefix = os.path.join(args.save_dir, args.ckpt_prefix)
    root = tfe.Checkpoint(optimizer=opt, model=lm,
                          optimizer_step=tf.train.get_or_create_global_step())

    for epoch_num in range(args.num_epochs):
        log_msg(f'Epoch: {epoch_num} START')
        batch_loss = []  # window of losses since the last stats report
        for step_num, train_datum in enumerate(train_dataset, start=1):
            loss_value, gradients = loss_and_grads_fun(lm, train_datum)
            batch_loss.append(loss_value)
            if step_num % args.stats_step == 0:
                # BUG FIX: average the accumulated window of losses; the
                # original averaged only the single latest loss_value while
                # collecting (and discarding) the window unused.
                log_msg(f'Epoch: {epoch_num} Step: {step_num} Avg Loss: {np.average(np.asarray(batch_loss)): 0.4f}')
                batch_loss = []
            if step_num % args.eval_step == 0:
                better, ppl = check_if_ppl_better(best_valid_ppl, lm, valid_dataset, root,
                                                 ckpt_prefix, epoch_num, step_num)
                if better:
                    best_valid_ppl = ppl
            opt.apply_gradients(clip_gradients(gradients, args.clip_ratio))
        log_msg(f'Epoch: {epoch_num} END')
        better, ppl = check_if_ppl_better(best_valid_ppl, lm, valid_dataset, root,
                                          ckpt_prefix, epoch_num, step_num=-1)
        if better:
            best_valid_ppl = ppl
def train(loss_fn):
    """Train a regression model evaluated using `loss_fn`."""
    print("Training: loss function: " + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # Per-example loss, differentiated w.r.t. all implicit variables.
    grad_fn = tfe.implicit_value_and_gradients(
        lambda x, y: loss_fn(y, prediction(x)))

    for epoch in range(100):
        epoch_loss = 0.0
        for xs, ys in tfe.Iterator(dataset):
            loss, grads = grad_fn(xs, ys)
            optimizer.apply_gradients(grads)
            epoch_loss += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, epoch_loss/n_samples))
def train():
    """Fit the linear model with SGD using a Huber loss (delta = 10)."""
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # Huber loss of the prediction against the label.
    grad_fn = tfe.implicit_value_and_gradients(
        lambda x, y: utils.huber_loss(y, prediction(x), 10.0))

    start = time.time()
    for epoch in range(50):
        epoch_loss = 0.0
        for x, y in tfe.Iterator(dataset):
            # Model under the hood: y_pred = x * w + b.
            loss, grads = grad_fn(x, y)
            optimizer.apply_gradients(grads)
            epoch_loss += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, epoch_loss / n_samples))
    print('Took: %f seconds' % (time.time() - start))
def main():
    """Word2Vec training loop: SGD with average-loss reporting every SKIP_STEP."""
    dataset = tf.data.Dataset.from_generator(
        gen, (tf.int32, tf.int32),
        (tf.TensorShape([BATCH_SIZE]), tf.TensorShape([BATCH_SIZE, 1])))
    model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE)
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    # Differentiates compute_loss w.r.t. the model's variables.
    grad_fn = tfe.implicit_value_and_gradients(model.compute_loss)

    total_loss = 0.0  # for average loss in the last SKIP_STEP steps
    num_train_steps = 0
    while num_train_steps < NUM_TRAIN_STEPS:
        # Keep re-iterating the dataset until the step budget is spent.
        for center_words, target_words in tfe.Iterator(dataset):
            if num_train_steps >= NUM_TRAIN_STEPS:
                break
            loss_batch, grads = grad_fn(center_words, target_words)
            total_loss += loss_batch
            optimizer.apply_gradients(grads)
            if (num_train_steps + 1) % SKIP_STEP == 0:
                print('Average loss at step {}: {:5.1f}'.format(
                    num_train_steps, total_loss / SKIP_STEP))
                total_loss = 0.0
            num_train_steps += 1
def train(loss_fn):
    """Train a regression model for a fixed number of epochs with `loss_fn`."""
    print('Training; loss function: ' + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # The function through which to differentiate.
    def loss_for_example(x, y):
        return loss_fn(y, prediction(x))

    grad_fn = tfe.implicit_value_and_gradients(loss_for_example)

    n_epoch = 100
    for epoch in range(n_epoch):
        epoch_loss = 0.0
        for xs, ys in tfe.Iterator(dataset):
            loss, grads = grad_fn(xs, ys)
            # One optimization step: update the implicit variables.
            optimizer.apply_gradients(grads)
            epoch_loss += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, epoch_loss / n_samples))
def train(loss_fn):
    """Train a regression model evaluated using `loss_fn`."""
    print('Training; loss function: ' + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # Per-example objective: loss of the label against the prediction.
    def loss_for_example(x, y):
        return loss_fn(y, prediction(x))

    # Gradients function: (loss, grads of all variables used computing it).
    grad_fn = tfe.implicit_value_and_gradients(loss_for_example)

    start = time.time()
    for epoch in range(100):
        epoch_loss = 0.0
        for features, label in tfe.Iterator(dataset):
            # Compute the loss and gradient, and take an optimization step.
            loss, grads = grad_fn(features, label)
            optimizer.apply_gradients(grads)
            epoch_loss += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, epoch_loss / n_samples))
    print('Took: %f seconds' % (time.time() - start))
    print(
        'Eager execution exhibits significant overhead per operation. '
        'As you increase your batch size, the impact of the overhead will '
        'become less noticeable. Eager execution is under active development: '
        'expect performance to increase substantially in the near future!')
def main():
    """Skip-gram Word2Vec training with negative sampling, eager execution."""
    dataset = tf.data.Dataset.from_generator(
        gen, (tf.int32, tf.int32),
        (tf.TensorShape([BATCH_SIZE]), tf.TensorShape([BATCH_SIZE, 1])))
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    # Create the model.
    model = Word2Vec(VOCAB_SIZE, EMBED_SIZE, NUM_SAMPLED)
    # Gradients function via `tfe.implicit_value_and_gradients`.
    grad_fn = tfe.implicit_value_and_gradients(model.compute_loss)

    total_loss = 0.0  # running loss over the last SKIP_STEP steps
    num_train_steps = 0
    while num_train_steps < NUM_TRAIN_STEPS:
        # Re-iterate the dataset until the step budget is exhausted.
        for center_words, target_words in tfe.Iterator(dataset):
            if num_train_steps >= NUM_TRAIN_STEPS:
                break
            # Compute the loss and gradients, then take an optimization step.
            loss_batch, grads = grad_fn(center_words, target_words)
            total_loss += loss_batch
            optimizer.apply_gradients(grads)
            if (num_train_steps + 1) % SKIP_STEP == 0:
                print('Average loss at step {}: {:5.1f}'.format(
                    num_train_steps, total_loss / SKIP_STEP))
                total_loss = 0.0
            num_train_steps += 1
def fit(model, dataset, optimizer, verbose=False, logdir=None):
    """Fit the linear-regression model.

    Args:
      model: The Linear Model to fit.
      dataset: The tf.data.Dataset to use for training data
      optimizer: The Tensorflow Optimizer object to be used
      verbose: If true, will print out loss values at every iteration.
      logdir: The directory in which summaries will be written for
        Tensorboard. (Optional)
    """
    # The loss function to optimize.
    def mean_square_loss(xs, ys):
        return tf.reduce_mean(tf.square(model(xs) - ys))

    # Returns a function which differentiates f with respect to variables.
    loss_and_grads = tfe.implicit_value_and_gradients(mean_square_loss)
    tf.train.get_or_create_global_step()

    if logdir:
        # BUG FIX: module is tf.contrib.summary (original had the typo
        # "summay", which raised AttributeError whenever logdir was set).
        summary_writer = tf.contrib.summary.create_file_writer(logdir)

    # Training loop.
    for i, (xs, ys) in enumerate(tfe.Iterator(dataset)):
        loss, grads = loss_and_grads(xs, ys)
        if verbose:
            print("Iteration {}: loss {}".format(i, loss.numpy()))
        optimizer.apply_gradients(grads, global_step=tf.train.get_global_step())
        if logdir:
            with summary_writer.as_default():
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar("loss", loss)
def train(loss_fn):
    """Train the regression model for 100 epochs using `loss_fn`."""
    print('Training; loss function: ' + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # (loss, gradients) of the per-example objective w.r.t. all variables.
    grad_fn = tfe.implicit_value_and_gradients(
        lambda x, y: loss_fn(y, prediction(x)))

    start = time.time()
    for epoch in range(100):
        running_loss = 0.0
        for xs, ys in tfe.Iterator(dataset):
            loss, grads = grad_fn(xs, ys)
            optimizer.apply_gradients(grads)
            running_loss += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, running_loss / n_samples))
    print('Took: %f seconds' % (time.time() - start))
    print(
        'Eager execution exhibits significant overhead per operation. '
        'As you increase your batch size, the impact of the overhead will '
        'become less noticeable. Eager execution is under active development: '
        'expect performance to increase substantially in the near future!')
def fit_generator(self,
                  feed_dict_generator,
                  max_checkpoints_to_keep=5,
                  checkpoint_interval=1000,
                  restore=False,
                  submodel=None):
    """Train this model on data from a generator.

    Parameters
    ----------
    feed_dict_generator: generator
      this should generate batches, each represented as a dict that maps
      Layers to values.
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    checkpoint_interval: int
      the frequency at which to write checkpoints, measured in training steps.
      Set this to 0 to disable automatic checkpointing.
    restore: bool
      if True, restore the model from the most recent checkpoint and continue
      training from there.  If False, retrain the model from scratch.
    submodel: Submodel
      an alternate training objective to use.  This should have been created by
      calling create_submodel().

    Returns
    -------
    the average loss over the most recent checkpoint interval
    """
    if not self.built:
        self.build()
    with self._get_tf("Graph").as_default():
        time1 = time.time()
        # A submodel may override the model's default loss.
        loss = self.loss
        if submodel is not None and submodel.loss is not None:
            loss = submodel.loss
        if tfe.in_eager_mode():
            # In eager mode we want an optimizer and a function to compute the
            # gradient of the loss.
            submodel_vars = None
            if submodel is None:
                optimizer = self._get_tf("Optimizer")
            else:
                optimizer = submodel.create_optimizer()
                if submodel.layers is not None:
                    # Restrict training to the variables of the submodel's layers.
                    submodel_vars = set()
                    for layer in submodel.layers:
                        for var in layer.variables:
                            submodel_vars.add(var)
            val_grad_fn = tfe.implicit_value_and_gradients(
                lambda x: self._run_graph([loss], x, True)[0])
        else:
            # In graph mode we want a training operation.
            if submodel is None:
                train_op = self._get_tf('train_op')
            else:
                train_op = submodel.get_train_op()
        if checkpoint_interval > 0:
            saver = tf.train.Saver(
                self.get_variables(),
                max_to_keep=max_checkpoints_to_keep,
                save_relative_paths=True)
        if restore:
            self.restore()
        # Running-average bookkeeping for the loss reported per checkpoint interval.
        avg_loss, n_averaged_batches = 0.0, 0.0
        n_samples = 0
        # Single-element lists shared with the enqueue thread (mutable boxes).
        n_enqueued = [0]
        final_sample = [None]
        if self.queue_installed:
            enqueue_thread = threading.Thread(
                target=_enqueue_batch,
                args=(self, feed_dict_generator, self._get_tf("Graph"),
                      self.session, n_enqueued, final_sample))
            enqueue_thread.start()
        for feed_dict in self._create_feed_dicts(feed_dict_generator, True):
            if self.queue_installed:
                # Don't let this thread get ahead of the enqueue thread, since if
                # we try to read more batches than the total number that get queued,
                # this thread will hang indefinitely.
                while n_enqueued[0] <= n_samples:
                    if n_samples == final_sample[0]:
                        break
                    time.sleep(0)
                if n_samples == final_sample[0]:
                    break
            n_samples += 1
            should_log = (self.tensorboard and
                          n_samples % self.tensorboard_log_frequency == 0)
            if tfe.in_eager_mode():
                value, grads_and_vars = val_grad_fn(feed_dict)
                if submodel_vars is not None:
                    # Drop gradients for variables outside the submodel.
                    grads_and_vars = [
                        x for x in grads_and_vars if x[1] in submodel_vars
                    ]
                optimizer.apply_gradients(grads_and_vars)
                avg_loss += value
            else:
                fetches = [train_op, loss.out_tensor]
                if should_log:
                    fetches.append(self._get_tf("summary_op"))
                fetched_values = self.session.run(fetches, feed_dict=feed_dict)
                if should_log:
                    self._log_tensorboard(fetched_values[2])
                avg_loss += fetched_values[1]
            n_averaged_batches += 1
            self.global_step += 1
            # Periodic checkpoint: save, report the interval's average loss,
            # and reset the running average.
            if checkpoint_interval > 0 and self.global_step % checkpoint_interval == checkpoint_interval - 1:
                saver.save(self.session, self.save_file, global_step=self.global_step)
                avg_loss = float(avg_loss) / n_averaged_batches
                logger.info('Ending global_step %d: Average loss %g' %
                            (self.global_step, avg_loss))
                avg_loss, n_averaged_batches = 0.0, 0.0
        if n_averaged_batches > 0:
            avg_loss = float(avg_loss) / n_averaged_batches
        if checkpoint_interval > 0:
            if n_averaged_batches > 0:
                logger.info('Ending global_step %d: Average loss %g' %
                            (self.global_step, avg_loss))
            # Final checkpoint after the generator is exhausted.
            saver.save(self.session, self.save_file, global_step=self.global_step)
        time2 = time.time()
        logger.info("TIMING: model fitting took %0.3f s" % (time2 - time1))
        return avg_loss
def train_model():
    """Fit a 1-D linear model y = w*x + b to synthetic noisy data with SGD.

    Plots the raw data, then tracks loss / w / b per training step and plots
    their trajectories against the true values.
    """
    true_w = 3
    true_b = 2
    NUM_EXAMPLES = 1000
    inputs = tf.random_normal(shape=[NUM_EXAMPLES, 1])
    noise = tf.random_normal(shape=[NUM_EXAMPLES, 1])
    labels = inputs * true_w + true_b + noise

    # Visualize the raw data.
    plt.scatter(inputs.numpy(), labels.numpy())
    plt.show()

    # A single dense unit with bias implements the affine map w*x + b.
    wb = tf.layers.Dense(units=1, use_bias=True)

    # (loss, gradients-and-variables) for a batch of (inputs, labels).
    value_and_gradients_fn = tfe.implicit_value_and_gradients(loss_fn)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

    loss_at_step = []
    w_at_step = []
    b_at_step = []
    print("\n训练")
    for step_num in range(num_training_steps):
        loss, gradients_and_variables = value_and_gradients_fn(
            inputs, labels, wb)
        print('Loss: {}'.format(loss))
        # .item() replaces the deprecated np.asscalar() (removed in NumPy 1.23).
        loss_at_step.append(loss.numpy().item())
        w, b = wb.variables
        print("之前:", w.read_value().numpy(), b.read_value().numpy())
        print()
        optimizer.apply_gradients(gradients_and_variables)
        w, b = wb.variables
        print("之后:", w.read_value().numpy(), b.read_value().numpy())
        print()
        w_at_step.append(w.read_value().numpy().item())
        b_at_step.append(b.read_value().numpy().item())
    print(w_at_step)

    # Plot loss and parameter trajectories against the true w and b.
    t = range(0, num_training_steps)
    plt.plot(t, loss_at_step, 'k',
             t, w_at_step, 'r', t, [true_w] * num_training_steps, 'r--',
             t, b_at_step, 'b', t, [true_b] * num_training_steps, 'b--')
    plt.legend(['loss', 'w estimate', 'w true', 'b estimate', 'b true'])
    plt.show()
def train(device):
    """Train a GAN on MNIST under the given TF device string (e.g. '/gpu:0')."""
    # hyper parameters
    z_dim = 100
    epochs = 30
    batch_size = 128
    learning_rate = 0.0002
    beta1 = 0.5
    is_training = True

    # for validation purpose
    assets_dir = './assets'
    if not os.path.isdir(assets_dir):
        os.makedirs(assets_dir)
    val_block_size = 10
    val_size = val_block_size * val_block_size

    # load mnist data
    mnist = input_data.read_data_sets('mnist-data', one_hot=True)
    inputs_shape = [-1, 28, 28, 1]

    # wrap with available device
    with tf.device(device):
        # create generator & discriminator
        generator = Generator()
        discriminator = Discriminator()

        # prepare optimizer: separate (loss, grads) functions and Adam
        # optimizers for D and G.
        d_val_grad = tfe.implicit_value_and_gradients(d_loss_fn)
        g_val_grad = tfe.implicit_value_and_gradients(g_loss_fn)
        d_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1)
        g_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1)

        # for loss savings
        d_loss_at_steps = []
        g_loss_at_steps = []

        for e in range(epochs):
            t = trange(mnist.train.num_examples // batch_size)
            # t = trange(1)
            for ii in t:
                t.set_description('{:04d}/{:04d}: '.format(e + 1, epochs))

                # no need labels
                batch_x, _ = mnist.train.next_batch(batch_size)

                # rescale images to -1 ~ 1
                batch_x = tf.reshape(batch_x, shape=inputs_shape)
                batch_x = batch_x * 2.0 - 1.0

                # Sample random noise for G
                batch_z = tf.random_uniform(shape=[batch_size, z_dim], minval=-1., maxval=1.)

                # get loss related values & (gradients & vars)
                d_loss_val, d_grad_vars = d_val_grad(generator, discriminator, batch_z, batch_x, is_training)
                g_loss_val, g_grad_vars = g_val_grad(generator, discriminator, batch_z, is_training)

                # get appropriate gradients & variable pairs: implicit grads
                # cover all variables, so filter by variable-name prefix.
                d_vars = [(grad, var) for (grad, var) in d_grad_vars if var.name.startswith('discriminator')]
                g_vars = [(grad, var) for (grad, var) in g_grad_vars if var.name.startswith('generator')]

                # save loss
                # NOTE(review): np.asscalar() is deprecated and removed in
                # NumPy >= 1.23; prefer .numpy().item().
                d_loss_at_steps.append(np.asscalar(d_loss_val.numpy()))
                g_loss_at_steps.append(np.asscalar(g_loss_val.numpy()))

                # apply gradient via pre-defined optimizer
                d_optimizer.apply_gradients(d_vars)
                g_optimizer.apply_gradients(g_vars)

                # display current losses
                if ii % 5 == 0:
                    t.set_postfix(d_loss=d_loss_val.numpy(), g_loss=g_loss_val.numpy())

            # validation results at every epoch
            val_z = np.random.uniform(-1, 1, size=(val_size, z_dim))
            # NOTE(review): the keyword 'is_trainig' looks misspelled — confirm
            # the parameter name in Generator.forward before renaming it.
            fake_image = generator.forward(val_z, is_trainig=False)
            image_fn = os.path.join(assets_dir, 'gan-val-e{:03d}.png'.format(e + 1))
            save_result(fake_image.numpy(), val_block_size, image_fn, color_mode='L')
    return
o7 = self.fc7(o6) if(mode == 'train'): o7 = self.drop7(o7) logits = self.fc8(o7) return logits # Return def loss_alex(alexCNN, datum, mode): # Assuming datum[0] is data. datum[1] is labels logits = alexCNN(datum[0], mode) # print(tf.shape(datum[1])) loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=datum[1]) # loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=alexCNN(datum[0], mode), labels=datum[1]) return tf.reduce_sum(loss)/ tf.cast(tf.size(datum[1]), dtype=tf.float32) alex_loss_grads = tfe.implicit_value_and_gradients(loss_alex) #------------------------------Base Alexnet------------------------------# #------------------------------Attention Alexnet------------------------------# class AttnAlexnet(tf.keras.Model): def __init__(self, num_classes, keep_prob, cost='dp', combine='concat', sample='down'): super(AttnAlexnet, self).__init__() # Possibly experiment - different initializations # TODO - regularization? see paper self.num_classes = num_classes self.keep_prob = keep_prob self.cost = cost self.combine = combine
def train_and_eval(self, num_epochs, num_batchs_per_epoch, lrn_rate, train_data, test_data):
    """Train with Adam and evaluate on test_data once per epoch.

    Tracks the best validation accuracy/epoch seen and prints a summary line
    per epoch plus a final report with total wall-clock duration in hours.
    """
    best_acc, best_epoch = 0., 0
    start_time = time.time()
    orig_begin_time = start_time
    # (loss, grads-and-vars) for one batch, w.r.t. all variables in self.loss.
    val_and_grad_fn = tfe.implicit_value_and_gradients(self.loss)
    # grad_fn = tfe.implicit_gradients(self.loss)
    optimizer = tf.train.AdamOptimizer(lrn_rate)
    epoch = 0
    # Running sums over the current epoch's batches.
    moving_loss, moving_acc = 0, 0
    max_norm = 0
    # NOTE(review): `> 1` means a single-GPU machine falls back to CPU —
    # confirm whether `>= 1` was intended.
    device = "/gpu:0" if tfe.num_gpus() > 1 else "/cpu:0"
    with tf.device(device):
        for batch, batch_data in enumerate(tfe.Iterator(train_data)):
            loss, grad_and_var = val_and_grad_fn(batch_data)
            # grad_list = [grad for grad, _ in grad_and_var]
            # max_norm = max(max_norm, tf.global_norm(grad_list))  # max_norm < 2
            # Accuracy tensor is populated as a side effect of self.loss.
            acc = self.tensors['acc']
            optimizer.apply_gradients(grad_and_var)
            # print(batch, loss.numpy(), acc.numpy())
            moving_loss += loss
            moving_acc += acc
            if (batch + 1) % num_batchs_per_epoch == 0:
                moving_loss /= num_batchs_per_epoch
                moving_acc /= num_batchs_per_epoch
                # epoch duration
                now = time.time()
                duration = now - start_time
                start_time = now
                # NOTE(review): magic constant 28 passed to evaluate() — its
                # meaning is not visible here; confirm against evaluate()'s
                # signature.
                valid_acc = self.evaluate(28, test_data)
                if best_acc < valid_acc:
                    best_acc = valid_acc
                    best_epoch = epoch
                # var_list = [var for _, var in grad_and_var]
                # norm_list = [tf.norm(var) for var in var_list]
                # for var, norm in zip(var_list, norm_list):
                #   print('%s\t%.2f' % (var.name, norm.numpy()))
                print(
                    "Epoch %d loss %.2f acc %.2f %.4f time %.2f" %
                    (epoch, moving_loss, moving_acc, valid_acc, duration))
                sys.stdout.flush()
                epoch += 1
                moving_loss = 0
                moving_acc = 0
                # max_norm = 0
                if epoch == num_epochs:
                    break
    duration = time.time() - orig_begin_time
    duration /= 3600
    print('Done training, best_epoch: %d, best_acc: %.4f' % (best_epoch, best_acc))
    print('duration: %.2f hours' % duration)
    sys.stdout.flush()
dtype=tf.float32) def loss_parse(encoder, decoder_parse, data, mode): encoder_state = encoder(data[0][0], data[0][1]) # def call(self, encoder_state, mode, datum=None): logits = decoder_parse(encoder_state, mode, data[1], 500) mask = tf.sequence_mask(data[2][1], dtype=tf.float32) loss = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=data[2][0]) * mask encoder_state, logits, mask = None, None, None return tf.reduce_sum(loss) / tf.cast(tf.reduce_sum(data[2][1]), dtype=tf.float32) nli_loss_grads = tfe.implicit_value_and_gradients(loss_nli) nmt_loss_grads = tfe.implicit_value_and_gradients(loss_nmt) parse_loss_grads = tfe.implicit_value_and_gradients(loss_parse) # :: Perplexity :: def compute_ppl(encoder_model, decoder_model, dataset, task): total_loss = 0. total_words = 0 loss_fun = [] if (task == 'nli'): loss_fun = loss_nli elif (task == 'nmt'): loss_fun = loss_nmt elif (task == 'parse'):
def fit_generator(self,
                  feed_dict_generator,
                  max_checkpoints_to_keep=5,
                  checkpoint_interval=1000,
                  restore=False,
                  submodel=None):
    """Train this model on data from a generator.

    Works in both eager and graph mode: in eager mode gradients are computed
    with `tfe.implicit_value_and_gradients` and applied by an optimizer; in
    graph mode a prebuilt `train_op` is run in the session.

    Parameters
    ----------
    feed_dict_generator: generator
      this should generate batches, each represented as a dict that maps
      Layers to values.
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are
      discarded.
    checkpoint_interval: int
      the frequency at which to write checkpoints, measured in training
      steps.  Set this to 0 to disable automatic checkpointing.
    restore: bool
      if True, restore the model from the most recent checkpoint and
      continue training from there.  If False, retrain the model from
      scratch.
    submodel: Submodel
      an alternate training objective to use.  This should have been
      created by calling create_submodel().

    Returns
    -------
    the average loss over the most recent checkpoint interval
    """
    if not self.built:
        self.build()
    with self._get_tf("Graph").as_default():
        time1 = time.time()
        # Use the submodel's loss when one is provided and defines its own.
        loss = self.loss
        if submodel is not None and submodel.loss is not None:
            loss = submodel.loss
        if tfe.in_eager_mode():
            # In eager mode we want an optimizer and a function to compute the
            # gradient of the loss.
            submodel_vars = None
            if submodel is None:
                optimizer = self._get_tf("Optimizer")
            else:
                optimizer = submodel.create_optimizer()
                if submodel.layers is not None:
                    # Restrict training to the variables owned by the
                    # submodel's layers.
                    submodel_vars = set()
                    for layer in submodel.layers:
                        for var in layer.variables:
                            submodel_vars.add(var)
            # Wraps the forward pass so implicit_value_and_gradients can
            # differentiate it; [0] extracts the loss from _run_graph's output.
            val_grad_fn = tfe.implicit_value_and_gradients(
                lambda x: self._run_graph([loss], x, True)[0])
        else:
            # In graph mode we want a training operation.
            if submodel is None:
                train_op = self._get_tf('train_op')
            else:
                train_op = submodel.get_train_op()
        if checkpoint_interval > 0:
            saver = tf.train.Saver(
                self.get_variables(),
                max_to_keep=max_checkpoints_to_keep,
                save_relative_paths=True)
        if restore:
            self.restore()
        # Running loss sum and batch count since the last checkpoint.
        avg_loss, n_averaged_batches = 0.0, 0.0
        n_samples = 0
        # Single-element lists shared (by reference) with the enqueue thread:
        # n_enqueued[0] counts queued batches, final_sample[0] is set to the
        # index of the last batch once the generator is exhausted.
        n_enqueued = [0]
        final_sample = [None]
        if self.queue_installed:
            enqueue_thread = threading.Thread(
                target=_enqueue_batch,
                args=(self, feed_dict_generator, self._get_tf("Graph"),
                      self.session, n_enqueued, final_sample))
            enqueue_thread.start()
        for feed_dict in self._create_feed_dicts(feed_dict_generator, True):
            if self.queue_installed:
                # Don't let this thread get ahead of the enqueue thread, since if
                # we try to read more batches than the total number that get queued,
                # this thread will hang indefinitely.
                while n_enqueued[0] <= n_samples:
                    if n_samples == final_sample[0]:
                        break
                    time.sleep(0)
                if n_samples == final_sample[0]:
                    break
            n_samples += 1
            should_log = (self.tensorboard and
                          n_samples % self.tensorboard_log_frequency == 0)
            if tfe.in_eager_mode():
                value, grads_and_vars = val_grad_fn(feed_dict)
                if submodel_vars is not None:
                    # Keep only gradients for variables in the submodel.
                    grads_and_vars = [
                        x for x in grads_and_vars if x[1] in submodel_vars
                    ]
                optimizer.apply_gradients(grads_and_vars)
                avg_loss += value
            else:
                fetches = [train_op, loss.out_tensor]
                if should_log:
                    fetches.append(self._get_tf("summary_op"))
                fetched_values = self.session.run(fetches, feed_dict=feed_dict)
                if should_log:
                    # fetched_values[2] is the summary_op output appended above.
                    self._log_tensorboard(fetched_values[2])
                avg_loss += fetched_values[1]
            n_averaged_batches += 1
            self.global_step += 1
            # Checkpoint every `checkpoint_interval` steps (fires when the
            # step is one short of a multiple of the interval).
            if checkpoint_interval > 0 and self.global_step % checkpoint_interval == checkpoint_interval - 1:
                saver.save(self.session, self.save_file, global_step=self.global_step)
                avg_loss = float(avg_loss) / n_averaged_batches
                logger.info('Ending global_step %d: Average loss %g' % (self.global_step, avg_loss))
                # Reset the running average for the next interval.
                avg_loss, n_averaged_batches = 0.0, 0.0
        # Average any batches accumulated since the last checkpoint.
        if n_averaged_batches > 0:
            avg_loss = float(avg_loss) / n_averaged_batches
        if checkpoint_interval > 0:
            if n_averaged_batches > 0:
                logger.info('Ending global_step %d: Average loss %g' % (self.global_step, avg_loss))
            # Final checkpoint at the end of training.
            saver.save(self.session, self.save_file, global_step=self.global_step)
        time2 = time.time()
        logger.info("TIMING: model fitting took %0.3f s" % (time2 - time1))
        return avg_loss