Beispiel #1
0
def main():
    """Entry point: build the skip-gram dataset and train a Word2Vec model,
    logging a windowed average loss every `args.log_freq` steps."""
    args = setup_args()
    logging.info(args)

    dataset, vocab_table = build_dataset(data_file=args.data,
                                         vocab_file=args.vocab,
                                         batch_size=args.batch_size,
                                         t=args.t,
                                         prefetch_size=args.prefetch)
    vocab_size = vocab_table.size()

    w2v = Word2Vec(vocab_size, args.d)
    # Differentiates the model loss w.r.t. all implicitly-used variables.
    loss_and_grads = tfe.implicit_value_and_gradients(w2v.compute_loss)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=args.lr)

    step = 0
    window_loss = 0.0  # loss accumulated since the last log line
    for step, (src_words, tgt_words) in enumerate(dataset, start=1):
        loss, grads = loss_and_grads(src_words, tgt_words)
        optimizer.apply_gradients(grads)
        window_loss += loss
        if step % args.log_freq == 0:
            log_msg(f'Step: {step} Loss: {window_loss/args.log_freq}')
            window_loss = 0.
    log_msg(f'Num steps: {step} Done!')
def fit(model, dataset, optimizer, verbose=False, logdir=None):
  """Fit the linear-regression model.

  Args:
    model: The LinearModel to fit.
    dataset: The tf.data.Dataset to use for training data.
    optimizer: The TensorFlow Optimizer object to be used.
    verbose: If true, will print out loss values at every iteration.
    logdir: The directory in which summaries will be written for TensorBoard
      (optional).
  """

  def mse(xs, ys):
    # The objective to minimize.
    return mean_square_loss(model, xs, ys)

  loss_and_grads = tfe.implicit_value_and_gradients(mse)

  tf.train.get_or_create_global_step()
  summary_writer = None
  if logdir:
    # Support for TensorBoard summaries. Once training has started, use:
    #   tensorboard --logdir=<logdir>
    summary_writer = tf.contrib.summary.create_file_writer(logdir)

  # Training loop.
  for step, (xs, ys) in enumerate(tfe.Iterator(dataset)):
    loss, grads = loss_and_grads(xs, ys)
    if verbose:
      print("Iteration %d: loss = %s" % (step, loss.numpy()))

    optimizer.apply_gradients(grads, global_step=tf.train.get_global_step())

    if summary_writer is not None:
      with summary_writer.as_default(), \
          tf.contrib.summary.always_record_summaries():
        tf.contrib.summary.scalar("loss", loss)
def train(loss_fn):
  """Train a regression model evaluated using `loss_fn`."""
  print('Training; loss function: ' + loss_fn.__name__)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

  # Per-example objective: compare the target against the model prediction.
  loss_for_example = lambda x, y: loss_fn(y, prediction(x))

  # `grad_fn(x_i, y_i)` yields both the loss value at (x_i, y_i) and the
  # gradients of every variable used in computing it.
  grad_fn = tfe.implicit_value_and_gradients(loss_for_example)

  t0 = time.time()
  for epoch in range(100):
    epoch_loss = 0.0
    for x_i, y_i in tfe.Iterator(dataset):
      loss, grads = grad_fn(x_i, y_i)
      optimizer.apply_gradients(grads)
      epoch_loss += loss
    if epoch % 10 == 0:
      print('Epoch {0}: {1}'.format(epoch, epoch_loss / n_samples))
  print('Took: %f seconds' % (time.time() - t0))
  print('Eager execution exhibits significant overhead per operation. '
        'As you increase your batch size, the impact of the overhead will '
        'become less noticeable. Eager execution is under active development: '
        'expect performance to increase substantially in the near future!')
Beispiel #4
0
def train(loss_fn):
    """Train the regression model for 100 epochs of SGD using `loss_fn`."""
    print('訓練: loss function名稱: ' + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # Per-example prediction error against the target y.
    def loss_for_example(x, y):
        return loss_fn(y, prediction(x))

    # Differentiate the loss function.
    # `grad_fn(x_i, y_i)` returns
    # (1) the value of `loss_for_example` evaluated at `x_i`, `y_i`
    # (2) the gradients of any variables used in calculating it.
    # Returns a function which differentiates f with respect to variables
    grad_fn = tfe.implicit_value_and_gradients(loss_for_example)

    start = time.time()
    for epoch in range(100):
        total_loss = 0.0
        # Iterate over the dataset using tfe.Iterator.
        for x_i, y_i in tfe.Iterator(dataset):
            loss, gradients = grad_fn(x_i, y_i)
            # Take an optimization step and update variables.
            optimizer.apply_gradients(gradients)
            total_loss += loss
        # Print the current average loss every 10 epochs.
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, total_loss / n_samples))
    print('總花費時間: %f 秒' % (time.time() - start))
    print(
        'Eager execution exhibits significant overhead per operation. '
        'As you increase your batch size, the impact of the overhead will '
        'become less noticeable. Eager execution is under active development: '
        'expect performance to increase substantially in the near future!')
Beispiel #5
0
def fit(model, dataset, optimizer, verbose=False, logdir=None):
    """Fit the linear-regression model.

    Args:
        model: The LinearModel to fit.
        dataset: The tf.data.Dataset to use for training data.
        optimizer: The TensorFlow Optimizer object to be used.
        verbose: If true, will print out loss values at every iteration.
        logdir: The directory in which summaries will be written for
            TensorBoard (optional).
    """

    def objective(xs, ys):
        # The loss function to optimize.
        return mean_square_loss(model, xs, ys)

    loss_and_grads = tfe.implicit_value_and_gradients(objective)

    writer = None
    if logdir:
        # Support for TensorBoard summaries. Once training has started, use:
        #   tensorboard --logdir=<logdir>
        writer = tf.contrib.summary.create_file_writer(logdir)

    # Training loop.
    for step, (xs, ys) in enumerate(tfe.Iterator(dataset)):
        loss, grads = loss_and_grads(xs, ys)
        if verbose:
            print("Iteration %d: loss = %s" % (step, loss.numpy()))

        optimizer.apply_gradients(grads)

        if writer is not None:
            with writer.as_default(), \
                    tf.contrib.summary.always_record_summaries():
                tf.contrib.summary.scalar("loss", loss, step=step)
                tf.contrib.summary.scalar("step", step, step=step)
Beispiel #6
0
 def gradients(self, params, forward, judges, aux_judges, xx, yy_true):
     """Compute loss scores and implicit gradients via `self._ivag_inner`.

     Returns:
         (grads_and_params, scores, aux_scores).

     NOTE(review): `params` is unused in this body — confirm whether
     callers rely on it.
     """
     ivag = tfe.implicit_value_and_gradients(self._ivag_inner)
     # `bridge` acts as a mutable side channel: ivag only returns
     # (value, gradients), so the auxiliary scores are apparently appended
     # to this list by `_ivag_inner` during the call and popped afterwards.
     bridge = []
     scores, grads_and_params = \
         ivag(forward, judges, aux_judges, xx, yy_true, bridge)
     aux_scores = bridge.pop()
     return grads_and_params, scores, aux_scores
Beispiel #7
0
def fit(model, dataset, optimizer, verbose=False, logdir=None):
    """Fit the linear-regression model

    :param model:
    :param dataset: The tf.data.Dataset to use for training data.
    :param optimizer:
    :param verbose: If true, will print out loss values at every iteration
    :param logdir: The directory in which summaries will be written for TensorBoard
      (optional)
    :return:
    """

    # Objective: mean squared error of the model on each batch.
    def batch_mse(batch_x, batch_y):
        return mean_square_loss(model, batch_x, batch_y)

    value_and_grads = tfe.implicit_value_and_gradients(batch_mse)

    summary_writer = tf.contrib.summary.create_file_writer(logdir) if logdir else None

    for step, (xs, ys) in enumerate(tfe.Iterator(dataset)):
        loss, grads = value_and_grads(xs, ys)
        if verbose:
            print("Iteration {}: loss = {}".format(step, loss.numpy()))

        optimizer.apply_gradients(grads)

        if summary_writer is not None:
            with summary_writer.as_default():
                tf.contrib.summary.scalar("loss", loss, step=step)
                tf.contrib.summary.scalar("step", step, step=step)
def train(loss_fn):
    """Train a regression model evaluated using `loss_fn`."""

    print('Training; loss function: ' + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    def per_example_loss(x, y):
        # Compare the target y with the model's prediction for x.
        return loss_fn(y, prediction(x))

    # Returns both the loss at (x_i, y_i) and the gradients of every
    # variable used while computing it.
    grad_fn = tfe.implicit_value_and_gradients(per_example_loss)

    started_at = time.time()
    for epoch in range(100):
        running = 0.0
        for x_i, y_i in tfe.Iterator(dataset):
            loss, grads = grad_fn(x_i, y_i)
            # Take an optimization step and update variables.
            optimizer.apply_gradients(grads)
            running += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, running / n_samples))
    print('Took: %f seconds' % (time.time() - started_at))
    print(
        'Eager execution exhibits significant overhead per operation. '
        'As you increase your batch size, the impact of the overhead will '
        'become less noticeable. Eager execution is under active development: '
        'expect performance to increase substantially in the near future!')
Beispiel #9
0
def fit(model, dataset, optimizer, verbose=False, logdir=None):
    """Fit the linear-regression model.

    Args:
        model: The LinearModel to fit.
        dataset: The tf.data.Dataset to use for training data.
        optimizer: The TensorFlow Optimizer object to be used.
        verbose: If true, will print out loss values at every iteration.
        logdir: The directory in which summaries will be written for
            TensorBoard (optional).
    """

    # The loss to optimize: batch mean squared error.
    loss_and_grads = tfe.implicit_value_and_gradients(
        lambda xs, ys: tf.reduce_mean(tf.square(model(xs) - ys)))

    # Support for TensorBoard summaries. Once training has started, use:
    #   tensorboard --logdir=<logdir>
    writer = tfe.SummaryWriter(logdir) if logdir else None

    # Training loop.
    for i, (xs, ys) in enumerate(tfe.Iterator(dataset)):
        loss, grads = loss_and_grads(xs, ys)
        if verbose:
            print("Iteration %d: loss = %s" % (i, loss.numpy()))

        optimizer.apply_gradients(grads)

        if writer is not None:
            writer.scalar("loss", loss)
            writer.step()
Beispiel #10
0
def main():
    """Train a Word2Vec model with eager execution, reporting the average
    loss over each SKIP_STEP-step window.

    Bug fix: `total_loss` was never reset after a report, so the printed
    value was a running total divided by SKIP_STEP rather than a window
    average (the sibling implementations of this loop reset it).
    """
    dataset = tf.data.Dataset.from_generator(
        gen, (tf.int32, tf.int32),
        (tf.TensorShape([BATCH_SIZE]), tf.TensorShape([BATCH_SIZE, 1])))
    model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE)

    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)

    # Differentiates the model loss w.r.t. all implicitly-used variables.
    grad_fn = tfe.implicit_value_and_gradients(model.compute_loss)

    total_loss = 0.0  # loss accumulated over the current SKIP_STEP window
    num_train_steps = 0
    while num_train_steps < NUM_TRAIN_STEPS:
        for center_words, target_words in tfe.Iterator(dataset):
            if num_train_steps >= NUM_TRAIN_STEPS:
                break
            loss_batch, grads = grad_fn(center_words, target_words)
            total_loss += loss_batch
            optimizer.apply_gradients(grads)
            if (num_train_steps + 1) % SKIP_STEP == 0:
                print('A loss at step {}:{:5.1f}'.format(
                    num_train_steps, total_loss / SKIP_STEP))
                # Reset the window so the next report is a true average.
                total_loss = 0.0
            num_train_steps += 1
Beispiel #11
0
def main():
    """Train a Word2Vec skip-gram model with eager execution, printing the
    average loss over each SKIP_STEP-step window."""
    dataset = tf.data.Dataset.from_generator(
        gen, (tf.int32, tf.int32),
        (tf.TensorShape([BATCH_SIZE]), tf.TensorShape([BATCH_SIZE, 1])))
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    # Create the model and its implicit-gradients function.
    model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE)
    grad_fn = tfe.implicit_value_and_gradients(model.compute_loss)

    window_loss = 0.0  # for average loss in the last SKIP_STEP steps
    step = 0
    while step < NUM_TRAIN_STEPS:
        for center_words, target_words in tfe.Iterator(dataset):
            if step >= NUM_TRAIN_STEPS:
                break

            # Compute the loss and gradients, and take an optimization step.
            loss_batch, grads = grad_fn(center_words, target_words)
            optimizer.apply_gradients(grads)
            window_loss += loss_batch
            if (step + 1) % SKIP_STEP == 0:
                print('Average loss at step {}: {:5.1f}'.format(
                    step, window_loss / SKIP_STEP))
                window_loss = 0.0
            step += 1
    def train(self, data, target):
        """Run one gradient-descent step on (data, target); return the loss."""
        optimizer = tf.train.GradientDescentOptimizer(self.rate)

        # Loss of the model's prediction for x against the target y.
        loss_fn = lambda x, y: self.loss(y, self.prediction(x))

        grad_fn = tfe.implicit_value_and_gradients(loss_fn)
        loss, grads = grad_fn(data, target)
        optimizer.apply_gradients(grads)
        return loss
Beispiel #13
0
def main():
    """Train an RNN language model, checkpointing whenever validation
    perplexity improves.

    Bug fix: the periodic "Avg Loss" log averaged `loss_value` (a single
    scalar, so the 'average' was just the latest loss) instead of the
    collected window `batch_loss`, which is accumulated and reset for
    exactly this purpose.
    """
    args = setup_args()
    log_msg(args)

    vocab_table = lookup_ops.index_table_from_file(args.vocab, default_value=args.unk_index)
    train_dataset = create_dataset(args.train, vocab_table, args.bs, args.eos, args.t)
    valid_dataset = create_dataset(args.valid, vocab_table, args.bs, args.eos, args.t)

    loss_and_grads_fun = tfe.implicit_value_and_gradients(train_loss)
    lm = LanguageModel(int(vocab_table.size()), d=args.nd, h=args.nh, cell=args.cell)

    log_msg('Model built!')
    best_valid_ppl = compute_ppl(lm, valid_dataset)
    log_msg(f'Start ppl: {best_valid_ppl: 0.4f}')

    if args.opt == 'adam':
        opt = tf.train.AdamOptimizer(args.lr)
    else:
        opt = tf.train.GradientDescentOptimizer(args.lr)

    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)

    ckpt_prefix = os.path.join(args.save_dir, args.ckpt_prefix)
    root = tfe.Checkpoint(optimizer=opt, model=lm, optimizer_step=tf.train.get_or_create_global_step())
    for epoch_num in range(args.num_epochs):
        log_msg(f'Epoch: {epoch_num} START')
        batch_loss = []  # losses since the last stats report
        for step_num, train_datum in enumerate(train_dataset, start=1):
            loss_value, gradients = loss_and_grads_fun(lm, train_datum)
            batch_loss.append(loss_value)

            if step_num % args.stats_step == 0:
                # Average the window collected since the last report.
                log_msg(f'Epoch: {epoch_num} Step: {step_num} Avg Loss: {np.average(np.asarray(batch_loss)): 0.4f}')
                batch_loss = []

            if step_num % args.eval_step == 0:
                better, ppl = check_if_ppl_better(best_valid_ppl, lm, valid_dataset, root, ckpt_prefix, epoch_num, step_num)
                if better:
                    best_valid_ppl = ppl

            opt.apply_gradients(clip_gradients(gradients, args.clip_ratio))
        log_msg(f'Epoch: {epoch_num} END')
        # End-of-epoch evaluation (step_num=-1 marks the epoch boundary).
        better, ppl = check_if_ppl_better(best_valid_ppl, lm, valid_dataset, root, ckpt_prefix, epoch_num, step_num=-1)
        if better:
            best_valid_ppl = ppl
Beispiel #14
0
def train(loss_fn):
    """Fit the regression model by 100 epochs of SGD under `loss_fn`."""
    print("Training: loss function: " + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # Implicit-gradients function over the per-example loss.
    grad_fn = tfe.implicit_value_and_gradients(
        lambda x, y: loss_fn(y, prediction(x)))

    for epoch in range(100):
        epoch_loss = 0.0
        for x_i, y_i in tfe.Iterator(dataset):
            loss, grads = grad_fn(x_i, y_i)
            optimizer.apply_gradients(grads)
            epoch_loss += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, epoch_loss/n_samples))
def train():
    """Train the linear model for 50 epochs under a Huber loss (delta=10.0)."""
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # Huber loss of the model's prediction (y_pred = x * w + b) against y.
    huber = lambda x, y: utils.huber_loss(y, prediction(x), 10.0)
    grad_fn = tfe.implicit_value_and_gradients(huber)

    t0 = time.time()
    for epoch in range(50):
        accumulated = 0.0
        for x, y in tfe.Iterator(dataset):
            loss, grads = grad_fn(x, y)
            optimizer.apply_gradients(grads)
            accumulated += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, accumulated / n_samples))
    print('Took: %f seconds' % (time.time() - t0))
Beispiel #16
0
def main():
  """Train Word2Vec with eager execution, printing the windowed average
  loss every SKIP_STEP steps."""
  dataset = tf.data.Dataset.from_generator(
      gen, (tf.int32, tf.int32),
      (tf.TensorShape([BATCH_SIZE]), tf.TensorShape([BATCH_SIZE, 1])))
  optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
  model = Word2Vec(vocab_size=VOCAB_SIZE, embed_size=EMBED_SIZE)
  grad_fn = tfe.implicit_value_and_gradients(model.compute_loss)

  window_loss = 0.0  # for average loss in the last SKIP_STEP steps
  step = 0
  while step < NUM_TRAIN_STEPS:
    for center_words, target_words in tfe.Iterator(dataset):
      if step >= NUM_TRAIN_STEPS:
        break
      loss_batch, grads = grad_fn(center_words, target_words)
      window_loss += loss_batch
      optimizer.apply_gradients(grads)
      if (step + 1) % SKIP_STEP == 0:
        print('Average loss at step {}: {:5.1f}'.format(
                step, window_loss / SKIP_STEP))
        window_loss = 0.0
      step += 1
def train(loss_fn):
    """Run 100 epochs of gradient descent on the regression model,
    evaluated with `loss_fn`."""
    print('Training; loss function: ' + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # Differentiate the per-example loss w.r.t. all implicit variables.
    grad_fn = tfe.implicit_value_and_gradients(
        lambda x, y: loss_fn(y, prediction(x)))

    for epoch in range(100):
        epoch_total = 0.0
        for x_i, y_i in tfe.Iterator(dataset):
            loss, grads = grad_fn(x_i, y_i)
            # Take an optimization step and update variables.
            optimizer.apply_gradients(grads)
            epoch_total += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, epoch_total / n_samples))
def train(loss_fn):
    """Train a regression model evaluated using `loss_fn`."""
    print('Training; loss function: ' + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    def example_loss(x, y):
        # Compare the target y against the model's prediction for x.
        return loss_fn(y, prediction(x))

    # Differentiates example_loss w.r.t. every variable it implicitly uses.
    grad_fn = tfe.implicit_value_and_gradients(example_loss)

    begin = time.time()
    for epoch in range(100):
        cumulative = 0.0
        for x_i, y_i in tfe.Iterator(dataset):
            # Compute the loss and gradient, then take an optimization step.
            loss, grads = grad_fn(x_i, y_i)
            optimizer.apply_gradients(grads)
            cumulative += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, cumulative / n_samples))
    print('Took: %f seconds' % (time.time() - begin))
    print(
        'Eager execution exhibits significant overhead per operation. '
        'As you increase your batch size, the impact of the overhead will '
        'become less noticeable. Eager execution is under active development: '
        'expect performance to increase substantially in the near future!')
def main():
    """Word2Vec training loop (eager mode): reports the average loss over
    each SKIP_STEP-step window."""
    dataset = tf.data.Dataset.from_generator(gen, (tf.int32, tf.int32),
                                             (tf.TensorShape([BATCH_SIZE]),
                                              tf.TensorShape([BATCH_SIZE, 1])))

    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    # Create the model and its implicit-gradients function.
    model = Word2Vec(VOCAB_SIZE, EMBED_SIZE, NUM_SAMPLED)
    grad_fn = tfe.implicit_value_and_gradients(model.compute_loss)

    window_total = 0.0  # for average loss in the last SKIP_STEP steps
    steps_done = 0
    while steps_done < NUM_TRAIN_STEPS:
        for center_words, target_words in tfe.Iterator(dataset):
            if steps_done >= NUM_TRAIN_STEPS:
                break

            # Compute the loss and gradients, and take an optimization step.
            loss_batch, grads = grad_fn(center_words, target_words)
            window_total += loss_batch
            optimizer.apply_gradients(grads)
            if (steps_done + 1) % SKIP_STEP == 0:
                print('Average loss at step {}: {:5.1f}'.format(
                    steps_done, window_total / SKIP_STEP))
                window_total = 0.0
            steps_done += 1
Beispiel #20
0
def fit(model, dataset, optimizer, verbose=False, logdir=None):
    """Fit the linear-regression model.

    Args:
        model: The Linear Model to fit.
        dataset: The tf.data.Dataset to use for training data.
        optimizer: The TensorFlow Optimizer object to be used.
        verbose: If true, will print out loss values at every iteration.
        logdir: The directory in which summaries will be written for
            TensorBoard (optional).

    Bug fix: `tf.contrib.summay` -> `tf.contrib.summary`; the typo raised
    AttributeError whenever `logdir` was given.
    """

    # The loss function to optimize.
    def mean_square_loss(xs, ys):
        return tf.reduce_mean(tf.square(model(xs) - ys))

    # Returns a function which differentiates f with respect to variables.
    loss_and_grads = tfe.implicit_value_and_gradients(mean_square_loss)

    tf.train.get_or_create_global_step()

    if logdir:
        summary_writer = tf.contrib.summary.create_file_writer(logdir)

    # Training loop.
    for i, (xs, ys) in enumerate(tfe.Iterator(dataset)):
        loss, grads = loss_and_grads(xs, ys)

        if verbose:
            print("Iteration {}: loss {}".format(i, loss.numpy()))

        optimizer.apply_gradients(grads,
                                  global_step=tf.train.get_global_step())

        if logdir:
            with summary_writer.as_default():
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar("loss", loss)
Beispiel #21
0
def train(loss_fn):
    """One hundred epochs of SGD on the regression model, timed."""
    print('Training; loss function: ' + loss_fn.__name__)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

    # Per-example objective and its implicit-gradients function.
    example_loss = lambda x, y: loss_fn(y, prediction(x))
    grad_fn = tfe.implicit_value_and_gradients(example_loss)

    tic = time.time()
    for epoch in range(100):
        accum = 0.0
        for x_i, y_i in tfe.Iterator(dataset):
            loss, grads = grad_fn(x_i, y_i)
            optimizer.apply_gradients(grads)
            accum += loss
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, accum / n_samples))
    print('Took: %f seconds' % (time.time() - tic))
    print(
        'Eager execution exhibits significant overhead per operation. '
        'As you increase your batch size, the impact of the overhead will '
        'become less noticeable. Eager execution is under active development: '
        'expect performance to increase substantially in the near future!')
Beispiel #22
0
  def fit_generator(self,
                    feed_dict_generator,
                    max_checkpoints_to_keep=5,
                    checkpoint_interval=1000,
                    restore=False,
                    submodel=None):
    """Train this model on data from a generator.

    Parameters
    ----------
    feed_dict_generator: generator
      this should generate batches, each represented as a dict that maps
      Layers to values.
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    checkpoint_interval: int
      the frequency at which to write checkpoints, measured in training steps.
      Set this to 0 to disable automatic checkpointing.
    restore: bool
      if True, restore the model from the most recent checkpoint and continue training
      from there.  If False, retrain the model from scratch.
    submodel: Submodel
      an alternate training objective to use.  This should have been created by
      calling create_submodel().

    Returns
    -------
    the average loss over the most recent checkpoint interval
    """
    if not self.built:
      self.build()
    with self._get_tf("Graph").as_default():
      time1 = time.time()
      # A submodel may override the training objective.
      loss = self.loss
      if submodel is not None and submodel.loss is not None:
        loss = submodel.loss
      if tfe.in_eager_mode():
        # In eager mode we want an optimizer and a function to compute the
        # gradient of the loss.

        # When a submodel declares specific layers, collect their variables
        # so gradient updates can be restricted to them below.
        submodel_vars = None
        if submodel is None:
          optimizer = self._get_tf("Optimizer")
        else:
          optimizer = submodel.create_optimizer()
          if submodel.layers is not None:
            submodel_vars = set()
            for layer in submodel.layers:
              for var in layer.variables:
                submodel_vars.add(var)
        val_grad_fn = tfe.implicit_value_and_gradients(
            lambda x: self._run_graph([loss], x, True)[0])
      else:
        # In graph mode we want a training operation.

        if submodel is None:
          train_op = self._get_tf('train_op')
        else:
          train_op = submodel.get_train_op()
      if checkpoint_interval > 0:
        saver = tf.train.Saver(
            self.get_variables(),
            max_to_keep=max_checkpoints_to_keep,
            save_relative_paths=True)
      if restore:
        self.restore()
      avg_loss, n_averaged_batches = 0.0, 0.0
      n_samples = 0
      # Single-element lists act as mutable cells shared with the enqueue
      # thread below.
      n_enqueued = [0]
      final_sample = [None]
      if self.queue_installed:
        enqueue_thread = threading.Thread(
            target=_enqueue_batch,
            args=(self, feed_dict_generator, self._get_tf("Graph"),
                  self.session, n_enqueued, final_sample))
        enqueue_thread.start()
      for feed_dict in self._create_feed_dicts(feed_dict_generator, True):
        if self.queue_installed:
          # Don't let this thread get ahead of the enqueue thread, since if
          # we try to read more batches than the total number that get queued,
          # this thread will hang indefinitely.
          while n_enqueued[0] <= n_samples:
            if n_samples == final_sample[0]:
              break
            time.sleep(0)
          if n_samples == final_sample[0]:
            break
        n_samples += 1
        should_log = (self.tensorboard and
                      n_samples % self.tensorboard_log_frequency == 0)
        if tfe.in_eager_mode():
          value, grads_and_vars = val_grad_fn(feed_dict)
          # Restrict updates to the submodel's variables, if requested.
          if submodel_vars is not None:
            grads_and_vars = [
                x for x in grads_and_vars if x[1] in submodel_vars
            ]
          optimizer.apply_gradients(grads_and_vars)
          avg_loss += value
        else:
          fetches = [train_op, loss.out_tensor]
          if should_log:
            fetches.append(self._get_tf("summary_op"))
          fetched_values = self.session.run(fetches, feed_dict=feed_dict)
          if should_log:
            self._log_tensorboard(fetched_values[2])
          avg_loss += fetched_values[1]
        n_averaged_batches += 1
        self.global_step += 1
        if checkpoint_interval > 0 and self.global_step % checkpoint_interval == checkpoint_interval - 1:
          saver.save(self.session, self.save_file, global_step=self.global_step)
          avg_loss = float(avg_loss) / n_averaged_batches
          logger.info('Ending global_step %d: Average loss %g' %
                      (self.global_step, avg_loss))
          avg_loss, n_averaged_batches = 0.0, 0.0
      if n_averaged_batches > 0:
        avg_loss = float(avg_loss) / n_averaged_batches
      if checkpoint_interval > 0:
        if n_averaged_batches > 0:
          logger.info('Ending global_step %d: Average loss %g' %
                      (self.global_step, avg_loss))
        saver.save(self.session, self.save_file, global_step=self.global_step)
        time2 = time.time()
        logger.info("TIMING: model fitting took %0.3f s" % (time2 - time1))
    return avg_loss
Beispiel #23
0
def train_model():
    """Fit y = w*x + b to synthetic noisy data and plot the loss and
    parameter trajectories against the true values.

    Fix: `np.asscalar` was removed in NumPy 1.23 — replaced with `.item()`.
    Dead commented-out experiment code was removed.
    """
    true_w = 3
    true_b = 2
    NUM_EXAMPLES = 1000
    inputs = tf.random_normal(shape=[NUM_EXAMPLES, 1])
    noise = tf.random_normal(shape=[NUM_EXAMPLES, 1])
    labels = inputs * true_w + true_b + noise
    # Visualize the training data.
    plt.scatter(inputs.numpy(), labels.numpy())
    plt.show()

    wb = tf.layers.Dense(units=1, use_bias=True)
    # Build the gradients function.
    value_and_gradients_fn = tfe.implicit_value_and_gradients(loss_fn)

    # Build the optimizer.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

    loss_at_step = []
    w_at_step = []
    b_at_step = []

    print("\n训练")
    for step_num in range(num_training_steps):
        loss, gradients_and_variables = value_and_gradients_fn(
            inputs, labels, wb)
        print('Loss: {}'.format(loss))
        # `.item()` replaces the removed np.asscalar().
        loss_at_step.append(loss.numpy().item())
        w, b = wb.variables
        print("之前:", w.read_value().numpy(), b.read_value().numpy())
        print()
        optimizer.apply_gradients(gradients_and_variables)
        w, b = wb.variables
        print("之后:", w.read_value().numpy(), b.read_value().numpy())
        print()
        w_at_step.append(w.read_value().numpy().item())
        b_at_step.append(b.read_value().numpy().item())

    print(w_at_step)
    t = range(0, num_training_steps)
    plt.plot(t, loss_at_step, 'k', t, w_at_step, 'r', t,
             [true_w] * num_training_steps, 'r--', t, b_at_step, 'b', t,
             [true_b] * num_training_steps, 'b--')
    plt.legend(['loss', 'w estimate', 'w true', 'b estimate', 'b true'])
    plt.show()
Beispiel #24
0
def train(device):
    """Train a GAN (generator + discriminator) on MNIST in TF eager mode.

    Args:
        device: TensorFlow device string (e.g. '/gpu:0' or '/cpu:0') the
            whole training loop is placed on.

    Side effects:
        Creates './assets' if missing and writes one grid image of generated
        samples per epoch ('gan-val-eNNN.png').
    """
    # hyper parameters
    z_dim = 100
    epochs = 30
    batch_size = 128
    learning_rate = 0.0002
    beta1 = 0.5
    is_training = True

    # for validation purpose
    assets_dir = './assets'
    if not os.path.isdir(assets_dir):
        os.makedirs(assets_dir)
    val_block_size = 10
    val_size = val_block_size * val_block_size

    # load mnist data
    mnist = input_data.read_data_sets('mnist-data', one_hot=True)
    inputs_shape = [-1, 28, 28, 1]

    # wrap with available device
    with tf.device(device):
        # create generator & discriminator
        generator = Generator()
        discriminator = Discriminator()

        # loss+gradient functions and one Adam optimizer per network
        d_val_grad = tfe.implicit_value_and_gradients(d_loss_fn)
        g_val_grad = tfe.implicit_value_and_gradients(g_loss_fn)
        d_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1)
        g_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1)

        # for loss savings
        d_loss_at_steps = []
        g_loss_at_steps = []

        for e in range(epochs):
            t = trange(mnist.train.num_examples // batch_size)
            for ii in t:
                t.set_description('{:04d}/{:04d}: '.format(e + 1, epochs))

                # no need labels
                batch_x, _ = mnist.train.next_batch(batch_size)

                # rescale images from [0, 1] to [-1, 1]
                batch_x = tf.reshape(batch_x, shape=inputs_shape)
                batch_x = batch_x * 2.0 - 1.0

                # Sample random noise for G
                batch_z = tf.random_uniform(shape=[batch_size, z_dim], minval=-1., maxval=1.)

                # get loss related values & (gradients & vars)
                d_loss_val, d_grad_vars = d_val_grad(generator, discriminator, batch_z, batch_x, is_training)
                g_loss_val, g_grad_vars = g_val_grad(generator, discriminator, batch_z, is_training)

                # keep only each network's own (gradient, variable) pairs,
                # selected by variable-name prefix
                d_vars = [(grad, var) for (grad, var) in d_grad_vars if var.name.startswith('discriminator')]
                g_vars = [(grad, var) for (grad, var) in g_grad_vars if var.name.startswith('generator')]

                # save loss — np.asscalar was removed in NumPy 1.23; .item()
                # is the supported replacement
                d_loss_at_steps.append(d_loss_val.numpy().item())
                g_loss_at_steps.append(g_loss_val.numpy().item())

                # apply gradient via pre-defined optimizer
                d_optimizer.apply_gradients(d_vars)
                g_optimizer.apply_gradients(g_vars)

                # display current losses every 5 steps
                if ii % 5 == 0:
                    t.set_postfix(d_loss=d_loss_val.numpy(), g_loss=g_loss_val.numpy())

            # validation results at every epoch
            val_z = np.random.uniform(-1, 1, size=(val_size, z_dim))
            # NOTE(review): 'is_trainig' looks like a typo for 'is_training' —
            # confirm against Generator.forward's parameter name before renaming.
            fake_image = generator.forward(val_z, is_trainig=False)
            image_fn = os.path.join(assets_dir, 'gan-val-e{:03d}.png'.format(e + 1))
            save_result(fake_image.numpy(), val_block_size, image_fn, color_mode='L')
    return
Beispiel #25
0
        # Classifier head: fc7 -> (dropout applied only in 'train' mode) -> fc8.
        o7 = self.fc7(o6)
        if(mode == 'train'):
            o7 = self.drop7(o7)
        # Unnormalized class scores; no softmax is applied here.
        logits = self.fc8(o7)
        return logits 
def loss_alex(alexCNN, datum, mode):
    """Mean sparse softmax cross-entropy of `alexCNN` on one batch.

    `datum` is a (features, labels) pair; `mode` is forwarded to the model
    (e.g. 'train' enables dropout inside the network).
    """
    features, labels = datum[0], datum[1]
    predictions = alexCNN(features, mode)
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=predictions, labels=labels)
    # Batch mean: total loss divided by the number of labels.
    label_count = tf.cast(tf.size(labels), dtype=tf.float32)
    return tf.reduce_sum(per_example) / label_count

alex_loss_grads = tfe.implicit_value_and_gradients(loss_alex)

#------------------------------Base Alexnet------------------------------#



#------------------------------Attention Alexnet------------------------------#
class AttnAlexnet(tf.keras.Model):
    """AlexNet variant with attention, configured by cost/combine/sample flags."""
    def __init__(self, num_classes, keep_prob, cost='dp', combine='concat', sample='down'):
        """Store configuration; `cost`, `combine`, `sample` select the
        attention cost function, feature-combination mode, and sampling
        direction (semantics defined where they are consumed — TODO confirm)."""
        super(AttnAlexnet, self).__init__()
        # Possibly experiment - different initializations
        # TODO - regularization? see paper
        self.num_classes = num_classes
        self.keep_prob = keep_prob
        self.cost = cost
        self.combine = combine
Beispiel #26
0
    def train_and_eval(self, num_epochs, num_batchs_per_epoch, lrn_rate,
                       train_data, test_data):
        """Train with Adam over `train_data` and evaluate once per "epoch".

        An epoch is defined as `num_batchs_per_epoch` consecutive batches of
        the (possibly repeating) `train_data` iterator. Tracks and prints the
        best validation accuracy and total wall-clock duration.

        Args:
            num_epochs: number of epochs to run before stopping.
            num_batchs_per_epoch: batches counted as one epoch.
            lrn_rate: Adam learning rate.
            train_data: dataset consumed via tfe.Iterator; batches are passed
                to self.loss.
            test_data: dataset passed to self.evaluate.
        """
        best_acc, best_epoch = 0., 0
        start_time = time.time()
        orig_begin_time = start_time

        # Returns (loss, [(grad, var), ...]) in a single call.
        val_and_grad_fn = tfe.implicit_value_and_gradients(self.loss)
        # grad_fn = tfe.implicit_gradients(self.loss)

        optimizer = tf.train.AdamOptimizer(lrn_rate)

        epoch = 0
        moving_loss, moving_acc = 0, 0
        max_norm = 0
        # NOTE(review): '> 1' means a single-GPU machine still runs on CPU —
        # confirm whether '> 0' was intended.
        device = "/gpu:0" if tfe.num_gpus() > 1 else "/cpu:0"

        with tf.device(device):
            for batch, batch_data in enumerate(tfe.Iterator(train_data)):
                loss, grad_and_var = val_and_grad_fn(batch_data)

                # grad_list = [grad for grad, _ in grad_and_var]
                # max_norm = max(max_norm, tf.global_norm(grad_list)) # max_norm < 2

                # Presumably populated as a side effect of self.loss for this
                # batch, so it must be read after val_and_grad_fn — confirm.
                acc = self.tensors['acc']

                optimizer.apply_gradients(grad_and_var)
                # print(batch, loss.numpy(), acc.numpy())

                # Running sums, averaged at epoch end below.
                moving_loss += loss
                moving_acc += acc

                if (batch + 1) % num_batchs_per_epoch == 0:
                    moving_loss /= num_batchs_per_epoch
                    moving_acc /= num_batchs_per_epoch

                    # epoch duration
                    now = time.time()
                    duration = now - start_time
                    start_time = now

                    # NOTE(review): 28 is a magic argument to evaluate —
                    # presumably a batch count/size; confirm and name it.
                    valid_acc = self.evaluate(28, test_data)
                    if best_acc < valid_acc:
                        best_acc = valid_acc
                        best_epoch = epoch

                    # var_list = [var for _, var in grad_and_var]
                    # norm_list = [tf.norm(var) for var in var_list]
                    # for var, norm in zip(var_list, norm_list):
                    #   print('%s\t%.2f' % (var.name, norm.numpy()))

                    print(
                        "Epoch %d loss %.2f acc %.2f %.4f time %.2f" %
                        (epoch, moving_loss, moving_acc, valid_acc, duration))
                    sys.stdout.flush()

                    # Reset the running averages for the next epoch.
                    epoch += 1
                    moving_loss = 0
                    moving_acc = 0
                    # max_norm = 0
                    if epoch == num_epochs:
                        break

        # Report total wall-clock time in hours.
        duration = time.time() - orig_begin_time
        duration /= 3600
        print('Done training, best_epoch: %d, best_acc: %.4f' %
              (best_epoch, best_acc))
        print('duration: %.2f hours' % duration)
        sys.stdout.flush()
Beispiel #27
0
                                         dtype=tf.float32)


def loss_parse(encoder, decoder_parse, data, mode):
    """Masked mean-per-token cross-entropy for the parse decoder.

    Layout of `data` as used here: data[0] holds the two encoder inputs,
    data[1] is the decoder input, data[2] = (target ids, target lengths).
    """
    state = encoder(data[0][0], data[0][1])
    # decoder_parse(encoder_state, mode, datum, 500) — the 500 is presumably
    # a maximum decode length; confirm against the decoder's signature.
    logits = decoder_parse(state, mode, data[1], 500)
    pad_mask = tf.sequence_mask(data[2][1], dtype=tf.float32)
    per_token = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=data[2][0]) * pad_mask
    # Drop references to the large intermediates before the final reduction.
    state, logits, pad_mask = None, None, None
    token_count = tf.cast(tf.reduce_sum(data[2][1]), dtype=tf.float32)
    return tf.reduce_sum(per_token) / token_count


# Eager loss-and-gradients wrappers, one per task head. loss_nli and
# loss_nmt are defined elsewhere in this file.
nli_loss_grads = tfe.implicit_value_and_gradients(loss_nli)
nmt_loss_grads = tfe.implicit_value_and_gradients(loss_nmt)
parse_loss_grads = tfe.implicit_value_and_gradients(loss_parse)


# :: Perplexity ::
def compute_ppl(encoder_model, decoder_model, dataset, task):
    # Accumulators for total loss and total word count over `dataset`.
    total_loss = 0.
    total_words = 0

    # Select the per-task loss function; `task` is one of
    # 'nli' / 'nmt' / 'parse'.
    loss_fun = []
    if (task == 'nli'):
        loss_fun = loss_nli
    elif (task == 'nmt'):
        loss_fun = loss_nmt
    elif (task == 'parse'):
Beispiel #28
0
  def fit_generator(self,
                    feed_dict_generator,
                    max_checkpoints_to_keep=5,
                    checkpoint_interval=1000,
                    restore=False,
                    submodel=None):
    """Train this model on data from a generator.

    Parameters
    ----------
    feed_dict_generator: generator
      this should generate batches, each represented as a dict that maps
      Layers to values.
    max_checkpoints_to_keep: int
      the maximum number of checkpoints to keep.  Older checkpoints are discarded.
    checkpoint_interval: int
      the frequency at which to write checkpoints, measured in training steps.
      Set this to 0 to disable automatic checkpointing.
    restore: bool
      if True, restore the model from the most recent checkpoint and continue training
      from there.  If False, retrain the model from scratch.
    submodel: Submodel
      an alternate training objective to use.  This should have been created by
      calling create_submodel().

    Returns
    -------
    the average loss over the most recent checkpoint interval
    """
    if not self.built:
      self.build()
    with self._get_tf("Graph").as_default():
      time1 = time.time()
      # If a submodel provides its own loss, it overrides the model's loss.
      loss = self.loss
      if submodel is not None and submodel.loss is not None:
        loss = submodel.loss
      if tfe.in_eager_mode():
        # In eager mode we want an optimizer and a function to compute the
        # gradient of the loss.

        # submodel_vars restricts which variables get updated (set below
        # only when the submodel names specific layers).
        submodel_vars = None
        if submodel is None:
          optimizer = self._get_tf("Optimizer")
        else:
          optimizer = submodel.create_optimizer()
          if submodel.layers is not None:
            submodel_vars = set()
            for layer in submodel.layers:
              for var in layer.variables:
                submodel_vars.add(var)
        # The lambda captures `loss` (already resolved above) and evaluates
        # it for one feed dict in training mode.
        val_grad_fn = tfe.implicit_value_and_gradients(
            lambda x: self._run_graph([loss], x, True)[0])
      else:
        # In graph mode we want a training operation.

        if submodel is None:
          train_op = self._get_tf('train_op')
        else:
          train_op = submodel.get_train_op()
      if checkpoint_interval > 0:
        saver = tf.train.Saver(
            self.get_variables(),
            max_to_keep=max_checkpoints_to_keep,
            save_relative_paths=True)
      if restore:
        self.restore()
      avg_loss, n_averaged_batches = 0.0, 0.0
      n_samples = 0
      # Single-element lists so the enqueue thread can mutate them in place.
      n_enqueued = [0]
      final_sample = [None]
      if self.queue_installed:
        # Feed batches into the TF input queue from a background thread.
        enqueue_thread = threading.Thread(
            target=_enqueue_batch,
            args=(self, feed_dict_generator, self._get_tf("Graph"),
                  self.session, n_enqueued, final_sample))
        enqueue_thread.start()
      for feed_dict in self._create_feed_dicts(feed_dict_generator, True):
        if self.queue_installed:
          # Don't let this thread get ahead of the enqueue thread, since if
          # we try to read more batches than the total number that get queued,
          # this thread will hang indefinitely.
          while n_enqueued[0] <= n_samples:
            if n_samples == final_sample[0]:
              break
            time.sleep(0)
          if n_samples == final_sample[0]:
            break
        n_samples += 1
        should_log = (self.tensorboard and
                      n_samples % self.tensorboard_log_frequency == 0)
        if tfe.in_eager_mode():
          value, grads_and_vars = val_grad_fn(feed_dict)
          # Restrict the update to the submodel's own variables, if any.
          if submodel_vars is not None:
            grads_and_vars = [
                x for x in grads_and_vars if x[1] in submodel_vars
            ]
          optimizer.apply_gradients(grads_and_vars)
          avg_loss += value
        else:
          # Graph mode: run the train op and fetch the loss (plus summaries
          # when TensorBoard logging is due this step).
          fetches = [train_op, loss.out_tensor]
          if should_log:
            fetches.append(self._get_tf("summary_op"))
          fetched_values = self.session.run(fetches, feed_dict=feed_dict)
          if should_log:
            self._log_tensorboard(fetched_values[2])
          avg_loss += fetched_values[1]
        n_averaged_batches += 1
        self.global_step += 1
        # Periodic checkpoint: save, log the interval's average loss, reset.
        if checkpoint_interval > 0 and self.global_step % checkpoint_interval == checkpoint_interval - 1:
          saver.save(self.session, self.save_file, global_step=self.global_step)
          avg_loss = float(avg_loss) / n_averaged_batches
          logger.info('Ending global_step %d: Average loss %g' %
                      (self.global_step, avg_loss))
          avg_loss, n_averaged_batches = 0.0, 0.0
      # Final average over any batches since the last checkpoint.
      if n_averaged_batches > 0:
        avg_loss = float(avg_loss) / n_averaged_batches
      if checkpoint_interval > 0:
        if n_averaged_batches > 0:
          logger.info('Ending global_step %d: Average loss %g' %
                      (self.global_step, avg_loss))
        saver.save(self.session, self.save_file, global_step=self.global_step)
        time2 = time.time()
        logger.info("TIMING: model fitting took %0.3f s" % (time2 - time1))
    return avg_loss