Code example #1
File: main.py Project: muzzynine/examples-1
def graph_builder(opts,
                  observed=None,
                  ground_truth=None,
                  learning_rate=0.001,
                  mode=util.Modes.TRAIN):

    # Build the neural network
    predictions = MLPModel(opts, mode=mode)(observed)

    # Loss
    loss = opts.loss_scaling * tf.cast(tf.losses.absolute_difference(
        ground_truth, predictions, reduction=tf.losses.Reduction.MEAN),
                                       dtype=getattr(tf, opts.dtypes[0]))

    # Error metric
    rmse_metric = util.exp_rmspe(ground_truth, predictions)

    if mode == util.Modes.TRAIN:
        # Training
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate)
        # Wrap in a CrossReplicaOptimizer if replicating across multiple IPUs
        if opts.replication_factor > 1:
            optimizer = cross_replica_optimizer.CrossReplicaOptimizer(
                optimizer)
        # Batch norm variable update dependency
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # Op to calculate every variable gradient
            grads = tf.gradients(loss, tf.trainable_variables())
        grads = list(zip(grads, tf.trainable_variables()))

        # Loss scaling
        grads = [(grad / opts.loss_scaling, var) for grad, var in grads]

        # Apply weight_decay directly to gradients
        if opts.weight_decay != 0:
            grads = [(grad + (opts.weight_decay * var),
                      var) if 'l2tag' in var.name and 'kernel' in var.name else
                     (grad, var) for grad, var in grads]

        # clip gradients
        if opts.gradient_clipping:
            grads = [(tf.clip_by_value(grad, -1., 1.), var)
                     for grad, var in grads]

        # Op to update all variables according to their gradient
        apply_grads = optimizer.apply_gradients(grads_and_vars=grads)
        return loss / opts.loss_scaling, rmse_metric, apply_grads
    elif mode == util.Modes.VALID:
        return loss / opts.loss_scaling, rmse_metric, None
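
The builder above only constructs the graph; on IPU it is normally compiled into a single executable before being run. The following is a minimal sketch, not code from muzzynine/examples-1: it assumes the Graphcore TensorFlow 1 API (`ipu_compiler.compile` and the `ipu_scope` device scope also used in code example #2) and leaves `opts`, `util` and the input tensors to the caller.

from tensorflow.python.ipu import ipu_compiler
from tensorflow.python.ipu.scopes import ipu_scope


def compile_train_graph(opts, observed, ground_truth):
    # Close over opts so that ipu_compiler.compile only sees tensor inputs.
    def builder(observed, ground_truth):
        return graph_builder(opts,
                             observed=observed,
                             ground_truth=ground_truth,
                             mode=util.Modes.TRAIN)

    # Everything placed under ipu_scope is mapped onto the IPU and
    # compiled by XLA as one computation.
    with ipu_scope("/device:IPU:0"):
        outputs = ipu_compiler.compile(builder,
                                       inputs=[observed, ground_truth])
    # outputs holds the compiled (loss, rmse_metric, apply_grads) results.
    return outputs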
Code example #2
File: rl_benchmark.py Project: inejc/examples
def build_train_op(previous_loss, *infeed_data):
    """Construct loss and optimizer."""
    with ipu_scope("/device:IPU:0"):
        action_prob = create_policy(*infeed_data)
        loss = tf.reduce_sum(action_prob * infeed_data[-2])
        opt = tf.train.GradientDescentOptimizer(LEARNING_RATE)
        if args.accumulate_grad:
            opt = gradient_accumulation_optimizer.GradientAccumulationOptimizer(
                opt, num_mini_batches=args.num_mini_batches)
        opt = cross_replica_optimizer.CrossReplicaOptimizer(opt)
        train_op = opt.minimize(loss)
        with tf.control_dependencies([train_op]):
            loss = tf.identity(loss)
        return previous_loss + loss
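
The `(previous_loss, *infeed_data)` signature matches the body contract of `ipu.loops.repeat`, which threads loop-carried inputs through the body and appends one batch from an infeed queue on every iteration. Below is a minimal sketch of that driving loop; it is not code from inejc/examples, and `dataset` and `NUM_ITERATIONS` are assumptions.

import tensorflow as tf  # TensorFlow 1.x (Graphcore SDK port) assumed
from tensorflow.python.ipu import ipu_compiler, ipu_infeed_queue, loops
from tensorflow.python.ipu.scopes import ipu_scope

infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset, feed_name="rl_infeed")


def training_loop():
    # Run build_train_op NUM_ITERATIONS times on the device, carrying the
    # accumulated loss and pulling each mini-batch from the infeed queue.
    return loops.repeat(NUM_ITERATIONS,
                        build_train_op,
                        inputs=[tf.constant(0.0)],
                        infeed_queue=infeed_queue)


with ipu_scope("/device:IPU:0"):
    total_loss = ipu_compiler.compile(training_loop, inputs=[])

with tf.Session() as sess:
    sess.run(infeed_queue.initializer)
    sess.run(tf.global_variables_initializer())
    print(sess.run(total_loss))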
Code example #3
# Imports assumed from the Graphcore TensorFlow 1 IPU API
# (module paths may vary between SDK versions)
import tensorflow as tf
from tensorflow.python.ipu import (cross_replica_optimizer, embedding_ops,
                                   ipu_infeed_queue, rnn_ops)

# Create training examples / targets
ds = tf.data.Dataset.from_tensor_slices(text_as_int)
ds = ds.batch(sequence_length, drop_remainder=True)
ds = ds.shuffle(batch_size * batch_size)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat()

# The host side queues
infeed_queue = ipu_infeed_queue.IPUInfeedQueue(
    ds, feed_name="infeed", replication_factor=replication_factor)

# Set the learning rate
lr = 0.0001

# Create a momentum optimiser for replication
optimizer = cross_replica_optimizer.CrossReplicaOptimizer(
    tf.train.MomentumOptimizer(lr, 0.99))

# Create a host embedding object
embedding = embedding_ops.create_host_embedding(
    "char_embedding",
    shape=[256, 256],
    dtype=tf.float32,
    partition_strategy="TOKEN",
    optimizer_spec=embedding_ops.HostEmbeddingOptimizerSpec(lr))


# PopnnGRU is time-major
def gru(partials):
    gru_ = rnn_ops.PopnnGRU(256)
    partial_t = tf.transpose(partials, [1, 0, 2])
    gru_outputs_t, _ = gru_(partial_t)