# Example 1
def model_fn(sync, num_replicas):
    """Build the LSTM regression graph for the TaaS distributed runtime.

    Creates the input/target placeholders, the LSTM prediction op, an MSE
    loss, an exponentially decaying learning rate and the Adam training op.
    The key tensors are published through module-level globals so that the
    separate training/evaluation callbacks can reach them.

    Args:
        sync: whether synchronous replica updates were requested (unused here).
        num_replicas: number of distributed workers (unused here).

    Returns:
        A ``dist_base.ModelFnHandler`` describing the model to the platform.
    """
    global _train_op, _loss, _train_x, _test_x, _test_y, _pred, _learning_rate, _X, _Y, _n_rounds

    _X = tf.placeholder(tf.float32,
                        shape=[None, FLAGS.rnn_num_steps, NUM_FEATURES])
    _Y = tf.placeholder(tf.float32, shape=[None, 1])

    global_step = tf.contrib.framework.get_or_create_global_step()
    _pred = lstm(_X)
    _loss = tf.reduce_mean(tf.square(_pred - _Y))

    # BUGFIX: use floor division. Under Python 3 the original "/" yields a
    # float, whereas exponential_decay's decay_steps is intended to be the
    # integer number of batches per epoch (ceil(len(train)/batch_size)).
    _n_rounds = (len(_train_x) - 1) // FLAGS.batch_size + 1
    _learning_rate = tf.train.exponential_decay(FLAGS.learning_rate_base,
                                                global_step,
                                                _n_rounds,
                                                FLAGS.learning_rate_decay,
                                                staircase=True)

    optimizer = tf.train.AdamOptimizer(_learning_rate)
    _train_op = optimizer.minimize(_loss, global_step=global_step)

    def mse_evalute_fn(session):
        # Evaluation callback: MSE of predictions on the held-out test set.
        return session.run(_loss, feed_dict={_X: _test_x, _Y: _test_y})

    # How the platform evaluates the model (metric name -> callback).
    model_metric_ops = {"adjusted_mse": mse_evalute_fn}

    return dist_base.ModelFnHandler(global_step=global_step,
                                    optimizer=optimizer,
                                    model_metric_ops=model_metric_ops,
                                    summary_op=None)
# Example 2
def model_fn(sync, num_replicas):
    """Construct the MNIST feed-forward classification graph for TaaS.

    Exposes the input/target placeholders, the training op and the accuracy
    op through module-level globals for use by the training loop.

    Args:
        sync: if true, wrap the optimizer for synchronous replica updates.
        num_replicas: number of distributed workers.

    Returns:
        A ``dist_base.ModelFnHandler`` describing the model to the platform.
    """
    global x, y_, train_op, accuracy

    # Input placeholders: flattened images and one-hot targets.
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # Forward pass (no moving-average shadow variables).
    y = inference(x)

    # Counter of completed training steps.
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Cross-entropy averaged over the batch; the one-hot targets are turned
    # into class indices for the sparse variant.
    class_ids = tf.argmax(y_, 1)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=class_ids)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # Plain SGD, optionally wrapped to aggregate gradients across replicas.
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    if sync:
        optimizer = tf.train.SyncReplicasOptimizer(
            optimizer,
            replicas_to_aggregate=num_replicas,
            total_num_replicas=num_replicas,
            name="mnist_sync_replicas")
    train_op = optimizer.minimize(cross_entropy_mean, global_step=global_step)

    # Fraction of the batch whose predicted class matches the target.
    hits = tf.equal(tf.argmax(y, 1), class_ids)
    accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

    return dist_base.ModelFnHandler(global_step=global_step,
                                    optimizer=optimizer,
                                    summary_op=None)
def model_fn(sync, num_replicas):
    """Build the MNIST softmax-regression graph, with model export enabled.

    These variables are used later by the training function train_fn(),
    which is why they are declared global here.
    """
    global input_images, loss, labels, optimizer, train_op, accuracy
    # NOTE(review): `mnist` is read inside accuracy_evalute_fn but never
    # assigned in this function — presumably set elsewhere; verify.
    global mnist, global_step

    # Build the inference model: logits = images @ W + b.
    input_images = tf.placeholder(tf.float32, [None, 784], name='image')
    W = tf.Variable(tf.zeros([784, 10]), name='weights')
    tf.summary.histogram("weights", W)
    b = tf.Variable(tf.zeros([10]), name='bias')
    tf.summary.histogram("bias", b)
    logits = tf.matmul(input_images, W) + b

    # Counter of completed training steps.
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Define loss and optimizer
    labels = tf.placeholder(tf.float32, [None, 10], name='labels')
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    # `loss` equals `cross_entropy` numerically (reduce_mean of a scalar);
    # it exists to carry the 'loss' name and the LOSSES collection entry.
    loss = tf.reduce_mean(cross_entropy, name='loss')
    tf.add_to_collection(tf.GraphKeys.LOSSES, loss)

    # Create optimizer to compute gradient
    optimizer = tf.train.AdagradOptimizer(0.01)
    if sync:
        # Synchronous mode: aggregate gradients from all workers per step.
        num_workers = num_replicas
        optimizer = tf.train.SyncReplicasOptimizer(
            optimizer,
            replicas_to_aggregate=num_workers,
            total_num_replicas=num_workers,
            name="mnist_sync_replicas")

    train_op = optimizer.minimize(cross_entropy, global_step=global_step)

    # Test trained model
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def accuracy_evalute_fn(session):
        # Evaluation callback: accuracy on the MNIST validation split.
        return session.run(accuracy,
                           feed_dict={
                               input_images: mnist.validation.images,
                               labels: mnist.validation.labels
                           })

    # Model-export configuration: serving signature (image in, logits out).
    model_export_spec = model_exporter.ModelExportSpec(
        export_dir=FLAGS.export_dir,
        input_tensors={"image": input_images},
        output_tensors={"logits": logits})

    # How the platform evaluates the model (metric name -> callback).
    model_metric_ops = {"accuracy": accuracy_evalute_fn}

    return dist_base.ModelFnHandler(global_step=global_step,
                                    optimizer=optimizer,
                                    model_metric_ops=model_metric_ops,
                                    model_export_spec=model_export_spec)
# Example 4
def model_fn(sync, num_replicas):
    """Define the embedding-based rating-prediction graph for TaaS.

    Publishes the placeholders and key ops as module globals so the separate
    training/evaluation callbacks can use them.

    Args:
        sync: if true, wrap the optimizer for synchronous replica updates.
        num_replicas: number of distributed workers.

    Returns:
        A ``dist_base.ModelFnHandler`` describing the model to the platform.
    """
    global _train_op, _infer, _user_batch, _item_batch, _rate_batch, _rmse, _cost, _global_step

    # Batched user ids, item ids and observed ratings.
    _user_batch = tf.placeholder(tf.int32, shape=[None], name="user")
    _item_batch = tf.placeholder(tf.int32, shape=[None], name="item")
    _rate_batch = tf.placeholder(tf.float32, shape=[None], name="rate")

    _infer = inference(_user_batch, _item_batch, FLAGS.embedding_dim)
    _global_step = tf.contrib.framework.get_or_create_global_step()

    # Per-example squared prediction error.
    _cost = tf.square(_infer - _rate_batch)

    opt = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    if sync:
        # Aggregate gradients from all workers before each update.
        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_replicas,
            total_num_replicas=num_replicas,
            name="mnist_sync_replicas")

    # Clip the global gradient norm to 5.0 before applying the update.
    grads_and_vars = opt.compute_gradients(_cost)
    grads = [gv[0] for gv in grads_and_vars]
    tvars = [gv[1] for gv in grads_and_vars]
    clipped, _ = tf.clip_by_global_norm(grads, 5.0)
    _train_op = opt.apply_gradients(zip(clipped, tvars),
                                    global_step=_global_step)

    _rmse = tf.sqrt(tf.reduce_mean(_cost))

    def rmse_evalute_fn(session):
        # Evaluation callback: RMSE over the held-out test ratings.
        feeds = {_user_batch: _test["user"],
                 _item_batch: _test["item"],
                 _rate_batch: _test["rate"]}
        return session.run(_rmse, feed_dict=feeds)

    # Serving signature: user/item ids in, predicted rating out.
    model_export_spec = model_exporter.ModelExportSpec(
        export_dir=FLAGS.export_dir,
        input_tensors={
            "user": _user_batch,
            "item": _item_batch
        },
        output_tensors={"infer": _infer})

    # How the platform evaluates the model (metric name -> callback).
    model_metric_ops = {"rmse": rmse_evalute_fn}

    return dist_base.ModelFnHandler(global_step=_global_step,
                                    optimizer=opt,
                                    model_metric_ops=model_metric_ops,
                                    model_export_spec=model_export_spec,
                                    summary_op=None)
def model_fn(sync, num_replicas):
    """Build a fixed linear model (y = 0.5*x + 2) and export it with assets."""
    #####################
    # Build the model
    # ###################
    #
    # Linear regression inference model:
    #   y = 0.5*x + 2
    a = tf.Variable(0.5, name="a")
    b = tf.Variable(2.0, name="b")

    x = tf.placeholder(tf.float32, name="x")
    y = tf.add(tf.multiply(a, x), b, name="y")

    global_step = tf.Variable(0, name='global_step', trainable=False)

    #####################
    # Attach an asset file
    #####################
    #
    # Asset files are handled as part of the model during export and import.
    # Typical use case: operations of the trained model need external files
    # for initialization and the like.
    # On export, asset files are copied into the "assets" directory under
    # the model export path.
    original_assets_directory = "/tmp/original/export/assets"
    original_assets_filename = "foo.txt"
    original_assets_filepath = _write_assets(original_assets_directory,
                                             original_assets_filename)
    assets_filepath = tf.constant(original_assets_filepath)
    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, assets_filepath)
    # collections=[] keeps the variable out of GLOBAL_VARIABLES; it is
    # initialized via the legacy_init_op assign below instead.
    filename_tensor = tf.Variable(original_assets_filename,
                                  name="filename_tensor",
                                  trainable=False,
                                  collections=[])
    assign_filename_op = filename_tensor.assign(original_assets_filename)

    # Model-export configuration; remove any stale export first.
    if os.path.exists(FLAGS.export_dir):
        print("The export path has existed, try to delete it...")
        shutil.rmtree(FLAGS.export_dir)
        print("The export path has been deleted.")
    model_export_spec = model_exporter.ModelExportSpec(
        export_dir=FLAGS.export_dir,
        input_tensors={'x': x},
        output_tensors={'y': y},
        assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS),
        legacy_init_op=tf.group(assign_filename_op))

    return dist_base.ModelFnHandler(global_step=global_step,
                                    model_export_spec=model_export_spec)
def model_fn(sync, num_replicas):
    """TensorFlow model definition function (template).

    Called at task startup to build the TensorFlow computation graph
    (tf.Graph). Define the forward-inference algorithm, the loss function,
    the optimizer, and the model-evaluation metrics and their computation
    here.

    Args:
    - `sync`: whether synchronous parameter updates are in effect.
    - `num_replicas`: number of distributed TensorFlow workers.
    """

    # TODO: add the business model definition here.

    # model_fn must return a ModelFnHandler object that tells the TaaS
    # platform about the constructed model: global_step, the Optimizer,
    # evaluation metrics, export configuration, and so on.
    # See docs.caicloud.io for details.
    return dist_base.ModelFnHandler()
# Example 7
def model_fn(sync, num_replicas):
  """Build three fixed linear models and export them in one signature."""
  #####################
  # Build the model
  # ###################
  a1 = tf.Variable(0.5, name="a1")
  b1 = tf.Variable(2.0, name="b1")
  a2 = tf.Variable(2.0, name="a2")
  b2 = tf.Variable(3.0, name="b2")
  a3 = tf.Variable(4.0, name="a3")
  b3 = tf.Variable(5.0, name="b3")

  # y1 = 0.5*x1 + 2
  x1 = tf.placeholder(tf.float32, name="x1")
  y1 = tf.add(tf.multiply(a1, x1), b1, name="y1")

  # y2 = 2*x1 + 3
  y2 = tf.add(tf.multiply(a2, x1), b2, name="y2")
  
  # y3 = 4*x2 + 5  (original comment mislabeled this as "y1 = 4*x1 + 5")
  x2 = tf.placeholder(tf.float32, name="x2")
  y3 = tf.add(tf.multiply(a3, x2), b3, name="y3")

  global_step = tf.Variable(0, name='global_step', trainable=False)

  # Model-export configuration; remove any stale export first.
  if os.path.exists(FLAGS.export_dir):
    print("The export path has existed, try to delete it...")
    shutil.rmtree(FLAGS.export_dir)
    print("The export path has been deleted.")
  input_tensors = {
    'x1': x1,
    'x2': x2,
  }
  output_tensors = {
    'y1': y1,
    'y2': y2,
    'y3': y3,
  }
  model_export_spec = model_exporter.ModelExportSpec(
    export_dir=FLAGS.export_dir,
    input_tensors=input_tensors,
    output_tensors=output_tensors)

  return dist_base.ModelFnHandler(
    global_step=global_step,
    model_export_spec=model_export_spec)
# Example 8
def model_fn(sync, num_replicas):
    """Define the embedding-based rating-prediction graph (Adam variant).

    Publishes the placeholders and key ops as module globals so the separate
    training/evaluation callbacks can use them.
    """
    global _train_op, _infer, _user_batch, _item_batch, _rate_batch, _rmse, _cost, _global_step

    # Batched user ids, item ids and observed ratings.
    _user_batch = tf.placeholder(tf.int32, shape=[None], name="user")
    _item_batch = tf.placeholder(tf.int32, shape=[None], name="item")
    _rate_batch = tf.placeholder(tf.float32, shape=[None], name="rate")

    _infer = inference(_user_batch, _item_batch, FLAGS.embedding_dim)
    _global_step = tf.contrib.framework.get_or_create_global_step()

    # Per-example squared error, minimized directly by Adam.
    _cost = tf.square(_infer - _rate_batch)
    optimizer = tf.train.AdamOptimizer(0.001)
    _train_op = optimizer.minimize(_cost, global_step=_global_step)

    _rmse = tf.sqrt(tf.reduce_mean(_cost))

    def rmse_evalute_fn(session):
        # Evaluation callback: RMSE on the held-out test ratings.
        feeds = {
            _user_batch: _test["user"],
            _item_batch: _test["item"],
            _rate_batch: _test["rate"],
        }
        return session.run(_rmse, feed_dict=feeds)

    # Serving signature: user/item ids in, predicted rating out.
    model_export_spec = model_exporter.ModelExportSpec(
        export_dir=FLAGS.export_dir,
        input_tensors={"user": _user_batch, "item": _item_batch},
        output_tensors={"infer": _infer})

    # How the platform evaluates the model (metric name -> callback).
    model_metric_ops = {"rmse": rmse_evalute_fn}

    return dist_base.ModelFnHandler(
        global_step=_global_step,
        optimizer=optimizer,
        model_metric_ops=model_metric_ops,
        model_export_spec=model_export_spec,
        summary_op=None)
# Example 9
def model_fn(sync, num_replicas):
    """TensorFlow model definition function (template with skeleton).

    Called at task startup to build the TensorFlow computation graph
    (tf.Graph). Define the forward-inference algorithm, the loss function,
    the optimizer, and the model-evaluation metrics and their computation
    here.

    Args:
    - `sync`: whether synchronous parameter updates are in effect.
    - `num_replicas`: number of distributed TensorFlow workers.

    NOTE(review): as written, the return statement references global_step,
    model_metric_ops and model_export_spec, which are only defined in the
    commented-out TODO sections below — this raises NameError until they
    are filled in. This is intentional for a template.
    """
    global _train_op

    # TODO: add the business model definition here.
    # global_step = ...
    # _train_op = ...

    # Model-evaluation configuration:
    # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # def accuracy_evalute_fn(session):
    #     return session.run(accuracy, ...)
    # model_metric_ops = {
    #    "accuracy": accuracy_evalute_fn
    # }

    # Model-export configuration:
    # model_export_spec = model_exporter.ModelExportSpec(
    #    export_dir=FLAGS.export_dir,
    #    input_tensors={"image": _input_images},
    #    output_tensors={"logits": logits})

    # model_fn must return a ModelFnHandler object that tells the TaaS
    # platform about the constructed model: global_step, the Optimizer,
    # evaluation metrics, export configuration, and so on.
    # See docs.caicloud.io for details.
    return dist_base.ModelFnHandler(global_step=global_step,
                                    model_metric_ops=model_metric_ops,
                                    model_export_spec=model_export_spec)
def model_fn(sync, num_replicas):
    """Build the MNIST softmax graph with custom summary collection.

    These variables are used later by the training function train_fn(),
    which is why they are declared global here.
    """
    global _input_images, _loss, _labels, _train_op, _accuracy
    # NOTE(review): `_mnist` is read inside accuracy_evalute_fn but never
    # assigned in this function — presumably set elsewhere; verify.
    global _mnist, _global_step, _summary_op, _summary_writer

    # Build the inference model: logits = images @ W + b.
    _input_images = tf.placeholder(tf.float32, [None, 784], name='image')
    W = tf.Variable(tf.zeros([784, 10]), name='weights')
    tf.summary.histogram("weights", W)
    b = tf.Variable(tf.zeros([10]), name='bias')
    tf.summary.histogram("bias", b)
    logits = tf.matmul(_input_images, W) + b

    _global_step = tf.Variable(0, name='global_step', trainable=False)

    # Define loss and optimizer
    _labels = tf.placeholder(tf.float32, [None, 10], name='labels')
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=_labels))
    tf.summary.scalar("cross_entropy", cross_entropy)
    # `_loss` equals `cross_entropy` numerically (reduce_mean of a scalar);
    # it exists to carry the 'loss' name and the LOSSES collection entry.
    _loss = tf.reduce_mean(cross_entropy, name='loss')
    tf.add_to_collection(tf.GraphKeys.LOSSES, _loss)

    # Create optimizer to compute gradient
    optimizer = tf.train.AdagradOptimizer(0.01)
    if sync:
        # Synchronous mode: aggregate gradients from all workers per step.
        num_workers = num_replicas
        optimizer = tf.train.SyncReplicasOptimizer(
            optimizer,
            replicas_to_aggregate=num_workers,
            total_num_replicas=num_workers,
            name="mnist_sync_replicas")

    _train_op = optimizer.minimize(cross_entropy, global_step=_global_step)

    # Custom Operation for computing the model's summary information, plus a
    # FileWriter to persist it. dist_base.cfg.logdir is the training-log
    # path parameter configured on the TaaS platform.
    _summary_op = tf.summary.merge_all()
    _summary_writer = tf.summary.FileWriter(dist_base.cfg.logdir)

    # Test trained model
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(_labels, 1))
    _accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def accuracy_evalute_fn(session):
        # Evaluation callback: accuracy on the MNIST validation split.
        return session.run(_accuracy,
                           feed_dict={
                               _input_images: _mnist.validation.images,
                               _labels: _mnist.validation.labels
                           })

    # Model-export configuration (serving signature: image in, logits out).
    model_export_spec = model_exporter.ModelExportSpec(
        export_dir=FLAGS.export_dir,
        input_tensors={"image": _input_images},
        output_tensors={"logits": logits})

    # How the platform evaluates the model (metric name -> callback).
    model_metric_ops = {"accuracy": accuracy_evalute_fn}

    # Because the graph computes tf.summary.scalar(cross_entropy), which
    # requires feeding _input_images and _labels, summary_op is set to None
    # here. That disables TaaS's automatic summary computation/saving; the
    # train_op function computes and collects the graph's summary
    # information itself instead.
    return dist_base.ModelFnHandler(global_step=_global_step,
                                    optimizer=optimizer,
                                    model_metric_ops=model_metric_ops,
                                    model_export_spec=model_export_spec,
                                    summary_op=None)