Example #1
 def testErrorIfUsedBeforeMinimizeCalled(self):
   opt = training.SyncReplicasOptimizer(
       opt=gradient_descent.GradientDescentOptimizer(1.0),
       replicas_to_aggregate=1,
       total_num_replicas=1)
   hook = opt.make_session_run_hook(True)
   with self.assertRaisesRegex(ValueError, "apply_gradient should be called"):
     hook.begin()
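
The examples in this listing reference module-level names (training, gradient_descent, adam, variables, ops, constant_op, supervisor_lib) without showing their imports. A plausible import block, assuming these snippets come from TensorFlow 1.x test code under tensorflow.python (the exact module paths are my assumption), would be:

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops
from tensorflow.python.ops import variables
from tensorflow.python.training import adam
from tensorflow.python.training import gradient_descent
from tensorflow.python.training import supervisor as supervisor_lib
from tensorflow.python.training import training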
Example #2
 def testCanCreatedBeforeMinimizeCalled(self):
   """This behavior is required to be integrated with Estimators."""
   opt = training.SyncReplicasOptimizer(
       opt=gradient_descent.GradientDescentOptimizer(1.0),
       replicas_to_aggregate=1,
       total_num_replicas=1)
   hook = opt.make_session_run_hook(True)
   v = variables.VariableV1([0.])
   global_step = variables.VariableV1(0, name="global_step", trainable=False)
   opt.minimize(v, global_step=global_step)
   hook.begin()
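
The docstring's point is that the hook must be constructible before minimize() runs, because an Estimator only builds the graph (and calls minimize()) later, inside model_fn. A rough sketch of that wiring using the public TF 1.x API; the toy loss and variable here are made up for illustration and are not part of the original test:

import tensorflow as tf

def model_fn(features, labels, mode):
  # Toy scalar loss so the sketch is self-contained (TRAIN mode only).
  w = tf.compat.v1.get_variable("w", initializer=0.0)
  loss = tf.math.squared_difference(w, 1.0)
  sync_opt = tf.compat.v1.train.SyncReplicasOptimizer(
      tf.compat.v1.train.GradientDescentOptimizer(1.0),
      replicas_to_aggregate=1,
      total_num_replicas=1)
  # The hook can be created here, before minimize() ...
  hook = sync_opt.make_session_run_hook(is_chief=True)
  # ... as long as minimize()/apply_gradients() runs before hook.begin().
  train_op = sync_opt.minimize(
      loss, global_step=tf.compat.v1.train.get_global_step())
  return tf.estimator.EstimatorSpec(
      mode, loss=loss, train_op=train_op, training_hooks=[hook])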
Example #3
 def testFetchVariableList(self):
     opt = training.SyncReplicasOptimizer(opt=adam.AdamOptimizer(0.01),
                                          replicas_to_aggregate=1,
                                          total_num_replicas=1)
     v = variables.Variable([0.], name="fetch_variable_test")
     global_step = variables.Variable(0,
                                      name="global_step",
                                      trainable=False)
     opt.minimize(v, global_step=global_step)
     opt_variables = opt.variables()
     self.assertIn(opt._opt._beta1_power, opt_variables)
     self.assertIn(opt._opt._beta2_power, opt_variables)
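
The assertions above show that opt.variables() exposes the wrapped Adam optimizer's accumulator variables (the beta power variables), so the returned list can be treated like any other variable collection. A minimal sketch, continuing from the opt built above (not part of the original test):

# Build an initializer covering just the optimizer-owned variables
# returned by opt.variables().
init_opt_vars_op = variables.variables_initializer(opt.variables())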
Example #4
def get_workers(num_workers, replicas_to_aggregate, workers):
    sessions = []
    graphs = []
    train_ops = []
    for worker_id in range(num_workers):
        graph = ops.Graph()
        is_chief = (worker_id == 0)
        with graph.as_default():
            with ops.device("/job:ps/task:0"):
                global_step = variables.VariableV1(0,
                                                   name="global_step",
                                                   trainable=False)
                var_0 = variables.VariableV1(0.0, name="v0")
            with ops.device("/job:ps/task:1"):
                var_1 = variables.VariableV1(1.0, name="v1")
                var_sparse = variables.VariableV1([[3.0], [4.0]],
                                                  name="v_sparse")

            with ops.device("/job:worker/task:" + str(worker_id)):
                grads_0 = constant_op.constant(0.1 + worker_id * 0.2)
                grads_1 = constant_op.constant(0.9 + worker_id * 0.2)
                # This is to test against sparse gradients.
                grads_sparse = ops.IndexedSlices(
                    constant_op.constant([0.1 + worker_id * 0.2], shape=[1, 1]),
                    constant_op.constant([1]), constant_op.constant([2, 1]))
                sgd_opt = gradient_descent.GradientDescentOptimizer(2.0)
                sync_rep_opt = training.SyncReplicasOptimizer(
                    sgd_opt,
                    replicas_to_aggregate=replicas_to_aggregate,
                    total_num_replicas=num_workers)
                train_op = [
                    sync_rep_opt.apply_gradients(
                        zip([grads_0, grads_1, grads_sparse],
                            [var_0, var_1, var_sparse]),
                        global_step=global_step)
                ]
                sync_replicas_hook = sync_rep_opt.make_session_run_hook(
                    is_chief, num_tokens=num_workers)

            # Creates MonitoredSession
            session = training.MonitoredTrainingSession(
                master=workers[worker_id].target,
                is_chief=is_chief,
                hooks=[sync_replicas_hook])

        sessions.append(session)
        graphs.append(graph)
        train_ops.append(train_op)

    return sessions, graphs, train_ops
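
One way to drive the sessions from this MonitoredTrainingSession variant is to issue each worker's train op from its own thread, since a synchronized step cannot complete until replicas_to_aggregate sets of gradients have been accumulated. A rough sketch; the create_local_cluster call and the two-worker, two-ps configuration are assumptions for illustration, not taken from the original test:

import threading

from tensorflow.python.framework import test_util

num_workers = 2
workers, _ = test_util.create_local_cluster(num_workers=num_workers, num_ps=2)
sessions, graphs, train_ops = get_workers(
    num_workers, replicas_to_aggregate=2, workers=workers)

# Each worker blocks inside its synchronized apply_gradients() until enough
# gradients have been aggregated, so the steps run on separate threads.
threads = [
    threading.Thread(target=sessions[i].run, args=(train_ops[i],))
    for i in range(num_workers)
]
for t in threads:
  t.start()
for t in threads:
  t.join()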
Example #5
def get_workers(num_workers, replicas_to_aggregate, workers):
  sessions = []
  graphs = []
  train_ops = []
  for worker_id in range(num_workers):
    graph = ops.Graph()
    is_chief = (worker_id == 0)
    with graph.as_default():
      with ops.device("/job:ps/task:0"):
        global_step = variables.Variable(0, name="global_step", trainable=False)
        var_0 = variables.Variable(0.0, name="v0")
      with ops.device("/job:ps/task:1"):
        var_1 = variables.Variable(1.0, name="v1")
        var_sparse = variables.Variable([[3.0], [4.0]], name="v_sparse")

      with ops.device("/job:worker/task:" + str(worker_id)):
        grads_0 = constant_op.constant(0.1 + worker_id * 0.2)
        grads_1 = constant_op.constant(0.9 + worker_id * 0.2)
        # This is to test against sparse gradients.
        grads_sparse = ops.IndexedSlices(
            constant_op.constant(
                [0.1 + worker_id * 0.2], shape=[1, 1]),
            constant_op.constant([1]),
            constant_op.constant([2, 1]))
        sgd_opt = gradient_descent.GradientDescentOptimizer(2.0)
        sync_rep_opt = training.SyncReplicasOptimizer(
            sgd_opt,
            replicas_to_aggregate=replicas_to_aggregate,
            total_num_replicas=num_workers)
        train_op = [
            sync_rep_opt.apply_gradients(
                zip([grads_0, grads_1, grads_sparse],
                    [var_0, var_1, var_sparse]),
                global_step=global_step)
        ]

        init_op = variables.global_variables_initializer()
        # Needed ops from the sync_rep optimizer. This is mainly for the
        # local_step initialization.
        local_init_op = sync_rep_opt.local_step_init_op
        if is_chief:
          local_init_op = sync_rep_opt.chief_init_op
        ready_for_local_init_op = sync_rep_opt.ready_for_local_init_op

        # Queue runner that only the chief should start; it runs the
        # synchronization ops.
        chief_queue_runner = sync_rep_opt.get_chief_queue_runner()
        sync_init_op = sync_rep_opt.get_init_tokens_op(num_workers)

    # Creates session for chief.
    supervisor = supervisor_lib.Supervisor(
        graph=graph,
        is_chief=is_chief,
        recovery_wait_secs=1,
        init_op=init_op,
        local_init_op=local_init_op,
        ready_for_local_init_op=ready_for_local_init_op)
    session = supervisor.prepare_or_wait_for_session(workers[worker_id].target)

    # Chief should execute the sync_init_op and start the chief queue runner.
    if is_chief:
      session.run(sync_init_op)
      supervisor.start_queue_runners(session, [chief_queue_runner])

    sessions.append(session)
    graphs.append(graph)
    train_ops.append(train_op)

  return sessions, graphs, train_ops
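
With two workers and replicas_to_aggregate=2, one synchronized step applies the average of the per-worker gradients with learning rate 2.0: v0 goes from 0.0 to 0.0 - 2.0 * (0.1 + 0.3) / 2 = -0.4, and v1 from 1.0 to 1.0 - 2.0 * (0.9 + 1.1) / 2 = -1.0. A rough verification sketch, assuming both workers have already run their train_ops once (the driver is not shown in the original):

# Read the variables back through the chief's graph/session and compare them
# against the hand-computed averaged update.
var_0 = graphs[0].get_tensor_by_name("v0:0")
var_1 = graphs[0].get_tensor_by_name("v1:0")
assert abs(sessions[0].run(var_0) - (-0.4)) < 1e-6
assert abs(sessions[0].run(var_1) - (-1.0)) < 1e-6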