def create_local_cluster(num_workers, num_ps, protocol="grpc"):
    """Build an in-process cluster of `tf.train.Server` instances.

    One port per task is obtained from `net_lib.pick_unused_port_or_die()`
    (worker ports first, then ps ports) and every task is addressed as
    "localhost:<port>".

    Args:
        num_workers: Number of tasks in the "worker" job.
        num_ps: Number of tasks in the "ps" job.
        protocol: Value forwarded as `protocol=` to every `tf.train.Server`.

    Returns:
        A tuple `(workers, ps_servers)`; each element is a list of
        `tf.train.Server` objects created with `start=True`, ordered by
        task index.
    """

    def _reserve_addresses(count):
        # One "localhost:<port>" entry per task of the job.
        return [
            "localhost:%s" % net_lib.pick_unused_port_or_die()
            for _ in range(count)
        ]

    # Workers must reserve their ports before the parameter servers do, so
    # the dict entries are built one statement at a time in that order.
    cluster_dict = {}
    cluster_dict["worker"] = _reserve_addresses(num_workers)
    cluster_dict["ps"] = _reserve_addresses(num_ps)
    cluster_spec = tf.train.ClusterSpec(cluster_dict)

    def _launch(job, count):
        # Every task in the job shares the same cluster spec and protocol.
        launched = []
        for task in range(count):
            launched.append(
                tf.train.Server(cluster_spec,
                                job_name=job,
                                protocol=protocol,
                                task_index=task,
                                start=True))
        return launched

    worker_servers = _launch("worker", num_workers)
    parameter_servers = _launch("ps", num_ps)
    return worker_servers, parameter_servers
Ejemplo n.º 2
0
 def testPickUnusedPortOrDie(self):
     """Two consecutive picks give distinct ports strictly between 0 and 65536."""
     first_port = net_lib.pick_unused_port_or_die()
     second_port = net_lib.pick_unused_port_or_die()
     # Range-check each port in the order it was picked.
     for candidate in (first_port, second_port):
         self.assertGreater(candidate, 0)
         self.assertLess(candidate, 65536)
     self.assertNotEqual(first_port, second_port)
Ejemplo n.º 3
0
 def testPickUnusedPortOrDie(self):
   """Checks that back-to-back port picks are in range and not equal."""
   lower_bound, upper_bound = 0, 65536
   picked = [net_lib.pick_unused_port_or_die() for _ in range(2)]
   # Both bounds are exclusive: a valid port lies in 1..65535.
   for port in picked:
     self.assertGreater(port, lower_bound)
     self.assertLess(port, upper_bound)
   self.assertNotEqual(picked[0], picked[1])
def create_local_cluster(num_workers, num_ps, protocol="grpc"):
  """Create a localhost cluster of `tf.train.Server`s and return them.

  Args:
    num_workers: Number of tasks in the "worker" job.
    num_ps: Number of tasks in the "ps" job.
    protocol: Value forwarded as `protocol=` to every `tf.train.Server`.

  Returns:
    A tuple `(workers, ps_servers)` of lists of `tf.train.Server` objects,
    each created with `start=True` and ordered by task index.
  """
  # Job order matters: worker ports are picked (and worker servers are
  # created) before the ps ones.
  job_sizes = (("worker", num_workers), ("ps", num_ps))

  # One "localhost:<port>" address per task, with the port taken from
  # net_lib.pick_unused_port_or_die().
  cluster_dict = {}
  for job, size in job_sizes:
    cluster_dict[job] = [
        "localhost:%s" % net_lib.pick_unused_port_or_die()
        for _ in range(size)]
  cluster_spec = tf.train.ClusterSpec(cluster_dict)

  servers_by_job = {}
  for job, size in job_sizes:
    servers_by_job[job] = [
        tf.train.Server(
            cluster_spec,
            job_name=job,
            protocol=protocol,
            task_index=task,
            start=True)
        for task in range(size)]

  return servers_by_job["worker"], servers_by_job["ps"]