Exemple #1
0
    def __init__(self, monitor_interval=1):
        """Initialise the bookkeeping attributes of this object.

        Args:
            monitor_interval: Stored unchanged as ``self._interval``. How it
                is consumed is not visible from this method.
        """
        # External calls stay in their original order: the cluster size is
        # queried first, then the step counter is created.
        workers = current_cluster_size()
        step = counter()

        self._num_workers = workers
        self._step = step
        self._interval = monitor_interval

        # Both variance slots start out unset.
        self._summed_variance = self._variances = None
Exemple #2
0
    def __init__(self, device_batch_size, monitor_interval=1):
        """Initialise worker count, step counter and batch-size attributes.

        Args:
            device_batch_size: Cast to ``tf.float32`` and stored as
                ``self._device_batch_size``.
            monitor_interval: Stored unchanged as ``self._interval``.
        """
        # Same call order as before: cluster size, counter, then the cast.
        workers = current_cluster_size()
        step = counter()
        per_device = tf.cast(device_batch_size, dtype=tf.float32)

        self._num_workers = workers
        self._step = step
        self._interval = monitor_interval
        self._device_batch_size = per_device
        # Global batch size = per-device batch size times number of workers.
        self._global_batch_size = per_device * workers
Exemple #3
0
def test_counter():
    """Assert that ten successive runs of a fresh counter give 0 through 9."""
    from kungfu.tensorflow.ops import counter
    step_op = counter()
    with tf.Session() as sess:
        for expected in range(10):
            observed = sess.run(step_op)
            assert observed == expected
def test_counter():
    """Run a fresh counter ten times, printing each value.

    Raises:
        RuntimeError: If the value from run number ``k`` (starting at 0)
            differs from ``k``.
    """
    step_op = counter()
    with tf.Session() as sess:
        for expected in range(10):
            observed = sess.run(step_op)
            print(observed)
            if observed != expected:
                raise RuntimeError('counter failed!')
Exemple #5
0
def build_ops():
    """Build the cluster-resize op and report the initial step.

    Reads the initial step via ``_get_init_step()`` and the schedule
    configuration from the module-level ``config``.

    Returns:
        A ``(init_step, resize_op)`` tuple, where ``init_step`` is an int and
        ``resize_op`` is the value returned by ``resize_cluster``.
    """
    init_step = int(_get_init_step())
    print('init_step is %d' % init_step)

    step = counter(init_step)
    schedule = step_based_schedule(config, step)
    # The first argument to resize_cluster is the string form of step + 1.
    resize_op = resize_cluster(tf.as_string(step + 1), schedule)
    return init_step, resize_op
Exemple #6
0
    def __init__(self,
                 device_batch_size,
                 monitor_interval=1,
                 alpha=0.9,
                 verbose=False):
        """Initialise worker count, step counter and batch-size attributes.

        Args:
            device_batch_size: Cast to ``tf.float32`` and stored as
                ``self._device_batch_size``.
            monitor_interval: Stored unchanged as ``self._interval``.
            alpha: Stored unchanged as ``self._alpha``; its use is outside
                this method.
            verbose: Stored unchanged as ``self._verbose``.
        """
        # Graph-building calls keep their original relative order.
        workers = tf.cast(cluster_size(), tf.float32)
        step = counter()
        per_device = tf.cast(device_batch_size, dtype=tf.float32)

        # Plain configuration values.
        self._alpha = alpha
        self._verbose = verbose
        self._interval = monitor_interval

        # Tensor-valued state.
        self._num_workers = workers
        self._step = step
        self._device_batch_size = per_device
        self._global_batch_size = per_device * workers
Exemple #7
0
 def __init__(self, fuse_requests, fused_model_name=None):
     """Store the fusion settings and create a step counter.

     Args:
         fuse_requests: Stored unchanged as ``self._fuse_requests``.
         fused_model_name: Stored unchanged as ``self._fused_model_name``;
             defaults to ``None``.
     """
     self._fuse_requests = fuse_requests
     self._fused_model_name = fused_model_name
     self._step = counter()
Exemple #8
0
 def _build_resize_op(self, config, init_step):
     """Build the op that resizes the cluster following a step schedule.

     Args:
         config: Passed through to ``step_based_schedule``.
         init_step: Initial value handed to ``counter``.

     Returns:
         The value returned by ``resize_cluster``.
     """
     step = counter(init_step)
     target_size = step_based_schedule(config, step)
     # The first argument to resize_cluster is the string form of step + 1.
     return resize_cluster(tf.as_string(step + 1), target_size)
Exemple #9
0
def test_counter():
    """Assert that ten successive runs of a fresh counter give 0 through 9."""
    step_op = counter()
    with tf.Session() as sess:
        expected = 0
        while expected < 10:
            observed = sess.run(step_op)
            assert observed == expected
            expected += 1
Exemple #10
0
def test_counter_init():
    """Assert that a counter created with ``init=1`` gives 1 through 10."""
    step_op = counter(init=1)
    with tf.Session() as sess:
        for expected in range(1, 11):
            observed = sess.run(step_op)
            assert observed == expected