Example #1
0
def get_peer_latencies():
    """Measure round-trip times from the current peer to every peer.

    Returns:
        The output of the ``kungfu_get_peer_latencies`` op: a vector V where,
        on the peer of rank i, V[j] is the RTT from i to j (for j != i) and
        V[i] is 0.
    """
    num_peers = current_cluster_size()
    return _op_lib.kungfu_get_peer_latencies(cluster_size=num_peers)
Example #2
0
def get_neighbour_mask(edges):
    """Build the boolean neighbour vector of the current peer.

    Args:
        edges: the edges of the MST, passed straight through to the
            ``kungfu_get_neighbour_mask`` op.

    Returns:
        The op output: a bool vector v where, on the peer of rank i,
        v[j] is true when (i, j) is an edge of the MST and false otherwise.
    """
    my_rank = current_rank()
    num_peers = current_cluster_size()
    return _op_lib.kungfu_get_neighbour_mask(
        edges, self_rank=my_rank, cluster_size=num_peers)
Example #3
0
def get_peer_latencies(local_step=None):
    """Measure round-trip times from the current peer to every peer.

    Args:
        local_step: input forwarded to the ``kungfu_get_peer_latencies`` op.
            When omitted, a non-trainable scalar int64 TensorFlow variable
            initialised to zero is created and used instead.

    Returns:
        The op output: a vector V where, on the peer of rank i, V[j] is the
        RTT from i to j (for j != i) and V[i] is 0.
    """
    # FIXME: don't require input
    if local_step is None:
        # Imported lazily so TensorFlow is only needed on this fallback path.
        import tensorflow as tf
        scalar_zero = tf.zeros([], tf.int64)
        local_step = tf.Variable(scalar_zero, trainable=False)
    num_peers = current_cluster_size()
    return _op_lib.kungfu_get_peer_latencies(local_step,
                                             cluster_size=num_peers)
Example #4
0
def _parse_schedule(schedule, batch_size, num_train):
    """Parse an epoch-based schedule string into global-step boundaries.

    The schedule is a comma-separated list of ``epoch:fraction`` pairs, for
    example ``"0:1.0,30:0.5,60:0.25"``. Each epoch is converted to a global
    step as ``int(epoch * num_train / (batch_size * cluster_size))``, where
    ``cluster_size`` is the value of ``current_cluster_size()``.

    Args:
        schedule: the schedule string in the format described above.
        batch_size: batch size; it is multiplied by the cluster size to get
            the number of samples consumed per global step.
        num_train: number of training samples per epoch.

    Returns:
        A pair ``(steps, fractions)`` of equal-length tuples: the global step
        of each schedule entry and the float fraction attached to it.

    Raises:
        ValueError: if an epoch or fraction field is not a valid number
            (this includes an empty schedule string).
        IndexError: if an entry has no ``:`` separator.
    """
    print("Num train: " + str(num_train))
    print("Batch size: " + str(batch_size))

    # The cluster size does not depend on the entry being parsed, so it is
    # read once instead of once per token.
    samples_per_step = batch_size * current_cluster_size()

    def to_global_step(epoch):
        return int(epoch * num_train / samples_per_step)

    # Split every entry once; fields beyond the first two are ignored.
    entries = (token.split(":") for token in schedule.split(","))
    pairs = [(to_global_step(int(fields[0])), float(fields[1]))
             for fields in entries]
    steps, fractions = zip(*pairs)

    print("Steps: " + str(steps))
    print("Fractions: " + str(fractions))
    return steps, fractions