Example #1
0
 def __init__(self, var_list, *, beta1=0.9, beta2=0.999, epsilon=1e-08, scale_grad_by_procs=True, comm=None):
     """Initialize distributed (MPI) Adam state for a flat parameter vector.

     :param var_list: the variables to optimize
     :param beta1: (float) Adam beta1 parameter (first-moment decay)
     :param beta2: (float) Adam beta2 parameter (second-moment decay)
     :param epsilon: (float) small constant to avoid division by zero
     :param scale_grad_by_procs: (bool) whether gradients are averaged over processes
     :param comm: (MPI communicator) if None, defaults to MPI.COMM_WORLD when MPI is available
     """
     self.var_list = var_list
     self.beta1 = beta1
     self.beta2 = beta2
     self.epsilon = epsilon
     self.scale_grad_by_procs = scale_grad_by_procs
     size = sum(U.numel(v) for v in var_list)
     # Flat first-moment ("m") and second-moment ("v") accumulators, one slot per scalar parameter.
     self.m = np.zeros(size, 'float32')
     self.v = np.zeros(size, 'float32')
     self.t = 0
     self.setfromflat = U.SetFromFlat(var_list)
     self.getflat = U.GetFlat(var_list)
     # Bug fix: `comm` was accepted but never stored, so a caller-supplied communicator
     # was silently ignored. Store it, defaulting to COMM_WORLD as in the sibling example.
     # NOTE(review): assumes an `MPI` name (possibly None) is in module scope — confirm.
     self.comm = MPI.COMM_WORLD if comm is None and MPI is not None else comm
Example #2
0
 def __init__(self, var_list, *, beta1=0.9, beta2=0.999, epsilon=1e-08, scale_grad_by_procs=True, comm=None):
     """Set up MPI-aware Adam optimizer state over the given variables.

     The moment estimates are kept as flat float32 vectors with one entry per
     scalar parameter across all of `var_list`.
     """
     self.var_list = var_list
     self.beta1 = beta1
     self.beta2 = beta2
     self.epsilon = epsilon
     self.scale_grad_by_procs = scale_grad_by_procs
     # Total number of scalar parameters across every variable.
     total = sum(U.numel(v) for v in var_list)
     self.m = np.zeros(total, 'float32')  # first moment estimate
     self.v = np.zeros(total, 'float32')  # second raw moment estimate
     self.t = 0  # timestep counter for bias correction
     self.setfromflat = U.SetFromFlat(var_list)
     self.getflat = U.GetFlat(var_list)
     # Default to the world communicator only when MPI is actually available.
     if comm is None and MPI is not None:
         self.comm = MPI.COMM_WORLD
     else:
         self.comm = comm
Example #3
0
    def __init__(self,
                 var_list,
                 *,
                 beta1=0.9,
                 beta2=0.999,
                 epsilon=1e-08,
                 scale_grad_by_procs=True,
                 comm=None,
                 sess=None):
        """
        A parallel MPI implementation of the Adam optimizer for TensorFlow
        https://arxiv.org/abs/1412.6980

        :param var_list: ([TensorFlow Tensor]) the variables
        :param beta1: (float) Adam beta1 parameter
        :param beta2: (float) Adam beta2 parameter
        :param epsilon: (float) to help with preventing arithmetic issues
        :param scale_grad_by_procs: (bool) if the scaling should be done by processes
        :param comm: (MPI Communicators) if None, mpi4py.MPI.COMM_WORLD
        :param sess: (TensorFlow Session) if None, tf.get_default_session()
        """
        self.var_list = var_list
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.scale_grad_by_procs = scale_grad_by_procs
        # Total number of scalar parameters across all variables; the moment
        # buffers below are flat vectors of this length.
        size = sum(tf_utils.numel(v) for v in var_list)
        # Exponential moving average of gradient values
        # "first moment estimate" m in the paper
        self.exp_avg = np.zeros(size, 'float32')
        # Exponential moving average of squared gradient values
        # "second raw moment estimate" v in the paper
        self.exp_avg_sq = np.zeros(size, 'float32')
        # Timestep counter used for bias correction of the moment estimates.
        self.step = 0
        # Helpers to read/write all variables as one flat vector.
        self.setfromflat = tf_utils.SetFromFlat(var_list, sess=sess)
        self.getflat = tf_utils.GetFlat(var_list, sess=sess)
        self.comm = mpi4py.MPI.COMM_WORLD if comm is None else comm
Example #4
0
def flatten_grads(var_list, grads):
    """Flatten the gradients of a list of variables into one 1-D tensor.

    Each gradient is reshaped to a flat vector sized to match its variable's
    element count, then all vectors are concatenated along axis 0.

    :param var_list: variables, paired one-to-one with `grads`
    :param grads: gradient tensors, one per variable
    :return: a single 1-D tensor containing every gradient value
    """
    return tf.concat(
        [tf.reshape(grad, [U.numel(v)]) for (v, grad) in zip(var_list, grads)],
        0)