def learning_rate(self):
    """Return the effective learning rate.

    If no explicit learning rate was configured (``self._learning_rate is
    None``), compute AdaFactor's schedule ``min(1/sqrt(step), 0.01)`` from
    the current iteration count, scaled down by 0.05 when parameter-scale
    multiplication is disabled.  Otherwise, lazily create (once) and return
    a backend variable holding the configured rate.

    Returns:
        A backend tensor (scheduled case) or a backend variable
        (explicit-rate case).
    """
    if self._learning_rate is None:
        # +1 so the schedule is defined at iteration 0 (avoids 1/sqrt(0)).
        iterations = K.cast(self.iterations + 1, K.floatx())
        learning_rate = K.minimum(1.0 / K.sqrt(iterations), 0.01)
        if self.multiply_by_parameter_scale:
            return learning_rate
        else:
            # Without parameter-scale multiplication the raw schedule is too
            # large; 0.05 is the conventional AdaFactor damping factor.
            return learning_rate * 0.05
    else:
        # BUG FIX: the original checked hasattr(self, '__learning_rate') but
        # assigned self.__learning_rate, which Python name-mangles to
        # self._<ClassName>__learning_rate.  The hasattr string is NOT
        # mangled, so the check was always False and a fresh K.variable was
        # created on every access.  Use a single-underscore cache attribute
        # so the check and the assignment refer to the same name.
        if not hasattr(self, '_learning_rate_variable'):
            with K.name_scope(self.__class__.__name__):
                self._learning_rate_variable = K.variable(
                    self._learning_rate, name='learning_rate'
                )
        return self._learning_rate_variable
def __init__(self, *args, **kwargs):
    """Initialize the optimizer and create its step counter.

    All positional/keyword arguments are forwarded unchanged to the parent
    optimizer.  A 64-bit ``iterations`` counter variable is created inside a
    name scope matching the concrete class name.
    """
    super(AdaFactorV1, self).__init__(*args, **kwargs)
    scope_name = self.__class__.__name__
    with K.name_scope(scope_name):
        counter = K.variable(0, dtype='int64', name='iterations')
    self.iterations = counter