def __init__(self, approx_func='leaky_relu', degrees=(5, 4), cuda=False, version="A", trainable=True, train_numerator=True, train_denominator=True): super(Rational, self).__init__() w_numerator, w_denominator = get_parameters(version, degrees, approx_func) self.device = gpu() if cuda else cpu() with self.name_scope(): self.numerator = self.params.get(name='w_numerator', shape=(len(w_numerator),), init=initializer.Constant(w_numerator), grad_req='write' if train_numerator and trainable else 'null') self.denominator = self.params.get(name='w_denominator', shape=(len(w_denominator),), init=initializer.Constant(w_denominator), grad_req='write' if train_denominator and trainable else 'null') self.degrees = degrees self.version = version self.training = trainable self.init_approximation = approx_func if version == "A": rational_func = Rational_MXNET_A_F elif version == "B": rational_func = Rational_MXNET_B_F elif version == "C": rational_func = Rational_MXNET_C_F elif version == "D": rational_func = Rational_MXNET_D_F else: raise ValueError("version %s not implemented" % version) self.activation_function = rational_func
def __init__(self, approx_func='leaky_relu', degrees=(5, 4), cuda=False, version='A',
             trainable=True, **kwargs):
    super(Rational, self).__init__(**kwargs)

    # read initial parameter configuration from external files
    w_numerator, w_denominator = get_parameters(version, degrees, approx_func)

    # convert w_numerator and w_denominator to mxnet arrays
    w_numerator = mx.nd.array(w_numerator)
    w_denominator = mx.nd.array(w_denominator)

    # register the number of weights in numerator and denominator, since we need them during
    # symbolic execution but cannot retrieve them at later stages
    self.numerator_length = len(w_numerator)
    self.denominator_length = len(w_denominator)

    self.training = trainable
    self.degrees = degrees
    self.version = version
    self.init_approximation = approx_func

    # set specified context (currently disabled, since it is unclear how and why this would help)
    # self.device = gpu() if cuda else cpu()

    # register and configure weights (numerator and denominator coefficients)
    with self.name_scope():
        self.numerator = self.params.get(
            name='w_numerator', shape=(len(w_numerator),),
            init=initializer.Constant(w_numerator),
            grad_req='write' if trainable else 'null',
            differentiable=trainable)
        self.denominator = self.params.get(
            name='w_denominator', shape=(len(w_denominator),),
            init=initializer.Constant(w_denominator),
            grad_req='write' if trainable else 'null',
            differentiable=trainable)

    # register whether the function is trainable, since this information needs to be passed to
    # version D
    self.training = trainable
    self.init_approximation = approx_func

    # set rational activation function version
    self.rational_func = {'A': _version_a, 'B': _version_b,
                          'C': _version_c, 'D': _version_d}.get(version)
    if self.rational_func is None:
        raise ValueError(
            "rational activation function version %s not implemented" % version)
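# Standalone, hedged sketch (not taken from the library above): it shows the same pattern the
# two Rational constructors use, i.e. registering a coefficient vector as a Gluon parameter
# whose starting values are pinned with initializer.Constant. The block name, the coefficients,
# and the polynomial forward pass are illustrative assumptions, not the real rational function.
import mxnet as mx
from mxnet import gluon, initializer, nd


class PolyActivation(gluon.Block):
    def __init__(self, coeffs=(0.0, 1.0, 0.1), **kwargs):
        super(PolyActivation, self).__init__(**kwargs)
        with self.name_scope():
            # grad_req='null' keeps the coefficients fixed in this sketch; the Rational layer
            # above switches between 'write' and 'null' depending on `trainable`
            self.coeffs = self.params.get('coeffs', shape=(len(coeffs),),
                                          init=initializer.Constant(mx.nd.array(coeffs)),
                                          grad_req='null')

    def forward(self, x):
        # evaluate c0 + c1*x + c2*x^2 + ... with Horner's rule
        c = self.coeffs.data().asnumpy()
        out = nd.zeros_like(x)
        for ci in c[::-1]:
            out = out * x + float(ci)
        return out


act = PolyActivation()
act.initialize()
print(act(nd.array([[-1.0, 0.0, 2.0]])))   # -> [[-0.9  0.   2.4]] for coeffs (0, 1, 0.1)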
def initialize_params(self, graphs, observed_uuid):
    """
    :param graphs: a list of graphs in which the parameters will be optimized.
    :type graphs: a list of FactorGraph
    :param observed_uuid: Parameter Variables that are passed in directly as data, not to be inferred.
    :type observed_uuid: list, set
    """
    if self._params is not None:
        warnings.warn("InferenceParameters has already been initialized. The existing one will be overwritten.")

    self._params = ParameterDict()
    for g in graphs:
        for var in g.get_constants():
            self._constants[var.uuid] = var.constant

        excluded = set(self._constants.keys()).union(observed_uuid)
        for var in g.get_parameters(excluded=excluded):
            var_shape = realize_shape(var.shape, self._constants)
            init = initializer.Constant(var.initial_value_before_transformation) \
                if var.initial_value is not None else None
            self._params.get(name=var.uuid, shape=var_shape, dtype=self.dtype,
                             allow_deferred_init=True, init=init)

        for m in g.modules.values():
            m.initialize_hidden_parameters(self._params, excluded, self._constants)

    self._params.initialize(ctx=self.mxnet_context)
def initialize_hidden_parameters(self, param_dict=None, excluded=None, constants=None):
    """
    Initialize all the hidden parameters.

    :param param_dict: the MXNet ParameterDict for parameter initialization
    :type param_dict: MXNet ParameterDict
    :param excluded: the set of variables that are excluded from initialization
    :type excluded: set(str(UUID))
    :param constants: the constants discovered during initialization, to be used for shape inference
    :type constants: {str(UUID): float or int}
    """
    if param_dict is None:
        param_dict = ParameterDict()
    if excluded is None:
        excluded = set()
    if constants is None:
        constants = {}

    for g in [self._module_graph] + self._extra_graphs:
        for var in g.get_parameters(
                excluded=set([v.uuid for _, v in self.inputs] +
                             [v.uuid for _, v in self.outputs]
                             ).union(constants.keys()).union(excluded),
                include_inherited=True):
            var_shape = realize_shape(var.shape, constants)
            init = initializer.Constant(var.initial_value_before_transformation) \
                if var.initial_value is not None else None
            param_dict.get(name=var.uuid, shape=var_shape, dtype=self.dtype,
                           allow_deferred_init=True, init=init)
    return param_dict
def build_initializer(type, kerasDefaults, constant=0.):
    if type == 'constant':
        return initializer.Constant(constant)
    elif type == 'uniform':
        return initializer.Uniform(scale=kerasDefaults['maxval_uniform'])
    elif type == 'normal':
        return initializer.Normal(sigma=kerasDefaults['stddev_normal'])
    elif type == 'glorot_uniform':
        return initializer.Xavier(rnd_type='uniform', factor_type='avg', magnitude=3.)
    elif type == 'lecun_uniform':
        return initializer.Xavier(rnd_type='uniform', factor_type='in', magnitude=3.)
    elif type == 'he_normal':
        return initializer.Xavier(rnd_type='gaussian', factor_type='in', magnitude=2.)
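# Hedged usage sketch for build_initializer above; the kerasDefaults values and the Dense layer
# are assumptions chosen only to show the call pattern. Note that Dense's bias keeps its own
# per-parameter 'zeros' init, which takes precedence over the initializer passed to initialize().
from mxnet import gluon

kerasDefaults = {'maxval_uniform': 0.05, 'stddev_normal': 0.05}

init = build_initializer('constant', kerasDefaults, constant=0.1)
net = gluon.nn.Dense(32, in_units=64)
net.initialize(init=init)           # weight entries all start at 0.1
print(net.weight.data()[0, :3])     # -> [0.1 0.1 0.1]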
def initialize_params(self, graphs, observed_uuid):
    """
    :param graphs: a list of graphs in which the parameters will be optimized.
    :type graphs: a list of FactorGraph
    :param observed_uuid: Parameter Variables that are passed in directly as data, not to be inferred.
    :type observed_uuid: list, set
    """
    if self._params is not None:
        warnings.warn(
            "InferenceParameters has already been initialized. The existing one will be overwritten."
        )

    self._params = ParameterDict()
    for g in graphs:
        # load in parameterdict from external gluon blocks.
        for f in g.functions.values():
            if isinstance(f, GluonFunctionEvaluation):
                self._params.update(
                    f.function_wrapper.collect_internal_parameters())

        for var in g.get_constants():
            self._constants[var.uuid] = var.constant

        excluded = set(self._constants.keys()).union(observed_uuid)
        for var in g.get_parameters(excluded=excluded, include_inherited=False):
            var_shape = realize_shape(var.shape, self._constants)
            init = initializer.Constant(
                var.initial_value) if var.initial_value is not None else None
            self._params.get(name=var.uuid, shape=var_shape, dtype=self.dtype,
                             allow_deferred_init=True, init=init)

    self._params.initialize(ctx=self.mxnet_context)
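# Small standalone sketch of the ParameterDict pattern the initialize_params / 
# initialize_hidden_parameters methods above rely on: parameters are registered with
# allow_deferred_init=True and a per-parameter Constant init, then materialized with a single
# initialize() call. The parameter names and shapes here are illustrative assumptions.
import mxnet as mx
from mxnet import initializer
from mxnet.gluon import ParameterDict

params = ParameterDict(prefix='example_')
params.get('mean', shape=(3,), allow_deferred_init=True,
           init=initializer.Constant(0.5))
params.get('scale', shape=(3,), allow_deferred_init=True)  # falls back to the default initializer
params.initialize(ctx=mx.cpu())
print(params.get('mean').data())   # -> [0.5 0.5 0.5]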
    a.backward()


@use_np
@with_environment('MXNET_ENGINE_TYPE', 'NaiveEngine')
def test_18934_empty_leaky_relu():
    arr = np.random.rand(0, 2)
    arr_grad = np.empty_like(arr)

    autograd.mark_variables([arr], [arr_grad])
    with autograd.record():
        res = npx.leaky_relu(arr)
    res.backward()


@use_np
@pytest.mark.parametrize('initializer', [
    'zeros', 'ones',
    initializer.Constant(3),
    initializer.Uniform(),
    initializer.Normal(),
    initializer.Orthogonal(),
    initializer.Orthogonal(rand_type='normal'),
    initializer.Xavier(),
    initializer.Xavier(rnd_type='gaussian'),
    initializer.MSRAPrelu(),
    initializer.MSRAPrelu(factor_type='in'),
    initializer.MSRAPrelu(factor_type='out'),
    initializer.LSTMBias(),
])
@pytest.mark.parametrize('dtype', ['float32', 'float64'])
def test_19118(initializer, dtype):
@with_environment('MXNET_ENGINE_TYPE', 'NaiveEngine')
def test_18934_empty_leaky_relu():
    arr = np.random.rand(0, 2)
    arr_grad = np.empty_like(arr)

    autograd.mark_variables([arr], [arr_grad])
    with autograd.record():
        res = npx.leaky_relu(arr)
    res.backward()


@use_np
@pytest.mark.parametrize('initializer', [
    'zeros', 'ones',
    initializer.Constant(3),
    initializer.Uniform(),
    initializer.Normal(),
    initializer.Orthogonal(),
    initializer.Orthogonal(rand_type='normal'),
    initializer.Xavier(),
    initializer.Xavier(rnd_type='gaussian'),
    initializer.MSRAPrelu(),
    initializer.MSRAPrelu(factor_type='in'),
    initializer.MSRAPrelu(factor_type='out'),
    initializer.LSTMBias(),
])
@pytest.mark.parametrize('dtype', ['float32', 'float64'])
def test_19118(initializer, dtype):
    net = gluon.nn.Dense(16, in_units=16)
    net.cast(dtype)
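# Hedged sketch, not the actual body of test_19118: one way a parametrized initializer such as
# initializer.Constant(3) can be exercised against a small Gluon layer. The input shape and the
# assertion are illustrative assumptions.
import mxnet as mx
from mxnet import gluon, initializer, nd

net = gluon.nn.Dense(16, in_units=16)
net.initialize(init=initializer.Constant(3))
out = net(nd.ones((4, 16)))                      # forward pass with the freshly initialized weights
assert (net.weight.data().asnumpy() == 3).all()  # every weight entry was set to the constant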