Example #1
    def __call__(self, cost_func):
        all_updates = []
        batch_cost = ng.sum(cost_func, out_axes=())
        batch_size = cost_func.axes.batch_axes()[0].length

        grads = [
            ng.deriv(batch_cost, v) / batch_size
            for v in batch_cost.variables()
        ]
        scale_factor = clip_gradient_norm(grads, batch_size,
                                          self.gradient_clip_norm)

        epsilon, decay = (self.epsilon, self.decay_rate)
        for i, (variable, grad) in enumerate(zip(batch_cost.variables(),
                                                 grads)):
            grad = clip_gradient_value(grad, self.gradient_clip_value)
            state = ng.persistent_tensor(axes=variable.axes, initial_value=0.)
            all_updates.append(
                ng.sequential([
                    ng.assign(state,
                              decay * state + (1.0 - decay) * ng.square(grad)),
                    ng.assign(
                        variable,
                        variable - ((scale_factor * grad * self.lrate) /
                                    (ng.sqrt(state + epsilon) + epsilon)))
                ]))

        return ng.doall(all_updates)
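For reference, the update built above is the standard RMSProp step. A minimal NumPy sketch of the same arithmetic (illustrative only; these names are not part of the ngraph API):

import numpy as np

def rmsprop_step(variable, grad, state, lrate, decay, epsilon, scale_factor=1.0):
    # Exponential moving average of squared gradients.
    state = decay * state + (1.0 - decay) * np.square(grad)
    # Scale the step by the running RMS of the gradient.
    variable = variable - (scale_factor * grad * lrate) / (np.sqrt(state + epsilon) + epsilon)
    return variable, state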
Example #2
    def variable_update(self, variable, grad, scale_factor):
        grad = clip_gradient_value(grad, self.gradient_clip_value)
        state = ng.persistent_tensor(axes=grad.axes, initial_value=0.)
        updates = ng.sequential([
            ng.assign(state, state + ng.square(grad)),
            ng.assign(
                variable, variable - (scale_factor * self.lrate * grad) /
                (ng.sqrt(state + self.epsilon)))
        ])
        return updates
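This variant is Adagrad: squared gradients are accumulated without decay, so each weight's effective step size shrinks monotonically over training. The same arithmetic in NumPy (a sketch, not the ngraph API):

import numpy as np

def adagrad_step(variable, grad, state, lrate, epsilon, scale_factor=1.0):
    # Accumulate squared gradients over all steps (no decay term).
    state = state + np.square(grad)
    variable = variable - (scale_factor * lrate * grad) / np.sqrt(state + epsilon)
    return variable, state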
Example #3
    def variable_update(self, variable, grad, scale_factor):
        epsilon, decay = (self.epsilon, self.decay_rate)
        grad = clip_gradient_value(grad, self.gradient_clip_value)
        state = ng.persistent_tensor(axes=variable.axes, initial_value=0.)
        updates = ng.sequential([
            ng.assign(state, decay * state + (1.0 - decay) * ng.square(grad)),
            ng.assign(variable, variable - ((scale_factor * grad * self.lrate)
                                            / (ng.sqrt(state + epsilon) + epsilon)))
        ])
        return updates
Example #4
    def variable_update(self, variable, grad, scale_factor):
        epsilon, decay = (self.epsilon, self.decay_rate)
        grad = clip_gradient_value(grad, self.gradient_clip_value)
        state = ng.persistent_tensor(axes=variable.axes, initial_value=1.)
        velocity = ng.persistent_tensor(
            axes=variable.axes, initial_value=0.).named(variable.name + '_vel')
        updates = ng.sequential([
            ng.assign(state, decay * state + (1.0 - decay) * ng.square(grad)),
            ng.assign(
                velocity, velocity * self.momentum +
                (self.lrate * scale_factor * grad / ng.sqrt(state + epsilon)) +
                self.lrate * self.wdecay * variable),
            ng.assign(variable, variable - velocity)
        ])
        return updates
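Example #4 combines the RMSProp accumulator with a momentum buffer and L2 weight decay; note that the weight-decay term is folded into the velocity rather than the gradient. A NumPy sketch of one step (illustrative only):

import numpy as np

def rmsprop_momentum_step(variable, grad, state, velocity, lrate, decay,
                          epsilon, momentum, wdecay, scale_factor=1.0):
    state = decay * state + (1.0 - decay) * np.square(grad)
    # The velocity accumulates the RMS-scaled gradient plus weight decay.
    velocity = (velocity * momentum
                + lrate * scale_factor * grad / np.sqrt(state + epsilon)
                + lrate * wdecay * variable)
    variable = variable - velocity
    return variable, state, velocity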
Example #5
    def Square(self, tf_node, inputs):
        """
        Computes x^2 on each element of the input.

        Arguments:
            tf_node: NodeDef object, the tensorflow node to convert.
            inputs: List of ngraph Ops as inputs to this node.

        Returns:
            An ngraph Op corresponding to the tensorflow node.

        Inputs to tf_node:
            input
        """
        return ng.square(inputs[0]).named(tf_node.name)
Example #6
    def __call__(self, cost_func):
        with ng.Op.saved_user_deps():
            state_updates, param_updates = [], []
            batch_cost = ng.sum(cost_func, out_axes=())
            batch_size = cost_func.axes.batch_axes()[0].length

            grads = [
                ng.deriv(batch_cost, v) / batch_size
                for v in batch_cost.variables()
            ]
            scale_factor = clip_gradient_norm(
                grads) if self.gradient_clip_norm else 1

            epsilon, decay = (self.epsilon, self.decay_rate)
            for i, (variable,
                    grad) in enumerate(zip(batch_cost.variables(), grads)):
                grad = clip_gradient_value(grad, self.gradient_clip_value)

                state = ng.persistent_tensor(axes=variable.axes,
                                             initial_value=0.)
                state_updates.append(
                    ng.assign(lvalue=state,
                              rvalue=decay * state +
                              (1.0 - decay) * ng.square(grad)).named(
                                  'state_u_%s' % i))

                param_updates.append(
                    ng.assign(
                        lvalue=variable,
                        rvalue=variable -
                        ((scale_factor * grad * self.learning_rate) /
                         (ng.sqrt(state + epsilon) + epsilon)),
                    ).named('var_u_%s' % i))

            lr_update = [
                ng.assign(
                    self.learning_rate,
                    self.schedule.get_learning_rate(self.learning_rate,
                                                    self.iteration_index))
            ]

            updates = ng.doall(state_updates + param_updates + lr_update)
            self.iteration_index += 1

        return updates
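Unlike Example #1, this version also refreshes its learning rate once per call via self.schedule. Any object exposing get_learning_rate(lr, iteration) fits; a hypothetical step-decay schedule (not part of ngraph) might look like:

class StepSchedule(object):
    """Hypothetical schedule: scale the learning rate by `gamma` every `step` iterations."""

    def __init__(self, gamma=0.1, step=1000):
        self.gamma = gamma
        self.step = step

    def get_learning_rate(self, lr, iteration):
        # `lr` may be a Python scalar or an ngraph tensor; scaling by a float works for both.
        if iteration > 0 and iteration % self.step == 0:
            return lr * self.gamma
        return lr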
Example #7
    def __call__(self,
                 in_obj,
                 channel_axes="C",
                 spatial_axes=("D", "H", "W"),
                 **kwargs):
        """
        Arguments:
            in_obj (Op): Input op
            channel_axes (str): name of the expected channel axis type - defaults to "C"
            spatial_axes (tuple): names of expected depth, height and width axis types - defaults
                                  to "D", "H", and "W"
        """
        if isinstance(spatial_axes, dict):
            spatial_axes = tuple(
                spatial_axes.get(name, name) for name in ("D", "H", "W"))
        elif isinstance(spatial_axes, tuple):
            if len(spatial_axes) != 3:
                raise ValueError(
                    "spatial_axes must have length 3 (e.g. ('D', 'H', 'W'))")
            spatial_axes = tuple(
                name if name else default
                for name, default in zip(spatial_axes, ("D", "H", "W")))

        orig_axes = in_obj.axes
        in_obj = reorder_spatial_axes(in_obj, channel_axes, spatial_axes)
        channel_axes = in_obj.axes.get_by_names(channel_axes)
        spatial_axes = in_obj.axes.get_by_names(*spatial_axes)

        filter_axes = self._filter_axes(channel_axes, spatial_axes)

        # mark 'K' as a shadow axis for the initializers.
        axes_map = shadow_axes_map(filter_axes.find_by_name('K'))
        filter_axes = ng.make_axes([
            axis if axis.name != 'K' else list(axes_map.keys())[0]
            for axis in filter_axes
        ])

        if not self.initialized:
            if not self.weight_norm:
                self.W = ng.variable(axes=filter_axes,
                                     initial_value=self.init,
                                     metadata={
                                         "label": LABELS["weight"]
                                     }).named("W")
            else:
                self.v = ng.variable(axes=filter_axes,
                                     initial_value=self.init,
                                     metadata={
                                         "label": LABELS["weight"]
                                     }).named("v")
                out_axes = ng.make_axes(
                    [filter_axes.get_by_names("K__NG_SHADOW")])
                v_norm = ng.mean(ng.square(self.v), out_axes=out_axes)
                self.g = ng.variable(axes=out_axes,
                                     initial_value=self.init,
                                     metadata={
                                         "label": LABELS["weight"]
                                     }).named("g")
                self.W = self.g * self.v * ng.reciprocal(
                    ng.sqrt(v_norm + 1e-3))
        else:
            if filter_axes != self.W.axes:
                raise ValueError(
                    ("{layer_name} layer has already been initialized with an "
                     "input object which has resulted in filter axes: "
                     "{existing_filter_axes}. This new input object has axes: "
                     "{input_axes}, which implies the need for filter axes: "
                     "{new_filter_axes} which are different than the existing "
                     "filter axes.").format(
                         layer_name=self.name,
                         existing_filter_axes=self.W.axes,
                         input_axes=in_obj.axes,
                         new_filter_axes=filter_axes,
                     ))

        output = ng.map_roles(
            self._conv_op(in_obj, channel_axes, spatial_axes), axes_map)
        # Reorder the output to match the input order
        output_axis_order = ng.make_axes(
            [output.axes.find_by_name(ax.name)[0] for ax in orig_axes])
        # Remove introduced axes. If their length is > 1, then perhaps they should be kept
        slices = [
            0 if (ax not in orig_axes) and ax.length == 1 else slice(None)
            for ax in output.axes
        ]
        output = ng.tensor_slice(output, slices)
        # New axes with length > 1 may have been introduced. Add them to the end.
        output_axis_order = output_axis_order | output.axes
        return ng.axes_with_order(output, output_axis_order)
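The weight_norm branch above implements weight normalization: the filter is reparameterized as W = g * v / sqrt(mean(v^2) + eps), so the magnitude (g) and direction (v) are learned separately. A NumPy sketch of that arithmetic, assuming (purely for illustration) that the output axis K is the leading axis of v:

import numpy as np

def weight_norm(v, g, eps=1e-3):
    # Per-output-filter mean of squared weights; average over every axis but K.
    reduce_axes = tuple(range(1, v.ndim))
    v_norm = np.mean(np.square(v), axis=reduce_axes, keepdims=True)
    # Direction comes from v, magnitude from g.
    return g.reshape(v_norm.shape) * v / np.sqrt(v_norm + eps)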
Example #8
    def __init__(self):
        self.ng_computation = lambda Y, T: ng.mean(ng.square(Y - T),
                                                   out_axes=()) / 2.
Example #9
D1 = discriminator(data)
D2 = discriminator(generated)

# TODO
# Original Implementation with epsilon - wait till fixed
# https://github.com/NervanaSystems/private-ngraph/issues/2011
# x = ng.variable(initial_value=0.5, axes=[])
# eps = ng.uniform(x)

eps = ng.constant(0.5)  # delete after uniform works
interpolated = eps * data + (1 - eps) * generated

D3 = discriminator(interpolated)
gradient = ng.deriv(ng.sum(D3, out_axes=[]), interpolated)
grad_norm = ng.L2_norm(gradient)
gradient_penalty = ng.square(grad_norm - 1)

if args.loss_type == "WGAN-GP":
    gp = args.gp_scale * gradient_penalty
    weight_clipping = None

elif args.loss_type == "WGAN":  # standard WGAN with no gradient penalty
    gp = None
    weight_clipping = args.w_clip

if gp:
    loss_d = D1 - D2 + gp
else:
    loss_d = D1 - D2

mean_cost_d = ng.mean(loss_d, out_axes=[])
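The penalty above is the WGAN-GP term (||grad D(x_hat)||_2 - 1)^2, evaluated at points interpolated between real and generated samples. A NumPy sketch of the penalty given a gradient array (illustrative only):

import numpy as np

def gradient_penalty(gradient, gp_scale):
    # Penalize the critic's gradient norm for deviating from 1, as in the graph above.
    grad_norm = np.sqrt(np.sum(np.square(gradient)))
    return gp_scale * np.square(grad_norm - 1.0)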
Example #10
def square(x, name=None):
    return ng.square(x).named(name)
Example #11
    def cost(y, t):
        return ng.mean(ng.square(y - t), out_axes=()) / 2.