def __call__(self, cost_func):
    """Build the RMSProp update graph for every variable of ``cost_func``.

    Gradients are averaged over the batch dimension, optionally clipped by
    norm and by value, and each variable keeps a persistent decaying
    mean-of-squares accumulator that normalizes its learning-rate step.

    Arguments:
        cost_func (Op): the cost expression to minimize.

    Returns:
        An op that performs all state and variable updates when executed.
    """
    batch_cost = ng.sum(cost_func, out_axes=())
    batch_size = cost_func.axes.batch_axes()[0].length
    variables = batch_cost.variables()
    grads = [ng.deriv(batch_cost, var) / batch_size for var in variables]
    scale_factor = clip_gradient_norm(grads, batch_size,
                                      self.gradient_clip_norm)
    epsilon = self.epsilon
    decay = self.decay_rate
    all_updates = []
    for variable, grad in zip(variables, grads):
        clipped = clip_gradient_value(grad, self.gradient_clip_value)
        # Persistent running mean of squared gradients for this variable.
        state = ng.persistent_tensor(axes=variable.axes, initial_value=0.)
        new_state = decay * state + (1.0 - decay) * ng.square(clipped)
        # The step reads `state` inside the sequential, i.e. after the
        # accumulator assignment above has taken effect.
        step = ((scale_factor * clipped * self.lrate) /
                (ng.sqrt(state + epsilon) + epsilon))
        all_updates.append(ng.sequential([
            ng.assign(state, new_state),
            ng.assign(variable, variable - step),
        ]))
    return ng.doall(all_updates)
def variable_update(self, variable, grad, scale_factor):
    """Adagrad step: accumulate squared gradients, scale the update by
    the inverse root of the accumulator.

    Arguments:
        variable: the variable to update.
        grad: gradient of the cost w.r.t. ``variable``.
        scale_factor: global rescaling factor (e.g. from norm clipping).

    Returns:
        A sequential op applying the accumulator and variable updates.
    """
    clipped = clip_gradient_value(grad, self.gradient_clip_value)
    # Persistent sum of squared gradients, one per variable.
    accum = ng.persistent_tensor(axes=grad.axes, initial_value=0.)
    step = ((scale_factor * self.lrate * clipped) /
            ng.sqrt(accum + self.epsilon))
    return ng.sequential([
        ng.assign(accum, accum + ng.square(clipped)),
        ng.assign(variable, variable - step),
    ])
def variable_update(self, variable, grad, scale_factor):
    """RMSProp step: a decaying mean of squared gradients normalizes the
    learning-rate step for this variable.

    Arguments:
        variable: the variable to update.
        grad: gradient of the cost w.r.t. ``variable``.
        scale_factor: global rescaling factor (e.g. from norm clipping).

    Returns:
        A sequential op applying the accumulator and variable updates.
    """
    eps = self.epsilon
    decay = self.decay_rate
    clipped = clip_gradient_value(grad, self.gradient_clip_value)
    # Persistent decaying mean of squared gradients.
    msq = ng.persistent_tensor(axes=variable.axes, initial_value=0.)
    step = ((scale_factor * clipped * self.lrate) /
            (ng.sqrt(msq + eps) + eps))
    return ng.sequential([
        ng.assign(msq, decay * msq + (1.0 - decay) * ng.square(clipped)),
        ng.assign(variable, variable - step),
    ])
def variable_update(self, variable, grad, scale_factor):
    """RMSProp-with-momentum step, including L2 weight decay.

    Two persistent accumulators are kept per variable: a decaying mean of
    squared gradients (``state``) and a momentum ``velocity`` that is
    subtracted from the variable each step.

    Arguments:
        variable: the variable to update.
        grad: gradient of the cost w.r.t. ``variable``.
        scale_factor: global rescaling factor (e.g. from norm clipping).

    Returns:
        A sequential op applying all three assignments in order.
    """
    eps = self.epsilon
    decay = self.decay_rate
    clipped = clip_gradient_value(grad, self.gradient_clip_value)
    # NOTE(review): unlike the plain RMSProp variant, the mean-square state
    # starts at 1 here (not 0) — presumably to avoid a tiny initial
    # denominator; confirm this is intentional.
    state = ng.persistent_tensor(axes=variable.axes, initial_value=1.)
    velocity = ng.persistent_tensor(
        axes=variable.axes, initial_value=0.).named(variable.name + '_vel')
    # Normalized gradient step plus weight-decay contribution.
    grad_step = self.lrate * scale_factor * clipped / ng.sqrt(state + eps)
    decay_step = self.lrate * self.wdecay * variable
    return ng.sequential([
        ng.assign(state,
                  decay * state + (1.0 - decay) * ng.square(clipped)),
        ng.assign(velocity,
                  velocity * self.momentum + grad_step + decay_step),
        ng.assign(variable, variable - velocity),
    ])
def Square(self, tf_node, inputs):
    """
    Convert a TensorFlow ``Square`` node: elementwise x^2 of the input.

    Arguments:
        tf_node: NodeDef object, the tensorflow node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the tensorflow node.

    Inputs to tf_node:
        input
    """
    squared = ng.square(inputs[0])
    # Carry the TF node name over so the op is traceable in the graph.
    return squared.named(tf_node.name)
def __call__(self, cost_func):
    """Build RMSProp updates for ``cost_func`` plus a learning-rate
    schedule step.

    Graph construction happens under ``saved_user_deps`` so the update ops
    do not pick up stray user-level dependencies.  All state (accumulator)
    updates are grouped before all parameter updates; the learning rate is
    then advanced via ``self.schedule``.

    Returns:
        A ``doall`` op covering state, parameter, and learning-rate updates.
    """
    with ng.Op.saved_user_deps():
        batch_cost = ng.sum(cost_func, out_axes=())
        batch_size = cost_func.axes.batch_axes()[0].length
        variables = batch_cost.variables()
        grads = [ng.deriv(batch_cost, var) / batch_size
                 for var in variables]
        # Only rescale when a clip norm was actually configured.
        scale_factor = (clip_gradient_norm(grads)
                        if self.gradient_clip_norm else 1)
        eps = self.epsilon
        decay = self.decay_rate
        state_updates = []
        param_updates = []
        for i, (variable, grad) in enumerate(zip(variables, grads)):
            clipped = clip_gradient_value(grad, self.gradient_clip_value)
            state = ng.persistent_tensor(axes=variable.axes,
                                         initial_value=0.)
            state_updates.append(
                ng.assign(
                    lvalue=state,
                    rvalue=decay * state +
                    (1.0 - decay) * ng.square(clipped),
                ).named('state_u_%s' % i))
            param_updates.append(
                ng.assign(
                    lvalue=variable,
                    rvalue=variable -
                    ((scale_factor * clipped * self.learning_rate) /
                     (ng.sqrt(state + eps) + eps)),
                ).named('var_u_%s' % i))
        lr_update = [
            ng.assign(
                self.learning_rate,
                self.schedule.get_learning_rate(self.learning_rate,
                                                self.iteration_index))
        ]
        updates = ng.doall(state_updates + param_updates + lr_update)
        self.iteration_index += 1
        return updates
def __call__(self, in_obj, channel_axes="C", spatial_axes=("D", "H", "W"), **kwargs):
    """
    Build the convolution over ``in_obj`` and restore the input axis order.

    Arguments:
        in_obj (Op): Input op
        channel_axes (str): name of the expected channel axis type - defaults to "C"
        spatial_axes (tuple): names of expected depth, height and width axis types - defaults
                              to "D", "H", and "W"

    Returns:
        The convolution output, sliced back to the caller's axis layout.

    Raises:
        ValueError: if ``spatial_axes`` does not name exactly three axes, or
            if the layer was already initialized with incompatible filter axes.
    """
    if isinstance(spatial_axes, dict):
        spatial_axes = tuple(
            spatial_axes.get(name, name) for name in ("D", "H", "W"))
    elif isinstance(spatial_axes, tuple):
        # Fix: require exactly three names. The previous `< 3` check
        # contradicted its own error message and silently dropped any
        # entries beyond the third (zip truncates to the shorter sequence).
        if len(spatial_axes) != 3:
            raise ValueError(
                "spatial_axes must have length 3 (e.g. ('D', 'H', 'W'))")
        spatial_axes = tuple(
            name if name else default
            for name, default in zip(spatial_axes, ("D", "H", "W")))

    orig_axes = in_obj.axes
    in_obj = reorder_spatial_axes(in_obj, channel_axes, spatial_axes)
    channel_axes = in_obj.axes.get_by_names(channel_axes)
    spatial_axes = in_obj.axes.get_by_names(*spatial_axes)

    filter_axes = self._filter_axes(channel_axes, spatial_axes)

    # mark 'K' as a shadow axis for the initializers.
    axes_map = shadow_axes_map(filter_axes.find_by_name('K'))
    filter_axes = ng.make_axes([
        axis if axis.name != 'K' else list(axes_map.keys())[0]
        for axis in filter_axes
    ])

    if not self.initialized:
        if not self.weight_norm:
            self.W = ng.variable(axes=filter_axes,
                                 initial_value=self.init,
                                 metadata={
                                     "label": LABELS["weight"]
                                 }).named("W")
        else:
            # Weight normalization: W = g * v / ||v|| (with a small
            # stabilizer inside the sqrt).
            self.v = ng.variable(axes=filter_axes,
                                 initial_value=self.init,
                                 metadata={
                                     "label": LABELS["weight"]
                                 }).named("v")
            out_axes = ng.make_axes(
                [filter_axes.get_by_names("K__NG_SHADOW")])
            v_norm = ng.mean(ng.square(self.v), out_axes=out_axes)
            self.g = ng.variable(axes=out_axes,
                                 initial_value=self.init,
                                 metadata={
                                     "label": LABELS["weight"]
                                 }).named("g")
            self.W = self.g * self.v * ng.reciprocal(
                ng.sqrt(v_norm + 1e-3))
    else:
        if filter_axes != self.W.axes:
            raise ValueError(
                ("{layer_name} layer has already been initialized with an "
                 "input object which has resulted in filter axes: "
                 "{existing_filter_axes}. This new input object has axes: "
                 "{input_axes}, which implies the need for filter axes: "
                 "{new_filter_axes} which are different than the existing "
                 "filter axes.").format(
                     layer_name=self.name,
                     existing_filter_axes=self.W.axes,
                     input_axes=in_obj.axes,
                     new_filter_axes=filter_axes,
                 ))

    output = ng.map_roles(
        self._conv_op(in_obj, channel_axes, spatial_axes), axes_map)

    # Reorder the output to match the input order
    output_axis_order = ng.make_axes(
        [output.axes.find_by_name(ax.name)[0] for ax in orig_axes])

    # Remove introduced axes. If their length is > 1, then perhaps they should be kept
    slices = [
        0 if (ax not in orig_axes) and ax.length == 1 else slice(None)
        for ax in output.axes
    ]
    output = ng.tensor_slice(output, slices)

    # New axes with length > 1 may have been introduced. Add them to the end.
    output_axis_order = output_axis_order | output.axes

    return ng.axes_with_order(output, output_axis_order)
def __init__(self):
    """Mean-squared-error cost: mean of (Y - T)^2 over all axes, halved."""
    def _mse(Y, T):
        # Halved MSE so the gradient w.r.t. Y is simply (Y - T) / N.
        return ng.mean(ng.square(Y - T), out_axes=()) / 2.
    self.ng_computation = _mse
# Discriminator scores on real and generated samples.
D1 = discriminator(data)
D2 = discriminator(generated)

# TODO
# Original Implementation with epsilon - wait till fixed
# https://github.com/NervanaSystems/private-ngraph/issues/2011
# x = ng.variable(initial_value=0.5, axes=[])
# eps = ng.uniform(x)
eps = ng.constant(0.5)  # delete after uniform works

# Gradient penalty (WGAN-GP): penalize deviation of the gradient norm
# at interpolated points from 1.
interpolated = eps * data + (1 - eps) * generated
D3 = discriminator(interpolated)
gradient = ng.deriv(ng.sum(D3, out_axes=[]), interpolated)
grad_norm = ng.L2_norm(gradient)
gradient_penalty = ng.square(grad_norm - 1)

if args.loss_type == "WGAN-GP":
    gp = args.gp_scale * gradient_penalty
    weight_clipping = None
elif args.loss_type == "WGAN":  # standard WGAN with no gradient penalty
    gp = None
    weight_clipping = args.w_clip

# Fix: in the WGAN-GP branch `gp` is an ngraph Op, so the old `if gp:`
# relied on the Op's truthiness rather than on whether a penalty was
# configured; test identity against None instead.
if gp is not None:
    loss_d = D1 - D2 + gp
else:
    loss_d = D1 - D2

mean_cost_d = ng.mean(loss_d, out_axes=[])
def square(x, name=None):
    """Return the elementwise square of ``x`` as a named ngraph op."""
    squared = ng.square(x)
    return squared.named(name)
def cost(y, t):
    """Half mean-squared error between prediction ``y`` and target ``t``."""
    squared_error = ng.square(y - t)
    return ng.mean(squared_error, out_axes=()) / 2.