def local_abstract_batch_norm_train(fgraph, node):
    """Rewrite an ``AbstractBatchNormTrain`` apply node into explicit tensor ops.

    Replaces the abstract op with a graph computing the normalized output,
    the per-axes mean, the inverse standard deviation, and (when the optional
    running-average inputs are present) the updated running mean/variance.

    Parameters
    ----------
    fgraph
        The function graph being optimized (unused here beyond the standard
        local-optimizer signature).
    node
        The apply node to rewrite.

    Returns
    -------
    list or None
        Replacement output variables, or ``None`` when the rewrite does not
        apply (wrong op, invalid axes, or non-TensorType inputs).
    """
    if not isinstance(node.op, AbstractBatchNormTrain):
        return None
    x, scale, bias, epsilon, running_average_factor = node.inputs[:5]
    axes = node.op.axes
    # NOTE(review): this allows an axis equal to x.ndim; presumably out-of-range
    # axes would fail later in x.mean — confirm against the op's validation.
    if min(axes) < 0 or max(axes) > x.ndim:
        return None
    # Bail out unless every mandatory input is a plain TensorType.
    if (
        not isinstance(x.type, TensorType)
        or not isinstance(scale.type, TensorType)
        or not isinstance(bias.type, TensorType)
        or not isinstance(epsilon.type, TensorType)
        or not isinstance(running_average_factor.type, TensorType)
    ):
        return None
    # optional running_mean and running_var
    if len(node.inputs) > 5 and not isinstance(node.inputs[5].type, TensorType):
        return None
    if len(node.inputs) > 6 and not isinstance(node.inputs[6].type, TensorType):
        return None

    mean = x.mean(axes, keepdims=True)
    var = x.var(axes, keepdims=True)
    # The epsilon should not upcast the dtype.
    if var.dtype == "float32" and epsilon.dtype == "float64":
        epsilon = epsilon.astype("float32")
    invstd = tt.inv(tt.sqrt(var + epsilon))
    out = (x - mean) * (scale * invstd) + bias
    results = [out, mean, invstd]

    # Exponential moving average of the mean, when a running mean is supplied.
    if len(node.inputs) > 5:
        running_mean = node.inputs[5]
        running_mean = (
            running_mean * (1.0 - running_average_factor)
            + mean * running_average_factor
        )
        results.append(running_mean)
    # Moving average of the variance, rescaled by m/(m-1) (Bessel's correction)
    # where m is the number of elements averaged per statistic.
    if len(node.inputs) > 6:
        m = tt.cast(tt.prod(x.shape) / tt.prod(scale.shape), theano.config.floatX)
        running_var = node.inputs[6]
        running_var = (
            running_var * (1.0 - running_average_factor)
            + (m / (m - 1)) * var * running_average_factor
        )
        results.append(running_var)

    # Force each replacement to carry the broadcast pattern of the output it
    # replaces, so downstream code sees identical types.
    results = [
        tt.patternbroadcast(r, r_orig.broadcastable)
        for (r, r_orig) in zip(results, node.outputs)
    ]

    # Propagate the stack trace to every freshly-created intermediate variable.
    # (NOTE: the loop variable shadows the `var` variance tensor above; it is
    # no longer needed at this point, so this is harmless.)
    for var in theano.gof.graph.variables(node.inputs, results):
        if var not in node.inputs:
            copy_stack_trace(node.outputs[0], var)
    return results
def local_abstract_batch_norm_train(node):
    """Rewrite an ``AbstractBatchNormTrain`` node into explicit tensor ops.

    Builds the normalized output, the mean, the inverse standard deviation,
    and — when the optional running-average inputs are present — the updated
    running mean and variance.

    Returns the list of replacement outputs, or ``None`` when the rewrite does
    not apply (wrong op, invalid axes, or non-TensorType inputs).
    """
    if not isinstance(node.op, AbstractBatchNormTrain):
        return None

    inputs = node.inputs
    x, scale, bias, epsilon, running_average_factor = inputs[:5]
    axes = node.op.axes
    if min(axes) < 0 or max(axes) > x.ndim:
        return None

    # Every input — the five mandatory ones plus the optional running mean
    # and running variance — must be a plain TensorType.
    for candidate in inputs:
        if not isinstance(candidate.type, TensorType):
            return None

    mean = x.mean(axes, keepdims=True)
    var = x.var(axes, keepdims=True)
    # Keep epsilon from upcasting the computation to float64.
    if var.dtype == 'float32' and epsilon.dtype == 'float64':
        epsilon = epsilon.astype('float32')
    invstd = T.inv(T.sqrt(var + epsilon))
    results = [(x - mean) * (scale * invstd) + bias, mean, invstd]

    n_inputs = len(inputs)
    if n_inputs > 5:
        # Exponential moving average of the mean.
        updated_mean = (inputs[5] * (1.0 - running_average_factor)
                        + mean * running_average_factor)
        results.append(updated_mean)
    if n_inputs > 6:
        # Moving average of the variance, with the m/(m-1) correction factor,
        # m being the number of elements averaged per statistic.
        m = T.cast(T.prod(x.shape) / T.prod(scale.shape), theano.config.floatX)
        updated_var = (inputs[6] * (1.0 - running_average_factor)
                       + (m / (m - 1)) * var * running_average_factor)
        results.append(updated_var)

    # Each replacement must carry the broadcast pattern of the output it
    # stands in for, so downstream consumers see identical types.
    results = [T.patternbroadcast(new_out, old_out.broadcastable)
               for new_out, old_out in zip(results, node.outputs)]

    # Copy the stack trace onto every freshly created variable.
    for variable in theano.gof.graph.variables(node.inputs, results):
        if variable not in node.inputs:
            copy_stack_trace(node.outputs[0], variable)
    return results
def infer_shape(self, fgraph, node, i0_shapes):
    """Infer output shapes for this op.

    Starts from the shape feature's default inference and then overrides the
    slots this op knows better: the first output when ``axis`` is given, and
    the inverse-index output when ``return_inverse`` is set.

    Raises
    ------
    RuntimeError
        If ``axis`` (after negative-axis normalization) falls outside the
        input's number of dimensions.
    """
    shape_feature = fgraph.shape_feature
    ret = shape_feature.default_infer_shape(fgraph, node, i0_shapes)
    ndim = len(i0_shapes[0])
    axis = self.axis
    if axis is not None:
        if axis < 0:
            axis += ndim
        if not 0 <= axis < ndim:
            raise RuntimeError(
                f"Unique axis `{self.axis}` is outside of input ndim = {ndim}."
            )
        # The first output's shape is only known symbolically.
        ret[0] = tuple(
            shape_feature.shape_ir(i, node.outputs[0]) for i in range(ndim)
        )
    if self.return_inverse:
        # Inverse indices cover the flattened input (axis=None) or the
        # chosen axis; they land in slot 2 when indices are also returned.
        if axis is None:
            inverse_shape = (basic.prod(i0_shapes[0]),)
        else:
            inverse_shape = (i0_shapes[0][axis],)
        ret[2 if self.return_index else 1] = inverse_shape
    return ret
def infer_shape(self, node, i0_shapes):
    """Infer output shapes for this op.

    Starts from the shape feature's default inference, then overrides the
    first output's shape when ``axis`` is given (it is only known
    symbolically) and the inverse-index output's shape when
    ``return_inverse`` is set.

    Raises
    ------
    RuntimeError
        If ``axis`` (after negative-axis normalization) is outside the
        input's number of dimensions.
    """
    ret = node.fgraph.shape_feature.default_infer_shape(node, i0_shapes)
    if self.axis is not None:
        self_axis = self.axis
        ndim = len(i0_shapes[0])
        # Normalize a negative axis, then validate the result.
        if self_axis < 0:
            self_axis += ndim
        if self_axis < 0 or self_axis >= ndim:
            raise RuntimeError(
                "Unique axis `{}` is outside of input ndim = "
                "{}.".format(self.axis, ndim)
            )
        # NOTE(review): `xrange` is Python 2 — presumably imported from
        # six.moves at the top of this file; confirm before running on py3.
        ret[0] = tuple([node.fgraph.shape_feature.shape_ir(i, node.outputs[0])
                        for i in xrange(ndim)])
    if self.return_inverse:
        # Inverse indices span the flattened input (axis=None) or the
        # length of the chosen axis.
        if self.axis is None:
            shape = (basic.prod(i0_shapes[0]), )
        else:
            shape = (i0_shapes[0][self_axis], )
        # When indices are also returned, the inverse output is slot 2.
        if self.return_index:
            ret[2] = shape
            return ret
        ret[1] = shape
        return ret
    return ret
def infer_shape(self, node, i0_shapes):
    """Infer output shapes for this op.

    Uses the shape feature's default inference, overriding only the
    inverse-index output (when ``return_inverse`` is set) with the size of
    the flattened input.
    """
    shapes = node.fgraph.shape_feature.default_infer_shape(node, i0_shapes)
    if self.return_inverse:
        flat_shape = (basic.prod(i0_shapes[0]),)
        # The inverse output sits at slot 2 when indices are also returned,
        # otherwise at slot 1.
        shapes[2 if self.return_index else 1] = flat_shape
    return shapes
def infer_shape(self, fgraph, node, shapes):
    """Infer the output shape for this op.

    With ``axis=None`` the input is flattened, so the single output is a
    1-D tensor whose length is the product of the input's dimensions;
    otherwise the default (input) shapes are passed through unchanged.
    """
    if self.axis is not None:
        return shapes
    flat_length = basic.prod(shapes[0])
    return [(flat_length,)]