def opt(fgraph, node):
    """Fuse ``target_op(..., beta=0) + W`` into ``target_op(..., out=W, beta=1)``.

    Returns the replacement outputs, or ``None`` when the pattern does
    not match or cannot be fused safely.
    """
    if not (
        isinstance(node.op, GpuElemwise)
        and node.op.scalar_op == aes.add
        and node.nin == 2
    ):
        return None
    # The target op may sit on either side of the add; the other side
    # becomes the candidate output buffer.
    lhs, rhs = node.inputs
    targ = find_node(fgraph, lhs, cls)
    out_var = rhs
    if targ is None:
        targ = find_node(fgraph, rhs, cls)
        out_var = lhs
    if targ is None:
        return None
    if out_var.dtype != targ.outputs[0].dtype:
        return None
    # Only the beta == 0 case is handled; anything else is too complex
    # for now.
    if not is_equal(targ.inputs[beta_in], 0.0):
        return None
    if out_var.broadcastable != targ.inputs[out_in].broadcastable:
        # Would need to explicitly tile the output to fill the full
        # shape here.  Disable for now.
        return None
    new_inputs = list(targ.inputs)
    new_inputs[out_in] = out_var
    # Switch beta from 0 to 1 so the former addend is accumulated.
    beta_dtype = new_inputs[beta_in].dtype
    new_inputs[beta_in] = aes.constant(np.asarray(1.0, dtype=beta_dtype))
    with inherit_stack_trace(node.outputs):
        return maker(targ, *new_inputs)
def opt(fgraph, node):
    """Fold ``lr * target_op(...)`` into the target op's alpha/beta inputs.

    Returns the replacement outputs, or ``None`` when the pattern does
    not match or the scalar cannot be absorbed.
    """
    if not (
        isinstance(node.op, GpuElemwise)
        and node.op.scalar_op == aes.mul
        and node.nin == 2
    ):
        return None
    # Find the target op on one side of the multiply; the scalar factor
    # comes from the opposite side.
    targ = find_node(fgraph, node.inputs[0], cls)
    if targ is not None:
        lr = grab_cpu_scalar(node.inputs[1], nd=targ.outputs[0].ndim)
    else:
        targ = find_node(fgraph, node.inputs[1], cls)
        if targ is None:
            return None
        lr = grab_cpu_scalar(node.inputs[0], nd=targ.outputs[0].ndim)
    if lr is None or lr.dtype != targ.outputs[0].dtype:
        return None
    new_inputs = list(targ.inputs)
    try:
        const = get_scalar_constant_value(lr)
        if const == 0:
            # Scaling by zero zeroes out both coefficients directly.
            new_inputs[alpha_in] = lr
            new_inputs[beta_in] = lr
        elif const == 1:
            # Multiplying by one leaves alpha/beta untouched.
            pass
        else:
            new_inputs[alpha_in] = lr * targ.inputs[alpha_in]
            new_inputs[beta_in] = lr * targ.inputs[beta_in]
    except NotScalarConstantError:
        # Not a compile-time constant: scale symbolically.
        new_inputs[alpha_in] = lr * targ.inputs[alpha_in]
        new_inputs[beta_in] = lr * targ.inputs[beta_in]
    with inherit_stack_trace(node.outputs):
        return maker(targ, *new_inputs)
def local_abstractconv_cudnn(fgraph, node):
    """Lower an abstract 2d/3d convolution to its cuDNN implementation.

    Returns a replacement graph, or ``None`` when the node is not a GPU
    convolution this rewrite can handle (unshared convolutions and
    asymmetric padding are not supported by the cuDNN path).
    """
    # Fix: check the input type BEFORE inferring a context name.  The
    # original computed ``ctx`` first, so on graphs whose inputs carry
    # no GPU context the guard never got a chance to bail out cleanly
    # (infer_context_name presumably fails without a GPU variable —
    # TODO confirm against its implementation).
    if not isinstance(node.inputs[0].type, GpuArrayType):
        return None
    ctx = infer_context_name(*node.inputs)
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(
        isinstance(p, tuple) for p in node.op.border_mode
    ):
        # Asymmetric padding not yet supported
        return None
    if isinstance(node.op, AbstractConv2d):
        with inherit_stack_trace(node.outputs):
            return local_abstractconv_cudnn_graph(
                node.op, ctx, node.inputs, node.outputs
            )
    elif isinstance(node.op, AbstractConv3d):
        with inherit_stack_trace(node.outputs):
            return local_abstractconv3d_cudnn_graph(
                node.op, ctx, node.inputs, node.outputs
            )
    # Any other abstract conv op: leave the node untouched.
    return None
def opt(fgraph, node):
    """Duplicate a shared GpuAllocEmpty input so this node owns its buffer.

    If the buffer input at ``idx`` is a GpuAllocEmpty with more than one
    client, a fresh identical allocation is substituted before rebuilding
    the node; otherwise the node is rebuilt with its inputs unchanged.
    """
    # Exact-type match on purpose (subclasses excluded); skip ops that
    # are already inplace.
    if type(node.op) != op or node.op.inplace:
        return None
    new_inputs = list(node.inputs)
    buf = new_inputs[idx]
    owner = buf.owner
    is_shared_alloc = (
        owner is not None
        and isinstance(owner.op, GpuAllocEmpty)
        and len(fgraph.clients[buf]) > 1
    )
    if is_shared_alloc:
        fresh_alloc = GpuAllocEmpty(owner.op.dtype, owner.op.context_name)
        new_inputs[idx] = fresh_alloc(*owner.inputs)
    with inherit_stack_trace(node.outputs):
        return maker(node, new_inputs)
def local_dnn_reduction(fgraph, node):
    """Replace a ``GpuCAReduceCuda`` with a cuDNN ``GpuDnnReduction``.

    Bails out (implicitly returning ``None``) whenever cuDNN cannot
    perform the reduction: unavailable cuDNN, too many dimensions,
    unsupported dtype/accumulator combinations, or an unsupported
    scalar-op / pre-scalar-op pairing.
    """
    if not isinstance(node.op, GpuCAReduceCuda):
        return
    if not dnn_available(node.inputs[0].type.context_name):
        return
    # NOTE(review): requires cuDNN >= 6000 — presumably the version that
    # introduced the tensor-reduction API; confirm against cuDNN docs.
    if version(raises=False) < 6000:
        return
    # cuDNN reductions handle at most 8 dimensions.
    if node.inputs[0].ndim > 8:
        return
    acc_dtype = node.op._acc_dtype(node.inputs[0].dtype)
    if node.inputs[0].dtype != node.outputs[0].dtype:
        # We can mix float16 and float32, but not float64.
        if node.inputs[0].dtype == "float64" or node.outputs[
                0].dtype == "float64":
            return
        if acc_dtype != "float32":
            return
    # Only float dtypes are supported, and each input dtype constrains
    # the accumulator dtype it may be paired with.
    if node.inputs[0].dtype not in ("float16", "float32", "float64"):
        return
    if node.inputs[0].dtype == "float64" and acc_dtype != "float64":
        return
    if node.inputs[0].dtype == "float32" and acc_dtype != "float32":
        return
    if node.inputs[0].dtype == "float16" and acc_dtype == "float64":
        return

    def _identity(a):
        # Post-processing no-op for reductions mapped 1:1 onto cuDNN.
        return a

    def _square(a):
        # Post-processing for "norm2": cuDNN returns the norm, but
        # sum-of-squares is wanted, so square the result.
        return GpuElemwise(aesara.scalar.basic.sqr)(a)

    scal = node.op.scalar_op.name
    post = _identity

    # A pre_scalar_op is only representable when the (pre_scalar_op,
    # scalar_op) pair maps onto a cuDNN reduction operator:
    #   sqr + add -> norm2 (then squared), abs + add -> norm1,
    #   abs + maximum -> absmax.  Anything else cannot be lowered.
    if node.op.pre_scalar_op is not None:
        if isinstance(node.op.scalar_op, aesara.scalar.basic.Add):
            if isinstance(node.op.pre_scalar_op, aesara.scalar.basic.Sqr):
                scal = "norm2"
                post = _square
            elif isinstance(node.op.pre_scalar_op,
                            aesara.scalar.basic.Abs):
                scal = "norm1"
            else:
                return
        elif isinstance(node.op.scalar_op,
                        aesara.scalar.basic.ScalarMaximum) and isinstance(
                            node.op.pre_scalar_op,
                            aesara.scalar.basic.Abs):
            scal = "absmax"
        else:
            return

    # Final gate: the chosen name must be a known cuDNN reduce operator.
    if not cudnn.cudnnReduceTensorOp_t.has_alias(scal):
        return

    with inherit_stack_trace(node.outputs):
        ret = GpuDnnReduction(scal, node.op.axis, acc_dtype, node.op.dtype,
                              False)(node.inputs[0])
        return [post(ret)]