Example #1
# Inner rewriter returned by an output-merge factory (cf. `output_merge`
# in aesara.gpuarray.opt_util); `cls`, `maker`, `beta_in`, and `out_in`
# are closure variables supplied by the factory.
def opt(fgraph, node):
    if (
        isinstance(node.op, GpuElemwise)
        and node.op.scalar_op == aes.add
        and node.nin == 2
    ):
        # Look for a node of the target class on either side of the add.
        targ = find_node(fgraph, node.inputs[0], cls)
        W = node.inputs[1]
        if targ is None:
            targ = find_node(fgraph, node.inputs[1], cls)
            W = node.inputs[0]
        if targ is None:
            return None
        if W.dtype != targ.outputs[0].dtype:
            return None
        if not is_equal(targ.inputs[beta_in], 0.0):
            # other cases are too complex for now
            return None
        if W.broadcastable != targ.inputs[out_in].broadcastable:
            # Would need to explicitly tile the output to fill
            # the full shape here.  Disable for now.
            return None
        # Substitute W for the output buffer and set beta to 1 so that the
        # target op performs the addition itself.
        inputs = list(targ.inputs)
        inputs[out_in] = W
        dtype = inputs[beta_in].dtype
        one = aes.constant(np.asarray(1.0, dtype=dtype))
        inputs[beta_in] = one
        with inherit_stack_trace(node.outputs):
            return maker(targ, *inputs)
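
A minimal sketch of how such a factory is typically applied, assuming the
`output_merge` helper and GEMM ops from Aesara's legacy gpuarray backend;
the import paths and the input positions of GpuGemm (C at 0, alpha at 1,
beta at 4) are assumptions, not verified against a specific release:

# Hedged usage sketch: applying an output_merge-style factory to GpuGemm.
from aesara.gpuarray.blas import GpuGemm, gpugemm_no_inplace  # assumed path
from aesara.gpuarray.opt_util import output_merge  # assumed path

@output_merge(GpuGemm, alpha_in=1, beta_in=4, out_in=0)
def local_gpugemm_output_merge(node, *inputs):
    # `inputs` arrive with W substituted for the output buffer and beta
    # set to 1, so the gemm itself performs the addition.
    return [gpugemm_no_inplace(*inputs)]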
Example #2
# Inner rewriter returned by an alpha-merge factory (cf. `alpha_merge`
# in aesara.gpuarray.opt_util); `cls`, `maker`, `alpha_in`, and `beta_in`
# are closure variables supplied by the factory.
def opt(fgraph, node):
    if (
        isinstance(node.op, GpuElemwise)
        and node.op.scalar_op == aes.mul
        and node.nin == 2
    ):
        # Find the target node on one side and the scalar factor on the other.
        targ = find_node(fgraph, node.inputs[0], cls)
        if targ is None:
            targ = find_node(fgraph, node.inputs[1], cls)
            if targ is None:
                return None
            lr = grab_cpu_scalar(node.inputs[0], nd=targ.outputs[0].ndim)
        else:
            lr = grab_cpu_scalar(node.inputs[1], nd=targ.outputs[0].ndim)
        if lr is None or lr.dtype != targ.outputs[0].dtype:
            return None
        inputs = list(targ.inputs)
        try:
            c = get_scalar_constant_value(lr)
            if c == 0:
                inputs[alpha_in] = lr
                inputs[beta_in] = lr
            elif c == 1:
                inputs[alpha_in] = targ.inputs[alpha_in]
                inputs[beta_in] = targ.inputs[beta_in]
            else:
                inputs[alpha_in] = lr * targ.inputs[alpha_in]
                inputs[beta_in] = lr * targ.inputs[beta_in]
        except NotScalarConstantError:
            # Non-constant scalar: scale alpha and beta symbolically.
            inputs[alpha_in] = lr * targ.inputs[alpha_in]
            inputs[beta_in] = lr * targ.inputs[beta_in]
        with inherit_stack_trace(node.outputs):
            return maker(targ, *inputs)
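
As with Example #1, a hedged sketch of how an `alpha_merge`-style factory is
typically applied, under the same assumptions about import paths and GpuGemm
input positions:

# Hedged usage sketch: folding a scalar multiplier into GpuGemm's alpha/beta.
from aesara.gpuarray.blas import GpuGemm, gpugemm_no_inplace  # assumed path
from aesara.gpuarray.opt_util import alpha_merge  # assumed path

@alpha_merge(GpuGemm, alpha_in=1, beta_in=4)
def local_gpugemm_alpha_merge(node, *inputs):
    # `inputs` arrive with alpha and beta already scaled by the merged factor.
    return [gpugemm_no_inplace(*inputs)]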
Example #3
# Rewriter that lifts abstract convolution Ops to their cuDNN
# implementations when the inputs live on the GPU.
def local_abstractconv_cudnn(fgraph, node):
    ctx = infer_context_name(*node.inputs)
    if not isinstance(node.inputs[0].type, GpuArrayType):
        return None
    if node.op.unshared:
        # Unshared convolution is not supported by cuDNN.
        return None
    if isinstance(node.op.border_mode, tuple) and any(
            isinstance(p, tuple) for p in node.op.border_mode):
        # Asymmetric padding not yet supported
        return None
    if isinstance(node.op, AbstractConv2d):
        with inherit_stack_trace(node.outputs):
            return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs,
                                                  node.outputs)
    elif isinstance(node.op, AbstractConv3d):
        with inherit_stack_trace(node.outputs):
            return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs,
                                                    node.outputs)
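
The snippet above omits the decorator that registers the function as a node
rewriter. A hedged sketch of the usual declaration, assuming the
`local_optimizer` decorator from `aesara.graph.opt` (the decorator name and
import path vary across Aesara versions):

from aesara.graph.opt import local_optimizer  # assumed path

@local_optimizer([AbstractConv2d, AbstractConv3d])
def local_abstractconv_cudnn(fgraph, node):
    ...  # body as in Example #3 above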
Example #4
# Inner rewriter returned by an inplace-allocempty factory (cf.
# `inplace_allocempty` in aesara.gpuarray.opt_util); `op`, `idx`, and
# `maker` are closure variables supplied by the factory.
def opt(fgraph, node):
    if type(node.op) != op or node.op.inplace:
        return
    inputs = list(node.inputs)
    alloc = inputs[idx]
    if (alloc.owner and isinstance(alloc.owner.op, GpuAllocEmpty)
            and len(fgraph.clients[alloc]) > 1):
        # The empty buffer is shared with other clients: give this node a
        # private copy so the in-place write cannot clobber them.
        alloc_op = GpuAllocEmpty(alloc.owner.op.dtype,
                                 alloc.owner.op.context_name)
        inputs[idx] = alloc_op(*alloc.owner.inputs)
    with inherit_stack_trace(node.outputs):
        return maker(node, inputs)
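
A hedged sketch of applying an `inplace_allocempty`-style factory, assuming
`GpuGemm` with its output buffer at input index 0 and the helper living in
`aesara.gpuarray.opt_util` (both are assumptions):

# Hedged usage sketch: marking GpuGemm in-place over its output buffer.
from aesara.gpuarray.blas import GpuGemm, gpugemm_inplace  # assumed path
from aesara.gpuarray.opt_util import inplace_allocempty  # assumed path

@inplace_allocempty(GpuGemm, 0)
def local_inplace_gpugemm(node, inputs):
    # By the time `maker` runs, input 0 is a buffer only this node uses,
    # so the in-place variant is safe.
    return [gpugemm_inplace(*inputs)]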
Example #5
# Rewriter that replaces a GpuCAReduceCuda reduction with cuDNN's
# GpuDnnReduction when the dtypes, rank, and reduction op allow it.
def local_dnn_reduction(fgraph, node):
    if not isinstance(node.op, GpuCAReduceCuda):
        return

    if not dnn_available(node.inputs[0].type.context_name):
        return

    if version(raises=False) < 6000:
        return

    if node.inputs[0].ndim > 8:
        return

    acc_dtype = node.op._acc_dtype(node.inputs[0].dtype)

    if node.inputs[0].dtype != node.outputs[0].dtype:
        # We can mix float16 and float32, but not float64.
        if (node.inputs[0].dtype == "float64"
                or node.outputs[0].dtype == "float64"):
            return
        if acc_dtype != "float32":
            return

    if node.inputs[0].dtype not in ("float16", "float32", "float64"):
        return

    if node.inputs[0].dtype == "float64" and acc_dtype != "float64":
        return

    if node.inputs[0].dtype == "float32" and acc_dtype != "float32":
        return

    if node.inputs[0].dtype == "float16" and acc_dtype == "float64":
        return

    def _identity(a):
        return a

    def _square(a):
        return GpuElemwise(aesara.scalar.basic.sqr)(a)

    scal = node.op.scalar_op.name
    # `post` is applied to the cuDNN result to preserve CAReduce semantics.
    post = _identity

    if node.op.pre_scalar_op is not None:
        if isinstance(node.op.scalar_op, aesara.scalar.basic.Add):
            if isinstance(node.op.pre_scalar_op, aesara.scalar.basic.Sqr):
                # sum(sqr(x)) == norm2(x) ** 2, so square the cuDNN result.
                scal = "norm2"
                post = _square
            elif isinstance(node.op.pre_scalar_op, aesara.scalar.basic.Abs):
                scal = "norm1"
            else:
                return
        elif isinstance(
            node.op.scalar_op, aesara.scalar.basic.ScalarMaximum
        ) and isinstance(node.op.pre_scalar_op, aesara.scalar.basic.Abs):
            scal = "absmax"
        else:
            return

    if not cudnn.cudnnReduceTensorOp_t.has_alias(scal):
        return

    with inherit_stack_trace(node.outputs):
        ret = GpuDnnReduction(scal, node.op.axis, acc_dtype, node.op.dtype,
                              False)(node.inputs[0])
        return [post(ret)]
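
Why `post = _square` for `norm2`: the CAReduce pattern add-over-sqr computes
a sum of squares, while cuDNN's norm2 reduction returns the square root of
that sum, so the cuDNN result must be squared to preserve semantics. A small
self-contained NumPy check of that identity:

import numpy as np

x = np.array([3.0, 4.0])
sum_sq = np.sum(np.square(x))  # CAReduce(add, pre_scalar_op=sqr) computes 25.0
norm2 = np.sqrt(sum_sq)        # cuDNN's norm2 reduction returns 5.0
assert norm2 ** 2 == sum_sq    # hence the post-squaring step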