Esempio n. 1
0
    _cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, [])
    info['gpu_hash'] = hash(_cmodule_key)

    def typebuild(dtype, broadcastable, context_name=context_name):
        """Create a ``GpuArrayType`` from a dtype and a broadcast pattern.

        ``context_name`` defaults to the value bound to that name in the
        enclosing scope at the moment this function is defined.
        """
        gpu_type = GpuArrayType(
            dtype=dtype,
            broadcastable=broadcastable,
            context_name=context_name)
        return gpu_type

    nw_op = scan_op.Scan(scan_ins, scan_outs, info,
                         typeConstructor=typebuild).make_node(*nw_ins)
    return nw_op.outputs


def _scan_type_infer(node):
    """Return a type constructor tied to the context inferred from ``node``.

    ``infer_context_name`` is called once, on all inputs of ``node``, and
    its result becomes the default ``context_name`` of the returned
    callable.

    Parameters
    ----------
    node
        Object exposing an ``inputs`` sequence; the inputs are unpacked
        into ``infer_context_name``.

    Returns
    -------
    callable
        ``typebuild(dtype, broadcastable, context_name=<inferred>)``, which
        returns ``GpuArrayType(dtype=..., broadcastable=...,
        context_name=...)``.
    """
    inferred_context = infer_context_name(*node.inputs)

    # The inferred context is captured as a default argument, so callers may
    # still override it explicitly on a per-call basis.
    def typebuild(dtype, broadcastable, context_name=inferred_context):
        gpu_type = GpuArrayType(
            dtype=dtype,
            broadcastable=broadcastable,
            context_name=context_name)
        return gpu_type

    return typebuild


# This optimizer is deliberately not registered under 'fast_run' or
# 'fast_compile'; it will be added to fast_run if the GPU is enabled.
# The in-place scan optimizer is built with ``_scan_type_infer`` as its
# ``typeInfer`` callback and with ``gpua_flag=True``.
optdb.register(
    'gpua_scanOp_make_inplace',
    scan_opt.ScanInplaceOptimizer(
        typeInfer=_scan_type_infer,
        gpua_flag=True),
    75,
    'gpuarray',
    'inplace',
    'scan')
Esempio n. 2
0
    b = e
    e = e + node.op.n_nit_sot
    nw_ins += node.inputs[b:e]
    nw_ins += [safe_to_gpu(x) for x in node.inputs[e:]]
    scan_ins = [tensor_to_gpu(x) for x in node.op.inputs]
    scan_outs = [safe_to_gpu(x) for x in node.op.outputs]
    scan_outs = scan_utils.clone(
        scan_outs,
        replace=zip(node.op.inputs, [safe_to_cpu(x) for x in scan_ins]))

    # We need to construct the hash here, because scan
    # __init__ does not know about the gpu and can not
    # handle graphs with inputs being on the gpu
    tmp_in, tmp_out = gpu_reconstruct_graph(scan_ins, scan_outs)
    local_fgraph = gof.FunctionGraph(tmp_in, tmp_out, clone=False)
    _cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, [])
    info['gpu_hash'] = hash(_cmodule_key)

    nw_op = scan_op.Scan(scan_ins,
                         scan_outs,
                         info,
                         typeConstructor=GpuArrayType).make_node(*nw_ins)
    return nw_op.outputs


# Register the in-place scan optimizer, built with ``GpuArrayType`` as its
# type constructor and ``gpua_flag=True``, under the name
# 'gpua_scanOp_make_inplace'.
# NOTE(review): 75 and the trailing strings are passed positionally;
# presumably the position in the optimization sequence followed by the
# selection tags -- confirm against ``optdb.register``.
optdb.register(
    'gpua_scanOp_make_inplace',
    scan_opt.ScanInplaceOptimizer(
        typeConstructor=GpuArrayType,
        gpua_flag=True),
    75,
    'gpua',
    'fast_run',
    'inplace',
    'scan')
Esempio n. 3
0
    e = e + node.op.n_nit_sot
    nw_ins += node.inputs[b:e]
    nw_ins += [safe_to_gpu(x) for x in node.inputs[e:]]
    scan_ins = [tensor_to_gpu(x) for x in node.op.inputs]
    scan_outs = [safe_to_gpu(x) for x in node.op.outputs]
    scan_outs = scan_utils.clone(
        scan_outs,
        replace=zip(node.op.inputs,
                    [safe_to_cpu(x) for x in scan_ins]))

    # We need to construct the hash here, because scan
    # __init__ does not know about the gpu and can not
    # handle graphs with inputs being on the gpu
    tmp_in, tmp_out = gpu_reconstruct_graph(scan_ins, scan_outs)
    local_fgraph = gof.FunctionGraph(tmp_in, tmp_out, clone=False)
    _cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, [])
    info['gpu_hash'] = hash(_cmodule_key)

    nw_op = scan_op.Scan(scan_ins, scan_outs, info,
                         typeConstructor=GpuArrayType).make_node(*nw_ins)
    return nw_op.outputs

# Register the in-place scan optimizer, built with ``GpuArrayType`` as its
# type constructor and ``gpua_flag=True``, under the name
# 'gpua_scanOp_make_inplace'.
# NOTE(review): 75 and the trailing strings are passed positionally;
# presumably the position in the optimization sequence followed by the
# selection tags -- confirm against ``optdb.register``.
optdb.register(
    'gpua_scanOp_make_inplace',
    scan_opt.ScanInplaceOptimizer(
        typeConstructor=GpuArrayType,
        gpua_flag=True),
    75, 'gpua', 'fast_run', 'inplace', 'scan')
Esempio n. 4
0
    info['gpua'] = True
    nw_ins = [node.inputs[0]]
    e = (1 + node.op.n_seqs + node.op.n_mit_mot + node.op.n_mit_sot +
         node.op.n_sit_sot + node.op.n_shared_outs)
    nw_ins += [safe_to_gpu(x) for x in node.inputs[1:e]]
    b = e
    e = e + node.op.n_nit_sot
    nw_ins += node.inputs[b:e]
    nw_ins += [safe_to_gpu(x) for x in node.inputs[e:]]
    scan_ins = [tensor_to_gpu(x) for x in node.op.inputs]
    scan_outs = [safe_to_gpu(x) for x in node.op.outputs]
    scan_outs = scan_utils.clone(
        scan_outs,
        replace=zip(node.op.inputs, [safe_to_cpu(x) for x in scan_ins]))

    # We need to construct the hash here, because scan
    # __init__ does not know about the gpu and can not
    # handle graphs with inputs being on the gpu
    tmp_in, tmp_out = gpu_reconstruct_graph(scan_ins, scan_outs)
    local_fgraph = gof.FunctionGraph(tmp_in, tmp_out, clone=False)
    _cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, [])
    info['gpu_hash'] = hash(_cmodule_key)

    nw_op = scan_op.Scan(scan_ins, scan_outs, info).make_node(*nw_ins)
    return nw_op.outputs


# Register the in-place scan optimizer (constructed with ``gpua_flag=True``)
# under the name 'gpua_scanOp_make_inplace'.
# NOTE(review): 75 and the trailing strings are passed positionally;
# presumably the position in the optimization sequence followed by the
# selection tags -- confirm against ``optdb.register``.
optdb.register(
    'gpua_scanOp_make_inplace',
    scan_opt.ScanInplaceOptimizer(gpua_flag=True),
    75,
    'gpua',
    'fast_run',
    'inplace',
    'scan')