# Closing statements of a Scan-to-GPU rewrite, then the `_scan_type_infer`
# helper, then registration of the in-place Scan optimizer.
#   * local_fgraph, info, scan_ins, scan_outs, nw_ins and the outer
#     context_name are not assigned on this line -- presumably bound earlier
#     in the enclosing function (its `def` is not visible here).
#   * info['gpu_hash'] receives hash() of the key returned by
#     gof.CLinker().cmodule_key_(local_fgraph, []).
#   * Each `typebuild` closure returns GpuArrayType(dtype, broadcastable,
#     context_name); context_name defaults to the value captured when the
#     closure is defined.
#   * The rewrite creates a Scan op with typeConstructor=typebuild, applies
#     it to nw_ins through make_node and returns the new node's outputs.
#   * `_scan_type_infer(node)` obtains context_name from
#     infer_context_name(*node.inputs) and returns such a `typebuild` closure.
#   * optdb.register is given the name 'gpua_scanOp_make_inplace', a
#     ScanInplaceOptimizer(typeInfer=_scan_type_infer, gpua_flag=True), the
#     value 75 and the tags 'gpuarray', 'inplace', 'scan' (no 'fast_run').
# NOTE(review): all of the above shares one physical line; as written Python
# treats everything after the first '#' as comment text. Line breaks and
# indentation appear to have been lost -- TODO confirm and restore the layout.
# NOTE(review): the same optimizer name is registered again on later lines of
# this file section; confirm the duplicates are intended.
_cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, []) info['gpu_hash'] = hash(_cmodule_key) def typebuild(dtype, broadcastable, context_name=context_name): return GpuArrayType(dtype=dtype, broadcastable=broadcastable, context_name=context_name) nw_op = scan_op.Scan(scan_ins, scan_outs, info, typeConstructor=typebuild).make_node(*nw_ins) return nw_op.outputs def _scan_type_infer(node): context_name = infer_context_name(*node.inputs) def typebuild(dtype, broadcastable, context_name=context_name): return GpuArrayType(dtype=dtype, broadcastable=broadcastable, context_name=context_name) return typebuild # Do not register in fast_run or fast_compile. # It will be added to fast_run if the GPU is enabled. optdb.register( 'gpua_scanOp_make_inplace', scan_opt.ScanInplaceOptimizer(typeInfer=_scan_type_infer, gpua_flag=True), 75, 'gpuarray', 'inplace', 'scan')
# Closing statements of a Scan-to-GPU rewrite followed by registration of the
# in-place Scan optimizer. e, nw_ins, info and node are not assigned on this
# line -- presumably bound earlier in the enclosing function (its `def` is
# not visible here).
#   * [b:e] spans node.op.n_nit_sot entries of node.inputs; those are
#     appended to nw_ins unchanged, while every input after e goes through
#     safe_to_gpu.
#   * scan_ins is node.op.inputs mapped through tensor_to_gpu; scan_outs is
#     node.op.outputs mapped through safe_to_gpu and then cloned with each
#     original inner input replaced by safe_to_cpu of its scan_ins entry.
#   * info['gpu_hash'] receives hash() of the CLinker cmodule key of a
#     FunctionGraph built (clone=False) from gpu_reconstruct_graph's result.
#   * The Scan op is created with typeConstructor=GpuArrayType, applied to
#     nw_ins through make_node, and the new node's outputs are returned.
#   * optdb.register is given the name 'gpua_scanOp_make_inplace', the value
#     75 and the tags 'gpua', 'fast_run', 'inplace', 'scan'.
# NOTE(review): `replace=zip(...)` is a one-shot iterator on Python 3;
# confirm scan_utils.clone accepts that -- TODO verify.
# NOTE(review): all of the above shares one physical line; as written Python
# treats everything after the first '#' as comment text. Line breaks and
# indentation appear to have been lost -- TODO confirm and restore the layout.
# NOTE(review): the same optimizer name is registered on neighbouring lines
# of this file section; confirm the duplicates are intended.
b = e e = e + node.op.n_nit_sot nw_ins += node.inputs[b:e] nw_ins += [safe_to_gpu(x) for x in node.inputs[e:]] scan_ins = [tensor_to_gpu(x) for x in node.op.inputs] scan_outs = [safe_to_gpu(x) for x in node.op.outputs] scan_outs = scan_utils.clone( scan_outs, replace=zip(node.op.inputs, [safe_to_cpu(x) for x in scan_ins])) # We need to construct the hash here, because scan # __init__ does not know about the gpu and can not # handle graphs with inputs being on the gpu tmp_in, tmp_out = gpu_reconstruct_graph(scan_ins, scan_outs) local_fgraph = gof.FunctionGraph(tmp_in, tmp_out, clone=False) _cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, []) info['gpu_hash'] = hash(_cmodule_key) nw_op = scan_op.Scan(scan_ins, scan_outs, info, typeConstructor=GpuArrayType).make_node(*nw_ins) return nw_op.outputs optdb.register( 'gpua_scanOp_make_inplace', scan_opt.ScanInplaceOptimizer(typeConstructor=GpuArrayType, gpua_flag=True), 75, 'gpua', 'fast_run', 'inplace', 'scan')
# Closing statements of a Scan-to-GPU rewrite followed by registration of the
# in-place Scan optimizer. b, e, nw_ins, info and node are not assigned
# before use on this line -- presumably bound earlier in the enclosing
# function (its `def` is not visible here).
#   * e is advanced by node.op.n_nit_sot; node.inputs[b:e] is appended to
#     nw_ins unchanged, while every input after e goes through safe_to_gpu.
#   * scan_ins is node.op.inputs mapped through tensor_to_gpu; scan_outs is
#     node.op.outputs mapped through safe_to_gpu and then cloned with each
#     original inner input replaced by safe_to_cpu of its scan_ins entry.
#   * info['gpu_hash'] receives hash() of the CLinker cmodule key of a
#     FunctionGraph built (clone=False) from gpu_reconstruct_graph's result.
#   * The Scan op is created with typeConstructor=GpuArrayType, applied to
#     nw_ins through make_node, and the new node's outputs are returned.
#   * optdb.register is given the name 'gpua_scanOp_make_inplace', the value
#     75 and the tags 'gpua', 'fast_run', 'inplace', 'scan'.
# NOTE(review): `replace=zip(...)` is a one-shot iterator on Python 3;
# confirm scan_utils.clone accepts that -- TODO verify.
# NOTE(review): all of the above shares one physical line; as written Python
# treats everything after the first '#' as comment text. Line breaks and
# indentation appear to have been lost -- TODO confirm and restore the layout.
# NOTE(review): the same optimizer name is registered on neighbouring lines
# of this file section; confirm the duplicates are intended.
e = e + node.op.n_nit_sot nw_ins += node.inputs[b:e] nw_ins += [safe_to_gpu(x) for x in node.inputs[e:]] scan_ins = [tensor_to_gpu(x) for x in node.op.inputs] scan_outs = [safe_to_gpu(x) for x in node.op.outputs] scan_outs = scan_utils.clone( scan_outs, replace=zip(node.op.inputs, [safe_to_cpu(x) for x in scan_ins])) # We need to construct the hash here, because scan # __init__ does not know about the gpu and can not # handle graphs with inputs being on the gpu tmp_in, tmp_out = gpu_reconstruct_graph(scan_ins, scan_outs) local_fgraph = gof.FunctionGraph(tmp_in, tmp_out, clone=False) _cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, []) info['gpu_hash'] = hash(_cmodule_key) nw_op = scan_op.Scan(scan_ins, scan_outs, info, typeConstructor=GpuArrayType).make_node(*nw_ins) return nw_op.outputs optdb.register('gpua_scanOp_make_inplace', scan_opt.ScanInplaceOptimizer(typeConstructor=GpuArrayType, gpua_flag=True), 75, 'gpua', 'fast_run', 'inplace', 'scan')
# Closing statements of a Scan-to-GPU rewrite followed by registration of the
# in-place Scan optimizer. info and node are not assigned on this line --
# presumably bound earlier in the enclosing function (its `def` is not
# visible here).
#   * info['gpua'] is set to True.
#   * nw_ins starts with node.inputs[0] unchanged. Inputs 1..e-1, where
#     e = 1 + n_seqs + n_mit_mot + n_mit_sot + n_sit_sot + n_shared_outs,
#     go through safe_to_gpu; the next node.op.n_nit_sot inputs are appended
#     unchanged; all remaining inputs go through safe_to_gpu.
#   * scan_ins is node.op.inputs mapped through tensor_to_gpu; scan_outs is
#     node.op.outputs mapped through safe_to_gpu and then cloned with each
#     original inner input replaced by safe_to_cpu of its scan_ins entry.
#   * info['gpu_hash'] receives hash() of the CLinker cmodule key of a
#     FunctionGraph built (clone=False) from gpu_reconstruct_graph's result.
#   * The Scan op is created without a typeConstructor argument, applied to
#     nw_ins through make_node, and the new node's outputs are returned.
#   * optdb.register is given the name 'gpua_scanOp_make_inplace', a
#     ScanInplaceOptimizer(gpua_flag=True), the value 75 and the tags
#     'gpua', 'fast_run', 'inplace', 'scan'.
# NOTE(review): `replace=zip(...)` is a one-shot iterator on Python 3;
# confirm scan_utils.clone accepts that -- TODO verify.
# NOTE(review): all of the above shares one physical line; as written Python
# treats everything after the first '#' as comment text. Line breaks and
# indentation appear to have been lost -- TODO confirm and restore the layout.
# NOTE(review): the same optimizer name is registered on earlier lines of
# this file section; confirm the duplicates are intended.
info['gpua'] = True nw_ins = [node.inputs[0]] e = (1 + node.op.n_seqs + node.op.n_mit_mot + node.op.n_mit_sot + node.op.n_sit_sot + node.op.n_shared_outs) nw_ins += [safe_to_gpu(x) for x in node.inputs[1:e]] b = e e = e + node.op.n_nit_sot nw_ins += node.inputs[b:e] nw_ins += [safe_to_gpu(x) for x in node.inputs[e:]] scan_ins = [tensor_to_gpu(x) for x in node.op.inputs] scan_outs = [safe_to_gpu(x) for x in node.op.outputs] scan_outs = scan_utils.clone( scan_outs, replace=zip(node.op.inputs, [safe_to_cpu(x) for x in scan_ins])) # We need to construct the hash here, because scan # __init__ does not know about the gpu and can not # handle graphs with inputs being on the gpu tmp_in, tmp_out = gpu_reconstruct_graph(scan_ins, scan_outs) local_fgraph = gof.FunctionGraph(tmp_in, tmp_out, clone=False) _cmodule_key = gof.CLinker().cmodule_key_(local_fgraph, []) info['gpu_hash'] = hash(_cmodule_key) nw_op = scan_op.Scan(scan_ins, scan_outs, info).make_node(*nw_ins) return nw_op.outputs optdb.register('gpua_scanOp_make_inplace', scan_opt.ScanInplaceOptimizer(gpua_flag=True), 75, 'gpua', 'fast_run', 'inplace', 'scan')