def create_one_copy(dst_type, all_types):
    """Generate the copy-dispatch function body for one destination type.

    Emits one switch case per compatible source type; a CPU destination
    falls through to a redispatch, any other backend falls through to an
    error.
    """
    cases = []
    for src_type in all_types:
        # Sparse copies are not yet implemented; skip them entirely.
        if dst_type['Density'] == 'Sparse' or src_type['Density'] == 'Sparse':
            continue

        # THC state is threaded through whenever either side lives on CUDA.
        state = []
        if 'CUDA' in (src_type['Backend'], dst_type['Backend']):
            state.append('context->getTHCState()')

        cuda = ''
        if src_type['Backend'] == 'CUDA':
            if dst_type['Backend'] != 'CUDA':
                # CPU<-CUDA is not handled here; the redispatch fallthrough
                # takes care of it.
                continue
            cuda = 'Cuda'

        body_env = nested_dict({
            'src_scalar_name': src_type['ScalarName'],
            'case_id': src_type['TypeID'],
            'src_tensor': src_type['Tensor'],
            'dst_tensor': dst_type['Tensor'],
            'cuda': cuda,
            'state': state,
        }, dst_type)

        snippets = []
        if dst_type['ScalarType'] == src_type['ScalarType']:
            # Same-dtype host-to-device copies get the async fast path first.
            if dst_type['Backend'] == 'CUDA' and src_type['Backend'] == 'CPU':
                snippets.append(COPY_ASYNC_CPU.substitute(body_env))
        snippets.append(COPY.substitute(body_env))
        cases.append(CASE.substitute(body_env, copies=snippets))

    if dst_type['Backend'] == 'CPU':
        # CPU fallthrough redispatches to _s_copy_from
        # (Backend == CPU implies Dense).
        assert dst_type['Density'] == 'Dense'
        fallthrough = FUNCTION_FALLTHROUGH_REDISPATCH
    else:
        fallthrough = FUNCTION_FALLTHROUGH_ERROR

    env = nested_dict({'function_fallthrough': fallthrough}, dst_type)
    return FUNCTION.substitute(env, copy_body=cases)
def create_one(env, all_types):
    """Generate the copy dispatch for type *env* over every source type."""
    cases = []
    for src_type in all_types:
        # Sparse copies are not yet implemented; skip them.
        if env['Density'] == 'Sparse' or src_type['Density'] == 'Sparse':
            continue

        cuda = 'Cuda' if src_type['Backend'] == 'CUDA' else ''
        state = []
        if env['Backend'] == 'CUDA' or src_type['Backend'] == 'CUDA':
            # THC state is needed whenever CUDA is involved on either side.
            state.append('context->thc_state')

        combined = nested_dict({
            'src_scalar_name': src_type['ScalarName'],
            'src_id': src_type['TypeID'],
            'src_tensor': src_type['Tensor'],
            'cuda': cuda,
            'state': state,
        }, env)

        snippets = []
        if env['ScalarType'] == src_type['ScalarType']:
            # Same-dtype cross-device copies get the async variants first.
            if env['Backend'] == 'CUDA' and src_type['Backend'] == 'CPU':
                snippets.append(COPY_ASYNC_CPU.substitute(combined))
            if env['Backend'] == 'CPU' and src_type['Backend'] == 'CUDA':
                snippets.append(COPY_ASYNC_CUDA.substitute(combined))
        snippets.append(COPY.substitute(combined))
        cases.append(CASE.substitute(combined, copies=snippets))

    return FUNCTION.substitute(env, copy_body=cases)
def create_one(env, all_types):
    """Emit the copy function for *env*, with one case per source type."""
    copy_body = []
    for source in all_types:
        if env['Density'] == 'Sparse' or source['Density'] == 'Sparse':
            # Sparse copies are not implemented yet.
            continue

        is_cuda_src = source['Backend'] == 'CUDA'
        case_env = nested_dict({
            'src_scalar_name': source['ScalarName'],
            'src_id': source['TypeID'],
            'src_tensor': source['Tensor'],
            # 'Cuda' selects the THC-flavored template text.
            'cuda': 'Cuda' if is_cuda_src else '',
            # THC state argument when either side is a CUDA type.
            'state': (['context->thc_state']
                      if is_cuda_src or env['Backend'] == 'CUDA' else []),
        }, env)

        parts = []
        same_scalar = env['ScalarType'] == source['ScalarType']
        if same_scalar and env['Backend'] == 'CUDA' and not is_cuda_src:
            parts.append(COPY_ASYNC_CPU.substitute(case_env))
        if same_scalar and env['Backend'] == 'CPU' and is_cuda_src:
            parts.append(COPY_ASYNC_CUDA.substitute(case_env))
        parts.append(COPY.substitute(case_env))

        copy_body.append(CASE.substitute(case_env, copies=parts))

    return FUNCTION.substitute(env, copy_body=copy_body)
def create_one_copy_from(src_type, all_types):
    """Generate the _s_copy_from implementation for one source type.

    A dense-CPU source simply swaps arguments and redispatches; otherwise
    one switch case is emitted per destination type.
    """
    if src_type['DenseBackend'] == 'CPU':
        return FUNCTION_FROM_SWAP.substitute(src_type)

    cases = []
    for dst_type in all_types:
        # Sparse copies are not yet implemented.
        if dst_type['Density'] == 'Sparse' or src_type['Density'] == 'Sparse':
            continue

        cuda = 'Cuda' if src_type['Backend'] == 'CUDA' else ''
        state = []
        if 'CUDA' in (dst_type['Backend'], src_type['Backend']):
            state.append('globalContext().getTHCState()')

        body_env = nested_dict({
            'src_scalar_name': src_type['ScalarName'],
            'case_id': dst_type['TypeID'],
            'src_tensor': src_type['Tensor'],
            'dst_tensor': dst_type['Tensor'],
            'cuda': cuda,
            'state': state,
        }, dst_type)

        snippets = []
        if dst_type['ScalarType'] == src_type['ScalarType']:
            # NB: Technically, we have already short-circuited the
            # src_type['Backend'] == 'CUDA' case at the beginning of this
            # function
            if dst_type['Backend'] == 'CPU' and src_type['Backend'] == 'CUDA':
                snippets.append(COPY_ASYNC_CUDA.substitute(body_env))
        if dst_type['Backend'] == 'CPU' and src_type['Backend'] == 'CPU':
            snippets.append(COPY_CPU.substitute())
        else:
            snippets.append(COPY.substitute(body_env))
        cases.append(CASE.substitute(body_env, copies=snippets))

    # See Note [checked_cast_tensor is for dense only]
    checked_cast_src = ''
    if src_type['Density'] != 'Sparse':
        checked_cast_src = \
            'checked_tensor_unwrap(src, "src", 0, false, Backend::{}, ScalarType::{});' \
            .format(src_type['Backend'], src_type['ScalarName'])

    return FUNCTION_FROM.substitute(src_type, copy_body=cases,
                                    checked_cast_src=checked_cast_src)
def create_one_copy_from(src_type, all_types):
    """Emit _s_copy_from for *src_type*, dispatching over destination types."""
    # A dense CPU source is handled by swapping arguments and redispatching.
    if src_type['DenseBackend'] == 'CPU':
        return FUNCTION_FROM_SWAP.substitute(src_type)

    cases = []
    src_is_cuda = src_type['Backend'] == 'CUDA'
    for dst_type in all_types:
        if dst_type['Density'] == 'Sparse' or src_type['Density'] == 'Sparse':
            # Sparse copies are not implemented yet.
            continue

        body_env = nested_dict({
            'src_scalar_name': src_type['ScalarName'],
            'case_id': dst_type['TypeID'],
            'src_tensor': src_type['Tensor'],
            'dst_tensor': dst_type['Tensor'],
            'cuda': 'Cuda' if src_is_cuda else '',
            # THC state argument whenever CUDA is involved on either side.
            'state': (['context->getTHCState()']
                      if src_is_cuda or dst_type['Backend'] == 'CUDA' else []),
        }, dst_type)

        snippets = []
        if dst_type['ScalarType'] == src_type['ScalarType']:
            # NB: Technically, we have already short-circuited the
            # src_type['Backend'] == 'CUDA' case at the beginning of this
            # function
            if dst_type['Backend'] == 'CPU' and src_is_cuda:
                snippets.append(COPY_ASYNC_CUDA.substitute(body_env))
        snippets.append(COPY.substitute(body_env))
        cases.append(CASE.substitute(body_env, copies=snippets))

    return FUNCTION_FROM.substitute(src_type, copy_body=cases)
def create_one_copy_from(src_type, all_types):
    """Build the body of _s_copy_from for one source type."""
    if src_type['DenseBackend'] == 'CPU':
        # Dense CPU sources swap dst/src and redispatch instead of
        # switching on the destination type here.
        return FUNCTION_FROM_SWAP.substitute(src_type)

    copy_body = []
    for dst in all_types:
        # Sparse copies are not yet implemented.
        if 'Sparse' in (dst['Density'], src_type['Density']):
            continue

        cuda = ''
        if src_type['Backend'] == 'CUDA':
            cuda = 'Cuda'
        state = []
        if dst['Backend'] == 'CUDA' or src_type['Backend'] == 'CUDA':
            state.append('context->getTHCState()')

        env = nested_dict({
            'src_scalar_name': src_type['ScalarName'],
            'case_id': dst['TypeID'],
            'src_tensor': src_type['Tensor'],
            'dst_tensor': dst['Tensor'],
            'cuda': cuda,
            'state': state,
        }, dst)

        lines = []
        if dst['ScalarType'] == src_type['ScalarType']:
            # NB: Technically, we have already short-circuited the
            # src_type['Backend'] == 'CUDA' case at the beginning of this
            # function
            if dst['Backend'] == 'CPU' and src_type['Backend'] == 'CUDA':
                lines.append(COPY_ASYNC_CUDA.substitute(env))
        lines.append(COPY.substitute(env))
        copy_body.append(CASE.substitute(env, copies=lines))

    return FUNCTION_FROM.substitute(src_type, copy_body=copy_body)
def create_one_copy(dst_type, all_types):
    """Generate the copy function for one destination type.

    Produces a switch case per valid source type; a CPU destination falls
    through to a _s_copy_from redispatch, any other backend falls through
    to an error.
    """
    copy_body = []
    for src_type in all_types:
        # Sparse copies are not yet implemented; skip them.
        if dst_type['Density'] == 'Sparse' or src_type['Density'] == 'Sparse':
            continue

        state = []
        if 'CUDA' in (src_type['Backend'], dst_type['Backend']):
            state.append('globalContext().getTHCState()')

        cuda = ''
        if src_type['Backend'] == 'CUDA':
            if dst_type['Backend'] != 'CUDA':
                # Don't attempt to process CPU<-CUDA here; that is handled
                # in the redispatch.
                continue
            cuda = 'Cuda'

        body_env = nested_dict({
            'src_scalar_name': src_type['ScalarName'],
            'case_id': src_type['TypeID'],
            'src_tensor': src_type['Tensor'],
            'dst_tensor': dst_type['Tensor'],
            'cuda': cuda,
            'state': state,
        }, dst_type)

        snippets = []
        if dst_type['ScalarType'] == src_type['ScalarType']:
            # Same-dtype device<-host copies get the async fast path first.
            if dst_type['Backend'] == 'CUDA' and src_type['Backend'] == 'CPU':
                snippets.append(COPY_ASYNC_CPU.substitute(body_env))
        snippets.append(COPY.substitute(body_env))
        copy_body.append(CASE.substitute(body_env, copies=snippets))

    if dst_type['Backend'] == 'CPU':
        # CPU fallthrough needs to redispatch to _s_copy_from
        # (Backend == CPU implies Dense).
        assert dst_type['Density'] == 'Dense'
        function_fallthrough = FUNCTION_FALLTHROUGH_REDISPATCH
    else:
        function_fallthrough = FUNCTION_FALLTHROUGH_ERROR

    # Note [checked_cast_tensor is for dense only]
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # checked_cast_tensor is only needed for backends that actually
    # implement copy (and therefore cast). Sparse does not support copies,
    # so no checked cast is required; furthermore the cast would not even
    # compile, since there is no derived Tensor type for sparse.
    checked_cast_dst = ''
    if dst_type['Density'] == 'Dense':
        checked_cast_dst = \
            'checked_tensor_unwrap(dst, "dst", 0, false, Backend::{}, ScalarType::{});' \
            .format(dst_type['Backend'], dst_type['ScalarName'])

    env = nested_dict({
        'function_fallthrough': function_fallthrough,
        'checked_cast_dst': checked_cast_dst,
    }, dst_type)
    return FUNCTION.substitute(env, copy_body=copy_body)