def update(self) -> bool:
    """
    Make the `workspace` input of the base operator agree with its output `y`.

    Both the data order and the texture shape of `workspace` are aligned to those of `y`.

    Returns:
        True if the order or the texture shape of `workspace` had to be changed, False otherwise.
    """
    base = self.base  # type: Concat
    y = base.outputs["y"]
    workspace = base.inputs["workspace"]

    flag_changed = False

    if y.order != workspace.order:
        flag_changed = True
        workspace.change_order(y.order)

    if TextureShape.get(y) != TextureShape.get(workspace):
        flag_changed = True
        # TextureShape.get() is unpacked as (height, width) everywhere else in this backend.
        # Keyword arguments keep the call correct whatever the positional order of
        # TextureShape.set() is.
        height, width = TextureShape.get(y)
        TextureShape.set(workspace, height=height, width=width)

    return flag_changed
def optimize(self, graph: Graph):
    """
    Dump the graph and assert that every variable's texture fits into the WebGL size limit.

    Returns:
        The unmodified graph and False (this pass never changes the graph).

    Raises:
        AssertionError: if a texture's height or width exceeds `config.WEBGL_MAX_TEXTURE_SIZE`.
    """
    traverse.dump(graph)

    limit = config.WEBGL_MAX_TEXTURE_SIZE
    for v in traverse.listup_variables(graph):
        height, width = TextureShape.get(v)
        fits = height <= limit and width <= limit
        # The message is only built when the assertion fails.
        assert fits, (
            f"Texture size is invalid: {v.name} \n"
            f" (variable shape)={v.shape}, \n"
            f" (channel mode)={ChannelMode.get(v).name}, \n"
            f" (texture shape)=(width={width}, height={height}), \n"
            f" (WEBGL_MAX_TEXTURE_SIZE)={config.WEBGL_MAX_TEXTURE_SIZE}"
        )

    return graph, False
def _replace_input(op: Operator, var_name: str, target: ChannelModeEnum):
    """
    Insert a channel-mode conversion in front of one input of `op` when it is needed.

    before)  v -{op}-
    after)   v -{conversion}- v' -{op}-

    Args:
        op: operator whose input is inspected
        var_name: key of the input in `op.inputs`
        target: channel mode the input is required to have

    Returns:
        False if the input already has the target channel mode (nothing is changed),
        True if a converted variable was created and substituted for the input.
    """
    source = op.inputs[var_name]
    if ChannelMode.get(source) == target:
        return False

    converter = convert_r_to_rgba if target == ChannelModeEnum.RGBA else convert_rgba_to_r
    converted = converter(source)

    # The converted variable takes over the texture shape of the variable it replaces.
    source_shape = TextureShape.get(source)
    TextureShape.set(converted, height=source_shape[0], width=source_shape[1])

    op.replace_input(source, converted)
    return True
def optimize(self, graph: Graph):
    """
    Attach a `SplitTarget` attribute to every variable whose texture exceeds the WebGL size limit.

    Returns:
        The graph and a flag that is True when at least one new `SplitTarget` attribute was added.
    """
    limit = config.WEBGL_MAX_TEXTURE_SIZE
    marked_any = False

    for v in traverse.listup_variables(graph):
        height, width = TextureShape.get(v)
        fits = height <= limit and width <= limit

        # Nothing to do for textures that fit, or for variables that are already marked.
        if fits or v.has_attribute(SplitTarget):
            continue

        marked_any = True
        v.attributes.add(SplitTarget(v))

    return graph, marked_any
def optimize(self, graph: Graph):
    """
    Dump the graph and assert that every resolved variable's texture fits into the WebGL size limit.

    Variables whose size still contains unresolved placeholders are not checked.

    Returns:
        The unmodified graph and False (this pass never changes the graph).

    Raises:
        AssertionError: if a texture's height or width exceeds `config.WEBGL_MAX_TEXTURE_SIZE`.
    """
    traverse.dump(graph)

    limit = config.WEBGL_MAX_TEXTURE_SIZE
    for v in traverse.listup_variables(graph):
        if Placeholder.check_resolved(v.size):
            height, width = TextureShape.get(v)
            fits = height <= limit and width <= limit
            # NOTE(review): the message literal is reproduced exactly as it appears in the reviewed
            # text; confirm its original line breaks.
            assert fits, f""" [SplitTexture] Texture size is invalid: {v.name} (variable shape)={v.shape} (channel mode)={ChannelMode.get(v).name} (texture shape)=(width={width}, height={height}) (WEBGL_MAX_TEXTURE_SIZE)={config.WEBGL_MAX_TEXTURE_SIZE}"""

    return graph, False
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    """
    Decide the channel mode and the texture shape of both inputs of every `Tensordot` operator.

    With K the product of the sizes of A's reduced axes (`op.axes[0]`), M = A.size // K and
    N = B.size // K:

    - if `self.optimize_channel_mode` is set and K is a multiple of 4, both inputs use RGBA
      channel mode with texture shapes (M, K // 4) and (N, K // 4);
    - otherwise both inputs use R channel mode with texture shapes (M, K) and (N, K).

    Returns:
        The graph and a flag that is True when any attribute was actually rewritten.
    """

    def ensure_channel_mode(v, mode) -> bool:
        # Rewrites the channel mode only when it differs; reports whether it did.
        if ChannelMode.get(v) != mode:
            ChannelMode.set(v, mode)
            return True
        return False

    def ensure_texture_shape(v, height, width) -> bool:
        # Both sides are normalized to tuples: a list never compares equal to a tuple, so the
        # comparison must not depend on the sequence type returned by TextureShape.get().
        if tuple(TextureShape.get(v)) != (height, width):
            TextureShape.set(v, height=height, width=width)
            return True
        return False

    flag_changed = False
    for op in traverse.filter_nodes(traverse.listup_operators(graph), Tensordot):  # type: Tensordot
        A = op.inputs["A"]
        B = op.inputs["B"]
        K = mul(A.shape_dict[a] for a in op.axes[0])
        M = A.size // K
        N = B.size // K

        if self.optimize_channel_mode and K % 4 == 0:
            mode = ChannelModeEnum.RGBA
            width = K // 4
        else:
            mode = ChannelModeEnum.R
            width = K

        # Channel modes are settled for both inputs before any texture shape is touched.
        for v in (A, B):
            if ensure_channel_mode(v, mode):
                flag_changed = True

        for v, height in ((A, M), (B, N)):
            if ensure_texture_shape(v, height, width):
                flag_changed = True

    return graph, flag_changed
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    """
    Decide the channel mode and the texture shape of both inputs of every `Tensordot` operator.

    With K the product of the sizes of A's reduced axes (`op.axes[0]`), M = A.size // K and
    N = B.size // K, both inputs use RGBA channel mode when K is a multiple of 4 and R channel
    mode otherwise. The texture shapes are set to (height=M, width=K) for A and
    (height=N, width=K) for B in both cases.

    Returns:
        The graph and a flag that is True when any attribute was actually rewritten.
    """

    def ensure_channel_mode(v, mode) -> bool:
        # Rewrites the channel mode only when it differs; reports whether it did.
        if ChannelMode.get(v) != mode:
            ChannelMode.set(v, mode)
            return True
        return False

    def ensure_texture_shape(v, height, width) -> bool:
        # Both sides are normalized to tuples: a list never compares equal to a tuple, so the
        # comparison must not depend on the sequence type returned by TextureShape.get().
        if tuple(TextureShape.get(v)) != (height, width):
            TextureShape.set(v, height=height, width=width)
            return True
        return False

    flag_changed = False
    for op in traverse.filter_nodes(traverse.listup_operators(graph), Tensordot):
        A = op.inputs["A"]
        B = op.inputs["B"]
        K = mul(A.shape_dict[a] for a in op.axes[0])
        M = A.size // K
        N = B.size // K

        mode = ChannelModeEnum.RGBA if K % 4 == 0 else ChannelModeEnum.R

        # Channel modes are settled for both inputs before any texture shape is touched.
        for v in (A, B):
            if ensure_channel_mode(v, mode):
                flag_changed = True

        for v, height in ((A, M), (B, N)):
            if ensure_texture_shape(v, height, K):
                flag_changed = True

    return graph, flag_changed
def _get_allocations(graph: Graph, operators: List[Operator], variables: List[Variable]) -> WebGLAllocationDict:
    """
    Compute, for each variable, the texture allocation and the time span during which it is alive.

    Time is measured in operator indices: operator `operators[t]` runs at time `t`, and
    `len(operators)` is the time after the last operator.

    Args:
        graph: graph whose inputs and outputs are kept allocated until the end
        operators: operators in the order in which they are scanned
        variables: variables for which a retain counter is prepared

    Returns:
        Mapping from variable to its `WebGLAllocation`.
    """
    T_LAST = len(operators)
    allocations = {}  # type: WebGLAllocationDict
    retain_count = {v: 0 for v in variables}  # type: Dict[Variable, int]
    allocated = set()  # type: Set[Variable]

    def register(v: Variable, begin: int, end: int) -> None:
        # Create (or overwrite) the allocation of `v`. The allocation width is the texture width
        # divided by the number of elements per pixel, rounded up.
        height, width = TextureShape.get(v)
        elements_per_pixel = ChannelMode.elements_per_pixel(v)
        allocations[v] = WebGLAllocation(width=(width + elements_per_pixel - 1) // elements_per_pixel,
                                         height=height,
                                         channel_mode=ChannelMode.get(v),
                                         begin=begin,
                                         end=end,
                                         name=v.name)
        allocated.add(v)

    # Constant variables cannot be released.
    for v in traverse.filter_nodes(variables, ConstantVariable):  # type: ConstantVariable
        register(v, begin=0, end=T_LAST)

    # Input variables cannot be released.
    for v in graph.inputs:
        register(v, begin=0, end=T_LAST)

    # Output variables cannot be released either, but they need not be allocated from the beginning.
    for v in graph.outputs:
        register(v, begin=_T_UNKNOWN, end=T_LAST)

    for t, op in enumerate(operators):
        for v in op.outputs.values():
            if v not in allocated:
                # First appearance: the variable becomes alive when this operator runs.
                register(v, begin=t, end=_T_UNKNOWN)
                retain_count[v] = len(v.input_to)

            elif allocations[v].begin == _T_UNKNOWN:
                # Allocation already exists (output variable, etc.); only its begin time was pending.
                allocations[v].begin = t

        for v in op.inputs.values():
            if v not in allocated:
                register(v, begin=t, end=_T_UNKNOWN)
                retain_count[v] = len(v.input_to)

            if allocations[v].end != _T_UNKNOWN:
                # Release timing is already determined (input, output, or constant variable).
                continue

            # One consumer of `v` is done.
            remaining = retain_count[v] - 1
            retain_count[v] = remaining
            if remaining == 0:
                # `t + 1` means that `v` is released *AFTER* `op` has finished.
                allocations[v].end = t + 1

    return allocations
def _choose_split_axis(v: Variable) -> Axis:
    """
    For a too-large texture `v`, choose the axis along which splitting `v` reduces the texture size
    the most.

    Args:
        v: Variable whose size is too large (= this variable has :code:`SplitTarget` attribute)

    Returns:
        The chosen axis.

    Raises:
        ValueError: if no axis is splittable with respect to all operators related to `v`.
        NotImplementedError: if even the best candidate axis cannot be split.
    """
    # Every operator that consumes or produces `v` restricts the candidate axes.
    ops = list(v.input_to)
    if v.output_from is not None:
        ops.append(v.output_from)

    splittable_axes = list(v.order.axes)
    for op in ops:
        allowed_by_op = _listup_splittable_axis(v, op) + [attr.axis for attr in op.get_attribute(Tensorwise)]
        splittable_axes = [a for a in splittable_axes if a in allowed_by_op]

    if len(splittable_axes) == 0:
        raise ValueError("No axis is splittable")

    # Calculate the size of a side of texture which will be changed when each axis is split
    #
    # ex) OrderNC, N=512, C=2048, texture(width=2048, height=512)
    #     => If axis `N` is split, then height will be changed => N: 512 (=height)
    #        If axis `C` is split, then width will be changed  => C: 2048 (=width)
    #
    # ex) OrderNCHW, N=1, C=512, H=13, W=13, texture(width=2048, height=43)
    #     => TexW == W*H*(partial of C)  texture width consists of axis W, H and C.
    #        TexH == (partial of C)*N    texture height consists of axis C and N.
    #     => N cannot be split                                   => N: -1
    #        C is related both width and height. In this case, use large one.
    #                                                            => C: 2048
    #        H is included in width                              => H: 2048
    #        W is also included in width                         => W: 2048
    axis_corresponding_texture_size = AxisKeyDict()
    element_per_pixel = ChannelMode.elements_per_pixel(v)
    tex_h, tex_w = TextureShape.get(v)
    tex_w = (tex_w + element_per_pixel - 1) // element_per_pixel
    row_elements = tex_w * element_per_pixel  # number of elements stored in one texture row

    for a in v.order.axes:
        axis_size = v.shape_dict[a]
        axis_stride = v.stride_dict[a]

        if axis_size == 1:
            # This axis cannot be split
            related_size = -1

        elif axis_stride >= row_elements:
            related_size = tex_h

        elif axis_stride * axis_size >= row_elements:
            related_size = max(tex_h, tex_w)

        else:
            related_size = tex_w

        axis_corresponding_texture_size[a] = related_size

    # Largest related texture side first; the first candidate becomes the split axis.
    splittable_axes.sort(key=lambda a: axis_corresponding_texture_size[a], reverse=True)
    target_axis = splittable_axes[0]

    console.debug(f"===========================================================================")
    console.debug(f"{v}")
    console.debug(f" original order: {v.order}")
    console.debug(f" original shape: {v.shape}")
    console.debug(f" texture shape: {TextureShape.get(v)}")
    console.debug(f"")
    console.debug(f" splittable axis: {splittable_axes}")
    console.debug(f" split axis: {target_axis}")
    console.debug(f"")
    console.debug(f" related operators:")
    for related_op in ops:
        console.debug(f"---------------------------------------------------------------------------")
        traverse.dump_op(related_op)
    console.debug(f"")

    # A non-positive size marks an axis of length 1, which cannot be split.
    if axis_corresponding_texture_size[target_axis] <= 0:
        raise NotImplementedError(f"Variable is too large to handle in WebGL backend: {v}")

    return target_axis
def texture_shape(v: Variable):
    """
    Return the texture geometry of `v` as `(height, width, elements_per_pixel)`.

    The returned width is the width stored in `TextureShape` divided by the number of elements
    per pixel of `v`'s channel mode, rounded up.
    """
    rows, columns = TextureShape.get(v)
    per_pixel = ChannelMode.elements_per_pixel(v)
    columns = (columns + per_pixel - 1) // per_pixel
    return rows, columns, per_pixel
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    """
    Decide the channel mode and the texture shape of both inputs of every `Sgemm` operator.

    RGBA channel mode is used when all of the following hold:

    - `self.optimize_channel_mode` is set,
    - K is a multiple of 4,
    - A is already transposed, or is a constant that can be transposed here,
    - B is already non-transposed, or is a constant that can be transposed here.

    In that case constant inputs are replaced by transposed constants where needed, so that
    afterwards `transpose_A` is True and `transpose_B` is False. Otherwise both inputs use R
    channel mode. If anything changed, constant folding is run on the graph afterwards.

    Returns:
        The graph and a flag that is True when anything was actually rewritten.
    """

    def ensure_channel_mode(v, mode) -> bool:
        # Rewrites the channel mode only when it differs; reports whether it did.
        if ChannelMode.get(v) != mode:
            ChannelMode.set(v, mode)
            return True
        return False

    def ensure_texture_shape(v, shape) -> bool:
        # Both sides are normalized to tuples: a list never compares equal to a tuple, so the
        # comparison must not depend on the sequence type returned by TextureShape.get().
        if tuple(TextureShape.get(v)) != tuple(shape):
            TextureShape.set(v, height=shape[0], width=shape[1])
            return True
        return False

    flag_changed = False
    for sgemm in traverse.filter_nodes(traverse.listup_operators(graph), Sgemm):  # type: Sgemm
        A = sgemm.inputs["A"]
        B = sgemm.inputs["B"]
        M = sgemm.M
        N = sgemm.N
        K = sgemm.K
        transpose_A = sgemm.transpose_A
        transpose_B = sgemm.transpose_B

        use_rgba = (self.optimize_channel_mode
                    and K % 4 == 0
                    and (isinstance(A, ConstantVariable) or transpose_A)
                    and (isinstance(B, ConstantVariable) or not transpose_B))

        if use_rgba:
            if not transpose_A:
                # A is laid out as [K, M]; replace it with a transposed constant.
                assert isinstance(A, ConstantVariable)
                flag_changed = True
                old_A = A
                A = ConstantVariable(A.data.reshape([K, M]).transpose(), Order([Axis(None), Axis(None)]))
                ChannelMode.set(A, ChannelMode.get(old_A))
                sgemm.replace_input(old_A, A, with_assert=False)
                sgemm.parameters["transpose_A"] = transpose_A = True

            if transpose_B:
                # B is laid out as [K, N]; replace it with a transposed constant.
                assert isinstance(B, ConstantVariable)
                flag_changed = True
                old_B = B
                B = ConstantVariable(B.data.reshape([K, N]).transpose(), Order([Axis(None), Axis(None)]))
                ChannelMode.set(B, ChannelMode.get(old_B))
                sgemm.replace_input(old_B, B, with_assert=False)
                sgemm.parameters["transpose_B"] = transpose_B = False

            mode = ChannelModeEnum.RGBA
            texture_shape_A = (M, K // 4) if transpose_A else (K // 4, M)
            texture_shape_B = (K // 4, N) if transpose_B else (N, K // 4)

        else:
            mode = ChannelModeEnum.R
            texture_shape_A = (M, K) if transpose_A else (K, M)
            texture_shape_B = (K, N) if transpose_B else (N, K)

        # Channel modes are settled for both inputs before any texture shape is touched.
        for v in (A, B):
            if ensure_channel_mode(v, mode):
                flag_changed = True

        for v, shape in ((A, texture_shape_A), (B, texture_shape_B)):
            if ensure_texture_shape(v, shape):
                flag_changed = True

    if flag_changed:
        # Presumably folds the freshly created transposed constants — verify against ConstantFolding.
        graph, _ = ConstantFolding().optimize(graph)

    return graph, flag_changed
def texture_shape(v: Variable):
    """
    Return the texture shape of `v` as the list `[width, height]`.

    Note that this is the reverse of the `(height, width)` order in which `TextureShape.get`
    is unpacked.
    """
    rows, columns = TextureShape.get(v)
    return [columns, rows]
def optimize(self, graph: Graph) -> Tuple[Graph, bool]:
    """
    Decide the texture shape of both inputs of every `Sgemm` operator.

    The (height, width) texture shape is (M, K) for a transposed A and (K, M) otherwise,
    and (K, N) for a transposed B and (N, K) otherwise.

    Returns:
        The graph and a flag that is True when any texture shape was actually rewritten.
    """
    flag_changed = False
    for op in traverse.filter_nodes(traverse.listup_operators(graph), Sgemm):  # type: Sgemm
        M = op.M
        N = op.N
        K = op.K

        shape_A = (M, K) if op.transpose_A else (K, M)
        shape_B = (K, N) if op.transpose_B else (N, K)

        for v, (height, width) in ((op.inputs["A"], shape_A), (op.inputs["B"], shape_B)):
            # Both sides are normalized to tuples: a list never compares equal to a tuple, so the
            # comparison must not depend on the sequence type returned by TextureShape.get().
            if tuple(TextureShape.get(v)) != (height, width):
                flag_changed = True
                TextureShape.set(v, width=width, height=height)

    return graph, flag_changed