def _create_cc(self, x, dropout_ratio, e=Engine()):
    """Create the dropout op, its mask buffers and the hint array.

    Parameters
    ----------
    x : object exposing ``shape``; the mask is allocated with that shape
    dropout_ratio : value forwarded unchanged to ``dropout_f32``
    e : engine forwarded to ``array``. The default ``Engine()`` is evaluated
        once, when this function is defined, and reused by every call that
        omits ``e``.

    NOTE(review): the hint is built from ``self.x``, which this method does
    not assign -- presumably it is set by the caller beforehand; confirm.
    """
    self.dropout_op = dropout_f32(dropout_ratio)

    # Float32 buffer with the same shape as x; its contents are not
    # initialised here.
    mask = np.ndarray(shape=x.shape, dtype=np.float32)
    self.mask = mask
    self._mask = array(mask, _format(mask.ndim), e)

    src_mpd = self.x.memory.get_primitive_desc()
    self._hint = mdarray(src_mpd)
def _create_cc(self, xs, gy, hint, axis, e):
    """Queue one reorder per input that copies its slice out of ``gy[0]``.

    Parameters
    ----------
    xs : iterable of float32 arrays; one gradient array is created per item
    gy : sequence whose first element is the incoming gradient
    hint : stored unchanged on ``self._hint``
    axis : index of the dimension along which the slices are laid out
    e : engine forwarded to ``array`` / ``mdarray``

    Side effects: appends the reorder primitives to ``self.dag_`` and sets
    ``self.axis``, ``self.outputs``, ``self.gy``, ``self.xs`` and
    ``self._hint``.
    """
    self.axis = axis
    grad_out = array(gy[0], m.memory.nchw, e)
    grad_out_mpd = grad_out.memory.get_primitive_desc()

    # Running start position of the next slice (four dimensions).
    offsets = (0, 0, 0, 0)
    self.outputs = ()
    for x in xs:
        slice_pd = view.primitive_desc(grad_out_mpd, x.shape, offsets)
        layout = m.get_fmt(grad_out_mpd)
        assert x.dtype == numpy.dtype('float32')
        grad_in = mdarray(x.shape, m.memory.f32, layout, e)

        copy_pd = r.primitive_desc(
            slice_pd.dst_primitive_desc(),
            grad_in.memory.get_primitive_desc())
        self.dag_.push_back(
            r.reorder(copy_pd, at(grad_out.memory), grad_in.memory))
        self.outputs += (grad_in, )

        # Advance the offset along `axis` by the extent just consumed.
        advanced = offsets[axis] + x.shape[axis]
        offsets = offsets[:axis] + (advanced, ) + offsets[axis + 1:]

    self.gy = grad_out
    self.xs = xs
    self._hint = hint
def mkl_sum(xs, func=None):
    """Sum the arrays in ``xs`` with a scale of 1.0 each and return the result.

    Parameters
    ----------
    xs : sequence of arrays accepted by ``array``
    func : optional object; when it has a ``hint`` attribute, the ``gy``
        array of the compute complex returned by
        ``ComputeComplex.get_bd_cc`` is used as the destination, if one
        is found (currently only used for gradient accumulation)

    Returns
    -------
    The destination array, after the primitives have been submitted to a
    ``Stream`` and waited on.
    """
    e = Engine()
    pl = primitive_list()

    # Wrap every input and remember the memory descriptor whose format
    # compares greatest; it becomes the common layout.
    wrapped = ()  # prevent the obj from gc
    best_mpd = None
    for x in xs:
        arr = array(x, _x_format(x.ndim), e)
        mpd = arr.memory.get_primitive_desc()
        if best_mpd is None or m.get_fmt(mpd) > m.get_fmt(best_mpd):
            best_mpd = mpd
        wrapped += (arr,)

    aligned = ()  # prevent the obj from gc
    itm_arr = None  # prevent the obj from gc
    scales = m.vectord()
    xs_mpdl = m.mpd_list()
    xs_pl = ()
    for arr in wrapped:
        outputs = reorder_if_must(arr, best_mpd, e, pl)
        if len(outputs) == 2:
            arr, itm_arr = outputs[:2]
        else:
            arr = outputs[0]
        aligned += (arr,)
        scales.push_back(1.0)
        xs_mpdl.push_back(arr.memory.get_primitive_desc())
        xs_pl += (at(arr.memory), )

    cc_pd = sum.primitive_desc(scales, xs_mpdl)

    cc = None
    if func is not None and hasattr(func, 'hint'):
        # this is only used for grad accumulate currently
        cc = ComputeComplex.get_bd_cc(
            func.hint, pos=(func.rank, func.fanout))
    if cc is not None:
        y = cc.gy
    else:
        y = mdarray(cc_pd.dst_primitive_desc())

    pl.push_back(sum.sum(cc_pd, xs_pl, y.memory))
    stream = Stream()
    stream.submit(pl)
    stream.wait()
    return y
def _create_cc(self, x, ksize, stride, pad, cover_all, e):
    """Build the pooling forward primitive for ``x`` and queue it.

    Parameters
    ----------
    x : array whose ``shape`` unpacks into four values (n, c, h, w)
    ksize, stride, pad : pooling geometry; each is expanded with ``_pair``
    cover_all : forwarded to ``conv.get_conv_outsize``
    e : engine forwarded to ``array`` and the primitive descriptor

    Side effects: appends the primitive to ``self.dag_`` and sets
    ``self.x``, ``self._hint`` (the forward primitive descriptor),
    ``self.outputs`` and ``self.ws`` (``None`` unless ``self.alg_kind`` is
    ``pooling_max``).
    """
    self.ksize = ksize
    self.stride = stride
    self.pad = pad
    self.cover_all = cover_all
    self.x = array(x, m.memory.nchw, e)
    # TODO: check avx512?

    batch, channels, in_h, in_w = x.shape
    stride_h, stride_w = _pair(stride)
    kernel_h, kernel_w = _pair(ksize)
    pad_top, pad_left = _pair(pad)

    out_h = conv.get_conv_outsize(
        in_h, kernel_h, stride_h, pad_top, cover_all=cover_all)
    assert out_h > 0, 'Height in the output should be positive.'
    out_w = conv.get_conv_outsize(
        in_w, kernel_w, stride_w, pad_left, cover_all=cover_all)
    assert out_w > 0, 'Width in the output should be positive.'

    # Trailing padding derived from the output size computed above.
    pad_bottom = stride_h * (out_h - 1) + kernel_h - in_h - pad_top
    pad_right = stride_w * (out_w - 1) + kernel_w - in_w - pad_left

    dst_md = m.desc(
        (batch, channels, out_h, out_w), m.memory.f32, m.memory.any)
    src_md = self.x.memory.get_primitive_desc().desc()
    fwd_desc = pooling_forward.desc(
        forward_training, self.alg_kind, src_md, dst_md, stride, ksize,
        (pad_top, pad_left), (pad_bottom, pad_right), zero)
    fwd_pd = pooling_forward.primitive_desc(fwd_desc, e)
    dst = mdarray(fwd_pd.dst_primitive_desc())

    if self.alg_kind is pooling_max:
        workspace = mdarray(fwd_pd.workspace_primitive_desc())
        prim = pooling_forward.pooling_forward(
            fwd_pd, at(self.x.memory), dst.memory, workspace.memory)
    else:
        # There is no workspace for average pooling
        workspace = None
        prim = pooling_forward.pooling_forward(
            fwd_pd, at(self.x.memory), dst.memory)
    self.dag_.push_back(prim)

    self._hint = fwd_pd
    self.outputs = dst,
    self.ws = workspace
def _create_cc(self, x, gy, hint, y, ws, ksize, stride, pad, cover_all, e):
    """Build the pooling backward primitive and queue it.

    Parameters
    ----------
    x : forward input; its ``shape`` unpacks into four values
    gy : incoming gradient
    hint : forwarded to ``pooling_backward.primitive_desc`` and stored
    y : forward output; for max pooling its memory descriptor is used for
        the gradient and ``gy`` is reordered to it when they differ
    ws : workspace array, read only when ``self.alg_kind`` is ``pooling_max``
    ksize, stride, pad : pooling geometry; each is expanded with ``_pair``
    cover_all : forwarded to ``conv.get_conv_outsize``
    e : engine forwarded to ``array`` and the primitive descriptor

    Side effects: appends primitives to ``self.dag_`` and sets ``self.x``,
    ``self._hint``, ``self.gy`` and ``self.outputs``.
    """
    self.ksize = ksize
    self.stride = stride
    self.pad = pad
    self.cover_all = cover_all
    self.x = array(x, m.memory.nchw, e)
    gy = array(gy, m.memory.nchw, e)

    is_max = self.alg_kind is pooling_max
    if is_max:
        diff_dst_md = y.memory.get_primitive_desc().desc()
    else:
        diff_dst_md = gy.memory.get_primitive_desc().desc()
    diff_src_md = m.desc(x.shape, m.memory.f32, m.memory.any)

    _, _, in_h, in_w = x.shape
    stride_h, stride_w = _pair(stride)
    kernel_h, kernel_w = _pair(ksize)
    pad_top, pad_left = _pair(pad)

    out_h = conv.get_conv_outsize(
        in_h, kernel_h, stride_h, pad_top, cover_all=cover_all)
    assert out_h > 0, 'Height in the output should be positive.'
    out_w = conv.get_conv_outsize(
        in_w, kernel_w, stride_w, pad_left, cover_all=cover_all)
    assert out_w > 0, 'Width in the output should be positive.'

    # Trailing padding derived from the output size computed above.
    pad_bottom = stride_h * (out_h - 1) + kernel_h - in_h - pad_top
    pad_right = stride_w * (out_w - 1) + kernel_w - in_w - pad_left

    bwd_desc = pooling_backward.desc(
        self.alg_kind, diff_src_md, diff_dst_md, stride, ksize,
        (pad_top, pad_left), (pad_bottom, pad_right), zero)
    bwd_pd = pooling_backward.primitive_desc(bwd_desc, e, hint)
    gx = mdarray(bwd_pd.diff_src_primitive_desc())

    if is_max:
        # For max pooling, reorder gy to y's memory descriptor if needed
        outputs = reorder_if_must(
            gy, y.memory.get_primitive_desc(), e, self.dag_)
        if len(outputs) == 2:
            self.reordered_gy, self.itm_arr = outputs[:2]
        else:
            self.reordered_gy = outputs[0]
        prim = pooling_backward.pooling_backward(
            bwd_pd, at(self.reordered_gy.memory), at(ws.memory), gx.memory)
    else:
        # There is no workspace for average pooling
        prim = pooling_backward.pooling_backward(
            bwd_pd, at(gy.memory), gx.memory)
    self.dag_.push_back(prim)

    self._hint = hint
    self.gy = gy
    self.outputs = gx,
def _create_cc(self, inputs, fwd_x, gy, hint, flags, eps, mean, var, e):
    """Build the batch-normalization backward primitive and queue it.

    Parameters
    ----------
    inputs : sequence whose first three items are (x, gamma, beta); only
        gamma and beta are read here
    fwd_x : array used by the forward pass; stored as ``self.x``
    gy : incoming gradient; reordered to ``fwd_x``'s memory descriptor when
        they differ
    hint : forwarded to ``bn_backward.primitive_desc`` and stored
    flags : bit flags; ``use_scale_shift`` selects the weighted variant
    eps : forwarded to ``bn_backward.desc``
    mean, var : statistics wrapped with ``array`` for the primitive
    e : engine forwarded to ``array`` and the primitive descriptor

    Side effects: appends primitives to ``self.dag_`` and sets
    ``self.outputs`` to ``(gx, self.gw)``; ``self.gw`` is ``None`` when
    ``use_scale_shift`` is not set.
    """
    self.train = configuration.config.train
    self.flags = flags
    self.eps = eps
    _, gamma, beta = inputs[:3]

    self.x = fwd_x
    x_mpd = self.x.memory.get_primitive_desc()
    x_md = x_mpd.desc()

    gy = array(gy, m.memory.nchw, e)
    self.gy_src = gy  # keep the un-reordered gradient referenced
    outputs = reorder_if_must(gy, x_mpd, e, self.dag_)
    if len(outputs) == 2:
        gy, self.itm_arr = outputs[:2]
    else:
        gy = outputs[0]
    gy_md = gy.memory.get_primitive_desc().desc()

    cc_d = bn_backward.desc(backward, gy_md, x_md, eps, flags)
    cc_pd = bn_backward.primitive_desc(cc_d, e, hint)
    # NOTE(review): gx is constructed from x's descriptor together with
    # gy.memory -- presumably it reuses gy's buffer; confirm against mdarray.
    gx = mdarray(self.x.memory.get_primitive_desc(), gy.memory)

    # Both primitive variants below read self.mean / self.var, and
    # self.outputs reads self.gw, so bind all three before branching.
    # Previously they were assigned only inside the scale/shift branch,
    # leaving the other path dependent on attributes set elsewhere.
    self.mean = array(mean, m.memory.x, e)
    self.var = array(var, m.memory.x, e)
    self.gw = None

    if flags & use_scale_shift:
        w = numpy.concatenate((gamma, beta), axis=0).reshape((2, -1))
        self.w = array(w, m.memory.nc, e)
        self.gw = mdarray(cc_pd.diff_weights_primitive_desc())
        bwd_p = bn_backward.batch_normalization_backward(
            cc_pd, at(self.x.memory), at(self.mean.memory),
            at(self.var.memory), at(gy.memory), at(self.w.memory),
            gx.memory, self.gw.memory)
    else:
        bwd_p = bn_backward.batch_normalization_backward(
            cc_pd, at(self.x.memory), at(self.mean.memory),
            at(self.var.memory), at(gy.memory), gx.memory)
    self.dag_.push_back(bwd_p)

    self._hint = hint
    self.gy = gy
    self.outputs = gx, self.gw
def _create_cc(self, x, n, k, alpha, beta, e):
    """Build the LRN forward primitive for ``x`` and queue it.

    Parameters
    ----------
    x : input array, wrapped with ``array`` using ``m.memory.nchw``
    n, k, alpha, beta : LRN parameters, stored and forwarded to
        ``lrn_forward.desc``
    e : engine forwarded to ``array`` and the primitive descriptor

    Side effects: appends the primitive to ``self.dag_`` and sets
    ``self.x``, ``self._hint`` (the forward primitive descriptor),
    ``self.outputs`` and ``self.ws``.
    """
    self.n, self.k = n, k
    self.alpha, self.beta = alpha, beta

    # TODO: check avx512?
    self.x = array(x, m.memory.nchw, e)
    src_md = self.x.memory.get_primitive_desc().desc()

    fwd_desc = lrn_forward.desc(
        forward_training, lrn_across_channels, src_md, n, alpha, beta, k)
    fwd_pd = lrn_forward.primitive_desc(fwd_desc, e)

    dst = mdarray(fwd_pd.dst_primitive_desc())
    workspace = mdarray(fwd_pd.workspace_primitive_desc())
    prim = lrn_forward.lrn_forward(
        fwd_pd, at(self.x.memory), workspace.memory, dst.memory)
    self.dag_.push_back(prim)

    self._hint = fwd_pd
    self.outputs = dst,
    self.ws = workspace
def reorder_if_must(x, expect, e, net_):
    """Return ``x`` in the ``expect`` layout, queueing a reorder if needed.

    Parameters
    ----------
    x : array exposing ``memory.get_primitive_desc()``
    expect : memory primitive descriptor the caller wants
    e : unused by this function
    net_ : primitive list that receives the reorder primitive, if any

    Returns
    -------
    A 1-tuple: ``(x,)`` when the descriptors already match, otherwise a
    1-tuple holding a newly allocated array that the queued reorder fills.
    """
    src_memory = x.memory
    if src_memory.get_primitive_desc() != expect:
        converted = mdarray(expect)
        net_.push_back(r.reorder(at(src_memory), converted.memory))
        return (converted,)
    return (x,)
def array(obj, *args):
    """Convert the input to an mdarray.

    Parameters
    ----------
    obj : mdarray or numpy ndarray object
    *args : extra positional arguments forwarded to ``mdarray`` when a
        numpy array is wrapped

    Returns
    -------
    ``obj`` itself when it already is an mdarray; otherwise an mdarray
    built from a C-contiguous version of the numpy array.

    Raises
    ------
    NotImplementedError
        If ``obj`` is neither an mdarray nor a numpy ndarray.
    """
    if isinstance(obj, mdarray):
        return obj
    if not isinstance(obj, numpy.ndarray):
        raise NotImplementedError
    return mdarray(numpy.ascontiguousarray(obj), *args)
def warray(w):
    """Wrap a float32 weight array in an mdarray with a rank-based format.

    Parameters
    ----------
    w : array exposing ``ndim`` and ``dtype``; rank 1, 2 or 4

    Returns
    -------
    An mdarray created on a new ``Engine()`` with format ``m.memory.x``
    (rank 1), ``m.memory.oi`` (rank 2) or ``m.memory.oihw`` (rank 4).

    Raises
    ------
    NotImplementedError
        If the rank is not 1, 2 or 4, or the dtype is not float32.
    """
    rank = w.ndim
    if rank not in (1, 2, 4):
        raise NotImplementedError

    if rank == 1:
        layout = m.memory.x
    elif rank == 2:
        layout = m.memory.oi
    else:
        layout = m.memory.oihw

    if w.dtype != numpy.float32:
        raise NotImplementedError

    return mdarray(w, layout, Engine())
def as_tensor(obj, fmt):
    """Convert the input to an internal tensor acceptable by MKL-DNN.

    Parameters
    ----------
    obj : mdarray or numpy ndarray
    fmt : tensor data format (m.nchw, m.oihw, etc.), used only when a
        numpy array is wrapped

    Returns
    -------
    ``obj`` unchanged when it already is an mdarray; otherwise an mdarray
    built from a C-contiguous version of the numpy array.

    Raises
    ------
    NotImplementedError
        If ``obj`` is neither an mdarray nor a numpy ndarray.
    """
    if isinstance(obj, mdarray):
        return obj
    if isinstance(obj, numpy.ndarray):
        contiguous = numpy.ascontiguousarray(obj)
        return mdarray(contiguous, fmt)
    raise NotImplementedError
def __init__(self, src, dst_shape, dst_dtype, dst_format):
    """Prepare reorders between ``src`` and the requested destination layout.

    Parameters
    ----------
    src : array exposing ``memory.get_primitive_desc()``
    dst_shape : shape used for the destination memory descriptor
    dst_dtype : ``numpy.float32`` or an object with a ``kind`` attribute
        (such as a numpy dtype); kind ``'f'`` maps to ``m.memory.f32``,
        anything else to ``m.memory.s32``
    dst_format : memory format used for the destination descriptor

    When the source descriptor differs from the expected one, a new
    destination array is allocated and a reorder is queued in each
    direction (``fwd_dag``: src -> dst, ``bwd_dag``: dst -> src).
    Otherwise ``self.dst`` is ``src`` and both lists stay empty.
    """
    self.src = src
    self.fwd_dag = primitive_list()
    self.bwd_dag = primitive_list()
    self.src_mpd = src.memory.get_primitive_desc()

    # Compare the kind code by value: `is 'f'` tested object identity and
    # only worked when the two strings happened to be the same object.
    is_float = dst_dtype is numpy.float32 or dst_dtype.kind == 'f'
    dst_dtype = m.memory.f32 if is_float else m.memory.s32

    self.expected_mpd = m.primitive_desc(
        m.desc(dst_shape, dst_dtype, dst_format), Engine())

    if self.src_mpd != self.expected_mpd:
        self.dst = mdarray(self.expected_mpd)
        self.fwd_dag.push_back(
            r.reorder(at(self.src.memory), self.dst.memory))
        self.bwd_dag.push_back(
            r.reorder(at(self.dst.memory), self.src.memory))
    else:
        self.dst = self.src
def _create_cc(self, inputs, e):
    """Build a primitive that adds the first two inputs and queue it.

    Parameters
    ----------
    inputs : sequence whose first two items are the operands
    e : engine forwarded to ``reorder_if_must``

    The second operand is reordered to the first operand's memory
    descriptor when they differ. Both operands get a scale of 1.0.

    Side effects: appends primitives to ``self.dag_`` and sets ``self.x0``,
    ``self.x1``, ``self.x1_reordered`` and ``self.outputs``.
    """
    lhs, rhs = inputs[:2]
    self.x0 = lhs
    self.x1 = rhs

    lhs_mpd = lhs.memory.get_primitive_desc()
    self.x1_reordered = reorder_if_must(rhs, lhs_mpd, e, self.dag_)[0]

    scales = m.vectord()
    operand_mpds = m.mpd_list()
    for operand in (lhs, self.x1_reordered):
        scales.push_back(1.0)
        operand_mpds.push_back(operand.memory.get_primitive_desc())

    cc_pd = sum.primitive_desc(scales, operand_mpds)
    sources = (at(lhs.memory), at(self.x1_reordered.memory))
    y = mdarray(cc_pd.dst_primitive_desc())
    self.dag_.push_back(sum.sum(cc_pd, sources, y.memory))
    self.outputs = y,
def _create_cc(self, x, gy, hint, ws, n, k, alpha, beta, e):
    """Build the LRN backward primitive and queue it.

    Parameters
    ----------
    x : forward input, wrapped with ``array`` using ``m.memory.nchw``
    gy : incoming gradient, wrapped the same way
    hint : forwarded to ``lrn_backward.primitive_desc`` and stored
    ws : workspace array passed to the backward primitive
    n, k, alpha, beta : LRN parameters, stored and forwarded to
        ``lrn_backward.desc``
    e : engine forwarded to ``array`` and the primitive descriptor

    Side effects: appends the primitive to ``self.dag_`` and sets
    ``self.x``, ``self._hint``, ``self.gy`` and ``self.outputs``.
    """
    self.n, self.k = n, k
    self.alpha, self.beta = alpha, beta

    self.x = array(x, m.memory.nchw, e)
    src_md = self.x.memory.get_primitive_desc().desc()
    # TODO: check avx512?
    gy = array(gy, m.memory.nchw, e)
    diff_dst_md = gy.memory.get_primitive_desc().desc()

    bwd_desc = lrn_backward.desc(
        lrn_across_channels, src_md, diff_dst_md, n, alpha, beta, k)
    bwd_pd = lrn_backward.primitive_desc(bwd_desc, e, hint)

    gx = mdarray(bwd_pd.diff_src_primitive_desc())
    prim = lrn_backward.lrn_backward(
        bwd_pd, at(self.x.memory), at(gy.memory), at(ws.memory), gx.memory)
    self.dag_.push_back(prim)

    self._hint = hint
    self.gy = gy
    self.outputs = gx,
def _create_cc(self, x, e=Engine()):
    """Build the ReLU forward primitive for ``x`` and queue it.

    Parameters
    ----------
    x : array with ``ndim`` of 2 (wrapped as ``m.memory.nc``) or 4
        (wrapped as ``m.memory.nchw``)
    e : engine forwarded to ``array`` and the primitive descriptor. The
        default ``Engine()`` is evaluated once, when this function is
        defined, and reused by every call that omits ``e``.

    Raises
    ------
    NotImplementedError
        If ``x.ndim`` is neither 2 nor 4.

    Side effects: appends the primitive to ``self.dag`` and sets
    ``self.x``, ``self._hint`` (the forward primitive descriptor) and
    ``self.outputs``.
    """
    if x.ndim == 2:
        fmt = m.memory.nc
    elif x.ndim == 4:
        fmt = m.memory.nchw
    else:
        # Previously `fmt` stayed unbound here and the next line failed
        # with an UnboundLocalError.
        raise NotImplementedError(
            'unsupported number of dimensions: %d' % x.ndim)

    x = array(x, fmt, e)
    mem_pd = x.memory.get_primitive_desc()
    cc_d = eltwise_forward.desc(
        forward, eltwise_relu, mem_pd.desc(), 0.0, 0.0)
    cc_pd = eltwise_forward.primitive_desc(cc_d, e)
    y = mdarray(cc_pd.dst_primitive_desc())

    self.x = x
    # NOTE(review): the other _create_cc builders in this file push onto
    # `self.dag_`; confirm that `self.dag` is the intended attribute here.
    self.dag.push_back(
        eltwise_forward.eltwise_forward(cc_pd, at(x.memory), y.memory))
    self._hint = cc_pd
    self.outputs = y,
def _create_cc(self, xs, axis, e):
    """Build the concat forward primitive over ``xs`` and queue it.

    Parameters
    ----------
    xs : iterable of arrays, each wrapped with ``array`` using
        ``m.memory.nchw``
    axis : forwarded to ``concat.primitive_desc``
    e : engine forwarded to ``array``

    Side effects: appends the primitive to ``self.dag_`` and sets
    ``self.axis``, ``self._hint`` (the concat primitive descriptor),
    ``self.outputs`` and ``self.xarrays``. The wrapped inputs are kept on
    ``self.xarrays`` so they stay referenced.
    """
    self.axis = axis

    xarrays = tuple(array(x, m.memory.nchw, e) for x in xs)

    xs_mpdl = m.mpd_list()
    for xarray in xarrays:
        xs_mpdl.push_back(xarray.memory.get_primitive_desc())
    xs_pl = tuple(at(xarray.memory) for xarray in xarrays)

    cc_pd = concat.primitive_desc(axis, xs_mpdl)
    y = mdarray(cc_pd.dst_primitive_desc())
    self.dag_.push_back(concat.concat(cc_pd, xs_pl, y.memory))

    self._hint = cc_pd
    self.outputs = y,
    self.xarrays = xarrays
def w_tensor(W):
    """Convert the input to a weight tensor of MKL-DNN.

    Parameters
    ----------
    W : float32 array exposing ``ndim`` and ``dtype``; rank 1, 2 or 4

    Returns
    -------
    An mdarray created on a new ``Engine()`` with format ``m.memory.x``
    (rank 1), ``m.memory.oi`` (rank 2) or ``m.memory.oihw`` (rank 4).

    Raises
    ------
    NotImplementedError
        If the rank is not 1, 2 or 4, or the dtype is not float32.
    """
    ndim = W.ndim
    if ndim == 4:
        fmt = m.memory.oihw
    elif ndim == 2:
        fmt = m.memory.oi
    elif ndim == 1:
        fmt = m.memory.x
    else:
        raise NotImplementedError

    if W.dtype != numpy.float32:
        raise NotImplementedError

    engine = Engine()
    return mdarray(W, fmt, engine)
def _create_cc(self, hint):
    """Allocate ``self.gx`` from ``self.gy``'s descriptor and store ``hint``.

    Parameters
    ----------
    hint : stored unchanged on ``self._hint``

    NOTE(review): reads ``self.gy``, which this method does not assign --
    presumably the caller sets it beforehand; confirm.
    """
    gy_mpd = self.gy.memory.get_primitive_desc()
    self.gx = mdarray(gy_mpd)
    self._hint = hint
def _create_cc(self, inputs, eps, mean, var, e):
    """Build the batch-normalization forward primitive and queue it.

    Parameters
    ----------
    inputs : sequence whose first three items are (x, gamma, beta)
    eps : stored and forwarded to ``bn_forward.desc``
    mean, var : fixed statistics; when ``mean`` is ``None`` the primitive
        is described with ``forward_training``, otherwise with
        ``forward_scoring`` and the ``use_global_stats`` flag
    e : engine forwarded to ``array`` and the primitive descriptors

    Side effects: appends primitives to ``self.dag_`` and sets
    ``self.outputs`` to ``(y, self.flags, self.mean, self.var)``, along
    with ``self.x``, ``self.x_src``, ``self.w``, ``self.numpy_w`` and
    ``self._hint`` (the forward primitive descriptor).
    """
    self.eps = eps
    self.mean = None
    self.var = None
    self.w = None
    self.train = configuration.config.train

    x, gamma, beta = inputs[:3]
    fmt_desired = m.get_desired_format(x.shape[1])
    x = array(x, m.memory.nchw, e)
    assert x.dtype == numpy.dtype('float32')

    # Reorder x to the format chosen from its second dimension, if needed.
    desired_md = m.desc(x.shape, m.memory.f32, fmt_desired)
    desired_mpd = m.primitive_desc(desired_md, e)
    outputs = reorder_if_must(x, desired_mpd, e, self.dag_)
    if len(outputs) == 2:
        self.x, self.itm_arr = outputs[:2]
    else:
        self.x = outputs[0]
    self.x_src = x  # keep the un-reordered input referenced

    # gamma and beta are stacked into one (2, -1) weight array.
    stacked_w = numpy.concatenate((gamma, beta), axis=0).reshape((2, -1))
    self.numpy_w = stacked_w
    self.w = array(stacked_w, m.memory.nc, e)

    scale_shift = True
    self.flags = use_scale_shift
    global_stats = mean is not None
    if global_stats:
        fwd_prop_kind = forward_scoring
        self.flags |= use_global_stats
        self.mean = array(mean, m.memory.x, e)
        self.var = array(var, m.memory.x, e)
    else:
        fwd_prop_kind = forward_training

    x_md = self.x.memory.get_primitive_desc().desc()
    cc_d = bn_forward.desc(fwd_prop_kind, x_md, eps, self.flags)
    cc_pd = bn_forward.primitive_desc(cc_d, e)
    y = mdarray(cc_pd.dst_primitive_desc())

    # TODO reorder weight
    # if scale_shift is True:
    #     w = mdarray(cc_pd.weights_primitive_desc())
    if scale_shift and not global_stats:
        self.mean = mdarray(cc_pd.mean_primitive_desc())
        self.var = mdarray(cc_pd.variance_primitive_desc())

    make_bn = bn_forward.batch_normalization_forward
    if (not configuration.config.train) and (not global_stats):
        # Not training and no statistics supplied: mean/var are not
        # passed to the primitive.
        if scale_shift:
            bnf = make_bn(
                cc_pd, at(self.x.memory), at(self.w.memory), y.memory)
        else:
            bnf = make_bn(cc_pd, at(self.x.memory), y.memory)
    elif global_stats:
        # Supplied statistics are passed after the source.
        if scale_shift:
            bnf = make_bn(
                cc_pd, at(self.x.memory), at(self.mean.memory),
                at(self.var.memory), at(self.w.memory), y.memory)
        else:
            bnf = make_bn(
                cc_pd, at(self.x.memory),
                self.mean.memory, self.var.memory, y.memory)
    else:
        # Training without supplied statistics: mean/var are passed after
        # the destination.
        if scale_shift:
            bnf = make_bn(
                cc_pd, at(self.x.memory), at(self.w.memory), y.memory,
                self.mean.memory, self.var.memory)
        else:
            bnf = make_bn(
                cc_pd, at(self.x.memory), y.memory,
                self.mean.memory, self.var.memory)
    self.dag_.push_back(bnf)

    self._hint = cc_pd
    self.outputs = y, self.flags, self.mean, self.var