def conv_efficient(self, x, w, b, output_size, vec_idx_key, strides=1):
    batches = x.shape[0]
    depth_i = x.shape[1]
    filter_size = w.shape[2]
    depth_o = w.shape[0]
    if 4 == x.ndim:  # original layout: (batches, depth_i, input_size, input_size)
        input_size = x.shape[2]
        # padding size
        p = int(((output_size - 1) * strides + filter_size - input_size) / 2)
        # logger.debug("padding begin..")
        if p > 0:  # padding needed
            x_pad = Tools.padding(x, p, self.dataType)
        else:
            x_pad = x
        st = time.time()
        logger.debug("vecting begin..")
        # Pick whichever of the three vectorization variants below runs fastest on your hardware
        x_col = self.vectorize4conv_batches(x_pad, filter_size, output_size, strides)
        # x_col = spd.vectorize4conv_batches(x_pad, filter_size, output_size, strides)
        # x_col = vec_by_idx(x_pad, filter_size, filter_size, vec_idx_key, 0, strides)
        logger.debug("vecting end.. %f s" % (time.time() - st))
    else:  # input is already in x_col layout
        x_col = x

    w_row = w.reshape(depth_o, x_col.shape[1])
    conv = np.zeros((batches, depth_o, (output_size * output_size)), dtype=self.dataType)

    st1 = time.time()
    logger.debug("matmul begin..")
    # loop over the batch instead of broadcasting, for better efficiency
    for batch in range(batches):
        conv[batch] = Tools.matmul(w_row, x_col[batch]) + b
    logger.debug("matmul end.. %f s" % (time.time() - st1))

    conv_return = conv.reshape(batches, depth_o, output_size, output_size)
    return conv_return
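# A minimal usage sketch for the im2col-style forward pass above (assumptions: the
# enclosing layer object is called `layer` here only for illustration, the bias is
# stored as a (depth_o, 1) column so it broadcasts over output positions, and all
# shapes are examples, not values from the source):
#
#   x = np.random.randn(8, 3, 32, 32).astype(np.float32)   # (batches, depth_i, H, W)
#   w = np.random.randn(16, 3, 3, 3).astype(np.float32)    # (depth_o, depth_i, k, k)
#   b = np.zeros((16, 1), dtype=np.float32)
#   out = layer.conv_efficient(x, w, b, output_size=32, vec_idx_key="conv1", strides=1)
#   # out.shape == (8, 16, 32, 32); with k=3 and strides=1 the padding p resolves to 1
#   # ("same" convolution). Passing an already vectorized x_col (x.ndim != 4) skips the
#   # padding and im2col step entirely.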
def conv4dw(self, x, w, output_size, b=0, strides=1, x_v=False):
    batches = x.shape[0]
    depth_i = x.shape[1]
    filter_size = w.shape[2]  # filter size; here it equals the conv layer's error-map size
    x_per_filter = filter_size * filter_size
    depth_o = w.shape[1]
    if False == x_v:  # original layout: (batches, depth_i, input_size, input_size)
        input_size = x.shape[2]
        # padding size
        p = int(((output_size - 1) * strides + filter_size - input_size) / 2)
        if p > 0:  # padding needed
            x_pad = Tools.padding(x, p, self.dataType)
        else:
            x_pad = x
        logger.debug("vec4dw begin..")
        x_col = self.vectorize4convdw_batches(x_pad, filter_size, output_size, strides)
        logger.debug("vec4dw end..")
    else:  # input is already in x_col layout
        x_col = x

    w_row = w.reshape(batches, depth_o, x_per_filter)
    conv = np.zeros((batches, depth_i, depth_o, (output_size * output_size)), dtype=self.dataType)

    logger.debug("conv4dw matmul begin..")
    for batch in range(batches):
        for col in range(depth_i):
            conv[batch, col] = Tools.matmul(w_row[batch], x_col[batch, col])
    conv_sum = np.sum(conv, axis=0)
    # transpose before reshaping, rather than reshaping directly, to avoid misaligning the depth axes
    conv = conv_sum.transpose(1, 0, 2).reshape(depth_o, depth_i, output_size, output_size)
    logger.debug("conv4dw matmul end..")

    return conv, x_col
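# A minimal usage sketch for the weight-gradient pass above (assumptions: `x` is the
# layer input saved from the forward pass and `w` carries the back-propagated deltas,
# so the returned tensor has the kernel's shape; `layer` and all shapes are
# illustrative, not values from the source):
#
#   x_fwd = np.random.randn(8, 3, 32, 32).astype(np.float32)   # (batches, depth_i, H, W)
#   delta = np.random.randn(8, 16, 30, 30).astype(np.float32)  # (batches, depth_o, out, out)
#   dw, x_col = layer.conv4dw(x_fwd, delta, output_size=3)     # dw.shape == (16, 3, 3, 3)
#   # On later calls the cached x_col can be reused by passing it as `x` with x_v=True,
#   # which skips the padding and vectorization step.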