def _(out_y):
    """Requantize one row of ``unreduced[0]`` into ``self.Y[0]``.

    Reads row ``out_y`` of ``unreduced[0]`` as a vector, passes it through
    ``_unreduced_squant`` together with the input/weight and output
    quantization parameters and ``n_summands``, calls
    ``reduce_after_mul()`` on the result and assigns it to row ``out_y``
    of ``self.Y[0]``.

    NOTE(review): presumably invoked once per row by a loop decorator that
    is not visible here -- confirm against the enclosing function.
    """
    # Resolve the destination first, mirroring the original evaluation order.
    destination_row = self.Y[0][out_y]

    row_vector = unreduced[0][out_y].get_vector()
    operand_params = (self.input_squant.params, self.weight_squant.params)
    result_params = self.output_squant.params

    requantized = _unreduced_squant(
        row_vector, operand_params, result_params, n_summands)
    destination_row.assign_vector(requantized.reduce_after_mul())
def _(i):
    """Requantize the ``i``-th chunk of ``unreduced`` into ``self.Y``.

    Loads ``n_per_thread`` values starting at
    ``unreduced.address + i * n_per_thread``, passes them through
    ``_unreduced_squant`` with the input/weight and output quantization
    parameters and ``n_summands``, calls ``reduce_after_mul()`` and stores
    the result at ``self.Y.address + i * n_per_thread``.

    NOTE(review): presumably one call per chunk, driven by a loop
    decorator outside this view -- confirm against the enclosing function.
    """
    source_address = unreduced.address + i * n_per_thread
    chunk = sint.load_mem(source_address, size=n_per_thread)

    operand_params = (self.input_squant.params, self.weight_squant.params)
    result_params = self.output_squant.params
    requantized = _unreduced_squant(
        chunk, operand_params, result_params, n_summands)
    reduced = requantized.reduce_after_mul()

    # The target offset is computed only after the reduction, exactly as in
    # the original statement order.
    target_address = self.Y.address + i * n_per_thread
    reduced.store_in_mem(target_address)