def test_softmax_optimizations(self):
    x = matrix("x")
    one_of_n = lvector("one_of_n")
    op = crossentropy_categorical_1hot
    # xe = op(x, one_of_n)

    fgraph = FunctionGraph([x, one_of_n], [op(softmax_legacy(x), one_of_n)])
    assert fgraph.outputs[0].owner.op == op

    optdb.query(OPT_FAST_RUN).optimize(fgraph)

    assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
def test_softmax_optimizations_w_bias2(self):
    x = matrix("x")
    b = vector("b")
    c = vector("c")
    one_of_n = lvector("one_of_n")
    op = crossentropy_categorical_1hot

    fgraph = FunctionGraph(
        [x, b, c, one_of_n], [op(softmax_legacy(add(x, b, c)), one_of_n)]
    )
    assert fgraph.outputs[0].owner.op == op

    optdb.query(OPT_FAST_RUN).optimize(fgraph)

    assert len(fgraph.toposort()) == 2
    assert fgraph.outputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
def test_broadcast(self):
    # Test that the optimizer does not raise an error when some or all
    # inputs are broadcasted, since softmax_with_bias does not handle
    # such broadcast patterns correctly.
    initial_W = np.asarray(
        [[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.1, 0.1, 0.1]],
        dtype=config.floatX,
    )
    W = aesara.shared(value=initial_W, name="W")
    vbias = aesara.shared(value=0.1, name="vbias")  # 0.01
    hid = vector("hid")

    f = aesara.function([hid], softmax_legacy(dot(hid, W.T) + vbias))
    ops = [node.op for node in f.maker.fgraph.toposort()]
    assert softmax_with_bias not in ops
    assert softmax_legacy in ops

    f([0, 1, 0])
def test_softmax_grad_optimizations(self):
    x = matrix("x")
    one_of_n = lvector("one_of_n")
    op = crossentropy_categorical_1hot
    xe = op(softmax_legacy(x), one_of_n)
    sum_xe = aet_sum(xe)
    g_x = grad(sum_xe, x)
    fgraph = FunctionGraph([x, one_of_n], [g_x])
    assert check_stack_trace(
        fgraph,
        ops_to_check=[crossentropy_softmax_1hot_with_bias_dx, softmax_legacy],
    )

    optdb.query(OPT_FAST_RUN).optimize(fgraph)

    ops = {node.op for node in fgraph.toposort()}
    assert crossentropy_softmax_argmax_1hot_with_bias not in ops
    assert crossentropy_softmax_1hot_with_bias_dx in ops
    assert softmax_legacy in ops
    assert softmax_grad_legacy not in ops
def f(inputs):
    # Note: `x` is assumed to be a symbolic matrix defined in the enclosing
    # test; the gradient w.r.t. `x` is driven entirely through `known_grads`
    # rather than from an explicit cost expression.
    y = softmax_legacy(x)
    return aesara.grad(None, x, known_grads={y: inputs})