def test_param_allow_downcast_floatX(self):
    a = fscalar("a")
    b = fscalar("b")
    c = fscalar("c")

    f = pfunc(
        [
            In(a, allow_downcast=True),
            In(b, allow_downcast=False),
            In(c, allow_downcast=None),
        ],
        (a + b + c),
    )

    # If the values can be accurately represented, everything is OK
    assert np.all(f(0, 0, 0) == 0)

    # If allow_downcast is True, idem
    assert np.allclose(f(0.1, 0, 0), 0.1)

    # If allow_downcast is False, nope
    with pytest.raises(TypeError):
        f(0, 0.1, 0)

    # If allow_downcast is None, it should work iff floatX=float32
    if config.floatX == "float32":
        assert np.allclose(f(0, 0, 0.1), 0.1)
    else:
        with pytest.raises(TypeError):
            f(0, 0, 0.1)
def test_one_sequence_one_output_weights_gpu2(self):
    def f_rnn(u_t, x_tm1, W_in, W):
        return u_t * W_in + x_tm1 * W

    u = fvector("u")
    x0 = fscalar("x0")
    W_in = fscalar("win")
    W = fscalar("w")

    output, updates = scan(
        f_rnn,
        u,
        x0,
        [W_in, W],
        n_steps=None,
        truncate_gradient=-1,
        go_backwards=False,
        mode=mode_with_gpu,
    )

    f2 = aesara.function(
        [u, x0, W_in, W],
        output,
        updates=updates,
        allow_input_downcast=True,
        mode=mode_with_gpu,
    )

    # get random initial values
    rng = np.random.default_rng(utt.fetch_seed())
    v_u = rng.uniform(size=(4,), low=-5.0, high=5.0)
    v_x0 = rng.uniform()
    W = rng.uniform()
    W_in = rng.uniform()

    # compute the output in numpy
    v_out = np.zeros((4,))
    v_out[0] = v_u[0] * W_in + v_x0 * W
    for step in range(1, 4):
        v_out[step] = v_u[step] * W_in + v_out[step - 1] * W

    aesara_values = f2(v_u, v_x0, W_in, W)
    utt.assert_allclose(aesara_values, v_out)

    topo = f2.maker.fgraph.toposort()
    assert sum(isinstance(node.op, HostFromGpu) for node in topo) == 1
    assert sum(isinstance(node.op, GpuFromHost) for node in topo) == 4

    scan_node = [node for node in topo if isinstance(node.op, scan.op.Scan)]
    assert len(scan_node) == 1
    scan_node = scan_node[0]
    scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

    # check that there is no gpu transfer in the inner loop.
    assert any(isinstance(node.op, GpuElemwise) for node in scan_node_topo)
    assert not any(isinstance(node.op, HostFromGpu) for node in scan_node_topo)
    assert not any(isinstance(node.op, GpuFromHost) for node in scan_node_topo)
def test_mean(mode):
    a = iscalar("a")
    b = iscalar("b")

    z = mean(a, b)
    z_fn = aesara.function([a, b], z, mode=mode)
    res = z_fn(1, 1)
    assert np.allclose(res, 1.0)

    a = fscalar("a")
    b = fscalar("b")
    c = fscalar("c")

    z = mean(a, b, c)
    z_fn = aesara.function([a, b, c], aesara.grad(z, [a]), mode=mode)
    res = z_fn(3, 4, 5)
    assert np.allclose(res, 1 / 3)

    z_fn = aesara.function([a, b, c], aesara.grad(z, [b]), mode=mode)
    res = z_fn(3, 4, 5)
    assert np.allclose(res, 1 / 3)

    z = mean()
    z_fn = aesara.function([], z, mode=mode)
    assert z_fn() == 0
def test_gpu3_mixture_dtype_outputs(self):
    def f_rnn(u_t, x_tm1, W_in, W):
        return (u_t * W_in + x_tm1 * W, aet.cast(u_t + x_tm1, "int64"))

    u = fvector("u")
    x0 = fscalar("x0")
    W_in = fscalar("win")
    W = fscalar("w")

    output, updates = scan(
        f_rnn,
        u,
        [x0, None],
        [W_in, W],
        n_steps=None,
        truncate_gradient=-1,
        go_backwards=False,
        mode=self.mode_with_gpu,
    )

    f2 = aesara.function(
        [u, x0, W_in, W],
        output,
        updates=updates,
        allow_input_downcast=True,
        mode=self.mode_with_gpu,
    )

    # get random initial values
    rng = np.random.RandomState(utt.fetch_seed())
    v_u = rng.uniform(size=(4,), low=-5.0, high=5.0)
    v_x0 = rng.uniform()
    W = rng.uniform()
    W_in = rng.uniform()

    # compute the output in numpy
    v_out1 = np.zeros((4,))
    v_out2 = np.zeros((4,), dtype="int64")
    v_out1[0] = v_u[0] * W_in + v_x0 * W
    v_out2[0] = v_u[0] + v_x0
    for step in range(1, 4):
        v_out1[step] = v_u[step] * W_in + v_out1[step - 1] * W
        v_out2[step] = np.int64(v_u[step] + v_out1[step - 1])

    aesara_out1, aesara_out2 = f2(v_u, v_x0, W_in, W)
    utt.assert_allclose(aesara_out1, v_out1)
    utt.assert_allclose(aesara_out2, v_out2)

    topo = f2.maker.fgraph.toposort()
    scan_node = [node for node in topo if isinstance(node.op, Scan)]
    assert len(scan_node) == 1
    scan_node = scan_node[0]
    assert self.is_scan_on_gpu(scan_node)
def test_allow_downcast_floatX(self):
    a = fscalar("a")
    b = fvector("b")

    f = pfunc([a, b], (a + b), allow_input_downcast=True)
    g = pfunc([a, b], (a + b), allow_input_downcast=False)
    h = pfunc([a, b], (a + b), allow_input_downcast=None)

    # If the values can be accurately represented, OK
    assert np.all(f(0, [0]) == 0)
    assert np.all(g(0, [0]) == 0)
    assert np.all(h(0, [0]) == 0)

    # For the vector: OK iff allow_input_downcast is True
    assert np.allclose(f(0, [0.1]), 0.1)
    with pytest.raises(TypeError):
        g(0, [0.1])
    with pytest.raises(TypeError):
        h(0, [0.1])

    # For the scalar: OK if allow_input_downcast is True,
    # or None and floatX==float32
    assert np.allclose(f(0.1, [0]), 0.1)
    with pytest.raises(TypeError):
        g(0.1, [0])
    if config.floatX == "float32":
        assert np.allclose(h(0.1, [0]), 0.1)
    else:
        with pytest.raises(TypeError):
            h(0.1, [0])
def test_copy_delete_updates(self):
    w = iscalar("w")
    x = fscalar("x")
    # SharedVariables for tests, one of them has an update
    y = shared(value=1, name="y")
    z = shared(value=2, name="z")
    out = x + y + z

    # Test for different linkers
    for mode in ["FAST_RUN", "FAST_COMPILE"]:
        ori = function([x], out, mode=mode, updates={z: z * 2})
        cpy = ori.copy(delete_updates=True)

        # The update on z was deleted, so z stays at 2 and the result
        # is the same on every call: 1 + 1 + 2 == 4
        assert cpy(1)[0] == 4
        assert cpy(1)[0] == 4
        assert cpy(1)[0] == 4

    # Test that unused implicit and explicit inputs left over from
    # delete_updates are ignored as intended.
    for mode in ["FAST_RUN", "FAST_COMPILE"]:
        ori = function([x], x, mode=mode, updates={z: z * 2})
        cpy = ori.copy(delete_updates=True)

        ori = function([x, w], x, mode=mode, updates={z: z + w})
        cpy = ori.copy(delete_updates=True)
def test_copy_share_memory(self):
    x = fscalar("x")
    # SharedVariables for tests, one of them has an update
    y = shared(value=1)
    z = shared(value=2)
    out = tanh((x + y + 2) / (x + z - 0.2) ** 2)

    # Test for different linkers
    for mode in ["FAST_RUN", "FAST_COMPILE"]:
        ori = function([x], [out], mode=mode, updates={z: z + 1})
        cpy = ori.copy(share_memory=True)

        # Test whether the storages are shared
        storage_map_ori = ori.fn.storage_map
        storage_map_cpy = cpy.fn.storage_map
        fgraph_cpy = cpy.maker.fgraph

        # Assert that intermediate and Constant storages are shared,
        # and that output storages are not shared
        i_o_variables = fgraph_cpy.inputs + fgraph_cpy.outputs
        ori_storages = storage_map_ori.values()
        l = [
            val
            for key, val in storage_map_cpy.items()
            if key not in i_o_variables or isinstance(key, Constant)
        ]
        for storage in l:
            assert any(storage is s for s in ori_storages)

        # Assert that storages of SharedVariables without updates are shared
        for (input, _1, _2), here, there in zip(
            ori.indices, ori.input_storage, cpy.input_storage
        ):
            assert here.data is there.data
def test_grad_abs():
    a = fscalar("a")
    b = aesara.tensor.nnet.relu(a)
    c = aesara.grad(b, a)
    f = aesara.function([a], c, mode=Mode(optimizer=None))
    # Aesara currently returns 0.5 for the gradient of relu at 0, but
    # there is no guarantee that this will not change in the future.
    ret = f(0.0)
    assert ret == 0.5, ret
def test_grad_inrange():
    for bound_definition in [(True, True), (False, False)]:
        # Instantiate the op, and then take the gradient
        op = InRange(*bound_definition)

        x = fscalar("x")
        low = fscalar("low")
        high = fscalar("high")

        out = op(x, low, high)
        gx, glow, ghigh = aesara.gradient.grad(out, [x, low, high])

        # Check that the gradients are zero when x is below the lower
        # bound, equal to the lower bound, between the bounds, equal to
        # the upper bound, and above the upper bound.
        # Mathematically the gradient should be infinite when x is equal
        # to the lower or upper bound, but in that case Aesara defines
        # the gradient to be zero for stability.
        f = aesara.function([x, low, high], [gx, glow, ghigh])

        utt.assert_allclose(f(0, 1, 5), [0, 0, 0])
        utt.assert_allclose(f(1, 1, 5), [0, 0, 0])
        utt.assert_allclose(f(2, 1, 5), [0, 0, 0])
        utt.assert_allclose(f(5, 1, 5), [0, 0, 0])
        utt.assert_allclose(f(7, 1, 5), [0, 0, 0])
def test_downcast_dtype(self):
    # Test that the gradient of a cost wrt a float32 variable does not
    # get upcast to float64.
    # x has dtype float32, regardless of the value of floatX
    x = fscalar("x")
    y = x * 2
    z = lscalar("z")
    c = y + z
    dc_dx, dc_dy, dc_dz, dc_dc = grad(c, [x, y, z, c])

    # The dtype of dc_dy and dc_dz can be either float32 or float64;
    # it might depend on floatX, but is not specified.
    assert dc_dc.dtype in ("float32", "float64")
    assert dc_dz.dtype in ("float32", "float64")
    assert dc_dy.dtype in ("float32", "float64")

    # When the output gradient of y is passed to op.grad, it should
    # be downcast to float32, so dc_dx should also be float32
    assert dc_dx.dtype == "float32"
def setup_method(self):
    super().setup_method()

    # Sample computation that involves tensors with different numbers
    # of dimensions
    self.input1 = fmatrix()
    self.input2 = fscalar()
    self.output = dot(
        (self.input1 - self.input2), (self.input1 - self.input2).transpose()
    )

    # Declare the conditional breakpoint
    self.breakpointOp = PdbBreakpoint("Sum of output too high")
    self.condition = gt(self.output.sum(), 1000)
    (
        self.monitored_input1,
        self.monitored_input2,
        self.monitored_output,
    ) = self.breakpointOp(self.condition, self.input1, self.input2, self.output)
def test_one_sequence_one_output_weights_gpu1(self):
    def f_rnn(u_t, x_tm1, W_in, W):
        return u_t * W_in + x_tm1 * W

    u = fvector("u")
    x0 = fscalar("x0")
    W_in = fscalar("win")
    W = fscalar("w")

    # The following line is needed so that the first case is exercised;
    # otherwise, it is the second one that is tested.
    mode = self.mode_with_gpu.excluding("InputToGpuOptimizer")
    output, updates = scan(
        f_rnn,
        u,
        x0,
        [W_in, W],
        n_steps=None,
        truncate_gradient=-1,
        go_backwards=False,
        mode=mode,
    )

    output = self.gpu_backend.gpu_from_host(output)
    f2 = aesara.function(
        [u, x0, W_in, W],
        output,
        updates=updates,
        allow_input_downcast=True,
        mode=self.mode_with_gpu,
    )

    # get random initial values
    rng = np.random.RandomState(utt.fetch_seed())
    v_u = rng.uniform(size=(4,), low=-5.0, high=5.0)
    v_x0 = rng.uniform()
    W = rng.uniform()
    W_in = rng.uniform()

    v_u = np.asarray(v_u, dtype="float32")
    v_x0 = np.asarray(v_x0, dtype="float32")
    W = np.asarray(W, dtype="float32")
    W_in = np.asarray(W_in, dtype="float32")

    # compute the output in numpy
    v_out = np.zeros((4,))
    v_out[0] = v_u[0] * W_in + v_x0 * W
    for step in range(1, 4):
        v_out[step] = v_u[step] * W_in + v_out[step - 1] * W

    aesara_values = f2(v_u, v_x0, W_in, W)
    utt.assert_allclose(aesara_values, v_out)

    topo = f2.maker.fgraph.toposort()
    assert (
        sum(isinstance(node.op, self.gpu_backend.HostFromGpu) for node in topo) == 0
    )
    assert (
        sum(isinstance(node.op, self.gpu_backend.GpuFromHost) for node in topo) == 4
    )

    scan_node = [node for node in topo if isinstance(node.op, Scan)]
    assert len(scan_node) == 1
    scan_node = scan_node[0]
    scan_node_topo = scan_node.op.fn.maker.fgraph.toposort()

    # check that there is no gpu transfer in the inner loop.
    assert any(
        isinstance(node.op, self.gpu_backend.GpuElemwise)
        for node in scan_node_topo
    )
    assert not any(
        isinstance(node.op, self.gpu_backend.HostFromGpu)
        for node in scan_node_topo
    )
    assert not any(
        isinstance(node.op, self.gpu_backend.GpuFromHost)
        for node in scan_node_topo
    )