def test_constant_add(dtype):
    """check sparsemax proposition 2."""
    random = np.random.RandomState(5)

    z = random.uniform(low=-3, high=3, size=(test_obs, 10)).astype(dtype)
    c = random.uniform(low=-3, high=3, size=(test_obs, 1)).astype(dtype)

    tf_sparsemax_zpc = sparsemax(z + c)
    tf_sparsemax_z = sparsemax(z)

    test_utils.assert_allclose_according_to_type(
        tf_sparsemax_zpc, tf_sparsemax_z, half_atol=5e-3
    )
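# The tests in this section assume a shared harness that is not shown here.
# A minimal sketch of that setup; the module paths follow tensorflow_addons
# conventions, and the exact value of `test_obs` is an assumption (any small
# positive integer works).
import numpy as np
import tensorflow as tf
from tensorflow_addons.activations import sparsemax
from tensorflow_addons.losses import sparsemax_loss
from tensorflow_addons.utils import test_utils

test_obs = 17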
def test_permutation(dtype):
    """check sparsemax proposition 3."""
    random = np.random.RandomState(6)

    z = random.uniform(low=-3, high=3, size=(test_obs, 10))
    p = sparsemax(z.astype(dtype)).numpy()

    for i in range(test_obs):
        per = random.permutation(10)

        tf_sparsemax_out = sparsemax(z[i, per].reshape(1, -1).astype(dtype))
        p_expected = p[i, per].reshape(1, -1)

        test_utils.assert_allclose_according_to_type(
            p_expected, tf_sparsemax_out, half_atol=5e-3
        )
        assert p_expected.shape == tf_sparsemax_out.shape
def test_sparsemax_of_zero(dtype):
    """check sparsemax proposition 1, part 1."""
    z = np.zeros((1, 10))

    tf_sparsemax_out = sparsemax(z.astype(dtype))
    np_sparsemax = np.ones_like(z, dtype=dtype) / z.size

    test_utils.assert_allclose_according_to_type(np_sparsemax, tf_sparsemax_out)
def _tf_sparsemax_loss(z, q, dtype):
    z = z.astype(dtype)
    q = q.astype(dtype)

    tf_sparsemax_op = sparsemax(z)
    tf_loss_op = sparsemax_loss(z, tf_sparsemax_op, q)
    tf_loss_out = tf_loss_op.numpy()

    return tf_loss_op, tf_loss_out
def _tf_sparsemax_loss(self, z, q, dtype):
    # tf.test.TestCase variant of the helper above, using self.evaluate().
    z = z.astype(dtype)
    q = q.astype(dtype)

    tf_sparsemax_op = sparsemax(z)
    tf_loss_op = sparsemax_loss(z, tf_sparsemax_op, q)
    tf_loss_out = self.evaluate(tf_loss_op)

    return tf_loss_op, tf_loss_out
def test_sparsemax_against_numpy(dtype):
    """check sparsemax kernel against numpy."""
    random = np.random.RandomState(1)
    z = random.uniform(low=-3, high=3, size=(test_obs, 10))

    tf_sparsemax_out = sparsemax(z.astype(dtype))
    np_sparsemax = _np_sparsemax(z).astype(dtype)

    test_utils.assert_allclose_according_to_type(np_sparsemax, tf_sparsemax_out)
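# The comparison above relies on a `_np_sparsemax` reference that is not shown
# in this section. A minimal sketch of such a reference, following the
# closed-form projection from the sparsemax paper (Martins & Astudillo, 2016);
# the repo's actual helper may differ in details.
def _np_sparsemax(z):
    # center each row; sparsemax is shift-invariant (proposition 2), and this
    # improves numerical stability.
    z = z - np.mean(z, axis=1)[:, np.newaxis]

    # sort each row in descending order.
    z_sorted = np.sort(z, axis=1)[:, ::-1]

    # support size k(z): the largest k with 1 + k * z_(k) > cumsum(z_(1..k)).
    z_cumsum = np.cumsum(z_sorted, axis=1)
    k = np.arange(1, z.shape[1] + 1)
    z_check = 1 + k * z_sorted > z_cumsum
    # np.argmax returns the first True; flip to find the last one per row.
    k_z = z.shape[1] - np.argmax(z_check[:, ::-1], axis=1)

    # threshold tau(z) and the projection onto the simplex.
    tau_sum = z_cumsum[np.arange(0, z.shape[0]), k_z - 1]
    tau_z = ((tau_sum - 1) / k_z).reshape(-1, 1)
    return np.maximum(0, z - tau_z)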
def test_sparsemax_of_inf(dtype):
    """check sparsemax is infinity safe."""
    z_neg = np.asarray(
        [[0, -np.inf, 0], [0, -np.inf, -np.inf], [-np.inf, -np.inf, -np.inf]]
    ).astype(dtype)
    z_pos = np.asarray(
        [[0, np.inf, 0], [0, np.inf, np.inf], [np.inf, np.inf, np.inf]]
    ).astype(dtype)
    z_mix = np.asarray(
        [[0, np.inf, 0], [0, np.inf, -np.inf], [-np.inf, np.inf, -np.inf]]
    ).astype(dtype)

    tf_sparsemax_neg = sparsemax(z_neg)
    np.testing.assert_equal(
        np.array([[0.5, 0, 0.5], [1, 0, 0], [np.nan, np.nan, np.nan]]),
        tf_sparsemax_neg,
    )

    tf_sparsemax_pos = sparsemax(z_pos)
    np.testing.assert_equal(
        np.array(
            [
                [np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan],
            ]
        ),
        tf_sparsemax_pos,
    )

    tf_sparsemax_mix = sparsemax(z_mix)
    np.testing.assert_equal(
        np.array(
            [
                [np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan],
            ]
        ),
        tf_sparsemax_mix,
    )
def test_sparsemax_against_numpy_low_rank(dtype):
    """check sparsemax kernel against numpy for a rank-1 input."""
    random = np.random.RandomState(1)
    z = random.uniform(low=-3, high=3, size=(10))

    tf_sparsemax_out = sparsemax(z.astype(dtype)).numpy()
    np_sparsemax = np.reshape(
        _np_sparsemax(np.reshape(z, [1, 10])), [10]
    ).astype(dtype)

    test_utils.assert_allclose_according_to_type(
        np_sparsemax, tf_sparsemax_out, half_atol=5e-3
    )
    assert np_sparsemax.shape == tf_sparsemax_out.shape
def test_two_dimensional(dtype):
    """check the two-dimensional sparsemax case."""
    t = np.linspace(-2, 2, test_obs, dtype=dtype)
    z = np.vstack([t, np.zeros(test_obs, dtype=dtype)]).T

    tf_sparsemax_out = sparsemax(z.astype(dtype)).numpy()
    p0_expected = np.select([t < -1, t <= 1, t > 1], [0, (t + 1) / 2, 1])

    test_utils.assert_allclose_according_to_type(p0_expected, tf_sparsemax_out[:, 0])
    test_utils.assert_allclose_according_to_type(
        1 - p0_expected, tf_sparsemax_out[:, 1]
    )
    assert z.shape == tf_sparsemax_out.shape
def test_gradient_against_estimate(self, dtype=None):
    """check sparsemax Rop, against estimated Rop."""
    random = np.random.RandomState(9)

    # sparsemax is not a smooth function so gradient estimation is only
    # possible for float64.
    if dtype != "float64":
        return

    z = random.uniform(low=-1, high=1, size=(test_obs, 10)).astype(dtype)

    (jacob_sym,), (jacob_num,) = tf.test.compute_gradient(
        lambda logits: sparsemax(logits), [z], delta=1e-6
    )
    self.assertAllCloseAccordingToType(jacob_sym, jacob_num)
def test_gradient_against_estimate(self, dtype=None):
    """check sparsemax-loss Rop, against estimated-loss Rop."""
    random = np.random.RandomState(7)

    # sparsemax is not a smooth function so gradient estimation is only
    # possible for float64.
    if dtype != "float64":
        return

    z = random.uniform(low=-3, high=3, size=(test_obs, 10)).astype(dtype)
    q = np.zeros((test_obs, 10)).astype(dtype)
    # use the seeded RandomState for the labels too, so the test is deterministic.
    q[np.arange(0, test_obs), random.randint(0, 10, size=test_obs)] = 1

    (jacob_sym,), (jacob_num,) = tf.test.compute_gradient(
        lambda logits: sparsemax_loss(logits, sparsemax(logits), q), [z]
    )
    self.assertAllCloseAccordingToType(jacob_sym, jacob_num)
def test_difference(dtype):
    """check sparsemax proposition 4."""
    random = np.random.RandomState(7)

    z = random.uniform(low=-3, high=3, size=(test_obs, 10))
    p = sparsemax(z.astype(dtype)).numpy()

    etol = {np.float32: 1e-6, np.float64: 1e-9}[dtype]

    for val in range(0, test_obs):
        for i in range(0, 10):
            for j in range(0, 10):
                # check condition; the opposite pair will be checked anyway.
                if z[val, i] > z[val, j]:
                    continue

                assert 0 <= p[val, j] - p[val, i] <= z[val, j] - z[val, i] + etol
def test_sparsemax_of_nan(dtype):
    """check sparsemax transfers nan."""
    z_nan = np.asarray(
        [[0, np.nan, 0], [0, np.nan, np.nan], [np.nan, np.nan, np.nan]]
    ).astype(dtype)

    tf_sparsemax_nan = sparsemax(z_nan)
    np.testing.assert_equal(
        np.array(
            [
                [np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan],
            ]
        ),
        tf_sparsemax_nan,
    )
def test_sparsemax_of_to_inf(dtype):
    """check sparsemax proposition 1, part 2."""
    random = np.random.RandomState(4)

    z = random.uniform(low=-3, high=3, size=(test_obs, 10))

    # assume |A(z)| = 1, as z is continuously random.
    z_sort_arg = np.argsort(z, axis=1)[:, ::-1]
    z_sort = np.sort(z, axis=-1)[:, ::-1]
    gamma_z = z_sort[:, 0] - z_sort[:, 1]
    epsilon = (0.99 * gamma_z).reshape(-1, 1)

    # construct the expected 1_A(z) array.
    p_expected = np.zeros((test_obs, 10), dtype=dtype)
    p_expected[np.arange(0, test_obs), z_sort_arg[:, 0]] = 1

    tf_sparsemax_out = sparsemax(((1 / epsilon) * z).astype(dtype))

    test_utils.assert_allclose_according_to_type(p_expected, tf_sparsemax_out)
def test_sparsemax_loss_zero(dtype):
    """check sparsemax-loss proposition 5."""
    random = np.random.RandomState(6)

    # construct z and q, such that z_k >= 1 + max_{j!=k} z_j holds for
    # delta_0 = 1.
    z = random.uniform(low=-3, high=3, size=(test_obs, 10))
    z[:, 0] = np.max(z, axis=1) + 1.05

    q = np.zeros((test_obs, 10))
    q[:, 0] = 1

    tf_loss_op, tf_loss_out = _tf_sparsemax_loss(z, q, dtype)
    tf_sparsemax_op = sparsemax(z.astype(dtype))

    test_utils.assert_allclose_according_to_type(np.zeros(test_obs), tf_loss_out)
    assert np.zeros(test_obs).shape == tf_loss_op.shape

    test_utils.assert_allclose_according_to_type(q, tf_sparsemax_op)
    assert q.shape == tf_sparsemax_op.shape
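# A concrete instance of proposition 5 (the numbers below are made up for
# illustration): when the correct-class logit exceeds every other logit by at
# least 1, sparsemax returns the one-hot target exactly and the loss is zero.
z = np.array([[3.0, 1.5, 0.0]])  # z_0 >= 1 + max_{j!=0} z_j
q = np.array([[1.0, 0.0, 0.0]])

print(sparsemax(z).numpy())                        # [[1. 0. 0.]]
print(sparsemax_loss(z, sparsemax(z), q).numpy())  # [0.]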
def call(self, x, prior_scales, training=None, alpha: float = 0.0):
    x = self.block(x, training=training, alpha=alpha)
    # scale the transformed features by the priors, then normalize with
    # sparsemax to obtain a sparse mask over features.
    return sparsemax(x * prior_scales)
def _tf_sparsemax(self, z, dtype):
    tf_sparsemax_op = sparsemax(z.astype(dtype))
    tf_sparsemax_out = self.evaluate(tf_sparsemax_op)

    return tf_sparsemax_op, tf_sparsemax_out
def call(self, inputs, training=None, alpha: float = 0.0):
    x_for_mask, prior_scales = inputs
    x = self.block(x_for_mask, training=training, alpha=alpha)
    return sparsemax(x * prior_scales)
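# For context: the two `call` methods above resemble TabNet-style attentive
# transformers, where a learned block is scaled by `prior_scales` and pushed
# through sparsemax to produce a sparse feature mask. A minimal sketch of how
# a caller might maintain the priors across decision steps; the relaxation
# factor `gamma` and the update rule follow the TabNet paper and are
# assumptions about this particular codebase, not taken from it.
def run_decision_steps(attentive_layer, features, n_steps, gamma=1.3):
    prior_scales = tf.ones_like(features)
    masks = []
    for _ in range(n_steps):
        mask = attentive_layer((features, prior_scales), training=False)
        # down-weight features that earlier steps have already used.
        prior_scales = prior_scales * (gamma - mask)
        masks.append(mask)
    return masks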
if tpu:
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    # default distribution strategy in Tensorflow. Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)

with strategy.scope():
    atten_cnn = build_attpool_cnn_model()
    atten_bilstm = make_atten_bilstm_model()
    x = multiply([atten_cnn.output, atten_bilstm.output])
    # alternatives tried: x = build_cnn_model(); x = make_bilstm_model()

    # sparsemax head in place of the usual softmax classifier.
    predictions = Dense(num_classes)(x)
    predictions = sparsemax(predictions)

    model = Model(inputs=[atten_cnn.input, atten_bilstm.input], outputs=predictions)

    opt = keras.optimizers.Adam(learning_rate=0.0001, decay=1e-3 / 200, epsilon=1e-07)
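    # A plausible continuation compiling the model. The loss and metric
    # choices below are assumptions for illustration, not taken from the
    # original notebook; since the model already outputs sparsemax
    # probabilities, cross-entropy is used with its default from_logits=False.
    model.compile(
        optimizer=opt,
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    model.summary()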