def test__parabolic_crossentropy(self):
    """Loss must grow monotonically with the distance between prediction and truth."""
    # given
    categories = 11
    loss_function = parabolic_crossentropy(categories, 1)

    # when / then
    for truth in range(categories):
        losses = [
            K.eval(loss_function(K.variable(one_hot(truth, categories)),
                                 K.variable(one_hot(prediction, categories))))
            for prediction in range(categories)
        ]

        # predictions left of the truth: loss shrinks as the prediction approaches the truth
        for i in range(1, truth):
            self.assertGreater(losses[i - 1], losses[i])

        # predictions right of the truth: loss grows as the prediction moves away
        for i in range(truth, categories - 1):
            self.assertLess(losses[i], losses[i + 1])

        # for interior truths the tail farther from the center is penalized more
        if 0 < truth < categories - 1:
            if truth > categories / 2:
                # right tail
                self.assertGreater(losses[truth - 1], losses[truth + 1])
            else:
                # left tail
                self.assertGreater(losses[truth + 1], losses[truth - 1])
def test__tailed_categorical_crossentropy(self):
    """A wrong prediction yields a positive loss, and the loss function is serializable."""
    # when
    categories = 11
    loss = tailed_categorical_crossentropy(categories, 1)
    loss_value = K.eval(loss(K.variable(one_hot(3, 11)), K.variable(one_hot(6, 11))))

    # then
    # FIX: removed a stray bare `pickle` expression statement (dead code that would
    # raise NameError if `pickle` were not imported) and renamed the ambiguous local `l`
    self.assertGreater(loss_value, 0)
    # the loss closure must survive serialization (dill-based save_object)
    save_object(loss, '/tmp/test__tailed_categorical_crossentropy.dill')
def ta_one_hot_categories(df: _pd.DataFrame):
    """
    Take category columns or columns of integers and turn them into a one hot encoded data frame

    :param df: a series or a data frame which has category columns or integer columns
    :return: a multi index data frame with one hot encoded integer columns;
             note: returns None if no column qualifies (a warning is logged)
    """
    # accept a plain Series by promoting it to a single-column frame
    df = df.to_frame() if isinstance(df, _pd.Series) else df
    res = None

    for col in df.columns:
        if hasattr(df[col], "cat"):
            # pandas categorical: encode via the category index
            categories = [str(cat) for cat in df[col].cat.categories]
            df_of_categories = index_of_categories(df[col])
        elif df[col].dtype.kind in 'iu':
            # NOTE(review): this branch assumes the integer values already are
            # 0..n-1 indices; values outside that range would break util.one_hot
            # — confirm against callers
            categories = sorted(set(df[col].values))
            df_of_categories = df
        else:
            # neither categorical nor integer -> nothing to encode for this column
            continue

        number_of_categories = len(categories)
        ohdf = df_of_categories[[col]].apply(
            lambda r: util.one_hot(r, number_of_categories),
            axis=1, result_type='expand')
        # two-level columns: (source column, category label)
        ohdf.columns = _pd.MultiIndex.from_product([[col], categories])
        res = ohdf if res is None else res.join(ohdf)

    if res is None:
        # FIX: typo in warning message ("non of" -> "none of")
        _log.warning(f'none of the {df.columns} are of type category index or integer value!\n'
                     f'You might want to call df.ta_one_hot_categories(df.ta_bucketize(3))')

    return res
def test__differentiable_argmax(self):
    """DifferentiableArgmax should recover the index of the hot entry of a one-hot vector."""
    # given
    nr_of_args = 10
    argmax = DifferentiableArgmax(nr_of_args)

    # when
    results = []
    for index in range(nr_of_args):
        results.append(K.eval(argmax(K.variable(one_hot(index, nr_of_args)))))
    results = np.array(results)

    # then
    print(results)
    np.testing.assert_array_almost_equal(results, np.arange(0, nr_of_args))
def convoluted(df: _pd.Series) -> _np.ndarray:
    """Bucketize the series into equally spaced bins and one-hot encode each value.

    :param df: numeric series to encode
    :return: array of one-hot vectors, one row per element of the series
    """
    # renamed locals: the original `min`/`max` shadowed the builtins
    lower = df.min()
    upper = df.max()
    # bin edges spanning the observed range; `buckets` comes from the enclosing scope
    interval_index = _np.linspace(lower, upper, buckets)
    indexes = _np.digitize(df, interval_index) - 1
    # FIX: the original built this array but never returned it (implicitly returned None,
    # contradicting the -> _np.ndarray annotation)
    return _np.array([util.one_hot(index, buckets) for index in indexes])
def test_normal_penalized_crossentropy(self):
    """A perfect prediction scores ~0 and near-misses on one side cost less than the other."""
    # given
    categories = 11
    loss = normal_penalized_crossentropy(categories)

    def evaluated(truth, prediction):
        # scalar loss for a single (truth, prediction) pair of one-hot vectors
        return K.eval(loss(K.variable(one_hot(truth, categories)),
                           K.variable(one_hot(prediction, categories))))

    # then: predicting the exact truth is numerically zero for every category
    for category in range(categories):
        self.assertLess(evaluated(category, category), 0.00001)

    # then: missing by one towards the distribution's far side costs more
    self.assertLess(evaluated(7, 8), evaluated(7, 6))
    self.assertLess(evaluated(6, 7), evaluated(6, 5))
    self.assertLess(evaluated(3, 2), evaluated(3, 4))