def instances():
    """Return a small fixed (X, y) pair.

    X is the tuple (keys, values, lengths): nine consecutive uint64 keys
    starting at 0, nine float32 ones, and the int32 lengths [5, 4] (which
    sum to the nine keys). y is the label array [0, 2] passed through
    to_categorical with n_classes=3.
    """
    n_items = 9
    labels = numpy.asarray([0, 2], dtype="int32")
    X = (
        numpy.arange(n_items, dtype="uint64"),
        numpy.ones(n_items, dtype="float32"),
        numpy.asarray([5, 4], dtype="int32"),
    )
    return X, to_categorical(labels, n_classes=3)
def get_characters_loss(ops, docs, prediction, nr_char):
    """Return (loss, gradient) for character predictions made over the docs.

    Each doc supplies its target ids via doc.to_utf8_array(nr_char=nr_char).
    The ids are flattened, encoded with to_categorical over 256 classes and
    reshaped to rows of width 256 * nr_char so they can be subtracted from
    prediction. The loss is the summed squared difference; the gradient is
    the difference divided by the number of rows in prediction.
    """
    n_byte_values = 256
    per_doc_ids = [doc.to_utf8_array(nr_char=nr_char) for doc in docs]
    flat_ids = numpy.vstack(per_doc_ids).reshape((-1,))
    encoded = ops.asarray(
        to_categorical(flat_ids, n_classes=n_byte_values), dtype="f"
    )
    # One row per prediction row: nr_char blocks of 256 values each.
    target = encoded.reshape((-1, n_byte_values * nr_char))
    residual = prediction - target
    squared_error = (residual ** 2).sum()
    n_rows = float(prediction.shape[0])
    return squared_error, residual / n_rows
def get_dummy_data(n_samples, n_tags, n_vocab, length_mean, length_variance):
    """Generate random variable-length sequences with categorical tag labels.

    n_samples: number of sequences to generate.
    n_tags: number of tag classes; also the n_classes given to
        to_categorical, so every label array is encoded against the same
        class count.
    n_vocab: upper bound used when sampling token ids.
    length_mean: mean of the normal distribution lengths are drawn from.
    length_variance: passed to numpy.random.normal as ``scale``, i.e. it is
        used as the standard deviation despite its name.

    Returns (Xs, Ys): two lists of length n_samples. Each X is an integer
    column of shape (length, 1); each Y is the matching label array encoded
    by to_categorical. Sequence lengths are clamped to at least 1.
    """
    Xs = []
    Ys = []
    for _ in range(n_samples):
        # Draw a scalar rather than a 1-element array: int() on an array
        # with ndim > 0 is deprecated in recent NumPy releases.
        length = numpy.random.normal(scale=length_variance) + length_mean
        shape = (max(1, int(length)),)
        # NOTE: uniform samples from [0, n - 1) and astype("i") truncates,
        # so for n >= 2 the largest id produced is n - 2.
        X = numpy.random.uniform(0, n_vocab - 1, shape)
        Y = numpy.random.uniform(0, n_tags - 1, shape)
        Xs.append(X.reshape((-1, 1)).astype("i"))
        # Fix the class count explicitly; otherwise it would be inferred
        # separately from the labels of each individual sample.
        Ys.append(to_categorical(Y.astype("i"), n_classes=n_tags))
    return Xs, Ys