def test_bad_label_shape(shape):
    """
    Ensures that softmax_crossentropy checks for shape-(N,) `y_true`
    """
    scores = mg.arange(12).reshape(3, 4)
    labels = mg.zeros(shape, dtype=int)
    with raises(ValueError):
        softmax_crossentropy(scores, labels)

def test_bad_label_type(type):
    """
    Ensures that softmax_crossentropy checks integer-type `y_true`
    """
    scores = mg.arange(12).reshape(3, 4)
    labels = np.zeros((3, ), dtype=type)
    with raises(TypeError):
        softmax_crossentropy(scores, labels)
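
For reference, here is a minimal sketch of the input validation that these two tests exercise. It is an illustrative stand-in rather than MyGrad's actual implementation, and the helper name _validate_labels is hypothetical:

import numpy as np

def _validate_labels(scores, y_true):
    # hypothetical helper: `y_true` must be a shape-(N,) array of integer class indices
    y = np.asarray(y_true)
    if y.ndim != 1 or y.shape[0] != scores.shape[0]:
        raise ValueError("`y_true` must have shape (N,), matching the leading dimension of `scores`")
    if not np.issubdtype(y.dtype, np.integer):
        raise TypeError("`y_true` must contain integer-typed class labels")
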
Example #3
def test_negative_log_likelihood_vs_softmax_cross_entropy(
        data: st.DataObject, labels_as_tensor: bool):
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        ))
    y_true = data.draw(
        hnp.arrays(
            shape=(s.shape[0], ),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        ).map(Tensor if labels_as_tensor else lambda x: x))
    scores = Tensor(s)
    nll = negative_log_likelihood(mg.log(mg.nnet.softmax(scores)), y_true)
    nll.backward()

    cross_entropy_scores = Tensor(s)
    ce = softmax_crossentropy(cross_entropy_scores, y_true)
    ce.backward()

    assert_allclose(nll.data, ce.data, atol=1e-5, rtol=1e-5)
    assert_allclose(scores.grad,
                    cross_entropy_scores.grad,
                    atol=1e-5,
                    rtol=1e-5)
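
The identity exercised above is that softmax cross-entropy equals the mean negative log-likelihood of the true class under the softmax distribution. A minimal NumPy sketch of that reference computation (illustrative only; it assumes scores has shape (N, C) and labels holds N integer class indices):

import numpy as np

def reference_softmax_crossentropy(scores, labels):
    # numerically stable log-softmax, then pick out each sample's true-class log-probability
    shifted = scores - scores.max(axis=1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -log_probs[np.arange(len(labels)), labels].mean()
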
def test_softmax_crossentropy(data: st.DataObject, labels_as_tensor: bool):
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        ))
    y_true = data.draw(
        hnp.arrays(
            shape=(s.shape[0], ),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        ).map(Tensor if labels_as_tensor else lambda x: x))
    scores = Tensor(s)
    softmax_cross = softmax_crossentropy(scores, y_true, constant=False)
    softmax_cross.backward()

    mygrad_scores = Tensor(s)
    probs = softmax(mygrad_scores)

    correct_labels = (range(len(y_true)),
                      y_true.data if labels_as_tensor else y_true)
    truth = np.zeros(mygrad_scores.shape)
    truth[correct_labels] = 1

    mygrad_cross = (-1 / s.shape[0]) * (log(probs) * truth).sum()
    mygrad_cross.backward()
    assert_allclose(softmax_cross.data,
                    mygrad_cross.data,
                    atol=1e-5,
                    rtol=1e-5)
    assert_allclose(scores.grad, mygrad_scores.grad, atol=1e-5, rtol=1e-5)

def train():
    fact_dict = load_pickle()

    sentences, ratings = split(fact_dict)

    plotter, fig, ax = create_plot(["loss", "accuracy"])

    model = fm.Model(dim_input=50, dim_recurrent=100, dim_output=1)
    optimizer = Adam(model.parameters)

    plot_every = 500

    for k in range(100000):
        output = model(sentences)

        loss = softmax_crossentropy(output, ratings)

        acc = float(output.data.squeeze() == ratings.item())

        plotter.set_train_batch({
            "loss": loss.item(),
            "accuracy": acc
        },
                                batch_size=1,
                                plot=False)

        if k % plot_every == 0 and k > 0:
            plotter.set_train_epoch()

        loss.backward()
        optimizer.step()
        loss.null_gradients()
Example #6
 def step(self, tetrisBoards, executor,
          done):  # calculates and stores derivatives
     for i, tetrisBoard in enumerate(tetrisBoards):
         if done[i]:
             continue
         #temp = time.time()
         data = self.preprocess(tetrisBoard)
         #print("Preprocess: {}".format(time.time() - temp))
         #temp = time.time()
         outNeurons = self.model.policyForward(data)
         probabilities = softmax(outNeurons)
         #print("Forward: {}".format(time.time() - temp))
         #temp = time.time()
         choice = np.random.choice(self.cache['arange'], p=probabilities)
         #print("Choice: {}".format(time.time() - temp))
         #temp = time.time()
         loss = softmax_crossentropy(outNeurons.reshape(1, len(outNeurons)),
                                     [choice])
         loss.backward()
         #print("Backprop: {}".format(time.time() - temp))
         #temp = time.time()
         self.derivatives[i].append(self.model.getDerivatives())
         loss.null_gradients()
         #print("Store: {}".format(time.time() - temp))
         executor(tetrisBoard, choice)
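
In this REINFORCE-style step, the cross-entropy loss over the single sampled action reduces to the negative log-probability of that action, so backpropagating it appears to store the log-likelihood gradient that a reward would later scale. A small NumPy check of that reduction (illustrative values only):

import numpy as np

logits = np.array([0.2, -1.3, 0.5, 2.0, 0.1])   # example policy scores
probs = np.exp(logits - logits.max())
probs /= probs.sum()
choice = 3                                       # a sampled action index
one_hot = np.zeros_like(probs)
one_hot[choice] = 1
ce = -(one_hot * np.log(probs)).sum()            # cross-entropy against the one-hot "label"
assert np.isclose(ce, -np.log(probs[choice]))    # reduces to -log p(choice)
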
Example #7
def test_softmax_crossentropy(data):
    """ Test the built-in implementation of multiclass hinge against the pure pygrad version"""
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        ))
    l = data.draw(
        hnp.arrays(
            shape=(s.shape[0], ),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        ))
    scores = Tensor(s)
    softmax_cross = softmax_crossentropy(scores, l, constant=False)
    softmax_cross.backward()

    pygrad_scores = Tensor(s)
    probs = softmax(pygrad_scores)

    correct_labels = (range(len(l)), l)
    truth = np.zeros(pygrad_scores.shape)
    truth[correct_labels] = 1

    pygrad_cross = (-1 / s.shape[0]) * (log(probs) * truth).sum()
    pygrad_cross.backward()
    assert_allclose(softmax_cross.data,
                    pygrad_cross.data,
                    atol=1e-5,
                    rtol=1e-5)
    assert_allclose(scores.grad, pygrad_scores.grad, atol=1e-5, rtol=1e-5)
Example #8
 def step(self, tetrisBoards, done, executor):
     boards = [(i, board) for i, board in enumerate(tetrisBoards)
               if not done[i]]
     for i, board in boards:
         outNeurons = self.model.policyForward(
             self.preprocess(board)[np.newaxis, np.newaxis, :, :])
         probabilities = softmax(outNeurons)
         choice = np.random.choice(5, p=probabilities.reshape(5))
         loss = softmax_crossentropy(outNeurons, np.array([choice]))
         loss.backward()
         self.derivatives.append(self.model.getDerivatives())
         if board not in self.rewardMap:
             self.rewardMap[board] = []
         self.rewardMap[board].append(len(self.rewards))
         self.rewards.append(None)  # Temp placeholder
         loss.null_gradients()
         executor(board, choice)
Example #9
 def step(self, tetrisBoards, done, executor):
     boards = [(i, board) for i, board in enumerate(tetrisBoards)
               if not done[i]]
     stacked = np.stack([self.preprocess(board)
                         for i, board in boards])[:, np.newaxis, :, :]
     outNeurons = self.model.policyForward(stacked)
     probabilities = softmax(outNeurons)
     choices = self.sample(probabilities.data)
     assert len(choices) == len(boards)
     loss = softmax_crossentropy(outNeurons, choices)
     loss.backward()
     # Store gradients
     converted = self.convert(self.model.getDerivatives())
     print(len(converted))
     self.derivatives.extend(converted)  # Derivatives are batched
     self.rewards.append([1 if choice == 0 else 0 for choice in choices])
     print(len(self.model.getDerivatives()))
     print(len(self.rewards[-1]))
     assert len(self.derivatives[-1]) == len(self.rewards[-1])
     # Calculate rewards array
     loss.null_gradients()
     for choice, (i, board) in zip(choices, boards):
         executor(board, choice)
Example #10
batch_size = 1000
idxs = np.arange(len(xtrain))  # -> array([0, 1, ..., 9999])
np.random.shuffle(idxs)

for batch_cnt in range(0, len(xtrain) // batch_size):
    batch_indices = idxs[batch_cnt * batch_size: (batch_cnt + 1) * batch_size]
    batch = xtrain[batch_indices]  # random batch of our training data

    # compute the predictions for this batch by calling on model
    # print(batch.shape)
    batch = batch.reshape(-1, 1, 200, 200)
    predictions = model(batch)
    truth = ytrain[batch_indices]

    # compute the loss
    loss = softmax_crossentropy(predictions, truth)
    acc = accuracy(predictions, truth)

    # back-propagate through your computational graph through your loss
    loss.backward()
    # compute the accuracy between the prediction and the truth
    # acc = accuracy(predictions, truth)

    # execute gradient descent by calling step() of optimization
    optimization.step()

    # null your gradients
    loss.null_gradients()

    plotter.set_test_batch({"loss": loss.item(), "accuracy": acc}, batch_size=batch_size)
    plotter.set_test_epoch()
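
The accuracy helper called in this loop is not shown in the snippet; a plausible stand-in (hypothetical, assuming predictions holds shape-(N, C) class scores and truth holds N integer labels) might be:

import numpy as np

def accuracy(predictions, truth):
    # fraction of samples whose highest-scoring class matches the true label
    scores = predictions.data if hasattr(predictions, "data") else np.asarray(predictions)
    return float(np.mean(scores.argmax(axis=1) == np.asarray(truth)))
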
def test_input_validation(data, labels):
    with raises((ValueError, TypeError)):
        softmax_crossentropy(data, labels)