Code example #1
import numpy as np

# `get_minibatch` and the `loss_fun` module are project helpers not shown
# in this excerpt; illustrative sketches follow this example.


def momentum(nn,
             X_train,
             y_train,
             val_set=None,
             alpha=1e-3,
             mb_size=256,
             n_iter=2000,
             print_after=100):
    # Per-parameter velocity buffers and the momentum coefficient.
    velocity = {k: np.zeros_like(v) for k, v in nn.model.items()}
    gamma = .9

    # Pre-split the training set into a list of (X_mini, y_mini) pairs.
    minibatches = get_minibatch(X_train, y_train, mb_size)

    if val_set:
        X_val, y_val = val_set

    for it in range(1, n_iter + 1):
        # Sample a random minibatch (with replacement) each iteration.
        idx = np.random.randint(0, len(minibatches))
        X_mini, y_mini = minibatches[idx]

        grad, loss = nn.train_step(X_mini, y_mini)

        if it % print_after == 0:
            if val_set:
                val_acc = loss_fun.accuracy(y_val, nn.predict(X_val))
                print('Iter-{} loss: {:.4f} validation: {:.4f}'.format(
                    it, loss, val_acc))
            else:
                print('Iter-{} loss: {:.4f}'.format(it, loss))

        for layer in grad:
            # Momentum update: decay the previous velocity, accumulate the
            # scaled gradient, then step the parameters along the velocity.
            velocity[layer] = gamma * velocity[layer] + alpha * grad[layer]
            nn.model[layer] -= velocity[layer]

    return nn
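
The examples in this section call two helpers that the excerpt does not define: get_minibatch and the loss_fun module. Below is a minimal, illustrative sketch of what they might look like, assuming get_minibatch shuffles the data and slices it into fixed-size batches and loss_fun.accuracy is plain mean classification accuracy; treat these as stand-ins, not the original implementations.

import numpy as np
from types import SimpleNamespace


def get_minibatch(X, y, mb_size, shuffle=True):
    # Slice (X, y) into a list of (X_mini, y_mini) pairs of size mb_size.
    if shuffle:
        idx = np.random.permutation(X.shape[0])
        X, y = X[idx], y[idx]
    return [(X[i:i + mb_size], y[i:i + mb_size])
            for i in range(0, X.shape[0], mb_size)]


def _accuracy(y_true, y_pred):
    # Fraction of predictions that match the true labels.
    return np.mean(y_true == y_pred)


# Stand-in namespace so that loss_fun.accuracy resolves as in the examples.
loss_fun = SimpleNamespace(accuracy=_accuracy)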
Code example #2
def sgd(nn,
        X_train,
        y_train,
        val_set=None,
        alpha=1e-3,
        mb_size=256,
        n_iter=2000,
        print_after=100):
    minibatches = get_minibatch(X_train, y_train, mb_size)

    if val_set:
        X_val, y_val = val_set

    for it in range(1, n_iter + 1):
        idx = np.random.randint(0, len(minibatches))
        X_mini, y_mini = minibatches[idx]

        grad, loss = nn.train_step(X_mini, y_mini)

        if it % print_after == 0:
            if val_set:
                val_acc = loss_fun.accuracy(y_val, nn.predict(X_val))
                print('Iter-{} loss: {:.4f} validation: {:.4f}'.format(
                    it, loss, val_acc))
            else:
                print('Iter-{} loss: {:.4f}'.format(it, loss))

        for layer in grad:
            # Vanilla SGD update: step each parameter against its gradient.
            nn.model[layer] -= alpha * grad[layer]

    return nn
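
To make the expected interface concrete, here is a hypothetical end-to-end run: a tiny softmax classifier exposing the model dict plus the train_step and predict methods these optimizers require, trained with sgd on synthetic data. TinyNet and the toy data are assumptions of this sketch, not part of the original code.

class TinyNet:
    # Minimal stand-in for the `nn` object the optimizers expect.

    def __init__(self, n_features, n_classes):
        self.model = {'W': np.random.randn(n_features, n_classes) * .01,
                      'b': np.zeros(n_classes)}

    def _scores(self, X):
        return X @ self.model['W'] + self.model['b']

    def predict(self, X):
        return self._scores(X).argmax(axis=1)

    def train_step(self, X, y):
        # Softmax cross-entropy loss and its gradients w.r.t. W and b.
        s = self._scores(X)
        s -= s.max(axis=1, keepdims=True)  # for numerical stability
        p = np.exp(s) / np.exp(s).sum(axis=1, keepdims=True)
        n = X.shape[0]
        loss = -np.log(p[np.arange(n), y]).mean()
        dp = p.copy()
        dp[np.arange(n), y] -= 1.
        dp /= n
        return {'W': X.T @ dp, 'b': dp.sum(axis=0)}, loss


X = np.random.randn(1000, 20)
y = (X[:, 0] > 0).astype(int)  # toy binary labels
nn = sgd(TinyNet(20, 2), X[:800], y[:800], val_set=(X[800:], y[800:]))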
Code example #3
def rmsprop(nn,
            X_train,
            y_train,
            val_set=None,
            alpha=1e-3,
            mb_size=256,
            n_iter=2000,
            print_after=100):
    # Running average of squared gradients per parameter and its decay rate.
    cache = {k: np.zeros_like(v) for k, v in nn.model.items()}
    gamma = .9

    minibatches = get_minibatch(X_train, y_train, mb_size)

    if val_set:
        X_val, y_val = val_set

    for it in range(1, n_iter + 1):
        idx = np.random.randint(0, len(minibatches))
        X_mini, y_mini = minibatches[idx]

        grad, loss = nn.train_step(X_mini, y_mini)

        if it % print_after == 0:
            if val_set:
                val_acc = loss_fun.accuracy(y_val, nn.predict(X_val))
                print('Iter-{} loss: {:.4f} validation: {:.4f}'.format(
                    it, loss, val_acc))
            else:
                print('Iter-{} loss: {:.4f}'.format(it, loss))

        for k in grad:
            # RMSprop update: track a decaying average of squared gradients
            # and divide the step by its root; 1e-8 guards against division
            # by zero.
            cache[k] = loss_fun.exp_running_avg(cache[k], grad[k]**2, gamma)
            nn.model[k] -= alpha * grad[k] / (np.sqrt(cache[k]) + 1e-8)

    return nn
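
rmsprop above (and adam below) additionally call loss_fun.exp_running_avg, which is also absent from the excerpt. Assuming it is the conventional exponentially weighted moving average, a plausible definition, attached to the stand-in namespace sketched after example #1:

def exp_running_avg(running, new, gamma=.9):
    # Keep `gamma` of the running estimate and blend in (1 - gamma)
    # of the new observation.
    return gamma * running + (1. - gamma) * new


loss_fun.exp_running_avg = exp_running_avg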
Code example #4
def adam(nn,
         X_train,
         y_train,
         val_set=None,
         alpha=1e-3,
         mb_size=256,
         n_iter=2000,
         print_after=100):
    # First (M) and second (R) moment estimates and their decay rates.
    M = {k: np.zeros_like(v) for k, v in nn.model.items()}
    R = {k: np.zeros_like(v) for k, v in nn.model.items()}
    beta1 = .9
    beta2 = .999

    minibatches = get_minibatch(X_train, y_train, mb_size)

    if val_set:
        X_val, y_val = val_set

    for it in range(1, n_iter + 1):
        t = it  # time step, used for bias correction below
        idx = np.random.randint(0, len(minibatches))
        X_mini, y_mini = minibatches[idx]

        grad, loss = nn.train_step(X_mini, y_mini)

        if it % print_after == 0:
            if val_set:
                val_acc = loss_fun.accuracy(y_val, nn.predict(X_val))
                print('Iter-{} loss: {:.4f} validation: {:.4f}'.format(
                    it, loss, val_acc))
            else:
                print('Iter-{} loss: {:.4f}'.format(it, loss))

        for k in grad:
            # Update the biased first and second moment estimates.
            M[k] = loss_fun.exp_running_avg(M[k], grad[k], beta1)
            R[k] = loss_fun.exp_running_avg(R[k], grad[k]**2, beta2)

            # Correct the bias towards zero that the zero-initialized
            # moments carry early in training.
            m_k_hat = M[k] / (1. - beta1**t)
            r_k_hat = R[k] / (1. - beta2**t)

            nn.model[k] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + 1e-8)

    return nn
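
As a quick sanity check on adam's bias correction (under the exp_running_avg definition assumed above): at t = 1 the raw first moment is only (1 - beta1) * grad, since it starts from zero, and dividing by 1 - beta1**1 recovers the gradient exactly.

beta1 = .9
g = np.array([.5, -2.])
m = exp_running_avg(np.zeros_like(g), g, beta1)  # = (1 - beta1) * g
m_hat = m / (1. - beta1**1)                      # bias correction at t = 1
assert np.allclose(m_hat, g)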