Example #1
def logistic_regression(
    T, features, target, steps, learning_rate, sample, add_intercept=False
):
    if add_intercept:
        intercept = np.ones((features.shape[0], 1), dtype=T)
        features = np.hstack((intercept, features))

    weights = np.zeros(features.shape[1], dtype=T)

    for step in range(steps):
        scores = np.dot(features, weights)
        predictions = sigmoid(scores)

        # Gradient ascent step on the log-likelihood
        error = target - predictions
        gradient = np.dot(error, features)
        weights += learning_rate * gradient

        if step % sample == 0:
            print(
                "Log Likelihood of step "
                + str(step)
                + ": "
                + str(log_likelihood(features, target, weights))
            )

    return weights
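Example #1 assumes NumPy is imported as np and that a sigmoid helper is in scope (the log_likelihood helper it prints is shown in Example #7). A minimal sketch of that helper, under those assumptions:

import numpy as np


def sigmoid(x):
    # Elementwise logistic function: maps raw scores to probabilities in (0, 1)
    return 1.0 / (1.0 + np.exp(-x))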
Example #2
def run_gemm(N, I, ft):  # noqa: E741
    print("Problem Size:     M=" + str(N) + " N=" + str(N) + " K=" + str(N))
    print("Total Iterations: " + str(I))
    flops = total_flops(N, N, N)
    print("Total Flops:      " + str(flops / 1e9) + " GFLOPS/iter")
    space = total_space(N, N, N, ft)
    print("Total Size:       " + str(space / 1e6) + " MB")
    A, B, C = initialize(N, N, N, ft)
    # Compute some sums and check for NaNs to force synchronization
    # before we start the timing
    assert not math.isnan(np.sum(A))
    assert not math.isnan(np.sum(B))
    assert not math.isnan(np.sum(C))
    start = datetime.datetime.now()
    # Run for as many iterations as was requested
    for idx in range(I):
        np.dot(A, B, out=C)
        # We need to rotate the matrices to keep Legate honest
        # about moving data so it can't just duplicate A and B
        # on the first iteration and reuse them, this means
        # that A, B, C all need to be square
        A, B, C = B, C, A
    # Do another sum to synchronize for timings, B is last output
    assert not math.isnan(np.sum(B))
    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    print("Elapsed Time:     " + str(total) + " ms")
    average = total / I
    print("Average GEMM:     " + str(average) + " ms")
    print("FLOPS/s:          " + str(flops / (average * 1e6)) + " GFLOPS/s")
    return total
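run_gemm relies on total_flops, total_space and initialize helpers, plus the math, datetime and numpy imports. A hedged sketch of those helpers, assuming the standard dense GEMM cost model (2*M*N*K flops and three resident matrices):

import datetime
import math

import numpy as np


def total_flops(M, N, K):
    # Assumed cost model: one multiply and one add per element of the K-deep dot products
    return 2 * M * N * K


def total_space(M, N, K, ft):
    # Assumed footprint: bytes for A (M x K), B (K x N) and C (M x N) in dtype ft
    return (M * K + K * N + M * N) * np.dtype(ft).itemsize


def initialize(M, N, K, ft):
    # Assumed setup: random A and B, zero-initialized C (all square when M == N == K)
    A = np.random.rand(M, K).astype(ft)
    B = np.random.rand(K, N).astype(ft)
    C = np.zeros((M, N), dtype=ft)
    return A, B, C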
Example #3
def solve(A, b, iters, verbose):
    print("Solving system...")
    x = np.zeros(A.shape[1])
    d = np.diag(A)        # diagonal of A
    R = A - np.diag(d)    # off-diagonal remainder
    for i in range(iters):
        # Jacobi update: x_{k+1} = D^-1 (b - R x_k)
        x = (b - np.dot(R, x)) / d
    return x
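solve is a Jacobi iteration: A is split into its diagonal d and the off-diagonal remainder R, and x is repeatedly updated as x = (b - R x) / d, which converges when A is strictly diagonally dominant. A small usage sketch under that assumption:

import numpy as np

# A strictly diagonally dominant system, so the Jacobi iteration converges
A = np.array([[4.0, 1.0], [2.0, 5.0]])
b = np.array([1.0, 2.0])
x = solve(A, b, iters=50, verbose=False)
print(np.allclose(np.dot(A, x), b))  # expect True once the iteration has converged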
Example #4
def calculate_distances(data, centroids, data_dots):
    centroid_dots = np.square(np.linalg.norm(centroids, ord=2, axis=1))
    pairwise_distances = (data_dots[:, np.newaxis] +
                          centroid_dots[np.newaxis, :])
    # ||x-y||^2 = ||x||^2 + ||y||^2 - 2 x . y
    # pairwise_distances has ||x||^2 + ||y||^2, so beta = 1
    # The gemm calculates x.y for all x and y, so alpha = -2.0
    pairwise_distances -= 2.0 * np.dot(data, centroids.T)
    return pairwise_distances
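calculate_distances expands ||x - y||^2 = ||x||^2 + ||y||^2 - 2 x . y, so data_dots is expected to hold the precomputed squared norms of the data rows. A minimal sketch of how it might be built and called, assuming one point per row:

import numpy as np

data = np.random.rand(100, 3)      # 100 points in 3 dimensions
centroids = np.random.rand(4, 3)   # 4 cluster centers
data_dots = np.square(np.linalg.norm(data, ord=2, axis=1))
distances = calculate_distances(data, centroids, data_dots)
print(distances.shape)  # (100, 4): squared distance from each point to each centroid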
Example #5
def forward(x, h_prev, C_prev, H_size, X_size, p):
    assert x.shape == (X_size, 1)
    assert h_prev.shape == (H_size, 1)
    assert C_prev.shape == (H_size, 1)

    z = np.row_stack((h_prev, x))  # stack previous hidden state on top of the input
    f = sigmoid(np.dot(p.W_f.v, z) + p.b_f.v)   # forget gate
    i = sigmoid(np.dot(p.W_i.v, z) + p.b_i.v)   # input gate
    C_bar = tanh(np.dot(p.W_C.v, z) + p.b_C.v)  # candidate cell state

    C = f * C_prev + i * C_bar                  # new cell state
    o = sigmoid(np.dot(p.W_o.v, z) + p.b_o.v)   # output gate
    h = o * tanh(C)                             # new hidden state

    v = np.dot(p.W_v.v, h) + p.b_v.v
    y = np.exp(v) / np.sum(np.exp(v))  # softmax

    return z, f, i, C_bar, C, o, h, v, y
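forward assumes sigmoid and tanh helpers and a parameter container p whose fields (W_f, W_i, W_C, W_o, W_v and the matching biases) expose their values through .v and accumulate gradients in .d (used by the backward pass in Example #8). A minimal sketch of those pieces, under that assumption:

import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def tanh(x):
    return np.tanh(x)


class Param:
    # Hypothetical container pairing a parameter value with its gradient accumulator
    def __init__(self, value):
        self.v = value
        self.d = np.zeros_like(value)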
Example #6
def linear_regression(T,
                      features,
                      target,
                      steps,
                      learning_rate,
                      sample,
                      add_intercept=False):
    if add_intercept:
        intercept = np.ones((features.shape[0], 1), dtype=T)
        features = np.hstack((intercept, features))

    weights = np.zeros(features.shape[1], dtype=T)

    for step in range(steps):
        scores = np.dot(features, weights)
        error = scores - target
        # Negative gradient of the mean squared error, so the update below adds it directly
        gradient = -(1.0 / len(features)) * error.dot(features)
        weights += learning_rate * gradient

        if step % sample == 0:
            print("Error of step " + str(step) + ": " +
                  str(np.sum(np.power(error, 2))))

    return weights
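linear_regression is plain batch gradient descent on the squared error: error = Xw - y, the negated mean gradient is added to the weights, and the running sum of squared errors is printed whenever step is a multiple of sample. A short usage sketch on synthetic data (the learning rate and step count here are illustrative):

import numpy as np

features = np.random.rand(200, 3)
true_weights = np.array([1.5, -2.0, 0.5])
target = np.dot(features, true_weights) + 3.0  # 3.0 acts as the intercept
weights = linear_regression(
    np.float64, features, target, steps=2000,
    learning_rate=0.1, sample=500, add_intercept=True
)
print(weights)  # roughly [3.0, 1.5, -2.0] once converged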
Example #7
def log_likelihood(features, target, weights):
    scores = np.dot(features, weights)
    return np.sum(target * scores - np.log(1.0 + np.exp(scores)))
Example #8
def backward(
    target,
    dh_next,
    dC_next,
    C_prev,
    H_size,
    X_size,
    z,
    f,
    i,
    C_bar,
    C,
    o,
    h,
    v,
    y,
    p,
):

    assert z.shape == (X_size + H_size, 1)
    assert v.shape == (X_size, 1)
    assert y.shape == (X_size, 1)

    for param in [dh_next, dC_next, C_prev, f, i, C_bar, C, o, h]:
        assert param.shape == (H_size, 1)

    # Softmax / cross-entropy gradient with respect to the logits v
    dv = np.copy(y)
    dv[target] -= 1

    p.W_v.d += np.dot(dv, h.T)
    p.b_v.d += dv

    dh = np.dot(p.W_v.v.T, dv)
    dh += dh_next
    do = dh * tanh(C)
    do = dsigmoid(o) * do
    p.W_o.d += np.dot(do, z.T)
    p.b_o.d += do

    dC = np.copy(dC_next)
    dC += dh * o * dtanh(tanh(C))
    dC_bar = dC * i
    dC_bar = dtanh(C_bar) * dC_bar
    p.W_C.d += np.dot(dC_bar, z.T)
    p.b_C.d += dC_bar

    di = dC * C_bar
    di = dsigmoid(i) * di
    p.W_i.d += np.dot(di, z.T)
    p.b_i.d += di

    df = dC * C_prev
    df = dsigmoid(f) * df
    p.W_f.d += np.dot(df, z.T)
    p.b_f.d += df

    dz = (
        np.dot(p.W_f.v.T, df)
        + np.dot(p.W_i.v.T, di)
        + np.dot(p.W_C.v.T, dC_bar)
        + np.dot(p.W_o.v.T, do)
    )
    dh_prev = dz[:H_size, :]
    dC_prev = f * dC

    return dh_prev, dC_prev
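backward assumes dsigmoid and dtanh helpers that take the already-activated values (note the calls dsigmoid(o) and dtanh(tanh(C)) above), i.e. the derivatives expressed in terms of the function's output. A minimal sketch under that assumption:

def dsigmoid(y):
    # Derivative of the sigmoid written in terms of its output y = sigmoid(x)
    return y * (1.0 - y)


def dtanh(y):
    # Derivative of tanh written in terms of its output y = tanh(x)
    return 1.0 - y * y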
Example #9
    def backward(dHout_in, cache, dcn=None, dhn=None):

        WLSTM = cache["WLSTM"]
        Hout = cache["Hout"]
        IFOGf = cache["IFOGf"]
        IFOG = cache["IFOG"]
        C = cache["C"]
        Ct = cache["Ct"]
        Hin = cache["Hin"]
        c0 = cache["c0"]
        # h0 = cache["h0"]
        n, b, d = Hout.shape
        input_size = WLSTM.shape[0] - d - 1  # -1 due to bias

        # backprop the LSTM
        dIFOG = np.zeros(IFOG.shape)
        dIFOGf = np.zeros(IFOGf.shape)
        dWLSTM = np.zeros(WLSTM.shape)
        dHin = np.zeros(Hin.shape)
        dC = np.zeros(C.shape)
        dX = np.zeros((n, b, input_size))
        dh0 = np.zeros((b, d))
        dc0 = np.zeros((b, d))
        # make a copy so we don't have any funny side effects
        dHout = dHout_in.copy()
        if dcn is not None:
            dC[n - 1] += dcn.copy()  # carry over gradients from later
        if dhn is not None:
            dHout[n - 1] += dhn.copy()
        for t in reversed(range(n)):

            tanhCt = Ct[t]
            dIFOGf[t, :, 2 * d:3 * d] = tanhCt * dHout[t]
            # backprop tanh non-linearity first then continue backprop
            dC[t] += (1 - tanhCt**2) * (IFOGf[t, :, 2 * d:3 * d] * dHout[t])
            if t > 0:
                dIFOGf[t, :, d:2 * d] = C[t - 1] * dC[t]
                dC[t - 1] += IFOGf[t, :, d:2 * d] * dC[t]
            else:
                dIFOGf[t, :, d:2 * d] = c0 * dC[t]
                dc0 = IFOGf[t, :, d:2 * d] * dC[t]
            dIFOGf[t, :, :d] = IFOGf[t, :, 3 * d:] * dC[t]
            dIFOGf[t, :, 3 * d:] = IFOGf[t, :, :d] * dC[t]

            # backprop activation functions
            dIFOG[t, :, 3 * d:] = (
                1 - IFOGf[t, :, 3 * d:] ** 2
            ) * dIFOGf[t, :, 3 * d:]
            y = IFOGf[t, :, :3 * d]
            dIFOG[t, :, :3 * d] = (y * (1.0 - y)) * dIFOGf[t, :, :3 * d]

            # backprop matrix multiply
            dWLSTM += np.dot(Hin[t].transpose(), dIFOG[t])
            dHin[t] = dIFOG[t].dot(WLSTM.transpose())

            # backprop the identity transforms into Hin
            dX[t] = dHin[t, :, 1:input_size + 1]
            if t > 0:
                dHout[t - 1, :] += dHin[t, :, input_size + 1:]
            else:
                dh0 += dHin[t, :, input_size + 1:]

        return dX, dWLSTM, dc0, dh0