import datetime
import math

import numpy as np


def logistic_regression(
    T, features, target, steps, learning_rate, sample, add_intercept=False
):
    if add_intercept:
        intercept = np.ones((features.shape[0], 1), dtype=T)
        features = np.hstack((intercept, features))

    weights = np.zeros(features.shape[1], dtype=T)

    for step in range(steps):
        scores = np.dot(features, weights)
        predictions = sigmoid(scores)

        # Gradient ascent on the log likelihood
        error = target - predictions
        gradient = np.dot(error, features)
        weights += learning_rate * gradient

        if step % sample == 0:
            print(
                "Log Likelihood of step "
                + str(step)
                + ": "
                + str(log_likelihood(features, target, weights))
            )

    return weights
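
# `sigmoid` is assumed to be defined elsewhere in the example; a minimal
# sketch consistent with its use above:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


# Hypothetical usage on a linearly separable toy problem:
def demo_logistic_regression():
    rng = np.random.default_rng(0)
    pos = rng.standard_normal((500, 2)) + 2.0
    neg = rng.standard_normal((500, 2)) - 2.0
    features = np.vstack((pos, neg))
    target = np.concatenate((np.ones(500), np.zeros(500)))
    return logistic_regression(
        np.float64, features, target, steps=1000, learning_rate=1e-4,
        sample=100, add_intercept=True,
    )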
def run_gemm(N, I, ft):  # noqa: E741
    print("Problem Size: M=" + str(N) + " N=" + str(N) + " K=" + str(N))
    print("Total Iterations: " + str(I))
    flops = total_flops(N, N, N)
    print("Total Flops: " + str(flops / 1e9) + " GFLOPS/iter")
    space = total_space(N, N, N, ft)
    print("Total Size: " + str(space / 1e6) + " MB")
    A, B, C = initialize(N, N, N, ft)
    # Compute some sums and check for NaNs to force synchronization
    # before we start the timing
    assert not math.isnan(np.sum(A))
    assert not math.isnan(np.sum(B))
    assert not math.isnan(np.sum(C))
    start = datetime.datetime.now()
    # Run for as many iterations as were requested
    for idx in range(I):
        np.dot(A, B, out=C)
        # We need to rotate the matrices to keep Legate honest
        # about moving data so it can't just duplicate A and B
        # on the first iteration and reuse them; this means
        # that A, B, and C all need to be square
        A, B, C = B, C, A
    # Do another sum to synchronize for timings; B is the last output
    assert not math.isnan(np.sum(B))
    stop = datetime.datetime.now()
    delta = stop - start
    total = delta.total_seconds() * 1000.0
    print("Elapsed Time: " + str(total) + " ms")
    average = total / I
    print("Average GEMM: " + str(average) + " ms")
    print("FLOPS/s: " + str(flops / (average * 1e6)) + " GFLOPS/s")
    return total
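
# total_flops, total_space, and initialize are defined elsewhere in the
# example; the sketches below are hypothetical reconstructions consistent
# with how they are used above:
def total_flops(M, N, K):
    # One common count for GEMM: M * N dot products of length K,
    # each costing K multiplies and K - 1 adds
    return M * N * (2 * K - 1)


def total_space(M, N, K, ft):
    # Bytes for the three matrices A (M x K), B (K x N), C (M x N)
    return (M * K + K * N + M * N) * np.dtype(ft).itemsize


def initialize(M, N, K, ft):
    # The rotation trick in run_gemm requires square matrices,
    # so only N is used for the shapes here
    A = np.random.rand(N, N).astype(ft)
    B = np.random.rand(N, N).astype(ft)
    C = np.zeros((N, N), dtype=ft)
    return A, B, C


# Hypothetical usage: 10 iterations of a 1024^3 single-precision GEMM
# run_gemm(1024, 10, np.float32)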
def solve(A, b, iters, verbose):
    print("Solving system...")
    x = np.zeros(A.shape[1])
    d = np.diag(A)
    R = A - np.diag(d)
    # Jacobi iteration: x_{k+1} = D^-1 (b - R x_k)
    for i in range(iters):
        x = (b - np.dot(R, x)) / d
    return x
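
# Hypothetical usage: Jacobi converges when A is strictly diagonally
# dominant, so this demo constructs such a system and checks the result.
def demo_jacobi(n=100, iters=200):
    rng = np.random.default_rng(0)
    A = rng.random((n, n)) + n * np.eye(n)  # strictly diagonally dominant
    b = rng.random(n)
    x = solve(A, b, iters, verbose=False)
    assert np.allclose(np.dot(A, x), b)
    return x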
def calculate_distances(data, centroids, data_dots):
    centroid_dots = np.square(np.linalg.norm(centroids, ord=2, axis=1))
    pairwise_distances = (
        data_dots[:, np.newaxis] + centroid_dots[np.newaxis, :]
    )
    # ||x-y||^2 = ||x||^2 + ||y||^2 - 2 x . y
    # pairwise_distances has ||x||^2 + ||y||^2, so beta = 1
    # The gemm calculates x.y for all x and y, so alpha = -2.0
    pairwise_distances -= 2.0 * np.dot(data, centroids.T)
    return pairwise_distances
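
# Hypothetical check of the expansion above against directly computed
# pairwise squared distances:
def demo_distances(n_points=1000, n_centroids=10, dims=2):
    rng = np.random.default_rng(0)
    data = rng.random((n_points, dims))
    centroids = rng.random((n_centroids, dims))
    # ||x||^2 for every data point, precomputed once per k-means iteration
    data_dots = np.square(np.linalg.norm(data, ord=2, axis=1))
    d2 = calculate_distances(data, centroids, data_dots)
    ref = np.square(
        data[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    ).sum(axis=2)
    assert np.allclose(d2, ref)
    return d2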
def forward(x, h_prev, C_prev, H_size, X_size, p):
    assert x.shape == (X_size, 1)
    assert h_prev.shape == (H_size, 1)
    assert C_prev.shape == (H_size, 1)

    # Stack the previous hidden state on top of the current input
    # (np.row_stack is deprecated; np.vstack is equivalent)
    z = np.vstack((h_prev, x))
    f = sigmoid(np.dot(p.W_f.v, z) + p.b_f.v)  # forget gate
    i = sigmoid(np.dot(p.W_i.v, z) + p.b_i.v)  # input gate
    C_bar = tanh(np.dot(p.W_C.v, z) + p.b_C.v)  # candidate cell state

    C = f * C_prev + i * C_bar
    o = sigmoid(np.dot(p.W_o.v, z) + p.b_o.v)  # output gate
    h = o * tanh(C)

    v = np.dot(p.W_v.v, h) + p.b_v.v
    y = np.exp(v) / np.sum(np.exp(v))  # softmax

    return z, f, i, C_bar, C, o, h, v, y
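
# `tanh`, `dsigmoid`, and `dtanh` are assumed to be defined elsewhere in the
# example; minimal sketches consistent with their use here and in the
# backward pass below (both derivative helpers take the activation output):
def tanh(x):
    return np.tanh(x)


def dsigmoid(y):
    # derivative of the sigmoid, given the sigmoid output y
    return y * (1.0 - y)


def dtanh(y):
    # derivative of tanh, given the tanh output y
    return 1.0 - y * y


# The parameter object `p` is assumed to expose `.v` (value) and `.d`
# (gradient accumulator) per weight; `Param` and `init_params` below are
# hypothetical helpers matching the attribute names used above:
from types import SimpleNamespace


class Param:
    def __init__(self, value):
        self.v = value
        self.d = np.zeros_like(value)


def init_params(H_size, X_size, weight_sd=0.1, seed=0):
    rng = np.random.default_rng(seed)
    z_size = H_size + X_size

    def W(rows, cols):
        return Param(rng.standard_normal((rows, cols)) * weight_sd)

    def b(rows):
        return Param(np.zeros((rows, 1)))

    return SimpleNamespace(
        W_f=W(H_size, z_size), b_f=b(H_size),
        W_i=W(H_size, z_size), b_i=b(H_size),
        W_C=W(H_size, z_size), b_C=b(H_size),
        W_o=W(H_size, z_size), b_o=b(H_size),
        W_v=W(X_size, H_size), b_v=b(X_size),
    )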
def linear_regression(
    T, features, target, steps, learning_rate, sample, add_intercept=False
):
    if add_intercept:
        intercept = np.ones((features.shape[0], 1), dtype=T)
        features = np.hstack((intercept, features))

    weights = np.zeros(features.shape[1], dtype=T)

    for step in range(steps):
        scores = np.dot(features, weights)
        error = scores - target

        # Gradient descent on the mean squared error
        gradient = -(1.0 / len(features)) * error.dot(features)
        weights += learning_rate * gradient

        if step % sample == 0:
            print(
                "Error of step "
                + str(step)
                + ": "
                + str(np.sum(np.power(error, 2)))
            )

    return weights
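
# Hypothetical usage on synthetic data with a known slope and intercept:
def demo_linear_regression():
    rng = np.random.default_rng(0)
    features = rng.random((1000, 1))
    target = 3.0 * features[:, 0] + 2.0 + 0.01 * rng.standard_normal(1000)
    weights = linear_regression(
        np.float64, features, target, steps=5000, learning_rate=0.5,
        sample=1000, add_intercept=True,
    )
    # weights[0] ~ 2.0 (intercept), weights[1] ~ 3.0 (slope)
    return weights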
def log_likelihood(features, target, weights):
    scores = np.dot(features, weights)
    return np.sum(target * scores - np.log(1.0 + np.exp(scores)))
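
# np.exp(scores) overflows for large positive scores; a numerically stable
# variant (not in the original) uses np.logaddexp, since
# log(1 + exp(s)) == logaddexp(0, s):
def log_likelihood_stable(features, target, weights):
    scores = np.dot(features, weights)
    return np.sum(target * scores - np.logaddexp(0.0, scores))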
def backward(
    target,
    dh_next,
    dC_next,
    C_prev,
    H_size,
    X_size,
    z,
    f,
    i,
    C_bar,
    C,
    o,
    h,
    v,
    y,
    p,
):
    assert z.shape == (X_size + H_size, 1)
    assert v.shape == (X_size, 1)
    assert y.shape == (X_size, 1)
    for param in [dh_next, dC_next, C_prev, f, i, C_bar, C, o, h]:
        assert param.shape == (H_size, 1)

    # Softmax + cross-entropy gradient with respect to v
    dv = np.copy(y)
    dv[target] -= 1

    p.W_v.d += np.dot(dv, h.T)
    p.b_v.d += dv

    dh = np.dot(p.W_v.v.T, dv)
    dh += dh_next
    do = dh * tanh(C)
    do = dsigmoid(o) * do
    p.W_o.d += np.dot(do, z.T)
    p.b_o.d += do

    dC = np.copy(dC_next)
    dC += dh * o * dtanh(tanh(C))
    dC_bar = dC * i
    dC_bar = dtanh(C_bar) * dC_bar
    p.W_C.d += np.dot(dC_bar, z.T)
    p.b_C.d += dC_bar

    di = dC * C_bar
    di = dsigmoid(i) * di
    p.W_i.d += np.dot(di, z.T)
    p.b_i.d += di

    df = dC * C_prev
    df = dsigmoid(f) * df
    p.W_f.d += np.dot(df, z.T)
    p.b_f.d += df

    dz = (
        np.dot(p.W_f.v.T, df)
        + np.dot(p.W_i.v.T, di)
        + np.dot(p.W_C.v.T, dC_bar)
        + np.dot(p.W_o.v.T, do)
    )
    dh_prev = dz[:H_size, :]
    dC_prev = f * dC

    return dh_prev, dC_prev
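
# Hypothetical smoke test wiring this example's step-level forward and
# backward together, using the init_params sketch above (it assumes these
# two functions are the ones in scope):
def demo_lstm_step(H_size=8, X_size=5, target=3):
    p = init_params(H_size, X_size)
    x = np.zeros((X_size, 1))
    x[target] = 1.0  # one-hot input
    h_prev = np.zeros((H_size, 1))
    C_prev = np.zeros((H_size, 1))
    z, f, i, C_bar, C, o, h, v, y = forward(
        x, h_prev, C_prev, H_size, X_size, p
    )
    dh_next = np.zeros((H_size, 1))
    dC_next = np.zeros((H_size, 1))
    return backward(
        target, dh_next, dC_next, C_prev, H_size, X_size,
        z, f, i, C_bar, C, o, h, v, y, p,
    )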
def backward(dHout_in, cache, dcn=None, dhn=None):
    WLSTM = cache["WLSTM"]
    Hout = cache["Hout"]
    IFOGf = cache["IFOGf"]
    IFOG = cache["IFOG"]
    C = cache["C"]
    Ct = cache["Ct"]
    Hin = cache["Hin"]
    c0 = cache["c0"]
    # h0 = cache["h0"]
    n, b, d = Hout.shape
    input_size = WLSTM.shape[0] - d - 1  # -1 due to bias

    # backprop the LSTM
    dIFOG = np.zeros(IFOG.shape)
    dIFOGf = np.zeros(IFOGf.shape)
    dWLSTM = np.zeros(WLSTM.shape)
    dHin = np.zeros(Hin.shape)
    dC = np.zeros(C.shape)
    dX = np.zeros((n, b, input_size))
    dh0 = np.zeros((b, d))
    dc0 = np.zeros((b, d))
    # make a copy so we don't have any funny side effects
    dHout = dHout_in.copy()
    if dcn is not None:
        dC[n - 1] += dcn.copy()  # carry over gradients from later
    if dhn is not None:
        dHout[n - 1] += dhn.copy()
    for t in reversed(range(n)):
        tanhCt = Ct[t]
        dIFOGf[t, :, 2 * d:3 * d] = tanhCt * dHout[t]
        # backprop tanh non-linearity first then continue backprop
        dC[t] += (1 - tanhCt**2) * (IFOGf[t, :, 2 * d:3 * d] * dHout[t])
        if t > 0:
            dIFOGf[t, :, d:2 * d] = C[t - 1] * dC[t]
            dC[t - 1] += IFOGf[t, :, d:2 * d] * dC[t]
        else:
            dIFOGf[t, :, d:2 * d] = c0 * dC[t]
            dc0 = IFOGf[t, :, d:2 * d] * dC[t]
        dIFOGf[t, :, :d] = IFOGf[t, :, 3 * d:] * dC[t]
        dIFOGf[t, :, 3 * d:] = IFOGf[t, :, :d] * dC[t]
        # backprop activation functions
        dIFOG[t, :, 3 * d:] = (
            (1 - IFOGf[t, :, 3 * d:]**2) * dIFOGf[t, :, 3 * d:]
        )
        y = IFOGf[t, :, :3 * d]
        dIFOG[t, :, :3 * d] = (y * (1.0 - y)) * dIFOGf[t, :, :3 * d]
        # backprop matrix multiply
        dWLSTM += np.dot(Hin[t].transpose(), dIFOG[t])
        dHin[t] = dIFOG[t].dot(WLSTM.transpose())
        # backprop the identity transforms into Hin
        dX[t] = dHin[t, :, 1:input_size + 1]
        if t > 0:
            dHout[t - 1, :] += dHin[t, :, input_size + 1:]
        else:
            dh0 += dHin[t, :, input_size + 1:]
    return dX, dWLSTM, dc0, dh0
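
# The batched forward pass that produces the cache consumed above is not in
# this section; `lstm_forward` (name included) is a hypothetical
# reconstruction based on the cache keys and the [i | f | o | g] slice
# layout the backward pass reads:
def lstm_forward(X, WLSTM, c0=None, h0=None):
    n, b, input_size = X.shape  # time steps, batch size, input size
    d = WLSTM.shape[1] // 4  # hidden size; gates laid out as [i | f | o | g]
    if c0 is None:
        c0 = np.zeros((b, d))
    if h0 is None:
        h0 = np.zeros((b, d))
    xphpb = WLSTM.shape[0]  # bias + input + hidden
    Hin = np.zeros((n, b, xphpb))  # concatenated [1, x_t, h_{t-1}]
    Hout = np.zeros((n, b, d))  # hidden states
    IFOG = np.zeros((n, b, d * 4))  # gate pre-activations
    IFOGf = np.zeros((n, b, d * 4))  # gate activations
    C = np.zeros((n, b, d))  # cell states
    Ct = np.zeros((n, b, d))  # tanh of cell states
    for t in range(n):
        prevh = Hout[t - 1] if t > 0 else h0
        Hin[t, :, 0] = 1  # bias
        Hin[t, :, 1:input_size + 1] = X[t]
        Hin[t, :, input_size + 1:] = prevh
        IFOG[t] = Hin[t].dot(WLSTM)
        # sigmoids for the i, f, o gates; tanh for the candidate
        IFOGf[t, :, :3 * d] = 1.0 / (1.0 + np.exp(-IFOG[t, :, :3 * d]))
        IFOGf[t, :, 3 * d:] = np.tanh(IFOG[t, :, 3 * d:])
        prevc = C[t - 1] if t > 0 else c0
        C[t] = (
            IFOGf[t, :, :d] * IFOGf[t, :, 3 * d:]
            + IFOGf[t, :, d:2 * d] * prevc
        )
        Ct[t] = np.tanh(C[t])
        Hout[t] = IFOGf[t, :, 2 * d:3 * d] * Ct[t]
    cache = {
        "WLSTM": WLSTM, "Hout": Hout, "IFOGf": IFOGf, "IFOG": IFOG,
        "C": C, "Ct": Ct, "Hin": Hin, "c0": c0, "h0": h0,
    }
    return Hout, C[n - 1], Hout[n - 1], cache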