def _neg_log_likelihood(x: Vector, y: float, beta: Vector) -> float:
    """
    Negative log-likelihood of one data point under logistic regression.

    With p = logistic(dot(x, beta)), the result is -log(p) when y == 1 and
    -log(1 - p) for any other y.
    """
    probability = logistic(dot(x, beta))
    likelihood = probability if y == 1 else 1 - probability
    return -math.log(likelihood)
def biarc_h(p0, t0, p1, t1, r):
    """Compute three hom(...) control entries joining (p0, t0) to (p1, t1).

    Both tangents are normalised with unit() first. A quadratic
    a*beta**2 + b*beta + c = 0 is then solved for beta, alpha is derived as
    beta * r, and the control points c1, c2, c3 are built from them.

    Returns a 5-tuple:
      * (hom(c1, w1), hom(c2, 1), hom(c3, w2), 0, 0) on the main path;
      * (hom(t0, w1), hom(c2, 1), hom(-t1, -w2), 0, 0) when a == 0, with c2
        taken from lerp(0.5, p0, p1);
      * (None, None, None, 0, 0) when both roots are positive or the chosen
        beta is negative.

    NOTE(review): the incoming `r` only feeds the unused local `g`; it is
    overwritten before anything that affects the result reads it.
    """
    t0 = unit(t0)
    t1 = unit(t1)
    chord = point(p0) - point(p1)
    f = dot(t0, t1)
    # NOTE(review): g is never read after this line.
    g = dot(chord, r * t0 + t1)
    # With s = (f + 1) / 2 this is (1/3) * s + 1 * (1 - s): r is 1/3 when
    # f == 1 and 1 when f == -1.
    r = (1 / 3) * (f + 1) / 2 + 1 * (1 - (f + 1) / 2)
    # Coefficients of the quadratic in beta.
    c = dot(chord, chord)
    b = 2 * dot(chord, r * t0 + t1)
    a = 2 * r * (dot(t0, t1) - 1)
    if a == 0:
        # Degenerate quadratic: use the midpoint of p0 and p1 as the middle
        # control point and the tangents themselves in the outer entries.
        c2 = lerp(0.5, p0, p1)
        w1 = dot(t0, unit(c2 - p0))
        w2 = dot(t1, unit(p1 - c2))
        return hom(t0, w1), hom(c2, 1), hom(-t1, -w2), 0, 0
    D = b * b - 4 * a * c
    if D < 0:
        print(D, "<0")
        # NOTE(review): this parses as (norm2(chord) / 4) * dot(chord, t0);
        # confirm that a division by (4 * dot(chord, t0)) was not intended.
        beta = norm2(chord) / 4 * dot(chord, t0)
    else:
        sqD = D**.5
        beta1 = (-b - sqD) / 2 / a
        beta2 = (-b + sqD) / 2 / a
        if beta1 > 0 and beta2 > 0:
            # Two positive roots are rejected.
            print(beta1, beta2, ">0")
            return None, None, None, 0, 0
        beta = max(beta1, beta2)
    if beta < 0:
        print(beta, "<0")
        return None, None, None, 0, 0
    alpha = beta * r
    ab = alpha + beta
    # Outer control points lie along the tangents; c2 is a weighted blend of
    # c1 and c3 with weights beta/ab and alpha/ab.
    c1 = point(p0) + alpha * t0
    c3 = point(p1) - beta * t1
    c2 = (beta / ab) * point(c1) + (alpha / ab) * point(c3)
    print(alpha, beta)
    w1 = dot(t0, unit(c2 - p0))
    w2 = dot(t1, unit(p1 - c2))
    return hom(c1, w1), hom(c2, 1), hom(c3, w2), 0, 0
def _negative_log_partial_j(x: Vector, y: float, beta: Vector, j: int) -> float:
    """
    jth partial derivative, with respect to beta, of the negative
    log-likelihood of a single data point.

    For a label y of 0 or 1 and p = logistic(dot(x, beta)), differentiating
    -log(p) (y == 1) or -log(1 - p) (y == 0) with respect to beta[j] gives
    -(y - p) * x[j] in both cases, which is the value returned here.
    """
    residual = y - logistic(dot(x, beta))
    return -residual * x[j]
def circparam(p0, p1, p2):
    """Compute a radius and a centre from three points.

    NOTE(review): the visible body computes `rad` and `center` but has no
    return statement, so the call yields None. Either the definition is
    truncated here or a `return` is missing - confirm against the full file.
    """
    chord = p2 - p0
    # perp2 of the unit direction from p0 to p1 (presumably a 2D
    # perpendicular - confirm against perp2's definition).
    n = perp2(unit(p1 - p0))
    dotnc = dot(chord, n)
    # Divides by zero when chord has no component along n.
    rad = norm2(chord) / (2 * dotnc)
    center = p0 + n * rad
def dv_gradient(data: List[Vector], w: Vector) -> Vector:
    """
    Gradient of the directional variance of `data` along the direction of `w`.

    Component i of the result is the sum over all points v in `data` of
    2 * dot(v, direction(w)) * v[i]; the result has len(w) components.
    """
    dir_w = direction(w)
    # A point's projection onto dir_w is the same for every coordinate i, so
    # compute it once per point instead of once per (point, coordinate) pair.
    doubled_projections = [2 * dot(v, dir_w) for v in data]
    return [
        sum(projection * v[i] for projection, v in zip(doubled_projections, data))
        for i in range(len(w))
    ]
def offset(self, o):
    """Return a deep copy of this object with its geometry displaced by `o`.

    The copy's p0, p1, r, bp, h1 and h2 attributes are recomputed; `self` is
    left untouched because all writes go to the deepcopy.
    """
    b = deepcopy(self)
    pt0 = perp2(b.t0)
    pt1 = perp2(b.t1)
    c1 = proj(b.bp[0])
    c2 = proj(b.bp[1])
    c3 = proj(b.bp[2])
    # This first t2 is only used to derive pt2; t2 is rebound further down.
    t2 = unit(c3 - c1)
    pt2 = perp2(t2)
    # Unit directions from p0 to c2 and from c2 to p1, taken before any point
    # is moved.
    cc1 = unit(c2 - b.p0)
    cc2 = unit(b.p1 - c2)
    dp1 = dot(b.t0, cc1)
    dp2 = dot(b.t1, cc2)
    #t2 = unit(pt0 + refl(pt0,cc1))
    #t3 = unit(pt1 + refl(pt1,cc2))
    t2 = perp2(cc1)
    t3 = perp2(cc2)
    # End points and the middle control point move by o along their perp2
    # directions; c1 and c3 move by o / dp along perp2(cc1) / perp2(cc2).
    b.p0 = b.p0 + o * pt0
    b.p1 = b.p1 + o * pt1
    c2 = c2 + o * pt2
    c1 = c1 + o / dp1 * t2
    c3 = c3 + o / dp2 * t3
    # NOTE(review): the next two assignments are dead - w1 and w2 are
    # overwritten immediately below with the same values as dp1 and dp2.
    w1 = b.bp[0][2]  #dot( b.t0,unit(c2 - b.p0))
    w2 = b.bp[2][2]  #dot( b.t1,unit(b.p1 - c2))
    w1 = dot(b.t0, cc1)
    w2 = dot(b.t1, cc2)
    alpha = norm(c1 - c2)
    beta = norm(c3 - c2)
    b.r = alpha / beta
    b.bp = (hom(c1, w1), hom(c2, 1), hom(c3, w2), alpha, beta)
    b.h1 = [hom(b.p0, 1), b.bp[0], b.bp[1]]
    b.h2 = [b.bp[1], b.bp[2], hom(b.p1, 1)]
    return b
def rot(u, orientation='+'):
    '''Rot of a 2d scalar: its gradient multiplied by a quarter-turn matrix.

    With orientation '+' (the default, counter-clockwise) the matrix is
    [[0, -1], [1, 0]]; any other orientation uses its negation.
    '''
    assert isinstance(u, Expr), 'Can only take rot of scalar'
    assert xyz[2] not in u.atoms(), 'Scalar must be function of x, y only'

    quarter_turn = Tensor([[0, -1], [1, 0]])
    if orientation != '+':
        quarter_turn = -quarter_turn
    return dot(quarter_turn, grad(u))
def directional_variance(data: List[Vector], w: Vector) -> float:
    """
    Variance of `data` measured along the direction of `w`.

    Each point is reduced to its dot product with direction(w), i.e. how far
    it extends along that direction, and the variance of those scalars is
    returned.
    """
    unit_w = direction(w)
    projections: Vector = []
    for point in data:
        projections.append(dot(point, unit_w))
    # No extra mean-centering is done here; the original author notes that
    # the data is expected to be centered already.
    return variance(projections)
def test():
    """Smoke test: build two DST objects from 3x3 matrices and take their dot.

    NOTE(review): `res` is never inspected; the assertion compares dst1.mats
    with dst2.mats rather than checking the product.
    """
    mats1 = [sp.eye(3) for i in range(5)]
    mats2 = [spm(np.arange(9).reshape(3,3)) for i in range(5)]
    dst1 = DST(mats1)
    dst2 = DST(mats2)
    res = dot(dst1,dst2)
    # NOTE(review): on Python 3 `map` returns a lazy iterator, which np.all
    # treats as a single truthy object, so the lambda never runs and this
    # assertion cannot fail. On Python 2 `map` returns a list and the
    # comparison is actually evaluated. Confirm the target interpreter.
    assert np.all(map(lambda mm:mm[0].all()==mm[1].all(), zip(dst1.mats,dst2.mats)))
def covariance(xs: Vector, ys: Vector) -> float:
    """Sample covariance of two non-empty vectors of equal length."""
    assert xs and ys and len(xs) == len(ys), 'vectors must exist and have equal length'

    mean_x = mean(xs)
    mean_y = mean(ys)
    deviations_x = [value - mean_x for value in xs]
    deviations_y = [value - mean_y for value in ys]

    # The dot product of the deviation vectors is
    # sum((x_i - mean_x) * (y_i - mean_y)); dividing by n - 1 rather than n
    # applies Bessel's correction.
    n = len(xs)
    return dot(deviations_x, deviations_y) / (n - 1)
def cos_sim(vec1, vec2):
    """
    Cosine similarity of two document vectors.

    Both inputs are normalised with lg.to_unit and the dot product of the
    resulting unit vectors is returned.

    Notes: per the original author, inputs can be numpy.ndarray,
    scipy.sparse or Gensim BoW.
    """
    return lg.dot(lg.to_unit(vec1), lg.to_unit(vec2))
def curl(u):
    '''Curl of a 3d vector --> vector. Curl of a 2d vector --> scalar.'''
    assert isinstance(u, Vector), 'Need vector for curl'

    if len(u) != 3:
        # 2d case: take the divergence of u multiplied by [[0, 1], [-1, 0]]
        R = Tensor([[0, 1], [-1, 0]])
        return div(dot(R, u))

    # 3d case: each entry below is written with the opposite sign of the
    # corresponding curl component, so the whole vector is negated on return.
    negated_components = [
        Dx(u[1], xyz[2]) - Dx(u[2], xyz[1]),
        Dx(u[2], xyz[0]) - Dx(u[0], xyz[2]),
        Dx(u[0], xyz[1]) - Dx(u[1], xyz[0]),
    ]
    return -Vector(negated_components)
def computeScores(votes, weighting):
    '''
    Score every candidate as the dot product of the ranked votes he/she
    received with the weighting vector, rounded to 2 decimal places.

    Input: dict[str -> arr], arr
    Output: dict[str -> num]
    '''
    return {
        candidate: round(linalg.dot(ranked_votes, weighting), 2)
        for candidate, ranked_votes in votes.items()
    }
def circarc(t, v, ps):
    """Evaluate, at parameter t, a rational quadratic curve rounding the corner ps[1].

    The end control points sit on the two legs leaving ps[1], offset from it
    by min(leg lengths) * v along each leg's unit vector; the middle control
    point is ps[1] itself, weighted by the dot product of the first leg's
    unit vector with the unit chord between the end points. The bezier2
    result is passed through proj before being returned.
    """
    dir_first, len_first = unit_length(vector(ps[1], ps[0]))
    dir_second, len_second = unit_length(vector(ps[1], ps[2]))
    reach = min(len_first, len_second)

    start = hom(point(ps[1]) - reach * v * dir_first, 1)
    end = hom(point(ps[1]) - reach * v * dir_second, 1)

    chord_dir = unit((end[0] - start[0], end[1] - start[1]))
    weight = dot(dir_first, chord_dir)
    middle = hom(point(ps[1]), weight)

    return proj(bezier2(t, [start, middle, end]))
def biarc(p0, t0, p1, t1, r):
    """Compute three control points joining (p0, t0) to (p1, t1).

    Both tangents are normalised with unit() first. A quadratic
    a*beta**2 + b*beta + c = 0 is solved for beta and alpha is derived as
    beta * r.

    Returns (c1, c2, c3, alpha, beta) on success, or
    (None, None, None, 0, 0) when a == 0, when both roots are positive, or
    when the chosen beta is negative.

    NOTE(review): the incoming `r` only feeds the unused local `g`; it is
    overwritten before anything that affects the result reads it.
    """
    t0 = unit(t0)
    t1 = unit(t1)
    chord = point(p0) - point(p1)
    f = dot(t0, t1)
    # NOTE(review): g is never read after this line.
    g = dot(chord, r * t0 + t1)
    # With s = (f + 1) / 2 this is (1/3) * s + 1 * (1 - s): r is 1/3 when
    # f == 1 and 1 when f == -1.
    r = (1 / 3) * (f + 1) / 2 + 1 * (1 - (f + 1) / 2)
    # Coefficients of the quadratic in beta.
    c = dot(chord, chord)
    b = 2 * dot(chord, r * t0 + t1)
    a = 2 * r * (dot(t0, t1) - 1)
    if a == 0:
        # Degenerate quadratic: no solution is attempted.
        return None, None, None, 0, 0
    D = b * b - 4 * a * c
    if D < 0:
        print(D, "<0")
        # NOTE(review): this parses as (norm2(chord) / 4) * dot(chord, t0);
        # confirm that a division by (4 * dot(chord, t0)) was not intended.
        beta = norm2(chord) / 4 * dot(chord, t0)
    else:
        sqD = D**.5
        beta1 = (-b - sqD) / 2 / a
        beta2 = (-b + sqD) / 2 / a
        if beta1 > 0 and beta2 > 0:
            # Two positive roots are rejected.
            print(beta1, beta2, ">0")
            return None, None, None, 0, 0
        beta = max(beta1, beta2)
    if beta < 0:
        print(beta, "<0")
        return None, None, None, 0, 0
    alpha = beta * r
    ab = alpha + beta
    # Outer control points lie along the tangents; c2 is a weighted blend of
    # c1 and c3 with weights beta/ab and alpha/ab.
    c1 = point(p0) + alpha * t0
    c3 = point(p1) - beta * t1
    c2 = (beta / ab) * point(c1) + (alpha / ab) * point(c3)
    #print(alpha,beta)
    return c1, c2, c3, alpha, beta
def cosine_similarity(v, w):
    """Return dot(v, w) divided by the square root of dot(v, v) * dot(w, w)."""
    numerator = dot(v, w)
    squared_norm_product = dot(v, v) * dot(w, w)
    return numerator / math.sqrt(squared_norm_product)
def transform_vector(vector, components):
    """Return the dot product of vector with each of the components, in order."""
    coordinates = []
    for component in components:
        coordinates.append(dot(vector, component))
    return coordinates
def project(vector, direction_vector):
    """Scale direction_vector by its dot product with vector.

    This is the projection of vector onto direction_vector when
    direction_vector has unit length.
    """
    return scalar_multiply(dot(vector, direction_vector), direction_vector)
def directional_variance_row(row, vector):
    """Squared length of the row's projection onto direction(vector)."""
    projection = dot(row, direction(vector))
    return projection ** 2
def predict(x_i, beta):
    """Return the dot product of x_i and beta."""
    prediction = dot(x_i, beta)
    return prediction
def arc(p0, p1, p2):
    """Return three la.hom entries for p0, p1 and p2.

    p0 and p2 get weight 1; p1 gets the dot product of the unit vectors
    pointing from p0 to p1 and from p0 to p2.
    """
    toward_p1 = la.unit(p1 - p0)
    toward_p2 = la.unit(p2 - p0)
    middle_weight = la.dot(toward_p1, toward_p2)
    return [la.hom(p0, 1), la.hom(p1, middle_weight), la.hom(p2, 1)]
# Fit a logistic model on a train split and report confusion-matrix counts,
# precision and recall on the test split.
random.seed(0)
x_train, x_test, y_train, y_test = train_test_split(rescaled_x, y, 0.33)

# maximize log likelihood on the training data
fn = partial(logistic_log_likelihood, x_train, y_train)
gradient_fn = partial(logistic_log_gradient, x_train, y_train)

# random starting point with 3 coefficients
beta_0 = [random.random() for _ in range(3)]
beta_hat = maximize_batch(fn, gradient_fn, beta_0)
print beta_hat

# NOTE(review): `fn` is rebound here from the objective partial above to the
# false-negative counter; harmless at this point because maximize_batch has
# already run, but easy to trip over.
tp = fp = tn = fn = 0
for x_i, y_i in zip(x_test, y_test):
    # NOTE(review): the name `predict` may shadow a function of the same name
    # defined elsewhere in the file - verify.
    predict = logistic(dot(beta_hat, x_i))
    if y_i == 1 and predict >= 0.5: # true positive
        tp += 1
    elif y_i == 1: # false negative
        fn += 1
    elif predict >= 0.5: # false positive
        fp += 1
    else: # true negative
        tn += 1

# NOTE(review): tp, fp and fn are ints. Under Python 2 these are integer
# divisions unless `from __future__ import division` is in effect at the top
# of the file - confirm. Both raise ZeroDivisionError when the denominator
# is 0.
precision = tp / (tp + fp)
recall = tp / (tp + fn)
print tp, fp, tn, fn
print precision, recall
def test_dot_product(self):
    """linalg.dot of [1, 2, 0, -1] and [4, 4, 4, 4] is 4 + 8 + 0 - 4 = 8."""
    left = [1, 2, 0, -1]
    right = [4, 4, 4, 4]
    self.assertEqual(8, linalg.dot(left, right))
def directional_variance_gradient_row(row, vector):
    """the contribution of this row to the gradient of the direction(vector) variance

    Returns one value per component of row:
    2 * component * dot(row, direction(vector)).
    """
    # The projection is the same for every component, so compute it once
    # rather than once per element of row.
    projection = dot(row, direction(vector))
    return [2 * component * projection for component in row]
# Script for benchmarking OOC matrix matrix multiplication (only 2D supported) import shutil, os.path from time import time import blaze from linalg import dot # Remove pre-existent data directories for d in ('a', 'b', 'out'): if os.path.exists(d): shutil.rmtree(d) # Create simple inputs t0 = time() a = blaze.ones(blaze.dshape('2000, 2000, float64'), params=blaze.params(storage='a')) print "Time for matrix a creation : ", round(time() - t0, 3) t0 = time() b = blaze.ones(blaze.dshape('2000, 3000, float64'), params=blaze.params(storage='b')) print "Time for matrix b creation : ", round(time() - t0, 3) # Do the dot product t0 = time() out = dot(a, b, outname='out') print "Time for ooc matmul : ", round(time() - t0, 3) print "out:", out
def ridge_penalty(beta, alpha):
    """Return alpha times the sum of squares of beta[1:].

    alpha is the penalty strength hyperparameter (often called lambda, which
    is a reserved word in Python). The first coefficient, beta[0], is left
    out of the penalty.
    """
    penalized = beta[1:]
    return alpha * dot(penalized, penalized)
def matrix_product_entry(A, B, i, j):
    """Return the dot product of row i of A with column j of B."""
    row_i = get_row(A, i)
    column_j = get_column(B, j)
    return dot(row_i, column_j)
def sum_of_squares(xs: Vector) -> float:
    """
    Sum of the squared elements of xs, computed as the dot product of xs
    with itself.
    """
    total = dot(xs, xs)
    return total
def covariance(vector1, vector2):
    """Sample covariance of two vectors; 0 when vector1 has at most one element."""
    n = len(vector1)
    if n <= 1:
        return 0
    deviations1 = de_mean(vector1)
    deviations2 = de_mean(vector2)
    return dot(deviations1, deviations2) / (n - 1)
def logistic_log_partial_ij(x_i, y_i, beta, j):
    """Return (y_i - logistic(dot(x_i, beta))) * x_i[j].

    j is the index of the coefficient the derivative is taken with respect to.
    """
    residual = y_i - logistic(dot(x_i, beta))
    return residual * x_i[j]
23.39, 30.93, 15.03, 21.67, 31.09, 33.29, 22.61, 26.89, 23.48, 8.38, 27.81, 32.35, 23.84]
# (the line above closes a list literal that starts before this excerpt)

# Fit the regression with a fixed seed and report the coefficients and R^2.
random.seed(0)
beta = estimate_beta(x, daily_minutes_good) # [30.63, 0.972, -1.868, 0.911]
print "beta", rounded(beta)
print "r-squared", rounded(multiple_r_squared(x, daily_minutes_good, beta))

# Two samples of 101 values each: one packed into [99.5, 100.5), the other
# with one such value plus 50 values in [0, 1) and 50 in [200, 201).
close_to_100 = [99.5 + random.random() for _ in range(101)]
far_from_100 = ([99.5 + random.random()] + [random.random() for _ in range(50)] + [200 + random.random() for _ in range(50)])

# Sorted medians of 100 bootstrap resamples of each sample.
print
print "medians for bootstrapped tight distribution", [round(val, 2) for val in sorted(bootstrap_statistic(close_to_100, median, 100))]
print "medians for bootstrapped extreme distribution", [round(val, 2) for val in sorted(bootstrap_statistic(far_from_100, median, 100))]

# Bootstrap the regression itself: 100 resamples of the (x, y) pairs, then
# the standard deviation of each of the coefficients at indices 0..3.
random.seed(0)
bootstrap_betas = bootstrap_statistic(zip(x, daily_minutes_good), estimate_sample_beta, 100)
bootstrap_standard_errors = [standard_deviation([beta[i] for beta in bootstrap_betas]) for i in range(4)]
print
print bootstrap_standard_errors
print

# Ridge fits for increasing alpha. Each row prints alpha, the coefficients,
# the sum of squares of the coefficients after the first, and R^2.
random.seed(0)
for alpha in (0.0, 0.01, 0.1, 1, 10):
    beta_0 = estimate_beta_ridge(x, daily_minutes_good, alpha=alpha)
    print alpha, ' : ', rounded(beta_0), rounded(dot(beta_0[1:], beta_0[1:])), rounded(multiple_r_squared(x, daily_minutes_good, beta_0))
[random.random() for _ in range(50)] + [200 + random.random() for _ in range(50)])
# (the line above finishes an expression that starts before this excerpt)

# Sorted medians of 100 bootstrap resamples of each sample.
print
print "medians for bootstrapped tight distribution", [
    round(val, 2)
    for val in sorted(bootstrap_statistic(close_to_100, median, 100))
]
print "medians for bootstrapped extreme distribution", [
    round(val, 2)
    for val in sorted(bootstrap_statistic(far_from_100, median, 100))
]

# Bootstrap the regression itself: 100 resamples of the (x, y) pairs, then
# the standard deviation of each of the coefficients at indices 0..3.
random.seed(0)
bootstrap_betas = bootstrap_statistic(zip(x, daily_minutes_good), estimate_sample_beta, 100)
bootstrap_standard_errors = [
    standard_deviation([beta[i] for beta in bootstrap_betas])
    for i in range(4)
]
print
print bootstrap_standard_errors
print

# Ridge fits for increasing alpha. Each row prints alpha, the coefficients,
# the sum of squares of the coefficients after the first, and R^2.
random.seed(0)
for alpha in (0.0, 0.01, 0.1, 1, 10):
    beta_0 = estimate_beta_ridge(x, daily_minutes_good, alpha=alpha)
    print alpha, ' : ', rounded(beta_0), rounded(
        dot(beta_0[1:], beta_0[1:])), rounded(
            multiple_r_squared(x, daily_minutes_good, beta_0))
def logistic_log_likelihood_i(x_i, y_i, beta):
    """Log-likelihood of a single data point under the logistic model.

    With p = logistic(dot(x_i, beta)), returns log(p) when y_i == 1 and
    log(1 - p) otherwise.
    """
    probability = logistic(dot(x_i, beta))
    likelihood = probability if y_i == 1 else 1 - probability
    return math.log(likelihood)
def directional_variance_row(row, vector):
    """Square of the dot product of row with direction(vector)."""
    unit_direction = direction(vector)
    return pow(dot(row, unit_direction), 2)
def project2(v: Vector, w: Vector) -> Vector:
    """
    Scale w by dot(v, w).

    This is the projection of v onto the direction w when w has unit length.
    """
    return scalar_multiply(dot(v, w), w)
def transform_vector(v: Vector, span: List[Vector]) -> Vector:
    """
    Express v in terms of the vectors in span: one dot product of v per
    vector in span, in order.
    """
    coordinates: Vector = []
    for basis_vector in span:
        coordinates.append(dot(v, basis_vector))
    return coordinates
# Script for benchmarking OOC matrix matrix multiplication (only 2D supported) import shutil, os.path from time import time import blaze from linalg import dot # Remove pre-existent data directories for d in ('a', 'b', 'out'): if os.path.exists(d): shutil.rmtree(d) # Create simple inputs t0 = time() a = blaze.ones(blaze.dshape('2000, 2000, float64'), params=blaze.params(storage='a')) print "Time for matrix a creation : ", round(time()-t0, 3) t0 = time() b = blaze.ones(blaze.dshape('2000, 3000, float64'), params=blaze.params(storage='b')) print "Time for matrix b creation : ", round(time()-t0, 3) # Do the dot product t0 = time() out = dot(a, b, outname='out') print "Time for ooc matmul : ", round(time()-t0, 3) print "out:", out