def read_training_data(fname, D=None):
    """Read labelled tissue-sample data from a comma-separated file.

    Every non-blank line is split on commas and read as: a patient ID
    (integer), a diagnosis letter, then the numeric features.  The feature
    columns are addressed as ten parameters (radius, texture, ...) repeated
    for each of the three statistics "(mean)", "(stderr)", "(worst)", in that
    order, so a label looks like "radius(mean)" or "area(worst)".

    Args:
        fname: path of the data file.
        D: set of feature labels to keep; must be a subset of the 30 labels
            described above.  Defaults to all of them.

    Returns:
        The pair (A, b) where
        - A is the matrix built by rowdict2mat from one Vec (domain D) per
          patient ID, holding that patient's selected feature values;
        - b is a Vec over the patient IDs whose entry is -1 when the
          diagnosis field is 'B' (benign) and +1 otherwise (malignant).
    """
    params = [
        "radius", "texture", "perimeter", "area", "smoothness",
        "compactness", "concavity", "concave points", "symmetry",
        "fractal dimension",
    ]
    stats = ["(mean)", "(stderr)", "(worst)"]
    # Offset of each feature among the feature columns: all parameters for
    # the first statistic, then all for the second, and so on.
    feature_map = {
        param + stat: j * len(params) + i
        for j, stat in enumerate(stats)
        for i, param in enumerate(params)
    }
    if D is None:
        D = set(feature_map)
    feature_vectors = {}
    patient_diagnoses = {}
    # The context manager guarantees the file is closed, even if a row
    # fails to parse.
    with open(fname) as handle:
        for line in handle:
            if not line.strip():
                # A blank line (e.g. a trailing newline) carries no record.
                continue
            row = line.split(",")
            patient_ID = int(row[0])
            patient_diagnoses[patient_ID] = -1 if row[1] == 'B' else +1
            # The first two fields are the ID and the diagnosis, hence "+ 2".
            feature_vectors[patient_ID] = Vec(
                D, {f: float(row[feature_map[f] + 2]) for f in D})
    return rowdict2mat(feature_vectors), Vec(set(patient_diagnoses.keys()),
                                             patient_diagnoses)
def determine_consumption():
    """Solve the linear system built from five sample rows and return it.

    Each row is a Vec over the components {'radio', 'sensor', 'memory',
    'CPU'}; b holds one right-hand-side value per row (labels 0-4).  The rows
    are stacked with matutil.rowdict2mat and handed to solver.solve.

    Returns:
        Whatever solver.solve(A, b) returns -- presumably a Vec over the four
        components giving each component's rate (TODO confirm against solver).
        The result used to be computed and then dropped, so the function
        always returned None.
    """
    D = {'radio', 'sensor', 'memory', 'CPU'}
    v0 = Vec(D, {'radio': .1, 'CPU': .3})
    v1 = Vec(D, {'sensor': .2, 'CPU': .4})
    v2 = Vec(D, {'memory': .3, 'CPU': .1})
    v3 = Vec(D, {'memory': .5, 'CPU': .4})
    v4 = Vec(D, {'radio': .2, 'CPU': .5})
    b = Vec({0, 1, 2, 3, 4},
            {0: 140.0, 1: 170.0, 2: 60.0, 3: 170.0, 4: 250.0})
    A = matutil.rowdict2mat([v0, v1, v2, v3, v4])
    return solver.solve(A, b)
def projection_matrix(v):
    """Return (1 / (v * v)) * (v * R), where R is the one-row matrix [v].

    R is built with matutil.rowdict2mat({0: v}), i.e. v laid out as the
    single row labelled 0.  Assuming `v * v` is the dot product, the scale
    factor is 1 / ||v||^2 (not 1 / ||v||).
    """
    outer = v * matutil.rowdict2mat({0: v})
    return (1 / (v * v)) * outer


# problem 8.3.16
# The rank of the matrix M such that project_v(x) = M*x should be 1,
# because M is a scalar multiple of a rank-1 matrix.
def make_matrix(feature_vectors, diagnoses, features):
    """Assemble the constraint matrix, two rows per key of feature_vectors.

    The column domain is the union of the keys of `feature_vectors`, the set
    `features`, and the extra label 'gamma'.  For each key d there is

    - a row labelled d, built from
      main_constraint(d, feature_vectors[d], diagnoses[d], features), and
    - a row labelled -d, built from nonneg_constraint(d).

    Returns the matrix produced by matutil.rowdict2mat from those rows.
    """
    domain = set(feature_vectors) | features | {'gamma'}
    rows = {}
    # First pass: the main constraint of every key.
    for key in feature_vectors:
        entries = main_constraint(key, feature_vectors[key], diagnoses[key],
                                  features)
        rows[key] = Vec(domain, entries)
    # Second pass: the non-negativity constraint, stored under the negated key.
    for key in feature_vectors:
        rows[-key] = Vec(domain, nonneg_constraint(key))
    return matutil.rowdict2mat(rows)
import cancer_data
import vec
from vector import vecutil
from matrix import matutil
import math

# Test fixtures: a matrix whose rows are [1,0,0], [0,1,0], [0,0,1] (the rows
# of the 3x3 identity), plus an all -1 and an all +1 vector.
test_ident = matutil.rowdict2mat({
    0: vecutil.list2vec([1, 0, 0]),
    1: vecutil.list2vec([0, 1, 0]),
    2: vecutil.list2vec([0, 0, 1]),
})
test_1 = vecutil.list2vec([-1, -1, -1])
test_2 = vecutil.list2vec([1, 1, 1])


def read_training_data():
    """Load the training set from the fixed path 'inner_product/train.data'.

    Thin wrapper: returns whatever cancer_data.read_training_data returns.
    """
    return cancer_data.read_training_data('inner_product/train.data')


def signum(u):
    """Return the entry-wise sign of a Vec.

    input: a Vec u
    output: the Vec v with the same domain as u such that
        v[d] = +1 if u[d] >= 0
        v[d] = -1 if u[d] < 0

    Note that zero entries map to +1.
    """
    return vec.Vec(u.D, {d: 1 if u[d] >= 0 else -1 for d in u.D})


# Self-check of signum (the statement continues past this line).
assert (signum(vec.Vec({'A', 'B'}, {
('y2', 'x1'): -x1,
('y2', 'x2'): -x2,
('y2', 'x3'): -1
})
return [u, v]
# (The lines above are the tail of a definition whose header is outside this
# view; they are left untouched.)


# Extra row with a single 1 in the ('y1', 'x1') entry.  It is paired with the
# final 1 of b below, i.e. the equation w * x = 1.
w = Vec(D, {
    ('y1', 'x1'): 1
})
# Each make_equations call yields a pair of rows, stored as l0..l7.
# NOTE(review): the meaning of the four arguments is not visible from here --
# confirm against make_equations.
l0, l1 = make_equations(358, 36, 0, 0)
l4, l5 = make_equations(329, 597, 0, 1)
l2, l3 = make_equations(592, 157, 0, 1)
l6, l7 = make_equations(580, 483, 0, 1)
# Stack the eight equation rows and the extra row w into one matrix.
L = matutil.rowdict2mat({
    0: l0,
    1: l1,
    2: l2,
    3: l3,
    4: l4,
    5: l5,
    6: l6,
    7: l7,
    8: w,
})
# Right-hand side: 0 for rows 0-7, 1 for row 8 (the w row).
b = vecutil.list2vec([0, 0, 0, 0, 0, 0, 0, 0, 1])
# Presumably solves L * x = b -- confirm against solver.solve.
print(solver.solve(L, b))
def print_rowlist(rowlist):
    """Print the matrix that matutil.rowdict2mat builds from `rowlist`."""
    print("%s" % matutil.rowdict2mat(rowlist))
return (1 / len(images)) * final
# (The line above is the tail of a definition whose header is outside this
# view; it is left untouched.)


faces = import_faces('singular_value_decomposition/faces')
centroid = compute_image_centroid(faces)
# image.image2display(vector2image(centroid))
# Subtract the centroid from every face vector, keeping the same keys.
centered_image_vectors = {k: faces[k] - centroid for k in faces}


def norm(v):
    """Return math.sqrt(v * v) (the Euclidean norm if v * v is a dot product)."""
    return math.sqrt(v * v)


# One matrix row per centered face.
A = matutil.rowdict2mat(centered_image_vectors)
# Presumably the singular value decomposition A = U * Sigma * V^T --
# confirm against svd.factor.
U, Sigma, V = svd.factor(A)
Vt = V.transpose()
# Labels 0..9.
tenset = {i for i in range(10)}
# NOTE(review): matutil.submatrix presumably restricts a matrix to the given
# row and column label sets; if so these keep only labels 0..9 (all columns
# of Vt are kept) -- confirm.
Uten = matutil.submatrix(U, tenset, tenset)
Sigmaten = matutil.submatrix(Sigma, tenset, tenset)
Vten = matutil.submatrix(Vt, {i for i in range(10)}, Vt.D[1])
eigenfaces_basis = Uten * Sigmaten * Vten


def projected_representation(M, x):
    """
    input: a matrix M with orthonormal rows and a vector x with D from Col M
    output: the coordinate representation of the parallel of projection of x onto Row M
candidates = generate_candidates()
all_vectors = [a0, b0] + candidates
# NOTE(review): all_vectors is not rebuilt after candidates is regenerated, so
# the loop condition keeps testing the first batch -- confirm this is intended.
while not choose_three_independent(all_vectors):
    candidates = generate_candidates()
return candidates
# (The lines above are the tail of a definition whose header is outside this
# view; they are left untouched.)


# The text to encode, turned into bits by bitutil.str2bits.
coded = "memelover"
codedbits = bitutil.str2bits(coded)
# One column per column label of codedbits; each column is the vector chosen
# by choose_secret_vector from that column's entries 0 and 1.
U = matutil.coldict2mat({
    column: choose_secret_vector(codedbits[column][0], codedbits[column][1])
    for column in codedbits.D[1]
})
remaining = generate_remaining_vectors()
# Rows 0 and 1 are a0 and b0; rows 2-7 are the six generated vectors.
A = matutil.rowdict2mat({
    0: a0,
    1: b0,
    2: remaining[0],
    3: remaining[1],
    4: remaining[2],
    5: remaining[3],
    6: remaining[4],
    7: remaining[5],
})
# Matrix product of the row matrix with the secret-vector columns.
shares = A * U