def read_training_data(fname, D=None):
    """Read labelled tissue-sample data from a comma-separated file.

    Given a file in appropriate format, and given a set D of features,
    returns the pair (A, b) consisting of
    a P-by-D matrix A and a P-vector b,
    where P is a set of patient identification integers (IDs).

    For each patient ID p,
      - row p of A is the D-vector describing patient p's tissue sample,
      - entry p of b is +1 if patient p's tissue is malignant, and -1 if it
        is benign (a row is treated as benign only when its second field is
        exactly 'B').

    The set D of features must be a subset of the features in the data
    (see text).  When D is None, all thirty features are used.

    Blank lines in the file are skipped.  The file is closed before
    returning, even if a row fails to parse.
    """
    params = [
        "radius", "texture", "perimeter", "area", "smoothness",
        "compactness", "concavity", "concave points", "symmetry",
        "fractal dimension",
    ]
    stats = ["(mean)", "(stderr)", "(worst)"]
    feature_labels = {param + stat for stat in stats for param in params}
    # Maps a feature label to its column offset among the feature fields:
    # all "(mean)" columns come first, then "(stderr)", then "(worst)".
    feature_map = {
        param + stat: j * len(params) + i
        for i, param in enumerate(params)
        for j, stat in enumerate(stats)
    }
    if D is None:
        D = feature_labels
    feature_vectors = {}
    patient_diagnoses = {}
    with open(fname) as file:
        for line in file:
            if not line.strip():
                # Tolerate empty lines (e.g. a trailing newline at EOF).
                continue
            row = line.split(",")
            patient_ID = int(row[0])
            patient_diagnoses[patient_ID] = -1 if row[1] == 'B' else +1
            # Fields 0 and 1 are the ID and the diagnosis, hence the +2.
            feature_vectors[patient_ID] = Vec(
                D, {f: float(row[feature_map[f] + 2]) for f in D})
    return rowdict2mat(feature_vectors), Vec(set(patient_diagnoses),
                                             patient_diagnoses)
def list2vec(L):
    """Build a Vec from list L: labels are 0..len(L)-1 and label i maps to L[i]."""
    entries = dict(enumerate(L))
    labels = set(range(len(L)))
    return Vec(labels, entries)


# Write a procedure triangular_solve_n(rowlist, b) with the following spec:
# • input: for some integer n, a triangular system consisting of a list
#   rowlist of n-vectors, and a length-n list b of numbers
# • output: a vector x_hat such that, for i = 0, 1, ..., n - 1, the
#   dot-product of rowlist[i] with x_hat equals b[i]
# def triangular_solve_n(rowlist, b):   <- only this header is present; no body follows it here
def zero_vec(D):
    """Return a Vec over D in which every label of D is explicitly mapped to 0."""
    return Vec(D, dict.fromkeys(D, 0))


# Quiz 2.7.3: Write a procedure scalar_mul(v, alpha) with the following spec:
# • input: an instance of Vec and a scalar alpha
# • output: a new instance of Vec that represents the scalar-vector product
#   alpha times v.
def scalar_mul(v, alpha):
    """Return a new Vec over v.D whose stored entries are alpha times those of v.

    Only the entries present in v.f are scaled; v itself is not modified.
    """
    scaled = {}
    for label, entry in v.f.items():
        scaled[label] = alpha * entry
    return Vec(v.D, scaled)
def matrix_vector_mul(M, v):
    """
    Return the matrix-vector product M * v as a Vec over the row labels of M.

    Entries of v are read with bracket notation v[...], which avoids some
    sparsity bugs.  The column-label set of M must equal the domain of v
    (checked with an assert).

    >>> N1 = Mat(({1, 3, 5, 7}, {'a', 'b'}), {(1, 'a'): -1, (1, 'b'): 2, (3, 'a'): 1, (3, 'b'):4, (7, 'a'): 3, (5, 'b'):-1})
    >>> u1 = Vec({'a', 'b'}, {'a': 1, 'b': 2})
    >>> N1*u1 == Vec({1, 3, 5, 7},{1: 3, 3: 9, 5: -2, 7: 3})
    True
    >>> N1 == Mat(({1, 3, 5, 7}, {'a', 'b'}), {(1, 'a'): -1, (1, 'b'): 2, (3, 'a'): 1, (3, 'b'):4, (7, 'a'): 3, (5, 'b'):-1})
    True
    >>> u1 == Vec({'a', 'b'}, {'a': 1, 'b': 2})
    True
    >>> N2 = Mat(({('a', 'b'), ('c', 'd')}, {1, 2, 3, 5, 8}), {})
    >>> u2 = Vec({1, 2, 3, 5, 8}, {})
    >>> N2*u2 == Vec({('a', 'b'), ('c', 'd')},{})
    True
    >>> M3 = Mat(({0,1},{'a','b'}),{(0,'a'):1, (0,'b'):1, (1,'a'):1, (1,'b'):1})
    >>> v3 = Vec({'a','b'},{'a':1,'b':1})
    >>> M3*v3 == Vec({0, 1},{0: 2, 1: 2})
    True
    """
    assert M.D[1] == v.D
    product = {}
    for r in M.D[0]:
        # Dot-product of row r of M with v, over every label of v's domain.
        product[r] = sum(M[(r, c)] * v[c] for c in v.D)
    return Vec(M.D[0], product)
def neg(v):
    """Return a new Vec over v.D whose stored entries are the negations of v's."""
    return Vec(v.D, {d: -v.f[d] for d in v.f})


# Quiz 2.9.4:
# Write a procedure list_dot(u, v) with the following spec:
# • input: equal-length lists u and v of field elements
# • output: the dot-product of u and v interpreted as vectors
def list_dot(u, v):
    """Return the dot-product of the equal-length sequences u and v.

    Corresponding elements are paired with zip, multiplied and summed.
    Two empty sequences give 0.  The inputs are expected to have equal
    length; if they do not, only the common prefix is used.
    """
    # zip avoids index bookkeeping and no longer shadows the builtin `iter`.
    return sum(a * b for a, b in zip(u, v))
def scalar_mul(v, alpha):
    """Return a new Vec over v.D whose stored entries are alpha times those of v."""
    scaled = {label: alpha * entry for label, entry in v.f.items()}
    return Vec(v.D, scaled)


# Quiz 2.7.5:
# Write a Python procedure neg(v) with the following spec:
# • input: an instance v of Vec
# • output: the negative of v (this implementation returns it as a new Vec)
def neg(v):
    """Return a new Vec over v.D whose stored entries are the negations of v's."""
    negated = {}
    for label, entry in v.f.items():
        negated[label] = -entry
    return Vec(v.D, negated)
def move2board(q):
    """Return a Vec over q.D with every entry of q divided by q['y3']."""
    scaled = {}
    for key in q.D:
        scaled[key] = q[key] / q['y3']
    return Vec(q.D, scaled)


# Task 5.12.2: Define the domain D = R × C.
# Write a procedure make_equations(x1, x2, w1, w2) that outputs a list [u, v]
# consisting of two D-vectors u and v such that Equations 5.11 and 5.12 are
# expressed as
#     u·h = 0    v·h = 0
# where h is the D-vector of unknown entries of H.
R = {'y1', 'y2', 'y3'}
def aug_orthogonalize(vlist):
    """Process vlist in order with aug_project_orthogonal, collecting its results.

    Each vector is passed to aug_project_orthogonal together with the list
    of results produced so far.  Returns the pair (vstarlist, sigma_vecs):
    the first components returned by aug_project_orthogonal, and the second
    components wrapped as Vecs over the domain {0, ..., len(vlist)-1}.
    """
    index_domain = set(range(len(vlist)))
    orthogonalized = []
    coefficient_vecs = []
    for vector in vlist:
        # Order matters: each call sees only the results of earlier vectors.
        projected, coefficients = aug_project_orthogonal(vector, orthogonalized)
        orthogonalized.append(projected)
        coefficient_vecs.append(Vec(index_domain, coefficients))
    return orthogonalized, coefficient_vecs
def mat2coldict(A):
    """Given a matrix, return a dictionary mapping column labels of A to columns of A.

    Every column Vec stores an explicit entry for each row label.

    e.g.:
    >>> M = Mat(({0, 1, 2}, {0, 1}), {(0, 1): 1, (2, 0): 8, (1, 0): 4, (0, 0): 3, (2, 1): -2})
    >>> mat2coldict(M)
    {0: Vec({0, 1, 2},{0: 3, 1: 4, 2: 8}), 1: Vec({0, 1, 2},{0: 1, 1: 0, 2: -2})}
    >>> mat2coldict(Mat(({0,1},{0,1}),{}))
    {0: Vec({0, 1},{0: 0, 1: 0}), 1: Vec({0, 1},{0: 0, 1: 0})}
    """
    columns = {}
    for c in A.D[1]:
        entries = {r: A[r, c] for r in A.D[0]}
        columns[c] = Vec(A.D[0], entries)
    return columns
def mat2rowdict(A):
    """Given a matrix, return a dictionary mapping row labels of A to rows of A.

    Every row Vec stores an explicit entry for each column label.

    e.g.:
    >>> M = Mat(({0, 1, 2}, {0, 1}), {(0, 1): 1, (2, 0): 8, (1, 0): 4, (0, 0): 3, (2, 1): -2})
    >>> mat2rowdict(M)
    {0: Vec({0, 1},{0: 3, 1: 1}), 1: Vec({0, 1},{0: 4, 1: 0}), 2: Vec({0, 1},{0: 8, 1: -2})}
    >>> mat2rowdict(Mat(({0,1},{0,1}),{}))
    {0: Vec({0, 1},{0: 0, 1: 0}), 1: Vec({0, 1},{0: 0, 1: 0})}
    """
    rows = {}
    for r in A.D[0]:
        entries = {c: A[r, c] for c in A.D[1]}
        rows[r] = Vec(A.D[1], entries)
    return rows
def signum(u):
    """Return a Vec over u.D whose entry is 1 where u[key] >= 0 and -1 otherwise.

    Every label of the domain gets an explicit entry in the result.
    """
    # Iterate over the whole domain u.D, not just the stored entries u.f:
    # a label missing from u.f would otherwise be left out of the result and
    # read back as 0 instead of +1.  (Assumes u[key] yields 0 for labels not
    # stored in u.f, per the Vec sparsity convention -- confirm against
    # VectorClass.)
    return Vec(u.D, {key: 1 if u[key] >= 0 else -1 for key in u.D})
from VectorClass import Vec
from cancer_data import read_training_data
from matutil import mat2rowdict, listlist2mat
from vectorTasks import zero_vec, list2vec
from orthogonality import QR_solve

# Training and validation sets: a feature matrix plus a +1/-1 label vector.
(A, b) = read_training_data('train.data')
(C, d) = read_training_data('validate.data')
# Initial hypothesis vector: all zeros over the feature labels.
w = Vec(A.D[1], {key: 0 for key in A.D[1]})


def signum(u):
    """Return a Vec over u.D whose entry is 1 where u[key] >= 0 and -1 otherwise."""
    # Iterate over the whole domain u.D, not just the stored entries u.f:
    # a label missing from u.f would otherwise be left out of the result and
    # read back as 0 instead of +1.  (Assumes u[key] yields 0 for labels not
    # stored in u.f -- confirm against VectorClass.)
    return Vec(u.D, {key: 1 if u[key] >= 0 else -1 for key in u.D})


# The procedure fraction_wrong(A, b, w) with the following spec:
# • input: An R×C matrix A whose rows are feature vectors, an R-vector b whose
#   entries are +1 and −1, and a C-vector w
# • output: The fraction of row labels r of A such that the sign of
#   (row r of A)·w differs from that of b[r].
def fraction_wrong(A, b, w):
    """Return the fraction of rows r of A where signum(A*w)[r] differs from b[r]."""
    predicted = signum(A * w)
    mismatches = sum(1 for key in predicted.D if predicted[key] != b[key])
    return mismatches / len(A.D[0])


# A procedure loss(A, b, w) that takes as input the training data A, b and a
# hypothesis vector w, and returns the value L(w) of the loss function for
# input w.
def loss(A, b, w):
    """Return L(w) = (A*w - b)·(A*w - b) for hypothesis vector w.

    The value is returned, as the spec above requires, rather than printed.
    """
    residual = A * w - b  # computed once and reused for both factors
    return residual * residual


# def find_grad(A, b, w):   <- only this header is present; no body follows it here
from VectorClass import Vec
from vectorTasks import zero_vec

# Sample data for experimenting with the procedures below.
D = {'e1', 'e2', 'e3', 'e4'}
vecList = [
    Vec(D, {'e1': 1, 'e2': 4, 'e3': 5}),
    Vec(D, {'e1': 2, 'e2': 0, 'e3': 5, 'e4': 6}),
    Vec(D, {'e1': 1, 'e2': 0, 'e3': 8, 'e4': 0}),
]
vecDict = {1: vecList[0], 2: vecList[1], 4: vecList[2]}


# Quiz 3.1.7:
# A procedure lin_comb(vlist, clist) with the following spec
# (NOTE: implemented here under the name list_comb):
# • input: a list vlist of vectors, a list clist of the same length
#   consisting of scalars
# • output: the vector that is the linear combination of the vectors in vlist
#   with corresponding coefficients clist
def list_comb(vlist, clist):
    """Return the linear combination sum(clist[i] * vlist[i]).

    vlist and clist are expected to have the same length.  The sum starts
    from the integer 0, so an empty vlist yields 0 rather than a vector.
    """
    # zip pairs each coefficient with its vector and no longer shadows the
    # builtin `iter`; the scalar stays on the left as in `alpha * v`.
    return sum(coeff * vec for coeff, vec in zip(clist, vlist))


# Problem 3.8.1:
# A procedure vec_select using a comprehension for the following
# computational problem:
# • input: a list veclist of vectors over the same domain, and an element k of
#   the domain
# • output: the sublist of veclist consisting of the vectors v in veclist
#   where v[k] is zero
def vec_select(veclist, k):
    """Return the vectors of veclist whose entry k equals 0, in original order."""
    return [v for v in veclist if v[k] == 0]


# A procedure vec_sum using the built-in procedure sum(·) for the following:
# • input: a list veclist of vectors, and a set D that is the common domain of
#   these vectors
# • output: the vector sum of the vectors in veclist.
def vec_sum(veclist, D):
    """Return the sum of the vectors in veclist, starting from zero_vec(D)."""
    return sum(veclist, zero_vec(D))
def make_equations(x1, x2, w1, w2):
    """Return the list [u, v] of two D-vectors built from x1, x2, w1, w2.

    u has entries on the 'y3' and 'y1' rows and v on the 'y3' and 'y2'
    rows; they are meant to express the two equations u·h = 0 and v·h = 0.
    D is the module-level domain of (row, column) label pairs.
    """
    u = Vec(D, {
        ('y3', 'x1'): w1 * x1,
        ('y3', 'x2'): w1 * x2,
        ('y3', 'x3'): w1,
        ('y1', 'x1'): -x1,
        ('y1', 'x2'): -x2,
        ('y1', 'x3'): -1,
    })
    v = Vec(D, {
        ('y3', 'x1'): w2 * x1,
        ('y3', 'x2'): w2 * x2,
        ('y3', 'x3'): w2,
        ('y2', 'x1'): -x1,
        ('y2', 'x2'): -x2,
        # Was ('y1', 'x3'): the second equation mirrors the first with 'y2'
        # in place of 'y1', so the constant term belongs on the 'y2' row.
        ('y2', 'x3'): -1,
    })
    return [u, v]
C = {'x1', 'x2', 'x3'}
# Domain of (row, column) label pairs; R is the row-label set defined earlier.
D = {(y, x) for y in R for x in C}


def make_equations(x1, x2, w1, w2):
    """Return the list [u, v] of two D-vectors built from x1, x2, w1, w2.

    u has entries on the 'y3' and 'y1' rows and v on the 'y3' and 'y2'
    rows; they are meant to express the two equations u·h = 0 and v·h = 0.
    """
    u = Vec(D, {
        ('y3', 'x1'): w1 * x1,
        ('y3', 'x2'): w1 * x2,
        ('y3', 'x3'): w1,
        ('y1', 'x1'): -x1,
        ('y1', 'x2'): -x2,
        ('y1', 'x3'): -1,
    })
    v = Vec(D, {
        ('y3', 'x1'): w2 * x1,
        ('y3', 'x2'): w2 * x2,
        ('y3', 'x3'): w2,
        ('y2', 'x1'): -x1,
        ('y2', 'x2'): -x2,
        # Was ('y1', 'x3'): the second equation mirrors the first with 'y2'
        # in place of 'y1', so the constant term belongs on the 'y2' row.
        ('y2', 'x3'): -1,
    })
    return [u, v]


# Task 5.12.3: Write the D-vector w with a 1 in the ('y1', 'x1') entry.
# Two equations per corner, from the (x1, x2) and (w1, w2) values given here.
topLeft = make_equations(358, 36, 0, 0)
topRight = make_equations(329, 597, 1, 0)
bottomLeft = make_equations(592, 157, 0, 1)
bottomRight = make_equations(580, 483, 1, 1)
w = Vec(D, {('y1', 'x1'): 1})
e = {0, 1, 2, 3, 4, 5, 6, 7, 8}
# Rows 0-7 are the eight corner equations; row 8 is the vector w.
rowDict = {
    0: topLeft[0],
    1: topLeft[1],
    2: topRight[0],
    3: topRight[1],
    4: bottomLeft[0],
    5: bottomLeft[1],
    6: bottomRight[0],
    7: bottomRight[1],
    8: w,
}
# print(rowDict)
# L1Matrix = rowdict2mat(rowDict)