def read_training_data():
    """Load the training set stored at 'inner_product/train.data'.

    Thin wrapper: forwards the fixed path to
    ``cancer_data.read_training_data`` and returns whatever that call
    returns, unchanged.
    """
    training_path = 'inner_product/train.data'
    return cancer_data.read_training_data(training_path)
# Whitespace-collapsed snippet (original line breaks were lost). Logical
# contents, in order:
#   * Tail of a function whose header is not visible here: it assigns v.D
#     as features | feature-vector keys | {'gamma'} and returns
#     matutil.rowdict2mat(main).
#   * make_b(ids): builds a Vec over `ids` whose entry is 1 for k > 0 and
#     0 otherwise.
#   * make_c(features, ids): builds a Vec over features | ids | {'gamma'}
#     with entry 1 for every k in ids.
#   * Script: reads 'train.data', builds A via make_matrix, b via make_b
#     and c via make_c; R_square starts as a copy of F.D[0] and receives
#     negated labels taken from F.D[0] until its size reaches len(A.D[1]);
#     then find_vertex(A, b, R_square) is called and R_square is printed.
# NOTE(review): `c` is computed but not used in the visible statements --
# confirm whether a later (unseen) step consumes it.
# NOTE(review): which statements belong to the `while` body cannot be
# determined from the collapsed text -- restore indentation from the
# original file before editing.
v.D = features | set(feature_vectors.keys()) | {'gamma'} #print(main[919555]) #print(next(iter(main.values()))) # print(main[-919555]) return matutil.rowdict2mat(main) def make_b(ids): return Vec(ids, {k: 1 if k > 0 else 0 for k in ids}) def make_c(features, ids): return Vec(features | ids | {'gamma'}, {k: 1 for k in ids}) F, d = read_training_data('train.data') # print(A.D[0]) features = {'texture(worst)', 'area(worst)'} A = make_matrix(matutil.mat2rowdict(F), d, features) b = make_b(A.D[0]) c = make_c(features, F.D[0]) R_square = F.D[0].copy() n = len(A.D[1]) it = iter(F.D[0]) while len(R_square) < n: R_square.add(-next(it)) print(A.D[1]) find_vertex(A, b, R_square) print(R_square) print('vertex found')
def test_classifier(fname, w, gamma):
    """Count the patients in *fname* that the classifier labels correctly.

    The data file is read with the module-level ``FEATUREs`` selection, each
    patient's feature vector is classified with ``C(features, w, gamma)``,
    and the prediction is compared with that patient's diagnosis from the
    same file.

    Args:
        fname: path of the data file to evaluate on.
        w: weight vector passed through to ``C``.
        gamma: threshold value passed through to ``C``.

    Returns:
        The number (int) of rows whose predicted label equals the diagnosis.
    """
    # Bind the diagnoses read from *fname* under the name actually used
    # below; the previous spelling (`diagnosis`) left the comparison reading
    # a different, non-local `diagnoses`.
    patient_features, diagnoses = read_training_data(fname, FEATUREs)
    # assumes mat2rowdict returns a mapping {row_label: row vector}; iterate
    # its items so each label is paired with its vector (iterating the
    # mapping itself yields labels only and cannot be unpacked into two names).
    rows = mat2rowdict(patient_features)
    return sum(
        1
        for label, feature_vec in rows.items()
        if C(feature_vec, w, gamma) == diagnoses[label]
    )
# Whitespace-collapsed snippet (original line breaks were lost). Logical
# contents, in order:
#   * Imports: read_training_data, rowdict2mat, mat2rowdict, simplex, Vec, Mat.
#   * FEATUREs: the three feature names used for this experiment.
#   * Reads "train.data" restricted to FEATUREs; IDs is the row-label set
#     patient_features.D[0].
#   * A_COLS = FEATUREs | IDs | {"gamma"}; A_ROWS = IDs | {-i for i in IDs}.
#   * main_constraint(i, pf_i, d_i, features): returns a Vec over A_COLS with
#     entry d_i * pf_i[f] for each f in features, entry -d_i at "gamma",
#     and entry 1 at label i.
#   * The header of make_matrix(feature_vectors, diagnoses, features); its
#     body is not visible in this snippet.
# NOTE(review): main_constraint reads the module-level A_COLS rather than
# taking the column set as a parameter -- confirm that is intended.
from cancer_data import read_training_data from matutil import rowdict2mat, mat2rowdict import simplex from vec import Vec from mat import Mat FEATUREs = {"area(worst)", "smoothness(worst)", "texture(mean)"} ### read training data print("== Read Training Data ==") (patient_features, diagnoses) = read_training_data("train.data", FEATUREs) IDs = patient_features.D[0] #### Set matrix A ### set A labels A_COLS = FEATUREs.union(IDs) A_COLS = A_COLS.union({"gamma"}) A_ROWS = IDs.union({-i for i in IDs}) ### set A content ## Task 13.13.1 def main_constraint(i, pf_i, d_i, features): v = Vec(A_COLS, {f: d_i * pf_i[f] for f in features}) v["gamma"] = -1 * d_i v[i] = 1 return v ## Task 13.13.2 def make_matrix(feature_vectors, diagnoses, features):
# Whitespace-collapsed snippet (original line breaks were lost). Logical
# contents, in order:
#   * Imports, then loading of 'train.data' into (A, b) and 'validate.data'
#     into (C, d); w is a Vec over A.D[1] with every entry 0.
#   * signum(u): returns a Vec over u.D with 1 where u[key] >= 0 and -1
#     otherwise, for the keys found in u.f.
#   * fraction_wrong(A, b, w): number of labels where signum(A * w) differs
#     from b, divided by len(A.D[0]).
#   * loss(A, b, w): prints ((A * w) - b) * ((A * w) - b).
#   * The header of find_grad(A, b, w); its body is not visible here.
# NOTE(review): loss() prints its value and has no return statement in the
# visible text, although the comment inside the snippet says it "returns the
# value L(w)" -- callers would receive None; confirm and return the value.
# NOTE(review): signum() iterates u.f, not u.D. If Vec stores entries
# sparsely (presumably zero entries may be absent from u.f -- verify against
# VectorClass), those labels get no sign and would be counted as wrong by
# fraction_wrong.
from VectorClass import Vec from cancer_data import read_training_data from matutil import mat2rowdict, listlist2mat from vectorTasks import zero_vec, list2vec from orthogonality import QR_solve (A, b) = read_training_data('train.data') (C, d) = read_training_data('validate.data') w = Vec(A.D[1], {key: 0 for key in A.D[1]}) def signum(u): return Vec(u.D, {key: 1 if u[key] >= 0 else -1 for key in u.f}) # the procedure fraction wrong(A, b, w) with the following spec: # • input: An R×C matrix A whose rows are feature vectors, an R-vector b whose entries are +1 and −1, and a C-vector w # • output: The fraction of of row labels r of A such that the sign of (row r of A)·w differs from that of b[r]. def fraction_wrong(A, b, w): outputVec = signum(A * w) return len([key for key in outputVec.D if outputVec[key] != b[key]]) / len( A.D[0]) # a procedure loss(A, b, w) that takes as input the training data A, b and a hypothesis vector w, # and returns the value L(w) of the loss function for input w. def loss(A, b, w): print(((A * w) - b) * ((A * w) - b)) def find_grad(A, b, w):
# Whitespace-collapsed snippet (original line breaks were lost). Logical
# contents, in order:
#   * Tail of a function whose header is not visible here: it applies
#     gradient_descent_step(A, b, w, sigma) T times and returns w.
#   * print_statistics(A, b, w): prints loss(A, b, w) and
#     fraction_wrong(A, b, w) with descriptive labels.
#   * __main__ script: loads 'train.data', builds an all-zeros start vector
#     w_0 and an all-ones start vector w_1 over A_train.D[1], and runs
#     gradient_descent for 1000 iterations with step sizes 10**(-9) and
#     2 * 10**(-9), printing statistics after each run.
# NOTE(review): after hyp_1a is computed, the statistics printed under the
# "initial weights 1" heading are for hyp_0a, not hyp_1a -- looks like a
# copy-paste slip; confirm.
# NOTE(review): the snippet stops after the heading for the 2*10^-9 run; any
# following statements are not visible here.
for i in range(T): w = gradient_descent_step(A, b, w, sigma) # if i % 30 == 0: # print_statistics(A, b, w) # elapsed_time = time.time() - start_time #print("Elapsed time running ", T, " iterations: ", elapsed_time) return w def print_statistics(A, b, w): print("Loss function: ", loss(A, b, w)) print("Percent wrong: ", fraction_wrong(A, b, w)) if __name__ == '__main__': A_train, b_train = read_training_data('train.data') w_0 = Vec(A_train.D[1], {col: 0 for col in A_train.D[1]}) w_1 = Vec(A_train.D[1], {col: 1 for col in A_train.D[1]}) step_a = 10**(-9) step_b = 2 * 10**(-9) hyp_0a = gradient_descent(A_train, b_train, w_0, step_a, 1000) print("Statistics for initial weights 0, step size 10^-9:") print_statistics(A_train, b_train, hyp_0a) hyp_1a = gradient_descent(A_train, b_train, w_1, step_a, 1000) print("Statistics for initial weights 1, step size 10^-9:") print_statistics(A_train, b_train, hyp_0a) hyp_0b = gradient_descent(A_train, b_train, w_0, step_b, 1000) print("Statistics for initial weights 0, step size 2*10^-9:")