def hstack(list_of_matrix):
    # from copy import deepcopy
    # list_of_matrix = deepcopy(list_of_matrix)
    assert (type(list_of_matrix) == list and len(list_of_matrix) > 0)
    high = shape(list_of_matrix[0])[0]
    stacking_length = []
    # add @2018-04-11
    for i in range(len(list_of_matrix)):
        if dim(list_of_matrix[i]) == 1:
            list_of_matrix[i] = [[x] for x in list_of_matrix[i]]
    for i in range(len(list_of_matrix)):
        assert (dim(list_of_matrix[i]) == 2)
        assert (shape(list_of_matrix[i])[0] == high)
        stacking_length.append(shape(list_of_matrix[i])[1])
    R = zeros(high, sum(stacking_length))
    for i in range(len(list_of_matrix)):
        m, n = shape(list_of_matrix[i])
        start = sum(stacking_length[:i])
        # element-wise copy
        for j in range(m):
            for k in range(n):
                R[j][k + start] = list_of_matrix[i][j][k]
    return R
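# Usage sketch for hstack, assuming shape/dim/zeros are the repo's
# linalg.common-style helpers already imported by this module. 1-D inputs are
# promoted to single-column matrices before the column-wise concatenation.
if __name__ == '__main__':
    A = [[1, 2],
         [3, 4]]
    v = [9, 9]                 # 1-D vector, becomes one extra column
    print(hstack([A, v]))      # expected: [[1, 2, 9], [3, 4, 9]]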
def train_cv(self, X, y, shuffle=False, cv='full'):
    assert (type(cv) == int or cv == 'full')
    assert (dim(X) == 2 and dim(y) == 2)
    self.shape_Y = shape(y)
    for i in range(shape(y)[1]):
        max_score = None
        best_clf = None
        best_keep = None
        y_ = fancy(y, -1, i)
        for _ in range(self.max_iter):
            clf = self.estimator(**(self.parameter))
            X_, keep = self._rand_X(X)
            clf.fit(X_, y_)
            score = cross_val_score(clf, X, y, return_mean=True, cv=cv, shuffle=shuffle)
            # compare against None explicitly so a score of 0 is not discarded
            if max_score is None or max_score < score:
                max_score = score
                best_clf = clf
                best_keep = keep
        self.keeps.append(best_keep)
        self.clfs.append(best_clf)
def _check(self, X, y):
    assert ((dim(X) == 2 and dim(y) == 2) or (dim(X) == 2 and dim(y) == 1))
    assert (shape(X)[0] == shape(y)[0])
    self.dim_Y = dim(y)
    if self.dim_Y == 1:
        y = [[k] for k in y]
    return X, y
def fit(self, X, y):
    self._check(X, y)
    if dim(y) == 1:
        raw_X = X
        if self.fit_intercept:
            X = hstack([ones(shape(X)[0], 1), X])
        beta = zeros(shape(X)[1])  # row vector
        X_T = matrix_transpose(X)
        if self.fit_intercept:
            beta[0] = sum(minus(reshape(y, -1), dot(raw_X, beta[1:]))) / (shape(X)[0])
        for _ in range(self.max_iter):
            start = 1 if self.fit_intercept else 0
            for j in range(start, len(beta)):
                tmp_beta = [x for x in beta]
                tmp_beta[j] = 0.0
                r_j = minus(reshape(y, -1), dot(X, beta))
                # r_j = minus(reshape(y,-1) , dot(X, tmp_beta))
                arg1 = dot(X_T[j], r_j)
                arg2 = self.alpha * shape(X)[0]
                if sum(square(X_T[j])) != 0:
                    beta[j] = self._soft_thresholding_operator(arg1, arg2) / sum(square(X_T[j]))
                else:
                    beta[j] = 0
            if self.fit_intercept:
                beta[0] = sum(minus(reshape(y, -1), dot(raw_X, beta[1:]))) / (shape(X)[0])
        # # add whatch
        # self.beta = beta
        # self._whatch(raw_X,y)
        if self.fit_intercept:
            self.intercept_ = beta[0]
            self.coef_ = beta[1:]
        else:
            self.coef_ = beta
        self.beta = beta
        return self
    elif dim(y) == 2:
        if self.fit_intercept:
            X = hstack([ones(shape(X)[0], 1), X])
        y_t = matrix_transpose(y)
        betas = []
        for i in range(shape(y)[1]):
            betas.append(self._fit(X, y_t[i]))
        self.betas = matrix_transpose(betas)
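# The coordinate-descent update above calls self._soft_thresholding_operator,
# which is defined elsewhere in the class. The sketch below is an assumption
# about what that helper does -- the standard lasso soft-thresholding rule
# S(z, lam) = sign(z) * max(|z| - lam, 0) -- not a copy of the actual method.
def _soft_thresholding_sketch(z, lam):
    if z > lam:
        return z - lam
    elif z < -lam:
        return z + lam
    return 0.0

# e.g. _soft_thresholding_sketch(3.0, 1.0) == 2.0
#      _soft_thresholding_sketch(-0.5, 1.0) == 0.0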
def l2_loss(y, y_, return_losses=False):
    assert (dim(y) <= 2 and dim(y_) <= 2)

    def _score_calc(y, y_):
        y_ = [int(round(i)) for i in y_]
        numerator = sqrt(mean(square(minus(y, y_))))
        return numerator

    if dim(y) == 1:
        return _score_calc(y, y_)
    else:
        losses = [_score_calc(y[i], y_[i]) for i in range(len(y))]
        if return_losses:
            return losses
        else:
            return mean(losses)
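# Usage sketch for l2_loss, assuming sqrt/mean/square/minus are the
# element-wise helpers used throughout this module. Note that predictions are
# rounded to integers before the RMSE is computed, as in official_score.
if __name__ == '__main__':
    y = [1, 2, 3]
    y_pred = [2.2, 2.0, 3.0]        # rounds to [2, 2, 3]
    print(l2_loss(y, y_pred))       # expected: sqrt(1/3) ~= 0.577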
def _fit(self, X, y):
    self._check(X, y)
    assert (dim(y) == 1)
    beta = zeros(shape(X)[1])  # row vector
    X_T = matrix_transpose(X)
    if self.fit_intercept:
        beta[0] = sum(minus(reshape(y, -1), dot(X, beta[1:]))) / (shape(X)[0])
    for _ in range(self.max_iter):
        print(_)
        start = 1 if self.fit_intercept else 0
        for j in range(start, len(beta)):
            tmp_beta = [x for x in beta]
            tmp_beta[j] = 0.0
            r_j = minus(reshape(y, -1), dot(X, beta))
            # r_j = minus(reshape(y,-1) , dot(X, tmp_beta))
            arg1 = dot(X_T[j], r_j)
            arg2 = self.alpha * shape(X)[0]
            if sum(square(X_T[j])) != 0:
                beta[j] = self._soft_thresholding_operator(arg1, arg2) / sum(square(X_T[j]))
            else:
                beta[j] = 0
        if self.fit_intercept:
            beta[0] = sum(minus(reshape(y, -1), dot(X, beta[1:]))) / (shape(X)[0])
    return beta
def corrcoef(A):
    assert (dim(A) == 2)
    m, n = shape(A)

    def _corr(A, i, j):
        assert (dim(A) == 2)
        m, n = shape(A)
        A_T = matrix_transpose(A)
        X, Y = A_T[i], A_T[j]
        # X,Y = col(A,i),col(A,j)
        mean_X, mean_Y = mean(X), mean(Y)
        X_ = [k - mean_X for k in X]
        Y_ = [k - mean_Y for k in Y]
        numerator = mean(multiply(X_, Y_))
        # print(sqrt(mean(square(X_))))
        denominator = sqrt(mean(square(X_))) * sqrt(mean(square(Y_)))
        if denominator == 0:
            return 0
        else:
            r = (numerator) / (denominator)
            return r

    R = zeros((n, n))
    for i in range(n):
        for j in range(n):
            if i == j:
                R[i][j] = 1
            elif i > j:
                R[i][j] = R[j][i]
            else:
                R[i][j] = _corr(A, i, j)
    return R
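# Usage sketch for corrcoef: columns are compared pairwise, so exactly
# proportional columns correlate with coefficient 1.0. (Assumes the helper
# functions above resolve to the repo's linalg implementations.)
if __name__ == '__main__':
    A = [[1, 2],
         [2, 4],
         [3, 6]]
    print(corrcoef(A))          # expected: [[1, 1.0], [1.0, 1]]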
def predict(self, X):
    assert (dim(X) == 2)
    result = []
    for i in range(self.shape_Y[1]):
        X_ = self._get_keep_X(X, self.keeps[i])
        result.append(self.clfs[i].predict(X_))
    return matrix_transpose(result)
def predict(self, X):
    result = []
    # dim_X = dim(X)
    if dim(X) == 1:
        X = [X]
    for x in X:
        loss = sum(square(minus(self.X, x)), axis=1)
        index = argsort(loss)[:self.k]
        if self.verbose:
            print(index)
        ys = []
        for i in index:
            ys.append(self.y[i])
        k_loss_raw = sorted(loss)[:self.k]
        k_loss = [1 / l if l != 0 else 0 for l in k_loss_raw]
        k_loss_sum = sum(k_loss)
        weights = [
            l / float(k_loss_sum) if k_loss_sum != 0 else 1 for l in k_loss
        ]
        weight_m = diag(weights)
        ys = matrix_matmul(weight_m, ys)
        result.append(sum(ys, axis=0))
    if len(self.shape_Y) == 1:
        result = matrix_transpose(result)[0]
    return result
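# Standalone sketch (hypothetical numbers) of the inverse-distance weighting
# used in the predict method above: smaller squared distances get larger
# weights, and the weights are normalised to sum to 1 before the k neighbour
# targets are averaged through the diagonal weight matrix.
if __name__ == '__main__':
    k_loss_raw = [1.0, 2.0, 4.0]                    # squared distances of the k neighbours
    k_loss = [1 / l if l != 0 else 0 for l in k_loss_raw]
    k_loss_sum = sum(k_loss)                        # 1.75
    weights = [l / float(k_loss_sum) for l in k_loss]
    print(weights)                                  # expected: [0.571..., 0.285..., 0.142...]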
def matrix_inverse(A):
    assert (dim(A) == 2)
    N = shape(A)[0]
    L = identity_matrix(N)
    R = identity_matrix(N)

    def _row_assign(A, dest, source, factor):
        assert (dim(A) == 2)
        A[dest] = [
            factor * A[source][i] + A[dest][i] for i in range(len(A[source]))
        ]

    def _row_switch(A, dest, source):
        assert (dim(A) == 2)
        t = A[dest]
        A[dest] = A[source]
        A[source] = t

    def _col_switch(A, dest, source):
        assert (dim(A) == 2)
        m, n = shape(A)
        for i in range(m):
            t = A[i][dest]
            A[i][dest] = A[i][source]
            A[i][source] = t

    # eliminate below the diagonal
    for j in range(N):
        for i in range(N):
            if i == j:
                # pivoting: move the largest element of the remaining
                # sub-matrix onto the diagonal
                max_k = i
                max_w = j
                for k in range(i, N):
                    for w in range(j, N):
                        if A[k][w] > A[max_k][max_w]:
                            max_k, max_w = k, w
                _row_switch(A, i, max_k)
                _row_switch(L, i, max_k)
                _col_switch(A, j, max_w)
                _col_switch(R, j, max_w)
            if i > j:
                if A[j][j] == 0:
                    raise Exception('matrix is singular')
                fa = -A[i][j] / A[j][j]
                _row_assign(A, i, j, fa)
                _row_assign(L, i, j, fa)
    # eliminate above the diagonal
    for j in range(N)[::-1]:
        for i in range(N)[::-1]:
            if i < j:
                if A[j][j] == 0:
                    raise Exception('matrix is singular')
                fa = -A[i][j] / A[j][j]
                _row_assign(A, i, j, fa)
                _row_assign(L, i, j, fa)
    # scale each row so the reduced A becomes the identity
    for i in range(len(L)):
        L[i] = [x / A[i][i] for x in L[i]]
    return matrix_matmul(R, L)
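# Usage sketch for matrix_inverse. Note that the routine pivots A in place,
# so pass a copy if the original matrix is still needed. (Assumes
# identity_matrix/matrix_matmul/shape come from the repo's linalg modules.)
if __name__ == '__main__':
    A = [[4.0, 7.0],
         [2.0, 6.0]]
    print(matrix_inverse(A))   # expected: ~[[0.6, -0.7], [-0.2, 0.4]]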
def predict(self, X):
    if dim(X) == 1:
        return [0 for _ in X]
    R = [[0 for _ in range(shape(X)[1])]]
    for i in range(shape(X)[0] - 1):
        R.append(X[i])
    return R
def _col_switch(A, dest, source):
    assert (dim(A) == 2)
    m, n = shape(A)
    for i in range(m):
        t = A[i][dest]
        A[i][dest] = A[i][source]
        A[i][source] = t
def matrix_copy(A):
    assert (dim(A) == 2)
    m, n = shape(A)
    R = zeros((m, n))
    for i in range(m):
        for j in range(n):
            R[i][j] = A[i][j]
    return R
def fit(self, X, y):
    self.X = X
    if dim(y) == 1:
        self.y = [[k] for k in y]
    else:
        self.y = y
    self.shape_X = shape(X)
    self.shape_Y = shape(y)
def stdev(X, axis=0):
    assert (dim(X) == 2)
    assert (axis == 0)
    X_T = matrix_transpose(X)
    m = mean(X, axis=0)
    R = []
    for j in range(shape(X)[1]):
        R.append(sqrt(mean(square(minus(X_T[j], m[j])))))
    return R
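# Usage sketch for stdev: per-column population standard deviation
# (divides by n, not n - 1), supported over axis 0 only.
if __name__ == '__main__':
    X = [[1.0, 10.0],
         [3.0, 10.0]]
    print(stdev(X, axis=0))    # expected: [1.0, 0.0]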
def matrix_matmul(A, B):
    assert (dim(A) == 2 and dim(B) == 2 and shape(A)[1] == shape(B)[0])

    def __sub_product(A, i, B, j):
        N = len(A[i])
        partial_sum = 0
        for k in range(N):
            partial_sum += A[i][k] * B[k][j]
        return partial_sum

    m = shape(A)[0]
    n = shape(B)[1]
    R = []
    for i in range(m):
        r = []
        for j in range(n):
            r.append(__sub_product(A, i, B, j))
        R.append(r)
    return R
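# Usage sketch for matrix_matmul with a hand-checked 2x2 product.
if __name__ == '__main__':
    A = [[1, 2],
         [3, 4]]
    B = [[5, 6],
         [7, 8]]
    print(matrix_matmul(A, B))   # expected: [[19, 22], [43, 50]]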
def official_score(y, y_, return_scores=False):
    assert (dim(y) <= 2 and dim(y_) <= 2)

    def _score_calc(y, y_):
        y_ = [int(round(i)) for i in y_]
        numerator = sqrt(mean(square(minus(y, y_))))
        denominator = sqrt(mean(square(y))) + sqrt(mean(square(y_)))
        if denominator == 0:
            return 0
        else:
            return 1 - (numerator / float(denominator))

    if dim(y) == 1:
        return _score_calc(y, y_)
    else:
        scores = [_score_calc(y[i], y_[i]) for i in range(len(y))]
        if return_scores:
            return scores
        else:
            return mean(scores)
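# Usage sketch for official_score, which computes
# 1 - RMSE(y, round(y_)) / (RMS(y) + RMS(round(y_))): a prediction that is
# perfect after rounding scores 1.0, and an all-zero prediction of a non-zero
# target scores 0.0.
if __name__ == '__main__':
    print(official_score([1, 2, 3], [1.2, 1.8, 3.0]))   # expected: 1.0
    print(official_score([2, 2], [0.0, 0.0]))           # expected: 0.0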
def train(self, X, y, X_val, Y_val):
    assert (dim(X) == 2 and dim(y) == 2)
    self.shape_Y = shape(y)
    for i in range(shape(y)[1]):
        max_score = None
        best_clf = None
        best_keep = None
        y_ = fancy(y, -1, i)
        for _ in range(self.max_iter):
            clf = self.estimator(**(self.parameter))
            X_, keep = self._rand_X(X)
            clf.fit(X_, y_)
            score = clf.score(self._get_keep_X(X_val, keep), fancy(Y_val, -1, i))
            # compare against None explicitly so a score of 0 is not discarded
            if max_score is None or max_score < score:
                max_score = score
                best_clf = clf
                best_keep = keep
        self.keeps.append(best_keep)
        self.clfs.append(best_clf)
def fit(self, X, y):
    assert (dim(X) == 2)
    assert (dim(y) == 1 or dim(y) == 2)
    self.shape_X = shape(X)
    self.shape_Y = shape(y)
    if dim(y) == 1:
        y = [[k] for k in y]
    best_w = None
    min_err = None
    for i in range(self.max_iter):
        W = self.random_w((shape(X)[1], shape(y)[1]))
        y_ = matrix_matmul(X, W)
        err = mean(sqrt(mean(square(minus(y, y_)), axis=1)))
        if best_w is None or err < min_err:
            best_w = W
            min_err = err
        print(err)
    self.W = best_w
def predict(self, X):
    result = []
    # dim_X = dim(X)
    if dim(X) == 1:
        X = [X]
    for x in X:
        loss = sum(square(minus(self.X, x)), axis=1)
        # loss = sum(abs(minus(self.X,x)),axis=1)
        index = argsort(loss)[:self.k]
        if self.verbose:
            print(index, '/len', len(loss))
        ys = []
        for i in index:
            ys.append(self.y[i])
        result.append(mean(ys, axis=0))
    return result
def _corr(A, i, j):
    assert (dim(A) == 2)
    m, n = shape(A)
    A_T = matrix_transpose(A)
    X, Y = A_T[i], A_T[j]
    # X,Y = col(A,i),col(A,j)
    mean_X, mean_Y = mean(X), mean(Y)
    X_ = [k - mean_X for k in X]
    Y_ = [k - mean_Y for k in Y]
    numerator = mean(multiply(X_, Y_))
    # print(sqrt(mean(square(X_))))
    denominator = sqrt(mean(square(X_))) * sqrt(mean(square(Y_)))
    if denominator == 0:
        return 0
    else:
        r = (numerator) / (denominator)
        return r
def matrix_transpose(A):
    assert (dim(A) == 2)
    # The earlier index-based implementation built the result with
    # `R = [[0 for _ in range(m)]] * n`, which repeats the same row object n
    # times, so every write clobbered all rows. The zip-based transposition
    # below avoids that aliasing problem.
    from copy import deepcopy
    B = deepcopy(A)
    result = [list(i) for i in zip(*B)]
    return result
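# Usage sketch for matrix_transpose on a non-square matrix.
if __name__ == '__main__':
    A = [[1, 2, 3],
         [4, 5, 6]]
    print(matrix_transpose(A))   # expected: [[1, 4], [2, 5], [3, 6]]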
def predict(self, X):
    from preprocessing import standard_scaling
    result = []
    # dim_X = dim(X)
    if dim(X) == 1:
        X = [X]
    for x in X:
        loss = sum(square(minus(self.X, x)), axis=1)
        # loss = sum(abs(minus(self.X,x)),axis=1)
        new_X = standard_scaling(self.X, axis=0)
        x = sqrt(square(minus(x, mean(x))))
        loss = minus(loss, multiply(dot(new_X, x), self.alpha))
        index = argsort(loss)[:self.k]
        if self.verbose:
            print(index, '/len', len(loss))
        ys = []
        for i in index:
            ys.append(self.y[i])
        result.append(mean(ys, axis=0))
    return result
def shift(A, shift_step, fill=None):
    assert (dim(A) == 2)
    R = zeros(shape(A))
    for i in range(shape(A)[0]):
        for j in range(shape(A)[1]):
            if shift_step >= 0:
                if i >= shift_step:
                    R[i][j] = A[i - shift_step][j]
                else:
                    if type(fill) == list:
                        R[i][j] = fill[j]
                    else:
                        R[i][j] = fill
            else:
                if (i - shift_step) < shape(A)[0]:
                    R[i][j] = A[i - shift_step][j]
                else:
                    if type(fill) == list:
                        R[i][j] = fill[j]
                    else:
                        R[i][j] = fill
    return R
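# Usage sketch for shift: a positive shift_step moves rows down and fills the
# vacated leading rows, a negative one moves rows up and fills the trailing
# rows; `fill` may be a scalar or a per-column list.
if __name__ == '__main__':
    A = [[1, 1],
         [2, 2],
         [3, 3]]
    print(shift(A, 1, fill=0))    # expected: [[0, 0], [1, 1], [2, 2]]
    print(shift(A, -1, fill=0))   # expected: [[2, 2], [3, 3], [0, 0]]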
import sys
sys.path.append('..')

from linalg.common import dim

print(dim([[1.5]]))
print(dim([[1.5], [1.5]]))
print(dim([[[1.5]]]))
def _row_switch(A, dest, source):
    assert (dim(A) == 2)
    t = A[dest]
    A[dest] = A[source]
    A[source] = t
def _row_assign(A, dest, source, factor):
    assert (dim(A) == 2)
    A[dest] = [
        factor * A[source][i] + A[dest][i] for i in range(len(A[source]))
    ]
def diag(A):
    assert (dim(A) == 1)
    R = zeros((shape(A)[0], shape(A)[0]))
    for i in range(len(A)):
        R[i][i] = A[i]
    return R
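# Usage sketch for diag: builds a square matrix with the 1-D input on its
# main diagonal (the same helper the weighted-kNN predict above relies on).
if __name__ == '__main__':
    print(diag([1, 2, 3]))   # expected: [[1, 0, 0], [0, 2, 0], [0, 0, 3]]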