def predict(self,X):
    """Return X delayed by one row, with a zero row in front.

    For 1-D input (``dim(X) == 1``) a list of zeros with one entry per
    element of X is returned. Otherwise the result has as many rows as X:
    row 0 is all zeros and row ``k`` (k >= 1) is ``X[k - 1]``, so the last
    row of X is never used.
    """
    if dim(X) == 1:
        return [0 for _ in X]

    # Leading placeholder row, then every row of X except the last one.
    delayed = [[0 for _ in range(shape(X)[1])]]
    delayed.extend(X[row] for row in range(shape(X)[0] - 1))
    return delayed
def hstack(list_of_matrix):
    """Concatenate matrices side by side (column-wise).

    Args:
        list_of_matrix: non-empty list of inputs. Each entry is either a
            2-D matrix or a 1-D vector; a 1-D vector is treated as a
            single column. All entries must have the same number of rows.
            The list and its entries are not modified.

    Returns:
        A new matrix (allocated with ``zeros``) whose row ``j`` is row ``j``
        of every input joined left to right.

    Raises:
        AssertionError: if the argument is not a non-empty list, an entry
            is neither 1-D nor 2-D, or the row counts differ.
    """
    assert (type(list_of_matrix) == list and len(list_of_matrix) > 0)
    high = shape(list_of_matrix[0])[0]

    # Promote 1-D vectors to column matrices in a local list, so the
    # caller's list is left untouched.
    blocks = []
    for mat in list_of_matrix:
        if dim(mat) == 1:
            mat = [[x] for x in mat]
        blocks.append(mat)

    stacking_length = []
    for mat in blocks:
        assert (dim(mat) == 2)
        assert (shape(mat)[0] == high)
        stacking_length.append(shape(mat)[1])

    R = zeros(high, sum(stacking_length))
    start = 0  # running column offset of the current block inside R
    for mat, width in zip(blocks, stacking_length):
        # element wise copy
        for j in range(high):
            for k in range(width):
                R[j][k + start] = mat[j][k]
        start += width
    return R
def fit(self, X, y, weights=None):
    """Fit ridge-regularised least squares and store the result in ``self.W``.

    Computes ``W = (X^T X + alpha * m * E)^-1 X^T y`` where ``m`` is the
    number of rows of X and ``E`` comes from ``identity_matrix``. When an
    intercept column is fitted and ``self.penalty_bias`` is false,
    ``E[0][0]`` is set to 0 so the intercept is not penalised.

    Args:
        X: 2-D sample matrix.
        y: 1-D or 2-D targets (validated and reshaped by ``self._check``).
        weights: optional sequence with one entry per row of X. When
            given, X is left-multiplied by ``diag(weights)``; ``y`` is
            used as is.

    Side effects:
        Sets ``self.W`` and ``self.importance_`` (``sum(self.W, axis=1)``,
        with the first entry dropped when ``self.fit_intercept`` is set).
    """
    X, y = self._check(X, y)
    if self.fit_intercept:
        X = hstack([ones(shape(X)[0], 1), X])

    eye = identity_matrix(shape(X)[1])
    from linalg.matrix import diag
    # Column 0 is the bias term only when the intercept column was added
    # above; without it, index 0 is a regular feature and stays penalised.
    if self.fit_intercept and not self.penalty_bias:
        eye[0][0] = 0

    # add weights
    if weights is not None:
        assert (len(weights) == shape(X)[0])
        X = matrix_matmul(diag(weights), X)

    X_T = matrix_transpose(X)
    regularised = plus(
        matrix_matmul(X_T, X),
        multiply(eye, self.alpha * shape(X)[0]))
    self.W = matrix_matmul(
        matrix_matmul(matrix_inverse(regularised), X_T), y)

    self.importance_ = sum(self.W, axis=1)
    if self.fit_intercept:
        # Drop the intercept entry.
        self.importance_ = self.importance_[1:]
def random_w(self,s):
    """Allocate ``zeros(s)`` and fill every cell with ``random.random()``.

    ``s`` must have exactly two entries. Cells are drawn row by row, left
    to right, so the order of random draws is fixed for a given seed.
    """
    assert(len(s)==2)
    weights = zeros(s)
    for row_idx in range(shape(weights)[0]):
        target = weights[row_idx]
        for col_idx in range(shape(weights)[1]):
            target[col_idx] = random.random()
    return weights
def train_cv(self, X, y, shuffle=False, cv='full'):
    """Pick, per target column, the best estimator over random feature subsets.

    For each ``i`` in ``range(shape(y)[1])`` the method runs
    ``self.max_iter`` trials. A trial builds ``self.estimator(**self.parameter)``,
    fits it on the random subset returned by ``self._rand_X(X)`` and scores
    it with ``cross_val_score``. The best-scoring estimator and its column
    mask are appended to ``self.clfs`` and ``self.keeps``.

    Args:
        X: 2-D feature matrix.
        y: 2-D target matrix.
        shuffle: forwarded to ``cross_val_score``.
        cv: an int or the string ``'full'``; forwarded to ``cross_val_score``.

    Side effects:
        Sets ``self.shape_Y``; appends one entry per target to
        ``self.keeps`` and ``self.clfs``.
    """
    assert (isinstance(cv, int) or cv == 'full')
    assert (dim(X) == 2 and dim(y) == 2)
    self.shape_Y = shape(y)
    for i in range(shape(y)[1]):
        max_score = None
        best_clf = None
        best_keep = None
        y_ = fancy(y, -1, i)  # presumably column i of y - confirm against fancy()
        for _ in range(self.max_iter):
            clf = self.estimator(**(self.parameter))
            X_, keep = self._rand_X(X)
            clf.fit(X_, y_)
            # NOTE(review): scoring uses the full X and y, not the X_/y_
            # the estimator was just fitted on - confirm this is intended.
            score = cross_val_score(
                clf, X, y, return_mean=True, cv=cv, shuffle=shuffle)
            # Test for None explicitly: a legitimate best score of 0 must
            # not be mistaken for "no score recorded yet".
            if max_score is None or max_score < score:
                max_score = score
                best_clf = clf
                best_keep = keep
        self.keeps.append(best_keep)
        self.clfs.append(best_clf)
def _fit(self, X, y):
    """Run cyclic coordinate descent for one 1-D target and return ``beta``.

    Args:
        X: 2-D design matrix. When ``self.fit_intercept`` is set, column 0
            is treated as the bias column: ``beta[0]`` is the intercept
            and is never soft-thresholded.
        y: 1-D target vector with one entry per row of X.

    Returns:
        The coefficient list ``beta``, one entry per column of X.
    """
    self._check(X, y)
    assert (dim(y) == 1)
    n_samples = shape(X)[0]
    beta = zeros(shape(X)[1])  # one coefficient per column of X
    X_T = matrix_transpose(X)
    penalty = self.alpha * n_samples
    start = 1 if self.fit_intercept else 0

    # beta[1:] pairs with the non-bias columns only, so the intercept
    # update needs X without its leading bias column.
    features = [row[1:] for row in X] if self.fit_intercept else None

    def _intercept():
        # Mean residual of the model without the intercept term.
        return sum(minus(reshape(y, -1), dot(features, beta[1:]))) / n_samples

    if self.fit_intercept:
        beta[0] = _intercept()

    for _ in range(self.max_iter):
        for j in range(start, len(beta)):
            # Residual under the current coefficients (coordinate j is
            # deliberately left in, as in the existing implementation).
            r_j = minus(reshape(y, -1), dot(X, beta))
            arg1 = dot(X_T[j], r_j)
            col_sq = sum(square(X_T[j]))
            if col_sq != 0:
                beta[j] = self._soft_thresholding_operator(arg1, penalty) / col_sq
            else:
                # An all-zero column cannot carry any weight.
                beta[j] = 0
            if self.fit_intercept:
                beta[0] = _intercept()
    return beta
def _check(self, X, y):
    """Validate (X, y) and return the pair with y as a list of rows.

    X must be 2-D, y must be 1-D or 2-D, and ``shape(X)[0]`` must equal
    ``shape(y)[0]``. The dimensionality of the incoming y is stored in
    ``self.dim_Y``; a 1-D y is returned as a list of single-element rows.
    """
    assert (dim(X) == 2 and (dim(y) == 2 or dim(y) == 1))
    assert (shape(X)[0] == shape(y)[0])
    self.dim_Y = dim(y)
    if self.dim_Y != 1:
        return X, y
    # Wrap every scalar target in its own row.
    return X, [[k] for k in y]
def normalize(X, y=None, norm='l2', axis=1, return_norm=False, return_norm_inv=False):
    """Scale the rows (``axis=0``) or columns (``axis=1``) of X.

    Every vector ``v`` is multiplied by ``1 / n``; vectors whose factor
    ``n`` is 0 are left unscaled. The factor is:

    * with a non-empty ``y``: ``sqrt(sum(square(v))) / y_norm`` for either
      norm, where ``y_norm`` is ``sqrt(sum(square(y)))`` for ``'l2'`` and
      ``sqrt(sum(abs(y)))`` for ``'l1'``;
    * without ``y``, ``axis=0``: ``sqrt(sum(square(v)))`` for ``'l2'`` and
      ``sqrt(sum(abs(v)))`` for ``'l1'``;
    * without ``y``, ``axis=1``: ``sum(square(v))`` for ``'l2'`` and
      ``sum(abs(v))`` for ``'l1'`` (no square root).

    NOTE(review): these factors are kept exactly as found. They differ
    between the two axes and from the textbook L1/L2 norms - confirm
    whether that is intended before relying on the scale.

    Args:
        X: 2-D matrix.
        y: optional reference vector used to rescale the factors.
        norm: ``'l2'`` or ``'l1'``.
        axis: 0 to scale rows, 1 to scale columns.
        return_norm: also return the list of factors.
        return_norm_inv: also return the reciprocals of the factors
            (0 where the factor is 0).

    Returns:
        ``A``, ``(A, norms)``, ``(A, norms_inv)`` or
        ``(A, norms, norms_inv)`` depending on the two flags. If ``y`` is
        non-empty and ``y_norm`` is 0, X itself is returned regardless of
        the flags.
    """
    assert (axis == 0 or axis == 1)
    assert (norm == 'l2' or norm == 'l1')

    y_norm = None
    if y is not None:
        if norm == 'l2':
            y_norm = sqrt(sum(square(y)))
        elif norm == 'l1':
            y_norm = sqrt(sum(abs(y)))
    use_y = bool(y)
    if use_y and y_norm == 0:
        return X

    def _factor(vec):
        # Scale factor for one row/column; see the docstring for the table.
        if use_y:
            return sqrt(sum(square(vec))) / y_norm
        total = sum(square(vec)) if norm == 'l2' else sum(abs(vec))
        return sqrt(total) if axis == 0 else total

    norms = []
    if axis == 0:
        A = matrix_copy(X)
        for i in range(shape(X)[0]):
            # Use row i itself; reading column i here would be wrong and
            # fails outright when X has more rows than columns.
            n = _factor(X[i])
            if n != 0:
                A[i] = multiply(X[i], 1 / float(n))
            norms.append(n)
    else:
        columns = matrix_transpose(X)
        for j in range(shape(X)[1]):
            n = _factor(columns[j])
            if n != 0:
                columns[j] = multiply(columns[j], 1 / float(n))
            norms.append(n)
        A = matrix_transpose(columns)

    norms_inv = [0 if x == 0 else 1 / float(x) for x in norms]
    if return_norm and return_norm_inv:
        return A, norms, norms_inv
    elif return_norm:
        return A, norms
    elif return_norm_inv:
        return A, norms_inv
    else:
        return A
def fit(self, X, y):
    """Memorise the training data; no parameters are computed.

    Stores X as ``self.X`` and y as ``self.y`` (a 1-D y becomes a list of
    single-element rows). ``self.shape_X`` and ``self.shape_Y`` record
    ``shape(X)`` and ``shape(y)`` of the arguments as passed in.
    """
    self.X = X
    self.y = [[k] for k in y] if dim(y) == 1 else y
    self.shape_X = shape(X)
    self.shape_Y = shape(y)
def exponential_smoothing(A, axis=0, alpha=0.1):
    """Recursively smooth the rows of A from first to last.

    Starting from ``C = zeros(shape(A)[1])``, every row ``A[i]`` yields
    ``C = (1 - alpha) * A[i] + alpha * C``; each new ``C`` becomes row
    ``i`` of the result. Only ``axis == 0`` is supported.
    """
    assert (axis == 0)
    smoothed = []
    state = zeros(shape(A)[1])
    for row_idx in range(shape(A)[0]):
        fresh = multiply(A[row_idx], (1 - alpha))
        carried = multiply(state, alpha)
        state = plus(fresh, carried)
        smoothed.append(state)
    return smoothed
def outlier_handling(sample, method='mean', max_sigma=3):
    """Pull high outliers of ``sample`` towards the mean, in place.

    A cell ``sample[i][j]`` counts as an outlier when it exceeds
    ``mean_[j]`` by more than ``max_sigma * std_[j]`` (values below the
    mean are never touched), with ``std_ = stdev(sample)`` and
    ``mean_ = mean(sample, axis=0)``.

    * ``method='mean'``: the cell is replaced by ``mean_[j]``.
    * ``method='dynamic'``: for rows in the first half of the sample
      (``i < len(sample) / 2.0``) the cell is replaced by the average of
      itself and ``mean_[j]``; later rows are left alone.

    Returns the same ``sample`` object that was passed in.
    """
    assert (method == 'mean' or method == 'dynamic')
    std_ = stdev(sample)
    mean_ = mean(sample, axis=0)
    for i in range(shape(sample)[0]):
        row = sample[i]
        for j in range(shape(sample)[1]):
            if not (row[j] - mean_[j] > max_sigma * std_[j]):
                continue
            if method == 'mean':
                row[j] = mean_[j]
            elif method == 'dynamic' and i < len(sample) / 2.0:
                row[j] = (mean_[j] + row[j]) / 2.0
    return sample
def matrix_inverse(A):
    """Invert a square matrix by Gauss-Jordan elimination with full pivoting.

    The matrix is reduced to diagonal form with row operations (recorded
    in ``L``) and column swaps (recorded in ``R``); the inverse is then
    ``R * D^-1 * L`` where ``D`` is the remaining diagonal.

    Args:
        A: 2-D square matrix given as a list of row lists. It is not
            modified; elimination runs on a private copy.

    Returns:
        The inverse as a new matrix.

    Raises:
        ValueError: if a zero pivot is encountered, i.e. A is singular.
    """
    assert (dim(A) == 2)
    N = shape(A)[0]
    A = [list(row) for row in A]  # work on a copy, keep the caller's matrix
    L = identity_matrix(N)  # accumulates the row operations
    R = identity_matrix(N)  # accumulates the column swaps

    def _magnitude(v):
        return -v if v < 0 else v

    def _row_assign(M, dest, source, factor):
        # M[dest] += factor * M[source]
        M[dest] = [factor * s + d for s, d in zip(M[source], M[dest])]

    def _row_switch(M, dest, source):
        M[dest], M[source] = M[source], M[dest]

    def _col_switch(M, dest, source):
        for row in M:
            row[dest], row[source] = row[source], row[dest]

    # down triangle
    for j in range(N):
        # Pick the entry of largest magnitude in the remaining submatrix.
        # Comparing signed values would prefer 0 over any negative entry
        # and report well-conditioned matrices such as -I as singular.
        max_k, max_w = j, j
        for k in range(j, N):
            for w in range(j, N):
                if _magnitude(A[k][w]) > _magnitude(A[max_k][max_w]):
                    max_k, max_w = k, w
        _row_switch(A, j, max_k)
        _row_switch(L, j, max_k)
        _col_switch(A, j, max_w)
        _col_switch(R, j, max_w)

        if A[j][j] == 0:
            raise ValueError('matrix is singular')
        for i in range(j + 1, N):
            fa = -A[i][j] / A[j][j]
            _row_assign(A, i, j, fa)
            _row_assign(L, i, j, fa)

    # upper triangle
    for j in range(N - 1, -1, -1):
        for i in range(j - 1, -1, -1):
            if A[j][j] == 0:
                raise ValueError('matrix is singular')
            fa = -A[i][j] / A[j][j]
            _row_assign(A, i, j, fa)
            _row_assign(L, i, j, fa)

    # Divide every row of L by the matching diagonal entry.
    for i in range(len(L)):
        L[i] = [x / A[i][i] for x in L[i]]
    return matrix_matmul(R, L)
def outlier_handling(sample,method='mean',max_sigma=3):
    """Return a copy of ``sample`` with its high outliers damped.

    The input is duplicated with ``matrix_copy`` first, so the caller's
    matrix is not modified. A cell ``sample[i][j]`` counts as an outlier
    when it exceeds ``mean_[j]`` by more than ``max_sigma * std_[j]``
    (values below the mean are never touched), with
    ``std_ = stdev(sample)`` and ``mean_ = mean(sample, axis=1)``.

    * ``method='mean'``: replace the cell by ``mean_[j]``.
    * ``method='zero'``: replace the cell by 0.
    * ``method='dynamic'``: replace the cell by the average of itself and
      ``mean_[j]``.
    """
    assert(method=='mean' or method=='zero' or method=='dynamic')
    sample = matrix_copy(sample)
    std_ = stdev(sample)
    mean_ = mean(sample,axis=1)
    for i in range(shape(sample)[0]):
        row = sample[i]
        for j in range(shape(sample)[1]):
            if not (row[j]-mean_[j] >max_sigma*std_[j]):
                continue
            if method=='mean':
                row[j] = mean_[j]
            elif method=='zero':
                row[j] = 0
            elif method=='dynamic':
                row[j] = (row[j] + mean_[j])/2.0
    return sample
def _col_switch(A, dest, source):
    """Swap columns ``dest`` and ``source`` of the 2-D matrix A in place."""
    assert (dim(A) == 2)
    row_count, _col_count = shape(A)
    for r in range(row_count):
        row = A[r]
        row[dest], row[source] = row[source], row[dest]
def corrcoef(A):
    """Return the n x n matrix of correlations between the columns of A.

    Entry ``[i][j]`` is ``mean(x' * y') / (sqrt(mean(x'^2)) * sqrt(mean(y'^2)))``
    where ``x'`` and ``y'`` are columns ``i`` and ``j`` with their means
    subtracted. The diagonal is 1, and a pair whose denominator is 0
    (a constant column) gets 0. Only the upper triangle is computed; the
    lower triangle is mirrored from it.

    Args:
        A: 2-D matrix with one variable per column.
    """
    assert (dim(A) == 2)
    m, n = shape(A)

    # Transpose and centre every column once, not once per column pair.
    A_T = matrix_transpose(A)
    centred = []
    spread = []
    for idx in range(n):
        column = A_T[idx]
        column_mean = mean(column)
        deviations = [k - column_mean for k in column]
        centred.append(deviations)
        spread.append(sqrt(mean(square(deviations))))

    def _corr(i, j):
        denominator = spread[i] * spread[j]
        if denominator == 0:
            return 0
        return mean(multiply(centred[i], centred[j])) / denominator

    R = zeros((n, n))
    for i in range(n):
        for j in range(n):
            if i == j:
                R[i][j] = 1
            elif i > j:
                # Row j < i is already complete; reuse its value.
                R[i][j] = R[j][i]
            else:
                R[i][j] = _corr(i, j)
    return R
def predict(self, X):
    """Predict targets for X from the fitted coefficients.

    Uses ``dot(X, self.beta)`` when ``self.beta`` is set, otherwise
    ``matrix_matmul(X, self.betas)``. When ``self.fit_intercept`` is true
    the ``ones(m, 1)`` bias column is prepended to X first.

    Raises:
        AssertionError: if neither ``self.beta`` nor ``self.betas`` is set.
    """
    # Identity checks: robust whatever type the coefficients have.
    assert (self.beta is not None or self.betas is not None)
    if self.fit_intercept:
        X = hstack([ones(shape(X)[0], 1), X])
    if self.beta is not None:
        return dot(X, self.beta)
    return matrix_matmul(X, self.betas)
def matrix_copy(A):
    """Return a new matrix from ``zeros`` holding the same elements as A.

    A must be 2-D. Elements are copied one by one, so the rows of the
    result are distinct from the rows of A.
    """
    assert (dim(A) == 2)
    m, n = shape(A)
    duplicate = zeros((m, n))
    for i in range(m):
        source_row, target_row = A[i], duplicate[i]
        for j in range(n):
            target_row[j] = source_row[j]
    return duplicate
def stdev(X):
    """Return one deviation value per column of X.

    For column ``j`` the value is ``sqrt(mean((col_j - m[j]) ** 2))``
    with ``m = mean(X, axis=1)``.
    """
    columns = matrix_transpose(X)
    centre = mean(X, axis=1)
    return [
        sqrt(mean(square(minus(columns[j], centre[j]))))
        for j in range(shape(X)[1])
    ]
def get_feature_grid(sample,i,fill_na='mean',max_na_rate=1,col_count=None,with_test=True):
    """Build a grid of lagged values from ``fancy(sample, None, i)``.

    With ``col`` the extracted sequence of length ``n``, row ``j`` of the
    grid holds ``n - j`` leading ``None`` placeholders followed by
    ``col[:j]``. The grid is then cut with
    ``fancy(R, None, (width,))`` where
    ``width = int((1 - max_na_rate) * n_columns)``, and the remaining
    ``None`` cells are filled.

    Args:
        sample: source matrix.
        i: selector handed to ``fancy`` to pick the series.
        fill_na: ``'mean'`` fills a gap with the column statistic described
            below, ``'zero'`` fills it with 0.
        max_na_rate: controls ``width`` as shown above.
        col_count: when not None, the result is further cut with the
            column selector ``(-col_count,)``.
        with_test: when false, the last row is dropped (``R[:-1]``, or the
            row selector ``(0, -1)`` together with ``col_count``).

    The fill value for ``'mean'`` is the sum of the non-None entries of a
    grid column divided by the total number of rows (placeholders count
    towards the denominator).
    """
    assert(fill_na=='mean' or fill_na=='zero')
    col = fancy(sample,None,i)
    length = len(col)

    R = []
    for j in range(length):
        grid_row = [None] * (length - j)
        grid_row.extend(col[:j])
        R.append(grid_row)

    def _mean_with_none(values):
        if len(values)==0:
            return 0
        total = 0
        for v in values:
            if v is not None:
                total += v
        return total/float(len(values))

    means = [_mean_with_none(fancy(R,None,j)) for j in range(shape(R)[1])]

    width = int((1-max_na_rate) * shape(R)[1])
    R = fancy(R,None,(width,))

    # NOTE(review): ``means`` was computed before the cut above but is
    # indexed with post-cut column numbers; if the selector drops leading
    # columns, the two disagree whenever width > 0 - confirm.
    for r in range(shape(R)[0]):
        for c in range(shape(R)[1]):
            if R[r][c] is not None:
                continue
            if fill_na=='mean':
                R[r][c] = means[c]
            elif fill_na=='zero':
                R[r][c] = 0

    if with_test:
        if col_count is None:
            return R
        return fancy(R,None,(-col_count,))
    if col_count is None:
        return R[:-1]
    return fancy(R,(0,-1),(-col_count,))
def stdev(X, axis=0):
    """Return one deviation value per column of the 2-D matrix X.

    For column ``j`` the value is ``sqrt(mean((col_j - m[j]) ** 2))``
    with ``m = mean(X, axis=0)``. Only ``axis == 0`` is supported.
    """
    assert (dim(X) == 2)
    assert (axis == 0)
    columns = matrix_transpose(X)
    centre = mean(X, axis=0)
    return [
        sqrt(mean(square(minus(columns[j], centre[j]))))
        for j in range(shape(X)[1])
    ]
def predict(self, X):
    """Predict targets as ``X * self.W``.

    When ``self.fit_intercept`` is true the ``ones(m, 1)`` bias column is
    prepended to X first. If the model was fitted on a 1-D target
    (``self.dim_Y == 1``) the single-column result is flattened to a list
    of scalars.

    Raises:
        AssertionError: if ``self.W`` has not been set.
    """
    # Identity check: robust whatever type the weights have.
    assert (self.W is not None)
    if self.fit_intercept:
        X = hstack([ones(shape(X)[0], 1), X])
    result = matrix_matmul(X, self.W)
    if self.dim_Y == 1:
        result = [row[0] for row in result]
    return result
def _rand_X(self, X):
    """Randomly keep a share ``1 - self.drop_out`` of the columns of X.

    The number of kept columns is ``ceil((1 - self.drop_out) * N)`` with
    ``N = shape(X)[1]``, limited to the range ``0..N``.

    Returns:
        ``(X_, keep)`` where ``keep`` is a list of N booleans marking the
        chosen column indices and ``X_`` is ``fancy(X, -1, keep)``.
    """
    N = shape(X)[1]
    keep_length = int(math.ceil((1 - self.drop_out) * N))
    # Keep the target inside [0, N]; otherwise the sampling loop below
    # could never reach it and would spin forever.
    if keep_length < 0:
        keep_length = 0
    elif keep_length > N:
        keep_length = N

    keep_set = set()
    while len(keep_set) < keep_length:
        # Adding an index already present is a no-op, so duplicates are
        # simply redrawn.
        keep_set.add(random.randrange(N))

    keep = [i in keep_set for i in range(N)]
    X_ = fancy(X, -1, keep)
    return X_, keep
def fit(self, X, y):
    """Fit least squares through the normal equations.

    Stores ``W = (X^T X)^-1 X^T y`` in ``self.W``. X and y are first
    validated by ``self._check``; when ``self.fit_intercept`` is true the
    ``ones(m, 1)`` bias column is prepended to X.
    """
    X, y = self._check(X, y)
    if self.fit_intercept:
        row_count, _col_count = shape(X)
        X = hstack([ones(row_count, 1), X])

    X_T = matrix_transpose(X)
    gram_inverse = matrix_inverse(matrix_matmul(X_T, X))
    projector = matrix_matmul(gram_inverse, X_T)
    self.W = matrix_matmul(projector, y)
def resampling(ecs_logs, flavors_unique, training_start_time, predict_start_time, frequency=7, strike=1, skip=0):
    """Count log entries per flavor over sliding windows of days.

    ``predict_start_time`` is first moved back by ``skip`` days. Window
    ``i`` then covers the entries whose age
    ``(predict_start_time - ecs_time).days`` lies in
    ``[i * strike, i * strike + frequency)``. Every matching entry adds 1
    to the cell of its flavor.

    Args:
        ecs_logs: re-iterable collection of ``(flavor, ecs_time)`` pairs.
        flavors_unique: flavors in column order; every flavor seen inside
            a window must be listed here.
        training_start_time: start of the training period.
        predict_start_time: start of the prediction period.
        frequency: window length in days.
        strike: step between consecutive windows, in days.
        skip: days to move ``predict_start_time`` back.

    Returns:
        A matrix with one row per window and one column per flavor, rows
        ordered from the oldest window to the newest.
    """
    predict_start_time = predict_start_time - timedelta(days=skip)
    days_total = (predict_start_time - training_start_time).days
    # Floor division keeps the window count an int on Python 3 too (it
    # matches what "/" did for ints on Python 2).
    sample_length = ((days_total - frequency) // strike) + 1

    mapping_index = {f: c for c, f in enumerate(flavors_unique)}
    sample = zeros((sample_length, len(flavors_unique)))

    for i in range(sample_length):
        window_start = i * strike
        window_end = window_start + frequency
        for f, ecs_time in ecs_logs:
            age_days = (predict_start_time - ecs_time).days
            # 0 - 6 for example
            if window_start <= age_days < window_end:
                sample[i][mapping_index[f]] += 1

    # Windows were built newest-first; flip so that old data comes first.
    sample = sample[::-1]
    assert (shape(sample) == (sample_length, len(flavors_unique)))
    return sample
def matrix_matmul(A, B):
    """Return the matrix product of A and B as a new list of rows.

    Both arguments must be 2-D and ``shape(A)[1]`` must equal
    ``shape(B)[0]``. Entry ``[i][j]`` of the result is the sum over ``k``
    of ``A[i][k] * B[k][j]``, accumulated from 0.
    """
    assert (dim(A) == 2 and dim(B) == 2 and shape(A)[1] == shape(B)[0])
    m = shape(A)[0]
    n = shape(B)[1]

    product = []
    for i in range(m):
        left_row = A[i]
        out_row = []
        for j in range(n):
            acc = 0
            for k, left_value in enumerate(left_row):
                acc += left_value * B[k][j]
            out_row.append(acc)
        product.append(out_row)
    return product
def train_test_split(X, y, test_size=0.2, random_state=None, align=None):
    """Split X and y into a training part and a test part.

    Args:
        X: sequence of samples.
        y: sequence of targets, same length as X.
        test_size: number of test rows when >= 1, otherwise the fraction
            of rows to use (rounded, but never fewer than one row).
        random_state: optional seed handed to ``random.seed``.
        align: ``None`` draws the test rows at random; ``'right'`` takes
            the last rows as the test part; ``'left'`` takes the first
            rows as the test part.

    Returns:
        ``(X_train, X_test, Y_train, Y_test)``. Row order is preserved
        inside each part.

    Raises:
        AssertionError: if the lengths differ or ``align`` is invalid.
        ValueError: if the requested test size is negative or exceeds the
            number of rows.
    """
    N = len(X)
    assert (N == len(y))
    assert (align is None or align == 'left' or align == 'right')

    if test_size >= 1:
        test_length = int(test_size)
    else:
        test_length = int(round(N * test_size))
    if test_length == 0:
        test_length = 1
    if not 0 < test_length <= N:
        raise ValueError(
            'test size %r does not fit %d rows' % (test_size, N))

    if random_state is not None:
        random.seed(random_state)

    training_length = N - test_length
    if align == 'right':
        return (X[:training_length], X[training_length:],
                y[:training_length], y[training_length:])
    if align == 'left':
        # Test rows sit at the front, training rows follow them.
        return (X[test_length:], X[:test_length],
                y[test_length:], y[:test_length])

    test_set = set()
    while len(test_set) < test_length:
        # Adding an index already present is a no-op, so duplicates are
        # simply redrawn.
        test_set.add(random.randrange(N))

    X_train, X_test, Y_train, Y_test = [], [], [], []
    for i in range(N):
        if i in test_set:
            X_test.append(X[i])
            Y_test.append(y[i])
        else:
            X_train.append(X[i])
            Y_train.append(y[i])
    return X_train, X_test, Y_train, Y_test
def fit(self, X, y):
    """Fit the model by cyclic coordinate descent with soft thresholding.

    For a 1-D ``y`` the coefficients are computed here and stored in
    ``self.beta`` (plus ``self.coef_`` and, with an intercept,
    ``self.intercept_``). For a 2-D ``y`` every column is fitted with
    ``self._fit`` and the results are stored column-wise in ``self.betas``.
    Whichever of ``self.beta`` / ``self.betas`` is not produced by this
    call is reset to None so that stale coefficients are never used.

    Args:
        X: 2-D sample matrix.
        y: 1-D target vector or 2-D target matrix.

    Returns:
        ``self``.
    """
    self._check(X, y)
    if dim(y) == 1:
        raw_X = X
        if self.fit_intercept:
            X = hstack([ones(shape(X)[0], 1), X])
        n_samples = shape(X)[0]
        beta = zeros(shape(X)[1])  # one coefficient per column of X
        X_T = matrix_transpose(X)
        penalty = self.alpha * n_samples
        start = 1 if self.fit_intercept else 0

        def _intercept():
            # Mean residual of the model without the intercept term;
            # beta[1:] pairs with the columns of the bias-free raw_X.
            return sum(minus(reshape(y, -1), dot(raw_X, beta[1:]))) / n_samples

        if self.fit_intercept:
            beta[0] = _intercept()

        for _ in range(self.max_iter):
            for j in range(start, len(beta)):
                # Residual under the current coefficients (coordinate j
                # is deliberately left in, as in the existing behaviour).
                r_j = minus(reshape(y, -1), dot(X, beta))
                arg1 = dot(X_T[j], r_j)
                col_sq = sum(square(X_T[j]))
                if col_sq != 0:
                    beta[j] = self._soft_thresholding_operator(arg1, penalty) / col_sq
                else:
                    # An all-zero column cannot carry any weight.
                    beta[j] = 0
                if self.fit_intercept:
                    beta[0] = _intercept()

        if self.fit_intercept:
            self.intercept_ = beta[0]
            self.coef_ = beta[1:]
        else:
            self.coef_ = beta
        self.beta = beta
        self.betas = None
        return self
    elif dim(y) == 2:
        if self.fit_intercept:
            X = hstack([ones(shape(X)[0], 1), X])
        y_t = matrix_transpose(y)
        betas = [self._fit(X, y_t[i]) for i in range(shape(y)[1])]
        self.betas = matrix_transpose(betas)
        self.beta = None
        return self
def flavor_clustering(sample,k=3,variance_threshold=None):
    """List, for every column of ``sample``, its most correlated columns.

    Row ``i`` of ``corrcoef(sample)`` is ranked in descending order with
    ``argsort``; the first ranked index is always skipped (presumably the
    column itself - confirm). Without a threshold the next ``k`` indices
    are kept; with ``variance_threshold`` every remaining index whose
    correlation exceeds the threshold is kept.

    Returns:
        ``(clustering_paths, corrcoef_sample)``: one index list per
        column, and the correlation matrix they were derived from.
    """
    corrcoef_sample = corrcoef(sample)
    clustering_paths = []
    for column in range(shape(sample)[1]):
        scores = corrcoef_sample[column]
        ranked = argsort(scores)[::-1]
        if variance_threshold is None:
            neighbours = ranked[1:k+1]
        else:
            neighbours = [idx for idx in ranked[1:] if scores[idx]>variance_threshold]
        clustering_paths.append(neighbours)
    return clustering_paths,corrcoef_sample
def minmax_scaling(X, axis=1):
    """Centre every column on its mean and divide by its range.

    Each value ``x`` of a column becomes ``(x - mean) / (max - min)``.
    A column whose maximum equals its minimum is passed through
    unchanged. Only ``axis == 1`` is supported.
    """
    assert (axis == 1)
    scaled_columns = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        span = max(col) - min(col)
        centre = mean(col)
        if span == 0:
            # Constant column: nothing to scale.
            scaled_columns.append(col)
            continue
        scaled_columns.append([(x - centre) / span for x in col])
    return matrix_transpose(scaled_columns)
def shift(A, shift_step, fill=None):
    """Shift the rows of A by ``shift_step`` positions.

    Row ``i`` of the result is row ``i - shift_step`` of A when that row
    exists (a positive step moves rows down, a negative step moves them
    up). Rows with no source are filled with ``fill``: element ``fill[j]``
    per column when ``fill`` is a list, otherwise the value itself.
    """
    assert (dim(A) == 2)
    R = zeros(shape(A))
    for i in range(shape(A)[0]):
        source = i - shift_step
        has_source = 0 <= source < shape(A)[0]
        for j in range(shape(A)[1]):
            if has_source:
                R[i][j] = A[source][j]
            elif type(fill) == list:
                R[i][j] = fill[j]
            else:
                R[i][j] = fill
    return R