def corrcoef(A):
    """Pearson correlation matrix of the columns of a 2-D matrix A."""
    assert (dim(A) == 2)
    m, n = shape(A)

    def _corr(A, i, j):
        # correlation between column i and column j
        assert (dim(A) == 2)
        A_T = matrix_transpose(A)
        X, Y = A_T[i], A_T[j]  # X, Y = col(A, i), col(A, j)
        mean_X, mean_Y = mean(X), mean(Y)
        X_ = [k - mean_X for k in X]
        Y_ = [k - mean_Y for k in Y]
        numerator = mean(multiply(X_, Y_))
        denominator = sqrt(mean(square(X_))) * sqrt(mean(square(Y_)))
        if denominator == 0:
            # a constant column has zero variance; define its correlation as 0
            return 0
        return numerator / denominator

    R = zeros((n, n))
    for i in range(n):
        for j in range(n):
            if i == j:
                R[i][j] = 1
            elif i > j:
                # the matrix is symmetric, reuse the already computed upper triangle
                R[i][j] = R[j][i]
            else:
                R[i][j] = _corr(A, i, j)
    return R
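# Usage sketch (illustrative only; assumes the list-of-lists helpers used above,
# e.g. shape, mean, multiply, square, sqrt, zeros, matrix_transpose, are in scope).
# Two exactly anti-correlated columns should give -1.0 off the diagonal:
A = [[1.0, 4.0],
     [2.0, 3.0],
     [3.0, 2.0],
     [4.0, 1.0]]
R = corrcoef(A)
# expected: [[1, -1.0],
#            [-1.0, 1]]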
def hstack(list_of_matrix):
    """Stack a list of matrices horizontally (column-wise).

    1-D entries are promoted to single-column matrices in place, so the input
    list is mutated (the commented-out deepcopy below would avoid that).
    """
    # from copy import deepcopy
    # list_of_matrix = deepcopy(list_of_matrix)
    assert (type(list_of_matrix) == list and len(list_of_matrix) > 0)
    high = shape(list_of_matrix[0])[0]
    stacking_length = []
    # promote 1-D vectors to column vectors, add @2018-04-11
    for i in range(len(list_of_matrix)):
        if dim(list_of_matrix[i]) == 1:
            list_of_matrix[i] = [[x] for x in list_of_matrix[i]]
    for i in range(len(list_of_matrix)):
        assert (dim(list_of_matrix[i]) == 2)
        assert (shape(list_of_matrix[i])[0] == high)  # every block must share the row count
        stacking_length.append(shape(list_of_matrix[i])[1])
    R = zeros((high, sum(stacking_length)))
    for i in range(len(list_of_matrix)):
        m, n = shape(list_of_matrix[i])
        start = sum(stacking_length[:i])
        # element-wise copy into the output columns [start, start + n)
        for j in range(m):
            for k in range(n):
                R[j][k + start] = list_of_matrix[i][j][k]
    return R
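# Usage sketch (illustrative only; assumes dim/shape/zeros from the same helpers).
# Note that the 1-D vector inside the passed list is promoted to a column in place:
v = [7, 8]
B = [[1, 2],
     [3, 4]]
C = hstack([v, B])
# expected: [[7, 1, 2],
#            [8, 3, 4]]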
def _fit(self, X, y):
    """Coordinate-descent Lasso on a single target vector y.

    X is expected to already contain the leading column of ones when
    self.fit_intercept is True (see fit below).
    """
    self._check(X, y)
    assert (dim(y) == 1)
    beta = zeros(shape(X)[1])  # row vector of coefficients
    X_T = matrix_transpose(X)
    if self.fit_intercept:
        # intercept = mean of (y - X_rest @ beta_rest); zeroing beta[0] in the
        # temporary copy makes the ones column contribute nothing to the dot product
        tmp = [x for x in beta]
        tmp[0] = 0.0
        beta[0] = sum(minus(reshape(y, -1), dot(X, tmp))) / (shape(X)[0])
    for _ in range(self.max_iter):
        start = 1 if self.fit_intercept else 0
        for j in range(start, len(beta)):
            tmp_beta = [x for x in beta]
            tmp_beta[j] = 0.0
            # partial residual with feature j's own contribution removed
            r_j = minus(reshape(y, -1), dot(X, tmp_beta))
            arg1 = dot(X_T[j], r_j)
            arg2 = self.alpha * shape(X)[0]
            if sum(square(X_T[j])) != 0:
                beta[j] = self._soft_thresholding_operator(arg1, arg2) / sum(square(X_T[j]))
            else:
                beta[j] = 0
        if self.fit_intercept:
            tmp = [x for x in beta]
            tmp[0] = 0.0
            beta[0] = sum(minus(reshape(y, -1), dot(X, tmp))) / (shape(X)[0])
    return beta
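# _fit relies on self._soft_thresholding_operator, which is not shown in this
# listing. A standard soft-threshold for Lasso coordinate descent would look like
# the sketch below; this is a guess at the missing helper, not necessarily the
# author's exact implementation.
def _soft_thresholding_operator(self, x, threshold):
    # S(x, t) = sign(x) * max(|x| - t, 0): shrink x toward zero by t,
    # mapping everything inside [-t, t] to exactly 0
    if x > 0 and threshold < abs(x):
        return x - threshold
    elif x < 0 and threshold < abs(x):
        return x + threshold
    else:
        return 0.0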
def random_w(self, s):
    """Return a matrix of shape s filled with uniform samples from [0, 1).

    Requires the standard-library random module (import random) at module level.
    """
    assert (len(s) == 2)
    R = zeros(s)
    for i in range(shape(R)[0]):
        for j in range(shape(R)[1]):
            R[i][j] = random.random()
    return R
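# Usage sketch (illustrative only): `model` stands in for whatever object defines
# random_w, and `import random` is assumed at module level. Seeding the generator
# makes the initial weights reproducible across runs:
random.seed(42)
W = model.random_w((2, 3))
# W is a 2x3 nested list with entries drawn uniformly from [0, 1)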
def matrix_copy(A):
    """Return an element-wise (deep) copy of a 2-D matrix A."""
    assert (dim(A) == 2)
    m, n = shape(A)
    R = zeros((m, n))
    for i in range(m):
        for j in range(n):
            R[i][j] = A[i][j]
    return R
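# Usage sketch (illustrative only; assumes dim/shape/zeros from the same helpers).
# The element-wise copy does not share rows with the original, unlike list(A):
A = [[1, 2], [3, 4]]
B = matrix_copy(A)
B[0][0] = 99
# A is still [[1, 2], [3, 4]]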
def exponential_smoothing(A, axis=0, alpha=0.1):
    """Exponentially smooth the rows of A from top to bottom (only axis=0 is supported).

    Note the convention used here: each new row gets weight (1 - alpha) and the
    running state gets weight alpha, and the state starts at a zero row, so the
    first few smoothed rows are biased toward zero.
    """
    assert (axis == 0)
    R = []
    C = zeros(shape(A)[1])  # running smoothed row
    for i in range(shape(A)[0]):
        P = multiply(A[i], (1 - alpha))
        Q = multiply(C, alpha)
        C = plus(P, Q)
        R.append(C)
    return R
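# Worked example of the recurrence C_t = (1 - alpha) * x_t + alpha * C_{t-1}
# in plain Python (no project helpers needed), matching what the function above
# produces for a single-column input:
xs = [10.0, 10.0, 10.0]
alpha = 0.1
C = 0.0
smoothed = []
for x in xs:
    C = (1 - alpha) * x + alpha * C
    smoothed.append(C)
# smoothed is approximately [9.0, 9.9, 9.99]: it converges toward 10, but the
# first values are biased low because the state starts at zero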
def fit(self, X, y):
    """Fit the Lasso by cyclic coordinate descent.

    y may be a single target (dim 1) or a matrix of targets (dim 2); in the
    latter case each column of y is fitted independently via _fit.
    """
    self._check(X, y)
    if dim(y) == 1:
        raw_X = X
        if self.fit_intercept:
            X = hstack([ones(shape(X)[0], 1), X])  # prepend a column of ones
        beta = zeros(shape(X)[1])  # row vector of coefficients
        X_T = matrix_transpose(X)
        if self.fit_intercept:
            beta[0] = sum(minus(reshape(y, -1), dot(raw_X, beta[1:]))) / (shape(X)[0])
        for _ in range(self.max_iter):
            start = 1 if self.fit_intercept else 0
            for j in range(start, len(beta)):
                tmp_beta = [x for x in beta]
                tmp_beta[j] = 0.0
                # partial residual with feature j's own contribution removed
                r_j = minus(reshape(y, -1), dot(X, tmp_beta))
                arg1 = dot(X_T[j], r_j)
                arg2 = self.alpha * shape(X)[0]
                if sum(square(X_T[j])) != 0:
                    beta[j] = self._soft_thresholding_operator(arg1, arg2) / sum(square(X_T[j]))
                else:
                    beta[j] = 0
            if self.fit_intercept:
                beta[0] = sum(minus(reshape(y, -1), dot(raw_X, beta[1:]))) / (shape(X)[0])
        if self.fit_intercept:
            self.intercept_ = beta[0]
            self.coef_ = beta[1:]
        else:
            self.coef_ = beta
        self.beta = beta
        return self
    elif dim(y) == 2:
        if self.fit_intercept:
            X = hstack([ones(shape(X)[0], 1), X])
        y_t = matrix_transpose(y)
        betas = []
        for i in range(shape(y)[1]):
            betas.append(self._fit(X, y_t[i]))
        self.betas = matrix_transpose(betas)
        return self
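# Usage sketch (illustrative only). The class name `Lasso` and its constructor
# arguments are assumptions: fit reads self.alpha, self.fit_intercept and
# self.max_iter, but the __init__ is not shown in this listing.
model = Lasso(alpha=0.1, fit_intercept=True, max_iter=1000)
X = [[1.0, 0.0],
     [2.0, 1.0],
     [3.0, 2.0],
     [4.0, 3.0]]
y = [3.0, 5.0, 7.0, 9.0]    # roughly 2*x0 + 1
model.fit(X, y)
# model.intercept_ and model.coef_ hold the fitted parameters; a larger alpha
# drives more coefficients exactly to zero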
def resampling(ecs_logs, flavors_unique, training_start_time, predict_start_time,
               frequency=7, strike=1, skip=0):  # checked
    """Resample ecs_logs into a (sample_length, n_flavors) count matrix over
    sliding windows of `frequency` days, stepped back from predict_start_time
    in `strike`-day increments. Requires `from datetime import timedelta`.
    """

    def __get_flavors_unique_mapping(flavors_unique):
        # map each flavor to its column index in the sample matrix
        mapping_index = {}.fromkeys(flavors_unique)
        c = 0
        for f in flavors_unique:
            mapping_index[f] = c
            c += 1
        return mapping_index

    predict_start_time = predict_start_time - timedelta(days=skip)
    days_total = (predict_start_time - training_start_time).days
    sample_length = ((days_total - frequency) // strike) + 1
    mapping_index = __get_flavors_unique_mapping(flavors_unique)

    sample = zeros((sample_length, len(flavors_unique)))
    last_time = [None for i in range(len(flavors_unique))]

    for i in range(sample_length):
        for f, ecs_time in ecs_logs:
            # row i covers logs between i*strike and i*strike + frequency days
            # before predict_start_time (0-6 days for the first row, for example)
            # fix serious bug @ 2018-04-11
            if (predict_start_time - ecs_time).days >= i * strike and \
                    (predict_start_time - ecs_time).days < i * strike + frequency:
                if last_time[mapping_index[f]] is None:
                    sample[i][mapping_index[f]] += 1
                    last_time[mapping_index[f]] = ecs_time
                else:
                    # note: both branches still count the request; last_time only
                    # controls when the per-flavor reference timestamp advances
                    # (it stays put for bursts arriving within 10 seconds)
                    if (ecs_time - last_time[mapping_index[f]]).seconds < 10:
                        sample[i][mapping_index[f]] += 1
                        continue
                    else:
                        sample[i][mapping_index[f]] += 1
                        last_time[mapping_index[f]] = ecs_time

    # reverse so the rows run from the oldest window to the newest window:
    # [ old data ]
    # [ ...      ]
    # [ new data ]
    sample = sample[::-1]
    assert (shape(sample) == (sample_length, len(flavors_unique)))
    return sample
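# Worked example of the windowing arithmetic (plain Python, no project helpers):
# with frequency=7 and strike=1, a request made d days before predict_start_time
# lands in every row i satisfying i <= d < i + 7, so neighbouring rows overlap.
frequency, strike = 7, 1
days_total = 10
sample_length = ((days_total - frequency) // strike) + 1    # -> 4 rows
d = 3
rows = [i for i in range(sample_length)
        if i * strike <= d < i * strike + frequency]
# rows == [0, 1, 2, 3]: one request is counted in several overlapping windows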
def shift(A, shift_step, fill=None):
    """Shift the rows of a 2-D matrix down by shift_step (up if negative).

    Vacated rows are filled with `fill`; if `fill` is a list it is used as a
    per-column fill row, otherwise the scalar is broadcast.
    """
    assert (dim(A) == 2)
    R = zeros(shape(A))
    for i in range(shape(A)[0]):
        for j in range(shape(A)[1]):
            if shift_step >= 0:
                if i >= shift_step:
                    R[i][j] = A[i - shift_step][j]
                elif type(fill) == list:
                    R[i][j] = fill[j]
                else:
                    R[i][j] = fill
            else:
                if (i - shift_step) < shape(A)[0]:
                    R[i][j] = A[i - shift_step][j]
                elif type(fill) == list:
                    R[i][j] = fill[j]
                else:
                    R[i][j] = fill
    return R
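# Usage sketch (illustrative only; assumes dim/shape/zeros from the same helpers):
A = [[1, 2],
     [3, 4],
     [5, 6]]
shift(A, 1, fill=0)
# -> [[0, 0],
#     [1, 2],
#     [3, 4]]   rows pushed down one step, the vacated top row filled with 0
shift(A, -1, fill=0)
# -> [[3, 4],
#     [5, 6],
#     [0, 0]]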
def resampling(ecs_logs, flavors_unique, training_start_time, predict_start_time,
               frequency=7, strike=1, skip=0):
    """Simpler variant of resampling without the 10-second de-duplication pass."""
    predict_start_time = predict_start_time - timedelta(days=skip)
    days_total = (predict_start_time - training_start_time).days
    sample_length = ((days_total - frequency) // strike) + 1
    mapping_index = get_flavors_unique_mapping(flavors_unique)

    sample = zeros((sample_length, len(flavors_unique)))
    for i in range(sample_length):
        for f, ecs_time in ecs_logs:
            # row i covers logs between i*strike and i*strike + frequency days
            # before predict_start_time (0-6 days for the first row, for example)
            # fix serious bug @ 2018-04-11
            if (predict_start_time - ecs_time).days >= i * strike and \
                    (predict_start_time - ecs_time).days < i * strike + frequency:
                sample[i][mapping_index[f]] += 1

    # reverse so the rows run from the oldest window to the newest window:
    # [ old data ]
    # [ ...      ]
    # [ new data ]
    sample = sample[::-1]
    assert (shape(sample) == (sample_length, len(flavors_unique)))
    return sample
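# Usage sketch (illustrative only; assumes zeros/shape and get_flavors_unique_mapping
# are in scope, and that flavors can be any hashable keys -- the string names below
# are made up):
from datetime import datetime

ecs_logs = [
    ("flavor1", datetime(2018, 4, 1, 10, 0)),
    ("flavor2", datetime(2018, 4, 3, 12, 30)),
    ("flavor1", datetime(2018, 4, 8, 9, 15)),
]
sample = resampling(ecs_logs,
                    flavors_unique=["flavor1", "flavor2"],
                    training_start_time=datetime(2018, 4, 1),
                    predict_start_time=datetime(2018, 4, 10))
# days_total = 9, so shape(sample) == (((9 - 7) // 1) + 1, 2) == (3, 2);
# each row holds per-flavor request counts for one 7-day window, oldest first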
def diag(A):
    """Build a square matrix with the 1-D vector A on its main diagonal."""
    assert (dim(A) == 1)
    R = zeros((shape(A)[0], shape(A)[0]))
    for i in range(len(A)):
        R[i][i] = A[i]
    return R
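# Usage sketch (illustrative only; assumes dim/shape/zeros from the same helpers):
diag([1, 2, 3])
# -> [[1, 0, 0],
#     [0, 2, 0],
#     [0, 0, 3]]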