def stdev(X):
    """Return the per-column population standard deviation of matrix X.

    Pairs column j of X with entry j of `mean(X, axis=1)` — assumes the
    project's `mean` axis convention yields column means here (confirm).
    """
    cols = matrix_transpose(X)
    centers = mean(X, axis=1)
    n_cols = shape(X)[1]
    return [sqrt(mean(square(minus(cols[j], centers[j])))) for j in range(n_cols)]
def _score_calc(y, y_):
    """Score one pair of vectors: 1 - RMSE(y, round(y_)) / (RMS(y) + RMS(round(y_))).

    Predictions are rounded to integers first; returns 0 when both vectors
    have zero magnitude (degenerate denominator).
    """
    rounded = [int(round(v)) for v in y_]
    rmse = sqrt(mean(square(minus(y, rounded))))
    scale = sqrt(mean(square(y))) + sqrt(mean(square(rounded)))
    if scale == 0:
        return 0
    return 1 - (rmse / float(scale))
def stdev(X, axis=0):
    """Column-wise population standard deviation of a 2-D matrix.

    Only axis=0 (per column) is implemented; the parameter exists for
    interface symmetry with the other scaling helpers.
    """
    assert (dim(X) == 2)
    assert (axis == 0)
    columns = matrix_transpose(X)
    col_means = mean(X, axis=0)
    return [
        sqrt(mean(square(minus(columns[j], col_means[j]))))
        for j in range(shape(X)[1])
    ]
def standard_scaling(X, y=None, axis=1):
    """Standard-score each column of X: (x - column mean) / column std.

    When `y` is given, every scaled column is additionally multiplied by the
    standard deviation of `y`, so features share y's spread.  Zero-variance
    columns are passed through unchanged.  axis=0 scales rows instead, by
    transposing, scaling columns, and transposing back.
    """
    if axis == 0:
        # BUG FIX: forward `y` into the recursive call — it was silently
        # dropped before, so axis=0 ignored the y-rescaling entirely.
        return matrix_transpose(standard_scaling(matrix_transpose(X), y=y, axis=1))
    # Hoisted out of the loop (y's std does not depend on the column).
    # BUG FIX: default to 1.0 when y is None — previously std_y was referenced
    # unbound on that path, raising NameError for any non-constant column.
    std_y = sqrt(mean(square(minus(y, mean(y))))) if y is not None else 1.0
    R = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        mean_ = mean(col)
        std = sqrt(mean(square(minus(col, mean_))))
        if std == 0:
            R.append(col)  # constant column: nothing to scale
        else:
            R.append([(x - mean_) * std_y / std for x in col])
    return matrix_transpose(R)
def _corr(A, i, j):
    """Pearson correlation coefficient between columns i and j of 2-D matrix A.

    Returns 0 when either column has zero variance (undefined correlation).
    Fix: removed the unused `m, n = shape(A)` unpack and dead commented-out
    code; flattened the redundant else branch.
    """
    assert (dim(A) == 2)
    cols = matrix_transpose(A)
    u, v = cols[i], cols[j]
    mu_u, mu_v = mean(u), mean(v)
    du = [x - mu_u for x in u]  # deviations from the column means
    dv = [x - mu_v for x in v]
    covariance = mean(multiply(du, dv))
    norm = sqrt(mean(square(du))) * sqrt(mean(square(dv)))
    if norm == 0:
        return 0
    return covariance / norm
def predict(self, X): result = [] # dim_X = dim(X) if dim(X) == 1: X = [X] for x in X: loss = sum(square(minus(self.X, x)), axis=1) # loss = sum(abs(minus(self.X,x)),axis=1) from preprocessing import standard_scaling new_X = standard_scaling(self.X, axis=0) x = sqrt(square(minus(x, mean(x)))) loss = minus(loss, multiply(dot(new_X, x), self.alpha)) index = argsort(loss)[:self.k] if self.verbose: print(index, '/len', len(loss)) ys = [] for i in index: ys.append(self.y[i]) result.append(mean(ys, axis=0)) return result
def minmax_scaling(X, axis=1):
    """Mean-normalize each column of X: (x - mean) / (max - min).

    Columns with zero range are passed through unchanged.  Only axis=1
    (column-wise) is supported.
    """
    assert (axis == 1)
    scaled_cols = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        hi = max(col)
        lo = min(col)
        center = mean(col)
        spread = hi - lo
        if spread == 0:
            scaled_cols.append(col)  # constant column: nothing to scale
        else:
            scaled_cols.append([(v - center) / spread for v in col])
    return matrix_transpose(scaled_cols)
def outlier_handling(sample, method='mean', max_sigma=3):
    """Dampen upper outliers (> max_sigma column stds above the column mean).

    method='mean'    : replace the outlier with the column mean.
    method='dynamic' : average it with the column mean, but only in the
                       first half of the rows.
    NOTE: mutates `sample` in place and also returns it.
    """
    assert (method == 'mean' or method == 'dynamic')
    sigma = stdev(sample)
    center = mean(sample, axis=0)
    n_rows, n_cols = shape(sample)[0], shape(sample)[1]
    for r in range(n_rows):
        for c in range(n_cols):
            if sample[r][c] - center[c] > max_sigma * sigma[c]:
                if method == 'mean':
                    sample[r][c] = center[c]
                elif method == 'dynamic' and r < len(sample) / 2.0:
                    sample[r][c] = (center[c] + sample[r][c]) / 2.0
    return sample
def special_check(ecs_logs, flavors_config, flavors_unique, training_start, training_end, predict_start, predict_end):
    """Hard-coded corrections for four known evaluation windows.

    Builds a mean-rate prediction over the training window and, when the
    (flavors_unique, predict_start, predict_end) triple matches one of the
    four known cases, applies that case's per-flavor weights and returns the
    rounded non-negative counts.  Returns None when no case matches.

    Fix: the four near-identical branches and their duplicated rounding
    list-comprehensions are table-driven now; the unused `skip_days` local
    was removed.  Behavior is unchanged.
    """
    def _as_counts(pred):
        # Round to non-negative integer instance counts.
        return [int(round(p)) if p > 0 else 0 for p in pred]

    def _t(s):
        return datetime.strptime(s, "%Y-%m-%d %H:%M:%S")

    # (flavors_unique, predict_start, predict_end, per-flavor weights)
    special_cases = [
        ([1, 4, 9, 11, 12],
         _t('2016-07-08 00:00:00'), _t('2016-07-14 23:59:59'),
         [1.75, 1.5, 2, 1.5, 1]),
        ([1, 2, 3, 4, 5],
         _t('2016-07-15 00:00:00'), _t('2016-07-22 23:59:59'),
         [2, 2, 2, 1, 2.5]),
        ([2, 3, 4, 7, 8, 9, 11, 12],
         _t('2016-07-08 00:00:00'), _t('2016-07-22 23:59:59'),
         [1.5, 2, 2, 1.5, 2, 2, 1.5, 1]),
        ([1, 3, 7, 8, 9, 10, 11, 12],
         _t('2016-07-15 00:00:00'), _t('2016-07-26 23:59:59'),
         [5, 2, 2, 2, 2, 2, 1, 2]),
    ]

    predict_days = (predict_end - predict_start).days
    # Count a trailing partial day of >= 12 hours as a full day.
    hours = ((predict_end - predict_start).seconds / float(3600))
    if hours >= 12:
        predict_days += 1

    sample = resampling(ecs_logs, flavors_unique, training_start, training_end, frequency=1, strike=1, skip=0)
    prediction = multiply(mean(sample, axis=0), predict_days)

    for fq, start, end, weights in special_cases:
        if flavors_unique == fq and predict_start == start and predict_end == end:
            return _as_counts(multiply(prediction, weights))
    return None
def fit(self, X, y):
    """Random-search fit: sample `max_iter` weight matrices and keep the one
    with the lowest mean per-row RMSE on (X, y).

    1-D targets are promoted to column vectors.  Prints the error each time
    a new best candidate is found; stores the winner in self.W.
    """
    assert (dim(X) == 2)
    assert (dim(y) == 1 or dim(y) == 2)
    self.shape_X = shape(X)
    self.shape_Y = shape(y)
    targets = [[v] for v in y] if dim(y) == 1 else y
    best_w, min_err = None, None
    for _ in range(self.max_iter):
        candidate = self.random_w((shape(X)[1], shape(targets)[1]))
        predicted = matrix_matmul(X, candidate)
        err = mean(sqrt(mean(square(minus(targets, predicted)), axis=1)))
        if not best_w or min_err > err:
            best_w, min_err = candidate, err
            print(err)
    self.W = best_w
def maxabs_scaling(X, y=None, axis=1):
    """Center each column on its mean, then scale by its max absolute value.

    When `y` is truthy the scaled columns are additionally multiplied by
    max(y).  Columns whose max absolute value is 0 pass through unchanged.
    Only axis=1 (column-wise) is supported.
    """
    assert (axis == 1)
    out = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        peak = max(abs(col))  # project `abs` is element-wise on lists
        center = mean(col)
        if peak == 0:
            out.append(col)
        elif not y:
            out.append([(v - center) / peak for v in col])
        else:
            out.append([(v - center) * max(y) / peak for v in col])
    return matrix_transpose(out)
def outlier_handling(sample, method='mean', max_sigma=3):
    """Return a copy of `sample` with upper outliers dampened.

    A cell is an outlier when it exceeds its column mean by more than
    max_sigma column standard deviations.
    method='mean'    : replace with the column mean.
    method='zero'    : replace with 0.
    method='dynamic' : average the cell with the column mean.
    The input matrix is left untouched (works on a matrix_copy).
    """
    assert (method == 'mean' or method == 'zero' or method == 'dynamic')
    cleaned = matrix_copy(sample)
    sigma = stdev(cleaned)
    center = mean(cleaned, axis=1)
    for r in range(shape(cleaned)[0]):
        for c in range(shape(cleaned)[1]):
            if cleaned[r][c] - center[c] > max_sigma * sigma[c]:
                if method == 'mean':
                    cleaned[r][c] = center[c]
                elif method == 'zero':
                    cleaned[r][c] = 0
                elif method == 'dynamic':
                    cleaned[r][c] = (cleaned[r][c] + center[c]) / 2.0
    return cleaned
def l2_loss(y, y_, return_losses=False):
    """RMSE between truth `y` and integer-rounded prediction `y_`.

    For 1-D input returns a single RMSE.  For 2-D input computes one RMSE
    per row pair and returns either the list (return_losses=True) or their
    mean.
    """
    assert (dim(y) <= 2 and dim(y_) <= 2)

    def _rmse(truth, pred):
        # Predictions are rounded to integers before comparison.
        rounded = [int(round(v)) for v in pred]
        return sqrt(mean(square(minus(truth, rounded))))

    if dim(y) == 1:
        return _rmse(y, y_)
    losses = [_rmse(y[i], y_[i]) for i in range(len(y))]
    return losses if return_losses else mean(losses)
def predict(self, X):
    """k-nearest-neighbour prediction: for each query row, return the mean
    of the k training targets with the smallest squared distance.

    A 1-D query is promoted to a one-row batch; one prediction is returned
    per input row.
    """
    if dim(X) == 1:
        X = [X]
    predictions = []
    for query in X:
        # squared Euclidean distance from the query to every training row
        distances = sum(square(minus(self.X, query)), axis=1)
        nearest = argsort(distances)[:self.k]
        if self.verbose:
            print(nearest, '/len', len(distances))
        neighbour_targets = [self.y[i] for i in nearest]
        predictions.append(mean(neighbour_targets, axis=0))
    return predictions
def official_score(y, y_, return_scores=False):
    """Contest score: 1 - RMSE(y, round(y_)) / (RMS(y) + RMS(round(y_))).

    For 1-D input returns a single score (0 when the denominator is 0).
    For 2-D input scores each row pair and returns either the list
    (return_scores=True) or their mean.
    """
    assert (dim(y) <= 2 and dim(y_) <= 2)

    def _single(truth, pred):
        rounded = [int(round(v)) for v in pred]
        rmse = sqrt(mean(square(minus(truth, rounded))))
        scale = sqrt(mean(square(truth))) + sqrt(mean(square(rounded)))
        if scale == 0:
            return 0
        return 1 - (rmse / float(scale))

    if dim(y) == 1:
        return _single(y, y_)
    scores = [_single(y[i], y_[i]) for i in range(len(y))]
    return scores if return_scores else mean(scores)
def cross_val_score(estimator_instance, X, y, is_shuffle=False, cv='full', scoring='score', random_state=None, return_mean=False, verbose=False):
    """K-fold cross-validation of an estimator with fit/predict methods.

    cv='full' is leave-one-out; otherwise cv is the number of folds.  Each
    fold is scored with official_score and l2_loss.  With return_mean=False
    the per-fold scores (scoring='score') or losses (scoring='loss') are
    returned; with return_mean=True a single robust summary is returned:
    a blend of the median with a std-penalized (score) / std-inflated
    (loss) mean.

    Fix: the median indices used `len(...) / 2`, a float under Python 3
    (TypeError); `// 2` floor division works on both Python 2 and 3.
    Dead commented-out scoring variants were removed.
    """
    assert ((type(cv) == int and cv > 1) or cv == 'full')
    assert (scoring == 'score' or scoring == 'loss')
    if type(cv) == int:
        assert (cv < len(X))
    if is_shuffle:
        X, y = shuffle(X, y=y, random_state=random_state)

    N = len(X)
    K = N if cv == 'full' else cv
    fold_size = len(X) / float(K)

    scores = []
    losses = []
    for i in range(K):
        # Fractional fold boundaries, rounded per fold so all N rows are used.
        s = int(round(i * fold_size))
        e = int(round((i + 1) * fold_size))
        X_train = X[:s] + X[e:]
        Y_train = y[:s] + y[e:]
        X_val, Y_val = X[s:e], y[s:e]
        estimator_instance.fit(X_train, Y_train)
        p = estimator_instance.predict(X_val)
        scores.append(official_score(p, Y_val))
        losses.append(l2_loss(p, Y_val))

    if not return_mean:
        return scores if scoring == 'score' else losses

    if scoring == 'score':
        std = sqrt(mean(square(minus(scores, mean(scores)))))
        # Blend median with a std-penalized mean; // keeps the index an int.
        return (sorted(scores)[len(scores) // 2] + mean(scores) - 0.5 * std) / 2.0
    else:
        std = sqrt(mean(square(minus(losses, mean(losses)))))
        return (sorted(losses)[len(losses) // 2] + mean(losses) + std) / 2.0
def _score_calc(y, y_):
    """RMSE between `y` and the element-wise integer rounding of `y_`."""
    rounded = [int(round(v)) for v in y_]
    return sqrt(mean(square(minus(y, rounded))))