def _fit(self, X, y):
    """Fit one target column by cyclic coordinate descent with soft-thresholding.

    Args:
        X: 2-D design matrix. When ``self.fit_intercept`` is true, column 0
            must be the all-ones intercept column (``fit`` prepends it before
            calling this method) and ``beta[0]`` holds the intercept.
        y: 1-D target vector.

    Returns:
        The coefficient vector ``beta`` with one entry per column of ``X``.
    """
    self._check(X, y)
    assert (dim(y) == 1)

    n_samples = shape(X)[0]
    beta = zeros(shape(X)[1])  # row vector
    X_T = matrix_transpose(X)

    # Loop-invariant quantities, computed once instead of per coordinate step.
    y_flat = reshape(y, -1)
    penalty = self.alpha * n_samples
    col_sq_norms = [sum(square(X_T[j])) for j in range(len(beta))]
    start = 1 if self.fit_intercept else 0

    def _refresh_intercept():
        # X already carries the ones column, so X.beta includes beta[0].
        # mean(y - X_raw.beta[1:]) == mean(y - X.beta) + beta[0]; this form
        # avoids multiplying the augmented X by the shorter beta[1:].
        beta[0] = beta[0] + sum(minus(y_flat, dot(X, beta))) / float(n_samples)

    if self.fit_intercept:
        _refresh_intercept()

    for _ in range(self.max_iter):
        for j in range(start, len(beta)):
            # NOTE(review): this is the full residual (feature j included);
            # textbook coordinate descent uses the partial residual with
            # beta[j] zeroed out -- confirm this variant is intended.
            r_j = minus(y_flat, dot(X, beta))
            arg1 = dot(X_T[j], r_j)
            if col_sq_norms[j] != 0:
                beta[j] = self._soft_thresholding_operator(
                    arg1, penalty) / col_sq_norms[j]
            else:
                # An all-zero column carries no signal.
                beta[j] = 0
            if self.fit_intercept:
                _refresh_intercept()
    return beta
def fit(self, X, y):
    """Fit the model by cyclic coordinate descent with soft-thresholding.

    For a 1-D ``y`` the coefficients are stored in ``self.beta`` and
    ``self.coef_`` (plus ``self.intercept_`` when ``self.fit_intercept`` is
    true). For a 2-D ``y`` each target column is fitted independently with
    ``self._fit`` and the stacked coefficients are stored in ``self.betas``.

    Args:
        X: 2-D feature matrix (without an intercept column).
        y: 1-D target vector or 2-D target matrix.

    Returns:
        ``self`` for both the 1-D and the 2-D case, so calls can be chained.
    """
    self._check(X, y)
    if dim(y) == 1:
        raw_X = X
        if self.fit_intercept:
            # Prepend a column of ones so beta[0] acts as the intercept.
            X = hstack([ones(shape(X)[0], 1), X])

        n_samples = shape(X)[0]
        beta = zeros(shape(X)[1])  # row vector
        X_T = matrix_transpose(X)

        # Loop-invariant quantities, computed once.
        y_flat = reshape(y, -1)
        penalty = self.alpha * n_samples
        col_sq_norms = [sum(square(X_T[j])) for j in range(len(beta))]
        start = 1 if self.fit_intercept else 0

        if self.fit_intercept:
            beta[0] = sum(minus(y_flat, dot(raw_X, beta[1:]))) / float(n_samples)

        for _ in range(self.max_iter):
            for j in range(start, len(beta)):
                # NOTE(review): full residual (feature j included); textbook
                # coordinate descent zeroes beta[j] first -- confirm intended.
                r_j = minus(y_flat, dot(X, beta))
                arg1 = dot(X_T[j], r_j)
                if col_sq_norms[j] != 0:
                    beta[j] = self._soft_thresholding_operator(
                        arg1, penalty) / col_sq_norms[j]
                else:
                    # An all-zero column carries no signal.
                    beta[j] = 0
                if self.fit_intercept:
                    beta[0] = sum(
                        minus(y_flat, dot(raw_X, beta[1:]))) / float(n_samples)

        if self.fit_intercept:
            self.intercept_ = beta[0]
            self.coef_ = beta[1:]
        else:
            self.coef_ = beta
        self.beta = beta
        return self
    elif dim(y) == 2:
        if self.fit_intercept:
            X = hstack([ones(shape(X)[0], 1), X])
        y_t = matrix_transpose(y)
        # One independent fit per target column.
        betas = [self._fit(X, y_t[i]) for i in range(shape(y)[1])]
        self.betas = matrix_transpose(betas)
        return self
def predict(self, X):
    """Predict with inverse-distance-weighted k-nearest neighbours.

    For each query row the ``self.k`` stored samples with the smallest
    squared Euclidean distance are selected and their targets are combined
    with weights proportional to ``1 / distance``, normalised to sum to 1.
    If one or more neighbours are at distance zero, the prediction is the
    plain average of those exact matches.

    Args:
        X: A single sample (1-D) or a 2-D matrix of samples.

    Returns:
        One prediction per query row; flattened to a 1-D list when the
        training targets were 1-D (``len(self.shape_Y) == 1``).
    """
    result = []
    if dim(X) == 1:
        X = [X]
    for x in X:
        loss = sum(square(minus(self.X, x)), axis=1)
        index = argsort(loss)[:self.k]
        if self.verbose:
            print(index)
        ys = [self.y[i] for i in index]
        # Distances of the selected neighbours, aligned with ``ys``.
        k_loss_raw = [loss[i] for i in index]

        exact = [d for d in k_loss_raw if d == 0]
        if exact:
            # 1/d is undefined at d == 0: exact matches share the weight.
            share = 1.0 / len(exact)
            weights = [share if d == 0 else 0.0 for d in k_loss_raw]
        else:
            inverse = [1.0 / d for d in k_loss_raw]
            total = sum(inverse)
            weights = [w / float(total) for w in inverse]

        weight_m = diag(weights)
        ys = matrix_matmul(weight_m, ys)
        result.append(sum(ys, axis=0))
    if len(self.shape_Y) == 1:
        result = matrix_transpose(result)[0]
    return result
def standard_scaling(X, y=None, axis=1):
    """Centre each column of ``X`` and rescale it to a common standard deviation.

    Every non-constant column is shifted to zero mean and scaled so that its
    population standard deviation equals that of ``y`` (or 1.0 when ``y`` is
    not given). Constant columns (zero standard deviation) are returned
    unchanged.

    Args:
        X: 2-D matrix.
        y: Optional vector whose standard deviation is used as the target
            scale. Defaults to ``None`` (unit standard deviation).
        axis: ``1`` scales the columns of ``X``; ``0`` scales its rows, by
            transposing, scaling columns and transposing back.

    Returns:
        The scaled matrix, built as the transpose of the list of scaled columns.
    """
    if axis == 0:
        return matrix_transpose(
            standard_scaling(matrix_transpose(X), y=y, axis=1))

    # The target scale does not depend on the column, so compute it once.
    if y is None:
        std_y = 1.0
    else:
        std_y = sqrt(mean(square(minus(y, mean(y)))))

    R = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        mean_ = mean(col)
        std = sqrt(mean(square(minus(col, mean_))))
        if std == 0:
            # A constant column cannot be rescaled; keep it as is.
            R.append(col)
        else:
            R.append([(x - mean_) * std_y / std for x in col])
    return matrix_transpose(R)
def stdev(X):
    """Return the population standard deviation of every column of ``X``.

    Args:
        X: 2-D matrix.

    Returns:
        A list with one standard deviation per column of ``X``.
    """
    X_T = matrix_transpose(X)
    R = []
    for j in range(shape(X)[1]):
        col = X_T[j]
        # Take the mean from the column itself so that the value subtracted
        # always belongs to the column being measured.
        R.append(sqrt(mean(square(minus(col, mean(col))))))
    return R
def _score_calc(y, y_):
    """Score predictions ``y_`` against ``y`` as 1 - RMSE / (RMS(y) + RMS(y_)).

    The predictions are rounded to the nearest integer first. Returns 0 when
    the sum of the two root-mean-square terms is zero.
    """
    rounded = [int(round(value)) for value in y_]
    error = sqrt(mean(square(minus(y, rounded))))
    scale = sqrt(mean(square(y))) + sqrt(mean(square(rounded)))
    if scale != 0:
        return 1 - (error / float(scale))
    return 0
def stdev(X, axis=0):
    """Return the population standard deviation of every column of ``X``.

    Only 2-D input and ``axis == 0`` are supported; both are asserted.
    """
    assert (dim(X) == 2)
    assert (axis == 0)
    columns = matrix_transpose(X)
    column_means = mean(X, axis=0)
    return [
        sqrt(mean(square(minus(columns[j], column_means[j]))))
        for j in range(shape(X)[1])
    ]
def predict(self, X):
    """Predict with k-nearest neighbours on an alpha-adjusted distance.

    For each query row the squared Euclidean distance to every stored sample
    is reduced by ``self.alpha`` times the dot product between the scaled
    training matrix and the query's absolute deviation from its own mean.
    The targets of the ``self.k`` samples with the smallest adjusted value
    are averaged.

    Args:
        X: A single sample (1-D) or a 2-D matrix of samples.

    Returns:
        A list with one averaged target per query row.
    """
    result = []
    if dim(X) == 1:
        X = [X]

    # The scaled training matrix is the same for every query: build it once,
    # on first use, so an empty X still triggers neither import nor scaling.
    new_X = None
    for x in X:
        loss = sum(square(minus(self.X, x)), axis=1)
        if new_X is None:
            from preprocessing import standard_scaling
            new_X = standard_scaling(self.X, axis=0)
        # sqrt(square(.)) is the element-wise absolute deviation of the query.
        x = sqrt(square(minus(x, mean(x))))
        loss = minus(loss, multiply(dot(new_X, x), self.alpha))
        index = argsort(loss)[:self.k]
        if self.verbose:
            print(index, '/len', len(loss))
        ys = [self.y[i] for i in index]
        result.append(mean(ys, axis=0))
    return result
def predict(self, X):
    """Predict by averaging the targets of the k nearest stored samples.

    Nearness is the squared Euclidean distance to the rows of ``self.X``.
    A 1-D ``X`` is treated as a single query. Returns one averaged target
    per query row.
    """
    result = []
    queries = [X] if dim(X) == 1 else X
    for query in queries:
        loss = sum(square(minus(self.X, query)), axis=1)
        index = argsort(loss)[:self.k]
        if self.verbose:
            print(index, '/len', len(loss))
        neighbours = [self.y[i] for i in index]
        result.append(mean(neighbours, axis=0))
    return result
def fit(self,X,y):
    """Pick the best of ``self.max_iter`` randomly drawn weight matrices.

    Each candidate from ``self.random_w`` is scored by the mean over rows of
    the per-row root-mean-square error of ``X @ W`` against ``y``. The
    lowest-error candidate is stored in ``self.W`` and every improvement is
    printed. A 1-D ``y`` is turned into a single-column matrix first.
    """
    assert(dim(X)==2)
    assert(dim(y)==1 or dim(y)==2)
    self.shape_X = shape(X)
    self.shape_Y = shape(y)
    if dim(y) == 1:
        y = [[value] for value in y]

    best_w, min_err = None, None
    for _ in range(self.max_iter):
        candidate = self.random_w((shape(X)[1], shape(y)[1]))
        predicted = matrix_matmul(X, candidate)
        err = mean(sqrt(mean(square(minus(y, predicted)), axis=1)))
        # The first candidate is always accepted; later ones must improve.
        is_better = (not best_w) or min_err > err
        if is_better:
            best_w, min_err = candidate, err
            print(err)
    self.W = best_w
def cross_val_score(estimator_instance, X, y, is_shuffle=False, cv='full',
                    scoring='score', random_state=None, return_mean=False,
                    verbose=False):
    """Evaluate an estimator with K-fold (or leave-one-out) cross-validation.

    Args:
        estimator_instance: Object with ``fit(X, y)`` and ``predict(X)``.
        X: Sequence of samples.
        y: Sequence of targets aligned with ``X``.
        is_shuffle: Shuffle ``X`` and ``y`` together before splitting.
        cv: Number of folds (int > 1 and < ``len(X)``) or ``'full'`` for one
            fold per sample.
        scoring: ``'score'`` (uses ``official_score``) or ``'loss'`` (uses
            ``l2_loss``).
        random_state: Forwarded to ``shuffle``.
        return_mean: When true, return one aggregated number instead of the
            per-fold list.
        verbose: Unused; kept for interface compatibility.

    Returns:
        The per-fold list of scores or losses, or, when ``return_mean`` is
        true, ``(median + mean - 0.5 * std) / 2`` for scores and
        ``(median + mean + std) / 2`` for losses, where median is the upper
        median and std the population standard deviation over folds.
    """
    assert ((type(cv) == int and cv > 1) or cv == 'full')
    assert (scoring == 'score' or scoring == 'loss')
    if type(cv) == int:
        assert (cv < len(X))
    if is_shuffle:
        X, y = shuffle(X, y=y, random_state=random_state)

    N = len(X)
    K = N if cv == 'full' else cv
    h = N / float(K)  # fractional fold width; boundaries are rounded below

    scores = []
    losses = []
    for i in range(K):
        s = int(round(i * h))
        e = int(round((i + 1) * h))

        # Train on everything outside [s, e), validate on the slice itself.
        X_train, Y_train = [], []
        X_train.extend(X[:s])
        X_train.extend(X[e:])
        Y_train.extend(y[:s])
        Y_train.extend(y[e:])
        X_val, Y_val = X[s:e], y[s:e]

        estimator_instance.fit(X_train, Y_train)
        p = estimator_instance.predict(X_val)
        scores.append(official_score(p, Y_val))
        losses.append(l2_loss(p, Y_val))

    values = scores if scoring == 'score' else losses
    if not return_mean:
        return values

    std = sqrt(mean(square(minus(values, mean(values)))))
    # Floor division keeps the index an int on Python 3 as well as Python 2.
    median = sorted(values)[len(values) // 2]
    if scoring == 'score':
        # Penalise unstable scores.
        return (median + mean(values) - 0.5 * std) / 2.0
    # Penalise unstable losses.
    return (median + mean(values) + std) / 2.0
def _whatch(self, X, y):
    """Print the summed squared error of ``self.predict(X)`` against ``y``."""
    residual = minus(self.predict(X), y)
    print(sum(square(residual)))
def greedy_99_backpack(machine_number, machine_name, machine_config,
                       flavors_number, flavors_unique, flavors_config,
                       prediction, score_treadhold=0.99):
    """Pack predicted VMs using precomputed combinations, then a fallback packer.

    Combinations come from ``get_approximate_meta_solutions`` (presumably
    near-full flavor mixes per machine type -- confirm against that helper).
    They are applied repeatedly, highest machine index first, for as long as
    any of them still fits into the remaining ``prediction``. The remainder
    is packed by ``random_k_times`` and merged in per machine type.

    Returns:
        ``(backpack_count, backpack_result)``: ``backpack_result[i]`` is the
        list of ``{flavor: count}`` dicts for machine type ``i`` and
        ``backpack_count[i]`` is its length.
    """
    solutions = get_approximate_meta_solutions(
        machine_number, machine_name, machine_config, flavors_number,
        flavors_unique, flavors_config, prediction,
        score_treadhold=score_treadhold)

    def fits(remaining, picker):
        # A combination fits when it never asks for more than what is left.
        return not any(
            picker[i] > remaining[i] for i in range(len(remaining)))

    backpack_result = [[] for _ in range(machine_number)]

    progressed = True
    while progressed:
        progressed = False
        for i in reversed(range(len(solutions))):
            for picker in solutions[i]:
                picker = list(picker)
                if not fits(prediction, picker):
                    continue
                prediction = minus(prediction, picker)
                em = {
                    flavors_unique[j]: picker[j]
                    for j in range(len(flavors_unique))
                }
                backpack_result[i].append(em)
                progressed = True

    # Whatever the combinations could not cover goes to the fallback packer.
    _, backpack_result_2 = random_k_times(
        machine_number, machine_name, machine_config, flavors_number,
        flavors_unique, flavors_config, prediction, k=1000)

    # Merge both results per machine type.
    for i, machines in enumerate(backpack_result):
        machines.extend(backpack_result_2[i])

    backpack_count = [len(machines) for machines in backpack_result]
    return backpack_count, backpack_result
def backpack(machine_number, machine_name, machine_config, flavors_number,
             flavors_unique, flavors_config, prediction, is_random=False):
    """Greedily place the predicted VMs onto physical machines.

    Example arguments:
        machine_number: 3
        machine_name:   ['General', 'Large-Memory', 'High-Performance']
        machine_config: [{'MEM': 128, 'CPU': 56}, {'MEM': 256, 'CPU': 84},
                         {'MEM': 192, 'CPU': 112}]
        flavors_number: 5
        flavors_unique: [1, 2, 4, 5, 8]
        flavors_config: [{'MEM': 1, 'CPU': 1}, {'MEM': 2, 'CPU': 1},
                         {'MEM': 2, 'CPU': 2}, {'MEM': 4, 'CPU': 2},
                         {'MEM': 8, 'CPU': 4}]
        prediction:     [32, 32, 11, 21, 44]  (VM count per flavor)

    ``machine_name`` and ``flavors_number`` are not read by this function.
    When ``is_random`` is true the VM list is shuffled before placement.

    Returns:
        ``(backpack_count, backpack_result)``. ``backpack_result[i]`` is the
        list of machines of type ``i``, each a ``{flavor: count}`` dict, e.g.
        ``[[{f1: 3, f5: 2}, {f8: 2, f7: 4}], [...], [...]]``;
        ``backpack_count[i]`` is ``len(backpack_result[i])``.

    NOTE(review): ``cpu_predict / float(mem_predict)`` below raises
    ZeroDivisionError when the total predicted MEM is zero (for example when
    every entry of ``prediction`` is 0).
    """
    # CPU/MEM ratio of every machine type.
    machine_rate = [c['CPU'] / float(c['MEM']) for c in machine_config]

    # Total CPU and MEM demand still waiting to be placed.
    cpu_predict = 0
    mem_predict = 0
    for i in range(len(prediction)):
        cpu_predict += (prediction[i] * flavors_config[i]['CPU'])
        mem_predict += (prediction[i] * flavors_config[i]['MEM'])

    # Start with the machine type selected by argmin over the absolute gap
    # between each machine's CPU/MEM ratio and the demand's ratio.
    type_i_fix = argmin(
        abs(minus(machine_rate, cpu_predict / float(mem_predict))))

    # One [flavor, {'CPU': ..., 'MEM': ...}] entry per predicted VM instance.
    vms = []
    for i in range(len(prediction)):
        f_config = flavors_config[i]
        vms.extend([[
            flavors_unique[i], {
                'CPU': f_config['CPU'],
                'MEM': f_config['MEM']
            }
        ] for _ in range(prediction[i])])
    if is_random:
        from random import shuffle
        shuffle(vms)

    # Closed (finished) machines per type.
    backpack_result = [[] for _ in range(machine_number)]
    # Same layout as backpack_result: remaining (cpu, mem) of every closed
    # machine, cached to avoid repeated calculation.
    backpack_capcity = [[] for _ in range(machine_number)]
    # The machine currently being filled for each type (None = not open).
    placing = [None for _ in range(machine_number)]

    def _get_em_weights_of_cpu_and_mem(flavors_unique, flavors_config, em):
        # Total CPU and MEM used by the {flavor: count} dict `em`.
        cpu = 0
        mem = 0
        for k, v in em.items():
            cpu += flavors_config[flavors_unique.index(k)]['CPU'] * v
            mem += flavors_config[flavors_unique.index(k)]['MEM'] * v
        return cpu, mem

    type_i = type_i_fix
    while (len(vms) != 0):
        # Always work on the VM at the head of the list.
        vm_flavor = vms[0][0]
        vm_config = vms[0][1]
        # ------------------ refitting ------------------------------
        # First try to squeeze the VM into a closed machine that still has
        # enough spare CPU and MEM.
        refit = False
        insert_order = list(range(machine_number))
        # shuffle(insert_order)
        for i in insert_order:
            for j in range(len(backpack_result[i])):
                cpu_cap, mem_cap = backpack_capcity[i][j]
                if cpu_cap >= vm_config['CPU'] and mem_cap >= vm_config['MEM']:
                    backpack_result[i][j][vm_flavor] += 1
                    # used to estimate the remaining cpu/mem rate
                    cpu_predict -= vm_config['CPU']
                    mem_predict -= vm_config['MEM']
                    # success: update the cached spare capacity
                    backpack_capcity[i][j] = cpu_cap - vm_config[
                        'CPU'], mem_cap - vm_config['MEM']
                    refit = True
                    break
            if refit:
                break
        if refit:
            vms.pop(0)
            continue
        # ------------------- normal fitting ------------------------
        if placing[type_i] == None:
            # Open an empty machine of the current type; the same VM is
            # retried on the next loop iteration.
            placing[type_i] = {}.fromkeys(flavors_unique)
            for f in flavors_unique:
                placing[type_i][f] = 0
            continue
        else:
            cpu_total, mem_total = machine_config[type_i][
                'CPU'], machine_config[type_i]['MEM']
            cpu_used, mem_used = _get_em_weights_of_cpu_and_mem(
                flavors_unique, flavors_config, placing[type_i])
            if cpu_total - cpu_used < vm_config[
                    'CPU'] or mem_total - mem_used < vm_config['MEM']:
                # The open machine cannot take this VM: close it (recording
                # its spare capacity) so a new one is opened next iteration.
                backpack_result[type_i].append(placing[type_i])
                backpack_capcity[type_i].append(
                    (cpu_total - cpu_used, mem_total - mem_used))
                placing[type_i] = None
            else:
                placing[type_i][vm_flavor] += 1
                # used to estimate the remaining cpu/mem rate
                cpu_predict -= vm_config['CPU']
                mem_predict -= vm_config['MEM']
                vms.pop(0)
                # add @2018-04-18
                # select the next type of machine
                # type_i = random.choice(range(machine_number))
                if mem_predict == 0:
                    # Nothing left to place; also avoids dividing by zero.
                    break
                # 1. Greedy select: re-pick the machine type against the
                # ratio of the remaining demand.
                type_i = argmin(
                    abs(minus(machine_rate, cpu_predict / float(mem_predict))))

    # Flush the machines that are still open.
    for i in range(len(placing)):
        if placing[i] != None:
            # add @2018-04-18
            cpu_used, mem_used = _get_em_weights_of_cpu_and_mem(
                flavors_unique, flavors_config, placing[i])
            # An open machine with zero CPU or zero MEM usage is discarded.
            if cpu_used != 0 and mem_used != 0:
                # Which machine types are large enough for this content?
                possible = []
                for k in range(machine_number):
                    if machine_config[k]['CPU'] >= cpu_used and machine_config[
                            k]['MEM'] >= mem_used:
                        possible.append(True)
                    else:
                        possible.append(False)
                # Average CPU/MEM utilisation the content would reach on each
                # feasible type; infeasible types score 0.
                scores = [(cpu_used / float(machine_config[k]['CPU']) +
                           mem_used / float(machine_config[k]['MEM'])) / 2.0
                          if possible[k] else 0
                          for k in range(machine_number)]
                # Record the machine under the highest-scoring type, which
                # may differ from the type it was opened as.
                best_i = argmax(scores)
                backpack_result[best_i].append(placing[i])
                # backpack_result[i].append(placing[i])
    backpack_count = [len(b) for b in backpack_result]
    return backpack_count, backpack_result
def _score_calc(y, y_):
    """Return the root-mean-square error of the integer-rounded ``y_`` against ``y``."""
    rounded = [int(round(value)) for value in y_]
    return sqrt(mean(square(minus(y, rounded))))