def merge(ecs_logs, flavors_config, flavors_unique, training_start_time,
          training_end_time, predict_start_time, predict_end_time):
    predict = {f: 0 for f in flavors_unique}
    virtual_machine_sum = 0
    mapping_index = get_flavors_unique_mapping(flavors_unique)
    R = []

    X_trainS_raw, Y_trainS_raw, X_testS = features_building(
        ecs_logs, flavors_config, flavors_unique,
        training_start_time, training_end_time,
        predict_start_time, predict_end_time)

    # Hold out the last time sample of each flavor as a validation split.
    X_trainS = fancy(X_trainS_raw, None, (0, -1), None)
    Y_trainS = fancy(Y_trainS_raw, None, (0, -1))
    X_valS = fancy(X_trainS_raw, None, (-1,), None)
    Y_valS = fancy(Y_trainS_raw, None, (-1,))

    from model_selection import grid_search_cv_early_stoping, early_stoping

    test = []
    train = []
    val = []
    for i in range(len(flavors_unique)):
        X = X_trainS[i]
        y = Y_trainS[i]
        # Adjustable #5: Ridge regression alpha. Scan the candidates from
        # strongest to weakest regularization and stop at the first
        # degradation on the validation split.
        clf = early_stoping(
            Ridge, {"alpha": sorted([0.01, 0.02, 0.1, 0.4, 0.7, 1, 1.5, 2])[::-1]},
            X, y, X_valS[i], Y_valS[i], verbose=False)
        # Alternative: cross-validated grid search with early stopping.
        # clf = grid_search_cv_early_stoping(
        #     Ridge, {"alpha": sorted([0.01, 0.02, 0.1, 0.4, 0.7, 1, 1.5, 2])[::-1]},
        #     X, y, X_valS[i], Y_valS[i], cv=10, random_state=42,
        #     is_shuffle=True, verbose=True)
        train.append(clf.predict(X))
        val.append(clf.predict(X_valS[i]))
        test.append(clf.predict(X_testS[i]))

    train = matrix_transpose(train)
    Y_trainS = matrix_transpose(Y_trainS)
    R.extend(test)
    print("training_score-->", official_score(train, Y_trainS))

    val = matrix_transpose(val)
    Y_valS = matrix_transpose(Y_valS)
    print("validation_score-->", official_score(val, Y_valS))

    # Clip negative predictions to zero and round to whole machine counts.
    result = flatten(R)
    result = [0 if r < 0 else r for r in result]
    for f in flavors_unique:
        p = result[mapping_index[f]]
        predict[f] = int(round(p))
        virtual_machine_sum += int(round(p))
    return predict, virtual_machine_sum
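# Note on the fancy(...) calls in merge above: assuming fancy performs
# per-axis slicing on nested lists (None keeps an axis whole, a tuple is a
# start/stop slice), the train/validation split is equivalent to the
# plain-list slicing below. This is a hypothetical illustration only, not
# part of the pipeline.
def _fancy_split_sketch(X_trainS_raw, Y_trainS_raw):
    X_trainS = [samples[0:-1] for samples in X_trainS_raw]  # all but the last time sample
    Y_trainS = [targets[0:-1] for targets in Y_trainS_raw]
    X_valS = [samples[-1:] for samples in X_trainS_raw]     # only the last time sample
    Y_valS = [targets[-1:] for targets in Y_trainS_raw]
    return X_trainS, Y_trainS, X_valS, Y_valS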
def grid_search_cv_early_stoping(estimator, parameters, X, y, X_val, y_val,
                                 is_shuffle=False, cv='full', scoring='score',
                                 random_state=None, verbose=False,
                                 return_parameter=False):
    # Grid search scoring each candidate by a blend of the k-fold CV score
    # and the score on a fixed hold-out validation split.
    assert scoring == 'score'

    def parameter_gen(parameters):
        # Yield every combination in the parameter grid as a dict.
        from itertools import product
        values = list(product(*parameters.values()))
        for v in values:
            yield dict(zip(parameters.keys(), v))

    max_model = None
    max_parameter = None
    max_score = None
    for p in parameter_gen(parameters):
        clf = estimator(**p)
        clf.fit(X, y)
        score = cross_val_score(clf, X, y, return_mean=True,
                                is_shuffle=is_shuffle, cv=cv,
                                scoring=scoring, random_state=random_state)
        score_val = official_score(y_val, clf.predict(X_val))
        # Blend of CV score and hold-out score; the current weights
        # (0 and 1) put all the weight on the hold-out validation score.
        score = (0 / 3.0) * score + (3 / 3.0) * score_val
        if verbose:
            print(p, score)
        if max_parameter is None or max_score < score:
            max_parameter = p
            max_score = score
            max_model = clf
    if verbose:
        print("max_parameter", max_parameter)
    if return_parameter:
        return max_model, max_parameter
    else:
        return max_model
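# Usage sketch for grid_search_cv_early_stoping (not part of the pipeline).
# Assumes the repo's own Ridge estimator and the shuffle/official_score
# helpers are importable; the data below is placeholder.
def _grid_search_sketch():
    X = [[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6], [5, 6, 7], [6, 7, 8]]
    y = [6, 9, 12, 15, 18, 21]
    X_val, y_val = [[7, 8, 9]], [24]
    best = grid_search_cv_early_stoping(
        Ridge, {"alpha": [0.01, 0.1, 1.0]},
        X, y, X_val, y_val,
        cv=3, is_shuffle=True, random_state=42, verbose=True)
    return best.predict(X_val)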
def cross_val_score(estimator_instance, X, y, is_shuffle=False, cv='full',
                    scoring='score', random_state=None, return_mean=False,
                    verbose=False):
    assert (type(cv) == int and cv > 1) or cv == 'full'
    assert scoring == 'score' or scoring == 'loss'
    if type(cv) == int:
        assert cv < len(X)
    if is_shuffle:
        X, y = shuffle(X, y=y, random_state=random_state)

    N = len(X)
    K = N if cv == 'full' else cv  # cv='full' means leave-one-out
    h = N / float(K)               # fractional fold width

    scores = []
    losses = []
    for i in range(K):
        # Round the fractional boundaries so the K folds cover all N samples.
        s = int(round(i * h))
        e = int(round((i + 1) * h))
        X_train, Y_train = [], []
        X_train.extend(X[:s])
        X_train.extend(X[e:])
        Y_train.extend(y[:s])
        Y_train.extend(y[e:])
        X_val, Y_val = X[s:e], y[s:e]
        estimator_instance.fit(X_train, Y_train)
        p = estimator_instance.predict(X_val)
        scores.append(official_score(p, Y_val))
        losses.append(l2_loss(p, Y_val))

    if return_mean:
        if scoring == 'score':
            # Robust aggregate: average of the median score and the mean
            # penalized by half a standard deviation.
            std = sqrt(mean(square(minus(scores, mean(scores)))))
            return (sorted(scores)[len(scores) // 2] + mean(scores) - 0.5 * std) / 2.0
        elif scoring == 'loss':
            # Same idea for losses, penalizing high variance upward.
            std = sqrt(mean(square(minus(losses, mean(losses)))))
            return (sorted(losses)[len(losses) // 2] + mean(losses) + std) / 2.0
    else:
        if scoring == 'score':
            return scores
        elif scoring == 'loss':
            return losses
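# Fold-boundary sketch for cross_val_score: the boundaries are rounded
# multiples of the fractional width h, so every sample lands in exactly one
# validation slice even when cv does not divide N. Standalone illustration.
def _fold_bounds_sketch():
    N, K = 10, 3
    h = N / float(K)  # ~3.33
    bounds = [(int(round(i * h)), int(round((i + 1) * h))) for i in range(K)]
    return bounds  # [(0, 3), (3, 7), (7, 10)], i.e. fold sizes 3, 4, 3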
def early_stoping(estimator, parameters, X, y, X_val, Y_val, scoring='score',
                  verbose=False):
    assert scoring == 'score' or scoring == 'loss'

    def parameter_gen(parameters):
        # Yield combinations in the order the parameter lists are given;
        # that order drives the early-stopping walk below.
        from itertools import product
        values = list(product(*parameters.values()))
        for v in values:
            yield dict(zip(parameters.keys(), v))

    max_model = None
    max_parameter = None
    max_score = None
    min_loss = None
    last_score = None
    last_loss = None
    score = None
    loss = None
    for p in parameter_gen(parameters):
        clf = estimator(**p)
        clf.fit(X, y)
        last_score = score
        last_loss = loss
        score = official_score(Y_val, clf.predict(X_val))
        loss = l2_loss(Y_val, clf.predict(X_val))
        if verbose:
            print(p, score, loss)
        # Stop at the first sign of degradation on the validation split.
        if last_loss is not None and last_loss < loss:
            return max_model
        if last_score is not None and last_score > score:
            return max_model
        if scoring == 'score':
            if max_parameter is None or max_score < score:
                max_parameter = p
                max_score = score
                max_model = clf
        if scoring == 'loss':
            # Fixed: track and compare the loss, not the score.
            if max_parameter is None or min_loss > loss:
                max_parameter = p
                min_loss = loss
                max_model = clf
    if verbose:
        print("max_parameter", max_parameter)
    return max_model
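# Usage sketch for early_stoping (not part of the pipeline). The walk follows
# the generation order of the grid, so listing alphas from strongest to
# weakest regularization, as merge does, makes it a coarse-to-fine descent
# that returns at the first degradation. Assumes the repo's Ridge estimator;
# the data below is placeholder.
def _early_stoping_sketch():
    X = [[1, 2], [2, 3], [3, 4], [4, 5]]
    y = [3, 5, 7, 9]
    X_val, Y_val = [[5, 6]], [11]
    alphas = sorted([0.01, 0.1, 0.4, 1, 2])[::-1]  # [2, 1, 0.4, 0.1, 0.01]
    return early_stoping(Ridge, {"alpha": alphas}, X, y, X_val, Y_val)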
def score(self, X, y):
    # Evaluate the model's predictions with the competition's official score.
    y_ = self.predict(X)
    return official_score(y, y_)
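# cross_val_score only relies on the duck-typed estimator interface used
# above: fit, predict and (optionally) this score method. A hypothetical
# minimal estimator satisfying it, assuming only that official_score is
# importable; for illustration, e.g. cross_val_score(_MeanRegressor(), X, y, cv=3).
class _MeanRegressor(object):
    def fit(self, X, y):
        # Remember the training-target mean and predict it everywhere.
        self.mean_ = sum(y) / float(len(y))
        return self

    def predict(self, X):
        return [self.mean_ for _ in X]

    def score(self, X, y):
        return official_score(y, self.predict(X))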