Code example #1
def merge(ecs_logs,flavors_config,flavors_unique,training_start_time,training_end_time,predict_start_time,predict_end_time):
    predict = dict.fromkeys(flavors_unique, 0)
    virtual_machine_sum = 0
    mapping_index = get_flavors_unique_mapping(flavors_unique)

    R = []
    X_trainS_raw,Y_trainS_raw,X_testS = features_building(ecs_logs,flavors_config,flavors_unique,training_start_time,training_end_time,predict_start_time,predict_end_time)
    # penalty = [1,1,1,1,0.5,0.5]
    X_trainS = fancy(X_trainS_raw,None,(0,-1),None)
    # X_trainS = X_trainS_raw
    
    Y_trainS = fancy(Y_trainS_raw,None,(0,-1))
    # Y_trainS = Y_trainS_raw

    X_valS = fancy(X_trainS_raw,None,(-1,),None)
    Y_valS = fancy(Y_trainS_raw,None,(-1,))

    #adjustable #5 Ridge Regression alpha (searched in the grid below)
    # early_stoping is assumed to be exported by model_selection alongside
    # grid_search_cv_early_stoping
    from model_selection import early_stoping, grid_search_cv_early_stoping
    
    test = []
    train = []
    val = []
    for i in range(len(flavors_unique)):    
        X = X_trainS[i]
        y = Y_trainS[i]
        # clf = grid_search_cv(Ridge,{"alpha":[0.001,0.01,0.1,0.4,0.7,1,1.5,2]},X,y,cv=20,random_state=42,is_shuffle=True,verbose=True)
        clf = early_stoping(Ridge,{"alpha":sorted([0.01,0.02,0.1,0.4,0.7,1,1.5,2])[::-1]},X,y,X_valS[i],Y_valS[i],verbose=False)
        # clf = grid_search_cv_early_stoping(Ridge,{"alpha":sorted([0.01,0.02,0.1,0.4,0.7,1,1.5,2])[::-1]},X,y,X_valS[i],Y_valS[i],cv=10,random_state=42,is_shuffle=True,verbose=True)
        # clf = Ridge(alpha=(clf_1.alpha + clf_2.alpha))
        # clf = Ridge(alpha=1)
        # clf.fit(X,y)
        train.append(clf.predict(X))
        val.append(clf.predict(X_valS[i]))
        test.append(clf.predict(X_testS[i]))

    # print("shape(train)",shape(train))

    train = matrix_transpose(train)
    
    Y_trainS = matrix_transpose(Y_trainS)
    R.extend(test)

    print("training_score-->",official_score(train,Y_trainS))
    val = matrix_transpose(val)
    Y_valS = matrix_transpose(Y_valS)
    print("validation_score-->",official_score(val,Y_valS))
    
    result = flatten(R)
    result = [0 if r<0 else r for r in result]
    for f in flavors_unique:
        p = int(round(result[mapping_index[f]]))
        predict[f] = p
        virtual_machine_sum += p
    return predict,virtual_machine_sum
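
In outline, merge() fits one Ridge model per flavor on lag features, validates on the held-out last row, clips negative predictions, and rounds to integer VM counts. A minimal self-contained sketch of that per-target pattern, with scikit-learn's Ridge standing in for the project's own Ridge and helpers (the data and the sklearn substitution are illustrative assumptions):

# Sketch only: stands in for the project's Ridge/fancy/official_score helpers.
from sklearn.linear_model import Ridge

X_train = [[0, 1], [1, 2], [2, 3], [3, 4]]   # made-up lag features per day
Y_train = [[1, 0], [2, 1], [2, 1], [3, 2]]   # one column per flavor
X_test = [[4, 5]]

predict = []
for col in range(len(Y_train[0])):
    y = [row[col] for row in Y_train]        # a single flavor's targets
    clf = Ridge(alpha=1.0)
    clf.fit(X_train, y)
    p = float(clf.predict(X_test)[0])
    predict.append(int(round(max(p, 0.0))))  # clip negatives, then round

virtual_machine_sum = sum(predict)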
Code example #2
    def train_cv(self, X, y, shuffle=False, cv='full'):
        assert (isinstance(cv, int) or cv == 'full')
        assert (dim(X) == 2 and dim(y) == 2)
        self.shape_Y = shape(y)
        # Fit one estimator per output column, searching over random feature
        # subsets and keeping the one with the best cross-validated score.
        for i in range(shape(y)[1]):
            max_score = None
            best_clf = None
            best_keep = None
            y_ = fancy(y, -1, i)
            for _ in range(self.max_iter):
                clf = self.estimator(**(self.parameter))
                X_, keep = self._rand_X(X)
                clf.fit(X_, y_)
                # score on the same column subset the model was fit with
                score = cross_val_score(clf,
                                        X_,
                                        y_,
                                        return_mean=True,
                                        cv=cv,
                                        shuffle=shuffle)
                if max_score is None or max_score < score:
                    max_score = score
                    best_clf = clf
                    best_keep = keep
            self.keeps.append(best_keep)
            self.clfs.append(best_clf)
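
train_cv is essentially a feature-bagging search: each iteration fits the estimator on a random column subset and keeps the subset with the best cross-validated score. A compact sketch of the same idea, using scikit-learn's cross_val_score in place of the project's (the data and the 20-iteration budget are illustrative):

# Sketch only: scikit-learn stands in for the project's estimator/CV helpers.
import random
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

random.seed(42)
X = [[random.random() for _ in range(6)] for _ in range(30)]
y = [row[0] + row[1] + row[2] for row in X]   # only 3 columns are informative

best_score, best_keep = None, None
for _ in range(20):
    keep = sorted(random.sample(range(6), 4))          # random column subset
    X_ = [[row[j] for j in keep] for row in X]
    score = cross_val_score(Ridge(alpha=1.0), X_, y, cv=5).mean()
    if best_score is None or score > best_score:
        best_score, best_keep = score, keep

print(best_keep)   # best-scoring column subset found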
Code example #3
    def get_feature_grid(sample,i,fill_na='mean',max_na_rate=1,col_count=None,with_test=True):
        assert(fill_na=='mean' or fill_na=='zero')
        col = fancy(sample,None,i)
        R = []
        # row j holds the series history up to step j, right-aligned and
        # padded on the left with None
        for j in range(len(col)):
            left = [None for _ in range(len(col)-j)]
            right = col[:j]
            r = []
            r.extend(left)
            r.extend(right)
            R.append(r)

        def _mean_with_none(A):
            # mean over the non-None entries; 0 if the column is all None
            values = [a for a in A if a is not None]
            if len(values)==0:
                return 0
            return sum(values)/float(len(values))

        means = []
        for j in range(shape(R)[1]):
            means.append(_mean_with_none(fancy(R,None,j)))

        # drop the left-most columns whose missing rate exceeds max_na_rate
        width = int((1-max_na_rate) * shape(R)[1])
        R = fancy(R,None,(width,))
        for i_ in range(shape(R)[0]):
            for j in range(shape(R)[1]):
                if R[i_][j] is None:
                    if fill_na=='mean':
                        R[i_][j] = means[j]
                    elif fill_na=='zero':
                        R[i_][j] = 0
        if with_test:
            if col_count is not None:
                return fancy(R,None,(-col_count,))
            else:
                return R
        else:
            if col_count is not None:
                return fancy(R,(0,-1),(-col_count,))
            else:
                return R[:-1]
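
For intuition, row j of the grid built above is the series history up to step j, right-aligned and padded with None; max_na_rate then trims the sparse left columns and fill_na imputes what remains. A toy run of just the shifting step (data made up):

# Illustration of the shifting loop in get_feature_grid.
col = [5, 3, 8, 2]                 # one flavor's resampled counts
R = []
for j in range(len(col)):
    R.append([None] * (len(col) - j) + col[:j])

for row in R:
    print(row)
# [None, None, None, None]
# [None, None, None, 5]
# [None, None, 5, 3]
# [None, 5, 3, 8]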
Code example #4
    def _rand_X(self, X):
        # randomly keep ceil((1 - drop_out) * N) of the N feature columns
        N = shape(X)[1]
        keep_length = int(math.ceil((1 - self.drop_out) * N))
        keep_set = set()
        while len(keep_set) != keep_length:
            keep_set.add(random.randrange(N))
        keep = [i in keep_set for i in range(N)]
        X_ = fancy(X, -1, keep)
        return X_, keep
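
The rejection loop above is equivalent to drawing keep_length distinct indices with random.sample. A standalone sketch of that variant (the function name rand_columns is hypothetical, and it lives outside the class for illustration):

# Sketch only: a functional equivalent of _rand_X's sampling.
import math
import random

def rand_columns(n_cols, drop_out):
    keep_length = int(math.ceil((1 - drop_out) * n_cols))
    keep_set = set(random.sample(range(n_cols), keep_length))
    return [i in keep_set for i in range(n_cols)]

print(rand_columns(10, 0.3))   # boolean mask with exactly 7 True entries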
Code example #5
def minmax_scaling(X, axis=1):
    # Column-wise mean normalization: (x - mean) / (max - min).
    # Note: despite the name, values are centered on the column mean,
    # not shifted by the column minimum.
    assert (axis == 1)
    R = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        max_ = max(col)
        min_ = min(col)
        mean_ = mean(col)
        if max_ - min_ == 0:
            # constant column: leave unchanged to avoid division by zero
            R.append(col)
        else:
            R.append([(x - mean_) / (max_ - min_) for x in col])
    return matrix_transpose(R)
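
Worked numerically, the column transform centers on the mean and divides by the range, so a linear column maps symmetrically around zero:

# One column of minmax_scaling by hand (illustrative values).
col = [2.0, 4.0, 6.0]
mean_ = sum(col) / len(col)               # 4.0
span = max(col) - min(col)                # 4.0
print([(x - mean_) / span for x in col])  # [-0.5, 0.0, 0.5]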
Code example #6
    def train(self, X, y, X_val, Y_val):
        assert (dim(X) == 2 and dim(y) == 2)
        self.shape_Y = shape(y)

        for i in range(shape(y)[1]):
            max_score = None
            best_clf = None
            best_keep = None
            y_ = fancy(y, -1, i)
            for _ in range(self.max_iter):
                clf = self.estimator(**(self.parameter))
                X_, keep = self._rand_X(X)
                clf.fit(X_, y_)
                # score on the held-out set, restricted to the same columns
                score = clf.score(self._get_keep_X(X_val, keep),
                                  fancy(Y_val, -1, i))

                if max_score is None or max_score < score:
                    max_score = score
                    best_clf = clf
                    best_keep = keep

            self.keeps.append(best_keep)
            self.clfs.append(best_clf)
Code example #7
def maxabs_scaling(X, y=None, axis=1):
    # Column-wise scaling by the maximum absolute value, after mean-centering.
    # If y is given, columns are additionally rescaled to the range of y.
    assert (axis == 1)
    R = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        max_ = max(abs(x) for x in col)
        mean_ = mean(col)
        if max_ == 0:
            R.append(col)
        else:
            if y is None:
                R.append([(x - mean_) / max_ for x in col])
            else:
                R.append([(x - mean_) * max(y) / max_ for x in col])

    return matrix_transpose(R)
Code example #8
def standard_scaling(X, y=None, axis=1):
    # Column-wise standardization; if y is given, each column's standard
    # deviation is matched to that of y instead of being set to 1.
    if axis == 0:
        return matrix_transpose(standard_scaling(matrix_transpose(X), axis=1))
    R = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        mean_ = mean(col)
        std = sqrt(mean(square(minus(col, mean_))))

        if y is not None:
            std_y = sqrt(mean(square(minus(y, mean(y)))))
        else:
            std_y = 1.0  # plain z-scoring when no target is supplied

        if std == 0:
            R.append(col)
        else:
            R.append([(x - mean_) * std_y / std for x in col])
    return matrix_transpose(R)
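
With a target supplied, each column ends up with exactly the target's standard deviation rather than unit variance. A quick numeric check of the transform (population standard deviation, matching the mean-of-squares formula above; values made up):

# One column of standard_scaling with y supplied, by hand.
from math import sqrt

col = [1.0, 2.0, 3.0]
y = [10.0, 30.0, 50.0]

mean_c = sum(col) / len(col)
std_c = sqrt(sum((x - mean_c) ** 2 for x in col) / len(col))
mean_y = sum(y) / len(y)
std_y = sqrt(sum((v - mean_y) ** 2 for v in y) / len(y))

print([(x - mean_c) * std_y / std_c for x in col])  # ~[-20.0, 0.0, 20.0]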
Code example #9
    def retrain(self, X, y):
        # refit each stored estimator on new data, reusing the feature
        # masks selected during training
        assert (len(self.keeps) != 0)
        for i in range(self.shape_Y[1]):
            X_ = self._get_keep_X(X, self.keeps[i])
            self.clfs[i].fit(X_, fancy(y, -1, i))
Code example #10
    def _get_keep_X(self, X, keep):
        # apply a boolean column mask to X
        return fancy(X, -1, keep)
Code example #11
def features_building(ecs_logs,flavors_config,flavors_unique,training_start_time,training_end_time,predict_start_time,predict_end_time):
    mapping_index = get_flavors_unique_mapping(flavors_unique)
    predict_days = (predict_end_time-predict_start_time).days

    sample = resampling(ecs_logs,flavors_unique,training_start_time,predict_start_time,frequency=predict_days,strike=1,skip=0)

    def outlier_handling(sample,method='mean',max_sigma=3):
        # Replace values more than max_sigma standard deviations above the
        # column mean with the mean, with zero, or with the midpoint.
        assert(method=='mean' or method=='zero' or method=='dynamic')
        sample = matrix_copy(sample)
        std_ = stdev(sample)
        mean_ = mean(sample,axis=1)
        for i in range(shape(sample)[0]):
            for j in range(shape(sample)[1]):
                if sample[i][j]-mean_[j] > max_sigma*std_[j]:
                    if method=='mean':
                        sample[i][j] = mean_[j]
                    elif method=='zero':
                        sample[i][j] = 0
                    elif method=='dynamic':
                        sample[i][j] = (sample[i][j] + mean_[j])/2.0

        return sample

    sample = outlier_handling(sample,method='mean',max_sigma=3)
    # sample = exponential_smoothing(sample,alpha=0.2)

    Ys = sample[1:]

    def flavor_clustering(sample,k=3,variance_threshold=None):
        # For each flavor, collect the indices of the most correlated other
        # flavors: either the top-k, or all above variance_threshold.
        corrcoef_sample = corrcoef(sample)
        clustering_paths = []
        for i in range(shape(sample)[1]):
            col = corrcoef_sample[i]
            col_index_sorted = argsort(col)[::-1]
            if variance_threshold is not None:
                col_index_sorted = col_index_sorted[1:]  # drop self-correlation
                index = [j for j in col_index_sorted if col[j]>variance_threshold]
            else:
                index = col_index_sorted[1:k+1]
            clustering_paths.append(index)
        return clustering_paths,corrcoef_sample


    # adjustable # 1
    variance_threshold = 0.6  # scored 76.234

    clustering_paths,coef_sample = flavor_clustering(sample,variance_threshold=variance_threshold)

    def get_feature_grid(sample,i,fill_na='mean',max_na_rate=1,col_count=None,with_test=True):
        assert(fill_na=='mean' or fill_na=='zero')
        col = fancy(sample,None,i)
        R = []
        # row j holds the series history up to step j, right-aligned and
        # padded on the left with None
        for j in range(len(col)):
            left = [None for _ in range(len(col)-j)]
            right = col[:j]
            r = []
            r.extend(left)
            r.extend(right)
            R.append(r)

        def _mean_with_none(A):
            # mean over the non-None entries; 0 if the column is all None
            values = [a for a in A if a is not None]
            if len(values)==0:
                return 0
            return sum(values)/float(len(values))

        means = []
        for j in range(shape(R)[1]):
            means.append(_mean_with_none(fancy(R,None,j)))

        # drop the left-most columns whose missing rate exceeds max_na_rate
        width = int((1-max_na_rate) * shape(R)[1])
        R = fancy(R,None,(width,))
        for i_ in range(shape(R)[0]):
            for j in range(shape(R)[1]):
                if R[i_][j] is None:
                    if fill_na=='mean':
                        R[i_][j] = means[j]
                    elif fill_na=='zero':
                        R[i_][j] = 0
        if with_test:
            if col_count is not None:
                return fancy(R,None,(-col_count,))
            else:
                return R
        else:
            if col_count is not None:
                return fancy(R,(0,-1),(-col_count,))
            else:
                return R[:-1]


    # def get_rate_X(sample,j):
    #     sum_row = sum(sample,axis=1)
    #     A = [sample[i][j]/float(sum_row[i]) if sum_row[i]!=0 else 0 for i in range(shape(sample)[0])]
    #     return A

    # def get_cpu_rate_X(sample,i):
    #     cpu_config,mem_config = get_machine_config(flavors_unique)
    #     sample_copy = matrix_copy(sample)
    #     for i in range(shape(sample_copy)[0]):
    #         for j in range(shape(sample_copy)[1]):
    #             sample_copy[i][j] *= cpu_config[j]

    #     sample = sample_copy
    #     sum_row = sum(sample,axis=1)
    #     A = [sample[i][j]/float(sum_row[i]) if sum_row[i]!=0 else 0 for i in range(shape(sample)[0])]
    #     return A

    # def get_men_rate_X(sample,i):
    #     cpu_config,mem_config = get_machine_config(flavors_unique)
    #     sample_copy = matrix_copy(sample)
    #     for i in range(shape(sample_copy)[0]):
    #         for j in range(shape(sample_copy)[1]):
    #             sample_copy[i][j] *= mem_config[j]

    #     sample = sample_copy
    #     sum_row = sum(sample,axis=1)
    #     A = [sample[i][j]/float(sum_row[i]) if sum_row[i]!=0 else 0 for i in range(shape(sample)[0])]
    #     return A

    X_trainS,Y_trainS,X_test_S = [],[],[]


    # adjustable # 2 
    col_count = 5 # n_feature

    for f in flavors_unique:
        X = get_feature_grid(sample,mapping_index[f],col_count=col_count,fill_na='mean',max_na_rate=1,with_test=True)
        X_test = X[-1:]
        X = X[:-1]
        y = fancy(Ys,None,(mapping_index[f],mapping_index[f]+1))


        clustering = True
        # 1. data clustering
        if clustering:
            print(clustering_paths[mapping_index[f]])
            # duplicate this flavor's own rows to up-weight them relative
            # to the correlated flavors appended below
            X.extend(X)
            y.extend(y)

            for cluster_index in clustering_paths[mapping_index[f]]:
                # borrow the correlated flavor's own history as extra training
                # data, weighted by its correlation with this flavor
                X_cluster = get_feature_grid(sample,cluster_index,col_count=col_count,fill_na='mean',max_na_rate=1,with_test=False)
                y_cluster = fancy(Ys,None,(cluster_index,cluster_index+1))
                w =  coef_sample[mapping_index[f]][cluster_index]

                # important
                X_cluster = apply(X_cluster,lambda x:x*w)
                y_cluster = apply(y_cluster,lambda x:x*w)

                X.extend(X_cluster)
                y.extend(y_cluster)

        # do not delete: the test row rides along so it receives the same
        # feature transformations as the training rows
        X.extend(X_test)


        # --------------------------------------------------------- #
        add_list= [X]
        # add_list = []
        # add_list.extend([sqrt(X)])
        add_list.extend([apply(X,lambda x:math.log1p(x))]) # important
        X = hstack(add_list)
        # --------------------------------------------------------- #

        def multi_exponential_smoothing(A,list_of_alpha):
            R = A
            for a in list_of_alpha:
                R = exponential_smoothing(R,alpha=a)
            return R

        #adjustable #3 smoothing degree
        # depth 3 scored 77.291; 77.405 on no.63
        depth = 3

        #adjustable #4 smoothing weights
        # base = [0.3,0.5,0.7,0.8]  # scored 77.163
        # base = [0.1,0.3,0.5]
        # base = [0.7,0.8,0.9]
        base = [0.6,0.7,0.8]

        # repeat each base alpha `depth` times: the same factor is applied
        # depth times in succession by multi_exponential_smoothing
        alphas = [[b] * depth for b in base]

        # stack smoothed copies of the data (at several alphas) on top of the
        # raw series; X keeps its final test row out of the smoothing
        X_data_list = [multi_exponential_smoothing(X[:-1],a) for a in alphas]
        Y_data_list = [multi_exponential_smoothing(y,a) for a in alphas]

        X_data_list.extend([X])
        Y_data_list.extend([y])
        X = vstack(X_data_list)
        y = vstack(Y_data_list)


        y = flatten(y)
        X = normalize(X,y=y,norm='l1')

        # X carries one extra row: the test sample appended above
        assert(shape(X)[0]==shape(y)[0]+1)
        X_trainS.append(X[:-1])
        X_test_S.append(X[-1:])
        Y_trainS.append(y)

    return X_trainS,Y_trainS,X_test_S
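
multi_exponential_smoothing above applies the same alpha `depth` times in a row, which is why each entry of alphas is a constant list. A pure-Python sketch of one series smoothed this way (the project's exponential_smoothing works column-wise on matrices; this scalar version and its data are illustrative assumptions):

# Sketch only: scalar stand-in for the project's exponential_smoothing.
def exp_smooth(series, alpha):
    # s[t] = alpha * x[t] + (1 - alpha) * s[t-1], with s[0] = x[0]
    out = [series[0]]
    for x in series[1:]:
        out.append(alpha * x + (1 - alpha) * out[-1])
    return out

series = [5.0, 3.0, 8.0, 2.0, 7.0]
smoothed = series
for a in [0.6, 0.6, 0.6]:       # depth 3 at a single base alpha
    smoothed = exp_smooth(smoothed, a)
print(smoothed)                 # a progressively flatter copy of the series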
Code example #12
def predict_flavors(ecs_logs, flavors_config, flavors_unique, training_start,
                    training_end, predict_start, predict_end):
    # length of the prediction window in whole days, rounding up half-days
    predict_days = (predict_end - predict_start).days
    hours = ((predict_end - predict_start).seconds / float(3600))
    if hours >= 12:
        predict_days += 1

    skip_days = (predict_start - training_end).days

    # print(skip_days) #checked
    # print(predict_days) #checked

    # sample = resampling(ecs_logs,flavors_unique,training_start,training_end,frequency=predict_days,strike=predict_days,skip=0)
    sample = resampling(ecs_logs,
                        flavors_unique,
                        training_start,
                        training_end,
                        frequency=1,
                        strike=1,
                        skip=0)

    def outlier_handling(sample, method='mean', max_sigma=3):
        # In-place variant: unlike the copy-based version in
        # features_building, this mutates `sample` directly.
        assert (method == 'mean' or method == 'dynamic')
        std_ = stdev(sample)
        mean_ = mean(sample, axis=0)
        for i in range(shape(sample)[0]):
            for j in range(shape(sample)[1]):
                if sample[i][j] - mean_[j] > max_sigma * std_[j]:
                    if method == 'mean':
                        sample[i][j] = mean_[j]
                    elif method == 'dynamic':
                        # only soften outliers in the first half of the window
                        if i < len(sample) / 2.0:
                            sample[i][j] = (mean_[j] + sample[i][j]) / 2.0
        return sample

    # sample = outlier_handling(sample,method='dynamic',max_sigma=3)
    # sample = outlier_handling(sample,method='mean',max_sigma=3.5)

    # from preprocessing import exponential_smoothing
    # sample = exponential_smoothing(exponential_smoothing(sample,alpha=0.2),alpha=0.2)

    skip_days -= 1
    prediction = []
    for i in range(shape(sample)[1]):

        clf = Ridge(alpha=1, fit_intercept=True)

        X = reshape(list(range(len(sample))), (-1, 1))
        y = fancy(sample, None, (i, i + 1))

        X_test = reshape(
            list(range(len(sample),
                       len(sample) + skip_days + predict_days)), (-1, 1))

        # hstack scaffolding kept for adding extra feature columns later;
        # with a single entry it is a no-op
        X_list = [X]
        X = hstack(X_list)

        X_test_list = [X_test]
        X_test = hstack(X_test_list)

        clf.fit(X, y)
        p = clf.predict(X_test)

        prediction.append(sum(flatten(p)))

    prediction = [int(round(p)) if p > 0 else 0 for p in prediction]

    return prediction
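
predict_flavors is, at heart, a per-flavor linear trend: regress the daily count on the day index, extrapolate across the gap plus the prediction window, and sum. A self-contained sketch with scikit-learn's Ridge standing in for the project's (the daily counts and window sizes are made up):

# Sketch only: the trend-extrapolation idea behind predict_flavors.
from sklearn.linear_model import Ridge

daily = [2, 3, 3, 4, 5, 5, 6]    # one flavor's daily request counts
skip_days, predict_days = 1, 3

X = [[t] for t in range(len(daily))]
X_test = [[t] for t in range(len(daily), len(daily) + skip_days + predict_days)]

clf = Ridge(alpha=1.0, fit_intercept=True)
clf.fit(X, daily)
p = sum(clf.predict(X_test))     # summed over the extrapolated span, as above

print(int(round(p)) if p > 0 else 0)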