Example #1
    def _fit(self, X, y):
        # coordinate-descent Lasso for a single 1-D target y
        self._check(X, y)
        assert (dim(y) == 1)

        beta = zeros(shape(X)[1])  # row vector
        X_T = matrix_transpose(X)

        if self.fit_intercept:
            # intercept update; note that dot(X, beta[1:]) only lines up when X
            # carries no leading ones column (Example #2 uses raw_X here)
            beta[0] = sum(minus(reshape(y, -1), dot(X,
                                                    beta[1:]))) / (shape(X)[0])

        for _ in range(self.max_iter):
            print(_)  # progress: index of the current coordinate-descent sweep
            start = 1 if self.fit_intercept else 0
            for j in range(start, len(beta)):
                tmp_beta = [x for x in beta]
                tmp_beta[j] = 0.0

                # full residual; the commented variant below is the partial
                # residual with coordinate j zeroed out (tmp_beta)
                r_j = minus(reshape(y, -1), dot(X, beta))
                # r_j = minus(reshape(y,-1) , dot(X, tmp_beta))
                arg1 = dot(X_T[j], r_j)
                arg2 = self.alpha * shape(X)[0]

                if sum(square(X_T[j])) != 0:
                    beta[j] = self._soft_thresholding_operator(
                        arg1, arg2) / sum(square(X_T[j]))
                else:
                    beta[j] = 0

                if self.fit_intercept:
                    beta[0] = sum(minus(reshape(y, -1), dot(
                        X, beta[1:]))) / (shape(X)[0])
        return beta
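The coordinate update above divides a soft-thresholded correlation by the column's squared norm. The body of `_soft_thresholding_operator` is not shown; assuming it implements the standard soft-thresholding rule, a minimal standalone sketch looks like this:

def soft_threshold(z, gamma):
    # S(z, gamma) = sign(z) * max(|z| - gamma, 0)
    if z > gamma:
        return z - gamma
    elif z < -gamma:
        return z + gamma
    return 0.0

print(soft_threshold(5.0, 2.0))   # 3.0
print(soft_threshold(-1.0, 2.0))  # 0.0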
Example #2
    def fit(self, X, y):
        self._check(X, y)
        if dim(y) == 1:
            raw_X = X
            if self.fit_intercept:
                X = hstack([ones(shape(X)[0], 1), X])

            beta = zeros(shape(X)[1])  # row vector
            X_T = matrix_transpose(X)

            if self.fit_intercept:
                beta[0] = sum(minus(reshape(y, -1), dot(
                    raw_X, beta[1:]))) / (shape(X)[0])

            for _ in range(self.max_iter):
                start = 1 if self.fit_intercept else 0
                for j in range(start, len(beta)):
                    tmp_beta = [x for x in beta]
                    tmp_beta[j] = 0.0

                    r_j = minus(reshape(y, -1), dot(X, beta))
                    # r_j = minus(reshape(y,-1) , dot(X, tmp_beta))
                    arg1 = dot(X_T[j], r_j)
                    arg2 = self.alpha * shape(X)[0]

                    if sum(square(X_T[j])) != 0:
                        beta[j] = self._soft_thresholding_operator(
                            arg1, arg2) / sum(square(X_T[j]))
                    else:
                        beta[j] = 0

                    if self.fit_intercept:
                        beta[0] = sum(
                            minus(reshape(y, -1), dot(
                                raw_X, beta[1:]))) / (shape(X)[0])
                # # add whatch
                # self.beta = beta
                # self._whatch(raw_X,y)

            if self.fit_intercept:
                self.intercept_ = beta[0]
                self.coef_ = beta[1:]
            else:
                self.coef_ = beta
            self.beta = beta
            return self
        elif dim(y) == 2:
            if self.fit_intercept:
                X = hstack([ones(shape(X)[0], 1), X])
            y_t = matrix_transpose(y)
            betas = []
            for i in range(shape(y)[1]):
                betas.append(self._fit(X, y_t[i]))
            betas = matrix_transpose(betas)
            self.betas = betas
            return self
Example #3
    def predict(self, X):
        result = []
        # dim_X = dim(X)

        if dim(X) == 1:
            X = [X]
        for x in X:
            loss = sum(square(minus(self.X, x)), axis=1)

            index = argsort(loss)[:self.k]
            if self.verbose:
                print(index)

            ys = []
            for i in index:
                ys.append(self.y[i])

            # inverse-distance weights over the k nearest neighbours
            k_loss_raw = sorted(loss)[:self.k]
            k_loss = [1.0 / l if l != 0 else 0 for l in k_loss_raw]
            k_loss_sum = sum(k_loss)
            weights = [
                l / float(k_loss_sum) if k_loss_sum != 0 else 1 for l in k_loss
            ]
            weight_m = diag(weights)
            ys = matrix_matmul(weight_m, ys)
            result.append(sum(ys, axis=0))

        if len(self.shape_Y) == 1:
            result = matrix_transpose(result)[0]

        return result
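A plain-Python illustration (hypothetical distances, no project helpers) of the inverse-distance weighting used above: closer neighbours receive larger weights that sum to 1.

k_loss_raw = [0.5, 1.0, 2.0]  # squared distances of the k nearest neighbours
k_loss = [1.0 / l if l != 0 else 0 for l in k_loss_raw]
k_loss_sum = sum(k_loss)
weights = [l / k_loss_sum if k_loss_sum != 0 else 1 for l in k_loss]
print(weights)  # [0.571..., 0.285..., 0.142...]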
Example #4
def standard_scaling(X, y=None, axis=1):
    if axis == 0:
        return matrix_transpose(standard_scaling(matrix_transpose(X), axis=1))
    R = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        mean_ = mean(col)
        std = sqrt(mean(square(minus(col, mean_))))

        if y is not None:
            std_y = sqrt(mean(square(minus(y, mean(y)))))

        if std == 0:
            R.append(col)
        else:
            # rescale by std_y/std when y is supplied, otherwise plain 1/std
            scale = std_y / std if y is not None else 1.0 / std
            R.append([(x - mean_) * scale for x in col])
    return matrix_transpose(R)
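For comparison, a self-contained plain-Python version of the scaling applied to a single column when no y is passed (the 1/std branch above); the numbers are made up:

col = [1.0, 2.0, 3.0, 4.0]
mean_ = sum(col) / len(col)
std = (sum((x - mean_) ** 2 for x in col) / len(col)) ** 0.5  # population std
print([(x - mean_) / std for x in col])
# [-1.3416..., -0.4472..., 0.4472..., 1.3416...]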
Example #5
def stdev(X):
    # X = matrix_copy(X)
    X_T = matrix_transpose(X)
    m = mean(X, axis=1)  # per-column means here; Example #7 computes the same with axis=0
    R = []
    for j in range(shape(X)[1]):
        R.append(sqrt(mean(square(minus(X_T[j], m[j])))))
    return R
Example #6
    def _score_calc(y, y_):
        # score = 1 - RMSE(y, y_) / (RMS(y) + RMS(y_)), with y_ rounded to ints
        y_ = [int(round(i)) for i in y_]
        numerator = sqrt(mean(square(minus(y, y_))))
        denominator = sqrt(mean(square(y))) + sqrt(mean(square(y_)))
        if denominator == 0:
            return 0
        else:
            return 1 - (numerator / float(denominator))
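A tiny worked example (hypothetical values, plain Python) of the score above, which is 1 - RMSE(y, y_) / (RMS(y) + RMS(y_)):

y = [1.0, 2.0, 3.0]
y_ = [1.0, 2.0, 4.0]  # already-rounded predictions

rmse = (sum((a - b) ** 2 for a, b in zip(y, y_)) / len(y)) ** 0.5
rms = lambda v: (sum(a * a for a in v) / len(v)) ** 0.5
print(1 - rmse / (rms(y) + rms(y_)))  # ~0.88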
Example #7
def stdev(X, axis=0):
    assert (dim(X) == 2)
    assert (axis == 0)
    X_T = matrix_transpose(X)
    m = mean(X, axis=0)
    R = []
    for j in range(shape(X)[1]):
        R.append(sqrt(mean(square(minus(X_T[j], m[j])))))
    return R
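A quick plain-Python check of what the column-wise population standard deviation above computes, on a tiny made-up matrix:

X = [[1.0, 2.0],
     [3.0, 4.0]]
cols = list(zip(*X))
means = [sum(c) / len(c) for c in cols]
print([(sum((v - m) ** 2 for v in c) / len(c)) ** 0.5
       for c, m in zip(cols, means)])  # [1.0, 1.0]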
Example #8
    def predict(self, X):
        result = []
        # dim_X = dim(X)
        if dim(X) == 1:
            X = [X]
        for x in X:
            loss = sum(square(minus(self.X, x)), axis=1)
            # loss = sum(abs(minus(self.X,x)),axis=1)

            # NOTE: the import and the rescaling of self.X do not depend on x
            # and could be hoisted out of the loop
            from preprocessing import standard_scaling
            new_X = standard_scaling(self.X, axis=0)
            x = sqrt(square(minus(x, mean(x))))  # element-wise |x - mean(x)|
            loss = minus(loss, multiply(dot(new_X, x), self.alpha))

            index = argsort(loss)[:self.k]
            if self.verbose:
                print(index, '/len', len(loss))
            ys = []
            for i in index:
                ys.append(self.y[i])
            result.append(mean(ys, axis=0))
        return result
Example #9
    def predict(self, X):
        result = []
        # dim_X = dim(X)

        if dim(X) == 1:
            X = [X]
        for x in X:
            loss = sum(square(minus(self.X, x)), axis=1)
            # loss = sum(abs(minus(self.X,x)),axis=1)
            index = argsort(loss)[:self.k]
            if self.verbose:
                print(index, '/len', len(loss))
            ys = []
            for i in index:
                ys.append(self.y[i])
            result.append(mean(ys, axis=0))
        return result
Example #10
    def fit(self, X, y):
        assert (dim(X) == 2)
        assert (dim(y) == 1 or dim(y) == 2)
        self.shape_X = shape(X)
        self.shape_Y = shape(y)

        if dim(y) == 1:
            y = [[k] for k in y]

        best_w = None
        min_err = None
        for i in range(self.max_iter):
            # draw a random weight matrix and keep the best one seen so far
            W = self.random_w((shape(X)[1], shape(y)[1]))

            y_ = matrix_matmul(X, W)
            err = mean(sqrt(mean(square(minus(y, y_)), axis=1)))
            if best_w is None or err < min_err:
                best_w = W
                min_err = err
            print(err)  # progress: error of the current candidate
        self.W = best_w
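A minimal self-contained sketch of the same random-search idea on a toy 1-D least-squares problem (made-up data, no project helpers):

import random

X = [[1.0], [2.0], [3.0]]
y = [2.1, 3.9, 6.2]
best_w, min_err = None, None
for _ in range(1000):
    w = random.uniform(-10, 10)
    err = sum((yi - xi[0] * w) ** 2 for xi, yi in zip(X, y)) / len(X)
    if best_w is None or err < min_err:
        best_w, min_err = w, err
print(best_w)  # close to the least-squares solution, about 2.04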
Example #11
def cross_val_score(estimator_instance,
                    X,
                    y,
                    is_shuffle=False,
                    cv='full',
                    scoring='score',
                    random_state=None,
                    return_mean=False,
                    verbose=False):
    assert ((type(cv) == int and cv > 1) or cv == 'full')
    assert (scoring == 'score' or scoring == 'loss')

    if type(cv) == int:
        assert (cv < len(X))
    if is_shuffle:
        X, y = shuffle(X, y=y, random_state=random_state)
    N = len(X)
    K = N if cv == 'full' else cv

    h = len(X) / float(K)

    scores = []
    losses = []
    for i in range(K):
        s = int(round((i * h)))
        e = int(round((i + 1) * h))

        X_train, Y_train = [], []
        X_train.extend(X[:s])
        X_train.extend(X[e:])
        Y_train.extend(y[:s])
        Y_train.extend(y[e:])

        X_val, Y_val = X[s:e], y[s:e]
        estimator_instance.fit(X_train, Y_train)
        p = estimator_instance.predict(X_val)
        score = official_score(p, Y_val)
        loss = l2_loss(p, Y_val)
        # score = estimator_instance.score(X_val,Y_val)
        scores.append(score)
        losses.append(loss)

    # print(scores)
    if return_mean:
        if scoring == 'score':
            # print(scores)
            std = sqrt(mean(square(minus(scores, mean(scores)))))
            return (sorted(scores)[len(scores) // 2] + mean(scores) -
                    0.5 * std) / 2.0
            # return (sorted(scores)[len(scores)/2] + mean(scores) - std)/2.0
            # return sorted(scores)[len(scores)/2] - std
            # return max(scores)
            # return mean(scores[:len(scores)/2])
            # return mean(sorted(scores)[::-1][:len(scores)/2])
            # return (mean(scores) + max(scores))/2.0
            # return mean(scores)
            # return mean(scores) -0.5*std
        elif scoring == 'loss':
            # return mean(losses)
            std = sqrt(mean(square(minus(losses, mean(losses)))))
            # return mean(losses)
            return ((sorted(losses)[len(losses) // 2] + mean(losses) + std) /
                    2.0)

    else:
        if scoring == 'score':
            return scores
        elif scoring == 'loss':
            return losses
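The fold boundaries above come from rounding multiples of h = N / K; a plain-Python illustration with N = 10 samples and cv = 3:

N, K = 10, 3
h = N / float(K)
for i in range(K):
    s, e = int(round(i * h)), int(round((i + 1) * h))
    print(i, s, e)  # fold i holds out samples [s, e)
# 0 0 3
# 1 3 7
# 2 7 10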
Example #12
    def _whatch(self, X, y):
        p = self.predict(X)
        loss = sum(square(minus(p, y)))
        print(loss)
Example #13
def greedy_99_backpack(machine_number,
                       machine_name,
                       machine_config,
                       flavors_number,
                       flavors_unique,
                       flavors_config,
                       prediction,
                       score_treadhold=0.99):
    backpack_result = None
    solutions = get_approximate_meta_solutions(machine_number,
                                               machine_name,
                                               machine_config,
                                               flavors_number,
                                               flavors_unique,
                                               flavors_config,
                                               prediction,
                                               score_treadhold=score_treadhold)

    # print(prediction)
    # print(solutions)

    def possible(prediction, picker):
        for i in range(len(prediction)):
            if picker[i] > prediction[i]:
                return False
        return True

    backpack_result = [[] for _ in range(machine_number)]

    fit = True
    while fit:
        fit = False
        # keep packing greedily until no meta-solution fits anymore,
        # walking the meta-solutions from last to first
        for i in reversed(range(len(solutions))):
            pickers = solutions[i]
            for picker in pickers:
                picker = list(picker)
                if possible(prediction, picker):
                    prediction = minus(prediction, picker)
                    em = {}.fromkeys(flavors_unique)
                    for j in range(len(flavors_unique)):
                        em[flavors_unique[j]] = picker[j]
                    backpack_result[i].append(em)
                    fit = True

    # _,backpack_result_2 = backpack(machine_number,machine_name,machine_config,flavors_number,flavors_unique,flavors_config,prediction,is_random=True)
    _, backpack_result_2 = random_k_times(machine_number,
                                          machine_name,
                                          machine_config,
                                          flavors_number,
                                          flavors_unique,
                                          flavors_config,
                                          prediction,
                                          k=1000)

    # merge the random-refill result into the greedy result
    for i in range(len(backpack_result)):
        backpack_result[i].extend(backpack_result_2[i])

    backpack_count = [len(b) for b in backpack_result]

    # backpack_count: entity machine sum
    # backpack_result:
    # [[{f1:3,f2:8}..etc....]
    # [.......]
    # [.......]]
    return backpack_count, backpack_result
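A standalone check (made-up numbers) of the fit test that possible() performs above: a picker fits only if it never demands more of any flavor than the remaining prediction.

prediction = [3, 0, 2]
picker_ok, picker_bad = [1, 0, 2], [1, 1, 0]
print(all(p <= q for p, q in zip(picker_ok, prediction)))   # True
print(all(p <= q for p, q in zip(picker_bad, prediction)))  # False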
Example #14
def backpack(machine_number,
             machine_name,
             machine_config,
             flavors_number,
             flavors_unique,
             flavors_config,
             prediction,
             is_random=False):
    # parameters:
    # machine_number,machine_name,machine_config,flavors_number,flavors_unique,flavors_config,prediction
    # -->
    # (3,
    # ['General', 'Large-Memory', 'High-Performance'],
    # [{'MEM': 128, 'CPU': 56}, {'MEM': 256, 'CPU': 84}, {'MEM': 192, 'CPU': 112}],
    #  5,
    # [1, 2, 4, 5, 8],
    # [{'MEM': 1, 'CPU': 1}, {'MEM': 2, 'CPU': 1}, {'MEM': 2, 'CPU': 2}, {'MEM': 4, 'CPU': 2}, {'MEM': 8, 'CPU': 4}],
    # [32, 32, 11, 21,44])

    machine_rate = [c['CPU'] / float(c['MEM']) for c in machine_config]

    cpu_predict = 0
    mem_predict = 0
    for i in range(len(prediction)):
        cpu_predict += (prediction[i] * flavors_config[i]['CPU'])
        mem_predict += (prediction[i] * flavors_config[i]['MEM'])

    type_i_fix = argmin(
        abs(minus(machine_rate, cpu_predict / float(mem_predict))))

    vms = []
    for i in range(len(prediction)):
        f_config = flavors_config[i]
        vms.extend([[
            flavors_unique[i], {
                'CPU': f_config['CPU'],
                'MEM': f_config['MEM']
            }
        ] for _ in range(prediction[i])])

    if is_random:
        from random import shuffle
        shuffle(vms)
    # vms:
    # [(1, {'MEM': 1, 'CPU': 1}), (2, {'MEM': 1, 'CPU': 1}), (4, {'MEM': 1,'CPU': 1}), (5, {'MEM': 1, 'CPU': 1}), (8, {'MEM': 1, 'CPU': 1}), (1,{'MEM': 2, 'CPU': 1}), (2, {'MEM': 2, 'CPU': 1}), (4, {'MEM': 2, 'CPU': 1}), (5, {'MEM': 2, 'CPU': 1}), (8, {'MEM': 2, 'CPU': 1}), (1, {'MEM': 2, 'CPU': 2}), (2, {'MEM': 2, 'CPU': 2}), (4, {'MEM': 2, 'CPU': 2}), (5, {'MEM': 2, 'CPU': 2}), (8, {'MEM': 2, 'CPU': 2}), (1, {'MEM': 4, 'CPU': 2}), (2, {'MEM': 4, 'CPU': 2}), (4, {'MEM': 4, 'CPU': 2}), (5, {'MEM': 4, 'CPU': 2}), (8, {'MEM': 4, 'CPU': 2}), (1, {'MEM': 8, 'CPU': 4}), (2, {'MEM': 8, 'CPU': 4}), (4, {'MEM': 8, 'CPU': 4}), (5, {'MEM': 8, 'CPU': 4}), (8, {'MEM': 8, 'CPU': 4})]

    # [[{f1:3,f5:2},{f8:2,f7:4}] <== type1 machine
    # [....]                     <== type2 machine
    # [....]                     <== type3 machine
    backpack_result = [[] for _ in range(machine_number)]

    # same shape as backpack_result, kept to avoid repeated capacity calculations
    backpack_capcity = [[] for _ in range(machine_number)]

    placing = [None for _ in range(machine_number)]

    def _get_em_weights_of_cpu_and_mem(flavors_unique, flavors_config, em):
        cpu = 0
        mem = 0
        for k, v in em.items():
            cpu += flavors_config[flavors_unique.index(k)]['CPU'] * v
            mem += flavors_config[flavors_unique.index(k)]['MEM'] * v
        return cpu, mem

    type_i = type_i_fix
    while (len(vms) != 0):
        vm_flavor = vms[0][0]
        vm_config = vms[0][1]
        # ------------------refiting ------------------------------
        refit = False
        insert_order = list(range(machine_number))
        # shuffle(insert_order)
        for i in insert_order:
            for j in range(len(backpack_result[i])):
                cpu_cap, mem_cap = backpack_capcity[i][j]
                if cpu_cap >= vm_config['CPU'] and mem_cap >= vm_config['MEM']:
                    backpack_result[i][j][vm_flavor] += 1

                    # used for estimate the cpu/mem rate
                    cpu_predict -= vm_config['CPU']
                    mem_predict -= vm_config['MEM']

                    # success
                    backpack_capcity[i][j] = cpu_cap - vm_config[
                        'CPU'], mem_cap - vm_config['MEM']
                    refit = True
                    break
            if refit:
                break
        if refit:
            vms.pop(0)
            continue
        # -------------------normal fitting------------------------
        if placing[type_i] is None:
            placing[type_i] = {}.fromkeys(flavors_unique)
            for f in flavors_unique:
                placing[type_i][f] = 0
            continue
        else:
            cpu_total, mem_total = machine_config[type_i][
                'CPU'], machine_config[type_i]['MEM']
            cpu_used, mem_used = _get_em_weights_of_cpu_and_mem(
                flavors_unique, flavors_config, placing[type_i])
            if cpu_total - cpu_used < vm_config[
                    'CPU'] or mem_total - mem_used < vm_config['MEM']:
                # add to backpack_list and create a new entity_machine
                backpack_result[type_i].append(placing[type_i])
                backpack_capcity[type_i].append(
                    (cpu_total - cpu_used, mem_total - mem_used))

                placing[type_i] = None
            else:
                placing[type_i][vm_flavor] += 1

                # used for estimate the cpu/mem rate
                cpu_predict -= vm_config['CPU']
                mem_predict -= vm_config['MEM']

                vms.pop(0)

                # add @2018-04-18
                # select next type of entity machine
                # type_i = random.choice(range(machine_number))

                if mem_predict == 0:
                    break
                # 1.Greedy Select
                type_i = argmin(
                    abs(minus(machine_rate, cpu_predict / float(mem_predict))))

    for i in range(len(placing)):
        if placing[i] is not None:

            # add @2018-04-18
            cpu_used, mem_used = _get_em_weights_of_cpu_and_mem(
                flavors_unique, flavors_config, placing[i])
            if cpu_used != 0 and mem_used != 0:
                possible = []
                for k in range(machine_number):
                    if machine_config[k]['CPU'] >= cpu_used and machine_config[
                            k]['MEM'] >= mem_used:
                        possible.append(True)
                    else:
                        possible.append(False)
                scores = [(cpu_used / float(machine_config[k]['CPU']) +
                           mem_used / float(machine_config[k]['MEM'])) /
                          2.0 if possible[k] else 0
                          for k in range(machine_number)]

                best_i = argmax(scores)
                backpack_result[best_i].append(placing[i])

                # backpack_result[i].append(placing[i])

    backpack_count = [len(b) for b in backpack_result]

    return backpack_count, backpack_result
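An illustration (plain Python, using the hypothetical machine configs from the parameter comment above) of the machine-type choice: pick the type whose CPU/MEM ratio is closest to the ratio of the remaining demand. The demand numbers are made up.

machine_config = [{'MEM': 128, 'CPU': 56}, {'MEM': 256, 'CPU': 84},
                  {'MEM': 192, 'CPU': 112}]
machine_rate = [c['CPU'] / float(c['MEM']) for c in machine_config]
cpu_predict, mem_predict = 150, 400
target = cpu_predict / float(mem_predict)
diffs = [abs(r - target) for r in machine_rate]
print(diffs.index(min(diffs)))  # 1, i.e. the Large-Memory type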
Example #15
    def _score_calc(y, y_):
        # RMSE between y and the rounded predictions y_
        y_ = [int(round(i)) for i in y_]
        numerator = sqrt(mean(square(minus(y, y_))))
        return numerator