Ejemplo n.º 1
0
    def _fit(self, X, y):
        self._check(X, y)
        assert (dim(y) == 1)

        beta = zeros(shape(X)[1])  # row vector
        X_T = matrix_transpose(X)

        if self.fit_intercept:
            beta[0] = sum(minus(reshape(y, -1), dot(X,
                                                    beta[1:]))) / (shape(X)[0])

        for _ in range(self.max_iter):
            print(_)
            start = 1 if self.fit_intercept else 0
            for j in range(start, len(beta)):
                tmp_beta = [x for x in beta]
                tmp_beta[j] = 0.0

                r_j = minus(reshape(y, -1), dot(X, beta))
                # r_j = minus(reshape(y,-1) , dot(X, tmp_beta))
                arg1 = dot(X_T[j], r_j)
                arg2 = self.alpha * shape(X)[0]

                if sum(square(X_T[j])) != 0:
                    beta[j] = self._soft_thresholding_operator(
                        arg1, arg2) / sum(square(X_T[j]))
                else:
                    beta[j] = 0

                if self.fit_intercept:
                    beta[0] = sum(minus(reshape(y, -1), dot(
                        X, beta[1:]))) / (shape(X)[0])
        return beta
Ejemplo n.º 2
0
 def _score_calc(y, y_):
     y_ = [int(round(i)) for i in y_]
     numerator = sqrt(mean(square(minus(y, y_))))
     denominator = sqrt(mean(square(y))) + sqrt(mean(square(y_)))
     if denominator == 0:
         return 0
     else:
         return 1 - (numerator / float(denominator))
Ejemplo n.º 3
0
    def fit(self, X, y):
        self._check(X, y)
        if dim(y) == 1:
            raw_X = X
            if self.fit_intercept:
                X = hstack([ones(shape(X)[0], 1), X])

            beta = zeros(shape(X)[1])  # row vector
            X_T = matrix_transpose(X)

            if self.fit_intercept:
                beta[0] = sum(minus(reshape(y, -1), dot(
                    raw_X, beta[1:]))) / (shape(X)[0])

            for _ in range(self.max_iter):
                start = 1 if self.fit_intercept else 0
                for j in range(start, len(beta)):
                    tmp_beta = [x for x in beta]
                    tmp_beta[j] = 0.0

                    r_j = minus(reshape(y, -1), dot(X, beta))
                    # r_j = minus(reshape(y,-1) , dot(X, tmp_beta))
                    arg1 = dot(X_T[j], r_j)
                    arg2 = self.alpha * shape(X)[0]

                    if sum(square(X_T[j])) != 0:
                        beta[j] = self._soft_thresholding_operator(
                            arg1, arg2) / sum(square(X_T[j]))
                    else:
                        beta[j] = 0

                    if self.fit_intercept:
                        beta[0] = sum(
                            minus(reshape(y, -1), dot(
                                raw_X, beta[1:]))) / (shape(X)[0])
                # # add whatch
                # self.beta = beta
                # self._whatch(raw_X,y)

            if self.fit_intercept:
                self.intercept_ = beta[0]
                self.coef_ = beta[1:]
            else:
                self.coef_ = beta
            self.beta = beta
            return self
        elif dim(y) == 2:
            if self.fit_intercept:
                X = hstack([ones(shape(X)[0], 1), X])
            y_t = matrix_transpose(y)
            betas = []
            for i in range(shape(y)[1]):
                betas.append(self._fit(X, y_t[i]))
            batas = matrix_transpose(betas)
            self.betas = batas
Ejemplo n.º 4
0
    def predict(self, X):
        result = []
        # dim_X = dim(X)

        if dim(X) == 1:
            X = [X]
        for x in X:
            loss = sum(square(minus(self.X, x)), axis=1)

            index = argsort(loss)[:self.k]
            if self.verbose:
                print(index)

            ys = []
            for i in index:
                ys.append(self.y[i])

            k_loss_raw = sorted(loss)[:self.k]
            k_loss = [1 / l if l != 0 else 0 for l in k_loss_raw]
            k_loss_sum = sum(k_loss)
            weights = [
                l / float(k_loss_sum) if k_loss_sum != 0 else 1 for l in k_loss
            ]
            weight_m = diag(weights)
            ys = matrix_matmul(weight_m, ys)
            result.append(sum(ys, axis=0))

        if len(self.shape_Y) == 1:
            result = matrix_transpose(result)[0]

        return result
Ejemplo n.º 5
0
def standard_scaling(X, y=None, axis=1):
    if axis == 0:
        return matrix_transpose(standard_scaling(matrix_transpose(X), axis=1))
    R = []
    for j in range(shape(X)[1]):
        col = fancy(X, None, j)
        mean_ = mean(col)
        std = sqrt(mean(square(minus(col, mean_))))

        if y != None:
            std_y = sqrt(mean(square(minus(y, mean(y)))))

        if std == 0:
            R.append(col)
        else:
            R.append([(x - mean_) * std_y / std for x in col])
    return matrix_transpose(R)
Ejemplo n.º 6
0
def stdev(X):
    # X = matrix_copy(X)
    X_T = matrix_transpose(X)
    m = mean(X, axis=1)
    R = []
    for j in range(shape(X)[1]):
        R.append(sqrt(mean(square(minus(X_T[j], m[j])))))
    return R
Ejemplo n.º 7
0
def stdev(X, axis=0):
    assert (dim(X) == 2)
    assert (axis == 0)
    X_T = matrix_transpose(X)
    m = mean(X, axis=0)
    R = []
    for j in range(shape(X)[1]):
        R.append(sqrt(mean(square(minus(X_T[j], m[j])))))
    return R
Ejemplo n.º 8
0
def normalize(X,
              y=None,
              norm='l2',
              axis=1,
              return_norm=False,
              return_norm_inv=False):
    assert (axis == 0 or axis == 1)
    assert (norm == 'l2' or norm == 'l1')
    X_T = matrix_transpose(X)

    y_norm = None
    if y != None:
        if norm == 'l2':
            y_norm = sqrt(sum(square(y)))
        elif norm == 'l1':
            y_norm = sqrt(sum(abs(y)))
    if y and y_norm == 0:
        return X

    norms = []
    if axis == 0:
        A = matrix_copy(X)

        for i in range(shape(X)[0]):
            n = 0
            if norm == 'l2':
                n = sqrt(sum(square(
                    X_T[i]))) if not y else sqrt(sum(square(X_T[i]))) / y_norm
            elif norm == 'l1':
                n = sqrt(sum(abs(
                    X_T[i]))) if not y else sqrt(sum(square(X_T[i]))) / y_norm
            if n != 0:
                A[i] = (multiply(X[i], 1 / float(n)))
            norms.append(n)
    elif axis == 1:
        A = matrix_transpose(X)
        for j in range(shape(X)[1]):
            n = 0
            if norm == 'l2':
                n = sum(square(
                    X_T[j])) if not y else sqrt(sum(square(X_T[j]))) / y_norm
            elif norm == 'l1':
                n = sum(abs(
                    X_T[j])) if not y else sqrt(sum(square(X_T[j]))) / y_norm
            if n != 0:
                A[j] = (multiply(X_T[j], 1 / float(n)))
            norms.append(n)

        A = matrix_transpose(A)

    norms_inv = [0 if x == 0 else 1 / float(x) for x in norms]
    if return_norm and return_norm_inv:
        return A, norms, norms_inv
    elif return_norm:
        return A, norms
    elif return_norm_inv:
        return A, norms_inv
    else:
        return A
Ejemplo n.º 9
0
    def _corr(A, i, j):
        assert (dim(A) == 2)
        m, n = shape(A)
        A_T = matrix_transpose(A)

        X, Y = A_T[i], A_T[j]  # X,Y = col(A,i),col(A,j)

        mean_X, mean_Y = mean(X), mean(Y)
        X_ = [k - mean_X for k in X]
        Y_ = [k - mean_Y for k in Y]
        numerator = mean(multiply(X_, Y_))
        # print(sqrt(mean(square(X_))))

        denominator = sqrt(mean(square(X_))) * sqrt(mean(square(Y_)))
        if denominator == 0:
            return 0
        else:
            r = (numerator) / (denominator)
            return r
Ejemplo n.º 10
0
    def predict(self, X):
        result = []
        # dim_X = dim(X)
        if dim(X) == 1:
            X = [X]
        for x in X:
            loss = sum(square(minus(self.X, x)), axis=1)
            # loss = sum(abs(minus(self.X,x)),axis=1)

            from preprocessing import standard_scaling
            new_X = standard_scaling(self.X, axis=0)
            x = sqrt(square(minus(x, mean(x))))
            loss = minus(loss, multiply(dot(new_X, x), self.alpha))

            index = argsort(loss)[:self.k]
            if self.verbose:
                print(index, '/len', len(loss))
            ys = []
            for i in index:
                ys.append(self.y[i])
            result.append(mean(ys, axis=0))
        return result
Ejemplo n.º 11
0
    def predict(self, X):
        result = []
        # dim_X = dim(X)

        if dim(X) == 1:
            X = [X]
        for x in X:
            loss = sum(square(minus(self.X, x)), axis=1)
            # loss = sum(abs(minus(self.X,x)),axis=1)
            index = argsort(loss)[:self.k]
            if self.verbose:
                print(index, '/len', len(loss))
            ys = []
            for i in index:
                ys.append(self.y[i])
            result.append(mean(ys, axis=0))
        return result
Ejemplo n.º 12
0
    def fit(self,X,y):
        assert(dim(X)==2)
        assert(dim(y)==1 or dim(y)==2)
        self.shape_X = shape(X)
        self.shape_Y = shape(y)

        if dim(y) == 1:
            y = [[k] for k in y]
        
        best_w = None
        min_err = None
        for i in range(self.max_iter):
            
            

            W = self.random_w((shape(X)[1],shape(y)[1]))
            
            y_ = matrix_matmul(X,W)
            err = mean(sqrt(mean(square(minus(y,y_)),axis=1)))
            if not best_w or min_err>err:
                best_w = W
                min_err = err
            print(err)
        self.W = best_w
Ejemplo n.º 13
0
def cross_val_score(estimator_instance,
                    X,
                    y,
                    is_shuffle=False,
                    cv='full',
                    scoring='score',
                    random_state=None,
                    return_mean=False,
                    verbose=False):
    assert ((type(cv) == int and cv > 1) or cv == 'full')
    assert (scoring == 'score' or scoring == 'loss')

    if type(cv) == int:
        assert (cv < len(X))
    if is_shuffle:
        X, y = shuffle(X, y=y, random_state=random_state)
    N = len(X)
    K = N if cv == 'full' else cv

    h = len(X) / float(K)

    scores = []
    losses = []
    for i in range(K):
        s = int(round((i * h)))
        e = int(round((i + 1) * h))

        X_train, Y_train = [], []
        X_train.extend(X[:s])
        X_train.extend(X[e:])
        Y_train.extend(y[:s])
        Y_train.extend(y[e:])

        X_val, Y_val = X[s:e], y[s:e]
        estimator_instance.fit(X_train, Y_train)
        p = estimator_instance.predict(X_val)
        score = official_score(p, Y_val)
        loss = l2_loss(p, Y_val)
        # score = estimator_instance.score(X_val,Y_val)
        scores.append(score)
        losses.append(loss)

    # print(scores)
    if return_mean:
        if scoring == 'score':
            # print(scores)
            std = sqrt(mean(square(minus(scores, mean(scores)))))
            return (sorted(scores)[len(scores) / 2] + mean(scores) -
                    0.5 * std) / 2.0
            # return (sorted(scores)[len(scores)/2] + mean(scores) - std)/2.0
            # return sorted(scores)[len(scores)/2] - std
            # return max(scores)
            # return mean(scores[:len(scores)/2])
            # return mean(sorted(scores)[::-1][:len(scores)/2])
            # return (mean(scores) + max(scores))/2.0
            # return mean(scores)
            # return mean(scores) -0.5*std
        elif scoring == 'loss':
            # return mean(losses)
            std = sqrt(mean(square(minus(losses, mean(losses)))))
            # return mean(losses)
            return ((sorted(losses)[len(losses) / 2] + mean(losses) + std) /
                    2.0)

    else:
        if scoring == 'score':
            return scores
        elif scoring == 'loss':
            return losses
Ejemplo n.º 14
0
 def _whatch(self, X, y):
     p = self.predict(X)
     loss = sum(square(minus(p, y)))
     print(loss)
Ejemplo n.º 15
0
 def _score_calc(y, y_):
     y_ = [int(round(i)) for i in y_]
     numerator = sqrt(mean(square(minus(y, y_))))
     return numerator