Exemple #1
0
 def predict(self,X):
     """Predict every row as the row that precedes it in ``X``.

     A 1-D ``X`` yields a list of zeros, one per element.  A 2-D ``X``
     yields an all-zero first row followed by rows ``0 .. m-2`` of ``X``
     (the same row objects, not copies); the last row of ``X`` is unused.
     """
     if dim(X) == 1:
         return [0 for _ in X]
     n_rows, n_cols = shape(X)[0], shape(X)[1]
     lagged = [[0] * n_cols]
     lagged.extend(X[row] for row in range(n_rows - 1))
     return lagged
Exemple #2
0
def hstack(list_of_matrix):
    """Concatenate matrices side by side (column-wise).

    Args:
        list_of_matrix: non-empty list whose items are 2-D matrices (lists
            of rows) or 1-D vectors.  A 1-D vector is treated as a
            single-column matrix.  Every item must have the same number of
            rows as the first one.

    Returns:
        A new matrix allocated with ``zeros`` in which each row is the
        matching rows of all inputs joined left to right.

    Raises:
        AssertionError: if the argument is not a non-empty list, an item is
            neither 1-D nor 2-D, or the row counts differ.
    """
    assert (isinstance(list_of_matrix, list) and len(list_of_matrix) > 0)

    # Promote 1-D vectors to column matrices in a local list so the caller's
    # list is left untouched (assigning back into the argument would silently
    # change the caller's data).
    matrices = [[[x] for x in m] if dim(m) == 1 else m
                for m in list_of_matrix]

    high = shape(matrices[0])[0]
    stacking_length = []
    for m in matrices:
        assert (dim(m) == 2)
        assert (shape(m)[0] == high)
        stacking_length.append(shape(m)[1])

    R = zeros(high, sum(stacking_length))
    start = 0  # column offset of the current matrix inside R
    for m, width in zip(matrices, stacking_length):
        # element wise copy
        for j in range(high):
            for k in range(width):
                R[j][k + start] = m[j][k]
        start += width
    return R
    def fit(self, X, y, weights=None):
        """Fit ridge regression by solving the regularised normal equations.

        Computes ``W = (X^T X + alpha * m * E)^-1 X^T y`` where ``m`` is the
        number of rows of ``X`` and ``E`` is the identity matrix, with the
        bias entry zeroed when the intercept must not be penalised.

        Args:
            X: 2-D feature matrix.
            y: targets; validated and converted by ``self._check``.
            weights: optional sequence with one weight per row of ``X``.

        Sets ``self.W`` and ``self.importance_`` (``sum(self.W, axis=1)``
        with the bias entry dropped when an intercept is fitted).
        """
        X, y = self._check(X, y)

        if self.fit_intercept:
            bias = ones(shape(X)[0], 1)
            X = hstack([bias, X])

        eye = identity_matrix(shape(X)[1])
        # Column 0 is the bias column only when an intercept was prepended;
        # without one it is a real feature and must stay penalised.
        if self.fit_intercept and not self.penalty_bias:
            eye[0][0] = 0

        # add weights
        if weights is not None:
            from linalg.matrix import diag
            assert (len(weights) == shape(X)[0])
            # NOTE(review): only X is scaled, so the weights enter squared in
            # X^T X but linearly in X^T y -- confirm this is the intended
            # weighting scheme.
            X = matrix_matmul(diag(weights), X)

        X_T = matrix_transpose(X)

        penalty = multiply(eye, self.alpha * shape(X)[0])
        gram_inverse = matrix_inverse(plus(matrix_matmul(X_T, X), penalty))
        self.W = matrix_matmul(matrix_matmul(gram_inverse, X_T), y)

        self.importance_ = sum(self.W, axis=1)
        if self.fit_intercept:
            self.importance_ = self.importance_[1:]
Exemple #4
0
 def random_w(self,s):
     """Return a matrix of shape ``s`` filled with ``random.random()`` draws.

     ``s`` must have exactly two entries and is handed unchanged to
     ``zeros``; the values are drawn row by row, left to right.
     """
     assert(len(s)==2)
     weights = zeros(s)
     for r in range(shape(weights)[0]):
         row = weights[r]
         for c in range(shape(weights)[1]):
             row[c] = random.random()
     return weights
Exemple #5
0
 def train_cv(self, X, y, shuffle=False, cv='full'):
     """Pick, per target column, the best of ``self.max_iter`` random fits.

     For every column of ``y`` a fresh estimator is built from
     ``self.estimator(**self.parameter)``, fitted on the random feature
     subset returned by ``self._rand_X`` and scored with
     ``cross_val_score``.  The best-scoring estimator and its feature mask
     are appended to ``self.clfs`` and ``self.keeps``.

     Args:
         X: 2-D feature matrix.
         y: 2-D target matrix, one column per output.
         shuffle: forwarded to ``cross_val_score``.
         cv: an int or the string ``'full'``; forwarded to
             ``cross_val_score``.
     """
     assert (isinstance(cv, int) or cv == 'full')
     assert (dim(X) == 2 and dim(y) == 2)
     self.shape_Y = shape(y)
     for i in range(shape(y)[1]):
         max_score = None
         best_clf = None
         best_keep = None
         y_ = fancy(y, -1, i)
         for _ in range(self.max_iter):
             clf = self.estimator(**(self.parameter))
             X_, keep = self._rand_X(X)
             clf.fit(X_, y_)
             # NOTE(review): the score is computed on the full X and y, not
             # on the X_/y_ subset the estimator was just fitted on, so it
             # may not reflect the sampled feature mask -- confirm.
             score = cross_val_score(clf,
                                     X,
                                     y,
                                     return_mean=True,
                                     cv=cv,
                                     shuffle=shuffle)
             # Test for None explicitly: a genuine best score of 0 is falsy
             # and would otherwise be replaced by any later, worse score.
             if max_score is None or max_score < score:
                 max_score = score
                 best_clf = clf
                 best_keep = keep
         self.keeps.append(best_keep)
         self.clfs.append(best_clf)
Exemple #6
0
    def _fit(self, X, y):
        """Run the coordinate-wise update for one 1-D target.

        Args:
            X: 2-D design matrix.  When ``self.fit_intercept`` is set,
                column 0 is treated as the bias column: ``beta[0]`` is the
                intercept and is skipped by the thresholding loop.
            y: 1-D target vector.

        Returns:
            The coefficient vector ``beta`` (one entry per column of ``X``).
        """
        self._check(X, y)
        assert (dim(y) == 1)

        n_samples = shape(X)[0]
        beta = zeros(shape(X)[1])  # row vector
        X_T = matrix_transpose(X)
        target = reshape(y, -1)
        # Squared column norms never change, so compute them once.
        col_sq = [sum(square(column)) for column in X_T]

        if self.fit_intercept:
            # The intercept is the mean residual of the non-bias features, so
            # it must be computed against X without its leading bias column;
            # multiplying the full X by beta[1:] mismatches the widths.
            features = [row[1:] for row in X]
            beta[0] = sum(minus(target, dot(features, beta[1:]))) / n_samples

        start = 1 if self.fit_intercept else 0
        threshold = self.alpha * n_samples
        for _ in range(self.max_iter):
            for j in range(start, len(beta)):
                # NOTE(review): the residual uses the full beta; the textbook
                # coordinate-descent update uses the residual with feature j
                # excluded.  Kept as is -- confirm which one is intended.
                r_j = minus(target, dot(X, beta))
                arg1 = dot(X_T[j], r_j)

                if col_sq[j] != 0:
                    beta[j] = self._soft_thresholding_operator(
                        arg1, threshold) / col_sq[j]
                else:
                    beta[j] = 0

                if self.fit_intercept:
                    beta[0] = sum(minus(target, dot(
                        features, beta[1:]))) / n_samples
        return beta
 def _check(self, X, y):
     """Validate ``X``/``y`` and return them with ``y`` as a 2-D matrix.

     ``X`` must be 2-D, ``y`` must be 1-D or 2-D, and both must have the
     same first dimension.  The rank of ``y`` is stored in ``self.dim_Y``;
     a 1-D ``y`` is returned as a list of single-element rows.
     """
     x_is_matrix = dim(X) == 2
     y_rank = dim(y)
     assert (x_is_matrix and (y_rank == 2 or y_rank == 1))
     assert (shape(X)[0] == shape(y)[0])
     self.dim_Y = y_rank
     if y_rank != 1:
         return X, y
     return X, [[value] for value in y]
Exemple #8
0
def normalize(X,
              y=None,
              norm='l2',
              axis=1,
              return_norm=False,
              return_norm_inv=False):
    """Scale the rows or columns of ``X`` by their l1 or l2 norm.

    Args:
        X: 2-D matrix (list of rows).
        y: optional reference vector.  When given, each vector's norm is
            divided by the norm of ``y`` before it is used as the scaling
            factor.
        norm: ``'l2'`` (square root of the sum of squares) or ``'l1'``
            (sum of absolute values).
        axis: ``0`` scales each row of ``X``, ``1`` scales each column.
        return_norm: also return the list of scaling factors.
        return_norm_inv: also return their reciprocals (0 where the factor
            is 0).

    Returns:
        The scaled matrix ``A``, or ``(A, norms)``, ``(A, norms_inv)`` or
        ``(A, norms, norms_inv)`` depending on the two flags.  Vectors whose
        factor is 0 are left unscaled.

    Raises:
        AssertionError: on an unsupported ``axis`` or ``norm``.
    """
    assert (axis == 0 or axis == 1)
    assert (norm == 'l2' or norm == 'l1')

    def _vector_norm(v):
        # Single definition shared by every branch so l1/l2 mean the same
        # thing for rows, columns and the reference vector.
        if norm == 'l2':
            return sqrt(sum(square(v)))
        return sum(abs(v))

    y_norm = None
    if y is not None:
        y_norm = _vector_norm(y)
        if y_norm == 0:
            # NOTE(review): X alone is returned here even when return_norm /
            # return_norm_inv are set -- kept for compatibility, confirm.
            return X

    def _scale_of(v):
        n = _vector_norm(v)
        return n if y_norm is None else n / float(y_norm)

    norms = []
    if axis == 0:
        A = matrix_copy(X)
        for i in range(shape(X)[0]):
            # Measure the same row that is being scaled.
            n = _scale_of(X[i])
            if n != 0:
                A[i] = multiply(X[i], 1 / float(n))
            norms.append(n)
    else:
        scaled_columns = []
        for column in matrix_transpose(X):
            n = _scale_of(column)
            if n != 0:
                column = multiply(column, 1 / float(n))
            scaled_columns.append(column)
            norms.append(n)
        A = matrix_transpose(scaled_columns)

    norms_inv = [0 if x == 0 else 1 / float(x) for x in norms]
    if return_norm and return_norm_inv:
        return A, norms, norms_inv
    elif return_norm:
        return A, norms
    elif return_norm_inv:
        return A, norms_inv
    else:
        return A
Exemple #9
0
 def fit(self, X, y):
     """Memorise the training data.

     Stores ``X`` as given, stores ``y`` as a 2-D matrix (a 1-D ``y``
     becomes one single-element row per value) and records ``shape(X)``
     together with the shape of the ``y`` that was passed in.
     """
     self.shape_X = shape(X)
     self.shape_Y = shape(y)
     self.X = X
     self.y = [[value] for value in y] if dim(y) == 1 else y
Exemple #10
0
def exponential_smoothing(A, axis=0, alpha=0.1):
    """Smooth the rows of ``A`` with a running weighted average.

    Starting from an all-zero state, every output row is
    ``(1 - alpha) * A[i] + alpha * previous_output``.  Only ``axis == 0``
    is supported.
    """
    assert (axis == 0)
    state = zeros(shape(A)[1])
    smoothed = []
    for idx in range(shape(A)[0]):
        state = plus(multiply(A[idx], (1 - alpha)), multiply(state, alpha))
        smoothed.append(state)
    return smoothed
 def outlier_handling(sample, method='mean', max_sigma=3):
     """Dampen high outliers of ``sample`` in place and return it.

     A value counts as an outlier when it exceeds ``mean_[j]`` by more than
     ``max_sigma * std_[j]`` for its column ``j`` (only the high side is
     checked).  ``'mean'`` replaces it with ``mean_[j]``; ``'dynamic'``
     averages it with ``mean_[j]``, but only for rows in the first half of
     ``sample``.
     """
     assert method in ('mean', 'dynamic')
     col_std = stdev(sample)
     col_mean = mean(sample, axis=0)
     half = len(sample) / 2.0
     for r in range(shape(sample)[0]):
         row = sample[r]
         for c in range(shape(sample)[1]):
             if not (row[c] - col_mean[c] > max_sigma * col_std[c]):
                 continue
             if method == 'mean':
                 row[c] = col_mean[c]
             elif method == 'dynamic' and r < half:
                 row[c] = (col_mean[c] + row[c]) / 2.0
     return sample
Exemple #12
0
def matrix_inverse(A):
    """Invert a square matrix by Gauss-Jordan elimination with full pivoting.

    Row operations are accumulated in ``L`` and column swaps in ``R`` so that
    ``L * A * R`` becomes diagonal; the inverse is then ``R * D^-1 * L``.

    Args:
        A: 2-D square matrix (list of rows).  It is not modified.

    Returns:
        The inverse of ``A`` as a new matrix.

    Raises:
        AssertionError: if ``A`` is not 2-D.
        ValueError: if ``A`` is singular (no non-zero pivot can be found).
    """
    assert (dim(A) == 2)
    N = shape(A)[0]
    # Eliminate on a private copy so the caller's matrix is not destroyed.
    A = [list(row) for row in A]
    L = identity_matrix(N)
    R = identity_matrix(N)

    def _magnitude(v):
        # Written out by hand: other code in this file calls ``abs`` on whole
        # vectors, so that name may not be the scalar builtin here.
        return -v if v < 0 else v

    def _row_assign(M, dest, source, factor):
        # M[dest] += factor * M[source]
        M[dest] = [factor * s + d for s, d in zip(M[source], M[dest])]

    def _row_switch(M, dest, source):
        M[dest], M[source] = M[source], M[dest]

    def _col_switch(M, dest, source):
        for row in M:
            row[dest], row[source] = row[source], row[dest]

    # down triangle
    for j in range(N):
        # Select the entry of largest magnitude in the remaining sub-matrix.
        # Comparing signed values would prefer 0 over any negative entry and
        # reject invertible matrices such as [[-1, 0], [0, -1]].
        max_k, max_w = j, j
        for k in range(j, N):
            for w in range(j, N):
                if _magnitude(A[k][w]) > _magnitude(A[max_k][max_w]):
                    max_k, max_w = k, w
        _row_switch(A, j, max_k)
        _row_switch(L, j, max_k)
        _col_switch(A, j, max_w)
        _col_switch(R, j, max_w)

        if A[j][j] == 0:
            raise ValueError("matrix is singular and cannot be inverted")

        for i in range(j + 1, N):
            fa = -A[i][j] / A[j][j]
            _row_assign(A, i, j, fa)
            _row_assign(L, i, j, fa)

    # upper triangle (pivots were already checked to be non-zero above)
    for j in range(N - 1, -1, -1):
        for i in range(j - 1, -1, -1):
            fa = -A[i][j] / A[j][j]
            _row_assign(A, i, j, fa)
            _row_assign(L, i, j, fa)

    # Normalise by the diagonal that is left in A.
    for i in range(N):
        L[i] = [x / A[i][i] for x in L[i]]
    return matrix_matmul(R, L)
 def outlier_handling(sample,method='mean',max_sigma=3):
     """Return a copy of ``sample`` with its high outliers dampened.

     A value counts as an outlier when it exceeds ``mean_[j]`` by more than
     ``max_sigma * std_[j]`` for its column ``j`` (only the high side is
     checked).  ``'mean'`` replaces it with ``mean_[j]``, ``'zero'`` with
     0, and ``'dynamic'`` with the average of the value and ``mean_[j]``.
     """
     assert method in ('mean', 'zero', 'dynamic')
     cleaned = matrix_copy(sample)
     col_std = stdev(cleaned)
     col_mean = mean(cleaned,axis=1)
     for r in range(shape(cleaned)[0]):
         row = cleaned[r]
         for c in range(shape(cleaned)[1]):
             if not (row[c] - col_mean[c] > max_sigma * col_std[c]):
                 continue
             if method == 'mean':
                 row[c] = col_mean[c]
             elif method == 'zero':
                 row[c] = 0
             elif method == 'dynamic':
                 row[c] = (row[c] + col_mean[c]) / 2.0

     return cleaned
Exemple #14
0
 def _col_switch(A, dest, source):
     """Swap columns ``dest`` and ``source`` of the 2-D matrix ``A`` in place."""
     assert (dim(A) == 2)
     n_rows, _ = shape(A)
     for r in range(n_rows):
         A[r][dest], A[r][source] = A[r][source], A[r][dest]
Exemple #15
0
def corrcoef(A):
    """Return the matrix of correlation coefficients between columns of ``A``.

    Entry ``[i][j]`` is the mean product of the centred columns ``i`` and
    ``j`` divided by the product of their root-mean-square deviations.  The
    diagonal is 1 and a pair with a zero denominator gets 0.

    Args:
        A: 2-D matrix (list of rows).

    Returns:
        An ``n x n`` symmetric matrix, ``n`` being the column count of ``A``.
    """
    assert (dim(A) == 2)
    n = shape(A)[1]

    # Transpose, centre and measure every column once instead of redoing
    # that work for each of the n^2 column pairs.
    centred = []
    spread = []
    for column in matrix_transpose(A):
        column_mean = mean(column)
        deviations = [k - column_mean for k in column]
        centred.append(deviations)
        spread.append(sqrt(mean(square(deviations))))

    R = zeros((n, n))
    for i in range(n):
        R[i][i] = 1
        for j in range(i + 1, n):
            denominator = spread[i] * spread[j]
            if denominator == 0:
                r = 0
            else:
                r = mean(multiply(centred[i], centred[j])) / denominator
            # The matrix is symmetric: fill both halves from one computation.
            R[i][j] = r
            R[j][i] = r
    return R
Exemple #16
0
 def predict(self, X):
     """Predict targets for ``X`` from the fitted coefficients.

     Uses ``dot(X, self.beta)`` when ``self.beta`` is set, otherwise
     ``matrix_matmul(X, self.betas)``.  A column of ones is prepended to
     ``X`` first when ``self.fit_intercept`` is true.

     Raises:
         AssertionError: if neither ``self.beta`` nor ``self.betas`` is set.
     """
     # Identity tests: "!= None" would invoke the coefficients' own __ne__.
     assert (self.beta is not None or self.betas is not None)
     if self.fit_intercept:
         X = hstack([ones(shape(X)[0], 1), X])
     if self.beta is not None:
         return dot(X, self.beta)
     return matrix_matmul(X, self.betas)
Exemple #17
0
def matrix_copy(A):
    """Return a new matrix from ``zeros`` holding the same entries as ``A``."""
    assert (dim(A) == 2)
    n_rows, n_cols = shape(A)
    duplicate = zeros((n_rows, n_cols))
    for r, c in ((r, c) for r in range(n_rows) for c in range(n_cols)):
        duplicate[r][c] = A[r][c]
    return duplicate
Exemple #18
0
def stdev(X):
    """Return one deviation value per column of ``X``.

    For column ``j`` this is the square root of the mean squared difference
    between the column's entries and ``mean(X, axis=1)[j]``.
    """
    columns = matrix_transpose(X)
    centres = mean(X, axis=1)
    return [
        sqrt(mean(square(minus(columns[j], centres[j]))))
        for j in range(shape(X)[1])
    ]
    def get_feature_grid(sample,i,fill_na='mean',max_na_rate=1,col_count=None,with_test=True):
        """Build a grid of lagged values from one column of ``sample``.

        With ``col = fancy(sample, None, i)``, row ``j`` of the grid is the
        first ``j`` values of ``col`` left-padded with ``None`` up to
        ``len(col)`` entries.  The leading ``int((1 - max_na_rate) * width)``
        columns are then dropped via ``fancy`` and the remaining ``None``
        entries are filled.

        Args:
            sample: source matrix.
            i: selector passed to ``fancy`` to pick the column.
            fill_na: ``'mean'`` fills gaps with the per-column value from
                ``_mean_with_none``; ``'zero'`` fills them with 0.
            max_na_rate: controls how many leading columns are dropped.
            col_count: when given, only the last ``col_count`` columns are
                returned (presumably -- this relies on ``fancy`` treating
                ``(-col_count,)`` as a slice start; confirm).
            with_test: when false, the last row is left out.

        Returns:
            The filled grid, restricted as described above.
        """
        assert(fill_na=='mean' or fill_na=='zero')
        col = fancy(sample,None,i)
        length = len(col)
        R = []
        for j in range(length):
            r = [None] * (length - j)
            r.extend(col[:j])
            R.append(r)

        def _mean_with_none(A):
            # NOTE(review): the sum of the non-None entries is divided by the
            # full length, i.e. None counts as 0 -- confirm this is intended
            # rather than a mean over the present values only.
            if len(A)==0:
                return 0
            total = 0
            for value in A:
                if value is not None:
                    total += value
            return total/float(len(A))

        width = int((1-max_na_rate) * shape(R)[1])
        R = fancy(R,None,(width,))

        # Take the column means after the leading columns are dropped so that
        # means[j] describes the same column j that is filled below.
        means = [_mean_with_none(fancy(R,None,j)) for j in range(shape(R)[1])]

        for row in range(shape(R)[0]):
            for j in range(shape(R)[1]):
                if R[row][j] is None:
                    if fill_na=='mean':
                        R[row][j] = means[j]
                    elif fill_na=='zero':
                        R[row][j]=0

        if col_count is not None:
            rows = None if with_test else (0,-1)
            return fancy(R,rows,(-col_count,))
        return R if with_test else R[:-1]
Exemple #20
0
def stdev(X, axis=0):
    """Return one deviation value per column of the 2-D matrix ``X``.

    For column ``j`` this is the square root of the mean squared difference
    between the column's entries and ``mean(X, axis=0)[j]``.  Only
    ``axis == 0`` is supported.
    """
    assert (dim(X) == 2)
    assert (axis == 0)
    columns = matrix_transpose(X)
    centres = mean(X, axis=0)
    return [
        sqrt(mean(square(minus(columns[j], centres[j]))))
        for j in range(shape(X)[1])
    ]
Exemple #21
0
 def predict(self, X):
     """Predict targets as ``X * self.W``.

     A column of ones is prepended to ``X`` when ``self.fit_intercept`` is
     true.  When ``self.dim_Y`` is 1 the single-column result is flattened
     to a plain list.

     Raises:
         AssertionError: if ``self.W`` is None.
     """
     # Identity test: "!= None" would invoke the weight object's own __ne__.
     assert (self.W is not None)
     if self.fit_intercept:
         X = hstack([ones(shape(X)[0], 1), X])
     result = matrix_matmul(X, self.W)
     if self.dim_Y == 1:
         result = [x[0] for x in result]
     return result
Exemple #22
0
 def _rand_X(self, X):
     """Select a random subset of the columns of ``X``.

     Keeps ``ceil((1 - self.drop_out) * N)`` of the ``N`` columns, chosen
     uniformly without replacement.

     Returns:
         ``(X_, keep)`` where ``keep`` is a list of ``N`` booleans marking
         the kept columns and ``X_`` is ``fancy(X, -1, keep)``.

     Raises:
         ValueError: if ``self.drop_out`` is outside ``[0, 1]``; the target
             count could then never be reached and the sampling loop would
             not terminate.
     """
     if not 0 <= self.drop_out <= 1:
         raise ValueError(
             "drop_out must be between 0 and 1, got %r" % (self.drop_out,))
     N = shape(X)[1]
     # int(): math.ceil returns a float on older interpreters.
     keep_length = int(math.ceil((1 - self.drop_out) * N))
     keep_set = set()
     while len(keep_set) < keep_length:
         keep_set.add(random.randrange(N))
     keep = [i in keep_set for i in range(N)]
     X_ = fancy(X, -1, keep)
     return X_, keep
Exemple #23
0
    def fit(self, X, y):
        """Fit by least squares through the normal equations.

        After validation by ``self._check`` (and, when ``self.fit_intercept``
        is true, after prepending a column of ones to ``X``) stores
        ``self.W = (X^T X)^-1 X^T y``.
        """
        X, y = self._check(X, y)
        if self.fit_intercept:
            n_rows, _ = shape(X)
            X = hstack([ones(n_rows, 1), X])

        X_T = matrix_transpose(X)
        gram_inverse = matrix_inverse(matrix_matmul(X_T, X))
        projector = matrix_matmul(gram_inverse, X_T)
        self.W = matrix_matmul(projector, y)
def resampling(ecs_logs,
               flavors_unique,
               training_start_time,
               predict_start_time,
               frequency=7,
               strike=1,
               skip=0):
    """Count log entries per flavor over sliding windows of days.

    Window ``i`` (counted backwards from the prediction start, after
    ``skip`` days have been subtracted from it) covers entries whose age in
    whole days lies in ``[i * strike, i * strike + frequency)``.

    Args:
        ecs_logs: iterable of ``(flavor, time)`` pairs; ``time`` must
            support subtraction from ``predict_start_time`` yielding an
            object with ``.days``.
        flavors_unique: flavors to count; their order fixes the column
            order of the result.
        training_start_time: start of the training period.
        predict_start_time: start of the prediction period.
        frequency: window length in days.
        strike: distance in days between consecutive windows.
        skip: number of most recent days to leave out.

    Returns:
        A matrix with one row per window, oldest window first, and one
        column per flavor.

    Raises:
        KeyError: if a counted entry has a flavor not in ``flavors_unique``.
    """
    mapping_index = {f: c for c, f in enumerate(flavors_unique)}

    predict_start_time = predict_start_time - timedelta(days=skip)
    days_total = (predict_start_time - training_start_time).days

    # Floor division keeps the window count an int under true division too;
    # a float here breaks both range() and the matrix allocation.
    sample_length = ((days_total - frequency) // strike) + 1

    sample = zeros((sample_length, len(flavors_unique)))

    # Every entry inside a window counts once.  (The earlier per-flavor
    # "last seen" bookkeeping incremented in all of its branches, so it never
    # changed the counts.)
    for f, ecs_time in ecs_logs:
        age = (predict_start_time - ecs_time).days
        for i in range(sample_length):
            # 0 - 6 for example
            if i * strike <= age < i * strike + frequency:
                sample[i][mapping_index[f]] += 1

    # ----------------------------#
    sample = sample[::-1]
    # [       old data            ]
    # [         ...               ]
    # [         ...               ]
    # [       new_data            ]
    # ----------------------------#
    assert (shape(sample) == (sample_length, len(flavors_unique)))
    return sample
Exemple #25
0
def matrix_matmul(A, B):
    """Return the matrix product ``A * B`` as a new list of rows.

    Both arguments must be 2-D and the column count of ``A`` must equal the
    row count of ``B``.
    """
    assert (dim(A) == 2 and dim(B) == 2 and shape(A)[1] == shape(B)[0])

    n_rows = shape(A)[0]
    n_cols = shape(B)[1]

    product = []
    for i in range(n_rows):
        left = A[i]
        out_row = []
        for j in range(n_cols):
            # Inner product of row i of A with column j of B.
            acc = 0
            for k, a in enumerate(left):
                acc += a * B[k][j]
            out_row.append(acc)
        product.append(out_row)
    return product
Exemple #26
0
def train_test_split(X, y, test_size=0.2, random_state=None, align=None):
    """Split ``X`` and ``y`` into training and test parts.

    Args:
        X: sequence of samples.
        y: sequence of targets, same length as ``X``.
        test_size: a value ``>= 1`` is the number of test samples; a smaller
            value is the fraction of samples to use (at least one).
        random_state: optional seed passed to ``random.seed``.
        align: ``None`` draws the test samples at random (order preserved
            within each part); ``'right'`` uses the last samples as the test
            set; ``'left'`` uses the first samples as the test set.

    Returns:
        ``(X_train, X_test, y_train, y_test)``.

    Raises:
        AssertionError: if the lengths differ or ``align`` is unknown.
        ValueError: if the requested test size does not fit into the data.
    """
    N = len(X)
    assert (N == len(y))

    if test_size >= 1:
        test_length = int(test_size)
    else:
        # int(): round() returns a float on older interpreters, which cannot
        # be used as a slice bound.
        test_length = int(round(N * test_size))
        if test_length == 0:
            test_length = 1

    # An impossible size would make the random draw below loop forever.
    if not 0 <= test_length <= N:
        raise ValueError(
            "test size %r does not fit %d samples" % (test_size, N))

    if random_state is not None:
        random.seed(random_state)

    assert (align is None or align == 'left' or align == 'right')
    if align == 'right':
        split = N - test_length
        return X[:split], X[split:], y[:split], y[split:]
    if align == 'left':
        # Test block first, returned in the same (train, test) order as the
        # other modes.
        return (X[test_length:], X[:test_length],
                y[test_length:], y[:test_length])

    test_set = set()
    while len(test_set) < test_length:
        test_set.add(random.randrange(N))

    X_train, X_test, Y_train, Y_test = [], [], [], []
    for i in range(N):
        if i in test_set:
            X_test.append(X[i])
            Y_test.append(y[i])
        else:
            X_train.append(X[i])
            Y_train.append(y[i])

    return X_train, X_test, Y_train, Y_test
Exemple #27
0
    def fit(self, X, y):
        """Fit the coordinate-wise soft-thresholding model.

        For a 1-D ``y`` the coefficients are computed here and stored in
        ``self.beta`` (plus ``self.coef_`` and, with an intercept,
        ``self.intercept_``).  For a 2-D ``y`` one coefficient vector per
        target column is obtained from ``self._fit`` and the transposed
        stack is stored in ``self.betas``.

        Args:
            X: 2-D feature matrix.
            y: 1-D or 2-D targets.

        Returns:
            ``self``.
        """
        self._check(X, y)
        if dim(y) == 1:
            raw_X = X
            if self.fit_intercept:
                X = hstack([ones(shape(X)[0], 1), X])

            n_samples = shape(X)[0]
            beta = zeros(shape(X)[1])  # row vector
            X_T = matrix_transpose(X)
            target = reshape(y, -1)
            # Squared column norms never change, so compute them once.
            col_sq = [sum(square(column)) for column in X_T]

            if self.fit_intercept:
                beta[0] = sum(minus(target, dot(
                    raw_X, beta[1:]))) / n_samples

            start = 1 if self.fit_intercept else 0
            threshold = self.alpha * n_samples
            for _ in range(self.max_iter):
                for j in range(start, len(beta)):
                    # NOTE(review): the residual uses the full beta; the
                    # textbook coordinate-descent update uses the residual
                    # with feature j excluded.  Kept as is -- confirm which
                    # one is intended.
                    r_j = minus(target, dot(X, beta))
                    arg1 = dot(X_T[j], r_j)

                    if col_sq[j] != 0:
                        beta[j] = self._soft_thresholding_operator(
                            arg1, threshold) / col_sq[j]
                    else:
                        beta[j] = 0

                    if self.fit_intercept:
                        beta[0] = sum(minus(target, dot(
                            raw_X, beta[1:]))) / n_samples

            if self.fit_intercept:
                self.intercept_ = beta[0]
                self.coef_ = beta[1:]
            else:
                self.coef_ = beta
            self.beta = beta
            # Drop coefficients of an earlier multi-target fit so they cannot
            # be picked up after this single-target fit.
            self.betas = None
            return self
        elif dim(y) == 2:
            if self.fit_intercept:
                X = hstack([ones(shape(X)[0], 1), X])
            y_t = matrix_transpose(y)
            betas = [self._fit(X, y_t[i]) for i in range(shape(y)[1])]
            self.betas = matrix_transpose(betas)
            # Drop coefficients of an earlier single-target fit; a stale
            # self.beta would otherwise take precedence over self.betas.
            self.beta = None
            return self
 def flavor_clustering(sample,k=3,variance_threshold=None):
     """List, for every column of ``sample``, its most correlated columns.

     Each row of ``corrcoef(sample)`` is ranked in descending order and the
     first ranked index is skipped.  Without ``variance_threshold`` the next
     ``k`` indices are kept; with it, every remaining index whose
     coefficient exceeds the threshold is kept (``k`` is then ignored).

     Returns:
         ``(clustering_paths, corrcoef_sample)``.
     """
     corrcoef_sample = corrcoef(sample)
     clustering_paths = []
     for column in range(shape(sample)[1]):
         scores = corrcoef_sample[column]
         ordered = argsort(scores)[::-1]
         if variance_threshold is None:
             neighbours = ordered[1:k+1]
         else:
             neighbours = [idx for idx in ordered[1:]
                           if scores[idx]>variance_threshold]
         clustering_paths.append(neighbours)
     return clustering_paths,corrcoef_sample
Exemple #29
0
def minmax_scaling(X, axis=1):
    """Centre every column of ``X`` on its mean and divide by its range.

    Each value becomes ``(x - mean) / (max - min)`` of its column; a column
    whose maximum equals its minimum is passed through unchanged.  Only
    ``axis == 1`` is supported.
    """
    assert (axis == 1)
    scaled_columns = []
    for j in range(shape(X)[1]):
        column = fancy(X, None, j)
        highest = max(column)
        lowest = min(column)
        centre = mean(column)
        span = highest - lowest
        if span == 0:
            scaled_columns.append(column)
            continue
        scaled_columns.append([(value - centre) / (highest - lowest)
                               for value in column])
    return matrix_transpose(scaled_columns)
Exemple #30
0
def shift(A, shift_step, fill=None):
    """Shift the rows of the 2-D matrix ``A`` by ``shift_step`` positions.

    Row ``i`` of the result is row ``i - shift_step`` of ``A``.  Rows whose
    source falls outside ``A`` are filled with ``fill``; when ``fill`` is a
    list, column ``j`` receives ``fill[j]``.
    """
    assert (dim(A) == 2)
    shifted = zeros(shape(A))
    per_column_fill = type(fill) == list
    for i in range(shape(A)[0]):
        source_row = i - shift_step
        if shift_step >= 0:
            available = i >= shift_step
        else:
            available = source_row < shape(A)[0]
        for j in range(shape(A)[1]):
            if available:
                shifted[i][j] = A[source_row][j]
            elif per_column_fill:
                shifted[i][j] = fill[j]
            else:
                shifted[i][j] = fill
    return shifted