def ols(x, y, const=True):
    """
    Desc:
        Fit a linear regression by ordinary least squares (OLS).
    Parameters:
        x: A 2-D array whose columns are the explanatory variables.
        y: A 2-D array holding the dependent variable, one row per observation.
        const: A bool. When True (the default) a column of ones is prepended
            to x, so the first coefficient is the intercept.
    Return:
        A dict with the keys 'coef' (estimated coefficients), 'fitted_value'
        (design matrix @ coef) and 'resid' (y minus the fitted values).
    Raises:
        TypeError: x or y is not a numpy array.
        Exception: x or y is not 2-D, their row counts differ, or the design
            matrix does not have full column rank.
    """
    # (1) Validate the inputs
    if not (isinstance(x, np.ndarray) and isinstance(y, np.ndarray)):
        raise TypeError("Both x and y must be array.")
    if not (x.ndim == 2 and y.ndim == 2):
        raise Exception("Dimension of x and y must be 2.")
    n_obs = x.shape[0]
    if n_obs != y.shape[0]:
        raise Exception("The number of observations of x and y must be the same.")

    # (2) Build the design matrix, optionally with a leading intercept column
    design = x
    if const:
        intercept = np.ones((n_obs, 1), dtype=int)
        design = np.concatenate([intercept, x], axis=1)
    if rank(design) < design.shape[1]:
        raise Exception("Columns of x are linearly dependent.")

    # (3) Solve the normal equations: coef = (X'X)^-1 X'y
    coef = inverse(design.T @ design) @ design.T @ y
    fitted = design @ coef

    # (4) Tidy results
    return {'coef': coef, 'fitted_value': fitted, 'resid': y - fitted}
def one_side_inverse(mat):
    """
    Desc:
        Calculate the right or left inverse of a matrix. If the ordinary
        (two-side) inverse can be computed it is returned instead, together
        with a warning.
    Parameters:
        mat: A given 2-D matrix
    Return:
        A dict with exactly one key:
        'two_side': inverse(mat) succeeded and its result is returned.
        'right': mat has full row rank; the value is A' @ inverse(AA').
        'left': mat has full column rank; the value is inverse(A'A) @ A'.
    Raises:
        Exception: mat has neither full row rank nor full column rank, or mat
            is square with full rank but inverse(mat) still failed.
    """
    # Keep the try body to the single call that is expected to fail, so that
    # an error coming from warn() (e.g. warnings turned into errors) is not
    # mistaken for "the matrix has no two-side inverse".
    try:
        inv = inverse(mat)
    except Exception as err:
        inverse_error = err
    else:
        warn("The matrix mat has two-side inverse")
        return {"two_side": inv}

    r = rank(mat)
    nrow, ncol = mat.shape
    if r < min(nrow, ncol):
        raise Exception(
            "Matrix mat are neither row full rank nor col full rank. One-side inverse doesn't exist."
        )
    if r == nrow and r < ncol:
        # Best right-inverse: A' @ inverse(AA'). If AA' is non-singular,
        # then AA' @ inverse(AA') = I.
        tt = mat @ mat.T
        return {'right': mat.T @ inverse(tt)}
    if r == ncol and r < nrow:
        # Best left-inverse: inverse(A'A) @ A'. If A'A is non-singular,
        # then inverse(A'A) @ A'A = I.
        tt = mat.T @ mat
        return {'left': inverse(tt) @ mat.T}

    # Square and full rank, yet inverse(mat) raised: surface the failure
    # instead of silently returning None.
    raise Exception(
        "Matrix mat is square with full rank, but inverse(mat) failed."
    ) from inverse_error
def wls(x, y, const=True, weight=None):
    """
    Desc:
        Do linear regression by WLS method. Every observation (row of x and y)
        is scaled by the square root of its weight and the scaled problem is
        solved by least squares.
    Parameters:
        x: A 2-D array containing the explanatory variables
        y: A 2-D array containing the dependent variable, one row per observation
        const: A bool, indicating whether we add constant or not. Default True,
            which means add constant.
        weight: A 1-dimensional array of non-negative weights, one per
            observation. Default None, in which case the absolute OLS
            residuals are used as weights.
    Return:
        A dict with the keys
        'coef': estimated coefficients.
        'fitted_value': fitted values of the transformed (row-scaled) problem.
        'resid': residuals of the transformed problem.
        'weight': the 1-D row scale factors that were applied, i.e. the
            square roots of the weights.
        'transfer_x': the row-scaled design matrix.
        'transfer_y': the row-scaled dependent variable.
    Raises:
        TypeError: x or y is not an array, or weight is neither None nor an array.
        ValueError: the length of weight differs from the number of observations.
        Exception: x or y is not 2-D, their row counts differ, weight has a
            negative entry or is not 1-D, or the design matrix does not have
            full column rank.
    """
    # (1) Handle Exceptions
    if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
        raise TypeError("Both x and y must be array.")
    if x.ndim != 2 or y.ndim != 2:
        raise Exception("Dimension of x and y must be 2.")
    if x.shape[0] != y.shape[0]:
        raise Exception("The number of observations of x and y must be the same.")

    # (2) Deal weight
    # `is None` is required here: `weight == None` is evaluated elementwise
    # for an array and its truth value is ambiguous, so every user-supplied
    # weight array used to crash on this line.
    if weight is None:
        # NOTE(review): the original comment said the default weight is the
        # reciprocal of the absolute residual, but the code has always used
        # the absolute residual itself. The behaviour is kept unchanged;
        # confirm which one is intended.
        ols_fit = ols(x, y, const)
        weight = np.abs(ols_fit['resid'][:, 0])
    elif isinstance(weight, np.ndarray):
        if (weight < 0).any():
            raise Exception("Negative weight is forbidden")
        if weight.ndim != 1:
            raise Exception("The dimension of weight must be 1")
        if weight.shape[0] != x.shape[0]:
            raise ValueError(
                "The length of weight must equal the number of observations.")
    else:
        raise TypeError("Type of weight must be array")
    sqrt_weight = np.sqrt(weight)

    # (3) Calculate coef, fitted value and error
    if const:
        c = np.ones((x.shape[0], 1), dtype=int)
        x = np.concatenate([c, x], axis=1)
    if rank(x) < x.shape[1]:
        raise Exception("Columns of x are linearly dependent.")
    # Scaling the rows by broadcasting equals multiplying by diag(sqrt_weight)
    # on the left, without building an n-by-n matrix.
    row_scale = sqrt_weight[:, np.newaxis]
    X = row_scale * x
    Y = row_scale * y
    beta = inverse(X.T @ X) @ X.T @ Y
    Y_hat = X @ beta
    error = Y - Y_hat

    # (4) Tidy results
    return {
        'coef': beta,
        'fitted_value': Y_hat,
        'resid': error,
        'weight': sqrt_weight,
        'transfer_x': X,
        'transfer_y': Y,
    }
def coord_exchange(coordinate, current_basis, object_basis):
    """
    Desc:
        Convert a coordinate given with respect to current_basis into the
        coordinate with respect to object_basis. Both bases must span the
        same space.
    Parameters:
        coordinate: An array representing the coordinate in current_basis.
        current_basis: An array whose columns represent the current basis.
        object_basis: An array whose columns represent the object basis.
    Return:
        An array representing the coordinate in object_basis.
    Raises:
        Exception: the two bases differ in shape, coordinate has the wrong
            number of rows, either basis has linearly dependent columns, or
            the two bases do not span the same space.
    """
    # (1) Deal Exception
    # The shape checks come first: concatenating bases with different row
    # counts would otherwise fail inside numpy before the intended message
    # could be raised.
    if current_basis.shape != object_basis.shape:
        raise Exception(
            "The number of columns and rows in current_basis and object_basis must be equal."
        )
    if coordinate.shape[0] != object_basis.shape[1]:
        raise Exception(
            "The number of rows of coordinate must equal to columns of object_basis"
        )
    if rank(object_basis) != object_basis.shape[1]:
        raise Exception(
            "Columns of object_basis is linear dependent and it's not a basis")
    rank_current = rank(current_basis)
    if rank_current != current_basis.shape[1]:
        raise Exception(
            "Columns of current_basis is linear dependent and it's not a basis"
        )
    # If stacking the two bases side by side raises the rank, object_basis
    # contains a direction outside the span of current_basis.
    rank_both = rank(np.concatenate([current_basis, object_basis], axis=1))
    if rank_both > rank_current:
        raise Exception(
            "current_basis and object_basis aren't in the same space")

    # (2) Get basis-change-matrix
    inverses = one_side_inverse(object_basis)
    if 'two_side' in inverses:
        inv = inverses['two_side']
    else:
        inv = inverses['left']
    basis_change = inv @ current_basis

    # (3) Get coordinate of object_basis from coord
    return basis_change @ coordinate
def project_mat(mat):
    """
    Desc:
        Build the matrix that projects onto the column space of mat,
        computed as mat @ inverse(mat' @ mat) @ mat'.
    Parameters:
        mat: A 2-D matrix whose columns are linearly independent.
    Return:
        The projection matrix.
    Raises:
        Exception: the rank of mat differs from its number of columns.
    """
    if rank(mat) != mat.shape[1]:
        raise Exception("The columns of mat is not linear independent")
    gram_inverse = inverse(mat.T @ mat)
    return mat @ gram_inverse @ mat.T
def ortho(mat, unit):
    """
    Desc:
        Orthogonalize the columns of a matrix by the Gram-Schmidt process.
    Parameters:
        mat: A 2-D array
        unit: A bool indicating whether to scale each column to unit length
    Return:
        The matrix Q, with the same shape as mat. If unit is False the columns
        are orthogonal, otherwise they are orthonormal. A column that is a
        linear combination of the columns before it becomes a zero column.
    Raises:
        Exception: mat is not an array or is not 2-D.
    """
    # (1) Deal exceptions
    if not isinstance(mat, np.ndarray):
        raise Exception('mat must be an array')
    if mat.ndim != 2:
        raise Exception("Dimension of mat must be 2")
    ncol = mat.shape[1]
    if rank(mat) < ncol:
        warn("Columns of mat are not linearly independent.")

    # (2) Do Gram-schmidt process to obtain Q
    Q = []
    # Only the non-zero columns found so far are used for projection. A zero
    # column (produced by a dependent input column) would make qi.T @ qi
    # singular and crash inverse() on the following column.
    basis = []
    for i in range(ncol):
        col = mat[:, [i]]
        if i == 0:
            q = col
        else:
            if basis:
                qi = np.concatenate(basis, axis=1)
                proj = qi @ inverse(qi.T @ qi) @ qi.T
                orthogonal = col - proj @ col
            else:
                orthogonal = col
            # Rounding removes floating-point noise so that dependent
            # columns collapse to exact zeros.
            q = orthogonal.round(10)
        is_nonzero = np.abs(q).sum() > 10 ** (-10)
        if unit and is_nonzero:
            q = q / np.sqrt(np.dot(q.T, q))
        if is_nonzero:
            basis.append(q)
        Q.append(q)
    return np.concatenate(Q, axis=1)
def getNullSpace(mat):
    """
    Desc:
        Solve equation Ax=0 from the reduced row echelon form of A and report
        bases of the null space and of the column space.
    Parameters:
        mat: A 2-D array.
    Return:
        A dict with the keys
        'null_space': when mat has free variables, a DataFrame with one row
            per variable and one column per special solution ('solution1',
            'solution2', ...). When the rank equals the number of columns,
            an (ncol, 1) array of zeros, because only the trivial solution
            exists.
        'column_space': the pivot columns of mat (the identity matrix when
            the reduced form is exactly the identity).
        'pivot_idx': A list of int indicating the column location of the
            pivot variables of mat
        'free_idx': A list of int indicating the column location of the free
            variables of mat
    """
    # (1) Get the reduced row echelon form
    # assumes rref(mat)['rref'] is a 2-D numpy array - TODO confirm
    ref = rref(mat)['rref']
    r = rank(ref)
    nrow, ncol = ref.shape

    # (2) Full column rank: every variable is a pivot variable
    if r == ncol:
        is_identity = nrow == ncol and (ref == np.identity(nrow)).all()
        colspace = np.identity(nrow) if is_identity else mat
        # The solution vector has one entry per variable, i.e. ncol entries
        # (not nrow, which differs for tall matrices).
        nullspace = np.zeros((ncol, 1), dtype=int)
        return {
            'null_space': nullspace,
            'column_space': colspace,
            'pivot_idx': list(range(ncol)),
            'free_idx': []
        }

    # (3) Locate pivot and free variables
    # In reduced row echelon form the first non-zero entry of every non-zero
    # row is a pivot. Rounding keeps floating-point noise from being read as
    # a non-zero entry. This also works for single-row matrices, whose pivot
    # column contains no 0 at all.
    rounded = np.round(ref, 6)
    pivot_row = []
    pivot_col = []
    for row_idx in range(nrow):
        nonzero = np.flatnonzero(rounded[row_idx])
        if nonzero.size:
            pivot_row.append(row_idx)
            pivot_col.append(int(nonzero[0]))
    pivot_set = set(pivot_col)
    free_col = [j for j in range(ncol) if j not in pivot_set]
    colspace = mat[:, pivot_col]

    # (4) One special solution per free variable: set that free variable to
    # 1 and the other free variables to 0, then read each pivot variable off
    # its own row.
    solutions = {}
    for number, free_idx in enumerate(free_col, start=1):
        solution = np.zeros(ncol)
        solution[free_idx] = 1
        for row_idx, col_idx in zip(pivot_row, pivot_col):
            solution[col_idx] = -ref[row_idx, free_idx] / ref[row_idx, col_idx]
        solutions['solution' + str(number)] = solution
    nullspace = pd.DataFrame(solutions, index=range(ncol))

    return {
        'null_space': nullspace,
        'column_space': colspace,
        'pivot_idx': pivot_col,
        'free_idx': free_col
    }