def matrix_multiply(A, B):
    """Build the product matrix of A and B.

    The result has as many rows as A and as many columns as B (as reported
    by ``shape``). Each entry is generated by ``make_matrix`` from
    ``matrix_product_entry`` with A and B pre-bound as its first arguments.

    Raises:
        ArithmeticError: if the second dimension of A does not equal the
            first dimension of B.
    """
    rows_a, cols_a = shape(A)
    rows_b, cols_b = shape(B)

    # The inner dimensions must agree before any entry can be computed.
    if cols_a != rows_b:
        raise ArithmeticError("incompatible shapes!")

    entry_fn = partial(matrix_product_entry, A, B)
    return make_matrix(rows_a, cols_b, entry_fn)
def correlation_matrix(data):
    """Return the square matrix of pairwise column correlations of data.

    The matrix has one row and one column per column of ``data``; entry
    (i, j) is ``correlation`` applied to columns i and j.
    """
    _, column_count = shape(data)
    return make_matrix(
        column_count,
        column_count,
        lambda i, j: correlation(get_column(data, i), get_column(data, j)),
    )
def scale(data_matrix):
    """Compute per-column statistics of data_matrix.

    Returns:
        A ``(means, stdevs)`` pair of lists, each holding one value per
        column: the column's ``mean`` and its ``standard_deviation``.
    """
    _, column_count = shape(data_matrix)
    column_indices = range(column_count)

    means = [mean(get_column(data_matrix, c)) for c in column_indices]
    stdevs = [
        standard_deviation(get_column(data_matrix, c)) for c in column_indices
    ]
    return means, stdevs
def scale(data_matrix):
    """Return the mean and standard deviation of each column.

    The result is a ``(means, stdevs)`` tuple of two lists indexed by
    column number.
    """
    _, column_count = shape(data_matrix)

    # First pass: one mean per column.
    means = []
    for col in range(column_count):
        means.append(mean(get_column(data_matrix, col)))

    # Second pass: one standard deviation per column.
    stdevs = []
    for col in range(column_count):
        stdevs.append(standard_deviation(get_column(data_matrix, col)))

    return means, stdevs
def scale(data_matrix):
    """Summarize every column of data_matrix by its mean and spread.

    Returns:
        ``(means, stdevs)`` -- two lists with one entry per column, computed
        with ``mean`` and ``standard_deviation`` respectively.
    """
    _, width = shape(data_matrix)
    means = list(
        map(lambda col: mean(get_column(data_matrix, col)), range(width))
    )
    stdevs = list(
        map(
            lambda col: standard_deviation(get_column(data_matrix, col)),
            range(width),
        )
    )
    return means, stdevs
def print_distances(data_matrix):
    """Print the distance between every unordered pair of rows.

    After a "Distances:" header, one line is printed per pair ``(i, k)``
    with ``i < k``, showing both row indices and the value of ``distance``
    for those two rows.
    """
    row_count, _ = shape(data_matrix)
    print("Distances:")
    for first in range(row_count):
        # Starting at first + 1 visits each pair once and skips self-pairs.
        for second in range(first + 1, row_count):
            gap = distance(data_matrix[first], data_matrix[second])
            print(first, "to", second, gap)
def scale(data_matrix):
    """Return ``(means, stdevs)``: the mean and standard deviation per column."""
    _, column_count = shape(data_matrix)

    def per_column(statistic):
        # Apply one summary statistic to each column in turn.
        return [
            statistic(get_column(data_matrix, col))
            for col in range(column_count)
        ]

    return per_column(mean), per_column(standard_deviation)
def correlation_matrix(data):
    """Return the (columns x columns) correlation matrix of data.

    Entry (i, j) is the correlation between column i and column j of
    ``data``.
    """
    _, column_count = shape(data)

    def matrix_entry(i, j):
        column_i = get_column(data, i)
        column_j = get_column(data, j)
        return correlation(column_i, column_j)

    return make_matrix(column_count, column_count, matrix_entry)
def correlation_matrix(data):
    """Correlate every pair of columns in data.

    Returns the num_columns x num_columns matrix whose (i, j)th entry is
    the correlation between columns i and j of ``data``.
    """
    _, size = shape(data)

    def pairwise_correlation(i, j):
        """Correlation between column i and column j of the data."""
        return correlation(get_column(data, i), get_column(data, j))

    result = make_matrix(size, size, pairwise_correlation)
    return result
def rescale(data_matrix):
    """Return a copy of data_matrix with each column standardized.

    Each value has its column mean subtracted and is divided by its column
    standard deviation. Columns whose standard deviation is not positive
    are passed through unchanged.
    """
    means, stdevs = scale(data_matrix)

    def rescaled(i, j):
        spread = stdevs[j]
        if spread > 0:
            return (data_matrix[i][j] - means[j]) / spread
        # No spread to divide by: keep the raw value.
        return data_matrix[i][j]

    row_count, column_count = shape(data_matrix)
    return make_matrix(row_count, column_count, rescaled)
def make_scatterplot_matrix():
    """Generate four related random series and show their scatterplot matrix.

    Off-diagonal cell (i, j) plots column j on the x-axis against column i
    on the y-axis; diagonal cells show only the series name.
    """
    # Number of random rows to generate.
    num_points = 100

    def random_row():
        # Each value builds on the previous ones, so the draw order matters.
        first = random_normal()
        second = -5 * first + random_normal()
        third = first + second + 5 * random_normal()
        fourth = 6 if third > -2 else 0
        return [first, second, third, fourth]

    random.seed(0)
    # Build the data matrix from the row generator above.
    data = [random_row() for _ in range(num_points)]

    # One subplot per (row series, column series) pair.
    _, num_columns = shape(data)
    fig, ax = plt.subplots(num_columns, num_columns)
    last_index = num_columns - 1

    for i in range(num_columns):
        for j in range(num_columns):
            cell = ax[i][j]

            if i == j:
                # Diagonal: label the series instead of plotting it.
                cell.annotate("series " + str(i), (0.5, 0.5),
                              xycoords='axes fraction',
                              ha="center", va="center")
            else:
                # Column j on the x-axis versus column i on the y-axis.
                cell.scatter(get_column(data, j), get_column(data, i))

            # Keep axis labels only on the left column and bottom row.
            if i < last_index:
                cell.xaxis.set_visible(False)
            if j > 0:
                cell.yaxis.set_visible(False)

    # The bottom-right and top-left charts contain only text, so their axis
    # limits are wrong; copy them from a scatter chart instead.
    ax[-1][-1].set_xlim(ax[0][-1].get_xlim())
    ax[0][0].set_ylim(ax[0][1].get_ylim())

    plt.show()
def rescale(data_matrix):
    """Rescale the input so each column has mean 0 and standard deviation 1.

    Columns whose standard deviation is not positive are left unchanged,
    since they cannot be divided by their spread.

    Args:
        data_matrix: the matrix to rescale, indexed as ``data_matrix[i][j]``.

    Returns:
        A matrix of the same shape built by ``make_matrix``.
    """
    # The closure below reads `means`, so it must be bound under exactly
    # that name, and the statistics come from `scale`.
    means, stdevs = scale(data_matrix)

    def rescaled(i, j):
        if stdevs[j] > 0:
            return (data_matrix[i][j] - means[j]) / stdevs[j]
        else:
            return data_matrix[i][j]

    num_rows, num_cols = shape(data_matrix)
    return make_matrix(num_rows, num_cols, rescaled)
def correlation_matrix(data):
    """Return the num_columns x num_columns correlation matrix of data.

    The (i, j)th entry is the correlation between columns i and j.
    """
    # Only the column count is needed; the row count is discarded.
    _, n_cols = shape(data)

    # Entry generator: the correlation coefficient of column i and column j.
    entry = lambda i, j: correlation(get_column(data, i), get_column(data, j))

    return make_matrix(n_cols, n_cols, entry)
def rescale(data_matrix):
    """Standardize data_matrix column by column.

    Every column is shifted and scaled to have mean 0 and standard
    deviation 1. Columns with no deviation are left alone.
    """
    means, stdevs = scale(data_matrix)

    def rescaled(i, j):
        return (
            (data_matrix[i][j] - means[j]) / stdevs[j]
            if stdevs[j] > 0
            else data_matrix[i][j]
        )

    n_rows, n_cols = shape(data_matrix)
    return make_matrix(n_rows, n_cols, rescaled)
def rescale(data_matrix):
    """Rescale each column of the input to mean 0 and standard deviation 1.

    Columns with no deviation are ignored, i.e. returned as they are.
    """
    means, stdevs = scale(data_matrix)

    def rescaled(i, j):
        # Guard clause: a column without positive spread is passed through.
        if not stdevs[j] > 0:
            return data_matrix[i][j]
        centered = data_matrix[i][j] - means[j]
        return centered / stdevs[j]

    height, width = shape(data_matrix)
    return make_matrix(height, width, rescaled)
def make_scatterplot_matrix():
    """Generate six related random series and show their scatterplot matrix.

    Off-diagonal cell (i, j) plots column j on the x-axis against column i
    on the y-axis; diagonal cells show only the series name.
    """
    num_points = 100

    def random_row():
        # Grow the row one value at a time; later values depend on earlier
        # ones, so the order of random draws is significant.
        row = [random_normal()]
        row.append(-5 * row[0] + random_normal())
        row.append(row[0] + row[1] + 5 * random_normal())
        row.append(6 if row[2] > -2 else 0)
        row.append(row[0] + row[1] + -1 * random_normal())
        row.append(3 if row[2] < -2 else 0)
        return row

    random.seed(0)
    data = [random_row() for _ in range(num_points)]

    _, num_columns = shape(data)
    fig, ax = plt.subplots(num_columns, num_columns)

    def draw_cell(i, j):
        """Fill the subplot at grid position (i, j)."""
        axes = ax[i][j]
        if i != j:
            # Scatter column j on the x-axis against column i on the y-axis.
            axes.scatter(get_column(data, j), get_column(data, i))
        else:
            # On the diagonal, show the series name instead.
            axes.annotate("series " + str(i), (0.5, 0.5),
                          xycoords='axes fraction',
                          ha="center", va="center")

        # Hide axis labels except on the left and bottom charts.
        if i < num_columns - 1:
            axes.xaxis.set_visible(False)
        if j > 0:
            axes.yaxis.set_visible(False)

    for i in range(num_columns):
        for j in range(num_columns):
            draw_cell(i, j)

    # The bottom-right and top-left charts hold only text, which leaves
    # their axis limits wrong; borrow the limits from a scatter chart.
    ax[-1][-1].set_xlim(ax[0][-1].get_xlim())
    ax[0][0].set_ylim(ax[0][1].get_ylim())

    plt.show()
def visual_approach():
    """Generate four related random series and show their scatterplot matrix.

    Off-diagonal cell (i, j) plots column j on the x-axis against column i
    on the y-axis; diagonal cells show only the series name. The figure is
    displayed with ``plt.show()`` and nothing is returned.
    """
    num_points = 100

    def random_row():
        # Later values depend on earlier ones, so the draw order matters.
        row = [None, None, None, None]
        row[0] = random_normal()
        row[1] = -5 * row[0] + random_normal()
        row[2] = row[0] + row[1] + 5 * random_normal()
        row[3] = 6 if row[2] > -2 else 0
        return row

    # Fixed seed for the `random` module before generating the rows.
    random.seed(0)
    data = [random_row() for _ in range(num_points)]

    _, num_columns = shape(data)
    fig, ax = plt.subplots(num_columns, num_columns)

    for i in range(num_columns):
        for j in range(num_columns):
            # Scatter column_j on the x-axis vs column_i on the y-axis
            if i != j:
                ax[i][j].scatter(get_column(data, j), get_column(data, i))
            # unless i == j, in which case show the series name
            else:
                ax[i][j].annotate("series {}".format(i), (0.5, 0.5),
                                  xycoords='axes fraction',
                                  ha="center", va="center")

            # then hide axis labels except on the left and bottom charts
            if i < num_columns - 1:
                ax[i][j].xaxis.set_visible(False)
            if j > 0:
                ax[i][j].yaxis.set_visible(False)

    # Fix the bottom-right and top-left axis limits: those charts contain
    # only text, so copy the x-limits from the chart in the same column and
    # the y-limits from the chart in the same row.
    ax[-1][-1].set_xlim(ax[0][-1].get_xlim())
    ax[0][0].set_ylim(ax[0][1].get_ylim())

    plt.show()
def de_mean_matrix(A):
    """Subtract from every value in A the mean of its column.

    The returned matrix has the same shape as A and mean 0 in every column.
    """
    row_count, column_count = shape(A)
    means_by_column, _ = scale(A)

    def centered(i, j):
        return A[i][j] - means_by_column[j]

    return make_matrix(row_count, column_count, centered)
def de_mean_matrix(A):
    """Return A with each column shifted by its own mean.

    Entry (i, j) of the result is ``A[i][j]`` minus the mean of column j,
    where the column means are the first element returned by ``scale``.
    """
    height, width = shape(A)
    means, _stdevs = scale(A)  # only the means are needed here
    return make_matrix(
        height,
        width,
        lambda i, j: A[i][j] - means[j],
    )
# Demo of the remaining vector helpers from `la`.
# NOTE(review): A, B and `la` are not defined in this excerpt -- presumably
# set up earlier in the script; confirm.
C = la.sum_of_squares(A)
print("A^2's summary = ", C)
C = la.magnitude(A)
print("A's magnitude = ", C)
C = la.distance(A, B)
print("A's distance = ", C)
print()

# Demo of the matrix helpers from `la` on a small 3x3 list-of-lists.
print("*** matrix ......")
M = [[1, 2, 3], [5, 6, 7], [3, 6, 9]]
print("M = ", M)
# NOTE(review): this rebinds the module-level name `shape` to a result
# value; any later call to a function named `shape` would hit this instead.
shape = la.shape(M)
print("M's shape = ", shape)
row_1 = la.get_row(M, 1)
print("M[1,:] = ", row_1)
col_1 = la.get_column(M, 1)
print("M[:1] = ", col_1)
# 5x5 matrix whose entries are generated by la.is_diagonal.
I = la.make_matrix(5, 5, la.is_diagonal)
print("identity matrix = ", I)
print("\n\n")

# Start of the <stats> section: A is rebound to a new sample list.
print("*** Test Module <stats> ***")
A = [1, 3, 5, 7, 9, 2, 3, 4, 4, 4, 6, 8, 10, 13, 15, 17]
# Seed the `random` module with a fixed value, then build num_points rows.
# NOTE(review): random_row and num_points are defined outside this excerpt.
random.seed(0)
data = [random_row() for _ in range(num_points)]


def correlation_matrix(data):
    # Returns the (number of columns) x (number of columns) matrix whose
    # (i, j) entry is the correlation between columns i and j of data.
    _, num_colums = shape(data)

    def matrix_entry(i, j):
        return correlation(get_column(data, i), get_column(data, j))

    return make_matrix(num_colums, num_colums, matrix_entry)


# One subplot per (row series, column series) pair.
_, num_colums = shape(data)
fig, ax = plt.subplots(num_colums, num_colums)

for i in range(num_colums):
    for j in range(num_colums):
        # Draw a scatter plot with column j on the x-axis and column i on
        # the y-axis.
        if i != j:
            ax[i][j].scatter(get_column(data, j), get_column(data, i))
        # When i == j, show the series name instead.
        else:
            ax[i][j].annotate("series " + str(i), (0.5, 0.5),
                              xycoords='axes fraction',
                              ha="center", va='center')
def de_mean_matrix(A):
    """Return the difference between every value of A and its column mean.

    The resulting matrix has mean 0 in each column.
    """
    num_rows, num_cols = shape(A)
    col_means, _ = scale(A)

    def shifted(i, j):
        # Remove the column's mean from the original entry.
        original = A[i][j]
        return original - col_means[j]

    return make_matrix(num_rows, num_cols, shifted)