def make_scatterplot_matrix():
    """Draw a grid of pairwise scatterplots for four synthetic data series.

    The random generator is seeded with 0 and 100 rows of four values are
    generated. A ``num_cols`` x ``num_cols`` grid of subplots is then filled
    in: off-diagonal cell (i, j) plots column j on the x-axis against
    column i on the y-axis, and diagonal cells only carry a "Series <i>"
    label.

    The function neither returns the figure nor displays it; it only builds
    it through ``plt``.
    """
    num_points = 100

    def random_row():
        # Each value is derived from the previous ones; the order of the
        # random_normal() calls determines the generated data.
        first = random_normal()
        second = -5 * first + random_normal()
        third = first + second + 5 * random_normal()
        fourth = 6 if third > -2 else 0
        return [first, second, third, fourth]

    random.seed(0)
    data = [random_row() for _ in range(num_points)]

    _, num_cols = shape(data)
    fig, axes = plt.subplots(num_cols, num_cols)
    last_row = num_cols - 1

    for i in range(num_cols):
        for j in range(num_cols):
            panel = axes[i][j]

            if i == j:
                # Diagonal: label the series instead of plotting it
                # against itself.
                panel.annotate(
                    "Series " + str(i),
                    (0.5, 0.5),
                    xycoords='axes fraction',
                    ha="center",
                    va="center",
                )
            else:
                panel.scatter(get_col(data, j), get_col(data, i))

            # Keep x tick labels only on the bottom row and y tick labels
            # only on the left column.
            if i < last_row:
                panel.xaxis.set_visible(False)
            if j > 0:
                panel.yaxis.set_visible(False)

    # The bottom-right and top-left cells contain only text, so copy their
    # visible axis limits from a scatter cell in the same column / row.
    axes[-1][-1].set_xlim(axes[0][-1].get_xlim())
    axes[0][0].set_ylim(axes[0][1].get_ylim())
def correlation_matrix(data):
    """Return the matrix of pairwise column correlations of ``data``.

    The result is built with ``make_matrix`` as a ``num_columns`` x
    ``num_columns`` matrix whose (i, j) entry is ``correlation`` applied to
    columns i and j of ``data``.
    """
    def column_pair_correlation(i, j):
        left_column = get_col(data, i)
        right_column = get_col(data, j)
        return correlation(left_column, right_column)

    _, column_count = shape(data)
    return make_matrix(column_count, column_count, column_pair_correlation)
def rescale(data_matrix):
    """Return a copy of ``data_matrix`` with each column standardized.

    Every entry has its column's mean subtracted and is then divided by its
    column's standard deviation, using the per-column statistics returned
    by ``scale``. Columns whose standard deviation is not positive are
    copied through unchanged, since dividing by it is not meaningful.

    The input matrix is not modified; the result is built with
    ``make_matrix`` and has the same shape as the input.
    """
    means, stdevs = scale(data_matrix)

    def rescaled(i, j):
        if stdevs[j] > 0:
            # Parenthesize the subtraction: without the parentheses only
            # the mean would be divided by the standard deviation.
            return (data_matrix[i][j] - means[j]) / stdevs[j]
        else:
            return data_matrix[i][j]

    num_rows, num_cols = shape(data_matrix)
    return make_matrix(num_rows, num_cols, rescaled)
def de_mean_matrix(A):
    """Return a matrix like ``A`` with each column's mean subtracted.

    Column means come from ``scale``; the result has the same shape as
    ``A`` and is built with ``make_matrix``. ``A`` itself is not modified.
    """
    row_count, col_count = shape(A)
    # scale() also returns the standard deviations, which are not needed.
    column_means, _unused_stdevs = scale(A)

    def centered_entry(i, j):
        return A[i][j] - column_means[j]

    return make_matrix(row_count, col_count, centered_entry)
def scale(data_matrix):
    """Return the per-column means and standard deviations of a matrix.

    Returns a pair ``(means, stdevs)`` of lists, each holding one value per
    column of ``data_matrix``, computed with ``mean`` and ``std_dev``.
    """
    _, column_count = shape(data_matrix)

    def per_column(statistic):
        # Apply one summary statistic to every column, left to right.
        return [
            statistic(get_col(data_matrix, j))
            for j in range(column_count)
        ]

    column_means = per_column(mean)
    column_stdevs = per_column(std_dev)
    return column_means, column_stdevs