Example #1
0
def make_scatterplot_matrix():
    """Lay out a square grid of pairwise scatter plots for synthetic data.

    Generates 100 rows of four related values, then creates a subplot grid
    with one row and one column of panels per data column. Off-diagonal
    panel (i, j) scatters column j on the x-axis against column i on the
    y-axis; diagonal panels only carry a "Series <i>" label.

    The function returns nothing and does not display the figure itself.
    """
    sample_count = 100

    def random_row():
        # The three random_normal() draws happen in this exact order, so the
        # generated data stays the same for a given seed.
        first = random_normal()
        second = -5 * first + random_normal()
        third = first + second + 5 * random_normal()
        fourth = 6 if third > -2 else 0
        return [first, second, third, fourth]

    # Fixed seed, presumably so random_normal() yields reproducible draws.
    random.seed(0)
    data = [random_row() for _ in range(sample_count)]

    _, series_count = shape(data)
    _fig, ax = plt.subplots(series_count, series_count)
    bottom_row = series_count - 1

    for i in range(series_count):
        for j in range(series_count):
            panel = ax[i][j]

            if i == j:
                # Diagonal: no data to plot, just name the series.
                panel.annotate("Series " + str(i), (.5, .5),
                               xycoords='axes fraction',
                               ha="center",
                               va="center")
            else:
                panel.scatter(get_col(data, j), get_col(data, i))

            # Only the bottom row keeps its x-axis and only the left
            # column keeps its y-axis.
            if i < bottom_row:
                panel.xaxis.set_visible(False)
            if j > 0:
                panel.yaxis.set_visible(False)

    # The corner diagonal panels contain only text, so borrow axis limits
    # from a data-bearing panel in the same column / row.
    ax[-1][-1].set_xlim(ax[0][-1].get_xlim())
    ax[0][0].set_ylim(ax[0][1].get_ylim())
Example #2
0
def correlation_matrix(data):
    """Build the square matrix of pairwise column correlations of ``data``.

    The entry function passed to ``make_matrix`` computes, for indices
    (i, j), ``correlation`` of column i and column j of ``data``. The
    matrix is sized by the column count reported by ``shape(data)``.
    """
    _, column_count = shape(data)

    return make_matrix(
        column_count,
        column_count,
        lambda i, j: correlation(get_col(data, i), get_col(data, j)),
    )
Example #3
0
def rescale(data_matrix):
    """Standardize each column of ``data_matrix`` to a standard score.

    For every column j whose standard deviation is positive, entry (i, j)
    becomes ``(data_matrix[i][j] - means[j]) / stdevs[j]``, where ``means``
    and ``stdevs`` come from ``scale(data_matrix)``. Columns whose standard
    deviation is not positive are passed through unchanged, which avoids
    dividing by zero.

    Returns whatever ``make_matrix`` builds from the per-entry function
    (presumably a new num_rows x num_cols matrix; the input is only read
    here).
    """
    means, stdevs = scale(data_matrix)

    def rescaled(i, j):
        spread = stdevs[j]
        if spread > 0:
            # Subtract the mean first, then divide the difference by the
            # deviation. Without the grouping only the mean is divided.
            return (data_matrix[i][j] - means[j]) / spread
        # No spread in this column: leave the value untouched.
        return data_matrix[i][j]

    num_rows, num_cols = shape(data_matrix)
    return make_matrix(num_rows, num_cols, rescaled)
Example #4
0
def de_mean_matrix(A):
    """Subtract each column's mean from every entry of that column of ``A``.

    Column means are the first element of the pair returned by
    ``scale(A)``; the second element is ignored. The result is whatever
    ``make_matrix`` builds from the per-entry function, sized by
    ``shape(A)``.
    """
    row_count, col_count = shape(A)
    col_means, _ = scale(A)

    def centered(i, j):
        return A[i][j] - col_means[j]

    return make_matrix(row_count, col_count, centered)
Example #5
0
def scale(data_matrix):
    """Compute per-column summary statistics of ``data_matrix``.

    Returns a pair ``(means, stdevs)``: two lists with one entry per
    column, holding ``mean`` and ``std_dev`` of that column respectively.
    The column count is the second value reported by ``shape``.
    """
    _, column_count = shape(data_matrix)

    def per_column(statistic):
        # Apply one statistic to every column, left to right.
        return [statistic(get_col(data_matrix, j))
                for j in range(column_count)]

    # All means are computed before any standard deviation.
    return per_column(mean), per_column(std_dev)