def run_num_correlations(question_pairs, data):
    """Run a linear regression for every entry of ``question_pairs``.

    For each entry the responses are fetched through ``tools``, entries
    flagged as invalid in either answer list are removed from both lists,
    and ``stats.linregress`` is fitted to what remains.

    Args:
        question_pairs: Iterable of question identifiers; each item is handed
            to ``tools.get_responses_to_numbers`` unchanged and stored under
            the ``"questions"`` key of its result.
        data: Response data set, passed through to ``tools``.

    Returns:
        A list with one dict per entry, holding the keys ``"questions"``,
        ``"slope"``, ``"intercept"``, ``"r_value"``, ``"p_value"``,
        ``"std_err"`` and ``"r_squared"``.
    """
    print("Running correlations.")
    outcomes = []
    for pair in question_pairs:
        responses = tools.get_responses_to_numbers(pair, data)
        raw_a, raw_b = tools.extract_vals_from_responses(*responses)

        # [int] is the list of accepted response types handed to the helper
        # (presumably anything else gets flagged -- confirm in tools).
        bad_a, bad_b = tools.get_indexes_of_invalid_repsonse_types(
            [int], raw_a, raw_b
        )
        bad_indexes = tools.merge_invalid_indexes(bad_a, bad_b)

        # The merged index list is applied to both answer lists together.
        clean_a, clean_b = tools.remove_entries_at_indexes(
            bad_indexes, raw_a, raw_b
        )

        fit = stats.linregress(clean_a, clean_b)
        slope, intercept, r_value, p_value, std_err = fit

        outcomes.append({
            "questions": pair,
            "slope": slope,
            "intercept": intercept,
            "r_value": r_value,
            "p_value": p_value,
            "std_err": std_err,
            "r_squared": r_value ** 2,
        })
    print("Finished running correlations.")
    return outcomes
def plot_correlations(results, data, pdf):
    """Draw one density-coloured scatter plot per correlation result.

    Each figure shows the valid responses to the two questions of a result,
    coloured and sized by a Gaussian kernel density estimate, with the
    result's best-fit line drawn on top. The figure is saved through
    ``pdf.savefig`` and then closed.

    Args:
        results: Sized iterable of dicts providing the keys ``'questions'``
            (a pair ``(q1, q2)``), ``'slope'``, ``'intercept'`` and
            ``'r_squared'``.
        data: Response data set, passed through to ``tools``.
        pdf: Object exposing ``savefig(fig)`` (presumably a matplotlib
            ``PdfPages`` instance -- confirm against the caller).

    Raises:
        Exception: Whatever ``stats.gaussian_kde`` raises is re-raised after
            the offending data has been printed.
    """
    print("Saving {} result plots to pdf.".format(len(results)))
    for result in results:
        # One progress dot per plot, all on the same line. Writing to stdout
        # directly behaves the same on Python 2 and 3, unlike the
        # trailing-comma print idiom.
        sys.stdout.write('.')
        sys.stdout.flush()

        q1, q2 = result['questions']
        title_1 = tools.get_question_title(q1, data)
        title_2 = tools.get_question_title(q2, data)

        x_raw = tools.get_responses_to_number(q1, data)
        y_raw = tools.get_responses_to_number(q2, data)
        x, y = tools.extract_vals_from_responses(x_raw, y_raw)
        invalid_x, invalid_y = tools.get_indexes_of_invalid_repsonse_types(
            [int], x, y
        )
        invalid_all = tools.merge_invalid_indexes(invalid_x, invalid_y)

        # The merged index list is applied to both series together.
        x, y = tools.remove_entries_at_indexes(invalid_all, x, y)

        # Calculate the point density
        xy = np.vstack([x, y])
        try:
            z = stats.gaussian_kde(xy)(xy)
        except Exception:
            # Show the data that broke the estimate, then re-raise with the
            # original traceback intact.
            print(xy)
            raise

        # Marker area scales with density, with a floor of 60 so that
        # low-density points stay visible.
        final_size = np.maximum(5000 * z, 60)

        # Calculate axis numbers
        x_range = (min(x) - 1, max(x) + 1)
        y_range = (min(y) - 1, max(y) + 1)

        # generate data for best fit line: its two end points, evaluated at
        # the padded x limits
        slope = result['slope']
        intercept = result['intercept']
        x_fit_points = x_range
        y_fit_points = (
            x_range[0] * slope + intercept,
            x_range[1] * slope + intercept,
        )

        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)

        ax.set_title("{} vs {}\nr_squared = {:.4f}".format(
            title_1, title_2, result['r_squared']))

        ax.set_xlabel("{} (Q{})".format(title_1, q1))
        ax.set_ylabel("{} (Q{})".format(title_2, q2))

        # 'none' is the documented way to suppress marker edges; an empty
        # string is not a valid colour specification.
        ax.scatter(x, y, c=z, s=final_size, edgecolor='none')
        ax.plot(x_fit_points, y_fit_points, '-')

        pdf.savefig(fig)
        # Close each figure once saved so they do not pile up in memory.
        plt.close(fig)
    print("\nDone saving plots to pdf.\n")