def plot_correlations(results, data, pdf):
    """Save one density-coloured scatter plot per correlation result to a PDF.

    For every entry in ``results`` the responses to both questions are
    fetched via ``tools``, the entries reported by
    ``tools.get_indexes_of_invalid_repsonse_types`` for the allowed type list
    ``[int]`` are removed from both series, and the remaining points are
    drawn as a scatter plot coloured and sized by a Gaussian kernel density
    estimate. The best-fit line described by the result's slope and intercept
    is drawn on top.

    Args:
        results: Sized iterable of mappings providing the keys
            ``'questions'`` (a pair of question numbers), ``'slope'``,
            ``'intercept'`` and ``'r_squared'``.
        data: Survey data, passed through unchanged to the ``tools`` helpers.
        pdf: Object exposing ``savefig(fig)``; presumably a matplotlib
            ``PdfPages`` instance -- confirm against the caller.

    Raises:
        Exception: Whatever ``stats.gaussian_kde`` raises is re-raised after
            the offending point matrix has been printed.
    """
    print("Saving {} result plots to pdf.".format(len(results)))
    for result in results:
        # One progress dot per plot, written directly so that no newline is
        # emitted regardless of the Python version in use.
        sys.stdout.write('.')
        sys.stdout.flush()

        q1, q2 = result['questions']
        title_1 = tools.get_question_title(q1, data)
        title_2 = tools.get_question_title(q2, data)

        x_raw = tools.get_responses_to_number(q1, data)
        y_raw = tools.get_responses_to_number(q2, data)
        x, y = tools.extract_vals_from_responses(x_raw, y_raw)
        invalid_x, invalid_y = tools.get_indexes_of_invalid_repsonse_types(
            [int], x, y
        )
        invalid_all = tools.merge_invalid_indexes(invalid_x, invalid_y)

        # Drop an index from both series if it is invalid in either one, so
        # that the x/y pairs stay aligned.
        x, y = tools.remove_entries_at_indexes(invalid_all, x, y)

        # Calculate the point density
        xy = np.vstack([x, y])
        try:
            z = stats.gaussian_kde(xy)(xy)
        except Exception:
            # Show the data that broke the estimator, then re-raise with the
            # original traceback intact.
            print(xy)
            raise

        # Marker area grows with density but never drops below 60 so that
        # sparse points remain visible.
        marker_sizes = np.maximum(5000 * z, 60)

        # Calculate axis numbers
        x_range = (min(x) - 1, max(x) + 1)

        # generate data for best fit line: two end points are enough
        slope = result['slope']
        intercept = result['intercept']
        y_fit_points = (
            x_range[0] * slope + intercept,
            x_range[1] * slope + intercept,
        )

        fig = plt.figure()
        try:
            ax = fig.add_subplot(1, 1, 1)

            ax.set_title("{} vs {}\nr_squared = {:.4f}".format(
                title_1, title_2, result['r_squared']))

            ax.set_xlabel("{} (Q{})".format(title_1, q1))
            ax.set_ylabel("{} (Q{})".format(title_2, q2))

            # 'none' is the documented way to suppress marker edges; an empty
            # string is rejected by current matplotlib releases.
            ax.scatter(x, y, c=z, s=marker_sizes, edgecolor='none')
            ax.plot(x_range, y_fit_points, '-')

            pdf.savefig(fig)
        finally:
            # Always release the figure, even when plotting or saving fails.
            plt.close(fig)
    print("\nDone saving plots to pdf.\n")
def gen_num_correlations(data, question_types):
    """Build every unordered pair of numerical questions to correlate.

    The numerical questions are obtained from
    ``tools.get_num_questions(question_types)``. Each question is paired with
    every question that follows it in that sequence, so ``n`` questions yield
    ``n * (n - 1) / 2`` pairs. Progress messages are printed along the way.

    Args:
        data: Not used by this function; kept so existing callers keep
            working.
        question_types: Passed through to ``tools.get_num_questions``.

    Returns:
        A list of ``(question_a, question_b)`` tuples, ordered by the position
        of ``question_a`` and then of ``question_b`` in the numerical question
        sequence.
    """
    numerical_questions = tools.get_num_questions(question_types)

    num_numerical_questions = len(numerical_questions)
    # Number of unordered pairs: 1 + 2 + ... + (n - 1) == n * (n - 1) / 2.
    total_correlations = (
        num_numerical_questions * (num_numerical_questions - 1) // 2
    )
    print("There are {} numerical questions.".format(num_numerical_questions))
    print("Thus {} correlation tests will be run.".format(total_correlations))

    print("Building correlations to run.")
    # Pair each question only with the ones after it, so no pair is repeated
    # and no question is paired with itself.
    correlations_to_run = [
        (question, later_question)
        for position, question in enumerate(numerical_questions)
        for later_question in numerical_questions[position + 1:]
    ]

    print("Created {} tests to run.".format(len(correlations_to_run)))
    # Internal sanity check on the pair-building logic above.
    assert len(correlations_to_run) == total_correlations
    return correlations_to_run
def base_demographic(data, demographic_questions):
    """Map each demographic question's title to a breakdown of its answers.

    Args:
        data: Survey data, passed through to the ``tools`` helpers.
        demographic_questions: Iterable of question numbers to summarise.

    Returns:
        A dict keyed by question title whose values are the results of
        ``create_breakdown`` for that question's extracted values. If two
        questions share a title, the later one wins.
    """
    by_title = {}
    for number in demographic_questions:
        raw_responses = tools.get_responses_to_number(number, data)
        heading = tools.get_question_title(number, data)
        # Only one response list is passed in, so only the first extracted
        # series is of interest.
        extracted = tools.extract_vals_from_responses(raw_responses)
        by_title[heading] = create_breakdown(extracted[0])
    return by_title
def generate_answer_response_lists(data, opinion_questions):
    """Map each opinion question's title to an index breakdown of its answers.

    Prints a message when the work starts and another when it finishes.

    Args:
        data: Survey data, passed through to the ``tools`` helpers.
        opinion_questions: Iterable of question numbers to process.

    Returns:
        A dict keyed by question title whose values are the results of
        ``create_index_breakdown`` for that question's extracted values. If
        two questions share a title, the later one wins.
    """
    print("Generating answer response list.")
    by_title = {}
    for number in opinion_questions:
        raw_responses = tools.get_responses_to_number(number, data)
        # NOTE(review): ``data`` is handed over as an extra positional
        # argument here, while the other callers in this file pass only
        # response lists -- confirm this is intended.
        extracted = tools.extract_vals_from_responses(raw_responses, data)
        first_series = extracted[0]
        heading = tools.get_question_title(number, data)
        by_title[heading] = create_index_breakdown(first_series)
    print("Done generating answer response list.")
    return by_title