def plot_correlations(results, data, pdf):
    """Save one density-weighted scatter plot per correlation result to a PDF.

    For every entry in ``results`` the responses to its two questions are
    fetched from ``data``, filtered, and drawn as a scatter plot whose marker
    colour and size follow a Gaussian kernel density estimate of the points.
    The line described by the result's slope and intercept is drawn on top,
    and the finished figure is appended to ``pdf``.

    Args:
        results: Sized iterable of dicts. Each dict is read for the keys
            'questions' (a pair of question numbers), 'slope', 'intercept'
            and 'r_squared'.
        data: Survey data, passed through unchanged to the ``tools`` helpers.
        pdf: Object exposing ``savefig(fig)`` (presumably a matplotlib
            ``PdfPages`` -- confirm against the caller).

    Raises:
        Exception: Anything raised by ``stats.gaussian_kde`` is re-raised
            after the offending point matrix has been printed.
    """
    print("Saving {} result plots to pdf.".format(len(results)))

    for result in results:
        # Progress dots are written straight to stdout so they stay on one
        # line whether ``print`` is a statement or a function in this module.
        sys.stdout.write('.')
        sys.stdout.flush()

        q1, q2 = result['questions']
        title_1 = tools.get_question_title(q1, data)
        title_2 = tools.get_question_title(q2, data)

        x_raw = tools.get_responses_to_number(q1, data)
        y_raw = tools.get_responses_to_number(q2, data)
        x, y = tools.extract_vals_from_responses(x_raw, y_raw)

        # Remove every index the helper flags for either series so x and y
        # stay aligned. NOTE(review): [int] looks like the list of accepted
        # response types -- confirm in tools.
        invalid_x, invalid_y = tools.get_indexes_of_invalid_repsonse_types(
            [int], x, y
        )
        invalid_all = tools.merge_invalid_indexes(invalid_x, invalid_y)
        x, y = tools.remove_entries_at_indexes(invalid_all, x, y)

        # Calculate the point density
        xy = np.vstack([x, y])
        try:
            z = stats.gaussian_kde(xy)(xy)
        except Exception:
            # Dump the input that broke the estimator, then re-raise with the
            # original traceback intact.
            print(xy)
            raise

        # Marker area scales with density but never drops below 60 so that
        # sparse points remain visible.
        final_size = np.maximum(5000 * z, 60)

        # Calculate axis numbers
        x_range = (min(x) - 1, max(x) + 1)
        y_range = (min(y) - 1, max(y) + 1)  # computed but not used below

        # generate data for best fit line
        slope = result['slope']
        intercept = result['intercept']
        x_fit_points = x_range
        y_fit_points = (
            x_range[0] * slope + intercept,
            x_range[1] * slope + intercept,
        )

        fig = plt.figure()
        try:
            ax = fig.add_subplot(1, 1, 1)
            ax.set_title("{} vs {}\nr_squared = {:.4f}".format(
                title_1, title_2, result['r_squared']))
            ax.set_xlabel("{} (Q{})".format(title_1, q1))
            ax.set_ylabel("{} (Q{})".format(title_2, q2))
            # 'none' is the documented way to ask for no marker edge.
            ax.scatter(x, y, c=z, s=final_size, edgecolor='none')
            ax.plot(x_fit_points, y_fit_points, '-')
            pdf.savefig(fig)
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close(fig)

    print("\nDone saving plots to pdf.\n")
def gen_num_correlations(data, question_types):
    """Build the list of question pairs to run correlation tests on.

    Every unordered pair of the numerical questions reported by
    ``tools.get_num_questions(question_types)`` is produced exactly once, in
    the order the questions appear (first with second, first with third, ...,
    second with third, ...).

    Args:
        data: Not used when building the pairs; kept so existing callers
            keep working.
        question_types: Passed unchanged to ``tools.get_num_questions``.

    Returns:
        A list of ``(question_a, question_b)`` tuples.
    """
    # Imported locally so the block does not rely on a module-level import.
    from itertools import combinations

    numerical_questions = tools.get_num_questions(question_types)
    num_numerical_questions = len(numerical_questions)

    # Number of unordered pairs: n choose 2.
    total_correlations = (
        num_numerical_questions * (num_numerical_questions - 1) // 2
    )

    print("There are {} numerical questions.".format(num_numerical_questions))
    print("Thus {} correlation tests will be run.".format(total_correlations))
    print("Building correlations to run.")

    # combinations() walks positions in order, so the result matches pairing
    # each question with every question that follows it.
    correlations_to_run = list(combinations(numerical_questions, 2))

    print("Created {} tests to run.".format(len(correlations_to_run)))
    # Internal sanity check on the pair count, not input validation.
    assert len(correlations_to_run) == total_correlations
    return correlations_to_run
def base_demographic(data, demographic_questions):
    """Map each demographic question's title to a breakdown of its answers.

    Args:
        data: Survey data, handed unchanged to the ``tools`` helpers.
        demographic_questions: Iterable of question numbers to summarise.

    Returns:
        A dict keyed by question title. Each value is whatever
        ``create_breakdown`` returns for the first list of values extracted
        from that question's responses.
    """
    summary = {}
    for number in demographic_questions:
        raw_responses = tools.get_responses_to_number(number, data)
        heading = tools.get_question_title(number, data)
        # The extractor returns one entry per response list given; only one
        # list is passed here, so take the first entry.
        extracted = tools.extract_vals_from_responses(raw_responses)
        summary[heading] = create_breakdown(extracted[0])
    return summary
def generate_answer_response_lists(data, opinion_questions):
    """Map each opinion question's title to an index breakdown of its answers.

    Args:
        data: Survey data, handed unchanged to the ``tools`` lookups.
        opinion_questions: Iterable of question numbers to process.

    Returns:
        A dict keyed by question title. Each value is whatever
        ``create_index_breakdown`` returns for the values extracted from
        that question's responses.
    """
    print("Generating answer response list.")

    answer_response_dict = {}
    for question_num in opinion_questions:
        responses = tools.get_responses_to_number(question_num, data)
        # Only the response list is passed, matching the other callers of
        # this helper. Passing ``data`` as well would hand the whole data
        # set to the extractor as if it were a second response list.
        values = tools.extract_vals_from_responses(responses)[0]
        title = tools.get_question_title(question_num, data)
        answer_response_dict[title] = create_index_breakdown(values)

    print("Done generating answer response list.")
    return answer_response_dict