This repository has been archived by the owner on Aug 16, 2018. It is now read-only.
/
summarize_questions.py
executable file
·61 lines (44 loc) · 1.79 KB
/
summarize_questions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import dataset
import sys
from collections import OrderedDict
from db import query, initialize_counts
# Question IDs summarized by default when none are passed on the command line.
# Contiguous runs are generated with range() (upper bound exclusive); the
# remaining IDs are listed explicitly. Order matches the processing order.
ANALYSIS_QUESTIONS = (
    ['v{0}'.format(i) for i in range(12, 23)]
    + ['v45', 'v47', 'v48']
    + ['v{0}'.format(i) for i in range(50, 55)]
    + [
        'v80', 'v123', 'v139', 'v145', 'v147',
        'v148', 'v152', 'v168', 'v182', 'v203a',
    ]
    + ['v{0}'.format(i) for i in range(204, 210)]
    + ['v{0}'.format(i) for i in range(240, 243)]
    + ['v250']
)
def summarize_question(question_id):
    """
    Summarize responses for a given question ID.

    Fetches the question and its response rows with ``query(question_id)``,
    prints ``<question_id>: <label>``, tallies responses per country, and
    writes one row per country to ``output/<question_id>.csv`` through
    ``dataset.freeze``.

    Each output row contains, in order: ``country``, ``total_responses``,
    and for every response label its count followed by a ``'<label> pct'``
    column holding that count divided by the country's total (a fraction,
    not multiplied by 100). Non-ASCII characters are dropped from the label
    when building the ``pct`` column name.

    :param question_id: ID of the question to summarize (e.g. ``'v12'``).
    :returns: None; the result is the printed line and the CSV file.
    """
    question, result = query(question_id)
    # A single parenthesized argument prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print('{0}: {1}'.format(question_id, question['label']))

    # Tally responses per country, keeping countries in first-seen order.
    counts = OrderedDict()
    for row in result:
        country = row['country']
        # Membership is tested on the dict itself; `.keys()` would build a
        # fresh list for every row on Python 2.
        if country not in counts:
            # Presumably a fresh mapping of response label -> zero count;
            # confirm against db.initialize_counts.
            counts[country] = initialize_counts(question_id)
        counts[country][row['response']] += 1

    output = []
    for country, values in counts.items():
        # The total is needed before any percentage can be computed.
        total = sum(int(value) for value in values.values())

        output_row = OrderedDict((('country', country),))
        output_row['total_responses'] = total
        for label, value in values.items():
            output_row[label] = value
            pct_label = '{0} pct'.format(label.encode('ascii', 'ignore').decode('ascii'))
            output_row[pct_label] = float(value) / total
        output.append(output_row)

    dataset.freeze(output, format='csv', filename='output/{0}.csv'.format(question_id))
if __name__ == '__main__':
    # Question IDs given on the command line take precedence; with no
    # arguments the slice is empty (falsy) and the default list is used.
    questions = sys.argv[1:] or ANALYSIS_QUESTIONS
    for question_id in questions:
        summarize_question(question_id)