def populate_poll_word_clouds(apps, schema_editor):
    """Compute and store the word counts of every open-ended poll question.

    For each org, every poll question is inspected.  A question is treated
    as open ended when exactly one of its active response categories does
    not contain "no response" (case-insensitive).  For such a question the
    text of its poll results is lowercased, split on every character that is
    neither alphanumeric nor an underscore, and the number of occurrences of
    each resulting token is saved in the ``words`` field of the
    ``PollWordCloud`` row for that org and question (created when missing).

    Args:
        apps: registry used to look up the ``polls.PollQuestion``,
            ``orgs.Org`` and ``stats.PollWordCloud`` models.
        schema_editor: accepted but not used.
    """
    PollQuestion = apps.get_model("polls", "PollQuestion")
    Org = apps.get_model("orgs", "Org")
    PollWordCloud = apps.get_model("stats", "PollWordCloud")

    # Values are bound by the database driver instead of being formatted
    # into the statement, so quoting never depends on the data.
    word_count_sql = """
        SELECT w.label, count(*) AS count
        FROM (
            SELECT regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
            FROM polls_pollresult
            WHERE polls_pollresult.org_id = %s
              AND polls_pollresult.flow = %s
              AND polls_pollresult.ruleset = %s
              AND polls_pollresult.text IS NOT NULL
              AND polls_pollresult.text NOT ILIKE %s
        ) w
        GROUP BY w.label;
    """

    for org in Org.objects.all():
        questions = (
            PollQuestion.objects.filter(poll__org_id=org.id)
            .select_related("poll")
            .prefetch_related("response_categories")
        )

        for question in questions:
            real_categories = question.response_categories.filter(
                is_active=True
            ).exclude(category__icontains="no response")
            if real_categories.count() != 1:
                # Not an open-ended question: nothing to store.
                continue

            params = [
                org.id,
                question.poll.flow_uuid,
                question.ruleset_uuid,
                "http%",  # results whose text starts with "http" are skipped
            ]
            with connection.cursor() as cursor:
                cursor.execute(word_count_sql, params)
                words = {}
                for row in get_dict_from_cursor(cursor):
                    words[row["label"]] = int(row["count"])

            poll_word_cloud, _created = PollWordCloud.objects.get_or_create(
                org=org, question=question
            )
            poll_word_cloud.words = words
            poll_word_cloud.save()
def calculate_results(self, segment=None):
    """Compute this question's results, store them in the cache, return them.

    * Open-ended question without a segment: one entry whose ``categories``
      are the words found in the poll results' text, ordered by descending
      count then alphabetically, limited to 100, skipping one-character
      tokens and stop words of the org language and of English.
    * Any other question without a segment: one entry with one count per
      active response category.
    * With a segment: one entry per boundary (``location`` being ``state``,
      ``district`` or ``ward``), per age group (non-empty ``age``) or per
      gender (non-empty ``gender``), checked in that order.

    Categories listed in ``PollResponseCategory.IGNORED_CATEGORY_RULES`` are
    left out of every per-category count.

    Args:
        segment: optional dict; its ``location``, ``age`` and ``gender``
            values are read (case-insensitively).

    Returns:
        A list of dicts, each holding at least ``set``, ``unset`` and
        ``categories``.  The list is empty when a segment is given that
        matches none of the supported kinds.
    """
    org = self.poll.org
    open_ended = self.is_open_ended()
    responded = self.get_responded()
    polled = self.get_polled()

    results = []

    if open_ended and not segment:
        from ureport.utils import get_dict_from_cursor

        # Values are bound by the database driver instead of being
        # formatted into the statement.
        word_count_sql = """
            SELECT w.label, count(*) AS count
            FROM (
                SELECT regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                FROM polls_pollresult
                WHERE polls_pollresult.org_id = %s
                  AND polls_pollresult.flow = %s
                  AND polls_pollresult.ruleset = %s
                  AND polls_pollresult.text IS NOT NULL
                  AND polls_pollresult.text NOT ILIKE %s
            ) w
            GROUP BY w.label;
        """
        params = [org.id, self.poll.flow_uuid, self.ruleset_uuid, "http%"]
        with connection.cursor() as cursor:
            cursor.execute(word_count_sql, params)
            unclean_categories = get_dict_from_cursor(cursor)

        ureport_languages = getattr(settings, 'LANGUAGES', [('en', 'English')])
        org_languages = [
            lang[1].lower() for lang in ureport_languages
            if lang[0] == org.language
        ]
        if 'english' not in org_languages:
            org_languages.append('english')

        # A set keeps the per-word membership test cheap.
        ignore_words = set()
        for lang in org_languages:
            ignore_words.update(safe_get_stop_words(lang))

        categories = []
        # sort by count, then alphabetically
        unclean_categories = sorted(unclean_categories,
                                    key=lambda c: (-c['count'], c['label']))
        for category in unclean_categories:
            if len(categories) >= 100:
                break
            word = category['label']
            if len(word) > 1 and word not in ignore_words:
                categories.append(dict(label=word, count=int(category['count'])))

        results.append(dict(open_ended=open_ended, set=responded,
                            unset=polled - responded, categories=categories))

    else:
        categories_label = self.response_categories.filter(
            is_active=True).values_list('category', flat=True)
        question_results = self.get_question_results()

        # (label, lowercased label) of every category taking part in counts.
        counted_labels = [
            (label, label.lower()) for label in categories_label
            if label.lower() not in PollResponseCategory.IGNORED_CATEGORY_RULES
        ]

        if segment:
            location_part = segment.get('location', '').lower()
            age_part = segment.get('age', '').lower()
            gender_part = segment.get('gender', '').lower()

            if location_part in ['state', 'district', 'ward']:
                for boundary in org.get_segment_org_boundaries(segment):
                    osm_id = boundary.get('osm_id').upper()
                    unset_count_key = "ruleset:%s:nocategory:%s:%s" % (
                        self.ruleset_uuid, location_part, osm_id)
                    unset_count = question_results.get(unset_count_key, 0)

                    categories = []
                    set_count = 0
                    for label, label_lower in counted_labels:
                        category_count_key = "ruleset:%s:category:%s:%s:%s" % (
                            self.ruleset_uuid, label_lower, location_part, osm_id)
                        category_count = question_results.get(category_count_key, 0)
                        set_count += category_count
                        categories.append(dict(count=category_count, label=label))

                    results.append(dict(open_ended=open_ended, set=set_count,
                                        unset=unset_count, boundary=osm_id,
                                        label=boundary.get('name'),
                                        categories=categories))

            elif age_part:
                poll_year = self.poll.poll_date.year

                # Only keys ending in "born:<4-digit year>" carry a birth year.
                born_results = {
                    k: v for k, v in question_results.iteritems()
                    if k[-9:-5] == 'born'
                }

                # Both bounds are inclusive, so the last age of each group
                # (14, 19, 24, 30, 34) lands in the group its label names
                # instead of being dropped from every group.
                age_intervals = [
                    ('0-14', (0, 14)),
                    ('15-19', (15, 19)),
                    ('20-24', (20, 24)),
                    ('25-30', (25, 30)),
                    ('31-34', (31, 34)),
                    ('35+', (35, 2000)),
                ]

                category_prefixes = [
                    (label_lower,
                     'ruleset:%s:category:%s:' % (self.ruleset_uuid, label_lower))
                    for label, label_lower in counted_labels
                ]

                for age_group, (lower_bound, upper_bound) in age_intervals:
                    unset_count = 0
                    categories_count = {
                        label_lower: 0 for label, label_lower in counted_labels
                    }

                    for result_key, result_count in born_results.iteritems():
                        age = poll_year - int(result_key[-4:])
                        if not lower_bound <= age <= upper_bound:
                            continue

                        if 'nocategory' in result_key:
                            unset_count += result_count

                        for label_lower, prefix in category_prefixes:
                            if result_key.startswith(prefix):
                                categories_count[label_lower] += result_count

                    categories = [
                        dict(count=v, label=k)
                        for k, v in categories_count.iteritems()
                    ]
                    set_count = sum(elt['count'] for elt in categories)
                    results.append(dict(set=set_count, unset=unset_count,
                                        label=age_group, categories=categories))

                results = sorted(results, key=lambda i: i['label'])

            elif gender_part:
                gender_labels = dict(f=_('Female'), m=_('Male'))
                for gender in ['f', 'm']:
                    unset_count_key = "ruleset:%s:nocategory:%s:%s" % (
                        self.ruleset_uuid, 'gender', gender)
                    unset_count = question_results.get(unset_count_key, 0)

                    categories = []
                    set_count = 0
                    for label, label_lower in counted_labels:
                        category_count_key = "ruleset:%s:category:%s:%s:%s" % (
                            self.ruleset_uuid, label_lower, 'gender', gender)
                        category_count = question_results.get(category_count_key, 0)
                        set_count += category_count
                        categories.append(dict(count=category_count, label=label))

                    results.append(dict(set=set_count, unset=unset_count,
                                        label=gender_labels.get(gender),
                                        categories=categories))

        else:
            categories = []
            for label, label_lower in counted_labels:
                category_count_key = "ruleset:%s:category:%s" % (
                    self.ruleset_uuid, label_lower)
                category_count = question_results.get(category_count_key, 0)
                categories.append(dict(count=category_count, label=label))

            results.append(dict(open_ended=open_ended, set=responded,
                                unset=polled - responded, categories=categories))

    # Unsegmented results and the state / age / gender segments are stored
    # with a timeout of None; every other segment uses the class timeout.
    no_timeout = (
        not segment
        or segment.get('location', '').lower() == 'state'
        or segment.get('age', '').lower() == 'age'
        or segment.get('gender', '').lower() == 'gender'
    )
    if no_timeout:
        cache_time = None
    else:
        cache_time = PollQuestion.POLL_QUESTION_RESULTS_CACHE_TIMEOUT

    key = PollQuestion.POLL_QUESTION_RESULTS_CACHE_KEY % (
        org.pk, self.poll.pk, self.pk)
    if segment:
        substituted_segment = org.substitute_segment(segment)
        key += ":" + slugify(unicode(json.dumps(substituted_segment)))

    cache.set(key, {"results": results}, cache_time)

    return results
def calculate_results(self, segment=None):
    """Compute this question's results, store them in the cache, return them.

    * Open-ended question without a segment: one entry whose ``categories``
      are the 100 most frequent words found in the poll results' text
      (ties broken alphabetically), skipping one-character tokens and stop
      words of the org language and of English.
    * Any other question without a segment: one entry with one count per
      active response category other than "other".
    * With a segment: one entry per boundary (``location`` being ``state``,
      ``district`` or ``ward``; every active category is counted), per age
      group (non-empty ``age``) or per gender (non-empty ``gender``); the
      last two leave the "other" category out.

    Args:
        segment: optional dict; its ``location``, ``age`` and ``gender``
            values are read (case-insensitively).

    Returns:
        A list of dicts, each holding at least ``set``, ``unset`` and
        ``categories``.  The list is empty when a segment is given that
        matches none of the supported kinds.
    """
    org = self.poll.org
    open_ended = self.is_open_ended()
    responded = self.get_responded()
    polled = self.get_polled()

    results = []

    if open_ended and not segment:
        from ureport.utils import get_dict_from_cursor

        # Values are bound by the database driver instead of being
        # formatted into the statement.
        word_count_sql = """
            SELECT w.label, count(*) AS count
            FROM (
                SELECT regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                FROM polls_pollresult
                WHERE polls_pollresult.org_id = %s
                  AND polls_pollresult.flow = %s
                  AND polls_pollresult.ruleset = %s
                  AND polls_pollresult.text IS NOT NULL
                  AND polls_pollresult.text NOT ILIKE %s
            ) w
            GROUP BY w.label;
        """
        params = [org.id, self.poll.flow_uuid, self.ruleset_uuid, "http%"]
        with connection.cursor() as cursor:
            cursor.execute(word_count_sql, params)
            unclean_categories = get_dict_from_cursor(cursor)

        ureport_languages = getattr(settings, 'LANGUAGES', [('en', 'English')])
        org_languages = [
            lang[1].lower() for lang in ureport_languages
            if lang[0] == org.language
        ]
        if 'english' not in org_languages:
            org_languages.append('english')

        # A set keeps the per-word membership test cheap.
        ignore_words = set()
        for lang in org_languages:
            ignore_words.update(safe_get_stop_words(lang))

        candidates = [
            dict(label=category['label'], count=int(category['count']))
            for category in unclean_categories
            if len(category['label']) > 1
            and category['label'] not in ignore_words
        ]
        # Sort by count, then alphabetically, and only then keep the first
        # 100: the query returns rows in no particular order, so truncating
        # before sorting would keep an arbitrary set of words.
        candidates.sort(key=lambda c: (-c['count'], c['label']))
        categories = candidates[:100]

        results.append(dict(open_ended=open_ended, set=responded,
                            unset=polled - responded, categories=categories))

    else:
        categories_label = self.response_categories.filter(
            is_active=True).values_list('category', flat=True)
        question_results = self.get_question_results()

        # (label, lowercased label) pairs; the "other" category is left out
        # of the age, gender and unsegmented counts.
        all_labels = [(label, label.lower()) for label in categories_label]
        counted_labels = [pair for pair in all_labels if pair[1] != 'other']

        if segment:
            location_part = segment.get('location', '').lower()
            age_part = segment.get('age', '').lower()
            gender_part = segment.get('gender', '').lower()

            if location_part in ['state', 'district', 'ward']:
                for boundary in org.get_segment_org_boundaries(segment):
                    osm_id = boundary.get('osm_id').upper()
                    unset_count_key = "ruleset:%s:nocategory:%s:%s" % (
                        self.ruleset_uuid, location_part, osm_id)
                    unset_count = question_results.get(unset_count_key, 0)

                    categories = []
                    set_count = 0
                    for label, label_lower in all_labels:
                        category_count_key = "ruleset:%s:category:%s:%s:%s" % (
                            self.ruleset_uuid, label_lower, location_part, osm_id)
                        category_count = question_results.get(category_count_key, 0)
                        set_count += category_count
                        categories.append(dict(count=category_count, label=label))

                    results.append(dict(open_ended=open_ended, set=set_count,
                                        unset=unset_count, boundary=osm_id,
                                        label=boundary.get('name'),
                                        categories=categories))

            elif age_part:
                poll_year = self.poll.poll_date.year

                # Only keys ending in "born:<4-digit year>" carry a birth year.
                born_results = {
                    k: v for k, v in question_results.iteritems()
                    if k[-9:-5] == 'born'
                }

                # Both bounds are inclusive, so the last age of each group
                # (14, 19, 24, 30, 34) lands in the group its label names
                # instead of being dropped from every group.
                age_intervals = [
                    ('0-14', (0, 14)),
                    ('15-19', (15, 19)),
                    ('20-24', (20, 24)),
                    ('25-30', (25, 30)),
                    ('31-34', (31, 34)),
                    ('35+', (35, 2000)),
                ]

                category_prefixes = [
                    (label_lower,
                     'ruleset:%s:category:%s:' % (self.ruleset_uuid, label_lower))
                    for label, label_lower in counted_labels
                ]

                for age_group, (lower_bound, upper_bound) in age_intervals:
                    unset_count = 0
                    categories_count = {
                        label_lower: 0 for label, label_lower in counted_labels
                    }

                    for result_key, result_count in born_results.iteritems():
                        age = poll_year - int(result_key[-4:])
                        if not lower_bound <= age <= upper_bound:
                            continue

                        if 'nocategory' in result_key:
                            unset_count += result_count

                        for label_lower, prefix in category_prefixes:
                            if result_key.startswith(prefix):
                                categories_count[label_lower] += result_count

                    categories = [
                        dict(count=v, label=k)
                        for k, v in categories_count.iteritems()
                    ]
                    set_count = sum(elt['count'] for elt in categories)
                    results.append(dict(set=set_count, unset=unset_count,
                                        label=age_group, categories=categories))

                results = sorted(results, key=lambda i: i['label'])

            elif gender_part:
                gender_labels = dict(f=_('Female'), m=_('Male'))
                for gender in ['f', 'm']:
                    unset_count_key = "ruleset:%s:nocategory:%s:%s" % (
                        self.ruleset_uuid, 'gender', gender)
                    unset_count = question_results.get(unset_count_key, 0)

                    categories = []
                    set_count = 0
                    for label, label_lower in counted_labels:
                        category_count_key = "ruleset:%s:category:%s:%s:%s" % (
                            self.ruleset_uuid, label_lower, 'gender', gender)
                        category_count = question_results.get(category_count_key, 0)
                        set_count += category_count
                        categories.append(dict(count=category_count, label=label))

                    results.append(dict(set=set_count, unset=unset_count,
                                        label=gender_labels.get(gender),
                                        categories=categories))

        else:
            categories = []
            for label, label_lower in counted_labels:
                category_count_key = "ruleset:%s:category:%s" % (
                    self.ruleset_uuid, label_lower)
                category_count = question_results.get(category_count_key, 0)
                categories.append(dict(count=category_count, label=label))

            results.append(dict(open_ended=open_ended, set=responded,
                                unset=polled - responded, categories=categories))

    # Unsegmented results and the state / age / gender segments are stored
    # with a timeout of None; every other segment uses the class timeout.
    no_timeout = (
        not segment
        or segment.get('location', '').lower() == 'state'
        or segment.get('age', '').lower() == 'age'
        or segment.get('gender', '').lower() == 'gender'
    )
    if no_timeout:
        cache_time = None
    else:
        cache_time = PollQuestion.POLL_QUESTION_RESULTS_CACHE_TIMEOUT

    key = PollQuestion.POLL_QUESTION_RESULTS_CACHE_KEY % (
        org.pk, self.poll.pk, self.pk)
    if segment:
        substituted_segment = org.substitute_segment(segment)
        key += ":" + slugify(unicode(json.dumps(substituted_segment)))

    cache.set(key, {"results": results}, cache_time)

    return results
def get_results(self, segment=None):
    """Return this question's results, reading them from the cache if present.

    On a cache miss the results are computed, stored under the question's
    cache key (extended with the slugified substituted segment when one is
    given) for ``POLL_QUESTION_RESULTS_CACHE_TIMEOUT``, and returned.

    * Open-ended question without a segment: one entry whose ``categories``
      are up to 100 of the most frequent words of the poll results' text,
      skipping one-character tokens and stop words of the org language and
      of English.
    * With a segment: one entry per boundary of the requested ``location``
      (``state`` or ``district``, case-insensitive).
    * Otherwise: one entry with one count per active response category
      other than "other".

    Args:
        segment: optional dict whose ``location`` value selects the
            boundary level.

    Returns:
        A list of result dicts, or ``None`` when a segment is given whose
        location is missing or is neither a state nor a district.
    """
    org = self.poll.org

    key = PollQuestion.POLL_QUESTION_RESULTS_CACHE_KEY % (
        org.pk, self.poll.pk, self.pk)
    if segment:
        substituted_segment = org.substitute_segment(segment)
        key += ":" + slugify(unicode(json.dumps(substituted_segment)))

    cached_value = cache.get(key, None)
    if cached_value:
        return cached_value["results"]

    open_ended = self.is_open_ended()
    responded = self.get_responded()
    polled = self.get_polled()

    results = []

    if open_ended and not segment:
        from ureport.utils import get_dict_from_cursor

        # Values are bound by the database driver instead of being
        # formatted into the statement.
        word_count_sql = """
            SELECT w.label, count(*) AS count
            FROM (
                SELECT regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                FROM polls_pollresult
                WHERE polls_pollresult.org_id = %s
                  AND polls_pollresult.flow = %s
                  AND polls_pollresult.ruleset = %s
            ) w
            GROUP BY w.label
            ORDER BY count DESC;
        """
        params = [org.id, self.poll.flow_uuid, self.ruleset_uuid]
        # The context manager closes the cursor once the rows are read.
        with connection.cursor() as cursor:
            cursor.execute(word_count_sql, params)
            unclean_categories = get_dict_from_cursor(cursor)

        ureport_languages = getattr(settings, 'LANGUAGES', [('en', 'English')])
        org_languages = [
            lang[1].lower() for lang in ureport_languages
            if lang[0] == org.language
        ]
        if 'english' not in org_languages:
            org_languages.append('english')

        # A set keeps the per-word membership test cheap.
        ignore_words = set()
        for lang in org_languages:
            ignore_words.update(safe_get_stop_words(lang))

        # Rows arrive ordered by descending count, so the first 100 kept
        # words are the most frequent ones.
        categories = []
        for category in unclean_categories:
            if len(categories) >= 100:
                break
            word = category['label']
            if len(word) > 1 and word not in ignore_words:
                categories.append(dict(label=word, count=int(category['count'])))

        # sort by count, then alphabetically
        categories = sorted(categories, key=lambda c: (-c['count'], c['label']))
        results.append(dict(open_ended=open_ended, set=responded,
                            unset=polled - responded, categories=categories))

    else:
        categories_label = self.response_categories.filter(
            is_active=True).values_list('category', flat=True)
        question_results = self.get_question_results()

        if segment:
            # A segment without a location is unsupported, like any
            # location other than state/district, rather than an error.
            location_part = (segment.get('location') or '').lower()
            if location_part not in ['state', 'district']:
                return None

            for boundary in org.get_segment_org_boundaries(segment):
                osm_id = boundary.get('osm_id').upper()
                unset_count_key = "ruleset:%s:nocategory:%s:%s" % (
                    self.ruleset_uuid, location_part, osm_id)
                unset_count = question_results.get(unset_count_key, 0)

                categories = []
                set_count = 0
                for categorie_label in categories_label:
                    category_count_key = "ruleset:%s:category:%s:%s:%s" % (
                        self.ruleset_uuid, categorie_label.lower(),
                        location_part, osm_id)
                    category_count = question_results.get(category_count_key, 0)
                    set_count += category_count
                    categories.append(
                        dict(count=category_count, label=categorie_label))

                if open_ended:
                    # For home page best and worst location responses
                    from ureport.contacts.models import Contact

                    # Uses the lowercased value so "district" in any case
                    # counts contacts by district, matching the check above.
                    if location_part == 'district':
                        boundary_contacts_count = Contact.objects.filter(
                            org=org, district=osm_id).count()
                    else:
                        boundary_contacts_count = Contact.objects.filter(
                            org=org, state=osm_id).count()
                    unset_count = boundary_contacts_count - set_count

                results.append(dict(open_ended=open_ended, set=set_count,
                                    unset=unset_count, boundary=osm_id,
                                    label=boundary.get('name'),
                                    categories=categories))

        else:
            categories = []
            for categorie_label in categories_label:
                if categorie_label.lower() == 'other':
                    continue
                category_count_key = "ruleset:%s:category:%s" % (
                    self.ruleset_uuid, categorie_label.lower())
                category_count = question_results.get(category_count_key, 0)
                categories.append(
                    dict(count=category_count, label=categorie_label))

            results.append(dict(open_ended=open_ended, set=responded,
                                unset=polled - responded, categories=categories))

    cache.set(key, {"results": results},
              PollQuestion.POLL_QUESTION_RESULTS_CACHE_TIMEOUT)

    return results
def get_results(self, segment=None):
    """Return this question's results, reading them from the cache if present.

    On a cache miss the results are computed, stored under the question's
    cache key (extended with the slugified substituted segment when one is
    given) for ``POLL_QUESTION_RESULTS_CACHE_TIMEOUT``, and returned.

    * Open-ended question without a segment: one entry whose ``categories``
      are up to 100 of the most frequent words of the poll results' text,
      skipping one-character tokens and stop words of the org language and
      of English.
    * With a segment: one entry per boundary of the requested ``location``
      (``state`` or ``district``, case-insensitive).
    * Otherwise: one entry with one count per active response category
      other than "other".

    Args:
        segment: optional dict whose ``location`` value selects the
            boundary level.

    Returns:
        A list of result dicts, or ``None`` when a segment is given whose
        location is missing or is neither a state nor a district.
    """
    org = self.poll.org

    key = PollQuestion.POLL_QUESTION_RESULTS_CACHE_KEY % (
        org.pk, self.poll.pk, self.pk)
    if segment:
        substituted_segment = org.substitute_segment(segment)
        key += ":" + slugify(unicode(json.dumps(substituted_segment)))

    cached_value = cache.get(key, None)
    if cached_value:
        return cached_value["results"]

    open_ended = self.is_open_ended()
    responded = self.get_responded()
    polled = self.get_polled()

    results = []

    if open_ended and not segment:
        from ureport.utils import get_dict_from_cursor

        # Values are bound by the database driver instead of being
        # formatted into the statement.
        word_count_sql = """
            SELECT w.label, count(*) AS count
            FROM (
                SELECT regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                FROM polls_pollresult
                WHERE polls_pollresult.org_id = %s
                  AND polls_pollresult.flow = %s
                  AND polls_pollresult.ruleset = %s
            ) w
            GROUP BY w.label
            ORDER BY count DESC;
        """
        params = [org.id, self.poll.flow_uuid, self.ruleset_uuid]
        # The context manager closes the cursor once the rows are read.
        with connection.cursor() as cursor:
            cursor.execute(word_count_sql, params)
            unclean_categories = get_dict_from_cursor(cursor)

        ureport_languages = getattr(settings, 'LANGUAGES', [('en', 'English')])
        org_languages = [
            lang[1].lower() for lang in ureport_languages
            if lang[0] == org.language
        ]
        if 'english' not in org_languages:
            org_languages.append('english')

        # A set keeps the per-word membership test cheap.
        ignore_words = set()
        for lang in org_languages:
            ignore_words.update(safe_get_stop_words(lang))

        # Rows arrive ordered by descending count, so the first 100 kept
        # words are the most frequent ones.
        categories = []
        for category in unclean_categories:
            if len(categories) >= 100:
                break
            word = category['label']
            if len(word) > 1 and word not in ignore_words:
                categories.append(dict(label=word, count=int(category['count'])))

        # sort by count, then alphabetically
        categories = sorted(categories, key=lambda c: (-c['count'], c['label']))
        results.append(dict(open_ended=open_ended, set=responded,
                            unset=polled - responded, categories=categories))

    else:
        categories_label = self.response_categories.filter(
            is_active=True).values_list('category', flat=True)
        question_results = self.get_question_results()

        if segment:
            # A segment without a location is unsupported, like any
            # location other than state/district, rather than an error.
            location_part = (segment.get('location') or '').lower()
            if location_part not in ['state', 'district']:
                return None

            for boundary in org.get_segment_org_boundaries(segment):
                osm_id = boundary.get('osm_id').upper()
                unset_count_key = "ruleset:%s:nocategory:%s:%s" % (
                    self.ruleset_uuid, location_part, osm_id)
                unset_count = question_results.get(unset_count_key, 0)

                categories = []
                set_count = 0
                for categorie_label in categories_label:
                    category_count_key = "ruleset:%s:category:%s:%s:%s" % (
                        self.ruleset_uuid, categorie_label.lower(),
                        location_part, osm_id)
                    category_count = question_results.get(category_count_key, 0)
                    set_count += category_count
                    categories.append(
                        dict(count=category_count, label=categorie_label))

                if open_ended:
                    # For home page best and worst location responses
                    from ureport.contacts.models import Contact

                    # Uses the lowercased value so "district" in any case
                    # counts contacts by district, matching the check above.
                    if location_part == 'district':
                        boundary_contacts_count = Contact.objects.filter(
                            org=org, district=osm_id).count()
                    else:
                        boundary_contacts_count = Contact.objects.filter(
                            org=org, state=osm_id).count()
                    unset_count = boundary_contacts_count - set_count

                results.append(dict(open_ended=open_ended, set=set_count,
                                    unset=unset_count, boundary=osm_id,
                                    label=boundary.get('name'),
                                    categories=categories))

        else:
            categories = []
            for categorie_label in categories_label:
                if categorie_label.lower() == 'other':
                    continue
                category_count_key = "ruleset:%s:category:%s" % (
                    self.ruleset_uuid, categorie_label.lower())
                category_count = question_results.get(category_count_key, 0)
                categories.append(
                    dict(count=category_count, label=categorie_label))

            results.append(dict(open_ended=open_ended, set=responded,
                                unset=polled - responded, categories=categories))

    cache.set(key, {"results": results},
              PollQuestion.POLL_QUESTION_RESULTS_CACHE_TIMEOUT)

    return results