Esempio n. 1
0
def populate_poll_word_clouds(apps, schema_editor):
    """Build or refresh a ``PollWordCloud`` row for every open-ended question.

    For each org, every poll question is inspected. A question is treated as
    open-ended when it has exactly one active response category whose name
    does not contain "no response". For such questions the text of the
    matching ``polls_pollresult`` rows is lower-cased, split on
    non-alphanumeric characters and counted per word; the resulting
    ``{word: count}`` mapping is stored in ``PollWordCloud.words``.

    Args:
        apps: app registry used to look up the models via ``get_model``.
        schema_editor: unused here; part of the callable's signature.
    """
    PollQuestion = apps.get_model("polls", "PollQuestion")
    Org = apps.get_model("orgs", "Org")
    PollWordCloud = apps.get_model("stats", "PollWordCloud")

    # Values are bound by the database driver instead of being interpolated
    # into the statement, so quoting is handled for us.
    word_count_sql = """
        SELECT w.label, count(*) AS count
        FROM (
            SELECT regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
            FROM polls_pollresult
            WHERE polls_pollresult.org_id = %s
              AND polls_pollresult.flow = %s
              AND polls_pollresult.ruleset = %s
              AND polls_pollresult.text IS NOT NULL
              AND polls_pollresult.text NOT ILIKE %s
        ) w
        GROUP BY w.label;
    """

    for org in Org.objects.all():
        questions = (
            PollQuestion.objects.filter(poll__org_id=org.id)
            .select_related("poll")
            .prefetch_related("response_categories")
        )

        for question in questions:
            active_categories = question.response_categories.filter(
                is_active=True
            ).exclude(category__icontains="no response")

            # Only questions with a single real category are open-ended.
            if active_categories.count() != 1:
                continue

            sql_params = [
                org.id,
                question.poll.flow_uuid,
                question.ruleset_uuid,
                "http%",  # responses starting with a URL are skipped
            ]
            with connection.cursor() as cursor:
                cursor.execute(word_count_sql, sql_params)
                unclean_categories = get_dict_from_cursor(cursor)

            categories = {
                row["label"]: int(row["count"]) for row in unclean_categories
            }

            poll_word_cloud, _created = PollWordCloud.objects.get_or_create(
                org=org, question=question
            )
            poll_word_cloud.words = categories
            poll_word_cloud.save()
Esempio n. 2
0
    def calculate_results(self, segment=None):
        """Compute this question's results, store them in the cache and return them.

        Args:
            segment: optional dict; its ``location``, ``age`` and ``gender``
                entries select the breakdown. A location of ``state``,
                ``district`` or ``ward`` yields one entry per boundary;
                otherwise a non-empty ``age`` yields one entry per age group,
                and otherwise a non-empty ``gender`` yields one entry per
                gender. Without a segment a single overall entry is produced.

        Returns:
            A list of dicts, each holding ``set``, ``unset`` and
            ``categories`` (a list of ``{label, count}`` dicts), plus
            ``open_ended``/``boundary``/``label`` depending on the branch.
            For an open-ended question without a segment, ``categories`` holds
            up to 100 most frequent words of the responses, stop words
            excluded.
        """
        org = self.poll.org
        open_ended = self.is_open_ended()
        responded = self.get_responded()
        polled = self.get_polled()

        results = []

        if open_ended and not segment:
            # Values are bound by the database driver rather than being
            # interpolated into the statement.
            custom_sql = """
                SELECT w.label, count(*) AS count
                FROM (
                    SELECT regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                    FROM polls_pollresult
                    WHERE polls_pollresult.org_id = %s
                      AND polls_pollresult.flow = %s
                      AND polls_pollresult.ruleset = %s
                      AND polls_pollresult.text IS NOT NULL
                      AND polls_pollresult.text NOT ILIKE %s
                ) w
                GROUP BY w.label;
            """
            sql_params = [
                org.id, self.poll.flow_uuid, self.ruleset_uuid, "http%"
            ]
            with connection.cursor() as cursor:
                cursor.execute(custom_sql, sql_params)
                from ureport.utils import get_dict_from_cursor
                unclean_categories = get_dict_from_cursor(cursor)

            ureport_languages = getattr(settings, 'LANGUAGES',
                                        [('en', 'English')])

            org_languages = [
                lang[1].lower() for lang in ureport_languages
                if lang[0] == org.language
            ]

            # English stop words are always ignored, whatever the org language.
            if 'english' not in org_languages:
                org_languages.append('english')

            # A set keeps the per-word membership test cheap.
            ignore_words = set()
            for lang in org_languages:
                ignore_words.update(safe_get_stop_words(lang))

            categories = []

            # sort by count, then alphabetically
            unclean_categories = sorted(unclean_categories,
                                        key=lambda c:
                                        (-c['count'], c['label']))

            for category in unclean_categories:
                # Keep at most the 100 most frequent words.
                if len(categories) >= 100:
                    break
                label = category['label']
                if len(label) > 1 and label not in ignore_words:
                    categories.append(
                        dict(label=label, count=int(category['count'])))

            results.append(
                dict(open_ended=open_ended,
                     set=responded,
                     unset=polled - responded,
                     categories=categories))

        else:
            categories_label = self.response_categories.filter(
                is_active=True).values_list('category', flat=True)
            question_results = self.get_question_results()
            ignored_rules = PollResponseCategory.IGNORED_CATEGORY_RULES

            if segment:

                location_part = segment.get('location', '').lower()
                age_part = segment.get('age', '').lower()
                gender_part = segment.get('gender', '').lower()

                if location_part in ['state', 'district', 'ward']:

                    location_boundaries = org.get_segment_org_boundaries(
                        segment)

                    for boundary in location_boundaries:
                        categories = []
                        osm_id = boundary.get('osm_id').upper()
                        set_count = 0
                        unset_count_key = "ruleset:%s:nocategory:%s:%s" % (
                            self.ruleset_uuid, location_part, osm_id)
                        unset_count = question_results.get(unset_count_key, 0)

                        for categorie_label in categories_label:
                            if categorie_label.lower() in ignored_rules:
                                continue
                            category_count_key = "ruleset:%s:category:%s:%s:%s" % (
                                self.ruleset_uuid, categorie_label.lower(),
                                location_part, osm_id)
                            category_count = question_results.get(
                                category_count_key, 0)
                            set_count += category_count
                            categories.append(
                                dict(count=category_count,
                                     label=categorie_label))

                        results.append(
                            dict(open_ended=open_ended,
                                 set=set_count,
                                 unset=unset_count,
                                 boundary=osm_id,
                                 label=boundary.get('name'),
                                 categories=categories))
                elif age_part:
                    poll_year = self.poll.poll_date.year

                    # Keys ending in "born:<4-digit year>" carry the counts
                    # per year of birth.
                    born_results = {
                        k: v
                        for k, v in question_results.iteritems()
                        if k[-9:-5] == 'born'
                    }

                    # Both bounds are inclusive, matching the labels.
                    age_intervals = dict()
                    age_intervals['35+'] = (35, 2000)
                    age_intervals['31-34'] = (31, 34)
                    age_intervals['25-30'] = (25, 30)
                    age_intervals['20-24'] = (20, 24)
                    age_intervals['15-19'] = (15, 19)
                    age_intervals['0-14'] = (0, 14)

                    for age_group in age_intervals.keys():
                        lower_bound, upper_bound = age_intervals[age_group]
                        unset_count = 0

                        categories_count = dict()
                        for categorie_label in categories_label:
                            if categorie_label.lower() not in ignored_rules:
                                categories_count[categorie_label.lower()] = 0

                        for result_key, result_count in born_results.iteritems(
                        ):
                            age = poll_year - int(result_key[-4:])

                            # The upper bound used to be exclusive, which
                            # dropped ages 14, 19, 24, 30 and 34 from every
                            # group.
                            if not lower_bound <= age <= upper_bound:
                                continue

                            if 'nocategory' in result_key:
                                unset_count += result_count

                            for categorie_label in categories_label:
                                if categorie_label.lower() in ignored_rules:
                                    continue
                                if result_key.startswith(
                                        'ruleset:%s:category:%s:' %
                                        (self.ruleset_uuid,
                                         categorie_label.lower())):
                                    categories_count[
                                        categorie_label.lower(
                                        )] += result_count

                        categories = [
                            dict(count=v, label=k)
                            for k, v in categories_count.iteritems()
                        ]

                        set_count = sum([elt['count'] for elt in categories])

                        results.append(
                            dict(set=set_count,
                                 unset=unset_count,
                                 label=age_group,
                                 categories=categories))

                    results = sorted(results, key=lambda i: i['label'])

                elif gender_part:

                    genders = ['f', 'm']
                    gender_labels = dict(f=_('Female'), m=_('Male'))

                    for gender in genders:
                        categories = []
                        set_count = 0
                        unset_count_key = "ruleset:%s:nocategory:%s:%s" % (
                            self.ruleset_uuid, 'gender', gender)
                        unset_count = question_results.get(unset_count_key, 0)

                        for categorie_label in categories_label:
                            if categorie_label.lower() in ignored_rules:
                                continue
                            category_count_key = "ruleset:%s:category:%s:%s:%s" % (
                                self.ruleset_uuid, categorie_label.lower(),
                                'gender', gender)
                            category_count = question_results.get(
                                category_count_key, 0)
                            set_count += category_count
                            categories.append(
                                dict(count=category_count,
                                     label=categorie_label))

                        results.append(
                            dict(set=set_count,
                                 unset=unset_count,
                                 label=gender_labels.get(gender),
                                 categories=categories))

            else:
                categories = []
                for categorie_label in categories_label:
                    if categorie_label.lower() in ignored_rules:
                        continue
                    category_count_key = "ruleset:%s:category:%s" % (
                        self.ruleset_uuid, categorie_label.lower())
                    category_count = question_results.get(
                        category_count_key, 0)
                    categories.append(
                        dict(count=category_count, label=categorie_label))

                results.append(
                    dict(open_ended=open_ended,
                         set=responded,
                         unset=polled - responded,
                         categories=categories))

        # Unsegmented results and the state/age/gender segments are stored
        # with a timeout of None; any other segment uses the class timeout.
        cache_time = PollQuestion.POLL_QUESTION_RESULTS_CACHE_TIMEOUT
        if not segment:
            cache_time = None

        if segment and segment.get('location', '').lower() == 'state':
            cache_time = None

        if segment and segment.get('age', '').lower() == 'age':
            cache_time = None

        if segment and segment.get('gender', '').lower() == 'gender':
            cache_time = None

        key = PollQuestion.POLL_QUESTION_RESULTS_CACHE_KEY % (
            self.poll.org.pk, self.poll.pk, self.pk)
        if segment:
            # The segment becomes part of the key so each breakdown is cached
            # separately.
            substituted_segment = self.poll.org.substitute_segment(segment)
            key += ":" + slugify(unicode(json.dumps(substituted_segment)))

        cache.set(key, {"results": results}, cache_time)

        return results
Esempio n. 3
0
    def calculate_results(self, segment=None):
        """Compute this question's results, store them in the cache and return them.

        Args:
            segment: optional dict; its ``location``, ``age`` and ``gender``
                entries select the breakdown. A location of ``state``,
                ``district`` or ``ward`` yields one entry per boundary;
                otherwise a non-empty ``age`` yields one entry per age group,
                and otherwise a non-empty ``gender`` yields one entry per
                gender. Without a segment a single overall entry is produced.

        Returns:
            A list of dicts, each holding ``set``, ``unset`` and
            ``categories`` (a list of ``{label, count}`` dicts), plus
            ``open_ended``/``boundary``/``label`` depending on the branch.
            For an open-ended question without a segment, ``categories`` holds
            up to 100 most frequent words of the responses, stop words
            excluded.
        """
        org = self.poll.org
        open_ended = self.is_open_ended()
        responded = self.get_responded()
        polled = self.get_polled()

        results = []

        if open_ended and not segment:
            # Values are bound by the database driver rather than being
            # interpolated into the statement.
            custom_sql = """
                SELECT w.label, count(*) AS count
                FROM (
                    SELECT regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                    FROM polls_pollresult
                    WHERE polls_pollresult.org_id = %s
                      AND polls_pollresult.flow = %s
                      AND polls_pollresult.ruleset = %s
                      AND polls_pollresult.text IS NOT NULL
                      AND polls_pollresult.text NOT ILIKE %s
                ) w
                GROUP BY w.label;
            """
            sql_params = [org.id, self.poll.flow_uuid, self.ruleset_uuid, "http%"]
            with connection.cursor() as cursor:
                cursor.execute(custom_sql, sql_params)
                from ureport.utils import get_dict_from_cursor
                unclean_categories = get_dict_from_cursor(cursor)

            ureport_languages = getattr(settings, 'LANGUAGES', [('en', 'English')])

            org_languages = [lang[1].lower() for lang in ureport_languages if lang[0] == org.language]

            # English stop words are always ignored, whatever the org language.
            if 'english' not in org_languages:
                org_languages.append('english')

            # A set keeps the per-word membership test cheap.
            ignore_words = set()
            for lang in org_languages:
                ignore_words.update(safe_get_stop_words(lang))

            # Sort by count, then alphabetically, BEFORE applying the 100-word
            # cap: the query returns rows in no particular order, so capping
            # first would keep an arbitrary subset instead of the top words.
            unclean_categories = sorted(unclean_categories, key=lambda c: (-c['count'], c['label']))

            categories = []
            for category in unclean_categories:
                if len(categories) >= 100:
                    break
                label = category['label']
                if len(label) > 1 and label not in ignore_words:
                    categories.append(dict(label=label, count=int(category['count'])))

            results.append(dict(open_ended=open_ended, set=responded, unset=polled-responded, categories=categories))

        else:
            categories_label = self.response_categories.filter(is_active=True).values_list('category', flat=True)
            question_results = self.get_question_results()

            if segment:

                location_part = segment.get('location', '').lower()
                age_part = segment.get('age', '').lower()
                gender_part = segment.get('gender', '').lower()

                if location_part in ['state', 'district', 'ward']:

                    location_boundaries = org.get_segment_org_boundaries(segment)

                    for boundary in location_boundaries:
                        categories = []
                        osm_id = boundary.get('osm_id').upper()
                        set_count = 0
                        unset_count_key = "ruleset:%s:nocategory:%s:%s" % (self.ruleset_uuid, location_part, osm_id)
                        unset_count = question_results.get(unset_count_key, 0)

                        for categorie_label in categories_label:
                            category_count_key = "ruleset:%s:category:%s:%s:%s" % (
                                self.ruleset_uuid, categorie_label.lower(), location_part, osm_id)
                            category_count = question_results.get(category_count_key, 0)
                            set_count += category_count
                            categories.append(dict(count=category_count, label=categorie_label))

                        results.append(dict(open_ended=open_ended, set=set_count, unset=unset_count,
                                            boundary=osm_id, label=boundary.get('name'),
                                            categories=categories))
                elif age_part:
                    poll_year = self.poll.poll_date.year

                    # Keys ending in "born:<4-digit year>" carry the counts
                    # per year of birth.
                    born_results = {k: v for k, v in question_results.iteritems() if k[-9:-5] == 'born'}

                    # Both bounds are inclusive, matching the labels.
                    age_intervals = dict()
                    age_intervals['35+'] = (35, 2000)
                    age_intervals['31-34'] = (31, 34)
                    age_intervals['25-30'] = (25, 30)
                    age_intervals['20-24'] = (20, 24)
                    age_intervals['15-19'] = (15, 19)
                    age_intervals['0-14'] = (0, 14)

                    for age_group in age_intervals.keys():
                        lower_bound, upper_bound = age_intervals[age_group]
                        unset_count = 0

                        categories_count = dict()
                        for categorie_label in categories_label:
                            if categorie_label.lower() != 'other':
                                categories_count[categorie_label.lower()] = 0

                        for result_key, result_count in born_results.iteritems():
                            age = poll_year - int(result_key[-4:])

                            # The upper bound used to be exclusive, which
                            # dropped ages 14, 19, 24, 30 and 34 from every
                            # group.
                            if not lower_bound <= age <= upper_bound:
                                continue

                            if 'nocategory' in result_key:
                                unset_count += result_count

                            for categorie_label in categories_label:
                                if categorie_label.lower() == 'other':
                                    continue
                                prefix = 'ruleset:%s:category:%s:' % (self.ruleset_uuid, categorie_label.lower())
                                if result_key.startswith(prefix):
                                    categories_count[categorie_label.lower()] += result_count

                        categories = [dict(count=v, label=k) for k, v in categories_count.iteritems()]

                        set_count = sum([elt['count'] for elt in categories])

                        results.append(dict(set=set_count, unset=unset_count, label=age_group,
                                            categories=categories))

                    results = sorted(results, key=lambda i: i['label'])

                elif gender_part:

                    genders = ['f', 'm']
                    gender_labels = dict(f=_('Female'), m=_('Male'))

                    for gender in genders:
                        categories = []
                        set_count = 0
                        unset_count_key = "ruleset:%s:nocategory:%s:%s" % (self.ruleset_uuid, 'gender', gender)
                        unset_count = question_results.get(unset_count_key, 0)

                        for categorie_label in categories_label:
                            if categorie_label.lower() == 'other':
                                continue
                            category_count_key = "ruleset:%s:category:%s:%s:%s" % (
                                self.ruleset_uuid, categorie_label.lower(), 'gender', gender)
                            category_count = question_results.get(category_count_key, 0)
                            set_count += category_count
                            categories.append(dict(count=category_count, label=categorie_label))

                        results.append(dict(set=set_count, unset=unset_count, label=gender_labels.get(gender),
                                            categories=categories))

            else:
                categories = []
                for categorie_label in categories_label:
                    if categorie_label.lower() == 'other':
                        continue
                    category_count_key = "ruleset:%s:category:%s" % (self.ruleset_uuid, categorie_label.lower())
                    category_count = question_results.get(category_count_key, 0)
                    categories.append(dict(count=category_count, label=categorie_label))

                results.append(dict(open_ended=open_ended, set=responded, unset=polled-responded, categories=categories))

        # Unsegmented results and the state/age/gender segments are stored
        # with a timeout of None; any other segment uses the class timeout.
        cache_time = PollQuestion.POLL_QUESTION_RESULTS_CACHE_TIMEOUT
        if not segment:
            cache_time = None

        if segment and segment.get('location', '').lower() == 'state':
            cache_time = None

        if segment and segment.get('age', '').lower() == 'age':
            cache_time = None

        if segment and segment.get('gender', '').lower() == 'gender':
            cache_time = None

        key = PollQuestion.POLL_QUESTION_RESULTS_CACHE_KEY % (self.poll.org.pk, self.poll.pk, self.pk)
        if segment:
            # The segment becomes part of the key so each breakdown is cached
            # separately.
            substituted_segment = self.poll.org.substitute_segment(segment)
            key += ":" + slugify(unicode(json.dumps(substituted_segment)))

        cache.set(key, {"results": results}, cache_time)

        return results
Esempio n. 4
0
    def get_results(self, segment=None):
        """Return this question's results, serving them from the cache when present.

        Args:
            segment: optional dict. When given, its ``location`` entry must be
                ``state`` or ``district`` (case-insensitive) and results are
                broken down per boundary.

        Returns:
            A list of dicts with ``open_ended``, ``set``, ``unset`` and
            ``categories`` (plus ``boundary`` and ``label`` for segmented
            results), or ``None`` when a segment is given whose location is
            missing or is neither ``state`` nor ``district``. Freshly computed
            results are cached before being returned; the ``None`` case is
            not cached.
        """
        key = PollQuestion.POLL_QUESTION_RESULTS_CACHE_KEY % (self.poll.org.pk, self.poll.pk, self.pk)
        if segment:
            # The segment becomes part of the key so each breakdown is cached
            # separately.
            substituted_segment = self.poll.org.substitute_segment(segment)
            key += ":" + slugify(unicode(json.dumps(substituted_segment)))

        cached_value = cache.get(key, None)
        if cached_value:
            return cached_value["results"]

        org = self.poll.org
        open_ended = self.is_open_ended()
        responded = self.get_responded()
        polled = self.get_polled()

        results = []

        if open_ended and not segment:
            # Values are bound by the database driver rather than being
            # interpolated into the statement.
            custom_sql = """
                SELECT w.label, count(*) AS count
                FROM (
                    SELECT regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                    FROM polls_pollresult
                    WHERE polls_pollresult.org_id = %s
                      AND polls_pollresult.flow = %s
                      AND polls_pollresult.ruleset = %s
                ) w
                GROUP BY w.label
                ORDER BY count DESC;
            """
            sql_params = [org.id, self.poll.flow_uuid, self.ruleset_uuid]

            # The context manager closes the cursor even if the query fails.
            with connection.cursor() as cursor:
                cursor.execute(custom_sql, sql_params)
                from ureport.utils import get_dict_from_cursor
                unclean_categories = get_dict_from_cursor(cursor)

            ureport_languages = getattr(settings, 'LANGUAGES', [('en', 'English')])

            org_languages = [lang[1].lower() for lang in ureport_languages if lang[0] == org.language]

            # English stop words are always ignored, whatever the org language.
            if 'english' not in org_languages:
                org_languages.append('english')

            # A set keeps the per-word membership test cheap.
            ignore_words = set()
            for lang in org_languages:
                ignore_words.update(safe_get_stop_words(lang))

            categories = []

            # Rows arrive ordered by descending count, so the first 100
            # accepted words are the most frequent ones.
            for category in unclean_categories:
                if len(categories) >= 100:
                    break
                label = category['label']
                if len(label) > 1 and label not in ignore_words:
                    categories.append(dict(label=label, count=int(category['count'])))

            # sort by count, then alphabetically
            categories = sorted(categories, key=lambda c: (-c['count'], c['label']))
            results.append(dict(open_ended=open_ended, set=responded, unset=polled-responded, categories=categories))

        else:
            categories_label = self.response_categories.filter(is_active=True).values_list('category', flat=True)
            question_results = self.get_question_results()

            if segment:

                # A segment without a location is treated like an unsupported
                # location instead of raising AttributeError on None.
                location_part = (segment.get('location') or '').lower()

                if location_part not in ['state', 'district']:
                    return None

                location_boundaries = org.get_segment_org_boundaries(segment)

                for boundary in location_boundaries:
                    categories = []
                    osm_id = boundary.get('osm_id').upper()
                    set_count = 0
                    unset_count_key = "ruleset:%s:nocategory:%s:%s" % (self.ruleset_uuid, location_part, osm_id)
                    unset_count = question_results.get(unset_count_key, 0)

                    for categorie_label in categories_label:
                        category_count_key = "ruleset:%s:category:%s:%s:%s" % (
                            self.ruleset_uuid, categorie_label.lower(), location_part, osm_id)
                        category_count = question_results.get(category_count_key, 0)
                        set_count += category_count
                        categories.append(dict(count=category_count, label=categorie_label))

                    if open_ended:
                        # For home page best and worst location responses
                        from ureport.contacts.models import Contact
                        if segment.get('location') == 'District':
                            boundary_contacts_count = Contact.objects.filter(org=org, district=osm_id).count()
                        else:
                            boundary_contacts_count = Contact.objects.filter(org=org, state=osm_id).count()
                        unset_count = boundary_contacts_count - set_count

                    results.append(dict(open_ended=open_ended, set=set_count, unset=unset_count,
                                        boundary=osm_id, label=boundary.get('name'),
                                        categories=categories))

            else:
                categories = []
                for categorie_label in categories_label:
                    if categorie_label.lower() == 'other':
                        continue
                    category_count_key = "ruleset:%s:category:%s" % (self.ruleset_uuid, categorie_label.lower())
                    category_count = question_results.get(category_count_key, 0)
                    categories.append(dict(count=category_count, label=categorie_label))

                results.append(dict(open_ended=open_ended, set=responded, unset=polled-responded, categories=categories))

        cache.set(key, {"results": results}, PollQuestion.POLL_QUESTION_RESULTS_CACHE_TIMEOUT)

        return results
Esempio n. 5
0
    def get_results(self, segment=None):
        """Return this question's results, serving them from the cache when present.

        Args:
            segment: optional dict. When given, its ``location`` entry must be
                ``state`` or ``district`` (case-insensitive) and results are
                broken down per boundary.

        Returns:
            A list of dicts with ``open_ended``, ``set``, ``unset`` and
            ``categories`` (plus ``boundary`` and ``label`` for segmented
            results), or ``None`` when a segment is given whose location is
            missing or is neither ``state`` nor ``district``. Freshly computed
            results are cached before being returned; the ``None`` case is
            not cached.
        """
        key = PollQuestion.POLL_QUESTION_RESULTS_CACHE_KEY % (
            self.poll.org.pk, self.poll.pk, self.pk)
        if segment:
            # The segment becomes part of the key so each breakdown is cached
            # separately.
            substituted_segment = self.poll.org.substitute_segment(segment)
            key += ":" + slugify(unicode(json.dumps(substituted_segment)))

        cached_value = cache.get(key, None)
        if cached_value:
            return cached_value["results"]

        org = self.poll.org
        open_ended = self.is_open_ended()
        responded = self.get_responded()
        polled = self.get_polled()

        results = []

        if open_ended and not segment:
            # Values are bound by the database driver rather than being
            # interpolated into the statement.
            custom_sql = """
                SELECT w.label, count(*) AS count
                FROM (
                    SELECT regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                    FROM polls_pollresult
                    WHERE polls_pollresult.org_id = %s
                      AND polls_pollresult.flow = %s
                      AND polls_pollresult.ruleset = %s
                ) w
                GROUP BY w.label
                ORDER BY count DESC;
            """
            sql_params = [org.id, self.poll.flow_uuid, self.ruleset_uuid]

            # The context manager closes the cursor even if the query fails.
            with connection.cursor() as cursor:
                cursor.execute(custom_sql, sql_params)
                from ureport.utils import get_dict_from_cursor
                unclean_categories = get_dict_from_cursor(cursor)

            ureport_languages = getattr(settings, 'LANGUAGES',
                                        [('en', 'English')])

            org_languages = [
                lang[1].lower() for lang in ureport_languages
                if lang[0] == org.language
            ]

            # English stop words are always ignored, whatever the org language.
            if 'english' not in org_languages:
                org_languages.append('english')

            # A set keeps the per-word membership test cheap.
            ignore_words = set()
            for lang in org_languages:
                ignore_words.update(safe_get_stop_words(lang))

            categories = []

            # Rows arrive ordered by descending count, so the first 100
            # accepted words are the most frequent ones.
            for category in unclean_categories:
                if len(categories) >= 100:
                    break
                label = category['label']
                if len(label) > 1 and label not in ignore_words:
                    categories.append(
                        dict(label=label, count=int(category['count'])))

            # sort by count, then alphabetically
            categories = sorted(categories,
                                key=lambda c: (-c['count'], c['label']))
            results.append(
                dict(open_ended=open_ended,
                     set=responded,
                     unset=polled - responded,
                     categories=categories))

        else:
            categories_label = self.response_categories.filter(
                is_active=True).values_list('category', flat=True)
            question_results = self.get_question_results()

            if segment:

                # A segment without a location is treated like an unsupported
                # location instead of raising AttributeError on None.
                location_part = (segment.get('location') or '').lower()

                if location_part not in ['state', 'district']:
                    return None

                location_boundaries = org.get_segment_org_boundaries(segment)

                for boundary in location_boundaries:
                    categories = []
                    osm_id = boundary.get('osm_id').upper()
                    set_count = 0
                    unset_count_key = "ruleset:%s:nocategory:%s:%s" % (
                        self.ruleset_uuid, location_part, osm_id)
                    unset_count = question_results.get(unset_count_key, 0)

                    for categorie_label in categories_label:
                        category_count_key = "ruleset:%s:category:%s:%s:%s" % (
                            self.ruleset_uuid, categorie_label.lower(),
                            location_part, osm_id)
                        category_count = question_results.get(
                            category_count_key, 0)
                        set_count += category_count
                        categories.append(
                            dict(count=category_count, label=categorie_label))

                    if open_ended:
                        # For home page best and worst location responses
                        from ureport.contacts.models import Contact
                        if segment.get('location') == 'District':
                            boundary_contacts_count = Contact.objects.filter(
                                org=org, district=osm_id).count()
                        else:
                            boundary_contacts_count = Contact.objects.filter(
                                org=org, state=osm_id).count()
                        unset_count = boundary_contacts_count - set_count

                    results.append(
                        dict(open_ended=open_ended,
                             set=set_count,
                             unset=unset_count,
                             boundary=osm_id,
                             label=boundary.get('name'),
                             categories=categories))

            else:
                categories = []
                for categorie_label in categories_label:
                    if categorie_label.lower() == 'other':
                        continue
                    category_count_key = "ruleset:%s:category:%s" % (
                        self.ruleset_uuid, categorie_label.lower())
                    category_count = question_results.get(
                        category_count_key, 0)
                    categories.append(
                        dict(count=category_count, label=categorie_label))

                results.append(
                    dict(open_ended=open_ended,
                         set=responded,
                         unset=polled - responded,
                         categories=categories))

        cache.set(key, {"results": results},
                  PollQuestion.POLL_QUESTION_RESULTS_CACHE_TIMEOUT)

        return results