Example #1
0
    def get_value_summary(cls,
                          ruleset=None,
                          contact_field=None,
                          filters=None,
                          segment=None):
        """
        Returns the results for the passed in ruleset or contact field given the passed in filters and segments.

        Exactly one of ruleset or contact_field must be passed in, otherwise a ValueError is raised.

        Filters are a list of dicts which is handed on to get_filtered_value_summary, for example:
            { ruleset: 1515, categories: ["Red", "Blue", "Yellow"] }
        Only the 'ruleset', 'groups' and 'location' keys of each filter are inspected here, and only to work
        out which cache dependencies apply.

        Segments are expected in these formats instead:
            { ruleset: 1515, categories: ["Red", "Blue"] }  // segmenting by another field, for those categories
            { groups: [124, 151, 151] }                     // segment by each group in the passed in ids
            { location: "State", parent: null }             // segment for each admin boundary within the parent
            { contact_field: "Country", values: ["US", "EN", "RW"] } // segment by a contact field for these values

        For location segments, parent is the osm_id of the boundary whose children are segmented on; when it is
        empty the org's country is used. District and ward fields require a parent, otherwise a ValueError is raised.

        Returns a list of dicts, one per segment (or a single "All" entry when no segment is passed in), each having
        label, open_ended, set, unset and categories keys. Location segments also carry a boundary key. The list is
        cached in redis for VALUE_SUMMARY_CACHE_TIME and the cache key is registered against every dependency.
        """
        import hashlib

        from temba.contacts.models import ContactGroup, ContactField
        from temba.flows.models import TrueTest, RuleSet

        # start = time.time()
        results = []

        if (not ruleset and not contact_field) or (
                ruleset and contact_field):  # pragma: needs cover
            raise ValueError("Must specify either a RuleSet or Contact field.")

        org = ruleset.flow.org if ruleset else contact_field.org

        # note this is None (not False) when we are summarizing a contact field
        open_ended = ruleset and ruleset.ruleset_type == RuleSet.TYPE_WAIT_MESSAGE and len(
            ruleset.get_rules()) == 1

        # default our filters to an empty list if None are passed in
        if filters is None:
            filters = []

        # build the kwargs for our subcall
        kwargs = dict(ruleset=ruleset,
                      contact_field=contact_field,
                      filters=filters)

        # this is our list of dependencies, that is things that will blow away our results
        dependencies = set()
        fingerprint_dict = dict(filters=filters, segment=segment)
        if ruleset:
            fingerprint_dict['ruleset'] = ruleset.id
            dependencies.add(RULESET_KEY % ruleset.id)
        if contact_field:
            fingerprint_dict['contact_field'] = contact_field.id
            dependencies.add(CONTACT_KEY % contact_field.id)

        for contact_filter in filters:
            if 'ruleset' in contact_filter:
                dependencies.add(RULESET_KEY % contact_filter['ruleset'])
            if 'groups' in contact_filter:
                for group_id in contact_filter['groups']:
                    dependencies.add(GROUP_KEY % group_id)
            if 'location' in contact_filter:  # pragma: needs cover
                field = ContactField.get_by_label(org,
                                                  contact_filter['location'])
                dependencies.add(CONTACT_KEY % field.id)

        if segment:
            if 'ruleset' in segment:
                dependencies.add(RULESET_KEY % segment['ruleset'])
            if 'groups' in segment:  # pragma: needs cover
                for group_id in segment['groups']:
                    dependencies.add(GROUP_KEY % group_id)
            if 'location' in segment:
                field = ContactField.get_by_label(org, segment['location'])
                dependencies.add(CONTACT_KEY % field.id)

        # our final redis key will contain each dependency as well as a digest representing the fingerprint of
        # the kwargs passed to this method. We use hashlib rather than the builtin hash() because string hashes
        # are randomized per process on Python 3, which would stop processes from sharing cached values.
        fingerprint_json = dict_to_json(fingerprint_dict)
        if not isinstance(fingerprint_json, bytes):
            fingerprint_json = fingerprint_json.encode('utf-8')
        fingerprint = hashlib.sha256(fingerprint_json).hexdigest()

        # generate our key, every component (including the org id) is separated by a colon
        key = ":".join([VALUE_SUMMARY_CACHE_KEY, str(org.id)] +
                       sorted(dependencies) + [fingerprint])

        # does our value exist?
        r = get_redis_connection()
        cached = r.get(key)

        if cached is not None:
            try:
                return json_to_dict(cached)
            except Exception:  # pragma: needs cover
                # failed decoding, oh well, go calculate it instead
                pass

        if segment:
            # segmenting a result is the same as calculating the result with the addition of each
            # category as a filter so we expand upon the passed in filters to do this
            if 'ruleset' in segment and 'categories' in segment:
                for category in segment['categories']:
                    category_filter = list(filters)
                    category_filter.append(
                        dict(ruleset=segment['ruleset'],
                             categories=[category]))

                    # calculate our results for this segment
                    kwargs['filters'] = category_filter
                    (set_count, unset_count,
                     categories) = cls.get_filtered_value_summary(**kwargs)
                    results.append(
                        dict(label=category,
                             open_ended=open_ended,
                             set=set_count,
                             unset=unset_count,
                             categories=categories))

            # segmenting by groups instead, same principle but we add group filters
            elif 'groups' in segment:  # pragma: needs cover
                for group_id in segment['groups']:
                    # load our group
                    group = ContactGroup.user_groups.get(org=org, pk=group_id)

                    category_filter = list(filters)
                    category_filter.append(dict(groups=[group_id]))

                    # calculate our results for this segment
                    kwargs['filters'] = category_filter
                    (set_count, unset_count,
                     categories) = cls.get_filtered_value_summary(**kwargs)

                    # every other segment type reports 'unset', so we do too; 'unset_count' is the key this
                    # branch used to emit and is kept so existing consumers keep working
                    results.append(
                        dict(label=group.name,
                             open_ended=open_ended,
                             set=set_count,
                             unset=unset_count,
                             unset_count=unset_count,
                             categories=categories))

            # segmenting by a contact field, only for passed in categories
            elif 'contact_field' in segment and 'values' in segment:
                # look up the contact field
                field = ContactField.get_by_label(org,
                                                  segment['contact_field'])

                for value in segment['values']:
                    value_filter = list(filters)
                    value_filter.append(
                        dict(contact_field=field.pk, values=[value]))

                    # calculate our results for this segment
                    kwargs['filters'] = value_filter
                    (set_count, unset_count,
                     categories) = cls.get_filtered_value_summary(**kwargs)
                    results.append(
                        dict(label=value,
                             open_ended=open_ended,
                             set=set_count,
                             unset=unset_count,
                             categories=categories))

            # segmenting by a location field
            elif 'location' in segment:
                # look up the contact field
                field = ContactField.get_by_label(org, segment['location'])

                # make sure they are segmenting on a location type that makes sense
                if field.value_type not in [
                        Value.TYPE_STATE, Value.TYPE_DISTRICT, Value.TYPE_WARD
                ]:  # pragma: needs cover
                    raise ValueError(
                        _("Cannot segment on location for field that is not a State or District type"
                          ))

                # make sure our org has a country for location based responses
                if not org.country:  # pragma: needs cover
                    raise ValueError(
                        _("Cannot segment by location until country has been selected for organization"
                          ))

                parent_osm_id = segment.get('parent', None)

                # district and ward fields need an explicit parent, check that before we hit the database
                # for any boundaries
                if not parent_osm_id and field.value_type == Value.TYPE_DISTRICT:  # pragma: needs cover
                    raise ValueError(
                        _("You must specify a parent state to segment results by district"
                          ))

                if not parent_osm_id and field.value_type == Value.TYPE_WARD:  # pragma: needs cover
                    raise ValueError(
                        _("You must specify a parent state to segment results by ward"
                          ))

                # the boundaries we will segment by are the children of our parent, which defaults to the country
                parent = org.country
                if parent_osm_id:
                    parent = AdminBoundary.objects.get(osm_id=parent_osm_id)

                # get all the boundaries we are segmenting on
                boundaries = list(
                    AdminBoundary.objects.filter(
                        parent=parent).order_by('name'))

                # if this is a district or ward, we can speed things up by only including those boundaries in our
                # parent. The extra filter is appended to a copy of the caller's filters: nesting the filters list
                # inside a new list would hide every caller filter from the subcall.
                if parent and field.value_type in [
                        Value.TYPE_DISTRICT, Value.TYPE_WARD
                ]:
                    location_filters = list(filters) + [
                        dict(location=field.pk,
                             boundary=[b.osm_id for b in boundaries])
                    ]
                else:
                    location_filters = filters

                # get all the contacts segment by location first
                (location_set_contacts, location_unset_contacts, location_results) = \
                    cls.get_filtered_value_summary(contact_field=field, filters=location_filters, return_contacts=True)

                # now get the contacts for our primary query
                kwargs['return_contacts'] = True
                kwargs['filter_contacts'] = location_set_contacts
                (primary_set_contacts, primary_unset_contacts,
                 primary_results) = cls.get_filtered_value_summary(**kwargs)

                # build a map of osm_id to location_result
                osm_results = {lr['label']: lr for lr in location_results}
                empty_result = dict(contacts=list())

                for boundary in boundaries:
                    location_result = osm_results.get(boundary.osm_id,
                                                      empty_result)
                    location_contacts = set(location_result['contacts'])

                    # for each primary category, count the contacts that are also in this boundary
                    location_categories = [
                        dict(label=category['label'],
                             count=len(location_contacts
                                       & set(category['contacts'])))
                        for category in primary_results
                    ]

                    results.append(
                        dict(label=boundary.name,
                             boundary=boundary.osm_id,
                             open_ended=open_ended,
                             set=len(location_contacts
                                     & primary_set_contacts),
                             unset=len(location_contacts
                                       & primary_unset_contacts),
                             categories=location_categories))

                results.sort(key=lambda result: result['label'])

        else:
            (set_count, unset_count,
             categories) = cls.get_filtered_value_summary(**kwargs)

            # Check we have and we have an OPEN ENDED ruleset
            if ruleset and len(ruleset.get_rules()) == 1 and isinstance(
                    ruleset.get_rules()[0].test, TrueTest):
                cursor = connection.cursor()

                # the step uuid is bound as a query parameter rather than interpolated into the SQL text
                custom_sql = """SELECT w.label, count(*) AS count FROM (
                    SELECT
                      regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                    FROM msgs_msg INNER JOIN contacts_contact ON ( msgs_msg.contact_id = contacts_contact.id )
                    WHERE msgs_msg.id IN (
                      SELECT
                        msg_id
                        FROM flows_flowstep_messages, flows_flowstep
                        WHERE flowstep_id = flows_flowstep.id AND
                        flows_flowstep.step_uuid = %s
                      ) AND contacts_contact.is_test = False
                  ) w group by w.label order by count desc;"""

                cursor.execute(custom_sql, [str(ruleset.uuid)])
                unclean_categories = get_dict_from_cursor(cursor)
                categories = []

                org_languages = [
                    lang.name.lower()
                    for lang in org.languages.filter(orgs=None).distinct()
                ]

                if 'english' not in org_languages:
                    org_languages.append('english')

                # a set keeps the per-word stop word check cheap
                ignore_words = set()
                for lang in org_languages:
                    ignore_words.update(safe_get_stop_words(lang))

                # keep at most 100 words, skipping single characters and stop words
                for category in unclean_categories:
                    if len(categories) >= 100:
                        break

                    label = category['label']
                    if len(label) > 1 and label not in ignore_words:
                        categories.append(
                            dict(label=label, count=int(category['count'])))

                # sort by count, then alphabetically
                categories = sorted(categories,
                                    key=lambda c: (-c['count'], c['label']))

            results.append(
                dict(label=six.text_type(_("All")),
                     open_ended=open_ended,
                     set=set_count,
                     unset=unset_count,
                     categories=categories))

        # for each of our dependencies, add our key as something that depends on it
        pipe = r.pipeline()
        for dependency in dependencies:
            pipe.sadd(dependency, key)
            pipe.expire(dependency, VALUE_SUMMARY_CACHE_TIME)

        # and finally set our result
        pipe.set(key, dict_to_json(results), VALUE_SUMMARY_CACHE_TIME)
        pipe.execute()

        # leave me: nice for profiling..
        # from django.db import connection as db_connection, reset_queries
        # print "=" * 80
        # for query in db_connection.queries:
        #    print "%s - %s" % (query['time'], query['sql'][:1000])
        # print "-" * 80
        # print "took: %f" % (time.time() - start)
        # print "=" * 80
        # reset_queries()

        return results
Example #2
0
    def get_value_summary(cls, ruleset=None, contact_field=None, filters=None, segment=None):
        """
        Returns the results for the passed in ruleset or contact field given the passed in filters and segments.

        Exactly one of ruleset or contact_field must be passed in, otherwise a ValueError is raised.

        Filters are a list of dicts which is handed on to get_filtered_value_summary, for example:
            { ruleset: 1515, categories: ["Red", "Blue", "Yellow"] }
        Only the 'ruleset', 'groups' and 'location' keys of each filter are inspected here, and only to work
        out which cache dependencies apply.

        Segments are expected in these formats instead:
            { ruleset: 1515, categories: ["Red", "Blue"] }  // segmenting by another field, for those categories
            { groups: [124, 151, 151] }                     // segment by each group in the passed in ids
            { location: "State", parent: null }             // segment for each admin boundary within the parent
            { contact_field: "Country", values: ["US", "EN", "RW"] } // segment by a contact field for these values

        For location segments, parent is the osm_id of the boundary whose children are segmented on; when it is
        empty the org's country is used. District and ward fields require a parent, otherwise a ValueError is raised.

        Returns a list of dicts, one per segment (or a single "All" entry when no segment is passed in), each having
        label, open_ended, set, unset and categories keys. Location segments also carry a boundary key. The list is
        cached in redis for VALUE_SUMMARY_CACHE_TIME and the cache key is registered against every dependency.
        """
        from temba.contacts.models import ContactGroup, ContactField
        from temba.flows.models import TrueTest, RuleSet

        # only needed by the profiling snippet at the bottom of this method
        # start = time.time()
        results = []

        if (not ruleset and not contact_field) or (ruleset and contact_field):
            raise ValueError("Must specify either a RuleSet or Contact field.")

        org = ruleset.flow.org if ruleset else contact_field.org

        # note this is None (not False) when we are summarizing a contact field
        open_ended = ruleset and ruleset.ruleset_type == RuleSet.TYPE_WAIT_MESSAGE and len(ruleset.get_rules()) == 1

        # default our filters to an empty list if None are passed in
        if filters is None:
            filters = []

        # build the kwargs for our subcall
        kwargs = dict(ruleset=ruleset, contact_field=contact_field, filters=filters)

        # this is our list of dependencies, that is things that will blow away our results
        dependencies = set()
        fingerprint_dict = dict(filters=filters, segment=segment)
        if ruleset:
            fingerprint_dict['ruleset'] = ruleset.id
            dependencies.add(RULESET_KEY % ruleset.id)
        if contact_field:
            fingerprint_dict['contact_field'] = contact_field.id
            dependencies.add(CONTACT_KEY % contact_field.id)

        for contact_filter in filters:
            if 'ruleset' in contact_filter:
                dependencies.add(RULESET_KEY % contact_filter['ruleset'])
            if 'groups' in contact_filter:
                for group_id in contact_filter['groups']:
                    dependencies.add(GROUP_KEY % group_id)
            if 'location' in contact_filter:
                field = ContactField.get_by_label(org, contact_filter['location'])
                dependencies.add(CONTACT_KEY % field.id)

        if segment:
            if 'ruleset' in segment:
                dependencies.add(RULESET_KEY % segment['ruleset'])
            if 'groups' in segment:
                for group_id in segment['groups']:
                    dependencies.add(GROUP_KEY % group_id)
            if 'location' in segment:
                field = ContactField.get_by_label(org, segment['location'])
                dependencies.add(CONTACT_KEY % field.id)

        # our final redis key will contain each dependency as well as a HASH representing the fingerprint of the
        # kwargs passed to this method, generate that hash
        fingerprint = hash(dict_to_json(fingerprint_dict))

        # generate our key
        # NOTE: there is no separator between the org id and the first dependency; the format is left as is so
        # that values which are already cached keep resolving
        key = VALUE_SUMMARY_CACHE_KEY + ":" + str(org.id) + ":".join(sorted(dependencies)) + ":" + str(fingerprint)

        # does our value exist?
        r = get_redis_connection()
        cached = r.get(key)

        if cached is not None:
            try:
                return json_to_dict(cached)
            except Exception:
                # failed decoding, oh well, go calculate it instead
                pass

        if segment:
            # segmenting a result is the same as calculating the result with the addition of each
            # category as a filter so we expand upon the passed in filters to do this
            if 'ruleset' in segment and 'categories' in segment:
                for category in segment['categories']:
                    category_filter = list(filters)
                    category_filter.append(dict(ruleset=segment['ruleset'], categories=[category]))

                    # calculate our results for this segment
                    kwargs['filters'] = category_filter
                    (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                    results.append(dict(label=category, open_ended=open_ended,
                                        set=set_count, unset=unset_count, categories=categories))

            # segmenting by groups instead, same principle but we add group filters
            elif 'groups' in segment:
                for group_id in segment['groups']:
                    # load our group
                    group = ContactGroup.user_groups.get(org=org, pk=group_id)

                    category_filter = list(filters)
                    category_filter.append(dict(groups=[group_id]))

                    # calculate our results for this segment
                    kwargs['filters'] = category_filter
                    (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

                    # every other segment type reports 'unset', so we do too; 'unset_count' is the key this
                    # branch used to emit and is kept so existing consumers keep working
                    results.append(dict(label=group.name, open_ended=open_ended, set=set_count,
                                        unset=unset_count, unset_count=unset_count, categories=categories))

            # segmenting by a contact field, only for passed in categories
            elif 'contact_field' in segment and 'values' in segment:
                # look up the contact field
                field = ContactField.get_by_label(org, segment['contact_field'])

                for value in segment['values']:
                    value_filter = list(filters)
                    value_filter.append(dict(contact_field=field.pk, values=[value]))

                    # calculate our results for this segment
                    kwargs['filters'] = value_filter
                    (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                    results.append(dict(label=value, open_ended=open_ended,
                                        set=set_count, unset=unset_count, categories=categories))

            # segmenting by a location field
            elif 'location' in segment:
                # look up the contact field
                field = ContactField.get_by_label(org, segment['location'])

                # make sure they are segmenting on a location type that makes sense
                if field.value_type not in [Value.TYPE_STATE, Value.TYPE_DISTRICT, Value.TYPE_WARD]:
                    raise ValueError(_("Cannot segment on location for field that is not a State or District type"))

                # make sure our org has a country for location based responses
                if not org.country:
                    raise ValueError(_("Cannot segment by location until country has been selected for organization"))

                parent_osm_id = segment.get('parent', None)

                # district and ward fields need an explicit parent, check that before we hit the database
                # for any boundaries
                if not parent_osm_id and field.value_type == Value.TYPE_DISTRICT:
                    raise ValueError(_("You must specify a parent state to segment results by district"))

                if not parent_osm_id and field.value_type == Value.TYPE_WARD:
                    raise ValueError(_("You must specify a parent state to segment results by ward"))

                # the boundaries we will segment by are the children of our parent, which defaults to the country
                parent = org.country
                if parent_osm_id:
                    parent = AdminBoundary.objects.get(osm_id=parent_osm_id)

                # get all the boundaries we are segmenting on
                boundaries = list(AdminBoundary.objects.filter(parent=parent).order_by('name'))

                # if this is a district or ward, we can speed things up by only including those boundaries in our
                # parent. The extra filter is appended to a copy of the caller's filters: nesting the filters list
                # inside a new list would hide every caller filter from the subcall.
                if parent and field.value_type in [Value.TYPE_DISTRICT, Value.TYPE_WARD]:
                    boundary_filter = dict(location=field.pk, boundary=[b.osm_id for b in boundaries])
                    location_filters = list(filters) + [boundary_filter]
                else:
                    location_filters = filters

                # get all the contacts segment by location first
                (location_set_contacts, location_unset_contacts, location_results) = \
                    cls.get_filtered_value_summary(contact_field=field, filters=location_filters, return_contacts=True)

                # now get the contacts for our primary query
                kwargs['return_contacts'] = True
                kwargs['filter_contacts'] = location_set_contacts
                (primary_set_contacts, primary_unset_contacts, primary_results) = cls.get_filtered_value_summary(**kwargs)

                # build a map of osm_id to location_result
                osm_results = {lr['label']: lr for lr in location_results}
                empty_result = dict(contacts=list())

                for boundary in boundaries:
                    location_result = osm_results.get(boundary.osm_id, empty_result)
                    location_contacts = set(location_result['contacts'])

                    # for each primary category, count the contacts that are also in this boundary
                    location_categories = [dict(label=category['label'],
                                                count=len(location_contacts & set(category['contacts'])))
                                           for category in primary_results]

                    results.append(dict(label=boundary.name,
                                        boundary=boundary.osm_id,
                                        open_ended=open_ended,
                                        set=len(location_contacts & primary_set_contacts),
                                        unset=len(location_contacts & primary_unset_contacts),
                                        categories=location_categories))

                results.sort(key=lambda result: result['label'])

        else:
            (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

            # Check we have and we have an OPEN ENDED ruleset
            if ruleset and len(ruleset.get_rules()) == 1 and isinstance(ruleset.get_rules()[0].test, TrueTest):
                cursor = connection.cursor()

                # the step uuid is bound as a query parameter rather than interpolated into the SQL text
                custom_sql = """SELECT w.label, count(*) AS count FROM (
                    SELECT
                      regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                    FROM msgs_msg INNER JOIN contacts_contact ON ( msgs_msg.contact_id = contacts_contact.id )
                    WHERE msgs_msg.id IN (
                      SELECT
                        msg_id
                        FROM flows_flowstep_messages, flows_flowstep
                        WHERE flowstep_id = flows_flowstep.id AND
                        flows_flowstep.step_uuid = %s
                      ) AND contacts_contact.is_test = False
                  ) w group by w.label order by count desc;"""

                cursor.execute(custom_sql, [str(ruleset.uuid)])
                unclean_categories = get_dict_from_cursor(cursor)
                categories = []

                org_languages = [lang.name.lower() for lang in org.languages.filter(orgs=None).distinct()]

                if 'english' not in org_languages:
                    org_languages.append('english')

                # a set keeps the per-word stop word check cheap
                ignore_words = set()
                for lang in org_languages:
                    ignore_words.update(safe_get_stop_words(lang))

                # keep at most 100 words, skipping single characters and stop words
                for category in unclean_categories:
                    if len(categories) >= 100:
                        break

                    label = category['label']
                    if len(label) > 1 and label not in ignore_words:
                        categories.append(dict(label=label, count=int(category['count'])))

                # sort by count, then alphabetically
                categories = sorted(categories, key=lambda c: (-c['count'], c['label']))

            results.append(dict(label=unicode(_("All")), open_ended=open_ended,
                                set=set_count, unset=unset_count, categories=categories))

        # for each of our dependencies, add our key as something that depends on it
        pipe = r.pipeline()
        for dependency in dependencies:
            pipe.sadd(dependency, key)
            pipe.expire(dependency, VALUE_SUMMARY_CACHE_TIME)

        # and finally set our result
        pipe.set(key, dict_to_json(results), VALUE_SUMMARY_CACHE_TIME)
        pipe.execute()

        # leave me: nice for profiling..
        #from django.db import connection as db_connection, reset_queries
        #print "=" * 80
        #for query in db_connection.queries:
        #    print "%s - %s" % (query['time'], query['sql'][:1000])
        #print "-" * 80
        #print "took: %f" % (time.time() - start)
        #print "=" * 80
        #reset_queries()

        return results
Example #3
0
    def get_value_summary(cls,
                          ruleset=None,
                          contact_field=None,
                          filters=None,
                          segment=None,
                          latest_only=True):
        """
        Returns the results for the passed in ruleset or contact field given the passed in filters and segments.

        Filters are expected in the following formats:
            { field: rulesetId, categories: ["Red", "Blue", "Yellow"] }

        Segments are expected in these formats instead:
            { ruleset: 1515, categories: ["Red", "Blue"] }  // segmenting by another field, for those categories
            { groups: 124,151,151 }                         // segment by each each group in the passed in ids
            { location: "State", parent: null }             // segment for each admin boundary within the parent
        """
        from temba.contacts.models import ContactGroup, ContactField
        from temba.flows.models import TrueTest

        results = []

        if (not ruleset and not contact_field) or (ruleset and contact_field):
            raise Exception("Must specify either a RuleSet or Contact field.")

        org = ruleset.flow.org if ruleset else contact_field.org

        open_ended = ruleset and len(ruleset.get_rules()) == 1

        # default our filters to an empty list if None are passed in
        if filters is None:
            filters = []

        # build the kwargs for our subcall
        kwargs = dict(ruleset=ruleset,
                      contact_field=contact_field,
                      filters=filters,
                      latest_only=latest_only)

        # this is our list of dependencies, that is things that will blow away our results
        dependencies = set()
        fingerprint_dict = dict(filters=filters,
                                segment=segment,
                                latest_only=latest_only)
        if ruleset:
            fingerprint_dict['ruleset'] = ruleset.id
            dependencies.add(RULESET_KEY % ruleset.id)
        if contact_field:
            fingerprint_dict['contact_field'] = contact_field.id
            dependencies.add(CONTACT_KEY % contact_field.id)

        for filter in filters:
            if 'ruleset' in filter:
                dependencies.add('vsr%d' % filter['ruleset'])
            if 'groups' in filter:
                for group_id in filter['groups']:
                    dependencies.add(GROUP_KEY % group_id)
            if 'location' in filter:
                field = ContactField.objects.get(
                    org=org, label__iexact=filter['location'])
                dependencies.add(CONTACT_KEY % field.id)

        if segment:
            if 'ruleset' in segment:
                dependencies.add('vsr%d' % segment['ruleset'])
            if 'groups' in segment:
                for group_id in segment['groups']:
                    dependencies.add(GROUP_KEY % group_id)
            if 'location' in segment:
                field = ContactField.objects.get(
                    org=org, label__iexact=segment['location'])
                dependencies.add(CONTACT_KEY % field.id)

        # our final redis key will contain each dependency as well as a HASH representing the fingerprint of the
        # kwargs passed to this method, generate that hash
        fingerprint = hash(dict_to_json(fingerprint_dict))

        # generate our key
        key = VALUE_SUMMARY_CACHE_KEY + ":" + ":".join(
            sorted(list(dependencies))) + ":" + str(fingerprint)

        # does our value exist?
        r = get_redis_connection()
        cached = r.get(key)

        if not cached is None:
            try:
                return json_to_dict(cached)
            except:
                # failed decoding, oh well, go calculate it instead
                pass

        if segment:
            # segmenting a result is the same as calculating the result with the addition of each
            # category as a filter so we expand upon the passed in filters to do this
            if 'categories' in segment:
                for category in segment['categories']:
                    category_filter = list(filters)
                    category_filter.append(
                        dict(ruleset=segment['ruleset'],
                             categories=[category]))

                    # calculate our results for this segment
                    kwargs['filters'] = category_filter
                    (set_count, unset_count,
                     categories) = cls.get_filtered_value_summary(**kwargs)
                    results.append(
                        dict(label=category,
                             open_ended=open_ended,
                             set=set_count,
                             unset=unset_count,
                             categories=categories))

            # segmenting by groups instead, same principle but we add group filters
            elif 'groups' in segment:
                for group_id in segment['groups']:
                    # load our group
                    group = ContactGroup.objects.get(is_active=True,
                                                     org=org,
                                                     pk=group_id)

                    category_filter = list(filters)
                    category_filter.append(dict(groups=[group_id]))

                    # calculate our results for this segment
                    kwargs['filters'] = category_filter
                    (set_count, unset_count,
                     categories) = cls.get_filtered_value_summary(**kwargs)
                    results.append(
                        dict(label=group.name,
                             open_ended=open_ended,
                             set=set_count,
                             unset_count=unset_count,
                             categories=categories))

            # segmenting by a location field
            elif 'location' in segment:
                # look up the contact field
                field = ContactField.objects.get(
                    org=org, label__iexact=segment['location'])

                # make sure they are segmenting on a location type that makes sense
                if not field.value_type in [STATE, DISTRICT]:
                    raise Exception(
                        _("Cannot segment on location for field that is not a State or District type"
                          ))

                # make sure our org has a country for location based responses
                if not org.country:
                    raise Exception(
                        _("Cannot segment by location until country has been selected for organization"
                          ))

                # the boundaries we will segment by
                parent = org.country

                # figure out our parent
                parent_osm_id = segment.get('parent', None)
                if parent_osm_id:
                    parent = AdminBoundary.objects.get(osm_id=parent_osm_id)

                # if the field is a district field, they need to specify the parent state
                if not parent_osm_id and field.value_type == DISTRICT:
                    raise Exception(
                        _("You must specify a parent state to segment results by district"
                          ))

                # now segment by all the children of this parent
                for boundary in AdminBoundary.objects.filter(
                        parent=parent).order_by('name'):
                    boundary_filter = list(filters)
                    boundary_filter.append(
                        dict(location=field.id, boundary=boundary.osm_id))
                    kwargs['filters'] = boundary_filter

                    # calculate our results for this segment
                    (set_count, unset_count,
                     categories) = cls.get_filtered_value_summary(**kwargs)
                    results.append(
                        dict(label=boundary.name,
                             boundary=boundary.osm_id,
                             open_ended=open_ended,
                             set=set_count,
                             unset=unset_count,
                             categories=categories))

        else:
            (set_count, unset_count,
             categories) = cls.get_filtered_value_summary(**kwargs)

            # Check we have and we have an OPEN ENDED ruleset
            if ruleset and len(ruleset.get_rules()) == 1 and isinstance(
                    ruleset.get_rules()[0].test, TrueTest):
                cursor = connection.cursor()

                custom_sql = """
                  SELECT w.label, count(*) AS count FROM (
                    SELECT
                      regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                    FROM msgs_msg
                    WHERE id IN (
                      SELECT
                        msg_id
                        FROM flows_flowstep_messages, flows_flowstep
                        WHERE flowstep_id = flows_flowstep.id AND
                        flows_flowstep.step_uuid = '%s'
                      )
                  ) w group by w.label order by count desc;
                """ % ruleset.uuid

                cursor.execute(custom_sql)
                unclean_categories = get_dict_from_cursor(cursor)
                categories = []
                ignore_words = get_stop_words('english')

                for category in unclean_categories:
                    if len(category['label']) > 1 and category[
                            'label'] not in ignore_words and len(
                                categories) < 100:
                        categories.append(
                            dict(label=category['label'],
                                 count=int(category['count'])))

                # sort by count, then alphabetically
                categories = sorted(categories,
                                    key=lambda c: (-c['count'], c['label']))

            results.append(
                dict(label=unicode(_("All")),
                     open_ended=open_ended,
                     set=set_count,
                     unset=unset_count,
                     categories=categories))

        # cache this result set
        r.set(key, dict_to_json(results), VALUE_SUMMARY_CACHE_TIME)

        return results
Example #4
0
    def get_value_summary(cls, ruleset=None, contact_field=None, filters=None, segment=None, latest_only=True):
        """
        Returns the results for the passed in ruleset or contact field given the passed in filters and segment.

        Exactly one of ``ruleset`` or ``contact_field`` must be given. Results are cached in redis under a key
        built from the dependencies of the query plus a fingerprint of the arguments.

        Filters are dicts; the keys this method itself inspects (to work out cache dependencies) are:
            { ruleset: 1515, categories: ["Red", "Blue", "Yellow"] }
            { groups: [124, 151] }
            { location: "State" }
        The filters are otherwise passed through untouched to get_filtered_value_summary.

        Segments are expected in these formats instead:
            { ruleset: 1515, categories: ["Red", "Blue"] }  // segmenting by another field, for those categories
            { groups: 124,151,151 }                         // segment by each each group in the passed in ids
            { location: "State", parent: null }             // segment for each admin boundary within the parent

        :param ruleset: the RuleSet to summarize (mutually exclusive with contact_field)
        :param contact_field: the ContactField to summarize (mutually exclusive with ruleset)
        :param filters: optional list of filter dicts, defaults to no filters
        :param segment: optional segment dict, see formats above
        :param latest_only: passed through to get_filtered_value_summary and included in the cache fingerprint
        :return: a list of dicts, one per segment (or a single "All" entry when not segmenting), each with the
                 keys ``label``, ``open_ended``, ``set``, ``unset`` and ``categories``; location segments also
                 carry ``boundary`` and group segments also carry the legacy ``unset_count`` key
        :raises Exception: if both or neither of ruleset / contact_field are given, or if a location segment
                           is not usable (wrong field type, org without country, district without parent)
        """
        from temba.contacts.models import ContactGroup, ContactField
        from temba.flows.models import TrueTest

        results = []

        if (not ruleset and not contact_field) or (ruleset and contact_field):
            raise Exception("Must specify either a RuleSet or Contact field.")

        org = ruleset.flow.org if ruleset else contact_field.org

        # NOTE: this is None (not False) when summarizing a contact field, callers see that value as is
        open_ended = ruleset and len(ruleset.get_rules()) == 1

        # default our filters to an empty list if None are passed in
        if filters is None:
            filters = []

        # build the kwargs for our subcall
        kwargs = dict(ruleset=ruleset, contact_field=contact_field, filters=filters, latest_only=latest_only)

        # this is our list of dependencies, that is things that will blow away our results
        dependencies = set()
        fingerprint_dict = dict(filters=filters, segment=segment, latest_only=latest_only)
        if ruleset:
            fingerprint_dict['ruleset'] = ruleset.id
            dependencies.add(RULESET_KEY % ruleset.id)
        if contact_field:
            fingerprint_dict['contact_field'] = contact_field.id
            dependencies.add(CONTACT_KEY % contact_field.id)

        for filter_spec in filters:
            if 'ruleset' in filter_spec:
                dependencies.add('vsr%d' % filter_spec['ruleset'])
            if 'groups' in filter_spec:
                for group_id in filter_spec['groups']:
                    dependencies.add(GROUP_KEY % group_id)
            if 'location' in filter_spec:
                field = ContactField.objects.get(org=org, label__iexact=filter_spec['location'])
                dependencies.add(CONTACT_KEY % field.id)

        if segment:
            if 'ruleset' in segment:
                dependencies.add('vsr%d' % segment['ruleset'])
            if 'groups' in segment:
                for group_id in segment['groups']:
                    dependencies.add(GROUP_KEY % group_id)
            if 'location' in segment:
                field = ContactField.objects.get(org=org, label__iexact=segment['location'])
                dependencies.add(CONTACT_KEY % field.id)

        # our final redis key will contain each dependency as well as a HASH representing the fingerprint of the
        # kwargs passed to this method, generate that hash
        # NOTE(review): hash() of a string is only stable across processes when hash randomization is off,
        # confirm that holds for the interpreter this runs under
        fingerprint = hash(dict_to_json(fingerprint_dict))

        # generate our key, dependencies are sorted so the same set always yields the same key
        key = VALUE_SUMMARY_CACHE_KEY + ":" + ":".join(sorted(dependencies)) + ":" + str(fingerprint)

        # does our value exist?
        r = get_redis_connection()
        cached = r.get(key)

        if cached is not None:
            try:
                return json_to_dict(cached)
            except Exception:
                # failed decoding, oh well, go calculate it instead
                pass

        if segment:
            # segmenting a result is the same as calculating the result with the addition of each
            # category as a filter so we expand upon the passed in filters to do this
            if 'categories' in segment:
                for category in segment['categories']:
                    category_filter = list(filters)
                    category_filter.append(dict(ruleset=segment['ruleset'], categories=[category]))

                    # calculate our results for this segment
                    kwargs['filters'] = category_filter
                    (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                    results.append(dict(label=category, open_ended=open_ended, set=set_count, unset=unset_count,
                                        categories=categories))

            # segmenting by groups instead, same principle but we add group filters
            elif 'groups' in segment:
                for group_id in segment['groups']:
                    # load our group
                    group = ContactGroup.objects.get(is_active=True, org=org, pk=group_id)

                    category_filter = list(filters)
                    category_filter.append(dict(groups=[group_id]))

                    # calculate our results for this segment
                    kwargs['filters'] = category_filter
                    (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

                    # every other branch reports the count as 'unset'; 'unset_count' is kept alongside it
                    # so anything already reading the old key for group segments keeps working
                    results.append(dict(label=group.name, open_ended=open_ended, set=set_count, unset=unset_count,
                                        unset_count=unset_count, categories=categories))

            # segmenting by a location field
            elif 'location' in segment:
                # look up the contact field
                field = ContactField.objects.get(org=org, label__iexact=segment['location'])

                # make sure they are segmenting on a location type that makes sense
                if field.value_type not in [STATE, DISTRICT]:
                    raise Exception(_("Cannot segment on location for field that is not a State or District type"))

                # make sure our org has a country for location based responses
                if not org.country:
                    raise Exception(_("Cannot segment by location until country has been selected for organization"))

                # the boundaries we will segment by
                parent = org.country

                # figure out our parent
                parent_osm_id = segment.get('parent', None)
                if parent_osm_id:
                    parent = AdminBoundary.objects.get(osm_id=parent_osm_id)

                # if the field is a district field, they need to specify the parent state
                if not parent_osm_id and field.value_type == DISTRICT:
                    raise Exception(_("You must specify a parent state to segment results by district"))

                # now segment by all the children of this parent
                for boundary in AdminBoundary.objects.filter(parent=parent).order_by('name'):
                    boundary_filter = list(filters)
                    boundary_filter.append(dict(location=field.id, boundary=boundary.osm_id))
                    kwargs['filters'] = boundary_filter

                    # calculate our results for this segment
                    (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                    results.append(dict(label=boundary.name, boundary=boundary.osm_id, open_ended=open_ended,
                                        set=set_count, unset=unset_count, categories=categories))

        else:
            (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

            # Check we have a ruleset and that it is an OPEN ENDED one (a single rule with a TrueTest)
            rules = ruleset.get_rules() if ruleset else []
            if len(rules) == 1 and isinstance(rules[0].test, TrueTest):
                cursor = connection.cursor()

                # split every message recorded on this step into lowercase words and count each word;
                # the uuid is handed to the driver as a parameter rather than formatted into the SQL
                custom_sql = """
                  SELECT w.label, count(*) AS count FROM (
                    SELECT
                      regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                    FROM msgs_msg
                    WHERE id IN (
                      SELECT
                        msg_id
                        FROM flows_flowstep_messages, flows_flowstep
                        WHERE flowstep_id = flows_flowstep.id AND
                        flows_flowstep.step_uuid = %s
                      )
                  ) w group by w.label order by count desc;
                """

                cursor.execute(custom_sql, [ruleset.uuid])
                unclean_categories = get_dict_from_cursor(cursor)
                categories = []
                ignore_words = get_stop_words('english')

                for category in unclean_categories:
                    # we only keep the first 100 usable words, nothing after that can be added
                    if len(categories) >= 100:
                        break

                    # skip single characters / empty strings and stop words
                    if len(category['label']) > 1 and category['label'] not in ignore_words:
                        categories.append(dict(label=category['label'], count=int(category['count'])))

                # sort by count, then alphabetically
                categories = sorted(categories, key=lambda c: (-c['count'], c['label']))

            results.append(dict(label=unicode(_("All")), open_ended=open_ended, set=set_count, unset=unset_count,
                                categories=categories))

        # cache this result set
        r.set(key, dict_to_json(results), VALUE_SUMMARY_CACHE_TIME)

        return results