def get_value_summary(cls, ruleset=None, contact_field=None, filters=None, segment=None):
    """
    Returns the summary results for the passed in ruleset or contact field, narrowed by the passed in
    filters and optionally broken down by the passed in segment.

    Exactly one of ``ruleset`` or ``contact_field`` must be provided. Results are cached in redis; the cache
    key is registered against every ruleset, group and contact field the query depends on.

    Filters are a list of dicts. The shapes built and inspected by this method are:

        { ruleset: 1515, categories: ["Red", "Blue"] }
        { groups: [124, 151] }
        { location: ..., boundary: [...] }
        { contact_field: 12, values: ["US"] }

    Segments are expected in these formats instead:

        { ruleset: 1515, categories: ["Red", "Blue"] }            // one result per category of another ruleset
        { groups: [124, 151] }                                    // one result per group in the passed in ids
        { location: "State", parent: null }                       // one result per admin boundary in the parent
        { contact_field: "Country", values: ["US", "EN", "RW"] }  // one result per value of a contact field

    Returns a list of dicts, each with ``label``, ``open_ended``, ``set``, ``unset`` and ``categories`` keys
    (location segments also carry ``boundary``). Without a segment the list holds a single "All" entry.

    Raises ValueError if neither or both of ``ruleset`` / ``contact_field`` are given, or if a location
    segment cannot be resolved (wrong field type, org without a country, missing parent).
    """
    # validate our arguments before doing anything else, including our local imports
    if (not ruleset and not contact_field) or (ruleset and contact_field):  # pragma: needs cover
        raise ValueError("Must specify either a RuleSet or Contact field.")

    from temba.contacts.models import ContactGroup, ContactField
    from temba.flows.models import TrueTest, RuleSet

    # start = time.time()
    results = []

    org = ruleset.flow.org if ruleset else contact_field.org

    # a wait-for-message ruleset with a single rule collects free text
    open_ended = ruleset and ruleset.ruleset_type == RuleSet.TYPE_WAIT_MESSAGE and len(ruleset.get_rules()) == 1

    # default our filters to an empty list if None are passed in
    if filters is None:
        filters = []

    # build the kwargs for our subcall
    kwargs = dict(ruleset=ruleset, contact_field=contact_field, filters=filters)

    # this is our list of dependencies, that is things that will blow away our results
    dependencies = set()

    fingerprint_dict = dict(filters=filters, segment=segment)
    if ruleset:
        fingerprint_dict['ruleset'] = ruleset.id
        dependencies.add(RULESET_KEY % ruleset.id)
    if contact_field:
        fingerprint_dict['contact_field'] = contact_field.id
        dependencies.add(CONTACT_KEY % contact_field.id)

    for contact_filter in filters:
        if 'ruleset' in contact_filter:
            dependencies.add(RULESET_KEY % contact_filter['ruleset'])
        if 'groups' in contact_filter:
            for group_id in contact_filter['groups']:
                dependencies.add(GROUP_KEY % group_id)
        if 'location' in contact_filter:  # pragma: needs cover
            field = ContactField.get_by_label(org, contact_filter['location'])
            dependencies.add(CONTACT_KEY % field.id)

    if segment:
        if 'ruleset' in segment:
            dependencies.add(RULESET_KEY % segment['ruleset'])
        if 'groups' in segment:  # pragma: needs cover
            for group_id in segment['groups']:
                dependencies.add(GROUP_KEY % group_id)
        if 'location' in segment:
            field = ContactField.get_by_label(org, segment['location'])
            dependencies.add(CONTACT_KEY % field.id)

    # our final redis key will contain each dependency as well as a HASH representing the fingerprint of the
    # kwargs passed to this method, generate that hash
    # NOTE(review): the builtin hash() of a string is salted per process on Python 3 unless PYTHONHASHSEED is
    # fixed, in which case this key would not be shared between worker processes - confirm for this deployment
    fingerprint = hash(dict_to_json(fingerprint_dict))

    # generate our key (there is no separator between the org id and the first dependency, this is kept as is
    # so that existing cache keys remain valid)
    key = VALUE_SUMMARY_CACHE_KEY + ":" + str(org.id) + ":".join(sorted(dependencies)) + ":" + str(fingerprint)

    # does our value exist?
    r = get_redis_connection()
    cached = r.get(key)

    if cached is not None:
        try:
            return json_to_dict(cached)
        except Exception:  # pragma: needs cover
            # failed decoding, oh well, go calculate it instead
            pass

    if segment:
        # segmenting a result is the same as calculating the result with the addition of each
        # category as a filter so we expand upon the passed in filters to do this
        if 'ruleset' in segment and 'categories' in segment:
            for category in segment['categories']:
                # calculate our results for this segment
                kwargs['filters'] = list(filters) + [dict(ruleset=segment['ruleset'], categories=[category])]
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                results.append(dict(label=category, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

        # segmenting by groups instead, same principle but we add group filters
        elif 'groups' in segment:  # pragma: needs cover
            for group_id in segment['groups']:
                # load our group
                group = ContactGroup.user_groups.get(org=org, pk=group_id)

                # calculate our results for this segment
                kwargs['filters'] = list(filters) + [dict(groups=[group_id])]
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

                # every other segment type reports this count as 'unset'; 'unset_count' is the key this
                # branch has historically emitted and is kept so existing readers don't break
                results.append(dict(label=group.name, open_ended=open_ended,
                                    set=set_count, unset=unset_count, unset_count=unset_count,
                                    categories=categories))

        # segmenting by a contact field, only for passed in categories
        elif 'contact_field' in segment and 'values' in segment:
            # look up the contact field
            field = ContactField.get_by_label(org, segment['contact_field'])

            for value in segment['values']:
                # calculate our results for this segment
                kwargs['filters'] = list(filters) + [dict(contact_field=field.pk, values=[value])]
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                results.append(dict(label=value, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

        # segmenting by a location field
        elif 'location' in segment:
            # look up the contact field
            field = ContactField.get_by_label(org, segment['location'])

            # make sure they are segmenting on a location type that makes sense
            if field.value_type not in [Value.TYPE_STATE, Value.TYPE_DISTRICT, Value.TYPE_WARD]:  # pragma: needs cover
                raise ValueError(_("Cannot segment on location for field that is not a State or District type"))

            # make sure our org has a country for location based responses
            if not org.country:  # pragma: needs cover
                raise ValueError(_("Cannot segment by location until country has been selected for organization"))

            # figure out our parent
            parent_osm_id = segment.get('parent', None)

            # district and ward fields need an explicit parent, check that before we hit the database
            if not parent_osm_id and field.value_type == Value.TYPE_DISTRICT:  # pragma: needs cover
                raise ValueError(_("You must specify a parent state to segment results by district"))

            if not parent_osm_id and field.value_type == Value.TYPE_WARD:  # pragma: needs cover
                raise ValueError(_("You must specify a parent state to segment results by ward"))

            # the boundaries we will segment by are the children of our parent, defaulting to the country
            parent = org.country
            if parent_osm_id:
                parent = AdminBoundary.objects.get(osm_id=parent_osm_id)

            # get all the boundaries we are segmenting on
            boundaries = list(AdminBoundary.objects.filter(parent=parent).order_by('name'))

            # if this is a district, we can speed things up by only including those districts in our parent, build
            # the filter for that
            if parent and field.value_type in [Value.TYPE_DISTRICT, Value.TYPE_WARD]:
                # the passed in filters must stay a flat list of filter dicts, same as every other segment type
                location_filters = list(filters) + [dict(location=field.pk,
                                                         boundary=[b.osm_id for b in boundaries])]
            else:
                location_filters = filters

            # get all the contacts segment by location first
            (location_set_contacts, location_unset_contacts, location_results) = \
                cls.get_filtered_value_summary(contact_field=field, filters=location_filters, return_contacts=True)

            # now get the contacts for our primary query
            kwargs['return_contacts'] = True
            kwargs['filter_contacts'] = location_set_contacts
            (primary_set_contacts, primary_unset_contacts, primary_results) = \
                cls.get_filtered_value_summary(**kwargs)

            # build a map of osm_id to location_result
            osm_results = {lr['label']: lr for lr in location_results}
            empty_result = dict(contacts=list())

            for boundary in boundaries:
                location_result = osm_results.get(boundary.osm_id, empty_result)
                location_contacts = set(location_result['contacts'])

                # each primary category is narrowed to the contacts located in this boundary
                location_categories = [
                    dict(label=category['label'], count=len(location_contacts & set(category['contacts'])))
                    for category in primary_results
                ]

                results.append(dict(label=boundary.name, boundary=boundary.osm_id, open_ended=open_ended,
                                    set=len(location_contacts & primary_set_contacts),
                                    unset=len(location_contacts & primary_unset_contacts),
                                    categories=location_categories))

            results = sorted(results, key=lambda res: res['label'])

    else:
        (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

        # Check we have and we have an OPEN ENDED ruleset
        rules = ruleset.get_rules() if ruleset else []
        if len(rules) == 1 and isinstance(rules[0].test, TrueTest):
            cursor = connection.cursor()

            # word counts across every message received at this step, excluding test contacts
            custom_sql = """SELECT w.label, count(*) AS count FROM (
                SELECT
                  regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                FROM msgs_msg
                INNER JOIN contacts_contact ON ( msgs_msg.contact_id = contacts_contact.id )
                WHERE msgs_msg.id IN (
                  SELECT msg_id
                  FROM flows_flowstep_messages, flows_flowstep
                  WHERE flowstep_id = flows_flowstep.id AND flows_flowstep.step_uuid = %s
                ) AND contacts_contact.is_test = False
              ) w group by w.label order by count desc;"""

            # pass the uuid as a bound parameter instead of formatting it into the SQL ourselves
            cursor.execute(custom_sql, ["%s" % ruleset.uuid])
            unclean_categories = get_dict_from_cursor(cursor)
            categories = []

            org_languages = [lang.name.lower() for lang in org.languages.filter(orgs=None).distinct()]

            if 'english' not in org_languages:
                org_languages.append('english')

            # a set gives us constant time lookups for every word we test below
            ignore_words = set()
            for lang in org_languages:
                ignore_words.update(safe_get_stop_words(lang))

            for category in unclean_categories:
                # we only ever keep the first 100 words that qualify
                if len(categories) >= 100:
                    break

                if len(category['label']) > 1 and category['label'] not in ignore_words:
                    categories.append(dict(label=category['label'], count=int(category['count'])))

            # sort by count, then alphabetically
            categories = sorted(categories, key=lambda c: (-c['count'], c['label']))

        results.append(dict(label=six.text_type(_("All")), open_ended=open_ended,
                            set=set_count, unset=unset_count, categories=categories))

    # for each of our dependencies, add our key as something that depends on it
    pipe = r.pipeline()
    for dependency in dependencies:
        pipe.sadd(dependency, key)
        pipe.expire(dependency, VALUE_SUMMARY_CACHE_TIME)

    # and finally set our result
    pipe.set(key, dict_to_json(results), VALUE_SUMMARY_CACHE_TIME)
    pipe.execute()

    # leave me: nice for profiling..
    # from django.db import connection as db_connection, reset_queries
    # print "=" * 80
    # for query in db_connection.queries:
    #     print "%s - %s" % (query['time'], query['sql'][:1000])
    # print "-" * 80
    # print "took: %f" % (time.time() - start)
    # print "=" * 80
    # reset_queries()

    return results
def get_value_summary(cls, ruleset=None, contact_field=None, filters=None, segment=None):
    """
    Returns the summary results for the passed in ruleset or contact field, narrowed by the passed in
    filters and optionally broken down by the passed in segment.

    Exactly one of ``ruleset`` or ``contact_field`` must be provided. Results are cached in redis; the cache
    key is registered against every ruleset, group and contact field the query depends on.

    Filters are a list of dicts. The shapes built and inspected by this method are:

        { ruleset: 1515, categories: ["Red", "Blue"] }
        { groups: [124, 151] }
        { location: ..., boundary: [...] }
        { contact_field: 12, values: ["US"] }

    Segments are expected in these formats instead:

        { ruleset: 1515, categories: ["Red", "Blue"] }            // one result per category of another ruleset
        { groups: [124, 151] }                                    // one result per group in the passed in ids
        { location: "State", parent: null }                       // one result per admin boundary in the parent
        { contact_field: "Country", values: ["US", "EN", "RW"] }  // one result per value of a contact field

    Returns a list of dicts, each with ``label``, ``open_ended``, ``set``, ``unset`` and ``categories`` keys
    (location segments also carry ``boundary``). Without a segment the list holds a single "All" entry.

    Raises ValueError if neither or both of ``ruleset`` / ``contact_field`` are given, or if a location
    segment cannot be resolved (wrong field type, org without a country, missing parent).
    """
    # validate our arguments before doing anything else, including our local imports
    if (not ruleset and not contact_field) or (ruleset and contact_field):
        raise ValueError("Must specify either a RuleSet or Contact field.")

    from temba.contacts.models import ContactGroup, ContactField
    from temba.flows.models import TrueTest, RuleSet

    # only needed by the profiling block at the bottom of this method
    # start = time.time()
    results = []

    org = ruleset.flow.org if ruleset else contact_field.org

    # a wait-for-message ruleset with a single rule collects free text
    open_ended = ruleset and ruleset.ruleset_type == RuleSet.TYPE_WAIT_MESSAGE and len(ruleset.get_rules()) == 1

    # default our filters to an empty list if None are passed in
    if filters is None:
        filters = []

    # build the kwargs for our subcall
    kwargs = dict(ruleset=ruleset, contact_field=contact_field, filters=filters)

    # this is our list of dependencies, that is things that will blow away our results
    dependencies = set()

    fingerprint_dict = dict(filters=filters, segment=segment)
    if ruleset:
        fingerprint_dict['ruleset'] = ruleset.id
        dependencies.add(RULESET_KEY % ruleset.id)
    if contact_field:
        fingerprint_dict['contact_field'] = contact_field.id
        dependencies.add(CONTACT_KEY % contact_field.id)

    for contact_filter in filters:
        if 'ruleset' in contact_filter:
            dependencies.add(RULESET_KEY % contact_filter['ruleset'])
        if 'groups' in contact_filter:
            for group_id in contact_filter['groups']:
                dependencies.add(GROUP_KEY % group_id)
        if 'location' in contact_filter:
            field = ContactField.get_by_label(org, contact_filter['location'])
            dependencies.add(CONTACT_KEY % field.id)

    if segment:
        if 'ruleset' in segment:
            dependencies.add(RULESET_KEY % segment['ruleset'])
        if 'groups' in segment:
            for group_id in segment['groups']:
                dependencies.add(GROUP_KEY % group_id)
        if 'location' in segment:
            field = ContactField.get_by_label(org, segment['location'])
            dependencies.add(CONTACT_KEY % field.id)

    # our final redis key will contain each dependency as well as a HASH representing the fingerprint of the
    # kwargs passed to this method, generate that hash
    fingerprint = hash(dict_to_json(fingerprint_dict))

    # generate our key (there is no separator between the org id and the first dependency, this is kept as is
    # so that existing cache keys remain valid)
    key = VALUE_SUMMARY_CACHE_KEY + ":" + str(org.id) + ":".join(sorted(dependencies)) + ":" + str(fingerprint)

    # does our value exist?
    r = get_redis_connection()
    cached = r.get(key)

    if cached is not None:
        try:
            return json_to_dict(cached)
        except Exception:
            # failed decoding, oh well, go calculate it instead
            pass

    if segment:
        # segmenting a result is the same as calculating the result with the addition of each
        # category as a filter so we expand upon the passed in filters to do this
        if 'ruleset' in segment and 'categories' in segment:
            for category in segment['categories']:
                # calculate our results for this segment
                kwargs['filters'] = list(filters) + [dict(ruleset=segment['ruleset'], categories=[category])]
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                results.append(dict(label=category, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

        # segmenting by groups instead, same principle but we add group filters
        elif 'groups' in segment:
            for group_id in segment['groups']:
                # load our group
                group = ContactGroup.user_groups.get(org=org, pk=group_id)

                # calculate our results for this segment
                kwargs['filters'] = list(filters) + [dict(groups=[group_id])]
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

                # every other segment type reports this count as 'unset'; 'unset_count' is the key this
                # branch has historically emitted and is kept so existing readers don't break
                results.append(dict(label=group.name, open_ended=open_ended,
                                    set=set_count, unset=unset_count, unset_count=unset_count,
                                    categories=categories))

        # segmenting by a contact field, only for passed in categories
        elif 'contact_field' in segment and 'values' in segment:
            # look up the contact field
            field = ContactField.get_by_label(org, segment['contact_field'])

            for value in segment['values']:
                # calculate our results for this segment
                kwargs['filters'] = list(filters) + [dict(contact_field=field.pk, values=[value])]
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                results.append(dict(label=value, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

        # segmenting by a location field
        elif 'location' in segment:
            # look up the contact field
            field = ContactField.get_by_label(org, segment['location'])

            # make sure they are segmenting on a location type that makes sense
            if field.value_type not in [Value.TYPE_STATE, Value.TYPE_DISTRICT, Value.TYPE_WARD]:
                raise ValueError(_("Cannot segment on location for field that is not a State or District type"))

            # make sure our org has a country for location based responses
            if not org.country:
                raise ValueError(_("Cannot segment by location until country has been selected for organization"))

            # figure out our parent
            parent_osm_id = segment.get('parent', None)

            # district and ward fields need an explicit parent, check that before we hit the database
            if not parent_osm_id and field.value_type == Value.TYPE_DISTRICT:
                raise ValueError(_("You must specify a parent state to segment results by district"))

            if not parent_osm_id and field.value_type == Value.TYPE_WARD:
                raise ValueError(_("You must specify a parent state to segment results by ward"))

            # the boundaries we will segment by are the children of our parent, defaulting to the country
            parent = org.country
            if parent_osm_id:
                parent = AdminBoundary.objects.get(osm_id=parent_osm_id)

            # get all the boundaries we are segmenting on
            boundaries = list(AdminBoundary.objects.filter(parent=parent).order_by('name'))

            # if this is a district, we can speed things up by only including those districts in our parent, build
            # the filter for that
            if parent and field.value_type in [Value.TYPE_DISTRICT, Value.TYPE_WARD]:
                # the passed in filters must stay a flat list of filter dicts, same as every other segment type
                location_filters = list(filters) + [dict(location=field.pk,
                                                         boundary=[b.osm_id for b in boundaries])]
            else:
                location_filters = filters

            # get all the contacts segment by location first
            (location_set_contacts, location_unset_contacts, location_results) = \
                cls.get_filtered_value_summary(contact_field=field, filters=location_filters, return_contacts=True)

            # now get the contacts for our primary query
            kwargs['return_contacts'] = True
            kwargs['filter_contacts'] = location_set_contacts
            (primary_set_contacts, primary_unset_contacts, primary_results) = \
                cls.get_filtered_value_summary(**kwargs)

            # build a map of osm_id to location_result
            osm_results = {lr['label']: lr for lr in location_results}
            empty_result = dict(contacts=list())

            for boundary in boundaries:
                location_result = osm_results.get(boundary.osm_id, empty_result)
                location_contacts = set(location_result['contacts'])

                # each primary category is narrowed to the contacts located in this boundary
                location_categories = [
                    dict(label=category['label'], count=len(location_contacts & set(category['contacts'])))
                    for category in primary_results
                ]

                results.append(dict(label=boundary.name, boundary=boundary.osm_id, open_ended=open_ended,
                                    set=len(location_contacts & primary_set_contacts),
                                    unset=len(location_contacts & primary_unset_contacts),
                                    categories=location_categories))

            results = sorted(results, key=lambda res: res['label'])

    else:
        (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

        # Check we have and we have an OPEN ENDED ruleset
        rules = ruleset.get_rules() if ruleset else []
        if len(rules) == 1 and isinstance(rules[0].test, TrueTest):
            cursor = connection.cursor()

            # word counts across every message received at this step, excluding test contacts
            custom_sql = """SELECT w.label, count(*) AS count FROM (
                SELECT
                  regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                FROM msgs_msg
                INNER JOIN contacts_contact ON ( msgs_msg.contact_id = contacts_contact.id )
                WHERE msgs_msg.id IN (
                  SELECT msg_id
                  FROM flows_flowstep_messages, flows_flowstep
                  WHERE flowstep_id = flows_flowstep.id AND flows_flowstep.step_uuid = %s
                ) AND contacts_contact.is_test = False
              ) w group by w.label order by count desc;"""

            # pass the uuid as a bound parameter instead of formatting it into the SQL ourselves
            cursor.execute(custom_sql, ["%s" % ruleset.uuid])
            unclean_categories = get_dict_from_cursor(cursor)
            categories = []

            org_languages = [lang.name.lower() for lang in org.languages.filter(orgs=None).distinct()]

            if 'english' not in org_languages:
                org_languages.append('english')

            # a set gives us constant time lookups for every word we test below
            ignore_words = set()
            for lang in org_languages:
                ignore_words.update(safe_get_stop_words(lang))

            for category in unclean_categories:
                # we only ever keep the first 100 words that qualify
                if len(categories) >= 100:
                    break

                if len(category['label']) > 1 and category['label'] not in ignore_words:
                    categories.append(dict(label=category['label'], count=int(category['count'])))

            # sort by count, then alphabetically
            categories = sorted(categories, key=lambda c: (-c['count'], c['label']))

        results.append(dict(label=unicode(_("All")), open_ended=open_ended,
                            set=set_count, unset=unset_count, categories=categories))

    # for each of our dependencies, add our key as something that depends on it
    pipe = r.pipeline()
    for dependency in dependencies:
        pipe.sadd(dependency, key)
        pipe.expire(dependency, VALUE_SUMMARY_CACHE_TIME)

    # and finally set our result
    pipe.set(key, dict_to_json(results), VALUE_SUMMARY_CACHE_TIME)
    pipe.execute()

    # leave me: nice for profiling..
    # from django.db import connection as db_connection, reset_queries
    # print "=" * 80
    # for query in db_connection.queries:
    #     print "%s - %s" % (query['time'], query['sql'][:1000])
    # print "-" * 80
    # print "took: %f" % (time.time() - start)
    # print "=" * 80
    # reset_queries()

    return results
def get_value_summary(cls, ruleset=None, contact_field=None, filters=None, segment=None, latest_only=True):
    """
    Returns the summary results for the passed in ruleset or contact field, narrowed by the passed in
    filters and optionally broken down by the passed in segment.

    Exactly one of ``ruleset`` or ``contact_field`` must be provided. ``latest_only`` is passed straight
    through to ``get_filtered_value_summary`` and is part of the cache fingerprint. Results are cached in
    redis for VALUE_SUMMARY_CACHE_TIME.

    Filters are a list of dicts. The shapes built and inspected by this method are:

        { ruleset: 1515, categories: ["Red", "Blue"] }
        { groups: [124, 151] }
        { location: ..., boundary: ... }

    Segments are expected in these formats instead:

        { ruleset: 1515, categories: ["Red", "Blue"] }  // one result per category of another ruleset
        { groups: [124, 151] }                          // one result per group in the passed in ids
        { location: "State", parent: null }             // one result per admin boundary within the parent

    Returns a list of dicts, each with ``label``, ``open_ended``, ``set``, ``unset`` and ``categories`` keys
    (location segments also carry ``boundary``). Without a segment the list holds a single "All" entry.

    Raises Exception if neither or both of ``ruleset`` / ``contact_field`` are given, or if a location
    segment cannot be resolved (wrong field type, org without a country, missing parent).
    """
    # validate our arguments before doing anything else, including our local imports
    if (not ruleset and not contact_field) or (ruleset and contact_field):
        raise Exception("Must specify either a RuleSet or Contact field.")

    from temba.contacts.models import ContactGroup, ContactField
    from temba.flows.models import TrueTest

    results = []

    org = ruleset.flow.org if ruleset else contact_field.org

    # a ruleset with a single rule collects free text
    open_ended = ruleset and len(ruleset.get_rules()) == 1

    # default our filters to an empty list if None are passed in
    if filters is None:
        filters = []

    # build the kwargs for our subcall
    kwargs = dict(ruleset=ruleset, contact_field=contact_field, filters=filters, latest_only=latest_only)

    # this is our list of dependencies, that is things that will blow away our results
    dependencies = set()

    fingerprint_dict = dict(filters=filters, segment=segment, latest_only=latest_only)
    if ruleset:
        fingerprint_dict['ruleset'] = ruleset.id
        dependencies.add(RULESET_KEY % ruleset.id)
    if contact_field:
        fingerprint_dict['contact_field'] = contact_field.id
        dependencies.add(CONTACT_KEY % contact_field.id)

    # rulesets referenced by filters and segments use the same key format as our primary ruleset above
    for contact_filter in filters:
        if 'ruleset' in contact_filter:
            dependencies.add(RULESET_KEY % contact_filter['ruleset'])
        if 'groups' in contact_filter:
            for group_id in contact_filter['groups']:
                dependencies.add(GROUP_KEY % group_id)
        if 'location' in contact_filter:
            field = ContactField.objects.get(org=org, label__iexact=contact_filter['location'])
            dependencies.add(CONTACT_KEY % field.id)

    if segment:
        if 'ruleset' in segment:
            dependencies.add(RULESET_KEY % segment['ruleset'])
        if 'groups' in segment:
            for group_id in segment['groups']:
                dependencies.add(GROUP_KEY % group_id)
        if 'location' in segment:
            field = ContactField.objects.get(org=org, label__iexact=segment['location'])
            dependencies.add(CONTACT_KEY % field.id)

    # our final redis key will contain each dependency as well as a HASH representing the fingerprint of the
    # kwargs passed to this method, generate that hash
    fingerprint = hash(dict_to_json(fingerprint_dict))

    # generate our key
    key = VALUE_SUMMARY_CACHE_KEY + ":" + ":".join(sorted(dependencies)) + ":" + str(fingerprint)

    # does our value exist?
    r = get_redis_connection()
    cached = r.get(key)

    if cached is not None:
        try:
            return json_to_dict(cached)
        except Exception:
            # failed decoding, oh well, go calculate it instead
            pass

    if segment:
        # segmenting a result is the same as calculating the result with the addition of each
        # category as a filter so we expand upon the passed in filters to do this
        if 'categories' in segment:
            for category in segment['categories']:
                # calculate our results for this segment
                kwargs['filters'] = list(filters) + [dict(ruleset=segment['ruleset'], categories=[category])]
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                results.append(dict(label=category, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

        # segmenting by groups instead, same principle but we add group filters
        elif 'groups' in segment:
            for group_id in segment['groups']:
                # load our group
                group = ContactGroup.objects.get(is_active=True, org=org, pk=group_id)

                # calculate our results for this segment
                kwargs['filters'] = list(filters) + [dict(groups=[group_id])]
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

                # every other segment type reports this count as 'unset'; 'unset_count' is the key this
                # branch has historically emitted and is kept so existing readers don't break
                results.append(dict(label=group.name, open_ended=open_ended,
                                    set=set_count, unset=unset_count, unset_count=unset_count,
                                    categories=categories))

        # segmenting by a location field
        elif 'location' in segment:
            # look up the contact field
            field = ContactField.objects.get(org=org, label__iexact=segment['location'])

            # make sure they are segmenting on a location type that makes sense
            if field.value_type not in [STATE, DISTRICT]:
                raise Exception(_("Cannot segment on location for field that is not a State or District type"))

            # make sure our org has a country for location based responses
            if not org.country:
                raise Exception(_("Cannot segment by location until country has been selected for organization"))

            # figure out our parent
            parent_osm_id = segment.get('parent', None)

            # if the field is a district field, they need to specify the parent state
            if not parent_osm_id and field.value_type == DISTRICT:
                raise Exception(_("You must specify a parent state to segment results by district"))

            # the boundaries we will segment by are the children of our parent, defaulting to the country
            parent = org.country
            if parent_osm_id:
                parent = AdminBoundary.objects.get(osm_id=parent_osm_id)

            # now segment by all the children of this parent
            for boundary in AdminBoundary.objects.filter(parent=parent).order_by('name'):
                # calculate our results for this segment
                kwargs['filters'] = list(filters) + [dict(location=field.id, boundary=boundary.osm_id)]
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                results.append(dict(label=boundary.name, boundary=boundary.osm_id, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

    else:
        (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

        # Check we have and we have an OPEN ENDED ruleset
        rules = ruleset.get_rules() if ruleset else []
        if len(rules) == 1 and isinstance(rules[0].test, TrueTest):
            cursor = connection.cursor()

            # word counts across every message received at this step
            custom_sql = """
              SELECT w.label, count(*) AS count FROM (
                SELECT
                  regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                FROM msgs_msg
                WHERE id IN (
                  SELECT msg_id
                  FROM flows_flowstep_messages, flows_flowstep
                  WHERE flowstep_id = flows_flowstep.id AND flows_flowstep.step_uuid = %s
                )
              ) w group by w.label order by count desc;
            """

            # pass the uuid as a bound parameter instead of formatting it into the SQL ourselves
            cursor.execute(custom_sql, ["%s" % ruleset.uuid])
            unclean_categories = get_dict_from_cursor(cursor)
            categories = []

            # a set gives us constant time lookups for every word we test below
            ignore_words = set(get_stop_words('english'))

            for category in unclean_categories:
                # we only ever keep the first 100 words that qualify
                if len(categories) >= 100:
                    break

                if len(category['label']) > 1 and category['label'] not in ignore_words:
                    categories.append(dict(label=category['label'], count=int(category['count'])))

            # sort by count, then alphabetically
            categories = sorted(categories, key=lambda c: (-c['count'], c['label']))

        results.append(dict(label=unicode(_("All")), open_ended=open_ended,
                            set=set_count, unset=unset_count, categories=categories))

    # cache this result set
    r.set(key, dict_to_json(results), VALUE_SUMMARY_CACHE_TIME)

    return results
def get_value_summary(cls, ruleset=None, contact_field=None, filters=None, segment=None, latest_only=True):
    """
    Returns the summary results for the passed in ruleset or contact field, narrowed by the passed in
    filters and optionally broken down by the passed in segment.

    Exactly one of ``ruleset`` or ``contact_field`` must be provided. ``latest_only`` is passed straight
    through to ``get_filtered_value_summary`` and is part of the cache fingerprint. Results are cached in
    redis for VALUE_SUMMARY_CACHE_TIME.

    Filters are a list of dicts. The shapes built and inspected by this method are:

        { ruleset: 1515, categories: ["Red", "Blue"] }
        { groups: [124, 151] }
        { location: ..., boundary: ... }

    Segments are expected in these formats instead:

        { ruleset: 1515, categories: ["Red", "Blue"] }  // one result per category of another ruleset
        { groups: [124, 151] }                          // one result per group in the passed in ids
        { location: "State", parent: null }             // one result per admin boundary within the parent

    Returns a list of dicts, each with ``label``, ``open_ended``, ``set``, ``unset`` and ``categories`` keys
    (location segments also carry ``boundary``). Without a segment the list holds a single "All" entry.

    Raises Exception if neither or both of ``ruleset`` / ``contact_field`` are given, or if a location
    segment cannot be resolved (wrong field type, org without a country, missing parent).
    """
    # validate our arguments before doing anything else, including our local imports
    if (not ruleset and not contact_field) or (ruleset and contact_field):
        raise Exception("Must specify either a RuleSet or Contact field.")

    from temba.contacts.models import ContactGroup, ContactField
    from temba.flows.models import TrueTest

    results = []

    org = ruleset.flow.org if ruleset else contact_field.org

    # a ruleset with a single rule collects free text
    open_ended = ruleset and len(ruleset.get_rules()) == 1

    # default our filters to an empty list if None are passed in
    if filters is None:
        filters = []

    # build the kwargs for our subcall
    kwargs = dict(ruleset=ruleset, contact_field=contact_field, filters=filters, latest_only=latest_only)

    # this is our list of dependencies, that is things that will blow away our results
    dependencies = set()

    fingerprint_dict = dict(filters=filters, segment=segment, latest_only=latest_only)
    if ruleset:
        fingerprint_dict['ruleset'] = ruleset.id
        dependencies.add(RULESET_KEY % ruleset.id)
    if contact_field:
        fingerprint_dict['contact_field'] = contact_field.id
        dependencies.add(CONTACT_KEY % contact_field.id)

    # rulesets referenced by filters and segments use the same key format as our primary ruleset above
    for contact_filter in filters:
        if 'ruleset' in contact_filter:
            dependencies.add(RULESET_KEY % contact_filter['ruleset'])
        if 'groups' in contact_filter:
            for group_id in contact_filter['groups']:
                dependencies.add(GROUP_KEY % group_id)
        if 'location' in contact_filter:
            field = ContactField.objects.get(org=org, label__iexact=contact_filter['location'])
            dependencies.add(CONTACT_KEY % field.id)

    if segment:
        if 'ruleset' in segment:
            dependencies.add(RULESET_KEY % segment['ruleset'])
        if 'groups' in segment:
            for group_id in segment['groups']:
                dependencies.add(GROUP_KEY % group_id)
        if 'location' in segment:
            field = ContactField.objects.get(org=org, label__iexact=segment['location'])
            dependencies.add(CONTACT_KEY % field.id)

    # our final redis key will contain each dependency as well as a HASH representing the fingerprint of the
    # kwargs passed to this method, generate that hash
    fingerprint = hash(dict_to_json(fingerprint_dict))

    # generate our key
    key = VALUE_SUMMARY_CACHE_KEY + ":" + ":".join(sorted(dependencies)) + ":" + str(fingerprint)

    # does our value exist?
    r = get_redis_connection()
    cached = r.get(key)

    if cached is not None:
        try:
            return json_to_dict(cached)
        except Exception:
            # failed decoding, oh well, go calculate it instead
            pass

    if segment:
        # segmenting a result is the same as calculating the result with the addition of each
        # category as a filter so we expand upon the passed in filters to do this
        if 'categories' in segment:
            for category in segment['categories']:
                # calculate our results for this segment
                kwargs['filters'] = list(filters) + [dict(ruleset=segment['ruleset'], categories=[category])]
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                results.append(dict(label=category, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

        # segmenting by groups instead, same principle but we add group filters
        elif 'groups' in segment:
            for group_id in segment['groups']:
                # load our group
                group = ContactGroup.objects.get(is_active=True, org=org, pk=group_id)

                # calculate our results for this segment
                kwargs['filters'] = list(filters) + [dict(groups=[group_id])]
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

                # every other segment type reports this count as 'unset'; 'unset_count' is the key this
                # branch has historically emitted and is kept so existing readers don't break
                results.append(dict(label=group.name, open_ended=open_ended,
                                    set=set_count, unset=unset_count, unset_count=unset_count,
                                    categories=categories))

        # segmenting by a location field
        elif 'location' in segment:
            # look up the contact field
            field = ContactField.objects.get(org=org, label__iexact=segment['location'])

            # make sure they are segmenting on a location type that makes sense
            if field.value_type not in [STATE, DISTRICT]:
                raise Exception(_("Cannot segment on location for field that is not a State or District type"))

            # make sure our org has a country for location based responses
            if not org.country:
                raise Exception(_("Cannot segment by location until country has been selected for organization"))

            # figure out our parent
            parent_osm_id = segment.get('parent', None)

            # if the field is a district field, they need to specify the parent state
            if not parent_osm_id and field.value_type == DISTRICT:
                raise Exception(_("You must specify a parent state to segment results by district"))

            # the boundaries we will segment by are the children of our parent, defaulting to the country
            parent = org.country
            if parent_osm_id:
                parent = AdminBoundary.objects.get(osm_id=parent_osm_id)

            # now segment by all the children of this parent
            for boundary in AdminBoundary.objects.filter(parent=parent).order_by('name'):
                # calculate our results for this segment
                kwargs['filters'] = list(filters) + [dict(location=field.id, boundary=boundary.osm_id)]
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                results.append(dict(label=boundary.name, boundary=boundary.osm_id, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

    else:
        (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

        # Check we have and we have an OPEN ENDED ruleset
        rules = ruleset.get_rules() if ruleset else []
        if len(rules) == 1 and isinstance(rules[0].test, TrueTest):
            cursor = connection.cursor()

            # word counts across every message received at this step
            custom_sql = """
              SELECT w.label, count(*) AS count FROM (
                SELECT
                  regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                FROM msgs_msg
                WHERE id IN (
                  SELECT msg_id
                  FROM flows_flowstep_messages, flows_flowstep
                  WHERE flowstep_id = flows_flowstep.id AND flows_flowstep.step_uuid = %s
                )
              ) w group by w.label order by count desc;
            """

            # pass the uuid as a bound parameter instead of formatting it into the SQL ourselves
            cursor.execute(custom_sql, ["%s" % ruleset.uuid])
            unclean_categories = get_dict_from_cursor(cursor)
            categories = []

            # a set gives us constant time lookups for every word we test below
            ignore_words = set(get_stop_words('english'))

            for category in unclean_categories:
                # we only ever keep the first 100 words that qualify
                if len(categories) >= 100:
                    break

                if len(category['label']) > 1 and category['label'] not in ignore_words:
                    categories.append(dict(label=category['label'], count=int(category['count'])))

            # sort by count, then alphabetically
            categories = sorted(categories, key=lambda c: (-c['count'], c['label']))

        results.append(dict(label=unicode(_("All")), open_ended=open_ended,
                            set=set_count, unset=unset_count, categories=categories))

    # cache this result set
    r.set(key, dict_to_json(results), VALUE_SUMMARY_CACHE_TIME)

    return results