def pre_save(self, task):
    """
    Store the columns the user chose to import as contact fields on the task.

    For every entry in ``self.column_controls`` whose "include" form value is
    truthy, a dict with ``key``, ``header``, ``label`` and ``type`` is built.
    The key comes from an existing ContactField with the same label when one
    is found, otherwise from ``slugify_with(label)``. The resulting list is
    written under ``extra_fields`` in the task's JSON ``import_params``.

    Returns the same ``task`` object with ``import_params`` re-serialized.
    """
    form_data = self.form.cleaned_data
    selected = []

    for control in self.column_controls:
        # columns the user did not tick are ignored entirely
        if not form_data[control['include_field']]:
            continue

        label = form_data[control['label_field']].strip()
        value_type = form_data[control['type_field']]
        org = self.derive_org()

        # start from a slug of the label, but reuse the key of a field that
        # already carries this label
        key = slugify_with(label)
        match = ContactField.get_by_label(org, label)
        if match:
            key = match.key

        selected.append(dict(key=key, header=control['header'], label=label, type=value_type))

    # merge the selection into whatever params the task already carries
    import_params = json.loads(task.import_params)
    import_params['extra_fields'] = selected
    task.import_params = json.dumps(import_params)

    return task
def save(self):
    """
    Create or update the contact described by ``self.validated_data``.

    * With a bound ``self.instance`` the URNs are updated (when
      ``self.parsed_urns`` is not None) and the name is changed if it differs.
    * Without one, the contact is obtained through
      ``Contact.get_or_create_by_urns``.
    * An explicit ``language`` of None in the payload clears the language.
    * Each entry in ``fields`` is written with ``set_field``, matched first by
      key, then (for keys listed in ``self.new_fields``) by creating the
      field, and finally by label.
    * ``self.group_objs``, when not None, replaces the static groups.

    Returns the contact instance.
    """
    payload = self.validated_data

    # an empty name is stored as None
    name = payload.get('name') or None
    fields = payload.get('fields')
    language = payload.get('language')

    dirty = []

    if not self.instance:
        self.instance = Contact.get_or_create_by_urns(
            self.org, self.user, name, urns=self.parsed_urns, language=language, force_urn_update=True
        )
    else:
        if self.parsed_urns is not None:
            self.instance.update_urns(self.user, self.parsed_urns)

        # only the name is tracked as a partial update here
        if name != self.instance.name:
            self.instance.name = name
            dirty.append('name')

    # Contact.get_or_create doesn't nullify language so do that here; inside
    # this branch language is always None, so that is what gets stored
    if language is None and 'language' in payload:
        self.instance.language = None
        self.instance.save()

    # persist the tracked attribute changes, if any
    if dirty:
        self.instance.save(update_fields=dirty)

    if fields is not None:
        for key, value in fields.items():
            by_key = ContactField.objects.filter(org=self.org, key__iexact=key, is_active=True).first()
            if by_key:
                self.instance.set_field(self.user, by_key.key, value)
                continue

            if self.new_fields and key in self.new_fields:
                slug = regex.sub('[^A-Za-z0-9]+', '_', key).lower()
                created = ContactField.get_or_create(org=self.org, user=self.user, key=slug, label=key)
                self.instance.set_field(self.user, created.key, value)

            # TODO as above, need to get users to stop updating via label
            by_label = ContactField.get_by_label(self.org, key)
            if by_label:
                self.instance.set_field(self.user, by_label.key, value)

    # replace the contact's static groups when groups were supplied
    if self.group_objs is not None:
        self.instance.update_static_groups(self.user, self.group_objs)

    return self.instance
def get_value_summary(cls, ruleset=None, contact_field=None, filters=None, segment=None):
    """
    Returns the results for the passed in ruleset or contact field given the passed in filters and segments.

    Filters are expected in the following formats:
        { field: rulesetId, categories: ["Red", "Blue", "Yellow"] }

    Segments are expected in these formats instead:
        { ruleset: 1515, categories: ["Red", "Blue"] }  // segmenting by another field, for those categories
        { groups: 124,151,151 }                         // segment by each group in the passed in ids
        { location: "State", parent: null }             // segment for each admin boundary within the parent
        { contact_field: "Country", values: ["US", "EN", "RW"] } // segment by a contact field for these values

    Exactly one of ``ruleset`` and ``contact_field`` must be given. Results are
    cached in redis under a key built from the org id, the dependency keys and a
    fingerprint of the arguments; the cache key is also registered in one redis
    set per dependency.

    Returns a list of dicts, each with ``label``, ``open_ended``, ``set``,
    ``unset`` and ``categories`` (location segments also carry ``boundary``).

    Raises ValueError when both or neither of ``ruleset`` / ``contact_field``
    are supplied, or when a location segment is requested for a field that is
    not a state/district/ward field, for an org without a country, or for a
    district/ward field without a ``parent``.
    """
    from temba.contacts.models import ContactGroup, ContactField
    from temba.flows.models import TrueTest, RuleSet

    # start = time.time()
    results = []

    if (not ruleset and not contact_field) or (ruleset and contact_field):  # pragma: needs cover
        raise ValueError("Must specify either a RuleSet or Contact field.")

    org = ruleset.flow.org if ruleset else contact_field.org

    open_ended = ruleset and ruleset.ruleset_type == RuleSet.TYPE_WAIT_MESSAGE and len(ruleset.get_rules()) == 1

    # default our filters to an empty list if None are passed in
    if filters is None:
        filters = []

    # build the kwargs for our subcall
    kwargs = dict(ruleset=ruleset, contact_field=contact_field, filters=filters)

    # this is our list of dependencies, that is things that will blow away our results
    dependencies = set()
    fingerprint_dict = dict(filters=filters, segment=segment)
    if ruleset:
        fingerprint_dict['ruleset'] = ruleset.id
        dependencies.add(RULESET_KEY % ruleset.id)
    if contact_field:
        fingerprint_dict['contact_field'] = contact_field.id
        dependencies.add(CONTACT_KEY % contact_field.id)

    for contact_filter in filters:
        if 'ruleset' in contact_filter:
            dependencies.add(RULESET_KEY % contact_filter['ruleset'])
        if 'groups' in contact_filter:
            for group_id in contact_filter['groups']:
                dependencies.add(GROUP_KEY % group_id)
        if 'location' in contact_filter:  # pragma: needs cover
            field = ContactField.get_by_label(org, contact_filter['location'])
            dependencies.add(CONTACT_KEY % field.id)

    if segment:
        if 'ruleset' in segment:
            dependencies.add(RULESET_KEY % segment['ruleset'])
        if 'groups' in segment:  # pragma: needs cover
            for group_id in segment['groups']:
                dependencies.add(GROUP_KEY % group_id)
        if 'location' in segment:
            field = ContactField.get_by_label(org, segment['location'])
            dependencies.add(CONTACT_KEY % field.id)

    # our final redis key will contain each dependency as well as a HASH representing the fingerprint of the
    # kwargs passed to this method, generate that hash
    # NOTE(review): on Python 3 the built-in hash() of a str is salted per process unless PYTHONHASHSEED
    # is fixed, so this fingerprint may differ between worker processes - confirm this is acceptable
    fingerprint = hash(dict_to_json(fingerprint_dict))

    # generate our key
    key = VALUE_SUMMARY_CACHE_KEY + ":" + str(org.id) + ":".join(sorted(dependencies)) + ":" + str(fingerprint)

    # does our value exist?
    r = get_redis_connection()
    cached = r.get(key)

    if cached is not None:
        try:
            return json_to_dict(cached)
        except Exception:  # pragma: needs cover
            # failed decoding, oh well, go calculate it instead
            pass

    if segment:
        # segmenting a result is the same as calculating the result with the addition of each
        # category as a filter so we expand upon the passed in filters to do this
        if 'ruleset' in segment and 'categories' in segment:
            for category in segment['categories']:
                category_filter = list(filters)
                category_filter.append(dict(ruleset=segment['ruleset'], categories=[category]))

                # calculate our results for this segment
                kwargs['filters'] = category_filter
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                results.append(dict(label=category, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

        # segmenting by groups instead, same principle but we add group filters
        elif 'groups' in segment:  # pragma: needs cover
            for group_id in segment['groups']:
                # load our group
                group = ContactGroup.user_groups.get(org=org, pk=group_id)

                category_filter = list(filters)
                category_filter.append(dict(groups=[group_id]))

                # calculate our results for this segment
                kwargs['filters'] = category_filter
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

                # use the same `unset` key as every other branch so consumers can read all
                # result dicts uniformly (this branch previously emitted `unset_count`)
                results.append(dict(label=group.name, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

        # segmenting by a contact field, only for passed in categories
        elif 'contact_field' in segment and 'values' in segment:
            # look up the contact field
            field = ContactField.get_by_label(org, segment['contact_field'])

            for value in segment['values']:
                value_filter = list(filters)
                value_filter.append(dict(contact_field=field.pk, values=[value]))

                # calculate our results for this segment
                kwargs['filters'] = value_filter
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                results.append(dict(label=value, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

        # segmenting by a location field
        elif 'location' in segment:
            # look up the contact field
            field = ContactField.get_by_label(org, segment['location'])

            # make sure they are segmenting on a location type that makes sense
            if field.value_type not in [Value.TYPE_STATE, Value.TYPE_DISTRICT, Value.TYPE_WARD]:  # pragma: needs cover
                raise ValueError(_("Cannot segment on location for field that is not a State or District type"))

            # make sure our org has a country for location based responses
            if not org.country:  # pragma: needs cover
                raise ValueError(_("Cannot segment by location until country has been selected for organization"))

            # the boundaries we will segment by
            parent = org.country

            # figure out our parent
            parent_osm_id = segment.get('parent', None)
            if parent_osm_id:
                parent = AdminBoundary.objects.get(osm_id=parent_osm_id)

            # get all the boundaries we are segmenting on
            boundaries = list(AdminBoundary.objects.filter(parent=parent).order_by('name'))

            # if the field is a district field, they need to specify the parent state
            if not parent_osm_id and field.value_type == Value.TYPE_DISTRICT:  # pragma: needs cover
                raise ValueError(_("You must specify a parent state to segment results by district"))

            if not parent_osm_id and field.value_type == Value.TYPE_WARD:  # pragma: needs cover
                raise ValueError(_("You must specify a parent state to segment results by ward"))

            # if this is a district, we can speed things up by only including those districts in our parent, build
            # the filter for that
            if parent and field.value_type in [Value.TYPE_DISTRICT, Value.TYPE_WARD]:
                # NOTE(review): this nests the `filters` list as a single element rather than extending
                # it - confirm get_filtered_value_summary expects that shape
                location_filters = [filters, dict(location=field.pk, boundary=[b.osm_id for b in boundaries])]
            else:
                location_filters = filters

            # get all the contacts segment by location first
            (location_set_contacts, location_unset_contacts, location_results) = \
                cls.get_filtered_value_summary(contact_field=field, filters=location_filters, return_contacts=True)

            # now get the contacts for our primary query
            kwargs['return_contacts'] = True
            kwargs['filter_contacts'] = location_set_contacts
            (primary_set_contacts, primary_unset_contacts, primary_results) = \
                cls.get_filtered_value_summary(**kwargs)

            # build a map of osm_id to location_result
            osm_results = {lr['label']: lr for lr in location_results}
            empty_result = dict(contacts=list())

            for boundary in boundaries:
                location_result = osm_results.get(boundary.osm_id, empty_result)

                # clone our primary results
                segmented_results = dict(label=boundary.name, boundary=boundary.osm_id, open_ended=open_ended)

                location_categories = list()
                location_contacts = set(location_result['contacts'])

                for category in primary_results:
                    category_contacts = set(category['contacts'])

                    intersection = location_contacts & category_contacts
                    location_categories.append(dict(label=category['label'], count=len(intersection)))

                segmented_results['set'] = len(location_contacts & primary_set_contacts)
                segmented_results['unset'] = len(location_contacts & primary_unset_contacts)
                segmented_results['categories'] = location_categories
                results.append(segmented_results)

            results = sorted(results, key=lambda r: r['label'])

    else:
        (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

        # Check we have and we have an OPEN ENDED ruleset
        if ruleset and len(ruleset.get_rules()) == 1 and isinstance(ruleset.get_rules()[0].test, TrueTest):
            cursor = connection.cursor()

            # the step uuid is passed as a bound parameter instead of being interpolated into the SQL text
            custom_sql = """
                SELECT w.label, count(*) AS count FROM (
                    SELECT regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                    FROM msgs_msg
                    INNER JOIN contacts_contact ON ( msgs_msg.contact_id = contacts_contact.id )
                    WHERE msgs_msg.id IN (
                        SELECT msg_id
                        FROM flows_flowstep_messages, flows_flowstep
                        WHERE flowstep_id = flows_flowstep.id AND flows_flowstep.step_uuid = %s
                    ) AND contacts_contact.is_test = False
                ) w group by w.label order by count desc;
            """

            cursor.execute(custom_sql, [str(ruleset.uuid)])
            unclean_categories = get_dict_from_cursor(cursor)
            categories = []

            org_languages = [lang.name.lower() for lang in org.languages.filter(orgs=None).distinct()]

            if 'english' not in org_languages:
                org_languages.append('english')

            # a set keeps the per-word membership test below cheap
            ignore_words = set()
            for lang in org_languages:
                ignore_words.update(safe_get_stop_words(lang))

            # keep at most 100 words that are longer than one character and not stop words
            for category in unclean_categories:
                if len(category['label']) > 1 and category['label'] not in ignore_words and len(categories) < 100:
                    categories.append(dict(label=category['label'], count=int(category['count'])))

            # sort by count, then alphabetically
            categories = sorted(categories, key=lambda c: (-c['count'], c['label']))

        results.append(dict(label=six.text_type(_("All")), open_ended=open_ended,
                            set=set_count, unset=unset_count, categories=categories))

    # for each of our dependencies, add our key as something that depends on it
    pipe = r.pipeline()
    for dependency in dependencies:
        pipe.sadd(dependency, key)
        pipe.expire(dependency, VALUE_SUMMARY_CACHE_TIME)

    # and finally set our result
    pipe.set(key, dict_to_json(results), VALUE_SUMMARY_CACHE_TIME)
    pipe.execute()

    # leave me: nice for profiling..
    # from django.db import connection as db_connection, reset_queries
    # print "=" * 80
    # for query in db_connection.queries:
    #     print "%s - %s" % (query['time'], query['sql'][:1000])
    # print "-" * 80
    # print "took: %f" % (time.time() - start)
    # print "=" * 80
    # reset_queries()

    return results
def get_value_summary(cls, ruleset=None, contact_field=None, filters=None, segment=None):
    """
    Returns the results for the passed in ruleset or contact field given the passed in filters and segments.

    Filters are expected in the following formats:
        { field: rulesetId, categories: ["Red", "Blue", "Yellow"] }

    Segments are expected in these formats instead:
        { ruleset: 1515, categories: ["Red", "Blue"] }  // segmenting by another field, for those categories
        { groups: 124,151,151 }                         // segment by each group in the passed in ids
        { location: "State", parent: null }             // segment for each admin boundary within the parent
        { contact_field: "Country", values: ["US", "EN", "RW"] } // segment by a contact field for these values

    Exactly one of ``ruleset`` and ``contact_field`` must be given. Results are
    cached in redis under a key built from the org id, the dependency keys and a
    fingerprint of the arguments; the cache key is also registered in one redis
    set per dependency.

    Returns a list of dicts, each with ``label``, ``open_ended``, ``set``,
    ``unset`` and ``categories`` (location segments also carry ``boundary``).

    Raises ValueError when both or neither of ``ruleset`` / ``contact_field``
    are supplied, or when a location segment is requested for a field that is
    not a state/district/ward field, for an org without a country, or for a
    district/ward field without a ``parent``.
    """
    from temba.contacts.models import ContactGroup, ContactField
    from temba.flows.models import TrueTest, RuleSet

    # only needed by the profiling snippet at the bottom, so kept commented out with it
    # start = time.time()
    results = []

    if (not ruleset and not contact_field) or (ruleset and contact_field):
        raise ValueError("Must specify either a RuleSet or Contact field.")

    org = ruleset.flow.org if ruleset else contact_field.org

    open_ended = ruleset and ruleset.ruleset_type == RuleSet.TYPE_WAIT_MESSAGE and len(ruleset.get_rules()) == 1

    # default our filters to an empty list if None are passed in
    if filters is None:
        filters = []

    # build the kwargs for our subcall
    kwargs = dict(ruleset=ruleset, contact_field=contact_field, filters=filters)

    # this is our list of dependencies, that is things that will blow away our results
    dependencies = set()
    fingerprint_dict = dict(filters=filters, segment=segment)
    if ruleset:
        fingerprint_dict['ruleset'] = ruleset.id
        dependencies.add(RULESET_KEY % ruleset.id)
    if contact_field:
        fingerprint_dict['contact_field'] = contact_field.id
        dependencies.add(CONTACT_KEY % contact_field.id)

    for contact_filter in filters:
        if 'ruleset' in contact_filter:
            dependencies.add(RULESET_KEY % contact_filter['ruleset'])
        if 'groups' in contact_filter:
            for group_id in contact_filter['groups']:
                dependencies.add(GROUP_KEY % group_id)
        if 'location' in contact_filter:
            field = ContactField.get_by_label(org, contact_filter['location'])
            dependencies.add(CONTACT_KEY % field.id)

    if segment:
        if 'ruleset' in segment:
            dependencies.add(RULESET_KEY % segment['ruleset'])
        if 'groups' in segment:
            for group_id in segment['groups']:
                dependencies.add(GROUP_KEY % group_id)
        if 'location' in segment:
            field = ContactField.get_by_label(org, segment['location'])
            dependencies.add(CONTACT_KEY % field.id)

    # our final redis key will contain each dependency as well as a HASH representing the fingerprint of the
    # kwargs passed to this method, generate that hash
    fingerprint = hash(dict_to_json(fingerprint_dict))

    # generate our key
    key = VALUE_SUMMARY_CACHE_KEY + ":" + str(org.id) + ":".join(sorted(dependencies)) + ":" + str(fingerprint)

    # does our value exist?
    r = get_redis_connection()
    cached = r.get(key)

    if cached is not None:
        try:
            return json_to_dict(cached)
        except Exception:
            # failed decoding, oh well, go calculate it instead
            pass

    if segment:
        # segmenting a result is the same as calculating the result with the addition of each
        # category as a filter so we expand upon the passed in filters to do this
        if 'ruleset' in segment and 'categories' in segment:
            for category in segment['categories']:
                category_filter = list(filters)
                category_filter.append(dict(ruleset=segment['ruleset'], categories=[category]))

                # calculate our results for this segment
                kwargs['filters'] = category_filter
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                results.append(dict(label=category, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

        # segmenting by groups instead, same principle but we add group filters
        elif 'groups' in segment:
            for group_id in segment['groups']:
                # load our group
                group = ContactGroup.user_groups.get(org=org, pk=group_id)

                category_filter = list(filters)
                category_filter.append(dict(groups=[group_id]))

                # calculate our results for this segment
                kwargs['filters'] = category_filter
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

                # use the same `unset` key as every other branch so consumers can read all
                # result dicts uniformly (this branch previously emitted `unset_count`)
                results.append(dict(label=group.name, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

        # segmenting by a contact field, only for passed in categories
        elif 'contact_field' in segment and 'values' in segment:
            # look up the contact field
            field = ContactField.get_by_label(org, segment['contact_field'])

            for value in segment['values']:
                value_filter = list(filters)
                value_filter.append(dict(contact_field=field.pk, values=[value]))

                # calculate our results for this segment
                kwargs['filters'] = value_filter
                (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)
                results.append(dict(label=value, open_ended=open_ended,
                                    set=set_count, unset=unset_count, categories=categories))

        # segmenting by a location field
        elif 'location' in segment:
            # look up the contact field
            field = ContactField.get_by_label(org, segment['location'])

            # make sure they are segmenting on a location type that makes sense
            if field.value_type not in [Value.TYPE_STATE, Value.TYPE_DISTRICT, Value.TYPE_WARD]:
                raise ValueError(_("Cannot segment on location for field that is not a State or District type"))

            # make sure our org has a country for location based responses
            if not org.country:
                raise ValueError(_("Cannot segment by location until country has been selected for organization"))

            # the boundaries we will segment by
            parent = org.country

            # figure out our parent
            parent_osm_id = segment.get('parent', None)
            if parent_osm_id:
                parent = AdminBoundary.objects.get(osm_id=parent_osm_id)

            # get all the boundaries we are segmenting on
            boundaries = list(AdminBoundary.objects.filter(parent=parent).order_by('name'))

            # if the field is a district field, they need to specify the parent state
            if not parent_osm_id and field.value_type == Value.TYPE_DISTRICT:
                raise ValueError(_("You must specify a parent state to segment results by district"))

            if not parent_osm_id and field.value_type == Value.TYPE_WARD:
                raise ValueError(_("You must specify a parent state to segment results by ward"))

            # if this is a district, we can speed things up by only including those districts in our parent, build
            # the filter for that
            if parent and field.value_type in [Value.TYPE_DISTRICT, Value.TYPE_WARD]:
                # NOTE(review): this nests the `filters` list as a single element rather than extending
                # it - confirm get_filtered_value_summary expects that shape
                location_filters = [filters, dict(location=field.pk, boundary=[b.osm_id for b in boundaries])]
            else:
                location_filters = filters

            # get all the contacts segment by location first
            (location_set_contacts, location_unset_contacts, location_results) = \
                cls.get_filtered_value_summary(contact_field=field, filters=location_filters, return_contacts=True)

            # now get the contacts for our primary query
            kwargs['return_contacts'] = True
            kwargs['filter_contacts'] = location_set_contacts
            (primary_set_contacts, primary_unset_contacts, primary_results) = \
                cls.get_filtered_value_summary(**kwargs)

            # build a map of osm_id to location_result
            osm_results = {lr['label']: lr for lr in location_results}
            empty_result = dict(contacts=list())

            for boundary in boundaries:
                location_result = osm_results.get(boundary.osm_id, empty_result)

                # clone our primary results
                segmented_results = dict(label=boundary.name, boundary=boundary.osm_id, open_ended=open_ended)

                location_categories = list()
                location_contacts = set(location_result['contacts'])

                for category in primary_results:
                    category_contacts = set(category['contacts'])

                    intersection = location_contacts & category_contacts
                    location_categories.append(dict(label=category['label'], count=len(intersection)))

                segmented_results['set'] = len(location_contacts & primary_set_contacts)
                segmented_results['unset'] = len(location_contacts & primary_unset_contacts)
                segmented_results['categories'] = location_categories
                results.append(segmented_results)

            results = sorted(results, key=lambda r: r['label'])

    else:
        (set_count, unset_count, categories) = cls.get_filtered_value_summary(**kwargs)

        # Check we have and we have an OPEN ENDED ruleset
        if ruleset and len(ruleset.get_rules()) == 1 and isinstance(ruleset.get_rules()[0].test, TrueTest):
            cursor = connection.cursor()

            # the step uuid is passed as a bound parameter instead of being interpolated into the SQL text
            custom_sql = """
                SELECT w.label, count(*) AS count FROM (
                    SELECT regexp_split_to_table(LOWER(text), E'[^[:alnum:]_]') AS label
                    FROM msgs_msg
                    INNER JOIN contacts_contact ON ( msgs_msg.contact_id = contacts_contact.id )
                    WHERE msgs_msg.id IN (
                        SELECT msg_id
                        FROM flows_flowstep_messages, flows_flowstep
                        WHERE flowstep_id = flows_flowstep.id AND flows_flowstep.step_uuid = %s
                    ) AND contacts_contact.is_test = False
                ) w group by w.label order by count desc;
            """

            cursor.execute(custom_sql, [str(ruleset.uuid)])
            unclean_categories = get_dict_from_cursor(cursor)
            categories = []

            org_languages = [lang.name.lower() for lang in org.languages.filter(orgs=None).distinct()]

            if 'english' not in org_languages:
                org_languages.append('english')

            # a set keeps the per-word membership test below cheap
            ignore_words = set()
            for lang in org_languages:
                ignore_words.update(safe_get_stop_words(lang))

            # keep at most 100 words that are longer than one character and not stop words
            for category in unclean_categories:
                if len(category['label']) > 1 and category['label'] not in ignore_words and len(categories) < 100:
                    categories.append(dict(label=category['label'], count=int(category['count'])))

            # sort by count, then alphabetically
            categories = sorted(categories, key=lambda c: (-c['count'], c['label']))

        # NOTE(review): `unicode` is a Python 2 builtin only - confirm the target interpreter
        results.append(dict(label=unicode(_("All")), open_ended=open_ended,
                            set=set_count, unset=unset_count, categories=categories))

    # for each of our dependencies, add our key as something that depends on it
    pipe = r.pipeline()
    for dependency in dependencies:
        pipe.sadd(dependency, key)
        pipe.expire(dependency, VALUE_SUMMARY_CACHE_TIME)

    # and finally set our result
    pipe.set(key, dict_to_json(results), VALUE_SUMMARY_CACHE_TIME)
    pipe.execute()

    # leave me: nice for profiling..
    #from django.db import connection as db_connection, reset_queries
    #print "=" * 80
    #for query in db_connection.queries:
    #    print "%s - %s" % (query['time'], query['sql'][:1000])
    #print "-" * 80
    #print "took: %f" % (time.time() - start)
    #print "=" * 80
    #reset_queries()

    return results
def save(self):
    """
    Create or update the contact described by ``self.validated_data``.

    An existing ``self.instance`` gets its URNs updated (when
    ``self.parsed_urns`` is not None) and its name changed if different;
    otherwise the contact comes from ``Contact.get_or_create_by_urns``. A
    payload that explicitly carries ``language`` as None clears the language.
    Name and language changes are written in one ``save(update_fields=...)``
    call. Field values are then applied with ``set_field`` and, when
    ``self.group_objs`` is not None, static groups are updated.

    Returns the contact instance.
    """
    payload = self.validated_data

    # an empty name is stored as None
    name = payload.get("name") or None
    fields = payload.get("fields")
    language = payload.get("language")

    dirty = []

    if not self.instance:
        self.instance = Contact.get_or_create_by_urns(
            self.org, self.user, name, urns=self.parsed_urns, language=language, force_urn_update=True
        )
    else:
        if self.parsed_urns is not None:
            self.instance.update_urns(self.user, self.parsed_urns)

        if name != self.instance.name:
            self.instance.name = name
            dirty.append("name")

    # Contact.get_or_create doesn't nullify language so do that here; inside
    # this branch language is always None, so that is what gets stored
    if language is None and "language" in payload:
        self.instance.language = None
        dirty.append("language")

    # write all tracked attribute changes in a single partial save
    if dirty:
        self.instance.save(update_fields=dirty, handle_update=True)

    if fields is not None:
        for key, value in fields.items():
            by_key = ContactField.user_fields.filter(org=self.org, key__iexact=key, is_active=True).first()
            if by_key:
                self.instance.set_field(self.user, by_key.key, value)
                continue

            if self.new_fields and key in self.new_fields:
                slug = regex.sub("[^A-Za-z0-9]+", "_", key).lower()
                created = ContactField.get_or_create(org=self.org, user=self.user, key=slug, label=key)
                self.instance.set_field(self.user, created.key, value)

            # TODO as above, need to get users to stop updating via label
            by_label = ContactField.get_by_label(self.org, key)
            if by_label:
                self.instance.set_field(self.user, by_label.key, value)

    # replace the contact's static groups when groups were supplied
    if self.group_objs is not None:
        self.instance.update_static_groups(self.user, self.group_objs)

    return self.instance