def get_advanced_search_results(self, keywords_text, include_types): clauses = keywords_text.split(',') inclusive_keywords = [] exclusive_keywords = [] queryset = self.tweet_words.all() message_queryset = self.message_set.all() if (len(include_types) > 0): message_queryset = message_queryset.filter( utils.levels_or('type__name', map(lambda x: x.name, include_types))) final_queryset = self.message_set.none() for clause in clauses: if clause.startswith("NOT "): words = clause[4:].split(' ') word_list = utils.get_word_objs( queryset=queryset, text_field_name='original_text', related_field_name="tweet_words__id", words=words) if len(word_list) > 0: # TODO: makes this real AND #and_word_list = reduce(operator.or_, word_list) exclusive_keywords.extend(word_list) else: words = clause.split(' ') word_list = utils.get_word_objs( queryset=queryset, text_field_name='original_text', related_field_name="tweet_words__id", words=words) if len(word_list) > 0: #and_word_list = reduce(operator.and_, word_list) #inclusive_keywords.append(and_word_list) clause_queryset = message_queryset for or_word_list in word_list: clause_queryset = clause_queryset.filter(or_word_list) final_queryset |= clause_queryset queryset = final_queryset #if len(inclusive_keywords) > 0: # inclusive_keywords = reduce(operator.or_, inclusive_keywords) # queryset = queryset.filter(inclusive_keywords) if len(exclusive_keywords) > 0: for word in exclusive_keywords: queryset = queryset.exclude(word) return queryset.distinct()
def get_advanced_search_results(self, keywords_text, include_types): clauses = keywords_text.split(',') inclusive_keywords = [] exclusive_keywords = [] queryset = self.tweet_words.all() message_queryset = self.message_set.all() if (len(include_types) > 0): message_queryset = message_queryset.filter(utils.levels_or('type__name', map(lambda x: x.name, include_types))) final_queryset = self.message_set.none() for clause in clauses: if clause.startswith("NOT "): words = clause[4:].split(' ') word_list = utils.get_word_objs(queryset=queryset, text_field_name='original_text', related_field_name="tweet_words__id", words=words) if len(word_list) > 0: # TODO: makes this real AND #and_word_list = reduce(operator.or_, word_list) exclusive_keywords.extend(word_list) else: words = clause.split(' ') word_list = utils.get_word_objs(queryset=queryset, text_field_name='original_text', related_field_name="tweet_words__id", words=words) if len(word_list) > 0: #and_word_list = reduce(operator.and_, word_list) #inclusive_keywords.append(and_word_list) clause_queryset = message_queryset for or_word_list in word_list: clause_queryset = clause_queryset.filter(or_word_list) final_queryset |= clause_queryset queryset = final_queryset #if len(inclusive_keywords) > 0: # inclusive_keywords = reduce(operator.or_, inclusive_keywords) # queryset = queryset.filter(inclusive_keywords) if len(exclusive_keywords) > 0: for word in exclusive_keywords: queryset = queryset.exclude(word) return queryset.distinct()
def all_messages(self):
    """
    Return the dataset's messages that are tied to this object's features.

    The message set comes from ``self.dataset.get_message_set()`` and is
    filtered with ``utils.levels_or`` on ``tweet_words__id`` using the ``id``
    of every entry in ``self.related_features`` (presumably an OR over those
    ids -- confirm in ``utils``).
    """
    messages = self.dataset.get_message_set()
    feature_ids = map(lambda feature: feature.id, self.related_features)
    word_condition = utils.levels_or("tweet_words__id", feature_ids)
    return messages.filter(word_condition)
def generate(self, dataset, filters=None, exclude=None, page_size=100, page=None, search_key=None, groups=None):
    """
    Generate a complete data group table response.

    This includes 'table', which provides the non-zero
    message frequency for each combination of primary and secondary dimension values,
    respecting the filters.

    It also includes 'domains', which provides, for both
    primary and secondary dimensions, the levels of the
    dimension irrespective of filters (except on those actual dimensions).

    :param dataset: object exposing ``message_set``, ``start_time`` and
        ``end_time``. Messages without a time are always dropped; when both
        bounds are set, messages outside the range widened by 10% on each
        side are dropped too.
    :param filters: iterable of dicts with a ``'dimension'`` entry; each dict
        is passed as keyword arguments to ``dimension.filter(queryset, ...)``.
    :param exclude: same shape as ``filters``, applied through
        ``dimension.exclude(queryset, ...)``.
    :param page_size: number of primary levels per page.
    :param page: 1-based page of the primary domain. Paging only happens when
        there is no filter on the primary dimension and no secondary
        dimension.
    :param search_key: when paging, forwarded to ``self.filter_search_key``
        to narrow the domain before it is sliced.
    :param groups: sequence of ``Group`` ids; ``None`` means "tabulate the
        whole dataset".
    :return: ``None`` when the requested page holds no level, otherwise a
        dict with ``'domains'``, ``'domain_labels'``, optionally
        ``'max_page'`` (when paging) and either ``'table'`` or -- for groups
        combined with a secondary dimension -- ``'tables'`` (one entry per
        group with ``'group_id'``, ``'group_name'`` and ``'table'``).
    """
    primary = self.primary_dimension
    secondary = self.secondary_dimension

    # Materialise once: the specs are walked several times below.
    filters = [] if filters is None else list(filters)
    exclude = [] if exclude is None else list(exclude)

    def in_dataset_time_range(messages):
        # Drop messages without a time, then keep the dataset's time range
        # widened by 10% of its length on both sides.
        messages = messages.exclude(time__isnull=True)
        if dataset.start_time and dataset.end_time:
            span = dataset.end_time - dataset.start_time
            margin = timedelta(seconds=span.total_seconds() * 0.1)
            messages = messages.filter(time__gte=dataset.start_time - margin,
                                       time__lte=dataset.end_time + margin)
        return messages

    def apply_filters(messages):
        # Apply every filter spec, then every exclude spec.
        for spec in filters:
            messages = spec['dimension'].filter(messages, **spec)
        for spec in exclude:
            messages = spec['dimension'].exclude(messages, **spec)
        return messages

    def last_spec_on(specs, dimension):
        # The last spec that targets ``dimension``, or None.
        found = None
        for spec in specs:
            if spec['dimension'] == dimension:
                found = spec
        return found

    def over_level_cap(dimension, levels):
        # True when the "others" handling has to cut this domain down.
        return (self.mode in ('enable_others', 'omit_others') and
                dimension.is_categorical() and
                len(levels) > MAX_CATEGORICAL_LEVELS)

    def requested_page(levels, labels):
        # Slice out the requested page: (levels, labels, max_page) or None
        # when the page holds no level.
        if search_key is not None:
            levels, labels = self.filter_search_key(levels, labels, search_key)
        total = len(levels)
        start = (page - 1) * page_size
        # ``>=``: a page starting exactly at the end is empty as well.
        if total == 0 or start >= total:
            return None
        end = min(start + page_size, total)
        # Ceiling division keeps this an int on Python 3 and does not
        # advertise a trailing empty page when total is an exact multiple.
        last_page = (total + page_size - 1) // page_size
        if labels is not None:
            labels = labels[start:end]
        return levels[start:end], labels, last_page

    primary_filter = last_spec_on(filters, primary)
    secondary_filter = last_spec_on(filters, secondary)
    primary_exclude = last_spec_on(exclude, primary)
    secondary_exclude = last_spec_on(exclude, secondary)

    # paging the first dimension, this is for the filter distribution
    paging = primary_filter is None and secondary is None and page is not None

    domains = {}
    domain_labels = {}
    max_page = None
    # flag is true if the dimension is categorical and has more than
    # MAX_CATEGORICAL_LEVELS levels
    primary_flag = False
    secondary_flag = False

    if groups is None:
        unfiltered_queryset = in_dataset_time_range(dataset.message_set.all())
        queryset = apply_filters(unfiltered_queryset)
        queryset_for_others = None

        # Include the domains for primary and (secondary) dimensions
        domain, labels = self.domain(primary, unfiltered_queryset,
                                     primary_filter, primary_exclude)
        if paging:
            paged = requested_page(domain, labels)
            if paged is None:
                # no level left
                return None
            domain, labels, max_page = paged
            queryset = queryset.filter(utils.levels_or(primary.field_name, domain))
        elif over_level_cap(primary, domain):
            primary_flag = True
            domain = domain[:MAX_CATEGORICAL_LEVELS]
            if labels is not None:
                labels = labels[:MAX_CATEGORICAL_LEVELS]
            # Remember the messages before they are narrowed to the top
            # levels; the "Other" rows are computed from them.
            queryset_for_others = queryset
            queryset = queryset.filter(utils.levels_or(primary.field_name, domain))

        domains[primary.key] = domain
        if labels is not None:
            domain_labels[primary.key] = labels

        if secondary:
            domain, labels = self.domain(secondary, unfiltered_queryset,
                                         secondary_filter, secondary_exclude)
            if over_level_cap(secondary, domain):
                secondary_flag = True
                domain = domain[:MAX_CATEGORICAL_LEVELS]
                if labels is not None:
                    labels = labels[:MAX_CATEGORICAL_LEVELS]
                if queryset_for_others is None:
                    queryset_for_others = queryset
                queryset = queryset.filter(utils.levels_or(secondary.field_name, domain))

            domains[secondary.key] = domain
            if labels is not None:
                domain_labels[secondary.key] = labels

        # Render a table
        table = self.render(queryset)

        if self.mode == "enable_others" and queryset_for_others is not None:
            # adding others to the results
            table_for_others = self.render_others(queryset_for_others, domains,
                                                  primary_flag, secondary_flag)
            table = list(table)
            # render_others() may return None; treat that as "no extra rows".
            table.extend(table_for_others or [])

        results = {
            'table': table,
            'domains': domains,
            'domain_labels': domain_labels
        }
    else:
        queryset_all = apply_filters(in_dataset_time_range(dataset.message_set.all()))

        group_querysets = []
        group_labels = []
        for group in groups:
            group_obj = groups_models.Group.objects.get(id=group)
            if group_obj.order > 0:
                group_labels.append("#%d %s" % (group_obj.order, group_obj.name))
            else:
                group_labels.append("%s" % (group_obj.name))
            group_querysets.append(
                apply_filters(in_dataset_time_range(group_obj.messages)))

        # Deal with the union distribution: the domains are computed across
        # all groups by groups_domain() because a union of the group
        # querysets does not work in django. Refactoring is required.
        domain, labels = self.groups_domain(primary, queryset_all, group_querysets)
        if paging:
            paged = requested_page(domain, labels)
            if paged is None:
                # no level left
                return None
            domain, labels, max_page = paged
        elif over_level_cap(primary, domain):
            primary_flag = True
            domain = domain[:MAX_CATEGORICAL_LEVELS]
            if labels is not None:
                labels = labels[:MAX_CATEGORICAL_LEVELS]

        domains[primary.key] = domain
        if labels is not None:
            domain_labels[primary.key] = labels

        if secondary:
            domain, labels = self.groups_domain(secondary, queryset_all, group_querysets)
            if over_level_cap(secondary, domain):
                secondary_flag = True
                domain = domain[:MAX_CATEGORICAL_LEVELS]
                if labels is not None:
                    labels = labels[:MAX_CATEGORICAL_LEVELS]

            domains[secondary.key] = domain
            if labels is not None:
                domain_labels[secondary.key] = labels

        # render_others() appends an "Other ..." level to the capped domains
        # in place. Keep the pristine top levels so every group starts from
        # them instead of accumulating one extra "Other" entry per group
        # (which would also leak into the next group's level filter).
        capped_levels = {}
        if primary_flag:
            capped_levels[primary.key] = domains[primary.key]
        if secondary_flag:
            capped_levels[secondary.key] = domains[secondary.key]

        # render_others() has nothing to add (it returns None) unless at
        # least one dimension was actually capped.
        add_others = self.mode == "enable_others" and (primary_flag or secondary_flag)
        restrict_to_domain = self.mode in ('enable_others', 'omit_others')

        group_tables = []
        for queryset in group_querysets:
            for key, levels in capped_levels.items():
                domains[key] = list(levels)

            queryset_for_others = queryset
            if restrict_to_domain and primary.is_categorical():
                queryset = queryset.filter(
                    utils.levels_or(primary.field_name, domains[primary.key]))
            if secondary and restrict_to_domain and secondary.is_categorical():
                queryset = queryset.filter(
                    utils.levels_or(secondary.field_name, domains[secondary.key]))

            # Render a table
            if primary.key == "words":
                table = group_messages_by_words_with_raw_query(
                    utils.quote(str(queryset.query)), fetchall_table)
            else:
                table = self.render(queryset)

            if add_others:
                # adding others to the results
                table_for_others = self.render_others(queryset_for_others, domains,
                                                      primary_flag, secondary_flag)
                table = list(table)
                table.extend(table_for_others or [])

            group_tables.append(table)

        if secondary is None:
            # One flat table; every row is tagged with the id of its group.
            final_table = []
            for group_id, group_table in zip(groups, group_tables):
                for item in group_table:
                    item['groups'] = group_id
                    final_table.append(item)

            domains['groups'] = groups
            domain_labels['groups'] = group_labels
            results = {
                'table': final_table,
                'domains': domains,
                'domain_labels': domain_labels
            }
        else:
            tables = []
            for group_id, group_name, group_table in zip(groups, group_labels, group_tables):
                tables.append({
                    'group_id': group_id,
                    'group_name': group_name,
                    'table': group_table
                })
            results = {
                'tables': tables,
                'domains': domains,
                'domain_labels': domain_labels
            }

    if max_page is not None:
        results['max_page'] = max_page

    return results
def render_others(self, queryset, domains, primary_flag, secondary_flag, desired_primary_bins=None, desired_secondary_bins=None):
    """
    Build the table rows for the "Other ..." level of capped dimensions.

    ``queryset`` holds the messages before they were narrowed to the top
    levels listed in ``domains``. Messages outside those top levels are
    counted and reported under the level ``u'Other ' + dimension.name``.

    Side effect: for every dimension that gets an "Other" level, that label
    is appended to ``domains[dimension.key]`` in place (once, even if the
    label is already there from an earlier call).

    :param queryset: the messages to count.
    :param domains: dict mapping dimension keys to lists of levels.
    :param primary_flag: True when the primary domain was cut down.
    :param secondary_flag: True when the secondary domain was cut down.
    :param desired_primary_bins: forwarded as ``bins`` to the primary
        dimension's ``group_by`` when that dimension is not categorical.
    :param desired_secondary_bins: same for the secondary dimension.
    :return: a list of dictionaries, each with a key per dimension and a
        ``'value'`` count; ``None`` when neither flag is set or when no case
        below applies.
    """
    # check if any of the dimensions was capped at all
    if not primary_flag and not secondary_flag:
        return None

    primary = self.primary_dimension
    secondary = self.secondary_dimension

    def other_label(dimension):
        return u'Other ' + dimension.name

    def top_levels(dimension):
        # The real levels of the dimension. This is a fresh list without the
        # "Other" label, so the label is never used as a filter value, no
        # matter whether it was already appended or is appended later.
        label = other_label(dimension)
        return [level for level in domains[dimension.key] if level != label]

    def add_other_level(dimension):
        # Append the "Other" label to the domain unless it is already there.
        label = other_label(dimension)
        if label not in domains[dimension.key]:
            domains[dimension.key].append(label)

    def in_levels(dimension, levels):
        return utils.levels_or(dimension.field_name, levels)

    def counts_by(dimension, messages, other_dimension, **group_by_options):
        # Count ``messages`` per level of ``dimension`` and mark every row as
        # the "Other" level of ``other_dimension``.
        grouped = dimension.group_by(messages, grouping_key=dimension.key,
                                     **group_by_options)
        rows = list(grouped.annotate(value=models.Count('id')))
        label = other_label(other_dimension)
        for row in rows:
            row[other_dimension.key] = label
        return rows

    if not secondary:
        if primary.is_categorical() and primary_flag:
            # Only one dimension: a single row with the count of everything
            # outside the top levels.
            remainder = queryset.exclude(in_levels(primary, top_levels(primary)))
            add_other_level(primary)
            return [{primary.key: other_label(primary),
                     'value': remainder.count()}]
        return None

    primary_categorical = primary.is_categorical()
    secondary_categorical = secondary.is_categorical()

    # both dimensions are categorical
    if primary_categorical and secondary_categorical:
        primary_top = top_levels(primary)
        secondary_top = top_levels(secondary)
        if primary_flag:
            add_other_level(primary)
        if secondary_flag:
            add_other_level(secondary)

        others_results = []

        # primary others x secondary others
        if primary_flag and secondary_flag:
            remainder = queryset.exclude(in_levels(primary, primary_top))
            remainder = remainder.exclude(in_levels(secondary, secondary_top))
            others_results.append({primary.key: other_label(primary),
                                   secondary.key: other_label(secondary),
                                   'value': remainder.count()})

        # primary top ones x secondary others
        if secondary_flag:
            subset = queryset.filter(in_levels(primary, primary_top))
            subset = subset.exclude(in_levels(secondary, secondary_top))
            others_results.extend(counts_by(primary, subset, secondary))

        # primary others x secondary top ones
        if primary_flag:
            subset = queryset.exclude(in_levels(primary, primary_top))
            subset = subset.filter(in_levels(secondary, secondary_top))
            others_results.extend(counts_by(secondary, subset, primary))

        return others_results

    # primary categorical and secondary quantitative
    if primary_categorical and primary_flag and not secondary_categorical:
        remainder = queryset.exclude(in_levels(primary, top_levels(primary)))
        add_other_level(primary)
        return counts_by(secondary, remainder, primary, bins=desired_secondary_bins)

    # primary quantitative and secondary categorical
    if not primary_categorical and secondary_categorical and secondary_flag:
        remainder = queryset.exclude(in_levels(secondary, top_levels(secondary)))
        add_other_level(secondary)
        return counts_by(primary, remainder, secondary, bins=desired_primary_bins)

    return None
def generate(self, dataset, filters=None, exclude=None, page_size=100, page=None, search_key=None, groups=None):
    """
    Generate a complete data group table response.

    This includes 'table', which provides the non-zero
    message frequency for each combination of primary and secondary dimension values,
    respecting the filters.

    It also includes 'domains', which provides, for both
    primary and secondary dimensions, the levels of the
    dimension irrespective of filters (except on those actual dimensions).

    :param dataset: object exposing ``message_set``, ``start_time`` and
        ``end_time``. Messages without a time are always dropped; when both
        bounds are set, messages outside the range widened by 10% on each
        side are dropped too.
    :param filters: iterable of dicts with a ``'dimension'`` entry; each dict
        is passed as keyword arguments to ``dimension.filter(queryset, ...)``.
    :param exclude: same shape as ``filters``, applied through
        ``dimension.exclude(queryset, ...)``.
    :param page_size: number of primary levels per page.
    :param page: 1-based page of the primary domain. Paging only happens when
        there is no filter on the primary dimension and no secondary
        dimension.
    :param search_key: when paging, forwarded to ``self.filter_search_key``
        to narrow the domain before it is sliced.
    :param groups: sequence of ``Group`` ids; ``None`` means "tabulate the
        whole dataset".
    :return: ``None`` when the requested page holds no level, otherwise a
        dict with ``'domains'``, ``'domain_labels'``, optionally
        ``'max_page'`` (when paging) and either ``'table'`` or -- for groups
        combined with a secondary dimension -- ``'tables'`` (one entry per
        group with ``'group_id'``, ``'group_name'`` and ``'table'``).
    """
    primary = self.primary_dimension
    secondary = self.secondary_dimension

    # Materialise once: the specs are walked several times below.
    filters = [] if filters is None else list(filters)
    exclude = [] if exclude is None else list(exclude)

    def in_dataset_time_range(messages):
        # Drop messages without a time, then keep the dataset's time range
        # widened by 10% of its length on both sides.
        messages = messages.exclude(time__isnull=True)
        if dataset.start_time and dataset.end_time:
            span = dataset.end_time - dataset.start_time
            margin = timedelta(seconds=span.total_seconds() * 0.1)
            messages = messages.filter(time__gte=dataset.start_time - margin,
                                       time__lte=dataset.end_time + margin)
        return messages

    def apply_filters(messages):
        # Apply every filter spec, then every exclude spec.
        for spec in filters:
            messages = spec['dimension'].filter(messages, **spec)
        for spec in exclude:
            messages = spec['dimension'].exclude(messages, **spec)
        return messages

    def last_spec_on(specs, dimension):
        # The last spec that targets ``dimension``, or None.
        found = None
        for spec in specs:
            if spec['dimension'] == dimension:
                found = spec
        return found

    def over_level_cap(dimension, levels):
        # True when the "others" handling has to cut this domain down.
        return (self.mode in ('enable_others', 'omit_others') and
                dimension.is_categorical() and
                len(levels) > MAX_CATEGORICAL_LEVELS)

    def requested_page(levels, labels):
        # Slice out the requested page: (levels, labels, max_page) or None
        # when the page holds no level.
        if search_key is not None:
            levels, labels = self.filter_search_key(levels, labels, search_key)
        total = len(levels)
        start = (page - 1) * page_size
        # ``>=``: a page starting exactly at the end is empty as well.
        if total == 0 or start >= total:
            return None
        end = min(start + page_size, total)
        # Ceiling division keeps this an int on Python 3 and does not
        # advertise a trailing empty page when total is an exact multiple.
        last_page = (total + page_size - 1) // page_size
        if labels is not None:
            labels = labels[start:end]
        return levels[start:end], labels, last_page

    primary_filter = last_spec_on(filters, primary)
    secondary_filter = last_spec_on(filters, secondary)
    primary_exclude = last_spec_on(exclude, primary)
    secondary_exclude = last_spec_on(exclude, secondary)

    # paging the first dimension, this is for the filter distribution
    paging = primary_filter is None and secondary is None and page is not None

    domains = {}
    domain_labels = {}
    max_page = None
    # flag is true if the dimension is categorical and has more than
    # MAX_CATEGORICAL_LEVELS levels
    primary_flag = False
    secondary_flag = False

    if groups is None:
        unfiltered_queryset = in_dataset_time_range(dataset.message_set.all())
        queryset = apply_filters(unfiltered_queryset)
        queryset_for_others = None

        # Include the domains for primary and (secondary) dimensions
        domain, labels = self.domain(primary, unfiltered_queryset,
                                     primary_filter, primary_exclude)
        if paging:
            paged = requested_page(domain, labels)
            if paged is None:
                # no level left
                return None
            domain, labels, max_page = paged
            queryset = queryset.filter(utils.levels_or(primary.field_name, domain))
        elif over_level_cap(primary, domain):
            primary_flag = True
            domain = domain[:MAX_CATEGORICAL_LEVELS]
            if labels is not None:
                labels = labels[:MAX_CATEGORICAL_LEVELS]
            # Remember the messages before they are narrowed to the top
            # levels; the "Other" rows are computed from them.
            queryset_for_others = queryset
            queryset = queryset.filter(utils.levels_or(primary.field_name, domain))

        domains[primary.key] = domain
        if labels is not None:
            domain_labels[primary.key] = labels

        if secondary:
            domain, labels = self.domain(secondary, unfiltered_queryset,
                                         secondary_filter, secondary_exclude)
            if over_level_cap(secondary, domain):
                secondary_flag = True
                domain = domain[:MAX_CATEGORICAL_LEVELS]
                if labels is not None:
                    labels = labels[:MAX_CATEGORICAL_LEVELS]
                if queryset_for_others is None:
                    queryset_for_others = queryset
                queryset = queryset.filter(utils.levels_or(secondary.field_name, domain))

            domains[secondary.key] = domain
            if labels is not None:
                domain_labels[secondary.key] = labels

        # Render a table
        table = self.render(queryset)

        if self.mode == "enable_others" and queryset_for_others is not None:
            # adding others to the results
            table_for_others = self.render_others(queryset_for_others, domains,
                                                  primary_flag, secondary_flag)
            table = list(table)
            # render_others() may return None; treat that as "no extra rows".
            table.extend(table_for_others or [])

        results = {
            'table': table,
            'domains': domains,
            'domain_labels': domain_labels
        }
    else:
        queryset_all = apply_filters(in_dataset_time_range(dataset.message_set.all()))

        group_querysets = []
        group_labels = []
        for group in groups:
            group_obj = groups_models.Group.objects.get(id=group)
            if group_obj.order > 0:
                group_labels.append("#%d %s" % (group_obj.order, group_obj.name))
            else:
                group_labels.append("%s" % (group_obj.name))
            group_querysets.append(
                apply_filters(in_dataset_time_range(group_obj.messages)))

        # Deal with the union distribution: the domains are computed across
        # all groups by groups_domain() because a union of the group
        # querysets does not work in django. Refactoring is required.
        domain, labels = self.groups_domain(primary, queryset_all, group_querysets)
        if paging:
            paged = requested_page(domain, labels)
            if paged is None:
                # no level left
                return None
            domain, labels, max_page = paged
        elif over_level_cap(primary, domain):
            primary_flag = True
            domain = domain[:MAX_CATEGORICAL_LEVELS]
            if labels is not None:
                labels = labels[:MAX_CATEGORICAL_LEVELS]

        domains[primary.key] = domain
        if labels is not None:
            domain_labels[primary.key] = labels

        if secondary:
            domain, labels = self.groups_domain(secondary, queryset_all, group_querysets)
            if over_level_cap(secondary, domain):
                secondary_flag = True
                domain = domain[:MAX_CATEGORICAL_LEVELS]
                if labels is not None:
                    labels = labels[:MAX_CATEGORICAL_LEVELS]

            domains[secondary.key] = domain
            if labels is not None:
                domain_labels[secondary.key] = labels

        # render_others() appends an "Other ..." level to the capped domains
        # in place. Keep the pristine top levels so every group starts from
        # them instead of accumulating one extra "Other" entry per group
        # (which would also leak into the next group's level filter).
        capped_levels = {}
        if primary_flag:
            capped_levels[primary.key] = domains[primary.key]
        if secondary_flag:
            capped_levels[secondary.key] = domains[secondary.key]

        # render_others() has nothing to add (it returns None) unless at
        # least one dimension was actually capped.
        add_others = self.mode == "enable_others" and (primary_flag or secondary_flag)
        restrict_to_domain = self.mode in ('enable_others', 'omit_others')

        group_tables = []
        for queryset in group_querysets:
            for key, levels in capped_levels.items():
                domains[key] = list(levels)

            queryset_for_others = queryset
            if restrict_to_domain and primary.is_categorical():
                queryset = queryset.filter(
                    utils.levels_or(primary.field_name, domains[primary.key]))
            if secondary and restrict_to_domain and secondary.is_categorical():
                queryset = queryset.filter(
                    utils.levels_or(secondary.field_name, domains[secondary.key]))

            # Render a table
            if primary.key == "words":
                table = group_messages_by_words_with_raw_query(
                    utils.quote(str(queryset.query)), fetchall_table)
            else:
                table = self.render(queryset)

            if add_others:
                # adding others to the results
                table_for_others = self.render_others(queryset_for_others, domains,
                                                      primary_flag, secondary_flag)
                table = list(table)
                table.extend(table_for_others or [])

            group_tables.append(table)

        if secondary is None:
            # One flat table; every row is tagged with the id of its group.
            final_table = []
            for group_id, group_table in zip(groups, group_tables):
                for item in group_table:
                    item['groups'] = group_id
                    final_table.append(item)

            domains['groups'] = groups
            domain_labels['groups'] = group_labels
            results = {
                'table': final_table,
                'domains': domains,
                'domain_labels': domain_labels
            }
        else:
            tables = []
            for group_id, group_name, group_table in zip(groups, group_labels, group_tables):
                tables.append({
                    'group_id': group_id,
                    'group_name': group_name,
                    'table': group_table
                })
            results = {
                'tables': tables,
                'domains': domains,
                'domain_labels': domain_labels
            }

    if max_page is not None:
        results['max_page'] = max_page

    return results
def render_others(self, queryset, domains, primary_flag, secondary_flag, desired_primary_bins=None, desired_secondary_bins=None):
    """
    Build the table rows for the "Other ..." level of capped dimensions.

    ``queryset`` holds the messages before they were narrowed to the top
    levels listed in ``domains``. Messages outside those top levels are
    counted and reported under the level ``u'Other ' + dimension.name``.

    Side effect: for every dimension that gets an "Other" level, that label
    is appended to ``domains[dimension.key]`` in place (once, even if the
    label is already there from an earlier call).

    :param queryset: the messages to count.
    :param domains: dict mapping dimension keys to lists of levels.
    :param primary_flag: True when the primary domain was cut down.
    :param secondary_flag: True when the secondary domain was cut down.
    :param desired_primary_bins: forwarded as ``bins`` to the primary
        dimension's ``group_by`` when that dimension is not categorical.
    :param desired_secondary_bins: same for the secondary dimension.
    :return: a list of dictionaries, each with a key per dimension and a
        ``'value'`` count; ``None`` when neither flag is set or when no case
        below applies.
    """
    # check if any of the dimensions was capped at all
    if not primary_flag and not secondary_flag:
        return None

    primary = self.primary_dimension
    secondary = self.secondary_dimension

    def other_label(dimension):
        return u'Other ' + dimension.name

    def top_levels(dimension):
        # The real levels of the dimension. This is a fresh list without the
        # "Other" label, so the label is never used as a filter value, no
        # matter whether it was already appended or is appended later.
        label = other_label(dimension)
        return [level for level in domains[dimension.key] if level != label]

    def add_other_level(dimension):
        # Append the "Other" label to the domain unless it is already there.
        label = other_label(dimension)
        if label not in domains[dimension.key]:
            domains[dimension.key].append(label)

    def in_levels(dimension, levels):
        return utils.levels_or(dimension.field_name, levels)

    def counts_by(dimension, messages, other_dimension, **group_by_options):
        # Count ``messages`` per level of ``dimension`` and mark every row as
        # the "Other" level of ``other_dimension``.
        grouped = dimension.group_by(messages, grouping_key=dimension.key,
                                     **group_by_options)
        rows = list(grouped.annotate(value=models.Count('id')))
        label = other_label(other_dimension)
        for row in rows:
            row[other_dimension.key] = label
        return rows

    if not secondary:
        if primary.is_categorical() and primary_flag:
            # Only one dimension: a single row with the count of everything
            # outside the top levels.
            remainder = queryset.exclude(in_levels(primary, top_levels(primary)))
            add_other_level(primary)
            return [{primary.key: other_label(primary),
                     'value': remainder.count()}]
        return None

    primary_categorical = primary.is_categorical()
    secondary_categorical = secondary.is_categorical()

    # both dimensions are categorical
    if primary_categorical and secondary_categorical:
        primary_top = top_levels(primary)
        secondary_top = top_levels(secondary)
        if primary_flag:
            add_other_level(primary)
        if secondary_flag:
            add_other_level(secondary)

        others_results = []

        # primary others x secondary others
        if primary_flag and secondary_flag:
            remainder = queryset.exclude(in_levels(primary, primary_top))
            remainder = remainder.exclude(in_levels(secondary, secondary_top))
            others_results.append({primary.key: other_label(primary),
                                   secondary.key: other_label(secondary),
                                   'value': remainder.count()})

        # primary top ones x secondary others
        if secondary_flag:
            subset = queryset.filter(in_levels(primary, primary_top))
            subset = subset.exclude(in_levels(secondary, secondary_top))
            others_results.extend(counts_by(primary, subset, secondary))

        # primary others x secondary top ones
        if primary_flag:
            subset = queryset.exclude(in_levels(primary, primary_top))
            subset = subset.filter(in_levels(secondary, secondary_top))
            others_results.extend(counts_by(secondary, subset, primary))

        return others_results

    # primary categorical and secondary quantitative
    if primary_categorical and primary_flag and not secondary_categorical:
        remainder = queryset.exclude(in_levels(primary, top_levels(primary)))
        add_other_level(primary)
        return counts_by(secondary, remainder, primary, bins=desired_secondary_bins)

    # primary quantitative and secondary categorical
    if not primary_categorical and secondary_categorical and secondary_flag:
        remainder = queryset.exclude(in_levels(secondary, top_levels(secondary)))
        add_other_level(secondary)
        return counts_by(primary, remainder, secondary, bins=desired_primary_bins)

    return None
def all_messages(self):
    """
    Return the dataset's messages that are tied to this object's words.

    Starts from ``self.dataset.message_set.all()`` and filters it with
    ``utils.levels_or`` on ``tweet_words__id`` using the ``id`` of every
    entry in ``self.related_words`` (presumably an OR over those ids --
    confirm in ``utils``).
    """
    messages = self.dataset.message_set.all()
    word_ids = map(lambda word: word.id, self.related_words)
    word_condition = utils.levels_or("tweet_words__id", word_ids)
    return messages.filter(word_condition)