def get_unique_terms(self, input_data):
    """Parses the input data to find the list of unique terms per field.

    Three groups of fields are processed, in this order:

    * text fields (keys of `self.term_forms`), tokenized according to the
      field's entry in `self.term_analysis`,
    * items fields (keys of `self.item_analysis`), split on the field's
      separator,
    * categorical fields (keys of `self.categories`).

    Side effect: every field handled here is removed from `input_data`
    (the dict is modified in place).

    :param input_data: dict mapping field ids to input values
    :return: dict mapping each handled field id either to the result of the
             module-level `get_unique_terms` helper (string values of text
             and items fields) or to `[(value, 1)]` (non-string values and
             every categorical field)
    """
    unique_terms = {}

    # text fields
    for field_id in self.term_forms:
        if field_id not in input_data:
            continue
        input_data_field = input_data.pop(field_id)
        if not isinstance(input_data_field, basestring):
            # non-string values are kept as a single occurrence
            unique_terms[field_id] = [(input_data_field, 1)]
            continue
        analysis = self.term_analysis[field_id]
        case_sensitive = analysis.get('case_sensitive', True)
        token_mode = analysis.get('token_mode', 'all')
        if token_mode != TM_FULL_TERM:
            terms = parse_terms(input_data_field,
                                case_sensitive=case_sensitive)
        else:
            terms = []
        full_term = input_data_field if case_sensitive \
            else input_data_field.lower()
        # We add full_term if needed. Note that when there's only one term
        # in the input_data, full_term and term are equal. Then full_term
        # will not be added to avoid duplicated counters for the term.
        # `not terms` guards the `terms[0]` lookup: if the parser yields no
        # tokens at all there is nothing to duplicate (and indexing would
        # raise IndexError), so the full term is simply added.
        if token_mode == TM_FULL_TERM or (
                token_mode == TM_ALL and
                (not terms or terms[0] != full_term)):
            terms.append(full_term)
        unique_terms[field_id] = get_unique_terms(
            terms, self.term_forms[field_id],
            self.tag_clouds.get(field_id, []))

    # the same for items fields
    for field_id in self.item_analysis:
        if field_id not in input_data:
            continue
        input_data_field = input_data.pop(field_id)
        if not isinstance(input_data_field, basestring):
            unique_terms[field_id] = [(input_data_field, 1)]
            continue
        # parsing the items in input_data
        analysis = self.item_analysis[field_id]
        separator = analysis.get('separator', ' ')
        regexp = analysis.get('separator_regexp')
        if regexp is None:
            # A plain separator is escaped so that it is matched literally.
            # u'%s' is the same string as the former ur'%s' literal, but it
            # is also valid syntax on Python 3.
            regexp = u'%s' % re.escape(separator)
        terms = parse_items(input_data_field, regexp)
        unique_terms[field_id] = get_unique_terms(
            terms, {}, self.items.get(field_id, []))

    # categorical fields: the value itself is the only term
    for field_id in self.categories:
        if field_id in input_data:
            unique_terms[field_id] = [(input_data.pop(field_id), 1)]

    return unique_terms
def get_unique_terms(self, input_data):
    """Parses the input data to find the list of unique terms per field.

    Three groups of fields are processed, in this order:

    * text fields (keys of `self.term_forms`), tokenized according to the
      field's entry in `self.term_analysis`,
    * items fields (keys of `self.item_analysis`), split on the field's
      separator,
    * categorical fields (keys of `self.categories`).

    Side effect: every field handled here is removed from `input_data`
    (the dict is modified in place).

    :param input_data: dict mapping field ids to input values
    :return: dict mapping each handled field id either to the result of the
             module-level `get_unique_terms` helper (string values of text
             and items fields) or to `[(value, 1)]` (non-string values and
             every categorical field)
    """
    unique_terms = {}

    # text fields
    for field_id in self.term_forms:
        if field_id not in input_data:
            continue
        input_data_field = input_data.pop(field_id)
        if not isinstance(input_data_field, basestring):
            # non-string values are kept as a single occurrence
            unique_terms[field_id] = [(input_data_field, 1)]
            continue
        analysis = self.term_analysis[field_id]
        case_sensitive = analysis.get('case_sensitive', True)
        token_mode = analysis.get('token_mode', 'all')
        if token_mode != TM_FULL_TERM:
            terms = parse_terms(input_data_field,
                                case_sensitive=case_sensitive)
        else:
            terms = []
        if token_mode != TM_TOKENS:
            full_term = input_data_field if case_sensitive \
                else input_data_field.lower()
            # When the input holds a single term, the parsed token and the
            # full term are equal: adding the full term again would list
            # the same term twice, so it is skipped in that case.
            if terms != [full_term]:
                terms.append(full_term)
        unique_terms[field_id] = get_unique_terms(
            terms, self.term_forms[field_id],
            self.tag_clouds.get(field_id, []))

    # the same for items fields
    for field_id in self.item_analysis:
        if field_id not in input_data:
            continue
        input_data_field = input_data.pop(field_id)
        if not isinstance(input_data_field, basestring):
            unique_terms[field_id] = [(input_data_field, 1)]
            continue
        # parsing the items in input_data
        analysis = self.item_analysis[field_id]
        separator = analysis.get('separator', ' ')
        regexp = analysis.get('separator_regexp')
        if regexp is None:
            # A plain separator is escaped so that it is matched literally.
            # u'%s' is the same string as the former ur'%s' literal, but it
            # is also valid syntax on Python 3.
            regexp = u'%s' % re.escape(separator)
        terms = parse_items(input_data_field, regexp)
        unique_terms[field_id] = get_unique_terms(
            terms, {}, self.items.get(field_id, []))

    # categorical fields: the value itself is the only term
    for field_id in self.categories:
        if field_id in input_data:
            unique_terms[field_id] = [(input_data.pop(field_id), 1)]

    return unique_terms