def _make_start_format(tag_uri, tag_name, attributes, encoding):
    """Build the segments that make up the start tag of an element.

    Returns a list of ``(text, flag, context)`` triples.  Markup is emitted
    with ``flag`` False and ``context`` None.  The value of every attribute
    whose datatype is a subclass of Unicode gets a triple of its own, with
    ``flag`` True and the context given by ``_get_attr_context`` (these are
    presumably the translatable attributes -- confirm with the callers).

    ``attributes`` is iterated as ``(attr_uri, attr_name)`` pairs and
    indexed by those pairs; ``encoding`` is handed to ``Unicode.decode``
    for both the attribute names and their values.
    """
    segments = []
    # Markup collected so far that has not been emitted as a segment yet
    pending = u'<%s' % get_qname(tag_uri, tag_name)

    for attr_uri, attr_name in attributes:
        qname = get_attribute_qname(attr_uri, attr_name)
        qname = Unicode.decode(qname, encoding=encoding)
        value = Unicode.decode(attributes[(attr_uri, attr_name)],
                               encoding=encoding)
        value = XMLAttribute.encode(value)

        datatype = get_attr_datatype(tag_uri, tag_name, attr_uri, attr_name,
                                     attributes)
        if not issubclass(datatype, Unicode):
            # Plain attribute: it stays inside the surrounding markup
            pending += u' %s="%s"' % (qname, value)
            continue

        # Flush the markup up to the opening quote, then emit the value
        # alone; the closing quote starts the next run of markup.
        segments.append((pending + u' %s="' % qname, False, None))
        context = _get_attr_context(datatype, tag_name, attr_name)
        segments.append((value, True, context))
        pending = u'"'

    # Close the start tag
    if is_empty(tag_uri, tag_name):
        pending += u'/>'
    else:
        pending += u'>'
    segments.append((pending, False, None))

    return segments
def normalize(data):
    """Decode ``data`` and collapse its white space.

    Every run of white space becomes a single space, and leading and
    trailing white space is dropped, as described for inter-word space in
    http://www.w3.org/TR/html401/struct/text.html#h-9.1
    """
    # "encoding" is not a parameter: it comes from the enclosing scope
    words = Unicode.decode(data, encoding).split()
    return ' '.join(words)
def decode(cls, data):
    """Decode ``data`` to unicode and clean it up.

    The input is stripped and decoded, every match of ``single_eq`` is
    rewritten as "==", then a few characters are removed or replaced
    (see the table below).
    """
    # The case is left untouched on purpose, to preserve enumerates
    text = Unicode.decode(data.strip())
    # A single "=" is accepted as the equality operator
    text = single_eq.sub(u"==", text)

    # (old, new) pairs, applied in this order
    substitutions = (
        # "#" is dropped (noted upstream as an alternative way to name
        # variables)
        (u'#', u''),
        # Non-break spaces are removed
        (u'\u00a0', u''),
        # Angle quotation marks become plain double quotes
        (u'«', u'"'),
        (u'»', u'"'),
    )
    for old, new in substitutions:
        text = text.replace(old, new)
    return text
def join_content(data):
    """Concatenate the chunks of ``data`` and decode the result.

    The joined string is passed to ``Unicode.decode`` together with
    ``encoding``, which is taken from the enclosing scope.
    """
    return Unicode.decode(''.join(data), encoding)
def test_Unicode(self):
    """Encoding then decoding a multilingual string gives it back."""
    original = u'العربيه 中文 Español Français'
    encoded = Unicode.encode(original)
    decoded = Unicode.decode(encoded)
    self.assertEqual(original, decoded)
def decode(cls, data):
    """Decode ``data`` and return it as a name/value dictionary.

    Both the 'name' and the 'value' entries hold the decoded text.
    """
    text = Unicode.decode(data)
    # The name is the decoded text as is; it is not passed through
    # checkid().
    return dict.fromkeys(('name', 'value'), text)
def to_text(self):
    """Return the text of this resource, as one string per language.

    The result maps every language of the site root's
    'website_languages' property to the newline-joined list of: the title
    and description, the titles of the ancestors whose class_id is
    'category', the TEXT events of the 'data' property, the values of the
    product model's properties, the manufacturer's title and the text of
    the purchase options of every declination.
    """
    site_root = self.get_site_root()
    languages = site_root.get_property('website_languages')
    product_model = self.get_product_model()
    schema = product_model.get_model_schema() if product_model else {}
    purchase_options_schema = self.get_purchase_options_schema()
    declinations = list(self.search_resources(cls=Declination))

    result = {}
    for language in languages:
        texts = result.setdefault(language, [])

        # Title and description
        for name in ('title', 'description'):
            prop = self.get_property(name, language=language)
            if prop:
                texts.append(prop)

        # Titles of the enclosing categories, nearest one first
        category = self.parent
        while category.class_id == 'category':
            texts.append(category.get_title(language=language))
            category = category.parent

        # Text found in the data (html)
        events = self.get_property('data', language=language)
        if events:
            chunks = []
            for event, content, line in events:
                if event == TEXT:
                    chunks.append(unicode(content, 'utf-8'))
            if chunks:
                texts.append(u' '.join(chunks))

        # Dynamic properties (they are read without a language)
        for key, datatype in schema.iteritems():
            value = self.get_property(key)
            if not value:
                continue
            multiple = datatype.multiple
            text = None
            if issubclass(datatype, Unicode):
                text = ' '.join(list(value)) if multiple else value
            elif issubclass(datatype, String):
                if multiple:
                    decoded = [Unicode.decode(item) for item in value]
                    text = ' '.join(decoded)
                else:
                    text = Unicode.decode(value)
            elif issubclass(datatype, Enumerate):
                # XXX use multilingual label
                text = ' '.join([value] if multiple is False else value)
            if text:
                texts.append(text)

        # NOTE(review): the two steps below append to the per-language
        # list, so they are taken to run once per language -- confirm.

        # Manufacturer
        manufacturer = self.get_property('manufacturer')
        if manufacturer:
            resource = site_root.get_resource(manufacturer)
            texts.append(resource.get_title())

        # Purchase options
        for declination in declinations:
            for key, datatype in purchase_options_schema.iteritems():
                option = declination.get_property(key)
                option_text = datatype.to_text(option, languages)
                if option_text:
                    texts.append(option_text)

    # Turn every list of texts into a single string
    for language, texts in result.iteritems():
        result[language] = u'\n'.join(texts)

    return result
def to_text(self):
    """Collect the text of this resource for each website language.

    Returns a dictionary whose keys are the languages found in the
    'website_languages' property of the site root, and whose values are
    the collected texts joined with newlines.  For a language the texts
    come, in order, from: title and description, ancestors with class_id
    'category', TEXT events of the 'data' property, properties of the
    product model, the manufacturer and the declinations' purchase
    options.
    """
    def property_text(datatype, value):
        # Text of one product model property, or None when the datatype
        # is neither Unicode, String nor Enumerate.
        multiple = datatype.multiple
        if issubclass(datatype, Unicode):
            if multiple:
                return ' '.join(list(value))
            return value
        if issubclass(datatype, String):
            if multiple:
                return ' '.join([Unicode.decode(x) for x in value])
            return Unicode.decode(value)
        if issubclass(datatype, Enumerate):
            # XXX use multilingual label
            if multiple is False:
                return ' '.join([value])
            return ' '.join(value)
        return None

    by_language = {}
    site_root = self.get_site_root()
    languages = site_root.get_property('website_languages')
    schema = {}
    product_model = self.get_product_model()
    if product_model:
        schema = product_model.get_model_schema()
    purchase_options_schema = self.get_purchase_options_schema()
    declinations = list(self.search_resources(cls=Declination))

    for language in languages:
        pieces = by_language.setdefault(language, [])

        # Title, then description, skipping the empty ones
        headline = [self.get_property(key, language=language)
                    for key in ('title', 'description')]
        pieces.extend([item for item in headline if item])

        # Walk up the parents for as long as they are categories
        ancestor = self.parent
        while True:
            if ancestor.class_id != 'category':
                break
            pieces.append(ancestor.get_title(language=language))
            ancestor = ancestor.parent

        # data (html): only the TEXT events are kept
        events = self.get_property('data', language=language)
        if events:
            words = [unicode(raw, 'utf-8')
                     for kind, raw, line in events if kind == TEXT]
            if words:
                pieces.append(u' '.join(words))

        # Dynamic properties, read without a language
        for key, datatype in schema.iteritems():
            value = self.get_property(key)
            if value:
                text = property_text(datatype, value)
                if text:
                    pieces.append(text)

        # NOTE(review): manufacturer and purchase options are assumed to
        # belong to the per-language loop, since they append to the list
        # of the current language -- confirm.

        # Manufacturer
        manufacturer = self.get_property('manufacturer')
        if manufacturer:
            manufacturer = site_root.get_resource(manufacturer)
            pieces.append(manufacturer.get_title())

        # Purchase options of every declination
        for declination in declinations:
            for key, datatype in purchase_options_schema.iteritems():
                name = declination.get_property(key)
                text = datatype.to_text(name, languages)
                if text:
                    pieces.append(text)

    # Join
    for language, pieces in by_language.items():
        by_language[language] = u'\n'.join(pieces)

    return by_language