def wash_pattern(p): """Wash pattern passed in URL. Check for sanity of the wildcard by removing wildcards if they are appended to extremely short words (1-3 letters). """ # strip accents: # p = strip_accents(p) # FIXME: when available, strip accents all the time # add leading/trailing whitespace for the two following wildcard-sanity # checking regexps: p = " " + p + " " # replace spaces within quotes by __SPACE__ temporarily: p = re_pattern_single_quotes.sub( lambda x: "'" + x.group(1).replace(' ', '__SPACE__') + "'", p) p = re_pattern_double_quotes.sub( lambda x: "\"" + x.group(1).replace(' ', '__SPACE__') + "\"", p) p = re_pattern_regexp_quotes.sub( lambda x: "/" + x.group(1).replace(' ', '__SPACE__') + "/", p) # get rid of unquoted wildcards after spaces: p = re_pattern_wildcards_after_spaces.sub("\\1", p) # get rid of extremely short words (1-3 letters with wildcards): # p = re_pattern_short_words.sub("\\1", p) # replace back __SPACE__ by spaces: p = re_pattern_space.sub(" ", p) # replace special terms: p = re_pattern_today.sub(time.strftime("%Y-%m-%d", time.localtime()), p) # remove unnecessary whitespace: p = p.strip() # remove potentially wrong UTF-8 characters: p = wash_for_utf8(p) return p
def create_tag( tag, escaper=EscapedHTMLString, opening_only=False, body=None, escape_body=False, escape_attr=True, indent=0, attrs=None, **other_attrs): """ Create an XML/HTML tag. This function create a full XML/HTML tag, putting toghether an optional inner body and a dictionary of attributes. >>> print create_html_tag ("select", create_html_tag("h1", ... "hello", other_attrs={'class': "foo"})) <select> <h1 class="foo"> hello </h1> </select> @param tag: the tag (e.g. "select", "body", "h1"...). @type tag: string @param body: some text/HTML to put in the body of the tag (this body will be indented WRT the tag). @type body: string @param escape_body: wether the body (if any) must be escaped. @type escape_body: boolean @param escape_attr: wether the attribute values (if any) must be escaped. @type escape_attr: boolean @param indent: number of level of indentation for the tag. @type indent: integer @param attrs: map of attributes to add to the tag. @type attrs: dict @return: the HTML tag. @rtype: string """ if attrs is None: attrs = {} for key, value in iteritems(other_attrs): if value is not None: if key.endswith('_'): attrs[key[:-1]] = value else: attrs[key] = value out = "<%s" % tag for key, value in iteritems(attrs): if escape_attr: value = escaper(value, escape_quotes=True) out += ' %s="%s"' % (key, value) if body is not None: if callable(body) and body.__name__ == 'handle_body': body = body() out += ">" if escape_body and not isinstance(body, EscapedString): body = escaper(body) out += body if not opening_only: out += "</%s>" % tag elif not opening_only: out += " />" if indent: out = indent_text(out, indent)[:-1] from invenio_utils.text import wash_for_utf8 return EscapedString(wash_for_utf8(out))