def getwords(s):
    """Split *s* into a list of lowercase, ASCII-only words.

    The string is first decomposed (NFD) so that an accented letter becomes
    its base ASCII letter followed by a combining mark; the mark is then
    dropped by the ASCII filter while the base letter is kept.

    Note: only the space character separates words. Other whitespace
    characters (tab, newline, ...) pass the filter and therefore remain
    inside the word that contains them.
    """
    allowed = frozenset(string.ascii_letters + string.digits + string.whitespace)
    decomposed = normalize('NFD', s)
    # Punctuation is handed to multi_replace with a single space as replacement.
    spaced = multi_replace(decomposed, "-_&+():;\\[]{}.,<>/?~!@#$*", ' ')
    lowered = spaced.lower()
    filtered = ''.join(ch for ch in lowered if ch in allowed)
    # Consecutive spaces yield empty fragments; drop them.
    return [word for word in filtered.split(' ') if word]
def do_process(s, clean=True):
    """Return *s* after optional cleaning and whitespace conversion.

    When *clean* is truthy, *s* is first passed through
    ``multi_replace(s, FS_FORBIDDEN, ' ')``.

    The conversion applied afterwards depends on ``whitespaces``, a name
    this function reads from an outer scope (it is not a parameter):

    * ``WS_SPACES_TO_UNDERSCORES``: spaces and tabs become underscores.
    * ``WS_UNDERSCORES_TO_SPACES``: underscores become spaces.
    * any other value: the text is returned without further conversion.
    """
    text = multi_replace(s, FS_FORBIDDEN, ' ') if clean else s
    if whitespaces == WS_SPACES_TO_UNDERSCORES:
        return text.replace(' ', '_').replace('\t', '_')
    if whitespaces == WS_UNDERSCORES_TO_SPACES:
        return text.replace('_', ' ')
    return text
def handle_item(chunk, which, letter=0):
    """Return the cleaned name of the first or last item of *chunk*.

    Parameters:
        chunk: A sequence of objects exposing a ``name`` attribute.
        which: ``'first'`` to use ``chunk[0]``, ``'last'`` to use ``chunk[-1]``.
        letter: When truthy, the name is truncated to
            ``name[:tryint(letter, None)]`` before cleaning.

    Returns:
        ``None`` when *chunk* is empty; otherwise the selected name passed
        through ``multi_replace(name, FS_FORBIDDEN, ' ')``.

    Raises:
        ValueError: If *which* is neither ``'first'`` nor ``'last'``.
            Previously this case failed later with an unbound local variable.
    """
    if len(chunk) == 0:
        return None
    if which == 'first':
        result = chunk[0].name
    elif which == 'last':
        result = chunk[-1].name
    else:
        raise ValueError("which must be 'first' or 'last', got %r" % (which,))
    if letter:
        # A None upper bound keeps the whole name.
        result = result[:tryint(letter, None)]
    return multi_replace(result, FS_FORBIDDEN, ' ')
def value_to_db(value):
    """Convert *value* to the string form used for storage.

    The conversion is chosen from ``get_value_type(value)``:

    * ``ATTR_TYPE_INT``: ``str(value)``.
    * ``ATTR_TYPE_BINARY``: each element is rendered as two lowercase hex
      digits and the digits are concatenated.
    * anything else: ``str`` instances are passed through
      ``multi_replace(value, ['\\n', '\\r'], '')``; other objects are
      converted with ``str()``.
    """
    kind = get_value_type(value)
    if kind == ATTR_TYPE_INT:
        return str(value)
    if kind == ATTR_TYPE_BINARY:
        # NOTE(review): ord() requires each element to be a one-character
        # string; iterating a bytes object yields ints instead. Confirm what
        # type binary values have at this point.
        return ''.join('%02x' % ord(element) for element in value)
    if isinstance(value, str):
        return multi_replace(value, ['\n', '\r'], '')
    return str(value)
def value_to_db(value):
    """Return the storage string for *value*.

    ``get_value_type(value)`` selects the conversion:

    * ``ATTR_TYPE_INT``: the value is converted with ``str()``.
    * ``ATTR_TYPE_BINARY``: the value is hex-encoded, two lowercase digits
      per element.
    * otherwise: a ``str`` is passed through
      ``multi_replace(value, ['\\n', '\\r'], '')``; any other object is
      converted with ``str()``.
    """
    detected_type = get_value_type(value)
    if detected_type == ATTR_TYPE_INT:
        return str(value)
    if detected_type == ATTR_TYPE_BINARY:
        # NOTE(review): ord() only accepts one-character strings, so this
        # assumes the binary value iterates as characters, not ints; verify.
        hex_pairs = [format(ord(item), '02x') for item in value]
        return ''.join(hex_pairs)
    if not isinstance(value, str):
        return str(value)
    return multi_replace(value, ['\n', '\r'], '')
def getwords(s):
    """Split *s* into a list of lowercase words, keeping high code points.

    The string is decomposed (NFD) so that an accented letter becomes its
    base letter followed by a combining mark, which the filter below drops.

    Note: only the space character separates words; other whitespace
    characters that pass the filter remain inside the word containing them.
    """
    # HACK: We shouldn't ignore non-ASCII characters altogether. Any
    # character above the common European range that cannot be "sanitized"
    # (i.e. stripped of its accents, etc.) is preserved as is. The arbitrary
    # limit is ord("\u037e"), GREEK QUESTION MARK.
    limit = 894
    allowed = frozenset(string.ascii_letters + string.digits + string.whitespace)
    decomposed = normalize("NFD", s)
    # Punctuation is handed to multi_replace with a single space as replacement.
    spaced = multi_replace(decomposed, "-_&+():;\\[]{}.,<>/?~!@#$*", " ")
    lowered = spaced.lower()
    # Keep everything above the limit; at or below it, keep only ASCII
    # letters, digits and whitespace.
    filtered = "".join(ch for ch in lowered if ord(ch) > limit or ch in allowed)
    # Consecutive spaces yield empty fragments; drop them.
    return [word for word in filtered.split(" ") if word]
def RenameNode(self, node, new_name):
    """Rename *node* and return the name it actually ended up with.

    *new_name* is passed through ``multi_replace(new_name, FS_FORBIDDEN)``
    before being assigned, so the stored name may differ from the one
    requested. The value returned is read back from ``node.name``.
    """
    sanitized = multi_replace(new_name, FS_FORBIDDEN)
    node.name = sanitized
    return node.name
def getwords(s):
    """Return the lowercase, ASCII-only words found in *s*.

    *s* is decomposed (NFD) first, so an accented letter is split into a
    base ASCII letter and a combining mark; the base letter survives the
    ASCII filter and the mark does not.

    Note: words are separated on the space character only. Tabs, newlines
    and other whitespace pass the filter and stay inside their word.
    """
    keep = frozenset(string.ascii_letters + string.digits + string.whitespace)
    text = normalize("NFD", s)
    # Punctuation is handed to multi_replace with a single space as replacement.
    text = multi_replace(text, "-_&+():;\\[]{}.,<>/?~!@#$*", " ")
    text = text.lower()
    text = "".join(filter(keep.__contains__, text))
    # Runs of spaces produce empty pieces; filter them out.
    return [piece for piece in text.split(" ") if piece]
def RenameNode(self, node, new_name):
    """Assign a cleaned version of *new_name* to *node* and return it.

    The requested name goes through ``multi_replace(new_name, FS_FORBIDDEN)``
    first, so callers should use the returned value (read back from
    ``node.name``) rather than assume *new_name* was applied verbatim.
    """
    cleaned_name = multi_replace(new_name, FS_FORBIDDEN)
    node.name = cleaned_name
    return node.name