def getEmployeeName(self, num):
    '''
    Build the display name of one employee.

    The employee record is taken from ``self.objJSON("Items")[num]`` and its
    ``FirstName`` and ``LastName`` fields are each passed through ``uni``
    before being joined with a single space.

    @param num: int - index of the employee inside the "Items" collection
    @return: the string "<FirstName> <LastName>"
    '''
    employee = self.objJSON("Items")[num]
    first_name = uni(employee[u"FirstName"])
    last_name = uni(employee[u"LastName"])
    return "{0} {1}".format(first_name, last_name)
def parse_figma(json, frames=False, selection=''):
    """Yield one text line per canvas, then one per frame, of a Figma document.

    Canvas lines have the form ``"<canvas name> -- <canvas id>"``; frame lines
    have the form ``"<canvas name>.<frame name> -- <frame id>"``. Names and ids
    go through ``uni`` and any newline characters are removed from the result.

    Args:
        json: Mapping with the canvases under ``json['document']['children']``;
            each canvas provides ``'name'``, ``'id'`` and ``'children'``
            (its frames, each with ``'name'`` and ``'id'``).
        frames: Accepted for interface compatibility; not read by this body.
        selection: Accepted for interface compatibility; not read by this body.

    Yields:
        str: All canvas lines first, followed by all frame lines.
    """
    # Succeed placed here due to startup time of fzf
    spinner.succeed()

    # First pass: one entry per canvas.
    for page in json['document']['children']:
        page_entry = "{} -- {}".format(uni(page['name']), uni(page['id']))
        yield page_entry.replace('\n', '')

    # Second pass: one entry per frame, prefixed with its canvas name.
    for page in json['document']['children']:
        for node in page['children']:
            node_entry = "{}.{} -- {}".format(
                uni(page['name']), uni(node['name']), uni(node['id']))
            yield node_entry.replace('\n', '')
def say(self): words = [] dbase = self.learn() put = uni(raw_input('> ').decode('utf-8')).upper() put = re.sub('[.,?!]', '', put) if put in self.questioned: print 'Creuza: de novo?' return self.questioned.append(put) if len(put.split()) < 2: if put.find('OI') != -1 or put.find('OLA') != -1: print 'Creuza: Oi, tudo bem?' if put.find('PRAZER') != -1: print 'Creuza: o prazer é todo seu..' if put.find('PORQUE') != -1: print 'Creuza: não enche..' return for question in dbase: words = [] q = question[0].upper() q = re.sub('[.,?!]', ' ', q) answer = question[1] # text = re.split('\W+', q) text = q.split() for w in text: # if len(w) > 0: words.append(w) confirm = set(put.split()) & set(words) # print put.split() # if len(confirm): # print confirm if len(confirm) == len(put.split()): print 'Creuza:', choice(answer) return
def unidecode(string) -> str:
    """Transliterate a ``str`` to ASCII; pass any other value through untouched.

    Args:
        string (str): The value to transliterate.

    :Design:
        Thin wrapper around ``unidecode.unidecode``. The third-party function
        is imported locally under the alias ``uni`` so that it does not clash
        with this function's own name.

        The original documentation of this wrapper quotes the wrapped
        library's docstring: "Transliterate an Unicode object into an ASCII
        string" (e.g. ``u"北亰"`` becomes ``"Bei Jing "``). "This function
        first tries to convert the string using ASCII codec. If it fails
        (because of non-ASCII characters), it falls back to transliteration
        using the character tables. This is approx. five times faster if the
        string only contains ASCII characters, but slightly slower than using
        unidecode directly if non-ASCII chars are present."

    :Dependencies:
        * unidecode

    :Example:
        ::

            import utils3.utils as u
            s = u.unidecode(string)

    Returns:
        The transliterated string when ``string`` is a ``str``; otherwise the
        value that was passed in, unchanged.

    """
    # Imported first, as before, so a missing dependency surfaces on every call.
    from unidecode import unidecode as uni

    if isinstance(string, str):
        return uni(string)
    # Non-string input is handed back as-is.
    return string
def get_tokens(inp):
    """Return the lower-cased word tokens of ``inp[target_key]``.

    When ``target`` equals ``'title'`` the value is treated as a single
    sentence; otherwise it is iterated as a sequence of sentences. Each
    sentence is passed through ``uni`` and lower-cased before being split
    with ``word_tokenize``.

    Args:
        inp: Mapping that holds the text under ``target_key``.

    Returns:
        list: Tokens in reading order. A leading and/or trailing apostrophe
        is stripped from tokens longer than one character, except for the
        token ``'s``, which is kept as-is.
    """
    sentences = inp[target_key]
    if target == 'title':
        sentences = [sentences]

    collected = []
    for sentence in sentences:
        # convert all non-ascii to nearest ascii
        normalised = uni(sentence).lower()
        for tok in word_tokenize(normalised):
            # Quotation marks glued to a word are dropped; the possessive
            # "'s" is left alone.
            if tok != "'s":
                if len(tok) > 1 and tok.startswith("'"):
                    tok = tok[1:]
                if len(tok) > 1 and tok.endswith("'"):
                    tok = tok[:-1]
            collected.append(tok)
    return collected
def say(self): words = [] dbase = self.learn() put = uni(raw_input('> ').decode('utf-8')).upper() put = re.sub('[.,?!]', '', put) # if put in self.questioned: # print 'Creuza: de novo?' # return self.questioned.append(put) if len(put.split()) < 2: if put.find('#ADD') != -1: self.add() print if put.find('KKK') != -1 or put.find('HAHA') != -1: print 'Creuza: ha ha ha' if put.find('OI') != -1 or put.find('OLA') != -1: print 'Creuza: Oi, tudo bem?' if put.find('OBRIGAD') != -1: print 'Creuza: por nada :)' if put.find('PRAZER') != -1: print 'Creuza: o prazer é todo seu..' if put.find('TCHAU') != -1: print 'Creuza: vaza!!' if put.find('PORQUE') != -1: print 'Creuza: não enche..' return for question in dbase: words = [] q = question[0].upper() q = re.sub('[.,?!]', ' ', q) answer = question[1] # text = re.split('\W+', q) text = q.split() for w in text: words.append(w) confirm = set(put.split()) & set(words) if len(confirm) == len(put.split()): time.sleep(1) print 'Creuza:', choice(answer).rstrip() return
# Load the two text dumps this script works on: the table of contents
# ('index.txt') and the tables themselves ('all_text.txt').
with open('index.txt', 'r') as f:  # Read the table of contents
    index = f.read()
with open('all_text.txt', 'r') as f:  # Read the tables as plain text
    all_text = f.read()

# Regex that matches the title of every table, whether or not it is of the 'Registro' kind
TABLE_TITLE_REGEX = r'\n((Registros do \n?evento )?S\n-\n(\d+) \n-\n \n([\w,. /]+\n))'
titles = re.findall(TABLE_TITLE_REGEX, all_text, re.MULTILINE|re.UNICODE)
# The tables are the content between two consecutive titles.
# NOTE(review): titles are replaced by '$' and the text is then split on '$',
# so this relies on '$' not occurring anywhere else in all_text - confirm.
tables = re.sub(TABLE_TITLE_REGEX, '$', all_text).split('$')[1:]

# Dictionary keyed by table number; each value holds the table name and
# (per the original note) its data as a list(list(string)).
# Only the 'name' entry is filled in here; it is the description from the
# table of contents without its last three characters, lower-cased, passed
# through uni(), with runs of '-', '/', '.', ',' and whitespace turned into '_'.
table_numbers = {
    re.sub(r'\n', '', number): {
        'name': re.sub(r'[\-/\.,\s]+', '_', uni(desc[:-3].lower().replace('\n', '')))
    } for number, desc in re.findall(
        # Search the table of contents for each table's number and name
        r' \nS\n-\n(\d+(?:\n\d+)?) \n-\n \n((?:(?:\w[\w,\. /]+| |-)\n)+)\.{2,}', index, re.IGNORECASE)
}

# A big problem is that the only separator present in the tables is the line break ('\n \n').
# However, many texts in the 'Condição' field contain line breaks themselves, which makes it
# impossible to tell whether a break starts a new table cell or is a line break inside the same cell.
# This is explained further on.
# Only every second (title, table) pair is visited, starting from the second one.
# NOTE(review): the reason for skipping the other pairs is not visible here - confirm.
for title, table in list(zip(titles, tables))[1::2]:
    # Put the 'Ocorr.' field on a single line: "<digits>\n-\n<digits>" becomes "<digits>-<digits>"
    tables_cells = re.sub(r'(\d+)\n-\n(\d+)', r'\g<1>-\g<2>', table)