def find_overlapping(self, text):
    """
    Collect every known word found in text, overlapping hits included.

    text: str
    returns: list of matches
    """
    self.build_automaton()
    if self.replace_foreign_chars:
        text = anyascii(text)
    # The automaton is searched on the (optionally lowercased) copy, while
    # handlers and extract_fn always receive the un-lowercased text.
    haystack = text.lower() if self._ignore_case_in_search else text

    needs_checks = (self.handlers or self.left_bound_chars
                    or self.right_bound_chars)
    if not needs_checks:
        # Nothing to validate: every raw automaton hit is reported as-is.
        matches = []
        for last, (size, norm) in self.automaton.iter(haystack):
            matches.append(
                self.extract_fn(last - size + 1, last + 1, norm, text))
        return matches

    # The bounds check always runs last and its result is always kept.
    pipeline = self.handlers + [(False, True, self.bounds_check)]
    matches = []
    for last, (size, norm) in self.automaton.iter(haystack):
        if norm is None:
            continue
        begin = last - size + 1
        end = last + 1
        for compare, keep_result, handler in pipeline:
            # A handler with a truthy `compare` only runs for that very norm.
            if compare and compare is not norm:
                continue
            _begin, _end, outcome = handler(text, begin, end, norm)
            if keep_result and outcome is not None:
                matches.append(outcome)
    return matches
def contains(self, text):
    """
    Test whether any known words match in text.

    text: str
    returns: bool
    """
    # make_automaton() is called unconditionally here, after which the
    # word list is flagged as unchanged.
    self.automaton.make_automaton()
    self.words_changed = False
    if self.replace_foreign_chars:
        text = anyascii(text)
    # Search on the (optionally lowercased) copy; handlers receive `text`.
    _text = text.lower() if self._ignore_case_in_search else text
    if not self.handlers and not self.left_bound_chars and not self.right_bound_chars:
        # Without handlers or boundary characters any raw hit counts, so
        # answer from this single scan instead of falling through and
        # scanning the text a second time when nothing was found.
        return any(True for _ in self.automaton.iter(_text))
    # The bounds check always runs last and its result is always kept.
    handlers = self.handlers + [(False, True, self.bounds_check)]
    for end_index, (length, norm) in self.automaton.iter(_text):
        if norm is None:
            continue
        start = end_index - length + 1
        stop = end_index + 1
        for compare, keep_result, handler in handlers:
            # A handler with a truthy `compare` only runs for that very norm.
            if compare and compare is not norm:
                continue
            # Handlers may move the span; later handlers see the new bounds.
            start, stop, result = handler(text, start, stop, norm)
            if result is not None and keep_result:
                return True
    return False
def __contains__(self, key):
    """
    Report whether key is present in the automaton.

    The key is transliterated first when replace_foreign_chars is set, and
    lowercased when case is ignored in search or the case mode is "smart".
    """
    if self.replace_foreign_chars:
        key = anyascii(key)
    # note that smart includes lowercase, so checking the lowercased key
    # is enough in that mode as well
    lowercase_lookup = self._ignore_case_in_search or self.case == "smart"
    probe = key.lower() if lowercase_lookup else key
    return probe in self.automaton
def print_changelog():
    """
    Print every release from `_get_releases().reversed`: its tag prefixed
    with "v", followed by its transliterated body with "- " turned into tabs.

    Raises MetaError wrapping any exception raised along the way.
    """
    try:
        for entry in _get_releases().reversed:
            heading = Fore.LIGHTCYAN_EX + "v" + entry.tag_name
            notes = anyascii(entry.body).replace("- ", "\t")
            print(heading + Fore.LIGHTYELLOW_EX + notes)
    except Exception as e:
        raise MetaError(e, message="Error retrieving changelog")
def split_in_words_mention(inputstr):
    """
    Split inputstr on whitespace and transliterate each piece with anyascii.

    A word-character regexp tokenizer is deliberately not used here: it
    also splits 'AL-NAHAR', which should be a single word, into 'AL' and
    'NAHAR', resulting in the inability to find a match. Same with U.S.

    returns: list of str, one entry per whitespace-separated token
    """
    return [anyascii(w) for w in inputstr.split()]
def add_one(self, k, v=None):
    """
    Register word k with value v.

    The pair is stored in _root_dict exactly as given and the word list is
    flagged as changed. The key is then transliterated when
    replace_foreign_chars is set, a missing value defaults to that key, and
    the adder matching self.case receives (key, value, len(key)).
    """
    self._root_dict[k] = v
    self.words_changed = True

    key = anyascii(k) if self.replace_foreign_chars else k
    value = key if v is None else v

    mode = self.case
    if mode == "ignore":
        adder = self.add_ignore
    elif mode == "insensitive":
        adder = self.add_insensitive
    else:
        # every other case mode is handled by the case-sensitive adder
        adder = self.add_sensitive
    adder(key, value, len(key))
def replace(self, text, return_entities=False):
    """
    Replaces known words in text.

    text: str
    returns: replaced str
    If return_entities=True, returns: replaced str, list of matches
    (each match being a (matched_text, result) tuple).

    Raises ValueError when self.returns is neither "norm" nor a callable.
    """
    if self.returns != "norm" and not callable(self.returns):
        raise ValueError("no idea how i would do that")
    self.build_automaton()
    if self.replace_foreign_chars:
        text = anyascii(text)
    # Entries are (length, start, stop, (matched_text, result)). The leading
    # sentinel "ends" at 0 so the text before the first match is copied.
    keywords = [(None, None, 0, ("", ""))]
    current_stop = -1
    # Search on the (optionally lowercased) copy; handlers receive `text`.
    _text = text.lower() if self._ignore_case_in_search else text
    # The bounds check always runs last and its result is always kept.
    handlers = self.handlers + [(False, True, self.bounds_check)]
    for end_index, (length, norm) in self.automaton.iter(_text):
        start = end_index - length + 1
        stop = end_index + 1
        for compare, keep_result, handler in handlers:
            # A handler with a truthy `compare` only runs for that very norm.
            if compare and compare is not norm:
                continue
            # Handlers may move the span; later handlers see the new bounds.
            start, stop, result = handler(text, start, stop, norm)
            if result is None:  # maybe want to remove this
                break
            if not keep_result:
                break
            if start >= current_stop:
                # Starts at or after the end of the last kept match.
                current_stop = stop
                keywords.append((current_stop - start, start, current_stop,
                                 (text[start:stop], result)))
            elif stop - start > keywords[-1][0]:
                # Overlaps the last kept match but is longer: replace it.
                current_stop = max(current_stop, stop)
                keywords[-1] = (current_stop - start, start, current_stop,
                                (text[start:stop], result))
    # The trailing sentinel "starts" at len(text) so the tail is copied too.
    keywords.append((None, len(text), None, ("", "")))
    # Collect the pieces and join once instead of growing a string with +=.
    pieces = []
    for (_, _, prev_stop, _), (_, next_start, _, (_, replacement)) in zip(
            keywords, keywords[1:]):
        if not isinstance(replacement, str):
            replacement = replacement.norm
        pieces.append(text[prev_stop:next_start])
        pieces.append(replacement)
    text_ = "".join(pieces)
    if return_entities:
        return text_, [x[-1] for x in keywords[1:-1]]
    return text_
def remove(self, k):
    """
    Forget the known word k, honouring the configured case mode.

    returns: False when k was never added, True once it has been removed
    """
    if k not in self._root_dict:
        return False

    del self._root_dict[k]
    self.words_changed = True
    if self.replace_foreign_chars:
        k = anyascii(k)

    mode = self.case
    drop = self.automaton.remove_word
    if mode == "sensitive":
        drop(k)
    elif mode == "smart":
        # smart mode removes each casing variant of the word
        drop(k)
        drop(k.lower())
        drop(k.title())
        drop(k.upper())
        drop(to_sentence_case(k))
    else:
        drop(k.lower())
    return True
def findall(self, text):
    """
    Finds the known words in text.

    Overlapping hits are collapsed as they arrive: a hit that starts at or
    after the end of the last kept one is appended, while a hit that starts
    earlier replaces the last entry only when its span is longer than the
    length recorded for that entry.

    text: str
    returns: list of matches
    """
    self.build_automaton()
    if self.replace_foreign_chars:
        text = anyascii(text)
    # Each entry is (length, start, stop, result).
    keywords = []
    current_stop = -1
    # Search on the (optionally lowercased) copy; handlers receive `text`.
    _text = text.lower() if self._ignore_case_in_search else text
    # NOTE: a fast path for the "no handlers, no bound chars" case used to
    # live here; it was disabled with the remark "might overlap?".
    # The bounds check always runs last and its result is always kept.
    handlers = self.handlers + [(False, True, self.bounds_check)]
    for end_index, (length, norm) in self.automaton.iter(_text):
        if norm is None:
            continue
        start = end_index - length + 1
        stop = end_index + 1
        for compare, keep_result, handler in handlers:
            # Skip a handler registered for another norm: `compare` must be
            # the norm itself, or the norm must equal the non-exact dict
            # form built from it.
            if compare and compare is not norm and norm != {
                    "norm": compare,
                    "exact": False
            }:
                continue
            # Handlers may move the span; later handlers see the new bounds.
            start, stop, result = handler(text, start, stop, norm)
            if result is None:  # maybe want to remove this
                break
            if not keep_result:
                # Record the span with a None result: it advances
                # current_stop but is filtered out of the returned list.
                current_stop = stop
                keywords.append(
                    (current_stop - start, start, current_stop, None))
                break
            if start >= current_stop:
                current_stop = stop
                result = (current_stop - start, start, current_stop, result)
                keywords.append(result)
            elif stop - start > keywords[-1][0]:
                current_stop = max(stop, current_stop)
                result = (current_stop - start, start, current_stop, result)
                keywords[-1] = result
            # NOTE: an `else` branch that appended the remaining hits was
            # deliberately commented out by the original author.
    return [x[3] for x in keywords if x[3] is not None]
def check(s, expected):
    """Assert that anyascii(s) equals expected."""
    actual = anyascii(s)
    assert actual == expected
cur_inst = getInstructionAt(xref_addr) if cur_inst != None: inst_addr = cur_inst.getAddress() mnemonic = cur_inst.getMnemonicString() if mnemonic == "MOV": # e.g. MOV EDX, 0xb3 decoded_list_index = cur_inst.getOpObjects(1)[0] if cur_inst.getOperandType(1) == ghidra.program.model.lang.OperandType.SCALAR: decoded_list_index = cur_inst.getOpObjects(1)[0] t = currentProgram.startTransaction("Address Labeling") try: print(f"[+] Decode function: {xref.getFromAddress()}, Address contains index: {inst_addr}, Index: {decoded_list_index.getValue()}, String: {mapped_string[str(decoded_list_index.getValue())]}") if " " in mapped_string[str(decoded_list_index.getValue())]: # Convert wide char to single byte char and replace space with _ createLabel(xref.getFromAddress(), anyascii(mapped_string[str(decoded_list_index.getValue())].replace(" ", "_")), True) else: createLabel(xref.getFromAddress(), mapped_string[str(decoded_list_index.getValue())], True) # getValue() convert SCALAR type to int comment_addr(xref.getFromAddress(), mapped_string[str(decoded_list_index.getValue())]) except: print(f"[-] Labeling Error: {xref_addr}") pass finally: currentProgram.endTransaction(t, True) break xref_addr = xref_addr.previous()
def html_to_visible_text(html):
    """
    Return the text of an HTML document, transliterated and lowercased.

    Elements matched by the names listed below are extracted from the
    parsed tree before the remaining text is unescaped and converted.
    """
    document = bs4.BeautifulSoup(html, features="html.parser")
    dropped_names = ["style", "script", "[document]", "head", "title"]
    for node in document(dropped_names):
        node.extract()
    raw_text = unescape(document.get_text())
    return anyascii(raw_text).lower()
def get_slug(self, struct_value):
    """Return the slug of the transliterated 'field_label' of struct_value."""
    label = struct_value['field_label']
    ascii_label = anyascii(label)
    return slugify(ascii_label)
async def handle_msg(self, message: Dict) -> None:
    """
    Log one incoming message and react according to its 'type' field.

    Raises ConnectionRefusedError when the message carries the error
    'Auth not valid'.
    """
    self.logger.debug(message)

    if message.get('error') == 'Auth not valid':
        raise ConnectionRefusedError(
            'User ID/Bearer invalid. Please check your settings.ini.')

    kind = message['type']

    if kind == 'broadcastEnded':
        if 'reason' in message:
            self.logger.info(f'Disconnected: {message["reason"]}')
        else:
            self.logger.info('Disconnected.')
        return

    if kind == 'interaction':
        # Chat lines are only shown when enabled and not currently blocked.
        if self.show_chat and not self.block_chat:
            self.logger.info(
                f'{message["metadata"]["username"]}: {message["metadata"]["message"]}'
            )
        return

    if kind == 'question':
        question = anyascii(message['question'])
        choices = [anyascii(option['text']) for option in message['answers']]

        self.logger.info('\n' * 5)
        self.logger.info(
            f'Question {message["questionNumber"]} out of {message["questionCount"]}'
        )
        self.logger.info(question, extra={"pre": colorama.Fore.BLUE})
        self.logger.info(f'Choices: {", ".join(choices)}',
                         extra={'pre': colorama.Fore.BLUE})

        await self.question_handler.answer_question(question, choices)
        # Chat stays blocked until a 'questionClosed' message arrives.
        self.block_chat = True
        return

    if kind == 'questionSummary':
        if not self.show_question_summary:
            return
        question = anyascii(message['question'])
        self.logger.info(f'Question summary: {question}',
                         extra={'pre': colorama.Fore.BLUE})

        for answer in message['answerCounts']:
            ans_str = anyascii(answer['answer'])
            if answer['correct']:
                colour = colorama.Fore.GREEN
            else:
                colour = colorama.Fore.RED
            self.logger.info(
                f'{ans_str}:{answer["count"]}:{answer["correct"]}',
                extra={'pre': colour})

        self.logger.info(
            f'{message["advancingPlayersCount"]} players advancing')
        self.logger.info(
            f'{message["eliminatedPlayersCount"]} players eliminated\n')
        return

    if kind == 'questionClosed' and self.block_chat:
        self.block_chat = False
        if self.show_chat:
            self.logger.info('\n' * 5)
async def handle_msg(self, message: Dict) -> None:
    """
    Log one incoming message and react according to its "type" field.

    Raises ConnectionRefusedError when the message carries the error
    "Auth not valid".
    """
    self.logger.debug(message)

    if message.get("error") == "Auth not valid":
        raise ConnectionRefusedError(
            "User ID/Bearer invalid. Please check your settings.ini.")

    kind = message["type"]

    if kind == "broadcastEnded":
        if "reason" in message:
            self.logger.info(f"Disconnected: {message['reason']}")
        else:
            self.logger.info("Disconnected.")
        return

    if kind == "interaction":
        # Chat lines are only shown when enabled and not currently blocked.
        if self.show_chat and not self.block_chat:
            self.logger.info(
                f'{message["metadata"]["username"]}: {message["metadata"]["message"]}'
            )
        return

    if kind == "question":
        question = anyascii(message["question"])
        choices = [anyascii(option["text"]) for option in message["answers"]]

        self.logger.info("\n" * 5)
        self.logger.info(
            f'Question {message["questionNumber"]} out of {message["questionCount"]}'
        )
        self.logger.info(question, extra={"pre": colorama.Fore.BLUE})
        self.logger.info(f'Choices: {", ".join(choices)}',
                         extra={"pre": colorama.Fore.BLUE})

        await self.question_handler.answer_question(question, choices)
        # Chat stays blocked until a "questionClosed" message arrives.
        self.block_chat = True
        return

    if kind == "questionSummary":
        if not self.show_question_summary:
            return
        question = anyascii(message["question"])
        self.logger.info(f"Question summary: {question}",
                         extra={"pre": colorama.Fore.BLUE})

        for answer in message["answerCounts"]:
            ans_str = anyascii(answer["answer"])
            if answer["correct"]:
                colour = colorama.Fore.GREEN
            else:
                colour = colorama.Fore.RED
            self.logger.info(
                f'{ans_str}:{answer["count"]}:{answer["correct"]}',
                extra={"pre": colour},
            )

        self.logger.info(
            f'{message["advancingPlayersCount"]} players advancing')
        self.logger.info(
            f'{message["eliminatedPlayersCount"]} players eliminated\n')
        return

    if kind == "questionClosed" and self.block_chat:
        self.block_chat = False
        if self.show_chat:
            self.logger.info("\n" * 5)
def string_to_ascii(value):
    """
    Convert a string to ascii.

    The value is passed through anyascii and the result wrapped in str().
    """
    transliterated = anyascii(value)
    return str(transliterated)
def html_to_visible_text(html):
    """
    Return the text of an HTML document, transliterated and lowercased.

    Elements matched by the names listed below are extracted from the
    parsed tree before the remaining text is unescaped and converted.
    """
    document = bs4.BeautifulSoup(html, features='html.parser')
    dropped_names = ['style', 'script', '[document]', 'head', 'title']
    for node in document(dropped_names):
        node.extract()
    raw_text = unescape(document.get_text())
    return anyascii(raw_text).lower()
def convert_to_ascii(text):
    """Return text as transliterated by anyascii."""
    ascii_text = anyascii(text)
    return ascii_text
def make_content_disposition(disposition, fname):
    """
    Build a Content-Disposition header value for the file name fname.

    disposition: the disposition type placed first in the value
    fname: the file name; it is emitted twice, as a transliterated
        quoted `filename` fallback and as a percent-encoded UTF-8
        `filename*` parameter
    returns: the three parts joined with ';'
    """
    rfc6266_part = "filename*=utf-8''%s" % (
        percent_encode(fname, safe='!#$&+-.^_`|~', encoding='utf-8'), )
    # Backslash and double quote must be escaped inside a quoted-string,
    # otherwise a name containing them ends the parameter early and can
    # smuggle extra parameters into the header.
    ascii_name = anyascii(fname).replace('\\', '\\\\').replace('"', '\\"')
    ascii_part = 'filename="%s"' % (ascii_name, )
    return ';'.join((disposition, ascii_part, rfc6266_part))