def test_qualified_re_split(self):
    # Exercise re.split() with an explicit maxsplit argument.
    # NOTE(review): the expected values assume pre-3.7 re semantics, where a
    # pattern that can match the empty string (":*") never splits on an empty
    # match; re.split() behavior changed in Python 3.7 — confirm target version.
    self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
    self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
    self.assertEqual(re.split("(:)", ":a:b::c", 2), ['', ':', 'a', ':', 'b::c'])
    self.assertEqual(re.split("(:*)", ":a:b::c", 2), ['', ':', 'a', ':', 'b::c'])
def toNumber(self, lang, s):
    """Convert a textual daf (Talmud page) reference to a 1-based index.

    Each daf has two sides (amud "a"/"b"); the index counts sides, so
    daf N side "a" -> 2N-1 and side "b" -> 2N.

    :param lang: "en" for e.g. "12a"/"12b", "he" for Hebrew numerals.
    :param s: the reference string.
    :raises InputError: unparseable reference or daf beyond self.length.
    """
    if lang == "en":
        try:
            if s[-1] in ["a", "b"]:
                amud = s[-1]
                daf = int(s[:-1])
            else:
                # No explicit side: default to amud "a".
                amud = "a"
                daf = int(s)
        except ValueError:
            raise InputError(u"Couldn't parse Talmud reference: {}".format(s))
        if self.length and daf > self.length:
            #todo: Catch this above and put the book name on it. Proably change Exception type.
            raise InputError(u"{} exceeds max of {} dafs.".format(daf, self.length))
        indx = daf * 2
        if amud == "a":
            indx -= 1
        return indx
    elif lang == "he":
        # Take the leading Hebrew numeral (up to punctuation/whitespace).
        num = re.split("[.:,\s]", s)[0]
        daf = decode_hebrew_numeral(num) * 2
        # Trailing ":" or a detached bet marks amud B; otherwise amud A.
        if s[-1] == ":" or (
            s[-1] == u"\u05d1"  #bet
            and ((len(s) > 2 and s[-2] in ", ")  # simple bet
                 or (len(s) > 4 and s[-3] == u'\u05e2')  # ayin"bet
                 or (len(s) > 5 and s[-4] == u"\u05e2")  # ayin''bet
                 )
        ):
            return daf  # amud B
        return daf - 1
def add_to_whitelist(logins):
    """Add each login from a separator-delimited string to the whitelist.

    Returns the rendered 'wl_updated' XMPP template summarizing which
    logins were added, already present, denied, or not found.
    """
    names = re.split(r'[\s@,]+', logins.strip(' \t@'))
    buckets = {'added': [], 'already': [], 'denied': [], 'not_found': []}
    for name in names:
        try:
            key = 'added' if users.add_to_whitelist(name) else 'already'
        except SubscribeError:
            key = 'denied'
        except UserNotFound:
            key = 'not_found'
        except AlreadySubscribed:
            key = 'already'
        buckets[key].append(name)
    return xmpp_template('wl_updated', **buckets)
def pipe(self, key, value):
    """Tokenize, POS-filter, lemmatize and normalize each blob of a document,
    recording surviving words, then strip bulky HTML fields unless debugging.

    Yields a single (key, doc) pair.
    """
    doc = value
    # Build blobs from the parsed HTML elements (index path starts at [i]).
    for i, elm in enumerate(doc.content_html.elements):
        build_blobs(elm, doc, [i])
    for blob in doc.blobs:
        tokenized = nltk.word_tokenize(blob.text)
        pos_tagged = ((t, convert_pos(p)) for t, p in nltk.pos_tag(tokenized))
        # Keep only tags configured for this pipeline.
        pos_filtered = ((t, p) for t, p in pos_tagged if p in self.pos_tags)
        # NOTE(review): convert_pos() is applied a second time to an
        # already-converted tag here — confirm this is intentional.
        lemmatized = (self.wordnet.lemmatize(t, convert_pos(p)) for t, p in pos_filtered)
        # Split lemmas on non-word chars, lowercase, drop empties.
        normalized = (t.lower() for s in lemmatized for t in re2.split(r"\W+", s) if t)
        enumerated = ((i, t) for i, t in enumerate(normalized) if not t.isdigit())
        filtered = ((i, t) for i, t in enumerated if t not in self.stopwords)
        for index, text in filtered:
            blob.words.add(index=index, text=text)
    # Drop raw HTML payloads from the output unless debug mode keeps them.
    if not self.debug:
        doc.ClearField("raw_html")
        doc.ClearField("parsed_html")
        doc.ClearField("content_html")
    yield key, doc
def add_post():
    """Create a new post from the current request and redirect to its URL.

    Parses tags, a leading "@user" recipient list, privacy flag and
    attached files; marks the session so the post form is cleared.
    """
    text = env.request.args('text', '').strip()
    tags = env.request.args('tags', '').strip(' \t*,;')
    if isinstance(tags, str):
        tags = tags.decode('utf-8')  # Python 2: normalize bytes to unicode
    # Split on ',', ';' or '*' and turn non-breaking spaces into spaces.
    tags = [t.replace(u"\xa0", " ") for t in re.split(r'\s*[,;*]\s*', tags)]
    private = bool(env.request.args('private'))
    # Leading run of "@login" mentions addresses the post to those users.
    m = re.search(r'^\s*(?P<to>(?:@[a-z0-9_-]+[,\s]*)+)', text)
    to = parse_logins(m.group('to')) if m else []
    files = _files([])
    # Flag the session so the client clears the post input on next render.
    sess = Session()
    sess['clear_post_input'] = True
    sess.save()
    try:
        id = posts.add_post(text, tags=tags, to=to, private=private, files=files)
    except PostTextError:
        return render('/post-error.html')
    return Response(redirect='%s://%s.%s/%s' % \
        (env.request.protocol, env.user.login, settings.domain, id))
def findRelations(sentence, relex):
    '''iterate through words in a sentence and extract all relations'''
    tokens = re.split('[ ,.]', sentence)
    return {tok for tok in tokens if tok and tok in relex}
def calculateManaPerms(manaCost):
    """Calculate the possible mana permutations of cards.

    Used for hybrid cards when comparing mana costs.
    Input: Mana Cost of Card as a string (i.e {4}{U/G}{R/G})
    Output: Array of unique different mana cost combinations
            (i.e [4UR, 4UG, 4GR, 4GG])
    """
    manacost_permutes = []
    manacost_statics = []
    totalmanaoptions = []
    # Fix: raw string — "\{"/"\}" are invalid escape sequences in a plain
    # string literal (DeprecationWarning since 3.6, error in future versions).
    manacost_split = re.split(r"\{(.*?)\}+?", manaCost)
    for manaitem in manacost_split:
        if manaitem != '':
            if "/" in manaitem:
                # Hybrid symbol: keep both halves for later permutation.
                manacost_permutes.append(manaitem.replace("/", ""))
            else:
                manacost_statics.append(manaitem)
    if not manacost_permutes:
        # No hybrid symbols: single fixed cost.
        totalmanaoptions.append("".join(manacost_statics))
    else:
        # One option per choice of half from each hybrid symbol; sort the
        # symbols so equivalent costs compare equal.
        for x in product(*manacost_permutes):
            v = "".join(manacost_statics) + "".join(x)
            totalmanaoptions.append(''.join(sorted(v)))
    return dedupe(totalmanaoptions)
def add_post():
    """Create a new post from the current request and redirect to it."""
    text = env.request.args('text', '').strip()
    raw = env.request.args('tags', '').strip(' \t*,;')
    if isinstance(raw, str):
        raw = raw.decode('utf-8')  # Python 2: bytes -> unicode
    tags = [part.replace(u"\xa0", " ") for part in re.split(r'\s*[,;*]\s*', raw)]
    private = bool(env.request.args('private'))
    match = re.search(r'^\s*(?P<to>(?:@[a-z0-9_-]+[,\s]*)+)', text)
    to = parse_logins(match.group('to')) if match else []
    files = _files([])
    try:
        id = posts.add_post(text, tags=tags, to=to, private=private, files=files)
    except PostTextError:
        return render('/post-error.html')
    url = '%s://%s.%s/%s' % (env.request.protocol, env.user.login,
                             settings.domain, id)
    return Response(redirect=url)
def toNumber(self, lang, s):
    """Convert a textual daf (Talmud page) reference to a 1-based index.

    Each daf has two sides (amud "a"/"b"); the index counts sides, so
    daf N side "a" -> 2N-1 and side "b" -> 2N.

    :param lang: "en" for e.g. "12a"/"12b", "he" for Hebrew numerals.
    :param s: the reference string.
    :raises InputError: unparseable reference or daf beyond self.length.
    """
    if lang == "en":
        try:
            if s[-1] in ["a", "b"]:
                amud = s[-1]
                daf = int(s[:-1])
            else:
                # No explicit side: default to amud "a".
                amud = "a"
                daf = int(s)
        except ValueError:
            raise InputError(
                u"Couldn't parse Talmud reference: {}".format(s))
        if self.length and daf > self.length:
            #todo: Catch this above and put the book name on it. Proably change Exception type.
            raise InputError(u"{} exceeds max of {} dafs.".format(
                daf, self.length))
        indx = daf * 2
        if amud == "a":
            indx -= 1
        return indx
    elif lang == "he":
        # Take the leading Hebrew numeral (up to punctuation/whitespace).
        num = re.split("[.:,\s]", s)[0]
        daf = decode_hebrew_numeral(num) * 2
        # Trailing ":" or a detached bet marks amud B; otherwise amud A.
        if s[-1] == ":" or (
            s[-1] == u"\u05d1"  #bet
            and ((len(s) > 2 and s[-2] in ", ")  # simple bet
                 or (len(s) > 4 and s[-3] == u'\u05e2')  # ayin"bet
                 or (len(s) > 5 and s[-4] == u"\u05e2")  # ayin''bet
                 )):
            return daf  # amud B
        return daf - 1
def get_word_counts(string):
    """Return a dict mapping each token of *string* to its occurrence count.

    Tokens are produced by splitting on single non-word characters, so
    empty strings between adjacent separators are counted as well
    (matching the original behavior).
    """
    counts = {}
    for token in re2.split(r"\W", string):
        counts[token] = counts.get(token, 0) + 1
    return counts
def isReference(sentence, authors):
    '''checks if a sentence is a reference - use a hashed author database to check each word'''
    hits = 0
    for token in re.split('[ ,;]', sentence):
        if token in authors:
            hits += 1
            # More than three author-database hits classifies as a reference.
            if hits > 3:
                return True
    return False
def diff_ratio(str1, str2):
    """Return True when str1 and str2 are near-duplicates.

    Strings are normalized (decoded to unicode, whitespace/dots collapsed,
    lowercased), then compared by edit distance and similarity ratio
    against thresholds from settings.
    """
    if not isinstance(str1, unicode):  # Python 2: bytes -> unicode
        str1 = str1.decode('utf-8')
    str1 = ' '.join(re.split(r'[\s\.]+', str1)).lower()
    if not isinstance(str2, unicode):
        str2 = str2.decode('utf-8')
    str2 = ' '.join(re.split(r'[\s\.]+', str2)).lower()
    d = distance(str1, str2)
    # Distance 0 or 1 is always a match.
    if d <= 1:
        return True
    # A configured positive edit_distance caps how far apart matches may be.
    if settings.edit_distance > 0 and d > settings.edit_distance:
        return False
    r = ratio(str1, str2)
    if r < settings.edit_ratio:
        return False
    return True
def diff_ratio(str1, str2):
    """Return True when str1 and str2 are near-duplicates after normalization."""
    def _norm(s):
        # Python 2: decode bytes, collapse whitespace/dots, lowercase.
        if not isinstance(s, unicode):
            s = s.decode("utf-8")
        return " ".join(re.split(r"[\s\.]+", s)).lower()

    a = _norm(str1)
    b = _norm(str2)
    d = distance(a, b)
    if d <= 1:
        return True
    if 0 < settings.edit_distance < d:
        return False
    return ratio(a, b) >= settings.edit_ratio
def test_re_split(self):
    # Basic re.split() behavior with capturing, non-capturing, and
    # empty-capable patterns.
    # NOTE(review): expected values assume pre-3.7 semantics — patterns that
    # can match the empty string (":*", "(:*)", "(:)*") did not split on
    # empty matches before Python 3.7.
    self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
    self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
    self.assertEqual(re.split("(:*)", ":a:b::c"), ['', ':', 'a', ':', 'b', '::', 'c'])
    self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
    self.assertEqual(re.split("(:)*", ":a:b::c"), ['', ':', 'a', ':', 'b', ':', 'c'])
    self.assertEqual(re.split("([b:]+)", ":a:b::c"), ['', ':', 'a', ':b::', 'c'])
    self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                     ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'])
    self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"), ['', 'a', '', '', 'c'])
def test_re_split(self):
    # Duplicate of the re.split() behavior test above, differently wrapped.
    # NOTE(review): expected values assume pre-3.7 semantics — patterns that
    # can match the empty string did not split on empty matches before 3.7.
    self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
    self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
    self.assertEqual(re.split("(:*)", ":a:b::c"), ['', ':', 'a', ':', 'b', '::', 'c'])
    self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
    self.assertEqual(re.split("(:)*", ":a:b::c"), ['', ':', 'a', ':', 'b', ':', 'c'])
    self.assertEqual(re.split("([b:]+)", ":a:b::c"), ['', ':', 'a', ':b::', 'c'])
    self.assertEqual(re.split("(b)|(:+)", ":a:b::c"), [
        '', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'
    ])
    self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"), ['', 'a', '', '', 'c'])
def save(post):
    """Apply edits from the current request to *post* and redirect to it."""
    text = env.request.args('text', '').strip()
    tags = env.request.args('tags', '').strip(' \t*,;')
    if isinstance(tags, str):
        tags = tags.decode('utf-8')  # Python 2: bytes -> unicode
    tags = [t.replace(u"\xa0", " ") for t in re.split(r'\s*[,;*]\s*', tags)]
    private = bool(env.request.args('private'))
    # FIXME(review): `files` is never defined in this function — this line
    # raises NameError at runtime. Presumably it should be collected the
    # same way add_post() does (e.g. files = _files([])) — confirm and fix.
    posts.edit_post(post, text=text, tags=tags, private=private, files=files)
    return Response(redirect='%s://%s.%s/%s' % \
        (env.request.protocol, env.user.login, settings.domain, post.id))
def parse_tags(tags):
    """Normalize a raw tag string into a list of cleaned tag names.

    Splits on unescaped '*' or ',', keeps at most 10 unique tags, trims
    each to 64 chars, and unescapes "\\," back to ",".
    """
    if tags:
        tags = tags.strip(" \r\n\t*")
        if isinstance(tags, str):
            tags = tags.decode("utf-8")  # Python 2: bytes -> unicode
        # tags = re.findall(r'[^\s*]+', tags)
        tags = filter(
            None,
            [t.replace(u"\xa0", " ").strip()[:64] for t in uniqify(re.split(r"(?<!\\)[\*,]", tags)[:10])]
        )
        # NOTE(review): when all candidates filter out, tags becomes None and
        # the map() below would fail on a non-iterable — confirm callers never
        # hit this path, or whether None is deliberate.
        if not tags:
            tags = None
    else:
        tags = []
    return map(lambda t: re.sub(r"\\,", ",", t), tags)
def parse_tags(tags):
    """Normalize a raw tag string: split on unescaped '*'/',', keep at most
    10 unique tags trimmed to 64 chars, then unescape "\\," to ",".
    """
    if not tags:
        cleaned = []
    else:
        raw = tags.strip(' \r\n\t*')
        if isinstance(raw, str):
            raw = raw.decode('utf-8')  # Python 2: bytes -> unicode
        #tags = re.findall(r'[^\s*]+', tags)
        candidates = uniqify(re.split(r'(?<!\\)[\*,]', raw)[:10])
        cleaned = filter(None, [c.replace(u"\xa0", " ").strip()[:64]
                                for c in candidates])
        if not cleaned:
            cleaned = None
    return map(lambda t: re.sub(r'\\,', ',', t), cleaned)
def parse_macro(macro):
    """Deobfuscate a VBA macro line-by-line and return its sorted, deduped IOCs.

    Detects the macro's encryption routine (falling back to a generic XOR
    pattern), substitutes known obfuscation techniques on each line, tracks
    simple variable assignments, and extracts IOCs from the result.
    """
    opts = {}
    vb_vars = {}
    # NOTE(review): result/cleaned/strings are initialized but `cleaned` and
    # `strings` are never used below — likely leftovers.
    result = {}
    cleaned = ""
    strings = set()
    iocs = []
    macro = normalize_code(macro)
    enc_func_name, enc_type = find_enc_function(macro)
    if not enc_func_name:
        # No recognizable routine: assume a generically-named XOR function.
        enc_func_name, enc_type = r"xor\w+", "xor"
    decrypt_func = DECRYPTORS.get(enc_type)
    opts = {
        "enc_func_name": enc_func_name,
        "decrypt_func": decrypt_func,
        "vb_vars": vb_vars
    }
    for line in macro.splitlines():
        line = line.strip()
        # Skip VBA comments.
        if line.startswith("'"):
            continue
        substituted = handle_techniques(line, **opts)
        # Look for variable assignments
        split = [
            part for part in re.split(r"^(\w+)\s*=\s*", line, maxsplit=1)[1:]
            if part
        ]
        # Basic variable data find/replace.
        if len(split) == 2:
            name, val = split
            vb_vars[name] = substituted
        # Walk the deobfuscated macro and check for any IOCs
        for string in substituted.splitlines():
            ioc = extract_iocs(string)
            if ioc:
                iocs.append(ioc)
    # Dedup IOCs
    result = sorted(set(iocs), key=lambda p: p[0])
    return result
def del_from_blacklist(logins):
    """Remove each login from a separator-delimited string from the blacklist.

    Returns the rendered 'bl_updated' XMPP template summarizing which
    logins were deleted, not deleted, or not found.
    """
    names = re.split(r'[\s@,]+', logins.strip(' \t@'))
    outcome = {'deleted': [], 'not_deleted': [], 'not_found': []}
    for name in names:
        try:
            key = 'deleted' if users.del_from_blacklist(name) else 'not_deleted'
        except UserNotFound:
            key = 'not_found'
        outcome[key].append(name)
    return xmpp_template('bl_updated', **outcome)
def add_to_blacklist(logins):
    """Add each login from a separator-delimited string to the blacklist.

    Returns the rendered 'bl_updated' XMPP template summarizing which
    logins were added, already present, or not found.
    """
    names = re.split(r'[\s@,]+', logins.strip(' \t@'))
    added, already, not_found = [], [], []
    for name in names:
        try:
            target = added if users.add_to_blacklist(name) else already
            target.append(name)
        except SubscribeError:
            # Deliberately ignored — matches existing behavior.
            pass
        except UserNotFound:
            not_found.append(name)
    return xmpp_template('bl_updated', added=added, already=already,
                         not_found=not_found)
def parse_macro(macro):
    """Deobfuscate a VBA macro line-by-line and return its sorted, deduped IOCs.

    Detects the macro's encryption routine (falling back to a generic XOR
    pattern), substitutes known obfuscation techniques on each line, tracks
    simple variable assignments, and extracts IOCs from the result.
    """
    opts = {}
    vb_vars = {}
    # NOTE(review): `cleaned` and `strings` are never used below — leftovers.
    result = {}
    cleaned = ""
    strings = set()
    iocs = []
    macro = normalize_code(macro)
    enc_func_name, enc_type = find_enc_function(macro)
    if not enc_func_name:
        # No recognizable routine: assume a generically-named XOR function.
        enc_func_name, enc_type = r"xor\w+", "xor"
    decrypt_func = DECRYPTORS.get(enc_type)
    opts = {"enc_func_name": enc_func_name,
            "decrypt_func": decrypt_func,
            "vb_vars": vb_vars}
    for line in macro.splitlines():
        line = line.strip()
        # Skip VBA comments.
        if line.startswith("'"):
            continue
        substituted = handle_techniques(line, **opts)
        # Look for variable assignments
        split = [part for part in re.split(r"^(\w+)\s*=\s*", line, maxsplit=1)[1:] if part]
        # Basic variable data find/replace.
        if len(split) == 2:
            name, val = split
            vb_vars[name] = substituted
        # Walk the deobfuscated macro and check for any IOCs
        for string in substituted.splitlines():
            ioc = extract_iocs(string)
            if ioc:
                iocs.append(ioc)
    # Dedup IOCs
    result = sorted(set(iocs), key=lambda p: p[0])
    return result
def _get_shortcut_url_map(self, pat, lines, shortcut_size):
    """Bucket filter rules by a fixed-size substring ("shortcut") of their URL.

    Rules with no usable shortcut go to secondary_lines; rules whose every
    candidate shortcut is already taken are appended to the least-populated
    existing bucket. Returns (shortcut_url_map, secondary_lines).
    """
    shortcut_url_map = {}
    secondary_lines = []
    total_rules = 0
    total_comments = 0
    total_shortcuts = 0
    for line in lines:
        # NOTE(review): the result of strip() is discarded — this is a no-op.
        # Presumably `line = line.strip()` was intended; confirm before fixing,
        # since later logic operates on the unstripped line.
        line.strip()
        if line[0] == '!':
            # '!'-prefixed lines are comments in filter-list syntax.
            total_comments += 1
            continue
        total_rules += 1
        # Drop any "$options" suffix before searching for shortcut material.
        url = re2.split(r'\$+', line)[0]
        searches = pat.findall(url)
        flag = 0
        if searches:
            total_shortcuts += 1
        else:
            # No shortcut candidates: handle this rule via the secondary list.
            secondary_lines.append(line)
            continue
        min_count = -1
        for s in searches:
            # Slide a window of shortcut_size over each candidate substring.
            for i in xrange(len(s) - shortcut_size+1):
                cur_s = s[i:i+shortcut_size]
                if cur_s not in shortcut_url_map:
                    # First unused shortcut wins a fresh bucket.
                    shortcut_url_map[cur_s] = [line]
                    flag = 1
                    break
                # Track the least-populated existing bucket as a fallback.
                if (min_count == -1 or len(shortcut_url_map[cur_s]) < min_count):
                    min_count = len(shortcut_url_map[cur_s])
                    min_s = cur_s
            if flag == 1:
                break
        if flag == 0:
            # Every candidate was taken: join the smallest existing bucket.
            shortcut_url_map[min_s].append(line)
    if self.print_maps:
        self._print_statistics_of_map(
            shortcut_size, total_rules, total_comments, total_shortcuts,
            len(secondary_lines), shortcut_url_map)
    return shortcut_url_map, secondary_lines
def _generate_login(self):
    """Derive a unique login from the feed's name (or URL as fallback).

    Builds a hyphen-joined slug capped at 16 chars, trims a trailing
    partial word, then appends a counter until the login is unused.
    """
    name = unidecode(self.get_info('name')).lower()
    if not name:
        # No name: fall back to the URL with its scheme stripped.
        name = re.sub(r'^\w+:/+', '', self._url.lower())
    name = re.sub('^\W+|\W+$', '', name)
    words = re.split(r'\W+', name)
    name = ''
    br = False
    for w in words[:]:
        if not name:
            _name = w
        else:
            _name = "%s-%s" % (name, w)
        if len(_name) <= 16:
            name = _name
        else:
            # Over the 16-char cap: truncate and remember we broke mid-word.
            name = _name[:16]
            br = True
            break
    if br:
        # Drop the truncated partial word unless that leaves too little.
        try:
            ri = name.rindex('-')
        except ValueError:
            ri = 16
        if ri > 6:
            name = name[:ri]
    i = 0
    while True:
        # FIXME(review): '******' looks like a redacted format string — as
        # written, '%' against a tuple raises TypeError. Presumably this was
        # something like '%s%s' % (name, i or ''); confirm against history.
        login = '******' % (name, i or '')
        try:
            User('login', login)
        except UserNotFound:
            # No existing user with this login: it is free to use.
            return login
        i += 1
def GetValues(string):
    """Split *string* on parenthesized segments, then on commas, and return
    the set of normalized values."""
    segments = (seg for seg in re2.split(r"\(.*\)", string) if seg)
    return {Normalize(item) for seg in segments for item in seg.split(',')}
def nl2br(environ, text):
    """Wrap each newline-separated chunk of *text* in an escaped <p> element."""
    paragraphs = ['<p>%s</p>' % escape(chunk) for chunk in re.split(_nl_re, text)]
    return ''.join(paragraphs)
def dispatch_message(message, raw_message, channel):
    """For a message, figure out how to handle it and return the text to reply with.

    INPUT: message = Message string
    INPUT: channel = TRUE if message came from a main channel, FALSE if came from PM
    OUTPUT: (optional: list of) tuple of (reply_message, pm_override)
    OUTPUT: pm_override is TRUE if the reply should go through PM regardless

    If they give us "<string> extend", assume that it's "<cardname extend>".
    If they give us "<string>*", assume that it's "<cardname*>"
    """
    logging.debug("Dispatching message: {} (Raw text {})".format(message, raw_message))
    if message == "help":
        return (help(), True)
    elif message == "helpsearch":
        return (helpsearch(), True)
    elif message.startswith("url "):
        return (url(message[4:]), False)
    elif message.startswith("printsets"):
        # List all known sets, optionally ordered by release date.
        c.execute('SELECT DISTINCT(name), code, releaseDate FROM sets ORDER BY ' + ('releaseDate' if message.endswith("inorder") else 'name') + ' ASC')
        message_out = ""
        for name, code, date in [(x[0], x[1], x[2]) for x in c.fetchall()]:
            message_out += name + " (" + code + ")" + " [" + date + "]" + "\n"
        return (message_out, True)
    elif message == "random":
        cards = cardSearch(c, ['en:' + random.choice(allCardNames)])
        if not cards:
            return ("No cards found :(", False)
        return (printCard(c, cards[0], quick=False, slackChannel=channel), False)
    elif message.endswith("extend"):
        # "<cardname> extend": full card details, always via PM.
        cards = cardSearch(c, ['en:' + message[:-6].rstrip()])
        if not cards:
            return ("", False)
        return (printCard(c, cards[0], extend=2, quick=False), True)
    elif message.endswith("*"):
        # "<prefix>*": name-prefix search.
        cards = cardSearch(c, ['n:' + message[:-1]])
        if not cards:
            return ("", False)
        if len(cards) > 20:
            return ("Too many cards to print! ({} > 20). \nPlease narrow search".format(len(cards)), False)
        if channel:
            # If we've asked for some cards in a channel
            if len(cards) == 1:
                # One card is fine, show them
                return (printCard(c, cards[0], quick=False, slackChannel=channel), False)
            elif len(cards) <= 5:
                # 2 - 5 cards is fine, but only show name and mana cost
                return ("\n".join([printCard(c, card, quick=True, slackChannel=channel) for card in cards]), False)
            else:
                # > 5 is only showing name and mana cost and forced to PM
                return [("{} results sent to PM".format(len(cards)), False),
                        ("\n".join([printCard(c, card, quick=True, slackChannel=channel) for card in cards]), True)]
        else:
            return ("\n".join([printCard(c, card, quick=False, slackChannel=channel) for card in cards] + ["{} result/s".format(len(cards))]), False)
    elif raw_message.startswith("!s ") or raw_message.startswith("!qs "):
        logging.debug("Advanced Search!")
        quick = False
        if message == "qs":
            quick = True
            card_name = raw_message[4:].lower()
        else:
            card_name = raw_message[3:].lower()
        logging.debug("Searching for {}".format(card_name))
        output = []
        try:
            parsed_data = super_total.parseString(card_name)
            logging.debug("Parsed it as: {}".format(parsed_data))
        except (ParseException, ParseFatalException) as e:
            return ("Unable to parse search terms\n{}".format(e), False)
        # Insert implicit ANDs between adjacent search terms.
        last_was_s = False
        for idx, x in enumerate(parsed_data.asList()):
            if x in ["and", "or", "not"]:
                output.append(x)
                last_was_s = False
            elif x == "(":
                if last_was_s:
                    output.append("AND")
                output.append("(")
                last_was_s = False
            elif x == ")":
                output.append(")")
                last_was_s = False
            else:
                if last_was_s:
                    output.append("AND")
                output.append(x)
                last_was_s = True
        logging.debug("Advanced search final terms: {}".format(output))
        cards = cardSearch(c, output)
        if not cards:
            return ("No cards found", False)
        if len(cards) > 20:
            return ("Too many cards to print! ({} > 20). \nPlease narrow search".format(len(cards)), False)
        if channel:
            # If we've asked for some cards in a channel
            # If they're quick, <= 10 is fine
            if quick and len(cards) <= 10:
                return ("\n".join([printCard(c, card, quick=quick, slackChannel=channel) for card in cards]), False)
            if len(cards) <= 5:
                # 1 - 5 cards is fine
                return ("\n".join([printCard(c, card, quick=quick, slackChannel=channel) for card in cards]), False)
            else:
                # > 5 is only showing name and mana cost and forced to PM
                return [("{} results sent to PM".format(len(cards)), False),
                        ("\n".join([printCard(c, card, quick=True, slackChannel=channel) for card in cards]), True)]
        else:
            return ("\n".join([printCard(c, card, quick=quick, slackChannel=channel) for card in cards] + ["{} result/s".format(len(cards))]), False)
    elif raw_message.startswith("!r ") or rule_regexp.match(raw_message):
        logging.debug("Rules query!")
        if message == "r":
            message = raw_message[3:]
        return (ruleSearch(all_rules, message), False)
    else:
        logging.debug("Trying to figure out card name")
        logging.debug("Maybe we get extremely lucky")
        if message in allCardNames:
            logging.debug("We do!")
            cards = cardSearch(c, ['en:' + message])
            return (printCard(c, cards[0], quick=False, slackChannel=channel), False)
        logging.debug("We don't")
        # Handle !card1 !card2
        # Handle !card1&!card2
        # Handle Blah !card1 blah !card2
        # Don't forget if it's a PM we'll have stripped the possible initial !
        # so let's use the raw message
        # TODO: Do it backwards, so longest matches are better
        command_list = bot_command_regex.findall(raw_message)
        logging.debug("Command list: {}".format(command_list))
        cards_found = []
        for card in command_list:
            if card in allCardNames:
                logging.debug("Bailing early due to exact match")
                cards_found.append('en:"%s"' % card)
                continue
            # Grow the candidate name token-by-token until a fuzzy match hits.
            card_tokens = re.split(' |&', raw_message[raw_message.find(card):])
            logging.debug("Tokenising: {}".format(card_tokens))
            backup = []
            real = False
            for i in xrange(1, len(card_tokens) + 1):
                card_name = " ".join(card_tokens[:i])
                if card_tokens[i - 1].startswith("!"):
                    break
                if not backup:
                    # First-word fuzzy matches kept as a fallback.
                    backup.extend([x for x in allCardNames if difflib.SequenceMatcher(None, x.split(", ")[0].lower(), card_name.lower()).ratio() >= 0.8])
                real = difflib.get_close_matches(card_name, allCardNames, cutoff=0.8)
                if len(real):
                    cards_found.append('en:"%s"' % real[0])
                    real = True
                    break
            if not real:
                if backup:
                    cards_found.append('en:"%s"' % backup[0])
        logging.debug("Finally, the cards: {}".format(cards_found))
        if cards_found:
            terms = list(intersperse("OR", cards_found))
            logging.debug("Searching for {}".format(terms))
            cards = cardSearch(c, terms)
            logging.debug("Found {} cards".format(len(cards)))
            if len(cards) > 20:
                return ("Too many cards to print! ({} > 20). Please narrow search".format(len(cards)), False)
            if len(cards) <= 5:
                return ("\n".join([printCard(c, card, quick=False, slackChannel=channel) for card in cards]), False)
            else:
                return [("{} results sent to PM".format(len(cards)), False),
                        ("\n".join([printCard(c, card, quick=False, slackChannel=channel) for card in cards]), True)]
        else:
            logging.debug("I didn't understand the command")
            return ("", False)
def do_command(self, e, cmd, nick, target, reply, dm):
    """Dispatch an IRC bot command string *cmd* from *nick*.

    Each recognized command replies (via reply/dm callbacks) and returns;
    unrecognized input falls through to a notice.
    """
    c = self.connection
    emoticontable = {
        ':)': '☺',
        # Some lines commented out due to lack of widespread font support
        # ':D': '😃',
        # '^^': '😄',
        # '^_^':'😄',
        # ':|': '😑',
        ':(': '☹',
        # ':/': '😕',
        # ':\\':'😕',
        # '-.-':'😒',
        # ':P' :'😛',
        # ';P' :'😜',
        # 'xP' :'😝',
        # ';)' :'😉',
        # ':?' :'😖',
        # '>:(':'😠',
        # 'D:' :'😦',
        # ':o' :'😯',
        # ':O' :'😮',
        # 'B)' :'😎'
    }
    # Occasionally (p≈1/3) suggest the Unicode equivalent of an ASCII emoticon.
    for emoticon, uchar in emoticontable.items():
        if re.findall('(^|\W)'+re.escape(emoticon)+'(\W|$)', cmd) and random() < 0.333:
            reply('Did you mean {} (U+{:x}) with “{}”?'.format(uchar, ord(uchar), emoticon))
            break

    def replyopen():
        # Report the last recorded open/closed transition, if any.
        if self.lastopen:
            reply('Space was last marked {} by {} on {}.'.format(*self.lastopen))
        else:
            reply("I don't know when was the last time the space was open.")
    if cmd.startswith('open'):
        if '?' in cmd or '‽' in cmd:
            if cmd.count('?') >= 5:
                self.sendchan('afrabot: open?')
                return
            replyopen()
        else:
            if cmd.count('!') > 5:
                reply('u mad bro?')
                return
            self.set_open(True, nick)
        return
    if cmd.startswith('closed'):
        if '?' in cmd or '‽' in cmd:
            replyopen()
        else:
            if cmd.count('!') > 5:
                reply('u mad bro?')
                return
            dm('Please remember to follow the shutdown protocol.')
            self.set_open(False, nick)
        return
    # Plenum handling: end the plenum or list collected Chaos-☆ topics.
    if re.match('^ *genug +pleniert[.!]{,5}$', cmd) or re.match('^plenum[?!‽.]{,5}$', cmd):
        cs = self.chaossternchen
        if 'genug' in cmd:
            self.chaossternchen = []
            reply('Plenum beendet.')
        else:
            reply('Aye! So far, there are {} Chaos-☆'.format(len(cs)) + ('.' if len(cs) == 0 else ':'))
            for entry in enumerate(cs):
                reply('Chaos-☆ {}: {}'.format(*entry))
        return
    csmatch = re.match('^ *(delete|remove) +chaos-?([☆★☼☀*]|sternchen) *([0-9]+)[.!]{,5}$', cmd)
    if csmatch:
        try:
            num = int(csmatch.group(3))
            del self.chaossternchen[num]
            reply('Chaos-☆ {} deleted.'.format(num))
        except:
            # Out-of-range index or similar — shrug it off.
            reply('wut?')
        return
    if re.match('^help[?!‽.]*$', cmd):
        helptext = """open|closed? - query whether space is open
open|closed - set space open/closed
chaos*: [foobar] - add plenum topic
delete chaos* [num] - delete plenum topic number [n]
shutdown - list things to do when closing the space
plenum - list plenum topics
... and many more, doc urgently needed. Please submit PRs on github: https://github.com/afra/afrab0t
"""
        for line in helptext.splitlines():
            reply(line)
        return
    if re.match('^shutdown[?‽]*$', cmd):
        helptext = """* Fenster schließen (Beim rechten Fenster muss ein Hebel unten am Fenster betätigt werden. Bitte stellt sicher, dass beide Fenster dicht geschlossen sind.)
* Tische aufräumen und bei Bedarf kurz abwischen
* Geschirr spülen
* Kühlschrank auffüllen
* Heizung auf eine angemessene Stufe stellen (Winter: 2-3)
* Lampen, Computer, Boxen, Beamer, Kochplatte, Ofen, *Wasserkocher*, Laser abschalten
* Gucken, ob ralisi noch Geschirr abwäscht
* Müll mit runter nehmen
* Raum-, Aufgangs- und Haustür verschließen
"""
        for line in helptext.splitlines():
            reply(line)
        return
    if cmd == 'ponies?':
        reply('yes please!')
        return
    if re.match('^ *tell +afrab[o0]t +', cmd):
        reply('what is your problem?')
        return
    if cmd.rstrip('?') in ('where', 'location', 'wo'):
        reply('AfRA e.V. is located at Herzbergstr. 55, 10365 Berlin, 2.HH/Aufgang B, 3. floor on the'
              'left (Rm 3.08). Public transport: Tram M8, 21, 37 & Bus 256, N56, N50 → Herzbergstr./Siegfriedstr.'
              'Door closed? Try +49-176-29769254 !')
        return
    if cmd.rstrip('?') in ('tel', 'telefon', 'telephone', 'phone', 'handy', 'fon'):
        reply("Locked out? Wanna know what's up at AfRA? Try +49-176-29769254 !")
        return
    if cmd.rstrip('?!.') in ('cats', 'katzen', 'kittens', 'kätzchen'):
        try:
            # Pick the first hot /r/cats link we haven't shown yet.
            submissions = self.reddit.get_subreddit('cats').get_hot(limit=50)
            index, item = next((i, s) for i, s in enumerate(submissions)
                               if s.url not in self.catpiccache and not s.stickied and not s.is_self)
            self.catpiccache.append(item.url)
            if index != 5:
                reply('Got some cats for you: '+item.url)
            else:
                reply("Gee, you really like those cat things, don't you? You know, I could use some love, too: https://github.com/afra/afrab0t")
        except StopIteration:
            reply('The intertubes are empty.')
        return
    if cmd.rstrip('?!.') == 'catspam':
        def catspam():
            # Background thread: DM fresh cat links, throttled to one per 3s.
            try:
                submissions = self.reddit.get_subreddit('cats').get_hot(limit=32)
                for s in submissions:
                    if s.url not in self.nickcatpiccache[nick] and s.url not in self.catpiccache and not s.stickied and not s.is_self:
                        self.nickcatpiccache[nick].append(s.url)
                        dm(s.url)
                        time.sleep(3)
            except Exception as e:
                log('Catspam problem:', e)
                reply('The intertubes are empty.')
        thr = Thread(target=catspam)
        thr.start()
        return
    if cmd.rstrip('?!.') in ('answer', 'antworte', 'antwort'):
        reply('42')
        return
    # ETA handling
    if cmd.rstrip('?') in ('etas', 'who', 'da'):
        with self.db as db:
            # Expire ETAs older than a day before reporting.
            db.execute("DELETE FROM etas WHERE timestamp < DATETIME('now', '-1 day')")
            etas = ', '.join(nick+': '+eta for nick, eta in db.execute("SELECT nick, eta FROM etas").fetchall())
        if etas:
            reply('Current ETAs: '+etas)
        else:
            reply('No ETAs have been announced yet.')
        return
    # key handling
    keycmd = re.match('key ([\w]+) to ([\w]+)( *: *.*)?', cmd)
    if keycmd:
        with self.db as db:
            keystate, = db.execute("SELECT keystate FROM keylog ORDER BY timestamp DESC LIMIT 1").fetchone()
            keystatelist = keystate.split(', ')
            fromnick, tonick, comment = keycmd.groups()
            if not fromnick in keystatelist:
                reply('According to my information, as of now {} does not have a key. Current key'
                      'holders are {}.'.format(fromnick, keystate))
                return
            keystatelist[keystatelist.index(fromnick)] = tonick
            keystate = ', '.join(keystatelist)
            db.execute("INSERT INTO keylog VALUES (DATETIME('now'),?,?,?,?)",
                       (fromnick, tonick, keystate, comment))
            self.sendchan('Key transfer: {}→{}. Current key holders: {}'.format(fromnick, tonick, keystate))
        return
    if cmd.rstrip('?') == 'progress':
        # ASCII progress bar over the 06:00-18:00 "work day".
        t = datetime.datetime.now().time()
        p = 0
        if t.hour > 6 and t.hour < 18:
            p = ((t.hour-6)*3600+t.minute*60+t.second)/(3600*11)
        foo = round(67*p)
        bar = '='*foo
        space = ' '*(67-foo)
        reply('['+bar+'>'+space+'] ({:.2f}%)'.format(p*100))
        return
    if cmd.startswith('keystate '):
        keystate = re.split('[,;/: ]*', cmd)[1:]
        self.db.execute("INSERT INTO keylog VALUES (DATETIME('now'),'','',?,'')", (', '.join(keystate),))
        self.sendchan('Key status set. Current key holders: {}'.format(', '.join(keystate)))
        return
    keylog = re.match('keylog *([0-9]*)', cmd)
    if keylog:
        # NOTE(review): max(50, n) forces at least 50 entries regardless of the
        # requested count — min() looks intended; confirm.
        num = max(50, int(keylog.group(1) or 8))
        dm('The latest {} key log entries:'.format(num))
        loglines = self.db.execute("SELECT * FROM keylog ORDER BY timestamp DESC LIMIT ?", (num,))
        # NOTE(review): reversed() needs a sequence; if execute() returns a
        # cursor this requires .fetchall() — confirm the db wrapper's type.
        for timestamp, fromnick, tonick, keystate, comment in reversed(loglines):
            dm('{}: {}→{}; Key holders {}; Comment: "{}"'.format(
                timestamp, fromnick, tonick, keystate, comment))
        dm('EOL')
        return
    if cmd.startswith("f**k you"):
        reply('F*****g is entirely unnecessary: I can reproduce via copy-and-paste!')
        return
    if cmd.startswith("geh kacken"):
        reply('Command "kacken" not implemented. You are welcome to submit a pull request on github at https://github.com/afra/afrab0t')
        return
    # fall-through
    c.notice(nick, 'I don\'t know what you mean with "{}"'.format(cmd))
def get_data(files, expect_labels=True, tokenize=False, verbose=False, files_already_opened=False):
    """
    load text from files, returning an instance of the Doc class
    doc.frag is the first frag, and each points to the next
    """
    if type(files) == type(''):
        # Single filename: wrap in a list.
        files = [files]
    frag_list = None          # head of the linked list of Frags
    word_index = 0            # running count of words seen
    frag_index = 0            # running count of SBD hypotheses (frags)
    curr_words = []           # words accumulated since the last frag boundary
    lower_words, non_abbrs = sbd_util.Counter(), sbd_util.Counter()
    for file in files:
        sys.stderr.write('reading [%s]\n' %file)
        #fh = open(file)
        if files_already_opened:
            fh = file
        else:
            fh = open(file)
        for line in fh:
            ## deal with blank lines
            if (not line.strip()) and frag_list:
                if not curr_words:
                    # Consecutive blank line: just mark the previous frag.
                    frag.ends_seg = True
                else:
                    # Close out the pending words as a segment-ending frag.
                    frag = Frag(' '.join(curr_words))
                    frag.ends_seg = True
                    if expect_labels:
                        frag.label = True
                    prev.next = frag
                    if tokenize:
                        tokens = word_tokenize.tokenize(frag.orig)
                    frag.tokenized = tokens
                    frag_index += 1
                    prev = frag
                    curr_words = []
            for word in line.split():
                curr_words.append(word)
                # A word that may end a sentence starts a new frag hypothesis.
                if is_sbd_hyp(word):
                    #if True: # hypothesize all words
                    frag = Frag(' '.join(curr_words))
                    if not frag_list:
                        frag_list = frag
                    else:
                        prev.next = frag
                    ## get label; tokenize
                    if expect_labels:
                        frag.label = int('<S>' in word)
                    if tokenize:
                        tokens = word_tokenize.tokenize(frag.orig)
                        # BJD possible hack, but pretty sure this is needed
                        tmp_tokens = tokens.split()
                        tokens = ' '.join(tmp_tokens[:-1] + re.split(r'([.?!]+["\')\]]*)$', tmp_tokens[-1]))
                    else:
                        tokens = frag.orig
                        tokens = re.sub('(<A>)|(<E>)|(<S>)', '', tokens)
                    frag.tokenized = tokens
                    frag_index += 1
                    prev = frag
                    curr_words = []
                word_index += 1
        if files_already_opened:
            # Caller owns the handle; leave it open.
            pass
        else:
            fh.close()
        #fh.close()
    ## last frag
    frag = Frag(' '.join(curr_words))
    if not frag_list:
        frag_list = frag
    else:
        prev.next = frag
    if expect_labels:
        frag.label = int('<S>' in word)
    if tokenize:
        tokens = word_tokenize.tokenize(frag.orig)
    else:
        tokens = frag.orig
    tokens = re.sub('(<A>)|(<E>)|(<S>)', '', tokens)
    frag.tokenized = tokens
    frag.ends_seg = True
    frag_index += 1
    if verbose:
        sys.stderr.write(' words [%d] sbd hyps [%d]\n' %(word_index, frag_index))
    ## create a Doc object to hold all this information
    doc = Doc(frag_list)
    return doc
def _parse_response(self, response):
    """Parse a raw SpamAssassin protocol response (bytes).

    Populates self.report_fulltext, self.report_json (rule -> partscore and
    description) and self.score. Returns None on empty/malformed responses.
    """
    if response == b"":
        logging.info("[SPAM ASSASSIN] Empty response")
        return None
    match = divider_pattern.match(response)
    if not match:
        logging.error("[SPAM ASSASSIN] Response error:")
        logging.error(response)
        return None
    first_line = match.group(1)
    headers = match.group(2)
    body = response[match.end(0) :]
    # Checking response is good
    match = first_line_pattern.match(first_line)
    if not match:
        logging.error("[SPAM ASSASSIN] invalid response:")
        logging.error(first_line)
        return None
    report_list = [
        s.strip()
        for s in body.decode("utf-8", errors="ignore").strip().split("\n")
    ]
    # The rule table starts after the "---" divider line.
    linebreak_num = report_list.index([s for s in report_list if "---" in s][0])
    tablelists = [s for s in report_list[linebreak_num + 1 :]]
    self.report_fulltext = "\n".join(report_list)

    # join line when current one is only wrap of previous
    tablelists_temp = []
    if tablelists:
        for _, tablelist in enumerate(tablelists):
            if len(tablelist) > 1:
                # A table row starts with a score like "1.2" or "-0.5".
                if (tablelist[0].isnumeric() or tablelist[0] == "-") and (
                    tablelist[1].isnumeric() or tablelist[1] == "."
                ):
                    tablelists_temp.append(tablelist)
                else:
                    # Continuation of the previous row's description.
                    if tablelists_temp:
                        tablelists_temp[-1] += " " + tablelist
    tablelists = tablelists_temp

    # create final json
    self.report_json = dict()
    for tablelist in tablelists:
        wordlist = re.split(r"\s+", tablelist)
        try:
            self.report_json[wordlist[1]] = {
                "partscore": float(wordlist[0]),
                "description": " ".join(wordlist[1:]),
            }
        except ValueError:
            LOG.w("Cannot parse %s %s", wordlist[0], wordlist)

    # Overall score is the third ';'-separated field of the header line.
    headers = (
        headers.decode("utf-8")
        .replace(" ", "")
        .replace(":", ";")
        .replace("/", ";")
        .split(";")
    )
    self.score = float(headers[2])
def __init__(self, input_date):
    """Parse *input_date* into a min/max datetime window.

    Accepts an absolute date, a relative offset (e.g. "3d12h"), or a
    ".."-separated range of either; an optional leading comparison
    operator (<, <=, >, >=, =) constrains the window.

    :raises CommandError: on malformed dates, units, or operators.
    """
    self.input = input_date
    self.min = None
    self.max = None
    self.compare = None
    # possible values:
    # 1. absolute date
    # 2. relative date
    # 3. range (relative or absolute)
    # for absolute:
    # parse
    # create max and min
    # for relative:
    # create a timedelta
    # subtract that from now
    # no need for max and min, subtraction is precise
    # for range:
    # create a DateRange for each
    # select max and min from both to create largest possible range
    # first let's handle the range
    if ".." in self.input:
        self.input = self.input.split("..")
        if len(self.input) != 2:
            raise CommandError("Date ranges must have 2 dates.")
        # if the date is a manual range, convert to a DateRange
        self.max = []
        self.min = []
        for date in self.input:
            date = DateRange(date)
            self.max.append(date.max)
            self.min.append(date.min)
        # max and min are now both lists of possible dates
        # pick max and min to yield the biggest date
        # max: None is always Now
        # min: None is alwyas The Beginning of Time
        # for 2 absolute dates this is easy, just pick biggest diff
        # for 2 relative dates, pick both of whichever is not None
        # for 1:1, pick not None of relative then ->largest of absolute
        # filter None from lists
        self.max = [i for i in self.max if i]
        self.min = [i for i in self.min if i]
        # special case for 2 relative dates - both will only have max
        if len(self.max) == 2 and len(self.min) == 0:
            self.min = min(self.max)
            self.max = max(self.max)
            return
        diffs = []
        for i, minimum in enumerate(self.min):
            for j, maximum in enumerate(self.max):
                diffs.append(
                    {
                        'i': i,
                        'j': j,
                        'diff': self.min[i].diff(self.max[j]).in_seconds(),
                    }
                )
        # Keep the (min, max) pair spanning the widest interval.
        diffs = max(diffs, key=lambda x: x['diff'])
        self.max = self.max[diffs['j']]
        self.min = self.min[diffs['i']]
        # do other stuff
        return
    # strip the comparison
    match = re.match(r"([>=<]{1,2})(.*)", self.input)
    if match:
        self.compare = match.group(1)
        self.input = match.group(2)
    if self.date_is_absolute():
        # the date is absolute
        # minimise the date
        minimum = pd.datetime(*self.date.lower_strict()[:6])
        minimum = minimum.set(hour=0, minute=0, second=0)
        # maximise the date
        maximum = pd.datetime(*self.date.upper_strict()[:6])
        maximum = maximum.set(hour=23, minute=59, second=59)
        if self.compare == "<":
            self.max = minimum
        elif self.compare == "<=":
            self.max = maximum
        elif self.compare == ">":
            self.min = maximum
        elif self.compare == ">=":
            self.min = minimum
        elif self.compare in ["=", None]:
            # = means between maximum and minimum
            self.min = minimum
            self.max = maximum
        else:
            raise CommandError(
                "Unknown operator in absolute date "
                "comparison ({}).".format(self.compare)
            )
    elif re.match(r"([0-9]+[A-Za-z])+$", self.input):
        # the date is relative
        sel = [i for i in re.split(r"([0-9]+)", self.input) if i]
        # sel is now a number-letter-repeat list
        # convert list to dict via pairwise
        sel = DateRange.reverse_pairwise(sel)
        # convert all numbers to int
        sel = dict([a, int(x)] for a, x in sel.items())
        self.date = pd.now()
        # check time units
        for key in sel:
            if key not in 'smhdwMy':
                raise CommandError(
                    "'{}' isn't a valid unit of time in a relative date. "
                    "Valid units are s, m, h, d, w, M, and y.".format(key)
                )
        self.date = pd.now().subtract(
            years=sel.get('y', 0),
            months=sel.get('M', 0),
            weeks=sel.get('w', 0),
            days=sel.get('d', 0),
            hours=sel.get('h', 0),
            minutes=sel.get('m', 0),
            seconds=sel.get('s', 0),
        )
        if self.compare in ["<", "<="]:
            self.min = self.date
        elif self.compare in [">", ">=", None]:
            self.max = self.date
        elif self.compare == "=":
            self.max = self.date
            self.min = self.date
        else:
            raise CommandError(
                "Unknown operator in relative date "
                "comparison ({}).".format(self.compare)
            )
    else:
        raise CommandError(
            "'{}' isn't a valid absolute or relative date "
            "type.".format(self.input)
        )
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Sort key ordering embedded digit runs numerically, text case-insensitively.

    e.g. sorted(names, key=natural_sort_key) puts "file2" before "file10".
    """
    parts = _nsre.split(s)
    return [int(part) if part.isdigit() else part.lower() for part in parts]