def _handle_page(self, title, text): self.n_pages += 1 if not self.filter_page_raw(title, text): return if '==' in text: text = text.split('==')[0] text = self.braces_pattern.sub('', text) text = self.wikiref_pattern.sub(lambda x: x.groups(0)[1], text) text = self.wikiref2_pattern.sub('', text) text = self.extraref_pattern.sub(lambda x: x.groups(0)[1], text) text = self.extraref2_pattern.sub('', text) text = self.xml_tag_pattern.sub('', text) text = self.paren_fixup_pattern.sub('(', text) text = self.space_fixup_pattern.sub(' ', text) text = self.entity_fixup_pattern.sub(lambda x: { 'mdash': '-', 'ndash': '-', 'quot': '"', 'amp': '&', 'lt': '<', 'gt': '>' }[x.groups()[0]], text) text = text.strip() self.handle_page(title, text)
def process_gh_project_names(text):
    """Split a project name into space-separated words.

    * Names containing ``-`` have every hyphen replaced by a space.
    * Otherwise a space is inserted before every character that is not
      lowercase (capitals, digits, punctuation, ...).  A leading uppercase
      character is lowercased first, so it does not produce a leading space.

    Args:
        text: The raw project name.

    Returns:
        The name with words separated by spaces; ``''`` for empty input.
    """
    # Guard: indexing text[0] below would raise IndexError on an empty name.
    if not text:
        return ''

    if '-' in text:
        return ' '.join(text.split('-'))

    if text[0].isupper():
        text = text[0].lower() + text[1:]

    return ''.join(ch if ch.islower() else ' ' + ch for ch in text)
def Tokenize(text):
    """Split ``text`` on whitespace, keeping known multi-word terms together.

    Every entry of ``category``, ``sub_category``, ``brand`` and ``article``
    that contains a space is registered as a multi-word expression, so that
    it comes out of the tokenizer as a single token.

    Args:
        text: The string to tokenize.

    Returns:
        A list of tokens in which every underscore has been replaced by a
        space (this also affects underscores present in the input itself).

    NOTE(review): ``MWETokenizer`` is presumably NLTK's, whose default
    separator for merged expressions is ``_`` -- confirm against the imports.
    """
    tokenizer = MWETokenizer(category.all())

    for vocabulary in (category, sub_category, brand, article):
        for word in vocabulary:
            # A containment test is required here: str.find() returns -1
            # (truthy) when there is no space and 0 (falsy) when the space is
            # the first character, so it cannot be used as a boolean.
            if ' ' in word:
                tokenizer.add_mwe(word.split())

    merged = tokenizer.tokenize(text.split())
    return [token.replace("_", " ") for token in merged]
def build_tsquery(text):
    """Build an ``&``-joined query string from ``text``.

    The text is first passed through the SQL function
    ``gazetteer.gaz_plainText2`` via ``scalar``; the result is split on
    whitespace, a trailing ``*`` on any term is rewritten to ``:*``, and the
    terms are joined with ``' & '``.

    NOTE(review): the output looks like PostgreSQL tsquery syntax (``:*``
    being a prefix match) -- confirm against the caller.
    """
    plain = scalar('select gazetteer.gaz_plainText2(:text)', text=text)
    terms = [re.sub(r'\*$', ':*', term) for term in plain.split()]
    return ' & '.join(terms)
def preprocess_search_query(text):
    """Turn free text into an ``&``-joined list of prefix-match terms.

    Every non-word character is replaced by a space, the text is split on
    whitespace, and each token gets a ``:*`` suffix.

    Args:
        text: The raw query string.

    Returns:
        A string such as ``'foo:* & bar:*'``; ``''`` when no tokens remain.
    """
    # The flag must be passed by keyword: the fourth positional argument of
    # re.sub() is ``count``, so passing re.UNICODE there would cap the number
    # of replacements at 32 instead of setting the flag.
    text = re.sub(r'\W', ' ', text, flags=re.UNICODE)
    # str.split() already discards surrounding whitespace on every token.
    return ' & '.join('%s:*' % token for token in text.split())
def build_tsquery(text):
    """Convert ``text`` into a string of terms joined by ``' & '``.

    ``scalar`` runs ``gazetteer.gaz_plainText2`` on the input; each
    whitespace-separated word of the result has a trailing ``*`` replaced by
    ``:*`` before the words are joined.

    NOTE(review): presumably the result feeds a PostgreSQL full-text query --
    confirm against the caller.
    """
    normalized = scalar('select gazetteer.gaz_plainText2(:text)', text=text)

    terms = []
    for word in normalized.split():
        terms.append(re.sub(r'\*$', ':*', word))

    return ' & '.join(terms)
def handle_app_mention(event_data):
    """React to an @mention of the bot by granting points to another user.

    The first user mentioned in the message body (other than the bot itself,
    ``SLACKBOT_USERID``) is the recipient.  The number of points is the first
    whitespace-separated integer in the message text, defaulting to 1, with
    at most 10 allowed per message.  A direct message describing the outcome
    is always sent: to the recipient on success, to the sender otherwise.

    Args:
        event_data: Event payload; must provide ``event_data["event"]`` with
            ``user``, ``channel``, ``ts``, ``event_ts`` and a ``blocks`` list
            whose first block's first element holds the message elements.

    ``try_grant_points`` is expected to return ``None`` on success and an
    error string otherwise (the string is appended to the reply).
    """
    message = event_data["event"]

    # Get the user who initiated the @mention
    source_user = slack_client.users_info(user=message["user"])
    source_id = source_user["user"]["id"]

    # By default the sender is the one who gets notified.
    msg, mentioned_user = "", None
    notify_user = source_id

    # Iterate through message body looking for the destination user.
    # Use the first match.
    elements = message["blocks"][0]["elements"][0]["elements"]
    for element in elements:
        if element["type"] == "user" and element["user_id"] != SLACKBOT_USERID:
            mentioned_user = slack_client.users_info(user=element["user_id"])
            break

    if mentioned_user is None:
        # If there was no user mention, handle the error.
        msg = ("Hey <@" + source_id +
               "> something funny just happened... "
               "can you try granting that point again?")
    elif source_id == mentioned_user["user"]["id"]:
        msg = ("Hey <@" + source_id +
               "> - nice try but you can only give points to others")
    else:
        mentioned_id = mentioned_user["user"]["id"]

        # Try to get the number of points, default to 1.
        # - ``text`` may be absent from the event, hence the ``or ""``.
        # - isdecimal() (not isdigit()) so every accepted token is one that
        #   int() can parse; isdigit() also accepts characters such as
        #   superscript digits, which int() rejects.
        text = message.get("text") or ""
        points_found = [int(s) for s in text.split() if s.isdecimal()]
        points = points_found[0] if points_found else 1

        source_user_email = source_user["user"]["profile"]["email"]
        mentioned_user_email = mentioned_user["user"]["profile"]["email"]
        original_message = slack_client.chat_getPermalink(
            channel=message["channel"], message_ts=message["ts"])

        # Try to grant points
        if points > 10:
            msg = (
                "Hey <@{source}> :wave: I couldn't do <{permalink}|this>. "
                "You can give a maximum of 10 points at a time! "
                "You should try giving <@{mentioned}> some points again "
                "(but only up to 10, remember?)!"
            ).format(
                source=source_id,
                permalink=original_message["permalink"],
                mentioned=mentioned_id)
        else:
            error = try_grant_points(source_user_email, mentioned_user_email,
                                     points)
            if error is None:
                msg = (
                    "Hey <@{mentioned}> :wave: "
                    "<{permalink}|you got {points} points> from <@{source}>! "
                    "⚡ Check out the leaderboard "
                    "<https://snitch-leaderboard.herokuapp.com/|here>! ⚡ "
                    "⚡ And get to snitching! \n⚡"
                ).format(
                    mentioned=mentioned_id,
                    points=str(points),
                    source=source_id,
                    permalink=original_message["permalink"])
                notify_user = mentioned_id

                # React to the slack post now, for some sense of transparency.
                # Reactions are best-effort: a failure is reported but never
                # prevents the notification below.  ``except Exception`` (not
                # a bare except) so KeyboardInterrupt/SystemExit still
                # propagate.
                try:
                    slack_client.reactions_add(
                        channel=message["channel"],
                        timestamp=message["event_ts"],
                        name="thumbsup")
                except Exception:
                    print(
                        "well that's too bad I couldn't react... try again with another emoji"
                    )
                    try:
                        slack_client.reactions_add(
                            channel=message["channel"],
                            timestamp=message["event_ts"],
                            name="white_check_mark")
                    except Exception:
                        print("something went really really wrong")
            else:
                # If we couldn't grant points, let people know
                msg = ("Hey <@" + source_id + "> - I couldn't give <@" +
                       mentioned_id +
                       "> points from you, here's what the computer told me: "
                       + error)

    print("Sending %s the following alert: %s" % (notify_user, msg))
    slack_client.chat_postMessage(channel=notify_user, text=msg)
def build_tsquery(text):
    """Return the terms of ``text`` joined with ``" & "``.

    The input is run through ``gazetteer.gaz_plainText2`` using ``scalar``
    and split on whitespace; a term ending in ``*`` has that ``*`` turned
    into ``:*``.

    NOTE(review): this appears to produce PostgreSQL tsquery text, where
    ``:*`` marks a prefix match -- confirm against the caller.
    """

    def as_prefix_term(term):
        # Only a ``*`` at the very end of the term is rewritten.
        return re.sub(r"\*$", ":*", term)

    cleaned = scalar("select gazetteer.gaz_plainText2(:text)", text=text)
    return " & ".join(map(as_prefix_term, cleaned.split()))