async def whatDoWeThinkOf(self, prefix, suffix, message, timer=NoTimer()):
    """Reply with indexed lines matching the query in `suffix`.

    The index and the `subject` module are reloaded, the query built from
    `suffix` is run through `self.index.queryLong` (max=300) and the hits
    are narrowed with `subject.filter` (max=5). `prefix` and `message` are
    not used.

    Returns:
        "We think..." followed by one "<user name>: <text>" line per hit.
    """
    with timer.sub_timer("reload"):
        self.reloadIndex()
        reload(subject, "subject.py")
        known_users = await self.loadAllUsers()
        self.index.setUsers(known_users)

    # TODO: Strip mentions
    search_text = self.makeQuery(suffix)
    hits = self.index.queryLong(search_text, max=300, timer=timer)
    with timer.sub_timer("subject-filter"):
        hits = subject.filter(hits, search_text, max=5)

    formatted = []
    for hit in hits:
        author = await self.getUserName(hit[0])
        body = await self.stripMentions(hit[1])
        formatted.append("{0}: {1}".format(author, body))
    return "We think...\n" + "\n".join(formatted)
async def consumeInternal(self, message, timer=NoTimer()):
    """Handle one incoming message and return the reply text, if any.

    Matches of `Soph.addressPat` are removed from the message content to
    obtain the payload. In non-private channels a message whose content is
    not shortened by that removal is ignored, as is anything posted in the
    channel named "ch160". The payload is then offered to `self.dispatch`;
    if nothing handles it, a fallback echo reply is produced.

    Returns:
        The reply string, or None when the message should be ignored.
    """
    async with ScopedStatus(self.client, "with your text data"):
        author_name = message.author.display_name
        content = message.content
        payload = re.sub(Soph.addressPat, "", content)

        channel = message.channel
        server = None
        if channel and hasattr(channel, 'server'):
            server = channel.server

        is_public = channel.type != discord.ChannelType.private
        if is_public and len(payload) == len(content):
            # Nothing was stripped, so the message was not addressed to us.
            return None
        if channel.name == "ch160":
            return None
        if not payload:
            return "What?"

        handled = await self.dispatch(payload, message, timer=timer)
        if handled:
            return handled
        if author_name == "fLux":
            return "Lux, pls. :sweat_drops:"

        cleaned = await self.stripMentions(payload, server)
        return "I was addressed, and {0} said \"{1}\"".format(
            author_name, cleaned)
async def respondImpersonate(self, prefix, suffix, message, timer=NoTimer()):
    """Generate one line impersonating the users named in `suffix`.

    `suffix` is a comma-separated list of user names, resolved through
    `self.userNameCache`. The markov corpus is (re)loaded from
    "./markovData" when the `markov` module was reloaded or no corpus is
    loaded yet.

    Returns:
        The generated line (mentions stripped outside private channels), a
        "not found" message for an unknown name, a fallback message when
        nothing was generated, or `g_Lann` when generation raised.
    """
    module_reloaded = reload(markov, "markov.py")
    if module_reloaded or not self.corpus:
        self.log("Loading corpus")
        self.corpus = markov.Corpus("./markovData")
        self.log("Loaded corpus")

    requested = [part.strip() for part in suffix.strip().split(",")]
    try:
        ids = [self.userNameCache[who] for who in requested]
    except KeyError as key:
        return "Data for {0} not found {1}".format(key, g_Lann)

    try:
        generated = self.corpus.impersonate(ids, 1)
        if not generated:
            return "Hmm... I couldn't think of anything to say {0}".format(
                g_Lann)
        reply = generated[0]
        if message.channel.type != discord.ChannelType.private:
            reply = await self.stripMentions(reply, message.channel.server)
        return reply
    except Exception as e:
        self.log(e)
        return g_Lann
async def respondQueryStats(self, prefix, suffix, message, timer=NoTimer()):
    """Report per-user usage statistics for the term(s) in `suffix`.

    The statistics come from the remote "termStats" call made through
    `wsClient` (port 8888, keyed by the message's server id). At most ten
    rows are shown. Each row is indexed as (count, user id, total), where
    the third value is used as the divisor for the per-1000 frequency.

    Returns:
        A code-fenced table, or a "No one" message when there are no rows.
    """
    with timer.sub_timer("query-stats-callback"):
        asker = message.author.display_name
        await self.loadAllUsers()
        term_query = self.makeQuery(suffix)
        stats = await wsClient.call(8888, message.server.id, "call",
                                    "termStats", term_query)
        if len(stats) > 10:
            stats = stats[:10]
        if not stats:
            return "No one, apparently, {0}".format(asker)

        table = ["{0:<18}: {1:<6} \t[{2}]".format(
            "user", "count", "freq/1000 lines")]
        for row in stats:
            who = await self.resolveId(row[1])
            total = row[2]
            table.append("{0:<18}: {1:<6} \t[{2:.1f}]".format(
                who, row[0], 1000 * row[0] / total))
        return "```" + "\n".join(table) + "```"
async def collect_terms(self, t, usernames, corpusThresh, freq, minScore, corpusSize, filters={}, timer=NoTimer()):
    """Score how characteristic the term `t` is for each user.

    For every user the term is searched in the "content" field, restricted
    by that user's entry in `filters` (or a fresh "user" term query). When
    the hit count exceeds `corpusThresh * freq`, the score is ten times the
    natural log of the user's relative frequency over the corpus-wide
    relative frequency (`freq / corpusSize`).

    Returns:
        A list of (user, term, score) tuples whose score exceeds `minScore`.
    """
    with timer.sub_timer("collect_terms outer") as outer:
        with self.getSearcher() as searcher:
            scored = []
            term_query = whoosh.query.Term("content", t)
            for user in usernames:
                user_filter = filters.get(user, whoosh.query.Term("user", user))
                with outer.sub_timer("search"):
                    hits = searcher.search(term_query, limit=100000000,
                                           filter=user_filter)
                with outer.sub_timer("length"):
                    occurrences = hits.scored_length()
                with outer.sub_timer("counting"):
                    if not occurrences or not occurrences > corpusThresh * freq:
                        continue
                    user_rate = occurrences / self.getCounts(user)
                    ratio = user_rate / (freq / corpusSize)
                    if not ratio > 0:
                        continue
                    score = math.log(ratio) * 10
                    if score > minScore:
                        scored.append((user, t, score))
            return scored
def queryStats(self, text, expand=False, timer=NoTimer()):
    """Count matching documents per user for the query `text`.

    The query is parsed against the "content" field; with `expand` set the
    parser uses `whoosh.query.Variations` as its term class. Up to 100000
    hits are tallied by their "user" field.

    Returns:
        A list of (count, user) tuples sorted in descending order.
    """
    with timer.sub_timer("query-stats") as outer:
        with self.getSearcher() as searcher:
            from whoosh.qparser import QueryParser

            parser_options = {"schema": self.ix.schema}
            if expand:
                parser_options["termclass"] = whoosh.query.Variations
            parsed = QueryParser("content", **parser_options).parse(text)

            with outer.sub_timer("searcher.search"):
                hits = searcher.search(parsed, limit=100000)

            with outer.sub_timer("results") as inner:
                tally = defaultdict(int)
                with inner.sub_timer("counts"):
                    for hit in hits:
                        tally[hit["user"]] += 1
                with inner.sub_timer("reverse"):
                    pairs = sorted(
                        (count, user) for user, count in tally.items()
                        if count > 0)
                    pairs.reverse()
                    return pairs
async def help(self, prefix, suffix, message, timer=NoTimer()):
    """Answer a help request.

    * Empty `suffix`: list the help text of every entry in `self.callbacks`.
    * `suffix` starting with "timezones": only answered in private channels;
      reads "all_timezones.json" and lists either the available regions
      (no region given) or the zones whose name starts with the region.
    * Anything else: `g_Lann`.
    """
    topic = suffix.strip()
    if not topic:
        usage = "\n".join([c[0].help() for c in self.callbacks])
        return "I can parse requests of the following forms:\n" + usage

    if not topic.startswith("timezones"):
        return g_Lann

    if message.channel.type != discord.ChannelType.private:
        return "Ask me in private :shy:"

    region = topic[len("timezones"):].strip()
    with open("all_timezones.json") as f:
        all_zones = json.load(f)

    if not region:
        # Keep only the part before the first "/" of every zone name.
        after_slash = re.compile(r'/.*')
        regions = {after_slash.sub("", zone) for zone in all_zones if "/" in zone}
        return "Need a region to filter on, because there are loads.\nUse the command help timezones <region> with one of these regions:\n{0}".format(
            "\n".join(regions))

    matching = [
        re.sub(".*/", "", zone) for zone in all_zones
        if zone.lower().startswith(region.lower())
    ]
    header = "The supported locales in {0} are:\n".format(region)
    return header + "\n".join(matching)
async def testTextEngine(self, prefix, suffix, message, timer=NoTimer()):
    """Send `suffix` to the remote "answer" call and format the results.

    A name -> user id map is built from the server members' display names,
    account names and known aliases and passed along with the question.

    Returns:
        One "<name>: <content>" line per result (content truncated to 100
        characters), or a fallback message when there are no results.
    """
    # Invert self.aliases (alias -> id) into id -> [aliases].
    aliases_by_id = utils.SophDefaultDict(lambda x: list())
    for alias, user_id in self.aliases.items():
        aliases_by_id[user_id].append(alias)

    name_to_id = {}
    if hasattr(message, "server"):
        for member in message.server.members:
            name_to_id[member.display_name] = member.id
            name_to_id[member.name] = member.id
            for alias in aliases_by_id[member.id]:
                name_to_id[alias] = member.id

    answers = await wsClient.call(8888, message.server.id, "call", "answer",
                                  suffix, name_to_id)
    if not answers:
        return "I couldn't get an answer for that..."

    formatted = []
    for answer in answers:
        speaker = await self.resolveId(answer[0])
        body = answer[1].replace("\n", "\n\t")
        body = await self.stripMentions(body, message.server)
        if len(body) > 100:
            body = body[:100] + "..."
        formatted.append("{0}: {1}".format(speaker, body))
    return "\n".join(formatted)
async def respondWhoVerb(self, prefix, suffix, message, want_bool=False, timer=NoTimer()):
    """Answer "who <verb>s ...?" by searching every user's lines.

    The predicate built from `suffix` is queried against the index (up to
    100 hits, any user) and each hit is checked with `subject.checkVerb`.
    At most ten matching extracts are returned.

    Returns:
        One "<user>: <extract>" line per match; otherwise "I don't know"
        for multi-word predicates or an "I'm not sure who ..." message.
    """
    reload(subject, "subject.py")
    self.reloadIndex()
    userIds = await self.loadAllUsers()
    pred = self.makeQuery(suffix)

    with timer.sub_timer("combined-query") as t:
        res = self.index.query(pred, 100, None, expand=True, userNames=None,
                               dedupe=True, timer=t)

    filteredResults = []
    with timer.sub_timer("subject-filter") as t:
        for r in res:
            if len(filteredResults) >= 10:
                break
            try:
                output = subject.checkVerb(r[1], None, pred, want_bool, timer=t)
                if output:
                    filteredResults.append((r[0], output["extract"]))
            except Exception as e:
                # One unparsable line must not abort the answer; skip it,
                # but leave a trace instead of swallowing it silently.
                self.log(e)

    if filteredResults:
        return "\n".join([
            "{0}: {1}".format(userIds.get(r[0], r[0]), r[1])
            for r in filteredResults
        ])
    if " " in pred:
        return "I don't know"
    # The original formatted an undefined `subj` here (NameError); this
    # query has no subject, so only the predicate is reported.
    return "I'm not sure who {0}s".format(pred)
async def respondUserVerbObject(self, prefix, suffix, message, timer=NoTimer()):
    """Delegate to `respondUserVerb`, passing True as its fourth argument."""
    reply = await self.respondUserVerb(prefix, suffix, message, True,
                                       timer=timer)
    return reply
async def setTimeZone(self, prefix, suffix, message, timer=NoTimer()):
    """Store the message author's time zone and persist all zones to disk.

    A zone given without a "/" is prefixed with "Europe/". The zone is
    accepted only if `timeutils.to_utc("00:00", tz)` succeeds with it. The
    whole `self.tz` mapping is then written as JSON to the file "timezones".

    Returns:
        "Done" on success, or an explanatory message when the zone is
        rejected.
    """
    tz = suffix.strip()
    if "/" not in tz:
        tz = "Europe/" + tz

    try:
        # Called only to validate the zone; the result is not needed.
        timeutils.to_utc("00:00", tz)
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not mistaken for an invalid zone.
        return "Tried to set your locale to {0}, but that doesn't work with time conversion".format(
            tz)

    self.tz[message.author.id] = tz
    # Serialize before opening: opening with "w" truncates the file, so a
    # serialization error must not leave it empty.
    serialized = json.dumps(self.tz)
    with open("timezones", "w", encoding="utf-8") as of:
        of.write(serialized)
    return "Done"
async def respondTimeExt(self, prefix, suffix, message, timer=NoTimer()):
    """Offer time help when it is enabled for the message's channel.

    Requires `self.options["timehelp"]` to be truthy and the channel name
    to be enabled in the server's "timeHelpChannels" option. Any exception
    raised along the way is deliberately ignored.

    Returns:
        The result of `self.respondTime(message)` when truthy, else None.
    """
    try:
        server_opts = self.serverOpts.get(message.server.id, {})
        if not self.options["timehelp"]:
            return None
        enabled_channels = server_opts.get("timeHelpChannels", {})
        if not enabled_channels.get(message.channel.name, False):
            return None
        answer = await self.respondTime(message)
        if answer:
            return answer
    except Exception:
        pass
    return None
async def dispatch(self, payload, message, timer=NoTimer()):
    """Offer `payload` to each entry of `self.callbacks` in order.

    Each entry is indexed as (matcher, handler). The matcher returns an
    offset into `payload`, or -1 for no match. A matching handler receives
    the text before the offset as prefix and the stripped remainder as
    suffix.

    Returns:
        The first truthy handler response, or None.
    """
    for entry in self.callbacks:
        split_at = entry[0](payload)
        if split_at == -1:
            continue
        self.log(message.content[0:100])
        handler = entry[1]
        head = payload[:split_at]
        tail = payload[split_at:].strip()
        response = await handler(self, head, tail, message, timer=timer)
        if response:
            return response
    return None
async def terms_async(self, usernames, corpusThresh=0.6, corpusNorm=False, minScore=450, timer=NoTimer()):
    """Collect characteristic terms for the given users.

    Walks every term of the "content" field, skipping purely numeric terms
    and terms shorter than three characters. Terms whose frequency is above
    50 and below 1% of the document count are scored per user with
    `collect_terms`. Errors for a single term are printed and skipped.

    Returns:
        The concatenated `collect_terms` results for all considered terms.
    """
    collected = []
    with timer.sub_timer("getCounts"):
        # The mapping itself is not used below; the calls are kept as-is.
        totalCounts = {u: self.getCounts(u) for u in usernames}

    digits_only = re.compile(r"^\d+$")
    with timer.sub_timer("getReader"):
        reader = self.ix.reader()
    with timer.sub_timer("numDocs"):
        numDocs = reader.doc_count()
    with timer.sub_timer("initFilters"):
        with self.getSearcher() as searcher:
            filters = {
                u: searcher.search(whoosh.query.Term("user", u))
                for u in usernames
            }

    with timer.sub_timer("termLoop") as loop_timer:
        for term in reader.field_terms("content"):
            if digits_only.match(term) or len(term) < 3:
                continue
            with loop_timer.sub_timer("termFreq"):
                freq = reader.frequency("content", term)
            if not (50 < freq < numDocs / 100):
                continue
            try:
                collected.extend(await self.collect_terms(
                    term, usernames, corpusThresh, freq, minScore, numDocs,
                    filters=filters, timer=loop_timer))
            except Exception as e:
                print(
                    "Error while iterating through terms: {0}".format(
                        e))
    return collected
async def respondMentions(self, prefix, suffix, message, timer=NoTimer()):
    """Report how often each user matches the query in `suffix`.

    Every value of the mapping returned by `loadAllUsers` that occurs in
    `suffix` is replaced by its key before the query is built, then
    `self.index.queryStats` is consulted and at most ten rows are shown.

    Returns:
        One "<user>: <count>" line per row, or a "No one" message.
    """
    asker = message.author.display_name
    self.reloadIndex()
    userIds = await self.loadAllUsers()

    text = suffix
    for key, value in userIds.items():
        text = text.replace(value, key)

    rows = self.index.queryStats(
        self.makeQuery(text))  # TODO: do a proper mentions query...
    if len(rows) > 10:
        rows = rows[:10]
    if not rows:
        return "No one, apparently, {0}".format(asker)

    return "\n".join(
        "{0}: {1}".format(userIds[row[1]], row[0]) for row in rows)
def queryLong(self, text, max=3, user=None, expand=False, timer=NoTimer()):
    """Query with progressively larger limits until `max` results are found.

    Up to three attempts are made. Attempt k (0-based) asks `self.query`
    for `max * (2 + k)` de-duplicated rows, and from the second attempt on
    query expansion is forced.

    Returns:
        The first `max` results of the first attempt yielding at least
        `max`; otherwise everything the last attempt returned.
    """
    with timer.sub_timer("query-long") as outer:
        for attempt in range(3):
            with outer.sub_timer(attempt):
                limit = max * (2 + attempt)
                widen = (expand or (attempt > 0))
                found = list(
                    self.query(text, limit, user, expand=widen, timer=outer,
                               dedupe=True))
            if len(found) >= max:
                return found[:max]
        return found
async def respondUserSaidWhat(self, prefix, suffix, message, timer=NoTimer()):
    """Answer "<name> say about <topic>" with up to five quotes.

    The text before " say about " is looked up in `self.userNameCache`; the
    text after it becomes the query. Hits of 300 characters or more, and
    hits `subject.isSame` considers identical to the query, are skipped.

    Returns:
        A numbered list of quotes, an error message for an unknown name, or
        a "Nothing" message when no quote was found.
    """
    asker = message.author.display_name
    server = getattr(message.channel, "server", None)
    self.reloadIndex()
    await self.loadAllUsers()
    known_names = self.userNameCache

    separator = re.compile(r"\s+say about\s")
    for found in separator.finditer(suffix):
        name = suffix[:found.start(0)].strip()
        user = known_names.get(name, None)
        if not user:
            if name == "Soph":
                return "I can't tell you that."
            return "I don't know who {0} is {1}".format(name, g_Lann)

        payload = self.makeQuery(suffix[found.end(0):])
        candidates = self.index.queryLong(payload, user=user, max=20,
                                          expand=True, timer=timer)
        quotes = []
        for hit in candidates:
            if len(quotes) > 4:
                break
            if len(hit[1]) < 300 and not subject.isSame(hit[1], payload):
                quotes.append(hit)

        reply = ""
        if quotes:
            payload = re.sub(r'\*', r'', payload)
            reply = "*{0} on {1}*:\n".format(name, payload)
            for number, quote in enumerate(quotes, 1):
                text = await self.stripMentions(quote[1], server)
                reply += "{0}) {1}\n".format(number, text)
        if reply:
            return reply
    return "Nothing, apparently, {0}".format(asker)
async def respondMentions(self, prefix, suffix, message, timer=NoTimer()):
    """Report how often each user matches the query in `suffix`.

    Every value of the mapping returned by `loadAllUsers` that occurs in
    `suffix` is replaced by its key before the query is built. The counts
    come from the remote "termStats" call made through `wsClient`; at most
    ten rows are shown.

    Returns:
        One "<user>: <count>" line per row, or a "No one" message.
    """
    asker = message.author.display_name
    userIds = await self.loadAllUsers()

    text = suffix
    for key, value in userIds.items():
        text = text.replace(value, key)
    term_query = self.makeQuery(text)

    rows = await wsClient.call(8888, message.server.id, "call", "termStats",
                               term_query)
    if len(rows) > 10:
        rows = rows[:10]
    if not rows:
        return "No one, apparently, {0}".format(asker)

    return "\n".join(
        "{0}: {1}".format(userIds[row[1]], row[0]) for row in rows)
async def setOption(self, prefix, suffix, message, timer=NoTimer()):
    """Set a bot option from a "key=value" string (master user only).

    The values "true" and "false" (any letter case) are stored as booleans,
    anything else as the stripped string. Setting the key "markov" also
    rebuilds `self.corpus` from the value.

    Raises:
        ValueError: if `suffix` contains no "=".
    """
    if message.author.id != Soph.master_id:
        return "You aren't allowed to touch my buttons :shy:"

    assignment = suffix.strip()
    split_at = assignment.index("=")
    key = assignment[:split_at].strip()
    raw_value = assignment[split_at + 1:].strip()

    # Map the two boolean spellings; keep every other value untouched.
    val = {"true": True, "false": False}.get(raw_value.lower(), raw_value)

    self.options[key] = val
    if key == "markov":
        self.corpus = markov.Corpus(val)
    return "Done"
async def respondWhoSaid(self, prefix, suffix, message, timer=NoTimer()):
    """List up to ten indexed lines matching `suffix`, with their authors.

    Lines of 300 characters or more are dropped. Authors missing from the
    mapping returned by `loadAllUsers` are shown as "?". Mentions are
    stripped from the assembled reply.
    """
    asker = message.author.display_name
    with timer.sub_timer("reload"):
        self.reloadIndex()
        userIds = await self.loadAllUsers()

    search_text = self.makeQuery(suffix)
    with timer.sub_timer("query-long-wrap") as query_timer:
        hits = self.index.queryLong(search_text, timer=query_timer, max=10)
        short_hits = [hit for hit in hits if len(hit[1]) < 300]
    if not short_hits:
        return "Apparently nothing, {0}".format(asker)

    reply = "\n".join(
        "{0}: {1}".format(userIds.get(hit[0], "?"), hit[1])
        for hit in short_hits)
    with timer.sub_timer("strip-mentions"):
        reply = await self.stripMentions(reply)
    return reply
async def respondQueryStats(self, prefix, suffix, message, timer=NoTimer()):
    """Report per-user match counts for the query in `suffix`.

    Uses the local index (`self.index.queryStats` with expansion enabled)
    after reloading it and refreshing its user mapping. At most ten rows
    are shown.

    Returns:
        One "<user>: <count>" line per row, or a "No one" message.
    """
    with timer.sub_timer("query-stats-callback") as outer:
        asker = message.author.display_name
        with outer.sub_timer("reload"):
            self.reloadIndex()
            userIds = await self.loadAllUsers()
            self.index.setUsers(userIds)

        search_text = self.makeQuery(suffix)
        rows = self.index.queryStats(search_text, expand=True, timer=outer)
        if len(rows) > 10:
            rows = rows[:10]
        if not rows:
            return "No one, apparently, {0}".format(asker)
        return "\n".join(
            "{0}: {1}".format(userIds[row[1]], row[0]) for row in rows)
async def respondGreet(self, prefix, suffix, message, timer=NoTimer()):
    """React with a wave (and a few random emoji) to greeting messages.

    Only acts when the channel name is listed in the server's
    "greetChannels" option and the server's greeter recognises the message
    as a greeting. After the wave, further random emoji are added while
    `random.randint(0, 10) > 4`. This is best-effort: failures are ignored.

    Returns:
        Always None (no text reply is produced).
    """
    try:
        server = message.server
        opts = self.serverOpts.get(server.id, {})
        if message.channel.name in opts.get("greetChannels", {}):
            g = self.greeters[server.id]
            if g.checkGreeting(message.content):
                # NOTE(review): the result of this lookup is never used;
                # the call is kept because a failure here currently
                # suppresses the greeting. Confirm whether it can go.
                await self.client.get_user_info(Soph.master_id)
                await self.client.add_reaction(message, "👋")
                while random.randint(0, 10) > 4:
                    e = greeter.randomEmoji()
                    try:
                        await self.client.add_reaction(message, e)
                    except Exception:
                        # Stop at the first emoji that cannot be added.
                        # `Exception` rather than a bare except, so that
                        # KeyboardInterrupt/SystemExit still propagate.
                        break
    except Exception:
        # Deliberate best-effort: greeting must never break handling.
        pass
    return None
async def setAlias(self, prefix, suffix, message, timer=NoTimer()):
    """Register an alias for a known user (master user only).

    `suffix` has the form "a=b"; whichever side is already present in
    `self.userNameCache` is the existing name and the other side becomes
    the alias. The alias is stored in memory and merged into the JSON file
    at `Soph.aliasPath`.

    Returns:
        "Done (...)" on success, `g_Lann` when neither side is known, or an
        "already called" message when the alias is already a known name.

    Raises:
        ValueError: if `suffix` contains no "=".
    """
    if message.author.id != Soph.master_id:
        return "You aren't allowed to touch my buttons :shy:"

    split_at = suffix.index("=")
    left = suffix[:split_at].strip()
    right = suffix[split_at + 1:].strip()

    await self.loadAllUsers()
    known = self.userNameCache
    if left in known:
        existingName, newName = left, right
    elif right in known:
        existingName, newName = right, left
    else:
        return g_Lann

    if newName in known:
        canonicalName = self.userCache[known[newName]]
        shown = existingName if canonicalName == newName else newName
        return "{0} is already called {1} :/".format(
            canonicalName, shown)

    target_id = known[existingName]
    known[newName] = target_id
    self.aliases[newName] = target_id

    # Merge into whatever is already on disk rather than overwriting it.
    stored = {}
    if os.path.exists(Soph.aliasPath):
        with open(Soph.aliasPath) as f:
            stored = json.loads(f.read())
    stored[newName] = target_id
    with open(Soph.aliasPath, "w") as f:
        f.write(json.dumps(stored, indent=True))
    return "Done ({0} -> {1})".format(newName, existingName)
async def dispatch(self, payload, message, timer=NoTimer(), usePrefix=True):
    """Offer `payload` to the registered callbacks in order.

    `usePrefix` selects `self.callbacks` (True) or `self.noPrefixCallbacks`
    (False). Each entry is indexed as (matcher, handler); the matcher
    returns an offset into `payload`, or -1 for no match. An exception
    raised by a handler is logged and the next entry is tried.

    Returns:
        The first truthy handler response, or None.
    """
    handlers = self.callbacks if usePrefix else self.noPrefixCallbacks
    for entry in handlers:
        split_at = entry[0](payload)
        if split_at == -1:
            continue
        try:
            response = await entry[1](self, payload[:split_at],
                                      payload[split_at:].strip(), message,
                                      timer=timer)
            if response:
                return response
        except Exception as e:
            self.log(e)
    return None
# NOTE(review): the source of this method was collapsed onto two lines; the
# indentation below is reconstructed from syntax and should be confirmed
# against the original file, in particular the placement of the `for o in
# pq.objects` loop and of the `else: want_bool = True` branch.
def answer(self, qtext, users={}, timer=NoTimer()):
    """
    Find lines that answer a parsed question such as 'who verbs?'.

    `users` should be a map of userName -> userId. The question is parsed
    with `self.qp.parse`, a search string is built by AND-ing the collected
    predicates, and the index hits are filtered with
    `subject.checkVerbFull` unless the verb's lemma is "say".

    Returns a list of at most ten results, or None when nothing matched.
    Unfiltered ("say") results are the raw index rows; filtered results are
    (r[0], extract) tuples.

    Raises KeyError for author-type subjects ("Doh") and when a user-type
    subject is missing from `users`.
    """
    userIds = {v: k for k, v in users.items()}  # not used below
    pq = self.qp.parse(qtext, users)
    i_results = []  # not used below
    restrictUser = None
    thisUserNames = None
    doFilter = True
    if pq.subject_type == question.SubjectTypes.User:
        # Restrict the search to this user; names are regex-escaped.
        restrictUser = users[pq.subject_val]
        thisUserNames = [
            re.escape(k) for k, v in users.items() if v == restrictUser
        ]
        thisUserNames.append(restrictUser)
    if pq.subject_type == question.SubjectTypes.We:
        #thisUserNames = [k for k,v in users.items()]
        pass
    if pq.subject_type == question.SubjectTypes.Author:
        raise KeyError("Doh")
    predicates = set()
    objectIsSubject = False
    if pq.verb:
        if pq.verb.lemma_ == "say":
            # "say" questions return the raw hits without NLP filtering.
            doFilter = False
            thisUserNames = {}
        elif pq.verb.lemma_ == "think" and pq.objects:
            # "think" + objects: the objects become the subjects below.
            objectIsSubject = True
            thisUserNames = {}
        else:
            predicates.add(pq.verb.text)
    for o in pq.objects:
        if o.is_digit:
            predicates.add("({0} OR +{0})".format(o.text))
        else:
            predicates.add(o.text)
    if pq.other_obj_words:
        predicates.update([o.lemma_ for o in pq.other_obj_words])
    subjectId = None
    if pq.subject_val:
        subjectId = users.get(pq.subject_val, None)
    searchtext = " AND ".join(predicates)
    with timer.sub_timer("combined-query") as t:
        res = self.index.query(searchtext, self.maxResults, restrictUser, expand=True, userNames=thisUserNames, dedupe=True, timer=t)
    want_bool = False
    any_subj = False
    pred = None
    filteredResults = []
    if objectIsSubject:
        subjects = [o.lemma_ for o in pq.objects]
        want_bool = True  # we allow "subject verb" for "what do we think"
    else:
        # NOTE(review): `subjects` is only bound for We/User subjects or a
        # "who" question word; in any other case the checkVerbFull call
        # below raises NameError, which the per-result except then logs.
        if pq.subject_type == question.SubjectTypes.We:
            subjects = [k for k, v in users.items()] + ["we"]
        elif pq.subject_type == question.SubjectTypes.User:
            subjects = [k for k, v in users.items() if v == subjectId]
        # NOTE(review): pq.verb is tested for truthiness above but
        # dereferenced unconditionally here.
        pred = pq.verb.lemma_
        if pq.objects:
            pred = pred + " " + " ".join([o.lemma_ for o in pq.objects])
        if pq.question_word:
            if pq.question_word.lemma_ == "who":
                subjects = [k for k, v in users.items()]
                any_subj = True
        else:
            want_bool = True
    with timer.sub_timer("subject-filter") as t:
        for r in res:
            if len(filteredResults) >= 10:
                break
            try:
                if not doFilter:
                    filteredResults.append(r)
                    continue
                doc = stripMentions(r[1], users)
                # allow_i is forwarded to checkVerbFull as `subj_i`.
                allow_i = False
                if (pq.subject_type == question.SubjectTypes.We or any_subj) and not objectIsSubject:
                    allow_i = True
                elif pq.subject_type == question.SubjectTypes.User and not objectIsSubject:
                    allow_i = (r[0] == subjectId)
                # author subj not impl
                output = subject.checkVerbFull(doc, subjects, pred, want_bool, timer=t, subj_i=allow_i)
                if output:
                    filteredResults.append((r[0], output["extract"]))
            except Exception as e:
                self.log("Exception while doing NLP filter: {0}".format(e))
    if filteredResults:
        return filteredResults
    return None
async def respondReact(self, prefix, suffix, message, timer=NoTimer()):
    """Let `self.reactor`, when one is set, react to `message`.

    Returns:
        Always None (no text reply is produced).
    """
    if not self.reactor:
        return None
    await self.reactor.react(message, self.client)
    return None
async def respondUserTerms(self, prefix, suffix, message, timer=NoTimer()):
    """List the highest-scoring terms for the user named in `suffix`.

    The user name is `suffix` with a trailing " talk about" / " talk about?"
    removed. The terms come from the remote "userTerms" call made through
    `wsClient`. While that call is pending, a background task adds one
    "thinking" emoji to the message every 10 seconds.

    Returns:
        A code-fenced list of up to 25 "<term> (score: <n>)" lines per
        user, or None when nothing was found or any step failed (failures
        are logged).
    """
    try:
        name = re.sub(r" talk about\??", "", suffix).strip()
        uid = self.userNameCache[name]
        self.log("Getting terms")
        await self.client.add_reaction(message, "👍🏻")
        done = False

        async def thinking():
            # Best-effort progress indicator; must never raise.
            try:
                order = list(range(len(Soph.thinkingEmojis)))
                random.shuffle(order)
                for i in order:
                    await asyncio.sleep(10)
                    if done:
                        # Lookup finished: stop instead of sleeping through
                        # the remaining emoji.
                        break
                    try:
                        # Awaited directly so that a rejected emoji really
                        # reaches the handler below.
                        await self.client.add_reaction(
                            message, Soph.thinkingEmojis[i])
                    except Exception:
                        self.log("{0}th emoji was invalid".format(i))
            except Exception:
                pass
            return True

        asyncio.ensure_future(thinking())
        try:
            terms = await wsClient.call(8888, message.server.id, "call",
                                        "userTerms", {uid: name},
                                        corpusThresh=0, minScore=0)
        finally:
            done = True
        self.log("Got terms")

        self.log("Making list")
        userTerms = collections.defaultdict(list)
        for t in terms:
            userTerms[t[0]].append(t)
        self.log("Collated {0} terms".format(len(terms)))

        parts = []
        for user_terms in userTerms.values():
            ranked = sorted(user_terms, key=lambda x: -x[2])
            parts.append("Important words for {0}:\n".format(name))
            for tup in ranked[:25]:
                try:
                    parts.append("\t{0} (score: {1})\n".format(
                        tup[1], int(tup[2])))
                except Exception as e:
                    # Skip a malformed entry, but leave a trace.
                    self.log(e)
        ret = "".join(parts)
        if ret:
            return "```" + ret + "```"
    except Exception as e:
        self.log(e)
    return None
async def respondUserVerb(self, prefix, suffix, message, want_bool=False, timer=NoTimer()):
    """Answer "<user> <verb> ..." from the indexed lines.

    The first name in `self.userNameCache` that `suffix` starts with
    (followed by a space) selects the user; the rest of `suffix` becomes
    the predicate. Index hits are checked with `subject.checkVerb`: lines
    written by the user are checked without an explicit subject, other
    lines against the user's id and each of the user's names.

    Returns:
        One "<user>: <extract>" line per match (at most ten); "I don't
        know" / "I'm not sure what ..." when nothing matched; None when
        `suffix` does not start with a known user name.
    """
    reload(subject, "subject.py")
    self.reloadIndex()
    userIds = await self.loadAllUsers()

    subj = uid = pred = None
    for candidate, candidate_id in self.userNameCache.items():
        # One prefix test covers both the name and the following space,
        # and cannot index past the end when suffix equals the name.
        if suffix.startswith(candidate + " "):
            subj, uid = candidate, candidate_id
            pred = self.makeQuery(suffix[len(candidate):].strip())
            break
    if subj is None:
        # No known user: previously this fell through with unbound or stale
        # variables. A falsy result lets the dispatcher try other handlers.
        return None

    # The id itself plus every name mapped to it.
    thisUserWords = [uid] + [
        _name for _name, _id in self.userNameCache.items() if _id == uid
    ]

    with timer.sub_timer("combined-query") as t:
        res = self.index.query(pred, 100, uid, expand=True,
                               userNames=thisUserWords, dedupe=True, timer=t)

    filteredResults = []
    with timer.sub_timer("subject-filter") as t:
        for r in res:
            if len(filteredResults) >= 10:
                break
            try:
                doc = r[1]
                output = None
                if uid == r[0]:
                    output = subject.checkVerb(doc, None, pred, want_bool,
                                               timer=t)
                else:
                    for n in thisUserWords:
                        output = subject.checkVerb(doc, n, pred, want_bool,
                                                   timer=t)
                        if output:
                            break
                if output:
                    filteredResults.append((r[0], output["extract"]))
            except Exception as e:
                # One unparsable line must not abort the answer; skip it,
                # but leave a trace instead of swallowing it silently.
                self.log(e)

    if filteredResults:
        return "\n".join([
            "{0}: {1}".format(userIds.get(r[0], r[0]), r[1])
            for r in filteredResults
        ])
    if " " in pred:
        return "I don't know"
    return "I'm not sure what {0} {1}s".format(subj, pred)