def applyManualCorrections(acronymDB):
    """Replace known-wrong expansions in *acronymDB* using the corrections CSV.

    The CSV at ``file_msh_manual_corrections`` is read with ``DictReader``;
    each row supplies the columns ``acronym``, ``wrong_expansion`` and
    ``correct_expansion``.  For every entry stored under the row's acronym
    whose first element equals the wrong expansion, that element is
    overwritten in place with the correct expansion.

    Args:
        acronymDB: Mapping from acronym to a list of mutable entries whose
            element 0 is the expansion.  It is modified in place.

    Returns:
        The same ``acronymDB`` object, after corrections.

    Note:
        ``acronymDB[acronym]`` is indexed directly, so a correction row for
        an acronym missing from a plain dict raises ``KeyError``.
    """
    # The file mode is left exactly as before; the context manager only
    # guarantees the handle is closed, including when a row raises.
    with open(file_msh_manual_corrections, "rb") as corrections_file:
        for line in DictReader(corrections_file, delimiter=","):
            acronym = TextTools.toUnicode(line["acronym"])
            wrong_exp = TextTools.toUnicode(line["wrong_expansion"])
            correct_exp = TextTools.toUnicode(line["correct_expansion"])
            for entry in acronymDB[acronym]:
                if entry[0] == wrong_exp:
                    entry[0] = correct_exp
    return acronymDB
def serialGetWordCorpus(articleDB):
    """Build the cleaned-word form of every article, one article at a time.

    Each text in *articleDB* is passed to ``TextTools.getCleanedWords``
    together with the module-level ``stem_words`` and ``removeNumbers``
    settings.  A debug message is logged whenever the number of converted
    articles reaches a multiple of 1000.

    Args:
        articleDB: Mapping of article id to article text.

    Returns:
        A new dict mapping each article id to the value returned by
        ``TextTools.getCleanedWords`` for its text.
    """
    cleaned_by_id = {}
    for article_id, text in articleDB.items():
        cleaned_by_id[article_id] = TextTools.getCleanedWords(
            text, stem_words=stem_words, removeNumbers=removeNumbers)

        processed = len(cleaned_by_id)
        if processed % 1000:
            # Not at a multiple of 1000 yet: no progress message.
            continue
        common_logger.debug(
            "converted " + str(processed) + " articles to words")
    return cleaned_by_id
def writefile(f, defs):
    """Write *defs* to *f* as the source text of an ``entitydefs`` dict.

    Entries are emitted in sorted item order, one per line, each followed
    by its comment (passed through ``TextTools.collapse``).

    Args:
        f: Object with a ``write`` method that accepts text.
        defs: Mapping of entity name to a ``(charcode, comment)`` pair.
            When a charcode starts with ``&#``, the text between that
            prefix and its final character is parsed as a decimal integer.

    Raises:
        ValueError: If a ``&#`` charcode does not hold a valid integer.
    """
    f.write("entitydefs = {\n")
    for name, (charcode, comment) in sorted(defs.items()):
        # Only numeric character references carry a code point to inspect.
        code = int(charcode[2:-1]) if charcode[:2] == '&#' else None
        if code is not None and code < 256:
            # Emit a quoted backslash-octal escape.  The backslash is
            # escaped explicitly: the previous "\%" spelling was an invalid
            # escape sequence that only worked by accident.
            charcode = "'\\%o'" % code
        else:
            charcode = repr(charcode)
        comment = TextTools.collapse(comment)
        f.write(" '%s':\t%s, \t# %s\n" % (name, charcode, comment))
    f.write('\n}\n')
def writefile(f, defs):
    """Emit *defs* into *f* as Python source defining ``entitydefs``.

    The output is an opening ``entitydefs = {`` line, one line per entity
    in sorted item order, and a closing brace.  Each entity line contains
    the quoted name, the rendered character code and the comment after it
    has been passed through ``TextTools.collapse``.

    Args:
        f: Writable text sink; only its ``write`` method is used.
        defs: Mapping of entity name to a ``(charcode, comment)`` pair.
            A charcode that begins with ``&#`` has everything between that
            prefix and its last character converted with ``int``.

    Raises:
        ValueError: If a ``&#`` charcode cannot be converted with ``int``.
    """
    f.write("entitydefs = {\n")
    for name, (charcode, comment) in sorted(defs.items()):
        rendered = None
        if charcode[:2] == '&#':
            code = int(charcode[2:-1])
            if code < 256:
                # Raw string keeps the literal backslash without relying
                # on the invalid "\%" escape the earlier spelling used.
                rendered = r"'\%o'" % code
        if rendered is None:
            # Named references and codes of 256 and above are written
            # as the repr of the original charcode text.
            rendered = repr(charcode)
        comment = TextTools.collapse(comment)
        f.write(" '%s':\t%s, \t# %s\n" % (name, rendered, comment))
    f.write('\n}\n')
async def summary(self, ctx, *, msg):
    """Send a summary and the profile image for a user.

    The target is the first member mentioned in the invoking message.  If
    nobody is mentioned, *msg* (stripped) is looked up with
    ``User.toon_search`` and the ``discord_id`` of the first result is
    converted to a member with ``MemberConverter``.

    Each item returned by ``TextTools.list_summary`` is sent to the
    invoking channel, then ``Users.get_profile_image`` is awaited for the
    same user.

    Args:
        ctx: Invocation context; its ``message`` provides the mentions,
            channel and guild.
        msg: Remaining command text, used as the character name when no
            member is mentioned.
    """
    mentions = ctx.message.mentions
    if mentions:
        user = mentions[0]
    else:
        user_toon = msg.strip()
        user = None
        results = User.toon_search(user_toon)
        if results.first() is not None:
            # Only the first match is used to resolve the member.
            for x in results:
                converter = MemberConverter()
                user = await converter.convert(ctx, x.discord_id)
                break
        if user is None:
            # Previously execution fell through with a placeholder string
            # and failed on ``user.id``; report the miss instead.
            await ctx.message.channel.send(
                "Could not find character: " + user_toon)
            return

    user_summary = TextTools.list_summary(
        str(user.id), user.joined_at, str(ctx.message.guild.id))
    for x in user_summary:
        await ctx.message.channel.send(x)
    await Users.get_profile_image(self, ctx=ctx, msg=user.mention)
def _createArticleAndAcronymDB():
    """Build the acronym and article databases from the ARFF files.

    Every file in ``folder_msh_arff`` is read with ``arff.Reader``.  The
    relation name of a file, split on ``_``, gives its list of CUIDs, and
    the file name gives the acronym (stored upper-cased).  For each row the
    citation text has its markup removed; the first time a CUID is seen the
    expander is run on that text to find an expansion for it.

    Returns:
        A tuple ``(acronymDB, articleDB)`` where

        * ``acronymDB`` maps an acronym to a list of
          ``[expansion, pmid, 0]`` entries; when no expansion was found for
          a CUID, the CUID itself is stored in the expansion slot and an
          error is logged.
        * ``articleDB`` maps a PMID to the markup-free citation text of the
          first row seen with that PMID.
    """
    acronymExpander = Expander_fromText_v2()
    articleDB = {}
    acronymDB = {}
    CUID_to_expansion = {}

    for fileName in os.listdir(folder_msh_arff):
        filePath = os.path.join(folder_msh_arff, fileName)
        # Close each ARFF file as soon as it has been consumed; the handle
        # was previously left open for every file in the folder.
        with open(filePath, "rb") as arff_file:
            file_reader = arff.Reader(arff_file)
            # the iterator needs to be called for the self.relation part
            # to be initialized
            lines = list(file_reader)
            cuids = file_reader.relation.strip().split("_")

        # storing all acronyms as uppercase values
        acronym = _fileNameToAcronym(fileName).upper()

        cuid_and_pmid = []
        for line in lines:
            pmid = unicode(line.PMID)
            text = TextTools.toUnicode(line.citation)
            cuid = cuids[_classToIndex(line["class"])]
            textWithoutMarkup = _removeMarkup(text)

            if cuid not in CUID_to_expansion:
                # Later rows retry until one yields an expansion that
                # differs from the acronym itself.
                acronymExpansions = []
                acronymExpansions = acronymExpander.expand(
                    acronym, acronymExpansions, textWithoutMarkup)
                if (len(acronymExpansions) != 0
                        and acronymExpansions[0].expansion != acronym):
                    CUID_to_expansion[cuid] = acronymExpansions[0].expansion

            if pmid not in articleDB:
                articleDB[pmid] = textWithoutMarkup
            cuid_and_pmid.append([cuid, pmid])

        if acronym in acronymDB:
            # Entries for this file are still appended to the existing list.
            common_logger.error("acronym already present in acronymDB")
        else:
            acronymDB[acronym] = []

        for cuid, pmid in cuid_and_pmid:
            if cuid in CUID_to_expansion:
                acronymDB[acronym].append([CUID_to_expansion[cuid], pmid, 0])
            else:
                common_logger.error(
                    "Expansion not found for CUID %s of %s" % (cuid, acronym))
                acronymDB[acronym].append([cuid, pmid, 0])

    return acronymDB, articleDB
async def find_toon(self, ctx, *, msg):
    """Report character ownership to the invoking channel.

    With a mentioned member, the characters found by
    ``User.toon_search_by_user`` for the first mention are formatted by
    ``TextTools.list_toons`` and sent one message per item.  Without a
    mention, *msg* (stripped) is treated as a character name and every
    result of ``User.toon_search`` is reported in its own message.  In
    either mode a "could not find" message is sent when the lookup has no
    first result.

    Args:
        ctx: Invocation context; its ``message`` provides the mentions and
            the channel that receives the replies.
        msg: Remaining command text, used as the character name when no
            member is mentioned.
    """
    message = ctx.message
    mentions = message.mentions

    if mentions:
        # Lookup by the first mentioned member.
        member_id = str(mentions[0].id)
        toons = User.toon_search_by_user(member_id)
        if toons.first() is None:
            await message.channel.send(
                "Could not find any characters for: <@!" + member_id + ">")
            return
        for entry in TextTools.list_toons(toons):
            await message.channel.send(entry)
        return

    # Lookup by character name.
    character_name = msg.strip()
    matches = User.toon_search(character_name)
    if matches.first() is None:
        await message.channel.send(
            "Could not find character: " + character_name)
        return
    for match in matches:
        await message.channel.send(
            "Character: " + match.character + " was added to: <@!"
            + match.discord_id + "> on: " + match.timestamp)
#!/usr/local/bin/python