def test_emoji_lis():
    """Check ``emoji.emoji_lis`` against a table of inputs and expected results."""
    cases = [
        ("Hi, I am fine. ๐", [{'location': 15, 'emoji': '๐'}]),
        ("Hi", []),
        (
            "Hello ๐ซ๐ท๐",
            [
                {'emoji': '๐ซ๐ท', 'location': 6},
                {'emoji': '๐', 'location': 8},
            ],
        ),
    ]
    for text, expected in cases:
        assert emoji.emoji_lis(text) == expected
def extract_emoji(text):
    """Return the emoji strings found in *text*, in the order ``emoji_lis`` reports them."""
    return [entry['emoji'] for entry in e.emoji_lis(text)]
def join_emoji(bigram):
    """Normalise a bigram to a string, gluing it together if it contains emoji.

    Args:
        bigram: Either a tuple of strings (joined with single spaces) or an
            already-joined string.

    Returns:
        The bigram as a string. When ``emoji.emoji_lis`` finds at least one
        emoji in it, every space is removed; otherwise it is returned as is.
    """
    # isinstance also accepts tuple subclasses (e.g. namedtuples), which the
    # previous exact type comparison passed on un-joined.
    if isinstance(bigram, tuple):
        bigram = ' '.join(bigram)
    if emoji.emoji_lis(bigram):
        bigram = bigram.replace(' ', '')
    return bigram
def remove_emoji_by_polarity(self, string, p_rng=(-1, 1)):
    """Strip every emoji whose polarity lies inside ``p_rng`` from *string*.

    Args:
        string: Text to clean.
        p_rng: Two-item sequence ``(low, high)``; an emoji is removed when
            ``low <= polarity <= high``. Defaults to ``(-1, 1)``.

    Returns:
        The text with the selected emoji (and the whitespace that directly
        followed each of them) removed, right-stripped.
    """
    low, high = p_rng[0], p_rng[1]
    # Walk from the last emoji to the first so that cutting text never
    # invalidates the locations of the emoji still to be processed.
    for emoji_data in reversed(emoji_lis(string)):
        # Look the fields up by key rather than relying on dict value order.
        start = emoji_data['location']
        symbol = emoji_data['emoji']
        polarity = self.get_df_by_emoji(symbol)['polarity'].iloc[0]
        if low <= polarity <= high:
            # An emoji can span several code points (flags, ZWJ sequences),
            # so cut its full length instead of a single character.
            string = string[:start] + string[start + len(symbol):].lstrip()
    return string.rstrip()
def get_emoji(text: str, client: Client) -> [str, None]:
    """Return the first standard emoji in *text*, else defer to the custom-emoji lookup.

    ``emoji_lis`` is queried with language ``"en"``. When it finds nothing,
    the result of ``ParsingUtils.get_custom_emoji(text, client)`` is returned
    unchanged.
    """
    found = emoji_lis(text, "en")
    if not found:
        return ParsingUtils.get_custom_emoji(text, client)
    return found[0]["emoji"]
def handle_data(self, text):
    """Append *text* to ``self.paragraph``, rendering emoji with the emoji font.

    The text is split into alternating plain-text and emoji pieces using
    ``emoji.emoji_lis``. Plain text is added as a bold 10pt run in
    ``self.font``; each emoji is added as a 10pt run in ``self.fonts["emoji"]``.

    Args:
        text: The character data to add.
    """
    # Fall back to the normal font when none has been chosen yet (the
    # attribute may not even exist at this point).
    if getattr(self, "font", None) is None:
        self.font = self.fonts["normal"]

    def add_text_run(chunk):
        run = self.paragraph.add_run(chunk)
        run.font.name = self.font
        run.font.size = Pt(10)
        run.bold = True

    def add_emoji_run(chunk):
        run = self.paragraph.add_run(chunk)
        run.font.size = Pt(10)
        run.font.name = self.fonts["emoji"]

    emojis = emoji.emoji_lis(text)
    if not emojis:
        add_text_run(text)
        return

    cursor = 0  # index of the first character not yet written
    for found in emojis:
        start = found["location"]
        # Use the emoji's real length: flags, skin-tone and ZWJ sequences
        # span several code points and must stay together in one run.
        end = start + len(found["emoji"])
        if start > cursor:
            add_text_run(text[cursor:start])
        add_emoji_run(text[start:end])
        cursor = end
    if cursor < len(text):
        add_text_run(text[cursor:])
def get_unicode_emoji_count_from_string(content):
    """Count the standard emoji occurring in *content*.

    Returns:
        A dict mapping ``(name, emoji)`` to the number of occurrences, where
        ``name`` is whatever ``codepoint_to_name`` returns for the emoji.
    """
    count_dict = {}
    for entry in emoji_lis(content):
        symbol = entry["emoji"]
        key = (codepoint_to_name(symbol), symbol)
        if key in count_dict:
            count_dict[key] += 1
        else:
            count_dict[key] = 1
    return count_dict
def get_emojis(string):
    """Collect standard emoji and custom ``<:name:id>`` emoji names from *string*.

    Returns:
        A list with the standard emoji first (as reported by
        ``emoji.emoji_lis``), followed by the ``:name:`` part of every
        custom-emoji tag matched by the pattern below.
    """
    custom_pattern = r'<(a?)(:[A-Za-z0-9_]+:)([0-9]+)>'
    found = [entry['emoji'] for entry in emoji.emoji_lis(string)]
    # Each regex match is a tuple of groups; index 1 is the ":name:" group.
    found.extend(groups[1] for groups in re.findall(custom_pattern, string))
    return found
def test_emoji_lis():
    """Check ``emoji.emoji_lis`` locations, including multi-emoji input on wide builds."""
    first_hit = emoji.emoji_lis('Hi, I am ๐ test')[0]
    assert first_hit['location'] == 9
    assert emoji.emoji_lis('Hi') == []

    # Skip the remaining checks on Python builds using UCS-2, where string
    # lengths/positions are different.
    if not len('Hello ๐ซ๐ท๐') < 10:
        return

    expected_single = [{'location': 15, 'emoji': '๐'}]
    assert emoji.emoji_lis('Hi, I am fine. ๐') == expected_single

    expected_pair = [
        {'emoji': '๐ซ๐ท', 'location': 6},
        {'emoji': '๐', 'location': 8},
    ]
    assert emoji.emoji_lis('Hello ๐ซ๐ท๐') == expected_pair
def emoji_parser(doc):
    """Replace every non-ASCII character in *doc* with ``?`` except emoji.

    Args:
        doc: Input text.

    Returns:
        The text with non-ASCII characters turned into ``?`` while each emoji
        found by ``emoji.emoji_lis`` is kept intact at its original position.
    """
    emoji_list = emoji.emoji_lis(doc)
    # ASCII-encoding with "replace" yields exactly one "?" per code point, so
    # this list stays index-aligned with the locations reported above.
    chars = list(doc.encode("ascii", "replace").decode('utf-8'))
    for em in emoji_list:
        start = em['location']
        symbol = em['emoji']
        # Put the whole emoji into its first slot and blank the slots of its
        # remaining code points. The list length never changes, so later
        # locations stay valid and no stray "?" is left behind.
        chars[start] = symbol
        for offset in range(1, len(symbol)):
            chars[start + offset] = ''
    return ''.join(chars)
def clean_tweet(text):
    """Process the text of a tweet.

    Parameters:
        text (string): The input string (tweet) which is to be preprocessed.

    Returns:
        tuple: ``(words, hashtagwords)`` where ``words`` is the list of tokens
        of the processed tweet (URL, mention and ordinal placeholders removed)
        and ``hashtagwords`` is the list of hashtag words without the ``#``.
    """
    # convert to lower case; the upper-case placeholders inserted below can
    # therefore never collide with real words
    text = text.lower()

    # detect and replace URLs
    reg = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|" \
          r"(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    text = re.sub(reg, 'URL', text)

    # detect and replace @ mentions
    text = re.sub(r"(?:\s|^)@[\w_-]+", ' MENTION', text)

    # replace & with "and"
    text = text.replace("&", ' and ')
    # drops the literal two-character sequence backslash + "n"
    text = text.replace(r"\n", "")

    # collect the hashtag words, then remove the # sign; the capture group
    # keeps the whitespace that precedes a hashtag out of the result
    hashtagwords = re.findall(r'(?:\s|^)#([\w_-]+)', text)
    text = text.replace("#", "")

    # detect and replace ordinals
    text = re.sub(r"[0-9]+(?:st|nd|rd|th)", 'ORDINAL', text)

    # convert emoji: pad each with spaces so it becomes its own token, then
    # turn it into its textual name
    for emoj in emoji.emoji_lis(text):
        text = text.replace(emoj['emoji'], " " + emoj['emoji'] + " ")
    text = emoji.demojize(text)

    # encoding punctuation
    text = text.replace("_", "")
    text = text.replace("'", '')
    text = text.replace("โ", '')
    text = text.replace(".", " . ")
    text = text.replace(",", " , ")
    text = text.replace("?", " ? ")
    text = text.replace("!", " ! ")

    # remove all characters except a to z and the listed punctuation
    # (i.e. numbers etc)
    text = re.sub(r"[^A-Za-z'โ,.?_!]+", ' ', text)

    special_words = ["MENTION", "ORDINAL", "URL"]
    words = [word for word in text.split() if word not in special_words]
    return words, hashtagwords
def count_emoji(text):
    """Count the emoji in *text* and write the ranking to ``most_used_emoji.csv``.

    The CSV has the columns ``Emoji`` and ``count``, sorted by descending count.
    Nothing is returned.
    """
    tally = Counter(entry['emoji'] for entry in emoji.emoji_lis(text))
    ranking = (
        pd.DataFrame.from_dict(tally, orient='index')
        .reset_index()
        .rename(columns={'index': 'Emoji', 0: 'count'})
        .sort_values('count', ascending=False)
    )
    ranking.to_csv('most_used_emoji.csv')
async def post(self, request: Request, data: dict) -> Response:
    """Handle the POST request for registration.

    Fills *data* with a fresh webhook id, an optional cloudhook URL, an
    optional encryption secret and the requesting user's id, normalises the
    device name, starts the config flow and answers with the connection
    details (HTTP 201).
    """
    hass = request.app["hass"]

    webhook_id = secrets.token_hex()

    if hass.components.cloud.async_active_subscription():
        cloudhook_url = await hass.components.cloud.async_create_cloudhook(
            webhook_id
        )
        data[CONF_CLOUDHOOK_URL] = cloudhook_url

    data[CONF_WEBHOOK_ID] = webhook_id

    if data[ATTR_SUPPORTS_ENCRYPTION] and supports_encryption():
        data[CONF_SECRET] = secrets.token_hex(SecretBox.KEY_SIZE)

    data[CONF_USER_ID] = request["hass_user"].id

    # Keep DEVICE_NAME as long as it slugifies to something non-empty.
    device_name = data[ATTR_DEVICE_NAME]
    if not slugify(device_name, separator=""):
        if emoji.emoji_count(device_name):
            # Otherwise-empty name that contains emoji: use the descriptive
            # name of the first emoji.
            first_emoji = emoji.emoji_lis(device_name)[0]["emoji"]
            data[ATTR_DEVICE_NAME] = emoji.demojize(first_emoji).replace(":", "")
        else:
            # Fallback to DEVICE_ID
            data[ATTR_DEVICE_NAME] = data[ATTR_DEVICE_ID]

    await hass.async_create_task(
        hass.config_entries.flow.async_init(
            DOMAIN, data=data, context={"source": "registration"}
        )
    )

    remote_ui_url = None
    try:
        remote_ui_url = hass.components.cloud.async_remote_ui_url()
    except hass.components.cloud.CloudNotAvailable:
        # No remote UI available; report None for it.
        pass

    payload = {
        CONF_CLOUDHOOK_URL: data.get(CONF_CLOUDHOOK_URL),
        CONF_REMOTE_UI_URL: remote_ui_url,
        CONF_SECRET: data.get(CONF_SECRET),
        CONF_WEBHOOK_ID: data[CONF_WEBHOOK_ID],
    }
    return self.json(payload, status_code=HTTP_CREATED)
def _emoji_description_extractor(text: str):
    """
    Used to extract the description of emojis used in the given text.

    Each emoji is demojized, underscores become spaces, colons are dropped
    and the result is stripped.
    """
    descriptions = []
    for entry in emoji.emoji_lis(text):
        alias = emoji.demojize(entry.get("emoji"))
        descriptions.append(alias.replace("_", " ").replace(":", "").strip())
    return descriptions
def extract_emoji_terms(text):
    """Return the distinct emoji names found in *text* as one space-joined string.

    Names come from ``emoji.demojize`` with colons removed and underscores
    turned into spaces. They pass through a set, so their order in the result
    is not defined.
    """
    unique_terms = {
        emoji.demojize(entry['emoji']).replace(':', '').replace('_', ' ')
        for entry in emoji.emoji_lis(text)
    }
    return ' '.join(unique_terms)
def tag_emoji_subject(line):
    """Return the categories of the emoji found in *line*.

    For every emoji, each entry of ``emoji_Class`` whose ``'char'`` equals the
    emoji's code from ``EMOJI_UNICODE`` contributes its ``'category'``. A
    message is printed for an emoji that matches no entry.
    """
    ret_cat = []
    for found in emoji.emoji_lis(line):
        alias = emoji.demojize(found['emoji'])
        ucode = emoji.unicode_codes.EMOJI_UNICODE[alias]
        categories = [em['category'] for em in emoji_Class if em['char'] == ucode]
        ret_cat.extend(categories)
        if not categories:
            print("f**k")
    return ret_cat
def __init__(self, line: str):
    """Parse one exported chat line into sender, content, date and emojis.

    The date is read from fixed character positions (``dd/mm/yyyy`` at
    0-10) and the sender starts at position 19. A line without a ``:`` after
    the sender (quoted parts ignored) is treated as a group event. Any
    parsing error is printed and leaves the fields parsed so far in place.
    """
    self.sender = None
    self.content = ""
    self.date = None
    self.emojis = list()
    self.group_event = False

    # Group-event keywords, checked in this order; the first one found wins.
    # TODO: check the exact format of the "removeu", "adicionou" and "saiu"
    # lines.
    event_keywords = (
        "removeu",
        "adicionou",
        "saiu",
        "mudou o nome de",
        "mudou a imagem",
        "apagou a imagem",
    )

    try:
        year, month, day = int(line[6:10]), int(line[3:5]), int(line[0:2])
        self.date = datetime.date(year, month, day)

        pattern = " \".[^\"]*\""
        tail = line[19:]
        if ":" in re.sub(pattern, "", tail):
            # Regular message: "<sender>: <content>"
            offset = tail.index(":") + 19
            self.sender = line[19:offset]
            self.content = line[offset + 2:]
        else:
            # is a group event line
            self.group_event = True
            for keyword in event_keywords:
                if keyword not in line:
                    continue
                offset = line.index(keyword) - 1
                self.sender = line[19:offset]
                if keyword == "saiu":
                    self.content = "saiu"
                else:
                    self.content = line[offset + 1:]
                break

        if emoji.emoji_count(self.content) > 0:
            self.emojis = emoji.emoji_lis(line)
    except Exception as e:
        print(e)

    if self.sender == "Vocรช":
        self.sender = MY_WHATSAPP_USERNAME
def emoji_count(tweet):
    """Count, per user, how many emoji of *tweet* are among that user's most common ones.

    The per-user emoji are read from ``most_common_5_emojis.json``, a mapping
    from user id to that user's emoji. Entries may be plain emoji strings or
    ``[emoji, count]`` pairs (the shape ``Counter.most_common`` takes after a
    JSON round trip).

    Args:
        tweet: Text of the tweet.

    Returns:
        A list of 10 ints; position ``u`` is the number of emoji occurrences
        in the tweet that belong to user ``u``'s most common emoji.
    """
    with open('most_common_5_emojis.json', 'r') as fp:
        most_common_emojis = json.load(fp=fp)

    # JSON object keys are always strings, so convert them back to the
    # integer user ids used to index the result. Reduce each entry to the
    # bare emoji so the membership test compares like with like.
    emojis_by_user = {
        int(user): {
            entry[0] if isinstance(entry, (list, tuple)) else entry
            for entry in entries
        }
        for user, entries in most_common_emojis.items()
    }

    emoji_count_list = [0] * 10
    for emoji_dict in emoji.emoji_lis(tweet):
        current_emoji = emoji_dict['emoji']
        for user, user_emojis in emojis_by_user.items():
            if current_emoji in user_emojis:
                emoji_count_list[user] += 1
    return emoji_count_list
def emoji_to_types(string):
    """
    Tool to extract emojis from string of text

    Input: string with some (if any) emojis
    Output: dictionary mapping each discovered label to the list of emojis
            carrying it. The label is looked up in the module-level ``df``
            through the emoji's alias.
    """
    result = dict()
    for found in emoji.emoji_lis(string):
        symbol = found['emoji']
        descr = emoji.UNICODE_EMOJI_ALIAS[symbol]
        matching_rows = df[df.EMOJI_ALIAS_UNICODE == descr]
        cat = matching_rows.VEC_TO_CAT.item()
        result.setdefault(cat, []).append(symbol)
    return result
def parse_option(cls, opt_line: str) -> VoteOption:
    """Split a vote option line into its leading emoji and its message.

    ``cls.emoji_regex`` is tried first. If it does not match, the line must
    start with a unicode emoji.

    Raises:
        cls.NotEmojiError: the line starts with neither kind of emoji.
    """
    matches = cls.emoji_regex.match(opt_line)
    if matches is not None:
        opt_emoji = matches.group(1)
        opt_message = matches.group(2).strip()
        return cls.VoteOption(opt_emoji, False, opt_message, 0)

    # it is not a disnake emoji, try unicode
    found = emoji.emoji_lis(opt_line)
    if not found or found[0]['location'] != 0:
        raise cls.NotEmojiError(opt_line)

    opt_emoji = found[0]['emoji']
    opt_message = opt_line[len(opt_emoji):].strip()
    return cls.VoteOption(opt_emoji, True, opt_message, 0)
def __init__(self, line: str, user_typed: bool):
    """Parse one exported chat line into sender, content, date and emojis.

    Only lines starting with ``[`` are parsed: the date is read from fixed
    character positions (``dd/mm/yyyy`` at 1-11) and the sender starts at
    position 22. A line without a ``:`` after the sender (quoted parts
    ignored) is treated as a group event. Any parsing error is printed.
    """
    self.user_typed = user_typed
    self.sender = None
    self.content = ""
    self.date = None
    self.emojis = list()
    self.group_event = False

    # Group-event keywords, checked in this order; the first one found wins.
    event_keywords = (
        "removeu",
        "adicionou",
        "saiu",
        "mudou o nome do grupo para",
        "mudou a imagem",
    )

    try:
        if line[0] == "[":
            year, month, day = int(line[7:11]), int(line[4:6]), int(line[1:3])
            self.date = datetime.date(year, month, day)

            pattern = " \".[^\"]*\""
            tail = line[22:]
            if ":" in re.sub(pattern, "", tail):
                # Regular message: "<sender>: <content>"
                offset = tail.index(":") + 22
                self.sender = line[22:offset]
                self.content = line[offset + 2:]
            else:
                # is a group event line
                self.group_event = True
                for keyword in event_keywords:
                    if keyword not in line:
                        continue
                    offset = line.index(keyword) - 1
                    self.sender = line[22:offset]
                    if keyword == "saiu":
                        self.content = "saiu"
                    else:
                        self.content = line[offset + 1:]
                    break

        if emoji.emoji_count(self.content) > 0:
            self.emojis = emoji.emoji_lis(line)
    except Exception as e:
        print(e)

    if self.sender == "Vocรช":
        self.sender = MY_WHATSAPP_USERNAME
def find_roles(self, embed: discord.Embed, guild: discord.Guild):
    """Map reaction emoji to guild roles based on the fields of *embed*.

    A field contributes an entry when its value contains at least one unicode
    emoji and starts with a role mention (``<@&id>``) whose role exists in
    *guild*. The first emoji of the value becomes the key.

    Returns:
        dict mapping emoji string to ``discord.Role``.
    """
    reaction_to_role = {}
    for field in embed.fields:
        emoji_list = emoji.emoji_lis(field.value)
        if not emoji_list:
            continue

        # Match digits only. A greedy ".+" would run on to the last ">" in
        # the value (e.g. a second mention or a custom emoji tag) and make
        # int() fail.
        match = re.match(r"<@&(\d+)>", field.value)
        if match is None:
            continue

        role = guild.get_role(int(match.group(1)))
        if role is not None:
            reaction_to_role[emoji_list[0]["emoji"]] = role
    return reaction_to_role
async def fetch_content(self, ctx, *, args=None):
    """Send *args* back with mentions, markdown and emoji backslash-escaped.

    Args:
        ctx: Invocation context; the reply goes to ``ctx.send``.
        args: The text to escape. When missing, a hint is sent instead.
    """
    if args is None:
        await ctx.send("please send actual text")
        return
    if not args:
        # NOTE(review): assumes the final send was meant to run only when
        # there is text to send - confirm against the original layout.
        return

    args = discord.utils.escape_mentions(args)
    args = discord.utils.escape_markdown(args, as_needed=False, ignore_links=False)

    # "\\" is an explicit backslash; the previous f"\{...}" form relied on an
    # invalid escape sequence.
    for user in ctx.message.mentions:
        args = args.replace(user.mention, "\\" + user.mention)

    # str.replace already handles every occurrence, so each distinct emoji or
    # token must be escaped only once. Escaping once per occurrence would
    # stack backslashes. dict.fromkeys de-duplicates while keeping order.
    distinct_emojis = dict.fromkeys(
        found["emoji"] for found in emoji.emoji_lis(args)
    )
    for symbol in distinct_emojis:
        args = args.replace(symbol, "\\" + symbol)

    for token in dict.fromkeys(re.findall(r":\w*:\d*", args)):
        args = args.replace(token, "\\" + token)

    await ctx.send(f"{args}")
def on_buttonpress():
    """Rebuild the top-10 emoji bar chart when the button is pressed.

    Does nothing while ``text_input`` is empty. Otherwise the emoji of all
    items in ``t`` are counted, the ten most frequent are pushed into
    ``source_emoji`` and the plot ``p`` is updated with bars and emoji labels.
    """
    if text_input.value == "":
        return

    # NOTE(review): the result is not used below, which reads the
    # module-level ``t`` instead - presumably get_tweets() refreshes it;
    # confirm.
    tweets = get_tweets()

    # Flatten every emoji occurrence of every item into one list.
    em = []
    for group in t:
        for item in group:
            allemojis = "".join(item['emoji'])
            for found in emoji.emoji_lis(allemojis):
                em.append(found['emoji'])

    emoji_series = pd.Series(em)
    emojis = pd.DataFrame(
        emoji_series.value_counts()).reset_index().rename(
            columns={
                'index': 'emoji',
                0: 'Count'
            })
    # Ranks run 1..len(emojis) inclusive. The previous range stopped one
    # short, leaving the last row with a NaN rank that could not be
    # converted to int.
    emojis['Rank'] = range(1, len(emojis) + 1)
    emojis = emojis.head(10)

    source_emoji.data = dict(emoji=emojis['emoji'],
                             Count=emojis['Count'],
                             Rank=emojis['Rank'])
    labels = LabelSet(x="Rank",
                      y="Count",
                      text="emoji",
                      level='glyph',
                      render_mode='canvas',
                      source=source_emoji,
                      x_offset=-12,
                      text_font_size="12pt")
    p.vbar(x="Rank", top="Count", width=0.95, source=source_emoji)
    p.xaxis.minor_tick_line_color = None  # turn off x-axis minor ticks
    p.yaxis.minor_tick_line_color = None  # turn off y-axis minor ticks
    p.y_range.start = 0
    p.x_range.start = 0
    p.xaxis[0].ticker.desired_num_ticks = 10
    p.add_layout(labels)
async def poll(self, ctx, *, a: str):
    """Post *a* as a poll and react to it with every emoji it contains.

    Args:
        ctx: Invocation context; the poll is sent with ``ctx.send``.
        a: Poll text. Unicode emoji and ``<...>`` custom-emoji tags in it
            become the reactions, unicode emoji first.
    """
    author_id = ctx.message.author.id

    # Take the emoji straight from emoji_lis. The previous str -> demojize ->
    # regex -> emojize round trip gave the same list but broke on emoji whose
    # name contains an apostrophe.
    unicode_emojis = [found['emoji'] for found in emoji.emoji_lis(a)]
    custom_emojis = ['<' + inner + '>' for inner in re.findall(r'<([^>]*)>', a)]
    all_emojis = unicode_emojis + custom_emojis

    poll_message = await ctx.send(
        "**Poll from** <@" + str(author_id) + ">**!!**\n" + a)

    for reaction in all_emojis:
        try:
            await poll_message.add_reaction(reaction)
        except Exception:
            # Best effort: an unusable emoji must not abort the remaining
            # reactions. Unlike a bare "except", this lets task cancellation
            # and interpreter-exit signals propagate.
            print("Emoji " + reaction + " not found")
async def process_message(message, add_count_fn):
    """Report every emoji used in *message* (content and reactions) to *add_count_fn*.

    ``add_count_fn`` is called as ``(date, channel, name)`` for each custom
    and standard emoji in the content, and as ``(date, channel, name, count)``
    for each reaction.
    """
    message_date = date_to_string(message.created_at)
    channel = str(message.channel)

    # Custom emoji appear as "<:name:id>"; the name is the second ":" field.
    for tag in re.findall(r'<:\w*:\d*>', message.content):
        custom_name = tag.split(':')[1].replace('>', '')
        add_count_fn(message_date, channel, custom_name)

    for found in emoji_lis(message.content):
        standard_name = demojize(found["emoji"], delimiters=("", ""))
        add_count_fn(message_date, channel, standard_name)

    for react in message.reactions:
        reacted = react.emoji
        # Reactions are either plain strings or objects exposing ``.name``.
        if isinstance(reacted, str):
            reaction_name = demojize(reacted, delimiters=("", ""))
        else:
            reaction_name = reacted.name
        add_count_fn(message_date, channel, reaction_name, react.count)
def create_top_k_emoji_json(k: int):
    """Write each user's *k* most used emoji to ``most_common_{k}_emojis.json``.

    Reads ``./train_full`` (columns ``user`` and ``tweets``), counts emoji per
    user for the users 0-9 and dumps ``Counter.most_common(k)`` for each.
    """
    df = pd.read_csv(r'./train_full')
    emoji_counter: Dict[int, Dict] = {user_id: {} for user_id in range(0, 10)}

    for row in range(len(df['tweets'])):
        user: int = df['user'][row]
        user_counts = emoji_counter[user]
        for found in emoji.emoji_lis(df['tweets'][row]):
            symbol = found['emoji']
            user_counts[symbol] = user_counts.get(symbol, 0) + 1

    top_k_emojis = {}
    for user_id in range(0, 10):
        top_k_emojis[user_id] = Counter(emoji_counter[user_id]).most_common(k)

    with open(f'most_common_{k}_emojis.json', 'w') as fp:
        json.dump(top_k_emojis, fp)
def tweet_preprocessor(tweets):
    """
    Clean a list of tweets with the tweet-preprocessor and emoji libraries.

    Removed from every tweet:
        - #Hashtags
        - @Mentions
        - Emojis
        - URLs
        - Standalone numbers
    All letters are lower-cased.

    Args:
        tweets: list containing all tweets

    Returns:
        clean_tweets: lower-cased preprocessed list of tweets

    Sources:
        https://pypi.org/project/tweet-preprocessor/
        https://github.com/carpedm20/emoji/
    """
    # Global library options: which elements of a tweet tp.clean() removes.
    tp.set_options(tp.OPT.URL, tp.OPT.MENTION, tp.OPT.NUMBER, tp.OPT.HASHTAG)

    def without_emojis(tweet):
        # Drop every emoji reported for the original tweet text.
        for found in emoji.emoji_lis(tweet):
            tweet = tweet.replace(found['emoji'], '')
        return tweet

    return [tp.clean(without_emojis(tweet)).lower() for tweet in tweets]
def parse_emojis(text):
    """Return the demojized name of every emoji in *text*, in order of appearance."""
    names = []
    for found in emoji.emoji_lis(text):
        names.append(emoji.demojize(found['emoji']))
    return names
def get_emotes(emotes: str) -> str:
    """Return only the emoji contained in *emotes*, concatenated.

    Raises:
        SyntaxError: more than 24 emoji were found.
    """
    from emoji import emoji_lis

    found = emoji_lis(emotes)
    if len(found) > 24:
        raise SyntaxError("You cant put more than 24 emotes in a message.")
    symbols = [entry["emoji"] for entry in found]
    return "".join(symbols)