def parse_poll(message):
    """Turn a quoted poll command into a title, an options embed and reactions.

    ``message.content`` is split on double quotes. The text in front of the
    first quote is discarded, the first quoted segment is the poll title and
    every further quoted segment is one option.

    Each option gets one reaction emoji:

    * an option starting with ``<`` uses everything up to and including the
      first ``>`` (guild emote syntax);
    * an option whose first character is recognised by ``emojis.get`` uses
      that emoji;
    * any other option is prefixed with the next unused regional-indicator
      letter (A, B, C, ...).

    Empty quoted segments are skipped.

    :param message: object whose ``content`` attribute holds the command text.
    :returns: list ``[title, embed, reactions]`` where ``title`` is a string,
        ``embed`` is a ``discord.Embed`` listing one option per line and
        ``reactions`` is the list of emoji strings, one per option.
    :raises IndexError: if the content contains no double quote at all.
    :raises StopIteration: if more than 26 options need a default letter.
    """
    # Everything before the first quote is the command itself.
    segments = message.content.split('"')[1:]
    poll_title = "**:bar_chart:" + str(segments[0].strip()) + "**"

    # Regional indicator symbols A-Z, built from code points so the sequence
    # cannot be damaged by the source file's encoding.
    letters = (chr(code) for code in range(0x1F1E6, 0x1F1E6 + 26))

    option_lines = []
    emoji_opt = []
    # After the title, segments alternate between the text separating two
    # quoted strings (odd indices) and the quoted options (even indices).
    for option in segments[2::2]:
        option = option.strip()
        if not option:
            # Nothing to show and no first character to inspect.
            continue
        if option.startswith("<"):
            option_lines.append(option)
            emoji_opt.append(option.split(">")[0] + ">")
            continue
        found = emojis.get(option[0])
        if found:
            option_lines.append(option)
            emoji_opt.append(found.pop())
        else:
            letter = next(letters)
            option_lines.append(letter + ' ' + option)
            emoji_opt.append(letter)

    description = "".join(line + "\n" for line in option_lines)
    poll_options = discord.Embed(description=description, color=poll_color)
    return [poll_title, poll_options, emoji_opt]
def parse_emotes(args):
    """Pairs every argument with the emote that represents it

    param=args: string array representing arguments, generally from
        parse_args()
    returns=list of (emote, text) tuples, one per argument, in input order

    An argument containing '>' is treated as starting with a guild emote:
    everything up to the first '>' is the emote, the rest is the text.
    An argument whose first word is recognised by emojis.get() uses that
    word as the emote. Any other argument receives the next unused default
    emote; next() raises StopIteration once all 26 defaults are used.
    """
    # default emotes if one isn't given: regional indicator letters A-Z,
    # generated from code points so the sequence survives any re-encoding
    # of this source file
    default = (chr(code) for code in range(0x1F1E6, 0x1F1E6 + 26))

    # list to hold the new arguments in tuple form (emote, arg)
    new_args = []

    for arg in args:
        # argument uses a guild emote; split on the FIRST '>' only so that
        # a later '>' inside the text is kept
        if '>' in arg:
            emote, _, text = arg.partition('>')
            new_args.append((f'{emote.strip()}>', text.strip()))
            continue

        words = arg.split(maxsplit=1)
        # argument uses a general emote (possibly with no text after it)
        if words and emojis.get(words[0]):
            text = words[1].strip() if len(words) > 1 else ''
            new_args.append((words[0].strip(), text))
        # argument does not specify an emote
        else:
            new_args.append((next(default), arg.strip()))

    return new_args
def extract_features(tweets):
    """Aggregate n-grams, emojis and hashtags over a collection of tweets.

    Each tweet is read through ``tweet['full_text']`` and
    ``tweet['entities']['hashtags']``. Word n-grams are built from the
    whitespace-split text and character n-grams from the raw text, for every
    size in ``range(*WORD_GRAM_RANGE)`` / ``range(*CHARACTER_GRAM_RANGE)``.

    N-grams are appended in tweet order; hashtags and emojis of each tweet
    are inserted at the FRONT of their lists, so later tweets come first.

    Returns a dict with the keys ``word_grams``, ``character_grams``,
    ``emoji_list`` and ``hashtags``.
    """
    word_sizes = range(*WORD_GRAM_RANGE)
    char_sizes = range(*CHARACTER_GRAM_RANGE)

    word_grams = {size: [] for size in word_sizes}
    character_grams = {size: [] for size in char_sizes}
    emoji_list = []
    hashtags = []

    for tweet in tweets:
        text = tweet['full_text']

        # Compute everything for this tweet before touching the totals.
        per_word = {
            size: list(ngrams(text.split(), size)) for size in word_sizes
        }
        per_char = {size: list(ngrams(text, size)) for size in char_sizes}
        found_emojis = list(emojis.get(text))
        tags = [entry['text'] for entry in tweet['entities']['hashtags']]

        for size, grams in per_word.items():
            word_grams[size].extend(grams)
        for size, grams in per_char.items():
            character_grams[size].extend(grams)

        # Slice assignment at position 0 prepends.
        hashtags[:0] = tags
        emoji_list[:0] = found_emojis

    return {
        "word_grams": word_grams,
        "character_grams": character_grams,
        "emoji_list": emoji_list,
        "hashtags": hashtags,
    }
async def sb_emoji(self, ctx, emoji: typing.Union[discord.Emoji, str] = None):
    """Set or reset the emoji stored for the invoking guild.

    Without an emoji the stored value is reset to ``None``. A guild emoji is
    stored as ``str(emoji)`` provided ``emoji.is_usable()`` is true. A plain
    string is stored unchanged when ``emojis.get`` finds at least one emoji
    in it. In every case a short confirmation or refusal is sent to ``ctx``.
    """
    if not emoji:
        await self.bot.config.upsert({"_id": ctx.guild.id, "emoji": None})
        await ctx.send("Reset your server's custom emoji.")
        return

    # Work out what to store, bailing out early on anything unusable.
    if isinstance(emoji, discord.Emoji):
        if not emoji.is_usable():
            await ctx.send("I can't use that emoji.")
            return
        stored = str(emoji)
    elif emojis.get(emoji):
        stored = emoji
    else:
        await ctx.send("Please use a proper emoji.")
        return

    await self.bot.config.upsert({"_id": ctx.guild.id, "emoji": stored})
    await ctx.send("Added your emoji.")
def count_tweet_emoticons(tweets):
    """Count decoded emojis per category over the tweets that contain any.

    Tweets whose text holds no emoji are ignored entirely (they do not count
    towards the totals either). A tweet is placed in ``CATEGORY_HATE`` when
    ``tweet["hate_speech"]`` is truthy, otherwise in ``CATEGORY_NON_HATE``.
    Every distinct emoji of a tweet adds one to its decoded name.

    Returns a dict holding, per category, a list of ``(name, count)`` pairs
    sorted descending by count and then by name, plus the number of counted
    tweets under ``'hate_total'`` and ``'non_hate_total'``.
    """
    def by_count_then_name(pair):
        name, count = pair
        return (count, name)

    results = {CATEGORY_HATE: {}, CATEGORY_NON_HATE: {}}
    hate_total = 0
    non_hate_total = 0

    for tweet in tweets:
        found = emojis.get(util.get_tweet_text(tweet))
        if not found:
            # Nothing to count in this tweet.
            continue

        if tweet["hate_speech"]:
            category = CATEGORY_HATE
            hate_total += 1
        else:
            category = CATEGORY_NON_HATE
            non_hate_total += 1

        bucket = results[category]
        for symbol in found:
            name = emojis.decode(symbol)
            bucket[name] = bucket.get(name, 0) + 1

    # Replace each counting dict by its ranked list of pairs.
    for category in (CATEGORY_HATE, CATEGORY_NON_HATE):
        results[category] = sorted(
            results[category].items(), key=by_count_then_name, reverse=True
        )

    results['hate_total'] = hate_total
    results['non_hate_total'] = non_hate_total
    return results
def get_emojis(s, itr):
    """Record every emoji found in ``s`` and refresh the running top-10 list.

    Reads and mutates two names defined outside this function: ``emojies``
    (indexed by emoji, holding ``Elements`` objects) and ``top10_emojies``
    (a list of those objects). Nothing is returned.

    :param s: text to scan; it is sliced and passed to ``emojis.get``.
    :param itr: integer passed as third argument to ``Elements`` for emojis
        seen for the first time, after being incremented locally.

    NOTE(review): ``itr`` is only rebound locally and never returned, so the
    caller does not see the increments -- confirm this is intended.
    """
    emoji_list = s[:]  # immediately overwritten on the next line
    emoji_list = emojis.get(s)  # set of emojis in this string
    emoji_list = list(emoji_list)  # list of that set
    if len(emoji_list) > 0:
        for element in emoji_list:
            # Bump the counter of a known emoji, or register a new one.
            if element in emojies:
                emojies[element].increment()
            else:
                itr += 1
                emojies[element] = Elements(element, 1, itr)
            isin = False
            for j in range(0, len(top10_emojies)):  # check all the top 10 so far to change the counter
                if top10_emojies[j].value == element:
                    top10_emojies[j].counter = emojies[element].counter
                    isin = True
            if len(top10_emojies) < 10 and isin is False:
                # Still room in the list: take the emoji unconditionally.
                top10_emojies.append(emojies[element])
            elif emojies[element].counter >= top10_emojies[-1].counter and isin is False:  # the current emoji is now greater than or equal to the lowest entry, so add it to the list
                top10_emojies.append(emojies[element])
                top10_emojies.sort(reverse=True)  # sort the list to put it in its correct place
                top10_emojies.pop()  # pop one element off to keep at 10 total elements
                isin = True  # not read again before being reset on the next iteration
            # Ordering relies on how Elements objects compare -- presumably
            # by counter; verify against the Elements definition.
            top10_emojies.sort(reverse=True)
            if len(top10_emojies) > 10:
                top10_emojies.pop()
async def rr_remove(self, ctx, emoji: typing.Union[discord.Emoji, str]):
    """Remove the reaction role stored under the given emoji.

    A guild emoji is used as ``str(emoji)``. For plain text, one emoji found
    by ``emojis.get`` is used; if the text contains no emoji the user is told
    so and nothing is deleted.

    After deleting the entry from ``self.bot.reaction_roles`` the role embed
    of the invoking guild is rebuilt and a confirmation is sent.
    """
    if not isinstance(emoji, discord.Emoji):
        found = emojis.get(emoji)
        if not found:
            # Popping from an empty set would raise KeyError.
            await ctx.send("Please use a proper emoji.")
            return
        emoji = found.pop()

    emoji = str(emoji)

    await self.bot.reaction_roles.delete(emoji)
    await self.rebuild_role_embed(ctx.guild.id)
    await ctx.send("That should have been removed for you!")
def to_emoji_list(arg: str) -> typing.List[typing.Union[discord.Emoji, str]]: emoji_pattern = re.compile(r'^<a?:([a-zA-Z0-9_]+):(\d+)>$') emoji_list = [] for emoji_code in arg.split(): if emoji_matches := emoji_lib.get(emoji_code): # Emoji is a unicode string if len(emoji_matches) == 1 and emoji_matches.pop() == emoji_code: emoji_list.append(emoji_code) else: raise exceptions.ForbiddenEmoji(emoji_code) else: # Emoji is a custom image # Match custom emoji looking strings if match_result := emoji_pattern.search(emoji_code): # Extract emoji_id from first regex group emoji_name, emoji_id = match_result.group(1, 2) # Get emoji from list of emojis visible by bot if emoji := discord.utils.get(zbot.bot.emojis, id=int(emoji_id)): emoji_list.append(emoji) else:
def getEmojiScore(content):
    """Score ``content`` by the emojis it contains.

    Every distinct emoji found by ``emojis.get`` that is a key of
    ``emoji_dict`` is looked up there; the string form of the looked-up value
    adds one for every equal entry in ``emoji_pos`` and subtracts one for
    every equal entry in ``emoji_neg``. Emojis missing from ``emoji_dict`` are
    ignored. Returns the resulting integer.
    """
    score = 0
    for symbol in list(emojis.get(content)):
        if symbol not in emoji_dict:
            continue
        label = str(emoji_dict[symbol])
        score += sum(1 for positive in emoji_pos if label == positive)
        score -= sum(1 for negative in emoji_neg if label == negative)
    return score
def grab_popular_tweets(topic_list, max_tweets):
    """Search popular English tweets for every topic and collect them.

    For each topic up to ``max_tweets`` tweets are requested through
    ``tweepy.Cursor(api.search, ...)`` in extended mode. One row per tweet is
    built, the rows of a topic are sorted by retweet count and then favorites
    (highest first), and the per-topic frames are concatenated in topic order.

    :param topic_list: iterable of search queries.
    :param max_tweets: maximum number of tweets requested per topic.
    :returns: ``pandas.DataFrame`` with the columns listed in ``columns``.
    """
    columns = [
        'pulled_at', 'created_at', 'username', 'user_location', 'region',
        'search_type', 'trending_topic', 'retweetcount', 'favorites', 'text',
        'hashtags', 'emojis',
    ]
    tweets_data_grab = pd.DataFrame(columns=columns)

    for topic in topic_list:
        cursor = tweepy.Cursor(
            api.search,
            q=topic,
            lang="en",
            result_type='popular',
            tweet_mode='extended',
        )
        tweet_list = list(cursor.items(max_tweets))

        tweets_topic = pd.DataFrame(columns=columns)
        for tweet in tweet_list:
            record = {
                'username': tweet.user.screen_name,
                'user_location': tweet.user.location,
                'retweetcount': tweet.retweet_count,
                'favorites': tweet.favorite_count,
                'hashtags': [
                    tag['text'].lower() for tag in tweet.entities['hashtags']
                ],
                'search_type': 'popular',
                'region': "USA",
                'trending_topic': topic,
                'created_at': tweet.created_at,
                'pulled_at': datetime.datetime.now().strftime(
                    "%Y-%m-%d %H:%M:%S"),
            }

            # A retweet carries the full text on the retweeted status.
            try:
                record['text'] = tweet.retweeted_status.full_text
            except AttributeError:
                record['text'] = tweet.full_text
            record['emojis'] = list(emojis.get(record['text']))

            # Append as the next row, values in column order.
            tweets_topic.loc[len(tweets_topic)] = [
                record[name] for name in columns
            ]

        tweets_topic.sort_values(
            by=['retweetcount', 'favorites'], inplace=True, ascending=False)
        tweets_data_grab = pd.concat(
            [tweets_data_grab, tweets_topic], ignore_index=True, sort=False)

    return tweets_data_grab
def df_setup(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise a chat-export frame in place and add derived columns.

    Expects the columns ``message``, ``datetime`` and ``author``. The frame
    is modified in place and also returned:

    * ``message`` is cast to ``str`` and its escape tokens (``__x__``,
      ``__a__``, ``__vv__``, ``__v__``) are replaced by ``|``, ``*``, ``"``
      and ``'``;
    * ``isMedia`` is true where the message contains ``<Media omessi>``;
    * ``datetime`` is parsed with the format ``%d/%m/%y %H:%M:%S``
      (e.g. ``14/06/15 12:52:00``);
    * ``author`` is cast to ``str``;
    * ``emojis`` holds the result of ``emojis.get`` for each message;
    * ``length`` holds the length of each (unescaped) message.

    The duration of every step is printed in milliseconds.
    """
    def _elapsed_ms(start):
        # total_seconds() covers the whole duration; the .microseconds
        # attribute is only the sub-second component and wraps after 1 s.
        return (dt.datetime.now() - start).total_seconds() * 1000

    df["message"] = df["message"].astype(str)

    # Column-wise string operations replace row-wise df.apply(axis=1): same
    # values, but they also work on an empty frame and avoid per-row overhead.
    beginning = dt.datetime.now()
    df["isMedia"] = df["message"].str.contains("<Media omessi>", regex=False)
    print(_elapsed_ms(beginning), "ms to create the isMedia column")

    beginning = dt.datetime.now()
    df["datetime"] = pd.to_datetime(df["datetime"], format="%d/%m/%y %H:%M:%S")
    print(_elapsed_ms(beginning), "ms to convert 'datetime' from string")

    beginning = dt.datetime.now()
    df["isMedia"] = df["isMedia"].astype(bool)
    df["author"] = df["author"].astype(str)
    print(_elapsed_ms(beginning), "ms to convert column types")

    beginning = dt.datetime.now()
    message = df["message"]
    # Order matters only in that it mirrors the original chain of replaces.
    for token, char in (("__x__", "|"), ("__a__", "*"),
                        ("__vv__", '"'), ("__v__", "'")):
        message = message.str.replace(token, char, regex=False)
    df["message"] = message
    print(_elapsed_ms(beginning), "ms to reformat the 'message' column")

    beginning = dt.datetime.now()
    df["emojis"] = df["message"].map(emojis.get)
    print(_elapsed_ms(beginning), "ms to create the 'emojis' column")

    beginning = dt.datetime.now()
    df["length"] = df["message"].str.len()
    print(_elapsed_ms(beginning), "ms to create the 'length' column")

    return df
async def rr_add(self, ctx, emoji: typing.Union[discord.Emoji, str], *, role: discord.Role):
    """Add a new reaction role.

    Refuses when the guild already has 20 or more reaction roles, when a
    guild emoji is not usable, or when plain text contains no emoji. On
    success the mapping ``emoji -> role`` is upserted into
    ``self.bot.reaction_roles``, the guild's role embed is rebuilt and a
    confirmation is sent.
    """
    reacts = await self.get_current_reactions(ctx.guild.id)
    if len(reacts) >= 20:
        await ctx.send("This does not support more then 20 reaction roles per guild!")
        return

    if isinstance(emoji, discord.Emoji):
        if not emoji.is_usable():
            await ctx.send("I can't use that emoji")
            return
    else:
        found = emojis.get(emoji)
        if not found:
            # Popping from an empty set would raise KeyError.
            await ctx.send("Please use a proper emoji.")
            return
        emoji = found.pop()

    emoji = str(emoji)
    await self.bot.reaction_roles.upsert({"_id": emoji, "role": role.id, "guild_id": ctx.guild.id})

    await self.rebuild_role_embed(ctx.guild.id)
    await ctx.send("That is added and good to go!")
def process_tweets(tweets):
    """Build a feature record for every tweet, keyed by tweet id.

    Each record holds the full text, the hashtag texts, word and character
    n-grams for every size in ``range(*WORD_GRAM_RANGE)`` /
    ``range(*CHARACTER_GRAM_RANGE)``, the emojis found in the text, a
    ``popular`` flag (retweet count above ``POPULAR_THRESHOLD``) and the id.
    When two tweets share an id the later one wins.
    """
    def _prepare(tweet):
        text = tweet['full_text']
        return {
            "text": text,
            "hash_tags": [tag['text'] for tag in tweet['entities']['hashtags']],
            # n-grams: https://stackoverflow.com/q/50004602
            "word_grams": {
                size: list(ngrams(text.split(), size))
                for size in range(*WORD_GRAM_RANGE)
            },
            "character_grams": {
                size: list(ngrams(text, size))
                for size in range(*CHARACTER_GRAM_RANGE)
            },
            # emoji extraction: https://stackoverflow.com/a/43146653
            "emojis": list(emojis.get(text)),
            "popular": bool(tweet['retweet_count'] > POPULAR_THRESHOLD),
            "id": tweet['id'],
        }

    return {tweet['id']: _prepare(tweet) for tweet in tweets}
def extract_emojis(text):
    """Return whatever ``emojis.get`` finds in ``text``."""
    return emojis.get(text)
f = codecs.open(file_name, 'r', encoding="utf8") Lines = f.readlines() for line in Lines: if "</div>" in line: is_text = False if is_header: header = line header = header.replace('\n', '') header = header.replace(' ', '_') is_header = False if is_text: # print(line) line = line.replace('\n', '<br>') line = line.replace('..', '<br>') line_emojis = emojis.get(line) for an_emoji in line_emojis: line.replace(an_emoji, '<br>') splitted_line = line.split('<br>') for a_line in splitted_line: if a_line == '': continue count += 1 output_list = ['NO_DATE', a_line, header] a_series = pd.Series(output_list, index=df.columns) df = df.append(a_series, ignore_index=True) # is_text = False if "<div class=\"text\">" in line: is_text = True if "<div class=\"text bold\">" in line:
async def _emoji(self, ctx, *, args=None):
    """Reply with an embed describing the emoji named in ``args``.

    Resolution order:

    1. ``commands.EmojiConverter`` (an emoji the bot can see);
    2. on ``BadArgument``, ``EMOJI_REGEX`` plus
       ``commands.PartialEmojiConverter`` (custom emoji syntax);
    3. if that fails for any reason, a unicode emoji found by
       ``emojis.get(emojis.encode(args))``.

    Custom emojis get an embed with type, id, name and usage, plus guild and
    creation details for full ``discord.Emoji`` objects. Unicode emojis get
    an embed built from ``emoji_to_url`` and the ``emojis`` database entry.
    Without ``args`` the command help is sent instead.
    """
    if args is None:
        return await self.bot.send_help(ctx)

    try:
        emoji = await commands.EmojiConverter().convert(ctx, args)
    except BadArgument:
        try:
            regex_match = EMOJI_REGEX.match(args)
            emoji = await commands.PartialEmojiConverter().convert(
                ctx, regex_match.group())
        # Any failure here (no regex match, conversion error) means "not a
        # custom emoji". Exception rather than a bare except, so that task
        # cancellation and interpreter-exit signals are not swallowed.
        except Exception:
            emoji = emojis.get(emojis.encode(args))
            if len(emoji) > 0:
                emoji = list(emoji)[0]
            else:
                return await ctx.send('Não encontrei este emoji!')

    if isinstance(emoji, (discord.Emoji, discord.PartialEmoji)):
        if hasattr(emoji, 'is_unicode_emoji') and emoji.is_unicode_emoji():
            return await ctx.reply(
                'Não tenho suporte a esse tipo de emoji!',
                mention_author=False)
        embed = discord.Embed(title='Emoji personalizado',
                              url=str(emoji.url),
                              colour=discord.Colour.random(),
                              timestamp=datetime.utcnow())
        embed.set_footer(text=f'{ctx.author}',
                         icon_url=ctx.author.avatar_url)
        embed.set_thumbnail(url=str(emoji.url))
        embed.add_field(
            name='Tipo do emoji:',
            value='`Animado`' if emoji.animated else '`Estático`',
            inline=True)
        embed.add_field(name='Id:', value=f'`{emoji.id}`', inline=True)
        embed.add_field(name='Nome:', value=f'`{emoji.name}`', inline=True)
        # The zero-width space keeps the client from rendering the emoji.
        embed.add_field(name='Uso:',
                        value='`<{}:{}:\u200b{}>`'.format(
                            'a' if emoji.animated else '',
                            emoji.name,
                            emoji.id),
                        inline=False)
        # Guild and creation date are only read for full Emoji objects.
        if isinstance(emoji, discord.Emoji):
            if emoji.guild == ctx.guild:
                embed.add_field(name='O emoji é deste servidor!',
                                value='** **',
                                inline=True)
            embed.add_field(
                name='Criado em:',
                value=
                f'`{emoji.created_at.strftime("%d/%m/%Y")}`({datetime_format(emoji.created_at)})',
                inline=True)
    else:
        url = await emoji_to_url(emoji)
        # Anything not pointing at the twemoji CDN is treated as "no image".
        if not url.startswith(
                'https://twemoji.maxcdn.com/v/latest/72x72/'):
            return await ctx.reply('Não consegui achar este emoji!',
                                   mention_author=False)
        embed = discord.Embed(
            title=f'Detalhes sobre o emoji {emoji}',
            description='Infelizmente as informações estão em inglês.',
            url=url,
            colour=discord.Colour.random(),
            timestamp=datetime.utcnow())
        # From here on ``emoji`` is the database record (or None).
        emoji = emojis.db.get_emoji_by_code(emoji)
        embed.set_thumbnail(url=url)
        if emoji is not None:
            embed.add_field(name='Emoji:',
                            value=f'\\{emoji.emoji}',
                            inline=False)
            if len(emoji.aliases) > 0:
                embed.add_field(name='aliases:',
                                value=f'`{", ".join(emoji.aliases)}`',
                                inline=False)
            if len(emoji.tags) > 0:
                embed.add_field(name='Tags:',
                                value=f'`{", ".join(emoji.tags)}`',
                                inline=False)
            if emoji.category != '':
                embed.add_field(name='Category:',
                                value=f'`{emoji.category}`',
                                inline=False)
    return await ctx.reply(embed=embed, mention_author=False)
location = tweet.author.location else: location = tweet.user.location likes = tweet.favorite_count retweets = tweet.retweet_count #use TextBlob to do sentiment analysis clean_tweet = ' '.join( re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet.text).split()) analysis = TextBlob(clean_tweet) polarity = analysis.polarity subjectivity = analysis.subjectivity #use emojis to do emoji process emoji_all = emojis.get(tweet.text) emoji_see = emojis.decode(" ".join(emoji_all)) emoji_num = emojis.count(tweet.text) #previous tweet text data, use unicode to encode, also can .decode("unicode_escape") tweet = tweet.text.encode("unicode_escape") s = pd.Series({ 'id': my_id, 'date': date, 'author': author, 'location': location, 'likes': likes, 'retweets': retweets, 'polarity': polarity, 'subjectivity': subjectivity,
def extractEmoji(pText: str):
    """Return the emojis found in ``pText`` joined by single spaces."""
    found = emojis.get(pText)
    return " ".join(found)
You can see the detail about this dataset in https://www.kaggle.com/rexhaif/emojifydata-en ''' import numpy as np import pandas as pd import re import emojis import json from textblob import TextBlob ##Collect all different kinds of emojis in the data set end then just seve the emoji list and their numbers filename = "emoji_1.txt" content = {} print(1) for line in open("emoji_1.txt", encoding='utf-16'): for i in emojis.get(line): num = line.count(i) if i not in content.keys(): content[i] = num else: content[i] = content[i] + num print(2) for line in open("emoji_2.txt", encoding='utf-16'): for i in emojis.get(line): num = line.count(i) if i not in content.keys(): content[i] = num else: content[i] = content[i] + num print(3) for line in open("emoji_3.txt", encoding='utf-16'):